{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 54707, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.827919644652421e-05, "grad_norm": 91.8160206307624, "learning_rate": 0.0, "loss": 34.0346, "step": 1 }, { "epoch": 3.655839289304842e-05, "grad_norm": 73.5999744271434, "learning_rate": 6.092916984006093e-09, "loss": 31.4361, "step": 2 }, { "epoch": 5.483758933957263e-05, "grad_norm": 51.695688615050905, "learning_rate": 1.2185833968012185e-08, "loss": 28.6464, "step": 3 }, { "epoch": 7.311678578609684e-05, "grad_norm": 50.539720587475585, "learning_rate": 1.827875095201828e-08, "loss": 27.7592, "step": 4 }, { "epoch": 9.139598223262106e-05, "grad_norm": 33.302774248523406, "learning_rate": 2.437166793602437e-08, "loss": 23.5683, "step": 5 }, { "epoch": 0.00010967517867914526, "grad_norm": 30.74690231214884, "learning_rate": 3.0464584920030467e-08, "loss": 22.8067, "step": 6 }, { "epoch": 0.0001279543751256695, "grad_norm": 23.259204581165505, "learning_rate": 3.655750190403656e-08, "loss": 22.4653, "step": 7 }, { "epoch": 0.0001462335715721937, "grad_norm": 38.374691871614225, "learning_rate": 4.265041888804266e-08, "loss": 21.5733, "step": 8 }, { "epoch": 0.0001645127680187179, "grad_norm": 27.796505852900417, "learning_rate": 4.874333587204874e-08, "loss": 21.1979, "step": 9 }, { "epoch": 0.00018279196446524212, "grad_norm": 24.10366506784843, "learning_rate": 5.483625285605484e-08, "loss": 21.5174, "step": 10 }, { "epoch": 0.00020107116091176632, "grad_norm": 16.145192145772253, "learning_rate": 6.092916984006093e-08, "loss": 20.6094, "step": 11 }, { "epoch": 0.00021935035735829052, "grad_norm": 15.63771708728853, "learning_rate": 6.702208682406704e-08, "loss": 20.3061, "step": 12 }, { "epoch": 0.00023762955380481475, "grad_norm": 14.09407904830339, "learning_rate": 7.311500380807313e-08, "loss": 20.0126, "step": 13 }, { "epoch": 0.000255908750251339, "grad_norm": 11.623036021341093, "learning_rate": 7.920792079207921e-08, "loss": 19.2338, "step": 14 }, { "epoch": 0.0002741879466978632, "grad_norm": 13.989361579308797, "learning_rate": 8.530083777608532e-08, "loss": 19.9629, "step": 15 }, { "epoch": 0.0002924671431443874, "grad_norm": 16.006656703513595, "learning_rate": 9.13937547600914e-08, "loss": 19.8983, "step": 16 }, { "epoch": 0.0003107463395909116, "grad_norm": 12.953914362703909, "learning_rate": 9.748667174409748e-08, "loss": 19.8281, "step": 17 }, { "epoch": 0.0003290255360374358, "grad_norm": 13.199082096754312, "learning_rate": 1.0357958872810359e-07, "loss": 19.4738, "step": 18 }, { "epoch": 0.00034730473248396, "grad_norm": 12.229065836228527, "learning_rate": 1.0967250571210967e-07, "loss": 18.9835, "step": 19 }, { "epoch": 0.00036558392893048424, "grad_norm": 15.118897649083674, "learning_rate": 1.1576542269611578e-07, "loss": 20.3097, "step": 20 }, { "epoch": 0.00038386312537700844, "grad_norm": 12.148133829458713, "learning_rate": 1.2185833968012187e-07, "loss": 19.2054, "step": 21 }, { "epoch": 0.00040214232182353264, "grad_norm": 11.55384831434423, "learning_rate": 1.2795125666412797e-07, "loss": 18.9609, "step": 22 }, { "epoch": 0.00042042151827005684, "grad_norm": 11.852696223714258, "learning_rate": 1.3404417364813407e-07, "loss": 19.2075, "step": 23 }, { "epoch": 0.00043870071471658104, "grad_norm": 12.726817530901487, "learning_rate": 1.4013709063214015e-07, "loss": 19.6638, "step": 24 }, { "epoch": 0.0004569799111631053, "grad_norm": 13.380375455690142, "learning_rate": 1.4623000761614625e-07, "loss": 19.8093, "step": 25 }, { "epoch": 0.0004752591076096295, "grad_norm": 9.618449769800641, "learning_rate": 1.5232292460015233e-07, "loss": 18.6443, "step": 26 }, { "epoch": 0.0004935383040561537, "grad_norm": 9.468083311223333, "learning_rate": 1.5841584158415843e-07, "loss": 19.0952, "step": 27 }, { "epoch": 0.000511817500502678, "grad_norm": 10.742512704213103, "learning_rate": 1.6450875856816453e-07, "loss": 19.2092, "step": 28 }, { "epoch": 0.0005300966969492021, "grad_norm": 9.384042455482811, "learning_rate": 1.7060167555217063e-07, "loss": 18.7381, "step": 29 }, { "epoch": 0.0005483758933957264, "grad_norm": 9.362811579992922, "learning_rate": 1.766945925361767e-07, "loss": 19.0655, "step": 30 }, { "epoch": 0.0005666550898422505, "grad_norm": 9.59019465210035, "learning_rate": 1.827875095201828e-07, "loss": 18.9933, "step": 31 }, { "epoch": 0.0005849342862887748, "grad_norm": 10.416158655166349, "learning_rate": 1.8888042650418892e-07, "loss": 19.3626, "step": 32 }, { "epoch": 0.0006032134827352989, "grad_norm": 9.362295378103198, "learning_rate": 1.9497334348819496e-07, "loss": 18.5162, "step": 33 }, { "epoch": 0.0006214926791818232, "grad_norm": 10.082680254606226, "learning_rate": 2.0106626047220107e-07, "loss": 18.8908, "step": 34 }, { "epoch": 0.0006397718756283474, "grad_norm": 11.776658098765381, "learning_rate": 2.0715917745620717e-07, "loss": 19.1424, "step": 35 }, { "epoch": 0.0006580510720748716, "grad_norm": 10.029439496961013, "learning_rate": 2.1325209444021327e-07, "loss": 19.0282, "step": 36 }, { "epoch": 0.0006763302685213958, "grad_norm": 8.464048988417401, "learning_rate": 2.1934501142421935e-07, "loss": 18.3312, "step": 37 }, { "epoch": 0.00069460946496792, "grad_norm": 10.778425266213326, "learning_rate": 2.2543792840822545e-07, "loss": 19.3784, "step": 38 }, { "epoch": 0.0007128886614144442, "grad_norm": 10.45936367907346, "learning_rate": 2.3153084539223155e-07, "loss": 18.772, "step": 39 }, { "epoch": 0.0007311678578609685, "grad_norm": 10.180810668857037, "learning_rate": 2.3762376237623766e-07, "loss": 19.0677, "step": 40 }, { "epoch": 0.0007494470543074926, "grad_norm": 9.486856492442154, "learning_rate": 2.4371667936024373e-07, "loss": 18.6111, "step": 41 }, { "epoch": 0.0007677262507540169, "grad_norm": 8.892386702121996, "learning_rate": 2.4980959634424986e-07, "loss": 18.8254, "step": 42 }, { "epoch": 0.000786005447200541, "grad_norm": 10.950408492270602, "learning_rate": 2.5590251332825594e-07, "loss": 19.4113, "step": 43 }, { "epoch": 0.0008042846436470653, "grad_norm": 10.6052965565659, "learning_rate": 2.61995430312262e-07, "loss": 19.4145, "step": 44 }, { "epoch": 0.0008225638400935895, "grad_norm": 9.928430603672055, "learning_rate": 2.6808834729626814e-07, "loss": 18.7649, "step": 45 }, { "epoch": 0.0008408430365401137, "grad_norm": 9.414699639763915, "learning_rate": 2.7418126428027417e-07, "loss": 19.1676, "step": 46 }, { "epoch": 0.0008591222329866379, "grad_norm": 10.136690731179094, "learning_rate": 2.802741812642803e-07, "loss": 18.9255, "step": 47 }, { "epoch": 0.0008774014294331621, "grad_norm": 9.41687674012297, "learning_rate": 2.8636709824828637e-07, "loss": 18.5135, "step": 48 }, { "epoch": 0.0008956806258796863, "grad_norm": 10.230953762022938, "learning_rate": 2.924600152322925e-07, "loss": 19.4077, "step": 49 }, { "epoch": 0.0009139598223262106, "grad_norm": 9.331096241633832, "learning_rate": 2.985529322162986e-07, "loss": 18.5752, "step": 50 }, { "epoch": 0.0009322390187727347, "grad_norm": 9.377359156140896, "learning_rate": 3.0464584920030465e-07, "loss": 18.6574, "step": 51 }, { "epoch": 0.000950518215219259, "grad_norm": 10.24702422065715, "learning_rate": 3.107387661843108e-07, "loss": 18.7512, "step": 52 }, { "epoch": 0.0009687974116657831, "grad_norm": 10.438749932564663, "learning_rate": 3.1683168316831686e-07, "loss": 19.1765, "step": 53 }, { "epoch": 0.0009870766081123074, "grad_norm": 9.395189052530064, "learning_rate": 3.2292460015232293e-07, "loss": 18.7879, "step": 54 }, { "epoch": 0.0010053558045588315, "grad_norm": 9.545581422460163, "learning_rate": 3.2901751713632906e-07, "loss": 18.5035, "step": 55 }, { "epoch": 0.001023635001005356, "grad_norm": 8.756905292427446, "learning_rate": 3.3511043412033514e-07, "loss": 18.8401, "step": 56 }, { "epoch": 0.00104191419745188, "grad_norm": 9.835134510268613, "learning_rate": 3.4120335110434127e-07, "loss": 18.6639, "step": 57 }, { "epoch": 0.0010601933938984042, "grad_norm": 9.329687900850928, "learning_rate": 3.472962680883473e-07, "loss": 18.5866, "step": 58 }, { "epoch": 0.0010784725903449283, "grad_norm": 10.385670344311434, "learning_rate": 3.533891850723534e-07, "loss": 19.4529, "step": 59 }, { "epoch": 0.0010967517867914527, "grad_norm": 8.319642063163402, "learning_rate": 3.594821020563595e-07, "loss": 18.2629, "step": 60 }, { "epoch": 0.0011150309832379769, "grad_norm": 10.166549933688021, "learning_rate": 3.655750190403656e-07, "loss": 19.0247, "step": 61 }, { "epoch": 0.001133310179684501, "grad_norm": 10.381261400913276, "learning_rate": 3.716679360243717e-07, "loss": 19.3155, "step": 62 }, { "epoch": 0.0011515893761310254, "grad_norm": 9.779550735131549, "learning_rate": 3.7776085300837783e-07, "loss": 18.7911, "step": 63 }, { "epoch": 0.0011698685725775495, "grad_norm": 8.899746443889216, "learning_rate": 3.838537699923839e-07, "loss": 18.8994, "step": 64 }, { "epoch": 0.0011881477690240737, "grad_norm": 9.321725176330542, "learning_rate": 3.8994668697638993e-07, "loss": 18.5516, "step": 65 }, { "epoch": 0.0012064269654705978, "grad_norm": 10.686659102112074, "learning_rate": 3.9603960396039606e-07, "loss": 19.1882, "step": 66 }, { "epoch": 0.0012247061619171222, "grad_norm": 10.278368781143437, "learning_rate": 4.0213252094440214e-07, "loss": 19.1586, "step": 67 }, { "epoch": 0.0012429853583636463, "grad_norm": 9.853789742977987, "learning_rate": 4.0822543792840826e-07, "loss": 18.8072, "step": 68 }, { "epoch": 0.0012612645548101705, "grad_norm": 8.424851011261469, "learning_rate": 4.1431835491241434e-07, "loss": 18.0205, "step": 69 }, { "epoch": 0.0012795437512566948, "grad_norm": 11.179083215450209, "learning_rate": 4.2041127189642047e-07, "loss": 19.1082, "step": 70 }, { "epoch": 0.001297822947703219, "grad_norm": 11.205875315547486, "learning_rate": 4.2650418888042655e-07, "loss": 19.1888, "step": 71 }, { "epoch": 0.0013161021441497431, "grad_norm": 9.218667827948646, "learning_rate": 4.325971058644327e-07, "loss": 18.5834, "step": 72 }, { "epoch": 0.0013343813405962675, "grad_norm": 9.114488209681602, "learning_rate": 4.386900228484387e-07, "loss": 18.3438, "step": 73 }, { "epoch": 0.0013526605370427916, "grad_norm": 9.147427856276416, "learning_rate": 4.4478293983244483e-07, "loss": 18.3276, "step": 74 }, { "epoch": 0.0013709397334893158, "grad_norm": 10.815359011016698, "learning_rate": 4.508758568164509e-07, "loss": 18.9537, "step": 75 }, { "epoch": 0.00138921892993584, "grad_norm": 11.106338765167994, "learning_rate": 4.5696877380045703e-07, "loss": 20.4775, "step": 76 }, { "epoch": 0.0014074981263823643, "grad_norm": 9.940729728721143, "learning_rate": 4.630616907844631e-07, "loss": 19.0393, "step": 77 }, { "epoch": 0.0014257773228288884, "grad_norm": 9.792218976670425, "learning_rate": 4.691546077684692e-07, "loss": 18.8914, "step": 78 }, { "epoch": 0.0014440565192754126, "grad_norm": 10.60799501326941, "learning_rate": 4.752475247524753e-07, "loss": 18.8633, "step": 79 }, { "epoch": 0.001462335715721937, "grad_norm": 9.136615230502608, "learning_rate": 4.813404417364814e-07, "loss": 18.6693, "step": 80 }, { "epoch": 0.001480614912168461, "grad_norm": 9.050880233447275, "learning_rate": 4.874333587204875e-07, "loss": 18.3404, "step": 81 }, { "epoch": 0.0014988941086149852, "grad_norm": 9.821799815962711, "learning_rate": 4.935262757044935e-07, "loss": 19.028, "step": 82 }, { "epoch": 0.0015171733050615094, "grad_norm": 9.00509710897049, "learning_rate": 4.996191926884997e-07, "loss": 18.6076, "step": 83 }, { "epoch": 0.0015354525015080338, "grad_norm": 8.4164016249053, "learning_rate": 5.057121096725057e-07, "loss": 18.1026, "step": 84 }, { "epoch": 0.001553731697954558, "grad_norm": 9.747465626841214, "learning_rate": 5.118050266565119e-07, "loss": 18.742, "step": 85 }, { "epoch": 0.001572010894401082, "grad_norm": 11.031842189183193, "learning_rate": 5.17897943640518e-07, "loss": 19.5137, "step": 86 }, { "epoch": 0.0015902900908476064, "grad_norm": 8.60703077645388, "learning_rate": 5.23990860624524e-07, "loss": 18.6961, "step": 87 }, { "epoch": 0.0016085692872941306, "grad_norm": 9.06509676047877, "learning_rate": 5.300837776085301e-07, "loss": 18.5877, "step": 88 }, { "epoch": 0.0016268484837406547, "grad_norm": 9.844496870043358, "learning_rate": 5.361766945925363e-07, "loss": 18.7855, "step": 89 }, { "epoch": 0.001645127680187179, "grad_norm": 9.277358662258633, "learning_rate": 5.422696115765424e-07, "loss": 18.7348, "step": 90 }, { "epoch": 0.0016634068766337032, "grad_norm": 9.252853720597152, "learning_rate": 5.483625285605483e-07, "loss": 18.6759, "step": 91 }, { "epoch": 0.0016816860730802274, "grad_norm": 8.84638620725077, "learning_rate": 5.544554455445545e-07, "loss": 18.4325, "step": 92 }, { "epoch": 0.0016999652695267515, "grad_norm": 10.139260624547632, "learning_rate": 5.605483625285606e-07, "loss": 19.0223, "step": 93 }, { "epoch": 0.0017182444659732759, "grad_norm": 10.300594130638922, "learning_rate": 5.666412795125667e-07, "loss": 18.9603, "step": 94 }, { "epoch": 0.0017365236624198, "grad_norm": 8.558786354910753, "learning_rate": 5.727341964965727e-07, "loss": 18.1716, "step": 95 }, { "epoch": 0.0017548028588663242, "grad_norm": 9.221680135788464, "learning_rate": 5.788271134805789e-07, "loss": 19.0363, "step": 96 }, { "epoch": 0.0017730820553128485, "grad_norm": 9.318146513867163, "learning_rate": 5.84920030464585e-07, "loss": 18.308, "step": 97 }, { "epoch": 0.0017913612517593727, "grad_norm": 8.89684007120383, "learning_rate": 5.910129474485911e-07, "loss": 18.3987, "step": 98 }, { "epoch": 0.0018096404482058968, "grad_norm": 8.770987219267509, "learning_rate": 5.971058644325972e-07, "loss": 18.2499, "step": 99 }, { "epoch": 0.0018279196446524212, "grad_norm": 8.715509627670407, "learning_rate": 6.031987814166032e-07, "loss": 18.3155, "step": 100 }, { "epoch": 0.0018461988410989453, "grad_norm": 9.157266713898336, "learning_rate": 6.092916984006093e-07, "loss": 18.7471, "step": 101 }, { "epoch": 0.0018644780375454695, "grad_norm": 8.141147516544505, "learning_rate": 6.153846153846155e-07, "loss": 17.8998, "step": 102 }, { "epoch": 0.0018827572339919936, "grad_norm": 9.099602296515032, "learning_rate": 6.214775323686216e-07, "loss": 18.5905, "step": 103 }, { "epoch": 0.001901036430438518, "grad_norm": 9.459641647038191, "learning_rate": 6.275704493526276e-07, "loss": 18.919, "step": 104 }, { "epoch": 0.0019193156268850421, "grad_norm": 8.533043987820777, "learning_rate": 6.336633663366337e-07, "loss": 18.2984, "step": 105 }, { "epoch": 0.0019375948233315663, "grad_norm": 8.831215555540918, "learning_rate": 6.397562833206399e-07, "loss": 18.4262, "step": 106 }, { "epoch": 0.0019558740197780904, "grad_norm": 9.760196162863846, "learning_rate": 6.458492003046459e-07, "loss": 18.7845, "step": 107 }, { "epoch": 0.001974153216224615, "grad_norm": 9.619501623532049, "learning_rate": 6.519421172886519e-07, "loss": 18.6491, "step": 108 }, { "epoch": 0.001992432412671139, "grad_norm": 9.120851474475579, "learning_rate": 6.580350342726581e-07, "loss": 18.7755, "step": 109 }, { "epoch": 0.002010711609117663, "grad_norm": 10.429387763449409, "learning_rate": 6.641279512566642e-07, "loss": 18.8206, "step": 110 }, { "epoch": 0.0020289908055641875, "grad_norm": 10.444785293681528, "learning_rate": 6.702208682406703e-07, "loss": 19.0005, "step": 111 }, { "epoch": 0.002047270002010712, "grad_norm": 10.653039310285068, "learning_rate": 6.763137852246764e-07, "loss": 19.1512, "step": 112 }, { "epoch": 0.0020655491984572357, "grad_norm": 8.025944215659788, "learning_rate": 6.824067022086825e-07, "loss": 18.0986, "step": 113 }, { "epoch": 0.00208382839490376, "grad_norm": 9.547795043033215, "learning_rate": 6.884996191926886e-07, "loss": 18.8097, "step": 114 }, { "epoch": 0.002102107591350284, "grad_norm": 9.19001868551479, "learning_rate": 6.945925361766946e-07, "loss": 18.7753, "step": 115 }, { "epoch": 0.0021203867877968084, "grad_norm": 9.39295941251693, "learning_rate": 7.006854531607008e-07, "loss": 18.4715, "step": 116 }, { "epoch": 0.0021386659842433328, "grad_norm": 7.970423062653836, "learning_rate": 7.067783701447068e-07, "loss": 18.0459, "step": 117 }, { "epoch": 0.0021569451806898567, "grad_norm": 9.510873550335704, "learning_rate": 7.128712871287129e-07, "loss": 18.8316, "step": 118 }, { "epoch": 0.002175224377136381, "grad_norm": 10.785468497817561, "learning_rate": 7.18964204112719e-07, "loss": 19.3941, "step": 119 }, { "epoch": 0.0021935035735829054, "grad_norm": 9.073789812838537, "learning_rate": 7.250571210967252e-07, "loss": 18.5127, "step": 120 }, { "epoch": 0.0022117827700294294, "grad_norm": 10.862325819460475, "learning_rate": 7.311500380807313e-07, "loss": 19.019, "step": 121 }, { "epoch": 0.0022300619664759537, "grad_norm": 7.8996003598539035, "learning_rate": 7.372429550647372e-07, "loss": 18.0205, "step": 122 }, { "epoch": 0.002248341162922478, "grad_norm": 8.825842860534118, "learning_rate": 7.433358720487434e-07, "loss": 18.3188, "step": 123 }, { "epoch": 0.002266620359369002, "grad_norm": 8.5897317430774, "learning_rate": 7.494287890327495e-07, "loss": 18.6975, "step": 124 }, { "epoch": 0.0022848995558155264, "grad_norm": 8.95155813372537, "learning_rate": 7.555217060167557e-07, "loss": 18.6758, "step": 125 }, { "epoch": 0.0023031787522620507, "grad_norm": 9.042809889012444, "learning_rate": 7.616146230007616e-07, "loss": 18.2421, "step": 126 }, { "epoch": 0.0023214579487085747, "grad_norm": 8.188339694755228, "learning_rate": 7.677075399847678e-07, "loss": 18.4488, "step": 127 }, { "epoch": 0.002339737145155099, "grad_norm": 9.252561792937517, "learning_rate": 7.738004569687739e-07, "loss": 18.3231, "step": 128 }, { "epoch": 0.0023580163416016234, "grad_norm": 9.19760399419201, "learning_rate": 7.798933739527799e-07, "loss": 18.5613, "step": 129 }, { "epoch": 0.0023762955380481473, "grad_norm": 9.755431876364451, "learning_rate": 7.85986290936786e-07, "loss": 18.842, "step": 130 }, { "epoch": 0.0023945747344946717, "grad_norm": 8.252542603191571, "learning_rate": 7.920792079207921e-07, "loss": 17.9852, "step": 131 }, { "epoch": 0.0024128539309411956, "grad_norm": 8.43311297243739, "learning_rate": 7.981721249047983e-07, "loss": 18.5464, "step": 132 }, { "epoch": 0.00243113312738772, "grad_norm": 9.599173769619222, "learning_rate": 8.042650418888043e-07, "loss": 18.9277, "step": 133 }, { "epoch": 0.0024494123238342443, "grad_norm": 9.308163019342764, "learning_rate": 8.103579588728105e-07, "loss": 18.9868, "step": 134 }, { "epoch": 0.0024676915202807683, "grad_norm": 8.775269468740735, "learning_rate": 8.164508758568165e-07, "loss": 18.3321, "step": 135 }, { "epoch": 0.0024859707167272926, "grad_norm": 9.4470228819566, "learning_rate": 8.225437928408227e-07, "loss": 18.7455, "step": 136 }, { "epoch": 0.002504249913173817, "grad_norm": 8.587540626226483, "learning_rate": 8.286367098248287e-07, "loss": 18.2026, "step": 137 }, { "epoch": 0.002522529109620341, "grad_norm": 9.605233117505259, "learning_rate": 8.347296268088348e-07, "loss": 18.452, "step": 138 }, { "epoch": 0.0025408083060668653, "grad_norm": 9.439766715402934, "learning_rate": 8.408225437928409e-07, "loss": 18.5597, "step": 139 }, { "epoch": 0.0025590875025133897, "grad_norm": 8.865103055248236, "learning_rate": 8.46915460776847e-07, "loss": 18.1067, "step": 140 }, { "epoch": 0.0025773666989599136, "grad_norm": 8.963452516669594, "learning_rate": 8.530083777608531e-07, "loss": 18.3106, "step": 141 }, { "epoch": 0.002595645895406438, "grad_norm": 9.31260967372063, "learning_rate": 8.591012947448592e-07, "loss": 18.1281, "step": 142 }, { "epoch": 0.0026139250918529623, "grad_norm": 9.983750461174719, "learning_rate": 8.651942117288653e-07, "loss": 18.9052, "step": 143 }, { "epoch": 0.0026322042882994862, "grad_norm": 8.525711362497992, "learning_rate": 8.712871287128713e-07, "loss": 17.9311, "step": 144 }, { "epoch": 0.0026504834847460106, "grad_norm": 8.16279592260429, "learning_rate": 8.773800456968774e-07, "loss": 18.0707, "step": 145 }, { "epoch": 0.002668762681192535, "grad_norm": 10.810956985437747, "learning_rate": 8.834729626808836e-07, "loss": 19.6098, "step": 146 }, { "epoch": 0.002687041877639059, "grad_norm": 8.418666058583149, "learning_rate": 8.895658796648897e-07, "loss": 18.3767, "step": 147 }, { "epoch": 0.0027053210740855833, "grad_norm": 7.566753590641935, "learning_rate": 8.956587966488957e-07, "loss": 17.986, "step": 148 }, { "epoch": 0.002723600270532107, "grad_norm": 8.820077881486961, "learning_rate": 9.017517136329018e-07, "loss": 17.9399, "step": 149 }, { "epoch": 0.0027418794669786316, "grad_norm": 7.835441517740366, "learning_rate": 9.07844630616908e-07, "loss": 18.1436, "step": 150 }, { "epoch": 0.002760158663425156, "grad_norm": 10.311120459482858, "learning_rate": 9.139375476009141e-07, "loss": 19.287, "step": 151 }, { "epoch": 0.00277843785987168, "grad_norm": 9.180260647695155, "learning_rate": 9.2003046458492e-07, "loss": 18.2681, "step": 152 }, { "epoch": 0.0027967170563182042, "grad_norm": 9.223803948298361, "learning_rate": 9.261233815689262e-07, "loss": 18.5626, "step": 153 }, { "epoch": 0.0028149962527647286, "grad_norm": 9.743220538497313, "learning_rate": 9.322162985529323e-07, "loss": 18.5406, "step": 154 }, { "epoch": 0.0028332754492112525, "grad_norm": 8.421411343466511, "learning_rate": 9.383092155369384e-07, "loss": 18.3646, "step": 155 }, { "epoch": 0.002851554645657777, "grad_norm": 8.990896416587637, "learning_rate": 9.444021325209444e-07, "loss": 18.4719, "step": 156 }, { "epoch": 0.0028698338421043012, "grad_norm": 8.754872875971705, "learning_rate": 9.504950495049506e-07, "loss": 18.2906, "step": 157 }, { "epoch": 0.002888113038550825, "grad_norm": 9.710686470408715, "learning_rate": 9.565879664889568e-07, "loss": 18.6634, "step": 158 }, { "epoch": 0.0029063922349973495, "grad_norm": 8.724629853904714, "learning_rate": 9.626808834729628e-07, "loss": 18.9012, "step": 159 }, { "epoch": 0.002924671431443874, "grad_norm": 9.188575498830037, "learning_rate": 9.687738004569687e-07, "loss": 18.2614, "step": 160 }, { "epoch": 0.002942950627890398, "grad_norm": 11.013102190121193, "learning_rate": 9.74866717440975e-07, "loss": 19.1223, "step": 161 }, { "epoch": 0.002961229824336922, "grad_norm": 8.394700341610353, "learning_rate": 9.809596344249811e-07, "loss": 18.1358, "step": 162 }, { "epoch": 0.0029795090207834466, "grad_norm": 8.069990243816974, "learning_rate": 9.87052551408987e-07, "loss": 17.8555, "step": 163 }, { "epoch": 0.0029977882172299705, "grad_norm": 9.303198108361302, "learning_rate": 9.931454683929933e-07, "loss": 18.468, "step": 164 }, { "epoch": 0.003016067413676495, "grad_norm": 8.335228410252673, "learning_rate": 9.992383853769994e-07, "loss": 18.0916, "step": 165 }, { "epoch": 0.0030343466101230188, "grad_norm": 8.432740981655469, "learning_rate": 1.0053313023610054e-06, "loss": 18.0996, "step": 166 }, { "epoch": 0.003052625806569543, "grad_norm": 9.519777017936155, "learning_rate": 1.0114242193450114e-06, "loss": 19.1096, "step": 167 }, { "epoch": 0.0030709050030160675, "grad_norm": 8.065844106323338, "learning_rate": 1.0175171363290176e-06, "loss": 18.1032, "step": 168 }, { "epoch": 0.0030891841994625914, "grad_norm": 9.163532440473222, "learning_rate": 1.0236100533130238e-06, "loss": 18.282, "step": 169 }, { "epoch": 0.003107463395909116, "grad_norm": 9.388304585526452, "learning_rate": 1.0297029702970297e-06, "loss": 18.6834, "step": 170 }, { "epoch": 0.00312574259235564, "grad_norm": 8.87715952646457, "learning_rate": 1.035795887281036e-06, "loss": 18.477, "step": 171 }, { "epoch": 0.003144021788802164, "grad_norm": 26.114301441663105, "learning_rate": 1.041888804265042e-06, "loss": 19.293, "step": 172 }, { "epoch": 0.0031623009852486885, "grad_norm": 9.694088824254733, "learning_rate": 1.047981721249048e-06, "loss": 18.7479, "step": 173 }, { "epoch": 0.003180580181695213, "grad_norm": 9.095561608266575, "learning_rate": 1.054074638233054e-06, "loss": 18.3212, "step": 174 }, { "epoch": 0.0031988593781417368, "grad_norm": 9.421354731090213, "learning_rate": 1.0601675552170602e-06, "loss": 18.6331, "step": 175 }, { "epoch": 0.003217138574588261, "grad_norm": 10.830755674328625, "learning_rate": 1.0662604722010664e-06, "loss": 19.3953, "step": 176 }, { "epoch": 0.0032354177710347855, "grad_norm": 8.334796100622082, "learning_rate": 1.0723533891850726e-06, "loss": 18.0036, "step": 177 }, { "epoch": 0.0032536969674813094, "grad_norm": 8.274933800538554, "learning_rate": 1.0784463061690785e-06, "loss": 18.0767, "step": 178 }, { "epoch": 0.0032719761639278338, "grad_norm": 8.952990668764464, "learning_rate": 1.0845392231530847e-06, "loss": 18.3319, "step": 179 }, { "epoch": 0.003290255360374358, "grad_norm": 7.448210693608828, "learning_rate": 1.0906321401370907e-06, "loss": 17.8476, "step": 180 }, { "epoch": 0.003308534556820882, "grad_norm": 11.033797657019463, "learning_rate": 1.0967250571210967e-06, "loss": 19.3991, "step": 181 }, { "epoch": 0.0033268137532674064, "grad_norm": 10.099842515451865, "learning_rate": 1.1028179741051028e-06, "loss": 19.1332, "step": 182 }, { "epoch": 0.0033450929497139304, "grad_norm": 9.208122392507601, "learning_rate": 1.108910891089109e-06, "loss": 18.6594, "step": 183 }, { "epoch": 0.0033633721461604547, "grad_norm": 9.323685684955574, "learning_rate": 1.1150038080731152e-06, "loss": 19.0113, "step": 184 }, { "epoch": 0.003381651342606979, "grad_norm": 9.48121086474806, "learning_rate": 1.1210967250571212e-06, "loss": 18.2684, "step": 185 }, { "epoch": 0.003399930539053503, "grad_norm": 9.458692967694427, "learning_rate": 1.1271896420411274e-06, "loss": 18.5252, "step": 186 }, { "epoch": 0.0034182097355000274, "grad_norm": 10.162976696658363, "learning_rate": 1.1332825590251333e-06, "loss": 18.7188, "step": 187 }, { "epoch": 0.0034364889319465517, "grad_norm": 7.536693138762417, "learning_rate": 1.1393754760091395e-06, "loss": 17.7561, "step": 188 }, { "epoch": 0.0034547681283930757, "grad_norm": 10.261621116891442, "learning_rate": 1.1454683929931455e-06, "loss": 19.0036, "step": 189 }, { "epoch": 0.0034730473248396, "grad_norm": 9.211901187355524, "learning_rate": 1.1515613099771517e-06, "loss": 18.6356, "step": 190 }, { "epoch": 0.0034913265212861244, "grad_norm": 9.798349541549694, "learning_rate": 1.1576542269611578e-06, "loss": 19.301, "step": 191 }, { "epoch": 0.0035096057177326483, "grad_norm": 9.408807780556515, "learning_rate": 1.1637471439451638e-06, "loss": 18.2518, "step": 192 }, { "epoch": 0.0035278849141791727, "grad_norm": 8.703383551292617, "learning_rate": 1.16984006092917e-06, "loss": 18.2204, "step": 193 }, { "epoch": 0.003546164110625697, "grad_norm": 9.964051574549913, "learning_rate": 1.175932977913176e-06, "loss": 18.758, "step": 194 }, { "epoch": 0.003564443307072221, "grad_norm": 8.45774454932824, "learning_rate": 1.1820258948971822e-06, "loss": 18.0124, "step": 195 }, { "epoch": 0.0035827225035187454, "grad_norm": 9.41053515094707, "learning_rate": 1.1881188118811881e-06, "loss": 18.6049, "step": 196 }, { "epoch": 0.0036010016999652697, "grad_norm": 7.926905776853164, "learning_rate": 1.1942117288651943e-06, "loss": 17.9763, "step": 197 }, { "epoch": 0.0036192808964117936, "grad_norm": 7.762301016854572, "learning_rate": 1.2003046458492005e-06, "loss": 18.0592, "step": 198 }, { "epoch": 0.003637560092858318, "grad_norm": 8.298271358933013, "learning_rate": 1.2063975628332065e-06, "loss": 18.0102, "step": 199 }, { "epoch": 0.0036558392893048424, "grad_norm": 10.730216617638389, "learning_rate": 1.2124904798172126e-06, "loss": 19.0529, "step": 200 }, { "epoch": 0.0036741184857513663, "grad_norm": 8.315309216617923, "learning_rate": 1.2185833968012186e-06, "loss": 18.2883, "step": 201 }, { "epoch": 0.0036923976821978907, "grad_norm": 9.937267774764383, "learning_rate": 1.2246763137852248e-06, "loss": 18.8833, "step": 202 }, { "epoch": 0.0037106768786444146, "grad_norm": 8.298280967026155, "learning_rate": 1.230769230769231e-06, "loss": 18.4287, "step": 203 }, { "epoch": 0.003728956075090939, "grad_norm": 8.872669745889823, "learning_rate": 1.236862147753237e-06, "loss": 18.3237, "step": 204 }, { "epoch": 0.0037472352715374633, "grad_norm": 9.26640734442622, "learning_rate": 1.2429550647372431e-06, "loss": 18.5356, "step": 205 }, { "epoch": 0.0037655144679839873, "grad_norm": 9.193263429338693, "learning_rate": 1.249047981721249e-06, "loss": 18.1574, "step": 206 }, { "epoch": 0.0037837936644305116, "grad_norm": 9.406715721001522, "learning_rate": 1.2551408987052553e-06, "loss": 18.5967, "step": 207 }, { "epoch": 0.003802072860877036, "grad_norm": 8.430542392894331, "learning_rate": 1.2612338156892615e-06, "loss": 18.4397, "step": 208 }, { "epoch": 0.00382035205732356, "grad_norm": 8.324568367394546, "learning_rate": 1.2673267326732674e-06, "loss": 18.1317, "step": 209 }, { "epoch": 0.0038386312537700843, "grad_norm": 8.636707220640757, "learning_rate": 1.2734196496572734e-06, "loss": 18.2539, "step": 210 }, { "epoch": 0.0038569104502166086, "grad_norm": 8.559056941183735, "learning_rate": 1.2795125666412798e-06, "loss": 18.1112, "step": 211 }, { "epoch": 0.0038751896466631326, "grad_norm": 8.349106399691129, "learning_rate": 1.2856054836252858e-06, "loss": 18.0289, "step": 212 }, { "epoch": 0.003893468843109657, "grad_norm": 9.109517130979178, "learning_rate": 1.2916984006092917e-06, "loss": 18.4914, "step": 213 }, { "epoch": 0.003911748039556181, "grad_norm": 10.17773107995757, "learning_rate": 1.297791317593298e-06, "loss": 18.686, "step": 214 }, { "epoch": 0.003930027236002705, "grad_norm": 8.880061859485329, "learning_rate": 1.3038842345773039e-06, "loss": 18.1707, "step": 215 }, { "epoch": 0.00394830643244923, "grad_norm": 10.41268299096758, "learning_rate": 1.3099771515613103e-06, "loss": 19.4722, "step": 216 }, { "epoch": 0.003966585628895754, "grad_norm": 9.19944078898899, "learning_rate": 1.3160700685453163e-06, "loss": 18.5689, "step": 217 }, { "epoch": 0.003984864825342278, "grad_norm": 8.52480330975333, "learning_rate": 1.3221629855293222e-06, "loss": 18.3901, "step": 218 }, { "epoch": 0.004003144021788802, "grad_norm": 9.08474388291791, "learning_rate": 1.3282559025133284e-06, "loss": 18.4845, "step": 219 }, { "epoch": 0.004021423218235326, "grad_norm": 9.937722849785393, "learning_rate": 1.3343488194973344e-06, "loss": 18.5381, "step": 220 }, { "epoch": 0.0040397024146818505, "grad_norm": 9.6690554776204, "learning_rate": 1.3404417364813406e-06, "loss": 18.1501, "step": 221 }, { "epoch": 0.004057981611128375, "grad_norm": 8.28729947943615, "learning_rate": 1.3465346534653467e-06, "loss": 18.0598, "step": 222 }, { "epoch": 0.004076260807574899, "grad_norm": 9.286936139854614, "learning_rate": 1.3526275704493527e-06, "loss": 18.5233, "step": 223 }, { "epoch": 0.004094540004021424, "grad_norm": 10.265707040677311, "learning_rate": 1.3587204874333587e-06, "loss": 18.3456, "step": 224 }, { "epoch": 0.004112819200467947, "grad_norm": 9.052625229520075, "learning_rate": 1.364813404417365e-06, "loss": 18.1519, "step": 225 }, { "epoch": 0.0041310983969144715, "grad_norm": 9.00678834172409, "learning_rate": 1.370906321401371e-06, "loss": 18.598, "step": 226 }, { "epoch": 0.004149377593360996, "grad_norm": 9.73419082794281, "learning_rate": 1.3769992383853772e-06, "loss": 18.5196, "step": 227 }, { "epoch": 0.00416765678980752, "grad_norm": 9.43062878041649, "learning_rate": 1.3830921553693832e-06, "loss": 18.437, "step": 228 }, { "epoch": 0.004185935986254045, "grad_norm": 8.860809691215216, "learning_rate": 1.3891850723533892e-06, "loss": 18.2498, "step": 229 }, { "epoch": 0.004204215182700568, "grad_norm": 8.461344696554631, "learning_rate": 1.3952779893373956e-06, "loss": 18.3432, "step": 230 }, { "epoch": 0.0042224943791470924, "grad_norm": 8.923589818596337, "learning_rate": 1.4013709063214015e-06, "loss": 18.1395, "step": 231 }, { "epoch": 0.004240773575593617, "grad_norm": 8.70195064898488, "learning_rate": 1.4074638233054075e-06, "loss": 18.0284, "step": 232 }, { "epoch": 0.004259052772040141, "grad_norm": 9.260996824567075, "learning_rate": 1.4135567402894137e-06, "loss": 18.4668, "step": 233 }, { "epoch": 0.0042773319684866655, "grad_norm": 9.055537513006174, "learning_rate": 1.4196496572734197e-06, "loss": 18.1067, "step": 234 }, { "epoch": 0.00429561116493319, "grad_norm": 8.58620539390862, "learning_rate": 1.4257425742574258e-06, "loss": 17.9723, "step": 235 }, { "epoch": 0.004313890361379713, "grad_norm": 9.219953464408006, "learning_rate": 1.431835491241432e-06, "loss": 18.6434, "step": 236 }, { "epoch": 0.004332169557826238, "grad_norm": 9.00950759799232, "learning_rate": 1.437928408225438e-06, "loss": 18.3838, "step": 237 }, { "epoch": 0.004350448754272762, "grad_norm": 8.951165357685648, "learning_rate": 1.4440213252094442e-06, "loss": 18.0981, "step": 238 }, { "epoch": 0.0043687279507192865, "grad_norm": 10.090790708095982, "learning_rate": 1.4501142421934503e-06, "loss": 18.4739, "step": 239 }, { "epoch": 0.004387007147165811, "grad_norm": 8.43510075285029, "learning_rate": 1.4562071591774563e-06, "loss": 18.1353, "step": 240 }, { "epoch": 0.004405286343612335, "grad_norm": 8.575581867632986, "learning_rate": 1.4623000761614625e-06, "loss": 18.0785, "step": 241 }, { "epoch": 0.004423565540058859, "grad_norm": 7.335566930392319, "learning_rate": 1.4683929931454685e-06, "loss": 17.9746, "step": 242 }, { "epoch": 0.004441844736505383, "grad_norm": 8.967314116339633, "learning_rate": 1.4744859101294744e-06, "loss": 18.5792, "step": 243 }, { "epoch": 0.0044601239329519074, "grad_norm": 9.172005768183263, "learning_rate": 1.4805788271134808e-06, "loss": 18.1535, "step": 244 }, { "epoch": 0.004478403129398432, "grad_norm": 11.596045287093936, "learning_rate": 1.4866717440974868e-06, "loss": 19.2335, "step": 245 }, { "epoch": 0.004496682325844956, "grad_norm": 9.252752460154074, "learning_rate": 1.4927646610814928e-06, "loss": 18.8617, "step": 246 }, { "epoch": 0.00451496152229148, "grad_norm": 8.579697314813979, "learning_rate": 1.498857578065499e-06, "loss": 18.2312, "step": 247 }, { "epoch": 0.004533240718738004, "grad_norm": 7.887835186000388, "learning_rate": 1.504950495049505e-06, "loss": 17.8583, "step": 248 }, { "epoch": 0.004551519915184528, "grad_norm": 9.296133202137408, "learning_rate": 1.5110434120335113e-06, "loss": 18.5319, "step": 249 }, { "epoch": 0.004569799111631053, "grad_norm": 9.58481341796332, "learning_rate": 1.5171363290175173e-06, "loss": 19.2434, "step": 250 }, { "epoch": 0.004588078308077577, "grad_norm": 7.905919594260702, "learning_rate": 1.5232292460015233e-06, "loss": 18.1045, "step": 251 }, { "epoch": 0.0046063575045241015, "grad_norm": 8.59126376514489, "learning_rate": 1.5293221629855294e-06, "loss": 18.3831, "step": 252 }, { "epoch": 0.004624636700970625, "grad_norm": 8.375949981654848, "learning_rate": 1.5354150799695356e-06, "loss": 18.3196, "step": 253 }, { "epoch": 0.004642915897417149, "grad_norm": 8.653570175094352, "learning_rate": 1.5415079969535416e-06, "loss": 18.6534, "step": 254 }, { "epoch": 0.004661195093863674, "grad_norm": 8.793798043219379, "learning_rate": 1.5476009139375478e-06, "loss": 18.3333, "step": 255 }, { "epoch": 0.004679474290310198, "grad_norm": 9.311086736443427, "learning_rate": 1.5536938309215537e-06, "loss": 18.7836, "step": 256 }, { "epoch": 0.004697753486756722, "grad_norm": 9.503701462288985, "learning_rate": 1.5597867479055597e-06, "loss": 18.6113, "step": 257 }, { "epoch": 0.004716032683203247, "grad_norm": 9.64408701783925, "learning_rate": 1.5658796648895661e-06, "loss": 18.3561, "step": 258 }, { "epoch": 0.00473431187964977, "grad_norm": 7.412750251028354, "learning_rate": 1.571972581873572e-06, "loss": 17.6636, "step": 259 }, { "epoch": 0.004752591076096295, "grad_norm": 8.565521370367357, "learning_rate": 1.5780654988575783e-06, "loss": 18.2246, "step": 260 }, { "epoch": 0.004770870272542819, "grad_norm": 9.021155353544811, "learning_rate": 1.5841584158415842e-06, "loss": 18.8278, "step": 261 }, { "epoch": 0.004789149468989343, "grad_norm": 8.911016454755051, "learning_rate": 1.5902513328255902e-06, "loss": 17.9716, "step": 262 }, { "epoch": 0.004807428665435868, "grad_norm": 9.753553688000444, "learning_rate": 1.5963442498095966e-06, "loss": 18.6498, "step": 263 }, { "epoch": 0.004825707861882391, "grad_norm": 9.410473633134915, "learning_rate": 1.6024371667936026e-06, "loss": 18.7673, "step": 264 }, { "epoch": 0.004843987058328916, "grad_norm": 8.841939654519999, "learning_rate": 1.6085300837776085e-06, "loss": 18.7812, "step": 265 }, { "epoch": 0.00486226625477544, "grad_norm": 10.138007808406874, "learning_rate": 1.6146230007616147e-06, "loss": 18.2232, "step": 266 }, { "epoch": 0.004880545451221964, "grad_norm": 7.882651548223116, "learning_rate": 1.620715917745621e-06, "loss": 18.0703, "step": 267 }, { "epoch": 0.004898824647668489, "grad_norm": 7.30363490583701, "learning_rate": 1.6268088347296269e-06, "loss": 17.7259, "step": 268 }, { "epoch": 0.004917103844115013, "grad_norm": 7.617273837055213, "learning_rate": 1.632901751713633e-06, "loss": 18.2572, "step": 269 }, { "epoch": 0.0049353830405615366, "grad_norm": 9.61420946928789, "learning_rate": 1.638994668697639e-06, "loss": 18.7611, "step": 270 }, { "epoch": 0.004953662237008061, "grad_norm": 9.261819370122486, "learning_rate": 1.6450875856816454e-06, "loss": 18.2477, "step": 271 }, { "epoch": 0.004971941433454585, "grad_norm": 8.86145477554711, "learning_rate": 1.6511805026656514e-06, "loss": 18.3001, "step": 272 }, { "epoch": 0.00499022062990111, "grad_norm": 10.384890389543454, "learning_rate": 1.6572734196496574e-06, "loss": 19.3691, "step": 273 }, { "epoch": 0.005008499826347634, "grad_norm": 8.430726901053445, "learning_rate": 1.6633663366336635e-06, "loss": 18.0541, "step": 274 }, { "epoch": 0.005026779022794158, "grad_norm": 8.983040809310916, "learning_rate": 1.6694592536176695e-06, "loss": 18.1607, "step": 275 }, { "epoch": 0.005045058219240682, "grad_norm": 8.829373292406805, "learning_rate": 1.6755521706016755e-06, "loss": 19.0493, "step": 276 }, { "epoch": 0.005063337415687206, "grad_norm": 8.995825308388472, "learning_rate": 1.6816450875856819e-06, "loss": 18.2772, "step": 277 }, { "epoch": 0.005081616612133731, "grad_norm": 9.21103924369425, "learning_rate": 1.6877380045696878e-06, "loss": 18.4878, "step": 278 }, { "epoch": 0.005099895808580255, "grad_norm": 8.451162184646366, "learning_rate": 1.693830921553694e-06, "loss": 18.3464, "step": 279 }, { "epoch": 0.005118175005026779, "grad_norm": 11.161474200142381, "learning_rate": 1.6999238385377e-06, "loss": 18.4198, "step": 280 }, { "epoch": 0.005136454201473303, "grad_norm": 9.183530687167856, "learning_rate": 1.7060167555217062e-06, "loss": 18.1488, "step": 281 }, { "epoch": 0.005154733397919827, "grad_norm": 8.647176482887394, "learning_rate": 1.7121096725057124e-06, "loss": 18.1579, "step": 282 }, { "epoch": 0.0051730125943663515, "grad_norm": 8.517489610499862, "learning_rate": 1.7182025894897183e-06, "loss": 18.3083, "step": 283 }, { "epoch": 0.005191291790812876, "grad_norm": 8.675364535322364, "learning_rate": 1.7242955064737243e-06, "loss": 18.0499, "step": 284 }, { "epoch": 0.0052095709872594, "grad_norm": 7.709633922459531, "learning_rate": 1.7303884234577307e-06, "loss": 17.971, "step": 285 }, { "epoch": 0.005227850183705925, "grad_norm": 7.786420777908942, "learning_rate": 1.7364813404417367e-06, "loss": 17.9058, "step": 286 }, { "epoch": 0.005246129380152448, "grad_norm": 9.372489454130031, "learning_rate": 1.7425742574257426e-06, "loss": 18.5079, "step": 287 }, { "epoch": 0.0052644085765989725, "grad_norm": 8.644590798969862, "learning_rate": 1.7486671744097488e-06, "loss": 18.163, "step": 288 }, { "epoch": 0.005282687773045497, "grad_norm": 7.555436323653494, "learning_rate": 1.7547600913937548e-06, "loss": 17.7009, "step": 289 }, { "epoch": 0.005300966969492021, "grad_norm": 9.88718105471376, "learning_rate": 1.7608530083777612e-06, "loss": 18.5575, "step": 290 }, { "epoch": 0.005319246165938546, "grad_norm": 9.61361164826843, "learning_rate": 1.7669459253617672e-06, "loss": 18.4487, "step": 291 }, { "epoch": 0.00533752536238507, "grad_norm": 9.052047294027663, "learning_rate": 1.7730388423457731e-06, "loss": 18.5464, "step": 292 }, { "epoch": 0.0053558045588315934, "grad_norm": 8.356682986131199, "learning_rate": 1.7791317593297793e-06, "loss": 18.055, "step": 293 }, { "epoch": 0.005374083755278118, "grad_norm": 8.852621554464067, "learning_rate": 1.7852246763137853e-06, "loss": 18.463, "step": 294 }, { "epoch": 0.005392362951724642, "grad_norm": 8.885811095965344, "learning_rate": 1.7913175932977915e-06, "loss": 18.1471, "step": 295 }, { "epoch": 0.0054106421481711665, "grad_norm": 8.926621838046268, "learning_rate": 1.7974105102817976e-06, "loss": 18.4295, "step": 296 }, { "epoch": 0.005428921344617691, "grad_norm": 8.210608181416873, "learning_rate": 1.8035034272658036e-06, "loss": 18.4575, "step": 297 }, { "epoch": 0.005447200541064214, "grad_norm": 9.173397538987054, "learning_rate": 1.8095963442498096e-06, "loss": 18.2811, "step": 298 }, { "epoch": 0.005465479737510739, "grad_norm": 8.352517539856125, "learning_rate": 1.815689261233816e-06, "loss": 17.9896, "step": 299 }, { "epoch": 0.005483758933957263, "grad_norm": 9.58651656979235, "learning_rate": 1.821782178217822e-06, "loss": 18.5783, "step": 300 }, { "epoch": 0.0055020381304037875, "grad_norm": 9.415882626435163, "learning_rate": 1.8278750952018281e-06, "loss": 18.2743, "step": 301 }, { "epoch": 0.005520317326850312, "grad_norm": 9.350881387259488, "learning_rate": 1.833968012185834e-06, "loss": 18.5922, "step": 302 }, { "epoch": 0.005538596523296836, "grad_norm": 9.428154148379228, "learning_rate": 1.84006092916984e-06, "loss": 18.6393, "step": 303 }, { "epoch": 0.00555687571974336, "grad_norm": 8.481958715323854, "learning_rate": 1.8461538461538465e-06, "loss": 18.031, "step": 304 }, { "epoch": 0.005575154916189884, "grad_norm": 8.183490386485298, "learning_rate": 1.8522467631378524e-06, "loss": 18.0689, "step": 305 }, { "epoch": 0.0055934341126364084, "grad_norm": 8.527268923870656, "learning_rate": 1.8583396801218584e-06, "loss": 18.4661, "step": 306 }, { "epoch": 0.005611713309082933, "grad_norm": 7.812079110964028, "learning_rate": 1.8644325971058646e-06, "loss": 17.8002, "step": 307 }, { "epoch": 0.005629992505529457, "grad_norm": 8.41008868031268, "learning_rate": 1.8705255140898706e-06, "loss": 18.105, "step": 308 }, { "epoch": 0.0056482717019759815, "grad_norm": 8.814591940500186, "learning_rate": 1.8766184310738767e-06, "loss": 18.508, "step": 309 }, { "epoch": 0.005666550898422505, "grad_norm": 9.81463607128983, "learning_rate": 1.882711348057883e-06, "loss": 18.5906, "step": 310 }, { "epoch": 0.005684830094869029, "grad_norm": 9.259484237090978, "learning_rate": 1.8888042650418889e-06, "loss": 18.2118, "step": 311 }, { "epoch": 0.005703109291315554, "grad_norm": 8.229928076135344, "learning_rate": 1.8948971820258953e-06, "loss": 17.8685, "step": 312 }, { "epoch": 0.005721388487762078, "grad_norm": 9.328445386502587, "learning_rate": 1.9009900990099013e-06, "loss": 18.5938, "step": 313 }, { "epoch": 0.0057396676842086025, "grad_norm": 10.30968060318531, "learning_rate": 1.9070830159939072e-06, "loss": 19.0348, "step": 314 }, { "epoch": 0.005757946880655126, "grad_norm": 9.588842801909218, "learning_rate": 1.9131759329779136e-06, "loss": 18.6145, "step": 315 }, { "epoch": 0.00577622607710165, "grad_norm": 9.160642251033686, "learning_rate": 1.9192688499619194e-06, "loss": 18.4347, "step": 316 }, { "epoch": 0.005794505273548175, "grad_norm": 9.510478117148674, "learning_rate": 1.9253617669459256e-06, "loss": 18.4841, "step": 317 }, { "epoch": 0.005812784469994699, "grad_norm": 7.720316738121152, "learning_rate": 1.9314546839299317e-06, "loss": 17.8745, "step": 318 }, { "epoch": 0.005831063666441223, "grad_norm": 8.380398366818962, "learning_rate": 1.9375476009139375e-06, "loss": 18.0486, "step": 319 }, { "epoch": 0.005849342862887748, "grad_norm": 9.883905650411798, "learning_rate": 1.9436405178979437e-06, "loss": 18.8674, "step": 320 }, { "epoch": 0.005867622059334271, "grad_norm": 8.708964463668561, "learning_rate": 1.94973343488195e-06, "loss": 18.4288, "step": 321 }, { "epoch": 0.005885901255780796, "grad_norm": 9.297137906751821, "learning_rate": 1.955826351865956e-06, "loss": 19.0322, "step": 322 }, { "epoch": 0.00590418045222732, "grad_norm": 8.618946541704723, "learning_rate": 1.9619192688499622e-06, "loss": 18.214, "step": 323 }, { "epoch": 0.005922459648673844, "grad_norm": 8.060235940220567, "learning_rate": 1.968012185833968e-06, "loss": 17.9303, "step": 324 }, { "epoch": 0.005940738845120369, "grad_norm": 7.665853382725324, "learning_rate": 1.974105102817974e-06, "loss": 17.8826, "step": 325 }, { "epoch": 0.005959018041566893, "grad_norm": 8.121066064855041, "learning_rate": 1.9801980198019803e-06, "loss": 18.1789, "step": 326 }, { "epoch": 0.005977297238013417, "grad_norm": 8.541733313908619, "learning_rate": 1.9862909367859865e-06, "loss": 18.6535, "step": 327 }, { "epoch": 0.005995576434459941, "grad_norm": 8.531476217309224, "learning_rate": 1.9923838537699923e-06, "loss": 18.0689, "step": 328 }, { "epoch": 0.006013855630906465, "grad_norm": 9.387532432564992, "learning_rate": 1.998476770753999e-06, "loss": 18.4685, "step": 329 }, { "epoch": 0.00603213482735299, "grad_norm": 8.560597959419821, "learning_rate": 2.0045696877380047e-06, "loss": 18.1564, "step": 330 }, { "epoch": 0.006050414023799514, "grad_norm": 8.543531262561194, "learning_rate": 2.010662604722011e-06, "loss": 18.0159, "step": 331 }, { "epoch": 0.0060686932202460376, "grad_norm": 9.089859495341674, "learning_rate": 2.016755521706017e-06, "loss": 18.4976, "step": 332 }, { "epoch": 0.006086972416692562, "grad_norm": 7.892896022706285, "learning_rate": 2.0228484386900228e-06, "loss": 18.1215, "step": 333 }, { "epoch": 0.006105251613139086, "grad_norm": 9.459962297194554, "learning_rate": 2.0289413556740294e-06, "loss": 18.4911, "step": 334 }, { "epoch": 0.006123530809585611, "grad_norm": 9.975390120935543, "learning_rate": 2.035034272658035e-06, "loss": 18.3436, "step": 335 }, { "epoch": 0.006141810006032135, "grad_norm": 9.206433781385257, "learning_rate": 2.0411271896420413e-06, "loss": 18.6202, "step": 336 }, { "epoch": 0.006160089202478659, "grad_norm": 9.225056037073902, "learning_rate": 2.0472201066260475e-06, "loss": 18.5441, "step": 337 }, { "epoch": 0.006178368398925183, "grad_norm": 8.79411752064298, "learning_rate": 2.0533130236100533e-06, "loss": 18.2518, "step": 338 }, { "epoch": 0.006196647595371707, "grad_norm": 7.682661005049543, "learning_rate": 2.0594059405940594e-06, "loss": 17.9822, "step": 339 }, { "epoch": 0.006214926791818232, "grad_norm": 8.617648520128705, "learning_rate": 2.0654988575780656e-06, "loss": 18.1993, "step": 340 }, { "epoch": 0.006233205988264756, "grad_norm": 9.284518303747348, "learning_rate": 2.071591774562072e-06, "loss": 18.3468, "step": 341 }, { "epoch": 0.00625148518471128, "grad_norm": 9.146487062937387, "learning_rate": 2.077684691546078e-06, "loss": 18.6132, "step": 342 }, { "epoch": 0.006269764381157805, "grad_norm": 8.55358864306958, "learning_rate": 2.083777608530084e-06, "loss": 18.2002, "step": 343 }, { "epoch": 0.006288043577604328, "grad_norm": 10.309465924549984, "learning_rate": 2.08987052551409e-06, "loss": 18.7386, "step": 344 }, { "epoch": 0.0063063227740508526, "grad_norm": 8.050315978102859, "learning_rate": 2.095963442498096e-06, "loss": 18.0105, "step": 345 }, { "epoch": 0.006324601970497377, "grad_norm": 9.290651889149846, "learning_rate": 2.1020563594821023e-06, "loss": 18.4974, "step": 346 }, { "epoch": 0.006342881166943901, "grad_norm": 8.017310403022595, "learning_rate": 2.108149276466108e-06, "loss": 18.0765, "step": 347 }, { "epoch": 0.006361160363390426, "grad_norm": 8.43604073342653, "learning_rate": 2.1142421934501147e-06, "loss": 18.2749, "step": 348 }, { "epoch": 0.006379439559836949, "grad_norm": 8.412960841835288, "learning_rate": 2.1203351104341204e-06, "loss": 18.1562, "step": 349 }, { "epoch": 0.0063977187562834735, "grad_norm": 8.57195602786084, "learning_rate": 2.1264280274181266e-06, "loss": 18.2799, "step": 350 }, { "epoch": 0.006415997952729998, "grad_norm": 7.794052012116019, "learning_rate": 2.1325209444021328e-06, "loss": 17.8709, "step": 351 }, { "epoch": 0.006434277149176522, "grad_norm": 9.982820168942267, "learning_rate": 2.1386138613861385e-06, "loss": 18.6899, "step": 352 }, { "epoch": 0.006452556345623047, "grad_norm": 10.415493642023899, "learning_rate": 2.144706778370145e-06, "loss": 18.9632, "step": 353 }, { "epoch": 0.006470835542069571, "grad_norm": 9.477248548554387, "learning_rate": 2.150799695354151e-06, "loss": 18.648, "step": 354 }, { "epoch": 0.0064891147385160945, "grad_norm": 9.470811841953198, "learning_rate": 2.156892612338157e-06, "loss": 18.5502, "step": 355 }, { "epoch": 0.006507393934962619, "grad_norm": 8.463485062752705, "learning_rate": 2.1629855293221633e-06, "loss": 18.2422, "step": 356 }, { "epoch": 0.006525673131409143, "grad_norm": 8.813364649737569, "learning_rate": 2.1690784463061694e-06, "loss": 18.5201, "step": 357 }, { "epoch": 0.0065439523278556675, "grad_norm": 9.680619522560455, "learning_rate": 2.175171363290175e-06, "loss": 18.7938, "step": 358 }, { "epoch": 0.006562231524302192, "grad_norm": 7.425556851346463, "learning_rate": 2.1812642802741814e-06, "loss": 17.9377, "step": 359 }, { "epoch": 0.006580510720748716, "grad_norm": 9.254648716313772, "learning_rate": 2.1873571972581876e-06, "loss": 19.1159, "step": 360 }, { "epoch": 0.00659878991719524, "grad_norm": 8.771527877599317, "learning_rate": 2.1934501142421933e-06, "loss": 18.3495, "step": 361 }, { "epoch": 0.006617069113641764, "grad_norm": 8.411301969877785, "learning_rate": 2.1995430312262e-06, "loss": 18.1057, "step": 362 }, { "epoch": 0.0066353483100882885, "grad_norm": 8.639194963524913, "learning_rate": 2.2056359482102057e-06, "loss": 18.071, "step": 363 }, { "epoch": 0.006653627506534813, "grad_norm": 8.385867594796746, "learning_rate": 2.211728865194212e-06, "loss": 18.3967, "step": 364 }, { "epoch": 0.006671906702981337, "grad_norm": 7.846216957233574, "learning_rate": 2.217821782178218e-06, "loss": 17.9985, "step": 365 }, { "epoch": 0.006690185899427861, "grad_norm": 9.3430447459956, "learning_rate": 2.223914699162224e-06, "loss": 18.239, "step": 366 }, { "epoch": 0.006708465095874385, "grad_norm": 8.99258309655721, "learning_rate": 2.2300076161462304e-06, "loss": 18.3144, "step": 367 }, { "epoch": 0.0067267442923209094, "grad_norm": 8.561083662711514, "learning_rate": 2.236100533130236e-06, "loss": 18.0658, "step": 368 }, { "epoch": 0.006745023488767434, "grad_norm": 8.278398615614147, "learning_rate": 2.2421934501142424e-06, "loss": 18.2668, "step": 369 }, { "epoch": 0.006763302685213958, "grad_norm": 9.086882339107945, "learning_rate": 2.2482863670982485e-06, "loss": 18.5646, "step": 370 }, { "epoch": 0.0067815818816604825, "grad_norm": 8.594250049493938, "learning_rate": 2.2543792840822547e-06, "loss": 18.2993, "step": 371 }, { "epoch": 0.006799861078107006, "grad_norm": 7.963099594471555, "learning_rate": 2.2604722010662605e-06, "loss": 17.8993, "step": 372 }, { "epoch": 0.00681814027455353, "grad_norm": 8.712076673097176, "learning_rate": 2.2665651180502667e-06, "loss": 18.2929, "step": 373 }, { "epoch": 0.006836419471000055, "grad_norm": 8.612392279685967, "learning_rate": 2.272658035034273e-06, "loss": 18.394, "step": 374 }, { "epoch": 0.006854698667446579, "grad_norm": 7.550688028009268, "learning_rate": 2.278750952018279e-06, "loss": 17.8218, "step": 375 }, { "epoch": 0.0068729778638931035, "grad_norm": 8.913047191507939, "learning_rate": 2.284843869002285e-06, "loss": 18.3896, "step": 376 }, { "epoch": 0.006891257060339628, "grad_norm": 8.22715602929957, "learning_rate": 2.290936785986291e-06, "loss": 17.933, "step": 377 }, { "epoch": 0.006909536256786151, "grad_norm": 7.2560412530408795, "learning_rate": 2.297029702970297e-06, "loss": 17.7516, "step": 378 }, { "epoch": 0.006927815453232676, "grad_norm": 8.175683719428385, "learning_rate": 2.3031226199543033e-06, "loss": 17.8862, "step": 379 }, { "epoch": 0.0069460946496792, "grad_norm": 9.035425554274873, "learning_rate": 2.309215536938309e-06, "loss": 18.2054, "step": 380 }, { "epoch": 0.0069643738461257244, "grad_norm": 7.955366223753293, "learning_rate": 2.3153084539223157e-06, "loss": 17.9941, "step": 381 }, { "epoch": 0.006982653042572249, "grad_norm": 8.114741230412864, "learning_rate": 2.3214013709063215e-06, "loss": 18.2079, "step": 382 }, { "epoch": 0.007000932239018773, "grad_norm": 9.849098947167999, "learning_rate": 2.3274942878903276e-06, "loss": 18.8764, "step": 383 }, { "epoch": 0.007019211435465297, "grad_norm": 8.816246921630963, "learning_rate": 2.333587204874334e-06, "loss": 18.23, "step": 384 }, { "epoch": 0.007037490631911821, "grad_norm": 8.478770203163386, "learning_rate": 2.33968012185834e-06, "loss": 18.0982, "step": 385 }, { "epoch": 0.007055769828358345, "grad_norm": 8.73342392337039, "learning_rate": 2.345773038842346e-06, "loss": 18.2808, "step": 386 }, { "epoch": 0.00707404902480487, "grad_norm": 8.551873904980154, "learning_rate": 2.351865955826352e-06, "loss": 18.4912, "step": 387 }, { "epoch": 0.007092328221251394, "grad_norm": 7.85190380133635, "learning_rate": 2.357958872810358e-06, "loss": 18.053, "step": 388 }, { "epoch": 0.007110607417697918, "grad_norm": 7.448370159944098, "learning_rate": 2.3640517897943643e-06, "loss": 17.6652, "step": 389 }, { "epoch": 0.007128886614144442, "grad_norm": 8.734311497128099, "learning_rate": 2.3701447067783705e-06, "loss": 18.2995, "step": 390 }, { "epoch": 0.007147165810590966, "grad_norm": 9.316955074087764, "learning_rate": 2.3762376237623762e-06, "loss": 19.1144, "step": 391 }, { "epoch": 0.007165445007037491, "grad_norm": 7.268656975869662, "learning_rate": 2.3823305407463824e-06, "loss": 17.8357, "step": 392 }, { "epoch": 0.007183724203484015, "grad_norm": 8.476905541872776, "learning_rate": 2.3884234577303886e-06, "loss": 17.9871, "step": 393 }, { "epoch": 0.007202003399930539, "grad_norm": 9.06955839869183, "learning_rate": 2.3945163747143944e-06, "loss": 18.0771, "step": 394 }, { "epoch": 0.007220282596377063, "grad_norm": 9.233554569688772, "learning_rate": 2.400609291698401e-06, "loss": 18.3878, "step": 395 }, { "epoch": 0.007238561792823587, "grad_norm": 9.262697112074246, "learning_rate": 2.4067022086824067e-06, "loss": 18.0157, "step": 396 }, { "epoch": 0.007256840989270112, "grad_norm": 8.806520720371429, "learning_rate": 2.412795125666413e-06, "loss": 18.2733, "step": 397 }, { "epoch": 0.007275120185716636, "grad_norm": 9.965781203723397, "learning_rate": 2.418888042650419e-06, "loss": 18.6585, "step": 398 }, { "epoch": 0.00729339938216316, "grad_norm": 7.6022212125416475, "learning_rate": 2.4249809596344253e-06, "loss": 17.5579, "step": 399 }, { "epoch": 0.007311678578609685, "grad_norm": 8.688091981163637, "learning_rate": 2.4310738766184315e-06, "loss": 18.3117, "step": 400 }, { "epoch": 0.007329957775056208, "grad_norm": 10.294080040154196, "learning_rate": 2.4371667936024372e-06, "loss": 18.8181, "step": 401 }, { "epoch": 0.007348236971502733, "grad_norm": 8.829931437470595, "learning_rate": 2.4432597105864434e-06, "loss": 18.1261, "step": 402 }, { "epoch": 0.007366516167949257, "grad_norm": 8.191147431313302, "learning_rate": 2.4493526275704496e-06, "loss": 18.0081, "step": 403 }, { "epoch": 0.007384795364395781, "grad_norm": 7.470300626118907, "learning_rate": 2.4554455445544558e-06, "loss": 17.5224, "step": 404 }, { "epoch": 0.007403074560842306, "grad_norm": 9.007826178671621, "learning_rate": 2.461538461538462e-06, "loss": 18.4673, "step": 405 }, { "epoch": 0.007421353757288829, "grad_norm": 7.5259895720074335, "learning_rate": 2.4676313785224677e-06, "loss": 18.0889, "step": 406 }, { "epoch": 0.0074396329537353536, "grad_norm": 8.50269065120894, "learning_rate": 2.473724295506474e-06, "loss": 18.4111, "step": 407 }, { "epoch": 0.007457912150181878, "grad_norm": 10.621098192229644, "learning_rate": 2.47981721249048e-06, "loss": 18.6, "step": 408 }, { "epoch": 0.007476191346628402, "grad_norm": 7.89152843253615, "learning_rate": 2.4859101294744863e-06, "loss": 17.7234, "step": 409 }, { "epoch": 0.007494470543074927, "grad_norm": 7.698598794778496, "learning_rate": 2.492003046458492e-06, "loss": 17.9908, "step": 410 }, { "epoch": 0.007512749739521451, "grad_norm": 8.585157258990362, "learning_rate": 2.498095963442498e-06, "loss": 18.3271, "step": 411 }, { "epoch": 0.0075310289359679745, "grad_norm": 8.91463773765338, "learning_rate": 2.504188880426505e-06, "loss": 18.1335, "step": 412 }, { "epoch": 0.007549308132414499, "grad_norm": 7.890245109336917, "learning_rate": 2.5102817974105106e-06, "loss": 17.9491, "step": 413 }, { "epoch": 0.007567587328861023, "grad_norm": 9.457275052109484, "learning_rate": 2.5163747143945167e-06, "loss": 18.5356, "step": 414 }, { "epoch": 0.007585866525307548, "grad_norm": 8.431178749153313, "learning_rate": 2.522467631378523e-06, "loss": 18.3072, "step": 415 }, { "epoch": 0.007604145721754072, "grad_norm": 7.7381910924238175, "learning_rate": 2.5285605483625287e-06, "loss": 17.778, "step": 416 }, { "epoch": 0.007622424918200596, "grad_norm": 8.475808326620589, "learning_rate": 2.534653465346535e-06, "loss": 17.9745, "step": 417 }, { "epoch": 0.00764070411464712, "grad_norm": 11.878356849600886, "learning_rate": 2.540746382330541e-06, "loss": 19.1678, "step": 418 }, { "epoch": 0.007658983311093644, "grad_norm": 8.731099408033131, "learning_rate": 2.546839299314547e-06, "loss": 18.3064, "step": 419 }, { "epoch": 0.0076772625075401685, "grad_norm": 9.373694875302448, "learning_rate": 2.552932216298553e-06, "loss": 18.4102, "step": 420 }, { "epoch": 0.007695541703986693, "grad_norm": 9.479865504833226, "learning_rate": 2.5590251332825596e-06, "loss": 18.679, "step": 421 }, { "epoch": 0.007713820900433217, "grad_norm": 9.656544881683654, "learning_rate": 2.565118050266565e-06, "loss": 18.8041, "step": 422 }, { "epoch": 0.007732100096879741, "grad_norm": 8.426811539874398, "learning_rate": 2.5712109672505715e-06, "loss": 18.1999, "step": 423 }, { "epoch": 0.007750379293326265, "grad_norm": 7.631132413151855, "learning_rate": 2.5773038842345777e-06, "loss": 17.8451, "step": 424 }, { "epoch": 0.0077686584897727895, "grad_norm": 9.967810100187242, "learning_rate": 2.5833968012185835e-06, "loss": 18.3001, "step": 425 }, { "epoch": 0.007786937686219314, "grad_norm": 10.34311365776934, "learning_rate": 2.5894897182025897e-06, "loss": 18.9942, "step": 426 }, { "epoch": 0.007805216882665838, "grad_norm": 8.436342406167185, "learning_rate": 2.595582635186596e-06, "loss": 18.5091, "step": 427 }, { "epoch": 0.007823496079112362, "grad_norm": 8.084051116156678, "learning_rate": 2.6016755521706016e-06, "loss": 18.2411, "step": 428 }, { "epoch": 0.007841775275558887, "grad_norm": 9.286977568279523, "learning_rate": 2.6077684691546078e-06, "loss": 18.3457, "step": 429 }, { "epoch": 0.00786005447200541, "grad_norm": 7.550128803367409, "learning_rate": 2.6138613861386144e-06, "loss": 17.8468, "step": 430 }, { "epoch": 0.007878333668451936, "grad_norm": 9.730459473486697, "learning_rate": 2.6199543031226206e-06, "loss": 18.5781, "step": 431 }, { "epoch": 0.00789661286489846, "grad_norm": 9.466978400057728, "learning_rate": 2.6260472201066263e-06, "loss": 18.767, "step": 432 }, { "epoch": 0.007914892061344983, "grad_norm": 8.812812776275303, "learning_rate": 2.6321401370906325e-06, "loss": 18.279, "step": 433 }, { "epoch": 0.007933171257791508, "grad_norm": 9.176005654446904, "learning_rate": 2.6382330540746387e-06, "loss": 18.8576, "step": 434 }, { "epoch": 0.007951450454238031, "grad_norm": 8.825127776543228, "learning_rate": 2.6443259710586444e-06, "loss": 18.334, "step": 435 }, { "epoch": 0.007969729650684557, "grad_norm": 8.78079113285075, "learning_rate": 2.6504188880426506e-06, "loss": 17.8408, "step": 436 }, { "epoch": 0.00798800884713108, "grad_norm": 8.891443387231634, "learning_rate": 2.656511805026657e-06, "loss": 18.2782, "step": 437 }, { "epoch": 0.008006288043577604, "grad_norm": 8.64136165084598, "learning_rate": 2.6626047220106626e-06, "loss": 18.2856, "step": 438 }, { "epoch": 0.008024567240024129, "grad_norm": 11.422630324831495, "learning_rate": 2.6686976389946687e-06, "loss": 19.0644, "step": 439 }, { "epoch": 0.008042846436470652, "grad_norm": 8.816431365246043, "learning_rate": 2.6747905559786754e-06, "loss": 18.2401, "step": 440 }, { "epoch": 0.008061125632917178, "grad_norm": 8.278936924735184, "learning_rate": 2.680883472962681e-06, "loss": 17.7142, "step": 441 }, { "epoch": 0.008079404829363701, "grad_norm": 9.882950868408622, "learning_rate": 2.6869763899466873e-06, "loss": 18.6747, "step": 442 }, { "epoch": 0.008097684025810225, "grad_norm": 8.455266639438943, "learning_rate": 2.6930693069306935e-06, "loss": 18.5501, "step": 443 }, { "epoch": 0.00811596322225675, "grad_norm": 10.369712253033393, "learning_rate": 2.6991622239146992e-06, "loss": 18.9826, "step": 444 }, { "epoch": 0.008134242418703273, "grad_norm": 7.588267951842359, "learning_rate": 2.7052551408987054e-06, "loss": 17.7885, "step": 445 }, { "epoch": 0.008152521615149799, "grad_norm": 7.933435039710536, "learning_rate": 2.7113480578827116e-06, "loss": 17.9875, "step": 446 }, { "epoch": 0.008170800811596322, "grad_norm": 9.844989948105257, "learning_rate": 2.7174409748667174e-06, "loss": 18.874, "step": 447 }, { "epoch": 0.008189080008042847, "grad_norm": 7.816887348231905, "learning_rate": 2.7235338918507235e-06, "loss": 17.8547, "step": 448 }, { "epoch": 0.00820735920448937, "grad_norm": 8.319887922416477, "learning_rate": 2.72962680883473e-06, "loss": 18.1645, "step": 449 }, { "epoch": 0.008225638400935894, "grad_norm": 8.794004300642134, "learning_rate": 2.7357197258187355e-06, "loss": 18.6253, "step": 450 }, { "epoch": 0.00824391759738242, "grad_norm": 8.052292213418909, "learning_rate": 2.741812642802742e-06, "loss": 17.8093, "step": 451 }, { "epoch": 0.008262196793828943, "grad_norm": 9.269781357392727, "learning_rate": 2.7479055597867483e-06, "loss": 18.4396, "step": 452 }, { "epoch": 0.008280475990275468, "grad_norm": 8.652844062265205, "learning_rate": 2.7539984767707544e-06, "loss": 18.473, "step": 453 }, { "epoch": 0.008298755186721992, "grad_norm": 7.383249069054072, "learning_rate": 2.76009139375476e-06, "loss": 17.7205, "step": 454 }, { "epoch": 0.008317034383168515, "grad_norm": 8.18718452206247, "learning_rate": 2.7661843107387664e-06, "loss": 17.7939, "step": 455 }, { "epoch": 0.00833531357961504, "grad_norm": 10.176755743735992, "learning_rate": 2.7722772277227726e-06, "loss": 18.5029, "step": 456 }, { "epoch": 0.008353592776061564, "grad_norm": 9.711682630724553, "learning_rate": 2.7783701447067783e-06, "loss": 18.4737, "step": 457 }, { "epoch": 0.00837187197250809, "grad_norm": 9.317443047461476, "learning_rate": 2.784463061690785e-06, "loss": 18.1023, "step": 458 }, { "epoch": 0.008390151168954613, "grad_norm": 8.11083945397948, "learning_rate": 2.790555978674791e-06, "loss": 17.7362, "step": 459 }, { "epoch": 0.008408430365401136, "grad_norm": 7.3596780356905, "learning_rate": 2.796648895658797e-06, "loss": 18.0177, "step": 460 }, { "epoch": 0.008426709561847661, "grad_norm": 9.332385377216356, "learning_rate": 2.802741812642803e-06, "loss": 18.4037, "step": 461 }, { "epoch": 0.008444988758294185, "grad_norm": 8.070677476220393, "learning_rate": 2.8088347296268092e-06, "loss": 18.1327, "step": 462 }, { "epoch": 0.00846326795474071, "grad_norm": 10.043995385236958, "learning_rate": 2.814927646610815e-06, "loss": 18.7866, "step": 463 }, { "epoch": 0.008481547151187234, "grad_norm": 9.612765078676649, "learning_rate": 2.821020563594821e-06, "loss": 18.3719, "step": 464 }, { "epoch": 0.008499826347633759, "grad_norm": 9.096496728607754, "learning_rate": 2.8271134805788274e-06, "loss": 18.3143, "step": 465 }, { "epoch": 0.008518105544080282, "grad_norm": 8.650570824357551, "learning_rate": 2.833206397562833e-06, "loss": 18.2524, "step": 466 }, { "epoch": 0.008536384740526806, "grad_norm": 8.783777135279273, "learning_rate": 2.8392993145468393e-06, "loss": 18.1164, "step": 467 }, { "epoch": 0.008554663936973331, "grad_norm": 9.136119852739785, "learning_rate": 2.845392231530846e-06, "loss": 18.4177, "step": 468 }, { "epoch": 0.008572943133419855, "grad_norm": 6.913422302003662, "learning_rate": 2.8514851485148517e-06, "loss": 17.6912, "step": 469 }, { "epoch": 0.00859122232986638, "grad_norm": 9.128585392774562, "learning_rate": 2.857578065498858e-06, "loss": 18.6487, "step": 470 }, { "epoch": 0.008609501526312903, "grad_norm": 9.215913965517334, "learning_rate": 2.863670982482864e-06, "loss": 18.528, "step": 471 }, { "epoch": 0.008627780722759427, "grad_norm": 7.578818180282947, "learning_rate": 2.86976389946687e-06, "loss": 17.693, "step": 472 }, { "epoch": 0.008646059919205952, "grad_norm": 8.458790231416076, "learning_rate": 2.875856816450876e-06, "loss": 18.4237, "step": 473 }, { "epoch": 0.008664339115652476, "grad_norm": 8.536550233318124, "learning_rate": 2.881949733434882e-06, "loss": 18.6088, "step": 474 }, { "epoch": 0.008682618312099, "grad_norm": 8.39836290974198, "learning_rate": 2.8880426504188883e-06, "loss": 18.0771, "step": 475 }, { "epoch": 0.008700897508545524, "grad_norm": 8.847571763064265, "learning_rate": 2.894135567402894e-06, "loss": 18.2639, "step": 476 }, { "epoch": 0.008719176704992048, "grad_norm": 8.476705894783775, "learning_rate": 2.9002284843869007e-06, "loss": 17.8502, "step": 477 }, { "epoch": 0.008737455901438573, "grad_norm": 8.431699688669129, "learning_rate": 2.906321401370907e-06, "loss": 18.1192, "step": 478 }, { "epoch": 0.008755735097885096, "grad_norm": 10.200575097290223, "learning_rate": 2.9124143183549126e-06, "loss": 18.5815, "step": 479 }, { "epoch": 0.008774014294331622, "grad_norm": 8.227135289405737, "learning_rate": 2.918507235338919e-06, "loss": 18.035, "step": 480 }, { "epoch": 0.008792293490778145, "grad_norm": 9.188182347644483, "learning_rate": 2.924600152322925e-06, "loss": 18.5853, "step": 481 }, { "epoch": 0.00881057268722467, "grad_norm": 8.748967205379937, "learning_rate": 2.9306930693069308e-06, "loss": 18.3194, "step": 482 }, { "epoch": 0.008828851883671194, "grad_norm": 8.974929578637878, "learning_rate": 2.936785986290937e-06, "loss": 18.4196, "step": 483 }, { "epoch": 0.008847131080117717, "grad_norm": 8.726187193187672, "learning_rate": 2.942878903274943e-06, "loss": 18.5871, "step": 484 }, { "epoch": 0.008865410276564243, "grad_norm": 8.242732808561213, "learning_rate": 2.948971820258949e-06, "loss": 18.1831, "step": 485 }, { "epoch": 0.008883689473010766, "grad_norm": 8.258898171491985, "learning_rate": 2.9550647372429555e-06, "loss": 18.0755, "step": 486 }, { "epoch": 0.008901968669457291, "grad_norm": 8.393333887873215, "learning_rate": 2.9611576542269617e-06, "loss": 18.3118, "step": 487 }, { "epoch": 0.008920247865903815, "grad_norm": 8.683881824599178, "learning_rate": 2.9672505712109674e-06, "loss": 18.2373, "step": 488 }, { "epoch": 0.008938527062350338, "grad_norm": 10.074471771430872, "learning_rate": 2.9733434881949736e-06, "loss": 18.8629, "step": 489 }, { "epoch": 0.008956806258796864, "grad_norm": 9.057481759135253, "learning_rate": 2.97943640517898e-06, "loss": 18.1558, "step": 490 }, { "epoch": 0.008975085455243387, "grad_norm": 8.608543843136571, "learning_rate": 2.9855293221629856e-06, "loss": 18.1241, "step": 491 }, { "epoch": 0.008993364651689912, "grad_norm": 8.893306631487548, "learning_rate": 2.9916222391469917e-06, "loss": 18.4825, "step": 492 }, { "epoch": 0.009011643848136436, "grad_norm": 8.26756330038832, "learning_rate": 2.997715156130998e-06, "loss": 18.3376, "step": 493 }, { "epoch": 0.00902992304458296, "grad_norm": 10.071683744373866, "learning_rate": 3.0038080731150045e-06, "loss": 18.6427, "step": 494 }, { "epoch": 0.009048202241029485, "grad_norm": 7.610212877203644, "learning_rate": 3.00990099009901e-06, "loss": 17.6488, "step": 495 }, { "epoch": 0.009066481437476008, "grad_norm": 8.866784008361785, "learning_rate": 3.0159939070830165e-06, "loss": 18.5203, "step": 496 }, { "epoch": 0.009084760633922533, "grad_norm": 8.267221539992438, "learning_rate": 3.0220868240670226e-06, "loss": 18.162, "step": 497 }, { "epoch": 0.009103039830369057, "grad_norm": 9.833899972727973, "learning_rate": 3.0281797410510284e-06, "loss": 18.4136, "step": 498 }, { "epoch": 0.009121319026815582, "grad_norm": 8.598654687238412, "learning_rate": 3.0342726580350346e-06, "loss": 18.1908, "step": 499 }, { "epoch": 0.009139598223262105, "grad_norm": 8.83631830602541, "learning_rate": 3.0403655750190408e-06, "loss": 18.255, "step": 500 }, { "epoch": 0.009157877419708629, "grad_norm": 8.967581726984752, "learning_rate": 3.0464584920030465e-06, "loss": 18.3796, "step": 501 }, { "epoch": 0.009176156616155154, "grad_norm": 8.391512538029074, "learning_rate": 3.0525514089870527e-06, "loss": 18.1908, "step": 502 }, { "epoch": 0.009194435812601678, "grad_norm": 8.489402237604713, "learning_rate": 3.058644325971059e-06, "loss": 18.0274, "step": 503 }, { "epoch": 0.009212715009048203, "grad_norm": 9.185868381210456, "learning_rate": 3.0647372429550646e-06, "loss": 18.7543, "step": 504 }, { "epoch": 0.009230994205494726, "grad_norm": 8.64799578928608, "learning_rate": 3.0708301599390713e-06, "loss": 18.178, "step": 505 }, { "epoch": 0.00924927340194125, "grad_norm": 8.024152009533692, "learning_rate": 3.0769230769230774e-06, "loss": 17.8823, "step": 506 }, { "epoch": 0.009267552598387775, "grad_norm": 8.913452620302673, "learning_rate": 3.083015993907083e-06, "loss": 18.2871, "step": 507 }, { "epoch": 0.009285831794834299, "grad_norm": 8.739662477117557, "learning_rate": 3.0891089108910894e-06, "loss": 17.8769, "step": 508 }, { "epoch": 0.009304110991280824, "grad_norm": 8.00629871672884, "learning_rate": 3.0952018278750956e-06, "loss": 18.0679, "step": 509 }, { "epoch": 0.009322390187727347, "grad_norm": 9.412734562267573, "learning_rate": 3.1012947448591013e-06, "loss": 18.6921, "step": 510 }, { "epoch": 0.009340669384173871, "grad_norm": 7.921780414150499, "learning_rate": 3.1073876618431075e-06, "loss": 17.9678, "step": 511 }, { "epoch": 0.009358948580620396, "grad_norm": 7.302084134718715, "learning_rate": 3.1134805788271137e-06, "loss": 17.5462, "step": 512 }, { "epoch": 0.00937722777706692, "grad_norm": 10.521947594287234, "learning_rate": 3.1195734958111194e-06, "loss": 19.0029, "step": 513 }, { "epoch": 0.009395506973513445, "grad_norm": 8.848198510870068, "learning_rate": 3.125666412795126e-06, "loss": 18.1732, "step": 514 }, { "epoch": 0.009413786169959968, "grad_norm": 7.693917315803662, "learning_rate": 3.1317593297791322e-06, "loss": 17.8773, "step": 515 }, { "epoch": 0.009432065366406494, "grad_norm": 10.244409066966945, "learning_rate": 3.1378522467631384e-06, "loss": 18.6571, "step": 516 }, { "epoch": 0.009450344562853017, "grad_norm": 7.874769630024627, "learning_rate": 3.143945163747144e-06, "loss": 17.8227, "step": 517 }, { "epoch": 0.00946862375929954, "grad_norm": 7.241900829546251, "learning_rate": 3.1500380807311503e-06, "loss": 17.5432, "step": 518 }, { "epoch": 0.009486902955746066, "grad_norm": 8.907781822473183, "learning_rate": 3.1561309977151565e-06, "loss": 18.0927, "step": 519 }, { "epoch": 0.00950518215219259, "grad_norm": 8.46348947828814, "learning_rate": 3.1622239146991623e-06, "loss": 18.3927, "step": 520 }, { "epoch": 0.009523461348639115, "grad_norm": 9.445588967967891, "learning_rate": 3.1683168316831685e-06, "loss": 18.5475, "step": 521 }, { "epoch": 0.009541740545085638, "grad_norm": 8.61062653825342, "learning_rate": 3.174409748667175e-06, "loss": 18.1747, "step": 522 }, { "epoch": 0.009560019741532162, "grad_norm": 8.633999487272066, "learning_rate": 3.1805026656511804e-06, "loss": 18.1129, "step": 523 }, { "epoch": 0.009578298937978687, "grad_norm": 8.65307697819484, "learning_rate": 3.186595582635187e-06, "loss": 18.0431, "step": 524 }, { "epoch": 0.00959657813442521, "grad_norm": 9.227500699335675, "learning_rate": 3.192688499619193e-06, "loss": 18.4762, "step": 525 }, { "epoch": 0.009614857330871735, "grad_norm": 8.172972022293687, "learning_rate": 3.198781416603199e-06, "loss": 18.3787, "step": 526 }, { "epoch": 0.009633136527318259, "grad_norm": 7.785109284311084, "learning_rate": 3.204874333587205e-06, "loss": 17.7882, "step": 527 }, { "epoch": 0.009651415723764782, "grad_norm": 9.420314791564312, "learning_rate": 3.2109672505712113e-06, "loss": 18.5747, "step": 528 }, { "epoch": 0.009669694920211308, "grad_norm": 9.244874151341696, "learning_rate": 3.217060167555217e-06, "loss": 18.0547, "step": 529 }, { "epoch": 0.009687974116657831, "grad_norm": 8.311126105224647, "learning_rate": 3.2231530845392233e-06, "loss": 18.0579, "step": 530 }, { "epoch": 0.009706253313104356, "grad_norm": 8.997860459062794, "learning_rate": 3.2292460015232294e-06, "loss": 18.2353, "step": 531 }, { "epoch": 0.00972453250955088, "grad_norm": 8.59961107594842, "learning_rate": 3.235338918507235e-06, "loss": 18.2798, "step": 532 }, { "epoch": 0.009742811705997405, "grad_norm": 9.227733614804333, "learning_rate": 3.241431835491242e-06, "loss": 18.3154, "step": 533 }, { "epoch": 0.009761090902443929, "grad_norm": 8.635515736231104, "learning_rate": 3.247524752475248e-06, "loss": 18.5925, "step": 534 }, { "epoch": 0.009779370098890452, "grad_norm": 8.32479719423018, "learning_rate": 3.2536176694592537e-06, "loss": 17.6423, "step": 535 }, { "epoch": 0.009797649295336977, "grad_norm": 8.790696290335909, "learning_rate": 3.25971058644326e-06, "loss": 18.4782, "step": 536 }, { "epoch": 0.009815928491783501, "grad_norm": 9.479560069100044, "learning_rate": 3.265803503427266e-06, "loss": 18.1152, "step": 537 }, { "epoch": 0.009834207688230026, "grad_norm": 8.192728144832243, "learning_rate": 3.2718964204112723e-06, "loss": 18.0095, "step": 538 }, { "epoch": 0.00985248688467655, "grad_norm": 7.876454527850719, "learning_rate": 3.277989337395278e-06, "loss": 17.8086, "step": 539 }, { "epoch": 0.009870766081123073, "grad_norm": 8.067141115388166, "learning_rate": 3.2840822543792842e-06, "loss": 18.0498, "step": 540 }, { "epoch": 0.009889045277569598, "grad_norm": 9.316743951006133, "learning_rate": 3.290175171363291e-06, "loss": 18.3959, "step": 541 }, { "epoch": 0.009907324474016122, "grad_norm": 8.47902828845691, "learning_rate": 3.2962680883472966e-06, "loss": 18.0238, "step": 542 }, { "epoch": 0.009925603670462647, "grad_norm": 8.856677568217423, "learning_rate": 3.3023610053313028e-06, "loss": 18.2624, "step": 543 }, { "epoch": 0.00994388286690917, "grad_norm": 8.443474590903095, "learning_rate": 3.308453922315309e-06, "loss": 18.1976, "step": 544 }, { "epoch": 0.009962162063355694, "grad_norm": 9.36612442369136, "learning_rate": 3.3145468392993147e-06, "loss": 18.8425, "step": 545 }, { "epoch": 0.00998044125980222, "grad_norm": 7.064347581584799, "learning_rate": 3.320639756283321e-06, "loss": 17.4856, "step": 546 }, { "epoch": 0.009998720456248743, "grad_norm": 8.9669000570803, "learning_rate": 3.326732673267327e-06, "loss": 17.9647, "step": 547 }, { "epoch": 0.010016999652695268, "grad_norm": 8.66512459966387, "learning_rate": 3.332825590251333e-06, "loss": 18.1868, "step": 548 }, { "epoch": 0.010035278849141792, "grad_norm": 8.010190288017151, "learning_rate": 3.338918507235339e-06, "loss": 18.1856, "step": 549 }, { "epoch": 0.010053558045588317, "grad_norm": 7.387120482691083, "learning_rate": 3.3450114242193456e-06, "loss": 17.6946, "step": 550 }, { "epoch": 0.01007183724203484, "grad_norm": 8.32475615889767, "learning_rate": 3.351104341203351e-06, "loss": 17.911, "step": 551 }, { "epoch": 0.010090116438481364, "grad_norm": 9.066205394885595, "learning_rate": 3.3571972581873576e-06, "loss": 18.9419, "step": 552 }, { "epoch": 0.010108395634927889, "grad_norm": 7.968193698210883, "learning_rate": 3.3632901751713638e-06, "loss": 18.0055, "step": 553 }, { "epoch": 0.010126674831374412, "grad_norm": 8.571786943624106, "learning_rate": 3.3693830921553695e-06, "loss": 18.0664, "step": 554 }, { "epoch": 0.010144954027820938, "grad_norm": 7.699019935806702, "learning_rate": 3.3754760091393757e-06, "loss": 18.0581, "step": 555 }, { "epoch": 0.010163233224267461, "grad_norm": 8.067866766547853, "learning_rate": 3.381568926123382e-06, "loss": 18.2582, "step": 556 }, { "epoch": 0.010181512420713985, "grad_norm": 9.433101264824572, "learning_rate": 3.387661843107388e-06, "loss": 18.5321, "step": 557 }, { "epoch": 0.01019979161716051, "grad_norm": 8.540100236577365, "learning_rate": 3.393754760091394e-06, "loss": 18.1843, "step": 558 }, { "epoch": 0.010218070813607033, "grad_norm": 9.476723770747716, "learning_rate": 3.3998476770754e-06, "loss": 18.5148, "step": 559 }, { "epoch": 0.010236350010053559, "grad_norm": 7.916222040531197, "learning_rate": 3.4059405940594066e-06, "loss": 17.8445, "step": 560 }, { "epoch": 0.010254629206500082, "grad_norm": 8.660451835322666, "learning_rate": 3.4120335110434124e-06, "loss": 18.2338, "step": 561 }, { "epoch": 0.010272908402946606, "grad_norm": 8.200092029419919, "learning_rate": 3.4181264280274185e-06, "loss": 17.8906, "step": 562 }, { "epoch": 0.01029118759939313, "grad_norm": 7.665208685321601, "learning_rate": 3.4242193450114247e-06, "loss": 18.0897, "step": 563 }, { "epoch": 0.010309466795839654, "grad_norm": 8.377511704569045, "learning_rate": 3.4303122619954305e-06, "loss": 18.3939, "step": 564 }, { "epoch": 0.01032774599228618, "grad_norm": 8.14051934165229, "learning_rate": 3.4364051789794367e-06, "loss": 18.0354, "step": 565 }, { "epoch": 0.010346025188732703, "grad_norm": 8.893467288989342, "learning_rate": 3.442498095963443e-06, "loss": 18.9523, "step": 566 }, { "epoch": 0.010364304385179228, "grad_norm": 13.06411652942076, "learning_rate": 3.4485910129474486e-06, "loss": 18.6884, "step": 567 }, { "epoch": 0.010382583581625752, "grad_norm": 8.159390916538014, "learning_rate": 3.454683929931455e-06, "loss": 17.9738, "step": 568 }, { "epoch": 0.010400862778072275, "grad_norm": 8.901397762873613, "learning_rate": 3.4607768469154614e-06, "loss": 18.5212, "step": 569 }, { "epoch": 0.0104191419745188, "grad_norm": 8.263202136880551, "learning_rate": 3.466869763899467e-06, "loss": 18.1432, "step": 570 }, { "epoch": 0.010437421170965324, "grad_norm": 8.642791941923635, "learning_rate": 3.4729626808834733e-06, "loss": 18.1083, "step": 571 }, { "epoch": 0.01045570036741185, "grad_norm": 9.079489731525967, "learning_rate": 3.4790555978674795e-06, "loss": 18.4298, "step": 572 }, { "epoch": 0.010473979563858373, "grad_norm": 8.28360939638146, "learning_rate": 3.4851485148514853e-06, "loss": 17.9617, "step": 573 }, { "epoch": 0.010492258760304896, "grad_norm": 8.334940108308801, "learning_rate": 3.4912414318354915e-06, "loss": 18.3559, "step": 574 }, { "epoch": 0.010510537956751421, "grad_norm": 7.857994330225646, "learning_rate": 3.4973343488194976e-06, "loss": 18.1056, "step": 575 }, { "epoch": 0.010528817153197945, "grad_norm": 8.330149196467916, "learning_rate": 3.5034272658035034e-06, "loss": 18.0329, "step": 576 }, { "epoch": 0.01054709634964447, "grad_norm": 8.661868500547321, "learning_rate": 3.5095201827875096e-06, "loss": 18.4591, "step": 577 }, { "epoch": 0.010565375546090994, "grad_norm": 8.163560354563982, "learning_rate": 3.515613099771516e-06, "loss": 18.0669, "step": 578 }, { "epoch": 0.010583654742537517, "grad_norm": 8.242693534936947, "learning_rate": 3.5217060167555224e-06, "loss": 18.1963, "step": 579 }, { "epoch": 0.010601933938984042, "grad_norm": 9.176823530741894, "learning_rate": 3.527798933739528e-06, "loss": 18.4939, "step": 580 }, { "epoch": 0.010620213135430566, "grad_norm": 8.788621060069486, "learning_rate": 3.5338918507235343e-06, "loss": 18.2809, "step": 581 }, { "epoch": 0.010638492331877091, "grad_norm": 8.090462283188902, "learning_rate": 3.5399847677075405e-06, "loss": 18.0944, "step": 582 }, { "epoch": 0.010656771528323615, "grad_norm": 8.596137634283256, "learning_rate": 3.5460776846915462e-06, "loss": 18.278, "step": 583 }, { "epoch": 0.01067505072477014, "grad_norm": 8.247695085227974, "learning_rate": 3.5521706016755524e-06, "loss": 18.1307, "step": 584 }, { "epoch": 0.010693329921216663, "grad_norm": 8.127347406336867, "learning_rate": 3.5582635186595586e-06, "loss": 18.1265, "step": 585 }, { "epoch": 0.010711609117663187, "grad_norm": 8.371783660452177, "learning_rate": 3.5643564356435644e-06, "loss": 18.1125, "step": 586 }, { "epoch": 0.010729888314109712, "grad_norm": 8.99520294894812, "learning_rate": 3.5704493526275706e-06, "loss": 18.2121, "step": 587 }, { "epoch": 0.010748167510556236, "grad_norm": 10.608664862244305, "learning_rate": 3.576542269611577e-06, "loss": 18.846, "step": 588 }, { "epoch": 0.01076644670700276, "grad_norm": 8.968867264754163, "learning_rate": 3.582635186595583e-06, "loss": 18.4245, "step": 589 }, { "epoch": 0.010784725903449284, "grad_norm": 7.8113289740846925, "learning_rate": 3.588728103579589e-06, "loss": 17.9216, "step": 590 }, { "epoch": 0.010803005099895808, "grad_norm": 8.547150312484515, "learning_rate": 3.5948210205635953e-06, "loss": 18.164, "step": 591 }, { "epoch": 0.010821284296342333, "grad_norm": 8.034774327385934, "learning_rate": 3.600913937547601e-06, "loss": 18.1415, "step": 592 }, { "epoch": 0.010839563492788857, "grad_norm": 8.482926628353267, "learning_rate": 3.6070068545316072e-06, "loss": 17.8454, "step": 593 }, { "epoch": 0.010857842689235382, "grad_norm": 7.159187145580739, "learning_rate": 3.6130997715156134e-06, "loss": 17.5982, "step": 594 }, { "epoch": 0.010876121885681905, "grad_norm": 6.956524183644067, "learning_rate": 3.619192688499619e-06, "loss": 17.7585, "step": 595 }, { "epoch": 0.010894401082128429, "grad_norm": 8.289400186115694, "learning_rate": 3.6252856054836253e-06, "loss": 18.2065, "step": 596 }, { "epoch": 0.010912680278574954, "grad_norm": 8.926577429484452, "learning_rate": 3.631378522467632e-06, "loss": 18.0875, "step": 597 }, { "epoch": 0.010930959475021478, "grad_norm": 9.22683538460072, "learning_rate": 3.6374714394516377e-06, "loss": 18.6423, "step": 598 }, { "epoch": 0.010949238671468003, "grad_norm": 8.270948741163098, "learning_rate": 3.643564356435644e-06, "loss": 17.7624, "step": 599 }, { "epoch": 0.010967517867914526, "grad_norm": 10.822683093286273, "learning_rate": 3.64965727341965e-06, "loss": 19.2622, "step": 600 }, { "epoch": 0.010985797064361051, "grad_norm": 8.958097787886413, "learning_rate": 3.6557501904036563e-06, "loss": 18.1253, "step": 601 }, { "epoch": 0.011004076260807575, "grad_norm": 7.719258571985554, "learning_rate": 3.661843107387662e-06, "loss": 17.9526, "step": 602 }, { "epoch": 0.011022355457254098, "grad_norm": 8.450593543919977, "learning_rate": 3.667936024371668e-06, "loss": 18.0805, "step": 603 }, { "epoch": 0.011040634653700624, "grad_norm": 8.90466662740431, "learning_rate": 3.6740289413556744e-06, "loss": 18.538, "step": 604 }, { "epoch": 0.011058913850147147, "grad_norm": 8.414490229419439, "learning_rate": 3.68012185833968e-06, "loss": 18.5623, "step": 605 }, { "epoch": 0.011077193046593672, "grad_norm": 7.692666688497806, "learning_rate": 3.6862147753236867e-06, "loss": 18.0062, "step": 606 }, { "epoch": 0.011095472243040196, "grad_norm": 8.293027250646789, "learning_rate": 3.692307692307693e-06, "loss": 17.9804, "step": 607 }, { "epoch": 0.01111375143948672, "grad_norm": 8.690172577012346, "learning_rate": 3.6984006092916987e-06, "loss": 18.4833, "step": 608 }, { "epoch": 0.011132030635933245, "grad_norm": 8.655996628896485, "learning_rate": 3.704493526275705e-06, "loss": 17.9411, "step": 609 }, { "epoch": 0.011150309832379768, "grad_norm": 8.91574917849141, "learning_rate": 3.710586443259711e-06, "loss": 18.256, "step": 610 }, { "epoch": 0.011168589028826293, "grad_norm": 7.01065365822396, "learning_rate": 3.716679360243717e-06, "loss": 17.572, "step": 611 }, { "epoch": 0.011186868225272817, "grad_norm": 9.289538437867213, "learning_rate": 3.722772277227723e-06, "loss": 18.6452, "step": 612 }, { "epoch": 0.01120514742171934, "grad_norm": 8.308113412260557, "learning_rate": 3.728865194211729e-06, "loss": 18.0685, "step": 613 }, { "epoch": 0.011223426618165866, "grad_norm": 7.931418707584115, "learning_rate": 3.734958111195735e-06, "loss": 18.0801, "step": 614 }, { "epoch": 0.011241705814612389, "grad_norm": 8.886288471507592, "learning_rate": 3.741051028179741e-06, "loss": 18.1922, "step": 615 }, { "epoch": 0.011259985011058914, "grad_norm": 7.177341956825686, "learning_rate": 3.7471439451637477e-06, "loss": 17.8097, "step": 616 }, { "epoch": 0.011278264207505438, "grad_norm": 8.904115254334988, "learning_rate": 3.7532368621477535e-06, "loss": 18.1632, "step": 617 }, { "epoch": 0.011296543403951963, "grad_norm": 8.775998773629942, "learning_rate": 3.7593297791317597e-06, "loss": 18.4946, "step": 618 }, { "epoch": 0.011314822600398487, "grad_norm": 7.860619604169376, "learning_rate": 3.765422696115766e-06, "loss": 17.8058, "step": 619 }, { "epoch": 0.01133310179684501, "grad_norm": 9.143618681610725, "learning_rate": 3.771515613099772e-06, "loss": 18.5067, "step": 620 }, { "epoch": 0.011351380993291535, "grad_norm": 7.641110873624864, "learning_rate": 3.7776085300837778e-06, "loss": 18.0647, "step": 621 }, { "epoch": 0.011369660189738059, "grad_norm": 9.881622540341965, "learning_rate": 3.783701447067784e-06, "loss": 18.7808, "step": 622 }, { "epoch": 0.011387939386184584, "grad_norm": 9.079147581664522, "learning_rate": 3.7897943640517906e-06, "loss": 18.6017, "step": 623 }, { "epoch": 0.011406218582631108, "grad_norm": 9.919861848991172, "learning_rate": 3.795887281035796e-06, "loss": 18.5653, "step": 624 }, { "epoch": 0.011424497779077631, "grad_norm": 8.772754960101011, "learning_rate": 3.8019801980198025e-06, "loss": 18.4951, "step": 625 }, { "epoch": 0.011442776975524156, "grad_norm": 9.245420728499028, "learning_rate": 3.8080731150038087e-06, "loss": 18.5503, "step": 626 }, { "epoch": 0.01146105617197068, "grad_norm": 7.1149859102439015, "learning_rate": 3.8141660319878144e-06, "loss": 17.7896, "step": 627 }, { "epoch": 0.011479335368417205, "grad_norm": 8.922405789331233, "learning_rate": 3.820258948971821e-06, "loss": 18.1187, "step": 628 }, { "epoch": 0.011497614564863728, "grad_norm": 9.334662108517135, "learning_rate": 3.826351865955827e-06, "loss": 18.4043, "step": 629 }, { "epoch": 0.011515893761310252, "grad_norm": 7.991348921154775, "learning_rate": 3.832444782939833e-06, "loss": 17.9648, "step": 630 }, { "epoch": 0.011534172957756777, "grad_norm": 10.35442384336909, "learning_rate": 3.838537699923839e-06, "loss": 18.8284, "step": 631 }, { "epoch": 0.0115524521542033, "grad_norm": 7.6281038550182485, "learning_rate": 3.844630616907845e-06, "loss": 17.7663, "step": 632 }, { "epoch": 0.011570731350649826, "grad_norm": 9.003867909251422, "learning_rate": 3.850723533891851e-06, "loss": 18.4097, "step": 633 }, { "epoch": 0.01158901054709635, "grad_norm": 9.353285303990113, "learning_rate": 3.856816450875857e-06, "loss": 18.3862, "step": 634 }, { "epoch": 0.011607289743542875, "grad_norm": 7.913205735721484, "learning_rate": 3.8629093678598635e-06, "loss": 17.7015, "step": 635 }, { "epoch": 0.011625568939989398, "grad_norm": 7.850792293732742, "learning_rate": 3.869002284843869e-06, "loss": 17.7488, "step": 636 }, { "epoch": 0.011643848136435922, "grad_norm": 8.565520597603587, "learning_rate": 3.875095201827875e-06, "loss": 18.0983, "step": 637 }, { "epoch": 0.011662127332882447, "grad_norm": 9.956027586916676, "learning_rate": 3.881188118811882e-06, "loss": 18.7461, "step": 638 }, { "epoch": 0.01168040652932897, "grad_norm": 10.054966931273652, "learning_rate": 3.887281035795887e-06, "loss": 18.8291, "step": 639 }, { "epoch": 0.011698685725775496, "grad_norm": 9.251972467038426, "learning_rate": 3.893373952779894e-06, "loss": 18.4111, "step": 640 }, { "epoch": 0.011716964922222019, "grad_norm": 8.358312202277967, "learning_rate": 3.8994668697639e-06, "loss": 18.0409, "step": 641 }, { "epoch": 0.011735244118668543, "grad_norm": 8.422320418791143, "learning_rate": 3.905559786747906e-06, "loss": 18.1452, "step": 642 }, { "epoch": 0.011753523315115068, "grad_norm": 7.671429243467571, "learning_rate": 3.911652703731912e-06, "loss": 18.0494, "step": 643 }, { "epoch": 0.011771802511561591, "grad_norm": 8.50051421134067, "learning_rate": 3.917745620715918e-06, "loss": 18.4015, "step": 644 }, { "epoch": 0.011790081708008117, "grad_norm": 7.760926670958736, "learning_rate": 3.9238385376999244e-06, "loss": 17.921, "step": 645 }, { "epoch": 0.01180836090445464, "grad_norm": 11.513527376413613, "learning_rate": 3.92993145468393e-06, "loss": 18.1754, "step": 646 }, { "epoch": 0.011826640100901164, "grad_norm": 8.028483144791913, "learning_rate": 3.936024371667936e-06, "loss": 18.1008, "step": 647 }, { "epoch": 0.011844919297347689, "grad_norm": 8.918002922760337, "learning_rate": 3.9421172886519426e-06, "loss": 18.5311, "step": 648 }, { "epoch": 0.011863198493794212, "grad_norm": 8.4695793378545, "learning_rate": 3.948210205635948e-06, "loss": 18.1985, "step": 649 }, { "epoch": 0.011881477690240737, "grad_norm": 8.294055827860042, "learning_rate": 3.954303122619955e-06, "loss": 18.1201, "step": 650 }, { "epoch": 0.011899756886687261, "grad_norm": 9.017151892282369, "learning_rate": 3.960396039603961e-06, "loss": 18.646, "step": 651 }, { "epoch": 0.011918036083133786, "grad_norm": 9.311975409243328, "learning_rate": 3.9664889565879665e-06, "loss": 18.3601, "step": 652 }, { "epoch": 0.01193631527958031, "grad_norm": 9.28881290790521, "learning_rate": 3.972581873571973e-06, "loss": 18.3552, "step": 653 }, { "epoch": 0.011954594476026833, "grad_norm": 8.911077660865459, "learning_rate": 3.978674790555979e-06, "loss": 18.6692, "step": 654 }, { "epoch": 0.011972873672473358, "grad_norm": 8.750159759354592, "learning_rate": 3.9847677075399846e-06, "loss": 17.8843, "step": 655 }, { "epoch": 0.011991152868919882, "grad_norm": 7.5063771211308845, "learning_rate": 3.990860624523991e-06, "loss": 17.684, "step": 656 }, { "epoch": 0.012009432065366407, "grad_norm": 9.058283847455503, "learning_rate": 3.996953541507998e-06, "loss": 18.5061, "step": 657 }, { "epoch": 0.01202771126181293, "grad_norm": 10.043624776829995, "learning_rate": 4.0030464584920035e-06, "loss": 18.1349, "step": 658 }, { "epoch": 0.012045990458259454, "grad_norm": 8.24023318173618, "learning_rate": 4.009139375476009e-06, "loss": 18.1363, "step": 659 }, { "epoch": 0.01206426965470598, "grad_norm": 9.524182484965243, "learning_rate": 4.015232292460016e-06, "loss": 18.3919, "step": 660 }, { "epoch": 0.012082548851152503, "grad_norm": 8.654553969481384, "learning_rate": 4.021325209444022e-06, "loss": 17.8421, "step": 661 }, { "epoch": 0.012100828047599028, "grad_norm": 8.413561700429844, "learning_rate": 4.0274181264280274e-06, "loss": 17.94, "step": 662 }, { "epoch": 0.012119107244045552, "grad_norm": 8.42006127552643, "learning_rate": 4.033511043412034e-06, "loss": 18.0531, "step": 663 }, { "epoch": 0.012137386440492075, "grad_norm": 8.256668684070519, "learning_rate": 4.03960396039604e-06, "loss": 17.8619, "step": 664 }, { "epoch": 0.0121556656369386, "grad_norm": 7.924822317596749, "learning_rate": 4.0456968773800455e-06, "loss": 18.0097, "step": 665 }, { "epoch": 0.012173944833385124, "grad_norm": 8.47007375308315, "learning_rate": 4.051789794364052e-06, "loss": 18.0684, "step": 666 }, { "epoch": 0.012192224029831649, "grad_norm": 9.597647355318228, "learning_rate": 4.057882711348059e-06, "loss": 18.5228, "step": 667 }, { "epoch": 0.012210503226278173, "grad_norm": 8.081301569529527, "learning_rate": 4.0639756283320645e-06, "loss": 18.029, "step": 668 }, { "epoch": 0.012228782422724698, "grad_norm": 8.09801481857068, "learning_rate": 4.07006854531607e-06, "loss": 17.9184, "step": 669 }, { "epoch": 0.012247061619171221, "grad_norm": 7.773602705447997, "learning_rate": 4.076161462300077e-06, "loss": 17.8822, "step": 670 }, { "epoch": 0.012265340815617745, "grad_norm": 9.38410411202206, "learning_rate": 4.082254379284083e-06, "loss": 18.2827, "step": 671 }, { "epoch": 0.01228362001206427, "grad_norm": 9.93505517562674, "learning_rate": 4.088347296268088e-06, "loss": 18.6631, "step": 672 }, { "epoch": 0.012301899208510794, "grad_norm": 10.076632024957327, "learning_rate": 4.094440213252095e-06, "loss": 18.8522, "step": 673 }, { "epoch": 0.012320178404957319, "grad_norm": 9.590342148059808, "learning_rate": 4.100533130236101e-06, "loss": 18.4533, "step": 674 }, { "epoch": 0.012338457601403842, "grad_norm": 8.648378057532792, "learning_rate": 4.1066260472201065e-06, "loss": 18.3743, "step": 675 }, { "epoch": 0.012356736797850366, "grad_norm": 9.225214321976841, "learning_rate": 4.112718964204113e-06, "loss": 18.5045, "step": 676 }, { "epoch": 0.012375015994296891, "grad_norm": 9.641471319011561, "learning_rate": 4.118811881188119e-06, "loss": 18.5693, "step": 677 }, { "epoch": 0.012393295190743414, "grad_norm": 8.71795174350895, "learning_rate": 4.1249047981721255e-06, "loss": 18.3874, "step": 678 }, { "epoch": 0.01241157438718994, "grad_norm": 8.631448803632992, "learning_rate": 4.130997715156131e-06, "loss": 18.1557, "step": 679 }, { "epoch": 0.012429853583636463, "grad_norm": 8.282374061730781, "learning_rate": 4.137090632140137e-06, "loss": 18.2244, "step": 680 }, { "epoch": 0.012448132780082987, "grad_norm": 7.363293065006019, "learning_rate": 4.143183549124144e-06, "loss": 17.7314, "step": 681 }, { "epoch": 0.012466411976529512, "grad_norm": 10.797530506423593, "learning_rate": 4.149276466108149e-06, "loss": 18.9832, "step": 682 }, { "epoch": 0.012484691172976035, "grad_norm": 7.879375084137035, "learning_rate": 4.155369383092156e-06, "loss": 18.1933, "step": 683 }, { "epoch": 0.01250297036942256, "grad_norm": 8.588918231392771, "learning_rate": 4.161462300076162e-06, "loss": 18.3093, "step": 684 }, { "epoch": 0.012521249565869084, "grad_norm": 8.136157675867892, "learning_rate": 4.167555217060168e-06, "loss": 17.8996, "step": 685 }, { "epoch": 0.01253952876231561, "grad_norm": 8.244391120869468, "learning_rate": 4.173648134044174e-06, "loss": 18.3063, "step": 686 }, { "epoch": 0.012557807958762133, "grad_norm": 9.254053025083964, "learning_rate": 4.17974105102818e-06, "loss": 18.552, "step": 687 }, { "epoch": 0.012576087155208656, "grad_norm": 8.510600321405521, "learning_rate": 4.1858339680121865e-06, "loss": 18.4846, "step": 688 }, { "epoch": 0.012594366351655182, "grad_norm": 8.01458664798195, "learning_rate": 4.191926884996192e-06, "loss": 17.715, "step": 689 }, { "epoch": 0.012612645548101705, "grad_norm": 8.626646670224346, "learning_rate": 4.198019801980198e-06, "loss": 18.2243, "step": 690 }, { "epoch": 0.01263092474454823, "grad_norm": 8.939771947894256, "learning_rate": 4.204112718964205e-06, "loss": 18.5696, "step": 691 }, { "epoch": 0.012649203940994754, "grad_norm": 8.120519986053191, "learning_rate": 4.21020563594821e-06, "loss": 17.9166, "step": 692 }, { "epoch": 0.012667483137441277, "grad_norm": 8.029242137098644, "learning_rate": 4.216298552932216e-06, "loss": 18.2364, "step": 693 }, { "epoch": 0.012685762333887803, "grad_norm": 8.858387502251698, "learning_rate": 4.222391469916223e-06, "loss": 18.4259, "step": 694 }, { "epoch": 0.012704041530334326, "grad_norm": 7.660860311701185, "learning_rate": 4.228484386900229e-06, "loss": 17.7606, "step": 695 }, { "epoch": 0.012722320726780851, "grad_norm": 9.906135660762699, "learning_rate": 4.234577303884235e-06, "loss": 18.7554, "step": 696 }, { "epoch": 0.012740599923227375, "grad_norm": 9.840851089802282, "learning_rate": 4.240670220868241e-06, "loss": 18.6306, "step": 697 }, { "epoch": 0.012758879119673898, "grad_norm": 8.604357795575616, "learning_rate": 4.2467631378522474e-06, "loss": 18.024, "step": 698 }, { "epoch": 0.012777158316120424, "grad_norm": 9.356382353257045, "learning_rate": 4.252856054836253e-06, "loss": 18.5935, "step": 699 }, { "epoch": 0.012795437512566947, "grad_norm": 9.281401062431087, "learning_rate": 4.258948971820259e-06, "loss": 18.2712, "step": 700 }, { "epoch": 0.012813716709013472, "grad_norm": 9.449169354332904, "learning_rate": 4.2650418888042656e-06, "loss": 18.5111, "step": 701 }, { "epoch": 0.012831995905459996, "grad_norm": 6.581590647391288, "learning_rate": 4.271134805788271e-06, "loss": 17.2692, "step": 702 }, { "epoch": 0.012850275101906521, "grad_norm": 8.941852584792075, "learning_rate": 4.277227722772277e-06, "loss": 18.1318, "step": 703 }, { "epoch": 0.012868554298353044, "grad_norm": 11.52066482368107, "learning_rate": 4.283320639756284e-06, "loss": 17.9786, "step": 704 }, { "epoch": 0.012886833494799568, "grad_norm": 9.50710199009215, "learning_rate": 4.28941355674029e-06, "loss": 18.4536, "step": 705 }, { "epoch": 0.012905112691246093, "grad_norm": 6.858864428444414, "learning_rate": 4.295506473724296e-06, "loss": 17.5341, "step": 706 }, { "epoch": 0.012923391887692617, "grad_norm": 8.694385305192165, "learning_rate": 4.301599390708302e-06, "loss": 18.3906, "step": 707 }, { "epoch": 0.012941671084139142, "grad_norm": 8.10655763097337, "learning_rate": 4.307692307692308e-06, "loss": 18.2554, "step": 708 }, { "epoch": 0.012959950280585665, "grad_norm": 10.10716824262294, "learning_rate": 4.313785224676314e-06, "loss": 18.3628, "step": 709 }, { "epoch": 0.012978229477032189, "grad_norm": 8.193394417599954, "learning_rate": 4.31987814166032e-06, "loss": 17.9166, "step": 710 }, { "epoch": 0.012996508673478714, "grad_norm": 9.12124387859505, "learning_rate": 4.3259710586443265e-06, "loss": 18.2743, "step": 711 }, { "epoch": 0.013014787869925238, "grad_norm": 8.275436544030585, "learning_rate": 4.332063975628332e-06, "loss": 18.3223, "step": 712 }, { "epoch": 0.013033067066371763, "grad_norm": 9.919219202939884, "learning_rate": 4.338156892612339e-06, "loss": 18.7695, "step": 713 }, { "epoch": 0.013051346262818286, "grad_norm": 9.006295734611193, "learning_rate": 4.344249809596345e-06, "loss": 18.4789, "step": 714 }, { "epoch": 0.01306962545926481, "grad_norm": 8.727624480806657, "learning_rate": 4.35034272658035e-06, "loss": 18.3371, "step": 715 }, { "epoch": 0.013087904655711335, "grad_norm": 7.154181830598685, "learning_rate": 4.356435643564357e-06, "loss": 17.5559, "step": 716 }, { "epoch": 0.013106183852157859, "grad_norm": 7.965608283923321, "learning_rate": 4.362528560548363e-06, "loss": 18.0304, "step": 717 }, { "epoch": 0.013124463048604384, "grad_norm": 7.751987635624145, "learning_rate": 4.3686214775323685e-06, "loss": 17.7836, "step": 718 }, { "epoch": 0.013142742245050907, "grad_norm": 9.822875745653093, "learning_rate": 4.374714394516375e-06, "loss": 18.4279, "step": 719 }, { "epoch": 0.013161021441497433, "grad_norm": 11.257898723251781, "learning_rate": 4.380807311500381e-06, "loss": 18.8577, "step": 720 }, { "epoch": 0.013179300637943956, "grad_norm": 7.736540051326125, "learning_rate": 4.386900228484387e-06, "loss": 17.7715, "step": 721 }, { "epoch": 0.01319757983439048, "grad_norm": 7.872605497863902, "learning_rate": 4.392993145468393e-06, "loss": 17.7712, "step": 722 }, { "epoch": 0.013215859030837005, "grad_norm": 9.113611652172722, "learning_rate": 4.3990860624524e-06, "loss": 18.3717, "step": 723 }, { "epoch": 0.013234138227283528, "grad_norm": 8.358889954921496, "learning_rate": 4.405178979436406e-06, "loss": 18.1748, "step": 724 }, { "epoch": 0.013252417423730053, "grad_norm": 7.803959399616063, "learning_rate": 4.411271896420411e-06, "loss": 17.8882, "step": 725 }, { "epoch": 0.013270696620176577, "grad_norm": 10.211516845076726, "learning_rate": 4.417364813404418e-06, "loss": 18.6295, "step": 726 }, { "epoch": 0.0132889758166231, "grad_norm": 8.15584773898762, "learning_rate": 4.423457730388424e-06, "loss": 17.8261, "step": 727 }, { "epoch": 0.013307255013069626, "grad_norm": 8.265193313412619, "learning_rate": 4.4295506473724295e-06, "loss": 18.2289, "step": 728 }, { "epoch": 0.01332553420951615, "grad_norm": 9.83364866575862, "learning_rate": 4.435643564356436e-06, "loss": 18.6857, "step": 729 }, { "epoch": 0.013343813405962674, "grad_norm": 8.859709514341905, "learning_rate": 4.441736481340443e-06, "loss": 18.426, "step": 730 }, { "epoch": 0.013362092602409198, "grad_norm": 6.84547356640548, "learning_rate": 4.447829398324448e-06, "loss": 17.7192, "step": 731 }, { "epoch": 0.013380371798855721, "grad_norm": 9.62516363738731, "learning_rate": 4.453922315308454e-06, "loss": 18.5106, "step": 732 }, { "epoch": 0.013398650995302247, "grad_norm": 9.710791197347461, "learning_rate": 4.460015232292461e-06, "loss": 18.3155, "step": 733 }, { "epoch": 0.01341693019174877, "grad_norm": 7.088619072988261, "learning_rate": 4.466108149276467e-06, "loss": 17.5585, "step": 734 }, { "epoch": 0.013435209388195295, "grad_norm": 8.813905602728473, "learning_rate": 4.472201066260472e-06, "loss": 18.3018, "step": 735 }, { "epoch": 0.013453488584641819, "grad_norm": 7.7363833694697375, "learning_rate": 4.478293983244479e-06, "loss": 17.8962, "step": 736 }, { "epoch": 0.013471767781088344, "grad_norm": 8.71227916566431, "learning_rate": 4.484386900228485e-06, "loss": 18.3363, "step": 737 }, { "epoch": 0.013490046977534868, "grad_norm": 7.840887611839472, "learning_rate": 4.4904798172124905e-06, "loss": 18.0087, "step": 738 }, { "epoch": 0.013508326173981391, "grad_norm": 8.133505317881973, "learning_rate": 4.496572734196497e-06, "loss": 18.1477, "step": 739 }, { "epoch": 0.013526605370427916, "grad_norm": 7.991598762492575, "learning_rate": 4.502665651180503e-06, "loss": 18.1431, "step": 740 }, { "epoch": 0.01354488456687444, "grad_norm": 8.610191757930943, "learning_rate": 4.5087585681645095e-06, "loss": 18.3059, "step": 741 }, { "epoch": 0.013563163763320965, "grad_norm": 7.179232227677593, "learning_rate": 4.514851485148515e-06, "loss": 17.4715, "step": 742 }, { "epoch": 0.013581442959767489, "grad_norm": 8.35948954945627, "learning_rate": 4.520944402132521e-06, "loss": 18.2916, "step": 743 }, { "epoch": 0.013599722156214012, "grad_norm": 8.46206693768903, "learning_rate": 4.5270373191165276e-06, "loss": 17.9517, "step": 744 }, { "epoch": 0.013618001352660537, "grad_norm": 10.010037950216763, "learning_rate": 4.533130236100533e-06, "loss": 19.1337, "step": 745 }, { "epoch": 0.01363628054910706, "grad_norm": 10.033585816329204, "learning_rate": 4.53922315308454e-06, "loss": 18.9602, "step": 746 }, { "epoch": 0.013654559745553586, "grad_norm": 11.821488713055107, "learning_rate": 4.545316070068546e-06, "loss": 18.67, "step": 747 }, { "epoch": 0.01367283894200011, "grad_norm": 8.7428412408898, "learning_rate": 4.5514089870525515e-06, "loss": 18.1375, "step": 748 }, { "epoch": 0.013691118138446635, "grad_norm": 8.861577418193692, "learning_rate": 4.557501904036558e-06, "loss": 18.4961, "step": 749 }, { "epoch": 0.013709397334893158, "grad_norm": 8.00181313724855, "learning_rate": 4.563594821020564e-06, "loss": 17.7919, "step": 750 }, { "epoch": 0.013727676531339682, "grad_norm": 8.107133488958844, "learning_rate": 4.56968773800457e-06, "loss": 17.9478, "step": 751 }, { "epoch": 0.013745955727786207, "grad_norm": 8.527100099140132, "learning_rate": 4.575780654988576e-06, "loss": 18.3121, "step": 752 }, { "epoch": 0.01376423492423273, "grad_norm": 9.229603260412576, "learning_rate": 4.581873571972582e-06, "loss": 18.3729, "step": 753 }, { "epoch": 0.013782514120679256, "grad_norm": 7.851968233333457, "learning_rate": 4.5879664889565885e-06, "loss": 17.7429, "step": 754 }, { "epoch": 0.01380079331712578, "grad_norm": 8.500953189255293, "learning_rate": 4.594059405940594e-06, "loss": 18.412, "step": 755 }, { "epoch": 0.013819072513572303, "grad_norm": 8.261006107947916, "learning_rate": 4.6001523229246e-06, "loss": 18.09, "step": 756 }, { "epoch": 0.013837351710018828, "grad_norm": 8.336345469458829, "learning_rate": 4.606245239908607e-06, "loss": 18.2866, "step": 757 }, { "epoch": 0.013855630906465351, "grad_norm": 9.247618791384095, "learning_rate": 4.612338156892613e-06, "loss": 18.4629, "step": 758 }, { "epoch": 0.013873910102911877, "grad_norm": 7.692097753420125, "learning_rate": 4.618431073876618e-06, "loss": 18.0023, "step": 759 }, { "epoch": 0.0138921892993584, "grad_norm": 8.185703810583892, "learning_rate": 4.624523990860625e-06, "loss": 17.9522, "step": 760 }, { "epoch": 0.013910468495804924, "grad_norm": 9.73059709231243, "learning_rate": 4.630616907844631e-06, "loss": 18.8149, "step": 761 }, { "epoch": 0.013928747692251449, "grad_norm": 8.807976975651538, "learning_rate": 4.636709824828637e-06, "loss": 18.2019, "step": 762 }, { "epoch": 0.013947026888697972, "grad_norm": 8.807756941411888, "learning_rate": 4.642802741812643e-06, "loss": 18.582, "step": 763 }, { "epoch": 0.013965306085144498, "grad_norm": 7.734928300785946, "learning_rate": 4.6488956587966495e-06, "loss": 18.0705, "step": 764 }, { "epoch": 0.013983585281591021, "grad_norm": 9.32532334645556, "learning_rate": 4.654988575780655e-06, "loss": 18.4407, "step": 765 }, { "epoch": 0.014001864478037546, "grad_norm": 7.226946936382555, "learning_rate": 4.661081492764661e-06, "loss": 17.8439, "step": 766 }, { "epoch": 0.01402014367448407, "grad_norm": 7.562857315784674, "learning_rate": 4.667174409748668e-06, "loss": 17.507, "step": 767 }, { "epoch": 0.014038422870930593, "grad_norm": 8.533365880373301, "learning_rate": 4.673267326732674e-06, "loss": 18.3735, "step": 768 }, { "epoch": 0.014056702067377119, "grad_norm": 8.474421295874395, "learning_rate": 4.67936024371668e-06, "loss": 18.3085, "step": 769 }, { "epoch": 0.014074981263823642, "grad_norm": 7.930561966593505, "learning_rate": 4.685453160700686e-06, "loss": 17.8629, "step": 770 }, { "epoch": 0.014093260460270167, "grad_norm": 8.364342894711397, "learning_rate": 4.691546077684692e-06, "loss": 17.9165, "step": 771 }, { "epoch": 0.01411153965671669, "grad_norm": 8.644803044948478, "learning_rate": 4.697638994668698e-06, "loss": 18.3113, "step": 772 }, { "epoch": 0.014129818853163214, "grad_norm": 7.0042353905597246, "learning_rate": 4.703731911652704e-06, "loss": 17.5256, "step": 773 }, { "epoch": 0.01414809804960974, "grad_norm": 7.770288505567848, "learning_rate": 4.7098248286367105e-06, "loss": 17.6203, "step": 774 }, { "epoch": 0.014166377246056263, "grad_norm": 8.114082084007649, "learning_rate": 4.715917745620716e-06, "loss": 17.8323, "step": 775 }, { "epoch": 0.014184656442502788, "grad_norm": 8.107987396579302, "learning_rate": 4.722010662604722e-06, "loss": 18.0544, "step": 776 }, { "epoch": 0.014202935638949312, "grad_norm": 9.195067350381473, "learning_rate": 4.728103579588729e-06, "loss": 18.643, "step": 777 }, { "epoch": 0.014221214835395835, "grad_norm": 8.057056508611788, "learning_rate": 4.734196496572734e-06, "loss": 17.9456, "step": 778 }, { "epoch": 0.01423949403184236, "grad_norm": 8.593365449774803, "learning_rate": 4.740289413556741e-06, "loss": 18.4926, "step": 779 }, { "epoch": 0.014257773228288884, "grad_norm": 8.77392213344187, "learning_rate": 4.746382330540747e-06, "loss": 18.4204, "step": 780 }, { "epoch": 0.01427605242473541, "grad_norm": 8.686221466198258, "learning_rate": 4.7524752475247525e-06, "loss": 17.8882, "step": 781 }, { "epoch": 0.014294331621181933, "grad_norm": 7.543089350198794, "learning_rate": 4.758568164508759e-06, "loss": 17.8673, "step": 782 }, { "epoch": 0.014312610817628458, "grad_norm": 7.72067947204325, "learning_rate": 4.764661081492765e-06, "loss": 18.07, "step": 783 }, { "epoch": 0.014330890014074981, "grad_norm": 8.14505655255484, "learning_rate": 4.770753998476771e-06, "loss": 18.2828, "step": 784 }, { "epoch": 0.014349169210521505, "grad_norm": 13.760586711568584, "learning_rate": 4.776846915460777e-06, "loss": 17.8392, "step": 785 }, { "epoch": 0.01436744840696803, "grad_norm": 8.131829236787254, "learning_rate": 4.782939832444784e-06, "loss": 18.3155, "step": 786 }, { "epoch": 0.014385727603414554, "grad_norm": 8.479480779011643, "learning_rate": 4.789032749428789e-06, "loss": 18.1185, "step": 787 }, { "epoch": 0.014404006799861079, "grad_norm": 8.001242895150341, "learning_rate": 4.795125666412795e-06, "loss": 17.9873, "step": 788 }, { "epoch": 0.014422285996307602, "grad_norm": 7.5503732470374985, "learning_rate": 4.801218583396802e-06, "loss": 17.8135, "step": 789 }, { "epoch": 0.014440565192754126, "grad_norm": 9.701365399956416, "learning_rate": 4.807311500380808e-06, "loss": 18.2403, "step": 790 }, { "epoch": 0.014458844389200651, "grad_norm": 9.540336103150405, "learning_rate": 4.8134044173648135e-06, "loss": 18.6102, "step": 791 }, { "epoch": 0.014477123585647175, "grad_norm": 9.501216323824327, "learning_rate": 4.81949733434882e-06, "loss": 18.5699, "step": 792 }, { "epoch": 0.0144954027820937, "grad_norm": 9.226566125082897, "learning_rate": 4.825590251332826e-06, "loss": 18.44, "step": 793 }, { "epoch": 0.014513681978540223, "grad_norm": 8.286721370460413, "learning_rate": 4.831683168316832e-06, "loss": 18.0559, "step": 794 }, { "epoch": 0.014531961174986747, "grad_norm": 9.034488978203306, "learning_rate": 4.837776085300838e-06, "loss": 18.4985, "step": 795 }, { "epoch": 0.014550240371433272, "grad_norm": 8.494852627339926, "learning_rate": 4.843869002284845e-06, "loss": 18.5201, "step": 796 }, { "epoch": 0.014568519567879796, "grad_norm": 7.87204466443327, "learning_rate": 4.8499619192688506e-06, "loss": 17.3842, "step": 797 }, { "epoch": 0.01458679876432632, "grad_norm": 6.817930845924937, "learning_rate": 4.856054836252856e-06, "loss": 17.6466, "step": 798 }, { "epoch": 0.014605077960772844, "grad_norm": 8.6349003420488, "learning_rate": 4.862147753236863e-06, "loss": 18.4442, "step": 799 }, { "epoch": 0.01462335715721937, "grad_norm": 9.243550819410242, "learning_rate": 4.868240670220869e-06, "loss": 18.442, "step": 800 }, { "epoch": 0.014641636353665893, "grad_norm": 9.380566812642247, "learning_rate": 4.8743335872048744e-06, "loss": 18.4791, "step": 801 }, { "epoch": 0.014659915550112416, "grad_norm": 8.10614450572759, "learning_rate": 4.880426504188881e-06, "loss": 18.5559, "step": 802 }, { "epoch": 0.014678194746558942, "grad_norm": 8.647298812820837, "learning_rate": 4.886519421172887e-06, "loss": 18.3998, "step": 803 }, { "epoch": 0.014696473943005465, "grad_norm": 7.246022705060174, "learning_rate": 4.8926123381568926e-06, "loss": 17.6121, "step": 804 }, { "epoch": 0.01471475313945199, "grad_norm": 7.971771019689157, "learning_rate": 4.898705255140899e-06, "loss": 17.9529, "step": 805 }, { "epoch": 0.014733032335898514, "grad_norm": 8.063907379592704, "learning_rate": 4.904798172124905e-06, "loss": 18.2078, "step": 806 }, { "epoch": 0.014751311532345037, "grad_norm": 9.569172498445036, "learning_rate": 4.9108910891089115e-06, "loss": 18.7412, "step": 807 }, { "epoch": 0.014769590728791563, "grad_norm": 8.647993281620685, "learning_rate": 4.916984006092917e-06, "loss": 18.1838, "step": 808 }, { "epoch": 0.014787869925238086, "grad_norm": 8.090149228825357, "learning_rate": 4.923076923076924e-06, "loss": 18.025, "step": 809 }, { "epoch": 0.014806149121684611, "grad_norm": 7.454200251754739, "learning_rate": 4.92916984006093e-06, "loss": 17.6568, "step": 810 }, { "epoch": 0.014824428318131135, "grad_norm": 7.943819236198461, "learning_rate": 4.935262757044935e-06, "loss": 17.8122, "step": 811 }, { "epoch": 0.014842707514577658, "grad_norm": 7.699759087568516, "learning_rate": 4.941355674028942e-06, "loss": 17.7524, "step": 812 }, { "epoch": 0.014860986711024184, "grad_norm": 8.368089050350493, "learning_rate": 4.947448591012948e-06, "loss": 18.1158, "step": 813 }, { "epoch": 0.014879265907470707, "grad_norm": 7.805938306042278, "learning_rate": 4.953541507996954e-06, "loss": 18.0641, "step": 814 }, { "epoch": 0.014897545103917232, "grad_norm": 7.943826091683709, "learning_rate": 4.95963442498096e-06, "loss": 17.8384, "step": 815 }, { "epoch": 0.014915824300363756, "grad_norm": 7.757634044377665, "learning_rate": 4.965727341964966e-06, "loss": 17.9461, "step": 816 }, { "epoch": 0.014934103496810281, "grad_norm": 9.9770776203452, "learning_rate": 4.9718202589489725e-06, "loss": 18.1553, "step": 817 }, { "epoch": 0.014952382693256805, "grad_norm": 8.503532888039603, "learning_rate": 4.977913175932978e-06, "loss": 17.8678, "step": 818 }, { "epoch": 0.014970661889703328, "grad_norm": 8.106962674768528, "learning_rate": 4.984006092916984e-06, "loss": 17.8451, "step": 819 }, { "epoch": 0.014988941086149853, "grad_norm": 8.833702556308314, "learning_rate": 4.990099009900991e-06, "loss": 17.9596, "step": 820 }, { "epoch": 0.015007220282596377, "grad_norm": 7.937916712032483, "learning_rate": 4.996191926884996e-06, "loss": 17.9292, "step": 821 }, { "epoch": 0.015025499479042902, "grad_norm": 7.462546508111826, "learning_rate": 5.002284843869003e-06, "loss": 17.8169, "step": 822 }, { "epoch": 0.015043778675489426, "grad_norm": 9.39886461087385, "learning_rate": 5.00837776085301e-06, "loss": 18.4869, "step": 823 }, { "epoch": 0.015062057871935949, "grad_norm": 8.050725336094882, "learning_rate": 5.0144706778370145e-06, "loss": 17.8235, "step": 824 }, { "epoch": 0.015080337068382474, "grad_norm": 9.269730982093947, "learning_rate": 5.020563594821021e-06, "loss": 18.3107, "step": 825 }, { "epoch": 0.015098616264828998, "grad_norm": 6.197477814797283, "learning_rate": 5.026656511805027e-06, "loss": 17.2913, "step": 826 }, { "epoch": 0.015116895461275523, "grad_norm": 8.072231109131868, "learning_rate": 5.0327494287890335e-06, "loss": 18.1054, "step": 827 }, { "epoch": 0.015135174657722046, "grad_norm": 7.95630608538138, "learning_rate": 5.038842345773039e-06, "loss": 18.0839, "step": 828 }, { "epoch": 0.01515345385416857, "grad_norm": 7.566277226086446, "learning_rate": 5.044935262757046e-06, "loss": 18.2472, "step": 829 }, { "epoch": 0.015171733050615095, "grad_norm": 8.616470788304431, "learning_rate": 5.051028179741051e-06, "loss": 18.4674, "step": 830 }, { "epoch": 0.015190012247061619, "grad_norm": 7.0757863279990865, "learning_rate": 5.057121096725057e-06, "loss": 17.5577, "step": 831 }, { "epoch": 0.015208291443508144, "grad_norm": 8.079457005193985, "learning_rate": 5.063214013709063e-06, "loss": 18.3606, "step": 832 }, { "epoch": 0.015226570639954667, "grad_norm": 8.778226755851602, "learning_rate": 5.06930693069307e-06, "loss": 18.2777, "step": 833 }, { "epoch": 0.015244849836401193, "grad_norm": 8.512983358399698, "learning_rate": 5.075399847677076e-06, "loss": 18.5117, "step": 834 }, { "epoch": 0.015263129032847716, "grad_norm": 8.56006329627807, "learning_rate": 5.081492764661082e-06, "loss": 18.4841, "step": 835 }, { "epoch": 0.01528140822929424, "grad_norm": 9.288836121000003, "learning_rate": 5.087585681645088e-06, "loss": 18.3186, "step": 836 }, { "epoch": 0.015299687425740765, "grad_norm": 12.407714577797238, "learning_rate": 5.093678598629094e-06, "loss": 18.7556, "step": 837 }, { "epoch": 0.015317966622187288, "grad_norm": 8.347581071473323, "learning_rate": 5.0997715156131e-06, "loss": 18.1685, "step": 838 }, { "epoch": 0.015336245818633814, "grad_norm": 8.13250343081008, "learning_rate": 5.105864432597106e-06, "loss": 18.0985, "step": 839 }, { "epoch": 0.015354525015080337, "grad_norm": 8.810476732287764, "learning_rate": 5.1119573495811126e-06, "loss": 18.333, "step": 840 }, { "epoch": 0.01537280421152686, "grad_norm": 7.078936610191827, "learning_rate": 5.118050266565119e-06, "loss": 17.4763, "step": 841 }, { "epoch": 0.015391083407973386, "grad_norm": 7.42314765775181, "learning_rate": 5.124143183549125e-06, "loss": 17.6829, "step": 842 }, { "epoch": 0.01540936260441991, "grad_norm": 7.98124248088551, "learning_rate": 5.13023610053313e-06, "loss": 17.992, "step": 843 }, { "epoch": 0.015427641800866435, "grad_norm": 9.077498128006873, "learning_rate": 5.1363290175171365e-06, "loss": 18.2794, "step": 844 }, { "epoch": 0.015445920997312958, "grad_norm": 8.072782086561258, "learning_rate": 5.142421934501143e-06, "loss": 17.945, "step": 845 }, { "epoch": 0.015464200193759482, "grad_norm": 9.451917620903867, "learning_rate": 5.148514851485149e-06, "loss": 18.9475, "step": 846 }, { "epoch": 0.015482479390206007, "grad_norm": 8.796830074489074, "learning_rate": 5.1546077684691554e-06, "loss": 18.49, "step": 847 }, { "epoch": 0.01550075858665253, "grad_norm": 8.306849648605311, "learning_rate": 5.160700685453162e-06, "loss": 18.0991, "step": 848 }, { "epoch": 0.015519037783099056, "grad_norm": 7.459114632117044, "learning_rate": 5.166793602437167e-06, "loss": 17.8777, "step": 849 }, { "epoch": 0.015537316979545579, "grad_norm": 7.5045201012713045, "learning_rate": 5.172886519421173e-06, "loss": 17.9472, "step": 850 }, { "epoch": 0.015555596175992104, "grad_norm": 8.71227984792957, "learning_rate": 5.178979436405179e-06, "loss": 18.0386, "step": 851 }, { "epoch": 0.015573875372438628, "grad_norm": 7.765463154922769, "learning_rate": 5.185072353389186e-06, "loss": 17.9142, "step": 852 }, { "epoch": 0.015592154568885151, "grad_norm": 10.161510898240463, "learning_rate": 5.191165270373192e-06, "loss": 18.6878, "step": 853 }, { "epoch": 0.015610433765331676, "grad_norm": 9.565597600633472, "learning_rate": 5.197258187357198e-06, "loss": 18.3524, "step": 854 }, { "epoch": 0.0156287129617782, "grad_norm": 8.17644137642925, "learning_rate": 5.203351104341203e-06, "loss": 17.7937, "step": 855 }, { "epoch": 0.015646992158224723, "grad_norm": 9.283708677240496, "learning_rate": 5.20944402132521e-06, "loss": 18.368, "step": 856 }, { "epoch": 0.01566527135467125, "grad_norm": 10.0686745278984, "learning_rate": 5.2155369383092155e-06, "loss": 18.7853, "step": 857 }, { "epoch": 0.015683550551117774, "grad_norm": 7.431681847854302, "learning_rate": 5.221629855293222e-06, "loss": 17.6462, "step": 858 }, { "epoch": 0.015701829747564296, "grad_norm": 8.486462263281691, "learning_rate": 5.227722772277229e-06, "loss": 18.0921, "step": 859 }, { "epoch": 0.01572010894401082, "grad_norm": 7.586138553514978, "learning_rate": 5.2338156892612345e-06, "loss": 17.8335, "step": 860 }, { "epoch": 0.015738388140457346, "grad_norm": 7.120489311236346, "learning_rate": 5.239908606245241e-06, "loss": 17.7785, "step": 861 }, { "epoch": 0.01575666733690387, "grad_norm": 7.800959298571683, "learning_rate": 5.246001523229246e-06, "loss": 17.9972, "step": 862 }, { "epoch": 0.015774946533350393, "grad_norm": 8.62951022292379, "learning_rate": 5.252094440213253e-06, "loss": 18.4499, "step": 863 }, { "epoch": 0.01579322572979692, "grad_norm": 7.9691727795812195, "learning_rate": 5.258187357197258e-06, "loss": 18.3753, "step": 864 }, { "epoch": 0.015811504926243444, "grad_norm": 8.388302723530174, "learning_rate": 5.264280274181265e-06, "loss": 18.4872, "step": 865 }, { "epoch": 0.015829784122689965, "grad_norm": 8.049158898020348, "learning_rate": 5.270373191165271e-06, "loss": 17.7163, "step": 866 }, { "epoch": 0.01584806331913649, "grad_norm": 9.6038408512579, "learning_rate": 5.276466108149277e-06, "loss": 19.0707, "step": 867 }, { "epoch": 0.015866342515583016, "grad_norm": 7.382442367842405, "learning_rate": 5.282559025133282e-06, "loss": 17.6808, "step": 868 }, { "epoch": 0.015884621712029538, "grad_norm": 8.080470041328557, "learning_rate": 5.288651942117289e-06, "loss": 18.38, "step": 869 }, { "epoch": 0.015902900908476063, "grad_norm": 9.767620188020627, "learning_rate": 5.2947448591012955e-06, "loss": 18.9061, "step": 870 }, { "epoch": 0.015921180104922588, "grad_norm": 8.31662946648782, "learning_rate": 5.300837776085301e-06, "loss": 17.8949, "step": 871 }, { "epoch": 0.015939459301369113, "grad_norm": 7.701637780454898, "learning_rate": 5.306930693069308e-06, "loss": 17.6824, "step": 872 }, { "epoch": 0.015957738497815635, "grad_norm": 8.790161934550762, "learning_rate": 5.313023610053314e-06, "loss": 18.6216, "step": 873 }, { "epoch": 0.01597601769426216, "grad_norm": 7.674755782194827, "learning_rate": 5.319116527037319e-06, "loss": 17.5485, "step": 874 }, { "epoch": 0.015994296890708685, "grad_norm": 8.867388068015622, "learning_rate": 5.325209444021325e-06, "loss": 18.4883, "step": 875 }, { "epoch": 0.016012576087155207, "grad_norm": 9.1371586404217, "learning_rate": 5.331302361005332e-06, "loss": 18.6562, "step": 876 }, { "epoch": 0.016030855283601732, "grad_norm": 8.012671192217049, "learning_rate": 5.3373952779893375e-06, "loss": 17.9416, "step": 877 }, { "epoch": 0.016049134480048258, "grad_norm": 9.978771087488296, "learning_rate": 5.343488194973344e-06, "loss": 18.8468, "step": 878 }, { "epoch": 0.016067413676494783, "grad_norm": 9.18322782849521, "learning_rate": 5.349581111957351e-06, "loss": 18.4841, "step": 879 }, { "epoch": 0.016085692872941305, "grad_norm": 10.14432801291603, "learning_rate": 5.355674028941356e-06, "loss": 18.7332, "step": 880 }, { "epoch": 0.01610397206938783, "grad_norm": 7.964452973970781, "learning_rate": 5.361766945925362e-06, "loss": 18.2144, "step": 881 }, { "epoch": 0.016122251265834355, "grad_norm": 7.916189059576979, "learning_rate": 5.367859862909368e-06, "loss": 17.9018, "step": 882 }, { "epoch": 0.016140530462280877, "grad_norm": 8.399208049518332, "learning_rate": 5.373952779893375e-06, "loss": 18.0163, "step": 883 }, { "epoch": 0.016158809658727402, "grad_norm": 9.116458026740009, "learning_rate": 5.38004569687738e-06, "loss": 18.4248, "step": 884 }, { "epoch": 0.016177088855173927, "grad_norm": 8.630937961007142, "learning_rate": 5.386138613861387e-06, "loss": 18.3698, "step": 885 }, { "epoch": 0.01619536805162045, "grad_norm": 9.413585620699246, "learning_rate": 5.3922315308453936e-06, "loss": 18.6116, "step": 886 }, { "epoch": 0.016213647248066974, "grad_norm": 7.6368778975300184, "learning_rate": 5.3983244478293985e-06, "loss": 17.8458, "step": 887 }, { "epoch": 0.0162319264445135, "grad_norm": 7.403055825794283, "learning_rate": 5.404417364813404e-06, "loss": 17.747, "step": 888 }, { "epoch": 0.016250205640960025, "grad_norm": 7.766207662372749, "learning_rate": 5.410510281797411e-06, "loss": 18.1277, "step": 889 }, { "epoch": 0.016268484837406547, "grad_norm": 8.930033722117813, "learning_rate": 5.4166031987814174e-06, "loss": 18.3946, "step": 890 }, { "epoch": 0.016286764033853072, "grad_norm": 8.124914772358064, "learning_rate": 5.422696115765423e-06, "loss": 18.2002, "step": 891 }, { "epoch": 0.016305043230299597, "grad_norm": 9.296893407946603, "learning_rate": 5.42878903274943e-06, "loss": 18.4557, "step": 892 }, { "epoch": 0.01632332242674612, "grad_norm": 8.282641846666893, "learning_rate": 5.434881949733435e-06, "loss": 18.3414, "step": 893 }, { "epoch": 0.016341601623192644, "grad_norm": 7.70249475337819, "learning_rate": 5.440974866717441e-06, "loss": 18.0186, "step": 894 }, { "epoch": 0.01635988081963917, "grad_norm": 8.136410031229282, "learning_rate": 5.447067783701447e-06, "loss": 18.4672, "step": 895 }, { "epoch": 0.016378160016085695, "grad_norm": 9.9700678925168, "learning_rate": 5.453160700685454e-06, "loss": 18.9682, "step": 896 }, { "epoch": 0.016396439212532216, "grad_norm": 7.017176755988746, "learning_rate": 5.45925361766946e-06, "loss": 17.8646, "step": 897 }, { "epoch": 0.01641471840897874, "grad_norm": 9.103585413979374, "learning_rate": 5.465346534653466e-06, "loss": 18.3866, "step": 898 }, { "epoch": 0.016432997605425267, "grad_norm": 8.404999518682738, "learning_rate": 5.471439451637471e-06, "loss": 18.418, "step": 899 }, { "epoch": 0.01645127680187179, "grad_norm": 7.937828994479255, "learning_rate": 5.4775323686214776e-06, "loss": 18.3949, "step": 900 }, { "epoch": 0.016469555998318314, "grad_norm": 8.51573068887959, "learning_rate": 5.483625285605484e-06, "loss": 18.1735, "step": 901 }, { "epoch": 0.01648783519476484, "grad_norm": 8.529255174675132, "learning_rate": 5.48971820258949e-06, "loss": 18.2995, "step": 902 }, { "epoch": 0.01650611439121136, "grad_norm": 7.766746131636291, "learning_rate": 5.4958111195734965e-06, "loss": 17.3718, "step": 903 }, { "epoch": 0.016524393587657886, "grad_norm": 9.746610904544609, "learning_rate": 5.501904036557502e-06, "loss": 18.1831, "step": 904 }, { "epoch": 0.01654267278410441, "grad_norm": 8.476263194389706, "learning_rate": 5.507996953541509e-06, "loss": 17.8804, "step": 905 }, { "epoch": 0.016560951980550936, "grad_norm": 7.748230606111784, "learning_rate": 5.514089870525514e-06, "loss": 17.8924, "step": 906 }, { "epoch": 0.016579231176997458, "grad_norm": 7.432102607667173, "learning_rate": 5.52018278750952e-06, "loss": 18.1648, "step": 907 }, { "epoch": 0.016597510373443983, "grad_norm": 7.84694618480964, "learning_rate": 5.526275704493527e-06, "loss": 17.9913, "step": 908 }, { "epoch": 0.01661578956989051, "grad_norm": 7.404929998940032, "learning_rate": 5.532368621477533e-06, "loss": 17.8132, "step": 909 }, { "epoch": 0.01663406876633703, "grad_norm": 9.29751618630069, "learning_rate": 5.538461538461539e-06, "loss": 18.5345, "step": 910 }, { "epoch": 0.016652347962783556, "grad_norm": 7.417833178895084, "learning_rate": 5.544554455445545e-06, "loss": 17.6646, "step": 911 }, { "epoch": 0.01667062715923008, "grad_norm": 9.70772100620693, "learning_rate": 5.550647372429551e-06, "loss": 18.7744, "step": 912 }, { "epoch": 0.016688906355676606, "grad_norm": 9.072232524086857, "learning_rate": 5.556740289413557e-06, "loss": 18.3423, "step": 913 }, { "epoch": 0.016707185552123128, "grad_norm": 9.07910036742243, "learning_rate": 5.562833206397563e-06, "loss": 18.2481, "step": 914 }, { "epoch": 0.016725464748569653, "grad_norm": 9.809361373009457, "learning_rate": 5.56892612338157e-06, "loss": 18.367, "step": 915 }, { "epoch": 0.01674374394501618, "grad_norm": 10.115596121390496, "learning_rate": 5.575019040365576e-06, "loss": 18.3545, "step": 916 }, { "epoch": 0.0167620231414627, "grad_norm": 6.7708275169011305, "learning_rate": 5.581111957349582e-06, "loss": 17.5047, "step": 917 }, { "epoch": 0.016780302337909225, "grad_norm": 8.138730437640659, "learning_rate": 5.587204874333587e-06, "loss": 18.3553, "step": 918 }, { "epoch": 0.01679858153435575, "grad_norm": 9.609285718167515, "learning_rate": 5.593297791317594e-06, "loss": 18.3202, "step": 919 }, { "epoch": 0.016816860730802272, "grad_norm": 8.384124976958663, "learning_rate": 5.5993907083015995e-06, "loss": 18.2082, "step": 920 }, { "epoch": 0.016835139927248798, "grad_norm": 9.14439730562192, "learning_rate": 5.605483625285606e-06, "loss": 17.9248, "step": 921 }, { "epoch": 0.016853419123695323, "grad_norm": 8.996609322297227, "learning_rate": 5.611576542269612e-06, "loss": 18.3729, "step": 922 }, { "epoch": 0.016871698320141848, "grad_norm": 7.315932421270537, "learning_rate": 5.6176694592536185e-06, "loss": 17.8854, "step": 923 }, { "epoch": 0.01688997751658837, "grad_norm": 8.122807052797084, "learning_rate": 5.623762376237625e-06, "loss": 18.3153, "step": 924 }, { "epoch": 0.016908256713034895, "grad_norm": 7.902339895701034, "learning_rate": 5.62985529322163e-06, "loss": 18.2543, "step": 925 }, { "epoch": 0.01692653590948142, "grad_norm": 9.590556579018623, "learning_rate": 5.635948210205637e-06, "loss": 19.1718, "step": 926 }, { "epoch": 0.016944815105927942, "grad_norm": 8.795053677679899, "learning_rate": 5.642041127189642e-06, "loss": 18.236, "step": 927 }, { "epoch": 0.016963094302374467, "grad_norm": 8.946710023619962, "learning_rate": 5.648134044173649e-06, "loss": 18.3254, "step": 928 }, { "epoch": 0.016981373498820992, "grad_norm": 7.717632589221404, "learning_rate": 5.654226961157655e-06, "loss": 17.883, "step": 929 }, { "epoch": 0.016999652695267518, "grad_norm": 8.818060009422503, "learning_rate": 5.660319878141661e-06, "loss": 18.7201, "step": 930 }, { "epoch": 0.01701793189171404, "grad_norm": 7.428304896972738, "learning_rate": 5.666412795125666e-06, "loss": 17.6157, "step": 931 }, { "epoch": 0.017036211088160565, "grad_norm": 8.63741944219696, "learning_rate": 5.672505712109673e-06, "loss": 18.2596, "step": 932 }, { "epoch": 0.01705449028460709, "grad_norm": 8.776657541381901, "learning_rate": 5.678598629093679e-06, "loss": 18.8281, "step": 933 }, { "epoch": 0.01707276948105361, "grad_norm": 8.657479987630891, "learning_rate": 5.684691546077685e-06, "loss": 17.9952, "step": 934 }, { "epoch": 0.017091048677500137, "grad_norm": 8.165623263296865, "learning_rate": 5.690784463061692e-06, "loss": 18.0903, "step": 935 }, { "epoch": 0.017109327873946662, "grad_norm": 7.746609654316087, "learning_rate": 5.6968773800456976e-06, "loss": 17.9119, "step": 936 }, { "epoch": 0.017127607070393184, "grad_norm": 6.5051760343841245, "learning_rate": 5.702970297029703e-06, "loss": 17.2384, "step": 937 }, { "epoch": 0.01714588626683971, "grad_norm": 10.149156592353432, "learning_rate": 5.709063214013709e-06, "loss": 18.8406, "step": 938 }, { "epoch": 0.017164165463286234, "grad_norm": 7.539662391577778, "learning_rate": 5.715156130997716e-06, "loss": 17.7378, "step": 939 }, { "epoch": 0.01718244465973276, "grad_norm": 7.8752178884870565, "learning_rate": 5.7212490479817215e-06, "loss": 17.9224, "step": 940 }, { "epoch": 0.01720072385617928, "grad_norm": 8.16065607456676, "learning_rate": 5.727341964965728e-06, "loss": 18.0614, "step": 941 }, { "epoch": 0.017219003052625807, "grad_norm": 9.572267813617717, "learning_rate": 5.733434881949735e-06, "loss": 18.6002, "step": 942 }, { "epoch": 0.017237282249072332, "grad_norm": 8.646762787246, "learning_rate": 5.73952779893374e-06, "loss": 17.9923, "step": 943 }, { "epoch": 0.017255561445518854, "grad_norm": 7.53253733262006, "learning_rate": 5.745620715917745e-06, "loss": 17.9094, "step": 944 }, { "epoch": 0.01727384064196538, "grad_norm": 7.52707356259082, "learning_rate": 5.751713632901752e-06, "loss": 17.6118, "step": 945 }, { "epoch": 0.017292119838411904, "grad_norm": 8.480907437056311, "learning_rate": 5.7578065498857585e-06, "loss": 17.9889, "step": 946 }, { "epoch": 0.01731039903485843, "grad_norm": 7.035617828510175, "learning_rate": 5.763899466869764e-06, "loss": 17.6165, "step": 947 }, { "epoch": 0.01732867823130495, "grad_norm": 8.0693542132295, "learning_rate": 5.769992383853771e-06, "loss": 18.1385, "step": 948 }, { "epoch": 0.017346957427751476, "grad_norm": 9.662756503100724, "learning_rate": 5.776085300837777e-06, "loss": 19.1192, "step": 949 }, { "epoch": 0.017365236624198, "grad_norm": 7.757126477800354, "learning_rate": 5.7821782178217824e-06, "loss": 17.9017, "step": 950 }, { "epoch": 0.017383515820644523, "grad_norm": 9.677005352029196, "learning_rate": 5.788271134805788e-06, "loss": 18.4819, "step": 951 }, { "epoch": 0.01740179501709105, "grad_norm": 7.30926952903755, "learning_rate": 5.794364051789795e-06, "loss": 17.8772, "step": 952 }, { "epoch": 0.017420074213537574, "grad_norm": 8.325157678978579, "learning_rate": 5.800456968773801e-06, "loss": 18.1306, "step": 953 }, { "epoch": 0.017438353409984095, "grad_norm": 7.731164288521678, "learning_rate": 5.806549885757807e-06, "loss": 17.9777, "step": 954 }, { "epoch": 0.01745663260643062, "grad_norm": 7.6792569282850485, "learning_rate": 5.812642802741814e-06, "loss": 18.0973, "step": 955 }, { "epoch": 0.017474911802877146, "grad_norm": 8.399265872828295, "learning_rate": 5.818735719725819e-06, "loss": 18.2354, "step": 956 }, { "epoch": 0.01749319099932367, "grad_norm": 8.10916575336011, "learning_rate": 5.824828636709825e-06, "loss": 17.878, "step": 957 }, { "epoch": 0.017511470195770193, "grad_norm": 7.027505730094863, "learning_rate": 5.830921553693831e-06, "loss": 17.8894, "step": 958 }, { "epoch": 0.017529749392216718, "grad_norm": 7.652531931486096, "learning_rate": 5.837014470677838e-06, "loss": 18.224, "step": 959 }, { "epoch": 0.017548028588663243, "grad_norm": 10.327063870382897, "learning_rate": 5.843107387661843e-06, "loss": 17.9355, "step": 960 }, { "epoch": 0.017566307785109765, "grad_norm": 8.613566541365316, "learning_rate": 5.84920030464585e-06, "loss": 18.0343, "step": 961 }, { "epoch": 0.01758458698155629, "grad_norm": 8.57057882780188, "learning_rate": 5.855293221629855e-06, "loss": 18.196, "step": 962 }, { "epoch": 0.017602866178002816, "grad_norm": 7.938972752095507, "learning_rate": 5.8613861386138615e-06, "loss": 17.6136, "step": 963 }, { "epoch": 0.01762114537444934, "grad_norm": 7.478286976202789, "learning_rate": 5.867479055597868e-06, "loss": 17.9303, "step": 964 }, { "epoch": 0.017639424570895863, "grad_norm": 8.883723962903053, "learning_rate": 5.873571972581874e-06, "loss": 18.2692, "step": 965 }, { "epoch": 0.017657703767342388, "grad_norm": 8.651125461568094, "learning_rate": 5.8796648895658805e-06, "loss": 18.1206, "step": 966 }, { "epoch": 0.017675982963788913, "grad_norm": 11.154753319717942, "learning_rate": 5.885757806549886e-06, "loss": 19.2872, "step": 967 }, { "epoch": 0.017694262160235435, "grad_norm": 10.085538888665493, "learning_rate": 5.891850723533893e-06, "loss": 18.393, "step": 968 }, { "epoch": 0.01771254135668196, "grad_norm": 7.843160528454247, "learning_rate": 5.897943640517898e-06, "loss": 18.0221, "step": 969 }, { "epoch": 0.017730820553128485, "grad_norm": 7.979372852978716, "learning_rate": 5.904036557501904e-06, "loss": 17.9246, "step": 970 }, { "epoch": 0.017749099749575007, "grad_norm": 9.453295281409622, "learning_rate": 5.910129474485911e-06, "loss": 18.7617, "step": 971 }, { "epoch": 0.017767378946021532, "grad_norm": 8.043964875887363, "learning_rate": 5.916222391469917e-06, "loss": 17.8481, "step": 972 }, { "epoch": 0.017785658142468058, "grad_norm": 8.844986788228319, "learning_rate": 5.922315308453923e-06, "loss": 18.457, "step": 973 }, { "epoch": 0.017803937338914583, "grad_norm": 9.057668393721022, "learning_rate": 5.928408225437929e-06, "loss": 18.1555, "step": 974 }, { "epoch": 0.017822216535361105, "grad_norm": 8.104869602718992, "learning_rate": 5.934501142421935e-06, "loss": 17.8352, "step": 975 }, { "epoch": 0.01784049573180763, "grad_norm": 7.66250929390229, "learning_rate": 5.940594059405941e-06, "loss": 17.9034, "step": 976 }, { "epoch": 0.017858774928254155, "grad_norm": 9.66899303971445, "learning_rate": 5.946686976389947e-06, "loss": 18.6847, "step": 977 }, { "epoch": 0.017877054124700677, "grad_norm": 8.201507650990836, "learning_rate": 5.952779893373953e-06, "loss": 18.3439, "step": 978 }, { "epoch": 0.017895333321147202, "grad_norm": 7.123313056914936, "learning_rate": 5.95887281035796e-06, "loss": 17.396, "step": 979 }, { "epoch": 0.017913612517593727, "grad_norm": 8.536481554834298, "learning_rate": 5.964965727341966e-06, "loss": 18.5038, "step": 980 }, { "epoch": 0.017931891714040252, "grad_norm": 6.346971469399508, "learning_rate": 5.971058644325971e-06, "loss": 17.1418, "step": 981 }, { "epoch": 0.017950170910486774, "grad_norm": 7.619923063560709, "learning_rate": 5.977151561309978e-06, "loss": 17.7694, "step": 982 }, { "epoch": 0.0179684501069333, "grad_norm": 7.432489736143195, "learning_rate": 5.9832444782939835e-06, "loss": 17.6501, "step": 983 }, { "epoch": 0.017986729303379825, "grad_norm": 8.525453283530835, "learning_rate": 5.98933739527799e-06, "loss": 18.0, "step": 984 }, { "epoch": 0.018005008499826346, "grad_norm": 8.319849636997306, "learning_rate": 5.995430312261996e-06, "loss": 17.8633, "step": 985 }, { "epoch": 0.01802328769627287, "grad_norm": 9.110643654811224, "learning_rate": 6.0015232292460024e-06, "loss": 18.3716, "step": 986 }, { "epoch": 0.018041566892719397, "grad_norm": 7.473648054911621, "learning_rate": 6.007616146230009e-06, "loss": 17.6931, "step": 987 }, { "epoch": 0.01805984608916592, "grad_norm": 8.189401317764087, "learning_rate": 6.013709063214014e-06, "loss": 17.9509, "step": 988 }, { "epoch": 0.018078125285612444, "grad_norm": 8.063947143779858, "learning_rate": 6.01980198019802e-06, "loss": 18.3433, "step": 989 }, { "epoch": 0.01809640448205897, "grad_norm": 7.397779832323864, "learning_rate": 6.025894897182026e-06, "loss": 17.6071, "step": 990 }, { "epoch": 0.018114683678505494, "grad_norm": 7.525569710600015, "learning_rate": 6.031987814166033e-06, "loss": 17.718, "step": 991 }, { "epoch": 0.018132962874952016, "grad_norm": 8.473776849496664, "learning_rate": 6.038080731150039e-06, "loss": 18.1782, "step": 992 }, { "epoch": 0.01815124207139854, "grad_norm": 8.206811645257254, "learning_rate": 6.044173648134045e-06, "loss": 17.9923, "step": 993 }, { "epoch": 0.018169521267845067, "grad_norm": 7.4978382746034296, "learning_rate": 6.05026656511805e-06, "loss": 18.3228, "step": 994 }, { "epoch": 0.01818780046429159, "grad_norm": 8.273357499146584, "learning_rate": 6.056359482102057e-06, "loss": 18.4123, "step": 995 }, { "epoch": 0.018206079660738114, "grad_norm": 6.892464190887397, "learning_rate": 6.0624523990860626e-06, "loss": 17.5935, "step": 996 }, { "epoch": 0.01822435885718464, "grad_norm": 7.848376155985841, "learning_rate": 6.068545316070069e-06, "loss": 17.9181, "step": 997 }, { "epoch": 0.018242638053631164, "grad_norm": 8.05134430330745, "learning_rate": 6.074638233054076e-06, "loss": 18.1096, "step": 998 }, { "epoch": 0.018260917250077686, "grad_norm": 8.255534490186607, "learning_rate": 6.0807311500380815e-06, "loss": 17.9048, "step": 999 }, { "epoch": 0.01827919644652421, "grad_norm": 7.252054367954556, "learning_rate": 6.0868240670220864e-06, "loss": 17.9062, "step": 1000 }, { "epoch": 0.018297475642970736, "grad_norm": 9.036905165252518, "learning_rate": 6.092916984006093e-06, "loss": 18.292, "step": 1001 }, { "epoch": 0.018315754839417258, "grad_norm": 7.928256324043463, "learning_rate": 6.0990099009901e-06, "loss": 17.8227, "step": 1002 }, { "epoch": 0.018334034035863783, "grad_norm": 9.459523185652099, "learning_rate": 6.105102817974105e-06, "loss": 18.4981, "step": 1003 }, { "epoch": 0.01835231323231031, "grad_norm": 9.253943259491349, "learning_rate": 6.111195734958112e-06, "loss": 18.2896, "step": 1004 }, { "epoch": 0.01837059242875683, "grad_norm": 7.049122820792015, "learning_rate": 6.117288651942118e-06, "loss": 17.567, "step": 1005 }, { "epoch": 0.018388871625203355, "grad_norm": 7.454126276420805, "learning_rate": 6.1233815689261235e-06, "loss": 17.6288, "step": 1006 }, { "epoch": 0.01840715082164988, "grad_norm": 7.554500865570359, "learning_rate": 6.129474485910129e-06, "loss": 17.5567, "step": 1007 }, { "epoch": 0.018425430018096406, "grad_norm": 7.5218192319804595, "learning_rate": 6.135567402894136e-06, "loss": 17.662, "step": 1008 }, { "epoch": 0.018443709214542928, "grad_norm": 8.684509200583053, "learning_rate": 6.1416603198781425e-06, "loss": 18.411, "step": 1009 }, { "epoch": 0.018461988410989453, "grad_norm": 8.410385879818696, "learning_rate": 6.147753236862148e-06, "loss": 17.9104, "step": 1010 }, { "epoch": 0.018480267607435978, "grad_norm": 7.885404566415697, "learning_rate": 6.153846153846155e-06, "loss": 18.0883, "step": 1011 }, { "epoch": 0.0184985468038825, "grad_norm": 8.439882645200115, "learning_rate": 6.159939070830161e-06, "loss": 18.3346, "step": 1012 }, { "epoch": 0.018516826000329025, "grad_norm": 8.40871647134056, "learning_rate": 6.166031987814166e-06, "loss": 18.1789, "step": 1013 }, { "epoch": 0.01853510519677555, "grad_norm": 8.60571070495959, "learning_rate": 6.172124904798172e-06, "loss": 18.5632, "step": 1014 }, { "epoch": 0.018553384393222076, "grad_norm": 8.243216980529974, "learning_rate": 6.178217821782179e-06, "loss": 18.0231, "step": 1015 }, { "epoch": 0.018571663589668597, "grad_norm": 7.86378260881206, "learning_rate": 6.1843107387661845e-06, "loss": 17.7789, "step": 1016 }, { "epoch": 0.018589942786115123, "grad_norm": 8.820936931499855, "learning_rate": 6.190403655750191e-06, "loss": 18.6289, "step": 1017 }, { "epoch": 0.018608221982561648, "grad_norm": 8.782521896358636, "learning_rate": 6.196496572734198e-06, "loss": 18.4418, "step": 1018 }, { "epoch": 0.01862650117900817, "grad_norm": 6.899410432972345, "learning_rate": 6.202589489718203e-06, "loss": 17.4621, "step": 1019 }, { "epoch": 0.018644780375454695, "grad_norm": 8.786703836897136, "learning_rate": 6.208682406702209e-06, "loss": 18.611, "step": 1020 }, { "epoch": 0.01866305957190122, "grad_norm": 7.575104462004002, "learning_rate": 6.214775323686215e-06, "loss": 17.7327, "step": 1021 }, { "epoch": 0.018681338768347742, "grad_norm": 7.787769432834653, "learning_rate": 6.220868240670222e-06, "loss": 18.0189, "step": 1022 }, { "epoch": 0.018699617964794267, "grad_norm": 8.26130050674653, "learning_rate": 6.226961157654227e-06, "loss": 18.0139, "step": 1023 }, { "epoch": 0.018717897161240792, "grad_norm": 7.317306905270767, "learning_rate": 6.233054074638234e-06, "loss": 17.6614, "step": 1024 }, { "epoch": 0.018736176357687317, "grad_norm": 7.253990770261824, "learning_rate": 6.239146991622239e-06, "loss": 17.7952, "step": 1025 }, { "epoch": 0.01875445555413384, "grad_norm": 7.277989169206523, "learning_rate": 6.2452399086062455e-06, "loss": 17.9357, "step": 1026 }, { "epoch": 0.018772734750580364, "grad_norm": 8.994043472015644, "learning_rate": 6.251332825590252e-06, "loss": 18.5219, "step": 1027 }, { "epoch": 0.01879101394702689, "grad_norm": 10.04532201853213, "learning_rate": 6.257425742574258e-06, "loss": 18.5184, "step": 1028 }, { "epoch": 0.01880929314347341, "grad_norm": 8.069337617902569, "learning_rate": 6.2635186595582645e-06, "loss": 18.1767, "step": 1029 }, { "epoch": 0.018827572339919937, "grad_norm": 8.78105839296198, "learning_rate": 6.26961157654227e-06, "loss": 18.4066, "step": 1030 }, { "epoch": 0.018845851536366462, "grad_norm": 7.886476272784995, "learning_rate": 6.275704493526277e-06, "loss": 17.7988, "step": 1031 }, { "epoch": 0.018864130732812987, "grad_norm": 7.714648845995187, "learning_rate": 6.281797410510282e-06, "loss": 17.9388, "step": 1032 }, { "epoch": 0.01888240992925951, "grad_norm": 9.323420103128699, "learning_rate": 6.287890327494288e-06, "loss": 18.2556, "step": 1033 }, { "epoch": 0.018900689125706034, "grad_norm": 8.036862821596728, "learning_rate": 6.293983244478294e-06, "loss": 17.8791, "step": 1034 }, { "epoch": 0.01891896832215256, "grad_norm": 7.757507873745402, "learning_rate": 6.300076161462301e-06, "loss": 17.9595, "step": 1035 }, { "epoch": 0.01893724751859908, "grad_norm": 7.829337929170519, "learning_rate": 6.306169078446307e-06, "loss": 17.8935, "step": 1036 }, { "epoch": 0.018955526715045606, "grad_norm": 7.992430925493884, "learning_rate": 6.312261995430313e-06, "loss": 17.9734, "step": 1037 }, { "epoch": 0.01897380591149213, "grad_norm": 7.085494698683286, "learning_rate": 6.318354912414319e-06, "loss": 17.7806, "step": 1038 }, { "epoch": 0.018992085107938653, "grad_norm": 7.997564969386241, "learning_rate": 6.324447829398325e-06, "loss": 18.1079, "step": 1039 }, { "epoch": 0.01901036430438518, "grad_norm": 7.5883916646067755, "learning_rate": 6.330540746382331e-06, "loss": 17.9337, "step": 1040 }, { "epoch": 0.019028643500831704, "grad_norm": 7.976100637615995, "learning_rate": 6.336633663366337e-06, "loss": 17.9246, "step": 1041 }, { "epoch": 0.01904692269727823, "grad_norm": 8.071392426167597, "learning_rate": 6.3427265803503435e-06, "loss": 18.0945, "step": 1042 }, { "epoch": 0.01906520189372475, "grad_norm": 9.530560059122761, "learning_rate": 6.34881949733435e-06, "loss": 18.1668, "step": 1043 }, { "epoch": 0.019083481090171276, "grad_norm": 8.56340449411634, "learning_rate": 6.354912414318355e-06, "loss": 18.4313, "step": 1044 }, { "epoch": 0.0191017602866178, "grad_norm": 8.121857464759803, "learning_rate": 6.361005331302361e-06, "loss": 17.8877, "step": 1045 }, { "epoch": 0.019120039483064323, "grad_norm": 8.417793702742772, "learning_rate": 6.3670982482863674e-06, "loss": 18.3695, "step": 1046 }, { "epoch": 0.01913831867951085, "grad_norm": 7.6983153438617835, "learning_rate": 6.373191165270374e-06, "loss": 18.035, "step": 1047 }, { "epoch": 0.019156597875957374, "grad_norm": 8.893385558769452, "learning_rate": 6.37928408225438e-06, "loss": 18.3983, "step": 1048 }, { "epoch": 0.0191748770724039, "grad_norm": 10.604928656082148, "learning_rate": 6.385376999238386e-06, "loss": 19.0163, "step": 1049 }, { "epoch": 0.01919315626885042, "grad_norm": 8.015052112864332, "learning_rate": 6.391469916222392e-06, "loss": 18.0764, "step": 1050 }, { "epoch": 0.019211435465296946, "grad_norm": 7.794808472928623, "learning_rate": 6.397562833206398e-06, "loss": 18.0088, "step": 1051 }, { "epoch": 0.01922971466174347, "grad_norm": 9.257359827663915, "learning_rate": 6.403655750190404e-06, "loss": 18.2095, "step": 1052 }, { "epoch": 0.019247993858189993, "grad_norm": 9.54466142041045, "learning_rate": 6.40974866717441e-06, "loss": 18.9175, "step": 1053 }, { "epoch": 0.019266273054636518, "grad_norm": 8.425516813446993, "learning_rate": 6.415841584158417e-06, "loss": 18.5695, "step": 1054 }, { "epoch": 0.019284552251083043, "grad_norm": 8.821858127632892, "learning_rate": 6.421934501142423e-06, "loss": 18.0299, "step": 1055 }, { "epoch": 0.019302831447529565, "grad_norm": 8.460954236609926, "learning_rate": 6.428027418126429e-06, "loss": 18.195, "step": 1056 }, { "epoch": 0.01932111064397609, "grad_norm": 7.721241701220841, "learning_rate": 6.434120335110434e-06, "loss": 17.8886, "step": 1057 }, { "epoch": 0.019339389840422615, "grad_norm": 8.513571952984433, "learning_rate": 6.440213252094441e-06, "loss": 18.3746, "step": 1058 }, { "epoch": 0.01935766903686914, "grad_norm": 7.964369243429046, "learning_rate": 6.4463061690784465e-06, "loss": 18.0392, "step": 1059 }, { "epoch": 0.019375948233315662, "grad_norm": 8.517667077055881, "learning_rate": 6.452399086062453e-06, "loss": 18.6715, "step": 1060 }, { "epoch": 0.019394227429762188, "grad_norm": 8.02065554970321, "learning_rate": 6.458492003046459e-06, "loss": 17.7864, "step": 1061 }, { "epoch": 0.019412506626208713, "grad_norm": 8.48795408189634, "learning_rate": 6.4645849200304655e-06, "loss": 18.2628, "step": 1062 }, { "epoch": 0.019430785822655235, "grad_norm": 8.544082265276332, "learning_rate": 6.47067783701447e-06, "loss": 18.0344, "step": 1063 }, { "epoch": 0.01944906501910176, "grad_norm": 9.285600661351275, "learning_rate": 6.476770753998477e-06, "loss": 18.3072, "step": 1064 }, { "epoch": 0.019467344215548285, "grad_norm": 10.555387468574859, "learning_rate": 6.482863670982484e-06, "loss": 18.9304, "step": 1065 }, { "epoch": 0.01948562341199481, "grad_norm": 7.76696304450934, "learning_rate": 6.488956587966489e-06, "loss": 18.0832, "step": 1066 }, { "epoch": 0.019503902608441332, "grad_norm": 7.212742111061074, "learning_rate": 6.495049504950496e-06, "loss": 17.7941, "step": 1067 }, { "epoch": 0.019522181804887857, "grad_norm": 7.273730185764739, "learning_rate": 6.501142421934502e-06, "loss": 17.5107, "step": 1068 }, { "epoch": 0.019540461001334383, "grad_norm": 8.713701016851418, "learning_rate": 6.5072353389185075e-06, "loss": 18.1767, "step": 1069 }, { "epoch": 0.019558740197780904, "grad_norm": 7.520575463727811, "learning_rate": 6.513328255902513e-06, "loss": 17.9406, "step": 1070 }, { "epoch": 0.01957701939422743, "grad_norm": 7.842937496796728, "learning_rate": 6.51942117288652e-06, "loss": 18.0233, "step": 1071 }, { "epoch": 0.019595298590673955, "grad_norm": 8.061706991842145, "learning_rate": 6.525514089870526e-06, "loss": 17.7602, "step": 1072 }, { "epoch": 0.019613577787120477, "grad_norm": 7.6223938861675835, "learning_rate": 6.531607006854532e-06, "loss": 18.1191, "step": 1073 }, { "epoch": 0.019631856983567002, "grad_norm": 8.279127166238007, "learning_rate": 6.537699923838539e-06, "loss": 18.3115, "step": 1074 }, { "epoch": 0.019650136180013527, "grad_norm": 7.188251509364014, "learning_rate": 6.543792840822545e-06, "loss": 17.3865, "step": 1075 }, { "epoch": 0.019668415376460052, "grad_norm": 7.216757458949848, "learning_rate": 6.54988575780655e-06, "loss": 17.6607, "step": 1076 }, { "epoch": 0.019686694572906574, "grad_norm": 8.670515815497264, "learning_rate": 6.555978674790556e-06, "loss": 18.1868, "step": 1077 }, { "epoch": 0.0197049737693531, "grad_norm": 8.110697301247246, "learning_rate": 6.562071591774563e-06, "loss": 18.0288, "step": 1078 }, { "epoch": 0.019723252965799624, "grad_norm": 8.809505809170233, "learning_rate": 6.5681645087585685e-06, "loss": 18.0671, "step": 1079 }, { "epoch": 0.019741532162246146, "grad_norm": 9.605865738512373, "learning_rate": 6.574257425742575e-06, "loss": 18.4982, "step": 1080 }, { "epoch": 0.01975981135869267, "grad_norm": 7.916135171417083, "learning_rate": 6.580350342726582e-06, "loss": 18.1144, "step": 1081 }, { "epoch": 0.019778090555139197, "grad_norm": 7.217096870695601, "learning_rate": 6.586443259710587e-06, "loss": 18.0143, "step": 1082 }, { "epoch": 0.019796369751585722, "grad_norm": 7.679991772771017, "learning_rate": 6.592536176694593e-06, "loss": 18.0063, "step": 1083 }, { "epoch": 0.019814648948032244, "grad_norm": 8.20226140412346, "learning_rate": 6.598629093678599e-06, "loss": 18.4665, "step": 1084 }, { "epoch": 0.01983292814447877, "grad_norm": 9.078351119787966, "learning_rate": 6.6047220106626056e-06, "loss": 18.7463, "step": 1085 }, { "epoch": 0.019851207340925294, "grad_norm": 7.516852386672339, "learning_rate": 6.610814927646611e-06, "loss": 17.775, "step": 1086 }, { "epoch": 0.019869486537371816, "grad_norm": 8.181787029821258, "learning_rate": 6.616907844630618e-06, "loss": 18.472, "step": 1087 }, { "epoch": 0.01988776573381834, "grad_norm": 6.560494302351963, "learning_rate": 6.623000761614623e-06, "loss": 17.5194, "step": 1088 }, { "epoch": 0.019906044930264866, "grad_norm": 7.901692922390689, "learning_rate": 6.6290936785986294e-06, "loss": 18.0642, "step": 1089 }, { "epoch": 0.019924324126711388, "grad_norm": 8.061066613367577, "learning_rate": 6.635186595582635e-06, "loss": 18.0876, "step": 1090 }, { "epoch": 0.019942603323157913, "grad_norm": 9.454097103350708, "learning_rate": 6.641279512566642e-06, "loss": 18.8881, "step": 1091 }, { "epoch": 0.01996088251960444, "grad_norm": 8.756284787713158, "learning_rate": 6.647372429550648e-06, "loss": 18.2767, "step": 1092 }, { "epoch": 0.019979161716050964, "grad_norm": 7.533720389804396, "learning_rate": 6.653465346534654e-06, "loss": 18.2497, "step": 1093 }, { "epoch": 0.019997440912497486, "grad_norm": 9.298600283610812, "learning_rate": 6.659558263518661e-06, "loss": 18.5542, "step": 1094 }, { "epoch": 0.02001572010894401, "grad_norm": 7.740774888213946, "learning_rate": 6.665651180502666e-06, "loss": 18.2266, "step": 1095 }, { "epoch": 0.020033999305390536, "grad_norm": 8.909679220515974, "learning_rate": 6.671744097486672e-06, "loss": 18.6344, "step": 1096 }, { "epoch": 0.020052278501837058, "grad_norm": 7.63494248931986, "learning_rate": 6.677837014470678e-06, "loss": 17.5885, "step": 1097 }, { "epoch": 0.020070557698283583, "grad_norm": 8.920698496042283, "learning_rate": 6.683929931454685e-06, "loss": 18.087, "step": 1098 }, { "epoch": 0.020088836894730108, "grad_norm": 7.6970425992182765, "learning_rate": 6.690022848438691e-06, "loss": 17.8351, "step": 1099 }, { "epoch": 0.020107116091176633, "grad_norm": 9.550901562462453, "learning_rate": 6.696115765422697e-06, "loss": 18.9141, "step": 1100 }, { "epoch": 0.020125395287623155, "grad_norm": 7.903962156781939, "learning_rate": 6.702208682406702e-06, "loss": 17.9881, "step": 1101 }, { "epoch": 0.02014367448406968, "grad_norm": 9.009575604456296, "learning_rate": 6.7083015993907085e-06, "loss": 18.5816, "step": 1102 }, { "epoch": 0.020161953680516206, "grad_norm": 8.063362266384035, "learning_rate": 6.714394516374715e-06, "loss": 17.9211, "step": 1103 }, { "epoch": 0.020180232876962727, "grad_norm": 7.689532167015797, "learning_rate": 6.720487433358721e-06, "loss": 17.8806, "step": 1104 }, { "epoch": 0.020198512073409253, "grad_norm": 8.370291445199264, "learning_rate": 6.7265803503427275e-06, "loss": 18.36, "step": 1105 }, { "epoch": 0.020216791269855778, "grad_norm": 7.660706061027755, "learning_rate": 6.732673267326733e-06, "loss": 18.0976, "step": 1106 }, { "epoch": 0.0202350704663023, "grad_norm": 7.696634048353973, "learning_rate": 6.738766184310739e-06, "loss": 17.7352, "step": 1107 }, { "epoch": 0.020253349662748825, "grad_norm": 14.379213048417702, "learning_rate": 6.744859101294745e-06, "loss": 17.7067, "step": 1108 }, { "epoch": 0.02027162885919535, "grad_norm": 8.662425910853875, "learning_rate": 6.750952018278751e-06, "loss": 18.3063, "step": 1109 }, { "epoch": 0.020289908055641875, "grad_norm": 7.891443885305967, "learning_rate": 6.757044935262758e-06, "loss": 17.7426, "step": 1110 }, { "epoch": 0.020308187252088397, "grad_norm": 7.881527662289122, "learning_rate": 6.763137852246764e-06, "loss": 18.2592, "step": 1111 }, { "epoch": 0.020326466448534922, "grad_norm": 6.824244603799724, "learning_rate": 6.76923076923077e-06, "loss": 17.5405, "step": 1112 }, { "epoch": 0.020344745644981448, "grad_norm": 6.955748008576041, "learning_rate": 6.775323686214776e-06, "loss": 17.4328, "step": 1113 }, { "epoch": 0.02036302484142797, "grad_norm": 8.531623209320305, "learning_rate": 6.781416603198782e-06, "loss": 18.531, "step": 1114 }, { "epoch": 0.020381304037874495, "grad_norm": 7.793085602064876, "learning_rate": 6.787509520182788e-06, "loss": 17.8731, "step": 1115 }, { "epoch": 0.02039958323432102, "grad_norm": 8.83908926012251, "learning_rate": 6.793602437166794e-06, "loss": 17.9839, "step": 1116 }, { "epoch": 0.020417862430767545, "grad_norm": 9.225053276907715, "learning_rate": 6.7996953541508e-06, "loss": 18.1455, "step": 1117 }, { "epoch": 0.020436141627214067, "grad_norm": 8.482282192574337, "learning_rate": 6.805788271134807e-06, "loss": 18.2174, "step": 1118 }, { "epoch": 0.020454420823660592, "grad_norm": 8.932118579503443, "learning_rate": 6.811881188118813e-06, "loss": 18.3675, "step": 1119 }, { "epoch": 0.020472700020107117, "grad_norm": 7.505436002781391, "learning_rate": 6.817974105102818e-06, "loss": 18.0324, "step": 1120 }, { "epoch": 0.02049097921655364, "grad_norm": 6.9349703359748585, "learning_rate": 6.824067022086825e-06, "loss": 17.7132, "step": 1121 }, { "epoch": 0.020509258413000164, "grad_norm": 8.981324443612397, "learning_rate": 6.8301599390708305e-06, "loss": 18.2598, "step": 1122 }, { "epoch": 0.02052753760944669, "grad_norm": 7.754558487822148, "learning_rate": 6.836252856054837e-06, "loss": 18.0601, "step": 1123 }, { "epoch": 0.02054581680589321, "grad_norm": 8.370429440767074, "learning_rate": 6.842345773038843e-06, "loss": 17.9774, "step": 1124 }, { "epoch": 0.020564096002339737, "grad_norm": 8.02602419251651, "learning_rate": 6.8484386900228495e-06, "loss": 17.9154, "step": 1125 }, { "epoch": 0.02058237519878626, "grad_norm": 7.650949652381696, "learning_rate": 6.854531607006854e-06, "loss": 17.8702, "step": 1126 }, { "epoch": 0.020600654395232787, "grad_norm": 8.204914255177334, "learning_rate": 6.860624523990861e-06, "loss": 18.0647, "step": 1127 }, { "epoch": 0.02061893359167931, "grad_norm": 6.855912112886448, "learning_rate": 6.866717440974867e-06, "loss": 17.48, "step": 1128 }, { "epoch": 0.020637212788125834, "grad_norm": 8.105338657042287, "learning_rate": 6.872810357958873e-06, "loss": 18.0055, "step": 1129 }, { "epoch": 0.02065549198457236, "grad_norm": 16.77405238000618, "learning_rate": 6.87890327494288e-06, "loss": 18.3776, "step": 1130 }, { "epoch": 0.02067377118101888, "grad_norm": 6.822106523614206, "learning_rate": 6.884996191926886e-06, "loss": 17.4504, "step": 1131 }, { "epoch": 0.020692050377465406, "grad_norm": 7.702096099615508, "learning_rate": 6.8910891089108915e-06, "loss": 17.686, "step": 1132 }, { "epoch": 0.02071032957391193, "grad_norm": 8.577549907942483, "learning_rate": 6.897182025894897e-06, "loss": 18.5179, "step": 1133 }, { "epoch": 0.020728608770358457, "grad_norm": 77.81058128614599, "learning_rate": 6.903274942878904e-06, "loss": 18.0846, "step": 1134 }, { "epoch": 0.02074688796680498, "grad_norm": 7.777221959468819, "learning_rate": 6.90936785986291e-06, "loss": 17.982, "step": 1135 }, { "epoch": 0.020765167163251504, "grad_norm": 8.103358397861477, "learning_rate": 6.915460776846916e-06, "loss": 18.0749, "step": 1136 }, { "epoch": 0.02078344635969803, "grad_norm": 7.954324518176213, "learning_rate": 6.921553693830923e-06, "loss": 17.9536, "step": 1137 }, { "epoch": 0.02080172555614455, "grad_norm": 7.646304408043269, "learning_rate": 6.9276466108149285e-06, "loss": 17.7323, "step": 1138 }, { "epoch": 0.020820004752591076, "grad_norm": 8.764899829664277, "learning_rate": 6.933739527798934e-06, "loss": 17.9325, "step": 1139 }, { "epoch": 0.0208382839490376, "grad_norm": 8.90169081801138, "learning_rate": 6.93983244478294e-06, "loss": 18.2958, "step": 1140 }, { "epoch": 0.020856563145484123, "grad_norm": 8.411302997086182, "learning_rate": 6.945925361766947e-06, "loss": 18.0948, "step": 1141 }, { "epoch": 0.020874842341930648, "grad_norm": 8.049056501337239, "learning_rate": 6.9520182787509524e-06, "loss": 18.2222, "step": 1142 }, { "epoch": 0.020893121538377173, "grad_norm": 7.323621447312056, "learning_rate": 6.958111195734959e-06, "loss": 17.8308, "step": 1143 }, { "epoch": 0.0209114007348237, "grad_norm": 8.592023496690768, "learning_rate": 6.964204112718966e-06, "loss": 18.1117, "step": 1144 }, { "epoch": 0.02092967993127022, "grad_norm": 7.995621972747377, "learning_rate": 6.9702970297029706e-06, "loss": 17.7442, "step": 1145 }, { "epoch": 0.020947959127716746, "grad_norm": 6.8862191468237945, "learning_rate": 6.976389946686976e-06, "loss": 17.4325, "step": 1146 }, { "epoch": 0.02096623832416327, "grad_norm": 8.536205187859835, "learning_rate": 6.982482863670983e-06, "loss": 18.3987, "step": 1147 }, { "epoch": 0.020984517520609793, "grad_norm": 7.969283065900971, "learning_rate": 6.9885757806549895e-06, "loss": 18.0284, "step": 1148 }, { "epoch": 0.021002796717056318, "grad_norm": 8.109090041444265, "learning_rate": 6.994668697638995e-06, "loss": 17.8368, "step": 1149 }, { "epoch": 0.021021075913502843, "grad_norm": 8.094394126698537, "learning_rate": 7.000761614623002e-06, "loss": 17.8873, "step": 1150 }, { "epoch": 0.021039355109949368, "grad_norm": 7.159650139988644, "learning_rate": 7.006854531607007e-06, "loss": 17.7425, "step": 1151 }, { "epoch": 0.02105763430639589, "grad_norm": 8.30888924591329, "learning_rate": 7.012947448591013e-06, "loss": 17.8527, "step": 1152 }, { "epoch": 0.021075913502842415, "grad_norm": 8.820744459819323, "learning_rate": 7.019040365575019e-06, "loss": 18.3725, "step": 1153 }, { "epoch": 0.02109419269928894, "grad_norm": 8.35480812143568, "learning_rate": 7.025133282559026e-06, "loss": 18.1813, "step": 1154 }, { "epoch": 0.021112471895735462, "grad_norm": 8.036912715992331, "learning_rate": 7.031226199543032e-06, "loss": 17.8396, "step": 1155 }, { "epoch": 0.021130751092181987, "grad_norm": 7.277613895898985, "learning_rate": 7.037319116527038e-06, "loss": 17.5703, "step": 1156 }, { "epoch": 0.021149030288628513, "grad_norm": 8.160707335936069, "learning_rate": 7.043412033511045e-06, "loss": 17.9347, "step": 1157 }, { "epoch": 0.021167309485075034, "grad_norm": 8.324350963433098, "learning_rate": 7.04950495049505e-06, "loss": 17.9741, "step": 1158 }, { "epoch": 0.02118558868152156, "grad_norm": 8.313499862915545, "learning_rate": 7.055597867479056e-06, "loss": 17.9326, "step": 1159 }, { "epoch": 0.021203867877968085, "grad_norm": 8.373934244972215, "learning_rate": 7.061690784463062e-06, "loss": 18.0119, "step": 1160 }, { "epoch": 0.02122214707441461, "grad_norm": 8.734489214418272, "learning_rate": 7.067783701447069e-06, "loss": 18.5876, "step": 1161 }, { "epoch": 0.021240426270861132, "grad_norm": 7.940739926024548, "learning_rate": 7.073876618431074e-06, "loss": 17.9932, "step": 1162 }, { "epoch": 0.021258705467307657, "grad_norm": 7.643890938802826, "learning_rate": 7.079969535415081e-06, "loss": 17.6314, "step": 1163 }, { "epoch": 0.021276984663754182, "grad_norm": 7.541401411005756, "learning_rate": 7.086062452399086e-06, "loss": 17.9616, "step": 1164 }, { "epoch": 0.021295263860200704, "grad_norm": 9.213679865116323, "learning_rate": 7.0921553693830925e-06, "loss": 18.9202, "step": 1165 }, { "epoch": 0.02131354305664723, "grad_norm": 8.511693300693606, "learning_rate": 7.098248286367099e-06, "loss": 18.196, "step": 1166 }, { "epoch": 0.021331822253093755, "grad_norm": 7.7357627986481985, "learning_rate": 7.104341203351105e-06, "loss": 18.066, "step": 1167 }, { "epoch": 0.02135010144954028, "grad_norm": 8.057760559908589, "learning_rate": 7.1104341203351115e-06, "loss": 18.0976, "step": 1168 }, { "epoch": 0.0213683806459868, "grad_norm": 8.785475163168408, "learning_rate": 7.116527037319117e-06, "loss": 18.1866, "step": 1169 }, { "epoch": 0.021386659842433327, "grad_norm": 7.4913324178511225, "learning_rate": 7.122619954303123e-06, "loss": 17.7044, "step": 1170 }, { "epoch": 0.021404939038879852, "grad_norm": 7.681956106312898, "learning_rate": 7.128712871287129e-06, "loss": 17.8684, "step": 1171 }, { "epoch": 0.021423218235326374, "grad_norm": 7.4616185648565905, "learning_rate": 7.134805788271135e-06, "loss": 17.7606, "step": 1172 }, { "epoch": 0.0214414974317729, "grad_norm": 7.119361393828716, "learning_rate": 7.140898705255141e-06, "loss": 17.7639, "step": 1173 }, { "epoch": 0.021459776628219424, "grad_norm": 9.123788086352594, "learning_rate": 7.146991622239148e-06, "loss": 18.2655, "step": 1174 }, { "epoch": 0.021478055824665946, "grad_norm": 9.977952496483601, "learning_rate": 7.153084539223154e-06, "loss": 18.1025, "step": 1175 }, { "epoch": 0.02149633502111247, "grad_norm": 8.010184469210712, "learning_rate": 7.15917745620716e-06, "loss": 18.0235, "step": 1176 }, { "epoch": 0.021514614217558996, "grad_norm": 7.340378645183165, "learning_rate": 7.165270373191166e-06, "loss": 17.7148, "step": 1177 }, { "epoch": 0.02153289341400552, "grad_norm": 8.10655777724448, "learning_rate": 7.171363290175172e-06, "loss": 18.1131, "step": 1178 }, { "epoch": 0.021551172610452043, "grad_norm": 6.828081387443637, "learning_rate": 7.177456207159178e-06, "loss": 17.4375, "step": 1179 }, { "epoch": 0.02156945180689857, "grad_norm": 8.552928156945542, "learning_rate": 7.183549124143184e-06, "loss": 18.1215, "step": 1180 }, { "epoch": 0.021587731003345094, "grad_norm": 7.566082042425317, "learning_rate": 7.1896420411271906e-06, "loss": 17.7592, "step": 1181 }, { "epoch": 0.021606010199791616, "grad_norm": 8.748982056419061, "learning_rate": 7.195734958111197e-06, "loss": 18.2236, "step": 1182 }, { "epoch": 0.02162428939623814, "grad_norm": 7.85067598085476, "learning_rate": 7.201827875095202e-06, "loss": 17.6095, "step": 1183 }, { "epoch": 0.021642568592684666, "grad_norm": 7.498161013542759, "learning_rate": 7.207920792079208e-06, "loss": 17.9093, "step": 1184 }, { "epoch": 0.02166084778913119, "grad_norm": 7.768815471400683, "learning_rate": 7.2140137090632144e-06, "loss": 17.9479, "step": 1185 }, { "epoch": 0.021679126985577713, "grad_norm": 8.335391937231623, "learning_rate": 7.220106626047221e-06, "loss": 18.1221, "step": 1186 }, { "epoch": 0.02169740618202424, "grad_norm": 7.984945971046457, "learning_rate": 7.226199543031227e-06, "loss": 18.1438, "step": 1187 }, { "epoch": 0.021715685378470764, "grad_norm": 9.06770301142076, "learning_rate": 7.232292460015233e-06, "loss": 18.6921, "step": 1188 }, { "epoch": 0.021733964574917285, "grad_norm": 8.037666318852676, "learning_rate": 7.238385376999238e-06, "loss": 18.3687, "step": 1189 }, { "epoch": 0.02175224377136381, "grad_norm": 7.792236263999609, "learning_rate": 7.244478293983245e-06, "loss": 18.0255, "step": 1190 }, { "epoch": 0.021770522967810336, "grad_norm": 8.645863994009908, "learning_rate": 7.250571210967251e-06, "loss": 18.3111, "step": 1191 }, { "epoch": 0.021788802164256858, "grad_norm": 8.891587503165612, "learning_rate": 7.256664127951257e-06, "loss": 18.6329, "step": 1192 }, { "epoch": 0.021807081360703383, "grad_norm": 7.7969448972465125, "learning_rate": 7.262757044935264e-06, "loss": 17.9866, "step": 1193 }, { "epoch": 0.021825360557149908, "grad_norm": 9.036182301496826, "learning_rate": 7.26884996191927e-06, "loss": 18.2366, "step": 1194 }, { "epoch": 0.021843639753596433, "grad_norm": 7.9134513330621825, "learning_rate": 7.274942878903275e-06, "loss": 18.3917, "step": 1195 }, { "epoch": 0.021861918950042955, "grad_norm": 10.12767509358225, "learning_rate": 7.281035795887281e-06, "loss": 18.3437, "step": 1196 }, { "epoch": 0.02188019814648948, "grad_norm": 9.349416412034195, "learning_rate": 7.287128712871288e-06, "loss": 18.2855, "step": 1197 }, { "epoch": 0.021898477342936006, "grad_norm": 8.686163963468262, "learning_rate": 7.2932216298552935e-06, "loss": 18.1963, "step": 1198 }, { "epoch": 0.021916756539382527, "grad_norm": 8.215999122820618, "learning_rate": 7.2993145468393e-06, "loss": 18.0912, "step": 1199 }, { "epoch": 0.021935035735829053, "grad_norm": 8.909752446449565, "learning_rate": 7.305407463823307e-06, "loss": 18.4349, "step": 1200 }, { "epoch": 0.021953314932275578, "grad_norm": 7.864052924827168, "learning_rate": 7.3115003808073125e-06, "loss": 18.1708, "step": 1201 }, { "epoch": 0.021971594128722103, "grad_norm": 7.975719874675216, "learning_rate": 7.317593297791317e-06, "loss": 17.9927, "step": 1202 }, { "epoch": 0.021989873325168625, "grad_norm": 8.06241978310815, "learning_rate": 7.323686214775324e-06, "loss": 18.1951, "step": 1203 }, { "epoch": 0.02200815252161515, "grad_norm": 8.61763658655989, "learning_rate": 7.329779131759331e-06, "loss": 18.4821, "step": 1204 }, { "epoch": 0.022026431718061675, "grad_norm": 8.610045656208115, "learning_rate": 7.335872048743336e-06, "loss": 18.4585, "step": 1205 }, { "epoch": 0.022044710914508197, "grad_norm": 7.1058283019046815, "learning_rate": 7.341964965727343e-06, "loss": 17.9557, "step": 1206 }, { "epoch": 0.022062990110954722, "grad_norm": 8.852871625404843, "learning_rate": 7.348057882711349e-06, "loss": 18.8266, "step": 1207 }, { "epoch": 0.022081269307401247, "grad_norm": 8.641928794327619, "learning_rate": 7.3541507996953545e-06, "loss": 18.5967, "step": 1208 }, { "epoch": 0.02209954850384777, "grad_norm": 8.118616842382611, "learning_rate": 7.36024371667936e-06, "loss": 17.9766, "step": 1209 }, { "epoch": 0.022117827700294294, "grad_norm": 7.8062309682541615, "learning_rate": 7.366336633663367e-06, "loss": 18.2301, "step": 1210 }, { "epoch": 0.02213610689674082, "grad_norm": 8.256561305103666, "learning_rate": 7.3724295506473735e-06, "loss": 18.2072, "step": 1211 }, { "epoch": 0.022154386093187345, "grad_norm": 7.541355836416142, "learning_rate": 7.378522467631379e-06, "loss": 17.8086, "step": 1212 }, { "epoch": 0.022172665289633867, "grad_norm": 8.373746914388754, "learning_rate": 7.384615384615386e-06, "loss": 18.2852, "step": 1213 }, { "epoch": 0.022190944486080392, "grad_norm": 6.529008972161452, "learning_rate": 7.390708301599391e-06, "loss": 17.2029, "step": 1214 }, { "epoch": 0.022209223682526917, "grad_norm": 8.548302012635622, "learning_rate": 7.396801218583397e-06, "loss": 18.3498, "step": 1215 }, { "epoch": 0.02222750287897344, "grad_norm": 9.300415300136658, "learning_rate": 7.402894135567403e-06, "loss": 18.4759, "step": 1216 }, { "epoch": 0.022245782075419964, "grad_norm": 7.982304795140464, "learning_rate": 7.40898705255141e-06, "loss": 18.0739, "step": 1217 }, { "epoch": 0.02226406127186649, "grad_norm": 6.895074547057548, "learning_rate": 7.4150799695354155e-06, "loss": 17.5846, "step": 1218 }, { "epoch": 0.022282340468313015, "grad_norm": 9.15493873323251, "learning_rate": 7.421172886519422e-06, "loss": 18.7379, "step": 1219 }, { "epoch": 0.022300619664759536, "grad_norm": 7.37272617147564, "learning_rate": 7.427265803503429e-06, "loss": 17.6261, "step": 1220 }, { "epoch": 0.02231889886120606, "grad_norm": 8.105896696615837, "learning_rate": 7.433358720487434e-06, "loss": 18.3197, "step": 1221 }, { "epoch": 0.022337178057652587, "grad_norm": 6.971224136969124, "learning_rate": 7.43945163747144e-06, "loss": 17.5691, "step": 1222 }, { "epoch": 0.02235545725409911, "grad_norm": 7.7229301300845155, "learning_rate": 7.445544554455446e-06, "loss": 17.9763, "step": 1223 }, { "epoch": 0.022373736450545634, "grad_norm": 8.79029937572499, "learning_rate": 7.4516374714394526e-06, "loss": 18.6975, "step": 1224 }, { "epoch": 0.02239201564699216, "grad_norm": 7.653051662866822, "learning_rate": 7.457730388423458e-06, "loss": 17.8637, "step": 1225 }, { "epoch": 0.02241029484343868, "grad_norm": 10.015118294113028, "learning_rate": 7.463823305407465e-06, "loss": 18.3986, "step": 1226 }, { "epoch": 0.022428574039885206, "grad_norm": 9.135396156077341, "learning_rate": 7.46991622239147e-06, "loss": 18.1684, "step": 1227 }, { "epoch": 0.02244685323633173, "grad_norm": 8.840419125171318, "learning_rate": 7.4760091393754765e-06, "loss": 18.5559, "step": 1228 }, { "epoch": 0.022465132432778256, "grad_norm": 8.685221814751454, "learning_rate": 7.482102056359482e-06, "loss": 17.9879, "step": 1229 }, { "epoch": 0.022483411629224778, "grad_norm": 7.582756705088697, "learning_rate": 7.488194973343489e-06, "loss": 17.8319, "step": 1230 }, { "epoch": 0.022501690825671303, "grad_norm": 9.55332474929589, "learning_rate": 7.4942878903274954e-06, "loss": 18.5795, "step": 1231 }, { "epoch": 0.02251997002211783, "grad_norm": 9.12537600125327, "learning_rate": 7.500380807311501e-06, "loss": 18.5816, "step": 1232 }, { "epoch": 0.02253824921856435, "grad_norm": 8.200056774096385, "learning_rate": 7.506473724295507e-06, "loss": 17.9549, "step": 1233 }, { "epoch": 0.022556528415010876, "grad_norm": 8.269221292204605, "learning_rate": 7.512566641279513e-06, "loss": 17.793, "step": 1234 }, { "epoch": 0.0225748076114574, "grad_norm": 6.9529769475855785, "learning_rate": 7.518659558263519e-06, "loss": 17.6142, "step": 1235 }, { "epoch": 0.022593086807903926, "grad_norm": 9.460123773145922, "learning_rate": 7.524752475247525e-06, "loss": 18.5836, "step": 1236 }, { "epoch": 0.022611366004350448, "grad_norm": 6.935214348736729, "learning_rate": 7.530845392231532e-06, "loss": 17.5914, "step": 1237 }, { "epoch": 0.022629645200796973, "grad_norm": 8.38004457254756, "learning_rate": 7.536938309215538e-06, "loss": 18.0831, "step": 1238 }, { "epoch": 0.0226479243972435, "grad_norm": 7.3943109247066, "learning_rate": 7.543031226199544e-06, "loss": 17.8206, "step": 1239 }, { "epoch": 0.02266620359369002, "grad_norm": 7.861977798654075, "learning_rate": 7.549124143183549e-06, "loss": 17.8095, "step": 1240 }, { "epoch": 0.022684482790136545, "grad_norm": 8.081413578470416, "learning_rate": 7.5552170601675556e-06, "loss": 18.0777, "step": 1241 }, { "epoch": 0.02270276198658307, "grad_norm": 8.227747563013045, "learning_rate": 7.561309977151562e-06, "loss": 18.1766, "step": 1242 }, { "epoch": 0.022721041183029592, "grad_norm": 8.080913904692055, "learning_rate": 7.567402894135568e-06, "loss": 17.864, "step": 1243 }, { "epoch": 0.022739320379476118, "grad_norm": 8.179396165410159, "learning_rate": 7.5734958111195745e-06, "loss": 17.9948, "step": 1244 }, { "epoch": 0.022757599575922643, "grad_norm": 9.386646441003995, "learning_rate": 7.579588728103581e-06, "loss": 18.8239, "step": 1245 }, { "epoch": 0.022775878772369168, "grad_norm": 8.975542711309226, "learning_rate": 7.585681645087586e-06, "loss": 18.5923, "step": 1246 }, { "epoch": 0.02279415796881569, "grad_norm": 8.844141143227652, "learning_rate": 7.591774562071592e-06, "loss": 18.1258, "step": 1247 }, { "epoch": 0.022812437165262215, "grad_norm": 7.184291908214676, "learning_rate": 7.597867479055598e-06, "loss": 17.7494, "step": 1248 }, { "epoch": 0.02283071636170874, "grad_norm": 7.973705351159692, "learning_rate": 7.603960396039605e-06, "loss": 17.9706, "step": 1249 }, { "epoch": 0.022848995558155262, "grad_norm": 7.011117924638664, "learning_rate": 7.610053313023611e-06, "loss": 17.6627, "step": 1250 }, { "epoch": 0.022867274754601787, "grad_norm": 6.581057991386631, "learning_rate": 7.616146230007617e-06, "loss": 17.6904, "step": 1251 }, { "epoch": 0.022885553951048312, "grad_norm": 8.604049446250782, "learning_rate": 7.622239146991622e-06, "loss": 18.4233, "step": 1252 }, { "epoch": 0.022903833147494838, "grad_norm": 6.804372320887771, "learning_rate": 7.628332063975629e-06, "loss": 17.3679, "step": 1253 }, { "epoch": 0.02292211234394136, "grad_norm": 8.276516712834422, "learning_rate": 7.634424980959635e-06, "loss": 18.0717, "step": 1254 }, { "epoch": 0.022940391540387885, "grad_norm": 8.808370707379996, "learning_rate": 7.640517897943641e-06, "loss": 18.0482, "step": 1255 }, { "epoch": 0.02295867073683441, "grad_norm": 7.882389237429789, "learning_rate": 7.646610814927647e-06, "loss": 17.8727, "step": 1256 }, { "epoch": 0.02297694993328093, "grad_norm": 8.213135962682458, "learning_rate": 7.652703731911654e-06, "loss": 18.0153, "step": 1257 }, { "epoch": 0.022995229129727457, "grad_norm": 8.466431958181476, "learning_rate": 7.658796648895659e-06, "loss": 18.008, "step": 1258 }, { "epoch": 0.023013508326173982, "grad_norm": 7.234200452168259, "learning_rate": 7.664889565879666e-06, "loss": 17.4817, "step": 1259 }, { "epoch": 0.023031787522620504, "grad_norm": 9.59877550737015, "learning_rate": 7.670982482863672e-06, "loss": 18.6498, "step": 1260 }, { "epoch": 0.02305006671906703, "grad_norm": 8.341860614988887, "learning_rate": 7.677075399847677e-06, "loss": 18.2708, "step": 1261 }, { "epoch": 0.023068345915513554, "grad_norm": 9.281859020029916, "learning_rate": 7.683168316831683e-06, "loss": 18.9397, "step": 1262 }, { "epoch": 0.02308662511196008, "grad_norm": 7.710288194975174, "learning_rate": 7.68926123381569e-06, "loss": 17.9752, "step": 1263 }, { "epoch": 0.0231049043084066, "grad_norm": 7.063195913746644, "learning_rate": 7.695354150799696e-06, "loss": 17.487, "step": 1264 }, { "epoch": 0.023123183504853127, "grad_norm": 7.9920256770402345, "learning_rate": 7.701447067783702e-06, "loss": 17.8697, "step": 1265 }, { "epoch": 0.023141462701299652, "grad_norm": 8.164032651953145, "learning_rate": 7.707539984767708e-06, "loss": 18.4767, "step": 1266 }, { "epoch": 0.023159741897746174, "grad_norm": 9.071888596238017, "learning_rate": 7.713632901751714e-06, "loss": 18.0909, "step": 1267 }, { "epoch": 0.0231780210941927, "grad_norm": 12.59211531240348, "learning_rate": 7.719725818735721e-06, "loss": 18.4718, "step": 1268 }, { "epoch": 0.023196300290639224, "grad_norm": 7.991050919837849, "learning_rate": 7.725818735719727e-06, "loss": 18.1672, "step": 1269 }, { "epoch": 0.02321457948708575, "grad_norm": 7.370297221569767, "learning_rate": 7.731911652703733e-06, "loss": 17.7813, "step": 1270 }, { "epoch": 0.02323285868353227, "grad_norm": 7.807076685784157, "learning_rate": 7.738004569687738e-06, "loss": 17.8473, "step": 1271 }, { "epoch": 0.023251137879978796, "grad_norm": 7.309326311800068, "learning_rate": 7.744097486671744e-06, "loss": 17.7814, "step": 1272 }, { "epoch": 0.02326941707642532, "grad_norm": 7.143892484378977, "learning_rate": 7.75019040365575e-06, "loss": 17.8614, "step": 1273 }, { "epoch": 0.023287696272871843, "grad_norm": 8.334417705202664, "learning_rate": 7.756283320639757e-06, "loss": 17.9317, "step": 1274 }, { "epoch": 0.02330597546931837, "grad_norm": 8.737528222459407, "learning_rate": 7.762376237623763e-06, "loss": 18.4585, "step": 1275 }, { "epoch": 0.023324254665764894, "grad_norm": 7.829535864184691, "learning_rate": 7.768469154607769e-06, "loss": 18.0296, "step": 1276 }, { "epoch": 0.023342533862211415, "grad_norm": 7.179278938725156, "learning_rate": 7.774562071591775e-06, "loss": 17.659, "step": 1277 }, { "epoch": 0.02336081305865794, "grad_norm": 8.395295598169197, "learning_rate": 7.78065498857578e-06, "loss": 18.4474, "step": 1278 }, { "epoch": 0.023379092255104466, "grad_norm": 8.107695250173576, "learning_rate": 7.786747905559788e-06, "loss": 18.3523, "step": 1279 }, { "epoch": 0.02339737145155099, "grad_norm": 8.02572642939791, "learning_rate": 7.792840822543794e-06, "loss": 18.0061, "step": 1280 }, { "epoch": 0.023415650647997513, "grad_norm": 8.54594362762417, "learning_rate": 7.7989337395278e-06, "loss": 18.2185, "step": 1281 }, { "epoch": 0.023433929844444038, "grad_norm": 7.036244253198506, "learning_rate": 7.805026656511805e-06, "loss": 17.6652, "step": 1282 }, { "epoch": 0.023452209040890563, "grad_norm": 8.064019494416113, "learning_rate": 7.811119573495813e-06, "loss": 18.0908, "step": 1283 }, { "epoch": 0.023470488237337085, "grad_norm": 8.372108528772818, "learning_rate": 7.817212490479817e-06, "loss": 18.4715, "step": 1284 }, { "epoch": 0.02348876743378361, "grad_norm": 9.683067868413996, "learning_rate": 7.823305407463824e-06, "loss": 19.0494, "step": 1285 }, { "epoch": 0.023507046630230136, "grad_norm": 8.750064171301153, "learning_rate": 7.82939832444783e-06, "loss": 18.3841, "step": 1286 }, { "epoch": 0.02352532582667666, "grad_norm": 8.621107473939952, "learning_rate": 7.835491241431836e-06, "loss": 18.4852, "step": 1287 }, { "epoch": 0.023543605023123183, "grad_norm": 7.6179221400081385, "learning_rate": 7.841584158415843e-06, "loss": 17.8382, "step": 1288 }, { "epoch": 0.023561884219569708, "grad_norm": 7.162638702613578, "learning_rate": 7.847677075399849e-06, "loss": 17.9635, "step": 1289 }, { "epoch": 0.023580163416016233, "grad_norm": 7.740343557998449, "learning_rate": 7.853769992383855e-06, "loss": 18.1045, "step": 1290 }, { "epoch": 0.023598442612462755, "grad_norm": 7.856847072877854, "learning_rate": 7.85986290936786e-06, "loss": 17.9265, "step": 1291 }, { "epoch": 0.02361672180890928, "grad_norm": 7.819128359976211, "learning_rate": 7.865955826351866e-06, "loss": 18.0816, "step": 1292 }, { "epoch": 0.023635001005355805, "grad_norm": 6.68728879211887, "learning_rate": 7.872048743335872e-06, "loss": 17.2439, "step": 1293 }, { "epoch": 0.023653280201802327, "grad_norm": 7.637521824211896, "learning_rate": 7.87814166031988e-06, "loss": 17.8988, "step": 1294 }, { "epoch": 0.023671559398248852, "grad_norm": 9.032365359032385, "learning_rate": 7.884234577303885e-06, "loss": 18.6905, "step": 1295 }, { "epoch": 0.023689838594695378, "grad_norm": 8.251170325901612, "learning_rate": 7.890327494287891e-06, "loss": 18.1274, "step": 1296 }, { "epoch": 0.023708117791141903, "grad_norm": 7.744327318199243, "learning_rate": 7.896420411271897e-06, "loss": 17.9762, "step": 1297 }, { "epoch": 0.023726396987588425, "grad_norm": 7.5883481159425665, "learning_rate": 7.902513328255902e-06, "loss": 17.6878, "step": 1298 }, { "epoch": 0.02374467618403495, "grad_norm": 7.853608917102702, "learning_rate": 7.90860624523991e-06, "loss": 18.0944, "step": 1299 }, { "epoch": 0.023762955380481475, "grad_norm": 7.702152236003674, "learning_rate": 7.914699162223916e-06, "loss": 17.8327, "step": 1300 }, { "epoch": 0.023781234576927997, "grad_norm": 8.376841760535141, "learning_rate": 7.920792079207921e-06, "loss": 18.306, "step": 1301 }, { "epoch": 0.023799513773374522, "grad_norm": 8.309740430141177, "learning_rate": 7.926884996191929e-06, "loss": 17.7201, "step": 1302 }, { "epoch": 0.023817792969821047, "grad_norm": 7.3776967431244005, "learning_rate": 7.932977913175933e-06, "loss": 17.6082, "step": 1303 }, { "epoch": 0.023836072166267572, "grad_norm": 8.479560489220585, "learning_rate": 7.93907083015994e-06, "loss": 18.1974, "step": 1304 }, { "epoch": 0.023854351362714094, "grad_norm": 7.627994332940635, "learning_rate": 7.945163747143946e-06, "loss": 17.9485, "step": 1305 }, { "epoch": 0.02387263055916062, "grad_norm": 7.129336933906195, "learning_rate": 7.951256664127952e-06, "loss": 17.7024, "step": 1306 }, { "epoch": 0.023890909755607145, "grad_norm": 10.21498902994689, "learning_rate": 7.957349581111958e-06, "loss": 18.6435, "step": 1307 }, { "epoch": 0.023909188952053666, "grad_norm": 8.606824128185288, "learning_rate": 7.963442498095965e-06, "loss": 18.3299, "step": 1308 }, { "epoch": 0.02392746814850019, "grad_norm": 8.712980733390193, "learning_rate": 7.969535415079969e-06, "loss": 18.6535, "step": 1309 }, { "epoch": 0.023945747344946717, "grad_norm": 7.552296265392005, "learning_rate": 7.975628332063977e-06, "loss": 17.997, "step": 1310 }, { "epoch": 0.02396402654139324, "grad_norm": 8.348030354934505, "learning_rate": 7.981721249047982e-06, "loss": 18.0706, "step": 1311 }, { "epoch": 0.023982305737839764, "grad_norm": 8.030722009723371, "learning_rate": 7.987814166031988e-06, "loss": 18.0331, "step": 1312 }, { "epoch": 0.02400058493428629, "grad_norm": 8.09434320280789, "learning_rate": 7.993907083015996e-06, "loss": 18.4149, "step": 1313 }, { "epoch": 0.024018864130732814, "grad_norm": 8.1395626020549, "learning_rate": 8.000000000000001e-06, "loss": 18.3051, "step": 1314 }, { "epoch": 0.024037143327179336, "grad_norm": 8.052697051283777, "learning_rate": 8.006092916984007e-06, "loss": 18.0066, "step": 1315 }, { "epoch": 0.02405542252362586, "grad_norm": 7.117663298312368, "learning_rate": 8.012185833968013e-06, "loss": 17.6996, "step": 1316 }, { "epoch": 0.024073701720072387, "grad_norm": 7.712078554983164, "learning_rate": 8.018278750952019e-06, "loss": 17.8507, "step": 1317 }, { "epoch": 0.02409198091651891, "grad_norm": 8.428532674109862, "learning_rate": 8.024371667936024e-06, "loss": 18.1838, "step": 1318 }, { "epoch": 0.024110260112965434, "grad_norm": 7.979458394015958, "learning_rate": 8.030464584920032e-06, "loss": 17.9062, "step": 1319 }, { "epoch": 0.02412853930941196, "grad_norm": 8.53801088058669, "learning_rate": 8.036557501904038e-06, "loss": 18.3146, "step": 1320 }, { "epoch": 0.024146818505858484, "grad_norm": 8.362033362045155, "learning_rate": 8.042650418888043e-06, "loss": 18.0613, "step": 1321 }, { "epoch": 0.024165097702305006, "grad_norm": 7.526317721252609, "learning_rate": 8.048743335872049e-06, "loss": 17.9492, "step": 1322 }, { "epoch": 0.02418337689875153, "grad_norm": 7.766073534985361, "learning_rate": 8.054836252856055e-06, "loss": 18.0612, "step": 1323 }, { "epoch": 0.024201656095198056, "grad_norm": 7.771387693349303, "learning_rate": 8.060929169840062e-06, "loss": 17.6484, "step": 1324 }, { "epoch": 0.024219935291644578, "grad_norm": 7.451722400646125, "learning_rate": 8.067022086824068e-06, "loss": 17.7878, "step": 1325 }, { "epoch": 0.024238214488091103, "grad_norm": 7.859467102333334, "learning_rate": 8.073115003808074e-06, "loss": 17.9367, "step": 1326 }, { "epoch": 0.02425649368453763, "grad_norm": 7.056189195058994, "learning_rate": 8.07920792079208e-06, "loss": 17.7121, "step": 1327 }, { "epoch": 0.02427477288098415, "grad_norm": 7.959798202564799, "learning_rate": 8.085300837776085e-06, "loss": 18.172, "step": 1328 }, { "epoch": 0.024293052077430675, "grad_norm": 9.09571371326946, "learning_rate": 8.091393754760091e-06, "loss": 18.5352, "step": 1329 }, { "epoch": 0.0243113312738772, "grad_norm": 9.120709054336443, "learning_rate": 8.097486671744099e-06, "loss": 18.2114, "step": 1330 }, { "epoch": 0.024329610470323726, "grad_norm": 9.68771610352554, "learning_rate": 8.103579588728104e-06, "loss": 18.6835, "step": 1331 }, { "epoch": 0.024347889666770248, "grad_norm": 8.156074313348503, "learning_rate": 8.10967250571211e-06, "loss": 18.0789, "step": 1332 }, { "epoch": 0.024366168863216773, "grad_norm": 7.477498680504085, "learning_rate": 8.115765422696118e-06, "loss": 17.538, "step": 1333 }, { "epoch": 0.024384448059663298, "grad_norm": 8.659555772193457, "learning_rate": 8.121858339680122e-06, "loss": 18.72, "step": 1334 }, { "epoch": 0.02440272725610982, "grad_norm": 7.384290456888504, "learning_rate": 8.127951256664129e-06, "loss": 17.8932, "step": 1335 }, { "epoch": 0.024421006452556345, "grad_norm": 7.994339782859948, "learning_rate": 8.134044173648135e-06, "loss": 17.8404, "step": 1336 }, { "epoch": 0.02443928564900287, "grad_norm": 7.180607066628015, "learning_rate": 8.14013709063214e-06, "loss": 17.462, "step": 1337 }, { "epoch": 0.024457564845449396, "grad_norm": 9.032330013580443, "learning_rate": 8.146230007616146e-06, "loss": 18.5001, "step": 1338 }, { "epoch": 0.024475844041895917, "grad_norm": 7.546251182125515, "learning_rate": 8.152322924600154e-06, "loss": 18.0689, "step": 1339 }, { "epoch": 0.024494123238342443, "grad_norm": 8.256495716099927, "learning_rate": 8.158415841584158e-06, "loss": 18.2256, "step": 1340 }, { "epoch": 0.024512402434788968, "grad_norm": 8.446449937335991, "learning_rate": 8.164508758568165e-06, "loss": 18.2305, "step": 1341 }, { "epoch": 0.02453068163123549, "grad_norm": 7.647762757269953, "learning_rate": 8.170601675552171e-06, "loss": 17.9599, "step": 1342 }, { "epoch": 0.024548960827682015, "grad_norm": 9.336594874256123, "learning_rate": 8.176694592536177e-06, "loss": 18.5839, "step": 1343 }, { "epoch": 0.02456724002412854, "grad_norm": 8.458582893372924, "learning_rate": 8.182787509520184e-06, "loss": 18.3154, "step": 1344 }, { "epoch": 0.024585519220575062, "grad_norm": 7.221096422825132, "learning_rate": 8.18888042650419e-06, "loss": 17.6017, "step": 1345 }, { "epoch": 0.024603798417021587, "grad_norm": 8.195830887342922, "learning_rate": 8.194973343488196e-06, "loss": 18.1285, "step": 1346 }, { "epoch": 0.024622077613468112, "grad_norm": 7.271801674324512, "learning_rate": 8.201066260472202e-06, "loss": 17.6622, "step": 1347 }, { "epoch": 0.024640356809914638, "grad_norm": 7.7027468671500054, "learning_rate": 8.207159177456207e-06, "loss": 17.8249, "step": 1348 }, { "epoch": 0.02465863600636116, "grad_norm": 7.360532133484001, "learning_rate": 8.213252094440213e-06, "loss": 17.363, "step": 1349 }, { "epoch": 0.024676915202807684, "grad_norm": 7.690719116058331, "learning_rate": 8.21934501142422e-06, "loss": 17.9928, "step": 1350 }, { "epoch": 0.02469519439925421, "grad_norm": 7.945431554099586, "learning_rate": 8.225437928408226e-06, "loss": 18.0503, "step": 1351 }, { "epoch": 0.02471347359570073, "grad_norm": 7.106647440398628, "learning_rate": 8.231530845392232e-06, "loss": 17.7759, "step": 1352 }, { "epoch": 0.024731752792147257, "grad_norm": 7.751453091434756, "learning_rate": 8.237623762376238e-06, "loss": 17.9833, "step": 1353 }, { "epoch": 0.024750031988593782, "grad_norm": 8.384614172000148, "learning_rate": 8.243716679360244e-06, "loss": 17.792, "step": 1354 }, { "epoch": 0.024768311185040307, "grad_norm": 7.531315262910314, "learning_rate": 8.249809596344251e-06, "loss": 17.7091, "step": 1355 }, { "epoch": 0.02478659038148683, "grad_norm": 7.387069015047229, "learning_rate": 8.255902513328257e-06, "loss": 17.9286, "step": 1356 }, { "epoch": 0.024804869577933354, "grad_norm": 6.941253945063988, "learning_rate": 8.261995430312262e-06, "loss": 17.6529, "step": 1357 }, { "epoch": 0.02482314877437988, "grad_norm": 7.435027662647396, "learning_rate": 8.26808834729627e-06, "loss": 17.9858, "step": 1358 }, { "epoch": 0.0248414279708264, "grad_norm": 7.562534840266457, "learning_rate": 8.274181264280274e-06, "loss": 17.7777, "step": 1359 }, { "epoch": 0.024859707167272926, "grad_norm": 7.631480061899964, "learning_rate": 8.28027418126428e-06, "loss": 17.7919, "step": 1360 }, { "epoch": 0.02487798636371945, "grad_norm": 7.871776632436914, "learning_rate": 8.286367098248287e-06, "loss": 17.9316, "step": 1361 }, { "epoch": 0.024896265560165973, "grad_norm": 6.821665850570862, "learning_rate": 8.292460015232293e-06, "loss": 17.5959, "step": 1362 }, { "epoch": 0.0249145447566125, "grad_norm": 8.166453099333848, "learning_rate": 8.298552932216299e-06, "loss": 17.8598, "step": 1363 }, { "epoch": 0.024932823953059024, "grad_norm": 9.331197609076883, "learning_rate": 8.304645849200306e-06, "loss": 18.1722, "step": 1364 }, { "epoch": 0.02495110314950555, "grad_norm": 8.276872570089795, "learning_rate": 8.310738766184312e-06, "loss": 18.0691, "step": 1365 }, { "epoch": 0.02496938234595207, "grad_norm": 7.337851108553668, "learning_rate": 8.316831683168318e-06, "loss": 17.4398, "step": 1366 }, { "epoch": 0.024987661542398596, "grad_norm": 8.59083660305102, "learning_rate": 8.322924600152323e-06, "loss": 18.1222, "step": 1367 }, { "epoch": 0.02500594073884512, "grad_norm": 9.118795882833696, "learning_rate": 8.32901751713633e-06, "loss": 18.1363, "step": 1368 }, { "epoch": 0.025024219935291643, "grad_norm": 8.371941378867396, "learning_rate": 8.335110434120337e-06, "loss": 18.2809, "step": 1369 }, { "epoch": 0.02504249913173817, "grad_norm": 7.641238868727458, "learning_rate": 8.341203351104342e-06, "loss": 18.1704, "step": 1370 }, { "epoch": 0.025060778328184694, "grad_norm": 9.042468938009703, "learning_rate": 8.347296268088348e-06, "loss": 18.3551, "step": 1371 }, { "epoch": 0.02507905752463122, "grad_norm": 9.506376707817328, "learning_rate": 8.353389185072354e-06, "loss": 18.368, "step": 1372 }, { "epoch": 0.02509733672107774, "grad_norm": 7.770551176630004, "learning_rate": 8.35948210205636e-06, "loss": 17.8866, "step": 1373 }, { "epoch": 0.025115615917524266, "grad_norm": 8.290447583037134, "learning_rate": 8.365575019040365e-06, "loss": 18.5078, "step": 1374 }, { "epoch": 0.02513389511397079, "grad_norm": 9.77723850031326, "learning_rate": 8.371667936024373e-06, "loss": 18.3157, "step": 1375 }, { "epoch": 0.025152174310417313, "grad_norm": 8.296936345506735, "learning_rate": 8.377760853008379e-06, "loss": 18.2976, "step": 1376 }, { "epoch": 0.025170453506863838, "grad_norm": 8.124263286385034, "learning_rate": 8.383853769992384e-06, "loss": 17.8877, "step": 1377 }, { "epoch": 0.025188732703310363, "grad_norm": 6.454023710336849, "learning_rate": 8.38994668697639e-06, "loss": 17.4242, "step": 1378 }, { "epoch": 0.025207011899756885, "grad_norm": 8.9444550386909, "learning_rate": 8.396039603960396e-06, "loss": 18.3085, "step": 1379 }, { "epoch": 0.02522529109620341, "grad_norm": 9.69406386868736, "learning_rate": 8.402132520944403e-06, "loss": 18.3456, "step": 1380 }, { "epoch": 0.025243570292649935, "grad_norm": 8.074897555646956, "learning_rate": 8.40822543792841e-06, "loss": 17.8998, "step": 1381 }, { "epoch": 0.02526184948909646, "grad_norm": 7.596511536029738, "learning_rate": 8.414318354912415e-06, "loss": 17.9059, "step": 1382 }, { "epoch": 0.025280128685542982, "grad_norm": 8.829738986897409, "learning_rate": 8.42041127189642e-06, "loss": 18.412, "step": 1383 }, { "epoch": 0.025298407881989508, "grad_norm": 8.174371688738475, "learning_rate": 8.426504188880426e-06, "loss": 17.8264, "step": 1384 }, { "epoch": 0.025316687078436033, "grad_norm": 7.676731543848915, "learning_rate": 8.432597105864432e-06, "loss": 17.9388, "step": 1385 }, { "epoch": 0.025334966274882555, "grad_norm": 10.247213204871045, "learning_rate": 8.43869002284844e-06, "loss": 18.6418, "step": 1386 }, { "epoch": 0.02535324547132908, "grad_norm": 8.571825620462517, "learning_rate": 8.444782939832445e-06, "loss": 18.4308, "step": 1387 }, { "epoch": 0.025371524667775605, "grad_norm": 7.6782801297365095, "learning_rate": 8.450875856816451e-06, "loss": 18.0378, "step": 1388 }, { "epoch": 0.02538980386422213, "grad_norm": 7.48276588353417, "learning_rate": 8.456968773800459e-06, "loss": 17.8743, "step": 1389 }, { "epoch": 0.025408083060668652, "grad_norm": 8.583044967740078, "learning_rate": 8.463061690784464e-06, "loss": 17.9977, "step": 1390 }, { "epoch": 0.025426362257115177, "grad_norm": 8.824243738339353, "learning_rate": 8.46915460776847e-06, "loss": 18.3367, "step": 1391 }, { "epoch": 0.025444641453561703, "grad_norm": 7.915063711593708, "learning_rate": 8.475247524752476e-06, "loss": 18.2498, "step": 1392 }, { "epoch": 0.025462920650008224, "grad_norm": 7.177028404247519, "learning_rate": 8.481340441736482e-06, "loss": 17.6313, "step": 1393 }, { "epoch": 0.02548119984645475, "grad_norm": 7.274675016163252, "learning_rate": 8.487433358720487e-06, "loss": 17.9967, "step": 1394 }, { "epoch": 0.025499479042901275, "grad_norm": 8.246748890142642, "learning_rate": 8.493526275704495e-06, "loss": 18.0346, "step": 1395 }, { "epoch": 0.025517758239347797, "grad_norm": 8.132276464199869, "learning_rate": 8.4996191926885e-06, "loss": 17.7565, "step": 1396 }, { "epoch": 0.025536037435794322, "grad_norm": 9.160219262127326, "learning_rate": 8.505712109672506e-06, "loss": 18.8931, "step": 1397 }, { "epoch": 0.025554316632240847, "grad_norm": 6.949895499620933, "learning_rate": 8.511805026656512e-06, "loss": 17.6727, "step": 1398 }, { "epoch": 0.025572595828687372, "grad_norm": 8.297904726022859, "learning_rate": 8.517897943640518e-06, "loss": 18.4404, "step": 1399 }, { "epoch": 0.025590875025133894, "grad_norm": 8.146645435656021, "learning_rate": 8.523990860624525e-06, "loss": 17.9447, "step": 1400 }, { "epoch": 0.02560915422158042, "grad_norm": 8.20198363793982, "learning_rate": 8.530083777608531e-06, "loss": 17.9231, "step": 1401 }, { "epoch": 0.025627433418026944, "grad_norm": 7.367031033065404, "learning_rate": 8.536176694592537e-06, "loss": 17.7908, "step": 1402 }, { "epoch": 0.025645712614473466, "grad_norm": 7.993296262853948, "learning_rate": 8.542269611576543e-06, "loss": 18.4607, "step": 1403 }, { "epoch": 0.02566399181091999, "grad_norm": 8.358059257126076, "learning_rate": 8.548362528560548e-06, "loss": 18.1583, "step": 1404 }, { "epoch": 0.025682271007366517, "grad_norm": 7.8085931466035445, "learning_rate": 8.554455445544554e-06, "loss": 17.9273, "step": 1405 }, { "epoch": 0.025700550203813042, "grad_norm": 8.795074595148867, "learning_rate": 8.560548362528562e-06, "loss": 18.2, "step": 1406 }, { "epoch": 0.025718829400259564, "grad_norm": 7.341380886517458, "learning_rate": 8.566641279512567e-06, "loss": 17.7604, "step": 1407 }, { "epoch": 0.02573710859670609, "grad_norm": 6.530914080059693, "learning_rate": 8.572734196496573e-06, "loss": 17.4624, "step": 1408 }, { "epoch": 0.025755387793152614, "grad_norm": 7.879802029965479, "learning_rate": 8.57882711348058e-06, "loss": 17.9349, "step": 1409 }, { "epoch": 0.025773666989599136, "grad_norm": 8.300464636465835, "learning_rate": 8.584920030464585e-06, "loss": 17.7776, "step": 1410 }, { "epoch": 0.02579194618604566, "grad_norm": 8.395711099456939, "learning_rate": 8.591012947448592e-06, "loss": 18.0982, "step": 1411 }, { "epoch": 0.025810225382492186, "grad_norm": 8.474296514569252, "learning_rate": 8.597105864432598e-06, "loss": 18.0764, "step": 1412 }, { "epoch": 0.025828504578938708, "grad_norm": 8.777423423211545, "learning_rate": 8.603198781416604e-06, "loss": 18.3759, "step": 1413 }, { "epoch": 0.025846783775385233, "grad_norm": 7.36446733094962, "learning_rate": 8.609291698400611e-06, "loss": 17.9045, "step": 1414 }, { "epoch": 0.02586506297183176, "grad_norm": 9.504182858462068, "learning_rate": 8.615384615384617e-06, "loss": 18.6237, "step": 1415 }, { "epoch": 0.025883342168278284, "grad_norm": 9.063770990698762, "learning_rate": 8.621477532368621e-06, "loss": 18.4992, "step": 1416 }, { "epoch": 0.025901621364724806, "grad_norm": 7.996481775653979, "learning_rate": 8.627570449352628e-06, "loss": 18.1038, "step": 1417 }, { "epoch": 0.02591990056117133, "grad_norm": 8.596649768941777, "learning_rate": 8.633663366336634e-06, "loss": 18.3631, "step": 1418 }, { "epoch": 0.025938179757617856, "grad_norm": 7.646225022339392, "learning_rate": 8.63975628332064e-06, "loss": 17.8257, "step": 1419 }, { "epoch": 0.025956458954064378, "grad_norm": 8.799682429765236, "learning_rate": 8.645849200304647e-06, "loss": 18.4929, "step": 1420 }, { "epoch": 0.025974738150510903, "grad_norm": 7.632664788232884, "learning_rate": 8.651942117288653e-06, "loss": 17.7875, "step": 1421 }, { "epoch": 0.02599301734695743, "grad_norm": 6.492463194039088, "learning_rate": 8.658035034272659e-06, "loss": 17.3591, "step": 1422 }, { "epoch": 0.026011296543403954, "grad_norm": 8.51316418431203, "learning_rate": 8.664127951256665e-06, "loss": 18.2339, "step": 1423 }, { "epoch": 0.026029575739850475, "grad_norm": 8.855657011481085, "learning_rate": 8.67022086824067e-06, "loss": 18.3553, "step": 1424 }, { "epoch": 0.026047854936297, "grad_norm": 6.309419290605549, "learning_rate": 8.676313785224678e-06, "loss": 17.1397, "step": 1425 }, { "epoch": 0.026066134132743526, "grad_norm": 7.753423110867713, "learning_rate": 8.682406702208684e-06, "loss": 17.7617, "step": 1426 }, { "epoch": 0.026084413329190047, "grad_norm": 7.523806497889765, "learning_rate": 8.68849961919269e-06, "loss": 17.7815, "step": 1427 }, { "epoch": 0.026102692525636573, "grad_norm": 7.607409503407267, "learning_rate": 8.694592536176695e-06, "loss": 17.8936, "step": 1428 }, { "epoch": 0.026120971722083098, "grad_norm": 6.895009237836748, "learning_rate": 8.7006854531607e-06, "loss": 17.5724, "step": 1429 }, { "epoch": 0.02613925091852962, "grad_norm": 8.873927842757839, "learning_rate": 8.706778370144707e-06, "loss": 18.2576, "step": 1430 }, { "epoch": 0.026157530114976145, "grad_norm": 7.546760724787882, "learning_rate": 8.712871287128714e-06, "loss": 17.628, "step": 1431 }, { "epoch": 0.02617580931142267, "grad_norm": 7.9874520930562705, "learning_rate": 8.71896420411272e-06, "loss": 17.8476, "step": 1432 }, { "epoch": 0.026194088507869195, "grad_norm": 8.739040378905148, "learning_rate": 8.725057121096726e-06, "loss": 18.1888, "step": 1433 }, { "epoch": 0.026212367704315717, "grad_norm": 8.135508969320531, "learning_rate": 8.731150038080733e-06, "loss": 18.3601, "step": 1434 }, { "epoch": 0.026230646900762242, "grad_norm": 7.964696704255999, "learning_rate": 8.737242955064737e-06, "loss": 18.1727, "step": 1435 }, { "epoch": 0.026248926097208768, "grad_norm": 8.904126796062632, "learning_rate": 8.743335872048745e-06, "loss": 18.2646, "step": 1436 }, { "epoch": 0.02626720529365529, "grad_norm": 8.195402872333565, "learning_rate": 8.74942878903275e-06, "loss": 18.0862, "step": 1437 }, { "epoch": 0.026285484490101815, "grad_norm": 6.965868215006295, "learning_rate": 8.755521706016756e-06, "loss": 17.6407, "step": 1438 }, { "epoch": 0.02630376368654834, "grad_norm": 7.587462863152006, "learning_rate": 8.761614623000762e-06, "loss": 17.8189, "step": 1439 }, { "epoch": 0.026322042882994865, "grad_norm": 9.130781072940874, "learning_rate": 8.76770753998477e-06, "loss": 18.4033, "step": 1440 }, { "epoch": 0.026340322079441387, "grad_norm": 6.468831354069997, "learning_rate": 8.773800456968773e-06, "loss": 17.3407, "step": 1441 }, { "epoch": 0.026358601275887912, "grad_norm": 9.156269451296055, "learning_rate": 8.77989337395278e-06, "loss": 18.4001, "step": 1442 }, { "epoch": 0.026376880472334437, "grad_norm": 7.183314914355377, "learning_rate": 8.785986290936787e-06, "loss": 17.9738, "step": 1443 }, { "epoch": 0.02639515966878096, "grad_norm": 8.599106529459707, "learning_rate": 8.792079207920792e-06, "loss": 18.1737, "step": 1444 }, { "epoch": 0.026413438865227484, "grad_norm": 7.942865142973476, "learning_rate": 8.7981721249048e-06, "loss": 18.0981, "step": 1445 }, { "epoch": 0.02643171806167401, "grad_norm": 8.542916435346985, "learning_rate": 8.804265041888805e-06, "loss": 18.4702, "step": 1446 }, { "epoch": 0.02644999725812053, "grad_norm": 7.9391169682901515, "learning_rate": 8.810357958872811e-06, "loss": 17.8375, "step": 1447 }, { "epoch": 0.026468276454567057, "grad_norm": 7.973990058925455, "learning_rate": 8.816450875856817e-06, "loss": 18.0898, "step": 1448 }, { "epoch": 0.026486555651013582, "grad_norm": 8.14941052802256, "learning_rate": 8.822543792840823e-06, "loss": 18.5443, "step": 1449 }, { "epoch": 0.026504834847460107, "grad_norm": 8.351592252513374, "learning_rate": 8.828636709824829e-06, "loss": 18.0385, "step": 1450 }, { "epoch": 0.02652311404390663, "grad_norm": 12.065678884127646, "learning_rate": 8.834729626808836e-06, "loss": 17.76, "step": 1451 }, { "epoch": 0.026541393240353154, "grad_norm": 7.990968201402576, "learning_rate": 8.840822543792842e-06, "loss": 18.057, "step": 1452 }, { "epoch": 0.02655967243679968, "grad_norm": 7.870114463708758, "learning_rate": 8.846915460776847e-06, "loss": 17.5919, "step": 1453 }, { "epoch": 0.0265779516332462, "grad_norm": 8.37421222401054, "learning_rate": 8.853008377760853e-06, "loss": 17.9004, "step": 1454 }, { "epoch": 0.026596230829692726, "grad_norm": 7.512862929896719, "learning_rate": 8.859101294744859e-06, "loss": 17.7529, "step": 1455 }, { "epoch": 0.02661451002613925, "grad_norm": 8.277450520977178, "learning_rate": 8.865194211728866e-06, "loss": 18.1617, "step": 1456 }, { "epoch": 0.026632789222585777, "grad_norm": 7.8667995056229145, "learning_rate": 8.871287128712872e-06, "loss": 17.8809, "step": 1457 }, { "epoch": 0.0266510684190323, "grad_norm": 7.613928529760875, "learning_rate": 8.877380045696878e-06, "loss": 17.5719, "step": 1458 }, { "epoch": 0.026669347615478824, "grad_norm": 8.035004828733218, "learning_rate": 8.883472962680885e-06, "loss": 18.0841, "step": 1459 }, { "epoch": 0.02668762681192535, "grad_norm": 9.677789943846687, "learning_rate": 8.88956587966489e-06, "loss": 18.8802, "step": 1460 }, { "epoch": 0.02670590600837187, "grad_norm": 9.55373517160617, "learning_rate": 8.895658796648895e-06, "loss": 18.8335, "step": 1461 }, { "epoch": 0.026724185204818396, "grad_norm": 7.892072339341589, "learning_rate": 8.901751713632903e-06, "loss": 17.7087, "step": 1462 }, { "epoch": 0.02674246440126492, "grad_norm": 7.999818442281938, "learning_rate": 8.907844630616908e-06, "loss": 18.1386, "step": 1463 }, { "epoch": 0.026760743597711443, "grad_norm": 8.75483316003142, "learning_rate": 8.913937547600914e-06, "loss": 18.1697, "step": 1464 }, { "epoch": 0.026779022794157968, "grad_norm": 7.675223165596685, "learning_rate": 8.920030464584922e-06, "loss": 17.6498, "step": 1465 }, { "epoch": 0.026797301990604493, "grad_norm": 8.578202310137655, "learning_rate": 8.926123381568926e-06, "loss": 18.1871, "step": 1466 }, { "epoch": 0.02681558118705102, "grad_norm": 8.097931941837253, "learning_rate": 8.932216298552933e-06, "loss": 17.8763, "step": 1467 }, { "epoch": 0.02683386038349754, "grad_norm": 7.425964756487236, "learning_rate": 8.938309215536939e-06, "loss": 17.6646, "step": 1468 }, { "epoch": 0.026852139579944066, "grad_norm": 7.352889034745282, "learning_rate": 8.944402132520945e-06, "loss": 17.8098, "step": 1469 }, { "epoch": 0.02687041877639059, "grad_norm": 8.094022459132429, "learning_rate": 8.950495049504952e-06, "loss": 18.0239, "step": 1470 }, { "epoch": 0.026888697972837113, "grad_norm": 7.825426674849305, "learning_rate": 8.956587966488958e-06, "loss": 17.9517, "step": 1471 }, { "epoch": 0.026906977169283638, "grad_norm": 7.934927280932549, "learning_rate": 8.962680883472964e-06, "loss": 18.3729, "step": 1472 }, { "epoch": 0.026925256365730163, "grad_norm": 9.164940864472438, "learning_rate": 8.96877380045697e-06, "loss": 18.8277, "step": 1473 }, { "epoch": 0.026943535562176688, "grad_norm": 8.608148379267885, "learning_rate": 8.974866717440975e-06, "loss": 17.7031, "step": 1474 }, { "epoch": 0.02696181475862321, "grad_norm": 7.558928448476973, "learning_rate": 8.980959634424981e-06, "loss": 17.8896, "step": 1475 }, { "epoch": 0.026980093955069735, "grad_norm": 7.3185664527139265, "learning_rate": 8.987052551408988e-06, "loss": 17.7327, "step": 1476 }, { "epoch": 0.02699837315151626, "grad_norm": 9.086940154591609, "learning_rate": 8.993145468392994e-06, "loss": 18.6602, "step": 1477 }, { "epoch": 0.027016652347962782, "grad_norm": 8.199731072144623, "learning_rate": 8.999238385377e-06, "loss": 18.0815, "step": 1478 }, { "epoch": 0.027034931544409307, "grad_norm": 8.692421826783356, "learning_rate": 9.005331302361006e-06, "loss": 18.5877, "step": 1479 }, { "epoch": 0.027053210740855833, "grad_norm": 8.5464561802531, "learning_rate": 9.011424219345011e-06, "loss": 18.249, "step": 1480 }, { "epoch": 0.027071489937302358, "grad_norm": 8.391435606391324, "learning_rate": 9.017517136329019e-06, "loss": 18.234, "step": 1481 }, { "epoch": 0.02708976913374888, "grad_norm": 8.28855798861418, "learning_rate": 9.023610053313025e-06, "loss": 18.1964, "step": 1482 }, { "epoch": 0.027108048330195405, "grad_norm": 7.522206878997334, "learning_rate": 9.02970297029703e-06, "loss": 18.1083, "step": 1483 }, { "epoch": 0.02712632752664193, "grad_norm": 7.6996569698720165, "learning_rate": 9.035795887281036e-06, "loss": 17.7058, "step": 1484 }, { "epoch": 0.027144606723088452, "grad_norm": 7.770670567376332, "learning_rate": 9.041888804265042e-06, "loss": 17.6966, "step": 1485 }, { "epoch": 0.027162885919534977, "grad_norm": 8.512304497126742, "learning_rate": 9.047981721249048e-06, "loss": 18.2586, "step": 1486 }, { "epoch": 0.027181165115981502, "grad_norm": 7.627840662620894, "learning_rate": 9.054074638233055e-06, "loss": 18.1694, "step": 1487 }, { "epoch": 0.027199444312428024, "grad_norm": 7.91230120789127, "learning_rate": 9.060167555217061e-06, "loss": 17.8939, "step": 1488 }, { "epoch": 0.02721772350887455, "grad_norm": 8.156236620458687, "learning_rate": 9.066260472201067e-06, "loss": 18.2974, "step": 1489 }, { "epoch": 0.027236002705321075, "grad_norm": 7.951072967239382, "learning_rate": 9.072353389185074e-06, "loss": 17.9947, "step": 1490 }, { "epoch": 0.0272542819017676, "grad_norm": 6.436830455155051, "learning_rate": 9.07844630616908e-06, "loss": 17.38, "step": 1491 }, { "epoch": 0.02727256109821412, "grad_norm": 8.842741546278877, "learning_rate": 9.084539223153086e-06, "loss": 18.5525, "step": 1492 }, { "epoch": 0.027290840294660647, "grad_norm": 8.541348994364565, "learning_rate": 9.090632140137091e-06, "loss": 18.2744, "step": 1493 }, { "epoch": 0.027309119491107172, "grad_norm": 7.677758366809842, "learning_rate": 9.096725057121097e-06, "loss": 17.9632, "step": 1494 }, { "epoch": 0.027327398687553694, "grad_norm": 11.031595550811504, "learning_rate": 9.102817974105103e-06, "loss": 18.1301, "step": 1495 }, { "epoch": 0.02734567788400022, "grad_norm": 7.6316264895902615, "learning_rate": 9.10891089108911e-06, "loss": 17.5932, "step": 1496 }, { "epoch": 0.027363957080446744, "grad_norm": 7.942322461509622, "learning_rate": 9.115003808073116e-06, "loss": 17.8623, "step": 1497 }, { "epoch": 0.02738223627689327, "grad_norm": 8.987789022179895, "learning_rate": 9.121096725057122e-06, "loss": 18.4788, "step": 1498 }, { "epoch": 0.02740051547333979, "grad_norm": 7.202629039212628, "learning_rate": 9.127189642041128e-06, "loss": 17.7036, "step": 1499 }, { "epoch": 0.027418794669786316, "grad_norm": 8.718231529858707, "learning_rate": 9.133282559025133e-06, "loss": 18.4281, "step": 1500 }, { "epoch": 0.02743707386623284, "grad_norm": 8.224690865957287, "learning_rate": 9.13937547600914e-06, "loss": 17.9646, "step": 1501 }, { "epoch": 0.027455353062679363, "grad_norm": 8.103325679036537, "learning_rate": 9.145468392993147e-06, "loss": 17.8065, "step": 1502 }, { "epoch": 0.02747363225912589, "grad_norm": 7.595202129353247, "learning_rate": 9.151561309977152e-06, "loss": 17.7352, "step": 1503 }, { "epoch": 0.027491911455572414, "grad_norm": 9.137774770007482, "learning_rate": 9.157654226961158e-06, "loss": 18.267, "step": 1504 }, { "epoch": 0.027510190652018936, "grad_norm": 8.226653766856433, "learning_rate": 9.163747143945164e-06, "loss": 18.1586, "step": 1505 }, { "epoch": 0.02752846984846546, "grad_norm": 8.250869806845207, "learning_rate": 9.16984006092917e-06, "loss": 18.1652, "step": 1506 }, { "epoch": 0.027546749044911986, "grad_norm": 7.819873303002308, "learning_rate": 9.175932977913177e-06, "loss": 17.9536, "step": 1507 }, { "epoch": 0.02756502824135851, "grad_norm": 7.971809249675537, "learning_rate": 9.182025894897183e-06, "loss": 17.8049, "step": 1508 }, { "epoch": 0.027583307437805033, "grad_norm": 7.868639339632311, "learning_rate": 9.188118811881189e-06, "loss": 18.0095, "step": 1509 }, { "epoch": 0.02760158663425156, "grad_norm": 8.046255160243186, "learning_rate": 9.194211728865194e-06, "loss": 18.2376, "step": 1510 }, { "epoch": 0.027619865830698084, "grad_norm": 8.19271834161202, "learning_rate": 9.2003046458492e-06, "loss": 18.1397, "step": 1511 }, { "epoch": 0.027638145027144605, "grad_norm": 7.972401688363723, "learning_rate": 9.206397562833208e-06, "loss": 18.1718, "step": 1512 }, { "epoch": 0.02765642422359113, "grad_norm": 7.806246299530137, "learning_rate": 9.212490479817213e-06, "loss": 17.9699, "step": 1513 }, { "epoch": 0.027674703420037656, "grad_norm": 6.966609041650007, "learning_rate": 9.218583396801219e-06, "loss": 17.8136, "step": 1514 }, { "epoch": 0.02769298261648418, "grad_norm": 7.395969299114421, "learning_rate": 9.224676313785227e-06, "loss": 17.9745, "step": 1515 }, { "epoch": 0.027711261812930703, "grad_norm": 8.108307050861727, "learning_rate": 9.230769230769232e-06, "loss": 18.1577, "step": 1516 }, { "epoch": 0.027729541009377228, "grad_norm": 8.441209289907926, "learning_rate": 9.236862147753236e-06, "loss": 18.3238, "step": 1517 }, { "epoch": 0.027747820205823753, "grad_norm": 7.1675044271113855, "learning_rate": 9.242955064737244e-06, "loss": 17.5438, "step": 1518 }, { "epoch": 0.027766099402270275, "grad_norm": 7.356867118981828, "learning_rate": 9.24904798172125e-06, "loss": 17.5816, "step": 1519 }, { "epoch": 0.0277843785987168, "grad_norm": 8.201443433514227, "learning_rate": 9.255140898705255e-06, "loss": 18.529, "step": 1520 }, { "epoch": 0.027802657795163326, "grad_norm": 8.642047252829247, "learning_rate": 9.261233815689263e-06, "loss": 18.0311, "step": 1521 }, { "epoch": 0.027820936991609847, "grad_norm": 9.120647593902877, "learning_rate": 9.267326732673269e-06, "loss": 18.3459, "step": 1522 }, { "epoch": 0.027839216188056373, "grad_norm": 8.22577861240296, "learning_rate": 9.273419649657274e-06, "loss": 18.0776, "step": 1523 }, { "epoch": 0.027857495384502898, "grad_norm": 8.844626158932435, "learning_rate": 9.27951256664128e-06, "loss": 18.4426, "step": 1524 }, { "epoch": 0.027875774580949423, "grad_norm": 6.779648817912882, "learning_rate": 9.285605483625286e-06, "loss": 17.3176, "step": 1525 }, { "epoch": 0.027894053777395945, "grad_norm": 8.492963085283831, "learning_rate": 9.291698400609293e-06, "loss": 18.0535, "step": 1526 }, { "epoch": 0.02791233297384247, "grad_norm": 7.247862231746354, "learning_rate": 9.297791317593299e-06, "loss": 17.7189, "step": 1527 }, { "epoch": 0.027930612170288995, "grad_norm": 9.165028186905502, "learning_rate": 9.303884234577305e-06, "loss": 18.7618, "step": 1528 }, { "epoch": 0.027948891366735517, "grad_norm": 7.957554243726399, "learning_rate": 9.30997715156131e-06, "loss": 18.3054, "step": 1529 }, { "epoch": 0.027967170563182042, "grad_norm": 8.85634231870708, "learning_rate": 9.316070068545316e-06, "loss": 18.8453, "step": 1530 }, { "epoch": 0.027985449759628567, "grad_norm": 6.072056271219344, "learning_rate": 9.322162985529322e-06, "loss": 17.3045, "step": 1531 }, { "epoch": 0.028003728956075093, "grad_norm": 7.304157703076281, "learning_rate": 9.32825590251333e-06, "loss": 17.8859, "step": 1532 }, { "epoch": 0.028022008152521614, "grad_norm": 8.566346981570362, "learning_rate": 9.334348819497335e-06, "loss": 18.2739, "step": 1533 }, { "epoch": 0.02804028734896814, "grad_norm": 8.863402700936351, "learning_rate": 9.340441736481341e-06, "loss": 18.1267, "step": 1534 }, { "epoch": 0.028058566545414665, "grad_norm": 7.032337434310726, "learning_rate": 9.346534653465348e-06, "loss": 17.5745, "step": 1535 }, { "epoch": 0.028076845741861187, "grad_norm": 7.905856887685362, "learning_rate": 9.352627570449353e-06, "loss": 18.1291, "step": 1536 }, { "epoch": 0.028095124938307712, "grad_norm": 6.83724131858278, "learning_rate": 9.35872048743336e-06, "loss": 17.6759, "step": 1537 }, { "epoch": 0.028113404134754237, "grad_norm": 7.371139363257559, "learning_rate": 9.364813404417366e-06, "loss": 17.6574, "step": 1538 }, { "epoch": 0.02813168333120076, "grad_norm": 7.556429626574926, "learning_rate": 9.370906321401372e-06, "loss": 17.9372, "step": 1539 }, { "epoch": 0.028149962527647284, "grad_norm": 8.229899918476479, "learning_rate": 9.376999238385377e-06, "loss": 18.3724, "step": 1540 }, { "epoch": 0.02816824172409381, "grad_norm": 6.980564490492054, "learning_rate": 9.383092155369385e-06, "loss": 17.4149, "step": 1541 }, { "epoch": 0.028186520920540335, "grad_norm": 8.549941551251717, "learning_rate": 9.389185072353389e-06, "loss": 18.3236, "step": 1542 }, { "epoch": 0.028204800116986856, "grad_norm": 8.117965248196436, "learning_rate": 9.395277989337396e-06, "loss": 18.2625, "step": 1543 }, { "epoch": 0.02822307931343338, "grad_norm": 8.087251903485502, "learning_rate": 9.401370906321402e-06, "loss": 17.9754, "step": 1544 }, { "epoch": 0.028241358509879907, "grad_norm": 8.109262754282136, "learning_rate": 9.407463823305408e-06, "loss": 17.9735, "step": 1545 }, { "epoch": 0.02825963770632643, "grad_norm": 8.449701368675408, "learning_rate": 9.413556740289415e-06, "loss": 18.3141, "step": 1546 }, { "epoch": 0.028277916902772954, "grad_norm": 8.021681692913548, "learning_rate": 9.419649657273421e-06, "loss": 17.7494, "step": 1547 }, { "epoch": 0.02829619609921948, "grad_norm": 8.654610764330709, "learning_rate": 9.425742574257427e-06, "loss": 18.3164, "step": 1548 }, { "epoch": 0.028314475295666004, "grad_norm": 8.934826474472013, "learning_rate": 9.431835491241433e-06, "loss": 18.5265, "step": 1549 }, { "epoch": 0.028332754492112526, "grad_norm": 6.615085962692111, "learning_rate": 9.437928408225438e-06, "loss": 17.4637, "step": 1550 }, { "epoch": 0.02835103368855905, "grad_norm": 7.193084585584398, "learning_rate": 9.444021325209444e-06, "loss": 17.8196, "step": 1551 }, { "epoch": 0.028369312885005576, "grad_norm": 7.5635242082002705, "learning_rate": 9.450114242193451e-06, "loss": 17.8508, "step": 1552 }, { "epoch": 0.028387592081452098, "grad_norm": 8.727849038197348, "learning_rate": 9.456207159177457e-06, "loss": 17.8505, "step": 1553 }, { "epoch": 0.028405871277898623, "grad_norm": 8.546991746139607, "learning_rate": 9.462300076161463e-06, "loss": 18.2066, "step": 1554 }, { "epoch": 0.02842415047434515, "grad_norm": 8.434184761377928, "learning_rate": 9.468392993145469e-06, "loss": 18.3708, "step": 1555 }, { "epoch": 0.02844242967079167, "grad_norm": 7.600295088603328, "learning_rate": 9.474485910129475e-06, "loss": 17.712, "step": 1556 }, { "epoch": 0.028460708867238196, "grad_norm": 8.812810635554333, "learning_rate": 9.480578827113482e-06, "loss": 18.6113, "step": 1557 }, { "epoch": 0.02847898806368472, "grad_norm": 7.1772233524126765, "learning_rate": 9.486671744097488e-06, "loss": 17.3414, "step": 1558 }, { "epoch": 0.028497267260131246, "grad_norm": 8.008227503749223, "learning_rate": 9.492764661081493e-06, "loss": 17.7675, "step": 1559 }, { "epoch": 0.028515546456577768, "grad_norm": 7.615853746598634, "learning_rate": 9.498857578065501e-06, "loss": 17.6669, "step": 1560 }, { "epoch": 0.028533825653024293, "grad_norm": 9.332984012169115, "learning_rate": 9.504950495049505e-06, "loss": 18.7201, "step": 1561 }, { "epoch": 0.02855210484947082, "grad_norm": 6.923109174353397, "learning_rate": 9.51104341203351e-06, "loss": 17.9097, "step": 1562 }, { "epoch": 0.02857038404591734, "grad_norm": 7.931663485509248, "learning_rate": 9.517136329017518e-06, "loss": 17.7847, "step": 1563 }, { "epoch": 0.028588663242363865, "grad_norm": 7.60567237482634, "learning_rate": 9.523229246001524e-06, "loss": 17.9421, "step": 1564 }, { "epoch": 0.02860694243881039, "grad_norm": 7.780004874882818, "learning_rate": 9.52932216298553e-06, "loss": 17.8349, "step": 1565 }, { "epoch": 0.028625221635256916, "grad_norm": 7.848484706525409, "learning_rate": 9.535415079969537e-06, "loss": 17.9837, "step": 1566 }, { "epoch": 0.028643500831703438, "grad_norm": 9.128305637632943, "learning_rate": 9.541507996953541e-06, "loss": 18.6661, "step": 1567 }, { "epoch": 0.028661780028149963, "grad_norm": 8.250098244391708, "learning_rate": 9.547600913937549e-06, "loss": 18.1463, "step": 1568 }, { "epoch": 0.028680059224596488, "grad_norm": 7.282911330342466, "learning_rate": 9.553693830921554e-06, "loss": 17.9766, "step": 1569 }, { "epoch": 0.02869833842104301, "grad_norm": 8.728107028323366, "learning_rate": 9.55978674790556e-06, "loss": 18.4801, "step": 1570 }, { "epoch": 0.028716617617489535, "grad_norm": 7.322854026891061, "learning_rate": 9.565879664889568e-06, "loss": 17.4926, "step": 1571 }, { "epoch": 0.02873489681393606, "grad_norm": 9.024688004243888, "learning_rate": 9.571972581873573e-06, "loss": 18.507, "step": 1572 }, { "epoch": 0.028753176010382582, "grad_norm": 8.72362500062873, "learning_rate": 9.578065498857577e-06, "loss": 18.3873, "step": 1573 }, { "epoch": 0.028771455206829107, "grad_norm": 7.608476714455864, "learning_rate": 9.584158415841585e-06, "loss": 17.8651, "step": 1574 }, { "epoch": 0.028789734403275632, "grad_norm": 7.4612096250296105, "learning_rate": 9.59025133282559e-06, "loss": 17.8404, "step": 1575 }, { "epoch": 0.028808013599722158, "grad_norm": 7.472152034411249, "learning_rate": 9.596344249809596e-06, "loss": 17.8312, "step": 1576 }, { "epoch": 0.02882629279616868, "grad_norm": 8.903692153904508, "learning_rate": 9.602437166793604e-06, "loss": 18.5229, "step": 1577 }, { "epoch": 0.028844571992615205, "grad_norm": 7.796879083278068, "learning_rate": 9.60853008377761e-06, "loss": 17.9882, "step": 1578 }, { "epoch": 0.02886285118906173, "grad_norm": 7.756058097712065, "learning_rate": 9.614623000761615e-06, "loss": 17.9424, "step": 1579 }, { "epoch": 0.02888113038550825, "grad_norm": 9.72247257932452, "learning_rate": 9.620715917745621e-06, "loss": 19.3409, "step": 1580 }, { "epoch": 0.028899409581954777, "grad_norm": 7.420575822299361, "learning_rate": 9.626808834729627e-06, "loss": 17.5876, "step": 1581 }, { "epoch": 0.028917688778401302, "grad_norm": 8.134881444053162, "learning_rate": 9.632901751713634e-06, "loss": 17.9443, "step": 1582 }, { "epoch": 0.028935967974847827, "grad_norm": 8.170014718000967, "learning_rate": 9.63899466869764e-06, "loss": 18.1135, "step": 1583 }, { "epoch": 0.02895424717129435, "grad_norm": 7.727004414989496, "learning_rate": 9.645087585681646e-06, "loss": 18.0018, "step": 1584 }, { "epoch": 0.028972526367740874, "grad_norm": 8.724147523267623, "learning_rate": 9.651180502665652e-06, "loss": 18.3142, "step": 1585 }, { "epoch": 0.0289908055641874, "grad_norm": 6.320770195610164, "learning_rate": 9.657273419649657e-06, "loss": 17.0664, "step": 1586 }, { "epoch": 0.02900908476063392, "grad_norm": 7.725570724377613, "learning_rate": 9.663366336633663e-06, "loss": 18.0783, "step": 1587 }, { "epoch": 0.029027363957080447, "grad_norm": 7.694211438039342, "learning_rate": 9.66945925361767e-06, "loss": 17.6469, "step": 1588 }, { "epoch": 0.029045643153526972, "grad_norm": 7.70476316150784, "learning_rate": 9.675552170601676e-06, "loss": 17.7812, "step": 1589 }, { "epoch": 0.029063922349973494, "grad_norm": 8.563173823510006, "learning_rate": 9.681645087585682e-06, "loss": 18.2982, "step": 1590 }, { "epoch": 0.02908220154642002, "grad_norm": 7.4474122934320635, "learning_rate": 9.68773800456969e-06, "loss": 17.896, "step": 1591 }, { "epoch": 0.029100480742866544, "grad_norm": 8.433496131474252, "learning_rate": 9.693830921553694e-06, "loss": 18.0938, "step": 1592 }, { "epoch": 0.02911875993931307, "grad_norm": 7.952162694222141, "learning_rate": 9.699923838537701e-06, "loss": 17.8548, "step": 1593 }, { "epoch": 0.02913703913575959, "grad_norm": 9.547958041113377, "learning_rate": 9.706016755521707e-06, "loss": 18.7101, "step": 1594 }, { "epoch": 0.029155318332206116, "grad_norm": 7.35777251789467, "learning_rate": 9.712109672505713e-06, "loss": 17.6883, "step": 1595 }, { "epoch": 0.02917359752865264, "grad_norm": 8.204778394839916, "learning_rate": 9.718202589489718e-06, "loss": 18.4319, "step": 1596 }, { "epoch": 0.029191876725099163, "grad_norm": 8.288620463582449, "learning_rate": 9.724295506473726e-06, "loss": 18.2228, "step": 1597 }, { "epoch": 0.02921015592154569, "grad_norm": 7.888180051404007, "learning_rate": 9.730388423457732e-06, "loss": 17.9801, "step": 1598 }, { "epoch": 0.029228435117992214, "grad_norm": 7.935515095892514, "learning_rate": 9.736481340441737e-06, "loss": 18.1441, "step": 1599 }, { "epoch": 0.02924671431443874, "grad_norm": 6.5810518907819615, "learning_rate": 9.742574257425743e-06, "loss": 17.487, "step": 1600 }, { "epoch": 0.02926499351088526, "grad_norm": 8.057191057617796, "learning_rate": 9.748667174409749e-06, "loss": 18.5901, "step": 1601 }, { "epoch": 0.029283272707331786, "grad_norm": 7.454482283214667, "learning_rate": 9.754760091393756e-06, "loss": 17.7255, "step": 1602 }, { "epoch": 0.02930155190377831, "grad_norm": 7.1869910327073825, "learning_rate": 9.760853008377762e-06, "loss": 18.0372, "step": 1603 }, { "epoch": 0.029319831100224833, "grad_norm": 8.727975453617983, "learning_rate": 9.766945925361768e-06, "loss": 18.4149, "step": 1604 }, { "epoch": 0.029338110296671358, "grad_norm": 7.777877274694171, "learning_rate": 9.773038842345774e-06, "loss": 17.7231, "step": 1605 }, { "epoch": 0.029356389493117883, "grad_norm": 7.513074478390638, "learning_rate": 9.77913175932978e-06, "loss": 17.436, "step": 1606 }, { "epoch": 0.029374668689564405, "grad_norm": 7.792920525633787, "learning_rate": 9.785224676313785e-06, "loss": 18.1177, "step": 1607 }, { "epoch": 0.02939294788601093, "grad_norm": 6.898953111016352, "learning_rate": 9.791317593297793e-06, "loss": 17.8014, "step": 1608 }, { "epoch": 0.029411227082457456, "grad_norm": 8.330048553409595, "learning_rate": 9.797410510281798e-06, "loss": 18.1343, "step": 1609 }, { "epoch": 0.02942950627890398, "grad_norm": 8.156161288497497, "learning_rate": 9.803503427265804e-06, "loss": 18.1073, "step": 1610 }, { "epoch": 0.029447785475350503, "grad_norm": 7.707355244288337, "learning_rate": 9.80959634424981e-06, "loss": 17.8602, "step": 1611 }, { "epoch": 0.029466064671797028, "grad_norm": 6.982593291457571, "learning_rate": 9.815689261233816e-06, "loss": 17.3715, "step": 1612 }, { "epoch": 0.029484343868243553, "grad_norm": 7.6136798692364644, "learning_rate": 9.821782178217823e-06, "loss": 17.6577, "step": 1613 }, { "epoch": 0.029502623064690075, "grad_norm": 8.831629567786118, "learning_rate": 9.827875095201829e-06, "loss": 18.5191, "step": 1614 }, { "epoch": 0.0295209022611366, "grad_norm": 8.241696213267609, "learning_rate": 9.833968012185835e-06, "loss": 17.9193, "step": 1615 }, { "epoch": 0.029539181457583125, "grad_norm": 8.513703873129042, "learning_rate": 9.840060929169842e-06, "loss": 18.2604, "step": 1616 }, { "epoch": 0.02955746065402965, "grad_norm": 8.166623481874485, "learning_rate": 9.846153846153848e-06, "loss": 18.3146, "step": 1617 }, { "epoch": 0.029575739850476172, "grad_norm": 9.591123009193785, "learning_rate": 9.852246763137852e-06, "loss": 18.0625, "step": 1618 }, { "epoch": 0.029594019046922698, "grad_norm": 7.342378307838155, "learning_rate": 9.85833968012186e-06, "loss": 17.7567, "step": 1619 }, { "epoch": 0.029612298243369223, "grad_norm": 7.711579400603312, "learning_rate": 9.864432597105865e-06, "loss": 17.5905, "step": 1620 }, { "epoch": 0.029630577439815745, "grad_norm": 8.719640722806627, "learning_rate": 9.87052551408987e-06, "loss": 18.6454, "step": 1621 }, { "epoch": 0.02964885663626227, "grad_norm": 8.310001366312644, "learning_rate": 9.876618431073878e-06, "loss": 18.2321, "step": 1622 }, { "epoch": 0.029667135832708795, "grad_norm": 8.298224474526126, "learning_rate": 9.882711348057884e-06, "loss": 18.2345, "step": 1623 }, { "epoch": 0.029685415029155317, "grad_norm": 7.868201262455768, "learning_rate": 9.88880426504189e-06, "loss": 18.2417, "step": 1624 }, { "epoch": 0.029703694225601842, "grad_norm": 7.868651271556509, "learning_rate": 9.894897182025896e-06, "loss": 18.1694, "step": 1625 }, { "epoch": 0.029721973422048367, "grad_norm": 7.213672615749309, "learning_rate": 9.900990099009901e-06, "loss": 17.8703, "step": 1626 }, { "epoch": 0.029740252618494892, "grad_norm": 9.445225101439867, "learning_rate": 9.907083015993909e-06, "loss": 18.9733, "step": 1627 }, { "epoch": 0.029758531814941414, "grad_norm": 8.085466053201765, "learning_rate": 9.913175932977915e-06, "loss": 18.1514, "step": 1628 }, { "epoch": 0.02977681101138794, "grad_norm": 8.670797616389512, "learning_rate": 9.91926884996192e-06, "loss": 18.1964, "step": 1629 }, { "epoch": 0.029795090207834465, "grad_norm": 9.299323756435573, "learning_rate": 9.925361766945926e-06, "loss": 18.3876, "step": 1630 }, { "epoch": 0.029813369404280986, "grad_norm": 7.965960012792072, "learning_rate": 9.931454683929932e-06, "loss": 17.6732, "step": 1631 }, { "epoch": 0.02983164860072751, "grad_norm": 8.34160354865226, "learning_rate": 9.937547600913938e-06, "loss": 18.0594, "step": 1632 }, { "epoch": 0.029849927797174037, "grad_norm": 8.703633671131135, "learning_rate": 9.943640517897945e-06, "loss": 18.2577, "step": 1633 }, { "epoch": 0.029868206993620562, "grad_norm": 8.138745042647287, "learning_rate": 9.94973343488195e-06, "loss": 17.9059, "step": 1634 }, { "epoch": 0.029886486190067084, "grad_norm": 6.956669647829765, "learning_rate": 9.955826351865957e-06, "loss": 17.4469, "step": 1635 }, { "epoch": 0.02990476538651361, "grad_norm": 7.462595981410311, "learning_rate": 9.961919268849962e-06, "loss": 17.7349, "step": 1636 }, { "epoch": 0.029923044582960134, "grad_norm": 7.365790430570946, "learning_rate": 9.968012185833968e-06, "loss": 17.743, "step": 1637 }, { "epoch": 0.029941323779406656, "grad_norm": 7.9050808159133, "learning_rate": 9.974105102817975e-06, "loss": 17.8508, "step": 1638 }, { "epoch": 0.02995960297585318, "grad_norm": 7.08010023731484, "learning_rate": 9.980198019801981e-06, "loss": 17.6474, "step": 1639 }, { "epoch": 0.029977882172299707, "grad_norm": 7.241082228675219, "learning_rate": 9.986290936785987e-06, "loss": 17.5394, "step": 1640 }, { "epoch": 0.02999616136874623, "grad_norm": 6.70050213401771, "learning_rate": 9.992383853769993e-06, "loss": 17.5436, "step": 1641 }, { "epoch": 0.030014440565192754, "grad_norm": 8.603058800151555, "learning_rate": 9.998476770754e-06, "loss": 18.2784, "step": 1642 }, { "epoch": 0.03003271976163928, "grad_norm": 8.16200725958428, "learning_rate": 9.999999995071216e-06, "loss": 18.1774, "step": 1643 }, { "epoch": 0.030050998958085804, "grad_norm": 8.77092898355948, "learning_rate": 9.99999997316551e-06, "loss": 18.3002, "step": 1644 }, { "epoch": 0.030069278154532326, "grad_norm": 7.532952323927857, "learning_rate": 9.999999933735236e-06, "loss": 17.752, "step": 1645 }, { "epoch": 0.03008755735097885, "grad_norm": 7.604105027865724, "learning_rate": 9.999999876780395e-06, "loss": 17.7217, "step": 1646 }, { "epoch": 0.030105836547425376, "grad_norm": 8.217025734469807, "learning_rate": 9.99999980230099e-06, "loss": 18.2265, "step": 1647 }, { "epoch": 0.030124115743871898, "grad_norm": 7.19985957568226, "learning_rate": 9.99999971029702e-06, "loss": 17.6727, "step": 1648 }, { "epoch": 0.030142394940318423, "grad_norm": 7.4626368055052055, "learning_rate": 9.999999600768484e-06, "loss": 17.6681, "step": 1649 }, { "epoch": 0.03016067413676495, "grad_norm": 8.597444246697309, "learning_rate": 9.999999473715385e-06, "loss": 18.2233, "step": 1650 }, { "epoch": 0.030178953333211474, "grad_norm": 7.339821522451504, "learning_rate": 9.99999932913772e-06, "loss": 18.012, "step": 1651 }, { "epoch": 0.030197232529657995, "grad_norm": 9.495837231113391, "learning_rate": 9.99999916703549e-06, "loss": 18.7163, "step": 1652 }, { "epoch": 0.03021551172610452, "grad_norm": 8.153424924313496, "learning_rate": 9.999998987408699e-06, "loss": 18.1528, "step": 1653 }, { "epoch": 0.030233790922551046, "grad_norm": 7.4629859005548305, "learning_rate": 9.999998790257344e-06, "loss": 17.9068, "step": 1654 }, { "epoch": 0.030252070118997568, "grad_norm": 7.5700990178487695, "learning_rate": 9.99999857558143e-06, "loss": 17.6228, "step": 1655 }, { "epoch": 0.030270349315444093, "grad_norm": 8.497903245407256, "learning_rate": 9.99999834338095e-06, "loss": 18.1078, "step": 1656 }, { "epoch": 0.030288628511890618, "grad_norm": 8.217118353802384, "learning_rate": 9.999998093655913e-06, "loss": 18.2546, "step": 1657 }, { "epoch": 0.03030690770833714, "grad_norm": 7.85525980711628, "learning_rate": 9.999997826406315e-06, "loss": 17.9371, "step": 1658 }, { "epoch": 0.030325186904783665, "grad_norm": 7.506971097466327, "learning_rate": 9.99999754163216e-06, "loss": 17.9911, "step": 1659 }, { "epoch": 0.03034346610123019, "grad_norm": 7.483869241789261, "learning_rate": 9.999997239333448e-06, "loss": 17.6404, "step": 1660 }, { "epoch": 0.030361745297676716, "grad_norm": 7.8702369812607955, "learning_rate": 9.999996919510177e-06, "loss": 18.014, "step": 1661 }, { "epoch": 0.030380024494123237, "grad_norm": 7.478131951125737, "learning_rate": 9.999996582162353e-06, "loss": 17.9119, "step": 1662 }, { "epoch": 0.030398303690569763, "grad_norm": 6.891453312552426, "learning_rate": 9.999996227289975e-06, "loss": 17.5924, "step": 1663 }, { "epoch": 0.030416582887016288, "grad_norm": 7.649411616217506, "learning_rate": 9.999995854893042e-06, "loss": 17.8496, "step": 1664 }, { "epoch": 0.03043486208346281, "grad_norm": 7.687138540451615, "learning_rate": 9.999995464971559e-06, "loss": 18.0789, "step": 1665 }, { "epoch": 0.030453141279909335, "grad_norm": 6.754059755238239, "learning_rate": 9.999995057525525e-06, "loss": 17.5271, "step": 1666 }, { "epoch": 0.03047142047635586, "grad_norm": 9.442846909744565, "learning_rate": 9.999994632554943e-06, "loss": 18.5949, "step": 1667 }, { "epoch": 0.030489699672802385, "grad_norm": 8.563193819594666, "learning_rate": 9.999994190059814e-06, "loss": 18.1301, "step": 1668 }, { "epoch": 0.030507978869248907, "grad_norm": 8.829640791058583, "learning_rate": 9.999993730040137e-06, "loss": 17.6752, "step": 1669 }, { "epoch": 0.030526258065695432, "grad_norm": 7.8977755747954035, "learning_rate": 9.999993252495917e-06, "loss": 18.0845, "step": 1670 }, { "epoch": 0.030544537262141958, "grad_norm": 6.729064744656194, "learning_rate": 9.999992757427155e-06, "loss": 17.4574, "step": 1671 }, { "epoch": 0.03056281645858848, "grad_norm": 6.678060278276319, "learning_rate": 9.999992244833852e-06, "loss": 17.4589, "step": 1672 }, { "epoch": 0.030581095655035005, "grad_norm": 7.737125905516272, "learning_rate": 9.99999171471601e-06, "loss": 17.8177, "step": 1673 }, { "epoch": 0.03059937485148153, "grad_norm": 7.657764374448887, "learning_rate": 9.999991167073632e-06, "loss": 18.2387, "step": 1674 }, { "epoch": 0.03061765404792805, "grad_norm": 8.346199749612207, "learning_rate": 9.999990601906717e-06, "loss": 18.0601, "step": 1675 }, { "epoch": 0.030635933244374577, "grad_norm": 6.530886845125346, "learning_rate": 9.999990019215271e-06, "loss": 17.3699, "step": 1676 }, { "epoch": 0.030654212440821102, "grad_norm": 8.763388542357818, "learning_rate": 9.999989418999292e-06, "loss": 18.0928, "step": 1677 }, { "epoch": 0.030672491637267627, "grad_norm": 8.045196888267036, "learning_rate": 9.999988801258785e-06, "loss": 17.999, "step": 1678 }, { "epoch": 0.03069077083371415, "grad_norm": 9.041537569253046, "learning_rate": 9.999988165993751e-06, "loss": 18.5063, "step": 1679 }, { "epoch": 0.030709050030160674, "grad_norm": 8.317873113243014, "learning_rate": 9.999987513204192e-06, "loss": 18.4539, "step": 1680 }, { "epoch": 0.0307273292266072, "grad_norm": 7.850982443122049, "learning_rate": 9.99998684289011e-06, "loss": 18.1377, "step": 1681 }, { "epoch": 0.03074560842305372, "grad_norm": 8.09374934043548, "learning_rate": 9.999986155051508e-06, "loss": 17.9622, "step": 1682 }, { "epoch": 0.030763887619500246, "grad_norm": 8.2254588213774, "learning_rate": 9.99998544968839e-06, "loss": 18.2687, "step": 1683 }, { "epoch": 0.03078216681594677, "grad_norm": 7.751994688589054, "learning_rate": 9.999984726800756e-06, "loss": 18.1268, "step": 1684 }, { "epoch": 0.030800446012393297, "grad_norm": 8.142840854061875, "learning_rate": 9.99998398638861e-06, "loss": 18.0481, "step": 1685 }, { "epoch": 0.03081872520883982, "grad_norm": 7.412709744013709, "learning_rate": 9.999983228451953e-06, "loss": 17.6665, "step": 1686 }, { "epoch": 0.030837004405286344, "grad_norm": 8.274057649578133, "learning_rate": 9.999982452990789e-06, "loss": 18.1678, "step": 1687 }, { "epoch": 0.03085528360173287, "grad_norm": 7.733422141237051, "learning_rate": 9.99998166000512e-06, "loss": 18.0693, "step": 1688 }, { "epoch": 0.03087356279817939, "grad_norm": 6.331266037416006, "learning_rate": 9.99998084949495e-06, "loss": 17.3541, "step": 1689 }, { "epoch": 0.030891841994625916, "grad_norm": 8.981301420778157, "learning_rate": 9.99998002146028e-06, "loss": 18.3912, "step": 1690 }, { "epoch": 0.03091012119107244, "grad_norm": 8.480155189671931, "learning_rate": 9.999979175901116e-06, "loss": 18.6498, "step": 1691 }, { "epoch": 0.030928400387518963, "grad_norm": 8.869112873111437, "learning_rate": 9.999978312817455e-06, "loss": 18.1799, "step": 1692 }, { "epoch": 0.03094667958396549, "grad_norm": 7.102599203870701, "learning_rate": 9.999977432209306e-06, "loss": 17.654, "step": 1693 }, { "epoch": 0.030964958780412014, "grad_norm": 7.914076704773177, "learning_rate": 9.999976534076672e-06, "loss": 17.7237, "step": 1694 }, { "epoch": 0.03098323797685854, "grad_norm": 7.402304949605478, "learning_rate": 9.999975618419553e-06, "loss": 17.7698, "step": 1695 }, { "epoch": 0.03100151717330506, "grad_norm": 7.1955102355883565, "learning_rate": 9.999974685237951e-06, "loss": 17.6303, "step": 1696 }, { "epoch": 0.031019796369751586, "grad_norm": 7.944128019819306, "learning_rate": 9.999973734531873e-06, "loss": 17.8963, "step": 1697 }, { "epoch": 0.03103807556619811, "grad_norm": 7.73052692176272, "learning_rate": 9.999972766301323e-06, "loss": 18.0474, "step": 1698 }, { "epoch": 0.031056354762644633, "grad_norm": 8.060643549825397, "learning_rate": 9.9999717805463e-06, "loss": 18.2613, "step": 1699 }, { "epoch": 0.031074633959091158, "grad_norm": 6.903036766193613, "learning_rate": 9.99997077726681e-06, "loss": 17.5884, "step": 1700 }, { "epoch": 0.031092913155537683, "grad_norm": 7.937945436907774, "learning_rate": 9.999969756462858e-06, "loss": 18.2496, "step": 1701 }, { "epoch": 0.03111119235198421, "grad_norm": 8.85048821164819, "learning_rate": 9.999968718134443e-06, "loss": 18.4589, "step": 1702 }, { "epoch": 0.03112947154843073, "grad_norm": 6.437826666373901, "learning_rate": 9.999967662281574e-06, "loss": 17.6258, "step": 1703 }, { "epoch": 0.031147750744877255, "grad_norm": 8.149168186932878, "learning_rate": 9.99996658890425e-06, "loss": 18.0005, "step": 1704 }, { "epoch": 0.03116602994132378, "grad_norm": 8.46177987736004, "learning_rate": 9.99996549800248e-06, "loss": 18.4022, "step": 1705 }, { "epoch": 0.031184309137770302, "grad_norm": 7.1207036582000365, "learning_rate": 9.999964389576262e-06, "loss": 17.7006, "step": 1706 }, { "epoch": 0.031202588334216828, "grad_norm": 8.74135307600333, "learning_rate": 9.999963263625604e-06, "loss": 18.1906, "step": 1707 }, { "epoch": 0.031220867530663353, "grad_norm": 7.094062218917512, "learning_rate": 9.999962120150507e-06, "loss": 17.5309, "step": 1708 }, { "epoch": 0.031239146727109875, "grad_norm": 6.13369738189881, "learning_rate": 9.99996095915098e-06, "loss": 17.1825, "step": 1709 }, { "epoch": 0.0312574259235564, "grad_norm": 8.334653328554893, "learning_rate": 9.999959780627021e-06, "loss": 18.2506, "step": 1710 }, { "epoch": 0.031275705120002925, "grad_norm": 7.9067533539838815, "learning_rate": 9.999958584578638e-06, "loss": 18.3141, "step": 1711 }, { "epoch": 0.03129398431644945, "grad_norm": 7.669650189051857, "learning_rate": 9.999957371005833e-06, "loss": 17.7663, "step": 1712 }, { "epoch": 0.031312263512895976, "grad_norm": 6.893918883243496, "learning_rate": 9.999956139908613e-06, "loss": 17.5321, "step": 1713 }, { "epoch": 0.0313305427093425, "grad_norm": 8.880750021766438, "learning_rate": 9.999954891286978e-06, "loss": 18.2603, "step": 1714 }, { "epoch": 0.03134882190578902, "grad_norm": 7.745527335498254, "learning_rate": 9.999953625140938e-06, "loss": 18.1199, "step": 1715 }, { "epoch": 0.03136710110223555, "grad_norm": 8.629927372918612, "learning_rate": 9.999952341470492e-06, "loss": 18.442, "step": 1716 }, { "epoch": 0.03138538029868207, "grad_norm": 9.315829947449322, "learning_rate": 9.999951040275648e-06, "loss": 18.4559, "step": 1717 }, { "epoch": 0.03140365949512859, "grad_norm": 8.011886242632372, "learning_rate": 9.99994972155641e-06, "loss": 17.8989, "step": 1718 }, { "epoch": 0.03142193869157512, "grad_norm": 8.688456567864376, "learning_rate": 9.99994838531278e-06, "loss": 17.9268, "step": 1719 }, { "epoch": 0.03144021788802164, "grad_norm": 7.863237480162188, "learning_rate": 9.999947031544768e-06, "loss": 18.0234, "step": 1720 }, { "epoch": 0.031458497084468164, "grad_norm": 9.614738708606128, "learning_rate": 9.999945660252372e-06, "loss": 19.0878, "step": 1721 }, { "epoch": 0.03147677628091469, "grad_norm": 7.484839742830578, "learning_rate": 9.999944271435604e-06, "loss": 17.7975, "step": 1722 }, { "epoch": 0.031495055477361214, "grad_norm": 8.302144392083973, "learning_rate": 9.999942865094463e-06, "loss": 18.1382, "step": 1723 }, { "epoch": 0.03151333467380774, "grad_norm": 8.800750917473952, "learning_rate": 9.999941441228955e-06, "loss": 18.5514, "step": 1724 }, { "epoch": 0.031531613870254264, "grad_norm": 8.085960134063056, "learning_rate": 9.999939999839087e-06, "loss": 18.0244, "step": 1725 }, { "epoch": 0.031549893066700786, "grad_norm": 8.970989611370843, "learning_rate": 9.999938540924865e-06, "loss": 18.4848, "step": 1726 }, { "epoch": 0.031568172263147315, "grad_norm": 13.206975008573506, "learning_rate": 9.999937064486292e-06, "loss": 18.5302, "step": 1727 }, { "epoch": 0.03158645145959384, "grad_norm": 7.733792813452686, "learning_rate": 9.999935570523371e-06, "loss": 17.971, "step": 1728 }, { "epoch": 0.03160473065604036, "grad_norm": 8.211191887685656, "learning_rate": 9.999934059036111e-06, "loss": 17.9593, "step": 1729 }, { "epoch": 0.03162300985248689, "grad_norm": 7.575399705091664, "learning_rate": 9.999932530024517e-06, "loss": 18.111, "step": 1730 }, { "epoch": 0.03164128904893341, "grad_norm": 7.464121770275439, "learning_rate": 9.999930983488592e-06, "loss": 17.9209, "step": 1731 }, { "epoch": 0.03165956824537993, "grad_norm": 7.272034710638791, "learning_rate": 9.999929419428345e-06, "loss": 17.7786, "step": 1732 }, { "epoch": 0.03167784744182646, "grad_norm": 8.457109924726934, "learning_rate": 9.999927837843778e-06, "loss": 18.276, "step": 1733 }, { "epoch": 0.03169612663827298, "grad_norm": 9.83487092928988, "learning_rate": 9.999926238734896e-06, "loss": 18.1567, "step": 1734 }, { "epoch": 0.0317144058347195, "grad_norm": 7.058737498003848, "learning_rate": 9.999924622101708e-06, "loss": 17.4511, "step": 1735 }, { "epoch": 0.03173268503116603, "grad_norm": 17.665633839089814, "learning_rate": 9.999922987944218e-06, "loss": 18.6444, "step": 1736 }, { "epoch": 0.03175096422761255, "grad_norm": 7.935431722583716, "learning_rate": 9.999921336262432e-06, "loss": 17.7928, "step": 1737 }, { "epoch": 0.031769243424059075, "grad_norm": 7.7888607207513205, "learning_rate": 9.999919667056355e-06, "loss": 18.1576, "step": 1738 }, { "epoch": 0.031787522620505604, "grad_norm": 8.223768512472157, "learning_rate": 9.999917980325993e-06, "loss": 18.2164, "step": 1739 }, { "epoch": 0.031805801816952126, "grad_norm": 7.552464065738869, "learning_rate": 9.999916276071352e-06, "loss": 17.7676, "step": 1740 }, { "epoch": 0.031824081013398654, "grad_norm": 7.752283479585389, "learning_rate": 9.99991455429244e-06, "loss": 17.7247, "step": 1741 }, { "epoch": 0.031842360209845176, "grad_norm": 8.32808808108957, "learning_rate": 9.99991281498926e-06, "loss": 18.2492, "step": 1742 }, { "epoch": 0.0318606394062917, "grad_norm": 7.311715080136691, "learning_rate": 9.999911058161821e-06, "loss": 17.6998, "step": 1743 }, { "epoch": 0.031878918602738227, "grad_norm": 7.809550576897194, "learning_rate": 9.999909283810127e-06, "loss": 17.8632, "step": 1744 }, { "epoch": 0.03189719779918475, "grad_norm": 8.643321746507269, "learning_rate": 9.999907491934184e-06, "loss": 18.3235, "step": 1745 }, { "epoch": 0.03191547699563127, "grad_norm": 8.69193791200664, "learning_rate": 9.999905682534002e-06, "loss": 18.1556, "step": 1746 }, { "epoch": 0.0319337561920778, "grad_norm": 7.41714272691651, "learning_rate": 9.99990385560958e-06, "loss": 17.4267, "step": 1747 }, { "epoch": 0.03195203538852432, "grad_norm": 7.647497052491643, "learning_rate": 9.99990201116093e-06, "loss": 17.8335, "step": 1748 }, { "epoch": 0.03197031458497084, "grad_norm": 8.250167251223719, "learning_rate": 9.99990014918806e-06, "loss": 17.9621, "step": 1749 }, { "epoch": 0.03198859378141737, "grad_norm": 7.817423589544294, "learning_rate": 9.999898269690972e-06, "loss": 18.0841, "step": 1750 }, { "epoch": 0.03200687297786389, "grad_norm": 9.061676820807548, "learning_rate": 9.999896372669675e-06, "loss": 18.3327, "step": 1751 }, { "epoch": 0.032025152174310414, "grad_norm": 8.171012294543834, "learning_rate": 9.999894458124176e-06, "loss": 18.2276, "step": 1752 }, { "epoch": 0.03204343137075694, "grad_norm": 8.81443248428237, "learning_rate": 9.99989252605448e-06, "loss": 18.2164, "step": 1753 }, { "epoch": 0.032061710567203465, "grad_norm": 8.244576239047426, "learning_rate": 9.999890576460593e-06, "loss": 18.0588, "step": 1754 }, { "epoch": 0.03207998976364999, "grad_norm": 7.500940593794085, "learning_rate": 9.999888609342523e-06, "loss": 17.9209, "step": 1755 }, { "epoch": 0.032098268960096515, "grad_norm": 8.445595785573039, "learning_rate": 9.99988662470028e-06, "loss": 18.0948, "step": 1756 }, { "epoch": 0.03211654815654304, "grad_norm": 8.781581898387014, "learning_rate": 9.999884622533866e-06, "loss": 18.289, "step": 1757 }, { "epoch": 0.032134827352989566, "grad_norm": 8.653088007149135, "learning_rate": 9.999882602843292e-06, "loss": 18.0247, "step": 1758 }, { "epoch": 0.03215310654943609, "grad_norm": 8.900805886681988, "learning_rate": 9.999880565628564e-06, "loss": 18.0382, "step": 1759 }, { "epoch": 0.03217138574588261, "grad_norm": 6.761814575284438, "learning_rate": 9.999878510889686e-06, "loss": 17.549, "step": 1760 }, { "epoch": 0.03218966494232914, "grad_norm": 8.382118165256745, "learning_rate": 9.999876438626669e-06, "loss": 18.1429, "step": 1761 }, { "epoch": 0.03220794413877566, "grad_norm": 8.748807697101075, "learning_rate": 9.99987434883952e-06, "loss": 17.8109, "step": 1762 }, { "epoch": 0.03222622333522218, "grad_norm": 8.559930526855778, "learning_rate": 9.999872241528244e-06, "loss": 18.5522, "step": 1763 }, { "epoch": 0.03224450253166871, "grad_norm": 7.741650185924381, "learning_rate": 9.99987011669285e-06, "loss": 17.8154, "step": 1764 }, { "epoch": 0.03226278172811523, "grad_norm": 6.293094129356059, "learning_rate": 9.999867974333345e-06, "loss": 17.4139, "step": 1765 }, { "epoch": 0.032281060924561754, "grad_norm": 7.907972598846681, "learning_rate": 9.999865814449734e-06, "loss": 18.1276, "step": 1766 }, { "epoch": 0.03229934012100828, "grad_norm": 8.237264405705627, "learning_rate": 9.99986363704203e-06, "loss": 18.3052, "step": 1767 }, { "epoch": 0.032317619317454804, "grad_norm": 9.82774985313453, "learning_rate": 9.999861442110238e-06, "loss": 18.9897, "step": 1768 }, { "epoch": 0.032335898513901326, "grad_norm": 6.961508335763593, "learning_rate": 9.999859229654364e-06, "loss": 17.55, "step": 1769 }, { "epoch": 0.032354177710347855, "grad_norm": 7.384652934661802, "learning_rate": 9.99985699967442e-06, "loss": 17.5297, "step": 1770 }, { "epoch": 0.03237245690679438, "grad_norm": 7.411518265275107, "learning_rate": 9.999854752170409e-06, "loss": 17.6561, "step": 1771 }, { "epoch": 0.0323907361032409, "grad_norm": 9.123059934482233, "learning_rate": 9.99985248714234e-06, "loss": 18.4026, "step": 1772 }, { "epoch": 0.03240901529968743, "grad_norm": 8.417191475521836, "learning_rate": 9.999850204590224e-06, "loss": 17.8137, "step": 1773 }, { "epoch": 0.03242729449613395, "grad_norm": 10.61543485670314, "learning_rate": 9.999847904514066e-06, "loss": 18.8098, "step": 1774 }, { "epoch": 0.03244557369258048, "grad_norm": 8.532504542166224, "learning_rate": 9.999845586913876e-06, "loss": 18.3173, "step": 1775 }, { "epoch": 0.032463852889027, "grad_norm": 7.43589045622056, "learning_rate": 9.999843251789659e-06, "loss": 17.5071, "step": 1776 }, { "epoch": 0.03248213208547352, "grad_norm": 8.617687971638343, "learning_rate": 9.999840899141426e-06, "loss": 18.0293, "step": 1777 }, { "epoch": 0.03250041128192005, "grad_norm": 6.778603432440775, "learning_rate": 9.999838528969186e-06, "loss": 17.6333, "step": 1778 }, { "epoch": 0.03251869047836657, "grad_norm": 9.147403305663513, "learning_rate": 9.999836141272945e-06, "loss": 18.2796, "step": 1779 }, { "epoch": 0.03253696967481309, "grad_norm": 8.132127085631549, "learning_rate": 9.99983373605271e-06, "loss": 17.8568, "step": 1780 }, { "epoch": 0.03255524887125962, "grad_norm": 7.365472948844682, "learning_rate": 9.999831313308495e-06, "loss": 17.6739, "step": 1781 }, { "epoch": 0.032573528067706144, "grad_norm": 8.497651957313906, "learning_rate": 9.999828873040303e-06, "loss": 18.2875, "step": 1782 }, { "epoch": 0.032591807264152665, "grad_norm": 8.484429761607142, "learning_rate": 9.999826415248146e-06, "loss": 18.1602, "step": 1783 }, { "epoch": 0.032610086460599194, "grad_norm": 9.548530803029397, "learning_rate": 9.999823939932031e-06, "loss": 19.0218, "step": 1784 }, { "epoch": 0.032628365657045716, "grad_norm": 6.485042818984111, "learning_rate": 9.999821447091967e-06, "loss": 17.4927, "step": 1785 }, { "epoch": 0.03264664485349224, "grad_norm": 8.360808490797504, "learning_rate": 9.999818936727963e-06, "loss": 18.4266, "step": 1786 }, { "epoch": 0.032664924049938766, "grad_norm": 6.781187397765794, "learning_rate": 9.999816408840024e-06, "loss": 17.5942, "step": 1787 }, { "epoch": 0.03268320324638529, "grad_norm": 6.312570278027369, "learning_rate": 9.999813863428167e-06, "loss": 17.3115, "step": 1788 }, { "epoch": 0.03270148244283181, "grad_norm": 8.02380292866149, "learning_rate": 9.999811300492394e-06, "loss": 17.8317, "step": 1789 }, { "epoch": 0.03271976163927834, "grad_norm": 9.2842251146869, "learning_rate": 9.999808720032717e-06, "loss": 18.6453, "step": 1790 }, { "epoch": 0.03273804083572486, "grad_norm": 7.710705644639956, "learning_rate": 9.999806122049144e-06, "loss": 17.9747, "step": 1791 }, { "epoch": 0.03275632003217139, "grad_norm": 7.297062474874555, "learning_rate": 9.999803506541683e-06, "loss": 17.7699, "step": 1792 }, { "epoch": 0.03277459922861791, "grad_norm": 7.1899855935789505, "learning_rate": 9.999800873510347e-06, "loss": 17.5291, "step": 1793 }, { "epoch": 0.03279287842506443, "grad_norm": 7.666647090498624, "learning_rate": 9.999798222955142e-06, "loss": 18.088, "step": 1794 }, { "epoch": 0.03281115762151096, "grad_norm": 8.884884975344518, "learning_rate": 9.999795554876078e-06, "loss": 18.2303, "step": 1795 }, { "epoch": 0.03282943681795748, "grad_norm": 7.208892860970315, "learning_rate": 9.999792869273165e-06, "loss": 17.2642, "step": 1796 }, { "epoch": 0.032847716014404005, "grad_norm": 6.873435544437457, "learning_rate": 9.99979016614641e-06, "loss": 17.6393, "step": 1797 }, { "epoch": 0.032865995210850533, "grad_norm": 8.38149535209502, "learning_rate": 9.999787445495825e-06, "loss": 18.1904, "step": 1798 }, { "epoch": 0.032884274407297055, "grad_norm": 7.783435755971429, "learning_rate": 9.999784707321419e-06, "loss": 18.0207, "step": 1799 }, { "epoch": 0.03290255360374358, "grad_norm": 8.311374471647397, "learning_rate": 9.999781951623202e-06, "loss": 18.3539, "step": 1800 }, { "epoch": 0.032920832800190106, "grad_norm": 8.174072765469017, "learning_rate": 9.999779178401183e-06, "loss": 17.9323, "step": 1801 }, { "epoch": 0.03293911199663663, "grad_norm": 7.232056843641817, "learning_rate": 9.999776387655372e-06, "loss": 17.8261, "step": 1802 }, { "epoch": 0.03295739119308315, "grad_norm": 8.218568405365268, "learning_rate": 9.999773579385779e-06, "loss": 18.2206, "step": 1803 }, { "epoch": 0.03297567038952968, "grad_norm": 8.093855359353427, "learning_rate": 9.999770753592413e-06, "loss": 18.1971, "step": 1804 }, { "epoch": 0.0329939495859762, "grad_norm": 8.901442169351329, "learning_rate": 9.999767910275283e-06, "loss": 18.1861, "step": 1805 }, { "epoch": 0.03301222878242272, "grad_norm": 7.670484431715468, "learning_rate": 9.999765049434403e-06, "loss": 17.7351, "step": 1806 }, { "epoch": 0.03303050797886925, "grad_norm": 8.876821800472102, "learning_rate": 9.999762171069777e-06, "loss": 18.2117, "step": 1807 }, { "epoch": 0.03304878717531577, "grad_norm": 7.313675489864291, "learning_rate": 9.999759275181421e-06, "loss": 17.8017, "step": 1808 }, { "epoch": 0.0330670663717623, "grad_norm": 8.396988203693653, "learning_rate": 9.999756361769342e-06, "loss": 18.074, "step": 1809 }, { "epoch": 0.03308534556820882, "grad_norm": 7.325734865195764, "learning_rate": 9.99975343083355e-06, "loss": 17.8234, "step": 1810 }, { "epoch": 0.033103624764655344, "grad_norm": 7.32385429488872, "learning_rate": 9.999750482374057e-06, "loss": 17.6628, "step": 1811 }, { "epoch": 0.03312190396110187, "grad_norm": 7.584415550741578, "learning_rate": 9.999747516390872e-06, "loss": 17.8922, "step": 1812 }, { "epoch": 0.033140183157548395, "grad_norm": 6.8395529500348164, "learning_rate": 9.999744532884006e-06, "loss": 17.7227, "step": 1813 }, { "epoch": 0.033158462353994916, "grad_norm": 7.67886942130527, "learning_rate": 9.999741531853469e-06, "loss": 17.9991, "step": 1814 }, { "epoch": 0.033176741550441445, "grad_norm": 7.686021597942992, "learning_rate": 9.999738513299273e-06, "loss": 18.0536, "step": 1815 }, { "epoch": 0.03319502074688797, "grad_norm": 7.563094170574433, "learning_rate": 9.999735477221426e-06, "loss": 17.8883, "step": 1816 }, { "epoch": 0.03321329994333449, "grad_norm": 6.360992366183429, "learning_rate": 9.999732423619941e-06, "loss": 17.331, "step": 1817 }, { "epoch": 0.03323157913978102, "grad_norm": 6.9729700691565535, "learning_rate": 9.999729352494827e-06, "loss": 17.9784, "step": 1818 }, { "epoch": 0.03324985833622754, "grad_norm": 8.680611695987398, "learning_rate": 9.999726263846096e-06, "loss": 18.2799, "step": 1819 }, { "epoch": 0.03326813753267406, "grad_norm": 6.483530997935041, "learning_rate": 9.999723157673758e-06, "loss": 17.3454, "step": 1820 }, { "epoch": 0.03328641672912059, "grad_norm": 7.496250705900263, "learning_rate": 9.999720033977824e-06, "loss": 17.8439, "step": 1821 }, { "epoch": 0.03330469592556711, "grad_norm": 7.462704294828384, "learning_rate": 9.999716892758305e-06, "loss": 17.7355, "step": 1822 }, { "epoch": 0.03332297512201363, "grad_norm": 9.644458381912422, "learning_rate": 9.999713734015212e-06, "loss": 18.4893, "step": 1823 }, { "epoch": 0.03334125431846016, "grad_norm": 7.666527601067586, "learning_rate": 9.999710557748557e-06, "loss": 17.9815, "step": 1824 }, { "epoch": 0.033359533514906684, "grad_norm": 7.864692354523234, "learning_rate": 9.99970736395835e-06, "loss": 17.9709, "step": 1825 }, { "epoch": 0.03337781271135321, "grad_norm": 8.264337407873395, "learning_rate": 9.999704152644603e-06, "loss": 17.7038, "step": 1826 }, { "epoch": 0.033396091907799734, "grad_norm": 7.969120135020211, "learning_rate": 9.999700923807326e-06, "loss": 18.1438, "step": 1827 }, { "epoch": 0.033414371104246256, "grad_norm": 8.931013694821827, "learning_rate": 9.999697677446531e-06, "loss": 18.0319, "step": 1828 }, { "epoch": 0.033432650300692784, "grad_norm": 6.194512930888271, "learning_rate": 9.999694413562231e-06, "loss": 17.2692, "step": 1829 }, { "epoch": 0.033450929497139306, "grad_norm": 7.971082112958304, "learning_rate": 9.999691132154435e-06, "loss": 17.8576, "step": 1830 }, { "epoch": 0.03346920869358583, "grad_norm": 9.431769907366288, "learning_rate": 9.999687833223155e-06, "loss": 18.2726, "step": 1831 }, { "epoch": 0.03348748789003236, "grad_norm": 8.729898612350182, "learning_rate": 9.999684516768402e-06, "loss": 18.335, "step": 1832 }, { "epoch": 0.03350576708647888, "grad_norm": 7.8105456031618505, "learning_rate": 9.999681182790191e-06, "loss": 18.1409, "step": 1833 }, { "epoch": 0.0335240462829254, "grad_norm": 7.747628228044708, "learning_rate": 9.99967783128853e-06, "loss": 17.727, "step": 1834 }, { "epoch": 0.03354232547937193, "grad_norm": 6.45043338847289, "learning_rate": 9.999674462263434e-06, "loss": 17.2659, "step": 1835 }, { "epoch": 0.03356060467581845, "grad_norm": 8.008648065130282, "learning_rate": 9.999671075714909e-06, "loss": 17.9419, "step": 1836 }, { "epoch": 0.03357888387226497, "grad_norm": 8.439711355653493, "learning_rate": 9.999667671642975e-06, "loss": 17.9714, "step": 1837 }, { "epoch": 0.0335971630687115, "grad_norm": 8.097927086887031, "learning_rate": 9.999664250047636e-06, "loss": 18.2587, "step": 1838 }, { "epoch": 0.03361544226515802, "grad_norm": 6.714611037306719, "learning_rate": 9.99966081092891e-06, "loss": 17.5767, "step": 1839 }, { "epoch": 0.033633721461604545, "grad_norm": 6.919967731204464, "learning_rate": 9.999657354286806e-06, "loss": 17.6317, "step": 1840 }, { "epoch": 0.03365200065805107, "grad_norm": 8.150508868141921, "learning_rate": 9.999653880121336e-06, "loss": 18.2641, "step": 1841 }, { "epoch": 0.033670279854497595, "grad_norm": 7.5496710279480155, "learning_rate": 9.999650388432513e-06, "loss": 18.2013, "step": 1842 }, { "epoch": 0.033688559050944124, "grad_norm": 7.531807395143053, "learning_rate": 9.99964687922035e-06, "loss": 17.5703, "step": 1843 }, { "epoch": 0.033706838247390646, "grad_norm": 8.08451813867511, "learning_rate": 9.99964335248486e-06, "loss": 18.0743, "step": 1844 }, { "epoch": 0.03372511744383717, "grad_norm": 7.861686401413494, "learning_rate": 9.999639808226051e-06, "loss": 18.0132, "step": 1845 }, { "epoch": 0.033743396640283696, "grad_norm": 9.898916711748324, "learning_rate": 9.999636246443941e-06, "loss": 18.6431, "step": 1846 }, { "epoch": 0.03376167583673022, "grad_norm": 7.855318071091838, "learning_rate": 9.999632667138539e-06, "loss": 17.876, "step": 1847 }, { "epoch": 0.03377995503317674, "grad_norm": 7.994621814215297, "learning_rate": 9.999629070309858e-06, "loss": 18.2559, "step": 1848 }, { "epoch": 0.03379823422962327, "grad_norm": 8.328924256453455, "learning_rate": 9.999625455957912e-06, "loss": 18.039, "step": 1849 }, { "epoch": 0.03381651342606979, "grad_norm": 7.423442934416348, "learning_rate": 9.99962182408271e-06, "loss": 17.8073, "step": 1850 }, { "epoch": 0.03383479262251631, "grad_norm": 8.670113783939007, "learning_rate": 9.99961817468427e-06, "loss": 18.0425, "step": 1851 }, { "epoch": 0.03385307181896284, "grad_norm": 8.341152385144946, "learning_rate": 9.9996145077626e-06, "loss": 18.3054, "step": 1852 }, { "epoch": 0.03387135101540936, "grad_norm": 6.040590275864804, "learning_rate": 9.999610823317716e-06, "loss": 17.2615, "step": 1853 }, { "epoch": 0.033889630211855884, "grad_norm": 6.908992376375361, "learning_rate": 9.99960712134963e-06, "loss": 17.6936, "step": 1854 }, { "epoch": 0.03390790940830241, "grad_norm": 7.7583741758822695, "learning_rate": 9.999603401858354e-06, "loss": 17.8515, "step": 1855 }, { "epoch": 0.033926188604748934, "grad_norm": 7.975857615183032, "learning_rate": 9.999599664843903e-06, "loss": 18.2947, "step": 1856 }, { "epoch": 0.033944467801195456, "grad_norm": 9.107143634269054, "learning_rate": 9.99959591030629e-06, "loss": 18.2875, "step": 1857 }, { "epoch": 0.033962746997641985, "grad_norm": 8.885538749469587, "learning_rate": 9.999592138245524e-06, "loss": 18.4399, "step": 1858 }, { "epoch": 0.03398102619408851, "grad_norm": 7.619089325646182, "learning_rate": 9.999588348661625e-06, "loss": 18.0025, "step": 1859 }, { "epoch": 0.033999305390535035, "grad_norm": 7.923948501979195, "learning_rate": 9.9995845415546e-06, "loss": 17.952, "step": 1860 }, { "epoch": 0.03401758458698156, "grad_norm": 7.556172395801596, "learning_rate": 9.999580716924467e-06, "loss": 17.8152, "step": 1861 }, { "epoch": 0.03403586378342808, "grad_norm": 7.908539024199279, "learning_rate": 9.999576874771236e-06, "loss": 18.2262, "step": 1862 }, { "epoch": 0.03405414297987461, "grad_norm": 7.590176741000986, "learning_rate": 9.999573015094921e-06, "loss": 17.5107, "step": 1863 }, { "epoch": 0.03407242217632113, "grad_norm": 10.501502046595387, "learning_rate": 9.99956913789554e-06, "loss": 19.0452, "step": 1864 }, { "epoch": 0.03409070137276765, "grad_norm": 8.724630148307154, "learning_rate": 9.999565243173099e-06, "loss": 18.1187, "step": 1865 }, { "epoch": 0.03410898056921418, "grad_norm": 6.192883744104812, "learning_rate": 9.999561330927619e-06, "loss": 17.2361, "step": 1866 }, { "epoch": 0.0341272597656607, "grad_norm": 7.32055343370581, "learning_rate": 9.999557401159107e-06, "loss": 18.113, "step": 1867 }, { "epoch": 0.03414553896210722, "grad_norm": 7.015490518486996, "learning_rate": 9.999553453867583e-06, "loss": 17.7648, "step": 1868 }, { "epoch": 0.03416381815855375, "grad_norm": 7.479288524521664, "learning_rate": 9.999549489053056e-06, "loss": 17.6716, "step": 1869 }, { "epoch": 0.034182097355000274, "grad_norm": 9.573109660956048, "learning_rate": 9.999545506715544e-06, "loss": 18.5602, "step": 1870 }, { "epoch": 0.034200376551446796, "grad_norm": 7.398144103675194, "learning_rate": 9.999541506855058e-06, "loss": 17.7404, "step": 1871 }, { "epoch": 0.034218655747893324, "grad_norm": 7.95634507515605, "learning_rate": 9.999537489471612e-06, "loss": 18.0822, "step": 1872 }, { "epoch": 0.034236934944339846, "grad_norm": 7.556869318283255, "learning_rate": 9.999533454565222e-06, "loss": 17.8391, "step": 1873 }, { "epoch": 0.03425521414078637, "grad_norm": 7.993039144663855, "learning_rate": 9.999529402135899e-06, "loss": 18.0815, "step": 1874 }, { "epoch": 0.034273493337232896, "grad_norm": 8.544069018131108, "learning_rate": 9.999525332183662e-06, "loss": 18.4803, "step": 1875 }, { "epoch": 0.03429177253367942, "grad_norm": 9.479112947375867, "learning_rate": 9.99952124470852e-06, "loss": 18.4248, "step": 1876 }, { "epoch": 0.03431005173012595, "grad_norm": 7.197687460803949, "learning_rate": 9.999517139710493e-06, "loss": 17.9663, "step": 1877 }, { "epoch": 0.03432833092657247, "grad_norm": 8.248143550009049, "learning_rate": 9.99951301718959e-06, "loss": 18.4598, "step": 1878 }, { "epoch": 0.03434661012301899, "grad_norm": 8.299650311859507, "learning_rate": 9.999508877145827e-06, "loss": 18.1611, "step": 1879 }, { "epoch": 0.03436488931946552, "grad_norm": 7.568158080890924, "learning_rate": 9.999504719579221e-06, "loss": 17.8868, "step": 1880 }, { "epoch": 0.03438316851591204, "grad_norm": 7.415992966366197, "learning_rate": 9.999500544489785e-06, "loss": 17.7056, "step": 1881 }, { "epoch": 0.03440144771235856, "grad_norm": 8.916900753766164, "learning_rate": 9.999496351877533e-06, "loss": 18.3458, "step": 1882 }, { "epoch": 0.03441972690880509, "grad_norm": 8.935175191478718, "learning_rate": 9.99949214174248e-06, "loss": 18.6321, "step": 1883 }, { "epoch": 0.03443800610525161, "grad_norm": 7.722295304191317, "learning_rate": 9.99948791408464e-06, "loss": 17.9493, "step": 1884 }, { "epoch": 0.034456285301698135, "grad_norm": 8.007541693731353, "learning_rate": 9.999483668904029e-06, "loss": 18.2557, "step": 1885 }, { "epoch": 0.034474564498144664, "grad_norm": 6.611765754812756, "learning_rate": 9.999479406200663e-06, "loss": 17.3192, "step": 1886 }, { "epoch": 0.034492843694591185, "grad_norm": 6.3385108608479985, "learning_rate": 9.999475125974553e-06, "loss": 17.2715, "step": 1887 }, { "epoch": 0.03451112289103771, "grad_norm": 7.520879210002364, "learning_rate": 9.999470828225718e-06, "loss": 17.829, "step": 1888 }, { "epoch": 0.034529402087484236, "grad_norm": 7.494377127553713, "learning_rate": 9.999466512954173e-06, "loss": 17.7672, "step": 1889 }, { "epoch": 0.03454768128393076, "grad_norm": 7.763998443291807, "learning_rate": 9.99946218015993e-06, "loss": 17.8187, "step": 1890 }, { "epoch": 0.03456596048037728, "grad_norm": 7.289771091936282, "learning_rate": 9.999457829843005e-06, "loss": 17.796, "step": 1891 }, { "epoch": 0.03458423967682381, "grad_norm": 6.623575404079241, "learning_rate": 9.999453462003417e-06, "loss": 17.4244, "step": 1892 }, { "epoch": 0.03460251887327033, "grad_norm": 10.017125767421753, "learning_rate": 9.999449076641176e-06, "loss": 18.8644, "step": 1893 }, { "epoch": 0.03462079806971686, "grad_norm": 8.20371403655816, "learning_rate": 9.9994446737563e-06, "loss": 18.0538, "step": 1894 }, { "epoch": 0.03463907726616338, "grad_norm": 9.802161995721683, "learning_rate": 9.999440253348805e-06, "loss": 18.7308, "step": 1895 }, { "epoch": 0.0346573564626099, "grad_norm": 8.322224215761992, "learning_rate": 9.999435815418705e-06, "loss": 18.3015, "step": 1896 }, { "epoch": 0.03467563565905643, "grad_norm": 8.148581700632096, "learning_rate": 9.999431359966017e-06, "loss": 17.8139, "step": 1897 }, { "epoch": 0.03469391485550295, "grad_norm": 8.92816337345986, "learning_rate": 9.999426886990758e-06, "loss": 18.0394, "step": 1898 }, { "epoch": 0.034712194051949474, "grad_norm": 8.825948598919227, "learning_rate": 9.999422396492937e-06, "loss": 18.4764, "step": 1899 }, { "epoch": 0.034730473248396, "grad_norm": 8.627968631064658, "learning_rate": 9.999417888472578e-06, "loss": 17.8798, "step": 1900 }, { "epoch": 0.034748752444842525, "grad_norm": 7.6009155117931675, "learning_rate": 9.999413362929691e-06, "loss": 18.207, "step": 1901 }, { "epoch": 0.034767031641289046, "grad_norm": 7.870549136132222, "learning_rate": 9.999408819864296e-06, "loss": 17.9298, "step": 1902 }, { "epoch": 0.034785310837735575, "grad_norm": 6.961455876093974, "learning_rate": 9.999404259276404e-06, "loss": 17.6394, "step": 1903 }, { "epoch": 0.0348035900341821, "grad_norm": 7.456431814896433, "learning_rate": 9.999399681166036e-06, "loss": 18.1099, "step": 1904 }, { "epoch": 0.03482186923062862, "grad_norm": 7.428766418167107, "learning_rate": 9.999395085533205e-06, "loss": 17.8307, "step": 1905 }, { "epoch": 0.03484014842707515, "grad_norm": 8.163133227231434, "learning_rate": 9.99939047237793e-06, "loss": 18.1721, "step": 1906 }, { "epoch": 0.03485842762352167, "grad_norm": 6.325296098614813, "learning_rate": 9.999385841700224e-06, "loss": 17.3985, "step": 1907 }, { "epoch": 0.03487670681996819, "grad_norm": 8.425185552349872, "learning_rate": 9.999381193500104e-06, "loss": 18.0274, "step": 1908 }, { "epoch": 0.03489498601641472, "grad_norm": 8.931619125908387, "learning_rate": 9.999376527777587e-06, "loss": 18.5191, "step": 1909 }, { "epoch": 0.03491326521286124, "grad_norm": 7.408835504737836, "learning_rate": 9.999371844532689e-06, "loss": 17.9273, "step": 1910 }, { "epoch": 0.03493154440930777, "grad_norm": 7.623356726961149, "learning_rate": 9.999367143765428e-06, "loss": 18.272, "step": 1911 }, { "epoch": 0.03494982360575429, "grad_norm": 7.95269138874238, "learning_rate": 9.999362425475817e-06, "loss": 17.9312, "step": 1912 }, { "epoch": 0.034968102802200814, "grad_norm": 7.462108851980732, "learning_rate": 9.999357689663875e-06, "loss": 17.7149, "step": 1913 }, { "epoch": 0.03498638199864734, "grad_norm": 7.283136697952984, "learning_rate": 9.999352936329619e-06, "loss": 17.6428, "step": 1914 }, { "epoch": 0.035004661195093864, "grad_norm": 8.192691586643772, "learning_rate": 9.999348165473064e-06, "loss": 18.2675, "step": 1915 }, { "epoch": 0.035022940391540386, "grad_norm": 8.498990045712594, "learning_rate": 9.999343377094227e-06, "loss": 18.2104, "step": 1916 }, { "epoch": 0.035041219587986915, "grad_norm": 8.765368366785685, "learning_rate": 9.999338571193126e-06, "loss": 17.692, "step": 1917 }, { "epoch": 0.035059498784433436, "grad_norm": 8.098246273684264, "learning_rate": 9.999333747769777e-06, "loss": 18.1488, "step": 1918 }, { "epoch": 0.03507777798087996, "grad_norm": 6.413145284090817, "learning_rate": 9.999328906824198e-06, "loss": 17.4056, "step": 1919 }, { "epoch": 0.03509605717732649, "grad_norm": 7.89135265827549, "learning_rate": 9.999324048356403e-06, "loss": 17.845, "step": 1920 }, { "epoch": 0.03511433637377301, "grad_norm": 7.709013863430543, "learning_rate": 9.999319172366412e-06, "loss": 17.934, "step": 1921 }, { "epoch": 0.03513261557021953, "grad_norm": 8.564207535733933, "learning_rate": 9.999314278854242e-06, "loss": 18.1819, "step": 1922 }, { "epoch": 0.03515089476666606, "grad_norm": 7.149263373420875, "learning_rate": 9.999309367819907e-06, "loss": 17.7353, "step": 1923 }, { "epoch": 0.03516917396311258, "grad_norm": 8.18832302643092, "learning_rate": 9.999304439263428e-06, "loss": 18.0075, "step": 1924 }, { "epoch": 0.0351874531595591, "grad_norm": 8.182007369919635, "learning_rate": 9.999299493184822e-06, "loss": 18.1542, "step": 1925 }, { "epoch": 0.03520573235600563, "grad_norm": 9.944147453652565, "learning_rate": 9.999294529584102e-06, "loss": 18.7217, "step": 1926 }, { "epoch": 0.03522401155245215, "grad_norm": 7.885567662621238, "learning_rate": 9.999289548461292e-06, "loss": 18.1261, "step": 1927 }, { "epoch": 0.03524229074889868, "grad_norm": 7.620667758005586, "learning_rate": 9.999284549816403e-06, "loss": 17.7779, "step": 1928 }, { "epoch": 0.0352605699453452, "grad_norm": 9.852841139533853, "learning_rate": 9.999279533649458e-06, "loss": 18.6092, "step": 1929 }, { "epoch": 0.035278849141791725, "grad_norm": 7.928579728197884, "learning_rate": 9.99927449996047e-06, "loss": 17.9599, "step": 1930 }, { "epoch": 0.035297128338238254, "grad_norm": 8.184321980595112, "learning_rate": 9.999269448749461e-06, "loss": 18.1889, "step": 1931 }, { "epoch": 0.035315407534684776, "grad_norm": 8.402935327668816, "learning_rate": 9.999264380016444e-06, "loss": 18.3697, "step": 1932 }, { "epoch": 0.0353336867311313, "grad_norm": 9.086470670696299, "learning_rate": 9.99925929376144e-06, "loss": 18.5396, "step": 1933 }, { "epoch": 0.035351965927577826, "grad_norm": 8.490461255836463, "learning_rate": 9.999254189984466e-06, "loss": 18.2341, "step": 1934 }, { "epoch": 0.03537024512402435, "grad_norm": 7.094884201354597, "learning_rate": 9.999249068685539e-06, "loss": 17.5918, "step": 1935 }, { "epoch": 0.03538852432047087, "grad_norm": 8.499136896972471, "learning_rate": 9.999243929864679e-06, "loss": 18.1365, "step": 1936 }, { "epoch": 0.0354068035169174, "grad_norm": 7.535927532248468, "learning_rate": 9.999238773521902e-06, "loss": 17.9605, "step": 1937 }, { "epoch": 0.03542508271336392, "grad_norm": 8.389999966836863, "learning_rate": 9.999233599657228e-06, "loss": 17.7126, "step": 1938 }, { "epoch": 0.03544336190981044, "grad_norm": 8.287982338740152, "learning_rate": 9.999228408270674e-06, "loss": 18.2419, "step": 1939 }, { "epoch": 0.03546164110625697, "grad_norm": 7.9503520158933165, "learning_rate": 9.999223199362257e-06, "loss": 18.0246, "step": 1940 }, { "epoch": 0.03547992030270349, "grad_norm": 9.21418739958329, "learning_rate": 9.999217972931998e-06, "loss": 18.7629, "step": 1941 }, { "epoch": 0.035498199499150014, "grad_norm": 7.782900053670787, "learning_rate": 9.999212728979912e-06, "loss": 17.839, "step": 1942 }, { "epoch": 0.03551647869559654, "grad_norm": 8.143164639304983, "learning_rate": 9.999207467506022e-06, "loss": 18.0542, "step": 1943 }, { "epoch": 0.035534757892043065, "grad_norm": 7.389277482561079, "learning_rate": 9.999202188510341e-06, "loss": 17.6361, "step": 1944 }, { "epoch": 0.03555303708848959, "grad_norm": 10.202846117878034, "learning_rate": 9.999196891992892e-06, "loss": 18.7116, "step": 1945 }, { "epoch": 0.035571316284936115, "grad_norm": 7.494605340684641, "learning_rate": 9.999191577953692e-06, "loss": 18.011, "step": 1946 }, { "epoch": 0.03558959548138264, "grad_norm": 8.38155763029932, "learning_rate": 9.999186246392756e-06, "loss": 18.3662, "step": 1947 }, { "epoch": 0.035607874677829165, "grad_norm": 7.850468999216842, "learning_rate": 9.999180897310108e-06, "loss": 17.9277, "step": 1948 }, { "epoch": 0.03562615387427569, "grad_norm": 7.2770297355717615, "learning_rate": 9.999175530705765e-06, "loss": 17.8763, "step": 1949 }, { "epoch": 0.03564443307072221, "grad_norm": 7.497010366244362, "learning_rate": 9.999170146579746e-06, "loss": 17.9092, "step": 1950 }, { "epoch": 0.03566271226716874, "grad_norm": 7.147592489045447, "learning_rate": 9.999164744932069e-06, "loss": 17.5548, "step": 1951 }, { "epoch": 0.03568099146361526, "grad_norm": 7.1198846531257995, "learning_rate": 9.999159325762753e-06, "loss": 17.7998, "step": 1952 }, { "epoch": 0.03569927066006178, "grad_norm": 8.128197845288225, "learning_rate": 9.999153889071818e-06, "loss": 17.9989, "step": 1953 }, { "epoch": 0.03571754985650831, "grad_norm": 7.423488993866567, "learning_rate": 9.999148434859282e-06, "loss": 17.7657, "step": 1954 }, { "epoch": 0.03573582905295483, "grad_norm": 9.400429043325508, "learning_rate": 9.999142963125164e-06, "loss": 18.5479, "step": 1955 }, { "epoch": 0.03575410824940135, "grad_norm": 10.386644570380422, "learning_rate": 9.999137473869484e-06, "loss": 18.5998, "step": 1956 }, { "epoch": 0.03577238744584788, "grad_norm": 7.552400237830502, "learning_rate": 9.999131967092262e-06, "loss": 17.7759, "step": 1957 }, { "epoch": 0.035790666642294404, "grad_norm": 7.230520694917503, "learning_rate": 9.999126442793515e-06, "loss": 17.8236, "step": 1958 }, { "epoch": 0.035808945838740926, "grad_norm": 9.240852364016215, "learning_rate": 9.999120900973264e-06, "loss": 18.3381, "step": 1959 }, { "epoch": 0.035827225035187454, "grad_norm": 8.479502318693042, "learning_rate": 9.999115341631528e-06, "loss": 18.5659, "step": 1960 }, { "epoch": 0.035845504231633976, "grad_norm": 8.255809598397338, "learning_rate": 9.999109764768328e-06, "loss": 17.9439, "step": 1961 }, { "epoch": 0.035863783428080505, "grad_norm": 7.10311904635282, "learning_rate": 9.99910417038368e-06, "loss": 17.5031, "step": 1962 }, { "epoch": 0.03588206262452703, "grad_norm": 8.354939519427754, "learning_rate": 9.999098558477606e-06, "loss": 18.2835, "step": 1963 }, { "epoch": 0.03590034182097355, "grad_norm": 8.169834269388787, "learning_rate": 9.999092929050126e-06, "loss": 18.0539, "step": 1964 }, { "epoch": 0.03591862101742008, "grad_norm": 7.209724239423868, "learning_rate": 9.99908728210126e-06, "loss": 17.5991, "step": 1965 }, { "epoch": 0.0359369002138666, "grad_norm": 7.6799187576786245, "learning_rate": 9.999081617631026e-06, "loss": 18.0274, "step": 1966 }, { "epoch": 0.03595517941031312, "grad_norm": 8.902126792562187, "learning_rate": 9.999075935639445e-06, "loss": 18.6911, "step": 1967 }, { "epoch": 0.03597345860675965, "grad_norm": 7.673549516548946, "learning_rate": 9.999070236126536e-06, "loss": 18.1461, "step": 1968 }, { "epoch": 0.03599173780320617, "grad_norm": 7.166515808137726, "learning_rate": 9.99906451909232e-06, "loss": 17.4114, "step": 1969 }, { "epoch": 0.03601001699965269, "grad_norm": 7.428286380871508, "learning_rate": 9.999058784536816e-06, "loss": 17.7822, "step": 1970 }, { "epoch": 0.03602829619609922, "grad_norm": 8.27330724470139, "learning_rate": 9.999053032460044e-06, "loss": 17.432, "step": 1971 }, { "epoch": 0.03604657539254574, "grad_norm": 7.744580295743159, "learning_rate": 9.999047262862027e-06, "loss": 17.8798, "step": 1972 }, { "epoch": 0.036064854588992265, "grad_norm": 7.065151922031846, "learning_rate": 9.999041475742783e-06, "loss": 18.016, "step": 1973 }, { "epoch": 0.036083133785438794, "grad_norm": 7.15757239909588, "learning_rate": 9.99903567110233e-06, "loss": 17.8817, "step": 1974 }, { "epoch": 0.036101412981885316, "grad_norm": 8.853898794161905, "learning_rate": 9.999029848940694e-06, "loss": 17.9513, "step": 1975 }, { "epoch": 0.03611969217833184, "grad_norm": 6.892125342950771, "learning_rate": 9.99902400925789e-06, "loss": 17.4626, "step": 1976 }, { "epoch": 0.036137971374778366, "grad_norm": 8.855795305081163, "learning_rate": 9.999018152053942e-06, "loss": 18.453, "step": 1977 }, { "epoch": 0.03615625057122489, "grad_norm": 9.553477180571765, "learning_rate": 9.999012277328868e-06, "loss": 19.0001, "step": 1978 }, { "epoch": 0.036174529767671416, "grad_norm": 7.146861108136913, "learning_rate": 9.99900638508269e-06, "loss": 17.5435, "step": 1979 }, { "epoch": 0.03619280896411794, "grad_norm": 7.083904017713698, "learning_rate": 9.999000475315429e-06, "loss": 17.6269, "step": 1980 }, { "epoch": 0.03621108816056446, "grad_norm": 8.181874569690923, "learning_rate": 9.998994548027106e-06, "loss": 18.339, "step": 1981 }, { "epoch": 0.03622936735701099, "grad_norm": 8.15235850629926, "learning_rate": 9.998988603217738e-06, "loss": 18.3941, "step": 1982 }, { "epoch": 0.03624764655345751, "grad_norm": 8.495091145565938, "learning_rate": 9.998982640887352e-06, "loss": 17.9946, "step": 1983 }, { "epoch": 0.03626592574990403, "grad_norm": 6.4835607043705785, "learning_rate": 9.998976661035964e-06, "loss": 17.6544, "step": 1984 }, { "epoch": 0.03628420494635056, "grad_norm": 8.841987597518289, "learning_rate": 9.998970663663596e-06, "loss": 18.0508, "step": 1985 }, { "epoch": 0.03630248414279708, "grad_norm": 10.217090662048435, "learning_rate": 9.998964648770271e-06, "loss": 18.1618, "step": 1986 }, { "epoch": 0.036320763339243604, "grad_norm": 8.019689206397057, "learning_rate": 9.998958616356006e-06, "loss": 17.9514, "step": 1987 }, { "epoch": 0.03633904253569013, "grad_norm": 6.991445236201103, "learning_rate": 9.998952566420828e-06, "loss": 17.8643, "step": 1988 }, { "epoch": 0.036357321732136655, "grad_norm": 7.789091397301192, "learning_rate": 9.998946498964755e-06, "loss": 18.2038, "step": 1989 }, { "epoch": 0.03637560092858318, "grad_norm": 7.141653228475028, "learning_rate": 9.998940413987805e-06, "loss": 17.8061, "step": 1990 }, { "epoch": 0.036393880125029705, "grad_norm": 8.066331113358645, "learning_rate": 9.998934311490005e-06, "loss": 18.1814, "step": 1991 }, { "epoch": 0.03641215932147623, "grad_norm": 7.335124048620984, "learning_rate": 9.998928191471376e-06, "loss": 17.875, "step": 1992 }, { "epoch": 0.03643043851792275, "grad_norm": 9.112505337222876, "learning_rate": 9.998922053931935e-06, "loss": 18.6293, "step": 1993 }, { "epoch": 0.03644871771436928, "grad_norm": 7.54777861867234, "learning_rate": 9.998915898871705e-06, "loss": 17.8176, "step": 1994 }, { "epoch": 0.0364669969108158, "grad_norm": 8.540294308523698, "learning_rate": 9.998909726290711e-06, "loss": 18.4626, "step": 1995 }, { "epoch": 0.03648527610726233, "grad_norm": 8.571296800654416, "learning_rate": 9.99890353618897e-06, "loss": 18.1324, "step": 1996 }, { "epoch": 0.03650355530370885, "grad_norm": 8.426795245663996, "learning_rate": 9.998897328566506e-06, "loss": 17.8787, "step": 1997 }, { "epoch": 0.03652183450015537, "grad_norm": 7.607679739554118, "learning_rate": 9.998891103423343e-06, "loss": 17.7262, "step": 1998 }, { "epoch": 0.0365401136966019, "grad_norm": 7.353285686355623, "learning_rate": 9.998884860759499e-06, "loss": 17.7066, "step": 1999 }, { "epoch": 0.03655839289304842, "grad_norm": 8.198968849277952, "learning_rate": 9.998878600574998e-06, "loss": 18.0655, "step": 2000 }, { "epoch": 0.036576672089494944, "grad_norm": 7.90908222211619, "learning_rate": 9.998872322869859e-06, "loss": 18.1217, "step": 2001 }, { "epoch": 0.03659495128594147, "grad_norm": 7.434964235402946, "learning_rate": 9.99886602764411e-06, "loss": 17.6969, "step": 2002 }, { "epoch": 0.036613230482387994, "grad_norm": 8.434196775394652, "learning_rate": 9.998859714897765e-06, "loss": 18.1832, "step": 2003 }, { "epoch": 0.036631509678834516, "grad_norm": 7.846211573266152, "learning_rate": 9.998853384630853e-06, "loss": 17.8832, "step": 2004 }, { "epoch": 0.036649788875281045, "grad_norm": 8.75645058921149, "learning_rate": 9.998847036843394e-06, "loss": 18.4057, "step": 2005 }, { "epoch": 0.036668068071727566, "grad_norm": 7.4793447817184955, "learning_rate": 9.998840671535411e-06, "loss": 17.7704, "step": 2006 }, { "epoch": 0.03668634726817409, "grad_norm": 8.060297496152803, "learning_rate": 9.998834288706922e-06, "loss": 17.9751, "step": 2007 }, { "epoch": 0.03670462646462062, "grad_norm": 6.269820340808365, "learning_rate": 9.998827888357956e-06, "loss": 17.3203, "step": 2008 }, { "epoch": 0.03672290566106714, "grad_norm": 7.616782429628068, "learning_rate": 9.998821470488529e-06, "loss": 17.8384, "step": 2009 }, { "epoch": 0.03674118485751366, "grad_norm": 8.33085901188751, "learning_rate": 9.998815035098668e-06, "loss": 18.0717, "step": 2010 }, { "epoch": 0.03675946405396019, "grad_norm": 7.723734714418637, "learning_rate": 9.998808582188393e-06, "loss": 17.8507, "step": 2011 }, { "epoch": 0.03677774325040671, "grad_norm": 9.018866210300486, "learning_rate": 9.998802111757729e-06, "loss": 18.6216, "step": 2012 }, { "epoch": 0.03679602244685324, "grad_norm": 6.885161674366217, "learning_rate": 9.998795623806697e-06, "loss": 17.5822, "step": 2013 }, { "epoch": 0.03681430164329976, "grad_norm": 8.091115820985259, "learning_rate": 9.99878911833532e-06, "loss": 17.9485, "step": 2014 }, { "epoch": 0.03683258083974628, "grad_norm": 6.987997512528637, "learning_rate": 9.998782595343621e-06, "loss": 17.4912, "step": 2015 }, { "epoch": 0.03685086003619281, "grad_norm": 8.222991809959506, "learning_rate": 9.998776054831623e-06, "loss": 18.1029, "step": 2016 }, { "epoch": 0.036869139232639334, "grad_norm": 9.05027801037263, "learning_rate": 9.998769496799347e-06, "loss": 18.2343, "step": 2017 }, { "epoch": 0.036887418429085855, "grad_norm": 9.128431730728373, "learning_rate": 9.99876292124682e-06, "loss": 18.5984, "step": 2018 }, { "epoch": 0.036905697625532384, "grad_norm": 7.954206344639059, "learning_rate": 9.998756328174062e-06, "loss": 18.0505, "step": 2019 }, { "epoch": 0.036923976821978906, "grad_norm": 8.188015996937526, "learning_rate": 9.998749717581097e-06, "loss": 17.894, "step": 2020 }, { "epoch": 0.03694225601842543, "grad_norm": 7.75412895491765, "learning_rate": 9.998743089467949e-06, "loss": 18.148, "step": 2021 }, { "epoch": 0.036960535214871956, "grad_norm": 8.109647665884854, "learning_rate": 9.998736443834637e-06, "loss": 18.3236, "step": 2022 }, { "epoch": 0.03697881441131848, "grad_norm": 7.780509706408551, "learning_rate": 9.99872978068119e-06, "loss": 18.0863, "step": 2023 }, { "epoch": 0.036997093607765, "grad_norm": 9.09458460854067, "learning_rate": 9.998723100007628e-06, "loss": 18.5133, "step": 2024 }, { "epoch": 0.03701537280421153, "grad_norm": 9.863279863855022, "learning_rate": 9.998716401813975e-06, "loss": 19.0626, "step": 2025 }, { "epoch": 0.03703365200065805, "grad_norm": 7.253133859486355, "learning_rate": 9.998709686100256e-06, "loss": 17.9412, "step": 2026 }, { "epoch": 0.03705193119710457, "grad_norm": 7.932175176406323, "learning_rate": 9.998702952866494e-06, "loss": 17.6815, "step": 2027 }, { "epoch": 0.0370702103935511, "grad_norm": 7.339503659937987, "learning_rate": 9.99869620211271e-06, "loss": 17.6606, "step": 2028 }, { "epoch": 0.03708848958999762, "grad_norm": 7.907738127442454, "learning_rate": 9.99868943383893e-06, "loss": 18.2433, "step": 2029 }, { "epoch": 0.03710676878644415, "grad_norm": 7.775746964937671, "learning_rate": 9.998682648045178e-06, "loss": 17.9101, "step": 2030 }, { "epoch": 0.03712504798289067, "grad_norm": 9.04856581963131, "learning_rate": 9.998675844731475e-06, "loss": 18.1439, "step": 2031 }, { "epoch": 0.037143327179337195, "grad_norm": 7.3359385924263485, "learning_rate": 9.99866902389785e-06, "loss": 18.0449, "step": 2032 }, { "epoch": 0.03716160637578372, "grad_norm": 7.519600586027241, "learning_rate": 9.998662185544323e-06, "loss": 17.8224, "step": 2033 }, { "epoch": 0.037179885572230245, "grad_norm": 7.987638704160971, "learning_rate": 9.998655329670918e-06, "loss": 18.2923, "step": 2034 }, { "epoch": 0.03719816476867677, "grad_norm": 7.681158036608714, "learning_rate": 9.998648456277659e-06, "loss": 18.1971, "step": 2035 }, { "epoch": 0.037216443965123296, "grad_norm": 8.12090519344087, "learning_rate": 9.998641565364573e-06, "loss": 18.4449, "step": 2036 }, { "epoch": 0.03723472316156982, "grad_norm": 7.51928492652649, "learning_rate": 9.99863465693168e-06, "loss": 17.6491, "step": 2037 }, { "epoch": 0.03725300235801634, "grad_norm": 7.913103109118726, "learning_rate": 9.998627730979008e-06, "loss": 17.7631, "step": 2038 }, { "epoch": 0.03727128155446287, "grad_norm": 7.669560039298758, "learning_rate": 9.99862078750658e-06, "loss": 17.8085, "step": 2039 }, { "epoch": 0.03728956075090939, "grad_norm": 8.251834017752726, "learning_rate": 9.998613826514418e-06, "loss": 18.243, "step": 2040 }, { "epoch": 0.03730783994735591, "grad_norm": 8.873049721162209, "learning_rate": 9.998606848002548e-06, "loss": 18.4131, "step": 2041 }, { "epoch": 0.03732611914380244, "grad_norm": 7.451580746322862, "learning_rate": 9.998599851970997e-06, "loss": 17.9024, "step": 2042 }, { "epoch": 0.03734439834024896, "grad_norm": 7.697008875890094, "learning_rate": 9.998592838419787e-06, "loss": 18.1986, "step": 2043 }, { "epoch": 0.037362677536695484, "grad_norm": 8.120055913869294, "learning_rate": 9.998585807348942e-06, "loss": 17.8143, "step": 2044 }, { "epoch": 0.03738095673314201, "grad_norm": 8.278267453559662, "learning_rate": 9.998578758758486e-06, "loss": 18.3097, "step": 2045 }, { "epoch": 0.037399235929588534, "grad_norm": 8.153431653087639, "learning_rate": 9.998571692648447e-06, "loss": 18.1901, "step": 2046 }, { "epoch": 0.03741751512603506, "grad_norm": 8.006834892764367, "learning_rate": 9.998564609018848e-06, "loss": 18.4237, "step": 2047 }, { "epoch": 0.037435794322481585, "grad_norm": 8.303060078555337, "learning_rate": 9.998557507869714e-06, "loss": 18.3405, "step": 2048 }, { "epoch": 0.037454073518928106, "grad_norm": 7.2265990118581955, "learning_rate": 9.99855038920107e-06, "loss": 17.6187, "step": 2049 }, { "epoch": 0.037472352715374635, "grad_norm": 8.041556758724703, "learning_rate": 9.998543253012938e-06, "loss": 17.9432, "step": 2050 }, { "epoch": 0.03749063191182116, "grad_norm": 6.991937103045851, "learning_rate": 9.998536099305348e-06, "loss": 17.675, "step": 2051 }, { "epoch": 0.03750891110826768, "grad_norm": 8.035188795438987, "learning_rate": 9.998528928078321e-06, "loss": 18.1265, "step": 2052 }, { "epoch": 0.03752719030471421, "grad_norm": 7.620085867284849, "learning_rate": 9.998521739331886e-06, "loss": 17.9205, "step": 2053 }, { "epoch": 0.03754546950116073, "grad_norm": 9.072221427512684, "learning_rate": 9.998514533066066e-06, "loss": 18.5335, "step": 2054 }, { "epoch": 0.03756374869760725, "grad_norm": 8.568238815700667, "learning_rate": 9.998507309280886e-06, "loss": 18.0401, "step": 2055 }, { "epoch": 0.03758202789405378, "grad_norm": 10.039383540553157, "learning_rate": 9.99850006797637e-06, "loss": 18.831, "step": 2056 }, { "epoch": 0.0376003070905003, "grad_norm": 6.1131761158643245, "learning_rate": 9.998492809152545e-06, "loss": 17.1808, "step": 2057 }, { "epoch": 0.03761858628694682, "grad_norm": 6.7440363014529074, "learning_rate": 9.99848553280944e-06, "loss": 17.7552, "step": 2058 }, { "epoch": 0.03763686548339335, "grad_norm": 6.955143337867716, "learning_rate": 9.998478238947074e-06, "loss": 17.4316, "step": 2059 }, { "epoch": 0.03765514467983987, "grad_norm": 7.28685085214896, "learning_rate": 9.998470927565476e-06, "loss": 17.5236, "step": 2060 }, { "epoch": 0.037673423876286395, "grad_norm": 9.235502043145019, "learning_rate": 9.998463598664669e-06, "loss": 18.3993, "step": 2061 }, { "epoch": 0.037691703072732924, "grad_norm": 8.432838318719247, "learning_rate": 9.998456252244684e-06, "loss": 18.4229, "step": 2062 }, { "epoch": 0.037709982269179446, "grad_norm": 7.339767176483528, "learning_rate": 9.998448888305543e-06, "loss": 17.6698, "step": 2063 }, { "epoch": 0.037728261465625974, "grad_norm": 6.833365341315317, "learning_rate": 9.998441506847271e-06, "loss": 17.5013, "step": 2064 }, { "epoch": 0.037746540662072496, "grad_norm": 6.318341016075187, "learning_rate": 9.998434107869897e-06, "loss": 17.2457, "step": 2065 }, { "epoch": 0.03776481985851902, "grad_norm": 7.97844973845759, "learning_rate": 9.998426691373443e-06, "loss": 18.2731, "step": 2066 }, { "epoch": 0.03778309905496555, "grad_norm": 9.022125937561709, "learning_rate": 9.99841925735794e-06, "loss": 18.7425, "step": 2067 }, { "epoch": 0.03780137825141207, "grad_norm": 8.080430879740227, "learning_rate": 9.99841180582341e-06, "loss": 18.5238, "step": 2068 }, { "epoch": 0.03781965744785859, "grad_norm": 7.334230547063637, "learning_rate": 9.99840433676988e-06, "loss": 17.6445, "step": 2069 }, { "epoch": 0.03783793664430512, "grad_norm": 7.8255129735717155, "learning_rate": 9.998396850197376e-06, "loss": 18.0013, "step": 2070 }, { "epoch": 0.03785621584075164, "grad_norm": 7.146299981961723, "learning_rate": 9.998389346105925e-06, "loss": 17.7126, "step": 2071 }, { "epoch": 0.03787449503719816, "grad_norm": 7.973704356003748, "learning_rate": 9.998381824495556e-06, "loss": 17.6137, "step": 2072 }, { "epoch": 0.03789277423364469, "grad_norm": 8.325580556641624, "learning_rate": 9.998374285366289e-06, "loss": 18.1783, "step": 2073 }, { "epoch": 0.03791105343009121, "grad_norm": 8.501162206117245, "learning_rate": 9.998366728718155e-06, "loss": 18.1392, "step": 2074 }, { "epoch": 0.037929332626537735, "grad_norm": 6.5954947334230365, "learning_rate": 9.998359154551178e-06, "loss": 17.4255, "step": 2075 }, { "epoch": 0.03794761182298426, "grad_norm": 9.095095045655555, "learning_rate": 9.998351562865387e-06, "loss": 18.9657, "step": 2076 }, { "epoch": 0.037965891019430785, "grad_norm": 8.181969869684231, "learning_rate": 9.99834395366081e-06, "loss": 18.2521, "step": 2077 }, { "epoch": 0.03798417021587731, "grad_norm": 7.557685423507036, "learning_rate": 9.998336326937468e-06, "loss": 17.8126, "step": 2078 }, { "epoch": 0.038002449412323835, "grad_norm": 8.69340557232809, "learning_rate": 9.998328682695391e-06, "loss": 18.5631, "step": 2079 }, { "epoch": 0.03802072860877036, "grad_norm": 7.546948153844951, "learning_rate": 9.998321020934607e-06, "loss": 18.0963, "step": 2080 }, { "epoch": 0.038039007805216886, "grad_norm": 8.297753147884801, "learning_rate": 9.998313341655142e-06, "loss": 18.312, "step": 2081 }, { "epoch": 0.03805728700166341, "grad_norm": 6.922271759890242, "learning_rate": 9.99830564485702e-06, "loss": 17.6805, "step": 2082 }, { "epoch": 0.03807556619810993, "grad_norm": 7.008500913491092, "learning_rate": 9.998297930540273e-06, "loss": 17.6757, "step": 2083 }, { "epoch": 0.03809384539455646, "grad_norm": 9.141224497140042, "learning_rate": 9.998290198704924e-06, "loss": 18.5254, "step": 2084 }, { "epoch": 0.03811212459100298, "grad_norm": 6.172946546054869, "learning_rate": 9.998282449351002e-06, "loss": 17.2599, "step": 2085 }, { "epoch": 0.0381304037874495, "grad_norm": 8.891334316381819, "learning_rate": 9.998274682478535e-06, "loss": 18.6278, "step": 2086 }, { "epoch": 0.03814868298389603, "grad_norm": 8.313104732808613, "learning_rate": 9.998266898087546e-06, "loss": 18.0706, "step": 2087 }, { "epoch": 0.03816696218034255, "grad_norm": 7.617135262532365, "learning_rate": 9.998259096178067e-06, "loss": 18.0323, "step": 2088 }, { "epoch": 0.038185241376789074, "grad_norm": 7.312424259446676, "learning_rate": 9.998251276750124e-06, "loss": 17.57, "step": 2089 }, { "epoch": 0.0382035205732356, "grad_norm": 7.557625541284032, "learning_rate": 9.998243439803743e-06, "loss": 17.8035, "step": 2090 }, { "epoch": 0.038221799769682124, "grad_norm": 8.286744336007226, "learning_rate": 9.998235585338953e-06, "loss": 18.3543, "step": 2091 }, { "epoch": 0.038240078966128646, "grad_norm": 8.51951773168702, "learning_rate": 9.998227713355782e-06, "loss": 18.3313, "step": 2092 }, { "epoch": 0.038258358162575175, "grad_norm": 7.784579198221027, "learning_rate": 9.998219823854255e-06, "loss": 18.3275, "step": 2093 }, { "epoch": 0.0382766373590217, "grad_norm": 6.878269528459254, "learning_rate": 9.998211916834402e-06, "loss": 17.506, "step": 2094 }, { "epoch": 0.03829491655546822, "grad_norm": 6.077809690735215, "learning_rate": 9.99820399229625e-06, "loss": 17.1299, "step": 2095 }, { "epoch": 0.03831319575191475, "grad_norm": 7.245859913601165, "learning_rate": 9.998196050239827e-06, "loss": 17.6635, "step": 2096 }, { "epoch": 0.03833147494836127, "grad_norm": 7.879792392452729, "learning_rate": 9.998188090665159e-06, "loss": 17.8758, "step": 2097 }, { "epoch": 0.0383497541448078, "grad_norm": 6.97668441078997, "learning_rate": 9.998180113572277e-06, "loss": 17.6424, "step": 2098 }, { "epoch": 0.03836803334125432, "grad_norm": 7.754336743164202, "learning_rate": 9.998172118961207e-06, "loss": 18.2514, "step": 2099 }, { "epoch": 0.03838631253770084, "grad_norm": 8.158229133475526, "learning_rate": 9.998164106831978e-06, "loss": 18.0946, "step": 2100 }, { "epoch": 0.03840459173414737, "grad_norm": 7.938238548236804, "learning_rate": 9.998156077184617e-06, "loss": 18.078, "step": 2101 }, { "epoch": 0.03842287093059389, "grad_norm": 8.48571756895092, "learning_rate": 9.998148030019152e-06, "loss": 18.186, "step": 2102 }, { "epoch": 0.03844115012704041, "grad_norm": 6.878298713002423, "learning_rate": 9.998139965335613e-06, "loss": 17.4737, "step": 2103 }, { "epoch": 0.03845942932348694, "grad_norm": 8.569729559872155, "learning_rate": 9.998131883134028e-06, "loss": 18.0949, "step": 2104 }, { "epoch": 0.038477708519933464, "grad_norm": 7.296476454258555, "learning_rate": 9.998123783414421e-06, "loss": 18.0296, "step": 2105 }, { "epoch": 0.038495987716379985, "grad_norm": 8.22560600922055, "learning_rate": 9.998115666176828e-06, "loss": 17.8063, "step": 2106 }, { "epoch": 0.038514266912826514, "grad_norm": 8.49243278887264, "learning_rate": 9.99810753142127e-06, "loss": 18.2786, "step": 2107 }, { "epoch": 0.038532546109273036, "grad_norm": 7.4911301907547685, "learning_rate": 9.99809937914778e-06, "loss": 17.9241, "step": 2108 }, { "epoch": 0.03855082530571956, "grad_norm": 8.29966767865699, "learning_rate": 9.998091209356387e-06, "loss": 18.4018, "step": 2109 }, { "epoch": 0.038569104502166086, "grad_norm": 7.275110598935181, "learning_rate": 9.998083022047116e-06, "loss": 17.7244, "step": 2110 }, { "epoch": 0.03858738369861261, "grad_norm": 6.894500030672901, "learning_rate": 9.998074817219999e-06, "loss": 17.4782, "step": 2111 }, { "epoch": 0.03860566289505913, "grad_norm": 6.973114564655955, "learning_rate": 9.998066594875063e-06, "loss": 17.6483, "step": 2112 }, { "epoch": 0.03862394209150566, "grad_norm": 7.8712395141148495, "learning_rate": 9.998058355012337e-06, "loss": 18.2726, "step": 2113 }, { "epoch": 0.03864222128795218, "grad_norm": 7.846501033939585, "learning_rate": 9.99805009763185e-06, "loss": 17.8641, "step": 2114 }, { "epoch": 0.03866050048439871, "grad_norm": 7.796771949115895, "learning_rate": 9.99804182273363e-06, "loss": 18.0188, "step": 2115 }, { "epoch": 0.03867877968084523, "grad_norm": 6.346450837225925, "learning_rate": 9.99803353031771e-06, "loss": 17.2014, "step": 2116 }, { "epoch": 0.03869705887729175, "grad_norm": 9.008885742021926, "learning_rate": 9.998025220384114e-06, "loss": 18.5797, "step": 2117 }, { "epoch": 0.03871533807373828, "grad_norm": 6.600728835551202, "learning_rate": 9.998016892932873e-06, "loss": 17.3858, "step": 2118 }, { "epoch": 0.0387336172701848, "grad_norm": 7.9493070426391075, "learning_rate": 9.998008547964018e-06, "loss": 17.9694, "step": 2119 }, { "epoch": 0.038751896466631325, "grad_norm": 8.29793855732644, "learning_rate": 9.998000185477576e-06, "loss": 18.1664, "step": 2120 }, { "epoch": 0.038770175663077854, "grad_norm": 8.018611705768237, "learning_rate": 9.997991805473577e-06, "loss": 18.1572, "step": 2121 }, { "epoch": 0.038788454859524375, "grad_norm": 7.7486960969514245, "learning_rate": 9.997983407952052e-06, "loss": 17.7031, "step": 2122 }, { "epoch": 0.0388067340559709, "grad_norm": 7.005074737558086, "learning_rate": 9.997974992913026e-06, "loss": 17.7374, "step": 2123 }, { "epoch": 0.038825013252417426, "grad_norm": 8.463804697132712, "learning_rate": 9.997966560356534e-06, "loss": 18.3967, "step": 2124 }, { "epoch": 0.03884329244886395, "grad_norm": 8.827072480057215, "learning_rate": 9.997958110282602e-06, "loss": 18.3065, "step": 2125 }, { "epoch": 0.03886157164531047, "grad_norm": 8.028287371587094, "learning_rate": 9.99794964269126e-06, "loss": 18.1977, "step": 2126 }, { "epoch": 0.038879850841757, "grad_norm": 7.609837991470296, "learning_rate": 9.997941157582538e-06, "loss": 17.8667, "step": 2127 }, { "epoch": 0.03889813003820352, "grad_norm": 7.609650111579857, "learning_rate": 9.997932654956467e-06, "loss": 17.8524, "step": 2128 }, { "epoch": 0.03891640923465004, "grad_norm": 6.3001475843222075, "learning_rate": 9.997924134813075e-06, "loss": 17.3262, "step": 2129 }, { "epoch": 0.03893468843109657, "grad_norm": 8.218052885471584, "learning_rate": 9.997915597152394e-06, "loss": 18.016, "step": 2130 }, { "epoch": 0.03895296762754309, "grad_norm": 8.96614079578861, "learning_rate": 9.99790704197445e-06, "loss": 18.2741, "step": 2131 }, { "epoch": 0.03897124682398962, "grad_norm": 7.338381226610959, "learning_rate": 9.997898469279278e-06, "loss": 17.5664, "step": 2132 }, { "epoch": 0.03898952602043614, "grad_norm": 7.397884465912606, "learning_rate": 9.997889879066904e-06, "loss": 18.05, "step": 2133 }, { "epoch": 0.039007805216882664, "grad_norm": 6.351306050693116, "learning_rate": 9.99788127133736e-06, "loss": 17.3067, "step": 2134 }, { "epoch": 0.03902608441332919, "grad_norm": 8.342880151387575, "learning_rate": 9.997872646090675e-06, "loss": 18.4756, "step": 2135 }, { "epoch": 0.039044363609775715, "grad_norm": 8.520078110217645, "learning_rate": 9.997864003326882e-06, "loss": 18.5616, "step": 2136 }, { "epoch": 0.039062642806222236, "grad_norm": 7.15659043512593, "learning_rate": 9.997855343046007e-06, "loss": 17.5117, "step": 2137 }, { "epoch": 0.039080922002668765, "grad_norm": 6.854967211447595, "learning_rate": 9.997846665248086e-06, "loss": 17.6334, "step": 2138 }, { "epoch": 0.03909920119911529, "grad_norm": 7.0789309063840165, "learning_rate": 9.997837969933144e-06, "loss": 17.7366, "step": 2139 }, { "epoch": 0.03911748039556181, "grad_norm": 9.024590484928225, "learning_rate": 9.997829257101214e-06, "loss": 18.69, "step": 2140 }, { "epoch": 0.03913575959200834, "grad_norm": 8.416640311669644, "learning_rate": 9.997820526752327e-06, "loss": 17.8544, "step": 2141 }, { "epoch": 0.03915403878845486, "grad_norm": 8.718851178261065, "learning_rate": 9.99781177888651e-06, "loss": 18.3247, "step": 2142 }, { "epoch": 0.03917231798490138, "grad_norm": 8.160393105502534, "learning_rate": 9.9978030135038e-06, "loss": 17.906, "step": 2143 }, { "epoch": 0.03919059718134791, "grad_norm": 7.707170090801509, "learning_rate": 9.997794230604221e-06, "loss": 17.9793, "step": 2144 }, { "epoch": 0.03920887637779443, "grad_norm": 6.371815042310033, "learning_rate": 9.997785430187808e-06, "loss": 17.5896, "step": 2145 }, { "epoch": 0.03922715557424095, "grad_norm": 6.710940234620757, "learning_rate": 9.99777661225459e-06, "loss": 17.4544, "step": 2146 }, { "epoch": 0.03924543477068748, "grad_norm": 7.709280379495519, "learning_rate": 9.997767776804601e-06, "loss": 18.0954, "step": 2147 }, { "epoch": 0.039263713967134004, "grad_norm": 7.760644279078112, "learning_rate": 9.997758923837868e-06, "loss": 18.0527, "step": 2148 }, { "epoch": 0.03928199316358053, "grad_norm": 7.049552993140452, "learning_rate": 9.997750053354425e-06, "loss": 17.6109, "step": 2149 }, { "epoch": 0.039300272360027054, "grad_norm": 7.566624110683898, "learning_rate": 9.9977411653543e-06, "loss": 18.1585, "step": 2150 }, { "epoch": 0.039318551556473576, "grad_norm": 7.097300511901119, "learning_rate": 9.997732259837528e-06, "loss": 17.6664, "step": 2151 }, { "epoch": 0.039336830752920104, "grad_norm": 7.893771154913385, "learning_rate": 9.997723336804134e-06, "loss": 18.1228, "step": 2152 }, { "epoch": 0.039355109949366626, "grad_norm": 7.819552400648903, "learning_rate": 9.997714396254157e-06, "loss": 17.9937, "step": 2153 }, { "epoch": 0.03937338914581315, "grad_norm": 7.864297066489012, "learning_rate": 9.997705438187624e-06, "loss": 17.902, "step": 2154 }, { "epoch": 0.03939166834225968, "grad_norm": 7.733725836401352, "learning_rate": 9.997696462604567e-06, "loss": 17.6863, "step": 2155 }, { "epoch": 0.0394099475387062, "grad_norm": 8.522942184970894, "learning_rate": 9.997687469505018e-06, "loss": 18.3173, "step": 2156 }, { "epoch": 0.03942822673515272, "grad_norm": 8.175017067967467, "learning_rate": 9.997678458889006e-06, "loss": 17.6859, "step": 2157 }, { "epoch": 0.03944650593159925, "grad_norm": 8.978411114997986, "learning_rate": 9.997669430756567e-06, "loss": 19.04, "step": 2158 }, { "epoch": 0.03946478512804577, "grad_norm": 7.482174928325935, "learning_rate": 9.99766038510773e-06, "loss": 17.8164, "step": 2159 }, { "epoch": 0.03948306432449229, "grad_norm": 7.921072208230814, "learning_rate": 9.997651321942526e-06, "loss": 18.1876, "step": 2160 }, { "epoch": 0.03950134352093882, "grad_norm": 7.17372865010213, "learning_rate": 9.997642241260988e-06, "loss": 17.7621, "step": 2161 }, { "epoch": 0.03951962271738534, "grad_norm": 7.3970388554120605, "learning_rate": 9.997633143063147e-06, "loss": 17.8985, "step": 2162 }, { "epoch": 0.039537901913831865, "grad_norm": 8.093249250600254, "learning_rate": 9.997624027349038e-06, "loss": 18.1239, "step": 2163 }, { "epoch": 0.03955618111027839, "grad_norm": 9.891216885761784, "learning_rate": 9.99761489411869e-06, "loss": 18.365, "step": 2164 }, { "epoch": 0.039574460306724915, "grad_norm": 8.247118588942913, "learning_rate": 9.997605743372135e-06, "loss": 17.8682, "step": 2165 }, { "epoch": 0.039592739503171444, "grad_norm": 8.006775421381619, "learning_rate": 9.997596575109403e-06, "loss": 18.0026, "step": 2166 }, { "epoch": 0.039611018699617966, "grad_norm": 8.080014015242162, "learning_rate": 9.99758738933053e-06, "loss": 18.2187, "step": 2167 }, { "epoch": 0.03962929789606449, "grad_norm": 8.059337299934262, "learning_rate": 9.997578186035548e-06, "loss": 18.2853, "step": 2168 }, { "epoch": 0.039647577092511016, "grad_norm": 8.587820665517523, "learning_rate": 9.997568965224489e-06, "loss": 18.5507, "step": 2169 }, { "epoch": 0.03966585628895754, "grad_norm": 7.372263721174855, "learning_rate": 9.997559726897382e-06, "loss": 18.0406, "step": 2170 }, { "epoch": 0.03968413548540406, "grad_norm": 6.542605135963088, "learning_rate": 9.997550471054262e-06, "loss": 17.5526, "step": 2171 }, { "epoch": 0.03970241468185059, "grad_norm": 7.189110663432018, "learning_rate": 9.997541197695165e-06, "loss": 17.8062, "step": 2172 }, { "epoch": 0.03972069387829711, "grad_norm": 6.404821137499881, "learning_rate": 9.997531906820114e-06, "loss": 17.2803, "step": 2173 }, { "epoch": 0.03973897307474363, "grad_norm": 8.369116003250504, "learning_rate": 9.997522598429152e-06, "loss": 18.1112, "step": 2174 }, { "epoch": 0.03975725227119016, "grad_norm": 8.105760046900059, "learning_rate": 9.997513272522306e-06, "loss": 18.1757, "step": 2175 }, { "epoch": 0.03977553146763668, "grad_norm": 6.8914560879095, "learning_rate": 9.997503929099608e-06, "loss": 17.6367, "step": 2176 }, { "epoch": 0.039793810664083204, "grad_norm": 8.74928482923437, "learning_rate": 9.997494568161094e-06, "loss": 18.797, "step": 2177 }, { "epoch": 0.03981208986052973, "grad_norm": 7.606032195700107, "learning_rate": 9.997485189706794e-06, "loss": 18.0399, "step": 2178 }, { "epoch": 0.039830369056976254, "grad_norm": 7.957640315235665, "learning_rate": 9.997475793736742e-06, "loss": 18.1217, "step": 2179 }, { "epoch": 0.039848648253422776, "grad_norm": 8.172829907955222, "learning_rate": 9.997466380250972e-06, "loss": 17.9818, "step": 2180 }, { "epoch": 0.039866927449869305, "grad_norm": 8.639348016920609, "learning_rate": 9.997456949249516e-06, "loss": 18.3638, "step": 2181 }, { "epoch": 0.03988520664631583, "grad_norm": 8.28297815129272, "learning_rate": 9.997447500732408e-06, "loss": 18.0711, "step": 2182 }, { "epoch": 0.039903485842762355, "grad_norm": 7.301929917593747, "learning_rate": 9.997438034699676e-06, "loss": 17.9614, "step": 2183 }, { "epoch": 0.03992176503920888, "grad_norm": 9.2133706103282, "learning_rate": 9.99742855115136e-06, "loss": 18.3219, "step": 2184 }, { "epoch": 0.0399400442356554, "grad_norm": 7.6662851379575, "learning_rate": 9.997419050087491e-06, "loss": 18.0362, "step": 2185 }, { "epoch": 0.03995832343210193, "grad_norm": 9.40252182257759, "learning_rate": 9.997409531508102e-06, "loss": 18.7361, "step": 2186 }, { "epoch": 0.03997660262854845, "grad_norm": 7.715201964930718, "learning_rate": 9.997399995413225e-06, "loss": 18.0677, "step": 2187 }, { "epoch": 0.03999488182499497, "grad_norm": 8.180109037376967, "learning_rate": 9.997390441802896e-06, "loss": 17.8739, "step": 2188 }, { "epoch": 0.0400131610214415, "grad_norm": 7.474350082348093, "learning_rate": 9.997380870677147e-06, "loss": 17.736, "step": 2189 }, { "epoch": 0.04003144021788802, "grad_norm": 7.30539824775302, "learning_rate": 9.997371282036012e-06, "loss": 17.7862, "step": 2190 }, { "epoch": 0.04004971941433454, "grad_norm": 8.10049297693191, "learning_rate": 9.997361675879524e-06, "loss": 18.3183, "step": 2191 }, { "epoch": 0.04006799861078107, "grad_norm": 8.148440778879321, "learning_rate": 9.997352052207717e-06, "loss": 17.8117, "step": 2192 }, { "epoch": 0.040086277807227594, "grad_norm": 7.662804144562566, "learning_rate": 9.997342411020623e-06, "loss": 17.9414, "step": 2193 }, { "epoch": 0.040104557003674116, "grad_norm": 8.657393083701246, "learning_rate": 9.99733275231828e-06, "loss": 18.4035, "step": 2194 }, { "epoch": 0.040122836200120644, "grad_norm": 6.363443744243816, "learning_rate": 9.997323076100718e-06, "loss": 17.273, "step": 2195 }, { "epoch": 0.040141115396567166, "grad_norm": 8.061330105461877, "learning_rate": 9.997313382367973e-06, "loss": 18.3943, "step": 2196 }, { "epoch": 0.04015939459301369, "grad_norm": 7.726391277553473, "learning_rate": 9.997303671120077e-06, "loss": 17.7805, "step": 2197 }, { "epoch": 0.040177673789460217, "grad_norm": 8.212107448574407, "learning_rate": 9.997293942357065e-06, "loss": 17.96, "step": 2198 }, { "epoch": 0.04019595298590674, "grad_norm": 8.290238516875284, "learning_rate": 9.997284196078974e-06, "loss": 17.9312, "step": 2199 }, { "epoch": 0.04021423218235327, "grad_norm": 7.98989253657042, "learning_rate": 9.997274432285833e-06, "loss": 17.9388, "step": 2200 }, { "epoch": 0.04023251137879979, "grad_norm": 6.8044758666856815, "learning_rate": 9.997264650977681e-06, "loss": 17.4357, "step": 2201 }, { "epoch": 0.04025079057524631, "grad_norm": 8.221859777178526, "learning_rate": 9.997254852154548e-06, "loss": 18.1218, "step": 2202 }, { "epoch": 0.04026906977169284, "grad_norm": 8.345525451984651, "learning_rate": 9.997245035816471e-06, "loss": 18.1535, "step": 2203 }, { "epoch": 0.04028734896813936, "grad_norm": 7.8169540138706335, "learning_rate": 9.997235201963484e-06, "loss": 17.9238, "step": 2204 }, { "epoch": 0.04030562816458588, "grad_norm": 7.951482942811827, "learning_rate": 9.99722535059562e-06, "loss": 17.6738, "step": 2205 }, { "epoch": 0.04032390736103241, "grad_norm": 7.5674725394502, "learning_rate": 9.997215481712917e-06, "loss": 17.9341, "step": 2206 }, { "epoch": 0.04034218655747893, "grad_norm": 8.671669698506888, "learning_rate": 9.997205595315406e-06, "loss": 18.5597, "step": 2207 }, { "epoch": 0.040360465753925455, "grad_norm": 6.815993343653375, "learning_rate": 9.997195691403123e-06, "loss": 17.3178, "step": 2208 }, { "epoch": 0.040378744950371984, "grad_norm": 8.389172345015542, "learning_rate": 9.997185769976104e-06, "loss": 18.2111, "step": 2209 }, { "epoch": 0.040397024146818505, "grad_norm": 7.831804934723512, "learning_rate": 9.997175831034382e-06, "loss": 18.0893, "step": 2210 }, { "epoch": 0.04041530334326503, "grad_norm": 7.819787617633279, "learning_rate": 9.99716587457799e-06, "loss": 17.8114, "step": 2211 }, { "epoch": 0.040433582539711556, "grad_norm": 7.213440914722836, "learning_rate": 9.997155900606968e-06, "loss": 17.5792, "step": 2212 }, { "epoch": 0.04045186173615808, "grad_norm": 7.467749826895467, "learning_rate": 9.99714590912135e-06, "loss": 18.1147, "step": 2213 }, { "epoch": 0.0404701409326046, "grad_norm": 7.3030276478195715, "learning_rate": 9.997135900121164e-06, "loss": 17.7149, "step": 2214 }, { "epoch": 0.04048842012905113, "grad_norm": 9.368146596367051, "learning_rate": 9.997125873606452e-06, "loss": 18.43, "step": 2215 }, { "epoch": 0.04050669932549765, "grad_norm": 7.54753489549492, "learning_rate": 9.99711582957725e-06, "loss": 17.8491, "step": 2216 }, { "epoch": 0.04052497852194418, "grad_norm": 7.018911450745933, "learning_rate": 9.997105768033588e-06, "loss": 17.5845, "step": 2217 }, { "epoch": 0.0405432577183907, "grad_norm": 8.017074287783036, "learning_rate": 9.997095688975506e-06, "loss": 18.0492, "step": 2218 }, { "epoch": 0.04056153691483722, "grad_norm": 6.573016273674605, "learning_rate": 9.997085592403036e-06, "loss": 17.4416, "step": 2219 }, { "epoch": 0.04057981611128375, "grad_norm": 7.165610220362177, "learning_rate": 9.997075478316213e-06, "loss": 17.748, "step": 2220 }, { "epoch": 0.04059809530773027, "grad_norm": 8.116293660856455, "learning_rate": 9.997065346715079e-06, "loss": 18.1911, "step": 2221 }, { "epoch": 0.040616374504176794, "grad_norm": 8.492003188224011, "learning_rate": 9.99705519759966e-06, "loss": 18.0617, "step": 2222 }, { "epoch": 0.04063465370062332, "grad_norm": 6.45728722483106, "learning_rate": 9.997045030969997e-06, "loss": 17.3788, "step": 2223 }, { "epoch": 0.040652932897069845, "grad_norm": 9.357708050753015, "learning_rate": 9.997034846826126e-06, "loss": 18.6276, "step": 2224 }, { "epoch": 0.040671212093516367, "grad_norm": 8.108005144604292, "learning_rate": 9.99702464516808e-06, "loss": 18.2943, "step": 2225 }, { "epoch": 0.040689491289962895, "grad_norm": 7.856412638551117, "learning_rate": 9.997014425995898e-06, "loss": 18.0768, "step": 2226 }, { "epoch": 0.04070777048640942, "grad_norm": 7.9677426244573795, "learning_rate": 9.997004189309614e-06, "loss": 17.9339, "step": 2227 }, { "epoch": 0.04072604968285594, "grad_norm": 8.68121235006211, "learning_rate": 9.996993935109263e-06, "loss": 18.3509, "step": 2228 }, { "epoch": 0.04074432887930247, "grad_norm": 9.159087383676308, "learning_rate": 9.99698366339488e-06, "loss": 18.6396, "step": 2229 }, { "epoch": 0.04076260807574899, "grad_norm": 9.129100089272592, "learning_rate": 9.996973374166505e-06, "loss": 18.3589, "step": 2230 }, { "epoch": 0.04078088727219551, "grad_norm": 7.63200488625213, "learning_rate": 9.996963067424173e-06, "loss": 17.8978, "step": 2231 }, { "epoch": 0.04079916646864204, "grad_norm": 7.037195597311852, "learning_rate": 9.996952743167919e-06, "loss": 17.9303, "step": 2232 }, { "epoch": 0.04081744566508856, "grad_norm": 6.697245553985456, "learning_rate": 9.996942401397776e-06, "loss": 17.2696, "step": 2233 }, { "epoch": 0.04083572486153509, "grad_norm": 8.364622068437564, "learning_rate": 9.996932042113785e-06, "loss": 18.2497, "step": 2234 }, { "epoch": 0.04085400405798161, "grad_norm": 7.55560813284806, "learning_rate": 9.996921665315982e-06, "loss": 17.7116, "step": 2235 }, { "epoch": 0.040872283254428134, "grad_norm": 7.189021265997685, "learning_rate": 9.996911271004403e-06, "loss": 17.7219, "step": 2236 }, { "epoch": 0.04089056245087466, "grad_norm": 8.405771124735566, "learning_rate": 9.996900859179082e-06, "loss": 18.4455, "step": 2237 }, { "epoch": 0.040908841647321184, "grad_norm": 8.110808715552439, "learning_rate": 9.996890429840057e-06, "loss": 18.0719, "step": 2238 }, { "epoch": 0.040927120843767706, "grad_norm": 6.7657326654624805, "learning_rate": 9.996879982987365e-06, "loss": 17.6897, "step": 2239 }, { "epoch": 0.040945400040214235, "grad_norm": 7.902971602385438, "learning_rate": 9.996869518621043e-06, "loss": 17.916, "step": 2240 }, { "epoch": 0.040963679236660756, "grad_norm": 8.739284291169165, "learning_rate": 9.996859036741125e-06, "loss": 18.4968, "step": 2241 }, { "epoch": 0.04098195843310728, "grad_norm": 8.863783295034544, "learning_rate": 9.996848537347651e-06, "loss": 18.4603, "step": 2242 }, { "epoch": 0.04100023762955381, "grad_norm": 8.827975485715196, "learning_rate": 9.996838020440656e-06, "loss": 18.1549, "step": 2243 }, { "epoch": 0.04101851682600033, "grad_norm": 7.242534711509926, "learning_rate": 9.996827486020178e-06, "loss": 18.0313, "step": 2244 }, { "epoch": 0.04103679602244685, "grad_norm": 8.24530816384671, "learning_rate": 9.996816934086253e-06, "loss": 18.1642, "step": 2245 }, { "epoch": 0.04105507521889338, "grad_norm": 8.827207274456681, "learning_rate": 9.996806364638917e-06, "loss": 17.7236, "step": 2246 }, { "epoch": 0.0410733544153399, "grad_norm": 7.681208695267572, "learning_rate": 9.996795777678212e-06, "loss": 18.0287, "step": 2247 }, { "epoch": 0.04109163361178642, "grad_norm": 7.7030414415403685, "learning_rate": 9.996785173204168e-06, "loss": 17.817, "step": 2248 }, { "epoch": 0.04110991280823295, "grad_norm": 7.760771743532918, "learning_rate": 9.996774551216825e-06, "loss": 17.6442, "step": 2249 }, { "epoch": 0.04112819200467947, "grad_norm": 7.646132278653959, "learning_rate": 9.996763911716223e-06, "loss": 17.8221, "step": 2250 }, { "epoch": 0.041146471201126, "grad_norm": 7.664331066563644, "learning_rate": 9.996753254702396e-06, "loss": 18.082, "step": 2251 }, { "epoch": 0.04116475039757252, "grad_norm": 8.051571433608776, "learning_rate": 9.996742580175383e-06, "loss": 17.8551, "step": 2252 }, { "epoch": 0.041183029594019045, "grad_norm": 9.186677857311098, "learning_rate": 9.996731888135221e-06, "loss": 18.7887, "step": 2253 }, { "epoch": 0.041201308790465574, "grad_norm": 9.121127798151235, "learning_rate": 9.996721178581948e-06, "loss": 18.4585, "step": 2254 }, { "epoch": 0.041219587986912096, "grad_norm": 7.671654595276185, "learning_rate": 9.9967104515156e-06, "loss": 17.8728, "step": 2255 }, { "epoch": 0.04123786718335862, "grad_norm": 6.970048735806223, "learning_rate": 9.996699706936214e-06, "loss": 17.661, "step": 2256 }, { "epoch": 0.041256146379805146, "grad_norm": 7.430312463763205, "learning_rate": 9.99668894484383e-06, "loss": 18.0237, "step": 2257 }, { "epoch": 0.04127442557625167, "grad_norm": 7.511826843925589, "learning_rate": 9.996678165238486e-06, "loss": 17.7627, "step": 2258 }, { "epoch": 0.04129270477269819, "grad_norm": 8.420303737893573, "learning_rate": 9.996667368120219e-06, "loss": 18.2609, "step": 2259 }, { "epoch": 0.04131098396914472, "grad_norm": 7.10238546153036, "learning_rate": 9.996656553489063e-06, "loss": 17.7902, "step": 2260 }, { "epoch": 0.04132926316559124, "grad_norm": 6.945695856385279, "learning_rate": 9.996645721345064e-06, "loss": 17.4524, "step": 2261 }, { "epoch": 0.04134754236203776, "grad_norm": 6.764161856334369, "learning_rate": 9.996634871688252e-06, "loss": 17.5518, "step": 2262 }, { "epoch": 0.04136582155848429, "grad_norm": 7.665182816750473, "learning_rate": 9.99662400451867e-06, "loss": 18.115, "step": 2263 }, { "epoch": 0.04138410075493081, "grad_norm": 8.059909396412177, "learning_rate": 9.996613119836354e-06, "loss": 18.2972, "step": 2264 }, { "epoch": 0.041402379951377334, "grad_norm": 8.569543981619749, "learning_rate": 9.996602217641342e-06, "loss": 18.4112, "step": 2265 }, { "epoch": 0.04142065914782386, "grad_norm": 7.7239610781494425, "learning_rate": 9.996591297933674e-06, "loss": 17.869, "step": 2266 }, { "epoch": 0.041438938344270385, "grad_norm": 9.416283894916795, "learning_rate": 9.996580360713386e-06, "loss": 18.6436, "step": 2267 }, { "epoch": 0.04145721754071691, "grad_norm": 7.5379396551312885, "learning_rate": 9.996569405980517e-06, "loss": 17.797, "step": 2268 }, { "epoch": 0.041475496737163435, "grad_norm": 7.955023806419039, "learning_rate": 9.996558433735106e-06, "loss": 17.8707, "step": 2269 }, { "epoch": 0.04149377593360996, "grad_norm": 8.470373947819228, "learning_rate": 9.996547443977193e-06, "loss": 18.0362, "step": 2270 }, { "epoch": 0.041512055130056486, "grad_norm": 8.130937681452403, "learning_rate": 9.996536436706815e-06, "loss": 17.9607, "step": 2271 }, { "epoch": 0.04153033432650301, "grad_norm": 9.185888336239445, "learning_rate": 9.996525411924008e-06, "loss": 18.8378, "step": 2272 }, { "epoch": 0.04154861352294953, "grad_norm": 7.703635781287982, "learning_rate": 9.996514369628813e-06, "loss": 17.9426, "step": 2273 }, { "epoch": 0.04156689271939606, "grad_norm": 8.686634293642456, "learning_rate": 9.99650330982127e-06, "loss": 18.4029, "step": 2274 }, { "epoch": 0.04158517191584258, "grad_norm": 7.505067804924687, "learning_rate": 9.996492232501416e-06, "loss": 17.9971, "step": 2275 }, { "epoch": 0.0416034511122891, "grad_norm": 8.266241750127806, "learning_rate": 9.996481137669291e-06, "loss": 18.1485, "step": 2276 }, { "epoch": 0.04162173030873563, "grad_norm": 6.9270516165961755, "learning_rate": 9.996470025324933e-06, "loss": 17.66, "step": 2277 }, { "epoch": 0.04164000950518215, "grad_norm": 8.25286348451379, "learning_rate": 9.99645889546838e-06, "loss": 18.0654, "step": 2278 }, { "epoch": 0.041658288701628673, "grad_norm": 7.159158430658167, "learning_rate": 9.996447748099673e-06, "loss": 17.8872, "step": 2279 }, { "epoch": 0.0416765678980752, "grad_norm": 6.995221413745061, "learning_rate": 9.996436583218852e-06, "loss": 17.4837, "step": 2280 }, { "epoch": 0.041694847094521724, "grad_norm": 6.775646813300009, "learning_rate": 9.996425400825952e-06, "loss": 17.4218, "step": 2281 }, { "epoch": 0.041713126290968246, "grad_norm": 6.985900307532805, "learning_rate": 9.996414200921014e-06, "loss": 17.6457, "step": 2282 }, { "epoch": 0.041731405487414774, "grad_norm": 6.908522988366118, "learning_rate": 9.996402983504079e-06, "loss": 17.5612, "step": 2283 }, { "epoch": 0.041749684683861296, "grad_norm": 8.094841746586784, "learning_rate": 9.996391748575184e-06, "loss": 18.0505, "step": 2284 }, { "epoch": 0.041767963880307825, "grad_norm": 7.070760098061046, "learning_rate": 9.996380496134372e-06, "loss": 17.9362, "step": 2285 }, { "epoch": 0.04178624307675435, "grad_norm": 8.978344297434703, "learning_rate": 9.996369226181678e-06, "loss": 18.6022, "step": 2286 }, { "epoch": 0.04180452227320087, "grad_norm": 9.632714862373605, "learning_rate": 9.996357938717144e-06, "loss": 18.3985, "step": 2287 }, { "epoch": 0.0418228014696474, "grad_norm": 8.054088454669444, "learning_rate": 9.996346633740809e-06, "loss": 18.1617, "step": 2288 }, { "epoch": 0.04184108066609392, "grad_norm": 8.577846213672203, "learning_rate": 9.996335311252712e-06, "loss": 18.3944, "step": 2289 }, { "epoch": 0.04185935986254044, "grad_norm": 6.67528905552185, "learning_rate": 9.996323971252895e-06, "loss": 17.6992, "step": 2290 }, { "epoch": 0.04187763905898697, "grad_norm": 8.042630167840407, "learning_rate": 9.996312613741394e-06, "loss": 18.6025, "step": 2291 }, { "epoch": 0.04189591825543349, "grad_norm": 7.633142920361117, "learning_rate": 9.996301238718251e-06, "loss": 18.0448, "step": 2292 }, { "epoch": 0.04191419745188001, "grad_norm": 8.627363254939468, "learning_rate": 9.996289846183506e-06, "loss": 18.4076, "step": 2293 }, { "epoch": 0.04193247664832654, "grad_norm": 6.953966791148869, "learning_rate": 9.9962784361372e-06, "loss": 17.5413, "step": 2294 }, { "epoch": 0.04195075584477306, "grad_norm": 8.570535225232478, "learning_rate": 9.99626700857937e-06, "loss": 18.5337, "step": 2295 }, { "epoch": 0.041969035041219585, "grad_norm": 7.60298214718083, "learning_rate": 9.996255563510059e-06, "loss": 17.8707, "step": 2296 }, { "epoch": 0.041987314237666114, "grad_norm": 8.817426993262535, "learning_rate": 9.996244100929305e-06, "loss": 18.5549, "step": 2297 }, { "epoch": 0.042005593434112636, "grad_norm": 8.051074390814165, "learning_rate": 9.99623262083715e-06, "loss": 18.0903, "step": 2298 }, { "epoch": 0.04202387263055916, "grad_norm": 6.914049677660477, "learning_rate": 9.996221123233631e-06, "loss": 17.652, "step": 2299 }, { "epoch": 0.042042151827005686, "grad_norm": 9.300933604045385, "learning_rate": 9.996209608118792e-06, "loss": 18.8076, "step": 2300 }, { "epoch": 0.04206043102345221, "grad_norm": 8.587086318109883, "learning_rate": 9.99619807549267e-06, "loss": 18.1632, "step": 2301 }, { "epoch": 0.042078710219898736, "grad_norm": 8.43519282951352, "learning_rate": 9.996186525355312e-06, "loss": 17.9335, "step": 2302 }, { "epoch": 0.04209698941634526, "grad_norm": 7.099712415454915, "learning_rate": 9.99617495770675e-06, "loss": 17.8879, "step": 2303 }, { "epoch": 0.04211526861279178, "grad_norm": 7.410752085634336, "learning_rate": 9.99616337254703e-06, "loss": 17.6165, "step": 2304 }, { "epoch": 0.04213354780923831, "grad_norm": 9.23145486560416, "learning_rate": 9.99615176987619e-06, "loss": 18.5385, "step": 2305 }, { "epoch": 0.04215182700568483, "grad_norm": 7.804166649928118, "learning_rate": 9.996140149694271e-06, "loss": 18.1071, "step": 2306 }, { "epoch": 0.04217010620213135, "grad_norm": 7.292123905585162, "learning_rate": 9.996128512001315e-06, "loss": 17.7422, "step": 2307 }, { "epoch": 0.04218838539857788, "grad_norm": 7.915688154902509, "learning_rate": 9.996116856797361e-06, "loss": 17.8606, "step": 2308 }, { "epoch": 0.0422066645950244, "grad_norm": 7.326183825319675, "learning_rate": 9.996105184082451e-06, "loss": 17.6888, "step": 2309 }, { "epoch": 0.042224943791470924, "grad_norm": 7.985166596550133, "learning_rate": 9.996093493856629e-06, "loss": 17.962, "step": 2310 }, { "epoch": 0.04224322298791745, "grad_norm": 8.03668699206688, "learning_rate": 9.996081786119932e-06, "loss": 18.0788, "step": 2311 }, { "epoch": 0.042261502184363975, "grad_norm": 7.6983489170030825, "learning_rate": 9.996070060872397e-06, "loss": 17.7543, "step": 2312 }, { "epoch": 0.0422797813808105, "grad_norm": 8.654245011201022, "learning_rate": 9.996058318114076e-06, "loss": 18.1858, "step": 2313 }, { "epoch": 0.042298060577257025, "grad_norm": 7.674808547428686, "learning_rate": 9.996046557845e-06, "loss": 17.8757, "step": 2314 }, { "epoch": 0.04231633977370355, "grad_norm": 7.9920460527959305, "learning_rate": 9.996034780065218e-06, "loss": 17.7592, "step": 2315 }, { "epoch": 0.04233461897015007, "grad_norm": 8.769256034609777, "learning_rate": 9.996022984774764e-06, "loss": 18.0602, "step": 2316 }, { "epoch": 0.0423528981665966, "grad_norm": 7.93222165293833, "learning_rate": 9.996011171973686e-06, "loss": 18.0744, "step": 2317 }, { "epoch": 0.04237117736304312, "grad_norm": 7.781045243938328, "learning_rate": 9.995999341662021e-06, "loss": 17.9564, "step": 2318 }, { "epoch": 0.04238945655948965, "grad_norm": 7.744615501885769, "learning_rate": 9.995987493839812e-06, "loss": 17.7038, "step": 2319 }, { "epoch": 0.04240773575593617, "grad_norm": 7.516691960954121, "learning_rate": 9.995975628507099e-06, "loss": 17.6575, "step": 2320 }, { "epoch": 0.04242601495238269, "grad_norm": 6.115400759637814, "learning_rate": 9.995963745663928e-06, "loss": 17.475, "step": 2321 }, { "epoch": 0.04244429414882922, "grad_norm": 7.22892817618116, "learning_rate": 9.995951845310334e-06, "loss": 17.5793, "step": 2322 }, { "epoch": 0.04246257334527574, "grad_norm": 8.062822196393663, "learning_rate": 9.995939927446366e-06, "loss": 18.0531, "step": 2323 }, { "epoch": 0.042480852541722264, "grad_norm": 7.660615550768664, "learning_rate": 9.995927992072058e-06, "loss": 17.7797, "step": 2324 }, { "epoch": 0.04249913173816879, "grad_norm": 7.982847840410579, "learning_rate": 9.995916039187458e-06, "loss": 18.2002, "step": 2325 }, { "epoch": 0.042517410934615314, "grad_norm": 8.785543921953549, "learning_rate": 9.995904068792607e-06, "loss": 18.5416, "step": 2326 }, { "epoch": 0.042535690131061836, "grad_norm": 7.097569729541003, "learning_rate": 9.995892080887545e-06, "loss": 17.6896, "step": 2327 }, { "epoch": 0.042553969327508365, "grad_norm": 8.176532027468705, "learning_rate": 9.995880075472315e-06, "loss": 18.2831, "step": 2328 }, { "epoch": 0.042572248523954886, "grad_norm": 7.885895150721695, "learning_rate": 9.995868052546957e-06, "loss": 17.9436, "step": 2329 }, { "epoch": 0.04259052772040141, "grad_norm": 6.775177913906577, "learning_rate": 9.995856012111517e-06, "loss": 17.3252, "step": 2330 }, { "epoch": 0.04260880691684794, "grad_norm": 7.94354048743229, "learning_rate": 9.995843954166036e-06, "loss": 17.9838, "step": 2331 }, { "epoch": 0.04262708611329446, "grad_norm": 7.764688523989066, "learning_rate": 9.995831878710553e-06, "loss": 17.9115, "step": 2332 }, { "epoch": 0.04264536530974098, "grad_norm": 7.409280094124277, "learning_rate": 9.995819785745113e-06, "loss": 17.729, "step": 2333 }, { "epoch": 0.04266364450618751, "grad_norm": 6.778335589173222, "learning_rate": 9.995807675269759e-06, "loss": 17.6421, "step": 2334 }, { "epoch": 0.04268192370263403, "grad_norm": 8.37586586816977, "learning_rate": 9.995795547284533e-06, "loss": 18.3069, "step": 2335 }, { "epoch": 0.04270020289908056, "grad_norm": 8.1383868168471, "learning_rate": 9.995783401789476e-06, "loss": 17.9825, "step": 2336 }, { "epoch": 0.04271848209552708, "grad_norm": 8.10989994308069, "learning_rate": 9.995771238784633e-06, "loss": 18.2562, "step": 2337 }, { "epoch": 0.0427367612919736, "grad_norm": 7.72664475872143, "learning_rate": 9.995759058270046e-06, "loss": 17.9149, "step": 2338 }, { "epoch": 0.04275504048842013, "grad_norm": 8.247015240804508, "learning_rate": 9.995746860245754e-06, "loss": 18.2216, "step": 2339 }, { "epoch": 0.042773319684866654, "grad_norm": 7.422653431185456, "learning_rate": 9.995734644711806e-06, "loss": 17.6408, "step": 2340 }, { "epoch": 0.042791598881313175, "grad_norm": 7.072127301139403, "learning_rate": 9.99572241166824e-06, "loss": 17.7117, "step": 2341 }, { "epoch": 0.042809878077759704, "grad_norm": 7.858912139734376, "learning_rate": 9.9957101611151e-06, "loss": 18.0305, "step": 2342 }, { "epoch": 0.042828157274206226, "grad_norm": 8.45495617543098, "learning_rate": 9.99569789305243e-06, "loss": 18.0969, "step": 2343 }, { "epoch": 0.04284643647065275, "grad_norm": 8.59786241900796, "learning_rate": 9.995685607480272e-06, "loss": 18.3043, "step": 2344 }, { "epoch": 0.042864715667099276, "grad_norm": 7.112209556935003, "learning_rate": 9.99567330439867e-06, "loss": 17.716, "step": 2345 }, { "epoch": 0.0428829948635458, "grad_norm": 7.4017512032226565, "learning_rate": 9.995660983807667e-06, "loss": 17.7608, "step": 2346 }, { "epoch": 0.04290127405999232, "grad_norm": 8.037185708433707, "learning_rate": 9.995648645707305e-06, "loss": 18.2411, "step": 2347 }, { "epoch": 0.04291955325643885, "grad_norm": 8.216894936542884, "learning_rate": 9.995636290097627e-06, "loss": 18.3719, "step": 2348 }, { "epoch": 0.04293783245288537, "grad_norm": 7.685407484316789, "learning_rate": 9.99562391697868e-06, "loss": 17.9064, "step": 2349 }, { "epoch": 0.04295611164933189, "grad_norm": 8.427764315977564, "learning_rate": 9.995611526350502e-06, "loss": 18.5452, "step": 2350 }, { "epoch": 0.04297439084577842, "grad_norm": 8.095761168162861, "learning_rate": 9.99559911821314e-06, "loss": 17.9802, "step": 2351 }, { "epoch": 0.04299267004222494, "grad_norm": 7.0316064907918445, "learning_rate": 9.995586692566636e-06, "loss": 17.8435, "step": 2352 }, { "epoch": 0.04301094923867147, "grad_norm": 7.839253788821011, "learning_rate": 9.995574249411035e-06, "loss": 17.8707, "step": 2353 }, { "epoch": 0.04302922843511799, "grad_norm": 7.38435360392192, "learning_rate": 9.99556178874638e-06, "loss": 17.7948, "step": 2354 }, { "epoch": 0.043047507631564515, "grad_norm": 7.911927060871132, "learning_rate": 9.995549310572714e-06, "loss": 18.0407, "step": 2355 }, { "epoch": 0.04306578682801104, "grad_norm": 7.234185652413484, "learning_rate": 9.995536814890081e-06, "loss": 17.8514, "step": 2356 }, { "epoch": 0.043084066024457565, "grad_norm": 7.87114672882479, "learning_rate": 9.995524301698525e-06, "loss": 17.9316, "step": 2357 }, { "epoch": 0.04310234522090409, "grad_norm": 6.701925047119131, "learning_rate": 9.995511770998089e-06, "loss": 17.5415, "step": 2358 }, { "epoch": 0.043120624417350616, "grad_norm": 7.237738766893789, "learning_rate": 9.99549922278882e-06, "loss": 17.8181, "step": 2359 }, { "epoch": 0.04313890361379714, "grad_norm": 8.366866841506923, "learning_rate": 9.995486657070758e-06, "loss": 18.3393, "step": 2360 }, { "epoch": 0.04315718281024366, "grad_norm": 8.159031238202944, "learning_rate": 9.99547407384395e-06, "loss": 18.1373, "step": 2361 }, { "epoch": 0.04317546200669019, "grad_norm": 9.519533536980633, "learning_rate": 9.99546147310844e-06, "loss": 19.0391, "step": 2362 }, { "epoch": 0.04319374120313671, "grad_norm": 7.203080784424371, "learning_rate": 9.995448854864267e-06, "loss": 17.6502, "step": 2363 }, { "epoch": 0.04321202039958323, "grad_norm": 7.785504030875423, "learning_rate": 9.99543621911148e-06, "loss": 18.0516, "step": 2364 }, { "epoch": 0.04323029959602976, "grad_norm": 7.418746620233205, "learning_rate": 9.995423565850125e-06, "loss": 17.7921, "step": 2365 }, { "epoch": 0.04324857879247628, "grad_norm": 6.901505175666439, "learning_rate": 9.995410895080242e-06, "loss": 17.5693, "step": 2366 }, { "epoch": 0.043266857988922804, "grad_norm": 7.735162149311585, "learning_rate": 9.995398206801878e-06, "loss": 17.93, "step": 2367 }, { "epoch": 0.04328513718536933, "grad_norm": 7.727478640094747, "learning_rate": 9.995385501015079e-06, "loss": 17.6187, "step": 2368 }, { "epoch": 0.043303416381815854, "grad_norm": 6.75990624060066, "learning_rate": 9.995372777719885e-06, "loss": 17.6831, "step": 2369 }, { "epoch": 0.04332169557826238, "grad_norm": 7.396815271837521, "learning_rate": 9.995360036916342e-06, "loss": 17.6261, "step": 2370 }, { "epoch": 0.043339974774708905, "grad_norm": 8.951021034027882, "learning_rate": 9.995347278604497e-06, "loss": 18.623, "step": 2371 }, { "epoch": 0.043358253971155426, "grad_norm": 7.478975138883387, "learning_rate": 9.995334502784392e-06, "loss": 17.8627, "step": 2372 }, { "epoch": 0.043376533167601955, "grad_norm": 7.210516005272581, "learning_rate": 9.995321709456074e-06, "loss": 17.6906, "step": 2373 }, { "epoch": 0.04339481236404848, "grad_norm": 8.089111675125123, "learning_rate": 9.995308898619587e-06, "loss": 17.8964, "step": 2374 }, { "epoch": 0.043413091560495, "grad_norm": 7.221725490241234, "learning_rate": 9.995296070274976e-06, "loss": 17.7029, "step": 2375 }, { "epoch": 0.04343137075694153, "grad_norm": 9.049432813964266, "learning_rate": 9.995283224422285e-06, "loss": 18.5432, "step": 2376 }, { "epoch": 0.04344964995338805, "grad_norm": 8.101618835608113, "learning_rate": 9.99527036106156e-06, "loss": 17.9293, "step": 2377 }, { "epoch": 0.04346792914983457, "grad_norm": 7.583140210100685, "learning_rate": 9.995257480192845e-06, "loss": 17.9971, "step": 2378 }, { "epoch": 0.0434862083462811, "grad_norm": 7.069738679577622, "learning_rate": 9.995244581816189e-06, "loss": 17.5991, "step": 2379 }, { "epoch": 0.04350448754272762, "grad_norm": 7.385570093641195, "learning_rate": 9.99523166593163e-06, "loss": 17.629, "step": 2380 }, { "epoch": 0.04352276673917414, "grad_norm": 7.691840493197007, "learning_rate": 9.99521873253922e-06, "loss": 17.9091, "step": 2381 }, { "epoch": 0.04354104593562067, "grad_norm": 8.304064472508092, "learning_rate": 9.995205781639001e-06, "loss": 18.22, "step": 2382 }, { "epoch": 0.04355932513206719, "grad_norm": 8.722371398782412, "learning_rate": 9.99519281323102e-06, "loss": 17.6181, "step": 2383 }, { "epoch": 0.043577604328513715, "grad_norm": 8.297329667723888, "learning_rate": 9.995179827315321e-06, "loss": 18.0572, "step": 2384 }, { "epoch": 0.043595883524960244, "grad_norm": 6.547568797748973, "learning_rate": 9.99516682389195e-06, "loss": 17.7223, "step": 2385 }, { "epoch": 0.043614162721406766, "grad_norm": 7.8858528862674016, "learning_rate": 9.995153802960952e-06, "loss": 18.2852, "step": 2386 }, { "epoch": 0.043632441917853294, "grad_norm": 7.346611184463637, "learning_rate": 9.995140764522377e-06, "loss": 17.7132, "step": 2387 }, { "epoch": 0.043650721114299816, "grad_norm": 7.232702754468648, "learning_rate": 9.995127708576265e-06, "loss": 17.7087, "step": 2388 }, { "epoch": 0.04366900031074634, "grad_norm": 7.8902377192310285, "learning_rate": 9.995114635122663e-06, "loss": 18.1182, "step": 2389 }, { "epoch": 0.04368727950719287, "grad_norm": 7.541585681087609, "learning_rate": 9.995101544161617e-06, "loss": 17.9787, "step": 2390 }, { "epoch": 0.04370555870363939, "grad_norm": 8.022977044455, "learning_rate": 9.995088435693174e-06, "loss": 18.0081, "step": 2391 }, { "epoch": 0.04372383790008591, "grad_norm": 9.649686114276376, "learning_rate": 9.995075309717382e-06, "loss": 18.4127, "step": 2392 }, { "epoch": 0.04374211709653244, "grad_norm": 9.461293606285105, "learning_rate": 9.995062166234281e-06, "loss": 18.5204, "step": 2393 }, { "epoch": 0.04376039629297896, "grad_norm": 7.099137168277874, "learning_rate": 9.995049005243922e-06, "loss": 18.0249, "step": 2394 }, { "epoch": 0.04377867548942548, "grad_norm": 7.888544589686886, "learning_rate": 9.995035826746351e-06, "loss": 18.1497, "step": 2395 }, { "epoch": 0.04379695468587201, "grad_norm": 8.461906939436192, "learning_rate": 9.99502263074161e-06, "loss": 17.9549, "step": 2396 }, { "epoch": 0.04381523388231853, "grad_norm": 8.681457125286627, "learning_rate": 9.99500941722975e-06, "loss": 18.5861, "step": 2397 }, { "epoch": 0.043833513078765055, "grad_norm": 7.982245776764294, "learning_rate": 9.994996186210818e-06, "loss": 17.9586, "step": 2398 }, { "epoch": 0.04385179227521158, "grad_norm": 6.70762930155129, "learning_rate": 9.994982937684854e-06, "loss": 17.6772, "step": 2399 }, { "epoch": 0.043870071471658105, "grad_norm": 7.625164098269424, "learning_rate": 9.994969671651908e-06, "loss": 18.307, "step": 2400 }, { "epoch": 0.04388835066810463, "grad_norm": 8.025152234520164, "learning_rate": 9.99495638811203e-06, "loss": 17.929, "step": 2401 }, { "epoch": 0.043906629864551155, "grad_norm": 7.188413988586524, "learning_rate": 9.99494308706526e-06, "loss": 17.5913, "step": 2402 }, { "epoch": 0.04392490906099768, "grad_norm": 7.566729513834296, "learning_rate": 9.99492976851165e-06, "loss": 18.0366, "step": 2403 }, { "epoch": 0.043943188257444206, "grad_norm": 7.22772701732747, "learning_rate": 9.994916432451245e-06, "loss": 17.6594, "step": 2404 }, { "epoch": 0.04396146745389073, "grad_norm": 8.347832930396576, "learning_rate": 9.99490307888409e-06, "loss": 18.0701, "step": 2405 }, { "epoch": 0.04397974665033725, "grad_norm": 7.283857657416851, "learning_rate": 9.994889707810234e-06, "loss": 17.6575, "step": 2406 }, { "epoch": 0.04399802584678378, "grad_norm": 6.820984119408071, "learning_rate": 9.99487631922972e-06, "loss": 17.2445, "step": 2407 }, { "epoch": 0.0440163050432303, "grad_norm": 7.403463314533847, "learning_rate": 9.994862913142602e-06, "loss": 18.2281, "step": 2408 }, { "epoch": 0.04403458423967682, "grad_norm": 7.991456749842262, "learning_rate": 9.994849489548921e-06, "loss": 18.057, "step": 2409 }, { "epoch": 0.04405286343612335, "grad_norm": 7.159303653286646, "learning_rate": 9.994836048448726e-06, "loss": 17.8026, "step": 2410 }, { "epoch": 0.04407114263256987, "grad_norm": 7.207852452805044, "learning_rate": 9.994822589842065e-06, "loss": 18.0369, "step": 2411 }, { "epoch": 0.044089421829016394, "grad_norm": 8.142630712018148, "learning_rate": 9.994809113728982e-06, "loss": 18.0258, "step": 2412 }, { "epoch": 0.04410770102546292, "grad_norm": 7.681017370419984, "learning_rate": 9.99479562010953e-06, "loss": 18.1285, "step": 2413 }, { "epoch": 0.044125980221909444, "grad_norm": 6.686508257741154, "learning_rate": 9.994782108983749e-06, "loss": 17.2982, "step": 2414 }, { "epoch": 0.044144259418355966, "grad_norm": 8.305098334149148, "learning_rate": 9.994768580351692e-06, "loss": 18.1322, "step": 2415 }, { "epoch": 0.044162538614802495, "grad_norm": 8.117129217177633, "learning_rate": 9.994755034213405e-06, "loss": 18.311, "step": 2416 }, { "epoch": 0.04418081781124902, "grad_norm": 7.380525097184161, "learning_rate": 9.994741470568937e-06, "loss": 17.8848, "step": 2417 }, { "epoch": 0.04419909700769554, "grad_norm": 7.3047276088671005, "learning_rate": 9.99472788941833e-06, "loss": 17.8573, "step": 2418 }, { "epoch": 0.04421737620414207, "grad_norm": 7.392153203517438, "learning_rate": 9.994714290761636e-06, "loss": 17.8191, "step": 2419 }, { "epoch": 0.04423565540058859, "grad_norm": 7.1426956309508896, "learning_rate": 9.994700674598902e-06, "loss": 17.4996, "step": 2420 }, { "epoch": 0.04425393459703512, "grad_norm": 8.566850220352459, "learning_rate": 9.994687040930176e-06, "loss": 17.8436, "step": 2421 }, { "epoch": 0.04427221379348164, "grad_norm": 9.009723701167259, "learning_rate": 9.994673389755504e-06, "loss": 18.4966, "step": 2422 }, { "epoch": 0.04429049298992816, "grad_norm": 7.648464419628126, "learning_rate": 9.994659721074937e-06, "loss": 17.5574, "step": 2423 }, { "epoch": 0.04430877218637469, "grad_norm": 6.899282785325929, "learning_rate": 9.99464603488852e-06, "loss": 17.5233, "step": 2424 }, { "epoch": 0.04432705138282121, "grad_norm": 7.419090401494869, "learning_rate": 9.994632331196303e-06, "loss": 17.6791, "step": 2425 }, { "epoch": 0.04434533057926773, "grad_norm": 7.856012202760776, "learning_rate": 9.994618609998333e-06, "loss": 17.9967, "step": 2426 }, { "epoch": 0.04436360977571426, "grad_norm": 7.899339983429512, "learning_rate": 9.994604871294658e-06, "loss": 18.1975, "step": 2427 }, { "epoch": 0.044381888972160784, "grad_norm": 7.842972356313221, "learning_rate": 9.994591115085324e-06, "loss": 18.1075, "step": 2428 }, { "epoch": 0.044400168168607305, "grad_norm": 8.647233203391728, "learning_rate": 9.994577341370384e-06, "loss": 18.4699, "step": 2429 }, { "epoch": 0.044418447365053834, "grad_norm": 8.191500746778265, "learning_rate": 9.994563550149884e-06, "loss": 18.016, "step": 2430 }, { "epoch": 0.044436726561500356, "grad_norm": 9.147080973632784, "learning_rate": 9.994549741423871e-06, "loss": 18.5194, "step": 2431 }, { "epoch": 0.04445500575794688, "grad_norm": 7.428488346981479, "learning_rate": 9.994535915192396e-06, "loss": 17.7912, "step": 2432 }, { "epoch": 0.044473284954393406, "grad_norm": 7.011024965235697, "learning_rate": 9.994522071455505e-06, "loss": 17.6894, "step": 2433 }, { "epoch": 0.04449156415083993, "grad_norm": 8.19376074623173, "learning_rate": 9.994508210213248e-06, "loss": 17.7326, "step": 2434 }, { "epoch": 0.04450984334728645, "grad_norm": 7.26414722027003, "learning_rate": 9.994494331465672e-06, "loss": 17.8731, "step": 2435 }, { "epoch": 0.04452812254373298, "grad_norm": 7.258459064949432, "learning_rate": 9.994480435212827e-06, "loss": 17.8062, "step": 2436 }, { "epoch": 0.0445464017401795, "grad_norm": 6.059524784370213, "learning_rate": 9.994466521454762e-06, "loss": 17.1452, "step": 2437 }, { "epoch": 0.04456468093662603, "grad_norm": 6.39379233735651, "learning_rate": 9.994452590191525e-06, "loss": 17.6071, "step": 2438 }, { "epoch": 0.04458296013307255, "grad_norm": 7.5348896456735615, "learning_rate": 9.994438641423166e-06, "loss": 18.2033, "step": 2439 }, { "epoch": 0.04460123932951907, "grad_norm": 6.765819290377159, "learning_rate": 9.994424675149733e-06, "loss": 17.7391, "step": 2440 }, { "epoch": 0.0446195185259656, "grad_norm": 8.045367878632184, "learning_rate": 9.994410691371274e-06, "loss": 18.264, "step": 2441 }, { "epoch": 0.04463779772241212, "grad_norm": 8.450665323988279, "learning_rate": 9.99439669008784e-06, "loss": 17.9337, "step": 2442 }, { "epoch": 0.044656076918858645, "grad_norm": 7.566173010961893, "learning_rate": 9.994382671299477e-06, "loss": 17.8623, "step": 2443 }, { "epoch": 0.044674356115305174, "grad_norm": 7.130095907079948, "learning_rate": 9.994368635006238e-06, "loss": 17.78, "step": 2444 }, { "epoch": 0.044692635311751695, "grad_norm": 7.27100666984323, "learning_rate": 9.99435458120817e-06, "loss": 17.8144, "step": 2445 }, { "epoch": 0.04471091450819822, "grad_norm": 7.489837115950441, "learning_rate": 9.994340509905321e-06, "loss": 18.0407, "step": 2446 }, { "epoch": 0.044729193704644746, "grad_norm": 7.612045002891485, "learning_rate": 9.994326421097744e-06, "loss": 18.1473, "step": 2447 }, { "epoch": 0.04474747290109127, "grad_norm": 7.642973655488191, "learning_rate": 9.994312314785486e-06, "loss": 18.1447, "step": 2448 }, { "epoch": 0.04476575209753779, "grad_norm": 6.7859223007249705, "learning_rate": 9.994298190968595e-06, "loss": 17.3404, "step": 2449 }, { "epoch": 0.04478403129398432, "grad_norm": 8.558894441486864, "learning_rate": 9.994284049647123e-06, "loss": 18.4606, "step": 2450 }, { "epoch": 0.04480231049043084, "grad_norm": 7.807862136108618, "learning_rate": 9.99426989082112e-06, "loss": 18.0718, "step": 2451 }, { "epoch": 0.04482058968687736, "grad_norm": 7.627670044603417, "learning_rate": 9.994255714490633e-06, "loss": 17.7779, "step": 2452 }, { "epoch": 0.04483886888332389, "grad_norm": 7.095676428920803, "learning_rate": 9.994241520655713e-06, "loss": 17.7749, "step": 2453 }, { "epoch": 0.04485714807977041, "grad_norm": 8.826666879389675, "learning_rate": 9.99422730931641e-06, "loss": 18.3998, "step": 2454 }, { "epoch": 0.04487542727621694, "grad_norm": 7.622749814638532, "learning_rate": 9.994213080472776e-06, "loss": 17.8817, "step": 2455 }, { "epoch": 0.04489370647266346, "grad_norm": 7.67749884483121, "learning_rate": 9.994198834124856e-06, "loss": 18.1708, "step": 2456 }, { "epoch": 0.044911985669109984, "grad_norm": 7.882270641978379, "learning_rate": 9.994184570272704e-06, "loss": 18.1039, "step": 2457 }, { "epoch": 0.04493026486555651, "grad_norm": 7.53739610912295, "learning_rate": 9.994170288916367e-06, "loss": 17.9699, "step": 2458 }, { "epoch": 0.044948544062003035, "grad_norm": 7.929458391148995, "learning_rate": 9.994155990055897e-06, "loss": 18.0522, "step": 2459 }, { "epoch": 0.044966823258449556, "grad_norm": 8.150928372404985, "learning_rate": 9.994141673691345e-06, "loss": 18.3842, "step": 2460 }, { "epoch": 0.044985102454896085, "grad_norm": 8.197250165025617, "learning_rate": 9.994127339822756e-06, "loss": 18.397, "step": 2461 }, { "epoch": 0.04500338165134261, "grad_norm": 7.0408254789572275, "learning_rate": 9.99411298845019e-06, "loss": 17.6103, "step": 2462 }, { "epoch": 0.04502166084778913, "grad_norm": 9.306036505095154, "learning_rate": 9.994098619573687e-06, "loss": 18.6617, "step": 2463 }, { "epoch": 0.04503994004423566, "grad_norm": 8.719533738544047, "learning_rate": 9.994084233193303e-06, "loss": 18.2127, "step": 2464 }, { "epoch": 0.04505821924068218, "grad_norm": 7.993337299543475, "learning_rate": 9.994069829309086e-06, "loss": 17.7749, "step": 2465 }, { "epoch": 0.0450764984371287, "grad_norm": 8.786320469873472, "learning_rate": 9.994055407921088e-06, "loss": 18.265, "step": 2466 }, { "epoch": 0.04509477763357523, "grad_norm": 8.088655571473474, "learning_rate": 9.99404096902936e-06, "loss": 18.0567, "step": 2467 }, { "epoch": 0.04511305683002175, "grad_norm": 10.785255980731446, "learning_rate": 9.99402651263395e-06, "loss": 18.8973, "step": 2468 }, { "epoch": 0.04513133602646827, "grad_norm": 6.947880353235863, "learning_rate": 9.994012038734912e-06, "loss": 17.7813, "step": 2469 }, { "epoch": 0.0451496152229148, "grad_norm": 8.40840965315626, "learning_rate": 9.993997547332295e-06, "loss": 18.0729, "step": 2470 }, { "epoch": 0.045167894419361324, "grad_norm": 7.141559747847017, "learning_rate": 9.993983038426149e-06, "loss": 17.7627, "step": 2471 }, { "epoch": 0.04518617361580785, "grad_norm": 8.083598178975093, "learning_rate": 9.993968512016528e-06, "loss": 18.0206, "step": 2472 }, { "epoch": 0.045204452812254374, "grad_norm": 7.941864410688481, "learning_rate": 9.993953968103479e-06, "loss": 17.7503, "step": 2473 }, { "epoch": 0.045222732008700896, "grad_norm": 8.276880607497981, "learning_rate": 9.993939406687055e-06, "loss": 18.1408, "step": 2474 }, { "epoch": 0.045241011205147424, "grad_norm": 7.494499518134069, "learning_rate": 9.993924827767306e-06, "loss": 17.717, "step": 2475 }, { "epoch": 0.045259290401593946, "grad_norm": 6.279519489638303, "learning_rate": 9.993910231344286e-06, "loss": 17.3121, "step": 2476 }, { "epoch": 0.04527756959804047, "grad_norm": 6.10448976291681, "learning_rate": 9.993895617418042e-06, "loss": 17.2826, "step": 2477 }, { "epoch": 0.045295848794487, "grad_norm": 8.485248276703604, "learning_rate": 9.993880985988629e-06, "loss": 18.2969, "step": 2478 }, { "epoch": 0.04531412799093352, "grad_norm": 6.956430929645586, "learning_rate": 9.993866337056095e-06, "loss": 17.6805, "step": 2479 }, { "epoch": 0.04533240718738004, "grad_norm": 7.577875930364536, "learning_rate": 9.993851670620494e-06, "loss": 17.957, "step": 2480 }, { "epoch": 0.04535068638382657, "grad_norm": 7.293365459040691, "learning_rate": 9.993836986681876e-06, "loss": 17.8977, "step": 2481 }, { "epoch": 0.04536896558027309, "grad_norm": 10.212845440385083, "learning_rate": 9.99382228524029e-06, "loss": 19.0827, "step": 2482 }, { "epoch": 0.04538724477671961, "grad_norm": 6.5640105097470505, "learning_rate": 9.993807566295793e-06, "loss": 17.4783, "step": 2483 }, { "epoch": 0.04540552397316614, "grad_norm": 7.88718427575359, "learning_rate": 9.993792829848433e-06, "loss": 17.909, "step": 2484 }, { "epoch": 0.04542380316961266, "grad_norm": 7.313171341929883, "learning_rate": 9.993778075898262e-06, "loss": 17.6938, "step": 2485 }, { "epoch": 0.045442082366059185, "grad_norm": 9.18976380206171, "learning_rate": 9.993763304445335e-06, "loss": 18.4729, "step": 2486 }, { "epoch": 0.04546036156250571, "grad_norm": 7.671585186971003, "learning_rate": 9.993748515489698e-06, "loss": 17.5632, "step": 2487 }, { "epoch": 0.045478640758952235, "grad_norm": 7.396646593191951, "learning_rate": 9.993733709031406e-06, "loss": 18.0158, "step": 2488 }, { "epoch": 0.045496919955398764, "grad_norm": 7.30338313789073, "learning_rate": 9.993718885070512e-06, "loss": 17.8184, "step": 2489 }, { "epoch": 0.045515199151845286, "grad_norm": 8.156573329646484, "learning_rate": 9.993704043607065e-06, "loss": 18.0749, "step": 2490 }, { "epoch": 0.04553347834829181, "grad_norm": 7.444852773785196, "learning_rate": 9.99368918464112e-06, "loss": 18.1151, "step": 2491 }, { "epoch": 0.045551757544738336, "grad_norm": 8.574863713539944, "learning_rate": 9.993674308172727e-06, "loss": 18.6178, "step": 2492 }, { "epoch": 0.04557003674118486, "grad_norm": 9.383076273075883, "learning_rate": 9.993659414201939e-06, "loss": 18.8024, "step": 2493 }, { "epoch": 0.04558831593763138, "grad_norm": 7.58726551821705, "learning_rate": 9.993644502728809e-06, "loss": 17.6787, "step": 2494 }, { "epoch": 0.04560659513407791, "grad_norm": 6.9906904839724175, "learning_rate": 9.993629573753387e-06, "loss": 17.6734, "step": 2495 }, { "epoch": 0.04562487433052443, "grad_norm": 7.602778161713747, "learning_rate": 9.993614627275728e-06, "loss": 18.0471, "step": 2496 }, { "epoch": 0.04564315352697095, "grad_norm": 6.743092129784845, "learning_rate": 9.993599663295883e-06, "loss": 17.7301, "step": 2497 }, { "epoch": 0.04566143272341748, "grad_norm": 6.698977437278408, "learning_rate": 9.993584681813904e-06, "loss": 17.4601, "step": 2498 }, { "epoch": 0.045679711919864, "grad_norm": 8.453281591316307, "learning_rate": 9.993569682829843e-06, "loss": 18.4345, "step": 2499 }, { "epoch": 0.045697991116310524, "grad_norm": 10.112200193544616, "learning_rate": 9.993554666343755e-06, "loss": 18.6083, "step": 2500 }, { "epoch": 0.04571627031275705, "grad_norm": 7.121410796053459, "learning_rate": 9.99353963235569e-06, "loss": 17.675, "step": 2501 }, { "epoch": 0.045734549509203574, "grad_norm": 7.625019759673931, "learning_rate": 9.993524580865704e-06, "loss": 17.769, "step": 2502 }, { "epoch": 0.045752828705650096, "grad_norm": 10.04571223137181, "learning_rate": 9.993509511873845e-06, "loss": 18.951, "step": 2503 }, { "epoch": 0.045771107902096625, "grad_norm": 8.376891770038641, "learning_rate": 9.99349442538017e-06, "loss": 18.0827, "step": 2504 }, { "epoch": 0.04578938709854315, "grad_norm": 7.862265599796088, "learning_rate": 9.99347932138473e-06, "loss": 17.9922, "step": 2505 }, { "epoch": 0.045807666294989675, "grad_norm": 8.124258938135004, "learning_rate": 9.993464199887578e-06, "loss": 18.638, "step": 2506 }, { "epoch": 0.0458259454914362, "grad_norm": 7.022653855067991, "learning_rate": 9.993449060888768e-06, "loss": 17.6937, "step": 2507 }, { "epoch": 0.04584422468788272, "grad_norm": 6.996402976687276, "learning_rate": 9.993433904388352e-06, "loss": 17.3463, "step": 2508 }, { "epoch": 0.04586250388432925, "grad_norm": 7.98813478789894, "learning_rate": 9.993418730386384e-06, "loss": 17.7135, "step": 2509 }, { "epoch": 0.04588078308077577, "grad_norm": 7.056808940985014, "learning_rate": 9.993403538882915e-06, "loss": 17.8142, "step": 2510 }, { "epoch": 0.04589906227722229, "grad_norm": 8.740012775396496, "learning_rate": 9.993388329878002e-06, "loss": 18.1227, "step": 2511 }, { "epoch": 0.04591734147366882, "grad_norm": 8.882864774096744, "learning_rate": 9.993373103371695e-06, "loss": 18.1953, "step": 2512 }, { "epoch": 0.04593562067011534, "grad_norm": 7.958453931071923, "learning_rate": 9.993357859364048e-06, "loss": 17.9278, "step": 2513 }, { "epoch": 0.04595389986656186, "grad_norm": 7.776292641995152, "learning_rate": 9.993342597855117e-06, "loss": 18.2712, "step": 2514 }, { "epoch": 0.04597217906300839, "grad_norm": 8.292059225346584, "learning_rate": 9.993327318844952e-06, "loss": 18.0476, "step": 2515 }, { "epoch": 0.045990458259454914, "grad_norm": 8.386415553531005, "learning_rate": 9.993312022333608e-06, "loss": 18.35, "step": 2516 }, { "epoch": 0.046008737455901436, "grad_norm": 8.045486169098941, "learning_rate": 9.99329670832114e-06, "loss": 18.2319, "step": 2517 }, { "epoch": 0.046027016652347964, "grad_norm": 9.232407530293376, "learning_rate": 9.993281376807598e-06, "loss": 18.4699, "step": 2518 }, { "epoch": 0.046045295848794486, "grad_norm": 7.8520732123053225, "learning_rate": 9.99326602779304e-06, "loss": 18.191, "step": 2519 }, { "epoch": 0.04606357504524101, "grad_norm": 7.16087588755416, "learning_rate": 9.993250661277519e-06, "loss": 17.6772, "step": 2520 }, { "epoch": 0.046081854241687537, "grad_norm": 8.187990817383524, "learning_rate": 9.993235277261084e-06, "loss": 18.0416, "step": 2521 }, { "epoch": 0.04610013343813406, "grad_norm": 7.291193162948097, "learning_rate": 9.993219875743796e-06, "loss": 17.7921, "step": 2522 }, { "epoch": 0.04611841263458059, "grad_norm": 7.269722958984533, "learning_rate": 9.993204456725703e-06, "loss": 17.7402, "step": 2523 }, { "epoch": 0.04613669183102711, "grad_norm": 9.39702523786115, "learning_rate": 9.993189020206863e-06, "loss": 18.7017, "step": 2524 }, { "epoch": 0.04615497102747363, "grad_norm": 8.422467668553633, "learning_rate": 9.99317356618733e-06, "loss": 18.2036, "step": 2525 }, { "epoch": 0.04617325022392016, "grad_norm": 7.692280567456548, "learning_rate": 9.993158094667155e-06, "loss": 17.9819, "step": 2526 }, { "epoch": 0.04619152942036668, "grad_norm": 8.8049564707133, "learning_rate": 9.993142605646395e-06, "loss": 18.1562, "step": 2527 }, { "epoch": 0.0462098086168132, "grad_norm": 7.517125465847618, "learning_rate": 9.993127099125104e-06, "loss": 17.8768, "step": 2528 }, { "epoch": 0.04622808781325973, "grad_norm": 7.10833132981136, "learning_rate": 9.993111575103334e-06, "loss": 17.7035, "step": 2529 }, { "epoch": 0.04624636700970625, "grad_norm": 8.616302291529108, "learning_rate": 9.993096033581143e-06, "loss": 18.3784, "step": 2530 }, { "epoch": 0.046264646206152775, "grad_norm": 8.768140196492558, "learning_rate": 9.993080474558584e-06, "loss": 18.3982, "step": 2531 }, { "epoch": 0.046282925402599304, "grad_norm": 8.069596110815922, "learning_rate": 9.993064898035709e-06, "loss": 18.495, "step": 2532 }, { "epoch": 0.046301204599045825, "grad_norm": 6.436102127592379, "learning_rate": 9.993049304012576e-06, "loss": 17.221, "step": 2533 }, { "epoch": 0.04631948379549235, "grad_norm": 9.965611577331051, "learning_rate": 9.993033692489239e-06, "loss": 19.1679, "step": 2534 }, { "epoch": 0.046337762991938876, "grad_norm": 8.699977371157395, "learning_rate": 9.993018063465751e-06, "loss": 18.0354, "step": 2535 }, { "epoch": 0.0463560421883854, "grad_norm": 7.766489544229284, "learning_rate": 9.993002416942171e-06, "loss": 18.1615, "step": 2536 }, { "epoch": 0.04637432138483192, "grad_norm": 7.966456947663706, "learning_rate": 9.992986752918548e-06, "loss": 18.0083, "step": 2537 }, { "epoch": 0.04639260058127845, "grad_norm": 7.8614001915835265, "learning_rate": 9.99297107139494e-06, "loss": 18.0716, "step": 2538 }, { "epoch": 0.04641087977772497, "grad_norm": 7.738265008664061, "learning_rate": 9.9929553723714e-06, "loss": 17.7186, "step": 2539 }, { "epoch": 0.0464291589741715, "grad_norm": 7.755710041382345, "learning_rate": 9.992939655847988e-06, "loss": 17.9862, "step": 2540 }, { "epoch": 0.04644743817061802, "grad_norm": 7.007740920005631, "learning_rate": 9.992923921824754e-06, "loss": 17.6481, "step": 2541 }, { "epoch": 0.04646571736706454, "grad_norm": 7.8676222835982506, "learning_rate": 9.992908170301755e-06, "loss": 17.9741, "step": 2542 }, { "epoch": 0.04648399656351107, "grad_norm": 8.11514801090296, "learning_rate": 9.992892401279047e-06, "loss": 18.0688, "step": 2543 }, { "epoch": 0.04650227575995759, "grad_norm": 7.784713886404579, "learning_rate": 9.992876614756684e-06, "loss": 18.4181, "step": 2544 }, { "epoch": 0.046520554956404114, "grad_norm": 7.55659106472757, "learning_rate": 9.992860810734722e-06, "loss": 17.849, "step": 2545 }, { "epoch": 0.04653883415285064, "grad_norm": 8.141532762416936, "learning_rate": 9.992844989213213e-06, "loss": 18.0762, "step": 2546 }, { "epoch": 0.046557113349297165, "grad_norm": 7.9981572386877495, "learning_rate": 9.99282915019222e-06, "loss": 18.0064, "step": 2547 }, { "epoch": 0.04657539254574369, "grad_norm": 5.807312095679033, "learning_rate": 9.992813293671792e-06, "loss": 17.2469, "step": 2548 }, { "epoch": 0.046593671742190215, "grad_norm": 8.14120649526761, "learning_rate": 9.992797419651988e-06, "loss": 18.2199, "step": 2549 }, { "epoch": 0.04661195093863674, "grad_norm": 7.693158337588116, "learning_rate": 9.99278152813286e-06, "loss": 17.988, "step": 2550 }, { "epoch": 0.04663023013508326, "grad_norm": 8.492286261870182, "learning_rate": 9.992765619114467e-06, "loss": 18.3276, "step": 2551 }, { "epoch": 0.04664850933152979, "grad_norm": 7.146893703259297, "learning_rate": 9.992749692596863e-06, "loss": 17.8245, "step": 2552 }, { "epoch": 0.04666678852797631, "grad_norm": 7.384159350830155, "learning_rate": 9.992733748580108e-06, "loss": 17.9852, "step": 2553 }, { "epoch": 0.04668506772442283, "grad_norm": 8.624144394164366, "learning_rate": 9.99271778706425e-06, "loss": 17.6288, "step": 2554 }, { "epoch": 0.04670334692086936, "grad_norm": 7.930266695448252, "learning_rate": 9.992701808049351e-06, "loss": 18.2687, "step": 2555 }, { "epoch": 0.04672162611731588, "grad_norm": 7.435136965216283, "learning_rate": 9.992685811535467e-06, "loss": 17.8033, "step": 2556 }, { "epoch": 0.04673990531376241, "grad_norm": 7.599543264066056, "learning_rate": 9.992669797522652e-06, "loss": 17.7481, "step": 2557 }, { "epoch": 0.04675818451020893, "grad_norm": 8.429798985553415, "learning_rate": 9.992653766010961e-06, "loss": 18.4653, "step": 2558 }, { "epoch": 0.046776463706655454, "grad_norm": 8.076173987447698, "learning_rate": 9.992637717000453e-06, "loss": 18.2065, "step": 2559 }, { "epoch": 0.04679474290310198, "grad_norm": 8.781536678777618, "learning_rate": 9.992621650491184e-06, "loss": 18.5883, "step": 2560 }, { "epoch": 0.046813022099548504, "grad_norm": 7.444958861920365, "learning_rate": 9.992605566483206e-06, "loss": 17.9761, "step": 2561 }, { "epoch": 0.046831301295995026, "grad_norm": 8.423244032545847, "learning_rate": 9.992589464976581e-06, "loss": 18.1584, "step": 2562 }, { "epoch": 0.046849580492441555, "grad_norm": 7.3249632914321054, "learning_rate": 9.992573345971364e-06, "loss": 18.007, "step": 2563 }, { "epoch": 0.046867859688888076, "grad_norm": 8.690883457836394, "learning_rate": 9.99255720946761e-06, "loss": 18.4969, "step": 2564 }, { "epoch": 0.0468861388853346, "grad_norm": 8.766408683723455, "learning_rate": 9.992541055465374e-06, "loss": 18.6329, "step": 2565 }, { "epoch": 0.04690441808178113, "grad_norm": 7.732559015449722, "learning_rate": 9.992524883964717e-06, "loss": 18.4675, "step": 2566 }, { "epoch": 0.04692269727822765, "grad_norm": 11.206808671649624, "learning_rate": 9.992508694965695e-06, "loss": 19.42, "step": 2567 }, { "epoch": 0.04694097647467417, "grad_norm": 6.774508301080423, "learning_rate": 9.99249248846836e-06, "loss": 17.5349, "step": 2568 }, { "epoch": 0.0469592556711207, "grad_norm": 7.27514549721159, "learning_rate": 9.992476264472774e-06, "loss": 17.8747, "step": 2569 }, { "epoch": 0.04697753486756722, "grad_norm": 7.423724748051973, "learning_rate": 9.992460022978991e-06, "loss": 17.8545, "step": 2570 }, { "epoch": 0.04699581406401374, "grad_norm": 8.096889005274987, "learning_rate": 9.99244376398707e-06, "loss": 18.4889, "step": 2571 }, { "epoch": 0.04701409326046027, "grad_norm": 7.674387346092054, "learning_rate": 9.992427487497065e-06, "loss": 18.1159, "step": 2572 }, { "epoch": 0.04703237245690679, "grad_norm": 7.788396641401589, "learning_rate": 9.992411193509037e-06, "loss": 17.8568, "step": 2573 }, { "epoch": 0.04705065165335332, "grad_norm": 7.8762417225446395, "learning_rate": 9.99239488202304e-06, "loss": 18.0088, "step": 2574 }, { "epoch": 0.047068930849799843, "grad_norm": 7.238786197429148, "learning_rate": 9.992378553039132e-06, "loss": 17.8319, "step": 2575 }, { "epoch": 0.047087210046246365, "grad_norm": 7.068213361844402, "learning_rate": 9.99236220655737e-06, "loss": 17.73, "step": 2576 }, { "epoch": 0.047105489242692894, "grad_norm": 8.780673471941459, "learning_rate": 9.992345842577814e-06, "loss": 18.3554, "step": 2577 }, { "epoch": 0.047123768439139416, "grad_norm": 8.159953183875249, "learning_rate": 9.992329461100517e-06, "loss": 17.8448, "step": 2578 }, { "epoch": 0.04714204763558594, "grad_norm": 7.723023562155313, "learning_rate": 9.992313062125538e-06, "loss": 17.7103, "step": 2579 }, { "epoch": 0.047160326832032466, "grad_norm": 7.616277240305632, "learning_rate": 9.992296645652934e-06, "loss": 17.9065, "step": 2580 }, { "epoch": 0.04717860602847899, "grad_norm": 7.571498465607509, "learning_rate": 9.992280211682766e-06, "loss": 17.9898, "step": 2581 }, { "epoch": 0.04719688522492551, "grad_norm": 9.024191217366022, "learning_rate": 9.992263760215089e-06, "loss": 18.402, "step": 2582 }, { "epoch": 0.04721516442137204, "grad_norm": 7.7871098173690125, "learning_rate": 9.992247291249959e-06, "loss": 18.2878, "step": 2583 }, { "epoch": 0.04723344361781856, "grad_norm": 8.830236557374969, "learning_rate": 9.992230804787438e-06, "loss": 17.8729, "step": 2584 }, { "epoch": 0.04725172281426508, "grad_norm": 7.122543761547274, "learning_rate": 9.992214300827579e-06, "loss": 17.8097, "step": 2585 }, { "epoch": 0.04727000201071161, "grad_norm": 7.615968526497754, "learning_rate": 9.992197779370443e-06, "loss": 17.6749, "step": 2586 }, { "epoch": 0.04728828120715813, "grad_norm": 6.58962357922287, "learning_rate": 9.992181240416086e-06, "loss": 17.2602, "step": 2587 }, { "epoch": 0.047306560403604654, "grad_norm": 7.1704096445101335, "learning_rate": 9.992164683964567e-06, "loss": 17.594, "step": 2588 }, { "epoch": 0.04732483960005118, "grad_norm": 7.340001869666006, "learning_rate": 9.992148110015944e-06, "loss": 17.9317, "step": 2589 }, { "epoch": 0.047343118796497705, "grad_norm": 7.094104168318126, "learning_rate": 9.992131518570276e-06, "loss": 17.5242, "step": 2590 }, { "epoch": 0.04736139799294423, "grad_norm": 8.661227238066143, "learning_rate": 9.99211490962762e-06, "loss": 18.1788, "step": 2591 }, { "epoch": 0.047379677189390755, "grad_norm": 9.758540631505968, "learning_rate": 9.992098283188034e-06, "loss": 18.3244, "step": 2592 }, { "epoch": 0.04739795638583728, "grad_norm": 7.213278668106901, "learning_rate": 9.992081639251578e-06, "loss": 17.8225, "step": 2593 }, { "epoch": 0.047416235582283806, "grad_norm": 7.203090426209105, "learning_rate": 9.992064977818307e-06, "loss": 17.7184, "step": 2594 }, { "epoch": 0.04743451477873033, "grad_norm": 7.4925183784994065, "learning_rate": 9.992048298888284e-06, "loss": 17.9008, "step": 2595 }, { "epoch": 0.04745279397517685, "grad_norm": 7.907230747099184, "learning_rate": 9.992031602461562e-06, "loss": 18.0021, "step": 2596 }, { "epoch": 0.04747107317162338, "grad_norm": 7.467349716623535, "learning_rate": 9.992014888538204e-06, "loss": 17.9413, "step": 2597 }, { "epoch": 0.0474893523680699, "grad_norm": 7.353131603693356, "learning_rate": 9.991998157118267e-06, "loss": 17.9257, "step": 2598 }, { "epoch": 0.04750763156451642, "grad_norm": 8.172457199217725, "learning_rate": 9.99198140820181e-06, "loss": 18.531, "step": 2599 }, { "epoch": 0.04752591076096295, "grad_norm": 7.448640737510543, "learning_rate": 9.991964641788892e-06, "loss": 17.7291, "step": 2600 }, { "epoch": 0.04754418995740947, "grad_norm": 7.752089224294541, "learning_rate": 9.991947857879569e-06, "loss": 18.0091, "step": 2601 }, { "epoch": 0.047562469153855993, "grad_norm": 8.286655052760677, "learning_rate": 9.991931056473902e-06, "loss": 18.1219, "step": 2602 }, { "epoch": 0.04758074835030252, "grad_norm": 7.051573843177877, "learning_rate": 9.991914237571952e-06, "loss": 17.877, "step": 2603 }, { "epoch": 0.047599027546749044, "grad_norm": 7.29651890949779, "learning_rate": 9.991897401173774e-06, "loss": 17.7525, "step": 2604 }, { "epoch": 0.047617306743195566, "grad_norm": 6.765985640866973, "learning_rate": 9.99188054727943e-06, "loss": 17.6648, "step": 2605 }, { "epoch": 0.047635585939642094, "grad_norm": 8.575109016142505, "learning_rate": 9.991863675888977e-06, "loss": 18.3309, "step": 2606 }, { "epoch": 0.047653865136088616, "grad_norm": 8.388452131734812, "learning_rate": 9.991846787002474e-06, "loss": 18.1312, "step": 2607 }, { "epoch": 0.047672144332535145, "grad_norm": 8.464403390656695, "learning_rate": 9.991829880619983e-06, "loss": 18.205, "step": 2608 }, { "epoch": 0.04769042352898167, "grad_norm": 7.73207336863152, "learning_rate": 9.99181295674156e-06, "loss": 18.0825, "step": 2609 }, { "epoch": 0.04770870272542819, "grad_norm": 7.234080259893069, "learning_rate": 9.991796015367267e-06, "loss": 17.675, "step": 2610 }, { "epoch": 0.04772698192187472, "grad_norm": 8.487742452379393, "learning_rate": 9.991779056497161e-06, "loss": 18.1511, "step": 2611 }, { "epoch": 0.04774526111832124, "grad_norm": 9.64341880092905, "learning_rate": 9.991762080131305e-06, "loss": 18.5887, "step": 2612 }, { "epoch": 0.04776354031476776, "grad_norm": 6.820414549309318, "learning_rate": 9.991745086269753e-06, "loss": 17.8486, "step": 2613 }, { "epoch": 0.04778181951121429, "grad_norm": 7.205085987664576, "learning_rate": 9.99172807491257e-06, "loss": 18.1047, "step": 2614 }, { "epoch": 0.04780009870766081, "grad_norm": 8.163994358227953, "learning_rate": 9.991711046059812e-06, "loss": 18.2374, "step": 2615 }, { "epoch": 0.04781837790410733, "grad_norm": 8.120598451868254, "learning_rate": 9.991693999711541e-06, "loss": 17.9739, "step": 2616 }, { "epoch": 0.04783665710055386, "grad_norm": 8.265683503522936, "learning_rate": 9.991676935867813e-06, "loss": 18.0476, "step": 2617 }, { "epoch": 0.04785493629700038, "grad_norm": 8.903684856768098, "learning_rate": 9.991659854528693e-06, "loss": 18.1277, "step": 2618 }, { "epoch": 0.047873215493446905, "grad_norm": 7.700767221730183, "learning_rate": 9.991642755694238e-06, "loss": 17.9356, "step": 2619 }, { "epoch": 0.047891494689893434, "grad_norm": 8.101046234456692, "learning_rate": 9.991625639364508e-06, "loss": 18.0207, "step": 2620 }, { "epoch": 0.047909773886339956, "grad_norm": 7.70355085855746, "learning_rate": 9.991608505539563e-06, "loss": 18.1949, "step": 2621 }, { "epoch": 0.04792805308278648, "grad_norm": 7.208674369499795, "learning_rate": 9.991591354219463e-06, "loss": 17.9747, "step": 2622 }, { "epoch": 0.047946332279233006, "grad_norm": 7.9057625499185615, "learning_rate": 9.991574185404269e-06, "loss": 18.1491, "step": 2623 }, { "epoch": 0.04796461147567953, "grad_norm": 8.137422512066872, "learning_rate": 9.99155699909404e-06, "loss": 17.7673, "step": 2624 }, { "epoch": 0.047982890672126056, "grad_norm": 6.801004677122794, "learning_rate": 9.991539795288839e-06, "loss": 17.7284, "step": 2625 }, { "epoch": 0.04800116986857258, "grad_norm": 7.036059525082331, "learning_rate": 9.991522573988723e-06, "loss": 17.792, "step": 2626 }, { "epoch": 0.0480194490650191, "grad_norm": 6.357666108609581, "learning_rate": 9.991505335193752e-06, "loss": 17.4053, "step": 2627 }, { "epoch": 0.04803772826146563, "grad_norm": 6.981203675681132, "learning_rate": 9.99148807890399e-06, "loss": 17.5025, "step": 2628 }, { "epoch": 0.04805600745791215, "grad_norm": 7.709372655406259, "learning_rate": 9.991470805119494e-06, "loss": 18.0022, "step": 2629 }, { "epoch": 0.04807428665435867, "grad_norm": 8.657897652446362, "learning_rate": 9.991453513840327e-06, "loss": 18.302, "step": 2630 }, { "epoch": 0.0480925658508052, "grad_norm": 8.339666057920908, "learning_rate": 9.991436205066549e-06, "loss": 18.4404, "step": 2631 }, { "epoch": 0.04811084504725172, "grad_norm": 8.137596756990114, "learning_rate": 9.991418878798219e-06, "loss": 18.1143, "step": 2632 }, { "epoch": 0.048129124243698244, "grad_norm": 7.829696040410456, "learning_rate": 9.991401535035398e-06, "loss": 17.6556, "step": 2633 }, { "epoch": 0.04814740344014477, "grad_norm": 7.93700230501803, "learning_rate": 9.99138417377815e-06, "loss": 17.9485, "step": 2634 }, { "epoch": 0.048165682636591295, "grad_norm": 7.596326532724841, "learning_rate": 9.991366795026533e-06, "loss": 17.8489, "step": 2635 }, { "epoch": 0.04818396183303782, "grad_norm": 7.7847163418671865, "learning_rate": 9.991349398780607e-06, "loss": 18.0203, "step": 2636 }, { "epoch": 0.048202241029484345, "grad_norm": 8.30779716323521, "learning_rate": 9.991331985040437e-06, "loss": 18.1566, "step": 2637 }, { "epoch": 0.04822052022593087, "grad_norm": 7.767510160412045, "learning_rate": 9.991314553806079e-06, "loss": 17.8402, "step": 2638 }, { "epoch": 0.04823879942237739, "grad_norm": 24.00078297240223, "learning_rate": 9.9912971050776e-06, "loss": 19.0444, "step": 2639 }, { "epoch": 0.04825707861882392, "grad_norm": 8.314372895709644, "learning_rate": 9.991279638855056e-06, "loss": 18.3847, "step": 2640 }, { "epoch": 0.04827535781527044, "grad_norm": 6.9919850584153265, "learning_rate": 9.991262155138507e-06, "loss": 18.0172, "step": 2641 }, { "epoch": 0.04829363701171697, "grad_norm": 7.086213799597908, "learning_rate": 9.991244653928021e-06, "loss": 17.582, "step": 2642 }, { "epoch": 0.04831191620816349, "grad_norm": 8.058177867690771, "learning_rate": 9.991227135223656e-06, "loss": 18.4592, "step": 2643 }, { "epoch": 0.04833019540461001, "grad_norm": 7.769721264013488, "learning_rate": 9.991209599025471e-06, "loss": 18.0455, "step": 2644 }, { "epoch": 0.04834847460105654, "grad_norm": 7.232355571336095, "learning_rate": 9.991192045333529e-06, "loss": 17.6612, "step": 2645 }, { "epoch": 0.04836675379750306, "grad_norm": 8.41234655633902, "learning_rate": 9.991174474147892e-06, "loss": 18.1701, "step": 2646 }, { "epoch": 0.048385032993949584, "grad_norm": 8.208951686838457, "learning_rate": 9.991156885468626e-06, "loss": 18.4221, "step": 2647 }, { "epoch": 0.04840331219039611, "grad_norm": 6.2405343761220315, "learning_rate": 9.991139279295784e-06, "loss": 17.4688, "step": 2648 }, { "epoch": 0.048421591386842634, "grad_norm": 8.138753409541787, "learning_rate": 9.991121655629433e-06, "loss": 18.0926, "step": 2649 }, { "epoch": 0.048439870583289156, "grad_norm": 7.509042346002413, "learning_rate": 9.991104014469633e-06, "loss": 18.0035, "step": 2650 }, { "epoch": 0.048458149779735685, "grad_norm": 9.058818337912584, "learning_rate": 9.991086355816445e-06, "loss": 18.5006, "step": 2651 }, { "epoch": 0.048476428976182206, "grad_norm": 7.074231101969972, "learning_rate": 9.991068679669936e-06, "loss": 17.4708, "step": 2652 }, { "epoch": 0.04849470817262873, "grad_norm": 8.563493350493948, "learning_rate": 9.991050986030163e-06, "loss": 18.3651, "step": 2653 }, { "epoch": 0.04851298736907526, "grad_norm": 7.195424460601591, "learning_rate": 9.991033274897189e-06, "loss": 17.8509, "step": 2654 }, { "epoch": 0.04853126656552178, "grad_norm": 7.36165428435057, "learning_rate": 9.991015546271076e-06, "loss": 17.838, "step": 2655 }, { "epoch": 0.0485495457619683, "grad_norm": 7.307704359588099, "learning_rate": 9.990997800151888e-06, "loss": 17.606, "step": 2656 }, { "epoch": 0.04856782495841483, "grad_norm": 7.144856676722134, "learning_rate": 9.990980036539683e-06, "loss": 17.7495, "step": 2657 }, { "epoch": 0.04858610415486135, "grad_norm": 8.362609901603209, "learning_rate": 9.99096225543453e-06, "loss": 18.3898, "step": 2658 }, { "epoch": 0.04860438335130788, "grad_norm": 7.095504670362594, "learning_rate": 9.990944456836485e-06, "loss": 17.6543, "step": 2659 }, { "epoch": 0.0486226625477544, "grad_norm": 7.662936816391292, "learning_rate": 9.990926640745612e-06, "loss": 17.8874, "step": 2660 }, { "epoch": 0.04864094174420092, "grad_norm": 7.528980619883442, "learning_rate": 9.990908807161976e-06, "loss": 17.9651, "step": 2661 }, { "epoch": 0.04865922094064745, "grad_norm": 7.575553019127141, "learning_rate": 9.990890956085638e-06, "loss": 18.044, "step": 2662 }, { "epoch": 0.048677500137093974, "grad_norm": 6.732173943432213, "learning_rate": 9.99087308751666e-06, "loss": 17.5589, "step": 2663 }, { "epoch": 0.048695779333540495, "grad_norm": 7.923031811798792, "learning_rate": 9.990855201455104e-06, "loss": 18.3133, "step": 2664 }, { "epoch": 0.048714058529987024, "grad_norm": 8.057810619771036, "learning_rate": 9.990837297901034e-06, "loss": 18.3014, "step": 2665 }, { "epoch": 0.048732337726433546, "grad_norm": 8.077171777686797, "learning_rate": 9.990819376854512e-06, "loss": 18.3852, "step": 2666 }, { "epoch": 0.04875061692288007, "grad_norm": 6.893345504430657, "learning_rate": 9.990801438315603e-06, "loss": 17.6469, "step": 2667 }, { "epoch": 0.048768896119326596, "grad_norm": 7.148773305250935, "learning_rate": 9.990783482284366e-06, "loss": 17.9787, "step": 2668 }, { "epoch": 0.04878717531577312, "grad_norm": 7.814760911182298, "learning_rate": 9.990765508760867e-06, "loss": 17.8342, "step": 2669 }, { "epoch": 0.04880545451221964, "grad_norm": 7.085645296399639, "learning_rate": 9.990747517745168e-06, "loss": 17.7406, "step": 2670 }, { "epoch": 0.04882373370866617, "grad_norm": 7.047769382770596, "learning_rate": 9.99072950923733e-06, "loss": 17.857, "step": 2671 }, { "epoch": 0.04884201290511269, "grad_norm": 8.144642378718615, "learning_rate": 9.99071148323742e-06, "loss": 18.29, "step": 2672 }, { "epoch": 0.04886029210155921, "grad_norm": 9.409388533866025, "learning_rate": 9.9906934397455e-06, "loss": 18.1425, "step": 2673 }, { "epoch": 0.04887857129800574, "grad_norm": 7.787783845583783, "learning_rate": 9.99067537876163e-06, "loss": 17.5491, "step": 2674 }, { "epoch": 0.04889685049445226, "grad_norm": 7.101698957878979, "learning_rate": 9.990657300285878e-06, "loss": 17.5371, "step": 2675 }, { "epoch": 0.04891512969089879, "grad_norm": 7.236285902937417, "learning_rate": 9.990639204318305e-06, "loss": 17.491, "step": 2676 }, { "epoch": 0.04893340888734531, "grad_norm": 6.455738269410737, "learning_rate": 9.990621090858974e-06, "loss": 17.3108, "step": 2677 }, { "epoch": 0.048951688083791835, "grad_norm": 7.938276799312635, "learning_rate": 9.99060295990795e-06, "loss": 18.1582, "step": 2678 }, { "epoch": 0.04896996728023836, "grad_norm": 7.514158271321571, "learning_rate": 9.990584811465295e-06, "loss": 17.871, "step": 2679 }, { "epoch": 0.048988246476684885, "grad_norm": 7.4360056843811755, "learning_rate": 9.990566645531074e-06, "loss": 17.8645, "step": 2680 }, { "epoch": 0.04900652567313141, "grad_norm": 7.44016188929528, "learning_rate": 9.99054846210535e-06, "loss": 17.5871, "step": 2681 }, { "epoch": 0.049024804869577936, "grad_norm": 7.422628097035798, "learning_rate": 9.990530261188185e-06, "loss": 17.8896, "step": 2682 }, { "epoch": 0.04904308406602446, "grad_norm": 8.248708541352205, "learning_rate": 9.990512042779646e-06, "loss": 18.092, "step": 2683 }, { "epoch": 0.04906136326247098, "grad_norm": 9.317770876223529, "learning_rate": 9.990493806879795e-06, "loss": 18.4137, "step": 2684 }, { "epoch": 0.04907964245891751, "grad_norm": 8.191308927578604, "learning_rate": 9.990475553488697e-06, "loss": 18.0061, "step": 2685 }, { "epoch": 0.04909792165536403, "grad_norm": 8.381473981870313, "learning_rate": 9.990457282606413e-06, "loss": 18.1359, "step": 2686 }, { "epoch": 0.04911620085181055, "grad_norm": 8.144436950856923, "learning_rate": 9.990438994233012e-06, "loss": 17.9591, "step": 2687 }, { "epoch": 0.04913448004825708, "grad_norm": 9.027632766190614, "learning_rate": 9.990420688368553e-06, "loss": 18.4355, "step": 2688 }, { "epoch": 0.0491527592447036, "grad_norm": 8.154001017546962, "learning_rate": 9.990402365013106e-06, "loss": 18.1974, "step": 2689 }, { "epoch": 0.049171038441150124, "grad_norm": 7.254699510203792, "learning_rate": 9.990384024166729e-06, "loss": 17.431, "step": 2690 }, { "epoch": 0.04918931763759665, "grad_norm": 6.519549556102076, "learning_rate": 9.990365665829489e-06, "loss": 17.2872, "step": 2691 }, { "epoch": 0.049207596834043174, "grad_norm": 8.202965101946765, "learning_rate": 9.99034729000145e-06, "loss": 18.3197, "step": 2692 }, { "epoch": 0.0492258760304897, "grad_norm": 6.895803622581931, "learning_rate": 9.990328896682678e-06, "loss": 17.5714, "step": 2693 }, { "epoch": 0.049244155226936225, "grad_norm": 6.846212451053361, "learning_rate": 9.990310485873236e-06, "loss": 17.408, "step": 2694 }, { "epoch": 0.049262434423382746, "grad_norm": 8.374596122436799, "learning_rate": 9.990292057573188e-06, "loss": 18.2443, "step": 2695 }, { "epoch": 0.049280713619829275, "grad_norm": 8.820118880435693, "learning_rate": 9.990273611782602e-06, "loss": 18.427, "step": 2696 }, { "epoch": 0.0492989928162758, "grad_norm": 7.0649054904768995, "learning_rate": 9.990255148501537e-06, "loss": 17.8637, "step": 2697 }, { "epoch": 0.04931727201272232, "grad_norm": 7.902482265181302, "learning_rate": 9.990236667730062e-06, "loss": 18.1622, "step": 2698 }, { "epoch": 0.04933555120916885, "grad_norm": 7.266759864899283, "learning_rate": 9.99021816946824e-06, "loss": 17.6958, "step": 2699 }, { "epoch": 0.04935383040561537, "grad_norm": 8.263652197088238, "learning_rate": 9.990199653716138e-06, "loss": 18.38, "step": 2700 }, { "epoch": 0.04937210960206189, "grad_norm": 8.477531153024145, "learning_rate": 9.990181120473816e-06, "loss": 18.3485, "step": 2701 }, { "epoch": 0.04939038879850842, "grad_norm": 7.514234356040806, "learning_rate": 9.990162569741346e-06, "loss": 17.9061, "step": 2702 }, { "epoch": 0.04940866799495494, "grad_norm": 8.407196745090438, "learning_rate": 9.990144001518787e-06, "loss": 18.3472, "step": 2703 }, { "epoch": 0.04942694719140146, "grad_norm": 7.354320770017759, "learning_rate": 9.990125415806207e-06, "loss": 17.6776, "step": 2704 }, { "epoch": 0.04944522638784799, "grad_norm": 7.396485600416532, "learning_rate": 9.990106812603671e-06, "loss": 17.9695, "step": 2705 }, { "epoch": 0.04946350558429451, "grad_norm": 7.814055803305334, "learning_rate": 9.990088191911242e-06, "loss": 18.1308, "step": 2706 }, { "epoch": 0.049481784780741035, "grad_norm": 7.437319022808082, "learning_rate": 9.990069553728986e-06, "loss": 17.9965, "step": 2707 }, { "epoch": 0.049500063977187564, "grad_norm": 7.669524625555445, "learning_rate": 9.990050898056973e-06, "loss": 18.1744, "step": 2708 }, { "epoch": 0.049518343173634086, "grad_norm": 7.536290606373893, "learning_rate": 9.99003222489526e-06, "loss": 18.0637, "step": 2709 }, { "epoch": 0.049536622370080614, "grad_norm": 8.743718557438587, "learning_rate": 9.99001353424392e-06, "loss": 18.8466, "step": 2710 }, { "epoch": 0.049554901566527136, "grad_norm": 8.740644797272857, "learning_rate": 9.989994826103017e-06, "loss": 18.5599, "step": 2711 }, { "epoch": 0.04957318076297366, "grad_norm": 6.463562885126097, "learning_rate": 9.989976100472612e-06, "loss": 17.3284, "step": 2712 }, { "epoch": 0.04959145995942019, "grad_norm": 7.729746826931038, "learning_rate": 9.989957357352775e-06, "loss": 18.1162, "step": 2713 }, { "epoch": 0.04960973915586671, "grad_norm": 8.397131288738244, "learning_rate": 9.989938596743571e-06, "loss": 18.1535, "step": 2714 }, { "epoch": 0.04962801835231323, "grad_norm": 7.299517582885823, "learning_rate": 9.989919818645066e-06, "loss": 17.8696, "step": 2715 }, { "epoch": 0.04964629754875976, "grad_norm": 6.118793825633256, "learning_rate": 9.989901023057323e-06, "loss": 17.3392, "step": 2716 }, { "epoch": 0.04966457674520628, "grad_norm": 8.087623894782654, "learning_rate": 9.989882209980411e-06, "loss": 18.0371, "step": 2717 }, { "epoch": 0.0496828559416528, "grad_norm": 7.182842784130826, "learning_rate": 9.989863379414395e-06, "loss": 17.953, "step": 2718 }, { "epoch": 0.04970113513809933, "grad_norm": 7.789297634414667, "learning_rate": 9.98984453135934e-06, "loss": 18.2924, "step": 2719 }, { "epoch": 0.04971941433454585, "grad_norm": 7.484815689910988, "learning_rate": 9.989825665815314e-06, "loss": 17.7452, "step": 2720 }, { "epoch": 0.049737693530992375, "grad_norm": 8.464123159713012, "learning_rate": 9.989806782782381e-06, "loss": 18.1465, "step": 2721 }, { "epoch": 0.0497559727274389, "grad_norm": 7.29323077246332, "learning_rate": 9.989787882260609e-06, "loss": 17.7274, "step": 2722 }, { "epoch": 0.049774251923885425, "grad_norm": 7.370170116436082, "learning_rate": 9.989768964250062e-06, "loss": 17.826, "step": 2723 }, { "epoch": 0.04979253112033195, "grad_norm": 8.555492267977986, "learning_rate": 9.98975002875081e-06, "loss": 18.3993, "step": 2724 }, { "epoch": 0.049810810316778475, "grad_norm": 7.738168820238301, "learning_rate": 9.989731075762916e-06, "loss": 18.149, "step": 2725 }, { "epoch": 0.049829089513225, "grad_norm": 6.684681111753755, "learning_rate": 9.989712105286446e-06, "loss": 17.6446, "step": 2726 }, { "epoch": 0.049847368709671526, "grad_norm": 7.423002418597444, "learning_rate": 9.98969311732147e-06, "loss": 17.9281, "step": 2727 }, { "epoch": 0.04986564790611805, "grad_norm": 7.046937825717382, "learning_rate": 9.989674111868052e-06, "loss": 17.6348, "step": 2728 }, { "epoch": 0.04988392710256457, "grad_norm": 7.3771534692869, "learning_rate": 9.98965508892626e-06, "loss": 17.5365, "step": 2729 }, { "epoch": 0.0499022062990111, "grad_norm": 7.3746896958532995, "learning_rate": 9.98963604849616e-06, "loss": 17.8783, "step": 2730 }, { "epoch": 0.04992048549545762, "grad_norm": 7.743231553670266, "learning_rate": 9.989616990577817e-06, "loss": 17.9597, "step": 2731 }, { "epoch": 0.04993876469190414, "grad_norm": 9.20320942441712, "learning_rate": 9.9895979151713e-06, "loss": 18.9336, "step": 2732 }, { "epoch": 0.04995704388835067, "grad_norm": 7.675649772850519, "learning_rate": 9.989578822276678e-06, "loss": 18.2667, "step": 2733 }, { "epoch": 0.04997532308479719, "grad_norm": 7.708890407167971, "learning_rate": 9.989559711894012e-06, "loss": 18.1385, "step": 2734 }, { "epoch": 0.049993602281243714, "grad_norm": 6.9503072081652215, "learning_rate": 9.989540584023374e-06, "loss": 17.496, "step": 2735 }, { "epoch": 0.05001188147769024, "grad_norm": 7.330806638692699, "learning_rate": 9.989521438664828e-06, "loss": 17.7176, "step": 2736 }, { "epoch": 0.050030160674136764, "grad_norm": 9.208583128486223, "learning_rate": 9.989502275818443e-06, "loss": 18.4525, "step": 2737 }, { "epoch": 0.050048439870583286, "grad_norm": 6.781366804392853, "learning_rate": 9.989483095484284e-06, "loss": 17.7662, "step": 2738 }, { "epoch": 0.050066719067029815, "grad_norm": 7.084519030626298, "learning_rate": 9.989463897662421e-06, "loss": 17.546, "step": 2739 }, { "epoch": 0.05008499826347634, "grad_norm": 6.586986390290971, "learning_rate": 9.98944468235292e-06, "loss": 17.7685, "step": 2740 }, { "epoch": 0.05010327745992286, "grad_norm": 9.214525055319351, "learning_rate": 9.989425449555848e-06, "loss": 18.6747, "step": 2741 }, { "epoch": 0.05012155665636939, "grad_norm": 8.345511864208909, "learning_rate": 9.989406199271273e-06, "loss": 18.0107, "step": 2742 }, { "epoch": 0.05013983585281591, "grad_norm": 6.7572346177421965, "learning_rate": 9.989386931499264e-06, "loss": 17.6164, "step": 2743 }, { "epoch": 0.05015811504926244, "grad_norm": 8.56372854986229, "learning_rate": 9.989367646239883e-06, "loss": 18.5043, "step": 2744 }, { "epoch": 0.05017639424570896, "grad_norm": 7.870519450132207, "learning_rate": 9.989348343493204e-06, "loss": 18.1654, "step": 2745 }, { "epoch": 0.05019467344215548, "grad_norm": 11.299407608036292, "learning_rate": 9.989329023259291e-06, "loss": 17.8543, "step": 2746 }, { "epoch": 0.05021295263860201, "grad_norm": 6.967217079294844, "learning_rate": 9.989309685538214e-06, "loss": 17.8087, "step": 2747 }, { "epoch": 0.05023123183504853, "grad_norm": 7.634909711788494, "learning_rate": 9.989290330330038e-06, "loss": 17.8961, "step": 2748 }, { "epoch": 0.05024951103149505, "grad_norm": 8.418568458532993, "learning_rate": 9.989270957634834e-06, "loss": 18.7163, "step": 2749 }, { "epoch": 0.05026779022794158, "grad_norm": 8.256148404351109, "learning_rate": 9.989251567452668e-06, "loss": 18.1897, "step": 2750 }, { "epoch": 0.050286069424388104, "grad_norm": 7.9624461708300975, "learning_rate": 9.989232159783606e-06, "loss": 17.9329, "step": 2751 }, { "epoch": 0.050304348620834625, "grad_norm": 8.20349321365802, "learning_rate": 9.98921273462772e-06, "loss": 17.9774, "step": 2752 }, { "epoch": 0.050322627817281154, "grad_norm": 8.250555558527648, "learning_rate": 9.989193291985076e-06, "loss": 18.4838, "step": 2753 }, { "epoch": 0.050340907013727676, "grad_norm": 7.451051830766465, "learning_rate": 9.98917383185574e-06, "loss": 17.7306, "step": 2754 }, { "epoch": 0.0503591862101742, "grad_norm": 7.542378455227766, "learning_rate": 9.989154354239787e-06, "loss": 18.0032, "step": 2755 }, { "epoch": 0.050377465406620726, "grad_norm": 8.392777781864194, "learning_rate": 9.989134859137279e-06, "loss": 18.193, "step": 2756 }, { "epoch": 0.05039574460306725, "grad_norm": 7.408888359247001, "learning_rate": 9.989115346548285e-06, "loss": 18.0566, "step": 2757 }, { "epoch": 0.05041402379951377, "grad_norm": 7.532589654429319, "learning_rate": 9.989095816472876e-06, "loss": 17.8037, "step": 2758 }, { "epoch": 0.0504323029959603, "grad_norm": 7.664168018715292, "learning_rate": 9.989076268911119e-06, "loss": 18.1644, "step": 2759 }, { "epoch": 0.05045058219240682, "grad_norm": 7.5215914971492115, "learning_rate": 9.989056703863083e-06, "loss": 17.9341, "step": 2760 }, { "epoch": 0.05046886138885335, "grad_norm": 7.206447022471866, "learning_rate": 9.989037121328835e-06, "loss": 17.9137, "step": 2761 }, { "epoch": 0.05048714058529987, "grad_norm": 7.76329719211878, "learning_rate": 9.989017521308447e-06, "loss": 18.292, "step": 2762 }, { "epoch": 0.05050541978174639, "grad_norm": 7.716220081439177, "learning_rate": 9.988997903801984e-06, "loss": 17.5243, "step": 2763 }, { "epoch": 0.05052369897819292, "grad_norm": 8.135237247062651, "learning_rate": 9.988978268809516e-06, "loss": 18.4224, "step": 2764 }, { "epoch": 0.05054197817463944, "grad_norm": 7.340949505261392, "learning_rate": 9.988958616331112e-06, "loss": 17.8489, "step": 2765 }, { "epoch": 0.050560257371085965, "grad_norm": 8.604957659993723, "learning_rate": 9.988938946366841e-06, "loss": 18.1277, "step": 2766 }, { "epoch": 0.050578536567532494, "grad_norm": 6.703904060868726, "learning_rate": 9.988919258916775e-06, "loss": 17.6016, "step": 2767 }, { "epoch": 0.050596815763979015, "grad_norm": 7.615904567530197, "learning_rate": 9.988899553980975e-06, "loss": 18.0835, "step": 2768 }, { "epoch": 0.05061509496042554, "grad_norm": 7.656346752005079, "learning_rate": 9.988879831559516e-06, "loss": 17.724, "step": 2769 }, { "epoch": 0.050633374156872066, "grad_norm": 7.6703645021028946, "learning_rate": 9.988860091652469e-06, "loss": 17.9447, "step": 2770 }, { "epoch": 0.05065165335331859, "grad_norm": 6.133725110298065, "learning_rate": 9.988840334259898e-06, "loss": 17.2218, "step": 2771 }, { "epoch": 0.05066993254976511, "grad_norm": 8.109487016435057, "learning_rate": 9.988820559381876e-06, "loss": 18.2571, "step": 2772 }, { "epoch": 0.05068821174621164, "grad_norm": 6.865556555909455, "learning_rate": 9.98880076701847e-06, "loss": 17.6203, "step": 2773 }, { "epoch": 0.05070649094265816, "grad_norm": 8.299023898912916, "learning_rate": 9.98878095716975e-06, "loss": 17.972, "step": 2774 }, { "epoch": 0.05072477013910468, "grad_norm": 7.050039866717872, "learning_rate": 9.988761129835787e-06, "loss": 17.6643, "step": 2775 }, { "epoch": 0.05074304933555121, "grad_norm": 7.070438246786039, "learning_rate": 9.988741285016648e-06, "loss": 17.748, "step": 2776 }, { "epoch": 0.05076132853199773, "grad_norm": 7.422909549351179, "learning_rate": 9.988721422712401e-06, "loss": 17.976, "step": 2777 }, { "epoch": 0.05077960772844426, "grad_norm": 7.5322755767621326, "learning_rate": 9.988701542923122e-06, "loss": 17.9389, "step": 2778 }, { "epoch": 0.05079788692489078, "grad_norm": 7.267922269520265, "learning_rate": 9.988681645648876e-06, "loss": 17.4306, "step": 2779 }, { "epoch": 0.050816166121337304, "grad_norm": 10.366644639631332, "learning_rate": 9.988661730889734e-06, "loss": 18.8708, "step": 2780 }, { "epoch": 0.05083444531778383, "grad_norm": 8.489072949808808, "learning_rate": 9.988641798645767e-06, "loss": 18.211, "step": 2781 }, { "epoch": 0.050852724514230355, "grad_norm": 7.94867403158134, "learning_rate": 9.98862184891704e-06, "loss": 17.809, "step": 2782 }, { "epoch": 0.050871003710676876, "grad_norm": 8.308052879184427, "learning_rate": 9.988601881703628e-06, "loss": 18.4109, "step": 2783 }, { "epoch": 0.050889282907123405, "grad_norm": 6.382505749570598, "learning_rate": 9.9885818970056e-06, "loss": 17.5987, "step": 2784 }, { "epoch": 0.05090756210356993, "grad_norm": 8.65878624516278, "learning_rate": 9.988561894823025e-06, "loss": 18.5557, "step": 2785 }, { "epoch": 0.05092584130001645, "grad_norm": 7.704704682550004, "learning_rate": 9.988541875155972e-06, "loss": 18.2683, "step": 2786 }, { "epoch": 0.05094412049646298, "grad_norm": 7.87920528871043, "learning_rate": 9.988521838004515e-06, "loss": 17.8086, "step": 2787 }, { "epoch": 0.0509623996929095, "grad_norm": 9.26821564888294, "learning_rate": 9.98850178336872e-06, "loss": 18.5718, "step": 2788 }, { "epoch": 0.05098067888935602, "grad_norm": 6.667709687642425, "learning_rate": 9.988481711248659e-06, "loss": 17.7336, "step": 2789 }, { "epoch": 0.05099895808580255, "grad_norm": 7.861762973631642, "learning_rate": 9.988461621644403e-06, "loss": 17.9559, "step": 2790 }, { "epoch": 0.05101723728224907, "grad_norm": 8.98523827817954, "learning_rate": 9.988441514556022e-06, "loss": 18.5481, "step": 2791 }, { "epoch": 0.05103551647869559, "grad_norm": 7.411424716591436, "learning_rate": 9.988421389983588e-06, "loss": 17.8808, "step": 2792 }, { "epoch": 0.05105379567514212, "grad_norm": 7.7931229957919035, "learning_rate": 9.988401247927167e-06, "loss": 17.8705, "step": 2793 }, { "epoch": 0.051072074871588644, "grad_norm": 8.198926676791432, "learning_rate": 9.988381088386833e-06, "loss": 18.1572, "step": 2794 }, { "epoch": 0.05109035406803517, "grad_norm": 7.361559633425119, "learning_rate": 9.988360911362658e-06, "loss": 17.7629, "step": 2795 }, { "epoch": 0.051108633264481694, "grad_norm": 8.045631817199665, "learning_rate": 9.98834071685471e-06, "loss": 18.3438, "step": 2796 }, { "epoch": 0.051126912460928216, "grad_norm": 6.775119038069602, "learning_rate": 9.98832050486306e-06, "loss": 17.5067, "step": 2797 }, { "epoch": 0.051145191657374744, "grad_norm": 7.169523145096916, "learning_rate": 9.988300275387781e-06, "loss": 17.9069, "step": 2798 }, { "epoch": 0.051163470853821266, "grad_norm": 9.458653998953265, "learning_rate": 9.98828002842894e-06, "loss": 18.2526, "step": 2799 }, { "epoch": 0.05118175005026779, "grad_norm": 7.221610231316801, "learning_rate": 9.988259763986613e-06, "loss": 17.6764, "step": 2800 }, { "epoch": 0.05120002924671432, "grad_norm": 8.594630950349286, "learning_rate": 9.988239482060867e-06, "loss": 18.0721, "step": 2801 }, { "epoch": 0.05121830844316084, "grad_norm": 7.934044726120394, "learning_rate": 9.988219182651775e-06, "loss": 18.1069, "step": 2802 }, { "epoch": 0.05123658763960736, "grad_norm": 9.296702673248296, "learning_rate": 9.988198865759406e-06, "loss": 18.4842, "step": 2803 }, { "epoch": 0.05125486683605389, "grad_norm": 7.743424084410392, "learning_rate": 9.988178531383834e-06, "loss": 18.0088, "step": 2804 }, { "epoch": 0.05127314603250041, "grad_norm": 7.314397756537535, "learning_rate": 9.98815817952513e-06, "loss": 17.5609, "step": 2805 }, { "epoch": 0.05129142522894693, "grad_norm": 8.346376439142999, "learning_rate": 9.988137810183363e-06, "loss": 18.2768, "step": 2806 }, { "epoch": 0.05130970442539346, "grad_norm": 7.676496545059605, "learning_rate": 9.988117423358607e-06, "loss": 18.0011, "step": 2807 }, { "epoch": 0.05132798362183998, "grad_norm": 6.422274098613539, "learning_rate": 9.988097019050932e-06, "loss": 17.4351, "step": 2808 }, { "epoch": 0.051346262818286505, "grad_norm": 8.137567816071021, "learning_rate": 9.98807659726041e-06, "loss": 17.8924, "step": 2809 }, { "epoch": 0.05136454201473303, "grad_norm": 8.880395227381214, "learning_rate": 9.988056157987111e-06, "loss": 18.3481, "step": 2810 }, { "epoch": 0.051382821211179555, "grad_norm": 7.717293646585981, "learning_rate": 9.988035701231108e-06, "loss": 18.0143, "step": 2811 }, { "epoch": 0.051401100407626084, "grad_norm": 7.959960217519171, "learning_rate": 9.988015226992476e-06, "loss": 18.2028, "step": 2812 }, { "epoch": 0.051419379604072606, "grad_norm": 7.950395785233589, "learning_rate": 9.98799473527128e-06, "loss": 18.3118, "step": 2813 }, { "epoch": 0.05143765880051913, "grad_norm": 7.384008026563201, "learning_rate": 9.987974226067597e-06, "loss": 17.6602, "step": 2814 }, { "epoch": 0.051455937996965656, "grad_norm": 9.229153952983667, "learning_rate": 9.987953699381496e-06, "loss": 18.5138, "step": 2815 }, { "epoch": 0.05147421719341218, "grad_norm": 8.544361818534913, "learning_rate": 9.98793315521305e-06, "loss": 19.0313, "step": 2816 }, { "epoch": 0.0514924963898587, "grad_norm": 8.774064474288327, "learning_rate": 9.987912593562332e-06, "loss": 17.9237, "step": 2817 }, { "epoch": 0.05151077558630523, "grad_norm": 7.47245073365734, "learning_rate": 9.987892014429414e-06, "loss": 17.6392, "step": 2818 }, { "epoch": 0.05152905478275175, "grad_norm": 7.947494112070558, "learning_rate": 9.987871417814365e-06, "loss": 18.1905, "step": 2819 }, { "epoch": 0.05154733397919827, "grad_norm": 7.694083457772169, "learning_rate": 9.987850803717262e-06, "loss": 17.829, "step": 2820 }, { "epoch": 0.0515656131756448, "grad_norm": 7.212210507516075, "learning_rate": 9.987830172138174e-06, "loss": 17.9155, "step": 2821 }, { "epoch": 0.05158389237209132, "grad_norm": 7.334533983864379, "learning_rate": 9.987809523077173e-06, "loss": 17.5396, "step": 2822 }, { "epoch": 0.051602171568537844, "grad_norm": 8.101411659752552, "learning_rate": 9.987788856534333e-06, "loss": 18.021, "step": 2823 }, { "epoch": 0.05162045076498437, "grad_norm": 6.834266760261926, "learning_rate": 9.987768172509727e-06, "loss": 17.5892, "step": 2824 }, { "epoch": 0.051638729961430894, "grad_norm": 7.733353573907947, "learning_rate": 9.987747471003424e-06, "loss": 18.3115, "step": 2825 }, { "epoch": 0.051657009157877416, "grad_norm": 7.736141934885637, "learning_rate": 9.9877267520155e-06, "loss": 18.0004, "step": 2826 }, { "epoch": 0.051675288354323945, "grad_norm": 7.272801249753164, "learning_rate": 9.987706015546027e-06, "loss": 17.8209, "step": 2827 }, { "epoch": 0.05169356755077047, "grad_norm": 6.3552622795233145, "learning_rate": 9.987685261595077e-06, "loss": 17.6643, "step": 2828 }, { "epoch": 0.051711846747216995, "grad_norm": 7.530113854779712, "learning_rate": 9.987664490162722e-06, "loss": 18.1216, "step": 2829 }, { "epoch": 0.05173012594366352, "grad_norm": 6.3255561749029425, "learning_rate": 9.987643701249038e-06, "loss": 17.3771, "step": 2830 }, { "epoch": 0.05174840514011004, "grad_norm": 7.601770793981465, "learning_rate": 9.987622894854094e-06, "loss": 17.9845, "step": 2831 }, { "epoch": 0.05176668433655657, "grad_norm": 7.655184473142678, "learning_rate": 9.987602070977966e-06, "loss": 17.804, "step": 2832 }, { "epoch": 0.05178496353300309, "grad_norm": 6.22552614974228, "learning_rate": 9.987581229620724e-06, "loss": 17.4074, "step": 2833 }, { "epoch": 0.05180324272944961, "grad_norm": 8.24580283423378, "learning_rate": 9.987560370782443e-06, "loss": 18.3363, "step": 2834 }, { "epoch": 0.05182152192589614, "grad_norm": 8.827466730094583, "learning_rate": 9.987539494463197e-06, "loss": 18.5687, "step": 2835 }, { "epoch": 0.05183980112234266, "grad_norm": 8.22822456444839, "learning_rate": 9.987518600663055e-06, "loss": 18.0483, "step": 2836 }, { "epoch": 0.05185808031878918, "grad_norm": 7.6572746032820636, "learning_rate": 9.987497689382095e-06, "loss": 17.8647, "step": 2837 }, { "epoch": 0.05187635951523571, "grad_norm": 7.507526556204351, "learning_rate": 9.987476760620389e-06, "loss": 17.7855, "step": 2838 }, { "epoch": 0.051894638711682234, "grad_norm": 8.800249578147918, "learning_rate": 9.987455814378008e-06, "loss": 18.3258, "step": 2839 }, { "epoch": 0.051912917908128756, "grad_norm": 8.583545120493428, "learning_rate": 9.98743485065503e-06, "loss": 18.4643, "step": 2840 }, { "epoch": 0.051931197104575284, "grad_norm": 8.589006782220283, "learning_rate": 9.987413869451522e-06, "loss": 18.3036, "step": 2841 }, { "epoch": 0.051949476301021806, "grad_norm": 8.02098650833697, "learning_rate": 9.987392870767563e-06, "loss": 18.0703, "step": 2842 }, { "epoch": 0.05196775549746833, "grad_norm": 8.222004790638712, "learning_rate": 9.987371854603226e-06, "loss": 18.2864, "step": 2843 }, { "epoch": 0.05198603469391486, "grad_norm": 6.197485706489133, "learning_rate": 9.987350820958581e-06, "loss": 17.4297, "step": 2844 }, { "epoch": 0.05200431389036138, "grad_norm": 7.830326125569888, "learning_rate": 9.987329769833705e-06, "loss": 18.1324, "step": 2845 }, { "epoch": 0.05202259308680791, "grad_norm": 8.706779415610775, "learning_rate": 9.987308701228672e-06, "loss": 18.2751, "step": 2846 }, { "epoch": 0.05204087228325443, "grad_norm": 6.159749869858416, "learning_rate": 9.987287615143554e-06, "loss": 17.5055, "step": 2847 }, { "epoch": 0.05205915147970095, "grad_norm": 8.608441703583498, "learning_rate": 9.987266511578425e-06, "loss": 18.2668, "step": 2848 }, { "epoch": 0.05207743067614748, "grad_norm": 8.791378746879808, "learning_rate": 9.98724539053336e-06, "loss": 18.3234, "step": 2849 }, { "epoch": 0.052095709872594, "grad_norm": 7.175952125316248, "learning_rate": 9.987224252008433e-06, "loss": 17.918, "step": 2850 }, { "epoch": 0.05211398906904052, "grad_norm": 8.17411494576387, "learning_rate": 9.987203096003719e-06, "loss": 18.7266, "step": 2851 }, { "epoch": 0.05213226826548705, "grad_norm": 7.924278562250412, "learning_rate": 9.98718192251929e-06, "loss": 18.2808, "step": 2852 }, { "epoch": 0.05215054746193357, "grad_norm": 7.0300478896758225, "learning_rate": 9.98716073155522e-06, "loss": 17.9663, "step": 2853 }, { "epoch": 0.052168826658380095, "grad_norm": 8.450063164139488, "learning_rate": 9.987139523111585e-06, "loss": 18.3254, "step": 2854 }, { "epoch": 0.052187105854826624, "grad_norm": 8.215549204227752, "learning_rate": 9.98711829718846e-06, "loss": 18.232, "step": 2855 }, { "epoch": 0.052205385051273145, "grad_norm": 7.8183822272817505, "learning_rate": 9.987097053785918e-06, "loss": 18.2791, "step": 2856 }, { "epoch": 0.05222366424771967, "grad_norm": 8.670558646100888, "learning_rate": 9.987075792904031e-06, "loss": 18.415, "step": 2857 }, { "epoch": 0.052241943444166196, "grad_norm": 8.412970962496319, "learning_rate": 9.987054514542878e-06, "loss": 18.25, "step": 2858 }, { "epoch": 0.05226022264061272, "grad_norm": 6.930987927455506, "learning_rate": 9.987033218702531e-06, "loss": 17.5139, "step": 2859 }, { "epoch": 0.05227850183705924, "grad_norm": 7.58864030339164, "learning_rate": 9.987011905383066e-06, "loss": 17.9744, "step": 2860 }, { "epoch": 0.05229678103350577, "grad_norm": 10.258257176125628, "learning_rate": 9.986990574584556e-06, "loss": 18.803, "step": 2861 }, { "epoch": 0.05231506022995229, "grad_norm": 7.413678639752427, "learning_rate": 9.986969226307078e-06, "loss": 17.7989, "step": 2862 }, { "epoch": 0.05233333942639882, "grad_norm": 7.445005414972241, "learning_rate": 9.986947860550705e-06, "loss": 17.9625, "step": 2863 }, { "epoch": 0.05235161862284534, "grad_norm": 7.4133344111490915, "learning_rate": 9.986926477315512e-06, "loss": 17.7285, "step": 2864 }, { "epoch": 0.05236989781929186, "grad_norm": 7.1495490687528624, "learning_rate": 9.986905076601577e-06, "loss": 17.9919, "step": 2865 }, { "epoch": 0.05238817701573839, "grad_norm": 7.945602880664831, "learning_rate": 9.98688365840897e-06, "loss": 18.034, "step": 2866 }, { "epoch": 0.05240645621218491, "grad_norm": 9.226799939576006, "learning_rate": 9.98686222273777e-06, "loss": 18.2153, "step": 2867 }, { "epoch": 0.052424735408631434, "grad_norm": 7.4125133452789544, "learning_rate": 9.986840769588049e-06, "loss": 17.8405, "step": 2868 }, { "epoch": 0.05244301460507796, "grad_norm": 8.488537991500818, "learning_rate": 9.986819298959885e-06, "loss": 18.0606, "step": 2869 }, { "epoch": 0.052461293801524485, "grad_norm": 6.820147326899155, "learning_rate": 9.986797810853352e-06, "loss": 17.4996, "step": 2870 }, { "epoch": 0.05247957299797101, "grad_norm": 5.829696315380732, "learning_rate": 9.986776305268524e-06, "loss": 17.2355, "step": 2871 }, { "epoch": 0.052497852194417535, "grad_norm": 6.216952987512384, "learning_rate": 9.986754782205479e-06, "loss": 17.3076, "step": 2872 }, { "epoch": 0.05251613139086406, "grad_norm": 7.184225084460174, "learning_rate": 9.986733241664291e-06, "loss": 17.6507, "step": 2873 }, { "epoch": 0.05253441058731058, "grad_norm": 7.666798363507448, "learning_rate": 9.986711683645036e-06, "loss": 17.8974, "step": 2874 }, { "epoch": 0.05255268978375711, "grad_norm": 8.129386680646322, "learning_rate": 9.98669010814779e-06, "loss": 18.1159, "step": 2875 }, { "epoch": 0.05257096898020363, "grad_norm": 8.878008739092836, "learning_rate": 9.986668515172626e-06, "loss": 18.3956, "step": 2876 }, { "epoch": 0.05258924817665015, "grad_norm": 8.609838043081584, "learning_rate": 9.986646904719624e-06, "loss": 17.837, "step": 2877 }, { "epoch": 0.05260752737309668, "grad_norm": 8.710736010767981, "learning_rate": 9.986625276788855e-06, "loss": 18.8199, "step": 2878 }, { "epoch": 0.0526258065695432, "grad_norm": 7.748052330501188, "learning_rate": 9.986603631380399e-06, "loss": 17.93, "step": 2879 }, { "epoch": 0.05264408576598973, "grad_norm": 7.966771545840098, "learning_rate": 9.986581968494328e-06, "loss": 18.3258, "step": 2880 }, { "epoch": 0.05266236496243625, "grad_norm": 7.6868080308718785, "learning_rate": 9.98656028813072e-06, "loss": 18.0548, "step": 2881 }, { "epoch": 0.052680644158882774, "grad_norm": 7.655713983096808, "learning_rate": 9.986538590289654e-06, "loss": 18.0637, "step": 2882 }, { "epoch": 0.0526989233553293, "grad_norm": 7.5684529254383595, "learning_rate": 9.9865168749712e-06, "loss": 17.9218, "step": 2883 }, { "epoch": 0.052717202551775824, "grad_norm": 7.6454991418926905, "learning_rate": 9.986495142175437e-06, "loss": 17.786, "step": 2884 }, { "epoch": 0.052735481748222346, "grad_norm": 7.733469324401486, "learning_rate": 9.986473391902442e-06, "loss": 18.0281, "step": 2885 }, { "epoch": 0.052753760944668875, "grad_norm": 7.64452382839804, "learning_rate": 9.986451624152291e-06, "loss": 17.9382, "step": 2886 }, { "epoch": 0.052772040141115396, "grad_norm": 8.626678024177481, "learning_rate": 9.986429838925059e-06, "loss": 18.3031, "step": 2887 }, { "epoch": 0.05279031933756192, "grad_norm": 7.30641898232523, "learning_rate": 9.986408036220825e-06, "loss": 17.8578, "step": 2888 }, { "epoch": 0.05280859853400845, "grad_norm": 8.222903766705286, "learning_rate": 9.986386216039661e-06, "loss": 18.5021, "step": 2889 }, { "epoch": 0.05282687773045497, "grad_norm": 6.821592759931198, "learning_rate": 9.986364378381647e-06, "loss": 17.5543, "step": 2890 }, { "epoch": 0.05284515692690149, "grad_norm": 5.638335855246105, "learning_rate": 9.986342523246857e-06, "loss": 17.3, "step": 2891 }, { "epoch": 0.05286343612334802, "grad_norm": 6.637079075331286, "learning_rate": 9.986320650635371e-06, "loss": 17.6331, "step": 2892 }, { "epoch": 0.05288171531979454, "grad_norm": 8.285688386237048, "learning_rate": 9.986298760547264e-06, "loss": 18.2569, "step": 2893 }, { "epoch": 0.05289999451624106, "grad_norm": 7.658564001180271, "learning_rate": 9.986276852982612e-06, "loss": 17.7906, "step": 2894 }, { "epoch": 0.05291827371268759, "grad_norm": 9.256670358946646, "learning_rate": 9.986254927941492e-06, "loss": 18.7537, "step": 2895 }, { "epoch": 0.05293655290913411, "grad_norm": 8.027355404343318, "learning_rate": 9.986232985423982e-06, "loss": 18.3691, "step": 2896 }, { "epoch": 0.05295483210558064, "grad_norm": 7.654962863757842, "learning_rate": 9.986211025430156e-06, "loss": 17.8887, "step": 2897 }, { "epoch": 0.052973111302027164, "grad_norm": 8.15685620262823, "learning_rate": 9.986189047960096e-06, "loss": 18.5433, "step": 2898 }, { "epoch": 0.052991390498473685, "grad_norm": 11.409212729323968, "learning_rate": 9.986167053013875e-06, "loss": 18.1699, "step": 2899 }, { "epoch": 0.053009669694920214, "grad_norm": 7.302661018768685, "learning_rate": 9.986145040591571e-06, "loss": 17.9199, "step": 2900 }, { "epoch": 0.053027948891366736, "grad_norm": 8.143621464920205, "learning_rate": 9.986123010693261e-06, "loss": 18.4157, "step": 2901 }, { "epoch": 0.05304622808781326, "grad_norm": 6.974448154848328, "learning_rate": 9.986100963319023e-06, "loss": 17.4241, "step": 2902 }, { "epoch": 0.053064507284259786, "grad_norm": 9.076493067839538, "learning_rate": 9.986078898468934e-06, "loss": 18.4846, "step": 2903 }, { "epoch": 0.05308278648070631, "grad_norm": 8.606778338151203, "learning_rate": 9.98605681614307e-06, "loss": 18.4125, "step": 2904 }, { "epoch": 0.05310106567715283, "grad_norm": 9.238450490429171, "learning_rate": 9.98603471634151e-06, "loss": 18.6629, "step": 2905 }, { "epoch": 0.05311934487359936, "grad_norm": 7.476422017962867, "learning_rate": 9.986012599064332e-06, "loss": 17.6207, "step": 2906 }, { "epoch": 0.05313762407004588, "grad_norm": 10.354709407007224, "learning_rate": 9.98599046431161e-06, "loss": 19.1014, "step": 2907 }, { "epoch": 0.0531559032664924, "grad_norm": 7.589139360560411, "learning_rate": 9.985968312083428e-06, "loss": 17.8751, "step": 2908 }, { "epoch": 0.05317418246293893, "grad_norm": 7.32388149339212, "learning_rate": 9.985946142379856e-06, "loss": 17.7151, "step": 2909 }, { "epoch": 0.05319246165938545, "grad_norm": 9.125049790803184, "learning_rate": 9.985923955200977e-06, "loss": 18.2955, "step": 2910 }, { "epoch": 0.053210740855831974, "grad_norm": 7.4579332937791945, "learning_rate": 9.985901750546867e-06, "loss": 17.7339, "step": 2911 }, { "epoch": 0.0532290200522785, "grad_norm": 8.629397393390434, "learning_rate": 9.985879528417604e-06, "loss": 18.2328, "step": 2912 }, { "epoch": 0.053247299248725025, "grad_norm": 8.475233804732257, "learning_rate": 9.985857288813266e-06, "loss": 17.9082, "step": 2913 }, { "epoch": 0.05326557844517155, "grad_norm": 7.094067999462129, "learning_rate": 9.985835031733931e-06, "loss": 17.638, "step": 2914 }, { "epoch": 0.053283857641618075, "grad_norm": 7.467000267979098, "learning_rate": 9.985812757179677e-06, "loss": 17.728, "step": 2915 }, { "epoch": 0.0533021368380646, "grad_norm": 8.54598987332618, "learning_rate": 9.98579046515058e-06, "loss": 18.105, "step": 2916 }, { "epoch": 0.053320416034511126, "grad_norm": 7.344890427722456, "learning_rate": 9.985768155646721e-06, "loss": 17.7487, "step": 2917 }, { "epoch": 0.05333869523095765, "grad_norm": 6.628978628649656, "learning_rate": 9.98574582866818e-06, "loss": 17.7106, "step": 2918 }, { "epoch": 0.05335697442740417, "grad_norm": 8.360264382365479, "learning_rate": 9.98572348421503e-06, "loss": 17.9406, "step": 2919 }, { "epoch": 0.0533752536238507, "grad_norm": 6.455080799676221, "learning_rate": 9.985701122287352e-06, "loss": 17.3594, "step": 2920 }, { "epoch": 0.05339353282029722, "grad_norm": 9.13581655350652, "learning_rate": 9.985678742885225e-06, "loss": 18.6982, "step": 2921 }, { "epoch": 0.05341181201674374, "grad_norm": 9.032337151984729, "learning_rate": 9.985656346008727e-06, "loss": 18.4182, "step": 2922 }, { "epoch": 0.05343009121319027, "grad_norm": 8.318073385313161, "learning_rate": 9.985633931657934e-06, "loss": 18.0261, "step": 2923 }, { "epoch": 0.05344837040963679, "grad_norm": 7.455914868336505, "learning_rate": 9.985611499832929e-06, "loss": 17.9919, "step": 2924 }, { "epoch": 0.053466649606083314, "grad_norm": 7.73762423130492, "learning_rate": 9.985589050533787e-06, "loss": 17.6016, "step": 2925 }, { "epoch": 0.05348492880252984, "grad_norm": 7.469676296186834, "learning_rate": 9.98556658376059e-06, "loss": 18.2103, "step": 2926 }, { "epoch": 0.053503207998976364, "grad_norm": 9.74712854445016, "learning_rate": 9.985544099513412e-06, "loss": 18.6005, "step": 2927 }, { "epoch": 0.053521487195422886, "grad_norm": 8.990621077448539, "learning_rate": 9.985521597792336e-06, "loss": 18.4498, "step": 2928 }, { "epoch": 0.053539766391869414, "grad_norm": 7.56040073408485, "learning_rate": 9.985499078597438e-06, "loss": 17.9899, "step": 2929 }, { "epoch": 0.053558045588315936, "grad_norm": 7.9332027847347515, "learning_rate": 9.9854765419288e-06, "loss": 18.2143, "step": 2930 }, { "epoch": 0.053576324784762465, "grad_norm": 8.469629512471046, "learning_rate": 9.985453987786498e-06, "loss": 18.0529, "step": 2931 }, { "epoch": 0.05359460398120899, "grad_norm": 7.188217590980575, "learning_rate": 9.985431416170615e-06, "loss": 17.7905, "step": 2932 }, { "epoch": 0.05361288317765551, "grad_norm": 7.1093033521734235, "learning_rate": 9.985408827081225e-06, "loss": 17.8705, "step": 2933 }, { "epoch": 0.05363116237410204, "grad_norm": 7.80175291704277, "learning_rate": 9.98538622051841e-06, "loss": 18.183, "step": 2934 }, { "epoch": 0.05364944157054856, "grad_norm": 6.68863290206066, "learning_rate": 9.98536359648225e-06, "loss": 17.4041, "step": 2935 }, { "epoch": 0.05366772076699508, "grad_norm": 9.240501255887352, "learning_rate": 9.985340954972822e-06, "loss": 18.7026, "step": 2936 }, { "epoch": 0.05368599996344161, "grad_norm": 6.466188007149112, "learning_rate": 9.985318295990207e-06, "loss": 17.3034, "step": 2937 }, { "epoch": 0.05370427915988813, "grad_norm": 7.41776976246194, "learning_rate": 9.985295619534482e-06, "loss": 17.8361, "step": 2938 }, { "epoch": 0.05372255835633465, "grad_norm": 7.012147727360407, "learning_rate": 9.985272925605732e-06, "loss": 17.793, "step": 2939 }, { "epoch": 0.05374083755278118, "grad_norm": 7.991114266886512, "learning_rate": 9.985250214204032e-06, "loss": 18.0574, "step": 2940 }, { "epoch": 0.0537591167492277, "grad_norm": 9.579076946959471, "learning_rate": 9.985227485329461e-06, "loss": 18.7311, "step": 2941 }, { "epoch": 0.053777395945674225, "grad_norm": 7.725627071464176, "learning_rate": 9.985204738982102e-06, "loss": 17.8584, "step": 2942 }, { "epoch": 0.053795675142120754, "grad_norm": 8.69365900468432, "learning_rate": 9.985181975162032e-06, "loss": 18.2724, "step": 2943 }, { "epoch": 0.053813954338567276, "grad_norm": 9.613125557602531, "learning_rate": 9.985159193869333e-06, "loss": 19.1958, "step": 2944 }, { "epoch": 0.053832233535013804, "grad_norm": 6.2900735210417436, "learning_rate": 9.985136395104082e-06, "loss": 17.5073, "step": 2945 }, { "epoch": 0.053850512731460326, "grad_norm": 6.6392678417274755, "learning_rate": 9.98511357886636e-06, "loss": 17.6563, "step": 2946 }, { "epoch": 0.05386879192790685, "grad_norm": 7.664506189258505, "learning_rate": 9.985090745156249e-06, "loss": 17.8886, "step": 2947 }, { "epoch": 0.053887071124353376, "grad_norm": 9.27521136601304, "learning_rate": 9.985067893973828e-06, "loss": 18.4531, "step": 2948 }, { "epoch": 0.0539053503207999, "grad_norm": 10.37789530787545, "learning_rate": 9.985045025319175e-06, "loss": 18.9894, "step": 2949 }, { "epoch": 0.05392362951724642, "grad_norm": 7.78415898265666, "learning_rate": 9.985022139192372e-06, "loss": 17.8419, "step": 2950 }, { "epoch": 0.05394190871369295, "grad_norm": 7.510278415092891, "learning_rate": 9.984999235593502e-06, "loss": 17.9377, "step": 2951 }, { "epoch": 0.05396018791013947, "grad_norm": 6.68026717118842, "learning_rate": 9.984976314522638e-06, "loss": 17.4353, "step": 2952 }, { "epoch": 0.05397846710658599, "grad_norm": 7.087524709653039, "learning_rate": 9.984953375979868e-06, "loss": 17.7807, "step": 2953 }, { "epoch": 0.05399674630303252, "grad_norm": 8.411742752976526, "learning_rate": 9.984930419965266e-06, "loss": 18.2082, "step": 2954 }, { "epoch": 0.05401502549947904, "grad_norm": 7.812448934713648, "learning_rate": 9.984907446478918e-06, "loss": 18.1363, "step": 2955 }, { "epoch": 0.054033304695925564, "grad_norm": 8.044356207894898, "learning_rate": 9.9848844555209e-06, "loss": 18.2735, "step": 2956 }, { "epoch": 0.05405158389237209, "grad_norm": 7.736509632554892, "learning_rate": 9.984861447091296e-06, "loss": 18.1222, "step": 2957 }, { "epoch": 0.054069863088818615, "grad_norm": 5.933792456814279, "learning_rate": 9.984838421190184e-06, "loss": 17.1622, "step": 2958 }, { "epoch": 0.05408814228526514, "grad_norm": 7.904815188668401, "learning_rate": 9.984815377817648e-06, "loss": 17.5206, "step": 2959 }, { "epoch": 0.054106421481711665, "grad_norm": 7.295836961563647, "learning_rate": 9.984792316973765e-06, "loss": 17.8553, "step": 2960 }, { "epoch": 0.05412470067815819, "grad_norm": 8.1689220413142, "learning_rate": 9.984769238658617e-06, "loss": 18.0965, "step": 2961 }, { "epoch": 0.054142979874604716, "grad_norm": 7.841275180485553, "learning_rate": 9.984746142872287e-06, "loss": 18.3405, "step": 2962 }, { "epoch": 0.05416125907105124, "grad_norm": 6.116042868616801, "learning_rate": 9.984723029614853e-06, "loss": 17.2451, "step": 2963 }, { "epoch": 0.05417953826749776, "grad_norm": 7.649454684530826, "learning_rate": 9.984699898886397e-06, "loss": 17.9682, "step": 2964 }, { "epoch": 0.05419781746394429, "grad_norm": 8.02078081864306, "learning_rate": 9.984676750687e-06, "loss": 18.1302, "step": 2965 }, { "epoch": 0.05421609666039081, "grad_norm": 6.873682823028608, "learning_rate": 9.984653585016747e-06, "loss": 17.6275, "step": 2966 }, { "epoch": 0.05423437585683733, "grad_norm": 7.627279055780362, "learning_rate": 9.984630401875712e-06, "loss": 17.8492, "step": 2967 }, { "epoch": 0.05425265505328386, "grad_norm": 7.2484768612040185, "learning_rate": 9.984607201263983e-06, "loss": 17.9409, "step": 2968 }, { "epoch": 0.05427093424973038, "grad_norm": 8.53649524945606, "learning_rate": 9.984583983181634e-06, "loss": 18.4741, "step": 2969 }, { "epoch": 0.054289213446176904, "grad_norm": 7.404241934179543, "learning_rate": 9.984560747628755e-06, "loss": 17.9715, "step": 2970 }, { "epoch": 0.05430749264262343, "grad_norm": 7.885455730551289, "learning_rate": 9.984537494605422e-06, "loss": 18.206, "step": 2971 }, { "epoch": 0.054325771839069954, "grad_norm": 7.419783122451107, "learning_rate": 9.984514224111717e-06, "loss": 18.0498, "step": 2972 }, { "epoch": 0.054344051035516476, "grad_norm": 7.201191579877522, "learning_rate": 9.984490936147722e-06, "loss": 17.7067, "step": 2973 }, { "epoch": 0.054362330231963005, "grad_norm": 7.263656909714293, "learning_rate": 9.98446763071352e-06, "loss": 17.8044, "step": 2974 }, { "epoch": 0.054380609428409526, "grad_norm": 7.688869607832282, "learning_rate": 9.984444307809189e-06, "loss": 18.0741, "step": 2975 }, { "epoch": 0.05439888862485605, "grad_norm": 7.933950074273668, "learning_rate": 9.984420967434815e-06, "loss": 18.2206, "step": 2976 }, { "epoch": 0.05441716782130258, "grad_norm": 7.689423644462968, "learning_rate": 9.984397609590478e-06, "loss": 17.866, "step": 2977 }, { "epoch": 0.0544354470177491, "grad_norm": 6.19100681423261, "learning_rate": 9.98437423427626e-06, "loss": 17.2061, "step": 2978 }, { "epoch": 0.05445372621419563, "grad_norm": 7.220269564326452, "learning_rate": 9.984350841492243e-06, "loss": 17.8103, "step": 2979 }, { "epoch": 0.05447200541064215, "grad_norm": 7.535332315801858, "learning_rate": 9.984327431238508e-06, "loss": 17.9382, "step": 2980 }, { "epoch": 0.05449028460708867, "grad_norm": 8.61754436364827, "learning_rate": 9.984304003515137e-06, "loss": 17.9783, "step": 2981 }, { "epoch": 0.0545085638035352, "grad_norm": 6.6467237653339, "learning_rate": 9.984280558322215e-06, "loss": 17.4914, "step": 2982 }, { "epoch": 0.05452684299998172, "grad_norm": 8.041593626098898, "learning_rate": 9.984257095659821e-06, "loss": 17.8183, "step": 2983 }, { "epoch": 0.05454512219642824, "grad_norm": 8.110138118165157, "learning_rate": 9.98423361552804e-06, "loss": 17.7319, "step": 2984 }, { "epoch": 0.05456340139287477, "grad_norm": 6.615120623978179, "learning_rate": 9.984210117926952e-06, "loss": 17.3877, "step": 2985 }, { "epoch": 0.054581680589321294, "grad_norm": 7.320762110935072, "learning_rate": 9.984186602856639e-06, "loss": 17.9738, "step": 2986 }, { "epoch": 0.054599959785767815, "grad_norm": 6.925733595848153, "learning_rate": 9.984163070317186e-06, "loss": 17.7418, "step": 2987 }, { "epoch": 0.054618238982214344, "grad_norm": 7.202411637507689, "learning_rate": 9.984139520308672e-06, "loss": 18.0995, "step": 2988 }, { "epoch": 0.054636518178660866, "grad_norm": 7.164653278773729, "learning_rate": 9.984115952831182e-06, "loss": 17.8108, "step": 2989 }, { "epoch": 0.05465479737510739, "grad_norm": 8.897122619932148, "learning_rate": 9.9840923678848e-06, "loss": 18.1692, "step": 2990 }, { "epoch": 0.054673076571553916, "grad_norm": 7.030115125998609, "learning_rate": 9.984068765469603e-06, "loss": 17.8911, "step": 2991 }, { "epoch": 0.05469135576800044, "grad_norm": 7.308291531501065, "learning_rate": 9.984045145585681e-06, "loss": 17.8525, "step": 2992 }, { "epoch": 0.05470963496444696, "grad_norm": 9.113822979077941, "learning_rate": 9.984021508233111e-06, "loss": 18.4587, "step": 2993 }, { "epoch": 0.05472791416089349, "grad_norm": 7.429441521159165, "learning_rate": 9.98399785341198e-06, "loss": 17.9046, "step": 2994 }, { "epoch": 0.05474619335734001, "grad_norm": 6.609227078282274, "learning_rate": 9.983974181122368e-06, "loss": 17.3922, "step": 2995 }, { "epoch": 0.05476447255378654, "grad_norm": 7.506487250012753, "learning_rate": 9.98395049136436e-06, "loss": 17.6925, "step": 2996 }, { "epoch": 0.05478275175023306, "grad_norm": 7.4518922699203864, "learning_rate": 9.983926784138036e-06, "loss": 17.8667, "step": 2997 }, { "epoch": 0.05480103094667958, "grad_norm": 8.356611829728001, "learning_rate": 9.983903059443482e-06, "loss": 18.1327, "step": 2998 }, { "epoch": 0.05481931014312611, "grad_norm": 8.82366343581557, "learning_rate": 9.98387931728078e-06, "loss": 18.3259, "step": 2999 }, { "epoch": 0.05483758933957263, "grad_norm": 8.375593874272335, "learning_rate": 9.983855557650015e-06, "loss": 18.6367, "step": 3000 }, { "epoch": 0.054855868536019155, "grad_norm": 6.387070611131344, "learning_rate": 9.983831780551268e-06, "loss": 17.3594, "step": 3001 }, { "epoch": 0.05487414773246568, "grad_norm": 7.948890739914074, "learning_rate": 9.983807985984621e-06, "loss": 18.0505, "step": 3002 }, { "epoch": 0.054892426928912205, "grad_norm": 7.114125273937795, "learning_rate": 9.983784173950163e-06, "loss": 17.6408, "step": 3003 }, { "epoch": 0.05491070612535873, "grad_norm": 7.006302790073139, "learning_rate": 9.983760344447972e-06, "loss": 17.6835, "step": 3004 }, { "epoch": 0.054928985321805256, "grad_norm": 7.28263684589425, "learning_rate": 9.983736497478134e-06, "loss": 17.6965, "step": 3005 }, { "epoch": 0.05494726451825178, "grad_norm": 7.751363796915561, "learning_rate": 9.983712633040732e-06, "loss": 17.8543, "step": 3006 }, { "epoch": 0.0549655437146983, "grad_norm": 8.07461854526666, "learning_rate": 9.983688751135849e-06, "loss": 18.1354, "step": 3007 }, { "epoch": 0.05498382291114483, "grad_norm": 8.218066766385554, "learning_rate": 9.983664851763571e-06, "loss": 18.3705, "step": 3008 }, { "epoch": 0.05500210210759135, "grad_norm": 8.192671558395386, "learning_rate": 9.983640934923977e-06, "loss": 18.5413, "step": 3009 }, { "epoch": 0.05502038130403787, "grad_norm": 7.698426913627192, "learning_rate": 9.983617000617157e-06, "loss": 17.7436, "step": 3010 }, { "epoch": 0.0550386605004844, "grad_norm": 7.305591498734186, "learning_rate": 9.98359304884319e-06, "loss": 17.8984, "step": 3011 }, { "epoch": 0.05505693969693092, "grad_norm": 7.462744730812517, "learning_rate": 9.983569079602163e-06, "loss": 17.8616, "step": 3012 }, { "epoch": 0.05507521889337745, "grad_norm": 9.204157449070113, "learning_rate": 9.983545092894158e-06, "loss": 18.6582, "step": 3013 }, { "epoch": 0.05509349808982397, "grad_norm": 6.781635136989092, "learning_rate": 9.983521088719262e-06, "loss": 17.5635, "step": 3014 }, { "epoch": 0.055111777286270494, "grad_norm": 7.529536812907106, "learning_rate": 9.983497067077554e-06, "loss": 17.8964, "step": 3015 }, { "epoch": 0.05513005648271702, "grad_norm": 8.508994685012931, "learning_rate": 9.983473027969122e-06, "loss": 18.6022, "step": 3016 }, { "epoch": 0.055148335679163545, "grad_norm": 7.681556406311905, "learning_rate": 9.983448971394051e-06, "loss": 17.9823, "step": 3017 }, { "epoch": 0.055166614875610066, "grad_norm": 7.653760825345166, "learning_rate": 9.983424897352422e-06, "loss": 18.1274, "step": 3018 }, { "epoch": 0.055184894072056595, "grad_norm": 7.305335009265789, "learning_rate": 9.983400805844324e-06, "loss": 18.0893, "step": 3019 }, { "epoch": 0.05520317326850312, "grad_norm": 5.745154385526005, "learning_rate": 9.983376696869836e-06, "loss": 17.3378, "step": 3020 }, { "epoch": 0.05522145246494964, "grad_norm": 6.646640837725589, "learning_rate": 9.983352570429046e-06, "loss": 17.5679, "step": 3021 }, { "epoch": 0.05523973166139617, "grad_norm": 7.178155182805772, "learning_rate": 9.983328426522036e-06, "loss": 17.7329, "step": 3022 }, { "epoch": 0.05525801085784269, "grad_norm": 8.229139627854392, "learning_rate": 9.983304265148894e-06, "loss": 18.2517, "step": 3023 }, { "epoch": 0.05527629005428921, "grad_norm": 7.105184523923117, "learning_rate": 9.983280086309703e-06, "loss": 17.9693, "step": 3024 }, { "epoch": 0.05529456925073574, "grad_norm": 7.504820287503861, "learning_rate": 9.983255890004548e-06, "loss": 18.0733, "step": 3025 }, { "epoch": 0.05531284844718226, "grad_norm": 8.121601282141208, "learning_rate": 9.983231676233513e-06, "loss": 18.4775, "step": 3026 }, { "epoch": 0.05533112764362878, "grad_norm": 7.522589727206308, "learning_rate": 9.983207444996682e-06, "loss": 17.9586, "step": 3027 }, { "epoch": 0.05534940684007531, "grad_norm": 8.58402787228452, "learning_rate": 9.983183196294144e-06, "loss": 18.5355, "step": 3028 }, { "epoch": 0.05536768603652183, "grad_norm": 6.920000719747553, "learning_rate": 9.98315893012598e-06, "loss": 17.6501, "step": 3029 }, { "epoch": 0.05538596523296836, "grad_norm": 6.34494087197748, "learning_rate": 9.983134646492277e-06, "loss": 17.4743, "step": 3030 }, { "epoch": 0.055404244429414884, "grad_norm": 9.061419916247212, "learning_rate": 9.983110345393119e-06, "loss": 18.9192, "step": 3031 }, { "epoch": 0.055422523625861406, "grad_norm": 7.2914007617061145, "learning_rate": 9.983086026828592e-06, "loss": 17.5493, "step": 3032 }, { "epoch": 0.055440802822307934, "grad_norm": 7.286057303199407, "learning_rate": 9.983061690798782e-06, "loss": 17.8788, "step": 3033 }, { "epoch": 0.055459082018754456, "grad_norm": 7.314665473423591, "learning_rate": 9.98303733730377e-06, "loss": 18.1814, "step": 3034 }, { "epoch": 0.05547736121520098, "grad_norm": 8.666248371560606, "learning_rate": 9.983012966343645e-06, "loss": 18.5522, "step": 3035 }, { "epoch": 0.05549564041164751, "grad_norm": 6.389428773229833, "learning_rate": 9.982988577918494e-06, "loss": 17.7468, "step": 3036 }, { "epoch": 0.05551391960809403, "grad_norm": 6.422842320255038, "learning_rate": 9.9829641720284e-06, "loss": 17.5976, "step": 3037 }, { "epoch": 0.05553219880454055, "grad_norm": 8.907083855297007, "learning_rate": 9.982939748673447e-06, "loss": 18.4818, "step": 3038 }, { "epoch": 0.05555047800098708, "grad_norm": 6.595998114701068, "learning_rate": 9.982915307853725e-06, "loss": 17.5303, "step": 3039 }, { "epoch": 0.0555687571974336, "grad_norm": 8.342567757686064, "learning_rate": 9.982890849569318e-06, "loss": 18.3321, "step": 3040 }, { "epoch": 0.05558703639388012, "grad_norm": 7.3346381271810825, "learning_rate": 9.982866373820308e-06, "loss": 17.9998, "step": 3041 }, { "epoch": 0.05560531559032665, "grad_norm": 8.02451992770075, "learning_rate": 9.982841880606786e-06, "loss": 17.886, "step": 3042 }, { "epoch": 0.05562359478677317, "grad_norm": 6.86313206794148, "learning_rate": 9.982817369928834e-06, "loss": 17.8138, "step": 3043 }, { "epoch": 0.055641873983219695, "grad_norm": 7.792835334586353, "learning_rate": 9.98279284178654e-06, "loss": 18.1808, "step": 3044 }, { "epoch": 0.05566015317966622, "grad_norm": 8.365750307803037, "learning_rate": 9.982768296179989e-06, "loss": 18.1171, "step": 3045 }, { "epoch": 0.055678432376112745, "grad_norm": 9.19430205300137, "learning_rate": 9.98274373310927e-06, "loss": 18.6058, "step": 3046 }, { "epoch": 0.055696711572559274, "grad_norm": 9.578064016682275, "learning_rate": 9.982719152574465e-06, "loss": 18.844, "step": 3047 }, { "epoch": 0.055714990769005796, "grad_norm": 7.766110325583367, "learning_rate": 9.982694554575661e-06, "loss": 18.0555, "step": 3048 }, { "epoch": 0.05573326996545232, "grad_norm": 8.788885331065469, "learning_rate": 9.982669939112946e-06, "loss": 18.2255, "step": 3049 }, { "epoch": 0.055751549161898846, "grad_norm": 6.953589933728604, "learning_rate": 9.982645306186405e-06, "loss": 17.6454, "step": 3050 }, { "epoch": 0.05576982835834537, "grad_norm": 8.833252919081332, "learning_rate": 9.982620655796123e-06, "loss": 18.5215, "step": 3051 }, { "epoch": 0.05578810755479189, "grad_norm": 7.84034395593779, "learning_rate": 9.98259598794219e-06, "loss": 18.1439, "step": 3052 }, { "epoch": 0.05580638675123842, "grad_norm": 6.8936031465011185, "learning_rate": 9.982571302624691e-06, "loss": 17.6117, "step": 3053 }, { "epoch": 0.05582466594768494, "grad_norm": 7.101308281568558, "learning_rate": 9.982546599843709e-06, "loss": 17.7474, "step": 3054 }, { "epoch": 0.05584294514413146, "grad_norm": 9.115871047523548, "learning_rate": 9.982521879599337e-06, "loss": 18.3247, "step": 3055 }, { "epoch": 0.05586122434057799, "grad_norm": 7.753280953249115, "learning_rate": 9.982497141891656e-06, "loss": 17.8628, "step": 3056 }, { "epoch": 0.05587950353702451, "grad_norm": 8.13964987987562, "learning_rate": 9.982472386720754e-06, "loss": 18.2396, "step": 3057 }, { "epoch": 0.055897782733471034, "grad_norm": 7.772282244173961, "learning_rate": 9.982447614086721e-06, "loss": 18.3326, "step": 3058 }, { "epoch": 0.05591606192991756, "grad_norm": 8.526465003179315, "learning_rate": 9.98242282398964e-06, "loss": 18.188, "step": 3059 }, { "epoch": 0.055934341126364084, "grad_norm": 6.492358861107072, "learning_rate": 9.982398016429599e-06, "loss": 17.7256, "step": 3060 }, { "epoch": 0.055952620322810606, "grad_norm": 7.825082106363907, "learning_rate": 9.982373191406687e-06, "loss": 17.8356, "step": 3061 }, { "epoch": 0.055970899519257135, "grad_norm": 7.567674437585254, "learning_rate": 9.982348348920988e-06, "loss": 17.7985, "step": 3062 }, { "epoch": 0.05598917871570366, "grad_norm": 7.691814573161281, "learning_rate": 9.982323488972592e-06, "loss": 18.0445, "step": 3063 }, { "epoch": 0.056007457912150185, "grad_norm": 8.31240527788603, "learning_rate": 9.982298611561583e-06, "loss": 18.3168, "step": 3064 }, { "epoch": 0.05602573710859671, "grad_norm": 9.123638086422385, "learning_rate": 9.982273716688048e-06, "loss": 18.5168, "step": 3065 }, { "epoch": 0.05604401630504323, "grad_norm": 7.429816320145203, "learning_rate": 9.982248804352079e-06, "loss": 17.961, "step": 3066 }, { "epoch": 0.05606229550148976, "grad_norm": 7.028700829005101, "learning_rate": 9.98222387455376e-06, "loss": 17.6662, "step": 3067 }, { "epoch": 0.05608057469793628, "grad_norm": 7.397826548373838, "learning_rate": 9.982198927293177e-06, "loss": 17.9261, "step": 3068 }, { "epoch": 0.0560988538943828, "grad_norm": 7.661716198505955, "learning_rate": 9.98217396257042e-06, "loss": 17.9355, "step": 3069 }, { "epoch": 0.05611713309082933, "grad_norm": 7.724760272102188, "learning_rate": 9.982148980385576e-06, "loss": 18.0717, "step": 3070 }, { "epoch": 0.05613541228727585, "grad_norm": 6.8711206059020995, "learning_rate": 9.982123980738731e-06, "loss": 17.8262, "step": 3071 }, { "epoch": 0.05615369148372237, "grad_norm": 8.514880584015112, "learning_rate": 9.982098963629975e-06, "loss": 18.3857, "step": 3072 }, { "epoch": 0.0561719706801689, "grad_norm": 6.537398221025665, "learning_rate": 9.982073929059394e-06, "loss": 17.4544, "step": 3073 }, { "epoch": 0.056190249876615424, "grad_norm": 7.224619991155388, "learning_rate": 9.982048877027077e-06, "loss": 17.7508, "step": 3074 }, { "epoch": 0.056208529073061946, "grad_norm": 8.93773857365674, "learning_rate": 9.98202380753311e-06, "loss": 18.9491, "step": 3075 }, { "epoch": 0.056226808269508474, "grad_norm": 7.119907582320052, "learning_rate": 9.98199872057758e-06, "loss": 17.6432, "step": 3076 }, { "epoch": 0.056245087465954996, "grad_norm": 7.169995182665816, "learning_rate": 9.98197361616058e-06, "loss": 17.4187, "step": 3077 }, { "epoch": 0.05626336666240152, "grad_norm": 7.795123421903371, "learning_rate": 9.981948494282195e-06, "loss": 18.0035, "step": 3078 }, { "epoch": 0.056281645858848046, "grad_norm": 7.726347244191609, "learning_rate": 9.98192335494251e-06, "loss": 18.0938, "step": 3079 }, { "epoch": 0.05629992505529457, "grad_norm": 6.6654710399569606, "learning_rate": 9.98189819814162e-06, "loss": 17.3324, "step": 3080 }, { "epoch": 0.0563182042517411, "grad_norm": 8.028342189665967, "learning_rate": 9.981873023879605e-06, "loss": 17.6997, "step": 3081 }, { "epoch": 0.05633648344818762, "grad_norm": 7.351889325695132, "learning_rate": 9.981847832156559e-06, "loss": 17.9691, "step": 3082 }, { "epoch": 0.05635476264463414, "grad_norm": 6.877054372041403, "learning_rate": 9.981822622972568e-06, "loss": 17.6384, "step": 3083 }, { "epoch": 0.05637304184108067, "grad_norm": 6.459901030651059, "learning_rate": 9.981797396327722e-06, "loss": 17.3577, "step": 3084 }, { "epoch": 0.05639132103752719, "grad_norm": 7.909241642474109, "learning_rate": 9.981772152222109e-06, "loss": 18.504, "step": 3085 }, { "epoch": 0.05640960023397371, "grad_norm": 7.433748334982519, "learning_rate": 9.981746890655815e-06, "loss": 17.7713, "step": 3086 }, { "epoch": 0.05642787943042024, "grad_norm": 6.659830404372923, "learning_rate": 9.981721611628932e-06, "loss": 17.4545, "step": 3087 }, { "epoch": 0.05644615862686676, "grad_norm": 7.765799042403962, "learning_rate": 9.981696315141546e-06, "loss": 17.8737, "step": 3088 }, { "epoch": 0.056464437823313285, "grad_norm": 8.867682423038241, "learning_rate": 9.981671001193748e-06, "loss": 18.1657, "step": 3089 }, { "epoch": 0.056482717019759814, "grad_norm": 6.237795543665309, "learning_rate": 9.981645669785624e-06, "loss": 17.1925, "step": 3090 }, { "epoch": 0.056500996216206335, "grad_norm": 6.925673048313482, "learning_rate": 9.981620320917264e-06, "loss": 17.7825, "step": 3091 }, { "epoch": 0.05651927541265286, "grad_norm": 7.10750480705101, "learning_rate": 9.981594954588759e-06, "loss": 17.7329, "step": 3092 }, { "epoch": 0.056537554609099386, "grad_norm": 7.388764822206102, "learning_rate": 9.981569570800194e-06, "loss": 17.9458, "step": 3093 }, { "epoch": 0.05655583380554591, "grad_norm": 7.695452348366865, "learning_rate": 9.98154416955166e-06, "loss": 17.8532, "step": 3094 }, { "epoch": 0.05657411300199243, "grad_norm": 7.685635568854148, "learning_rate": 9.981518750843247e-06, "loss": 17.948, "step": 3095 }, { "epoch": 0.05659239219843896, "grad_norm": 8.262705401974944, "learning_rate": 9.981493314675044e-06, "loss": 18.4305, "step": 3096 }, { "epoch": 0.05661067139488548, "grad_norm": 8.344981147862454, "learning_rate": 9.981467861047137e-06, "loss": 18.1224, "step": 3097 }, { "epoch": 0.05662895059133201, "grad_norm": 7.611113823112375, "learning_rate": 9.981442389959619e-06, "loss": 17.7372, "step": 3098 }, { "epoch": 0.05664722978777853, "grad_norm": 8.82550241492585, "learning_rate": 9.981416901412577e-06, "loss": 17.7954, "step": 3099 }, { "epoch": 0.05666550898422505, "grad_norm": 6.426091669920725, "learning_rate": 9.9813913954061e-06, "loss": 17.4338, "step": 3100 }, { "epoch": 0.05668378818067158, "grad_norm": 7.6781511310715445, "learning_rate": 9.981365871940281e-06, "loss": 17.8858, "step": 3101 }, { "epoch": 0.0567020673771181, "grad_norm": 8.51942401916345, "learning_rate": 9.981340331015205e-06, "loss": 18.1104, "step": 3102 }, { "epoch": 0.056720346573564624, "grad_norm": 7.303153742323697, "learning_rate": 9.981314772630963e-06, "loss": 17.8026, "step": 3103 }, { "epoch": 0.05673862577001115, "grad_norm": 7.70273749115573, "learning_rate": 9.981289196787646e-06, "loss": 17.8407, "step": 3104 }, { "epoch": 0.056756904966457675, "grad_norm": 7.384810357472592, "learning_rate": 9.981263603485343e-06, "loss": 17.916, "step": 3105 }, { "epoch": 0.056775184162904196, "grad_norm": 6.9427720875237915, "learning_rate": 9.981237992724142e-06, "loss": 17.5603, "step": 3106 }, { "epoch": 0.056793463359350725, "grad_norm": 7.586135612857699, "learning_rate": 9.981212364504135e-06, "loss": 17.96, "step": 3107 }, { "epoch": 0.05681174255579725, "grad_norm": 6.941673895097665, "learning_rate": 9.98118671882541e-06, "loss": 17.7614, "step": 3108 }, { "epoch": 0.05683002175224377, "grad_norm": 7.669828434678138, "learning_rate": 9.98116105568806e-06, "loss": 18.0786, "step": 3109 }, { "epoch": 0.0568483009486903, "grad_norm": 8.214138452932188, "learning_rate": 9.98113537509217e-06, "loss": 18.5006, "step": 3110 }, { "epoch": 0.05686658014513682, "grad_norm": 8.769302543947646, "learning_rate": 9.981109677037834e-06, "loss": 18.7026, "step": 3111 }, { "epoch": 0.05688485934158334, "grad_norm": 6.736288333953572, "learning_rate": 9.981083961525142e-06, "loss": 17.7235, "step": 3112 }, { "epoch": 0.05690313853802987, "grad_norm": 7.525238302445102, "learning_rate": 9.981058228554182e-06, "loss": 17.7236, "step": 3113 }, { "epoch": 0.05692141773447639, "grad_norm": 8.304692348974578, "learning_rate": 9.981032478125044e-06, "loss": 18.3424, "step": 3114 }, { "epoch": 0.05693969693092292, "grad_norm": 7.416249290681871, "learning_rate": 9.981006710237822e-06, "loss": 17.8649, "step": 3115 }, { "epoch": 0.05695797612736944, "grad_norm": 7.713644412755266, "learning_rate": 9.9809809248926e-06, "loss": 18.1338, "step": 3116 }, { "epoch": 0.056976255323815964, "grad_norm": 8.090571385668342, "learning_rate": 9.980955122089476e-06, "loss": 17.7447, "step": 3117 }, { "epoch": 0.05699453452026249, "grad_norm": 6.579862160274033, "learning_rate": 9.980929301828533e-06, "loss": 17.3605, "step": 3118 }, { "epoch": 0.057012813716709014, "grad_norm": 6.459900712847999, "learning_rate": 9.980903464109868e-06, "loss": 17.5769, "step": 3119 }, { "epoch": 0.057031092913155536, "grad_norm": 8.30606420475151, "learning_rate": 9.980877608933566e-06, "loss": 18.0798, "step": 3120 }, { "epoch": 0.057049372109602065, "grad_norm": 7.7536226712048295, "learning_rate": 9.98085173629972e-06, "loss": 18.4216, "step": 3121 }, { "epoch": 0.057067651306048586, "grad_norm": 6.864119640927641, "learning_rate": 9.980825846208424e-06, "loss": 17.6863, "step": 3122 }, { "epoch": 0.05708593050249511, "grad_norm": 8.094039835281302, "learning_rate": 9.980799938659764e-06, "loss": 18.0637, "step": 3123 }, { "epoch": 0.05710420969894164, "grad_norm": 9.114359137978889, "learning_rate": 9.980774013653834e-06, "loss": 18.836, "step": 3124 }, { "epoch": 0.05712248889538816, "grad_norm": 6.761714260561507, "learning_rate": 9.980748071190721e-06, "loss": 17.8163, "step": 3125 }, { "epoch": 0.05714076809183468, "grad_norm": 7.915229109343791, "learning_rate": 9.980722111270518e-06, "loss": 18.1644, "step": 3126 }, { "epoch": 0.05715904728828121, "grad_norm": 7.696451437354202, "learning_rate": 9.980696133893317e-06, "loss": 17.7345, "step": 3127 }, { "epoch": 0.05717732648472773, "grad_norm": 8.703426742138486, "learning_rate": 9.98067013905921e-06, "loss": 18.429, "step": 3128 }, { "epoch": 0.05719560568117425, "grad_norm": 8.743940883294867, "learning_rate": 9.980644126768283e-06, "loss": 18.8959, "step": 3129 }, { "epoch": 0.05721388487762078, "grad_norm": 8.240905753578073, "learning_rate": 9.980618097020634e-06, "loss": 18.2815, "step": 3130 }, { "epoch": 0.0572321640740673, "grad_norm": 7.306110914231443, "learning_rate": 9.98059204981635e-06, "loss": 17.9447, "step": 3131 }, { "epoch": 0.05725044327051383, "grad_norm": 8.189961498568708, "learning_rate": 9.98056598515552e-06, "loss": 18.007, "step": 3132 }, { "epoch": 0.05726872246696035, "grad_norm": 6.871746975009577, "learning_rate": 9.980539903038241e-06, "loss": 17.5626, "step": 3133 }, { "epoch": 0.057287001663406875, "grad_norm": 7.01148624787496, "learning_rate": 9.980513803464602e-06, "loss": 17.7858, "step": 3134 }, { "epoch": 0.057305280859853404, "grad_norm": 7.346460217629107, "learning_rate": 9.980487686434694e-06, "loss": 17.4628, "step": 3135 }, { "epoch": 0.057323560056299926, "grad_norm": 7.5282863277135865, "learning_rate": 9.980461551948609e-06, "loss": 18.0323, "step": 3136 }, { "epoch": 0.05734183925274645, "grad_norm": 8.097465864323018, "learning_rate": 9.980435400006436e-06, "loss": 18.2426, "step": 3137 }, { "epoch": 0.057360118449192976, "grad_norm": 7.932203910254175, "learning_rate": 9.980409230608272e-06, "loss": 17.9906, "step": 3138 }, { "epoch": 0.0573783976456395, "grad_norm": 6.793077500662868, "learning_rate": 9.980383043754206e-06, "loss": 17.822, "step": 3139 }, { "epoch": 0.05739667684208602, "grad_norm": 7.4046220582163, "learning_rate": 9.980356839444328e-06, "loss": 18.009, "step": 3140 }, { "epoch": 0.05741495603853255, "grad_norm": 7.34207226631687, "learning_rate": 9.980330617678731e-06, "loss": 17.8602, "step": 3141 }, { "epoch": 0.05743323523497907, "grad_norm": 8.544845945713869, "learning_rate": 9.980304378457508e-06, "loss": 18.5323, "step": 3142 }, { "epoch": 0.05745151443142559, "grad_norm": 7.644146605020451, "learning_rate": 9.98027812178075e-06, "loss": 17.9759, "step": 3143 }, { "epoch": 0.05746979362787212, "grad_norm": 7.028547691424313, "learning_rate": 9.980251847648551e-06, "loss": 17.8357, "step": 3144 }, { "epoch": 0.05748807282431864, "grad_norm": 7.782382990392942, "learning_rate": 9.980225556061e-06, "loss": 18.0765, "step": 3145 }, { "epoch": 0.057506352020765164, "grad_norm": 7.8146244170079955, "learning_rate": 9.980199247018193e-06, "loss": 18.1027, "step": 3146 }, { "epoch": 0.05752463121721169, "grad_norm": 7.558449598618881, "learning_rate": 9.980172920520216e-06, "loss": 18.4348, "step": 3147 }, { "epoch": 0.057542910413658215, "grad_norm": 8.008248929861436, "learning_rate": 9.980146576567167e-06, "loss": 18.2474, "step": 3148 }, { "epoch": 0.05756118961010474, "grad_norm": 6.320683275914581, "learning_rate": 9.980120215159138e-06, "loss": 17.4358, "step": 3149 }, { "epoch": 0.057579468806551265, "grad_norm": 7.500021336597586, "learning_rate": 9.980093836296216e-06, "loss": 18.0094, "step": 3150 }, { "epoch": 0.05759774800299779, "grad_norm": 6.889861023698829, "learning_rate": 9.9800674399785e-06, "loss": 17.8159, "step": 3151 }, { "epoch": 0.057616027199444315, "grad_norm": 8.129009473177554, "learning_rate": 9.98004102620608e-06, "loss": 18.1608, "step": 3152 }, { "epoch": 0.05763430639589084, "grad_norm": 8.48684312755742, "learning_rate": 9.980014594979047e-06, "loss": 18.0661, "step": 3153 }, { "epoch": 0.05765258559233736, "grad_norm": 7.411642195765292, "learning_rate": 9.979988146297494e-06, "loss": 17.8226, "step": 3154 }, { "epoch": 0.05767086478878389, "grad_norm": 8.940883368045336, "learning_rate": 9.979961680161517e-06, "loss": 18.8211, "step": 3155 }, { "epoch": 0.05768914398523041, "grad_norm": 7.436304395529918, "learning_rate": 9.979935196571207e-06, "loss": 17.8067, "step": 3156 }, { "epoch": 0.05770742318167693, "grad_norm": 7.192700013865126, "learning_rate": 9.979908695526655e-06, "loss": 17.8254, "step": 3157 }, { "epoch": 0.05772570237812346, "grad_norm": 7.848188521253715, "learning_rate": 9.979882177027955e-06, "loss": 17.769, "step": 3158 }, { "epoch": 0.05774398157456998, "grad_norm": 7.25186655023415, "learning_rate": 9.9798556410752e-06, "loss": 17.7228, "step": 3159 }, { "epoch": 0.0577622607710165, "grad_norm": 9.492483714987832, "learning_rate": 9.979829087668483e-06, "loss": 18.5494, "step": 3160 }, { "epoch": 0.05778053996746303, "grad_norm": 8.523093300760328, "learning_rate": 9.979802516807897e-06, "loss": 18.058, "step": 3161 }, { "epoch": 0.057798819163909554, "grad_norm": 9.087293568614113, "learning_rate": 9.979775928493536e-06, "loss": 18.4453, "step": 3162 }, { "epoch": 0.057817098360356076, "grad_norm": 7.340983964773885, "learning_rate": 9.979749322725492e-06, "loss": 17.9543, "step": 3163 }, { "epoch": 0.057835377556802604, "grad_norm": 8.117419409975199, "learning_rate": 9.979722699503859e-06, "loss": 18.4618, "step": 3164 }, { "epoch": 0.057853656753249126, "grad_norm": 8.060086976673936, "learning_rate": 9.97969605882873e-06, "loss": 18.0255, "step": 3165 }, { "epoch": 0.057871935949695655, "grad_norm": 6.691453776028693, "learning_rate": 9.979669400700198e-06, "loss": 17.5696, "step": 3166 }, { "epoch": 0.05789021514614218, "grad_norm": 7.723493050490721, "learning_rate": 9.979642725118358e-06, "loss": 18.1638, "step": 3167 }, { "epoch": 0.0579084943425887, "grad_norm": 7.910569492674526, "learning_rate": 9.979616032083301e-06, "loss": 18.0093, "step": 3168 }, { "epoch": 0.05792677353903523, "grad_norm": 7.362247485355764, "learning_rate": 9.979589321595123e-06, "loss": 17.5434, "step": 3169 }, { "epoch": 0.05794505273548175, "grad_norm": 7.540447907965872, "learning_rate": 9.979562593653916e-06, "loss": 18.1084, "step": 3170 }, { "epoch": 0.05796333193192827, "grad_norm": 7.413722488365144, "learning_rate": 9.979535848259775e-06, "loss": 17.7816, "step": 3171 }, { "epoch": 0.0579816111283748, "grad_norm": 8.349769595748214, "learning_rate": 9.979509085412793e-06, "loss": 18.3193, "step": 3172 }, { "epoch": 0.05799989032482132, "grad_norm": 7.2552155432041365, "learning_rate": 9.979482305113062e-06, "loss": 17.6679, "step": 3173 }, { "epoch": 0.05801816952126784, "grad_norm": 9.039718750437268, "learning_rate": 9.979455507360679e-06, "loss": 18.6886, "step": 3174 }, { "epoch": 0.05803644871771437, "grad_norm": 6.273050204214056, "learning_rate": 9.979428692155737e-06, "loss": 17.3761, "step": 3175 }, { "epoch": 0.05805472791416089, "grad_norm": 6.715594056949398, "learning_rate": 9.979401859498327e-06, "loss": 17.3928, "step": 3176 }, { "epoch": 0.058073007110607415, "grad_norm": 6.650037070977743, "learning_rate": 9.979375009388548e-06, "loss": 17.7325, "step": 3177 }, { "epoch": 0.058091286307053944, "grad_norm": 6.673201876651628, "learning_rate": 9.979348141826491e-06, "loss": 17.5246, "step": 3178 }, { "epoch": 0.058109565503500465, "grad_norm": 7.527923065096524, "learning_rate": 9.979321256812252e-06, "loss": 18.2962, "step": 3179 }, { "epoch": 0.05812784469994699, "grad_norm": 9.008414939763789, "learning_rate": 9.979294354345923e-06, "loss": 18.6181, "step": 3180 }, { "epoch": 0.058146123896393516, "grad_norm": 7.737827894858357, "learning_rate": 9.979267434427599e-06, "loss": 17.8865, "step": 3181 }, { "epoch": 0.05816440309284004, "grad_norm": 7.435350935716762, "learning_rate": 9.979240497057374e-06, "loss": 18.1134, "step": 3182 }, { "epoch": 0.058182682289286566, "grad_norm": 6.392868758180979, "learning_rate": 9.979213542235346e-06, "loss": 17.4657, "step": 3183 }, { "epoch": 0.05820096148573309, "grad_norm": 8.061051245157545, "learning_rate": 9.979186569961603e-06, "loss": 18.4981, "step": 3184 }, { "epoch": 0.05821924068217961, "grad_norm": 7.574884996174679, "learning_rate": 9.979159580236246e-06, "loss": 17.8623, "step": 3185 }, { "epoch": 0.05823751987862614, "grad_norm": 6.972585575102587, "learning_rate": 9.979132573059366e-06, "loss": 17.7043, "step": 3186 }, { "epoch": 0.05825579907507266, "grad_norm": 8.01277178504557, "learning_rate": 9.979105548431058e-06, "loss": 18.2272, "step": 3187 }, { "epoch": 0.05827407827151918, "grad_norm": 6.883657298753507, "learning_rate": 9.979078506351418e-06, "loss": 17.5574, "step": 3188 }, { "epoch": 0.05829235746796571, "grad_norm": 8.252378597440396, "learning_rate": 9.979051446820539e-06, "loss": 18.0717, "step": 3189 }, { "epoch": 0.05831063666441223, "grad_norm": 9.18754927192144, "learning_rate": 9.979024369838516e-06, "loss": 18.5159, "step": 3190 }, { "epoch": 0.058328915860858754, "grad_norm": 7.617789065932563, "learning_rate": 9.978997275405447e-06, "loss": 18.0219, "step": 3191 }, { "epoch": 0.05834719505730528, "grad_norm": 5.51650755113365, "learning_rate": 9.978970163521422e-06, "loss": 17.0512, "step": 3192 }, { "epoch": 0.058365474253751805, "grad_norm": 7.808301822700816, "learning_rate": 9.978943034186539e-06, "loss": 17.8967, "step": 3193 }, { "epoch": 0.05838375345019833, "grad_norm": 8.157376624839548, "learning_rate": 9.978915887400894e-06, "loss": 18.2762, "step": 3194 }, { "epoch": 0.058402032646644855, "grad_norm": 7.530590281296273, "learning_rate": 9.978888723164581e-06, "loss": 17.754, "step": 3195 }, { "epoch": 0.05842031184309138, "grad_norm": 8.107889478601699, "learning_rate": 9.978861541477694e-06, "loss": 18.3321, "step": 3196 }, { "epoch": 0.0584385910395379, "grad_norm": 7.672274118425343, "learning_rate": 9.978834342340329e-06, "loss": 17.8744, "step": 3197 }, { "epoch": 0.05845687023598443, "grad_norm": 7.50720109128021, "learning_rate": 9.978807125752582e-06, "loss": 18.3519, "step": 3198 }, { "epoch": 0.05847514943243095, "grad_norm": 8.526593217492945, "learning_rate": 9.97877989171455e-06, "loss": 18.2489, "step": 3199 }, { "epoch": 0.05849342862887748, "grad_norm": 9.441229667642398, "learning_rate": 9.978752640226325e-06, "loss": 19.2197, "step": 3200 }, { "epoch": 0.058511707825324, "grad_norm": 8.319443367985162, "learning_rate": 9.978725371288004e-06, "loss": 18.3359, "step": 3201 }, { "epoch": 0.05852998702177052, "grad_norm": 8.011564051634045, "learning_rate": 9.978698084899682e-06, "loss": 18.2131, "step": 3202 }, { "epoch": 0.05854826621821705, "grad_norm": 7.764131198002185, "learning_rate": 9.978670781061457e-06, "loss": 18.3141, "step": 3203 }, { "epoch": 0.05856654541466357, "grad_norm": 7.6742758845209735, "learning_rate": 9.97864345977342e-06, "loss": 18.1446, "step": 3204 }, { "epoch": 0.058584824611110094, "grad_norm": 7.481482628458801, "learning_rate": 9.978616121035672e-06, "loss": 18.2536, "step": 3205 }, { "epoch": 0.05860310380755662, "grad_norm": 8.053813882400595, "learning_rate": 9.978588764848307e-06, "loss": 17.7258, "step": 3206 }, { "epoch": 0.058621383004003144, "grad_norm": 9.181886304858015, "learning_rate": 9.97856139121142e-06, "loss": 18.8872, "step": 3207 }, { "epoch": 0.058639662200449666, "grad_norm": 6.952728968364468, "learning_rate": 9.978534000125106e-06, "loss": 17.8382, "step": 3208 }, { "epoch": 0.058657941396896195, "grad_norm": 6.639926778610925, "learning_rate": 9.978506591589463e-06, "loss": 17.7191, "step": 3209 }, { "epoch": 0.058676220593342716, "grad_norm": 7.467723950323885, "learning_rate": 9.978479165604586e-06, "loss": 18.1541, "step": 3210 }, { "epoch": 0.05869449978978924, "grad_norm": 8.352101402192078, "learning_rate": 9.978451722170572e-06, "loss": 17.8843, "step": 3211 }, { "epoch": 0.05871277898623577, "grad_norm": 6.614723848081901, "learning_rate": 9.978424261287518e-06, "loss": 17.4353, "step": 3212 }, { "epoch": 0.05873105818268229, "grad_norm": 9.070620645938524, "learning_rate": 9.978396782955518e-06, "loss": 19.1536, "step": 3213 }, { "epoch": 0.05874933737912881, "grad_norm": 8.718314512695132, "learning_rate": 9.978369287174668e-06, "loss": 18.5638, "step": 3214 }, { "epoch": 0.05876761657557534, "grad_norm": 6.716483752096194, "learning_rate": 9.978341773945067e-06, "loss": 17.8021, "step": 3215 }, { "epoch": 0.05878589577202186, "grad_norm": 8.165609762942184, "learning_rate": 9.97831424326681e-06, "loss": 18.0288, "step": 3216 }, { "epoch": 0.05880417496846839, "grad_norm": 6.437159544469477, "learning_rate": 9.978286695139993e-06, "loss": 17.4915, "step": 3217 }, { "epoch": 0.05882245416491491, "grad_norm": 7.366466706580026, "learning_rate": 9.978259129564713e-06, "loss": 17.7899, "step": 3218 }, { "epoch": 0.05884073336136143, "grad_norm": 9.018297973451752, "learning_rate": 9.978231546541069e-06, "loss": 18.2572, "step": 3219 }, { "epoch": 0.05885901255780796, "grad_norm": 8.295663361723875, "learning_rate": 9.978203946069154e-06, "loss": 18.2259, "step": 3220 }, { "epoch": 0.058877291754254484, "grad_norm": 6.665727972785896, "learning_rate": 9.978176328149064e-06, "loss": 17.5892, "step": 3221 }, { "epoch": 0.058895570950701005, "grad_norm": 9.108174279374897, "learning_rate": 9.9781486927809e-06, "loss": 18.667, "step": 3222 }, { "epoch": 0.058913850147147534, "grad_norm": 7.2191477273365425, "learning_rate": 9.978121039964757e-06, "loss": 17.9186, "step": 3223 }, { "epoch": 0.058932129343594056, "grad_norm": 6.952581207518228, "learning_rate": 9.978093369700733e-06, "loss": 18.0369, "step": 3224 }, { "epoch": 0.05895040854004058, "grad_norm": 9.454293435602219, "learning_rate": 9.978065681988921e-06, "loss": 19.1317, "step": 3225 }, { "epoch": 0.058968687736487106, "grad_norm": 7.661129555256189, "learning_rate": 9.978037976829423e-06, "loss": 18.2276, "step": 3226 }, { "epoch": 0.05898696693293363, "grad_norm": 7.190124056734599, "learning_rate": 9.978010254222332e-06, "loss": 17.7638, "step": 3227 }, { "epoch": 0.05900524612938015, "grad_norm": 6.885324932029891, "learning_rate": 9.977982514167748e-06, "loss": 17.6156, "step": 3228 }, { "epoch": 0.05902352532582668, "grad_norm": 8.836508839201812, "learning_rate": 9.977954756665766e-06, "loss": 18.5187, "step": 3229 }, { "epoch": 0.0590418045222732, "grad_norm": 7.042024853413884, "learning_rate": 9.977926981716486e-06, "loss": 17.7895, "step": 3230 }, { "epoch": 0.05906008371871972, "grad_norm": 7.222072324753219, "learning_rate": 9.977899189320002e-06, "loss": 18.0641, "step": 3231 }, { "epoch": 0.05907836291516625, "grad_norm": 7.789156150808883, "learning_rate": 9.977871379476416e-06, "loss": 18.1854, "step": 3232 }, { "epoch": 0.05909664211161277, "grad_norm": 7.019874445437046, "learning_rate": 9.977843552185822e-06, "loss": 17.5921, "step": 3233 }, { "epoch": 0.0591149213080593, "grad_norm": 6.961227256593004, "learning_rate": 9.977815707448317e-06, "loss": 17.638, "step": 3234 }, { "epoch": 0.05913320050450582, "grad_norm": 7.404831439084697, "learning_rate": 9.977787845264001e-06, "loss": 18.1359, "step": 3235 }, { "epoch": 0.059151479700952345, "grad_norm": 7.6081190895585085, "learning_rate": 9.97775996563297e-06, "loss": 17.9784, "step": 3236 }, { "epoch": 0.05916975889739887, "grad_norm": 8.189798289907229, "learning_rate": 9.977732068555323e-06, "loss": 18.1947, "step": 3237 }, { "epoch": 0.059188038093845395, "grad_norm": 9.603313968659359, "learning_rate": 9.977704154031156e-06, "loss": 18.4913, "step": 3238 }, { "epoch": 0.05920631729029192, "grad_norm": 7.24942481827696, "learning_rate": 9.977676222060568e-06, "loss": 17.6742, "step": 3239 }, { "epoch": 0.059224596486738446, "grad_norm": 7.837441304765053, "learning_rate": 9.977648272643658e-06, "loss": 18.347, "step": 3240 }, { "epoch": 0.05924287568318497, "grad_norm": 7.488596814459485, "learning_rate": 9.977620305780522e-06, "loss": 17.9552, "step": 3241 }, { "epoch": 0.05926115487963149, "grad_norm": 25.05949073017427, "learning_rate": 9.977592321471259e-06, "loss": 18.189, "step": 3242 }, { "epoch": 0.05927943407607802, "grad_norm": 8.342189405871983, "learning_rate": 9.977564319715966e-06, "loss": 17.5618, "step": 3243 }, { "epoch": 0.05929771327252454, "grad_norm": 9.221504010474346, "learning_rate": 9.977536300514742e-06, "loss": 17.8729, "step": 3244 }, { "epoch": 0.05931599246897106, "grad_norm": 10.218891821200936, "learning_rate": 9.977508263867688e-06, "loss": 18.9667, "step": 3245 }, { "epoch": 0.05933427166541759, "grad_norm": 8.044011082674539, "learning_rate": 9.977480209774897e-06, "loss": 18.1998, "step": 3246 }, { "epoch": 0.05935255086186411, "grad_norm": 8.786931638511275, "learning_rate": 9.977452138236469e-06, "loss": 17.7495, "step": 3247 }, { "epoch": 0.059370830058310634, "grad_norm": 7.66967034121712, "learning_rate": 9.977424049252504e-06, "loss": 17.4421, "step": 3248 }, { "epoch": 0.05938910925475716, "grad_norm": 7.727496643488349, "learning_rate": 9.9773959428231e-06, "loss": 18.2082, "step": 3249 }, { "epoch": 0.059407388451203684, "grad_norm": 8.994041901925366, "learning_rate": 9.977367818948355e-06, "loss": 18.2771, "step": 3250 }, { "epoch": 0.05942566764765021, "grad_norm": 8.026884494537635, "learning_rate": 9.977339677628369e-06, "loss": 17.595, "step": 3251 }, { "epoch": 0.059443946844096734, "grad_norm": 7.570263993485957, "learning_rate": 9.977311518863237e-06, "loss": 17.6949, "step": 3252 }, { "epoch": 0.059462226040543256, "grad_norm": 7.846573858742153, "learning_rate": 9.97728334265306e-06, "loss": 18.1315, "step": 3253 }, { "epoch": 0.059480505236989785, "grad_norm": 9.346891470386613, "learning_rate": 9.97725514899794e-06, "loss": 18.8245, "step": 3254 }, { "epoch": 0.05949878443343631, "grad_norm": 8.173897293518895, "learning_rate": 9.97722693789797e-06, "loss": 17.6173, "step": 3255 }, { "epoch": 0.05951706362988283, "grad_norm": 6.700094234008094, "learning_rate": 9.97719870935325e-06, "loss": 17.3285, "step": 3256 }, { "epoch": 0.05953534282632936, "grad_norm": 7.4321584915430785, "learning_rate": 9.977170463363883e-06, "loss": 17.7529, "step": 3257 }, { "epoch": 0.05955362202277588, "grad_norm": 8.382330015612283, "learning_rate": 9.977142199929965e-06, "loss": 17.927, "step": 3258 }, { "epoch": 0.0595719012192224, "grad_norm": 7.894075544574096, "learning_rate": 9.977113919051595e-06, "loss": 17.815, "step": 3259 }, { "epoch": 0.05959018041566893, "grad_norm": 8.098918306058742, "learning_rate": 9.977085620728875e-06, "loss": 18.2385, "step": 3260 }, { "epoch": 0.05960845961211545, "grad_norm": 8.068916376934771, "learning_rate": 9.977057304961899e-06, "loss": 17.9026, "step": 3261 }, { "epoch": 0.05962673880856197, "grad_norm": 8.10641133511313, "learning_rate": 9.977028971750769e-06, "loss": 18.2146, "step": 3262 }, { "epoch": 0.0596450180050085, "grad_norm": 6.512236878155436, "learning_rate": 9.977000621095585e-06, "loss": 17.4373, "step": 3263 }, { "epoch": 0.05966329720145502, "grad_norm": 8.71608684189703, "learning_rate": 9.976972252996447e-06, "loss": 18.4464, "step": 3264 }, { "epoch": 0.059681576397901545, "grad_norm": 7.281434667691815, "learning_rate": 9.976943867453452e-06, "loss": 17.6039, "step": 3265 }, { "epoch": 0.059699855594348074, "grad_norm": 8.526349343559344, "learning_rate": 9.9769154644667e-06, "loss": 18.5155, "step": 3266 }, { "epoch": 0.059718134790794596, "grad_norm": 7.266321725012322, "learning_rate": 9.976887044036291e-06, "loss": 17.7049, "step": 3267 }, { "epoch": 0.059736413987241124, "grad_norm": 8.96036466200305, "learning_rate": 9.976858606162326e-06, "loss": 18.1321, "step": 3268 }, { "epoch": 0.059754693183687646, "grad_norm": 9.878808320607032, "learning_rate": 9.976830150844902e-06, "loss": 18.5663, "step": 3269 }, { "epoch": 0.05977297238013417, "grad_norm": 8.15912469902459, "learning_rate": 9.976801678084123e-06, "loss": 17.9054, "step": 3270 }, { "epoch": 0.059791251576580697, "grad_norm": 6.4488928278431255, "learning_rate": 9.976773187880083e-06, "loss": 17.4122, "step": 3271 }, { "epoch": 0.05980953077302722, "grad_norm": 8.785649582603085, "learning_rate": 9.976744680232886e-06, "loss": 18.1126, "step": 3272 }, { "epoch": 0.05982780996947374, "grad_norm": 8.9666925267377, "learning_rate": 9.976716155142632e-06, "loss": 18.475, "step": 3273 }, { "epoch": 0.05984608916592027, "grad_norm": 8.085738504039458, "learning_rate": 9.976687612609417e-06, "loss": 18.0753, "step": 3274 }, { "epoch": 0.05986436836236679, "grad_norm": 8.834988133785304, "learning_rate": 9.976659052633347e-06, "loss": 18.6388, "step": 3275 }, { "epoch": 0.05988264755881331, "grad_norm": 7.149216722572933, "learning_rate": 9.976630475214515e-06, "loss": 17.5655, "step": 3276 }, { "epoch": 0.05990092675525984, "grad_norm": 7.248163919124378, "learning_rate": 9.976601880353028e-06, "loss": 17.8018, "step": 3277 }, { "epoch": 0.05991920595170636, "grad_norm": 7.903396617238845, "learning_rate": 9.976573268048983e-06, "loss": 18.1317, "step": 3278 }, { "epoch": 0.059937485148152884, "grad_norm": 7.2367672796718105, "learning_rate": 9.97654463830248e-06, "loss": 17.8175, "step": 3279 }, { "epoch": 0.05995576434459941, "grad_norm": 7.61388358163043, "learning_rate": 9.97651599111362e-06, "loss": 17.9395, "step": 3280 }, { "epoch": 0.059974043541045935, "grad_norm": 6.776332874611314, "learning_rate": 9.976487326482503e-06, "loss": 17.7383, "step": 3281 }, { "epoch": 0.05999232273749246, "grad_norm": 8.476125130232775, "learning_rate": 9.976458644409231e-06, "loss": 18.3575, "step": 3282 }, { "epoch": 0.060010601933938985, "grad_norm": 7.487495575125886, "learning_rate": 9.976429944893902e-06, "loss": 17.8411, "step": 3283 }, { "epoch": 0.06002888113038551, "grad_norm": 7.629057785867848, "learning_rate": 9.976401227936616e-06, "loss": 17.9797, "step": 3284 }, { "epoch": 0.060047160326832036, "grad_norm": 7.9242789683691015, "learning_rate": 9.976372493537479e-06, "loss": 17.9919, "step": 3285 }, { "epoch": 0.06006543952327856, "grad_norm": 7.6290860103886695, "learning_rate": 9.976343741696586e-06, "loss": 17.9728, "step": 3286 }, { "epoch": 0.06008371871972508, "grad_norm": 7.552164863018478, "learning_rate": 9.97631497241404e-06, "loss": 17.7703, "step": 3287 }, { "epoch": 0.06010199791617161, "grad_norm": 7.17350157489045, "learning_rate": 9.976286185689944e-06, "loss": 17.8132, "step": 3288 }, { "epoch": 0.06012027711261813, "grad_norm": 7.168377024849365, "learning_rate": 9.976257381524396e-06, "loss": 17.771, "step": 3289 }, { "epoch": 0.06013855630906465, "grad_norm": 9.442343423631371, "learning_rate": 9.976228559917497e-06, "loss": 18.45, "step": 3290 }, { "epoch": 0.06015683550551118, "grad_norm": 7.685395625931217, "learning_rate": 9.976199720869348e-06, "loss": 18.0959, "step": 3291 }, { "epoch": 0.0601751147019577, "grad_norm": 7.789790910507743, "learning_rate": 9.976170864380052e-06, "loss": 18.2507, "step": 3292 }, { "epoch": 0.060193393898404224, "grad_norm": 6.91393548772296, "learning_rate": 9.976141990449708e-06, "loss": 17.4989, "step": 3293 }, { "epoch": 0.06021167309485075, "grad_norm": 7.900983241934687, "learning_rate": 9.97611309907842e-06, "loss": 18.2346, "step": 3294 }, { "epoch": 0.060229952291297274, "grad_norm": 10.003760275284977, "learning_rate": 9.976084190266286e-06, "loss": 19.1263, "step": 3295 }, { "epoch": 0.060248231487743796, "grad_norm": 8.209602792150497, "learning_rate": 9.976055264013408e-06, "loss": 18.0399, "step": 3296 }, { "epoch": 0.060266510684190325, "grad_norm": 8.706734941265879, "learning_rate": 9.97602632031989e-06, "loss": 18.4225, "step": 3297 }, { "epoch": 0.060284789880636847, "grad_norm": 8.016640715379337, "learning_rate": 9.97599735918583e-06, "loss": 18.3653, "step": 3298 }, { "epoch": 0.06030306907708337, "grad_norm": 6.334820181175187, "learning_rate": 9.975968380611332e-06, "loss": 17.2041, "step": 3299 }, { "epoch": 0.0603213482735299, "grad_norm": 7.578792617036335, "learning_rate": 9.975939384596496e-06, "loss": 17.9529, "step": 3300 }, { "epoch": 0.06033962746997642, "grad_norm": 8.093553982308935, "learning_rate": 9.975910371141424e-06, "loss": 18.4033, "step": 3301 }, { "epoch": 0.06035790666642295, "grad_norm": 8.083567144807205, "learning_rate": 9.975881340246218e-06, "loss": 18.0603, "step": 3302 }, { "epoch": 0.06037618586286947, "grad_norm": 9.014212016579211, "learning_rate": 9.975852291910982e-06, "loss": 17.8603, "step": 3303 }, { "epoch": 0.06039446505931599, "grad_norm": 7.197672327541092, "learning_rate": 9.975823226135813e-06, "loss": 17.8271, "step": 3304 }, { "epoch": 0.06041274425576252, "grad_norm": 6.384490700127685, "learning_rate": 9.975794142920815e-06, "loss": 17.4841, "step": 3305 }, { "epoch": 0.06043102345220904, "grad_norm": 7.368130733758599, "learning_rate": 9.975765042266091e-06, "loss": 18.0889, "step": 3306 }, { "epoch": 0.06044930264865556, "grad_norm": 6.495304240788988, "learning_rate": 9.975735924171744e-06, "loss": 17.2142, "step": 3307 }, { "epoch": 0.06046758184510209, "grad_norm": 6.956934837553296, "learning_rate": 9.97570678863787e-06, "loss": 17.6698, "step": 3308 }, { "epoch": 0.060485861041548614, "grad_norm": 8.587591323241254, "learning_rate": 9.97567763566458e-06, "loss": 18.2999, "step": 3309 }, { "epoch": 0.060504140237995135, "grad_norm": 8.128712306915489, "learning_rate": 9.97564846525197e-06, "loss": 18.3157, "step": 3310 }, { "epoch": 0.060522419434441664, "grad_norm": 6.778574663367813, "learning_rate": 9.975619277400144e-06, "loss": 17.5754, "step": 3311 }, { "epoch": 0.060540698630888186, "grad_norm": 7.144848975595105, "learning_rate": 9.975590072109205e-06, "loss": 17.6057, "step": 3312 }, { "epoch": 0.06055897782733471, "grad_norm": 6.863204769693211, "learning_rate": 9.975560849379253e-06, "loss": 17.6446, "step": 3313 }, { "epoch": 0.060577257023781236, "grad_norm": 6.954857254229112, "learning_rate": 9.975531609210393e-06, "loss": 17.3515, "step": 3314 }, { "epoch": 0.06059553622022776, "grad_norm": 7.387071064673921, "learning_rate": 9.975502351602726e-06, "loss": 17.9307, "step": 3315 }, { "epoch": 0.06061381541667428, "grad_norm": 7.204790219760822, "learning_rate": 9.975473076556355e-06, "loss": 17.552, "step": 3316 }, { "epoch": 0.06063209461312081, "grad_norm": 7.034817915669464, "learning_rate": 9.975443784071383e-06, "loss": 17.6357, "step": 3317 }, { "epoch": 0.06065037380956733, "grad_norm": 7.178123674791742, "learning_rate": 9.975414474147911e-06, "loss": 17.7269, "step": 3318 }, { "epoch": 0.06066865300601386, "grad_norm": 7.519903671620943, "learning_rate": 9.975385146786044e-06, "loss": 17.9158, "step": 3319 }, { "epoch": 0.06068693220246038, "grad_norm": 7.304931624101692, "learning_rate": 9.975355801985885e-06, "loss": 17.7184, "step": 3320 }, { "epoch": 0.0607052113989069, "grad_norm": 6.743292187568647, "learning_rate": 9.975326439747534e-06, "loss": 17.5652, "step": 3321 }, { "epoch": 0.06072349059535343, "grad_norm": 9.112577517205871, "learning_rate": 9.975297060071097e-06, "loss": 18.6824, "step": 3322 }, { "epoch": 0.06074176979179995, "grad_norm": 7.689219605823097, "learning_rate": 9.975267662956674e-06, "loss": 17.776, "step": 3323 }, { "epoch": 0.060760048988246475, "grad_norm": 8.972564384176065, "learning_rate": 9.97523824840437e-06, "loss": 18.6782, "step": 3324 }, { "epoch": 0.060778328184693, "grad_norm": 7.75198143338471, "learning_rate": 9.975208816414288e-06, "loss": 17.868, "step": 3325 }, { "epoch": 0.060796607381139525, "grad_norm": 6.605808021525001, "learning_rate": 9.97517936698653e-06, "loss": 17.6094, "step": 3326 }, { "epoch": 0.06081488657758605, "grad_norm": 6.495524699742615, "learning_rate": 9.975149900121201e-06, "loss": 17.3402, "step": 3327 }, { "epoch": 0.060833165774032576, "grad_norm": 8.519181664837557, "learning_rate": 9.975120415818403e-06, "loss": 18.2129, "step": 3328 }, { "epoch": 0.0608514449704791, "grad_norm": 6.830774350900405, "learning_rate": 9.97509091407824e-06, "loss": 17.6935, "step": 3329 }, { "epoch": 0.06086972416692562, "grad_norm": 6.619043740946671, "learning_rate": 9.975061394900814e-06, "loss": 17.4408, "step": 3330 }, { "epoch": 0.06088800336337215, "grad_norm": 7.538531552487765, "learning_rate": 9.97503185828623e-06, "loss": 18.0834, "step": 3331 }, { "epoch": 0.06090628255981867, "grad_norm": 8.20921101532402, "learning_rate": 9.975002304234593e-06, "loss": 18.2413, "step": 3332 }, { "epoch": 0.06092456175626519, "grad_norm": 9.01460801019503, "learning_rate": 9.974972732746002e-06, "loss": 18.0666, "step": 3333 }, { "epoch": 0.06094284095271172, "grad_norm": 7.724620821664519, "learning_rate": 9.974943143820564e-06, "loss": 17.9919, "step": 3334 }, { "epoch": 0.06096112014915824, "grad_norm": 7.1984340650835215, "learning_rate": 9.974913537458384e-06, "loss": 17.9003, "step": 3335 }, { "epoch": 0.06097939934560477, "grad_norm": 6.1580223199414, "learning_rate": 9.974883913659561e-06, "loss": 17.3667, "step": 3336 }, { "epoch": 0.06099767854205129, "grad_norm": 7.75991150855965, "learning_rate": 9.974854272424203e-06, "loss": 17.8572, "step": 3337 }, { "epoch": 0.061015957738497814, "grad_norm": 7.0164285715245605, "learning_rate": 9.974824613752412e-06, "loss": 17.5536, "step": 3338 }, { "epoch": 0.06103423693494434, "grad_norm": 7.859173707418357, "learning_rate": 9.974794937644292e-06, "loss": 17.8944, "step": 3339 }, { "epoch": 0.061052516131390865, "grad_norm": 7.5441465924666735, "learning_rate": 9.97476524409995e-06, "loss": 17.7754, "step": 3340 }, { "epoch": 0.061070795327837386, "grad_norm": 6.853068682320698, "learning_rate": 9.974735533119485e-06, "loss": 17.7593, "step": 3341 }, { "epoch": 0.061089074524283915, "grad_norm": 7.482705010266999, "learning_rate": 9.974705804703002e-06, "loss": 17.8316, "step": 3342 }, { "epoch": 0.06110735372073044, "grad_norm": 8.469190246912817, "learning_rate": 9.97467605885061e-06, "loss": 18.1225, "step": 3343 }, { "epoch": 0.06112563291717696, "grad_norm": 6.541346402218264, "learning_rate": 9.97464629556241e-06, "loss": 17.1778, "step": 3344 }, { "epoch": 0.06114391211362349, "grad_norm": 8.319928154406021, "learning_rate": 9.974616514838504e-06, "loss": 18.5495, "step": 3345 }, { "epoch": 0.06116219131007001, "grad_norm": 8.379862459573424, "learning_rate": 9.974586716679e-06, "loss": 18.2181, "step": 3346 }, { "epoch": 0.06118047050651653, "grad_norm": 8.57272529587977, "learning_rate": 9.974556901084002e-06, "loss": 18.0759, "step": 3347 }, { "epoch": 0.06119874970296306, "grad_norm": 9.490933256616948, "learning_rate": 9.974527068053613e-06, "loss": 18.5071, "step": 3348 }, { "epoch": 0.06121702889940958, "grad_norm": 8.791459883568644, "learning_rate": 9.97449721758794e-06, "loss": 18.239, "step": 3349 }, { "epoch": 0.0612353080958561, "grad_norm": 7.428366883134674, "learning_rate": 9.974467349687082e-06, "loss": 17.9357, "step": 3350 }, { "epoch": 0.06125358729230263, "grad_norm": 9.111407809345186, "learning_rate": 9.974437464351151e-06, "loss": 18.6755, "step": 3351 }, { "epoch": 0.061271866488749153, "grad_norm": 8.635498076159992, "learning_rate": 9.974407561580248e-06, "loss": 18.2551, "step": 3352 }, { "epoch": 0.06129014568519568, "grad_norm": 6.650086668964261, "learning_rate": 9.974377641374477e-06, "loss": 17.3573, "step": 3353 }, { "epoch": 0.061308424881642204, "grad_norm": 7.135388716063664, "learning_rate": 9.974347703733945e-06, "loss": 17.7386, "step": 3354 }, { "epoch": 0.061326704078088726, "grad_norm": 7.224218446437377, "learning_rate": 9.974317748658754e-06, "loss": 17.7415, "step": 3355 }, { "epoch": 0.061344983274535254, "grad_norm": 6.7564680913700315, "learning_rate": 9.974287776149013e-06, "loss": 17.8325, "step": 3356 }, { "epoch": 0.061363262470981776, "grad_norm": 6.540021535054071, "learning_rate": 9.974257786204826e-06, "loss": 17.4231, "step": 3357 }, { "epoch": 0.0613815416674283, "grad_norm": 6.815830767117462, "learning_rate": 9.974227778826296e-06, "loss": 17.4986, "step": 3358 }, { "epoch": 0.06139982086387483, "grad_norm": 7.129445299972657, "learning_rate": 9.974197754013527e-06, "loss": 17.8041, "step": 3359 }, { "epoch": 0.06141810006032135, "grad_norm": 7.153791344324305, "learning_rate": 9.974167711766629e-06, "loss": 17.6922, "step": 3360 }, { "epoch": 0.06143637925676787, "grad_norm": 8.150271325666516, "learning_rate": 9.974137652085705e-06, "loss": 18.0894, "step": 3361 }, { "epoch": 0.0614546584532144, "grad_norm": 6.76458176038737, "learning_rate": 9.974107574970858e-06, "loss": 17.9596, "step": 3362 }, { "epoch": 0.06147293764966092, "grad_norm": 6.562435612957236, "learning_rate": 9.974077480422197e-06, "loss": 17.5366, "step": 3363 }, { "epoch": 0.06149121684610744, "grad_norm": 8.310894250512407, "learning_rate": 9.974047368439827e-06, "loss": 17.8685, "step": 3364 }, { "epoch": 0.06150949604255397, "grad_norm": 6.424976980092459, "learning_rate": 9.974017239023851e-06, "loss": 17.3777, "step": 3365 }, { "epoch": 0.06152777523900049, "grad_norm": 9.159875494056582, "learning_rate": 9.973987092174377e-06, "loss": 18.3677, "step": 3366 }, { "epoch": 0.061546054435447015, "grad_norm": 9.207276557072449, "learning_rate": 9.97395692789151e-06, "loss": 19.1496, "step": 3367 }, { "epoch": 0.06156433363189354, "grad_norm": 8.280780761104374, "learning_rate": 9.973926746175354e-06, "loss": 18.1527, "step": 3368 }, { "epoch": 0.061582612828340065, "grad_norm": 9.105685183822006, "learning_rate": 9.973896547026019e-06, "loss": 18.2787, "step": 3369 }, { "epoch": 0.061600892024786594, "grad_norm": 7.568047989769413, "learning_rate": 9.973866330443606e-06, "loss": 17.6217, "step": 3370 }, { "epoch": 0.061619171221233116, "grad_norm": 8.525715853579365, "learning_rate": 9.973836096428224e-06, "loss": 17.9766, "step": 3371 }, { "epoch": 0.06163745041767964, "grad_norm": 8.70039255709809, "learning_rate": 9.973805844979978e-06, "loss": 18.2261, "step": 3372 }, { "epoch": 0.061655729614126166, "grad_norm": 7.227871794954165, "learning_rate": 9.973775576098974e-06, "loss": 17.7765, "step": 3373 }, { "epoch": 0.06167400881057269, "grad_norm": 6.400738094315887, "learning_rate": 9.973745289785318e-06, "loss": 17.3244, "step": 3374 }, { "epoch": 0.06169228800701921, "grad_norm": 5.9325840074765885, "learning_rate": 9.973714986039117e-06, "loss": 17.1561, "step": 3375 }, { "epoch": 0.06171056720346574, "grad_norm": 7.481540854368121, "learning_rate": 9.973684664860477e-06, "loss": 17.8805, "step": 3376 }, { "epoch": 0.06172884639991226, "grad_norm": 8.170644267216476, "learning_rate": 9.973654326249502e-06, "loss": 18.4836, "step": 3377 }, { "epoch": 0.06174712559635878, "grad_norm": 6.232161029270701, "learning_rate": 9.973623970206302e-06, "loss": 17.2242, "step": 3378 }, { "epoch": 0.06176540479280531, "grad_norm": 8.752431819138764, "learning_rate": 9.97359359673098e-06, "loss": 18.2505, "step": 3379 }, { "epoch": 0.06178368398925183, "grad_norm": 8.109599262971056, "learning_rate": 9.973563205823645e-06, "loss": 17.9687, "step": 3380 }, { "epoch": 0.061801963185698354, "grad_norm": 8.134697727474459, "learning_rate": 9.973532797484403e-06, "loss": 18.2258, "step": 3381 }, { "epoch": 0.06182024238214488, "grad_norm": 7.605585006406203, "learning_rate": 9.973502371713359e-06, "loss": 18.0899, "step": 3382 }, { "epoch": 0.061838521578591404, "grad_norm": 7.353451548277089, "learning_rate": 9.973471928510621e-06, "loss": 17.7314, "step": 3383 }, { "epoch": 0.061856800775037926, "grad_norm": 7.641356288460134, "learning_rate": 9.973441467876298e-06, "loss": 17.9315, "step": 3384 }, { "epoch": 0.061875079971484455, "grad_norm": 7.034745775133391, "learning_rate": 9.97341098981049e-06, "loss": 17.6319, "step": 3385 }, { "epoch": 0.06189335916793098, "grad_norm": 8.052019579235315, "learning_rate": 9.973380494313312e-06, "loss": 18.3079, "step": 3386 }, { "epoch": 0.061911638364377505, "grad_norm": 7.919250964595605, "learning_rate": 9.973349981384864e-06, "loss": 18.1278, "step": 3387 }, { "epoch": 0.06192991756082403, "grad_norm": 7.239696372922274, "learning_rate": 9.973319451025256e-06, "loss": 17.9743, "step": 3388 }, { "epoch": 0.06194819675727055, "grad_norm": 6.624433741140478, "learning_rate": 9.973288903234597e-06, "loss": 17.6247, "step": 3389 }, { "epoch": 0.06196647595371708, "grad_norm": 7.931092143699261, "learning_rate": 9.97325833801299e-06, "loss": 18.0357, "step": 3390 }, { "epoch": 0.0619847551501636, "grad_norm": 7.642311280538421, "learning_rate": 9.973227755360547e-06, "loss": 17.9035, "step": 3391 }, { "epoch": 0.06200303434661012, "grad_norm": 7.292890780594306, "learning_rate": 9.973197155277368e-06, "loss": 17.7307, "step": 3392 }, { "epoch": 0.06202131354305665, "grad_norm": 7.668085599362952, "learning_rate": 9.973166537763568e-06, "loss": 17.962, "step": 3393 }, { "epoch": 0.06203959273950317, "grad_norm": 7.360767722070579, "learning_rate": 9.973135902819249e-06, "loss": 17.6829, "step": 3394 }, { "epoch": 0.06205787193594969, "grad_norm": 8.017153441352935, "learning_rate": 9.973105250444522e-06, "loss": 18.1401, "step": 3395 }, { "epoch": 0.06207615113239622, "grad_norm": 7.94897649970752, "learning_rate": 9.97307458063949e-06, "loss": 17.9615, "step": 3396 }, { "epoch": 0.062094430328842744, "grad_norm": 6.8966507827622445, "learning_rate": 9.973043893404264e-06, "loss": 17.6592, "step": 3397 }, { "epoch": 0.062112709525289266, "grad_norm": 7.875780027846666, "learning_rate": 9.97301318873895e-06, "loss": 18.0234, "step": 3398 }, { "epoch": 0.062130988721735794, "grad_norm": 8.625055502829102, "learning_rate": 9.97298246664366e-06, "loss": 18.2048, "step": 3399 }, { "epoch": 0.062149267918182316, "grad_norm": 9.224413111984235, "learning_rate": 9.972951727118493e-06, "loss": 18.4906, "step": 3400 }, { "epoch": 0.06216754711462884, "grad_norm": 8.224259994090788, "learning_rate": 9.972920970163566e-06, "loss": 17.6905, "step": 3401 }, { "epoch": 0.062185826311075366, "grad_norm": 8.59232117911046, "learning_rate": 9.972890195778982e-06, "loss": 18.0538, "step": 3402 }, { "epoch": 0.06220410550752189, "grad_norm": 6.533585167274885, "learning_rate": 9.972859403964848e-06, "loss": 17.3578, "step": 3403 }, { "epoch": 0.06222238470396842, "grad_norm": 8.334520291081128, "learning_rate": 9.972828594721272e-06, "loss": 18.3773, "step": 3404 }, { "epoch": 0.06224066390041494, "grad_norm": 8.596086287415487, "learning_rate": 9.972797768048366e-06, "loss": 18.4815, "step": 3405 }, { "epoch": 0.06225894309686146, "grad_norm": 6.803339182577718, "learning_rate": 9.972766923946233e-06, "loss": 17.5434, "step": 3406 }, { "epoch": 0.06227722229330799, "grad_norm": 8.767755354595359, "learning_rate": 9.972736062414985e-06, "loss": 18.2653, "step": 3407 }, { "epoch": 0.06229550148975451, "grad_norm": 8.634460171921882, "learning_rate": 9.972705183454728e-06, "loss": 17.8145, "step": 3408 }, { "epoch": 0.06231378068620103, "grad_norm": 8.138618267251033, "learning_rate": 9.972674287065572e-06, "loss": 17.9175, "step": 3409 }, { "epoch": 0.06233205988264756, "grad_norm": 8.218118739045496, "learning_rate": 9.972643373247622e-06, "loss": 18.3895, "step": 3410 }, { "epoch": 0.06235033907909408, "grad_norm": 7.523550041610053, "learning_rate": 9.97261244200099e-06, "loss": 17.7614, "step": 3411 }, { "epoch": 0.062368618275540605, "grad_norm": 6.8500248444360405, "learning_rate": 9.972581493325781e-06, "loss": 17.5702, "step": 3412 }, { "epoch": 0.062386897471987134, "grad_norm": 7.924404364025476, "learning_rate": 9.972550527222107e-06, "loss": 18.0084, "step": 3413 }, { "epoch": 0.062405176668433655, "grad_norm": 7.130359191753166, "learning_rate": 9.972519543690076e-06, "loss": 17.6316, "step": 3414 }, { "epoch": 0.06242345586488018, "grad_norm": 6.881607236454959, "learning_rate": 9.972488542729795e-06, "loss": 17.5451, "step": 3415 }, { "epoch": 0.062441735061326706, "grad_norm": 6.049960297311258, "learning_rate": 9.972457524341372e-06, "loss": 17.432, "step": 3416 }, { "epoch": 0.06246001425777323, "grad_norm": 5.9836785326342214, "learning_rate": 9.972426488524916e-06, "loss": 17.3387, "step": 3417 }, { "epoch": 0.06247829345421975, "grad_norm": 7.300580050033733, "learning_rate": 9.972395435280539e-06, "loss": 17.7574, "step": 3418 }, { "epoch": 0.06249657265066628, "grad_norm": 7.969434870318104, "learning_rate": 9.972364364608347e-06, "loss": 18.3107, "step": 3419 }, { "epoch": 0.0625148518471128, "grad_norm": 9.14055506522522, "learning_rate": 9.972333276508449e-06, "loss": 18.5925, "step": 3420 }, { "epoch": 0.06253313104355933, "grad_norm": 8.606727375443928, "learning_rate": 9.972302170980953e-06, "loss": 18.408, "step": 3421 }, { "epoch": 0.06255141024000585, "grad_norm": 7.729437250438963, "learning_rate": 9.97227104802597e-06, "loss": 17.821, "step": 3422 }, { "epoch": 0.06256968943645237, "grad_norm": 7.356468589302877, "learning_rate": 9.97223990764361e-06, "loss": 18.1116, "step": 3423 }, { "epoch": 0.0625879686328989, "grad_norm": 6.923663093282652, "learning_rate": 9.97220874983398e-06, "loss": 17.5378, "step": 3424 }, { "epoch": 0.06260624782934542, "grad_norm": 7.690729163444116, "learning_rate": 9.972177574597188e-06, "loss": 17.9849, "step": 3425 }, { "epoch": 0.06262452702579195, "grad_norm": 7.548743588291021, "learning_rate": 9.972146381933348e-06, "loss": 17.9003, "step": 3426 }, { "epoch": 0.06264280622223847, "grad_norm": 9.100394284286327, "learning_rate": 9.972115171842565e-06, "loss": 18.5092, "step": 3427 }, { "epoch": 0.062661085418685, "grad_norm": 8.163577626288586, "learning_rate": 9.972083944324948e-06, "loss": 18.0888, "step": 3428 }, { "epoch": 0.06267936461513152, "grad_norm": 6.535376515979392, "learning_rate": 9.97205269938061e-06, "loss": 17.5992, "step": 3429 }, { "epoch": 0.06269764381157804, "grad_norm": 8.565121635484441, "learning_rate": 9.972021437009659e-06, "loss": 18.2891, "step": 3430 }, { "epoch": 0.06271592300802457, "grad_norm": 7.981921959868312, "learning_rate": 9.971990157212203e-06, "loss": 17.9579, "step": 3431 }, { "epoch": 0.0627342022044711, "grad_norm": 8.589852885032029, "learning_rate": 9.971958859988356e-06, "loss": 18.361, "step": 3432 }, { "epoch": 0.06275248140091762, "grad_norm": 7.383544479894661, "learning_rate": 9.971927545338222e-06, "loss": 17.6967, "step": 3433 }, { "epoch": 0.06277076059736414, "grad_norm": 8.73758303850346, "learning_rate": 9.971896213261913e-06, "loss": 18.295, "step": 3434 }, { "epoch": 0.06278903979381066, "grad_norm": 7.249676291582927, "learning_rate": 9.971864863759539e-06, "loss": 17.9134, "step": 3435 }, { "epoch": 0.06280731899025718, "grad_norm": 6.715105176458013, "learning_rate": 9.971833496831212e-06, "loss": 17.5182, "step": 3436 }, { "epoch": 0.06282559818670372, "grad_norm": 7.243179566201054, "learning_rate": 9.97180211247704e-06, "loss": 17.8722, "step": 3437 }, { "epoch": 0.06284387738315024, "grad_norm": 9.627863930721766, "learning_rate": 9.971770710697132e-06, "loss": 18.5971, "step": 3438 }, { "epoch": 0.06286215657959676, "grad_norm": 7.2162061993454545, "learning_rate": 9.9717392914916e-06, "loss": 17.6061, "step": 3439 }, { "epoch": 0.06288043577604328, "grad_norm": 7.9558665182817165, "learning_rate": 9.971707854860552e-06, "loss": 17.984, "step": 3440 }, { "epoch": 0.0628987149724898, "grad_norm": 7.062158461069705, "learning_rate": 9.9716764008041e-06, "loss": 17.6622, "step": 3441 }, { "epoch": 0.06291699416893633, "grad_norm": 8.35263761669116, "learning_rate": 9.971644929322352e-06, "loss": 18.0986, "step": 3442 }, { "epoch": 0.06293527336538286, "grad_norm": 6.708141528712058, "learning_rate": 9.971613440415423e-06, "loss": 17.6747, "step": 3443 }, { "epoch": 0.06295355256182938, "grad_norm": 8.084755472624233, "learning_rate": 9.971581934083419e-06, "loss": 18.0422, "step": 3444 }, { "epoch": 0.0629718317582759, "grad_norm": 6.789099319257001, "learning_rate": 9.971550410326452e-06, "loss": 17.6722, "step": 3445 }, { "epoch": 0.06299011095472243, "grad_norm": 8.053466806811533, "learning_rate": 9.971518869144632e-06, "loss": 18.0015, "step": 3446 }, { "epoch": 0.06300839015116895, "grad_norm": 7.713966160652883, "learning_rate": 9.971487310538068e-06, "loss": 17.8359, "step": 3447 }, { "epoch": 0.06302666934761549, "grad_norm": 6.868844053402038, "learning_rate": 9.971455734506875e-06, "loss": 17.6808, "step": 3448 }, { "epoch": 0.06304494854406201, "grad_norm": 7.828485730439966, "learning_rate": 9.97142414105116e-06, "loss": 18.2127, "step": 3449 }, { "epoch": 0.06306322774050853, "grad_norm": 7.415642665524057, "learning_rate": 9.971392530171034e-06, "loss": 17.7132, "step": 3450 }, { "epoch": 0.06308150693695505, "grad_norm": 8.8612358790264, "learning_rate": 9.971360901866609e-06, "loss": 18.7788, "step": 3451 }, { "epoch": 0.06309978613340157, "grad_norm": 7.967788386559646, "learning_rate": 9.971329256137996e-06, "loss": 17.5848, "step": 3452 }, { "epoch": 0.0631180653298481, "grad_norm": 7.6248665283854535, "learning_rate": 9.971297592985305e-06, "loss": 17.7204, "step": 3453 }, { "epoch": 0.06313634452629463, "grad_norm": 7.280807643531678, "learning_rate": 9.971265912408647e-06, "loss": 17.9504, "step": 3454 }, { "epoch": 0.06315462372274115, "grad_norm": 9.04288844435571, "learning_rate": 9.971234214408135e-06, "loss": 18.5277, "step": 3455 }, { "epoch": 0.06317290291918767, "grad_norm": 6.844101706341605, "learning_rate": 9.971202498983878e-06, "loss": 17.5276, "step": 3456 }, { "epoch": 0.0631911821156342, "grad_norm": 6.796439532736444, "learning_rate": 9.971170766135986e-06, "loss": 17.5491, "step": 3457 }, { "epoch": 0.06320946131208072, "grad_norm": 8.0182914134866, "learning_rate": 9.971139015864573e-06, "loss": 18.0436, "step": 3458 }, { "epoch": 0.06322774050852724, "grad_norm": 7.014998252217009, "learning_rate": 9.97110724816975e-06, "loss": 17.7785, "step": 3459 }, { "epoch": 0.06324601970497377, "grad_norm": 7.01435714493046, "learning_rate": 9.971075463051625e-06, "loss": 17.6574, "step": 3460 }, { "epoch": 0.0632642989014203, "grad_norm": 7.469032959375107, "learning_rate": 9.971043660510313e-06, "loss": 17.9465, "step": 3461 }, { "epoch": 0.06328257809786682, "grad_norm": 7.121677354722678, "learning_rate": 9.971011840545925e-06, "loss": 17.7377, "step": 3462 }, { "epoch": 0.06330085729431334, "grad_norm": 6.821687248846872, "learning_rate": 9.970980003158573e-06, "loss": 17.4453, "step": 3463 }, { "epoch": 0.06331913649075986, "grad_norm": 8.478505236474815, "learning_rate": 9.970948148348365e-06, "loss": 17.7499, "step": 3464 }, { "epoch": 0.0633374156872064, "grad_norm": 7.661870204257442, "learning_rate": 9.970916276115416e-06, "loss": 17.9667, "step": 3465 }, { "epoch": 0.06335569488365292, "grad_norm": 9.228132847856529, "learning_rate": 9.970884386459835e-06, "loss": 18.3447, "step": 3466 }, { "epoch": 0.06337397408009944, "grad_norm": 7.207955661898897, "learning_rate": 9.970852479381739e-06, "loss": 17.6804, "step": 3467 }, { "epoch": 0.06339225327654596, "grad_norm": 7.677179675585283, "learning_rate": 9.970820554881235e-06, "loss": 17.9184, "step": 3468 }, { "epoch": 0.06341053247299248, "grad_norm": 7.036440996411475, "learning_rate": 9.970788612958435e-06, "loss": 17.6884, "step": 3469 }, { "epoch": 0.063428811669439, "grad_norm": 6.1990578642071785, "learning_rate": 9.970756653613454e-06, "loss": 17.3292, "step": 3470 }, { "epoch": 0.06344709086588554, "grad_norm": 6.978394586225296, "learning_rate": 9.970724676846401e-06, "loss": 17.8265, "step": 3471 }, { "epoch": 0.06346537006233206, "grad_norm": 7.80215406458022, "learning_rate": 9.97069268265739e-06, "loss": 17.8772, "step": 3472 }, { "epoch": 0.06348364925877859, "grad_norm": 6.220208366230502, "learning_rate": 9.970660671046533e-06, "loss": 17.3685, "step": 3473 }, { "epoch": 0.0635019284552251, "grad_norm": 7.043690752149071, "learning_rate": 9.97062864201394e-06, "loss": 17.2635, "step": 3474 }, { "epoch": 0.06352020765167163, "grad_norm": 7.7375271177917435, "learning_rate": 9.970596595559727e-06, "loss": 17.8664, "step": 3475 }, { "epoch": 0.06353848684811815, "grad_norm": 6.803555219732546, "learning_rate": 9.970564531684005e-06, "loss": 17.731, "step": 3476 }, { "epoch": 0.06355676604456469, "grad_norm": 9.905881943875691, "learning_rate": 9.970532450386883e-06, "loss": 18.3341, "step": 3477 }, { "epoch": 0.06357504524101121, "grad_norm": 7.17266199032207, "learning_rate": 9.970500351668476e-06, "loss": 17.8205, "step": 3478 }, { "epoch": 0.06359332443745773, "grad_norm": 8.859449122632668, "learning_rate": 9.970468235528898e-06, "loss": 18.7001, "step": 3479 }, { "epoch": 0.06361160363390425, "grad_norm": 7.684843891034342, "learning_rate": 9.97043610196826e-06, "loss": 17.6728, "step": 3480 }, { "epoch": 0.06362988283035077, "grad_norm": 7.3613624030745965, "learning_rate": 9.970403950986675e-06, "loss": 17.8042, "step": 3481 }, { "epoch": 0.06364816202679731, "grad_norm": 7.933813741236401, "learning_rate": 9.970371782584254e-06, "loss": 17.8566, "step": 3482 }, { "epoch": 0.06366644122324383, "grad_norm": 7.20114050981594, "learning_rate": 9.970339596761113e-06, "loss": 17.7873, "step": 3483 }, { "epoch": 0.06368472041969035, "grad_norm": 6.539420780296843, "learning_rate": 9.970307393517363e-06, "loss": 17.6053, "step": 3484 }, { "epoch": 0.06370299961613687, "grad_norm": 7.198210037702214, "learning_rate": 9.970275172853116e-06, "loss": 17.8825, "step": 3485 }, { "epoch": 0.0637212788125834, "grad_norm": 6.423065344478874, "learning_rate": 9.970242934768486e-06, "loss": 17.1835, "step": 3486 }, { "epoch": 0.06373955800902992, "grad_norm": 9.592006472164796, "learning_rate": 9.970210679263585e-06, "loss": 18.8019, "step": 3487 }, { "epoch": 0.06375783720547645, "grad_norm": 8.381054912742002, "learning_rate": 9.970178406338528e-06, "loss": 17.8104, "step": 3488 }, { "epoch": 0.06377611640192297, "grad_norm": 6.2245779054760995, "learning_rate": 9.970146115993426e-06, "loss": 17.3555, "step": 3489 }, { "epoch": 0.0637943955983695, "grad_norm": 8.6187944119128, "learning_rate": 9.970113808228395e-06, "loss": 18.4059, "step": 3490 }, { "epoch": 0.06381267479481602, "grad_norm": 6.55115468548905, "learning_rate": 9.970081483043545e-06, "loss": 17.3778, "step": 3491 }, { "epoch": 0.06383095399126254, "grad_norm": 7.6721607603954, "learning_rate": 9.970049140438991e-06, "loss": 18.1595, "step": 3492 }, { "epoch": 0.06384923318770906, "grad_norm": 8.173171947955758, "learning_rate": 9.970016780414844e-06, "loss": 18.1178, "step": 3493 }, { "epoch": 0.0638675123841556, "grad_norm": 8.001892357122205, "learning_rate": 9.969984402971223e-06, "loss": 17.9552, "step": 3494 }, { "epoch": 0.06388579158060212, "grad_norm": 8.106211428069072, "learning_rate": 9.969952008108236e-06, "loss": 17.9411, "step": 3495 }, { "epoch": 0.06390407077704864, "grad_norm": 6.928555843489604, "learning_rate": 9.969919595825999e-06, "loss": 17.5931, "step": 3496 }, { "epoch": 0.06392234997349516, "grad_norm": 9.80360507054187, "learning_rate": 9.969887166124625e-06, "loss": 18.8677, "step": 3497 }, { "epoch": 0.06394062916994168, "grad_norm": 6.728350926935754, "learning_rate": 9.969854719004227e-06, "loss": 17.6105, "step": 3498 }, { "epoch": 0.06395890836638822, "grad_norm": 7.422988096770044, "learning_rate": 9.96982225446492e-06, "loss": 17.9216, "step": 3499 }, { "epoch": 0.06397718756283474, "grad_norm": 8.024234797780116, "learning_rate": 9.969789772506817e-06, "loss": 18.2083, "step": 3500 }, { "epoch": 0.06399546675928126, "grad_norm": 8.045249821909978, "learning_rate": 9.969757273130032e-06, "loss": 18.256, "step": 3501 }, { "epoch": 0.06401374595572779, "grad_norm": 7.986518270825566, "learning_rate": 9.96972475633468e-06, "loss": 18.283, "step": 3502 }, { "epoch": 0.06403202515217431, "grad_norm": 7.573442982924717, "learning_rate": 9.969692222120875e-06, "loss": 18.0723, "step": 3503 }, { "epoch": 0.06405030434862083, "grad_norm": 7.784550718432774, "learning_rate": 9.969659670488728e-06, "loss": 17.801, "step": 3504 }, { "epoch": 0.06406858354506736, "grad_norm": 8.831415996964335, "learning_rate": 9.969627101438356e-06, "loss": 17.9731, "step": 3505 }, { "epoch": 0.06408686274151389, "grad_norm": 7.652962288303948, "learning_rate": 9.969594514969871e-06, "loss": 17.7879, "step": 3506 }, { "epoch": 0.06410514193796041, "grad_norm": 7.566719122832205, "learning_rate": 9.96956191108339e-06, "loss": 17.9537, "step": 3507 }, { "epoch": 0.06412342113440693, "grad_norm": 6.850498608584895, "learning_rate": 9.969529289779024e-06, "loss": 17.5989, "step": 3508 }, { "epoch": 0.06414170033085345, "grad_norm": 7.642963557059988, "learning_rate": 9.96949665105689e-06, "loss": 18.1601, "step": 3509 }, { "epoch": 0.06415997952729997, "grad_norm": 6.92207305517525, "learning_rate": 9.9694639949171e-06, "loss": 17.8639, "step": 3510 }, { "epoch": 0.06417825872374651, "grad_norm": 6.9900197831105135, "learning_rate": 9.969431321359773e-06, "loss": 18.0146, "step": 3511 }, { "epoch": 0.06419653792019303, "grad_norm": 7.962592126136193, "learning_rate": 9.969398630385019e-06, "loss": 18.0781, "step": 3512 }, { "epoch": 0.06421481711663955, "grad_norm": 8.439722553655928, "learning_rate": 9.969365921992955e-06, "loss": 17.9793, "step": 3513 }, { "epoch": 0.06423309631308607, "grad_norm": 7.4717803840423835, "learning_rate": 9.969333196183693e-06, "loss": 17.9807, "step": 3514 }, { "epoch": 0.0642513755095326, "grad_norm": 7.821943658249452, "learning_rate": 9.96930045295735e-06, "loss": 18.2078, "step": 3515 }, { "epoch": 0.06426965470597913, "grad_norm": 7.15850318514372, "learning_rate": 9.969267692314039e-06, "loss": 17.624, "step": 3516 }, { "epoch": 0.06428793390242565, "grad_norm": 7.166171418224422, "learning_rate": 9.969234914253877e-06, "loss": 17.6763, "step": 3517 }, { "epoch": 0.06430621309887218, "grad_norm": 7.645142143798253, "learning_rate": 9.969202118776979e-06, "loss": 18.1204, "step": 3518 }, { "epoch": 0.0643244922953187, "grad_norm": 7.800160231317408, "learning_rate": 9.969169305883458e-06, "loss": 17.9998, "step": 3519 }, { "epoch": 0.06434277149176522, "grad_norm": 6.9924045145022955, "learning_rate": 9.969136475573429e-06, "loss": 17.7088, "step": 3520 }, { "epoch": 0.06436105068821174, "grad_norm": 7.876068104956508, "learning_rate": 9.969103627847008e-06, "loss": 17.8554, "step": 3521 }, { "epoch": 0.06437932988465828, "grad_norm": 7.203601413722107, "learning_rate": 9.969070762704311e-06, "loss": 17.8915, "step": 3522 }, { "epoch": 0.0643976090811048, "grad_norm": 7.71843516495048, "learning_rate": 9.96903788014545e-06, "loss": 18.3282, "step": 3523 }, { "epoch": 0.06441588827755132, "grad_norm": 7.691478339940075, "learning_rate": 9.969004980170546e-06, "loss": 18.2412, "step": 3524 }, { "epoch": 0.06443416747399784, "grad_norm": 6.741784960109834, "learning_rate": 9.968972062779708e-06, "loss": 17.612, "step": 3525 }, { "epoch": 0.06445244667044436, "grad_norm": 9.279574291796338, "learning_rate": 9.968939127973055e-06, "loss": 17.8691, "step": 3526 }, { "epoch": 0.06447072586689089, "grad_norm": 8.218887362358476, "learning_rate": 9.9689061757507e-06, "loss": 18.3941, "step": 3527 }, { "epoch": 0.06448900506333742, "grad_norm": 9.030415553386225, "learning_rate": 9.968873206112764e-06, "loss": 18.4235, "step": 3528 }, { "epoch": 0.06450728425978394, "grad_norm": 8.968290073544493, "learning_rate": 9.968840219059355e-06, "loss": 18.5045, "step": 3529 }, { "epoch": 0.06452556345623046, "grad_norm": 6.169962621475131, "learning_rate": 9.968807214590592e-06, "loss": 17.5488, "step": 3530 }, { "epoch": 0.06454384265267699, "grad_norm": 8.821595914720909, "learning_rate": 9.968774192706593e-06, "loss": 18.3084, "step": 3531 }, { "epoch": 0.06456212184912351, "grad_norm": 6.222977406102921, "learning_rate": 9.96874115340747e-06, "loss": 17.3397, "step": 3532 }, { "epoch": 0.06458040104557004, "grad_norm": 7.376944655592562, "learning_rate": 9.968708096693343e-06, "loss": 17.4941, "step": 3533 }, { "epoch": 0.06459868024201657, "grad_norm": 7.297119054188936, "learning_rate": 9.968675022564322e-06, "loss": 17.7489, "step": 3534 }, { "epoch": 0.06461695943846309, "grad_norm": 6.189290074236147, "learning_rate": 9.968641931020528e-06, "loss": 17.1373, "step": 3535 }, { "epoch": 0.06463523863490961, "grad_norm": 7.243176252691162, "learning_rate": 9.968608822062075e-06, "loss": 17.8784, "step": 3536 }, { "epoch": 0.06465351783135613, "grad_norm": 7.9600401586531495, "learning_rate": 9.968575695689078e-06, "loss": 18.1522, "step": 3537 }, { "epoch": 0.06467179702780265, "grad_norm": 8.094219192503523, "learning_rate": 9.968542551901657e-06, "loss": 18.0917, "step": 3538 }, { "epoch": 0.06469007622424919, "grad_norm": 6.900213326533915, "learning_rate": 9.968509390699923e-06, "loss": 17.6893, "step": 3539 }, { "epoch": 0.06470835542069571, "grad_norm": 7.771757318070501, "learning_rate": 9.968476212083994e-06, "loss": 17.6794, "step": 3540 }, { "epoch": 0.06472663461714223, "grad_norm": 7.796128646024294, "learning_rate": 9.96844301605399e-06, "loss": 18.0542, "step": 3541 }, { "epoch": 0.06474491381358875, "grad_norm": 6.333926310879374, "learning_rate": 9.968409802610024e-06, "loss": 17.492, "step": 3542 }, { "epoch": 0.06476319301003527, "grad_norm": 7.893493169383418, "learning_rate": 9.96837657175221e-06, "loss": 17.8904, "step": 3543 }, { "epoch": 0.0647814722064818, "grad_norm": 7.682323622852044, "learning_rate": 9.96834332348067e-06, "loss": 18.0747, "step": 3544 }, { "epoch": 0.06479975140292833, "grad_norm": 7.6343847131599825, "learning_rate": 9.968310057795516e-06, "loss": 17.9874, "step": 3545 }, { "epoch": 0.06481803059937485, "grad_norm": 7.70931668635223, "learning_rate": 9.968276774696867e-06, "loss": 18.1432, "step": 3546 }, { "epoch": 0.06483630979582138, "grad_norm": 7.042406319373183, "learning_rate": 9.96824347418484e-06, "loss": 17.7371, "step": 3547 }, { "epoch": 0.0648545889922679, "grad_norm": 7.032820555371767, "learning_rate": 9.96821015625955e-06, "loss": 17.6376, "step": 3548 }, { "epoch": 0.06487286818871442, "grad_norm": 8.465692286677994, "learning_rate": 9.968176820921113e-06, "loss": 18.5406, "step": 3549 }, { "epoch": 0.06489114738516095, "grad_norm": 8.325907663854666, "learning_rate": 9.968143468169651e-06, "loss": 17.8953, "step": 3550 }, { "epoch": 0.06490942658160748, "grad_norm": 6.391093030845671, "learning_rate": 9.968110098005274e-06, "loss": 17.4928, "step": 3551 }, { "epoch": 0.064927705778054, "grad_norm": 8.014652913343959, "learning_rate": 9.968076710428103e-06, "loss": 18.3309, "step": 3552 }, { "epoch": 0.06494598497450052, "grad_norm": 7.211976385444955, "learning_rate": 9.968043305438256e-06, "loss": 17.7902, "step": 3553 }, { "epoch": 0.06496426417094704, "grad_norm": 8.320694553004552, "learning_rate": 9.968009883035847e-06, "loss": 18.1843, "step": 3554 }, { "epoch": 0.06498254336739356, "grad_norm": 7.695379480450924, "learning_rate": 9.967976443220994e-06, "loss": 18.1156, "step": 3555 }, { "epoch": 0.0650008225638401, "grad_norm": 7.5360398867864555, "learning_rate": 9.967942985993815e-06, "loss": 17.8755, "step": 3556 }, { "epoch": 0.06501910176028662, "grad_norm": 7.394783515888027, "learning_rate": 9.967909511354427e-06, "loss": 17.8335, "step": 3557 }, { "epoch": 0.06503738095673314, "grad_norm": 6.878805886311911, "learning_rate": 9.967876019302947e-06, "loss": 17.758, "step": 3558 }, { "epoch": 0.06505566015317966, "grad_norm": 7.069124085978708, "learning_rate": 9.967842509839493e-06, "loss": 17.4327, "step": 3559 }, { "epoch": 0.06507393934962619, "grad_norm": 7.8435432108511085, "learning_rate": 9.967808982964183e-06, "loss": 17.4035, "step": 3560 }, { "epoch": 0.06509221854607271, "grad_norm": 8.897590660997398, "learning_rate": 9.967775438677131e-06, "loss": 18.4507, "step": 3561 }, { "epoch": 0.06511049774251924, "grad_norm": 7.665305953256472, "learning_rate": 9.967741876978459e-06, "loss": 17.8019, "step": 3562 }, { "epoch": 0.06512877693896577, "grad_norm": 7.7893858413132655, "learning_rate": 9.967708297868282e-06, "loss": 17.9123, "step": 3563 }, { "epoch": 0.06514705613541229, "grad_norm": 7.407263317066761, "learning_rate": 9.96767470134672e-06, "loss": 17.5609, "step": 3564 }, { "epoch": 0.06516533533185881, "grad_norm": 8.131104674091887, "learning_rate": 9.967641087413888e-06, "loss": 18.0436, "step": 3565 }, { "epoch": 0.06518361452830533, "grad_norm": 7.596401755151315, "learning_rate": 9.967607456069905e-06, "loss": 17.6687, "step": 3566 }, { "epoch": 0.06520189372475187, "grad_norm": 7.74646869203929, "learning_rate": 9.96757380731489e-06, "loss": 17.9164, "step": 3567 }, { "epoch": 0.06522017292119839, "grad_norm": 7.045659198787928, "learning_rate": 9.967540141148959e-06, "loss": 17.8673, "step": 3568 }, { "epoch": 0.06523845211764491, "grad_norm": 8.146337097205732, "learning_rate": 9.96750645757223e-06, "loss": 17.9246, "step": 3569 }, { "epoch": 0.06525673131409143, "grad_norm": 8.405984154371131, "learning_rate": 9.967472756584823e-06, "loss": 18.0451, "step": 3570 }, { "epoch": 0.06527501051053795, "grad_norm": 6.775109016848172, "learning_rate": 9.967439038186855e-06, "loss": 17.445, "step": 3571 }, { "epoch": 0.06529328970698448, "grad_norm": 7.400385393023171, "learning_rate": 9.967405302378444e-06, "loss": 17.738, "step": 3572 }, { "epoch": 0.06531156890343101, "grad_norm": 7.833313423241914, "learning_rate": 9.96737154915971e-06, "loss": 18.1323, "step": 3573 }, { "epoch": 0.06532984809987753, "grad_norm": 7.106764032621514, "learning_rate": 9.967337778530769e-06, "loss": 17.7625, "step": 3574 }, { "epoch": 0.06534812729632405, "grad_norm": 10.22666235137378, "learning_rate": 9.967303990491738e-06, "loss": 18.1572, "step": 3575 }, { "epoch": 0.06536640649277058, "grad_norm": 8.277992830655526, "learning_rate": 9.96727018504274e-06, "loss": 18.3004, "step": 3576 }, { "epoch": 0.0653846856892171, "grad_norm": 7.567160386383156, "learning_rate": 9.96723636218389e-06, "loss": 17.5543, "step": 3577 }, { "epoch": 0.06540296488566362, "grad_norm": 7.00449520450629, "learning_rate": 9.967202521915307e-06, "loss": 17.7054, "step": 3578 }, { "epoch": 0.06542124408211016, "grad_norm": 7.73123805219894, "learning_rate": 9.96716866423711e-06, "loss": 17.8919, "step": 3579 }, { "epoch": 0.06543952327855668, "grad_norm": 6.885900831776829, "learning_rate": 9.967134789149419e-06, "loss": 17.3299, "step": 3580 }, { "epoch": 0.0654578024750032, "grad_norm": 8.112229552955466, "learning_rate": 9.967100896652352e-06, "loss": 17.5914, "step": 3581 }, { "epoch": 0.06547608167144972, "grad_norm": 6.4199801399130765, "learning_rate": 9.967066986746026e-06, "loss": 17.2268, "step": 3582 }, { "epoch": 0.06549436086789624, "grad_norm": 7.675054421496131, "learning_rate": 9.967033059430562e-06, "loss": 17.9806, "step": 3583 }, { "epoch": 0.06551264006434278, "grad_norm": 7.478719143186732, "learning_rate": 9.96699911470608e-06, "loss": 17.7654, "step": 3584 }, { "epoch": 0.0655309192607893, "grad_norm": 8.405844893480488, "learning_rate": 9.966965152572694e-06, "loss": 18.1927, "step": 3585 }, { "epoch": 0.06554919845723582, "grad_norm": 7.736951161334362, "learning_rate": 9.966931173030528e-06, "loss": 18.1067, "step": 3586 }, { "epoch": 0.06556747765368234, "grad_norm": 7.903216192741018, "learning_rate": 9.9668971760797e-06, "loss": 17.7961, "step": 3587 }, { "epoch": 0.06558575685012887, "grad_norm": 8.324500951199788, "learning_rate": 9.966863161720326e-06, "loss": 18.0361, "step": 3588 }, { "epoch": 0.06560403604657539, "grad_norm": 6.013914722533633, "learning_rate": 9.966829129952528e-06, "loss": 17.18, "step": 3589 }, { "epoch": 0.06562231524302192, "grad_norm": 8.545617171751642, "learning_rate": 9.966795080776425e-06, "loss": 18.2204, "step": 3590 }, { "epoch": 0.06564059443946844, "grad_norm": 8.526968088011222, "learning_rate": 9.966761014192138e-06, "loss": 18.2618, "step": 3591 }, { "epoch": 0.06565887363591497, "grad_norm": 6.06931638243933, "learning_rate": 9.966726930199784e-06, "loss": 17.3711, "step": 3592 }, { "epoch": 0.06567715283236149, "grad_norm": 9.795067314208612, "learning_rate": 9.966692828799483e-06, "loss": 18.8647, "step": 3593 }, { "epoch": 0.06569543202880801, "grad_norm": 7.331816713163978, "learning_rate": 9.966658709991352e-06, "loss": 17.7088, "step": 3594 }, { "epoch": 0.06571371122525453, "grad_norm": 7.639954185195284, "learning_rate": 9.966624573775517e-06, "loss": 18.0959, "step": 3595 }, { "epoch": 0.06573199042170107, "grad_norm": 7.700168932923506, "learning_rate": 9.96659042015209e-06, "loss": 18.3082, "step": 3596 }, { "epoch": 0.06575026961814759, "grad_norm": 9.13690457500145, "learning_rate": 9.966556249121199e-06, "loss": 18.2918, "step": 3597 }, { "epoch": 0.06576854881459411, "grad_norm": 7.598414664686019, "learning_rate": 9.966522060682957e-06, "loss": 18.0308, "step": 3598 }, { "epoch": 0.06578682801104063, "grad_norm": 7.102651558654934, "learning_rate": 9.966487854837485e-06, "loss": 17.8665, "step": 3599 }, { "epoch": 0.06580510720748715, "grad_norm": 6.876939283695511, "learning_rate": 9.966453631584906e-06, "loss": 17.6633, "step": 3600 }, { "epoch": 0.06582338640393369, "grad_norm": 8.127901195429326, "learning_rate": 9.966419390925336e-06, "loss": 18.1879, "step": 3601 }, { "epoch": 0.06584166560038021, "grad_norm": 8.314206067549518, "learning_rate": 9.9663851328589e-06, "loss": 18.4616, "step": 3602 }, { "epoch": 0.06585994479682673, "grad_norm": 7.930742515215014, "learning_rate": 9.966350857385714e-06, "loss": 18.1745, "step": 3603 }, { "epoch": 0.06587822399327325, "grad_norm": 7.938715755344231, "learning_rate": 9.966316564505897e-06, "loss": 18.0051, "step": 3604 }, { "epoch": 0.06589650318971978, "grad_norm": 7.412150240046248, "learning_rate": 9.966282254219575e-06, "loss": 17.49, "step": 3605 }, { "epoch": 0.0659147823861663, "grad_norm": 8.901639329323402, "learning_rate": 9.966247926526862e-06, "loss": 18.5454, "step": 3606 }, { "epoch": 0.06593306158261283, "grad_norm": 7.632601527695011, "learning_rate": 9.96621358142788e-06, "loss": 18.0513, "step": 3607 }, { "epoch": 0.06595134077905936, "grad_norm": 7.813855987024707, "learning_rate": 9.966179218922754e-06, "loss": 17.5087, "step": 3608 }, { "epoch": 0.06596961997550588, "grad_norm": 7.983668250431304, "learning_rate": 9.966144839011597e-06, "loss": 18.0178, "step": 3609 }, { "epoch": 0.0659878991719524, "grad_norm": 7.489202970740367, "learning_rate": 9.966110441694536e-06, "loss": 17.7378, "step": 3610 }, { "epoch": 0.06600617836839892, "grad_norm": 6.415050654753606, "learning_rate": 9.966076026971688e-06, "loss": 17.3449, "step": 3611 }, { "epoch": 0.06602445756484544, "grad_norm": 7.864728428559402, "learning_rate": 9.966041594843175e-06, "loss": 18.1784, "step": 3612 }, { "epoch": 0.06604273676129198, "grad_norm": 7.576611071022064, "learning_rate": 9.966007145309115e-06, "loss": 17.6168, "step": 3613 }, { "epoch": 0.0660610159577385, "grad_norm": 7.670906182898218, "learning_rate": 9.965972678369633e-06, "loss": 18.0569, "step": 3614 }, { "epoch": 0.06607929515418502, "grad_norm": 7.881212668285415, "learning_rate": 9.965938194024846e-06, "loss": 18.0989, "step": 3615 }, { "epoch": 0.06609757435063154, "grad_norm": 7.617339270567452, "learning_rate": 9.965903692274878e-06, "loss": 18.0277, "step": 3616 }, { "epoch": 0.06611585354707807, "grad_norm": 7.220682379213172, "learning_rate": 9.965869173119849e-06, "loss": 17.8478, "step": 3617 }, { "epoch": 0.0661341327435246, "grad_norm": 7.9974297402963614, "learning_rate": 9.96583463655988e-06, "loss": 18.1157, "step": 3618 }, { "epoch": 0.06615241193997112, "grad_norm": 8.25383357327032, "learning_rate": 9.96580008259509e-06, "loss": 18.1823, "step": 3619 }, { "epoch": 0.06617069113641764, "grad_norm": 9.085583064693404, "learning_rate": 9.9657655112256e-06, "loss": 18.2514, "step": 3620 }, { "epoch": 0.06618897033286417, "grad_norm": 8.827278744364282, "learning_rate": 9.965730922451535e-06, "loss": 18.6595, "step": 3621 }, { "epoch": 0.06620724952931069, "grad_norm": 6.40800024285416, "learning_rate": 9.965696316273013e-06, "loss": 17.4722, "step": 3622 }, { "epoch": 0.06622552872575721, "grad_norm": 7.322330542321388, "learning_rate": 9.965661692690158e-06, "loss": 17.9229, "step": 3623 }, { "epoch": 0.06624380792220375, "grad_norm": 8.63134294616613, "learning_rate": 9.965627051703088e-06, "loss": 18.2308, "step": 3624 }, { "epoch": 0.06626208711865027, "grad_norm": 8.221315315450456, "learning_rate": 9.965592393311927e-06, "loss": 18.0214, "step": 3625 }, { "epoch": 0.06628036631509679, "grad_norm": 7.948911081925447, "learning_rate": 9.965557717516794e-06, "loss": 17.9538, "step": 3626 }, { "epoch": 0.06629864551154331, "grad_norm": 7.98964185910891, "learning_rate": 9.965523024317814e-06, "loss": 18.4565, "step": 3627 }, { "epoch": 0.06631692470798983, "grad_norm": 6.571445289451289, "learning_rate": 9.965488313715107e-06, "loss": 17.5406, "step": 3628 }, { "epoch": 0.06633520390443635, "grad_norm": 7.668087049288176, "learning_rate": 9.965453585708791e-06, "loss": 17.8423, "step": 3629 }, { "epoch": 0.06635348310088289, "grad_norm": 9.127646705308155, "learning_rate": 9.965418840298995e-06, "loss": 18.2477, "step": 3630 }, { "epoch": 0.06637176229732941, "grad_norm": 7.320556338144031, "learning_rate": 9.965384077485834e-06, "loss": 17.6702, "step": 3631 }, { "epoch": 0.06639004149377593, "grad_norm": 8.18728420801698, "learning_rate": 9.965349297269435e-06, "loss": 18.4822, "step": 3632 }, { "epoch": 0.06640832069022246, "grad_norm": 8.238194702977536, "learning_rate": 9.965314499649917e-06, "loss": 17.9435, "step": 3633 }, { "epoch": 0.06642659988666898, "grad_norm": 7.770663751371553, "learning_rate": 9.9652796846274e-06, "loss": 17.8512, "step": 3634 }, { "epoch": 0.06644487908311551, "grad_norm": 6.931472580092162, "learning_rate": 9.965244852202011e-06, "loss": 17.6323, "step": 3635 }, { "epoch": 0.06646315827956203, "grad_norm": 7.978645203749096, "learning_rate": 9.965210002373869e-06, "loss": 18.0182, "step": 3636 }, { "epoch": 0.06648143747600856, "grad_norm": 7.060176683857077, "learning_rate": 9.965175135143098e-06, "loss": 17.7228, "step": 3637 }, { "epoch": 0.06649971667245508, "grad_norm": 6.91300065249346, "learning_rate": 9.965140250509818e-06, "loss": 18.0372, "step": 3638 }, { "epoch": 0.0665179958689016, "grad_norm": 7.198185264977327, "learning_rate": 9.965105348474153e-06, "loss": 17.6783, "step": 3639 }, { "epoch": 0.06653627506534812, "grad_norm": 7.877918246822304, "learning_rate": 9.965070429036223e-06, "loss": 18.0394, "step": 3640 }, { "epoch": 0.06655455426179466, "grad_norm": 7.018533476143148, "learning_rate": 9.965035492196154e-06, "loss": 17.5641, "step": 3641 }, { "epoch": 0.06657283345824118, "grad_norm": 7.8852153569181525, "learning_rate": 9.965000537954064e-06, "loss": 17.7846, "step": 3642 }, { "epoch": 0.0665911126546877, "grad_norm": 7.746320258295745, "learning_rate": 9.96496556631008e-06, "loss": 17.9326, "step": 3643 }, { "epoch": 0.06660939185113422, "grad_norm": 7.22849019265795, "learning_rate": 9.964930577264323e-06, "loss": 17.7381, "step": 3644 }, { "epoch": 0.06662767104758074, "grad_norm": 7.310177395722805, "learning_rate": 9.964895570816912e-06, "loss": 17.9833, "step": 3645 }, { "epoch": 0.06664595024402727, "grad_norm": 6.936029700317219, "learning_rate": 9.964860546967976e-06, "loss": 17.7155, "step": 3646 }, { "epoch": 0.0666642294404738, "grad_norm": 7.711533693081667, "learning_rate": 9.964825505717633e-06, "loss": 18.1309, "step": 3647 }, { "epoch": 0.06668250863692032, "grad_norm": 7.779978389906678, "learning_rate": 9.964790447066008e-06, "loss": 17.9141, "step": 3648 }, { "epoch": 0.06670078783336685, "grad_norm": 7.150644276614255, "learning_rate": 9.964755371013225e-06, "loss": 17.8392, "step": 3649 }, { "epoch": 0.06671906702981337, "grad_norm": 8.12025679040642, "learning_rate": 9.964720277559401e-06, "loss": 18.2699, "step": 3650 }, { "epoch": 0.06673734622625989, "grad_norm": 7.177619422772814, "learning_rate": 9.964685166704666e-06, "loss": 17.6499, "step": 3651 }, { "epoch": 0.06675562542270642, "grad_norm": 6.934823447479643, "learning_rate": 9.964650038449139e-06, "loss": 17.798, "step": 3652 }, { "epoch": 0.06677390461915295, "grad_norm": 6.407166485372936, "learning_rate": 9.964614892792948e-06, "loss": 17.2369, "step": 3653 }, { "epoch": 0.06679218381559947, "grad_norm": 8.633064407304595, "learning_rate": 9.96457972973621e-06, "loss": 18.3923, "step": 3654 }, { "epoch": 0.06681046301204599, "grad_norm": 7.686353050424286, "learning_rate": 9.96454454927905e-06, "loss": 17.9912, "step": 3655 }, { "epoch": 0.06682874220849251, "grad_norm": 7.543979592879128, "learning_rate": 9.964509351421593e-06, "loss": 17.9041, "step": 3656 }, { "epoch": 0.06684702140493903, "grad_norm": 7.682844748231194, "learning_rate": 9.964474136163961e-06, "loss": 18.1909, "step": 3657 }, { "epoch": 0.06686530060138557, "grad_norm": 7.542047685677389, "learning_rate": 9.96443890350628e-06, "loss": 17.5756, "step": 3658 }, { "epoch": 0.06688357979783209, "grad_norm": 7.144669916419882, "learning_rate": 9.964403653448669e-06, "loss": 17.7512, "step": 3659 }, { "epoch": 0.06690185899427861, "grad_norm": 7.516784141610715, "learning_rate": 9.964368385991254e-06, "loss": 17.7277, "step": 3660 }, { "epoch": 0.06692013819072513, "grad_norm": 6.809284532807876, "learning_rate": 9.964333101134161e-06, "loss": 17.6542, "step": 3661 }, { "epoch": 0.06693841738717166, "grad_norm": 8.016323300616692, "learning_rate": 9.964297798877509e-06, "loss": 17.992, "step": 3662 }, { "epoch": 0.06695669658361818, "grad_norm": 7.236700239267418, "learning_rate": 9.964262479221424e-06, "loss": 17.6687, "step": 3663 }, { "epoch": 0.06697497578006471, "grad_norm": 8.445816640265996, "learning_rate": 9.964227142166033e-06, "loss": 18.187, "step": 3664 }, { "epoch": 0.06699325497651124, "grad_norm": 6.898403266475036, "learning_rate": 9.964191787711453e-06, "loss": 17.3689, "step": 3665 }, { "epoch": 0.06701153417295776, "grad_norm": 7.603489925944102, "learning_rate": 9.964156415857813e-06, "loss": 18.1952, "step": 3666 }, { "epoch": 0.06702981336940428, "grad_norm": 7.130434251094245, "learning_rate": 9.964121026605236e-06, "loss": 17.8387, "step": 3667 }, { "epoch": 0.0670480925658508, "grad_norm": 7.173812988187238, "learning_rate": 9.964085619953845e-06, "loss": 17.7426, "step": 3668 }, { "epoch": 0.06706637176229734, "grad_norm": 7.663182796601119, "learning_rate": 9.964050195903764e-06, "loss": 17.9353, "step": 3669 }, { "epoch": 0.06708465095874386, "grad_norm": 7.781673745246008, "learning_rate": 9.964014754455117e-06, "loss": 17.958, "step": 3670 }, { "epoch": 0.06710293015519038, "grad_norm": 6.557163244961542, "learning_rate": 9.963979295608031e-06, "loss": 17.3812, "step": 3671 }, { "epoch": 0.0671212093516369, "grad_norm": 8.194036113397624, "learning_rate": 9.963943819362628e-06, "loss": 18.3379, "step": 3672 }, { "epoch": 0.06713948854808342, "grad_norm": 7.0195021885865065, "learning_rate": 9.963908325719034e-06, "loss": 17.6027, "step": 3673 }, { "epoch": 0.06715776774452994, "grad_norm": 6.562676858630681, "learning_rate": 9.963872814677369e-06, "loss": 17.4976, "step": 3674 }, { "epoch": 0.06717604694097648, "grad_norm": 7.478989990416041, "learning_rate": 9.963837286237763e-06, "loss": 17.799, "step": 3675 }, { "epoch": 0.067194326137423, "grad_norm": 8.057254508520662, "learning_rate": 9.963801740400336e-06, "loss": 18.1576, "step": 3676 }, { "epoch": 0.06721260533386952, "grad_norm": 7.333838327509158, "learning_rate": 9.963766177165216e-06, "loss": 17.8813, "step": 3677 }, { "epoch": 0.06723088453031605, "grad_norm": 7.007514023860895, "learning_rate": 9.963730596532526e-06, "loss": 17.8473, "step": 3678 }, { "epoch": 0.06724916372676257, "grad_norm": 8.248034560183765, "learning_rate": 9.963694998502392e-06, "loss": 17.9892, "step": 3679 }, { "epoch": 0.06726744292320909, "grad_norm": 7.664262096770969, "learning_rate": 9.963659383074937e-06, "loss": 17.9312, "step": 3680 }, { "epoch": 0.06728572211965562, "grad_norm": 7.487524861207348, "learning_rate": 9.963623750250285e-06, "loss": 17.9139, "step": 3681 }, { "epoch": 0.06730400131610215, "grad_norm": 7.707866711653239, "learning_rate": 9.963588100028565e-06, "loss": 17.8018, "step": 3682 }, { "epoch": 0.06732228051254867, "grad_norm": 7.78447837652025, "learning_rate": 9.963552432409897e-06, "loss": 17.9661, "step": 3683 }, { "epoch": 0.06734055970899519, "grad_norm": 8.626494614214739, "learning_rate": 9.963516747394411e-06, "loss": 18.0817, "step": 3684 }, { "epoch": 0.06735883890544171, "grad_norm": 7.607945441358762, "learning_rate": 9.963481044982227e-06, "loss": 18.0393, "step": 3685 }, { "epoch": 0.06737711810188825, "grad_norm": 6.902426308636471, "learning_rate": 9.963445325173473e-06, "loss": 17.4743, "step": 3686 }, { "epoch": 0.06739539729833477, "grad_norm": 8.311685125637775, "learning_rate": 9.963409587968276e-06, "loss": 17.8539, "step": 3687 }, { "epoch": 0.06741367649478129, "grad_norm": 7.048286794631574, "learning_rate": 9.963373833366758e-06, "loss": 18.0255, "step": 3688 }, { "epoch": 0.06743195569122781, "grad_norm": 7.1412325068410105, "learning_rate": 9.963338061369045e-06, "loss": 18.0242, "step": 3689 }, { "epoch": 0.06745023488767433, "grad_norm": 7.348625419574826, "learning_rate": 9.963302271975263e-06, "loss": 17.469, "step": 3690 }, { "epoch": 0.06746851408412086, "grad_norm": 16.995275294386243, "learning_rate": 9.963266465185536e-06, "loss": 18.8894, "step": 3691 }, { "epoch": 0.06748679328056739, "grad_norm": 7.533170174551889, "learning_rate": 9.963230640999993e-06, "loss": 17.7024, "step": 3692 }, { "epoch": 0.06750507247701391, "grad_norm": 7.248560779489402, "learning_rate": 9.963194799418755e-06, "loss": 17.7748, "step": 3693 }, { "epoch": 0.06752335167346044, "grad_norm": 8.587210185826397, "learning_rate": 9.963158940441953e-06, "loss": 18.3314, "step": 3694 }, { "epoch": 0.06754163086990696, "grad_norm": 8.66856194071746, "learning_rate": 9.963123064069707e-06, "loss": 18.6316, "step": 3695 }, { "epoch": 0.06755991006635348, "grad_norm": 6.8005485687612275, "learning_rate": 9.963087170302146e-06, "loss": 17.366, "step": 3696 }, { "epoch": 0.0675781892628, "grad_norm": 7.590585740008545, "learning_rate": 9.963051259139395e-06, "loss": 17.9532, "step": 3697 }, { "epoch": 0.06759646845924654, "grad_norm": 7.415068222981645, "learning_rate": 9.963015330581581e-06, "loss": 17.8159, "step": 3698 }, { "epoch": 0.06761474765569306, "grad_norm": 7.445670101225243, "learning_rate": 9.962979384628829e-06, "loss": 17.7052, "step": 3699 }, { "epoch": 0.06763302685213958, "grad_norm": 6.826785358678163, "learning_rate": 9.962943421281264e-06, "loss": 17.737, "step": 3700 }, { "epoch": 0.0676513060485861, "grad_norm": 7.432857338679918, "learning_rate": 9.962907440539012e-06, "loss": 18.2263, "step": 3701 }, { "epoch": 0.06766958524503262, "grad_norm": 6.261111150705873, "learning_rate": 9.962871442402203e-06, "loss": 17.2048, "step": 3702 }, { "epoch": 0.06768786444147916, "grad_norm": 7.454775341619104, "learning_rate": 9.962835426870958e-06, "loss": 17.999, "step": 3703 }, { "epoch": 0.06770614363792568, "grad_norm": 7.56542599926941, "learning_rate": 9.962799393945406e-06, "loss": 18.0887, "step": 3704 }, { "epoch": 0.0677244228343722, "grad_norm": 7.967020769632536, "learning_rate": 9.962763343625672e-06, "loss": 17.9031, "step": 3705 }, { "epoch": 0.06774270203081872, "grad_norm": 7.667758830754452, "learning_rate": 9.962727275911883e-06, "loss": 17.941, "step": 3706 }, { "epoch": 0.06776098122726525, "grad_norm": 7.1873448141762335, "learning_rate": 9.962691190804167e-06, "loss": 17.7057, "step": 3707 }, { "epoch": 0.06777926042371177, "grad_norm": 7.984551617788175, "learning_rate": 9.962655088302648e-06, "loss": 18.1008, "step": 3708 }, { "epoch": 0.0677975396201583, "grad_norm": 7.350720256152015, "learning_rate": 9.962618968407455e-06, "loss": 17.7906, "step": 3709 }, { "epoch": 0.06781581881660483, "grad_norm": 8.235914722238629, "learning_rate": 9.962582831118712e-06, "loss": 18.0387, "step": 3710 }, { "epoch": 0.06783409801305135, "grad_norm": 7.3465049296915215, "learning_rate": 9.962546676436547e-06, "loss": 18.0271, "step": 3711 }, { "epoch": 0.06785237720949787, "grad_norm": 9.091010809250884, "learning_rate": 9.962510504361087e-06, "loss": 18.4861, "step": 3712 }, { "epoch": 0.06787065640594439, "grad_norm": 6.606151229758446, "learning_rate": 9.962474314892456e-06, "loss": 17.4758, "step": 3713 }, { "epoch": 0.06788893560239091, "grad_norm": 8.364753569679415, "learning_rate": 9.962438108030786e-06, "loss": 18.0549, "step": 3714 }, { "epoch": 0.06790721479883745, "grad_norm": 5.8204707977722965, "learning_rate": 9.9624018837762e-06, "loss": 17.1909, "step": 3715 }, { "epoch": 0.06792549399528397, "grad_norm": 8.532616712133498, "learning_rate": 9.962365642128825e-06, "loss": 18.3254, "step": 3716 }, { "epoch": 0.06794377319173049, "grad_norm": 7.6865609996747155, "learning_rate": 9.962329383088791e-06, "loss": 18.0385, "step": 3717 }, { "epoch": 0.06796205238817701, "grad_norm": 8.483761109157722, "learning_rate": 9.962293106656222e-06, "loss": 18.2805, "step": 3718 }, { "epoch": 0.06798033158462354, "grad_norm": 7.960116500492674, "learning_rate": 9.962256812831245e-06, "loss": 18.2548, "step": 3719 }, { "epoch": 0.06799861078107007, "grad_norm": 7.351339077773364, "learning_rate": 9.962220501613991e-06, "loss": 17.7659, "step": 3720 }, { "epoch": 0.06801688997751659, "grad_norm": 7.306086424667269, "learning_rate": 9.962184173004583e-06, "loss": 17.9074, "step": 3721 }, { "epoch": 0.06803516917396311, "grad_norm": 7.366303798764439, "learning_rate": 9.962147827003152e-06, "loss": 17.5614, "step": 3722 }, { "epoch": 0.06805344837040964, "grad_norm": 7.477827781803198, "learning_rate": 9.962111463609822e-06, "loss": 17.7797, "step": 3723 }, { "epoch": 0.06807172756685616, "grad_norm": 8.984259687353106, "learning_rate": 9.962075082824722e-06, "loss": 18.0538, "step": 3724 }, { "epoch": 0.06809000676330268, "grad_norm": 6.958962392731023, "learning_rate": 9.96203868464798e-06, "loss": 17.5696, "step": 3725 }, { "epoch": 0.06810828595974922, "grad_norm": 6.8283761609229074, "learning_rate": 9.962002269079722e-06, "loss": 17.5826, "step": 3726 }, { "epoch": 0.06812656515619574, "grad_norm": 7.9269979599067355, "learning_rate": 9.961965836120076e-06, "loss": 17.9868, "step": 3727 }, { "epoch": 0.06814484435264226, "grad_norm": 7.339234417282792, "learning_rate": 9.961929385769172e-06, "loss": 17.7234, "step": 3728 }, { "epoch": 0.06816312354908878, "grad_norm": 7.82796076854723, "learning_rate": 9.961892918027136e-06, "loss": 18.6493, "step": 3729 }, { "epoch": 0.0681814027455353, "grad_norm": 7.671431642947557, "learning_rate": 9.961856432894096e-06, "loss": 17.8904, "step": 3730 }, { "epoch": 0.06819968194198182, "grad_norm": 8.118278044873199, "learning_rate": 9.961819930370178e-06, "loss": 18.332, "step": 3731 }, { "epoch": 0.06821796113842836, "grad_norm": 7.163604470953865, "learning_rate": 9.961783410455514e-06, "loss": 17.7846, "step": 3732 }, { "epoch": 0.06823624033487488, "grad_norm": 7.088079444102592, "learning_rate": 9.961746873150228e-06, "loss": 17.6912, "step": 3733 }, { "epoch": 0.0682545195313214, "grad_norm": 8.259109711126358, "learning_rate": 9.96171031845445e-06, "loss": 18.3023, "step": 3734 }, { "epoch": 0.06827279872776792, "grad_norm": 6.390717546983588, "learning_rate": 9.961673746368307e-06, "loss": 17.4842, "step": 3735 }, { "epoch": 0.06829107792421445, "grad_norm": 8.54189722197786, "learning_rate": 9.961637156891931e-06, "loss": 18.6318, "step": 3736 }, { "epoch": 0.06830935712066098, "grad_norm": 8.003033235527171, "learning_rate": 9.961600550025445e-06, "loss": 18.1477, "step": 3737 }, { "epoch": 0.0683276363171075, "grad_norm": 6.786485587457048, "learning_rate": 9.961563925768981e-06, "loss": 17.5007, "step": 3738 }, { "epoch": 0.06834591551355403, "grad_norm": 9.1781443473539, "learning_rate": 9.961527284122665e-06, "loss": 18.4901, "step": 3739 }, { "epoch": 0.06836419471000055, "grad_norm": 6.747170850842066, "learning_rate": 9.961490625086627e-06, "loss": 17.6464, "step": 3740 }, { "epoch": 0.06838247390644707, "grad_norm": 9.24932579666635, "learning_rate": 9.961453948660995e-06, "loss": 18.4559, "step": 3741 }, { "epoch": 0.06840075310289359, "grad_norm": 8.075774718250974, "learning_rate": 9.961417254845897e-06, "loss": 18.0175, "step": 3742 }, { "epoch": 0.06841903229934013, "grad_norm": 7.389772889445342, "learning_rate": 9.961380543641461e-06, "loss": 17.7848, "step": 3743 }, { "epoch": 0.06843731149578665, "grad_norm": 7.2004802013132885, "learning_rate": 9.961343815047818e-06, "loss": 17.598, "step": 3744 }, { "epoch": 0.06845559069223317, "grad_norm": 8.669813104855471, "learning_rate": 9.961307069065096e-06, "loss": 18.007, "step": 3745 }, { "epoch": 0.06847386988867969, "grad_norm": 7.563564649745429, "learning_rate": 9.961270305693422e-06, "loss": 17.961, "step": 3746 }, { "epoch": 0.06849214908512621, "grad_norm": 7.855342442049126, "learning_rate": 9.961233524932927e-06, "loss": 18.0859, "step": 3747 }, { "epoch": 0.06851042828157274, "grad_norm": 6.835256251563989, "learning_rate": 9.961196726783738e-06, "loss": 17.5802, "step": 3748 }, { "epoch": 0.06852870747801927, "grad_norm": 9.09076091118749, "learning_rate": 9.961159911245986e-06, "loss": 18.1888, "step": 3749 }, { "epoch": 0.06854698667446579, "grad_norm": 8.082008770565626, "learning_rate": 9.961123078319798e-06, "loss": 18.0489, "step": 3750 }, { "epoch": 0.06856526587091231, "grad_norm": 7.637977939444455, "learning_rate": 9.961086228005305e-06, "loss": 17.9384, "step": 3751 }, { "epoch": 0.06858354506735884, "grad_norm": 8.208068112705568, "learning_rate": 9.961049360302634e-06, "loss": 18.1634, "step": 3752 }, { "epoch": 0.06860182426380536, "grad_norm": 7.719268232487972, "learning_rate": 9.961012475211917e-06, "loss": 18.1333, "step": 3753 }, { "epoch": 0.0686201034602519, "grad_norm": 7.367828581028043, "learning_rate": 9.960975572733282e-06, "loss": 17.7244, "step": 3754 }, { "epoch": 0.06863838265669842, "grad_norm": 7.456793050592895, "learning_rate": 9.960938652866857e-06, "loss": 17.7094, "step": 3755 }, { "epoch": 0.06865666185314494, "grad_norm": 7.800264409200894, "learning_rate": 9.960901715612774e-06, "loss": 18.1531, "step": 3756 }, { "epoch": 0.06867494104959146, "grad_norm": 6.7314809857282665, "learning_rate": 9.960864760971159e-06, "loss": 17.6805, "step": 3757 }, { "epoch": 0.06869322024603798, "grad_norm": 7.6788020934336405, "learning_rate": 9.960827788942144e-06, "loss": 17.6742, "step": 3758 }, { "epoch": 0.0687114994424845, "grad_norm": 8.146205801876379, "learning_rate": 9.96079079952586e-06, "loss": 18.6369, "step": 3759 }, { "epoch": 0.06872977863893104, "grad_norm": 8.26115784078957, "learning_rate": 9.960753792722432e-06, "loss": 18.5603, "step": 3760 }, { "epoch": 0.06874805783537756, "grad_norm": 9.019812898584123, "learning_rate": 9.960716768531993e-06, "loss": 18.0269, "step": 3761 }, { "epoch": 0.06876633703182408, "grad_norm": 6.465686684918821, "learning_rate": 9.960679726954674e-06, "loss": 17.5717, "step": 3762 }, { "epoch": 0.0687846162282706, "grad_norm": 8.125270644039192, "learning_rate": 9.960642667990601e-06, "loss": 18.1933, "step": 3763 }, { "epoch": 0.06880289542471713, "grad_norm": 8.771975936087479, "learning_rate": 9.960605591639908e-06, "loss": 18.4901, "step": 3764 }, { "epoch": 0.06882117462116365, "grad_norm": 6.339127645571222, "learning_rate": 9.96056849790272e-06, "loss": 17.5171, "step": 3765 }, { "epoch": 0.06883945381761018, "grad_norm": 6.245842990050287, "learning_rate": 9.960531386779173e-06, "loss": 17.5162, "step": 3766 }, { "epoch": 0.0688577330140567, "grad_norm": 7.009487915994345, "learning_rate": 9.960494258269391e-06, "loss": 17.6565, "step": 3767 }, { "epoch": 0.06887601221050323, "grad_norm": 8.15864628189637, "learning_rate": 9.96045711237351e-06, "loss": 17.8422, "step": 3768 }, { "epoch": 0.06889429140694975, "grad_norm": 8.073448513013414, "learning_rate": 9.960419949091655e-06, "loss": 18.0036, "step": 3769 }, { "epoch": 0.06891257060339627, "grad_norm": 7.674304767211397, "learning_rate": 9.96038276842396e-06, "loss": 18.26, "step": 3770 }, { "epoch": 0.0689308497998428, "grad_norm": 7.740456753546985, "learning_rate": 9.960345570370552e-06, "loss": 18.2854, "step": 3771 }, { "epoch": 0.06894912899628933, "grad_norm": 7.139536987911921, "learning_rate": 9.960308354931564e-06, "loss": 17.8847, "step": 3772 }, { "epoch": 0.06896740819273585, "grad_norm": 6.721470384251413, "learning_rate": 9.960271122107127e-06, "loss": 17.5681, "step": 3773 }, { "epoch": 0.06898568738918237, "grad_norm": 7.326324475271985, "learning_rate": 9.96023387189737e-06, "loss": 17.8968, "step": 3774 }, { "epoch": 0.06900396658562889, "grad_norm": 7.8901041168503045, "learning_rate": 9.960196604302421e-06, "loss": 17.9237, "step": 3775 }, { "epoch": 0.06902224578207541, "grad_norm": 9.181082980181333, "learning_rate": 9.960159319322415e-06, "loss": 18.29, "step": 3776 }, { "epoch": 0.06904052497852195, "grad_norm": 7.311520840686915, "learning_rate": 9.960122016957483e-06, "loss": 17.7929, "step": 3777 }, { "epoch": 0.06905880417496847, "grad_norm": 6.928751391376276, "learning_rate": 9.96008469720775e-06, "loss": 17.477, "step": 3778 }, { "epoch": 0.069077083371415, "grad_norm": 8.172636126545248, "learning_rate": 9.960047360073353e-06, "loss": 18.1091, "step": 3779 }, { "epoch": 0.06909536256786152, "grad_norm": 8.057727424639237, "learning_rate": 9.96001000555442e-06, "loss": 17.8632, "step": 3780 }, { "epoch": 0.06911364176430804, "grad_norm": 7.684672935232596, "learning_rate": 9.959972633651083e-06, "loss": 18.0447, "step": 3781 }, { "epoch": 0.06913192096075456, "grad_norm": 7.216855034027728, "learning_rate": 9.95993524436347e-06, "loss": 17.7419, "step": 3782 }, { "epoch": 0.0691502001572011, "grad_norm": 7.853531177231895, "learning_rate": 9.959897837691717e-06, "loss": 18.1424, "step": 3783 }, { "epoch": 0.06916847935364762, "grad_norm": 7.374683926499741, "learning_rate": 9.959860413635951e-06, "loss": 17.782, "step": 3784 }, { "epoch": 0.06918675855009414, "grad_norm": 6.963507141610772, "learning_rate": 9.959822972196306e-06, "loss": 17.3137, "step": 3785 }, { "epoch": 0.06920503774654066, "grad_norm": 7.795562780346318, "learning_rate": 9.95978551337291e-06, "loss": 17.886, "step": 3786 }, { "epoch": 0.06922331694298718, "grad_norm": 8.110133946860046, "learning_rate": 9.959748037165897e-06, "loss": 18.2596, "step": 3787 }, { "epoch": 0.06924159613943372, "grad_norm": 7.645011338232504, "learning_rate": 9.959710543575397e-06, "loss": 17.8402, "step": 3788 }, { "epoch": 0.06925987533588024, "grad_norm": 6.859118651318195, "learning_rate": 9.959673032601544e-06, "loss": 17.7896, "step": 3789 }, { "epoch": 0.06927815453232676, "grad_norm": 7.007663177435595, "learning_rate": 9.959635504244466e-06, "loss": 17.732, "step": 3790 }, { "epoch": 0.06929643372877328, "grad_norm": 7.4868070815628815, "learning_rate": 9.959597958504296e-06, "loss": 17.9148, "step": 3791 }, { "epoch": 0.0693147129252198, "grad_norm": 6.576444156646345, "learning_rate": 9.959560395381166e-06, "loss": 17.5448, "step": 3792 }, { "epoch": 0.06933299212166633, "grad_norm": 6.707517850683992, "learning_rate": 9.959522814875206e-06, "loss": 17.8627, "step": 3793 }, { "epoch": 0.06935127131811286, "grad_norm": 6.707322458438496, "learning_rate": 9.959485216986548e-06, "loss": 17.4698, "step": 3794 }, { "epoch": 0.06936955051455938, "grad_norm": 7.485522659872726, "learning_rate": 9.959447601715328e-06, "loss": 17.8513, "step": 3795 }, { "epoch": 0.0693878297110059, "grad_norm": 7.275228060747942, "learning_rate": 9.959409969061673e-06, "loss": 17.8599, "step": 3796 }, { "epoch": 0.06940610890745243, "grad_norm": 7.809365144659854, "learning_rate": 9.959372319025715e-06, "loss": 17.8961, "step": 3797 }, { "epoch": 0.06942438810389895, "grad_norm": 6.528049873087958, "learning_rate": 9.959334651607589e-06, "loss": 17.2796, "step": 3798 }, { "epoch": 0.06944266730034547, "grad_norm": 6.793507717491632, "learning_rate": 9.959296966807425e-06, "loss": 17.607, "step": 3799 }, { "epoch": 0.069460946496792, "grad_norm": 5.979708116887712, "learning_rate": 9.959259264625355e-06, "loss": 17.1363, "step": 3800 }, { "epoch": 0.06947922569323853, "grad_norm": 7.522838490297659, "learning_rate": 9.959221545061512e-06, "loss": 17.9469, "step": 3801 }, { "epoch": 0.06949750488968505, "grad_norm": 7.249327032656113, "learning_rate": 9.959183808116028e-06, "loss": 17.6304, "step": 3802 }, { "epoch": 0.06951578408613157, "grad_norm": 8.86934517787543, "learning_rate": 9.959146053789035e-06, "loss": 18.454, "step": 3803 }, { "epoch": 0.06953406328257809, "grad_norm": 8.022226081210857, "learning_rate": 9.959108282080665e-06, "loss": 18.1153, "step": 3804 }, { "epoch": 0.06955234247902463, "grad_norm": 6.8823684446573665, "learning_rate": 9.959070492991052e-06, "loss": 17.4093, "step": 3805 }, { "epoch": 0.06957062167547115, "grad_norm": 7.2076966204602, "learning_rate": 9.959032686520326e-06, "loss": 17.7794, "step": 3806 }, { "epoch": 0.06958890087191767, "grad_norm": 7.8839595532464415, "learning_rate": 9.95899486266862e-06, "loss": 18.0942, "step": 3807 }, { "epoch": 0.0696071800683642, "grad_norm": 8.653682956089938, "learning_rate": 9.95895702143607e-06, "loss": 18.2604, "step": 3808 }, { "epoch": 0.06962545926481072, "grad_norm": 9.14225694344878, "learning_rate": 9.958919162822805e-06, "loss": 18.6332, "step": 3809 }, { "epoch": 0.06964373846125724, "grad_norm": 7.246660931457984, "learning_rate": 9.958881286828956e-06, "loss": 17.7692, "step": 3810 }, { "epoch": 0.06966201765770377, "grad_norm": 7.45850903341891, "learning_rate": 9.95884339345466e-06, "loss": 18.0857, "step": 3811 }, { "epoch": 0.0696802968541503, "grad_norm": 8.270664122826734, "learning_rate": 9.95880548270005e-06, "loss": 18.2097, "step": 3812 }, { "epoch": 0.06969857605059682, "grad_norm": 7.304124010185371, "learning_rate": 9.958767554565256e-06, "loss": 17.8819, "step": 3813 }, { "epoch": 0.06971685524704334, "grad_norm": 6.074939583078227, "learning_rate": 9.958729609050412e-06, "loss": 17.3484, "step": 3814 }, { "epoch": 0.06973513444348986, "grad_norm": 7.497330253821664, "learning_rate": 9.95869164615565e-06, "loss": 18.0494, "step": 3815 }, { "epoch": 0.06975341363993638, "grad_norm": 7.511123475752274, "learning_rate": 9.958653665881106e-06, "loss": 17.6961, "step": 3816 }, { "epoch": 0.06977169283638292, "grad_norm": 7.020476709462503, "learning_rate": 9.95861566822691e-06, "loss": 17.5925, "step": 3817 }, { "epoch": 0.06978997203282944, "grad_norm": 7.613458171651699, "learning_rate": 9.958577653193195e-06, "loss": 18.1222, "step": 3818 }, { "epoch": 0.06980825122927596, "grad_norm": 6.7776175462570425, "learning_rate": 9.9585396207801e-06, "loss": 17.5001, "step": 3819 }, { "epoch": 0.06982653042572248, "grad_norm": 7.048944765465509, "learning_rate": 9.958501570987748e-06, "loss": 17.6315, "step": 3820 }, { "epoch": 0.069844809622169, "grad_norm": 6.996495382862664, "learning_rate": 9.958463503816283e-06, "loss": 17.7513, "step": 3821 }, { "epoch": 0.06986308881861554, "grad_norm": 6.207166277887507, "learning_rate": 9.958425419265831e-06, "loss": 17.2869, "step": 3822 }, { "epoch": 0.06988136801506206, "grad_norm": 7.289063728499782, "learning_rate": 9.958387317336529e-06, "loss": 18.0618, "step": 3823 }, { "epoch": 0.06989964721150858, "grad_norm": 6.313155286089765, "learning_rate": 9.95834919802851e-06, "loss": 17.2904, "step": 3824 }, { "epoch": 0.0699179264079551, "grad_norm": 7.962502631421321, "learning_rate": 9.958311061341908e-06, "loss": 17.9601, "step": 3825 }, { "epoch": 0.06993620560440163, "grad_norm": 8.332043311595505, "learning_rate": 9.958272907276855e-06, "loss": 18.1206, "step": 3826 }, { "epoch": 0.06995448480084815, "grad_norm": 7.400477058760804, "learning_rate": 9.958234735833485e-06, "loss": 17.89, "step": 3827 }, { "epoch": 0.06997276399729468, "grad_norm": 8.613183284713479, "learning_rate": 9.958196547011934e-06, "loss": 18.0946, "step": 3828 }, { "epoch": 0.0699910431937412, "grad_norm": 7.362058331686377, "learning_rate": 9.958158340812333e-06, "loss": 17.6704, "step": 3829 }, { "epoch": 0.07000932239018773, "grad_norm": 8.011238170474346, "learning_rate": 9.958120117234818e-06, "loss": 18.2923, "step": 3830 }, { "epoch": 0.07002760158663425, "grad_norm": 7.26819504568335, "learning_rate": 9.958081876279523e-06, "loss": 18.0602, "step": 3831 }, { "epoch": 0.07004588078308077, "grad_norm": 6.505793670949363, "learning_rate": 9.95804361794658e-06, "loss": 17.2733, "step": 3832 }, { "epoch": 0.0700641599795273, "grad_norm": 8.085243306574576, "learning_rate": 9.958005342236126e-06, "loss": 18.3954, "step": 3833 }, { "epoch": 0.07008243917597383, "grad_norm": 8.094394056718064, "learning_rate": 9.957967049148292e-06, "loss": 18.2679, "step": 3834 }, { "epoch": 0.07010071837242035, "grad_norm": 8.984286766048601, "learning_rate": 9.957928738683213e-06, "loss": 18.5315, "step": 3835 }, { "epoch": 0.07011899756886687, "grad_norm": 7.361178826776351, "learning_rate": 9.957890410841026e-06, "loss": 17.8741, "step": 3836 }, { "epoch": 0.0701372767653134, "grad_norm": 8.090209621958795, "learning_rate": 9.957852065621862e-06, "loss": 18.0183, "step": 3837 }, { "epoch": 0.07015555596175992, "grad_norm": 7.679469249415488, "learning_rate": 9.957813703025857e-06, "loss": 17.987, "step": 3838 }, { "epoch": 0.07017383515820645, "grad_norm": 7.349116689247304, "learning_rate": 9.957775323053146e-06, "loss": 17.8111, "step": 3839 }, { "epoch": 0.07019211435465297, "grad_norm": 9.154605941566015, "learning_rate": 9.957736925703863e-06, "loss": 18.5949, "step": 3840 }, { "epoch": 0.0702103935510995, "grad_norm": 8.187506651271368, "learning_rate": 9.95769851097814e-06, "loss": 18.2681, "step": 3841 }, { "epoch": 0.07022867274754602, "grad_norm": 5.924782325222919, "learning_rate": 9.957660078876116e-06, "loss": 17.3512, "step": 3842 }, { "epoch": 0.07024695194399254, "grad_norm": 6.55882708458667, "learning_rate": 9.957621629397921e-06, "loss": 17.4009, "step": 3843 }, { "epoch": 0.07026523114043906, "grad_norm": 7.487796551520118, "learning_rate": 9.957583162543696e-06, "loss": 17.9639, "step": 3844 }, { "epoch": 0.0702835103368856, "grad_norm": 7.275007051117932, "learning_rate": 9.95754467831357e-06, "loss": 17.5755, "step": 3845 }, { "epoch": 0.07030178953333212, "grad_norm": 12.569893684604146, "learning_rate": 9.95750617670768e-06, "loss": 17.2282, "step": 3846 }, { "epoch": 0.07032006872977864, "grad_norm": 8.240543272249862, "learning_rate": 9.957467657726163e-06, "loss": 17.7524, "step": 3847 }, { "epoch": 0.07033834792622516, "grad_norm": 7.302420800858163, "learning_rate": 9.95742912136915e-06, "loss": 17.7059, "step": 3848 }, { "epoch": 0.07035662712267168, "grad_norm": 7.840809494218581, "learning_rate": 9.95739056763678e-06, "loss": 17.8699, "step": 3849 }, { "epoch": 0.0703749063191182, "grad_norm": 7.174100674831677, "learning_rate": 9.957351996529185e-06, "loss": 17.8066, "step": 3850 }, { "epoch": 0.07039318551556474, "grad_norm": 8.467747837492595, "learning_rate": 9.9573134080465e-06, "loss": 18.4042, "step": 3851 }, { "epoch": 0.07041146471201126, "grad_norm": 5.958143703909017, "learning_rate": 9.957274802188864e-06, "loss": 17.1516, "step": 3852 }, { "epoch": 0.07042974390845778, "grad_norm": 6.857922505161306, "learning_rate": 9.95723617895641e-06, "loss": 17.5791, "step": 3853 }, { "epoch": 0.0704480231049043, "grad_norm": 8.567716149969353, "learning_rate": 9.957197538349273e-06, "loss": 18.0442, "step": 3854 }, { "epoch": 0.07046630230135083, "grad_norm": 8.980549231024167, "learning_rate": 9.957158880367589e-06, "loss": 17.886, "step": 3855 }, { "epoch": 0.07048458149779736, "grad_norm": 6.891651257083474, "learning_rate": 9.957120205011492e-06, "loss": 17.8852, "step": 3856 }, { "epoch": 0.07050286069424389, "grad_norm": 8.484493154188359, "learning_rate": 9.95708151228112e-06, "loss": 18.456, "step": 3857 }, { "epoch": 0.0705211398906904, "grad_norm": 6.898141184519592, "learning_rate": 9.957042802176606e-06, "loss": 17.7343, "step": 3858 }, { "epoch": 0.07053941908713693, "grad_norm": 6.88328401354471, "learning_rate": 9.957004074698088e-06, "loss": 17.7004, "step": 3859 }, { "epoch": 0.07055769828358345, "grad_norm": 6.211656798973414, "learning_rate": 9.956965329845701e-06, "loss": 17.2023, "step": 3860 }, { "epoch": 0.07057597748002997, "grad_norm": 8.845461155674911, "learning_rate": 9.95692656761958e-06, "loss": 18.26, "step": 3861 }, { "epoch": 0.07059425667647651, "grad_norm": 7.889961654619663, "learning_rate": 9.956887788019863e-06, "loss": 17.9673, "step": 3862 }, { "epoch": 0.07061253587292303, "grad_norm": 7.44714448615704, "learning_rate": 9.956848991046682e-06, "loss": 18.1349, "step": 3863 }, { "epoch": 0.07063081506936955, "grad_norm": 6.90674757093021, "learning_rate": 9.956810176700178e-06, "loss": 17.8154, "step": 3864 }, { "epoch": 0.07064909426581607, "grad_norm": 8.046083685464394, "learning_rate": 9.956771344980483e-06, "loss": 17.9673, "step": 3865 }, { "epoch": 0.0706673734622626, "grad_norm": 7.859387615224365, "learning_rate": 9.956732495887736e-06, "loss": 18.0598, "step": 3866 }, { "epoch": 0.07068565265870912, "grad_norm": 7.641056445194432, "learning_rate": 9.95669362942207e-06, "loss": 18.1409, "step": 3867 }, { "epoch": 0.07070393185515565, "grad_norm": 8.94055665340746, "learning_rate": 9.956654745583623e-06, "loss": 18.8027, "step": 3868 }, { "epoch": 0.07072221105160217, "grad_norm": 6.691974164414335, "learning_rate": 9.956615844372532e-06, "loss": 17.6638, "step": 3869 }, { "epoch": 0.0707404902480487, "grad_norm": 6.057754083197547, "learning_rate": 9.956576925788934e-06, "loss": 17.2145, "step": 3870 }, { "epoch": 0.07075876944449522, "grad_norm": 7.604425340976951, "learning_rate": 9.956537989832961e-06, "loss": 17.7097, "step": 3871 }, { "epoch": 0.07077704864094174, "grad_norm": 6.472158783529807, "learning_rate": 9.956499036504754e-06, "loss": 17.372, "step": 3872 }, { "epoch": 0.07079532783738827, "grad_norm": 6.465712103051698, "learning_rate": 9.956460065804449e-06, "loss": 17.5418, "step": 3873 }, { "epoch": 0.0708136070338348, "grad_norm": 6.280648814493716, "learning_rate": 9.95642107773218e-06, "loss": 17.2867, "step": 3874 }, { "epoch": 0.07083188623028132, "grad_norm": 7.0407610117133945, "learning_rate": 9.956382072288086e-06, "loss": 17.6109, "step": 3875 }, { "epoch": 0.07085016542672784, "grad_norm": 7.994480393016992, "learning_rate": 9.956343049472302e-06, "loss": 17.8335, "step": 3876 }, { "epoch": 0.07086844462317436, "grad_norm": 7.107490375781242, "learning_rate": 9.956304009284968e-06, "loss": 17.9981, "step": 3877 }, { "epoch": 0.07088672381962088, "grad_norm": 6.842196758503717, "learning_rate": 9.956264951726216e-06, "loss": 17.5187, "step": 3878 }, { "epoch": 0.07090500301606742, "grad_norm": 8.76140509406288, "learning_rate": 9.956225876796185e-06, "loss": 18.4938, "step": 3879 }, { "epoch": 0.07092328221251394, "grad_norm": 8.632549706370371, "learning_rate": 9.956186784495014e-06, "loss": 18.1769, "step": 3880 }, { "epoch": 0.07094156140896046, "grad_norm": 6.453701664390098, "learning_rate": 9.956147674822838e-06, "loss": 17.4795, "step": 3881 }, { "epoch": 0.07095984060540698, "grad_norm": 6.693906139208951, "learning_rate": 9.956108547779795e-06, "loss": 17.4784, "step": 3882 }, { "epoch": 0.0709781198018535, "grad_norm": 8.410888034663625, "learning_rate": 9.956069403366022e-06, "loss": 18.2885, "step": 3883 }, { "epoch": 0.07099639899830003, "grad_norm": 7.609992702552747, "learning_rate": 9.956030241581655e-06, "loss": 18.289, "step": 3884 }, { "epoch": 0.07101467819474656, "grad_norm": 7.361900219256501, "learning_rate": 9.955991062426831e-06, "loss": 18.0587, "step": 3885 }, { "epoch": 0.07103295739119309, "grad_norm": 7.517307551589767, "learning_rate": 9.955951865901689e-06, "loss": 17.9746, "step": 3886 }, { "epoch": 0.07105123658763961, "grad_norm": 7.414760374856745, "learning_rate": 9.955912652006368e-06, "loss": 17.9533, "step": 3887 }, { "epoch": 0.07106951578408613, "grad_norm": 6.804474401638567, "learning_rate": 9.955873420741001e-06, "loss": 17.9495, "step": 3888 }, { "epoch": 0.07108779498053265, "grad_norm": 8.028523634313913, "learning_rate": 9.955834172105728e-06, "loss": 17.9176, "step": 3889 }, { "epoch": 0.07110607417697919, "grad_norm": 7.412426346205773, "learning_rate": 9.955794906100687e-06, "loss": 17.7117, "step": 3890 }, { "epoch": 0.07112435337342571, "grad_norm": 8.429497997814615, "learning_rate": 9.955755622726013e-06, "loss": 18.1698, "step": 3891 }, { "epoch": 0.07114263256987223, "grad_norm": 7.746424875395029, "learning_rate": 9.955716321981847e-06, "loss": 18.2551, "step": 3892 }, { "epoch": 0.07116091176631875, "grad_norm": 6.949464628106947, "learning_rate": 9.955677003868326e-06, "loss": 17.7316, "step": 3893 }, { "epoch": 0.07117919096276527, "grad_norm": 6.86136952791035, "learning_rate": 9.955637668385585e-06, "loss": 17.5325, "step": 3894 }, { "epoch": 0.0711974701592118, "grad_norm": 7.210314086051575, "learning_rate": 9.955598315533767e-06, "loss": 17.4802, "step": 3895 }, { "epoch": 0.07121574935565833, "grad_norm": 8.799671632444277, "learning_rate": 9.955558945313005e-06, "loss": 18.1703, "step": 3896 }, { "epoch": 0.07123402855210485, "grad_norm": 9.348757079785866, "learning_rate": 9.955519557723438e-06, "loss": 18.8315, "step": 3897 }, { "epoch": 0.07125230774855137, "grad_norm": 7.394544857227317, "learning_rate": 9.955480152765206e-06, "loss": 17.5749, "step": 3898 }, { "epoch": 0.0712705869449979, "grad_norm": 8.004974836420036, "learning_rate": 9.955440730438448e-06, "loss": 18.1448, "step": 3899 }, { "epoch": 0.07128886614144442, "grad_norm": 6.7621017788199325, "learning_rate": 9.9554012907433e-06, "loss": 17.7479, "step": 3900 }, { "epoch": 0.07130714533789094, "grad_norm": 6.965136473977121, "learning_rate": 9.955361833679898e-06, "loss": 17.7969, "step": 3901 }, { "epoch": 0.07132542453433748, "grad_norm": 6.460864792601465, "learning_rate": 9.955322359248382e-06, "loss": 17.5655, "step": 3902 }, { "epoch": 0.071343703730784, "grad_norm": 7.078763293573598, "learning_rate": 9.955282867448893e-06, "loss": 17.7961, "step": 3903 }, { "epoch": 0.07136198292723052, "grad_norm": 7.453499886376171, "learning_rate": 9.955243358281567e-06, "loss": 17.8465, "step": 3904 }, { "epoch": 0.07138026212367704, "grad_norm": 7.344132117766187, "learning_rate": 9.955203831746544e-06, "loss": 17.7976, "step": 3905 }, { "epoch": 0.07139854132012356, "grad_norm": 6.28162514941662, "learning_rate": 9.955164287843961e-06, "loss": 17.5085, "step": 3906 }, { "epoch": 0.0714168205165701, "grad_norm": 6.301605044284223, "learning_rate": 9.955124726573957e-06, "loss": 17.2057, "step": 3907 }, { "epoch": 0.07143509971301662, "grad_norm": 8.324558992093518, "learning_rate": 9.955085147936672e-06, "loss": 18.319, "step": 3908 }, { "epoch": 0.07145337890946314, "grad_norm": 39.34073829515376, "learning_rate": 9.955045551932244e-06, "loss": 18.6678, "step": 3909 }, { "epoch": 0.07147165810590966, "grad_norm": 7.0815990079285065, "learning_rate": 9.95500593856081e-06, "loss": 17.5787, "step": 3910 }, { "epoch": 0.07148993730235619, "grad_norm": 7.758661495696111, "learning_rate": 9.954966307822509e-06, "loss": 18.1176, "step": 3911 }, { "epoch": 0.0715082164988027, "grad_norm": 5.4359752949865365, "learning_rate": 9.95492665971748e-06, "loss": 16.9614, "step": 3912 }, { "epoch": 0.07152649569524924, "grad_norm": 6.701826729506655, "learning_rate": 9.954886994245868e-06, "loss": 17.541, "step": 3913 }, { "epoch": 0.07154477489169576, "grad_norm": 7.496048174752599, "learning_rate": 9.954847311407802e-06, "loss": 17.9465, "step": 3914 }, { "epoch": 0.07156305408814229, "grad_norm": 7.645185163814274, "learning_rate": 9.954807611203429e-06, "loss": 17.8904, "step": 3915 }, { "epoch": 0.07158133328458881, "grad_norm": 8.495001972004014, "learning_rate": 9.954767893632885e-06, "loss": 18.0269, "step": 3916 }, { "epoch": 0.07159961248103533, "grad_norm": 7.298855466968175, "learning_rate": 9.95472815869631e-06, "loss": 17.8787, "step": 3917 }, { "epoch": 0.07161789167748185, "grad_norm": 8.149951520511012, "learning_rate": 9.954688406393842e-06, "loss": 18.1366, "step": 3918 }, { "epoch": 0.07163617087392839, "grad_norm": 8.855197248094138, "learning_rate": 9.95464863672562e-06, "loss": 18.1028, "step": 3919 }, { "epoch": 0.07165445007037491, "grad_norm": 8.326114062175687, "learning_rate": 9.954608849691785e-06, "loss": 18.048, "step": 3920 }, { "epoch": 0.07167272926682143, "grad_norm": 8.429519275282749, "learning_rate": 9.954569045292478e-06, "loss": 18.2035, "step": 3921 }, { "epoch": 0.07169100846326795, "grad_norm": 6.783247772055827, "learning_rate": 9.954529223527833e-06, "loss": 17.6596, "step": 3922 }, { "epoch": 0.07170928765971447, "grad_norm": 9.927533382657202, "learning_rate": 9.954489384397994e-06, "loss": 18.119, "step": 3923 }, { "epoch": 0.07172756685616101, "grad_norm": 8.204292956300954, "learning_rate": 9.954449527903101e-06, "loss": 17.7851, "step": 3924 }, { "epoch": 0.07174584605260753, "grad_norm": 7.3709617881107485, "learning_rate": 9.954409654043292e-06, "loss": 17.9231, "step": 3925 }, { "epoch": 0.07176412524905405, "grad_norm": 6.613022753483986, "learning_rate": 9.954369762818708e-06, "loss": 17.7306, "step": 3926 }, { "epoch": 0.07178240444550057, "grad_norm": 7.856914378194838, "learning_rate": 9.954329854229485e-06, "loss": 18.1396, "step": 3927 }, { "epoch": 0.0718006836419471, "grad_norm": 7.518274259323055, "learning_rate": 9.954289928275769e-06, "loss": 17.5279, "step": 3928 }, { "epoch": 0.07181896283839362, "grad_norm": 8.176998030566098, "learning_rate": 9.954249984957695e-06, "loss": 17.9249, "step": 3929 }, { "epoch": 0.07183724203484015, "grad_norm": 7.313919425219405, "learning_rate": 9.954210024275403e-06, "loss": 17.8802, "step": 3930 }, { "epoch": 0.07185552123128668, "grad_norm": 6.774606189206345, "learning_rate": 9.954170046229039e-06, "loss": 17.6312, "step": 3931 }, { "epoch": 0.0718738004277332, "grad_norm": 8.35589329756528, "learning_rate": 9.954130050818735e-06, "loss": 18.0618, "step": 3932 }, { "epoch": 0.07189207962417972, "grad_norm": 7.354747245044039, "learning_rate": 9.954090038044637e-06, "loss": 17.6745, "step": 3933 }, { "epoch": 0.07191035882062624, "grad_norm": 6.369932691161392, "learning_rate": 9.954050007906883e-06, "loss": 17.4517, "step": 3934 }, { "epoch": 0.07192863801707276, "grad_norm": 6.04383158675737, "learning_rate": 9.954009960405613e-06, "loss": 17.2224, "step": 3935 }, { "epoch": 0.0719469172135193, "grad_norm": 8.684643178179634, "learning_rate": 9.953969895540969e-06, "loss": 18.2651, "step": 3936 }, { "epoch": 0.07196519640996582, "grad_norm": 8.006094844175474, "learning_rate": 9.95392981331309e-06, "loss": 18.0621, "step": 3937 }, { "epoch": 0.07198347560641234, "grad_norm": 7.1188474842092635, "learning_rate": 9.953889713722118e-06, "loss": 17.6638, "step": 3938 }, { "epoch": 0.07200175480285886, "grad_norm": 7.686894365864788, "learning_rate": 9.95384959676819e-06, "loss": 17.7463, "step": 3939 }, { "epoch": 0.07202003399930539, "grad_norm": 7.501717431496252, "learning_rate": 9.953809462451452e-06, "loss": 17.8232, "step": 3940 }, { "epoch": 0.07203831319575192, "grad_norm": 8.094419695444945, "learning_rate": 9.95376931077204e-06, "loss": 18.1749, "step": 3941 }, { "epoch": 0.07205659239219844, "grad_norm": 7.936655585513716, "learning_rate": 9.953729141730095e-06, "loss": 17.7579, "step": 3942 }, { "epoch": 0.07207487158864496, "grad_norm": 6.066799466763743, "learning_rate": 9.95368895532576e-06, "loss": 17.4134, "step": 3943 }, { "epoch": 0.07209315078509149, "grad_norm": 7.904978332949921, "learning_rate": 9.953648751559177e-06, "loss": 17.931, "step": 3944 }, { "epoch": 0.07211142998153801, "grad_norm": 7.5199540264729325, "learning_rate": 9.953608530430483e-06, "loss": 18.0643, "step": 3945 }, { "epoch": 0.07212970917798453, "grad_norm": 7.682338263648552, "learning_rate": 9.953568291939822e-06, "loss": 17.7252, "step": 3946 }, { "epoch": 0.07214798837443107, "grad_norm": 8.975695313385652, "learning_rate": 9.953528036087334e-06, "loss": 18.3403, "step": 3947 }, { "epoch": 0.07216626757087759, "grad_norm": 7.9441040428145655, "learning_rate": 9.95348776287316e-06, "loss": 17.718, "step": 3948 }, { "epoch": 0.07218454676732411, "grad_norm": 7.4782322260295855, "learning_rate": 9.953447472297442e-06, "loss": 18.0214, "step": 3949 }, { "epoch": 0.07220282596377063, "grad_norm": 7.993625387885855, "learning_rate": 9.953407164360317e-06, "loss": 17.5924, "step": 3950 }, { "epoch": 0.07222110516021715, "grad_norm": 6.53021490621029, "learning_rate": 9.953366839061932e-06, "loss": 17.3348, "step": 3951 }, { "epoch": 0.07223938435666367, "grad_norm": 7.648185006668104, "learning_rate": 9.953326496402428e-06, "loss": 17.8196, "step": 3952 }, { "epoch": 0.07225766355311021, "grad_norm": 8.067465309558568, "learning_rate": 9.953286136381944e-06, "loss": 18.3634, "step": 3953 }, { "epoch": 0.07227594274955673, "grad_norm": 7.379918960516362, "learning_rate": 9.95324575900062e-06, "loss": 17.9421, "step": 3954 }, { "epoch": 0.07229422194600325, "grad_norm": 7.497042354897406, "learning_rate": 9.9532053642586e-06, "loss": 17.9736, "step": 3955 }, { "epoch": 0.07231250114244978, "grad_norm": 9.806995871791335, "learning_rate": 9.953164952156024e-06, "loss": 18.8548, "step": 3956 }, { "epoch": 0.0723307803388963, "grad_norm": 6.247902363816244, "learning_rate": 9.953124522693036e-06, "loss": 17.4974, "step": 3957 }, { "epoch": 0.07234905953534283, "grad_norm": 7.512323418953835, "learning_rate": 9.953084075869777e-06, "loss": 17.755, "step": 3958 }, { "epoch": 0.07236733873178935, "grad_norm": 8.35778571923837, "learning_rate": 9.953043611686387e-06, "loss": 18.5947, "step": 3959 }, { "epoch": 0.07238561792823588, "grad_norm": 7.266903566371978, "learning_rate": 9.953003130143008e-06, "loss": 17.7397, "step": 3960 }, { "epoch": 0.0724038971246824, "grad_norm": 8.868681862384477, "learning_rate": 9.952962631239785e-06, "loss": 18.2875, "step": 3961 }, { "epoch": 0.07242217632112892, "grad_norm": 7.137759581150072, "learning_rate": 9.952922114976856e-06, "loss": 17.76, "step": 3962 }, { "epoch": 0.07244045551757544, "grad_norm": 6.571967067078624, "learning_rate": 9.952881581354366e-06, "loss": 17.7174, "step": 3963 }, { "epoch": 0.07245873471402198, "grad_norm": 8.084110521015178, "learning_rate": 9.952841030372455e-06, "loss": 18.1417, "step": 3964 }, { "epoch": 0.0724770139104685, "grad_norm": 6.855205404867292, "learning_rate": 9.952800462031267e-06, "loss": 17.7353, "step": 3965 }, { "epoch": 0.07249529310691502, "grad_norm": 7.362992009751969, "learning_rate": 9.952759876330941e-06, "loss": 18.034, "step": 3966 }, { "epoch": 0.07251357230336154, "grad_norm": 7.209436599592481, "learning_rate": 9.952719273271625e-06, "loss": 17.7053, "step": 3967 }, { "epoch": 0.07253185149980806, "grad_norm": 10.08601339002671, "learning_rate": 9.952678652853457e-06, "loss": 18.1241, "step": 3968 }, { "epoch": 0.07255013069625459, "grad_norm": 7.466547884926921, "learning_rate": 9.952638015076577e-06, "loss": 18.0381, "step": 3969 }, { "epoch": 0.07256840989270112, "grad_norm": 7.990963338576961, "learning_rate": 9.952597359941132e-06, "loss": 18.2016, "step": 3970 }, { "epoch": 0.07258668908914764, "grad_norm": 7.256105294918094, "learning_rate": 9.952556687447264e-06, "loss": 17.9083, "step": 3971 }, { "epoch": 0.07260496828559417, "grad_norm": 9.089438335438436, "learning_rate": 9.952515997595113e-06, "loss": 18.5281, "step": 3972 }, { "epoch": 0.07262324748204069, "grad_norm": 8.329081459579776, "learning_rate": 9.952475290384824e-06, "loss": 18.3693, "step": 3973 }, { "epoch": 0.07264152667848721, "grad_norm": 6.519775676102489, "learning_rate": 9.95243456581654e-06, "loss": 17.5964, "step": 3974 }, { "epoch": 0.07265980587493374, "grad_norm": 8.470011468438747, "learning_rate": 9.9523938238904e-06, "loss": 18.4528, "step": 3975 }, { "epoch": 0.07267808507138027, "grad_norm": 6.759815054276921, "learning_rate": 9.952353064606553e-06, "loss": 17.523, "step": 3976 }, { "epoch": 0.07269636426782679, "grad_norm": 6.796952167284505, "learning_rate": 9.952312287965136e-06, "loss": 17.618, "step": 3977 }, { "epoch": 0.07271464346427331, "grad_norm": 6.743480792586531, "learning_rate": 9.952271493966293e-06, "loss": 17.6924, "step": 3978 }, { "epoch": 0.07273292266071983, "grad_norm": 7.624318428914813, "learning_rate": 9.95223068261017e-06, "loss": 17.9983, "step": 3979 }, { "epoch": 0.07275120185716635, "grad_norm": 8.376676897364284, "learning_rate": 9.952189853896907e-06, "loss": 18.4505, "step": 3980 }, { "epoch": 0.07276948105361289, "grad_norm": 6.885788415651327, "learning_rate": 9.95214900782665e-06, "loss": 17.576, "step": 3981 }, { "epoch": 0.07278776025005941, "grad_norm": 6.942396515459639, "learning_rate": 9.952108144399539e-06, "loss": 17.5863, "step": 3982 }, { "epoch": 0.07280603944650593, "grad_norm": 7.3150546757752855, "learning_rate": 9.952067263615719e-06, "loss": 18.1692, "step": 3983 }, { "epoch": 0.07282431864295245, "grad_norm": 7.031306967707324, "learning_rate": 9.952026365475333e-06, "loss": 17.7734, "step": 3984 }, { "epoch": 0.07284259783939898, "grad_norm": 9.289935613803255, "learning_rate": 9.951985449978524e-06, "loss": 18.4523, "step": 3985 }, { "epoch": 0.0728608770358455, "grad_norm": 7.277500424707788, "learning_rate": 9.951944517125436e-06, "loss": 17.8173, "step": 3986 }, { "epoch": 0.07287915623229203, "grad_norm": 7.301225178483241, "learning_rate": 9.951903566916213e-06, "loss": 17.8435, "step": 3987 }, { "epoch": 0.07289743542873856, "grad_norm": 8.825591628752957, "learning_rate": 9.951862599350998e-06, "loss": 18.2613, "step": 3988 }, { "epoch": 0.07291571462518508, "grad_norm": 8.208799393223494, "learning_rate": 9.951821614429934e-06, "loss": 17.8988, "step": 3989 }, { "epoch": 0.0729339938216316, "grad_norm": 8.139743030720322, "learning_rate": 9.951780612153163e-06, "loss": 18.1046, "step": 3990 }, { "epoch": 0.07295227301807812, "grad_norm": 6.784330373655954, "learning_rate": 9.951739592520833e-06, "loss": 17.2882, "step": 3991 }, { "epoch": 0.07297055221452466, "grad_norm": 7.61520459988752, "learning_rate": 9.951698555533082e-06, "loss": 18.0849, "step": 3992 }, { "epoch": 0.07298883141097118, "grad_norm": 7.072648312272576, "learning_rate": 9.951657501190061e-06, "loss": 17.6032, "step": 3993 }, { "epoch": 0.0730071106074177, "grad_norm": 7.917906172330156, "learning_rate": 9.951616429491908e-06, "loss": 18.2392, "step": 3994 }, { "epoch": 0.07302538980386422, "grad_norm": 6.925692444592428, "learning_rate": 9.95157534043877e-06, "loss": 17.6648, "step": 3995 }, { "epoch": 0.07304366900031074, "grad_norm": 7.54444893535727, "learning_rate": 9.951534234030791e-06, "loss": 17.9823, "step": 3996 }, { "epoch": 0.07306194819675726, "grad_norm": 6.612402859175103, "learning_rate": 9.951493110268111e-06, "loss": 17.8484, "step": 3997 }, { "epoch": 0.0730802273932038, "grad_norm": 8.501304712562513, "learning_rate": 9.95145196915088e-06, "loss": 18.386, "step": 3998 }, { "epoch": 0.07309850658965032, "grad_norm": 7.528057117856636, "learning_rate": 9.95141081067924e-06, "loss": 17.9842, "step": 3999 }, { "epoch": 0.07311678578609684, "grad_norm": 6.8067888312337725, "learning_rate": 9.951369634853335e-06, "loss": 17.2365, "step": 4000 }, { "epoch": 0.07313506498254337, "grad_norm": 6.125900010871575, "learning_rate": 9.951328441673307e-06, "loss": 17.3047, "step": 4001 }, { "epoch": 0.07315334417898989, "grad_norm": 6.362462663547174, "learning_rate": 9.951287231139303e-06, "loss": 17.2798, "step": 4002 }, { "epoch": 0.07317162337543641, "grad_norm": 7.327786497261743, "learning_rate": 9.951246003251466e-06, "loss": 17.7502, "step": 4003 }, { "epoch": 0.07318990257188294, "grad_norm": 7.816029605754106, "learning_rate": 9.951204758009944e-06, "loss": 18.0539, "step": 4004 }, { "epoch": 0.07320818176832947, "grad_norm": 6.597918062036601, "learning_rate": 9.951163495414878e-06, "loss": 17.4532, "step": 4005 }, { "epoch": 0.07322646096477599, "grad_norm": 6.82620713681258, "learning_rate": 9.951122215466412e-06, "loss": 17.7182, "step": 4006 }, { "epoch": 0.07324474016122251, "grad_norm": 7.169722973585692, "learning_rate": 9.951080918164693e-06, "loss": 17.589, "step": 4007 }, { "epoch": 0.07326301935766903, "grad_norm": 8.637690090782502, "learning_rate": 9.951039603509867e-06, "loss": 18.7023, "step": 4008 }, { "epoch": 0.07328129855411557, "grad_norm": 7.390510482823445, "learning_rate": 9.950998271502074e-06, "loss": 17.6186, "step": 4009 }, { "epoch": 0.07329957775056209, "grad_norm": 8.516459019691691, "learning_rate": 9.950956922141464e-06, "loss": 18.2599, "step": 4010 }, { "epoch": 0.07331785694700861, "grad_norm": 8.216007324001174, "learning_rate": 9.950915555428178e-06, "loss": 18.036, "step": 4011 }, { "epoch": 0.07333613614345513, "grad_norm": 7.413571630216674, "learning_rate": 9.950874171362364e-06, "loss": 17.9904, "step": 4012 }, { "epoch": 0.07335441533990165, "grad_norm": 6.988794377704065, "learning_rate": 9.950832769944162e-06, "loss": 17.4064, "step": 4013 }, { "epoch": 0.07337269453634818, "grad_norm": 7.667425168599029, "learning_rate": 9.950791351173726e-06, "loss": 18.2205, "step": 4014 }, { "epoch": 0.07339097373279471, "grad_norm": 8.019972368439227, "learning_rate": 9.950749915051192e-06, "loss": 18.0422, "step": 4015 }, { "epoch": 0.07340925292924123, "grad_norm": 7.834211179579535, "learning_rate": 9.950708461576711e-06, "loss": 18.1822, "step": 4016 }, { "epoch": 0.07342753212568776, "grad_norm": 6.469877667609167, "learning_rate": 9.950666990750426e-06, "loss": 17.3202, "step": 4017 }, { "epoch": 0.07344581132213428, "grad_norm": 7.870032854928659, "learning_rate": 9.950625502572484e-06, "loss": 18.0954, "step": 4018 }, { "epoch": 0.0734640905185808, "grad_norm": 7.566350062095447, "learning_rate": 9.950583997043027e-06, "loss": 17.8075, "step": 4019 }, { "epoch": 0.07348236971502732, "grad_norm": 7.780315938865126, "learning_rate": 9.950542474162204e-06, "loss": 17.9308, "step": 4020 }, { "epoch": 0.07350064891147386, "grad_norm": 7.922106515004746, "learning_rate": 9.950500933930158e-06, "loss": 17.9235, "step": 4021 }, { "epoch": 0.07351892810792038, "grad_norm": 7.45719124248031, "learning_rate": 9.950459376347039e-06, "loss": 18.1789, "step": 4022 }, { "epoch": 0.0735372073043669, "grad_norm": 7.22212766677821, "learning_rate": 9.950417801412986e-06, "loss": 17.7704, "step": 4023 }, { "epoch": 0.07355548650081342, "grad_norm": 7.677127034832653, "learning_rate": 9.95037620912815e-06, "loss": 17.8526, "step": 4024 }, { "epoch": 0.07357376569725994, "grad_norm": 8.816380996715482, "learning_rate": 9.950334599492674e-06, "loss": 18.3883, "step": 4025 }, { "epoch": 0.07359204489370648, "grad_norm": 8.390724021524845, "learning_rate": 9.950292972506706e-06, "loss": 18.2191, "step": 4026 }, { "epoch": 0.073610324090153, "grad_norm": 7.368897272812623, "learning_rate": 9.950251328170389e-06, "loss": 17.4886, "step": 4027 }, { "epoch": 0.07362860328659952, "grad_norm": 7.692438911894966, "learning_rate": 9.950209666483873e-06, "loss": 17.9969, "step": 4028 }, { "epoch": 0.07364688248304604, "grad_norm": 7.742024905837131, "learning_rate": 9.9501679874473e-06, "loss": 18.0472, "step": 4029 }, { "epoch": 0.07366516167949257, "grad_norm": 6.126921754427087, "learning_rate": 9.950126291060818e-06, "loss": 17.1164, "step": 4030 }, { "epoch": 0.07368344087593909, "grad_norm": 8.124959370701161, "learning_rate": 9.950084577324573e-06, "loss": 18.0901, "step": 4031 }, { "epoch": 0.07370172007238562, "grad_norm": 7.026397606492624, "learning_rate": 9.95004284623871e-06, "loss": 17.8502, "step": 4032 }, { "epoch": 0.07371999926883215, "grad_norm": 7.895297019900992, "learning_rate": 9.950001097803377e-06, "loss": 17.9183, "step": 4033 }, { "epoch": 0.07373827846527867, "grad_norm": 6.902516650119397, "learning_rate": 9.94995933201872e-06, "loss": 17.728, "step": 4034 }, { "epoch": 0.07375655766172519, "grad_norm": 6.500875070735881, "learning_rate": 9.949917548884886e-06, "loss": 17.5944, "step": 4035 }, { "epoch": 0.07377483685817171, "grad_norm": 7.61340253150107, "learning_rate": 9.94987574840202e-06, "loss": 17.8991, "step": 4036 }, { "epoch": 0.07379311605461823, "grad_norm": 7.249979086155434, "learning_rate": 9.949833930570267e-06, "loss": 17.8095, "step": 4037 }, { "epoch": 0.07381139525106477, "grad_norm": 6.905778959602601, "learning_rate": 9.949792095389778e-06, "loss": 17.5323, "step": 4038 }, { "epoch": 0.07382967444751129, "grad_norm": 7.230588307395547, "learning_rate": 9.949750242860696e-06, "loss": 17.9275, "step": 4039 }, { "epoch": 0.07384795364395781, "grad_norm": 8.082270819590986, "learning_rate": 9.94970837298317e-06, "loss": 18.1718, "step": 4040 }, { "epoch": 0.07386623284040433, "grad_norm": 7.657347363032943, "learning_rate": 9.949666485757345e-06, "loss": 17.7518, "step": 4041 }, { "epoch": 0.07388451203685086, "grad_norm": 6.796068906671142, "learning_rate": 9.949624581183369e-06, "loss": 17.4804, "step": 4042 }, { "epoch": 0.07390279123329739, "grad_norm": 7.122222622706773, "learning_rate": 9.949582659261387e-06, "loss": 17.6666, "step": 4043 }, { "epoch": 0.07392107042974391, "grad_norm": 8.144535069668137, "learning_rate": 9.949540719991549e-06, "loss": 17.8327, "step": 4044 }, { "epoch": 0.07393934962619043, "grad_norm": 7.21638860677488, "learning_rate": 9.949498763374e-06, "loss": 17.7529, "step": 4045 }, { "epoch": 0.07395762882263696, "grad_norm": 6.333474243227332, "learning_rate": 9.949456789408887e-06, "loss": 17.2955, "step": 4046 }, { "epoch": 0.07397590801908348, "grad_norm": 6.911443461586524, "learning_rate": 9.949414798096357e-06, "loss": 17.5879, "step": 4047 }, { "epoch": 0.07399418721553, "grad_norm": 6.792686640770893, "learning_rate": 9.949372789436559e-06, "loss": 17.7279, "step": 4048 }, { "epoch": 0.07401246641197654, "grad_norm": 7.241880275905046, "learning_rate": 9.949330763429637e-06, "loss": 18.0251, "step": 4049 }, { "epoch": 0.07403074560842306, "grad_norm": 8.604560593156664, "learning_rate": 9.949288720075741e-06, "loss": 18.1001, "step": 4050 }, { "epoch": 0.07404902480486958, "grad_norm": 6.813629230605672, "learning_rate": 9.949246659375017e-06, "loss": 17.4705, "step": 4051 }, { "epoch": 0.0740673040013161, "grad_norm": 8.35377728722315, "learning_rate": 9.949204581327615e-06, "loss": 18.4, "step": 4052 }, { "epoch": 0.07408558319776262, "grad_norm": 7.689640765761798, "learning_rate": 9.949162485933677e-06, "loss": 18.1278, "step": 4053 }, { "epoch": 0.07410386239420914, "grad_norm": 6.838277020464443, "learning_rate": 9.949120373193354e-06, "loss": 17.4119, "step": 4054 }, { "epoch": 0.07412214159065568, "grad_norm": 6.290167469321867, "learning_rate": 9.949078243106797e-06, "loss": 17.1962, "step": 4055 }, { "epoch": 0.0741404207871022, "grad_norm": 7.008453919533337, "learning_rate": 9.949036095674148e-06, "loss": 17.5201, "step": 4056 }, { "epoch": 0.07415869998354872, "grad_norm": 7.558657647542814, "learning_rate": 9.948993930895558e-06, "loss": 17.9684, "step": 4057 }, { "epoch": 0.07417697917999524, "grad_norm": 7.365638752495691, "learning_rate": 9.948951748771174e-06, "loss": 17.7711, "step": 4058 }, { "epoch": 0.07419525837644177, "grad_norm": 8.646260946036872, "learning_rate": 9.948909549301143e-06, "loss": 18.1282, "step": 4059 }, { "epoch": 0.0742135375728883, "grad_norm": 8.438851141537185, "learning_rate": 9.948867332485613e-06, "loss": 18.2028, "step": 4060 }, { "epoch": 0.07423181676933482, "grad_norm": 8.511358088612571, "learning_rate": 9.948825098324732e-06, "loss": 18.4583, "step": 4061 }, { "epoch": 0.07425009596578135, "grad_norm": 6.702850206941317, "learning_rate": 9.948782846818649e-06, "loss": 17.29, "step": 4062 }, { "epoch": 0.07426837516222787, "grad_norm": 9.318593419797512, "learning_rate": 9.948740577967512e-06, "loss": 18.9032, "step": 4063 }, { "epoch": 0.07428665435867439, "grad_norm": 8.15920685077243, "learning_rate": 9.948698291771467e-06, "loss": 17.9272, "step": 4064 }, { "epoch": 0.07430493355512091, "grad_norm": 7.552555566038091, "learning_rate": 9.948655988230667e-06, "loss": 17.9185, "step": 4065 }, { "epoch": 0.07432321275156745, "grad_norm": 7.421642743888335, "learning_rate": 9.948613667345255e-06, "loss": 17.9962, "step": 4066 }, { "epoch": 0.07434149194801397, "grad_norm": 7.589915691254926, "learning_rate": 9.948571329115382e-06, "loss": 17.811, "step": 4067 }, { "epoch": 0.07435977114446049, "grad_norm": 6.773882338508222, "learning_rate": 9.948528973541195e-06, "loss": 17.598, "step": 4068 }, { "epoch": 0.07437805034090701, "grad_norm": 7.744106412655492, "learning_rate": 9.948486600622845e-06, "loss": 17.9758, "step": 4069 }, { "epoch": 0.07439632953735353, "grad_norm": 6.691671558962903, "learning_rate": 9.948444210360478e-06, "loss": 17.4987, "step": 4070 }, { "epoch": 0.07441460873380006, "grad_norm": 7.116877447696861, "learning_rate": 9.948401802754245e-06, "loss": 17.7262, "step": 4071 }, { "epoch": 0.07443288793024659, "grad_norm": 6.537008838427648, "learning_rate": 9.948359377804291e-06, "loss": 17.6175, "step": 4072 }, { "epoch": 0.07445116712669311, "grad_norm": 9.792917696642098, "learning_rate": 9.948316935510766e-06, "loss": 18.7539, "step": 4073 }, { "epoch": 0.07446944632313963, "grad_norm": 7.123510681040195, "learning_rate": 9.948274475873823e-06, "loss": 17.885, "step": 4074 }, { "epoch": 0.07448772551958616, "grad_norm": 7.761363184437969, "learning_rate": 9.948231998893605e-06, "loss": 18.178, "step": 4075 }, { "epoch": 0.07450600471603268, "grad_norm": 7.701810438309446, "learning_rate": 9.948189504570263e-06, "loss": 18.1316, "step": 4076 }, { "epoch": 0.07452428391247921, "grad_norm": 8.334436625697984, "learning_rate": 9.948146992903947e-06, "loss": 17.8821, "step": 4077 }, { "epoch": 0.07454256310892574, "grad_norm": 7.408338899300994, "learning_rate": 9.948104463894805e-06, "loss": 17.8913, "step": 4078 }, { "epoch": 0.07456084230537226, "grad_norm": 6.739679926354867, "learning_rate": 9.948061917542986e-06, "loss": 17.7448, "step": 4079 }, { "epoch": 0.07457912150181878, "grad_norm": 7.439166133164964, "learning_rate": 9.94801935384864e-06, "loss": 18.0901, "step": 4080 }, { "epoch": 0.0745974006982653, "grad_norm": 7.322434827980138, "learning_rate": 9.947976772811915e-06, "loss": 17.8107, "step": 4081 }, { "epoch": 0.07461567989471182, "grad_norm": 7.209222093140329, "learning_rate": 9.94793417443296e-06, "loss": 17.9077, "step": 4082 }, { "epoch": 0.07463395909115836, "grad_norm": 7.28439817785033, "learning_rate": 9.947891558711926e-06, "loss": 17.854, "step": 4083 }, { "epoch": 0.07465223828760488, "grad_norm": 6.633480726270058, "learning_rate": 9.947848925648961e-06, "loss": 17.5347, "step": 4084 }, { "epoch": 0.0746705174840514, "grad_norm": 8.384485505266616, "learning_rate": 9.947806275244216e-06, "loss": 18.024, "step": 4085 }, { "epoch": 0.07468879668049792, "grad_norm": 6.944517935602008, "learning_rate": 9.947763607497839e-06, "loss": 17.8275, "step": 4086 }, { "epoch": 0.07470707587694445, "grad_norm": 7.061560685453435, "learning_rate": 9.94772092240998e-06, "loss": 17.4181, "step": 4087 }, { "epoch": 0.07472535507339097, "grad_norm": 9.971019832050642, "learning_rate": 9.947678219980788e-06, "loss": 19.1205, "step": 4088 }, { "epoch": 0.0747436342698375, "grad_norm": 6.843703280412826, "learning_rate": 9.947635500210413e-06, "loss": 17.4305, "step": 4089 }, { "epoch": 0.07476191346628402, "grad_norm": 6.489921106555448, "learning_rate": 9.947592763099005e-06, "loss": 17.2924, "step": 4090 }, { "epoch": 0.07478019266273055, "grad_norm": 6.624219617143493, "learning_rate": 9.947550008646713e-06, "loss": 17.3873, "step": 4091 }, { "epoch": 0.07479847185917707, "grad_norm": 7.034030188330044, "learning_rate": 9.947507236853689e-06, "loss": 17.6899, "step": 4092 }, { "epoch": 0.07481675105562359, "grad_norm": 8.931076521754736, "learning_rate": 9.947464447720082e-06, "loss": 18.4088, "step": 4093 }, { "epoch": 0.07483503025207013, "grad_norm": 7.8038980993245515, "learning_rate": 9.947421641246038e-06, "loss": 18.0855, "step": 4094 }, { "epoch": 0.07485330944851665, "grad_norm": 7.184042813121832, "learning_rate": 9.947378817431714e-06, "loss": 17.754, "step": 4095 }, { "epoch": 0.07487158864496317, "grad_norm": 6.9364919302493835, "learning_rate": 9.947335976277256e-06, "loss": 17.6305, "step": 4096 }, { "epoch": 0.07488986784140969, "grad_norm": 7.332355591307151, "learning_rate": 9.947293117782813e-06, "loss": 17.736, "step": 4097 }, { "epoch": 0.07490814703785621, "grad_norm": 6.37794444152234, "learning_rate": 9.947250241948538e-06, "loss": 17.3367, "step": 4098 }, { "epoch": 0.07492642623430273, "grad_norm": 7.422075968050891, "learning_rate": 9.94720734877458e-06, "loss": 17.7243, "step": 4099 }, { "epoch": 0.07494470543074927, "grad_norm": 6.284693446169713, "learning_rate": 9.94716443826109e-06, "loss": 17.6514, "step": 4100 }, { "epoch": 0.07496298462719579, "grad_norm": 6.969561100740545, "learning_rate": 9.947121510408218e-06, "loss": 17.3915, "step": 4101 }, { "epoch": 0.07498126382364231, "grad_norm": 7.057265356575579, "learning_rate": 9.947078565216112e-06, "loss": 17.8751, "step": 4102 }, { "epoch": 0.07499954302008884, "grad_norm": 8.521052023661452, "learning_rate": 9.947035602684929e-06, "loss": 18.1689, "step": 4103 }, { "epoch": 0.07501782221653536, "grad_norm": 8.966329658182065, "learning_rate": 9.946992622814813e-06, "loss": 18.0758, "step": 4104 }, { "epoch": 0.07503610141298188, "grad_norm": 6.917196134949586, "learning_rate": 9.946949625605918e-06, "loss": 17.7005, "step": 4105 }, { "epoch": 0.07505438060942841, "grad_norm": 7.431754061761407, "learning_rate": 9.946906611058394e-06, "loss": 17.5164, "step": 4106 }, { "epoch": 0.07507265980587494, "grad_norm": 7.824983042402745, "learning_rate": 9.94686357917239e-06, "loss": 18.0068, "step": 4107 }, { "epoch": 0.07509093900232146, "grad_norm": 6.246252175853513, "learning_rate": 9.94682052994806e-06, "loss": 17.4046, "step": 4108 }, { "epoch": 0.07510921819876798, "grad_norm": 8.081686411815316, "learning_rate": 9.946777463385552e-06, "loss": 18.3135, "step": 4109 }, { "epoch": 0.0751274973952145, "grad_norm": 7.337183123648379, "learning_rate": 9.94673437948502e-06, "loss": 17.6567, "step": 4110 }, { "epoch": 0.07514577659166104, "grad_norm": 7.780530171818472, "learning_rate": 9.946691278246611e-06, "loss": 17.9705, "step": 4111 }, { "epoch": 0.07516405578810756, "grad_norm": 6.994340854220237, "learning_rate": 9.94664815967048e-06, "loss": 17.965, "step": 4112 }, { "epoch": 0.07518233498455408, "grad_norm": 9.142171164436062, "learning_rate": 9.946605023756776e-06, "loss": 18.7063, "step": 4113 }, { "epoch": 0.0752006141810006, "grad_norm": 8.526814159440248, "learning_rate": 9.94656187050565e-06, "loss": 18.3471, "step": 4114 }, { "epoch": 0.07521889337744712, "grad_norm": 8.498938940911465, "learning_rate": 9.946518699917254e-06, "loss": 18.1441, "step": 4115 }, { "epoch": 0.07523717257389365, "grad_norm": 8.097700734472312, "learning_rate": 9.94647551199174e-06, "loss": 18.1501, "step": 4116 }, { "epoch": 0.07525545177034018, "grad_norm": 7.225025441865226, "learning_rate": 9.94643230672926e-06, "loss": 17.9419, "step": 4117 }, { "epoch": 0.0752737309667867, "grad_norm": 6.190358687026648, "learning_rate": 9.946389084129962e-06, "loss": 17.3257, "step": 4118 }, { "epoch": 0.07529201016323323, "grad_norm": 7.727382977574147, "learning_rate": 9.946345844194e-06, "loss": 18.0806, "step": 4119 }, { "epoch": 0.07531028935967975, "grad_norm": 8.843536075563375, "learning_rate": 9.946302586921523e-06, "loss": 18.3376, "step": 4120 }, { "epoch": 0.07532856855612627, "grad_norm": 7.095710743979115, "learning_rate": 9.946259312312687e-06, "loss": 17.499, "step": 4121 }, { "epoch": 0.07534684775257279, "grad_norm": 9.882176262423766, "learning_rate": 9.946216020367641e-06, "loss": 18.5481, "step": 4122 }, { "epoch": 0.07536512694901933, "grad_norm": 6.449542517877799, "learning_rate": 9.946172711086538e-06, "loss": 17.4244, "step": 4123 }, { "epoch": 0.07538340614546585, "grad_norm": 6.409241883041255, "learning_rate": 9.946129384469526e-06, "loss": 17.2406, "step": 4124 }, { "epoch": 0.07540168534191237, "grad_norm": 7.892889481909539, "learning_rate": 9.946086040516762e-06, "loss": 18.6329, "step": 4125 }, { "epoch": 0.07541996453835889, "grad_norm": 7.326463313717324, "learning_rate": 9.946042679228394e-06, "loss": 17.5176, "step": 4126 }, { "epoch": 0.07543824373480541, "grad_norm": 7.5983136570427785, "learning_rate": 9.945999300604578e-06, "loss": 18.0372, "step": 4127 }, { "epoch": 0.07545652293125195, "grad_norm": 8.566667387731414, "learning_rate": 9.945955904645461e-06, "loss": 18.5739, "step": 4128 }, { "epoch": 0.07547480212769847, "grad_norm": 6.308984762709798, "learning_rate": 9.945912491351199e-06, "loss": 17.4579, "step": 4129 }, { "epoch": 0.07549308132414499, "grad_norm": 7.636671245026625, "learning_rate": 9.945869060721945e-06, "loss": 17.8382, "step": 4130 }, { "epoch": 0.07551136052059151, "grad_norm": 7.152409547096006, "learning_rate": 9.945825612757846e-06, "loss": 17.6699, "step": 4131 }, { "epoch": 0.07552963971703804, "grad_norm": 6.251052994274618, "learning_rate": 9.945782147459057e-06, "loss": 17.6965, "step": 4132 }, { "epoch": 0.07554791891348456, "grad_norm": 6.497513929491633, "learning_rate": 9.945738664825734e-06, "loss": 17.6654, "step": 4133 }, { "epoch": 0.0755661981099311, "grad_norm": 7.20433608214677, "learning_rate": 9.945695164858024e-06, "loss": 17.6154, "step": 4134 }, { "epoch": 0.07558447730637761, "grad_norm": 8.678988533293408, "learning_rate": 9.945651647556083e-06, "loss": 18.2711, "step": 4135 }, { "epoch": 0.07560275650282414, "grad_norm": 7.675365740961694, "learning_rate": 9.94560811292006e-06, "loss": 17.7857, "step": 4136 }, { "epoch": 0.07562103569927066, "grad_norm": 6.623604759756009, "learning_rate": 9.945564560950112e-06, "loss": 17.5991, "step": 4137 }, { "epoch": 0.07563931489571718, "grad_norm": 6.792192435163245, "learning_rate": 9.945520991646387e-06, "loss": 17.7569, "step": 4138 }, { "epoch": 0.0756575940921637, "grad_norm": 6.2358037854952535, "learning_rate": 9.945477405009041e-06, "loss": 17.4349, "step": 4139 }, { "epoch": 0.07567587328861024, "grad_norm": 6.7391726750467225, "learning_rate": 9.945433801038226e-06, "loss": 17.6439, "step": 4140 }, { "epoch": 0.07569415248505676, "grad_norm": 6.574972610136312, "learning_rate": 9.945390179734095e-06, "loss": 17.4439, "step": 4141 }, { "epoch": 0.07571243168150328, "grad_norm": 6.728158734735512, "learning_rate": 9.9453465410968e-06, "loss": 17.617, "step": 4142 }, { "epoch": 0.0757307108779498, "grad_norm": 7.279794345302788, "learning_rate": 9.945302885126496e-06, "loss": 18.1515, "step": 4143 }, { "epoch": 0.07574899007439632, "grad_norm": 7.039529505380779, "learning_rate": 9.945259211823332e-06, "loss": 17.6819, "step": 4144 }, { "epoch": 0.07576726927084286, "grad_norm": 6.995006558034378, "learning_rate": 9.945215521187465e-06, "loss": 17.8893, "step": 4145 }, { "epoch": 0.07578554846728938, "grad_norm": 7.013601369894375, "learning_rate": 9.945171813219048e-06, "loss": 17.7406, "step": 4146 }, { "epoch": 0.0758038276637359, "grad_norm": 7.644985109226018, "learning_rate": 9.945128087918233e-06, "loss": 17.93, "step": 4147 }, { "epoch": 0.07582210686018243, "grad_norm": 8.513745268468803, "learning_rate": 9.945084345285172e-06, "loss": 18.1143, "step": 4148 }, { "epoch": 0.07584038605662895, "grad_norm": 8.338563306688147, "learning_rate": 9.945040585320019e-06, "loss": 18.039, "step": 4149 }, { "epoch": 0.07585866525307547, "grad_norm": 8.422151833158328, "learning_rate": 9.944996808022926e-06, "loss": 17.9575, "step": 4150 }, { "epoch": 0.075876944449522, "grad_norm": 7.36414342599897, "learning_rate": 9.944953013394053e-06, "loss": 17.6962, "step": 4151 }, { "epoch": 0.07589522364596853, "grad_norm": 6.611161073638273, "learning_rate": 9.944909201433545e-06, "loss": 17.4743, "step": 4152 }, { "epoch": 0.07591350284241505, "grad_norm": 8.11113165044071, "learning_rate": 9.944865372141562e-06, "loss": 17.8998, "step": 4153 }, { "epoch": 0.07593178203886157, "grad_norm": 8.147540883815966, "learning_rate": 9.94482152551825e-06, "loss": 17.7785, "step": 4154 }, { "epoch": 0.07595006123530809, "grad_norm": 6.037791007125641, "learning_rate": 9.944777661563774e-06, "loss": 17.1556, "step": 4155 }, { "epoch": 0.07596834043175461, "grad_norm": 8.005885219212052, "learning_rate": 9.944733780278277e-06, "loss": 18.1231, "step": 4156 }, { "epoch": 0.07598661962820115, "grad_norm": 7.258942344087547, "learning_rate": 9.94468988166192e-06, "loss": 17.6645, "step": 4157 }, { "epoch": 0.07600489882464767, "grad_norm": 6.8830099394215285, "learning_rate": 9.944645965714851e-06, "loss": 17.6559, "step": 4158 }, { "epoch": 0.07602317802109419, "grad_norm": 6.907603205447108, "learning_rate": 9.94460203243723e-06, "loss": 17.5304, "step": 4159 }, { "epoch": 0.07604145721754071, "grad_norm": 7.609951382296154, "learning_rate": 9.944558081829208e-06, "loss": 17.7897, "step": 4160 }, { "epoch": 0.07605973641398724, "grad_norm": 8.234379233176638, "learning_rate": 9.944514113890938e-06, "loss": 18.4789, "step": 4161 }, { "epoch": 0.07607801561043377, "grad_norm": 8.28646045976863, "learning_rate": 9.944470128622574e-06, "loss": 17.8717, "step": 4162 }, { "epoch": 0.0760962948068803, "grad_norm": 7.497066969234497, "learning_rate": 9.944426126024271e-06, "loss": 17.9437, "step": 4163 }, { "epoch": 0.07611457400332682, "grad_norm": 7.739957927204389, "learning_rate": 9.944382106096186e-06, "loss": 17.8309, "step": 4164 }, { "epoch": 0.07613285319977334, "grad_norm": 8.3163273818476, "learning_rate": 9.944338068838469e-06, "loss": 18.029, "step": 4165 }, { "epoch": 0.07615113239621986, "grad_norm": 7.273911546813022, "learning_rate": 9.944294014251276e-06, "loss": 17.7673, "step": 4166 }, { "epoch": 0.07616941159266638, "grad_norm": 7.337268981521612, "learning_rate": 9.944249942334763e-06, "loss": 17.8419, "step": 4167 }, { "epoch": 0.07618769078911292, "grad_norm": 8.363235672027164, "learning_rate": 9.944205853089081e-06, "loss": 18.3541, "step": 4168 }, { "epoch": 0.07620596998555944, "grad_norm": 8.452200252193368, "learning_rate": 9.944161746514387e-06, "loss": 17.9309, "step": 4169 }, { "epoch": 0.07622424918200596, "grad_norm": 8.218646460160082, "learning_rate": 9.944117622610836e-06, "loss": 18.2633, "step": 4170 }, { "epoch": 0.07624252837845248, "grad_norm": 6.357008423240061, "learning_rate": 9.944073481378581e-06, "loss": 17.5283, "step": 4171 }, { "epoch": 0.076260807574899, "grad_norm": 7.7439428807246715, "learning_rate": 9.94402932281778e-06, "loss": 17.9626, "step": 4172 }, { "epoch": 0.07627908677134553, "grad_norm": 7.774958259158812, "learning_rate": 9.94398514692858e-06, "loss": 18.0529, "step": 4173 }, { "epoch": 0.07629736596779206, "grad_norm": 7.238700095656024, "learning_rate": 9.943940953711147e-06, "loss": 17.8406, "step": 4174 }, { "epoch": 0.07631564516423858, "grad_norm": 6.849452434091139, "learning_rate": 9.943896743165626e-06, "loss": 17.5181, "step": 4175 }, { "epoch": 0.0763339243606851, "grad_norm": 6.757854383035227, "learning_rate": 9.943852515292177e-06, "loss": 17.6835, "step": 4176 }, { "epoch": 0.07635220355713163, "grad_norm": 7.455393760134097, "learning_rate": 9.943808270090953e-06, "loss": 17.6921, "step": 4177 }, { "epoch": 0.07637048275357815, "grad_norm": 5.992826452646012, "learning_rate": 9.943764007562113e-06, "loss": 17.6179, "step": 4178 }, { "epoch": 0.07638876195002468, "grad_norm": 6.886465080017079, "learning_rate": 9.943719727705807e-06, "loss": 17.6493, "step": 4179 }, { "epoch": 0.0764070411464712, "grad_norm": 8.684834787939241, "learning_rate": 9.943675430522191e-06, "loss": 18.7955, "step": 4180 }, { "epoch": 0.07642532034291773, "grad_norm": 7.323220417378751, "learning_rate": 9.943631116011424e-06, "loss": 17.9174, "step": 4181 }, { "epoch": 0.07644359953936425, "grad_norm": 10.592096380410188, "learning_rate": 9.943586784173657e-06, "loss": 18.4581, "step": 4182 }, { "epoch": 0.07646187873581077, "grad_norm": 7.469103899829183, "learning_rate": 9.94354243500905e-06, "loss": 17.918, "step": 4183 }, { "epoch": 0.07648015793225729, "grad_norm": 8.429324116324565, "learning_rate": 9.943498068517754e-06, "loss": 18.197, "step": 4184 }, { "epoch": 0.07649843712870383, "grad_norm": 7.630150917105338, "learning_rate": 9.943453684699927e-06, "loss": 18.0674, "step": 4185 }, { "epoch": 0.07651671632515035, "grad_norm": 7.5848018531771855, "learning_rate": 9.943409283555722e-06, "loss": 17.9029, "step": 4186 }, { "epoch": 0.07653499552159687, "grad_norm": 6.024753089526956, "learning_rate": 9.943364865085298e-06, "loss": 17.2247, "step": 4187 }, { "epoch": 0.0765532747180434, "grad_norm": 7.86967665440768, "learning_rate": 9.943320429288808e-06, "loss": 17.8321, "step": 4188 }, { "epoch": 0.07657155391448991, "grad_norm": 6.302985308683977, "learning_rate": 9.943275976166412e-06, "loss": 17.1027, "step": 4189 }, { "epoch": 0.07658983311093644, "grad_norm": 6.833109397879678, "learning_rate": 9.943231505718258e-06, "loss": 17.6025, "step": 4190 }, { "epoch": 0.07660811230738297, "grad_norm": 8.365916398266814, "learning_rate": 9.94318701794451e-06, "loss": 18.2189, "step": 4191 }, { "epoch": 0.0766263915038295, "grad_norm": 8.477496443844009, "learning_rate": 9.943142512845318e-06, "loss": 18.3351, "step": 4192 }, { "epoch": 0.07664467070027602, "grad_norm": 6.698769837563342, "learning_rate": 9.943097990420843e-06, "loss": 17.3659, "step": 4193 }, { "epoch": 0.07666294989672254, "grad_norm": 7.8840085628076935, "learning_rate": 9.943053450671236e-06, "loss": 18.0192, "step": 4194 }, { "epoch": 0.07668122909316906, "grad_norm": 6.602380845223933, "learning_rate": 9.943008893596657e-06, "loss": 17.426, "step": 4195 }, { "epoch": 0.0766995082896156, "grad_norm": 6.888029864878589, "learning_rate": 9.94296431919726e-06, "loss": 17.6183, "step": 4196 }, { "epoch": 0.07671778748606212, "grad_norm": 8.251367427605121, "learning_rate": 9.942919727473204e-06, "loss": 18.3592, "step": 4197 }, { "epoch": 0.07673606668250864, "grad_norm": 7.964296308652757, "learning_rate": 9.942875118424641e-06, "loss": 18.2928, "step": 4198 }, { "epoch": 0.07675434587895516, "grad_norm": 8.075699505373022, "learning_rate": 9.94283049205173e-06, "loss": 18.2313, "step": 4199 }, { "epoch": 0.07677262507540168, "grad_norm": 7.184827022794858, "learning_rate": 9.942785848354628e-06, "loss": 17.7186, "step": 4200 }, { "epoch": 0.0767909042718482, "grad_norm": 7.626331857415009, "learning_rate": 9.94274118733349e-06, "loss": 17.826, "step": 4201 }, { "epoch": 0.07680918346829474, "grad_norm": 6.239000698427575, "learning_rate": 9.942696508988472e-06, "loss": 17.4026, "step": 4202 }, { "epoch": 0.07682746266474126, "grad_norm": 6.543866858237177, "learning_rate": 9.942651813319733e-06, "loss": 17.5936, "step": 4203 }, { "epoch": 0.07684574186118778, "grad_norm": 8.238881612293955, "learning_rate": 9.942607100327428e-06, "loss": 18.183, "step": 4204 }, { "epoch": 0.0768640210576343, "grad_norm": 7.722407507706026, "learning_rate": 9.942562370011714e-06, "loss": 18.4659, "step": 4205 }, { "epoch": 0.07688230025408083, "grad_norm": 7.565818940073065, "learning_rate": 9.942517622372749e-06, "loss": 17.9724, "step": 4206 }, { "epoch": 0.07690057945052735, "grad_norm": 7.277322276406721, "learning_rate": 9.942472857410687e-06, "loss": 17.6006, "step": 4207 }, { "epoch": 0.07691885864697388, "grad_norm": 6.97125103493985, "learning_rate": 9.942428075125688e-06, "loss": 17.4252, "step": 4208 }, { "epoch": 0.0769371378434204, "grad_norm": 8.40918433822153, "learning_rate": 9.942383275517905e-06, "loss": 18.2623, "step": 4209 }, { "epoch": 0.07695541703986693, "grad_norm": 7.667828948326944, "learning_rate": 9.942338458587499e-06, "loss": 17.804, "step": 4210 }, { "epoch": 0.07697369623631345, "grad_norm": 8.03912646465718, "learning_rate": 9.942293624334627e-06, "loss": 18.2884, "step": 4211 }, { "epoch": 0.07699197543275997, "grad_norm": 7.589292928257367, "learning_rate": 9.942248772759443e-06, "loss": 17.6766, "step": 4212 }, { "epoch": 0.0770102546292065, "grad_norm": 7.67375910134019, "learning_rate": 9.942203903862105e-06, "loss": 17.9565, "step": 4213 }, { "epoch": 0.07702853382565303, "grad_norm": 8.881236727814375, "learning_rate": 9.942159017642772e-06, "loss": 18.1565, "step": 4214 }, { "epoch": 0.07704681302209955, "grad_norm": 7.164802997735672, "learning_rate": 9.9421141141016e-06, "loss": 17.4539, "step": 4215 }, { "epoch": 0.07706509221854607, "grad_norm": 7.440014489282296, "learning_rate": 9.942069193238749e-06, "loss": 17.8582, "step": 4216 }, { "epoch": 0.0770833714149926, "grad_norm": 7.292631624423447, "learning_rate": 9.942024255054373e-06, "loss": 17.6257, "step": 4217 }, { "epoch": 0.07710165061143912, "grad_norm": 7.431778358488795, "learning_rate": 9.941979299548631e-06, "loss": 17.9583, "step": 4218 }, { "epoch": 0.07711992980788565, "grad_norm": 8.686717295254155, "learning_rate": 9.94193432672168e-06, "loss": 18.0693, "step": 4219 }, { "epoch": 0.07713820900433217, "grad_norm": 7.077506617393422, "learning_rate": 9.941889336573678e-06, "loss": 17.575, "step": 4220 }, { "epoch": 0.0771564882007787, "grad_norm": 7.241133479233001, "learning_rate": 9.941844329104784e-06, "loss": 17.4906, "step": 4221 }, { "epoch": 0.07717476739722522, "grad_norm": 8.624029205933166, "learning_rate": 9.941799304315152e-06, "loss": 18.6572, "step": 4222 }, { "epoch": 0.07719304659367174, "grad_norm": 6.033995693434952, "learning_rate": 9.941754262204945e-06, "loss": 17.4807, "step": 4223 }, { "epoch": 0.07721132579011826, "grad_norm": 6.659671442220002, "learning_rate": 9.941709202774317e-06, "loss": 17.5266, "step": 4224 }, { "epoch": 0.0772296049865648, "grad_norm": 7.65291781378316, "learning_rate": 9.941664126023427e-06, "loss": 17.9475, "step": 4225 }, { "epoch": 0.07724788418301132, "grad_norm": 7.73272908049448, "learning_rate": 9.941619031952433e-06, "loss": 18.2557, "step": 4226 }, { "epoch": 0.07726616337945784, "grad_norm": 7.436490386916161, "learning_rate": 9.941573920561493e-06, "loss": 17.7364, "step": 4227 }, { "epoch": 0.07728444257590436, "grad_norm": 7.1492946285383985, "learning_rate": 9.941528791850765e-06, "loss": 17.4325, "step": 4228 }, { "epoch": 0.07730272177235088, "grad_norm": 7.145356946317946, "learning_rate": 9.941483645820406e-06, "loss": 17.8871, "step": 4229 }, { "epoch": 0.07732100096879742, "grad_norm": 7.033499107558293, "learning_rate": 9.941438482470578e-06, "loss": 17.6935, "step": 4230 }, { "epoch": 0.07733928016524394, "grad_norm": 9.827010628938945, "learning_rate": 9.941393301801436e-06, "loss": 18.3654, "step": 4231 }, { "epoch": 0.07735755936169046, "grad_norm": 7.0975941131186735, "learning_rate": 9.94134810381314e-06, "loss": 18.0554, "step": 4232 }, { "epoch": 0.07737583855813698, "grad_norm": 5.92454778215829, "learning_rate": 9.941302888505848e-06, "loss": 17.1777, "step": 4233 }, { "epoch": 0.0773941177545835, "grad_norm": 6.959103247304966, "learning_rate": 9.941257655879717e-06, "loss": 17.8269, "step": 4234 }, { "epoch": 0.07741239695103003, "grad_norm": 7.022073491872154, "learning_rate": 9.941212405934907e-06, "loss": 17.5797, "step": 4235 }, { "epoch": 0.07743067614747656, "grad_norm": 7.1063690036947715, "learning_rate": 9.941167138671577e-06, "loss": 17.7828, "step": 4236 }, { "epoch": 0.07744895534392308, "grad_norm": 7.954584175919247, "learning_rate": 9.941121854089883e-06, "loss": 17.995, "step": 4237 }, { "epoch": 0.0774672345403696, "grad_norm": 6.729010062244928, "learning_rate": 9.941076552189987e-06, "loss": 17.4944, "step": 4238 }, { "epoch": 0.07748551373681613, "grad_norm": 6.587494040315997, "learning_rate": 9.941031232972047e-06, "loss": 17.5591, "step": 4239 }, { "epoch": 0.07750379293326265, "grad_norm": 5.837888384091902, "learning_rate": 9.940985896436221e-06, "loss": 17.161, "step": 4240 }, { "epoch": 0.07752207212970917, "grad_norm": 7.4309429538383425, "learning_rate": 9.940940542582668e-06, "loss": 17.7261, "step": 4241 }, { "epoch": 0.07754035132615571, "grad_norm": 7.308490142334014, "learning_rate": 9.940895171411547e-06, "loss": 18.0319, "step": 4242 }, { "epoch": 0.07755863052260223, "grad_norm": 7.27763941755643, "learning_rate": 9.940849782923016e-06, "loss": 17.8745, "step": 4243 }, { "epoch": 0.07757690971904875, "grad_norm": 7.929012297932565, "learning_rate": 9.940804377117238e-06, "loss": 17.8578, "step": 4244 }, { "epoch": 0.07759518891549527, "grad_norm": 8.097441680087801, "learning_rate": 9.940758953994367e-06, "loss": 18.4698, "step": 4245 }, { "epoch": 0.0776134681119418, "grad_norm": 8.606117810919702, "learning_rate": 9.940713513554567e-06, "loss": 18.5981, "step": 4246 }, { "epoch": 0.07763174730838833, "grad_norm": 7.9802614013269615, "learning_rate": 9.940668055797993e-06, "loss": 17.9609, "step": 4247 }, { "epoch": 0.07765002650483485, "grad_norm": 6.401310771525295, "learning_rate": 9.940622580724806e-06, "loss": 17.5281, "step": 4248 }, { "epoch": 0.07766830570128137, "grad_norm": 7.765152466156165, "learning_rate": 9.940577088335167e-06, "loss": 18.4588, "step": 4249 }, { "epoch": 0.0776865848977279, "grad_norm": 11.812201013267776, "learning_rate": 9.940531578629234e-06, "loss": 18.3213, "step": 4250 }, { "epoch": 0.07770486409417442, "grad_norm": 8.107179031342193, "learning_rate": 9.940486051607164e-06, "loss": 18.1375, "step": 4251 }, { "epoch": 0.07772314329062094, "grad_norm": 7.390909636393926, "learning_rate": 9.940440507269121e-06, "loss": 18.0795, "step": 4252 }, { "epoch": 0.07774142248706747, "grad_norm": 6.608999100204548, "learning_rate": 9.940394945615262e-06, "loss": 17.4263, "step": 4253 }, { "epoch": 0.077759701683514, "grad_norm": 7.24455911996213, "learning_rate": 9.94034936664575e-06, "loss": 17.7903, "step": 4254 }, { "epoch": 0.07777798087996052, "grad_norm": 6.510455486033311, "learning_rate": 9.94030377036074e-06, "loss": 17.4657, "step": 4255 }, { "epoch": 0.07779626007640704, "grad_norm": 6.702799210893266, "learning_rate": 9.940258156760393e-06, "loss": 17.7637, "step": 4256 }, { "epoch": 0.07781453927285356, "grad_norm": 6.25184406311624, "learning_rate": 9.940212525844872e-06, "loss": 17.3767, "step": 4257 }, { "epoch": 0.07783281846930008, "grad_norm": 6.063002518150071, "learning_rate": 9.940166877614332e-06, "loss": 17.1084, "step": 4258 }, { "epoch": 0.07785109766574662, "grad_norm": 6.809055593118283, "learning_rate": 9.940121212068938e-06, "loss": 17.5896, "step": 4259 }, { "epoch": 0.07786937686219314, "grad_norm": 7.500670999454073, "learning_rate": 9.940075529208848e-06, "loss": 18.0046, "step": 4260 }, { "epoch": 0.07788765605863966, "grad_norm": 7.555990688721894, "learning_rate": 9.940029829034221e-06, "loss": 17.9302, "step": 4261 }, { "epoch": 0.07790593525508618, "grad_norm": 7.888456038024813, "learning_rate": 9.93998411154522e-06, "loss": 18.5802, "step": 4262 }, { "epoch": 0.0779242144515327, "grad_norm": 7.8178178993913745, "learning_rate": 9.939938376742e-06, "loss": 18.2034, "step": 4263 }, { "epoch": 0.07794249364797924, "grad_norm": 7.175630237951054, "learning_rate": 9.939892624624727e-06, "loss": 17.7728, "step": 4264 }, { "epoch": 0.07796077284442576, "grad_norm": 7.269642141011237, "learning_rate": 9.939846855193558e-06, "loss": 17.5007, "step": 4265 }, { "epoch": 0.07797905204087228, "grad_norm": 8.343995757845725, "learning_rate": 9.939801068448655e-06, "loss": 18.0206, "step": 4266 }, { "epoch": 0.0779973312373188, "grad_norm": 8.104335902213569, "learning_rate": 9.939755264390179e-06, "loss": 17.8248, "step": 4267 }, { "epoch": 0.07801561043376533, "grad_norm": 6.52825714404, "learning_rate": 9.939709443018288e-06, "loss": 17.5734, "step": 4268 }, { "epoch": 0.07803388963021185, "grad_norm": 7.635623842563316, "learning_rate": 9.939663604333144e-06, "loss": 18.1016, "step": 4269 }, { "epoch": 0.07805216882665839, "grad_norm": 7.086943496847928, "learning_rate": 9.939617748334908e-06, "loss": 17.8211, "step": 4270 }, { "epoch": 0.07807044802310491, "grad_norm": 6.366837535999178, "learning_rate": 9.939571875023742e-06, "loss": 17.3253, "step": 4271 }, { "epoch": 0.07808872721955143, "grad_norm": 10.72186509949408, "learning_rate": 9.939525984399803e-06, "loss": 18.0906, "step": 4272 }, { "epoch": 0.07810700641599795, "grad_norm": 8.041045026155597, "learning_rate": 9.939480076463255e-06, "loss": 17.9549, "step": 4273 }, { "epoch": 0.07812528561244447, "grad_norm": 8.001466904872732, "learning_rate": 9.939434151214258e-06, "loss": 18.0185, "step": 4274 }, { "epoch": 0.078143564808891, "grad_norm": 8.31181029522459, "learning_rate": 9.93938820865297e-06, "loss": 17.6518, "step": 4275 }, { "epoch": 0.07816184400533753, "grad_norm": 7.490995115319857, "learning_rate": 9.93934224877956e-06, "loss": 17.8578, "step": 4276 }, { "epoch": 0.07818012320178405, "grad_norm": 6.490426920828199, "learning_rate": 9.939296271594181e-06, "loss": 17.338, "step": 4277 }, { "epoch": 0.07819840239823057, "grad_norm": 7.262720232209258, "learning_rate": 9.939250277096998e-06, "loss": 17.7793, "step": 4278 }, { "epoch": 0.0782166815946771, "grad_norm": 8.556554607249234, "learning_rate": 9.939204265288172e-06, "loss": 18.791, "step": 4279 }, { "epoch": 0.07823496079112362, "grad_norm": 6.868949045187712, "learning_rate": 9.939158236167863e-06, "loss": 17.5968, "step": 4280 }, { "epoch": 0.07825323998757015, "grad_norm": 6.888572727906375, "learning_rate": 9.939112189736231e-06, "loss": 17.615, "step": 4281 }, { "epoch": 0.07827151918401667, "grad_norm": 8.452397434426997, "learning_rate": 9.939066125993442e-06, "loss": 18.4356, "step": 4282 }, { "epoch": 0.0782897983804632, "grad_norm": 7.715575386065501, "learning_rate": 9.939020044939654e-06, "loss": 18.2356, "step": 4283 }, { "epoch": 0.07830807757690972, "grad_norm": 7.778073512619908, "learning_rate": 9.93897394657503e-06, "loss": 18.0708, "step": 4284 }, { "epoch": 0.07832635677335624, "grad_norm": 6.902819822531594, "learning_rate": 9.938927830899729e-06, "loss": 17.7574, "step": 4285 }, { "epoch": 0.07834463596980276, "grad_norm": 6.658068507941403, "learning_rate": 9.938881697913917e-06, "loss": 17.5977, "step": 4286 }, { "epoch": 0.0783629151662493, "grad_norm": 7.359021623098024, "learning_rate": 9.93883554761775e-06, "loss": 17.7763, "step": 4287 }, { "epoch": 0.07838119436269582, "grad_norm": 7.15910064266547, "learning_rate": 9.938789380011396e-06, "loss": 17.4, "step": 4288 }, { "epoch": 0.07839947355914234, "grad_norm": 7.149554481888531, "learning_rate": 9.938743195095013e-06, "loss": 17.796, "step": 4289 }, { "epoch": 0.07841775275558886, "grad_norm": 6.898549445763817, "learning_rate": 9.938696992868764e-06, "loss": 17.9461, "step": 4290 }, { "epoch": 0.07843603195203538, "grad_norm": 7.216662003346921, "learning_rate": 9.93865077333281e-06, "loss": 17.7372, "step": 4291 }, { "epoch": 0.0784543111484819, "grad_norm": 8.810328140667025, "learning_rate": 9.938604536487314e-06, "loss": 17.9731, "step": 4292 }, { "epoch": 0.07847259034492844, "grad_norm": 8.580926615759612, "learning_rate": 9.938558282332438e-06, "loss": 17.9521, "step": 4293 }, { "epoch": 0.07849086954137496, "grad_norm": 7.240334581817721, "learning_rate": 9.938512010868342e-06, "loss": 17.9799, "step": 4294 }, { "epoch": 0.07850914873782149, "grad_norm": 7.753443804416742, "learning_rate": 9.938465722095192e-06, "loss": 18.105, "step": 4295 }, { "epoch": 0.07852742793426801, "grad_norm": 7.328982040101361, "learning_rate": 9.938419416013146e-06, "loss": 17.6112, "step": 4296 }, { "epoch": 0.07854570713071453, "grad_norm": 8.148839296910063, "learning_rate": 9.93837309262237e-06, "loss": 18.1112, "step": 4297 }, { "epoch": 0.07856398632716106, "grad_norm": 7.801790674173154, "learning_rate": 9.938326751923025e-06, "loss": 17.9724, "step": 4298 }, { "epoch": 0.07858226552360759, "grad_norm": 7.505775094420256, "learning_rate": 9.938280393915272e-06, "loss": 17.8813, "step": 4299 }, { "epoch": 0.07860054472005411, "grad_norm": 7.702707200415012, "learning_rate": 9.938234018599275e-06, "loss": 17.8986, "step": 4300 }, { "epoch": 0.07861882391650063, "grad_norm": 7.049950975054393, "learning_rate": 9.938187625975197e-06, "loss": 17.6547, "step": 4301 }, { "epoch": 0.07863710311294715, "grad_norm": 8.44393217676752, "learning_rate": 9.9381412160432e-06, "loss": 17.6841, "step": 4302 }, { "epoch": 0.07865538230939367, "grad_norm": 7.337931856883426, "learning_rate": 9.938094788803448e-06, "loss": 17.7211, "step": 4303 }, { "epoch": 0.07867366150584021, "grad_norm": 8.163342510925498, "learning_rate": 9.938048344256099e-06, "loss": 18.1403, "step": 4304 }, { "epoch": 0.07869194070228673, "grad_norm": 6.903131863262117, "learning_rate": 9.938001882401321e-06, "loss": 17.3792, "step": 4305 }, { "epoch": 0.07871021989873325, "grad_norm": 6.557663602936739, "learning_rate": 9.937955403239275e-06, "loss": 17.4879, "step": 4306 }, { "epoch": 0.07872849909517977, "grad_norm": 8.7068292546031, "learning_rate": 9.937908906770122e-06, "loss": 18.3442, "step": 4307 }, { "epoch": 0.0787467782916263, "grad_norm": 7.533992396360987, "learning_rate": 9.937862392994029e-06, "loss": 18.1053, "step": 4308 }, { "epoch": 0.07876505748807282, "grad_norm": 7.633646651233243, "learning_rate": 9.937815861911155e-06, "loss": 18.0692, "step": 4309 }, { "epoch": 0.07878333668451935, "grad_norm": 8.756093481318945, "learning_rate": 9.937769313521666e-06, "loss": 18.8583, "step": 4310 }, { "epoch": 0.07880161588096588, "grad_norm": 7.024380608001842, "learning_rate": 9.937722747825724e-06, "loss": 18.0651, "step": 4311 }, { "epoch": 0.0788198950774124, "grad_norm": 6.308581936823587, "learning_rate": 9.937676164823493e-06, "loss": 17.2601, "step": 4312 }, { "epoch": 0.07883817427385892, "grad_norm": 8.201208200163915, "learning_rate": 9.937629564515134e-06, "loss": 18.1754, "step": 4313 }, { "epoch": 0.07885645347030544, "grad_norm": 6.615981830198527, "learning_rate": 9.937582946900812e-06, "loss": 17.5142, "step": 4314 }, { "epoch": 0.07887473266675198, "grad_norm": 6.925562656026488, "learning_rate": 9.93753631198069e-06, "loss": 17.6064, "step": 4315 }, { "epoch": 0.0788930118631985, "grad_norm": 7.071526950789263, "learning_rate": 9.937489659754933e-06, "loss": 17.6915, "step": 4316 }, { "epoch": 0.07891129105964502, "grad_norm": 6.93320424115655, "learning_rate": 9.937442990223703e-06, "loss": 17.8624, "step": 4317 }, { "epoch": 0.07892957025609154, "grad_norm": 7.17766099048035, "learning_rate": 9.937396303387164e-06, "loss": 17.812, "step": 4318 }, { "epoch": 0.07894784945253806, "grad_norm": 6.987283203511792, "learning_rate": 9.937349599245478e-06, "loss": 17.4972, "step": 4319 }, { "epoch": 0.07896612864898458, "grad_norm": 7.177250193446535, "learning_rate": 9.937302877798811e-06, "loss": 17.7235, "step": 4320 }, { "epoch": 0.07898440784543112, "grad_norm": 5.464667511273553, "learning_rate": 9.937256139047326e-06, "loss": 17.2885, "step": 4321 }, { "epoch": 0.07900268704187764, "grad_norm": 7.365873459882448, "learning_rate": 9.937209382991187e-06, "loss": 17.5206, "step": 4322 }, { "epoch": 0.07902096623832416, "grad_norm": 7.726819290001141, "learning_rate": 9.937162609630556e-06, "loss": 18.4063, "step": 4323 }, { "epoch": 0.07903924543477069, "grad_norm": 9.092986251084202, "learning_rate": 9.937115818965599e-06, "loss": 18.6479, "step": 4324 }, { "epoch": 0.07905752463121721, "grad_norm": 8.531475571100337, "learning_rate": 9.93706901099648e-06, "loss": 18.4475, "step": 4325 }, { "epoch": 0.07907580382766373, "grad_norm": 7.452186063785753, "learning_rate": 9.937022185723363e-06, "loss": 18.0895, "step": 4326 }, { "epoch": 0.07909408302411026, "grad_norm": 7.9536205749635345, "learning_rate": 9.936975343146411e-06, "loss": 18.2044, "step": 4327 }, { "epoch": 0.07911236222055679, "grad_norm": 9.890426772928912, "learning_rate": 9.93692848326579e-06, "loss": 18.667, "step": 4328 }, { "epoch": 0.07913064141700331, "grad_norm": 6.224300772800686, "learning_rate": 9.936881606081662e-06, "loss": 17.4493, "step": 4329 }, { "epoch": 0.07914892061344983, "grad_norm": 7.671174616757518, "learning_rate": 9.93683471159419e-06, "loss": 17.6893, "step": 4330 }, { "epoch": 0.07916719980989635, "grad_norm": 7.377338604220668, "learning_rate": 9.936787799803544e-06, "loss": 17.8501, "step": 4331 }, { "epoch": 0.07918547900634289, "grad_norm": 6.423033613389302, "learning_rate": 9.936740870709885e-06, "loss": 17.4775, "step": 4332 }, { "epoch": 0.07920375820278941, "grad_norm": 9.213909832510181, "learning_rate": 9.936693924313377e-06, "loss": 18.5094, "step": 4333 }, { "epoch": 0.07922203739923593, "grad_norm": 7.030187660722593, "learning_rate": 9.936646960614185e-06, "loss": 17.8259, "step": 4334 }, { "epoch": 0.07924031659568245, "grad_norm": 6.524362985021903, "learning_rate": 9.936599979612473e-06, "loss": 17.3938, "step": 4335 }, { "epoch": 0.07925859579212897, "grad_norm": 6.345926930675069, "learning_rate": 9.936552981308408e-06, "loss": 17.2927, "step": 4336 }, { "epoch": 0.0792768749885755, "grad_norm": 8.18555463990253, "learning_rate": 9.93650596570215e-06, "loss": 17.9193, "step": 4337 }, { "epoch": 0.07929515418502203, "grad_norm": 7.172318325546454, "learning_rate": 9.936458932793872e-06, "loss": 17.9054, "step": 4338 }, { "epoch": 0.07931343338146855, "grad_norm": 7.01926924122479, "learning_rate": 9.93641188258373e-06, "loss": 17.7644, "step": 4339 }, { "epoch": 0.07933171257791508, "grad_norm": 7.223352662426126, "learning_rate": 9.936364815071894e-06, "loss": 17.6415, "step": 4340 }, { "epoch": 0.0793499917743616, "grad_norm": 6.99194164800054, "learning_rate": 9.936317730258527e-06, "loss": 17.6133, "step": 4341 }, { "epoch": 0.07936827097080812, "grad_norm": 7.2545172317223034, "learning_rate": 9.936270628143796e-06, "loss": 17.6298, "step": 4342 }, { "epoch": 0.07938655016725464, "grad_norm": 7.276928721931867, "learning_rate": 9.936223508727864e-06, "loss": 17.9755, "step": 4343 }, { "epoch": 0.07940482936370118, "grad_norm": 6.7277421681572, "learning_rate": 9.936176372010898e-06, "loss": 17.3964, "step": 4344 }, { "epoch": 0.0794231085601477, "grad_norm": 7.525938658288535, "learning_rate": 9.93612921799306e-06, "loss": 17.7782, "step": 4345 }, { "epoch": 0.07944138775659422, "grad_norm": 6.957726541270671, "learning_rate": 9.93608204667452e-06, "loss": 17.4248, "step": 4346 }, { "epoch": 0.07945966695304074, "grad_norm": 7.6856434204431885, "learning_rate": 9.936034858055439e-06, "loss": 18.1327, "step": 4347 }, { "epoch": 0.07947794614948726, "grad_norm": 6.898323366882175, "learning_rate": 9.935987652135983e-06, "loss": 17.616, "step": 4348 }, { "epoch": 0.0794962253459338, "grad_norm": 8.20052684478149, "learning_rate": 9.93594042891632e-06, "loss": 17.9773, "step": 4349 }, { "epoch": 0.07951450454238032, "grad_norm": 7.194753526930869, "learning_rate": 9.935893188396614e-06, "loss": 17.8848, "step": 4350 }, { "epoch": 0.07953278373882684, "grad_norm": 10.113227968945113, "learning_rate": 9.935845930577032e-06, "loss": 19.0136, "step": 4351 }, { "epoch": 0.07955106293527336, "grad_norm": 8.986283506326176, "learning_rate": 9.935798655457737e-06, "loss": 18.8284, "step": 4352 }, { "epoch": 0.07956934213171989, "grad_norm": 7.133922398959805, "learning_rate": 9.935751363038897e-06, "loss": 17.5253, "step": 4353 }, { "epoch": 0.07958762132816641, "grad_norm": 8.27673277270031, "learning_rate": 9.935704053320676e-06, "loss": 18.2957, "step": 4354 }, { "epoch": 0.07960590052461294, "grad_norm": 7.676831202603974, "learning_rate": 9.93565672630324e-06, "loss": 17.9769, "step": 4355 }, { "epoch": 0.07962417972105947, "grad_norm": 6.54709400823552, "learning_rate": 9.935609381986756e-06, "loss": 17.472, "step": 4356 }, { "epoch": 0.07964245891750599, "grad_norm": 7.033668989331142, "learning_rate": 9.93556202037139e-06, "loss": 17.5056, "step": 4357 }, { "epoch": 0.07966073811395251, "grad_norm": 5.438645737245922, "learning_rate": 9.935514641457307e-06, "loss": 16.8826, "step": 4358 }, { "epoch": 0.07967901731039903, "grad_norm": 7.4635302373287224, "learning_rate": 9.935467245244674e-06, "loss": 18.0071, "step": 4359 }, { "epoch": 0.07969729650684555, "grad_norm": 8.001907384704584, "learning_rate": 9.935419831733655e-06, "loss": 18.1448, "step": 4360 }, { "epoch": 0.07971557570329209, "grad_norm": 5.58895525179972, "learning_rate": 9.93537240092442e-06, "loss": 16.9925, "step": 4361 }, { "epoch": 0.07973385489973861, "grad_norm": 8.394101913852397, "learning_rate": 9.935324952817132e-06, "loss": 18.2056, "step": 4362 }, { "epoch": 0.07975213409618513, "grad_norm": 6.185491922238464, "learning_rate": 9.935277487411957e-06, "loss": 17.406, "step": 4363 }, { "epoch": 0.07977041329263165, "grad_norm": 7.82182368160327, "learning_rate": 9.935230004709064e-06, "loss": 18.0795, "step": 4364 }, { "epoch": 0.07978869248907818, "grad_norm": 7.832177300994306, "learning_rate": 9.935182504708619e-06, "loss": 17.9343, "step": 4365 }, { "epoch": 0.07980697168552471, "grad_norm": 8.041377318900343, "learning_rate": 9.935134987410786e-06, "loss": 17.9911, "step": 4366 }, { "epoch": 0.07982525088197123, "grad_norm": 8.440419909385117, "learning_rate": 9.935087452815733e-06, "loss": 18.4141, "step": 4367 }, { "epoch": 0.07984353007841775, "grad_norm": 7.705909091524087, "learning_rate": 9.935039900923627e-06, "loss": 18.129, "step": 4368 }, { "epoch": 0.07986180927486428, "grad_norm": 7.256927719180046, "learning_rate": 9.934992331734635e-06, "loss": 17.8265, "step": 4369 }, { "epoch": 0.0798800884713108, "grad_norm": 6.727981028320891, "learning_rate": 9.934944745248922e-06, "loss": 17.3921, "step": 4370 }, { "epoch": 0.07989836766775732, "grad_norm": 8.888879986522245, "learning_rate": 9.934897141466658e-06, "loss": 18.2255, "step": 4371 }, { "epoch": 0.07991664686420386, "grad_norm": 7.5446180440391295, "learning_rate": 9.934849520388006e-06, "loss": 17.8565, "step": 4372 }, { "epoch": 0.07993492606065038, "grad_norm": 7.762442570123377, "learning_rate": 9.934801882013136e-06, "loss": 17.9927, "step": 4373 }, { "epoch": 0.0799532052570969, "grad_norm": 7.63817698154924, "learning_rate": 9.934754226342212e-06, "loss": 17.7823, "step": 4374 }, { "epoch": 0.07997148445354342, "grad_norm": 7.623312193856963, "learning_rate": 9.934706553375404e-06, "loss": 17.9167, "step": 4375 }, { "epoch": 0.07998976364998994, "grad_norm": 7.403930180012222, "learning_rate": 9.934658863112877e-06, "loss": 17.728, "step": 4376 }, { "epoch": 0.08000804284643646, "grad_norm": 7.913128623022587, "learning_rate": 9.934611155554799e-06, "loss": 17.9917, "step": 4377 }, { "epoch": 0.080026322042883, "grad_norm": 7.8683142771557, "learning_rate": 9.934563430701336e-06, "loss": 17.7655, "step": 4378 }, { "epoch": 0.08004460123932952, "grad_norm": 8.258553859426897, "learning_rate": 9.934515688552656e-06, "loss": 18.4457, "step": 4379 }, { "epoch": 0.08006288043577604, "grad_norm": 6.9581842199049735, "learning_rate": 9.934467929108931e-06, "loss": 17.6692, "step": 4380 }, { "epoch": 0.08008115963222256, "grad_norm": 6.791523737572199, "learning_rate": 9.934420152370321e-06, "loss": 17.255, "step": 4381 }, { "epoch": 0.08009943882866909, "grad_norm": 8.517797819825347, "learning_rate": 9.934372358336996e-06, "loss": 18.1414, "step": 4382 }, { "epoch": 0.08011771802511562, "grad_norm": 7.555933529906602, "learning_rate": 9.934324547009125e-06, "loss": 17.8544, "step": 4383 }, { "epoch": 0.08013599722156214, "grad_norm": 7.502407572634842, "learning_rate": 9.934276718386874e-06, "loss": 17.6756, "step": 4384 }, { "epoch": 0.08015427641800867, "grad_norm": 8.479011498340665, "learning_rate": 9.934228872470413e-06, "loss": 18.5501, "step": 4385 }, { "epoch": 0.08017255561445519, "grad_norm": 7.359950844823468, "learning_rate": 9.934181009259907e-06, "loss": 17.9449, "step": 4386 }, { "epoch": 0.08019083481090171, "grad_norm": 8.057010421147657, "learning_rate": 9.934133128755524e-06, "loss": 17.7369, "step": 4387 }, { "epoch": 0.08020911400734823, "grad_norm": 7.298814909341879, "learning_rate": 9.934085230957434e-06, "loss": 17.8067, "step": 4388 }, { "epoch": 0.08022739320379477, "grad_norm": 6.168865086056675, "learning_rate": 9.934037315865802e-06, "loss": 17.2977, "step": 4389 }, { "epoch": 0.08024567240024129, "grad_norm": 8.712264724286051, "learning_rate": 9.933989383480797e-06, "loss": 18.6987, "step": 4390 }, { "epoch": 0.08026395159668781, "grad_norm": 8.715454270171263, "learning_rate": 9.93394143380259e-06, "loss": 18.5641, "step": 4391 }, { "epoch": 0.08028223079313433, "grad_norm": 6.871268595151202, "learning_rate": 9.933893466831344e-06, "loss": 17.7549, "step": 4392 }, { "epoch": 0.08030050998958085, "grad_norm": 6.50455131844434, "learning_rate": 9.933845482567231e-06, "loss": 17.4704, "step": 4393 }, { "epoch": 0.08031878918602738, "grad_norm": 7.495903605026569, "learning_rate": 9.933797481010417e-06, "loss": 17.7965, "step": 4394 }, { "epoch": 0.08033706838247391, "grad_norm": 7.308092299369829, "learning_rate": 9.93374946216107e-06, "loss": 17.8812, "step": 4395 }, { "epoch": 0.08035534757892043, "grad_norm": 8.936969907894749, "learning_rate": 9.93370142601936e-06, "loss": 18.3376, "step": 4396 }, { "epoch": 0.08037362677536695, "grad_norm": 7.012556279337693, "learning_rate": 9.933653372585454e-06, "loss": 17.6605, "step": 4397 }, { "epoch": 0.08039190597181348, "grad_norm": 8.659173418122123, "learning_rate": 9.933605301859523e-06, "loss": 18.6341, "step": 4398 }, { "epoch": 0.08041018516826, "grad_norm": 5.968503921611904, "learning_rate": 9.933557213841733e-06, "loss": 17.2194, "step": 4399 }, { "epoch": 0.08042846436470653, "grad_norm": 7.426403281013527, "learning_rate": 9.933509108532251e-06, "loss": 17.8206, "step": 4400 }, { "epoch": 0.08044674356115306, "grad_norm": 8.197047539262414, "learning_rate": 9.93346098593125e-06, "loss": 18.3993, "step": 4401 }, { "epoch": 0.08046502275759958, "grad_norm": 7.065256372234396, "learning_rate": 9.933412846038896e-06, "loss": 17.9417, "step": 4402 }, { "epoch": 0.0804833019540461, "grad_norm": 6.907884075342692, "learning_rate": 9.933364688855357e-06, "loss": 17.8059, "step": 4403 }, { "epoch": 0.08050158115049262, "grad_norm": 7.340969901260953, "learning_rate": 9.933316514380804e-06, "loss": 17.6631, "step": 4404 }, { "epoch": 0.08051986034693914, "grad_norm": 7.9374478532920465, "learning_rate": 9.933268322615403e-06, "loss": 18.5271, "step": 4405 }, { "epoch": 0.08053813954338568, "grad_norm": 7.4468071554846444, "learning_rate": 9.933220113559327e-06, "loss": 17.822, "step": 4406 }, { "epoch": 0.0805564187398322, "grad_norm": 6.920858580541004, "learning_rate": 9.93317188721274e-06, "loss": 17.5771, "step": 4407 }, { "epoch": 0.08057469793627872, "grad_norm": 7.114072198051114, "learning_rate": 9.933123643575816e-06, "loss": 17.8293, "step": 4408 }, { "epoch": 0.08059297713272524, "grad_norm": 6.841327818725455, "learning_rate": 9.933075382648721e-06, "loss": 17.4228, "step": 4409 }, { "epoch": 0.08061125632917177, "grad_norm": 7.100727522736129, "learning_rate": 9.933027104431624e-06, "loss": 17.6061, "step": 4410 }, { "epoch": 0.08062953552561829, "grad_norm": 9.30397315079259, "learning_rate": 9.932978808924696e-06, "loss": 18.1698, "step": 4411 }, { "epoch": 0.08064781472206482, "grad_norm": 6.896796777753139, "learning_rate": 9.932930496128105e-06, "loss": 17.8227, "step": 4412 }, { "epoch": 0.08066609391851134, "grad_norm": 7.270965623141832, "learning_rate": 9.93288216604202e-06, "loss": 17.7673, "step": 4413 }, { "epoch": 0.08068437311495787, "grad_norm": 8.296988554740144, "learning_rate": 9.932833818666611e-06, "loss": 18.2694, "step": 4414 }, { "epoch": 0.08070265231140439, "grad_norm": 8.778225644823923, "learning_rate": 9.93278545400205e-06, "loss": 18.4442, "step": 4415 }, { "epoch": 0.08072093150785091, "grad_norm": 7.208020527454145, "learning_rate": 9.932737072048502e-06, "loss": 17.9922, "step": 4416 }, { "epoch": 0.08073921070429745, "grad_norm": 7.010547771327252, "learning_rate": 9.932688672806138e-06, "loss": 17.7569, "step": 4417 }, { "epoch": 0.08075748990074397, "grad_norm": 7.83570712937895, "learning_rate": 9.93264025627513e-06, "loss": 17.9074, "step": 4418 }, { "epoch": 0.08077576909719049, "grad_norm": 8.072153380337696, "learning_rate": 9.932591822455643e-06, "loss": 18.5003, "step": 4419 }, { "epoch": 0.08079404829363701, "grad_norm": 7.838063096096014, "learning_rate": 9.932543371347853e-06, "loss": 18.0529, "step": 4420 }, { "epoch": 0.08081232749008353, "grad_norm": 7.6052270885843045, "learning_rate": 9.932494902951925e-06, "loss": 18.0015, "step": 4421 }, { "epoch": 0.08083060668653005, "grad_norm": 7.646716818032306, "learning_rate": 9.932446417268031e-06, "loss": 17.9335, "step": 4422 }, { "epoch": 0.08084888588297659, "grad_norm": 6.74951305218201, "learning_rate": 9.93239791429634e-06, "loss": 17.895, "step": 4423 }, { "epoch": 0.08086716507942311, "grad_norm": 7.638016311406168, "learning_rate": 9.932349394037022e-06, "loss": 17.9338, "step": 4424 }, { "epoch": 0.08088544427586963, "grad_norm": 6.775714397132939, "learning_rate": 9.932300856490248e-06, "loss": 17.6984, "step": 4425 }, { "epoch": 0.08090372347231616, "grad_norm": 6.715265997146286, "learning_rate": 9.932252301656187e-06, "loss": 17.8797, "step": 4426 }, { "epoch": 0.08092200266876268, "grad_norm": 6.708322945726105, "learning_rate": 9.932203729535011e-06, "loss": 17.7322, "step": 4427 }, { "epoch": 0.0809402818652092, "grad_norm": 7.674689554676557, "learning_rate": 9.932155140126886e-06, "loss": 18.0179, "step": 4428 }, { "epoch": 0.08095856106165573, "grad_norm": 6.0953774890285946, "learning_rate": 9.932106533431988e-06, "loss": 17.266, "step": 4429 }, { "epoch": 0.08097684025810226, "grad_norm": 8.123767572496034, "learning_rate": 9.932057909450483e-06, "loss": 17.9947, "step": 4430 }, { "epoch": 0.08099511945454878, "grad_norm": 8.007848703055455, "learning_rate": 9.932009268182542e-06, "loss": 18.0589, "step": 4431 }, { "epoch": 0.0810133986509953, "grad_norm": 7.728613347671448, "learning_rate": 9.93196060962834e-06, "loss": 18.0519, "step": 4432 }, { "epoch": 0.08103167784744182, "grad_norm": 6.914287168216555, "learning_rate": 9.931911933788043e-06, "loss": 17.6688, "step": 4433 }, { "epoch": 0.08104995704388836, "grad_norm": 6.594437207858085, "learning_rate": 9.931863240661821e-06, "loss": 17.6638, "step": 4434 }, { "epoch": 0.08106823624033488, "grad_norm": 6.6198252529318395, "learning_rate": 9.931814530249847e-06, "loss": 17.5418, "step": 4435 }, { "epoch": 0.0810865154367814, "grad_norm": 11.144267892914211, "learning_rate": 9.93176580255229e-06, "loss": 18.2372, "step": 4436 }, { "epoch": 0.08110479463322792, "grad_norm": 7.014288301350778, "learning_rate": 9.931717057569323e-06, "loss": 17.6551, "step": 4437 }, { "epoch": 0.08112307382967444, "grad_norm": 7.318544846682362, "learning_rate": 9.931668295301116e-06, "loss": 17.9492, "step": 4438 }, { "epoch": 0.08114135302612097, "grad_norm": 10.872672173263489, "learning_rate": 9.931619515747838e-06, "loss": 17.9141, "step": 4439 }, { "epoch": 0.0811596322225675, "grad_norm": 6.240998164763673, "learning_rate": 9.931570718909664e-06, "loss": 17.1257, "step": 4440 }, { "epoch": 0.08117791141901402, "grad_norm": 7.99350319505303, "learning_rate": 9.931521904786761e-06, "loss": 17.9102, "step": 4441 }, { "epoch": 0.08119619061546055, "grad_norm": 6.712015008086295, "learning_rate": 9.931473073379304e-06, "loss": 17.6588, "step": 4442 }, { "epoch": 0.08121446981190707, "grad_norm": 7.902877742430194, "learning_rate": 9.931424224687458e-06, "loss": 18.1366, "step": 4443 }, { "epoch": 0.08123274900835359, "grad_norm": 7.78949894709229, "learning_rate": 9.9313753587114e-06, "loss": 18.3117, "step": 4444 }, { "epoch": 0.08125102820480011, "grad_norm": 7.1016311906620855, "learning_rate": 9.9313264754513e-06, "loss": 17.7372, "step": 4445 }, { "epoch": 0.08126930740124665, "grad_norm": 6.478915491179522, "learning_rate": 9.931277574907328e-06, "loss": 17.4951, "step": 4446 }, { "epoch": 0.08128758659769317, "grad_norm": 8.509384946617994, "learning_rate": 9.931228657079655e-06, "loss": 18.6333, "step": 4447 }, { "epoch": 0.08130586579413969, "grad_norm": 7.650938902341948, "learning_rate": 9.931179721968453e-06, "loss": 17.7259, "step": 4448 }, { "epoch": 0.08132414499058621, "grad_norm": 7.7217687624076765, "learning_rate": 9.931130769573896e-06, "loss": 18.0419, "step": 4449 }, { "epoch": 0.08134242418703273, "grad_norm": 8.289211316693782, "learning_rate": 9.931081799896153e-06, "loss": 18.0502, "step": 4450 }, { "epoch": 0.08136070338347927, "grad_norm": 7.714887931684799, "learning_rate": 9.931032812935397e-06, "loss": 17.8473, "step": 4451 }, { "epoch": 0.08137898257992579, "grad_norm": 7.736070745534125, "learning_rate": 9.930983808691796e-06, "loss": 18.1318, "step": 4452 }, { "epoch": 0.08139726177637231, "grad_norm": 6.1809140483148965, "learning_rate": 9.930934787165527e-06, "loss": 17.2089, "step": 4453 }, { "epoch": 0.08141554097281883, "grad_norm": 6.949007285441479, "learning_rate": 9.930885748356759e-06, "loss": 17.5245, "step": 4454 }, { "epoch": 0.08143382016926536, "grad_norm": 6.645321717408077, "learning_rate": 9.930836692265664e-06, "loss": 17.6439, "step": 4455 }, { "epoch": 0.08145209936571188, "grad_norm": 6.932555337897875, "learning_rate": 9.930787618892415e-06, "loss": 17.7075, "step": 4456 }, { "epoch": 0.08147037856215841, "grad_norm": 7.62905891946172, "learning_rate": 9.930738528237183e-06, "loss": 17.7215, "step": 4457 }, { "epoch": 0.08148865775860493, "grad_norm": 6.926128597480996, "learning_rate": 9.93068942030014e-06, "loss": 17.4867, "step": 4458 }, { "epoch": 0.08150693695505146, "grad_norm": 6.5618999252141545, "learning_rate": 9.930640295081458e-06, "loss": 17.3902, "step": 4459 }, { "epoch": 0.08152521615149798, "grad_norm": 6.971387561714625, "learning_rate": 9.93059115258131e-06, "loss": 17.6631, "step": 4460 }, { "epoch": 0.0815434953479445, "grad_norm": 7.164499672355387, "learning_rate": 9.930541992799868e-06, "loss": 17.5324, "step": 4461 }, { "epoch": 0.08156177454439102, "grad_norm": 8.462225980434976, "learning_rate": 9.930492815737305e-06, "loss": 18.4413, "step": 4462 }, { "epoch": 0.08158005374083756, "grad_norm": 7.348675859790189, "learning_rate": 9.930443621393792e-06, "loss": 17.9459, "step": 4463 }, { "epoch": 0.08159833293728408, "grad_norm": 5.875306457033801, "learning_rate": 9.9303944097695e-06, "loss": 17.3848, "step": 4464 }, { "epoch": 0.0816166121337306, "grad_norm": 6.196416695570449, "learning_rate": 9.930345180864606e-06, "loss": 17.3206, "step": 4465 }, { "epoch": 0.08163489133017712, "grad_norm": 6.784910250618494, "learning_rate": 9.93029593467928e-06, "loss": 17.3085, "step": 4466 }, { "epoch": 0.08165317052662364, "grad_norm": 7.625413552891611, "learning_rate": 9.930246671213693e-06, "loss": 18.3514, "step": 4467 }, { "epoch": 0.08167144972307018, "grad_norm": 5.547533240305542, "learning_rate": 9.93019739046802e-06, "loss": 17.0815, "step": 4468 }, { "epoch": 0.0816897289195167, "grad_norm": 7.210661771640025, "learning_rate": 9.930148092442433e-06, "loss": 17.8987, "step": 4469 }, { "epoch": 0.08170800811596322, "grad_norm": 8.65273559111609, "learning_rate": 9.930098777137104e-06, "loss": 18.4925, "step": 4470 }, { "epoch": 0.08172628731240975, "grad_norm": 8.22570860272434, "learning_rate": 9.930049444552207e-06, "loss": 18.1606, "step": 4471 }, { "epoch": 0.08174456650885627, "grad_norm": 7.6859657155698065, "learning_rate": 9.930000094687916e-06, "loss": 17.8538, "step": 4472 }, { "epoch": 0.08176284570530279, "grad_norm": 7.92076233119735, "learning_rate": 9.929950727544401e-06, "loss": 18.4207, "step": 4473 }, { "epoch": 0.08178112490174932, "grad_norm": 7.3580475160391945, "learning_rate": 9.929901343121838e-06, "loss": 17.9415, "step": 4474 }, { "epoch": 0.08179940409819585, "grad_norm": 9.13179251316065, "learning_rate": 9.929851941420396e-06, "loss": 18.4544, "step": 4475 }, { "epoch": 0.08181768329464237, "grad_norm": 7.910382887037255, "learning_rate": 9.929802522440252e-06, "loss": 18.353, "step": 4476 }, { "epoch": 0.08183596249108889, "grad_norm": 6.534012703103533, "learning_rate": 9.929753086181578e-06, "loss": 17.5061, "step": 4477 }, { "epoch": 0.08185424168753541, "grad_norm": 7.269820604266411, "learning_rate": 9.929703632644547e-06, "loss": 17.5173, "step": 4478 }, { "epoch": 0.08187252088398193, "grad_norm": 6.758277820192978, "learning_rate": 9.929654161829333e-06, "loss": 17.5249, "step": 4479 }, { "epoch": 0.08189080008042847, "grad_norm": 6.214356646569967, "learning_rate": 9.929604673736108e-06, "loss": 17.3733, "step": 4480 }, { "epoch": 0.08190907927687499, "grad_norm": 6.651755790691796, "learning_rate": 9.929555168365048e-06, "loss": 17.4953, "step": 4481 }, { "epoch": 0.08192735847332151, "grad_norm": 5.65275080863377, "learning_rate": 9.929505645716323e-06, "loss": 17.0668, "step": 4482 }, { "epoch": 0.08194563766976803, "grad_norm": 7.818691995099381, "learning_rate": 9.92945610579011e-06, "loss": 17.8689, "step": 4483 }, { "epoch": 0.08196391686621456, "grad_norm": 6.846748702401642, "learning_rate": 9.92940654858658e-06, "loss": 17.832, "step": 4484 }, { "epoch": 0.08198219606266109, "grad_norm": 6.774227618839608, "learning_rate": 9.929356974105909e-06, "loss": 17.4402, "step": 4485 }, { "epoch": 0.08200047525910761, "grad_norm": 7.18282719284545, "learning_rate": 9.92930738234827e-06, "loss": 17.7953, "step": 4486 }, { "epoch": 0.08201875445555414, "grad_norm": 7.10034438045775, "learning_rate": 9.929257773313834e-06, "loss": 17.7903, "step": 4487 }, { "epoch": 0.08203703365200066, "grad_norm": 8.918489618016627, "learning_rate": 9.92920814700278e-06, "loss": 18.1086, "step": 4488 }, { "epoch": 0.08205531284844718, "grad_norm": 7.117493121501945, "learning_rate": 9.929158503415277e-06, "loss": 17.6571, "step": 4489 }, { "epoch": 0.0820735920448937, "grad_norm": 8.701853632068818, "learning_rate": 9.929108842551502e-06, "loss": 18.5493, "step": 4490 }, { "epoch": 0.08209187124134024, "grad_norm": 7.033729075683853, "learning_rate": 9.929059164411627e-06, "loss": 17.6714, "step": 4491 }, { "epoch": 0.08211015043778676, "grad_norm": 7.01506236368206, "learning_rate": 9.92900946899583e-06, "loss": 17.5937, "step": 4492 }, { "epoch": 0.08212842963423328, "grad_norm": 7.028807544280329, "learning_rate": 9.92895975630428e-06, "loss": 17.6652, "step": 4493 }, { "epoch": 0.0821467088306798, "grad_norm": 6.729522681157575, "learning_rate": 9.928910026337154e-06, "loss": 17.4521, "step": 4494 }, { "epoch": 0.08216498802712632, "grad_norm": 8.344437666062118, "learning_rate": 9.928860279094628e-06, "loss": 17.9272, "step": 4495 }, { "epoch": 0.08218326722357285, "grad_norm": 6.37483648731223, "learning_rate": 9.928810514576874e-06, "loss": 17.2194, "step": 4496 }, { "epoch": 0.08220154642001938, "grad_norm": 6.102383968868863, "learning_rate": 9.928760732784067e-06, "loss": 17.4369, "step": 4497 }, { "epoch": 0.0822198256164659, "grad_norm": 6.770251894115572, "learning_rate": 9.928710933716379e-06, "loss": 17.6148, "step": 4498 }, { "epoch": 0.08223810481291242, "grad_norm": 7.365270009701503, "learning_rate": 9.92866111737399e-06, "loss": 17.896, "step": 4499 }, { "epoch": 0.08225638400935895, "grad_norm": 9.041320640724681, "learning_rate": 9.928611283757068e-06, "loss": 18.3787, "step": 4500 }, { "epoch": 0.08227466320580547, "grad_norm": 7.341942300548844, "learning_rate": 9.928561432865793e-06, "loss": 17.7244, "step": 4501 }, { "epoch": 0.082292942402252, "grad_norm": 9.128635582132091, "learning_rate": 9.928511564700339e-06, "loss": 18.6112, "step": 4502 }, { "epoch": 0.08231122159869853, "grad_norm": 8.78146881450675, "learning_rate": 9.928461679260877e-06, "loss": 18.7338, "step": 4503 }, { "epoch": 0.08232950079514505, "grad_norm": 8.036743843156453, "learning_rate": 9.928411776547587e-06, "loss": 17.936, "step": 4504 }, { "epoch": 0.08234777999159157, "grad_norm": 7.310140149812051, "learning_rate": 9.92836185656064e-06, "loss": 17.5116, "step": 4505 }, { "epoch": 0.08236605918803809, "grad_norm": 7.1757266381119, "learning_rate": 9.928311919300214e-06, "loss": 17.4799, "step": 4506 }, { "epoch": 0.08238433838448461, "grad_norm": 7.604737458119529, "learning_rate": 9.928261964766481e-06, "loss": 18.138, "step": 4507 }, { "epoch": 0.08240261758093115, "grad_norm": 8.247450984783418, "learning_rate": 9.928211992959617e-06, "loss": 18.3907, "step": 4508 }, { "epoch": 0.08242089677737767, "grad_norm": 7.957011173756499, "learning_rate": 9.928162003879797e-06, "loss": 18.2793, "step": 4509 }, { "epoch": 0.08243917597382419, "grad_norm": 6.8367147643001385, "learning_rate": 9.9281119975272e-06, "loss": 17.653, "step": 4510 }, { "epoch": 0.08245745517027071, "grad_norm": 7.324051785613831, "learning_rate": 9.928061973901995e-06, "loss": 17.7182, "step": 4511 }, { "epoch": 0.08247573436671723, "grad_norm": 6.355023708620587, "learning_rate": 9.928011933004363e-06, "loss": 17.3312, "step": 4512 }, { "epoch": 0.08249401356316376, "grad_norm": 7.022166479823654, "learning_rate": 9.927961874834473e-06, "loss": 17.8033, "step": 4513 }, { "epoch": 0.08251229275961029, "grad_norm": 6.956236792848833, "learning_rate": 9.927911799392506e-06, "loss": 17.7529, "step": 4514 }, { "epoch": 0.08253057195605681, "grad_norm": 8.524558072843963, "learning_rate": 9.927861706678638e-06, "loss": 18.4426, "step": 4515 }, { "epoch": 0.08254885115250334, "grad_norm": 7.456906784203291, "learning_rate": 9.92781159669304e-06, "loss": 18.0318, "step": 4516 }, { "epoch": 0.08256713034894986, "grad_norm": 6.328814492522303, "learning_rate": 9.92776146943589e-06, "loss": 17.3499, "step": 4517 }, { "epoch": 0.08258540954539638, "grad_norm": 6.642606672615187, "learning_rate": 9.927711324907366e-06, "loss": 17.5684, "step": 4518 }, { "epoch": 0.08260368874184291, "grad_norm": 7.2954811864296865, "learning_rate": 9.927661163107639e-06, "loss": 18.0425, "step": 4519 }, { "epoch": 0.08262196793828944, "grad_norm": 7.460025589824433, "learning_rate": 9.927610984036888e-06, "loss": 17.6947, "step": 4520 }, { "epoch": 0.08264024713473596, "grad_norm": 8.381247810702762, "learning_rate": 9.927560787695288e-06, "loss": 18.1759, "step": 4521 }, { "epoch": 0.08265852633118248, "grad_norm": 5.7085072531084355, "learning_rate": 9.927510574083014e-06, "loss": 17.1358, "step": 4522 }, { "epoch": 0.082676805527629, "grad_norm": 7.967265722628898, "learning_rate": 9.927460343200245e-06, "loss": 18.0888, "step": 4523 }, { "epoch": 0.08269508472407552, "grad_norm": 6.815552621381163, "learning_rate": 9.927410095047154e-06, "loss": 17.6987, "step": 4524 }, { "epoch": 0.08271336392052206, "grad_norm": 7.394571511538061, "learning_rate": 9.92735982962392e-06, "loss": 17.7943, "step": 4525 }, { "epoch": 0.08273164311696858, "grad_norm": 9.67717658562339, "learning_rate": 9.927309546930714e-06, "loss": 18.2716, "step": 4526 }, { "epoch": 0.0827499223134151, "grad_norm": 8.130599036793969, "learning_rate": 9.927259246967718e-06, "loss": 18.3201, "step": 4527 }, { "epoch": 0.08276820150986162, "grad_norm": 7.964373664485016, "learning_rate": 9.927208929735105e-06, "loss": 18.0592, "step": 4528 }, { "epoch": 0.08278648070630815, "grad_norm": 7.2245216886891495, "learning_rate": 9.927158595233053e-06, "loss": 17.7279, "step": 4529 }, { "epoch": 0.08280475990275467, "grad_norm": 7.0316490672981695, "learning_rate": 9.927108243461737e-06, "loss": 17.7399, "step": 4530 }, { "epoch": 0.0828230390992012, "grad_norm": 7.761029426910646, "learning_rate": 9.927057874421335e-06, "loss": 17.9445, "step": 4531 }, { "epoch": 0.08284131829564773, "grad_norm": 5.900261226061749, "learning_rate": 9.927007488112022e-06, "loss": 17.4243, "step": 4532 }, { "epoch": 0.08285959749209425, "grad_norm": 8.414529587707785, "learning_rate": 9.926957084533975e-06, "loss": 17.9991, "step": 4533 }, { "epoch": 0.08287787668854077, "grad_norm": 7.605849402215815, "learning_rate": 9.926906663687371e-06, "loss": 17.9538, "step": 4534 }, { "epoch": 0.08289615588498729, "grad_norm": 6.576826027627958, "learning_rate": 9.926856225572388e-06, "loss": 17.4712, "step": 4535 }, { "epoch": 0.08291443508143383, "grad_norm": 6.501336675316235, "learning_rate": 9.9268057701892e-06, "loss": 17.5895, "step": 4536 }, { "epoch": 0.08293271427788035, "grad_norm": 6.672729074948429, "learning_rate": 9.926755297537985e-06, "loss": 17.6235, "step": 4537 }, { "epoch": 0.08295099347432687, "grad_norm": 7.931759467513436, "learning_rate": 9.92670480761892e-06, "loss": 17.9637, "step": 4538 }, { "epoch": 0.08296927267077339, "grad_norm": 6.402769235168133, "learning_rate": 9.926654300432185e-06, "loss": 17.4368, "step": 4539 }, { "epoch": 0.08298755186721991, "grad_norm": 7.409572920926637, "learning_rate": 9.926603775977953e-06, "loss": 17.9268, "step": 4540 }, { "epoch": 0.08300583106366644, "grad_norm": 7.917990749667958, "learning_rate": 9.926553234256401e-06, "loss": 18.1149, "step": 4541 }, { "epoch": 0.08302411026011297, "grad_norm": 8.082940647734613, "learning_rate": 9.92650267526771e-06, "loss": 18.0548, "step": 4542 }, { "epoch": 0.08304238945655949, "grad_norm": 7.277137111479075, "learning_rate": 9.92645209901205e-06, "loss": 17.7861, "step": 4543 }, { "epoch": 0.08306066865300601, "grad_norm": 7.4733012873198685, "learning_rate": 9.926401505489606e-06, "loss": 18.0153, "step": 4544 }, { "epoch": 0.08307894784945254, "grad_norm": 7.9215653230932395, "learning_rate": 9.926350894700552e-06, "loss": 17.9856, "step": 4545 }, { "epoch": 0.08309722704589906, "grad_norm": 7.791247708572704, "learning_rate": 9.926300266645066e-06, "loss": 18.0828, "step": 4546 }, { "epoch": 0.08311550624234558, "grad_norm": 5.959070382662814, "learning_rate": 9.926249621323325e-06, "loss": 17.1554, "step": 4547 }, { "epoch": 0.08313378543879212, "grad_norm": 8.26889967906215, "learning_rate": 9.926198958735505e-06, "loss": 18.0522, "step": 4548 }, { "epoch": 0.08315206463523864, "grad_norm": 7.152983459665711, "learning_rate": 9.926148278881787e-06, "loss": 17.669, "step": 4549 }, { "epoch": 0.08317034383168516, "grad_norm": 8.371490963724002, "learning_rate": 9.926097581762345e-06, "loss": 18.0951, "step": 4550 }, { "epoch": 0.08318862302813168, "grad_norm": 6.357697406914699, "learning_rate": 9.92604686737736e-06, "loss": 17.2828, "step": 4551 }, { "epoch": 0.0832069022245782, "grad_norm": 7.768501300521136, "learning_rate": 9.925996135727008e-06, "loss": 17.9727, "step": 4552 }, { "epoch": 0.08322518142102474, "grad_norm": 9.06060336298556, "learning_rate": 9.925945386811465e-06, "loss": 18.6941, "step": 4553 }, { "epoch": 0.08324346061747126, "grad_norm": 7.674919283870002, "learning_rate": 9.925894620630913e-06, "loss": 17.8699, "step": 4554 }, { "epoch": 0.08326173981391778, "grad_norm": 7.289462358372745, "learning_rate": 9.925843837185527e-06, "loss": 17.9227, "step": 4555 }, { "epoch": 0.0832800190103643, "grad_norm": 7.090399647235003, "learning_rate": 9.925793036475487e-06, "loss": 17.5483, "step": 4556 }, { "epoch": 0.08329829820681083, "grad_norm": 7.85372437426703, "learning_rate": 9.925742218500968e-06, "loss": 18.0304, "step": 4557 }, { "epoch": 0.08331657740325735, "grad_norm": 7.387644049768619, "learning_rate": 9.92569138326215e-06, "loss": 17.8261, "step": 4558 }, { "epoch": 0.08333485659970388, "grad_norm": 8.319402741482605, "learning_rate": 9.925640530759213e-06, "loss": 18.2823, "step": 4559 }, { "epoch": 0.0833531357961504, "grad_norm": 7.572417281552823, "learning_rate": 9.925589660992332e-06, "loss": 17.8767, "step": 4560 }, { "epoch": 0.08337141499259693, "grad_norm": 8.27022005659377, "learning_rate": 9.925538773961687e-06, "loss": 18.3284, "step": 4561 }, { "epoch": 0.08338969418904345, "grad_norm": 7.35226779602324, "learning_rate": 9.925487869667456e-06, "loss": 17.6777, "step": 4562 }, { "epoch": 0.08340797338548997, "grad_norm": 8.234002551773756, "learning_rate": 9.925436948109817e-06, "loss": 17.6783, "step": 4563 }, { "epoch": 0.08342625258193649, "grad_norm": 8.303573018991921, "learning_rate": 9.92538600928895e-06, "loss": 18.1547, "step": 4564 }, { "epoch": 0.08344453177838303, "grad_norm": 7.785953917779978, "learning_rate": 9.925335053205032e-06, "loss": 18.1239, "step": 4565 }, { "epoch": 0.08346281097482955, "grad_norm": 6.477639244898419, "learning_rate": 9.925284079858242e-06, "loss": 17.4685, "step": 4566 }, { "epoch": 0.08348109017127607, "grad_norm": 7.880983959901846, "learning_rate": 9.925233089248758e-06, "loss": 17.9942, "step": 4567 }, { "epoch": 0.08349936936772259, "grad_norm": 7.570587483189525, "learning_rate": 9.92518208137676e-06, "loss": 17.9259, "step": 4568 }, { "epoch": 0.08351764856416911, "grad_norm": 6.457233257151615, "learning_rate": 9.925131056242426e-06, "loss": 17.7171, "step": 4569 }, { "epoch": 0.08353592776061565, "grad_norm": 5.575333496408933, "learning_rate": 9.925080013845935e-06, "loss": 17.086, "step": 4570 }, { "epoch": 0.08355420695706217, "grad_norm": 6.370758261416243, "learning_rate": 9.925028954187466e-06, "loss": 17.4101, "step": 4571 }, { "epoch": 0.0835724861535087, "grad_norm": 7.532208336609889, "learning_rate": 9.924977877267198e-06, "loss": 17.9726, "step": 4572 }, { "epoch": 0.08359076534995522, "grad_norm": 7.79379453646087, "learning_rate": 9.924926783085309e-06, "loss": 17.7622, "step": 4573 }, { "epoch": 0.08360904454640174, "grad_norm": 7.349686634113788, "learning_rate": 9.92487567164198e-06, "loss": 17.5063, "step": 4574 }, { "epoch": 0.08362732374284826, "grad_norm": 7.145670600779787, "learning_rate": 9.92482454293739e-06, "loss": 17.713, "step": 4575 }, { "epoch": 0.0836456029392948, "grad_norm": 7.5937590159500505, "learning_rate": 9.924773396971716e-06, "loss": 18.2291, "step": 4576 }, { "epoch": 0.08366388213574132, "grad_norm": 6.435686012792753, "learning_rate": 9.924722233745139e-06, "loss": 17.3644, "step": 4577 }, { "epoch": 0.08368216133218784, "grad_norm": 6.725851490081413, "learning_rate": 9.924671053257838e-06, "loss": 17.9066, "step": 4578 }, { "epoch": 0.08370044052863436, "grad_norm": 8.07687328527373, "learning_rate": 9.924619855509992e-06, "loss": 18.215, "step": 4579 }, { "epoch": 0.08371871972508088, "grad_norm": 7.684491657654788, "learning_rate": 9.92456864050178e-06, "loss": 18.1529, "step": 4580 }, { "epoch": 0.0837369989215274, "grad_norm": 6.807024820501327, "learning_rate": 9.924517408233385e-06, "loss": 17.4883, "step": 4581 }, { "epoch": 0.08375527811797394, "grad_norm": 7.203634154142472, "learning_rate": 9.92446615870498e-06, "loss": 18.0135, "step": 4582 }, { "epoch": 0.08377355731442046, "grad_norm": 6.9425479112905855, "learning_rate": 9.924414891916752e-06, "loss": 17.6753, "step": 4583 }, { "epoch": 0.08379183651086698, "grad_norm": 10.335705400356025, "learning_rate": 9.924363607868875e-06, "loss": 18.7902, "step": 4584 }, { "epoch": 0.0838101157073135, "grad_norm": 6.551381299515295, "learning_rate": 9.92431230656153e-06, "loss": 17.2856, "step": 4585 }, { "epoch": 0.08382839490376003, "grad_norm": 6.193330792540202, "learning_rate": 9.9242609879949e-06, "loss": 17.3872, "step": 4586 }, { "epoch": 0.08384667410020656, "grad_norm": 7.093456749267061, "learning_rate": 9.92420965216916e-06, "loss": 17.6227, "step": 4587 }, { "epoch": 0.08386495329665308, "grad_norm": 6.386760426569066, "learning_rate": 9.924158299084497e-06, "loss": 17.5053, "step": 4588 }, { "epoch": 0.0838832324930996, "grad_norm": 7.250432946564252, "learning_rate": 9.924106928741081e-06, "loss": 17.8459, "step": 4589 }, { "epoch": 0.08390151168954613, "grad_norm": 8.420710002988253, "learning_rate": 9.924055541139101e-06, "loss": 18.6423, "step": 4590 }, { "epoch": 0.08391979088599265, "grad_norm": 7.091370597423831, "learning_rate": 9.924004136278734e-06, "loss": 17.7346, "step": 4591 }, { "epoch": 0.08393807008243917, "grad_norm": 7.407183416785795, "learning_rate": 9.923952714160158e-06, "loss": 17.9074, "step": 4592 }, { "epoch": 0.0839563492788857, "grad_norm": 6.229878431683514, "learning_rate": 9.923901274783556e-06, "loss": 17.4906, "step": 4593 }, { "epoch": 0.08397462847533223, "grad_norm": 6.24420542102906, "learning_rate": 9.923849818149106e-06, "loss": 17.3914, "step": 4594 }, { "epoch": 0.08399290767177875, "grad_norm": 7.694442797149234, "learning_rate": 9.92379834425699e-06, "loss": 17.9646, "step": 4595 }, { "epoch": 0.08401118686822527, "grad_norm": 7.534197946991661, "learning_rate": 9.923746853107389e-06, "loss": 17.7066, "step": 4596 }, { "epoch": 0.08402946606467179, "grad_norm": 7.5512725979457676, "learning_rate": 9.92369534470048e-06, "loss": 17.7111, "step": 4597 }, { "epoch": 0.08404774526111831, "grad_norm": 6.850704097721616, "learning_rate": 9.92364381903645e-06, "loss": 17.7127, "step": 4598 }, { "epoch": 0.08406602445756485, "grad_norm": 7.458877663742378, "learning_rate": 9.923592276115474e-06, "loss": 17.9754, "step": 4599 }, { "epoch": 0.08408430365401137, "grad_norm": 7.719899131278976, "learning_rate": 9.923540715937734e-06, "loss": 17.8802, "step": 4600 }, { "epoch": 0.0841025828504579, "grad_norm": 7.38570962640599, "learning_rate": 9.92348913850341e-06, "loss": 17.7526, "step": 4601 }, { "epoch": 0.08412086204690442, "grad_norm": 7.310302922438322, "learning_rate": 9.923437543812686e-06, "loss": 18.011, "step": 4602 }, { "epoch": 0.08413914124335094, "grad_norm": 7.199501970253766, "learning_rate": 9.92338593186574e-06, "loss": 17.6991, "step": 4603 }, { "epoch": 0.08415742043979747, "grad_norm": 7.660553506333359, "learning_rate": 9.923334302662753e-06, "loss": 18.1197, "step": 4604 }, { "epoch": 0.084175699636244, "grad_norm": 7.4994971748351285, "learning_rate": 9.923282656203906e-06, "loss": 17.9303, "step": 4605 }, { "epoch": 0.08419397883269052, "grad_norm": 7.022139786012747, "learning_rate": 9.92323099248938e-06, "loss": 17.6392, "step": 4606 }, { "epoch": 0.08421225802913704, "grad_norm": 6.842919258651133, "learning_rate": 9.92317931151936e-06, "loss": 17.9257, "step": 4607 }, { "epoch": 0.08423053722558356, "grad_norm": 6.455226973441174, "learning_rate": 9.923127613294021e-06, "loss": 17.3945, "step": 4608 }, { "epoch": 0.08424881642203008, "grad_norm": 7.221120648143459, "learning_rate": 9.923075897813548e-06, "loss": 17.7731, "step": 4609 }, { "epoch": 0.08426709561847662, "grad_norm": 7.568262938606192, "learning_rate": 9.92302416507812e-06, "loss": 18.06, "step": 4610 }, { "epoch": 0.08428537481492314, "grad_norm": 6.058389900149614, "learning_rate": 9.92297241508792e-06, "loss": 17.4748, "step": 4611 }, { "epoch": 0.08430365401136966, "grad_norm": 7.583642015596835, "learning_rate": 9.922920647843128e-06, "loss": 17.8556, "step": 4612 }, { "epoch": 0.08432193320781618, "grad_norm": 6.698944587393052, "learning_rate": 9.922868863343929e-06, "loss": 17.72, "step": 4613 }, { "epoch": 0.0843402124042627, "grad_norm": 7.125979338649392, "learning_rate": 9.922817061590499e-06, "loss": 17.6758, "step": 4614 }, { "epoch": 0.08435849160070923, "grad_norm": 8.228399202299315, "learning_rate": 9.922765242583023e-06, "loss": 18.3639, "step": 4615 }, { "epoch": 0.08437677079715576, "grad_norm": 9.651616808629765, "learning_rate": 9.922713406321684e-06, "loss": 18.5995, "step": 4616 }, { "epoch": 0.08439504999360228, "grad_norm": 6.877989458037605, "learning_rate": 9.92266155280666e-06, "loss": 17.4893, "step": 4617 }, { "epoch": 0.0844133291900488, "grad_norm": 6.939083593763451, "learning_rate": 9.922609682038134e-06, "loss": 17.2929, "step": 4618 }, { "epoch": 0.08443160838649533, "grad_norm": 8.447030077035594, "learning_rate": 9.92255779401629e-06, "loss": 18.0655, "step": 4619 }, { "epoch": 0.08444988758294185, "grad_norm": 7.385349730703015, "learning_rate": 9.922505888741306e-06, "loss": 17.8132, "step": 4620 }, { "epoch": 0.08446816677938838, "grad_norm": 7.611129849504548, "learning_rate": 9.922453966213366e-06, "loss": 17.9683, "step": 4621 }, { "epoch": 0.0844864459758349, "grad_norm": 6.948264508071709, "learning_rate": 9.922402026432653e-06, "loss": 17.9001, "step": 4622 }, { "epoch": 0.08450472517228143, "grad_norm": 7.269876331297846, "learning_rate": 9.922350069399349e-06, "loss": 17.7451, "step": 4623 }, { "epoch": 0.08452300436872795, "grad_norm": 8.361984673894524, "learning_rate": 9.922298095113634e-06, "loss": 17.8335, "step": 4624 }, { "epoch": 0.08454128356517447, "grad_norm": 6.842570747235827, "learning_rate": 9.922246103575692e-06, "loss": 17.7474, "step": 4625 }, { "epoch": 0.084559562761621, "grad_norm": 7.79425463051433, "learning_rate": 9.922194094785704e-06, "loss": 18.3362, "step": 4626 }, { "epoch": 0.08457784195806753, "grad_norm": 7.305390359806515, "learning_rate": 9.922142068743852e-06, "loss": 17.7493, "step": 4627 }, { "epoch": 0.08459612115451405, "grad_norm": 7.481217680342079, "learning_rate": 9.92209002545032e-06, "loss": 17.9787, "step": 4628 }, { "epoch": 0.08461440035096057, "grad_norm": 7.037488075031782, "learning_rate": 9.92203796490529e-06, "loss": 17.7031, "step": 4629 }, { "epoch": 0.0846326795474071, "grad_norm": 7.394315921256108, "learning_rate": 9.921985887108944e-06, "loss": 18.1961, "step": 4630 }, { "epoch": 0.08465095874385362, "grad_norm": 7.456021612583716, "learning_rate": 9.921933792061464e-06, "loss": 17.7568, "step": 4631 }, { "epoch": 0.08466923794030014, "grad_norm": 6.620630947578043, "learning_rate": 9.921881679763033e-06, "loss": 17.6499, "step": 4632 }, { "epoch": 0.08468751713674667, "grad_norm": 6.172522661410363, "learning_rate": 9.921829550213834e-06, "loss": 17.5983, "step": 4633 }, { "epoch": 0.0847057963331932, "grad_norm": 7.4829460278418996, "learning_rate": 9.92177740341405e-06, "loss": 18.0093, "step": 4634 }, { "epoch": 0.08472407552963972, "grad_norm": 6.460635217610971, "learning_rate": 9.921725239363862e-06, "loss": 17.4151, "step": 4635 }, { "epoch": 0.08474235472608624, "grad_norm": 7.598256342533212, "learning_rate": 9.921673058063456e-06, "loss": 17.6448, "step": 4636 }, { "epoch": 0.08476063392253276, "grad_norm": 7.915591943281325, "learning_rate": 9.921620859513012e-06, "loss": 17.9776, "step": 4637 }, { "epoch": 0.0847789131189793, "grad_norm": 6.605683142047098, "learning_rate": 9.921568643712715e-06, "loss": 17.6836, "step": 4638 }, { "epoch": 0.08479719231542582, "grad_norm": 7.004548467830464, "learning_rate": 9.921516410662745e-06, "loss": 17.6931, "step": 4639 }, { "epoch": 0.08481547151187234, "grad_norm": 7.748531232232952, "learning_rate": 9.921464160363288e-06, "loss": 18.0534, "step": 4640 }, { "epoch": 0.08483375070831886, "grad_norm": 6.109900033572359, "learning_rate": 9.921411892814527e-06, "loss": 17.3033, "step": 4641 }, { "epoch": 0.08485202990476538, "grad_norm": 6.663640655959265, "learning_rate": 9.921359608016644e-06, "loss": 17.5133, "step": 4642 }, { "epoch": 0.0848703091012119, "grad_norm": 8.913487294427815, "learning_rate": 9.921307305969822e-06, "loss": 18.4524, "step": 4643 }, { "epoch": 0.08488858829765844, "grad_norm": 8.592237863143653, "learning_rate": 9.921254986674245e-06, "loss": 18.2266, "step": 4644 }, { "epoch": 0.08490686749410496, "grad_norm": 7.136635747172818, "learning_rate": 9.921202650130098e-06, "loss": 17.7482, "step": 4645 }, { "epoch": 0.08492514669055148, "grad_norm": 9.632606101164654, "learning_rate": 9.92115029633756e-06, "loss": 19.086, "step": 4646 }, { "epoch": 0.084943425886998, "grad_norm": 7.265895092754076, "learning_rate": 9.921097925296819e-06, "loss": 17.9276, "step": 4647 }, { "epoch": 0.08496170508344453, "grad_norm": 7.297272946366155, "learning_rate": 9.921045537008057e-06, "loss": 17.635, "step": 4648 }, { "epoch": 0.08497998427989105, "grad_norm": 6.623038852274819, "learning_rate": 9.920993131471456e-06, "loss": 17.5517, "step": 4649 }, { "epoch": 0.08499826347633758, "grad_norm": 7.517246150877132, "learning_rate": 9.920940708687201e-06, "loss": 17.8812, "step": 4650 }, { "epoch": 0.0850165426727841, "grad_norm": 7.6787574631385676, "learning_rate": 9.920888268655477e-06, "loss": 18.1996, "step": 4651 }, { "epoch": 0.08503482186923063, "grad_norm": 9.662814240443105, "learning_rate": 9.920835811376468e-06, "loss": 18.6466, "step": 4652 }, { "epoch": 0.08505310106567715, "grad_norm": 7.901212720931815, "learning_rate": 9.920783336850353e-06, "loss": 18.3602, "step": 4653 }, { "epoch": 0.08507138026212367, "grad_norm": 6.018024588978739, "learning_rate": 9.920730845077323e-06, "loss": 17.4173, "step": 4654 }, { "epoch": 0.08508965945857021, "grad_norm": 6.6539443475311835, "learning_rate": 9.920678336057555e-06, "loss": 17.5248, "step": 4655 }, { "epoch": 0.08510793865501673, "grad_norm": 7.04117038173876, "learning_rate": 9.920625809791237e-06, "loss": 17.7653, "step": 4656 }, { "epoch": 0.08512621785146325, "grad_norm": 6.725694182599704, "learning_rate": 9.920573266278555e-06, "loss": 17.6774, "step": 4657 }, { "epoch": 0.08514449704790977, "grad_norm": 7.5109578583494585, "learning_rate": 9.920520705519689e-06, "loss": 18.0893, "step": 4658 }, { "epoch": 0.0851627762443563, "grad_norm": 8.935053293856278, "learning_rate": 9.920468127514827e-06, "loss": 18.1475, "step": 4659 }, { "epoch": 0.08518105544080282, "grad_norm": 6.195186641273887, "learning_rate": 9.920415532264149e-06, "loss": 17.4453, "step": 4660 }, { "epoch": 0.08519933463724935, "grad_norm": 7.545760255505536, "learning_rate": 9.920362919767841e-06, "loss": 18.1785, "step": 4661 }, { "epoch": 0.08521761383369587, "grad_norm": 6.711386547356334, "learning_rate": 9.92031029002609e-06, "loss": 17.3788, "step": 4662 }, { "epoch": 0.0852358930301424, "grad_norm": 7.660182236696144, "learning_rate": 9.920257643039076e-06, "loss": 18.1192, "step": 4663 }, { "epoch": 0.08525417222658892, "grad_norm": 7.581371210557854, "learning_rate": 9.920204978806987e-06, "loss": 18.0288, "step": 4664 }, { "epoch": 0.08527245142303544, "grad_norm": 6.448097302083825, "learning_rate": 9.920152297330009e-06, "loss": 17.3281, "step": 4665 }, { "epoch": 0.08529073061948196, "grad_norm": 8.203186645850211, "learning_rate": 9.920099598608318e-06, "loss": 18.4436, "step": 4666 }, { "epoch": 0.0853090098159285, "grad_norm": 6.542271610026919, "learning_rate": 9.920046882642111e-06, "loss": 17.41, "step": 4667 }, { "epoch": 0.08532728901237502, "grad_norm": 8.341157410091265, "learning_rate": 9.919994149431564e-06, "loss": 18.218, "step": 4668 }, { "epoch": 0.08534556820882154, "grad_norm": 7.659982388627232, "learning_rate": 9.919941398976864e-06, "loss": 18.1263, "step": 4669 }, { "epoch": 0.08536384740526806, "grad_norm": 7.88822957890354, "learning_rate": 9.919888631278199e-06, "loss": 18.1774, "step": 4670 }, { "epoch": 0.08538212660171458, "grad_norm": 7.882926322531679, "learning_rate": 9.919835846335748e-06, "loss": 18.1639, "step": 4671 }, { "epoch": 0.08540040579816112, "grad_norm": 6.636317951245712, "learning_rate": 9.919783044149701e-06, "loss": 17.3923, "step": 4672 }, { "epoch": 0.08541868499460764, "grad_norm": 6.930030128289028, "learning_rate": 9.919730224720241e-06, "loss": 17.5655, "step": 4673 }, { "epoch": 0.08543696419105416, "grad_norm": 7.387145584405131, "learning_rate": 9.919677388047553e-06, "loss": 18.1185, "step": 4674 }, { "epoch": 0.08545524338750068, "grad_norm": 7.546342043076622, "learning_rate": 9.919624534131824e-06, "loss": 17.9868, "step": 4675 }, { "epoch": 0.0854735225839472, "grad_norm": 6.770470122143234, "learning_rate": 9.919571662973238e-06, "loss": 17.5021, "step": 4676 }, { "epoch": 0.08549180178039373, "grad_norm": 6.804411522980926, "learning_rate": 9.919518774571979e-06, "loss": 17.7389, "step": 4677 }, { "epoch": 0.08551008097684026, "grad_norm": 8.267455277026194, "learning_rate": 9.919465868928234e-06, "loss": 18.3289, "step": 4678 }, { "epoch": 0.08552836017328679, "grad_norm": 8.291586811682983, "learning_rate": 9.919412946042186e-06, "loss": 18.2858, "step": 4679 }, { "epoch": 0.08554663936973331, "grad_norm": 7.4112105963364945, "learning_rate": 9.919360005914026e-06, "loss": 17.9025, "step": 4680 }, { "epoch": 0.08556491856617983, "grad_norm": 6.3738661114120285, "learning_rate": 9.919307048543933e-06, "loss": 17.4316, "step": 4681 }, { "epoch": 0.08558319776262635, "grad_norm": 6.371580144636659, "learning_rate": 9.9192540739321e-06, "loss": 17.38, "step": 4682 }, { "epoch": 0.08560147695907287, "grad_norm": 7.31919105880218, "learning_rate": 9.919201082078703e-06, "loss": 17.9746, "step": 4683 }, { "epoch": 0.08561975615551941, "grad_norm": 8.388661511336576, "learning_rate": 9.919148072983936e-06, "loss": 18.3804, "step": 4684 }, { "epoch": 0.08563803535196593, "grad_norm": 8.085792428675228, "learning_rate": 9.919095046647982e-06, "loss": 18.1638, "step": 4685 }, { "epoch": 0.08565631454841245, "grad_norm": 5.777711832221259, "learning_rate": 9.919042003071026e-06, "loss": 16.9436, "step": 4686 }, { "epoch": 0.08567459374485897, "grad_norm": 6.629114929315756, "learning_rate": 9.918988942253254e-06, "loss": 17.4534, "step": 4687 }, { "epoch": 0.0856928729413055, "grad_norm": 6.559108221099402, "learning_rate": 9.918935864194855e-06, "loss": 17.2541, "step": 4688 }, { "epoch": 0.08571115213775203, "grad_norm": 6.288491885844113, "learning_rate": 9.91888276889601e-06, "loss": 17.3603, "step": 4689 }, { "epoch": 0.08572943133419855, "grad_norm": 7.1051717030267465, "learning_rate": 9.918829656356908e-06, "loss": 17.7719, "step": 4690 }, { "epoch": 0.08574771053064507, "grad_norm": 8.21598696323983, "learning_rate": 9.918776526577735e-06, "loss": 18.1435, "step": 4691 }, { "epoch": 0.0857659897270916, "grad_norm": 6.7548119803895865, "learning_rate": 9.918723379558679e-06, "loss": 17.4835, "step": 4692 }, { "epoch": 0.08578426892353812, "grad_norm": 10.160157657831514, "learning_rate": 9.918670215299924e-06, "loss": 18.7581, "step": 4693 }, { "epoch": 0.08580254811998464, "grad_norm": 6.750081472867717, "learning_rate": 9.918617033801654e-06, "loss": 17.4776, "step": 4694 }, { "epoch": 0.08582082731643118, "grad_norm": 6.571634199227151, "learning_rate": 9.91856383506406e-06, "loss": 17.4038, "step": 4695 }, { "epoch": 0.0858391065128777, "grad_norm": 6.569445575180033, "learning_rate": 9.918510619087327e-06, "loss": 17.3845, "step": 4696 }, { "epoch": 0.08585738570932422, "grad_norm": 8.874425126433062, "learning_rate": 9.91845738587164e-06, "loss": 18.4588, "step": 4697 }, { "epoch": 0.08587566490577074, "grad_norm": 7.072914521496579, "learning_rate": 9.918404135417187e-06, "loss": 17.528, "step": 4698 }, { "epoch": 0.08589394410221726, "grad_norm": 8.364495270293883, "learning_rate": 9.918350867724156e-06, "loss": 18.5498, "step": 4699 }, { "epoch": 0.08591222329866378, "grad_norm": 7.461258301437727, "learning_rate": 9.91829758279273e-06, "loss": 17.431, "step": 4700 }, { "epoch": 0.08593050249511032, "grad_norm": 7.5937863251442375, "learning_rate": 9.9182442806231e-06, "loss": 17.7866, "step": 4701 }, { "epoch": 0.08594878169155684, "grad_norm": 8.314622980150814, "learning_rate": 9.918190961215449e-06, "loss": 18.5394, "step": 4702 }, { "epoch": 0.08596706088800336, "grad_norm": 7.14383782736478, "learning_rate": 9.918137624569964e-06, "loss": 17.626, "step": 4703 }, { "epoch": 0.08598534008444988, "grad_norm": 8.670159331909955, "learning_rate": 9.918084270686836e-06, "loss": 18.3442, "step": 4704 }, { "epoch": 0.0860036192808964, "grad_norm": 6.101264289550609, "learning_rate": 9.918030899566247e-06, "loss": 17.3995, "step": 4705 }, { "epoch": 0.08602189847734294, "grad_norm": 6.313456545793547, "learning_rate": 9.917977511208388e-06, "loss": 17.3159, "step": 4706 }, { "epoch": 0.08604017767378946, "grad_norm": 6.812465386859783, "learning_rate": 9.917924105613444e-06, "loss": 17.5959, "step": 4707 }, { "epoch": 0.08605845687023599, "grad_norm": 7.838779619179673, "learning_rate": 9.917870682781604e-06, "loss": 17.8477, "step": 4708 }, { "epoch": 0.08607673606668251, "grad_norm": 6.076232743193538, "learning_rate": 9.917817242713052e-06, "loss": 17.3116, "step": 4709 }, { "epoch": 0.08609501526312903, "grad_norm": 7.855771540727316, "learning_rate": 9.917763785407979e-06, "loss": 17.781, "step": 4710 }, { "epoch": 0.08611329445957555, "grad_norm": 8.143194125428792, "learning_rate": 9.917710310866571e-06, "loss": 18.1333, "step": 4711 }, { "epoch": 0.08613157365602209, "grad_norm": 7.099837426909703, "learning_rate": 9.917656819089013e-06, "loss": 17.6271, "step": 4712 }, { "epoch": 0.08614985285246861, "grad_norm": 8.042551395431243, "learning_rate": 9.917603310075497e-06, "loss": 18.2618, "step": 4713 }, { "epoch": 0.08616813204891513, "grad_norm": 8.082796459586074, "learning_rate": 9.917549783826205e-06, "loss": 17.8096, "step": 4714 }, { "epoch": 0.08618641124536165, "grad_norm": 6.055973030126549, "learning_rate": 9.917496240341332e-06, "loss": 17.377, "step": 4715 }, { "epoch": 0.08620469044180817, "grad_norm": 7.261066096383178, "learning_rate": 9.917442679621057e-06, "loss": 17.9038, "step": 4716 }, { "epoch": 0.0862229696382547, "grad_norm": 7.210584710222747, "learning_rate": 9.917389101665574e-06, "loss": 17.4521, "step": 4717 }, { "epoch": 0.08624124883470123, "grad_norm": 7.603368845283619, "learning_rate": 9.91733550647507e-06, "loss": 17.5343, "step": 4718 }, { "epoch": 0.08625952803114775, "grad_norm": 6.967903521521943, "learning_rate": 9.91728189404973e-06, "loss": 17.827, "step": 4719 }, { "epoch": 0.08627780722759427, "grad_norm": 7.427687752088161, "learning_rate": 9.917228264389744e-06, "loss": 18.0324, "step": 4720 }, { "epoch": 0.0862960864240408, "grad_norm": 6.799035386057385, "learning_rate": 9.9171746174953e-06, "loss": 17.6637, "step": 4721 }, { "epoch": 0.08631436562048732, "grad_norm": 8.780168374615268, "learning_rate": 9.917120953366585e-06, "loss": 18.5605, "step": 4722 }, { "epoch": 0.08633264481693385, "grad_norm": 7.326438217879566, "learning_rate": 9.917067272003789e-06, "loss": 17.785, "step": 4723 }, { "epoch": 0.08635092401338038, "grad_norm": 6.52323903292793, "learning_rate": 9.9170135734071e-06, "loss": 17.4051, "step": 4724 }, { "epoch": 0.0863692032098269, "grad_norm": 11.463202013983967, "learning_rate": 9.916959857576703e-06, "loss": 19.7018, "step": 4725 }, { "epoch": 0.08638748240627342, "grad_norm": 8.491255105504997, "learning_rate": 9.916906124512789e-06, "loss": 18.0327, "step": 4726 }, { "epoch": 0.08640576160271994, "grad_norm": 6.7005263588979265, "learning_rate": 9.916852374215545e-06, "loss": 17.5082, "step": 4727 }, { "epoch": 0.08642404079916646, "grad_norm": 7.021664812018635, "learning_rate": 9.91679860668516e-06, "loss": 17.7016, "step": 4728 }, { "epoch": 0.086442319995613, "grad_norm": 7.7617505641899225, "learning_rate": 9.916744821921824e-06, "loss": 18.0579, "step": 4729 }, { "epoch": 0.08646059919205952, "grad_norm": 6.873010943974916, "learning_rate": 9.916691019925723e-06, "loss": 17.5972, "step": 4730 }, { "epoch": 0.08647887838850604, "grad_norm": 6.5318766190601645, "learning_rate": 9.916637200697047e-06, "loss": 17.35, "step": 4731 }, { "epoch": 0.08649715758495256, "grad_norm": 6.739330700608552, "learning_rate": 9.916583364235985e-06, "loss": 17.579, "step": 4732 }, { "epoch": 0.08651543678139909, "grad_norm": 7.6456354628620975, "learning_rate": 9.916529510542722e-06, "loss": 18.0121, "step": 4733 }, { "epoch": 0.08653371597784561, "grad_norm": 7.915953905378684, "learning_rate": 9.916475639617454e-06, "loss": 18.1782, "step": 4734 }, { "epoch": 0.08655199517429214, "grad_norm": 7.95960802520631, "learning_rate": 9.916421751460363e-06, "loss": 17.9121, "step": 4735 }, { "epoch": 0.08657027437073866, "grad_norm": 7.2095480650830615, "learning_rate": 9.91636784607164e-06, "loss": 17.8019, "step": 4736 }, { "epoch": 0.08658855356718519, "grad_norm": 7.394382143574761, "learning_rate": 9.916313923451475e-06, "loss": 17.7964, "step": 4737 }, { "epoch": 0.08660683276363171, "grad_norm": 8.569244455072077, "learning_rate": 9.916259983600056e-06, "loss": 18.3305, "step": 4738 }, { "epoch": 0.08662511196007823, "grad_norm": 7.378206640272703, "learning_rate": 9.916206026517572e-06, "loss": 17.888, "step": 4739 }, { "epoch": 0.08664339115652477, "grad_norm": 8.314015635870948, "learning_rate": 9.916152052204215e-06, "loss": 18.3373, "step": 4740 }, { "epoch": 0.08666167035297129, "grad_norm": 5.5408637013582505, "learning_rate": 9.916098060660169e-06, "loss": 17.0137, "step": 4741 }, { "epoch": 0.08667994954941781, "grad_norm": 6.79331002480117, "learning_rate": 9.916044051885627e-06, "loss": 17.6932, "step": 4742 }, { "epoch": 0.08669822874586433, "grad_norm": 7.744255218459719, "learning_rate": 9.915990025880777e-06, "loss": 17.5147, "step": 4743 }, { "epoch": 0.08671650794231085, "grad_norm": 6.530015917386727, "learning_rate": 9.915935982645807e-06, "loss": 17.4595, "step": 4744 }, { "epoch": 0.08673478713875737, "grad_norm": 10.463815575400252, "learning_rate": 9.915881922180911e-06, "loss": 18.8998, "step": 4745 }, { "epoch": 0.08675306633520391, "grad_norm": 7.452022149867731, "learning_rate": 9.915827844486275e-06, "loss": 18.0894, "step": 4746 }, { "epoch": 0.08677134553165043, "grad_norm": 6.207915039399545, "learning_rate": 9.915773749562086e-06, "loss": 17.3329, "step": 4747 }, { "epoch": 0.08678962472809695, "grad_norm": 7.126148411356433, "learning_rate": 9.915719637408538e-06, "loss": 17.8531, "step": 4748 }, { "epoch": 0.08680790392454348, "grad_norm": 8.512808138400677, "learning_rate": 9.91566550802582e-06, "loss": 18.3305, "step": 4749 }, { "epoch": 0.08682618312099, "grad_norm": 7.792977232722275, "learning_rate": 9.91561136141412e-06, "loss": 18.0873, "step": 4750 }, { "epoch": 0.08684446231743652, "grad_norm": 6.565287707543799, "learning_rate": 9.915557197573631e-06, "loss": 17.6469, "step": 4751 }, { "epoch": 0.08686274151388305, "grad_norm": 8.4981894700173, "learning_rate": 9.915503016504539e-06, "loss": 18.4064, "step": 4752 }, { "epoch": 0.08688102071032958, "grad_norm": 6.643736434631383, "learning_rate": 9.915448818207035e-06, "loss": 17.5126, "step": 4753 }, { "epoch": 0.0868992999067761, "grad_norm": 6.366278565981884, "learning_rate": 9.91539460268131e-06, "loss": 17.3545, "step": 4754 }, { "epoch": 0.08691757910322262, "grad_norm": 6.333523566708861, "learning_rate": 9.915340369927553e-06, "loss": 17.4884, "step": 4755 }, { "epoch": 0.08693585829966914, "grad_norm": 7.293604589598897, "learning_rate": 9.915286119945955e-06, "loss": 17.9825, "step": 4756 }, { "epoch": 0.08695413749611568, "grad_norm": 6.0694639902843495, "learning_rate": 9.915231852736707e-06, "loss": 17.5076, "step": 4757 }, { "epoch": 0.0869724166925622, "grad_norm": 6.599357207289291, "learning_rate": 9.915177568299995e-06, "loss": 17.5141, "step": 4758 }, { "epoch": 0.08699069588900872, "grad_norm": 6.2672771878372595, "learning_rate": 9.915123266636013e-06, "loss": 17.5482, "step": 4759 }, { "epoch": 0.08700897508545524, "grad_norm": 7.239215769355867, "learning_rate": 9.915068947744953e-06, "loss": 17.615, "step": 4760 }, { "epoch": 0.08702725428190176, "grad_norm": 7.145135515623931, "learning_rate": 9.915014611627e-06, "loss": 17.8667, "step": 4761 }, { "epoch": 0.08704553347834829, "grad_norm": 7.1987700167538975, "learning_rate": 9.914960258282348e-06, "loss": 17.9123, "step": 4762 }, { "epoch": 0.08706381267479482, "grad_norm": 6.962138340360233, "learning_rate": 9.914905887711187e-06, "loss": 17.4851, "step": 4763 }, { "epoch": 0.08708209187124134, "grad_norm": 6.647227321497944, "learning_rate": 9.914851499913707e-06, "loss": 17.4062, "step": 4764 }, { "epoch": 0.08710037106768787, "grad_norm": 6.563736064089683, "learning_rate": 9.9147970948901e-06, "loss": 17.3302, "step": 4765 }, { "epoch": 0.08711865026413439, "grad_norm": 8.042800811705872, "learning_rate": 9.914742672640554e-06, "loss": 18.0595, "step": 4766 }, { "epoch": 0.08713692946058091, "grad_norm": 7.16015375190771, "learning_rate": 9.914688233165262e-06, "loss": 17.533, "step": 4767 }, { "epoch": 0.08715520865702743, "grad_norm": 8.225380882641392, "learning_rate": 9.914633776464415e-06, "loss": 17.943, "step": 4768 }, { "epoch": 0.08717348785347397, "grad_norm": 7.895018666215797, "learning_rate": 9.914579302538203e-06, "loss": 18.1833, "step": 4769 }, { "epoch": 0.08719176704992049, "grad_norm": 8.523766580365836, "learning_rate": 9.914524811386816e-06, "loss": 18.2362, "step": 4770 }, { "epoch": 0.08721004624636701, "grad_norm": 8.483258717599346, "learning_rate": 9.914470303010447e-06, "loss": 17.8643, "step": 4771 }, { "epoch": 0.08722832544281353, "grad_norm": 7.704944567254081, "learning_rate": 9.914415777409286e-06, "loss": 18.081, "step": 4772 }, { "epoch": 0.08724660463926005, "grad_norm": 7.574746124933421, "learning_rate": 9.914361234583524e-06, "loss": 17.983, "step": 4773 }, { "epoch": 0.08726488383570659, "grad_norm": 6.792623067204275, "learning_rate": 9.914306674533352e-06, "loss": 17.5434, "step": 4774 }, { "epoch": 0.08728316303215311, "grad_norm": 8.007057542549422, "learning_rate": 9.914252097258964e-06, "loss": 18.0533, "step": 4775 }, { "epoch": 0.08730144222859963, "grad_norm": 8.021933047667138, "learning_rate": 9.914197502760545e-06, "loss": 18.1056, "step": 4776 }, { "epoch": 0.08731972142504615, "grad_norm": 7.911876140707684, "learning_rate": 9.914142891038291e-06, "loss": 18.0806, "step": 4777 }, { "epoch": 0.08733800062149268, "grad_norm": 7.287706255735107, "learning_rate": 9.914088262092393e-06, "loss": 17.8173, "step": 4778 }, { "epoch": 0.0873562798179392, "grad_norm": 8.259683811458032, "learning_rate": 9.914033615923044e-06, "loss": 18.1553, "step": 4779 }, { "epoch": 0.08737455901438573, "grad_norm": 8.524263562062561, "learning_rate": 9.913978952530432e-06, "loss": 18.0442, "step": 4780 }, { "epoch": 0.08739283821083225, "grad_norm": 6.98562030134132, "learning_rate": 9.91392427191475e-06, "loss": 17.7846, "step": 4781 }, { "epoch": 0.08741111740727878, "grad_norm": 7.762663006939629, "learning_rate": 9.913869574076189e-06, "loss": 18.0809, "step": 4782 }, { "epoch": 0.0874293966037253, "grad_norm": 7.434293300734314, "learning_rate": 9.913814859014943e-06, "loss": 18.0468, "step": 4783 }, { "epoch": 0.08744767580017182, "grad_norm": 8.033458339692666, "learning_rate": 9.913760126731201e-06, "loss": 18.3616, "step": 4784 }, { "epoch": 0.08746595499661834, "grad_norm": 6.832057399654449, "learning_rate": 9.913705377225157e-06, "loss": 17.6117, "step": 4785 }, { "epoch": 0.08748423419306488, "grad_norm": 6.430135623362784, "learning_rate": 9.913650610497002e-06, "loss": 17.6004, "step": 4786 }, { "epoch": 0.0875025133895114, "grad_norm": 7.710422672894228, "learning_rate": 9.91359582654693e-06, "loss": 17.7769, "step": 4787 }, { "epoch": 0.08752079258595792, "grad_norm": 6.469440017204641, "learning_rate": 9.913541025375128e-06, "loss": 17.4185, "step": 4788 }, { "epoch": 0.08753907178240444, "grad_norm": 6.6826980747643105, "learning_rate": 9.913486206981794e-06, "loss": 17.6902, "step": 4789 }, { "epoch": 0.08755735097885096, "grad_norm": 6.941843842252527, "learning_rate": 9.913431371367115e-06, "loss": 17.7312, "step": 4790 }, { "epoch": 0.0875756301752975, "grad_norm": 7.046209410946652, "learning_rate": 9.913376518531287e-06, "loss": 17.7428, "step": 4791 }, { "epoch": 0.08759390937174402, "grad_norm": 7.9225176278770055, "learning_rate": 9.913321648474499e-06, "loss": 18.0596, "step": 4792 }, { "epoch": 0.08761218856819054, "grad_norm": 8.505779030910574, "learning_rate": 9.913266761196945e-06, "loss": 18.3003, "step": 4793 }, { "epoch": 0.08763046776463707, "grad_norm": 7.72827539043185, "learning_rate": 9.913211856698817e-06, "loss": 18.2406, "step": 4794 }, { "epoch": 0.08764874696108359, "grad_norm": 7.219869508615633, "learning_rate": 9.913156934980309e-06, "loss": 17.7627, "step": 4795 }, { "epoch": 0.08766702615753011, "grad_norm": 8.101370575717883, "learning_rate": 9.913101996041612e-06, "loss": 18.3698, "step": 4796 }, { "epoch": 0.08768530535397664, "grad_norm": 6.325286891409339, "learning_rate": 9.913047039882919e-06, "loss": 17.4737, "step": 4797 }, { "epoch": 0.08770358455042317, "grad_norm": 7.524763115589298, "learning_rate": 9.912992066504422e-06, "loss": 17.5426, "step": 4798 }, { "epoch": 0.08772186374686969, "grad_norm": 8.507126810654984, "learning_rate": 9.912937075906315e-06, "loss": 17.987, "step": 4799 }, { "epoch": 0.08774014294331621, "grad_norm": 9.281446893128436, "learning_rate": 9.91288206808879e-06, "loss": 18.6362, "step": 4800 }, { "epoch": 0.08775842213976273, "grad_norm": 8.169945727922903, "learning_rate": 9.912827043052038e-06, "loss": 18.0307, "step": 4801 }, { "epoch": 0.08777670133620925, "grad_norm": 6.166911678655189, "learning_rate": 9.912772000796253e-06, "loss": 17.4021, "step": 4802 }, { "epoch": 0.08779498053265579, "grad_norm": 7.306370136133016, "learning_rate": 9.912716941321632e-06, "loss": 18.0219, "step": 4803 }, { "epoch": 0.08781325972910231, "grad_norm": 7.204213842910007, "learning_rate": 9.912661864628362e-06, "loss": 17.8936, "step": 4804 }, { "epoch": 0.08783153892554883, "grad_norm": 6.472655632465372, "learning_rate": 9.912606770716638e-06, "loss": 17.6235, "step": 4805 }, { "epoch": 0.08784981812199535, "grad_norm": 8.738770305828906, "learning_rate": 9.912551659586655e-06, "loss": 18.3371, "step": 4806 }, { "epoch": 0.08786809731844188, "grad_norm": 9.395092312423438, "learning_rate": 9.912496531238605e-06, "loss": 18.6876, "step": 4807 }, { "epoch": 0.08788637651488841, "grad_norm": 7.667475452363174, "learning_rate": 9.912441385672679e-06, "loss": 17.7632, "step": 4808 }, { "epoch": 0.08790465571133493, "grad_norm": 6.495863960634322, "learning_rate": 9.912386222889073e-06, "loss": 17.563, "step": 4809 }, { "epoch": 0.08792293490778146, "grad_norm": 7.017018985938606, "learning_rate": 9.91233104288798e-06, "loss": 17.7194, "step": 4810 }, { "epoch": 0.08794121410422798, "grad_norm": 8.260612330153704, "learning_rate": 9.912275845669592e-06, "loss": 17.9905, "step": 4811 }, { "epoch": 0.0879594933006745, "grad_norm": 6.019433249129388, "learning_rate": 9.912220631234105e-06, "loss": 17.2822, "step": 4812 }, { "epoch": 0.08797777249712102, "grad_norm": 7.593604052343888, "learning_rate": 9.91216539958171e-06, "loss": 18.2624, "step": 4813 }, { "epoch": 0.08799605169356756, "grad_norm": 7.076792624049634, "learning_rate": 9.912110150712601e-06, "loss": 17.86, "step": 4814 }, { "epoch": 0.08801433089001408, "grad_norm": 8.710339145917724, "learning_rate": 9.912054884626974e-06, "loss": 17.9205, "step": 4815 }, { "epoch": 0.0880326100864606, "grad_norm": 9.585008062178307, "learning_rate": 9.91199960132502e-06, "loss": 19.4597, "step": 4816 }, { "epoch": 0.08805088928290712, "grad_norm": 5.69442629687945, "learning_rate": 9.911944300806932e-06, "loss": 17.1384, "step": 4817 }, { "epoch": 0.08806916847935364, "grad_norm": 7.750747831978089, "learning_rate": 9.911888983072908e-06, "loss": 18.1424, "step": 4818 }, { "epoch": 0.08808744767580017, "grad_norm": 8.011201559127326, "learning_rate": 9.911833648123139e-06, "loss": 18.2933, "step": 4819 }, { "epoch": 0.0881057268722467, "grad_norm": 8.256293581137744, "learning_rate": 9.911778295957817e-06, "loss": 18.3694, "step": 4820 }, { "epoch": 0.08812400606869322, "grad_norm": 6.96854580648387, "learning_rate": 9.911722926577141e-06, "loss": 17.6918, "step": 4821 }, { "epoch": 0.08814228526513974, "grad_norm": 7.4813761705247, "learning_rate": 9.9116675399813e-06, "loss": 18.2377, "step": 4822 }, { "epoch": 0.08816056446158627, "grad_norm": 6.499835442153083, "learning_rate": 9.911612136170492e-06, "loss": 17.6314, "step": 4823 }, { "epoch": 0.08817884365803279, "grad_norm": 9.329402462013125, "learning_rate": 9.91155671514491e-06, "loss": 17.9491, "step": 4824 }, { "epoch": 0.08819712285447932, "grad_norm": 7.134590714179622, "learning_rate": 9.911501276904746e-06, "loss": 17.8917, "step": 4825 }, { "epoch": 0.08821540205092585, "grad_norm": 6.014950998411903, "learning_rate": 9.911445821450199e-06, "loss": 17.2828, "step": 4826 }, { "epoch": 0.08823368124737237, "grad_norm": 8.36064771060975, "learning_rate": 9.911390348781458e-06, "loss": 18.8198, "step": 4827 }, { "epoch": 0.08825196044381889, "grad_norm": 6.492310583631008, "learning_rate": 9.911334858898721e-06, "loss": 17.488, "step": 4828 }, { "epoch": 0.08827023964026541, "grad_norm": 7.191727143888856, "learning_rate": 9.911279351802182e-06, "loss": 17.7162, "step": 4829 }, { "epoch": 0.08828851883671193, "grad_norm": 7.072271640359286, "learning_rate": 9.911223827492035e-06, "loss": 17.6843, "step": 4830 }, { "epoch": 0.08830679803315847, "grad_norm": 6.8461577876455655, "learning_rate": 9.911168285968474e-06, "loss": 17.5735, "step": 4831 }, { "epoch": 0.08832507722960499, "grad_norm": 8.769748706606327, "learning_rate": 9.911112727231694e-06, "loss": 18.546, "step": 4832 }, { "epoch": 0.08834335642605151, "grad_norm": 6.88971957797013, "learning_rate": 9.911057151281892e-06, "loss": 17.524, "step": 4833 }, { "epoch": 0.08836163562249803, "grad_norm": 8.044215283794703, "learning_rate": 9.911001558119258e-06, "loss": 18.0864, "step": 4834 }, { "epoch": 0.08837991481894455, "grad_norm": 7.21672603923443, "learning_rate": 9.910945947743992e-06, "loss": 17.9637, "step": 4835 }, { "epoch": 0.08839819401539108, "grad_norm": 8.055828634294114, "learning_rate": 9.910890320156285e-06, "loss": 18.0511, "step": 4836 }, { "epoch": 0.08841647321183761, "grad_norm": 7.4949272257948865, "learning_rate": 9.910834675356336e-06, "loss": 18.0165, "step": 4837 }, { "epoch": 0.08843475240828413, "grad_norm": 9.754306941590027, "learning_rate": 9.910779013344336e-06, "loss": 18.4512, "step": 4838 }, { "epoch": 0.08845303160473066, "grad_norm": 7.5429698147869235, "learning_rate": 9.910723334120482e-06, "loss": 17.7982, "step": 4839 }, { "epoch": 0.08847131080117718, "grad_norm": 6.962705737769747, "learning_rate": 9.910667637684968e-06, "loss": 17.6604, "step": 4840 }, { "epoch": 0.0884895899976237, "grad_norm": 6.602786660120906, "learning_rate": 9.91061192403799e-06, "loss": 17.5104, "step": 4841 }, { "epoch": 0.08850786919407024, "grad_norm": 8.19722256545566, "learning_rate": 9.910556193179744e-06, "loss": 18.0938, "step": 4842 }, { "epoch": 0.08852614839051676, "grad_norm": 6.613293074237941, "learning_rate": 9.910500445110424e-06, "loss": 17.3452, "step": 4843 }, { "epoch": 0.08854442758696328, "grad_norm": 7.168804242379337, "learning_rate": 9.910444679830227e-06, "loss": 17.9111, "step": 4844 }, { "epoch": 0.0885627067834098, "grad_norm": 5.794603819679539, "learning_rate": 9.910388897339347e-06, "loss": 17.1512, "step": 4845 }, { "epoch": 0.08858098597985632, "grad_norm": 7.296220367182794, "learning_rate": 9.91033309763798e-06, "loss": 17.3953, "step": 4846 }, { "epoch": 0.08859926517630284, "grad_norm": 6.924173425291186, "learning_rate": 9.910277280726322e-06, "loss": 17.7091, "step": 4847 }, { "epoch": 0.08861754437274938, "grad_norm": 7.9899815344020135, "learning_rate": 9.910221446604569e-06, "loss": 18.2742, "step": 4848 }, { "epoch": 0.0886358235691959, "grad_norm": 7.09281784394237, "learning_rate": 9.910165595272913e-06, "loss": 17.5841, "step": 4849 }, { "epoch": 0.08865410276564242, "grad_norm": 6.455267811002289, "learning_rate": 9.910109726731556e-06, "loss": 17.4705, "step": 4850 }, { "epoch": 0.08867238196208894, "grad_norm": 6.142848707058533, "learning_rate": 9.910053840980688e-06, "loss": 17.2928, "step": 4851 }, { "epoch": 0.08869066115853547, "grad_norm": 7.6149171844813655, "learning_rate": 9.90999793802051e-06, "loss": 17.9281, "step": 4852 }, { "epoch": 0.08870894035498199, "grad_norm": 6.384730092886496, "learning_rate": 9.909942017851212e-06, "loss": 17.5453, "step": 4853 }, { "epoch": 0.08872721955142852, "grad_norm": 6.718959207291093, "learning_rate": 9.909886080472997e-06, "loss": 17.5757, "step": 4854 }, { "epoch": 0.08874549874787505, "grad_norm": 5.89251746724107, "learning_rate": 9.909830125886055e-06, "loss": 17.2406, "step": 4855 }, { "epoch": 0.08876377794432157, "grad_norm": 8.454824001567015, "learning_rate": 9.909774154090584e-06, "loss": 17.9786, "step": 4856 }, { "epoch": 0.08878205714076809, "grad_norm": 6.098006609564664, "learning_rate": 9.909718165086781e-06, "loss": 17.1602, "step": 4857 }, { "epoch": 0.08880033633721461, "grad_norm": 7.064665276083377, "learning_rate": 9.909662158874845e-06, "loss": 17.7347, "step": 4858 }, { "epoch": 0.08881861553366115, "grad_norm": 6.158050025592081, "learning_rate": 9.909606135454965e-06, "loss": 17.4858, "step": 4859 }, { "epoch": 0.08883689473010767, "grad_norm": 7.752886612954595, "learning_rate": 9.909550094827343e-06, "loss": 18.1125, "step": 4860 }, { "epoch": 0.08885517392655419, "grad_norm": 8.195774319177469, "learning_rate": 9.909494036992174e-06, "loss": 18.2586, "step": 4861 }, { "epoch": 0.08887345312300071, "grad_norm": 7.892604259341922, "learning_rate": 9.909437961949655e-06, "loss": 18.0855, "step": 4862 }, { "epoch": 0.08889173231944723, "grad_norm": 7.542061459784922, "learning_rate": 9.909381869699981e-06, "loss": 18.0079, "step": 4863 }, { "epoch": 0.08891001151589376, "grad_norm": 6.491660444918835, "learning_rate": 9.90932576024335e-06, "loss": 17.4933, "step": 4864 }, { "epoch": 0.08892829071234029, "grad_norm": 8.473319499724814, "learning_rate": 9.909269633579959e-06, "loss": 18.123, "step": 4865 }, { "epoch": 0.08894656990878681, "grad_norm": 6.984022328522888, "learning_rate": 9.909213489710002e-06, "loss": 17.9093, "step": 4866 }, { "epoch": 0.08896484910523333, "grad_norm": 7.477287314187048, "learning_rate": 9.909157328633678e-06, "loss": 18.1638, "step": 4867 }, { "epoch": 0.08898312830167986, "grad_norm": 6.4279946010692495, "learning_rate": 9.909101150351186e-06, "loss": 17.2532, "step": 4868 }, { "epoch": 0.08900140749812638, "grad_norm": 6.248805498100486, "learning_rate": 9.909044954862718e-06, "loss": 17.4604, "step": 4869 }, { "epoch": 0.0890196866945729, "grad_norm": 6.761360549226345, "learning_rate": 9.908988742168474e-06, "loss": 17.7246, "step": 4870 }, { "epoch": 0.08903796589101944, "grad_norm": 7.103710072593917, "learning_rate": 9.908932512268652e-06, "loss": 17.7659, "step": 4871 }, { "epoch": 0.08905624508746596, "grad_norm": 8.684011467300552, "learning_rate": 9.908876265163446e-06, "loss": 18.0777, "step": 4872 }, { "epoch": 0.08907452428391248, "grad_norm": 6.801153374397417, "learning_rate": 9.908820000853054e-06, "loss": 17.4871, "step": 4873 }, { "epoch": 0.089092803480359, "grad_norm": 6.996604349004501, "learning_rate": 9.908763719337675e-06, "loss": 17.7669, "step": 4874 }, { "epoch": 0.08911108267680552, "grad_norm": 6.95254617222263, "learning_rate": 9.908707420617505e-06, "loss": 17.7054, "step": 4875 }, { "epoch": 0.08912936187325206, "grad_norm": 5.833851020191169, "learning_rate": 9.908651104692742e-06, "loss": 17.3961, "step": 4876 }, { "epoch": 0.08914764106969858, "grad_norm": 7.028065768944944, "learning_rate": 9.908594771563583e-06, "loss": 17.7726, "step": 4877 }, { "epoch": 0.0891659202661451, "grad_norm": 5.877460860277679, "learning_rate": 9.908538421230224e-06, "loss": 17.1981, "step": 4878 }, { "epoch": 0.08918419946259162, "grad_norm": 8.044787459744423, "learning_rate": 9.908482053692864e-06, "loss": 18.3129, "step": 4879 }, { "epoch": 0.08920247865903815, "grad_norm": 5.537464004149115, "learning_rate": 9.9084256689517e-06, "loss": 16.954, "step": 4880 }, { "epoch": 0.08922075785548467, "grad_norm": 6.421239681199296, "learning_rate": 9.908369267006932e-06, "loss": 17.4468, "step": 4881 }, { "epoch": 0.0892390370519312, "grad_norm": 7.060424910594412, "learning_rate": 9.908312847858753e-06, "loss": 17.726, "step": 4882 }, { "epoch": 0.08925731624837772, "grad_norm": 6.713220527294039, "learning_rate": 9.908256411507363e-06, "loss": 17.8644, "step": 4883 }, { "epoch": 0.08927559544482425, "grad_norm": 7.28506267164789, "learning_rate": 9.908199957952964e-06, "loss": 17.9029, "step": 4884 }, { "epoch": 0.08929387464127077, "grad_norm": 7.159823541978301, "learning_rate": 9.908143487195747e-06, "loss": 17.6019, "step": 4885 }, { "epoch": 0.08931215383771729, "grad_norm": 7.448306011287324, "learning_rate": 9.908086999235914e-06, "loss": 17.7881, "step": 4886 }, { "epoch": 0.08933043303416381, "grad_norm": 7.490854975147979, "learning_rate": 9.908030494073662e-06, "loss": 17.9684, "step": 4887 }, { "epoch": 0.08934871223061035, "grad_norm": 7.010397319852804, "learning_rate": 9.907973971709189e-06, "loss": 17.8058, "step": 4888 }, { "epoch": 0.08936699142705687, "grad_norm": 8.304541211986436, "learning_rate": 9.907917432142693e-06, "loss": 18.5242, "step": 4889 }, { "epoch": 0.08938527062350339, "grad_norm": 6.9716640880108764, "learning_rate": 9.907860875374373e-06, "loss": 17.8519, "step": 4890 }, { "epoch": 0.08940354981994991, "grad_norm": 7.075654539312293, "learning_rate": 9.907804301404424e-06, "loss": 17.5409, "step": 4891 }, { "epoch": 0.08942182901639643, "grad_norm": 7.748924403105278, "learning_rate": 9.907747710233049e-06, "loss": 18.0154, "step": 4892 }, { "epoch": 0.08944010821284297, "grad_norm": 8.908085631203509, "learning_rate": 9.907691101860444e-06, "loss": 18.3611, "step": 4893 }, { "epoch": 0.08945838740928949, "grad_norm": 6.3659490031572465, "learning_rate": 9.907634476286807e-06, "loss": 17.4746, "step": 4894 }, { "epoch": 0.08947666660573601, "grad_norm": 6.763138824222327, "learning_rate": 9.907577833512338e-06, "loss": 17.5551, "step": 4895 }, { "epoch": 0.08949494580218254, "grad_norm": 6.652661034895993, "learning_rate": 9.907521173537234e-06, "loss": 17.5258, "step": 4896 }, { "epoch": 0.08951322499862906, "grad_norm": 6.924303028863853, "learning_rate": 9.907464496361694e-06, "loss": 17.5145, "step": 4897 }, { "epoch": 0.08953150419507558, "grad_norm": 7.8742076124462805, "learning_rate": 9.907407801985916e-06, "loss": 17.8574, "step": 4898 }, { "epoch": 0.08954978339152211, "grad_norm": 7.02700500083737, "learning_rate": 9.9073510904101e-06, "loss": 17.9066, "step": 4899 }, { "epoch": 0.08956806258796864, "grad_norm": 8.325333538927772, "learning_rate": 9.907294361634445e-06, "loss": 18.1865, "step": 4900 }, { "epoch": 0.08958634178441516, "grad_norm": 7.366991577966159, "learning_rate": 9.907237615659149e-06, "loss": 17.8358, "step": 4901 }, { "epoch": 0.08960462098086168, "grad_norm": 7.4898641121745815, "learning_rate": 9.90718085248441e-06, "loss": 18.0713, "step": 4902 }, { "epoch": 0.0896229001773082, "grad_norm": 8.208024418378578, "learning_rate": 9.907124072110428e-06, "loss": 18.3265, "step": 4903 }, { "epoch": 0.08964117937375472, "grad_norm": 8.606868574897145, "learning_rate": 9.907067274537404e-06, "loss": 18.0227, "step": 4904 }, { "epoch": 0.08965945857020126, "grad_norm": 6.479156336338076, "learning_rate": 9.907010459765534e-06, "loss": 17.5345, "step": 4905 }, { "epoch": 0.08967773776664778, "grad_norm": 7.722528397467274, "learning_rate": 9.906953627795018e-06, "loss": 17.8959, "step": 4906 }, { "epoch": 0.0896960169630943, "grad_norm": 7.752109692661014, "learning_rate": 9.906896778626054e-06, "loss": 17.8554, "step": 4907 }, { "epoch": 0.08971429615954082, "grad_norm": 6.47635829372542, "learning_rate": 9.906839912258843e-06, "loss": 17.3513, "step": 4908 }, { "epoch": 0.08973257535598735, "grad_norm": 7.908803958084195, "learning_rate": 9.906783028693585e-06, "loss": 18.1248, "step": 4909 }, { "epoch": 0.08975085455243388, "grad_norm": 7.819372055678718, "learning_rate": 9.906726127930477e-06, "loss": 18.1958, "step": 4910 }, { "epoch": 0.0897691337488804, "grad_norm": 7.131489980549072, "learning_rate": 9.906669209969721e-06, "loss": 17.9138, "step": 4911 }, { "epoch": 0.08978741294532692, "grad_norm": 9.451767695330918, "learning_rate": 9.906612274811516e-06, "loss": 18.75, "step": 4912 }, { "epoch": 0.08980569214177345, "grad_norm": 8.053565778914212, "learning_rate": 9.906555322456059e-06, "loss": 18.0745, "step": 4913 }, { "epoch": 0.08982397133821997, "grad_norm": 5.741438465348301, "learning_rate": 9.90649835290355e-06, "loss": 17.3908, "step": 4914 }, { "epoch": 0.08984225053466649, "grad_norm": 7.753535854981617, "learning_rate": 9.906441366154194e-06, "loss": 18.1406, "step": 4915 }, { "epoch": 0.08986052973111303, "grad_norm": 7.358112511580242, "learning_rate": 9.906384362208183e-06, "loss": 17.8397, "step": 4916 }, { "epoch": 0.08987880892755955, "grad_norm": 7.459473289924685, "learning_rate": 9.906327341065722e-06, "loss": 18.116, "step": 4917 }, { "epoch": 0.08989708812400607, "grad_norm": 5.350762216150077, "learning_rate": 9.90627030272701e-06, "loss": 17.2346, "step": 4918 }, { "epoch": 0.08991536732045259, "grad_norm": 6.29069708317689, "learning_rate": 9.906213247192246e-06, "loss": 17.4428, "step": 4919 }, { "epoch": 0.08993364651689911, "grad_norm": 7.490105325337597, "learning_rate": 9.906156174461631e-06, "loss": 18.0593, "step": 4920 }, { "epoch": 0.08995192571334563, "grad_norm": 7.813982124712491, "learning_rate": 9.906099084535362e-06, "loss": 17.9516, "step": 4921 }, { "epoch": 0.08997020490979217, "grad_norm": 5.685757307791417, "learning_rate": 9.906041977413646e-06, "loss": 17.2931, "step": 4922 }, { "epoch": 0.08998848410623869, "grad_norm": 8.182059085116599, "learning_rate": 9.905984853096676e-06, "loss": 18.2598, "step": 4923 }, { "epoch": 0.09000676330268521, "grad_norm": 7.562270683203775, "learning_rate": 9.905927711584652e-06, "loss": 17.8086, "step": 4924 }, { "epoch": 0.09002504249913174, "grad_norm": 7.620203216056942, "learning_rate": 9.90587055287778e-06, "loss": 17.9578, "step": 4925 }, { "epoch": 0.09004332169557826, "grad_norm": 7.301869554789333, "learning_rate": 9.905813376976257e-06, "loss": 17.7887, "step": 4926 }, { "epoch": 0.09006160089202479, "grad_norm": 5.655472240721557, "learning_rate": 9.905756183880284e-06, "loss": 17.0582, "step": 4927 }, { "epoch": 0.09007988008847131, "grad_norm": 6.698368435760824, "learning_rate": 9.905698973590061e-06, "loss": 17.7889, "step": 4928 }, { "epoch": 0.09009815928491784, "grad_norm": 9.527879894553903, "learning_rate": 9.90564174610579e-06, "loss": 18.563, "step": 4929 }, { "epoch": 0.09011643848136436, "grad_norm": 7.603525625587197, "learning_rate": 9.905584501427669e-06, "loss": 17.9311, "step": 4930 }, { "epoch": 0.09013471767781088, "grad_norm": 8.398402960489525, "learning_rate": 9.905527239555899e-06, "loss": 18.5133, "step": 4931 }, { "epoch": 0.0901529968742574, "grad_norm": 7.928934032367986, "learning_rate": 9.905469960490683e-06, "loss": 18.0871, "step": 4932 }, { "epoch": 0.09017127607070394, "grad_norm": 8.12174092784868, "learning_rate": 9.905412664232222e-06, "loss": 18.1964, "step": 4933 }, { "epoch": 0.09018955526715046, "grad_norm": 6.392108989691651, "learning_rate": 9.905355350780713e-06, "loss": 17.1349, "step": 4934 }, { "epoch": 0.09020783446359698, "grad_norm": 6.851478822719325, "learning_rate": 9.90529802013636e-06, "loss": 17.6792, "step": 4935 }, { "epoch": 0.0902261136600435, "grad_norm": 6.760071373340467, "learning_rate": 9.905240672299363e-06, "loss": 17.478, "step": 4936 }, { "epoch": 0.09024439285649002, "grad_norm": 8.275567365192778, "learning_rate": 9.905183307269922e-06, "loss": 18.2972, "step": 4937 }, { "epoch": 0.09026267205293655, "grad_norm": 6.7658162244016, "learning_rate": 9.90512592504824e-06, "loss": 17.516, "step": 4938 }, { "epoch": 0.09028095124938308, "grad_norm": 6.968360606268438, "learning_rate": 9.905068525634519e-06, "loss": 17.7538, "step": 4939 }, { "epoch": 0.0902992304458296, "grad_norm": 6.082148817294284, "learning_rate": 9.905011109028957e-06, "loss": 17.4172, "step": 4940 }, { "epoch": 0.09031750964227613, "grad_norm": 7.345070877970718, "learning_rate": 9.904953675231757e-06, "loss": 17.7168, "step": 4941 }, { "epoch": 0.09033578883872265, "grad_norm": 7.134130876369556, "learning_rate": 9.90489622424312e-06, "loss": 17.6517, "step": 4942 }, { "epoch": 0.09035406803516917, "grad_norm": 7.403768346109211, "learning_rate": 9.904838756063246e-06, "loss": 18.0293, "step": 4943 }, { "epoch": 0.0903723472316157, "grad_norm": 7.579206572533839, "learning_rate": 9.904781270692338e-06, "loss": 17.9277, "step": 4944 }, { "epoch": 0.09039062642806223, "grad_norm": 7.01991912954351, "learning_rate": 9.904723768130598e-06, "loss": 17.7241, "step": 4945 }, { "epoch": 0.09040890562450875, "grad_norm": 8.3797845230764, "learning_rate": 9.904666248378228e-06, "loss": 18.167, "step": 4946 }, { "epoch": 0.09042718482095527, "grad_norm": 9.131412748192803, "learning_rate": 9.904608711435426e-06, "loss": 18.1724, "step": 4947 }, { "epoch": 0.09044546401740179, "grad_norm": 7.08883566417059, "learning_rate": 9.904551157302398e-06, "loss": 17.7404, "step": 4948 }, { "epoch": 0.09046374321384831, "grad_norm": 7.861559932400002, "learning_rate": 9.904493585979343e-06, "loss": 17.9231, "step": 4949 }, { "epoch": 0.09048202241029485, "grad_norm": 6.353152324499593, "learning_rate": 9.904435997466463e-06, "loss": 17.2822, "step": 4950 }, { "epoch": 0.09050030160674137, "grad_norm": 7.584299532570088, "learning_rate": 9.904378391763962e-06, "loss": 17.9455, "step": 4951 }, { "epoch": 0.09051858080318789, "grad_norm": 6.9418694944928445, "learning_rate": 9.90432076887204e-06, "loss": 17.6901, "step": 4952 }, { "epoch": 0.09053685999963441, "grad_norm": 8.779481141723359, "learning_rate": 9.904263128790899e-06, "loss": 18.6719, "step": 4953 }, { "epoch": 0.09055513919608094, "grad_norm": 7.6466083260157145, "learning_rate": 9.90420547152074e-06, "loss": 17.713, "step": 4954 }, { "epoch": 0.09057341839252746, "grad_norm": 6.944497802117786, "learning_rate": 9.904147797061767e-06, "loss": 17.5404, "step": 4955 }, { "epoch": 0.090591697588974, "grad_norm": 6.788706594050242, "learning_rate": 9.904090105414184e-06, "loss": 17.6658, "step": 4956 }, { "epoch": 0.09060997678542052, "grad_norm": 6.270923613989111, "learning_rate": 9.904032396578188e-06, "loss": 17.4603, "step": 4957 }, { "epoch": 0.09062825598186704, "grad_norm": 6.404011945641866, "learning_rate": 9.903974670553984e-06, "loss": 17.3798, "step": 4958 }, { "epoch": 0.09064653517831356, "grad_norm": 7.196345513098332, "learning_rate": 9.903916927341776e-06, "loss": 17.7213, "step": 4959 }, { "epoch": 0.09066481437476008, "grad_norm": 8.873870347748749, "learning_rate": 9.903859166941762e-06, "loss": 18.2185, "step": 4960 }, { "epoch": 0.09068309357120662, "grad_norm": 7.8470185747211065, "learning_rate": 9.90380138935415e-06, "loss": 17.9964, "step": 4961 }, { "epoch": 0.09070137276765314, "grad_norm": 8.355801206994641, "learning_rate": 9.903743594579139e-06, "loss": 18.1677, "step": 4962 }, { "epoch": 0.09071965196409966, "grad_norm": 6.500662505566888, "learning_rate": 9.903685782616932e-06, "loss": 17.3051, "step": 4963 }, { "epoch": 0.09073793116054618, "grad_norm": 7.8126043232462985, "learning_rate": 9.903627953467731e-06, "loss": 18.1204, "step": 4964 }, { "epoch": 0.0907562103569927, "grad_norm": 7.284690556818036, "learning_rate": 9.90357010713174e-06, "loss": 18.1257, "step": 4965 }, { "epoch": 0.09077448955343922, "grad_norm": 8.89816870388913, "learning_rate": 9.903512243609161e-06, "loss": 18.6274, "step": 4966 }, { "epoch": 0.09079276874988576, "grad_norm": 9.319042947402458, "learning_rate": 9.903454362900197e-06, "loss": 18.1239, "step": 4967 }, { "epoch": 0.09081104794633228, "grad_norm": 8.13528093699929, "learning_rate": 9.903396465005054e-06, "loss": 18.4412, "step": 4968 }, { "epoch": 0.0908293271427788, "grad_norm": 8.30774638527725, "learning_rate": 9.903338549923928e-06, "loss": 18.2453, "step": 4969 }, { "epoch": 0.09084760633922533, "grad_norm": 7.597627094489424, "learning_rate": 9.903280617657027e-06, "loss": 18.0494, "step": 4970 }, { "epoch": 0.09086588553567185, "grad_norm": 6.0707407297946645, "learning_rate": 9.903222668204553e-06, "loss": 17.3874, "step": 4971 }, { "epoch": 0.09088416473211837, "grad_norm": 6.115617491789437, "learning_rate": 9.90316470156671e-06, "loss": 17.3557, "step": 4972 }, { "epoch": 0.0909024439285649, "grad_norm": 8.01193067033986, "learning_rate": 9.9031067177437e-06, "loss": 18.2971, "step": 4973 }, { "epoch": 0.09092072312501143, "grad_norm": 8.204679748834103, "learning_rate": 9.903048716735725e-06, "loss": 17.9572, "step": 4974 }, { "epoch": 0.09093900232145795, "grad_norm": 7.497913081293649, "learning_rate": 9.902990698542992e-06, "loss": 17.9283, "step": 4975 }, { "epoch": 0.09095728151790447, "grad_norm": 7.121495298556463, "learning_rate": 9.9029326631657e-06, "loss": 17.752, "step": 4976 }, { "epoch": 0.09097556071435099, "grad_norm": 7.31486842562587, "learning_rate": 9.902874610604054e-06, "loss": 17.6785, "step": 4977 }, { "epoch": 0.09099383991079753, "grad_norm": 9.291313903532426, "learning_rate": 9.90281654085826e-06, "loss": 18.8064, "step": 4978 }, { "epoch": 0.09101211910724405, "grad_norm": 9.22651887095488, "learning_rate": 9.902758453928519e-06, "loss": 18.6168, "step": 4979 }, { "epoch": 0.09103039830369057, "grad_norm": 7.6036357163146695, "learning_rate": 9.902700349815035e-06, "loss": 17.8308, "step": 4980 }, { "epoch": 0.09104867750013709, "grad_norm": 8.618788747025125, "learning_rate": 9.90264222851801e-06, "loss": 18.4711, "step": 4981 }, { "epoch": 0.09106695669658361, "grad_norm": 6.951221154408608, "learning_rate": 9.902584090037651e-06, "loss": 17.8209, "step": 4982 }, { "epoch": 0.09108523589303014, "grad_norm": 7.433568318100594, "learning_rate": 9.90252593437416e-06, "loss": 17.7275, "step": 4983 }, { "epoch": 0.09110351508947667, "grad_norm": 8.384901533021308, "learning_rate": 9.902467761527741e-06, "loss": 18.0449, "step": 4984 }, { "epoch": 0.0911217942859232, "grad_norm": 9.209068538222029, "learning_rate": 9.902409571498598e-06, "loss": 18.3365, "step": 4985 }, { "epoch": 0.09114007348236972, "grad_norm": 8.04366712785056, "learning_rate": 9.902351364286935e-06, "loss": 18.1736, "step": 4986 }, { "epoch": 0.09115835267881624, "grad_norm": 7.321603522303677, "learning_rate": 9.902293139892956e-06, "loss": 17.9776, "step": 4987 }, { "epoch": 0.09117663187526276, "grad_norm": 8.305108390363309, "learning_rate": 9.902234898316863e-06, "loss": 18.4951, "step": 4988 }, { "epoch": 0.09119491107170928, "grad_norm": 8.580880097757682, "learning_rate": 9.902176639558865e-06, "loss": 18.4488, "step": 4989 }, { "epoch": 0.09121319026815582, "grad_norm": 7.17768760344289, "learning_rate": 9.902118363619163e-06, "loss": 17.8056, "step": 4990 }, { "epoch": 0.09123146946460234, "grad_norm": 7.56579698012164, "learning_rate": 9.902060070497958e-06, "loss": 17.8564, "step": 4991 }, { "epoch": 0.09124974866104886, "grad_norm": 6.686694426761798, "learning_rate": 9.90200176019546e-06, "loss": 17.4764, "step": 4992 }, { "epoch": 0.09126802785749538, "grad_norm": 7.136554403411247, "learning_rate": 9.901943432711872e-06, "loss": 17.6427, "step": 4993 }, { "epoch": 0.0912863070539419, "grad_norm": 6.136446274441091, "learning_rate": 9.901885088047398e-06, "loss": 17.3511, "step": 4994 }, { "epoch": 0.09130458625038844, "grad_norm": 6.57630498772527, "learning_rate": 9.901826726202242e-06, "loss": 17.8012, "step": 4995 }, { "epoch": 0.09132286544683496, "grad_norm": 7.830314180937311, "learning_rate": 9.901768347176607e-06, "loss": 17.9419, "step": 4996 }, { "epoch": 0.09134114464328148, "grad_norm": 6.2905804893550705, "learning_rate": 9.9017099509707e-06, "loss": 17.2592, "step": 4997 }, { "epoch": 0.091359423839728, "grad_norm": 7.436038988566306, "learning_rate": 9.901651537584725e-06, "loss": 17.9945, "step": 4998 }, { "epoch": 0.09137770303617453, "grad_norm": 7.705229230160873, "learning_rate": 9.901593107018887e-06, "loss": 18.1507, "step": 4999 }, { "epoch": 0.09139598223262105, "grad_norm": 6.445126525326775, "learning_rate": 9.901534659273391e-06, "loss": 17.2738, "step": 5000 }, { "epoch": 0.09141426142906758, "grad_norm": 8.461811534463543, "learning_rate": 9.901476194348441e-06, "loss": 18.2201, "step": 5001 }, { "epoch": 0.0914325406255141, "grad_norm": 6.924860783049049, "learning_rate": 9.901417712244242e-06, "loss": 17.7411, "step": 5002 }, { "epoch": 0.09145081982196063, "grad_norm": 5.649922596212064, "learning_rate": 9.901359212960999e-06, "loss": 17.3142, "step": 5003 }, { "epoch": 0.09146909901840715, "grad_norm": 6.6697417774244485, "learning_rate": 9.901300696498917e-06, "loss": 17.4254, "step": 5004 }, { "epoch": 0.09148737821485367, "grad_norm": 6.3284775144210075, "learning_rate": 9.901242162858202e-06, "loss": 17.2525, "step": 5005 }, { "epoch": 0.09150565741130019, "grad_norm": 7.828468842486411, "learning_rate": 9.901183612039058e-06, "loss": 17.6558, "step": 5006 }, { "epoch": 0.09152393660774673, "grad_norm": 6.945227607333281, "learning_rate": 9.901125044041692e-06, "loss": 17.6458, "step": 5007 }, { "epoch": 0.09154221580419325, "grad_norm": 8.33917572690878, "learning_rate": 9.901066458866306e-06, "loss": 18.0863, "step": 5008 }, { "epoch": 0.09156049500063977, "grad_norm": 8.694129911562369, "learning_rate": 9.901007856513109e-06, "loss": 18.4328, "step": 5009 }, { "epoch": 0.0915787741970863, "grad_norm": 7.191391130756593, "learning_rate": 9.900949236982305e-06, "loss": 17.7385, "step": 5010 }, { "epoch": 0.09159705339353282, "grad_norm": 7.075946211949113, "learning_rate": 9.900890600274097e-06, "loss": 17.6208, "step": 5011 }, { "epoch": 0.09161533258997935, "grad_norm": 7.265462122920251, "learning_rate": 9.900831946388696e-06, "loss": 17.6656, "step": 5012 }, { "epoch": 0.09163361178642587, "grad_norm": 7.713863902687523, "learning_rate": 9.900773275326302e-06, "loss": 17.8481, "step": 5013 }, { "epoch": 0.0916518909828724, "grad_norm": 7.118312489912055, "learning_rate": 9.900714587087123e-06, "loss": 17.8214, "step": 5014 }, { "epoch": 0.09167017017931892, "grad_norm": 8.160408961123743, "learning_rate": 9.900655881671365e-06, "loss": 18.3873, "step": 5015 }, { "epoch": 0.09168844937576544, "grad_norm": 7.1922571340580905, "learning_rate": 9.900597159079235e-06, "loss": 17.6674, "step": 5016 }, { "epoch": 0.09170672857221196, "grad_norm": 6.552480056514527, "learning_rate": 9.900538419310935e-06, "loss": 17.5884, "step": 5017 }, { "epoch": 0.0917250077686585, "grad_norm": 7.554329320381714, "learning_rate": 9.900479662366673e-06, "loss": 18.0221, "step": 5018 }, { "epoch": 0.09174328696510502, "grad_norm": 7.2563650475563515, "learning_rate": 9.900420888246657e-06, "loss": 17.7067, "step": 5019 }, { "epoch": 0.09176156616155154, "grad_norm": 7.587595855844159, "learning_rate": 9.90036209695109e-06, "loss": 17.8569, "step": 5020 }, { "epoch": 0.09177984535799806, "grad_norm": 7.588997794433961, "learning_rate": 9.900303288480178e-06, "loss": 17.9387, "step": 5021 }, { "epoch": 0.09179812455444458, "grad_norm": 7.94984395924185, "learning_rate": 9.90024446283413e-06, "loss": 18.1619, "step": 5022 }, { "epoch": 0.0918164037508911, "grad_norm": 7.614288176046099, "learning_rate": 9.90018562001315e-06, "loss": 17.8588, "step": 5023 }, { "epoch": 0.09183468294733764, "grad_norm": 7.117966262212396, "learning_rate": 9.900126760017444e-06, "loss": 17.8255, "step": 5024 }, { "epoch": 0.09185296214378416, "grad_norm": 7.546946708664759, "learning_rate": 9.900067882847218e-06, "loss": 17.7908, "step": 5025 }, { "epoch": 0.09187124134023068, "grad_norm": 6.669129871761577, "learning_rate": 9.900008988502681e-06, "loss": 17.6401, "step": 5026 }, { "epoch": 0.0918895205366772, "grad_norm": 7.45925356671338, "learning_rate": 9.899950076984038e-06, "loss": 17.9925, "step": 5027 }, { "epoch": 0.09190779973312373, "grad_norm": 7.081006160114588, "learning_rate": 9.899891148291493e-06, "loss": 17.8191, "step": 5028 }, { "epoch": 0.09192607892957026, "grad_norm": 8.581660837824266, "learning_rate": 9.899832202425256e-06, "loss": 18.2224, "step": 5029 }, { "epoch": 0.09194435812601678, "grad_norm": 7.0700686828812405, "learning_rate": 9.899773239385533e-06, "loss": 17.6403, "step": 5030 }, { "epoch": 0.0919626373224633, "grad_norm": 8.272419729877685, "learning_rate": 9.899714259172528e-06, "loss": 18.4661, "step": 5031 }, { "epoch": 0.09198091651890983, "grad_norm": 6.328804256362053, "learning_rate": 9.899655261786452e-06, "loss": 17.404, "step": 5032 }, { "epoch": 0.09199919571535635, "grad_norm": 6.251582485630017, "learning_rate": 9.899596247227508e-06, "loss": 16.9598, "step": 5033 }, { "epoch": 0.09201747491180287, "grad_norm": 7.446638770898681, "learning_rate": 9.899537215495905e-06, "loss": 18.0199, "step": 5034 }, { "epoch": 0.0920357541082494, "grad_norm": 6.4283726762518025, "learning_rate": 9.899478166591849e-06, "loss": 17.5608, "step": 5035 }, { "epoch": 0.09205403330469593, "grad_norm": 6.0300426778732374, "learning_rate": 9.899419100515547e-06, "loss": 17.3951, "step": 5036 }, { "epoch": 0.09207231250114245, "grad_norm": 8.184646622407854, "learning_rate": 9.899360017267205e-06, "loss": 17.9212, "step": 5037 }, { "epoch": 0.09209059169758897, "grad_norm": 6.638060436107272, "learning_rate": 9.899300916847034e-06, "loss": 17.4981, "step": 5038 }, { "epoch": 0.0921088708940355, "grad_norm": 5.517966476871986, "learning_rate": 9.899241799255236e-06, "loss": 17.0434, "step": 5039 }, { "epoch": 0.09212715009048202, "grad_norm": 6.673691804201888, "learning_rate": 9.899182664492022e-06, "loss": 17.409, "step": 5040 }, { "epoch": 0.09214542928692855, "grad_norm": 6.796683992349393, "learning_rate": 9.899123512557598e-06, "loss": 17.5109, "step": 5041 }, { "epoch": 0.09216370848337507, "grad_norm": 8.356684608781904, "learning_rate": 9.899064343452171e-06, "loss": 18.1871, "step": 5042 }, { "epoch": 0.0921819876798216, "grad_norm": 6.329480264267073, "learning_rate": 9.899005157175949e-06, "loss": 17.3431, "step": 5043 }, { "epoch": 0.09220026687626812, "grad_norm": 6.529968145350565, "learning_rate": 9.898945953729138e-06, "loss": 17.5718, "step": 5044 }, { "epoch": 0.09221854607271464, "grad_norm": 6.9738124871435465, "learning_rate": 9.898886733111948e-06, "loss": 17.7357, "step": 5045 }, { "epoch": 0.09223682526916117, "grad_norm": 7.533163221333678, "learning_rate": 9.898827495324582e-06, "loss": 17.8633, "step": 5046 }, { "epoch": 0.0922551044656077, "grad_norm": 7.957121859668378, "learning_rate": 9.898768240367254e-06, "loss": 17.8633, "step": 5047 }, { "epoch": 0.09227338366205422, "grad_norm": 7.630567599799659, "learning_rate": 9.898708968240168e-06, "loss": 17.6692, "step": 5048 }, { "epoch": 0.09229166285850074, "grad_norm": 7.732342182942792, "learning_rate": 9.89864967894353e-06, "loss": 17.956, "step": 5049 }, { "epoch": 0.09230994205494726, "grad_norm": 9.629260157681703, "learning_rate": 9.898590372477553e-06, "loss": 18.385, "step": 5050 }, { "epoch": 0.09232822125139378, "grad_norm": 6.843438829330014, "learning_rate": 9.898531048842439e-06, "loss": 17.8654, "step": 5051 }, { "epoch": 0.09234650044784032, "grad_norm": 6.593065429758437, "learning_rate": 9.898471708038399e-06, "loss": 17.6917, "step": 5052 }, { "epoch": 0.09236477964428684, "grad_norm": 7.098351330255455, "learning_rate": 9.898412350065643e-06, "loss": 17.6001, "step": 5053 }, { "epoch": 0.09238305884073336, "grad_norm": 6.341244681315394, "learning_rate": 9.898352974924375e-06, "loss": 17.5316, "step": 5054 }, { "epoch": 0.09240133803717988, "grad_norm": 7.729340030369656, "learning_rate": 9.898293582614807e-06, "loss": 18.3122, "step": 5055 }, { "epoch": 0.0924196172336264, "grad_norm": 7.956024790318432, "learning_rate": 9.898234173137142e-06, "loss": 17.9008, "step": 5056 }, { "epoch": 0.09243789643007293, "grad_norm": 5.546061194674989, "learning_rate": 9.898174746491593e-06, "loss": 17.1677, "step": 5057 }, { "epoch": 0.09245617562651946, "grad_norm": 6.732295760779667, "learning_rate": 9.898115302678366e-06, "loss": 17.6001, "step": 5058 }, { "epoch": 0.09247445482296598, "grad_norm": 6.84673457871143, "learning_rate": 9.898055841697671e-06, "loss": 17.5328, "step": 5059 }, { "epoch": 0.0924927340194125, "grad_norm": 7.606882429479325, "learning_rate": 9.897996363549714e-06, "loss": 18.1756, "step": 5060 }, { "epoch": 0.09251101321585903, "grad_norm": 6.977198794202178, "learning_rate": 9.897936868234706e-06, "loss": 18.0095, "step": 5061 }, { "epoch": 0.09252929241230555, "grad_norm": 9.701642926743286, "learning_rate": 9.897877355752851e-06, "loss": 18.3897, "step": 5062 }, { "epoch": 0.09254757160875209, "grad_norm": 6.549979390468102, "learning_rate": 9.897817826104364e-06, "loss": 17.5967, "step": 5063 }, { "epoch": 0.09256585080519861, "grad_norm": 8.463070939744693, "learning_rate": 9.897758279289452e-06, "loss": 18.212, "step": 5064 }, { "epoch": 0.09258413000164513, "grad_norm": 10.159200115442955, "learning_rate": 9.897698715308319e-06, "loss": 18.6003, "step": 5065 }, { "epoch": 0.09260240919809165, "grad_norm": 7.8889470110630215, "learning_rate": 9.897639134161177e-06, "loss": 18.4059, "step": 5066 }, { "epoch": 0.09262068839453817, "grad_norm": 5.849307190283272, "learning_rate": 9.897579535848236e-06, "loss": 17.1294, "step": 5067 }, { "epoch": 0.0926389675909847, "grad_norm": 7.288718858669303, "learning_rate": 9.897519920369705e-06, "loss": 17.8934, "step": 5068 }, { "epoch": 0.09265724678743123, "grad_norm": 6.839732780830549, "learning_rate": 9.89746028772579e-06, "loss": 17.7358, "step": 5069 }, { "epoch": 0.09267552598387775, "grad_norm": 7.413594438277912, "learning_rate": 9.897400637916702e-06, "loss": 18.1762, "step": 5070 }, { "epoch": 0.09269380518032427, "grad_norm": 7.062197968061983, "learning_rate": 9.89734097094265e-06, "loss": 17.5506, "step": 5071 }, { "epoch": 0.0927120843767708, "grad_norm": 7.603135725863668, "learning_rate": 9.897281286803842e-06, "loss": 17.9349, "step": 5072 }, { "epoch": 0.09273036357321732, "grad_norm": 8.952409962326763, "learning_rate": 9.89722158550049e-06, "loss": 18.6004, "step": 5073 }, { "epoch": 0.09274864276966384, "grad_norm": 6.36242751579794, "learning_rate": 9.897161867032799e-06, "loss": 17.4027, "step": 5074 }, { "epoch": 0.09276692196611037, "grad_norm": 7.772562341085767, "learning_rate": 9.897102131400981e-06, "loss": 18.0767, "step": 5075 }, { "epoch": 0.0927852011625569, "grad_norm": 6.650751263867157, "learning_rate": 9.897042378605245e-06, "loss": 17.1936, "step": 5076 }, { "epoch": 0.09280348035900342, "grad_norm": 7.866651835631159, "learning_rate": 9.896982608645802e-06, "loss": 18.0655, "step": 5077 }, { "epoch": 0.09282175955544994, "grad_norm": 7.146689741848216, "learning_rate": 9.896922821522858e-06, "loss": 17.6362, "step": 5078 }, { "epoch": 0.09284003875189646, "grad_norm": 7.2105555521385725, "learning_rate": 9.896863017236626e-06, "loss": 17.8813, "step": 5079 }, { "epoch": 0.092858317948343, "grad_norm": 8.091388948800052, "learning_rate": 9.896803195787315e-06, "loss": 18.272, "step": 5080 }, { "epoch": 0.09287659714478952, "grad_norm": 6.17918857902524, "learning_rate": 9.896743357175131e-06, "loss": 17.3417, "step": 5081 }, { "epoch": 0.09289487634123604, "grad_norm": 6.513701464355702, "learning_rate": 9.896683501400289e-06, "loss": 17.4773, "step": 5082 }, { "epoch": 0.09291315553768256, "grad_norm": 8.14203944656455, "learning_rate": 9.896623628462994e-06, "loss": 18.1933, "step": 5083 }, { "epoch": 0.09293143473412908, "grad_norm": 7.145422400847804, "learning_rate": 9.896563738363458e-06, "loss": 18.1071, "step": 5084 }, { "epoch": 0.0929497139305756, "grad_norm": 9.565680857666239, "learning_rate": 9.896503831101893e-06, "loss": 18.2842, "step": 5085 }, { "epoch": 0.09296799312702214, "grad_norm": 6.958968934837667, "learning_rate": 9.896443906678505e-06, "loss": 17.7952, "step": 5086 }, { "epoch": 0.09298627232346866, "grad_norm": 6.937333260918055, "learning_rate": 9.896383965093508e-06, "loss": 17.833, "step": 5087 }, { "epoch": 0.09300455151991519, "grad_norm": 6.517897336294009, "learning_rate": 9.896324006347109e-06, "loss": 17.704, "step": 5088 }, { "epoch": 0.0930228307163617, "grad_norm": 8.297352501735967, "learning_rate": 9.896264030439518e-06, "loss": 18.3063, "step": 5089 }, { "epoch": 0.09304110991280823, "grad_norm": 7.175602043362299, "learning_rate": 9.896204037370949e-06, "loss": 18.0502, "step": 5090 }, { "epoch": 0.09305938910925475, "grad_norm": 6.07682584332193, "learning_rate": 9.896144027141608e-06, "loss": 17.2171, "step": 5091 }, { "epoch": 0.09307766830570129, "grad_norm": 7.418730929559951, "learning_rate": 9.896083999751707e-06, "loss": 18.0613, "step": 5092 }, { "epoch": 0.09309594750214781, "grad_norm": 7.917331776771053, "learning_rate": 9.896023955201456e-06, "loss": 17.8008, "step": 5093 }, { "epoch": 0.09311422669859433, "grad_norm": 7.2096344389067895, "learning_rate": 9.895963893491068e-06, "loss": 17.872, "step": 5094 }, { "epoch": 0.09313250589504085, "grad_norm": 6.838032571383418, "learning_rate": 9.895903814620752e-06, "loss": 17.6463, "step": 5095 }, { "epoch": 0.09315078509148737, "grad_norm": 7.1996244387813135, "learning_rate": 9.895843718590715e-06, "loss": 17.8283, "step": 5096 }, { "epoch": 0.09316906428793391, "grad_norm": 6.547244638406464, "learning_rate": 9.89578360540117e-06, "loss": 17.31, "step": 5097 }, { "epoch": 0.09318734348438043, "grad_norm": 8.132690254906443, "learning_rate": 9.895723475052332e-06, "loss": 18.2499, "step": 5098 }, { "epoch": 0.09320562268082695, "grad_norm": 5.954262308126594, "learning_rate": 9.895663327544405e-06, "loss": 17.1972, "step": 5099 }, { "epoch": 0.09322390187727347, "grad_norm": 7.4598041309153995, "learning_rate": 9.895603162877606e-06, "loss": 17.6992, "step": 5100 }, { "epoch": 0.09324218107372, "grad_norm": 7.397237648454, "learning_rate": 9.89554298105214e-06, "loss": 17.9287, "step": 5101 }, { "epoch": 0.09326046027016652, "grad_norm": 6.987429839174725, "learning_rate": 9.89548278206822e-06, "loss": 17.5882, "step": 5102 }, { "epoch": 0.09327873946661305, "grad_norm": 8.65559946879881, "learning_rate": 9.89542256592606e-06, "loss": 18.4917, "step": 5103 }, { "epoch": 0.09329701866305957, "grad_norm": 7.200226542093818, "learning_rate": 9.895362332625867e-06, "loss": 17.8861, "step": 5104 }, { "epoch": 0.0933152978595061, "grad_norm": 6.4526778792290225, "learning_rate": 9.895302082167854e-06, "loss": 17.6469, "step": 5105 }, { "epoch": 0.09333357705595262, "grad_norm": 7.965510530399193, "learning_rate": 9.895241814552234e-06, "loss": 18.2435, "step": 5106 }, { "epoch": 0.09335185625239914, "grad_norm": 8.022839044254871, "learning_rate": 9.895181529779214e-06, "loss": 18.2214, "step": 5107 }, { "epoch": 0.09337013544884566, "grad_norm": 7.814000778915349, "learning_rate": 9.895121227849009e-06, "loss": 18.5997, "step": 5108 }, { "epoch": 0.0933884146452922, "grad_norm": 7.707830413959619, "learning_rate": 9.895060908761829e-06, "loss": 18.0669, "step": 5109 }, { "epoch": 0.09340669384173872, "grad_norm": 5.822862398431597, "learning_rate": 9.895000572517883e-06, "loss": 17.2046, "step": 5110 }, { "epoch": 0.09342497303818524, "grad_norm": 7.583345046065925, "learning_rate": 9.894940219117386e-06, "loss": 17.8696, "step": 5111 }, { "epoch": 0.09344325223463176, "grad_norm": 7.070215747881176, "learning_rate": 9.89487984856055e-06, "loss": 17.8606, "step": 5112 }, { "epoch": 0.09346153143107828, "grad_norm": 8.945290950233952, "learning_rate": 9.894819460847583e-06, "loss": 18.428, "step": 5113 }, { "epoch": 0.09347981062752482, "grad_norm": 6.672497587160126, "learning_rate": 9.894759055978698e-06, "loss": 17.673, "step": 5114 }, { "epoch": 0.09349808982397134, "grad_norm": 7.727826447480226, "learning_rate": 9.89469863395411e-06, "loss": 17.9795, "step": 5115 }, { "epoch": 0.09351636902041786, "grad_norm": 7.988666697174414, "learning_rate": 9.894638194774026e-06, "loss": 17.7235, "step": 5116 }, { "epoch": 0.09353464821686439, "grad_norm": 7.320396905888832, "learning_rate": 9.89457773843866e-06, "loss": 18.0135, "step": 5117 }, { "epoch": 0.09355292741331091, "grad_norm": 8.546963096960763, "learning_rate": 9.894517264948223e-06, "loss": 17.8598, "step": 5118 }, { "epoch": 0.09357120660975743, "grad_norm": 9.650559515995681, "learning_rate": 9.89445677430293e-06, "loss": 18.5405, "step": 5119 }, { "epoch": 0.09358948580620396, "grad_norm": 6.276272669282278, "learning_rate": 9.894396266502988e-06, "loss": 17.2553, "step": 5120 }, { "epoch": 0.09360776500265049, "grad_norm": 8.242730452685143, "learning_rate": 9.894335741548612e-06, "loss": 18.464, "step": 5121 }, { "epoch": 0.09362604419909701, "grad_norm": 7.62750328126906, "learning_rate": 9.894275199440017e-06, "loss": 17.6168, "step": 5122 }, { "epoch": 0.09364432339554353, "grad_norm": 8.633977352790344, "learning_rate": 9.894214640177411e-06, "loss": 18.4184, "step": 5123 }, { "epoch": 0.09366260259199005, "grad_norm": 6.397828277696224, "learning_rate": 9.894154063761005e-06, "loss": 17.3383, "step": 5124 }, { "epoch": 0.09368088178843657, "grad_norm": 7.15244964329956, "learning_rate": 9.894093470191016e-06, "loss": 17.741, "step": 5125 }, { "epoch": 0.09369916098488311, "grad_norm": 7.8472696501581325, "learning_rate": 9.894032859467653e-06, "loss": 17.6663, "step": 5126 }, { "epoch": 0.09371744018132963, "grad_norm": 5.85358871564374, "learning_rate": 9.893972231591131e-06, "loss": 17.1793, "step": 5127 }, { "epoch": 0.09373571937777615, "grad_norm": 6.8866450513713495, "learning_rate": 9.89391158656166e-06, "loss": 17.7025, "step": 5128 }, { "epoch": 0.09375399857422267, "grad_norm": 8.32418053354413, "learning_rate": 9.893850924379453e-06, "loss": 17.9356, "step": 5129 }, { "epoch": 0.0937722777706692, "grad_norm": 6.706146911778752, "learning_rate": 9.893790245044723e-06, "loss": 17.4315, "step": 5130 }, { "epoch": 0.09379055696711573, "grad_norm": 9.072350200702102, "learning_rate": 9.893729548557684e-06, "loss": 18.5131, "step": 5131 }, { "epoch": 0.09380883616356225, "grad_norm": 7.738821989454404, "learning_rate": 9.893668834918547e-06, "loss": 17.5097, "step": 5132 }, { "epoch": 0.09382711536000878, "grad_norm": 6.246479875134676, "learning_rate": 9.893608104127525e-06, "loss": 17.5148, "step": 5133 }, { "epoch": 0.0938453945564553, "grad_norm": 7.412553148077846, "learning_rate": 9.893547356184832e-06, "loss": 17.6774, "step": 5134 }, { "epoch": 0.09386367375290182, "grad_norm": 6.0480645864262055, "learning_rate": 9.89348659109068e-06, "loss": 17.2246, "step": 5135 }, { "epoch": 0.09388195294934834, "grad_norm": 6.586074664871277, "learning_rate": 9.893425808845283e-06, "loss": 17.566, "step": 5136 }, { "epoch": 0.09390023214579488, "grad_norm": 7.415747677424086, "learning_rate": 9.893365009448853e-06, "loss": 17.7678, "step": 5137 }, { "epoch": 0.0939185113422414, "grad_norm": 6.840226891812664, "learning_rate": 9.893304192901601e-06, "loss": 17.4448, "step": 5138 }, { "epoch": 0.09393679053868792, "grad_norm": 7.7067826992775945, "learning_rate": 9.893243359203743e-06, "loss": 18.0775, "step": 5139 }, { "epoch": 0.09395506973513444, "grad_norm": 7.110277590355808, "learning_rate": 9.893182508355493e-06, "loss": 17.7244, "step": 5140 }, { "epoch": 0.09397334893158096, "grad_norm": 7.988985961336596, "learning_rate": 9.893121640357063e-06, "loss": 18.1297, "step": 5141 }, { "epoch": 0.09399162812802749, "grad_norm": 6.612055979101377, "learning_rate": 9.893060755208666e-06, "loss": 17.2624, "step": 5142 }, { "epoch": 0.09400990732447402, "grad_norm": 6.668732972463614, "learning_rate": 9.892999852910515e-06, "loss": 17.4488, "step": 5143 }, { "epoch": 0.09402818652092054, "grad_norm": 7.944290272730593, "learning_rate": 9.892938933462824e-06, "loss": 18.0692, "step": 5144 }, { "epoch": 0.09404646571736706, "grad_norm": 8.289932641940927, "learning_rate": 9.892877996865807e-06, "loss": 18.2843, "step": 5145 }, { "epoch": 0.09406474491381359, "grad_norm": 6.047733902583171, "learning_rate": 9.892817043119679e-06, "loss": 17.3479, "step": 5146 }, { "epoch": 0.09408302411026011, "grad_norm": 7.676574651211155, "learning_rate": 9.89275607222465e-06, "loss": 17.9895, "step": 5147 }, { "epoch": 0.09410130330670664, "grad_norm": 6.572160583036424, "learning_rate": 9.892695084180934e-06, "loss": 17.495, "step": 5148 }, { "epoch": 0.09411958250315317, "grad_norm": 7.760488971814796, "learning_rate": 9.892634078988748e-06, "loss": 17.8998, "step": 5149 }, { "epoch": 0.09413786169959969, "grad_norm": 6.831034321957358, "learning_rate": 9.892573056648305e-06, "loss": 17.8413, "step": 5150 }, { "epoch": 0.09415614089604621, "grad_norm": 6.667496624357254, "learning_rate": 9.892512017159817e-06, "loss": 17.3853, "step": 5151 }, { "epoch": 0.09417442009249273, "grad_norm": 7.427303461028795, "learning_rate": 9.892450960523499e-06, "loss": 17.6116, "step": 5152 }, { "epoch": 0.09419269928893925, "grad_norm": 7.9004683741857535, "learning_rate": 9.892389886739563e-06, "loss": 18.2583, "step": 5153 }, { "epoch": 0.09421097848538579, "grad_norm": 9.096128004383484, "learning_rate": 9.892328795808228e-06, "loss": 18.4646, "step": 5154 }, { "epoch": 0.09422925768183231, "grad_norm": 7.952199273037197, "learning_rate": 9.892267687729704e-06, "loss": 18.2197, "step": 5155 }, { "epoch": 0.09424753687827883, "grad_norm": 6.217855809239918, "learning_rate": 9.892206562504207e-06, "loss": 17.389, "step": 5156 }, { "epoch": 0.09426581607472535, "grad_norm": 6.659049781394738, "learning_rate": 9.89214542013195e-06, "loss": 17.3625, "step": 5157 }, { "epoch": 0.09428409527117187, "grad_norm": 7.4188287578494565, "learning_rate": 9.892084260613148e-06, "loss": 17.8858, "step": 5158 }, { "epoch": 0.0943023744676184, "grad_norm": 8.965030987127742, "learning_rate": 9.892023083948015e-06, "loss": 18.5783, "step": 5159 }, { "epoch": 0.09432065366406493, "grad_norm": 7.567770700328357, "learning_rate": 9.891961890136766e-06, "loss": 17.9085, "step": 5160 }, { "epoch": 0.09433893286051145, "grad_norm": 6.064575987108541, "learning_rate": 9.891900679179614e-06, "loss": 17.2038, "step": 5161 }, { "epoch": 0.09435721205695798, "grad_norm": 7.434547395908676, "learning_rate": 9.891839451076775e-06, "loss": 17.9291, "step": 5162 }, { "epoch": 0.0943754912534045, "grad_norm": 7.773537562494793, "learning_rate": 9.891778205828465e-06, "loss": 18.3101, "step": 5163 }, { "epoch": 0.09439377044985102, "grad_norm": 6.476420851524025, "learning_rate": 9.891716943434893e-06, "loss": 17.4162, "step": 5164 }, { "epoch": 0.09441204964629756, "grad_norm": 7.067572833939982, "learning_rate": 9.89165566389628e-06, "loss": 17.9595, "step": 5165 }, { "epoch": 0.09443032884274408, "grad_norm": 6.601796886973542, "learning_rate": 9.891594367212837e-06, "loss": 17.614, "step": 5166 }, { "epoch": 0.0944486080391906, "grad_norm": 8.40956824993968, "learning_rate": 9.891533053384784e-06, "loss": 18.378, "step": 5167 }, { "epoch": 0.09446688723563712, "grad_norm": 7.105079782615868, "learning_rate": 9.89147172241233e-06, "loss": 17.7088, "step": 5168 }, { "epoch": 0.09448516643208364, "grad_norm": 9.065696790013208, "learning_rate": 9.89141037429569e-06, "loss": 18.3997, "step": 5169 }, { "epoch": 0.09450344562853016, "grad_norm": 7.408161416479056, "learning_rate": 9.891349009035082e-06, "loss": 17.6111, "step": 5170 }, { "epoch": 0.0945217248249767, "grad_norm": 6.676298820376263, "learning_rate": 9.891287626630721e-06, "loss": 17.4411, "step": 5171 }, { "epoch": 0.09454000402142322, "grad_norm": 7.4254242397519254, "learning_rate": 9.891226227082822e-06, "loss": 17.8507, "step": 5172 }, { "epoch": 0.09455828321786974, "grad_norm": 6.595901024711817, "learning_rate": 9.891164810391599e-06, "loss": 17.7702, "step": 5173 }, { "epoch": 0.09457656241431626, "grad_norm": 6.2201256864739625, "learning_rate": 9.891103376557268e-06, "loss": 17.4838, "step": 5174 }, { "epoch": 0.09459484161076279, "grad_norm": 8.051874223536906, "learning_rate": 9.891041925580043e-06, "loss": 18.1326, "step": 5175 }, { "epoch": 0.09461312080720931, "grad_norm": 6.810582802101014, "learning_rate": 9.890980457460143e-06, "loss": 17.8296, "step": 5176 }, { "epoch": 0.09463140000365584, "grad_norm": 8.420908316405063, "learning_rate": 9.890918972197777e-06, "loss": 18.1608, "step": 5177 }, { "epoch": 0.09464967920010237, "grad_norm": 6.736216916777759, "learning_rate": 9.890857469793168e-06, "loss": 17.6606, "step": 5178 }, { "epoch": 0.09466795839654889, "grad_norm": 6.118770812982109, "learning_rate": 9.890795950246526e-06, "loss": 17.2436, "step": 5179 }, { "epoch": 0.09468623759299541, "grad_norm": 6.537623612244409, "learning_rate": 9.890734413558068e-06, "loss": 17.6136, "step": 5180 }, { "epoch": 0.09470451678944193, "grad_norm": 6.320666536498495, "learning_rate": 9.890672859728013e-06, "loss": 17.4728, "step": 5181 }, { "epoch": 0.09472279598588847, "grad_norm": 7.744391672160248, "learning_rate": 9.89061128875657e-06, "loss": 17.9127, "step": 5182 }, { "epoch": 0.09474107518233499, "grad_norm": 6.3098633894761065, "learning_rate": 9.890549700643962e-06, "loss": 17.379, "step": 5183 }, { "epoch": 0.09475935437878151, "grad_norm": 7.4749505601951345, "learning_rate": 9.890488095390401e-06, "loss": 18.1063, "step": 5184 }, { "epoch": 0.09477763357522803, "grad_norm": 7.196265894310334, "learning_rate": 9.890426472996104e-06, "loss": 18.1147, "step": 5185 }, { "epoch": 0.09479591277167455, "grad_norm": 7.06196872795695, "learning_rate": 9.890364833461285e-06, "loss": 17.3641, "step": 5186 }, { "epoch": 0.09481419196812108, "grad_norm": 8.512656954267568, "learning_rate": 9.890303176786163e-06, "loss": 18.4658, "step": 5187 }, { "epoch": 0.09483247116456761, "grad_norm": 7.498556706900513, "learning_rate": 9.890241502970952e-06, "loss": 17.4943, "step": 5188 }, { "epoch": 0.09485075036101413, "grad_norm": 6.214599113020599, "learning_rate": 9.89017981201587e-06, "loss": 17.2711, "step": 5189 }, { "epoch": 0.09486902955746065, "grad_norm": 6.733266265875151, "learning_rate": 9.890118103921132e-06, "loss": 17.4952, "step": 5190 }, { "epoch": 0.09488730875390718, "grad_norm": 6.724170248327515, "learning_rate": 9.890056378686952e-06, "loss": 17.1852, "step": 5191 }, { "epoch": 0.0949055879503537, "grad_norm": 7.085453016784695, "learning_rate": 9.889994636313551e-06, "loss": 17.8099, "step": 5192 }, { "epoch": 0.09492386714680022, "grad_norm": 8.154429760869085, "learning_rate": 9.889932876801143e-06, "loss": 18.3366, "step": 5193 }, { "epoch": 0.09494214634324676, "grad_norm": 6.980399329141078, "learning_rate": 9.889871100149944e-06, "loss": 17.8377, "step": 5194 }, { "epoch": 0.09496042553969328, "grad_norm": 8.254654376262852, "learning_rate": 9.889809306360171e-06, "loss": 17.9285, "step": 5195 }, { "epoch": 0.0949787047361398, "grad_norm": 7.801505937494357, "learning_rate": 9.88974749543204e-06, "loss": 17.6987, "step": 5196 }, { "epoch": 0.09499698393258632, "grad_norm": 7.095195996412267, "learning_rate": 9.88968566736577e-06, "loss": 17.8568, "step": 5197 }, { "epoch": 0.09501526312903284, "grad_norm": 7.474276996536663, "learning_rate": 9.889623822161575e-06, "loss": 18.023, "step": 5198 }, { "epoch": 0.09503354232547938, "grad_norm": 6.559234082417345, "learning_rate": 9.889561959819673e-06, "loss": 17.7538, "step": 5199 }, { "epoch": 0.0950518215219259, "grad_norm": 8.300625114491275, "learning_rate": 9.889500080340281e-06, "loss": 17.8038, "step": 5200 }, { "epoch": 0.09507010071837242, "grad_norm": 8.68399699423883, "learning_rate": 9.889438183723616e-06, "loss": 18.3276, "step": 5201 }, { "epoch": 0.09508837991481894, "grad_norm": 5.858054324353686, "learning_rate": 9.889376269969895e-06, "loss": 17.1554, "step": 5202 }, { "epoch": 0.09510665911126547, "grad_norm": 6.728176275885843, "learning_rate": 9.889314339079332e-06, "loss": 17.4803, "step": 5203 }, { "epoch": 0.09512493830771199, "grad_norm": 6.44781365917043, "learning_rate": 9.88925239105215e-06, "loss": 17.5822, "step": 5204 }, { "epoch": 0.09514321750415852, "grad_norm": 6.417523267951457, "learning_rate": 9.889190425888558e-06, "loss": 17.4344, "step": 5205 }, { "epoch": 0.09516149670060504, "grad_norm": 7.042317903426137, "learning_rate": 9.889128443588781e-06, "loss": 17.7418, "step": 5206 }, { "epoch": 0.09517977589705157, "grad_norm": 7.397451006051058, "learning_rate": 9.889066444153032e-06, "loss": 17.7729, "step": 5207 }, { "epoch": 0.09519805509349809, "grad_norm": 7.143273439551852, "learning_rate": 9.88900442758153e-06, "loss": 18.024, "step": 5208 }, { "epoch": 0.09521633428994461, "grad_norm": 8.615455102068974, "learning_rate": 9.88894239387449e-06, "loss": 18.2291, "step": 5209 }, { "epoch": 0.09523461348639113, "grad_norm": 7.529587476696245, "learning_rate": 9.888880343032133e-06, "loss": 17.7377, "step": 5210 }, { "epoch": 0.09525289268283767, "grad_norm": 6.63088867569911, "learning_rate": 9.888818275054675e-06, "loss": 17.6644, "step": 5211 }, { "epoch": 0.09527117187928419, "grad_norm": 7.519668617792681, "learning_rate": 9.888756189942333e-06, "loss": 18.1103, "step": 5212 }, { "epoch": 0.09528945107573071, "grad_norm": 6.972288231076231, "learning_rate": 9.888694087695323e-06, "loss": 17.536, "step": 5213 }, { "epoch": 0.09530773027217723, "grad_norm": 7.115601967477167, "learning_rate": 9.888631968313866e-06, "loss": 17.6653, "step": 5214 }, { "epoch": 0.09532600946862375, "grad_norm": 6.747204981196159, "learning_rate": 9.888569831798178e-06, "loss": 17.5997, "step": 5215 }, { "epoch": 0.09534428866507029, "grad_norm": 7.6468415772145555, "learning_rate": 9.888507678148475e-06, "loss": 18.1247, "step": 5216 }, { "epoch": 0.09536256786151681, "grad_norm": 7.070793850550537, "learning_rate": 9.88844550736498e-06, "loss": 17.4151, "step": 5217 }, { "epoch": 0.09538084705796333, "grad_norm": 6.474401361103499, "learning_rate": 9.888383319447905e-06, "loss": 17.2561, "step": 5218 }, { "epoch": 0.09539912625440986, "grad_norm": 7.58341485224294, "learning_rate": 9.888321114397472e-06, "loss": 18.0286, "step": 5219 }, { "epoch": 0.09541740545085638, "grad_norm": 6.972150944961971, "learning_rate": 9.888258892213898e-06, "loss": 17.8591, "step": 5220 }, { "epoch": 0.0954356846473029, "grad_norm": 6.744493413140763, "learning_rate": 9.8881966528974e-06, "loss": 17.5452, "step": 5221 }, { "epoch": 0.09545396384374943, "grad_norm": 7.873660672430703, "learning_rate": 9.888134396448198e-06, "loss": 17.9007, "step": 5222 }, { "epoch": 0.09547224304019596, "grad_norm": 7.298583415260131, "learning_rate": 9.888072122866508e-06, "loss": 17.6763, "step": 5223 }, { "epoch": 0.09549052223664248, "grad_norm": 6.003618951036251, "learning_rate": 9.888009832152549e-06, "loss": 17.2141, "step": 5224 }, { "epoch": 0.095508801433089, "grad_norm": 6.2177497010827105, "learning_rate": 9.887947524306541e-06, "loss": 17.4351, "step": 5225 }, { "epoch": 0.09552708062953552, "grad_norm": 6.666056134453053, "learning_rate": 9.8878851993287e-06, "loss": 17.6842, "step": 5226 }, { "epoch": 0.09554535982598204, "grad_norm": 6.516298719067309, "learning_rate": 9.887822857219248e-06, "loss": 17.4614, "step": 5227 }, { "epoch": 0.09556363902242858, "grad_norm": 7.024816101465736, "learning_rate": 9.887760497978397e-06, "loss": 17.7201, "step": 5228 }, { "epoch": 0.0955819182188751, "grad_norm": 7.200756136312985, "learning_rate": 9.887698121606374e-06, "loss": 17.7464, "step": 5229 }, { "epoch": 0.09560019741532162, "grad_norm": 7.3783861132193564, "learning_rate": 9.887635728103388e-06, "loss": 17.7704, "step": 5230 }, { "epoch": 0.09561847661176814, "grad_norm": 7.2206811986317945, "learning_rate": 9.887573317469668e-06, "loss": 17.6605, "step": 5231 }, { "epoch": 0.09563675580821467, "grad_norm": 8.70079828108925, "learning_rate": 9.887510889705426e-06, "loss": 18.4645, "step": 5232 }, { "epoch": 0.0956550350046612, "grad_norm": 7.636005799765782, "learning_rate": 9.887448444810881e-06, "loss": 17.893, "step": 5233 }, { "epoch": 0.09567331420110772, "grad_norm": 6.993684305512817, "learning_rate": 9.887385982786252e-06, "loss": 17.7158, "step": 5234 }, { "epoch": 0.09569159339755424, "grad_norm": 6.608801131132104, "learning_rate": 9.887323503631762e-06, "loss": 17.3909, "step": 5235 }, { "epoch": 0.09570987259400077, "grad_norm": 6.771682563084261, "learning_rate": 9.887261007347627e-06, "loss": 17.3143, "step": 5236 }, { "epoch": 0.09572815179044729, "grad_norm": 7.950569785671081, "learning_rate": 9.887198493934067e-06, "loss": 18.1489, "step": 5237 }, { "epoch": 0.09574643098689381, "grad_norm": 7.3982490836298656, "learning_rate": 9.8871359633913e-06, "loss": 17.8159, "step": 5238 }, { "epoch": 0.09576471018334035, "grad_norm": 6.990996331506278, "learning_rate": 9.887073415719544e-06, "loss": 17.5424, "step": 5239 }, { "epoch": 0.09578298937978687, "grad_norm": 7.710399343547748, "learning_rate": 9.88701085091902e-06, "loss": 17.9353, "step": 5240 }, { "epoch": 0.09580126857623339, "grad_norm": 8.072937592461615, "learning_rate": 9.886948268989947e-06, "loss": 18.0326, "step": 5241 }, { "epoch": 0.09581954777267991, "grad_norm": 7.40238496151935, "learning_rate": 9.886885669932545e-06, "loss": 17.9291, "step": 5242 }, { "epoch": 0.09583782696912643, "grad_norm": 7.78551859722493, "learning_rate": 9.886823053747034e-06, "loss": 18.1532, "step": 5243 }, { "epoch": 0.09585610616557295, "grad_norm": 7.8382493474777, "learning_rate": 9.88676042043363e-06, "loss": 18.0345, "step": 5244 }, { "epoch": 0.09587438536201949, "grad_norm": 7.25646190192345, "learning_rate": 9.886697769992556e-06, "loss": 17.8306, "step": 5245 }, { "epoch": 0.09589266455846601, "grad_norm": 10.804791626584706, "learning_rate": 9.88663510242403e-06, "loss": 18.711, "step": 5246 }, { "epoch": 0.09591094375491253, "grad_norm": 6.201093743279867, "learning_rate": 9.886572417728274e-06, "loss": 17.3102, "step": 5247 }, { "epoch": 0.09592922295135906, "grad_norm": 8.3642670823898, "learning_rate": 9.886509715905502e-06, "loss": 17.9239, "step": 5248 }, { "epoch": 0.09594750214780558, "grad_norm": 7.276983929820145, "learning_rate": 9.886446996955939e-06, "loss": 17.8457, "step": 5249 }, { "epoch": 0.09596578134425211, "grad_norm": 6.708835063338171, "learning_rate": 9.886384260879804e-06, "loss": 17.6739, "step": 5250 }, { "epoch": 0.09598406054069863, "grad_norm": 7.093126797744419, "learning_rate": 9.886321507677316e-06, "loss": 17.8085, "step": 5251 }, { "epoch": 0.09600233973714516, "grad_norm": 6.497532496574756, "learning_rate": 9.886258737348695e-06, "loss": 17.4002, "step": 5252 }, { "epoch": 0.09602061893359168, "grad_norm": 6.969377082513094, "learning_rate": 9.886195949894162e-06, "loss": 17.582, "step": 5253 }, { "epoch": 0.0960388981300382, "grad_norm": 5.624883262985429, "learning_rate": 9.886133145313934e-06, "loss": 17.3625, "step": 5254 }, { "epoch": 0.09605717732648472, "grad_norm": 7.3925160558442915, "learning_rate": 9.886070323608236e-06, "loss": 17.7853, "step": 5255 }, { "epoch": 0.09607545652293126, "grad_norm": 6.872808240356434, "learning_rate": 9.886007484777284e-06, "loss": 17.6445, "step": 5256 }, { "epoch": 0.09609373571937778, "grad_norm": 6.041244016989075, "learning_rate": 9.8859446288213e-06, "loss": 17.5063, "step": 5257 }, { "epoch": 0.0961120149158243, "grad_norm": 6.506616084585259, "learning_rate": 9.885881755740503e-06, "loss": 17.3917, "step": 5258 }, { "epoch": 0.09613029411227082, "grad_norm": 5.72404838769049, "learning_rate": 9.885818865535115e-06, "loss": 17.0782, "step": 5259 }, { "epoch": 0.09614857330871734, "grad_norm": 5.600220206652367, "learning_rate": 9.885755958205357e-06, "loss": 17.2309, "step": 5260 }, { "epoch": 0.09616685250516387, "grad_norm": 8.256578669744444, "learning_rate": 9.885693033751447e-06, "loss": 18.4663, "step": 5261 }, { "epoch": 0.0961851317016104, "grad_norm": 7.469744356520143, "learning_rate": 9.885630092173608e-06, "loss": 17.8741, "step": 5262 }, { "epoch": 0.09620341089805692, "grad_norm": 9.012604336564129, "learning_rate": 9.885567133472059e-06, "loss": 18.3334, "step": 5263 }, { "epoch": 0.09622169009450345, "grad_norm": 6.558039475714017, "learning_rate": 9.88550415764702e-06, "loss": 17.8242, "step": 5264 }, { "epoch": 0.09623996929094997, "grad_norm": 9.286626068016695, "learning_rate": 9.885441164698715e-06, "loss": 18.3687, "step": 5265 }, { "epoch": 0.09625824848739649, "grad_norm": 6.79502705833962, "learning_rate": 9.885378154627362e-06, "loss": 17.3884, "step": 5266 }, { "epoch": 0.09627652768384302, "grad_norm": 7.629505796668096, "learning_rate": 9.885315127433182e-06, "loss": 17.9658, "step": 5267 }, { "epoch": 0.09629480688028955, "grad_norm": 7.347670861701307, "learning_rate": 9.885252083116398e-06, "loss": 17.5675, "step": 5268 }, { "epoch": 0.09631308607673607, "grad_norm": 7.17750685681595, "learning_rate": 9.885189021677226e-06, "loss": 17.7342, "step": 5269 }, { "epoch": 0.09633136527318259, "grad_norm": 7.1640515597335, "learning_rate": 9.885125943115892e-06, "loss": 17.7269, "step": 5270 }, { "epoch": 0.09634964446962911, "grad_norm": 7.506204128755075, "learning_rate": 9.885062847432615e-06, "loss": 17.8263, "step": 5271 }, { "epoch": 0.09636792366607563, "grad_norm": 8.211680854441479, "learning_rate": 9.884999734627618e-06, "loss": 18.2696, "step": 5272 }, { "epoch": 0.09638620286252217, "grad_norm": 7.649439093091455, "learning_rate": 9.884936604701122e-06, "loss": 17.6967, "step": 5273 }, { "epoch": 0.09640448205896869, "grad_norm": 7.083866308988564, "learning_rate": 9.884873457653345e-06, "loss": 17.6615, "step": 5274 }, { "epoch": 0.09642276125541521, "grad_norm": 8.367405155710328, "learning_rate": 9.88481029348451e-06, "loss": 18.1005, "step": 5275 }, { "epoch": 0.09644104045186173, "grad_norm": 7.022332031462355, "learning_rate": 9.884747112194839e-06, "loss": 17.7434, "step": 5276 }, { "epoch": 0.09645931964830826, "grad_norm": 7.394241098382519, "learning_rate": 9.884683913784553e-06, "loss": 17.8212, "step": 5277 }, { "epoch": 0.09647759884475478, "grad_norm": 7.119784720405873, "learning_rate": 9.884620698253877e-06, "loss": 17.0399, "step": 5278 }, { "epoch": 0.09649587804120131, "grad_norm": 7.982731933158025, "learning_rate": 9.884557465603026e-06, "loss": 17.7377, "step": 5279 }, { "epoch": 0.09651415723764784, "grad_norm": 7.178376538639677, "learning_rate": 9.884494215832225e-06, "loss": 17.8772, "step": 5280 }, { "epoch": 0.09653243643409436, "grad_norm": 7.123912373964505, "learning_rate": 9.884430948941697e-06, "loss": 18.037, "step": 5281 }, { "epoch": 0.09655071563054088, "grad_norm": 7.554159375338772, "learning_rate": 9.884367664931663e-06, "loss": 17.9028, "step": 5282 }, { "epoch": 0.0965689948269874, "grad_norm": 6.008364724596991, "learning_rate": 9.884304363802342e-06, "loss": 17.0345, "step": 5283 }, { "epoch": 0.09658727402343394, "grad_norm": 5.624135781155062, "learning_rate": 9.88424104555396e-06, "loss": 16.9863, "step": 5284 }, { "epoch": 0.09660555321988046, "grad_norm": 6.8371078213504255, "learning_rate": 9.884177710186736e-06, "loss": 17.6411, "step": 5285 }, { "epoch": 0.09662383241632698, "grad_norm": 6.528436990810273, "learning_rate": 9.884114357700893e-06, "loss": 17.2872, "step": 5286 }, { "epoch": 0.0966421116127735, "grad_norm": 7.469541449794446, "learning_rate": 9.884050988096651e-06, "loss": 17.8118, "step": 5287 }, { "epoch": 0.09666039080922002, "grad_norm": 10.212809506208576, "learning_rate": 9.883987601374238e-06, "loss": 18.2187, "step": 5288 }, { "epoch": 0.09667867000566654, "grad_norm": 7.366905909371753, "learning_rate": 9.88392419753387e-06, "loss": 18.2504, "step": 5289 }, { "epoch": 0.09669694920211308, "grad_norm": 7.3333108584614495, "learning_rate": 9.883860776575772e-06, "loss": 17.779, "step": 5290 }, { "epoch": 0.0967152283985596, "grad_norm": 7.976466138224247, "learning_rate": 9.883797338500165e-06, "loss": 18.1815, "step": 5291 }, { "epoch": 0.09673350759500612, "grad_norm": 7.365136220235471, "learning_rate": 9.883733883307272e-06, "loss": 17.7525, "step": 5292 }, { "epoch": 0.09675178679145265, "grad_norm": 8.039554757285382, "learning_rate": 9.883670410997318e-06, "loss": 18.4228, "step": 5293 }, { "epoch": 0.09677006598789917, "grad_norm": 7.53287053112969, "learning_rate": 9.883606921570521e-06, "loss": 18.0332, "step": 5294 }, { "epoch": 0.09678834518434569, "grad_norm": 7.081858402661466, "learning_rate": 9.883543415027107e-06, "loss": 17.6161, "step": 5295 }, { "epoch": 0.09680662438079222, "grad_norm": 6.785646720458371, "learning_rate": 9.883479891367294e-06, "loss": 17.467, "step": 5296 }, { "epoch": 0.09682490357723875, "grad_norm": 7.477258930459312, "learning_rate": 9.88341635059131e-06, "loss": 18.217, "step": 5297 }, { "epoch": 0.09684318277368527, "grad_norm": 6.869902920200466, "learning_rate": 9.883352792699375e-06, "loss": 17.8621, "step": 5298 }, { "epoch": 0.09686146197013179, "grad_norm": 7.789336928668197, "learning_rate": 9.883289217691712e-06, "loss": 18.2331, "step": 5299 }, { "epoch": 0.09687974116657831, "grad_norm": 8.526389110172985, "learning_rate": 9.883225625568544e-06, "loss": 18.3998, "step": 5300 }, { "epoch": 0.09689802036302485, "grad_norm": 6.130281431362759, "learning_rate": 9.883162016330094e-06, "loss": 17.2481, "step": 5301 }, { "epoch": 0.09691629955947137, "grad_norm": 6.948104012679294, "learning_rate": 9.883098389976586e-06, "loss": 17.8148, "step": 5302 }, { "epoch": 0.09693457875591789, "grad_norm": 6.542422082061749, "learning_rate": 9.88303474650824e-06, "loss": 17.3871, "step": 5303 }, { "epoch": 0.09695285795236441, "grad_norm": 7.134880808521567, "learning_rate": 9.882971085925283e-06, "loss": 17.8606, "step": 5304 }, { "epoch": 0.09697113714881093, "grad_norm": 8.605364075819775, "learning_rate": 9.882907408227934e-06, "loss": 18.4823, "step": 5305 }, { "epoch": 0.09698941634525746, "grad_norm": 7.582757133706279, "learning_rate": 9.882843713416421e-06, "loss": 17.8743, "step": 5306 }, { "epoch": 0.09700769554170399, "grad_norm": 6.79791230027872, "learning_rate": 9.882780001490963e-06, "loss": 17.4309, "step": 5307 }, { "epoch": 0.09702597473815051, "grad_norm": 7.506229977541841, "learning_rate": 9.882716272451785e-06, "loss": 17.7997, "step": 5308 }, { "epoch": 0.09704425393459704, "grad_norm": 6.909299202803373, "learning_rate": 9.882652526299109e-06, "loss": 17.583, "step": 5309 }, { "epoch": 0.09706253313104356, "grad_norm": 8.08860684609603, "learning_rate": 9.88258876303316e-06, "loss": 17.9465, "step": 5310 }, { "epoch": 0.09708081232749008, "grad_norm": 6.7712705978128405, "learning_rate": 9.882524982654162e-06, "loss": 17.6611, "step": 5311 }, { "epoch": 0.0970990915239366, "grad_norm": 7.700096808984828, "learning_rate": 9.882461185162338e-06, "loss": 18.1099, "step": 5312 }, { "epoch": 0.09711737072038314, "grad_norm": 7.1087451538688, "learning_rate": 9.88239737055791e-06, "loss": 18.051, "step": 5313 }, { "epoch": 0.09713564991682966, "grad_norm": 6.897441875558762, "learning_rate": 9.882333538841103e-06, "loss": 17.5645, "step": 5314 }, { "epoch": 0.09715392911327618, "grad_norm": 6.684874766663511, "learning_rate": 9.88226969001214e-06, "loss": 17.5739, "step": 5315 }, { "epoch": 0.0971722083097227, "grad_norm": 6.078619167484666, "learning_rate": 9.882205824071246e-06, "loss": 17.1586, "step": 5316 }, { "epoch": 0.09719048750616922, "grad_norm": 6.92733611336219, "learning_rate": 9.882141941018644e-06, "loss": 17.6455, "step": 5317 }, { "epoch": 0.09720876670261576, "grad_norm": 7.5784526259969995, "learning_rate": 9.882078040854559e-06, "loss": 17.7522, "step": 5318 }, { "epoch": 0.09722704589906228, "grad_norm": 7.36961789501853, "learning_rate": 9.882014123579215e-06, "loss": 17.5854, "step": 5319 }, { "epoch": 0.0972453250955088, "grad_norm": 7.317098772859468, "learning_rate": 9.881950189192833e-06, "loss": 17.8084, "step": 5320 }, { "epoch": 0.09726360429195532, "grad_norm": 5.787194122942332, "learning_rate": 9.88188623769564e-06, "loss": 17.2275, "step": 5321 }, { "epoch": 0.09728188348840185, "grad_norm": 7.168416748681587, "learning_rate": 9.88182226908786e-06, "loss": 17.7341, "step": 5322 }, { "epoch": 0.09730016268484837, "grad_norm": 7.407609911779401, "learning_rate": 9.881758283369715e-06, "loss": 17.9142, "step": 5323 }, { "epoch": 0.0973184418812949, "grad_norm": 6.927321379262219, "learning_rate": 9.881694280541431e-06, "loss": 17.6755, "step": 5324 }, { "epoch": 0.09733672107774143, "grad_norm": 7.267632829932315, "learning_rate": 9.881630260603234e-06, "loss": 17.7475, "step": 5325 }, { "epoch": 0.09735500027418795, "grad_norm": 7.04217802331472, "learning_rate": 9.881566223555345e-06, "loss": 17.3577, "step": 5326 }, { "epoch": 0.09737327947063447, "grad_norm": 6.746752510385598, "learning_rate": 9.881502169397991e-06, "loss": 17.5972, "step": 5327 }, { "epoch": 0.09739155866708099, "grad_norm": 5.2912247121295435, "learning_rate": 9.881438098131395e-06, "loss": 17.1553, "step": 5328 }, { "epoch": 0.09740983786352751, "grad_norm": 7.325975113070174, "learning_rate": 9.881374009755784e-06, "loss": 18.1632, "step": 5329 }, { "epoch": 0.09742811705997405, "grad_norm": 6.080116422589618, "learning_rate": 9.881309904271378e-06, "loss": 17.3491, "step": 5330 }, { "epoch": 0.09744639625642057, "grad_norm": 6.3866380418005635, "learning_rate": 9.881245781678405e-06, "loss": 17.1599, "step": 5331 }, { "epoch": 0.09746467545286709, "grad_norm": 6.035010066941613, "learning_rate": 9.881181641977088e-06, "loss": 17.3709, "step": 5332 }, { "epoch": 0.09748295464931361, "grad_norm": 7.703056986080128, "learning_rate": 9.881117485167656e-06, "loss": 17.9805, "step": 5333 }, { "epoch": 0.09750123384576014, "grad_norm": 8.709080770062386, "learning_rate": 9.881053311250328e-06, "loss": 18.6102, "step": 5334 }, { "epoch": 0.09751951304220667, "grad_norm": 6.192408709014584, "learning_rate": 9.880989120225333e-06, "loss": 17.413, "step": 5335 }, { "epoch": 0.09753779223865319, "grad_norm": 8.105822771494752, "learning_rate": 9.880924912092894e-06, "loss": 18.212, "step": 5336 }, { "epoch": 0.09755607143509971, "grad_norm": 6.7656047760564775, "learning_rate": 9.880860686853238e-06, "loss": 17.779, "step": 5337 }, { "epoch": 0.09757435063154624, "grad_norm": 6.374458346957026, "learning_rate": 9.880796444506588e-06, "loss": 17.3893, "step": 5338 }, { "epoch": 0.09759262982799276, "grad_norm": 6.271245313507004, "learning_rate": 9.88073218505317e-06, "loss": 17.1529, "step": 5339 }, { "epoch": 0.09761090902443928, "grad_norm": 7.322123584958095, "learning_rate": 9.880667908493209e-06, "loss": 17.7049, "step": 5340 }, { "epoch": 0.09762918822088582, "grad_norm": 7.979414944996543, "learning_rate": 9.88060361482693e-06, "loss": 18.3133, "step": 5341 }, { "epoch": 0.09764746741733234, "grad_norm": 8.61766632091909, "learning_rate": 9.88053930405456e-06, "loss": 18.5276, "step": 5342 }, { "epoch": 0.09766574661377886, "grad_norm": 9.690909411811695, "learning_rate": 9.880474976176322e-06, "loss": 19.0181, "step": 5343 }, { "epoch": 0.09768402581022538, "grad_norm": 7.445909909009077, "learning_rate": 9.880410631192444e-06, "loss": 17.7228, "step": 5344 }, { "epoch": 0.0977023050066719, "grad_norm": 6.5210768427174095, "learning_rate": 9.88034626910315e-06, "loss": 17.4109, "step": 5345 }, { "epoch": 0.09772058420311842, "grad_norm": 7.888989937175442, "learning_rate": 9.880281889908665e-06, "loss": 18.2857, "step": 5346 }, { "epoch": 0.09773886339956496, "grad_norm": 7.253556566254523, "learning_rate": 9.880217493609216e-06, "loss": 17.6233, "step": 5347 }, { "epoch": 0.09775714259601148, "grad_norm": 7.493275082127145, "learning_rate": 9.880153080205028e-06, "loss": 17.9103, "step": 5348 }, { "epoch": 0.097775421792458, "grad_norm": 6.687304133962577, "learning_rate": 9.880088649696327e-06, "loss": 17.7951, "step": 5349 }, { "epoch": 0.09779370098890453, "grad_norm": 6.6487009943221835, "learning_rate": 9.88002420208334e-06, "loss": 17.6598, "step": 5350 }, { "epoch": 0.09781198018535105, "grad_norm": 6.052180943355565, "learning_rate": 9.879959737366289e-06, "loss": 16.9574, "step": 5351 }, { "epoch": 0.09783025938179758, "grad_norm": 7.663162660828646, "learning_rate": 9.879895255545405e-06, "loss": 18.334, "step": 5352 }, { "epoch": 0.0978485385782441, "grad_norm": 7.163208212979588, "learning_rate": 9.87983075662091e-06, "loss": 17.9333, "step": 5353 }, { "epoch": 0.09786681777469063, "grad_norm": 7.711414455564948, "learning_rate": 9.879766240593033e-06, "loss": 17.826, "step": 5354 }, { "epoch": 0.09788509697113715, "grad_norm": 9.910252461266577, "learning_rate": 9.879701707461998e-06, "loss": 18.5209, "step": 5355 }, { "epoch": 0.09790337616758367, "grad_norm": 7.544857749664136, "learning_rate": 9.879637157228032e-06, "loss": 17.5333, "step": 5356 }, { "epoch": 0.09792165536403019, "grad_norm": 7.099394526580665, "learning_rate": 9.87957258989136e-06, "loss": 17.7289, "step": 5357 }, { "epoch": 0.09793993456047673, "grad_norm": 8.002312473025757, "learning_rate": 9.879508005452212e-06, "loss": 18.2373, "step": 5358 }, { "epoch": 0.09795821375692325, "grad_norm": 6.8712882139932185, "learning_rate": 9.879443403910812e-06, "loss": 17.4482, "step": 5359 }, { "epoch": 0.09797649295336977, "grad_norm": 6.542254007103261, "learning_rate": 9.879378785267383e-06, "loss": 17.6043, "step": 5360 }, { "epoch": 0.09799477214981629, "grad_norm": 8.079079767229963, "learning_rate": 9.879314149522156e-06, "loss": 18.41, "step": 5361 }, { "epoch": 0.09801305134626281, "grad_norm": 7.230662786934817, "learning_rate": 9.879249496675359e-06, "loss": 17.8467, "step": 5362 }, { "epoch": 0.09803133054270934, "grad_norm": 8.237757438238472, "learning_rate": 9.879184826727213e-06, "loss": 18.0755, "step": 5363 }, { "epoch": 0.09804960973915587, "grad_norm": 6.64001757792681, "learning_rate": 9.879120139677949e-06, "loss": 17.7599, "step": 5364 }, { "epoch": 0.0980678889356024, "grad_norm": 9.00030856218242, "learning_rate": 9.87905543552779e-06, "loss": 18.5465, "step": 5365 }, { "epoch": 0.09808616813204891, "grad_norm": 5.9319514025512206, "learning_rate": 9.878990714276968e-06, "loss": 17.1628, "step": 5366 }, { "epoch": 0.09810444732849544, "grad_norm": 6.982178255073946, "learning_rate": 9.878925975925707e-06, "loss": 17.8482, "step": 5367 }, { "epoch": 0.09812272652494196, "grad_norm": 7.016553498668367, "learning_rate": 9.878861220474232e-06, "loss": 17.7667, "step": 5368 }, { "epoch": 0.0981410057213885, "grad_norm": 7.201736280021446, "learning_rate": 9.878796447922771e-06, "loss": 17.5916, "step": 5369 }, { "epoch": 0.09815928491783502, "grad_norm": 7.532481678533857, "learning_rate": 9.878731658271554e-06, "loss": 17.8754, "step": 5370 }, { "epoch": 0.09817756411428154, "grad_norm": 6.311230681650899, "learning_rate": 9.878666851520805e-06, "loss": 17.4986, "step": 5371 }, { "epoch": 0.09819584331072806, "grad_norm": 6.475750631197108, "learning_rate": 9.878602027670751e-06, "loss": 17.5613, "step": 5372 }, { "epoch": 0.09821412250717458, "grad_norm": 7.224910645473922, "learning_rate": 9.878537186721623e-06, "loss": 17.752, "step": 5373 }, { "epoch": 0.0982324017036211, "grad_norm": 8.045719608506186, "learning_rate": 9.878472328673643e-06, "loss": 18.1327, "step": 5374 }, { "epoch": 0.09825068090006764, "grad_norm": 7.44718921926065, "learning_rate": 9.878407453527042e-06, "loss": 18.0817, "step": 5375 }, { "epoch": 0.09826896009651416, "grad_norm": 8.031113172574356, "learning_rate": 9.878342561282046e-06, "loss": 17.434, "step": 5376 }, { "epoch": 0.09828723929296068, "grad_norm": 7.582931894439497, "learning_rate": 9.878277651938883e-06, "loss": 17.8379, "step": 5377 }, { "epoch": 0.0983055184894072, "grad_norm": 6.2691037363538324, "learning_rate": 9.87821272549778e-06, "loss": 17.4363, "step": 5378 }, { "epoch": 0.09832379768585373, "grad_norm": 7.910581747723305, "learning_rate": 9.878147781958965e-06, "loss": 18.1192, "step": 5379 }, { "epoch": 0.09834207688230025, "grad_norm": 7.379569009044979, "learning_rate": 9.878082821322663e-06, "loss": 18.0581, "step": 5380 }, { "epoch": 0.09836035607874678, "grad_norm": 6.527518056624541, "learning_rate": 9.878017843589107e-06, "loss": 17.4445, "step": 5381 }, { "epoch": 0.0983786352751933, "grad_norm": 6.78755253095713, "learning_rate": 9.877952848758519e-06, "loss": 17.6175, "step": 5382 }, { "epoch": 0.09839691447163983, "grad_norm": 6.977920251600107, "learning_rate": 9.877887836831132e-06, "loss": 17.6141, "step": 5383 }, { "epoch": 0.09841519366808635, "grad_norm": 8.290696312597492, "learning_rate": 9.87782280780717e-06, "loss": 18.2516, "step": 5384 }, { "epoch": 0.09843347286453287, "grad_norm": 6.44443039994811, "learning_rate": 9.877757761686864e-06, "loss": 17.2411, "step": 5385 }, { "epoch": 0.0984517520609794, "grad_norm": 8.039395493731158, "learning_rate": 9.877692698470438e-06, "loss": 18.1254, "step": 5386 }, { "epoch": 0.09847003125742593, "grad_norm": 6.174780650552495, "learning_rate": 9.877627618158123e-06, "loss": 17.3012, "step": 5387 }, { "epoch": 0.09848831045387245, "grad_norm": 8.564452027284869, "learning_rate": 9.877562520750148e-06, "loss": 18.3757, "step": 5388 }, { "epoch": 0.09850658965031897, "grad_norm": 12.723700125736002, "learning_rate": 9.877497406246739e-06, "loss": 18.9342, "step": 5389 }, { "epoch": 0.09852486884676549, "grad_norm": 6.570689834194891, "learning_rate": 9.877432274648125e-06, "loss": 17.6527, "step": 5390 }, { "epoch": 0.09854314804321201, "grad_norm": 7.212075969847427, "learning_rate": 9.877367125954532e-06, "loss": 17.6892, "step": 5391 }, { "epoch": 0.09856142723965855, "grad_norm": 7.162008135860617, "learning_rate": 9.877301960166192e-06, "loss": 17.8613, "step": 5392 }, { "epoch": 0.09857970643610507, "grad_norm": 7.799232092978829, "learning_rate": 9.877236777283332e-06, "loss": 17.9345, "step": 5393 }, { "epoch": 0.0985979856325516, "grad_norm": 6.31091864296637, "learning_rate": 9.877171577306181e-06, "loss": 17.4031, "step": 5394 }, { "epoch": 0.09861626482899812, "grad_norm": 6.791065851383708, "learning_rate": 9.877106360234964e-06, "loss": 17.6314, "step": 5395 }, { "epoch": 0.09863454402544464, "grad_norm": 7.314248203014236, "learning_rate": 9.877041126069917e-06, "loss": 17.7338, "step": 5396 }, { "epoch": 0.09865282322189116, "grad_norm": 8.803413494157793, "learning_rate": 9.876975874811261e-06, "loss": 18.8308, "step": 5397 }, { "epoch": 0.0986711024183377, "grad_norm": 11.129809467457516, "learning_rate": 9.876910606459228e-06, "loss": 18.3517, "step": 5398 }, { "epoch": 0.09868938161478422, "grad_norm": 6.960028940931708, "learning_rate": 9.876845321014047e-06, "loss": 17.7416, "step": 5399 }, { "epoch": 0.09870766081123074, "grad_norm": 7.766890205745459, "learning_rate": 9.876780018475947e-06, "loss": 18.0501, "step": 5400 }, { "epoch": 0.09872594000767726, "grad_norm": 6.609650701226153, "learning_rate": 9.876714698845153e-06, "loss": 17.2986, "step": 5401 }, { "epoch": 0.09874421920412378, "grad_norm": 7.285538815241017, "learning_rate": 9.876649362121901e-06, "loss": 17.8622, "step": 5402 }, { "epoch": 0.09876249840057032, "grad_norm": 7.016219968300518, "learning_rate": 9.876584008306414e-06, "loss": 17.8093, "step": 5403 }, { "epoch": 0.09878077759701684, "grad_norm": 7.575328071727429, "learning_rate": 9.876518637398924e-06, "loss": 17.7971, "step": 5404 }, { "epoch": 0.09879905679346336, "grad_norm": 5.507656527965496, "learning_rate": 9.87645324939966e-06, "loss": 17.1415, "step": 5405 }, { "epoch": 0.09881733598990988, "grad_norm": 7.515076034232811, "learning_rate": 9.87638784430885e-06, "loss": 17.5956, "step": 5406 }, { "epoch": 0.0988356151863564, "grad_norm": 6.698772758642591, "learning_rate": 9.876322422126722e-06, "loss": 17.5607, "step": 5407 }, { "epoch": 0.09885389438280293, "grad_norm": 7.084818200127224, "learning_rate": 9.87625698285351e-06, "loss": 17.8535, "step": 5408 }, { "epoch": 0.09887217357924946, "grad_norm": 7.361523689917995, "learning_rate": 9.876191526489438e-06, "loss": 17.7445, "step": 5409 }, { "epoch": 0.09889045277569598, "grad_norm": 7.5897634403978955, "learning_rate": 9.87612605303474e-06, "loss": 17.8592, "step": 5410 }, { "epoch": 0.0989087319721425, "grad_norm": 8.596622229806462, "learning_rate": 9.876060562489643e-06, "loss": 18.291, "step": 5411 }, { "epoch": 0.09892701116858903, "grad_norm": 7.167603474624208, "learning_rate": 9.875995054854375e-06, "loss": 17.9727, "step": 5412 }, { "epoch": 0.09894529036503555, "grad_norm": 7.666953240832883, "learning_rate": 9.875929530129167e-06, "loss": 18.0282, "step": 5413 }, { "epoch": 0.09896356956148207, "grad_norm": 7.604734768374753, "learning_rate": 9.875863988314252e-06, "loss": 18.2639, "step": 5414 }, { "epoch": 0.0989818487579286, "grad_norm": 7.861239661676397, "learning_rate": 9.875798429409855e-06, "loss": 18.2204, "step": 5415 }, { "epoch": 0.09900012795437513, "grad_norm": 7.2050207094450895, "learning_rate": 9.875732853416208e-06, "loss": 17.7534, "step": 5416 }, { "epoch": 0.09901840715082165, "grad_norm": 8.512749916335364, "learning_rate": 9.87566726033354e-06, "loss": 17.7452, "step": 5417 }, { "epoch": 0.09903668634726817, "grad_norm": 7.1990424107187305, "learning_rate": 9.875601650162082e-06, "loss": 17.7461, "step": 5418 }, { "epoch": 0.0990549655437147, "grad_norm": 7.561257344374474, "learning_rate": 9.875536022902064e-06, "loss": 17.911, "step": 5419 }, { "epoch": 0.09907324474016123, "grad_norm": 7.608397345256539, "learning_rate": 9.875470378553714e-06, "loss": 17.753, "step": 5420 }, { "epoch": 0.09909152393660775, "grad_norm": 6.4574803895087705, "learning_rate": 9.875404717117263e-06, "loss": 17.4264, "step": 5421 }, { "epoch": 0.09910980313305427, "grad_norm": 6.691709365344509, "learning_rate": 9.875339038592944e-06, "loss": 17.6796, "step": 5422 }, { "epoch": 0.0991280823295008, "grad_norm": 7.47764722151518, "learning_rate": 9.875273342980982e-06, "loss": 18.1239, "step": 5423 }, { "epoch": 0.09914636152594732, "grad_norm": 6.205851196218604, "learning_rate": 9.875207630281611e-06, "loss": 17.4564, "step": 5424 }, { "epoch": 0.09916464072239384, "grad_norm": 7.055443921268711, "learning_rate": 9.87514190049506e-06, "loss": 17.8814, "step": 5425 }, { "epoch": 0.09918291991884037, "grad_norm": 6.347899534679206, "learning_rate": 9.87507615362156e-06, "loss": 17.3062, "step": 5426 }, { "epoch": 0.0992011991152869, "grad_norm": 6.219401244317228, "learning_rate": 9.875010389661341e-06, "loss": 17.2959, "step": 5427 }, { "epoch": 0.09921947831173342, "grad_norm": 5.659634986824411, "learning_rate": 9.874944608614634e-06, "loss": 17.1571, "step": 5428 }, { "epoch": 0.09923775750817994, "grad_norm": 9.489737878088745, "learning_rate": 9.874878810481669e-06, "loss": 18.5848, "step": 5429 }, { "epoch": 0.09925603670462646, "grad_norm": 6.934344610746548, "learning_rate": 9.874812995262676e-06, "loss": 17.7542, "step": 5430 }, { "epoch": 0.09927431590107298, "grad_norm": 7.698012600565545, "learning_rate": 9.87474716295789e-06, "loss": 18.2417, "step": 5431 }, { "epoch": 0.09929259509751952, "grad_norm": 8.826267470753006, "learning_rate": 9.874681313567533e-06, "loss": 18.6452, "step": 5432 }, { "epoch": 0.09931087429396604, "grad_norm": 8.153216940800014, "learning_rate": 9.874615447091845e-06, "loss": 17.8559, "step": 5433 }, { "epoch": 0.09932915349041256, "grad_norm": 5.80915749006908, "learning_rate": 9.874549563531051e-06, "loss": 17.4492, "step": 5434 }, { "epoch": 0.09934743268685908, "grad_norm": 7.003977421272474, "learning_rate": 9.874483662885383e-06, "loss": 17.6575, "step": 5435 }, { "epoch": 0.0993657118833056, "grad_norm": 7.256371239184099, "learning_rate": 9.874417745155075e-06, "loss": 17.9347, "step": 5436 }, { "epoch": 0.09938399107975214, "grad_norm": 6.660592230882827, "learning_rate": 9.874351810340355e-06, "loss": 17.7508, "step": 5437 }, { "epoch": 0.09940227027619866, "grad_norm": 6.208497065931254, "learning_rate": 9.874285858441455e-06, "loss": 17.3796, "step": 5438 }, { "epoch": 0.09942054947264518, "grad_norm": 7.348407501302437, "learning_rate": 9.874219889458605e-06, "loss": 17.5801, "step": 5439 }, { "epoch": 0.0994388286690917, "grad_norm": 8.023627408244378, "learning_rate": 9.874153903392037e-06, "loss": 18.1776, "step": 5440 }, { "epoch": 0.09945710786553823, "grad_norm": 7.112190887858089, "learning_rate": 9.874087900241984e-06, "loss": 17.5814, "step": 5441 }, { "epoch": 0.09947538706198475, "grad_norm": 7.154703241715606, "learning_rate": 9.874021880008675e-06, "loss": 17.4067, "step": 5442 }, { "epoch": 0.09949366625843128, "grad_norm": 6.544009814900835, "learning_rate": 9.873955842692341e-06, "loss": 17.3562, "step": 5443 }, { "epoch": 0.0995119454548778, "grad_norm": 8.454864865370041, "learning_rate": 9.873889788293217e-06, "loss": 17.7853, "step": 5444 }, { "epoch": 0.09953022465132433, "grad_norm": 7.003491466502003, "learning_rate": 9.873823716811533e-06, "loss": 17.6397, "step": 5445 }, { "epoch": 0.09954850384777085, "grad_norm": 7.77339412187411, "learning_rate": 9.873757628247516e-06, "loss": 17.9778, "step": 5446 }, { "epoch": 0.09956678304421737, "grad_norm": 6.698825218030373, "learning_rate": 9.873691522601406e-06, "loss": 17.5973, "step": 5447 }, { "epoch": 0.0995850622406639, "grad_norm": 7.93327135814933, "learning_rate": 9.873625399873426e-06, "loss": 18.0847, "step": 5448 }, { "epoch": 0.09960334143711043, "grad_norm": 7.123565048979408, "learning_rate": 9.873559260063814e-06, "loss": 17.9277, "step": 5449 }, { "epoch": 0.09962162063355695, "grad_norm": 8.034482781471866, "learning_rate": 9.8734931031728e-06, "loss": 18.2641, "step": 5450 }, { "epoch": 0.09963989983000347, "grad_norm": 7.698896824331821, "learning_rate": 9.873426929200614e-06, "loss": 18.0796, "step": 5451 }, { "epoch": 0.09965817902645, "grad_norm": 6.570824200178621, "learning_rate": 9.87336073814749e-06, "loss": 17.3793, "step": 5452 }, { "epoch": 0.09967645822289652, "grad_norm": 6.639729025706504, "learning_rate": 9.873294530013661e-06, "loss": 17.6847, "step": 5453 }, { "epoch": 0.09969473741934305, "grad_norm": 7.220849341731857, "learning_rate": 9.873228304799357e-06, "loss": 17.5644, "step": 5454 }, { "epoch": 0.09971301661578957, "grad_norm": 7.309164793387815, "learning_rate": 9.87316206250481e-06, "loss": 18.1547, "step": 5455 }, { "epoch": 0.0997312958122361, "grad_norm": 7.2867175641362145, "learning_rate": 9.873095803130252e-06, "loss": 17.7135, "step": 5456 }, { "epoch": 0.09974957500868262, "grad_norm": 8.09917890677358, "learning_rate": 9.873029526675916e-06, "loss": 17.8355, "step": 5457 }, { "epoch": 0.09976785420512914, "grad_norm": 7.5774145356789955, "learning_rate": 9.872963233142037e-06, "loss": 17.8691, "step": 5458 }, { "epoch": 0.09978613340157566, "grad_norm": 7.1023109715224, "learning_rate": 9.872896922528842e-06, "loss": 17.4237, "step": 5459 }, { "epoch": 0.0998044125980222, "grad_norm": 7.081672996540094, "learning_rate": 9.872830594836568e-06, "loss": 17.5306, "step": 5460 }, { "epoch": 0.09982269179446872, "grad_norm": 7.587101306162368, "learning_rate": 9.872764250065445e-06, "loss": 17.9003, "step": 5461 }, { "epoch": 0.09984097099091524, "grad_norm": 7.449325638916134, "learning_rate": 9.872697888215706e-06, "loss": 17.6771, "step": 5462 }, { "epoch": 0.09985925018736176, "grad_norm": 7.989129904760626, "learning_rate": 9.872631509287585e-06, "loss": 18.3158, "step": 5463 }, { "epoch": 0.09987752938380828, "grad_norm": 7.944580874655752, "learning_rate": 9.872565113281312e-06, "loss": 17.7879, "step": 5464 }, { "epoch": 0.0998958085802548, "grad_norm": 8.284103749673918, "learning_rate": 9.872498700197121e-06, "loss": 17.8936, "step": 5465 }, { "epoch": 0.09991408777670134, "grad_norm": 8.227151047533471, "learning_rate": 9.872432270035245e-06, "loss": 17.9144, "step": 5466 }, { "epoch": 0.09993236697314786, "grad_norm": 6.448364487132646, "learning_rate": 9.872365822795917e-06, "loss": 17.2946, "step": 5467 }, { "epoch": 0.09995064616959438, "grad_norm": 8.653589408219817, "learning_rate": 9.872299358479369e-06, "loss": 18.6194, "step": 5468 }, { "epoch": 0.0999689253660409, "grad_norm": 8.562719262758154, "learning_rate": 9.872232877085835e-06, "loss": 18.3316, "step": 5469 }, { "epoch": 0.09998720456248743, "grad_norm": 8.422446447160278, "learning_rate": 9.872166378615547e-06, "loss": 18.5237, "step": 5470 }, { "epoch": 0.10000548375893396, "grad_norm": 7.674541089888339, "learning_rate": 9.87209986306874e-06, "loss": 17.6449, "step": 5471 }, { "epoch": 0.10002376295538049, "grad_norm": 6.9285136410043044, "learning_rate": 9.872033330445645e-06, "loss": 17.7269, "step": 5472 }, { "epoch": 0.10004204215182701, "grad_norm": 7.052208472841874, "learning_rate": 9.871966780746495e-06, "loss": 17.7994, "step": 5473 }, { "epoch": 0.10006032134827353, "grad_norm": 6.902007178782599, "learning_rate": 9.871900213971527e-06, "loss": 17.5682, "step": 5474 }, { "epoch": 0.10007860054472005, "grad_norm": 6.6627383854906626, "learning_rate": 9.871833630120968e-06, "loss": 17.4002, "step": 5475 }, { "epoch": 0.10009687974116657, "grad_norm": 7.244903023072964, "learning_rate": 9.871767029195058e-06, "loss": 17.9044, "step": 5476 }, { "epoch": 0.10011515893761311, "grad_norm": 7.31228976482119, "learning_rate": 9.871700411194025e-06, "loss": 17.7802, "step": 5477 }, { "epoch": 0.10013343813405963, "grad_norm": 7.614321893886776, "learning_rate": 9.871633776118106e-06, "loss": 17.8275, "step": 5478 }, { "epoch": 0.10015171733050615, "grad_norm": 8.593718313171816, "learning_rate": 9.871567123967533e-06, "loss": 18.3694, "step": 5479 }, { "epoch": 0.10016999652695267, "grad_norm": 7.081722185202732, "learning_rate": 9.87150045474254e-06, "loss": 17.7706, "step": 5480 }, { "epoch": 0.1001882757233992, "grad_norm": 7.434183754851058, "learning_rate": 9.87143376844336e-06, "loss": 17.7374, "step": 5481 }, { "epoch": 0.10020655491984572, "grad_norm": 8.14436549996752, "learning_rate": 9.871367065070228e-06, "loss": 18.0334, "step": 5482 }, { "epoch": 0.10022483411629225, "grad_norm": 6.6415783121783205, "learning_rate": 9.871300344623378e-06, "loss": 17.6521, "step": 5483 }, { "epoch": 0.10024311331273877, "grad_norm": 8.553573215241945, "learning_rate": 9.871233607103042e-06, "loss": 18.6418, "step": 5484 }, { "epoch": 0.1002613925091853, "grad_norm": 7.890120053246864, "learning_rate": 9.871166852509456e-06, "loss": 18.1467, "step": 5485 }, { "epoch": 0.10027967170563182, "grad_norm": 6.657974996457115, "learning_rate": 9.87110008084285e-06, "loss": 17.6226, "step": 5486 }, { "epoch": 0.10029795090207834, "grad_norm": 8.216213128635172, "learning_rate": 9.871033292103462e-06, "loss": 18.5853, "step": 5487 }, { "epoch": 0.10031623009852488, "grad_norm": 6.911988755346808, "learning_rate": 9.870966486291527e-06, "loss": 17.5637, "step": 5488 }, { "epoch": 0.1003345092949714, "grad_norm": 8.964046214265261, "learning_rate": 9.870899663407276e-06, "loss": 18.7191, "step": 5489 }, { "epoch": 0.10035278849141792, "grad_norm": 6.00516386692326, "learning_rate": 9.870832823450945e-06, "loss": 17.3014, "step": 5490 }, { "epoch": 0.10037106768786444, "grad_norm": 7.277179726087112, "learning_rate": 9.870765966422766e-06, "loss": 17.6824, "step": 5491 }, { "epoch": 0.10038934688431096, "grad_norm": 7.756126483330481, "learning_rate": 9.870699092322977e-06, "loss": 18.0805, "step": 5492 }, { "epoch": 0.10040762608075748, "grad_norm": 8.112557487227987, "learning_rate": 9.870632201151808e-06, "loss": 17.9869, "step": 5493 }, { "epoch": 0.10042590527720402, "grad_norm": 6.362690345339067, "learning_rate": 9.870565292909498e-06, "loss": 17.3655, "step": 5494 }, { "epoch": 0.10044418447365054, "grad_norm": 6.883410614087181, "learning_rate": 9.870498367596278e-06, "loss": 17.4808, "step": 5495 }, { "epoch": 0.10046246367009706, "grad_norm": 7.798277815013971, "learning_rate": 9.870431425212384e-06, "loss": 17.8913, "step": 5496 }, { "epoch": 0.10048074286654358, "grad_norm": 7.343465589360618, "learning_rate": 9.870364465758052e-06, "loss": 17.8053, "step": 5497 }, { "epoch": 0.1004990220629901, "grad_norm": 7.755293902232316, "learning_rate": 9.870297489233512e-06, "loss": 18.117, "step": 5498 }, { "epoch": 0.10051730125943663, "grad_norm": 6.895189993995462, "learning_rate": 9.870230495639004e-06, "loss": 17.591, "step": 5499 }, { "epoch": 0.10053558045588316, "grad_norm": 9.636986893842327, "learning_rate": 9.870163484974761e-06, "loss": 17.9947, "step": 5500 }, { "epoch": 0.10055385965232969, "grad_norm": 7.017073909550062, "learning_rate": 9.870096457241016e-06, "loss": 17.8571, "step": 5501 }, { "epoch": 0.10057213884877621, "grad_norm": 7.872760716791785, "learning_rate": 9.870029412438007e-06, "loss": 18.1904, "step": 5502 }, { "epoch": 0.10059041804522273, "grad_norm": 6.872200858062733, "learning_rate": 9.869962350565967e-06, "loss": 17.6585, "step": 5503 }, { "epoch": 0.10060869724166925, "grad_norm": 7.059145424434454, "learning_rate": 9.869895271625131e-06, "loss": 17.6791, "step": 5504 }, { "epoch": 0.10062697643811579, "grad_norm": 6.207781969423849, "learning_rate": 9.869828175615737e-06, "loss": 17.2415, "step": 5505 }, { "epoch": 0.10064525563456231, "grad_norm": 5.979674323002116, "learning_rate": 9.869761062538016e-06, "loss": 17.2384, "step": 5506 }, { "epoch": 0.10066353483100883, "grad_norm": 6.240413722493483, "learning_rate": 9.869693932392205e-06, "loss": 17.3594, "step": 5507 }, { "epoch": 0.10068181402745535, "grad_norm": 7.930051945652052, "learning_rate": 9.86962678517854e-06, "loss": 18.0924, "step": 5508 }, { "epoch": 0.10070009322390187, "grad_norm": 8.069429040495264, "learning_rate": 9.869559620897255e-06, "loss": 18.4979, "step": 5509 }, { "epoch": 0.1007183724203484, "grad_norm": 6.733855373581168, "learning_rate": 9.869492439548587e-06, "loss": 17.3737, "step": 5510 }, { "epoch": 0.10073665161679493, "grad_norm": 7.640919666323922, "learning_rate": 9.86942524113277e-06, "loss": 17.77, "step": 5511 }, { "epoch": 0.10075493081324145, "grad_norm": 6.810456747217254, "learning_rate": 9.86935802565004e-06, "loss": 17.5015, "step": 5512 }, { "epoch": 0.10077321000968797, "grad_norm": 7.358783064217443, "learning_rate": 9.869290793100631e-06, "loss": 17.9874, "step": 5513 }, { "epoch": 0.1007914892061345, "grad_norm": 7.266051700607326, "learning_rate": 9.869223543484782e-06, "loss": 17.5573, "step": 5514 }, { "epoch": 0.10080976840258102, "grad_norm": 7.477396230815225, "learning_rate": 9.869156276802729e-06, "loss": 17.7798, "step": 5515 }, { "epoch": 0.10082804759902754, "grad_norm": 8.008841306883555, "learning_rate": 9.869088993054703e-06, "loss": 18.2135, "step": 5516 }, { "epoch": 0.10084632679547408, "grad_norm": 7.280361453713527, "learning_rate": 9.869021692240943e-06, "loss": 17.9519, "step": 5517 }, { "epoch": 0.1008646059919206, "grad_norm": 6.833982330880781, "learning_rate": 9.868954374361685e-06, "loss": 17.7122, "step": 5518 }, { "epoch": 0.10088288518836712, "grad_norm": 8.068587522092342, "learning_rate": 9.868887039417163e-06, "loss": 18.1906, "step": 5519 }, { "epoch": 0.10090116438481364, "grad_norm": 7.51306169463813, "learning_rate": 9.868819687407616e-06, "loss": 17.8529, "step": 5520 }, { "epoch": 0.10091944358126016, "grad_norm": 6.7116284526960435, "learning_rate": 9.868752318333279e-06, "loss": 17.6203, "step": 5521 }, { "epoch": 0.1009377227777067, "grad_norm": 7.722773093298634, "learning_rate": 9.868684932194387e-06, "loss": 18.0917, "step": 5522 }, { "epoch": 0.10095600197415322, "grad_norm": 6.92277413530129, "learning_rate": 9.868617528991177e-06, "loss": 17.6773, "step": 5523 }, { "epoch": 0.10097428117059974, "grad_norm": 7.725717045192423, "learning_rate": 9.868550108723884e-06, "loss": 18.1749, "step": 5524 }, { "epoch": 0.10099256036704626, "grad_norm": 7.57412875694219, "learning_rate": 9.868482671392747e-06, "loss": 17.7129, "step": 5525 }, { "epoch": 0.10101083956349279, "grad_norm": 8.740221519151344, "learning_rate": 9.868415216998e-06, "loss": 18.6339, "step": 5526 }, { "epoch": 0.10102911875993931, "grad_norm": 7.5562932901323165, "learning_rate": 9.86834774553988e-06, "loss": 18.1447, "step": 5527 }, { "epoch": 0.10104739795638584, "grad_norm": 6.976972762478665, "learning_rate": 9.868280257018623e-06, "loss": 17.7476, "step": 5528 }, { "epoch": 0.10106567715283236, "grad_norm": 6.569964094907622, "learning_rate": 9.868212751434467e-06, "loss": 17.5981, "step": 5529 }, { "epoch": 0.10108395634927889, "grad_norm": 7.021708305851113, "learning_rate": 9.868145228787647e-06, "loss": 17.5095, "step": 5530 }, { "epoch": 0.10110223554572541, "grad_norm": 5.597428921716891, "learning_rate": 9.8680776890784e-06, "loss": 17.2427, "step": 5531 }, { "epoch": 0.10112051474217193, "grad_norm": 6.9568685905472805, "learning_rate": 9.868010132306965e-06, "loss": 17.5931, "step": 5532 }, { "epoch": 0.10113879393861845, "grad_norm": 8.334661748358617, "learning_rate": 9.867942558473575e-06, "loss": 18.3263, "step": 5533 }, { "epoch": 0.10115707313506499, "grad_norm": 8.3366835042184, "learning_rate": 9.86787496757847e-06, "loss": 18.0709, "step": 5534 }, { "epoch": 0.10117535233151151, "grad_norm": 7.667699375805215, "learning_rate": 9.867807359621885e-06, "loss": 17.7879, "step": 5535 }, { "epoch": 0.10119363152795803, "grad_norm": 8.824124664629514, "learning_rate": 9.867739734604059e-06, "loss": 18.2866, "step": 5536 }, { "epoch": 0.10121191072440455, "grad_norm": 7.017944125814402, "learning_rate": 9.867672092525224e-06, "loss": 17.7497, "step": 5537 }, { "epoch": 0.10123018992085107, "grad_norm": 8.18842017569041, "learning_rate": 9.867604433385625e-06, "loss": 18.0759, "step": 5538 }, { "epoch": 0.10124846911729761, "grad_norm": 7.997107795168364, "learning_rate": 9.867536757185491e-06, "loss": 18.5344, "step": 5539 }, { "epoch": 0.10126674831374413, "grad_norm": 7.065617845960572, "learning_rate": 9.867469063925065e-06, "loss": 17.7069, "step": 5540 }, { "epoch": 0.10128502751019065, "grad_norm": 8.911140431793497, "learning_rate": 9.867401353604582e-06, "loss": 18.6952, "step": 5541 }, { "epoch": 0.10130330670663718, "grad_norm": 7.068744400916903, "learning_rate": 9.867333626224282e-06, "loss": 17.6487, "step": 5542 }, { "epoch": 0.1013215859030837, "grad_norm": 6.838812273696042, "learning_rate": 9.867265881784399e-06, "loss": 17.5511, "step": 5543 }, { "epoch": 0.10133986509953022, "grad_norm": 7.812591132063666, "learning_rate": 9.867198120285169e-06, "loss": 18.1763, "step": 5544 }, { "epoch": 0.10135814429597675, "grad_norm": 6.59172368394794, "learning_rate": 9.867130341726835e-06, "loss": 17.3611, "step": 5545 }, { "epoch": 0.10137642349242328, "grad_norm": 5.833388458503554, "learning_rate": 9.867062546109627e-06, "loss": 17.216, "step": 5546 }, { "epoch": 0.1013947026888698, "grad_norm": 7.554618888036021, "learning_rate": 9.866994733433792e-06, "loss": 17.9252, "step": 5547 }, { "epoch": 0.10141298188531632, "grad_norm": 6.992305674904265, "learning_rate": 9.866926903699561e-06, "loss": 17.5893, "step": 5548 }, { "epoch": 0.10143126108176284, "grad_norm": 7.343410832162275, "learning_rate": 9.866859056907171e-06, "loss": 17.8714, "step": 5549 }, { "epoch": 0.10144954027820936, "grad_norm": 6.688629495531107, "learning_rate": 9.866791193056866e-06, "loss": 17.5182, "step": 5550 }, { "epoch": 0.1014678194746559, "grad_norm": 8.989511728832127, "learning_rate": 9.86672331214888e-06, "loss": 18.3834, "step": 5551 }, { "epoch": 0.10148609867110242, "grad_norm": 7.0848265652247235, "learning_rate": 9.86665541418345e-06, "loss": 17.724, "step": 5552 }, { "epoch": 0.10150437786754894, "grad_norm": 6.465072724557792, "learning_rate": 9.866587499160813e-06, "loss": 17.5554, "step": 5553 }, { "epoch": 0.10152265706399546, "grad_norm": 8.03355687173735, "learning_rate": 9.866519567081213e-06, "loss": 18.4101, "step": 5554 }, { "epoch": 0.10154093626044199, "grad_norm": 7.843843302158278, "learning_rate": 9.866451617944881e-06, "loss": 17.962, "step": 5555 }, { "epoch": 0.10155921545688852, "grad_norm": 8.061815569234346, "learning_rate": 9.86638365175206e-06, "loss": 18.1134, "step": 5556 }, { "epoch": 0.10157749465333504, "grad_norm": 6.913228063862076, "learning_rate": 9.866315668502986e-06, "loss": 17.8261, "step": 5557 }, { "epoch": 0.10159577384978156, "grad_norm": 7.602059052770785, "learning_rate": 9.8662476681979e-06, "loss": 17.9167, "step": 5558 }, { "epoch": 0.10161405304622809, "grad_norm": 7.284534676397395, "learning_rate": 9.866179650837035e-06, "loss": 17.9744, "step": 5559 }, { "epoch": 0.10163233224267461, "grad_norm": 8.078679404599374, "learning_rate": 9.866111616420635e-06, "loss": 17.7994, "step": 5560 }, { "epoch": 0.10165061143912113, "grad_norm": 7.880967909463967, "learning_rate": 9.866043564948935e-06, "loss": 17.8808, "step": 5561 }, { "epoch": 0.10166889063556767, "grad_norm": 7.9904514906741415, "learning_rate": 9.865975496422175e-06, "loss": 18.4223, "step": 5562 }, { "epoch": 0.10168716983201419, "grad_norm": 7.984160213272381, "learning_rate": 9.865907410840592e-06, "loss": 18.4557, "step": 5563 }, { "epoch": 0.10170544902846071, "grad_norm": 7.195063687482623, "learning_rate": 9.865839308204425e-06, "loss": 17.646, "step": 5564 }, { "epoch": 0.10172372822490723, "grad_norm": 8.913339901124274, "learning_rate": 9.865771188513917e-06, "loss": 18.6782, "step": 5565 }, { "epoch": 0.10174200742135375, "grad_norm": 6.952717261414831, "learning_rate": 9.8657030517693e-06, "loss": 17.8719, "step": 5566 }, { "epoch": 0.10176028661780027, "grad_norm": 7.409917137842797, "learning_rate": 9.865634897970817e-06, "loss": 18.1012, "step": 5567 }, { "epoch": 0.10177856581424681, "grad_norm": 6.704729071553811, "learning_rate": 9.865566727118708e-06, "loss": 17.6171, "step": 5568 }, { "epoch": 0.10179684501069333, "grad_norm": 6.115780679568604, "learning_rate": 9.865498539213207e-06, "loss": 17.1877, "step": 5569 }, { "epoch": 0.10181512420713985, "grad_norm": 7.119916928732996, "learning_rate": 9.865430334254557e-06, "loss": 17.8221, "step": 5570 }, { "epoch": 0.10183340340358638, "grad_norm": 6.134259618783842, "learning_rate": 9.865362112242995e-06, "loss": 17.0704, "step": 5571 }, { "epoch": 0.1018516826000329, "grad_norm": 7.978678414155514, "learning_rate": 9.865293873178762e-06, "loss": 18.0233, "step": 5572 }, { "epoch": 0.10186996179647943, "grad_norm": 7.215070387118926, "learning_rate": 9.865225617062096e-06, "loss": 17.801, "step": 5573 }, { "epoch": 0.10188824099292595, "grad_norm": 7.31919282192941, "learning_rate": 9.865157343893238e-06, "loss": 17.9607, "step": 5574 }, { "epoch": 0.10190652018937248, "grad_norm": 7.913734248080607, "learning_rate": 9.865089053672422e-06, "loss": 18.0854, "step": 5575 }, { "epoch": 0.101924799385819, "grad_norm": 6.924365059570353, "learning_rate": 9.865020746399894e-06, "loss": 17.7547, "step": 5576 }, { "epoch": 0.10194307858226552, "grad_norm": 8.725882960381123, "learning_rate": 9.864952422075889e-06, "loss": 18.7398, "step": 5577 }, { "epoch": 0.10196135777871204, "grad_norm": 7.353269797435892, "learning_rate": 9.864884080700648e-06, "loss": 18.0692, "step": 5578 }, { "epoch": 0.10197963697515858, "grad_norm": 6.545132518592989, "learning_rate": 9.86481572227441e-06, "loss": 17.5665, "step": 5579 }, { "epoch": 0.1019979161716051, "grad_norm": 7.193654720088067, "learning_rate": 9.864747346797416e-06, "loss": 17.8518, "step": 5580 }, { "epoch": 0.10201619536805162, "grad_norm": 6.715525651364767, "learning_rate": 9.864678954269904e-06, "loss": 17.4871, "step": 5581 }, { "epoch": 0.10203447456449814, "grad_norm": 6.364410358375826, "learning_rate": 9.864610544692115e-06, "loss": 17.2479, "step": 5582 }, { "epoch": 0.10205275376094466, "grad_norm": 7.787402952253585, "learning_rate": 9.864542118064289e-06, "loss": 18.2042, "step": 5583 }, { "epoch": 0.10207103295739119, "grad_norm": 8.656929520314254, "learning_rate": 9.864473674386663e-06, "loss": 18.2817, "step": 5584 }, { "epoch": 0.10208931215383772, "grad_norm": 7.1349927267277815, "learning_rate": 9.86440521365948e-06, "loss": 17.9565, "step": 5585 }, { "epoch": 0.10210759135028424, "grad_norm": 8.451405774295017, "learning_rate": 9.86433673588298e-06, "loss": 18.2044, "step": 5586 }, { "epoch": 0.10212587054673077, "grad_norm": 6.227900189800435, "learning_rate": 9.8642682410574e-06, "loss": 17.0415, "step": 5587 }, { "epoch": 0.10214414974317729, "grad_norm": 7.342311981082835, "learning_rate": 9.864199729182983e-06, "loss": 17.8797, "step": 5588 }, { "epoch": 0.10216242893962381, "grad_norm": 7.0039651354517805, "learning_rate": 9.864131200259967e-06, "loss": 17.7251, "step": 5589 }, { "epoch": 0.10218070813607034, "grad_norm": 6.62975218233901, "learning_rate": 9.864062654288595e-06, "loss": 17.4632, "step": 5590 }, { "epoch": 0.10219898733251687, "grad_norm": 7.889146122312645, "learning_rate": 9.863994091269104e-06, "loss": 18.0917, "step": 5591 }, { "epoch": 0.10221726652896339, "grad_norm": 7.2736825293141285, "learning_rate": 9.863925511201737e-06, "loss": 17.9487, "step": 5592 }, { "epoch": 0.10223554572540991, "grad_norm": 6.440638029377337, "learning_rate": 9.863856914086732e-06, "loss": 17.2682, "step": 5593 }, { "epoch": 0.10225382492185643, "grad_norm": 7.658628036651079, "learning_rate": 9.86378829992433e-06, "loss": 17.8727, "step": 5594 }, { "epoch": 0.10227210411830295, "grad_norm": 6.086356623607372, "learning_rate": 9.863719668714774e-06, "loss": 17.2037, "step": 5595 }, { "epoch": 0.10229038331474949, "grad_norm": 6.786964768966258, "learning_rate": 9.8636510204583e-06, "loss": 17.5606, "step": 5596 }, { "epoch": 0.10230866251119601, "grad_norm": 7.003310323853189, "learning_rate": 9.863582355155154e-06, "loss": 17.6939, "step": 5597 }, { "epoch": 0.10232694170764253, "grad_norm": 7.884423014166429, "learning_rate": 9.863513672805572e-06, "loss": 18.2019, "step": 5598 }, { "epoch": 0.10234522090408905, "grad_norm": 7.115546747698863, "learning_rate": 9.863444973409797e-06, "loss": 17.6195, "step": 5599 }, { "epoch": 0.10236350010053558, "grad_norm": 7.908531301386409, "learning_rate": 9.86337625696807e-06, "loss": 17.6112, "step": 5600 }, { "epoch": 0.1023817792969821, "grad_norm": 9.359206717718962, "learning_rate": 9.86330752348063e-06, "loss": 18.813, "step": 5601 }, { "epoch": 0.10240005849342863, "grad_norm": 7.512060769664707, "learning_rate": 9.86323877294772e-06, "loss": 18.0456, "step": 5602 }, { "epoch": 0.10241833768987516, "grad_norm": 7.484972278754374, "learning_rate": 9.863170005369581e-06, "loss": 17.7385, "step": 5603 }, { "epoch": 0.10243661688632168, "grad_norm": 7.909413744077415, "learning_rate": 9.863101220746452e-06, "loss": 18.1526, "step": 5604 }, { "epoch": 0.1024548960827682, "grad_norm": 6.501674884686735, "learning_rate": 9.863032419078576e-06, "loss": 17.4334, "step": 5605 }, { "epoch": 0.10247317527921472, "grad_norm": 7.725305001947346, "learning_rate": 9.862963600366193e-06, "loss": 17.8977, "step": 5606 }, { "epoch": 0.10249145447566126, "grad_norm": 7.262891534764362, "learning_rate": 9.862894764609545e-06, "loss": 17.6885, "step": 5607 }, { "epoch": 0.10250973367210778, "grad_norm": 5.806943049019033, "learning_rate": 9.862825911808872e-06, "loss": 17.182, "step": 5608 }, { "epoch": 0.1025280128685543, "grad_norm": 6.841754023206308, "learning_rate": 9.862757041964417e-06, "loss": 17.8432, "step": 5609 }, { "epoch": 0.10254629206500082, "grad_norm": 8.892583362257891, "learning_rate": 9.862688155076418e-06, "loss": 18.766, "step": 5610 }, { "epoch": 0.10256457126144734, "grad_norm": 6.444854193252805, "learning_rate": 9.862619251145123e-06, "loss": 17.5268, "step": 5611 }, { "epoch": 0.10258285045789386, "grad_norm": 7.675333679960771, "learning_rate": 9.862550330170767e-06, "loss": 17.7674, "step": 5612 }, { "epoch": 0.1026011296543404, "grad_norm": 6.055736683653771, "learning_rate": 9.862481392153595e-06, "loss": 17.2301, "step": 5613 }, { "epoch": 0.10261940885078692, "grad_norm": 7.384604145089058, "learning_rate": 9.862412437093846e-06, "loss": 17.5344, "step": 5614 }, { "epoch": 0.10263768804723344, "grad_norm": 7.949191918095978, "learning_rate": 9.862343464991765e-06, "loss": 17.8813, "step": 5615 }, { "epoch": 0.10265596724367997, "grad_norm": 8.488756267526043, "learning_rate": 9.862274475847591e-06, "loss": 18.6542, "step": 5616 }, { "epoch": 0.10267424644012649, "grad_norm": 8.487859338016493, "learning_rate": 9.862205469661567e-06, "loss": 18.5466, "step": 5617 }, { "epoch": 0.10269252563657301, "grad_norm": 7.933801626997772, "learning_rate": 9.862136446433936e-06, "loss": 18.0013, "step": 5618 }, { "epoch": 0.10271080483301955, "grad_norm": 7.866191316386422, "learning_rate": 9.862067406164939e-06, "loss": 17.9831, "step": 5619 }, { "epoch": 0.10272908402946607, "grad_norm": 6.736840911650169, "learning_rate": 9.861998348854815e-06, "loss": 17.8773, "step": 5620 }, { "epoch": 0.10274736322591259, "grad_norm": 8.099231571762177, "learning_rate": 9.861929274503812e-06, "loss": 18.3194, "step": 5621 }, { "epoch": 0.10276564242235911, "grad_norm": 8.313637183638072, "learning_rate": 9.861860183112167e-06, "loss": 18.4127, "step": 5622 }, { "epoch": 0.10278392161880563, "grad_norm": 8.375544465998125, "learning_rate": 9.861791074680123e-06, "loss": 18.2892, "step": 5623 }, { "epoch": 0.10280220081525217, "grad_norm": 7.308802441418126, "learning_rate": 9.861721949207924e-06, "loss": 17.8897, "step": 5624 }, { "epoch": 0.10282048001169869, "grad_norm": 7.4435089226258935, "learning_rate": 9.861652806695811e-06, "loss": 18.1091, "step": 5625 }, { "epoch": 0.10283875920814521, "grad_norm": 7.672842510905069, "learning_rate": 9.861583647144028e-06, "loss": 18.1591, "step": 5626 }, { "epoch": 0.10285703840459173, "grad_norm": 6.804388092466486, "learning_rate": 9.861514470552817e-06, "loss": 17.5523, "step": 5627 }, { "epoch": 0.10287531760103825, "grad_norm": 7.947931883356575, "learning_rate": 9.861445276922416e-06, "loss": 17.6266, "step": 5628 }, { "epoch": 0.10289359679748478, "grad_norm": 6.9495225887857135, "learning_rate": 9.861376066253075e-06, "loss": 17.6151, "step": 5629 }, { "epoch": 0.10291187599393131, "grad_norm": 7.0574007178605696, "learning_rate": 9.861306838545031e-06, "loss": 17.5255, "step": 5630 }, { "epoch": 0.10293015519037783, "grad_norm": 7.42909000481963, "learning_rate": 9.86123759379853e-06, "loss": 18.3477, "step": 5631 }, { "epoch": 0.10294843438682436, "grad_norm": 7.400343424466621, "learning_rate": 9.86116833201381e-06, "loss": 17.5692, "step": 5632 }, { "epoch": 0.10296671358327088, "grad_norm": 6.656659719531691, "learning_rate": 9.86109905319112e-06, "loss": 17.5363, "step": 5633 }, { "epoch": 0.1029849927797174, "grad_norm": 8.869011790536229, "learning_rate": 9.861029757330696e-06, "loss": 18.233, "step": 5634 }, { "epoch": 0.10300327197616392, "grad_norm": 10.78138637955777, "learning_rate": 9.860960444432788e-06, "loss": 19.547, "step": 5635 }, { "epoch": 0.10302155117261046, "grad_norm": 8.69105743680655, "learning_rate": 9.860891114497632e-06, "loss": 18.1938, "step": 5636 }, { "epoch": 0.10303983036905698, "grad_norm": 7.406666591015115, "learning_rate": 9.860821767525478e-06, "loss": 17.8067, "step": 5637 }, { "epoch": 0.1030581095655035, "grad_norm": 7.236512474195321, "learning_rate": 9.860752403516565e-06, "loss": 17.8224, "step": 5638 }, { "epoch": 0.10307638876195002, "grad_norm": 6.740368023501267, "learning_rate": 9.860683022471134e-06, "loss": 17.7572, "step": 5639 }, { "epoch": 0.10309466795839654, "grad_norm": 6.801684355389477, "learning_rate": 9.860613624389433e-06, "loss": 17.1892, "step": 5640 }, { "epoch": 0.10311294715484308, "grad_norm": 6.728856808681663, "learning_rate": 9.860544209271702e-06, "loss": 17.2071, "step": 5641 }, { "epoch": 0.1031312263512896, "grad_norm": 6.5262368131426785, "learning_rate": 9.860474777118186e-06, "loss": 17.3267, "step": 5642 }, { "epoch": 0.10314950554773612, "grad_norm": 6.389057697504511, "learning_rate": 9.860405327929127e-06, "loss": 17.3653, "step": 5643 }, { "epoch": 0.10316778474418264, "grad_norm": 8.191552019361835, "learning_rate": 9.860335861704771e-06, "loss": 18.0956, "step": 5644 }, { "epoch": 0.10318606394062917, "grad_norm": 6.7991328360954, "learning_rate": 9.860266378445357e-06, "loss": 17.534, "step": 5645 }, { "epoch": 0.10320434313707569, "grad_norm": 7.887544108450004, "learning_rate": 9.860196878151132e-06, "loss": 17.6669, "step": 5646 }, { "epoch": 0.10322262233352222, "grad_norm": 7.62854995909802, "learning_rate": 9.86012736082234e-06, "loss": 18.1849, "step": 5647 }, { "epoch": 0.10324090152996875, "grad_norm": 8.87544728712017, "learning_rate": 9.860057826459221e-06, "loss": 18.2109, "step": 5648 }, { "epoch": 0.10325918072641527, "grad_norm": 7.432743426683649, "learning_rate": 9.859988275062023e-06, "loss": 18.0735, "step": 5649 }, { "epoch": 0.10327745992286179, "grad_norm": 7.19049960693097, "learning_rate": 9.859918706630988e-06, "loss": 17.8247, "step": 5650 }, { "epoch": 0.10329573911930831, "grad_norm": 8.581469903701562, "learning_rate": 9.859849121166358e-06, "loss": 18.3868, "step": 5651 }, { "epoch": 0.10331401831575483, "grad_norm": 7.167860600574844, "learning_rate": 9.85977951866838e-06, "loss": 17.7402, "step": 5652 }, { "epoch": 0.10333229751220137, "grad_norm": 7.317461419644228, "learning_rate": 9.859709899137296e-06, "loss": 17.716, "step": 5653 }, { "epoch": 0.10335057670864789, "grad_norm": 8.126845056444337, "learning_rate": 9.85964026257335e-06, "loss": 17.937, "step": 5654 }, { "epoch": 0.10336885590509441, "grad_norm": 6.775535340147825, "learning_rate": 9.859570608976788e-06, "loss": 17.7823, "step": 5655 }, { "epoch": 0.10338713510154093, "grad_norm": 8.02478382232567, "learning_rate": 9.85950093834785e-06, "loss": 18.0325, "step": 5656 }, { "epoch": 0.10340541429798746, "grad_norm": 7.84770673825979, "learning_rate": 9.859431250686786e-06, "loss": 18.0718, "step": 5657 }, { "epoch": 0.10342369349443399, "grad_norm": 7.928305293440953, "learning_rate": 9.859361545993835e-06, "loss": 18.3205, "step": 5658 }, { "epoch": 0.10344197269088051, "grad_norm": 6.308012622949614, "learning_rate": 9.859291824269244e-06, "loss": 17.5742, "step": 5659 }, { "epoch": 0.10346025188732703, "grad_norm": 6.553638426485782, "learning_rate": 9.859222085513257e-06, "loss": 17.3746, "step": 5660 }, { "epoch": 0.10347853108377356, "grad_norm": 7.8237799859017985, "learning_rate": 9.859152329726119e-06, "loss": 18.0363, "step": 5661 }, { "epoch": 0.10349681028022008, "grad_norm": 8.70988408675445, "learning_rate": 9.859082556908074e-06, "loss": 18.5131, "step": 5662 }, { "epoch": 0.1035150894766666, "grad_norm": 5.983760418825584, "learning_rate": 9.859012767059364e-06, "loss": 17.1997, "step": 5663 }, { "epoch": 0.10353336867311314, "grad_norm": 6.314545435734154, "learning_rate": 9.858942960180236e-06, "loss": 17.3162, "step": 5664 }, { "epoch": 0.10355164786955966, "grad_norm": 7.783637790785526, "learning_rate": 9.858873136270936e-06, "loss": 18.0471, "step": 5665 }, { "epoch": 0.10356992706600618, "grad_norm": 8.911846521787316, "learning_rate": 9.858803295331708e-06, "loss": 18.5961, "step": 5666 }, { "epoch": 0.1035882062624527, "grad_norm": 7.104290413771939, "learning_rate": 9.858733437362794e-06, "loss": 17.9537, "step": 5667 }, { "epoch": 0.10360648545889922, "grad_norm": 7.441246196882701, "learning_rate": 9.858663562364442e-06, "loss": 17.6768, "step": 5668 }, { "epoch": 0.10362476465534574, "grad_norm": 6.230320373537345, "learning_rate": 9.858593670336896e-06, "loss": 17.5156, "step": 5669 }, { "epoch": 0.10364304385179228, "grad_norm": 7.054016654154026, "learning_rate": 9.8585237612804e-06, "loss": 18.0473, "step": 5670 }, { "epoch": 0.1036613230482388, "grad_norm": 6.80265935611233, "learning_rate": 9.858453835195201e-06, "loss": 17.611, "step": 5671 }, { "epoch": 0.10367960224468532, "grad_norm": 8.505054168058228, "learning_rate": 9.858383892081543e-06, "loss": 18.0694, "step": 5672 }, { "epoch": 0.10369788144113185, "grad_norm": 7.974423983390342, "learning_rate": 9.858313931939671e-06, "loss": 17.7729, "step": 5673 }, { "epoch": 0.10371616063757837, "grad_norm": 7.314855359130303, "learning_rate": 9.858243954769828e-06, "loss": 17.6772, "step": 5674 }, { "epoch": 0.1037344398340249, "grad_norm": 6.987436129085345, "learning_rate": 9.858173960572263e-06, "loss": 17.7026, "step": 5675 }, { "epoch": 0.10375271903047142, "grad_norm": 6.998286275181286, "learning_rate": 9.85810394934722e-06, "loss": 17.6099, "step": 5676 }, { "epoch": 0.10377099822691795, "grad_norm": 7.006892086593356, "learning_rate": 9.858033921094945e-06, "loss": 17.7675, "step": 5677 }, { "epoch": 0.10378927742336447, "grad_norm": 6.879778033254961, "learning_rate": 9.85796387581568e-06, "loss": 17.5298, "step": 5678 }, { "epoch": 0.10380755661981099, "grad_norm": 6.76985971738366, "learning_rate": 9.857893813509679e-06, "loss": 17.6621, "step": 5679 }, { "epoch": 0.10382583581625751, "grad_norm": 6.72691845560067, "learning_rate": 9.857823734177176e-06, "loss": 17.5424, "step": 5680 }, { "epoch": 0.10384411501270405, "grad_norm": 8.53200920093947, "learning_rate": 9.857753637818424e-06, "loss": 18.1764, "step": 5681 }, { "epoch": 0.10386239420915057, "grad_norm": 7.741867404671852, "learning_rate": 9.857683524433667e-06, "loss": 17.9676, "step": 5682 }, { "epoch": 0.10388067340559709, "grad_norm": 7.869945756666038, "learning_rate": 9.857613394023151e-06, "loss": 18.0102, "step": 5683 }, { "epoch": 0.10389895260204361, "grad_norm": 8.34587849748405, "learning_rate": 9.857543246587123e-06, "loss": 18.0476, "step": 5684 }, { "epoch": 0.10391723179849013, "grad_norm": 7.460410896072189, "learning_rate": 9.857473082125826e-06, "loss": 17.6671, "step": 5685 }, { "epoch": 0.10393551099493666, "grad_norm": 5.469985484096757, "learning_rate": 9.857402900639506e-06, "loss": 17.071, "step": 5686 }, { "epoch": 0.10395379019138319, "grad_norm": 7.54252787671895, "learning_rate": 9.857332702128413e-06, "loss": 18.142, "step": 5687 }, { "epoch": 0.10397206938782971, "grad_norm": 7.176446966677361, "learning_rate": 9.857262486592789e-06, "loss": 17.9131, "step": 5688 }, { "epoch": 0.10399034858427623, "grad_norm": 7.231311940389295, "learning_rate": 9.857192254032881e-06, "loss": 17.7265, "step": 5689 }, { "epoch": 0.10400862778072276, "grad_norm": 5.430374545880149, "learning_rate": 9.857122004448937e-06, "loss": 16.9641, "step": 5690 }, { "epoch": 0.10402690697716928, "grad_norm": 6.5152725917889835, "learning_rate": 9.8570517378412e-06, "loss": 17.713, "step": 5691 }, { "epoch": 0.10404518617361581, "grad_norm": 6.808305671955995, "learning_rate": 9.856981454209919e-06, "loss": 17.5756, "step": 5692 }, { "epoch": 0.10406346537006234, "grad_norm": 7.811642796693837, "learning_rate": 9.856911153555339e-06, "loss": 18.1096, "step": 5693 }, { "epoch": 0.10408174456650886, "grad_norm": 7.481230820798903, "learning_rate": 9.856840835877708e-06, "loss": 17.877, "step": 5694 }, { "epoch": 0.10410002376295538, "grad_norm": 8.124339711772812, "learning_rate": 9.85677050117727e-06, "loss": 18.1425, "step": 5695 }, { "epoch": 0.1041183029594019, "grad_norm": 7.089440559584176, "learning_rate": 9.856700149454274e-06, "loss": 18.2303, "step": 5696 }, { "epoch": 0.10413658215584842, "grad_norm": 6.9627328745022545, "learning_rate": 9.856629780708966e-06, "loss": 17.7311, "step": 5697 }, { "epoch": 0.10415486135229496, "grad_norm": 7.262370127311515, "learning_rate": 9.856559394941591e-06, "loss": 17.6369, "step": 5698 }, { "epoch": 0.10417314054874148, "grad_norm": 6.9301616574918805, "learning_rate": 9.856488992152398e-06, "loss": 17.872, "step": 5699 }, { "epoch": 0.104191419745188, "grad_norm": 8.421575270284432, "learning_rate": 9.85641857234163e-06, "loss": 18.5551, "step": 5700 }, { "epoch": 0.10420969894163452, "grad_norm": 8.354074911118072, "learning_rate": 9.85634813550954e-06, "loss": 18.44, "step": 5701 }, { "epoch": 0.10422797813808105, "grad_norm": 7.699607647268898, "learning_rate": 9.856277681656367e-06, "loss": 18.0969, "step": 5702 }, { "epoch": 0.10424625733452757, "grad_norm": 6.76819580678942, "learning_rate": 9.856207210782365e-06, "loss": 17.6372, "step": 5703 }, { "epoch": 0.1042645365309741, "grad_norm": 8.428388101099076, "learning_rate": 9.856136722887777e-06, "loss": 18.3778, "step": 5704 }, { "epoch": 0.10428281572742062, "grad_norm": 6.669753854352136, "learning_rate": 9.85606621797285e-06, "loss": 17.6416, "step": 5705 }, { "epoch": 0.10430109492386715, "grad_norm": 6.133803018299268, "learning_rate": 9.855995696037835e-06, "loss": 17.3228, "step": 5706 }, { "epoch": 0.10431937412031367, "grad_norm": 7.26969586228127, "learning_rate": 9.855925157082975e-06, "loss": 17.8272, "step": 5707 }, { "epoch": 0.10433765331676019, "grad_norm": 6.995612745175263, "learning_rate": 9.85585460110852e-06, "loss": 17.711, "step": 5708 }, { "epoch": 0.10435593251320673, "grad_norm": 6.963593528382055, "learning_rate": 9.855784028114715e-06, "loss": 17.802, "step": 5709 }, { "epoch": 0.10437421170965325, "grad_norm": 7.495000273619035, "learning_rate": 9.855713438101807e-06, "loss": 17.9456, "step": 5710 }, { "epoch": 0.10439249090609977, "grad_norm": 6.8303246224641585, "learning_rate": 9.855642831070046e-06, "loss": 17.7261, "step": 5711 }, { "epoch": 0.10441077010254629, "grad_norm": 8.105013767699267, "learning_rate": 9.85557220701968e-06, "loss": 18.0448, "step": 5712 }, { "epoch": 0.10442904929899281, "grad_norm": 7.403994990361161, "learning_rate": 9.85550156595095e-06, "loss": 17.8851, "step": 5713 }, { "epoch": 0.10444732849543933, "grad_norm": 7.770646153599739, "learning_rate": 9.855430907864113e-06, "loss": 18.2886, "step": 5714 }, { "epoch": 0.10446560769188587, "grad_norm": 7.474763922450885, "learning_rate": 9.85536023275941e-06, "loss": 17.843, "step": 5715 }, { "epoch": 0.10448388688833239, "grad_norm": 7.064507913735838, "learning_rate": 9.855289540637092e-06, "loss": 17.8636, "step": 5716 }, { "epoch": 0.10450216608477891, "grad_norm": 6.756623432684942, "learning_rate": 9.855218831497403e-06, "loss": 17.421, "step": 5717 }, { "epoch": 0.10452044528122544, "grad_norm": 7.261625310230588, "learning_rate": 9.855148105340595e-06, "loss": 17.5943, "step": 5718 }, { "epoch": 0.10453872447767196, "grad_norm": 7.1522997287521, "learning_rate": 9.855077362166914e-06, "loss": 17.6506, "step": 5719 }, { "epoch": 0.10455700367411848, "grad_norm": 7.21845105333718, "learning_rate": 9.855006601976608e-06, "loss": 17.5548, "step": 5720 }, { "epoch": 0.10457528287056501, "grad_norm": 5.992877911997206, "learning_rate": 9.854935824769926e-06, "loss": 17.5561, "step": 5721 }, { "epoch": 0.10459356206701154, "grad_norm": 8.51209455874131, "learning_rate": 9.854865030547115e-06, "loss": 18.3696, "step": 5722 }, { "epoch": 0.10461184126345806, "grad_norm": 5.996228208506489, "learning_rate": 9.854794219308424e-06, "loss": 17.2701, "step": 5723 }, { "epoch": 0.10463012045990458, "grad_norm": 6.118078868879499, "learning_rate": 9.854723391054097e-06, "loss": 17.5607, "step": 5724 }, { "epoch": 0.1046483996563511, "grad_norm": 7.117069409509465, "learning_rate": 9.854652545784388e-06, "loss": 17.538, "step": 5725 }, { "epoch": 0.10466667885279764, "grad_norm": 6.266101120638053, "learning_rate": 9.854581683499544e-06, "loss": 17.4521, "step": 5726 }, { "epoch": 0.10468495804924416, "grad_norm": 6.146426237738708, "learning_rate": 9.854510804199813e-06, "loss": 17.2019, "step": 5727 }, { "epoch": 0.10470323724569068, "grad_norm": 6.1851417483044235, "learning_rate": 9.854439907885441e-06, "loss": 17.437, "step": 5728 }, { "epoch": 0.1047215164421372, "grad_norm": 6.894948126716912, "learning_rate": 9.85436899455668e-06, "loss": 17.8609, "step": 5729 }, { "epoch": 0.10473979563858372, "grad_norm": 6.171758143473966, "learning_rate": 9.854298064213775e-06, "loss": 17.4036, "step": 5730 }, { "epoch": 0.10475807483503025, "grad_norm": 8.770487531018796, "learning_rate": 9.854227116856977e-06, "loss": 18.0638, "step": 5731 }, { "epoch": 0.10477635403147678, "grad_norm": 6.636523335196349, "learning_rate": 9.854156152486536e-06, "loss": 17.4718, "step": 5732 }, { "epoch": 0.1047946332279233, "grad_norm": 7.392082438690228, "learning_rate": 9.854085171102697e-06, "loss": 17.9873, "step": 5733 }, { "epoch": 0.10481291242436983, "grad_norm": 7.909454206722237, "learning_rate": 9.85401417270571e-06, "loss": 18.2473, "step": 5734 }, { "epoch": 0.10483119162081635, "grad_norm": 7.006888620312817, "learning_rate": 9.853943157295826e-06, "loss": 17.7185, "step": 5735 }, { "epoch": 0.10484947081726287, "grad_norm": 7.782189659230426, "learning_rate": 9.853872124873294e-06, "loss": 18.2295, "step": 5736 }, { "epoch": 0.10486775001370939, "grad_norm": 5.875710856288465, "learning_rate": 9.85380107543836e-06, "loss": 17.2333, "step": 5737 }, { "epoch": 0.10488602921015593, "grad_norm": 7.165194222622087, "learning_rate": 9.853730008991274e-06, "loss": 18.0052, "step": 5738 }, { "epoch": 0.10490430840660245, "grad_norm": 7.001827406262003, "learning_rate": 9.853658925532285e-06, "loss": 17.6866, "step": 5739 }, { "epoch": 0.10492258760304897, "grad_norm": 6.194436513322145, "learning_rate": 9.853587825061644e-06, "loss": 17.4752, "step": 5740 }, { "epoch": 0.10494086679949549, "grad_norm": 7.2999102677724155, "learning_rate": 9.853516707579598e-06, "loss": 17.6792, "step": 5741 }, { "epoch": 0.10495914599594201, "grad_norm": 11.076180042171714, "learning_rate": 9.853445573086396e-06, "loss": 18.2204, "step": 5742 }, { "epoch": 0.10497742519238855, "grad_norm": 6.769448177150094, "learning_rate": 9.853374421582291e-06, "loss": 17.4852, "step": 5743 }, { "epoch": 0.10499570438883507, "grad_norm": 7.339161168489227, "learning_rate": 9.853303253067528e-06, "loss": 17.9064, "step": 5744 }, { "epoch": 0.10501398358528159, "grad_norm": 8.842002938007269, "learning_rate": 9.853232067542358e-06, "loss": 18.4983, "step": 5745 }, { "epoch": 0.10503226278172811, "grad_norm": 7.149292050165079, "learning_rate": 9.853160865007032e-06, "loss": 17.9278, "step": 5746 }, { "epoch": 0.10505054197817464, "grad_norm": 7.63771705425234, "learning_rate": 9.853089645461798e-06, "loss": 18.2209, "step": 5747 }, { "epoch": 0.10506882117462116, "grad_norm": 7.203846304017191, "learning_rate": 9.853018408906905e-06, "loss": 17.4716, "step": 5748 }, { "epoch": 0.1050871003710677, "grad_norm": 7.158352887577222, "learning_rate": 9.852947155342606e-06, "loss": 17.6188, "step": 5749 }, { "epoch": 0.10510537956751421, "grad_norm": 8.11297657185183, "learning_rate": 9.852875884769146e-06, "loss": 18.0543, "step": 5750 }, { "epoch": 0.10512365876396074, "grad_norm": 7.727460131316621, "learning_rate": 9.852804597186777e-06, "loss": 17.9759, "step": 5751 }, { "epoch": 0.10514193796040726, "grad_norm": 7.057238262452905, "learning_rate": 9.85273329259575e-06, "loss": 17.6428, "step": 5752 }, { "epoch": 0.10516021715685378, "grad_norm": 6.714331539931519, "learning_rate": 9.852661970996314e-06, "loss": 17.4928, "step": 5753 }, { "epoch": 0.1051784963533003, "grad_norm": 7.166353058410686, "learning_rate": 9.852590632388719e-06, "loss": 17.9264, "step": 5754 }, { "epoch": 0.10519677554974684, "grad_norm": 7.137643891664648, "learning_rate": 9.852519276773214e-06, "loss": 17.9394, "step": 5755 }, { "epoch": 0.10521505474619336, "grad_norm": 7.616904490886513, "learning_rate": 9.85244790415005e-06, "loss": 18.1653, "step": 5756 }, { "epoch": 0.10523333394263988, "grad_norm": 7.455770390212299, "learning_rate": 9.852376514519478e-06, "loss": 18.1438, "step": 5757 }, { "epoch": 0.1052516131390864, "grad_norm": 9.109545469929985, "learning_rate": 9.852305107881747e-06, "loss": 18.4404, "step": 5758 }, { "epoch": 0.10526989233553292, "grad_norm": 7.181036814075525, "learning_rate": 9.852233684237107e-06, "loss": 17.8544, "step": 5759 }, { "epoch": 0.10528817153197946, "grad_norm": 6.999738257101895, "learning_rate": 9.85216224358581e-06, "loss": 17.8273, "step": 5760 }, { "epoch": 0.10530645072842598, "grad_norm": 8.094214736012356, "learning_rate": 9.852090785928105e-06, "loss": 18.148, "step": 5761 }, { "epoch": 0.1053247299248725, "grad_norm": 7.561774993680934, "learning_rate": 9.852019311264242e-06, "loss": 18.1062, "step": 5762 }, { "epoch": 0.10534300912131903, "grad_norm": 6.070270020687608, "learning_rate": 9.851947819594474e-06, "loss": 17.2214, "step": 5763 }, { "epoch": 0.10536128831776555, "grad_norm": 6.835985532335688, "learning_rate": 9.85187631091905e-06, "loss": 17.7722, "step": 5764 }, { "epoch": 0.10537956751421207, "grad_norm": 7.871504429435472, "learning_rate": 9.85180478523822e-06, "loss": 18.1517, "step": 5765 }, { "epoch": 0.1053978467106586, "grad_norm": 7.179465741023101, "learning_rate": 9.851733242552234e-06, "loss": 17.8843, "step": 5766 }, { "epoch": 0.10541612590710513, "grad_norm": 8.111602194577884, "learning_rate": 9.851661682861346e-06, "loss": 18.4305, "step": 5767 }, { "epoch": 0.10543440510355165, "grad_norm": 7.511389701700761, "learning_rate": 9.851590106165802e-06, "loss": 18.0181, "step": 5768 }, { "epoch": 0.10545268429999817, "grad_norm": 7.887199715998375, "learning_rate": 9.851518512465858e-06, "loss": 17.847, "step": 5769 }, { "epoch": 0.10547096349644469, "grad_norm": 7.949776827589897, "learning_rate": 9.851446901761763e-06, "loss": 18.3067, "step": 5770 }, { "epoch": 0.10548924269289121, "grad_norm": 7.935419213525369, "learning_rate": 9.851375274053766e-06, "loss": 18.0919, "step": 5771 }, { "epoch": 0.10550752188933775, "grad_norm": 8.700440051234581, "learning_rate": 9.851303629342122e-06, "loss": 18.42, "step": 5772 }, { "epoch": 0.10552580108578427, "grad_norm": 7.515220487706339, "learning_rate": 9.851231967627077e-06, "loss": 17.6783, "step": 5773 }, { "epoch": 0.10554408028223079, "grad_norm": 7.473438351789878, "learning_rate": 9.851160288908885e-06, "loss": 17.8782, "step": 5774 }, { "epoch": 0.10556235947867731, "grad_norm": 7.902432249555323, "learning_rate": 9.851088593187798e-06, "loss": 17.3811, "step": 5775 }, { "epoch": 0.10558063867512384, "grad_norm": 7.851866298826325, "learning_rate": 9.851016880464069e-06, "loss": 18.4099, "step": 5776 }, { "epoch": 0.10559891787157037, "grad_norm": 7.944718451043134, "learning_rate": 9.850945150737943e-06, "loss": 18.0104, "step": 5777 }, { "epoch": 0.1056171970680169, "grad_norm": 6.751090297928792, "learning_rate": 9.850873404009678e-06, "loss": 17.6698, "step": 5778 }, { "epoch": 0.10563547626446342, "grad_norm": 6.202448126840248, "learning_rate": 9.85080164027952e-06, "loss": 17.3501, "step": 5779 }, { "epoch": 0.10565375546090994, "grad_norm": 9.054764255352312, "learning_rate": 9.850729859547725e-06, "loss": 18.7583, "step": 5780 }, { "epoch": 0.10567203465735646, "grad_norm": 8.027026165249456, "learning_rate": 9.850658061814542e-06, "loss": 18.2361, "step": 5781 }, { "epoch": 0.10569031385380298, "grad_norm": 6.088920807800374, "learning_rate": 9.850586247080222e-06, "loss": 17.3435, "step": 5782 }, { "epoch": 0.10570859305024952, "grad_norm": 8.14422214983587, "learning_rate": 9.850514415345021e-06, "loss": 17.9607, "step": 5783 }, { "epoch": 0.10572687224669604, "grad_norm": 13.53775301378071, "learning_rate": 9.850442566609186e-06, "loss": 18.0239, "step": 5784 }, { "epoch": 0.10574515144314256, "grad_norm": 7.6884599728396985, "learning_rate": 9.850370700872971e-06, "loss": 18.2373, "step": 5785 }, { "epoch": 0.10576343063958908, "grad_norm": 6.910462429635305, "learning_rate": 9.85029881813663e-06, "loss": 17.3886, "step": 5786 }, { "epoch": 0.1057817098360356, "grad_norm": 6.389760222001589, "learning_rate": 9.85022691840041e-06, "loss": 17.8356, "step": 5787 }, { "epoch": 0.10579998903248213, "grad_norm": 7.11371599600536, "learning_rate": 9.850155001664565e-06, "loss": 18.0679, "step": 5788 }, { "epoch": 0.10581826822892866, "grad_norm": 8.723889814178113, "learning_rate": 9.85008306792935e-06, "loss": 17.9585, "step": 5789 }, { "epoch": 0.10583654742537518, "grad_norm": 8.407616307477696, "learning_rate": 9.850011117195013e-06, "loss": 18.2149, "step": 5790 }, { "epoch": 0.1058548266218217, "grad_norm": 7.7439217350300025, "learning_rate": 9.849939149461807e-06, "loss": 18.1134, "step": 5791 }, { "epoch": 0.10587310581826823, "grad_norm": 7.181175190486328, "learning_rate": 9.849867164729987e-06, "loss": 17.6784, "step": 5792 }, { "epoch": 0.10589138501471475, "grad_norm": 7.445632998724882, "learning_rate": 9.849795162999803e-06, "loss": 17.9333, "step": 5793 }, { "epoch": 0.10590966421116128, "grad_norm": 18.655089934948883, "learning_rate": 9.849723144271508e-06, "loss": 18.4331, "step": 5794 }, { "epoch": 0.1059279434076078, "grad_norm": 8.358368378192516, "learning_rate": 9.849651108545352e-06, "loss": 18.5816, "step": 5795 }, { "epoch": 0.10594622260405433, "grad_norm": 7.184188658641208, "learning_rate": 9.849579055821593e-06, "loss": 17.4722, "step": 5796 }, { "epoch": 0.10596450180050085, "grad_norm": 6.01263250028207, "learning_rate": 9.84950698610048e-06, "loss": 17.2748, "step": 5797 }, { "epoch": 0.10598278099694737, "grad_norm": 6.957652918901836, "learning_rate": 9.849434899382262e-06, "loss": 17.7175, "step": 5798 }, { "epoch": 0.10600106019339389, "grad_norm": 7.834313757092882, "learning_rate": 9.8493627956672e-06, "loss": 17.8026, "step": 5799 }, { "epoch": 0.10601933938984043, "grad_norm": 6.410711562250967, "learning_rate": 9.84929067495554e-06, "loss": 17.3805, "step": 5800 }, { "epoch": 0.10603761858628695, "grad_norm": 7.576663577969768, "learning_rate": 9.849218537247536e-06, "loss": 17.9673, "step": 5801 }, { "epoch": 0.10605589778273347, "grad_norm": 7.310560435954196, "learning_rate": 9.849146382543445e-06, "loss": 18.0013, "step": 5802 }, { "epoch": 0.10607417697918, "grad_norm": 9.029088680320438, "learning_rate": 9.849074210843513e-06, "loss": 18.54, "step": 5803 }, { "epoch": 0.10609245617562651, "grad_norm": 7.417258750524977, "learning_rate": 9.849002022148e-06, "loss": 18.0685, "step": 5804 }, { "epoch": 0.10611073537207304, "grad_norm": 8.217334091701655, "learning_rate": 9.848929816457153e-06, "loss": 18.4651, "step": 5805 }, { "epoch": 0.10612901456851957, "grad_norm": 8.143445762336139, "learning_rate": 9.848857593771228e-06, "loss": 18.1567, "step": 5806 }, { "epoch": 0.1061472937649661, "grad_norm": 7.917228134414341, "learning_rate": 9.84878535409048e-06, "loss": 18.3476, "step": 5807 }, { "epoch": 0.10616557296141262, "grad_norm": 7.883135615970315, "learning_rate": 9.848713097415159e-06, "loss": 18.3613, "step": 5808 }, { "epoch": 0.10618385215785914, "grad_norm": 6.170311486863312, "learning_rate": 9.848640823745518e-06, "loss": 17.4302, "step": 5809 }, { "epoch": 0.10620213135430566, "grad_norm": 7.893547079062363, "learning_rate": 9.848568533081815e-06, "loss": 18.4974, "step": 5810 }, { "epoch": 0.1062204105507522, "grad_norm": 5.988982428265839, "learning_rate": 9.848496225424298e-06, "loss": 17.2988, "step": 5811 }, { "epoch": 0.10623868974719872, "grad_norm": 7.861364822837715, "learning_rate": 9.84842390077322e-06, "loss": 18.2354, "step": 5812 }, { "epoch": 0.10625696894364524, "grad_norm": 6.942108917709645, "learning_rate": 9.848351559128842e-06, "loss": 17.7698, "step": 5813 }, { "epoch": 0.10627524814009176, "grad_norm": 6.922394060836148, "learning_rate": 9.848279200491409e-06, "loss": 17.5157, "step": 5814 }, { "epoch": 0.10629352733653828, "grad_norm": 6.213133657148186, "learning_rate": 9.848206824861179e-06, "loss": 17.3671, "step": 5815 }, { "epoch": 0.1063118065329848, "grad_norm": 7.1841358065442265, "learning_rate": 9.848134432238404e-06, "loss": 17.5998, "step": 5816 }, { "epoch": 0.10633008572943134, "grad_norm": 6.10260872789878, "learning_rate": 9.84806202262334e-06, "loss": 17.3957, "step": 5817 }, { "epoch": 0.10634836492587786, "grad_norm": 8.596100792519103, "learning_rate": 9.847989596016239e-06, "loss": 18.6582, "step": 5818 }, { "epoch": 0.10636664412232438, "grad_norm": 7.174812522740201, "learning_rate": 9.847917152417354e-06, "loss": 18.0218, "step": 5819 }, { "epoch": 0.1063849233187709, "grad_norm": 7.463897320160827, "learning_rate": 9.847844691826941e-06, "loss": 17.8591, "step": 5820 }, { "epoch": 0.10640320251521743, "grad_norm": 6.616699430849265, "learning_rate": 9.847772214245252e-06, "loss": 17.6413, "step": 5821 }, { "epoch": 0.10642148171166395, "grad_norm": 7.484435459009545, "learning_rate": 9.847699719672543e-06, "loss": 17.8048, "step": 5822 }, { "epoch": 0.10643976090811048, "grad_norm": 7.785509005815104, "learning_rate": 9.847627208109067e-06, "loss": 18.2513, "step": 5823 }, { "epoch": 0.106458040104557, "grad_norm": 7.029673768842909, "learning_rate": 9.847554679555078e-06, "loss": 17.6034, "step": 5824 }, { "epoch": 0.10647631930100353, "grad_norm": 7.060688571159415, "learning_rate": 9.847482134010833e-06, "loss": 17.9917, "step": 5825 }, { "epoch": 0.10649459849745005, "grad_norm": 7.408573515883231, "learning_rate": 9.84740957147658e-06, "loss": 18.0168, "step": 5826 }, { "epoch": 0.10651287769389657, "grad_norm": 6.007931771023013, "learning_rate": 9.84733699195258e-06, "loss": 17.0875, "step": 5827 }, { "epoch": 0.1065311568903431, "grad_norm": 6.920791435860862, "learning_rate": 9.847264395439083e-06, "loss": 17.8657, "step": 5828 }, { "epoch": 0.10654943608678963, "grad_norm": 7.043542400746885, "learning_rate": 9.847191781936344e-06, "loss": 17.6116, "step": 5829 }, { "epoch": 0.10656771528323615, "grad_norm": 8.035065072369735, "learning_rate": 9.84711915144462e-06, "loss": 17.736, "step": 5830 }, { "epoch": 0.10658599447968267, "grad_norm": 6.635212426764212, "learning_rate": 9.847046503964165e-06, "loss": 17.4266, "step": 5831 }, { "epoch": 0.1066042736761292, "grad_norm": 7.135348780517533, "learning_rate": 9.84697383949523e-06, "loss": 17.7999, "step": 5832 }, { "epoch": 0.10662255287257572, "grad_norm": 7.415046581608795, "learning_rate": 9.846901158038074e-06, "loss": 17.75, "step": 5833 }, { "epoch": 0.10664083206902225, "grad_norm": 7.4214861048291, "learning_rate": 9.846828459592949e-06, "loss": 17.939, "step": 5834 }, { "epoch": 0.10665911126546877, "grad_norm": 6.618460113089114, "learning_rate": 9.84675574416011e-06, "loss": 17.8388, "step": 5835 }, { "epoch": 0.1066773904619153, "grad_norm": 7.193582755325592, "learning_rate": 9.846683011739814e-06, "loss": 17.7923, "step": 5836 }, { "epoch": 0.10669566965836182, "grad_norm": 6.276348611044246, "learning_rate": 9.846610262332316e-06, "loss": 17.6146, "step": 5837 }, { "epoch": 0.10671394885480834, "grad_norm": 6.945116714956209, "learning_rate": 9.846537495937868e-06, "loss": 17.5195, "step": 5838 }, { "epoch": 0.10673222805125486, "grad_norm": 7.042928447394009, "learning_rate": 9.846464712556727e-06, "loss": 17.6989, "step": 5839 }, { "epoch": 0.1067505072477014, "grad_norm": 8.330215381609719, "learning_rate": 9.846391912189147e-06, "loss": 18.2885, "step": 5840 }, { "epoch": 0.10676878644414792, "grad_norm": 8.290067979149216, "learning_rate": 9.846319094835385e-06, "loss": 17.9417, "step": 5841 }, { "epoch": 0.10678706564059444, "grad_norm": 7.494712764272132, "learning_rate": 9.846246260495694e-06, "loss": 17.9946, "step": 5842 }, { "epoch": 0.10680534483704096, "grad_norm": 7.461643328013089, "learning_rate": 9.846173409170333e-06, "loss": 17.9484, "step": 5843 }, { "epoch": 0.10682362403348748, "grad_norm": 6.459782126537119, "learning_rate": 9.846100540859552e-06, "loss": 17.4873, "step": 5844 }, { "epoch": 0.10684190322993402, "grad_norm": 7.096034223286772, "learning_rate": 9.846027655563608e-06, "loss": 17.851, "step": 5845 }, { "epoch": 0.10686018242638054, "grad_norm": 6.840368061161655, "learning_rate": 9.84595475328276e-06, "loss": 17.6252, "step": 5846 }, { "epoch": 0.10687846162282706, "grad_norm": 8.080219112226988, "learning_rate": 9.84588183401726e-06, "loss": 18.2538, "step": 5847 }, { "epoch": 0.10689674081927358, "grad_norm": 7.212462871658821, "learning_rate": 9.845808897767366e-06, "loss": 17.5176, "step": 5848 }, { "epoch": 0.1069150200157201, "grad_norm": 6.984673547302289, "learning_rate": 9.845735944533333e-06, "loss": 17.6274, "step": 5849 }, { "epoch": 0.10693329921216663, "grad_norm": 7.583678677218762, "learning_rate": 9.845662974315413e-06, "loss": 17.9241, "step": 5850 }, { "epoch": 0.10695157840861316, "grad_norm": 6.838934157794366, "learning_rate": 9.845589987113866e-06, "loss": 17.6212, "step": 5851 }, { "epoch": 0.10696985760505968, "grad_norm": 8.029517620405162, "learning_rate": 9.845516982928948e-06, "loss": 17.9396, "step": 5852 }, { "epoch": 0.1069881368015062, "grad_norm": 7.383262938437746, "learning_rate": 9.845443961760912e-06, "loss": 17.9403, "step": 5853 }, { "epoch": 0.10700641599795273, "grad_norm": 7.23154303085679, "learning_rate": 9.845370923610016e-06, "loss": 18.1187, "step": 5854 }, { "epoch": 0.10702469519439925, "grad_norm": 9.145868149614028, "learning_rate": 9.845297868476515e-06, "loss": 18.387, "step": 5855 }, { "epoch": 0.10704297439084577, "grad_norm": 8.650895626845806, "learning_rate": 9.845224796360666e-06, "loss": 18.1168, "step": 5856 }, { "epoch": 0.10706125358729231, "grad_norm": 6.243190581430269, "learning_rate": 9.845151707262724e-06, "loss": 17.367, "step": 5857 }, { "epoch": 0.10707953278373883, "grad_norm": 8.252484291615621, "learning_rate": 9.845078601182945e-06, "loss": 18.4255, "step": 5858 }, { "epoch": 0.10709781198018535, "grad_norm": 6.875617965557466, "learning_rate": 9.845005478121588e-06, "loss": 17.7237, "step": 5859 }, { "epoch": 0.10711609117663187, "grad_norm": 11.891019983136049, "learning_rate": 9.844932338078905e-06, "loss": 18.2797, "step": 5860 }, { "epoch": 0.1071343703730784, "grad_norm": 8.115981877473052, "learning_rate": 9.844859181055155e-06, "loss": 18.3821, "step": 5861 }, { "epoch": 0.10715264956952493, "grad_norm": 8.74529679768113, "learning_rate": 9.844786007050595e-06, "loss": 18.3951, "step": 5862 }, { "epoch": 0.10717092876597145, "grad_norm": 7.823005414652111, "learning_rate": 9.84471281606548e-06, "loss": 17.94, "step": 5863 }, { "epoch": 0.10718920796241797, "grad_norm": 8.660947518752678, "learning_rate": 9.844639608100066e-06, "loss": 18.5589, "step": 5864 }, { "epoch": 0.1072074871588645, "grad_norm": 7.86980803121859, "learning_rate": 9.844566383154613e-06, "loss": 18.2445, "step": 5865 }, { "epoch": 0.10722576635531102, "grad_norm": 7.739345685288175, "learning_rate": 9.844493141229374e-06, "loss": 17.8264, "step": 5866 }, { "epoch": 0.10724404555175754, "grad_norm": 7.16751922221443, "learning_rate": 9.844419882324606e-06, "loss": 17.4311, "step": 5867 }, { "epoch": 0.10726232474820407, "grad_norm": 6.227577904831335, "learning_rate": 9.844346606440566e-06, "loss": 17.5006, "step": 5868 }, { "epoch": 0.1072806039446506, "grad_norm": 8.450923211490991, "learning_rate": 9.844273313577516e-06, "loss": 18.5062, "step": 5869 }, { "epoch": 0.10729888314109712, "grad_norm": 6.767150093959317, "learning_rate": 9.844200003735703e-06, "loss": 17.5192, "step": 5870 }, { "epoch": 0.10731716233754364, "grad_norm": 7.859555479293641, "learning_rate": 9.844126676915393e-06, "loss": 17.8175, "step": 5871 }, { "epoch": 0.10733544153399016, "grad_norm": 7.985006211089855, "learning_rate": 9.844053333116836e-06, "loss": 18.0713, "step": 5872 }, { "epoch": 0.1073537207304367, "grad_norm": 8.03526743666188, "learning_rate": 9.843979972340295e-06, "loss": 17.9783, "step": 5873 }, { "epoch": 0.10737199992688322, "grad_norm": 7.193838453325093, "learning_rate": 9.843906594586025e-06, "loss": 17.7052, "step": 5874 }, { "epoch": 0.10739027912332974, "grad_norm": 7.0876536261099305, "learning_rate": 9.84383319985428e-06, "loss": 17.8391, "step": 5875 }, { "epoch": 0.10740855831977626, "grad_norm": 8.60170433420796, "learning_rate": 9.843759788145323e-06, "loss": 18.1661, "step": 5876 }, { "epoch": 0.10742683751622278, "grad_norm": 8.318778188731175, "learning_rate": 9.843686359459406e-06, "loss": 18.1817, "step": 5877 }, { "epoch": 0.1074451167126693, "grad_norm": 6.712672907800277, "learning_rate": 9.84361291379679e-06, "loss": 17.5527, "step": 5878 }, { "epoch": 0.10746339590911584, "grad_norm": 8.150534231108727, "learning_rate": 9.84353945115773e-06, "loss": 18.1581, "step": 5879 }, { "epoch": 0.10748167510556236, "grad_norm": 6.359388740830054, "learning_rate": 9.843465971542485e-06, "loss": 17.3468, "step": 5880 }, { "epoch": 0.10749995430200888, "grad_norm": 6.293291538227234, "learning_rate": 9.843392474951312e-06, "loss": 17.4126, "step": 5881 }, { "epoch": 0.1075182334984554, "grad_norm": 7.168148977752168, "learning_rate": 9.843318961384469e-06, "loss": 17.7767, "step": 5882 }, { "epoch": 0.10753651269490193, "grad_norm": 7.088881635224715, "learning_rate": 9.843245430842215e-06, "loss": 17.7949, "step": 5883 }, { "epoch": 0.10755479189134845, "grad_norm": 6.50650777081013, "learning_rate": 9.843171883324802e-06, "loss": 17.6451, "step": 5884 }, { "epoch": 0.10757307108779499, "grad_norm": 6.569283480801569, "learning_rate": 9.843098318832495e-06, "loss": 17.816, "step": 5885 }, { "epoch": 0.10759135028424151, "grad_norm": 7.836204033462986, "learning_rate": 9.843024737365548e-06, "loss": 17.3799, "step": 5886 }, { "epoch": 0.10760962948068803, "grad_norm": 8.043309457711423, "learning_rate": 9.84295113892422e-06, "loss": 18.2676, "step": 5887 }, { "epoch": 0.10762790867713455, "grad_norm": 7.7764713348071695, "learning_rate": 9.842877523508766e-06, "loss": 18.2114, "step": 5888 }, { "epoch": 0.10764618787358107, "grad_norm": 8.185116511570026, "learning_rate": 9.842803891119448e-06, "loss": 17.9973, "step": 5889 }, { "epoch": 0.10766446707002761, "grad_norm": 7.8794932825997295, "learning_rate": 9.842730241756524e-06, "loss": 18.078, "step": 5890 }, { "epoch": 0.10768274626647413, "grad_norm": 6.539034919480205, "learning_rate": 9.842656575420248e-06, "loss": 17.4952, "step": 5891 }, { "epoch": 0.10770102546292065, "grad_norm": 6.66230941878411, "learning_rate": 9.842582892110884e-06, "loss": 17.3589, "step": 5892 }, { "epoch": 0.10771930465936717, "grad_norm": 7.067289793977617, "learning_rate": 9.842509191828686e-06, "loss": 17.7478, "step": 5893 }, { "epoch": 0.1077375838558137, "grad_norm": 7.951195424808459, "learning_rate": 9.842435474573912e-06, "loss": 17.9751, "step": 5894 }, { "epoch": 0.10775586305226022, "grad_norm": 6.353737087620511, "learning_rate": 9.842361740346824e-06, "loss": 17.5808, "step": 5895 }, { "epoch": 0.10777414224870675, "grad_norm": 6.010220626394896, "learning_rate": 9.842287989147676e-06, "loss": 17.3642, "step": 5896 }, { "epoch": 0.10779242144515327, "grad_norm": 7.558035373074513, "learning_rate": 9.84221422097673e-06, "loss": 17.7662, "step": 5897 }, { "epoch": 0.1078107006415998, "grad_norm": 7.404483922619043, "learning_rate": 9.842140435834245e-06, "loss": 17.7805, "step": 5898 }, { "epoch": 0.10782897983804632, "grad_norm": 7.124061337922959, "learning_rate": 9.842066633720477e-06, "loss": 17.914, "step": 5899 }, { "epoch": 0.10784725903449284, "grad_norm": 6.873473487171974, "learning_rate": 9.841992814635683e-06, "loss": 17.6703, "step": 5900 }, { "epoch": 0.10786553823093936, "grad_norm": 8.722661879244907, "learning_rate": 9.841918978580128e-06, "loss": 18.4554, "step": 5901 }, { "epoch": 0.1078838174273859, "grad_norm": 6.38762554509518, "learning_rate": 9.841845125554067e-06, "loss": 17.3544, "step": 5902 }, { "epoch": 0.10790209662383242, "grad_norm": 6.1568219449726405, "learning_rate": 9.841771255557757e-06, "loss": 17.3558, "step": 5903 }, { "epoch": 0.10792037582027894, "grad_norm": 6.749550000349546, "learning_rate": 9.841697368591458e-06, "loss": 17.4397, "step": 5904 }, { "epoch": 0.10793865501672546, "grad_norm": 5.768599118609642, "learning_rate": 9.841623464655433e-06, "loss": 17.223, "step": 5905 }, { "epoch": 0.10795693421317198, "grad_norm": 6.995410955249487, "learning_rate": 9.841549543749935e-06, "loss": 17.6919, "step": 5906 }, { "epoch": 0.10797521340961852, "grad_norm": 6.957556811186082, "learning_rate": 9.841475605875227e-06, "loss": 17.8107, "step": 5907 }, { "epoch": 0.10799349260606504, "grad_norm": 12.347647844029785, "learning_rate": 9.841401651031568e-06, "loss": 18.7845, "step": 5908 }, { "epoch": 0.10801177180251156, "grad_norm": 6.269954101478955, "learning_rate": 9.841327679219214e-06, "loss": 17.7051, "step": 5909 }, { "epoch": 0.10803005099895809, "grad_norm": 6.948777492521897, "learning_rate": 9.841253690438429e-06, "loss": 17.6624, "step": 5910 }, { "epoch": 0.10804833019540461, "grad_norm": 5.970114760504495, "learning_rate": 9.84117968468947e-06, "loss": 17.1389, "step": 5911 }, { "epoch": 0.10806660939185113, "grad_norm": 7.510658700805964, "learning_rate": 9.841105661972594e-06, "loss": 17.6788, "step": 5912 }, { "epoch": 0.10808488858829766, "grad_norm": 7.203161598491203, "learning_rate": 9.841031622288065e-06, "loss": 17.8298, "step": 5913 }, { "epoch": 0.10810316778474419, "grad_norm": 6.859963289080416, "learning_rate": 9.84095756563614e-06, "loss": 17.3716, "step": 5914 }, { "epoch": 0.10812144698119071, "grad_norm": 7.47334448271494, "learning_rate": 9.840883492017078e-06, "loss": 17.9008, "step": 5915 }, { "epoch": 0.10813972617763723, "grad_norm": 6.113853926642932, "learning_rate": 9.84080940143114e-06, "loss": 17.2308, "step": 5916 }, { "epoch": 0.10815800537408375, "grad_norm": 6.4299119208739866, "learning_rate": 9.840735293878585e-06, "loss": 17.5363, "step": 5917 }, { "epoch": 0.10817628457053027, "grad_norm": 7.4029030745756215, "learning_rate": 9.840661169359673e-06, "loss": 18.1808, "step": 5918 }, { "epoch": 0.10819456376697681, "grad_norm": 7.13773841709815, "learning_rate": 9.840587027874661e-06, "loss": 17.9393, "step": 5919 }, { "epoch": 0.10821284296342333, "grad_norm": 7.589838535219721, "learning_rate": 9.840512869423816e-06, "loss": 17.9286, "step": 5920 }, { "epoch": 0.10823112215986985, "grad_norm": 6.882416624256553, "learning_rate": 9.840438694007391e-06, "loss": 17.5512, "step": 5921 }, { "epoch": 0.10824940135631637, "grad_norm": 6.743110939806677, "learning_rate": 9.840364501625647e-06, "loss": 17.6088, "step": 5922 }, { "epoch": 0.1082676805527629, "grad_norm": 8.729816397999063, "learning_rate": 9.84029029227885e-06, "loss": 18.6794, "step": 5923 }, { "epoch": 0.10828595974920943, "grad_norm": 7.751836073429958, "learning_rate": 9.840216065967251e-06, "loss": 18.2195, "step": 5924 }, { "epoch": 0.10830423894565595, "grad_norm": 7.417194641590402, "learning_rate": 9.840141822691116e-06, "loss": 17.6567, "step": 5925 }, { "epoch": 0.10832251814210248, "grad_norm": 7.6162404810380036, "learning_rate": 9.840067562450704e-06, "loss": 17.8032, "step": 5926 }, { "epoch": 0.108340797338549, "grad_norm": 6.9424441577606695, "learning_rate": 9.839993285246276e-06, "loss": 17.9385, "step": 5927 }, { "epoch": 0.10835907653499552, "grad_norm": 7.8256812426243645, "learning_rate": 9.839918991078091e-06, "loss": 17.7594, "step": 5928 }, { "epoch": 0.10837735573144204, "grad_norm": 8.19408397305482, "learning_rate": 9.83984467994641e-06, "loss": 18.3648, "step": 5929 }, { "epoch": 0.10839563492788858, "grad_norm": 7.69688900212192, "learning_rate": 9.839770351851494e-06, "loss": 18.1308, "step": 5930 }, { "epoch": 0.1084139141243351, "grad_norm": 7.403797404073557, "learning_rate": 9.839696006793601e-06, "loss": 17.5829, "step": 5931 }, { "epoch": 0.10843219332078162, "grad_norm": 7.783961116571769, "learning_rate": 9.839621644772996e-06, "loss": 18.2717, "step": 5932 }, { "epoch": 0.10845047251722814, "grad_norm": 8.471935534814689, "learning_rate": 9.839547265789935e-06, "loss": 18.0535, "step": 5933 }, { "epoch": 0.10846875171367466, "grad_norm": 6.6672925736520625, "learning_rate": 9.839472869844683e-06, "loss": 17.2993, "step": 5934 }, { "epoch": 0.10848703091012118, "grad_norm": 8.592884386398634, "learning_rate": 9.839398456937497e-06, "loss": 18.2076, "step": 5935 }, { "epoch": 0.10850531010656772, "grad_norm": 7.319125332836831, "learning_rate": 9.839324027068638e-06, "loss": 17.8035, "step": 5936 }, { "epoch": 0.10852358930301424, "grad_norm": 7.908470337743244, "learning_rate": 9.83924958023837e-06, "loss": 18.4819, "step": 5937 }, { "epoch": 0.10854186849946076, "grad_norm": 8.936807942210304, "learning_rate": 9.839175116446953e-06, "loss": 18.6143, "step": 5938 }, { "epoch": 0.10856014769590729, "grad_norm": 6.446386405095052, "learning_rate": 9.839100635694644e-06, "loss": 17.5366, "step": 5939 }, { "epoch": 0.10857842689235381, "grad_norm": 7.69040520412441, "learning_rate": 9.83902613798171e-06, "loss": 17.7365, "step": 5940 }, { "epoch": 0.10859670608880034, "grad_norm": 6.101400336742993, "learning_rate": 9.83895162330841e-06, "loss": 17.2802, "step": 5941 }, { "epoch": 0.10861498528524687, "grad_norm": 6.626590924164906, "learning_rate": 9.838877091675001e-06, "loss": 17.5935, "step": 5942 }, { "epoch": 0.10863326448169339, "grad_norm": 9.680701863675633, "learning_rate": 9.83880254308175e-06, "loss": 18.6882, "step": 5943 }, { "epoch": 0.10865154367813991, "grad_norm": 7.19408308711397, "learning_rate": 9.838727977528917e-06, "loss": 17.7942, "step": 5944 }, { "epoch": 0.10866982287458643, "grad_norm": 6.788870980507977, "learning_rate": 9.83865339501676e-06, "loss": 17.4205, "step": 5945 }, { "epoch": 0.10868810207103295, "grad_norm": 7.200683231587349, "learning_rate": 9.838578795545544e-06, "loss": 17.8494, "step": 5946 }, { "epoch": 0.10870638126747949, "grad_norm": 6.88088555284488, "learning_rate": 9.838504179115528e-06, "loss": 17.8073, "step": 5947 }, { "epoch": 0.10872466046392601, "grad_norm": 9.24156242485691, "learning_rate": 9.838429545726977e-06, "loss": 18.2847, "step": 5948 }, { "epoch": 0.10874293966037253, "grad_norm": 7.4897225574815405, "learning_rate": 9.83835489538015e-06, "loss": 17.7234, "step": 5949 }, { "epoch": 0.10876121885681905, "grad_norm": 7.260971801654793, "learning_rate": 9.838280228075306e-06, "loss": 18.0967, "step": 5950 }, { "epoch": 0.10877949805326557, "grad_norm": 7.938202720302389, "learning_rate": 9.838205543812712e-06, "loss": 17.9975, "step": 5951 }, { "epoch": 0.1087977772497121, "grad_norm": 7.715660297079626, "learning_rate": 9.838130842592626e-06, "loss": 17.9109, "step": 5952 }, { "epoch": 0.10881605644615863, "grad_norm": 7.0959843202199675, "learning_rate": 9.838056124415312e-06, "loss": 17.5292, "step": 5953 }, { "epoch": 0.10883433564260515, "grad_norm": 7.051511554435222, "learning_rate": 9.837981389281031e-06, "loss": 17.4281, "step": 5954 }, { "epoch": 0.10885261483905168, "grad_norm": 7.186044339479653, "learning_rate": 9.837906637190046e-06, "loss": 17.6825, "step": 5955 }, { "epoch": 0.1088708940354982, "grad_norm": 6.802459216189041, "learning_rate": 9.837831868142618e-06, "loss": 17.7053, "step": 5956 }, { "epoch": 0.10888917323194472, "grad_norm": 6.841393741814351, "learning_rate": 9.837757082139007e-06, "loss": 17.5404, "step": 5957 }, { "epoch": 0.10890745242839125, "grad_norm": 6.915070035197545, "learning_rate": 9.837682279179479e-06, "loss": 17.7828, "step": 5958 }, { "epoch": 0.10892573162483778, "grad_norm": 6.676637498053301, "learning_rate": 9.837607459264294e-06, "loss": 17.5676, "step": 5959 }, { "epoch": 0.1089440108212843, "grad_norm": 8.202980699646995, "learning_rate": 9.837532622393716e-06, "loss": 17.9422, "step": 5960 }, { "epoch": 0.10896229001773082, "grad_norm": 7.886380009870115, "learning_rate": 9.837457768568004e-06, "loss": 18.4824, "step": 5961 }, { "epoch": 0.10898056921417734, "grad_norm": 7.758729587760364, "learning_rate": 9.837382897787423e-06, "loss": 17.5843, "step": 5962 }, { "epoch": 0.10899884841062386, "grad_norm": 7.420610363317163, "learning_rate": 9.837308010052236e-06, "loss": 18.0441, "step": 5963 }, { "epoch": 0.1090171276070704, "grad_norm": 8.33113732511878, "learning_rate": 9.837233105362703e-06, "loss": 18.0701, "step": 5964 }, { "epoch": 0.10903540680351692, "grad_norm": 7.841425209863305, "learning_rate": 9.837158183719086e-06, "loss": 17.9798, "step": 5965 }, { "epoch": 0.10905368599996344, "grad_norm": 8.093031035859577, "learning_rate": 9.837083245121651e-06, "loss": 18.37, "step": 5966 }, { "epoch": 0.10907196519640996, "grad_norm": 8.81414523026481, "learning_rate": 9.83700828957066e-06, "loss": 18.2651, "step": 5967 }, { "epoch": 0.10909024439285649, "grad_norm": 6.699458389845013, "learning_rate": 9.836933317066373e-06, "loss": 17.5269, "step": 5968 }, { "epoch": 0.10910852358930301, "grad_norm": 9.599192042839118, "learning_rate": 9.836858327609055e-06, "loss": 18.7339, "step": 5969 }, { "epoch": 0.10912680278574954, "grad_norm": 7.296966831416328, "learning_rate": 9.836783321198968e-06, "loss": 18.1201, "step": 5970 }, { "epoch": 0.10914508198219607, "grad_norm": 7.493497210818273, "learning_rate": 9.836708297836375e-06, "loss": 17.7271, "step": 5971 }, { "epoch": 0.10916336117864259, "grad_norm": 6.62248749502922, "learning_rate": 9.83663325752154e-06, "loss": 17.6155, "step": 5972 }, { "epoch": 0.10918164037508911, "grad_norm": 7.586502094244091, "learning_rate": 9.836558200254725e-06, "loss": 17.8567, "step": 5973 }, { "epoch": 0.10919991957153563, "grad_norm": 6.763971976362987, "learning_rate": 9.836483126036192e-06, "loss": 17.2696, "step": 5974 }, { "epoch": 0.10921819876798217, "grad_norm": 6.633354735131761, "learning_rate": 9.836408034866207e-06, "loss": 17.3721, "step": 5975 }, { "epoch": 0.10923647796442869, "grad_norm": 6.853504839258941, "learning_rate": 9.836332926745031e-06, "loss": 17.5828, "step": 5976 }, { "epoch": 0.10925475716087521, "grad_norm": 7.780416455535054, "learning_rate": 9.836257801672927e-06, "loss": 18.1243, "step": 5977 }, { "epoch": 0.10927303635732173, "grad_norm": 6.5123631034928495, "learning_rate": 9.83618265965016e-06, "loss": 17.4059, "step": 5978 }, { "epoch": 0.10929131555376825, "grad_norm": 8.787405722245287, "learning_rate": 9.836107500676992e-06, "loss": 18.1918, "step": 5979 }, { "epoch": 0.10930959475021478, "grad_norm": 6.9205916043598235, "learning_rate": 9.836032324753687e-06, "loss": 17.8655, "step": 5980 }, { "epoch": 0.10932787394666131, "grad_norm": 7.818444687855867, "learning_rate": 9.835957131880508e-06, "loss": 18.3451, "step": 5981 }, { "epoch": 0.10934615314310783, "grad_norm": 7.955793940216197, "learning_rate": 9.83588192205772e-06, "loss": 18.0892, "step": 5982 }, { "epoch": 0.10936443233955435, "grad_norm": 6.522733972424496, "learning_rate": 9.835806695285583e-06, "loss": 17.4412, "step": 5983 }, { "epoch": 0.10938271153600088, "grad_norm": 5.546367331964139, "learning_rate": 9.835731451564365e-06, "loss": 17.0042, "step": 5984 }, { "epoch": 0.1094009907324474, "grad_norm": 8.254720366045445, "learning_rate": 9.835656190894329e-06, "loss": 18.5262, "step": 5985 }, { "epoch": 0.10941926992889392, "grad_norm": 8.905036048111233, "learning_rate": 9.835580913275736e-06, "loss": 18.3572, "step": 5986 }, { "epoch": 0.10943754912534046, "grad_norm": 5.716347342494404, "learning_rate": 9.835505618708851e-06, "loss": 17.3397, "step": 5987 }, { "epoch": 0.10945582832178698, "grad_norm": 7.297773266161931, "learning_rate": 9.83543030719394e-06, "loss": 17.7705, "step": 5988 }, { "epoch": 0.1094741075182335, "grad_norm": 7.542163751455365, "learning_rate": 9.835354978731265e-06, "loss": 17.8389, "step": 5989 }, { "epoch": 0.10949238671468002, "grad_norm": 7.400703798602654, "learning_rate": 9.835279633321091e-06, "loss": 17.7062, "step": 5990 }, { "epoch": 0.10951066591112654, "grad_norm": 6.609218880417126, "learning_rate": 9.83520427096368e-06, "loss": 17.4464, "step": 5991 }, { "epoch": 0.10952894510757308, "grad_norm": 8.27129966257969, "learning_rate": 9.835128891659298e-06, "loss": 18.5295, "step": 5992 }, { "epoch": 0.1095472243040196, "grad_norm": 7.842780011688152, "learning_rate": 9.835053495408209e-06, "loss": 17.9532, "step": 5993 }, { "epoch": 0.10956550350046612, "grad_norm": 7.941038020737538, "learning_rate": 9.834978082210678e-06, "loss": 18.1383, "step": 5994 }, { "epoch": 0.10958378269691264, "grad_norm": 7.385043901041865, "learning_rate": 9.834902652066966e-06, "loss": 18.058, "step": 5995 }, { "epoch": 0.10960206189335917, "grad_norm": 6.432381254149862, "learning_rate": 9.834827204977342e-06, "loss": 17.1324, "step": 5996 }, { "epoch": 0.10962034108980569, "grad_norm": 7.004934340731052, "learning_rate": 9.834751740942068e-06, "loss": 17.6494, "step": 5997 }, { "epoch": 0.10963862028625222, "grad_norm": 6.879043813875452, "learning_rate": 9.834676259961407e-06, "loss": 17.6899, "step": 5998 }, { "epoch": 0.10965689948269874, "grad_norm": 8.461280155671439, "learning_rate": 9.834600762035626e-06, "loss": 18.5268, "step": 5999 }, { "epoch": 0.10967517867914527, "grad_norm": 7.585188465613532, "learning_rate": 9.83452524716499e-06, "loss": 17.8837, "step": 6000 }, { "epoch": 0.10969345787559179, "grad_norm": 7.404804223634506, "learning_rate": 9.83444971534976e-06, "loss": 18.0314, "step": 6001 }, { "epoch": 0.10971173707203831, "grad_norm": 8.341881637201688, "learning_rate": 9.834374166590206e-06, "loss": 17.9829, "step": 6002 }, { "epoch": 0.10973001626848483, "grad_norm": 6.846917649382119, "learning_rate": 9.834298600886589e-06, "loss": 17.9417, "step": 6003 }, { "epoch": 0.10974829546493137, "grad_norm": 6.829582477175553, "learning_rate": 9.834223018239175e-06, "loss": 17.6357, "step": 6004 }, { "epoch": 0.10976657466137789, "grad_norm": 6.599843832524429, "learning_rate": 9.83414741864823e-06, "loss": 17.4774, "step": 6005 }, { "epoch": 0.10978485385782441, "grad_norm": 6.424816102653562, "learning_rate": 9.834071802114016e-06, "loss": 17.4823, "step": 6006 }, { "epoch": 0.10980313305427093, "grad_norm": 6.48361962247988, "learning_rate": 9.833996168636801e-06, "loss": 17.4452, "step": 6007 }, { "epoch": 0.10982141225071745, "grad_norm": 6.130064538530384, "learning_rate": 9.833920518216848e-06, "loss": 17.2327, "step": 6008 }, { "epoch": 0.10983969144716399, "grad_norm": 7.414484958983045, "learning_rate": 9.833844850854422e-06, "loss": 18.0387, "step": 6009 }, { "epoch": 0.10985797064361051, "grad_norm": 6.878425341927839, "learning_rate": 9.833769166549792e-06, "loss": 17.406, "step": 6010 }, { "epoch": 0.10987624984005703, "grad_norm": 7.473652822725783, "learning_rate": 9.833693465303217e-06, "loss": 17.9417, "step": 6011 }, { "epoch": 0.10989452903650355, "grad_norm": 5.7167561414418095, "learning_rate": 9.833617747114969e-06, "loss": 17.4244, "step": 6012 }, { "epoch": 0.10991280823295008, "grad_norm": 7.021581532495483, "learning_rate": 9.83354201198531e-06, "loss": 17.5103, "step": 6013 }, { "epoch": 0.1099310874293966, "grad_norm": 6.459942931667375, "learning_rate": 9.833466259914503e-06, "loss": 17.5817, "step": 6014 }, { "epoch": 0.10994936662584313, "grad_norm": 7.1900032351537515, "learning_rate": 9.833390490902819e-06, "loss": 17.847, "step": 6015 }, { "epoch": 0.10996764582228966, "grad_norm": 7.904453231222815, "learning_rate": 9.83331470495052e-06, "loss": 17.956, "step": 6016 }, { "epoch": 0.10998592501873618, "grad_norm": 7.427178197504679, "learning_rate": 9.833238902057873e-06, "loss": 17.8389, "step": 6017 }, { "epoch": 0.1100042042151827, "grad_norm": 6.208686767567184, "learning_rate": 9.83316308222514e-06, "loss": 17.3005, "step": 6018 }, { "epoch": 0.11002248341162922, "grad_norm": 7.847786718059512, "learning_rate": 9.833087245452594e-06, "loss": 17.9887, "step": 6019 }, { "epoch": 0.11004076260807574, "grad_norm": 6.654436993277937, "learning_rate": 9.833011391740494e-06, "loss": 17.688, "step": 6020 }, { "epoch": 0.11005904180452228, "grad_norm": 7.973594040381017, "learning_rate": 9.832935521089109e-06, "loss": 18.3191, "step": 6021 }, { "epoch": 0.1100773210009688, "grad_norm": 8.951738839041905, "learning_rate": 9.832859633498704e-06, "loss": 18.1994, "step": 6022 }, { "epoch": 0.11009560019741532, "grad_norm": 5.833255689683318, "learning_rate": 9.832783728969546e-06, "loss": 17.0594, "step": 6023 }, { "epoch": 0.11011387939386184, "grad_norm": 6.54454264521597, "learning_rate": 9.832707807501902e-06, "loss": 17.3724, "step": 6024 }, { "epoch": 0.11013215859030837, "grad_norm": 7.690288277658039, "learning_rate": 9.832631869096034e-06, "loss": 18.2661, "step": 6025 }, { "epoch": 0.1101504377867549, "grad_norm": 7.299021017295833, "learning_rate": 9.832555913752211e-06, "loss": 17.555, "step": 6026 }, { "epoch": 0.11016871698320142, "grad_norm": 6.828135335092559, "learning_rate": 9.832479941470699e-06, "loss": 17.5603, "step": 6027 }, { "epoch": 0.11018699617964794, "grad_norm": 7.864703887956888, "learning_rate": 9.832403952251765e-06, "loss": 18.39, "step": 6028 }, { "epoch": 0.11020527537609447, "grad_norm": 6.4129004093808515, "learning_rate": 9.832327946095674e-06, "loss": 17.4481, "step": 6029 }, { "epoch": 0.11022355457254099, "grad_norm": 10.061475671645136, "learning_rate": 9.832251923002692e-06, "loss": 19.0137, "step": 6030 }, { "epoch": 0.11024183376898751, "grad_norm": 6.325548363634723, "learning_rate": 9.832175882973088e-06, "loss": 17.4535, "step": 6031 }, { "epoch": 0.11026011296543405, "grad_norm": 7.493245225301195, "learning_rate": 9.832099826007126e-06, "loss": 17.9325, "step": 6032 }, { "epoch": 0.11027839216188057, "grad_norm": 7.8488940376631255, "learning_rate": 9.832023752105073e-06, "loss": 17.7612, "step": 6033 }, { "epoch": 0.11029667135832709, "grad_norm": 7.88173752708001, "learning_rate": 9.831947661267196e-06, "loss": 18.057, "step": 6034 }, { "epoch": 0.11031495055477361, "grad_norm": 6.316696587296195, "learning_rate": 9.831871553493763e-06, "loss": 17.3385, "step": 6035 }, { "epoch": 0.11033322975122013, "grad_norm": 6.573038168903045, "learning_rate": 9.831795428785038e-06, "loss": 17.3735, "step": 6036 }, { "epoch": 0.11035150894766665, "grad_norm": 7.502583056333343, "learning_rate": 9.83171928714129e-06, "loss": 18.2043, "step": 6037 }, { "epoch": 0.11036978814411319, "grad_norm": 6.639241512757685, "learning_rate": 9.831643128562786e-06, "loss": 17.56, "step": 6038 }, { "epoch": 0.11038806734055971, "grad_norm": 6.481501088163189, "learning_rate": 9.831566953049791e-06, "loss": 17.4618, "step": 6039 }, { "epoch": 0.11040634653700623, "grad_norm": 7.354046600163907, "learning_rate": 9.831490760602573e-06, "loss": 18.3112, "step": 6040 }, { "epoch": 0.11042462573345276, "grad_norm": 7.255419059578754, "learning_rate": 9.8314145512214e-06, "loss": 17.9431, "step": 6041 }, { "epoch": 0.11044290492989928, "grad_norm": 6.92008124755534, "learning_rate": 9.831338324906537e-06, "loss": 17.8271, "step": 6042 }, { "epoch": 0.11046118412634581, "grad_norm": 6.3004293240695235, "learning_rate": 9.831262081658253e-06, "loss": 17.1417, "step": 6043 }, { "epoch": 0.11047946332279233, "grad_norm": 6.620006993373256, "learning_rate": 9.831185821476815e-06, "loss": 17.4947, "step": 6044 }, { "epoch": 0.11049774251923886, "grad_norm": 6.307453629542876, "learning_rate": 9.831109544362489e-06, "loss": 17.689, "step": 6045 }, { "epoch": 0.11051602171568538, "grad_norm": 7.194822447342063, "learning_rate": 9.831033250315544e-06, "loss": 17.7781, "step": 6046 }, { "epoch": 0.1105343009121319, "grad_norm": 8.625339065623256, "learning_rate": 9.830956939336248e-06, "loss": 18.3123, "step": 6047 }, { "epoch": 0.11055258010857842, "grad_norm": 6.474551579719272, "learning_rate": 9.830880611424866e-06, "loss": 17.5376, "step": 6048 }, { "epoch": 0.11057085930502496, "grad_norm": 7.036493049200884, "learning_rate": 9.830804266581667e-06, "loss": 17.7203, "step": 6049 }, { "epoch": 0.11058913850147148, "grad_norm": 8.329100061696156, "learning_rate": 9.830727904806918e-06, "loss": 18.3742, "step": 6050 }, { "epoch": 0.110607417697918, "grad_norm": 7.052059043051446, "learning_rate": 9.830651526100884e-06, "loss": 17.7059, "step": 6051 }, { "epoch": 0.11062569689436452, "grad_norm": 6.58886304139191, "learning_rate": 9.83057513046384e-06, "loss": 17.5383, "step": 6052 }, { "epoch": 0.11064397609081104, "grad_norm": 6.97142208965401, "learning_rate": 9.830498717896047e-06, "loss": 17.6627, "step": 6053 }, { "epoch": 0.11066225528725757, "grad_norm": 6.601100442211125, "learning_rate": 9.830422288397776e-06, "loss": 17.5335, "step": 6054 }, { "epoch": 0.1106805344837041, "grad_norm": 6.573928381752774, "learning_rate": 9.830345841969294e-06, "loss": 17.7606, "step": 6055 }, { "epoch": 0.11069881368015062, "grad_norm": 6.799830932989246, "learning_rate": 9.830269378610868e-06, "loss": 17.5729, "step": 6056 }, { "epoch": 0.11071709287659715, "grad_norm": 7.876836877064901, "learning_rate": 9.830192898322768e-06, "loss": 17.851, "step": 6057 }, { "epoch": 0.11073537207304367, "grad_norm": 7.555648849237257, "learning_rate": 9.83011640110526e-06, "loss": 17.9312, "step": 6058 }, { "epoch": 0.11075365126949019, "grad_norm": 7.203739739028518, "learning_rate": 9.830039886958615e-06, "loss": 17.7517, "step": 6059 }, { "epoch": 0.11077193046593672, "grad_norm": 7.177766089570443, "learning_rate": 9.829963355883098e-06, "loss": 17.9962, "step": 6060 }, { "epoch": 0.11079020966238325, "grad_norm": 6.5571110304539, "learning_rate": 9.829886807878979e-06, "loss": 17.6729, "step": 6061 }, { "epoch": 0.11080848885882977, "grad_norm": 6.139209121599606, "learning_rate": 9.829810242946525e-06, "loss": 17.3619, "step": 6062 }, { "epoch": 0.11082676805527629, "grad_norm": 7.792781352189638, "learning_rate": 9.829733661086005e-06, "loss": 17.941, "step": 6063 }, { "epoch": 0.11084504725172281, "grad_norm": 7.615161081426064, "learning_rate": 9.82965706229769e-06, "loss": 17.8296, "step": 6064 }, { "epoch": 0.11086332644816933, "grad_norm": 8.869587136548185, "learning_rate": 9.829580446581843e-06, "loss": 18.6859, "step": 6065 }, { "epoch": 0.11088160564461587, "grad_norm": 7.0451364874647595, "learning_rate": 9.82950381393874e-06, "loss": 17.4014, "step": 6066 }, { "epoch": 0.11089988484106239, "grad_norm": 6.641124738432329, "learning_rate": 9.82942716436864e-06, "loss": 17.6367, "step": 6067 }, { "epoch": 0.11091816403750891, "grad_norm": 6.614254508646868, "learning_rate": 9.82935049787182e-06, "loss": 17.6394, "step": 6068 }, { "epoch": 0.11093644323395543, "grad_norm": 6.62975390893817, "learning_rate": 9.829273814448546e-06, "loss": 17.7465, "step": 6069 }, { "epoch": 0.11095472243040196, "grad_norm": 7.512657342412706, "learning_rate": 9.829197114099084e-06, "loss": 17.6712, "step": 6070 }, { "epoch": 0.11097300162684848, "grad_norm": 6.07668994729175, "learning_rate": 9.829120396823706e-06, "loss": 17.2348, "step": 6071 }, { "epoch": 0.11099128082329501, "grad_norm": 8.122956038676117, "learning_rate": 9.829043662622681e-06, "loss": 18.3812, "step": 6072 }, { "epoch": 0.11100956001974153, "grad_norm": 7.771634793500091, "learning_rate": 9.828966911496277e-06, "loss": 17.8931, "step": 6073 }, { "epoch": 0.11102783921618806, "grad_norm": 7.414760524464213, "learning_rate": 9.828890143444763e-06, "loss": 18.34, "step": 6074 }, { "epoch": 0.11104611841263458, "grad_norm": 6.855095671572821, "learning_rate": 9.828813358468406e-06, "loss": 17.9983, "step": 6075 }, { "epoch": 0.1110643976090811, "grad_norm": 6.077699519279136, "learning_rate": 9.82873655656748e-06, "loss": 17.3106, "step": 6076 }, { "epoch": 0.11108267680552764, "grad_norm": 7.0648388877619075, "learning_rate": 9.82865973774225e-06, "loss": 17.9716, "step": 6077 }, { "epoch": 0.11110095600197416, "grad_norm": 6.766820816789677, "learning_rate": 9.828582901992987e-06, "loss": 17.4199, "step": 6078 }, { "epoch": 0.11111923519842068, "grad_norm": 7.267164185676141, "learning_rate": 9.82850604931996e-06, "loss": 17.7048, "step": 6079 }, { "epoch": 0.1111375143948672, "grad_norm": 6.3016461713514484, "learning_rate": 9.828429179723437e-06, "loss": 17.3628, "step": 6080 }, { "epoch": 0.11115579359131372, "grad_norm": 8.301755776026813, "learning_rate": 9.82835229320369e-06, "loss": 18.1444, "step": 6081 }, { "epoch": 0.11117407278776024, "grad_norm": 7.392556694316968, "learning_rate": 9.828275389760986e-06, "loss": 17.7959, "step": 6082 }, { "epoch": 0.11119235198420678, "grad_norm": 6.276445797133483, "learning_rate": 9.828198469395598e-06, "loss": 17.5543, "step": 6083 }, { "epoch": 0.1112106311806533, "grad_norm": 7.318058488520969, "learning_rate": 9.82812153210779e-06, "loss": 17.8725, "step": 6084 }, { "epoch": 0.11122891037709982, "grad_norm": 7.157296095234143, "learning_rate": 9.828044577897837e-06, "loss": 18.1602, "step": 6085 }, { "epoch": 0.11124718957354635, "grad_norm": 7.323118370588629, "learning_rate": 9.827967606766009e-06, "loss": 17.5684, "step": 6086 }, { "epoch": 0.11126546876999287, "grad_norm": 6.531660504221052, "learning_rate": 9.82789061871257e-06, "loss": 17.4564, "step": 6087 }, { "epoch": 0.11128374796643939, "grad_norm": 6.230555906074557, "learning_rate": 9.827813613737795e-06, "loss": 17.376, "step": 6088 }, { "epoch": 0.11130202716288592, "grad_norm": 6.835899987099556, "learning_rate": 9.82773659184195e-06, "loss": 18.0869, "step": 6089 }, { "epoch": 0.11132030635933245, "grad_norm": 6.9819025137206285, "learning_rate": 9.827659553025311e-06, "loss": 17.5553, "step": 6090 }, { "epoch": 0.11133858555577897, "grad_norm": 6.9195461465580825, "learning_rate": 9.827582497288142e-06, "loss": 17.494, "step": 6091 }, { "epoch": 0.11135686475222549, "grad_norm": 7.16870145138108, "learning_rate": 9.827505424630714e-06, "loss": 18.086, "step": 6092 }, { "epoch": 0.11137514394867201, "grad_norm": 7.75470910521547, "learning_rate": 9.827428335053301e-06, "loss": 17.9726, "step": 6093 }, { "epoch": 0.11139342314511855, "grad_norm": 6.356708930907695, "learning_rate": 9.827351228556168e-06, "loss": 17.4048, "step": 6094 }, { "epoch": 0.11141170234156507, "grad_norm": 8.195102079708166, "learning_rate": 9.82727410513959e-06, "loss": 17.998, "step": 6095 }, { "epoch": 0.11142998153801159, "grad_norm": 6.096094184633512, "learning_rate": 9.827196964803833e-06, "loss": 17.219, "step": 6096 }, { "epoch": 0.11144826073445811, "grad_norm": 6.789787708293502, "learning_rate": 9.827119807549172e-06, "loss": 17.4854, "step": 6097 }, { "epoch": 0.11146653993090463, "grad_norm": 7.9023625358560015, "learning_rate": 9.827042633375873e-06, "loss": 17.9394, "step": 6098 }, { "epoch": 0.11148481912735116, "grad_norm": 6.9377014505029715, "learning_rate": 9.826965442284212e-06, "loss": 17.1389, "step": 6099 }, { "epoch": 0.11150309832379769, "grad_norm": 8.037621294410371, "learning_rate": 9.826888234274452e-06, "loss": 18.2612, "step": 6100 }, { "epoch": 0.11152137752024421, "grad_norm": 7.304666527307093, "learning_rate": 9.826811009346869e-06, "loss": 17.7392, "step": 6101 }, { "epoch": 0.11153965671669074, "grad_norm": 6.3745404473633425, "learning_rate": 9.826733767501731e-06, "loss": 17.3288, "step": 6102 }, { "epoch": 0.11155793591313726, "grad_norm": 10.343701118274206, "learning_rate": 9.826656508739311e-06, "loss": 18.9521, "step": 6103 }, { "epoch": 0.11157621510958378, "grad_norm": 8.011024726129985, "learning_rate": 9.826579233059878e-06, "loss": 18.0134, "step": 6104 }, { "epoch": 0.1115944943060303, "grad_norm": 6.7895829546955175, "learning_rate": 9.826501940463706e-06, "loss": 17.5846, "step": 6105 }, { "epoch": 0.11161277350247684, "grad_norm": 7.49594915628895, "learning_rate": 9.826424630951062e-06, "loss": 17.7317, "step": 6106 }, { "epoch": 0.11163105269892336, "grad_norm": 7.049010994020828, "learning_rate": 9.826347304522219e-06, "loss": 17.5041, "step": 6107 }, { "epoch": 0.11164933189536988, "grad_norm": 6.625397145218589, "learning_rate": 9.826269961177447e-06, "loss": 17.3501, "step": 6108 }, { "epoch": 0.1116676110918164, "grad_norm": 6.79675307076588, "learning_rate": 9.826192600917017e-06, "loss": 17.8181, "step": 6109 }, { "epoch": 0.11168589028826292, "grad_norm": 6.889365418259622, "learning_rate": 9.826115223741202e-06, "loss": 17.5375, "step": 6110 }, { "epoch": 0.11170416948470946, "grad_norm": 7.6520421109127295, "learning_rate": 9.826037829650271e-06, "loss": 18.1953, "step": 6111 }, { "epoch": 0.11172244868115598, "grad_norm": 6.8452885499139215, "learning_rate": 9.825960418644495e-06, "loss": 17.5787, "step": 6112 }, { "epoch": 0.1117407278776025, "grad_norm": 6.151882389545902, "learning_rate": 9.82588299072415e-06, "loss": 17.4467, "step": 6113 }, { "epoch": 0.11175900707404902, "grad_norm": 6.83752001813512, "learning_rate": 9.8258055458895e-06, "loss": 17.3972, "step": 6114 }, { "epoch": 0.11177728627049555, "grad_norm": 7.396162960704652, "learning_rate": 9.825728084140824e-06, "loss": 17.8534, "step": 6115 }, { "epoch": 0.11179556546694207, "grad_norm": 8.660874668974742, "learning_rate": 9.825650605478386e-06, "loss": 18.5825, "step": 6116 }, { "epoch": 0.1118138446633886, "grad_norm": 7.5702717335581715, "learning_rate": 9.825573109902465e-06, "loss": 18.273, "step": 6117 }, { "epoch": 0.11183212385983513, "grad_norm": 7.7147435530331245, "learning_rate": 9.825495597413328e-06, "loss": 18.2381, "step": 6118 }, { "epoch": 0.11185040305628165, "grad_norm": 7.152143115685819, "learning_rate": 9.825418068011245e-06, "loss": 17.8437, "step": 6119 }, { "epoch": 0.11186868225272817, "grad_norm": 6.567868181370384, "learning_rate": 9.825340521696493e-06, "loss": 17.3631, "step": 6120 }, { "epoch": 0.11188696144917469, "grad_norm": 7.48768227300808, "learning_rate": 9.825262958469342e-06, "loss": 17.8795, "step": 6121 }, { "epoch": 0.11190524064562121, "grad_norm": 5.681827530665296, "learning_rate": 9.82518537833006e-06, "loss": 16.9753, "step": 6122 }, { "epoch": 0.11192351984206775, "grad_norm": 7.042510126919909, "learning_rate": 9.825107781278924e-06, "loss": 17.9713, "step": 6123 }, { "epoch": 0.11194179903851427, "grad_norm": 6.066116133662346, "learning_rate": 9.825030167316204e-06, "loss": 17.4079, "step": 6124 }, { "epoch": 0.11196007823496079, "grad_norm": 8.930087487018563, "learning_rate": 9.824952536442171e-06, "loss": 18.8651, "step": 6125 }, { "epoch": 0.11197835743140731, "grad_norm": 8.021392553948088, "learning_rate": 9.824874888657099e-06, "loss": 18.0091, "step": 6126 }, { "epoch": 0.11199663662785383, "grad_norm": 6.0462716777884795, "learning_rate": 9.824797223961259e-06, "loss": 17.4654, "step": 6127 }, { "epoch": 0.11201491582430037, "grad_norm": 7.870660144820459, "learning_rate": 9.824719542354923e-06, "loss": 18.1762, "step": 6128 }, { "epoch": 0.11203319502074689, "grad_norm": 7.304537260867494, "learning_rate": 9.824641843838364e-06, "loss": 17.9305, "step": 6129 }, { "epoch": 0.11205147421719341, "grad_norm": 6.882291977888932, "learning_rate": 9.824564128411854e-06, "loss": 17.6084, "step": 6130 }, { "epoch": 0.11206975341363994, "grad_norm": 8.883903749463636, "learning_rate": 9.824486396075665e-06, "loss": 18.6727, "step": 6131 }, { "epoch": 0.11208803261008646, "grad_norm": 7.763208221495181, "learning_rate": 9.82440864683007e-06, "loss": 18.1982, "step": 6132 }, { "epoch": 0.11210631180653298, "grad_norm": 7.494317510006399, "learning_rate": 9.824330880675341e-06, "loss": 17.5689, "step": 6133 }, { "epoch": 0.11212459100297952, "grad_norm": 6.734347902921629, "learning_rate": 9.824253097611751e-06, "loss": 17.6065, "step": 6134 }, { "epoch": 0.11214287019942604, "grad_norm": 6.903138003440501, "learning_rate": 9.824175297639573e-06, "loss": 17.7784, "step": 6135 }, { "epoch": 0.11216114939587256, "grad_norm": 8.975992327832438, "learning_rate": 9.82409748075908e-06, "loss": 18.3752, "step": 6136 }, { "epoch": 0.11217942859231908, "grad_norm": 6.789633529830011, "learning_rate": 9.824019646970543e-06, "loss": 17.6402, "step": 6137 }, { "epoch": 0.1121977077887656, "grad_norm": 6.409081435221622, "learning_rate": 9.823941796274235e-06, "loss": 17.5977, "step": 6138 }, { "epoch": 0.11221598698521212, "grad_norm": 6.116320825092029, "learning_rate": 9.823863928670431e-06, "loss": 17.6209, "step": 6139 }, { "epoch": 0.11223426618165866, "grad_norm": 7.867902646461152, "learning_rate": 9.823786044159403e-06, "loss": 18.1709, "step": 6140 }, { "epoch": 0.11225254537810518, "grad_norm": 5.947024102002984, "learning_rate": 9.823708142741422e-06, "loss": 17.2042, "step": 6141 }, { "epoch": 0.1122708245745517, "grad_norm": 6.851079550186919, "learning_rate": 9.823630224416762e-06, "loss": 17.4814, "step": 6142 }, { "epoch": 0.11228910377099822, "grad_norm": 7.498051350606899, "learning_rate": 9.823552289185699e-06, "loss": 18.1426, "step": 6143 }, { "epoch": 0.11230738296744475, "grad_norm": 6.564015601323493, "learning_rate": 9.823474337048502e-06, "loss": 17.9036, "step": 6144 }, { "epoch": 0.11232566216389128, "grad_norm": 7.85380257407971, "learning_rate": 9.823396368005445e-06, "loss": 17.9739, "step": 6145 }, { "epoch": 0.1123439413603378, "grad_norm": 7.5435428933914475, "learning_rate": 9.823318382056803e-06, "loss": 17.4072, "step": 6146 }, { "epoch": 0.11236222055678433, "grad_norm": 7.9263872401537645, "learning_rate": 9.823240379202851e-06, "loss": 18.1183, "step": 6147 }, { "epoch": 0.11238049975323085, "grad_norm": 6.175677186935788, "learning_rate": 9.823162359443858e-06, "loss": 17.1602, "step": 6148 }, { "epoch": 0.11239877894967737, "grad_norm": 8.336031690932213, "learning_rate": 9.8230843227801e-06, "loss": 18.207, "step": 6149 }, { "epoch": 0.11241705814612389, "grad_norm": 7.549068160390573, "learning_rate": 9.82300626921185e-06, "loss": 17.9894, "step": 6150 }, { "epoch": 0.11243533734257043, "grad_norm": 5.67824785647607, "learning_rate": 9.822928198739381e-06, "loss": 17.1215, "step": 6151 }, { "epoch": 0.11245361653901695, "grad_norm": 6.724954031901365, "learning_rate": 9.822850111362968e-06, "loss": 17.609, "step": 6152 }, { "epoch": 0.11247189573546347, "grad_norm": 5.731028461003114, "learning_rate": 9.822772007082883e-06, "loss": 17.1988, "step": 6153 }, { "epoch": 0.11249017493190999, "grad_norm": 7.958600282388625, "learning_rate": 9.8226938858994e-06, "loss": 18.0315, "step": 6154 }, { "epoch": 0.11250845412835651, "grad_norm": 5.418456092849641, "learning_rate": 9.822615747812794e-06, "loss": 17.0944, "step": 6155 }, { "epoch": 0.11252673332480304, "grad_norm": 6.250556961926755, "learning_rate": 9.82253759282334e-06, "loss": 17.3175, "step": 6156 }, { "epoch": 0.11254501252124957, "grad_norm": 7.435879048175508, "learning_rate": 9.822459420931308e-06, "loss": 17.8584, "step": 6157 }, { "epoch": 0.11256329171769609, "grad_norm": 7.8249285387608305, "learning_rate": 9.822381232136974e-06, "loss": 17.9644, "step": 6158 }, { "epoch": 0.11258157091414261, "grad_norm": 6.8331697717654984, "learning_rate": 9.822303026440614e-06, "loss": 17.6083, "step": 6159 }, { "epoch": 0.11259985011058914, "grad_norm": 6.290695489058567, "learning_rate": 9.822224803842501e-06, "loss": 17.3292, "step": 6160 }, { "epoch": 0.11261812930703566, "grad_norm": 6.905225875045079, "learning_rate": 9.822146564342907e-06, "loss": 17.5662, "step": 6161 }, { "epoch": 0.1126364085034822, "grad_norm": 7.702543026983446, "learning_rate": 9.822068307942107e-06, "loss": 18.0518, "step": 6162 }, { "epoch": 0.11265468769992872, "grad_norm": 9.555595004282456, "learning_rate": 9.821990034640377e-06, "loss": 18.3942, "step": 6163 }, { "epoch": 0.11267296689637524, "grad_norm": 7.863144492117609, "learning_rate": 9.82191174443799e-06, "loss": 18.304, "step": 6164 }, { "epoch": 0.11269124609282176, "grad_norm": 6.460215556460458, "learning_rate": 9.821833437335222e-06, "loss": 17.5291, "step": 6165 }, { "epoch": 0.11270952528926828, "grad_norm": 8.68694114150609, "learning_rate": 9.821755113332346e-06, "loss": 18.0716, "step": 6166 }, { "epoch": 0.1127278044857148, "grad_norm": 6.3340952582156484, "learning_rate": 9.821676772429635e-06, "loss": 17.5139, "step": 6167 }, { "epoch": 0.11274608368216134, "grad_norm": 8.46967776440184, "learning_rate": 9.821598414627366e-06, "loss": 18.1872, "step": 6168 }, { "epoch": 0.11276436287860786, "grad_norm": 6.4976312789064865, "learning_rate": 9.821520039925813e-06, "loss": 17.483, "step": 6169 }, { "epoch": 0.11278264207505438, "grad_norm": 7.102957846646031, "learning_rate": 9.821441648325251e-06, "loss": 17.8098, "step": 6170 }, { "epoch": 0.1128009212715009, "grad_norm": 8.305200924217363, "learning_rate": 9.821363239825955e-06, "loss": 18.351, "step": 6171 }, { "epoch": 0.11281920046794743, "grad_norm": 6.250879529314112, "learning_rate": 9.821284814428198e-06, "loss": 17.3108, "step": 6172 }, { "epoch": 0.11283747966439395, "grad_norm": 7.34027541996215, "learning_rate": 9.821206372132256e-06, "loss": 17.4873, "step": 6173 }, { "epoch": 0.11285575886084048, "grad_norm": 6.9034516263348475, "learning_rate": 9.821127912938406e-06, "loss": 17.8491, "step": 6174 }, { "epoch": 0.112874038057287, "grad_norm": 6.760558702840803, "learning_rate": 9.82104943684692e-06, "loss": 17.6755, "step": 6175 }, { "epoch": 0.11289231725373353, "grad_norm": 7.475051783595172, "learning_rate": 9.820970943858074e-06, "loss": 17.6946, "step": 6176 }, { "epoch": 0.11291059645018005, "grad_norm": 8.605565217503512, "learning_rate": 9.820892433972143e-06, "loss": 18.4892, "step": 6177 }, { "epoch": 0.11292887564662657, "grad_norm": 7.463849238422301, "learning_rate": 9.820813907189401e-06, "loss": 17.7594, "step": 6178 }, { "epoch": 0.1129471548430731, "grad_norm": 6.493951292443605, "learning_rate": 9.820735363510128e-06, "loss": 17.5372, "step": 6179 }, { "epoch": 0.11296543403951963, "grad_norm": 6.778429363469027, "learning_rate": 9.820656802934593e-06, "loss": 17.8115, "step": 6180 }, { "epoch": 0.11298371323596615, "grad_norm": 5.498516120360731, "learning_rate": 9.820578225463076e-06, "loss": 17.1009, "step": 6181 }, { "epoch": 0.11300199243241267, "grad_norm": 6.7522866139760644, "learning_rate": 9.820499631095847e-06, "loss": 17.7332, "step": 6182 }, { "epoch": 0.11302027162885919, "grad_norm": 7.128485909347858, "learning_rate": 9.82042101983319e-06, "loss": 17.7156, "step": 6183 }, { "epoch": 0.11303855082530571, "grad_norm": 6.56856444299014, "learning_rate": 9.820342391675373e-06, "loss": 17.7751, "step": 6184 }, { "epoch": 0.11305683002175225, "grad_norm": 6.936302922317391, "learning_rate": 9.820263746622674e-06, "loss": 17.6575, "step": 6185 }, { "epoch": 0.11307510921819877, "grad_norm": 7.840124363272216, "learning_rate": 9.82018508467537e-06, "loss": 18.32, "step": 6186 }, { "epoch": 0.1130933884146453, "grad_norm": 6.888341902062848, "learning_rate": 9.820106405833735e-06, "loss": 17.6309, "step": 6187 }, { "epoch": 0.11311166761109182, "grad_norm": 7.271992003510105, "learning_rate": 9.820027710098045e-06, "loss": 17.9614, "step": 6188 }, { "epoch": 0.11312994680753834, "grad_norm": 8.010739408017542, "learning_rate": 9.819948997468577e-06, "loss": 18.4273, "step": 6189 }, { "epoch": 0.11314822600398486, "grad_norm": 8.042282447923073, "learning_rate": 9.819870267945605e-06, "loss": 18.3378, "step": 6190 }, { "epoch": 0.1131665052004314, "grad_norm": 7.8011236944362325, "learning_rate": 9.819791521529408e-06, "loss": 18.1001, "step": 6191 }, { "epoch": 0.11318478439687792, "grad_norm": 7.728160979327711, "learning_rate": 9.819712758220257e-06, "loss": 17.9143, "step": 6192 }, { "epoch": 0.11320306359332444, "grad_norm": 6.325758801504882, "learning_rate": 9.819633978018432e-06, "loss": 17.5259, "step": 6193 }, { "epoch": 0.11322134278977096, "grad_norm": 5.32983254100174, "learning_rate": 9.81955518092421e-06, "loss": 17.1029, "step": 6194 }, { "epoch": 0.11323962198621748, "grad_norm": 6.033918154854671, "learning_rate": 9.819476366937863e-06, "loss": 17.5463, "step": 6195 }, { "epoch": 0.11325790118266402, "grad_norm": 8.331254585623563, "learning_rate": 9.81939753605967e-06, "loss": 18.2479, "step": 6196 }, { "epoch": 0.11327618037911054, "grad_norm": 8.115214118595839, "learning_rate": 9.819318688289907e-06, "loss": 18.2687, "step": 6197 }, { "epoch": 0.11329445957555706, "grad_norm": 7.051646875887396, "learning_rate": 9.819239823628852e-06, "loss": 17.8834, "step": 6198 }, { "epoch": 0.11331273877200358, "grad_norm": 9.367605477448407, "learning_rate": 9.819160942076778e-06, "loss": 18.6733, "step": 6199 }, { "epoch": 0.1133310179684501, "grad_norm": 9.731200169257715, "learning_rate": 9.819082043633963e-06, "loss": 18.1608, "step": 6200 }, { "epoch": 0.11334929716489663, "grad_norm": 7.864027832001035, "learning_rate": 9.819003128300684e-06, "loss": 18.2864, "step": 6201 }, { "epoch": 0.11336757636134316, "grad_norm": 7.5567191452454345, "learning_rate": 9.818924196077215e-06, "loss": 18.0862, "step": 6202 }, { "epoch": 0.11338585555778968, "grad_norm": 6.385528665166163, "learning_rate": 9.818845246963838e-06, "loss": 17.6081, "step": 6203 }, { "epoch": 0.1134041347542362, "grad_norm": 8.142955437200175, "learning_rate": 9.818766280960825e-06, "loss": 17.981, "step": 6204 }, { "epoch": 0.11342241395068273, "grad_norm": 6.624707244814315, "learning_rate": 9.818687298068454e-06, "loss": 17.707, "step": 6205 }, { "epoch": 0.11344069314712925, "grad_norm": 9.578494114196964, "learning_rate": 9.818608298287004e-06, "loss": 17.9611, "step": 6206 }, { "epoch": 0.11345897234357577, "grad_norm": 8.46016392350176, "learning_rate": 9.81852928161675e-06, "loss": 18.2633, "step": 6207 }, { "epoch": 0.1134772515400223, "grad_norm": 6.596661582164722, "learning_rate": 9.818450248057967e-06, "loss": 17.3136, "step": 6208 }, { "epoch": 0.11349553073646883, "grad_norm": 7.295401665588104, "learning_rate": 9.818371197610935e-06, "loss": 18.0474, "step": 6209 }, { "epoch": 0.11351380993291535, "grad_norm": 7.639919598354987, "learning_rate": 9.81829213027593e-06, "loss": 17.8791, "step": 6210 }, { "epoch": 0.11353208912936187, "grad_norm": 6.652570304730243, "learning_rate": 9.818213046053228e-06, "loss": 17.9262, "step": 6211 }, { "epoch": 0.11355036832580839, "grad_norm": 6.35751685274225, "learning_rate": 9.81813394494311e-06, "loss": 17.6467, "step": 6212 }, { "epoch": 0.11356864752225493, "grad_norm": 6.7906346431503195, "learning_rate": 9.818054826945848e-06, "loss": 17.6755, "step": 6213 }, { "epoch": 0.11358692671870145, "grad_norm": 6.576625503696223, "learning_rate": 9.817975692061723e-06, "loss": 17.6876, "step": 6214 }, { "epoch": 0.11360520591514797, "grad_norm": 7.090944854282477, "learning_rate": 9.817896540291013e-06, "loss": 17.7948, "step": 6215 }, { "epoch": 0.1136234851115945, "grad_norm": 6.508156125368024, "learning_rate": 9.817817371633992e-06, "loss": 17.5299, "step": 6216 }, { "epoch": 0.11364176430804102, "grad_norm": 7.270076954305761, "learning_rate": 9.81773818609094e-06, "loss": 17.8332, "step": 6217 }, { "epoch": 0.11366004350448754, "grad_norm": 6.599159646684854, "learning_rate": 9.817658983662132e-06, "loss": 17.6176, "step": 6218 }, { "epoch": 0.11367832270093407, "grad_norm": 7.421039349125596, "learning_rate": 9.817579764347849e-06, "loss": 17.7094, "step": 6219 }, { "epoch": 0.1136966018973806, "grad_norm": 7.875083752384193, "learning_rate": 9.817500528148365e-06, "loss": 18.0289, "step": 6220 }, { "epoch": 0.11371488109382712, "grad_norm": 7.348782010366022, "learning_rate": 9.817421275063962e-06, "loss": 17.9597, "step": 6221 }, { "epoch": 0.11373316029027364, "grad_norm": 6.3017741227720165, "learning_rate": 9.817342005094915e-06, "loss": 17.4832, "step": 6222 }, { "epoch": 0.11375143948672016, "grad_norm": 7.030648821542163, "learning_rate": 9.8172627182415e-06, "loss": 17.8352, "step": 6223 }, { "epoch": 0.11376971868316668, "grad_norm": 7.744853782477074, "learning_rate": 9.817183414504e-06, "loss": 18.175, "step": 6224 }, { "epoch": 0.11378799787961322, "grad_norm": 6.717189735431535, "learning_rate": 9.81710409388269e-06, "loss": 17.5284, "step": 6225 }, { "epoch": 0.11380627707605974, "grad_norm": 6.592936080943766, "learning_rate": 9.817024756377847e-06, "loss": 17.6753, "step": 6226 }, { "epoch": 0.11382455627250626, "grad_norm": 7.49766059181947, "learning_rate": 9.81694540198975e-06, "loss": 17.4403, "step": 6227 }, { "epoch": 0.11384283546895278, "grad_norm": 8.463039411504896, "learning_rate": 9.81686603071868e-06, "loss": 17.8722, "step": 6228 }, { "epoch": 0.1138611146653993, "grad_norm": 5.78079434548121, "learning_rate": 9.816786642564909e-06, "loss": 17.2611, "step": 6229 }, { "epoch": 0.11387939386184584, "grad_norm": 6.61962245135833, "learning_rate": 9.816707237528719e-06, "loss": 17.7266, "step": 6230 }, { "epoch": 0.11389767305829236, "grad_norm": 7.793304355250502, "learning_rate": 9.81662781561039e-06, "loss": 17.872, "step": 6231 }, { "epoch": 0.11391595225473888, "grad_norm": 6.104698234880663, "learning_rate": 9.816548376810199e-06, "loss": 17.2371, "step": 6232 }, { "epoch": 0.1139342314511854, "grad_norm": 7.53738109773018, "learning_rate": 9.816468921128422e-06, "loss": 17.5866, "step": 6233 }, { "epoch": 0.11395251064763193, "grad_norm": 7.47770185312477, "learning_rate": 9.81638944856534e-06, "loss": 18.0712, "step": 6234 }, { "epoch": 0.11397078984407845, "grad_norm": 9.253215609884657, "learning_rate": 9.816309959121231e-06, "loss": 18.1412, "step": 6235 }, { "epoch": 0.11398906904052498, "grad_norm": 6.569894728303271, "learning_rate": 9.816230452796373e-06, "loss": 17.4748, "step": 6236 }, { "epoch": 0.1140073482369715, "grad_norm": 9.180895486113846, "learning_rate": 9.816150929591046e-06, "loss": 17.4326, "step": 6237 }, { "epoch": 0.11402562743341803, "grad_norm": 7.557170943012073, "learning_rate": 9.816071389505529e-06, "loss": 17.9139, "step": 6238 }, { "epoch": 0.11404390662986455, "grad_norm": 7.487304070854714, "learning_rate": 9.815991832540098e-06, "loss": 17.8918, "step": 6239 }, { "epoch": 0.11406218582631107, "grad_norm": 7.379009782163196, "learning_rate": 9.815912258695034e-06, "loss": 17.9594, "step": 6240 }, { "epoch": 0.1140804650227576, "grad_norm": 6.995329675365152, "learning_rate": 9.815832667970615e-06, "loss": 17.6184, "step": 6241 }, { "epoch": 0.11409874421920413, "grad_norm": 10.00764842834887, "learning_rate": 9.815753060367122e-06, "loss": 18.0156, "step": 6242 }, { "epoch": 0.11411702341565065, "grad_norm": 6.87247120564153, "learning_rate": 9.815673435884831e-06, "loss": 17.6839, "step": 6243 }, { "epoch": 0.11413530261209717, "grad_norm": 6.830404337435096, "learning_rate": 9.815593794524022e-06, "loss": 18.0374, "step": 6244 }, { "epoch": 0.1141535818085437, "grad_norm": 7.639694761758586, "learning_rate": 9.815514136284977e-06, "loss": 17.8666, "step": 6245 }, { "epoch": 0.11417186100499022, "grad_norm": 6.708206899982387, "learning_rate": 9.81543446116797e-06, "loss": 17.6746, "step": 6246 }, { "epoch": 0.11419014020143675, "grad_norm": 6.541288388294831, "learning_rate": 9.815354769173284e-06, "loss": 17.2896, "step": 6247 }, { "epoch": 0.11420841939788327, "grad_norm": 6.76058205704926, "learning_rate": 9.815275060301198e-06, "loss": 17.5915, "step": 6248 }, { "epoch": 0.1142266985943298, "grad_norm": 7.19369988720146, "learning_rate": 9.81519533455199e-06, "loss": 17.7179, "step": 6249 }, { "epoch": 0.11424497779077632, "grad_norm": 7.1423211120451615, "learning_rate": 9.81511559192594e-06, "loss": 17.7816, "step": 6250 }, { "epoch": 0.11426325698722284, "grad_norm": 7.522866178135697, "learning_rate": 9.815035832423329e-06, "loss": 18.0128, "step": 6251 }, { "epoch": 0.11428153618366936, "grad_norm": 7.608876775665035, "learning_rate": 9.814956056044433e-06, "loss": 17.9453, "step": 6252 }, { "epoch": 0.1142998153801159, "grad_norm": 7.774338330810066, "learning_rate": 9.814876262789537e-06, "loss": 18.0482, "step": 6253 }, { "epoch": 0.11431809457656242, "grad_norm": 8.76759266751559, "learning_rate": 9.814796452658915e-06, "loss": 18.689, "step": 6254 }, { "epoch": 0.11433637377300894, "grad_norm": 6.949687430786227, "learning_rate": 9.81471662565285e-06, "loss": 17.6662, "step": 6255 }, { "epoch": 0.11435465296945546, "grad_norm": 7.502716402019781, "learning_rate": 9.814636781771621e-06, "loss": 17.7238, "step": 6256 }, { "epoch": 0.11437293216590198, "grad_norm": 7.893843327867272, "learning_rate": 9.814556921015509e-06, "loss": 18.1044, "step": 6257 }, { "epoch": 0.1143912113623485, "grad_norm": 8.017401807950714, "learning_rate": 9.814477043384791e-06, "loss": 17.8748, "step": 6258 }, { "epoch": 0.11440949055879504, "grad_norm": 6.854928440046626, "learning_rate": 9.814397148879751e-06, "loss": 17.5991, "step": 6259 }, { "epoch": 0.11442776975524156, "grad_norm": 10.515771082598214, "learning_rate": 9.814317237500664e-06, "loss": 18.2981, "step": 6260 }, { "epoch": 0.11444604895168808, "grad_norm": 6.037075326124429, "learning_rate": 9.814237309247814e-06, "loss": 17.3793, "step": 6261 }, { "epoch": 0.1144643281481346, "grad_norm": 6.462855231352157, "learning_rate": 9.81415736412148e-06, "loss": 17.5027, "step": 6262 }, { "epoch": 0.11448260734458113, "grad_norm": 6.400781963784588, "learning_rate": 9.814077402121943e-06, "loss": 17.5447, "step": 6263 }, { "epoch": 0.11450088654102766, "grad_norm": 7.292791950012517, "learning_rate": 9.813997423249482e-06, "loss": 17.9095, "step": 6264 }, { "epoch": 0.11451916573747419, "grad_norm": 7.009803397848102, "learning_rate": 9.813917427504378e-06, "loss": 17.8501, "step": 6265 }, { "epoch": 0.1145374449339207, "grad_norm": 6.570670431757007, "learning_rate": 9.813837414886909e-06, "loss": 17.7744, "step": 6266 }, { "epoch": 0.11455572413036723, "grad_norm": 8.574922859793212, "learning_rate": 9.81375738539736e-06, "loss": 17.9815, "step": 6267 }, { "epoch": 0.11457400332681375, "grad_norm": 6.7876422049688285, "learning_rate": 9.813677339036009e-06, "loss": 17.5424, "step": 6268 }, { "epoch": 0.11459228252326027, "grad_norm": 8.11468122302412, "learning_rate": 9.813597275803135e-06, "loss": 18.5315, "step": 6269 }, { "epoch": 0.11461056171970681, "grad_norm": 6.6613413290634895, "learning_rate": 9.813517195699022e-06, "loss": 17.5249, "step": 6270 }, { "epoch": 0.11462884091615333, "grad_norm": 6.895460372431868, "learning_rate": 9.813437098723948e-06, "loss": 17.5528, "step": 6271 }, { "epoch": 0.11464712011259985, "grad_norm": 6.422300246062773, "learning_rate": 9.813356984878196e-06, "loss": 17.1556, "step": 6272 }, { "epoch": 0.11466539930904637, "grad_norm": 7.886666955754807, "learning_rate": 9.813276854162043e-06, "loss": 18.1806, "step": 6273 }, { "epoch": 0.1146836785054929, "grad_norm": 7.4543683702574794, "learning_rate": 9.813196706575774e-06, "loss": 17.779, "step": 6274 }, { "epoch": 0.11470195770193942, "grad_norm": 7.156585439808601, "learning_rate": 9.813116542119666e-06, "loss": 17.9412, "step": 6275 }, { "epoch": 0.11472023689838595, "grad_norm": 9.138870548783235, "learning_rate": 9.813036360794007e-06, "loss": 18.5025, "step": 6276 }, { "epoch": 0.11473851609483247, "grad_norm": 6.259275145245058, "learning_rate": 9.81295616259907e-06, "loss": 17.3672, "step": 6277 }, { "epoch": 0.114756795291279, "grad_norm": 7.411431181384231, "learning_rate": 9.812875947535138e-06, "loss": 17.8309, "step": 6278 }, { "epoch": 0.11477507448772552, "grad_norm": 7.031345429104616, "learning_rate": 9.812795715602495e-06, "loss": 17.736, "step": 6279 }, { "epoch": 0.11479335368417204, "grad_norm": 6.921873970480513, "learning_rate": 9.812715466801422e-06, "loss": 17.3918, "step": 6280 }, { "epoch": 0.11481163288061857, "grad_norm": 7.009409342805002, "learning_rate": 9.812635201132197e-06, "loss": 17.6489, "step": 6281 }, { "epoch": 0.1148299120770651, "grad_norm": 6.468373114451255, "learning_rate": 9.812554918595103e-06, "loss": 17.5001, "step": 6282 }, { "epoch": 0.11484819127351162, "grad_norm": 6.844055770329313, "learning_rate": 9.812474619190422e-06, "loss": 17.5318, "step": 6283 }, { "epoch": 0.11486647046995814, "grad_norm": 5.93876635730014, "learning_rate": 9.812394302918436e-06, "loss": 17.2376, "step": 6284 }, { "epoch": 0.11488474966640466, "grad_norm": 7.332919688746114, "learning_rate": 9.812313969779426e-06, "loss": 17.7357, "step": 6285 }, { "epoch": 0.11490302886285118, "grad_norm": 7.43211164073885, "learning_rate": 9.812233619773673e-06, "loss": 17.6347, "step": 6286 }, { "epoch": 0.11492130805929772, "grad_norm": 6.855720150266296, "learning_rate": 9.812153252901457e-06, "loss": 17.539, "step": 6287 }, { "epoch": 0.11493958725574424, "grad_norm": 7.8882473645624565, "learning_rate": 9.812072869163063e-06, "loss": 17.6707, "step": 6288 }, { "epoch": 0.11495786645219076, "grad_norm": 9.086128872398263, "learning_rate": 9.811992468558769e-06, "loss": 18.2784, "step": 6289 }, { "epoch": 0.11497614564863728, "grad_norm": 8.208285616076992, "learning_rate": 9.811912051088861e-06, "loss": 18.4073, "step": 6290 }, { "epoch": 0.1149944248450838, "grad_norm": 9.03003262984523, "learning_rate": 9.811831616753618e-06, "loss": 18.3147, "step": 6291 }, { "epoch": 0.11501270404153033, "grad_norm": 7.348070894525367, "learning_rate": 9.811751165553322e-06, "loss": 17.6348, "step": 6292 }, { "epoch": 0.11503098323797686, "grad_norm": 6.825424625846023, "learning_rate": 9.811670697488258e-06, "loss": 17.5614, "step": 6293 }, { "epoch": 0.11504926243442339, "grad_norm": 7.763856822305459, "learning_rate": 9.811590212558704e-06, "loss": 17.9533, "step": 6294 }, { "epoch": 0.11506754163086991, "grad_norm": 6.819436018457505, "learning_rate": 9.811509710764945e-06, "loss": 17.4559, "step": 6295 }, { "epoch": 0.11508582082731643, "grad_norm": 6.698750109017797, "learning_rate": 9.81142919210726e-06, "loss": 17.5214, "step": 6296 }, { "epoch": 0.11510410002376295, "grad_norm": 7.438442475434122, "learning_rate": 9.811348656585936e-06, "loss": 18.0474, "step": 6297 }, { "epoch": 0.11512237922020949, "grad_norm": 7.1820486597397055, "learning_rate": 9.81126810420125e-06, "loss": 17.9241, "step": 6298 }, { "epoch": 0.11514065841665601, "grad_norm": 6.977489322993325, "learning_rate": 9.811187534953488e-06, "loss": 17.8234, "step": 6299 }, { "epoch": 0.11515893761310253, "grad_norm": 6.914916530668801, "learning_rate": 9.811106948842931e-06, "loss": 17.5373, "step": 6300 }, { "epoch": 0.11517721680954905, "grad_norm": 7.063726615733678, "learning_rate": 9.811026345869862e-06, "loss": 17.5611, "step": 6301 }, { "epoch": 0.11519549600599557, "grad_norm": 7.1751863227292905, "learning_rate": 9.810945726034563e-06, "loss": 17.7032, "step": 6302 }, { "epoch": 0.1152137752024421, "grad_norm": 6.682785236121788, "learning_rate": 9.810865089337316e-06, "loss": 17.662, "step": 6303 }, { "epoch": 0.11523205439888863, "grad_norm": 8.435816868510173, "learning_rate": 9.810784435778404e-06, "loss": 18.1695, "step": 6304 }, { "epoch": 0.11525033359533515, "grad_norm": 6.802208917897139, "learning_rate": 9.810703765358111e-06, "loss": 17.7794, "step": 6305 }, { "epoch": 0.11526861279178167, "grad_norm": 8.180548363814108, "learning_rate": 9.810623078076719e-06, "loss": 18.2576, "step": 6306 }, { "epoch": 0.1152868919882282, "grad_norm": 7.930981478452358, "learning_rate": 9.810542373934511e-06, "loss": 18.0729, "step": 6307 }, { "epoch": 0.11530517118467472, "grad_norm": 6.838572780336669, "learning_rate": 9.810461652931768e-06, "loss": 17.4724, "step": 6308 }, { "epoch": 0.11532345038112124, "grad_norm": 8.068676228106554, "learning_rate": 9.810380915068775e-06, "loss": 17.8672, "step": 6309 }, { "epoch": 0.11534172957756778, "grad_norm": 7.196540776256688, "learning_rate": 9.810300160345814e-06, "loss": 17.9999, "step": 6310 }, { "epoch": 0.1153600087740143, "grad_norm": 12.344777323849373, "learning_rate": 9.810219388763168e-06, "loss": 18.4425, "step": 6311 }, { "epoch": 0.11537828797046082, "grad_norm": 6.833460845059853, "learning_rate": 9.810138600321122e-06, "loss": 17.389, "step": 6312 }, { "epoch": 0.11539656716690734, "grad_norm": 7.2852103617293, "learning_rate": 9.810057795019956e-06, "loss": 17.7295, "step": 6313 }, { "epoch": 0.11541484636335386, "grad_norm": 7.220185693867019, "learning_rate": 9.809976972859956e-06, "loss": 18.0016, "step": 6314 }, { "epoch": 0.1154331255598004, "grad_norm": 7.8150757138486115, "learning_rate": 9.809896133841404e-06, "loss": 17.8048, "step": 6315 }, { "epoch": 0.11545140475624692, "grad_norm": 5.557317761478397, "learning_rate": 9.809815277964582e-06, "loss": 17.2288, "step": 6316 }, { "epoch": 0.11546968395269344, "grad_norm": 7.061668131990722, "learning_rate": 9.809734405229776e-06, "loss": 17.369, "step": 6317 }, { "epoch": 0.11548796314913996, "grad_norm": 7.244504281890309, "learning_rate": 9.809653515637268e-06, "loss": 18.0361, "step": 6318 }, { "epoch": 0.11550624234558649, "grad_norm": 8.180238124252234, "learning_rate": 9.809572609187341e-06, "loss": 18.1929, "step": 6319 }, { "epoch": 0.115524521542033, "grad_norm": 7.531069188380508, "learning_rate": 9.80949168588028e-06, "loss": 17.9694, "step": 6320 }, { "epoch": 0.11554280073847954, "grad_norm": 6.1840911491435095, "learning_rate": 9.809410745716367e-06, "loss": 17.4163, "step": 6321 }, { "epoch": 0.11556107993492606, "grad_norm": 5.799217051722377, "learning_rate": 9.80932978869589e-06, "loss": 17.3084, "step": 6322 }, { "epoch": 0.11557935913137259, "grad_norm": 6.4627063889616165, "learning_rate": 9.809248814819126e-06, "loss": 17.5635, "step": 6323 }, { "epoch": 0.11559763832781911, "grad_norm": 7.339022253426556, "learning_rate": 9.809167824086365e-06, "loss": 18.1535, "step": 6324 }, { "epoch": 0.11561591752426563, "grad_norm": 8.484544218942316, "learning_rate": 9.809086816497886e-06, "loss": 18.4038, "step": 6325 }, { "epoch": 0.11563419672071215, "grad_norm": 7.2863558889769875, "learning_rate": 9.809005792053976e-06, "loss": 17.6745, "step": 6326 }, { "epoch": 0.11565247591715869, "grad_norm": 8.3274098521248, "learning_rate": 9.808924750754918e-06, "loss": 17.3246, "step": 6327 }, { "epoch": 0.11567075511360521, "grad_norm": 6.559594966952365, "learning_rate": 9.808843692600995e-06, "loss": 17.5227, "step": 6328 }, { "epoch": 0.11568903431005173, "grad_norm": 6.639002728992262, "learning_rate": 9.808762617592494e-06, "loss": 17.4937, "step": 6329 }, { "epoch": 0.11570731350649825, "grad_norm": 6.373177086925029, "learning_rate": 9.808681525729696e-06, "loss": 17.3868, "step": 6330 }, { "epoch": 0.11572559270294477, "grad_norm": 9.208906527418707, "learning_rate": 9.808600417012886e-06, "loss": 19.1502, "step": 6331 }, { "epoch": 0.11574387189939131, "grad_norm": 6.392016892776988, "learning_rate": 9.80851929144235e-06, "loss": 17.2708, "step": 6332 }, { "epoch": 0.11576215109583783, "grad_norm": 9.832547492016513, "learning_rate": 9.80843814901837e-06, "loss": 18.5383, "step": 6333 }, { "epoch": 0.11578043029228435, "grad_norm": 7.692122147960202, "learning_rate": 9.808356989741231e-06, "loss": 18.2824, "step": 6334 }, { "epoch": 0.11579870948873087, "grad_norm": 7.55166145720147, "learning_rate": 9.80827581361122e-06, "loss": 17.9764, "step": 6335 }, { "epoch": 0.1158169886851774, "grad_norm": 6.364236199278281, "learning_rate": 9.808194620628619e-06, "loss": 17.2458, "step": 6336 }, { "epoch": 0.11583526788162392, "grad_norm": 6.98256771151916, "learning_rate": 9.808113410793713e-06, "loss": 17.7631, "step": 6337 }, { "epoch": 0.11585354707807045, "grad_norm": 8.25863248027287, "learning_rate": 9.808032184106786e-06, "loss": 18.33, "step": 6338 }, { "epoch": 0.11587182627451698, "grad_norm": 6.55601754729298, "learning_rate": 9.807950940568124e-06, "loss": 17.4528, "step": 6339 }, { "epoch": 0.1158901054709635, "grad_norm": 6.976318082824107, "learning_rate": 9.80786968017801e-06, "loss": 17.8263, "step": 6340 }, { "epoch": 0.11590838466741002, "grad_norm": 6.232620318070562, "learning_rate": 9.807788402936732e-06, "loss": 17.338, "step": 6341 }, { "epoch": 0.11592666386385654, "grad_norm": 6.2455593197647925, "learning_rate": 9.807707108844572e-06, "loss": 17.5911, "step": 6342 }, { "epoch": 0.11594494306030306, "grad_norm": 6.7502933548553745, "learning_rate": 9.807625797901817e-06, "loss": 17.7904, "step": 6343 }, { "epoch": 0.1159632222567496, "grad_norm": 7.082728421548852, "learning_rate": 9.807544470108748e-06, "loss": 17.7445, "step": 6344 }, { "epoch": 0.11598150145319612, "grad_norm": 6.19434001023653, "learning_rate": 9.807463125465655e-06, "loss": 17.1742, "step": 6345 }, { "epoch": 0.11599978064964264, "grad_norm": 6.948315780633028, "learning_rate": 9.80738176397282e-06, "loss": 17.83, "step": 6346 }, { "epoch": 0.11601805984608916, "grad_norm": 7.265773545397512, "learning_rate": 9.80730038563053e-06, "loss": 17.7742, "step": 6347 }, { "epoch": 0.11603633904253569, "grad_norm": 6.466850105613359, "learning_rate": 9.807218990439068e-06, "loss": 17.6626, "step": 6348 }, { "epoch": 0.11605461823898222, "grad_norm": 6.787913398772441, "learning_rate": 9.80713757839872e-06, "loss": 17.5217, "step": 6349 }, { "epoch": 0.11607289743542874, "grad_norm": 7.117216201535235, "learning_rate": 9.807056149509775e-06, "loss": 17.6632, "step": 6350 }, { "epoch": 0.11609117663187526, "grad_norm": 5.943020646342875, "learning_rate": 9.806974703772513e-06, "loss": 17.3552, "step": 6351 }, { "epoch": 0.11610945582832179, "grad_norm": 6.792120512788166, "learning_rate": 9.806893241187223e-06, "loss": 17.508, "step": 6352 }, { "epoch": 0.11612773502476831, "grad_norm": 7.289555594562179, "learning_rate": 9.806811761754188e-06, "loss": 17.7113, "step": 6353 }, { "epoch": 0.11614601422121483, "grad_norm": 7.061349160039012, "learning_rate": 9.806730265473694e-06, "loss": 17.7871, "step": 6354 }, { "epoch": 0.11616429341766137, "grad_norm": 7.775418162119797, "learning_rate": 9.806648752346029e-06, "loss": 18.329, "step": 6355 }, { "epoch": 0.11618257261410789, "grad_norm": 5.823594541185173, "learning_rate": 9.806567222371478e-06, "loss": 17.2493, "step": 6356 }, { "epoch": 0.11620085181055441, "grad_norm": 9.340370280309296, "learning_rate": 9.806485675550326e-06, "loss": 18.6586, "step": 6357 }, { "epoch": 0.11621913100700093, "grad_norm": 6.957343250007208, "learning_rate": 9.806404111882857e-06, "loss": 17.7079, "step": 6358 }, { "epoch": 0.11623741020344745, "grad_norm": 7.179960364024654, "learning_rate": 9.80632253136936e-06, "loss": 17.9265, "step": 6359 }, { "epoch": 0.11625568939989397, "grad_norm": 7.021714206245028, "learning_rate": 9.806240934010118e-06, "loss": 17.7393, "step": 6360 }, { "epoch": 0.11627396859634051, "grad_norm": 6.334167341151742, "learning_rate": 9.806159319805421e-06, "loss": 17.5247, "step": 6361 }, { "epoch": 0.11629224779278703, "grad_norm": 7.539256044216627, "learning_rate": 9.80607768875555e-06, "loss": 17.9543, "step": 6362 }, { "epoch": 0.11631052698923355, "grad_norm": 7.686195639813259, "learning_rate": 9.805996040860796e-06, "loss": 18.1302, "step": 6363 }, { "epoch": 0.11632880618568008, "grad_norm": 6.545812377155543, "learning_rate": 9.805914376121443e-06, "loss": 17.5107, "step": 6364 }, { "epoch": 0.1163470853821266, "grad_norm": 7.668924002686527, "learning_rate": 9.805832694537777e-06, "loss": 18.2149, "step": 6365 }, { "epoch": 0.11636536457857313, "grad_norm": 7.675284574438888, "learning_rate": 9.805750996110082e-06, "loss": 18.0789, "step": 6366 }, { "epoch": 0.11638364377501965, "grad_norm": 6.274208863681385, "learning_rate": 9.80566928083865e-06, "loss": 17.5136, "step": 6367 }, { "epoch": 0.11640192297146618, "grad_norm": 6.975382543720655, "learning_rate": 9.805587548723763e-06, "loss": 17.9814, "step": 6368 }, { "epoch": 0.1164202021679127, "grad_norm": 5.754036728902357, "learning_rate": 9.805505799765708e-06, "loss": 17.1506, "step": 6369 }, { "epoch": 0.11643848136435922, "grad_norm": 8.085855456180017, "learning_rate": 9.805424033964773e-06, "loss": 18.0149, "step": 6370 }, { "epoch": 0.11645676056080574, "grad_norm": 6.392223105722064, "learning_rate": 9.805342251321242e-06, "loss": 17.6478, "step": 6371 }, { "epoch": 0.11647503975725228, "grad_norm": 7.042051932028251, "learning_rate": 9.805260451835405e-06, "loss": 17.6313, "step": 6372 }, { "epoch": 0.1164933189536988, "grad_norm": 6.753130957874067, "learning_rate": 9.805178635507547e-06, "loss": 17.733, "step": 6373 }, { "epoch": 0.11651159815014532, "grad_norm": 6.277624375642367, "learning_rate": 9.805096802337954e-06, "loss": 17.4527, "step": 6374 }, { "epoch": 0.11652987734659184, "grad_norm": 8.559251219177527, "learning_rate": 9.805014952326915e-06, "loss": 18.4475, "step": 6375 }, { "epoch": 0.11654815654303836, "grad_norm": 6.427180880428312, "learning_rate": 9.804933085474715e-06, "loss": 17.4267, "step": 6376 }, { "epoch": 0.11656643573948489, "grad_norm": 6.625726581180322, "learning_rate": 9.804851201781641e-06, "loss": 17.6082, "step": 6377 }, { "epoch": 0.11658471493593142, "grad_norm": 6.868704675957353, "learning_rate": 9.80476930124798e-06, "loss": 17.6177, "step": 6378 }, { "epoch": 0.11660299413237794, "grad_norm": 7.023585021843053, "learning_rate": 9.804687383874021e-06, "loss": 17.7069, "step": 6379 }, { "epoch": 0.11662127332882447, "grad_norm": 8.013740996817095, "learning_rate": 9.80460544966005e-06, "loss": 18.3447, "step": 6380 }, { "epoch": 0.11663955252527099, "grad_norm": 7.301934200534333, "learning_rate": 9.804523498606351e-06, "loss": 18.204, "step": 6381 }, { "epoch": 0.11665783172171751, "grad_norm": 8.691675307504308, "learning_rate": 9.804441530713217e-06, "loss": 18.1091, "step": 6382 }, { "epoch": 0.11667611091816404, "grad_norm": 5.254428444675815, "learning_rate": 9.804359545980931e-06, "loss": 16.9172, "step": 6383 }, { "epoch": 0.11669439011461057, "grad_norm": 6.155877543797338, "learning_rate": 9.804277544409782e-06, "loss": 17.4798, "step": 6384 }, { "epoch": 0.11671266931105709, "grad_norm": 6.564385697941725, "learning_rate": 9.804195526000057e-06, "loss": 17.5002, "step": 6385 }, { "epoch": 0.11673094850750361, "grad_norm": 8.185972406187952, "learning_rate": 9.804113490752044e-06, "loss": 18.2783, "step": 6386 }, { "epoch": 0.11674922770395013, "grad_norm": 8.422075775427512, "learning_rate": 9.80403143866603e-06, "loss": 18.0337, "step": 6387 }, { "epoch": 0.11676750690039665, "grad_norm": 7.796217893917706, "learning_rate": 9.803949369742303e-06, "loss": 18.2966, "step": 6388 }, { "epoch": 0.11678578609684319, "grad_norm": 7.3098209433518875, "learning_rate": 9.803867283981149e-06, "loss": 17.8664, "step": 6389 }, { "epoch": 0.11680406529328971, "grad_norm": 7.785805337739391, "learning_rate": 9.803785181382858e-06, "loss": 17.8803, "step": 6390 }, { "epoch": 0.11682234448973623, "grad_norm": 7.111380704612183, "learning_rate": 9.803703061947716e-06, "loss": 17.5372, "step": 6391 }, { "epoch": 0.11684062368618275, "grad_norm": 6.453292618911451, "learning_rate": 9.803620925676011e-06, "loss": 17.5098, "step": 6392 }, { "epoch": 0.11685890288262928, "grad_norm": 8.43288871285481, "learning_rate": 9.803538772568034e-06, "loss": 18.4735, "step": 6393 }, { "epoch": 0.1168771820790758, "grad_norm": 7.025785259728636, "learning_rate": 9.803456602624069e-06, "loss": 17.8423, "step": 6394 }, { "epoch": 0.11689546127552233, "grad_norm": 7.078785596352402, "learning_rate": 9.803374415844406e-06, "loss": 17.6048, "step": 6395 }, { "epoch": 0.11691374047196885, "grad_norm": 7.191447585990892, "learning_rate": 9.803292212229332e-06, "loss": 17.6487, "step": 6396 }, { "epoch": 0.11693201966841538, "grad_norm": 6.977973314987887, "learning_rate": 9.803209991779134e-06, "loss": 17.8112, "step": 6397 }, { "epoch": 0.1169502988648619, "grad_norm": 6.160629282524603, "learning_rate": 9.803127754494105e-06, "loss": 17.5099, "step": 6398 }, { "epoch": 0.11696857806130842, "grad_norm": 6.456529053780158, "learning_rate": 9.803045500374528e-06, "loss": 17.5749, "step": 6399 }, { "epoch": 0.11698685725775496, "grad_norm": 6.625938420317166, "learning_rate": 9.802963229420694e-06, "loss": 17.5932, "step": 6400 }, { "epoch": 0.11700513645420148, "grad_norm": 8.576589550161236, "learning_rate": 9.802880941632891e-06, "loss": 18.4689, "step": 6401 }, { "epoch": 0.117023415650648, "grad_norm": 6.944979006020021, "learning_rate": 9.802798637011406e-06, "loss": 17.1432, "step": 6402 }, { "epoch": 0.11704169484709452, "grad_norm": 6.443120159768358, "learning_rate": 9.802716315556528e-06, "loss": 17.5015, "step": 6403 }, { "epoch": 0.11705997404354104, "grad_norm": 6.125188488621004, "learning_rate": 9.802633977268547e-06, "loss": 17.443, "step": 6404 }, { "epoch": 0.11707825323998756, "grad_norm": 9.200141359939387, "learning_rate": 9.80255162214775e-06, "loss": 18.4959, "step": 6405 }, { "epoch": 0.1170965324364341, "grad_norm": 8.204597681266769, "learning_rate": 9.802469250194429e-06, "loss": 18.3473, "step": 6406 }, { "epoch": 0.11711481163288062, "grad_norm": 6.374708048046521, "learning_rate": 9.802386861408868e-06, "loss": 17.453, "step": 6407 }, { "epoch": 0.11713309082932714, "grad_norm": 8.756956476021765, "learning_rate": 9.802304455791358e-06, "loss": 18.4878, "step": 6408 }, { "epoch": 0.11715137002577367, "grad_norm": 7.145826288671658, "learning_rate": 9.802222033342187e-06, "loss": 17.8972, "step": 6409 }, { "epoch": 0.11716964922222019, "grad_norm": 7.308038267239658, "learning_rate": 9.802139594061645e-06, "loss": 17.4195, "step": 6410 }, { "epoch": 0.11718792841866671, "grad_norm": 6.691694777386175, "learning_rate": 9.80205713795002e-06, "loss": 17.5466, "step": 6411 }, { "epoch": 0.11720620761511324, "grad_norm": 10.977301694384694, "learning_rate": 9.801974665007602e-06, "loss": 18.3623, "step": 6412 }, { "epoch": 0.11722448681155977, "grad_norm": 7.136042291072314, "learning_rate": 9.80189217523468e-06, "loss": 18.1256, "step": 6413 }, { "epoch": 0.11724276600800629, "grad_norm": 7.732945738852875, "learning_rate": 9.801809668631542e-06, "loss": 18.0761, "step": 6414 }, { "epoch": 0.11726104520445281, "grad_norm": 6.625258155185103, "learning_rate": 9.801727145198478e-06, "loss": 17.6355, "step": 6415 }, { "epoch": 0.11727932440089933, "grad_norm": 7.129781251303618, "learning_rate": 9.801644604935776e-06, "loss": 17.8709, "step": 6416 }, { "epoch": 0.11729760359734587, "grad_norm": 6.751901967124584, "learning_rate": 9.801562047843727e-06, "loss": 17.5563, "step": 6417 }, { "epoch": 0.11731588279379239, "grad_norm": 7.896663379369354, "learning_rate": 9.80147947392262e-06, "loss": 18.2147, "step": 6418 }, { "epoch": 0.11733416199023891, "grad_norm": 6.829852354286273, "learning_rate": 9.801396883172744e-06, "loss": 17.8405, "step": 6419 }, { "epoch": 0.11735244118668543, "grad_norm": 8.027455516082357, "learning_rate": 9.801314275594389e-06, "loss": 17.7207, "step": 6420 }, { "epoch": 0.11737072038313195, "grad_norm": 5.83849562073646, "learning_rate": 9.801231651187844e-06, "loss": 17.2513, "step": 6421 }, { "epoch": 0.11738899957957848, "grad_norm": 6.498057274825543, "learning_rate": 9.801149009953397e-06, "loss": 17.5261, "step": 6422 }, { "epoch": 0.11740727877602501, "grad_norm": 6.481551183585061, "learning_rate": 9.801066351891341e-06, "loss": 17.5379, "step": 6423 }, { "epoch": 0.11742555797247153, "grad_norm": 8.404955598684786, "learning_rate": 9.800983677001962e-06, "loss": 17.6598, "step": 6424 }, { "epoch": 0.11744383716891806, "grad_norm": 8.42212805213432, "learning_rate": 9.800900985285554e-06, "loss": 18.046, "step": 6425 }, { "epoch": 0.11746211636536458, "grad_norm": 7.176008702550471, "learning_rate": 9.800818276742405e-06, "loss": 17.7202, "step": 6426 }, { "epoch": 0.1174803955618111, "grad_norm": 8.417184757751102, "learning_rate": 9.800735551372804e-06, "loss": 18.2777, "step": 6427 }, { "epoch": 0.11749867475825762, "grad_norm": 6.548088527082842, "learning_rate": 9.80065280917704e-06, "loss": 17.5299, "step": 6428 }, { "epoch": 0.11751695395470416, "grad_norm": 10.397837457025595, "learning_rate": 9.800570050155406e-06, "loss": 18.4056, "step": 6429 }, { "epoch": 0.11753523315115068, "grad_norm": 6.12273016604697, "learning_rate": 9.800487274308191e-06, "loss": 17.3399, "step": 6430 }, { "epoch": 0.1175535123475972, "grad_norm": 7.357488772562387, "learning_rate": 9.800404481635683e-06, "loss": 18.1048, "step": 6431 }, { "epoch": 0.11757179154404372, "grad_norm": 7.470281827265112, "learning_rate": 9.800321672138176e-06, "loss": 17.644, "step": 6432 }, { "epoch": 0.11759007074049024, "grad_norm": 7.999336679854122, "learning_rate": 9.800238845815956e-06, "loss": 18.5439, "step": 6433 }, { "epoch": 0.11760834993693678, "grad_norm": 8.806640978534022, "learning_rate": 9.800156002669317e-06, "loss": 18.4895, "step": 6434 }, { "epoch": 0.1176266291333833, "grad_norm": 7.52573875475446, "learning_rate": 9.800073142698545e-06, "loss": 18.1755, "step": 6435 }, { "epoch": 0.11764490832982982, "grad_norm": 6.675257678545468, "learning_rate": 9.799990265903936e-06, "loss": 17.564, "step": 6436 }, { "epoch": 0.11766318752627634, "grad_norm": 7.144905545526197, "learning_rate": 9.799907372285778e-06, "loss": 17.9521, "step": 6437 }, { "epoch": 0.11768146672272287, "grad_norm": 6.362649613459145, "learning_rate": 9.799824461844358e-06, "loss": 17.2563, "step": 6438 }, { "epoch": 0.11769974591916939, "grad_norm": 6.904921285489309, "learning_rate": 9.799741534579972e-06, "loss": 17.6227, "step": 6439 }, { "epoch": 0.11771802511561592, "grad_norm": 7.443050796729453, "learning_rate": 9.799658590492909e-06, "loss": 17.8672, "step": 6440 }, { "epoch": 0.11773630431206245, "grad_norm": 6.956501862183829, "learning_rate": 9.799575629583457e-06, "loss": 17.602, "step": 6441 }, { "epoch": 0.11775458350850897, "grad_norm": 7.315264576892508, "learning_rate": 9.79949265185191e-06, "loss": 17.796, "step": 6442 }, { "epoch": 0.11777286270495549, "grad_norm": 7.571036138757203, "learning_rate": 9.799409657298559e-06, "loss": 18.1444, "step": 6443 }, { "epoch": 0.11779114190140201, "grad_norm": 6.906011070661719, "learning_rate": 9.799326645923692e-06, "loss": 17.7722, "step": 6444 }, { "epoch": 0.11780942109784853, "grad_norm": 6.901236194432724, "learning_rate": 9.799243617727603e-06, "loss": 17.7963, "step": 6445 }, { "epoch": 0.11782770029429507, "grad_norm": 8.612221112737895, "learning_rate": 9.79916057271058e-06, "loss": 18.0595, "step": 6446 }, { "epoch": 0.11784597949074159, "grad_norm": 8.554722330225086, "learning_rate": 9.799077510872916e-06, "loss": 18.0822, "step": 6447 }, { "epoch": 0.11786425868718811, "grad_norm": 6.5271612616553, "learning_rate": 9.798994432214901e-06, "loss": 17.4995, "step": 6448 }, { "epoch": 0.11788253788363463, "grad_norm": 6.553220588831177, "learning_rate": 9.798911336736829e-06, "loss": 17.3902, "step": 6449 }, { "epoch": 0.11790081708008116, "grad_norm": 7.953352621405939, "learning_rate": 9.79882822443899e-06, "loss": 18.1591, "step": 6450 }, { "epoch": 0.11791909627652769, "grad_norm": 7.450150154473341, "learning_rate": 9.79874509532167e-06, "loss": 17.8105, "step": 6451 }, { "epoch": 0.11793737547297421, "grad_norm": 7.344572475847884, "learning_rate": 9.79866194938517e-06, "loss": 17.9767, "step": 6452 }, { "epoch": 0.11795565466942073, "grad_norm": 8.533770454549908, "learning_rate": 9.798578786629774e-06, "loss": 18.6686, "step": 6453 }, { "epoch": 0.11797393386586726, "grad_norm": 6.325517074232184, "learning_rate": 9.798495607055773e-06, "loss": 17.2783, "step": 6454 }, { "epoch": 0.11799221306231378, "grad_norm": 8.253855300487562, "learning_rate": 9.798412410663466e-06, "loss": 18.3235, "step": 6455 }, { "epoch": 0.1180104922587603, "grad_norm": 8.596405157604142, "learning_rate": 9.798329197453136e-06, "loss": 18.1615, "step": 6456 }, { "epoch": 0.11802877145520684, "grad_norm": 5.917307156883019, "learning_rate": 9.798245967425081e-06, "loss": 17.3634, "step": 6457 }, { "epoch": 0.11804705065165336, "grad_norm": 8.188917822781782, "learning_rate": 9.79816272057959e-06, "loss": 18.2016, "step": 6458 }, { "epoch": 0.11806532984809988, "grad_norm": 6.486414674149526, "learning_rate": 9.798079456916954e-06, "loss": 17.3954, "step": 6459 }, { "epoch": 0.1180836090445464, "grad_norm": 7.555090203044301, "learning_rate": 9.797996176437467e-06, "loss": 17.7253, "step": 6460 }, { "epoch": 0.11810188824099292, "grad_norm": 8.175442463062188, "learning_rate": 9.79791287914142e-06, "loss": 17.8376, "step": 6461 }, { "epoch": 0.11812016743743944, "grad_norm": 6.995608011331642, "learning_rate": 9.797829565029103e-06, "loss": 17.6152, "step": 6462 }, { "epoch": 0.11813844663388598, "grad_norm": 6.1874870425570165, "learning_rate": 9.797746234100811e-06, "loss": 17.4161, "step": 6463 }, { "epoch": 0.1181567258303325, "grad_norm": 7.507360658716251, "learning_rate": 9.797662886356833e-06, "loss": 17.953, "step": 6464 }, { "epoch": 0.11817500502677902, "grad_norm": 6.90773495193432, "learning_rate": 9.797579521797466e-06, "loss": 17.7612, "step": 6465 }, { "epoch": 0.11819328422322554, "grad_norm": 6.636870108478468, "learning_rate": 9.797496140422997e-06, "loss": 17.5357, "step": 6466 }, { "epoch": 0.11821156341967207, "grad_norm": 8.033725427160187, "learning_rate": 9.797412742233721e-06, "loss": 17.9367, "step": 6467 }, { "epoch": 0.1182298426161186, "grad_norm": 7.471387238579187, "learning_rate": 9.79732932722993e-06, "loss": 18.1184, "step": 6468 }, { "epoch": 0.11824812181256512, "grad_norm": 6.63298260552759, "learning_rate": 9.797245895411915e-06, "loss": 17.6509, "step": 6469 }, { "epoch": 0.11826640100901165, "grad_norm": 7.265387902671119, "learning_rate": 9.797162446779969e-06, "loss": 17.6117, "step": 6470 }, { "epoch": 0.11828468020545817, "grad_norm": 7.474828687299429, "learning_rate": 9.797078981334386e-06, "loss": 18.049, "step": 6471 }, { "epoch": 0.11830295940190469, "grad_norm": 6.423152761167049, "learning_rate": 9.796995499075457e-06, "loss": 17.5033, "step": 6472 }, { "epoch": 0.11832123859835121, "grad_norm": 6.747138888401617, "learning_rate": 9.796912000003475e-06, "loss": 17.7517, "step": 6473 }, { "epoch": 0.11833951779479775, "grad_norm": 7.160861374191156, "learning_rate": 9.796828484118734e-06, "loss": 17.9269, "step": 6474 }, { "epoch": 0.11835779699124427, "grad_norm": 6.613910339644413, "learning_rate": 9.796744951421524e-06, "loss": 17.4928, "step": 6475 }, { "epoch": 0.11837607618769079, "grad_norm": 8.225071522638741, "learning_rate": 9.796661401912138e-06, "loss": 18.0207, "step": 6476 }, { "epoch": 0.11839435538413731, "grad_norm": 7.226856116001023, "learning_rate": 9.796577835590873e-06, "loss": 17.8802, "step": 6477 }, { "epoch": 0.11841263458058383, "grad_norm": 6.713185030496041, "learning_rate": 9.796494252458018e-06, "loss": 17.7731, "step": 6478 }, { "epoch": 0.11843091377703036, "grad_norm": 7.02622367697721, "learning_rate": 9.796410652513866e-06, "loss": 18.0381, "step": 6479 }, { "epoch": 0.11844919297347689, "grad_norm": 7.305191679821685, "learning_rate": 9.79632703575871e-06, "loss": 17.9136, "step": 6480 }, { "epoch": 0.11846747216992341, "grad_norm": 7.283521586820067, "learning_rate": 9.796243402192845e-06, "loss": 17.5094, "step": 6481 }, { "epoch": 0.11848575136636993, "grad_norm": 7.068732044920499, "learning_rate": 9.796159751816563e-06, "loss": 18.1282, "step": 6482 }, { "epoch": 0.11850403056281646, "grad_norm": 8.194073750688434, "learning_rate": 9.796076084630157e-06, "loss": 17.8692, "step": 6483 }, { "epoch": 0.11852230975926298, "grad_norm": 6.827089857509455, "learning_rate": 9.795992400633923e-06, "loss": 17.8544, "step": 6484 }, { "epoch": 0.11854058895570951, "grad_norm": 7.3389057459812, "learning_rate": 9.79590869982815e-06, "loss": 17.486, "step": 6485 }, { "epoch": 0.11855886815215604, "grad_norm": 6.334095457705487, "learning_rate": 9.795824982213132e-06, "loss": 17.6059, "step": 6486 }, { "epoch": 0.11857714734860256, "grad_norm": 5.770866119526604, "learning_rate": 9.795741247789164e-06, "loss": 17.2927, "step": 6487 }, { "epoch": 0.11859542654504908, "grad_norm": 6.7825318697015105, "learning_rate": 9.79565749655654e-06, "loss": 17.5687, "step": 6488 }, { "epoch": 0.1186137057414956, "grad_norm": 7.812693039624006, "learning_rate": 9.795573728515553e-06, "loss": 18.2304, "step": 6489 }, { "epoch": 0.11863198493794212, "grad_norm": 6.52843103040547, "learning_rate": 9.795489943666494e-06, "loss": 17.6061, "step": 6490 }, { "epoch": 0.11865026413438866, "grad_norm": 6.42790272523629, "learning_rate": 9.79540614200966e-06, "loss": 17.6545, "step": 6491 }, { "epoch": 0.11866854333083518, "grad_norm": 7.890960366904658, "learning_rate": 9.795322323545345e-06, "loss": 17.8171, "step": 6492 }, { "epoch": 0.1186868225272817, "grad_norm": 8.097096802324884, "learning_rate": 9.795238488273841e-06, "loss": 18.2555, "step": 6493 }, { "epoch": 0.11870510172372822, "grad_norm": 8.405031935416428, "learning_rate": 9.79515463619544e-06, "loss": 18.142, "step": 6494 }, { "epoch": 0.11872338092017475, "grad_norm": 6.724443918721763, "learning_rate": 9.79507076731044e-06, "loss": 17.6722, "step": 6495 }, { "epoch": 0.11874166011662127, "grad_norm": 7.80273706210259, "learning_rate": 9.794986881619132e-06, "loss": 18.2217, "step": 6496 }, { "epoch": 0.1187599393130678, "grad_norm": 6.818244420135292, "learning_rate": 9.794902979121813e-06, "loss": 17.4039, "step": 6497 }, { "epoch": 0.11877821850951432, "grad_norm": 7.656089440816266, "learning_rate": 9.794819059818775e-06, "loss": 17.6009, "step": 6498 }, { "epoch": 0.11879649770596085, "grad_norm": 7.1953928996191046, "learning_rate": 9.794735123710311e-06, "loss": 17.8115, "step": 6499 }, { "epoch": 0.11881477690240737, "grad_norm": 7.191384965477584, "learning_rate": 9.794651170796717e-06, "loss": 17.7287, "step": 6500 }, { "epoch": 0.11883305609885389, "grad_norm": 5.405317409410683, "learning_rate": 9.794567201078284e-06, "loss": 16.9769, "step": 6501 }, { "epoch": 0.11885133529530043, "grad_norm": 5.916881991659906, "learning_rate": 9.794483214555313e-06, "loss": 17.3112, "step": 6502 }, { "epoch": 0.11886961449174695, "grad_norm": 8.955135080248823, "learning_rate": 9.794399211228092e-06, "loss": 18.3316, "step": 6503 }, { "epoch": 0.11888789368819347, "grad_norm": 7.922817280931787, "learning_rate": 9.79431519109692e-06, "loss": 18.2057, "step": 6504 }, { "epoch": 0.11890617288463999, "grad_norm": 7.3757294383020975, "learning_rate": 9.794231154162087e-06, "loss": 17.7756, "step": 6505 }, { "epoch": 0.11892445208108651, "grad_norm": 6.700722597819988, "learning_rate": 9.79414710042389e-06, "loss": 17.5947, "step": 6506 }, { "epoch": 0.11894273127753303, "grad_norm": 8.167591367137772, "learning_rate": 9.794063029882625e-06, "loss": 18.071, "step": 6507 }, { "epoch": 0.11896101047397957, "grad_norm": 7.338127437379368, "learning_rate": 9.793978942538583e-06, "loss": 17.724, "step": 6508 }, { "epoch": 0.11897928967042609, "grad_norm": 7.703296730988331, "learning_rate": 9.793894838392062e-06, "loss": 17.9528, "step": 6509 }, { "epoch": 0.11899756886687261, "grad_norm": 6.490671664928666, "learning_rate": 9.793810717443356e-06, "loss": 17.4084, "step": 6510 }, { "epoch": 0.11901584806331914, "grad_norm": 10.372900084340094, "learning_rate": 9.79372657969276e-06, "loss": 19.1718, "step": 6511 }, { "epoch": 0.11903412725976566, "grad_norm": 9.095616702660106, "learning_rate": 9.793642425140567e-06, "loss": 18.216, "step": 6512 }, { "epoch": 0.11905240645621218, "grad_norm": 9.791638205380364, "learning_rate": 9.793558253787072e-06, "loss": 19.3034, "step": 6513 }, { "epoch": 0.11907068565265871, "grad_norm": 6.938984583399672, "learning_rate": 9.793474065632574e-06, "loss": 17.6442, "step": 6514 }, { "epoch": 0.11908896484910524, "grad_norm": 6.617292142701157, "learning_rate": 9.793389860677364e-06, "loss": 17.734, "step": 6515 }, { "epoch": 0.11910724404555176, "grad_norm": 8.419519258940735, "learning_rate": 9.793305638921738e-06, "loss": 18.0048, "step": 6516 }, { "epoch": 0.11912552324199828, "grad_norm": 7.748242283614979, "learning_rate": 9.793221400365993e-06, "loss": 17.7052, "step": 6517 }, { "epoch": 0.1191438024384448, "grad_norm": 8.192685354082709, "learning_rate": 9.793137145010423e-06, "loss": 18.4234, "step": 6518 }, { "epoch": 0.11916208163489134, "grad_norm": 6.9733892634945995, "learning_rate": 9.793052872855322e-06, "loss": 17.5065, "step": 6519 }, { "epoch": 0.11918036083133786, "grad_norm": 7.444446036851159, "learning_rate": 9.792968583900988e-06, "loss": 17.8992, "step": 6520 }, { "epoch": 0.11919864002778438, "grad_norm": 6.807499423338599, "learning_rate": 9.792884278147714e-06, "loss": 17.6398, "step": 6521 }, { "epoch": 0.1192169192242309, "grad_norm": 6.096419198087533, "learning_rate": 9.792799955595796e-06, "loss": 17.264, "step": 6522 }, { "epoch": 0.11923519842067742, "grad_norm": 6.42210350027216, "learning_rate": 9.792715616245532e-06, "loss": 17.5809, "step": 6523 }, { "epoch": 0.11925347761712395, "grad_norm": 7.436633208657352, "learning_rate": 9.792631260097214e-06, "loss": 17.7595, "step": 6524 }, { "epoch": 0.11927175681357048, "grad_norm": 9.280548299826297, "learning_rate": 9.79254688715114e-06, "loss": 18.5013, "step": 6525 }, { "epoch": 0.119290036010017, "grad_norm": 8.279025851654675, "learning_rate": 9.792462497407604e-06, "loss": 17.7648, "step": 6526 }, { "epoch": 0.11930831520646352, "grad_norm": 8.371288874130814, "learning_rate": 9.792378090866904e-06, "loss": 18.0224, "step": 6527 }, { "epoch": 0.11932659440291005, "grad_norm": 6.9459894384664755, "learning_rate": 9.792293667529334e-06, "loss": 17.8487, "step": 6528 }, { "epoch": 0.11934487359935657, "grad_norm": 5.985626661647108, "learning_rate": 9.79220922739519e-06, "loss": 17.2801, "step": 6529 }, { "epoch": 0.11936315279580309, "grad_norm": 7.520338406965312, "learning_rate": 9.79212477046477e-06, "loss": 17.7614, "step": 6530 }, { "epoch": 0.11938143199224963, "grad_norm": 8.01768301805792, "learning_rate": 9.792040296738367e-06, "loss": 18.0922, "step": 6531 }, { "epoch": 0.11939971118869615, "grad_norm": 6.270714170837754, "learning_rate": 9.79195580621628e-06, "loss": 17.7132, "step": 6532 }, { "epoch": 0.11941799038514267, "grad_norm": 7.937258621373097, "learning_rate": 9.791871298898804e-06, "loss": 18.1272, "step": 6533 }, { "epoch": 0.11943626958158919, "grad_norm": 8.66782154213655, "learning_rate": 9.791786774786234e-06, "loss": 18.029, "step": 6534 }, { "epoch": 0.11945454877803571, "grad_norm": 6.578285988582351, "learning_rate": 9.791702233878867e-06, "loss": 17.6913, "step": 6535 }, { "epoch": 0.11947282797448225, "grad_norm": 6.336874696532593, "learning_rate": 9.791617676176999e-06, "loss": 17.4965, "step": 6536 }, { "epoch": 0.11949110717092877, "grad_norm": 7.599347049717939, "learning_rate": 9.791533101680928e-06, "loss": 18.0085, "step": 6537 }, { "epoch": 0.11950938636737529, "grad_norm": 5.948878988910011, "learning_rate": 9.791448510390948e-06, "loss": 17.3512, "step": 6538 }, { "epoch": 0.11952766556382181, "grad_norm": 6.822620871610262, "learning_rate": 9.791363902307357e-06, "loss": 17.5646, "step": 6539 }, { "epoch": 0.11954594476026834, "grad_norm": 8.442392421658504, "learning_rate": 9.791279277430453e-06, "loss": 18.2527, "step": 6540 }, { "epoch": 0.11956422395671486, "grad_norm": 7.432941474388194, "learning_rate": 9.79119463576053e-06, "loss": 17.8236, "step": 6541 }, { "epoch": 0.11958250315316139, "grad_norm": 8.100810925325277, "learning_rate": 9.791109977297886e-06, "loss": 18.1798, "step": 6542 }, { "epoch": 0.11960078234960791, "grad_norm": 6.695797003240366, "learning_rate": 9.791025302042816e-06, "loss": 17.7148, "step": 6543 }, { "epoch": 0.11961906154605444, "grad_norm": 7.120505481509592, "learning_rate": 9.790940609995618e-06, "loss": 17.9138, "step": 6544 }, { "epoch": 0.11963734074250096, "grad_norm": 7.296967713881201, "learning_rate": 9.79085590115659e-06, "loss": 17.8762, "step": 6545 }, { "epoch": 0.11965561993894748, "grad_norm": 7.328045571999824, "learning_rate": 9.790771175526028e-06, "loss": 17.5075, "step": 6546 }, { "epoch": 0.119673899135394, "grad_norm": 6.998914971455784, "learning_rate": 9.790686433104229e-06, "loss": 17.4713, "step": 6547 }, { "epoch": 0.11969217833184054, "grad_norm": 8.160249802553942, "learning_rate": 9.790601673891488e-06, "loss": 18.2393, "step": 6548 }, { "epoch": 0.11971045752828706, "grad_norm": 6.258968282972264, "learning_rate": 9.790516897888105e-06, "loss": 17.227, "step": 6549 }, { "epoch": 0.11972873672473358, "grad_norm": 6.358986838184495, "learning_rate": 9.790432105094376e-06, "loss": 17.3255, "step": 6550 }, { "epoch": 0.1197470159211801, "grad_norm": 6.693172918968336, "learning_rate": 9.790347295510597e-06, "loss": 17.4555, "step": 6551 }, { "epoch": 0.11976529511762662, "grad_norm": 6.735622330137925, "learning_rate": 9.790262469137068e-06, "loss": 17.8505, "step": 6552 }, { "epoch": 0.11978357431407316, "grad_norm": 7.432447853508037, "learning_rate": 9.790177625974084e-06, "loss": 17.8742, "step": 6553 }, { "epoch": 0.11980185351051968, "grad_norm": 7.732242969332778, "learning_rate": 9.790092766021943e-06, "loss": 18.0215, "step": 6554 }, { "epoch": 0.1198201327069662, "grad_norm": 7.625990731517096, "learning_rate": 9.790007889280942e-06, "loss": 18.2303, "step": 6555 }, { "epoch": 0.11983841190341273, "grad_norm": 7.136837631975101, "learning_rate": 9.78992299575138e-06, "loss": 18.2816, "step": 6556 }, { "epoch": 0.11985669109985925, "grad_norm": 8.04377617285793, "learning_rate": 9.789838085433554e-06, "loss": 18.6148, "step": 6557 }, { "epoch": 0.11987497029630577, "grad_norm": 7.731295894701705, "learning_rate": 9.78975315832776e-06, "loss": 18.208, "step": 6558 }, { "epoch": 0.1198932494927523, "grad_norm": 7.052510981025374, "learning_rate": 9.789668214434296e-06, "loss": 17.6294, "step": 6559 }, { "epoch": 0.11991152868919883, "grad_norm": 8.321861239998073, "learning_rate": 9.789583253753463e-06, "loss": 17.7319, "step": 6560 }, { "epoch": 0.11992980788564535, "grad_norm": 6.498838604629319, "learning_rate": 9.789498276285554e-06, "loss": 17.6915, "step": 6561 }, { "epoch": 0.11994808708209187, "grad_norm": 7.443228388632784, "learning_rate": 9.78941328203087e-06, "loss": 17.7216, "step": 6562 }, { "epoch": 0.11996636627853839, "grad_norm": 6.529059873526048, "learning_rate": 9.789328270989709e-06, "loss": 17.5777, "step": 6563 }, { "epoch": 0.11998464547498491, "grad_norm": 6.723451701396521, "learning_rate": 9.789243243162368e-06, "loss": 17.6907, "step": 6564 }, { "epoch": 0.12000292467143145, "grad_norm": 6.8791003187608855, "learning_rate": 9.789158198549142e-06, "loss": 17.7716, "step": 6565 }, { "epoch": 0.12002120386787797, "grad_norm": 7.197813593107489, "learning_rate": 9.789073137150335e-06, "loss": 18.0089, "step": 6566 }, { "epoch": 0.12003948306432449, "grad_norm": 7.657156468405905, "learning_rate": 9.788988058966242e-06, "loss": 17.79, "step": 6567 }, { "epoch": 0.12005776226077101, "grad_norm": 7.010305958903088, "learning_rate": 9.788902963997161e-06, "loss": 17.4947, "step": 6568 }, { "epoch": 0.12007604145721754, "grad_norm": 7.652939229137941, "learning_rate": 9.78881785224339e-06, "loss": 18.1834, "step": 6569 }, { "epoch": 0.12009432065366407, "grad_norm": 7.3832717808589905, "learning_rate": 9.78873272370523e-06, "loss": 17.7098, "step": 6570 }, { "epoch": 0.1201125998501106, "grad_norm": 6.797808442800341, "learning_rate": 9.788647578382975e-06, "loss": 17.3152, "step": 6571 }, { "epoch": 0.12013087904655712, "grad_norm": 9.297017300811333, "learning_rate": 9.788562416276928e-06, "loss": 18.6175, "step": 6572 }, { "epoch": 0.12014915824300364, "grad_norm": 7.311421378016831, "learning_rate": 9.788477237387384e-06, "loss": 17.615, "step": 6573 }, { "epoch": 0.12016743743945016, "grad_norm": 6.93790451572851, "learning_rate": 9.788392041714642e-06, "loss": 17.9713, "step": 6574 }, { "epoch": 0.12018571663589668, "grad_norm": 7.290709844297043, "learning_rate": 9.788306829259002e-06, "loss": 17.7142, "step": 6575 }, { "epoch": 0.12020399583234322, "grad_norm": 6.99043375359672, "learning_rate": 9.788221600020763e-06, "loss": 17.4934, "step": 6576 }, { "epoch": 0.12022227502878974, "grad_norm": 8.057731335287574, "learning_rate": 9.788136354000221e-06, "loss": 18.308, "step": 6577 }, { "epoch": 0.12024055422523626, "grad_norm": 9.220128603302165, "learning_rate": 9.788051091197679e-06, "loss": 18.803, "step": 6578 }, { "epoch": 0.12025883342168278, "grad_norm": 7.180361320473192, "learning_rate": 9.78796581161343e-06, "loss": 17.7055, "step": 6579 }, { "epoch": 0.1202771126181293, "grad_norm": 7.202360795240422, "learning_rate": 9.78788051524778e-06, "loss": 17.6053, "step": 6580 }, { "epoch": 0.12029539181457582, "grad_norm": 7.949602049517095, "learning_rate": 9.787795202101022e-06, "loss": 18.6818, "step": 6581 }, { "epoch": 0.12031367101102236, "grad_norm": 7.290520032674815, "learning_rate": 9.787709872173459e-06, "loss": 17.8462, "step": 6582 }, { "epoch": 0.12033195020746888, "grad_norm": 7.486266145220681, "learning_rate": 9.787624525465386e-06, "loss": 17.8674, "step": 6583 }, { "epoch": 0.1203502294039154, "grad_norm": 8.300157535115929, "learning_rate": 9.787539161977107e-06, "loss": 18.4809, "step": 6584 }, { "epoch": 0.12036850860036193, "grad_norm": 7.249217641057837, "learning_rate": 9.787453781708918e-06, "loss": 17.7896, "step": 6585 }, { "epoch": 0.12038678779680845, "grad_norm": 8.136674193153222, "learning_rate": 9.787368384661117e-06, "loss": 18.0296, "step": 6586 }, { "epoch": 0.12040506699325498, "grad_norm": 7.170638263949791, "learning_rate": 9.787282970834008e-06, "loss": 17.782, "step": 6587 }, { "epoch": 0.1204233461897015, "grad_norm": 7.385747835699894, "learning_rate": 9.787197540227887e-06, "loss": 17.8215, "step": 6588 }, { "epoch": 0.12044162538614803, "grad_norm": 7.774225347474626, "learning_rate": 9.787112092843052e-06, "loss": 18.4174, "step": 6589 }, { "epoch": 0.12045990458259455, "grad_norm": 6.440299019895617, "learning_rate": 9.787026628679806e-06, "loss": 17.5166, "step": 6590 }, { "epoch": 0.12047818377904107, "grad_norm": 6.0228606643020495, "learning_rate": 9.786941147738446e-06, "loss": 17.2161, "step": 6591 }, { "epoch": 0.12049646297548759, "grad_norm": 8.424455245304133, "learning_rate": 9.786855650019275e-06, "loss": 18.4105, "step": 6592 }, { "epoch": 0.12051474217193413, "grad_norm": 6.116248050842922, "learning_rate": 9.78677013552259e-06, "loss": 17.1726, "step": 6593 }, { "epoch": 0.12053302136838065, "grad_norm": 7.7242918289890925, "learning_rate": 9.786684604248688e-06, "loss": 18.0852, "step": 6594 }, { "epoch": 0.12055130056482717, "grad_norm": 6.654921197561543, "learning_rate": 9.786599056197874e-06, "loss": 17.6023, "step": 6595 }, { "epoch": 0.12056957976127369, "grad_norm": 6.641258300875004, "learning_rate": 9.786513491370446e-06, "loss": 17.6764, "step": 6596 }, { "epoch": 0.12058785895772021, "grad_norm": 7.115677003135941, "learning_rate": 9.786427909766703e-06, "loss": 17.7292, "step": 6597 }, { "epoch": 0.12060613815416674, "grad_norm": 7.760198261135722, "learning_rate": 9.786342311386946e-06, "loss": 18.1984, "step": 6598 }, { "epoch": 0.12062441735061327, "grad_norm": 6.9906326340819955, "learning_rate": 9.786256696231473e-06, "loss": 17.7639, "step": 6599 }, { "epoch": 0.1206426965470598, "grad_norm": 7.08097195907497, "learning_rate": 9.786171064300587e-06, "loss": 17.4673, "step": 6600 }, { "epoch": 0.12066097574350632, "grad_norm": 7.867628079964912, "learning_rate": 9.786085415594588e-06, "loss": 17.9685, "step": 6601 }, { "epoch": 0.12067925493995284, "grad_norm": 8.08039809075577, "learning_rate": 9.785999750113772e-06, "loss": 18.3153, "step": 6602 }, { "epoch": 0.12069753413639936, "grad_norm": 6.4718450271338375, "learning_rate": 9.785914067858444e-06, "loss": 17.5679, "step": 6603 }, { "epoch": 0.1207158133328459, "grad_norm": 7.613223665863531, "learning_rate": 9.785828368828903e-06, "loss": 18.1562, "step": 6604 }, { "epoch": 0.12073409252929242, "grad_norm": 6.654676585758407, "learning_rate": 9.785742653025448e-06, "loss": 17.5961, "step": 6605 }, { "epoch": 0.12075237172573894, "grad_norm": 7.751378363143706, "learning_rate": 9.78565692044838e-06, "loss": 18.3553, "step": 6606 }, { "epoch": 0.12077065092218546, "grad_norm": 7.724338371565218, "learning_rate": 9.785571171098e-06, "loss": 17.8555, "step": 6607 }, { "epoch": 0.12078893011863198, "grad_norm": 7.7391000573388515, "learning_rate": 9.785485404974608e-06, "loss": 18.285, "step": 6608 }, { "epoch": 0.1208072093150785, "grad_norm": 6.503173067269977, "learning_rate": 9.785399622078505e-06, "loss": 17.5102, "step": 6609 }, { "epoch": 0.12082548851152504, "grad_norm": 9.305663182717261, "learning_rate": 9.785313822409992e-06, "loss": 17.6926, "step": 6610 }, { "epoch": 0.12084376770797156, "grad_norm": 8.544767880225772, "learning_rate": 9.785228005969369e-06, "loss": 18.3077, "step": 6611 }, { "epoch": 0.12086204690441808, "grad_norm": 7.273019054278335, "learning_rate": 9.785142172756937e-06, "loss": 17.7531, "step": 6612 }, { "epoch": 0.1208803261008646, "grad_norm": 9.788212676017627, "learning_rate": 9.785056322772997e-06, "loss": 18.5349, "step": 6613 }, { "epoch": 0.12089860529731113, "grad_norm": 6.958116567792477, "learning_rate": 9.784970456017851e-06, "loss": 17.5714, "step": 6614 }, { "epoch": 0.12091688449375765, "grad_norm": 8.095007728727529, "learning_rate": 9.784884572491798e-06, "loss": 18.2647, "step": 6615 }, { "epoch": 0.12093516369020418, "grad_norm": 6.642636193639059, "learning_rate": 9.784798672195138e-06, "loss": 17.8691, "step": 6616 }, { "epoch": 0.1209534428866507, "grad_norm": 6.723446292301932, "learning_rate": 9.784712755128176e-06, "loss": 17.5251, "step": 6617 }, { "epoch": 0.12097172208309723, "grad_norm": 6.4877476563560075, "learning_rate": 9.78462682129121e-06, "loss": 17.5709, "step": 6618 }, { "epoch": 0.12099000127954375, "grad_norm": 5.652538915549849, "learning_rate": 9.784540870684542e-06, "loss": 17.1876, "step": 6619 }, { "epoch": 0.12100828047599027, "grad_norm": 8.07854844816267, "learning_rate": 9.784454903308475e-06, "loss": 18.2179, "step": 6620 }, { "epoch": 0.1210265596724368, "grad_norm": 7.189175614139578, "learning_rate": 9.784368919163307e-06, "loss": 18.0607, "step": 6621 }, { "epoch": 0.12104483886888333, "grad_norm": 6.85131669886279, "learning_rate": 9.78428291824934e-06, "loss": 17.696, "step": 6622 }, { "epoch": 0.12106311806532985, "grad_norm": 6.980487847130155, "learning_rate": 9.78419690056688e-06, "loss": 17.5203, "step": 6623 }, { "epoch": 0.12108139726177637, "grad_norm": 7.103323988532092, "learning_rate": 9.784110866116223e-06, "loss": 17.7473, "step": 6624 }, { "epoch": 0.1210996764582229, "grad_norm": 7.107746857581005, "learning_rate": 9.784024814897675e-06, "loss": 17.7612, "step": 6625 }, { "epoch": 0.12111795565466942, "grad_norm": 7.899770965041136, "learning_rate": 9.783938746911532e-06, "loss": 18.4489, "step": 6626 }, { "epoch": 0.12113623485111595, "grad_norm": 7.480258079191613, "learning_rate": 9.7838526621581e-06, "loss": 18.023, "step": 6627 }, { "epoch": 0.12115451404756247, "grad_norm": 5.904831813224204, "learning_rate": 9.78376656063768e-06, "loss": 17.1418, "step": 6628 }, { "epoch": 0.121172793244009, "grad_norm": 7.742130858072729, "learning_rate": 9.783680442350571e-06, "loss": 18.3106, "step": 6629 }, { "epoch": 0.12119107244045552, "grad_norm": 6.462180802164989, "learning_rate": 9.78359430729708e-06, "loss": 17.504, "step": 6630 }, { "epoch": 0.12120935163690204, "grad_norm": 5.869227175449395, "learning_rate": 9.783508155477506e-06, "loss": 17.2101, "step": 6631 }, { "epoch": 0.12122763083334856, "grad_norm": 5.5460802093685455, "learning_rate": 9.78342198689215e-06, "loss": 17.1091, "step": 6632 }, { "epoch": 0.1212459100297951, "grad_norm": 7.030778273171266, "learning_rate": 9.783335801541314e-06, "loss": 17.6532, "step": 6633 }, { "epoch": 0.12126418922624162, "grad_norm": 7.3654630835564125, "learning_rate": 9.783249599425302e-06, "loss": 17.6943, "step": 6634 }, { "epoch": 0.12128246842268814, "grad_norm": 8.340096371958918, "learning_rate": 9.783163380544416e-06, "loss": 18.7983, "step": 6635 }, { "epoch": 0.12130074761913466, "grad_norm": 6.741932066440961, "learning_rate": 9.783077144898957e-06, "loss": 17.7768, "step": 6636 }, { "epoch": 0.12131902681558118, "grad_norm": 7.2685583397958835, "learning_rate": 9.782990892489227e-06, "loss": 17.8446, "step": 6637 }, { "epoch": 0.12133730601202772, "grad_norm": 7.525327853935263, "learning_rate": 9.78290462331553e-06, "loss": 18.0467, "step": 6638 }, { "epoch": 0.12135558520847424, "grad_norm": 7.054087612526331, "learning_rate": 9.782818337378166e-06, "loss": 17.8198, "step": 6639 }, { "epoch": 0.12137386440492076, "grad_norm": 6.507013946601593, "learning_rate": 9.78273203467744e-06, "loss": 17.4221, "step": 6640 }, { "epoch": 0.12139214360136728, "grad_norm": 8.665104638381612, "learning_rate": 9.782645715213651e-06, "loss": 18.4877, "step": 6641 }, { "epoch": 0.1214104227978138, "grad_norm": 6.2512124400221944, "learning_rate": 9.782559378987106e-06, "loss": 17.3166, "step": 6642 }, { "epoch": 0.12142870199426033, "grad_norm": 6.566995098817002, "learning_rate": 9.782473025998105e-06, "loss": 17.5743, "step": 6643 }, { "epoch": 0.12144698119070686, "grad_norm": 8.687345618797119, "learning_rate": 9.782386656246951e-06, "loss": 17.5453, "step": 6644 }, { "epoch": 0.12146526038715338, "grad_norm": 6.810078433225566, "learning_rate": 9.782300269733947e-06, "loss": 17.5568, "step": 6645 }, { "epoch": 0.1214835395835999, "grad_norm": 5.867345526931524, "learning_rate": 9.782213866459395e-06, "loss": 17.2831, "step": 6646 }, { "epoch": 0.12150181878004643, "grad_norm": 6.927825558368225, "learning_rate": 9.7821274464236e-06, "loss": 17.8387, "step": 6647 }, { "epoch": 0.12152009797649295, "grad_norm": 8.042051243900016, "learning_rate": 9.78204100962686e-06, "loss": 18.4047, "step": 6648 }, { "epoch": 0.12153837717293947, "grad_norm": 7.815543835435438, "learning_rate": 9.781954556069484e-06, "loss": 18.4583, "step": 6649 }, { "epoch": 0.121556656369386, "grad_norm": 6.365267516889937, "learning_rate": 9.781868085751772e-06, "loss": 17.7982, "step": 6650 }, { "epoch": 0.12157493556583253, "grad_norm": 7.928597987709175, "learning_rate": 9.781781598674027e-06, "loss": 18.4748, "step": 6651 }, { "epoch": 0.12159321476227905, "grad_norm": 6.770908180356978, "learning_rate": 9.781695094836553e-06, "loss": 17.5607, "step": 6652 }, { "epoch": 0.12161149395872557, "grad_norm": 8.076061530040407, "learning_rate": 9.78160857423965e-06, "loss": 18.4446, "step": 6653 }, { "epoch": 0.1216297731551721, "grad_norm": 6.989353573018683, "learning_rate": 9.781522036883626e-06, "loss": 18.0134, "step": 6654 }, { "epoch": 0.12164805235161863, "grad_norm": 7.163769031715184, "learning_rate": 9.781435482768781e-06, "loss": 17.879, "step": 6655 }, { "epoch": 0.12166633154806515, "grad_norm": 5.748367459077812, "learning_rate": 9.78134891189542e-06, "loss": 17.3384, "step": 6656 }, { "epoch": 0.12168461074451167, "grad_norm": 7.788122691341108, "learning_rate": 9.781262324263846e-06, "loss": 17.8478, "step": 6657 }, { "epoch": 0.1217028899409582, "grad_norm": 6.219641421602568, "learning_rate": 9.781175719874364e-06, "loss": 17.3303, "step": 6658 }, { "epoch": 0.12172116913740472, "grad_norm": 6.823288884294493, "learning_rate": 9.781089098727274e-06, "loss": 17.6334, "step": 6659 }, { "epoch": 0.12173944833385124, "grad_norm": 6.968891928695853, "learning_rate": 9.781002460822883e-06, "loss": 17.7669, "step": 6660 }, { "epoch": 0.12175772753029777, "grad_norm": 6.87242956629981, "learning_rate": 9.780915806161493e-06, "loss": 17.5215, "step": 6661 }, { "epoch": 0.1217760067267443, "grad_norm": 6.415485023430618, "learning_rate": 9.780829134743408e-06, "loss": 17.7493, "step": 6662 }, { "epoch": 0.12179428592319082, "grad_norm": 6.771392867085264, "learning_rate": 9.780742446568932e-06, "loss": 17.6961, "step": 6663 }, { "epoch": 0.12181256511963734, "grad_norm": 6.6995121966166185, "learning_rate": 9.780655741638367e-06, "loss": 17.6009, "step": 6664 }, { "epoch": 0.12183084431608386, "grad_norm": 6.648799284857779, "learning_rate": 9.78056901995202e-06, "loss": 17.2554, "step": 6665 }, { "epoch": 0.12184912351253038, "grad_norm": 5.713634795954836, "learning_rate": 9.780482281510194e-06, "loss": 17.3779, "step": 6666 }, { "epoch": 0.12186740270897692, "grad_norm": 7.321783359721326, "learning_rate": 9.780395526313188e-06, "loss": 18.013, "step": 6667 }, { "epoch": 0.12188568190542344, "grad_norm": 6.368595998721913, "learning_rate": 9.780308754361316e-06, "loss": 17.3284, "step": 6668 }, { "epoch": 0.12190396110186996, "grad_norm": 7.940013034290859, "learning_rate": 9.780221965654874e-06, "loss": 18.2431, "step": 6669 }, { "epoch": 0.12192224029831648, "grad_norm": 8.2597910346473, "learning_rate": 9.780135160194168e-06, "loss": 18.3547, "step": 6670 }, { "epoch": 0.121940519494763, "grad_norm": 8.342409103263948, "learning_rate": 9.780048337979505e-06, "loss": 18.4465, "step": 6671 }, { "epoch": 0.12195879869120954, "grad_norm": 7.896434680159246, "learning_rate": 9.779961499011187e-06, "loss": 18.0811, "step": 6672 }, { "epoch": 0.12197707788765606, "grad_norm": 7.1427655665856955, "learning_rate": 9.779874643289517e-06, "loss": 17.8061, "step": 6673 }, { "epoch": 0.12199535708410258, "grad_norm": 7.285996116609734, "learning_rate": 9.779787770814804e-06, "loss": 17.7508, "step": 6674 }, { "epoch": 0.1220136362805491, "grad_norm": 7.961217904499843, "learning_rate": 9.779700881587349e-06, "loss": 17.8702, "step": 6675 }, { "epoch": 0.12203191547699563, "grad_norm": 9.678940575997192, "learning_rate": 9.779613975607456e-06, "loss": 18.5119, "step": 6676 }, { "epoch": 0.12205019467344215, "grad_norm": 7.305182452069914, "learning_rate": 9.779527052875431e-06, "loss": 18.0059, "step": 6677 }, { "epoch": 0.12206847386988869, "grad_norm": 8.011995951602925, "learning_rate": 9.779440113391578e-06, "loss": 18.1858, "step": 6678 }, { "epoch": 0.12208675306633521, "grad_norm": 7.799713488923933, "learning_rate": 9.779353157156202e-06, "loss": 17.9749, "step": 6679 }, { "epoch": 0.12210503226278173, "grad_norm": 7.960979082936661, "learning_rate": 9.77926618416961e-06, "loss": 18.3202, "step": 6680 }, { "epoch": 0.12212331145922825, "grad_norm": 6.171541488608712, "learning_rate": 9.779179194432102e-06, "loss": 17.2526, "step": 6681 }, { "epoch": 0.12214159065567477, "grad_norm": 6.397595410388515, "learning_rate": 9.779092187943988e-06, "loss": 17.7538, "step": 6682 }, { "epoch": 0.1221598698521213, "grad_norm": 7.348032278924981, "learning_rate": 9.779005164705568e-06, "loss": 17.7268, "step": 6683 }, { "epoch": 0.12217814904856783, "grad_norm": 6.751145450140728, "learning_rate": 9.778918124717151e-06, "loss": 17.9829, "step": 6684 }, { "epoch": 0.12219642824501435, "grad_norm": 6.997427910648309, "learning_rate": 9.778831067979043e-06, "loss": 17.7092, "step": 6685 }, { "epoch": 0.12221470744146087, "grad_norm": 6.570589473132914, "learning_rate": 9.778743994491544e-06, "loss": 17.425, "step": 6686 }, { "epoch": 0.1222329866379074, "grad_norm": 8.874818025027167, "learning_rate": 9.778656904254962e-06, "loss": 17.9388, "step": 6687 }, { "epoch": 0.12225126583435392, "grad_norm": 6.758030485303756, "learning_rate": 9.778569797269604e-06, "loss": 17.8268, "step": 6688 }, { "epoch": 0.12226954503080045, "grad_norm": 6.9498680833266455, "learning_rate": 9.778482673535772e-06, "loss": 17.806, "step": 6689 }, { "epoch": 0.12228782422724697, "grad_norm": 7.238401612147517, "learning_rate": 9.778395533053772e-06, "loss": 17.9575, "step": 6690 }, { "epoch": 0.1223061034236935, "grad_norm": 7.79205501513286, "learning_rate": 9.778308375823912e-06, "loss": 18.188, "step": 6691 }, { "epoch": 0.12232438262014002, "grad_norm": 8.232803048564039, "learning_rate": 9.778221201846496e-06, "loss": 17.9286, "step": 6692 }, { "epoch": 0.12234266181658654, "grad_norm": 7.463984094963095, "learning_rate": 9.778134011121829e-06, "loss": 17.9496, "step": 6693 }, { "epoch": 0.12236094101303306, "grad_norm": 6.277175201204437, "learning_rate": 9.778046803650216e-06, "loss": 17.3479, "step": 6694 }, { "epoch": 0.1223792202094796, "grad_norm": 7.628884492213488, "learning_rate": 9.777959579431964e-06, "loss": 17.9509, "step": 6695 }, { "epoch": 0.12239749940592612, "grad_norm": 6.142749128251153, "learning_rate": 9.77787233846738e-06, "loss": 17.1414, "step": 6696 }, { "epoch": 0.12241577860237264, "grad_norm": 7.916450165820055, "learning_rate": 9.777785080756765e-06, "loss": 18.203, "step": 6697 }, { "epoch": 0.12243405779881916, "grad_norm": 7.338705383324853, "learning_rate": 9.77769780630043e-06, "loss": 17.9081, "step": 6698 }, { "epoch": 0.12245233699526568, "grad_norm": 8.047757701247363, "learning_rate": 9.777610515098677e-06, "loss": 18.1902, "step": 6699 }, { "epoch": 0.1224706161917122, "grad_norm": 7.780277635133266, "learning_rate": 9.777523207151816e-06, "loss": 17.9597, "step": 6700 }, { "epoch": 0.12248889538815874, "grad_norm": 5.451371512759478, "learning_rate": 9.777435882460149e-06, "loss": 17.0746, "step": 6701 }, { "epoch": 0.12250717458460526, "grad_norm": 5.919984741863563, "learning_rate": 9.777348541023986e-06, "loss": 17.3077, "step": 6702 }, { "epoch": 0.12252545378105179, "grad_norm": 6.335150978536211, "learning_rate": 9.777261182843627e-06, "loss": 17.4041, "step": 6703 }, { "epoch": 0.12254373297749831, "grad_norm": 7.911778775410028, "learning_rate": 9.777173807919386e-06, "loss": 17.6603, "step": 6704 }, { "epoch": 0.12256201217394483, "grad_norm": 6.039306447646183, "learning_rate": 9.777086416251564e-06, "loss": 17.2483, "step": 6705 }, { "epoch": 0.12258029137039136, "grad_norm": 6.443788852913615, "learning_rate": 9.77699900784047e-06, "loss": 17.5087, "step": 6706 }, { "epoch": 0.12259857056683789, "grad_norm": 7.261665013006005, "learning_rate": 9.776911582686405e-06, "loss": 17.8151, "step": 6707 }, { "epoch": 0.12261684976328441, "grad_norm": 6.636859775767234, "learning_rate": 9.776824140789683e-06, "loss": 17.4764, "step": 6708 }, { "epoch": 0.12263512895973093, "grad_norm": 7.254475992626495, "learning_rate": 9.776736682150606e-06, "loss": 17.8066, "step": 6709 }, { "epoch": 0.12265340815617745, "grad_norm": 7.229741150410805, "learning_rate": 9.77664920676948e-06, "loss": 18.0929, "step": 6710 }, { "epoch": 0.12267168735262397, "grad_norm": 6.5449328205287225, "learning_rate": 9.776561714646616e-06, "loss": 17.3675, "step": 6711 }, { "epoch": 0.12268996654907051, "grad_norm": 6.250228378134764, "learning_rate": 9.776474205782315e-06, "loss": 17.5722, "step": 6712 }, { "epoch": 0.12270824574551703, "grad_norm": 6.9380784567055995, "learning_rate": 9.776386680176888e-06, "loss": 17.8353, "step": 6713 }, { "epoch": 0.12272652494196355, "grad_norm": 5.478467383133045, "learning_rate": 9.776299137830638e-06, "loss": 17.147, "step": 6714 }, { "epoch": 0.12274480413841007, "grad_norm": 8.9457506259024, "learning_rate": 9.776211578743875e-06, "loss": 18.5953, "step": 6715 }, { "epoch": 0.1227630833348566, "grad_norm": 7.88474119114791, "learning_rate": 9.776124002916907e-06, "loss": 18.0582, "step": 6716 }, { "epoch": 0.12278136253130312, "grad_norm": 7.879462358077435, "learning_rate": 9.776036410350035e-06, "loss": 17.8196, "step": 6717 }, { "epoch": 0.12279964172774965, "grad_norm": 7.4188074690427985, "learning_rate": 9.775948801043573e-06, "loss": 17.9607, "step": 6718 }, { "epoch": 0.12281792092419618, "grad_norm": 10.335397547280172, "learning_rate": 9.775861174997824e-06, "loss": 17.7169, "step": 6719 }, { "epoch": 0.1228362001206427, "grad_norm": 6.594194572109577, "learning_rate": 9.775773532213096e-06, "loss": 17.6715, "step": 6720 }, { "epoch": 0.12285447931708922, "grad_norm": 6.208944364137212, "learning_rate": 9.775685872689696e-06, "loss": 17.3092, "step": 6721 }, { "epoch": 0.12287275851353574, "grad_norm": 5.93769402082507, "learning_rate": 9.775598196427931e-06, "loss": 17.423, "step": 6722 }, { "epoch": 0.12289103770998228, "grad_norm": 10.046276137696408, "learning_rate": 9.77551050342811e-06, "loss": 18.779, "step": 6723 }, { "epoch": 0.1229093169064288, "grad_norm": 7.706093370283888, "learning_rate": 9.775422793690539e-06, "loss": 17.8201, "step": 6724 }, { "epoch": 0.12292759610287532, "grad_norm": 6.828056449910487, "learning_rate": 9.775335067215524e-06, "loss": 17.6539, "step": 6725 }, { "epoch": 0.12294587529932184, "grad_norm": 7.260831132405209, "learning_rate": 9.775247324003375e-06, "loss": 17.9743, "step": 6726 }, { "epoch": 0.12296415449576836, "grad_norm": 7.853148072594991, "learning_rate": 9.775159564054398e-06, "loss": 18.0551, "step": 6727 }, { "epoch": 0.12298243369221488, "grad_norm": 6.893374319447, "learning_rate": 9.775071787368902e-06, "loss": 17.907, "step": 6728 }, { "epoch": 0.12300071288866142, "grad_norm": 6.796229898152696, "learning_rate": 9.774983993947194e-06, "loss": 17.591, "step": 6729 }, { "epoch": 0.12301899208510794, "grad_norm": 7.352417629549728, "learning_rate": 9.774896183789579e-06, "loss": 18.0169, "step": 6730 }, { "epoch": 0.12303727128155446, "grad_norm": 7.420712217607259, "learning_rate": 9.77480835689637e-06, "loss": 17.8592, "step": 6731 }, { "epoch": 0.12305555047800099, "grad_norm": 7.79655590706962, "learning_rate": 9.77472051326787e-06, "loss": 18.2393, "step": 6732 }, { "epoch": 0.12307382967444751, "grad_norm": 8.562040061722492, "learning_rate": 9.77463265290439e-06, "loss": 18.5743, "step": 6733 }, { "epoch": 0.12309210887089403, "grad_norm": 7.419169310113422, "learning_rate": 9.774544775806238e-06, "loss": 17.645, "step": 6734 }, { "epoch": 0.12311038806734056, "grad_norm": 7.62493487074596, "learning_rate": 9.774456881973718e-06, "loss": 18.0855, "step": 6735 }, { "epoch": 0.12312866726378709, "grad_norm": 5.854997719179686, "learning_rate": 9.774368971407143e-06, "loss": 17.136, "step": 6736 }, { "epoch": 0.12314694646023361, "grad_norm": 8.432702579713537, "learning_rate": 9.774281044106818e-06, "loss": 18.8493, "step": 6737 }, { "epoch": 0.12316522565668013, "grad_norm": 7.084231465389021, "learning_rate": 9.774193100073054e-06, "loss": 17.6815, "step": 6738 }, { "epoch": 0.12318350485312665, "grad_norm": 7.2206962214256105, "learning_rate": 9.774105139306156e-06, "loss": 17.8568, "step": 6739 }, { "epoch": 0.12320178404957319, "grad_norm": 8.011314791988577, "learning_rate": 9.774017161806434e-06, "loss": 18.0389, "step": 6740 }, { "epoch": 0.12322006324601971, "grad_norm": 6.809095578692524, "learning_rate": 9.773929167574197e-06, "loss": 17.3972, "step": 6741 }, { "epoch": 0.12323834244246623, "grad_norm": 6.631416005685428, "learning_rate": 9.773841156609751e-06, "loss": 17.4618, "step": 6742 }, { "epoch": 0.12325662163891275, "grad_norm": 6.491790080792121, "learning_rate": 9.773753128913406e-06, "loss": 17.4074, "step": 6743 }, { "epoch": 0.12327490083535927, "grad_norm": 7.8694925596265906, "learning_rate": 9.773665084485472e-06, "loss": 18.1842, "step": 6744 }, { "epoch": 0.1232931800318058, "grad_norm": 7.138056421575226, "learning_rate": 9.773577023326255e-06, "loss": 17.8597, "step": 6745 }, { "epoch": 0.12331145922825233, "grad_norm": 7.1014946326973405, "learning_rate": 9.773488945436064e-06, "loss": 17.6909, "step": 6746 }, { "epoch": 0.12332973842469885, "grad_norm": 8.25137421905207, "learning_rate": 9.77340085081521e-06, "loss": 17.7142, "step": 6747 }, { "epoch": 0.12334801762114538, "grad_norm": 7.22933512730961, "learning_rate": 9.773312739464001e-06, "loss": 17.8855, "step": 6748 }, { "epoch": 0.1233662968175919, "grad_norm": 6.406227023502943, "learning_rate": 9.773224611382744e-06, "loss": 17.4731, "step": 6749 }, { "epoch": 0.12338457601403842, "grad_norm": 6.29912276582381, "learning_rate": 9.77313646657175e-06, "loss": 17.3361, "step": 6750 }, { "epoch": 0.12340285521048494, "grad_norm": 7.163796354375712, "learning_rate": 9.773048305031324e-06, "loss": 17.9806, "step": 6751 }, { "epoch": 0.12342113440693148, "grad_norm": 8.083667017627093, "learning_rate": 9.772960126761779e-06, "loss": 17.6488, "step": 6752 }, { "epoch": 0.123439413603378, "grad_norm": 9.335094240895968, "learning_rate": 9.772871931763423e-06, "loss": 18.4421, "step": 6753 }, { "epoch": 0.12345769279982452, "grad_norm": 8.650517180629848, "learning_rate": 9.772783720036566e-06, "loss": 18.2238, "step": 6754 }, { "epoch": 0.12347597199627104, "grad_norm": 7.408655689107285, "learning_rate": 9.772695491581517e-06, "loss": 18.178, "step": 6755 }, { "epoch": 0.12349425119271756, "grad_norm": 6.272185991151557, "learning_rate": 9.772607246398582e-06, "loss": 17.3344, "step": 6756 }, { "epoch": 0.1235125303891641, "grad_norm": 8.625669454668147, "learning_rate": 9.772518984488076e-06, "loss": 18.3302, "step": 6757 }, { "epoch": 0.12353080958561062, "grad_norm": 8.106328974221528, "learning_rate": 9.772430705850302e-06, "loss": 18.0292, "step": 6758 }, { "epoch": 0.12354908878205714, "grad_norm": 7.396995596141605, "learning_rate": 9.772342410485574e-06, "loss": 18.1042, "step": 6759 }, { "epoch": 0.12356736797850366, "grad_norm": 8.424871113247226, "learning_rate": 9.772254098394199e-06, "loss": 18.2745, "step": 6760 }, { "epoch": 0.12358564717495019, "grad_norm": 7.550498496474941, "learning_rate": 9.772165769576487e-06, "loss": 17.8764, "step": 6761 }, { "epoch": 0.12360392637139671, "grad_norm": 6.283517836040569, "learning_rate": 9.77207742403275e-06, "loss": 17.5406, "step": 6762 }, { "epoch": 0.12362220556784324, "grad_norm": 7.32837620388845, "learning_rate": 9.771989061763295e-06, "loss": 17.9969, "step": 6763 }, { "epoch": 0.12364048476428977, "grad_norm": 6.999174504920259, "learning_rate": 9.771900682768431e-06, "loss": 17.6985, "step": 6764 }, { "epoch": 0.12365876396073629, "grad_norm": 7.353166074856369, "learning_rate": 9.771812287048473e-06, "loss": 17.9816, "step": 6765 }, { "epoch": 0.12367704315718281, "grad_norm": 6.872916666751801, "learning_rate": 9.771723874603722e-06, "loss": 17.7053, "step": 6766 }, { "epoch": 0.12369532235362933, "grad_norm": 10.832766572986543, "learning_rate": 9.771635445434497e-06, "loss": 17.7978, "step": 6767 }, { "epoch": 0.12371360155007585, "grad_norm": 6.904832355542965, "learning_rate": 9.771546999541101e-06, "loss": 18.0874, "step": 6768 }, { "epoch": 0.12373188074652239, "grad_norm": 8.029770332971117, "learning_rate": 9.77145853692385e-06, "loss": 17.9579, "step": 6769 }, { "epoch": 0.12375015994296891, "grad_norm": 7.006988683324624, "learning_rate": 9.771370057583047e-06, "loss": 17.7966, "step": 6770 }, { "epoch": 0.12376843913941543, "grad_norm": 7.2888792726150955, "learning_rate": 9.771281561519009e-06, "loss": 17.9035, "step": 6771 }, { "epoch": 0.12378671833586195, "grad_norm": 7.46306368992549, "learning_rate": 9.771193048732041e-06, "loss": 17.8343, "step": 6772 }, { "epoch": 0.12380499753230848, "grad_norm": 6.408100073477963, "learning_rate": 9.771104519222457e-06, "loss": 17.3325, "step": 6773 }, { "epoch": 0.12382327672875501, "grad_norm": 8.208083765211683, "learning_rate": 9.771015972990564e-06, "loss": 18.5437, "step": 6774 }, { "epoch": 0.12384155592520153, "grad_norm": 6.773044224298968, "learning_rate": 9.770927410036677e-06, "loss": 17.7102, "step": 6775 }, { "epoch": 0.12385983512164805, "grad_norm": 7.451165839563993, "learning_rate": 9.770838830361101e-06, "loss": 17.8902, "step": 6776 }, { "epoch": 0.12387811431809458, "grad_norm": 7.838133310158093, "learning_rate": 9.77075023396415e-06, "loss": 17.9973, "step": 6777 }, { "epoch": 0.1238963935145411, "grad_norm": 7.657081364273012, "learning_rate": 9.770661620846132e-06, "loss": 17.6833, "step": 6778 }, { "epoch": 0.12391467271098762, "grad_norm": 6.482389867965016, "learning_rate": 9.770572991007362e-06, "loss": 17.4262, "step": 6779 }, { "epoch": 0.12393295190743416, "grad_norm": 6.927975602071881, "learning_rate": 9.770484344448144e-06, "loss": 17.9453, "step": 6780 }, { "epoch": 0.12395123110388068, "grad_norm": 7.236795933527305, "learning_rate": 9.770395681168794e-06, "loss": 17.639, "step": 6781 }, { "epoch": 0.1239695103003272, "grad_norm": 6.495468232258661, "learning_rate": 9.770307001169621e-06, "loss": 17.3891, "step": 6782 }, { "epoch": 0.12398778949677372, "grad_norm": 8.383316197209089, "learning_rate": 9.770218304450935e-06, "loss": 18.4597, "step": 6783 }, { "epoch": 0.12400606869322024, "grad_norm": 5.72011174779133, "learning_rate": 9.770129591013049e-06, "loss": 17.4688, "step": 6784 }, { "epoch": 0.12402434788966676, "grad_norm": 6.9603655146625325, "learning_rate": 9.770040860856273e-06, "loss": 17.9703, "step": 6785 }, { "epoch": 0.1240426270861133, "grad_norm": 8.70264578219701, "learning_rate": 9.769952113980917e-06, "loss": 18.574, "step": 6786 }, { "epoch": 0.12406090628255982, "grad_norm": 7.579062368171308, "learning_rate": 9.769863350387293e-06, "loss": 18.1362, "step": 6787 }, { "epoch": 0.12407918547900634, "grad_norm": 6.936197691883063, "learning_rate": 9.769774570075711e-06, "loss": 17.8093, "step": 6788 }, { "epoch": 0.12409746467545286, "grad_norm": 7.832351463230458, "learning_rate": 9.769685773046484e-06, "loss": 18.2735, "step": 6789 }, { "epoch": 0.12411574387189939, "grad_norm": 8.877510471961573, "learning_rate": 9.769596959299923e-06, "loss": 18.4219, "step": 6790 }, { "epoch": 0.12413402306834592, "grad_norm": 7.385366838914088, "learning_rate": 9.769508128836338e-06, "loss": 17.6268, "step": 6791 }, { "epoch": 0.12415230226479244, "grad_norm": 8.63879098043357, "learning_rate": 9.76941928165604e-06, "loss": 18.8794, "step": 6792 }, { "epoch": 0.12417058146123897, "grad_norm": 7.084520759446865, "learning_rate": 9.769330417759342e-06, "loss": 17.846, "step": 6793 }, { "epoch": 0.12418886065768549, "grad_norm": 6.977025543661845, "learning_rate": 9.769241537146555e-06, "loss": 17.5868, "step": 6794 }, { "epoch": 0.12420713985413201, "grad_norm": 8.314269462705171, "learning_rate": 9.769152639817988e-06, "loss": 18.8014, "step": 6795 }, { "epoch": 0.12422541905057853, "grad_norm": 7.26374498994195, "learning_rate": 9.769063725773957e-06, "loss": 18.0784, "step": 6796 }, { "epoch": 0.12424369824702507, "grad_norm": 7.750043978676372, "learning_rate": 9.768974795014772e-06, "loss": 17.9159, "step": 6797 }, { "epoch": 0.12426197744347159, "grad_norm": 7.184436727363733, "learning_rate": 9.768885847540743e-06, "loss": 17.7996, "step": 6798 }, { "epoch": 0.12428025663991811, "grad_norm": 6.598999271682578, "learning_rate": 9.768796883352183e-06, "loss": 17.9671, "step": 6799 }, { "epoch": 0.12429853583636463, "grad_norm": 6.794746580189616, "learning_rate": 9.768707902449403e-06, "loss": 17.5868, "step": 6800 }, { "epoch": 0.12431681503281115, "grad_norm": 8.294978666463557, "learning_rate": 9.768618904832718e-06, "loss": 17.8646, "step": 6801 }, { "epoch": 0.12433509422925768, "grad_norm": 6.481829932916731, "learning_rate": 9.768529890502435e-06, "loss": 17.5915, "step": 6802 }, { "epoch": 0.12435337342570421, "grad_norm": 8.799674341553661, "learning_rate": 9.76844085945887e-06, "loss": 17.8731, "step": 6803 }, { "epoch": 0.12437165262215073, "grad_norm": 6.441341548209323, "learning_rate": 9.768351811702333e-06, "loss": 17.5145, "step": 6804 }, { "epoch": 0.12438993181859725, "grad_norm": 7.724334044183744, "learning_rate": 9.768262747233137e-06, "loss": 17.9513, "step": 6805 }, { "epoch": 0.12440821101504378, "grad_norm": 8.228683522833085, "learning_rate": 9.768173666051594e-06, "loss": 17.9028, "step": 6806 }, { "epoch": 0.1244264902114903, "grad_norm": 7.413906193257426, "learning_rate": 9.768084568158015e-06, "loss": 17.8082, "step": 6807 }, { "epoch": 0.12444476940793683, "grad_norm": 8.965826870301163, "learning_rate": 9.767995453552714e-06, "loss": 18.545, "step": 6808 }, { "epoch": 0.12446304860438336, "grad_norm": 7.178717823470608, "learning_rate": 9.767906322236002e-06, "loss": 17.8343, "step": 6809 }, { "epoch": 0.12448132780082988, "grad_norm": 8.822238423199295, "learning_rate": 9.767817174208194e-06, "loss": 18.6875, "step": 6810 }, { "epoch": 0.1244996069972764, "grad_norm": 6.395336496052081, "learning_rate": 9.7677280094696e-06, "loss": 17.4369, "step": 6811 }, { "epoch": 0.12451788619372292, "grad_norm": 6.550944616976022, "learning_rate": 9.767638828020532e-06, "loss": 17.5714, "step": 6812 }, { "epoch": 0.12453616539016944, "grad_norm": 7.3943820592035525, "learning_rate": 9.767549629861304e-06, "loss": 17.8888, "step": 6813 }, { "epoch": 0.12455444458661598, "grad_norm": 6.955204796907054, "learning_rate": 9.767460414992229e-06, "loss": 17.688, "step": 6814 }, { "epoch": 0.1245727237830625, "grad_norm": 9.371640516387902, "learning_rate": 9.767371183413619e-06, "loss": 17.7048, "step": 6815 }, { "epoch": 0.12459100297950902, "grad_norm": 6.945829805828666, "learning_rate": 9.767281935125785e-06, "loss": 17.9757, "step": 6816 }, { "epoch": 0.12460928217595554, "grad_norm": 8.224404276896458, "learning_rate": 9.767192670129042e-06, "loss": 18.285, "step": 6817 }, { "epoch": 0.12462756137240207, "grad_norm": 6.9793598341825165, "learning_rate": 9.767103388423704e-06, "loss": 17.9039, "step": 6818 }, { "epoch": 0.12464584056884859, "grad_norm": 6.452361576578343, "learning_rate": 9.767014090010081e-06, "loss": 17.5745, "step": 6819 }, { "epoch": 0.12466411976529512, "grad_norm": 7.7401983160125445, "learning_rate": 9.766924774888487e-06, "loss": 17.6759, "step": 6820 }, { "epoch": 0.12468239896174164, "grad_norm": 6.461913283266975, "learning_rate": 9.766835443059235e-06, "loss": 17.2202, "step": 6821 }, { "epoch": 0.12470067815818817, "grad_norm": 6.408831285939423, "learning_rate": 9.76674609452264e-06, "loss": 17.4377, "step": 6822 }, { "epoch": 0.12471895735463469, "grad_norm": 6.4698280102972285, "learning_rate": 9.766656729279012e-06, "loss": 17.3235, "step": 6823 }, { "epoch": 0.12473723655108121, "grad_norm": 7.456689630962659, "learning_rate": 9.766567347328667e-06, "loss": 18.078, "step": 6824 }, { "epoch": 0.12475551574752775, "grad_norm": 6.2871557378464615, "learning_rate": 9.766477948671918e-06, "loss": 17.3276, "step": 6825 }, { "epoch": 0.12477379494397427, "grad_norm": 6.4628018911268805, "learning_rate": 9.766388533309075e-06, "loss": 17.4461, "step": 6826 }, { "epoch": 0.12479207414042079, "grad_norm": 8.798310200953944, "learning_rate": 9.766299101240455e-06, "loss": 18.3388, "step": 6827 }, { "epoch": 0.12481035333686731, "grad_norm": 9.01801958880886, "learning_rate": 9.76620965246637e-06, "loss": 18.0676, "step": 6828 }, { "epoch": 0.12482863253331383, "grad_norm": 7.637732117289141, "learning_rate": 9.766120186987134e-06, "loss": 17.857, "step": 6829 }, { "epoch": 0.12484691172976035, "grad_norm": 7.3498358064409155, "learning_rate": 9.76603070480306e-06, "loss": 17.7859, "step": 6830 }, { "epoch": 0.12486519092620689, "grad_norm": 7.96401297844795, "learning_rate": 9.765941205914461e-06, "loss": 17.8464, "step": 6831 }, { "epoch": 0.12488347012265341, "grad_norm": 7.491558225034503, "learning_rate": 9.765851690321652e-06, "loss": 18.0553, "step": 6832 }, { "epoch": 0.12490174931909993, "grad_norm": 6.684682411726688, "learning_rate": 9.765762158024948e-06, "loss": 17.9563, "step": 6833 }, { "epoch": 0.12492002851554646, "grad_norm": 7.678492114914815, "learning_rate": 9.765672609024662e-06, "loss": 18.0928, "step": 6834 }, { "epoch": 0.12493830771199298, "grad_norm": 6.75221266810973, "learning_rate": 9.765583043321104e-06, "loss": 17.642, "step": 6835 }, { "epoch": 0.1249565869084395, "grad_norm": 5.652893022972405, "learning_rate": 9.765493460914592e-06, "loss": 17.2443, "step": 6836 }, { "epoch": 0.12497486610488603, "grad_norm": 7.211616485499025, "learning_rate": 9.76540386180544e-06, "loss": 17.5644, "step": 6837 }, { "epoch": 0.12499314530133256, "grad_norm": 7.491578036573935, "learning_rate": 9.76531424599396e-06, "loss": 18.2474, "step": 6838 }, { "epoch": 0.12501142449777908, "grad_norm": 8.106648613675649, "learning_rate": 9.765224613480468e-06, "loss": 18.2582, "step": 6839 }, { "epoch": 0.1250297036942256, "grad_norm": 7.301726902083591, "learning_rate": 9.765134964265277e-06, "loss": 17.7908, "step": 6840 }, { "epoch": 0.12504798289067212, "grad_norm": 5.857463158845806, "learning_rate": 9.765045298348701e-06, "loss": 17.3778, "step": 6841 }, { "epoch": 0.12506626208711866, "grad_norm": 6.6013829363255, "learning_rate": 9.764955615731054e-06, "loss": 17.5809, "step": 6842 }, { "epoch": 0.12508454128356516, "grad_norm": 7.349650909699515, "learning_rate": 9.764865916412651e-06, "loss": 18.0713, "step": 6843 }, { "epoch": 0.1251028204800117, "grad_norm": 7.1705446033353555, "learning_rate": 9.764776200393809e-06, "loss": 17.7023, "step": 6844 }, { "epoch": 0.12512109967645824, "grad_norm": 9.594565997134804, "learning_rate": 9.764686467674837e-06, "loss": 18.6706, "step": 6845 }, { "epoch": 0.12513937887290474, "grad_norm": 6.26969894834299, "learning_rate": 9.764596718256054e-06, "loss": 17.4378, "step": 6846 }, { "epoch": 0.12515765806935128, "grad_norm": 6.419926332413454, "learning_rate": 9.764506952137772e-06, "loss": 17.5315, "step": 6847 }, { "epoch": 0.1251759372657978, "grad_norm": 7.022424959197518, "learning_rate": 9.764417169320308e-06, "loss": 17.9142, "step": 6848 }, { "epoch": 0.12519421646224432, "grad_norm": 8.789123413034922, "learning_rate": 9.764327369803974e-06, "loss": 18.9441, "step": 6849 }, { "epoch": 0.12521249565869083, "grad_norm": 8.052600772208601, "learning_rate": 9.764237553589086e-06, "loss": 18.049, "step": 6850 }, { "epoch": 0.12523077485513737, "grad_norm": 7.815449913994883, "learning_rate": 9.764147720675959e-06, "loss": 17.9423, "step": 6851 }, { "epoch": 0.1252490540515839, "grad_norm": 7.716998644654304, "learning_rate": 9.764057871064908e-06, "loss": 18.2239, "step": 6852 }, { "epoch": 0.1252673332480304, "grad_norm": 7.985093938302353, "learning_rate": 9.763968004756248e-06, "loss": 17.9283, "step": 6853 }, { "epoch": 0.12528561244447695, "grad_norm": 5.863768231151736, "learning_rate": 9.763878121750293e-06, "loss": 17.1499, "step": 6854 }, { "epoch": 0.12530389164092345, "grad_norm": 7.512199977568955, "learning_rate": 9.763788222047358e-06, "loss": 17.7701, "step": 6855 }, { "epoch": 0.12532217083737, "grad_norm": 7.015726812898855, "learning_rate": 9.76369830564776e-06, "loss": 17.7275, "step": 6856 }, { "epoch": 0.12534045003381653, "grad_norm": 8.576731202571079, "learning_rate": 9.763608372551812e-06, "loss": 18.4529, "step": 6857 }, { "epoch": 0.12535872923026303, "grad_norm": 9.091430597613348, "learning_rate": 9.763518422759829e-06, "loss": 18.7546, "step": 6858 }, { "epoch": 0.12537700842670957, "grad_norm": 6.60377192208082, "learning_rate": 9.763428456272127e-06, "loss": 17.4729, "step": 6859 }, { "epoch": 0.12539528762315608, "grad_norm": 6.8105219824174545, "learning_rate": 9.763338473089023e-06, "loss": 17.6938, "step": 6860 }, { "epoch": 0.1254135668196026, "grad_norm": 6.301755296093012, "learning_rate": 9.76324847321083e-06, "loss": 17.3673, "step": 6861 }, { "epoch": 0.12543184601604915, "grad_norm": 9.262335557793726, "learning_rate": 9.763158456637868e-06, "loss": 18.244, "step": 6862 }, { "epoch": 0.12545012521249566, "grad_norm": 6.558293955485627, "learning_rate": 9.763068423370446e-06, "loss": 17.7602, "step": 6863 }, { "epoch": 0.1254684044089422, "grad_norm": 6.851336277162645, "learning_rate": 9.762978373408882e-06, "loss": 17.8758, "step": 6864 }, { "epoch": 0.1254866836053887, "grad_norm": 6.178099064026581, "learning_rate": 9.762888306753493e-06, "loss": 17.2753, "step": 6865 }, { "epoch": 0.12550496280183523, "grad_norm": 7.971854847632996, "learning_rate": 9.762798223404595e-06, "loss": 18.2635, "step": 6866 }, { "epoch": 0.12552324199828174, "grad_norm": 7.461915683024421, "learning_rate": 9.7627081233625e-06, "loss": 17.9847, "step": 6867 }, { "epoch": 0.12554152119472828, "grad_norm": 6.167959628386896, "learning_rate": 9.762618006627526e-06, "loss": 17.1975, "step": 6868 }, { "epoch": 0.12555980039117481, "grad_norm": 5.904957131953893, "learning_rate": 9.762527873199991e-06, "loss": 17.3502, "step": 6869 }, { "epoch": 0.12557807958762132, "grad_norm": 7.319643468981985, "learning_rate": 9.762437723080209e-06, "loss": 17.8596, "step": 6870 }, { "epoch": 0.12559635878406786, "grad_norm": 6.776461760249466, "learning_rate": 9.762347556268497e-06, "loss": 17.5943, "step": 6871 }, { "epoch": 0.12561463798051437, "grad_norm": 6.945555187723439, "learning_rate": 9.762257372765169e-06, "loss": 17.6469, "step": 6872 }, { "epoch": 0.1256329171769609, "grad_norm": 7.5795008587308175, "learning_rate": 9.762167172570541e-06, "loss": 18.1053, "step": 6873 }, { "epoch": 0.12565119637340744, "grad_norm": 7.625446892507235, "learning_rate": 9.762076955684932e-06, "loss": 18.0321, "step": 6874 }, { "epoch": 0.12566947556985394, "grad_norm": 7.550851598821339, "learning_rate": 9.761986722108656e-06, "loss": 18.0622, "step": 6875 }, { "epoch": 0.12568775476630048, "grad_norm": 5.343112907295592, "learning_rate": 9.761896471842029e-06, "loss": 17.2661, "step": 6876 }, { "epoch": 0.125706033962747, "grad_norm": 6.896208545413777, "learning_rate": 9.76180620488537e-06, "loss": 17.6864, "step": 6877 }, { "epoch": 0.12572431315919352, "grad_norm": 7.185623538717204, "learning_rate": 9.76171592123899e-06, "loss": 17.2856, "step": 6878 }, { "epoch": 0.12574259235564006, "grad_norm": 7.686167353778615, "learning_rate": 9.761625620903212e-06, "loss": 18.3195, "step": 6879 }, { "epoch": 0.12576087155208657, "grad_norm": 8.296321085350883, "learning_rate": 9.761535303878349e-06, "loss": 18.4403, "step": 6880 }, { "epoch": 0.1257791507485331, "grad_norm": 6.768592561279704, "learning_rate": 9.761444970164717e-06, "loss": 17.6828, "step": 6881 }, { "epoch": 0.1257974299449796, "grad_norm": 7.435027612748613, "learning_rate": 9.761354619762634e-06, "loss": 17.8256, "step": 6882 }, { "epoch": 0.12581570914142615, "grad_norm": 7.849360277751968, "learning_rate": 9.761264252672416e-06, "loss": 18.2053, "step": 6883 }, { "epoch": 0.12583398833787265, "grad_norm": 6.429397492132217, "learning_rate": 9.76117386889438e-06, "loss": 17.4916, "step": 6884 }, { "epoch": 0.1258522675343192, "grad_norm": 6.067239574248806, "learning_rate": 9.761083468428843e-06, "loss": 17.3733, "step": 6885 }, { "epoch": 0.12587054673076573, "grad_norm": 8.315848729518695, "learning_rate": 9.760993051276121e-06, "loss": 18.2736, "step": 6886 }, { "epoch": 0.12588882592721223, "grad_norm": 7.32837061349687, "learning_rate": 9.760902617436532e-06, "loss": 17.9103, "step": 6887 }, { "epoch": 0.12590710512365877, "grad_norm": 7.383107886144405, "learning_rate": 9.760812166910391e-06, "loss": 17.6546, "step": 6888 }, { "epoch": 0.12592538432010528, "grad_norm": 7.476709793828603, "learning_rate": 9.760721699698019e-06, "loss": 18.0114, "step": 6889 }, { "epoch": 0.1259436635165518, "grad_norm": 9.167709907246126, "learning_rate": 9.76063121579973e-06, "loss": 18.7377, "step": 6890 }, { "epoch": 0.12596194271299835, "grad_norm": 7.271487503461663, "learning_rate": 9.76054071521584e-06, "loss": 17.9853, "step": 6891 }, { "epoch": 0.12598022190944486, "grad_norm": 6.085035177977042, "learning_rate": 9.760450197946669e-06, "loss": 17.4638, "step": 6892 }, { "epoch": 0.1259985011058914, "grad_norm": 6.552638913672762, "learning_rate": 9.760359663992534e-06, "loss": 17.5143, "step": 6893 }, { "epoch": 0.1260167803023379, "grad_norm": 7.28866953297157, "learning_rate": 9.760269113353751e-06, "loss": 17.7996, "step": 6894 }, { "epoch": 0.12603505949878444, "grad_norm": 7.506247584791319, "learning_rate": 9.760178546030638e-06, "loss": 18.177, "step": 6895 }, { "epoch": 0.12605333869523097, "grad_norm": 7.568777077230304, "learning_rate": 9.76008796202351e-06, "loss": 17.8969, "step": 6896 }, { "epoch": 0.12607161789167748, "grad_norm": 6.8513200679177775, "learning_rate": 9.75999736133269e-06, "loss": 17.728, "step": 6897 }, { "epoch": 0.12608989708812401, "grad_norm": 6.321393301559122, "learning_rate": 9.75990674395849e-06, "loss": 17.4091, "step": 6898 }, { "epoch": 0.12610817628457052, "grad_norm": 5.671400566827995, "learning_rate": 9.75981610990123e-06, "loss": 17.1359, "step": 6899 }, { "epoch": 0.12612645548101706, "grad_norm": 7.333047507187936, "learning_rate": 9.759725459161229e-06, "loss": 17.8416, "step": 6900 }, { "epoch": 0.12614473467746357, "grad_norm": 7.014759835628037, "learning_rate": 9.759634791738803e-06, "loss": 17.8615, "step": 6901 }, { "epoch": 0.1261630138739101, "grad_norm": 7.572353559355617, "learning_rate": 9.75954410763427e-06, "loss": 18.027, "step": 6902 }, { "epoch": 0.12618129307035664, "grad_norm": 7.869481255483959, "learning_rate": 9.759453406847948e-06, "loss": 18.0191, "step": 6903 }, { "epoch": 0.12619957226680314, "grad_norm": 6.993283310444016, "learning_rate": 9.759362689380154e-06, "loss": 17.9355, "step": 6904 }, { "epoch": 0.12621785146324968, "grad_norm": 7.073599533523402, "learning_rate": 9.759271955231207e-06, "loss": 17.9398, "step": 6905 }, { "epoch": 0.1262361306596962, "grad_norm": 6.7543327331622915, "learning_rate": 9.759181204401425e-06, "loss": 17.6489, "step": 6906 }, { "epoch": 0.12625440985614272, "grad_norm": 7.396854189496331, "learning_rate": 9.759090436891126e-06, "loss": 18.0247, "step": 6907 }, { "epoch": 0.12627268905258926, "grad_norm": 7.473337310410864, "learning_rate": 9.758999652700628e-06, "loss": 17.9618, "step": 6908 }, { "epoch": 0.12629096824903577, "grad_norm": 6.54808082938672, "learning_rate": 9.758908851830248e-06, "loss": 17.6502, "step": 6909 }, { "epoch": 0.1263092474454823, "grad_norm": 7.962637624156711, "learning_rate": 9.758818034280306e-06, "loss": 18.1697, "step": 6910 }, { "epoch": 0.1263275266419288, "grad_norm": 8.316194122517786, "learning_rate": 9.75872720005112e-06, "loss": 18.2445, "step": 6911 }, { "epoch": 0.12634580583837535, "grad_norm": 9.076794257119204, "learning_rate": 9.758636349143008e-06, "loss": 18.2431, "step": 6912 }, { "epoch": 0.12636408503482188, "grad_norm": 8.080425757154485, "learning_rate": 9.758545481556289e-06, "loss": 17.9978, "step": 6913 }, { "epoch": 0.1263823642312684, "grad_norm": 7.6418440516441475, "learning_rate": 9.758454597291282e-06, "loss": 17.9627, "step": 6914 }, { "epoch": 0.12640064342771493, "grad_norm": 6.644158785752914, "learning_rate": 9.758363696348303e-06, "loss": 17.3803, "step": 6915 }, { "epoch": 0.12641892262416143, "grad_norm": 6.916556386304829, "learning_rate": 9.758272778727673e-06, "loss": 17.8038, "step": 6916 }, { "epoch": 0.12643720182060797, "grad_norm": 6.866739546264169, "learning_rate": 9.758181844429709e-06, "loss": 17.7915, "step": 6917 }, { "epoch": 0.12645548101705448, "grad_norm": 6.518695509133655, "learning_rate": 9.75809089345473e-06, "loss": 17.4465, "step": 6918 }, { "epoch": 0.126473760213501, "grad_norm": 7.140291668607941, "learning_rate": 9.757999925803057e-06, "loss": 17.9112, "step": 6919 }, { "epoch": 0.12649203940994755, "grad_norm": 7.05297639968776, "learning_rate": 9.757908941475005e-06, "loss": 17.7311, "step": 6920 }, { "epoch": 0.12651031860639406, "grad_norm": 8.412370846508411, "learning_rate": 9.757817940470898e-06, "loss": 18.1769, "step": 6921 }, { "epoch": 0.1265285978028406, "grad_norm": 7.493224194573999, "learning_rate": 9.75772692279105e-06, "loss": 17.8737, "step": 6922 }, { "epoch": 0.1265468769992871, "grad_norm": 7.492658416453076, "learning_rate": 9.75763588843578e-06, "loss": 17.6434, "step": 6923 }, { "epoch": 0.12656515619573364, "grad_norm": 8.217101745761529, "learning_rate": 9.757544837405413e-06, "loss": 17.9622, "step": 6924 }, { "epoch": 0.12658343539218017, "grad_norm": 6.094890483992856, "learning_rate": 9.757453769700263e-06, "loss": 17.5712, "step": 6925 }, { "epoch": 0.12660171458862668, "grad_norm": 8.286935384573368, "learning_rate": 9.757362685320651e-06, "loss": 17.9444, "step": 6926 }, { "epoch": 0.12661999378507321, "grad_norm": 7.465689725152292, "learning_rate": 9.757271584266894e-06, "loss": 17.8804, "step": 6927 }, { "epoch": 0.12663827298151972, "grad_norm": 6.505481223851254, "learning_rate": 9.757180466539314e-06, "loss": 17.4906, "step": 6928 }, { "epoch": 0.12665655217796626, "grad_norm": 7.675165837939194, "learning_rate": 9.757089332138227e-06, "loss": 18.3166, "step": 6929 }, { "epoch": 0.1266748313744128, "grad_norm": 8.04806473890603, "learning_rate": 9.756998181063956e-06, "loss": 17.9215, "step": 6930 }, { "epoch": 0.1266931105708593, "grad_norm": 7.944870275372156, "learning_rate": 9.75690701331682e-06, "loss": 18.3751, "step": 6931 }, { "epoch": 0.12671138976730584, "grad_norm": 8.007043568618746, "learning_rate": 9.756815828897139e-06, "loss": 18.252, "step": 6932 }, { "epoch": 0.12672966896375235, "grad_norm": 6.366963345032425, "learning_rate": 9.756724627805228e-06, "loss": 17.3992, "step": 6933 }, { "epoch": 0.12674794816019888, "grad_norm": 6.433359645818439, "learning_rate": 9.756633410041412e-06, "loss": 17.5832, "step": 6934 }, { "epoch": 0.1267662273566454, "grad_norm": 8.018598139254433, "learning_rate": 9.756542175606009e-06, "loss": 18.1824, "step": 6935 }, { "epoch": 0.12678450655309192, "grad_norm": 6.591706798911266, "learning_rate": 9.756450924499337e-06, "loss": 17.482, "step": 6936 }, { "epoch": 0.12680278574953846, "grad_norm": 6.670475773897604, "learning_rate": 9.756359656721718e-06, "loss": 17.4916, "step": 6937 }, { "epoch": 0.12682106494598497, "grad_norm": 7.389590100695097, "learning_rate": 9.756268372273471e-06, "loss": 18.1617, "step": 6938 }, { "epoch": 0.1268393441424315, "grad_norm": 5.68649928573887, "learning_rate": 9.756177071154917e-06, "loss": 17.4034, "step": 6939 }, { "epoch": 0.126857623338878, "grad_norm": 7.291106003259725, "learning_rate": 9.756085753366374e-06, "loss": 17.7263, "step": 6940 }, { "epoch": 0.12687590253532455, "grad_norm": 7.8527776067626345, "learning_rate": 9.755994418908163e-06, "loss": 18.3024, "step": 6941 }, { "epoch": 0.12689418173177108, "grad_norm": 8.270895321656669, "learning_rate": 9.755903067780604e-06, "loss": 18.3105, "step": 6942 }, { "epoch": 0.1269124609282176, "grad_norm": 6.777498831085499, "learning_rate": 9.755811699984019e-06, "loss": 17.7027, "step": 6943 }, { "epoch": 0.12693074012466413, "grad_norm": 8.05246083098168, "learning_rate": 9.755720315518724e-06, "loss": 18.021, "step": 6944 }, { "epoch": 0.12694901932111063, "grad_norm": 7.863604419253536, "learning_rate": 9.755628914385045e-06, "loss": 17.9604, "step": 6945 }, { "epoch": 0.12696729851755717, "grad_norm": 7.362790015904113, "learning_rate": 9.755537496583299e-06, "loss": 17.7251, "step": 6946 }, { "epoch": 0.1269855777140037, "grad_norm": 6.790814436100857, "learning_rate": 9.755446062113804e-06, "loss": 17.7339, "step": 6947 }, { "epoch": 0.1270038569104502, "grad_norm": 6.046904756191634, "learning_rate": 9.755354610976887e-06, "loss": 17.222, "step": 6948 }, { "epoch": 0.12702213610689675, "grad_norm": 7.016191801706925, "learning_rate": 9.755263143172861e-06, "loss": 17.7207, "step": 6949 }, { "epoch": 0.12704041530334326, "grad_norm": 6.905456591821923, "learning_rate": 9.755171658702053e-06, "loss": 17.7975, "step": 6950 }, { "epoch": 0.1270586944997898, "grad_norm": 6.787073661730627, "learning_rate": 9.75508015756478e-06, "loss": 17.6786, "step": 6951 }, { "epoch": 0.1270769736962363, "grad_norm": 7.194243921241274, "learning_rate": 9.754988639761364e-06, "loss": 17.9999, "step": 6952 }, { "epoch": 0.12709525289268284, "grad_norm": 7.201920287961771, "learning_rate": 9.754897105292125e-06, "loss": 18.3577, "step": 6953 }, { "epoch": 0.12711353208912937, "grad_norm": 6.270448311111634, "learning_rate": 9.754805554157384e-06, "loss": 17.4376, "step": 6954 }, { "epoch": 0.12713181128557588, "grad_norm": 7.552657929665543, "learning_rate": 9.754713986357462e-06, "loss": 17.8489, "step": 6955 }, { "epoch": 0.12715009048202242, "grad_norm": 7.505230455309283, "learning_rate": 9.754622401892681e-06, "loss": 18.0299, "step": 6956 }, { "epoch": 0.12716836967846892, "grad_norm": 8.194620048769831, "learning_rate": 9.75453080076336e-06, "loss": 18.0597, "step": 6957 }, { "epoch": 0.12718664887491546, "grad_norm": 6.852443864005118, "learning_rate": 9.754439182969822e-06, "loss": 17.8304, "step": 6958 }, { "epoch": 0.127204928071362, "grad_norm": 8.07085489741905, "learning_rate": 9.754347548512388e-06, "loss": 18.2323, "step": 6959 }, { "epoch": 0.1272232072678085, "grad_norm": 7.321412998953154, "learning_rate": 9.754255897391378e-06, "loss": 17.8115, "step": 6960 }, { "epoch": 0.12724148646425504, "grad_norm": 6.655116694758986, "learning_rate": 9.754164229607112e-06, "loss": 17.566, "step": 6961 }, { "epoch": 0.12725976566070155, "grad_norm": 7.635913731930229, "learning_rate": 9.754072545159914e-06, "loss": 17.9166, "step": 6962 }, { "epoch": 0.12727804485714808, "grad_norm": 7.4899502176585635, "learning_rate": 9.753980844050104e-06, "loss": 18.1651, "step": 6963 }, { "epoch": 0.12729632405359462, "grad_norm": 7.474395602806392, "learning_rate": 9.753889126278004e-06, "loss": 18.2668, "step": 6964 }, { "epoch": 0.12731460325004113, "grad_norm": 6.253527536548421, "learning_rate": 9.753797391843936e-06, "loss": 17.3812, "step": 6965 }, { "epoch": 0.12733288244648766, "grad_norm": 6.303688954789781, "learning_rate": 9.753705640748219e-06, "loss": 17.4008, "step": 6966 }, { "epoch": 0.12735116164293417, "grad_norm": 7.603106855419932, "learning_rate": 9.753613872991176e-06, "loss": 18.1482, "step": 6967 }, { "epoch": 0.1273694408393807, "grad_norm": 6.2429332485433395, "learning_rate": 9.75352208857313e-06, "loss": 17.5214, "step": 6968 }, { "epoch": 0.1273877200358272, "grad_norm": 8.375732323577289, "learning_rate": 9.753430287494403e-06, "loss": 17.8937, "step": 6969 }, { "epoch": 0.12740599923227375, "grad_norm": 7.625430775782968, "learning_rate": 9.753338469755314e-06, "loss": 18.2324, "step": 6970 }, { "epoch": 0.12742427842872028, "grad_norm": 8.041214519123109, "learning_rate": 9.753246635356186e-06, "loss": 18.3126, "step": 6971 }, { "epoch": 0.1274425576251668, "grad_norm": 6.434700696600225, "learning_rate": 9.753154784297341e-06, "loss": 17.7066, "step": 6972 }, { "epoch": 0.12746083682161333, "grad_norm": 7.253644918125441, "learning_rate": 9.7530629165791e-06, "loss": 17.7911, "step": 6973 }, { "epoch": 0.12747911601805983, "grad_norm": 7.015871110635574, "learning_rate": 9.752971032201787e-06, "loss": 17.5315, "step": 6974 }, { "epoch": 0.12749739521450637, "grad_norm": 6.720732521649265, "learning_rate": 9.752879131165722e-06, "loss": 17.8203, "step": 6975 }, { "epoch": 0.1275156744109529, "grad_norm": 7.3385673433295535, "learning_rate": 9.752787213471229e-06, "loss": 17.9138, "step": 6976 }, { "epoch": 0.12753395360739941, "grad_norm": 8.646071528793843, "learning_rate": 9.752695279118629e-06, "loss": 18.4491, "step": 6977 }, { "epoch": 0.12755223280384595, "grad_norm": 7.223292956533253, "learning_rate": 9.752603328108245e-06, "loss": 18.0877, "step": 6978 }, { "epoch": 0.12757051200029246, "grad_norm": 10.667741010269506, "learning_rate": 9.752511360440397e-06, "loss": 18.8063, "step": 6979 }, { "epoch": 0.127588791196739, "grad_norm": 6.884591377310287, "learning_rate": 9.752419376115412e-06, "loss": 17.8854, "step": 6980 }, { "epoch": 0.12760707039318553, "grad_norm": 6.616257206443414, "learning_rate": 9.752327375133608e-06, "loss": 17.8793, "step": 6981 }, { "epoch": 0.12762534958963204, "grad_norm": 8.354632150166884, "learning_rate": 9.752235357495307e-06, "loss": 18.566, "step": 6982 }, { "epoch": 0.12764362878607857, "grad_norm": 5.616048601322087, "learning_rate": 9.752143323200837e-06, "loss": 17.2041, "step": 6983 }, { "epoch": 0.12766190798252508, "grad_norm": 7.053696647062669, "learning_rate": 9.752051272250516e-06, "loss": 17.7545, "step": 6984 }, { "epoch": 0.12768018717897162, "grad_norm": 7.646423507021111, "learning_rate": 9.751959204644665e-06, "loss": 18.1997, "step": 6985 }, { "epoch": 0.12769846637541812, "grad_norm": 7.470353436516299, "learning_rate": 9.751867120383611e-06, "loss": 18.1901, "step": 6986 }, { "epoch": 0.12771674557186466, "grad_norm": 5.582291147049489, "learning_rate": 9.751775019467677e-06, "loss": 17.353, "step": 6987 }, { "epoch": 0.1277350247683112, "grad_norm": 7.072988608141418, "learning_rate": 9.751682901897181e-06, "loss": 17.7201, "step": 6988 }, { "epoch": 0.1277533039647577, "grad_norm": 7.051201083526724, "learning_rate": 9.751590767672451e-06, "loss": 17.6554, "step": 6989 }, { "epoch": 0.12777158316120424, "grad_norm": 8.399054978627413, "learning_rate": 9.751498616793806e-06, "loss": 18.7966, "step": 6990 }, { "epoch": 0.12778986235765075, "grad_norm": 7.191940606262273, "learning_rate": 9.751406449261572e-06, "loss": 18.2619, "step": 6991 }, { "epoch": 0.12780814155409728, "grad_norm": 7.391879516430415, "learning_rate": 9.75131426507607e-06, "loss": 17.8321, "step": 6992 }, { "epoch": 0.12782642075054382, "grad_norm": 6.499690438607266, "learning_rate": 9.751222064237624e-06, "loss": 17.6385, "step": 6993 }, { "epoch": 0.12784469994699033, "grad_norm": 7.034250916249036, "learning_rate": 9.751129846746557e-06, "loss": 17.7252, "step": 6994 }, { "epoch": 0.12786297914343686, "grad_norm": 6.4572172651760065, "learning_rate": 9.75103761260319e-06, "loss": 17.5323, "step": 6995 }, { "epoch": 0.12788125833988337, "grad_norm": 6.04905851054078, "learning_rate": 9.750945361807852e-06, "loss": 17.4104, "step": 6996 }, { "epoch": 0.1278995375363299, "grad_norm": 6.133914125359956, "learning_rate": 9.750853094360861e-06, "loss": 17.4268, "step": 6997 }, { "epoch": 0.12791781673277644, "grad_norm": 5.781869773694671, "learning_rate": 9.750760810262543e-06, "loss": 17.2417, "step": 6998 }, { "epoch": 0.12793609592922295, "grad_norm": 6.7826479384989, "learning_rate": 9.750668509513219e-06, "loss": 17.4406, "step": 6999 }, { "epoch": 0.12795437512566948, "grad_norm": 8.786267572688896, "learning_rate": 9.750576192113216e-06, "loss": 18.801, "step": 7000 }, { "epoch": 0.127972654322116, "grad_norm": 5.910149796637377, "learning_rate": 9.750483858062856e-06, "loss": 17.5747, "step": 7001 }, { "epoch": 0.12799093351856253, "grad_norm": 8.012497036080479, "learning_rate": 9.75039150736246e-06, "loss": 18.2773, "step": 7002 }, { "epoch": 0.12800921271500904, "grad_norm": 6.451909566644818, "learning_rate": 9.750299140012355e-06, "loss": 17.5821, "step": 7003 }, { "epoch": 0.12802749191145557, "grad_norm": 7.423518602264178, "learning_rate": 9.750206756012864e-06, "loss": 17.8494, "step": 7004 }, { "epoch": 0.1280457711079021, "grad_norm": 8.361455105057958, "learning_rate": 9.750114355364311e-06, "loss": 18.4579, "step": 7005 }, { "epoch": 0.12806405030434861, "grad_norm": 7.878516491188714, "learning_rate": 9.750021938067018e-06, "loss": 18.3428, "step": 7006 }, { "epoch": 0.12808232950079515, "grad_norm": 7.737349443881559, "learning_rate": 9.74992950412131e-06, "loss": 17.9246, "step": 7007 }, { "epoch": 0.12810060869724166, "grad_norm": 6.69412927981068, "learning_rate": 9.749837053527512e-06, "loss": 17.9627, "step": 7008 }, { "epoch": 0.1281188878936882, "grad_norm": 8.423308730240684, "learning_rate": 9.749744586285948e-06, "loss": 18.1873, "step": 7009 }, { "epoch": 0.12813716709013473, "grad_norm": 5.8435805157842475, "learning_rate": 9.749652102396942e-06, "loss": 17.3101, "step": 7010 }, { "epoch": 0.12815544628658124, "grad_norm": 6.086695525064017, "learning_rate": 9.749559601860816e-06, "loss": 17.3515, "step": 7011 }, { "epoch": 0.12817372548302777, "grad_norm": 8.287445424367927, "learning_rate": 9.749467084677896e-06, "loss": 18.5489, "step": 7012 }, { "epoch": 0.12819200467947428, "grad_norm": 6.899548081555345, "learning_rate": 9.749374550848506e-06, "loss": 17.7959, "step": 7013 }, { "epoch": 0.12821028387592082, "grad_norm": 6.732953731533117, "learning_rate": 9.74928200037297e-06, "loss": 17.4766, "step": 7014 }, { "epoch": 0.12822856307236735, "grad_norm": 6.580377041773626, "learning_rate": 9.749189433251614e-06, "loss": 17.704, "step": 7015 }, { "epoch": 0.12824684226881386, "grad_norm": 9.179270160637689, "learning_rate": 9.74909684948476e-06, "loss": 18.4665, "step": 7016 }, { "epoch": 0.1282651214652604, "grad_norm": 7.6005357688836455, "learning_rate": 9.749004249072735e-06, "loss": 17.8569, "step": 7017 }, { "epoch": 0.1282834006617069, "grad_norm": 6.72430777417885, "learning_rate": 9.74891163201586e-06, "loss": 17.7693, "step": 7018 }, { "epoch": 0.12830167985815344, "grad_norm": 7.187779294795083, "learning_rate": 9.748818998314465e-06, "loss": 17.799, "step": 7019 }, { "epoch": 0.12831995905459995, "grad_norm": 8.76545881250089, "learning_rate": 9.748726347968868e-06, "loss": 17.7423, "step": 7020 }, { "epoch": 0.12833823825104648, "grad_norm": 6.881822173934101, "learning_rate": 9.7486336809794e-06, "loss": 17.5504, "step": 7021 }, { "epoch": 0.12835651744749302, "grad_norm": 6.769410542320792, "learning_rate": 9.748540997346382e-06, "loss": 17.7062, "step": 7022 }, { "epoch": 0.12837479664393953, "grad_norm": 8.1003885379815, "learning_rate": 9.74844829707014e-06, "loss": 18.3501, "step": 7023 }, { "epoch": 0.12839307584038606, "grad_norm": 7.0562596310985874, "learning_rate": 9.748355580150999e-06, "loss": 18.0205, "step": 7024 }, { "epoch": 0.12841135503683257, "grad_norm": 6.034469589388568, "learning_rate": 9.748262846589282e-06, "loss": 17.2974, "step": 7025 }, { "epoch": 0.1284296342332791, "grad_norm": 7.393384845502981, "learning_rate": 9.748170096385316e-06, "loss": 17.7875, "step": 7026 }, { "epoch": 0.12844791342972564, "grad_norm": 7.7241030594842695, "learning_rate": 9.748077329539428e-06, "loss": 18.3198, "step": 7027 }, { "epoch": 0.12846619262617215, "grad_norm": 7.704276417514002, "learning_rate": 9.747984546051941e-06, "loss": 17.7201, "step": 7028 }, { "epoch": 0.12848447182261868, "grad_norm": 7.267082095675214, "learning_rate": 9.747891745923177e-06, "loss": 17.913, "step": 7029 }, { "epoch": 0.1285027510190652, "grad_norm": 6.5439131308136735, "learning_rate": 9.747798929153467e-06, "loss": 17.6853, "step": 7030 }, { "epoch": 0.12852103021551173, "grad_norm": 7.366102539188293, "learning_rate": 9.747706095743134e-06, "loss": 17.9143, "step": 7031 }, { "epoch": 0.12853930941195826, "grad_norm": 7.780048607840062, "learning_rate": 9.747613245692503e-06, "loss": 17.9475, "step": 7032 }, { "epoch": 0.12855758860840477, "grad_norm": 9.331706422813205, "learning_rate": 9.747520379001898e-06, "loss": 18.4692, "step": 7033 }, { "epoch": 0.1285758678048513, "grad_norm": 6.554863952948978, "learning_rate": 9.747427495671646e-06, "loss": 17.8817, "step": 7034 }, { "epoch": 0.12859414700129781, "grad_norm": 6.619621923388074, "learning_rate": 9.747334595702073e-06, "loss": 17.5481, "step": 7035 }, { "epoch": 0.12861242619774435, "grad_norm": 7.2867554019539496, "learning_rate": 9.747241679093506e-06, "loss": 17.7352, "step": 7036 }, { "epoch": 0.12863070539419086, "grad_norm": 6.168823558666478, "learning_rate": 9.747148745846266e-06, "loss": 17.3254, "step": 7037 }, { "epoch": 0.1286489845906374, "grad_norm": 7.343672397809512, "learning_rate": 9.747055795960685e-06, "loss": 17.8599, "step": 7038 }, { "epoch": 0.12866726378708393, "grad_norm": 7.788202047403616, "learning_rate": 9.746962829437084e-06, "loss": 17.5477, "step": 7039 }, { "epoch": 0.12868554298353044, "grad_norm": 7.753770133924901, "learning_rate": 9.746869846275788e-06, "loss": 18.0494, "step": 7040 }, { "epoch": 0.12870382217997697, "grad_norm": 7.254213768272908, "learning_rate": 9.746776846477127e-06, "loss": 18.1565, "step": 7041 }, { "epoch": 0.12872210137642348, "grad_norm": 5.482515321584168, "learning_rate": 9.746683830041425e-06, "loss": 16.9708, "step": 7042 }, { "epoch": 0.12874038057287002, "grad_norm": 8.073657743389086, "learning_rate": 9.746590796969009e-06, "loss": 18.122, "step": 7043 }, { "epoch": 0.12875865976931655, "grad_norm": 6.337742609991647, "learning_rate": 9.746497747260202e-06, "loss": 17.7471, "step": 7044 }, { "epoch": 0.12877693896576306, "grad_norm": 6.861792871142012, "learning_rate": 9.746404680915334e-06, "loss": 17.674, "step": 7045 }, { "epoch": 0.1287952181622096, "grad_norm": 6.073323009333164, "learning_rate": 9.746311597934729e-06, "loss": 17.5672, "step": 7046 }, { "epoch": 0.1288134973586561, "grad_norm": 7.737803858797643, "learning_rate": 9.746218498318713e-06, "loss": 17.9827, "step": 7047 }, { "epoch": 0.12883177655510264, "grad_norm": 6.595915249149195, "learning_rate": 9.746125382067614e-06, "loss": 17.655, "step": 7048 }, { "epoch": 0.12885005575154918, "grad_norm": 7.94492656241729, "learning_rate": 9.746032249181755e-06, "loss": 17.9237, "step": 7049 }, { "epoch": 0.12886833494799568, "grad_norm": 7.785146945906904, "learning_rate": 9.745939099661467e-06, "loss": 18.0476, "step": 7050 }, { "epoch": 0.12888661414444222, "grad_norm": 7.299852152564878, "learning_rate": 9.745845933507075e-06, "loss": 18.0855, "step": 7051 }, { "epoch": 0.12890489334088873, "grad_norm": 6.080794940359898, "learning_rate": 9.745752750718904e-06, "loss": 17.081, "step": 7052 }, { "epoch": 0.12892317253733526, "grad_norm": 9.400370378916696, "learning_rate": 9.745659551297282e-06, "loss": 18.9966, "step": 7053 }, { "epoch": 0.12894145173378177, "grad_norm": 6.607722833662794, "learning_rate": 9.745566335242534e-06, "loss": 17.7522, "step": 7054 }, { "epoch": 0.1289597309302283, "grad_norm": 5.881197931608681, "learning_rate": 9.745473102554988e-06, "loss": 17.0914, "step": 7055 }, { "epoch": 0.12897801012667484, "grad_norm": 8.8913460947489, "learning_rate": 9.74537985323497e-06, "loss": 18.1216, "step": 7056 }, { "epoch": 0.12899628932312135, "grad_norm": 7.533926893007955, "learning_rate": 9.74528658728281e-06, "loss": 18.1686, "step": 7057 }, { "epoch": 0.12901456851956788, "grad_norm": 7.346160191812552, "learning_rate": 9.74519330469883e-06, "loss": 17.9362, "step": 7058 }, { "epoch": 0.1290328477160144, "grad_norm": 6.833229114822008, "learning_rate": 9.745100005483359e-06, "loss": 17.6809, "step": 7059 }, { "epoch": 0.12905112691246093, "grad_norm": 9.286908375591532, "learning_rate": 9.745006689636725e-06, "loss": 18.9791, "step": 7060 }, { "epoch": 0.12906940610890746, "grad_norm": 7.635266165167685, "learning_rate": 9.744913357159253e-06, "loss": 17.998, "step": 7061 }, { "epoch": 0.12908768530535397, "grad_norm": 7.121177886747499, "learning_rate": 9.744820008051275e-06, "loss": 17.8321, "step": 7062 }, { "epoch": 0.1291059645018005, "grad_norm": 5.4608475466962965, "learning_rate": 9.744726642313112e-06, "loss": 17.3149, "step": 7063 }, { "epoch": 0.12912424369824702, "grad_norm": 7.152818697394675, "learning_rate": 9.744633259945093e-06, "loss": 17.515, "step": 7064 }, { "epoch": 0.12914252289469355, "grad_norm": 6.100990233000748, "learning_rate": 9.744539860947548e-06, "loss": 17.2829, "step": 7065 }, { "epoch": 0.1291608020911401, "grad_norm": 6.021616617687753, "learning_rate": 9.744446445320801e-06, "loss": 17.3312, "step": 7066 }, { "epoch": 0.1291790812875866, "grad_norm": 6.535681209185815, "learning_rate": 9.744353013065183e-06, "loss": 17.4638, "step": 7067 }, { "epoch": 0.12919736048403313, "grad_norm": 7.784917741765711, "learning_rate": 9.744259564181016e-06, "loss": 18.0221, "step": 7068 }, { "epoch": 0.12921563968047964, "grad_norm": 7.617343393835883, "learning_rate": 9.744166098668635e-06, "loss": 17.9527, "step": 7069 }, { "epoch": 0.12923391887692617, "grad_norm": 7.90601165287111, "learning_rate": 9.74407261652836e-06, "loss": 18.1686, "step": 7070 }, { "epoch": 0.12925219807337268, "grad_norm": 7.008037531889261, "learning_rate": 9.743979117760525e-06, "loss": 17.8141, "step": 7071 }, { "epoch": 0.12927047726981922, "grad_norm": 8.37047423130954, "learning_rate": 9.743885602365453e-06, "loss": 18.0465, "step": 7072 }, { "epoch": 0.12928875646626575, "grad_norm": 6.9934475487925525, "learning_rate": 9.743792070343474e-06, "loss": 17.8282, "step": 7073 }, { "epoch": 0.12930703566271226, "grad_norm": 7.237583451545216, "learning_rate": 9.743698521694915e-06, "loss": 17.7588, "step": 7074 }, { "epoch": 0.1293253148591588, "grad_norm": 7.070649931872262, "learning_rate": 9.743604956420105e-06, "loss": 17.8541, "step": 7075 }, { "epoch": 0.1293435940556053, "grad_norm": 7.078642539692684, "learning_rate": 9.743511374519371e-06, "loss": 17.8196, "step": 7076 }, { "epoch": 0.12936187325205184, "grad_norm": 7.02475733593393, "learning_rate": 9.743417775993041e-06, "loss": 17.5844, "step": 7077 }, { "epoch": 0.12938015244849838, "grad_norm": 9.37372271448173, "learning_rate": 9.743324160841444e-06, "loss": 18.5396, "step": 7078 }, { "epoch": 0.12939843164494488, "grad_norm": 8.236034768064512, "learning_rate": 9.743230529064906e-06, "loss": 18.2293, "step": 7079 }, { "epoch": 0.12941671084139142, "grad_norm": 8.536656548092834, "learning_rate": 9.743136880663759e-06, "loss": 17.7928, "step": 7080 }, { "epoch": 0.12943499003783793, "grad_norm": 6.649512660286544, "learning_rate": 9.743043215638328e-06, "loss": 17.7391, "step": 7081 }, { "epoch": 0.12945326923428446, "grad_norm": 6.926789336046934, "learning_rate": 9.742949533988942e-06, "loss": 17.7954, "step": 7082 }, { "epoch": 0.129471548430731, "grad_norm": 7.691534260735286, "learning_rate": 9.742855835715928e-06, "loss": 18.1081, "step": 7083 }, { "epoch": 0.1294898276271775, "grad_norm": 7.150770709615593, "learning_rate": 9.742762120819618e-06, "loss": 17.947, "step": 7084 }, { "epoch": 0.12950810682362404, "grad_norm": 6.809459253562558, "learning_rate": 9.742668389300335e-06, "loss": 17.5234, "step": 7085 }, { "epoch": 0.12952638602007055, "grad_norm": 6.585411731518869, "learning_rate": 9.742574641158414e-06, "loss": 17.5973, "step": 7086 }, { "epoch": 0.12954466521651709, "grad_norm": 7.445803412206817, "learning_rate": 9.74248087639418e-06, "loss": 17.6825, "step": 7087 }, { "epoch": 0.1295629444129636, "grad_norm": 7.96816585816933, "learning_rate": 9.742387095007962e-06, "loss": 18.1658, "step": 7088 }, { "epoch": 0.12958122360941013, "grad_norm": 8.622071128910187, "learning_rate": 9.742293297000088e-06, "loss": 18.2712, "step": 7089 }, { "epoch": 0.12959950280585666, "grad_norm": 6.31027197922364, "learning_rate": 9.74219948237089e-06, "loss": 17.6311, "step": 7090 }, { "epoch": 0.12961778200230317, "grad_norm": 6.698700480938034, "learning_rate": 9.742105651120691e-06, "loss": 17.1604, "step": 7091 }, { "epoch": 0.1296360611987497, "grad_norm": 8.636028796784073, "learning_rate": 9.742011803249824e-06, "loss": 18.434, "step": 7092 }, { "epoch": 0.12965434039519622, "grad_norm": 7.473634804013628, "learning_rate": 9.741917938758617e-06, "loss": 17.9458, "step": 7093 }, { "epoch": 0.12967261959164275, "grad_norm": 8.03960125708875, "learning_rate": 9.7418240576474e-06, "loss": 18.0053, "step": 7094 }, { "epoch": 0.1296908987880893, "grad_norm": 6.118158265189932, "learning_rate": 9.7417301599165e-06, "loss": 17.3436, "step": 7095 }, { "epoch": 0.1297091779845358, "grad_norm": 7.823548294120425, "learning_rate": 9.741636245566248e-06, "loss": 18.0909, "step": 7096 }, { "epoch": 0.12972745718098233, "grad_norm": 8.919167180196236, "learning_rate": 9.741542314596973e-06, "loss": 18.4706, "step": 7097 }, { "epoch": 0.12974573637742884, "grad_norm": 6.797446443118667, "learning_rate": 9.741448367009003e-06, "loss": 17.5313, "step": 7098 }, { "epoch": 0.12976401557387537, "grad_norm": 6.891792027736653, "learning_rate": 9.741354402802668e-06, "loss": 17.7328, "step": 7099 }, { "epoch": 0.1297822947703219, "grad_norm": 6.24232287085528, "learning_rate": 9.741260421978297e-06, "loss": 17.4414, "step": 7100 }, { "epoch": 0.12980057396676842, "grad_norm": 8.769541465073212, "learning_rate": 9.74116642453622e-06, "loss": 18.6885, "step": 7101 }, { "epoch": 0.12981885316321495, "grad_norm": 7.405110796041737, "learning_rate": 9.741072410476766e-06, "loss": 17.6396, "step": 7102 }, { "epoch": 0.12983713235966146, "grad_norm": 6.332131906245619, "learning_rate": 9.740978379800265e-06, "loss": 17.5006, "step": 7103 }, { "epoch": 0.129855411556108, "grad_norm": 8.541532411360873, "learning_rate": 9.740884332507045e-06, "loss": 18.0133, "step": 7104 }, { "epoch": 0.1298736907525545, "grad_norm": 6.965117238326117, "learning_rate": 9.740790268597438e-06, "loss": 17.6445, "step": 7105 }, { "epoch": 0.12989196994900104, "grad_norm": 7.739700403058189, "learning_rate": 9.740696188071772e-06, "loss": 17.6502, "step": 7106 }, { "epoch": 0.12991024914544758, "grad_norm": 10.651807663877248, "learning_rate": 9.740602090930378e-06, "loss": 18.537, "step": 7107 }, { "epoch": 0.12992852834189408, "grad_norm": 7.011919563083695, "learning_rate": 9.740507977173585e-06, "loss": 17.766, "step": 7108 }, { "epoch": 0.12994680753834062, "grad_norm": 6.805176369720356, "learning_rate": 9.740413846801722e-06, "loss": 17.5963, "step": 7109 }, { "epoch": 0.12996508673478713, "grad_norm": 6.300468268748072, "learning_rate": 9.74031969981512e-06, "loss": 17.3778, "step": 7110 }, { "epoch": 0.12998336593123366, "grad_norm": 6.625256291228368, "learning_rate": 9.740225536214108e-06, "loss": 17.4592, "step": 7111 }, { "epoch": 0.1300016451276802, "grad_norm": 6.53112538703904, "learning_rate": 9.740131355999018e-06, "loss": 17.4195, "step": 7112 }, { "epoch": 0.1300199243241267, "grad_norm": 7.174879385056336, "learning_rate": 9.740037159170179e-06, "loss": 17.6717, "step": 7113 }, { "epoch": 0.13003820352057324, "grad_norm": 7.357221230274843, "learning_rate": 9.73994294572792e-06, "loss": 17.7064, "step": 7114 }, { "epoch": 0.13005648271701975, "grad_norm": 7.222772844113478, "learning_rate": 9.739848715672573e-06, "loss": 17.7126, "step": 7115 }, { "epoch": 0.13007476191346629, "grad_norm": 8.832836088421047, "learning_rate": 9.739754469004467e-06, "loss": 18.5505, "step": 7116 }, { "epoch": 0.13009304110991282, "grad_norm": 7.159356110822697, "learning_rate": 9.739660205723935e-06, "loss": 18.1078, "step": 7117 }, { "epoch": 0.13011132030635933, "grad_norm": 7.163747630000917, "learning_rate": 9.739565925831304e-06, "loss": 17.7092, "step": 7118 }, { "epoch": 0.13012959950280586, "grad_norm": 8.932830491978832, "learning_rate": 9.739471629326904e-06, "loss": 18.3991, "step": 7119 }, { "epoch": 0.13014787869925237, "grad_norm": 7.314547240989594, "learning_rate": 9.73937731621107e-06, "loss": 17.7927, "step": 7120 }, { "epoch": 0.1301661578956989, "grad_norm": 7.43226134822261, "learning_rate": 9.73928298648413e-06, "loss": 18.1889, "step": 7121 }, { "epoch": 0.13018443709214542, "grad_norm": 6.246887550119452, "learning_rate": 9.73918864014641e-06, "loss": 17.4334, "step": 7122 }, { "epoch": 0.13020271628859195, "grad_norm": 6.4398750407548295, "learning_rate": 9.739094277198249e-06, "loss": 17.699, "step": 7123 }, { "epoch": 0.1302209954850385, "grad_norm": 6.860107349364731, "learning_rate": 9.738999897639973e-06, "loss": 17.8638, "step": 7124 }, { "epoch": 0.130239274681485, "grad_norm": 9.113527874042134, "learning_rate": 9.738905501471914e-06, "loss": 18.2573, "step": 7125 }, { "epoch": 0.13025755387793153, "grad_norm": 7.166982317668824, "learning_rate": 9.738811088694401e-06, "loss": 17.4839, "step": 7126 }, { "epoch": 0.13027583307437804, "grad_norm": 5.822801345459667, "learning_rate": 9.738716659307767e-06, "loss": 17.2673, "step": 7127 }, { "epoch": 0.13029411227082457, "grad_norm": 7.079865700002657, "learning_rate": 9.738622213312343e-06, "loss": 18.0549, "step": 7128 }, { "epoch": 0.1303123914672711, "grad_norm": 6.880181479310935, "learning_rate": 9.738527750708458e-06, "loss": 17.8285, "step": 7129 }, { "epoch": 0.13033067066371762, "grad_norm": 6.270793460597133, "learning_rate": 9.738433271496445e-06, "loss": 17.3228, "step": 7130 }, { "epoch": 0.13034894986016415, "grad_norm": 6.520184219414416, "learning_rate": 9.738338775676634e-06, "loss": 17.5676, "step": 7131 }, { "epoch": 0.13036722905661066, "grad_norm": 6.757513125586967, "learning_rate": 9.73824426324936e-06, "loss": 17.7184, "step": 7132 }, { "epoch": 0.1303855082530572, "grad_norm": 8.613481182446472, "learning_rate": 9.738149734214947e-06, "loss": 18.1121, "step": 7133 }, { "epoch": 0.13040378744950373, "grad_norm": 7.9340290228511705, "learning_rate": 9.738055188573731e-06, "loss": 18.2459, "step": 7134 }, { "epoch": 0.13042206664595024, "grad_norm": 7.213911441171221, "learning_rate": 9.737960626326044e-06, "loss": 17.7702, "step": 7135 }, { "epoch": 0.13044034584239678, "grad_norm": 7.1004579036797955, "learning_rate": 9.737866047472215e-06, "loss": 17.7682, "step": 7136 }, { "epoch": 0.13045862503884328, "grad_norm": 6.839243807506448, "learning_rate": 9.737771452012579e-06, "loss": 17.8289, "step": 7137 }, { "epoch": 0.13047690423528982, "grad_norm": 8.331470974201615, "learning_rate": 9.737676839947463e-06, "loss": 18.0984, "step": 7138 }, { "epoch": 0.13049518343173633, "grad_norm": 6.714694618846473, "learning_rate": 9.7375822112772e-06, "loss": 17.7931, "step": 7139 }, { "epoch": 0.13051346262818286, "grad_norm": 7.5737923207037205, "learning_rate": 9.737487566002126e-06, "loss": 17.827, "step": 7140 }, { "epoch": 0.1305317418246294, "grad_norm": 7.105649428705973, "learning_rate": 9.737392904122565e-06, "loss": 17.9188, "step": 7141 }, { "epoch": 0.1305500210210759, "grad_norm": 7.47606356144569, "learning_rate": 9.737298225638856e-06, "loss": 17.7877, "step": 7142 }, { "epoch": 0.13056830021752244, "grad_norm": 7.131386039255281, "learning_rate": 9.737203530551327e-06, "loss": 17.7167, "step": 7143 }, { "epoch": 0.13058657941396895, "grad_norm": 7.9805889492847255, "learning_rate": 9.73710881886031e-06, "loss": 18.4009, "step": 7144 }, { "epoch": 0.1306048586104155, "grad_norm": 7.448580722022492, "learning_rate": 9.737014090566138e-06, "loss": 17.9732, "step": 7145 }, { "epoch": 0.13062313780686202, "grad_norm": 7.4679063591013275, "learning_rate": 9.736919345669142e-06, "loss": 17.9908, "step": 7146 }, { "epoch": 0.13064141700330853, "grad_norm": 7.384603113967751, "learning_rate": 9.736824584169656e-06, "loss": 17.4913, "step": 7147 }, { "epoch": 0.13065969619975507, "grad_norm": 7.806417424820133, "learning_rate": 9.73672980606801e-06, "loss": 18.0279, "step": 7148 }, { "epoch": 0.13067797539620157, "grad_norm": 6.997974581913617, "learning_rate": 9.736635011364538e-06, "loss": 17.92, "step": 7149 }, { "epoch": 0.1306962545926481, "grad_norm": 7.478948956993409, "learning_rate": 9.736540200059572e-06, "loss": 17.796, "step": 7150 }, { "epoch": 0.13071453378909464, "grad_norm": 6.024958741105644, "learning_rate": 9.736445372153441e-06, "loss": 17.4914, "step": 7151 }, { "epoch": 0.13073281298554115, "grad_norm": 7.520078447002026, "learning_rate": 9.736350527646481e-06, "loss": 18.1718, "step": 7152 }, { "epoch": 0.1307510921819877, "grad_norm": 7.159633598905982, "learning_rate": 9.736255666539026e-06, "loss": 17.9506, "step": 7153 }, { "epoch": 0.1307693713784342, "grad_norm": 7.515554897738069, "learning_rate": 9.736160788831401e-06, "loss": 18.3908, "step": 7154 }, { "epoch": 0.13078765057488073, "grad_norm": 8.618024991764836, "learning_rate": 9.736065894523947e-06, "loss": 18.1717, "step": 7155 }, { "epoch": 0.13080592977132724, "grad_norm": 6.254382251240645, "learning_rate": 9.735970983616992e-06, "loss": 17.5342, "step": 7156 }, { "epoch": 0.13082420896777378, "grad_norm": 7.67952894743319, "learning_rate": 9.73587605611087e-06, "loss": 18.0999, "step": 7157 }, { "epoch": 0.1308424881642203, "grad_norm": 7.759082523709545, "learning_rate": 9.735781112005913e-06, "loss": 17.9696, "step": 7158 }, { "epoch": 0.13086076736066682, "grad_norm": 6.85593738247165, "learning_rate": 9.735686151302455e-06, "loss": 17.8862, "step": 7159 }, { "epoch": 0.13087904655711335, "grad_norm": 6.5318907715428685, "learning_rate": 9.735591174000828e-06, "loss": 17.4204, "step": 7160 }, { "epoch": 0.13089732575355986, "grad_norm": 7.0040496867619115, "learning_rate": 9.735496180101362e-06, "loss": 17.637, "step": 7161 }, { "epoch": 0.1309156049500064, "grad_norm": 5.7779722178513016, "learning_rate": 9.735401169604396e-06, "loss": 17.3363, "step": 7162 }, { "epoch": 0.13093388414645293, "grad_norm": 8.303287070527276, "learning_rate": 9.735306142510259e-06, "loss": 18.3085, "step": 7163 }, { "epoch": 0.13095216334289944, "grad_norm": 7.110065308053239, "learning_rate": 9.735211098819283e-06, "loss": 17.6274, "step": 7164 }, { "epoch": 0.13097044253934598, "grad_norm": 6.262788790202189, "learning_rate": 9.735116038531806e-06, "loss": 17.3397, "step": 7165 }, { "epoch": 0.13098872173579248, "grad_norm": 8.564939431946543, "learning_rate": 9.735020961648156e-06, "loss": 17.9833, "step": 7166 }, { "epoch": 0.13100700093223902, "grad_norm": 6.293016637755449, "learning_rate": 9.73492586816867e-06, "loss": 17.4136, "step": 7167 }, { "epoch": 0.13102528012868556, "grad_norm": 6.822811223665426, "learning_rate": 9.734830758093679e-06, "loss": 17.5242, "step": 7168 }, { "epoch": 0.13104355932513206, "grad_norm": 7.397815281411342, "learning_rate": 9.734735631423517e-06, "loss": 17.8625, "step": 7169 }, { "epoch": 0.1310618385215786, "grad_norm": 6.853785375824345, "learning_rate": 9.734640488158517e-06, "loss": 17.7668, "step": 7170 }, { "epoch": 0.1310801177180251, "grad_norm": 8.2618386970712, "learning_rate": 9.734545328299014e-06, "loss": 18.3031, "step": 7171 }, { "epoch": 0.13109839691447164, "grad_norm": 6.532337976922704, "learning_rate": 9.734450151845341e-06, "loss": 17.4171, "step": 7172 }, { "epoch": 0.13111667611091815, "grad_norm": 7.061745401124181, "learning_rate": 9.734354958797829e-06, "loss": 17.7711, "step": 7173 }, { "epoch": 0.1311349553073647, "grad_norm": 8.452499012569488, "learning_rate": 9.734259749156815e-06, "loss": 18.0384, "step": 7174 }, { "epoch": 0.13115323450381122, "grad_norm": 6.123261611912143, "learning_rate": 9.734164522922631e-06, "loss": 17.498, "step": 7175 }, { "epoch": 0.13117151370025773, "grad_norm": 7.517392231596765, "learning_rate": 9.73406928009561e-06, "loss": 17.7518, "step": 7176 }, { "epoch": 0.13118979289670427, "grad_norm": 7.742621226274448, "learning_rate": 9.733974020676089e-06, "loss": 18.1766, "step": 7177 }, { "epoch": 0.13120807209315077, "grad_norm": 7.387095435548084, "learning_rate": 9.7338787446644e-06, "loss": 17.6182, "step": 7178 }, { "epoch": 0.1312263512895973, "grad_norm": 7.446250508000315, "learning_rate": 9.733783452060874e-06, "loss": 18.1282, "step": 7179 }, { "epoch": 0.13124463048604385, "grad_norm": 7.5390636994354505, "learning_rate": 9.73368814286585e-06, "loss": 18.2006, "step": 7180 }, { "epoch": 0.13126290968249035, "grad_norm": 7.599898660645298, "learning_rate": 9.733592817079661e-06, "loss": 17.602, "step": 7181 }, { "epoch": 0.1312811888789369, "grad_norm": 6.262963191082417, "learning_rate": 9.733497474702638e-06, "loss": 17.0976, "step": 7182 }, { "epoch": 0.1312994680753834, "grad_norm": 6.14727093181255, "learning_rate": 9.733402115735117e-06, "loss": 17.4255, "step": 7183 }, { "epoch": 0.13131774727182993, "grad_norm": 5.7133178099579665, "learning_rate": 9.733306740177432e-06, "loss": 16.9744, "step": 7184 }, { "epoch": 0.13133602646827647, "grad_norm": 8.507051019811717, "learning_rate": 9.73321134802992e-06, "loss": 18.5356, "step": 7185 }, { "epoch": 0.13135430566472298, "grad_norm": 9.555240157396033, "learning_rate": 9.73311593929291e-06, "loss": 18.7287, "step": 7186 }, { "epoch": 0.1313725848611695, "grad_norm": 7.73195458476903, "learning_rate": 9.73302051396674e-06, "loss": 17.776, "step": 7187 }, { "epoch": 0.13139086405761602, "grad_norm": 6.3066851006271785, "learning_rate": 9.732925072051746e-06, "loss": 17.4364, "step": 7188 }, { "epoch": 0.13140914325406255, "grad_norm": 6.530512281586624, "learning_rate": 9.732829613548258e-06, "loss": 17.4735, "step": 7189 }, { "epoch": 0.13142742245050906, "grad_norm": 7.670285926957407, "learning_rate": 9.732734138456614e-06, "loss": 18.0228, "step": 7190 }, { "epoch": 0.1314457016469556, "grad_norm": 6.881575360490226, "learning_rate": 9.732638646777148e-06, "loss": 17.6759, "step": 7191 }, { "epoch": 0.13146398084340213, "grad_norm": 9.05563547716694, "learning_rate": 9.732543138510193e-06, "loss": 18.2275, "step": 7192 }, { "epoch": 0.13148226003984864, "grad_norm": 7.306381515657076, "learning_rate": 9.732447613656087e-06, "loss": 17.9175, "step": 7193 }, { "epoch": 0.13150053923629518, "grad_norm": 9.280282371695481, "learning_rate": 9.732352072215162e-06, "loss": 17.8701, "step": 7194 }, { "epoch": 0.13151881843274169, "grad_norm": 7.755610675155056, "learning_rate": 9.73225651418775e-06, "loss": 17.9723, "step": 7195 }, { "epoch": 0.13153709762918822, "grad_norm": 5.94216702467463, "learning_rate": 9.732160939574194e-06, "loss": 17.276, "step": 7196 }, { "epoch": 0.13155537682563476, "grad_norm": 5.8325769245965935, "learning_rate": 9.732065348374821e-06, "loss": 17.3216, "step": 7197 }, { "epoch": 0.13157365602208126, "grad_norm": 7.8918461462941405, "learning_rate": 9.731969740589972e-06, "loss": 17.9062, "step": 7198 }, { "epoch": 0.1315919352185278, "grad_norm": 5.9828504034800085, "learning_rate": 9.731874116219981e-06, "loss": 17.3003, "step": 7199 }, { "epoch": 0.1316102144149743, "grad_norm": 6.271695218333179, "learning_rate": 9.73177847526518e-06, "loss": 17.408, "step": 7200 }, { "epoch": 0.13162849361142084, "grad_norm": 6.884913084335879, "learning_rate": 9.731682817725907e-06, "loss": 17.5766, "step": 7201 }, { "epoch": 0.13164677280786738, "grad_norm": 6.887401816460929, "learning_rate": 9.731587143602494e-06, "loss": 17.8024, "step": 7202 }, { "epoch": 0.1316650520043139, "grad_norm": 6.825550600345836, "learning_rate": 9.731491452895281e-06, "loss": 17.7197, "step": 7203 }, { "epoch": 0.13168333120076042, "grad_norm": 8.208850888222933, "learning_rate": 9.7313957456046e-06, "loss": 18.3821, "step": 7204 }, { "epoch": 0.13170161039720693, "grad_norm": 8.074895936854027, "learning_rate": 9.731300021730787e-06, "loss": 18.7384, "step": 7205 }, { "epoch": 0.13171988959365347, "grad_norm": 7.821903652140027, "learning_rate": 9.731204281274178e-06, "loss": 18.4072, "step": 7206 }, { "epoch": 0.13173816879009997, "grad_norm": 7.682609503179762, "learning_rate": 9.73110852423511e-06, "loss": 17.8128, "step": 7207 }, { "epoch": 0.1317564479865465, "grad_norm": 6.782940292836674, "learning_rate": 9.731012750613918e-06, "loss": 17.7275, "step": 7208 }, { "epoch": 0.13177472718299305, "grad_norm": 6.679171848756854, "learning_rate": 9.730916960410934e-06, "loss": 17.4771, "step": 7209 }, { "epoch": 0.13179300637943955, "grad_norm": 10.636301648491877, "learning_rate": 9.730821153626497e-06, "loss": 17.6422, "step": 7210 }, { "epoch": 0.1318112855758861, "grad_norm": 7.2774302201233985, "learning_rate": 9.730725330260945e-06, "loss": 17.6334, "step": 7211 }, { "epoch": 0.1318295647723326, "grad_norm": 9.41703242932954, "learning_rate": 9.730629490314609e-06, "loss": 18.422, "step": 7212 }, { "epoch": 0.13184784396877913, "grad_norm": 6.869794181421306, "learning_rate": 9.730533633787827e-06, "loss": 17.5357, "step": 7213 }, { "epoch": 0.13186612316522567, "grad_norm": 8.155327972728386, "learning_rate": 9.730437760680936e-06, "loss": 18.3217, "step": 7214 }, { "epoch": 0.13188440236167218, "grad_norm": 7.624147658658768, "learning_rate": 9.73034187099427e-06, "loss": 17.7722, "step": 7215 }, { "epoch": 0.1319026815581187, "grad_norm": 7.420377534640088, "learning_rate": 9.730245964728167e-06, "loss": 17.9153, "step": 7216 }, { "epoch": 0.13192096075456522, "grad_norm": 8.872178765800202, "learning_rate": 9.730150041882962e-06, "loss": 18.5382, "step": 7217 }, { "epoch": 0.13193923995101176, "grad_norm": 7.320922652635634, "learning_rate": 9.730054102458992e-06, "loss": 18.0761, "step": 7218 }, { "epoch": 0.1319575191474583, "grad_norm": 7.280140268895857, "learning_rate": 9.729958146456593e-06, "loss": 17.4843, "step": 7219 }, { "epoch": 0.1319757983439048, "grad_norm": 7.547182011375449, "learning_rate": 9.729862173876102e-06, "loss": 17.7176, "step": 7220 }, { "epoch": 0.13199407754035133, "grad_norm": 7.300794574100374, "learning_rate": 9.729766184717853e-06, "loss": 17.5465, "step": 7221 }, { "epoch": 0.13201235673679784, "grad_norm": 7.611837634531434, "learning_rate": 9.729670178982184e-06, "loss": 17.9243, "step": 7222 }, { "epoch": 0.13203063593324438, "grad_norm": 7.081527995399049, "learning_rate": 9.729574156669433e-06, "loss": 17.7627, "step": 7223 }, { "epoch": 0.13204891512969089, "grad_norm": 8.915259132218761, "learning_rate": 9.729478117779933e-06, "loss": 18.1401, "step": 7224 }, { "epoch": 0.13206719432613742, "grad_norm": 8.292487473290121, "learning_rate": 9.729382062314023e-06, "loss": 18.0996, "step": 7225 }, { "epoch": 0.13208547352258396, "grad_norm": 6.296746454594278, "learning_rate": 9.72928599027204e-06, "loss": 17.5431, "step": 7226 }, { "epoch": 0.13210375271903047, "grad_norm": 7.2671164091935285, "learning_rate": 9.72918990165432e-06, "loss": 17.8938, "step": 7227 }, { "epoch": 0.132122031915477, "grad_norm": 6.9395733615132835, "learning_rate": 9.7290937964612e-06, "loss": 17.7086, "step": 7228 }, { "epoch": 0.1321403111119235, "grad_norm": 7.3315954949805295, "learning_rate": 9.728997674693015e-06, "loss": 17.3778, "step": 7229 }, { "epoch": 0.13215859030837004, "grad_norm": 7.9153149440768855, "learning_rate": 9.728901536350106e-06, "loss": 18.2457, "step": 7230 }, { "epoch": 0.13217686950481658, "grad_norm": 6.8914822685486925, "learning_rate": 9.728805381432805e-06, "loss": 17.5621, "step": 7231 }, { "epoch": 0.1321951487012631, "grad_norm": 8.83534302114429, "learning_rate": 9.728709209941453e-06, "loss": 18.7026, "step": 7232 }, { "epoch": 0.13221342789770962, "grad_norm": 6.966925026227877, "learning_rate": 9.728613021876385e-06, "loss": 17.4717, "step": 7233 }, { "epoch": 0.13223170709415613, "grad_norm": 6.289173813053085, "learning_rate": 9.728516817237939e-06, "loss": 17.4835, "step": 7234 }, { "epoch": 0.13224998629060267, "grad_norm": 6.135824237560181, "learning_rate": 9.72842059602645e-06, "loss": 17.3179, "step": 7235 }, { "epoch": 0.1322682654870492, "grad_norm": 6.6226126286860865, "learning_rate": 9.72832435824226e-06, "loss": 17.4737, "step": 7236 }, { "epoch": 0.1322865446834957, "grad_norm": 6.62079242021924, "learning_rate": 9.728228103885702e-06, "loss": 17.5764, "step": 7237 }, { "epoch": 0.13230482387994225, "grad_norm": 10.527562765818056, "learning_rate": 9.728131832957115e-06, "loss": 17.4243, "step": 7238 }, { "epoch": 0.13232310307638875, "grad_norm": 7.049840722526844, "learning_rate": 9.728035545456837e-06, "loss": 17.6683, "step": 7239 }, { "epoch": 0.1323413822728353, "grad_norm": 5.980215077656075, "learning_rate": 9.727939241385203e-06, "loss": 17.0616, "step": 7240 }, { "epoch": 0.1323596614692818, "grad_norm": 7.133091112035594, "learning_rate": 9.727842920742554e-06, "loss": 17.6764, "step": 7241 }, { "epoch": 0.13237794066572833, "grad_norm": 7.454956226368991, "learning_rate": 9.727746583529225e-06, "loss": 18.0583, "step": 7242 }, { "epoch": 0.13239621986217487, "grad_norm": 8.062267426384313, "learning_rate": 9.727650229745554e-06, "loss": 18.0237, "step": 7243 }, { "epoch": 0.13241449905862138, "grad_norm": 7.5373814766321, "learning_rate": 9.727553859391881e-06, "loss": 17.7991, "step": 7244 }, { "epoch": 0.1324327782550679, "grad_norm": 7.597588160329692, "learning_rate": 9.72745747246854e-06, "loss": 18.081, "step": 7245 }, { "epoch": 0.13245105745151442, "grad_norm": 6.602527071310561, "learning_rate": 9.727361068975871e-06, "loss": 17.7052, "step": 7246 }, { "epoch": 0.13246933664796096, "grad_norm": 6.3018292516121575, "learning_rate": 9.727264648914212e-06, "loss": 17.4789, "step": 7247 }, { "epoch": 0.1324876158444075, "grad_norm": 6.348523634478017, "learning_rate": 9.727168212283902e-06, "loss": 17.2301, "step": 7248 }, { "epoch": 0.132505895040854, "grad_norm": 7.582114574713264, "learning_rate": 9.727071759085275e-06, "loss": 18.2832, "step": 7249 }, { "epoch": 0.13252417423730053, "grad_norm": 6.408570717052727, "learning_rate": 9.726975289318674e-06, "loss": 17.2156, "step": 7250 }, { "epoch": 0.13254245343374704, "grad_norm": 6.670369978913939, "learning_rate": 9.726878802984434e-06, "loss": 17.6147, "step": 7251 }, { "epoch": 0.13256073263019358, "grad_norm": 8.150834883500712, "learning_rate": 9.726782300082893e-06, "loss": 17.8667, "step": 7252 }, { "epoch": 0.13257901182664011, "grad_norm": 8.54348521469152, "learning_rate": 9.72668578061439e-06, "loss": 19.1827, "step": 7253 }, { "epoch": 0.13259729102308662, "grad_norm": 7.161062456562297, "learning_rate": 9.726589244579265e-06, "loss": 17.9603, "step": 7254 }, { "epoch": 0.13261557021953316, "grad_norm": 6.987094257255618, "learning_rate": 9.726492691977856e-06, "loss": 17.6345, "step": 7255 }, { "epoch": 0.13263384941597967, "grad_norm": 7.33513881872116, "learning_rate": 9.726396122810497e-06, "loss": 17.6273, "step": 7256 }, { "epoch": 0.1326521286124262, "grad_norm": 5.771420058036647, "learning_rate": 9.726299537077533e-06, "loss": 17.1635, "step": 7257 }, { "epoch": 0.1326704078088727, "grad_norm": 5.634709424961658, "learning_rate": 9.726202934779297e-06, "loss": 16.9322, "step": 7258 }, { "epoch": 0.13268868700531924, "grad_norm": 6.113352690103021, "learning_rate": 9.726106315916131e-06, "loss": 17.2111, "step": 7259 }, { "epoch": 0.13270696620176578, "grad_norm": 7.089025293622658, "learning_rate": 9.726009680488371e-06, "loss": 17.8984, "step": 7260 }, { "epoch": 0.1327252453982123, "grad_norm": 6.380160783041163, "learning_rate": 9.725913028496359e-06, "loss": 17.4098, "step": 7261 }, { "epoch": 0.13274352459465882, "grad_norm": 7.805516414406185, "learning_rate": 9.72581635994043e-06, "loss": 18.1524, "step": 7262 }, { "epoch": 0.13276180379110533, "grad_norm": 6.972621356864681, "learning_rate": 9.725719674820926e-06, "loss": 17.4139, "step": 7263 }, { "epoch": 0.13278008298755187, "grad_norm": 6.571282474587021, "learning_rate": 9.725622973138185e-06, "loss": 17.2932, "step": 7264 }, { "epoch": 0.1327983621839984, "grad_norm": 7.598398266168324, "learning_rate": 9.725526254892544e-06, "loss": 18.0883, "step": 7265 }, { "epoch": 0.1328166413804449, "grad_norm": 8.096978763128138, "learning_rate": 9.725429520084345e-06, "loss": 18.2204, "step": 7266 }, { "epoch": 0.13283492057689145, "grad_norm": 7.3218974170460225, "learning_rate": 9.725332768713924e-06, "loss": 18.1063, "step": 7267 }, { "epoch": 0.13285319977333795, "grad_norm": 6.885678643390233, "learning_rate": 9.725236000781623e-06, "loss": 17.9394, "step": 7268 }, { "epoch": 0.1328714789697845, "grad_norm": 6.873700299264502, "learning_rate": 9.72513921628778e-06, "loss": 17.9246, "step": 7269 }, { "epoch": 0.13288975816623103, "grad_norm": 6.7128758381037725, "learning_rate": 9.725042415232734e-06, "loss": 17.5954, "step": 7270 }, { "epoch": 0.13290803736267753, "grad_norm": 12.04240883480532, "learning_rate": 9.724945597616824e-06, "loss": 17.5589, "step": 7271 }, { "epoch": 0.13292631655912407, "grad_norm": 5.888560453507167, "learning_rate": 9.724848763440389e-06, "loss": 17.2392, "step": 7272 }, { "epoch": 0.13294459575557058, "grad_norm": 5.959386286115697, "learning_rate": 9.72475191270377e-06, "loss": 17.0283, "step": 7273 }, { "epoch": 0.1329628749520171, "grad_norm": 7.266865757445522, "learning_rate": 9.724655045407306e-06, "loss": 18.0163, "step": 7274 }, { "epoch": 0.13298115414846362, "grad_norm": 8.959452733145644, "learning_rate": 9.724558161551335e-06, "loss": 18.9249, "step": 7275 }, { "epoch": 0.13299943334491016, "grad_norm": 7.953370336785721, "learning_rate": 9.724461261136198e-06, "loss": 18.1367, "step": 7276 }, { "epoch": 0.1330177125413567, "grad_norm": 6.98834233648399, "learning_rate": 9.724364344162234e-06, "loss": 18.0126, "step": 7277 }, { "epoch": 0.1330359917378032, "grad_norm": 8.68664005119283, "learning_rate": 9.724267410629785e-06, "loss": 18.7515, "step": 7278 }, { "epoch": 0.13305427093424974, "grad_norm": 7.220871982516756, "learning_rate": 9.724170460539185e-06, "loss": 17.8004, "step": 7279 }, { "epoch": 0.13307255013069624, "grad_norm": 7.41225811833119, "learning_rate": 9.72407349389078e-06, "loss": 17.9176, "step": 7280 }, { "epoch": 0.13309082932714278, "grad_norm": 9.264816320895811, "learning_rate": 9.723976510684907e-06, "loss": 18.2521, "step": 7281 }, { "epoch": 0.13310910852358931, "grad_norm": 6.796793887604724, "learning_rate": 9.723879510921904e-06, "loss": 17.8195, "step": 7282 }, { "epoch": 0.13312738772003582, "grad_norm": 6.742387205753071, "learning_rate": 9.723782494602117e-06, "loss": 17.3863, "step": 7283 }, { "epoch": 0.13314566691648236, "grad_norm": 7.413465376909987, "learning_rate": 9.72368546172588e-06, "loss": 17.8029, "step": 7284 }, { "epoch": 0.13316394611292887, "grad_norm": 7.306590371199734, "learning_rate": 9.723588412293536e-06, "loss": 17.8976, "step": 7285 }, { "epoch": 0.1331822253093754, "grad_norm": 7.805619937583306, "learning_rate": 9.723491346305426e-06, "loss": 18.1022, "step": 7286 }, { "epoch": 0.13320050450582194, "grad_norm": 9.380610587391823, "learning_rate": 9.723394263761885e-06, "loss": 18.6342, "step": 7287 }, { "epoch": 0.13321878370226845, "grad_norm": 7.2669165607591655, "learning_rate": 9.72329716466326e-06, "loss": 17.7078, "step": 7288 }, { "epoch": 0.13323706289871498, "grad_norm": 6.1314312177878065, "learning_rate": 9.723200049009886e-06, "loss": 17.2683, "step": 7289 }, { "epoch": 0.1332553420951615, "grad_norm": 7.459467060270579, "learning_rate": 9.723102916802108e-06, "loss": 17.8149, "step": 7290 }, { "epoch": 0.13327362129160802, "grad_norm": 5.240316852090448, "learning_rate": 9.723005768040264e-06, "loss": 16.8855, "step": 7291 }, { "epoch": 0.13329190048805453, "grad_norm": 6.234999252504696, "learning_rate": 9.722908602724693e-06, "loss": 17.2466, "step": 7292 }, { "epoch": 0.13331017968450107, "grad_norm": 7.8177911789367, "learning_rate": 9.722811420855738e-06, "loss": 18.1054, "step": 7293 }, { "epoch": 0.1333284588809476, "grad_norm": 7.301076125871522, "learning_rate": 9.722714222433738e-06, "loss": 17.9058, "step": 7294 }, { "epoch": 0.1333467380773941, "grad_norm": 7.592960509600471, "learning_rate": 9.722617007459037e-06, "loss": 18.154, "step": 7295 }, { "epoch": 0.13336501727384065, "grad_norm": 6.44851950623422, "learning_rate": 9.72251977593197e-06, "loss": 17.6372, "step": 7296 }, { "epoch": 0.13338329647028715, "grad_norm": 6.131893496868285, "learning_rate": 9.722422527852883e-06, "loss": 17.4133, "step": 7297 }, { "epoch": 0.1334015756667337, "grad_norm": 7.4913639236958, "learning_rate": 9.722325263222114e-06, "loss": 17.7869, "step": 7298 }, { "epoch": 0.13341985486318023, "grad_norm": 8.281805478945252, "learning_rate": 9.722227982040004e-06, "loss": 18.1848, "step": 7299 }, { "epoch": 0.13343813405962673, "grad_norm": 8.013137361924885, "learning_rate": 9.722130684306897e-06, "loss": 18.0568, "step": 7300 }, { "epoch": 0.13345641325607327, "grad_norm": 6.442051320790764, "learning_rate": 9.722033370023129e-06, "loss": 17.4183, "step": 7301 }, { "epoch": 0.13347469245251978, "grad_norm": 7.030411239913883, "learning_rate": 9.721936039189046e-06, "loss": 17.4873, "step": 7302 }, { "epoch": 0.1334929716489663, "grad_norm": 8.713878967086993, "learning_rate": 9.721838691804986e-06, "loss": 18.4822, "step": 7303 }, { "epoch": 0.13351125084541285, "grad_norm": 7.3964301208580014, "learning_rate": 9.721741327871291e-06, "loss": 17.7409, "step": 7304 }, { "epoch": 0.13352953004185936, "grad_norm": 6.409802596344187, "learning_rate": 9.721643947388304e-06, "loss": 17.4356, "step": 7305 }, { "epoch": 0.1335478092383059, "grad_norm": 6.553199538093425, "learning_rate": 9.721546550356362e-06, "loss": 17.635, "step": 7306 }, { "epoch": 0.1335660884347524, "grad_norm": 7.855535169751103, "learning_rate": 9.721449136775811e-06, "loss": 17.5149, "step": 7307 }, { "epoch": 0.13358436763119894, "grad_norm": 7.926555715016358, "learning_rate": 9.72135170664699e-06, "loss": 17.9294, "step": 7308 }, { "epoch": 0.13360264682764544, "grad_norm": 8.247210705388246, "learning_rate": 9.721254259970241e-06, "loss": 17.9499, "step": 7309 }, { "epoch": 0.13362092602409198, "grad_norm": 6.916463039840437, "learning_rate": 9.721156796745905e-06, "loss": 17.5853, "step": 7310 }, { "epoch": 0.13363920522053852, "grad_norm": 8.408292063266318, "learning_rate": 9.721059316974324e-06, "loss": 17.7235, "step": 7311 }, { "epoch": 0.13365748441698502, "grad_norm": 6.352660543096414, "learning_rate": 9.72096182065584e-06, "loss": 17.5232, "step": 7312 }, { "epoch": 0.13367576361343156, "grad_norm": 7.349600166175085, "learning_rate": 9.720864307790796e-06, "loss": 18.1619, "step": 7313 }, { "epoch": 0.13369404280987807, "grad_norm": 7.874987001024496, "learning_rate": 9.720766778379531e-06, "loss": 18.2057, "step": 7314 }, { "epoch": 0.1337123220063246, "grad_norm": 7.334889029040655, "learning_rate": 9.720669232422388e-06, "loss": 17.6721, "step": 7315 }, { "epoch": 0.13373060120277114, "grad_norm": 6.819419836540446, "learning_rate": 9.72057166991971e-06, "loss": 17.7121, "step": 7316 }, { "epoch": 0.13374888039921765, "grad_norm": 6.804240942531425, "learning_rate": 9.720474090871836e-06, "loss": 17.8763, "step": 7317 }, { "epoch": 0.13376715959566418, "grad_norm": 5.970110473439147, "learning_rate": 9.720376495279111e-06, "loss": 17.3412, "step": 7318 }, { "epoch": 0.1337854387921107, "grad_norm": 7.229349135900173, "learning_rate": 9.720278883141876e-06, "loss": 17.9148, "step": 7319 }, { "epoch": 0.13380371798855722, "grad_norm": 8.226883619932043, "learning_rate": 9.720181254460473e-06, "loss": 18.4782, "step": 7320 }, { "epoch": 0.13382199718500376, "grad_norm": 7.727062975450835, "learning_rate": 9.720083609235244e-06, "loss": 18.0103, "step": 7321 }, { "epoch": 0.13384027638145027, "grad_norm": 7.473018375793038, "learning_rate": 9.719985947466532e-06, "loss": 17.8981, "step": 7322 }, { "epoch": 0.1338585555778968, "grad_norm": 6.779991513233267, "learning_rate": 9.719888269154679e-06, "loss": 17.7578, "step": 7323 }, { "epoch": 0.1338768347743433, "grad_norm": 8.484725122790097, "learning_rate": 9.719790574300026e-06, "loss": 18.3271, "step": 7324 }, { "epoch": 0.13389511397078985, "grad_norm": 6.646616396806981, "learning_rate": 9.719692862902919e-06, "loss": 17.3037, "step": 7325 }, { "epoch": 0.13391339316723636, "grad_norm": 7.235995780937685, "learning_rate": 9.719595134963694e-06, "loss": 17.7542, "step": 7326 }, { "epoch": 0.1339316723636829, "grad_norm": 7.072527221787121, "learning_rate": 9.719497390482701e-06, "loss": 17.8879, "step": 7327 }, { "epoch": 0.13394995156012943, "grad_norm": 6.8811664976307645, "learning_rate": 9.71939962946028e-06, "loss": 17.525, "step": 7328 }, { "epoch": 0.13396823075657593, "grad_norm": 8.695005723193574, "learning_rate": 9.71930185189677e-06, "loss": 18.2196, "step": 7329 }, { "epoch": 0.13398650995302247, "grad_norm": 6.1970851068351935, "learning_rate": 9.719204057792517e-06, "loss": 17.4303, "step": 7330 }, { "epoch": 0.13400478914946898, "grad_norm": 7.213331679947561, "learning_rate": 9.719106247147864e-06, "loss": 17.6202, "step": 7331 }, { "epoch": 0.1340230683459155, "grad_norm": 5.970498912276426, "learning_rate": 9.719008419963153e-06, "loss": 17.2458, "step": 7332 }, { "epoch": 0.13404134754236205, "grad_norm": 7.097097701811893, "learning_rate": 9.718910576238728e-06, "loss": 17.7279, "step": 7333 }, { "epoch": 0.13405962673880856, "grad_norm": 7.759600890157557, "learning_rate": 9.71881271597493e-06, "loss": 17.9428, "step": 7334 }, { "epoch": 0.1340779059352551, "grad_norm": 6.609218161336705, "learning_rate": 9.718714839172103e-06, "loss": 17.6602, "step": 7335 }, { "epoch": 0.1340961851317016, "grad_norm": 7.715512359303132, "learning_rate": 9.71861694583059e-06, "loss": 18.1546, "step": 7336 }, { "epoch": 0.13411446432814814, "grad_norm": 7.113875413837293, "learning_rate": 9.718519035950733e-06, "loss": 17.6451, "step": 7337 }, { "epoch": 0.13413274352459467, "grad_norm": 6.7710599191576515, "learning_rate": 9.718421109532879e-06, "loss": 17.474, "step": 7338 }, { "epoch": 0.13415102272104118, "grad_norm": 7.337391508280774, "learning_rate": 9.718323166577367e-06, "loss": 17.5065, "step": 7339 }, { "epoch": 0.13416930191748772, "grad_norm": 6.347988914894903, "learning_rate": 9.718225207084539e-06, "loss": 17.6424, "step": 7340 }, { "epoch": 0.13418758111393422, "grad_norm": 7.442010441490582, "learning_rate": 9.718127231054745e-06, "loss": 18.2821, "step": 7341 }, { "epoch": 0.13420586031038076, "grad_norm": 7.816204751329472, "learning_rate": 9.718029238488322e-06, "loss": 18.1054, "step": 7342 }, { "epoch": 0.13422413950682727, "grad_norm": 6.817636326102619, "learning_rate": 9.717931229385618e-06, "loss": 17.7242, "step": 7343 }, { "epoch": 0.1342424187032738, "grad_norm": 7.613804038438869, "learning_rate": 9.717833203746974e-06, "loss": 17.922, "step": 7344 }, { "epoch": 0.13426069789972034, "grad_norm": 6.112081337830192, "learning_rate": 9.717735161572732e-06, "loss": 17.2962, "step": 7345 }, { "epoch": 0.13427897709616685, "grad_norm": 6.61055652895237, "learning_rate": 9.71763710286324e-06, "loss": 17.5574, "step": 7346 }, { "epoch": 0.13429725629261338, "grad_norm": 7.126469762062783, "learning_rate": 9.717539027618837e-06, "loss": 17.8764, "step": 7347 }, { "epoch": 0.1343155354890599, "grad_norm": 6.825928999519179, "learning_rate": 9.71744093583987e-06, "loss": 17.5086, "step": 7348 }, { "epoch": 0.13433381468550643, "grad_norm": 6.6660706796394, "learning_rate": 9.717342827526684e-06, "loss": 17.8061, "step": 7349 }, { "epoch": 0.13435209388195296, "grad_norm": 9.300381817120178, "learning_rate": 9.717244702679618e-06, "loss": 17.9765, "step": 7350 }, { "epoch": 0.13437037307839947, "grad_norm": 6.728624399804422, "learning_rate": 9.71714656129902e-06, "loss": 17.5401, "step": 7351 }, { "epoch": 0.134388652274846, "grad_norm": 6.380027503238772, "learning_rate": 9.717048403385231e-06, "loss": 17.2511, "step": 7352 }, { "epoch": 0.1344069314712925, "grad_norm": 7.939909625296418, "learning_rate": 9.7169502289386e-06, "loss": 18.3813, "step": 7353 }, { "epoch": 0.13442521066773905, "grad_norm": 5.937995054653439, "learning_rate": 9.716852037959465e-06, "loss": 17.3262, "step": 7354 }, { "epoch": 0.13444348986418558, "grad_norm": 6.869378397025138, "learning_rate": 9.716753830448174e-06, "loss": 17.6853, "step": 7355 }, { "epoch": 0.1344617690606321, "grad_norm": 6.967266107514495, "learning_rate": 9.71665560640507e-06, "loss": 17.6118, "step": 7356 }, { "epoch": 0.13448004825707863, "grad_norm": 6.354453718530233, "learning_rate": 9.716557365830496e-06, "loss": 17.3243, "step": 7357 }, { "epoch": 0.13449832745352513, "grad_norm": 7.8805286975140545, "learning_rate": 9.716459108724799e-06, "loss": 18.221, "step": 7358 }, { "epoch": 0.13451660664997167, "grad_norm": 7.128020043174866, "learning_rate": 9.716360835088324e-06, "loss": 17.9753, "step": 7359 }, { "epoch": 0.13453488584641818, "grad_norm": 6.383961886304486, "learning_rate": 9.716262544921411e-06, "loss": 17.5263, "step": 7360 }, { "epoch": 0.13455316504286471, "grad_norm": 7.271209331479864, "learning_rate": 9.716164238224406e-06, "loss": 18.0944, "step": 7361 }, { "epoch": 0.13457144423931125, "grad_norm": 6.937910866916811, "learning_rate": 9.716065914997657e-06, "loss": 17.8308, "step": 7362 }, { "epoch": 0.13458972343575776, "grad_norm": 6.288875662832325, "learning_rate": 9.715967575241506e-06, "loss": 17.503, "step": 7363 }, { "epoch": 0.1346080026322043, "grad_norm": 7.058704016267175, "learning_rate": 9.715869218956297e-06, "loss": 17.6936, "step": 7364 }, { "epoch": 0.1346262818286508, "grad_norm": 6.839820264143985, "learning_rate": 9.715770846142376e-06, "loss": 17.9178, "step": 7365 }, { "epoch": 0.13464456102509734, "grad_norm": 11.633991662685553, "learning_rate": 9.715672456800087e-06, "loss": 19.238, "step": 7366 }, { "epoch": 0.13466284022154387, "grad_norm": 7.89246996117323, "learning_rate": 9.715574050929775e-06, "loss": 18.4244, "step": 7367 }, { "epoch": 0.13468111941799038, "grad_norm": 8.79725255643353, "learning_rate": 9.715475628531785e-06, "loss": 18.4606, "step": 7368 }, { "epoch": 0.13469939861443692, "grad_norm": 7.29585189884504, "learning_rate": 9.715377189606462e-06, "loss": 17.928, "step": 7369 }, { "epoch": 0.13471767781088342, "grad_norm": 7.865076076181476, "learning_rate": 9.715278734154155e-06, "loss": 17.7899, "step": 7370 }, { "epoch": 0.13473595700732996, "grad_norm": 6.006823076315517, "learning_rate": 9.715180262175202e-06, "loss": 17.1828, "step": 7371 }, { "epoch": 0.1347542362037765, "grad_norm": 6.490649690145237, "learning_rate": 9.715081773669949e-06, "loss": 17.432, "step": 7372 }, { "epoch": 0.134772515400223, "grad_norm": 8.093132151465447, "learning_rate": 9.714983268638747e-06, "loss": 18.1537, "step": 7373 }, { "epoch": 0.13479079459666954, "grad_norm": 7.385779412925701, "learning_rate": 9.714884747081937e-06, "loss": 18.2123, "step": 7374 }, { "epoch": 0.13480907379311605, "grad_norm": 6.923081296403451, "learning_rate": 9.714786208999864e-06, "loss": 17.7324, "step": 7375 }, { "epoch": 0.13482735298956258, "grad_norm": 8.347399195705641, "learning_rate": 9.714687654392876e-06, "loss": 18.5434, "step": 7376 }, { "epoch": 0.1348456321860091, "grad_norm": 6.52387836191315, "learning_rate": 9.714589083261316e-06, "loss": 17.4424, "step": 7377 }, { "epoch": 0.13486391138245563, "grad_norm": 7.227908509359091, "learning_rate": 9.714490495605531e-06, "loss": 17.6268, "step": 7378 }, { "epoch": 0.13488219057890216, "grad_norm": 7.676719661969092, "learning_rate": 9.714391891425866e-06, "loss": 17.5231, "step": 7379 }, { "epoch": 0.13490046977534867, "grad_norm": 7.507731889119987, "learning_rate": 9.714293270722665e-06, "loss": 17.7889, "step": 7380 }, { "epoch": 0.1349187489717952, "grad_norm": 7.489507476864272, "learning_rate": 9.714194633496276e-06, "loss": 17.8013, "step": 7381 }, { "epoch": 0.1349370281682417, "grad_norm": 7.1894071929284715, "learning_rate": 9.714095979747044e-06, "loss": 17.9802, "step": 7382 }, { "epoch": 0.13495530736468825, "grad_norm": 11.852791486406433, "learning_rate": 9.713997309475316e-06, "loss": 17.1034, "step": 7383 }, { "epoch": 0.13497358656113478, "grad_norm": 8.161722880080935, "learning_rate": 9.713898622681436e-06, "loss": 18.4728, "step": 7384 }, { "epoch": 0.1349918657575813, "grad_norm": 7.143327922896236, "learning_rate": 9.71379991936575e-06, "loss": 17.7502, "step": 7385 }, { "epoch": 0.13501014495402783, "grad_norm": 5.735791390923804, "learning_rate": 9.713701199528602e-06, "loss": 17.2922, "step": 7386 }, { "epoch": 0.13502842415047434, "grad_norm": 6.952909023256605, "learning_rate": 9.713602463170345e-06, "loss": 17.6708, "step": 7387 }, { "epoch": 0.13504670334692087, "grad_norm": 7.659718136904871, "learning_rate": 9.713503710291317e-06, "loss": 17.9768, "step": 7388 }, { "epoch": 0.1350649825433674, "grad_norm": 7.496735274558631, "learning_rate": 9.713404940891867e-06, "loss": 17.7919, "step": 7389 }, { "epoch": 0.13508326173981391, "grad_norm": 7.251590006579304, "learning_rate": 9.713306154972344e-06, "loss": 17.8879, "step": 7390 }, { "epoch": 0.13510154093626045, "grad_norm": 8.674042320462984, "learning_rate": 9.71320735253309e-06, "loss": 18.4063, "step": 7391 }, { "epoch": 0.13511982013270696, "grad_norm": 8.016817300470889, "learning_rate": 9.713108533574455e-06, "loss": 18.3258, "step": 7392 }, { "epoch": 0.1351380993291535, "grad_norm": 8.244659610467385, "learning_rate": 9.713009698096782e-06, "loss": 18.2969, "step": 7393 }, { "epoch": 0.1351563785256, "grad_norm": 6.014535679292738, "learning_rate": 9.71291084610042e-06, "loss": 17.2641, "step": 7394 }, { "epoch": 0.13517465772204654, "grad_norm": 6.153890770683312, "learning_rate": 9.712811977585715e-06, "loss": 17.3391, "step": 7395 }, { "epoch": 0.13519293691849307, "grad_norm": 7.288188074912993, "learning_rate": 9.712713092553012e-06, "loss": 18.0131, "step": 7396 }, { "epoch": 0.13521121611493958, "grad_norm": 6.553780700162622, "learning_rate": 9.712614191002657e-06, "loss": 17.7899, "step": 7397 }, { "epoch": 0.13522949531138612, "grad_norm": 6.867287067987206, "learning_rate": 9.712515272935e-06, "loss": 17.8165, "step": 7398 }, { "epoch": 0.13524777450783262, "grad_norm": 6.399640444184357, "learning_rate": 9.712416338350386e-06, "loss": 17.4553, "step": 7399 }, { "epoch": 0.13526605370427916, "grad_norm": 8.089816224610908, "learning_rate": 9.712317387249162e-06, "loss": 17.7951, "step": 7400 }, { "epoch": 0.1352843329007257, "grad_norm": 7.140421054078072, "learning_rate": 9.712218419631673e-06, "loss": 17.9161, "step": 7401 }, { "epoch": 0.1353026120971722, "grad_norm": 6.5962874189656855, "learning_rate": 9.712119435498268e-06, "loss": 17.5488, "step": 7402 }, { "epoch": 0.13532089129361874, "grad_norm": 6.718803772161681, "learning_rate": 9.712020434849294e-06, "loss": 17.8015, "step": 7403 }, { "epoch": 0.13533917049006525, "grad_norm": 7.2415029216503575, "learning_rate": 9.711921417685097e-06, "loss": 18.0733, "step": 7404 }, { "epoch": 0.13535744968651178, "grad_norm": 7.816229348832906, "learning_rate": 9.711822384006025e-06, "loss": 17.9967, "step": 7405 }, { "epoch": 0.13537572888295832, "grad_norm": 7.116408758357093, "learning_rate": 9.711723333812422e-06, "loss": 17.8816, "step": 7406 }, { "epoch": 0.13539400807940483, "grad_norm": 5.932070533501284, "learning_rate": 9.71162426710464e-06, "loss": 17.273, "step": 7407 }, { "epoch": 0.13541228727585136, "grad_norm": 6.6459074293993, "learning_rate": 9.711525183883021e-06, "loss": 17.7613, "step": 7408 }, { "epoch": 0.13543056647229787, "grad_norm": 6.422762667907978, "learning_rate": 9.711426084147918e-06, "loss": 17.4116, "step": 7409 }, { "epoch": 0.1354488456687444, "grad_norm": 6.265882057739489, "learning_rate": 9.711326967899674e-06, "loss": 17.65, "step": 7410 }, { "epoch": 0.1354671248651909, "grad_norm": 8.118062402360124, "learning_rate": 9.71122783513864e-06, "loss": 18.3274, "step": 7411 }, { "epoch": 0.13548540406163745, "grad_norm": 6.59997902432031, "learning_rate": 9.711128685865158e-06, "loss": 17.4206, "step": 7412 }, { "epoch": 0.13550368325808398, "grad_norm": 6.698676748752259, "learning_rate": 9.71102952007958e-06, "loss": 17.6831, "step": 7413 }, { "epoch": 0.1355219624545305, "grad_norm": 7.462208115105021, "learning_rate": 9.710930337782254e-06, "loss": 17.9643, "step": 7414 }, { "epoch": 0.13554024165097703, "grad_norm": 6.881641144148816, "learning_rate": 9.710831138973524e-06, "loss": 17.6377, "step": 7415 }, { "epoch": 0.13555852084742354, "grad_norm": 6.74850301229193, "learning_rate": 9.71073192365374e-06, "loss": 17.9104, "step": 7416 }, { "epoch": 0.13557680004387007, "grad_norm": 7.372550253170825, "learning_rate": 9.710632691823249e-06, "loss": 17.9916, "step": 7417 }, { "epoch": 0.1355950792403166, "grad_norm": 8.937901853678206, "learning_rate": 9.710533443482399e-06, "loss": 18.3555, "step": 7418 }, { "epoch": 0.13561335843676312, "grad_norm": 6.817766288220093, "learning_rate": 9.71043417863154e-06, "loss": 17.8772, "step": 7419 }, { "epoch": 0.13563163763320965, "grad_norm": 7.757249117581919, "learning_rate": 9.710334897271016e-06, "loss": 18.1575, "step": 7420 }, { "epoch": 0.13564991682965616, "grad_norm": 6.063796383370273, "learning_rate": 9.71023559940118e-06, "loss": 17.3542, "step": 7421 }, { "epoch": 0.1356681960261027, "grad_norm": 7.686602160021942, "learning_rate": 9.710136285022374e-06, "loss": 18.175, "step": 7422 }, { "epoch": 0.13568647522254923, "grad_norm": 8.243732315581704, "learning_rate": 9.710036954134948e-06, "loss": 18.5862, "step": 7423 }, { "epoch": 0.13570475441899574, "grad_norm": 7.6194402466320055, "learning_rate": 9.709937606739252e-06, "loss": 18.1703, "step": 7424 }, { "epoch": 0.13572303361544227, "grad_norm": 6.469208520653512, "learning_rate": 9.709838242835635e-06, "loss": 17.5793, "step": 7425 }, { "epoch": 0.13574131281188878, "grad_norm": 6.219589586439911, "learning_rate": 9.709738862424442e-06, "loss": 17.587, "step": 7426 }, { "epoch": 0.13575959200833532, "grad_norm": 8.22034380512448, "learning_rate": 9.709639465506026e-06, "loss": 17.7433, "step": 7427 }, { "epoch": 0.13577787120478182, "grad_norm": 6.614378456818435, "learning_rate": 9.70954005208073e-06, "loss": 17.4989, "step": 7428 }, { "epoch": 0.13579615040122836, "grad_norm": 6.906101381445755, "learning_rate": 9.709440622148905e-06, "loss": 17.7696, "step": 7429 }, { "epoch": 0.1358144295976749, "grad_norm": 6.914231304372284, "learning_rate": 9.709341175710899e-06, "loss": 17.7633, "step": 7430 }, { "epoch": 0.1358327087941214, "grad_norm": 7.924595493657493, "learning_rate": 9.709241712767062e-06, "loss": 18.3542, "step": 7431 }, { "epoch": 0.13585098799056794, "grad_norm": 7.733756354225041, "learning_rate": 9.709142233317739e-06, "loss": 17.9405, "step": 7432 }, { "epoch": 0.13586926718701445, "grad_norm": 8.678824703147397, "learning_rate": 9.709042737363283e-06, "loss": 18.6903, "step": 7433 }, { "epoch": 0.13588754638346098, "grad_norm": 8.161165981960721, "learning_rate": 9.708943224904041e-06, "loss": 18.1929, "step": 7434 }, { "epoch": 0.13590582557990752, "grad_norm": 5.921200411436585, "learning_rate": 9.70884369594036e-06, "loss": 17.2303, "step": 7435 }, { "epoch": 0.13592410477635403, "grad_norm": 7.3493418588230375, "learning_rate": 9.708744150472594e-06, "loss": 17.6725, "step": 7436 }, { "epoch": 0.13594238397280056, "grad_norm": 8.713112108397668, "learning_rate": 9.708644588501084e-06, "loss": 18.2403, "step": 7437 }, { "epoch": 0.13596066316924707, "grad_norm": 8.260071257944007, "learning_rate": 9.708545010026187e-06, "loss": 18.6511, "step": 7438 }, { "epoch": 0.1359789423656936, "grad_norm": 7.311836953155976, "learning_rate": 9.708445415048245e-06, "loss": 17.7767, "step": 7439 }, { "epoch": 0.13599722156214014, "grad_norm": 8.075841208882839, "learning_rate": 9.708345803567612e-06, "loss": 18.1745, "step": 7440 }, { "epoch": 0.13601550075858665, "grad_norm": 6.461272470575603, "learning_rate": 9.708246175584637e-06, "loss": 17.6392, "step": 7441 }, { "epoch": 0.13603377995503318, "grad_norm": 6.3777355538496545, "learning_rate": 9.708146531099665e-06, "loss": 17.4888, "step": 7442 }, { "epoch": 0.1360520591514797, "grad_norm": 6.305521191061348, "learning_rate": 9.70804687011305e-06, "loss": 17.4775, "step": 7443 }, { "epoch": 0.13607033834792623, "grad_norm": 9.468671047870306, "learning_rate": 9.707947192625137e-06, "loss": 18.7227, "step": 7444 }, { "epoch": 0.13608861754437274, "grad_norm": 7.909827586776542, "learning_rate": 9.70784749863628e-06, "loss": 18.0857, "step": 7445 }, { "epoch": 0.13610689674081927, "grad_norm": 6.3380149814229085, "learning_rate": 9.707747788146826e-06, "loss": 17.3367, "step": 7446 }, { "epoch": 0.1361251759372658, "grad_norm": 7.207068106258375, "learning_rate": 9.707648061157124e-06, "loss": 18.0903, "step": 7447 }, { "epoch": 0.13614345513371232, "grad_norm": 9.115628758208079, "learning_rate": 9.707548317667523e-06, "loss": 18.3233, "step": 7448 }, { "epoch": 0.13616173433015885, "grad_norm": 7.762250566589427, "learning_rate": 9.707448557678374e-06, "loss": 17.9831, "step": 7449 }, { "epoch": 0.13618001352660536, "grad_norm": 7.74005275425354, "learning_rate": 9.707348781190028e-06, "loss": 18.019, "step": 7450 }, { "epoch": 0.1361982927230519, "grad_norm": 6.637406948041515, "learning_rate": 9.707248988202832e-06, "loss": 17.4043, "step": 7451 }, { "epoch": 0.13621657191949843, "grad_norm": 6.835654410013408, "learning_rate": 9.707149178717136e-06, "loss": 17.9024, "step": 7452 }, { "epoch": 0.13623485111594494, "grad_norm": 6.473983229279323, "learning_rate": 9.70704935273329e-06, "loss": 17.3907, "step": 7453 }, { "epoch": 0.13625313031239147, "grad_norm": 7.516646582083344, "learning_rate": 9.706949510251647e-06, "loss": 18.1707, "step": 7454 }, { "epoch": 0.13627140950883798, "grad_norm": 9.462754966995899, "learning_rate": 9.706849651272551e-06, "loss": 18.5969, "step": 7455 }, { "epoch": 0.13628968870528452, "grad_norm": 7.452679335354314, "learning_rate": 9.706749775796359e-06, "loss": 18.1997, "step": 7456 }, { "epoch": 0.13630796790173105, "grad_norm": 8.791480284434773, "learning_rate": 9.706649883823415e-06, "loss": 18.5841, "step": 7457 }, { "epoch": 0.13632624709817756, "grad_norm": 7.897907052793343, "learning_rate": 9.706549975354073e-06, "loss": 18.1403, "step": 7458 }, { "epoch": 0.1363445262946241, "grad_norm": 8.894721361673117, "learning_rate": 9.70645005038868e-06, "loss": 18.3676, "step": 7459 }, { "epoch": 0.1363628054910706, "grad_norm": 6.520883671534357, "learning_rate": 9.70635010892759e-06, "loss": 17.631, "step": 7460 }, { "epoch": 0.13638108468751714, "grad_norm": 7.071421314080508, "learning_rate": 9.70625015097115e-06, "loss": 17.8233, "step": 7461 }, { "epoch": 0.13639936388396365, "grad_norm": 5.6300301550366925, "learning_rate": 9.706150176519713e-06, "loss": 17.1715, "step": 7462 }, { "epoch": 0.13641764308041018, "grad_norm": 7.852800304353618, "learning_rate": 9.706050185573626e-06, "loss": 17.9435, "step": 7463 }, { "epoch": 0.13643592227685672, "grad_norm": 6.699257770943432, "learning_rate": 9.705950178133243e-06, "loss": 17.821, "step": 7464 }, { "epoch": 0.13645420147330323, "grad_norm": 6.588299143131586, "learning_rate": 9.705850154198912e-06, "loss": 17.5653, "step": 7465 }, { "epoch": 0.13647248066974976, "grad_norm": 6.66726651315699, "learning_rate": 9.705750113770986e-06, "loss": 17.6059, "step": 7466 }, { "epoch": 0.13649075986619627, "grad_norm": 6.233600401310327, "learning_rate": 9.705650056849813e-06, "loss": 17.589, "step": 7467 }, { "epoch": 0.1365090390626428, "grad_norm": 6.968923947025399, "learning_rate": 9.705549983435744e-06, "loss": 17.9795, "step": 7468 }, { "epoch": 0.13652731825908934, "grad_norm": 7.938093167827697, "learning_rate": 9.705449893529133e-06, "loss": 17.9217, "step": 7469 }, { "epoch": 0.13654559745553585, "grad_norm": 6.998871223151411, "learning_rate": 9.705349787130327e-06, "loss": 17.6893, "step": 7470 }, { "epoch": 0.13656387665198239, "grad_norm": 7.820470612951028, "learning_rate": 9.70524966423968e-06, "loss": 18.362, "step": 7471 }, { "epoch": 0.1365821558484289, "grad_norm": 7.775147417972436, "learning_rate": 9.705149524857539e-06, "loss": 18.1892, "step": 7472 }, { "epoch": 0.13660043504487543, "grad_norm": 6.673182990397212, "learning_rate": 9.705049368984259e-06, "loss": 17.817, "step": 7473 }, { "epoch": 0.13661871424132196, "grad_norm": 5.406971223600896, "learning_rate": 9.704949196620188e-06, "loss": 17.0513, "step": 7474 }, { "epoch": 0.13663699343776847, "grad_norm": 6.563913203413318, "learning_rate": 9.704849007765677e-06, "loss": 17.3962, "step": 7475 }, { "epoch": 0.136655272634215, "grad_norm": 7.338919945878583, "learning_rate": 9.70474880242108e-06, "loss": 17.6328, "step": 7476 }, { "epoch": 0.13667355183066152, "grad_norm": 7.261563972332712, "learning_rate": 9.704648580586748e-06, "loss": 17.7875, "step": 7477 }, { "epoch": 0.13669183102710805, "grad_norm": 8.396518761867243, "learning_rate": 9.704548342263029e-06, "loss": 18.0135, "step": 7478 }, { "epoch": 0.13671011022355456, "grad_norm": 6.422952971004934, "learning_rate": 9.704448087450278e-06, "loss": 17.7736, "step": 7479 }, { "epoch": 0.1367283894200011, "grad_norm": 6.094479934565342, "learning_rate": 9.704347816148842e-06, "loss": 17.4413, "step": 7480 }, { "epoch": 0.13674666861644763, "grad_norm": 8.832583222945484, "learning_rate": 9.704247528359079e-06, "loss": 18.0457, "step": 7481 }, { "epoch": 0.13676494781289414, "grad_norm": 6.953333687060656, "learning_rate": 9.704147224081332e-06, "loss": 17.827, "step": 7482 }, { "epoch": 0.13678322700934067, "grad_norm": 6.900389147758031, "learning_rate": 9.70404690331596e-06, "loss": 17.7573, "step": 7483 }, { "epoch": 0.13680150620578718, "grad_norm": 6.581673943697085, "learning_rate": 9.703946566063314e-06, "loss": 17.6126, "step": 7484 }, { "epoch": 0.13681978540223372, "grad_norm": 6.340699010437135, "learning_rate": 9.703846212323739e-06, "loss": 17.4173, "step": 7485 }, { "epoch": 0.13683806459868025, "grad_norm": 6.756441239325302, "learning_rate": 9.703745842097594e-06, "loss": 17.5447, "step": 7486 }, { "epoch": 0.13685634379512676, "grad_norm": 7.340464170563029, "learning_rate": 9.703645455385227e-06, "loss": 18.3228, "step": 7487 }, { "epoch": 0.1368746229915733, "grad_norm": 6.415627907213983, "learning_rate": 9.703545052186992e-06, "loss": 17.6938, "step": 7488 }, { "epoch": 0.1368929021880198, "grad_norm": 7.493575468964602, "learning_rate": 9.703444632503239e-06, "loss": 17.7694, "step": 7489 }, { "epoch": 0.13691118138446634, "grad_norm": 5.726018751368653, "learning_rate": 9.703344196334319e-06, "loss": 17.1624, "step": 7490 }, { "epoch": 0.13692946058091288, "grad_norm": 6.382580616173418, "learning_rate": 9.703243743680589e-06, "loss": 17.5575, "step": 7491 }, { "epoch": 0.13694773977735938, "grad_norm": 8.02022727836731, "learning_rate": 9.703143274542395e-06, "loss": 18.0148, "step": 7492 }, { "epoch": 0.13696601897380592, "grad_norm": 7.796984691625947, "learning_rate": 9.703042788920094e-06, "loss": 18.2498, "step": 7493 }, { "epoch": 0.13698429817025243, "grad_norm": 6.847058154079785, "learning_rate": 9.702942286814034e-06, "loss": 17.5306, "step": 7494 }, { "epoch": 0.13700257736669896, "grad_norm": 6.693193351252166, "learning_rate": 9.70284176822457e-06, "loss": 17.6616, "step": 7495 }, { "epoch": 0.13702085656314547, "grad_norm": 6.468462283579735, "learning_rate": 9.702741233152055e-06, "loss": 17.6046, "step": 7496 }, { "epoch": 0.137039135759592, "grad_norm": 8.447953586046685, "learning_rate": 9.702640681596839e-06, "loss": 18.14, "step": 7497 }, { "epoch": 0.13705741495603854, "grad_norm": 6.7410512609657784, "learning_rate": 9.702540113559276e-06, "loss": 17.4294, "step": 7498 }, { "epoch": 0.13707569415248505, "grad_norm": 7.260428605576361, "learning_rate": 9.702439529039718e-06, "loss": 18.0614, "step": 7499 }, { "epoch": 0.13709397334893159, "grad_norm": 7.358399236749688, "learning_rate": 9.702338928038517e-06, "loss": 18.1374, "step": 7500 }, { "epoch": 0.1371122525453781, "grad_norm": 6.405876792649341, "learning_rate": 9.702238310556027e-06, "loss": 17.6907, "step": 7501 }, { "epoch": 0.13713053174182463, "grad_norm": 7.4062837846026985, "learning_rate": 9.702137676592598e-06, "loss": 17.8288, "step": 7502 }, { "epoch": 0.13714881093827117, "grad_norm": 6.830382935003411, "learning_rate": 9.702037026148586e-06, "loss": 17.947, "step": 7503 }, { "epoch": 0.13716709013471767, "grad_norm": 7.770829752486227, "learning_rate": 9.701936359224341e-06, "loss": 18.0785, "step": 7504 }, { "epoch": 0.1371853693311642, "grad_norm": 7.660208673592639, "learning_rate": 9.701835675820218e-06, "loss": 18.3223, "step": 7505 }, { "epoch": 0.13720364852761072, "grad_norm": 7.583835662282823, "learning_rate": 9.701734975936568e-06, "loss": 18.0849, "step": 7506 }, { "epoch": 0.13722192772405725, "grad_norm": 6.956978730707371, "learning_rate": 9.701634259573747e-06, "loss": 17.9131, "step": 7507 }, { "epoch": 0.1372402069205038, "grad_norm": 7.649369274987982, "learning_rate": 9.701533526732104e-06, "loss": 17.9172, "step": 7508 }, { "epoch": 0.1372584861169503, "grad_norm": 6.578214212991516, "learning_rate": 9.701432777411995e-06, "loss": 17.5643, "step": 7509 }, { "epoch": 0.13727676531339683, "grad_norm": 5.534817711350081, "learning_rate": 9.701332011613771e-06, "loss": 17.0359, "step": 7510 }, { "epoch": 0.13729504450984334, "grad_norm": 6.667518505261539, "learning_rate": 9.701231229337788e-06, "loss": 17.6601, "step": 7511 }, { "epoch": 0.13731332370628987, "grad_norm": 8.121299665199137, "learning_rate": 9.701130430584396e-06, "loss": 17.7347, "step": 7512 }, { "epoch": 0.13733160290273638, "grad_norm": 7.05480122411721, "learning_rate": 9.701029615353949e-06, "loss": 17.9691, "step": 7513 }, { "epoch": 0.13734988209918292, "grad_norm": 6.894109265636699, "learning_rate": 9.700928783646804e-06, "loss": 17.7868, "step": 7514 }, { "epoch": 0.13736816129562945, "grad_norm": 5.633288824085069, "learning_rate": 9.70082793546331e-06, "loss": 17.3854, "step": 7515 }, { "epoch": 0.13738644049207596, "grad_norm": 6.582019190776378, "learning_rate": 9.700727070803822e-06, "loss": 17.5675, "step": 7516 }, { "epoch": 0.1374047196885225, "grad_norm": 7.911740293895738, "learning_rate": 9.700626189668694e-06, "loss": 17.9536, "step": 7517 }, { "epoch": 0.137422998884969, "grad_norm": 7.304427641569884, "learning_rate": 9.700525292058278e-06, "loss": 18.1302, "step": 7518 }, { "epoch": 0.13744127808141554, "grad_norm": 6.930641441163955, "learning_rate": 9.700424377972928e-06, "loss": 17.8615, "step": 7519 }, { "epoch": 0.13745955727786208, "grad_norm": 7.1536806801317825, "learning_rate": 9.700323447413e-06, "loss": 18.0929, "step": 7520 }, { "epoch": 0.13747783647430858, "grad_norm": 6.965101634971381, "learning_rate": 9.700222500378846e-06, "loss": 17.8401, "step": 7521 }, { "epoch": 0.13749611567075512, "grad_norm": 8.294417327476186, "learning_rate": 9.700121536870822e-06, "loss": 18.6985, "step": 7522 }, { "epoch": 0.13751439486720163, "grad_norm": 6.73186924609752, "learning_rate": 9.700020556889275e-06, "loss": 17.4993, "step": 7523 }, { "epoch": 0.13753267406364816, "grad_norm": 6.773047454341339, "learning_rate": 9.699919560434568e-06, "loss": 17.4125, "step": 7524 }, { "epoch": 0.1375509532600947, "grad_norm": 7.670128620195902, "learning_rate": 9.69981854750705e-06, "loss": 18.2021, "step": 7525 }, { "epoch": 0.1375692324565412, "grad_norm": 6.6562117075155856, "learning_rate": 9.699717518107075e-06, "loss": 17.542, "step": 7526 }, { "epoch": 0.13758751165298774, "grad_norm": 7.134487262285221, "learning_rate": 9.699616472234998e-06, "loss": 17.796, "step": 7527 }, { "epoch": 0.13760579084943425, "grad_norm": 8.67954375693659, "learning_rate": 9.699515409891173e-06, "loss": 19.0484, "step": 7528 }, { "epoch": 0.1376240700458808, "grad_norm": 6.161607217740553, "learning_rate": 9.699414331075955e-06, "loss": 17.453, "step": 7529 }, { "epoch": 0.1376423492423273, "grad_norm": 6.842734209054109, "learning_rate": 9.699313235789698e-06, "loss": 17.7173, "step": 7530 }, { "epoch": 0.13766062843877383, "grad_norm": 6.833805176580314, "learning_rate": 9.699212124032754e-06, "loss": 17.4478, "step": 7531 }, { "epoch": 0.13767890763522037, "grad_norm": 7.496615619801857, "learning_rate": 9.699110995805481e-06, "loss": 18.0135, "step": 7532 }, { "epoch": 0.13769718683166687, "grad_norm": 7.622864562921994, "learning_rate": 9.69900985110823e-06, "loss": 18.4149, "step": 7533 }, { "epoch": 0.1377154660281134, "grad_norm": 7.754625756622168, "learning_rate": 9.698908689941358e-06, "loss": 18.1251, "step": 7534 }, { "epoch": 0.13773374522455992, "grad_norm": 6.3318055787919, "learning_rate": 9.69880751230522e-06, "loss": 17.5096, "step": 7535 }, { "epoch": 0.13775202442100645, "grad_norm": 6.6294924082068, "learning_rate": 9.698706318200169e-06, "loss": 17.6423, "step": 7536 }, { "epoch": 0.137770303617453, "grad_norm": 6.916513568451425, "learning_rate": 9.698605107626559e-06, "loss": 17.878, "step": 7537 }, { "epoch": 0.1377885828138995, "grad_norm": 6.199471801380165, "learning_rate": 9.698503880584746e-06, "loss": 17.4741, "step": 7538 }, { "epoch": 0.13780686201034603, "grad_norm": 7.6927948140504325, "learning_rate": 9.698402637075085e-06, "loss": 18.1918, "step": 7539 }, { "epoch": 0.13782514120679254, "grad_norm": 7.90499299859101, "learning_rate": 9.698301377097929e-06, "loss": 17.8412, "step": 7540 }, { "epoch": 0.13784342040323908, "grad_norm": 6.846420909602271, "learning_rate": 9.698200100653636e-06, "loss": 17.5772, "step": 7541 }, { "epoch": 0.1378616995996856, "grad_norm": 6.531231633745436, "learning_rate": 9.698098807742559e-06, "loss": 17.613, "step": 7542 }, { "epoch": 0.13787997879613212, "grad_norm": 6.329550184621328, "learning_rate": 9.697997498365054e-06, "loss": 17.4516, "step": 7543 }, { "epoch": 0.13789825799257865, "grad_norm": 6.818289281580703, "learning_rate": 9.697896172521475e-06, "loss": 17.9329, "step": 7544 }, { "epoch": 0.13791653718902516, "grad_norm": 6.314645548487325, "learning_rate": 9.697794830212178e-06, "loss": 17.4605, "step": 7545 }, { "epoch": 0.1379348163854717, "grad_norm": 5.9485092370941395, "learning_rate": 9.697693471437516e-06, "loss": 17.2601, "step": 7546 }, { "epoch": 0.1379530955819182, "grad_norm": 6.979378822346856, "learning_rate": 9.697592096197849e-06, "loss": 17.9736, "step": 7547 }, { "epoch": 0.13797137477836474, "grad_norm": 6.219796983006217, "learning_rate": 9.697490704493527e-06, "loss": 17.4588, "step": 7548 }, { "epoch": 0.13798965397481128, "grad_norm": 7.0799748577732355, "learning_rate": 9.697389296324908e-06, "loss": 18.1643, "step": 7549 }, { "epoch": 0.13800793317125779, "grad_norm": 7.552326328445858, "learning_rate": 9.697287871692349e-06, "loss": 17.6755, "step": 7550 }, { "epoch": 0.13802621236770432, "grad_norm": 6.084133107890047, "learning_rate": 9.697186430596201e-06, "loss": 17.4025, "step": 7551 }, { "epoch": 0.13804449156415083, "grad_norm": 7.4388834971540225, "learning_rate": 9.697084973036823e-06, "loss": 17.9366, "step": 7552 }, { "epoch": 0.13806277076059736, "grad_norm": 5.488343762675065, "learning_rate": 9.696983499014572e-06, "loss": 17.1962, "step": 7553 }, { "epoch": 0.1380810499570439, "grad_norm": 6.007124088766023, "learning_rate": 9.696882008529797e-06, "loss": 17.4313, "step": 7554 }, { "epoch": 0.1380993291534904, "grad_norm": 8.488478544872322, "learning_rate": 9.696780501582862e-06, "loss": 18.1383, "step": 7555 }, { "epoch": 0.13811760834993694, "grad_norm": 5.140191295738998, "learning_rate": 9.696678978174118e-06, "loss": 17.0095, "step": 7556 }, { "epoch": 0.13813588754638345, "grad_norm": 7.613492131171581, "learning_rate": 9.69657743830392e-06, "loss": 18.0298, "step": 7557 }, { "epoch": 0.13815416674283, "grad_norm": 9.479618418224536, "learning_rate": 9.696475881972627e-06, "loss": 18.2041, "step": 7558 }, { "epoch": 0.13817244593927652, "grad_norm": 7.058720172453753, "learning_rate": 9.696374309180593e-06, "loss": 17.9852, "step": 7559 }, { "epoch": 0.13819072513572303, "grad_norm": 6.269282928623162, "learning_rate": 9.696272719928177e-06, "loss": 17.4109, "step": 7560 }, { "epoch": 0.13820900433216957, "grad_norm": 7.357859970941546, "learning_rate": 9.69617111421573e-06, "loss": 18.0383, "step": 7561 }, { "epoch": 0.13822728352861607, "grad_norm": 7.68302082306943, "learning_rate": 9.696069492043611e-06, "loss": 18.0667, "step": 7562 }, { "epoch": 0.1382455627250626, "grad_norm": 6.386505217125653, "learning_rate": 9.695967853412177e-06, "loss": 17.5746, "step": 7563 }, { "epoch": 0.13826384192150912, "grad_norm": 6.0812781948558365, "learning_rate": 9.695866198321782e-06, "loss": 17.4049, "step": 7564 }, { "epoch": 0.13828212111795565, "grad_norm": 7.885285814407381, "learning_rate": 9.695764526772784e-06, "loss": 18.2242, "step": 7565 }, { "epoch": 0.1383004003144022, "grad_norm": 8.503405783523597, "learning_rate": 9.69566283876554e-06, "loss": 18.4756, "step": 7566 }, { "epoch": 0.1383186795108487, "grad_norm": 7.325141136986131, "learning_rate": 9.695561134300403e-06, "loss": 17.6815, "step": 7567 }, { "epoch": 0.13833695870729523, "grad_norm": 6.819358139332634, "learning_rate": 9.695459413377732e-06, "loss": 17.6192, "step": 7568 }, { "epoch": 0.13835523790374174, "grad_norm": 7.196714241431053, "learning_rate": 9.695357675997886e-06, "loss": 17.8749, "step": 7569 }, { "epoch": 0.13837351710018828, "grad_norm": 7.124930042216272, "learning_rate": 9.695255922161216e-06, "loss": 17.8248, "step": 7570 }, { "epoch": 0.1383917962966348, "grad_norm": 7.577439664001599, "learning_rate": 9.695154151868082e-06, "loss": 18.048, "step": 7571 }, { "epoch": 0.13841007549308132, "grad_norm": 5.946709674463609, "learning_rate": 9.69505236511884e-06, "loss": 17.3948, "step": 7572 }, { "epoch": 0.13842835468952785, "grad_norm": 6.849109850690806, "learning_rate": 9.694950561913847e-06, "loss": 17.8114, "step": 7573 }, { "epoch": 0.13844663388597436, "grad_norm": 6.254112894892067, "learning_rate": 9.69484874225346e-06, "loss": 17.3582, "step": 7574 }, { "epoch": 0.1384649130824209, "grad_norm": 6.83043995617411, "learning_rate": 9.694746906138037e-06, "loss": 17.7437, "step": 7575 }, { "epoch": 0.13848319227886743, "grad_norm": 6.8425114925436485, "learning_rate": 9.69464505356793e-06, "loss": 17.5462, "step": 7576 }, { "epoch": 0.13850147147531394, "grad_norm": 8.338983089259113, "learning_rate": 9.694543184543503e-06, "loss": 17.8099, "step": 7577 }, { "epoch": 0.13851975067176048, "grad_norm": 8.441731655404608, "learning_rate": 9.694441299065108e-06, "loss": 18.0928, "step": 7578 }, { "epoch": 0.13853802986820699, "grad_norm": 7.39996527709027, "learning_rate": 9.694339397133103e-06, "loss": 18.0476, "step": 7579 }, { "epoch": 0.13855630906465352, "grad_norm": 6.481565025506474, "learning_rate": 9.694237478747845e-06, "loss": 17.4842, "step": 7580 }, { "epoch": 0.13857458826110003, "grad_norm": 7.5816203621605744, "learning_rate": 9.694135543909695e-06, "loss": 18.1815, "step": 7581 }, { "epoch": 0.13859286745754656, "grad_norm": 6.556496899939846, "learning_rate": 9.694033592619005e-06, "loss": 17.7246, "step": 7582 }, { "epoch": 0.1386111466539931, "grad_norm": 9.199344677937912, "learning_rate": 9.693931624876134e-06, "loss": 19.1485, "step": 7583 }, { "epoch": 0.1386294258504396, "grad_norm": 8.306551459532058, "learning_rate": 9.693829640681443e-06, "loss": 17.9562, "step": 7584 }, { "epoch": 0.13864770504688614, "grad_norm": 7.317573061319512, "learning_rate": 9.693727640035284e-06, "loss": 18.0196, "step": 7585 }, { "epoch": 0.13866598424333265, "grad_norm": 6.088249381911165, "learning_rate": 9.693625622938016e-06, "loss": 17.2675, "step": 7586 }, { "epoch": 0.1386842634397792, "grad_norm": 8.32568238400912, "learning_rate": 9.69352358939e-06, "loss": 18.5118, "step": 7587 }, { "epoch": 0.13870254263622572, "grad_norm": 6.881622182942428, "learning_rate": 9.69342153939159e-06, "loss": 17.7963, "step": 7588 }, { "epoch": 0.13872082183267223, "grad_norm": 7.233722115198136, "learning_rate": 9.693319472943144e-06, "loss": 18.0334, "step": 7589 }, { "epoch": 0.13873910102911877, "grad_norm": 6.933665393243024, "learning_rate": 9.693217390045022e-06, "loss": 17.3835, "step": 7590 }, { "epoch": 0.13875738022556527, "grad_norm": 6.877804478341359, "learning_rate": 9.693115290697579e-06, "loss": 17.7326, "step": 7591 }, { "epoch": 0.1387756594220118, "grad_norm": 6.088325576611347, "learning_rate": 9.693013174901176e-06, "loss": 17.442, "step": 7592 }, { "epoch": 0.13879393861845835, "grad_norm": 5.491087823667131, "learning_rate": 9.692911042656168e-06, "loss": 17.1538, "step": 7593 }, { "epoch": 0.13881221781490485, "grad_norm": 6.739863673958903, "learning_rate": 9.692808893962913e-06, "loss": 17.9538, "step": 7594 }, { "epoch": 0.1388304970113514, "grad_norm": 7.724576329318004, "learning_rate": 9.69270672882177e-06, "loss": 18.2345, "step": 7595 }, { "epoch": 0.1388487762077979, "grad_norm": 8.162198807594477, "learning_rate": 9.6926045472331e-06, "loss": 18.0554, "step": 7596 }, { "epoch": 0.13886705540424443, "grad_norm": 6.825174864807807, "learning_rate": 9.692502349197255e-06, "loss": 17.8946, "step": 7597 }, { "epoch": 0.13888533460069094, "grad_norm": 6.685053782309902, "learning_rate": 9.692400134714597e-06, "loss": 17.8092, "step": 7598 }, { "epoch": 0.13890361379713748, "grad_norm": 7.50105563200056, "learning_rate": 9.692297903785485e-06, "loss": 17.4689, "step": 7599 }, { "epoch": 0.138921892993584, "grad_norm": 8.213849230114867, "learning_rate": 9.692195656410276e-06, "loss": 18.4874, "step": 7600 }, { "epoch": 0.13894017219003052, "grad_norm": 7.210729939117689, "learning_rate": 9.692093392589328e-06, "loss": 17.9628, "step": 7601 }, { "epoch": 0.13895845138647706, "grad_norm": 5.780872468116353, "learning_rate": 9.691991112323e-06, "loss": 17.5676, "step": 7602 }, { "epoch": 0.13897673058292356, "grad_norm": 7.7448713027600355, "learning_rate": 9.69188881561165e-06, "loss": 18.3937, "step": 7603 }, { "epoch": 0.1389950097793701, "grad_norm": 6.451954438687236, "learning_rate": 9.691786502455637e-06, "loss": 17.5783, "step": 7604 }, { "epoch": 0.13901328897581663, "grad_norm": 7.375328926641005, "learning_rate": 9.691684172855318e-06, "loss": 18.0649, "step": 7605 }, { "epoch": 0.13903156817226314, "grad_norm": 7.42328733850513, "learning_rate": 9.691581826811056e-06, "loss": 18.0613, "step": 7606 }, { "epoch": 0.13904984736870968, "grad_norm": 7.27599208052216, "learning_rate": 9.691479464323205e-06, "loss": 17.9711, "step": 7607 }, { "epoch": 0.13906812656515619, "grad_norm": 6.195330420273026, "learning_rate": 9.691377085392126e-06, "loss": 17.3167, "step": 7608 }, { "epoch": 0.13908640576160272, "grad_norm": 7.422560638770763, "learning_rate": 9.691274690018177e-06, "loss": 17.9469, "step": 7609 }, { "epoch": 0.13910468495804926, "grad_norm": 7.982168416130319, "learning_rate": 9.691172278201717e-06, "loss": 18.3047, "step": 7610 }, { "epoch": 0.13912296415449577, "grad_norm": 6.823450716965158, "learning_rate": 9.691069849943106e-06, "loss": 17.7405, "step": 7611 }, { "epoch": 0.1391412433509423, "grad_norm": 6.8991255450433995, "learning_rate": 9.690967405242702e-06, "loss": 17.9281, "step": 7612 }, { "epoch": 0.1391595225473888, "grad_norm": 7.549502658587642, "learning_rate": 9.690864944100864e-06, "loss": 18.1085, "step": 7613 }, { "epoch": 0.13917780174383534, "grad_norm": 7.010243806112929, "learning_rate": 9.690762466517953e-06, "loss": 17.9053, "step": 7614 }, { "epoch": 0.13919608094028185, "grad_norm": 6.2672935541821335, "learning_rate": 9.690659972494325e-06, "loss": 17.5679, "step": 7615 }, { "epoch": 0.1392143601367284, "grad_norm": 6.51920355904111, "learning_rate": 9.69055746203034e-06, "loss": 17.7973, "step": 7616 }, { "epoch": 0.13923263933317492, "grad_norm": 4.972536691426739, "learning_rate": 9.690454935126362e-06, "loss": 17.0055, "step": 7617 }, { "epoch": 0.13925091852962143, "grad_norm": 9.16414804210936, "learning_rate": 9.690352391782742e-06, "loss": 18.683, "step": 7618 }, { "epoch": 0.13926919772606797, "grad_norm": 6.353458986930466, "learning_rate": 9.690249831999845e-06, "loss": 17.6393, "step": 7619 }, { "epoch": 0.13928747692251447, "grad_norm": 7.549727257799057, "learning_rate": 9.69014725577803e-06, "loss": 17.8279, "step": 7620 }, { "epoch": 0.139305756118961, "grad_norm": 6.459371878675763, "learning_rate": 9.690044663117657e-06, "loss": 17.7354, "step": 7621 }, { "epoch": 0.13932403531540755, "grad_norm": 7.096093754094462, "learning_rate": 9.689942054019084e-06, "loss": 17.8056, "step": 7622 }, { "epoch": 0.13934231451185405, "grad_norm": 5.646822138702501, "learning_rate": 9.689839428482668e-06, "loss": 17.2833, "step": 7623 }, { "epoch": 0.1393605937083006, "grad_norm": 6.392818847550784, "learning_rate": 9.689736786508775e-06, "loss": 17.5295, "step": 7624 }, { "epoch": 0.1393788729047471, "grad_norm": 6.543122674993934, "learning_rate": 9.68963412809776e-06, "loss": 17.5072, "step": 7625 }, { "epoch": 0.13939715210119363, "grad_norm": 6.044145106145599, "learning_rate": 9.689531453249985e-06, "loss": 17.4496, "step": 7626 }, { "epoch": 0.13941543129764017, "grad_norm": 6.320195166276094, "learning_rate": 9.689428761965812e-06, "loss": 17.6063, "step": 7627 }, { "epoch": 0.13943371049408668, "grad_norm": 5.93275108209908, "learning_rate": 9.689326054245594e-06, "loss": 17.4617, "step": 7628 }, { "epoch": 0.1394519896905332, "grad_norm": 7.48701383614776, "learning_rate": 9.689223330089697e-06, "loss": 18.0072, "step": 7629 }, { "epoch": 0.13947026888697972, "grad_norm": 7.892054430291195, "learning_rate": 9.689120589498478e-06, "loss": 18.23, "step": 7630 }, { "epoch": 0.13948854808342626, "grad_norm": 8.388621093175491, "learning_rate": 9.689017832472298e-06, "loss": 18.298, "step": 7631 }, { "epoch": 0.13950682727987276, "grad_norm": 7.940115082999377, "learning_rate": 9.688915059011519e-06, "loss": 17.8922, "step": 7632 }, { "epoch": 0.1395251064763193, "grad_norm": 6.6214486318031405, "learning_rate": 9.688812269116498e-06, "loss": 17.3944, "step": 7633 }, { "epoch": 0.13954338567276584, "grad_norm": 8.291700876812332, "learning_rate": 9.688709462787598e-06, "loss": 18.3481, "step": 7634 }, { "epoch": 0.13956166486921234, "grad_norm": 8.239108156558169, "learning_rate": 9.688606640025178e-06, "loss": 18.5325, "step": 7635 }, { "epoch": 0.13957994406565888, "grad_norm": 7.025021516402914, "learning_rate": 9.6885038008296e-06, "loss": 17.9765, "step": 7636 }, { "epoch": 0.1395982232621054, "grad_norm": 6.098258722584013, "learning_rate": 9.68840094520122e-06, "loss": 17.4001, "step": 7637 }, { "epoch": 0.13961650245855192, "grad_norm": 7.6009707832906725, "learning_rate": 9.688298073140403e-06, "loss": 18.0495, "step": 7638 }, { "epoch": 0.13963478165499846, "grad_norm": 7.40103737490222, "learning_rate": 9.688195184647509e-06, "loss": 17.8624, "step": 7639 }, { "epoch": 0.13965306085144497, "grad_norm": 8.4314965735498, "learning_rate": 9.688092279722896e-06, "loss": 18.8786, "step": 7640 }, { "epoch": 0.1396713400478915, "grad_norm": 6.055062072160815, "learning_rate": 9.687989358366927e-06, "loss": 17.6618, "step": 7641 }, { "epoch": 0.139689619244338, "grad_norm": 6.05575625200334, "learning_rate": 9.687886420579962e-06, "loss": 17.3134, "step": 7642 }, { "epoch": 0.13970789844078454, "grad_norm": 8.622498771976538, "learning_rate": 9.687783466362362e-06, "loss": 18.3974, "step": 7643 }, { "epoch": 0.13972617763723108, "grad_norm": 5.865694263033465, "learning_rate": 9.687680495714488e-06, "loss": 17.2247, "step": 7644 }, { "epoch": 0.1397444568336776, "grad_norm": 6.811234057849274, "learning_rate": 9.6875775086367e-06, "loss": 17.7938, "step": 7645 }, { "epoch": 0.13976273603012412, "grad_norm": 7.106931351353856, "learning_rate": 9.687474505129362e-06, "loss": 18.1443, "step": 7646 }, { "epoch": 0.13978101522657063, "grad_norm": 6.482267760104404, "learning_rate": 9.687371485192831e-06, "loss": 17.4304, "step": 7647 }, { "epoch": 0.13979929442301717, "grad_norm": 7.974894210989417, "learning_rate": 9.687268448827468e-06, "loss": 18.3447, "step": 7648 }, { "epoch": 0.13981757361946368, "grad_norm": 7.5935022550416456, "learning_rate": 9.687165396033638e-06, "loss": 18.0296, "step": 7649 }, { "epoch": 0.1398358528159102, "grad_norm": 6.468510798971305, "learning_rate": 9.6870623268117e-06, "loss": 17.866, "step": 7650 }, { "epoch": 0.13985413201235675, "grad_norm": 7.5046531587992735, "learning_rate": 9.686959241162013e-06, "loss": 17.7633, "step": 7651 }, { "epoch": 0.13987241120880325, "grad_norm": 7.670355311867818, "learning_rate": 9.686856139084943e-06, "loss": 18.0413, "step": 7652 }, { "epoch": 0.1398906904052498, "grad_norm": 8.67865132995876, "learning_rate": 9.686753020580847e-06, "loss": 18.3526, "step": 7653 }, { "epoch": 0.1399089696016963, "grad_norm": 9.344161512670489, "learning_rate": 9.68664988565009e-06, "loss": 18.4226, "step": 7654 }, { "epoch": 0.13992724879814283, "grad_norm": 7.732963302029577, "learning_rate": 9.686546734293032e-06, "loss": 17.8059, "step": 7655 }, { "epoch": 0.13994552799458937, "grad_norm": 5.753291438789974, "learning_rate": 9.686443566510033e-06, "loss": 17.315, "step": 7656 }, { "epoch": 0.13996380719103588, "grad_norm": 6.825847797501684, "learning_rate": 9.686340382301457e-06, "loss": 17.8254, "step": 7657 }, { "epoch": 0.1399820863874824, "grad_norm": 9.393549575400462, "learning_rate": 9.686237181667664e-06, "loss": 18.7241, "step": 7658 }, { "epoch": 0.14000036558392892, "grad_norm": 5.97960450460996, "learning_rate": 9.686133964609017e-06, "loss": 17.4859, "step": 7659 }, { "epoch": 0.14001864478037546, "grad_norm": 8.19156414760745, "learning_rate": 9.686030731125877e-06, "loss": 18.161, "step": 7660 }, { "epoch": 0.140036923976822, "grad_norm": 7.13560642700512, "learning_rate": 9.685927481218605e-06, "loss": 17.7538, "step": 7661 }, { "epoch": 0.1400552031732685, "grad_norm": 7.955656266645502, "learning_rate": 9.685824214887565e-06, "loss": 18.4976, "step": 7662 }, { "epoch": 0.14007348236971504, "grad_norm": 10.783803224347071, "learning_rate": 9.685720932133117e-06, "loss": 18.1724, "step": 7663 }, { "epoch": 0.14009176156616154, "grad_norm": 7.623917149763039, "learning_rate": 9.685617632955625e-06, "loss": 18.119, "step": 7664 }, { "epoch": 0.14011004076260808, "grad_norm": 6.780303406814478, "learning_rate": 9.685514317355446e-06, "loss": 17.2607, "step": 7665 }, { "epoch": 0.1401283199590546, "grad_norm": 6.972500743781565, "learning_rate": 9.685410985332951e-06, "loss": 17.7824, "step": 7666 }, { "epoch": 0.14014659915550112, "grad_norm": 6.729420002075346, "learning_rate": 9.685307636888494e-06, "loss": 17.4817, "step": 7667 }, { "epoch": 0.14016487835194766, "grad_norm": 7.774515179824839, "learning_rate": 9.685204272022442e-06, "loss": 18.115, "step": 7668 }, { "epoch": 0.14018315754839417, "grad_norm": 6.472736167199539, "learning_rate": 9.685100890735153e-06, "loss": 17.8969, "step": 7669 }, { "epoch": 0.1402014367448407, "grad_norm": 7.681000135664825, "learning_rate": 9.684997493026994e-06, "loss": 18.0205, "step": 7670 }, { "epoch": 0.1402197159412872, "grad_norm": 8.360965006565488, "learning_rate": 9.684894078898325e-06, "loss": 18.1404, "step": 7671 }, { "epoch": 0.14023799513773375, "grad_norm": 6.861502415358219, "learning_rate": 9.68479064834951e-06, "loss": 17.7546, "step": 7672 }, { "epoch": 0.14025627433418028, "grad_norm": 7.025707723981484, "learning_rate": 9.684687201380908e-06, "loss": 17.9243, "step": 7673 }, { "epoch": 0.1402745535306268, "grad_norm": 6.833530124182017, "learning_rate": 9.684583737992884e-06, "loss": 17.7647, "step": 7674 }, { "epoch": 0.14029283272707332, "grad_norm": 6.082676889038833, "learning_rate": 9.684480258185802e-06, "loss": 17.3952, "step": 7675 }, { "epoch": 0.14031111192351983, "grad_norm": 8.40792409343925, "learning_rate": 9.684376761960022e-06, "loss": 18.2987, "step": 7676 }, { "epoch": 0.14032939111996637, "grad_norm": 7.225649769389998, "learning_rate": 9.684273249315909e-06, "loss": 18.0845, "step": 7677 }, { "epoch": 0.1403476703164129, "grad_norm": 6.613362749393355, "learning_rate": 9.684169720253824e-06, "loss": 17.6433, "step": 7678 }, { "epoch": 0.1403659495128594, "grad_norm": 6.17401684762471, "learning_rate": 9.68406617477413e-06, "loss": 17.6461, "step": 7679 }, { "epoch": 0.14038422870930595, "grad_norm": 5.907300268861695, "learning_rate": 9.683962612877191e-06, "loss": 17.4147, "step": 7680 }, { "epoch": 0.14040250790575245, "grad_norm": 6.438438388115875, "learning_rate": 9.68385903456337e-06, "loss": 17.7628, "step": 7681 }, { "epoch": 0.140420787102199, "grad_norm": 7.15609337283895, "learning_rate": 9.683755439833029e-06, "loss": 17.9813, "step": 7682 }, { "epoch": 0.1404390662986455, "grad_norm": 8.407826266878, "learning_rate": 9.683651828686533e-06, "loss": 18.6392, "step": 7683 }, { "epoch": 0.14045734549509203, "grad_norm": 7.438446230283374, "learning_rate": 9.683548201124242e-06, "loss": 18.1408, "step": 7684 }, { "epoch": 0.14047562469153857, "grad_norm": 5.922113971888481, "learning_rate": 9.683444557146522e-06, "loss": 17.4242, "step": 7685 }, { "epoch": 0.14049390388798508, "grad_norm": 7.767049220233212, "learning_rate": 9.683340896753736e-06, "loss": 18.243, "step": 7686 }, { "epoch": 0.1405121830844316, "grad_norm": 7.7351425289605285, "learning_rate": 9.683237219946244e-06, "loss": 17.8322, "step": 7687 }, { "epoch": 0.14053046228087812, "grad_norm": 5.766262580266691, "learning_rate": 9.683133526724413e-06, "loss": 17.4055, "step": 7688 }, { "epoch": 0.14054874147732466, "grad_norm": 5.425520660615894, "learning_rate": 9.683029817088608e-06, "loss": 17.2274, "step": 7689 }, { "epoch": 0.1405670206737712, "grad_norm": 7.575820684465325, "learning_rate": 9.682926091039187e-06, "loss": 18.0752, "step": 7690 }, { "epoch": 0.1405852998702177, "grad_norm": 8.092227554164923, "learning_rate": 9.682822348576518e-06, "loss": 17.9636, "step": 7691 }, { "epoch": 0.14060357906666424, "grad_norm": 7.253130689899035, "learning_rate": 9.68271858970096e-06, "loss": 17.9492, "step": 7692 }, { "epoch": 0.14062185826311074, "grad_norm": 8.145629293534444, "learning_rate": 9.682614814412883e-06, "loss": 18.4608, "step": 7693 }, { "epoch": 0.14064013745955728, "grad_norm": 7.160254710914674, "learning_rate": 9.682511022712646e-06, "loss": 17.9893, "step": 7694 }, { "epoch": 0.14065841665600382, "grad_norm": 7.357299262847844, "learning_rate": 9.682407214600615e-06, "loss": 18.0813, "step": 7695 }, { "epoch": 0.14067669585245032, "grad_norm": 8.08579147665916, "learning_rate": 9.682303390077153e-06, "loss": 17.9742, "step": 7696 }, { "epoch": 0.14069497504889686, "grad_norm": 6.973170726147441, "learning_rate": 9.682199549142623e-06, "loss": 17.6363, "step": 7697 }, { "epoch": 0.14071325424534337, "grad_norm": 6.969016413291826, "learning_rate": 9.682095691797391e-06, "loss": 18.1067, "step": 7698 }, { "epoch": 0.1407315334417899, "grad_norm": 7.506545156191458, "learning_rate": 9.681991818041818e-06, "loss": 18.1744, "step": 7699 }, { "epoch": 0.1407498126382364, "grad_norm": 7.9121187519485705, "learning_rate": 9.681887927876271e-06, "loss": 17.9692, "step": 7700 }, { "epoch": 0.14076809183468295, "grad_norm": 8.679743774834556, "learning_rate": 9.681784021301112e-06, "loss": 17.9166, "step": 7701 }, { "epoch": 0.14078637103112948, "grad_norm": 7.024434714046227, "learning_rate": 9.68168009831671e-06, "loss": 17.8942, "step": 7702 }, { "epoch": 0.140804650227576, "grad_norm": 7.349989985891914, "learning_rate": 9.681576158923423e-06, "loss": 18.1657, "step": 7703 }, { "epoch": 0.14082292942402252, "grad_norm": 8.459378945845003, "learning_rate": 9.681472203121617e-06, "loss": 18.7275, "step": 7704 }, { "epoch": 0.14084120862046903, "grad_norm": 9.695222479773127, "learning_rate": 9.681368230911659e-06, "loss": 18.4332, "step": 7705 }, { "epoch": 0.14085948781691557, "grad_norm": 7.186765750034922, "learning_rate": 9.68126424229391e-06, "loss": 18.0776, "step": 7706 }, { "epoch": 0.1408777670133621, "grad_norm": 6.854102062564552, "learning_rate": 9.681160237268737e-06, "loss": 17.9172, "step": 7707 }, { "epoch": 0.1408960462098086, "grad_norm": 7.3632229794090005, "learning_rate": 9.681056215836501e-06, "loss": 18.0963, "step": 7708 }, { "epoch": 0.14091432540625515, "grad_norm": 7.7221081410587455, "learning_rate": 9.680952177997572e-06, "loss": 18.1901, "step": 7709 }, { "epoch": 0.14093260460270166, "grad_norm": 5.924403423489709, "learning_rate": 9.680848123752312e-06, "loss": 17.3988, "step": 7710 }, { "epoch": 0.1409508837991482, "grad_norm": 6.993460661737724, "learning_rate": 9.680744053101084e-06, "loss": 17.8039, "step": 7711 }, { "epoch": 0.14096916299559473, "grad_norm": 8.421952771436054, "learning_rate": 9.680639966044256e-06, "loss": 18.4236, "step": 7712 }, { "epoch": 0.14098744219204123, "grad_norm": 6.789647333264264, "learning_rate": 9.68053586258219e-06, "loss": 17.6687, "step": 7713 }, { "epoch": 0.14100572138848777, "grad_norm": 7.722165563852823, "learning_rate": 9.680431742715252e-06, "loss": 17.7669, "step": 7714 }, { "epoch": 0.14102400058493428, "grad_norm": 7.719011591664162, "learning_rate": 9.680327606443806e-06, "loss": 18.1179, "step": 7715 }, { "epoch": 0.1410422797813808, "grad_norm": 7.767842326935953, "learning_rate": 9.680223453768219e-06, "loss": 18.1272, "step": 7716 }, { "epoch": 0.14106055897782732, "grad_norm": 7.5680030448530955, "learning_rate": 9.680119284688855e-06, "loss": 17.723, "step": 7717 }, { "epoch": 0.14107883817427386, "grad_norm": 7.7507445436356015, "learning_rate": 9.68001509920608e-06, "loss": 18.0347, "step": 7718 }, { "epoch": 0.1410971173707204, "grad_norm": 7.307328906945655, "learning_rate": 9.679910897320254e-06, "loss": 17.8308, "step": 7719 }, { "epoch": 0.1411153965671669, "grad_norm": 7.951724740756475, "learning_rate": 9.679806679031751e-06, "loss": 18.0057, "step": 7720 }, { "epoch": 0.14113367576361344, "grad_norm": 6.778070343960303, "learning_rate": 9.67970244434093e-06, "loss": 17.4964, "step": 7721 }, { "epoch": 0.14115195496005994, "grad_norm": 8.07497389710269, "learning_rate": 9.679598193248159e-06, "loss": 18.3359, "step": 7722 }, { "epoch": 0.14117023415650648, "grad_norm": 6.6248376575767605, "learning_rate": 9.6794939257538e-06, "loss": 17.7897, "step": 7723 }, { "epoch": 0.14118851335295302, "grad_norm": 5.905110351666292, "learning_rate": 9.679389641858224e-06, "loss": 17.2798, "step": 7724 }, { "epoch": 0.14120679254939952, "grad_norm": 7.142681101724337, "learning_rate": 9.67928534156179e-06, "loss": 17.8447, "step": 7725 }, { "epoch": 0.14122507174584606, "grad_norm": 6.413274281557343, "learning_rate": 9.679181024864869e-06, "loss": 17.6408, "step": 7726 }, { "epoch": 0.14124335094229257, "grad_norm": 8.088982050404411, "learning_rate": 9.679076691767823e-06, "loss": 18.3308, "step": 7727 }, { "epoch": 0.1412616301387391, "grad_norm": 7.639545561389265, "learning_rate": 9.678972342271023e-06, "loss": 17.6528, "step": 7728 }, { "epoch": 0.14127990933518564, "grad_norm": 8.758924487537499, "learning_rate": 9.678867976374827e-06, "loss": 18.6422, "step": 7729 }, { "epoch": 0.14129818853163215, "grad_norm": 7.8443891245671065, "learning_rate": 9.678763594079605e-06, "loss": 18.2581, "step": 7730 }, { "epoch": 0.14131646772807868, "grad_norm": 8.158889575581766, "learning_rate": 9.678659195385724e-06, "loss": 17.9907, "step": 7731 }, { "epoch": 0.1413347469245252, "grad_norm": 7.128034971801864, "learning_rate": 9.67855478029355e-06, "loss": 18.0491, "step": 7732 }, { "epoch": 0.14135302612097173, "grad_norm": 7.524698700334921, "learning_rate": 9.678450348803445e-06, "loss": 18.0207, "step": 7733 }, { "epoch": 0.14137130531741823, "grad_norm": 7.413578969658936, "learning_rate": 9.678345900915778e-06, "loss": 17.9179, "step": 7734 }, { "epoch": 0.14138958451386477, "grad_norm": 6.944010819432446, "learning_rate": 9.678241436630916e-06, "loss": 17.7795, "step": 7735 }, { "epoch": 0.1414078637103113, "grad_norm": 9.25296851022195, "learning_rate": 9.67813695594922e-06, "loss": 18.8079, "step": 7736 }, { "epoch": 0.1414261429067578, "grad_norm": 7.284723548912895, "learning_rate": 9.678032458871063e-06, "loss": 18.1341, "step": 7737 }, { "epoch": 0.14144442210320435, "grad_norm": 8.14959581041399, "learning_rate": 9.677927945396808e-06, "loss": 18.3563, "step": 7738 }, { "epoch": 0.14146270129965086, "grad_norm": 7.108870755550117, "learning_rate": 9.677823415526822e-06, "loss": 17.8498, "step": 7739 }, { "epoch": 0.1414809804960974, "grad_norm": 6.407079401501023, "learning_rate": 9.67771886926147e-06, "loss": 17.5913, "step": 7740 }, { "epoch": 0.14149925969254393, "grad_norm": 7.224881529647223, "learning_rate": 9.67761430660112e-06, "loss": 17.9471, "step": 7741 }, { "epoch": 0.14151753888899044, "grad_norm": 6.837148647998776, "learning_rate": 9.677509727546134e-06, "loss": 17.4927, "step": 7742 }, { "epoch": 0.14153581808543697, "grad_norm": 7.846606169916832, "learning_rate": 9.677405132096887e-06, "loss": 18.4396, "step": 7743 }, { "epoch": 0.14155409728188348, "grad_norm": 7.524877752546834, "learning_rate": 9.677300520253738e-06, "loss": 18.0791, "step": 7744 }, { "epoch": 0.14157237647833001, "grad_norm": 7.487832029774735, "learning_rate": 9.677195892017059e-06, "loss": 17.7253, "step": 7745 }, { "epoch": 0.14159065567477655, "grad_norm": 6.419594968610413, "learning_rate": 9.677091247387214e-06, "loss": 17.5961, "step": 7746 }, { "epoch": 0.14160893487122306, "grad_norm": 7.8628128876797945, "learning_rate": 9.676986586364567e-06, "loss": 18.4315, "step": 7747 }, { "epoch": 0.1416272140676696, "grad_norm": 7.299063522511162, "learning_rate": 9.676881908949492e-06, "loss": 17.8903, "step": 7748 }, { "epoch": 0.1416454932641161, "grad_norm": 5.806594807064861, "learning_rate": 9.676777215142348e-06, "loss": 17.2699, "step": 7749 }, { "epoch": 0.14166377246056264, "grad_norm": 5.770711123757083, "learning_rate": 9.676672504943508e-06, "loss": 17.4297, "step": 7750 }, { "epoch": 0.14168205165700914, "grad_norm": 7.502838539343097, "learning_rate": 9.676567778353337e-06, "loss": 17.9715, "step": 7751 }, { "epoch": 0.14170033085345568, "grad_norm": 6.662998191797699, "learning_rate": 9.6764630353722e-06, "loss": 17.5581, "step": 7752 }, { "epoch": 0.14171861004990222, "grad_norm": 7.416255808314275, "learning_rate": 9.676358276000466e-06, "loss": 17.9441, "step": 7753 }, { "epoch": 0.14173688924634872, "grad_norm": 7.192597832118229, "learning_rate": 9.676253500238503e-06, "loss": 17.8188, "step": 7754 }, { "epoch": 0.14175516844279526, "grad_norm": 6.303949808613113, "learning_rate": 9.676148708086677e-06, "loss": 17.5782, "step": 7755 }, { "epoch": 0.14177344763924177, "grad_norm": 7.537919742112997, "learning_rate": 9.676043899545356e-06, "loss": 18.1434, "step": 7756 }, { "epoch": 0.1417917268356883, "grad_norm": 6.324639285936871, "learning_rate": 9.675939074614907e-06, "loss": 17.4917, "step": 7757 }, { "epoch": 0.14181000603213484, "grad_norm": 6.705794880450858, "learning_rate": 9.675834233295696e-06, "loss": 17.5962, "step": 7758 }, { "epoch": 0.14182828522858135, "grad_norm": 6.831759017623418, "learning_rate": 9.675729375588092e-06, "loss": 17.6193, "step": 7759 }, { "epoch": 0.14184656442502788, "grad_norm": 6.973219084686587, "learning_rate": 9.675624501492462e-06, "loss": 17.7955, "step": 7760 }, { "epoch": 0.1418648436214744, "grad_norm": 6.677520348869061, "learning_rate": 9.675519611009176e-06, "loss": 17.5277, "step": 7761 }, { "epoch": 0.14188312281792093, "grad_norm": 6.567747401858718, "learning_rate": 9.675414704138596e-06, "loss": 17.499, "step": 7762 }, { "epoch": 0.14190140201436746, "grad_norm": 6.144341090505756, "learning_rate": 9.675309780881097e-06, "loss": 17.484, "step": 7763 }, { "epoch": 0.14191968121081397, "grad_norm": 6.445653628061477, "learning_rate": 9.67520484123704e-06, "loss": 17.36, "step": 7764 }, { "epoch": 0.1419379604072605, "grad_norm": 8.560003692490604, "learning_rate": 9.675099885206798e-06, "loss": 18.2682, "step": 7765 }, { "epoch": 0.141956239603707, "grad_norm": 7.859397461991227, "learning_rate": 9.674994912790736e-06, "loss": 18.1824, "step": 7766 }, { "epoch": 0.14197451880015355, "grad_norm": 7.106172855319866, "learning_rate": 9.674889923989222e-06, "loss": 17.9058, "step": 7767 }, { "epoch": 0.14199279799660006, "grad_norm": 7.00829803230192, "learning_rate": 9.674784918802624e-06, "loss": 17.3542, "step": 7768 }, { "epoch": 0.1420110771930466, "grad_norm": 7.173244707430433, "learning_rate": 9.674679897231311e-06, "loss": 17.8225, "step": 7769 }, { "epoch": 0.14202935638949313, "grad_norm": 7.232542256664458, "learning_rate": 9.67457485927565e-06, "loss": 17.7652, "step": 7770 }, { "epoch": 0.14204763558593964, "grad_norm": 7.01716133149273, "learning_rate": 9.674469804936012e-06, "loss": 17.7022, "step": 7771 }, { "epoch": 0.14206591478238617, "grad_norm": 5.810302252713791, "learning_rate": 9.67436473421276e-06, "loss": 17.1869, "step": 7772 }, { "epoch": 0.14208419397883268, "grad_norm": 7.134096152854323, "learning_rate": 9.674259647106268e-06, "loss": 17.8671, "step": 7773 }, { "epoch": 0.14210247317527921, "grad_norm": 6.786937004516573, "learning_rate": 9.6741545436169e-06, "loss": 17.7471, "step": 7774 }, { "epoch": 0.14212075237172575, "grad_norm": 6.500229037330626, "learning_rate": 9.674049423745025e-06, "loss": 17.8578, "step": 7775 }, { "epoch": 0.14213903156817226, "grad_norm": 6.220294935342864, "learning_rate": 9.673944287491013e-06, "loss": 17.4893, "step": 7776 }, { "epoch": 0.1421573107646188, "grad_norm": 6.9788471623255175, "learning_rate": 9.673839134855233e-06, "loss": 17.8216, "step": 7777 }, { "epoch": 0.1421755899610653, "grad_norm": 6.73552468574272, "learning_rate": 9.673733965838053e-06, "loss": 17.7899, "step": 7778 }, { "epoch": 0.14219386915751184, "grad_norm": 7.2627811451102335, "learning_rate": 9.673628780439839e-06, "loss": 17.8894, "step": 7779 }, { "epoch": 0.14221214835395837, "grad_norm": 7.622253576899311, "learning_rate": 9.673523578660962e-06, "loss": 18.0757, "step": 7780 }, { "epoch": 0.14223042755040488, "grad_norm": 6.749261020223755, "learning_rate": 9.67341836050179e-06, "loss": 17.6294, "step": 7781 }, { "epoch": 0.14224870674685142, "grad_norm": 5.852666143775732, "learning_rate": 9.673313125962693e-06, "loss": 17.6006, "step": 7782 }, { "epoch": 0.14226698594329792, "grad_norm": 5.88140172454095, "learning_rate": 9.673207875044039e-06, "loss": 17.4948, "step": 7783 }, { "epoch": 0.14228526513974446, "grad_norm": 7.210294985040148, "learning_rate": 9.673102607746198e-06, "loss": 17.5802, "step": 7784 }, { "epoch": 0.14230354433619097, "grad_norm": 6.494521279404934, "learning_rate": 9.672997324069536e-06, "loss": 17.5406, "step": 7785 }, { "epoch": 0.1423218235326375, "grad_norm": 8.046854484935118, "learning_rate": 9.672892024014426e-06, "loss": 18.7805, "step": 7786 }, { "epoch": 0.14234010272908404, "grad_norm": 7.789728345892877, "learning_rate": 9.672786707581232e-06, "loss": 18.1358, "step": 7787 }, { "epoch": 0.14235838192553055, "grad_norm": 7.264851122492589, "learning_rate": 9.672681374770328e-06, "loss": 17.9878, "step": 7788 }, { "epoch": 0.14237666112197708, "grad_norm": 6.076518274182032, "learning_rate": 9.672576025582081e-06, "loss": 17.3517, "step": 7789 }, { "epoch": 0.1423949403184236, "grad_norm": 6.342632611380603, "learning_rate": 9.672470660016862e-06, "loss": 17.4663, "step": 7790 }, { "epoch": 0.14241321951487013, "grad_norm": 6.879084098925314, "learning_rate": 9.672365278075035e-06, "loss": 17.7861, "step": 7791 }, { "epoch": 0.14243149871131666, "grad_norm": 7.5159207183567025, "learning_rate": 9.672259879756976e-06, "loss": 17.668, "step": 7792 }, { "epoch": 0.14244977790776317, "grad_norm": 8.093294225857699, "learning_rate": 9.672154465063051e-06, "loss": 18.1166, "step": 7793 }, { "epoch": 0.1424680571042097, "grad_norm": 6.536931922435906, "learning_rate": 9.672049033993632e-06, "loss": 17.5174, "step": 7794 }, { "epoch": 0.1424863363006562, "grad_norm": 5.961237974227077, "learning_rate": 9.671943586549085e-06, "loss": 17.2613, "step": 7795 }, { "epoch": 0.14250461549710275, "grad_norm": 7.349228206352338, "learning_rate": 9.67183812272978e-06, "loss": 17.9133, "step": 7796 }, { "epoch": 0.14252289469354928, "grad_norm": 7.074451173895017, "learning_rate": 9.671732642536087e-06, "loss": 17.6657, "step": 7797 }, { "epoch": 0.1425411738899958, "grad_norm": 6.834222829579723, "learning_rate": 9.67162714596838e-06, "loss": 17.8446, "step": 7798 }, { "epoch": 0.14255945308644233, "grad_norm": 6.867635214185919, "learning_rate": 9.671521633027022e-06, "loss": 17.5231, "step": 7799 }, { "epoch": 0.14257773228288884, "grad_norm": 7.517409601017157, "learning_rate": 9.671416103712389e-06, "loss": 18.0342, "step": 7800 }, { "epoch": 0.14259601147933537, "grad_norm": 7.247493154377069, "learning_rate": 9.671310558024844e-06, "loss": 17.6106, "step": 7801 }, { "epoch": 0.14261429067578188, "grad_norm": 8.626717076846722, "learning_rate": 9.671204995964762e-06, "loss": 18.3426, "step": 7802 }, { "epoch": 0.14263256987222842, "grad_norm": 6.661527484937428, "learning_rate": 9.671099417532515e-06, "loss": 17.5053, "step": 7803 }, { "epoch": 0.14265084906867495, "grad_norm": 8.057202515842757, "learning_rate": 9.670993822728467e-06, "loss": 17.7858, "step": 7804 }, { "epoch": 0.14266912826512146, "grad_norm": 7.05030211660043, "learning_rate": 9.670888211552992e-06, "loss": 17.6583, "step": 7805 }, { "epoch": 0.142687407461568, "grad_norm": 5.706726345107122, "learning_rate": 9.670782584006459e-06, "loss": 17.141, "step": 7806 }, { "epoch": 0.1427056866580145, "grad_norm": 7.344611184842882, "learning_rate": 9.670676940089239e-06, "loss": 17.8914, "step": 7807 }, { "epoch": 0.14272396585446104, "grad_norm": 7.022220843795626, "learning_rate": 9.670571279801699e-06, "loss": 17.6126, "step": 7808 }, { "epoch": 0.14274224505090757, "grad_norm": 7.564178950445632, "learning_rate": 9.670465603144214e-06, "loss": 18.0142, "step": 7809 }, { "epoch": 0.14276052424735408, "grad_norm": 5.9335851077868735, "learning_rate": 9.670359910117153e-06, "loss": 17.1887, "step": 7810 }, { "epoch": 0.14277880344380062, "grad_norm": 6.846377150268151, "learning_rate": 9.670254200720886e-06, "loss": 17.8817, "step": 7811 }, { "epoch": 0.14279708264024712, "grad_norm": 6.747451226197997, "learning_rate": 9.67014847495578e-06, "loss": 17.6365, "step": 7812 }, { "epoch": 0.14281536183669366, "grad_norm": 8.90710576128569, "learning_rate": 9.670042732822212e-06, "loss": 18.3125, "step": 7813 }, { "epoch": 0.1428336410331402, "grad_norm": 5.612174147043535, "learning_rate": 9.669936974320548e-06, "loss": 17.2074, "step": 7814 }, { "epoch": 0.1428519202295867, "grad_norm": 6.7189199468273495, "learning_rate": 9.669831199451161e-06, "loss": 17.4914, "step": 7815 }, { "epoch": 0.14287019942603324, "grad_norm": 6.57693825095924, "learning_rate": 9.66972540821442e-06, "loss": 17.2984, "step": 7816 }, { "epoch": 0.14288847862247975, "grad_norm": 7.099477125925888, "learning_rate": 9.669619600610699e-06, "loss": 17.7003, "step": 7817 }, { "epoch": 0.14290675781892628, "grad_norm": 5.980220718531627, "learning_rate": 9.669513776640364e-06, "loss": 17.449, "step": 7818 }, { "epoch": 0.1429250370153728, "grad_norm": 6.659901544676826, "learning_rate": 9.66940793630379e-06, "loss": 17.4634, "step": 7819 }, { "epoch": 0.14294331621181933, "grad_norm": 7.064695813405876, "learning_rate": 9.669302079601345e-06, "loss": 17.9158, "step": 7820 }, { "epoch": 0.14296159540826586, "grad_norm": 9.535819291754025, "learning_rate": 9.669196206533402e-06, "loss": 18.5917, "step": 7821 }, { "epoch": 0.14297987460471237, "grad_norm": 6.882356198216137, "learning_rate": 9.669090317100331e-06, "loss": 17.802, "step": 7822 }, { "epoch": 0.1429981538011589, "grad_norm": 8.259487175303988, "learning_rate": 9.668984411302504e-06, "loss": 18.0371, "step": 7823 }, { "epoch": 0.1430164329976054, "grad_norm": 7.107527463967547, "learning_rate": 9.668878489140292e-06, "loss": 17.5423, "step": 7824 }, { "epoch": 0.14303471219405195, "grad_norm": 6.455719803755345, "learning_rate": 9.668772550614067e-06, "loss": 17.4033, "step": 7825 }, { "epoch": 0.14305299139049849, "grad_norm": 6.653754036169663, "learning_rate": 9.668666595724196e-06, "loss": 17.7759, "step": 7826 }, { "epoch": 0.143071270586945, "grad_norm": 7.153991568865042, "learning_rate": 9.668560624471057e-06, "loss": 17.9118, "step": 7827 }, { "epoch": 0.14308954978339153, "grad_norm": 6.569018776209766, "learning_rate": 9.668454636855018e-06, "loss": 17.8073, "step": 7828 }, { "epoch": 0.14310782897983804, "grad_norm": 6.221145113425062, "learning_rate": 9.668348632876448e-06, "loss": 17.3776, "step": 7829 }, { "epoch": 0.14312610817628457, "grad_norm": 6.1130743532573595, "learning_rate": 9.668242612535723e-06, "loss": 17.3465, "step": 7830 }, { "epoch": 0.1431443873727311, "grad_norm": 8.126195268243293, "learning_rate": 9.668136575833213e-06, "loss": 18.4532, "step": 7831 }, { "epoch": 0.14316266656917762, "grad_norm": 6.292462249411014, "learning_rate": 9.668030522769289e-06, "loss": 17.4285, "step": 7832 }, { "epoch": 0.14318094576562415, "grad_norm": 6.387665515722563, "learning_rate": 9.667924453344324e-06, "loss": 17.534, "step": 7833 }, { "epoch": 0.14319922496207066, "grad_norm": 5.734749013084801, "learning_rate": 9.667818367558687e-06, "loss": 17.2402, "step": 7834 }, { "epoch": 0.1432175041585172, "grad_norm": 6.849385585121317, "learning_rate": 9.667712265412751e-06, "loss": 18.0146, "step": 7835 }, { "epoch": 0.1432357833549637, "grad_norm": 7.558442824431708, "learning_rate": 9.667606146906892e-06, "loss": 17.546, "step": 7836 }, { "epoch": 0.14325406255141024, "grad_norm": 7.460597625231734, "learning_rate": 9.667500012041476e-06, "loss": 17.6715, "step": 7837 }, { "epoch": 0.14327234174785677, "grad_norm": 7.270287286908869, "learning_rate": 9.667393860816878e-06, "loss": 17.7303, "step": 7838 }, { "epoch": 0.14329062094430328, "grad_norm": 6.437807876874178, "learning_rate": 9.667287693233471e-06, "loss": 17.6004, "step": 7839 }, { "epoch": 0.14330890014074982, "grad_norm": 7.281635820821214, "learning_rate": 9.667181509291623e-06, "loss": 17.8611, "step": 7840 }, { "epoch": 0.14332717933719633, "grad_norm": 9.048553612734262, "learning_rate": 9.66707530899171e-06, "loss": 18.4027, "step": 7841 }, { "epoch": 0.14334545853364286, "grad_norm": 8.407622272493642, "learning_rate": 9.666969092334104e-06, "loss": 18.317, "step": 7842 }, { "epoch": 0.1433637377300894, "grad_norm": 6.741525791108761, "learning_rate": 9.666862859319175e-06, "loss": 17.5255, "step": 7843 }, { "epoch": 0.1433820169265359, "grad_norm": 7.334684755636911, "learning_rate": 9.666756609947297e-06, "loss": 17.7949, "step": 7844 }, { "epoch": 0.14340029612298244, "grad_norm": 6.222155132693972, "learning_rate": 9.666650344218842e-06, "loss": 17.4305, "step": 7845 }, { "epoch": 0.14341857531942895, "grad_norm": 7.249972428703729, "learning_rate": 9.666544062134182e-06, "loss": 17.6051, "step": 7846 }, { "epoch": 0.14343685451587548, "grad_norm": 6.377084097167007, "learning_rate": 9.666437763693691e-06, "loss": 17.5562, "step": 7847 }, { "epoch": 0.14345513371232202, "grad_norm": 7.704324039458236, "learning_rate": 9.66633144889774e-06, "loss": 18.0369, "step": 7848 }, { "epoch": 0.14347341290876853, "grad_norm": 8.830517083057435, "learning_rate": 9.666225117746703e-06, "loss": 18.7811, "step": 7849 }, { "epoch": 0.14349169210521506, "grad_norm": 7.704071862496725, "learning_rate": 9.66611877024095e-06, "loss": 17.5736, "step": 7850 }, { "epoch": 0.14350997130166157, "grad_norm": 6.587411827645895, "learning_rate": 9.666012406380858e-06, "loss": 17.5842, "step": 7851 }, { "epoch": 0.1435282504981081, "grad_norm": 10.814748016890894, "learning_rate": 9.665906026166796e-06, "loss": 19.9485, "step": 7852 }, { "epoch": 0.14354652969455461, "grad_norm": 6.428524945881435, "learning_rate": 9.66579962959914e-06, "loss": 17.6798, "step": 7853 }, { "epoch": 0.14356480889100115, "grad_norm": 7.238464464725674, "learning_rate": 9.665693216678259e-06, "loss": 17.9204, "step": 7854 }, { "epoch": 0.14358308808744769, "grad_norm": 7.525942968671081, "learning_rate": 9.665586787404528e-06, "loss": 18.2486, "step": 7855 }, { "epoch": 0.1436013672838942, "grad_norm": 6.829776281861548, "learning_rate": 9.665480341778322e-06, "loss": 17.51, "step": 7856 }, { "epoch": 0.14361964648034073, "grad_norm": 7.571890437381861, "learning_rate": 9.66537387980001e-06, "loss": 18.1075, "step": 7857 }, { "epoch": 0.14363792567678724, "grad_norm": 7.484927683844003, "learning_rate": 9.66526740146997e-06, "loss": 17.8707, "step": 7858 }, { "epoch": 0.14365620487323377, "grad_norm": 8.00425417309483, "learning_rate": 9.665160906788571e-06, "loss": 18.1998, "step": 7859 }, { "epoch": 0.1436744840696803, "grad_norm": 7.277403688336308, "learning_rate": 9.665054395756188e-06, "loss": 17.9342, "step": 7860 }, { "epoch": 0.14369276326612682, "grad_norm": 7.344833164501549, "learning_rate": 9.664947868373195e-06, "loss": 17.8342, "step": 7861 }, { "epoch": 0.14371104246257335, "grad_norm": 7.627254165312603, "learning_rate": 9.664841324639963e-06, "loss": 18.0097, "step": 7862 }, { "epoch": 0.14372932165901986, "grad_norm": 6.655124794610509, "learning_rate": 9.664734764556869e-06, "loss": 17.5146, "step": 7863 }, { "epoch": 0.1437476008554664, "grad_norm": 7.226636848068177, "learning_rate": 9.664628188124282e-06, "loss": 17.9697, "step": 7864 }, { "epoch": 0.14376588005191293, "grad_norm": 7.629781323015025, "learning_rate": 9.66452159534258e-06, "loss": 18.1848, "step": 7865 }, { "epoch": 0.14378415924835944, "grad_norm": 6.377562023438023, "learning_rate": 9.664414986212134e-06, "loss": 17.3943, "step": 7866 }, { "epoch": 0.14380243844480597, "grad_norm": 7.888258891692893, "learning_rate": 9.664308360733316e-06, "loss": 18.0158, "step": 7867 }, { "epoch": 0.14382071764125248, "grad_norm": 6.701332911179065, "learning_rate": 9.664201718906506e-06, "loss": 17.4241, "step": 7868 }, { "epoch": 0.14383899683769902, "grad_norm": 6.929538947451322, "learning_rate": 9.66409506073207e-06, "loss": 17.7239, "step": 7869 }, { "epoch": 0.14385727603414553, "grad_norm": 7.044341210749867, "learning_rate": 9.663988386210388e-06, "loss": 17.8299, "step": 7870 }, { "epoch": 0.14387555523059206, "grad_norm": 6.927395988763612, "learning_rate": 9.66388169534183e-06, "loss": 17.7399, "step": 7871 }, { "epoch": 0.1438938344270386, "grad_norm": 6.678235413763966, "learning_rate": 9.663774988126772e-06, "loss": 17.2341, "step": 7872 }, { "epoch": 0.1439121136234851, "grad_norm": 6.617014339663367, "learning_rate": 9.663668264565589e-06, "loss": 17.5994, "step": 7873 }, { "epoch": 0.14393039281993164, "grad_norm": 6.135451945627378, "learning_rate": 9.663561524658652e-06, "loss": 17.3528, "step": 7874 }, { "epoch": 0.14394867201637815, "grad_norm": 10.054335633136397, "learning_rate": 9.663454768406335e-06, "loss": 17.8716, "step": 7875 }, { "epoch": 0.14396695121282468, "grad_norm": 7.821853537649704, "learning_rate": 9.663347995809016e-06, "loss": 17.5585, "step": 7876 }, { "epoch": 0.14398523040927122, "grad_norm": 6.6592194858021765, "learning_rate": 9.663241206867065e-06, "loss": 17.5602, "step": 7877 }, { "epoch": 0.14400350960571773, "grad_norm": 7.406957218568989, "learning_rate": 9.66313440158086e-06, "loss": 17.6681, "step": 7878 }, { "epoch": 0.14402178880216426, "grad_norm": 7.183931433865366, "learning_rate": 9.663027579950771e-06, "loss": 17.731, "step": 7879 }, { "epoch": 0.14404006799861077, "grad_norm": 6.795145994337344, "learning_rate": 9.662920741977177e-06, "loss": 17.5502, "step": 7880 }, { "epoch": 0.1440583471950573, "grad_norm": 7.425098615702723, "learning_rate": 9.662813887660451e-06, "loss": 17.3886, "step": 7881 }, { "epoch": 0.14407662639150384, "grad_norm": 7.467302204427384, "learning_rate": 9.662707017000967e-06, "loss": 18.0887, "step": 7882 }, { "epoch": 0.14409490558795035, "grad_norm": 8.129398788716378, "learning_rate": 9.662600129999098e-06, "loss": 18.5446, "step": 7883 }, { "epoch": 0.14411318478439689, "grad_norm": 6.196146066413878, "learning_rate": 9.66249322665522e-06, "loss": 17.2915, "step": 7884 }, { "epoch": 0.1441314639808434, "grad_norm": 6.0383151707049025, "learning_rate": 9.662386306969708e-06, "loss": 17.2602, "step": 7885 }, { "epoch": 0.14414974317728993, "grad_norm": 6.422942706740273, "learning_rate": 9.66227937094294e-06, "loss": 17.3709, "step": 7886 }, { "epoch": 0.14416802237373644, "grad_norm": 7.858451025377491, "learning_rate": 9.662172418575284e-06, "loss": 18.1276, "step": 7887 }, { "epoch": 0.14418630157018297, "grad_norm": 7.249390263111178, "learning_rate": 9.662065449867117e-06, "loss": 17.5421, "step": 7888 }, { "epoch": 0.1442045807666295, "grad_norm": 7.125012968987952, "learning_rate": 9.661958464818818e-06, "loss": 17.6351, "step": 7889 }, { "epoch": 0.14422285996307602, "grad_norm": 6.6766843944047745, "learning_rate": 9.661851463430757e-06, "loss": 17.5045, "step": 7890 }, { "epoch": 0.14424113915952255, "grad_norm": 8.734256701885561, "learning_rate": 9.661744445703314e-06, "loss": 18.4634, "step": 7891 }, { "epoch": 0.14425941835596906, "grad_norm": 6.883130009873722, "learning_rate": 9.661637411636859e-06, "loss": 17.9115, "step": 7892 }, { "epoch": 0.1442776975524156, "grad_norm": 7.642350001935957, "learning_rate": 9.66153036123177e-06, "loss": 18.0374, "step": 7893 }, { "epoch": 0.14429597674886213, "grad_norm": 7.02852331794176, "learning_rate": 9.66142329448842e-06, "loss": 17.8951, "step": 7894 }, { "epoch": 0.14431425594530864, "grad_norm": 8.516363209070965, "learning_rate": 9.66131621140719e-06, "loss": 18.2965, "step": 7895 }, { "epoch": 0.14433253514175517, "grad_norm": 6.603701088154492, "learning_rate": 9.661209111988448e-06, "loss": 17.6132, "step": 7896 }, { "epoch": 0.14435081433820168, "grad_norm": 6.540115720505726, "learning_rate": 9.661101996232572e-06, "loss": 17.4681, "step": 7897 }, { "epoch": 0.14436909353464822, "grad_norm": 6.274607422961523, "learning_rate": 9.66099486413994e-06, "loss": 17.4182, "step": 7898 }, { "epoch": 0.14438737273109475, "grad_norm": 6.738978375188624, "learning_rate": 9.660887715710923e-06, "loss": 17.4482, "step": 7899 }, { "epoch": 0.14440565192754126, "grad_norm": 7.9156443709594555, "learning_rate": 9.6607805509459e-06, "loss": 18.0224, "step": 7900 }, { "epoch": 0.1444239311239878, "grad_norm": 6.811895160844932, "learning_rate": 9.660673369845247e-06, "loss": 17.6024, "step": 7901 }, { "epoch": 0.1444422103204343, "grad_norm": 6.721687375262563, "learning_rate": 9.660566172409339e-06, "loss": 17.6821, "step": 7902 }, { "epoch": 0.14446048951688084, "grad_norm": 7.7652829502734955, "learning_rate": 9.660458958638547e-06, "loss": 17.9376, "step": 7903 }, { "epoch": 0.14447876871332735, "grad_norm": 8.002762225554621, "learning_rate": 9.660351728533256e-06, "loss": 18.1968, "step": 7904 }, { "epoch": 0.14449704790977388, "grad_norm": 6.918270285826691, "learning_rate": 9.660244482093833e-06, "loss": 17.6539, "step": 7905 }, { "epoch": 0.14451532710622042, "grad_norm": 8.26402368486267, "learning_rate": 9.660137219320658e-06, "loss": 18.4072, "step": 7906 }, { "epoch": 0.14453360630266693, "grad_norm": 7.355595966344457, "learning_rate": 9.660029940214107e-06, "loss": 18.0655, "step": 7907 }, { "epoch": 0.14455188549911346, "grad_norm": 6.551187241907956, "learning_rate": 9.659922644774555e-06, "loss": 17.6814, "step": 7908 }, { "epoch": 0.14457016469555997, "grad_norm": 6.360395957826397, "learning_rate": 9.659815333002378e-06, "loss": 17.6062, "step": 7909 }, { "epoch": 0.1445884438920065, "grad_norm": 6.073180624066442, "learning_rate": 9.659708004897953e-06, "loss": 17.4986, "step": 7910 }, { "epoch": 0.14460672308845304, "grad_norm": 7.76183195591155, "learning_rate": 9.659600660461657e-06, "loss": 18.102, "step": 7911 }, { "epoch": 0.14462500228489955, "grad_norm": 8.64672075810826, "learning_rate": 9.659493299693862e-06, "loss": 17.8132, "step": 7912 }, { "epoch": 0.1446432814813461, "grad_norm": 6.318985302818086, "learning_rate": 9.65938592259495e-06, "loss": 17.5563, "step": 7913 }, { "epoch": 0.1446615606777926, "grad_norm": 6.467317018488494, "learning_rate": 9.659278529165295e-06, "loss": 17.3215, "step": 7914 }, { "epoch": 0.14467983987423913, "grad_norm": 6.232911434779717, "learning_rate": 9.659171119405272e-06, "loss": 17.7199, "step": 7915 }, { "epoch": 0.14469811907068567, "grad_norm": 6.687040938801778, "learning_rate": 9.659063693315259e-06, "loss": 17.5385, "step": 7916 }, { "epoch": 0.14471639826713217, "grad_norm": 7.415660908423637, "learning_rate": 9.658956250895631e-06, "loss": 17.9783, "step": 7917 }, { "epoch": 0.1447346774635787, "grad_norm": 7.98814069842345, "learning_rate": 9.658848792146767e-06, "loss": 18.0066, "step": 7918 }, { "epoch": 0.14475295666002522, "grad_norm": 7.844484202682719, "learning_rate": 9.658741317069042e-06, "loss": 17.8192, "step": 7919 }, { "epoch": 0.14477123585647175, "grad_norm": 6.42135409314389, "learning_rate": 9.65863382566283e-06, "loss": 17.5073, "step": 7920 }, { "epoch": 0.14478951505291826, "grad_norm": 7.155972754662482, "learning_rate": 9.658526317928515e-06, "loss": 17.7637, "step": 7921 }, { "epoch": 0.1448077942493648, "grad_norm": 7.226949915611771, "learning_rate": 9.658418793866468e-06, "loss": 17.7123, "step": 7922 }, { "epoch": 0.14482607344581133, "grad_norm": 6.366337750877327, "learning_rate": 9.658311253477066e-06, "loss": 17.652, "step": 7923 }, { "epoch": 0.14484435264225784, "grad_norm": 7.225688591426677, "learning_rate": 9.658203696760688e-06, "loss": 17.9749, "step": 7924 }, { "epoch": 0.14486263183870438, "grad_norm": 7.059189808583485, "learning_rate": 9.658096123717713e-06, "loss": 17.8764, "step": 7925 }, { "epoch": 0.14488091103515088, "grad_norm": 6.54725420805, "learning_rate": 9.65798853434851e-06, "loss": 17.533, "step": 7926 }, { "epoch": 0.14489919023159742, "grad_norm": 8.026008665268165, "learning_rate": 9.657880928653465e-06, "loss": 18.3109, "step": 7927 }, { "epoch": 0.14491746942804395, "grad_norm": 9.46751004244742, "learning_rate": 9.657773306632951e-06, "loss": 18.9406, "step": 7928 }, { "epoch": 0.14493574862449046, "grad_norm": 6.59519055352195, "learning_rate": 9.657665668287345e-06, "loss": 17.6014, "step": 7929 }, { "epoch": 0.144954027820937, "grad_norm": 7.99155854136064, "learning_rate": 9.657558013617028e-06, "loss": 18.1103, "step": 7930 }, { "epoch": 0.1449723070173835, "grad_norm": 6.067982080334245, "learning_rate": 9.657450342622371e-06, "loss": 17.3093, "step": 7931 }, { "epoch": 0.14499058621383004, "grad_norm": 6.81035303639027, "learning_rate": 9.657342655303756e-06, "loss": 17.6884, "step": 7932 }, { "epoch": 0.14500886541027658, "grad_norm": 7.270363925155759, "learning_rate": 9.657234951661558e-06, "loss": 17.8257, "step": 7933 }, { "epoch": 0.14502714460672309, "grad_norm": 5.945637240185427, "learning_rate": 9.657127231696157e-06, "loss": 17.4792, "step": 7934 }, { "epoch": 0.14504542380316962, "grad_norm": 6.75617973397292, "learning_rate": 9.657019495407929e-06, "loss": 17.554, "step": 7935 }, { "epoch": 0.14506370299961613, "grad_norm": 6.7116586245694405, "learning_rate": 9.65691174279725e-06, "loss": 17.6093, "step": 7936 }, { "epoch": 0.14508198219606266, "grad_norm": 6.064911541197413, "learning_rate": 9.656803973864502e-06, "loss": 17.1471, "step": 7937 }, { "epoch": 0.14510026139250917, "grad_norm": 8.827844568819508, "learning_rate": 9.656696188610059e-06, "loss": 18.6249, "step": 7938 }, { "epoch": 0.1451185405889557, "grad_norm": 6.6565575863675965, "learning_rate": 9.656588387034301e-06, "loss": 17.5621, "step": 7939 }, { "epoch": 0.14513681978540224, "grad_norm": 6.424663743895477, "learning_rate": 9.656480569137602e-06, "loss": 17.4067, "step": 7940 }, { "epoch": 0.14515509898184875, "grad_norm": 7.788567478831424, "learning_rate": 9.656372734920345e-06, "loss": 18.0129, "step": 7941 }, { "epoch": 0.1451733781782953, "grad_norm": 7.143982819053273, "learning_rate": 9.656264884382905e-06, "loss": 17.9295, "step": 7942 }, { "epoch": 0.1451916573747418, "grad_norm": 9.235043443693883, "learning_rate": 9.65615701752566e-06, "loss": 17.9051, "step": 7943 }, { "epoch": 0.14520993657118833, "grad_norm": 6.128521553350904, "learning_rate": 9.65604913434899e-06, "loss": 17.4178, "step": 7944 }, { "epoch": 0.14522821576763487, "grad_norm": 6.962582465150469, "learning_rate": 9.655941234853272e-06, "loss": 17.841, "step": 7945 }, { "epoch": 0.14524649496408137, "grad_norm": 6.385896950318575, "learning_rate": 9.655833319038883e-06, "loss": 17.4645, "step": 7946 }, { "epoch": 0.1452647741605279, "grad_norm": 7.562271523019747, "learning_rate": 9.655725386906202e-06, "loss": 18.0817, "step": 7947 }, { "epoch": 0.14528305335697442, "grad_norm": 7.199204285470184, "learning_rate": 9.655617438455608e-06, "loss": 17.69, "step": 7948 }, { "epoch": 0.14530133255342095, "grad_norm": 6.840929518434979, "learning_rate": 9.655509473687479e-06, "loss": 17.4528, "step": 7949 }, { "epoch": 0.1453196117498675, "grad_norm": 6.711427011156019, "learning_rate": 9.655401492602192e-06, "loss": 17.4159, "step": 7950 }, { "epoch": 0.145337890946314, "grad_norm": 5.965039206783731, "learning_rate": 9.655293495200128e-06, "loss": 17.2832, "step": 7951 }, { "epoch": 0.14535617014276053, "grad_norm": 7.310858054849152, "learning_rate": 9.655185481481663e-06, "loss": 17.6517, "step": 7952 }, { "epoch": 0.14537444933920704, "grad_norm": 6.977513673077216, "learning_rate": 9.655077451447179e-06, "loss": 17.6552, "step": 7953 }, { "epoch": 0.14539272853565358, "grad_norm": 5.821779274738296, "learning_rate": 9.654969405097053e-06, "loss": 17.3532, "step": 7954 }, { "epoch": 0.14541100773210008, "grad_norm": 6.758713108971858, "learning_rate": 9.654861342431661e-06, "loss": 17.5775, "step": 7955 }, { "epoch": 0.14542928692854662, "grad_norm": 7.863685222010629, "learning_rate": 9.654753263451385e-06, "loss": 17.8642, "step": 7956 }, { "epoch": 0.14544756612499316, "grad_norm": 9.153913203782722, "learning_rate": 9.654645168156601e-06, "loss": 18.5971, "step": 7957 }, { "epoch": 0.14546584532143966, "grad_norm": 6.745258601713347, "learning_rate": 9.654537056547691e-06, "loss": 17.6042, "step": 7958 }, { "epoch": 0.1454841245178862, "grad_norm": 5.9247000548100734, "learning_rate": 9.654428928625033e-06, "loss": 17.271, "step": 7959 }, { "epoch": 0.1455024037143327, "grad_norm": 7.441009387067109, "learning_rate": 9.654320784389004e-06, "loss": 17.9059, "step": 7960 }, { "epoch": 0.14552068291077924, "grad_norm": 6.891244859298569, "learning_rate": 9.654212623839985e-06, "loss": 18.0463, "step": 7961 }, { "epoch": 0.14553896210722578, "grad_norm": 6.297664198439215, "learning_rate": 9.654104446978357e-06, "loss": 17.7346, "step": 7962 }, { "epoch": 0.14555724130367229, "grad_norm": 8.043669055116208, "learning_rate": 9.653996253804493e-06, "loss": 17.9576, "step": 7963 }, { "epoch": 0.14557552050011882, "grad_norm": 7.57704490359053, "learning_rate": 9.653888044318778e-06, "loss": 18.1225, "step": 7964 }, { "epoch": 0.14559379969656533, "grad_norm": 7.788256163236495, "learning_rate": 9.65377981852159e-06, "loss": 17.9134, "step": 7965 }, { "epoch": 0.14561207889301186, "grad_norm": 6.710542169959897, "learning_rate": 9.653671576413306e-06, "loss": 17.732, "step": 7966 }, { "epoch": 0.1456303580894584, "grad_norm": 6.024146392515028, "learning_rate": 9.653563317994307e-06, "loss": 17.4358, "step": 7967 }, { "epoch": 0.1456486372859049, "grad_norm": 6.633504266152885, "learning_rate": 9.653455043264974e-06, "loss": 17.4505, "step": 7968 }, { "epoch": 0.14566691648235144, "grad_norm": 6.211227687837813, "learning_rate": 9.653346752225683e-06, "loss": 17.4499, "step": 7969 }, { "epoch": 0.14568519567879795, "grad_norm": 8.400148275059234, "learning_rate": 9.653238444876817e-06, "loss": 18.3171, "step": 7970 }, { "epoch": 0.1457034748752445, "grad_norm": 7.739331881310937, "learning_rate": 9.653130121218754e-06, "loss": 18.3042, "step": 7971 }, { "epoch": 0.145721754071691, "grad_norm": 7.955012682984678, "learning_rate": 9.653021781251872e-06, "loss": 17.9062, "step": 7972 }, { "epoch": 0.14574003326813753, "grad_norm": 6.259595286305838, "learning_rate": 9.652913424976553e-06, "loss": 17.4814, "step": 7973 }, { "epoch": 0.14575831246458407, "grad_norm": 6.078297879047728, "learning_rate": 9.652805052393178e-06, "loss": 17.4524, "step": 7974 }, { "epoch": 0.14577659166103057, "grad_norm": 9.375932177975441, "learning_rate": 9.652696663502123e-06, "loss": 18.5682, "step": 7975 }, { "epoch": 0.1457948708574771, "grad_norm": 6.257526122207114, "learning_rate": 9.65258825830377e-06, "loss": 17.4044, "step": 7976 }, { "epoch": 0.14581315005392362, "grad_norm": 7.945139211966594, "learning_rate": 9.652479836798501e-06, "loss": 18.2119, "step": 7977 }, { "epoch": 0.14583142925037015, "grad_norm": 7.520533804922809, "learning_rate": 9.65237139898669e-06, "loss": 17.8324, "step": 7978 }, { "epoch": 0.1458497084468167, "grad_norm": 7.5445427347964, "learning_rate": 9.652262944868724e-06, "loss": 18.0339, "step": 7979 }, { "epoch": 0.1458679876432632, "grad_norm": 5.744613699632315, "learning_rate": 9.65215447444498e-06, "loss": 17.1529, "step": 7980 }, { "epoch": 0.14588626683970973, "grad_norm": 7.102725612497753, "learning_rate": 9.652045987715838e-06, "loss": 17.7757, "step": 7981 }, { "epoch": 0.14590454603615624, "grad_norm": 9.217413539931432, "learning_rate": 9.651937484681678e-06, "loss": 18.5535, "step": 7982 }, { "epoch": 0.14592282523260278, "grad_norm": 7.146597866539307, "learning_rate": 9.651828965342882e-06, "loss": 17.6874, "step": 7983 }, { "epoch": 0.1459411044290493, "grad_norm": 5.622544955129633, "learning_rate": 9.651720429699827e-06, "loss": 17.1046, "step": 7984 }, { "epoch": 0.14595938362549582, "grad_norm": 7.543782899563225, "learning_rate": 9.651611877752897e-06, "loss": 17.7099, "step": 7985 }, { "epoch": 0.14597766282194236, "grad_norm": 7.342211067682582, "learning_rate": 9.65150330950247e-06, "loss": 17.8767, "step": 7986 }, { "epoch": 0.14599594201838886, "grad_norm": 7.435391159299244, "learning_rate": 9.651394724948929e-06, "loss": 17.9265, "step": 7987 }, { "epoch": 0.1460142212148354, "grad_norm": 6.28794682429356, "learning_rate": 9.651286124092653e-06, "loss": 17.5435, "step": 7988 }, { "epoch": 0.1460325004112819, "grad_norm": 6.252197394803422, "learning_rate": 9.651177506934022e-06, "loss": 17.2206, "step": 7989 }, { "epoch": 0.14605077960772844, "grad_norm": 7.09374348651782, "learning_rate": 9.651068873473417e-06, "loss": 17.8473, "step": 7990 }, { "epoch": 0.14606905880417498, "grad_norm": 7.013619760961217, "learning_rate": 9.65096022371122e-06, "loss": 17.5454, "step": 7991 }, { "epoch": 0.14608733800062149, "grad_norm": 7.179235417174337, "learning_rate": 9.65085155764781e-06, "loss": 17.66, "step": 7992 }, { "epoch": 0.14610561719706802, "grad_norm": 5.4883918065536434, "learning_rate": 9.65074287528357e-06, "loss": 16.9709, "step": 7993 }, { "epoch": 0.14612389639351453, "grad_norm": 8.699244438136956, "learning_rate": 9.65063417661888e-06, "loss": 18.3041, "step": 7994 }, { "epoch": 0.14614217558996107, "grad_norm": 7.01955896623706, "learning_rate": 9.65052546165412e-06, "loss": 17.5045, "step": 7995 }, { "epoch": 0.1461604547864076, "grad_norm": 7.715833247104848, "learning_rate": 9.650416730389672e-06, "loss": 18.2539, "step": 7996 }, { "epoch": 0.1461787339828541, "grad_norm": 7.261767826837706, "learning_rate": 9.650307982825917e-06, "loss": 17.5952, "step": 7997 }, { "epoch": 0.14619701317930064, "grad_norm": 6.385983497206502, "learning_rate": 9.650199218963236e-06, "loss": 17.6332, "step": 7998 }, { "epoch": 0.14621529237574715, "grad_norm": 8.344318378430048, "learning_rate": 9.650090438802012e-06, "loss": 18.0755, "step": 7999 }, { "epoch": 0.1462335715721937, "grad_norm": 6.472024326689775, "learning_rate": 9.649981642342621e-06, "loss": 17.4189, "step": 8000 }, { "epoch": 0.14625185076864022, "grad_norm": 6.715048457193912, "learning_rate": 9.64987282958545e-06, "loss": 17.6987, "step": 8001 }, { "epoch": 0.14627012996508673, "grad_norm": 6.13079996475156, "learning_rate": 9.649764000530878e-06, "loss": 17.2704, "step": 8002 }, { "epoch": 0.14628840916153327, "grad_norm": 6.671879485010782, "learning_rate": 9.649655155179287e-06, "loss": 17.36, "step": 8003 }, { "epoch": 0.14630668835797978, "grad_norm": 7.370770516624002, "learning_rate": 9.649546293531057e-06, "loss": 17.542, "step": 8004 }, { "epoch": 0.1463249675544263, "grad_norm": 8.079342234334176, "learning_rate": 9.64943741558657e-06, "loss": 18.2899, "step": 8005 }, { "epoch": 0.14634324675087282, "grad_norm": 6.903482231699645, "learning_rate": 9.64932852134621e-06, "loss": 17.8285, "step": 8006 }, { "epoch": 0.14636152594731935, "grad_norm": 10.070179892659032, "learning_rate": 9.649219610810359e-06, "loss": 18.9447, "step": 8007 }, { "epoch": 0.1463798051437659, "grad_norm": 7.730014505390262, "learning_rate": 9.649110683979394e-06, "loss": 18.2886, "step": 8008 }, { "epoch": 0.1463980843402124, "grad_norm": 8.30281918365547, "learning_rate": 9.6490017408537e-06, "loss": 18.4275, "step": 8009 }, { "epoch": 0.14641636353665893, "grad_norm": 7.1197873864420895, "learning_rate": 9.648892781433657e-06, "loss": 17.7806, "step": 8010 }, { "epoch": 0.14643464273310544, "grad_norm": 8.427168269867161, "learning_rate": 9.64878380571965e-06, "loss": 18.1895, "step": 8011 }, { "epoch": 0.14645292192955198, "grad_norm": 6.783781928597438, "learning_rate": 9.648674813712059e-06, "loss": 17.9056, "step": 8012 }, { "epoch": 0.1464712011259985, "grad_norm": 6.678274522360001, "learning_rate": 9.648565805411265e-06, "loss": 17.4284, "step": 8013 }, { "epoch": 0.14648948032244502, "grad_norm": 7.8352790288474585, "learning_rate": 9.648456780817651e-06, "loss": 18.2195, "step": 8014 }, { "epoch": 0.14650775951889156, "grad_norm": 8.09375938615011, "learning_rate": 9.648347739931603e-06, "loss": 17.9966, "step": 8015 }, { "epoch": 0.14652603871533806, "grad_norm": 7.659916058842947, "learning_rate": 9.648238682753497e-06, "loss": 17.8873, "step": 8016 }, { "epoch": 0.1465443179117846, "grad_norm": 5.915365313583533, "learning_rate": 9.648129609283716e-06, "loss": 17.3144, "step": 8017 }, { "epoch": 0.14656259710823114, "grad_norm": 7.193445790671711, "learning_rate": 9.648020519522647e-06, "loss": 17.7632, "step": 8018 }, { "epoch": 0.14658087630467764, "grad_norm": 7.0641124489645, "learning_rate": 9.647911413470668e-06, "loss": 17.9136, "step": 8019 }, { "epoch": 0.14659915550112418, "grad_norm": 5.959791630226095, "learning_rate": 9.647802291128163e-06, "loss": 17.2778, "step": 8020 }, { "epoch": 0.1466174346975707, "grad_norm": 6.2009743065833485, "learning_rate": 9.647693152495514e-06, "loss": 17.4677, "step": 8021 }, { "epoch": 0.14663571389401722, "grad_norm": 7.8164225640752045, "learning_rate": 9.647583997573105e-06, "loss": 18.0936, "step": 8022 }, { "epoch": 0.14665399309046373, "grad_norm": 5.6426609914409065, "learning_rate": 9.647474826361316e-06, "loss": 17.1358, "step": 8023 }, { "epoch": 0.14667227228691027, "grad_norm": 7.281189933670641, "learning_rate": 9.64736563886053e-06, "loss": 18.0002, "step": 8024 }, { "epoch": 0.1466905514833568, "grad_norm": 7.156884754375371, "learning_rate": 9.647256435071133e-06, "loss": 17.9063, "step": 8025 }, { "epoch": 0.1467088306798033, "grad_norm": 5.953496097128827, "learning_rate": 9.647147214993504e-06, "loss": 17.1232, "step": 8026 }, { "epoch": 0.14672710987624984, "grad_norm": 7.920627054244241, "learning_rate": 9.647037978628029e-06, "loss": 17.8285, "step": 8027 }, { "epoch": 0.14674538907269635, "grad_norm": 10.69892625842698, "learning_rate": 9.646928725975087e-06, "loss": 18.6059, "step": 8028 }, { "epoch": 0.1467636682691429, "grad_norm": 6.3435637466396715, "learning_rate": 9.646819457035064e-06, "loss": 17.3268, "step": 8029 }, { "epoch": 0.14678194746558942, "grad_norm": 7.240777143096906, "learning_rate": 9.646710171808342e-06, "loss": 18.0841, "step": 8030 }, { "epoch": 0.14680022666203593, "grad_norm": 8.367751790399302, "learning_rate": 9.646600870295305e-06, "loss": 18.0374, "step": 8031 }, { "epoch": 0.14681850585848247, "grad_norm": 6.4560850582753275, "learning_rate": 9.646491552496336e-06, "loss": 17.5513, "step": 8032 }, { "epoch": 0.14683678505492898, "grad_norm": 7.321443836457617, "learning_rate": 9.646382218411813e-06, "loss": 18.0615, "step": 8033 }, { "epoch": 0.1468550642513755, "grad_norm": 6.3652537476119555, "learning_rate": 9.646272868042129e-06, "loss": 17.4165, "step": 8034 }, { "epoch": 0.14687334344782205, "grad_norm": 7.594431029046291, "learning_rate": 9.646163501387658e-06, "loss": 17.8259, "step": 8035 }, { "epoch": 0.14689162264426855, "grad_norm": 7.19503817395823, "learning_rate": 9.646054118448787e-06, "loss": 17.8467, "step": 8036 }, { "epoch": 0.1469099018407151, "grad_norm": 10.580051274042303, "learning_rate": 9.645944719225902e-06, "loss": 18.1914, "step": 8037 }, { "epoch": 0.1469281810371616, "grad_norm": 8.852314628875943, "learning_rate": 9.645835303719382e-06, "loss": 18.2564, "step": 8038 }, { "epoch": 0.14694646023360813, "grad_norm": 6.427733925668023, "learning_rate": 9.645725871929614e-06, "loss": 17.3538, "step": 8039 }, { "epoch": 0.14696473943005464, "grad_norm": 7.311157127937562, "learning_rate": 9.645616423856978e-06, "loss": 17.7997, "step": 8040 }, { "epoch": 0.14698301862650118, "grad_norm": 6.070143885303128, "learning_rate": 9.64550695950186e-06, "loss": 17.2713, "step": 8041 }, { "epoch": 0.1470012978229477, "grad_norm": 5.817498232664236, "learning_rate": 9.645397478864645e-06, "loss": 17.4812, "step": 8042 }, { "epoch": 0.14701957701939422, "grad_norm": 6.67491314100426, "learning_rate": 9.645287981945712e-06, "loss": 17.652, "step": 8043 }, { "epoch": 0.14703785621584076, "grad_norm": 7.288494360246087, "learning_rate": 9.64517846874545e-06, "loss": 18.0519, "step": 8044 }, { "epoch": 0.14705613541228726, "grad_norm": 7.709262361894076, "learning_rate": 9.64506893926424e-06, "loss": 17.821, "step": 8045 }, { "epoch": 0.1470744146087338, "grad_norm": 7.2491534881899415, "learning_rate": 9.644959393502467e-06, "loss": 17.957, "step": 8046 }, { "epoch": 0.14709269380518034, "grad_norm": 8.27859495332456, "learning_rate": 9.644849831460513e-06, "loss": 18.3226, "step": 8047 }, { "epoch": 0.14711097300162684, "grad_norm": 6.229448535308603, "learning_rate": 9.644740253138765e-06, "loss": 17.3666, "step": 8048 }, { "epoch": 0.14712925219807338, "grad_norm": 8.281485590991695, "learning_rate": 9.644630658537604e-06, "loss": 18.089, "step": 8049 }, { "epoch": 0.1471475313945199, "grad_norm": 7.260511924938508, "learning_rate": 9.644521047657416e-06, "loss": 17.8782, "step": 8050 }, { "epoch": 0.14716581059096642, "grad_norm": 8.075624424086456, "learning_rate": 9.644411420498585e-06, "loss": 17.9388, "step": 8051 }, { "epoch": 0.14718408978741296, "grad_norm": 7.205715603077248, "learning_rate": 9.644301777061495e-06, "loss": 18.0204, "step": 8052 }, { "epoch": 0.14720236898385947, "grad_norm": 7.077535799197512, "learning_rate": 9.64419211734653e-06, "loss": 17.7748, "step": 8053 }, { "epoch": 0.147220648180306, "grad_norm": 9.997143239685304, "learning_rate": 9.644082441354075e-06, "loss": 17.8055, "step": 8054 }, { "epoch": 0.1472389273767525, "grad_norm": 8.951299796489199, "learning_rate": 9.643972749084513e-06, "loss": 18.4639, "step": 8055 }, { "epoch": 0.14725720657319905, "grad_norm": 6.512609840491759, "learning_rate": 9.643863040538231e-06, "loss": 17.4196, "step": 8056 }, { "epoch": 0.14727548576964555, "grad_norm": 9.13164669058537, "learning_rate": 9.64375331571561e-06, "loss": 17.8644, "step": 8057 }, { "epoch": 0.1472937649660921, "grad_norm": 6.442756046873929, "learning_rate": 9.643643574617039e-06, "loss": 17.5869, "step": 8058 }, { "epoch": 0.14731204416253862, "grad_norm": 6.495695349937012, "learning_rate": 9.6435338172429e-06, "loss": 17.464, "step": 8059 }, { "epoch": 0.14733032335898513, "grad_norm": 7.259579718403372, "learning_rate": 9.643424043593576e-06, "loss": 17.7688, "step": 8060 }, { "epoch": 0.14734860255543167, "grad_norm": 7.712910100182315, "learning_rate": 9.643314253669455e-06, "loss": 17.8725, "step": 8061 }, { "epoch": 0.14736688175187818, "grad_norm": 7.739286195484188, "learning_rate": 9.643204447470922e-06, "loss": 17.7906, "step": 8062 }, { "epoch": 0.1473851609483247, "grad_norm": 7.248123211068903, "learning_rate": 9.643094624998357e-06, "loss": 17.7043, "step": 8063 }, { "epoch": 0.14740344014477125, "grad_norm": 7.188458907414833, "learning_rate": 9.64298478625215e-06, "loss": 17.6625, "step": 8064 }, { "epoch": 0.14742171934121776, "grad_norm": 7.148484029794683, "learning_rate": 9.642874931232684e-06, "loss": 17.8504, "step": 8065 }, { "epoch": 0.1474399985376643, "grad_norm": 7.387959283440316, "learning_rate": 9.642765059940344e-06, "loss": 18.3822, "step": 8066 }, { "epoch": 0.1474582777341108, "grad_norm": 8.856985975380033, "learning_rate": 9.642655172375516e-06, "loss": 18.7405, "step": 8067 }, { "epoch": 0.14747655693055733, "grad_norm": 7.766467161611203, "learning_rate": 9.642545268538585e-06, "loss": 17.7352, "step": 8068 }, { "epoch": 0.14749483612700387, "grad_norm": 6.911400134219339, "learning_rate": 9.642435348429935e-06, "loss": 17.7442, "step": 8069 }, { "epoch": 0.14751311532345038, "grad_norm": 8.535640558915466, "learning_rate": 9.642325412049952e-06, "loss": 18.199, "step": 8070 }, { "epoch": 0.1475313945198969, "grad_norm": 8.537335326600212, "learning_rate": 9.64221545939902e-06, "loss": 17.82, "step": 8071 }, { "epoch": 0.14754967371634342, "grad_norm": 6.633873060369528, "learning_rate": 9.642105490477527e-06, "loss": 17.7843, "step": 8072 }, { "epoch": 0.14756795291278996, "grad_norm": 6.4278340117422905, "learning_rate": 9.641995505285858e-06, "loss": 17.3694, "step": 8073 }, { "epoch": 0.14758623210923646, "grad_norm": 7.069412562342773, "learning_rate": 9.641885503824395e-06, "loss": 18.1548, "step": 8074 }, { "epoch": 0.147604511305683, "grad_norm": 6.767035618080706, "learning_rate": 9.64177548609353e-06, "loss": 17.9882, "step": 8075 }, { "epoch": 0.14762279050212954, "grad_norm": 7.903839015408739, "learning_rate": 9.641665452093641e-06, "loss": 18.1572, "step": 8076 }, { "epoch": 0.14764106969857604, "grad_norm": 7.2825054707225405, "learning_rate": 9.641555401825118e-06, "loss": 17.8129, "step": 8077 }, { "epoch": 0.14765934889502258, "grad_norm": 6.761561047503617, "learning_rate": 9.641445335288346e-06, "loss": 17.7295, "step": 8078 }, { "epoch": 0.1476776280914691, "grad_norm": 6.618816072053667, "learning_rate": 9.641335252483712e-06, "loss": 17.4507, "step": 8079 }, { "epoch": 0.14769590728791562, "grad_norm": 6.487847127290106, "learning_rate": 9.6412251534116e-06, "loss": 17.3601, "step": 8080 }, { "epoch": 0.14771418648436216, "grad_norm": 6.9250736911951725, "learning_rate": 9.641115038072397e-06, "loss": 17.8905, "step": 8081 }, { "epoch": 0.14773246568080867, "grad_norm": 8.476983225022746, "learning_rate": 9.641004906466488e-06, "loss": 18.3762, "step": 8082 }, { "epoch": 0.1477507448772552, "grad_norm": 8.761072999506217, "learning_rate": 9.64089475859426e-06, "loss": 18.624, "step": 8083 }, { "epoch": 0.1477690240737017, "grad_norm": 6.328830632630538, "learning_rate": 9.6407845944561e-06, "loss": 17.3398, "step": 8084 }, { "epoch": 0.14778730327014825, "grad_norm": 8.9338263657113, "learning_rate": 9.640674414052391e-06, "loss": 18.9664, "step": 8085 }, { "epoch": 0.14780558246659478, "grad_norm": 7.661138891664235, "learning_rate": 9.640564217383522e-06, "loss": 18.3349, "step": 8086 }, { "epoch": 0.1478238616630413, "grad_norm": 7.187970700208174, "learning_rate": 9.640454004449877e-06, "loss": 17.8618, "step": 8087 }, { "epoch": 0.14784214085948783, "grad_norm": 5.871236125636451, "learning_rate": 9.640343775251844e-06, "loss": 17.2336, "step": 8088 }, { "epoch": 0.14786042005593433, "grad_norm": 7.393317643417872, "learning_rate": 9.640233529789806e-06, "loss": 17.8336, "step": 8089 }, { "epoch": 0.14787869925238087, "grad_norm": 9.451071523509363, "learning_rate": 9.640123268064156e-06, "loss": 18.0403, "step": 8090 }, { "epoch": 0.14789697844882738, "grad_norm": 6.5374182020368, "learning_rate": 9.640012990075274e-06, "loss": 17.6703, "step": 8091 }, { "epoch": 0.1479152576452739, "grad_norm": 8.233324766017134, "learning_rate": 9.63990269582355e-06, "loss": 17.9709, "step": 8092 }, { "epoch": 0.14793353684172045, "grad_norm": 5.731050775772712, "learning_rate": 9.63979238530937e-06, "loss": 17.242, "step": 8093 }, { "epoch": 0.14795181603816696, "grad_norm": 6.218482515209717, "learning_rate": 9.63968205853312e-06, "loss": 17.3829, "step": 8094 }, { "epoch": 0.1479700952346135, "grad_norm": 6.736553892820769, "learning_rate": 9.639571715495189e-06, "loss": 17.6474, "step": 8095 }, { "epoch": 0.14798837443106, "grad_norm": 7.513901910688545, "learning_rate": 9.639461356195958e-06, "loss": 18.1575, "step": 8096 }, { "epoch": 0.14800665362750653, "grad_norm": 10.488684500822124, "learning_rate": 9.63935098063582e-06, "loss": 17.6842, "step": 8097 }, { "epoch": 0.14802493282395307, "grad_norm": 8.081786160838991, "learning_rate": 9.63924058881516e-06, "loss": 18.2097, "step": 8098 }, { "epoch": 0.14804321202039958, "grad_norm": 8.68145308215769, "learning_rate": 9.639130180734362e-06, "loss": 18.1371, "step": 8099 }, { "epoch": 0.14806149121684611, "grad_norm": 8.067734968278177, "learning_rate": 9.639019756393817e-06, "loss": 17.8399, "step": 8100 }, { "epoch": 0.14807977041329262, "grad_norm": 6.382101361188045, "learning_rate": 9.63890931579391e-06, "loss": 17.3079, "step": 8101 }, { "epoch": 0.14809804960973916, "grad_norm": 5.183982865642566, "learning_rate": 9.638798858935028e-06, "loss": 16.8802, "step": 8102 }, { "epoch": 0.1481163288061857, "grad_norm": 6.6951633909441535, "learning_rate": 9.638688385817558e-06, "loss": 17.7609, "step": 8103 }, { "epoch": 0.1481346080026322, "grad_norm": 6.194876638611268, "learning_rate": 9.63857789644189e-06, "loss": 17.3454, "step": 8104 }, { "epoch": 0.14815288719907874, "grad_norm": 5.317406037764646, "learning_rate": 9.638467390808405e-06, "loss": 17.1418, "step": 8105 }, { "epoch": 0.14817116639552524, "grad_norm": 6.895413692636472, "learning_rate": 9.638356868917497e-06, "loss": 17.577, "step": 8106 }, { "epoch": 0.14818944559197178, "grad_norm": 8.008443410440348, "learning_rate": 9.638246330769552e-06, "loss": 18.035, "step": 8107 }, { "epoch": 0.1482077247884183, "grad_norm": 5.74120716951446, "learning_rate": 9.638135776364954e-06, "loss": 17.203, "step": 8108 }, { "epoch": 0.14822600398486482, "grad_norm": 8.54913912805209, "learning_rate": 9.638025205704094e-06, "loss": 18.1013, "step": 8109 }, { "epoch": 0.14824428318131136, "grad_norm": 8.312580974732796, "learning_rate": 9.637914618787356e-06, "loss": 18.1672, "step": 8110 }, { "epoch": 0.14826256237775787, "grad_norm": 6.738350444607828, "learning_rate": 9.63780401561513e-06, "loss": 17.7801, "step": 8111 }, { "epoch": 0.1482808415742044, "grad_norm": 7.886413767494075, "learning_rate": 9.637693396187806e-06, "loss": 17.9368, "step": 8112 }, { "epoch": 0.1482991207706509, "grad_norm": 7.875815936487451, "learning_rate": 9.637582760505767e-06, "loss": 17.3937, "step": 8113 }, { "epoch": 0.14831739996709745, "grad_norm": 6.596152726567714, "learning_rate": 9.637472108569404e-06, "loss": 17.5027, "step": 8114 }, { "epoch": 0.14833567916354398, "grad_norm": 6.710516305595572, "learning_rate": 9.637361440379102e-06, "loss": 17.8169, "step": 8115 }, { "epoch": 0.1483539583599905, "grad_norm": 6.928265849809754, "learning_rate": 9.637250755935252e-06, "loss": 17.3836, "step": 8116 }, { "epoch": 0.14837223755643703, "grad_norm": 6.500901711677311, "learning_rate": 9.63714005523824e-06, "loss": 17.4885, "step": 8117 }, { "epoch": 0.14839051675288353, "grad_norm": 7.405538827349513, "learning_rate": 9.637029338288454e-06, "loss": 17.837, "step": 8118 }, { "epoch": 0.14840879594933007, "grad_norm": 7.252059624928057, "learning_rate": 9.636918605086283e-06, "loss": 17.5855, "step": 8119 }, { "epoch": 0.1484270751457766, "grad_norm": 6.926791768096578, "learning_rate": 9.636807855632115e-06, "loss": 17.6411, "step": 8120 }, { "epoch": 0.1484453543422231, "grad_norm": 6.5719604807225895, "learning_rate": 9.636697089926338e-06, "loss": 17.5774, "step": 8121 }, { "epoch": 0.14846363353866965, "grad_norm": 6.662271402853505, "learning_rate": 9.636586307969338e-06, "loss": 17.7661, "step": 8122 }, { "epoch": 0.14848191273511616, "grad_norm": 7.132505202028377, "learning_rate": 9.636475509761507e-06, "loss": 17.6119, "step": 8123 }, { "epoch": 0.1485001919315627, "grad_norm": 8.094454678992484, "learning_rate": 9.636364695303234e-06, "loss": 17.5317, "step": 8124 }, { "epoch": 0.1485184711280092, "grad_norm": 7.831661732240495, "learning_rate": 9.6362538645949e-06, "loss": 17.7834, "step": 8125 }, { "epoch": 0.14853675032445574, "grad_norm": 6.011204911661293, "learning_rate": 9.636143017636901e-06, "loss": 17.1371, "step": 8126 }, { "epoch": 0.14855502952090227, "grad_norm": 7.7689813298236645, "learning_rate": 9.636032154429624e-06, "loss": 18.2324, "step": 8127 }, { "epoch": 0.14857330871734878, "grad_norm": 6.4227869864023805, "learning_rate": 9.635921274973457e-06, "loss": 17.782, "step": 8128 }, { "epoch": 0.14859158791379531, "grad_norm": 7.21488812752111, "learning_rate": 9.635810379268786e-06, "loss": 17.543, "step": 8129 }, { "epoch": 0.14860986711024182, "grad_norm": 7.942197203982288, "learning_rate": 9.635699467316002e-06, "loss": 17.9881, "step": 8130 }, { "epoch": 0.14862814630668836, "grad_norm": 8.764202749204488, "learning_rate": 9.635588539115495e-06, "loss": 18.4816, "step": 8131 }, { "epoch": 0.1486464255031349, "grad_norm": 7.777306134782962, "learning_rate": 9.635477594667653e-06, "loss": 18.1013, "step": 8132 }, { "epoch": 0.1486647046995814, "grad_norm": 7.83180863384052, "learning_rate": 9.635366633972863e-06, "loss": 17.3537, "step": 8133 }, { "epoch": 0.14868298389602794, "grad_norm": 6.262583354547982, "learning_rate": 9.635255657031515e-06, "loss": 17.5054, "step": 8134 }, { "epoch": 0.14870126309247444, "grad_norm": 7.658041012486796, "learning_rate": 9.635144663843999e-06, "loss": 17.9531, "step": 8135 }, { "epoch": 0.14871954228892098, "grad_norm": 6.808612918804337, "learning_rate": 9.635033654410703e-06, "loss": 17.7325, "step": 8136 }, { "epoch": 0.14873782148536752, "grad_norm": 7.258150622509804, "learning_rate": 9.634922628732015e-06, "loss": 17.7368, "step": 8137 }, { "epoch": 0.14875610068181402, "grad_norm": 6.9982574431914575, "learning_rate": 9.634811586808327e-06, "loss": 17.6144, "step": 8138 }, { "epoch": 0.14877437987826056, "grad_norm": 7.857884053169739, "learning_rate": 9.634700528640026e-06, "loss": 17.9164, "step": 8139 }, { "epoch": 0.14879265907470707, "grad_norm": 7.452730871277512, "learning_rate": 9.634589454227502e-06, "loss": 17.6904, "step": 8140 }, { "epoch": 0.1488109382711536, "grad_norm": 6.9768223589685, "learning_rate": 9.634478363571144e-06, "loss": 17.6711, "step": 8141 }, { "epoch": 0.1488292174676001, "grad_norm": 6.947093534664517, "learning_rate": 9.634367256671342e-06, "loss": 17.8603, "step": 8142 }, { "epoch": 0.14884749666404665, "grad_norm": 8.065395022833629, "learning_rate": 9.634256133528483e-06, "loss": 17.9711, "step": 8143 }, { "epoch": 0.14886577586049318, "grad_norm": 5.587161523282454, "learning_rate": 9.63414499414296e-06, "loss": 17.0566, "step": 8144 }, { "epoch": 0.1488840550569397, "grad_norm": 6.629157279648962, "learning_rate": 9.634033838515162e-06, "loss": 17.5394, "step": 8145 }, { "epoch": 0.14890233425338623, "grad_norm": 9.91512398762502, "learning_rate": 9.633922666645475e-06, "loss": 17.7891, "step": 8146 }, { "epoch": 0.14892061344983273, "grad_norm": 6.5990876856195655, "learning_rate": 9.633811478534293e-06, "loss": 17.5892, "step": 8147 }, { "epoch": 0.14893889264627927, "grad_norm": 7.022881311193155, "learning_rate": 9.633700274182003e-06, "loss": 17.5351, "step": 8148 }, { "epoch": 0.1489571718427258, "grad_norm": 5.6144195274142055, "learning_rate": 9.633589053588997e-06, "loss": 16.9259, "step": 8149 }, { "epoch": 0.1489754510391723, "grad_norm": 7.738487347820152, "learning_rate": 9.63347781675566e-06, "loss": 18.1853, "step": 8150 }, { "epoch": 0.14899373023561885, "grad_norm": 6.193180519595679, "learning_rate": 9.63336656368239e-06, "loss": 17.5029, "step": 8151 }, { "epoch": 0.14901200943206536, "grad_norm": 7.092227173166322, "learning_rate": 9.633255294369569e-06, "loss": 17.9659, "step": 8152 }, { "epoch": 0.1490302886285119, "grad_norm": 6.745416215426979, "learning_rate": 9.63314400881759e-06, "loss": 17.657, "step": 8153 }, { "epoch": 0.14904856782495843, "grad_norm": 8.15380868776113, "learning_rate": 9.633032707026846e-06, "loss": 18.2057, "step": 8154 }, { "epoch": 0.14906684702140494, "grad_norm": 6.331248922332758, "learning_rate": 9.632921388997722e-06, "loss": 17.1826, "step": 8155 }, { "epoch": 0.14908512621785147, "grad_norm": 6.805012976161833, "learning_rate": 9.632810054730611e-06, "loss": 17.6045, "step": 8156 }, { "epoch": 0.14910340541429798, "grad_norm": 6.046630693939717, "learning_rate": 9.632698704225904e-06, "loss": 17.2938, "step": 8157 }, { "epoch": 0.14912168461074451, "grad_norm": 6.757383522376019, "learning_rate": 9.632587337483989e-06, "loss": 17.5262, "step": 8158 }, { "epoch": 0.14913996380719102, "grad_norm": 8.323600026726774, "learning_rate": 9.632475954505258e-06, "loss": 18.1282, "step": 8159 }, { "epoch": 0.14915824300363756, "grad_norm": 6.499193928467957, "learning_rate": 9.6323645552901e-06, "loss": 17.0955, "step": 8160 }, { "epoch": 0.1491765222000841, "grad_norm": 6.966078916831902, "learning_rate": 9.632253139838906e-06, "loss": 17.8794, "step": 8161 }, { "epoch": 0.1491948013965306, "grad_norm": 6.599636555603426, "learning_rate": 9.632141708152068e-06, "loss": 17.5405, "step": 8162 }, { "epoch": 0.14921308059297714, "grad_norm": 7.28404182257285, "learning_rate": 9.632030260229974e-06, "loss": 17.5913, "step": 8163 }, { "epoch": 0.14923135978942365, "grad_norm": 7.026371351499305, "learning_rate": 9.631918796073017e-06, "loss": 17.6872, "step": 8164 }, { "epoch": 0.14924963898587018, "grad_norm": 6.586875317455057, "learning_rate": 9.631807315681586e-06, "loss": 17.4985, "step": 8165 }, { "epoch": 0.14926791818231672, "grad_norm": 6.5463039923326, "learning_rate": 9.631695819056073e-06, "loss": 17.7064, "step": 8166 }, { "epoch": 0.14928619737876322, "grad_norm": 9.692044910483968, "learning_rate": 9.631584306196866e-06, "loss": 18.3559, "step": 8167 }, { "epoch": 0.14930447657520976, "grad_norm": 7.022885606869071, "learning_rate": 9.631472777104361e-06, "loss": 17.4101, "step": 8168 }, { "epoch": 0.14932275577165627, "grad_norm": 8.913693817095838, "learning_rate": 9.631361231778944e-06, "loss": 18.2285, "step": 8169 }, { "epoch": 0.1493410349681028, "grad_norm": 6.636074960057482, "learning_rate": 9.631249670221007e-06, "loss": 17.6323, "step": 8170 }, { "epoch": 0.14935931416454934, "grad_norm": 6.843279083385719, "learning_rate": 9.631138092430943e-06, "loss": 17.6723, "step": 8171 }, { "epoch": 0.14937759336099585, "grad_norm": 6.873244248974223, "learning_rate": 9.631026498409142e-06, "loss": 17.5206, "step": 8172 }, { "epoch": 0.14939587255744238, "grad_norm": 6.66895985156897, "learning_rate": 9.630914888155993e-06, "loss": 17.5482, "step": 8173 }, { "epoch": 0.1494141517538889, "grad_norm": 6.853114025017008, "learning_rate": 9.630803261671892e-06, "loss": 17.7752, "step": 8174 }, { "epoch": 0.14943243095033543, "grad_norm": 6.133412828735075, "learning_rate": 9.630691618957225e-06, "loss": 17.5157, "step": 8175 }, { "epoch": 0.14945071014678193, "grad_norm": 7.621202411677604, "learning_rate": 9.630579960012387e-06, "loss": 18.3171, "step": 8176 }, { "epoch": 0.14946898934322847, "grad_norm": 7.582676238264134, "learning_rate": 9.630468284837769e-06, "loss": 17.986, "step": 8177 }, { "epoch": 0.149487268539675, "grad_norm": 6.258409215946996, "learning_rate": 9.63035659343376e-06, "loss": 17.3532, "step": 8178 }, { "epoch": 0.1495055477361215, "grad_norm": 6.355855197434192, "learning_rate": 9.630244885800753e-06, "loss": 17.6792, "step": 8179 }, { "epoch": 0.14952382693256805, "grad_norm": 8.220841132045704, "learning_rate": 9.63013316193914e-06, "loss": 18.196, "step": 8180 }, { "epoch": 0.14954210612901456, "grad_norm": 6.636291930323637, "learning_rate": 9.630021421849311e-06, "loss": 17.5067, "step": 8181 }, { "epoch": 0.1495603853254611, "grad_norm": 7.5266465981644854, "learning_rate": 9.629909665531661e-06, "loss": 18.0338, "step": 8182 }, { "epoch": 0.14957866452190763, "grad_norm": 7.035010101666306, "learning_rate": 9.629797892986576e-06, "loss": 17.7137, "step": 8183 }, { "epoch": 0.14959694371835414, "grad_norm": 7.163355240729544, "learning_rate": 9.629686104214453e-06, "loss": 17.8504, "step": 8184 }, { "epoch": 0.14961522291480067, "grad_norm": 7.1227862610442925, "learning_rate": 9.629574299215682e-06, "loss": 17.8507, "step": 8185 }, { "epoch": 0.14963350211124718, "grad_norm": 7.485342666547704, "learning_rate": 9.629462477990656e-06, "loss": 18.1443, "step": 8186 }, { "epoch": 0.14965178130769372, "grad_norm": 8.198642072686255, "learning_rate": 9.629350640539763e-06, "loss": 18.1238, "step": 8187 }, { "epoch": 0.14967006050414025, "grad_norm": 6.974602313031887, "learning_rate": 9.629238786863401e-06, "loss": 17.547, "step": 8188 }, { "epoch": 0.14968833970058676, "grad_norm": 7.12877973991162, "learning_rate": 9.629126916961958e-06, "loss": 17.8723, "step": 8189 }, { "epoch": 0.1497066188970333, "grad_norm": 7.462106769459328, "learning_rate": 9.629015030835824e-06, "loss": 17.7146, "step": 8190 }, { "epoch": 0.1497248980934798, "grad_norm": 6.839041078834273, "learning_rate": 9.628903128485396e-06, "loss": 17.96, "step": 8191 }, { "epoch": 0.14974317728992634, "grad_norm": 6.82052383103971, "learning_rate": 9.628791209911063e-06, "loss": 17.986, "step": 8192 }, { "epoch": 0.14976145648637285, "grad_norm": 7.663386127315567, "learning_rate": 9.62867927511322e-06, "loss": 17.935, "step": 8193 }, { "epoch": 0.14977973568281938, "grad_norm": 8.631183227611471, "learning_rate": 9.628567324092259e-06, "loss": 18.2473, "step": 8194 }, { "epoch": 0.14979801487926592, "grad_norm": 7.283198318557219, "learning_rate": 9.62845535684857e-06, "loss": 17.8827, "step": 8195 }, { "epoch": 0.14981629407571243, "grad_norm": 6.898003121324752, "learning_rate": 9.628343373382545e-06, "loss": 17.7923, "step": 8196 }, { "epoch": 0.14983457327215896, "grad_norm": 7.205962008746917, "learning_rate": 9.628231373694579e-06, "loss": 17.7067, "step": 8197 }, { "epoch": 0.14985285246860547, "grad_norm": 6.369033875223682, "learning_rate": 9.628119357785064e-06, "loss": 17.4865, "step": 8198 }, { "epoch": 0.149871131665052, "grad_norm": 7.086959868206967, "learning_rate": 9.628007325654392e-06, "loss": 17.7848, "step": 8199 }, { "epoch": 0.14988941086149854, "grad_norm": 7.554476693798808, "learning_rate": 9.627895277302957e-06, "loss": 17.9422, "step": 8200 }, { "epoch": 0.14990769005794505, "grad_norm": 7.975346708415603, "learning_rate": 9.62778321273115e-06, "loss": 18.1469, "step": 8201 }, { "epoch": 0.14992596925439158, "grad_norm": 7.016908788442051, "learning_rate": 9.627671131939363e-06, "loss": 17.8922, "step": 8202 }, { "epoch": 0.1499442484508381, "grad_norm": 8.85927995340893, "learning_rate": 9.627559034927992e-06, "loss": 18.2604, "step": 8203 }, { "epoch": 0.14996252764728463, "grad_norm": 6.774773064259678, "learning_rate": 9.627446921697427e-06, "loss": 17.3408, "step": 8204 }, { "epoch": 0.14998080684373116, "grad_norm": 6.294227610418616, "learning_rate": 9.627334792248064e-06, "loss": 17.2782, "step": 8205 }, { "epoch": 0.14999908604017767, "grad_norm": 7.369330199875309, "learning_rate": 9.627222646580291e-06, "loss": 17.9668, "step": 8206 }, { "epoch": 0.1500173652366242, "grad_norm": 8.27917234719453, "learning_rate": 9.627110484694506e-06, "loss": 18.1977, "step": 8207 }, { "epoch": 0.15003564443307071, "grad_norm": 7.8009489813351856, "learning_rate": 9.626998306591101e-06, "loss": 17.868, "step": 8208 }, { "epoch": 0.15005392362951725, "grad_norm": 7.77946634764763, "learning_rate": 9.626886112270467e-06, "loss": 17.7802, "step": 8209 }, { "epoch": 0.15007220282596376, "grad_norm": 6.884261644919327, "learning_rate": 9.626773901733e-06, "loss": 17.7854, "step": 8210 }, { "epoch": 0.1500904820224103, "grad_norm": 8.006721368370812, "learning_rate": 9.62666167497909e-06, "loss": 17.9747, "step": 8211 }, { "epoch": 0.15010876121885683, "grad_norm": 5.848516308608391, "learning_rate": 9.626549432009135e-06, "loss": 17.2307, "step": 8212 }, { "epoch": 0.15012704041530334, "grad_norm": 9.007683572794024, "learning_rate": 9.626437172823523e-06, "loss": 18.6756, "step": 8213 }, { "epoch": 0.15014531961174987, "grad_norm": 6.84217977983457, "learning_rate": 9.626324897422651e-06, "loss": 17.8017, "step": 8214 }, { "epoch": 0.15016359880819638, "grad_norm": 6.124497949168756, "learning_rate": 9.626212605806914e-06, "loss": 17.3608, "step": 8215 }, { "epoch": 0.15018187800464292, "grad_norm": 6.226158237857329, "learning_rate": 9.626100297976702e-06, "loss": 17.1185, "step": 8216 }, { "epoch": 0.15020015720108945, "grad_norm": 7.583217676650587, "learning_rate": 9.62598797393241e-06, "loss": 17.8746, "step": 8217 }, { "epoch": 0.15021843639753596, "grad_norm": 7.05815119157305, "learning_rate": 9.625875633674428e-06, "loss": 17.5794, "step": 8218 }, { "epoch": 0.1502367155939825, "grad_norm": 6.131312477463043, "learning_rate": 9.625763277203157e-06, "loss": 17.2176, "step": 8219 }, { "epoch": 0.150254994790429, "grad_norm": 9.33147680707214, "learning_rate": 9.625650904518986e-06, "loss": 18.486, "step": 8220 }, { "epoch": 0.15027327398687554, "grad_norm": 7.028451742104949, "learning_rate": 9.625538515622311e-06, "loss": 17.3888, "step": 8221 }, { "epoch": 0.15029155318332207, "grad_norm": 6.105778569504314, "learning_rate": 9.625426110513524e-06, "loss": 17.4368, "step": 8222 }, { "epoch": 0.15030983237976858, "grad_norm": 6.231559648553829, "learning_rate": 9.625313689193021e-06, "loss": 17.5064, "step": 8223 }, { "epoch": 0.15032811157621512, "grad_norm": 6.6105422614414096, "learning_rate": 9.625201251661193e-06, "loss": 17.4768, "step": 8224 }, { "epoch": 0.15034639077266163, "grad_norm": 7.787476106469747, "learning_rate": 9.625088797918437e-06, "loss": 18.2933, "step": 8225 }, { "epoch": 0.15036466996910816, "grad_norm": 6.97417096298724, "learning_rate": 9.624976327965146e-06, "loss": 17.5706, "step": 8226 }, { "epoch": 0.15038294916555467, "grad_norm": 5.758546880269337, "learning_rate": 9.624863841801715e-06, "loss": 17.1828, "step": 8227 }, { "epoch": 0.1504012283620012, "grad_norm": 6.765842795066055, "learning_rate": 9.624751339428537e-06, "loss": 17.3873, "step": 8228 }, { "epoch": 0.15041950755844774, "grad_norm": 6.8451688463139675, "learning_rate": 9.624638820846005e-06, "loss": 17.8134, "step": 8229 }, { "epoch": 0.15043778675489425, "grad_norm": 7.433415462032307, "learning_rate": 9.624526286054519e-06, "loss": 17.9911, "step": 8230 }, { "epoch": 0.15045606595134078, "grad_norm": 6.736119752188272, "learning_rate": 9.624413735054468e-06, "loss": 17.5756, "step": 8231 }, { "epoch": 0.1504743451477873, "grad_norm": 6.432736461057537, "learning_rate": 9.624301167846246e-06, "loss": 17.4461, "step": 8232 }, { "epoch": 0.15049262434423383, "grad_norm": 6.918900635903595, "learning_rate": 9.624188584430252e-06, "loss": 17.4558, "step": 8233 }, { "epoch": 0.15051090354068036, "grad_norm": 7.75983457150723, "learning_rate": 9.624075984806878e-06, "loss": 17.8908, "step": 8234 }, { "epoch": 0.15052918273712687, "grad_norm": 8.282034442417402, "learning_rate": 9.623963368976519e-06, "loss": 18.27, "step": 8235 }, { "epoch": 0.1505474619335734, "grad_norm": 7.158220967820405, "learning_rate": 9.623850736939568e-06, "loss": 17.8442, "step": 8236 }, { "epoch": 0.15056574113001991, "grad_norm": 7.071703795375992, "learning_rate": 9.623738088696425e-06, "loss": 17.6172, "step": 8237 }, { "epoch": 0.15058402032646645, "grad_norm": 6.619604300037013, "learning_rate": 9.623625424247479e-06, "loss": 17.538, "step": 8238 }, { "epoch": 0.15060229952291299, "grad_norm": 8.159289456968516, "learning_rate": 9.623512743593126e-06, "loss": 18.3043, "step": 8239 }, { "epoch": 0.1506205787193595, "grad_norm": 7.891801653755344, "learning_rate": 9.623400046733762e-06, "loss": 18.214, "step": 8240 }, { "epoch": 0.15063885791580603, "grad_norm": 7.315861513886885, "learning_rate": 9.623287333669784e-06, "loss": 17.6339, "step": 8241 }, { "epoch": 0.15065713711225254, "grad_norm": 6.842762964775592, "learning_rate": 9.623174604401584e-06, "loss": 17.8821, "step": 8242 }, { "epoch": 0.15067541630869907, "grad_norm": 7.45071955468202, "learning_rate": 9.623061858929558e-06, "loss": 17.8013, "step": 8243 }, { "epoch": 0.15069369550514558, "grad_norm": 7.278205613368842, "learning_rate": 9.622949097254103e-06, "loss": 17.5657, "step": 8244 }, { "epoch": 0.15071197470159212, "grad_norm": 6.689719787695791, "learning_rate": 9.62283631937561e-06, "loss": 17.3731, "step": 8245 }, { "epoch": 0.15073025389803865, "grad_norm": 5.89983201704077, "learning_rate": 9.62272352529448e-06, "loss": 17.1696, "step": 8246 }, { "epoch": 0.15074853309448516, "grad_norm": 7.432211932522837, "learning_rate": 9.622610715011103e-06, "loss": 17.8963, "step": 8247 }, { "epoch": 0.1507668122909317, "grad_norm": 7.287658201953378, "learning_rate": 9.622497888525878e-06, "loss": 18.0643, "step": 8248 }, { "epoch": 0.1507850914873782, "grad_norm": 7.3204055710807, "learning_rate": 9.622385045839197e-06, "loss": 17.8771, "step": 8249 }, { "epoch": 0.15080337068382474, "grad_norm": 7.268925788270548, "learning_rate": 9.622272186951458e-06, "loss": 18.2706, "step": 8250 }, { "epoch": 0.15082164988027127, "grad_norm": 7.474373604212861, "learning_rate": 9.622159311863057e-06, "loss": 18.1105, "step": 8251 }, { "epoch": 0.15083992907671778, "grad_norm": 6.301086570045507, "learning_rate": 9.622046420574389e-06, "loss": 17.472, "step": 8252 }, { "epoch": 0.15085820827316432, "grad_norm": 6.698138661279464, "learning_rate": 9.621933513085848e-06, "loss": 17.7003, "step": 8253 }, { "epoch": 0.15087648746961083, "grad_norm": 7.081841556616977, "learning_rate": 9.621820589397832e-06, "loss": 17.6653, "step": 8254 }, { "epoch": 0.15089476666605736, "grad_norm": 7.3238418748277745, "learning_rate": 9.621707649510736e-06, "loss": 17.9191, "step": 8255 }, { "epoch": 0.1509130458625039, "grad_norm": 6.742651680720778, "learning_rate": 9.621594693424955e-06, "loss": 17.6599, "step": 8256 }, { "epoch": 0.1509313250589504, "grad_norm": 7.58101079672959, "learning_rate": 9.621481721140885e-06, "loss": 18.2079, "step": 8257 }, { "epoch": 0.15094960425539694, "grad_norm": 6.863753551861713, "learning_rate": 9.621368732658925e-06, "loss": 17.8148, "step": 8258 }, { "epoch": 0.15096788345184345, "grad_norm": 6.47264056577828, "learning_rate": 9.621255727979467e-06, "loss": 17.4016, "step": 8259 }, { "epoch": 0.15098616264828998, "grad_norm": 7.603533666439708, "learning_rate": 9.621142707102908e-06, "loss": 18.2694, "step": 8260 }, { "epoch": 0.1510044418447365, "grad_norm": 6.540029090559662, "learning_rate": 9.621029670029647e-06, "loss": 17.6542, "step": 8261 }, { "epoch": 0.15102272104118303, "grad_norm": 7.316452676273734, "learning_rate": 9.620916616760076e-06, "loss": 17.7939, "step": 8262 }, { "epoch": 0.15104100023762956, "grad_norm": 7.181725949839706, "learning_rate": 9.620803547294595e-06, "loss": 17.8618, "step": 8263 }, { "epoch": 0.15105927943407607, "grad_norm": 8.046996352147591, "learning_rate": 9.620690461633597e-06, "loss": 18.103, "step": 8264 }, { "epoch": 0.1510775586305226, "grad_norm": 6.452773693713817, "learning_rate": 9.620577359777481e-06, "loss": 17.5604, "step": 8265 }, { "epoch": 0.15109583782696911, "grad_norm": 6.74310278963078, "learning_rate": 9.62046424172664e-06, "loss": 17.6312, "step": 8266 }, { "epoch": 0.15111411702341565, "grad_norm": 8.863382977064171, "learning_rate": 9.620351107481476e-06, "loss": 18.6413, "step": 8267 }, { "epoch": 0.1511323962198622, "grad_norm": 6.426130488184393, "learning_rate": 9.620237957042382e-06, "loss": 17.4138, "step": 8268 }, { "epoch": 0.1511506754163087, "grad_norm": 7.165066842644639, "learning_rate": 9.620124790409752e-06, "loss": 17.4794, "step": 8269 }, { "epoch": 0.15116895461275523, "grad_norm": 5.783885020588841, "learning_rate": 9.620011607583988e-06, "loss": 17.186, "step": 8270 }, { "epoch": 0.15118723380920174, "grad_norm": 6.653501274399079, "learning_rate": 9.619898408565485e-06, "loss": 17.8724, "step": 8271 }, { "epoch": 0.15120551300564827, "grad_norm": 9.47444081173225, "learning_rate": 9.619785193354636e-06, "loss": 17.9671, "step": 8272 }, { "epoch": 0.1512237922020948, "grad_norm": 8.314644938169216, "learning_rate": 9.619671961951843e-06, "loss": 18.4144, "step": 8273 }, { "epoch": 0.15124207139854132, "grad_norm": 7.804214639424101, "learning_rate": 9.6195587143575e-06, "loss": 17.815, "step": 8274 }, { "epoch": 0.15126035059498785, "grad_norm": 6.494363859522821, "learning_rate": 9.619445450572005e-06, "loss": 17.7352, "step": 8275 }, { "epoch": 0.15127862979143436, "grad_norm": 6.75121851459919, "learning_rate": 9.619332170595753e-06, "loss": 17.5873, "step": 8276 }, { "epoch": 0.1512969089878809, "grad_norm": 8.221373152111685, "learning_rate": 9.619218874429144e-06, "loss": 18.549, "step": 8277 }, { "epoch": 0.1513151881843274, "grad_norm": 7.065653786126481, "learning_rate": 9.619105562072573e-06, "loss": 17.5719, "step": 8278 }, { "epoch": 0.15133346738077394, "grad_norm": 8.495427470673599, "learning_rate": 9.618992233526438e-06, "loss": 18.1892, "step": 8279 }, { "epoch": 0.15135174657722048, "grad_norm": 6.594544755034357, "learning_rate": 9.618878888791136e-06, "loss": 17.3768, "step": 8280 }, { "epoch": 0.15137002577366698, "grad_norm": 7.174457454636815, "learning_rate": 9.618765527867065e-06, "loss": 17.9663, "step": 8281 }, { "epoch": 0.15138830497011352, "grad_norm": 6.486051817146391, "learning_rate": 9.618652150754621e-06, "loss": 17.5412, "step": 8282 }, { "epoch": 0.15140658416656003, "grad_norm": 6.660156852894946, "learning_rate": 9.618538757454202e-06, "loss": 17.6116, "step": 8283 }, { "epoch": 0.15142486336300656, "grad_norm": 6.652304984094611, "learning_rate": 9.618425347966206e-06, "loss": 17.7333, "step": 8284 }, { "epoch": 0.1514431425594531, "grad_norm": 9.697218587626619, "learning_rate": 9.61831192229103e-06, "loss": 18.755, "step": 8285 }, { "epoch": 0.1514614217558996, "grad_norm": 5.983378811259161, "learning_rate": 9.618198480429071e-06, "loss": 17.2025, "step": 8286 }, { "epoch": 0.15147970095234614, "grad_norm": 6.9403136869832, "learning_rate": 9.618085022380727e-06, "loss": 17.7489, "step": 8287 }, { "epoch": 0.15149798014879265, "grad_norm": 6.289242074092284, "learning_rate": 9.617971548146395e-06, "loss": 17.511, "step": 8288 }, { "epoch": 0.15151625934523918, "grad_norm": 10.072604609154403, "learning_rate": 9.617858057726474e-06, "loss": 17.6072, "step": 8289 }, { "epoch": 0.15153453854168572, "grad_norm": 7.738213544209298, "learning_rate": 9.617744551121362e-06, "loss": 18.0687, "step": 8290 }, { "epoch": 0.15155281773813223, "grad_norm": 8.88955160968627, "learning_rate": 9.617631028331455e-06, "loss": 18.2794, "step": 8291 }, { "epoch": 0.15157109693457876, "grad_norm": 7.129723897496373, "learning_rate": 9.617517489357153e-06, "loss": 18.0201, "step": 8292 }, { "epoch": 0.15158937613102527, "grad_norm": 6.187657092673538, "learning_rate": 9.617403934198852e-06, "loss": 17.2293, "step": 8293 }, { "epoch": 0.1516076553274718, "grad_norm": 7.77523702408604, "learning_rate": 9.61729036285695e-06, "loss": 18.4022, "step": 8294 }, { "epoch": 0.15162593452391832, "grad_norm": 7.051666309443853, "learning_rate": 9.617176775331848e-06, "loss": 18.0027, "step": 8295 }, { "epoch": 0.15164421372036485, "grad_norm": 7.648719821643449, "learning_rate": 9.61706317162394e-06, "loss": 17.4062, "step": 8296 }, { "epoch": 0.1516624929168114, "grad_norm": 6.553220877152369, "learning_rate": 9.61694955173363e-06, "loss": 17.4329, "step": 8297 }, { "epoch": 0.1516807721132579, "grad_norm": 7.048283344257818, "learning_rate": 9.616835915661308e-06, "loss": 17.8347, "step": 8298 }, { "epoch": 0.15169905130970443, "grad_norm": 8.622176775355504, "learning_rate": 9.616722263407381e-06, "loss": 18.3363, "step": 8299 }, { "epoch": 0.15171733050615094, "grad_norm": 7.3725743209216645, "learning_rate": 9.61660859497224e-06, "loss": 17.8952, "step": 8300 }, { "epoch": 0.15173560970259747, "grad_norm": 7.667911868647525, "learning_rate": 9.616494910356287e-06, "loss": 18.1777, "step": 8301 }, { "epoch": 0.151753888899044, "grad_norm": 5.6619680493594675, "learning_rate": 9.61638120955992e-06, "loss": 17.278, "step": 8302 }, { "epoch": 0.15177216809549052, "grad_norm": 7.518921455476554, "learning_rate": 9.616267492583538e-06, "loss": 17.9855, "step": 8303 }, { "epoch": 0.15179044729193705, "grad_norm": 7.362345100622486, "learning_rate": 9.616153759427539e-06, "loss": 17.8035, "step": 8304 }, { "epoch": 0.15180872648838356, "grad_norm": 6.771119541757221, "learning_rate": 9.616040010092322e-06, "loss": 17.447, "step": 8305 }, { "epoch": 0.1518270056848301, "grad_norm": 11.090780843549402, "learning_rate": 9.615926244578283e-06, "loss": 19.3201, "step": 8306 }, { "epoch": 0.15184528488127663, "grad_norm": 7.69397219050792, "learning_rate": 9.615812462885825e-06, "loss": 18.143, "step": 8307 }, { "epoch": 0.15186356407772314, "grad_norm": 6.581930909105399, "learning_rate": 9.615698665015345e-06, "loss": 17.4815, "step": 8308 }, { "epoch": 0.15188184327416968, "grad_norm": 7.343480040899406, "learning_rate": 9.615584850967242e-06, "loss": 18.1581, "step": 8309 }, { "epoch": 0.15190012247061618, "grad_norm": 7.281686300188413, "learning_rate": 9.615471020741913e-06, "loss": 17.8876, "step": 8310 }, { "epoch": 0.15191840166706272, "grad_norm": 6.023210772722194, "learning_rate": 9.615357174339759e-06, "loss": 17.1831, "step": 8311 }, { "epoch": 0.15193668086350923, "grad_norm": 8.554098411455712, "learning_rate": 9.61524331176118e-06, "loss": 18.1684, "step": 8312 }, { "epoch": 0.15195496005995576, "grad_norm": 6.123535324372592, "learning_rate": 9.615129433006573e-06, "loss": 17.3887, "step": 8313 }, { "epoch": 0.1519732392564023, "grad_norm": 5.73365668953176, "learning_rate": 9.615015538076338e-06, "loss": 16.9956, "step": 8314 }, { "epoch": 0.1519915184528488, "grad_norm": 7.799627749598492, "learning_rate": 9.614901626970873e-06, "loss": 18.2006, "step": 8315 }, { "epoch": 0.15200979764929534, "grad_norm": 6.543433915866202, "learning_rate": 9.61478769969058e-06, "loss": 17.4647, "step": 8316 }, { "epoch": 0.15202807684574185, "grad_norm": 8.78596846594541, "learning_rate": 9.614673756235854e-06, "loss": 18.5613, "step": 8317 }, { "epoch": 0.15204635604218839, "grad_norm": 7.318576023539961, "learning_rate": 9.6145597966071e-06, "loss": 17.8715, "step": 8318 }, { "epoch": 0.15206463523863492, "grad_norm": 7.380504903544179, "learning_rate": 9.614445820804711e-06, "loss": 17.8962, "step": 8319 }, { "epoch": 0.15208291443508143, "grad_norm": 7.335299172284095, "learning_rate": 9.614331828829091e-06, "loss": 18.0066, "step": 8320 }, { "epoch": 0.15210119363152796, "grad_norm": 6.601506403760204, "learning_rate": 9.614217820680641e-06, "loss": 17.2911, "step": 8321 }, { "epoch": 0.15211947282797447, "grad_norm": 8.246965261381105, "learning_rate": 9.614103796359755e-06, "loss": 18.3982, "step": 8322 }, { "epoch": 0.152137752024421, "grad_norm": 5.151529787885932, "learning_rate": 9.613989755866835e-06, "loss": 16.8491, "step": 8323 }, { "epoch": 0.15215603122086754, "grad_norm": 11.081636020111297, "learning_rate": 9.613875699202284e-06, "loss": 18.446, "step": 8324 }, { "epoch": 0.15217431041731405, "grad_norm": 6.756162709600048, "learning_rate": 9.613761626366498e-06, "loss": 17.7069, "step": 8325 }, { "epoch": 0.1521925896137606, "grad_norm": 6.15423922061458, "learning_rate": 9.613647537359878e-06, "loss": 17.3873, "step": 8326 }, { "epoch": 0.1522108688102071, "grad_norm": 6.603928084287123, "learning_rate": 9.613533432182822e-06, "loss": 17.5039, "step": 8327 }, { "epoch": 0.15222914800665363, "grad_norm": 5.958683100556394, "learning_rate": 9.613419310835734e-06, "loss": 17.2975, "step": 8328 }, { "epoch": 0.15224742720310014, "grad_norm": 7.536025657207551, "learning_rate": 9.61330517331901e-06, "loss": 17.6498, "step": 8329 }, { "epoch": 0.15226570639954667, "grad_norm": 7.580601309602559, "learning_rate": 9.613191019633053e-06, "loss": 18.1946, "step": 8330 }, { "epoch": 0.1522839855959932, "grad_norm": 6.5379268220550575, "learning_rate": 9.61307684977826e-06, "loss": 17.5752, "step": 8331 }, { "epoch": 0.15230226479243972, "grad_norm": 7.891329417755743, "learning_rate": 9.612962663755035e-06, "loss": 18.1304, "step": 8332 }, { "epoch": 0.15232054398888625, "grad_norm": 5.96099476374749, "learning_rate": 9.612848461563776e-06, "loss": 17.3644, "step": 8333 }, { "epoch": 0.15233882318533276, "grad_norm": 7.558239477538888, "learning_rate": 9.612734243204882e-06, "loss": 18.055, "step": 8334 }, { "epoch": 0.1523571023817793, "grad_norm": 7.722599467178709, "learning_rate": 9.612620008678755e-06, "loss": 17.9425, "step": 8335 }, { "epoch": 0.15237538157822583, "grad_norm": 7.270872958368524, "learning_rate": 9.612505757985795e-06, "loss": 17.782, "step": 8336 }, { "epoch": 0.15239366077467234, "grad_norm": 7.504788950693975, "learning_rate": 9.612391491126403e-06, "loss": 18.0103, "step": 8337 }, { "epoch": 0.15241193997111888, "grad_norm": 7.005902932978128, "learning_rate": 9.612277208100979e-06, "loss": 17.8027, "step": 8338 }, { "epoch": 0.15243021916756538, "grad_norm": 8.191436162031327, "learning_rate": 9.612162908909924e-06, "loss": 18.4753, "step": 8339 }, { "epoch": 0.15244849836401192, "grad_norm": 7.765269070423985, "learning_rate": 9.612048593553639e-06, "loss": 18.4064, "step": 8340 }, { "epoch": 0.15246677756045846, "grad_norm": 6.722960780984315, "learning_rate": 9.611934262032522e-06, "loss": 17.2695, "step": 8341 }, { "epoch": 0.15248505675690496, "grad_norm": 7.270974839599677, "learning_rate": 9.611819914346978e-06, "loss": 17.8775, "step": 8342 }, { "epoch": 0.1525033359533515, "grad_norm": 6.80859111730569, "learning_rate": 9.611705550497404e-06, "loss": 17.4734, "step": 8343 }, { "epoch": 0.152521615149798, "grad_norm": 6.633348536040492, "learning_rate": 9.611591170484202e-06, "loss": 17.5199, "step": 8344 }, { "epoch": 0.15253989434624454, "grad_norm": 6.301785897670512, "learning_rate": 9.611476774307773e-06, "loss": 17.3887, "step": 8345 }, { "epoch": 0.15255817354269105, "grad_norm": 7.8771904174235035, "learning_rate": 9.611362361968519e-06, "loss": 18.2089, "step": 8346 }, { "epoch": 0.15257645273913759, "grad_norm": 6.054082585091909, "learning_rate": 9.611247933466838e-06, "loss": 17.1358, "step": 8347 }, { "epoch": 0.15259473193558412, "grad_norm": 6.025642937983709, "learning_rate": 9.611133488803134e-06, "loss": 17.3968, "step": 8348 }, { "epoch": 0.15261301113203063, "grad_norm": 6.454400638632909, "learning_rate": 9.611019027977809e-06, "loss": 17.4408, "step": 8349 }, { "epoch": 0.15263129032847716, "grad_norm": 8.576715940344029, "learning_rate": 9.610904550991262e-06, "loss": 18.3775, "step": 8350 }, { "epoch": 0.15264956952492367, "grad_norm": 6.125045802018629, "learning_rate": 9.610790057843892e-06, "loss": 17.254, "step": 8351 }, { "epoch": 0.1526678487213702, "grad_norm": 6.4307968288897275, "learning_rate": 9.610675548536107e-06, "loss": 17.4323, "step": 8352 }, { "epoch": 0.15268612791781674, "grad_norm": 7.637818889551061, "learning_rate": 9.610561023068301e-06, "loss": 17.7409, "step": 8353 }, { "epoch": 0.15270440711426325, "grad_norm": 8.152296239217721, "learning_rate": 9.61044648144088e-06, "loss": 17.9951, "step": 8354 }, { "epoch": 0.1527226863107098, "grad_norm": 7.319634671798852, "learning_rate": 9.610331923654243e-06, "loss": 18.2639, "step": 8355 }, { "epoch": 0.1527409655071563, "grad_norm": 7.4942349441715175, "learning_rate": 9.610217349708796e-06, "loss": 18.0278, "step": 8356 }, { "epoch": 0.15275924470360283, "grad_norm": 8.099602138005944, "learning_rate": 9.610102759604934e-06, "loss": 18.1418, "step": 8357 }, { "epoch": 0.15277752390004937, "grad_norm": 6.82012127276604, "learning_rate": 9.609988153343064e-06, "loss": 17.4784, "step": 8358 }, { "epoch": 0.15279580309649587, "grad_norm": 7.054166587710082, "learning_rate": 9.609873530923584e-06, "loss": 17.6846, "step": 8359 }, { "epoch": 0.1528140822929424, "grad_norm": 7.558182667582104, "learning_rate": 9.609758892346897e-06, "loss": 17.7426, "step": 8360 }, { "epoch": 0.15283236148938892, "grad_norm": 8.073310691773505, "learning_rate": 9.609644237613407e-06, "loss": 17.7529, "step": 8361 }, { "epoch": 0.15285064068583545, "grad_norm": 7.4418851112152185, "learning_rate": 9.609529566723512e-06, "loss": 18.0476, "step": 8362 }, { "epoch": 0.15286891988228196, "grad_norm": 6.894493856924612, "learning_rate": 9.609414879677617e-06, "loss": 17.5686, "step": 8363 }, { "epoch": 0.1528871990787285, "grad_norm": 7.459148446865004, "learning_rate": 9.609300176476123e-06, "loss": 18.014, "step": 8364 }, { "epoch": 0.15290547827517503, "grad_norm": 6.4473080737450115, "learning_rate": 9.60918545711943e-06, "loss": 17.5612, "step": 8365 }, { "epoch": 0.15292375747162154, "grad_norm": 5.680097380401487, "learning_rate": 9.609070721607943e-06, "loss": 17.1274, "step": 8366 }, { "epoch": 0.15294203666806808, "grad_norm": 6.237437755248435, "learning_rate": 9.608955969942064e-06, "loss": 17.4624, "step": 8367 }, { "epoch": 0.15296031586451458, "grad_norm": 8.264595237228436, "learning_rate": 9.608841202122193e-06, "loss": 18.344, "step": 8368 }, { "epoch": 0.15297859506096112, "grad_norm": 8.77713457863881, "learning_rate": 9.608726418148736e-06, "loss": 17.7051, "step": 8369 }, { "epoch": 0.15299687425740766, "grad_norm": 5.972412339171528, "learning_rate": 9.60861161802209e-06, "loss": 17.4506, "step": 8370 }, { "epoch": 0.15301515345385416, "grad_norm": 7.518788273320577, "learning_rate": 9.60849680174266e-06, "loss": 17.8397, "step": 8371 }, { "epoch": 0.1530334326503007, "grad_norm": 6.5896332317018285, "learning_rate": 9.608381969310851e-06, "loss": 17.5284, "step": 8372 }, { "epoch": 0.1530517118467472, "grad_norm": 7.233193816201468, "learning_rate": 9.608267120727061e-06, "loss": 17.8026, "step": 8373 }, { "epoch": 0.15306999104319374, "grad_norm": 7.96773633243533, "learning_rate": 9.608152255991696e-06, "loss": 17.6385, "step": 8374 }, { "epoch": 0.15308827023964028, "grad_norm": 6.059440494427977, "learning_rate": 9.608037375105157e-06, "loss": 17.675, "step": 8375 }, { "epoch": 0.1531065494360868, "grad_norm": 7.290037755876943, "learning_rate": 9.607922478067845e-06, "loss": 18.0835, "step": 8376 }, { "epoch": 0.15312482863253332, "grad_norm": 7.756126214542827, "learning_rate": 9.607807564880168e-06, "loss": 17.9592, "step": 8377 }, { "epoch": 0.15314310782897983, "grad_norm": 8.972905092985298, "learning_rate": 9.607692635542523e-06, "loss": 18.5581, "step": 8378 }, { "epoch": 0.15316138702542637, "grad_norm": 8.293569395540622, "learning_rate": 9.607577690055316e-06, "loss": 18.278, "step": 8379 }, { "epoch": 0.15317966622187287, "grad_norm": 7.054632744946273, "learning_rate": 9.607462728418948e-06, "loss": 18.0781, "step": 8380 }, { "epoch": 0.1531979454183194, "grad_norm": 7.178135813819243, "learning_rate": 9.607347750633824e-06, "loss": 17.4505, "step": 8381 }, { "epoch": 0.15321622461476594, "grad_norm": 6.747552815412986, "learning_rate": 9.607232756700345e-06, "loss": 17.7336, "step": 8382 }, { "epoch": 0.15323450381121245, "grad_norm": 6.898898806189686, "learning_rate": 9.607117746618916e-06, "loss": 17.9267, "step": 8383 }, { "epoch": 0.153252783007659, "grad_norm": 6.3177247531269884, "learning_rate": 9.607002720389938e-06, "loss": 17.5096, "step": 8384 }, { "epoch": 0.1532710622041055, "grad_norm": 7.065871314133091, "learning_rate": 9.606887678013817e-06, "loss": 17.6314, "step": 8385 }, { "epoch": 0.15328934140055203, "grad_norm": 6.845906887067965, "learning_rate": 9.606772619490952e-06, "loss": 17.5358, "step": 8386 }, { "epoch": 0.15330762059699857, "grad_norm": 5.547321384041577, "learning_rate": 9.60665754482175e-06, "loss": 17.0189, "step": 8387 }, { "epoch": 0.15332589979344508, "grad_norm": 7.777365897968196, "learning_rate": 9.606542454006614e-06, "loss": 17.3437, "step": 8388 }, { "epoch": 0.1533441789898916, "grad_norm": 6.35152422892199, "learning_rate": 9.606427347045945e-06, "loss": 17.3832, "step": 8389 }, { "epoch": 0.15336245818633812, "grad_norm": 7.921700262257623, "learning_rate": 9.606312223940149e-06, "loss": 18.2209, "step": 8390 }, { "epoch": 0.15338073738278465, "grad_norm": 8.568656423301833, "learning_rate": 9.606197084689628e-06, "loss": 17.256, "step": 8391 }, { "epoch": 0.1533990165792312, "grad_norm": 7.973003413749782, "learning_rate": 9.606081929294785e-06, "loss": 18.0763, "step": 8392 }, { "epoch": 0.1534172957756777, "grad_norm": 6.084094784940632, "learning_rate": 9.605966757756025e-06, "loss": 17.402, "step": 8393 }, { "epoch": 0.15343557497212423, "grad_norm": 7.196630568735634, "learning_rate": 9.605851570073751e-06, "loss": 17.8722, "step": 8394 }, { "epoch": 0.15345385416857074, "grad_norm": 7.565946134509732, "learning_rate": 9.605736366248368e-06, "loss": 17.703, "step": 8395 }, { "epoch": 0.15347213336501728, "grad_norm": 7.121086636966807, "learning_rate": 9.605621146280278e-06, "loss": 17.5359, "step": 8396 }, { "epoch": 0.15349041256146378, "grad_norm": 8.071905115767212, "learning_rate": 9.605505910169885e-06, "loss": 18.0466, "step": 8397 }, { "epoch": 0.15350869175791032, "grad_norm": 7.1637062157975135, "learning_rate": 9.605390657917594e-06, "loss": 17.9921, "step": 8398 }, { "epoch": 0.15352697095435686, "grad_norm": 6.79267118405408, "learning_rate": 9.605275389523809e-06, "loss": 17.437, "step": 8399 }, { "epoch": 0.15354525015080336, "grad_norm": 6.152885576362294, "learning_rate": 9.605160104988934e-06, "loss": 17.1758, "step": 8400 }, { "epoch": 0.1535635293472499, "grad_norm": 7.600485582114135, "learning_rate": 9.60504480431337e-06, "loss": 18.0655, "step": 8401 }, { "epoch": 0.1535818085436964, "grad_norm": 8.148246820658834, "learning_rate": 9.604929487497525e-06, "loss": 18.264, "step": 8402 }, { "epoch": 0.15360008774014294, "grad_norm": 7.703202854237343, "learning_rate": 9.604814154541801e-06, "loss": 18.1432, "step": 8403 }, { "epoch": 0.15361836693658948, "grad_norm": 6.731615825436619, "learning_rate": 9.604698805446604e-06, "loss": 17.361, "step": 8404 }, { "epoch": 0.153636646133036, "grad_norm": 7.287228303237335, "learning_rate": 9.604583440212338e-06, "loss": 17.8045, "step": 8405 }, { "epoch": 0.15365492532948252, "grad_norm": 6.349368019047275, "learning_rate": 9.604468058839405e-06, "loss": 17.2851, "step": 8406 }, { "epoch": 0.15367320452592903, "grad_norm": 6.740836440994955, "learning_rate": 9.604352661328212e-06, "loss": 17.4433, "step": 8407 }, { "epoch": 0.15369148372237557, "grad_norm": 6.536896433117541, "learning_rate": 9.604237247679162e-06, "loss": 17.553, "step": 8408 }, { "epoch": 0.1537097629188221, "grad_norm": 7.988312390636531, "learning_rate": 9.60412181789266e-06, "loss": 18.0865, "step": 8409 }, { "epoch": 0.1537280421152686, "grad_norm": 7.7923249945074025, "learning_rate": 9.604006371969111e-06, "loss": 17.7819, "step": 8410 }, { "epoch": 0.15374632131171515, "grad_norm": 7.338200333907157, "learning_rate": 9.603890909908917e-06, "loss": 17.9571, "step": 8411 }, { "epoch": 0.15376460050816165, "grad_norm": 7.761879206895382, "learning_rate": 9.603775431712487e-06, "loss": 17.6326, "step": 8412 }, { "epoch": 0.1537828797046082, "grad_norm": 6.48170202759222, "learning_rate": 9.603659937380223e-06, "loss": 17.4447, "step": 8413 }, { "epoch": 0.1538011589010547, "grad_norm": 7.836939441436887, "learning_rate": 9.60354442691253e-06, "loss": 17.8237, "step": 8414 }, { "epoch": 0.15381943809750123, "grad_norm": 6.719946252733968, "learning_rate": 9.603428900309815e-06, "loss": 17.6225, "step": 8415 }, { "epoch": 0.15383771729394777, "grad_norm": 7.542350517608685, "learning_rate": 9.60331335757248e-06, "loss": 17.9143, "step": 8416 }, { "epoch": 0.15385599649039428, "grad_norm": 6.557273227773028, "learning_rate": 9.60319779870093e-06, "loss": 17.6563, "step": 8417 }, { "epoch": 0.1538742756868408, "grad_norm": 7.687704941739925, "learning_rate": 9.603082223695572e-06, "loss": 17.8355, "step": 8418 }, { "epoch": 0.15389255488328732, "grad_norm": 5.9273300557097, "learning_rate": 9.602966632556812e-06, "loss": 17.266, "step": 8419 }, { "epoch": 0.15391083407973385, "grad_norm": 7.122823065006941, "learning_rate": 9.602851025285052e-06, "loss": 17.7552, "step": 8420 }, { "epoch": 0.1539291132761804, "grad_norm": 6.624075396223145, "learning_rate": 9.602735401880699e-06, "loss": 17.7015, "step": 8421 }, { "epoch": 0.1539473924726269, "grad_norm": 6.480000062232727, "learning_rate": 9.602619762344156e-06, "loss": 17.4433, "step": 8422 }, { "epoch": 0.15396567166907343, "grad_norm": 7.189402278528076, "learning_rate": 9.602504106675832e-06, "loss": 17.7305, "step": 8423 }, { "epoch": 0.15398395086551994, "grad_norm": 8.228315896538703, "learning_rate": 9.60238843487613e-06, "loss": 18.2283, "step": 8424 }, { "epoch": 0.15400223006196648, "grad_norm": 6.209607023376287, "learning_rate": 9.602272746945455e-06, "loss": 17.4764, "step": 8425 }, { "epoch": 0.154020509258413, "grad_norm": 7.474945940669149, "learning_rate": 9.602157042884214e-06, "loss": 18.2579, "step": 8426 }, { "epoch": 0.15403878845485952, "grad_norm": 8.210926673852006, "learning_rate": 9.602041322692811e-06, "loss": 17.5434, "step": 8427 }, { "epoch": 0.15405706765130606, "grad_norm": 6.769935715137518, "learning_rate": 9.601925586371655e-06, "loss": 17.7453, "step": 8428 }, { "epoch": 0.15407534684775256, "grad_norm": 6.101879424645034, "learning_rate": 9.601809833921148e-06, "loss": 17.1897, "step": 8429 }, { "epoch": 0.1540936260441991, "grad_norm": 7.916061720624111, "learning_rate": 9.601694065341697e-06, "loss": 18.0381, "step": 8430 }, { "epoch": 0.1541119052406456, "grad_norm": 8.024275521095763, "learning_rate": 9.601578280633707e-06, "loss": 18.1038, "step": 8431 }, { "epoch": 0.15413018443709214, "grad_norm": 6.076529789367406, "learning_rate": 9.601462479797585e-06, "loss": 17.3861, "step": 8432 }, { "epoch": 0.15414846363353868, "grad_norm": 6.127567418286736, "learning_rate": 9.601346662833735e-06, "loss": 17.5513, "step": 8433 }, { "epoch": 0.1541667428299852, "grad_norm": 7.639056821110453, "learning_rate": 9.601230829742566e-06, "loss": 17.6508, "step": 8434 }, { "epoch": 0.15418502202643172, "grad_norm": 5.856297758944837, "learning_rate": 9.601114980524481e-06, "loss": 17.1349, "step": 8435 }, { "epoch": 0.15420330122287823, "grad_norm": 7.046360103050307, "learning_rate": 9.600999115179888e-06, "loss": 17.7374, "step": 8436 }, { "epoch": 0.15422158041932477, "grad_norm": 8.808883027907436, "learning_rate": 9.600883233709192e-06, "loss": 18.8813, "step": 8437 }, { "epoch": 0.1542398596157713, "grad_norm": 6.65821248202838, "learning_rate": 9.6007673361128e-06, "loss": 17.6938, "step": 8438 }, { "epoch": 0.1542581388122178, "grad_norm": 8.040611572705162, "learning_rate": 9.600651422391116e-06, "loss": 17.9305, "step": 8439 }, { "epoch": 0.15427641800866435, "grad_norm": 6.922120606496327, "learning_rate": 9.600535492544551e-06, "loss": 17.6077, "step": 8440 }, { "epoch": 0.15429469720511085, "grad_norm": 6.183653454314233, "learning_rate": 9.600419546573506e-06, "loss": 17.3557, "step": 8441 }, { "epoch": 0.1543129764015574, "grad_norm": 7.595367575093661, "learning_rate": 9.60030358447839e-06, "loss": 18.0367, "step": 8442 }, { "epoch": 0.15433125559800392, "grad_norm": 5.957698249849723, "learning_rate": 9.60018760625961e-06, "loss": 17.3916, "step": 8443 }, { "epoch": 0.15434953479445043, "grad_norm": 7.12068244520129, "learning_rate": 9.60007161191757e-06, "loss": 17.7851, "step": 8444 }, { "epoch": 0.15436781399089697, "grad_norm": 6.939791736338509, "learning_rate": 9.59995560145268e-06, "loss": 17.8164, "step": 8445 }, { "epoch": 0.15438609318734348, "grad_norm": 6.039847122761099, "learning_rate": 9.599839574865345e-06, "loss": 17.2839, "step": 8446 }, { "epoch": 0.15440437238379, "grad_norm": 7.360061800195574, "learning_rate": 9.59972353215597e-06, "loss": 17.5683, "step": 8447 }, { "epoch": 0.15442265158023652, "grad_norm": 7.856934463667765, "learning_rate": 9.599607473324963e-06, "loss": 17.6077, "step": 8448 }, { "epoch": 0.15444093077668306, "grad_norm": 5.795155529479495, "learning_rate": 9.599491398372731e-06, "loss": 17.2855, "step": 8449 }, { "epoch": 0.1544592099731296, "grad_norm": 6.385509459357291, "learning_rate": 9.599375307299682e-06, "loss": 17.6995, "step": 8450 }, { "epoch": 0.1544774891695761, "grad_norm": 7.730295345935798, "learning_rate": 9.59925920010622e-06, "loss": 17.8271, "step": 8451 }, { "epoch": 0.15449576836602263, "grad_norm": 6.656709036539596, "learning_rate": 9.599143076792756e-06, "loss": 17.5784, "step": 8452 }, { "epoch": 0.15451404756246914, "grad_norm": 6.494362497958026, "learning_rate": 9.599026937359694e-06, "loss": 17.5043, "step": 8453 }, { "epoch": 0.15453232675891568, "grad_norm": 7.926585516267862, "learning_rate": 9.59891078180744e-06, "loss": 17.8609, "step": 8454 }, { "epoch": 0.1545506059553622, "grad_norm": 7.2283692594229745, "learning_rate": 9.598794610136405e-06, "loss": 17.9943, "step": 8455 }, { "epoch": 0.15456888515180872, "grad_norm": 8.839924010422484, "learning_rate": 9.598678422346992e-06, "loss": 18.5454, "step": 8456 }, { "epoch": 0.15458716434825526, "grad_norm": 6.560765416062664, "learning_rate": 9.598562218439612e-06, "loss": 17.6048, "step": 8457 }, { "epoch": 0.15460544354470176, "grad_norm": 5.979901552116303, "learning_rate": 9.598445998414668e-06, "loss": 17.2934, "step": 8458 }, { "epoch": 0.1546237227411483, "grad_norm": 7.905131329432131, "learning_rate": 9.598329762272571e-06, "loss": 18.2834, "step": 8459 }, { "epoch": 0.15464200193759484, "grad_norm": 7.238376643054584, "learning_rate": 9.598213510013728e-06, "loss": 17.7917, "step": 8460 }, { "epoch": 0.15466028113404134, "grad_norm": 8.377683188548191, "learning_rate": 9.598097241638544e-06, "loss": 18.7146, "step": 8461 }, { "epoch": 0.15467856033048788, "grad_norm": 8.320108141643493, "learning_rate": 9.59798095714743e-06, "loss": 18.2979, "step": 8462 }, { "epoch": 0.1546968395269344, "grad_norm": 5.638796431764524, "learning_rate": 9.597864656540789e-06, "loss": 17.1185, "step": 8463 }, { "epoch": 0.15471511872338092, "grad_norm": 7.106385638757528, "learning_rate": 9.597748339819035e-06, "loss": 17.7804, "step": 8464 }, { "epoch": 0.15473339791982743, "grad_norm": 6.978364375188652, "learning_rate": 9.597632006982569e-06, "loss": 17.5941, "step": 8465 }, { "epoch": 0.15475167711627397, "grad_norm": 5.698546235618529, "learning_rate": 9.597515658031804e-06, "loss": 17.3585, "step": 8466 }, { "epoch": 0.1547699563127205, "grad_norm": 8.112020435876998, "learning_rate": 9.597399292967144e-06, "loss": 18.0897, "step": 8467 }, { "epoch": 0.154788235509167, "grad_norm": 7.282222268329545, "learning_rate": 9.597282911789e-06, "loss": 18.0584, "step": 8468 }, { "epoch": 0.15480651470561355, "grad_norm": 7.765696488224575, "learning_rate": 9.597166514497777e-06, "loss": 17.7791, "step": 8469 }, { "epoch": 0.15482479390206005, "grad_norm": 6.515923023581861, "learning_rate": 9.597050101093885e-06, "loss": 17.3934, "step": 8470 }, { "epoch": 0.1548430730985066, "grad_norm": 7.251663888841731, "learning_rate": 9.596933671577731e-06, "loss": 17.911, "step": 8471 }, { "epoch": 0.15486135229495313, "grad_norm": 6.547575766660995, "learning_rate": 9.596817225949722e-06, "loss": 17.2965, "step": 8472 }, { "epoch": 0.15487963149139963, "grad_norm": 7.60869051294314, "learning_rate": 9.59670076421027e-06, "loss": 17.9197, "step": 8473 }, { "epoch": 0.15489791068784617, "grad_norm": 6.14338822215821, "learning_rate": 9.59658428635978e-06, "loss": 17.379, "step": 8474 }, { "epoch": 0.15491618988429268, "grad_norm": 6.5197838892209194, "learning_rate": 9.59646779239866e-06, "loss": 17.4835, "step": 8475 }, { "epoch": 0.1549344690807392, "grad_norm": 6.654093881851663, "learning_rate": 9.59635128232732e-06, "loss": 17.7212, "step": 8476 }, { "epoch": 0.15495274827718575, "grad_norm": 7.3700092470496195, "learning_rate": 9.596234756146167e-06, "loss": 17.4965, "step": 8477 }, { "epoch": 0.15497102747363226, "grad_norm": 6.659004832144627, "learning_rate": 9.596118213855611e-06, "loss": 17.415, "step": 8478 }, { "epoch": 0.1549893066700788, "grad_norm": 6.1720652632126685, "learning_rate": 9.596001655456059e-06, "loss": 17.6741, "step": 8479 }, { "epoch": 0.1550075858665253, "grad_norm": 7.032000921413487, "learning_rate": 9.59588508094792e-06, "loss": 18.3352, "step": 8480 }, { "epoch": 0.15502586506297183, "grad_norm": 6.243013315635809, "learning_rate": 9.595768490331603e-06, "loss": 17.2966, "step": 8481 }, { "epoch": 0.15504414425941834, "grad_norm": 7.426378919149949, "learning_rate": 9.595651883607514e-06, "loss": 17.4242, "step": 8482 }, { "epoch": 0.15506242345586488, "grad_norm": 7.1075562450734235, "learning_rate": 9.595535260776066e-06, "loss": 17.8095, "step": 8483 }, { "epoch": 0.15508070265231141, "grad_norm": 5.365278674569126, "learning_rate": 9.595418621837667e-06, "loss": 17.0794, "step": 8484 }, { "epoch": 0.15509898184875792, "grad_norm": 6.349419628952927, "learning_rate": 9.595301966792722e-06, "loss": 17.5486, "step": 8485 }, { "epoch": 0.15511726104520446, "grad_norm": 7.738224246855898, "learning_rate": 9.595185295641644e-06, "loss": 18.1471, "step": 8486 }, { "epoch": 0.15513554024165097, "grad_norm": 7.300456034374533, "learning_rate": 9.59506860838484e-06, "loss": 17.767, "step": 8487 }, { "epoch": 0.1551538194380975, "grad_norm": 6.674806091221157, "learning_rate": 9.594951905022718e-06, "loss": 17.3993, "step": 8488 }, { "epoch": 0.15517209863454404, "grad_norm": 5.689164721165032, "learning_rate": 9.594835185555688e-06, "loss": 17.0957, "step": 8489 }, { "epoch": 0.15519037783099054, "grad_norm": 7.221146427191833, "learning_rate": 9.594718449984162e-06, "loss": 17.7497, "step": 8490 }, { "epoch": 0.15520865702743708, "grad_norm": 6.531405122433829, "learning_rate": 9.594601698308545e-06, "loss": 17.5837, "step": 8491 }, { "epoch": 0.1552269362238836, "grad_norm": 7.893076460747014, "learning_rate": 9.594484930529248e-06, "loss": 18.0133, "step": 8492 }, { "epoch": 0.15524521542033012, "grad_norm": 6.635984489532212, "learning_rate": 9.59436814664668e-06, "loss": 17.7828, "step": 8493 }, { "epoch": 0.15526349461677666, "grad_norm": 7.302024162710131, "learning_rate": 9.594251346661249e-06, "loss": 17.8628, "step": 8494 }, { "epoch": 0.15528177381322317, "grad_norm": 6.549778065930616, "learning_rate": 9.594134530573367e-06, "loss": 17.3769, "step": 8495 }, { "epoch": 0.1553000530096697, "grad_norm": 7.195584202554672, "learning_rate": 9.594017698383442e-06, "loss": 17.7915, "step": 8496 }, { "epoch": 0.1553183322061162, "grad_norm": 9.34243831281511, "learning_rate": 9.593900850091885e-06, "loss": 18.2791, "step": 8497 }, { "epoch": 0.15533661140256275, "grad_norm": 5.988360475293336, "learning_rate": 9.593783985699101e-06, "loss": 17.5919, "step": 8498 }, { "epoch": 0.15535489059900925, "grad_norm": 6.715506159691648, "learning_rate": 9.593667105205506e-06, "loss": 17.8342, "step": 8499 }, { "epoch": 0.1553731697954558, "grad_norm": 6.318911904885308, "learning_rate": 9.593550208611505e-06, "loss": 17.4947, "step": 8500 }, { "epoch": 0.15539144899190233, "grad_norm": 8.154867457940789, "learning_rate": 9.59343329591751e-06, "loss": 18.4076, "step": 8501 }, { "epoch": 0.15540972818834883, "grad_norm": 6.3570347394540265, "learning_rate": 9.593316367123928e-06, "loss": 17.2772, "step": 8502 }, { "epoch": 0.15542800738479537, "grad_norm": 8.662408364427396, "learning_rate": 9.593199422231173e-06, "loss": 18.6454, "step": 8503 }, { "epoch": 0.15544628658124188, "grad_norm": 6.591927918941316, "learning_rate": 9.59308246123965e-06, "loss": 17.6183, "step": 8504 }, { "epoch": 0.1554645657776884, "grad_norm": 7.350871149854708, "learning_rate": 9.592965484149772e-06, "loss": 17.6501, "step": 8505 }, { "epoch": 0.15548284497413495, "grad_norm": 6.2228465194042615, "learning_rate": 9.59284849096195e-06, "loss": 17.3951, "step": 8506 }, { "epoch": 0.15550112417058146, "grad_norm": 7.340207689427119, "learning_rate": 9.592731481676592e-06, "loss": 17.8985, "step": 8507 }, { "epoch": 0.155519403367028, "grad_norm": 5.860694243527434, "learning_rate": 9.59261445629411e-06, "loss": 17.2409, "step": 8508 }, { "epoch": 0.1555376825634745, "grad_norm": 8.852024998275656, "learning_rate": 9.592497414814911e-06, "loss": 17.4417, "step": 8509 }, { "epoch": 0.15555596175992104, "grad_norm": 8.071592123190793, "learning_rate": 9.592380357239408e-06, "loss": 17.9372, "step": 8510 }, { "epoch": 0.15557424095636757, "grad_norm": 5.926252834765691, "learning_rate": 9.592263283568008e-06, "loss": 17.1788, "step": 8511 }, { "epoch": 0.15559252015281408, "grad_norm": 6.7011376664875915, "learning_rate": 9.592146193801127e-06, "loss": 17.6574, "step": 8512 }, { "epoch": 0.15561079934926061, "grad_norm": 6.303016037431096, "learning_rate": 9.59202908793917e-06, "loss": 17.4543, "step": 8513 }, { "epoch": 0.15562907854570712, "grad_norm": 7.412547093100808, "learning_rate": 9.59191196598255e-06, "loss": 18.2372, "step": 8514 }, { "epoch": 0.15564735774215366, "grad_norm": 6.53962961001332, "learning_rate": 9.591794827931679e-06, "loss": 17.4454, "step": 8515 }, { "epoch": 0.15566563693860017, "grad_norm": 5.892445713863878, "learning_rate": 9.591677673786963e-06, "loss": 17.3634, "step": 8516 }, { "epoch": 0.1556839161350467, "grad_norm": 6.433111180533362, "learning_rate": 9.591560503548816e-06, "loss": 17.5807, "step": 8517 }, { "epoch": 0.15570219533149324, "grad_norm": 7.891231384464062, "learning_rate": 9.591443317217647e-06, "loss": 18.1661, "step": 8518 }, { "epoch": 0.15572047452793975, "grad_norm": 6.588605284400302, "learning_rate": 9.591326114793871e-06, "loss": 17.68, "step": 8519 }, { "epoch": 0.15573875372438628, "grad_norm": 6.726872242898803, "learning_rate": 9.591208896277892e-06, "loss": 17.7196, "step": 8520 }, { "epoch": 0.1557570329208328, "grad_norm": 6.676150957695583, "learning_rate": 9.591091661670125e-06, "loss": 17.7211, "step": 8521 }, { "epoch": 0.15577531211727932, "grad_norm": 6.354196660907414, "learning_rate": 9.590974410970981e-06, "loss": 17.5494, "step": 8522 }, { "epoch": 0.15579359131372586, "grad_norm": 8.81674414046929, "learning_rate": 9.59085714418087e-06, "loss": 18.3573, "step": 8523 }, { "epoch": 0.15581187051017237, "grad_norm": 6.622294384275694, "learning_rate": 9.590739861300202e-06, "loss": 17.6632, "step": 8524 }, { "epoch": 0.1558301497066189, "grad_norm": 5.559659622657357, "learning_rate": 9.59062256232939e-06, "loss": 17.2316, "step": 8525 }, { "epoch": 0.1558484289030654, "grad_norm": 7.307350725079425, "learning_rate": 9.590505247268842e-06, "loss": 18.2184, "step": 8526 }, { "epoch": 0.15586670809951195, "grad_norm": 6.817131197650809, "learning_rate": 9.590387916118975e-06, "loss": 17.5981, "step": 8527 }, { "epoch": 0.15588498729595848, "grad_norm": 7.096014670023533, "learning_rate": 9.590270568880194e-06, "loss": 18.1983, "step": 8528 }, { "epoch": 0.155903266492405, "grad_norm": 7.992282933840935, "learning_rate": 9.590153205552914e-06, "loss": 18.0549, "step": 8529 }, { "epoch": 0.15592154568885153, "grad_norm": 6.723702128700275, "learning_rate": 9.590035826137546e-06, "loss": 17.6164, "step": 8530 }, { "epoch": 0.15593982488529803, "grad_norm": 6.69807799183464, "learning_rate": 9.5899184306345e-06, "loss": 17.5677, "step": 8531 }, { "epoch": 0.15595810408174457, "grad_norm": 7.331194963743321, "learning_rate": 9.589801019044188e-06, "loss": 17.8097, "step": 8532 }, { "epoch": 0.15597638327819108, "grad_norm": 6.734347726978225, "learning_rate": 9.589683591367022e-06, "loss": 17.5873, "step": 8533 }, { "epoch": 0.1559946624746376, "grad_norm": 11.856050505777045, "learning_rate": 9.589566147603413e-06, "loss": 17.5046, "step": 8534 }, { "epoch": 0.15601294167108415, "grad_norm": 7.222427471083795, "learning_rate": 9.589448687753773e-06, "loss": 17.6746, "step": 8535 }, { "epoch": 0.15603122086753066, "grad_norm": 7.3315001487201705, "learning_rate": 9.589331211818515e-06, "loss": 17.8191, "step": 8536 }, { "epoch": 0.1560495000639772, "grad_norm": 7.039387644346871, "learning_rate": 9.589213719798048e-06, "loss": 17.7906, "step": 8537 }, { "epoch": 0.1560677792604237, "grad_norm": 6.956771012404695, "learning_rate": 9.589096211692785e-06, "loss": 17.8115, "step": 8538 }, { "epoch": 0.15608605845687024, "grad_norm": 7.537196469773764, "learning_rate": 9.588978687503139e-06, "loss": 18.0115, "step": 8539 }, { "epoch": 0.15610433765331677, "grad_norm": 7.644245909337144, "learning_rate": 9.588861147229521e-06, "loss": 17.8212, "step": 8540 }, { "epoch": 0.15612261684976328, "grad_norm": 8.022592212437466, "learning_rate": 9.58874359087234e-06, "loss": 18.2235, "step": 8541 }, { "epoch": 0.15614089604620981, "grad_norm": 7.173768077180046, "learning_rate": 9.588626018432014e-06, "loss": 17.6495, "step": 8542 }, { "epoch": 0.15615917524265632, "grad_norm": 6.66266495172659, "learning_rate": 9.58850842990895e-06, "loss": 17.455, "step": 8543 }, { "epoch": 0.15617745443910286, "grad_norm": 5.842986755428583, "learning_rate": 9.588390825303564e-06, "loss": 17.1366, "step": 8544 }, { "epoch": 0.1561957336355494, "grad_norm": 7.2621323602948715, "learning_rate": 9.588273204616266e-06, "loss": 17.7071, "step": 8545 }, { "epoch": 0.1562140128319959, "grad_norm": 5.88392891932802, "learning_rate": 9.588155567847469e-06, "loss": 17.0827, "step": 8546 }, { "epoch": 0.15623229202844244, "grad_norm": 7.344325399835583, "learning_rate": 9.588037914997582e-06, "loss": 17.7232, "step": 8547 }, { "epoch": 0.15625057122488895, "grad_norm": 5.859663652010187, "learning_rate": 9.587920246067022e-06, "loss": 17.3981, "step": 8548 }, { "epoch": 0.15626885042133548, "grad_norm": 6.56078940492395, "learning_rate": 9.5878025610562e-06, "loss": 17.5337, "step": 8549 }, { "epoch": 0.156287129617782, "grad_norm": 7.681235712099995, "learning_rate": 9.587684859965529e-06, "loss": 17.8948, "step": 8550 }, { "epoch": 0.15630540881422852, "grad_norm": 7.524388588177178, "learning_rate": 9.587567142795419e-06, "loss": 17.806, "step": 8551 }, { "epoch": 0.15632368801067506, "grad_norm": 8.154780063802253, "learning_rate": 9.587449409546284e-06, "loss": 18.2139, "step": 8552 }, { "epoch": 0.15634196720712157, "grad_norm": 8.234716189725752, "learning_rate": 9.587331660218537e-06, "loss": 17.9248, "step": 8553 }, { "epoch": 0.1563602464035681, "grad_norm": 6.719842263902505, "learning_rate": 9.587213894812593e-06, "loss": 17.6152, "step": 8554 }, { "epoch": 0.1563785256000146, "grad_norm": 6.783205588697717, "learning_rate": 9.58709611332886e-06, "loss": 17.7465, "step": 8555 }, { "epoch": 0.15639680479646115, "grad_norm": 7.795942526111688, "learning_rate": 9.586978315767755e-06, "loss": 18.381, "step": 8556 }, { "epoch": 0.15641508399290768, "grad_norm": 7.623458633933016, "learning_rate": 9.586860502129686e-06, "loss": 17.8816, "step": 8557 }, { "epoch": 0.1564333631893542, "grad_norm": 6.789083733935318, "learning_rate": 9.586742672415073e-06, "loss": 17.8753, "step": 8558 }, { "epoch": 0.15645164238580073, "grad_norm": 7.157241078959216, "learning_rate": 9.586624826624322e-06, "loss": 17.9102, "step": 8559 }, { "epoch": 0.15646992158224723, "grad_norm": 5.642902572370437, "learning_rate": 9.586506964757849e-06, "loss": 17.266, "step": 8560 }, { "epoch": 0.15648820077869377, "grad_norm": 7.0361229462686525, "learning_rate": 9.586389086816068e-06, "loss": 17.6732, "step": 8561 }, { "epoch": 0.1565064799751403, "grad_norm": 8.400883349565074, "learning_rate": 9.586271192799392e-06, "loss": 18.2939, "step": 8562 }, { "epoch": 0.1565247591715868, "grad_norm": 7.9279995310918, "learning_rate": 9.586153282708233e-06, "loss": 18.1139, "step": 8563 }, { "epoch": 0.15654303836803335, "grad_norm": 5.758986063647926, "learning_rate": 9.586035356543005e-06, "loss": 17.2393, "step": 8564 }, { "epoch": 0.15656131756447986, "grad_norm": 6.874973190523697, "learning_rate": 9.585917414304119e-06, "loss": 17.6617, "step": 8565 }, { "epoch": 0.1565795967609264, "grad_norm": 8.324872630273362, "learning_rate": 9.58579945599199e-06, "loss": 18.464, "step": 8566 }, { "epoch": 0.1565978759573729, "grad_norm": 7.410527394539033, "learning_rate": 9.585681481607035e-06, "loss": 18.2697, "step": 8567 }, { "epoch": 0.15661615515381944, "grad_norm": 7.343080238999604, "learning_rate": 9.585563491149663e-06, "loss": 18.1351, "step": 8568 }, { "epoch": 0.15663443435026597, "grad_norm": 7.575183517132464, "learning_rate": 9.585445484620288e-06, "loss": 17.939, "step": 8569 }, { "epoch": 0.15665271354671248, "grad_norm": 8.1357613990169, "learning_rate": 9.585327462019327e-06, "loss": 18.5413, "step": 8570 }, { "epoch": 0.15667099274315902, "grad_norm": 6.779577490105369, "learning_rate": 9.585209423347188e-06, "loss": 17.4974, "step": 8571 }, { "epoch": 0.15668927193960552, "grad_norm": 8.07894326475309, "learning_rate": 9.58509136860429e-06, "loss": 17.9928, "step": 8572 }, { "epoch": 0.15670755113605206, "grad_norm": 7.0722506209810945, "learning_rate": 9.584973297791045e-06, "loss": 17.9897, "step": 8573 }, { "epoch": 0.1567258303324986, "grad_norm": 6.438500881550452, "learning_rate": 9.584855210907864e-06, "loss": 17.5617, "step": 8574 }, { "epoch": 0.1567441095289451, "grad_norm": 6.270999339681938, "learning_rate": 9.584737107955165e-06, "loss": 17.4895, "step": 8575 }, { "epoch": 0.15676238872539164, "grad_norm": 7.517518615524435, "learning_rate": 9.58461898893336e-06, "loss": 17.9209, "step": 8576 }, { "epoch": 0.15678066792183815, "grad_norm": 6.540675387314155, "learning_rate": 9.584500853842865e-06, "loss": 17.7416, "step": 8577 }, { "epoch": 0.15679894711828468, "grad_norm": 6.896809487961548, "learning_rate": 9.58438270268409e-06, "loss": 18.0654, "step": 8578 }, { "epoch": 0.15681722631473122, "grad_norm": 6.9106355894783436, "learning_rate": 9.58426453545745e-06, "loss": 17.3348, "step": 8579 }, { "epoch": 0.15683550551117773, "grad_norm": 6.73826832223587, "learning_rate": 9.584146352163365e-06, "loss": 17.4553, "step": 8580 }, { "epoch": 0.15685378470762426, "grad_norm": 7.9892625504157015, "learning_rate": 9.58402815280224e-06, "loss": 18.3169, "step": 8581 }, { "epoch": 0.15687206390407077, "grad_norm": 6.190261729230549, "learning_rate": 9.583909937374498e-06, "loss": 17.3757, "step": 8582 }, { "epoch": 0.1568903431005173, "grad_norm": 6.502355971704945, "learning_rate": 9.583791705880548e-06, "loss": 17.386, "step": 8583 }, { "epoch": 0.1569086222969638, "grad_norm": 5.691456524327374, "learning_rate": 9.583673458320806e-06, "loss": 17.205, "step": 8584 }, { "epoch": 0.15692690149341035, "grad_norm": 8.101221139386617, "learning_rate": 9.583555194695686e-06, "loss": 18.3278, "step": 8585 }, { "epoch": 0.15694518068985688, "grad_norm": 7.13921274085771, "learning_rate": 9.583436915005602e-06, "loss": 17.7824, "step": 8586 }, { "epoch": 0.1569634598863034, "grad_norm": 7.508935075788792, "learning_rate": 9.583318619250973e-06, "loss": 17.8597, "step": 8587 }, { "epoch": 0.15698173908274993, "grad_norm": 6.290705774851627, "learning_rate": 9.583200307432206e-06, "loss": 17.4956, "step": 8588 }, { "epoch": 0.15700001827919643, "grad_norm": 7.18031074118022, "learning_rate": 9.58308197954972e-06, "loss": 17.493, "step": 8589 }, { "epoch": 0.15701829747564297, "grad_norm": 7.055950999653975, "learning_rate": 9.58296363560393e-06, "loss": 17.7738, "step": 8590 }, { "epoch": 0.1570365766720895, "grad_norm": 6.898125200720675, "learning_rate": 9.582845275595252e-06, "loss": 17.6118, "step": 8591 }, { "epoch": 0.15705485586853601, "grad_norm": 6.822839256959291, "learning_rate": 9.582726899524096e-06, "loss": 17.5974, "step": 8592 }, { "epoch": 0.15707313506498255, "grad_norm": 7.498924502404051, "learning_rate": 9.582608507390883e-06, "loss": 17.9733, "step": 8593 }, { "epoch": 0.15709141426142906, "grad_norm": 6.109250911940779, "learning_rate": 9.582490099196023e-06, "loss": 17.4529, "step": 8594 }, { "epoch": 0.1571096934578756, "grad_norm": 7.184334856210555, "learning_rate": 9.582371674939932e-06, "loss": 17.7924, "step": 8595 }, { "epoch": 0.15712797265432213, "grad_norm": 6.6232562938971755, "learning_rate": 9.582253234623027e-06, "loss": 17.388, "step": 8596 }, { "epoch": 0.15714625185076864, "grad_norm": 7.165670758294181, "learning_rate": 9.582134778245722e-06, "loss": 17.7685, "step": 8597 }, { "epoch": 0.15716453104721517, "grad_norm": 6.425743050384466, "learning_rate": 9.582016305808433e-06, "loss": 17.3309, "step": 8598 }, { "epoch": 0.15718281024366168, "grad_norm": 6.040470941431053, "learning_rate": 9.581897817311571e-06, "loss": 17.4582, "step": 8599 }, { "epoch": 0.15720108944010822, "grad_norm": 6.525849640296614, "learning_rate": 9.58177931275556e-06, "loss": 17.5882, "step": 8600 }, { "epoch": 0.15721936863655472, "grad_norm": 6.830962980849063, "learning_rate": 9.581660792140807e-06, "loss": 17.5274, "step": 8601 }, { "epoch": 0.15723764783300126, "grad_norm": 5.97855634507643, "learning_rate": 9.58154225546773e-06, "loss": 17.4051, "step": 8602 }, { "epoch": 0.1572559270294478, "grad_norm": 8.005231048138352, "learning_rate": 9.581423702736747e-06, "loss": 18.2805, "step": 8603 }, { "epoch": 0.1572742062258943, "grad_norm": 6.965833565128532, "learning_rate": 9.581305133948269e-06, "loss": 17.9589, "step": 8604 }, { "epoch": 0.15729248542234084, "grad_norm": 8.307702529661622, "learning_rate": 9.581186549102717e-06, "loss": 18.2579, "step": 8605 }, { "epoch": 0.15731076461878735, "grad_norm": 6.439174824316372, "learning_rate": 9.581067948200503e-06, "loss": 17.3737, "step": 8606 }, { "epoch": 0.15732904381523388, "grad_norm": 7.123205864341623, "learning_rate": 9.580949331242042e-06, "loss": 17.5323, "step": 8607 }, { "epoch": 0.15734732301168042, "grad_norm": 7.049663918196313, "learning_rate": 9.58083069822775e-06, "loss": 17.8473, "step": 8608 }, { "epoch": 0.15736560220812693, "grad_norm": 6.139725136135639, "learning_rate": 9.580712049158046e-06, "loss": 17.2127, "step": 8609 }, { "epoch": 0.15738388140457346, "grad_norm": 8.677418539267073, "learning_rate": 9.580593384033343e-06, "loss": 18.3846, "step": 8610 }, { "epoch": 0.15740216060101997, "grad_norm": 7.20631993395279, "learning_rate": 9.580474702854058e-06, "loss": 18.1721, "step": 8611 }, { "epoch": 0.1574204397974665, "grad_norm": 6.5505428804457795, "learning_rate": 9.580356005620608e-06, "loss": 17.4097, "step": 8612 }, { "epoch": 0.15743871899391304, "grad_norm": 7.123841848182562, "learning_rate": 9.580237292333406e-06, "loss": 17.8687, "step": 8613 }, { "epoch": 0.15745699819035955, "grad_norm": 6.39697252165992, "learning_rate": 9.580118562992868e-06, "loss": 17.3785, "step": 8614 }, { "epoch": 0.15747527738680608, "grad_norm": 7.070870038480495, "learning_rate": 9.579999817599415e-06, "loss": 17.692, "step": 8615 }, { "epoch": 0.1574935565832526, "grad_norm": 6.26053032908961, "learning_rate": 9.579881056153459e-06, "loss": 17.2914, "step": 8616 }, { "epoch": 0.15751183577969913, "grad_norm": 6.697145280584922, "learning_rate": 9.579762278655417e-06, "loss": 17.5871, "step": 8617 }, { "epoch": 0.15753011497614564, "grad_norm": 7.715165824108414, "learning_rate": 9.579643485105706e-06, "loss": 18.0104, "step": 8618 }, { "epoch": 0.15754839417259217, "grad_norm": 6.722127511425423, "learning_rate": 9.579524675504743e-06, "loss": 17.7836, "step": 8619 }, { "epoch": 0.1575666733690387, "grad_norm": 8.465236155753898, "learning_rate": 9.579405849852942e-06, "loss": 18.3863, "step": 8620 }, { "epoch": 0.15758495256548521, "grad_norm": 7.765050473720229, "learning_rate": 9.579287008150721e-06, "loss": 18.0953, "step": 8621 }, { "epoch": 0.15760323176193175, "grad_norm": 7.757385280429804, "learning_rate": 9.579168150398496e-06, "loss": 18.1099, "step": 8622 }, { "epoch": 0.15762151095837826, "grad_norm": 6.575447720202369, "learning_rate": 9.579049276596684e-06, "loss": 17.8145, "step": 8623 }, { "epoch": 0.1576397901548248, "grad_norm": 7.214456986634153, "learning_rate": 9.578930386745704e-06, "loss": 17.5937, "step": 8624 }, { "epoch": 0.15765806935127133, "grad_norm": 6.66633688657013, "learning_rate": 9.578811480845968e-06, "loss": 17.6349, "step": 8625 }, { "epoch": 0.15767634854771784, "grad_norm": 8.18054623746064, "learning_rate": 9.578692558897895e-06, "loss": 18.2933, "step": 8626 }, { "epoch": 0.15769462774416437, "grad_norm": 5.721921894328776, "learning_rate": 9.578573620901903e-06, "loss": 17.0205, "step": 8627 }, { "epoch": 0.15771290694061088, "grad_norm": 6.523186426909358, "learning_rate": 9.578454666858408e-06, "loss": 17.5072, "step": 8628 }, { "epoch": 0.15773118613705742, "grad_norm": 6.5150941377836205, "learning_rate": 9.578335696767825e-06, "loss": 17.5661, "step": 8629 }, { "epoch": 0.15774946533350395, "grad_norm": 6.904503266798888, "learning_rate": 9.578216710630574e-06, "loss": 17.4964, "step": 8630 }, { "epoch": 0.15776774452995046, "grad_norm": 6.901044540590434, "learning_rate": 9.57809770844707e-06, "loss": 17.551, "step": 8631 }, { "epoch": 0.157786023726397, "grad_norm": 6.8625124003165885, "learning_rate": 9.577978690217732e-06, "loss": 17.514, "step": 8632 }, { "epoch": 0.1578043029228435, "grad_norm": 6.79711657068216, "learning_rate": 9.577859655942975e-06, "loss": 17.5851, "step": 8633 }, { "epoch": 0.15782258211929004, "grad_norm": 7.428381409221911, "learning_rate": 9.577740605623218e-06, "loss": 18.121, "step": 8634 }, { "epoch": 0.15784086131573655, "grad_norm": 9.082209566902788, "learning_rate": 9.577621539258876e-06, "loss": 18.3479, "step": 8635 }, { "epoch": 0.15785914051218308, "grad_norm": 6.739861152037539, "learning_rate": 9.577502456850368e-06, "loss": 17.7048, "step": 8636 }, { "epoch": 0.15787741970862962, "grad_norm": 6.52170095545089, "learning_rate": 9.577383358398111e-06, "loss": 17.3415, "step": 8637 }, { "epoch": 0.15789569890507613, "grad_norm": 6.253937057566765, "learning_rate": 9.577264243902524e-06, "loss": 17.3816, "step": 8638 }, { "epoch": 0.15791397810152266, "grad_norm": 6.5586503017309035, "learning_rate": 9.577145113364022e-06, "loss": 17.7927, "step": 8639 }, { "epoch": 0.15793225729796917, "grad_norm": 6.822586486919763, "learning_rate": 9.577025966783025e-06, "loss": 17.5244, "step": 8640 }, { "epoch": 0.1579505364944157, "grad_norm": 6.156764339679498, "learning_rate": 9.576906804159947e-06, "loss": 17.5124, "step": 8641 }, { "epoch": 0.15796881569086224, "grad_norm": 7.525565901915947, "learning_rate": 9.57678762549521e-06, "loss": 17.9959, "step": 8642 }, { "epoch": 0.15798709488730875, "grad_norm": 7.573590715549222, "learning_rate": 9.576668430789227e-06, "loss": 17.8283, "step": 8643 }, { "epoch": 0.15800537408375528, "grad_norm": 5.465960370810305, "learning_rate": 9.576549220042419e-06, "loss": 17.0043, "step": 8644 }, { "epoch": 0.1580236532802018, "grad_norm": 7.507270295043513, "learning_rate": 9.576429993255203e-06, "loss": 18.4469, "step": 8645 }, { "epoch": 0.15804193247664833, "grad_norm": 8.381314359700884, "learning_rate": 9.576310750427998e-06, "loss": 18.4688, "step": 8646 }, { "epoch": 0.15806021167309486, "grad_norm": 5.195004795725945, "learning_rate": 9.57619149156122e-06, "loss": 17.1027, "step": 8647 }, { "epoch": 0.15807849086954137, "grad_norm": 6.5466336494995305, "learning_rate": 9.57607221665529e-06, "loss": 17.6005, "step": 8648 }, { "epoch": 0.1580967700659879, "grad_norm": 7.650693031787093, "learning_rate": 9.57595292571062e-06, "loss": 17.7174, "step": 8649 }, { "epoch": 0.15811504926243442, "grad_norm": 7.524522513133388, "learning_rate": 9.575833618727637e-06, "loss": 18.1352, "step": 8650 }, { "epoch": 0.15813332845888095, "grad_norm": 7.076198658972048, "learning_rate": 9.575714295706751e-06, "loss": 17.7296, "step": 8651 }, { "epoch": 0.15815160765532746, "grad_norm": 9.334436224397965, "learning_rate": 9.575594956648384e-06, "loss": 18.0572, "step": 8652 }, { "epoch": 0.158169886851774, "grad_norm": 6.865725757427967, "learning_rate": 9.575475601552955e-06, "loss": 17.5508, "step": 8653 }, { "epoch": 0.15818816604822053, "grad_norm": 5.968781954249258, "learning_rate": 9.57535623042088e-06, "loss": 17.2271, "step": 8654 }, { "epoch": 0.15820644524466704, "grad_norm": 7.927357273268601, "learning_rate": 9.575236843252578e-06, "loss": 17.9615, "step": 8655 }, { "epoch": 0.15822472444111357, "grad_norm": 6.147631388732034, "learning_rate": 9.575117440048469e-06, "loss": 17.5935, "step": 8656 }, { "epoch": 0.15824300363756008, "grad_norm": 6.664098577906064, "learning_rate": 9.574998020808969e-06, "loss": 17.3875, "step": 8657 }, { "epoch": 0.15826128283400662, "grad_norm": 7.039980596920565, "learning_rate": 9.574878585534498e-06, "loss": 17.7298, "step": 8658 }, { "epoch": 0.15827956203045315, "grad_norm": 8.43221093342362, "learning_rate": 9.574759134225476e-06, "loss": 18.1742, "step": 8659 }, { "epoch": 0.15829784122689966, "grad_norm": 7.32183485146767, "learning_rate": 9.574639666882319e-06, "loss": 17.7696, "step": 8660 }, { "epoch": 0.1583161204233462, "grad_norm": 8.225441049432984, "learning_rate": 9.574520183505447e-06, "loss": 18.4105, "step": 8661 }, { "epoch": 0.1583343996197927, "grad_norm": 7.767409983133676, "learning_rate": 9.57440068409528e-06, "loss": 18.0198, "step": 8662 }, { "epoch": 0.15835267881623924, "grad_norm": 7.541541219196348, "learning_rate": 9.574281168652234e-06, "loss": 17.8972, "step": 8663 }, { "epoch": 0.15837095801268578, "grad_norm": 7.343574019287776, "learning_rate": 9.57416163717673e-06, "loss": 17.9099, "step": 8664 }, { "epoch": 0.15838923720913228, "grad_norm": 6.989636427625274, "learning_rate": 9.574042089669186e-06, "loss": 17.8249, "step": 8665 }, { "epoch": 0.15840751640557882, "grad_norm": 7.189037334290815, "learning_rate": 9.573922526130021e-06, "loss": 18.0304, "step": 8666 }, { "epoch": 0.15842579560202533, "grad_norm": 7.552814407373591, "learning_rate": 9.573802946559656e-06, "loss": 18.1647, "step": 8667 }, { "epoch": 0.15844407479847186, "grad_norm": 7.719845968675812, "learning_rate": 9.57368335095851e-06, "loss": 18.1486, "step": 8668 }, { "epoch": 0.15846235399491837, "grad_norm": 7.165440768937146, "learning_rate": 9.573563739326997e-06, "loss": 18.0737, "step": 8669 }, { "epoch": 0.1584806331913649, "grad_norm": 7.197366971362831, "learning_rate": 9.573444111665542e-06, "loss": 17.9356, "step": 8670 }, { "epoch": 0.15849891238781144, "grad_norm": 6.445771316820499, "learning_rate": 9.573324467974562e-06, "loss": 17.5002, "step": 8671 }, { "epoch": 0.15851719158425795, "grad_norm": 7.326785099838132, "learning_rate": 9.573204808254476e-06, "loss": 17.7761, "step": 8672 }, { "epoch": 0.15853547078070448, "grad_norm": 6.1440248773733375, "learning_rate": 9.573085132505705e-06, "loss": 17.5076, "step": 8673 }, { "epoch": 0.158553749977151, "grad_norm": 7.317194130755906, "learning_rate": 9.572965440728667e-06, "loss": 17.8295, "step": 8674 }, { "epoch": 0.15857202917359753, "grad_norm": 6.759062435788617, "learning_rate": 9.572845732923781e-06, "loss": 17.4328, "step": 8675 }, { "epoch": 0.15859030837004406, "grad_norm": 6.7954543106881715, "learning_rate": 9.572726009091469e-06, "loss": 17.6332, "step": 8676 }, { "epoch": 0.15860858756649057, "grad_norm": 8.776539660855445, "learning_rate": 9.572606269232148e-06, "loss": 18.7997, "step": 8677 }, { "epoch": 0.1586268667629371, "grad_norm": 5.6294904211526005, "learning_rate": 9.572486513346239e-06, "loss": 17.1821, "step": 8678 }, { "epoch": 0.15864514595938362, "grad_norm": 6.258365595852227, "learning_rate": 9.572366741434163e-06, "loss": 17.564, "step": 8679 }, { "epoch": 0.15866342515583015, "grad_norm": 11.479545019600947, "learning_rate": 9.572246953496336e-06, "loss": 18.2172, "step": 8680 }, { "epoch": 0.1586817043522767, "grad_norm": 6.403658755230618, "learning_rate": 9.572127149533182e-06, "loss": 17.5483, "step": 8681 }, { "epoch": 0.1586999835487232, "grad_norm": 5.101281347483226, "learning_rate": 9.572007329545119e-06, "loss": 16.9884, "step": 8682 }, { "epoch": 0.15871826274516973, "grad_norm": 9.981835161632372, "learning_rate": 9.571887493532566e-06, "loss": 17.8174, "step": 8683 }, { "epoch": 0.15873654194161624, "grad_norm": 6.548260835919465, "learning_rate": 9.571767641495944e-06, "loss": 17.3436, "step": 8684 }, { "epoch": 0.15875482113806277, "grad_norm": 6.8461827153199275, "learning_rate": 9.571647773435674e-06, "loss": 17.8834, "step": 8685 }, { "epoch": 0.15877310033450928, "grad_norm": 8.502495700321242, "learning_rate": 9.571527889352174e-06, "loss": 18.5276, "step": 8686 }, { "epoch": 0.15879137953095582, "grad_norm": 6.176439201496272, "learning_rate": 9.571407989245866e-06, "loss": 17.4895, "step": 8687 }, { "epoch": 0.15880965872740235, "grad_norm": 6.022477784888412, "learning_rate": 9.571288073117171e-06, "loss": 17.6312, "step": 8688 }, { "epoch": 0.15882793792384886, "grad_norm": 7.118900886235516, "learning_rate": 9.571168140966506e-06, "loss": 17.8077, "step": 8689 }, { "epoch": 0.1588462171202954, "grad_norm": 7.404382215336522, "learning_rate": 9.571048192794297e-06, "loss": 17.7534, "step": 8690 }, { "epoch": 0.1588644963167419, "grad_norm": 7.673872954903309, "learning_rate": 9.570928228600957e-06, "loss": 18.2886, "step": 8691 }, { "epoch": 0.15888277551318844, "grad_norm": 8.213756221609747, "learning_rate": 9.570808248386911e-06, "loss": 18.2046, "step": 8692 }, { "epoch": 0.15890105470963498, "grad_norm": 6.004541473602897, "learning_rate": 9.57068825215258e-06, "loss": 17.3993, "step": 8693 }, { "epoch": 0.15891933390608148, "grad_norm": 7.67699382473935, "learning_rate": 9.570568239898383e-06, "loss": 17.8866, "step": 8694 }, { "epoch": 0.15893761310252802, "grad_norm": 7.801825759335127, "learning_rate": 9.570448211624738e-06, "loss": 17.812, "step": 8695 }, { "epoch": 0.15895589229897453, "grad_norm": 6.661143090418699, "learning_rate": 9.570328167332072e-06, "loss": 17.5459, "step": 8696 }, { "epoch": 0.15897417149542106, "grad_norm": 6.379851837271899, "learning_rate": 9.570208107020802e-06, "loss": 17.7867, "step": 8697 }, { "epoch": 0.1589924506918676, "grad_norm": 7.174447656296335, "learning_rate": 9.570088030691348e-06, "loss": 17.6017, "step": 8698 }, { "epoch": 0.1590107298883141, "grad_norm": 7.3451704570964695, "learning_rate": 9.569967938344134e-06, "loss": 18.1274, "step": 8699 }, { "epoch": 0.15902900908476064, "grad_norm": 7.1401695622395485, "learning_rate": 9.569847829979577e-06, "loss": 17.5868, "step": 8700 }, { "epoch": 0.15904728828120715, "grad_norm": 7.1894016334097985, "learning_rate": 9.5697277055981e-06, "loss": 17.7075, "step": 8701 }, { "epoch": 0.15906556747765369, "grad_norm": 6.458617217159287, "learning_rate": 9.569607565200123e-06, "loss": 17.5532, "step": 8702 }, { "epoch": 0.1590838466741002, "grad_norm": 6.825022394793046, "learning_rate": 9.56948740878607e-06, "loss": 17.9135, "step": 8703 }, { "epoch": 0.15910212587054673, "grad_norm": 6.28830097810962, "learning_rate": 9.56936723635636e-06, "loss": 17.3174, "step": 8704 }, { "epoch": 0.15912040506699326, "grad_norm": 7.956355899333767, "learning_rate": 9.569247047911414e-06, "loss": 17.7205, "step": 8705 }, { "epoch": 0.15913868426343977, "grad_norm": 6.332582650391563, "learning_rate": 9.569126843451652e-06, "loss": 17.463, "step": 8706 }, { "epoch": 0.1591569634598863, "grad_norm": 7.004815411625668, "learning_rate": 9.569006622977499e-06, "loss": 17.6234, "step": 8707 }, { "epoch": 0.15917524265633282, "grad_norm": 8.335114969107142, "learning_rate": 9.568886386489373e-06, "loss": 18.0666, "step": 8708 }, { "epoch": 0.15919352185277935, "grad_norm": 5.926383063830562, "learning_rate": 9.568766133987698e-06, "loss": 17.3391, "step": 8709 }, { "epoch": 0.1592118010492259, "grad_norm": 6.491640226766058, "learning_rate": 9.568645865472893e-06, "loss": 17.6675, "step": 8710 }, { "epoch": 0.1592300802456724, "grad_norm": 6.932616478421611, "learning_rate": 9.568525580945382e-06, "loss": 17.6239, "step": 8711 }, { "epoch": 0.15924835944211893, "grad_norm": 7.634918806624296, "learning_rate": 9.568405280405583e-06, "loss": 18.0179, "step": 8712 }, { "epoch": 0.15926663863856544, "grad_norm": 7.197427622249253, "learning_rate": 9.568284963853923e-06, "loss": 17.6312, "step": 8713 }, { "epoch": 0.15928491783501197, "grad_norm": 8.088800271659048, "learning_rate": 9.568164631290819e-06, "loss": 18.0679, "step": 8714 }, { "epoch": 0.1593031970314585, "grad_norm": 6.445077687085442, "learning_rate": 9.568044282716695e-06, "loss": 17.4712, "step": 8715 }, { "epoch": 0.15932147622790502, "grad_norm": 6.698055899410576, "learning_rate": 9.567923918131971e-06, "loss": 17.9143, "step": 8716 }, { "epoch": 0.15933975542435155, "grad_norm": 7.635732485280628, "learning_rate": 9.567803537537071e-06, "loss": 18.2345, "step": 8717 }, { "epoch": 0.15935803462079806, "grad_norm": 6.9098385174088675, "learning_rate": 9.567683140932415e-06, "loss": 17.914, "step": 8718 }, { "epoch": 0.1593763138172446, "grad_norm": 7.689218781560204, "learning_rate": 9.567562728318426e-06, "loss": 18.0113, "step": 8719 }, { "epoch": 0.1593945930136911, "grad_norm": 5.908448434642699, "learning_rate": 9.567442299695526e-06, "loss": 17.334, "step": 8720 }, { "epoch": 0.15941287221013764, "grad_norm": 7.383001977435694, "learning_rate": 9.567321855064137e-06, "loss": 18.0264, "step": 8721 }, { "epoch": 0.15943115140658418, "grad_norm": 6.0762845717344485, "learning_rate": 9.567201394424683e-06, "loss": 17.2396, "step": 8722 }, { "epoch": 0.15944943060303068, "grad_norm": 7.249071343599725, "learning_rate": 9.567080917777582e-06, "loss": 18.3273, "step": 8723 }, { "epoch": 0.15946770979947722, "grad_norm": 6.79927527377071, "learning_rate": 9.566960425123262e-06, "loss": 17.464, "step": 8724 }, { "epoch": 0.15948598899592373, "grad_norm": 6.032798301046214, "learning_rate": 9.566839916462139e-06, "loss": 17.1785, "step": 8725 }, { "epoch": 0.15950426819237026, "grad_norm": 7.2277709946029205, "learning_rate": 9.566719391794639e-06, "loss": 17.6834, "step": 8726 }, { "epoch": 0.1595225473888168, "grad_norm": 7.274076078711354, "learning_rate": 9.566598851121184e-06, "loss": 17.9066, "step": 8727 }, { "epoch": 0.1595408265852633, "grad_norm": 7.194632883119047, "learning_rate": 9.566478294442197e-06, "loss": 17.8001, "step": 8728 }, { "epoch": 0.15955910578170984, "grad_norm": 7.523618385263561, "learning_rate": 9.566357721758099e-06, "loss": 17.9979, "step": 8729 }, { "epoch": 0.15957738497815635, "grad_norm": 5.817974321190137, "learning_rate": 9.566237133069314e-06, "loss": 17.3732, "step": 8730 }, { "epoch": 0.15959566417460289, "grad_norm": 7.13909383047071, "learning_rate": 9.566116528376264e-06, "loss": 17.8938, "step": 8731 }, { "epoch": 0.15961394337104942, "grad_norm": 8.348706340397504, "learning_rate": 9.56599590767937e-06, "loss": 18.6279, "step": 8732 }, { "epoch": 0.15963222256749593, "grad_norm": 5.503249224940718, "learning_rate": 9.56587527097906e-06, "loss": 17.1792, "step": 8733 }, { "epoch": 0.15965050176394247, "grad_norm": 6.56382302545479, "learning_rate": 9.56575461827575e-06, "loss": 17.7867, "step": 8734 }, { "epoch": 0.15966878096038897, "grad_norm": 7.307967686522048, "learning_rate": 9.565633949569869e-06, "loss": 17.6034, "step": 8735 }, { "epoch": 0.1596870601568355, "grad_norm": 6.929338459157738, "learning_rate": 9.565513264861837e-06, "loss": 17.5525, "step": 8736 }, { "epoch": 0.15970533935328202, "grad_norm": 6.443022151956936, "learning_rate": 9.565392564152074e-06, "loss": 17.477, "step": 8737 }, { "epoch": 0.15972361854972855, "grad_norm": 7.241973816833101, "learning_rate": 9.56527184744101e-06, "loss": 17.5325, "step": 8738 }, { "epoch": 0.1597418977461751, "grad_norm": 6.563851454103735, "learning_rate": 9.565151114729063e-06, "loss": 17.4029, "step": 8739 }, { "epoch": 0.1597601769426216, "grad_norm": 6.508498858443748, "learning_rate": 9.565030366016656e-06, "loss": 17.6719, "step": 8740 }, { "epoch": 0.15977845613906813, "grad_norm": 6.456972815731345, "learning_rate": 9.564909601304215e-06, "loss": 17.4541, "step": 8741 }, { "epoch": 0.15979673533551464, "grad_norm": 6.163034436791755, "learning_rate": 9.564788820592162e-06, "loss": 17.3692, "step": 8742 }, { "epoch": 0.15981501453196117, "grad_norm": 5.575126929964949, "learning_rate": 9.564668023880921e-06, "loss": 17.1237, "step": 8743 }, { "epoch": 0.1598332937284077, "grad_norm": 7.64526785634227, "learning_rate": 9.564547211170914e-06, "loss": 18.0375, "step": 8744 }, { "epoch": 0.15985157292485422, "grad_norm": 7.417285485026281, "learning_rate": 9.564426382462564e-06, "loss": 17.5785, "step": 8745 }, { "epoch": 0.15986985212130075, "grad_norm": 6.4554490728201195, "learning_rate": 9.564305537756298e-06, "loss": 17.3587, "step": 8746 }, { "epoch": 0.15988813131774726, "grad_norm": 7.298964984532181, "learning_rate": 9.564184677052536e-06, "loss": 18.0229, "step": 8747 }, { "epoch": 0.1599064105141938, "grad_norm": 6.613225166292407, "learning_rate": 9.564063800351702e-06, "loss": 17.5211, "step": 8748 }, { "epoch": 0.15992468971064033, "grad_norm": 9.573566077771629, "learning_rate": 9.56394290765422e-06, "loss": 18.4496, "step": 8749 }, { "epoch": 0.15994296890708684, "grad_norm": 6.406176375922915, "learning_rate": 9.563821998960516e-06, "loss": 17.5678, "step": 8750 }, { "epoch": 0.15996124810353338, "grad_norm": 6.260061914930123, "learning_rate": 9.56370107427101e-06, "loss": 17.409, "step": 8751 }, { "epoch": 0.15997952729997988, "grad_norm": 6.754535119047297, "learning_rate": 9.56358013358613e-06, "loss": 17.56, "step": 8752 }, { "epoch": 0.15999780649642642, "grad_norm": 6.432051614070615, "learning_rate": 9.563459176906296e-06, "loss": 17.7027, "step": 8753 }, { "epoch": 0.16001608569287293, "grad_norm": 7.594418566449968, "learning_rate": 9.563338204231933e-06, "loss": 17.9458, "step": 8754 }, { "epoch": 0.16003436488931946, "grad_norm": 7.371403461236743, "learning_rate": 9.563217215563468e-06, "loss": 17.6388, "step": 8755 }, { "epoch": 0.160052644085766, "grad_norm": 8.453908317655701, "learning_rate": 9.563096210901321e-06, "loss": 18.4992, "step": 8756 }, { "epoch": 0.1600709232822125, "grad_norm": 7.239148752398614, "learning_rate": 9.562975190245917e-06, "loss": 17.2711, "step": 8757 }, { "epoch": 0.16008920247865904, "grad_norm": 7.535249967549871, "learning_rate": 9.562854153597682e-06, "loss": 18.0152, "step": 8758 }, { "epoch": 0.16010748167510555, "grad_norm": 6.377766363405279, "learning_rate": 9.56273310095704e-06, "loss": 17.6365, "step": 8759 }, { "epoch": 0.1601257608715521, "grad_norm": 5.729475222732704, "learning_rate": 9.562612032324414e-06, "loss": 17.0194, "step": 8760 }, { "epoch": 0.16014404006799862, "grad_norm": 6.904956145505646, "learning_rate": 9.562490947700228e-06, "loss": 17.6042, "step": 8761 }, { "epoch": 0.16016231926444513, "grad_norm": 5.786519415904625, "learning_rate": 9.562369847084906e-06, "loss": 17.294, "step": 8762 }, { "epoch": 0.16018059846089167, "grad_norm": 6.966187542125642, "learning_rate": 9.562248730478875e-06, "loss": 17.7972, "step": 8763 }, { "epoch": 0.16019887765733817, "grad_norm": 6.957106629142053, "learning_rate": 9.56212759788256e-06, "loss": 17.7768, "step": 8764 }, { "epoch": 0.1602171568537847, "grad_norm": 7.653301452414369, "learning_rate": 9.562006449296381e-06, "loss": 17.6291, "step": 8765 }, { "epoch": 0.16023543605023124, "grad_norm": 7.294945839487996, "learning_rate": 9.561885284720767e-06, "loss": 17.8276, "step": 8766 }, { "epoch": 0.16025371524667775, "grad_norm": 7.134948801613209, "learning_rate": 9.561764104156139e-06, "loss": 17.7329, "step": 8767 }, { "epoch": 0.1602719944431243, "grad_norm": 6.642006525302264, "learning_rate": 9.561642907602923e-06, "loss": 17.2391, "step": 8768 }, { "epoch": 0.1602902736395708, "grad_norm": 6.475013961772217, "learning_rate": 9.561521695061547e-06, "loss": 17.4464, "step": 8769 }, { "epoch": 0.16030855283601733, "grad_norm": 7.212239729576027, "learning_rate": 9.561400466532433e-06, "loss": 17.1043, "step": 8770 }, { "epoch": 0.16032683203246384, "grad_norm": 7.095701461594068, "learning_rate": 9.561279222016004e-06, "loss": 17.7539, "step": 8771 }, { "epoch": 0.16034511122891038, "grad_norm": 6.593081321509201, "learning_rate": 9.56115796151269e-06, "loss": 17.5958, "step": 8772 }, { "epoch": 0.1603633904253569, "grad_norm": 8.66286614955099, "learning_rate": 9.561036685022911e-06, "loss": 18.3973, "step": 8773 }, { "epoch": 0.16038166962180342, "grad_norm": 7.398229057452632, "learning_rate": 9.560915392547095e-06, "loss": 17.97, "step": 8774 }, { "epoch": 0.16039994881824995, "grad_norm": 7.52899839892259, "learning_rate": 9.560794084085667e-06, "loss": 18.0149, "step": 8775 }, { "epoch": 0.16041822801469646, "grad_norm": 7.0884022559299735, "learning_rate": 9.560672759639052e-06, "loss": 17.6417, "step": 8776 }, { "epoch": 0.160436507211143, "grad_norm": 7.6345269325657314, "learning_rate": 9.560551419207673e-06, "loss": 18.2399, "step": 8777 }, { "epoch": 0.16045478640758953, "grad_norm": 6.786548576572578, "learning_rate": 9.560430062791956e-06, "loss": 17.7836, "step": 8778 }, { "epoch": 0.16047306560403604, "grad_norm": 7.630147309540174, "learning_rate": 9.560308690392331e-06, "loss": 17.9565, "step": 8779 }, { "epoch": 0.16049134480048258, "grad_norm": 9.243567669339185, "learning_rate": 9.560187302009216e-06, "loss": 18.5087, "step": 8780 }, { "epoch": 0.16050962399692908, "grad_norm": 6.68852694381934, "learning_rate": 9.560065897643043e-06, "loss": 17.4963, "step": 8781 }, { "epoch": 0.16052790319337562, "grad_norm": 7.723848633136946, "learning_rate": 9.559944477294235e-06, "loss": 17.9461, "step": 8782 }, { "epoch": 0.16054618238982216, "grad_norm": 6.027865934325465, "learning_rate": 9.559823040963214e-06, "loss": 17.4491, "step": 8783 }, { "epoch": 0.16056446158626866, "grad_norm": 6.525364454120052, "learning_rate": 9.55970158865041e-06, "loss": 17.6133, "step": 8784 }, { "epoch": 0.1605827407827152, "grad_norm": 6.234268240730879, "learning_rate": 9.55958012035625e-06, "loss": 17.5524, "step": 8785 }, { "epoch": 0.1606010199791617, "grad_norm": 7.6452263245109515, "learning_rate": 9.559458636081156e-06, "loss": 17.8828, "step": 8786 }, { "epoch": 0.16061929917560824, "grad_norm": 6.805593151866449, "learning_rate": 9.559337135825555e-06, "loss": 17.8694, "step": 8787 }, { "epoch": 0.16063757837205475, "grad_norm": 7.376660941074235, "learning_rate": 9.559215619589872e-06, "loss": 17.9796, "step": 8788 }, { "epoch": 0.1606558575685013, "grad_norm": 7.114462555908165, "learning_rate": 9.559094087374535e-06, "loss": 17.6144, "step": 8789 }, { "epoch": 0.16067413676494782, "grad_norm": 6.750929925152189, "learning_rate": 9.558972539179969e-06, "loss": 17.5734, "step": 8790 }, { "epoch": 0.16069241596139433, "grad_norm": 6.7640328775666125, "learning_rate": 9.558850975006599e-06, "loss": 17.6017, "step": 8791 }, { "epoch": 0.16071069515784087, "grad_norm": 6.540873147196798, "learning_rate": 9.558729394854854e-06, "loss": 17.5003, "step": 8792 }, { "epoch": 0.16072897435428737, "grad_norm": 5.795009420922162, "learning_rate": 9.558607798725155e-06, "loss": 17.0964, "step": 8793 }, { "epoch": 0.1607472535507339, "grad_norm": 7.8149691739728, "learning_rate": 9.558486186617933e-06, "loss": 17.9489, "step": 8794 }, { "epoch": 0.16076553274718045, "grad_norm": 5.961826323415727, "learning_rate": 9.558364558533613e-06, "loss": 17.3436, "step": 8795 }, { "epoch": 0.16078381194362695, "grad_norm": 7.980379462536579, "learning_rate": 9.558242914472619e-06, "loss": 18.2505, "step": 8796 }, { "epoch": 0.1608020911400735, "grad_norm": 8.799802937393846, "learning_rate": 9.55812125443538e-06, "loss": 18.4094, "step": 8797 }, { "epoch": 0.16082037033652, "grad_norm": 7.875497214055206, "learning_rate": 9.557999578422323e-06, "loss": 17.8522, "step": 8798 }, { "epoch": 0.16083864953296653, "grad_norm": 6.825628477609388, "learning_rate": 9.55787788643387e-06, "loss": 17.6042, "step": 8799 }, { "epoch": 0.16085692872941307, "grad_norm": 6.656232687908398, "learning_rate": 9.557756178470453e-06, "loss": 17.4498, "step": 8800 }, { "epoch": 0.16087520792585958, "grad_norm": 7.452719354689855, "learning_rate": 9.557634454532495e-06, "loss": 17.791, "step": 8801 }, { "epoch": 0.1608934871223061, "grad_norm": 7.075444203805298, "learning_rate": 9.557512714620424e-06, "loss": 18.0032, "step": 8802 }, { "epoch": 0.16091176631875262, "grad_norm": 7.753044557459597, "learning_rate": 9.557390958734667e-06, "loss": 18.0721, "step": 8803 }, { "epoch": 0.16093004551519915, "grad_norm": 6.633972339969448, "learning_rate": 9.557269186875649e-06, "loss": 17.4466, "step": 8804 }, { "epoch": 0.16094832471164566, "grad_norm": 6.876666467275851, "learning_rate": 9.5571473990438e-06, "loss": 17.6804, "step": 8805 }, { "epoch": 0.1609666039080922, "grad_norm": 8.2795245562326, "learning_rate": 9.557025595239543e-06, "loss": 17.9313, "step": 8806 }, { "epoch": 0.16098488310453873, "grad_norm": 6.303225996141523, "learning_rate": 9.556903775463306e-06, "loss": 17.5084, "step": 8807 }, { "epoch": 0.16100316230098524, "grad_norm": 6.5407205829138, "learning_rate": 9.556781939715519e-06, "loss": 17.3902, "step": 8808 }, { "epoch": 0.16102144149743178, "grad_norm": 8.460848418617777, "learning_rate": 9.556660087996605e-06, "loss": 18.3657, "step": 8809 }, { "epoch": 0.16103972069387829, "grad_norm": 6.897862897657835, "learning_rate": 9.556538220306994e-06, "loss": 17.5954, "step": 8810 }, { "epoch": 0.16105799989032482, "grad_norm": 7.164626874468124, "learning_rate": 9.55641633664711e-06, "loss": 17.7571, "step": 8811 }, { "epoch": 0.16107627908677136, "grad_norm": 6.833970612165855, "learning_rate": 9.556294437017383e-06, "loss": 17.6259, "step": 8812 }, { "epoch": 0.16109455828321786, "grad_norm": 6.4596259350114025, "learning_rate": 9.556172521418241e-06, "loss": 17.577, "step": 8813 }, { "epoch": 0.1611128374796644, "grad_norm": 7.498344388477673, "learning_rate": 9.556050589850109e-06, "loss": 17.7124, "step": 8814 }, { "epoch": 0.1611311166761109, "grad_norm": 7.971377103769383, "learning_rate": 9.555928642313415e-06, "loss": 18.1817, "step": 8815 }, { "epoch": 0.16114939587255744, "grad_norm": 6.699257541796852, "learning_rate": 9.555806678808586e-06, "loss": 17.6014, "step": 8816 }, { "epoch": 0.16116767506900398, "grad_norm": 7.14970775353149, "learning_rate": 9.55568469933605e-06, "loss": 17.8008, "step": 8817 }, { "epoch": 0.1611859542654505, "grad_norm": 6.899336621313897, "learning_rate": 9.555562703896232e-06, "loss": 17.7719, "step": 8818 }, { "epoch": 0.16120423346189702, "grad_norm": 7.5932976917227295, "learning_rate": 9.555440692489566e-06, "loss": 17.9854, "step": 8819 }, { "epoch": 0.16122251265834353, "grad_norm": 7.497653414850729, "learning_rate": 9.555318665116475e-06, "loss": 17.5895, "step": 8820 }, { "epoch": 0.16124079185479007, "grad_norm": 6.501683877973553, "learning_rate": 9.555196621777385e-06, "loss": 17.3843, "step": 8821 }, { "epoch": 0.16125907105123657, "grad_norm": 6.046399938171552, "learning_rate": 9.555074562472728e-06, "loss": 17.4456, "step": 8822 }, { "epoch": 0.1612773502476831, "grad_norm": 8.333521331479742, "learning_rate": 9.554952487202929e-06, "loss": 17.9339, "step": 8823 }, { "epoch": 0.16129562944412965, "grad_norm": 7.188884164898245, "learning_rate": 9.554830395968417e-06, "loss": 17.9014, "step": 8824 }, { "epoch": 0.16131390864057615, "grad_norm": 8.20227410730392, "learning_rate": 9.55470828876962e-06, "loss": 18.1764, "step": 8825 }, { "epoch": 0.1613321878370227, "grad_norm": 7.485773635555615, "learning_rate": 9.554586165606967e-06, "loss": 17.878, "step": 8826 }, { "epoch": 0.1613504670334692, "grad_norm": 8.393810519342246, "learning_rate": 9.554464026480884e-06, "loss": 18.1257, "step": 8827 }, { "epoch": 0.16136874622991573, "grad_norm": 7.989067804682207, "learning_rate": 9.554341871391799e-06, "loss": 17.798, "step": 8828 }, { "epoch": 0.16138702542636227, "grad_norm": 7.123354701496578, "learning_rate": 9.55421970034014e-06, "loss": 18.0067, "step": 8829 }, { "epoch": 0.16140530462280878, "grad_norm": 7.798993085522868, "learning_rate": 9.554097513326338e-06, "loss": 18.1148, "step": 8830 }, { "epoch": 0.1614235838192553, "grad_norm": 4.76814135126192, "learning_rate": 9.553975310350819e-06, "loss": 16.7965, "step": 8831 }, { "epoch": 0.16144186301570182, "grad_norm": 6.4682768640768336, "learning_rate": 9.55385309141401e-06, "loss": 17.2909, "step": 8832 }, { "epoch": 0.16146014221214836, "grad_norm": 6.797191004024916, "learning_rate": 9.553730856516343e-06, "loss": 17.7865, "step": 8833 }, { "epoch": 0.1614784214085949, "grad_norm": 7.591471038689188, "learning_rate": 9.553608605658244e-06, "loss": 17.6864, "step": 8834 }, { "epoch": 0.1614967006050414, "grad_norm": 6.842949793100091, "learning_rate": 9.553486338840143e-06, "loss": 17.3557, "step": 8835 }, { "epoch": 0.16151497980148793, "grad_norm": 8.929409275541913, "learning_rate": 9.553364056062467e-06, "loss": 18.7151, "step": 8836 }, { "epoch": 0.16153325899793444, "grad_norm": 7.785349282093318, "learning_rate": 9.553241757325644e-06, "loss": 17.9587, "step": 8837 }, { "epoch": 0.16155153819438098, "grad_norm": 8.211137903475462, "learning_rate": 9.553119442630103e-06, "loss": 17.9899, "step": 8838 }, { "epoch": 0.16156981739082749, "grad_norm": 6.7164478822489375, "learning_rate": 9.552997111976275e-06, "loss": 17.529, "step": 8839 }, { "epoch": 0.16158809658727402, "grad_norm": 7.546942335017686, "learning_rate": 9.552874765364587e-06, "loss": 17.9005, "step": 8840 }, { "epoch": 0.16160637578372056, "grad_norm": 8.153689286797787, "learning_rate": 9.552752402795469e-06, "loss": 18.4123, "step": 8841 }, { "epoch": 0.16162465498016707, "grad_norm": 7.152845343535854, "learning_rate": 9.552630024269347e-06, "loss": 17.7524, "step": 8842 }, { "epoch": 0.1616429341766136, "grad_norm": 7.337910363949701, "learning_rate": 9.552507629786653e-06, "loss": 17.8114, "step": 8843 }, { "epoch": 0.1616612133730601, "grad_norm": 7.350235652986651, "learning_rate": 9.552385219347816e-06, "loss": 17.8237, "step": 8844 }, { "epoch": 0.16167949256950664, "grad_norm": 6.14665711848872, "learning_rate": 9.552262792953262e-06, "loss": 17.3868, "step": 8845 }, { "epoch": 0.16169777176595318, "grad_norm": 6.272718841791613, "learning_rate": 9.55214035060342e-06, "loss": 17.4701, "step": 8846 }, { "epoch": 0.1617160509623997, "grad_norm": 6.863264719435674, "learning_rate": 9.552017892298724e-06, "loss": 17.508, "step": 8847 }, { "epoch": 0.16173433015884622, "grad_norm": 7.136719533652093, "learning_rate": 9.551895418039601e-06, "loss": 17.4004, "step": 8848 }, { "epoch": 0.16175260935529273, "grad_norm": 7.283507678344315, "learning_rate": 9.551772927826477e-06, "loss": 17.9247, "step": 8849 }, { "epoch": 0.16177088855173927, "grad_norm": 6.05289082646221, "learning_rate": 9.551650421659786e-06, "loss": 17.0785, "step": 8850 }, { "epoch": 0.1617891677481858, "grad_norm": 8.625028740166986, "learning_rate": 9.551527899539954e-06, "loss": 17.9702, "step": 8851 }, { "epoch": 0.1618074469446323, "grad_norm": 6.3450623363035294, "learning_rate": 9.551405361467412e-06, "loss": 17.3423, "step": 8852 }, { "epoch": 0.16182572614107885, "grad_norm": 6.320029822690952, "learning_rate": 9.55128280744259e-06, "loss": 17.4594, "step": 8853 }, { "epoch": 0.16184400533752535, "grad_norm": 8.117196672477366, "learning_rate": 9.551160237465915e-06, "loss": 18.0902, "step": 8854 }, { "epoch": 0.1618622845339719, "grad_norm": 11.078199725555908, "learning_rate": 9.55103765153782e-06, "loss": 18.8538, "step": 8855 }, { "epoch": 0.1618805637304184, "grad_norm": 8.079093213144644, "learning_rate": 9.550915049658733e-06, "loss": 18.4629, "step": 8856 }, { "epoch": 0.16189884292686493, "grad_norm": 6.484534105389004, "learning_rate": 9.550792431829082e-06, "loss": 17.5436, "step": 8857 }, { "epoch": 0.16191712212331147, "grad_norm": 7.49800621285524, "learning_rate": 9.5506697980493e-06, "loss": 18.0245, "step": 8858 }, { "epoch": 0.16193540131975798, "grad_norm": 6.5500496032887545, "learning_rate": 9.550547148319814e-06, "loss": 17.3675, "step": 8859 }, { "epoch": 0.1619536805162045, "grad_norm": 9.450945561850927, "learning_rate": 9.550424482641057e-06, "loss": 18.4343, "step": 8860 }, { "epoch": 0.16197195971265102, "grad_norm": 5.6372846773157015, "learning_rate": 9.550301801013456e-06, "loss": 17.2191, "step": 8861 }, { "epoch": 0.16199023890909756, "grad_norm": 8.082414695165987, "learning_rate": 9.55017910343744e-06, "loss": 18.0178, "step": 8862 }, { "epoch": 0.1620085181055441, "grad_norm": 6.189643379768784, "learning_rate": 9.550056389913443e-06, "loss": 17.2269, "step": 8863 }, { "epoch": 0.1620267973019906, "grad_norm": 7.229971847798873, "learning_rate": 9.549933660441892e-06, "loss": 17.4883, "step": 8864 }, { "epoch": 0.16204507649843714, "grad_norm": 6.811545341412197, "learning_rate": 9.549810915023222e-06, "loss": 17.3983, "step": 8865 }, { "epoch": 0.16206335569488364, "grad_norm": 6.727419034143885, "learning_rate": 9.549688153657855e-06, "loss": 17.4161, "step": 8866 }, { "epoch": 0.16208163489133018, "grad_norm": 7.330171877370214, "learning_rate": 9.549565376346229e-06, "loss": 17.8667, "step": 8867 }, { "epoch": 0.16209991408777671, "grad_norm": 8.272764506260637, "learning_rate": 9.549442583088769e-06, "loss": 18.1347, "step": 8868 }, { "epoch": 0.16211819328422322, "grad_norm": 6.147822248842741, "learning_rate": 9.549319773885908e-06, "loss": 17.4762, "step": 8869 }, { "epoch": 0.16213647248066976, "grad_norm": 6.189533530420878, "learning_rate": 9.549196948738078e-06, "loss": 17.1608, "step": 8870 }, { "epoch": 0.16215475167711627, "grad_norm": 6.38942492187238, "learning_rate": 9.549074107645704e-06, "loss": 17.533, "step": 8871 }, { "epoch": 0.1621730308735628, "grad_norm": 5.844114971106341, "learning_rate": 9.548951250609223e-06, "loss": 17.0304, "step": 8872 }, { "epoch": 0.1621913100700093, "grad_norm": 6.919844670071196, "learning_rate": 9.54882837762906e-06, "loss": 17.6341, "step": 8873 }, { "epoch": 0.16220958926645584, "grad_norm": 6.1769616846319675, "learning_rate": 9.548705488705651e-06, "loss": 17.6437, "step": 8874 }, { "epoch": 0.16222786846290238, "grad_norm": 7.123526203979538, "learning_rate": 9.548582583839424e-06, "loss": 17.6275, "step": 8875 }, { "epoch": 0.1622461476593489, "grad_norm": 7.8622977543729, "learning_rate": 9.548459663030807e-06, "loss": 18.1613, "step": 8876 }, { "epoch": 0.16226442685579542, "grad_norm": 6.988235033949581, "learning_rate": 9.548336726280235e-06, "loss": 17.8994, "step": 8877 }, { "epoch": 0.16228270605224193, "grad_norm": 8.668390020559814, "learning_rate": 9.548213773588137e-06, "loss": 18.4199, "step": 8878 }, { "epoch": 0.16230098524868847, "grad_norm": 7.592850934071577, "learning_rate": 9.548090804954946e-06, "loss": 18.0574, "step": 8879 }, { "epoch": 0.162319264445135, "grad_norm": 6.243808839154162, "learning_rate": 9.54796782038109e-06, "loss": 17.2268, "step": 8880 }, { "epoch": 0.1623375436415815, "grad_norm": 6.92471135372571, "learning_rate": 9.547844819867002e-06, "loss": 17.7505, "step": 8881 }, { "epoch": 0.16235582283802805, "grad_norm": 8.250122694694106, "learning_rate": 9.547721803413113e-06, "loss": 18.4426, "step": 8882 }, { "epoch": 0.16237410203447455, "grad_norm": 7.90110995616912, "learning_rate": 9.547598771019853e-06, "loss": 17.9777, "step": 8883 }, { "epoch": 0.1623923812309211, "grad_norm": 7.675239492202096, "learning_rate": 9.547475722687653e-06, "loss": 18.0677, "step": 8884 }, { "epoch": 0.16241066042736763, "grad_norm": 7.2494996796705635, "learning_rate": 9.547352658416946e-06, "loss": 17.9652, "step": 8885 }, { "epoch": 0.16242893962381413, "grad_norm": 7.074103628188423, "learning_rate": 9.547229578208164e-06, "loss": 17.7379, "step": 8886 }, { "epoch": 0.16244721882026067, "grad_norm": 6.9007334560151685, "learning_rate": 9.547106482061734e-06, "loss": 17.7754, "step": 8887 }, { "epoch": 0.16246549801670718, "grad_norm": 6.010855450347364, "learning_rate": 9.546983369978093e-06, "loss": 17.1205, "step": 8888 }, { "epoch": 0.1624837772131537, "grad_norm": 7.231444756815707, "learning_rate": 9.546860241957669e-06, "loss": 17.7948, "step": 8889 }, { "epoch": 0.16250205640960022, "grad_norm": 7.131651381970469, "learning_rate": 9.546737098000893e-06, "loss": 17.9893, "step": 8890 }, { "epoch": 0.16252033560604676, "grad_norm": 7.70387932922149, "learning_rate": 9.5466139381082e-06, "loss": 18.0789, "step": 8891 }, { "epoch": 0.1625386148024933, "grad_norm": 6.632072531801575, "learning_rate": 9.546490762280018e-06, "loss": 17.6724, "step": 8892 }, { "epoch": 0.1625568939989398, "grad_norm": 6.936192890123043, "learning_rate": 9.546367570516782e-06, "loss": 17.6417, "step": 8893 }, { "epoch": 0.16257517319538634, "grad_norm": 6.956442992720794, "learning_rate": 9.546244362818922e-06, "loss": 17.681, "step": 8894 }, { "epoch": 0.16259345239183284, "grad_norm": 7.446021023217806, "learning_rate": 9.546121139186869e-06, "loss": 18.232, "step": 8895 }, { "epoch": 0.16261173158827938, "grad_norm": 7.878532797276115, "learning_rate": 9.545997899621057e-06, "loss": 17.8911, "step": 8896 }, { "epoch": 0.16263001078472591, "grad_norm": 6.543557569485853, "learning_rate": 9.545874644121915e-06, "loss": 17.519, "step": 8897 }, { "epoch": 0.16264828998117242, "grad_norm": 7.015477953441177, "learning_rate": 9.545751372689879e-06, "loss": 17.8328, "step": 8898 }, { "epoch": 0.16266656917761896, "grad_norm": 6.3181880335265665, "learning_rate": 9.545628085325378e-06, "loss": 17.2829, "step": 8899 }, { "epoch": 0.16268484837406547, "grad_norm": 6.697700055575188, "learning_rate": 9.545504782028845e-06, "loss": 17.3818, "step": 8900 }, { "epoch": 0.162703127570512, "grad_norm": 5.856440390979823, "learning_rate": 9.545381462800713e-06, "loss": 17.1333, "step": 8901 }, { "epoch": 0.16272140676695854, "grad_norm": 7.761739161167816, "learning_rate": 9.545258127641412e-06, "loss": 17.8042, "step": 8902 }, { "epoch": 0.16273968596340505, "grad_norm": 5.975151780291863, "learning_rate": 9.545134776551377e-06, "loss": 17.4304, "step": 8903 }, { "epoch": 0.16275796515985158, "grad_norm": 6.5665951275741845, "learning_rate": 9.545011409531037e-06, "loss": 17.7145, "step": 8904 }, { "epoch": 0.1627762443562981, "grad_norm": 6.733855110402917, "learning_rate": 9.544888026580827e-06, "loss": 17.099, "step": 8905 }, { "epoch": 0.16279452355274462, "grad_norm": 5.905538250052269, "learning_rate": 9.54476462770118e-06, "loss": 17.0874, "step": 8906 }, { "epoch": 0.16281280274919113, "grad_norm": 6.573048580583706, "learning_rate": 9.544641212892526e-06, "loss": 17.5165, "step": 8907 }, { "epoch": 0.16283108194563767, "grad_norm": 9.295126203116364, "learning_rate": 9.544517782155302e-06, "loss": 18.2697, "step": 8908 }, { "epoch": 0.1628493611420842, "grad_norm": 8.26365264438235, "learning_rate": 9.544394335489935e-06, "loss": 17.8954, "step": 8909 }, { "epoch": 0.1628676403385307, "grad_norm": 7.084231977177663, "learning_rate": 9.54427087289686e-06, "loss": 17.4922, "step": 8910 }, { "epoch": 0.16288591953497725, "grad_norm": 6.964164989018065, "learning_rate": 9.54414739437651e-06, "loss": 17.7176, "step": 8911 }, { "epoch": 0.16290419873142375, "grad_norm": 6.997736867494417, "learning_rate": 9.54402389992932e-06, "loss": 17.4331, "step": 8912 }, { "epoch": 0.1629224779278703, "grad_norm": 8.043540829147126, "learning_rate": 9.543900389555718e-06, "loss": 18.0513, "step": 8913 }, { "epoch": 0.16294075712431683, "grad_norm": 7.16888807126414, "learning_rate": 9.54377686325614e-06, "loss": 17.7014, "step": 8914 }, { "epoch": 0.16295903632076333, "grad_norm": 8.30871827815993, "learning_rate": 9.54365332103102e-06, "loss": 18.0837, "step": 8915 }, { "epoch": 0.16297731551720987, "grad_norm": 8.503896845917318, "learning_rate": 9.543529762880787e-06, "loss": 18.0608, "step": 8916 }, { "epoch": 0.16299559471365638, "grad_norm": 6.712750605148803, "learning_rate": 9.543406188805877e-06, "loss": 17.3239, "step": 8917 }, { "epoch": 0.1630138739101029, "grad_norm": 6.401990566639425, "learning_rate": 9.543282598806723e-06, "loss": 17.4695, "step": 8918 }, { "epoch": 0.16303215310654945, "grad_norm": 6.642985318262605, "learning_rate": 9.543158992883758e-06, "loss": 17.6847, "step": 8919 }, { "epoch": 0.16305043230299596, "grad_norm": 7.542441204187607, "learning_rate": 9.543035371037415e-06, "loss": 18.0636, "step": 8920 }, { "epoch": 0.1630687114994425, "grad_norm": 6.2766294396528375, "learning_rate": 9.542911733268126e-06, "loss": 17.5489, "step": 8921 }, { "epoch": 0.163086990695889, "grad_norm": 7.384420981284008, "learning_rate": 9.542788079576326e-06, "loss": 17.7955, "step": 8922 }, { "epoch": 0.16310526989233554, "grad_norm": 5.8974784763862935, "learning_rate": 9.54266440996245e-06, "loss": 17.3754, "step": 8923 }, { "epoch": 0.16312354908878204, "grad_norm": 6.578968888106481, "learning_rate": 9.542540724426927e-06, "loss": 17.6573, "step": 8924 }, { "epoch": 0.16314182828522858, "grad_norm": 6.860280161621239, "learning_rate": 9.542417022970194e-06, "loss": 17.6178, "step": 8925 }, { "epoch": 0.16316010748167512, "grad_norm": 6.979930012077105, "learning_rate": 9.542293305592683e-06, "loss": 17.6281, "step": 8926 }, { "epoch": 0.16317838667812162, "grad_norm": 7.599394431780828, "learning_rate": 9.54216957229483e-06, "loss": 18.0106, "step": 8927 }, { "epoch": 0.16319666587456816, "grad_norm": 7.238928157250578, "learning_rate": 9.542045823077064e-06, "loss": 18.2261, "step": 8928 }, { "epoch": 0.16321494507101467, "grad_norm": 7.766677650452347, "learning_rate": 9.541922057939823e-06, "loss": 18.0347, "step": 8929 }, { "epoch": 0.1632332242674612, "grad_norm": 7.662017576676054, "learning_rate": 9.54179827688354e-06, "loss": 17.8838, "step": 8930 }, { "epoch": 0.16325150346390774, "grad_norm": 7.232795696096888, "learning_rate": 9.541674479908647e-06, "loss": 17.866, "step": 8931 }, { "epoch": 0.16326978266035425, "grad_norm": 7.886448752917773, "learning_rate": 9.54155066701558e-06, "loss": 17.6537, "step": 8932 }, { "epoch": 0.16328806185680078, "grad_norm": 7.881010893635565, "learning_rate": 9.541426838204771e-06, "loss": 18.0129, "step": 8933 }, { "epoch": 0.1633063410532473, "grad_norm": 7.606065616458181, "learning_rate": 9.541302993476655e-06, "loss": 17.8275, "step": 8934 }, { "epoch": 0.16332462024969382, "grad_norm": 7.123188545637438, "learning_rate": 9.541179132831666e-06, "loss": 17.6886, "step": 8935 }, { "epoch": 0.16334289944614036, "grad_norm": 7.226467542992779, "learning_rate": 9.54105525627024e-06, "loss": 17.7167, "step": 8936 }, { "epoch": 0.16336117864258687, "grad_norm": 5.937890595858325, "learning_rate": 9.540931363792808e-06, "loss": 17.3241, "step": 8937 }, { "epoch": 0.1633794578390334, "grad_norm": 6.032465860912279, "learning_rate": 9.540807455399806e-06, "loss": 17.4818, "step": 8938 }, { "epoch": 0.1633977370354799, "grad_norm": 7.150257416553276, "learning_rate": 9.540683531091667e-06, "loss": 17.7593, "step": 8939 }, { "epoch": 0.16341601623192645, "grad_norm": 6.682123741954857, "learning_rate": 9.540559590868826e-06, "loss": 17.4994, "step": 8940 }, { "epoch": 0.16343429542837296, "grad_norm": 7.771814579829153, "learning_rate": 9.54043563473172e-06, "loss": 18.012, "step": 8941 }, { "epoch": 0.1634525746248195, "grad_norm": 7.097596182243404, "learning_rate": 9.540311662680779e-06, "loss": 17.7753, "step": 8942 }, { "epoch": 0.16347085382126603, "grad_norm": 7.096495466577756, "learning_rate": 9.540187674716439e-06, "loss": 17.9051, "step": 8943 }, { "epoch": 0.16348913301771253, "grad_norm": 6.006982250832487, "learning_rate": 9.540063670839138e-06, "loss": 17.148, "step": 8944 }, { "epoch": 0.16350741221415907, "grad_norm": 11.197540538981825, "learning_rate": 9.539939651049306e-06, "loss": 17.011, "step": 8945 }, { "epoch": 0.16352569141060558, "grad_norm": 7.675777563157956, "learning_rate": 9.539815615347378e-06, "loss": 18.1948, "step": 8946 }, { "epoch": 0.1635439706070521, "grad_norm": 9.277697757170023, "learning_rate": 9.539691563733793e-06, "loss": 18.748, "step": 8947 }, { "epoch": 0.16356224980349865, "grad_norm": 6.754838801733986, "learning_rate": 9.53956749620898e-06, "loss": 17.4221, "step": 8948 }, { "epoch": 0.16358052899994516, "grad_norm": 7.415932795215124, "learning_rate": 9.53944341277338e-06, "loss": 17.9688, "step": 8949 }, { "epoch": 0.1635988081963917, "grad_norm": 6.2238818314264, "learning_rate": 9.539319313427424e-06, "loss": 17.4398, "step": 8950 }, { "epoch": 0.1636170873928382, "grad_norm": 9.021443626345528, "learning_rate": 9.539195198171547e-06, "loss": 18.203, "step": 8951 }, { "epoch": 0.16363536658928474, "grad_norm": 9.027950393619902, "learning_rate": 9.539071067006185e-06, "loss": 18.3249, "step": 8952 }, { "epoch": 0.16365364578573127, "grad_norm": 7.381196928707877, "learning_rate": 9.538946919931773e-06, "loss": 18.3222, "step": 8953 }, { "epoch": 0.16367192498217778, "grad_norm": 7.174947326296616, "learning_rate": 9.538822756948746e-06, "loss": 17.8498, "step": 8954 }, { "epoch": 0.16369020417862432, "grad_norm": 6.901302070754817, "learning_rate": 9.538698578057538e-06, "loss": 17.7299, "step": 8955 }, { "epoch": 0.16370848337507082, "grad_norm": 7.761207539143965, "learning_rate": 9.538574383258586e-06, "loss": 18.3238, "step": 8956 }, { "epoch": 0.16372676257151736, "grad_norm": 7.44744965291629, "learning_rate": 9.538450172552324e-06, "loss": 17.5281, "step": 8957 }, { "epoch": 0.16374504176796387, "grad_norm": 6.987075909306739, "learning_rate": 9.53832594593919e-06, "loss": 17.7534, "step": 8958 }, { "epoch": 0.1637633209644104, "grad_norm": 6.624246707335277, "learning_rate": 9.538201703419616e-06, "loss": 17.6023, "step": 8959 }, { "epoch": 0.16378160016085694, "grad_norm": 7.097707090816197, "learning_rate": 9.538077444994039e-06, "loss": 18.0419, "step": 8960 }, { "epoch": 0.16379987935730345, "grad_norm": 7.760668279565784, "learning_rate": 9.537953170662894e-06, "loss": 17.9141, "step": 8961 }, { "epoch": 0.16381815855374998, "grad_norm": 8.699347616373558, "learning_rate": 9.537828880426617e-06, "loss": 18.2053, "step": 8962 }, { "epoch": 0.1638364377501965, "grad_norm": 7.326815693030651, "learning_rate": 9.537704574285644e-06, "loss": 17.7434, "step": 8963 }, { "epoch": 0.16385471694664303, "grad_norm": 8.0552337540858, "learning_rate": 9.53758025224041e-06, "loss": 18.2094, "step": 8964 }, { "epoch": 0.16387299614308956, "grad_norm": 8.021030654479663, "learning_rate": 9.537455914291351e-06, "loss": 17.5572, "step": 8965 }, { "epoch": 0.16389127533953607, "grad_norm": 9.40369593502039, "learning_rate": 9.537331560438903e-06, "loss": 18.1477, "step": 8966 }, { "epoch": 0.1639095545359826, "grad_norm": 7.449777743755273, "learning_rate": 9.537207190683501e-06, "loss": 18.0022, "step": 8967 }, { "epoch": 0.1639278337324291, "grad_norm": 6.755855208741576, "learning_rate": 9.537082805025581e-06, "loss": 17.6478, "step": 8968 }, { "epoch": 0.16394611292887565, "grad_norm": 7.829269949213463, "learning_rate": 9.536958403465581e-06, "loss": 18.2046, "step": 8969 }, { "epoch": 0.16396439212532218, "grad_norm": 7.561169726588245, "learning_rate": 9.536833986003935e-06, "loss": 17.9798, "step": 8970 }, { "epoch": 0.1639826713217687, "grad_norm": 7.979160217539934, "learning_rate": 9.536709552641079e-06, "loss": 18.1425, "step": 8971 }, { "epoch": 0.16400095051821523, "grad_norm": 7.4120818373445925, "learning_rate": 9.53658510337745e-06, "loss": 18.0333, "step": 8972 }, { "epoch": 0.16401922971466174, "grad_norm": 7.110277402834711, "learning_rate": 9.536460638213484e-06, "loss": 17.8487, "step": 8973 }, { "epoch": 0.16403750891110827, "grad_norm": 6.450188051453727, "learning_rate": 9.536336157149617e-06, "loss": 17.5824, "step": 8974 }, { "epoch": 0.16405578810755478, "grad_norm": 6.619651163009498, "learning_rate": 9.536211660186285e-06, "loss": 17.6198, "step": 8975 }, { "epoch": 0.16407406730400131, "grad_norm": 6.628904688632431, "learning_rate": 9.536087147323925e-06, "loss": 17.3563, "step": 8976 }, { "epoch": 0.16409234650044785, "grad_norm": 6.390833912123965, "learning_rate": 9.535962618562973e-06, "loss": 17.6438, "step": 8977 }, { "epoch": 0.16411062569689436, "grad_norm": 8.84388746773094, "learning_rate": 9.535838073903867e-06, "loss": 18.0541, "step": 8978 }, { "epoch": 0.1641289048933409, "grad_norm": 6.898897406262503, "learning_rate": 9.535713513347041e-06, "loss": 17.5198, "step": 8979 }, { "epoch": 0.1641471840897874, "grad_norm": 6.938844238649979, "learning_rate": 9.535588936892934e-06, "loss": 17.473, "step": 8980 }, { "epoch": 0.16416546328623394, "grad_norm": 10.265314021194731, "learning_rate": 9.53546434454198e-06, "loss": 17.8857, "step": 8981 }, { "epoch": 0.16418374248268047, "grad_norm": 5.973072517552841, "learning_rate": 9.535339736294618e-06, "loss": 17.269, "step": 8982 }, { "epoch": 0.16420202167912698, "grad_norm": 6.949757546277983, "learning_rate": 9.535215112151281e-06, "loss": 17.7386, "step": 8983 }, { "epoch": 0.16422030087557352, "grad_norm": 6.326312509864464, "learning_rate": 9.535090472112411e-06, "loss": 17.2417, "step": 8984 }, { "epoch": 0.16423858007202002, "grad_norm": 9.264120030924737, "learning_rate": 9.534965816178443e-06, "loss": 18.3123, "step": 8985 }, { "epoch": 0.16425685926846656, "grad_norm": 6.772672020924337, "learning_rate": 9.534841144349813e-06, "loss": 17.7576, "step": 8986 }, { "epoch": 0.1642751384649131, "grad_norm": 8.329126811228969, "learning_rate": 9.534716456626957e-06, "loss": 18.0118, "step": 8987 }, { "epoch": 0.1642934176613596, "grad_norm": 7.400742624479253, "learning_rate": 9.534591753010314e-06, "loss": 17.9224, "step": 8988 }, { "epoch": 0.16431169685780614, "grad_norm": 6.007836449564718, "learning_rate": 9.53446703350032e-06, "loss": 17.0726, "step": 8989 }, { "epoch": 0.16432997605425265, "grad_norm": 7.562766480730223, "learning_rate": 9.534342298097412e-06, "loss": 17.8489, "step": 8990 }, { "epoch": 0.16434825525069918, "grad_norm": 6.98880304185308, "learning_rate": 9.53421754680203e-06, "loss": 17.4952, "step": 8991 }, { "epoch": 0.1643665344471457, "grad_norm": 9.724134196821625, "learning_rate": 9.534092779614607e-06, "loss": 18.9074, "step": 8992 }, { "epoch": 0.16438481364359223, "grad_norm": 7.251382567170701, "learning_rate": 9.533967996535584e-06, "loss": 17.9577, "step": 8993 }, { "epoch": 0.16440309284003876, "grad_norm": 7.498903365392758, "learning_rate": 9.533843197565396e-06, "loss": 18.052, "step": 8994 }, { "epoch": 0.16442137203648527, "grad_norm": 7.262856847283712, "learning_rate": 9.53371838270448e-06, "loss": 17.8097, "step": 8995 }, { "epoch": 0.1644396512329318, "grad_norm": 7.681262369820483, "learning_rate": 9.533593551953276e-06, "loss": 18.0161, "step": 8996 }, { "epoch": 0.1644579304293783, "grad_norm": 6.880805878430297, "learning_rate": 9.533468705312218e-06, "loss": 17.5347, "step": 8997 }, { "epoch": 0.16447620962582485, "grad_norm": 7.242434003077306, "learning_rate": 9.533343842781746e-06, "loss": 17.8824, "step": 8998 }, { "epoch": 0.16449448882227138, "grad_norm": 9.904514161620357, "learning_rate": 9.533218964362299e-06, "loss": 18.8511, "step": 8999 }, { "epoch": 0.1645127680187179, "grad_norm": 7.483815263177726, "learning_rate": 9.533094070054311e-06, "loss": 17.3156, "step": 9000 }, { "epoch": 0.16453104721516443, "grad_norm": 8.05181023024189, "learning_rate": 9.532969159858223e-06, "loss": 18.2519, "step": 9001 }, { "epoch": 0.16454932641161094, "grad_norm": 7.113555245957429, "learning_rate": 9.53284423377447e-06, "loss": 17.8087, "step": 9002 }, { "epoch": 0.16456760560805747, "grad_norm": 7.012927381244689, "learning_rate": 9.532719291803492e-06, "loss": 17.6893, "step": 9003 }, { "epoch": 0.164585884804504, "grad_norm": 6.958872956133071, "learning_rate": 9.532594333945727e-06, "loss": 17.6789, "step": 9004 }, { "epoch": 0.16460416400095051, "grad_norm": 7.436941121855428, "learning_rate": 9.532469360201612e-06, "loss": 18.0439, "step": 9005 }, { "epoch": 0.16462244319739705, "grad_norm": 8.062205985339064, "learning_rate": 9.532344370571584e-06, "loss": 18.1162, "step": 9006 }, { "epoch": 0.16464072239384356, "grad_norm": 7.177649603497674, "learning_rate": 9.532219365056083e-06, "loss": 17.6347, "step": 9007 }, { "epoch": 0.1646590015902901, "grad_norm": 6.7695673105289025, "learning_rate": 9.532094343655548e-06, "loss": 17.4804, "step": 9008 }, { "epoch": 0.1646772807867366, "grad_norm": 8.496341139684638, "learning_rate": 9.531969306370412e-06, "loss": 18.0347, "step": 9009 }, { "epoch": 0.16469555998318314, "grad_norm": 7.318266467326885, "learning_rate": 9.531844253201119e-06, "loss": 18.001, "step": 9010 }, { "epoch": 0.16471383917962967, "grad_norm": 7.999530387100114, "learning_rate": 9.531719184148106e-06, "loss": 18.4319, "step": 9011 }, { "epoch": 0.16473211837607618, "grad_norm": 7.3789199094497535, "learning_rate": 9.53159409921181e-06, "loss": 17.6925, "step": 9012 }, { "epoch": 0.16475039757252272, "grad_norm": 7.426827848723716, "learning_rate": 9.531468998392669e-06, "loss": 17.5441, "step": 9013 }, { "epoch": 0.16476867676896922, "grad_norm": 7.415358347673079, "learning_rate": 9.531343881691122e-06, "loss": 17.6196, "step": 9014 }, { "epoch": 0.16478695596541576, "grad_norm": 8.714016979434655, "learning_rate": 9.53121874910761e-06, "loss": 18.4228, "step": 9015 }, { "epoch": 0.1648052351618623, "grad_norm": 7.923681657299792, "learning_rate": 9.531093600642567e-06, "loss": 17.8882, "step": 9016 }, { "epoch": 0.1648235143583088, "grad_norm": 7.345767412881944, "learning_rate": 9.530968436296435e-06, "loss": 17.8592, "step": 9017 }, { "epoch": 0.16484179355475534, "grad_norm": 6.56783751919579, "learning_rate": 9.530843256069654e-06, "loss": 17.6087, "step": 9018 }, { "epoch": 0.16486007275120185, "grad_norm": 7.22552776237849, "learning_rate": 9.530718059962658e-06, "loss": 17.7078, "step": 9019 }, { "epoch": 0.16487835194764838, "grad_norm": 5.982942068514054, "learning_rate": 9.53059284797589e-06, "loss": 17.0983, "step": 9020 }, { "epoch": 0.16489663114409492, "grad_norm": 7.0002543746521795, "learning_rate": 9.530467620109786e-06, "loss": 17.7523, "step": 9021 }, { "epoch": 0.16491491034054143, "grad_norm": 8.649614178075742, "learning_rate": 9.530342376364786e-06, "loss": 17.756, "step": 9022 }, { "epoch": 0.16493318953698796, "grad_norm": 7.16265798516129, "learning_rate": 9.530217116741329e-06, "loss": 17.9631, "step": 9023 }, { "epoch": 0.16495146873343447, "grad_norm": 5.529878030601523, "learning_rate": 9.530091841239854e-06, "loss": 17.0858, "step": 9024 }, { "epoch": 0.164969747929881, "grad_norm": 5.93450893703127, "learning_rate": 9.529966549860801e-06, "loss": 17.1779, "step": 9025 }, { "epoch": 0.1649880271263275, "grad_norm": 5.920941626064105, "learning_rate": 9.529841242604609e-06, "loss": 17.3672, "step": 9026 }, { "epoch": 0.16500630632277405, "grad_norm": 7.387747158692213, "learning_rate": 9.529715919471715e-06, "loss": 17.7749, "step": 9027 }, { "epoch": 0.16502458551922058, "grad_norm": 8.98797917732331, "learning_rate": 9.529590580462562e-06, "loss": 18.6328, "step": 9028 }, { "epoch": 0.1650428647156671, "grad_norm": 6.590188461567282, "learning_rate": 9.529465225577586e-06, "loss": 17.5267, "step": 9029 }, { "epoch": 0.16506114391211363, "grad_norm": 8.336657364415158, "learning_rate": 9.529339854817226e-06, "loss": 17.4487, "step": 9030 }, { "epoch": 0.16507942310856014, "grad_norm": 7.971714010329702, "learning_rate": 9.529214468181924e-06, "loss": 18.2908, "step": 9031 }, { "epoch": 0.16509770230500667, "grad_norm": 6.820560068494281, "learning_rate": 9.529089065672118e-06, "loss": 17.4721, "step": 9032 }, { "epoch": 0.1651159815014532, "grad_norm": 7.444814055406021, "learning_rate": 9.528963647288247e-06, "loss": 17.5634, "step": 9033 }, { "epoch": 0.16513426069789972, "grad_norm": 8.52045768338414, "learning_rate": 9.528838213030753e-06, "loss": 18.699, "step": 9034 }, { "epoch": 0.16515253989434625, "grad_norm": 5.784679984227972, "learning_rate": 9.528712762900074e-06, "loss": 17.4384, "step": 9035 }, { "epoch": 0.16517081909079276, "grad_norm": 7.653037685529301, "learning_rate": 9.528587296896649e-06, "loss": 18.2578, "step": 9036 }, { "epoch": 0.1651890982872393, "grad_norm": 6.799617147984463, "learning_rate": 9.528461815020918e-06, "loss": 17.8007, "step": 9037 }, { "epoch": 0.16520737748368583, "grad_norm": 6.968598825158566, "learning_rate": 9.528336317273324e-06, "loss": 17.888, "step": 9038 }, { "epoch": 0.16522565668013234, "grad_norm": 7.165305393866474, "learning_rate": 9.528210803654302e-06, "loss": 17.699, "step": 9039 }, { "epoch": 0.16524393587657887, "grad_norm": 5.371448743197811, "learning_rate": 9.528085274164294e-06, "loss": 17.0871, "step": 9040 }, { "epoch": 0.16526221507302538, "grad_norm": 7.1524163925995685, "learning_rate": 9.52795972880374e-06, "loss": 18.0725, "step": 9041 }, { "epoch": 0.16528049426947192, "grad_norm": 6.932547375366222, "learning_rate": 9.52783416757308e-06, "loss": 17.4842, "step": 9042 }, { "epoch": 0.16529877346591842, "grad_norm": 6.666372747082885, "learning_rate": 9.527708590472755e-06, "loss": 17.5194, "step": 9043 }, { "epoch": 0.16531705266236496, "grad_norm": 6.116693421795748, "learning_rate": 9.527582997503203e-06, "loss": 17.6409, "step": 9044 }, { "epoch": 0.1653353318588115, "grad_norm": 6.27081926121905, "learning_rate": 9.527457388664866e-06, "loss": 17.5528, "step": 9045 }, { "epoch": 0.165353611055258, "grad_norm": 6.842569182429887, "learning_rate": 9.527331763958186e-06, "loss": 17.6139, "step": 9046 }, { "epoch": 0.16537189025170454, "grad_norm": 8.74274288483976, "learning_rate": 9.527206123383597e-06, "loss": 18.2157, "step": 9047 }, { "epoch": 0.16539016944815105, "grad_norm": 6.264777251475679, "learning_rate": 9.527080466941546e-06, "loss": 17.285, "step": 9048 }, { "epoch": 0.16540844864459758, "grad_norm": 7.537827085353265, "learning_rate": 9.52695479463247e-06, "loss": 18.1821, "step": 9049 }, { "epoch": 0.16542672784104412, "grad_norm": 7.565179764824264, "learning_rate": 9.526829106456811e-06, "loss": 17.8749, "step": 9050 }, { "epoch": 0.16544500703749063, "grad_norm": 6.884819067861885, "learning_rate": 9.526703402415007e-06, "loss": 17.7371, "step": 9051 }, { "epoch": 0.16546328623393716, "grad_norm": 6.510990120480692, "learning_rate": 9.526577682507504e-06, "loss": 17.6405, "step": 9052 }, { "epoch": 0.16548156543038367, "grad_norm": 6.513347772149104, "learning_rate": 9.526451946734736e-06, "loss": 17.5503, "step": 9053 }, { "epoch": 0.1654998446268302, "grad_norm": 5.428298287850259, "learning_rate": 9.526326195097146e-06, "loss": 17.1256, "step": 9054 }, { "epoch": 0.16551812382327674, "grad_norm": 6.582988013596661, "learning_rate": 9.526200427595178e-06, "loss": 17.6936, "step": 9055 }, { "epoch": 0.16553640301972325, "grad_norm": 7.370843557480547, "learning_rate": 9.526074644229269e-06, "loss": 17.517, "step": 9056 }, { "epoch": 0.16555468221616979, "grad_norm": 6.923825195792302, "learning_rate": 9.525948844999861e-06, "loss": 17.7574, "step": 9057 }, { "epoch": 0.1655729614126163, "grad_norm": 6.8302860552307445, "learning_rate": 9.525823029907396e-06, "loss": 17.2951, "step": 9058 }, { "epoch": 0.16559124060906283, "grad_norm": 6.958696460058351, "learning_rate": 9.525697198952313e-06, "loss": 17.741, "step": 9059 }, { "epoch": 0.16560951980550934, "grad_norm": 6.996366141266741, "learning_rate": 9.525571352135055e-06, "loss": 17.6815, "step": 9060 }, { "epoch": 0.16562779900195587, "grad_norm": 6.157407783203882, "learning_rate": 9.52544548945606e-06, "loss": 17.4198, "step": 9061 }, { "epoch": 0.1656460781984024, "grad_norm": 6.440894364279506, "learning_rate": 9.525319610915773e-06, "loss": 17.3242, "step": 9062 }, { "epoch": 0.16566435739484892, "grad_norm": 6.854729327882623, "learning_rate": 9.525193716514634e-06, "loss": 17.5983, "step": 9063 }, { "epoch": 0.16568263659129545, "grad_norm": 6.171897296806404, "learning_rate": 9.525067806253082e-06, "loss": 17.2897, "step": 9064 }, { "epoch": 0.16570091578774196, "grad_norm": 8.008405996013842, "learning_rate": 9.524941880131562e-06, "loss": 18.3425, "step": 9065 }, { "epoch": 0.1657191949841885, "grad_norm": 6.725834049524768, "learning_rate": 9.52481593815051e-06, "loss": 17.3255, "step": 9066 }, { "epoch": 0.16573747418063503, "grad_norm": 7.92148911913241, "learning_rate": 9.524689980310375e-06, "loss": 17.8377, "step": 9067 }, { "epoch": 0.16575575337708154, "grad_norm": 7.0223075023185775, "learning_rate": 9.524564006611592e-06, "loss": 17.6414, "step": 9068 }, { "epoch": 0.16577403257352807, "grad_norm": 6.3789484707849295, "learning_rate": 9.524438017054604e-06, "loss": 17.4408, "step": 9069 }, { "epoch": 0.16579231176997458, "grad_norm": 5.897977748498922, "learning_rate": 9.524312011639856e-06, "loss": 17.3339, "step": 9070 }, { "epoch": 0.16581059096642112, "grad_norm": 6.207281410268841, "learning_rate": 9.524185990367785e-06, "loss": 17.2, "step": 9071 }, { "epoch": 0.16582887016286765, "grad_norm": 8.342653154892563, "learning_rate": 9.524059953238836e-06, "loss": 18.3694, "step": 9072 }, { "epoch": 0.16584714935931416, "grad_norm": 6.598668454206093, "learning_rate": 9.523933900253448e-06, "loss": 17.6158, "step": 9073 }, { "epoch": 0.1658654285557607, "grad_norm": 6.59926984065775, "learning_rate": 9.523807831412065e-06, "loss": 17.7051, "step": 9074 }, { "epoch": 0.1658837077522072, "grad_norm": 7.1647343560089505, "learning_rate": 9.523681746715128e-06, "loss": 17.5268, "step": 9075 }, { "epoch": 0.16590198694865374, "grad_norm": 7.627179920498086, "learning_rate": 9.523555646163078e-06, "loss": 17.6897, "step": 9076 }, { "epoch": 0.16592026614510025, "grad_norm": 7.637983839679278, "learning_rate": 9.52342952975636e-06, "loss": 18.0547, "step": 9077 }, { "epoch": 0.16593854534154678, "grad_norm": 8.041272758097744, "learning_rate": 9.523303397495414e-06, "loss": 18.2532, "step": 9078 }, { "epoch": 0.16595682453799332, "grad_norm": 6.30967522088222, "learning_rate": 9.52317724938068e-06, "loss": 17.4144, "step": 9079 }, { "epoch": 0.16597510373443983, "grad_norm": 6.995478330767793, "learning_rate": 9.523051085412603e-06, "loss": 17.6971, "step": 9080 }, { "epoch": 0.16599338293088636, "grad_norm": 6.919525674027049, "learning_rate": 9.522924905591625e-06, "loss": 17.7011, "step": 9081 }, { "epoch": 0.16601166212733287, "grad_norm": 7.863781012799176, "learning_rate": 9.522798709918189e-06, "loss": 18.3145, "step": 9082 }, { "epoch": 0.1660299413237794, "grad_norm": 6.244417281280778, "learning_rate": 9.522672498392734e-06, "loss": 17.4247, "step": 9083 }, { "epoch": 0.16604822052022594, "grad_norm": 7.8538816717185895, "learning_rate": 9.522546271015705e-06, "loss": 17.696, "step": 9084 }, { "epoch": 0.16606649971667245, "grad_norm": 9.300180918835974, "learning_rate": 9.522420027787543e-06, "loss": 18.6505, "step": 9085 }, { "epoch": 0.16608477891311899, "grad_norm": 5.7769481419581155, "learning_rate": 9.522293768708691e-06, "loss": 17.0681, "step": 9086 }, { "epoch": 0.1661030581095655, "grad_norm": 7.199260336614925, "learning_rate": 9.522167493779593e-06, "loss": 17.9693, "step": 9087 }, { "epoch": 0.16612133730601203, "grad_norm": 6.379245550741849, "learning_rate": 9.52204120300069e-06, "loss": 17.7245, "step": 9088 }, { "epoch": 0.16613961650245856, "grad_norm": 8.844373939107124, "learning_rate": 9.521914896372424e-06, "loss": 18.4572, "step": 9089 }, { "epoch": 0.16615789569890507, "grad_norm": 8.247430546903153, "learning_rate": 9.52178857389524e-06, "loss": 18.4524, "step": 9090 }, { "epoch": 0.1661761748953516, "grad_norm": 6.444161082567926, "learning_rate": 9.52166223556958e-06, "loss": 17.3894, "step": 9091 }, { "epoch": 0.16619445409179812, "grad_norm": 5.442652376247098, "learning_rate": 9.521535881395884e-06, "loss": 17.0941, "step": 9092 }, { "epoch": 0.16621273328824465, "grad_norm": 7.279601640261967, "learning_rate": 9.5214095113746e-06, "loss": 17.6853, "step": 9093 }, { "epoch": 0.16623101248469116, "grad_norm": 6.189097401956089, "learning_rate": 9.521283125506166e-06, "loss": 17.417, "step": 9094 }, { "epoch": 0.1662492916811377, "grad_norm": 5.91625718018307, "learning_rate": 9.521156723791028e-06, "loss": 17.0529, "step": 9095 }, { "epoch": 0.16626757087758423, "grad_norm": 6.6431258708244565, "learning_rate": 9.521030306229627e-06, "loss": 17.6213, "step": 9096 }, { "epoch": 0.16628585007403074, "grad_norm": 7.330145933182181, "learning_rate": 9.520903872822407e-06, "loss": 17.8557, "step": 9097 }, { "epoch": 0.16630412927047727, "grad_norm": 6.671444664843496, "learning_rate": 9.520777423569812e-06, "loss": 17.6392, "step": 9098 }, { "epoch": 0.16632240846692378, "grad_norm": 7.772729339617621, "learning_rate": 9.520650958472285e-06, "loss": 17.9338, "step": 9099 }, { "epoch": 0.16634068766337032, "grad_norm": 7.466196889005785, "learning_rate": 9.520524477530266e-06, "loss": 17.9547, "step": 9100 }, { "epoch": 0.16635896685981685, "grad_norm": 7.539791802325543, "learning_rate": 9.520397980744204e-06, "loss": 17.8858, "step": 9101 }, { "epoch": 0.16637724605626336, "grad_norm": 7.129833234767096, "learning_rate": 9.520271468114539e-06, "loss": 17.8684, "step": 9102 }, { "epoch": 0.1663955252527099, "grad_norm": 5.323013439138336, "learning_rate": 9.520144939641713e-06, "loss": 16.938, "step": 9103 }, { "epoch": 0.1664138044491564, "grad_norm": 7.393868613524314, "learning_rate": 9.520018395326171e-06, "loss": 18.0491, "step": 9104 }, { "epoch": 0.16643208364560294, "grad_norm": 7.244360215684982, "learning_rate": 9.519891835168359e-06, "loss": 17.443, "step": 9105 }, { "epoch": 0.16645036284204948, "grad_norm": 6.633329129216576, "learning_rate": 9.519765259168716e-06, "loss": 17.2289, "step": 9106 }, { "epoch": 0.16646864203849598, "grad_norm": 6.955839024230898, "learning_rate": 9.519638667327691e-06, "loss": 17.6643, "step": 9107 }, { "epoch": 0.16648692123494252, "grad_norm": 5.691450733880839, "learning_rate": 9.519512059645723e-06, "loss": 17.0938, "step": 9108 }, { "epoch": 0.16650520043138903, "grad_norm": 7.454345822977432, "learning_rate": 9.519385436123256e-06, "loss": 17.7329, "step": 9109 }, { "epoch": 0.16652347962783556, "grad_norm": 8.266789452889764, "learning_rate": 9.519258796760738e-06, "loss": 17.6905, "step": 9110 }, { "epoch": 0.16654175882428207, "grad_norm": 9.33813301366781, "learning_rate": 9.519132141558607e-06, "loss": 18.6763, "step": 9111 }, { "epoch": 0.1665600380207286, "grad_norm": 8.822979246389119, "learning_rate": 9.519005470517312e-06, "loss": 18.5242, "step": 9112 }, { "epoch": 0.16657831721717514, "grad_norm": 8.916193767795104, "learning_rate": 9.518878783637296e-06, "loss": 18.3267, "step": 9113 }, { "epoch": 0.16659659641362165, "grad_norm": 8.055989505768691, "learning_rate": 9.518752080918999e-06, "loss": 18.1398, "step": 9114 }, { "epoch": 0.16661487561006819, "grad_norm": 12.760266756194506, "learning_rate": 9.51862536236287e-06, "loss": 18.4892, "step": 9115 }, { "epoch": 0.1666331548065147, "grad_norm": 7.3553093464905235, "learning_rate": 9.518498627969351e-06, "loss": 18.0986, "step": 9116 }, { "epoch": 0.16665143400296123, "grad_norm": 7.113153293673835, "learning_rate": 9.518371877738885e-06, "loss": 17.8732, "step": 9117 }, { "epoch": 0.16666971319940777, "grad_norm": 7.311254523474977, "learning_rate": 9.51824511167192e-06, "loss": 17.6422, "step": 9118 }, { "epoch": 0.16668799239585427, "grad_norm": 7.902962118314464, "learning_rate": 9.518118329768897e-06, "loss": 18.2016, "step": 9119 }, { "epoch": 0.1667062715923008, "grad_norm": 6.982253601525164, "learning_rate": 9.51799153203026e-06, "loss": 17.5656, "step": 9120 }, { "epoch": 0.16672455078874732, "grad_norm": 9.64107093255908, "learning_rate": 9.517864718456457e-06, "loss": 18.3941, "step": 9121 }, { "epoch": 0.16674282998519385, "grad_norm": 6.648779777416434, "learning_rate": 9.51773788904793e-06, "loss": 17.7655, "step": 9122 }, { "epoch": 0.1667611091816404, "grad_norm": 7.52096006546244, "learning_rate": 9.517611043805122e-06, "loss": 18.342, "step": 9123 }, { "epoch": 0.1667793883780869, "grad_norm": 6.967318309335177, "learning_rate": 9.517484182728481e-06, "loss": 17.4008, "step": 9124 }, { "epoch": 0.16679766757453343, "grad_norm": 5.507922484346446, "learning_rate": 9.517357305818447e-06, "loss": 16.934, "step": 9125 }, { "epoch": 0.16681594677097994, "grad_norm": 8.284403361115952, "learning_rate": 9.517230413075471e-06, "loss": 18.146, "step": 9126 }, { "epoch": 0.16683422596742647, "grad_norm": 6.162027763574121, "learning_rate": 9.517103504499993e-06, "loss": 17.4649, "step": 9127 }, { "epoch": 0.16685250516387298, "grad_norm": 5.993646032662872, "learning_rate": 9.516976580092459e-06, "loss": 17.2926, "step": 9128 }, { "epoch": 0.16687078436031952, "grad_norm": 7.503613357613627, "learning_rate": 9.516849639853314e-06, "loss": 18.0304, "step": 9129 }, { "epoch": 0.16688906355676605, "grad_norm": 6.890723043094061, "learning_rate": 9.516722683783003e-06, "loss": 18.1006, "step": 9130 }, { "epoch": 0.16690734275321256, "grad_norm": 6.5303244166727525, "learning_rate": 9.516595711881972e-06, "loss": 17.6837, "step": 9131 }, { "epoch": 0.1669256219496591, "grad_norm": 7.579422848183306, "learning_rate": 9.516468724150664e-06, "loss": 18.1218, "step": 9132 }, { "epoch": 0.1669439011461056, "grad_norm": 7.730681670507118, "learning_rate": 9.516341720589525e-06, "loss": 17.8222, "step": 9133 }, { "epoch": 0.16696218034255214, "grad_norm": 5.854139019858464, "learning_rate": 9.516214701199e-06, "loss": 17.2757, "step": 9134 }, { "epoch": 0.16698045953899868, "grad_norm": 9.443290375382677, "learning_rate": 9.516087665979536e-06, "loss": 17.5585, "step": 9135 }, { "epoch": 0.16699873873544518, "grad_norm": 7.466096064656885, "learning_rate": 9.515960614931575e-06, "loss": 18.0145, "step": 9136 }, { "epoch": 0.16701701793189172, "grad_norm": 6.513489335465951, "learning_rate": 9.515833548055563e-06, "loss": 17.6289, "step": 9137 }, { "epoch": 0.16703529712833823, "grad_norm": 7.957763787735906, "learning_rate": 9.51570646535195e-06, "loss": 18.057, "step": 9138 }, { "epoch": 0.16705357632478476, "grad_norm": 7.047812347213954, "learning_rate": 9.515579366821175e-06, "loss": 17.9389, "step": 9139 }, { "epoch": 0.1670718555212313, "grad_norm": 6.718738424242314, "learning_rate": 9.515452252463687e-06, "loss": 17.5463, "step": 9140 }, { "epoch": 0.1670901347176778, "grad_norm": 6.0169625408681835, "learning_rate": 9.51532512227993e-06, "loss": 17.4768, "step": 9141 }, { "epoch": 0.16710841391412434, "grad_norm": 6.4821786910550365, "learning_rate": 9.51519797627035e-06, "loss": 17.5259, "step": 9142 }, { "epoch": 0.16712669311057085, "grad_norm": 6.517838393509723, "learning_rate": 9.515070814435394e-06, "loss": 17.4701, "step": 9143 }, { "epoch": 0.1671449723070174, "grad_norm": 8.17111782001087, "learning_rate": 9.514943636775504e-06, "loss": 18.0557, "step": 9144 }, { "epoch": 0.1671632515034639, "grad_norm": 7.30316783584968, "learning_rate": 9.51481644329113e-06, "loss": 17.4473, "step": 9145 }, { "epoch": 0.16718153069991043, "grad_norm": 7.990399924648765, "learning_rate": 9.514689233982718e-06, "loss": 17.9287, "step": 9146 }, { "epoch": 0.16719980989635697, "grad_norm": 7.193933282375789, "learning_rate": 9.51456200885071e-06, "loss": 17.6412, "step": 9147 }, { "epoch": 0.16721808909280347, "grad_norm": 8.033713818889083, "learning_rate": 9.514434767895555e-06, "loss": 18.1817, "step": 9148 }, { "epoch": 0.16723636828925, "grad_norm": 6.876274301198719, "learning_rate": 9.514307511117697e-06, "loss": 17.4852, "step": 9149 }, { "epoch": 0.16725464748569652, "grad_norm": 9.832202016975332, "learning_rate": 9.514180238517583e-06, "loss": 18.4228, "step": 9150 }, { "epoch": 0.16727292668214305, "grad_norm": 7.205506056626777, "learning_rate": 9.514052950095659e-06, "loss": 17.7711, "step": 9151 }, { "epoch": 0.1672912058785896, "grad_norm": 7.148710541106331, "learning_rate": 9.51392564585237e-06, "loss": 17.6796, "step": 9152 }, { "epoch": 0.1673094850750361, "grad_norm": 7.348555779791997, "learning_rate": 9.513798325788167e-06, "loss": 17.7698, "step": 9153 }, { "epoch": 0.16732776427148263, "grad_norm": 7.677600866506125, "learning_rate": 9.51367098990349e-06, "loss": 17.7246, "step": 9154 }, { "epoch": 0.16734604346792914, "grad_norm": 7.2066128084205285, "learning_rate": 9.513543638198787e-06, "loss": 17.3363, "step": 9155 }, { "epoch": 0.16736432266437568, "grad_norm": 7.187256040340105, "learning_rate": 9.513416270674506e-06, "loss": 17.952, "step": 9156 }, { "epoch": 0.1673826018608222, "grad_norm": 6.73423065779139, "learning_rate": 9.513288887331093e-06, "loss": 17.8486, "step": 9157 }, { "epoch": 0.16740088105726872, "grad_norm": 7.845017050587436, "learning_rate": 9.513161488168993e-06, "loss": 18.1444, "step": 9158 }, { "epoch": 0.16741916025371525, "grad_norm": 7.398952054211818, "learning_rate": 9.513034073188656e-06, "loss": 18.2226, "step": 9159 }, { "epoch": 0.16743743945016176, "grad_norm": 6.692649336390622, "learning_rate": 9.512906642390526e-06, "loss": 17.5083, "step": 9160 }, { "epoch": 0.1674557186466083, "grad_norm": 6.974035709920217, "learning_rate": 9.512779195775048e-06, "loss": 17.8033, "step": 9161 }, { "epoch": 0.1674739978430548, "grad_norm": 6.330347140989411, "learning_rate": 9.512651733342671e-06, "loss": 17.5932, "step": 9162 }, { "epoch": 0.16749227703950134, "grad_norm": 7.61732463302869, "learning_rate": 9.51252425509384e-06, "loss": 17.8264, "step": 9163 }, { "epoch": 0.16751055623594788, "grad_norm": 7.355653977371296, "learning_rate": 9.512396761029004e-06, "loss": 17.8449, "step": 9164 }, { "epoch": 0.16752883543239439, "grad_norm": 6.141978303978035, "learning_rate": 9.51226925114861e-06, "loss": 17.3168, "step": 9165 }, { "epoch": 0.16754711462884092, "grad_norm": 6.877778001090658, "learning_rate": 9.512141725453103e-06, "loss": 17.7299, "step": 9166 }, { "epoch": 0.16756539382528743, "grad_norm": 6.62830012161467, "learning_rate": 9.512014183942932e-06, "loss": 17.5122, "step": 9167 }, { "epoch": 0.16758367302173396, "grad_norm": 7.861931039407114, "learning_rate": 9.51188662661854e-06, "loss": 18.0033, "step": 9168 }, { "epoch": 0.1676019522181805, "grad_norm": 6.158488854561656, "learning_rate": 9.51175905348038e-06, "loss": 17.3119, "step": 9169 }, { "epoch": 0.167620231414627, "grad_norm": 7.31101099269187, "learning_rate": 9.511631464528894e-06, "loss": 17.6251, "step": 9170 }, { "epoch": 0.16763851061107354, "grad_norm": 6.069969422578263, "learning_rate": 9.511503859764532e-06, "loss": 17.281, "step": 9171 }, { "epoch": 0.16765678980752005, "grad_norm": 7.98784248537527, "learning_rate": 9.511376239187741e-06, "loss": 18.3653, "step": 9172 }, { "epoch": 0.1676750690039666, "grad_norm": 7.4389387664164515, "learning_rate": 9.511248602798967e-06, "loss": 17.8475, "step": 9173 }, { "epoch": 0.16769334820041312, "grad_norm": 8.398094097487682, "learning_rate": 9.511120950598659e-06, "loss": 17.9461, "step": 9174 }, { "epoch": 0.16771162739685963, "grad_norm": 5.612258948719771, "learning_rate": 9.510993282587262e-06, "loss": 17.1776, "step": 9175 }, { "epoch": 0.16772990659330617, "grad_norm": 7.1766094368171505, "learning_rate": 9.510865598765227e-06, "loss": 17.7494, "step": 9176 }, { "epoch": 0.16774818578975267, "grad_norm": 7.2923563662027195, "learning_rate": 9.510737899132998e-06, "loss": 18.1259, "step": 9177 }, { "epoch": 0.1677664649861992, "grad_norm": 6.7949270375861195, "learning_rate": 9.510610183691023e-06, "loss": 17.5615, "step": 9178 }, { "epoch": 0.16778474418264572, "grad_norm": 8.461201564995129, "learning_rate": 9.510482452439753e-06, "loss": 18.3026, "step": 9179 }, { "epoch": 0.16780302337909225, "grad_norm": 6.799459045467971, "learning_rate": 9.510354705379632e-06, "loss": 17.7901, "step": 9180 }, { "epoch": 0.1678213025755388, "grad_norm": 8.015234311899766, "learning_rate": 9.51022694251111e-06, "loss": 17.9572, "step": 9181 }, { "epoch": 0.1678395817719853, "grad_norm": 6.784102337008483, "learning_rate": 9.510099163834633e-06, "loss": 17.7439, "step": 9182 }, { "epoch": 0.16785786096843183, "grad_norm": 7.6356079692659655, "learning_rate": 9.50997136935065e-06, "loss": 18.1289, "step": 9183 }, { "epoch": 0.16787614016487834, "grad_norm": 6.4649360917136995, "learning_rate": 9.50984355905961e-06, "loss": 17.3035, "step": 9184 }, { "epoch": 0.16789441936132488, "grad_norm": 5.971468991654149, "learning_rate": 9.509715732961959e-06, "loss": 17.2778, "step": 9185 }, { "epoch": 0.1679126985577714, "grad_norm": 7.19222144592288, "learning_rate": 9.509587891058144e-06, "loss": 17.628, "step": 9186 }, { "epoch": 0.16793097775421792, "grad_norm": 5.6168211935698436, "learning_rate": 9.509460033348617e-06, "loss": 17.0353, "step": 9187 }, { "epoch": 0.16794925695066446, "grad_norm": 7.143353577569963, "learning_rate": 9.509332159833824e-06, "loss": 17.691, "step": 9188 }, { "epoch": 0.16796753614711096, "grad_norm": 7.99527562581922, "learning_rate": 9.509204270514212e-06, "loss": 18.5328, "step": 9189 }, { "epoch": 0.1679858153435575, "grad_norm": 8.806796971586664, "learning_rate": 9.509076365390231e-06, "loss": 18.1942, "step": 9190 }, { "epoch": 0.16800409454000403, "grad_norm": 6.80142875338237, "learning_rate": 9.508948444462327e-06, "loss": 17.8174, "step": 9191 }, { "epoch": 0.16802237373645054, "grad_norm": 8.82466589624803, "learning_rate": 9.508820507730952e-06, "loss": 18.6217, "step": 9192 }, { "epoch": 0.16804065293289708, "grad_norm": 7.190350624726181, "learning_rate": 9.50869255519655e-06, "loss": 17.8272, "step": 9193 }, { "epoch": 0.16805893212934359, "grad_norm": 7.015287144182023, "learning_rate": 9.508564586859575e-06, "loss": 17.9058, "step": 9194 }, { "epoch": 0.16807721132579012, "grad_norm": 5.8777521897708525, "learning_rate": 9.50843660272047e-06, "loss": 17.3103, "step": 9195 }, { "epoch": 0.16809549052223663, "grad_norm": 7.262802111291724, "learning_rate": 9.508308602779686e-06, "loss": 18.0411, "step": 9196 }, { "epoch": 0.16811376971868316, "grad_norm": 7.969769699031251, "learning_rate": 9.508180587037673e-06, "loss": 17.8796, "step": 9197 }, { "epoch": 0.1681320489151297, "grad_norm": 8.27773983726501, "learning_rate": 9.508052555494878e-06, "loss": 18.2145, "step": 9198 }, { "epoch": 0.1681503281115762, "grad_norm": 7.160206735734677, "learning_rate": 9.50792450815175e-06, "loss": 17.6246, "step": 9199 }, { "epoch": 0.16816860730802274, "grad_norm": 8.286567525870572, "learning_rate": 9.507796445008737e-06, "loss": 17.7956, "step": 9200 }, { "epoch": 0.16818688650446925, "grad_norm": 7.4618560161589444, "learning_rate": 9.507668366066289e-06, "loss": 17.9003, "step": 9201 }, { "epoch": 0.1682051657009158, "grad_norm": 6.215729319601452, "learning_rate": 9.507540271324852e-06, "loss": 17.2985, "step": 9202 }, { "epoch": 0.16822344489736232, "grad_norm": 8.174184185896188, "learning_rate": 9.507412160784881e-06, "loss": 18.2771, "step": 9203 }, { "epoch": 0.16824172409380883, "grad_norm": 5.97025787633085, "learning_rate": 9.50728403444682e-06, "loss": 17.3012, "step": 9204 }, { "epoch": 0.16826000329025537, "grad_norm": 8.322850448430295, "learning_rate": 9.50715589231112e-06, "loss": 17.9653, "step": 9205 }, { "epoch": 0.16827828248670187, "grad_norm": 6.128258394920188, "learning_rate": 9.50702773437823e-06, "loss": 17.4, "step": 9206 }, { "epoch": 0.1682965616831484, "grad_norm": 5.603444969341588, "learning_rate": 9.5068995606486e-06, "loss": 17.2268, "step": 9207 }, { "epoch": 0.16831484087959495, "grad_norm": 6.814603802889895, "learning_rate": 9.506771371122676e-06, "loss": 17.7093, "step": 9208 }, { "epoch": 0.16833312007604145, "grad_norm": 7.321675236998762, "learning_rate": 9.50664316580091e-06, "loss": 17.8145, "step": 9209 }, { "epoch": 0.168351399272488, "grad_norm": 6.474422885000199, "learning_rate": 9.506514944683752e-06, "loss": 17.464, "step": 9210 }, { "epoch": 0.1683696784689345, "grad_norm": 7.30339512472791, "learning_rate": 9.506386707771648e-06, "loss": 17.8909, "step": 9211 }, { "epoch": 0.16838795766538103, "grad_norm": 7.456931045903403, "learning_rate": 9.50625845506505e-06, "loss": 17.8489, "step": 9212 }, { "epoch": 0.16840623686182754, "grad_norm": 7.463795746898944, "learning_rate": 9.506130186564408e-06, "loss": 18.1574, "step": 9213 }, { "epoch": 0.16842451605827408, "grad_norm": 8.117000953008189, "learning_rate": 9.50600190227017e-06, "loss": 18.2337, "step": 9214 }, { "epoch": 0.1684427952547206, "grad_norm": 7.830745084486118, "learning_rate": 9.505873602182788e-06, "loss": 18.0402, "step": 9215 }, { "epoch": 0.16846107445116712, "grad_norm": 9.237325081003904, "learning_rate": 9.505745286302707e-06, "loss": 18.5084, "step": 9216 }, { "epoch": 0.16847935364761366, "grad_norm": 7.6594654940174856, "learning_rate": 9.505616954630383e-06, "loss": 18.4119, "step": 9217 }, { "epoch": 0.16849763284406016, "grad_norm": 6.192285300112505, "learning_rate": 9.505488607166262e-06, "loss": 17.3778, "step": 9218 }, { "epoch": 0.1685159120405067, "grad_norm": 9.209633641285492, "learning_rate": 9.505360243910792e-06, "loss": 18.6063, "step": 9219 }, { "epoch": 0.16853419123695323, "grad_norm": 9.379714619614132, "learning_rate": 9.505231864864429e-06, "loss": 18.6675, "step": 9220 }, { "epoch": 0.16855247043339974, "grad_norm": 8.075719850020935, "learning_rate": 9.505103470027617e-06, "loss": 18.3677, "step": 9221 }, { "epoch": 0.16857074962984628, "grad_norm": 8.194403618110798, "learning_rate": 9.504975059400807e-06, "loss": 18.0993, "step": 9222 }, { "epoch": 0.16858902882629279, "grad_norm": 6.596985374575552, "learning_rate": 9.504846632984452e-06, "loss": 17.5941, "step": 9223 }, { "epoch": 0.16860730802273932, "grad_norm": 6.485470657566503, "learning_rate": 9.504718190779001e-06, "loss": 17.6721, "step": 9224 }, { "epoch": 0.16862558721918586, "grad_norm": 6.6423030234339295, "learning_rate": 9.504589732784903e-06, "loss": 17.5654, "step": 9225 }, { "epoch": 0.16864386641563237, "grad_norm": 6.915606847521376, "learning_rate": 9.50446125900261e-06, "loss": 17.7335, "step": 9226 }, { "epoch": 0.1686621456120789, "grad_norm": 5.773394547521735, "learning_rate": 9.50433276943257e-06, "loss": 17.2237, "step": 9227 }, { "epoch": 0.1686804248085254, "grad_norm": 7.11829780011999, "learning_rate": 9.504204264075234e-06, "loss": 17.641, "step": 9228 }, { "epoch": 0.16869870400497194, "grad_norm": 6.4137613968029505, "learning_rate": 9.504075742931052e-06, "loss": 17.5393, "step": 9229 }, { "epoch": 0.16871698320141845, "grad_norm": 6.7655899129952255, "learning_rate": 9.503947206000477e-06, "loss": 17.5254, "step": 9230 }, { "epoch": 0.168735262397865, "grad_norm": 5.51686495662697, "learning_rate": 9.503818653283959e-06, "loss": 17.1054, "step": 9231 }, { "epoch": 0.16875354159431152, "grad_norm": 7.395907813258995, "learning_rate": 9.503690084781945e-06, "loss": 18.1155, "step": 9232 }, { "epoch": 0.16877182079075803, "grad_norm": 6.682609950792527, "learning_rate": 9.503561500494889e-06, "loss": 17.6452, "step": 9233 }, { "epoch": 0.16879009998720457, "grad_norm": 6.556590064553701, "learning_rate": 9.503432900423242e-06, "loss": 17.8319, "step": 9234 }, { "epoch": 0.16880837918365107, "grad_norm": 6.682219631635365, "learning_rate": 9.503304284567451e-06, "loss": 17.5929, "step": 9235 }, { "epoch": 0.1688266583800976, "grad_norm": 8.022964534443789, "learning_rate": 9.503175652927972e-06, "loss": 17.993, "step": 9236 }, { "epoch": 0.16884493757654415, "grad_norm": 7.680467349943672, "learning_rate": 9.50304700550525e-06, "loss": 18.2086, "step": 9237 }, { "epoch": 0.16886321677299065, "grad_norm": 7.797938679905344, "learning_rate": 9.502918342299742e-06, "loss": 17.9965, "step": 9238 }, { "epoch": 0.1688814959694372, "grad_norm": 8.175512592833154, "learning_rate": 9.502789663311896e-06, "loss": 17.7192, "step": 9239 }, { "epoch": 0.1688997751658837, "grad_norm": 7.807439592723891, "learning_rate": 9.50266096854216e-06, "loss": 18.2918, "step": 9240 }, { "epoch": 0.16891805436233023, "grad_norm": 7.1402803356561355, "learning_rate": 9.502532257990991e-06, "loss": 17.7497, "step": 9241 }, { "epoch": 0.16893633355877677, "grad_norm": 7.1479553245385405, "learning_rate": 9.502403531658834e-06, "loss": 17.7492, "step": 9242 }, { "epoch": 0.16895461275522328, "grad_norm": 7.6740459373006615, "learning_rate": 9.502274789546147e-06, "loss": 18.0036, "step": 9243 }, { "epoch": 0.1689728919516698, "grad_norm": 7.04948573680895, "learning_rate": 9.502146031653375e-06, "loss": 17.817, "step": 9244 }, { "epoch": 0.16899117114811632, "grad_norm": 7.472060819134237, "learning_rate": 9.502017257980972e-06, "loss": 17.7266, "step": 9245 }, { "epoch": 0.16900945034456286, "grad_norm": 11.345342492889468, "learning_rate": 9.50188846852939e-06, "loss": 18.4252, "step": 9246 }, { "epoch": 0.16902772954100936, "grad_norm": 6.324292922016057, "learning_rate": 9.501759663299079e-06, "loss": 17.4231, "step": 9247 }, { "epoch": 0.1690460087374559, "grad_norm": 7.947575167075575, "learning_rate": 9.501630842290492e-06, "loss": 18.4636, "step": 9248 }, { "epoch": 0.16906428793390244, "grad_norm": 7.064056122783764, "learning_rate": 9.501502005504079e-06, "loss": 17.7031, "step": 9249 }, { "epoch": 0.16908256713034894, "grad_norm": 6.607574761603722, "learning_rate": 9.501373152940292e-06, "loss": 17.5506, "step": 9250 }, { "epoch": 0.16910084632679548, "grad_norm": 7.47745336555634, "learning_rate": 9.501244284599582e-06, "loss": 18.0999, "step": 9251 }, { "epoch": 0.169119125523242, "grad_norm": 6.442336287804173, "learning_rate": 9.501115400482401e-06, "loss": 17.6483, "step": 9252 }, { "epoch": 0.16913740471968852, "grad_norm": 6.689660504446449, "learning_rate": 9.500986500589204e-06, "loss": 17.7348, "step": 9253 }, { "epoch": 0.16915568391613506, "grad_norm": 7.0556173662591055, "learning_rate": 9.500857584920438e-06, "loss": 17.8223, "step": 9254 }, { "epoch": 0.16917396311258157, "grad_norm": 6.435332083102732, "learning_rate": 9.500728653476556e-06, "loss": 17.435, "step": 9255 }, { "epoch": 0.1691922423090281, "grad_norm": 8.178954829322633, "learning_rate": 9.500599706258012e-06, "loss": 18.1133, "step": 9256 }, { "epoch": 0.1692105215054746, "grad_norm": 7.486000079432466, "learning_rate": 9.500470743265256e-06, "loss": 17.9521, "step": 9257 }, { "epoch": 0.16922880070192114, "grad_norm": 7.414235605822304, "learning_rate": 9.50034176449874e-06, "loss": 17.9106, "step": 9258 }, { "epoch": 0.16924707989836768, "grad_norm": 7.29957041738579, "learning_rate": 9.500212769958916e-06, "loss": 17.9123, "step": 9259 }, { "epoch": 0.1692653590948142, "grad_norm": 6.419324021770509, "learning_rate": 9.50008375964624e-06, "loss": 17.5822, "step": 9260 }, { "epoch": 0.16928363829126072, "grad_norm": 7.183503307851751, "learning_rate": 9.499954733561159e-06, "loss": 17.827, "step": 9261 }, { "epoch": 0.16930191748770723, "grad_norm": 5.9867549399642535, "learning_rate": 9.499825691704125e-06, "loss": 17.3328, "step": 9262 }, { "epoch": 0.16932019668415377, "grad_norm": 6.538978287646619, "learning_rate": 9.499696634075595e-06, "loss": 17.1429, "step": 9263 }, { "epoch": 0.16933847588060028, "grad_norm": 6.492351933050396, "learning_rate": 9.499567560676018e-06, "loss": 17.5489, "step": 9264 }, { "epoch": 0.1693567550770468, "grad_norm": 7.239262810517472, "learning_rate": 9.499438471505848e-06, "loss": 17.7833, "step": 9265 }, { "epoch": 0.16937503427349335, "grad_norm": 6.616435648759427, "learning_rate": 9.499309366565536e-06, "loss": 17.4924, "step": 9266 }, { "epoch": 0.16939331346993985, "grad_norm": 6.554446689823281, "learning_rate": 9.499180245855535e-06, "loss": 17.3935, "step": 9267 }, { "epoch": 0.1694115926663864, "grad_norm": 6.5185668445542655, "learning_rate": 9.499051109376298e-06, "loss": 17.4669, "step": 9268 }, { "epoch": 0.1694298718628329, "grad_norm": 7.007126029712472, "learning_rate": 9.498921957128278e-06, "loss": 17.9778, "step": 9269 }, { "epoch": 0.16944815105927943, "grad_norm": 8.031631202231285, "learning_rate": 9.498792789111924e-06, "loss": 18.2722, "step": 9270 }, { "epoch": 0.16946643025572597, "grad_norm": 6.224877837271482, "learning_rate": 9.498663605327694e-06, "loss": 17.4046, "step": 9271 }, { "epoch": 0.16948470945217248, "grad_norm": 7.110763781889539, "learning_rate": 9.498534405776038e-06, "loss": 17.8919, "step": 9272 }, { "epoch": 0.169502988648619, "grad_norm": 7.6365904652147885, "learning_rate": 9.498405190457409e-06, "loss": 17.9798, "step": 9273 }, { "epoch": 0.16952126784506552, "grad_norm": 8.219589460169788, "learning_rate": 9.49827595937226e-06, "loss": 18.0456, "step": 9274 }, { "epoch": 0.16953954704151206, "grad_norm": 7.332579281884612, "learning_rate": 9.498146712521046e-06, "loss": 17.9169, "step": 9275 }, { "epoch": 0.1695578262379586, "grad_norm": 6.298096144118503, "learning_rate": 9.498017449904216e-06, "loss": 17.4031, "step": 9276 }, { "epoch": 0.1695761054344051, "grad_norm": 7.552642894361333, "learning_rate": 9.497888171522227e-06, "loss": 17.9247, "step": 9277 }, { "epoch": 0.16959438463085164, "grad_norm": 6.453234743792428, "learning_rate": 9.497758877375528e-06, "loss": 17.3823, "step": 9278 }, { "epoch": 0.16961266382729814, "grad_norm": 8.067008263297412, "learning_rate": 9.497629567464576e-06, "loss": 17.763, "step": 9279 }, { "epoch": 0.16963094302374468, "grad_norm": 8.0629395551368, "learning_rate": 9.497500241789822e-06, "loss": 17.9484, "step": 9280 }, { "epoch": 0.1696492222201912, "grad_norm": 7.35078081421746, "learning_rate": 9.49737090035172e-06, "loss": 17.988, "step": 9281 }, { "epoch": 0.16966750141663772, "grad_norm": 7.718604409984683, "learning_rate": 9.497241543150724e-06, "loss": 17.883, "step": 9282 }, { "epoch": 0.16968578061308426, "grad_norm": 6.59497357682199, "learning_rate": 9.497112170187287e-06, "loss": 17.394, "step": 9283 }, { "epoch": 0.16970405980953077, "grad_norm": 7.051795670369403, "learning_rate": 9.49698278146186e-06, "loss": 17.551, "step": 9284 }, { "epoch": 0.1697223390059773, "grad_norm": 7.299070725776896, "learning_rate": 9.4968533769749e-06, "loss": 17.7815, "step": 9285 }, { "epoch": 0.1697406182024238, "grad_norm": 7.968673991617567, "learning_rate": 9.49672395672686e-06, "loss": 17.9397, "step": 9286 }, { "epoch": 0.16975889739887035, "grad_norm": 7.478284411752328, "learning_rate": 9.49659452071819e-06, "loss": 18.2165, "step": 9287 }, { "epoch": 0.16977717659531688, "grad_norm": 6.5996511026123, "learning_rate": 9.496465068949348e-06, "loss": 17.4892, "step": 9288 }, { "epoch": 0.1697954557917634, "grad_norm": 6.682420819376153, "learning_rate": 9.496335601420786e-06, "loss": 17.5018, "step": 9289 }, { "epoch": 0.16981373498820992, "grad_norm": 5.949975487642374, "learning_rate": 9.496206118132958e-06, "loss": 17.3644, "step": 9290 }, { "epoch": 0.16983201418465643, "grad_norm": 6.953868735342993, "learning_rate": 9.496076619086318e-06, "loss": 17.7816, "step": 9291 }, { "epoch": 0.16985029338110297, "grad_norm": 7.628905059730847, "learning_rate": 9.495947104281318e-06, "loss": 17.9063, "step": 9292 }, { "epoch": 0.1698685725775495, "grad_norm": 6.472233724623038, "learning_rate": 9.495817573718415e-06, "loss": 17.8451, "step": 9293 }, { "epoch": 0.169886851773996, "grad_norm": 6.881108318120469, "learning_rate": 9.495688027398062e-06, "loss": 17.6711, "step": 9294 }, { "epoch": 0.16990513097044255, "grad_norm": 7.015050799593046, "learning_rate": 9.495558465320712e-06, "loss": 17.7221, "step": 9295 }, { "epoch": 0.16992341016688906, "grad_norm": 6.518346540265528, "learning_rate": 9.49542888748682e-06, "loss": 17.5115, "step": 9296 }, { "epoch": 0.1699416893633356, "grad_norm": 6.619956820599698, "learning_rate": 9.495299293896839e-06, "loss": 17.5084, "step": 9297 }, { "epoch": 0.1699599685597821, "grad_norm": 7.572798054728141, "learning_rate": 9.495169684551224e-06, "loss": 17.9554, "step": 9298 }, { "epoch": 0.16997824775622863, "grad_norm": 6.131299772610638, "learning_rate": 9.495040059450431e-06, "loss": 17.3223, "step": 9299 }, { "epoch": 0.16999652695267517, "grad_norm": 8.178945579484868, "learning_rate": 9.494910418594911e-06, "loss": 17.4849, "step": 9300 }, { "epoch": 0.17001480614912168, "grad_norm": 7.652603534647454, "learning_rate": 9.494780761985121e-06, "loss": 17.8424, "step": 9301 }, { "epoch": 0.1700330853455682, "grad_norm": 5.783751209083505, "learning_rate": 9.494651089621514e-06, "loss": 17.2083, "step": 9302 }, { "epoch": 0.17005136454201472, "grad_norm": 7.664250666867046, "learning_rate": 9.494521401504543e-06, "loss": 17.8247, "step": 9303 }, { "epoch": 0.17006964373846126, "grad_norm": 5.82619101418688, "learning_rate": 9.494391697634669e-06, "loss": 17.4791, "step": 9304 }, { "epoch": 0.1700879229349078, "grad_norm": 7.838781534300981, "learning_rate": 9.494261978012339e-06, "loss": 18.1943, "step": 9305 }, { "epoch": 0.1701062021313543, "grad_norm": 5.987071466722858, "learning_rate": 9.49413224263801e-06, "loss": 17.2892, "step": 9306 }, { "epoch": 0.17012448132780084, "grad_norm": 8.175921098814772, "learning_rate": 9.49400249151214e-06, "loss": 18.1773, "step": 9307 }, { "epoch": 0.17014276052424734, "grad_norm": 8.073834735977083, "learning_rate": 9.493872724635181e-06, "loss": 18.4408, "step": 9308 }, { "epoch": 0.17016103972069388, "grad_norm": 7.009521229929869, "learning_rate": 9.493742942007586e-06, "loss": 17.8554, "step": 9309 }, { "epoch": 0.17017931891714042, "grad_norm": 7.246449100658442, "learning_rate": 9.493613143629812e-06, "loss": 17.4506, "step": 9310 }, { "epoch": 0.17019759811358692, "grad_norm": 6.370295476618179, "learning_rate": 9.493483329502315e-06, "loss": 17.4496, "step": 9311 }, { "epoch": 0.17021587731003346, "grad_norm": 6.840840264164594, "learning_rate": 9.49335349962555e-06, "loss": 17.5516, "step": 9312 }, { "epoch": 0.17023415650647997, "grad_norm": 6.1046331762652155, "learning_rate": 9.493223653999968e-06, "loss": 17.2566, "step": 9313 }, { "epoch": 0.1702524357029265, "grad_norm": 7.016368323732165, "learning_rate": 9.493093792626029e-06, "loss": 18.0346, "step": 9314 }, { "epoch": 0.170270714899373, "grad_norm": 6.76175892001121, "learning_rate": 9.492963915504188e-06, "loss": 17.7956, "step": 9315 }, { "epoch": 0.17028899409581955, "grad_norm": 6.531249376786396, "learning_rate": 9.492834022634895e-06, "loss": 17.4568, "step": 9316 }, { "epoch": 0.17030727329226608, "grad_norm": 7.5987658416392225, "learning_rate": 9.49270411401861e-06, "loss": 17.7604, "step": 9317 }, { "epoch": 0.1703255524887126, "grad_norm": 7.431519081490518, "learning_rate": 9.492574189655786e-06, "loss": 17.8669, "step": 9318 }, { "epoch": 0.17034383168515912, "grad_norm": 7.958235479289281, "learning_rate": 9.49244424954688e-06, "loss": 18.2507, "step": 9319 }, { "epoch": 0.17036211088160563, "grad_norm": 6.986082779483737, "learning_rate": 9.492314293692348e-06, "loss": 17.9994, "step": 9320 }, { "epoch": 0.17038039007805217, "grad_norm": 8.672267116506786, "learning_rate": 9.492184322092642e-06, "loss": 18.2985, "step": 9321 }, { "epoch": 0.1703986692744987, "grad_norm": 8.225867360448504, "learning_rate": 9.492054334748221e-06, "loss": 18.4201, "step": 9322 }, { "epoch": 0.1704169484709452, "grad_norm": 7.556418871422229, "learning_rate": 9.491924331659539e-06, "loss": 18.1668, "step": 9323 }, { "epoch": 0.17043522766739175, "grad_norm": 7.06440955032604, "learning_rate": 9.491794312827051e-06, "loss": 17.9922, "step": 9324 }, { "epoch": 0.17045350686383826, "grad_norm": 6.717742517738556, "learning_rate": 9.491664278251215e-06, "loss": 17.4833, "step": 9325 }, { "epoch": 0.1704717860602848, "grad_norm": 12.546466403581084, "learning_rate": 9.491534227932487e-06, "loss": 18.7726, "step": 9326 }, { "epoch": 0.17049006525673133, "grad_norm": 8.062593558854038, "learning_rate": 9.49140416187132e-06, "loss": 18.2224, "step": 9327 }, { "epoch": 0.17050834445317783, "grad_norm": 7.0581222055222685, "learning_rate": 9.49127408006817e-06, "loss": 17.5139, "step": 9328 }, { "epoch": 0.17052662364962437, "grad_norm": 7.099344618501691, "learning_rate": 9.491143982523494e-06, "loss": 17.9362, "step": 9329 }, { "epoch": 0.17054490284607088, "grad_norm": 7.402714202305478, "learning_rate": 9.49101386923775e-06, "loss": 18.0988, "step": 9330 }, { "epoch": 0.17056318204251741, "grad_norm": 7.7416532297368645, "learning_rate": 9.490883740211392e-06, "loss": 18.1308, "step": 9331 }, { "epoch": 0.17058146123896392, "grad_norm": 5.993456403412218, "learning_rate": 9.490753595444875e-06, "loss": 17.3263, "step": 9332 }, { "epoch": 0.17059974043541046, "grad_norm": 7.382773762165547, "learning_rate": 9.490623434938655e-06, "loss": 18.0158, "step": 9333 }, { "epoch": 0.170618019631857, "grad_norm": 6.90202197645296, "learning_rate": 9.490493258693192e-06, "loss": 17.852, "step": 9334 }, { "epoch": 0.1706362988283035, "grad_norm": 6.578185739755169, "learning_rate": 9.490363066708939e-06, "loss": 17.4495, "step": 9335 }, { "epoch": 0.17065457802475004, "grad_norm": 5.573591882246648, "learning_rate": 9.490232858986353e-06, "loss": 16.9851, "step": 9336 }, { "epoch": 0.17067285722119654, "grad_norm": 5.830884237617553, "learning_rate": 9.490102635525891e-06, "loss": 17.0644, "step": 9337 }, { "epoch": 0.17069113641764308, "grad_norm": 7.101055498888201, "learning_rate": 9.489972396328008e-06, "loss": 17.8703, "step": 9338 }, { "epoch": 0.17070941561408962, "grad_norm": 6.8893580123268325, "learning_rate": 9.489842141393162e-06, "loss": 17.3605, "step": 9339 }, { "epoch": 0.17072769481053612, "grad_norm": 7.00415508406558, "learning_rate": 9.489711870721808e-06, "loss": 17.6772, "step": 9340 }, { "epoch": 0.17074597400698266, "grad_norm": 6.687536240515922, "learning_rate": 9.489581584314404e-06, "loss": 17.6577, "step": 9341 }, { "epoch": 0.17076425320342917, "grad_norm": 6.716421562949809, "learning_rate": 9.489451282171407e-06, "loss": 17.8492, "step": 9342 }, { "epoch": 0.1707825323998757, "grad_norm": 7.025368007532384, "learning_rate": 9.489320964293272e-06, "loss": 17.8394, "step": 9343 }, { "epoch": 0.17080081159632224, "grad_norm": 8.1347781081007, "learning_rate": 9.489190630680456e-06, "loss": 18.1092, "step": 9344 }, { "epoch": 0.17081909079276875, "grad_norm": 8.161686929233428, "learning_rate": 9.489060281333417e-06, "loss": 18.0617, "step": 9345 }, { "epoch": 0.17083736998921528, "grad_norm": 7.743324549428767, "learning_rate": 9.488929916252611e-06, "loss": 18.2099, "step": 9346 }, { "epoch": 0.1708556491856618, "grad_norm": 7.457913709310798, "learning_rate": 9.488799535438496e-06, "loss": 17.9657, "step": 9347 }, { "epoch": 0.17087392838210833, "grad_norm": 6.4565171819898275, "learning_rate": 9.48866913889153e-06, "loss": 17.4339, "step": 9348 }, { "epoch": 0.17089220757855483, "grad_norm": 5.7198681828309175, "learning_rate": 9.488538726612165e-06, "loss": 17.1374, "step": 9349 }, { "epoch": 0.17091048677500137, "grad_norm": 5.829504990701503, "learning_rate": 9.488408298600861e-06, "loss": 17.1341, "step": 9350 }, { "epoch": 0.1709287659714479, "grad_norm": 8.547314581987964, "learning_rate": 9.488277854858077e-06, "loss": 18.4687, "step": 9351 }, { "epoch": 0.1709470451678944, "grad_norm": 6.427092565488006, "learning_rate": 9.488147395384267e-06, "loss": 17.4307, "step": 9352 }, { "epoch": 0.17096532436434095, "grad_norm": 5.945035141440985, "learning_rate": 9.488016920179892e-06, "loss": 17.3094, "step": 9353 }, { "epoch": 0.17098360356078746, "grad_norm": 5.212870788799658, "learning_rate": 9.487886429245406e-06, "loss": 16.9609, "step": 9354 }, { "epoch": 0.171001882757234, "grad_norm": 7.182918513019496, "learning_rate": 9.487755922581267e-06, "loss": 17.9397, "step": 9355 }, { "epoch": 0.17102016195368053, "grad_norm": 8.180485431431427, "learning_rate": 9.487625400187935e-06, "loss": 18.2699, "step": 9356 }, { "epoch": 0.17103844115012704, "grad_norm": 6.908136535638802, "learning_rate": 9.487494862065863e-06, "loss": 17.6333, "step": 9357 }, { "epoch": 0.17105672034657357, "grad_norm": 6.582988597315935, "learning_rate": 9.487364308215513e-06, "loss": 17.5628, "step": 9358 }, { "epoch": 0.17107499954302008, "grad_norm": 7.458926835537541, "learning_rate": 9.487233738637338e-06, "loss": 17.9193, "step": 9359 }, { "epoch": 0.17109327873946661, "grad_norm": 7.8258914490505065, "learning_rate": 9.487103153331799e-06, "loss": 17.9659, "step": 9360 }, { "epoch": 0.17111155793591315, "grad_norm": 7.791564347644736, "learning_rate": 9.486972552299354e-06, "loss": 18.1149, "step": 9361 }, { "epoch": 0.17112983713235966, "grad_norm": 7.872110184686366, "learning_rate": 9.486841935540458e-06, "loss": 17.9103, "step": 9362 }, { "epoch": 0.1711481163288062, "grad_norm": 7.707540377198523, "learning_rate": 9.486711303055571e-06, "loss": 18.2933, "step": 9363 }, { "epoch": 0.1711663955252527, "grad_norm": 7.144271057657575, "learning_rate": 9.486580654845151e-06, "loss": 17.9076, "step": 9364 }, { "epoch": 0.17118467472169924, "grad_norm": 5.582895005836393, "learning_rate": 9.486449990909654e-06, "loss": 17.0059, "step": 9365 }, { "epoch": 0.17120295391814574, "grad_norm": 5.283504181308812, "learning_rate": 9.48631931124954e-06, "loss": 16.9713, "step": 9366 }, { "epoch": 0.17122123311459228, "grad_norm": 7.036891960760273, "learning_rate": 9.486188615865267e-06, "loss": 17.764, "step": 9367 }, { "epoch": 0.17123951231103882, "grad_norm": 7.0244422250085155, "learning_rate": 9.48605790475729e-06, "loss": 17.9067, "step": 9368 }, { "epoch": 0.17125779150748532, "grad_norm": 6.402158325323122, "learning_rate": 9.485927177926071e-06, "loss": 17.339, "step": 9369 }, { "epoch": 0.17127607070393186, "grad_norm": 6.943192297422052, "learning_rate": 9.485796435372066e-06, "loss": 17.7603, "step": 9370 }, { "epoch": 0.17129434990037837, "grad_norm": 6.398925418463297, "learning_rate": 9.485665677095733e-06, "loss": 17.2458, "step": 9371 }, { "epoch": 0.1713126290968249, "grad_norm": 6.856151315543031, "learning_rate": 9.48553490309753e-06, "loss": 17.853, "step": 9372 }, { "epoch": 0.17133090829327144, "grad_norm": 7.3519575505114565, "learning_rate": 9.48540411337792e-06, "loss": 17.97, "step": 9373 }, { "epoch": 0.17134918748971795, "grad_norm": 6.2059150149780615, "learning_rate": 9.485273307937354e-06, "loss": 17.2831, "step": 9374 }, { "epoch": 0.17136746668616448, "grad_norm": 5.751463733804113, "learning_rate": 9.485142486776297e-06, "loss": 17.1818, "step": 9375 }, { "epoch": 0.171385745882611, "grad_norm": 7.047700854948244, "learning_rate": 9.485011649895204e-06, "loss": 17.7665, "step": 9376 }, { "epoch": 0.17140402507905753, "grad_norm": 8.475999209283327, "learning_rate": 9.484880797294534e-06, "loss": 18.6278, "step": 9377 }, { "epoch": 0.17142230427550406, "grad_norm": 6.271246333842615, "learning_rate": 9.484749928974745e-06, "loss": 17.466, "step": 9378 }, { "epoch": 0.17144058347195057, "grad_norm": 7.504721163883052, "learning_rate": 9.4846190449363e-06, "loss": 17.8629, "step": 9379 }, { "epoch": 0.1714588626683971, "grad_norm": 6.753137748892236, "learning_rate": 9.48448814517965e-06, "loss": 17.3884, "step": 9380 }, { "epoch": 0.1714771418648436, "grad_norm": 6.683449785929558, "learning_rate": 9.484357229705262e-06, "loss": 17.3045, "step": 9381 }, { "epoch": 0.17149542106129015, "grad_norm": 7.038968690947826, "learning_rate": 9.48422629851359e-06, "loss": 17.6161, "step": 9382 }, { "epoch": 0.17151370025773666, "grad_norm": 8.246775361551709, "learning_rate": 9.484095351605093e-06, "loss": 18.5538, "step": 9383 }, { "epoch": 0.1715319794541832, "grad_norm": 6.04432556618661, "learning_rate": 9.48396438898023e-06, "loss": 17.5251, "step": 9384 }, { "epoch": 0.17155025865062973, "grad_norm": 6.775248989592338, "learning_rate": 9.483833410639465e-06, "loss": 17.7649, "step": 9385 }, { "epoch": 0.17156853784707624, "grad_norm": 5.488825339415775, "learning_rate": 9.483702416583249e-06, "loss": 17.0887, "step": 9386 }, { "epoch": 0.17158681704352277, "grad_norm": 6.663263419093369, "learning_rate": 9.483571406812046e-06, "loss": 17.3529, "step": 9387 }, { "epoch": 0.17160509623996928, "grad_norm": 6.451288086920941, "learning_rate": 9.483440381326316e-06, "loss": 17.446, "step": 9388 }, { "epoch": 0.17162337543641581, "grad_norm": 9.190972193048253, "learning_rate": 9.483309340126514e-06, "loss": 18.0486, "step": 9389 }, { "epoch": 0.17164165463286235, "grad_norm": 6.5989987104060726, "learning_rate": 9.483178283213104e-06, "loss": 17.6012, "step": 9390 }, { "epoch": 0.17165993382930886, "grad_norm": 6.328149633870934, "learning_rate": 9.483047210586542e-06, "loss": 17.3198, "step": 9391 }, { "epoch": 0.1716782130257554, "grad_norm": 7.2569753611456065, "learning_rate": 9.482916122247289e-06, "loss": 17.9867, "step": 9392 }, { "epoch": 0.1716964922222019, "grad_norm": 6.898388072866421, "learning_rate": 9.482785018195803e-06, "loss": 17.5655, "step": 9393 }, { "epoch": 0.17171477141864844, "grad_norm": 8.01711657514621, "learning_rate": 9.482653898432546e-06, "loss": 18.3204, "step": 9394 }, { "epoch": 0.17173305061509497, "grad_norm": 6.975439275024527, "learning_rate": 9.482522762957976e-06, "loss": 18.1094, "step": 9395 }, { "epoch": 0.17175132981154148, "grad_norm": 5.868700240209868, "learning_rate": 9.48239161177255e-06, "loss": 16.9796, "step": 9396 }, { "epoch": 0.17176960900798802, "grad_norm": 7.08908481341133, "learning_rate": 9.482260444876733e-06, "loss": 17.6141, "step": 9397 }, { "epoch": 0.17178788820443452, "grad_norm": 7.055610246006772, "learning_rate": 9.482129262270982e-06, "loss": 17.5318, "step": 9398 }, { "epoch": 0.17180616740088106, "grad_norm": 6.116674772768493, "learning_rate": 9.481998063955756e-06, "loss": 17.1327, "step": 9399 }, { "epoch": 0.17182444659732757, "grad_norm": 8.588323819381399, "learning_rate": 9.481866849931516e-06, "loss": 18.4417, "step": 9400 }, { "epoch": 0.1718427257937741, "grad_norm": 7.51140039685407, "learning_rate": 9.481735620198722e-06, "loss": 17.8579, "step": 9401 }, { "epoch": 0.17186100499022064, "grad_norm": 8.035682754298808, "learning_rate": 9.481604374757834e-06, "loss": 17.7268, "step": 9402 }, { "epoch": 0.17187928418666715, "grad_norm": 7.592674702990398, "learning_rate": 9.481473113609309e-06, "loss": 18.0118, "step": 9403 }, { "epoch": 0.17189756338311368, "grad_norm": 6.667093276838761, "learning_rate": 9.481341836753612e-06, "loss": 17.4662, "step": 9404 }, { "epoch": 0.1719158425795602, "grad_norm": 7.810607329528676, "learning_rate": 9.4812105441912e-06, "loss": 17.719, "step": 9405 }, { "epoch": 0.17193412177600673, "grad_norm": 7.194972914942562, "learning_rate": 9.481079235922534e-06, "loss": 17.5976, "step": 9406 }, { "epoch": 0.17195240097245326, "grad_norm": 7.9834727347623895, "learning_rate": 9.480947911948075e-06, "loss": 18.1629, "step": 9407 }, { "epoch": 0.17197068016889977, "grad_norm": 7.18606951996447, "learning_rate": 9.480816572268281e-06, "loss": 17.5883, "step": 9408 }, { "epoch": 0.1719889593653463, "grad_norm": 6.486718640880905, "learning_rate": 9.480685216883614e-06, "loss": 17.5277, "step": 9409 }, { "epoch": 0.1720072385617928, "grad_norm": 9.50867175041148, "learning_rate": 9.480553845794534e-06, "loss": 18.0472, "step": 9410 }, { "epoch": 0.17202551775823935, "grad_norm": 6.833479189201265, "learning_rate": 9.480422459001503e-06, "loss": 17.6381, "step": 9411 }, { "epoch": 0.17204379695468588, "grad_norm": 6.277692317050017, "learning_rate": 9.480291056504978e-06, "loss": 17.2588, "step": 9412 }, { "epoch": 0.1720620761511324, "grad_norm": 5.7551353372689995, "learning_rate": 9.480159638305424e-06, "loss": 17.2131, "step": 9413 }, { "epoch": 0.17208035534757893, "grad_norm": 8.692658198171147, "learning_rate": 9.480028204403298e-06, "loss": 17.8449, "step": 9414 }, { "epoch": 0.17209863454402544, "grad_norm": 7.435976881499505, "learning_rate": 9.47989675479906e-06, "loss": 17.9663, "step": 9415 }, { "epoch": 0.17211691374047197, "grad_norm": 8.665769778151954, "learning_rate": 9.479765289493176e-06, "loss": 18.0989, "step": 9416 }, { "epoch": 0.17213519293691848, "grad_norm": 7.548053517846902, "learning_rate": 9.479633808486103e-06, "loss": 17.9994, "step": 9417 }, { "epoch": 0.17215347213336502, "grad_norm": 8.24601842526659, "learning_rate": 9.4795023117783e-06, "loss": 17.7702, "step": 9418 }, { "epoch": 0.17217175132981155, "grad_norm": 7.3080893508518265, "learning_rate": 9.479370799370231e-06, "loss": 17.6346, "step": 9419 }, { "epoch": 0.17219003052625806, "grad_norm": 6.95859428350624, "learning_rate": 9.479239271262356e-06, "loss": 17.7272, "step": 9420 }, { "epoch": 0.1722083097227046, "grad_norm": 5.658747751909899, "learning_rate": 9.479107727455137e-06, "loss": 17.1958, "step": 9421 }, { "epoch": 0.1722265889191511, "grad_norm": 9.264521686672701, "learning_rate": 9.478976167949032e-06, "loss": 18.6736, "step": 9422 }, { "epoch": 0.17224486811559764, "grad_norm": 10.464266390240628, "learning_rate": 9.478844592744507e-06, "loss": 18.6533, "step": 9423 }, { "epoch": 0.17226314731204417, "grad_norm": 6.793778486617364, "learning_rate": 9.478713001842019e-06, "loss": 17.5827, "step": 9424 }, { "epoch": 0.17228142650849068, "grad_norm": 7.856395977325854, "learning_rate": 9.47858139524203e-06, "loss": 18.2891, "step": 9425 }, { "epoch": 0.17229970570493722, "grad_norm": 6.938032387697228, "learning_rate": 9.478449772945003e-06, "loss": 17.7025, "step": 9426 }, { "epoch": 0.17231798490138373, "grad_norm": 6.694615648790434, "learning_rate": 9.478318134951396e-06, "loss": 17.7233, "step": 9427 }, { "epoch": 0.17233626409783026, "grad_norm": 7.934515973309323, "learning_rate": 9.478186481261674e-06, "loss": 18.6778, "step": 9428 }, { "epoch": 0.1723545432942768, "grad_norm": 7.6012840022268655, "learning_rate": 9.478054811876298e-06, "loss": 17.8783, "step": 9429 }, { "epoch": 0.1723728224907233, "grad_norm": 7.09475782007339, "learning_rate": 9.477923126795727e-06, "loss": 17.8057, "step": 9430 }, { "epoch": 0.17239110168716984, "grad_norm": 6.954595742628623, "learning_rate": 9.477791426020425e-06, "loss": 17.8825, "step": 9431 }, { "epoch": 0.17240938088361635, "grad_norm": 6.6808165057065, "learning_rate": 9.477659709550852e-06, "loss": 17.6323, "step": 9432 }, { "epoch": 0.17242766008006288, "grad_norm": 9.677616218480246, "learning_rate": 9.477527977387471e-06, "loss": 18.3513, "step": 9433 }, { "epoch": 0.1724459392765094, "grad_norm": 6.898574913690863, "learning_rate": 9.477396229530742e-06, "loss": 17.7003, "step": 9434 }, { "epoch": 0.17246421847295593, "grad_norm": 6.22690980824051, "learning_rate": 9.477264465981128e-06, "loss": 17.5063, "step": 9435 }, { "epoch": 0.17248249766940246, "grad_norm": 6.551990444920954, "learning_rate": 9.477132686739091e-06, "loss": 17.5723, "step": 9436 }, { "epoch": 0.17250077686584897, "grad_norm": 8.222108731581205, "learning_rate": 9.477000891805092e-06, "loss": 18.1106, "step": 9437 }, { "epoch": 0.1725190560622955, "grad_norm": 7.472141826139331, "learning_rate": 9.476869081179595e-06, "loss": 17.926, "step": 9438 }, { "epoch": 0.17253733525874201, "grad_norm": 5.729838615705796, "learning_rate": 9.476737254863057e-06, "loss": 17.2853, "step": 9439 }, { "epoch": 0.17255561445518855, "grad_norm": 6.169816445120499, "learning_rate": 9.476605412855946e-06, "loss": 17.3952, "step": 9440 }, { "epoch": 0.17257389365163509, "grad_norm": 5.714087489923032, "learning_rate": 9.47647355515872e-06, "loss": 17.0376, "step": 9441 }, { "epoch": 0.1725921728480816, "grad_norm": 6.319413544978291, "learning_rate": 9.476341681771844e-06, "loss": 17.6697, "step": 9442 }, { "epoch": 0.17261045204452813, "grad_norm": 6.365535308271305, "learning_rate": 9.476209792695779e-06, "loss": 17.5086, "step": 9443 }, { "epoch": 0.17262873124097464, "grad_norm": 9.358700783372539, "learning_rate": 9.476077887930985e-06, "loss": 18.0325, "step": 9444 }, { "epoch": 0.17264701043742117, "grad_norm": 5.875412377637854, "learning_rate": 9.475945967477929e-06, "loss": 17.2611, "step": 9445 }, { "epoch": 0.1726652896338677, "grad_norm": 6.57373215116459, "learning_rate": 9.47581403133707e-06, "loss": 17.5664, "step": 9446 }, { "epoch": 0.17268356883031422, "grad_norm": 6.9318182688007655, "learning_rate": 9.47568207950887e-06, "loss": 17.4997, "step": 9447 }, { "epoch": 0.17270184802676075, "grad_norm": 8.41243281063665, "learning_rate": 9.475550111993794e-06, "loss": 17.8644, "step": 9448 }, { "epoch": 0.17272012722320726, "grad_norm": 7.471280300668058, "learning_rate": 9.475418128792302e-06, "loss": 17.8876, "step": 9449 }, { "epoch": 0.1727384064196538, "grad_norm": 6.601480469516368, "learning_rate": 9.47528612990486e-06, "loss": 17.4701, "step": 9450 }, { "epoch": 0.1727566856161003, "grad_norm": 6.810351201802684, "learning_rate": 9.475154115331926e-06, "loss": 17.5172, "step": 9451 }, { "epoch": 0.17277496481254684, "grad_norm": 7.398992952527537, "learning_rate": 9.475022085073967e-06, "loss": 17.4504, "step": 9452 }, { "epoch": 0.17279324400899337, "grad_norm": 6.6966327893105415, "learning_rate": 9.474890039131442e-06, "loss": 17.4509, "step": 9453 }, { "epoch": 0.17281152320543988, "grad_norm": 6.158766465461097, "learning_rate": 9.474757977504817e-06, "loss": 17.4434, "step": 9454 }, { "epoch": 0.17282980240188642, "grad_norm": 5.923134645722096, "learning_rate": 9.474625900194554e-06, "loss": 16.9824, "step": 9455 }, { "epoch": 0.17284808159833293, "grad_norm": 8.076136920082657, "learning_rate": 9.474493807201114e-06, "loss": 17.8831, "step": 9456 }, { "epoch": 0.17286636079477946, "grad_norm": 6.456655976023691, "learning_rate": 9.474361698524963e-06, "loss": 17.6123, "step": 9457 }, { "epoch": 0.172884639991226, "grad_norm": 7.792694797980782, "learning_rate": 9.474229574166563e-06, "loss": 17.6647, "step": 9458 }, { "epoch": 0.1729029191876725, "grad_norm": 7.571500525823527, "learning_rate": 9.474097434126374e-06, "loss": 18.2985, "step": 9459 }, { "epoch": 0.17292119838411904, "grad_norm": 7.8823129226813275, "learning_rate": 9.473965278404865e-06, "loss": 17.6927, "step": 9460 }, { "epoch": 0.17293947758056555, "grad_norm": 6.901785326814127, "learning_rate": 9.473833107002492e-06, "loss": 17.8228, "step": 9461 }, { "epoch": 0.17295775677701208, "grad_norm": 9.092246095613532, "learning_rate": 9.473700919919725e-06, "loss": 18.1414, "step": 9462 }, { "epoch": 0.17297603597345862, "grad_norm": 8.048377244864216, "learning_rate": 9.473568717157024e-06, "loss": 18.1686, "step": 9463 }, { "epoch": 0.17299431516990513, "grad_norm": 6.299790719426887, "learning_rate": 9.473436498714852e-06, "loss": 17.4108, "step": 9464 }, { "epoch": 0.17301259436635166, "grad_norm": 7.137264906670612, "learning_rate": 9.473304264593674e-06, "loss": 17.9108, "step": 9465 }, { "epoch": 0.17303087356279817, "grad_norm": 6.39898306603595, "learning_rate": 9.473172014793953e-06, "loss": 17.4791, "step": 9466 }, { "epoch": 0.1730491527592447, "grad_norm": 6.967386160536522, "learning_rate": 9.473039749316152e-06, "loss": 17.5312, "step": 9467 }, { "epoch": 0.17306743195569121, "grad_norm": 6.762288020691088, "learning_rate": 9.472907468160735e-06, "loss": 17.6392, "step": 9468 }, { "epoch": 0.17308571115213775, "grad_norm": 8.540444264241714, "learning_rate": 9.472775171328165e-06, "loss": 18.0354, "step": 9469 }, { "epoch": 0.17310399034858429, "grad_norm": 6.765161338247924, "learning_rate": 9.472642858818906e-06, "loss": 17.7281, "step": 9470 }, { "epoch": 0.1731222695450308, "grad_norm": 6.962275288338839, "learning_rate": 9.472510530633422e-06, "loss": 17.6537, "step": 9471 }, { "epoch": 0.17314054874147733, "grad_norm": 7.049348562423112, "learning_rate": 9.472378186772177e-06, "loss": 17.7875, "step": 9472 }, { "epoch": 0.17315882793792384, "grad_norm": 6.193548539422179, "learning_rate": 9.472245827235636e-06, "loss": 17.3468, "step": 9473 }, { "epoch": 0.17317710713437037, "grad_norm": 7.164603951954371, "learning_rate": 9.472113452024258e-06, "loss": 17.9715, "step": 9474 }, { "epoch": 0.1731953863308169, "grad_norm": 6.968068689021553, "learning_rate": 9.471981061138513e-06, "loss": 17.7948, "step": 9475 }, { "epoch": 0.17321366552726342, "grad_norm": 7.232139235254524, "learning_rate": 9.471848654578862e-06, "loss": 17.7792, "step": 9476 }, { "epoch": 0.17323194472370995, "grad_norm": 5.9023763429473215, "learning_rate": 9.471716232345769e-06, "loss": 16.9879, "step": 9477 }, { "epoch": 0.17325022392015646, "grad_norm": 5.738475405414184, "learning_rate": 9.4715837944397e-06, "loss": 17.0774, "step": 9478 }, { "epoch": 0.173268503116603, "grad_norm": 6.112365319319647, "learning_rate": 9.471451340861117e-06, "loss": 17.4933, "step": 9479 }, { "epoch": 0.17328678231304953, "grad_norm": 6.886066210165729, "learning_rate": 9.471318871610484e-06, "loss": 17.8008, "step": 9480 }, { "epoch": 0.17330506150949604, "grad_norm": 7.979041327998414, "learning_rate": 9.471186386688267e-06, "loss": 17.9677, "step": 9481 }, { "epoch": 0.17332334070594257, "grad_norm": 6.447605600130551, "learning_rate": 9.47105388609493e-06, "loss": 17.4334, "step": 9482 }, { "epoch": 0.17334161990238908, "grad_norm": 6.919822594374885, "learning_rate": 9.470921369830938e-06, "loss": 17.4449, "step": 9483 }, { "epoch": 0.17335989909883562, "grad_norm": 6.224254864865902, "learning_rate": 9.470788837896753e-06, "loss": 17.3345, "step": 9484 }, { "epoch": 0.17337817829528213, "grad_norm": 7.83572911688027, "learning_rate": 9.470656290292842e-06, "loss": 17.9033, "step": 9485 }, { "epoch": 0.17339645749172866, "grad_norm": 6.013585901590195, "learning_rate": 9.47052372701967e-06, "loss": 17.1984, "step": 9486 }, { "epoch": 0.1734147366881752, "grad_norm": 6.787471336236899, "learning_rate": 9.470391148077697e-06, "loss": 17.2764, "step": 9487 }, { "epoch": 0.1734330158846217, "grad_norm": 8.49541940305247, "learning_rate": 9.470258553467392e-06, "loss": 18.336, "step": 9488 }, { "epoch": 0.17345129508106824, "grad_norm": 7.359129457025921, "learning_rate": 9.47012594318922e-06, "loss": 17.8962, "step": 9489 }, { "epoch": 0.17346957427751475, "grad_norm": 5.905825614079544, "learning_rate": 9.469993317243644e-06, "loss": 17.2531, "step": 9490 }, { "epoch": 0.17348785347396128, "grad_norm": 6.661236621727768, "learning_rate": 9.469860675631129e-06, "loss": 17.4225, "step": 9491 }, { "epoch": 0.17350613267040782, "grad_norm": 7.970349098170724, "learning_rate": 9.46972801835214e-06, "loss": 18.0449, "step": 9492 }, { "epoch": 0.17352441186685433, "grad_norm": 7.870137417874784, "learning_rate": 9.469595345407144e-06, "loss": 18.2065, "step": 9493 }, { "epoch": 0.17354269106330086, "grad_norm": 6.630693665009491, "learning_rate": 9.4694626567966e-06, "loss": 17.6834, "step": 9494 }, { "epoch": 0.17356097025974737, "grad_norm": 6.727202801770572, "learning_rate": 9.46932995252098e-06, "loss": 17.6419, "step": 9495 }, { "epoch": 0.1735792494561939, "grad_norm": 8.627751500853709, "learning_rate": 9.469197232580747e-06, "loss": 18.1795, "step": 9496 }, { "epoch": 0.17359752865264044, "grad_norm": 6.179486784072806, "learning_rate": 9.469064496976365e-06, "loss": 17.31, "step": 9497 }, { "epoch": 0.17361580784908695, "grad_norm": 6.334591869376551, "learning_rate": 9.468931745708298e-06, "loss": 17.6099, "step": 9498 }, { "epoch": 0.1736340870455335, "grad_norm": 6.316203075348294, "learning_rate": 9.468798978777016e-06, "loss": 17.4841, "step": 9499 }, { "epoch": 0.17365236624198, "grad_norm": 7.7296442146211515, "learning_rate": 9.46866619618298e-06, "loss": 17.8273, "step": 9500 }, { "epoch": 0.17367064543842653, "grad_norm": 6.517640989987836, "learning_rate": 9.468533397926656e-06, "loss": 17.6511, "step": 9501 }, { "epoch": 0.17368892463487304, "grad_norm": 6.486360811626599, "learning_rate": 9.46840058400851e-06, "loss": 17.7592, "step": 9502 }, { "epoch": 0.17370720383131957, "grad_norm": 6.973484127279262, "learning_rate": 9.468267754429009e-06, "loss": 17.5944, "step": 9503 }, { "epoch": 0.1737254830277661, "grad_norm": 6.580408787595043, "learning_rate": 9.468134909188616e-06, "loss": 17.7031, "step": 9504 }, { "epoch": 0.17374376222421262, "grad_norm": 6.121295434799951, "learning_rate": 9.468002048287799e-06, "loss": 17.4001, "step": 9505 }, { "epoch": 0.17376204142065915, "grad_norm": 6.64922992160366, "learning_rate": 9.467869171727022e-06, "loss": 17.7164, "step": 9506 }, { "epoch": 0.17378032061710566, "grad_norm": 6.549658495995932, "learning_rate": 9.467736279506752e-06, "loss": 17.4104, "step": 9507 }, { "epoch": 0.1737985998135522, "grad_norm": 6.197963987146198, "learning_rate": 9.467603371627454e-06, "loss": 17.3903, "step": 9508 }, { "epoch": 0.17381687900999873, "grad_norm": 6.181427883663454, "learning_rate": 9.467470448089592e-06, "loss": 17.346, "step": 9509 }, { "epoch": 0.17383515820644524, "grad_norm": 6.575355283380181, "learning_rate": 9.467337508893635e-06, "loss": 17.5846, "step": 9510 }, { "epoch": 0.17385343740289178, "grad_norm": 7.123198600785866, "learning_rate": 9.467204554040049e-06, "loss": 18.1258, "step": 9511 }, { "epoch": 0.17387171659933828, "grad_norm": 6.802118401190757, "learning_rate": 9.467071583529297e-06, "loss": 17.6492, "step": 9512 }, { "epoch": 0.17388999579578482, "grad_norm": 6.764335831864584, "learning_rate": 9.466938597361847e-06, "loss": 17.4252, "step": 9513 }, { "epoch": 0.17390827499223135, "grad_norm": 6.252125581799929, "learning_rate": 9.466805595538165e-06, "loss": 17.5834, "step": 9514 }, { "epoch": 0.17392655418867786, "grad_norm": 7.274545612028333, "learning_rate": 9.466672578058718e-06, "loss": 17.6257, "step": 9515 }, { "epoch": 0.1739448333851244, "grad_norm": 7.550294008739678, "learning_rate": 9.46653954492397e-06, "loss": 17.8097, "step": 9516 }, { "epoch": 0.1739631125815709, "grad_norm": 6.783902071701367, "learning_rate": 9.466406496134388e-06, "loss": 17.7032, "step": 9517 }, { "epoch": 0.17398139177801744, "grad_norm": 7.286554500889945, "learning_rate": 9.466273431690439e-06, "loss": 17.752, "step": 9518 }, { "epoch": 0.17399967097446395, "grad_norm": 6.522243344605513, "learning_rate": 9.46614035159259e-06, "loss": 17.6034, "step": 9519 }, { "epoch": 0.17401795017091048, "grad_norm": 6.418133639420691, "learning_rate": 9.466007255841307e-06, "loss": 17.4132, "step": 9520 }, { "epoch": 0.17403622936735702, "grad_norm": 6.754508075717153, "learning_rate": 9.465874144437055e-06, "loss": 17.8215, "step": 9521 }, { "epoch": 0.17405450856380353, "grad_norm": 6.2240970765795085, "learning_rate": 9.465741017380302e-06, "loss": 17.3865, "step": 9522 }, { "epoch": 0.17407278776025006, "grad_norm": 7.749530890247584, "learning_rate": 9.465607874671514e-06, "loss": 17.9878, "step": 9523 }, { "epoch": 0.17409106695669657, "grad_norm": 7.70501705038296, "learning_rate": 9.465474716311159e-06, "loss": 18.3077, "step": 9524 }, { "epoch": 0.1741093461531431, "grad_norm": 6.65303915166089, "learning_rate": 9.465341542299701e-06, "loss": 17.6386, "step": 9525 }, { "epoch": 0.17412762534958964, "grad_norm": 5.461389075851484, "learning_rate": 9.46520835263761e-06, "loss": 17.0258, "step": 9526 }, { "epoch": 0.17414590454603615, "grad_norm": 9.381998953111543, "learning_rate": 9.46507514732535e-06, "loss": 17.5598, "step": 9527 }, { "epoch": 0.1741641837424827, "grad_norm": 6.1220603773567, "learning_rate": 9.464941926363389e-06, "loss": 17.4067, "step": 9528 }, { "epoch": 0.1741824629389292, "grad_norm": 8.00793554449915, "learning_rate": 9.464808689752195e-06, "loss": 18.0463, "step": 9529 }, { "epoch": 0.17420074213537573, "grad_norm": 6.465493988527969, "learning_rate": 9.464675437492234e-06, "loss": 17.4025, "step": 9530 }, { "epoch": 0.17421902133182227, "grad_norm": 6.306562954623618, "learning_rate": 9.464542169583972e-06, "loss": 17.3987, "step": 9531 }, { "epoch": 0.17423730052826877, "grad_norm": 8.160215439993134, "learning_rate": 9.464408886027878e-06, "loss": 18.1336, "step": 9532 }, { "epoch": 0.1742555797247153, "grad_norm": 6.695224799762931, "learning_rate": 9.464275586824418e-06, "loss": 17.6448, "step": 9533 }, { "epoch": 0.17427385892116182, "grad_norm": 6.977009153648582, "learning_rate": 9.46414227197406e-06, "loss": 17.7975, "step": 9534 }, { "epoch": 0.17429213811760835, "grad_norm": 7.331561066680906, "learning_rate": 9.46400894147727e-06, "loss": 17.8779, "step": 9535 }, { "epoch": 0.17431041731405486, "grad_norm": 7.011303994873853, "learning_rate": 9.463875595334516e-06, "loss": 17.9372, "step": 9536 }, { "epoch": 0.1743286965105014, "grad_norm": 6.169869422341589, "learning_rate": 9.463742233546264e-06, "loss": 17.5423, "step": 9537 }, { "epoch": 0.17434697570694793, "grad_norm": 7.389420360269115, "learning_rate": 9.463608856112985e-06, "loss": 18.075, "step": 9538 }, { "epoch": 0.17436525490339444, "grad_norm": 8.262990116786577, "learning_rate": 9.463475463035143e-06, "loss": 18.1106, "step": 9539 }, { "epoch": 0.17438353409984098, "grad_norm": 7.2006562511719, "learning_rate": 9.463342054313207e-06, "loss": 17.6868, "step": 9540 }, { "epoch": 0.17440181329628748, "grad_norm": 7.840608118510198, "learning_rate": 9.463208629947645e-06, "loss": 18.3659, "step": 9541 }, { "epoch": 0.17442009249273402, "grad_norm": 7.0299308033934755, "learning_rate": 9.463075189938925e-06, "loss": 17.4907, "step": 9542 }, { "epoch": 0.17443837168918055, "grad_norm": 6.323297700497119, "learning_rate": 9.462941734287511e-06, "loss": 17.3497, "step": 9543 }, { "epoch": 0.17445665088562706, "grad_norm": 7.9047948424288315, "learning_rate": 9.462808262993876e-06, "loss": 17.917, "step": 9544 }, { "epoch": 0.1744749300820736, "grad_norm": 6.179598644734943, "learning_rate": 9.462674776058485e-06, "loss": 17.4775, "step": 9545 }, { "epoch": 0.1744932092785201, "grad_norm": 6.596936136388797, "learning_rate": 9.462541273481804e-06, "loss": 17.3298, "step": 9546 }, { "epoch": 0.17451148847496664, "grad_norm": 7.149311211802416, "learning_rate": 9.462407755264305e-06, "loss": 17.6487, "step": 9547 }, { "epoch": 0.17452976767141318, "grad_norm": 6.735124720416647, "learning_rate": 9.462274221406455e-06, "loss": 17.4208, "step": 9548 }, { "epoch": 0.17454804686785969, "grad_norm": 7.670894500045217, "learning_rate": 9.46214067190872e-06, "loss": 18.224, "step": 9549 }, { "epoch": 0.17456632606430622, "grad_norm": 6.435070996214837, "learning_rate": 9.462007106771569e-06, "loss": 17.6639, "step": 9550 }, { "epoch": 0.17458460526075273, "grad_norm": 7.6112651650811305, "learning_rate": 9.461873525995469e-06, "loss": 18.2247, "step": 9551 }, { "epoch": 0.17460288445719926, "grad_norm": 5.044347889863651, "learning_rate": 9.461739929580892e-06, "loss": 17.024, "step": 9552 }, { "epoch": 0.17462116365364577, "grad_norm": 6.5749082393182645, "learning_rate": 9.4616063175283e-06, "loss": 17.595, "step": 9553 }, { "epoch": 0.1746394428500923, "grad_norm": 6.841947019941837, "learning_rate": 9.46147268983817e-06, "loss": 17.6326, "step": 9554 }, { "epoch": 0.17465772204653884, "grad_norm": 7.120305404120127, "learning_rate": 9.461339046510962e-06, "loss": 17.5067, "step": 9555 }, { "epoch": 0.17467600124298535, "grad_norm": 9.627379721584031, "learning_rate": 9.461205387547147e-06, "loss": 18.5417, "step": 9556 }, { "epoch": 0.1746942804394319, "grad_norm": 6.590698216359157, "learning_rate": 9.461071712947197e-06, "loss": 17.5086, "step": 9557 }, { "epoch": 0.1747125596358784, "grad_norm": 6.823081760392698, "learning_rate": 9.460938022711576e-06, "loss": 17.5671, "step": 9558 }, { "epoch": 0.17473083883232493, "grad_norm": 7.28494667014169, "learning_rate": 9.460804316840755e-06, "loss": 18.4391, "step": 9559 }, { "epoch": 0.17474911802877147, "grad_norm": 7.1721623372429555, "learning_rate": 9.460670595335201e-06, "loss": 17.804, "step": 9560 }, { "epoch": 0.17476739722521797, "grad_norm": 7.984601933362222, "learning_rate": 9.460536858195387e-06, "loss": 18.2357, "step": 9561 }, { "epoch": 0.1747856764216645, "grad_norm": 6.3102168532318395, "learning_rate": 9.460403105421773e-06, "loss": 17.2541, "step": 9562 }, { "epoch": 0.17480395561811102, "grad_norm": 7.554736078456946, "learning_rate": 9.460269337014838e-06, "loss": 17.9618, "step": 9563 }, { "epoch": 0.17482223481455755, "grad_norm": 8.529445122710069, "learning_rate": 9.460135552975043e-06, "loss": 18.1476, "step": 9564 }, { "epoch": 0.1748405140110041, "grad_norm": 7.224102814581794, "learning_rate": 9.460001753302862e-06, "loss": 17.9554, "step": 9565 }, { "epoch": 0.1748587932074506, "grad_norm": 7.694910481548016, "learning_rate": 9.459867937998762e-06, "loss": 18.3189, "step": 9566 }, { "epoch": 0.17487707240389713, "grad_norm": 6.902715343961525, "learning_rate": 9.45973410706321e-06, "loss": 17.6001, "step": 9567 }, { "epoch": 0.17489535160034364, "grad_norm": 6.689873324000622, "learning_rate": 9.459600260496677e-06, "loss": 17.6243, "step": 9568 }, { "epoch": 0.17491363079679018, "grad_norm": 6.086483769488931, "learning_rate": 9.459466398299632e-06, "loss": 17.2564, "step": 9569 }, { "epoch": 0.17493190999323668, "grad_norm": 7.063496404856035, "learning_rate": 9.459332520472546e-06, "loss": 17.6633, "step": 9570 }, { "epoch": 0.17495018918968322, "grad_norm": 7.943562445239964, "learning_rate": 9.459198627015885e-06, "loss": 18.2078, "step": 9571 }, { "epoch": 0.17496846838612976, "grad_norm": 6.494236879534126, "learning_rate": 9.45906471793012e-06, "loss": 17.5818, "step": 9572 }, { "epoch": 0.17498674758257626, "grad_norm": 7.441500171931855, "learning_rate": 9.458930793215722e-06, "loss": 17.849, "step": 9573 }, { "epoch": 0.1750050267790228, "grad_norm": 8.228828669558796, "learning_rate": 9.458796852873155e-06, "loss": 18.0843, "step": 9574 }, { "epoch": 0.1750233059754693, "grad_norm": 5.825673737747694, "learning_rate": 9.458662896902895e-06, "loss": 17.0919, "step": 9575 }, { "epoch": 0.17504158517191584, "grad_norm": 5.872710126464134, "learning_rate": 9.458528925305406e-06, "loss": 17.2865, "step": 9576 }, { "epoch": 0.17505986436836238, "grad_norm": 7.611633683494937, "learning_rate": 9.458394938081162e-06, "loss": 17.8971, "step": 9577 }, { "epoch": 0.17507814356480889, "grad_norm": 7.063929080650453, "learning_rate": 9.45826093523063e-06, "loss": 17.736, "step": 9578 }, { "epoch": 0.17509642276125542, "grad_norm": 7.791055831721408, "learning_rate": 9.45812691675428e-06, "loss": 18.0464, "step": 9579 }, { "epoch": 0.17511470195770193, "grad_norm": 7.736108451433705, "learning_rate": 9.457992882652583e-06, "loss": 17.5614, "step": 9580 }, { "epoch": 0.17513298115414846, "grad_norm": 8.217265916851911, "learning_rate": 9.457858832926006e-06, "loss": 17.9376, "step": 9581 }, { "epoch": 0.175151260350595, "grad_norm": 6.331160305006264, "learning_rate": 9.457724767575023e-06, "loss": 17.4408, "step": 9582 }, { "epoch": 0.1751695395470415, "grad_norm": 6.254197417031652, "learning_rate": 9.4575906866001e-06, "loss": 17.1966, "step": 9583 }, { "epoch": 0.17518781874348804, "grad_norm": 7.363918482060166, "learning_rate": 9.457456590001708e-06, "loss": 17.5009, "step": 9584 }, { "epoch": 0.17520609793993455, "grad_norm": 6.023205168853669, "learning_rate": 9.457322477780317e-06, "loss": 17.2705, "step": 9585 }, { "epoch": 0.1752243771363811, "grad_norm": 6.668531385943409, "learning_rate": 9.457188349936399e-06, "loss": 17.4821, "step": 9586 }, { "epoch": 0.1752426563328276, "grad_norm": 6.677434109572615, "learning_rate": 9.457054206470422e-06, "loss": 17.4897, "step": 9587 }, { "epoch": 0.17526093552927413, "grad_norm": 7.421678870093581, "learning_rate": 9.456920047382858e-06, "loss": 17.7544, "step": 9588 }, { "epoch": 0.17527921472572067, "grad_norm": 5.749753796678464, "learning_rate": 9.456785872674174e-06, "loss": 17.1499, "step": 9589 }, { "epoch": 0.17529749392216717, "grad_norm": 7.657753373935576, "learning_rate": 9.456651682344844e-06, "loss": 17.8947, "step": 9590 }, { "epoch": 0.1753157731186137, "grad_norm": 5.800114173360619, "learning_rate": 9.456517476395334e-06, "loss": 17.5072, "step": 9591 }, { "epoch": 0.17533405231506022, "grad_norm": 6.370407507874773, "learning_rate": 9.45638325482612e-06, "loss": 17.6061, "step": 9592 }, { "epoch": 0.17535233151150675, "grad_norm": 7.54027055496554, "learning_rate": 9.456249017637669e-06, "loss": 18.0063, "step": 9593 }, { "epoch": 0.1753706107079533, "grad_norm": 7.614861521245855, "learning_rate": 9.45611476483045e-06, "loss": 17.9974, "step": 9594 }, { "epoch": 0.1753888899043998, "grad_norm": 7.161606441548655, "learning_rate": 9.455980496404937e-06, "loss": 18.0735, "step": 9595 }, { "epoch": 0.17540716910084633, "grad_norm": 8.015904801234518, "learning_rate": 9.4558462123616e-06, "loss": 18.0219, "step": 9596 }, { "epoch": 0.17542544829729284, "grad_norm": 6.314264808075519, "learning_rate": 9.455711912700906e-06, "loss": 17.5404, "step": 9597 }, { "epoch": 0.17544372749373938, "grad_norm": 6.389007170985312, "learning_rate": 9.455577597423329e-06, "loss": 17.4354, "step": 9598 }, { "epoch": 0.1754620066901859, "grad_norm": 6.450687917607619, "learning_rate": 9.45544326652934e-06, "loss": 17.6394, "step": 9599 }, { "epoch": 0.17548028588663242, "grad_norm": 8.174249896841113, "learning_rate": 9.455308920019407e-06, "loss": 17.6692, "step": 9600 }, { "epoch": 0.17549856508307896, "grad_norm": 6.553016425433632, "learning_rate": 9.455174557894006e-06, "loss": 17.7826, "step": 9601 }, { "epoch": 0.17551684427952546, "grad_norm": 6.90399626900844, "learning_rate": 9.455040180153602e-06, "loss": 17.644, "step": 9602 }, { "epoch": 0.175535123475972, "grad_norm": 7.13345054726534, "learning_rate": 9.454905786798671e-06, "loss": 17.8096, "step": 9603 }, { "epoch": 0.1755534026724185, "grad_norm": 6.901085836989342, "learning_rate": 9.454771377829682e-06, "loss": 17.8616, "step": 9604 }, { "epoch": 0.17557168186886504, "grad_norm": 6.407392162682347, "learning_rate": 9.454636953247106e-06, "loss": 17.5621, "step": 9605 }, { "epoch": 0.17558996106531158, "grad_norm": 8.909637325449719, "learning_rate": 9.454502513051411e-06, "loss": 18.2019, "step": 9606 }, { "epoch": 0.1756082402617581, "grad_norm": 7.75470214126141, "learning_rate": 9.454368057243072e-06, "loss": 17.8469, "step": 9607 }, { "epoch": 0.17562651945820462, "grad_norm": 6.488640748696882, "learning_rate": 9.454233585822562e-06, "loss": 17.6242, "step": 9608 }, { "epoch": 0.17564479865465113, "grad_norm": 7.347397929283839, "learning_rate": 9.454099098790348e-06, "loss": 17.8359, "step": 9609 }, { "epoch": 0.17566307785109767, "grad_norm": 7.888743786396532, "learning_rate": 9.453964596146902e-06, "loss": 18.2884, "step": 9610 }, { "epoch": 0.1756813570475442, "grad_norm": 8.862293313268275, "learning_rate": 9.453830077892699e-06, "loss": 18.3987, "step": 9611 }, { "epoch": 0.1756996362439907, "grad_norm": 7.377641513426715, "learning_rate": 9.453695544028207e-06, "loss": 17.9834, "step": 9612 }, { "epoch": 0.17571791544043724, "grad_norm": 9.07862999594866, "learning_rate": 9.453560994553899e-06, "loss": 18.696, "step": 9613 }, { "epoch": 0.17573619463688375, "grad_norm": 6.545419816988638, "learning_rate": 9.453426429470247e-06, "loss": 17.328, "step": 9614 }, { "epoch": 0.1757544738333303, "grad_norm": 6.713530668856103, "learning_rate": 9.45329184877772e-06, "loss": 17.2036, "step": 9615 }, { "epoch": 0.17577275302977682, "grad_norm": 5.675338688006315, "learning_rate": 9.453157252476793e-06, "loss": 17.1771, "step": 9616 }, { "epoch": 0.17579103222622333, "grad_norm": 7.9498118466028185, "learning_rate": 9.453022640567936e-06, "loss": 17.8481, "step": 9617 }, { "epoch": 0.17580931142266987, "grad_norm": 7.229448778468172, "learning_rate": 9.45288801305162e-06, "loss": 17.6368, "step": 9618 }, { "epoch": 0.17582759061911638, "grad_norm": 6.703388893560162, "learning_rate": 9.452753369928318e-06, "loss": 17.4984, "step": 9619 }, { "epoch": 0.1758458698155629, "grad_norm": 6.976151557876985, "learning_rate": 9.452618711198503e-06, "loss": 17.7493, "step": 9620 }, { "epoch": 0.17586414901200942, "grad_norm": 6.700961464767145, "learning_rate": 9.452484036862644e-06, "loss": 17.6641, "step": 9621 }, { "epoch": 0.17588242820845595, "grad_norm": 7.089096875826108, "learning_rate": 9.452349346921217e-06, "loss": 17.7229, "step": 9622 }, { "epoch": 0.1759007074049025, "grad_norm": 6.745625988494399, "learning_rate": 9.45221464137469e-06, "loss": 17.4635, "step": 9623 }, { "epoch": 0.175918986601349, "grad_norm": 5.519624368429729, "learning_rate": 9.452079920223538e-06, "loss": 16.9726, "step": 9624 }, { "epoch": 0.17593726579779553, "grad_norm": 7.556663734793128, "learning_rate": 9.451945183468232e-06, "loss": 17.9782, "step": 9625 }, { "epoch": 0.17595554499424204, "grad_norm": 7.05912937539355, "learning_rate": 9.451810431109245e-06, "loss": 17.5649, "step": 9626 }, { "epoch": 0.17597382419068858, "grad_norm": 7.26119786780201, "learning_rate": 9.451675663147049e-06, "loss": 17.8877, "step": 9627 }, { "epoch": 0.1759921033871351, "grad_norm": 6.14689326735572, "learning_rate": 9.451540879582115e-06, "loss": 17.3651, "step": 9628 }, { "epoch": 0.17601038258358162, "grad_norm": 7.051044815148934, "learning_rate": 9.451406080414915e-06, "loss": 17.7112, "step": 9629 }, { "epoch": 0.17602866178002816, "grad_norm": 8.696622023622663, "learning_rate": 9.451271265645925e-06, "loss": 17.8157, "step": 9630 }, { "epoch": 0.17604694097647466, "grad_norm": 7.096216250790805, "learning_rate": 9.451136435275617e-06, "loss": 17.6581, "step": 9631 }, { "epoch": 0.1760652201729212, "grad_norm": 6.879831978724544, "learning_rate": 9.45100158930446e-06, "loss": 17.526, "step": 9632 }, { "epoch": 0.17608349936936774, "grad_norm": 6.42182044924456, "learning_rate": 9.450866727732929e-06, "loss": 17.5388, "step": 9633 }, { "epoch": 0.17610177856581424, "grad_norm": 6.712998064872013, "learning_rate": 9.450731850561496e-06, "loss": 17.6062, "step": 9634 }, { "epoch": 0.17612005776226078, "grad_norm": 7.162659350706177, "learning_rate": 9.450596957790635e-06, "loss": 17.6215, "step": 9635 }, { "epoch": 0.1761383369587073, "grad_norm": 8.155756487703615, "learning_rate": 9.450462049420816e-06, "loss": 18.093, "step": 9636 }, { "epoch": 0.17615661615515382, "grad_norm": 7.107062681470219, "learning_rate": 9.450327125452517e-06, "loss": 17.442, "step": 9637 }, { "epoch": 0.17617489535160033, "grad_norm": 6.436645919508923, "learning_rate": 9.450192185886205e-06, "loss": 17.4097, "step": 9638 }, { "epoch": 0.17619317454804687, "grad_norm": 7.073010915110433, "learning_rate": 9.450057230722356e-06, "loss": 17.6839, "step": 9639 }, { "epoch": 0.1762114537444934, "grad_norm": 6.3032993215199635, "learning_rate": 9.449922259961443e-06, "loss": 17.4623, "step": 9640 }, { "epoch": 0.1762297329409399, "grad_norm": 6.099549268477369, "learning_rate": 9.44978727360394e-06, "loss": 17.335, "step": 9641 }, { "epoch": 0.17624801213738645, "grad_norm": 7.786219841755767, "learning_rate": 9.449652271650314e-06, "loss": 18.2596, "step": 9642 }, { "epoch": 0.17626629133383295, "grad_norm": 5.731132304582805, "learning_rate": 9.449517254101048e-06, "loss": 17.0984, "step": 9643 }, { "epoch": 0.1762845705302795, "grad_norm": 9.083475202687568, "learning_rate": 9.449382220956607e-06, "loss": 18.6494, "step": 9644 }, { "epoch": 0.17630284972672602, "grad_norm": 6.359743207189475, "learning_rate": 9.449247172217468e-06, "loss": 17.3785, "step": 9645 }, { "epoch": 0.17632112892317253, "grad_norm": 7.051513680981001, "learning_rate": 9.449112107884105e-06, "loss": 17.505, "step": 9646 }, { "epoch": 0.17633940811961907, "grad_norm": 6.2166941084379985, "learning_rate": 9.448977027956989e-06, "loss": 17.4986, "step": 9647 }, { "epoch": 0.17635768731606558, "grad_norm": 7.336685680841691, "learning_rate": 9.448841932436596e-06, "loss": 17.8284, "step": 9648 }, { "epoch": 0.1763759665125121, "grad_norm": 7.702246485336523, "learning_rate": 9.448706821323396e-06, "loss": 17.7406, "step": 9649 }, { "epoch": 0.17639424570895865, "grad_norm": 6.4258054404145355, "learning_rate": 9.448571694617868e-06, "loss": 17.4923, "step": 9650 }, { "epoch": 0.17641252490540515, "grad_norm": 9.007529205106438, "learning_rate": 9.448436552320479e-06, "loss": 18.8918, "step": 9651 }, { "epoch": 0.1764308041018517, "grad_norm": 8.425281287035055, "learning_rate": 9.448301394431707e-06, "loss": 18.4124, "step": 9652 }, { "epoch": 0.1764490832982982, "grad_norm": 8.206058412032595, "learning_rate": 9.448166220952025e-06, "loss": 18.5093, "step": 9653 }, { "epoch": 0.17646736249474473, "grad_norm": 9.808163777383681, "learning_rate": 9.448031031881905e-06, "loss": 18.3022, "step": 9654 }, { "epoch": 0.17648564169119124, "grad_norm": 6.733960562872665, "learning_rate": 9.447895827221822e-06, "loss": 17.687, "step": 9655 }, { "epoch": 0.17650392088763778, "grad_norm": 8.096405222635829, "learning_rate": 9.447760606972252e-06, "loss": 18.2036, "step": 9656 }, { "epoch": 0.1765222000840843, "grad_norm": 7.329142019655432, "learning_rate": 9.447625371133667e-06, "loss": 17.9571, "step": 9657 }, { "epoch": 0.17654047928053082, "grad_norm": 6.581840834842928, "learning_rate": 9.44749011970654e-06, "loss": 17.5703, "step": 9658 }, { "epoch": 0.17655875847697736, "grad_norm": 6.129898632277393, "learning_rate": 9.447354852691345e-06, "loss": 17.1393, "step": 9659 }, { "epoch": 0.17657703767342386, "grad_norm": 6.762291526337835, "learning_rate": 9.44721957008856e-06, "loss": 17.4261, "step": 9660 }, { "epoch": 0.1765953168698704, "grad_norm": 6.813084022090381, "learning_rate": 9.447084271898654e-06, "loss": 17.7524, "step": 9661 }, { "epoch": 0.17661359606631694, "grad_norm": 7.259022069175019, "learning_rate": 9.446948958122105e-06, "loss": 17.8218, "step": 9662 }, { "epoch": 0.17663187526276344, "grad_norm": 6.345286917442418, "learning_rate": 9.446813628759385e-06, "loss": 17.5285, "step": 9663 }, { "epoch": 0.17665015445920998, "grad_norm": 7.507549321072424, "learning_rate": 9.44667828381097e-06, "loss": 17.8682, "step": 9664 }, { "epoch": 0.1766684336556565, "grad_norm": 8.854101984446455, "learning_rate": 9.446542923277334e-06, "loss": 18.0953, "step": 9665 }, { "epoch": 0.17668671285210302, "grad_norm": 7.0324185514608075, "learning_rate": 9.446407547158948e-06, "loss": 17.8778, "step": 9666 }, { "epoch": 0.17670499204854956, "grad_norm": 6.7879172787870825, "learning_rate": 9.44627215545629e-06, "loss": 17.4351, "step": 9667 }, { "epoch": 0.17672327124499607, "grad_norm": 8.513656505846203, "learning_rate": 9.446136748169836e-06, "loss": 18.1137, "step": 9668 }, { "epoch": 0.1767415504414426, "grad_norm": 7.215523316095467, "learning_rate": 9.446001325300058e-06, "loss": 17.8333, "step": 9669 }, { "epoch": 0.1767598296378891, "grad_norm": 8.158464109641324, "learning_rate": 9.445865886847429e-06, "loss": 18.5054, "step": 9670 }, { "epoch": 0.17677810883433565, "grad_norm": 6.687987689542999, "learning_rate": 9.445730432812429e-06, "loss": 17.5526, "step": 9671 }, { "epoch": 0.17679638803078215, "grad_norm": 6.478660305319019, "learning_rate": 9.445594963195529e-06, "loss": 17.5868, "step": 9672 }, { "epoch": 0.1768146672272287, "grad_norm": 5.151104796398375, "learning_rate": 9.445459477997203e-06, "loss": 16.9636, "step": 9673 }, { "epoch": 0.17683294642367522, "grad_norm": 7.433461809978908, "learning_rate": 9.445323977217927e-06, "loss": 18.2122, "step": 9674 }, { "epoch": 0.17685122562012173, "grad_norm": 7.8410617284588255, "learning_rate": 9.445188460858176e-06, "loss": 17.9256, "step": 9675 }, { "epoch": 0.17686950481656827, "grad_norm": 7.321948538376839, "learning_rate": 9.445052928918428e-06, "loss": 17.8209, "step": 9676 }, { "epoch": 0.17688778401301478, "grad_norm": 6.376730731198681, "learning_rate": 9.444917381399153e-06, "loss": 17.3759, "step": 9677 }, { "epoch": 0.1769060632094613, "grad_norm": 7.316377252451896, "learning_rate": 9.444781818300828e-06, "loss": 18.0705, "step": 9678 }, { "epoch": 0.17692434240590785, "grad_norm": 7.027882068028113, "learning_rate": 9.444646239623929e-06, "loss": 17.7918, "step": 9679 }, { "epoch": 0.17694262160235436, "grad_norm": 7.850501897469477, "learning_rate": 9.444510645368932e-06, "loss": 17.9935, "step": 9680 }, { "epoch": 0.1769609007988009, "grad_norm": 7.419180565871145, "learning_rate": 9.444375035536309e-06, "loss": 17.9222, "step": 9681 }, { "epoch": 0.1769791799952474, "grad_norm": 6.110589969967948, "learning_rate": 9.444239410126538e-06, "loss": 17.4264, "step": 9682 }, { "epoch": 0.17699745919169393, "grad_norm": 6.622212880277883, "learning_rate": 9.444103769140094e-06, "loss": 17.6152, "step": 9683 }, { "epoch": 0.17701573838814047, "grad_norm": 7.350055275318429, "learning_rate": 9.44396811257745e-06, "loss": 18.2224, "step": 9684 }, { "epoch": 0.17703401758458698, "grad_norm": 5.899957783709529, "learning_rate": 9.443832440439084e-06, "loss": 17.0868, "step": 9685 }, { "epoch": 0.1770522967810335, "grad_norm": 9.15888569103452, "learning_rate": 9.443696752725473e-06, "loss": 18.4873, "step": 9686 }, { "epoch": 0.17707057597748002, "grad_norm": 7.1445693175578775, "learning_rate": 9.443561049437089e-06, "loss": 17.8865, "step": 9687 }, { "epoch": 0.17708885517392656, "grad_norm": 8.27896390971788, "learning_rate": 9.44342533057441e-06, "loss": 17.904, "step": 9688 }, { "epoch": 0.17710713437037306, "grad_norm": 7.739589589633317, "learning_rate": 9.443289596137909e-06, "loss": 17.9802, "step": 9689 }, { "epoch": 0.1771254135668196, "grad_norm": 6.871999460302412, "learning_rate": 9.443153846128063e-06, "loss": 17.5567, "step": 9690 }, { "epoch": 0.17714369276326614, "grad_norm": 6.700836374363081, "learning_rate": 9.443018080545352e-06, "loss": 17.5755, "step": 9691 }, { "epoch": 0.17716197195971264, "grad_norm": 6.385486423826585, "learning_rate": 9.442882299390246e-06, "loss": 17.4786, "step": 9692 }, { "epoch": 0.17718025115615918, "grad_norm": 7.213933951038367, "learning_rate": 9.442746502663223e-06, "loss": 17.8697, "step": 9693 }, { "epoch": 0.1771985303526057, "grad_norm": 7.214876349743183, "learning_rate": 9.442610690364758e-06, "loss": 17.9732, "step": 9694 }, { "epoch": 0.17721680954905222, "grad_norm": 6.9425163586911225, "learning_rate": 9.44247486249533e-06, "loss": 17.7903, "step": 9695 }, { "epoch": 0.17723508874549876, "grad_norm": 6.178478150841416, "learning_rate": 9.442339019055412e-06, "loss": 17.3213, "step": 9696 }, { "epoch": 0.17725336794194527, "grad_norm": 6.278235189723746, "learning_rate": 9.442203160045482e-06, "loss": 17.541, "step": 9697 }, { "epoch": 0.1772716471383918, "grad_norm": 6.115087524574453, "learning_rate": 9.442067285466014e-06, "loss": 17.7573, "step": 9698 }, { "epoch": 0.1772899263348383, "grad_norm": 7.899599701776045, "learning_rate": 9.441931395317488e-06, "loss": 18.0926, "step": 9699 }, { "epoch": 0.17730820553128485, "grad_norm": 6.094800422486613, "learning_rate": 9.441795489600374e-06, "loss": 17.2838, "step": 9700 }, { "epoch": 0.17732648472773138, "grad_norm": 8.220077362304531, "learning_rate": 9.441659568315156e-06, "loss": 18.1649, "step": 9701 }, { "epoch": 0.1773447639241779, "grad_norm": 8.180979102384269, "learning_rate": 9.441523631462306e-06, "loss": 18.0576, "step": 9702 }, { "epoch": 0.17736304312062443, "grad_norm": 6.821931812911939, "learning_rate": 9.4413876790423e-06, "loss": 17.7972, "step": 9703 }, { "epoch": 0.17738132231707093, "grad_norm": 9.030951775341116, "learning_rate": 9.441251711055616e-06, "loss": 18.5217, "step": 9704 }, { "epoch": 0.17739960151351747, "grad_norm": 6.0484269852928225, "learning_rate": 9.44111572750273e-06, "loss": 17.4727, "step": 9705 }, { "epoch": 0.17741788070996398, "grad_norm": 7.721039215305776, "learning_rate": 9.440979728384118e-06, "loss": 18.0058, "step": 9706 }, { "epoch": 0.1774361599064105, "grad_norm": 6.295985288778256, "learning_rate": 9.440843713700258e-06, "loss": 17.2709, "step": 9707 }, { "epoch": 0.17745443910285705, "grad_norm": 7.319272852063073, "learning_rate": 9.440707683451627e-06, "loss": 17.9808, "step": 9708 }, { "epoch": 0.17747271829930356, "grad_norm": 9.102364113732065, "learning_rate": 9.4405716376387e-06, "loss": 17.7096, "step": 9709 }, { "epoch": 0.1774909974957501, "grad_norm": 6.809521540271753, "learning_rate": 9.440435576261957e-06, "loss": 17.5893, "step": 9710 }, { "epoch": 0.1775092766921966, "grad_norm": 6.852650002439833, "learning_rate": 9.44029949932187e-06, "loss": 17.7249, "step": 9711 }, { "epoch": 0.17752755588864313, "grad_norm": 6.526419833756175, "learning_rate": 9.440163406818919e-06, "loss": 17.6222, "step": 9712 }, { "epoch": 0.17754583508508967, "grad_norm": 6.898101318285359, "learning_rate": 9.44002729875358e-06, "loss": 17.765, "step": 9713 }, { "epoch": 0.17756411428153618, "grad_norm": 6.788442053561319, "learning_rate": 9.439891175126331e-06, "loss": 17.6916, "step": 9714 }, { "epoch": 0.17758239347798271, "grad_norm": 6.906235154036778, "learning_rate": 9.43975503593765e-06, "loss": 17.5038, "step": 9715 }, { "epoch": 0.17760067267442922, "grad_norm": 7.042972173507783, "learning_rate": 9.439618881188014e-06, "loss": 17.6661, "step": 9716 }, { "epoch": 0.17761895187087576, "grad_norm": 7.19275145236644, "learning_rate": 9.439482710877896e-06, "loss": 17.5986, "step": 9717 }, { "epoch": 0.1776372310673223, "grad_norm": 7.130931651391098, "learning_rate": 9.439346525007777e-06, "loss": 17.9464, "step": 9718 }, { "epoch": 0.1776555102637688, "grad_norm": 6.67146969845684, "learning_rate": 9.439210323578134e-06, "loss": 17.3904, "step": 9719 }, { "epoch": 0.17767378946021534, "grad_norm": 7.465071058551338, "learning_rate": 9.439074106589445e-06, "loss": 17.5445, "step": 9720 }, { "epoch": 0.17769206865666184, "grad_norm": 7.690613516101903, "learning_rate": 9.438937874042185e-06, "loss": 17.9147, "step": 9721 }, { "epoch": 0.17771034785310838, "grad_norm": 6.656617422977974, "learning_rate": 9.438801625936832e-06, "loss": 17.6754, "step": 9722 }, { "epoch": 0.1777286270495549, "grad_norm": 7.680997168893982, "learning_rate": 9.438665362273868e-06, "loss": 17.6818, "step": 9723 }, { "epoch": 0.17774690624600142, "grad_norm": 6.51239015634626, "learning_rate": 9.438529083053765e-06, "loss": 17.5307, "step": 9724 }, { "epoch": 0.17776518544244796, "grad_norm": 6.603219062765521, "learning_rate": 9.438392788277002e-06, "loss": 17.8635, "step": 9725 }, { "epoch": 0.17778346463889447, "grad_norm": 7.559472034188811, "learning_rate": 9.438256477944058e-06, "loss": 17.8341, "step": 9726 }, { "epoch": 0.177801743835341, "grad_norm": 7.86013382771788, "learning_rate": 9.438120152055413e-06, "loss": 18.0118, "step": 9727 }, { "epoch": 0.1778200230317875, "grad_norm": 6.620496532263314, "learning_rate": 9.437983810611537e-06, "loss": 17.5389, "step": 9728 }, { "epoch": 0.17783830222823405, "grad_norm": 7.8524726290862175, "learning_rate": 9.437847453612916e-06, "loss": 18.0236, "step": 9729 }, { "epoch": 0.17785658142468058, "grad_norm": 6.803781577666788, "learning_rate": 9.437711081060024e-06, "loss": 17.722, "step": 9730 }, { "epoch": 0.1778748606211271, "grad_norm": 6.804629365353588, "learning_rate": 9.437574692953339e-06, "loss": 17.4993, "step": 9731 }, { "epoch": 0.17789313981757363, "grad_norm": 6.2524073121621715, "learning_rate": 9.437438289293342e-06, "loss": 17.3258, "step": 9732 }, { "epoch": 0.17791141901402013, "grad_norm": 6.462773309750215, "learning_rate": 9.437301870080507e-06, "loss": 17.9451, "step": 9733 }, { "epoch": 0.17792969821046667, "grad_norm": 6.460433370820935, "learning_rate": 9.437165435315315e-06, "loss": 17.2655, "step": 9734 }, { "epoch": 0.1779479774069132, "grad_norm": 7.436405726537002, "learning_rate": 9.437028984998242e-06, "loss": 17.7149, "step": 9735 }, { "epoch": 0.1779662566033597, "grad_norm": 6.176887549546213, "learning_rate": 9.436892519129767e-06, "loss": 17.4163, "step": 9736 }, { "epoch": 0.17798453579980625, "grad_norm": 7.730634578467468, "learning_rate": 9.436756037710371e-06, "loss": 17.4026, "step": 9737 }, { "epoch": 0.17800281499625276, "grad_norm": 5.161988822903628, "learning_rate": 9.436619540740528e-06, "loss": 16.8931, "step": 9738 }, { "epoch": 0.1780210941926993, "grad_norm": 7.344605573317709, "learning_rate": 9.436483028220719e-06, "loss": 17.7463, "step": 9739 }, { "epoch": 0.1780393733891458, "grad_norm": 8.046813266600296, "learning_rate": 9.436346500151423e-06, "loss": 17.9651, "step": 9740 }, { "epoch": 0.17805765258559234, "grad_norm": 6.393431035630223, "learning_rate": 9.436209956533117e-06, "loss": 17.5466, "step": 9741 }, { "epoch": 0.17807593178203887, "grad_norm": 6.994112434580161, "learning_rate": 9.436073397366282e-06, "loss": 17.9602, "step": 9742 }, { "epoch": 0.17809421097848538, "grad_norm": 7.571745779934617, "learning_rate": 9.435936822651391e-06, "loss": 17.9859, "step": 9743 }, { "epoch": 0.17811249017493191, "grad_norm": 7.051017052383482, "learning_rate": 9.435800232388927e-06, "loss": 17.8538, "step": 9744 }, { "epoch": 0.17813076937137842, "grad_norm": 7.086992505485541, "learning_rate": 9.43566362657937e-06, "loss": 17.9082, "step": 9745 }, { "epoch": 0.17814904856782496, "grad_norm": 7.575001476737331, "learning_rate": 9.435527005223197e-06, "loss": 18.3628, "step": 9746 }, { "epoch": 0.1781673277642715, "grad_norm": 6.620470467188751, "learning_rate": 9.435390368320885e-06, "loss": 17.7196, "step": 9747 }, { "epoch": 0.178185606960718, "grad_norm": 6.767458937127244, "learning_rate": 9.435253715872917e-06, "loss": 17.5436, "step": 9748 }, { "epoch": 0.17820388615716454, "grad_norm": 7.768517323398791, "learning_rate": 9.435117047879768e-06, "loss": 17.5803, "step": 9749 }, { "epoch": 0.17822216535361105, "grad_norm": 8.52002998348253, "learning_rate": 9.434980364341917e-06, "loss": 18.5091, "step": 9750 }, { "epoch": 0.17824044455005758, "grad_norm": 8.60681587905685, "learning_rate": 9.434843665259847e-06, "loss": 18.4451, "step": 9751 }, { "epoch": 0.17825872374650412, "grad_norm": 6.936536731140007, "learning_rate": 9.434706950634034e-06, "loss": 17.6692, "step": 9752 }, { "epoch": 0.17827700294295062, "grad_norm": 7.080582829257614, "learning_rate": 9.434570220464959e-06, "loss": 17.5401, "step": 9753 }, { "epoch": 0.17829528213939716, "grad_norm": 6.9923608229850815, "learning_rate": 9.434433474753098e-06, "loss": 17.8878, "step": 9754 }, { "epoch": 0.17831356133584367, "grad_norm": 8.90585203556734, "learning_rate": 9.434296713498934e-06, "loss": 18.6246, "step": 9755 }, { "epoch": 0.1783318405322902, "grad_norm": 7.578519220791576, "learning_rate": 9.434159936702943e-06, "loss": 17.9341, "step": 9756 }, { "epoch": 0.1783501197287367, "grad_norm": 6.728053978855922, "learning_rate": 9.434023144365608e-06, "loss": 17.7731, "step": 9757 }, { "epoch": 0.17836839892518325, "grad_norm": 7.003711526559738, "learning_rate": 9.433886336487407e-06, "loss": 17.5275, "step": 9758 }, { "epoch": 0.17838667812162978, "grad_norm": 8.455193694248191, "learning_rate": 9.433749513068818e-06, "loss": 18.0235, "step": 9759 }, { "epoch": 0.1784049573180763, "grad_norm": 5.342623445358985, "learning_rate": 9.433612674110322e-06, "loss": 16.9604, "step": 9760 }, { "epoch": 0.17842323651452283, "grad_norm": 7.356764788393615, "learning_rate": 9.433475819612399e-06, "loss": 17.9372, "step": 9761 }, { "epoch": 0.17844151571096933, "grad_norm": 7.828348541507714, "learning_rate": 9.433338949575527e-06, "loss": 17.8288, "step": 9762 }, { "epoch": 0.17845979490741587, "grad_norm": 6.98720409664005, "learning_rate": 9.433202064000187e-06, "loss": 17.7981, "step": 9763 }, { "epoch": 0.1784780741038624, "grad_norm": 6.788120270191315, "learning_rate": 9.433065162886859e-06, "loss": 17.7682, "step": 9764 }, { "epoch": 0.1784963533003089, "grad_norm": 5.223666450826554, "learning_rate": 9.432928246236022e-06, "loss": 16.8469, "step": 9765 }, { "epoch": 0.17851463249675545, "grad_norm": 7.726231461511586, "learning_rate": 9.432791314048156e-06, "loss": 18.1611, "step": 9766 }, { "epoch": 0.17853291169320196, "grad_norm": 6.963165020375716, "learning_rate": 9.432654366323741e-06, "loss": 17.8006, "step": 9767 }, { "epoch": 0.1785511908896485, "grad_norm": 5.360391003965571, "learning_rate": 9.432517403063257e-06, "loss": 17.1445, "step": 9768 }, { "epoch": 0.17856947008609503, "grad_norm": 6.51934534987101, "learning_rate": 9.432380424267185e-06, "loss": 17.8409, "step": 9769 }, { "epoch": 0.17858774928254154, "grad_norm": 6.2131845811506095, "learning_rate": 9.432243429936003e-06, "loss": 17.5967, "step": 9770 }, { "epoch": 0.17860602847898807, "grad_norm": 6.490373216880616, "learning_rate": 9.432106420070193e-06, "loss": 17.188, "step": 9771 }, { "epoch": 0.17862430767543458, "grad_norm": 7.772721584194353, "learning_rate": 9.431969394670235e-06, "loss": 18.0158, "step": 9772 }, { "epoch": 0.17864258687188111, "grad_norm": 6.6291600442599625, "learning_rate": 9.431832353736608e-06, "loss": 17.5063, "step": 9773 }, { "epoch": 0.17866086606832762, "grad_norm": 6.337764937297904, "learning_rate": 9.431695297269794e-06, "loss": 17.4012, "step": 9774 }, { "epoch": 0.17867914526477416, "grad_norm": 5.528648977290598, "learning_rate": 9.431558225270272e-06, "loss": 17.1467, "step": 9775 }, { "epoch": 0.1786974244612207, "grad_norm": 8.17410182241707, "learning_rate": 9.431421137738523e-06, "loss": 18.0794, "step": 9776 }, { "epoch": 0.1787157036576672, "grad_norm": 7.257810407204982, "learning_rate": 9.431284034675029e-06, "loss": 17.6139, "step": 9777 }, { "epoch": 0.17873398285411374, "grad_norm": 7.471120028201146, "learning_rate": 9.431146916080267e-06, "loss": 17.7815, "step": 9778 }, { "epoch": 0.17875226205056025, "grad_norm": 6.23432741622461, "learning_rate": 9.431009781954721e-06, "loss": 17.3911, "step": 9779 }, { "epoch": 0.17877054124700678, "grad_norm": 8.843764130326498, "learning_rate": 9.430872632298868e-06, "loss": 18.4642, "step": 9780 }, { "epoch": 0.17878882044345332, "grad_norm": 6.205504168993172, "learning_rate": 9.430735467113192e-06, "loss": 17.344, "step": 9781 }, { "epoch": 0.17880709963989982, "grad_norm": 8.572140095617657, "learning_rate": 9.430598286398174e-06, "loss": 18.2109, "step": 9782 }, { "epoch": 0.17882537883634636, "grad_norm": 9.004655976115545, "learning_rate": 9.430461090154293e-06, "loss": 18.4639, "step": 9783 }, { "epoch": 0.17884365803279287, "grad_norm": 8.06569636279287, "learning_rate": 9.43032387838203e-06, "loss": 18.4043, "step": 9784 }, { "epoch": 0.1788619372292394, "grad_norm": 6.832525538322095, "learning_rate": 9.430186651081865e-06, "loss": 17.5653, "step": 9785 }, { "epoch": 0.17888021642568594, "grad_norm": 7.490859644683718, "learning_rate": 9.430049408254282e-06, "loss": 18.0886, "step": 9786 }, { "epoch": 0.17889849562213245, "grad_norm": 7.045513759459739, "learning_rate": 9.429912149899758e-06, "loss": 17.5878, "step": 9787 }, { "epoch": 0.17891677481857898, "grad_norm": 8.059792695285463, "learning_rate": 9.429774876018779e-06, "loss": 18.1043, "step": 9788 }, { "epoch": 0.1789350540150255, "grad_norm": 6.0787364962803645, "learning_rate": 9.429637586611822e-06, "loss": 17.3551, "step": 9789 }, { "epoch": 0.17895333321147203, "grad_norm": 4.943654424937247, "learning_rate": 9.42950028167937e-06, "loss": 16.8844, "step": 9790 }, { "epoch": 0.17897161240791853, "grad_norm": 6.14956192712607, "learning_rate": 9.429362961221904e-06, "loss": 17.6326, "step": 9791 }, { "epoch": 0.17898989160436507, "grad_norm": 8.847278040251075, "learning_rate": 9.429225625239906e-06, "loss": 18.5878, "step": 9792 }, { "epoch": 0.1790081708008116, "grad_norm": 10.696567532179113, "learning_rate": 9.429088273733855e-06, "loss": 18.5805, "step": 9793 }, { "epoch": 0.1790264499972581, "grad_norm": 6.926405386042904, "learning_rate": 9.428950906704234e-06, "loss": 17.7401, "step": 9794 }, { "epoch": 0.17904472919370465, "grad_norm": 6.489936352810286, "learning_rate": 9.428813524151525e-06, "loss": 17.5293, "step": 9795 }, { "epoch": 0.17906300839015116, "grad_norm": 6.964940518291793, "learning_rate": 9.428676126076208e-06, "loss": 17.8764, "step": 9796 }, { "epoch": 0.1790812875865977, "grad_norm": 7.295689182900891, "learning_rate": 9.428538712478767e-06, "loss": 17.7729, "step": 9797 }, { "epoch": 0.17909956678304423, "grad_norm": 7.437427660905084, "learning_rate": 9.428401283359682e-06, "loss": 17.875, "step": 9798 }, { "epoch": 0.17911784597949074, "grad_norm": 7.679807104368676, "learning_rate": 9.428263838719434e-06, "loss": 17.7642, "step": 9799 }, { "epoch": 0.17913612517593727, "grad_norm": 6.1585842593847895, "learning_rate": 9.428126378558506e-06, "loss": 17.3325, "step": 9800 }, { "epoch": 0.17915440437238378, "grad_norm": 7.7743560244330405, "learning_rate": 9.427988902877378e-06, "loss": 17.3411, "step": 9801 }, { "epoch": 0.17917268356883032, "grad_norm": 6.914870919299745, "learning_rate": 9.427851411676535e-06, "loss": 17.7374, "step": 9802 }, { "epoch": 0.17919096276527685, "grad_norm": 7.269947505575387, "learning_rate": 9.427713904956455e-06, "loss": 17.6458, "step": 9803 }, { "epoch": 0.17920924196172336, "grad_norm": 7.7465526450080375, "learning_rate": 9.427576382717624e-06, "loss": 18.127, "step": 9804 }, { "epoch": 0.1792275211581699, "grad_norm": 7.438328267476282, "learning_rate": 9.427438844960521e-06, "loss": 17.9759, "step": 9805 }, { "epoch": 0.1792458003546164, "grad_norm": 6.431880029486637, "learning_rate": 9.42730129168563e-06, "loss": 17.5455, "step": 9806 }, { "epoch": 0.17926407955106294, "grad_norm": 7.497073928030626, "learning_rate": 9.42716372289343e-06, "loss": 17.7965, "step": 9807 }, { "epoch": 0.17928235874750945, "grad_norm": 6.6020967923882345, "learning_rate": 9.427026138584408e-06, "loss": 17.5642, "step": 9808 }, { "epoch": 0.17930063794395598, "grad_norm": 7.76060879963446, "learning_rate": 9.426888538759042e-06, "loss": 17.9669, "step": 9809 }, { "epoch": 0.17931891714040252, "grad_norm": 7.807309750568793, "learning_rate": 9.426750923417815e-06, "loss": 18.3616, "step": 9810 }, { "epoch": 0.17933719633684903, "grad_norm": 7.828851331167055, "learning_rate": 9.42661329256121e-06, "loss": 18.0805, "step": 9811 }, { "epoch": 0.17935547553329556, "grad_norm": 6.59582425984953, "learning_rate": 9.426475646189713e-06, "loss": 17.4784, "step": 9812 }, { "epoch": 0.17937375472974207, "grad_norm": 7.099462090867203, "learning_rate": 9.426337984303799e-06, "loss": 17.8802, "step": 9813 }, { "epoch": 0.1793920339261886, "grad_norm": 8.122302428401728, "learning_rate": 9.426200306903957e-06, "loss": 18.4685, "step": 9814 }, { "epoch": 0.17941031312263514, "grad_norm": 6.179400084084576, "learning_rate": 9.426062613990667e-06, "loss": 17.3515, "step": 9815 }, { "epoch": 0.17942859231908165, "grad_norm": 7.911103376841772, "learning_rate": 9.42592490556441e-06, "loss": 18.0463, "step": 9816 }, { "epoch": 0.17944687151552818, "grad_norm": 7.205215188326355, "learning_rate": 9.425787181625671e-06, "loss": 17.8049, "step": 9817 }, { "epoch": 0.1794651507119747, "grad_norm": 6.062865053906306, "learning_rate": 9.425649442174933e-06, "loss": 17.1905, "step": 9818 }, { "epoch": 0.17948342990842123, "grad_norm": 7.380725263172451, "learning_rate": 9.425511687212677e-06, "loss": 18.1633, "step": 9819 }, { "epoch": 0.17950170910486776, "grad_norm": 6.652388341966013, "learning_rate": 9.425373916739384e-06, "loss": 17.4928, "step": 9820 }, { "epoch": 0.17951998830131427, "grad_norm": 6.9587652781673555, "learning_rate": 9.425236130755544e-06, "loss": 17.7109, "step": 9821 }, { "epoch": 0.1795382674977608, "grad_norm": 6.35310895058361, "learning_rate": 9.425098329261632e-06, "loss": 17.3162, "step": 9822 }, { "epoch": 0.17955654669420731, "grad_norm": 7.297330906963598, "learning_rate": 9.424960512258136e-06, "loss": 18.0997, "step": 9823 }, { "epoch": 0.17957482589065385, "grad_norm": 6.775515192904784, "learning_rate": 9.424822679745536e-06, "loss": 17.8178, "step": 9824 }, { "epoch": 0.17959310508710036, "grad_norm": 9.758082204390167, "learning_rate": 9.424684831724318e-06, "loss": 18.8286, "step": 9825 }, { "epoch": 0.1796113842835469, "grad_norm": 7.322342902333123, "learning_rate": 9.424546968194963e-06, "loss": 17.5736, "step": 9826 }, { "epoch": 0.17962966347999343, "grad_norm": 5.823542098051173, "learning_rate": 9.424409089157955e-06, "loss": 17.1623, "step": 9827 }, { "epoch": 0.17964794267643994, "grad_norm": 7.162051981471785, "learning_rate": 9.424271194613776e-06, "loss": 17.82, "step": 9828 }, { "epoch": 0.17966622187288647, "grad_norm": 6.903782555726466, "learning_rate": 9.424133284562911e-06, "loss": 17.8435, "step": 9829 }, { "epoch": 0.17968450106933298, "grad_norm": 7.52178146204798, "learning_rate": 9.423995359005844e-06, "loss": 17.5393, "step": 9830 }, { "epoch": 0.17970278026577952, "grad_norm": 6.0192047068100765, "learning_rate": 9.423857417943057e-06, "loss": 17.2282, "step": 9831 }, { "epoch": 0.17972105946222605, "grad_norm": 6.631329068271743, "learning_rate": 9.423719461375031e-06, "loss": 17.7655, "step": 9832 }, { "epoch": 0.17973933865867256, "grad_norm": 5.770385109698054, "learning_rate": 9.423581489302255e-06, "loss": 17.3378, "step": 9833 }, { "epoch": 0.1797576178551191, "grad_norm": 6.433071754790578, "learning_rate": 9.423443501725209e-06, "loss": 17.3947, "step": 9834 }, { "epoch": 0.1797758970515656, "grad_norm": 7.123725461827745, "learning_rate": 9.423305498644376e-06, "loss": 17.3041, "step": 9835 }, { "epoch": 0.17979417624801214, "grad_norm": 6.843623403109756, "learning_rate": 9.423167480060242e-06, "loss": 17.6027, "step": 9836 }, { "epoch": 0.17981245544445867, "grad_norm": 6.312874700147576, "learning_rate": 9.423029445973291e-06, "loss": 17.2771, "step": 9837 }, { "epoch": 0.17983073464090518, "grad_norm": 6.790106535741972, "learning_rate": 9.422891396384004e-06, "loss": 17.3934, "step": 9838 }, { "epoch": 0.17984901383735172, "grad_norm": 6.414037164648971, "learning_rate": 9.422753331292867e-06, "loss": 17.6646, "step": 9839 }, { "epoch": 0.17986729303379823, "grad_norm": 7.2004252066756615, "learning_rate": 9.422615250700363e-06, "loss": 17.9489, "step": 9840 }, { "epoch": 0.17988557223024476, "grad_norm": 7.387366300328245, "learning_rate": 9.422477154606978e-06, "loss": 17.9723, "step": 9841 }, { "epoch": 0.17990385142669127, "grad_norm": 6.858661119487742, "learning_rate": 9.422339043013192e-06, "loss": 17.6167, "step": 9842 }, { "epoch": 0.1799221306231378, "grad_norm": 6.1255287752906336, "learning_rate": 9.422200915919493e-06, "loss": 17.2448, "step": 9843 }, { "epoch": 0.17994040981958434, "grad_norm": 6.7525066962578935, "learning_rate": 9.422062773326361e-06, "loss": 17.758, "step": 9844 }, { "epoch": 0.17995868901603085, "grad_norm": 7.1877365319482855, "learning_rate": 9.421924615234286e-06, "loss": 17.9013, "step": 9845 }, { "epoch": 0.17997696821247738, "grad_norm": 8.481912440458515, "learning_rate": 9.421786441643748e-06, "loss": 18.6036, "step": 9846 }, { "epoch": 0.1799952474089239, "grad_norm": 6.284005726091458, "learning_rate": 9.42164825255523e-06, "loss": 17.2517, "step": 9847 }, { "epoch": 0.18001352660537043, "grad_norm": 6.2068072418850395, "learning_rate": 9.421510047969223e-06, "loss": 17.4497, "step": 9848 }, { "epoch": 0.18003180580181696, "grad_norm": 6.416446149254776, "learning_rate": 9.421371827886203e-06, "loss": 17.3684, "step": 9849 }, { "epoch": 0.18005008499826347, "grad_norm": 6.15143758420953, "learning_rate": 9.42123359230666e-06, "loss": 17.264, "step": 9850 }, { "epoch": 0.18006836419471, "grad_norm": 7.432611704220462, "learning_rate": 9.421095341231077e-06, "loss": 17.6573, "step": 9851 }, { "epoch": 0.18008664339115651, "grad_norm": 6.965229914547005, "learning_rate": 9.420957074659938e-06, "loss": 18.2525, "step": 9852 }, { "epoch": 0.18010492258760305, "grad_norm": 6.079438368269971, "learning_rate": 9.420818792593729e-06, "loss": 17.3922, "step": 9853 }, { "epoch": 0.18012320178404959, "grad_norm": 7.949627215604142, "learning_rate": 9.420680495032932e-06, "loss": 18.1077, "step": 9854 }, { "epoch": 0.1801414809804961, "grad_norm": 6.430165678416542, "learning_rate": 9.420542181978034e-06, "loss": 17.4384, "step": 9855 }, { "epoch": 0.18015976017694263, "grad_norm": 8.106001315581224, "learning_rate": 9.42040385342952e-06, "loss": 18.1259, "step": 9856 }, { "epoch": 0.18017803937338914, "grad_norm": 6.931273081965224, "learning_rate": 9.420265509387874e-06, "loss": 17.6603, "step": 9857 }, { "epoch": 0.18019631856983567, "grad_norm": 7.860322040698806, "learning_rate": 9.420127149853581e-06, "loss": 17.6595, "step": 9858 }, { "epoch": 0.18021459776628218, "grad_norm": 7.522948316284845, "learning_rate": 9.419988774827126e-06, "loss": 17.9882, "step": 9859 }, { "epoch": 0.18023287696272872, "grad_norm": 6.3089213056183535, "learning_rate": 9.419850384308993e-06, "loss": 17.4897, "step": 9860 }, { "epoch": 0.18025115615917525, "grad_norm": 7.5995078159913225, "learning_rate": 9.419711978299668e-06, "loss": 17.979, "step": 9861 }, { "epoch": 0.18026943535562176, "grad_norm": 6.3541903338947066, "learning_rate": 9.419573556799637e-06, "loss": 17.4696, "step": 9862 }, { "epoch": 0.1802877145520683, "grad_norm": 6.597222063316642, "learning_rate": 9.419435119809384e-06, "loss": 17.7096, "step": 9863 }, { "epoch": 0.1803059937485148, "grad_norm": 6.657552047121865, "learning_rate": 9.419296667329394e-06, "loss": 17.4063, "step": 9864 }, { "epoch": 0.18032427294496134, "grad_norm": 6.959719643856069, "learning_rate": 9.419158199360153e-06, "loss": 17.5825, "step": 9865 }, { "epoch": 0.18034255214140787, "grad_norm": 6.589167108452301, "learning_rate": 9.419019715902146e-06, "loss": 17.6088, "step": 9866 }, { "epoch": 0.18036083133785438, "grad_norm": 6.289589791858059, "learning_rate": 9.418881216955858e-06, "loss": 17.3522, "step": 9867 }, { "epoch": 0.18037911053430092, "grad_norm": 6.72399211882093, "learning_rate": 9.418742702521774e-06, "loss": 17.2417, "step": 9868 }, { "epoch": 0.18039738973074743, "grad_norm": 7.565165408967926, "learning_rate": 9.418604172600382e-06, "loss": 17.848, "step": 9869 }, { "epoch": 0.18041566892719396, "grad_norm": 5.029258380477208, "learning_rate": 9.418465627192165e-06, "loss": 16.9919, "step": 9870 }, { "epoch": 0.1804339481236405, "grad_norm": 6.991090099572182, "learning_rate": 9.41832706629761e-06, "loss": 17.768, "step": 9871 }, { "epoch": 0.180452227320087, "grad_norm": 6.551253626491615, "learning_rate": 9.418188489917202e-06, "loss": 17.4717, "step": 9872 }, { "epoch": 0.18047050651653354, "grad_norm": 5.915775501322046, "learning_rate": 9.418049898051425e-06, "loss": 17.1038, "step": 9873 }, { "epoch": 0.18048878571298005, "grad_norm": 7.9635352828765225, "learning_rate": 9.417911290700767e-06, "loss": 18.2166, "step": 9874 }, { "epoch": 0.18050706490942658, "grad_norm": 7.690572860183641, "learning_rate": 9.417772667865714e-06, "loss": 18.1678, "step": 9875 }, { "epoch": 0.1805253441058731, "grad_norm": 6.588647620562926, "learning_rate": 9.417634029546751e-06, "loss": 17.3728, "step": 9876 }, { "epoch": 0.18054362330231963, "grad_norm": 6.904329940733219, "learning_rate": 9.417495375744365e-06, "loss": 17.7944, "step": 9877 }, { "epoch": 0.18056190249876616, "grad_norm": 5.317146962442455, "learning_rate": 9.41735670645904e-06, "loss": 16.975, "step": 9878 }, { "epoch": 0.18058018169521267, "grad_norm": 7.1684649269101195, "learning_rate": 9.417218021691263e-06, "loss": 17.7719, "step": 9879 }, { "epoch": 0.1805984608916592, "grad_norm": 5.861741382044178, "learning_rate": 9.417079321441522e-06, "loss": 17.4348, "step": 9880 }, { "epoch": 0.18061674008810572, "grad_norm": 7.671495340190093, "learning_rate": 9.416940605710298e-06, "loss": 18.0847, "step": 9881 }, { "epoch": 0.18063501928455225, "grad_norm": 7.011991807235905, "learning_rate": 9.416801874498082e-06, "loss": 17.7374, "step": 9882 }, { "epoch": 0.1806532984809988, "grad_norm": 6.787270191127173, "learning_rate": 9.41666312780536e-06, "loss": 17.9021, "step": 9883 }, { "epoch": 0.1806715776774453, "grad_norm": 7.500985617097195, "learning_rate": 9.416524365632615e-06, "loss": 18.2551, "step": 9884 }, { "epoch": 0.18068985687389183, "grad_norm": 6.540146133715169, "learning_rate": 9.416385587980337e-06, "loss": 17.3675, "step": 9885 }, { "epoch": 0.18070813607033834, "grad_norm": 6.088448734189069, "learning_rate": 9.41624679484901e-06, "loss": 17.3463, "step": 9886 }, { "epoch": 0.18072641526678487, "grad_norm": 7.054512600951021, "learning_rate": 9.416107986239121e-06, "loss": 17.8378, "step": 9887 }, { "epoch": 0.1807446944632314, "grad_norm": 6.275529072292613, "learning_rate": 9.415969162151157e-06, "loss": 17.5825, "step": 9888 }, { "epoch": 0.18076297365967792, "grad_norm": 5.658267038377804, "learning_rate": 9.415830322585604e-06, "loss": 17.0669, "step": 9889 }, { "epoch": 0.18078125285612445, "grad_norm": 8.489460603910983, "learning_rate": 9.415691467542948e-06, "loss": 18.1464, "step": 9890 }, { "epoch": 0.18079953205257096, "grad_norm": 6.28354033344867, "learning_rate": 9.415552597023679e-06, "loss": 17.5069, "step": 9891 }, { "epoch": 0.1808178112490175, "grad_norm": 8.075302061599936, "learning_rate": 9.41541371102828e-06, "loss": 17.9517, "step": 9892 }, { "epoch": 0.180836090445464, "grad_norm": 6.797481229337563, "learning_rate": 9.41527480955724e-06, "loss": 17.6253, "step": 9893 }, { "epoch": 0.18085436964191054, "grad_norm": 6.691582855088232, "learning_rate": 9.415135892611043e-06, "loss": 17.5782, "step": 9894 }, { "epoch": 0.18087264883835708, "grad_norm": 8.017505693934982, "learning_rate": 9.414996960190179e-06, "loss": 18.2248, "step": 9895 }, { "epoch": 0.18089092803480358, "grad_norm": 8.452980092894855, "learning_rate": 9.414858012295134e-06, "loss": 18.584, "step": 9896 }, { "epoch": 0.18090920723125012, "grad_norm": 6.917700380185664, "learning_rate": 9.414719048926393e-06, "loss": 17.5312, "step": 9897 }, { "epoch": 0.18092748642769663, "grad_norm": 6.910260643563813, "learning_rate": 9.414580070084446e-06, "loss": 17.5055, "step": 9898 }, { "epoch": 0.18094576562414316, "grad_norm": 10.117433650943333, "learning_rate": 9.41444107576978e-06, "loss": 18.4476, "step": 9899 }, { "epoch": 0.1809640448205897, "grad_norm": 7.18260649555454, "learning_rate": 9.414302065982882e-06, "loss": 17.9295, "step": 9900 }, { "epoch": 0.1809823240170362, "grad_norm": 6.440786598596819, "learning_rate": 9.414163040724235e-06, "loss": 17.7439, "step": 9901 }, { "epoch": 0.18100060321348274, "grad_norm": 7.063733425870379, "learning_rate": 9.414023999994332e-06, "loss": 17.7516, "step": 9902 }, { "epoch": 0.18101888240992925, "grad_norm": 9.131458627224548, "learning_rate": 9.413884943793657e-06, "loss": 18.7895, "step": 9903 }, { "epoch": 0.18103716160637578, "grad_norm": 6.618670036736652, "learning_rate": 9.413745872122698e-06, "loss": 17.6756, "step": 9904 }, { "epoch": 0.18105544080282232, "grad_norm": 7.358234377721218, "learning_rate": 9.413606784981943e-06, "loss": 17.6505, "step": 9905 }, { "epoch": 0.18107371999926883, "grad_norm": 6.151961923717866, "learning_rate": 9.413467682371879e-06, "loss": 17.4507, "step": 9906 }, { "epoch": 0.18109199919571536, "grad_norm": 7.212741272650908, "learning_rate": 9.413328564292994e-06, "loss": 17.6875, "step": 9907 }, { "epoch": 0.18111027839216187, "grad_norm": 8.969986783784599, "learning_rate": 9.413189430745776e-06, "loss": 17.6648, "step": 9908 }, { "epoch": 0.1811285575886084, "grad_norm": 8.318671839400224, "learning_rate": 9.413050281730712e-06, "loss": 18.4372, "step": 9909 }, { "epoch": 0.18114683678505492, "grad_norm": 7.071225881915424, "learning_rate": 9.412911117248289e-06, "loss": 17.5117, "step": 9910 }, { "epoch": 0.18116511598150145, "grad_norm": 6.090548068212628, "learning_rate": 9.412771937298995e-06, "loss": 17.4232, "step": 9911 }, { "epoch": 0.181183395177948, "grad_norm": 7.119644974550266, "learning_rate": 9.412632741883319e-06, "loss": 17.7013, "step": 9912 }, { "epoch": 0.1812016743743945, "grad_norm": 7.907039076382293, "learning_rate": 9.412493531001747e-06, "loss": 18.181, "step": 9913 }, { "epoch": 0.18121995357084103, "grad_norm": 7.043056196251649, "learning_rate": 9.41235430465477e-06, "loss": 17.7467, "step": 9914 }, { "epoch": 0.18123823276728754, "grad_norm": 7.077150538842783, "learning_rate": 9.412215062842872e-06, "loss": 17.6708, "step": 9915 }, { "epoch": 0.18125651196373407, "grad_norm": 9.17201448218564, "learning_rate": 9.412075805566545e-06, "loss": 18.8287, "step": 9916 }, { "epoch": 0.1812747911601806, "grad_norm": 5.606994231286592, "learning_rate": 9.411936532826274e-06, "loss": 17.4046, "step": 9917 }, { "epoch": 0.18129307035662712, "grad_norm": 6.554508488550973, "learning_rate": 9.41179724462255e-06, "loss": 17.5461, "step": 9918 }, { "epoch": 0.18131134955307365, "grad_norm": 7.503180711274367, "learning_rate": 9.411657940955858e-06, "loss": 17.7825, "step": 9919 }, { "epoch": 0.18132962874952016, "grad_norm": 6.995120372790944, "learning_rate": 9.411518621826687e-06, "loss": 17.9555, "step": 9920 }, { "epoch": 0.1813479079459667, "grad_norm": 7.139291895107338, "learning_rate": 9.411379287235527e-06, "loss": 17.5175, "step": 9921 }, { "epoch": 0.18136618714241323, "grad_norm": 7.344403626673978, "learning_rate": 9.411239937182866e-06, "loss": 17.9815, "step": 9922 }, { "epoch": 0.18138446633885974, "grad_norm": 6.886892858166152, "learning_rate": 9.411100571669192e-06, "loss": 17.9211, "step": 9923 }, { "epoch": 0.18140274553530628, "grad_norm": 6.923457077091651, "learning_rate": 9.41096119069499e-06, "loss": 18.0434, "step": 9924 }, { "epoch": 0.18142102473175278, "grad_norm": 7.035732712622207, "learning_rate": 9.410821794260756e-06, "loss": 17.9492, "step": 9925 }, { "epoch": 0.18143930392819932, "grad_norm": 7.3410971156511415, "learning_rate": 9.410682382366973e-06, "loss": 17.9117, "step": 9926 }, { "epoch": 0.18145758312464583, "grad_norm": 7.014893919851392, "learning_rate": 9.410542955014131e-06, "loss": 17.8421, "step": 9927 }, { "epoch": 0.18147586232109236, "grad_norm": 8.667741479907686, "learning_rate": 9.410403512202718e-06, "loss": 18.7418, "step": 9928 }, { "epoch": 0.1814941415175389, "grad_norm": 7.676598377016749, "learning_rate": 9.410264053933222e-06, "loss": 17.7935, "step": 9929 }, { "epoch": 0.1815124207139854, "grad_norm": 9.793170691535114, "learning_rate": 9.410124580206136e-06, "loss": 17.8563, "step": 9930 }, { "epoch": 0.18153069991043194, "grad_norm": 6.292817351753322, "learning_rate": 9.409985091021944e-06, "loss": 17.4834, "step": 9931 }, { "epoch": 0.18154897910687845, "grad_norm": 5.9271806986499005, "learning_rate": 9.409845586381139e-06, "loss": 17.2664, "step": 9932 }, { "epoch": 0.18156725830332499, "grad_norm": 6.017048522299535, "learning_rate": 9.409706066284206e-06, "loss": 17.3449, "step": 9933 }, { "epoch": 0.18158553749977152, "grad_norm": 6.069216586223563, "learning_rate": 9.409566530731638e-06, "loss": 17.3073, "step": 9934 }, { "epoch": 0.18160381669621803, "grad_norm": 6.3845350850509615, "learning_rate": 9.409426979723919e-06, "loss": 17.4427, "step": 9935 }, { "epoch": 0.18162209589266456, "grad_norm": 5.896983928031587, "learning_rate": 9.409287413261543e-06, "loss": 17.2706, "step": 9936 }, { "epoch": 0.18164037508911107, "grad_norm": 6.096302848327695, "learning_rate": 9.409147831344997e-06, "loss": 17.468, "step": 9937 }, { "epoch": 0.1816586542855576, "grad_norm": 7.979016556832373, "learning_rate": 9.40900823397477e-06, "loss": 18.0505, "step": 9938 }, { "epoch": 0.18167693348200414, "grad_norm": 7.068405087083261, "learning_rate": 9.408868621151352e-06, "loss": 17.8445, "step": 9939 }, { "epoch": 0.18169521267845065, "grad_norm": 6.774797892285199, "learning_rate": 9.408728992875233e-06, "loss": 17.6941, "step": 9940 }, { "epoch": 0.1817134918748972, "grad_norm": 6.888916109967754, "learning_rate": 9.408589349146901e-06, "loss": 17.7118, "step": 9941 }, { "epoch": 0.1817317710713437, "grad_norm": 6.737448231143625, "learning_rate": 9.408449689966845e-06, "loss": 17.4894, "step": 9942 }, { "epoch": 0.18175005026779023, "grad_norm": 6.93402269608677, "learning_rate": 9.408310015335555e-06, "loss": 17.6713, "step": 9943 }, { "epoch": 0.18176832946423674, "grad_norm": 7.468910853899415, "learning_rate": 9.408170325253524e-06, "loss": 17.9388, "step": 9944 }, { "epoch": 0.18178660866068327, "grad_norm": 6.567975554918502, "learning_rate": 9.408030619721235e-06, "loss": 17.6042, "step": 9945 }, { "epoch": 0.1818048878571298, "grad_norm": 7.991804699293005, "learning_rate": 9.407890898739182e-06, "loss": 18.2704, "step": 9946 }, { "epoch": 0.18182316705357632, "grad_norm": 6.919307371545031, "learning_rate": 9.407751162307855e-06, "loss": 17.7989, "step": 9947 }, { "epoch": 0.18184144625002285, "grad_norm": 6.082884753677575, "learning_rate": 9.407611410427742e-06, "loss": 17.2717, "step": 9948 }, { "epoch": 0.18185972544646936, "grad_norm": 7.396876469407254, "learning_rate": 9.407471643099333e-06, "loss": 17.7669, "step": 9949 }, { "epoch": 0.1818780046429159, "grad_norm": 6.193139067930993, "learning_rate": 9.407331860323118e-06, "loss": 17.1319, "step": 9950 }, { "epoch": 0.18189628383936243, "grad_norm": 7.05041214785699, "learning_rate": 9.407192062099589e-06, "loss": 17.6034, "step": 9951 }, { "epoch": 0.18191456303580894, "grad_norm": 7.293537176686419, "learning_rate": 9.407052248429234e-06, "loss": 17.9462, "step": 9952 }, { "epoch": 0.18193284223225548, "grad_norm": 5.572292741725751, "learning_rate": 9.406912419312543e-06, "loss": 17.0839, "step": 9953 }, { "epoch": 0.18195112142870198, "grad_norm": 5.805385138851213, "learning_rate": 9.406772574750006e-06, "loss": 17.2494, "step": 9954 }, { "epoch": 0.18196940062514852, "grad_norm": 7.218236653398646, "learning_rate": 9.406632714742115e-06, "loss": 17.6228, "step": 9955 }, { "epoch": 0.18198767982159506, "grad_norm": 5.406273098967501, "learning_rate": 9.406492839289355e-06, "loss": 17.088, "step": 9956 }, { "epoch": 0.18200595901804156, "grad_norm": 8.057469465486134, "learning_rate": 9.406352948392224e-06, "loss": 18.4128, "step": 9957 }, { "epoch": 0.1820242382144881, "grad_norm": 8.223652800163661, "learning_rate": 9.406213042051207e-06, "loss": 17.7673, "step": 9958 }, { "epoch": 0.1820425174109346, "grad_norm": 6.334895683420072, "learning_rate": 9.406073120266794e-06, "loss": 17.4154, "step": 9959 }, { "epoch": 0.18206079660738114, "grad_norm": 7.323503860436332, "learning_rate": 9.405933183039479e-06, "loss": 18.0966, "step": 9960 }, { "epoch": 0.18207907580382765, "grad_norm": 6.70794441563754, "learning_rate": 9.40579323036975e-06, "loss": 17.5014, "step": 9961 }, { "epoch": 0.18209735500027419, "grad_norm": 7.085619004783449, "learning_rate": 9.405653262258097e-06, "loss": 17.9702, "step": 9962 }, { "epoch": 0.18211563419672072, "grad_norm": 7.806110273938615, "learning_rate": 9.405513278705013e-06, "loss": 18.001, "step": 9963 }, { "epoch": 0.18213391339316723, "grad_norm": 6.311410983703041, "learning_rate": 9.405373279710988e-06, "loss": 17.3548, "step": 9964 }, { "epoch": 0.18215219258961377, "grad_norm": 7.53607579749945, "learning_rate": 9.40523326527651e-06, "loss": 17.7716, "step": 9965 }, { "epoch": 0.18217047178606027, "grad_norm": 7.118059738999945, "learning_rate": 9.405093235402072e-06, "loss": 17.9583, "step": 9966 }, { "epoch": 0.1821887509825068, "grad_norm": 6.554212662623746, "learning_rate": 9.404953190088165e-06, "loss": 17.8426, "step": 9967 }, { "epoch": 0.18220703017895334, "grad_norm": 5.7344565315641916, "learning_rate": 9.40481312933528e-06, "loss": 17.0365, "step": 9968 }, { "epoch": 0.18222530937539985, "grad_norm": 7.77142609011555, "learning_rate": 9.404673053143905e-06, "loss": 17.7052, "step": 9969 }, { "epoch": 0.1822435885718464, "grad_norm": 6.707515501917969, "learning_rate": 9.404532961514536e-06, "loss": 17.8102, "step": 9970 }, { "epoch": 0.1822618677682929, "grad_norm": 5.616284311175807, "learning_rate": 9.40439285444766e-06, "loss": 17.2327, "step": 9971 }, { "epoch": 0.18228014696473943, "grad_norm": 7.549665088110754, "learning_rate": 9.404252731943768e-06, "loss": 17.9735, "step": 9972 }, { "epoch": 0.18229842616118597, "grad_norm": 8.599801538164002, "learning_rate": 9.404112594003353e-06, "loss": 18.1074, "step": 9973 }, { "epoch": 0.18231670535763247, "grad_norm": 7.606886871850189, "learning_rate": 9.403972440626907e-06, "loss": 18.0169, "step": 9974 }, { "epoch": 0.182334984554079, "grad_norm": 6.391215479650936, "learning_rate": 9.403832271814918e-06, "loss": 17.5384, "step": 9975 }, { "epoch": 0.18235326375052552, "grad_norm": 5.869188700593644, "learning_rate": 9.40369208756788e-06, "loss": 17.2025, "step": 9976 }, { "epoch": 0.18237154294697205, "grad_norm": 7.249672643884424, "learning_rate": 9.403551887886282e-06, "loss": 17.8932, "step": 9977 }, { "epoch": 0.18238982214341856, "grad_norm": 7.626645051386975, "learning_rate": 9.403411672770618e-06, "loss": 17.8717, "step": 9978 }, { "epoch": 0.1824081013398651, "grad_norm": 7.5216968414726955, "learning_rate": 9.403271442221378e-06, "loss": 18.1983, "step": 9979 }, { "epoch": 0.18242638053631163, "grad_norm": 6.94418319476342, "learning_rate": 9.403131196239053e-06, "loss": 17.8647, "step": 9980 }, { "epoch": 0.18244465973275814, "grad_norm": 7.65154381549128, "learning_rate": 9.402990934824137e-06, "loss": 17.8206, "step": 9981 }, { "epoch": 0.18246293892920468, "grad_norm": 7.73998953084367, "learning_rate": 9.402850657977119e-06, "loss": 17.909, "step": 9982 }, { "epoch": 0.18248121812565118, "grad_norm": 7.880188873447152, "learning_rate": 9.40271036569849e-06, "loss": 17.5407, "step": 9983 }, { "epoch": 0.18249949732209772, "grad_norm": 6.521102938355329, "learning_rate": 9.402570057988746e-06, "loss": 17.4695, "step": 9984 }, { "epoch": 0.18251777651854426, "grad_norm": 7.2910739138281295, "learning_rate": 9.402429734848374e-06, "loss": 17.7697, "step": 9985 }, { "epoch": 0.18253605571499076, "grad_norm": 7.031356327043205, "learning_rate": 9.402289396277869e-06, "loss": 17.8313, "step": 9986 }, { "epoch": 0.1825543349114373, "grad_norm": 7.243873441445957, "learning_rate": 9.40214904227772e-06, "loss": 17.6849, "step": 9987 }, { "epoch": 0.1825726141078838, "grad_norm": 7.303002672344109, "learning_rate": 9.402008672848422e-06, "loss": 17.6079, "step": 9988 }, { "epoch": 0.18259089330433034, "grad_norm": 5.444014869557507, "learning_rate": 9.401868287990465e-06, "loss": 17.0645, "step": 9989 }, { "epoch": 0.18260917250077688, "grad_norm": 7.635553170981905, "learning_rate": 9.401727887704341e-06, "loss": 18.2581, "step": 9990 }, { "epoch": 0.1826274516972234, "grad_norm": 6.646319730785736, "learning_rate": 9.401587471990544e-06, "loss": 17.7106, "step": 9991 }, { "epoch": 0.18264573089366992, "grad_norm": 6.3913214185636855, "learning_rate": 9.401447040849565e-06, "loss": 17.4812, "step": 9992 }, { "epoch": 0.18266401009011643, "grad_norm": 6.793394615008496, "learning_rate": 9.401306594281896e-06, "loss": 17.6085, "step": 9993 }, { "epoch": 0.18268228928656297, "grad_norm": 5.887989498671422, "learning_rate": 9.401166132288028e-06, "loss": 17.2802, "step": 9994 }, { "epoch": 0.18270056848300947, "grad_norm": 6.55002944821401, "learning_rate": 9.401025654868455e-06, "loss": 17.7171, "step": 9995 }, { "epoch": 0.182718847679456, "grad_norm": 7.108601306973718, "learning_rate": 9.40088516202367e-06, "loss": 18.119, "step": 9996 }, { "epoch": 0.18273712687590254, "grad_norm": 7.148578925418961, "learning_rate": 9.400744653754164e-06, "loss": 17.9143, "step": 9997 }, { "epoch": 0.18275540607234905, "grad_norm": 7.107843541409638, "learning_rate": 9.400604130060429e-06, "loss": 17.6363, "step": 9998 }, { "epoch": 0.1827736852687956, "grad_norm": 6.141869069173877, "learning_rate": 9.400463590942959e-06, "loss": 17.3609, "step": 9999 }, { "epoch": 0.1827919644652421, "grad_norm": 6.372164605878134, "learning_rate": 9.400323036402246e-06, "loss": 17.2801, "step": 10000 }, { "epoch": 0.18281024366168863, "grad_norm": 7.07052019562697, "learning_rate": 9.400182466438783e-06, "loss": 17.6534, "step": 10001 }, { "epoch": 0.18282852285813517, "grad_norm": 6.7555910254831915, "learning_rate": 9.400041881053062e-06, "loss": 17.7892, "step": 10002 }, { "epoch": 0.18284680205458168, "grad_norm": 5.703967614405472, "learning_rate": 9.399901280245576e-06, "loss": 17.1553, "step": 10003 }, { "epoch": 0.1828650812510282, "grad_norm": 7.350236892808791, "learning_rate": 9.399760664016817e-06, "loss": 17.7499, "step": 10004 }, { "epoch": 0.18288336044747472, "grad_norm": 9.360314148330353, "learning_rate": 9.399620032367279e-06, "loss": 18.4034, "step": 10005 }, { "epoch": 0.18290163964392125, "grad_norm": 6.742925435216869, "learning_rate": 9.399479385297456e-06, "loss": 17.5546, "step": 10006 }, { "epoch": 0.1829199188403678, "grad_norm": 7.9932538496041, "learning_rate": 9.399338722807838e-06, "loss": 18.0338, "step": 10007 }, { "epoch": 0.1829381980368143, "grad_norm": 5.844579655978202, "learning_rate": 9.39919804489892e-06, "loss": 17.2263, "step": 10008 }, { "epoch": 0.18295647723326083, "grad_norm": 7.339076586709035, "learning_rate": 9.399057351571194e-06, "loss": 17.9018, "step": 10009 }, { "epoch": 0.18297475642970734, "grad_norm": 6.562142229656559, "learning_rate": 9.398916642825155e-06, "loss": 17.3051, "step": 10010 }, { "epoch": 0.18299303562615388, "grad_norm": 5.616188088521416, "learning_rate": 9.398775918661295e-06, "loss": 17.1079, "step": 10011 }, { "epoch": 0.18301131482260038, "grad_norm": 6.6211991045661165, "learning_rate": 9.398635179080105e-06, "loss": 17.5124, "step": 10012 }, { "epoch": 0.18302959401904692, "grad_norm": 6.806000524611527, "learning_rate": 9.398494424082082e-06, "loss": 17.4438, "step": 10013 }, { "epoch": 0.18304787321549346, "grad_norm": 7.556791874915172, "learning_rate": 9.398353653667719e-06, "loss": 17.9809, "step": 10014 }, { "epoch": 0.18306615241193996, "grad_norm": 6.576924936052816, "learning_rate": 9.398212867837505e-06, "loss": 17.6573, "step": 10015 }, { "epoch": 0.1830844316083865, "grad_norm": 6.834198367818092, "learning_rate": 9.398072066591937e-06, "loss": 17.6258, "step": 10016 }, { "epoch": 0.183102710804833, "grad_norm": 6.899273350066197, "learning_rate": 9.39793124993151e-06, "loss": 17.6503, "step": 10017 }, { "epoch": 0.18312099000127954, "grad_norm": 7.19435807009442, "learning_rate": 9.397790417856714e-06, "loss": 17.7391, "step": 10018 }, { "epoch": 0.18313926919772608, "grad_norm": 5.963027513530443, "learning_rate": 9.397649570368046e-06, "loss": 17.2179, "step": 10019 }, { "epoch": 0.1831575483941726, "grad_norm": 6.606682036623378, "learning_rate": 9.397508707465997e-06, "loss": 17.327, "step": 10020 }, { "epoch": 0.18317582759061912, "grad_norm": 8.032489710799243, "learning_rate": 9.39736782915106e-06, "loss": 17.8206, "step": 10021 }, { "epoch": 0.18319410678706563, "grad_norm": 5.835923644342947, "learning_rate": 9.397226935423734e-06, "loss": 17.0689, "step": 10022 }, { "epoch": 0.18321238598351217, "grad_norm": 6.729184548387186, "learning_rate": 9.397086026284505e-06, "loss": 17.6471, "step": 10023 }, { "epoch": 0.1832306651799587, "grad_norm": 7.4706077757716285, "learning_rate": 9.396945101733874e-06, "loss": 17.8337, "step": 10024 }, { "epoch": 0.1832489443764052, "grad_norm": 9.125053382198983, "learning_rate": 9.396804161772331e-06, "loss": 17.1154, "step": 10025 }, { "epoch": 0.18326722357285175, "grad_norm": 6.067168561109368, "learning_rate": 9.396663206400372e-06, "loss": 17.4188, "step": 10026 }, { "epoch": 0.18328550276929825, "grad_norm": 5.82922530940505, "learning_rate": 9.396522235618488e-06, "loss": 17.2414, "step": 10027 }, { "epoch": 0.1833037819657448, "grad_norm": 6.557609650978757, "learning_rate": 9.396381249427176e-06, "loss": 17.5663, "step": 10028 }, { "epoch": 0.1833220611621913, "grad_norm": 7.821631448805034, "learning_rate": 9.396240247826929e-06, "loss": 17.898, "step": 10029 }, { "epoch": 0.18334034035863783, "grad_norm": 6.802719553007564, "learning_rate": 9.39609923081824e-06, "loss": 17.6314, "step": 10030 }, { "epoch": 0.18335861955508437, "grad_norm": 6.593847507280106, "learning_rate": 9.395958198401608e-06, "loss": 18.0311, "step": 10031 }, { "epoch": 0.18337689875153088, "grad_norm": 6.582581645853874, "learning_rate": 9.395817150577522e-06, "loss": 17.6809, "step": 10032 }, { "epoch": 0.1833951779479774, "grad_norm": 6.975542477321055, "learning_rate": 9.395676087346478e-06, "loss": 18.0356, "step": 10033 }, { "epoch": 0.18341345714442392, "grad_norm": 7.001343203133638, "learning_rate": 9.395535008708972e-06, "loss": 17.8048, "step": 10034 }, { "epoch": 0.18343173634087045, "grad_norm": 7.530149791963838, "learning_rate": 9.395393914665496e-06, "loss": 18.0788, "step": 10035 }, { "epoch": 0.183450015537317, "grad_norm": 11.947231797910732, "learning_rate": 9.395252805216545e-06, "loss": 18.8291, "step": 10036 }, { "epoch": 0.1834682947337635, "grad_norm": 8.45783714915619, "learning_rate": 9.395111680362616e-06, "loss": 18.9009, "step": 10037 }, { "epoch": 0.18348657393021003, "grad_norm": 6.250795698995749, "learning_rate": 9.394970540104203e-06, "loss": 17.2324, "step": 10038 }, { "epoch": 0.18350485312665654, "grad_norm": 6.887931607530826, "learning_rate": 9.394829384441796e-06, "loss": 17.6447, "step": 10039 }, { "epoch": 0.18352313232310308, "grad_norm": 6.881663556628881, "learning_rate": 9.394688213375897e-06, "loss": 17.4677, "step": 10040 }, { "epoch": 0.1835414115195496, "grad_norm": 7.094733645881841, "learning_rate": 9.394547026906996e-06, "loss": 17.7156, "step": 10041 }, { "epoch": 0.18355969071599612, "grad_norm": 6.721369910362387, "learning_rate": 9.394405825035588e-06, "loss": 17.5653, "step": 10042 }, { "epoch": 0.18357796991244266, "grad_norm": 7.905659665079018, "learning_rate": 9.394264607762171e-06, "loss": 18.1893, "step": 10043 }, { "epoch": 0.18359624910888916, "grad_norm": 8.182496878526997, "learning_rate": 9.394123375087236e-06, "loss": 18.1963, "step": 10044 }, { "epoch": 0.1836145283053357, "grad_norm": 5.924467389440648, "learning_rate": 9.39398212701128e-06, "loss": 17.3593, "step": 10045 }, { "epoch": 0.1836328075017822, "grad_norm": 7.158563543479542, "learning_rate": 9.393840863534798e-06, "loss": 17.7377, "step": 10046 }, { "epoch": 0.18365108669822874, "grad_norm": 6.6072075670565855, "learning_rate": 9.393699584658287e-06, "loss": 17.5862, "step": 10047 }, { "epoch": 0.18366936589467528, "grad_norm": 6.610929634307579, "learning_rate": 9.393558290382238e-06, "loss": 17.4893, "step": 10048 }, { "epoch": 0.1836876450911218, "grad_norm": 7.360027450851649, "learning_rate": 9.393416980707148e-06, "loss": 17.7952, "step": 10049 }, { "epoch": 0.18370592428756832, "grad_norm": 5.487437849285093, "learning_rate": 9.393275655633515e-06, "loss": 17.0889, "step": 10050 }, { "epoch": 0.18372420348401483, "grad_norm": 7.046362910788258, "learning_rate": 9.393134315161832e-06, "loss": 17.9629, "step": 10051 }, { "epoch": 0.18374248268046137, "grad_norm": 8.130850826922257, "learning_rate": 9.392992959292593e-06, "loss": 17.8732, "step": 10052 }, { "epoch": 0.1837607618769079, "grad_norm": 7.145303003288416, "learning_rate": 9.392851588026295e-06, "loss": 17.8343, "step": 10053 }, { "epoch": 0.1837790410733544, "grad_norm": 7.878470503338938, "learning_rate": 9.392710201363433e-06, "loss": 17.9847, "step": 10054 }, { "epoch": 0.18379732026980095, "grad_norm": 7.385497631069745, "learning_rate": 9.392568799304504e-06, "loss": 18.0775, "step": 10055 }, { "epoch": 0.18381559946624745, "grad_norm": 7.56836478360785, "learning_rate": 9.392427381850002e-06, "loss": 18.3344, "step": 10056 }, { "epoch": 0.183833878662694, "grad_norm": 6.030781229462143, "learning_rate": 9.392285949000422e-06, "loss": 17.0676, "step": 10057 }, { "epoch": 0.18385215785914052, "grad_norm": 6.762860666840852, "learning_rate": 9.392144500756261e-06, "loss": 17.5847, "step": 10058 }, { "epoch": 0.18387043705558703, "grad_norm": 8.958143732662874, "learning_rate": 9.392003037118018e-06, "loss": 18.4871, "step": 10059 }, { "epoch": 0.18388871625203357, "grad_norm": 7.639826919790793, "learning_rate": 9.391861558086183e-06, "loss": 18.2061, "step": 10060 }, { "epoch": 0.18390699544848008, "grad_norm": 5.734010063500707, "learning_rate": 9.391720063661253e-06, "loss": 17.0315, "step": 10061 }, { "epoch": 0.1839252746449266, "grad_norm": 8.518019074220268, "learning_rate": 9.391578553843727e-06, "loss": 17.5116, "step": 10062 }, { "epoch": 0.18394355384137312, "grad_norm": 5.843246554835414, "learning_rate": 9.3914370286341e-06, "loss": 17.3507, "step": 10063 }, { "epoch": 0.18396183303781966, "grad_norm": 5.959346547632752, "learning_rate": 9.391295488032866e-06, "loss": 17.3375, "step": 10064 }, { "epoch": 0.1839801122342662, "grad_norm": 8.566221840609426, "learning_rate": 9.391153932040524e-06, "loss": 18.4116, "step": 10065 }, { "epoch": 0.1839983914307127, "grad_norm": 7.258289236998518, "learning_rate": 9.391012360657567e-06, "loss": 17.9535, "step": 10066 }, { "epoch": 0.18401667062715923, "grad_norm": 8.74913373707712, "learning_rate": 9.390870773884493e-06, "loss": 18.2529, "step": 10067 }, { "epoch": 0.18403494982360574, "grad_norm": 6.276195917759392, "learning_rate": 9.390729171721797e-06, "loss": 17.5967, "step": 10068 }, { "epoch": 0.18405322902005228, "grad_norm": 6.043802023406444, "learning_rate": 9.390587554169978e-06, "loss": 17.4002, "step": 10069 }, { "epoch": 0.1840715082164988, "grad_norm": 7.056645386674894, "learning_rate": 9.390445921229529e-06, "loss": 17.681, "step": 10070 }, { "epoch": 0.18408978741294532, "grad_norm": 6.877355921850625, "learning_rate": 9.390304272900949e-06, "loss": 17.8717, "step": 10071 }, { "epoch": 0.18410806660939186, "grad_norm": 7.529191829145646, "learning_rate": 9.390162609184735e-06, "loss": 18.1924, "step": 10072 }, { "epoch": 0.18412634580583837, "grad_norm": 7.1874899146623905, "learning_rate": 9.390020930081378e-06, "loss": 17.9067, "step": 10073 }, { "epoch": 0.1841446250022849, "grad_norm": 4.955860626321352, "learning_rate": 9.389879235591381e-06, "loss": 16.8302, "step": 10074 }, { "epoch": 0.18416290419873144, "grad_norm": 7.089664707773817, "learning_rate": 9.38973752571524e-06, "loss": 17.7532, "step": 10075 }, { "epoch": 0.18418118339517794, "grad_norm": 6.880420490934243, "learning_rate": 9.389595800453447e-06, "loss": 17.7116, "step": 10076 }, { "epoch": 0.18419946259162448, "grad_norm": 7.391470423927875, "learning_rate": 9.389454059806502e-06, "loss": 18.0362, "step": 10077 }, { "epoch": 0.184217741788071, "grad_norm": 8.1552671824139, "learning_rate": 9.389312303774902e-06, "loss": 18.1661, "step": 10078 }, { "epoch": 0.18423602098451752, "grad_norm": 4.865258771724745, "learning_rate": 9.389170532359145e-06, "loss": 16.8387, "step": 10079 }, { "epoch": 0.18425430018096403, "grad_norm": 6.256518850905666, "learning_rate": 9.389028745559724e-06, "loss": 17.465, "step": 10080 }, { "epoch": 0.18427257937741057, "grad_norm": 7.334441424716792, "learning_rate": 9.388886943377139e-06, "loss": 17.9603, "step": 10081 }, { "epoch": 0.1842908585738571, "grad_norm": 6.080709347340776, "learning_rate": 9.388745125811884e-06, "loss": 17.4325, "step": 10082 }, { "epoch": 0.1843091377703036, "grad_norm": 7.740074002067427, "learning_rate": 9.38860329286446e-06, "loss": 17.9683, "step": 10083 }, { "epoch": 0.18432741696675015, "grad_norm": 6.758180192876033, "learning_rate": 9.388461444535364e-06, "loss": 17.5877, "step": 10084 }, { "epoch": 0.18434569616319665, "grad_norm": 7.360070317757277, "learning_rate": 9.38831958082509e-06, "loss": 18.0347, "step": 10085 }, { "epoch": 0.1843639753596432, "grad_norm": 6.233753484045297, "learning_rate": 9.388177701734135e-06, "loss": 17.5804, "step": 10086 }, { "epoch": 0.18438225455608973, "grad_norm": 7.317271295294663, "learning_rate": 9.388035807263e-06, "loss": 18.0553, "step": 10087 }, { "epoch": 0.18440053375253623, "grad_norm": 6.537674890608169, "learning_rate": 9.38789389741218e-06, "loss": 17.4096, "step": 10088 }, { "epoch": 0.18441881294898277, "grad_norm": 6.928024621313798, "learning_rate": 9.387751972182171e-06, "loss": 17.7727, "step": 10089 }, { "epoch": 0.18443709214542928, "grad_norm": 7.5708364315683, "learning_rate": 9.387610031573474e-06, "loss": 17.839, "step": 10090 }, { "epoch": 0.1844553713418758, "grad_norm": 7.908171570383844, "learning_rate": 9.387468075586583e-06, "loss": 18.1779, "step": 10091 }, { "epoch": 0.18447365053832235, "grad_norm": 5.5242661833756435, "learning_rate": 9.387326104221999e-06, "loss": 17.1945, "step": 10092 }, { "epoch": 0.18449192973476886, "grad_norm": 5.186178471195878, "learning_rate": 9.387184117480217e-06, "loss": 16.9887, "step": 10093 }, { "epoch": 0.1845102089312154, "grad_norm": 6.9561430161599755, "learning_rate": 9.387042115361735e-06, "loss": 17.9478, "step": 10094 }, { "epoch": 0.1845284881276619, "grad_norm": 7.4253635627651775, "learning_rate": 9.38690009786705e-06, "loss": 18.0151, "step": 10095 }, { "epoch": 0.18454676732410843, "grad_norm": 6.854517931928672, "learning_rate": 9.386758064996663e-06, "loss": 17.6601, "step": 10096 }, { "epoch": 0.18456504652055494, "grad_norm": 7.891688813940142, "learning_rate": 9.386616016751069e-06, "loss": 18.3511, "step": 10097 }, { "epoch": 0.18458332571700148, "grad_norm": 6.971177894420033, "learning_rate": 9.386473953130766e-06, "loss": 17.6837, "step": 10098 }, { "epoch": 0.18460160491344801, "grad_norm": 7.559625105872039, "learning_rate": 9.386331874136252e-06, "loss": 17.6225, "step": 10099 }, { "epoch": 0.18461988410989452, "grad_norm": 9.163406483025124, "learning_rate": 9.386189779768026e-06, "loss": 18.6586, "step": 10100 }, { "epoch": 0.18463816330634106, "grad_norm": 6.754811481736553, "learning_rate": 9.386047670026585e-06, "loss": 17.5206, "step": 10101 }, { "epoch": 0.18465644250278757, "grad_norm": 6.034743620294371, "learning_rate": 9.385905544912427e-06, "loss": 17.3234, "step": 10102 }, { "epoch": 0.1846747216992341, "grad_norm": 6.749802836729946, "learning_rate": 9.385763404426053e-06, "loss": 17.4607, "step": 10103 }, { "epoch": 0.18469300089568064, "grad_norm": 8.09839861060369, "learning_rate": 9.385621248567957e-06, "loss": 18.2918, "step": 10104 }, { "epoch": 0.18471128009212714, "grad_norm": 6.523424208415734, "learning_rate": 9.38547907733864e-06, "loss": 17.3888, "step": 10105 }, { "epoch": 0.18472955928857368, "grad_norm": 7.515683686348755, "learning_rate": 9.385336890738599e-06, "loss": 18.2368, "step": 10106 }, { "epoch": 0.1847478384850202, "grad_norm": 6.969248317430007, "learning_rate": 9.385194688768334e-06, "loss": 17.6281, "step": 10107 }, { "epoch": 0.18476611768146672, "grad_norm": 6.434898297464353, "learning_rate": 9.38505247142834e-06, "loss": 17.5382, "step": 10108 }, { "epoch": 0.18478439687791326, "grad_norm": 5.989770127717803, "learning_rate": 9.384910238719119e-06, "loss": 17.2119, "step": 10109 }, { "epoch": 0.18480267607435977, "grad_norm": 5.946040994438568, "learning_rate": 9.384767990641166e-06, "loss": 17.28, "step": 10110 }, { "epoch": 0.1848209552708063, "grad_norm": 7.4997105516833775, "learning_rate": 9.384625727194983e-06, "loss": 18.1866, "step": 10111 }, { "epoch": 0.1848392344672528, "grad_norm": 10.378955170882492, "learning_rate": 9.384483448381068e-06, "loss": 17.5535, "step": 10112 }, { "epoch": 0.18485751366369935, "grad_norm": 8.133534058588747, "learning_rate": 9.384341154199918e-06, "loss": 17.8558, "step": 10113 }, { "epoch": 0.18487579286014585, "grad_norm": 5.925984036602524, "learning_rate": 9.384198844652034e-06, "loss": 17.2775, "step": 10114 }, { "epoch": 0.1848940720565924, "grad_norm": 8.087471806115738, "learning_rate": 9.384056519737912e-06, "loss": 17.8861, "step": 10115 }, { "epoch": 0.18491235125303893, "grad_norm": 7.360788970198534, "learning_rate": 9.383914179458053e-06, "loss": 17.9566, "step": 10116 }, { "epoch": 0.18493063044948543, "grad_norm": 7.029999292910309, "learning_rate": 9.383771823812957e-06, "loss": 17.6375, "step": 10117 }, { "epoch": 0.18494890964593197, "grad_norm": 5.860795062387218, "learning_rate": 9.383629452803118e-06, "loss": 17.3074, "step": 10118 }, { "epoch": 0.18496718884237848, "grad_norm": 6.72733824557979, "learning_rate": 9.38348706642904e-06, "loss": 17.6756, "step": 10119 }, { "epoch": 0.184985468038825, "grad_norm": 6.348613506593034, "learning_rate": 9.38334466469122e-06, "loss": 17.251, "step": 10120 }, { "epoch": 0.18500374723527155, "grad_norm": 6.4407131573845025, "learning_rate": 9.383202247590157e-06, "loss": 17.4239, "step": 10121 }, { "epoch": 0.18502202643171806, "grad_norm": 7.66417138062931, "learning_rate": 9.38305981512635e-06, "loss": 18.2959, "step": 10122 }, { "epoch": 0.1850403056281646, "grad_norm": 8.096402083085778, "learning_rate": 9.3829173673003e-06, "loss": 18.2023, "step": 10123 }, { "epoch": 0.1850585848246111, "grad_norm": 7.441095682577782, "learning_rate": 9.382774904112505e-06, "loss": 17.9975, "step": 10124 }, { "epoch": 0.18507686402105764, "grad_norm": 5.874787975602297, "learning_rate": 9.382632425563462e-06, "loss": 17.1499, "step": 10125 }, { "epoch": 0.18509514321750417, "grad_norm": 8.396712904272395, "learning_rate": 9.382489931653675e-06, "loss": 18.1694, "step": 10126 }, { "epoch": 0.18511342241395068, "grad_norm": 6.880236844657977, "learning_rate": 9.38234742238364e-06, "loss": 17.9352, "step": 10127 }, { "epoch": 0.18513170161039721, "grad_norm": 6.891231969914025, "learning_rate": 9.38220489775386e-06, "loss": 17.673, "step": 10128 }, { "epoch": 0.18514998080684372, "grad_norm": 7.265333835185887, "learning_rate": 9.382062357764828e-06, "loss": 17.8743, "step": 10129 }, { "epoch": 0.18516826000329026, "grad_norm": 6.99979969720114, "learning_rate": 9.38191980241705e-06, "loss": 17.6382, "step": 10130 }, { "epoch": 0.18518653919973677, "grad_norm": 8.187962009688826, "learning_rate": 9.381777231711024e-06, "loss": 18.2169, "step": 10131 }, { "epoch": 0.1852048183961833, "grad_norm": 8.05353103911642, "learning_rate": 9.381634645647247e-06, "loss": 18.227, "step": 10132 }, { "epoch": 0.18522309759262984, "grad_norm": 7.505607921149841, "learning_rate": 9.38149204422622e-06, "loss": 17.8716, "step": 10133 }, { "epoch": 0.18524137678907635, "grad_norm": 6.635816173964813, "learning_rate": 9.381349427448448e-06, "loss": 17.619, "step": 10134 }, { "epoch": 0.18525965598552288, "grad_norm": 7.975934284092967, "learning_rate": 9.381206795314424e-06, "loss": 18.2861, "step": 10135 }, { "epoch": 0.1852779351819694, "grad_norm": 7.2156964395727945, "learning_rate": 9.38106414782465e-06, "loss": 17.5814, "step": 10136 }, { "epoch": 0.18529621437841592, "grad_norm": 6.413651784388292, "learning_rate": 9.380921484979626e-06, "loss": 17.7215, "step": 10137 }, { "epoch": 0.18531449357486246, "grad_norm": 6.167896859875163, "learning_rate": 9.380778806779853e-06, "loss": 17.3194, "step": 10138 }, { "epoch": 0.18533277277130897, "grad_norm": 10.13812019241804, "learning_rate": 9.38063611322583e-06, "loss": 18.9814, "step": 10139 }, { "epoch": 0.1853510519677555, "grad_norm": 6.022633303778132, "learning_rate": 9.380493404318059e-06, "loss": 17.2726, "step": 10140 }, { "epoch": 0.185369331164202, "grad_norm": 8.295729304229326, "learning_rate": 9.380350680057038e-06, "loss": 18.1683, "step": 10141 }, { "epoch": 0.18538761036064855, "grad_norm": 7.2665935336368, "learning_rate": 9.380207940443266e-06, "loss": 18.1368, "step": 10142 }, { "epoch": 0.18540588955709508, "grad_norm": 6.446637878102153, "learning_rate": 9.380065185477247e-06, "loss": 17.5363, "step": 10143 }, { "epoch": 0.1854241687535416, "grad_norm": 6.383097911432984, "learning_rate": 9.379922415159479e-06, "loss": 17.4873, "step": 10144 }, { "epoch": 0.18544244794998813, "grad_norm": 6.344580774345908, "learning_rate": 9.379779629490463e-06, "loss": 17.3647, "step": 10145 }, { "epoch": 0.18546072714643463, "grad_norm": 7.348473901289618, "learning_rate": 9.379636828470702e-06, "loss": 17.9987, "step": 10146 }, { "epoch": 0.18547900634288117, "grad_norm": 6.160850989268245, "learning_rate": 9.379494012100691e-06, "loss": 17.3402, "step": 10147 }, { "epoch": 0.18549728553932768, "grad_norm": 5.993419326775649, "learning_rate": 9.379351180380934e-06, "loss": 17.3536, "step": 10148 }, { "epoch": 0.1855155647357742, "grad_norm": 8.509094960522217, "learning_rate": 9.379208333311932e-06, "loss": 17.8672, "step": 10149 }, { "epoch": 0.18553384393222075, "grad_norm": 7.807610592514754, "learning_rate": 9.379065470894185e-06, "loss": 18.1017, "step": 10150 }, { "epoch": 0.18555212312866726, "grad_norm": 7.123159893464457, "learning_rate": 9.378922593128192e-06, "loss": 17.5521, "step": 10151 }, { "epoch": 0.1855704023251138, "grad_norm": 7.48066657787623, "learning_rate": 9.378779700014457e-06, "loss": 18.2265, "step": 10152 }, { "epoch": 0.1855886815215603, "grad_norm": 6.13410547093554, "learning_rate": 9.378636791553479e-06, "loss": 17.1917, "step": 10153 }, { "epoch": 0.18560696071800684, "grad_norm": 7.5276101799697654, "learning_rate": 9.378493867745757e-06, "loss": 17.8194, "step": 10154 }, { "epoch": 0.18562523991445337, "grad_norm": 7.534516614694993, "learning_rate": 9.378350928591795e-06, "loss": 18.3422, "step": 10155 }, { "epoch": 0.18564351911089988, "grad_norm": 7.091677494357677, "learning_rate": 9.378207974092094e-06, "loss": 17.7917, "step": 10156 }, { "epoch": 0.18566179830734642, "grad_norm": 8.561682749273809, "learning_rate": 9.378065004247154e-06, "loss": 18.1131, "step": 10157 }, { "epoch": 0.18568007750379292, "grad_norm": 5.976930234644536, "learning_rate": 9.377922019057475e-06, "loss": 17.1155, "step": 10158 }, { "epoch": 0.18569835670023946, "grad_norm": 7.4921041502776005, "learning_rate": 9.377779018523558e-06, "loss": 18.0279, "step": 10159 }, { "epoch": 0.185716635896686, "grad_norm": 7.823224316133014, "learning_rate": 9.377636002645907e-06, "loss": 17.7456, "step": 10160 }, { "epoch": 0.1857349150931325, "grad_norm": 6.3364572003429585, "learning_rate": 9.377492971425022e-06, "loss": 17.4062, "step": 10161 }, { "epoch": 0.18575319428957904, "grad_norm": 5.9663246026209995, "learning_rate": 9.377349924861404e-06, "loss": 17.2638, "step": 10162 }, { "epoch": 0.18577147348602555, "grad_norm": 6.679882467011436, "learning_rate": 9.377206862955554e-06, "loss": 17.5757, "step": 10163 }, { "epoch": 0.18578975268247208, "grad_norm": 6.940412229607728, "learning_rate": 9.377063785707974e-06, "loss": 17.6697, "step": 10164 }, { "epoch": 0.1858080318789186, "grad_norm": 6.6646624172232105, "learning_rate": 9.376920693119164e-06, "loss": 17.6532, "step": 10165 }, { "epoch": 0.18582631107536512, "grad_norm": 7.239341796820249, "learning_rate": 9.376777585189629e-06, "loss": 17.9977, "step": 10166 }, { "epoch": 0.18584459027181166, "grad_norm": 7.771468138586223, "learning_rate": 9.376634461919867e-06, "loss": 18.2488, "step": 10167 }, { "epoch": 0.18586286946825817, "grad_norm": 8.85702272829546, "learning_rate": 9.37649132331038e-06, "loss": 17.7924, "step": 10168 }, { "epoch": 0.1858811486647047, "grad_norm": 7.300670685126151, "learning_rate": 9.376348169361673e-06, "loss": 17.9292, "step": 10169 }, { "epoch": 0.1858994278611512, "grad_norm": 8.538267262281579, "learning_rate": 9.376205000074243e-06, "loss": 17.9711, "step": 10170 }, { "epoch": 0.18591770705759775, "grad_norm": 6.43279408233411, "learning_rate": 9.376061815448596e-06, "loss": 17.5059, "step": 10171 }, { "epoch": 0.18593598625404428, "grad_norm": 6.055184820090015, "learning_rate": 9.375918615485231e-06, "loss": 17.1337, "step": 10172 }, { "epoch": 0.1859542654504908, "grad_norm": 7.993831665485996, "learning_rate": 9.375775400184652e-06, "loss": 17.4964, "step": 10173 }, { "epoch": 0.18597254464693733, "grad_norm": 6.944128214910914, "learning_rate": 9.37563216954736e-06, "loss": 17.7661, "step": 10174 }, { "epoch": 0.18599082384338383, "grad_norm": 7.7628760757659725, "learning_rate": 9.375488923573857e-06, "loss": 18.064, "step": 10175 }, { "epoch": 0.18600910303983037, "grad_norm": 6.433721378115287, "learning_rate": 9.375345662264644e-06, "loss": 17.7262, "step": 10176 }, { "epoch": 0.1860273822362769, "grad_norm": 6.346395656175717, "learning_rate": 9.375202385620223e-06, "loss": 17.3542, "step": 10177 }, { "epoch": 0.1860456614327234, "grad_norm": 7.679772556378218, "learning_rate": 9.375059093641099e-06, "loss": 18.1816, "step": 10178 }, { "epoch": 0.18606394062916995, "grad_norm": 6.895906852677195, "learning_rate": 9.374915786327773e-06, "loss": 17.5511, "step": 10179 }, { "epoch": 0.18608221982561646, "grad_norm": 6.281701990782323, "learning_rate": 9.374772463680745e-06, "loss": 17.4323, "step": 10180 }, { "epoch": 0.186100499022063, "grad_norm": 7.485888610640797, "learning_rate": 9.374629125700522e-06, "loss": 17.5209, "step": 10181 }, { "epoch": 0.1861187782185095, "grad_norm": 5.6333109911431904, "learning_rate": 9.374485772387602e-06, "loss": 17.2553, "step": 10182 }, { "epoch": 0.18613705741495604, "grad_norm": 7.691203740517854, "learning_rate": 9.374342403742489e-06, "loss": 17.6799, "step": 10183 }, { "epoch": 0.18615533661140257, "grad_norm": 6.906312158650607, "learning_rate": 9.374199019765685e-06, "loss": 17.5729, "step": 10184 }, { "epoch": 0.18617361580784908, "grad_norm": 6.663267416505823, "learning_rate": 9.374055620457693e-06, "loss": 17.6142, "step": 10185 }, { "epoch": 0.18619189500429562, "grad_norm": 6.339292529984208, "learning_rate": 9.373912205819016e-06, "loss": 17.4268, "step": 10186 }, { "epoch": 0.18621017420074212, "grad_norm": 7.67804688835375, "learning_rate": 9.373768775850156e-06, "loss": 17.8058, "step": 10187 }, { "epoch": 0.18622845339718866, "grad_norm": 8.012542663727075, "learning_rate": 9.373625330551617e-06, "loss": 17.9881, "step": 10188 }, { "epoch": 0.1862467325936352, "grad_norm": 7.4263693912563635, "learning_rate": 9.3734818699239e-06, "loss": 18.0399, "step": 10189 }, { "epoch": 0.1862650117900817, "grad_norm": 9.111480253737692, "learning_rate": 9.373338393967508e-06, "loss": 18.4965, "step": 10190 }, { "epoch": 0.18628329098652824, "grad_norm": 6.5584971932398375, "learning_rate": 9.373194902682945e-06, "loss": 17.6057, "step": 10191 }, { "epoch": 0.18630157018297475, "grad_norm": 7.604951707353146, "learning_rate": 9.373051396070713e-06, "loss": 18.274, "step": 10192 }, { "epoch": 0.18631984937942128, "grad_norm": 7.375005504809956, "learning_rate": 9.372907874131316e-06, "loss": 18.1264, "step": 10193 }, { "epoch": 0.18633812857586782, "grad_norm": 7.43140604220699, "learning_rate": 9.372764336865255e-06, "loss": 17.9132, "step": 10194 }, { "epoch": 0.18635640777231433, "grad_norm": 8.193563811574354, "learning_rate": 9.372620784273036e-06, "loss": 18.137, "step": 10195 }, { "epoch": 0.18637468696876086, "grad_norm": 8.009154005190014, "learning_rate": 9.372477216355158e-06, "loss": 17.9132, "step": 10196 }, { "epoch": 0.18639296616520737, "grad_norm": 7.128934812209609, "learning_rate": 9.372333633112129e-06, "loss": 18.0419, "step": 10197 }, { "epoch": 0.1864112453616539, "grad_norm": 7.317620610851088, "learning_rate": 9.37219003454445e-06, "loss": 17.5791, "step": 10198 }, { "epoch": 0.1864295245581004, "grad_norm": 6.525025639592337, "learning_rate": 9.372046420652625e-06, "loss": 17.7318, "step": 10199 }, { "epoch": 0.18644780375454695, "grad_norm": 7.735597506460469, "learning_rate": 9.371902791437155e-06, "loss": 17.8672, "step": 10200 }, { "epoch": 0.18646608295099348, "grad_norm": 8.226678540737261, "learning_rate": 9.371759146898547e-06, "loss": 17.9677, "step": 10201 }, { "epoch": 0.18648436214744, "grad_norm": 5.501891707057994, "learning_rate": 9.371615487037302e-06, "loss": 17.2741, "step": 10202 }, { "epoch": 0.18650264134388653, "grad_norm": 6.572065461356103, "learning_rate": 9.371471811853923e-06, "loss": 17.4299, "step": 10203 }, { "epoch": 0.18652092054033304, "grad_norm": 8.720931550768686, "learning_rate": 9.371328121348914e-06, "loss": 18.4159, "step": 10204 }, { "epoch": 0.18653919973677957, "grad_norm": 6.38453974087123, "learning_rate": 9.37118441552278e-06, "loss": 17.5934, "step": 10205 }, { "epoch": 0.1865574789332261, "grad_norm": 7.3772736465978515, "learning_rate": 9.371040694376026e-06, "loss": 17.9282, "step": 10206 }, { "epoch": 0.18657575812967261, "grad_norm": 8.31169856553042, "learning_rate": 9.370896957909151e-06, "loss": 18.1827, "step": 10207 }, { "epoch": 0.18659403732611915, "grad_norm": 6.2329002713871695, "learning_rate": 9.370753206122662e-06, "loss": 17.4252, "step": 10208 }, { "epoch": 0.18661231652256566, "grad_norm": 7.739947269091635, "learning_rate": 9.370609439017064e-06, "loss": 18.0942, "step": 10209 }, { "epoch": 0.1866305957190122, "grad_norm": 6.710341963329881, "learning_rate": 9.370465656592858e-06, "loss": 17.6096, "step": 10210 }, { "epoch": 0.18664887491545873, "grad_norm": 7.315130101646613, "learning_rate": 9.37032185885055e-06, "loss": 17.9044, "step": 10211 }, { "epoch": 0.18666715411190524, "grad_norm": 6.345220118476805, "learning_rate": 9.37017804579064e-06, "loss": 17.5248, "step": 10212 }, { "epoch": 0.18668543330835177, "grad_norm": 6.268704186528599, "learning_rate": 9.370034217413638e-06, "loss": 17.661, "step": 10213 }, { "epoch": 0.18670371250479828, "grad_norm": 5.01108789980958, "learning_rate": 9.369890373720044e-06, "loss": 16.9445, "step": 10214 }, { "epoch": 0.18672199170124482, "grad_norm": 5.2239857534211245, "learning_rate": 9.369746514710365e-06, "loss": 16.9543, "step": 10215 }, { "epoch": 0.18674027089769132, "grad_norm": 6.487987836666354, "learning_rate": 9.369602640385102e-06, "loss": 17.7461, "step": 10216 }, { "epoch": 0.18675855009413786, "grad_norm": 7.865169897417005, "learning_rate": 9.369458750744762e-06, "loss": 18.0864, "step": 10217 }, { "epoch": 0.1867768292905844, "grad_norm": 6.197612176392014, "learning_rate": 9.369314845789847e-06, "loss": 17.458, "step": 10218 }, { "epoch": 0.1867951084870309, "grad_norm": 7.561435627743233, "learning_rate": 9.369170925520865e-06, "loss": 18.1334, "step": 10219 }, { "epoch": 0.18681338768347744, "grad_norm": 7.44381941931186, "learning_rate": 9.369026989938318e-06, "loss": 17.7356, "step": 10220 }, { "epoch": 0.18683166687992395, "grad_norm": 7.017214885184996, "learning_rate": 9.368883039042706e-06, "loss": 17.5028, "step": 10221 }, { "epoch": 0.18684994607637048, "grad_norm": 7.229664469044334, "learning_rate": 9.368739072834543e-06, "loss": 17.5561, "step": 10222 }, { "epoch": 0.18686822527281702, "grad_norm": 7.189737803230755, "learning_rate": 9.368595091314326e-06, "loss": 18.2759, "step": 10223 }, { "epoch": 0.18688650446926353, "grad_norm": 8.319257418022968, "learning_rate": 9.368451094482564e-06, "loss": 18.3799, "step": 10224 }, { "epoch": 0.18690478366571006, "grad_norm": 6.4889484367898005, "learning_rate": 9.368307082339758e-06, "loss": 17.5709, "step": 10225 }, { "epoch": 0.18692306286215657, "grad_norm": 12.229828869233472, "learning_rate": 9.368163054886417e-06, "loss": 17.6928, "step": 10226 }, { "epoch": 0.1869413420586031, "grad_norm": 6.322971098801259, "learning_rate": 9.368019012123042e-06, "loss": 17.4152, "step": 10227 }, { "epoch": 0.18695962125504964, "grad_norm": 6.77865355835652, "learning_rate": 9.36787495405014e-06, "loss": 17.6817, "step": 10228 }, { "epoch": 0.18697790045149615, "grad_norm": 6.426308198557916, "learning_rate": 9.367730880668214e-06, "loss": 17.4643, "step": 10229 }, { "epoch": 0.18699617964794268, "grad_norm": 5.632297905265568, "learning_rate": 9.367586791977772e-06, "loss": 17.073, "step": 10230 }, { "epoch": 0.1870144588443892, "grad_norm": 6.974509567515869, "learning_rate": 9.367442687979317e-06, "loss": 17.7247, "step": 10231 }, { "epoch": 0.18703273804083573, "grad_norm": 6.83206984158418, "learning_rate": 9.367298568673354e-06, "loss": 17.5729, "step": 10232 }, { "epoch": 0.18705101723728224, "grad_norm": 9.757596180231703, "learning_rate": 9.367154434060389e-06, "loss": 18.1063, "step": 10233 }, { "epoch": 0.18706929643372877, "grad_norm": 6.313951935010378, "learning_rate": 9.367010284140925e-06, "loss": 17.4356, "step": 10234 }, { "epoch": 0.1870875756301753, "grad_norm": 6.317780261667729, "learning_rate": 9.366866118915469e-06, "loss": 17.4707, "step": 10235 }, { "epoch": 0.18710585482662181, "grad_norm": 7.5308562210962195, "learning_rate": 9.36672193838453e-06, "loss": 18.0669, "step": 10236 }, { "epoch": 0.18712413402306835, "grad_norm": 6.156414652177409, "learning_rate": 9.366577742548606e-06, "loss": 17.4318, "step": 10237 }, { "epoch": 0.18714241321951486, "grad_norm": 7.545443507483131, "learning_rate": 9.366433531408206e-06, "loss": 17.7277, "step": 10238 }, { "epoch": 0.1871606924159614, "grad_norm": 7.1117771320861145, "learning_rate": 9.366289304963835e-06, "loss": 17.7777, "step": 10239 }, { "epoch": 0.18717897161240793, "grad_norm": 7.38012130805539, "learning_rate": 9.366145063216002e-06, "loss": 17.7226, "step": 10240 }, { "epoch": 0.18719725080885444, "grad_norm": 6.287584397312286, "learning_rate": 9.366000806165208e-06, "loss": 17.5169, "step": 10241 }, { "epoch": 0.18721553000530097, "grad_norm": 8.272480728809974, "learning_rate": 9.365856533811958e-06, "loss": 18.2174, "step": 10242 }, { "epoch": 0.18723380920174748, "grad_norm": 6.334337383323769, "learning_rate": 9.365712246156762e-06, "loss": 17.4599, "step": 10243 }, { "epoch": 0.18725208839819402, "grad_norm": 6.252816761629262, "learning_rate": 9.365567943200122e-06, "loss": 17.4577, "step": 10244 }, { "epoch": 0.18727036759464055, "grad_norm": 7.720308446724791, "learning_rate": 9.365423624942546e-06, "loss": 17.9001, "step": 10245 }, { "epoch": 0.18728864679108706, "grad_norm": 6.447776379913213, "learning_rate": 9.365279291384539e-06, "loss": 17.6432, "step": 10246 }, { "epoch": 0.1873069259875336, "grad_norm": 6.534097759809123, "learning_rate": 9.365134942526606e-06, "loss": 17.6352, "step": 10247 }, { "epoch": 0.1873252051839801, "grad_norm": 5.86259723785051, "learning_rate": 9.364990578369255e-06, "loss": 17.3302, "step": 10248 }, { "epoch": 0.18734348438042664, "grad_norm": 7.858264480931946, "learning_rate": 9.36484619891299e-06, "loss": 18.0367, "step": 10249 }, { "epoch": 0.18736176357687315, "grad_norm": 6.153744360184581, "learning_rate": 9.364701804158318e-06, "loss": 17.3333, "step": 10250 }, { "epoch": 0.18738004277331968, "grad_norm": 6.815205240338957, "learning_rate": 9.364557394105746e-06, "loss": 17.5782, "step": 10251 }, { "epoch": 0.18739832196976622, "grad_norm": 7.874192778617276, "learning_rate": 9.364412968755777e-06, "loss": 18.5423, "step": 10252 }, { "epoch": 0.18741660116621273, "grad_norm": 8.207110183282222, "learning_rate": 9.36426852810892e-06, "loss": 18.8335, "step": 10253 }, { "epoch": 0.18743488036265926, "grad_norm": 6.255530075436144, "learning_rate": 9.36412407216568e-06, "loss": 17.1486, "step": 10254 }, { "epoch": 0.18745315955910577, "grad_norm": 5.798395753238181, "learning_rate": 9.363979600926567e-06, "loss": 17.2161, "step": 10255 }, { "epoch": 0.1874714387555523, "grad_norm": 6.092057908902809, "learning_rate": 9.363835114392082e-06, "loss": 17.2954, "step": 10256 }, { "epoch": 0.18748971795199884, "grad_norm": 5.69434192164635, "learning_rate": 9.363690612562732e-06, "loss": 17.2059, "step": 10257 }, { "epoch": 0.18750799714844535, "grad_norm": 5.785054308131656, "learning_rate": 9.363546095439026e-06, "loss": 17.1083, "step": 10258 }, { "epoch": 0.18752627634489188, "grad_norm": 6.479975436775092, "learning_rate": 9.363401563021472e-06, "loss": 17.6234, "step": 10259 }, { "epoch": 0.1875445555413384, "grad_norm": 7.197517234570277, "learning_rate": 9.363257015310572e-06, "loss": 17.8155, "step": 10260 }, { "epoch": 0.18756283473778493, "grad_norm": 6.108622891651722, "learning_rate": 9.363112452306835e-06, "loss": 17.2461, "step": 10261 }, { "epoch": 0.18758111393423146, "grad_norm": 8.276899118951857, "learning_rate": 9.362967874010768e-06, "loss": 17.9235, "step": 10262 }, { "epoch": 0.18759939313067797, "grad_norm": 6.631154795403799, "learning_rate": 9.362823280422877e-06, "loss": 17.6759, "step": 10263 }, { "epoch": 0.1876176723271245, "grad_norm": 6.665722795146296, "learning_rate": 9.362678671543668e-06, "loss": 17.5608, "step": 10264 }, { "epoch": 0.18763595152357102, "grad_norm": 7.2208338930364215, "learning_rate": 9.36253404737365e-06, "loss": 17.7458, "step": 10265 }, { "epoch": 0.18765423072001755, "grad_norm": 8.015701508939888, "learning_rate": 9.362389407913327e-06, "loss": 18.339, "step": 10266 }, { "epoch": 0.18767250991646406, "grad_norm": 7.048841773166798, "learning_rate": 9.36224475316321e-06, "loss": 17.832, "step": 10267 }, { "epoch": 0.1876907891129106, "grad_norm": 6.466751494526317, "learning_rate": 9.362100083123803e-06, "loss": 17.7112, "step": 10268 }, { "epoch": 0.18770906830935713, "grad_norm": 7.476181853417334, "learning_rate": 9.361955397795613e-06, "loss": 17.9526, "step": 10269 }, { "epoch": 0.18772734750580364, "grad_norm": 7.011969367959185, "learning_rate": 9.36181069717915e-06, "loss": 17.746, "step": 10270 }, { "epoch": 0.18774562670225017, "grad_norm": 6.297938662302559, "learning_rate": 9.361665981274916e-06, "loss": 17.3984, "step": 10271 }, { "epoch": 0.18776390589869668, "grad_norm": 6.835725591588053, "learning_rate": 9.361521250083422e-06, "loss": 17.6402, "step": 10272 }, { "epoch": 0.18778218509514322, "grad_norm": 6.118580864255345, "learning_rate": 9.361376503605174e-06, "loss": 17.5137, "step": 10273 }, { "epoch": 0.18780046429158975, "grad_norm": 6.648036608556335, "learning_rate": 9.361231741840684e-06, "loss": 17.4866, "step": 10274 }, { "epoch": 0.18781874348803626, "grad_norm": 6.4771008015554035, "learning_rate": 9.361086964790452e-06, "loss": 17.3811, "step": 10275 }, { "epoch": 0.1878370226844828, "grad_norm": 6.243823701274997, "learning_rate": 9.360942172454987e-06, "loss": 17.1627, "step": 10276 }, { "epoch": 0.1878553018809293, "grad_norm": 5.83650409867925, "learning_rate": 9.360797364834799e-06, "loss": 17.3612, "step": 10277 }, { "epoch": 0.18787358107737584, "grad_norm": 8.524501459586926, "learning_rate": 9.360652541930396e-06, "loss": 18.3621, "step": 10278 }, { "epoch": 0.18789186027382238, "grad_norm": 6.137853498680438, "learning_rate": 9.360507703742285e-06, "loss": 17.3574, "step": 10279 }, { "epoch": 0.18791013947026888, "grad_norm": 7.107590055221519, "learning_rate": 9.36036285027097e-06, "loss": 17.6897, "step": 10280 }, { "epoch": 0.18792841866671542, "grad_norm": 8.289986445811634, "learning_rate": 9.360217981516963e-06, "loss": 18.3256, "step": 10281 }, { "epoch": 0.18794669786316193, "grad_norm": 8.307333242252078, "learning_rate": 9.360073097480771e-06, "loss": 17.9867, "step": 10282 }, { "epoch": 0.18796497705960846, "grad_norm": 6.639776770178107, "learning_rate": 9.3599281981629e-06, "loss": 17.4129, "step": 10283 }, { "epoch": 0.18798325625605497, "grad_norm": 9.511438673072446, "learning_rate": 9.35978328356386e-06, "loss": 18.4594, "step": 10284 }, { "epoch": 0.1880015354525015, "grad_norm": 7.389362953957318, "learning_rate": 9.359638353684157e-06, "loss": 17.4465, "step": 10285 }, { "epoch": 0.18801981464894804, "grad_norm": 7.843654781813417, "learning_rate": 9.3594934085243e-06, "loss": 18.0209, "step": 10286 }, { "epoch": 0.18803809384539455, "grad_norm": 5.828374392464251, "learning_rate": 9.359348448084798e-06, "loss": 17.119, "step": 10287 }, { "epoch": 0.18805637304184109, "grad_norm": 6.8428373551619055, "learning_rate": 9.359203472366158e-06, "loss": 17.4392, "step": 10288 }, { "epoch": 0.1880746522382876, "grad_norm": 6.622676320783083, "learning_rate": 9.359058481368888e-06, "loss": 17.6567, "step": 10289 }, { "epoch": 0.18809293143473413, "grad_norm": 6.338188570627085, "learning_rate": 9.358913475093496e-06, "loss": 17.4636, "step": 10290 }, { "epoch": 0.18811121063118066, "grad_norm": 6.943641282726379, "learning_rate": 9.35876845354049e-06, "loss": 17.5162, "step": 10291 }, { "epoch": 0.18812948982762717, "grad_norm": 7.508261262549054, "learning_rate": 9.358623416710378e-06, "loss": 18.2523, "step": 10292 }, { "epoch": 0.1881477690240737, "grad_norm": 7.615613676014433, "learning_rate": 9.35847836460367e-06, "loss": 17.8776, "step": 10293 }, { "epoch": 0.18816604822052022, "grad_norm": 7.960862017040444, "learning_rate": 9.358333297220875e-06, "loss": 18.1081, "step": 10294 }, { "epoch": 0.18818432741696675, "grad_norm": 7.710056369955521, "learning_rate": 9.358188214562499e-06, "loss": 17.9136, "step": 10295 }, { "epoch": 0.1882026066134133, "grad_norm": 7.86632430016447, "learning_rate": 9.35804311662905e-06, "loss": 18.3673, "step": 10296 }, { "epoch": 0.1882208858098598, "grad_norm": 7.047120650918058, "learning_rate": 9.35789800342104e-06, "loss": 17.8213, "step": 10297 }, { "epoch": 0.18823916500630633, "grad_norm": 8.508225806407097, "learning_rate": 9.357752874938975e-06, "loss": 18.6244, "step": 10298 }, { "epoch": 0.18825744420275284, "grad_norm": 7.954864284670036, "learning_rate": 9.357607731183362e-06, "loss": 18.0167, "step": 10299 }, { "epoch": 0.18827572339919937, "grad_norm": 7.305427095963071, "learning_rate": 9.357462572154716e-06, "loss": 17.9168, "step": 10300 }, { "epoch": 0.18829400259564588, "grad_norm": 7.117544417631457, "learning_rate": 9.35731739785354e-06, "loss": 17.4382, "step": 10301 }, { "epoch": 0.18831228179209242, "grad_norm": 7.269573153020997, "learning_rate": 9.357172208280344e-06, "loss": 17.7793, "step": 10302 }, { "epoch": 0.18833056098853895, "grad_norm": 5.5781282396621865, "learning_rate": 9.357027003435638e-06, "loss": 17.145, "step": 10303 }, { "epoch": 0.18834884018498546, "grad_norm": 8.244892639621913, "learning_rate": 9.356881783319932e-06, "loss": 17.9596, "step": 10304 }, { "epoch": 0.188367119381432, "grad_norm": 7.491096000135977, "learning_rate": 9.356736547933731e-06, "loss": 17.7012, "step": 10305 }, { "epoch": 0.1883853985778785, "grad_norm": 6.639930293503112, "learning_rate": 9.356591297277548e-06, "loss": 17.5532, "step": 10306 }, { "epoch": 0.18840367777432504, "grad_norm": 7.712491671647756, "learning_rate": 9.35644603135189e-06, "loss": 17.6495, "step": 10307 }, { "epoch": 0.18842195697077158, "grad_norm": 7.09722689162491, "learning_rate": 9.356300750157266e-06, "loss": 17.4289, "step": 10308 }, { "epoch": 0.18844023616721808, "grad_norm": 7.023241890632782, "learning_rate": 9.356155453694186e-06, "loss": 17.7111, "step": 10309 }, { "epoch": 0.18845851536366462, "grad_norm": 6.371635969895364, "learning_rate": 9.356010141963161e-06, "loss": 17.6677, "step": 10310 }, { "epoch": 0.18847679456011113, "grad_norm": 6.038358992274468, "learning_rate": 9.355864814964696e-06, "loss": 17.4605, "step": 10311 }, { "epoch": 0.18849507375655766, "grad_norm": 6.295758541209396, "learning_rate": 9.355719472699306e-06, "loss": 17.3981, "step": 10312 }, { "epoch": 0.1885133529530042, "grad_norm": 7.163153660352481, "learning_rate": 9.355574115167493e-06, "loss": 17.5929, "step": 10313 }, { "epoch": 0.1885316321494507, "grad_norm": 7.708420745067751, "learning_rate": 9.355428742369774e-06, "loss": 17.9523, "step": 10314 }, { "epoch": 0.18854991134589724, "grad_norm": 7.916856030708843, "learning_rate": 9.355283354306655e-06, "loss": 18.0848, "step": 10315 }, { "epoch": 0.18856819054234375, "grad_norm": 6.276171146796963, "learning_rate": 9.355137950978644e-06, "loss": 17.5003, "step": 10316 }, { "epoch": 0.18858646973879029, "grad_norm": 5.716743447353856, "learning_rate": 9.354992532386253e-06, "loss": 17.2313, "step": 10317 }, { "epoch": 0.1886047489352368, "grad_norm": 5.909377827740273, "learning_rate": 9.35484709852999e-06, "loss": 17.4874, "step": 10318 }, { "epoch": 0.18862302813168333, "grad_norm": 7.926833432487396, "learning_rate": 9.354701649410369e-06, "loss": 18.0493, "step": 10319 }, { "epoch": 0.18864130732812986, "grad_norm": 5.893369379778734, "learning_rate": 9.354556185027894e-06, "loss": 17.1941, "step": 10320 }, { "epoch": 0.18865958652457637, "grad_norm": 6.925840564390966, "learning_rate": 9.354410705383079e-06, "loss": 17.9839, "step": 10321 }, { "epoch": 0.1886778657210229, "grad_norm": 6.496801526721922, "learning_rate": 9.354265210476432e-06, "loss": 17.4445, "step": 10322 }, { "epoch": 0.18869614491746942, "grad_norm": 7.164697364599269, "learning_rate": 9.354119700308463e-06, "loss": 17.9656, "step": 10323 }, { "epoch": 0.18871442411391595, "grad_norm": 5.870553458649823, "learning_rate": 9.353974174879684e-06, "loss": 17.3856, "step": 10324 }, { "epoch": 0.1887327033103625, "grad_norm": 6.473687299671137, "learning_rate": 9.3538286341906e-06, "loss": 17.3945, "step": 10325 }, { "epoch": 0.188750982506809, "grad_norm": 6.800167833248619, "learning_rate": 9.353683078241726e-06, "loss": 17.5475, "step": 10326 }, { "epoch": 0.18876926170325553, "grad_norm": 8.797610668770592, "learning_rate": 9.35353750703357e-06, "loss": 18.4934, "step": 10327 }, { "epoch": 0.18878754089970204, "grad_norm": 6.9870955597911895, "learning_rate": 9.353391920566643e-06, "loss": 17.7022, "step": 10328 }, { "epoch": 0.18880582009614857, "grad_norm": 7.91011523509433, "learning_rate": 9.353246318841456e-06, "loss": 17.9763, "step": 10329 }, { "epoch": 0.1888240992925951, "grad_norm": 6.583339412986538, "learning_rate": 9.353100701858517e-06, "loss": 17.4026, "step": 10330 }, { "epoch": 0.18884237848904162, "grad_norm": 7.0968858218294795, "learning_rate": 9.35295506961834e-06, "loss": 17.9342, "step": 10331 }, { "epoch": 0.18886065768548815, "grad_norm": 7.034159509522916, "learning_rate": 9.352809422121432e-06, "loss": 17.4617, "step": 10332 }, { "epoch": 0.18887893688193466, "grad_norm": 7.634212798384977, "learning_rate": 9.352663759368303e-06, "loss": 17.9708, "step": 10333 }, { "epoch": 0.1888972160783812, "grad_norm": 7.91627024684817, "learning_rate": 9.352518081359468e-06, "loss": 18.0931, "step": 10334 }, { "epoch": 0.1889154952748277, "grad_norm": 7.879522839290091, "learning_rate": 9.352372388095435e-06, "loss": 17.8632, "step": 10335 }, { "epoch": 0.18893377447127424, "grad_norm": 5.875811968143654, "learning_rate": 9.352226679576712e-06, "loss": 17.447, "step": 10336 }, { "epoch": 0.18895205366772078, "grad_norm": 6.97015228690603, "learning_rate": 9.352080955803813e-06, "loss": 17.7629, "step": 10337 }, { "epoch": 0.18897033286416728, "grad_norm": 5.965939193317881, "learning_rate": 9.351935216777248e-06, "loss": 17.1888, "step": 10338 }, { "epoch": 0.18898861206061382, "grad_norm": 7.103798476791659, "learning_rate": 9.351789462497529e-06, "loss": 17.7767, "step": 10339 }, { "epoch": 0.18900689125706033, "grad_norm": 6.769213501368472, "learning_rate": 9.351643692965164e-06, "loss": 17.9347, "step": 10340 }, { "epoch": 0.18902517045350686, "grad_norm": 7.488585050253277, "learning_rate": 9.351497908180664e-06, "loss": 17.8217, "step": 10341 }, { "epoch": 0.1890434496499534, "grad_norm": 7.6060020362558545, "learning_rate": 9.351352108144544e-06, "loss": 18.2705, "step": 10342 }, { "epoch": 0.1890617288463999, "grad_norm": 6.013510437455529, "learning_rate": 9.351206292857312e-06, "loss": 17.2332, "step": 10343 }, { "epoch": 0.18908000804284644, "grad_norm": 7.556401584698262, "learning_rate": 9.35106046231948e-06, "loss": 18.0483, "step": 10344 }, { "epoch": 0.18909828723929295, "grad_norm": 8.603068189092529, "learning_rate": 9.350914616531557e-06, "loss": 17.7321, "step": 10345 }, { "epoch": 0.18911656643573949, "grad_norm": 13.481326330149399, "learning_rate": 9.350768755494057e-06, "loss": 17.8328, "step": 10346 }, { "epoch": 0.18913484563218602, "grad_norm": 7.0546202021714945, "learning_rate": 9.35062287920749e-06, "loss": 17.8827, "step": 10347 }, { "epoch": 0.18915312482863253, "grad_norm": 7.258429206863186, "learning_rate": 9.350476987672367e-06, "loss": 17.8897, "step": 10348 }, { "epoch": 0.18917140402507907, "grad_norm": 6.951416055469755, "learning_rate": 9.350331080889201e-06, "loss": 18.0216, "step": 10349 }, { "epoch": 0.18918968322152557, "grad_norm": 7.256919398246387, "learning_rate": 9.3501851588585e-06, "loss": 17.7268, "step": 10350 }, { "epoch": 0.1892079624179721, "grad_norm": 6.828697846997594, "learning_rate": 9.350039221580778e-06, "loss": 17.7134, "step": 10351 }, { "epoch": 0.18922624161441862, "grad_norm": 5.90178890801479, "learning_rate": 9.349893269056547e-06, "loss": 17.1515, "step": 10352 }, { "epoch": 0.18924452081086515, "grad_norm": 6.496939715605593, "learning_rate": 9.349747301286317e-06, "loss": 17.6936, "step": 10353 }, { "epoch": 0.1892628000073117, "grad_norm": 9.108829587225198, "learning_rate": 9.349601318270601e-06, "loss": 18.645, "step": 10354 }, { "epoch": 0.1892810792037582, "grad_norm": 5.86049343307796, "learning_rate": 9.349455320009907e-06, "loss": 17.3097, "step": 10355 }, { "epoch": 0.18929935840020473, "grad_norm": 6.701323732100626, "learning_rate": 9.349309306504752e-06, "loss": 17.5384, "step": 10356 }, { "epoch": 0.18931763759665124, "grad_norm": 6.497133703269843, "learning_rate": 9.349163277755646e-06, "loss": 17.5846, "step": 10357 }, { "epoch": 0.18933591679309777, "grad_norm": 6.982833709429088, "learning_rate": 9.349017233763099e-06, "loss": 17.5816, "step": 10358 }, { "epoch": 0.1893541959895443, "grad_norm": 6.928286726453096, "learning_rate": 9.348871174527622e-06, "loss": 17.7773, "step": 10359 }, { "epoch": 0.18937247518599082, "grad_norm": 5.814438366221879, "learning_rate": 9.348725100049732e-06, "loss": 17.2186, "step": 10360 }, { "epoch": 0.18939075438243735, "grad_norm": 6.791611147286944, "learning_rate": 9.348579010329938e-06, "loss": 17.5189, "step": 10361 }, { "epoch": 0.18940903357888386, "grad_norm": 7.006833346279452, "learning_rate": 9.34843290536875e-06, "loss": 17.6483, "step": 10362 }, { "epoch": 0.1894273127753304, "grad_norm": 6.7005998859096225, "learning_rate": 9.348286785166682e-06, "loss": 17.5486, "step": 10363 }, { "epoch": 0.18944559197177693, "grad_norm": 7.311681418110757, "learning_rate": 9.348140649724246e-06, "loss": 17.7656, "step": 10364 }, { "epoch": 0.18946387116822344, "grad_norm": 5.209233208298461, "learning_rate": 9.347994499041958e-06, "loss": 16.818, "step": 10365 }, { "epoch": 0.18948215036466998, "grad_norm": 8.244729645321057, "learning_rate": 9.347848333120321e-06, "loss": 18.3306, "step": 10366 }, { "epoch": 0.18950042956111648, "grad_norm": 5.912515092024369, "learning_rate": 9.347702151959856e-06, "loss": 17.4306, "step": 10367 }, { "epoch": 0.18951870875756302, "grad_norm": 7.409086396633225, "learning_rate": 9.347555955561072e-06, "loss": 17.9846, "step": 10368 }, { "epoch": 0.18953698795400953, "grad_norm": 7.55162462688707, "learning_rate": 9.347409743924483e-06, "loss": 17.9045, "step": 10369 }, { "epoch": 0.18955526715045606, "grad_norm": 6.335150015054112, "learning_rate": 9.347263517050598e-06, "loss": 17.3046, "step": 10370 }, { "epoch": 0.1895735463469026, "grad_norm": 4.602767659201406, "learning_rate": 9.347117274939933e-06, "loss": 16.7154, "step": 10371 }, { "epoch": 0.1895918255433491, "grad_norm": 6.774494471367734, "learning_rate": 9.346971017592996e-06, "loss": 17.6588, "step": 10372 }, { "epoch": 0.18961010473979564, "grad_norm": 7.728532927150962, "learning_rate": 9.346824745010306e-06, "loss": 18.0744, "step": 10373 }, { "epoch": 0.18962838393624215, "grad_norm": 5.58968017477524, "learning_rate": 9.346678457192372e-06, "loss": 17.0602, "step": 10374 }, { "epoch": 0.1896466631326887, "grad_norm": 7.543744868813387, "learning_rate": 9.346532154139707e-06, "loss": 18.1805, "step": 10375 }, { "epoch": 0.18966494232913522, "grad_norm": 7.632468889333532, "learning_rate": 9.346385835852824e-06, "loss": 17.1272, "step": 10376 }, { "epoch": 0.18968322152558173, "grad_norm": 7.269525125102864, "learning_rate": 9.346239502332234e-06, "loss": 17.8056, "step": 10377 }, { "epoch": 0.18970150072202827, "grad_norm": 7.960912859082299, "learning_rate": 9.346093153578455e-06, "loss": 18.0061, "step": 10378 }, { "epoch": 0.18971977991847477, "grad_norm": 7.9004152781134795, "learning_rate": 9.345946789591995e-06, "loss": 18.1292, "step": 10379 }, { "epoch": 0.1897380591149213, "grad_norm": 8.526333028061192, "learning_rate": 9.345800410373366e-06, "loss": 18.1318, "step": 10380 }, { "epoch": 0.18975633831136784, "grad_norm": 6.8944897455067125, "learning_rate": 9.345654015923088e-06, "loss": 17.6491, "step": 10381 }, { "epoch": 0.18977461750781435, "grad_norm": 7.406943808270386, "learning_rate": 9.345507606241668e-06, "loss": 17.9084, "step": 10382 }, { "epoch": 0.1897928967042609, "grad_norm": 6.049216269328242, "learning_rate": 9.34536118132962e-06, "loss": 17.2285, "step": 10383 }, { "epoch": 0.1898111759007074, "grad_norm": 7.43964672162737, "learning_rate": 9.345214741187461e-06, "loss": 17.5745, "step": 10384 }, { "epoch": 0.18982945509715393, "grad_norm": 8.26994894520124, "learning_rate": 9.345068285815698e-06, "loss": 18.1613, "step": 10385 }, { "epoch": 0.18984773429360044, "grad_norm": 7.707488960899155, "learning_rate": 9.34492181521485e-06, "loss": 17.9862, "step": 10386 }, { "epoch": 0.18986601349004698, "grad_norm": 16.5411776967301, "learning_rate": 9.344775329385427e-06, "loss": 17.9865, "step": 10387 }, { "epoch": 0.1898842926864935, "grad_norm": 5.718349447364829, "learning_rate": 9.344628828327944e-06, "loss": 17.2111, "step": 10388 }, { "epoch": 0.18990257188294002, "grad_norm": 5.886715887936213, "learning_rate": 9.344482312042914e-06, "loss": 17.2884, "step": 10389 }, { "epoch": 0.18992085107938655, "grad_norm": 6.4689961628351735, "learning_rate": 9.34433578053085e-06, "loss": 17.2763, "step": 10390 }, { "epoch": 0.18993913027583306, "grad_norm": 5.2140418989877615, "learning_rate": 9.344189233792265e-06, "loss": 16.9948, "step": 10391 }, { "epoch": 0.1899574094722796, "grad_norm": 6.900030581357269, "learning_rate": 9.344042671827676e-06, "loss": 17.7844, "step": 10392 }, { "epoch": 0.18997568866872613, "grad_norm": 9.860719450425975, "learning_rate": 9.343896094637593e-06, "loss": 18.0996, "step": 10393 }, { "epoch": 0.18999396786517264, "grad_norm": 6.861050770224629, "learning_rate": 9.343749502222532e-06, "loss": 17.8036, "step": 10394 }, { "epoch": 0.19001224706161918, "grad_norm": 6.806301180607863, "learning_rate": 9.343602894583004e-06, "loss": 17.6822, "step": 10395 }, { "epoch": 0.19003052625806569, "grad_norm": 6.882477940579044, "learning_rate": 9.343456271719527e-06, "loss": 17.7113, "step": 10396 }, { "epoch": 0.19004880545451222, "grad_norm": 6.843754715150214, "learning_rate": 9.34330963363261e-06, "loss": 17.2852, "step": 10397 }, { "epoch": 0.19006708465095876, "grad_norm": 7.827123227667387, "learning_rate": 9.343162980322773e-06, "loss": 18.1252, "step": 10398 }, { "epoch": 0.19008536384740526, "grad_norm": 8.605165913765676, "learning_rate": 9.343016311790525e-06, "loss": 18.6341, "step": 10399 }, { "epoch": 0.1901036430438518, "grad_norm": 6.029185270059626, "learning_rate": 9.342869628036382e-06, "loss": 17.426, "step": 10400 }, { "epoch": 0.1901219222402983, "grad_norm": 6.334705243565285, "learning_rate": 9.342722929060858e-06, "loss": 17.5421, "step": 10401 }, { "epoch": 0.19014020143674484, "grad_norm": 6.863610700531152, "learning_rate": 9.342576214864466e-06, "loss": 17.9516, "step": 10402 }, { "epoch": 0.19015848063319135, "grad_norm": 6.793845212938461, "learning_rate": 9.342429485447721e-06, "loss": 17.5948, "step": 10403 }, { "epoch": 0.1901767598296379, "grad_norm": 5.971066691459425, "learning_rate": 9.342282740811139e-06, "loss": 17.4468, "step": 10404 }, { "epoch": 0.19019503902608442, "grad_norm": 8.698390597005583, "learning_rate": 9.342135980955233e-06, "loss": 18.2342, "step": 10405 }, { "epoch": 0.19021331822253093, "grad_norm": 6.172372569569622, "learning_rate": 9.341989205880516e-06, "loss": 17.3087, "step": 10406 }, { "epoch": 0.19023159741897747, "grad_norm": 5.496960166619962, "learning_rate": 9.341842415587502e-06, "loss": 17.0624, "step": 10407 }, { "epoch": 0.19024987661542397, "grad_norm": 7.701462103673928, "learning_rate": 9.34169561007671e-06, "loss": 18.0715, "step": 10408 }, { "epoch": 0.1902681558118705, "grad_norm": 7.671296168887727, "learning_rate": 9.341548789348652e-06, "loss": 17.813, "step": 10409 }, { "epoch": 0.19028643500831705, "grad_norm": 5.881014951677098, "learning_rate": 9.34140195340384e-06, "loss": 17.299, "step": 10410 }, { "epoch": 0.19030471420476355, "grad_norm": 8.036024617783314, "learning_rate": 9.341255102242792e-06, "loss": 18.0828, "step": 10411 }, { "epoch": 0.1903229934012101, "grad_norm": 6.9130025699726065, "learning_rate": 9.34110823586602e-06, "loss": 17.6697, "step": 10412 }, { "epoch": 0.1903412725976566, "grad_norm": 8.82180752045835, "learning_rate": 9.340961354274043e-06, "loss": 18.1533, "step": 10413 }, { "epoch": 0.19035955179410313, "grad_norm": 5.968379644562009, "learning_rate": 9.34081445746737e-06, "loss": 17.1328, "step": 10414 }, { "epoch": 0.19037783099054967, "grad_norm": 7.278498339936495, "learning_rate": 9.340667545446522e-06, "loss": 18.0188, "step": 10415 }, { "epoch": 0.19039611018699618, "grad_norm": 6.124699532025761, "learning_rate": 9.34052061821201e-06, "loss": 17.4712, "step": 10416 }, { "epoch": 0.1904143893834427, "grad_norm": 6.572271076368137, "learning_rate": 9.34037367576435e-06, "loss": 17.8327, "step": 10417 }, { "epoch": 0.19043266857988922, "grad_norm": 5.6625490691218845, "learning_rate": 9.340226718104057e-06, "loss": 16.9799, "step": 10418 }, { "epoch": 0.19045094777633576, "grad_norm": 8.470132525616112, "learning_rate": 9.340079745231645e-06, "loss": 17.9597, "step": 10419 }, { "epoch": 0.19046922697278226, "grad_norm": 7.046150956276198, "learning_rate": 9.33993275714763e-06, "loss": 17.8574, "step": 10420 }, { "epoch": 0.1904875061692288, "grad_norm": 6.966526694615226, "learning_rate": 9.339785753852529e-06, "loss": 17.6667, "step": 10421 }, { "epoch": 0.19050578536567533, "grad_norm": 5.102008911949953, "learning_rate": 9.339638735346854e-06, "loss": 16.9083, "step": 10422 }, { "epoch": 0.19052406456212184, "grad_norm": 6.537757982753672, "learning_rate": 9.339491701631122e-06, "loss": 17.4962, "step": 10423 }, { "epoch": 0.19054234375856838, "grad_norm": 6.043523754574515, "learning_rate": 9.339344652705848e-06, "loss": 17.3628, "step": 10424 }, { "epoch": 0.19056062295501489, "grad_norm": 8.846123908373793, "learning_rate": 9.339197588571549e-06, "loss": 18.4887, "step": 10425 }, { "epoch": 0.19057890215146142, "grad_norm": 9.695608602744546, "learning_rate": 9.339050509228737e-06, "loss": 18.5418, "step": 10426 }, { "epoch": 0.19059718134790796, "grad_norm": 6.77165990869322, "learning_rate": 9.33890341467793e-06, "loss": 17.6323, "step": 10427 }, { "epoch": 0.19061546054435446, "grad_norm": 6.4706053546411395, "learning_rate": 9.338756304919644e-06, "loss": 17.5203, "step": 10428 }, { "epoch": 0.190633739740801, "grad_norm": 7.2279891914592245, "learning_rate": 9.338609179954393e-06, "loss": 18.0453, "step": 10429 }, { "epoch": 0.1906520189372475, "grad_norm": 5.717809797629613, "learning_rate": 9.338462039782695e-06, "loss": 17.0092, "step": 10430 }, { "epoch": 0.19067029813369404, "grad_norm": 6.453387703051551, "learning_rate": 9.33831488440506e-06, "loss": 17.5693, "step": 10431 }, { "epoch": 0.19068857733014058, "grad_norm": 6.286712840872121, "learning_rate": 9.33816771382201e-06, "loss": 17.3303, "step": 10432 }, { "epoch": 0.1907068565265871, "grad_norm": 6.518167738374248, "learning_rate": 9.33802052803406e-06, "loss": 17.5911, "step": 10433 }, { "epoch": 0.19072513572303362, "grad_norm": 6.043467175502499, "learning_rate": 9.337873327041723e-06, "loss": 17.274, "step": 10434 }, { "epoch": 0.19074341491948013, "grad_norm": 7.354373411650934, "learning_rate": 9.337726110845518e-06, "loss": 17.6185, "step": 10435 }, { "epoch": 0.19076169411592667, "grad_norm": 6.884420085630948, "learning_rate": 9.337578879445957e-06, "loss": 17.7226, "step": 10436 }, { "epoch": 0.19077997331237317, "grad_norm": 7.995802622064922, "learning_rate": 9.33743163284356e-06, "loss": 18.1389, "step": 10437 }, { "epoch": 0.1907982525088197, "grad_norm": 6.190516487906699, "learning_rate": 9.337284371038841e-06, "loss": 17.4509, "step": 10438 }, { "epoch": 0.19081653170526625, "grad_norm": 7.035449624213116, "learning_rate": 9.337137094032316e-06, "loss": 17.7969, "step": 10439 }, { "epoch": 0.19083481090171275, "grad_norm": 5.999930652722676, "learning_rate": 9.336989801824504e-06, "loss": 17.3565, "step": 10440 }, { "epoch": 0.1908530900981593, "grad_norm": 7.1151169267855705, "learning_rate": 9.336842494415916e-06, "loss": 18.1344, "step": 10441 }, { "epoch": 0.1908713692946058, "grad_norm": 6.458074269714437, "learning_rate": 9.336695171807074e-06, "loss": 17.5333, "step": 10442 }, { "epoch": 0.19088964849105233, "grad_norm": 5.981033483435007, "learning_rate": 9.33654783399849e-06, "loss": 17.2124, "step": 10443 }, { "epoch": 0.19090792768749887, "grad_norm": 6.505129219203311, "learning_rate": 9.336400480990684e-06, "loss": 17.423, "step": 10444 }, { "epoch": 0.19092620688394538, "grad_norm": 6.410413952750364, "learning_rate": 9.336253112784169e-06, "loss": 17.5055, "step": 10445 }, { "epoch": 0.1909444860803919, "grad_norm": 5.943410520611284, "learning_rate": 9.336105729379463e-06, "loss": 17.3018, "step": 10446 }, { "epoch": 0.19096276527683842, "grad_norm": 6.662730581833728, "learning_rate": 9.335958330777084e-06, "loss": 17.4771, "step": 10447 }, { "epoch": 0.19098104447328496, "grad_norm": 7.714959509225025, "learning_rate": 9.335810916977547e-06, "loss": 17.6309, "step": 10448 }, { "epoch": 0.1909993236697315, "grad_norm": 7.267940668830873, "learning_rate": 9.335663487981368e-06, "loss": 17.585, "step": 10449 }, { "epoch": 0.191017602866178, "grad_norm": 6.499461630992459, "learning_rate": 9.335516043789065e-06, "loss": 17.4037, "step": 10450 }, { "epoch": 0.19103588206262453, "grad_norm": 7.661848776562771, "learning_rate": 9.335368584401156e-06, "loss": 18.0498, "step": 10451 }, { "epoch": 0.19105416125907104, "grad_norm": 7.489698080064114, "learning_rate": 9.335221109818154e-06, "loss": 17.7291, "step": 10452 }, { "epoch": 0.19107244045551758, "grad_norm": 6.139836763169081, "learning_rate": 9.33507362004058e-06, "loss": 17.4132, "step": 10453 }, { "epoch": 0.19109071965196409, "grad_norm": 7.4440984180972904, "learning_rate": 9.334926115068949e-06, "loss": 17.8699, "step": 10454 }, { "epoch": 0.19110899884841062, "grad_norm": 6.946981728420909, "learning_rate": 9.334778594903777e-06, "loss": 17.8496, "step": 10455 }, { "epoch": 0.19112727804485716, "grad_norm": 7.214419541432831, "learning_rate": 9.334631059545583e-06, "loss": 17.9652, "step": 10456 }, { "epoch": 0.19114555724130367, "grad_norm": 6.70901587454331, "learning_rate": 9.334483508994883e-06, "loss": 17.8415, "step": 10457 }, { "epoch": 0.1911638364377502, "grad_norm": 8.014234392354286, "learning_rate": 9.334335943252196e-06, "loss": 18.1527, "step": 10458 }, { "epoch": 0.1911821156341967, "grad_norm": 6.94485658188503, "learning_rate": 9.334188362318035e-06, "loss": 17.511, "step": 10459 }, { "epoch": 0.19120039483064324, "grad_norm": 7.417707844606827, "learning_rate": 9.33404076619292e-06, "loss": 17.6998, "step": 10460 }, { "epoch": 0.19121867402708978, "grad_norm": 7.657042394726569, "learning_rate": 9.333893154877369e-06, "loss": 18.0186, "step": 10461 }, { "epoch": 0.1912369532235363, "grad_norm": 10.22892844201954, "learning_rate": 9.3337455283719e-06, "loss": 18.4338, "step": 10462 }, { "epoch": 0.19125523241998282, "grad_norm": 7.238723278249536, "learning_rate": 9.333597886677027e-06, "loss": 17.7325, "step": 10463 }, { "epoch": 0.19127351161642933, "grad_norm": 5.993710460992475, "learning_rate": 9.33345022979327e-06, "loss": 17.3588, "step": 10464 }, { "epoch": 0.19129179081287587, "grad_norm": 6.340197372919521, "learning_rate": 9.333302557721146e-06, "loss": 17.7382, "step": 10465 }, { "epoch": 0.1913100700093224, "grad_norm": 6.791912878405465, "learning_rate": 9.333154870461174e-06, "loss": 17.539, "step": 10466 }, { "epoch": 0.1913283492057689, "grad_norm": 6.060523017852661, "learning_rate": 9.333007168013868e-06, "loss": 17.3509, "step": 10467 }, { "epoch": 0.19134662840221545, "grad_norm": 7.2323114052940305, "learning_rate": 9.33285945037975e-06, "loss": 17.8316, "step": 10468 }, { "epoch": 0.19136490759866195, "grad_norm": 6.52251643048427, "learning_rate": 9.332711717559334e-06, "loss": 17.3098, "step": 10469 }, { "epoch": 0.1913831867951085, "grad_norm": 7.333715729836014, "learning_rate": 9.33256396955314e-06, "loss": 17.8015, "step": 10470 }, { "epoch": 0.191401465991555, "grad_norm": 7.244729110021288, "learning_rate": 9.332416206361686e-06, "loss": 17.6741, "step": 10471 }, { "epoch": 0.19141974518800153, "grad_norm": 7.336581266459314, "learning_rate": 9.332268427985487e-06, "loss": 17.6178, "step": 10472 }, { "epoch": 0.19143802438444807, "grad_norm": 9.518093122476557, "learning_rate": 9.332120634425067e-06, "loss": 18.0936, "step": 10473 }, { "epoch": 0.19145630358089458, "grad_norm": 5.7833375579983075, "learning_rate": 9.331972825680935e-06, "loss": 17.1807, "step": 10474 }, { "epoch": 0.1914745827773411, "grad_norm": 6.422057620330305, "learning_rate": 9.331825001753617e-06, "loss": 17.5081, "step": 10475 }, { "epoch": 0.19149286197378762, "grad_norm": 7.77793057493313, "learning_rate": 9.331677162643629e-06, "loss": 17.7322, "step": 10476 }, { "epoch": 0.19151114117023416, "grad_norm": 6.446977267695607, "learning_rate": 9.331529308351485e-06, "loss": 17.4001, "step": 10477 }, { "epoch": 0.1915294203666807, "grad_norm": 5.9778560695633125, "learning_rate": 9.33138143887771e-06, "loss": 17.3762, "step": 10478 }, { "epoch": 0.1915476995631272, "grad_norm": 6.533211496809169, "learning_rate": 9.331233554222819e-06, "loss": 17.6646, "step": 10479 }, { "epoch": 0.19156597875957374, "grad_norm": 5.958983266303413, "learning_rate": 9.331085654387328e-06, "loss": 17.2335, "step": 10480 }, { "epoch": 0.19158425795602024, "grad_norm": 6.083401883455692, "learning_rate": 9.33093773937176e-06, "loss": 17.2894, "step": 10481 }, { "epoch": 0.19160253715246678, "grad_norm": 6.732405095493683, "learning_rate": 9.33078980917663e-06, "loss": 17.529, "step": 10482 }, { "epoch": 0.19162081634891331, "grad_norm": 7.588140581672176, "learning_rate": 9.330641863802457e-06, "loss": 17.6138, "step": 10483 }, { "epoch": 0.19163909554535982, "grad_norm": 6.839579510099215, "learning_rate": 9.33049390324976e-06, "loss": 17.571, "step": 10484 }, { "epoch": 0.19165737474180636, "grad_norm": 6.41343049608932, "learning_rate": 9.330345927519057e-06, "loss": 17.846, "step": 10485 }, { "epoch": 0.19167565393825287, "grad_norm": 7.451348130479233, "learning_rate": 9.33019793661087e-06, "loss": 17.8147, "step": 10486 }, { "epoch": 0.1916939331346994, "grad_norm": 5.82472880839712, "learning_rate": 9.330049930525713e-06, "loss": 17.2141, "step": 10487 }, { "epoch": 0.1917122123311459, "grad_norm": 5.445147209150908, "learning_rate": 9.329901909264107e-06, "loss": 16.9596, "step": 10488 }, { "epoch": 0.19173049152759244, "grad_norm": 6.562132849224037, "learning_rate": 9.32975387282657e-06, "loss": 17.6303, "step": 10489 }, { "epoch": 0.19174877072403898, "grad_norm": 6.901108693152038, "learning_rate": 9.329605821213623e-06, "loss": 17.7932, "step": 10490 }, { "epoch": 0.1917670499204855, "grad_norm": 6.675714856643467, "learning_rate": 9.329457754425782e-06, "loss": 17.564, "step": 10491 }, { "epoch": 0.19178532911693202, "grad_norm": 7.517726578104453, "learning_rate": 9.329309672463567e-06, "loss": 18.0812, "step": 10492 }, { "epoch": 0.19180360831337853, "grad_norm": 7.3784629388301255, "learning_rate": 9.329161575327499e-06, "loss": 17.8641, "step": 10493 }, { "epoch": 0.19182188750982507, "grad_norm": 6.862251404933019, "learning_rate": 9.329013463018093e-06, "loss": 17.6595, "step": 10494 }, { "epoch": 0.1918401667062716, "grad_norm": 6.260407870718441, "learning_rate": 9.328865335535872e-06, "loss": 17.5309, "step": 10495 }, { "epoch": 0.1918584459027181, "grad_norm": 5.439647176748364, "learning_rate": 9.328717192881353e-06, "loss": 17.1329, "step": 10496 }, { "epoch": 0.19187672509916465, "grad_norm": 6.668790465111025, "learning_rate": 9.328569035055058e-06, "loss": 17.8241, "step": 10497 }, { "epoch": 0.19189500429561115, "grad_norm": 6.47700749270235, "learning_rate": 9.3284208620575e-06, "loss": 17.7274, "step": 10498 }, { "epoch": 0.1919132834920577, "grad_norm": 7.3122825475634485, "learning_rate": 9.328272673889206e-06, "loss": 17.7026, "step": 10499 }, { "epoch": 0.19193156268850423, "grad_norm": 7.002894709055904, "learning_rate": 9.32812447055069e-06, "loss": 17.5361, "step": 10500 }, { "epoch": 0.19194984188495073, "grad_norm": 8.460734664670872, "learning_rate": 9.327976252042474e-06, "loss": 18.4211, "step": 10501 }, { "epoch": 0.19196812108139727, "grad_norm": 6.835754991854399, "learning_rate": 9.327828018365078e-06, "loss": 17.9194, "step": 10502 }, { "epoch": 0.19198640027784378, "grad_norm": 6.5207677165144355, "learning_rate": 9.327679769519017e-06, "loss": 17.5511, "step": 10503 }, { "epoch": 0.1920046794742903, "grad_norm": 6.307911366992025, "learning_rate": 9.327531505504818e-06, "loss": 17.3657, "step": 10504 }, { "epoch": 0.19202295867073682, "grad_norm": 5.608000385527272, "learning_rate": 9.327383226322995e-06, "loss": 17.2065, "step": 10505 }, { "epoch": 0.19204123786718336, "grad_norm": 7.304296844315352, "learning_rate": 9.327234931974068e-06, "loss": 17.6759, "step": 10506 }, { "epoch": 0.1920595170636299, "grad_norm": 7.672875311084413, "learning_rate": 9.327086622458559e-06, "loss": 18.0848, "step": 10507 }, { "epoch": 0.1920777962600764, "grad_norm": 7.913357073006939, "learning_rate": 9.326938297776987e-06, "loss": 17.7198, "step": 10508 }, { "epoch": 0.19209607545652294, "grad_norm": 6.333736124268162, "learning_rate": 9.326789957929872e-06, "loss": 17.3474, "step": 10509 }, { "epoch": 0.19211435465296944, "grad_norm": 6.905903139041645, "learning_rate": 9.326641602917734e-06, "loss": 17.6535, "step": 10510 }, { "epoch": 0.19213263384941598, "grad_norm": 6.520776450934488, "learning_rate": 9.326493232741092e-06, "loss": 17.5217, "step": 10511 }, { "epoch": 0.19215091304586251, "grad_norm": 7.317714389966107, "learning_rate": 9.326344847400466e-06, "loss": 17.874, "step": 10512 }, { "epoch": 0.19216919224230902, "grad_norm": 6.8801757926825875, "learning_rate": 9.326196446896377e-06, "loss": 17.6736, "step": 10513 }, { "epoch": 0.19218747143875556, "grad_norm": 5.998845084749652, "learning_rate": 9.326048031229346e-06, "loss": 17.5381, "step": 10514 }, { "epoch": 0.19220575063520207, "grad_norm": 7.2534001313545335, "learning_rate": 9.32589960039989e-06, "loss": 17.9485, "step": 10515 }, { "epoch": 0.1922240298316486, "grad_norm": 6.1874438901416156, "learning_rate": 9.325751154408534e-06, "loss": 17.2185, "step": 10516 }, { "epoch": 0.19224230902809514, "grad_norm": 7.784618928324922, "learning_rate": 9.325602693255793e-06, "loss": 18.0031, "step": 10517 }, { "epoch": 0.19226058822454165, "grad_norm": 5.117871695439608, "learning_rate": 9.325454216942192e-06, "loss": 16.8991, "step": 10518 }, { "epoch": 0.19227886742098818, "grad_norm": 6.289985694303816, "learning_rate": 9.325305725468248e-06, "loss": 17.4479, "step": 10519 }, { "epoch": 0.1922971466174347, "grad_norm": 7.29537081876363, "learning_rate": 9.325157218834481e-06, "loss": 18.2189, "step": 10520 }, { "epoch": 0.19231542581388122, "grad_norm": 6.799227527051088, "learning_rate": 9.325008697041418e-06, "loss": 17.4002, "step": 10521 }, { "epoch": 0.19233370501032773, "grad_norm": 8.351520976907086, "learning_rate": 9.324860160089571e-06, "loss": 17.766, "step": 10522 }, { "epoch": 0.19235198420677427, "grad_norm": 7.0551663389754555, "learning_rate": 9.324711607979466e-06, "loss": 17.8016, "step": 10523 }, { "epoch": 0.1923702634032208, "grad_norm": 6.490942895032392, "learning_rate": 9.324563040711621e-06, "loss": 17.4714, "step": 10524 }, { "epoch": 0.1923885425996673, "grad_norm": 7.953588425719637, "learning_rate": 9.32441445828656e-06, "loss": 18.4641, "step": 10525 }, { "epoch": 0.19240682179611385, "grad_norm": 5.234308943229955, "learning_rate": 9.3242658607048e-06, "loss": 16.9446, "step": 10526 }, { "epoch": 0.19242510099256036, "grad_norm": 7.309660964790849, "learning_rate": 9.324117247966863e-06, "loss": 17.6846, "step": 10527 }, { "epoch": 0.1924433801890069, "grad_norm": 7.29274034818036, "learning_rate": 9.323968620073271e-06, "loss": 18.0063, "step": 10528 }, { "epoch": 0.19246165938545343, "grad_norm": 7.037351986652799, "learning_rate": 9.323819977024545e-06, "loss": 17.624, "step": 10529 }, { "epoch": 0.19247993858189993, "grad_norm": 7.479671712839552, "learning_rate": 9.323671318821203e-06, "loss": 17.892, "step": 10530 }, { "epoch": 0.19249821777834647, "grad_norm": 9.330847404690038, "learning_rate": 9.32352264546377e-06, "loss": 18.7533, "step": 10531 }, { "epoch": 0.19251649697479298, "grad_norm": 6.156017588113379, "learning_rate": 9.323373956952764e-06, "loss": 17.4061, "step": 10532 }, { "epoch": 0.1925347761712395, "grad_norm": 6.484326782442109, "learning_rate": 9.323225253288709e-06, "loss": 17.5862, "step": 10533 }, { "epoch": 0.19255305536768605, "grad_norm": 7.395482460822145, "learning_rate": 9.323076534472123e-06, "loss": 18.0998, "step": 10534 }, { "epoch": 0.19257133456413256, "grad_norm": 7.022486083857744, "learning_rate": 9.322927800503529e-06, "loss": 17.7172, "step": 10535 }, { "epoch": 0.1925896137605791, "grad_norm": 6.440153027775848, "learning_rate": 9.32277905138345e-06, "loss": 17.2067, "step": 10536 }, { "epoch": 0.1926078929570256, "grad_norm": 6.646389189065471, "learning_rate": 9.322630287112404e-06, "loss": 17.7001, "step": 10537 }, { "epoch": 0.19262617215347214, "grad_norm": 6.233991271662693, "learning_rate": 9.322481507690916e-06, "loss": 17.4111, "step": 10538 }, { "epoch": 0.19264445134991864, "grad_norm": 7.583188301422039, "learning_rate": 9.322332713119501e-06, "loss": 18.0243, "step": 10539 }, { "epoch": 0.19266273054636518, "grad_norm": 6.985848731048602, "learning_rate": 9.322183903398689e-06, "loss": 17.4681, "step": 10540 }, { "epoch": 0.19268100974281172, "grad_norm": 6.944841568800823, "learning_rate": 9.322035078528996e-06, "loss": 17.6833, "step": 10541 }, { "epoch": 0.19269928893925822, "grad_norm": 6.769110657349653, "learning_rate": 9.321886238510945e-06, "loss": 18.0905, "step": 10542 }, { "epoch": 0.19271756813570476, "grad_norm": 6.175205785648062, "learning_rate": 9.321737383345059e-06, "loss": 17.5036, "step": 10543 }, { "epoch": 0.19273584733215127, "grad_norm": 7.39292483491582, "learning_rate": 9.321588513031857e-06, "loss": 18.1339, "step": 10544 }, { "epoch": 0.1927541265285978, "grad_norm": 7.62593299399902, "learning_rate": 9.321439627571863e-06, "loss": 17.6942, "step": 10545 }, { "epoch": 0.19277240572504434, "grad_norm": 7.852047992226749, "learning_rate": 9.321290726965598e-06, "loss": 18.4223, "step": 10546 }, { "epoch": 0.19279068492149085, "grad_norm": 6.79943435019734, "learning_rate": 9.321141811213582e-06, "loss": 17.6777, "step": 10547 }, { "epoch": 0.19280896411793738, "grad_norm": 5.801317405164884, "learning_rate": 9.320992880316342e-06, "loss": 17.3783, "step": 10548 }, { "epoch": 0.1928272433143839, "grad_norm": 11.13761053265287, "learning_rate": 9.320843934274396e-06, "loss": 18.3038, "step": 10549 }, { "epoch": 0.19284552251083042, "grad_norm": 7.955616697808509, "learning_rate": 9.320694973088267e-06, "loss": 18.0079, "step": 10550 }, { "epoch": 0.19286380170727696, "grad_norm": 8.347267151769406, "learning_rate": 9.320545996758477e-06, "loss": 18.0559, "step": 10551 }, { "epoch": 0.19288208090372347, "grad_norm": 6.7086603689711115, "learning_rate": 9.320397005285548e-06, "loss": 17.5578, "step": 10552 }, { "epoch": 0.19290036010017, "grad_norm": 6.701291688077603, "learning_rate": 9.320247998670003e-06, "loss": 17.3163, "step": 10553 }, { "epoch": 0.1929186392966165, "grad_norm": 6.043578464403494, "learning_rate": 9.320098976912362e-06, "loss": 17.2764, "step": 10554 }, { "epoch": 0.19293691849306305, "grad_norm": 6.42510153394605, "learning_rate": 9.319949940013149e-06, "loss": 17.6753, "step": 10555 }, { "epoch": 0.19295519768950956, "grad_norm": 7.745886185222929, "learning_rate": 9.319800887972887e-06, "loss": 18.0879, "step": 10556 }, { "epoch": 0.1929734768859561, "grad_norm": 7.468077190633721, "learning_rate": 9.3196518207921e-06, "loss": 17.7699, "step": 10557 }, { "epoch": 0.19299175608240263, "grad_norm": 7.070802593273419, "learning_rate": 9.319502738471304e-06, "loss": 18.0692, "step": 10558 }, { "epoch": 0.19301003527884913, "grad_norm": 6.909086042391009, "learning_rate": 9.319353641011028e-06, "loss": 17.9103, "step": 10559 }, { "epoch": 0.19302831447529567, "grad_norm": 7.037366292756734, "learning_rate": 9.319204528411794e-06, "loss": 17.8153, "step": 10560 }, { "epoch": 0.19304659367174218, "grad_norm": 7.930576037493707, "learning_rate": 9.31905540067412e-06, "loss": 18.2639, "step": 10561 }, { "epoch": 0.1930648728681887, "grad_norm": 6.254364915771636, "learning_rate": 9.318906257798533e-06, "loss": 17.3226, "step": 10562 }, { "epoch": 0.19308315206463525, "grad_norm": 7.822969907801438, "learning_rate": 9.318757099785554e-06, "loss": 18.1452, "step": 10563 }, { "epoch": 0.19310143126108176, "grad_norm": 5.464234538796677, "learning_rate": 9.318607926635708e-06, "loss": 17.168, "step": 10564 }, { "epoch": 0.1931197104575283, "grad_norm": 7.450394358946171, "learning_rate": 9.318458738349514e-06, "loss": 18.4264, "step": 10565 }, { "epoch": 0.1931379896539748, "grad_norm": 6.567630052692855, "learning_rate": 9.318309534927496e-06, "loss": 17.5298, "step": 10566 }, { "epoch": 0.19315626885042134, "grad_norm": 6.996251927452832, "learning_rate": 9.31816031637018e-06, "loss": 17.6233, "step": 10567 }, { "epoch": 0.19317454804686787, "grad_norm": 7.19843337457532, "learning_rate": 9.318011082678084e-06, "loss": 17.7532, "step": 10568 }, { "epoch": 0.19319282724331438, "grad_norm": 8.16279084086477, "learning_rate": 9.317861833851737e-06, "loss": 18.0928, "step": 10569 }, { "epoch": 0.19321110643976092, "grad_norm": 6.8381809249343215, "learning_rate": 9.317712569891656e-06, "loss": 17.8484, "step": 10570 }, { "epoch": 0.19322938563620742, "grad_norm": 6.215993573824501, "learning_rate": 9.31756329079837e-06, "loss": 17.2163, "step": 10571 }, { "epoch": 0.19324766483265396, "grad_norm": 8.464180348844982, "learning_rate": 9.317413996572398e-06, "loss": 17.8885, "step": 10572 }, { "epoch": 0.19326594402910047, "grad_norm": 5.687433583149414, "learning_rate": 9.317264687214266e-06, "loss": 17.083, "step": 10573 }, { "epoch": 0.193284223225547, "grad_norm": 6.170371554072229, "learning_rate": 9.317115362724492e-06, "loss": 17.3101, "step": 10574 }, { "epoch": 0.19330250242199354, "grad_norm": 6.302181705482549, "learning_rate": 9.316966023103606e-06, "loss": 17.6045, "step": 10575 }, { "epoch": 0.19332078161844005, "grad_norm": 6.693383226821278, "learning_rate": 9.316816668352129e-06, "loss": 17.4768, "step": 10576 }, { "epoch": 0.19333906081488658, "grad_norm": 6.738883879953232, "learning_rate": 9.316667298470583e-06, "loss": 17.6461, "step": 10577 }, { "epoch": 0.1933573400113331, "grad_norm": 6.650902543790805, "learning_rate": 9.316517913459495e-06, "loss": 17.4843, "step": 10578 }, { "epoch": 0.19337561920777963, "grad_norm": 5.945529606703904, "learning_rate": 9.316368513319383e-06, "loss": 17.3006, "step": 10579 }, { "epoch": 0.19339389840422616, "grad_norm": 6.5654109763194315, "learning_rate": 9.316219098050777e-06, "loss": 17.4598, "step": 10580 }, { "epoch": 0.19341217760067267, "grad_norm": 9.49438091067944, "learning_rate": 9.316069667654196e-06, "loss": 18.5121, "step": 10581 }, { "epoch": 0.1934304567971192, "grad_norm": 7.18090129984022, "learning_rate": 9.315920222130163e-06, "loss": 17.6947, "step": 10582 }, { "epoch": 0.1934487359935657, "grad_norm": 8.277954961005245, "learning_rate": 9.315770761479209e-06, "loss": 18.0253, "step": 10583 }, { "epoch": 0.19346701519001225, "grad_norm": 9.693217376877973, "learning_rate": 9.31562128570185e-06, "loss": 18.3004, "step": 10584 }, { "epoch": 0.19348529438645878, "grad_norm": 7.771114929456253, "learning_rate": 9.315471794798614e-06, "loss": 17.9545, "step": 10585 }, { "epoch": 0.1935035735829053, "grad_norm": 6.674895642955014, "learning_rate": 9.315322288770024e-06, "loss": 17.8881, "step": 10586 }, { "epoch": 0.19352185277935183, "grad_norm": 6.407049298848421, "learning_rate": 9.315172767616602e-06, "loss": 17.1966, "step": 10587 }, { "epoch": 0.19354013197579834, "grad_norm": 6.797248276208662, "learning_rate": 9.315023231338875e-06, "loss": 17.5365, "step": 10588 }, { "epoch": 0.19355841117224487, "grad_norm": 7.642996263755199, "learning_rate": 9.314873679937366e-06, "loss": 18.1229, "step": 10589 }, { "epoch": 0.19357669036869138, "grad_norm": 6.3013328966463655, "learning_rate": 9.314724113412599e-06, "loss": 17.155, "step": 10590 }, { "epoch": 0.19359496956513791, "grad_norm": 7.847098323725367, "learning_rate": 9.3145745317651e-06, "loss": 18.1772, "step": 10591 }, { "epoch": 0.19361324876158445, "grad_norm": 7.028199647753349, "learning_rate": 9.31442493499539e-06, "loss": 17.6506, "step": 10592 }, { "epoch": 0.19363152795803096, "grad_norm": 7.597175193959776, "learning_rate": 9.314275323103994e-06, "loss": 17.9633, "step": 10593 }, { "epoch": 0.1936498071544775, "grad_norm": 7.568322940690469, "learning_rate": 9.31412569609144e-06, "loss": 18.1824, "step": 10594 }, { "epoch": 0.193668086350924, "grad_norm": 7.226247408972656, "learning_rate": 9.313976053958249e-06, "loss": 17.7692, "step": 10595 }, { "epoch": 0.19368636554737054, "grad_norm": 7.776126565740525, "learning_rate": 9.313826396704945e-06, "loss": 18.2208, "step": 10596 }, { "epoch": 0.19370464474381707, "grad_norm": 6.3933084635983, "learning_rate": 9.313676724332054e-06, "loss": 17.4391, "step": 10597 }, { "epoch": 0.19372292394026358, "grad_norm": 6.872603044415165, "learning_rate": 9.313527036840103e-06, "loss": 17.7786, "step": 10598 }, { "epoch": 0.19374120313671012, "grad_norm": 6.527531981879228, "learning_rate": 9.31337733422961e-06, "loss": 17.4212, "step": 10599 }, { "epoch": 0.19375948233315662, "grad_norm": 6.301659271412311, "learning_rate": 9.313227616501106e-06, "loss": 17.3434, "step": 10600 }, { "epoch": 0.19377776152960316, "grad_norm": 7.712829624281709, "learning_rate": 9.313077883655112e-06, "loss": 18.0235, "step": 10601 }, { "epoch": 0.1937960407260497, "grad_norm": 6.327685086474394, "learning_rate": 9.312928135692156e-06, "loss": 17.269, "step": 10602 }, { "epoch": 0.1938143199224962, "grad_norm": 7.151033400204553, "learning_rate": 9.312778372612761e-06, "loss": 17.8822, "step": 10603 }, { "epoch": 0.19383259911894274, "grad_norm": 5.65434615832553, "learning_rate": 9.312628594417452e-06, "loss": 16.9864, "step": 10604 }, { "epoch": 0.19385087831538925, "grad_norm": 7.526213554422045, "learning_rate": 9.312478801106754e-06, "loss": 18.1226, "step": 10605 }, { "epoch": 0.19386915751183578, "grad_norm": 7.449896069951539, "learning_rate": 9.312328992681191e-06, "loss": 18.0462, "step": 10606 }, { "epoch": 0.1938874367082823, "grad_norm": 7.313314532439838, "learning_rate": 9.312179169141292e-06, "loss": 17.7867, "step": 10607 }, { "epoch": 0.19390571590472883, "grad_norm": 7.535135216513795, "learning_rate": 9.312029330487576e-06, "loss": 17.8511, "step": 10608 }, { "epoch": 0.19392399510117536, "grad_norm": 7.6273741504347985, "learning_rate": 9.311879476720572e-06, "loss": 18.0612, "step": 10609 }, { "epoch": 0.19394227429762187, "grad_norm": 7.3073291992089615, "learning_rate": 9.311729607840804e-06, "loss": 17.8439, "step": 10610 }, { "epoch": 0.1939605534940684, "grad_norm": 9.447661877803624, "learning_rate": 9.3115797238488e-06, "loss": 18.8587, "step": 10611 }, { "epoch": 0.1939788326905149, "grad_norm": 6.56812485016018, "learning_rate": 9.311429824745082e-06, "loss": 17.6294, "step": 10612 }, { "epoch": 0.19399711188696145, "grad_norm": 6.583498507950827, "learning_rate": 9.311279910530177e-06, "loss": 17.5195, "step": 10613 }, { "epoch": 0.19401539108340798, "grad_norm": 6.368004010018813, "learning_rate": 9.31112998120461e-06, "loss": 17.4466, "step": 10614 }, { "epoch": 0.1940336702798545, "grad_norm": 6.474764642472075, "learning_rate": 9.310980036768905e-06, "loss": 17.5982, "step": 10615 }, { "epoch": 0.19405194947630103, "grad_norm": 6.380601550493955, "learning_rate": 9.310830077223592e-06, "loss": 17.4736, "step": 10616 }, { "epoch": 0.19407022867274754, "grad_norm": 7.878257561781664, "learning_rate": 9.310680102569192e-06, "loss": 17.8663, "step": 10617 }, { "epoch": 0.19408850786919407, "grad_norm": 7.34069814682072, "learning_rate": 9.310530112806232e-06, "loss": 17.8274, "step": 10618 }, { "epoch": 0.1941067870656406, "grad_norm": 8.218669835162492, "learning_rate": 9.310380107935238e-06, "loss": 17.9432, "step": 10619 }, { "epoch": 0.19412506626208711, "grad_norm": 7.3994748891972675, "learning_rate": 9.310230087956736e-06, "loss": 18.1808, "step": 10620 }, { "epoch": 0.19414334545853365, "grad_norm": 5.658178055925301, "learning_rate": 9.310080052871252e-06, "loss": 17.1895, "step": 10621 }, { "epoch": 0.19416162465498016, "grad_norm": 9.654160263039296, "learning_rate": 9.30993000267931e-06, "loss": 18.5662, "step": 10622 }, { "epoch": 0.1941799038514267, "grad_norm": 6.784352251926139, "learning_rate": 9.30977993738144e-06, "loss": 17.7657, "step": 10623 }, { "epoch": 0.1941981830478732, "grad_norm": 7.395272450714351, "learning_rate": 9.309629856978163e-06, "loss": 17.5341, "step": 10624 }, { "epoch": 0.19421646224431974, "grad_norm": 6.801140426252479, "learning_rate": 9.309479761470008e-06, "loss": 17.6007, "step": 10625 }, { "epoch": 0.19423474144076627, "grad_norm": 6.93117260309201, "learning_rate": 9.309329650857501e-06, "loss": 17.8933, "step": 10626 }, { "epoch": 0.19425302063721278, "grad_norm": 9.095551821382614, "learning_rate": 9.309179525141167e-06, "loss": 18.7848, "step": 10627 }, { "epoch": 0.19427129983365932, "grad_norm": 8.740240869200871, "learning_rate": 9.309029384321533e-06, "loss": 18.1465, "step": 10628 }, { "epoch": 0.19428957903010582, "grad_norm": 6.321263675952195, "learning_rate": 9.308879228399123e-06, "loss": 17.6637, "step": 10629 }, { "epoch": 0.19430785822655236, "grad_norm": 7.488813766017334, "learning_rate": 9.308729057374468e-06, "loss": 18.1155, "step": 10630 }, { "epoch": 0.1943261374229989, "grad_norm": 6.80232486213669, "learning_rate": 9.30857887124809e-06, "loss": 17.3481, "step": 10631 }, { "epoch": 0.1943444166194454, "grad_norm": 6.272396843005326, "learning_rate": 9.308428670020517e-06, "loss": 17.3226, "step": 10632 }, { "epoch": 0.19436269581589194, "grad_norm": 11.094605897917983, "learning_rate": 9.308278453692277e-06, "loss": 18.3589, "step": 10633 }, { "epoch": 0.19438097501233845, "grad_norm": 9.03773670620496, "learning_rate": 9.308128222263893e-06, "loss": 18.6966, "step": 10634 }, { "epoch": 0.19439925420878498, "grad_norm": 6.290513129643883, "learning_rate": 9.307977975735894e-06, "loss": 17.4824, "step": 10635 }, { "epoch": 0.19441753340523152, "grad_norm": 6.140177837419432, "learning_rate": 9.307827714108805e-06, "loss": 17.5921, "step": 10636 }, { "epoch": 0.19443581260167803, "grad_norm": 6.344802791878528, "learning_rate": 9.307677437383156e-06, "loss": 17.4377, "step": 10637 }, { "epoch": 0.19445409179812456, "grad_norm": 7.6818635390571, "learning_rate": 9.30752714555947e-06, "loss": 17.8808, "step": 10638 }, { "epoch": 0.19447237099457107, "grad_norm": 7.467567891909981, "learning_rate": 9.307376838638274e-06, "loss": 17.9963, "step": 10639 }, { "epoch": 0.1944906501910176, "grad_norm": 7.601758098061826, "learning_rate": 9.307226516620096e-06, "loss": 18.4309, "step": 10640 }, { "epoch": 0.1945089293874641, "grad_norm": 6.949781637654198, "learning_rate": 9.307076179505466e-06, "loss": 17.7668, "step": 10641 }, { "epoch": 0.19452720858391065, "grad_norm": 6.344390623967537, "learning_rate": 9.306925827294905e-06, "loss": 17.6243, "step": 10642 }, { "epoch": 0.19454548778035718, "grad_norm": 6.121671076286463, "learning_rate": 9.306775459988944e-06, "loss": 17.3048, "step": 10643 }, { "epoch": 0.1945637669768037, "grad_norm": 6.634782358505514, "learning_rate": 9.306625077588108e-06, "loss": 17.1535, "step": 10644 }, { "epoch": 0.19458204617325023, "grad_norm": 7.101410142798328, "learning_rate": 9.306474680092925e-06, "loss": 17.5748, "step": 10645 }, { "epoch": 0.19460032536969674, "grad_norm": 7.109025070919761, "learning_rate": 9.306324267503921e-06, "loss": 17.6979, "step": 10646 }, { "epoch": 0.19461860456614327, "grad_norm": 7.028477675482064, "learning_rate": 9.306173839821626e-06, "loss": 17.587, "step": 10647 }, { "epoch": 0.1946368837625898, "grad_norm": 6.6055027926956145, "learning_rate": 9.306023397046564e-06, "loss": 17.51, "step": 10648 }, { "epoch": 0.19465516295903632, "grad_norm": 8.327068170217741, "learning_rate": 9.305872939179263e-06, "loss": 18.2495, "step": 10649 }, { "epoch": 0.19467344215548285, "grad_norm": 6.141988982849482, "learning_rate": 9.305722466220253e-06, "loss": 17.316, "step": 10650 }, { "epoch": 0.19469172135192936, "grad_norm": 5.932637633055441, "learning_rate": 9.305571978170058e-06, "loss": 17.3231, "step": 10651 }, { "epoch": 0.1947100005483759, "grad_norm": 7.503772590917745, "learning_rate": 9.305421475029209e-06, "loss": 17.8163, "step": 10652 }, { "epoch": 0.19472827974482243, "grad_norm": 6.695506480582686, "learning_rate": 9.30527095679823e-06, "loss": 17.4673, "step": 10653 }, { "epoch": 0.19474655894126894, "grad_norm": 6.662264358726758, "learning_rate": 9.305120423477647e-06, "loss": 17.9, "step": 10654 }, { "epoch": 0.19476483813771547, "grad_norm": 6.896771157978884, "learning_rate": 9.304969875067995e-06, "loss": 17.7757, "step": 10655 }, { "epoch": 0.19478311733416198, "grad_norm": 6.460014333683582, "learning_rate": 9.304819311569794e-06, "loss": 17.4464, "step": 10656 }, { "epoch": 0.19480139653060852, "grad_norm": 6.586314892185982, "learning_rate": 9.304668732983576e-06, "loss": 17.4563, "step": 10657 }, { "epoch": 0.19481967572705503, "grad_norm": 7.077086928979827, "learning_rate": 9.304518139309869e-06, "loss": 17.83, "step": 10658 }, { "epoch": 0.19483795492350156, "grad_norm": 6.568784637257991, "learning_rate": 9.304367530549197e-06, "loss": 17.7367, "step": 10659 }, { "epoch": 0.1948562341199481, "grad_norm": 8.723658430859082, "learning_rate": 9.304216906702092e-06, "loss": 17.9172, "step": 10660 }, { "epoch": 0.1948745133163946, "grad_norm": 7.5472534182362745, "learning_rate": 9.304066267769078e-06, "loss": 17.334, "step": 10661 }, { "epoch": 0.19489279251284114, "grad_norm": 7.6130460575294965, "learning_rate": 9.303915613750689e-06, "loss": 17.8953, "step": 10662 }, { "epoch": 0.19491107170928765, "grad_norm": 6.632862241041892, "learning_rate": 9.303764944647447e-06, "loss": 17.3226, "step": 10663 }, { "epoch": 0.19492935090573418, "grad_norm": 6.255270723640296, "learning_rate": 9.303614260459882e-06, "loss": 17.2681, "step": 10664 }, { "epoch": 0.19494763010218072, "grad_norm": 6.770546445817632, "learning_rate": 9.303463561188522e-06, "loss": 17.4723, "step": 10665 }, { "epoch": 0.19496590929862723, "grad_norm": 6.307885423130489, "learning_rate": 9.303312846833897e-06, "loss": 17.2604, "step": 10666 }, { "epoch": 0.19498418849507376, "grad_norm": 6.301987745633895, "learning_rate": 9.303162117396534e-06, "loss": 17.5328, "step": 10667 }, { "epoch": 0.19500246769152027, "grad_norm": 5.773298416189256, "learning_rate": 9.30301137287696e-06, "loss": 17.0753, "step": 10668 }, { "epoch": 0.1950207468879668, "grad_norm": 8.46297537772062, "learning_rate": 9.302860613275705e-06, "loss": 18.3674, "step": 10669 }, { "epoch": 0.19503902608441334, "grad_norm": 6.870016457950988, "learning_rate": 9.302709838593299e-06, "loss": 17.4719, "step": 10670 }, { "epoch": 0.19505730528085985, "grad_norm": 6.93741752124257, "learning_rate": 9.302559048830266e-06, "loss": 17.7941, "step": 10671 }, { "epoch": 0.19507558447730639, "grad_norm": 6.897580639313483, "learning_rate": 9.302408243987137e-06, "loss": 17.8397, "step": 10672 }, { "epoch": 0.1950938636737529, "grad_norm": 7.6283759654674625, "learning_rate": 9.302257424064441e-06, "loss": 17.693, "step": 10673 }, { "epoch": 0.19511214287019943, "grad_norm": 9.701676756354317, "learning_rate": 9.302106589062705e-06, "loss": 18.6908, "step": 10674 }, { "epoch": 0.19513042206664594, "grad_norm": 6.684327878630932, "learning_rate": 9.30195573898246e-06, "loss": 17.3524, "step": 10675 }, { "epoch": 0.19514870126309247, "grad_norm": 8.020973963191935, "learning_rate": 9.301804873824234e-06, "loss": 17.8621, "step": 10676 }, { "epoch": 0.195166980459539, "grad_norm": 5.992558588825945, "learning_rate": 9.301653993588554e-06, "loss": 17.4288, "step": 10677 }, { "epoch": 0.19518525965598552, "grad_norm": 6.253317221883683, "learning_rate": 9.30150309827595e-06, "loss": 17.5206, "step": 10678 }, { "epoch": 0.19520353885243205, "grad_norm": 6.709397417090899, "learning_rate": 9.301352187886952e-06, "loss": 17.6034, "step": 10679 }, { "epoch": 0.19522181804887856, "grad_norm": 6.5657654728391535, "learning_rate": 9.301201262422086e-06, "loss": 17.66, "step": 10680 }, { "epoch": 0.1952400972453251, "grad_norm": 8.283829826030313, "learning_rate": 9.301050321881884e-06, "loss": 18.0831, "step": 10681 }, { "epoch": 0.19525837644177163, "grad_norm": 7.298913795471017, "learning_rate": 9.300899366266875e-06, "loss": 17.2761, "step": 10682 }, { "epoch": 0.19527665563821814, "grad_norm": 9.970929517780979, "learning_rate": 9.300748395577585e-06, "loss": 18.5915, "step": 10683 }, { "epoch": 0.19529493483466467, "grad_norm": 7.506747819785145, "learning_rate": 9.300597409814546e-06, "loss": 17.8649, "step": 10684 }, { "epoch": 0.19531321403111118, "grad_norm": 7.590362488356904, "learning_rate": 9.300446408978285e-06, "loss": 17.9497, "step": 10685 }, { "epoch": 0.19533149322755772, "grad_norm": 7.031810394509858, "learning_rate": 9.300295393069333e-06, "loss": 17.7331, "step": 10686 }, { "epoch": 0.19534977242400425, "grad_norm": 6.029242218241611, "learning_rate": 9.300144362088221e-06, "loss": 17.5721, "step": 10687 }, { "epoch": 0.19536805162045076, "grad_norm": 5.882413409238314, "learning_rate": 9.299993316035474e-06, "loss": 17.5585, "step": 10688 }, { "epoch": 0.1953863308168973, "grad_norm": 6.850432216812345, "learning_rate": 9.299842254911625e-06, "loss": 17.7273, "step": 10689 }, { "epoch": 0.1954046100133438, "grad_norm": 5.193430594848009, "learning_rate": 9.2996911787172e-06, "loss": 17.0182, "step": 10690 }, { "epoch": 0.19542288920979034, "grad_norm": 6.219183428513139, "learning_rate": 9.299540087452732e-06, "loss": 17.4285, "step": 10691 }, { "epoch": 0.19544116840623685, "grad_norm": 6.628865620959073, "learning_rate": 9.299388981118749e-06, "loss": 17.6518, "step": 10692 }, { "epoch": 0.19545944760268338, "grad_norm": 6.177767982169759, "learning_rate": 9.29923785971578e-06, "loss": 17.3978, "step": 10693 }, { "epoch": 0.19547772679912992, "grad_norm": 7.3571905538615, "learning_rate": 9.299086723244358e-06, "loss": 17.9955, "step": 10694 }, { "epoch": 0.19549600599557643, "grad_norm": 7.729865331947313, "learning_rate": 9.298935571705005e-06, "loss": 17.8947, "step": 10695 }, { "epoch": 0.19551428519202296, "grad_norm": 6.118071552231917, "learning_rate": 9.29878440509826e-06, "loss": 17.1034, "step": 10696 }, { "epoch": 0.19553256438846947, "grad_norm": 5.95000903021858, "learning_rate": 9.298633223424647e-06, "loss": 17.2047, "step": 10697 }, { "epoch": 0.195550843584916, "grad_norm": 6.843863320766885, "learning_rate": 9.298482026684699e-06, "loss": 17.8315, "step": 10698 }, { "epoch": 0.19556912278136254, "grad_norm": 6.487031716869424, "learning_rate": 9.298330814878944e-06, "loss": 17.5716, "step": 10699 }, { "epoch": 0.19558740197780905, "grad_norm": 7.355481764198525, "learning_rate": 9.298179588007912e-06, "loss": 17.663, "step": 10700 }, { "epoch": 0.19560568117425559, "grad_norm": 6.468301288869318, "learning_rate": 9.298028346072132e-06, "loss": 17.4242, "step": 10701 }, { "epoch": 0.1956239603707021, "grad_norm": 7.243010837289347, "learning_rate": 9.297877089072138e-06, "loss": 18.0509, "step": 10702 }, { "epoch": 0.19564223956714863, "grad_norm": 7.9735117004437805, "learning_rate": 9.297725817008458e-06, "loss": 17.4712, "step": 10703 }, { "epoch": 0.19566051876359516, "grad_norm": 6.844671694673614, "learning_rate": 9.297574529881619e-06, "loss": 17.5793, "step": 10704 }, { "epoch": 0.19567879796004167, "grad_norm": 6.642715710082763, "learning_rate": 9.297423227692158e-06, "loss": 17.3391, "step": 10705 }, { "epoch": 0.1956970771564882, "grad_norm": 7.867678656114496, "learning_rate": 9.297271910440598e-06, "loss": 18.1073, "step": 10706 }, { "epoch": 0.19571535635293472, "grad_norm": 6.688362811911548, "learning_rate": 9.297120578127474e-06, "loss": 17.4292, "step": 10707 }, { "epoch": 0.19573363554938125, "grad_norm": 7.8432901584227945, "learning_rate": 9.296969230753316e-06, "loss": 18.1777, "step": 10708 }, { "epoch": 0.19575191474582776, "grad_norm": 6.725996842444234, "learning_rate": 9.296817868318653e-06, "loss": 17.6476, "step": 10709 }, { "epoch": 0.1957701939422743, "grad_norm": 8.875850812368208, "learning_rate": 9.296666490824018e-06, "loss": 18.4301, "step": 10710 }, { "epoch": 0.19578847313872083, "grad_norm": 6.075179535741035, "learning_rate": 9.296515098269938e-06, "loss": 17.2243, "step": 10711 }, { "epoch": 0.19580675233516734, "grad_norm": 6.834905199062873, "learning_rate": 9.296363690656947e-06, "loss": 17.6401, "step": 10712 }, { "epoch": 0.19582503153161387, "grad_norm": 5.901095979828014, "learning_rate": 9.296212267985572e-06, "loss": 17.2658, "step": 10713 }, { "epoch": 0.19584331072806038, "grad_norm": 6.3500574552408455, "learning_rate": 9.296060830256346e-06, "loss": 17.3889, "step": 10714 }, { "epoch": 0.19586158992450692, "grad_norm": 6.91711244092669, "learning_rate": 9.2959093774698e-06, "loss": 17.9106, "step": 10715 }, { "epoch": 0.19587986912095345, "grad_norm": 6.207672715333535, "learning_rate": 9.295757909626465e-06, "loss": 17.1979, "step": 10716 }, { "epoch": 0.19589814831739996, "grad_norm": 6.810790272390377, "learning_rate": 9.29560642672687e-06, "loss": 17.5168, "step": 10717 }, { "epoch": 0.1959164275138465, "grad_norm": 7.976867102812035, "learning_rate": 9.29545492877155e-06, "loss": 18.029, "step": 10718 }, { "epoch": 0.195934706710293, "grad_norm": 7.261506383099081, "learning_rate": 9.295303415761032e-06, "loss": 17.7888, "step": 10719 }, { "epoch": 0.19595298590673954, "grad_norm": 7.008392962604296, "learning_rate": 9.295151887695846e-06, "loss": 17.5551, "step": 10720 }, { "epoch": 0.19597126510318608, "grad_norm": 8.457848839474217, "learning_rate": 9.295000344576528e-06, "loss": 17.6012, "step": 10721 }, { "epoch": 0.19598954429963258, "grad_norm": 6.2744108798805955, "learning_rate": 9.294848786403605e-06, "loss": 17.4231, "step": 10722 }, { "epoch": 0.19600782349607912, "grad_norm": 7.999772964827329, "learning_rate": 9.294697213177611e-06, "loss": 18.4582, "step": 10723 }, { "epoch": 0.19602610269252563, "grad_norm": 5.8756792379465805, "learning_rate": 9.294545624899076e-06, "loss": 17.3341, "step": 10724 }, { "epoch": 0.19604438188897216, "grad_norm": 7.126986733708883, "learning_rate": 9.294394021568529e-06, "loss": 17.692, "step": 10725 }, { "epoch": 0.19606266108541867, "grad_norm": 7.765735288662383, "learning_rate": 9.294242403186507e-06, "loss": 17.8931, "step": 10726 }, { "epoch": 0.1960809402818652, "grad_norm": 7.604591023698333, "learning_rate": 9.294090769753534e-06, "loss": 17.8412, "step": 10727 }, { "epoch": 0.19609921947831174, "grad_norm": 6.772845344253822, "learning_rate": 9.293939121270148e-06, "loss": 17.6838, "step": 10728 }, { "epoch": 0.19611749867475825, "grad_norm": 8.594384432462094, "learning_rate": 9.293787457736878e-06, "loss": 18.3965, "step": 10729 }, { "epoch": 0.1961357778712048, "grad_norm": 6.806616763621282, "learning_rate": 9.293635779154255e-06, "loss": 17.5861, "step": 10730 }, { "epoch": 0.1961540570676513, "grad_norm": 6.43974598625156, "learning_rate": 9.293484085522812e-06, "loss": 17.3388, "step": 10731 }, { "epoch": 0.19617233626409783, "grad_norm": 7.452707094156926, "learning_rate": 9.293332376843078e-06, "loss": 17.93, "step": 10732 }, { "epoch": 0.19619061546054437, "grad_norm": 5.686505492084528, "learning_rate": 9.293180653115587e-06, "loss": 17.1631, "step": 10733 }, { "epoch": 0.19620889465699087, "grad_norm": 5.864712950618436, "learning_rate": 9.293028914340873e-06, "loss": 17.1778, "step": 10734 }, { "epoch": 0.1962271738534374, "grad_norm": 5.693583083329729, "learning_rate": 9.292877160519463e-06, "loss": 17.0784, "step": 10735 }, { "epoch": 0.19624545304988392, "grad_norm": 6.146937285583666, "learning_rate": 9.29272539165189e-06, "loss": 17.2606, "step": 10736 }, { "epoch": 0.19626373224633045, "grad_norm": 5.731994226922119, "learning_rate": 9.292573607738688e-06, "loss": 17.3579, "step": 10737 }, { "epoch": 0.196282011442777, "grad_norm": 6.736091173241974, "learning_rate": 9.292421808780389e-06, "loss": 17.7158, "step": 10738 }, { "epoch": 0.1963002906392235, "grad_norm": 8.56794848346165, "learning_rate": 9.292269994777524e-06, "loss": 18.3193, "step": 10739 }, { "epoch": 0.19631856983567003, "grad_norm": 5.8844783678847685, "learning_rate": 9.292118165730625e-06, "loss": 17.149, "step": 10740 }, { "epoch": 0.19633684903211654, "grad_norm": 5.790033531735474, "learning_rate": 9.291966321640223e-06, "loss": 17.1911, "step": 10741 }, { "epoch": 0.19635512822856308, "grad_norm": 9.037962856954309, "learning_rate": 9.291814462506852e-06, "loss": 18.7479, "step": 10742 }, { "epoch": 0.19637340742500958, "grad_norm": 10.393617610573418, "learning_rate": 9.291662588331046e-06, "loss": 18.0216, "step": 10743 }, { "epoch": 0.19639168662145612, "grad_norm": 7.880354051662654, "learning_rate": 9.291510699113332e-06, "loss": 18.13, "step": 10744 }, { "epoch": 0.19640996581790265, "grad_norm": 6.649269674339112, "learning_rate": 9.291358794854246e-06, "loss": 17.7472, "step": 10745 }, { "epoch": 0.19642824501434916, "grad_norm": 6.24457974277704, "learning_rate": 9.29120687555432e-06, "loss": 17.4464, "step": 10746 }, { "epoch": 0.1964465242107957, "grad_norm": 5.379803587595591, "learning_rate": 9.291054941214087e-06, "loss": 17.0152, "step": 10747 }, { "epoch": 0.1964648034072422, "grad_norm": 7.007398276187704, "learning_rate": 9.290902991834078e-06, "loss": 17.7299, "step": 10748 }, { "epoch": 0.19648308260368874, "grad_norm": 7.699192976521023, "learning_rate": 9.290751027414828e-06, "loss": 17.9241, "step": 10749 }, { "epoch": 0.19650136180013528, "grad_norm": 5.54371662902325, "learning_rate": 9.290599047956865e-06, "loss": 17.1983, "step": 10750 }, { "epoch": 0.19651964099658178, "grad_norm": 6.092367698307502, "learning_rate": 9.290447053460727e-06, "loss": 17.5637, "step": 10751 }, { "epoch": 0.19653792019302832, "grad_norm": 6.810666588395866, "learning_rate": 9.290295043926945e-06, "loss": 17.5853, "step": 10752 }, { "epoch": 0.19655619938947483, "grad_norm": 7.0641765133106285, "learning_rate": 9.290143019356048e-06, "loss": 17.8393, "step": 10753 }, { "epoch": 0.19657447858592136, "grad_norm": 7.402254855193758, "learning_rate": 9.289990979748575e-06, "loss": 17.8503, "step": 10754 }, { "epoch": 0.1965927577823679, "grad_norm": 6.726314999452945, "learning_rate": 9.289838925105054e-06, "loss": 17.6592, "step": 10755 }, { "epoch": 0.1966110369788144, "grad_norm": 5.848487032895969, "learning_rate": 9.28968685542602e-06, "loss": 17.1868, "step": 10756 }, { "epoch": 0.19662931617526094, "grad_norm": 7.762407867966833, "learning_rate": 9.289534770712007e-06, "loss": 17.8981, "step": 10757 }, { "epoch": 0.19664759537170745, "grad_norm": 6.529578507189958, "learning_rate": 9.289382670963548e-06, "loss": 17.4256, "step": 10758 }, { "epoch": 0.196665874568154, "grad_norm": 7.857419411920845, "learning_rate": 9.289230556181172e-06, "loss": 18.0849, "step": 10759 }, { "epoch": 0.1966841537646005, "grad_norm": 7.205082738089403, "learning_rate": 9.289078426365417e-06, "loss": 17.5877, "step": 10760 }, { "epoch": 0.19670243296104703, "grad_norm": 5.807288445967445, "learning_rate": 9.288926281516812e-06, "loss": 17.2288, "step": 10761 }, { "epoch": 0.19672071215749357, "grad_norm": 8.342745064606985, "learning_rate": 9.288774121635895e-06, "loss": 17.6209, "step": 10762 }, { "epoch": 0.19673899135394007, "grad_norm": 7.180800065658491, "learning_rate": 9.288621946723196e-06, "loss": 17.6955, "step": 10763 }, { "epoch": 0.1967572705503866, "grad_norm": 6.565589161064451, "learning_rate": 9.28846975677925e-06, "loss": 17.5578, "step": 10764 }, { "epoch": 0.19677554974683312, "grad_norm": 7.649048599746578, "learning_rate": 9.28831755180459e-06, "loss": 18.1785, "step": 10765 }, { "epoch": 0.19679382894327965, "grad_norm": 6.6356404509470694, "learning_rate": 9.288165331799746e-06, "loss": 17.6297, "step": 10766 }, { "epoch": 0.1968121081397262, "grad_norm": 7.221765529876187, "learning_rate": 9.288013096765258e-06, "loss": 17.8655, "step": 10767 }, { "epoch": 0.1968303873361727, "grad_norm": 6.003297464221561, "learning_rate": 9.287860846701654e-06, "loss": 17.2949, "step": 10768 }, { "epoch": 0.19684866653261923, "grad_norm": 6.526098385174477, "learning_rate": 9.287708581609472e-06, "loss": 17.2405, "step": 10769 }, { "epoch": 0.19686694572906574, "grad_norm": 7.148816755709115, "learning_rate": 9.28755630148924e-06, "loss": 17.6601, "step": 10770 }, { "epoch": 0.19688522492551228, "grad_norm": 6.88800618744152, "learning_rate": 9.287404006341498e-06, "loss": 17.3901, "step": 10771 }, { "epoch": 0.1969035041219588, "grad_norm": 7.202044151925685, "learning_rate": 9.287251696166777e-06, "loss": 17.5591, "step": 10772 }, { "epoch": 0.19692178331840532, "grad_norm": 8.632297946052338, "learning_rate": 9.28709937096561e-06, "loss": 18.4776, "step": 10773 }, { "epoch": 0.19694006251485185, "grad_norm": 9.534936187783808, "learning_rate": 9.286947030738532e-06, "loss": 18.5315, "step": 10774 }, { "epoch": 0.19695834171129836, "grad_norm": 6.679001612778986, "learning_rate": 9.286794675486076e-06, "loss": 17.3928, "step": 10775 }, { "epoch": 0.1969766209077449, "grad_norm": 6.254906701770776, "learning_rate": 9.286642305208776e-06, "loss": 17.3015, "step": 10776 }, { "epoch": 0.1969949001041914, "grad_norm": 6.088482117192647, "learning_rate": 9.28648991990717e-06, "loss": 17.483, "step": 10777 }, { "epoch": 0.19701317930063794, "grad_norm": 7.250488850608622, "learning_rate": 9.286337519581786e-06, "loss": 17.6471, "step": 10778 }, { "epoch": 0.19703145849708448, "grad_norm": 7.044707665896951, "learning_rate": 9.286185104233162e-06, "loss": 17.7596, "step": 10779 }, { "epoch": 0.19704973769353099, "grad_norm": 7.233640126102035, "learning_rate": 9.286032673861831e-06, "loss": 18.0291, "step": 10780 }, { "epoch": 0.19706801688997752, "grad_norm": 7.2387001895141205, "learning_rate": 9.285880228468327e-06, "loss": 17.7576, "step": 10781 }, { "epoch": 0.19708629608642403, "grad_norm": 6.579224732400311, "learning_rate": 9.285727768053185e-06, "loss": 17.5907, "step": 10782 }, { "epoch": 0.19710457528287056, "grad_norm": 7.167222966537241, "learning_rate": 9.285575292616938e-06, "loss": 17.7615, "step": 10783 }, { "epoch": 0.1971228544793171, "grad_norm": 6.263058696373889, "learning_rate": 9.285422802160123e-06, "loss": 17.6282, "step": 10784 }, { "epoch": 0.1971411336757636, "grad_norm": 5.558002420549016, "learning_rate": 9.285270296683273e-06, "loss": 17.2044, "step": 10785 }, { "epoch": 0.19715941287221014, "grad_norm": 6.457501984565157, "learning_rate": 9.285117776186922e-06, "loss": 17.3073, "step": 10786 }, { "epoch": 0.19717769206865665, "grad_norm": 6.8279219970562215, "learning_rate": 9.284965240671604e-06, "loss": 17.796, "step": 10787 }, { "epoch": 0.1971959712651032, "grad_norm": 6.265323462252164, "learning_rate": 9.284812690137857e-06, "loss": 17.3556, "step": 10788 }, { "epoch": 0.19721425046154972, "grad_norm": 7.24634507097224, "learning_rate": 9.28466012458621e-06, "loss": 17.9901, "step": 10789 }, { "epoch": 0.19723252965799623, "grad_norm": 6.7671818090699976, "learning_rate": 9.284507544017203e-06, "loss": 17.3748, "step": 10790 }, { "epoch": 0.19725080885444277, "grad_norm": 7.11350885505168, "learning_rate": 9.28435494843137e-06, "loss": 17.4191, "step": 10791 }, { "epoch": 0.19726908805088927, "grad_norm": 6.429692882394918, "learning_rate": 9.284202337829244e-06, "loss": 17.6613, "step": 10792 }, { "epoch": 0.1972873672473358, "grad_norm": 6.543431871173417, "learning_rate": 9.28404971221136e-06, "loss": 17.5675, "step": 10793 }, { "epoch": 0.19730564644378232, "grad_norm": 7.3774655749339315, "learning_rate": 9.283897071578254e-06, "loss": 17.8745, "step": 10794 }, { "epoch": 0.19732392564022885, "grad_norm": 7.393943625505083, "learning_rate": 9.28374441593046e-06, "loss": 17.5336, "step": 10795 }, { "epoch": 0.1973422048366754, "grad_norm": 6.2664293535677995, "learning_rate": 9.283591745268512e-06, "loss": 17.5629, "step": 10796 }, { "epoch": 0.1973604840331219, "grad_norm": 6.8093411548541365, "learning_rate": 9.283439059592949e-06, "loss": 17.6704, "step": 10797 }, { "epoch": 0.19737876322956843, "grad_norm": 6.954806390677689, "learning_rate": 9.283286358904304e-06, "loss": 17.5025, "step": 10798 }, { "epoch": 0.19739704242601494, "grad_norm": 8.512641998122215, "learning_rate": 9.283133643203111e-06, "loss": 18.2705, "step": 10799 }, { "epoch": 0.19741532162246148, "grad_norm": 6.994446648295691, "learning_rate": 9.282980912489906e-06, "loss": 17.8209, "step": 10800 }, { "epoch": 0.197433600818908, "grad_norm": 6.001762851933642, "learning_rate": 9.282828166765226e-06, "loss": 17.3401, "step": 10801 }, { "epoch": 0.19745188001535452, "grad_norm": 6.380943936450123, "learning_rate": 9.282675406029604e-06, "loss": 17.1109, "step": 10802 }, { "epoch": 0.19747015921180106, "grad_norm": 6.851594593750535, "learning_rate": 9.282522630283575e-06, "loss": 17.4685, "step": 10803 }, { "epoch": 0.19748843840824756, "grad_norm": 6.1245737107628875, "learning_rate": 9.282369839527678e-06, "loss": 17.3703, "step": 10804 }, { "epoch": 0.1975067176046941, "grad_norm": 7.088313913282194, "learning_rate": 9.282217033762444e-06, "loss": 17.644, "step": 10805 }, { "epoch": 0.19752499680114063, "grad_norm": 6.70642803619076, "learning_rate": 9.282064212988414e-06, "loss": 17.7818, "step": 10806 }, { "epoch": 0.19754327599758714, "grad_norm": 5.523099811272099, "learning_rate": 9.281911377206118e-06, "loss": 17.089, "step": 10807 }, { "epoch": 0.19756155519403368, "grad_norm": 7.182200762733401, "learning_rate": 9.281758526416094e-06, "loss": 17.5055, "step": 10808 }, { "epoch": 0.19757983439048019, "grad_norm": 5.83526328434585, "learning_rate": 9.28160566061888e-06, "loss": 16.8324, "step": 10809 }, { "epoch": 0.19759811358692672, "grad_norm": 7.869972066413671, "learning_rate": 9.281452779815007e-06, "loss": 18.2756, "step": 10810 }, { "epoch": 0.19761639278337323, "grad_norm": 5.962958806178682, "learning_rate": 9.281299884005017e-06, "loss": 17.4023, "step": 10811 }, { "epoch": 0.19763467197981976, "grad_norm": 6.822327165227126, "learning_rate": 9.28114697318944e-06, "loss": 17.5601, "step": 10812 }, { "epoch": 0.1976529511762663, "grad_norm": 7.4630866633682365, "learning_rate": 9.280994047368815e-06, "loss": 17.8554, "step": 10813 }, { "epoch": 0.1976712303727128, "grad_norm": 7.617800974958288, "learning_rate": 9.280841106543677e-06, "loss": 17.9616, "step": 10814 }, { "epoch": 0.19768950956915934, "grad_norm": 6.9302645605626525, "learning_rate": 9.280688150714562e-06, "loss": 17.2055, "step": 10815 }, { "epoch": 0.19770778876560585, "grad_norm": 6.354683628285268, "learning_rate": 9.280535179882008e-06, "loss": 17.3398, "step": 10816 }, { "epoch": 0.1977260679620524, "grad_norm": 5.553582914311273, "learning_rate": 9.280382194046548e-06, "loss": 17.2443, "step": 10817 }, { "epoch": 0.19774434715849892, "grad_norm": 6.313404246286071, "learning_rate": 9.28022919320872e-06, "loss": 17.6153, "step": 10818 }, { "epoch": 0.19776262635494543, "grad_norm": 7.183189177438069, "learning_rate": 9.280076177369062e-06, "loss": 17.8033, "step": 10819 }, { "epoch": 0.19778090555139197, "grad_norm": 6.426110523637121, "learning_rate": 9.279923146528106e-06, "loss": 17.4548, "step": 10820 }, { "epoch": 0.19779918474783847, "grad_norm": 6.8997417900141835, "learning_rate": 9.279770100686391e-06, "loss": 17.6989, "step": 10821 }, { "epoch": 0.197817463944285, "grad_norm": 7.029461683598869, "learning_rate": 9.279617039844455e-06, "loss": 17.317, "step": 10822 }, { "epoch": 0.19783574314073155, "grad_norm": 5.334324070800717, "learning_rate": 9.279463964002832e-06, "loss": 17.0673, "step": 10823 }, { "epoch": 0.19785402233717805, "grad_norm": 7.220410031420295, "learning_rate": 9.279310873162059e-06, "loss": 17.7454, "step": 10824 }, { "epoch": 0.1978723015336246, "grad_norm": 6.445500181237727, "learning_rate": 9.279157767322673e-06, "loss": 17.4973, "step": 10825 }, { "epoch": 0.1978905807300711, "grad_norm": 7.370052113078593, "learning_rate": 9.27900464648521e-06, "loss": 18.4546, "step": 10826 }, { "epoch": 0.19790885992651763, "grad_norm": 7.436753759920104, "learning_rate": 9.278851510650207e-06, "loss": 17.5844, "step": 10827 }, { "epoch": 0.19792713912296414, "grad_norm": 6.357018547397667, "learning_rate": 9.2786983598182e-06, "loss": 17.5205, "step": 10828 }, { "epoch": 0.19794541831941068, "grad_norm": 6.274501885764996, "learning_rate": 9.278545193989728e-06, "loss": 17.3663, "step": 10829 }, { "epoch": 0.1979636975158572, "grad_norm": 6.895095266400871, "learning_rate": 9.278392013165325e-06, "loss": 17.6496, "step": 10830 }, { "epoch": 0.19798197671230372, "grad_norm": 5.365151583121495, "learning_rate": 9.27823881734553e-06, "loss": 17.0242, "step": 10831 }, { "epoch": 0.19800025590875026, "grad_norm": 7.001238935540446, "learning_rate": 9.278085606530879e-06, "loss": 17.775, "step": 10832 }, { "epoch": 0.19801853510519676, "grad_norm": 7.1434068090519265, "learning_rate": 9.277932380721908e-06, "loss": 17.6297, "step": 10833 }, { "epoch": 0.1980368143016433, "grad_norm": 7.27685512217264, "learning_rate": 9.277779139919157e-06, "loss": 17.9849, "step": 10834 }, { "epoch": 0.19805509349808983, "grad_norm": 6.499332836240062, "learning_rate": 9.277625884123162e-06, "loss": 17.5162, "step": 10835 }, { "epoch": 0.19807337269453634, "grad_norm": 7.142852026254099, "learning_rate": 9.277472613334457e-06, "loss": 17.7445, "step": 10836 }, { "epoch": 0.19809165189098288, "grad_norm": 8.129783649562425, "learning_rate": 9.277319327553584e-06, "loss": 18.3967, "step": 10837 }, { "epoch": 0.1981099310874294, "grad_norm": 6.8305374317355705, "learning_rate": 9.277166026781076e-06, "loss": 17.5188, "step": 10838 }, { "epoch": 0.19812821028387592, "grad_norm": 6.344683701928417, "learning_rate": 9.277012711017473e-06, "loss": 17.4358, "step": 10839 }, { "epoch": 0.19814648948032246, "grad_norm": 7.054374100658382, "learning_rate": 9.276859380263313e-06, "loss": 17.6545, "step": 10840 }, { "epoch": 0.19816476867676897, "grad_norm": 6.621685344435275, "learning_rate": 9.276706034519129e-06, "loss": 17.7552, "step": 10841 }, { "epoch": 0.1981830478732155, "grad_norm": 6.43218685786527, "learning_rate": 9.276552673785464e-06, "loss": 17.2751, "step": 10842 }, { "epoch": 0.198201327069662, "grad_norm": 5.896436973847265, "learning_rate": 9.276399298062853e-06, "loss": 16.996, "step": 10843 }, { "epoch": 0.19821960626610854, "grad_norm": 12.692140193254197, "learning_rate": 9.276245907351831e-06, "loss": 18.7467, "step": 10844 }, { "epoch": 0.19823788546255505, "grad_norm": 8.392272527927158, "learning_rate": 9.27609250165294e-06, "loss": 18.1811, "step": 10845 }, { "epoch": 0.1982561646590016, "grad_norm": 6.3733792969341, "learning_rate": 9.275939080966716e-06, "loss": 17.4744, "step": 10846 }, { "epoch": 0.19827444385544812, "grad_norm": 6.670479841457684, "learning_rate": 9.275785645293697e-06, "loss": 17.6064, "step": 10847 }, { "epoch": 0.19829272305189463, "grad_norm": 7.498507478063741, "learning_rate": 9.27563219463442e-06, "loss": 17.9733, "step": 10848 }, { "epoch": 0.19831100224834117, "grad_norm": 6.610138137691433, "learning_rate": 9.275478728989422e-06, "loss": 17.5745, "step": 10849 }, { "epoch": 0.19832928144478768, "grad_norm": 10.92354219974126, "learning_rate": 9.275325248359245e-06, "loss": 18.6201, "step": 10850 }, { "epoch": 0.1983475606412342, "grad_norm": 7.570411642303618, "learning_rate": 9.27517175274442e-06, "loss": 18.1232, "step": 10851 }, { "epoch": 0.19836583983768075, "grad_norm": 7.008058196705809, "learning_rate": 9.27501824214549e-06, "loss": 17.7092, "step": 10852 }, { "epoch": 0.19838411903412725, "grad_norm": 6.787764942709426, "learning_rate": 9.274864716562993e-06, "loss": 17.4613, "step": 10853 }, { "epoch": 0.1984023982305738, "grad_norm": 7.169628550706973, "learning_rate": 9.274711175997466e-06, "loss": 17.7941, "step": 10854 }, { "epoch": 0.1984206774270203, "grad_norm": 6.785940497665481, "learning_rate": 9.274557620449448e-06, "loss": 17.6994, "step": 10855 }, { "epoch": 0.19843895662346683, "grad_norm": 5.900930090399167, "learning_rate": 9.274404049919475e-06, "loss": 17.4232, "step": 10856 }, { "epoch": 0.19845723581991337, "grad_norm": 6.298469309316371, "learning_rate": 9.274250464408087e-06, "loss": 17.6218, "step": 10857 }, { "epoch": 0.19847551501635988, "grad_norm": 7.4420262749846, "learning_rate": 9.274096863915824e-06, "loss": 18.0382, "step": 10858 }, { "epoch": 0.1984937942128064, "grad_norm": 8.457845366616512, "learning_rate": 9.27394324844322e-06, "loss": 18.1733, "step": 10859 }, { "epoch": 0.19851207340925292, "grad_norm": 6.5472351756218075, "learning_rate": 9.273789617990818e-06, "loss": 17.4029, "step": 10860 }, { "epoch": 0.19853035260569946, "grad_norm": 6.117378900362806, "learning_rate": 9.273635972559152e-06, "loss": 17.3464, "step": 10861 }, { "epoch": 0.19854863180214596, "grad_norm": 6.69009016103747, "learning_rate": 9.273482312148766e-06, "loss": 17.4837, "step": 10862 }, { "epoch": 0.1985669109985925, "grad_norm": 6.675722361896055, "learning_rate": 9.27332863676019e-06, "loss": 17.777, "step": 10863 }, { "epoch": 0.19858519019503904, "grad_norm": 6.670294770073691, "learning_rate": 9.273174946393973e-06, "loss": 17.6327, "step": 10864 }, { "epoch": 0.19860346939148554, "grad_norm": 5.8263897901825334, "learning_rate": 9.273021241050645e-06, "loss": 17.1493, "step": 10865 }, { "epoch": 0.19862174858793208, "grad_norm": 6.1740388700652105, "learning_rate": 9.27286752073075e-06, "loss": 17.3768, "step": 10866 }, { "epoch": 0.1986400277843786, "grad_norm": 5.703210716908118, "learning_rate": 9.272713785434827e-06, "loss": 17.1263, "step": 10867 }, { "epoch": 0.19865830698082512, "grad_norm": 7.443573776636084, "learning_rate": 9.27256003516341e-06, "loss": 18.0349, "step": 10868 }, { "epoch": 0.19867658617727166, "grad_norm": 5.991726657722256, "learning_rate": 9.272406269917041e-06, "loss": 17.1115, "step": 10869 }, { "epoch": 0.19869486537371817, "grad_norm": 7.437226033210058, "learning_rate": 9.27225248969626e-06, "loss": 18.4094, "step": 10870 }, { "epoch": 0.1987131445701647, "grad_norm": 7.523218516674712, "learning_rate": 9.272098694501604e-06, "loss": 17.7628, "step": 10871 }, { "epoch": 0.1987314237666112, "grad_norm": 6.54169780413771, "learning_rate": 9.271944884333614e-06, "loss": 17.5334, "step": 10872 }, { "epoch": 0.19874970296305774, "grad_norm": 7.856703845300767, "learning_rate": 9.271791059192828e-06, "loss": 18.1138, "step": 10873 }, { "epoch": 0.19876798215950428, "grad_norm": 7.311420298885436, "learning_rate": 9.271637219079784e-06, "loss": 17.978, "step": 10874 }, { "epoch": 0.1987862613559508, "grad_norm": 7.086903096477535, "learning_rate": 9.27148336399502e-06, "loss": 17.587, "step": 10875 }, { "epoch": 0.19880454055239732, "grad_norm": 6.838484668499034, "learning_rate": 9.27132949393908e-06, "loss": 17.5756, "step": 10876 }, { "epoch": 0.19882281974884383, "grad_norm": 6.848692496481984, "learning_rate": 9.271175608912501e-06, "loss": 17.5735, "step": 10877 }, { "epoch": 0.19884109894529037, "grad_norm": 8.390905815444885, "learning_rate": 9.271021708915822e-06, "loss": 18.1065, "step": 10878 }, { "epoch": 0.19885937814173688, "grad_norm": 6.230535545187577, "learning_rate": 9.270867793949583e-06, "loss": 17.6444, "step": 10879 }, { "epoch": 0.1988776573381834, "grad_norm": 6.248780898488394, "learning_rate": 9.270713864014321e-06, "loss": 17.6363, "step": 10880 }, { "epoch": 0.19889593653462995, "grad_norm": 6.088296431862701, "learning_rate": 9.270559919110579e-06, "loss": 17.0838, "step": 10881 }, { "epoch": 0.19891421573107645, "grad_norm": 7.964064373572228, "learning_rate": 9.270405959238896e-06, "loss": 17.9265, "step": 10882 }, { "epoch": 0.198932494927523, "grad_norm": 6.243094934678688, "learning_rate": 9.270251984399807e-06, "loss": 17.4484, "step": 10883 }, { "epoch": 0.1989507741239695, "grad_norm": 7.907115831522865, "learning_rate": 9.270097994593859e-06, "loss": 18.049, "step": 10884 }, { "epoch": 0.19896905332041603, "grad_norm": 6.272438758235311, "learning_rate": 9.269943989821587e-06, "loss": 17.3441, "step": 10885 }, { "epoch": 0.19898733251686257, "grad_norm": 6.211685599992287, "learning_rate": 9.269789970083531e-06, "loss": 17.1009, "step": 10886 }, { "epoch": 0.19900561171330908, "grad_norm": 5.111757953037374, "learning_rate": 9.269635935380233e-06, "loss": 16.9119, "step": 10887 }, { "epoch": 0.1990238909097556, "grad_norm": 7.02040051715025, "learning_rate": 9.26948188571223e-06, "loss": 17.9046, "step": 10888 }, { "epoch": 0.19904217010620212, "grad_norm": 5.70667606270827, "learning_rate": 9.269327821080064e-06, "loss": 17.2795, "step": 10889 }, { "epoch": 0.19906044930264866, "grad_norm": 6.159647776524561, "learning_rate": 9.269173741484277e-06, "loss": 17.2703, "step": 10890 }, { "epoch": 0.1990787284990952, "grad_norm": 7.719437738199956, "learning_rate": 9.269019646925404e-06, "loss": 17.9644, "step": 10891 }, { "epoch": 0.1990970076955417, "grad_norm": 5.748278172028444, "learning_rate": 9.268865537403987e-06, "loss": 17.1837, "step": 10892 }, { "epoch": 0.19911528689198824, "grad_norm": 6.12576407150042, "learning_rate": 9.268711412920567e-06, "loss": 17.3183, "step": 10893 }, { "epoch": 0.19913356608843474, "grad_norm": 7.2380097679109845, "learning_rate": 9.268557273475685e-06, "loss": 17.6635, "step": 10894 }, { "epoch": 0.19915184528488128, "grad_norm": 7.244045805527676, "learning_rate": 9.26840311906988e-06, "loss": 17.9317, "step": 10895 }, { "epoch": 0.1991701244813278, "grad_norm": 7.473918145896732, "learning_rate": 9.268248949703693e-06, "loss": 17.9833, "step": 10896 }, { "epoch": 0.19918840367777432, "grad_norm": 9.104871874023008, "learning_rate": 9.268094765377662e-06, "loss": 18.0553, "step": 10897 }, { "epoch": 0.19920668287422086, "grad_norm": 6.944195293043811, "learning_rate": 9.26794056609233e-06, "loss": 17.1542, "step": 10898 }, { "epoch": 0.19922496207066737, "grad_norm": 5.977489454455816, "learning_rate": 9.267786351848238e-06, "loss": 17.3206, "step": 10899 }, { "epoch": 0.1992432412671139, "grad_norm": 7.571865722419859, "learning_rate": 9.267632122645924e-06, "loss": 18.0964, "step": 10900 }, { "epoch": 0.1992615204635604, "grad_norm": 7.121409349012491, "learning_rate": 9.26747787848593e-06, "loss": 17.2025, "step": 10901 }, { "epoch": 0.19927979966000695, "grad_norm": 7.834341990595362, "learning_rate": 9.267323619368795e-06, "loss": 17.7799, "step": 10902 }, { "epoch": 0.19929807885645348, "grad_norm": 6.495101935179148, "learning_rate": 9.267169345295063e-06, "loss": 17.5505, "step": 10903 }, { "epoch": 0.1993163580529, "grad_norm": 8.68647864663089, "learning_rate": 9.267015056265272e-06, "loss": 18.4633, "step": 10904 }, { "epoch": 0.19933463724934652, "grad_norm": 5.533154408312759, "learning_rate": 9.266860752279964e-06, "loss": 17.2265, "step": 10905 }, { "epoch": 0.19935291644579303, "grad_norm": 9.045653357664435, "learning_rate": 9.266706433339678e-06, "loss": 18.4361, "step": 10906 }, { "epoch": 0.19937119564223957, "grad_norm": 6.681364541307185, "learning_rate": 9.266552099444957e-06, "loss": 17.7212, "step": 10907 }, { "epoch": 0.1993894748386861, "grad_norm": 8.276329180423547, "learning_rate": 9.26639775059634e-06, "loss": 18.4311, "step": 10908 }, { "epoch": 0.1994077540351326, "grad_norm": 7.544906962956952, "learning_rate": 9.266243386794372e-06, "loss": 17.7837, "step": 10909 }, { "epoch": 0.19942603323157915, "grad_norm": 6.3703401990947155, "learning_rate": 9.266089008039589e-06, "loss": 17.3522, "step": 10910 }, { "epoch": 0.19944431242802566, "grad_norm": 7.063462640206068, "learning_rate": 9.265934614332534e-06, "loss": 17.6295, "step": 10911 }, { "epoch": 0.1994625916244722, "grad_norm": 7.051708868623823, "learning_rate": 9.265780205673749e-06, "loss": 17.9861, "step": 10912 }, { "epoch": 0.1994808708209187, "grad_norm": 6.990076011496199, "learning_rate": 9.265625782063774e-06, "loss": 17.8551, "step": 10913 }, { "epoch": 0.19949915001736523, "grad_norm": 6.814238402632026, "learning_rate": 9.265471343503152e-06, "loss": 17.6684, "step": 10914 }, { "epoch": 0.19951742921381177, "grad_norm": 6.723381483146955, "learning_rate": 9.265316889992422e-06, "loss": 18.0331, "step": 10915 }, { "epoch": 0.19953570841025828, "grad_norm": 6.216760276952046, "learning_rate": 9.265162421532126e-06, "loss": 17.3248, "step": 10916 }, { "epoch": 0.1995539876067048, "grad_norm": 7.183698849365839, "learning_rate": 9.265007938122807e-06, "loss": 17.9315, "step": 10917 }, { "epoch": 0.19957226680315132, "grad_norm": 7.490734028144163, "learning_rate": 9.264853439765005e-06, "loss": 18.2393, "step": 10918 }, { "epoch": 0.19959054599959786, "grad_norm": 8.24745633213347, "learning_rate": 9.264698926459261e-06, "loss": 18.0886, "step": 10919 }, { "epoch": 0.1996088251960444, "grad_norm": 6.113159803142408, "learning_rate": 9.264544398206119e-06, "loss": 17.486, "step": 10920 }, { "epoch": 0.1996271043924909, "grad_norm": 8.206470583068661, "learning_rate": 9.264389855006118e-06, "loss": 18.1456, "step": 10921 }, { "epoch": 0.19964538358893744, "grad_norm": 7.452119559441181, "learning_rate": 9.2642352968598e-06, "loss": 17.9846, "step": 10922 }, { "epoch": 0.19966366278538394, "grad_norm": 6.938408694079041, "learning_rate": 9.264080723767707e-06, "loss": 17.8101, "step": 10923 }, { "epoch": 0.19968194198183048, "grad_norm": 6.019695578849084, "learning_rate": 9.263926135730383e-06, "loss": 17.3154, "step": 10924 }, { "epoch": 0.19970022117827702, "grad_norm": 6.782514032171597, "learning_rate": 9.263771532748367e-06, "loss": 17.5067, "step": 10925 }, { "epoch": 0.19971850037472352, "grad_norm": 6.118197987069956, "learning_rate": 9.263616914822201e-06, "loss": 17.0961, "step": 10926 }, { "epoch": 0.19973677957117006, "grad_norm": 7.035595199165753, "learning_rate": 9.26346228195243e-06, "loss": 17.7247, "step": 10927 }, { "epoch": 0.19975505876761657, "grad_norm": 6.921837355340135, "learning_rate": 9.26330763413959e-06, "loss": 17.6363, "step": 10928 }, { "epoch": 0.1997733379640631, "grad_norm": 7.238623576682792, "learning_rate": 9.26315297138423e-06, "loss": 17.6164, "step": 10929 }, { "epoch": 0.1997916171605096, "grad_norm": 9.928512166924047, "learning_rate": 9.262998293686888e-06, "loss": 17.7197, "step": 10930 }, { "epoch": 0.19980989635695615, "grad_norm": 5.792244353120198, "learning_rate": 9.262843601048104e-06, "loss": 17.2798, "step": 10931 }, { "epoch": 0.19982817555340268, "grad_norm": 5.401133240098436, "learning_rate": 9.262688893468426e-06, "loss": 17.0325, "step": 10932 }, { "epoch": 0.1998464547498492, "grad_norm": 6.911698034065736, "learning_rate": 9.262534170948392e-06, "loss": 17.7217, "step": 10933 }, { "epoch": 0.19986473394629573, "grad_norm": 7.515353464874539, "learning_rate": 9.262379433488547e-06, "loss": 17.8443, "step": 10934 }, { "epoch": 0.19988301314274223, "grad_norm": 6.765047061950747, "learning_rate": 9.262224681089432e-06, "loss": 17.6567, "step": 10935 }, { "epoch": 0.19990129233918877, "grad_norm": 6.717639189321588, "learning_rate": 9.262069913751589e-06, "loss": 17.6313, "step": 10936 }, { "epoch": 0.1999195715356353, "grad_norm": 7.848672015083547, "learning_rate": 9.261915131475561e-06, "loss": 17.9072, "step": 10937 }, { "epoch": 0.1999378507320818, "grad_norm": 7.656189118648847, "learning_rate": 9.261760334261888e-06, "loss": 18.2532, "step": 10938 }, { "epoch": 0.19995612992852835, "grad_norm": 6.906178224734655, "learning_rate": 9.261605522111117e-06, "loss": 17.8904, "step": 10939 }, { "epoch": 0.19997440912497486, "grad_norm": 5.500725992902303, "learning_rate": 9.261450695023789e-06, "loss": 17.2585, "step": 10940 }, { "epoch": 0.1999926883214214, "grad_norm": 7.063104493861325, "learning_rate": 9.261295853000445e-06, "loss": 17.7415, "step": 10941 }, { "epoch": 0.20001096751786793, "grad_norm": 7.882567722388475, "learning_rate": 9.26114099604163e-06, "loss": 18.3038, "step": 10942 }, { "epoch": 0.20002924671431443, "grad_norm": 6.572875620156166, "learning_rate": 9.260986124147884e-06, "loss": 17.6132, "step": 10943 }, { "epoch": 0.20004752591076097, "grad_norm": 6.43107508416896, "learning_rate": 9.260831237319752e-06, "loss": 17.4252, "step": 10944 }, { "epoch": 0.20006580510720748, "grad_norm": 6.133192475896936, "learning_rate": 9.260676335557774e-06, "loss": 17.3416, "step": 10945 }, { "epoch": 0.20008408430365401, "grad_norm": 5.888722241992663, "learning_rate": 9.260521418862498e-06, "loss": 17.3938, "step": 10946 }, { "epoch": 0.20010236350010052, "grad_norm": 8.157620817807263, "learning_rate": 9.260366487234465e-06, "loss": 18.2197, "step": 10947 }, { "epoch": 0.20012064269654706, "grad_norm": 7.065616268419404, "learning_rate": 9.260211540674215e-06, "loss": 17.7513, "step": 10948 }, { "epoch": 0.2001389218929936, "grad_norm": 6.562597400109817, "learning_rate": 9.260056579182292e-06, "loss": 17.3057, "step": 10949 }, { "epoch": 0.2001572010894401, "grad_norm": 6.39383651983612, "learning_rate": 9.259901602759244e-06, "loss": 17.6876, "step": 10950 }, { "epoch": 0.20017548028588664, "grad_norm": 6.396783115609105, "learning_rate": 9.25974661140561e-06, "loss": 17.347, "step": 10951 }, { "epoch": 0.20019375948233314, "grad_norm": 5.385791227480741, "learning_rate": 9.259591605121932e-06, "loss": 17.1368, "step": 10952 }, { "epoch": 0.20021203867877968, "grad_norm": 6.698850836391977, "learning_rate": 9.259436583908754e-06, "loss": 17.3805, "step": 10953 }, { "epoch": 0.20023031787522622, "grad_norm": 6.7136310001901744, "learning_rate": 9.259281547766623e-06, "loss": 17.939, "step": 10954 }, { "epoch": 0.20024859707167272, "grad_norm": 6.801087623392054, "learning_rate": 9.259126496696079e-06, "loss": 17.7017, "step": 10955 }, { "epoch": 0.20026687626811926, "grad_norm": 7.7693655840737526, "learning_rate": 9.258971430697666e-06, "loss": 17.7556, "step": 10956 }, { "epoch": 0.20028515546456577, "grad_norm": 6.9253554573314515, "learning_rate": 9.258816349771927e-06, "loss": 17.4131, "step": 10957 }, { "epoch": 0.2003034346610123, "grad_norm": 6.094023118280109, "learning_rate": 9.258661253919408e-06, "loss": 17.5846, "step": 10958 }, { "epoch": 0.20032171385745884, "grad_norm": 6.475855412892942, "learning_rate": 9.25850614314065e-06, "loss": 17.5506, "step": 10959 }, { "epoch": 0.20033999305390535, "grad_norm": 6.121848840087118, "learning_rate": 9.258351017436196e-06, "loss": 17.2962, "step": 10960 }, { "epoch": 0.20035827225035188, "grad_norm": 6.760167166388431, "learning_rate": 9.258195876806593e-06, "loss": 17.7835, "step": 10961 }, { "epoch": 0.2003765514467984, "grad_norm": 5.917783471813344, "learning_rate": 9.258040721252383e-06, "loss": 17.2104, "step": 10962 }, { "epoch": 0.20039483064324493, "grad_norm": 8.432581227712953, "learning_rate": 9.257885550774108e-06, "loss": 18.203, "step": 10963 }, { "epoch": 0.20041310983969143, "grad_norm": 6.43125791912894, "learning_rate": 9.257730365372315e-06, "loss": 17.3735, "step": 10964 }, { "epoch": 0.20043138903613797, "grad_norm": 6.348945505247846, "learning_rate": 9.257575165047547e-06, "loss": 17.5365, "step": 10965 }, { "epoch": 0.2004496682325845, "grad_norm": 5.887554857610085, "learning_rate": 9.257419949800347e-06, "loss": 17.173, "step": 10966 }, { "epoch": 0.200467947429031, "grad_norm": 7.285541336636381, "learning_rate": 9.257264719631259e-06, "loss": 17.8792, "step": 10967 }, { "epoch": 0.20048622662547755, "grad_norm": 7.007494663319904, "learning_rate": 9.257109474540828e-06, "loss": 17.6362, "step": 10968 }, { "epoch": 0.20050450582192406, "grad_norm": 6.328330410958518, "learning_rate": 9.256954214529599e-06, "loss": 17.5554, "step": 10969 }, { "epoch": 0.2005227850183706, "grad_norm": 6.510092270616266, "learning_rate": 9.256798939598113e-06, "loss": 17.5696, "step": 10970 }, { "epoch": 0.20054106421481713, "grad_norm": 5.909293919661958, "learning_rate": 9.256643649746917e-06, "loss": 17.2425, "step": 10971 }, { "epoch": 0.20055934341126364, "grad_norm": 6.915669059292693, "learning_rate": 9.256488344976552e-06, "loss": 17.5665, "step": 10972 }, { "epoch": 0.20057762260771017, "grad_norm": 7.034467967793795, "learning_rate": 9.256333025287569e-06, "loss": 17.6718, "step": 10973 }, { "epoch": 0.20059590180415668, "grad_norm": 6.076594057259614, "learning_rate": 9.256177690680506e-06, "loss": 17.0781, "step": 10974 }, { "epoch": 0.20061418100060321, "grad_norm": 5.794475106139136, "learning_rate": 9.256022341155909e-06, "loss": 17.5427, "step": 10975 }, { "epoch": 0.20063246019704975, "grad_norm": 5.456558611233308, "learning_rate": 9.255866976714323e-06, "loss": 17.1452, "step": 10976 }, { "epoch": 0.20065073939349626, "grad_norm": 7.4855302093332075, "learning_rate": 9.255711597356293e-06, "loss": 17.6005, "step": 10977 }, { "epoch": 0.2006690185899428, "grad_norm": 8.035824468405956, "learning_rate": 9.255556203082363e-06, "loss": 17.6279, "step": 10978 }, { "epoch": 0.2006872977863893, "grad_norm": 6.521175712373388, "learning_rate": 9.25540079389308e-06, "loss": 17.6865, "step": 10979 }, { "epoch": 0.20070557698283584, "grad_norm": 6.731090043339445, "learning_rate": 9.255245369788983e-06, "loss": 17.648, "step": 10980 }, { "epoch": 0.20072385617928235, "grad_norm": 8.79038401908676, "learning_rate": 9.255089930770621e-06, "loss": 17.3754, "step": 10981 }, { "epoch": 0.20074213537572888, "grad_norm": 6.29924407503212, "learning_rate": 9.254934476838539e-06, "loss": 17.3005, "step": 10982 }, { "epoch": 0.20076041457217542, "grad_norm": 5.544228481101844, "learning_rate": 9.254779007993281e-06, "loss": 17.005, "step": 10983 }, { "epoch": 0.20077869376862192, "grad_norm": 8.489774790847997, "learning_rate": 9.254623524235392e-06, "loss": 18.2677, "step": 10984 }, { "epoch": 0.20079697296506846, "grad_norm": 7.079374417733842, "learning_rate": 9.254468025565414e-06, "loss": 17.6445, "step": 10985 }, { "epoch": 0.20081525216151497, "grad_norm": 7.550008096182841, "learning_rate": 9.254312511983898e-06, "loss": 17.9238, "step": 10986 }, { "epoch": 0.2008335313579615, "grad_norm": 6.945211437259713, "learning_rate": 9.254156983491385e-06, "loss": 17.7057, "step": 10987 }, { "epoch": 0.20085181055440804, "grad_norm": 7.7531398968572685, "learning_rate": 9.25400144008842e-06, "loss": 17.6414, "step": 10988 }, { "epoch": 0.20087008975085455, "grad_norm": 6.249767722058566, "learning_rate": 9.25384588177555e-06, "loss": 17.4012, "step": 10989 }, { "epoch": 0.20088836894730108, "grad_norm": 7.076621240543565, "learning_rate": 9.253690308553318e-06, "loss": 17.9639, "step": 10990 }, { "epoch": 0.2009066481437476, "grad_norm": 6.622467245554146, "learning_rate": 9.253534720422272e-06, "loss": 17.5139, "step": 10991 }, { "epoch": 0.20092492734019413, "grad_norm": 10.602283253980888, "learning_rate": 9.253379117382957e-06, "loss": 18.7819, "step": 10992 }, { "epoch": 0.20094320653664066, "grad_norm": 7.706285912694962, "learning_rate": 9.253223499435916e-06, "loss": 17.9942, "step": 10993 }, { "epoch": 0.20096148573308717, "grad_norm": 7.467509917385291, "learning_rate": 9.253067866581696e-06, "loss": 17.9538, "step": 10994 }, { "epoch": 0.2009797649295337, "grad_norm": 7.20151835260283, "learning_rate": 9.25291221882084e-06, "loss": 17.9342, "step": 10995 }, { "epoch": 0.2009980441259802, "grad_norm": 6.681603235474797, "learning_rate": 9.252756556153898e-06, "loss": 17.5466, "step": 10996 }, { "epoch": 0.20101632332242675, "grad_norm": 5.785585526516853, "learning_rate": 9.252600878581413e-06, "loss": 17.3131, "step": 10997 }, { "epoch": 0.20103460251887326, "grad_norm": 6.260524820848509, "learning_rate": 9.252445186103931e-06, "loss": 17.3714, "step": 10998 }, { "epoch": 0.2010528817153198, "grad_norm": 5.946378521976767, "learning_rate": 9.252289478721996e-06, "loss": 17.1623, "step": 10999 }, { "epoch": 0.20107116091176633, "grad_norm": 6.690654018343409, "learning_rate": 9.252133756436158e-06, "loss": 17.7501, "step": 11000 }, { "epoch": 0.20108944010821284, "grad_norm": 6.182391269665302, "learning_rate": 9.251978019246957e-06, "loss": 17.2598, "step": 11001 }, { "epoch": 0.20110771930465937, "grad_norm": 6.812728097434233, "learning_rate": 9.251822267154946e-06, "loss": 17.5469, "step": 11002 }, { "epoch": 0.20112599850110588, "grad_norm": 7.386204815518348, "learning_rate": 9.251666500160663e-06, "loss": 17.613, "step": 11003 }, { "epoch": 0.20114427769755241, "grad_norm": 6.417796772408095, "learning_rate": 9.251510718264661e-06, "loss": 17.3802, "step": 11004 }, { "epoch": 0.20116255689399895, "grad_norm": 7.005406068602088, "learning_rate": 9.251354921467482e-06, "loss": 17.8147, "step": 11005 }, { "epoch": 0.20118083609044546, "grad_norm": 7.020304115906132, "learning_rate": 9.25119910976967e-06, "loss": 17.8303, "step": 11006 }, { "epoch": 0.201199115286892, "grad_norm": 7.980386483023567, "learning_rate": 9.251043283171777e-06, "loss": 18.3758, "step": 11007 }, { "epoch": 0.2012173944833385, "grad_norm": 7.526697060476995, "learning_rate": 9.250887441674345e-06, "loss": 18.1684, "step": 11008 }, { "epoch": 0.20123567367978504, "grad_norm": 7.133275257449116, "learning_rate": 9.250731585277924e-06, "loss": 17.7657, "step": 11009 }, { "epoch": 0.20125395287623157, "grad_norm": 7.324366944288287, "learning_rate": 9.250575713983056e-06, "loss": 17.7502, "step": 11010 }, { "epoch": 0.20127223207267808, "grad_norm": 5.644457924810181, "learning_rate": 9.250419827790287e-06, "loss": 17.3969, "step": 11011 }, { "epoch": 0.20129051126912462, "grad_norm": 7.12453096673587, "learning_rate": 9.250263926700168e-06, "loss": 17.6701, "step": 11012 }, { "epoch": 0.20130879046557112, "grad_norm": 7.7097577919955365, "learning_rate": 9.25010801071324e-06, "loss": 17.8423, "step": 11013 }, { "epoch": 0.20132706966201766, "grad_norm": 7.522354809228273, "learning_rate": 9.249952079830055e-06, "loss": 17.8738, "step": 11014 }, { "epoch": 0.20134534885846417, "grad_norm": 7.91721991854234, "learning_rate": 9.249796134051156e-06, "loss": 18.1216, "step": 11015 }, { "epoch": 0.2013636280549107, "grad_norm": 7.953338394016145, "learning_rate": 9.24964017337709e-06, "loss": 18.6414, "step": 11016 }, { "epoch": 0.20138190725135724, "grad_norm": 5.907648607896258, "learning_rate": 9.249484197808405e-06, "loss": 17.4272, "step": 11017 }, { "epoch": 0.20140018644780375, "grad_norm": 7.452040030432068, "learning_rate": 9.249328207345645e-06, "loss": 18.0281, "step": 11018 }, { "epoch": 0.20141846564425028, "grad_norm": 12.690281287890302, "learning_rate": 9.24917220198936e-06, "loss": 17.2009, "step": 11019 }, { "epoch": 0.2014367448406968, "grad_norm": 5.707084774368487, "learning_rate": 9.249016181740093e-06, "loss": 17.3794, "step": 11020 }, { "epoch": 0.20145502403714333, "grad_norm": 7.7033771874914425, "learning_rate": 9.248860146598395e-06, "loss": 18.2721, "step": 11021 }, { "epoch": 0.20147330323358986, "grad_norm": 7.073375834259554, "learning_rate": 9.24870409656481e-06, "loss": 17.5917, "step": 11022 }, { "epoch": 0.20149158243003637, "grad_norm": 5.304524765367442, "learning_rate": 9.248548031639885e-06, "loss": 17.1065, "step": 11023 }, { "epoch": 0.2015098616264829, "grad_norm": 8.004742125968127, "learning_rate": 9.24839195182417e-06, "loss": 18.5089, "step": 11024 }, { "epoch": 0.2015281408229294, "grad_norm": 7.681500562120354, "learning_rate": 9.248235857118209e-06, "loss": 17.8945, "step": 11025 }, { "epoch": 0.20154642001937595, "grad_norm": 6.671975679551059, "learning_rate": 9.248079747522549e-06, "loss": 17.4852, "step": 11026 }, { "epoch": 0.20156469921582248, "grad_norm": 6.981758276056119, "learning_rate": 9.247923623037739e-06, "loss": 18.1079, "step": 11027 }, { "epoch": 0.201582978412269, "grad_norm": 8.387298847381826, "learning_rate": 9.247767483664325e-06, "loss": 18.0783, "step": 11028 }, { "epoch": 0.20160125760871553, "grad_norm": 5.622659635224309, "learning_rate": 9.247611329402854e-06, "loss": 17.0959, "step": 11029 }, { "epoch": 0.20161953680516204, "grad_norm": 6.730399075504475, "learning_rate": 9.247455160253874e-06, "loss": 17.6984, "step": 11030 }, { "epoch": 0.20163781600160857, "grad_norm": 6.899416727690391, "learning_rate": 9.247298976217933e-06, "loss": 17.6986, "step": 11031 }, { "epoch": 0.20165609519805508, "grad_norm": 6.0520197184089595, "learning_rate": 9.247142777295578e-06, "loss": 17.3721, "step": 11032 }, { "epoch": 0.20167437439450162, "grad_norm": 6.393644475255894, "learning_rate": 9.246986563487355e-06, "loss": 17.461, "step": 11033 }, { "epoch": 0.20169265359094815, "grad_norm": 6.182542631902963, "learning_rate": 9.246830334793812e-06, "loss": 17.4043, "step": 11034 }, { "epoch": 0.20171093278739466, "grad_norm": 7.691680825883224, "learning_rate": 9.246674091215499e-06, "loss": 17.9927, "step": 11035 }, { "epoch": 0.2017292119838412, "grad_norm": 5.994311075954899, "learning_rate": 9.246517832752961e-06, "loss": 17.284, "step": 11036 }, { "epoch": 0.2017474911802877, "grad_norm": 5.222483291875188, "learning_rate": 9.246361559406747e-06, "loss": 16.9912, "step": 11037 }, { "epoch": 0.20176577037673424, "grad_norm": 6.857095782740968, "learning_rate": 9.246205271177405e-06, "loss": 17.9019, "step": 11038 }, { "epoch": 0.20178404957318077, "grad_norm": 6.771680986195066, "learning_rate": 9.246048968065479e-06, "loss": 17.588, "step": 11039 }, { "epoch": 0.20180232876962728, "grad_norm": 6.828068076002624, "learning_rate": 9.245892650071521e-06, "loss": 17.5896, "step": 11040 }, { "epoch": 0.20182060796607382, "grad_norm": 18.483989187270236, "learning_rate": 9.245736317196079e-06, "loss": 17.829, "step": 11041 }, { "epoch": 0.20183888716252033, "grad_norm": 6.282962753229796, "learning_rate": 9.2455799694397e-06, "loss": 17.3659, "step": 11042 }, { "epoch": 0.20185716635896686, "grad_norm": 6.50966517020041, "learning_rate": 9.24542360680293e-06, "loss": 17.7501, "step": 11043 }, { "epoch": 0.2018754455554134, "grad_norm": 6.407674126181323, "learning_rate": 9.245267229286319e-06, "loss": 17.4041, "step": 11044 }, { "epoch": 0.2018937247518599, "grad_norm": 6.749213381678077, "learning_rate": 9.245110836890415e-06, "loss": 17.6395, "step": 11045 }, { "epoch": 0.20191200394830644, "grad_norm": 6.644572490671224, "learning_rate": 9.244954429615766e-06, "loss": 17.4293, "step": 11046 }, { "epoch": 0.20193028314475295, "grad_norm": 6.201464127877089, "learning_rate": 9.244798007462919e-06, "loss": 17.2814, "step": 11047 }, { "epoch": 0.20194856234119948, "grad_norm": 6.999797919378978, "learning_rate": 9.244641570432426e-06, "loss": 17.9026, "step": 11048 }, { "epoch": 0.201966841537646, "grad_norm": 8.051882457201522, "learning_rate": 9.24448511852483e-06, "loss": 18.3227, "step": 11049 }, { "epoch": 0.20198512073409253, "grad_norm": 7.266519999310683, "learning_rate": 9.244328651740684e-06, "loss": 17.7956, "step": 11050 }, { "epoch": 0.20200339993053906, "grad_norm": 8.556487865262474, "learning_rate": 9.244172170080532e-06, "loss": 18.994, "step": 11051 }, { "epoch": 0.20202167912698557, "grad_norm": 6.118251722258119, "learning_rate": 9.244015673544925e-06, "loss": 17.5535, "step": 11052 }, { "epoch": 0.2020399583234321, "grad_norm": 7.353448160888481, "learning_rate": 9.243859162134414e-06, "loss": 17.6268, "step": 11053 }, { "epoch": 0.20205823751987861, "grad_norm": 6.1185326123313315, "learning_rate": 9.243702635849542e-06, "loss": 17.36, "step": 11054 }, { "epoch": 0.20207651671632515, "grad_norm": 6.173621161907901, "learning_rate": 9.243546094690863e-06, "loss": 17.5314, "step": 11055 }, { "epoch": 0.20209479591277169, "grad_norm": 7.005533150540325, "learning_rate": 9.243389538658922e-06, "loss": 17.8785, "step": 11056 }, { "epoch": 0.2021130751092182, "grad_norm": 8.048076185664863, "learning_rate": 9.243232967754269e-06, "loss": 18.4834, "step": 11057 }, { "epoch": 0.20213135430566473, "grad_norm": 5.875936160734458, "learning_rate": 9.243076381977453e-06, "loss": 16.9904, "step": 11058 }, { "epoch": 0.20214963350211124, "grad_norm": 6.812158374551983, "learning_rate": 9.242919781329021e-06, "loss": 17.4696, "step": 11059 }, { "epoch": 0.20216791269855777, "grad_norm": 6.78174248427758, "learning_rate": 9.242763165809525e-06, "loss": 17.8872, "step": 11060 }, { "epoch": 0.2021861918950043, "grad_norm": 6.690291000638457, "learning_rate": 9.24260653541951e-06, "loss": 17.7383, "step": 11061 }, { "epoch": 0.20220447109145082, "grad_norm": 7.568173641090415, "learning_rate": 9.24244989015953e-06, "loss": 17.9104, "step": 11062 }, { "epoch": 0.20222275028789735, "grad_norm": 5.596928581622695, "learning_rate": 9.24229323003013e-06, "loss": 17.2972, "step": 11063 }, { "epoch": 0.20224102948434386, "grad_norm": 6.015883396400564, "learning_rate": 9.242136555031862e-06, "loss": 17.4608, "step": 11064 }, { "epoch": 0.2022593086807904, "grad_norm": 6.328501309808907, "learning_rate": 9.241979865165271e-06, "loss": 17.1676, "step": 11065 }, { "epoch": 0.2022775878772369, "grad_norm": 7.222603573510737, "learning_rate": 9.24182316043091e-06, "loss": 17.8221, "step": 11066 }, { "epoch": 0.20229586707368344, "grad_norm": 7.106749197423903, "learning_rate": 9.241666440829326e-06, "loss": 18.1133, "step": 11067 }, { "epoch": 0.20231414627012997, "grad_norm": 5.983886671823255, "learning_rate": 9.241509706361072e-06, "loss": 17.2629, "step": 11068 }, { "epoch": 0.20233242546657648, "grad_norm": 7.179488341236794, "learning_rate": 9.24135295702669e-06, "loss": 17.8632, "step": 11069 }, { "epoch": 0.20235070466302302, "grad_norm": 6.524423525191377, "learning_rate": 9.241196192826738e-06, "loss": 17.7955, "step": 11070 }, { "epoch": 0.20236898385946953, "grad_norm": 6.264568453092564, "learning_rate": 9.24103941376176e-06, "loss": 17.2563, "step": 11071 }, { "epoch": 0.20238726305591606, "grad_norm": 8.58333432459693, "learning_rate": 9.240882619832306e-06, "loss": 18.3223, "step": 11072 }, { "epoch": 0.2024055422523626, "grad_norm": 7.066775398865614, "learning_rate": 9.240725811038927e-06, "loss": 17.8233, "step": 11073 }, { "epoch": 0.2024238214488091, "grad_norm": 6.8026509708611735, "learning_rate": 9.240568987382173e-06, "loss": 17.5153, "step": 11074 }, { "epoch": 0.20244210064525564, "grad_norm": 6.634238330328132, "learning_rate": 9.240412148862591e-06, "loss": 17.5084, "step": 11075 }, { "epoch": 0.20246037984170215, "grad_norm": 8.026273114106043, "learning_rate": 9.240255295480734e-06, "loss": 18.0841, "step": 11076 }, { "epoch": 0.20247865903814868, "grad_norm": 6.690087092426927, "learning_rate": 9.240098427237148e-06, "loss": 17.6633, "step": 11077 }, { "epoch": 0.20249693823459522, "grad_norm": 5.8175423129199775, "learning_rate": 9.239941544132386e-06, "loss": 17.0738, "step": 11078 }, { "epoch": 0.20251521743104173, "grad_norm": 6.414427782891805, "learning_rate": 9.239784646166999e-06, "loss": 17.1785, "step": 11079 }, { "epoch": 0.20253349662748826, "grad_norm": 8.766801582188432, "learning_rate": 9.239627733341531e-06, "loss": 18.1234, "step": 11080 }, { "epoch": 0.20255177582393477, "grad_norm": 7.365357716575713, "learning_rate": 9.239470805656538e-06, "loss": 17.6308, "step": 11081 }, { "epoch": 0.2025700550203813, "grad_norm": 5.164235986921989, "learning_rate": 9.239313863112567e-06, "loss": 16.938, "step": 11082 }, { "epoch": 0.20258833421682781, "grad_norm": 6.453351094731791, "learning_rate": 9.23915690571017e-06, "loss": 17.3139, "step": 11083 }, { "epoch": 0.20260661341327435, "grad_norm": 6.6864016587948, "learning_rate": 9.238999933449894e-06, "loss": 17.4998, "step": 11084 }, { "epoch": 0.20262489260972089, "grad_norm": 6.803955050509063, "learning_rate": 9.238842946332292e-06, "loss": 17.7202, "step": 11085 }, { "epoch": 0.2026431718061674, "grad_norm": 6.7583508701882975, "learning_rate": 9.238685944357913e-06, "loss": 17.5772, "step": 11086 }, { "epoch": 0.20266145100261393, "grad_norm": 7.021391029608066, "learning_rate": 9.238528927527308e-06, "loss": 17.8551, "step": 11087 }, { "epoch": 0.20267973019906044, "grad_norm": 5.875048450870385, "learning_rate": 9.238371895841027e-06, "loss": 17.4093, "step": 11088 }, { "epoch": 0.20269800939550697, "grad_norm": 6.7214107399667835, "learning_rate": 9.23821484929962e-06, "loss": 17.1692, "step": 11089 }, { "epoch": 0.2027162885919535, "grad_norm": 8.274023358814, "learning_rate": 9.238057787903637e-06, "loss": 18.4087, "step": 11090 }, { "epoch": 0.20273456778840002, "grad_norm": 6.979774278339356, "learning_rate": 9.23790071165363e-06, "loss": 17.2612, "step": 11091 }, { "epoch": 0.20275284698484655, "grad_norm": 7.624076237694389, "learning_rate": 9.237743620550148e-06, "loss": 17.7562, "step": 11092 }, { "epoch": 0.20277112618129306, "grad_norm": 7.896095765190013, "learning_rate": 9.237586514593743e-06, "loss": 18.3727, "step": 11093 }, { "epoch": 0.2027894053777396, "grad_norm": 7.956270754415964, "learning_rate": 9.237429393784965e-06, "loss": 18.0114, "step": 11094 }, { "epoch": 0.20280768457418613, "grad_norm": 7.989114623913356, "learning_rate": 9.237272258124365e-06, "loss": 18.1503, "step": 11095 }, { "epoch": 0.20282596377063264, "grad_norm": 7.1258781155501705, "learning_rate": 9.237115107612493e-06, "loss": 17.8629, "step": 11096 }, { "epoch": 0.20284424296707917, "grad_norm": 7.152743907610953, "learning_rate": 9.236957942249902e-06, "loss": 17.8655, "step": 11097 }, { "epoch": 0.20286252216352568, "grad_norm": 7.663056514872933, "learning_rate": 9.23680076203714e-06, "loss": 17.8698, "step": 11098 }, { "epoch": 0.20288080135997222, "grad_norm": 6.719182007292677, "learning_rate": 9.236643566974758e-06, "loss": 17.5382, "step": 11099 }, { "epoch": 0.20289908055641873, "grad_norm": 6.699845952976791, "learning_rate": 9.236486357063307e-06, "loss": 17.601, "step": 11100 }, { "epoch": 0.20291735975286526, "grad_norm": 6.53621108530564, "learning_rate": 9.23632913230334e-06, "loss": 17.469, "step": 11101 }, { "epoch": 0.2029356389493118, "grad_norm": 6.2682005317902645, "learning_rate": 9.236171892695408e-06, "loss": 17.6244, "step": 11102 }, { "epoch": 0.2029539181457583, "grad_norm": 6.7197314798793135, "learning_rate": 9.23601463824006e-06, "loss": 17.5137, "step": 11103 }, { "epoch": 0.20297219734220484, "grad_norm": 6.336006258595372, "learning_rate": 9.23585736893785e-06, "loss": 17.3593, "step": 11104 }, { "epoch": 0.20299047653865135, "grad_norm": 6.137737038681827, "learning_rate": 9.235700084789325e-06, "loss": 17.0645, "step": 11105 }, { "epoch": 0.20300875573509788, "grad_norm": 7.834039642186673, "learning_rate": 9.23554278579504e-06, "loss": 18.0745, "step": 11106 }, { "epoch": 0.20302703493154442, "grad_norm": 6.954668084270081, "learning_rate": 9.235385471955546e-06, "loss": 17.7245, "step": 11107 }, { "epoch": 0.20304531412799093, "grad_norm": 7.721553135183198, "learning_rate": 9.235228143271392e-06, "loss": 17.9186, "step": 11108 }, { "epoch": 0.20306359332443746, "grad_norm": 9.495352412979067, "learning_rate": 9.23507079974313e-06, "loss": 18.7159, "step": 11109 }, { "epoch": 0.20308187252088397, "grad_norm": 5.810202473978871, "learning_rate": 9.234913441371314e-06, "loss": 17.2472, "step": 11110 }, { "epoch": 0.2031001517173305, "grad_norm": 6.442896637173475, "learning_rate": 9.234756068156494e-06, "loss": 17.4719, "step": 11111 }, { "epoch": 0.20311843091377704, "grad_norm": 6.402017573467823, "learning_rate": 9.234598680099222e-06, "loss": 17.6745, "step": 11112 }, { "epoch": 0.20313671011022355, "grad_norm": 5.4501875542114115, "learning_rate": 9.234441277200048e-06, "loss": 17.0627, "step": 11113 }, { "epoch": 0.2031549893066701, "grad_norm": 5.838435571181767, "learning_rate": 9.234283859459525e-06, "loss": 17.2981, "step": 11114 }, { "epoch": 0.2031732685031166, "grad_norm": 7.30484957185883, "learning_rate": 9.234126426878203e-06, "loss": 17.6792, "step": 11115 }, { "epoch": 0.20319154769956313, "grad_norm": 8.679037738220984, "learning_rate": 9.233968979456637e-06, "loss": 18.3425, "step": 11116 }, { "epoch": 0.20320982689600964, "grad_norm": 6.49102380204587, "learning_rate": 9.233811517195378e-06, "loss": 17.5286, "step": 11117 }, { "epoch": 0.20322810609245617, "grad_norm": 6.991123610421662, "learning_rate": 9.233654040094976e-06, "loss": 17.8174, "step": 11118 }, { "epoch": 0.2032463852889027, "grad_norm": 6.168237386342085, "learning_rate": 9.233496548155984e-06, "loss": 17.4737, "step": 11119 }, { "epoch": 0.20326466448534922, "grad_norm": 6.4433712741488485, "learning_rate": 9.233339041378952e-06, "loss": 17.4473, "step": 11120 }, { "epoch": 0.20328294368179575, "grad_norm": 5.891067840361349, "learning_rate": 9.233181519764437e-06, "loss": 17.2874, "step": 11121 }, { "epoch": 0.20330122287824226, "grad_norm": 8.143132742497487, "learning_rate": 9.233023983312987e-06, "loss": 18.0829, "step": 11122 }, { "epoch": 0.2033195020746888, "grad_norm": 8.487263957275987, "learning_rate": 9.232866432025156e-06, "loss": 18.3101, "step": 11123 }, { "epoch": 0.20333778127113533, "grad_norm": 6.394992361882281, "learning_rate": 9.232708865901495e-06, "loss": 17.3551, "step": 11124 }, { "epoch": 0.20335606046758184, "grad_norm": 6.575868690305045, "learning_rate": 9.232551284942554e-06, "loss": 17.5616, "step": 11125 }, { "epoch": 0.20337433966402838, "grad_norm": 5.0399235075632856, "learning_rate": 9.23239368914889e-06, "loss": 16.9346, "step": 11126 }, { "epoch": 0.20339261886047488, "grad_norm": 9.2722299744395, "learning_rate": 9.232236078521055e-06, "loss": 18.2814, "step": 11127 }, { "epoch": 0.20341089805692142, "grad_norm": 6.8905819906211585, "learning_rate": 9.232078453059598e-06, "loss": 17.5832, "step": 11128 }, { "epoch": 0.20342917725336795, "grad_norm": 6.132241847838988, "learning_rate": 9.231920812765074e-06, "loss": 17.467, "step": 11129 }, { "epoch": 0.20344745644981446, "grad_norm": 6.379810305024003, "learning_rate": 9.231763157638036e-06, "loss": 17.4435, "step": 11130 }, { "epoch": 0.203465735646261, "grad_norm": 5.808808655742433, "learning_rate": 9.231605487679033e-06, "loss": 17.2186, "step": 11131 }, { "epoch": 0.2034840148427075, "grad_norm": 6.0394033521351345, "learning_rate": 9.23144780288862e-06, "loss": 17.122, "step": 11132 }, { "epoch": 0.20350229403915404, "grad_norm": 8.642312035088267, "learning_rate": 9.231290103267352e-06, "loss": 18.5249, "step": 11133 }, { "epoch": 0.20352057323560055, "grad_norm": 6.4284436306351385, "learning_rate": 9.231132388815778e-06, "loss": 17.4943, "step": 11134 }, { "epoch": 0.20353885243204708, "grad_norm": 8.370086693427135, "learning_rate": 9.230974659534451e-06, "loss": 18.1642, "step": 11135 }, { "epoch": 0.20355713162849362, "grad_norm": 8.025758280170024, "learning_rate": 9.230816915423928e-06, "loss": 18.0183, "step": 11136 }, { "epoch": 0.20357541082494013, "grad_norm": 6.295715707343801, "learning_rate": 9.230659156484755e-06, "loss": 17.3867, "step": 11137 }, { "epoch": 0.20359369002138666, "grad_norm": 7.556217004668174, "learning_rate": 9.230501382717492e-06, "loss": 17.8981, "step": 11138 }, { "epoch": 0.20361196921783317, "grad_norm": 5.740317766351861, "learning_rate": 9.230343594122687e-06, "loss": 17.3958, "step": 11139 }, { "epoch": 0.2036302484142797, "grad_norm": 6.944209250198379, "learning_rate": 9.230185790700895e-06, "loss": 17.7332, "step": 11140 }, { "epoch": 0.20364852761072624, "grad_norm": 6.564848811373634, "learning_rate": 9.230027972452669e-06, "loss": 17.4095, "step": 11141 }, { "epoch": 0.20366680680717275, "grad_norm": 6.854415444915705, "learning_rate": 9.229870139378562e-06, "loss": 17.5654, "step": 11142 }, { "epoch": 0.2036850860036193, "grad_norm": 7.647143387274446, "learning_rate": 9.229712291479128e-06, "loss": 18.1861, "step": 11143 }, { "epoch": 0.2037033652000658, "grad_norm": 6.264718415869328, "learning_rate": 9.229554428754918e-06, "loss": 17.5786, "step": 11144 }, { "epoch": 0.20372164439651233, "grad_norm": 7.050400247635081, "learning_rate": 9.229396551206488e-06, "loss": 17.6802, "step": 11145 }, { "epoch": 0.20373992359295887, "grad_norm": 7.013656808967406, "learning_rate": 9.22923865883439e-06, "loss": 17.7487, "step": 11146 }, { "epoch": 0.20375820278940537, "grad_norm": 7.176620897988137, "learning_rate": 9.229080751639177e-06, "loss": 17.7222, "step": 11147 }, { "epoch": 0.2037764819858519, "grad_norm": 7.104754563617455, "learning_rate": 9.228922829621403e-06, "loss": 17.6496, "step": 11148 }, { "epoch": 0.20379476118229842, "grad_norm": 7.098889295955707, "learning_rate": 9.228764892781622e-06, "loss": 18.0888, "step": 11149 }, { "epoch": 0.20381304037874495, "grad_norm": 8.365287733535423, "learning_rate": 9.228606941120386e-06, "loss": 18.5649, "step": 11150 }, { "epoch": 0.20383131957519146, "grad_norm": 7.066472823936408, "learning_rate": 9.228448974638252e-06, "loss": 17.8336, "step": 11151 }, { "epoch": 0.203849598771638, "grad_norm": 7.190799257549998, "learning_rate": 9.228290993335768e-06, "loss": 17.4468, "step": 11152 }, { "epoch": 0.20386787796808453, "grad_norm": 7.424093952787657, "learning_rate": 9.228132997213493e-06, "loss": 17.9261, "step": 11153 }, { "epoch": 0.20388615716453104, "grad_norm": 6.104952908897735, "learning_rate": 9.227974986271976e-06, "loss": 17.3901, "step": 11154 }, { "epoch": 0.20390443636097758, "grad_norm": 5.532896939303763, "learning_rate": 9.227816960511778e-06, "loss": 17.4417, "step": 11155 }, { "epoch": 0.20392271555742408, "grad_norm": 6.510258284011305, "learning_rate": 9.227658919933446e-06, "loss": 17.5093, "step": 11156 }, { "epoch": 0.20394099475387062, "grad_norm": 6.876360849657799, "learning_rate": 9.227500864537536e-06, "loss": 17.7952, "step": 11157 }, { "epoch": 0.20395927395031715, "grad_norm": 5.942607970633072, "learning_rate": 9.227342794324603e-06, "loss": 17.3487, "step": 11158 }, { "epoch": 0.20397755314676366, "grad_norm": 7.574700009519517, "learning_rate": 9.2271847092952e-06, "loss": 18.1533, "step": 11159 }, { "epoch": 0.2039958323432102, "grad_norm": 6.682538345208364, "learning_rate": 9.227026609449881e-06, "loss": 17.4988, "step": 11160 }, { "epoch": 0.2040141115396567, "grad_norm": 7.752221844167152, "learning_rate": 9.226868494789203e-06, "loss": 18.0337, "step": 11161 }, { "epoch": 0.20403239073610324, "grad_norm": 6.064331059840321, "learning_rate": 9.226710365313714e-06, "loss": 17.4478, "step": 11162 }, { "epoch": 0.20405066993254978, "grad_norm": 6.199532295651936, "learning_rate": 9.226552221023974e-06, "loss": 17.4396, "step": 11163 }, { "epoch": 0.20406894912899629, "grad_norm": 7.424588487200107, "learning_rate": 9.226394061920537e-06, "loss": 17.9378, "step": 11164 }, { "epoch": 0.20408722832544282, "grad_norm": 7.034173364726601, "learning_rate": 9.226235888003952e-06, "loss": 17.9955, "step": 11165 }, { "epoch": 0.20410550752188933, "grad_norm": 6.187052812857753, "learning_rate": 9.226077699274778e-06, "loss": 17.395, "step": 11166 }, { "epoch": 0.20412378671833586, "grad_norm": 6.251115632206645, "learning_rate": 9.225919495733569e-06, "loss": 17.6024, "step": 11167 }, { "epoch": 0.20414206591478237, "grad_norm": 7.201508065677238, "learning_rate": 9.225761277380878e-06, "loss": 17.7733, "step": 11168 }, { "epoch": 0.2041603451112289, "grad_norm": 6.595517365453235, "learning_rate": 9.225603044217261e-06, "loss": 17.6297, "step": 11169 }, { "epoch": 0.20417862430767544, "grad_norm": 7.720351350143014, "learning_rate": 9.225444796243273e-06, "loss": 17.9286, "step": 11170 }, { "epoch": 0.20419690350412195, "grad_norm": 6.852815619637015, "learning_rate": 9.225286533459468e-06, "loss": 17.5033, "step": 11171 }, { "epoch": 0.2042151827005685, "grad_norm": 7.025534011609405, "learning_rate": 9.225128255866397e-06, "loss": 17.7254, "step": 11172 }, { "epoch": 0.204233461897015, "grad_norm": 8.401970977588938, "learning_rate": 9.224969963464623e-06, "loss": 18.3252, "step": 11173 }, { "epoch": 0.20425174109346153, "grad_norm": 5.386890150228694, "learning_rate": 9.224811656254694e-06, "loss": 17.17, "step": 11174 }, { "epoch": 0.20427002028990807, "grad_norm": 7.185666553147642, "learning_rate": 9.224653334237163e-06, "loss": 18.0141, "step": 11175 }, { "epoch": 0.20428829948635457, "grad_norm": 7.066949308004852, "learning_rate": 9.224494997412593e-06, "loss": 17.7945, "step": 11176 }, { "epoch": 0.2043065786828011, "grad_norm": 7.613231902013903, "learning_rate": 9.224336645781533e-06, "loss": 17.8731, "step": 11177 }, { "epoch": 0.20432485787924762, "grad_norm": 6.427204203579496, "learning_rate": 9.22417827934454e-06, "loss": 17.5776, "step": 11178 }, { "epoch": 0.20434313707569415, "grad_norm": 8.003971223075348, "learning_rate": 9.224019898102168e-06, "loss": 17.8197, "step": 11179 }, { "epoch": 0.2043614162721407, "grad_norm": 5.5872193885185455, "learning_rate": 9.223861502054974e-06, "loss": 17.19, "step": 11180 }, { "epoch": 0.2043796954685872, "grad_norm": 6.276657648988144, "learning_rate": 9.223703091203511e-06, "loss": 17.3117, "step": 11181 }, { "epoch": 0.20439797466503373, "grad_norm": 6.897398207329288, "learning_rate": 9.223544665548337e-06, "loss": 17.6208, "step": 11182 }, { "epoch": 0.20441625386148024, "grad_norm": 7.3221433396788305, "learning_rate": 9.223386225090002e-06, "loss": 17.4744, "step": 11183 }, { "epoch": 0.20443453305792678, "grad_norm": 6.278377547536221, "learning_rate": 9.223227769829068e-06, "loss": 17.3664, "step": 11184 }, { "epoch": 0.20445281225437328, "grad_norm": 7.80024806356108, "learning_rate": 9.223069299766085e-06, "loss": 18.1642, "step": 11185 }, { "epoch": 0.20447109145081982, "grad_norm": 7.538758445830707, "learning_rate": 9.222910814901611e-06, "loss": 17.8844, "step": 11186 }, { "epoch": 0.20448937064726636, "grad_norm": 6.2320072295754825, "learning_rate": 9.222752315236203e-06, "loss": 16.996, "step": 11187 }, { "epoch": 0.20450764984371286, "grad_norm": 5.768389402983314, "learning_rate": 9.222593800770411e-06, "loss": 17.2548, "step": 11188 }, { "epoch": 0.2045259290401594, "grad_norm": 9.386362018993129, "learning_rate": 9.222435271504797e-06, "loss": 18.8809, "step": 11189 }, { "epoch": 0.2045442082366059, "grad_norm": 6.2184328619791085, "learning_rate": 9.222276727439914e-06, "loss": 17.5199, "step": 11190 }, { "epoch": 0.20456248743305244, "grad_norm": 7.577401563324187, "learning_rate": 9.222118168576316e-06, "loss": 17.8251, "step": 11191 }, { "epoch": 0.20458076662949898, "grad_norm": 7.8018276763599195, "learning_rate": 9.22195959491456e-06, "loss": 17.7496, "step": 11192 }, { "epoch": 0.20459904582594549, "grad_norm": 7.7073048193379465, "learning_rate": 9.221801006455204e-06, "loss": 18.5523, "step": 11193 }, { "epoch": 0.20461732502239202, "grad_norm": 7.672101512759613, "learning_rate": 9.2216424031988e-06, "loss": 17.8634, "step": 11194 }, { "epoch": 0.20463560421883853, "grad_norm": 7.427337842379527, "learning_rate": 9.221483785145906e-06, "loss": 17.9564, "step": 11195 }, { "epoch": 0.20465388341528506, "grad_norm": 6.569593973534207, "learning_rate": 9.221325152297079e-06, "loss": 17.6455, "step": 11196 }, { "epoch": 0.2046721626117316, "grad_norm": 8.427328028996476, "learning_rate": 9.221166504652871e-06, "loss": 18.462, "step": 11197 }, { "epoch": 0.2046904418081781, "grad_norm": 6.744904946082712, "learning_rate": 9.221007842213843e-06, "loss": 17.515, "step": 11198 }, { "epoch": 0.20470872100462464, "grad_norm": 8.33352714628349, "learning_rate": 9.220849164980548e-06, "loss": 18.5856, "step": 11199 }, { "epoch": 0.20472700020107115, "grad_norm": 7.956233550220343, "learning_rate": 9.220690472953542e-06, "loss": 18.1, "step": 11200 }, { "epoch": 0.2047452793975177, "grad_norm": 7.034235858966999, "learning_rate": 9.220531766133383e-06, "loss": 17.6427, "step": 11201 }, { "epoch": 0.2047635585939642, "grad_norm": 7.374752028705149, "learning_rate": 9.220373044520628e-06, "loss": 17.513, "step": 11202 }, { "epoch": 0.20478183779041073, "grad_norm": 7.120915378950373, "learning_rate": 9.22021430811583e-06, "loss": 17.5448, "step": 11203 }, { "epoch": 0.20480011698685727, "grad_norm": 5.5208955331425065, "learning_rate": 9.220055556919547e-06, "loss": 17.1941, "step": 11204 }, { "epoch": 0.20481839618330377, "grad_norm": 7.252932148688536, "learning_rate": 9.219896790932334e-06, "loss": 17.8942, "step": 11205 }, { "epoch": 0.2048366753797503, "grad_norm": 6.855217393125561, "learning_rate": 9.219738010154753e-06, "loss": 17.6032, "step": 11206 }, { "epoch": 0.20485495457619682, "grad_norm": 7.905595712034856, "learning_rate": 9.219579214587354e-06, "loss": 18.0844, "step": 11207 }, { "epoch": 0.20487323377264335, "grad_norm": 7.305491779279216, "learning_rate": 9.219420404230694e-06, "loss": 17.8717, "step": 11208 }, { "epoch": 0.2048915129690899, "grad_norm": 7.109208649456086, "learning_rate": 9.219261579085335e-06, "loss": 17.9267, "step": 11209 }, { "epoch": 0.2049097921655364, "grad_norm": 6.880451246231312, "learning_rate": 9.21910273915183e-06, "loss": 17.6225, "step": 11210 }, { "epoch": 0.20492807136198293, "grad_norm": 7.476702279942833, "learning_rate": 9.218943884430733e-06, "loss": 18.0985, "step": 11211 }, { "epoch": 0.20494635055842944, "grad_norm": 6.738653096375818, "learning_rate": 9.218785014922606e-06, "loss": 17.3817, "step": 11212 }, { "epoch": 0.20496462975487598, "grad_norm": 6.053858809942252, "learning_rate": 9.218626130628003e-06, "loss": 17.3523, "step": 11213 }, { "epoch": 0.2049829089513225, "grad_norm": 7.031194283853823, "learning_rate": 9.218467231547482e-06, "loss": 17.9715, "step": 11214 }, { "epoch": 0.20500118814776902, "grad_norm": 7.617933246938375, "learning_rate": 9.2183083176816e-06, "loss": 18.2486, "step": 11215 }, { "epoch": 0.20501946734421556, "grad_norm": 7.016914651326241, "learning_rate": 9.218149389030913e-06, "loss": 17.7558, "step": 11216 }, { "epoch": 0.20503774654066206, "grad_norm": 6.314429775341909, "learning_rate": 9.21799044559598e-06, "loss": 17.4803, "step": 11217 }, { "epoch": 0.2050560257371086, "grad_norm": 6.298121876385616, "learning_rate": 9.217831487377354e-06, "loss": 17.2778, "step": 11218 }, { "epoch": 0.2050743049335551, "grad_norm": 6.463091502828433, "learning_rate": 9.217672514375594e-06, "loss": 17.6105, "step": 11219 }, { "epoch": 0.20509258413000164, "grad_norm": 7.081118131205817, "learning_rate": 9.217513526591259e-06, "loss": 17.6592, "step": 11220 }, { "epoch": 0.20511086332644818, "grad_norm": 7.573218858127629, "learning_rate": 9.217354524024905e-06, "loss": 17.7936, "step": 11221 }, { "epoch": 0.2051291425228947, "grad_norm": 8.4098199670913, "learning_rate": 9.21719550667709e-06, "loss": 17.9587, "step": 11222 }, { "epoch": 0.20514742171934122, "grad_norm": 6.121847899495171, "learning_rate": 9.21703647454837e-06, "loss": 17.4993, "step": 11223 }, { "epoch": 0.20516570091578773, "grad_norm": 7.413990228255566, "learning_rate": 9.216877427639303e-06, "loss": 17.9504, "step": 11224 }, { "epoch": 0.20518398011223427, "grad_norm": 7.076346612130487, "learning_rate": 9.216718365950448e-06, "loss": 18.0455, "step": 11225 }, { "epoch": 0.2052022593086808, "grad_norm": 8.428479294844449, "learning_rate": 9.21655928948236e-06, "loss": 17.9222, "step": 11226 }, { "epoch": 0.2052205385051273, "grad_norm": 9.552257974022613, "learning_rate": 9.216400198235598e-06, "loss": 18.1117, "step": 11227 }, { "epoch": 0.20523881770157384, "grad_norm": 5.859977160157482, "learning_rate": 9.216241092210718e-06, "loss": 17.3769, "step": 11228 }, { "epoch": 0.20525709689802035, "grad_norm": 6.6096858069978115, "learning_rate": 9.21608197140828e-06, "loss": 17.6359, "step": 11229 }, { "epoch": 0.2052753760944669, "grad_norm": 6.743129044468079, "learning_rate": 9.215922835828839e-06, "loss": 17.7402, "step": 11230 }, { "epoch": 0.20529365529091342, "grad_norm": 7.3244944650637, "learning_rate": 9.215763685472955e-06, "loss": 17.8573, "step": 11231 }, { "epoch": 0.20531193448735993, "grad_norm": 7.177089459925122, "learning_rate": 9.215604520341186e-06, "loss": 17.9606, "step": 11232 }, { "epoch": 0.20533021368380647, "grad_norm": 7.654941124166402, "learning_rate": 9.215445340434088e-06, "loss": 17.8949, "step": 11233 }, { "epoch": 0.20534849288025298, "grad_norm": 7.105827489680923, "learning_rate": 9.215286145752222e-06, "loss": 17.7934, "step": 11234 }, { "epoch": 0.2053667720766995, "grad_norm": 5.612250988367692, "learning_rate": 9.215126936296141e-06, "loss": 17.2452, "step": 11235 }, { "epoch": 0.20538505127314602, "grad_norm": 12.762976536213964, "learning_rate": 9.214967712066408e-06, "loss": 19.0867, "step": 11236 }, { "epoch": 0.20540333046959255, "grad_norm": 5.904836314826313, "learning_rate": 9.214808473063578e-06, "loss": 17.2909, "step": 11237 }, { "epoch": 0.2054216096660391, "grad_norm": 7.576621186342858, "learning_rate": 9.21464921928821e-06, "loss": 17.8573, "step": 11238 }, { "epoch": 0.2054398888624856, "grad_norm": 6.779356927404155, "learning_rate": 9.21448995074086e-06, "loss": 17.5983, "step": 11239 }, { "epoch": 0.20545816805893213, "grad_norm": 6.856019507235172, "learning_rate": 9.214330667422092e-06, "loss": 17.8275, "step": 11240 }, { "epoch": 0.20547644725537864, "grad_norm": 9.397441998703927, "learning_rate": 9.21417136933246e-06, "loss": 18.7988, "step": 11241 }, { "epoch": 0.20549472645182518, "grad_norm": 5.921991480316953, "learning_rate": 9.214012056472521e-06, "loss": 17.259, "step": 11242 }, { "epoch": 0.2055130056482717, "grad_norm": 7.2698720955148675, "learning_rate": 9.213852728842839e-06, "loss": 17.8498, "step": 11243 }, { "epoch": 0.20553128484471822, "grad_norm": 6.651611949906887, "learning_rate": 9.213693386443966e-06, "loss": 17.7188, "step": 11244 }, { "epoch": 0.20554956404116476, "grad_norm": 7.508074955197114, "learning_rate": 9.213534029276464e-06, "loss": 18.2089, "step": 11245 }, { "epoch": 0.20556784323761126, "grad_norm": 8.512802495561843, "learning_rate": 9.21337465734089e-06, "loss": 17.8531, "step": 11246 }, { "epoch": 0.2055861224340578, "grad_norm": 7.357293392894167, "learning_rate": 9.213215270637805e-06, "loss": 17.7409, "step": 11247 }, { "epoch": 0.20560440163050434, "grad_norm": 6.011106680943368, "learning_rate": 9.213055869167767e-06, "loss": 17.2643, "step": 11248 }, { "epoch": 0.20562268082695084, "grad_norm": 6.57061944873097, "learning_rate": 9.212896452931331e-06, "loss": 17.6767, "step": 11249 }, { "epoch": 0.20564096002339738, "grad_norm": 6.287256367807107, "learning_rate": 9.21273702192906e-06, "loss": 17.2238, "step": 11250 }, { "epoch": 0.2056592392198439, "grad_norm": 6.000760785422642, "learning_rate": 9.21257757616151e-06, "loss": 17.226, "step": 11251 }, { "epoch": 0.20567751841629042, "grad_norm": 6.890166883988605, "learning_rate": 9.212418115629243e-06, "loss": 17.8284, "step": 11252 }, { "epoch": 0.20569579761273693, "grad_norm": 7.627504871921874, "learning_rate": 9.212258640332815e-06, "loss": 17.4906, "step": 11253 }, { "epoch": 0.20571407680918347, "grad_norm": 7.70247386428855, "learning_rate": 9.212099150272786e-06, "loss": 18.3294, "step": 11254 }, { "epoch": 0.20573235600563, "grad_norm": 8.44945226014485, "learning_rate": 9.211939645449715e-06, "loss": 18.5711, "step": 11255 }, { "epoch": 0.2057506352020765, "grad_norm": 8.303653254049596, "learning_rate": 9.211780125864162e-06, "loss": 18.3465, "step": 11256 }, { "epoch": 0.20576891439852305, "grad_norm": 5.468438191150138, "learning_rate": 9.211620591516683e-06, "loss": 17.101, "step": 11257 }, { "epoch": 0.20578719359496955, "grad_norm": 7.026778992898313, "learning_rate": 9.211461042407841e-06, "loss": 17.995, "step": 11258 }, { "epoch": 0.2058054727914161, "grad_norm": 7.124654173790567, "learning_rate": 9.211301478538194e-06, "loss": 17.3347, "step": 11259 }, { "epoch": 0.20582375198786262, "grad_norm": 6.822023858200299, "learning_rate": 9.2111418999083e-06, "loss": 17.7613, "step": 11260 }, { "epoch": 0.20584203118430913, "grad_norm": 7.2272941338736905, "learning_rate": 9.210982306518719e-06, "loss": 17.6922, "step": 11261 }, { "epoch": 0.20586031038075567, "grad_norm": 6.602902486894802, "learning_rate": 9.21082269837001e-06, "loss": 17.5514, "step": 11262 }, { "epoch": 0.20587858957720218, "grad_norm": 7.480254531432755, "learning_rate": 9.210663075462733e-06, "loss": 18.2328, "step": 11263 }, { "epoch": 0.2058968687736487, "grad_norm": 6.128790145698104, "learning_rate": 9.210503437797448e-06, "loss": 17.3685, "step": 11264 }, { "epoch": 0.20591514797009525, "grad_norm": 8.293712419676533, "learning_rate": 9.210343785374713e-06, "loss": 18.1156, "step": 11265 }, { "epoch": 0.20593342716654175, "grad_norm": 7.404471374026537, "learning_rate": 9.21018411819509e-06, "loss": 17.6813, "step": 11266 }, { "epoch": 0.2059517063629883, "grad_norm": 6.296386617109181, "learning_rate": 9.210024436259135e-06, "loss": 17.4807, "step": 11267 }, { "epoch": 0.2059699855594348, "grad_norm": 7.517825134964458, "learning_rate": 9.20986473956741e-06, "loss": 17.9497, "step": 11268 }, { "epoch": 0.20598826475588133, "grad_norm": 7.198564620537514, "learning_rate": 9.209705028120475e-06, "loss": 18.061, "step": 11269 }, { "epoch": 0.20600654395232784, "grad_norm": 7.446215235124976, "learning_rate": 9.209545301918889e-06, "loss": 17.8568, "step": 11270 }, { "epoch": 0.20602482314877438, "grad_norm": 6.199927643465172, "learning_rate": 9.209385560963212e-06, "loss": 17.2508, "step": 11271 }, { "epoch": 0.2060431023452209, "grad_norm": 7.165829469897477, "learning_rate": 9.209225805254004e-06, "loss": 17.7853, "step": 11272 }, { "epoch": 0.20606138154166742, "grad_norm": 6.174745654746108, "learning_rate": 9.209066034791824e-06, "loss": 17.3414, "step": 11273 }, { "epoch": 0.20607966073811396, "grad_norm": 6.613461508753073, "learning_rate": 9.208906249577234e-06, "loss": 17.5057, "step": 11274 }, { "epoch": 0.20609793993456046, "grad_norm": 7.359812146534588, "learning_rate": 9.208746449610792e-06, "loss": 17.8326, "step": 11275 }, { "epoch": 0.206116219131007, "grad_norm": 6.829152532704157, "learning_rate": 9.20858663489306e-06, "loss": 17.6674, "step": 11276 }, { "epoch": 0.20613449832745354, "grad_norm": 7.597519963948508, "learning_rate": 9.208426805424596e-06, "loss": 18.0787, "step": 11277 }, { "epoch": 0.20615277752390004, "grad_norm": 6.740113924962893, "learning_rate": 9.208266961205961e-06, "loss": 17.5505, "step": 11278 }, { "epoch": 0.20617105672034658, "grad_norm": 6.659659448971432, "learning_rate": 9.208107102237717e-06, "loss": 17.5001, "step": 11279 }, { "epoch": 0.2061893359167931, "grad_norm": 6.516375911744139, "learning_rate": 9.207947228520421e-06, "loss": 17.4043, "step": 11280 }, { "epoch": 0.20620761511323962, "grad_norm": 7.591304504931219, "learning_rate": 9.207787340054637e-06, "loss": 17.5647, "step": 11281 }, { "epoch": 0.20622589430968616, "grad_norm": 6.431169455060361, "learning_rate": 9.207627436840922e-06, "loss": 17.4243, "step": 11282 }, { "epoch": 0.20624417350613267, "grad_norm": 5.904620637899576, "learning_rate": 9.207467518879838e-06, "loss": 17.2062, "step": 11283 }, { "epoch": 0.2062624527025792, "grad_norm": 6.255924391466615, "learning_rate": 9.207307586171946e-06, "loss": 17.4979, "step": 11284 }, { "epoch": 0.2062807318990257, "grad_norm": 5.775303579231696, "learning_rate": 9.207147638717807e-06, "loss": 17.4686, "step": 11285 }, { "epoch": 0.20629901109547225, "grad_norm": 8.48887826399413, "learning_rate": 9.20698767651798e-06, "loss": 18.7141, "step": 11286 }, { "epoch": 0.20631729029191875, "grad_norm": 5.258576051977037, "learning_rate": 9.206827699573024e-06, "loss": 16.9402, "step": 11287 }, { "epoch": 0.2063355694883653, "grad_norm": 8.300980279165882, "learning_rate": 9.206667707883504e-06, "loss": 18.4599, "step": 11288 }, { "epoch": 0.20635384868481182, "grad_norm": 6.571769161341952, "learning_rate": 9.206507701449978e-06, "loss": 17.6144, "step": 11289 }, { "epoch": 0.20637212788125833, "grad_norm": 7.422547512778995, "learning_rate": 9.206347680273008e-06, "loss": 18.0077, "step": 11290 }, { "epoch": 0.20639040707770487, "grad_norm": 5.961834342625839, "learning_rate": 9.206187644353155e-06, "loss": 17.343, "step": 11291 }, { "epoch": 0.20640868627415138, "grad_norm": 7.77587413166556, "learning_rate": 9.206027593690978e-06, "loss": 18.1333, "step": 11292 }, { "epoch": 0.2064269654705979, "grad_norm": 7.670740089916023, "learning_rate": 9.20586752828704e-06, "loss": 18.0532, "step": 11293 }, { "epoch": 0.20644524466704445, "grad_norm": 6.4864691492117235, "learning_rate": 9.205707448141901e-06, "loss": 17.4666, "step": 11294 }, { "epoch": 0.20646352386349096, "grad_norm": 6.757883265958228, "learning_rate": 9.205547353256123e-06, "loss": 17.739, "step": 11295 }, { "epoch": 0.2064818030599375, "grad_norm": 6.691835969288469, "learning_rate": 9.205387243630267e-06, "loss": 17.8983, "step": 11296 }, { "epoch": 0.206500082256384, "grad_norm": 7.450243470813527, "learning_rate": 9.205227119264892e-06, "loss": 17.7776, "step": 11297 }, { "epoch": 0.20651836145283053, "grad_norm": 7.294062044729192, "learning_rate": 9.205066980160561e-06, "loss": 17.6949, "step": 11298 }, { "epoch": 0.20653664064927707, "grad_norm": 6.869556676245195, "learning_rate": 9.204906826317835e-06, "loss": 17.8619, "step": 11299 }, { "epoch": 0.20655491984572358, "grad_norm": 7.881950895303181, "learning_rate": 9.204746657737276e-06, "loss": 18.0034, "step": 11300 }, { "epoch": 0.2065731990421701, "grad_norm": 6.516121643789768, "learning_rate": 9.204586474419445e-06, "loss": 17.5569, "step": 11301 }, { "epoch": 0.20659147823861662, "grad_norm": 7.121390109024867, "learning_rate": 9.204426276364905e-06, "loss": 17.8313, "step": 11302 }, { "epoch": 0.20660975743506316, "grad_norm": 8.016283913408762, "learning_rate": 9.204266063574212e-06, "loss": 18.2278, "step": 11303 }, { "epoch": 0.20662803663150967, "grad_norm": 6.9818234347910755, "learning_rate": 9.204105836047934e-06, "loss": 17.5215, "step": 11304 }, { "epoch": 0.2066463158279562, "grad_norm": 5.269493716760305, "learning_rate": 9.203945593786628e-06, "loss": 17.0022, "step": 11305 }, { "epoch": 0.20666459502440274, "grad_norm": 6.455078838645331, "learning_rate": 9.20378533679086e-06, "loss": 17.3951, "step": 11306 }, { "epoch": 0.20668287422084924, "grad_norm": 6.523396538436194, "learning_rate": 9.203625065061186e-06, "loss": 17.3609, "step": 11307 }, { "epoch": 0.20670115341729578, "grad_norm": 6.544004948372208, "learning_rate": 9.203464778598173e-06, "loss": 17.4493, "step": 11308 }, { "epoch": 0.2067194326137423, "grad_norm": 6.419709541482147, "learning_rate": 9.203304477402382e-06, "loss": 17.6385, "step": 11309 }, { "epoch": 0.20673771181018882, "grad_norm": 6.332698117626546, "learning_rate": 9.20314416147437e-06, "loss": 17.6083, "step": 11310 }, { "epoch": 0.20675599100663536, "grad_norm": 6.55180966767108, "learning_rate": 9.202983830814704e-06, "loss": 17.3829, "step": 11311 }, { "epoch": 0.20677427020308187, "grad_norm": 6.576574494804952, "learning_rate": 9.202823485423946e-06, "loss": 17.5929, "step": 11312 }, { "epoch": 0.2067925493995284, "grad_norm": 7.869222319228628, "learning_rate": 9.202663125302656e-06, "loss": 17.711, "step": 11313 }, { "epoch": 0.2068108285959749, "grad_norm": 7.220267385438705, "learning_rate": 9.202502750451394e-06, "loss": 17.8717, "step": 11314 }, { "epoch": 0.20682910779242145, "grad_norm": 6.498563801180941, "learning_rate": 9.202342360870726e-06, "loss": 17.4267, "step": 11315 }, { "epoch": 0.20684738698886798, "grad_norm": 8.49745539591033, "learning_rate": 9.202181956561213e-06, "loss": 18.3729, "step": 11316 }, { "epoch": 0.2068656661853145, "grad_norm": 6.113489474513642, "learning_rate": 9.202021537523417e-06, "loss": 17.3899, "step": 11317 }, { "epoch": 0.20688394538176103, "grad_norm": 7.259428603948771, "learning_rate": 9.2018611037579e-06, "loss": 17.6373, "step": 11318 }, { "epoch": 0.20690222457820753, "grad_norm": 6.862792995101118, "learning_rate": 9.201700655265224e-06, "loss": 17.6216, "step": 11319 }, { "epoch": 0.20692050377465407, "grad_norm": 5.896591952112572, "learning_rate": 9.201540192045952e-06, "loss": 17.1226, "step": 11320 }, { "epoch": 0.20693878297110058, "grad_norm": 7.457648774614467, "learning_rate": 9.201379714100647e-06, "loss": 18.2697, "step": 11321 }, { "epoch": 0.2069570621675471, "grad_norm": 6.263572739947989, "learning_rate": 9.201219221429869e-06, "loss": 17.4497, "step": 11322 }, { "epoch": 0.20697534136399365, "grad_norm": 5.976391011517805, "learning_rate": 9.201058714034183e-06, "loss": 17.3546, "step": 11323 }, { "epoch": 0.20699362056044016, "grad_norm": 6.554414667157984, "learning_rate": 9.200898191914152e-06, "loss": 17.4139, "step": 11324 }, { "epoch": 0.2070118997568867, "grad_norm": 5.507861465424304, "learning_rate": 9.200737655070336e-06, "loss": 16.9907, "step": 11325 }, { "epoch": 0.2070301789533332, "grad_norm": 9.746191475372257, "learning_rate": 9.2005771035033e-06, "loss": 18.7296, "step": 11326 }, { "epoch": 0.20704845814977973, "grad_norm": 6.341279694026166, "learning_rate": 9.200416537213604e-06, "loss": 17.1672, "step": 11327 }, { "epoch": 0.20706673734622627, "grad_norm": 6.435856974138208, "learning_rate": 9.200255956201814e-06, "loss": 17.3943, "step": 11328 }, { "epoch": 0.20708501654267278, "grad_norm": 6.875642123729105, "learning_rate": 9.200095360468491e-06, "loss": 17.8153, "step": 11329 }, { "epoch": 0.20710329573911931, "grad_norm": 5.805741798094388, "learning_rate": 9.1999347500142e-06, "loss": 17.2909, "step": 11330 }, { "epoch": 0.20712157493556582, "grad_norm": 6.748350954475548, "learning_rate": 9.199774124839499e-06, "loss": 17.7981, "step": 11331 }, { "epoch": 0.20713985413201236, "grad_norm": 7.744974087535545, "learning_rate": 9.199613484944956e-06, "loss": 17.9069, "step": 11332 }, { "epoch": 0.2071581333284589, "grad_norm": 6.331897098997471, "learning_rate": 9.19945283033113e-06, "loss": 17.4355, "step": 11333 }, { "epoch": 0.2071764125249054, "grad_norm": 9.14905122928649, "learning_rate": 9.19929216099859e-06, "loss": 18.0282, "step": 11334 }, { "epoch": 0.20719469172135194, "grad_norm": 7.462062481892406, "learning_rate": 9.199131476947893e-06, "loss": 17.7042, "step": 11335 }, { "epoch": 0.20721297091779844, "grad_norm": 6.223147064247256, "learning_rate": 9.198970778179605e-06, "loss": 17.1327, "step": 11336 }, { "epoch": 0.20723125011424498, "grad_norm": 5.925743273389555, "learning_rate": 9.198810064694287e-06, "loss": 17.0214, "step": 11337 }, { "epoch": 0.2072495293106915, "grad_norm": 6.9396073366208135, "learning_rate": 9.198649336492506e-06, "loss": 17.5051, "step": 11338 }, { "epoch": 0.20726780850713802, "grad_norm": 6.337173387833642, "learning_rate": 9.198488593574823e-06, "loss": 17.7323, "step": 11339 }, { "epoch": 0.20728608770358456, "grad_norm": 7.55508858779258, "learning_rate": 9.198327835941803e-06, "loss": 17.9832, "step": 11340 }, { "epoch": 0.20730436690003107, "grad_norm": 7.509552391910138, "learning_rate": 9.198167063594006e-06, "loss": 17.8462, "step": 11341 }, { "epoch": 0.2073226460964776, "grad_norm": 6.973246871998054, "learning_rate": 9.198006276531999e-06, "loss": 17.5752, "step": 11342 }, { "epoch": 0.2073409252929241, "grad_norm": 8.300606732955098, "learning_rate": 9.197845474756344e-06, "loss": 17.9324, "step": 11343 }, { "epoch": 0.20735920448937065, "grad_norm": 6.386337974565366, "learning_rate": 9.197684658267606e-06, "loss": 17.2407, "step": 11344 }, { "epoch": 0.20737748368581718, "grad_norm": 6.466076760950484, "learning_rate": 9.197523827066347e-06, "loss": 17.7687, "step": 11345 }, { "epoch": 0.2073957628822637, "grad_norm": 7.731434377724766, "learning_rate": 9.19736298115313e-06, "loss": 17.6565, "step": 11346 }, { "epoch": 0.20741404207871023, "grad_norm": 8.723847987198447, "learning_rate": 9.19720212052852e-06, "loss": 17.8272, "step": 11347 }, { "epoch": 0.20743232127515673, "grad_norm": 7.794014451859341, "learning_rate": 9.197041245193084e-06, "loss": 17.756, "step": 11348 }, { "epoch": 0.20745060047160327, "grad_norm": 6.766738502526163, "learning_rate": 9.19688035514738e-06, "loss": 17.5409, "step": 11349 }, { "epoch": 0.2074688796680498, "grad_norm": 5.770209024310648, "learning_rate": 9.196719450391975e-06, "loss": 17.3905, "step": 11350 }, { "epoch": 0.2074871588644963, "grad_norm": 6.549221562153917, "learning_rate": 9.19655853092743e-06, "loss": 17.5294, "step": 11351 }, { "epoch": 0.20750543806094285, "grad_norm": 6.980172463938054, "learning_rate": 9.196397596754316e-06, "loss": 17.4521, "step": 11352 }, { "epoch": 0.20752371725738936, "grad_norm": 6.60327897428517, "learning_rate": 9.196236647873189e-06, "loss": 17.5934, "step": 11353 }, { "epoch": 0.2075419964538359, "grad_norm": 7.065355057578235, "learning_rate": 9.19607568428462e-06, "loss": 17.8144, "step": 11354 }, { "epoch": 0.2075602756502824, "grad_norm": 6.866725361095603, "learning_rate": 9.195914705989166e-06, "loss": 17.7256, "step": 11355 }, { "epoch": 0.20757855484672894, "grad_norm": 5.844090363108879, "learning_rate": 9.195753712987397e-06, "loss": 17.2677, "step": 11356 }, { "epoch": 0.20759683404317547, "grad_norm": 8.539182934160497, "learning_rate": 9.195592705279876e-06, "loss": 18.7554, "step": 11357 }, { "epoch": 0.20761511323962198, "grad_norm": 6.5348978855057, "learning_rate": 9.195431682867166e-06, "loss": 17.5521, "step": 11358 }, { "epoch": 0.20763339243606851, "grad_norm": 6.803589053570613, "learning_rate": 9.195270645749833e-06, "loss": 17.6202, "step": 11359 }, { "epoch": 0.20765167163251502, "grad_norm": 6.727479910864619, "learning_rate": 9.195109593928438e-06, "loss": 17.6091, "step": 11360 }, { "epoch": 0.20766995082896156, "grad_norm": 6.413173530944886, "learning_rate": 9.19494852740355e-06, "loss": 17.3738, "step": 11361 }, { "epoch": 0.2076882300254081, "grad_norm": 7.601512957017651, "learning_rate": 9.19478744617573e-06, "loss": 17.9366, "step": 11362 }, { "epoch": 0.2077065092218546, "grad_norm": 5.789707525698412, "learning_rate": 9.194626350245546e-06, "loss": 17.1879, "step": 11363 }, { "epoch": 0.20772478841830114, "grad_norm": 5.443213409859194, "learning_rate": 9.19446523961356e-06, "loss": 17.0118, "step": 11364 }, { "epoch": 0.20774306761474765, "grad_norm": 6.413676483015337, "learning_rate": 9.194304114280335e-06, "loss": 17.4193, "step": 11365 }, { "epoch": 0.20776134681119418, "grad_norm": 9.445433958255618, "learning_rate": 9.194142974246441e-06, "loss": 18.1818, "step": 11366 }, { "epoch": 0.20777962600764072, "grad_norm": 7.0577186360297794, "learning_rate": 9.193981819512439e-06, "loss": 17.6962, "step": 11367 }, { "epoch": 0.20779790520408722, "grad_norm": 6.606599937932465, "learning_rate": 9.193820650078893e-06, "loss": 17.3329, "step": 11368 }, { "epoch": 0.20781618440053376, "grad_norm": 7.269147101298209, "learning_rate": 9.19365946594637e-06, "loss": 17.7729, "step": 11369 }, { "epoch": 0.20783446359698027, "grad_norm": 10.434064155259405, "learning_rate": 9.193498267115435e-06, "loss": 17.6019, "step": 11370 }, { "epoch": 0.2078527427934268, "grad_norm": 6.489079456392142, "learning_rate": 9.193337053586654e-06, "loss": 17.5649, "step": 11371 }, { "epoch": 0.2078710219898733, "grad_norm": 5.076335939992683, "learning_rate": 9.19317582536059e-06, "loss": 16.8645, "step": 11372 }, { "epoch": 0.20788930118631985, "grad_norm": 6.619757491576467, "learning_rate": 9.193014582437806e-06, "loss": 17.4425, "step": 11373 }, { "epoch": 0.20790758038276638, "grad_norm": 7.043006390120621, "learning_rate": 9.192853324818873e-06, "loss": 17.7465, "step": 11374 }, { "epoch": 0.2079258595792129, "grad_norm": 7.056339196299807, "learning_rate": 9.192692052504351e-06, "loss": 18.1348, "step": 11375 }, { "epoch": 0.20794413877565943, "grad_norm": 7.062149859477818, "learning_rate": 9.192530765494807e-06, "loss": 17.8394, "step": 11376 }, { "epoch": 0.20796241797210593, "grad_norm": 6.9000246413895026, "learning_rate": 9.192369463790807e-06, "loss": 17.7384, "step": 11377 }, { "epoch": 0.20798069716855247, "grad_norm": 6.717007672062542, "learning_rate": 9.192208147392916e-06, "loss": 17.7328, "step": 11378 }, { "epoch": 0.207998976364999, "grad_norm": 7.64450286934646, "learning_rate": 9.192046816301701e-06, "loss": 18.4562, "step": 11379 }, { "epoch": 0.2080172555614455, "grad_norm": 5.88855172135595, "learning_rate": 9.191885470517724e-06, "loss": 17.2166, "step": 11380 }, { "epoch": 0.20803553475789205, "grad_norm": 5.841888329181857, "learning_rate": 9.191724110041551e-06, "loss": 16.9695, "step": 11381 }, { "epoch": 0.20805381395433856, "grad_norm": 6.762546033378747, "learning_rate": 9.19156273487375e-06, "loss": 17.7965, "step": 11382 }, { "epoch": 0.2080720931507851, "grad_norm": 6.768479023383248, "learning_rate": 9.191401345014886e-06, "loss": 17.5318, "step": 11383 }, { "epoch": 0.20809037234723163, "grad_norm": 6.559398362446401, "learning_rate": 9.191239940465522e-06, "loss": 17.4949, "step": 11384 }, { "epoch": 0.20810865154367814, "grad_norm": 7.3701661115443615, "learning_rate": 9.191078521226226e-06, "loss": 18.0867, "step": 11385 }, { "epoch": 0.20812693074012467, "grad_norm": 5.117281048479846, "learning_rate": 9.190917087297565e-06, "loss": 16.8779, "step": 11386 }, { "epoch": 0.20814520993657118, "grad_norm": 8.126069612112843, "learning_rate": 9.190755638680102e-06, "loss": 18.0336, "step": 11387 }, { "epoch": 0.20816348913301772, "grad_norm": 7.396998678970835, "learning_rate": 9.190594175374406e-06, "loss": 17.7872, "step": 11388 }, { "epoch": 0.20818176832946422, "grad_norm": 7.287776594949756, "learning_rate": 9.19043269738104e-06, "loss": 17.6405, "step": 11389 }, { "epoch": 0.20820004752591076, "grad_norm": 6.668249500613658, "learning_rate": 9.19027120470057e-06, "loss": 17.6275, "step": 11390 }, { "epoch": 0.2082183267223573, "grad_norm": 6.037178264374184, "learning_rate": 9.190109697333565e-06, "loss": 17.2264, "step": 11391 }, { "epoch": 0.2082366059188038, "grad_norm": 6.525459289361489, "learning_rate": 9.189948175280588e-06, "loss": 17.3118, "step": 11392 }, { "epoch": 0.20825488511525034, "grad_norm": 6.459079614435108, "learning_rate": 9.189786638542206e-06, "loss": 17.4513, "step": 11393 }, { "epoch": 0.20827316431169685, "grad_norm": 8.017385972157495, "learning_rate": 9.189625087118985e-06, "loss": 18.3373, "step": 11394 }, { "epoch": 0.20829144350814338, "grad_norm": 7.19975293101108, "learning_rate": 9.189463521011492e-06, "loss": 18.112, "step": 11395 }, { "epoch": 0.20830972270458992, "grad_norm": 7.312861634682219, "learning_rate": 9.189301940220295e-06, "loss": 17.7899, "step": 11396 }, { "epoch": 0.20832800190103642, "grad_norm": 5.8548590381567776, "learning_rate": 9.189140344745954e-06, "loss": 17.3417, "step": 11397 }, { "epoch": 0.20834628109748296, "grad_norm": 7.4346127839201515, "learning_rate": 9.188978734589043e-06, "loss": 17.8877, "step": 11398 }, { "epoch": 0.20836456029392947, "grad_norm": 8.118283307165527, "learning_rate": 9.188817109750124e-06, "loss": 18.2895, "step": 11399 }, { "epoch": 0.208382839490376, "grad_norm": 7.3696099466786436, "learning_rate": 9.188655470229766e-06, "loss": 17.7555, "step": 11400 }, { "epoch": 0.20840111868682254, "grad_norm": 7.903233934029004, "learning_rate": 9.188493816028532e-06, "loss": 18.2937, "step": 11401 }, { "epoch": 0.20841939788326905, "grad_norm": 5.666262011006457, "learning_rate": 9.188332147146991e-06, "loss": 17.3378, "step": 11402 }, { "epoch": 0.20843767707971558, "grad_norm": 7.385682597886673, "learning_rate": 9.188170463585709e-06, "loss": 17.8607, "step": 11403 }, { "epoch": 0.2084559562761621, "grad_norm": 6.771538572618234, "learning_rate": 9.188008765345253e-06, "loss": 17.6437, "step": 11404 }, { "epoch": 0.20847423547260863, "grad_norm": 6.72804536843456, "learning_rate": 9.18784705242619e-06, "loss": 17.7804, "step": 11405 }, { "epoch": 0.20849251466905513, "grad_norm": 7.677137840821924, "learning_rate": 9.187685324829088e-06, "loss": 18.1629, "step": 11406 }, { "epoch": 0.20851079386550167, "grad_norm": 7.704103037260511, "learning_rate": 9.187523582554512e-06, "loss": 17.9746, "step": 11407 }, { "epoch": 0.2085290730619482, "grad_norm": 6.845532859197307, "learning_rate": 9.187361825603027e-06, "loss": 17.6056, "step": 11408 }, { "epoch": 0.2085473522583947, "grad_norm": 7.021336147229711, "learning_rate": 9.187200053975203e-06, "loss": 17.5864, "step": 11409 }, { "epoch": 0.20856563145484125, "grad_norm": 6.675649621210543, "learning_rate": 9.187038267671606e-06, "loss": 17.7085, "step": 11410 }, { "epoch": 0.20858391065128776, "grad_norm": 6.906161304809612, "learning_rate": 9.186876466692805e-06, "loss": 17.7359, "step": 11411 }, { "epoch": 0.2086021898477343, "grad_norm": 7.051825935775604, "learning_rate": 9.186714651039364e-06, "loss": 17.8063, "step": 11412 }, { "epoch": 0.20862046904418083, "grad_norm": 7.323049960219737, "learning_rate": 9.186552820711852e-06, "loss": 18.0081, "step": 11413 }, { "epoch": 0.20863874824062734, "grad_norm": 5.1518009573793115, "learning_rate": 9.186390975710835e-06, "loss": 16.8863, "step": 11414 }, { "epoch": 0.20865702743707387, "grad_norm": 5.688528340186772, "learning_rate": 9.18622911603688e-06, "loss": 16.9123, "step": 11415 }, { "epoch": 0.20867530663352038, "grad_norm": 7.3617259942742646, "learning_rate": 9.186067241690556e-06, "loss": 17.7749, "step": 11416 }, { "epoch": 0.20869358582996692, "grad_norm": 8.028959493961613, "learning_rate": 9.18590535267243e-06, "loss": 17.6508, "step": 11417 }, { "epoch": 0.20871186502641345, "grad_norm": 6.41800121755563, "learning_rate": 9.185743448983068e-06, "loss": 17.5967, "step": 11418 }, { "epoch": 0.20873014422285996, "grad_norm": 6.732489245810147, "learning_rate": 9.18558153062304e-06, "loss": 17.5627, "step": 11419 }, { "epoch": 0.2087484234193065, "grad_norm": 8.23426094625324, "learning_rate": 9.185419597592912e-06, "loss": 18.5474, "step": 11420 }, { "epoch": 0.208766702615753, "grad_norm": 5.885122435227352, "learning_rate": 9.185257649893251e-06, "loss": 17.3825, "step": 11421 }, { "epoch": 0.20878498181219954, "grad_norm": 8.256182545916584, "learning_rate": 9.185095687524625e-06, "loss": 18.2636, "step": 11422 }, { "epoch": 0.20880326100864605, "grad_norm": 6.254738320064321, "learning_rate": 9.184933710487602e-06, "loss": 17.4135, "step": 11423 }, { "epoch": 0.20882154020509258, "grad_norm": 9.116702380571455, "learning_rate": 9.184771718782748e-06, "loss": 17.8799, "step": 11424 }, { "epoch": 0.20883981940153912, "grad_norm": 7.7365396336510095, "learning_rate": 9.184609712410633e-06, "loss": 18.1285, "step": 11425 }, { "epoch": 0.20885809859798563, "grad_norm": 7.124670689954153, "learning_rate": 9.184447691371825e-06, "loss": 17.692, "step": 11426 }, { "epoch": 0.20887637779443216, "grad_norm": 8.189534692613202, "learning_rate": 9.18428565566689e-06, "loss": 18.231, "step": 11427 }, { "epoch": 0.20889465699087867, "grad_norm": 6.6969344705123595, "learning_rate": 9.184123605296397e-06, "loss": 17.6378, "step": 11428 }, { "epoch": 0.2089129361873252, "grad_norm": 7.448276271927157, "learning_rate": 9.183961540260914e-06, "loss": 18.1656, "step": 11429 }, { "epoch": 0.20893121538377174, "grad_norm": 6.750870260961923, "learning_rate": 9.18379946056101e-06, "loss": 17.7285, "step": 11430 }, { "epoch": 0.20894949458021825, "grad_norm": 6.91195418997168, "learning_rate": 9.183637366197252e-06, "loss": 17.4656, "step": 11431 }, { "epoch": 0.20896777377666478, "grad_norm": 7.112439559293752, "learning_rate": 9.183475257170205e-06, "loss": 17.8876, "step": 11432 }, { "epoch": 0.2089860529731113, "grad_norm": 7.193365368428257, "learning_rate": 9.183313133480442e-06, "loss": 17.7278, "step": 11433 }, { "epoch": 0.20900433216955783, "grad_norm": 6.876107552977072, "learning_rate": 9.18315099512853e-06, "loss": 17.8101, "step": 11434 }, { "epoch": 0.20902261136600436, "grad_norm": 8.549983313567564, "learning_rate": 9.182988842115037e-06, "loss": 18.7058, "step": 11435 }, { "epoch": 0.20904089056245087, "grad_norm": 9.235386252918953, "learning_rate": 9.18282667444053e-06, "loss": 18.5952, "step": 11436 }, { "epoch": 0.2090591697588974, "grad_norm": 7.496746496783825, "learning_rate": 9.182664492105579e-06, "loss": 17.7112, "step": 11437 }, { "epoch": 0.20907744895534391, "grad_norm": 8.29534400602403, "learning_rate": 9.182502295110754e-06, "loss": 17.9013, "step": 11438 }, { "epoch": 0.20909572815179045, "grad_norm": 7.806434686132032, "learning_rate": 9.182340083456618e-06, "loss": 17.938, "step": 11439 }, { "epoch": 0.20911400734823696, "grad_norm": 7.688533630590921, "learning_rate": 9.182177857143744e-06, "loss": 18.0619, "step": 11440 }, { "epoch": 0.2091322865446835, "grad_norm": 7.974976010537834, "learning_rate": 9.1820156161727e-06, "loss": 18.0908, "step": 11441 }, { "epoch": 0.20915056574113003, "grad_norm": 8.852588540338171, "learning_rate": 9.181853360544054e-06, "loss": 18.5536, "step": 11442 }, { "epoch": 0.20916884493757654, "grad_norm": 5.238748007782483, "learning_rate": 9.181691090258375e-06, "loss": 16.9777, "step": 11443 }, { "epoch": 0.20918712413402307, "grad_norm": 5.822094027495809, "learning_rate": 9.18152880531623e-06, "loss": 17.2267, "step": 11444 }, { "epoch": 0.20920540333046958, "grad_norm": 8.633433623820073, "learning_rate": 9.181366505718192e-06, "loss": 18.2636, "step": 11445 }, { "epoch": 0.20922368252691612, "grad_norm": 6.403210662031341, "learning_rate": 9.181204191464827e-06, "loss": 17.5143, "step": 11446 }, { "epoch": 0.20924196172336265, "grad_norm": 6.161692954214665, "learning_rate": 9.181041862556703e-06, "loss": 17.2519, "step": 11447 }, { "epoch": 0.20926024091980916, "grad_norm": 6.270337845095842, "learning_rate": 9.18087951899439e-06, "loss": 17.325, "step": 11448 }, { "epoch": 0.2092785201162557, "grad_norm": 6.037258527576589, "learning_rate": 9.180717160778458e-06, "loss": 17.2384, "step": 11449 }, { "epoch": 0.2092967993127022, "grad_norm": 7.532790883811677, "learning_rate": 9.180554787909474e-06, "loss": 17.6423, "step": 11450 }, { "epoch": 0.20931507850914874, "grad_norm": 6.960346383221193, "learning_rate": 9.180392400388008e-06, "loss": 17.9873, "step": 11451 }, { "epoch": 0.20933335770559527, "grad_norm": 6.3281520616882245, "learning_rate": 9.180229998214632e-06, "loss": 17.4254, "step": 11452 }, { "epoch": 0.20935163690204178, "grad_norm": 7.930718464564416, "learning_rate": 9.180067581389908e-06, "loss": 17.9693, "step": 11453 }, { "epoch": 0.20936991609848832, "grad_norm": 7.16065467253636, "learning_rate": 9.179905149914413e-06, "loss": 17.9344, "step": 11454 }, { "epoch": 0.20938819529493483, "grad_norm": 6.461398696890172, "learning_rate": 9.179742703788715e-06, "loss": 17.5761, "step": 11455 }, { "epoch": 0.20940647449138136, "grad_norm": 5.583603477464138, "learning_rate": 9.179580243013378e-06, "loss": 17.2408, "step": 11456 }, { "epoch": 0.20942475368782787, "grad_norm": 8.131423284574632, "learning_rate": 9.179417767588975e-06, "loss": 18.8045, "step": 11457 }, { "epoch": 0.2094430328842744, "grad_norm": 6.570135935211809, "learning_rate": 9.179255277516077e-06, "loss": 17.7531, "step": 11458 }, { "epoch": 0.20946131208072094, "grad_norm": 7.046249102869978, "learning_rate": 9.179092772795253e-06, "loss": 17.7526, "step": 11459 }, { "epoch": 0.20947959127716745, "grad_norm": 8.84210435560813, "learning_rate": 9.17893025342707e-06, "loss": 18.1264, "step": 11460 }, { "epoch": 0.20949787047361398, "grad_norm": 7.013666125292309, "learning_rate": 9.178767719412098e-06, "loss": 17.6529, "step": 11461 }, { "epoch": 0.2095161496700605, "grad_norm": 6.193632295670529, "learning_rate": 9.17860517075091e-06, "loss": 17.5707, "step": 11462 }, { "epoch": 0.20953442886650703, "grad_norm": 6.444396119092666, "learning_rate": 9.17844260744407e-06, "loss": 17.2939, "step": 11463 }, { "epoch": 0.20955270806295356, "grad_norm": 5.333388606505633, "learning_rate": 9.178280029492154e-06, "loss": 16.963, "step": 11464 }, { "epoch": 0.20957098725940007, "grad_norm": 5.560412811342642, "learning_rate": 9.178117436895731e-06, "loss": 17.177, "step": 11465 }, { "epoch": 0.2095892664558466, "grad_norm": 6.115751298206976, "learning_rate": 9.177954829655367e-06, "loss": 17.423, "step": 11466 }, { "epoch": 0.20960754565229311, "grad_norm": 5.775824499667194, "learning_rate": 9.177792207771632e-06, "loss": 17.2843, "step": 11467 }, { "epoch": 0.20962582484873965, "grad_norm": 6.633139118448926, "learning_rate": 9.177629571245099e-06, "loss": 17.6942, "step": 11468 }, { "epoch": 0.20964410404518619, "grad_norm": 6.442523114555858, "learning_rate": 9.177466920076337e-06, "loss": 17.6104, "step": 11469 }, { "epoch": 0.2096623832416327, "grad_norm": 6.9251317768575955, "learning_rate": 9.177304254265918e-06, "loss": 17.7068, "step": 11470 }, { "epoch": 0.20968066243807923, "grad_norm": 7.398398019701001, "learning_rate": 9.177141573814407e-06, "loss": 17.8246, "step": 11471 }, { "epoch": 0.20969894163452574, "grad_norm": 6.424344074639616, "learning_rate": 9.176978878722378e-06, "loss": 17.4117, "step": 11472 }, { "epoch": 0.20971722083097227, "grad_norm": 6.306753861980729, "learning_rate": 9.176816168990402e-06, "loss": 17.2655, "step": 11473 }, { "epoch": 0.20973550002741878, "grad_norm": 6.672184053816812, "learning_rate": 9.176653444619045e-06, "loss": 17.54, "step": 11474 }, { "epoch": 0.20975377922386532, "grad_norm": 5.9405160859744655, "learning_rate": 9.176490705608883e-06, "loss": 17.3811, "step": 11475 }, { "epoch": 0.20977205842031185, "grad_norm": 6.3595404036329795, "learning_rate": 9.176327951960481e-06, "loss": 17.5449, "step": 11476 }, { "epoch": 0.20979033761675836, "grad_norm": 7.935320178925701, "learning_rate": 9.176165183674415e-06, "loss": 18.1486, "step": 11477 }, { "epoch": 0.2098086168132049, "grad_norm": 7.020614202529794, "learning_rate": 9.17600240075125e-06, "loss": 17.6709, "step": 11478 }, { "epoch": 0.2098268960096514, "grad_norm": 6.801796338808055, "learning_rate": 9.175839603191562e-06, "loss": 17.6184, "step": 11479 }, { "epoch": 0.20984517520609794, "grad_norm": 7.41898396181858, "learning_rate": 9.175676790995914e-06, "loss": 18.0923, "step": 11480 }, { "epoch": 0.20986345440254447, "grad_norm": 7.078454204215108, "learning_rate": 9.175513964164884e-06, "loss": 17.5183, "step": 11481 }, { "epoch": 0.20988173359899098, "grad_norm": 7.246881122848099, "learning_rate": 9.175351122699039e-06, "loss": 17.873, "step": 11482 }, { "epoch": 0.20990001279543752, "grad_norm": 7.2882921527209765, "learning_rate": 9.175188266598952e-06, "loss": 18.4119, "step": 11483 }, { "epoch": 0.20991829199188403, "grad_norm": 5.771480647417177, "learning_rate": 9.175025395865191e-06, "loss": 17.2252, "step": 11484 }, { "epoch": 0.20993657118833056, "grad_norm": 5.6758103847942705, "learning_rate": 9.174862510498328e-06, "loss": 17.0851, "step": 11485 }, { "epoch": 0.2099548503847771, "grad_norm": 7.267595105528622, "learning_rate": 9.174699610498935e-06, "loss": 17.7672, "step": 11486 }, { "epoch": 0.2099731295812236, "grad_norm": 7.854747483556572, "learning_rate": 9.174536695867582e-06, "loss": 17.9662, "step": 11487 }, { "epoch": 0.20999140877767014, "grad_norm": 6.087095273285454, "learning_rate": 9.17437376660484e-06, "loss": 17.4715, "step": 11488 }, { "epoch": 0.21000968797411665, "grad_norm": 9.184815723203007, "learning_rate": 9.17421082271128e-06, "loss": 18.5931, "step": 11489 }, { "epoch": 0.21002796717056318, "grad_norm": 6.876450065885871, "learning_rate": 9.174047864187474e-06, "loss": 17.8128, "step": 11490 }, { "epoch": 0.2100462463670097, "grad_norm": 7.182804026312148, "learning_rate": 9.173884891033991e-06, "loss": 18.0019, "step": 11491 }, { "epoch": 0.21006452556345623, "grad_norm": 7.132975071055591, "learning_rate": 9.173721903251406e-06, "loss": 17.7768, "step": 11492 }, { "epoch": 0.21008280475990276, "grad_norm": 7.024857048951854, "learning_rate": 9.173558900840286e-06, "loss": 17.8144, "step": 11493 }, { "epoch": 0.21010108395634927, "grad_norm": 7.681145450321271, "learning_rate": 9.173395883801205e-06, "loss": 17.9513, "step": 11494 }, { "epoch": 0.2101193631527958, "grad_norm": 6.469819414030036, "learning_rate": 9.173232852134733e-06, "loss": 17.4744, "step": 11495 }, { "epoch": 0.21013764234924232, "grad_norm": 6.146947289438684, "learning_rate": 9.173069805841442e-06, "loss": 17.3704, "step": 11496 }, { "epoch": 0.21015592154568885, "grad_norm": 6.551915270906452, "learning_rate": 9.172906744921904e-06, "loss": 17.3509, "step": 11497 }, { "epoch": 0.2101742007421354, "grad_norm": 6.192812102346293, "learning_rate": 9.172743669376691e-06, "loss": 17.6447, "step": 11498 }, { "epoch": 0.2101924799385819, "grad_norm": 6.415154541814359, "learning_rate": 9.172580579206372e-06, "loss": 17.3324, "step": 11499 }, { "epoch": 0.21021075913502843, "grad_norm": 6.722914924792808, "learning_rate": 9.172417474411521e-06, "loss": 17.6114, "step": 11500 }, { "epoch": 0.21022903833147494, "grad_norm": 8.548949703373932, "learning_rate": 9.172254354992707e-06, "loss": 18.1477, "step": 11501 }, { "epoch": 0.21024731752792147, "grad_norm": 5.861101349220818, "learning_rate": 9.172091220950505e-06, "loss": 17.2611, "step": 11502 }, { "epoch": 0.210265596724368, "grad_norm": 5.905902955464844, "learning_rate": 9.171928072285486e-06, "loss": 17.3396, "step": 11503 }, { "epoch": 0.21028387592081452, "grad_norm": 5.915945906752697, "learning_rate": 9.17176490899822e-06, "loss": 17.4707, "step": 11504 }, { "epoch": 0.21030215511726105, "grad_norm": 6.276079061196319, "learning_rate": 9.17160173108928e-06, "loss": 17.4897, "step": 11505 }, { "epoch": 0.21032043431370756, "grad_norm": 6.4612495913653625, "learning_rate": 9.171438538559239e-06, "loss": 17.8194, "step": 11506 }, { "epoch": 0.2103387135101541, "grad_norm": 6.872186108418846, "learning_rate": 9.171275331408667e-06, "loss": 17.6669, "step": 11507 }, { "epoch": 0.2103569927066006, "grad_norm": 5.683670818575571, "learning_rate": 9.171112109638136e-06, "loss": 17.1627, "step": 11508 }, { "epoch": 0.21037527190304714, "grad_norm": 6.323289335138741, "learning_rate": 9.17094887324822e-06, "loss": 17.5417, "step": 11509 }, { "epoch": 0.21039355109949368, "grad_norm": 5.994572405306893, "learning_rate": 9.170785622239491e-06, "loss": 17.4275, "step": 11510 }, { "epoch": 0.21041183029594018, "grad_norm": 6.553108039899262, "learning_rate": 9.17062235661252e-06, "loss": 17.749, "step": 11511 }, { "epoch": 0.21043010949238672, "grad_norm": 6.268290066568144, "learning_rate": 9.170459076367878e-06, "loss": 17.5229, "step": 11512 }, { "epoch": 0.21044838868883323, "grad_norm": 6.242131561088442, "learning_rate": 9.17029578150614e-06, "loss": 17.6085, "step": 11513 }, { "epoch": 0.21046666788527976, "grad_norm": 6.839671989520956, "learning_rate": 9.170132472027876e-06, "loss": 17.7471, "step": 11514 }, { "epoch": 0.2104849470817263, "grad_norm": 6.299417662322813, "learning_rate": 9.169969147933661e-06, "loss": 17.3098, "step": 11515 }, { "epoch": 0.2105032262781728, "grad_norm": 6.664050719356811, "learning_rate": 9.169805809224067e-06, "loss": 17.5669, "step": 11516 }, { "epoch": 0.21052150547461934, "grad_norm": 6.881472460367836, "learning_rate": 9.169642455899664e-06, "loss": 17.6966, "step": 11517 }, { "epoch": 0.21053978467106585, "grad_norm": 7.650325287111541, "learning_rate": 9.169479087961026e-06, "loss": 17.976, "step": 11518 }, { "epoch": 0.21055806386751239, "grad_norm": 6.622241294044782, "learning_rate": 9.169315705408725e-06, "loss": 17.5739, "step": 11519 }, { "epoch": 0.21057634306395892, "grad_norm": 6.596926006218519, "learning_rate": 9.169152308243334e-06, "loss": 17.704, "step": 11520 }, { "epoch": 0.21059462226040543, "grad_norm": 6.179397638603914, "learning_rate": 9.168988896465428e-06, "loss": 17.3994, "step": 11521 }, { "epoch": 0.21061290145685196, "grad_norm": 7.3885773820083935, "learning_rate": 9.168825470075575e-06, "loss": 18.2278, "step": 11522 }, { "epoch": 0.21063118065329847, "grad_norm": 7.24958690158779, "learning_rate": 9.168662029074354e-06, "loss": 17.681, "step": 11523 }, { "epoch": 0.210649459849745, "grad_norm": 7.721728704817233, "learning_rate": 9.168498573462332e-06, "loss": 17.9807, "step": 11524 }, { "epoch": 0.21066773904619152, "grad_norm": 8.601277694577712, "learning_rate": 9.168335103240085e-06, "loss": 17.554, "step": 11525 }, { "epoch": 0.21068601824263805, "grad_norm": 7.818730322223752, "learning_rate": 9.168171618408184e-06, "loss": 17.7242, "step": 11526 }, { "epoch": 0.2107042974390846, "grad_norm": 6.578160810947474, "learning_rate": 9.168008118967205e-06, "loss": 17.8085, "step": 11527 }, { "epoch": 0.2107225766355311, "grad_norm": 7.813451617036049, "learning_rate": 9.167844604917718e-06, "loss": 17.8416, "step": 11528 }, { "epoch": 0.21074085583197763, "grad_norm": 7.586779397627988, "learning_rate": 9.167681076260296e-06, "loss": 18.3751, "step": 11529 }, { "epoch": 0.21075913502842414, "grad_norm": 7.190603490535477, "learning_rate": 9.167517532995516e-06, "loss": 18.267, "step": 11530 }, { "epoch": 0.21077741422487067, "grad_norm": 6.1094351684151835, "learning_rate": 9.167353975123947e-06, "loss": 17.2526, "step": 11531 }, { "epoch": 0.2107956934213172, "grad_norm": 5.802959557639488, "learning_rate": 9.167190402646165e-06, "loss": 17.4219, "step": 11532 }, { "epoch": 0.21081397261776372, "grad_norm": 7.848723997652496, "learning_rate": 9.167026815562742e-06, "loss": 17.758, "step": 11533 }, { "epoch": 0.21083225181421025, "grad_norm": 6.219336050238912, "learning_rate": 9.166863213874251e-06, "loss": 17.2805, "step": 11534 }, { "epoch": 0.21085053101065676, "grad_norm": 6.420279170239606, "learning_rate": 9.166699597581267e-06, "loss": 17.3998, "step": 11535 }, { "epoch": 0.2108688102071033, "grad_norm": 6.206917950445866, "learning_rate": 9.166535966684362e-06, "loss": 17.4474, "step": 11536 }, { "epoch": 0.21088708940354983, "grad_norm": 7.389121348819259, "learning_rate": 9.16637232118411e-06, "loss": 17.8443, "step": 11537 }, { "epoch": 0.21090536859999634, "grad_norm": 7.116199840483028, "learning_rate": 9.166208661081084e-06, "loss": 17.713, "step": 11538 }, { "epoch": 0.21092364779644288, "grad_norm": 6.67231809526451, "learning_rate": 9.16604498637586e-06, "loss": 17.4988, "step": 11539 }, { "epoch": 0.21094192699288938, "grad_norm": 7.694803892212349, "learning_rate": 9.165881297069008e-06, "loss": 18.0834, "step": 11540 }, { "epoch": 0.21096020618933592, "grad_norm": 6.290457034681836, "learning_rate": 9.165717593161104e-06, "loss": 17.6581, "step": 11541 }, { "epoch": 0.21097848538578243, "grad_norm": 6.212094558893326, "learning_rate": 9.16555387465272e-06, "loss": 17.4364, "step": 11542 }, { "epoch": 0.21099676458222896, "grad_norm": 6.628723132600602, "learning_rate": 9.165390141544433e-06, "loss": 17.5085, "step": 11543 }, { "epoch": 0.2110150437786755, "grad_norm": 6.358862691567192, "learning_rate": 9.165226393836815e-06, "loss": 17.3477, "step": 11544 }, { "epoch": 0.211033322975122, "grad_norm": 6.740305082496149, "learning_rate": 9.16506263153044e-06, "loss": 17.9455, "step": 11545 }, { "epoch": 0.21105160217156854, "grad_norm": 6.26308925039984, "learning_rate": 9.164898854625882e-06, "loss": 17.5103, "step": 11546 }, { "epoch": 0.21106988136801505, "grad_norm": 7.128480082817722, "learning_rate": 9.164735063123714e-06, "loss": 17.646, "step": 11547 }, { "epoch": 0.21108816056446159, "grad_norm": 6.4775515543540605, "learning_rate": 9.16457125702451e-06, "loss": 17.6652, "step": 11548 }, { "epoch": 0.21110643976090812, "grad_norm": 6.380750658999174, "learning_rate": 9.164407436328845e-06, "loss": 17.6208, "step": 11549 }, { "epoch": 0.21112471895735463, "grad_norm": 6.888637801448464, "learning_rate": 9.164243601037297e-06, "loss": 17.5228, "step": 11550 }, { "epoch": 0.21114299815380116, "grad_norm": 5.799070743378431, "learning_rate": 9.164079751150434e-06, "loss": 17.1002, "step": 11551 }, { "epoch": 0.21116127735024767, "grad_norm": 6.427275669896442, "learning_rate": 9.163915886668832e-06, "loss": 17.6247, "step": 11552 }, { "epoch": 0.2111795565466942, "grad_norm": 6.910129880790046, "learning_rate": 9.163752007593068e-06, "loss": 17.7014, "step": 11553 }, { "epoch": 0.21119783574314074, "grad_norm": 6.334479730762166, "learning_rate": 9.163588113923714e-06, "loss": 17.4241, "step": 11554 }, { "epoch": 0.21121611493958725, "grad_norm": 10.11538820518104, "learning_rate": 9.163424205661345e-06, "loss": 18.7674, "step": 11555 }, { "epoch": 0.2112343941360338, "grad_norm": 5.632433495283178, "learning_rate": 9.163260282806533e-06, "loss": 17.0817, "step": 11556 }, { "epoch": 0.2112526733324803, "grad_norm": 7.162886238789814, "learning_rate": 9.163096345359859e-06, "loss": 17.8033, "step": 11557 }, { "epoch": 0.21127095252892683, "grad_norm": 6.708746524379571, "learning_rate": 9.162932393321891e-06, "loss": 17.5362, "step": 11558 }, { "epoch": 0.21128923172537334, "grad_norm": 7.855253920799346, "learning_rate": 9.162768426693207e-06, "loss": 18.2244, "step": 11559 }, { "epoch": 0.21130751092181987, "grad_norm": 7.777970909711207, "learning_rate": 9.162604445474382e-06, "loss": 18.2098, "step": 11560 }, { "epoch": 0.2113257901182664, "grad_norm": 5.156482272524668, "learning_rate": 9.162440449665988e-06, "loss": 16.9433, "step": 11561 }, { "epoch": 0.21134406931471292, "grad_norm": 6.151643993226001, "learning_rate": 9.162276439268601e-06, "loss": 17.3725, "step": 11562 }, { "epoch": 0.21136234851115945, "grad_norm": 6.370579279810969, "learning_rate": 9.162112414282797e-06, "loss": 17.4643, "step": 11563 }, { "epoch": 0.21138062770760596, "grad_norm": 8.356616226837273, "learning_rate": 9.16194837470915e-06, "loss": 18.4377, "step": 11564 }, { "epoch": 0.2113989069040525, "grad_norm": 6.467923689781751, "learning_rate": 9.161784320548237e-06, "loss": 17.3117, "step": 11565 }, { "epoch": 0.21141718610049903, "grad_norm": 9.517975928212337, "learning_rate": 9.161620251800629e-06, "loss": 18.5935, "step": 11566 }, { "epoch": 0.21143546529694554, "grad_norm": 6.024618051987733, "learning_rate": 9.161456168466905e-06, "loss": 17.2635, "step": 11567 }, { "epoch": 0.21145374449339208, "grad_norm": 6.240698126621197, "learning_rate": 9.161292070547636e-06, "loss": 17.4433, "step": 11568 }, { "epoch": 0.21147202368983858, "grad_norm": 6.112310765398839, "learning_rate": 9.1611279580434e-06, "loss": 17.5563, "step": 11569 }, { "epoch": 0.21149030288628512, "grad_norm": 6.5602463956907595, "learning_rate": 9.160963830954772e-06, "loss": 17.8943, "step": 11570 }, { "epoch": 0.21150858208273166, "grad_norm": 6.263790052624193, "learning_rate": 9.160799689282327e-06, "loss": 17.509, "step": 11571 }, { "epoch": 0.21152686127917816, "grad_norm": 7.064124643991461, "learning_rate": 9.16063553302664e-06, "loss": 17.9556, "step": 11572 }, { "epoch": 0.2115451404756247, "grad_norm": 7.44979062916576, "learning_rate": 9.160471362188285e-06, "loss": 17.534, "step": 11573 }, { "epoch": 0.2115634196720712, "grad_norm": 7.0510195637169595, "learning_rate": 9.160307176767841e-06, "loss": 17.726, "step": 11574 }, { "epoch": 0.21158169886851774, "grad_norm": 5.517175463698974, "learning_rate": 9.16014297676588e-06, "loss": 17.1502, "step": 11575 }, { "epoch": 0.21159997806496425, "grad_norm": 7.138670414777385, "learning_rate": 9.159978762182979e-06, "loss": 17.9271, "step": 11576 }, { "epoch": 0.21161825726141079, "grad_norm": 6.403940223475687, "learning_rate": 9.159814533019716e-06, "loss": 17.4957, "step": 11577 }, { "epoch": 0.21163653645785732, "grad_norm": 6.7509313342979205, "learning_rate": 9.15965028927666e-06, "loss": 17.6066, "step": 11578 }, { "epoch": 0.21165481565430383, "grad_norm": 6.171380155539927, "learning_rate": 9.159486030954395e-06, "loss": 17.4828, "step": 11579 }, { "epoch": 0.21167309485075037, "grad_norm": 6.342175557557343, "learning_rate": 9.15932175805349e-06, "loss": 17.5831, "step": 11580 }, { "epoch": 0.21169137404719687, "grad_norm": 6.400579261216163, "learning_rate": 9.159157470574522e-06, "loss": 17.6458, "step": 11581 }, { "epoch": 0.2117096532436434, "grad_norm": 6.927875169843063, "learning_rate": 9.15899316851807e-06, "loss": 17.5782, "step": 11582 }, { "epoch": 0.21172793244008994, "grad_norm": 6.849203963747864, "learning_rate": 9.158828851884707e-06, "loss": 17.6371, "step": 11583 }, { "epoch": 0.21174621163653645, "grad_norm": 7.280309477198249, "learning_rate": 9.158664520675009e-06, "loss": 17.5368, "step": 11584 }, { "epoch": 0.211764490832983, "grad_norm": 7.687530189637583, "learning_rate": 9.158500174889553e-06, "loss": 17.8132, "step": 11585 }, { "epoch": 0.2117827700294295, "grad_norm": 7.155417598881707, "learning_rate": 9.158335814528916e-06, "loss": 17.8905, "step": 11586 }, { "epoch": 0.21180104922587603, "grad_norm": 6.738417741555908, "learning_rate": 9.158171439593671e-06, "loss": 17.4718, "step": 11587 }, { "epoch": 0.21181932842232257, "grad_norm": 7.578109723057844, "learning_rate": 9.1580070500844e-06, "loss": 17.9955, "step": 11588 }, { "epoch": 0.21183760761876907, "grad_norm": 6.373369435944889, "learning_rate": 9.15784264600167e-06, "loss": 17.2122, "step": 11589 }, { "epoch": 0.2118558868152156, "grad_norm": 8.153928745446654, "learning_rate": 9.157678227346066e-06, "loss": 17.6803, "step": 11590 }, { "epoch": 0.21187416601166212, "grad_norm": 7.7531314636113215, "learning_rate": 9.157513794118158e-06, "loss": 17.7606, "step": 11591 }, { "epoch": 0.21189244520810865, "grad_norm": 8.286553170015672, "learning_rate": 9.157349346318525e-06, "loss": 17.923, "step": 11592 }, { "epoch": 0.21191072440455516, "grad_norm": 6.890711489229295, "learning_rate": 9.157184883947745e-06, "loss": 17.7812, "step": 11593 }, { "epoch": 0.2119290036010017, "grad_norm": 8.536433843597331, "learning_rate": 9.157020407006392e-06, "loss": 17.5298, "step": 11594 }, { "epoch": 0.21194728279744823, "grad_norm": 8.666063839961629, "learning_rate": 9.156855915495043e-06, "loss": 18.1918, "step": 11595 }, { "epoch": 0.21196556199389474, "grad_norm": 6.250026801355732, "learning_rate": 9.156691409414275e-06, "loss": 17.3656, "step": 11596 }, { "epoch": 0.21198384119034128, "grad_norm": 6.820324698686371, "learning_rate": 9.156526888764664e-06, "loss": 18.0642, "step": 11597 }, { "epoch": 0.21200212038678778, "grad_norm": 5.655656072320394, "learning_rate": 9.156362353546789e-06, "loss": 17.1208, "step": 11598 }, { "epoch": 0.21202039958323432, "grad_norm": 7.6187923804479265, "learning_rate": 9.156197803761223e-06, "loss": 18.0396, "step": 11599 }, { "epoch": 0.21203867877968086, "grad_norm": 5.836772120744281, "learning_rate": 9.156033239408544e-06, "loss": 17.3792, "step": 11600 }, { "epoch": 0.21205695797612736, "grad_norm": 6.5679101584275825, "learning_rate": 9.155868660489327e-06, "loss": 17.4726, "step": 11601 }, { "epoch": 0.2120752371725739, "grad_norm": 6.321499740706384, "learning_rate": 9.155704067004154e-06, "loss": 17.3891, "step": 11602 }, { "epoch": 0.2120935163690204, "grad_norm": 6.889131606502487, "learning_rate": 9.155539458953597e-06, "loss": 17.5789, "step": 11603 }, { "epoch": 0.21211179556546694, "grad_norm": 6.89687307770892, "learning_rate": 9.155374836338237e-06, "loss": 17.4977, "step": 11604 }, { "epoch": 0.21213007476191348, "grad_norm": 5.848217535319225, "learning_rate": 9.155210199158648e-06, "loss": 17.0581, "step": 11605 }, { "epoch": 0.21214835395836, "grad_norm": 6.740320780209365, "learning_rate": 9.155045547415408e-06, "loss": 17.4307, "step": 11606 }, { "epoch": 0.21216663315480652, "grad_norm": 8.038571701517087, "learning_rate": 9.154880881109093e-06, "loss": 18.2448, "step": 11607 }, { "epoch": 0.21218491235125303, "grad_norm": 6.480469147176548, "learning_rate": 9.15471620024028e-06, "loss": 17.4355, "step": 11608 }, { "epoch": 0.21220319154769957, "grad_norm": 6.531460315315049, "learning_rate": 9.15455150480955e-06, "loss": 17.3958, "step": 11609 }, { "epoch": 0.21222147074414607, "grad_norm": 7.304185423649102, "learning_rate": 9.154386794817476e-06, "loss": 18.0237, "step": 11610 }, { "epoch": 0.2122397499405926, "grad_norm": 6.429840559018353, "learning_rate": 9.154222070264637e-06, "loss": 17.3974, "step": 11611 }, { "epoch": 0.21225802913703914, "grad_norm": 5.9068517909284965, "learning_rate": 9.154057331151612e-06, "loss": 17.2643, "step": 11612 }, { "epoch": 0.21227630833348565, "grad_norm": 6.46069971775121, "learning_rate": 9.153892577478973e-06, "loss": 17.4117, "step": 11613 }, { "epoch": 0.2122945875299322, "grad_norm": 7.125258530227882, "learning_rate": 9.153727809247303e-06, "loss": 17.6316, "step": 11614 }, { "epoch": 0.2123128667263787, "grad_norm": 6.122252316473095, "learning_rate": 9.153563026457178e-06, "loss": 17.4086, "step": 11615 }, { "epoch": 0.21233114592282523, "grad_norm": 6.444666365891119, "learning_rate": 9.153398229109174e-06, "loss": 17.4249, "step": 11616 }, { "epoch": 0.21234942511927177, "grad_norm": 7.493875148419845, "learning_rate": 9.15323341720387e-06, "loss": 17.8383, "step": 11617 }, { "epoch": 0.21236770431571828, "grad_norm": 8.095743451259024, "learning_rate": 9.153068590741843e-06, "loss": 18.0685, "step": 11618 }, { "epoch": 0.2123859835121648, "grad_norm": 7.440249340564604, "learning_rate": 9.152903749723671e-06, "loss": 18.1068, "step": 11619 }, { "epoch": 0.21240426270861132, "grad_norm": 6.3605355322271295, "learning_rate": 9.152738894149931e-06, "loss": 17.5995, "step": 11620 }, { "epoch": 0.21242254190505785, "grad_norm": 5.294877973158245, "learning_rate": 9.152574024021202e-06, "loss": 17.0195, "step": 11621 }, { "epoch": 0.2124408211015044, "grad_norm": 7.75287908114004, "learning_rate": 9.152409139338062e-06, "loss": 18.4059, "step": 11622 }, { "epoch": 0.2124591002979509, "grad_norm": 5.93776794059065, "learning_rate": 9.152244240101088e-06, "loss": 17.2836, "step": 11623 }, { "epoch": 0.21247737949439743, "grad_norm": 8.964291260472157, "learning_rate": 9.152079326310858e-06, "loss": 18.1602, "step": 11624 }, { "epoch": 0.21249565869084394, "grad_norm": 5.577234267455074, "learning_rate": 9.151914397967952e-06, "loss": 17.0911, "step": 11625 }, { "epoch": 0.21251393788729048, "grad_norm": 5.771379750917453, "learning_rate": 9.151749455072945e-06, "loss": 17.3774, "step": 11626 }, { "epoch": 0.21253221708373699, "grad_norm": 6.673806956891338, "learning_rate": 9.151584497626415e-06, "loss": 17.5897, "step": 11627 }, { "epoch": 0.21255049628018352, "grad_norm": 6.5728951995585945, "learning_rate": 9.151419525628942e-06, "loss": 17.6813, "step": 11628 }, { "epoch": 0.21256877547663006, "grad_norm": 6.38576838913584, "learning_rate": 9.151254539081106e-06, "loss": 17.3931, "step": 11629 }, { "epoch": 0.21258705467307656, "grad_norm": 6.229414042846477, "learning_rate": 9.151089537983482e-06, "loss": 17.3624, "step": 11630 }, { "epoch": 0.2126053338695231, "grad_norm": 7.222747357571975, "learning_rate": 9.150924522336648e-06, "loss": 17.5589, "step": 11631 }, { "epoch": 0.2126236130659696, "grad_norm": 6.316618315287245, "learning_rate": 9.150759492141186e-06, "loss": 17.6173, "step": 11632 }, { "epoch": 0.21264189226241614, "grad_norm": 6.707411291395412, "learning_rate": 9.15059444739767e-06, "loss": 17.5739, "step": 11633 }, { "epoch": 0.21266017145886268, "grad_norm": 7.791898302407172, "learning_rate": 9.150429388106684e-06, "loss": 17.9749, "step": 11634 }, { "epoch": 0.2126784506553092, "grad_norm": 7.6586665707499195, "learning_rate": 9.1502643142688e-06, "loss": 17.9408, "step": 11635 }, { "epoch": 0.21269672985175572, "grad_norm": 7.394545431095758, "learning_rate": 9.1500992258846e-06, "loss": 17.6004, "step": 11636 }, { "epoch": 0.21271500904820223, "grad_norm": 7.765655019870423, "learning_rate": 9.149934122954662e-06, "loss": 18.0874, "step": 11637 }, { "epoch": 0.21273328824464877, "grad_norm": 5.704399797239602, "learning_rate": 9.149769005479568e-06, "loss": 16.9736, "step": 11638 }, { "epoch": 0.2127515674410953, "grad_norm": 7.2538294596919854, "learning_rate": 9.14960387345989e-06, "loss": 18.0825, "step": 11639 }, { "epoch": 0.2127698466375418, "grad_norm": 7.820846597900338, "learning_rate": 9.149438726896213e-06, "loss": 17.9034, "step": 11640 }, { "epoch": 0.21278812583398835, "grad_norm": 7.4306590408094, "learning_rate": 9.149273565789112e-06, "loss": 17.8273, "step": 11641 }, { "epoch": 0.21280640503043485, "grad_norm": 7.559820598413737, "learning_rate": 9.149108390139168e-06, "loss": 18.0741, "step": 11642 }, { "epoch": 0.2128246842268814, "grad_norm": 6.603188208185257, "learning_rate": 9.148943199946958e-06, "loss": 17.4214, "step": 11643 }, { "epoch": 0.2128429634233279, "grad_norm": 6.026696245161655, "learning_rate": 9.148777995213062e-06, "loss": 17.2833, "step": 11644 }, { "epoch": 0.21286124261977443, "grad_norm": 7.963908018718288, "learning_rate": 9.148612775938062e-06, "loss": 18.3751, "step": 11645 }, { "epoch": 0.21287952181622097, "grad_norm": 7.869206912743326, "learning_rate": 9.148447542122532e-06, "loss": 18.0016, "step": 11646 }, { "epoch": 0.21289780101266748, "grad_norm": 6.242017443935883, "learning_rate": 9.148282293767053e-06, "loss": 17.4454, "step": 11647 }, { "epoch": 0.212916080209114, "grad_norm": 7.435901395097871, "learning_rate": 9.148117030872206e-06, "loss": 17.8332, "step": 11648 }, { "epoch": 0.21293435940556052, "grad_norm": 7.250999550074894, "learning_rate": 9.147951753438568e-06, "loss": 17.8396, "step": 11649 }, { "epoch": 0.21295263860200705, "grad_norm": 6.9846991067583355, "learning_rate": 9.147786461466718e-06, "loss": 18.0227, "step": 11650 }, { "epoch": 0.2129709177984536, "grad_norm": 6.5260356530520305, "learning_rate": 9.147621154957238e-06, "loss": 17.4412, "step": 11651 }, { "epoch": 0.2129891969949001, "grad_norm": 7.501193901397891, "learning_rate": 9.147455833910706e-06, "loss": 18.01, "step": 11652 }, { "epoch": 0.21300747619134663, "grad_norm": 6.063653386051944, "learning_rate": 9.147290498327698e-06, "loss": 17.2201, "step": 11653 }, { "epoch": 0.21302575538779314, "grad_norm": 6.530678628238025, "learning_rate": 9.1471251482088e-06, "loss": 17.4399, "step": 11654 }, { "epoch": 0.21304403458423968, "grad_norm": 6.143164244941838, "learning_rate": 9.146959783554587e-06, "loss": 17.2389, "step": 11655 }, { "epoch": 0.2130623137806862, "grad_norm": 7.801729789804826, "learning_rate": 9.14679440436564e-06, "loss": 18.2345, "step": 11656 }, { "epoch": 0.21308059297713272, "grad_norm": 7.16214036325532, "learning_rate": 9.14662901064254e-06, "loss": 17.6856, "step": 11657 }, { "epoch": 0.21309887217357926, "grad_norm": 6.371340189688993, "learning_rate": 9.146463602385863e-06, "loss": 17.192, "step": 11658 }, { "epoch": 0.21311715137002576, "grad_norm": 8.98845559325511, "learning_rate": 9.146298179596191e-06, "loss": 18.3197, "step": 11659 }, { "epoch": 0.2131354305664723, "grad_norm": 7.3263238137853826, "learning_rate": 9.146132742274106e-06, "loss": 18.0394, "step": 11660 }, { "epoch": 0.2131537097629188, "grad_norm": 5.462073782100659, "learning_rate": 9.145967290420184e-06, "loss": 17.2091, "step": 11661 }, { "epoch": 0.21317198895936534, "grad_norm": 6.746639516360985, "learning_rate": 9.145801824035006e-06, "loss": 17.8386, "step": 11662 }, { "epoch": 0.21319026815581188, "grad_norm": 7.6689912130221645, "learning_rate": 9.145636343119152e-06, "loss": 17.9464, "step": 11663 }, { "epoch": 0.2132085473522584, "grad_norm": 6.695748876445942, "learning_rate": 9.145470847673203e-06, "loss": 17.5456, "step": 11664 }, { "epoch": 0.21322682654870492, "grad_norm": 8.198516003864741, "learning_rate": 9.145305337697737e-06, "loss": 17.909, "step": 11665 }, { "epoch": 0.21324510574515143, "grad_norm": 6.908031273691143, "learning_rate": 9.145139813193337e-06, "loss": 17.966, "step": 11666 }, { "epoch": 0.21326338494159797, "grad_norm": 7.055918827022239, "learning_rate": 9.144974274160582e-06, "loss": 17.9561, "step": 11667 }, { "epoch": 0.2132816641380445, "grad_norm": 6.398251192858152, "learning_rate": 9.144808720600052e-06, "loss": 17.5263, "step": 11668 }, { "epoch": 0.213299943334491, "grad_norm": 6.898360543071194, "learning_rate": 9.144643152512326e-06, "loss": 17.321, "step": 11669 }, { "epoch": 0.21331822253093755, "grad_norm": 6.859210135796859, "learning_rate": 9.144477569897984e-06, "loss": 17.7609, "step": 11670 }, { "epoch": 0.21333650172738405, "grad_norm": 6.722237461387456, "learning_rate": 9.14431197275761e-06, "loss": 17.55, "step": 11671 }, { "epoch": 0.2133547809238306, "grad_norm": 5.435865481530694, "learning_rate": 9.144146361091784e-06, "loss": 17.2483, "step": 11672 }, { "epoch": 0.21337306012027712, "grad_norm": 6.935190425853276, "learning_rate": 9.143980734901082e-06, "loss": 17.8095, "step": 11673 }, { "epoch": 0.21339133931672363, "grad_norm": 6.078212964955392, "learning_rate": 9.143815094186088e-06, "loss": 17.3058, "step": 11674 }, { "epoch": 0.21340961851317017, "grad_norm": 6.8529284508095865, "learning_rate": 9.14364943894738e-06, "loss": 17.7405, "step": 11675 }, { "epoch": 0.21342789770961668, "grad_norm": 7.243324531782682, "learning_rate": 9.143483769185542e-06, "loss": 18.0304, "step": 11676 }, { "epoch": 0.2134461769060632, "grad_norm": 7.7500115768982765, "learning_rate": 9.143318084901152e-06, "loss": 17.9287, "step": 11677 }, { "epoch": 0.21346445610250972, "grad_norm": 6.009762398044453, "learning_rate": 9.143152386094791e-06, "loss": 17.3918, "step": 11678 }, { "epoch": 0.21348273529895626, "grad_norm": 7.524750726459312, "learning_rate": 9.142986672767042e-06, "loss": 17.9176, "step": 11679 }, { "epoch": 0.2135010144954028, "grad_norm": 6.387471572678087, "learning_rate": 9.142820944918485e-06, "loss": 17.4112, "step": 11680 }, { "epoch": 0.2135192936918493, "grad_norm": 7.499335328259966, "learning_rate": 9.142655202549698e-06, "loss": 17.9846, "step": 11681 }, { "epoch": 0.21353757288829583, "grad_norm": 9.125716072213985, "learning_rate": 9.142489445661265e-06, "loss": 17.9601, "step": 11682 }, { "epoch": 0.21355585208474234, "grad_norm": 8.12236431625984, "learning_rate": 9.142323674253766e-06, "loss": 18.2265, "step": 11683 }, { "epoch": 0.21357413128118888, "grad_norm": 7.314767327594051, "learning_rate": 9.142157888327781e-06, "loss": 17.8159, "step": 11684 }, { "epoch": 0.2135924104776354, "grad_norm": 6.091776107112946, "learning_rate": 9.141992087883893e-06, "loss": 17.414, "step": 11685 }, { "epoch": 0.21361068967408192, "grad_norm": 8.327713269654664, "learning_rate": 9.141826272922683e-06, "loss": 17.9633, "step": 11686 }, { "epoch": 0.21362896887052846, "grad_norm": 7.269109843841839, "learning_rate": 9.14166044344473e-06, "loss": 17.8597, "step": 11687 }, { "epoch": 0.21364724806697497, "grad_norm": 4.9721885361299805, "learning_rate": 9.141494599450615e-06, "loss": 16.8696, "step": 11688 }, { "epoch": 0.2136655272634215, "grad_norm": 7.090417751395468, "learning_rate": 9.141328740940922e-06, "loss": 17.8949, "step": 11689 }, { "epoch": 0.21368380645986804, "grad_norm": 7.035078818145415, "learning_rate": 9.141162867916234e-06, "loss": 17.4371, "step": 11690 }, { "epoch": 0.21370208565631454, "grad_norm": 16.43434679237421, "learning_rate": 9.140996980377126e-06, "loss": 18.5113, "step": 11691 }, { "epoch": 0.21372036485276108, "grad_norm": 6.384761996289143, "learning_rate": 9.140831078324183e-06, "loss": 17.2118, "step": 11692 }, { "epoch": 0.2137386440492076, "grad_norm": 4.832332939261827, "learning_rate": 9.140665161757988e-06, "loss": 16.8148, "step": 11693 }, { "epoch": 0.21375692324565412, "grad_norm": 5.436442815946285, "learning_rate": 9.14049923067912e-06, "loss": 17.0378, "step": 11694 }, { "epoch": 0.21377520244210063, "grad_norm": 6.427883132973813, "learning_rate": 9.140333285088162e-06, "loss": 17.2919, "step": 11695 }, { "epoch": 0.21379348163854717, "grad_norm": 6.633390866294447, "learning_rate": 9.140167324985695e-06, "loss": 17.6946, "step": 11696 }, { "epoch": 0.2138117608349937, "grad_norm": 6.974052542862472, "learning_rate": 9.140001350372302e-06, "loss": 17.7674, "step": 11697 }, { "epoch": 0.2138300400314402, "grad_norm": 7.825334790348214, "learning_rate": 9.139835361248563e-06, "loss": 18.2005, "step": 11698 }, { "epoch": 0.21384831922788675, "grad_norm": 6.478210075398582, "learning_rate": 9.139669357615059e-06, "loss": 17.6169, "step": 11699 }, { "epoch": 0.21386659842433325, "grad_norm": 5.855948759772832, "learning_rate": 9.139503339472375e-06, "loss": 17.3109, "step": 11700 }, { "epoch": 0.2138848776207798, "grad_norm": 7.681421099782268, "learning_rate": 9.139337306821089e-06, "loss": 18.3091, "step": 11701 }, { "epoch": 0.21390315681722633, "grad_norm": 7.687429016987093, "learning_rate": 9.139171259661785e-06, "loss": 18.2078, "step": 11702 }, { "epoch": 0.21392143601367283, "grad_norm": 7.724126260515264, "learning_rate": 9.139005197995046e-06, "loss": 18.0587, "step": 11703 }, { "epoch": 0.21393971521011937, "grad_norm": 7.107129911998939, "learning_rate": 9.138839121821454e-06, "loss": 17.5602, "step": 11704 }, { "epoch": 0.21395799440656588, "grad_norm": 7.221122836910635, "learning_rate": 9.138673031141587e-06, "loss": 17.8326, "step": 11705 }, { "epoch": 0.2139762736030124, "grad_norm": 7.428135615526704, "learning_rate": 9.138506925956032e-06, "loss": 17.6103, "step": 11706 }, { "epoch": 0.21399455279945895, "grad_norm": 6.715112095164756, "learning_rate": 9.13834080626537e-06, "loss": 17.5912, "step": 11707 }, { "epoch": 0.21401283199590546, "grad_norm": 6.911483038049616, "learning_rate": 9.138174672070181e-06, "loss": 17.9271, "step": 11708 }, { "epoch": 0.214031111192352, "grad_norm": 7.199424686812347, "learning_rate": 9.13800852337105e-06, "loss": 17.4627, "step": 11709 }, { "epoch": 0.2140493903887985, "grad_norm": 6.34042024283748, "learning_rate": 9.137842360168559e-06, "loss": 17.6055, "step": 11710 }, { "epoch": 0.21406766958524504, "grad_norm": 7.195130962687598, "learning_rate": 9.137676182463287e-06, "loss": 18.0304, "step": 11711 }, { "epoch": 0.21408594878169154, "grad_norm": 7.174069647519072, "learning_rate": 9.13750999025582e-06, "loss": 18.0179, "step": 11712 }, { "epoch": 0.21410422797813808, "grad_norm": 6.9921154121447895, "learning_rate": 9.137343783546741e-06, "loss": 17.8961, "step": 11713 }, { "epoch": 0.21412250717458461, "grad_norm": 6.625369188687903, "learning_rate": 9.13717756233663e-06, "loss": 17.7955, "step": 11714 }, { "epoch": 0.21414078637103112, "grad_norm": 6.851585685886345, "learning_rate": 9.137011326626071e-06, "loss": 17.5806, "step": 11715 }, { "epoch": 0.21415906556747766, "grad_norm": 5.764128584589972, "learning_rate": 9.136845076415645e-06, "loss": 17.2906, "step": 11716 }, { "epoch": 0.21417734476392417, "grad_norm": 6.615149485649565, "learning_rate": 9.136678811705937e-06, "loss": 17.7314, "step": 11717 }, { "epoch": 0.2141956239603707, "grad_norm": 7.0609122220256575, "learning_rate": 9.13651253249753e-06, "loss": 17.646, "step": 11718 }, { "epoch": 0.21421390315681724, "grad_norm": 6.169439045457185, "learning_rate": 9.136346238791004e-06, "loss": 17.2506, "step": 11719 }, { "epoch": 0.21423218235326374, "grad_norm": 6.770321955388486, "learning_rate": 9.136179930586944e-06, "loss": 17.7473, "step": 11720 }, { "epoch": 0.21425046154971028, "grad_norm": 6.334847259047038, "learning_rate": 9.136013607885931e-06, "loss": 17.3342, "step": 11721 }, { "epoch": 0.2142687407461568, "grad_norm": 5.40271471549218, "learning_rate": 9.135847270688548e-06, "loss": 17.1949, "step": 11722 }, { "epoch": 0.21428701994260332, "grad_norm": 5.540679582173256, "learning_rate": 9.135680918995382e-06, "loss": 17.0815, "step": 11723 }, { "epoch": 0.21430529913904986, "grad_norm": 6.869525382603854, "learning_rate": 9.135514552807014e-06, "loss": 17.702, "step": 11724 }, { "epoch": 0.21432357833549637, "grad_norm": 7.410156317625125, "learning_rate": 9.135348172124024e-06, "loss": 17.9923, "step": 11725 }, { "epoch": 0.2143418575319429, "grad_norm": 7.271365725918522, "learning_rate": 9.135181776946998e-06, "loss": 17.9684, "step": 11726 }, { "epoch": 0.2143601367283894, "grad_norm": 6.263625276598185, "learning_rate": 9.135015367276519e-06, "loss": 17.4419, "step": 11727 }, { "epoch": 0.21437841592483595, "grad_norm": 8.377459866777203, "learning_rate": 9.13484894311317e-06, "loss": 18.2825, "step": 11728 }, { "epoch": 0.21439669512128248, "grad_norm": 7.152149590163419, "learning_rate": 9.134682504457534e-06, "loss": 17.6428, "step": 11729 }, { "epoch": 0.214414974317729, "grad_norm": 6.015003874468464, "learning_rate": 9.134516051310196e-06, "loss": 17.3465, "step": 11730 }, { "epoch": 0.21443325351417553, "grad_norm": 6.631122011287288, "learning_rate": 9.134349583671738e-06, "loss": 17.4083, "step": 11731 }, { "epoch": 0.21445153271062203, "grad_norm": 7.006134388504954, "learning_rate": 9.134183101542742e-06, "loss": 18.0137, "step": 11732 }, { "epoch": 0.21446981190706857, "grad_norm": 7.5871653350385735, "learning_rate": 9.134016604923792e-06, "loss": 17.796, "step": 11733 }, { "epoch": 0.21448809110351508, "grad_norm": 5.97955367235346, "learning_rate": 9.133850093815474e-06, "loss": 17.3184, "step": 11734 }, { "epoch": 0.2145063702999616, "grad_norm": 8.284393843268226, "learning_rate": 9.13368356821837e-06, "loss": 18.2927, "step": 11735 }, { "epoch": 0.21452464949640815, "grad_norm": 8.14329714951248, "learning_rate": 9.133517028133066e-06, "loss": 17.8919, "step": 11736 }, { "epoch": 0.21454292869285466, "grad_norm": 6.615274421398922, "learning_rate": 9.13335047356014e-06, "loss": 17.3906, "step": 11737 }, { "epoch": 0.2145612078893012, "grad_norm": 6.764795377485563, "learning_rate": 9.13318390450018e-06, "loss": 17.5324, "step": 11738 }, { "epoch": 0.2145794870857477, "grad_norm": 6.6683910449413455, "learning_rate": 9.133017320953769e-06, "loss": 17.6568, "step": 11739 }, { "epoch": 0.21459776628219424, "grad_norm": 5.7621555547544006, "learning_rate": 9.132850722921494e-06, "loss": 17.3257, "step": 11740 }, { "epoch": 0.21461604547864077, "grad_norm": 6.748899472649107, "learning_rate": 9.132684110403934e-06, "loss": 17.5096, "step": 11741 }, { "epoch": 0.21463432467508728, "grad_norm": 6.898553539535939, "learning_rate": 9.132517483401673e-06, "loss": 17.6772, "step": 11742 }, { "epoch": 0.21465260387153381, "grad_norm": 7.127063150402214, "learning_rate": 9.132350841915299e-06, "loss": 18.1117, "step": 11743 }, { "epoch": 0.21467088306798032, "grad_norm": 6.748241952896581, "learning_rate": 9.132184185945392e-06, "loss": 17.6061, "step": 11744 }, { "epoch": 0.21468916226442686, "grad_norm": 6.273668380566096, "learning_rate": 9.132017515492539e-06, "loss": 17.5028, "step": 11745 }, { "epoch": 0.2147074414608734, "grad_norm": 5.616942758696618, "learning_rate": 9.131850830557323e-06, "loss": 16.9859, "step": 11746 }, { "epoch": 0.2147257206573199, "grad_norm": 6.6702528076925605, "learning_rate": 9.131684131140328e-06, "loss": 17.7597, "step": 11747 }, { "epoch": 0.21474399985376644, "grad_norm": 6.4314363441149895, "learning_rate": 9.13151741724214e-06, "loss": 17.4066, "step": 11748 }, { "epoch": 0.21476227905021295, "grad_norm": 6.254027235472205, "learning_rate": 9.131350688863341e-06, "loss": 17.4192, "step": 11749 }, { "epoch": 0.21478055824665948, "grad_norm": 6.399946667461864, "learning_rate": 9.131183946004515e-06, "loss": 17.7945, "step": 11750 }, { "epoch": 0.214798837443106, "grad_norm": 12.592891370944914, "learning_rate": 9.131017188666251e-06, "loss": 18.6017, "step": 11751 }, { "epoch": 0.21481711663955252, "grad_norm": 7.892271113563236, "learning_rate": 9.13085041684913e-06, "loss": 17.752, "step": 11752 }, { "epoch": 0.21483539583599906, "grad_norm": 6.360236938234864, "learning_rate": 9.130683630553734e-06, "loss": 17.7222, "step": 11753 }, { "epoch": 0.21485367503244557, "grad_norm": 6.957877398085484, "learning_rate": 9.130516829780652e-06, "loss": 17.6721, "step": 11754 }, { "epoch": 0.2148719542288921, "grad_norm": 5.36767478304127, "learning_rate": 9.130350014530465e-06, "loss": 17.0818, "step": 11755 }, { "epoch": 0.2148902334253386, "grad_norm": 6.7866213840917045, "learning_rate": 9.13018318480376e-06, "loss": 17.5193, "step": 11756 }, { "epoch": 0.21490851262178515, "grad_norm": 7.310114998337191, "learning_rate": 9.130016340601124e-06, "loss": 18.1721, "step": 11757 }, { "epoch": 0.21492679181823168, "grad_norm": 7.766754764704571, "learning_rate": 9.129849481923137e-06, "loss": 17.9885, "step": 11758 }, { "epoch": 0.2149450710146782, "grad_norm": 7.358159678313833, "learning_rate": 9.129682608770388e-06, "loss": 17.8406, "step": 11759 }, { "epoch": 0.21496335021112473, "grad_norm": 6.34846838623486, "learning_rate": 9.129515721143459e-06, "loss": 17.1203, "step": 11760 }, { "epoch": 0.21498162940757123, "grad_norm": 6.503362003029298, "learning_rate": 9.129348819042934e-06, "loss": 17.3981, "step": 11761 }, { "epoch": 0.21499990860401777, "grad_norm": 7.710973776237608, "learning_rate": 9.1291819024694e-06, "loss": 18.0849, "step": 11762 }, { "epoch": 0.2150181878004643, "grad_norm": 7.169148622837023, "learning_rate": 9.129014971423442e-06, "loss": 17.79, "step": 11763 }, { "epoch": 0.2150364669969108, "grad_norm": 6.419588401375401, "learning_rate": 9.128848025905645e-06, "loss": 17.4207, "step": 11764 }, { "epoch": 0.21505474619335735, "grad_norm": 6.879463926786173, "learning_rate": 9.128681065916596e-06, "loss": 17.5164, "step": 11765 }, { "epoch": 0.21507302538980386, "grad_norm": 6.559677967491284, "learning_rate": 9.128514091456876e-06, "loss": 17.3865, "step": 11766 }, { "epoch": 0.2150913045862504, "grad_norm": 6.290869658016945, "learning_rate": 9.128347102527072e-06, "loss": 17.1739, "step": 11767 }, { "epoch": 0.2151095837826969, "grad_norm": 7.5171335927678635, "learning_rate": 9.128180099127772e-06, "loss": 17.824, "step": 11768 }, { "epoch": 0.21512786297914344, "grad_norm": 6.275706202548021, "learning_rate": 9.128013081259557e-06, "loss": 17.4745, "step": 11769 }, { "epoch": 0.21514614217558997, "grad_norm": 7.312128322408952, "learning_rate": 9.127846048923015e-06, "loss": 17.5866, "step": 11770 }, { "epoch": 0.21516442137203648, "grad_norm": 6.570097448543907, "learning_rate": 9.127679002118731e-06, "loss": 17.8862, "step": 11771 }, { "epoch": 0.21518270056848302, "grad_norm": 6.802289447448312, "learning_rate": 9.12751194084729e-06, "loss": 17.5273, "step": 11772 }, { "epoch": 0.21520097976492952, "grad_norm": 7.7264833228498775, "learning_rate": 9.127344865109276e-06, "loss": 17.8344, "step": 11773 }, { "epoch": 0.21521925896137606, "grad_norm": 6.054914653447179, "learning_rate": 9.12717777490528e-06, "loss": 17.6735, "step": 11774 }, { "epoch": 0.2152375381578226, "grad_norm": 6.365035929911253, "learning_rate": 9.12701067023588e-06, "loss": 17.7055, "step": 11775 }, { "epoch": 0.2152558173542691, "grad_norm": 5.932996851989658, "learning_rate": 9.12684355110167e-06, "loss": 17.1325, "step": 11776 }, { "epoch": 0.21527409655071564, "grad_norm": 6.515564613167469, "learning_rate": 9.126676417503229e-06, "loss": 17.8026, "step": 11777 }, { "epoch": 0.21529237574716215, "grad_norm": 7.039026329151559, "learning_rate": 9.126509269441144e-06, "loss": 17.803, "step": 11778 }, { "epoch": 0.21531065494360868, "grad_norm": 7.11009534613514, "learning_rate": 9.126342106916005e-06, "loss": 17.6082, "step": 11779 }, { "epoch": 0.21532893414005522, "grad_norm": 6.431209903045549, "learning_rate": 9.126174929928394e-06, "loss": 17.5655, "step": 11780 }, { "epoch": 0.21534721333650172, "grad_norm": 6.794067942928916, "learning_rate": 9.126007738478897e-06, "loss": 17.7129, "step": 11781 }, { "epoch": 0.21536549253294826, "grad_norm": 6.129034430365924, "learning_rate": 9.1258405325681e-06, "loss": 17.2967, "step": 11782 }, { "epoch": 0.21538377172939477, "grad_norm": 6.228539361149761, "learning_rate": 9.125673312196592e-06, "loss": 17.4575, "step": 11783 }, { "epoch": 0.2154020509258413, "grad_norm": 6.686981454888693, "learning_rate": 9.125506077364958e-06, "loss": 17.8332, "step": 11784 }, { "epoch": 0.2154203301222878, "grad_norm": 5.786652186785163, "learning_rate": 9.125338828073781e-06, "loss": 17.1764, "step": 11785 }, { "epoch": 0.21543860931873435, "grad_norm": 8.721482698306923, "learning_rate": 9.125171564323649e-06, "loss": 18.1051, "step": 11786 }, { "epoch": 0.21545688851518088, "grad_norm": 5.932436499512886, "learning_rate": 9.12500428611515e-06, "loss": 17.0535, "step": 11787 }, { "epoch": 0.2154751677116274, "grad_norm": 6.780036753127285, "learning_rate": 9.124836993448868e-06, "loss": 17.8021, "step": 11788 }, { "epoch": 0.21549344690807393, "grad_norm": 6.002060187567544, "learning_rate": 9.12466968632539e-06, "loss": 17.2499, "step": 11789 }, { "epoch": 0.21551172610452043, "grad_norm": 6.589613600248243, "learning_rate": 9.124502364745305e-06, "loss": 17.5803, "step": 11790 }, { "epoch": 0.21553000530096697, "grad_norm": 5.867571287420147, "learning_rate": 9.124335028709197e-06, "loss": 17.2653, "step": 11791 }, { "epoch": 0.2155482844974135, "grad_norm": 7.073145531524499, "learning_rate": 9.12416767821765e-06, "loss": 18.1059, "step": 11792 }, { "epoch": 0.21556656369386, "grad_norm": 6.988284935897942, "learning_rate": 9.124000313271256e-06, "loss": 17.9629, "step": 11793 }, { "epoch": 0.21558484289030655, "grad_norm": 5.539287280895075, "learning_rate": 9.123832933870597e-06, "loss": 17.3149, "step": 11794 }, { "epoch": 0.21560312208675306, "grad_norm": 6.4895026181279984, "learning_rate": 9.123665540016262e-06, "loss": 17.5314, "step": 11795 }, { "epoch": 0.2156214012831996, "grad_norm": 7.069783717638051, "learning_rate": 9.123498131708837e-06, "loss": 17.6707, "step": 11796 }, { "epoch": 0.21563968047964613, "grad_norm": 6.540982541515931, "learning_rate": 9.123330708948908e-06, "loss": 17.6134, "step": 11797 }, { "epoch": 0.21565795967609264, "grad_norm": 7.211356462757298, "learning_rate": 9.123163271737063e-06, "loss": 17.7928, "step": 11798 }, { "epoch": 0.21567623887253917, "grad_norm": 5.38931003982935, "learning_rate": 9.12299582007389e-06, "loss": 17.2277, "step": 11799 }, { "epoch": 0.21569451806898568, "grad_norm": 6.84494750179637, "learning_rate": 9.122828353959971e-06, "loss": 17.9003, "step": 11800 }, { "epoch": 0.21571279726543222, "grad_norm": 6.745374541778459, "learning_rate": 9.1226608733959e-06, "loss": 17.5247, "step": 11801 }, { "epoch": 0.21573107646187872, "grad_norm": 5.554898939967108, "learning_rate": 9.122493378382259e-06, "loss": 17.0669, "step": 11802 }, { "epoch": 0.21574935565832526, "grad_norm": 6.301156541169049, "learning_rate": 9.122325868919637e-06, "loss": 17.3965, "step": 11803 }, { "epoch": 0.2157676348547718, "grad_norm": 6.630125518593899, "learning_rate": 9.12215834500862e-06, "loss": 17.8173, "step": 11804 }, { "epoch": 0.2157859140512183, "grad_norm": 6.10525941766438, "learning_rate": 9.121990806649795e-06, "loss": 17.5165, "step": 11805 }, { "epoch": 0.21580419324766484, "grad_norm": 6.8259056305339945, "learning_rate": 9.12182325384375e-06, "loss": 17.8236, "step": 11806 }, { "epoch": 0.21582247244411135, "grad_norm": 5.807572762701553, "learning_rate": 9.121655686591073e-06, "loss": 17.262, "step": 11807 }, { "epoch": 0.21584075164055788, "grad_norm": 5.953191464279736, "learning_rate": 9.121488104892352e-06, "loss": 17.3984, "step": 11808 }, { "epoch": 0.21585903083700442, "grad_norm": 5.750383837581517, "learning_rate": 9.121320508748171e-06, "loss": 17.3132, "step": 11809 }, { "epoch": 0.21587731003345093, "grad_norm": 6.541225734110672, "learning_rate": 9.121152898159118e-06, "loss": 17.7538, "step": 11810 }, { "epoch": 0.21589558922989746, "grad_norm": 8.125911642449207, "learning_rate": 9.120985273125784e-06, "loss": 18.2132, "step": 11811 }, { "epoch": 0.21591386842634397, "grad_norm": 7.2094951004451, "learning_rate": 9.120817633648753e-06, "loss": 17.8451, "step": 11812 }, { "epoch": 0.2159321476227905, "grad_norm": 5.94342052308106, "learning_rate": 9.120649979728615e-06, "loss": 17.1825, "step": 11813 }, { "epoch": 0.21595042681923704, "grad_norm": 5.519430438837936, "learning_rate": 9.120482311365955e-06, "loss": 17.0082, "step": 11814 }, { "epoch": 0.21596870601568355, "grad_norm": 7.840471022481249, "learning_rate": 9.120314628561362e-06, "loss": 18.1265, "step": 11815 }, { "epoch": 0.21598698521213008, "grad_norm": 6.227794689099335, "learning_rate": 9.120146931315424e-06, "loss": 17.2198, "step": 11816 }, { "epoch": 0.2160052644085766, "grad_norm": 6.449503747615225, "learning_rate": 9.11997921962873e-06, "loss": 17.7227, "step": 11817 }, { "epoch": 0.21602354360502313, "grad_norm": 6.598491712152966, "learning_rate": 9.119811493501865e-06, "loss": 17.4789, "step": 11818 }, { "epoch": 0.21604182280146964, "grad_norm": 7.658603780721176, "learning_rate": 9.11964375293542e-06, "loss": 17.9342, "step": 11819 }, { "epoch": 0.21606010199791617, "grad_norm": 6.813154337910873, "learning_rate": 9.11947599792998e-06, "loss": 17.698, "step": 11820 }, { "epoch": 0.2160783811943627, "grad_norm": 7.735146543824995, "learning_rate": 9.11930822848613e-06, "loss": 17.4835, "step": 11821 }, { "epoch": 0.21609666039080921, "grad_norm": 6.586523536038008, "learning_rate": 9.119140444604467e-06, "loss": 17.6442, "step": 11822 }, { "epoch": 0.21611493958725575, "grad_norm": 5.802005255457315, "learning_rate": 9.118972646285573e-06, "loss": 17.0101, "step": 11823 }, { "epoch": 0.21613321878370226, "grad_norm": 7.947883271402378, "learning_rate": 9.118804833530037e-06, "loss": 18.2263, "step": 11824 }, { "epoch": 0.2161514979801488, "grad_norm": 6.600167212655567, "learning_rate": 9.118637006338448e-06, "loss": 17.4165, "step": 11825 }, { "epoch": 0.21616977717659533, "grad_norm": 8.321030702889383, "learning_rate": 9.118469164711394e-06, "loss": 18.3109, "step": 11826 }, { "epoch": 0.21618805637304184, "grad_norm": 6.815631949701846, "learning_rate": 9.118301308649461e-06, "loss": 17.6911, "step": 11827 }, { "epoch": 0.21620633556948837, "grad_norm": 7.600373915365126, "learning_rate": 9.118133438153242e-06, "loss": 17.961, "step": 11828 }, { "epoch": 0.21622461476593488, "grad_norm": 7.772198325610063, "learning_rate": 9.11796555322332e-06, "loss": 18.0769, "step": 11829 }, { "epoch": 0.21624289396238142, "grad_norm": 6.9767876318635675, "learning_rate": 9.117797653860288e-06, "loss": 17.5627, "step": 11830 }, { "epoch": 0.21626117315882795, "grad_norm": 6.443769562013518, "learning_rate": 9.117629740064732e-06, "loss": 17.4205, "step": 11831 }, { "epoch": 0.21627945235527446, "grad_norm": 7.680971999628747, "learning_rate": 9.117461811837241e-06, "loss": 17.9219, "step": 11832 }, { "epoch": 0.216297731551721, "grad_norm": 6.172812442547839, "learning_rate": 9.117293869178404e-06, "loss": 17.427, "step": 11833 }, { "epoch": 0.2163160107481675, "grad_norm": 6.9445119510917666, "learning_rate": 9.11712591208881e-06, "loss": 17.9369, "step": 11834 }, { "epoch": 0.21633428994461404, "grad_norm": 6.02796978592889, "learning_rate": 9.116957940569044e-06, "loss": 17.5246, "step": 11835 }, { "epoch": 0.21635256914106055, "grad_norm": 5.972191777563683, "learning_rate": 9.1167899546197e-06, "loss": 17.317, "step": 11836 }, { "epoch": 0.21637084833750708, "grad_norm": 6.9378546496847235, "learning_rate": 9.116621954241364e-06, "loss": 17.3589, "step": 11837 }, { "epoch": 0.21638912753395362, "grad_norm": 6.452519156452831, "learning_rate": 9.116453939434626e-06, "loss": 17.6529, "step": 11838 }, { "epoch": 0.21640740673040013, "grad_norm": 5.643146956750747, "learning_rate": 9.116285910200074e-06, "loss": 17.2308, "step": 11839 }, { "epoch": 0.21642568592684666, "grad_norm": 8.741670301991528, "learning_rate": 9.116117866538297e-06, "loss": 18.1486, "step": 11840 }, { "epoch": 0.21644396512329317, "grad_norm": 7.543726740294994, "learning_rate": 9.115949808449883e-06, "loss": 18.0334, "step": 11841 }, { "epoch": 0.2164622443197397, "grad_norm": 8.26587446562942, "learning_rate": 9.115781735935423e-06, "loss": 18.1301, "step": 11842 }, { "epoch": 0.21648052351618624, "grad_norm": 7.667334811607829, "learning_rate": 9.115613648995504e-06, "loss": 17.8988, "step": 11843 }, { "epoch": 0.21649880271263275, "grad_norm": 7.209493264717548, "learning_rate": 9.115445547630716e-06, "loss": 17.7141, "step": 11844 }, { "epoch": 0.21651708190907928, "grad_norm": 6.4966574132294745, "learning_rate": 9.115277431841652e-06, "loss": 17.5758, "step": 11845 }, { "epoch": 0.2165353611055258, "grad_norm": 7.053509179934936, "learning_rate": 9.115109301628893e-06, "loss": 18.1974, "step": 11846 }, { "epoch": 0.21655364030197233, "grad_norm": 6.753767758501346, "learning_rate": 9.114941156993036e-06, "loss": 17.7995, "step": 11847 }, { "epoch": 0.21657191949841886, "grad_norm": 8.437271655610894, "learning_rate": 9.114772997934667e-06, "loss": 18.6091, "step": 11848 }, { "epoch": 0.21659019869486537, "grad_norm": 7.405182553876967, "learning_rate": 9.114604824454376e-06, "loss": 18.0246, "step": 11849 }, { "epoch": 0.2166084778913119, "grad_norm": 6.925499954862183, "learning_rate": 9.11443663655275e-06, "loss": 17.683, "step": 11850 }, { "epoch": 0.21662675708775841, "grad_norm": 7.02375638010205, "learning_rate": 9.114268434230383e-06, "loss": 17.6752, "step": 11851 }, { "epoch": 0.21664503628420495, "grad_norm": 8.319262822755558, "learning_rate": 9.11410021748786e-06, "loss": 18.2466, "step": 11852 }, { "epoch": 0.21666331548065146, "grad_norm": 5.675523731439726, "learning_rate": 9.113931986325775e-06, "loss": 17.0465, "step": 11853 }, { "epoch": 0.216681594677098, "grad_norm": 6.491090021517005, "learning_rate": 9.113763740744715e-06, "loss": 17.6491, "step": 11854 }, { "epoch": 0.21669987387354453, "grad_norm": 8.177124437509113, "learning_rate": 9.11359548074527e-06, "loss": 18.0958, "step": 11855 }, { "epoch": 0.21671815306999104, "grad_norm": 5.667189848145001, "learning_rate": 9.113427206328028e-06, "loss": 17.1354, "step": 11856 }, { "epoch": 0.21673643226643757, "grad_norm": 7.862395710919348, "learning_rate": 9.113258917493581e-06, "loss": 17.8076, "step": 11857 }, { "epoch": 0.21675471146288408, "grad_norm": 5.5223788730459775, "learning_rate": 9.11309061424252e-06, "loss": 17.1542, "step": 11858 }, { "epoch": 0.21677299065933062, "grad_norm": 7.686488475290111, "learning_rate": 9.112922296575433e-06, "loss": 18.2554, "step": 11859 }, { "epoch": 0.21679126985577715, "grad_norm": 5.599340225252916, "learning_rate": 9.11275396449291e-06, "loss": 17.1083, "step": 11860 }, { "epoch": 0.21680954905222366, "grad_norm": 7.6510061032633, "learning_rate": 9.11258561799554e-06, "loss": 18.2384, "step": 11861 }, { "epoch": 0.2168278282486702, "grad_norm": 6.827662276542155, "learning_rate": 9.112417257083916e-06, "loss": 17.6346, "step": 11862 }, { "epoch": 0.2168461074451167, "grad_norm": 6.4249535929699695, "learning_rate": 9.112248881758625e-06, "loss": 17.3116, "step": 11863 }, { "epoch": 0.21686438664156324, "grad_norm": 6.345876603272063, "learning_rate": 9.11208049202026e-06, "loss": 17.6555, "step": 11864 }, { "epoch": 0.21688266583800977, "grad_norm": 6.5773462015567565, "learning_rate": 9.111912087869408e-06, "loss": 17.8457, "step": 11865 }, { "epoch": 0.21690094503445628, "grad_norm": 7.1769804192445, "learning_rate": 9.111743669306663e-06, "loss": 17.7754, "step": 11866 }, { "epoch": 0.21691922423090282, "grad_norm": 6.6183778970578455, "learning_rate": 9.111575236332613e-06, "loss": 17.5283, "step": 11867 }, { "epoch": 0.21693750342734933, "grad_norm": 7.250872914207179, "learning_rate": 9.111406788947848e-06, "loss": 18.2171, "step": 11868 }, { "epoch": 0.21695578262379586, "grad_norm": 7.022477133112317, "learning_rate": 9.111238327152958e-06, "loss": 17.7527, "step": 11869 }, { "epoch": 0.21697406182024237, "grad_norm": 7.619132096028822, "learning_rate": 9.111069850948535e-06, "loss": 18.0703, "step": 11870 }, { "epoch": 0.2169923410166889, "grad_norm": 6.948474573255075, "learning_rate": 9.110901360335169e-06, "loss": 17.984, "step": 11871 }, { "epoch": 0.21701062021313544, "grad_norm": 6.442358726349963, "learning_rate": 9.11073285531345e-06, "loss": 17.4928, "step": 11872 }, { "epoch": 0.21702889940958195, "grad_norm": 7.101785052699681, "learning_rate": 9.110564335883968e-06, "loss": 17.9451, "step": 11873 }, { "epoch": 0.21704717860602848, "grad_norm": 6.56195274322934, "learning_rate": 9.110395802047319e-06, "loss": 17.7829, "step": 11874 }, { "epoch": 0.217065457802475, "grad_norm": 8.06071242426235, "learning_rate": 9.110227253804085e-06, "loss": 18.6257, "step": 11875 }, { "epoch": 0.21708373699892153, "grad_norm": 6.432825201440536, "learning_rate": 9.110058691154864e-06, "loss": 17.4202, "step": 11876 }, { "epoch": 0.21710201619536806, "grad_norm": 5.845197097981462, "learning_rate": 9.109890114100242e-06, "loss": 17.3125, "step": 11877 }, { "epoch": 0.21712029539181457, "grad_norm": 7.532775156558808, "learning_rate": 9.109721522640814e-06, "loss": 18.237, "step": 11878 }, { "epoch": 0.2171385745882611, "grad_norm": 6.287582770450092, "learning_rate": 9.109552916777166e-06, "loss": 17.3277, "step": 11879 }, { "epoch": 0.21715685378470762, "grad_norm": 6.93569583883979, "learning_rate": 9.109384296509893e-06, "loss": 17.7749, "step": 11880 }, { "epoch": 0.21717513298115415, "grad_norm": 6.934288715874354, "learning_rate": 9.109215661839585e-06, "loss": 17.8361, "step": 11881 }, { "epoch": 0.2171934121776007, "grad_norm": 7.443478827388052, "learning_rate": 9.109047012766832e-06, "loss": 18.0715, "step": 11882 }, { "epoch": 0.2172116913740472, "grad_norm": 6.191747183977963, "learning_rate": 9.108878349292225e-06, "loss": 17.3165, "step": 11883 }, { "epoch": 0.21722997057049373, "grad_norm": 8.187954700447742, "learning_rate": 9.108709671416357e-06, "loss": 17.983, "step": 11884 }, { "epoch": 0.21724824976694024, "grad_norm": 7.587699318994457, "learning_rate": 9.108540979139818e-06, "loss": 17.8604, "step": 11885 }, { "epoch": 0.21726652896338677, "grad_norm": 6.688204633883863, "learning_rate": 9.1083722724632e-06, "loss": 17.604, "step": 11886 }, { "epoch": 0.21728480815983328, "grad_norm": 7.402876541490754, "learning_rate": 9.108203551387093e-06, "loss": 17.8469, "step": 11887 }, { "epoch": 0.21730308735627982, "grad_norm": 8.390172591765566, "learning_rate": 9.108034815912089e-06, "loss": 18.5591, "step": 11888 }, { "epoch": 0.21732136655272635, "grad_norm": 6.715244974854403, "learning_rate": 9.10786606603878e-06, "loss": 17.9823, "step": 11889 }, { "epoch": 0.21733964574917286, "grad_norm": 7.638039044905897, "learning_rate": 9.107697301767757e-06, "loss": 17.8907, "step": 11890 }, { "epoch": 0.2173579249456194, "grad_norm": 12.617379013698208, "learning_rate": 9.10752852309961e-06, "loss": 19.1123, "step": 11891 }, { "epoch": 0.2173762041420659, "grad_norm": 8.498138539586357, "learning_rate": 9.107359730034932e-06, "loss": 18.0663, "step": 11892 }, { "epoch": 0.21739448333851244, "grad_norm": 6.258669536509158, "learning_rate": 9.107190922574316e-06, "loss": 17.4816, "step": 11893 }, { "epoch": 0.21741276253495898, "grad_norm": 8.139844778110206, "learning_rate": 9.107022100718353e-06, "loss": 18.3541, "step": 11894 }, { "epoch": 0.21743104173140548, "grad_norm": 6.958877476992919, "learning_rate": 9.106853264467632e-06, "loss": 17.824, "step": 11895 }, { "epoch": 0.21744932092785202, "grad_norm": 8.369859115580702, "learning_rate": 9.106684413822746e-06, "loss": 18.3058, "step": 11896 }, { "epoch": 0.21746760012429853, "grad_norm": 6.164079534457812, "learning_rate": 9.106515548784289e-06, "loss": 17.6808, "step": 11897 }, { "epoch": 0.21748587932074506, "grad_norm": 6.402373408976194, "learning_rate": 9.106346669352852e-06, "loss": 17.6395, "step": 11898 }, { "epoch": 0.2175041585171916, "grad_norm": 5.6330556941323975, "learning_rate": 9.106177775529026e-06, "loss": 17.204, "step": 11899 }, { "epoch": 0.2175224377136381, "grad_norm": 6.350200341278443, "learning_rate": 9.106008867313402e-06, "loss": 17.5304, "step": 11900 }, { "epoch": 0.21754071691008464, "grad_norm": 6.8636540147606215, "learning_rate": 9.105839944706573e-06, "loss": 17.6201, "step": 11901 }, { "epoch": 0.21755899610653115, "grad_norm": 6.294765863989509, "learning_rate": 9.105671007709134e-06, "loss": 17.6585, "step": 11902 }, { "epoch": 0.21757727530297769, "grad_norm": 6.703014440079122, "learning_rate": 9.105502056321672e-06, "loss": 17.7441, "step": 11903 }, { "epoch": 0.2175955544994242, "grad_norm": 7.431064453772132, "learning_rate": 9.10533309054478e-06, "loss": 18.0045, "step": 11904 }, { "epoch": 0.21761383369587073, "grad_norm": 5.927868837021644, "learning_rate": 9.105164110379054e-06, "loss": 17.4985, "step": 11905 }, { "epoch": 0.21763211289231726, "grad_norm": 6.456673769313275, "learning_rate": 9.104995115825086e-06, "loss": 17.6168, "step": 11906 }, { "epoch": 0.21765039208876377, "grad_norm": 6.264982376343958, "learning_rate": 9.104826106883463e-06, "loss": 17.1106, "step": 11907 }, { "epoch": 0.2176686712852103, "grad_norm": 6.995548377019063, "learning_rate": 9.104657083554783e-06, "loss": 17.756, "step": 11908 }, { "epoch": 0.21768695048165682, "grad_norm": 6.899534664640044, "learning_rate": 9.104488045839635e-06, "loss": 17.8964, "step": 11909 }, { "epoch": 0.21770522967810335, "grad_norm": 6.974231060934944, "learning_rate": 9.104318993738611e-06, "loss": 17.5191, "step": 11910 }, { "epoch": 0.2177235088745499, "grad_norm": 7.325421162880059, "learning_rate": 9.104149927252308e-06, "loss": 17.8467, "step": 11911 }, { "epoch": 0.2177417880709964, "grad_norm": 5.671400688697253, "learning_rate": 9.103980846381313e-06, "loss": 17.2265, "step": 11912 }, { "epoch": 0.21776006726744293, "grad_norm": 7.071496767851761, "learning_rate": 9.103811751126223e-06, "loss": 17.6563, "step": 11913 }, { "epoch": 0.21777834646388944, "grad_norm": 7.053972963952122, "learning_rate": 9.10364264148763e-06, "loss": 17.7474, "step": 11914 }, { "epoch": 0.21779662566033597, "grad_norm": 6.579193874837373, "learning_rate": 9.103473517466122e-06, "loss": 17.5621, "step": 11915 }, { "epoch": 0.2178149048567825, "grad_norm": 7.67772167225187, "learning_rate": 9.103304379062298e-06, "loss": 17.775, "step": 11916 }, { "epoch": 0.21783318405322902, "grad_norm": 7.306930891834455, "learning_rate": 9.103135226276747e-06, "loss": 17.7983, "step": 11917 }, { "epoch": 0.21785146324967555, "grad_norm": 6.552866014793707, "learning_rate": 9.102966059110065e-06, "loss": 17.3175, "step": 11918 }, { "epoch": 0.21786974244612206, "grad_norm": 7.010313412892092, "learning_rate": 9.10279687756284e-06, "loss": 17.6073, "step": 11919 }, { "epoch": 0.2178880216425686, "grad_norm": 6.072376752884002, "learning_rate": 9.102627681635672e-06, "loss": 17.5278, "step": 11920 }, { "epoch": 0.2179063008390151, "grad_norm": 7.112949881592971, "learning_rate": 9.102458471329147e-06, "loss": 17.6626, "step": 11921 }, { "epoch": 0.21792458003546164, "grad_norm": 7.054496167321983, "learning_rate": 9.102289246643862e-06, "loss": 17.9616, "step": 11922 }, { "epoch": 0.21794285923190818, "grad_norm": 8.625310648247885, "learning_rate": 9.102120007580408e-06, "loss": 18.2248, "step": 11923 }, { "epoch": 0.21796113842835468, "grad_norm": 8.026328983401024, "learning_rate": 9.101950754139381e-06, "loss": 18.2001, "step": 11924 }, { "epoch": 0.21797941762480122, "grad_norm": 6.643022144527676, "learning_rate": 9.101781486321371e-06, "loss": 17.6109, "step": 11925 }, { "epoch": 0.21799769682124773, "grad_norm": 7.646103526543786, "learning_rate": 9.101612204126975e-06, "loss": 17.9194, "step": 11926 }, { "epoch": 0.21801597601769426, "grad_norm": 5.488633472006613, "learning_rate": 9.101442907556782e-06, "loss": 17.4202, "step": 11927 }, { "epoch": 0.2180342552141408, "grad_norm": 7.098617209330299, "learning_rate": 9.101273596611388e-06, "loss": 17.8842, "step": 11928 }, { "epoch": 0.2180525344105873, "grad_norm": 5.565334616556032, "learning_rate": 9.101104271291386e-06, "loss": 17.1628, "step": 11929 }, { "epoch": 0.21807081360703384, "grad_norm": 6.913246404151351, "learning_rate": 9.10093493159737e-06, "loss": 17.8469, "step": 11930 }, { "epoch": 0.21808909280348035, "grad_norm": 8.524606431905655, "learning_rate": 9.100765577529934e-06, "loss": 17.9882, "step": 11931 }, { "epoch": 0.21810737199992689, "grad_norm": 5.645896753841313, "learning_rate": 9.100596209089668e-06, "loss": 17.2779, "step": 11932 }, { "epoch": 0.21812565119637342, "grad_norm": 6.447210336160625, "learning_rate": 9.10042682627717e-06, "loss": 17.4815, "step": 11933 }, { "epoch": 0.21814393039281993, "grad_norm": 6.838627540587328, "learning_rate": 9.100257429093031e-06, "loss": 17.8926, "step": 11934 }, { "epoch": 0.21816220958926646, "grad_norm": 7.473140013540203, "learning_rate": 9.100088017537844e-06, "loss": 17.7996, "step": 11935 }, { "epoch": 0.21818048878571297, "grad_norm": 7.335375339384342, "learning_rate": 9.099918591612207e-06, "loss": 18.0495, "step": 11936 }, { "epoch": 0.2181987679821595, "grad_norm": 7.779144124552107, "learning_rate": 9.09974915131671e-06, "loss": 17.809, "step": 11937 }, { "epoch": 0.21821704717860602, "grad_norm": 6.432819143883393, "learning_rate": 9.099579696651949e-06, "loss": 17.458, "step": 11938 }, { "epoch": 0.21823532637505255, "grad_norm": 6.6374244225346635, "learning_rate": 9.099410227618514e-06, "loss": 17.5334, "step": 11939 }, { "epoch": 0.2182536055714991, "grad_norm": 6.027833915964923, "learning_rate": 9.099240744217005e-06, "loss": 17.3092, "step": 11940 }, { "epoch": 0.2182718847679456, "grad_norm": 7.355883839099431, "learning_rate": 9.099071246448012e-06, "loss": 17.8985, "step": 11941 }, { "epoch": 0.21829016396439213, "grad_norm": 7.36139068850242, "learning_rate": 9.098901734312128e-06, "loss": 17.8174, "step": 11942 }, { "epoch": 0.21830844316083864, "grad_norm": 7.893589632897089, "learning_rate": 9.098732207809951e-06, "loss": 18.1183, "step": 11943 }, { "epoch": 0.21832672235728517, "grad_norm": 7.700565711248756, "learning_rate": 9.098562666942073e-06, "loss": 18.0619, "step": 11944 }, { "epoch": 0.2183450015537317, "grad_norm": 5.125093256215956, "learning_rate": 9.09839311170909e-06, "loss": 16.9294, "step": 11945 }, { "epoch": 0.21836328075017822, "grad_norm": 8.333853040328622, "learning_rate": 9.098223542111593e-06, "loss": 18.1513, "step": 11946 }, { "epoch": 0.21838155994662475, "grad_norm": 8.139593106388162, "learning_rate": 9.098053958150178e-06, "loss": 18.036, "step": 11947 }, { "epoch": 0.21839983914307126, "grad_norm": 7.634997353143541, "learning_rate": 9.09788435982544e-06, "loss": 18.227, "step": 11948 }, { "epoch": 0.2184181183395178, "grad_norm": 6.822586696970956, "learning_rate": 9.097714747137974e-06, "loss": 17.9048, "step": 11949 }, { "epoch": 0.21843639753596433, "grad_norm": 5.224206413541762, "learning_rate": 9.097545120088371e-06, "loss": 17.1113, "step": 11950 }, { "epoch": 0.21845467673241084, "grad_norm": 6.060260021453217, "learning_rate": 9.097375478677228e-06, "loss": 17.4103, "step": 11951 }, { "epoch": 0.21847295592885738, "grad_norm": 5.282185504490752, "learning_rate": 9.097205822905141e-06, "loss": 16.9444, "step": 11952 }, { "epoch": 0.21849123512530388, "grad_norm": 7.1102882167471195, "learning_rate": 9.097036152772703e-06, "loss": 17.9322, "step": 11953 }, { "epoch": 0.21850951432175042, "grad_norm": 7.57731183830557, "learning_rate": 9.09686646828051e-06, "loss": 17.554, "step": 11954 }, { "epoch": 0.21852779351819693, "grad_norm": 6.975978544577265, "learning_rate": 9.096696769429154e-06, "loss": 17.4307, "step": 11955 }, { "epoch": 0.21854607271464346, "grad_norm": 5.449457076461351, "learning_rate": 9.09652705621923e-06, "loss": 17.1161, "step": 11956 }, { "epoch": 0.21856435191109, "grad_norm": 6.9973741598750685, "learning_rate": 9.096357328651337e-06, "loss": 18.023, "step": 11957 }, { "epoch": 0.2185826311075365, "grad_norm": 8.436011815734975, "learning_rate": 9.096187586726064e-06, "loss": 17.7318, "step": 11958 }, { "epoch": 0.21860091030398304, "grad_norm": 7.78096901465431, "learning_rate": 9.09601783044401e-06, "loss": 18.0124, "step": 11959 }, { "epoch": 0.21861918950042955, "grad_norm": 4.859143022196665, "learning_rate": 9.09584805980577e-06, "loss": 16.7077, "step": 11960 }, { "epoch": 0.21863746869687609, "grad_norm": 5.9820407948444725, "learning_rate": 9.095678274811938e-06, "loss": 17.56, "step": 11961 }, { "epoch": 0.21865574789332262, "grad_norm": 6.9941686148540745, "learning_rate": 9.095508475463108e-06, "loss": 17.5618, "step": 11962 }, { "epoch": 0.21867402708976913, "grad_norm": 6.513741789828851, "learning_rate": 9.095338661759879e-06, "loss": 17.6074, "step": 11963 }, { "epoch": 0.21869230628621567, "grad_norm": 6.469130823331481, "learning_rate": 9.09516883370284e-06, "loss": 17.6614, "step": 11964 }, { "epoch": 0.21871058548266217, "grad_norm": 7.339213971916416, "learning_rate": 9.09499899129259e-06, "loss": 17.7564, "step": 11965 }, { "epoch": 0.2187288646791087, "grad_norm": 7.051393632555219, "learning_rate": 9.094829134529726e-06, "loss": 17.7138, "step": 11966 }, { "epoch": 0.21874714387555524, "grad_norm": 6.655667605004137, "learning_rate": 9.094659263414838e-06, "loss": 17.6074, "step": 11967 }, { "epoch": 0.21876542307200175, "grad_norm": 5.697676945789829, "learning_rate": 9.094489377948528e-06, "loss": 17.228, "step": 11968 }, { "epoch": 0.2187837022684483, "grad_norm": 7.858658288547763, "learning_rate": 9.094319478131387e-06, "loss": 18.2695, "step": 11969 }, { "epoch": 0.2188019814648948, "grad_norm": 6.885393288110382, "learning_rate": 9.09414956396401e-06, "loss": 17.5842, "step": 11970 }, { "epoch": 0.21882026066134133, "grad_norm": 7.015131369993315, "learning_rate": 9.093979635446994e-06, "loss": 17.59, "step": 11971 }, { "epoch": 0.21883853985778784, "grad_norm": 7.2145632118926155, "learning_rate": 9.093809692580937e-06, "loss": 17.8017, "step": 11972 }, { "epoch": 0.21885681905423437, "grad_norm": 6.565744924503204, "learning_rate": 9.093639735366431e-06, "loss": 17.5354, "step": 11973 }, { "epoch": 0.2188750982506809, "grad_norm": 7.018434256222989, "learning_rate": 9.093469763804073e-06, "loss": 17.5274, "step": 11974 }, { "epoch": 0.21889337744712742, "grad_norm": 7.556468320286656, "learning_rate": 9.093299777894458e-06, "loss": 17.6066, "step": 11975 }, { "epoch": 0.21891165664357395, "grad_norm": 5.617374179301001, "learning_rate": 9.093129777638183e-06, "loss": 17.1376, "step": 11976 }, { "epoch": 0.21892993584002046, "grad_norm": 8.243979994025754, "learning_rate": 9.092959763035843e-06, "loss": 17.8591, "step": 11977 }, { "epoch": 0.218948215036467, "grad_norm": 6.783744924746423, "learning_rate": 9.092789734088034e-06, "loss": 17.8086, "step": 11978 }, { "epoch": 0.21896649423291353, "grad_norm": 7.659724633477323, "learning_rate": 9.092619690795354e-06, "loss": 18.0718, "step": 11979 }, { "epoch": 0.21898477342936004, "grad_norm": 6.391978236108534, "learning_rate": 9.092449633158395e-06, "loss": 17.5176, "step": 11980 }, { "epoch": 0.21900305262580658, "grad_norm": 6.81729565369, "learning_rate": 9.092279561177758e-06, "loss": 17.6482, "step": 11981 }, { "epoch": 0.21902133182225308, "grad_norm": 6.671017714206008, "learning_rate": 9.092109474854031e-06, "loss": 17.5313, "step": 11982 }, { "epoch": 0.21903961101869962, "grad_norm": 6.339744106747581, "learning_rate": 9.091939374187821e-06, "loss": 17.3638, "step": 11983 }, { "epoch": 0.21905789021514616, "grad_norm": 5.983675068374729, "learning_rate": 9.091769259179715e-06, "loss": 17.229, "step": 11984 }, { "epoch": 0.21907616941159266, "grad_norm": 7.391299335830813, "learning_rate": 9.091599129830313e-06, "loss": 17.9019, "step": 11985 }, { "epoch": 0.2190944486080392, "grad_norm": 6.425807083269966, "learning_rate": 9.091428986140213e-06, "loss": 17.414, "step": 11986 }, { "epoch": 0.2191127278044857, "grad_norm": 5.483876618268429, "learning_rate": 9.091258828110008e-06, "loss": 17.0787, "step": 11987 }, { "epoch": 0.21913100700093224, "grad_norm": 8.051543722042831, "learning_rate": 9.091088655740298e-06, "loss": 17.9003, "step": 11988 }, { "epoch": 0.21914928619737875, "grad_norm": 6.625316110521182, "learning_rate": 9.090918469031676e-06, "loss": 17.5168, "step": 11989 }, { "epoch": 0.2191675653938253, "grad_norm": 6.4224362385000715, "learning_rate": 9.090748267984738e-06, "loss": 17.2853, "step": 11990 }, { "epoch": 0.21918584459027182, "grad_norm": 7.061070925059579, "learning_rate": 9.090578052600082e-06, "loss": 17.6916, "step": 11991 }, { "epoch": 0.21920412378671833, "grad_norm": 6.970553476191607, "learning_rate": 9.090407822878308e-06, "loss": 17.6499, "step": 11992 }, { "epoch": 0.21922240298316487, "grad_norm": 7.158033677684297, "learning_rate": 9.09023757882001e-06, "loss": 17.7665, "step": 11993 }, { "epoch": 0.21924068217961137, "grad_norm": 6.272418194792395, "learning_rate": 9.090067320425782e-06, "loss": 17.4884, "step": 11994 }, { "epoch": 0.2192589613760579, "grad_norm": 7.872646561097602, "learning_rate": 9.089897047696223e-06, "loss": 18.2421, "step": 11995 }, { "epoch": 0.21927724057250444, "grad_norm": 6.128466211050685, "learning_rate": 9.089726760631929e-06, "loss": 17.2306, "step": 11996 }, { "epoch": 0.21929551976895095, "grad_norm": 6.01276693195329, "learning_rate": 9.0895564592335e-06, "loss": 17.2313, "step": 11997 }, { "epoch": 0.2193137989653975, "grad_norm": 7.076087289152299, "learning_rate": 9.089386143501528e-06, "loss": 17.803, "step": 11998 }, { "epoch": 0.219332078161844, "grad_norm": 7.062194248458111, "learning_rate": 9.089215813436614e-06, "loss": 17.7454, "step": 11999 }, { "epoch": 0.21935035735829053, "grad_norm": 5.747517064898039, "learning_rate": 9.089045469039353e-06, "loss": 17.0884, "step": 12000 }, { "epoch": 0.21936863655473707, "grad_norm": 5.707829334885778, "learning_rate": 9.088875110310343e-06, "loss": 17.1666, "step": 12001 }, { "epoch": 0.21938691575118358, "grad_norm": 7.905205923263187, "learning_rate": 9.08870473725018e-06, "loss": 18.4898, "step": 12002 }, { "epoch": 0.2194051949476301, "grad_norm": 7.221712254111787, "learning_rate": 9.088534349859462e-06, "loss": 18.192, "step": 12003 }, { "epoch": 0.21942347414407662, "grad_norm": 7.848459947058525, "learning_rate": 9.088363948138786e-06, "loss": 18.0316, "step": 12004 }, { "epoch": 0.21944175334052315, "grad_norm": 7.562366816646246, "learning_rate": 9.088193532088747e-06, "loss": 18.1611, "step": 12005 }, { "epoch": 0.21946003253696966, "grad_norm": 6.323638346385794, "learning_rate": 9.088023101709946e-06, "loss": 17.3996, "step": 12006 }, { "epoch": 0.2194783117334162, "grad_norm": 7.368769866620946, "learning_rate": 9.08785265700298e-06, "loss": 18.0641, "step": 12007 }, { "epoch": 0.21949659092986273, "grad_norm": 6.762553503819832, "learning_rate": 9.087682197968444e-06, "loss": 17.7905, "step": 12008 }, { "epoch": 0.21951487012630924, "grad_norm": 7.787144860856624, "learning_rate": 9.087511724606936e-06, "loss": 17.965, "step": 12009 }, { "epoch": 0.21953314932275578, "grad_norm": 6.843154924681289, "learning_rate": 9.087341236919055e-06, "loss": 17.6036, "step": 12010 }, { "epoch": 0.21955142851920229, "grad_norm": 6.479468034628051, "learning_rate": 9.087170734905397e-06, "loss": 17.3977, "step": 12011 }, { "epoch": 0.21956970771564882, "grad_norm": 6.161715466614702, "learning_rate": 9.087000218566562e-06, "loss": 17.4078, "step": 12012 }, { "epoch": 0.21958798691209536, "grad_norm": 7.24460315815929, "learning_rate": 9.086829687903144e-06, "loss": 17.5102, "step": 12013 }, { "epoch": 0.21960626610854186, "grad_norm": 7.761451058530412, "learning_rate": 9.086659142915744e-06, "loss": 17.99, "step": 12014 }, { "epoch": 0.2196245453049884, "grad_norm": 5.437564033660677, "learning_rate": 9.086488583604956e-06, "loss": 17.1171, "step": 12015 }, { "epoch": 0.2196428245014349, "grad_norm": 7.523027903179191, "learning_rate": 9.086318009971383e-06, "loss": 17.8936, "step": 12016 }, { "epoch": 0.21966110369788144, "grad_norm": 7.238757594020898, "learning_rate": 9.086147422015617e-06, "loss": 17.6363, "step": 12017 }, { "epoch": 0.21967938289432798, "grad_norm": 6.316033839709729, "learning_rate": 9.085976819738261e-06, "loss": 17.4743, "step": 12018 }, { "epoch": 0.2196976620907745, "grad_norm": 7.593845688623457, "learning_rate": 9.08580620313991e-06, "loss": 17.8691, "step": 12019 }, { "epoch": 0.21971594128722102, "grad_norm": 5.60514414452451, "learning_rate": 9.085635572221163e-06, "loss": 17.2711, "step": 12020 }, { "epoch": 0.21973422048366753, "grad_norm": 6.490622293399584, "learning_rate": 9.08546492698262e-06, "loss": 17.7255, "step": 12021 }, { "epoch": 0.21975249968011407, "grad_norm": 6.909787167782599, "learning_rate": 9.085294267424874e-06, "loss": 17.6954, "step": 12022 }, { "epoch": 0.21977077887656057, "grad_norm": 8.902887335133869, "learning_rate": 9.085123593548526e-06, "loss": 17.6157, "step": 12023 }, { "epoch": 0.2197890580730071, "grad_norm": 5.928585006625503, "learning_rate": 9.084952905354177e-06, "loss": 17.3431, "step": 12024 }, { "epoch": 0.21980733726945365, "grad_norm": 6.630666001768404, "learning_rate": 9.08478220284242e-06, "loss": 17.6577, "step": 12025 }, { "epoch": 0.21982561646590015, "grad_norm": 6.622126466769476, "learning_rate": 9.084611486013857e-06, "loss": 17.795, "step": 12026 }, { "epoch": 0.2198438956623467, "grad_norm": 6.015359889451512, "learning_rate": 9.084440754869085e-06, "loss": 17.2767, "step": 12027 }, { "epoch": 0.2198621748587932, "grad_norm": 5.932151449422499, "learning_rate": 9.084270009408701e-06, "loss": 17.358, "step": 12028 }, { "epoch": 0.21988045405523973, "grad_norm": 6.126648192498557, "learning_rate": 9.084099249633307e-06, "loss": 17.1403, "step": 12029 }, { "epoch": 0.21989873325168627, "grad_norm": 6.819311160305065, "learning_rate": 9.083928475543498e-06, "loss": 17.9101, "step": 12030 }, { "epoch": 0.21991701244813278, "grad_norm": 7.7970206104472615, "learning_rate": 9.083757687139876e-06, "loss": 18.031, "step": 12031 }, { "epoch": 0.2199352916445793, "grad_norm": 8.01782031017913, "learning_rate": 9.083586884423037e-06, "loss": 18.1894, "step": 12032 }, { "epoch": 0.21995357084102582, "grad_norm": 6.155004043893107, "learning_rate": 9.08341606739358e-06, "loss": 17.2879, "step": 12033 }, { "epoch": 0.21997185003747236, "grad_norm": 7.134669392044465, "learning_rate": 9.083245236052103e-06, "loss": 17.8754, "step": 12034 }, { "epoch": 0.2199901292339189, "grad_norm": 9.047554058890503, "learning_rate": 9.083074390399208e-06, "loss": 18.2534, "step": 12035 }, { "epoch": 0.2200084084303654, "grad_norm": 6.879625297549601, "learning_rate": 9.08290353043549e-06, "loss": 17.9168, "step": 12036 }, { "epoch": 0.22002668762681193, "grad_norm": 6.552809811227273, "learning_rate": 9.08273265616155e-06, "loss": 17.797, "step": 12037 }, { "epoch": 0.22004496682325844, "grad_norm": 6.8308385565125285, "learning_rate": 9.082561767577986e-06, "loss": 17.7753, "step": 12038 }, { "epoch": 0.22006324601970498, "grad_norm": 7.742324753665272, "learning_rate": 9.082390864685397e-06, "loss": 17.6787, "step": 12039 }, { "epoch": 0.22008152521615149, "grad_norm": 9.080837449731453, "learning_rate": 9.082219947484383e-06, "loss": 17.7981, "step": 12040 }, { "epoch": 0.22009980441259802, "grad_norm": 6.729636067383213, "learning_rate": 9.082049015975542e-06, "loss": 17.5586, "step": 12041 }, { "epoch": 0.22011808360904456, "grad_norm": 7.289131014769498, "learning_rate": 9.081878070159475e-06, "loss": 17.9244, "step": 12042 }, { "epoch": 0.22013636280549106, "grad_norm": 6.764780826991467, "learning_rate": 9.081707110036777e-06, "loss": 17.8212, "step": 12043 }, { "epoch": 0.2201546420019376, "grad_norm": 4.907240918746769, "learning_rate": 9.081536135608052e-06, "loss": 16.8488, "step": 12044 }, { "epoch": 0.2201729211983841, "grad_norm": 5.758994634397533, "learning_rate": 9.081365146873895e-06, "loss": 17.2962, "step": 12045 }, { "epoch": 0.22019120039483064, "grad_norm": 5.986978869383253, "learning_rate": 9.081194143834908e-06, "loss": 17.2695, "step": 12046 }, { "epoch": 0.22020947959127718, "grad_norm": 6.843056842103075, "learning_rate": 9.08102312649169e-06, "loss": 17.5858, "step": 12047 }, { "epoch": 0.2202277587877237, "grad_norm": 6.659494582954172, "learning_rate": 9.080852094844839e-06, "loss": 17.5676, "step": 12048 }, { "epoch": 0.22024603798417022, "grad_norm": 7.580450822304439, "learning_rate": 9.080681048894957e-06, "loss": 17.8294, "step": 12049 }, { "epoch": 0.22026431718061673, "grad_norm": 6.177606235294419, "learning_rate": 9.080509988642641e-06, "loss": 17.3763, "step": 12050 }, { "epoch": 0.22028259637706327, "grad_norm": 6.720988067004841, "learning_rate": 9.080338914088494e-06, "loss": 17.426, "step": 12051 }, { "epoch": 0.2203008755735098, "grad_norm": 7.5094124068149615, "learning_rate": 9.08016782523311e-06, "loss": 17.7883, "step": 12052 }, { "epoch": 0.2203191547699563, "grad_norm": 7.748640349325527, "learning_rate": 9.079996722077094e-06, "loss": 18.0248, "step": 12053 }, { "epoch": 0.22033743396640285, "grad_norm": 5.936252782832204, "learning_rate": 9.079825604621041e-06, "loss": 17.3862, "step": 12054 }, { "epoch": 0.22035571316284935, "grad_norm": 7.384229712370265, "learning_rate": 9.079654472865556e-06, "loss": 17.8948, "step": 12055 }, { "epoch": 0.2203739923592959, "grad_norm": 6.597747095443318, "learning_rate": 9.079483326811236e-06, "loss": 17.5383, "step": 12056 }, { "epoch": 0.2203922715557424, "grad_norm": 5.842929305692039, "learning_rate": 9.079312166458678e-06, "loss": 17.3827, "step": 12057 }, { "epoch": 0.22041055075218893, "grad_norm": 6.588821379665793, "learning_rate": 9.079140991808488e-06, "loss": 17.5008, "step": 12058 }, { "epoch": 0.22042882994863547, "grad_norm": 6.60250764612619, "learning_rate": 9.078969802861262e-06, "loss": 17.5604, "step": 12059 }, { "epoch": 0.22044710914508198, "grad_norm": 6.2355489714401084, "learning_rate": 9.0787985996176e-06, "loss": 17.0363, "step": 12060 }, { "epoch": 0.2204653883415285, "grad_norm": 6.538423104713626, "learning_rate": 9.078627382078103e-06, "loss": 17.8154, "step": 12061 }, { "epoch": 0.22048366753797502, "grad_norm": 7.022784672294815, "learning_rate": 9.078456150243371e-06, "loss": 17.7635, "step": 12062 }, { "epoch": 0.22050194673442156, "grad_norm": 7.1416528461418, "learning_rate": 9.078284904114005e-06, "loss": 17.7005, "step": 12063 }, { "epoch": 0.2205202259308681, "grad_norm": 6.059971626355186, "learning_rate": 9.078113643690602e-06, "loss": 17.2732, "step": 12064 }, { "epoch": 0.2205385051273146, "grad_norm": 6.5397619475435205, "learning_rate": 9.077942368973767e-06, "loss": 17.8203, "step": 12065 }, { "epoch": 0.22055678432376113, "grad_norm": 6.575952660749232, "learning_rate": 9.077771079964097e-06, "loss": 17.6508, "step": 12066 }, { "epoch": 0.22057506352020764, "grad_norm": 6.674876409356483, "learning_rate": 9.077599776662194e-06, "loss": 17.5094, "step": 12067 }, { "epoch": 0.22059334271665418, "grad_norm": 6.747351806384442, "learning_rate": 9.077428459068656e-06, "loss": 17.6921, "step": 12068 }, { "epoch": 0.22061162191310071, "grad_norm": 8.107350141197397, "learning_rate": 9.077257127184087e-06, "loss": 18.0762, "step": 12069 }, { "epoch": 0.22062990110954722, "grad_norm": 7.087594877867561, "learning_rate": 9.077085781009084e-06, "loss": 17.6351, "step": 12070 }, { "epoch": 0.22064818030599376, "grad_norm": 7.426294762385301, "learning_rate": 9.07691442054425e-06, "loss": 17.9902, "step": 12071 }, { "epoch": 0.22066645950244027, "grad_norm": 6.433734635874548, "learning_rate": 9.076743045790184e-06, "loss": 17.5732, "step": 12072 }, { "epoch": 0.2206847386988868, "grad_norm": 7.418091579853038, "learning_rate": 9.076571656747488e-06, "loss": 17.8136, "step": 12073 }, { "epoch": 0.2207030178953333, "grad_norm": 8.994427748065133, "learning_rate": 9.076400253416762e-06, "loss": 18.7329, "step": 12074 }, { "epoch": 0.22072129709177984, "grad_norm": 6.890757558467735, "learning_rate": 9.076228835798606e-06, "loss": 17.6003, "step": 12075 }, { "epoch": 0.22073957628822638, "grad_norm": 4.939340614448473, "learning_rate": 9.076057403893624e-06, "loss": 16.8055, "step": 12076 }, { "epoch": 0.2207578554846729, "grad_norm": 5.740784737536925, "learning_rate": 9.075885957702411e-06, "loss": 17.1904, "step": 12077 }, { "epoch": 0.22077613468111942, "grad_norm": 5.982820156246118, "learning_rate": 9.075714497225574e-06, "loss": 17.4779, "step": 12078 }, { "epoch": 0.22079441387756593, "grad_norm": 6.580558522174517, "learning_rate": 9.075543022463711e-06, "loss": 17.7942, "step": 12079 }, { "epoch": 0.22081269307401247, "grad_norm": 6.624701415458696, "learning_rate": 9.075371533417423e-06, "loss": 17.4807, "step": 12080 }, { "epoch": 0.220830972270459, "grad_norm": 6.630159754538874, "learning_rate": 9.07520003008731e-06, "loss": 17.6889, "step": 12081 }, { "epoch": 0.2208492514669055, "grad_norm": 6.679633768938046, "learning_rate": 9.075028512473976e-06, "loss": 17.7805, "step": 12082 }, { "epoch": 0.22086753066335205, "grad_norm": 7.804806343191544, "learning_rate": 9.074856980578022e-06, "loss": 17.8208, "step": 12083 }, { "epoch": 0.22088580985979855, "grad_norm": 7.006482526280416, "learning_rate": 9.074685434400046e-06, "loss": 18.0162, "step": 12084 }, { "epoch": 0.2209040890562451, "grad_norm": 7.047766027138001, "learning_rate": 9.074513873940651e-06, "loss": 17.4851, "step": 12085 }, { "epoch": 0.22092236825269163, "grad_norm": 6.3626919655071035, "learning_rate": 9.07434229920044e-06, "loss": 17.3642, "step": 12086 }, { "epoch": 0.22094064744913813, "grad_norm": 7.247062356212498, "learning_rate": 9.07417071018001e-06, "loss": 18.0834, "step": 12087 }, { "epoch": 0.22095892664558467, "grad_norm": 5.822405120819056, "learning_rate": 9.07399910687997e-06, "loss": 17.1425, "step": 12088 }, { "epoch": 0.22097720584203118, "grad_norm": 7.587197075988354, "learning_rate": 9.073827489300913e-06, "loss": 17.8303, "step": 12089 }, { "epoch": 0.2209954850384777, "grad_norm": 6.068602889869737, "learning_rate": 9.073655857443444e-06, "loss": 17.369, "step": 12090 }, { "epoch": 0.22101376423492422, "grad_norm": 7.393926217882674, "learning_rate": 9.073484211308166e-06, "loss": 17.5568, "step": 12091 }, { "epoch": 0.22103204343137076, "grad_norm": 7.637252425495857, "learning_rate": 9.073312550895678e-06, "loss": 17.6237, "step": 12092 }, { "epoch": 0.2210503226278173, "grad_norm": 6.042011046293065, "learning_rate": 9.073140876206585e-06, "loss": 17.231, "step": 12093 }, { "epoch": 0.2210686018242638, "grad_norm": 6.446392263232922, "learning_rate": 9.072969187241484e-06, "loss": 17.4578, "step": 12094 }, { "epoch": 0.22108688102071034, "grad_norm": 6.754235291343457, "learning_rate": 9.072797484000983e-06, "loss": 17.2657, "step": 12095 }, { "epoch": 0.22110516021715684, "grad_norm": 7.994525310983319, "learning_rate": 9.072625766485678e-06, "loss": 18.352, "step": 12096 }, { "epoch": 0.22112343941360338, "grad_norm": 6.907998648106382, "learning_rate": 9.072454034696173e-06, "loss": 17.8207, "step": 12097 }, { "epoch": 0.22114171861004991, "grad_norm": 7.5304715476577275, "learning_rate": 9.07228228863307e-06, "loss": 18.1671, "step": 12098 }, { "epoch": 0.22115999780649642, "grad_norm": 6.864295122027136, "learning_rate": 9.072110528296971e-06, "loss": 17.8149, "step": 12099 }, { "epoch": 0.22117827700294296, "grad_norm": 6.496031378288208, "learning_rate": 9.07193875368848e-06, "loss": 17.5587, "step": 12100 }, { "epoch": 0.22119655619938947, "grad_norm": 6.7719935577797, "learning_rate": 9.071766964808193e-06, "loss": 17.8892, "step": 12101 }, { "epoch": 0.221214835395836, "grad_norm": 6.019303894423978, "learning_rate": 9.071595161656718e-06, "loss": 17.6829, "step": 12102 }, { "epoch": 0.22123311459228254, "grad_norm": 7.398712332894582, "learning_rate": 9.071423344234658e-06, "loss": 18.0845, "step": 12103 }, { "epoch": 0.22125139378872904, "grad_norm": 5.725552928569081, "learning_rate": 9.07125151254261e-06, "loss": 17.1853, "step": 12104 }, { "epoch": 0.22126967298517558, "grad_norm": 5.828800196676859, "learning_rate": 9.071079666581178e-06, "loss": 17.1133, "step": 12105 }, { "epoch": 0.2212879521816221, "grad_norm": 5.561468258385647, "learning_rate": 9.070907806350965e-06, "loss": 16.9982, "step": 12106 }, { "epoch": 0.22130623137806862, "grad_norm": 6.118137601073453, "learning_rate": 9.070735931852575e-06, "loss": 17.4773, "step": 12107 }, { "epoch": 0.22132451057451513, "grad_norm": 6.723948730169479, "learning_rate": 9.07056404308661e-06, "loss": 17.8599, "step": 12108 }, { "epoch": 0.22134278977096167, "grad_norm": 6.251266020611498, "learning_rate": 9.070392140053667e-06, "loss": 17.5272, "step": 12109 }, { "epoch": 0.2213610689674082, "grad_norm": 5.895701484924443, "learning_rate": 9.070220222754356e-06, "loss": 17.346, "step": 12110 }, { "epoch": 0.2213793481638547, "grad_norm": 8.795498461265977, "learning_rate": 9.070048291189276e-06, "loss": 18.1511, "step": 12111 }, { "epoch": 0.22139762736030125, "grad_norm": 7.085636395269582, "learning_rate": 9.06987634535903e-06, "loss": 18.2111, "step": 12112 }, { "epoch": 0.22141590655674775, "grad_norm": 5.200696641321617, "learning_rate": 9.06970438526422e-06, "loss": 17.1305, "step": 12113 }, { "epoch": 0.2214341857531943, "grad_norm": 6.834922203306733, "learning_rate": 9.069532410905448e-06, "loss": 17.7227, "step": 12114 }, { "epoch": 0.22145246494964083, "grad_norm": 6.393837202331609, "learning_rate": 9.06936042228332e-06, "loss": 17.3959, "step": 12115 }, { "epoch": 0.22147074414608733, "grad_norm": 5.5328302722177085, "learning_rate": 9.069188419398437e-06, "loss": 17.0514, "step": 12116 }, { "epoch": 0.22148902334253387, "grad_norm": 8.510829456601536, "learning_rate": 9.0690164022514e-06, "loss": 18.1078, "step": 12117 }, { "epoch": 0.22150730253898038, "grad_norm": 5.904706526852767, "learning_rate": 9.068844370842812e-06, "loss": 17.2968, "step": 12118 }, { "epoch": 0.2215255817354269, "grad_norm": 6.622754466255814, "learning_rate": 9.068672325173282e-06, "loss": 17.389, "step": 12119 }, { "epoch": 0.22154386093187345, "grad_norm": 6.508877281910572, "learning_rate": 9.068500265243407e-06, "loss": 17.6178, "step": 12120 }, { "epoch": 0.22156214012831996, "grad_norm": 5.864018435445338, "learning_rate": 9.06832819105379e-06, "loss": 17.1021, "step": 12121 }, { "epoch": 0.2215804193247665, "grad_norm": 6.403591175170202, "learning_rate": 9.068156102605037e-06, "loss": 17.4996, "step": 12122 }, { "epoch": 0.221598698521213, "grad_norm": 6.990439120209378, "learning_rate": 9.067983999897751e-06, "loss": 17.4864, "step": 12123 }, { "epoch": 0.22161697771765954, "grad_norm": 6.4612132220284915, "learning_rate": 9.067811882932533e-06, "loss": 17.3983, "step": 12124 }, { "epoch": 0.22163525691410604, "grad_norm": 5.932857378409871, "learning_rate": 9.067639751709987e-06, "loss": 17.5504, "step": 12125 }, { "epoch": 0.22165353611055258, "grad_norm": 6.832095320539582, "learning_rate": 9.067467606230717e-06, "loss": 17.6407, "step": 12126 }, { "epoch": 0.22167181530699911, "grad_norm": 5.785721329037084, "learning_rate": 9.067295446495326e-06, "loss": 17.2609, "step": 12127 }, { "epoch": 0.22169009450344562, "grad_norm": 6.631662600339456, "learning_rate": 9.067123272504417e-06, "loss": 17.3671, "step": 12128 }, { "epoch": 0.22170837369989216, "grad_norm": 7.589981984568511, "learning_rate": 9.066951084258593e-06, "loss": 18.2443, "step": 12129 }, { "epoch": 0.22172665289633867, "grad_norm": 5.525196115915716, "learning_rate": 9.06677888175846e-06, "loss": 16.9806, "step": 12130 }, { "epoch": 0.2217449320927852, "grad_norm": 6.955554771941406, "learning_rate": 9.06660666500462e-06, "loss": 17.662, "step": 12131 }, { "epoch": 0.22176321128923174, "grad_norm": 5.991003541933081, "learning_rate": 9.066434433997674e-06, "loss": 17.4138, "step": 12132 }, { "epoch": 0.22178149048567825, "grad_norm": 7.893901139197201, "learning_rate": 9.06626218873823e-06, "loss": 18.0052, "step": 12133 }, { "epoch": 0.22179976968212478, "grad_norm": 7.379640206812513, "learning_rate": 9.066089929226891e-06, "loss": 18.0161, "step": 12134 }, { "epoch": 0.2218180488785713, "grad_norm": 5.8641168083447806, "learning_rate": 9.065917655464258e-06, "loss": 17.3031, "step": 12135 }, { "epoch": 0.22183632807501782, "grad_norm": 6.350075766092297, "learning_rate": 9.065745367450938e-06, "loss": 17.3539, "step": 12136 }, { "epoch": 0.22185460727146436, "grad_norm": 7.8634994124227955, "learning_rate": 9.065573065187531e-06, "loss": 17.9024, "step": 12137 }, { "epoch": 0.22187288646791087, "grad_norm": 5.082605456754155, "learning_rate": 9.065400748674646e-06, "loss": 16.9995, "step": 12138 }, { "epoch": 0.2218911656643574, "grad_norm": 9.111701523707396, "learning_rate": 9.065228417912882e-06, "loss": 18.4247, "step": 12139 }, { "epoch": 0.2219094448608039, "grad_norm": 7.461955655034305, "learning_rate": 9.065056072902847e-06, "loss": 17.6911, "step": 12140 }, { "epoch": 0.22192772405725045, "grad_norm": 5.56744554253036, "learning_rate": 9.06488371364514e-06, "loss": 17.2422, "step": 12141 }, { "epoch": 0.22194600325369696, "grad_norm": 6.874676922587669, "learning_rate": 9.064711340140373e-06, "loss": 17.7655, "step": 12142 }, { "epoch": 0.2219642824501435, "grad_norm": 6.967705772045241, "learning_rate": 9.064538952389141e-06, "loss": 17.7408, "step": 12143 }, { "epoch": 0.22198256164659003, "grad_norm": 5.7354699215764136, "learning_rate": 9.064366550392056e-06, "loss": 17.2371, "step": 12144 }, { "epoch": 0.22200084084303653, "grad_norm": 6.379178656800686, "learning_rate": 9.064194134149718e-06, "loss": 17.4984, "step": 12145 }, { "epoch": 0.22201912003948307, "grad_norm": 6.637973537030778, "learning_rate": 9.064021703662732e-06, "loss": 17.7825, "step": 12146 }, { "epoch": 0.22203739923592958, "grad_norm": 8.032869745002564, "learning_rate": 9.0638492589317e-06, "loss": 18.2834, "step": 12147 }, { "epoch": 0.2220556784323761, "grad_norm": 6.144100501655099, "learning_rate": 9.063676799957231e-06, "loss": 17.0968, "step": 12148 }, { "epoch": 0.22207395762882265, "grad_norm": 6.762697195423152, "learning_rate": 9.063504326739929e-06, "loss": 17.5261, "step": 12149 }, { "epoch": 0.22209223682526916, "grad_norm": 5.334294547815651, "learning_rate": 9.063331839280395e-06, "loss": 17.4522, "step": 12150 }, { "epoch": 0.2221105160217157, "grad_norm": 6.868929711870742, "learning_rate": 9.063159337579238e-06, "loss": 17.7483, "step": 12151 }, { "epoch": 0.2221287952181622, "grad_norm": 6.366454532936392, "learning_rate": 9.062986821637056e-06, "loss": 17.6542, "step": 12152 }, { "epoch": 0.22214707441460874, "grad_norm": 7.9848156345152255, "learning_rate": 9.06281429145446e-06, "loss": 18.2242, "step": 12153 }, { "epoch": 0.22216535361105527, "grad_norm": 6.573086364479184, "learning_rate": 9.062641747032052e-06, "loss": 17.4293, "step": 12154 }, { "epoch": 0.22218363280750178, "grad_norm": 6.234651122800711, "learning_rate": 9.06246918837044e-06, "loss": 17.3009, "step": 12155 }, { "epoch": 0.22220191200394832, "grad_norm": 6.491836262913137, "learning_rate": 9.062296615470223e-06, "loss": 17.6475, "step": 12156 }, { "epoch": 0.22222019120039482, "grad_norm": 5.141991111949409, "learning_rate": 9.062124028332008e-06, "loss": 16.8352, "step": 12157 }, { "epoch": 0.22223847039684136, "grad_norm": 7.045667001321327, "learning_rate": 9.061951426956403e-06, "loss": 17.8412, "step": 12158 }, { "epoch": 0.22225674959328787, "grad_norm": 7.651692323400573, "learning_rate": 9.06177881134401e-06, "loss": 18.0355, "step": 12159 }, { "epoch": 0.2222750287897344, "grad_norm": 7.487448585021054, "learning_rate": 9.061606181495436e-06, "loss": 17.893, "step": 12160 }, { "epoch": 0.22229330798618094, "grad_norm": 7.015425481940219, "learning_rate": 9.061433537411285e-06, "loss": 18.1156, "step": 12161 }, { "epoch": 0.22231158718262745, "grad_norm": 6.962871882681409, "learning_rate": 9.06126087909216e-06, "loss": 17.9275, "step": 12162 }, { "epoch": 0.22232986637907398, "grad_norm": 6.817616740706698, "learning_rate": 9.061088206538668e-06, "loss": 17.5149, "step": 12163 }, { "epoch": 0.2223481455755205, "grad_norm": 6.509896268933717, "learning_rate": 9.060915519751415e-06, "loss": 17.6248, "step": 12164 }, { "epoch": 0.22236642477196703, "grad_norm": 6.315903827237091, "learning_rate": 9.060742818731006e-06, "loss": 17.1711, "step": 12165 }, { "epoch": 0.22238470396841356, "grad_norm": 6.4543926230363935, "learning_rate": 9.060570103478043e-06, "loss": 17.5929, "step": 12166 }, { "epoch": 0.22240298316486007, "grad_norm": 7.083003033265911, "learning_rate": 9.060397373993138e-06, "loss": 18.1269, "step": 12167 }, { "epoch": 0.2224212623613066, "grad_norm": 6.918556123420934, "learning_rate": 9.06022463027689e-06, "loss": 17.7514, "step": 12168 }, { "epoch": 0.2224395415577531, "grad_norm": 6.13862923828019, "learning_rate": 9.060051872329907e-06, "loss": 17.3936, "step": 12169 }, { "epoch": 0.22245782075419965, "grad_norm": 7.548290244094069, "learning_rate": 9.059879100152795e-06, "loss": 18.0276, "step": 12170 }, { "epoch": 0.22247609995064618, "grad_norm": 6.654352827636281, "learning_rate": 9.05970631374616e-06, "loss": 17.6661, "step": 12171 }, { "epoch": 0.2224943791470927, "grad_norm": 6.23502675325729, "learning_rate": 9.059533513110605e-06, "loss": 17.4606, "step": 12172 }, { "epoch": 0.22251265834353923, "grad_norm": 7.595924061961402, "learning_rate": 9.05936069824674e-06, "loss": 18.3619, "step": 12173 }, { "epoch": 0.22253093753998573, "grad_norm": 5.993091462334962, "learning_rate": 9.059187869155167e-06, "loss": 17.4158, "step": 12174 }, { "epoch": 0.22254921673643227, "grad_norm": 5.892020062009522, "learning_rate": 9.05901502583649e-06, "loss": 17.3697, "step": 12175 }, { "epoch": 0.22256749593287878, "grad_norm": 4.909215357289737, "learning_rate": 9.05884216829132e-06, "loss": 16.8452, "step": 12176 }, { "epoch": 0.22258577512932531, "grad_norm": 6.505330308008546, "learning_rate": 9.05866929652026e-06, "loss": 17.6892, "step": 12177 }, { "epoch": 0.22260405432577185, "grad_norm": 7.260690227232895, "learning_rate": 9.058496410523917e-06, "loss": 18.0694, "step": 12178 }, { "epoch": 0.22262233352221836, "grad_norm": 6.559486921726839, "learning_rate": 9.058323510302896e-06, "loss": 17.6832, "step": 12179 }, { "epoch": 0.2226406127186649, "grad_norm": 6.477247629775747, "learning_rate": 9.058150595857803e-06, "loss": 17.4083, "step": 12180 }, { "epoch": 0.2226588919151114, "grad_norm": 6.222961774733105, "learning_rate": 9.057977667189244e-06, "loss": 17.6799, "step": 12181 }, { "epoch": 0.22267717111155794, "grad_norm": 7.0625027289353275, "learning_rate": 9.057804724297825e-06, "loss": 17.8661, "step": 12182 }, { "epoch": 0.22269545030800447, "grad_norm": 6.206804862803218, "learning_rate": 9.057631767184153e-06, "loss": 17.3949, "step": 12183 }, { "epoch": 0.22271372950445098, "grad_norm": 6.557056824978244, "learning_rate": 9.057458795848834e-06, "loss": 17.5059, "step": 12184 }, { "epoch": 0.22273200870089752, "grad_norm": 6.20159662590665, "learning_rate": 9.057285810292474e-06, "loss": 17.278, "step": 12185 }, { "epoch": 0.22275028789734402, "grad_norm": 6.695076245431577, "learning_rate": 9.057112810515681e-06, "loss": 17.743, "step": 12186 }, { "epoch": 0.22276856709379056, "grad_norm": 6.108160305567904, "learning_rate": 9.056939796519056e-06, "loss": 17.4396, "step": 12187 }, { "epoch": 0.2227868462902371, "grad_norm": 9.339105529261538, "learning_rate": 9.056766768303212e-06, "loss": 18.6124, "step": 12188 }, { "epoch": 0.2228051254866836, "grad_norm": 5.530740482200743, "learning_rate": 9.056593725868752e-06, "loss": 16.9295, "step": 12189 }, { "epoch": 0.22282340468313014, "grad_norm": 6.706535134975608, "learning_rate": 9.056420669216281e-06, "loss": 17.8878, "step": 12190 }, { "epoch": 0.22284168387957665, "grad_norm": 5.2416419568065145, "learning_rate": 9.05624759834641e-06, "loss": 17.0756, "step": 12191 }, { "epoch": 0.22285996307602318, "grad_norm": 7.805341729400662, "learning_rate": 9.056074513259742e-06, "loss": 18.2496, "step": 12192 }, { "epoch": 0.2228782422724697, "grad_norm": 7.356348571563792, "learning_rate": 9.055901413956885e-06, "loss": 17.8351, "step": 12193 }, { "epoch": 0.22289652146891623, "grad_norm": 6.1622294015402845, "learning_rate": 9.055728300438445e-06, "loss": 17.1803, "step": 12194 }, { "epoch": 0.22291480066536276, "grad_norm": 6.4323271410674945, "learning_rate": 9.05555517270503e-06, "loss": 17.4918, "step": 12195 }, { "epoch": 0.22293307986180927, "grad_norm": 7.902888915890806, "learning_rate": 9.055382030757244e-06, "loss": 18.0494, "step": 12196 }, { "epoch": 0.2229513590582558, "grad_norm": 14.911929793128163, "learning_rate": 9.0552088745957e-06, "loss": 17.9418, "step": 12197 }, { "epoch": 0.2229696382547023, "grad_norm": 8.190149957916722, "learning_rate": 9.055035704220998e-06, "loss": 18.1531, "step": 12198 }, { "epoch": 0.22298791745114885, "grad_norm": 6.5247192925214375, "learning_rate": 9.054862519633749e-06, "loss": 17.3929, "step": 12199 }, { "epoch": 0.22300619664759538, "grad_norm": 5.65360135672644, "learning_rate": 9.054689320834557e-06, "loss": 16.9583, "step": 12200 }, { "epoch": 0.2230244758440419, "grad_norm": 6.677669658979401, "learning_rate": 9.054516107824031e-06, "loss": 17.7014, "step": 12201 }, { "epoch": 0.22304275504048843, "grad_norm": 6.8350644668940514, "learning_rate": 9.05434288060278e-06, "loss": 17.5601, "step": 12202 }, { "epoch": 0.22306103423693494, "grad_norm": 7.85227969398357, "learning_rate": 9.054169639171407e-06, "loss": 17.7565, "step": 12203 }, { "epoch": 0.22307931343338147, "grad_norm": 6.945638606751802, "learning_rate": 9.05399638353052e-06, "loss": 17.969, "step": 12204 }, { "epoch": 0.223097592629828, "grad_norm": 6.036209937688848, "learning_rate": 9.053823113680731e-06, "loss": 17.3046, "step": 12205 }, { "epoch": 0.22311587182627451, "grad_norm": 6.685623520160601, "learning_rate": 9.053649829622642e-06, "loss": 18.1842, "step": 12206 }, { "epoch": 0.22313415102272105, "grad_norm": 5.7246782458163965, "learning_rate": 9.053476531356861e-06, "loss": 17.2299, "step": 12207 }, { "epoch": 0.22315243021916756, "grad_norm": 7.408503539027606, "learning_rate": 9.053303218883998e-06, "loss": 18.1562, "step": 12208 }, { "epoch": 0.2231707094156141, "grad_norm": 6.427588789991955, "learning_rate": 9.05312989220466e-06, "loss": 17.3499, "step": 12209 }, { "epoch": 0.2231889886120606, "grad_norm": 6.795794685545221, "learning_rate": 9.052956551319452e-06, "loss": 17.3862, "step": 12210 }, { "epoch": 0.22320726780850714, "grad_norm": 7.06147244109122, "learning_rate": 9.052783196228983e-06, "loss": 17.317, "step": 12211 }, { "epoch": 0.22322554700495367, "grad_norm": 6.7282083201337315, "learning_rate": 9.05260982693386e-06, "loss": 17.6491, "step": 12212 }, { "epoch": 0.22324382620140018, "grad_norm": 5.4593299113472495, "learning_rate": 9.05243644343469e-06, "loss": 17.2507, "step": 12213 }, { "epoch": 0.22326210539784672, "grad_norm": 6.109716435642244, "learning_rate": 9.052263045732087e-06, "loss": 17.6383, "step": 12214 }, { "epoch": 0.22328038459429322, "grad_norm": 8.530470453759085, "learning_rate": 9.05208963382665e-06, "loss": 17.5968, "step": 12215 }, { "epoch": 0.22329866379073976, "grad_norm": 6.024941972181284, "learning_rate": 9.05191620771899e-06, "loss": 17.3743, "step": 12216 }, { "epoch": 0.2233169429871863, "grad_norm": 5.780308959116288, "learning_rate": 9.051742767409716e-06, "loss": 16.8978, "step": 12217 }, { "epoch": 0.2233352221836328, "grad_norm": 7.2216651620068735, "learning_rate": 9.051569312899436e-06, "loss": 17.4587, "step": 12218 }, { "epoch": 0.22335350138007934, "grad_norm": 6.794678175019179, "learning_rate": 9.051395844188755e-06, "loss": 17.5773, "step": 12219 }, { "epoch": 0.22337178057652585, "grad_norm": 6.631565450155475, "learning_rate": 9.051222361278286e-06, "loss": 17.563, "step": 12220 }, { "epoch": 0.22339005977297238, "grad_norm": 7.237088734195267, "learning_rate": 9.051048864168632e-06, "loss": 17.6461, "step": 12221 }, { "epoch": 0.22340833896941892, "grad_norm": 5.937021023793204, "learning_rate": 9.050875352860404e-06, "loss": 17.3293, "step": 12222 }, { "epoch": 0.22342661816586543, "grad_norm": 8.196093042757301, "learning_rate": 9.050701827354211e-06, "loss": 17.6361, "step": 12223 }, { "epoch": 0.22344489736231196, "grad_norm": 7.425693450852276, "learning_rate": 9.050528287650657e-06, "loss": 17.8749, "step": 12224 }, { "epoch": 0.22346317655875847, "grad_norm": 5.215058225206581, "learning_rate": 9.050354733750354e-06, "loss": 17.0018, "step": 12225 }, { "epoch": 0.223481455755205, "grad_norm": 5.787247505005862, "learning_rate": 9.05018116565391e-06, "loss": 17.2517, "step": 12226 }, { "epoch": 0.2234997349516515, "grad_norm": 6.5409886645510635, "learning_rate": 9.05000758336193e-06, "loss": 17.6792, "step": 12227 }, { "epoch": 0.22351801414809805, "grad_norm": 8.445251899757077, "learning_rate": 9.049833986875027e-06, "loss": 17.5437, "step": 12228 }, { "epoch": 0.22353629334454458, "grad_norm": 6.821241351948771, "learning_rate": 9.049660376193808e-06, "loss": 17.546, "step": 12229 }, { "epoch": 0.2235545725409911, "grad_norm": 7.823994149490272, "learning_rate": 9.049486751318879e-06, "loss": 17.9458, "step": 12230 }, { "epoch": 0.22357285173743763, "grad_norm": 7.369459589380154, "learning_rate": 9.04931311225085e-06, "loss": 18.0869, "step": 12231 }, { "epoch": 0.22359113093388414, "grad_norm": 7.6500548569966185, "learning_rate": 9.04913945899033e-06, "loss": 17.8878, "step": 12232 }, { "epoch": 0.22360941013033067, "grad_norm": 5.837867098791316, "learning_rate": 9.048965791537929e-06, "loss": 17.042, "step": 12233 }, { "epoch": 0.2236276893267772, "grad_norm": 6.050463393955847, "learning_rate": 9.048792109894253e-06, "loss": 17.1248, "step": 12234 }, { "epoch": 0.22364596852322371, "grad_norm": 8.164927438234022, "learning_rate": 9.048618414059912e-06, "loss": 18.2822, "step": 12235 }, { "epoch": 0.22366424771967025, "grad_norm": 7.768789550138342, "learning_rate": 9.048444704035517e-06, "loss": 18.2395, "step": 12236 }, { "epoch": 0.22368252691611676, "grad_norm": 5.907906867932357, "learning_rate": 9.048270979821673e-06, "loss": 17.217, "step": 12237 }, { "epoch": 0.2237008061125633, "grad_norm": 6.668599665032728, "learning_rate": 9.04809724141899e-06, "loss": 17.6935, "step": 12238 }, { "epoch": 0.22371908530900983, "grad_norm": 6.426863155040541, "learning_rate": 9.047923488828079e-06, "loss": 17.4179, "step": 12239 }, { "epoch": 0.22373736450545634, "grad_norm": 7.489766139729761, "learning_rate": 9.047749722049545e-06, "loss": 17.9008, "step": 12240 }, { "epoch": 0.22375564370190287, "grad_norm": 7.03745251707885, "learning_rate": 9.047575941084002e-06, "loss": 17.8194, "step": 12241 }, { "epoch": 0.22377392289834938, "grad_norm": 8.213014293127971, "learning_rate": 9.047402145932055e-06, "loss": 18.2351, "step": 12242 }, { "epoch": 0.22379220209479592, "grad_norm": 6.809378559607105, "learning_rate": 9.047228336594315e-06, "loss": 17.6302, "step": 12243 }, { "epoch": 0.22381048129124242, "grad_norm": 7.37470269898717, "learning_rate": 9.047054513071391e-06, "loss": 17.9828, "step": 12244 }, { "epoch": 0.22382876048768896, "grad_norm": 11.370524647387033, "learning_rate": 9.046880675363892e-06, "loss": 17.5631, "step": 12245 }, { "epoch": 0.2238470396841355, "grad_norm": 7.798356886013414, "learning_rate": 9.046706823472428e-06, "loss": 18.0636, "step": 12246 }, { "epoch": 0.223865318880582, "grad_norm": 7.08648155761893, "learning_rate": 9.046532957397606e-06, "loss": 17.9467, "step": 12247 }, { "epoch": 0.22388359807702854, "grad_norm": 5.435527706552968, "learning_rate": 9.046359077140039e-06, "loss": 17.0462, "step": 12248 }, { "epoch": 0.22390187727347505, "grad_norm": 6.68293259405961, "learning_rate": 9.046185182700333e-06, "loss": 17.7712, "step": 12249 }, { "epoch": 0.22392015646992158, "grad_norm": 5.368867604488464, "learning_rate": 9.0460112740791e-06, "loss": 17.112, "step": 12250 }, { "epoch": 0.22393843566636812, "grad_norm": 5.77104810479662, "learning_rate": 9.045837351276949e-06, "loss": 17.4235, "step": 12251 }, { "epoch": 0.22395671486281463, "grad_norm": 6.159005156455112, "learning_rate": 9.04566341429449e-06, "loss": 17.606, "step": 12252 }, { "epoch": 0.22397499405926116, "grad_norm": 6.8603474478294215, "learning_rate": 9.04548946313233e-06, "loss": 17.6305, "step": 12253 }, { "epoch": 0.22399327325570767, "grad_norm": 6.893380745047261, "learning_rate": 9.04531549779108e-06, "loss": 17.9018, "step": 12254 }, { "epoch": 0.2240115524521542, "grad_norm": 7.395851504751612, "learning_rate": 9.045141518271352e-06, "loss": 18.0254, "step": 12255 }, { "epoch": 0.22402983164860074, "grad_norm": 7.459036947022944, "learning_rate": 9.044967524573754e-06, "loss": 17.4971, "step": 12256 }, { "epoch": 0.22404811084504725, "grad_norm": 7.4754209209697455, "learning_rate": 9.044793516698894e-06, "loss": 17.8842, "step": 12257 }, { "epoch": 0.22406639004149378, "grad_norm": 6.060827315306458, "learning_rate": 9.044619494647383e-06, "loss": 17.2545, "step": 12258 }, { "epoch": 0.2240846692379403, "grad_norm": 9.437373283210361, "learning_rate": 9.044445458419834e-06, "loss": 18.409, "step": 12259 }, { "epoch": 0.22410294843438683, "grad_norm": 7.125314012595321, "learning_rate": 9.044271408016856e-06, "loss": 17.8622, "step": 12260 }, { "epoch": 0.22412122763083334, "grad_norm": 6.546315968350545, "learning_rate": 9.044097343439055e-06, "loss": 17.2875, "step": 12261 }, { "epoch": 0.22413950682727987, "grad_norm": 6.622025460441975, "learning_rate": 9.043923264687045e-06, "loss": 17.3571, "step": 12262 }, { "epoch": 0.2241577860237264, "grad_norm": 7.090160753684515, "learning_rate": 9.043749171761433e-06, "loss": 17.9867, "step": 12263 }, { "epoch": 0.22417606522017292, "grad_norm": 6.387876368059813, "learning_rate": 9.043575064662833e-06, "loss": 17.4131, "step": 12264 }, { "epoch": 0.22419434441661945, "grad_norm": 5.936893993367334, "learning_rate": 9.043400943391853e-06, "loss": 17.2094, "step": 12265 }, { "epoch": 0.22421262361306596, "grad_norm": 6.055384088286888, "learning_rate": 9.043226807949103e-06, "loss": 17.3158, "step": 12266 }, { "epoch": 0.2242309028095125, "grad_norm": 7.2211548054105, "learning_rate": 9.043052658335195e-06, "loss": 17.7897, "step": 12267 }, { "epoch": 0.22424918200595903, "grad_norm": 6.633570997443187, "learning_rate": 9.042878494550736e-06, "loss": 17.3886, "step": 12268 }, { "epoch": 0.22426746120240554, "grad_norm": 6.899123000691209, "learning_rate": 9.04270431659634e-06, "loss": 17.9087, "step": 12269 }, { "epoch": 0.22428574039885207, "grad_norm": 6.548213007922687, "learning_rate": 9.042530124472617e-06, "loss": 17.3299, "step": 12270 }, { "epoch": 0.22430401959529858, "grad_norm": 6.58743098605056, "learning_rate": 9.042355918180176e-06, "loss": 17.5219, "step": 12271 }, { "epoch": 0.22432229879174512, "grad_norm": 5.714440633680433, "learning_rate": 9.042181697719627e-06, "loss": 17.272, "step": 12272 }, { "epoch": 0.22434057798819165, "grad_norm": 8.0434319343778, "learning_rate": 9.042007463091584e-06, "loss": 18.2251, "step": 12273 }, { "epoch": 0.22435885718463816, "grad_norm": 6.3418512583693545, "learning_rate": 9.041833214296656e-06, "loss": 17.4084, "step": 12274 }, { "epoch": 0.2243771363810847, "grad_norm": 6.476545250193185, "learning_rate": 9.041658951335451e-06, "loss": 17.1878, "step": 12275 }, { "epoch": 0.2243954155775312, "grad_norm": 6.491641118428361, "learning_rate": 9.041484674208584e-06, "loss": 17.5175, "step": 12276 }, { "epoch": 0.22441369477397774, "grad_norm": 7.263383471347915, "learning_rate": 9.041310382916663e-06, "loss": 18.2752, "step": 12277 }, { "epoch": 0.22443197397042425, "grad_norm": 5.8331459930085465, "learning_rate": 9.0411360774603e-06, "loss": 17.2002, "step": 12278 }, { "epoch": 0.22445025316687078, "grad_norm": 5.8998655177738515, "learning_rate": 9.040961757840105e-06, "loss": 17.4966, "step": 12279 }, { "epoch": 0.22446853236331732, "grad_norm": 6.678973847779486, "learning_rate": 9.04078742405669e-06, "loss": 17.5399, "step": 12280 }, { "epoch": 0.22448681155976383, "grad_norm": 8.828607081786217, "learning_rate": 9.040613076110667e-06, "loss": 18.1534, "step": 12281 }, { "epoch": 0.22450509075621036, "grad_norm": 7.563487311099231, "learning_rate": 9.040438714002645e-06, "loss": 18.0516, "step": 12282 }, { "epoch": 0.22452336995265687, "grad_norm": 6.668099309745011, "learning_rate": 9.040264337733236e-06, "loss": 17.7034, "step": 12283 }, { "epoch": 0.2245416491491034, "grad_norm": 7.098032500757102, "learning_rate": 9.04008994730305e-06, "loss": 17.812, "step": 12284 }, { "epoch": 0.22455992834554994, "grad_norm": 6.616043707524381, "learning_rate": 9.0399155427127e-06, "loss": 17.7823, "step": 12285 }, { "epoch": 0.22457820754199645, "grad_norm": 7.516322271528726, "learning_rate": 9.039741123962797e-06, "loss": 17.8506, "step": 12286 }, { "epoch": 0.22459648673844299, "grad_norm": 7.694205402970119, "learning_rate": 9.039566691053952e-06, "loss": 18.3337, "step": 12287 }, { "epoch": 0.2246147659348895, "grad_norm": 7.268622837670842, "learning_rate": 9.039392243986775e-06, "loss": 17.6922, "step": 12288 }, { "epoch": 0.22463304513133603, "grad_norm": 6.224198191373773, "learning_rate": 9.03921778276188e-06, "loss": 17.4293, "step": 12289 }, { "epoch": 0.22465132432778256, "grad_norm": 6.813770995982024, "learning_rate": 9.039043307379878e-06, "loss": 17.2797, "step": 12290 }, { "epoch": 0.22466960352422907, "grad_norm": 7.772614195152835, "learning_rate": 9.038868817841378e-06, "loss": 17.9302, "step": 12291 }, { "epoch": 0.2246878827206756, "grad_norm": 6.295833759594762, "learning_rate": 9.038694314146994e-06, "loss": 17.3379, "step": 12292 }, { "epoch": 0.22470616191712212, "grad_norm": 6.433483661840641, "learning_rate": 9.038519796297336e-06, "loss": 17.4104, "step": 12293 }, { "epoch": 0.22472444111356865, "grad_norm": 6.797406734423923, "learning_rate": 9.038345264293019e-06, "loss": 17.5433, "step": 12294 }, { "epoch": 0.22474272031001516, "grad_norm": 8.778103723823701, "learning_rate": 9.038170718134649e-06, "loss": 18.4851, "step": 12295 }, { "epoch": 0.2247609995064617, "grad_norm": 7.7645876826132225, "learning_rate": 9.037996157822843e-06, "loss": 17.9773, "step": 12296 }, { "epoch": 0.22477927870290823, "grad_norm": 7.250220959212541, "learning_rate": 9.03782158335821e-06, "loss": 17.9881, "step": 12297 }, { "epoch": 0.22479755789935474, "grad_norm": 6.823593498163983, "learning_rate": 9.037646994741362e-06, "loss": 17.6734, "step": 12298 }, { "epoch": 0.22481583709580127, "grad_norm": 6.678320920993797, "learning_rate": 9.037472391972915e-06, "loss": 17.838, "step": 12299 }, { "epoch": 0.22483411629224778, "grad_norm": 6.64005663537683, "learning_rate": 9.037297775053476e-06, "loss": 17.5277, "step": 12300 }, { "epoch": 0.22485239548869432, "grad_norm": 6.95275096027094, "learning_rate": 9.037123143983658e-06, "loss": 17.8921, "step": 12301 }, { "epoch": 0.22487067468514085, "grad_norm": 7.228429314890307, "learning_rate": 9.036948498764071e-06, "loss": 17.8771, "step": 12302 }, { "epoch": 0.22488895388158736, "grad_norm": 7.130853561770472, "learning_rate": 9.036773839395335e-06, "loss": 17.9073, "step": 12303 }, { "epoch": 0.2249072330780339, "grad_norm": 7.2958135662705255, "learning_rate": 9.036599165878053e-06, "loss": 17.8241, "step": 12304 }, { "epoch": 0.2249255122744804, "grad_norm": 6.491199829661077, "learning_rate": 9.036424478212843e-06, "loss": 17.3875, "step": 12305 }, { "epoch": 0.22494379147092694, "grad_norm": 7.487583887635919, "learning_rate": 9.036249776400317e-06, "loss": 17.8521, "step": 12306 }, { "epoch": 0.22496207066737348, "grad_norm": 5.91990847320521, "learning_rate": 9.036075060441083e-06, "loss": 17.1465, "step": 12307 }, { "epoch": 0.22498034986381998, "grad_norm": 16.952346676832793, "learning_rate": 9.035900330335757e-06, "loss": 17.8836, "step": 12308 }, { "epoch": 0.22499862906026652, "grad_norm": 6.955858301762353, "learning_rate": 9.035725586084951e-06, "loss": 17.3048, "step": 12309 }, { "epoch": 0.22501690825671303, "grad_norm": 7.941360770280564, "learning_rate": 9.035550827689276e-06, "loss": 18.2653, "step": 12310 }, { "epoch": 0.22503518745315956, "grad_norm": 8.13819670099608, "learning_rate": 9.035376055149347e-06, "loss": 18.0662, "step": 12311 }, { "epoch": 0.22505346664960607, "grad_norm": 7.437032540865966, "learning_rate": 9.035201268465774e-06, "loss": 17.7133, "step": 12312 }, { "epoch": 0.2250717458460526, "grad_norm": 8.855246124244507, "learning_rate": 9.035026467639172e-06, "loss": 18.5506, "step": 12313 }, { "epoch": 0.22509002504249914, "grad_norm": 7.074037704114128, "learning_rate": 9.034851652670151e-06, "loss": 17.9627, "step": 12314 }, { "epoch": 0.22510830423894565, "grad_norm": 7.773871078629837, "learning_rate": 9.034676823559326e-06, "loss": 17.9938, "step": 12315 }, { "epoch": 0.22512658343539219, "grad_norm": 7.41166943339688, "learning_rate": 9.034501980307309e-06, "loss": 17.877, "step": 12316 }, { "epoch": 0.2251448626318387, "grad_norm": 7.703758649739981, "learning_rate": 9.034327122914711e-06, "loss": 17.9387, "step": 12317 }, { "epoch": 0.22516314182828523, "grad_norm": 6.287787631773602, "learning_rate": 9.034152251382148e-06, "loss": 17.5248, "step": 12318 }, { "epoch": 0.22518142102473176, "grad_norm": 5.991404866001102, "learning_rate": 9.033977365710231e-06, "loss": 17.175, "step": 12319 }, { "epoch": 0.22519970022117827, "grad_norm": 6.737952129673305, "learning_rate": 9.033802465899573e-06, "loss": 17.4521, "step": 12320 }, { "epoch": 0.2252179794176248, "grad_norm": 6.635142538134955, "learning_rate": 9.033627551950788e-06, "loss": 17.5758, "step": 12321 }, { "epoch": 0.22523625861407132, "grad_norm": 6.187063928254991, "learning_rate": 9.03345262386449e-06, "loss": 17.3834, "step": 12322 }, { "epoch": 0.22525453781051785, "grad_norm": 6.854516115810566, "learning_rate": 9.033277681641288e-06, "loss": 17.5332, "step": 12323 }, { "epoch": 0.2252728170069644, "grad_norm": 7.3160421278758445, "learning_rate": 9.033102725281799e-06, "loss": 18.0384, "step": 12324 }, { "epoch": 0.2252910962034109, "grad_norm": 5.726426405407629, "learning_rate": 9.032927754786633e-06, "loss": 17.1843, "step": 12325 }, { "epoch": 0.22530937539985743, "grad_norm": 6.094053491340705, "learning_rate": 9.032752770156408e-06, "loss": 17.4087, "step": 12326 }, { "epoch": 0.22532765459630394, "grad_norm": 7.677299805986886, "learning_rate": 9.032577771391732e-06, "loss": 18.114, "step": 12327 }, { "epoch": 0.22534593379275047, "grad_norm": 6.226787280353511, "learning_rate": 9.032402758493222e-06, "loss": 17.5154, "step": 12328 }, { "epoch": 0.22536421298919698, "grad_norm": 7.993772075133782, "learning_rate": 9.032227731461492e-06, "loss": 18.0567, "step": 12329 }, { "epoch": 0.22538249218564352, "grad_norm": 6.667197143954941, "learning_rate": 9.03205269029715e-06, "loss": 17.7498, "step": 12330 }, { "epoch": 0.22540077138209005, "grad_norm": 6.9567092826670915, "learning_rate": 9.031877635000817e-06, "loss": 17.7729, "step": 12331 }, { "epoch": 0.22541905057853656, "grad_norm": 6.854502695810457, "learning_rate": 9.0317025655731e-06, "loss": 17.7204, "step": 12332 }, { "epoch": 0.2254373297749831, "grad_norm": 6.71738812866057, "learning_rate": 9.031527482014617e-06, "loss": 17.5618, "step": 12333 }, { "epoch": 0.2254556089714296, "grad_norm": 7.189306010353928, "learning_rate": 9.031352384325977e-06, "loss": 17.6072, "step": 12334 }, { "epoch": 0.22547388816787614, "grad_norm": 7.855535097029646, "learning_rate": 9.0311772725078e-06, "loss": 17.5918, "step": 12335 }, { "epoch": 0.22549216736432268, "grad_norm": 6.600393763396325, "learning_rate": 9.031002146560697e-06, "loss": 17.3361, "step": 12336 }, { "epoch": 0.22551044656076918, "grad_norm": 6.748223412611591, "learning_rate": 9.03082700648528e-06, "loss": 17.6995, "step": 12337 }, { "epoch": 0.22552872575721572, "grad_norm": 5.70658566896244, "learning_rate": 9.030651852282164e-06, "loss": 17.1565, "step": 12338 }, { "epoch": 0.22554700495366223, "grad_norm": 6.576046686350633, "learning_rate": 9.030476683951961e-06, "loss": 17.6812, "step": 12339 }, { "epoch": 0.22556528415010876, "grad_norm": 6.747470316806803, "learning_rate": 9.03030150149529e-06, "loss": 17.6554, "step": 12340 }, { "epoch": 0.2255835633465553, "grad_norm": 6.812748807744925, "learning_rate": 9.03012630491276e-06, "loss": 17.8052, "step": 12341 }, { "epoch": 0.2256018425430018, "grad_norm": 6.138326043650591, "learning_rate": 9.029951094204988e-06, "loss": 17.3363, "step": 12342 }, { "epoch": 0.22562012173944834, "grad_norm": 6.25429209699059, "learning_rate": 9.029775869372589e-06, "loss": 17.4272, "step": 12343 }, { "epoch": 0.22563840093589485, "grad_norm": 8.214937102579777, "learning_rate": 9.029600630416171e-06, "loss": 18.3377, "step": 12344 }, { "epoch": 0.2256566801323414, "grad_norm": 6.270352054936354, "learning_rate": 9.029425377336356e-06, "loss": 17.568, "step": 12345 }, { "epoch": 0.2256749593287879, "grad_norm": 5.901018463275873, "learning_rate": 9.029250110133753e-06, "loss": 17.266, "step": 12346 }, { "epoch": 0.22569323852523443, "grad_norm": 6.701032995897633, "learning_rate": 9.02907482880898e-06, "loss": 18.0243, "step": 12347 }, { "epoch": 0.22571151772168097, "grad_norm": 6.966690579019389, "learning_rate": 9.028899533362645e-06, "loss": 17.8246, "step": 12348 }, { "epoch": 0.22572979691812747, "grad_norm": 7.11109379819678, "learning_rate": 9.02872422379537e-06, "loss": 17.9911, "step": 12349 }, { "epoch": 0.225748076114574, "grad_norm": 7.267833352735575, "learning_rate": 9.028548900107767e-06, "loss": 17.9534, "step": 12350 }, { "epoch": 0.22576635531102052, "grad_norm": 6.169634868324523, "learning_rate": 9.028373562300448e-06, "loss": 17.2552, "step": 12351 }, { "epoch": 0.22578463450746705, "grad_norm": 6.5126285468693474, "learning_rate": 9.02819821037403e-06, "loss": 17.5073, "step": 12352 }, { "epoch": 0.2258029137039136, "grad_norm": 7.21379651562743, "learning_rate": 9.028022844329126e-06, "loss": 17.9977, "step": 12353 }, { "epoch": 0.2258211929003601, "grad_norm": 6.5548063039005315, "learning_rate": 9.027847464166353e-06, "loss": 17.8834, "step": 12354 }, { "epoch": 0.22583947209680663, "grad_norm": 7.08098150183937, "learning_rate": 9.027672069886322e-06, "loss": 17.8187, "step": 12355 }, { "epoch": 0.22585775129325314, "grad_norm": 7.038826231934636, "learning_rate": 9.02749666148965e-06, "loss": 17.9689, "step": 12356 }, { "epoch": 0.22587603048969968, "grad_norm": 7.07682686405181, "learning_rate": 9.027321238976954e-06, "loss": 17.8265, "step": 12357 }, { "epoch": 0.2258943096861462, "grad_norm": 7.530074738438792, "learning_rate": 9.027145802348844e-06, "loss": 17.7535, "step": 12358 }, { "epoch": 0.22591258888259272, "grad_norm": 6.446825344866139, "learning_rate": 9.02697035160594e-06, "loss": 17.585, "step": 12359 }, { "epoch": 0.22593086807903925, "grad_norm": 7.637084923790986, "learning_rate": 9.026794886748853e-06, "loss": 18.0022, "step": 12360 }, { "epoch": 0.22594914727548576, "grad_norm": 6.987077479178613, "learning_rate": 9.0266194077782e-06, "loss": 17.7459, "step": 12361 }, { "epoch": 0.2259674264719323, "grad_norm": 6.824769412214328, "learning_rate": 9.026443914694594e-06, "loss": 17.9237, "step": 12362 }, { "epoch": 0.2259857056683788, "grad_norm": 6.348585215954245, "learning_rate": 9.026268407498651e-06, "loss": 17.421, "step": 12363 }, { "epoch": 0.22600398486482534, "grad_norm": 6.102880320893344, "learning_rate": 9.026092886190989e-06, "loss": 17.4555, "step": 12364 }, { "epoch": 0.22602226406127188, "grad_norm": 6.55237701023886, "learning_rate": 9.02591735077222e-06, "loss": 17.4844, "step": 12365 }, { "epoch": 0.22604054325771838, "grad_norm": 6.144109160099827, "learning_rate": 9.025741801242959e-06, "loss": 17.6064, "step": 12366 }, { "epoch": 0.22605882245416492, "grad_norm": 7.027676643650388, "learning_rate": 9.025566237603822e-06, "loss": 17.5832, "step": 12367 }, { "epoch": 0.22607710165061143, "grad_norm": 6.0564480838712775, "learning_rate": 9.025390659855426e-06, "loss": 17.2484, "step": 12368 }, { "epoch": 0.22609538084705796, "grad_norm": 6.531320813353044, "learning_rate": 9.025215067998386e-06, "loss": 17.6086, "step": 12369 }, { "epoch": 0.2261136600435045, "grad_norm": 6.95048430465292, "learning_rate": 9.025039462033314e-06, "loss": 17.643, "step": 12370 }, { "epoch": 0.226131939239951, "grad_norm": 7.44620805011939, "learning_rate": 9.024863841960829e-06, "loss": 17.6497, "step": 12371 }, { "epoch": 0.22615021843639754, "grad_norm": 8.917362870062181, "learning_rate": 9.024688207781547e-06, "loss": 18.8994, "step": 12372 }, { "epoch": 0.22616849763284405, "grad_norm": 7.280697209067718, "learning_rate": 9.02451255949608e-06, "loss": 17.5772, "step": 12373 }, { "epoch": 0.2261867768292906, "grad_norm": 6.702853337006878, "learning_rate": 9.024336897105045e-06, "loss": 17.9045, "step": 12374 }, { "epoch": 0.22620505602573712, "grad_norm": 6.547542319992636, "learning_rate": 9.024161220609061e-06, "loss": 17.392, "step": 12375 }, { "epoch": 0.22622333522218363, "grad_norm": 6.929016110654037, "learning_rate": 9.023985530008742e-06, "loss": 17.8594, "step": 12376 }, { "epoch": 0.22624161441863017, "grad_norm": 5.367073952459153, "learning_rate": 9.023809825304698e-06, "loss": 17.1462, "step": 12377 }, { "epoch": 0.22625989361507667, "grad_norm": 7.988439728945888, "learning_rate": 9.023634106497555e-06, "loss": 17.9267, "step": 12378 }, { "epoch": 0.2262781728115232, "grad_norm": 7.494605964344144, "learning_rate": 9.02345837358792e-06, "loss": 18.0292, "step": 12379 }, { "epoch": 0.22629645200796972, "grad_norm": 5.4240483208814, "learning_rate": 9.023282626576413e-06, "loss": 17.0903, "step": 12380 }, { "epoch": 0.22631473120441625, "grad_norm": 7.704682973963291, "learning_rate": 9.02310686546365e-06, "loss": 17.8397, "step": 12381 }, { "epoch": 0.2263330104008628, "grad_norm": 6.395365992338035, "learning_rate": 9.022931090250247e-06, "loss": 17.623, "step": 12382 }, { "epoch": 0.2263512895973093, "grad_norm": 6.859183186820666, "learning_rate": 9.022755300936821e-06, "loss": 17.7596, "step": 12383 }, { "epoch": 0.22636956879375583, "grad_norm": 5.765596472141772, "learning_rate": 9.022579497523985e-06, "loss": 17.1133, "step": 12384 }, { "epoch": 0.22638784799020234, "grad_norm": 7.140007933470052, "learning_rate": 9.022403680012357e-06, "loss": 17.7501, "step": 12385 }, { "epoch": 0.22640612718664888, "grad_norm": 6.718543300536023, "learning_rate": 9.022227848402552e-06, "loss": 17.6706, "step": 12386 }, { "epoch": 0.2264244063830954, "grad_norm": 5.924532017217724, "learning_rate": 9.02205200269519e-06, "loss": 17.33, "step": 12387 }, { "epoch": 0.22644268557954192, "grad_norm": 8.222872391505957, "learning_rate": 9.021876142890882e-06, "loss": 18.6182, "step": 12388 }, { "epoch": 0.22646096477598845, "grad_norm": 6.734366446496459, "learning_rate": 9.02170026899025e-06, "loss": 17.9106, "step": 12389 }, { "epoch": 0.22647924397243496, "grad_norm": 6.8857777869063055, "learning_rate": 9.021524380993906e-06, "loss": 17.7422, "step": 12390 }, { "epoch": 0.2264975231688815, "grad_norm": 7.125279116793899, "learning_rate": 9.021348478902468e-06, "loss": 17.674, "step": 12391 }, { "epoch": 0.22651580236532803, "grad_norm": 7.436442051858977, "learning_rate": 9.021172562716551e-06, "loss": 17.9451, "step": 12392 }, { "epoch": 0.22653408156177454, "grad_norm": 6.187236880627777, "learning_rate": 9.020996632436775e-06, "loss": 17.5915, "step": 12393 }, { "epoch": 0.22655236075822108, "grad_norm": 5.903576888005673, "learning_rate": 9.020820688063755e-06, "loss": 17.2732, "step": 12394 }, { "epoch": 0.22657063995466759, "grad_norm": 6.413378047315744, "learning_rate": 9.020644729598107e-06, "loss": 17.5233, "step": 12395 }, { "epoch": 0.22658891915111412, "grad_norm": 6.3484233436312, "learning_rate": 9.020468757040449e-06, "loss": 17.7247, "step": 12396 }, { "epoch": 0.22660719834756063, "grad_norm": 5.911804654071197, "learning_rate": 9.020292770391394e-06, "loss": 17.2241, "step": 12397 }, { "epoch": 0.22662547754400716, "grad_norm": 6.724395222585753, "learning_rate": 9.020116769651565e-06, "loss": 17.6779, "step": 12398 }, { "epoch": 0.2266437567404537, "grad_norm": 7.966132708749416, "learning_rate": 9.019940754821574e-06, "loss": 17.897, "step": 12399 }, { "epoch": 0.2266620359369002, "grad_norm": 8.099072340263199, "learning_rate": 9.01976472590204e-06, "loss": 18.1186, "step": 12400 }, { "epoch": 0.22668031513334674, "grad_norm": 5.802227930996529, "learning_rate": 9.01958868289358e-06, "loss": 17.2699, "step": 12401 }, { "epoch": 0.22669859432979325, "grad_norm": 6.146892512705219, "learning_rate": 9.019412625796808e-06, "loss": 17.5141, "step": 12402 }, { "epoch": 0.2267168735262398, "grad_norm": 6.308607104957777, "learning_rate": 9.019236554612346e-06, "loss": 17.3927, "step": 12403 }, { "epoch": 0.22673515272268632, "grad_norm": 8.704926628000624, "learning_rate": 9.019060469340807e-06, "loss": 18.2199, "step": 12404 }, { "epoch": 0.22675343191913283, "grad_norm": 6.7601731358840516, "learning_rate": 9.01888436998281e-06, "loss": 17.7247, "step": 12405 }, { "epoch": 0.22677171111557937, "grad_norm": 7.0988439140350135, "learning_rate": 9.018708256538972e-06, "loss": 17.521, "step": 12406 }, { "epoch": 0.22678999031202587, "grad_norm": 6.072413155833133, "learning_rate": 9.018532129009912e-06, "loss": 17.3915, "step": 12407 }, { "epoch": 0.2268082695084724, "grad_norm": 7.826093537676457, "learning_rate": 9.018355987396244e-06, "loss": 17.8245, "step": 12408 }, { "epoch": 0.22682654870491895, "grad_norm": 6.606313446617124, "learning_rate": 9.018179831698588e-06, "loss": 17.714, "step": 12409 }, { "epoch": 0.22684482790136545, "grad_norm": 8.096086364572532, "learning_rate": 9.01800366191756e-06, "loss": 17.9681, "step": 12410 }, { "epoch": 0.226863107097812, "grad_norm": 6.754049321361079, "learning_rate": 9.017827478053778e-06, "loss": 17.6225, "step": 12411 }, { "epoch": 0.2268813862942585, "grad_norm": 5.726172916773858, "learning_rate": 9.017651280107859e-06, "loss": 17.231, "step": 12412 }, { "epoch": 0.22689966549070503, "grad_norm": 7.080996932121469, "learning_rate": 9.01747506808042e-06, "loss": 17.7147, "step": 12413 }, { "epoch": 0.22691794468715154, "grad_norm": 7.132248945901099, "learning_rate": 9.017298841972082e-06, "loss": 17.9743, "step": 12414 }, { "epoch": 0.22693622388359808, "grad_norm": 5.890459474635145, "learning_rate": 9.017122601783457e-06, "loss": 17.2655, "step": 12415 }, { "epoch": 0.2269545030800446, "grad_norm": 7.9925754588042395, "learning_rate": 9.016946347515168e-06, "loss": 17.7998, "step": 12416 }, { "epoch": 0.22697278227649112, "grad_norm": 6.339927451707241, "learning_rate": 9.016770079167829e-06, "loss": 17.4274, "step": 12417 }, { "epoch": 0.22699106147293766, "grad_norm": 6.3839981397111, "learning_rate": 9.016593796742062e-06, "loss": 17.5346, "step": 12418 }, { "epoch": 0.22700934066938416, "grad_norm": 6.714476029999333, "learning_rate": 9.01641750023848e-06, "loss": 17.6582, "step": 12419 }, { "epoch": 0.2270276198658307, "grad_norm": 8.259896179033946, "learning_rate": 9.016241189657705e-06, "loss": 18.2777, "step": 12420 }, { "epoch": 0.22704589906227723, "grad_norm": 6.835530238088462, "learning_rate": 9.01606486500035e-06, "loss": 17.698, "step": 12421 }, { "epoch": 0.22706417825872374, "grad_norm": 5.3640041829032, "learning_rate": 9.015888526267039e-06, "loss": 17.0343, "step": 12422 }, { "epoch": 0.22708245745517028, "grad_norm": 6.968767675137546, "learning_rate": 9.015712173458387e-06, "loss": 17.7252, "step": 12423 }, { "epoch": 0.22710073665161679, "grad_norm": 7.748829080671614, "learning_rate": 9.01553580657501e-06, "loss": 17.9642, "step": 12424 }, { "epoch": 0.22711901584806332, "grad_norm": 6.653206515480309, "learning_rate": 9.015359425617532e-06, "loss": 17.6293, "step": 12425 }, { "epoch": 0.22713729504450986, "grad_norm": 6.874471388442792, "learning_rate": 9.015183030586565e-06, "loss": 17.8499, "step": 12426 }, { "epoch": 0.22715557424095636, "grad_norm": 6.867013574778568, "learning_rate": 9.015006621482731e-06, "loss": 17.6569, "step": 12427 }, { "epoch": 0.2271738534374029, "grad_norm": 7.165359716422239, "learning_rate": 9.014830198306648e-06, "loss": 18.0228, "step": 12428 }, { "epoch": 0.2271921326338494, "grad_norm": 7.550212146351529, "learning_rate": 9.014653761058932e-06, "loss": 17.9426, "step": 12429 }, { "epoch": 0.22721041183029594, "grad_norm": 5.286133948250526, "learning_rate": 9.014477309740203e-06, "loss": 17.0602, "step": 12430 }, { "epoch": 0.22722869102674245, "grad_norm": 7.310267374318483, "learning_rate": 9.014300844351081e-06, "loss": 17.6793, "step": 12431 }, { "epoch": 0.227246970223189, "grad_norm": 6.601547178421724, "learning_rate": 9.014124364892181e-06, "loss": 17.8393, "step": 12432 }, { "epoch": 0.22726524941963552, "grad_norm": 5.8132377945373905, "learning_rate": 9.013947871364123e-06, "loss": 17.1707, "step": 12433 }, { "epoch": 0.22728352861608203, "grad_norm": 6.378790816808448, "learning_rate": 9.013771363767527e-06, "loss": 17.482, "step": 12434 }, { "epoch": 0.22730180781252857, "grad_norm": 7.174213886845487, "learning_rate": 9.013594842103012e-06, "loss": 17.3914, "step": 12435 }, { "epoch": 0.22732008700897507, "grad_norm": 7.168202336406966, "learning_rate": 9.013418306371194e-06, "loss": 17.7358, "step": 12436 }, { "epoch": 0.2273383662054216, "grad_norm": 6.661420616944232, "learning_rate": 9.013241756572692e-06, "loss": 17.6339, "step": 12437 }, { "epoch": 0.22735664540186815, "grad_norm": 7.098420385874402, "learning_rate": 9.013065192708128e-06, "loss": 17.7022, "step": 12438 }, { "epoch": 0.22737492459831465, "grad_norm": 5.441501533007861, "learning_rate": 9.01288861477812e-06, "loss": 16.9966, "step": 12439 }, { "epoch": 0.2273932037947612, "grad_norm": 7.681352001784799, "learning_rate": 9.012712022783283e-06, "loss": 17.913, "step": 12440 }, { "epoch": 0.2274114829912077, "grad_norm": 8.949349796420888, "learning_rate": 9.012535416724238e-06, "loss": 18.6086, "step": 12441 }, { "epoch": 0.22742976218765423, "grad_norm": 7.739705134628737, "learning_rate": 9.012358796601605e-06, "loss": 18.1888, "step": 12442 }, { "epoch": 0.22744804138410077, "grad_norm": 7.583475393017301, "learning_rate": 9.012182162416003e-06, "loss": 17.5618, "step": 12443 }, { "epoch": 0.22746632058054728, "grad_norm": 7.548928680040296, "learning_rate": 9.012005514168052e-06, "loss": 17.7478, "step": 12444 }, { "epoch": 0.2274845997769938, "grad_norm": 7.358591726462837, "learning_rate": 9.01182885185837e-06, "loss": 17.736, "step": 12445 }, { "epoch": 0.22750287897344032, "grad_norm": 5.2893363235863395, "learning_rate": 9.011652175487574e-06, "loss": 16.9848, "step": 12446 }, { "epoch": 0.22752115816988686, "grad_norm": 6.65548419118315, "learning_rate": 9.011475485056285e-06, "loss": 17.7805, "step": 12447 }, { "epoch": 0.22753943736633336, "grad_norm": 7.441514325461889, "learning_rate": 9.011298780565124e-06, "loss": 17.8744, "step": 12448 }, { "epoch": 0.2275577165627799, "grad_norm": 6.741444606272991, "learning_rate": 9.011122062014709e-06, "loss": 17.8172, "step": 12449 }, { "epoch": 0.22757599575922643, "grad_norm": 8.334131899863701, "learning_rate": 9.010945329405658e-06, "loss": 18.2828, "step": 12450 }, { "epoch": 0.22759427495567294, "grad_norm": 7.322963341330085, "learning_rate": 9.010768582738592e-06, "loss": 17.596, "step": 12451 }, { "epoch": 0.22761255415211948, "grad_norm": 6.027285555663227, "learning_rate": 9.01059182201413e-06, "loss": 17.4733, "step": 12452 }, { "epoch": 0.227630833348566, "grad_norm": 5.202836623511983, "learning_rate": 9.010415047232894e-06, "loss": 17.0539, "step": 12453 }, { "epoch": 0.22764911254501252, "grad_norm": 6.665075150072932, "learning_rate": 9.010238258395498e-06, "loss": 17.7745, "step": 12454 }, { "epoch": 0.22766739174145906, "grad_norm": 8.423251777382154, "learning_rate": 9.010061455502567e-06, "loss": 18.1023, "step": 12455 }, { "epoch": 0.22768567093790557, "grad_norm": 6.547059997079752, "learning_rate": 9.009884638554718e-06, "loss": 17.5038, "step": 12456 }, { "epoch": 0.2277039501343521, "grad_norm": 5.767718578287258, "learning_rate": 9.00970780755257e-06, "loss": 17.2456, "step": 12457 }, { "epoch": 0.2277222293307986, "grad_norm": 7.283933715684057, "learning_rate": 9.009530962496746e-06, "loss": 18.127, "step": 12458 }, { "epoch": 0.22774050852724514, "grad_norm": 5.845148615638448, "learning_rate": 9.009354103387864e-06, "loss": 17.2741, "step": 12459 }, { "epoch": 0.22775878772369168, "grad_norm": 7.51111630909009, "learning_rate": 9.009177230226542e-06, "loss": 18.3568, "step": 12460 }, { "epoch": 0.2277770669201382, "grad_norm": 7.1047743057025095, "learning_rate": 9.009000343013403e-06, "loss": 17.9295, "step": 12461 }, { "epoch": 0.22779534611658472, "grad_norm": 7.938009218971499, "learning_rate": 9.008823441749067e-06, "loss": 17.8752, "step": 12462 }, { "epoch": 0.22781362531303123, "grad_norm": 7.037058314775159, "learning_rate": 9.008646526434151e-06, "loss": 17.6727, "step": 12463 }, { "epoch": 0.22783190450947777, "grad_norm": 6.121077682756427, "learning_rate": 9.008469597069276e-06, "loss": 17.6428, "step": 12464 }, { "epoch": 0.22785018370592428, "grad_norm": 6.593101398863978, "learning_rate": 9.008292653655064e-06, "loss": 17.5556, "step": 12465 }, { "epoch": 0.2278684629023708, "grad_norm": 7.31502964129193, "learning_rate": 9.008115696192133e-06, "loss": 18.0551, "step": 12466 }, { "epoch": 0.22788674209881735, "grad_norm": 6.146276626518445, "learning_rate": 9.007938724681106e-06, "loss": 17.4062, "step": 12467 }, { "epoch": 0.22790502129526385, "grad_norm": 5.469743171696297, "learning_rate": 9.0077617391226e-06, "loss": 17.1875, "step": 12468 }, { "epoch": 0.2279233004917104, "grad_norm": 7.813582794895173, "learning_rate": 9.007584739517237e-06, "loss": 18.0963, "step": 12469 }, { "epoch": 0.2279415796881569, "grad_norm": 7.550237307960634, "learning_rate": 9.007407725865638e-06, "loss": 17.8713, "step": 12470 }, { "epoch": 0.22795985888460343, "grad_norm": 6.20967047309663, "learning_rate": 9.007230698168422e-06, "loss": 17.3119, "step": 12471 }, { "epoch": 0.22797813808104997, "grad_norm": 6.346006043995053, "learning_rate": 9.007053656426213e-06, "loss": 17.5279, "step": 12472 }, { "epoch": 0.22799641727749648, "grad_norm": 6.260159084648033, "learning_rate": 9.006876600639624e-06, "loss": 17.5299, "step": 12473 }, { "epoch": 0.228014696473943, "grad_norm": 6.323058036066158, "learning_rate": 9.006699530809284e-06, "loss": 17.4276, "step": 12474 }, { "epoch": 0.22803297567038952, "grad_norm": 6.19932219235715, "learning_rate": 9.006522446935807e-06, "loss": 17.4824, "step": 12475 }, { "epoch": 0.22805125486683606, "grad_norm": 7.865928942197825, "learning_rate": 9.006345349019818e-06, "loss": 18.0053, "step": 12476 }, { "epoch": 0.2280695340632826, "grad_norm": 6.363254127694012, "learning_rate": 9.006168237061936e-06, "loss": 17.3546, "step": 12477 }, { "epoch": 0.2280878132597291, "grad_norm": 5.913155834157607, "learning_rate": 9.005991111062782e-06, "loss": 17.4868, "step": 12478 }, { "epoch": 0.22810609245617564, "grad_norm": 7.065934559973342, "learning_rate": 9.005813971022977e-06, "loss": 17.7804, "step": 12479 }, { "epoch": 0.22812437165262214, "grad_norm": 6.476520955100705, "learning_rate": 9.005636816943141e-06, "loss": 17.4125, "step": 12480 }, { "epoch": 0.22814265084906868, "grad_norm": 6.216250226614293, "learning_rate": 9.005459648823897e-06, "loss": 17.3022, "step": 12481 }, { "epoch": 0.2281609300455152, "grad_norm": 6.116706968470408, "learning_rate": 9.005282466665864e-06, "loss": 17.5586, "step": 12482 }, { "epoch": 0.22817920924196172, "grad_norm": 6.999228409098107, "learning_rate": 9.005105270469663e-06, "loss": 17.8334, "step": 12483 }, { "epoch": 0.22819748843840826, "grad_norm": 7.125612598589452, "learning_rate": 9.004928060235915e-06, "loss": 17.7083, "step": 12484 }, { "epoch": 0.22821576763485477, "grad_norm": 7.8696084480006085, "learning_rate": 9.004750835965241e-06, "loss": 18.3996, "step": 12485 }, { "epoch": 0.2282340468313013, "grad_norm": 6.2676428832425, "learning_rate": 9.004573597658265e-06, "loss": 17.313, "step": 12486 }, { "epoch": 0.2282523260277478, "grad_norm": 7.765047865395796, "learning_rate": 9.004396345315604e-06, "loss": 17.9367, "step": 12487 }, { "epoch": 0.22827060522419435, "grad_norm": 6.9273134841788035, "learning_rate": 9.004219078937883e-06, "loss": 17.5964, "step": 12488 }, { "epoch": 0.22828888442064088, "grad_norm": 6.450689977922569, "learning_rate": 9.004041798525723e-06, "loss": 17.374, "step": 12489 }, { "epoch": 0.2283071636170874, "grad_norm": 5.876191355914733, "learning_rate": 9.00386450407974e-06, "loss": 17.3217, "step": 12490 }, { "epoch": 0.22832544281353392, "grad_norm": 6.824802720193081, "learning_rate": 9.003687195600561e-06, "loss": 17.6096, "step": 12491 }, { "epoch": 0.22834372200998043, "grad_norm": 6.326670893919381, "learning_rate": 9.003509873088806e-06, "loss": 17.4869, "step": 12492 }, { "epoch": 0.22836200120642697, "grad_norm": 6.080616837358299, "learning_rate": 9.003332536545097e-06, "loss": 17.6232, "step": 12493 }, { "epoch": 0.2283802804028735, "grad_norm": 6.481898583726981, "learning_rate": 9.003155185970055e-06, "loss": 17.6116, "step": 12494 }, { "epoch": 0.22839855959932, "grad_norm": 7.2780683463881495, "learning_rate": 9.0029778213643e-06, "loss": 17.7783, "step": 12495 }, { "epoch": 0.22841683879576655, "grad_norm": 7.187827518993483, "learning_rate": 9.002800442728456e-06, "loss": 17.9959, "step": 12496 }, { "epoch": 0.22843511799221305, "grad_norm": 6.31378587333316, "learning_rate": 9.002623050063144e-06, "loss": 17.3304, "step": 12497 }, { "epoch": 0.2284533971886596, "grad_norm": 6.371838918299203, "learning_rate": 9.002445643368985e-06, "loss": 17.5231, "step": 12498 }, { "epoch": 0.2284716763851061, "grad_norm": 6.670629055001435, "learning_rate": 9.002268222646602e-06, "loss": 17.683, "step": 12499 }, { "epoch": 0.22848995558155263, "grad_norm": 7.111535221045845, "learning_rate": 9.002090787896616e-06, "loss": 17.6803, "step": 12500 }, { "epoch": 0.22850823477799917, "grad_norm": 6.907882759243566, "learning_rate": 9.001913339119647e-06, "loss": 17.8974, "step": 12501 }, { "epoch": 0.22852651397444568, "grad_norm": 6.7099161801259966, "learning_rate": 9.001735876316323e-06, "loss": 17.4129, "step": 12502 }, { "epoch": 0.2285447931708922, "grad_norm": 6.24639562168642, "learning_rate": 9.001558399487257e-06, "loss": 17.2618, "step": 12503 }, { "epoch": 0.22856307236733872, "grad_norm": 6.191726897974517, "learning_rate": 9.00138090863308e-06, "loss": 17.3679, "step": 12504 }, { "epoch": 0.22858135156378526, "grad_norm": 6.494394177223834, "learning_rate": 9.00120340375441e-06, "loss": 17.8027, "step": 12505 }, { "epoch": 0.2285996307602318, "grad_norm": 6.973080184641169, "learning_rate": 9.001025884851868e-06, "loss": 17.6395, "step": 12506 }, { "epoch": 0.2286179099566783, "grad_norm": 6.050184229241976, "learning_rate": 9.000848351926077e-06, "loss": 17.1681, "step": 12507 }, { "epoch": 0.22863618915312484, "grad_norm": 6.583322560980222, "learning_rate": 9.000670804977661e-06, "loss": 17.5065, "step": 12508 }, { "epoch": 0.22865446834957134, "grad_norm": 8.761571713223706, "learning_rate": 9.00049324400724e-06, "loss": 18.2826, "step": 12509 }, { "epoch": 0.22867274754601788, "grad_norm": 10.445797901169518, "learning_rate": 9.000315669015438e-06, "loss": 18.0063, "step": 12510 }, { "epoch": 0.22869102674246441, "grad_norm": 7.187926896437648, "learning_rate": 9.000138080002876e-06, "loss": 17.5907, "step": 12511 }, { "epoch": 0.22870930593891092, "grad_norm": 8.741326794947055, "learning_rate": 8.999960476970178e-06, "loss": 18.0142, "step": 12512 }, { "epoch": 0.22872758513535746, "grad_norm": 5.776068892469077, "learning_rate": 8.999782859917966e-06, "loss": 17.1766, "step": 12513 }, { "epoch": 0.22874586433180397, "grad_norm": 6.171879540219162, "learning_rate": 8.99960522884686e-06, "loss": 17.3487, "step": 12514 }, { "epoch": 0.2287641435282505, "grad_norm": 6.41565242675409, "learning_rate": 8.999427583757487e-06, "loss": 17.6119, "step": 12515 }, { "epoch": 0.228782422724697, "grad_norm": 6.044914902041561, "learning_rate": 8.999249924650467e-06, "loss": 17.156, "step": 12516 }, { "epoch": 0.22880070192114355, "grad_norm": 6.738196133000485, "learning_rate": 8.999072251526422e-06, "loss": 17.8699, "step": 12517 }, { "epoch": 0.22881898111759008, "grad_norm": 6.419311646700771, "learning_rate": 8.998894564385976e-06, "loss": 17.4026, "step": 12518 }, { "epoch": 0.2288372603140366, "grad_norm": 6.392433973015512, "learning_rate": 8.998716863229753e-06, "loss": 17.6232, "step": 12519 }, { "epoch": 0.22885553951048312, "grad_norm": 5.8958085404852865, "learning_rate": 8.998539148058371e-06, "loss": 17.3042, "step": 12520 }, { "epoch": 0.22887381870692963, "grad_norm": 6.74062692196872, "learning_rate": 8.99836141887246e-06, "loss": 17.7199, "step": 12521 }, { "epoch": 0.22889209790337617, "grad_norm": 6.636453772326242, "learning_rate": 8.998183675672639e-06, "loss": 17.727, "step": 12522 }, { "epoch": 0.2289103770998227, "grad_norm": 6.151624342410829, "learning_rate": 8.998005918459529e-06, "loss": 17.3302, "step": 12523 }, { "epoch": 0.2289286562962692, "grad_norm": 7.524741787304753, "learning_rate": 8.997828147233756e-06, "loss": 18.1206, "step": 12524 }, { "epoch": 0.22894693549271575, "grad_norm": 7.0537688901822415, "learning_rate": 8.997650361995942e-06, "loss": 17.8464, "step": 12525 }, { "epoch": 0.22896521468916226, "grad_norm": 6.47024756749618, "learning_rate": 8.997472562746711e-06, "loss": 17.3939, "step": 12526 }, { "epoch": 0.2289834938856088, "grad_norm": 7.684796251635377, "learning_rate": 8.997294749486685e-06, "loss": 18.0094, "step": 12527 }, { "epoch": 0.22900177308205533, "grad_norm": 7.475698948068777, "learning_rate": 8.997116922216487e-06, "loss": 17.8596, "step": 12528 }, { "epoch": 0.22902005227850183, "grad_norm": 7.102665538397156, "learning_rate": 8.996939080936743e-06, "loss": 17.4937, "step": 12529 }, { "epoch": 0.22903833147494837, "grad_norm": 6.416082285582877, "learning_rate": 8.996761225648074e-06, "loss": 17.3821, "step": 12530 }, { "epoch": 0.22905661067139488, "grad_norm": 6.281991184229925, "learning_rate": 8.996583356351103e-06, "loss": 17.4101, "step": 12531 }, { "epoch": 0.2290748898678414, "grad_norm": 5.685979659910896, "learning_rate": 8.996405473046455e-06, "loss": 17.2703, "step": 12532 }, { "epoch": 0.22909316906428792, "grad_norm": 7.809788721353094, "learning_rate": 8.996227575734751e-06, "loss": 17.7103, "step": 12533 }, { "epoch": 0.22911144826073446, "grad_norm": 5.807257328034285, "learning_rate": 8.996049664416617e-06, "loss": 17.0531, "step": 12534 }, { "epoch": 0.229129727457181, "grad_norm": 6.141165254363331, "learning_rate": 8.995871739092676e-06, "loss": 17.3797, "step": 12535 }, { "epoch": 0.2291480066536275, "grad_norm": 6.562547748460574, "learning_rate": 8.99569379976355e-06, "loss": 17.626, "step": 12536 }, { "epoch": 0.22916628585007404, "grad_norm": 6.177743602828855, "learning_rate": 8.995515846429865e-06, "loss": 17.4222, "step": 12537 }, { "epoch": 0.22918456504652054, "grad_norm": 6.947711500882194, "learning_rate": 8.995337879092244e-06, "loss": 17.5877, "step": 12538 }, { "epoch": 0.22920284424296708, "grad_norm": 6.481432353025655, "learning_rate": 8.995159897751311e-06, "loss": 17.5561, "step": 12539 }, { "epoch": 0.22922112343941362, "grad_norm": 6.853642406435706, "learning_rate": 8.994981902407688e-06, "loss": 17.5894, "step": 12540 }, { "epoch": 0.22923940263586012, "grad_norm": 6.96098333316694, "learning_rate": 8.994803893062e-06, "loss": 17.7678, "step": 12541 }, { "epoch": 0.22925768183230666, "grad_norm": 7.911748352299507, "learning_rate": 8.994625869714872e-06, "loss": 17.9697, "step": 12542 }, { "epoch": 0.22927596102875317, "grad_norm": 8.921371968463037, "learning_rate": 8.994447832366926e-06, "loss": 18.1782, "step": 12543 }, { "epoch": 0.2292942402251997, "grad_norm": 7.279355339017593, "learning_rate": 8.994269781018787e-06, "loss": 17.3983, "step": 12544 }, { "epoch": 0.22931251942164624, "grad_norm": 7.475994898745621, "learning_rate": 8.994091715671081e-06, "loss": 18.0412, "step": 12545 }, { "epoch": 0.22933079861809275, "grad_norm": 8.479527737811965, "learning_rate": 8.993913636324427e-06, "loss": 18.1023, "step": 12546 }, { "epoch": 0.22934907781453928, "grad_norm": 5.878535855809932, "learning_rate": 8.993735542979453e-06, "loss": 17.0986, "step": 12547 }, { "epoch": 0.2293673570109858, "grad_norm": 5.2999987002435, "learning_rate": 8.993557435636784e-06, "loss": 16.8318, "step": 12548 }, { "epoch": 0.22938563620743233, "grad_norm": 8.142359630907281, "learning_rate": 8.993379314297042e-06, "loss": 18.0559, "step": 12549 }, { "epoch": 0.22940391540387883, "grad_norm": 5.272730622394096, "learning_rate": 8.993201178960853e-06, "loss": 17.0443, "step": 12550 }, { "epoch": 0.22942219460032537, "grad_norm": 5.915003041650118, "learning_rate": 8.99302302962884e-06, "loss": 17.2106, "step": 12551 }, { "epoch": 0.2294404737967719, "grad_norm": 5.603933753552654, "learning_rate": 8.992844866301627e-06, "loss": 17.2869, "step": 12552 }, { "epoch": 0.2294587529932184, "grad_norm": 6.60340195412723, "learning_rate": 8.992666688979838e-06, "loss": 17.4153, "step": 12553 }, { "epoch": 0.22947703218966495, "grad_norm": 7.213417414091133, "learning_rate": 8.992488497664101e-06, "loss": 17.777, "step": 12554 }, { "epoch": 0.22949531138611146, "grad_norm": 7.264297281644561, "learning_rate": 8.992310292355037e-06, "loss": 17.8815, "step": 12555 }, { "epoch": 0.229513590582558, "grad_norm": 7.399917509915868, "learning_rate": 8.992132073053272e-06, "loss": 18.0332, "step": 12556 }, { "epoch": 0.22953186977900453, "grad_norm": 6.5106449740748715, "learning_rate": 8.991953839759432e-06, "loss": 17.5683, "step": 12557 }, { "epoch": 0.22955014897545103, "grad_norm": 6.162216353065298, "learning_rate": 8.99177559247414e-06, "loss": 17.4597, "step": 12558 }, { "epoch": 0.22956842817189757, "grad_norm": 6.409297331701278, "learning_rate": 8.991597331198018e-06, "loss": 17.5378, "step": 12559 }, { "epoch": 0.22958670736834408, "grad_norm": 6.79721675397959, "learning_rate": 8.991419055931697e-06, "loss": 17.4977, "step": 12560 }, { "epoch": 0.22960498656479061, "grad_norm": 5.680684048615769, "learning_rate": 8.991240766675798e-06, "loss": 17.1447, "step": 12561 }, { "epoch": 0.22962326576123715, "grad_norm": 7.109729819431134, "learning_rate": 8.991062463430943e-06, "loss": 17.5235, "step": 12562 }, { "epoch": 0.22964154495768366, "grad_norm": 6.663936086487505, "learning_rate": 8.990884146197765e-06, "loss": 17.5464, "step": 12563 }, { "epoch": 0.2296598241541302, "grad_norm": 7.0581609461212835, "learning_rate": 8.990705814976883e-06, "loss": 17.9784, "step": 12564 }, { "epoch": 0.2296781033505767, "grad_norm": 6.513759642232706, "learning_rate": 8.990527469768921e-06, "loss": 17.4535, "step": 12565 }, { "epoch": 0.22969638254702324, "grad_norm": 7.505513390026089, "learning_rate": 8.99034911057451e-06, "loss": 17.7397, "step": 12566 }, { "epoch": 0.22971466174346974, "grad_norm": 6.137401951152139, "learning_rate": 8.99017073739427e-06, "loss": 17.4381, "step": 12567 }, { "epoch": 0.22973294093991628, "grad_norm": 6.098398579223316, "learning_rate": 8.989992350228827e-06, "loss": 17.3569, "step": 12568 }, { "epoch": 0.22975122013636282, "grad_norm": 5.639631094059768, "learning_rate": 8.989813949078808e-06, "loss": 17.3075, "step": 12569 }, { "epoch": 0.22976949933280932, "grad_norm": 6.526841461697767, "learning_rate": 8.989635533944837e-06, "loss": 17.5349, "step": 12570 }, { "epoch": 0.22978777852925586, "grad_norm": 7.68988900944073, "learning_rate": 8.98945710482754e-06, "loss": 18.0816, "step": 12571 }, { "epoch": 0.22980605772570237, "grad_norm": 5.977019301105379, "learning_rate": 8.989278661727541e-06, "loss": 17.3921, "step": 12572 }, { "epoch": 0.2298243369221489, "grad_norm": 6.478667226965484, "learning_rate": 8.989100204645469e-06, "loss": 17.3424, "step": 12573 }, { "epoch": 0.22984261611859544, "grad_norm": 7.289630488656758, "learning_rate": 8.988921733581944e-06, "loss": 17.7558, "step": 12574 }, { "epoch": 0.22986089531504195, "grad_norm": 7.0332537951974805, "learning_rate": 8.988743248537597e-06, "loss": 17.9819, "step": 12575 }, { "epoch": 0.22987917451148848, "grad_norm": 7.884691853844178, "learning_rate": 8.988564749513048e-06, "loss": 17.991, "step": 12576 }, { "epoch": 0.229897453707935, "grad_norm": 6.5376790791049375, "learning_rate": 8.988386236508928e-06, "loss": 17.553, "step": 12577 }, { "epoch": 0.22991573290438153, "grad_norm": 7.066783121330573, "learning_rate": 8.98820770952586e-06, "loss": 17.7031, "step": 12578 }, { "epoch": 0.22993401210082806, "grad_norm": 7.013659824042925, "learning_rate": 8.988029168564471e-06, "loss": 17.97, "step": 12579 }, { "epoch": 0.22995229129727457, "grad_norm": 6.74694556488098, "learning_rate": 8.987850613625384e-06, "loss": 17.5762, "step": 12580 }, { "epoch": 0.2299705704937211, "grad_norm": 6.3388721790425455, "learning_rate": 8.987672044709228e-06, "loss": 17.5761, "step": 12581 }, { "epoch": 0.2299888496901676, "grad_norm": 7.183547300713726, "learning_rate": 8.987493461816626e-06, "loss": 17.6874, "step": 12582 }, { "epoch": 0.23000712888661415, "grad_norm": 6.9699117910774175, "learning_rate": 8.987314864948207e-06, "loss": 17.8068, "step": 12583 }, { "epoch": 0.23002540808306066, "grad_norm": 5.608276462681504, "learning_rate": 8.987136254104594e-06, "loss": 17.2474, "step": 12584 }, { "epoch": 0.2300436872795072, "grad_norm": 6.677230612371447, "learning_rate": 8.986957629286416e-06, "loss": 17.5887, "step": 12585 }, { "epoch": 0.23006196647595373, "grad_norm": 6.837957475119126, "learning_rate": 8.986778990494296e-06, "loss": 17.5032, "step": 12586 }, { "epoch": 0.23008024567240024, "grad_norm": 7.362353618173081, "learning_rate": 8.986600337728863e-06, "loss": 17.7967, "step": 12587 }, { "epoch": 0.23009852486884677, "grad_norm": 7.822005971236524, "learning_rate": 8.98642167099074e-06, "loss": 17.9193, "step": 12588 }, { "epoch": 0.23011680406529328, "grad_norm": 7.1110178575429694, "learning_rate": 8.986242990280556e-06, "loss": 17.1459, "step": 12589 }, { "epoch": 0.23013508326173981, "grad_norm": 7.040891386515305, "learning_rate": 8.986064295598937e-06, "loss": 17.6653, "step": 12590 }, { "epoch": 0.23015336245818635, "grad_norm": 5.6072354844521906, "learning_rate": 8.985885586946507e-06, "loss": 16.9168, "step": 12591 }, { "epoch": 0.23017164165463286, "grad_norm": 6.016958462413485, "learning_rate": 8.985706864323896e-06, "loss": 17.1112, "step": 12592 }, { "epoch": 0.2301899208510794, "grad_norm": 7.227371297609713, "learning_rate": 8.985528127731727e-06, "loss": 17.474, "step": 12593 }, { "epoch": 0.2302082000475259, "grad_norm": 6.279187554419717, "learning_rate": 8.985349377170626e-06, "loss": 17.4538, "step": 12594 }, { "epoch": 0.23022647924397244, "grad_norm": 6.718872122911122, "learning_rate": 8.985170612641222e-06, "loss": 17.6669, "step": 12595 }, { "epoch": 0.23024475844041897, "grad_norm": 6.508252219883713, "learning_rate": 8.984991834144143e-06, "loss": 17.583, "step": 12596 }, { "epoch": 0.23026303763686548, "grad_norm": 5.965840370153033, "learning_rate": 8.984813041680013e-06, "loss": 17.1702, "step": 12597 }, { "epoch": 0.23028131683331202, "grad_norm": 6.235020004113385, "learning_rate": 8.984634235249457e-06, "loss": 17.2796, "step": 12598 }, { "epoch": 0.23029959602975852, "grad_norm": 7.665610962559877, "learning_rate": 8.984455414853106e-06, "loss": 18.0396, "step": 12599 }, { "epoch": 0.23031787522620506, "grad_norm": 6.81995482604741, "learning_rate": 8.984276580491585e-06, "loss": 17.6362, "step": 12600 }, { "epoch": 0.23033615442265157, "grad_norm": 7.102639361588913, "learning_rate": 8.984097732165518e-06, "loss": 17.8958, "step": 12601 }, { "epoch": 0.2303544336190981, "grad_norm": 5.969602264257139, "learning_rate": 8.983918869875535e-06, "loss": 17.3347, "step": 12602 }, { "epoch": 0.23037271281554464, "grad_norm": 7.001558828077685, "learning_rate": 8.983739993622262e-06, "loss": 17.5424, "step": 12603 }, { "epoch": 0.23039099201199115, "grad_norm": 6.195951514051619, "learning_rate": 8.983561103406326e-06, "loss": 17.281, "step": 12604 }, { "epoch": 0.23040927120843768, "grad_norm": 6.406386080044461, "learning_rate": 8.983382199228355e-06, "loss": 17.5129, "step": 12605 }, { "epoch": 0.2304275504048842, "grad_norm": 6.80794578612935, "learning_rate": 8.983203281088972e-06, "loss": 17.8294, "step": 12606 }, { "epoch": 0.23044582960133073, "grad_norm": 7.146491191474552, "learning_rate": 8.983024348988812e-06, "loss": 17.8156, "step": 12607 }, { "epoch": 0.23046410879777726, "grad_norm": 6.658368575968671, "learning_rate": 8.982845402928492e-06, "loss": 17.831, "step": 12608 }, { "epoch": 0.23048238799422377, "grad_norm": 7.188384913391123, "learning_rate": 8.982666442908647e-06, "loss": 17.928, "step": 12609 }, { "epoch": 0.2305006671906703, "grad_norm": 7.181366336026206, "learning_rate": 8.982487468929903e-06, "loss": 17.5838, "step": 12610 }, { "epoch": 0.2305189463871168, "grad_norm": 6.113867339288262, "learning_rate": 8.982308480992886e-06, "loss": 17.1665, "step": 12611 }, { "epoch": 0.23053722558356335, "grad_norm": 6.6525692485777075, "learning_rate": 8.982129479098221e-06, "loss": 17.2119, "step": 12612 }, { "epoch": 0.23055550478000988, "grad_norm": 6.1259140723075545, "learning_rate": 8.981950463246538e-06, "loss": 17.3334, "step": 12613 }, { "epoch": 0.2305737839764564, "grad_norm": 6.034860566559422, "learning_rate": 8.981771433438467e-06, "loss": 17.1934, "step": 12614 }, { "epoch": 0.23059206317290293, "grad_norm": 6.124692958782826, "learning_rate": 8.98159238967463e-06, "loss": 17.2687, "step": 12615 }, { "epoch": 0.23061034236934944, "grad_norm": 6.116606685379102, "learning_rate": 8.981413331955657e-06, "loss": 17.4827, "step": 12616 }, { "epoch": 0.23062862156579597, "grad_norm": 6.836158558654621, "learning_rate": 8.981234260282177e-06, "loss": 17.7202, "step": 12617 }, { "epoch": 0.23064690076224248, "grad_norm": 6.262228607960215, "learning_rate": 8.981055174654815e-06, "loss": 17.3742, "step": 12618 }, { "epoch": 0.23066517995868902, "grad_norm": 6.4218464165121825, "learning_rate": 8.980876075074202e-06, "loss": 17.4499, "step": 12619 }, { "epoch": 0.23068345915513555, "grad_norm": 6.699852881167036, "learning_rate": 8.980696961540964e-06, "loss": 17.7633, "step": 12620 }, { "epoch": 0.23070173835158206, "grad_norm": 6.044320608620665, "learning_rate": 8.980517834055728e-06, "loss": 17.2964, "step": 12621 }, { "epoch": 0.2307200175480286, "grad_norm": 8.171391327262505, "learning_rate": 8.980338692619122e-06, "loss": 17.8528, "step": 12622 }, { "epoch": 0.2307382967444751, "grad_norm": 6.700840355458898, "learning_rate": 8.980159537231774e-06, "loss": 17.5962, "step": 12623 }, { "epoch": 0.23075657594092164, "grad_norm": 7.140536927571538, "learning_rate": 8.979980367894313e-06, "loss": 17.5118, "step": 12624 }, { "epoch": 0.23077485513736817, "grad_norm": 6.444970035759333, "learning_rate": 8.979801184607364e-06, "loss": 17.3046, "step": 12625 }, { "epoch": 0.23079313433381468, "grad_norm": 5.836426830078904, "learning_rate": 8.97962198737156e-06, "loss": 17.4554, "step": 12626 }, { "epoch": 0.23081141353026122, "grad_norm": 6.501548222905501, "learning_rate": 8.979442776187524e-06, "loss": 17.4258, "step": 12627 }, { "epoch": 0.23082969272670772, "grad_norm": 8.510282767251702, "learning_rate": 8.979263551055887e-06, "loss": 18.5991, "step": 12628 }, { "epoch": 0.23084797192315426, "grad_norm": 6.544339383939356, "learning_rate": 8.979084311977277e-06, "loss": 17.2998, "step": 12629 }, { "epoch": 0.2308662511196008, "grad_norm": 4.810441782297864, "learning_rate": 8.978905058952323e-06, "loss": 16.7762, "step": 12630 }, { "epoch": 0.2308845303160473, "grad_norm": 6.753057630563337, "learning_rate": 8.978725791981651e-06, "loss": 17.7448, "step": 12631 }, { "epoch": 0.23090280951249384, "grad_norm": 6.878295922185571, "learning_rate": 8.978546511065889e-06, "loss": 17.8306, "step": 12632 }, { "epoch": 0.23092108870894035, "grad_norm": 6.601055261394896, "learning_rate": 8.978367216205668e-06, "loss": 17.6226, "step": 12633 }, { "epoch": 0.23093936790538688, "grad_norm": 6.96294244153231, "learning_rate": 8.978187907401615e-06, "loss": 17.5169, "step": 12634 }, { "epoch": 0.2309576471018334, "grad_norm": 7.629841294699073, "learning_rate": 8.97800858465436e-06, "loss": 17.7823, "step": 12635 }, { "epoch": 0.23097592629827993, "grad_norm": 6.490291733731224, "learning_rate": 8.977829247964526e-06, "loss": 17.6466, "step": 12636 }, { "epoch": 0.23099420549472646, "grad_norm": 5.933055992843987, "learning_rate": 8.97764989733275e-06, "loss": 17.1796, "step": 12637 }, { "epoch": 0.23101248469117297, "grad_norm": 6.914293204747319, "learning_rate": 8.977470532759654e-06, "loss": 17.6409, "step": 12638 }, { "epoch": 0.2310307638876195, "grad_norm": 7.44226165891364, "learning_rate": 8.97729115424587e-06, "loss": 18.0814, "step": 12639 }, { "epoch": 0.231049043084066, "grad_norm": 5.928863316211935, "learning_rate": 8.977111761792026e-06, "loss": 17.2507, "step": 12640 }, { "epoch": 0.23106732228051255, "grad_norm": 6.736219046219845, "learning_rate": 8.97693235539875e-06, "loss": 17.6348, "step": 12641 }, { "epoch": 0.23108560147695908, "grad_norm": 6.705627412403998, "learning_rate": 8.976752935066671e-06, "loss": 17.7984, "step": 12642 }, { "epoch": 0.2311038806734056, "grad_norm": 6.887482315364359, "learning_rate": 8.976573500796417e-06, "loss": 17.8859, "step": 12643 }, { "epoch": 0.23112215986985213, "grad_norm": 6.418883513819873, "learning_rate": 8.97639405258862e-06, "loss": 17.6612, "step": 12644 }, { "epoch": 0.23114043906629864, "grad_norm": 6.931046768284051, "learning_rate": 8.976214590443905e-06, "loss": 17.7548, "step": 12645 }, { "epoch": 0.23115871826274517, "grad_norm": 6.651831068192549, "learning_rate": 8.976035114362903e-06, "loss": 17.7041, "step": 12646 }, { "epoch": 0.2311769974591917, "grad_norm": 7.906315986151466, "learning_rate": 8.975855624346244e-06, "loss": 17.8006, "step": 12647 }, { "epoch": 0.23119527665563822, "grad_norm": 7.181775795765101, "learning_rate": 8.975676120394555e-06, "loss": 17.8929, "step": 12648 }, { "epoch": 0.23121355585208475, "grad_norm": 6.2908296422667185, "learning_rate": 8.975496602508467e-06, "loss": 17.3297, "step": 12649 }, { "epoch": 0.23123183504853126, "grad_norm": 7.118094861537474, "learning_rate": 8.975317070688608e-06, "loss": 17.6819, "step": 12650 }, { "epoch": 0.2312501142449778, "grad_norm": 7.577308961162696, "learning_rate": 8.975137524935609e-06, "loss": 17.8884, "step": 12651 }, { "epoch": 0.2312683934414243, "grad_norm": 6.652621699368609, "learning_rate": 8.974957965250097e-06, "loss": 17.7604, "step": 12652 }, { "epoch": 0.23128667263787084, "grad_norm": 6.353048449739577, "learning_rate": 8.9747783916327e-06, "loss": 17.4359, "step": 12653 }, { "epoch": 0.23130495183431737, "grad_norm": 6.765843695500798, "learning_rate": 8.974598804084052e-06, "loss": 17.5919, "step": 12654 }, { "epoch": 0.23132323103076388, "grad_norm": 8.23210520293349, "learning_rate": 8.97441920260478e-06, "loss": 18.1861, "step": 12655 }, { "epoch": 0.23134151022721042, "grad_norm": 5.642530187166705, "learning_rate": 8.974239587195514e-06, "loss": 17.0812, "step": 12656 }, { "epoch": 0.23135978942365693, "grad_norm": 5.990051907955578, "learning_rate": 8.974059957856882e-06, "loss": 17.1461, "step": 12657 }, { "epoch": 0.23137806862010346, "grad_norm": 7.530881392920648, "learning_rate": 8.973880314589516e-06, "loss": 17.6615, "step": 12658 }, { "epoch": 0.23139634781655, "grad_norm": 6.605208950411215, "learning_rate": 8.973700657394043e-06, "loss": 17.5248, "step": 12659 }, { "epoch": 0.2314146270129965, "grad_norm": 7.059490635933132, "learning_rate": 8.973520986271094e-06, "loss": 17.6861, "step": 12660 }, { "epoch": 0.23143290620944304, "grad_norm": 6.815923729239079, "learning_rate": 8.9733413012213e-06, "loss": 17.4846, "step": 12661 }, { "epoch": 0.23145118540588955, "grad_norm": 6.614975434214911, "learning_rate": 8.973161602245288e-06, "loss": 17.7974, "step": 12662 }, { "epoch": 0.23146946460233608, "grad_norm": 6.350078515902742, "learning_rate": 8.97298188934369e-06, "loss": 17.5161, "step": 12663 }, { "epoch": 0.23148774379878262, "grad_norm": 6.253468614428047, "learning_rate": 8.972802162517136e-06, "loss": 17.8805, "step": 12664 }, { "epoch": 0.23150602299522913, "grad_norm": 6.4332769519713775, "learning_rate": 8.972622421766254e-06, "loss": 17.5103, "step": 12665 }, { "epoch": 0.23152430219167566, "grad_norm": 7.082491712673213, "learning_rate": 8.972442667091676e-06, "loss": 17.6118, "step": 12666 }, { "epoch": 0.23154258138812217, "grad_norm": 5.952803943455232, "learning_rate": 8.97226289849403e-06, "loss": 17.4437, "step": 12667 }, { "epoch": 0.2315608605845687, "grad_norm": 5.789099746715334, "learning_rate": 8.972083115973949e-06, "loss": 17.1918, "step": 12668 }, { "epoch": 0.23157913978101521, "grad_norm": 7.577675757610278, "learning_rate": 8.97190331953206e-06, "loss": 17.6084, "step": 12669 }, { "epoch": 0.23159741897746175, "grad_norm": 5.860040349956942, "learning_rate": 8.971723509168996e-06, "loss": 17.5445, "step": 12670 }, { "epoch": 0.23161569817390829, "grad_norm": 6.469267679013469, "learning_rate": 8.971543684885384e-06, "loss": 17.7468, "step": 12671 }, { "epoch": 0.2316339773703548, "grad_norm": 5.457767309634818, "learning_rate": 8.971363846681858e-06, "loss": 17.0538, "step": 12672 }, { "epoch": 0.23165225656680133, "grad_norm": 7.300162018109881, "learning_rate": 8.971183994559046e-06, "loss": 17.9823, "step": 12673 }, { "epoch": 0.23167053576324784, "grad_norm": 6.09247248393594, "learning_rate": 8.971004128517577e-06, "loss": 17.6069, "step": 12674 }, { "epoch": 0.23168881495969437, "grad_norm": 7.876931662668772, "learning_rate": 8.970824248558083e-06, "loss": 17.938, "step": 12675 }, { "epoch": 0.2317070941561409, "grad_norm": 5.714717702916206, "learning_rate": 8.970644354681196e-06, "loss": 17.1884, "step": 12676 }, { "epoch": 0.23172537335258742, "grad_norm": 6.139455239961285, "learning_rate": 8.970464446887544e-06, "loss": 17.1756, "step": 12677 }, { "epoch": 0.23174365254903395, "grad_norm": 6.4457883195426415, "learning_rate": 8.97028452517776e-06, "loss": 17.4024, "step": 12678 }, { "epoch": 0.23176193174548046, "grad_norm": 10.106626906983957, "learning_rate": 8.970104589552472e-06, "loss": 19.3147, "step": 12679 }, { "epoch": 0.231780210941927, "grad_norm": 7.817700354434268, "learning_rate": 8.969924640012312e-06, "loss": 18.1592, "step": 12680 }, { "epoch": 0.23179849013837353, "grad_norm": 7.803055270135185, "learning_rate": 8.969744676557912e-06, "loss": 18.1815, "step": 12681 }, { "epoch": 0.23181676933482004, "grad_norm": 8.738677894192076, "learning_rate": 8.9695646991899e-06, "loss": 18.6096, "step": 12682 }, { "epoch": 0.23183504853126657, "grad_norm": 8.074642631159707, "learning_rate": 8.96938470790891e-06, "loss": 17.9484, "step": 12683 }, { "epoch": 0.23185332772771308, "grad_norm": 6.445153140159986, "learning_rate": 8.969204702715568e-06, "loss": 17.2538, "step": 12684 }, { "epoch": 0.23187160692415962, "grad_norm": 7.28801149522414, "learning_rate": 8.96902468361051e-06, "loss": 18.0793, "step": 12685 }, { "epoch": 0.23188988612060613, "grad_norm": 6.45430193524104, "learning_rate": 8.968844650594363e-06, "loss": 17.4716, "step": 12686 }, { "epoch": 0.23190816531705266, "grad_norm": 7.189783644839498, "learning_rate": 8.968664603667763e-06, "loss": 17.4625, "step": 12687 }, { "epoch": 0.2319264445134992, "grad_norm": 6.673588427535199, "learning_rate": 8.968484542831337e-06, "loss": 17.4997, "step": 12688 }, { "epoch": 0.2319447237099457, "grad_norm": 7.319510417789561, "learning_rate": 8.968304468085715e-06, "loss": 18.1252, "step": 12689 }, { "epoch": 0.23196300290639224, "grad_norm": 6.755870910300304, "learning_rate": 8.968124379431533e-06, "loss": 17.6274, "step": 12690 }, { "epoch": 0.23198128210283875, "grad_norm": 6.9792511913835655, "learning_rate": 8.967944276869419e-06, "loss": 17.5144, "step": 12691 }, { "epoch": 0.23199956129928528, "grad_norm": 6.2217243828616455, "learning_rate": 8.967764160400002e-06, "loss": 17.3082, "step": 12692 }, { "epoch": 0.23201784049573182, "grad_norm": 6.089639947574932, "learning_rate": 8.967584030023916e-06, "loss": 17.3434, "step": 12693 }, { "epoch": 0.23203611969217833, "grad_norm": 6.430558611785808, "learning_rate": 8.967403885741795e-06, "loss": 17.5896, "step": 12694 }, { "epoch": 0.23205439888862486, "grad_norm": 5.206884105273005, "learning_rate": 8.967223727554267e-06, "loss": 16.9609, "step": 12695 }, { "epoch": 0.23207267808507137, "grad_norm": 6.467597193668249, "learning_rate": 8.967043555461964e-06, "loss": 17.4015, "step": 12696 }, { "epoch": 0.2320909572815179, "grad_norm": 6.7907705601061235, "learning_rate": 8.966863369465517e-06, "loss": 17.4738, "step": 12697 }, { "epoch": 0.23210923647796444, "grad_norm": 7.911519043403179, "learning_rate": 8.966683169565557e-06, "loss": 18.1934, "step": 12698 }, { "epoch": 0.23212751567441095, "grad_norm": 6.4982645684128615, "learning_rate": 8.96650295576272e-06, "loss": 17.3533, "step": 12699 }, { "epoch": 0.23214579487085749, "grad_norm": 5.802643014414197, "learning_rate": 8.966322728057632e-06, "loss": 17.2984, "step": 12700 }, { "epoch": 0.232164074067304, "grad_norm": 6.9079188967253655, "learning_rate": 8.966142486450925e-06, "loss": 17.6317, "step": 12701 }, { "epoch": 0.23218235326375053, "grad_norm": 6.650105180734968, "learning_rate": 8.965962230943236e-06, "loss": 17.5343, "step": 12702 }, { "epoch": 0.23220063246019704, "grad_norm": 6.969674418237345, "learning_rate": 8.965781961535194e-06, "loss": 17.6164, "step": 12703 }, { "epoch": 0.23221891165664357, "grad_norm": 6.3892235768617915, "learning_rate": 8.96560167822743e-06, "loss": 17.4689, "step": 12704 }, { "epoch": 0.2322371908530901, "grad_norm": 6.387298192158086, "learning_rate": 8.965421381020573e-06, "loss": 17.2971, "step": 12705 }, { "epoch": 0.23225547004953662, "grad_norm": 9.097806062465553, "learning_rate": 8.965241069915262e-06, "loss": 18.1642, "step": 12706 }, { "epoch": 0.23227374924598315, "grad_norm": 7.635211232771337, "learning_rate": 8.965060744912123e-06, "loss": 18.0983, "step": 12707 }, { "epoch": 0.23229202844242966, "grad_norm": 6.649749231872817, "learning_rate": 8.96488040601179e-06, "loss": 17.6738, "step": 12708 }, { "epoch": 0.2323103076388762, "grad_norm": 8.254706481009299, "learning_rate": 8.964700053214896e-06, "loss": 17.9174, "step": 12709 }, { "epoch": 0.23232858683532273, "grad_norm": 7.083713670044754, "learning_rate": 8.964519686522073e-06, "loss": 17.7431, "step": 12710 }, { "epoch": 0.23234686603176924, "grad_norm": 6.654268352523479, "learning_rate": 8.964339305933952e-06, "loss": 17.5484, "step": 12711 }, { "epoch": 0.23236514522821577, "grad_norm": 7.396280795160867, "learning_rate": 8.964158911451165e-06, "loss": 17.6135, "step": 12712 }, { "epoch": 0.23238342442466228, "grad_norm": 6.058802860490418, "learning_rate": 8.963978503074345e-06, "loss": 17.4924, "step": 12713 }, { "epoch": 0.23240170362110882, "grad_norm": 6.604578914598495, "learning_rate": 8.963798080804126e-06, "loss": 17.426, "step": 12714 }, { "epoch": 0.23241998281755535, "grad_norm": 6.19249065287721, "learning_rate": 8.963617644641138e-06, "loss": 17.308, "step": 12715 }, { "epoch": 0.23243826201400186, "grad_norm": 6.900919621465627, "learning_rate": 8.963437194586013e-06, "loss": 17.8536, "step": 12716 }, { "epoch": 0.2324565412104484, "grad_norm": 6.7578084758593, "learning_rate": 8.963256730639384e-06, "loss": 17.2981, "step": 12717 }, { "epoch": 0.2324748204068949, "grad_norm": 6.030502383830794, "learning_rate": 8.963076252801886e-06, "loss": 17.32, "step": 12718 }, { "epoch": 0.23249309960334144, "grad_norm": 7.2092960685317555, "learning_rate": 8.96289576107415e-06, "loss": 17.8071, "step": 12719 }, { "epoch": 0.23251137879978795, "grad_norm": 6.50642651713053, "learning_rate": 8.962715255456806e-06, "loss": 17.6434, "step": 12720 }, { "epoch": 0.23252965799623448, "grad_norm": 8.128287658462336, "learning_rate": 8.96253473595049e-06, "loss": 17.6088, "step": 12721 }, { "epoch": 0.23254793719268102, "grad_norm": 6.46303576738472, "learning_rate": 8.962354202555834e-06, "loss": 17.3952, "step": 12722 }, { "epoch": 0.23256621638912753, "grad_norm": 8.03783768046001, "learning_rate": 8.96217365527347e-06, "loss": 18.1464, "step": 12723 }, { "epoch": 0.23258449558557406, "grad_norm": 6.817841189634383, "learning_rate": 8.961993094104031e-06, "loss": 17.5995, "step": 12724 }, { "epoch": 0.23260277478202057, "grad_norm": 6.97774497101537, "learning_rate": 8.96181251904815e-06, "loss": 17.9206, "step": 12725 }, { "epoch": 0.2326210539784671, "grad_norm": 7.485176546420922, "learning_rate": 8.96163193010646e-06, "loss": 18.1183, "step": 12726 }, { "epoch": 0.23263933317491364, "grad_norm": 7.727677566925891, "learning_rate": 8.961451327279595e-06, "loss": 17.796, "step": 12727 }, { "epoch": 0.23265761237136015, "grad_norm": 7.2993677626770115, "learning_rate": 8.961270710568185e-06, "loss": 17.8907, "step": 12728 }, { "epoch": 0.2326758915678067, "grad_norm": 7.207807208792943, "learning_rate": 8.961090079972865e-06, "loss": 17.7457, "step": 12729 }, { "epoch": 0.2326941707642532, "grad_norm": 7.123250316362162, "learning_rate": 8.960909435494269e-06, "loss": 17.9557, "step": 12730 }, { "epoch": 0.23271244996069973, "grad_norm": 5.795406937156422, "learning_rate": 8.96072877713303e-06, "loss": 17.187, "step": 12731 }, { "epoch": 0.23273072915714627, "grad_norm": 6.060058257382089, "learning_rate": 8.960548104889778e-06, "loss": 17.377, "step": 12732 }, { "epoch": 0.23274900835359277, "grad_norm": 9.463240980061615, "learning_rate": 8.960367418765152e-06, "loss": 18.9757, "step": 12733 }, { "epoch": 0.2327672875500393, "grad_norm": 7.1875089109188535, "learning_rate": 8.960186718759778e-06, "loss": 17.7208, "step": 12734 }, { "epoch": 0.23278556674648582, "grad_norm": 9.197511583933995, "learning_rate": 8.960006004874295e-06, "loss": 18.0441, "step": 12735 }, { "epoch": 0.23280384594293235, "grad_norm": 7.426393746853364, "learning_rate": 8.959825277109334e-06, "loss": 18.1336, "step": 12736 }, { "epoch": 0.23282212513937886, "grad_norm": 4.9142180878185915, "learning_rate": 8.95964453546553e-06, "loss": 16.8768, "step": 12737 }, { "epoch": 0.2328404043358254, "grad_norm": 7.249341077977238, "learning_rate": 8.959463779943516e-06, "loss": 17.856, "step": 12738 }, { "epoch": 0.23285868353227193, "grad_norm": 6.544874946263931, "learning_rate": 8.959283010543923e-06, "loss": 17.4914, "step": 12739 }, { "epoch": 0.23287696272871844, "grad_norm": 6.973432445210602, "learning_rate": 8.959102227267387e-06, "loss": 17.5418, "step": 12740 }, { "epoch": 0.23289524192516498, "grad_norm": 7.673467131029792, "learning_rate": 8.958921430114542e-06, "loss": 18.4472, "step": 12741 }, { "epoch": 0.23291352112161148, "grad_norm": 6.957441134901725, "learning_rate": 8.95874061908602e-06, "loss": 17.5476, "step": 12742 }, { "epoch": 0.23293180031805802, "grad_norm": 7.476236583511359, "learning_rate": 8.958559794182457e-06, "loss": 18.1306, "step": 12743 }, { "epoch": 0.23295007951450455, "grad_norm": 7.373016572378293, "learning_rate": 8.958378955404486e-06, "loss": 17.622, "step": 12744 }, { "epoch": 0.23296835871095106, "grad_norm": 7.485544217058992, "learning_rate": 8.95819810275274e-06, "loss": 17.9682, "step": 12745 }, { "epoch": 0.2329866379073976, "grad_norm": 6.912468244774014, "learning_rate": 8.958017236227851e-06, "loss": 17.6617, "step": 12746 }, { "epoch": 0.2330049171038441, "grad_norm": 6.7659822035034, "learning_rate": 8.957836355830456e-06, "loss": 17.5976, "step": 12747 }, { "epoch": 0.23302319630029064, "grad_norm": 6.703200527664176, "learning_rate": 8.957655461561188e-06, "loss": 17.5962, "step": 12748 }, { "epoch": 0.23304147549673718, "grad_norm": 6.790172140328692, "learning_rate": 8.957474553420681e-06, "loss": 17.5057, "step": 12749 }, { "epoch": 0.23305975469318368, "grad_norm": 8.478019310612417, "learning_rate": 8.957293631409571e-06, "loss": 18.1948, "step": 12750 }, { "epoch": 0.23307803388963022, "grad_norm": 6.657350113306031, "learning_rate": 8.95711269552849e-06, "loss": 17.4076, "step": 12751 }, { "epoch": 0.23309631308607673, "grad_norm": 6.443575958162277, "learning_rate": 8.95693174577807e-06, "loss": 17.593, "step": 12752 }, { "epoch": 0.23311459228252326, "grad_norm": 5.822279201249806, "learning_rate": 8.956750782158948e-06, "loss": 17.2569, "step": 12753 }, { "epoch": 0.23313287147896977, "grad_norm": 5.829004248004184, "learning_rate": 8.956569804671759e-06, "loss": 17.3094, "step": 12754 }, { "epoch": 0.2331511506754163, "grad_norm": 6.440022187282789, "learning_rate": 8.956388813317136e-06, "loss": 17.5126, "step": 12755 }, { "epoch": 0.23316942987186284, "grad_norm": 5.129374670689684, "learning_rate": 8.956207808095713e-06, "loss": 16.9751, "step": 12756 }, { "epoch": 0.23318770906830935, "grad_norm": 7.600104472662688, "learning_rate": 8.956026789008126e-06, "loss": 18.2364, "step": 12757 }, { "epoch": 0.2332059882647559, "grad_norm": 6.964699187721291, "learning_rate": 8.955845756055007e-06, "loss": 17.9402, "step": 12758 }, { "epoch": 0.2332242674612024, "grad_norm": 6.181002011419887, "learning_rate": 8.955664709236992e-06, "loss": 17.3155, "step": 12759 }, { "epoch": 0.23324254665764893, "grad_norm": 6.814603945740874, "learning_rate": 8.955483648554716e-06, "loss": 17.8268, "step": 12760 }, { "epoch": 0.23326082585409547, "grad_norm": 7.091746026402167, "learning_rate": 8.955302574008813e-06, "loss": 17.8012, "step": 12761 }, { "epoch": 0.23327910505054197, "grad_norm": 8.211456720586112, "learning_rate": 8.955121485599919e-06, "loss": 18.3147, "step": 12762 }, { "epoch": 0.2332973842469885, "grad_norm": 6.807629775205399, "learning_rate": 8.954940383328666e-06, "loss": 17.7392, "step": 12763 }, { "epoch": 0.23331566344343502, "grad_norm": 6.383849031235497, "learning_rate": 8.95475926719569e-06, "loss": 17.5924, "step": 12764 }, { "epoch": 0.23333394263988155, "grad_norm": 6.147107205787592, "learning_rate": 8.954578137201625e-06, "loss": 17.4159, "step": 12765 }, { "epoch": 0.2333522218363281, "grad_norm": 7.702299531053069, "learning_rate": 8.954396993347107e-06, "loss": 17.5864, "step": 12766 }, { "epoch": 0.2333705010327746, "grad_norm": 7.270374554348164, "learning_rate": 8.954215835632774e-06, "loss": 17.8174, "step": 12767 }, { "epoch": 0.23338878022922113, "grad_norm": 7.072935465236767, "learning_rate": 8.954034664059254e-06, "loss": 18.1406, "step": 12768 }, { "epoch": 0.23340705942566764, "grad_norm": 6.522551304604889, "learning_rate": 8.953853478627187e-06, "loss": 17.4885, "step": 12769 }, { "epoch": 0.23342533862211418, "grad_norm": 7.153524358540162, "learning_rate": 8.953672279337206e-06, "loss": 17.5572, "step": 12770 }, { "epoch": 0.23344361781856068, "grad_norm": 7.474197906048436, "learning_rate": 8.953491066189948e-06, "loss": 17.7423, "step": 12771 }, { "epoch": 0.23346189701500722, "grad_norm": 6.416496933823563, "learning_rate": 8.953309839186047e-06, "loss": 17.5954, "step": 12772 }, { "epoch": 0.23348017621145375, "grad_norm": 6.322843043443495, "learning_rate": 8.953128598326136e-06, "loss": 17.4895, "step": 12773 }, { "epoch": 0.23349845540790026, "grad_norm": 6.33223118161616, "learning_rate": 8.952947343610854e-06, "loss": 17.1197, "step": 12774 }, { "epoch": 0.2335167346043468, "grad_norm": 7.066635935635524, "learning_rate": 8.952766075040833e-06, "loss": 17.8293, "step": 12775 }, { "epoch": 0.2335350138007933, "grad_norm": 5.944662456169544, "learning_rate": 8.952584792616712e-06, "loss": 17.1289, "step": 12776 }, { "epoch": 0.23355329299723984, "grad_norm": 6.3679384570264554, "learning_rate": 8.952403496339124e-06, "loss": 17.5574, "step": 12777 }, { "epoch": 0.23357157219368638, "grad_norm": 6.424547901175277, "learning_rate": 8.952222186208703e-06, "loss": 17.4749, "step": 12778 }, { "epoch": 0.23358985139013289, "grad_norm": 7.124514106204784, "learning_rate": 8.952040862226089e-06, "loss": 17.9767, "step": 12779 }, { "epoch": 0.23360813058657942, "grad_norm": 5.489985558523886, "learning_rate": 8.951859524391912e-06, "loss": 17.1535, "step": 12780 }, { "epoch": 0.23362640978302593, "grad_norm": 6.904053710838957, "learning_rate": 8.95167817270681e-06, "loss": 17.8272, "step": 12781 }, { "epoch": 0.23364468897947246, "grad_norm": 7.261939271079995, "learning_rate": 8.95149680717142e-06, "loss": 17.7751, "step": 12782 }, { "epoch": 0.233662968175919, "grad_norm": 8.35605896603481, "learning_rate": 8.951315427786378e-06, "loss": 18.2255, "step": 12783 }, { "epoch": 0.2336812473723655, "grad_norm": 6.444609110134202, "learning_rate": 8.951134034552316e-06, "loss": 17.2849, "step": 12784 }, { "epoch": 0.23369952656881204, "grad_norm": 7.343231371580179, "learning_rate": 8.950952627469873e-06, "loss": 17.8932, "step": 12785 }, { "epoch": 0.23371780576525855, "grad_norm": 6.195981454310583, "learning_rate": 8.950771206539685e-06, "loss": 17.4204, "step": 12786 }, { "epoch": 0.2337360849617051, "grad_norm": 6.676724333091879, "learning_rate": 8.950589771762386e-06, "loss": 17.728, "step": 12787 }, { "epoch": 0.2337543641581516, "grad_norm": 7.8657389649096645, "learning_rate": 8.950408323138612e-06, "loss": 17.8626, "step": 12788 }, { "epoch": 0.23377264335459813, "grad_norm": 6.928884863418598, "learning_rate": 8.950226860669001e-06, "loss": 17.6, "step": 12789 }, { "epoch": 0.23379092255104467, "grad_norm": 5.847325564075314, "learning_rate": 8.950045384354185e-06, "loss": 17.2806, "step": 12790 }, { "epoch": 0.23380920174749117, "grad_norm": 7.902027079744283, "learning_rate": 8.949863894194806e-06, "loss": 18.4469, "step": 12791 }, { "epoch": 0.2338274809439377, "grad_norm": 6.5891202226334515, "learning_rate": 8.949682390191495e-06, "loss": 17.6076, "step": 12792 }, { "epoch": 0.23384576014038422, "grad_norm": 9.77565731334052, "learning_rate": 8.94950087234489e-06, "loss": 17.8782, "step": 12793 }, { "epoch": 0.23386403933683075, "grad_norm": 5.912218694025467, "learning_rate": 8.949319340655628e-06, "loss": 17.1442, "step": 12794 }, { "epoch": 0.2338823185332773, "grad_norm": 6.861787325914959, "learning_rate": 8.949137795124342e-06, "loss": 17.6416, "step": 12795 }, { "epoch": 0.2339005977297238, "grad_norm": 7.681415121848571, "learning_rate": 8.948956235751673e-06, "loss": 18.4279, "step": 12796 }, { "epoch": 0.23391887692617033, "grad_norm": 6.311152196060109, "learning_rate": 8.948774662538255e-06, "loss": 17.3943, "step": 12797 }, { "epoch": 0.23393715612261684, "grad_norm": 5.858300943469095, "learning_rate": 8.948593075484724e-06, "loss": 17.4344, "step": 12798 }, { "epoch": 0.23395543531906338, "grad_norm": 7.530169804622749, "learning_rate": 8.948411474591716e-06, "loss": 18.3767, "step": 12799 }, { "epoch": 0.2339737145155099, "grad_norm": 5.9681224194622615, "learning_rate": 8.94822985985987e-06, "loss": 17.1825, "step": 12800 }, { "epoch": 0.23399199371195642, "grad_norm": 8.049380654632524, "learning_rate": 8.948048231289822e-06, "loss": 18.1197, "step": 12801 }, { "epoch": 0.23401027290840296, "grad_norm": 5.58038187881301, "learning_rate": 8.947866588882204e-06, "loss": 16.7757, "step": 12802 }, { "epoch": 0.23402855210484946, "grad_norm": 5.958354439439862, "learning_rate": 8.947684932637658e-06, "loss": 17.3348, "step": 12803 }, { "epoch": 0.234046831301296, "grad_norm": 6.689782861703746, "learning_rate": 8.947503262556819e-06, "loss": 17.3113, "step": 12804 }, { "epoch": 0.2340651104977425, "grad_norm": 6.538472801654387, "learning_rate": 8.947321578640323e-06, "loss": 17.3539, "step": 12805 }, { "epoch": 0.23408338969418904, "grad_norm": 7.123948310786515, "learning_rate": 8.94713988088881e-06, "loss": 18.0318, "step": 12806 }, { "epoch": 0.23410166889063558, "grad_norm": 7.1576811407236915, "learning_rate": 8.94695816930291e-06, "loss": 17.4988, "step": 12807 }, { "epoch": 0.23411994808708209, "grad_norm": 6.831561770504608, "learning_rate": 8.946776443883267e-06, "loss": 17.8309, "step": 12808 }, { "epoch": 0.23413822728352862, "grad_norm": 6.5033539221865455, "learning_rate": 8.946594704630514e-06, "loss": 17.8588, "step": 12809 }, { "epoch": 0.23415650647997513, "grad_norm": 8.120063990024946, "learning_rate": 8.946412951545289e-06, "loss": 17.649, "step": 12810 }, { "epoch": 0.23417478567642167, "grad_norm": 6.680866550908306, "learning_rate": 8.94623118462823e-06, "loss": 17.6354, "step": 12811 }, { "epoch": 0.2341930648728682, "grad_norm": 6.258034552886107, "learning_rate": 8.946049403879973e-06, "loss": 17.4059, "step": 12812 }, { "epoch": 0.2342113440693147, "grad_norm": 6.586197990355327, "learning_rate": 8.945867609301153e-06, "loss": 17.6528, "step": 12813 }, { "epoch": 0.23422962326576124, "grad_norm": 7.588036149324091, "learning_rate": 8.945685800892412e-06, "loss": 18.1779, "step": 12814 }, { "epoch": 0.23424790246220775, "grad_norm": 8.872585606885204, "learning_rate": 8.945503978654384e-06, "loss": 16.9234, "step": 12815 }, { "epoch": 0.2342661816586543, "grad_norm": 6.366790588751804, "learning_rate": 8.945322142587706e-06, "loss": 17.4695, "step": 12816 }, { "epoch": 0.23428446085510082, "grad_norm": 6.534386797969441, "learning_rate": 8.945140292693017e-06, "loss": 17.5107, "step": 12817 }, { "epoch": 0.23430274005154733, "grad_norm": 6.230167768905743, "learning_rate": 8.944958428970954e-06, "loss": 17.4147, "step": 12818 }, { "epoch": 0.23432101924799387, "grad_norm": 5.579383110813056, "learning_rate": 8.944776551422154e-06, "loss": 17.3322, "step": 12819 }, { "epoch": 0.23433929844444037, "grad_norm": 6.323520705722305, "learning_rate": 8.944594660047254e-06, "loss": 17.1582, "step": 12820 }, { "epoch": 0.2343575776408869, "grad_norm": 6.766699744550753, "learning_rate": 8.944412754846892e-06, "loss": 17.4968, "step": 12821 }, { "epoch": 0.23437585683733342, "grad_norm": 6.279371910677322, "learning_rate": 8.944230835821706e-06, "loss": 17.4284, "step": 12822 }, { "epoch": 0.23439413603377995, "grad_norm": 7.40148709909635, "learning_rate": 8.944048902972334e-06, "loss": 18.0285, "step": 12823 }, { "epoch": 0.2344124152302265, "grad_norm": 7.320197020027127, "learning_rate": 8.943866956299413e-06, "loss": 17.9752, "step": 12824 }, { "epoch": 0.234430694426673, "grad_norm": 6.236710783299506, "learning_rate": 8.943684995803578e-06, "loss": 17.4248, "step": 12825 }, { "epoch": 0.23444897362311953, "grad_norm": 7.525352570988221, "learning_rate": 8.943503021485472e-06, "loss": 18.0884, "step": 12826 }, { "epoch": 0.23446725281956604, "grad_norm": 6.153165647566515, "learning_rate": 8.943321033345726e-06, "loss": 17.4561, "step": 12827 }, { "epoch": 0.23448553201601258, "grad_norm": 6.773943377440282, "learning_rate": 8.943139031384986e-06, "loss": 17.6015, "step": 12828 }, { "epoch": 0.2345038112124591, "grad_norm": 6.637106740298016, "learning_rate": 8.942957015603883e-06, "loss": 17.4788, "step": 12829 }, { "epoch": 0.23452209040890562, "grad_norm": 5.7709940820779, "learning_rate": 8.942774986003062e-06, "loss": 17.2076, "step": 12830 }, { "epoch": 0.23454036960535216, "grad_norm": 7.194045688760218, "learning_rate": 8.942592942583152e-06, "loss": 18.0743, "step": 12831 }, { "epoch": 0.23455864880179866, "grad_norm": 8.773155649550077, "learning_rate": 8.942410885344798e-06, "loss": 18.5074, "step": 12832 }, { "epoch": 0.2345769279982452, "grad_norm": 6.372220643277621, "learning_rate": 8.942228814288638e-06, "loss": 17.4747, "step": 12833 }, { "epoch": 0.23459520719469173, "grad_norm": 7.278206166951027, "learning_rate": 8.942046729415305e-06, "loss": 17.9158, "step": 12834 }, { "epoch": 0.23461348639113824, "grad_norm": 8.174325706353997, "learning_rate": 8.941864630725442e-06, "loss": 18.0042, "step": 12835 }, { "epoch": 0.23463176558758478, "grad_norm": 6.661736661078449, "learning_rate": 8.941682518219685e-06, "loss": 17.703, "step": 12836 }, { "epoch": 0.2346500447840313, "grad_norm": 6.85330602778627, "learning_rate": 8.941500391898672e-06, "loss": 17.486, "step": 12837 }, { "epoch": 0.23466832398047782, "grad_norm": 6.094453699887702, "learning_rate": 8.941318251763043e-06, "loss": 17.5093, "step": 12838 }, { "epoch": 0.23468660317692433, "grad_norm": 6.089287552241428, "learning_rate": 8.941136097813437e-06, "loss": 17.4301, "step": 12839 }, { "epoch": 0.23470488237337087, "grad_norm": 6.786967476713644, "learning_rate": 8.940953930050488e-06, "loss": 17.8137, "step": 12840 }, { "epoch": 0.2347231615698174, "grad_norm": 6.399359306518545, "learning_rate": 8.94077174847484e-06, "loss": 17.1748, "step": 12841 }, { "epoch": 0.2347414407662639, "grad_norm": 5.57518412161416, "learning_rate": 8.940589553087128e-06, "loss": 17.1147, "step": 12842 }, { "epoch": 0.23475971996271044, "grad_norm": 5.686589382383859, "learning_rate": 8.940407343887991e-06, "loss": 17.2473, "step": 12843 }, { "epoch": 0.23477799915915695, "grad_norm": 7.324812993600869, "learning_rate": 8.940225120878069e-06, "loss": 17.9225, "step": 12844 }, { "epoch": 0.2347962783556035, "grad_norm": 6.808685104709658, "learning_rate": 8.940042884058e-06, "loss": 17.626, "step": 12845 }, { "epoch": 0.23481455755205002, "grad_norm": 7.521610994948991, "learning_rate": 8.93986063342842e-06, "loss": 17.8172, "step": 12846 }, { "epoch": 0.23483283674849653, "grad_norm": 6.801419585653584, "learning_rate": 8.939678368989973e-06, "loss": 17.6693, "step": 12847 }, { "epoch": 0.23485111594494307, "grad_norm": 6.844620130150307, "learning_rate": 8.939496090743296e-06, "loss": 17.5882, "step": 12848 }, { "epoch": 0.23486939514138958, "grad_norm": 7.224860326249475, "learning_rate": 8.939313798689026e-06, "loss": 17.6952, "step": 12849 }, { "epoch": 0.2348876743378361, "grad_norm": 7.1950922179245005, "learning_rate": 8.939131492827801e-06, "loss": 17.9068, "step": 12850 }, { "epoch": 0.23490595353428265, "grad_norm": 7.036529686621534, "learning_rate": 8.938949173160266e-06, "loss": 17.6325, "step": 12851 }, { "epoch": 0.23492423273072915, "grad_norm": 5.548452549926464, "learning_rate": 8.938766839687053e-06, "loss": 16.9409, "step": 12852 }, { "epoch": 0.2349425119271757, "grad_norm": 6.460058088370336, "learning_rate": 8.938584492408805e-06, "loss": 17.7953, "step": 12853 }, { "epoch": 0.2349607911236222, "grad_norm": 5.865582744887052, "learning_rate": 8.938402131326158e-06, "loss": 17.3237, "step": 12854 }, { "epoch": 0.23497907032006873, "grad_norm": 5.507687234652714, "learning_rate": 8.938219756439755e-06, "loss": 16.9353, "step": 12855 }, { "epoch": 0.23499734951651524, "grad_norm": 6.740356373881581, "learning_rate": 8.938037367750234e-06, "loss": 17.8141, "step": 12856 }, { "epoch": 0.23501562871296178, "grad_norm": 8.740237844071164, "learning_rate": 8.937854965258234e-06, "loss": 18.3672, "step": 12857 }, { "epoch": 0.2350339079094083, "grad_norm": 6.841865049004293, "learning_rate": 8.937672548964394e-06, "loss": 17.5426, "step": 12858 }, { "epoch": 0.23505218710585482, "grad_norm": 6.5395683840692, "learning_rate": 8.937490118869353e-06, "loss": 17.7061, "step": 12859 }, { "epoch": 0.23507046630230136, "grad_norm": 7.60807010734477, "learning_rate": 8.93730767497375e-06, "loss": 17.8479, "step": 12860 }, { "epoch": 0.23508874549874786, "grad_norm": 8.067536951243808, "learning_rate": 8.937125217278225e-06, "loss": 17.991, "step": 12861 }, { "epoch": 0.2351070246951944, "grad_norm": 9.064851064343651, "learning_rate": 8.936942745783419e-06, "loss": 18.023, "step": 12862 }, { "epoch": 0.23512530389164094, "grad_norm": 6.907223170853111, "learning_rate": 8.93676026048997e-06, "loss": 17.6457, "step": 12863 }, { "epoch": 0.23514358308808744, "grad_norm": 6.0732925254634695, "learning_rate": 8.936577761398517e-06, "loss": 17.3544, "step": 12864 }, { "epoch": 0.23516186228453398, "grad_norm": 6.541697149923802, "learning_rate": 8.936395248509701e-06, "loss": 17.4098, "step": 12865 }, { "epoch": 0.2351801414809805, "grad_norm": 7.675510136049659, "learning_rate": 8.936212721824163e-06, "loss": 18.1187, "step": 12866 }, { "epoch": 0.23519842067742702, "grad_norm": 5.363468558491413, "learning_rate": 8.936030181342538e-06, "loss": 17.0235, "step": 12867 }, { "epoch": 0.23521669987387356, "grad_norm": 7.878788765603472, "learning_rate": 8.93584762706547e-06, "loss": 18.066, "step": 12868 }, { "epoch": 0.23523497907032007, "grad_norm": 6.932014476267707, "learning_rate": 8.9356650589936e-06, "loss": 17.7187, "step": 12869 }, { "epoch": 0.2352532582667666, "grad_norm": 7.661415455649792, "learning_rate": 8.935482477127562e-06, "loss": 18.097, "step": 12870 }, { "epoch": 0.2352715374632131, "grad_norm": 6.603252898003402, "learning_rate": 8.935299881468e-06, "loss": 17.6628, "step": 12871 }, { "epoch": 0.23528981665965965, "grad_norm": 7.8976845859302935, "learning_rate": 8.935117272015556e-06, "loss": 17.9888, "step": 12872 }, { "epoch": 0.23530809585610615, "grad_norm": 6.789037235858639, "learning_rate": 8.934934648770865e-06, "loss": 17.3038, "step": 12873 }, { "epoch": 0.2353263750525527, "grad_norm": 7.693775745814941, "learning_rate": 8.93475201173457e-06, "loss": 17.7252, "step": 12874 }, { "epoch": 0.23534465424899922, "grad_norm": 6.818681729605643, "learning_rate": 8.934569360907311e-06, "loss": 17.5436, "step": 12875 }, { "epoch": 0.23536293344544573, "grad_norm": 5.904524982784955, "learning_rate": 8.934386696289728e-06, "loss": 17.2418, "step": 12876 }, { "epoch": 0.23538121264189227, "grad_norm": 6.5986397666194, "learning_rate": 8.93420401788246e-06, "loss": 17.5913, "step": 12877 }, { "epoch": 0.23539949183833878, "grad_norm": 6.815460505642056, "learning_rate": 8.934021325686149e-06, "loss": 17.6721, "step": 12878 }, { "epoch": 0.2354177710347853, "grad_norm": 6.401695212403851, "learning_rate": 8.933838619701435e-06, "loss": 17.3721, "step": 12879 }, { "epoch": 0.23543605023123185, "grad_norm": 8.346930827955845, "learning_rate": 8.933655899928958e-06, "loss": 18.7162, "step": 12880 }, { "epoch": 0.23545432942767835, "grad_norm": 7.052508918572586, "learning_rate": 8.933473166369358e-06, "loss": 18.0449, "step": 12881 }, { "epoch": 0.2354726086241249, "grad_norm": 8.469230945531697, "learning_rate": 8.933290419023276e-06, "loss": 17.8177, "step": 12882 }, { "epoch": 0.2354908878205714, "grad_norm": 4.87375639354391, "learning_rate": 8.933107657891352e-06, "loss": 16.851, "step": 12883 }, { "epoch": 0.23550916701701793, "grad_norm": 6.521544436001019, "learning_rate": 8.932924882974228e-06, "loss": 17.6313, "step": 12884 }, { "epoch": 0.23552744621346447, "grad_norm": 6.171467419393666, "learning_rate": 8.932742094272541e-06, "loss": 17.2186, "step": 12885 }, { "epoch": 0.23554572540991098, "grad_norm": 7.055093229422603, "learning_rate": 8.932559291786937e-06, "loss": 17.8389, "step": 12886 }, { "epoch": 0.2355640046063575, "grad_norm": 6.894632962673046, "learning_rate": 8.932376475518054e-06, "loss": 17.9723, "step": 12887 }, { "epoch": 0.23558228380280402, "grad_norm": 6.846858043183039, "learning_rate": 8.932193645466531e-06, "loss": 17.7025, "step": 12888 }, { "epoch": 0.23560056299925056, "grad_norm": 7.236398562057061, "learning_rate": 8.93201080163301e-06, "loss": 17.7761, "step": 12889 }, { "epoch": 0.23561884219569706, "grad_norm": 7.315558731868937, "learning_rate": 8.931827944018134e-06, "loss": 18.054, "step": 12890 }, { "epoch": 0.2356371213921436, "grad_norm": 8.255749287326358, "learning_rate": 8.931645072622544e-06, "loss": 18.0403, "step": 12891 }, { "epoch": 0.23565540058859014, "grad_norm": 6.367375865660743, "learning_rate": 8.931462187446875e-06, "loss": 17.4986, "step": 12892 }, { "epoch": 0.23567367978503664, "grad_norm": 7.502225720027451, "learning_rate": 8.931279288491774e-06, "loss": 17.772, "step": 12893 }, { "epoch": 0.23569195898148318, "grad_norm": 5.609192326773627, "learning_rate": 8.931096375757882e-06, "loss": 17.3737, "step": 12894 }, { "epoch": 0.2357102381779297, "grad_norm": 8.235533754702457, "learning_rate": 8.930913449245836e-06, "loss": 18.11, "step": 12895 }, { "epoch": 0.23572851737437622, "grad_norm": 6.837312824543016, "learning_rate": 8.93073050895628e-06, "loss": 17.6328, "step": 12896 }, { "epoch": 0.23574679657082276, "grad_norm": 6.357263898915531, "learning_rate": 8.930547554889854e-06, "loss": 17.4212, "step": 12897 }, { "epoch": 0.23576507576726927, "grad_norm": 6.196444524082612, "learning_rate": 8.930364587047202e-06, "loss": 17.5793, "step": 12898 }, { "epoch": 0.2357833549637158, "grad_norm": 6.513390266384138, "learning_rate": 8.930181605428962e-06, "loss": 17.5772, "step": 12899 }, { "epoch": 0.2358016341601623, "grad_norm": 7.346719368535289, "learning_rate": 8.929998610035777e-06, "loss": 17.6624, "step": 12900 }, { "epoch": 0.23581991335660885, "grad_norm": 6.863613585163401, "learning_rate": 8.929815600868286e-06, "loss": 17.5932, "step": 12901 }, { "epoch": 0.23583819255305538, "grad_norm": 6.334064267736984, "learning_rate": 8.929632577927133e-06, "loss": 17.518, "step": 12902 }, { "epoch": 0.2358564717495019, "grad_norm": 7.053893819976255, "learning_rate": 8.92944954121296e-06, "loss": 17.9189, "step": 12903 }, { "epoch": 0.23587475094594842, "grad_norm": 7.980046746522379, "learning_rate": 8.929266490726408e-06, "loss": 17.9997, "step": 12904 }, { "epoch": 0.23589303014239493, "grad_norm": 6.962348750119907, "learning_rate": 8.929083426468117e-06, "loss": 17.6001, "step": 12905 }, { "epoch": 0.23591130933884147, "grad_norm": 7.442237121091488, "learning_rate": 8.92890034843873e-06, "loss": 17.8534, "step": 12906 }, { "epoch": 0.23592958853528798, "grad_norm": 6.388804993595424, "learning_rate": 8.928717256638887e-06, "loss": 17.6104, "step": 12907 }, { "epoch": 0.2359478677317345, "grad_norm": 5.611522836208369, "learning_rate": 8.928534151069231e-06, "loss": 17.0817, "step": 12908 }, { "epoch": 0.23596614692818105, "grad_norm": 7.23191169122668, "learning_rate": 8.928351031730405e-06, "loss": 17.8185, "step": 12909 }, { "epoch": 0.23598442612462756, "grad_norm": 6.204385501247323, "learning_rate": 8.928167898623048e-06, "loss": 17.4054, "step": 12910 }, { "epoch": 0.2360027053210741, "grad_norm": 8.513596232673757, "learning_rate": 8.927984751747805e-06, "loss": 18.1255, "step": 12911 }, { "epoch": 0.2360209845175206, "grad_norm": 6.8787495159890035, "learning_rate": 8.927801591105314e-06, "loss": 17.8178, "step": 12912 }, { "epoch": 0.23603926371396713, "grad_norm": 7.396079286809878, "learning_rate": 8.92761841669622e-06, "loss": 17.8402, "step": 12913 }, { "epoch": 0.23605754291041367, "grad_norm": 6.512736366078796, "learning_rate": 8.927435228521166e-06, "loss": 17.4054, "step": 12914 }, { "epoch": 0.23607582210686018, "grad_norm": 7.197978482763735, "learning_rate": 8.92725202658079e-06, "loss": 17.8006, "step": 12915 }, { "epoch": 0.2360941013033067, "grad_norm": 6.898895637699667, "learning_rate": 8.927068810875739e-06, "loss": 17.586, "step": 12916 }, { "epoch": 0.23611238049975322, "grad_norm": 6.525968119741383, "learning_rate": 8.92688558140665e-06, "loss": 17.5909, "step": 12917 }, { "epoch": 0.23613065969619976, "grad_norm": 7.4227272985848565, "learning_rate": 8.92670233817417e-06, "loss": 17.6597, "step": 12918 }, { "epoch": 0.2361489388926463, "grad_norm": 5.584828933954418, "learning_rate": 8.926519081178938e-06, "loss": 17.2085, "step": 12919 }, { "epoch": 0.2361672180890928, "grad_norm": 6.658550046066718, "learning_rate": 8.926335810421598e-06, "loss": 17.8171, "step": 12920 }, { "epoch": 0.23618549728553934, "grad_norm": 7.349185977238887, "learning_rate": 8.926152525902792e-06, "loss": 18.0539, "step": 12921 }, { "epoch": 0.23620377648198584, "grad_norm": 6.463275205700063, "learning_rate": 8.92596922762316e-06, "loss": 17.5902, "step": 12922 }, { "epoch": 0.23622205567843238, "grad_norm": 8.628181653084809, "learning_rate": 8.925785915583348e-06, "loss": 17.5339, "step": 12923 }, { "epoch": 0.2362403348748789, "grad_norm": 8.540834670203148, "learning_rate": 8.925602589783996e-06, "loss": 18.7885, "step": 12924 }, { "epoch": 0.23625861407132542, "grad_norm": 5.297173130657822, "learning_rate": 8.925419250225748e-06, "loss": 16.9093, "step": 12925 }, { "epoch": 0.23627689326777196, "grad_norm": 6.726903272632686, "learning_rate": 8.925235896909249e-06, "loss": 17.4664, "step": 12926 }, { "epoch": 0.23629517246421847, "grad_norm": 6.332665712850595, "learning_rate": 8.925052529835135e-06, "loss": 17.659, "step": 12927 }, { "epoch": 0.236313451660665, "grad_norm": 7.600987729347651, "learning_rate": 8.924869149004054e-06, "loss": 18.2163, "step": 12928 }, { "epoch": 0.2363317308571115, "grad_norm": 5.9369767914865115, "learning_rate": 8.924685754416647e-06, "loss": 17.4254, "step": 12929 }, { "epoch": 0.23635001005355805, "grad_norm": 7.368030724782585, "learning_rate": 8.924502346073557e-06, "loss": 17.8777, "step": 12930 }, { "epoch": 0.23636828925000458, "grad_norm": 6.516669152894072, "learning_rate": 8.924318923975427e-06, "loss": 17.4103, "step": 12931 }, { "epoch": 0.2363865684464511, "grad_norm": 7.678784942366478, "learning_rate": 8.924135488122901e-06, "loss": 17.7098, "step": 12932 }, { "epoch": 0.23640484764289763, "grad_norm": 7.832311221067831, "learning_rate": 8.923952038516618e-06, "loss": 18.3829, "step": 12933 }, { "epoch": 0.23642312683934413, "grad_norm": 6.78578352786296, "learning_rate": 8.923768575157225e-06, "loss": 17.9866, "step": 12934 }, { "epoch": 0.23644140603579067, "grad_norm": 8.651035737802951, "learning_rate": 8.923585098045362e-06, "loss": 17.7727, "step": 12935 }, { "epoch": 0.2364596852322372, "grad_norm": 13.048462594403414, "learning_rate": 8.923401607181676e-06, "loss": 17.1627, "step": 12936 }, { "epoch": 0.2364779644286837, "grad_norm": 5.5618522535398345, "learning_rate": 8.923218102566807e-06, "loss": 17.355, "step": 12937 }, { "epoch": 0.23649624362513025, "grad_norm": 5.53513557869465, "learning_rate": 8.923034584201399e-06, "loss": 17.1682, "step": 12938 }, { "epoch": 0.23651452282157676, "grad_norm": 6.201165463046301, "learning_rate": 8.922851052086095e-06, "loss": 17.4474, "step": 12939 }, { "epoch": 0.2365328020180233, "grad_norm": 6.598837901107383, "learning_rate": 8.922667506221538e-06, "loss": 17.4188, "step": 12940 }, { "epoch": 0.2365510812144698, "grad_norm": 8.209426477597665, "learning_rate": 8.922483946608373e-06, "loss": 17.8844, "step": 12941 }, { "epoch": 0.23656936041091634, "grad_norm": 6.694747798026469, "learning_rate": 8.922300373247243e-06, "loss": 17.669, "step": 12942 }, { "epoch": 0.23658763960736287, "grad_norm": 8.050533141119972, "learning_rate": 8.922116786138787e-06, "loss": 18.3496, "step": 12943 }, { "epoch": 0.23660591880380938, "grad_norm": 8.172578495070955, "learning_rate": 8.921933185283655e-06, "loss": 18.6932, "step": 12944 }, { "epoch": 0.23662419800025591, "grad_norm": 6.267768583441307, "learning_rate": 8.921749570682487e-06, "loss": 17.3662, "step": 12945 }, { "epoch": 0.23664247719670242, "grad_norm": 6.258043866978442, "learning_rate": 8.921565942335926e-06, "loss": 17.7663, "step": 12946 }, { "epoch": 0.23666075639314896, "grad_norm": 8.38711526934771, "learning_rate": 8.92138230024462e-06, "loss": 18.1128, "step": 12947 }, { "epoch": 0.2366790355895955, "grad_norm": 6.5203555330921095, "learning_rate": 8.921198644409205e-06, "loss": 17.3761, "step": 12948 }, { "epoch": 0.236697314786042, "grad_norm": 7.421198378402833, "learning_rate": 8.92101497483033e-06, "loss": 17.9431, "step": 12949 }, { "epoch": 0.23671559398248854, "grad_norm": 7.692947297720974, "learning_rate": 8.92083129150864e-06, "loss": 17.9351, "step": 12950 }, { "epoch": 0.23673387317893504, "grad_norm": 6.8739816391982735, "learning_rate": 8.920647594444774e-06, "loss": 17.3881, "step": 12951 }, { "epoch": 0.23675215237538158, "grad_norm": 10.080419192852398, "learning_rate": 8.92046388363938e-06, "loss": 17.6047, "step": 12952 }, { "epoch": 0.23677043157182812, "grad_norm": 5.3604729480078825, "learning_rate": 8.9202801590931e-06, "loss": 17.0675, "step": 12953 }, { "epoch": 0.23678871076827462, "grad_norm": 8.015903862743421, "learning_rate": 8.920096420806578e-06, "loss": 17.9858, "step": 12954 }, { "epoch": 0.23680698996472116, "grad_norm": 7.491044387526426, "learning_rate": 8.919912668780458e-06, "loss": 17.679, "step": 12955 }, { "epoch": 0.23682526916116767, "grad_norm": 8.120830097300725, "learning_rate": 8.919728903015383e-06, "loss": 18.2456, "step": 12956 }, { "epoch": 0.2368435483576142, "grad_norm": 6.5733303166914725, "learning_rate": 8.919545123512001e-06, "loss": 17.4546, "step": 12957 }, { "epoch": 0.2368618275540607, "grad_norm": 7.039356996593743, "learning_rate": 8.919361330270953e-06, "loss": 17.6232, "step": 12958 }, { "epoch": 0.23688010675050725, "grad_norm": 5.745232514859346, "learning_rate": 8.919177523292882e-06, "loss": 17.0815, "step": 12959 }, { "epoch": 0.23689838594695378, "grad_norm": 7.6415996339346, "learning_rate": 8.918993702578435e-06, "loss": 17.7463, "step": 12960 }, { "epoch": 0.2369166651434003, "grad_norm": 6.798791473928604, "learning_rate": 8.918809868128255e-06, "loss": 17.4995, "step": 12961 }, { "epoch": 0.23693494433984683, "grad_norm": 7.3922512658335915, "learning_rate": 8.918626019942987e-06, "loss": 17.8245, "step": 12962 }, { "epoch": 0.23695322353629333, "grad_norm": 6.017111653920449, "learning_rate": 8.918442158023272e-06, "loss": 17.3875, "step": 12963 }, { "epoch": 0.23697150273273987, "grad_norm": 6.253744335544148, "learning_rate": 8.91825828236976e-06, "loss": 17.5898, "step": 12964 }, { "epoch": 0.2369897819291864, "grad_norm": 5.89477590318735, "learning_rate": 8.918074392983093e-06, "loss": 17.1829, "step": 12965 }, { "epoch": 0.2370080611256329, "grad_norm": 6.316536465879392, "learning_rate": 8.917890489863915e-06, "loss": 17.3599, "step": 12966 }, { "epoch": 0.23702634032207945, "grad_norm": 6.1109856005017855, "learning_rate": 8.91770657301287e-06, "loss": 17.5658, "step": 12967 }, { "epoch": 0.23704461951852596, "grad_norm": 6.237782407754986, "learning_rate": 8.917522642430603e-06, "loss": 17.3666, "step": 12968 }, { "epoch": 0.2370628987149725, "grad_norm": 5.6297601624356215, "learning_rate": 8.91733869811776e-06, "loss": 17.3055, "step": 12969 }, { "epoch": 0.23708117791141903, "grad_norm": 6.639135237707821, "learning_rate": 8.917154740074984e-06, "loss": 17.4063, "step": 12970 }, { "epoch": 0.23709945710786554, "grad_norm": 9.328502384457716, "learning_rate": 8.916970768302921e-06, "loss": 18.093, "step": 12971 }, { "epoch": 0.23711773630431207, "grad_norm": 8.167995637422464, "learning_rate": 8.916786782802216e-06, "loss": 17.8628, "step": 12972 }, { "epoch": 0.23713601550075858, "grad_norm": 6.9413891111221, "learning_rate": 8.916602783573514e-06, "loss": 17.4451, "step": 12973 }, { "epoch": 0.23715429469720511, "grad_norm": 5.304652642268245, "learning_rate": 8.916418770617457e-06, "loss": 17.0545, "step": 12974 }, { "epoch": 0.23717257389365162, "grad_norm": 5.701778711333664, "learning_rate": 8.916234743934693e-06, "loss": 17.1716, "step": 12975 }, { "epoch": 0.23719085309009816, "grad_norm": 5.964328240908642, "learning_rate": 8.916050703525867e-06, "loss": 17.4856, "step": 12976 }, { "epoch": 0.2372091322865447, "grad_norm": 7.325778422518426, "learning_rate": 8.91586664939162e-06, "loss": 18.1201, "step": 12977 }, { "epoch": 0.2372274114829912, "grad_norm": 7.438311462390998, "learning_rate": 8.915682581532604e-06, "loss": 17.6801, "step": 12978 }, { "epoch": 0.23724569067943774, "grad_norm": 7.920146329375921, "learning_rate": 8.915498499949458e-06, "loss": 17.7486, "step": 12979 }, { "epoch": 0.23726396987588425, "grad_norm": 6.617229114538101, "learning_rate": 8.91531440464283e-06, "loss": 17.3874, "step": 12980 }, { "epoch": 0.23728224907233078, "grad_norm": 13.694352843739813, "learning_rate": 8.915130295613364e-06, "loss": 17.8205, "step": 12981 }, { "epoch": 0.23730052826877732, "grad_norm": 7.00801840795046, "learning_rate": 8.914946172861707e-06, "loss": 17.4743, "step": 12982 }, { "epoch": 0.23731880746522382, "grad_norm": 6.043636742624499, "learning_rate": 8.914762036388504e-06, "loss": 17.3542, "step": 12983 }, { "epoch": 0.23733708666167036, "grad_norm": 6.713773364585318, "learning_rate": 8.914577886194399e-06, "loss": 17.7853, "step": 12984 }, { "epoch": 0.23735536585811687, "grad_norm": 8.468798535179605, "learning_rate": 8.914393722280039e-06, "loss": 17.895, "step": 12985 }, { "epoch": 0.2373736450545634, "grad_norm": 6.485210633111785, "learning_rate": 8.914209544646066e-06, "loss": 17.6248, "step": 12986 }, { "epoch": 0.23739192425100994, "grad_norm": 6.579365789610776, "learning_rate": 8.914025353293132e-06, "loss": 17.8761, "step": 12987 }, { "epoch": 0.23741020344745645, "grad_norm": 6.270679818890554, "learning_rate": 8.913841148221875e-06, "loss": 17.2867, "step": 12988 }, { "epoch": 0.23742848264390298, "grad_norm": 7.067963757021928, "learning_rate": 8.913656929432948e-06, "loss": 17.7708, "step": 12989 }, { "epoch": 0.2374467618403495, "grad_norm": 6.913535836401947, "learning_rate": 8.91347269692699e-06, "loss": 17.6606, "step": 12990 }, { "epoch": 0.23746504103679603, "grad_norm": 8.262615181485657, "learning_rate": 8.913288450704653e-06, "loss": 18.3434, "step": 12991 }, { "epoch": 0.23748332023324253, "grad_norm": 6.780892302162567, "learning_rate": 8.913104190766577e-06, "loss": 17.4476, "step": 12992 }, { "epoch": 0.23750159942968907, "grad_norm": 5.48441401148886, "learning_rate": 8.912919917113412e-06, "loss": 17.1764, "step": 12993 }, { "epoch": 0.2375198786261356, "grad_norm": 8.026787375146164, "learning_rate": 8.9127356297458e-06, "loss": 18.0069, "step": 12994 }, { "epoch": 0.2375381578225821, "grad_norm": 7.492718384645257, "learning_rate": 8.912551328664392e-06, "loss": 18.2687, "step": 12995 }, { "epoch": 0.23755643701902865, "grad_norm": 7.000165263766671, "learning_rate": 8.91236701386983e-06, "loss": 17.9321, "step": 12996 }, { "epoch": 0.23757471621547516, "grad_norm": 7.877236264654493, "learning_rate": 8.91218268536276e-06, "loss": 17.5602, "step": 12997 }, { "epoch": 0.2375929954119217, "grad_norm": 6.946470021963183, "learning_rate": 8.91199834314383e-06, "loss": 17.5178, "step": 12998 }, { "epoch": 0.23761127460836823, "grad_norm": 5.90998841843763, "learning_rate": 8.911813987213685e-06, "loss": 17.1298, "step": 12999 }, { "epoch": 0.23762955380481474, "grad_norm": 6.551602095948039, "learning_rate": 8.911629617572971e-06, "loss": 17.5774, "step": 13000 }, { "epoch": 0.23764783300126127, "grad_norm": 5.662640598811488, "learning_rate": 8.911445234222335e-06, "loss": 17.0583, "step": 13001 }, { "epoch": 0.23766611219770778, "grad_norm": 7.234619571718053, "learning_rate": 8.911260837162423e-06, "loss": 18.1144, "step": 13002 }, { "epoch": 0.23768439139415432, "grad_norm": 6.715066588459917, "learning_rate": 8.911076426393881e-06, "loss": 17.698, "step": 13003 }, { "epoch": 0.23770267059060085, "grad_norm": 6.230777428028448, "learning_rate": 8.910892001917357e-06, "loss": 17.3361, "step": 13004 }, { "epoch": 0.23772094978704736, "grad_norm": 6.988059654715737, "learning_rate": 8.910707563733495e-06, "loss": 17.3595, "step": 13005 }, { "epoch": 0.2377392289834939, "grad_norm": 7.159736432205771, "learning_rate": 8.910523111842942e-06, "loss": 17.7156, "step": 13006 }, { "epoch": 0.2377575081799404, "grad_norm": 7.3308498510596225, "learning_rate": 8.910338646246344e-06, "loss": 17.9566, "step": 13007 }, { "epoch": 0.23777578737638694, "grad_norm": 7.0589468435521825, "learning_rate": 8.910154166944348e-06, "loss": 17.6011, "step": 13008 }, { "epoch": 0.23779406657283345, "grad_norm": 6.9181011176341585, "learning_rate": 8.909969673937603e-06, "loss": 17.7502, "step": 13009 }, { "epoch": 0.23781234576927998, "grad_norm": 6.4022308663041985, "learning_rate": 8.909785167226755e-06, "loss": 17.4139, "step": 13010 }, { "epoch": 0.23783062496572652, "grad_norm": 6.066161465753598, "learning_rate": 8.909600646812446e-06, "loss": 17.4185, "step": 13011 }, { "epoch": 0.23784890416217302, "grad_norm": 8.197393433135085, "learning_rate": 8.909416112695327e-06, "loss": 18.1142, "step": 13012 }, { "epoch": 0.23786718335861956, "grad_norm": 6.369683808467332, "learning_rate": 8.909231564876045e-06, "loss": 17.5209, "step": 13013 }, { "epoch": 0.23788546255506607, "grad_norm": 7.031029507963712, "learning_rate": 8.909047003355244e-06, "loss": 17.7567, "step": 13014 }, { "epoch": 0.2379037417515126, "grad_norm": 7.37520805755393, "learning_rate": 8.90886242813357e-06, "loss": 18.1819, "step": 13015 }, { "epoch": 0.23792202094795914, "grad_norm": 6.454543253959291, "learning_rate": 8.908677839211677e-06, "loss": 17.4121, "step": 13016 }, { "epoch": 0.23794030014440565, "grad_norm": 7.113403509963748, "learning_rate": 8.908493236590206e-06, "loss": 17.695, "step": 13017 }, { "epoch": 0.23795857934085218, "grad_norm": 5.791101109650206, "learning_rate": 8.908308620269806e-06, "loss": 17.2096, "step": 13018 }, { "epoch": 0.2379768585372987, "grad_norm": 6.6434223453450105, "learning_rate": 8.908123990251121e-06, "loss": 17.7579, "step": 13019 }, { "epoch": 0.23799513773374523, "grad_norm": 5.8890298099976075, "learning_rate": 8.907939346534802e-06, "loss": 17.3712, "step": 13020 }, { "epoch": 0.23801341693019176, "grad_norm": 6.076916740522336, "learning_rate": 8.907754689121495e-06, "loss": 17.2601, "step": 13021 }, { "epoch": 0.23803169612663827, "grad_norm": 6.499479833887206, "learning_rate": 8.907570018011846e-06, "loss": 17.448, "step": 13022 }, { "epoch": 0.2380499753230848, "grad_norm": 6.908927066972406, "learning_rate": 8.907385333206505e-06, "loss": 17.8089, "step": 13023 }, { "epoch": 0.2380682545195313, "grad_norm": 9.149487012087734, "learning_rate": 8.907200634706116e-06, "loss": 18.5294, "step": 13024 }, { "epoch": 0.23808653371597785, "grad_norm": 7.6501435084001175, "learning_rate": 8.907015922511329e-06, "loss": 18.1171, "step": 13025 }, { "epoch": 0.23810481291242436, "grad_norm": 6.763376327046655, "learning_rate": 8.906831196622787e-06, "loss": 17.5626, "step": 13026 }, { "epoch": 0.2381230921088709, "grad_norm": 7.702371224133755, "learning_rate": 8.906646457041144e-06, "loss": 17.7776, "step": 13027 }, { "epoch": 0.23814137130531743, "grad_norm": 7.977697597983092, "learning_rate": 8.906461703767043e-06, "loss": 18.0928, "step": 13028 }, { "epoch": 0.23815965050176394, "grad_norm": 5.379696040297563, "learning_rate": 8.906276936801132e-06, "loss": 17.231, "step": 13029 }, { "epoch": 0.23817792969821047, "grad_norm": 6.577581666634239, "learning_rate": 8.906092156144062e-06, "loss": 17.7253, "step": 13030 }, { "epoch": 0.23819620889465698, "grad_norm": 8.389811716463779, "learning_rate": 8.905907361796476e-06, "loss": 17.9966, "step": 13031 }, { "epoch": 0.23821448809110352, "grad_norm": 6.530808002128086, "learning_rate": 8.905722553759023e-06, "loss": 17.6125, "step": 13032 }, { "epoch": 0.23823276728755005, "grad_norm": 6.515670033066622, "learning_rate": 8.905537732032352e-06, "loss": 17.6081, "step": 13033 }, { "epoch": 0.23825104648399656, "grad_norm": 5.760940434326444, "learning_rate": 8.90535289661711e-06, "loss": 17.2928, "step": 13034 }, { "epoch": 0.2382693256804431, "grad_norm": 8.140661054656807, "learning_rate": 8.905168047513945e-06, "loss": 18.464, "step": 13035 }, { "epoch": 0.2382876048768896, "grad_norm": 5.78604602671709, "learning_rate": 8.904983184723505e-06, "loss": 17.4396, "step": 13036 }, { "epoch": 0.23830588407333614, "grad_norm": 7.311959534577604, "learning_rate": 8.90479830824644e-06, "loss": 17.2399, "step": 13037 }, { "epoch": 0.23832416326978267, "grad_norm": 7.868191176366132, "learning_rate": 8.904613418083393e-06, "loss": 17.9482, "step": 13038 }, { "epoch": 0.23834244246622918, "grad_norm": 7.234337688278188, "learning_rate": 8.904428514235016e-06, "loss": 17.7567, "step": 13039 }, { "epoch": 0.23836072166267572, "grad_norm": 6.489097226413424, "learning_rate": 8.904243596701954e-06, "loss": 17.5105, "step": 13040 }, { "epoch": 0.23837900085912223, "grad_norm": 5.794715028733325, "learning_rate": 8.904058665484859e-06, "loss": 17.4734, "step": 13041 }, { "epoch": 0.23839728005556876, "grad_norm": 6.8670725460927295, "learning_rate": 8.903873720584376e-06, "loss": 17.6704, "step": 13042 }, { "epoch": 0.23841555925201527, "grad_norm": 6.4415121819820165, "learning_rate": 8.903688762001154e-06, "loss": 17.5562, "step": 13043 }, { "epoch": 0.2384338384484618, "grad_norm": 7.108884199427061, "learning_rate": 8.903503789735843e-06, "loss": 17.8931, "step": 13044 }, { "epoch": 0.23845211764490834, "grad_norm": 7.446173452101878, "learning_rate": 8.903318803789089e-06, "loss": 18.1885, "step": 13045 }, { "epoch": 0.23847039684135485, "grad_norm": 7.67893773097202, "learning_rate": 8.903133804161543e-06, "loss": 17.9477, "step": 13046 }, { "epoch": 0.23848867603780138, "grad_norm": 6.6717209776126865, "learning_rate": 8.90294879085385e-06, "loss": 17.8488, "step": 13047 }, { "epoch": 0.2385069552342479, "grad_norm": 6.984363402847883, "learning_rate": 8.902763763866661e-06, "loss": 17.969, "step": 13048 }, { "epoch": 0.23852523443069443, "grad_norm": 5.666023704553353, "learning_rate": 8.902578723200623e-06, "loss": 17.2028, "step": 13049 }, { "epoch": 0.23854351362714096, "grad_norm": 6.75865032362205, "learning_rate": 8.902393668856386e-06, "loss": 17.4377, "step": 13050 }, { "epoch": 0.23856179282358747, "grad_norm": 9.262343010243793, "learning_rate": 8.902208600834596e-06, "loss": 18.5766, "step": 13051 }, { "epoch": 0.238580072020034, "grad_norm": 7.539999763369353, "learning_rate": 8.902023519135906e-06, "loss": 17.9697, "step": 13052 }, { "epoch": 0.23859835121648051, "grad_norm": 6.9925561920139385, "learning_rate": 8.901838423760962e-06, "loss": 17.8635, "step": 13053 }, { "epoch": 0.23861663041292705, "grad_norm": 9.722389640327213, "learning_rate": 8.90165331471041e-06, "loss": 17.665, "step": 13054 }, { "epoch": 0.23863490960937359, "grad_norm": 7.012750000220342, "learning_rate": 8.901468191984905e-06, "loss": 17.9913, "step": 13055 }, { "epoch": 0.2386531888058201, "grad_norm": 6.897659451433708, "learning_rate": 8.901283055585091e-06, "loss": 18.0401, "step": 13056 }, { "epoch": 0.23867146800226663, "grad_norm": 5.730732391719401, "learning_rate": 8.90109790551162e-06, "loss": 17.2895, "step": 13057 }, { "epoch": 0.23868974719871314, "grad_norm": 8.528304224384835, "learning_rate": 8.900912741765137e-06, "loss": 19.029, "step": 13058 }, { "epoch": 0.23870802639515967, "grad_norm": 7.6515832604941565, "learning_rate": 8.900727564346294e-06, "loss": 18.3438, "step": 13059 }, { "epoch": 0.23872630559160618, "grad_norm": 6.588949651884337, "learning_rate": 8.90054237325574e-06, "loss": 17.5206, "step": 13060 }, { "epoch": 0.23874458478805272, "grad_norm": 5.966831394317411, "learning_rate": 8.900357168494123e-06, "loss": 17.5629, "step": 13061 }, { "epoch": 0.23876286398449925, "grad_norm": 5.853515766728405, "learning_rate": 8.900171950062092e-06, "loss": 17.0216, "step": 13062 }, { "epoch": 0.23878114318094576, "grad_norm": 5.879637566081577, "learning_rate": 8.899986717960298e-06, "loss": 17.4537, "step": 13063 }, { "epoch": 0.2387994223773923, "grad_norm": 6.965208279550614, "learning_rate": 8.899801472189389e-06, "loss": 17.9539, "step": 13064 }, { "epoch": 0.2388177015738388, "grad_norm": 6.346859650529671, "learning_rate": 8.899616212750013e-06, "loss": 17.3691, "step": 13065 }, { "epoch": 0.23883598077028534, "grad_norm": 10.205520720902255, "learning_rate": 8.899430939642822e-06, "loss": 17.953, "step": 13066 }, { "epoch": 0.23885425996673187, "grad_norm": 6.291004518877176, "learning_rate": 8.899245652868461e-06, "loss": 17.7778, "step": 13067 }, { "epoch": 0.23887253916317838, "grad_norm": 7.281603008158021, "learning_rate": 8.899060352427587e-06, "loss": 17.8255, "step": 13068 }, { "epoch": 0.23889081835962492, "grad_norm": 7.89320208932405, "learning_rate": 8.898875038320842e-06, "loss": 17.6524, "step": 13069 }, { "epoch": 0.23890909755607143, "grad_norm": 6.083023775630701, "learning_rate": 8.898689710548878e-06, "loss": 17.5337, "step": 13070 }, { "epoch": 0.23892737675251796, "grad_norm": 6.442767289991403, "learning_rate": 8.898504369112346e-06, "loss": 17.5752, "step": 13071 }, { "epoch": 0.2389456559489645, "grad_norm": 5.926364232680868, "learning_rate": 8.898319014011893e-06, "loss": 17.4152, "step": 13072 }, { "epoch": 0.238963935145411, "grad_norm": 5.853578651618931, "learning_rate": 8.898133645248172e-06, "loss": 17.361, "step": 13073 }, { "epoch": 0.23898221434185754, "grad_norm": 8.037353131529064, "learning_rate": 8.89794826282183e-06, "loss": 18.3901, "step": 13074 }, { "epoch": 0.23900049353830405, "grad_norm": 6.839033028328603, "learning_rate": 8.897762866733516e-06, "loss": 17.7177, "step": 13075 }, { "epoch": 0.23901877273475058, "grad_norm": 6.458383770802645, "learning_rate": 8.897577456983884e-06, "loss": 17.5274, "step": 13076 }, { "epoch": 0.2390370519311971, "grad_norm": 6.186196520678368, "learning_rate": 8.89739203357358e-06, "loss": 17.5473, "step": 13077 }, { "epoch": 0.23905533112764363, "grad_norm": 6.113652323107251, "learning_rate": 8.897206596503256e-06, "loss": 17.2725, "step": 13078 }, { "epoch": 0.23907361032409016, "grad_norm": 7.155653850176197, "learning_rate": 8.89702114577356e-06, "loss": 17.838, "step": 13079 }, { "epoch": 0.23909188952053667, "grad_norm": 6.4961595382647825, "learning_rate": 8.896835681385143e-06, "loss": 17.5084, "step": 13080 }, { "epoch": 0.2391101687169832, "grad_norm": 5.803814602285096, "learning_rate": 8.896650203338655e-06, "loss": 17.1967, "step": 13081 }, { "epoch": 0.23912844791342971, "grad_norm": 6.125421174580098, "learning_rate": 8.896464711634748e-06, "loss": 17.5201, "step": 13082 }, { "epoch": 0.23914672710987625, "grad_norm": 7.170635367768776, "learning_rate": 8.896279206274069e-06, "loss": 17.9011, "step": 13083 }, { "epoch": 0.23916500630632279, "grad_norm": 7.9103584189316765, "learning_rate": 8.89609368725727e-06, "loss": 17.7902, "step": 13084 }, { "epoch": 0.2391832855027693, "grad_norm": 5.583691209122781, "learning_rate": 8.895908154585e-06, "loss": 17.2785, "step": 13085 }, { "epoch": 0.23920156469921583, "grad_norm": 6.787952200758381, "learning_rate": 8.895722608257909e-06, "loss": 17.717, "step": 13086 }, { "epoch": 0.23921984389566234, "grad_norm": 6.404965713753553, "learning_rate": 8.89553704827665e-06, "loss": 17.1054, "step": 13087 }, { "epoch": 0.23923812309210887, "grad_norm": 6.5079624006987205, "learning_rate": 8.895351474641872e-06, "loss": 17.4883, "step": 13088 }, { "epoch": 0.2392564022885554, "grad_norm": 6.930271806626031, "learning_rate": 8.895165887354225e-06, "loss": 17.6584, "step": 13089 }, { "epoch": 0.23927468148500192, "grad_norm": 7.241331026542094, "learning_rate": 8.894980286414358e-06, "loss": 17.7407, "step": 13090 }, { "epoch": 0.23929296068144845, "grad_norm": 8.199361623072374, "learning_rate": 8.894794671822924e-06, "loss": 18.387, "step": 13091 }, { "epoch": 0.23931123987789496, "grad_norm": 6.145782542820028, "learning_rate": 8.894609043580573e-06, "loss": 17.5219, "step": 13092 }, { "epoch": 0.2393295190743415, "grad_norm": 5.964790287197087, "learning_rate": 8.894423401687954e-06, "loss": 17.2074, "step": 13093 }, { "epoch": 0.239347798270788, "grad_norm": 7.531933580267063, "learning_rate": 8.894237746145719e-06, "loss": 18.2056, "step": 13094 }, { "epoch": 0.23936607746723454, "grad_norm": 6.628781494873603, "learning_rate": 8.894052076954521e-06, "loss": 17.7886, "step": 13095 }, { "epoch": 0.23938435666368107, "grad_norm": 7.072780596815424, "learning_rate": 8.893866394115006e-06, "loss": 17.6488, "step": 13096 }, { "epoch": 0.23940263586012758, "grad_norm": 7.582250839989128, "learning_rate": 8.893680697627829e-06, "loss": 17.9527, "step": 13097 }, { "epoch": 0.23942091505657412, "grad_norm": 8.00874576216099, "learning_rate": 8.893494987493637e-06, "loss": 18.2301, "step": 13098 }, { "epoch": 0.23943919425302063, "grad_norm": 6.624719063862705, "learning_rate": 8.893309263713084e-06, "loss": 17.5278, "step": 13099 }, { "epoch": 0.23945747344946716, "grad_norm": 6.073831096744187, "learning_rate": 8.89312352628682e-06, "loss": 17.3268, "step": 13100 }, { "epoch": 0.2394757526459137, "grad_norm": 5.6543958726883945, "learning_rate": 8.892937775215496e-06, "loss": 17.3742, "step": 13101 }, { "epoch": 0.2394940318423602, "grad_norm": 6.598721554417625, "learning_rate": 8.892752010499762e-06, "loss": 17.414, "step": 13102 }, { "epoch": 0.23951231103880674, "grad_norm": 8.802692124687134, "learning_rate": 8.892566232140271e-06, "loss": 18.4354, "step": 13103 }, { "epoch": 0.23953059023525325, "grad_norm": 7.377216148339256, "learning_rate": 8.892380440137674e-06, "loss": 17.6755, "step": 13104 }, { "epoch": 0.23954886943169978, "grad_norm": 6.042696197432081, "learning_rate": 8.892194634492619e-06, "loss": 17.3203, "step": 13105 }, { "epoch": 0.23956714862814632, "grad_norm": 7.389500299627137, "learning_rate": 8.89200881520576e-06, "loss": 18.1021, "step": 13106 }, { "epoch": 0.23958542782459283, "grad_norm": 6.678872073304817, "learning_rate": 8.89182298227775e-06, "loss": 17.6422, "step": 13107 }, { "epoch": 0.23960370702103936, "grad_norm": 6.928150862474782, "learning_rate": 8.891637135709236e-06, "loss": 17.8863, "step": 13108 }, { "epoch": 0.23962198621748587, "grad_norm": 7.159367597370521, "learning_rate": 8.891451275500872e-06, "loss": 17.8805, "step": 13109 }, { "epoch": 0.2396402654139324, "grad_norm": 5.940165891535454, "learning_rate": 8.891265401653308e-06, "loss": 17.1754, "step": 13110 }, { "epoch": 0.23965854461037892, "grad_norm": 8.137977278209508, "learning_rate": 8.891079514167198e-06, "loss": 18.3722, "step": 13111 }, { "epoch": 0.23967682380682545, "grad_norm": 6.995921613837327, "learning_rate": 8.890893613043191e-06, "loss": 17.7443, "step": 13112 }, { "epoch": 0.239695103003272, "grad_norm": 6.8188755108798995, "learning_rate": 8.890707698281941e-06, "loss": 17.7557, "step": 13113 }, { "epoch": 0.2397133821997185, "grad_norm": 7.630031424376271, "learning_rate": 8.890521769884097e-06, "loss": 18.0336, "step": 13114 }, { "epoch": 0.23973166139616503, "grad_norm": 6.747272391453249, "learning_rate": 8.890335827850312e-06, "loss": 17.648, "step": 13115 }, { "epoch": 0.23974994059261154, "grad_norm": 5.524067398388494, "learning_rate": 8.890149872181237e-06, "loss": 17.2013, "step": 13116 }, { "epoch": 0.23976821978905807, "grad_norm": 7.001019747305786, "learning_rate": 8.889963902877525e-06, "loss": 17.6696, "step": 13117 }, { "epoch": 0.2397864989855046, "grad_norm": 5.892740280706896, "learning_rate": 8.889777919939827e-06, "loss": 17.3016, "step": 13118 }, { "epoch": 0.23980477818195112, "grad_norm": 6.4090602622592465, "learning_rate": 8.889591923368794e-06, "loss": 17.3847, "step": 13119 }, { "epoch": 0.23982305737839765, "grad_norm": 6.843988185794902, "learning_rate": 8.88940591316508e-06, "loss": 17.6176, "step": 13120 }, { "epoch": 0.23984133657484416, "grad_norm": 7.653232303178483, "learning_rate": 8.889219889329337e-06, "loss": 17.6849, "step": 13121 }, { "epoch": 0.2398596157712907, "grad_norm": 6.222701452795798, "learning_rate": 8.889033851862213e-06, "loss": 17.0545, "step": 13122 }, { "epoch": 0.23987789496773723, "grad_norm": 6.872948127642796, "learning_rate": 8.888847800764364e-06, "loss": 17.8578, "step": 13123 }, { "epoch": 0.23989617416418374, "grad_norm": 6.182030799781346, "learning_rate": 8.888661736036442e-06, "loss": 17.5718, "step": 13124 }, { "epoch": 0.23991445336063028, "grad_norm": 8.496963706036816, "learning_rate": 8.888475657679096e-06, "loss": 18.1289, "step": 13125 }, { "epoch": 0.23993273255707678, "grad_norm": 6.461940796817892, "learning_rate": 8.88828956569298e-06, "loss": 17.4589, "step": 13126 }, { "epoch": 0.23995101175352332, "grad_norm": 7.833861150936122, "learning_rate": 8.88810346007875e-06, "loss": 17.9519, "step": 13127 }, { "epoch": 0.23996929094996983, "grad_norm": 7.524091408967035, "learning_rate": 8.88791734083705e-06, "loss": 18.0065, "step": 13128 }, { "epoch": 0.23998757014641636, "grad_norm": 7.119184391647039, "learning_rate": 8.887731207968541e-06, "loss": 17.6692, "step": 13129 }, { "epoch": 0.2400058493428629, "grad_norm": 7.258629108754563, "learning_rate": 8.88754506147387e-06, "loss": 18.0561, "step": 13130 }, { "epoch": 0.2400241285393094, "grad_norm": 6.744296533148103, "learning_rate": 8.887358901353691e-06, "loss": 17.7762, "step": 13131 }, { "epoch": 0.24004240773575594, "grad_norm": 6.343742431230002, "learning_rate": 8.887172727608656e-06, "loss": 17.8432, "step": 13132 }, { "epoch": 0.24006068693220245, "grad_norm": 7.908862263357061, "learning_rate": 8.886986540239418e-06, "loss": 17.9585, "step": 13133 }, { "epoch": 0.24007896612864899, "grad_norm": 6.879068165449237, "learning_rate": 8.88680033924663e-06, "loss": 18.0544, "step": 13134 }, { "epoch": 0.24009724532509552, "grad_norm": 6.480055404882929, "learning_rate": 8.886614124630944e-06, "loss": 17.5558, "step": 13135 }, { "epoch": 0.24011552452154203, "grad_norm": 7.085668774874577, "learning_rate": 8.886427896393014e-06, "loss": 17.843, "step": 13136 }, { "epoch": 0.24013380371798856, "grad_norm": 6.890374086423882, "learning_rate": 8.886241654533489e-06, "loss": 17.7323, "step": 13137 }, { "epoch": 0.24015208291443507, "grad_norm": 5.987956008528032, "learning_rate": 8.886055399053023e-06, "loss": 17.2639, "step": 13138 }, { "epoch": 0.2401703621108816, "grad_norm": 6.744770214996133, "learning_rate": 8.885869129952273e-06, "loss": 17.5546, "step": 13139 }, { "epoch": 0.24018864130732814, "grad_norm": 6.60720983685564, "learning_rate": 8.88568284723189e-06, "loss": 17.6936, "step": 13140 }, { "epoch": 0.24020692050377465, "grad_norm": 7.8990858114798845, "learning_rate": 8.885496550892523e-06, "loss": 18.0575, "step": 13141 }, { "epoch": 0.2402251997002212, "grad_norm": 7.336673911300776, "learning_rate": 8.885310240934829e-06, "loss": 17.7049, "step": 13142 }, { "epoch": 0.2402434788966677, "grad_norm": 7.110353962766776, "learning_rate": 8.885123917359459e-06, "loss": 17.8205, "step": 13143 }, { "epoch": 0.24026175809311423, "grad_norm": 7.2817624107343555, "learning_rate": 8.884937580167069e-06, "loss": 18.0881, "step": 13144 }, { "epoch": 0.24028003728956074, "grad_norm": 6.346453683807578, "learning_rate": 8.884751229358307e-06, "loss": 17.5181, "step": 13145 }, { "epoch": 0.24029831648600727, "grad_norm": 9.293193205765766, "learning_rate": 8.884564864933831e-06, "loss": 18.3708, "step": 13146 }, { "epoch": 0.2403165956824538, "grad_norm": 8.133346329073769, "learning_rate": 8.884378486894292e-06, "loss": 18.2355, "step": 13147 }, { "epoch": 0.24033487487890032, "grad_norm": 7.132364080017355, "learning_rate": 8.884192095240342e-06, "loss": 17.7903, "step": 13148 }, { "epoch": 0.24035315407534685, "grad_norm": 8.38705214516052, "learning_rate": 8.884005689972638e-06, "loss": 18.0754, "step": 13149 }, { "epoch": 0.24037143327179336, "grad_norm": 6.907929538270149, "learning_rate": 8.883819271091829e-06, "loss": 17.922, "step": 13150 }, { "epoch": 0.2403897124682399, "grad_norm": 6.488311848475475, "learning_rate": 8.883632838598571e-06, "loss": 17.4053, "step": 13151 }, { "epoch": 0.24040799166468643, "grad_norm": 5.511859400811804, "learning_rate": 8.883446392493517e-06, "loss": 17.0804, "step": 13152 }, { "epoch": 0.24042627086113294, "grad_norm": 7.59345289195484, "learning_rate": 8.883259932777321e-06, "loss": 18.2424, "step": 13153 }, { "epoch": 0.24044455005757948, "grad_norm": 8.42928295970774, "learning_rate": 8.883073459450634e-06, "loss": 18.2357, "step": 13154 }, { "epoch": 0.24046282925402598, "grad_norm": 6.6186807129970635, "learning_rate": 8.882886972514115e-06, "loss": 17.8616, "step": 13155 }, { "epoch": 0.24048110845047252, "grad_norm": 5.684230765944608, "learning_rate": 8.88270047196841e-06, "loss": 16.9709, "step": 13156 }, { "epoch": 0.24049938764691906, "grad_norm": 7.766390927007206, "learning_rate": 8.882513957814181e-06, "loss": 18.3054, "step": 13157 }, { "epoch": 0.24051766684336556, "grad_norm": 8.166219961211254, "learning_rate": 8.882327430052073e-06, "loss": 17.8959, "step": 13158 }, { "epoch": 0.2405359460398121, "grad_norm": 7.978006879651882, "learning_rate": 8.882140888682749e-06, "loss": 18.1603, "step": 13159 }, { "epoch": 0.2405542252362586, "grad_norm": 4.8592994633480835, "learning_rate": 8.881954333706854e-06, "loss": 16.9342, "step": 13160 }, { "epoch": 0.24057250443270514, "grad_norm": 6.951007142144269, "learning_rate": 8.88176776512505e-06, "loss": 17.8005, "step": 13161 }, { "epoch": 0.24059078362915165, "grad_norm": 6.92737058163528, "learning_rate": 8.881581182937983e-06, "loss": 17.4765, "step": 13162 }, { "epoch": 0.24060906282559819, "grad_norm": 6.316452115981913, "learning_rate": 8.881394587146313e-06, "loss": 17.4542, "step": 13163 }, { "epoch": 0.24062734202204472, "grad_norm": 6.695363778855367, "learning_rate": 8.88120797775069e-06, "loss": 17.319, "step": 13164 }, { "epoch": 0.24064562121849123, "grad_norm": 7.128956621944359, "learning_rate": 8.881021354751772e-06, "loss": 17.9394, "step": 13165 }, { "epoch": 0.24066390041493776, "grad_norm": 6.611792036140452, "learning_rate": 8.880834718150209e-06, "loss": 17.4905, "step": 13166 }, { "epoch": 0.24068217961138427, "grad_norm": 7.183528477062133, "learning_rate": 8.880648067946658e-06, "loss": 17.7129, "step": 13167 }, { "epoch": 0.2407004588078308, "grad_norm": 8.30278988075915, "learning_rate": 8.880461404141771e-06, "loss": 18.2827, "step": 13168 }, { "epoch": 0.24071873800427734, "grad_norm": 8.699138208964596, "learning_rate": 8.880274726736204e-06, "loss": 18.3182, "step": 13169 }, { "epoch": 0.24073701720072385, "grad_norm": 6.688449049955284, "learning_rate": 8.880088035730611e-06, "loss": 17.6063, "step": 13170 }, { "epoch": 0.2407552963971704, "grad_norm": 6.367619050050093, "learning_rate": 8.879901331125649e-06, "loss": 17.463, "step": 13171 }, { "epoch": 0.2407735755936169, "grad_norm": 4.951303404699288, "learning_rate": 8.879714612921966e-06, "loss": 16.8762, "step": 13172 }, { "epoch": 0.24079185479006343, "grad_norm": 7.467414625288263, "learning_rate": 8.879527881120222e-06, "loss": 18.2681, "step": 13173 }, { "epoch": 0.24081013398650997, "grad_norm": 7.872630041482053, "learning_rate": 8.879341135721067e-06, "loss": 18.0894, "step": 13174 }, { "epoch": 0.24082841318295647, "grad_norm": 7.730099311866059, "learning_rate": 8.87915437672516e-06, "loss": 17.6484, "step": 13175 }, { "epoch": 0.240846692379403, "grad_norm": 5.927060754664423, "learning_rate": 8.878967604133153e-06, "loss": 17.2848, "step": 13176 }, { "epoch": 0.24086497157584952, "grad_norm": 7.020997094443355, "learning_rate": 8.878780817945701e-06, "loss": 18.143, "step": 13177 }, { "epoch": 0.24088325077229605, "grad_norm": 5.337990125998545, "learning_rate": 8.87859401816346e-06, "loss": 17.156, "step": 13178 }, { "epoch": 0.24090152996874256, "grad_norm": 7.799283810345035, "learning_rate": 8.87840720478708e-06, "loss": 18.2052, "step": 13179 }, { "epoch": 0.2409198091651891, "grad_norm": 7.465493378785632, "learning_rate": 8.878220377817222e-06, "loss": 18.0285, "step": 13180 }, { "epoch": 0.24093808836163563, "grad_norm": 6.178996919767709, "learning_rate": 8.878033537254537e-06, "loss": 17.4663, "step": 13181 }, { "epoch": 0.24095636755808214, "grad_norm": 5.900119250643999, "learning_rate": 8.877846683099684e-06, "loss": 17.1177, "step": 13182 }, { "epoch": 0.24097464675452868, "grad_norm": 8.19533820450328, "learning_rate": 8.877659815353313e-06, "loss": 17.7, "step": 13183 }, { "epoch": 0.24099292595097518, "grad_norm": 6.52315300706894, "learning_rate": 8.87747293401608e-06, "loss": 17.7248, "step": 13184 }, { "epoch": 0.24101120514742172, "grad_norm": 6.058365530682624, "learning_rate": 8.877286039088642e-06, "loss": 17.0972, "step": 13185 }, { "epoch": 0.24102948434386826, "grad_norm": 7.314506776774982, "learning_rate": 8.87709913057165e-06, "loss": 17.7921, "step": 13186 }, { "epoch": 0.24104776354031476, "grad_norm": 6.62011695634416, "learning_rate": 8.876912208465765e-06, "loss": 17.7254, "step": 13187 }, { "epoch": 0.2410660427367613, "grad_norm": 7.241284419157164, "learning_rate": 8.876725272771639e-06, "loss": 17.8527, "step": 13188 }, { "epoch": 0.2410843219332078, "grad_norm": 6.002350376615827, "learning_rate": 8.876538323489925e-06, "loss": 17.4492, "step": 13189 }, { "epoch": 0.24110260112965434, "grad_norm": 6.5183420354195585, "learning_rate": 8.876351360621283e-06, "loss": 17.6424, "step": 13190 }, { "epoch": 0.24112088032610088, "grad_norm": 7.2924557115800726, "learning_rate": 8.876164384166365e-06, "loss": 17.5475, "step": 13191 }, { "epoch": 0.24113915952254739, "grad_norm": 6.838292540672922, "learning_rate": 8.875977394125828e-06, "loss": 17.7877, "step": 13192 }, { "epoch": 0.24115743871899392, "grad_norm": 7.474611917261855, "learning_rate": 8.875790390500325e-06, "loss": 17.638, "step": 13193 }, { "epoch": 0.24117571791544043, "grad_norm": 6.373800063567433, "learning_rate": 8.875603373290515e-06, "loss": 17.3922, "step": 13194 }, { "epoch": 0.24119399711188697, "grad_norm": 6.205982421908158, "learning_rate": 8.875416342497049e-06, "loss": 17.4431, "step": 13195 }, { "epoch": 0.24121227630833347, "grad_norm": 6.899377166810302, "learning_rate": 8.875229298120587e-06, "loss": 17.8066, "step": 13196 }, { "epoch": 0.24123055550478, "grad_norm": 7.718711203766756, "learning_rate": 8.875042240161781e-06, "loss": 17.9228, "step": 13197 }, { "epoch": 0.24124883470122654, "grad_norm": 7.243355889180906, "learning_rate": 8.87485516862129e-06, "loss": 17.8304, "step": 13198 }, { "epoch": 0.24126711389767305, "grad_norm": 7.233421254394535, "learning_rate": 8.874668083499767e-06, "loss": 17.9284, "step": 13199 }, { "epoch": 0.2412853930941196, "grad_norm": 7.8699390802999725, "learning_rate": 8.874480984797869e-06, "loss": 17.888, "step": 13200 }, { "epoch": 0.2413036722905661, "grad_norm": 7.071979508139762, "learning_rate": 8.87429387251625e-06, "loss": 17.6058, "step": 13201 }, { "epoch": 0.24132195148701263, "grad_norm": 6.285841114328047, "learning_rate": 8.874106746655569e-06, "loss": 17.1742, "step": 13202 }, { "epoch": 0.24134023068345917, "grad_norm": 6.578547586790232, "learning_rate": 8.873919607216478e-06, "loss": 17.5148, "step": 13203 }, { "epoch": 0.24135850987990567, "grad_norm": 5.450388431257817, "learning_rate": 8.873732454199638e-06, "loss": 17.0852, "step": 13204 }, { "epoch": 0.2413767890763522, "grad_norm": 7.313273339527103, "learning_rate": 8.873545287605701e-06, "loss": 18.157, "step": 13205 }, { "epoch": 0.24139506827279872, "grad_norm": 6.348646613613565, "learning_rate": 8.873358107435322e-06, "loss": 17.5325, "step": 13206 }, { "epoch": 0.24141334746924525, "grad_norm": 6.498333129874512, "learning_rate": 8.87317091368916e-06, "loss": 17.3581, "step": 13207 }, { "epoch": 0.2414316266656918, "grad_norm": 6.580875241650784, "learning_rate": 8.87298370636787e-06, "loss": 17.6637, "step": 13208 }, { "epoch": 0.2414499058621383, "grad_norm": 6.1743400167801035, "learning_rate": 8.872796485472109e-06, "loss": 17.4734, "step": 13209 }, { "epoch": 0.24146818505858483, "grad_norm": 6.9045866598215335, "learning_rate": 8.87260925100253e-06, "loss": 17.6767, "step": 13210 }, { "epoch": 0.24148646425503134, "grad_norm": 7.419330066661811, "learning_rate": 8.872422002959792e-06, "loss": 17.8789, "step": 13211 }, { "epoch": 0.24150474345147788, "grad_norm": 6.687812864967737, "learning_rate": 8.872234741344553e-06, "loss": 17.4704, "step": 13212 }, { "epoch": 0.24152302264792438, "grad_norm": 7.046758839553343, "learning_rate": 8.872047466157467e-06, "loss": 17.7868, "step": 13213 }, { "epoch": 0.24154130184437092, "grad_norm": 8.749597595690418, "learning_rate": 8.87186017739919e-06, "loss": 18.1431, "step": 13214 }, { "epoch": 0.24155958104081746, "grad_norm": 7.802112272264556, "learning_rate": 8.871672875070378e-06, "loss": 17.8983, "step": 13215 }, { "epoch": 0.24157786023726396, "grad_norm": 6.5238253261698205, "learning_rate": 8.87148555917169e-06, "loss": 17.4987, "step": 13216 }, { "epoch": 0.2415961394337105, "grad_norm": 8.130165120182328, "learning_rate": 8.87129822970378e-06, "loss": 18.2404, "step": 13217 }, { "epoch": 0.241614418630157, "grad_norm": 5.819613447220564, "learning_rate": 8.871110886667307e-06, "loss": 17.2527, "step": 13218 }, { "epoch": 0.24163269782660354, "grad_norm": 6.428250895224231, "learning_rate": 8.870923530062925e-06, "loss": 17.473, "step": 13219 }, { "epoch": 0.24165097702305008, "grad_norm": 9.147063504535623, "learning_rate": 8.870736159891294e-06, "loss": 18.2164, "step": 13220 }, { "epoch": 0.2416692562194966, "grad_norm": 6.396228529713454, "learning_rate": 8.870548776153066e-06, "loss": 17.3975, "step": 13221 }, { "epoch": 0.24168753541594312, "grad_norm": 5.741575381771392, "learning_rate": 8.870361378848902e-06, "loss": 17.2317, "step": 13222 }, { "epoch": 0.24170581461238963, "grad_norm": 6.002851179289545, "learning_rate": 8.870173967979457e-06, "loss": 17.4199, "step": 13223 }, { "epoch": 0.24172409380883617, "grad_norm": 6.375433046931361, "learning_rate": 8.869986543545386e-06, "loss": 17.4255, "step": 13224 }, { "epoch": 0.2417423730052827, "grad_norm": 6.964841275569278, "learning_rate": 8.869799105547349e-06, "loss": 17.7085, "step": 13225 }, { "epoch": 0.2417606522017292, "grad_norm": 6.189593330134313, "learning_rate": 8.869611653986004e-06, "loss": 17.3318, "step": 13226 }, { "epoch": 0.24177893139817574, "grad_norm": 6.558356271458293, "learning_rate": 8.869424188862005e-06, "loss": 17.6748, "step": 13227 }, { "epoch": 0.24179721059462225, "grad_norm": 7.597312531011783, "learning_rate": 8.869236710176011e-06, "loss": 18.17, "step": 13228 }, { "epoch": 0.2418154897910688, "grad_norm": 8.952541142696836, "learning_rate": 8.869049217928675e-06, "loss": 18.4476, "step": 13229 }, { "epoch": 0.2418337689875153, "grad_norm": 5.988686992748572, "learning_rate": 8.86886171212066e-06, "loss": 17.1655, "step": 13230 }, { "epoch": 0.24185204818396183, "grad_norm": 6.461742159027759, "learning_rate": 8.868674192752617e-06, "loss": 17.3513, "step": 13231 }, { "epoch": 0.24187032738040837, "grad_norm": 6.431905079479456, "learning_rate": 8.86848665982521e-06, "loss": 17.3626, "step": 13232 }, { "epoch": 0.24188860657685488, "grad_norm": 7.974793074328844, "learning_rate": 8.868299113339093e-06, "loss": 17.9988, "step": 13233 }, { "epoch": 0.2419068857733014, "grad_norm": 7.704360688764585, "learning_rate": 8.868111553294922e-06, "loss": 18.0645, "step": 13234 }, { "epoch": 0.24192516496974792, "grad_norm": 5.54608407310233, "learning_rate": 8.867923979693355e-06, "loss": 17.1499, "step": 13235 }, { "epoch": 0.24194344416619445, "grad_norm": 7.503894412720114, "learning_rate": 8.867736392535051e-06, "loss": 17.714, "step": 13236 }, { "epoch": 0.241961723362641, "grad_norm": 6.077081447168073, "learning_rate": 8.867548791820669e-06, "loss": 17.3986, "step": 13237 }, { "epoch": 0.2419800025590875, "grad_norm": 7.31575454844323, "learning_rate": 8.86736117755086e-06, "loss": 18.2215, "step": 13238 }, { "epoch": 0.24199828175553403, "grad_norm": 7.196479914345386, "learning_rate": 8.867173549726288e-06, "loss": 17.7561, "step": 13239 }, { "epoch": 0.24201656095198054, "grad_norm": 6.805207069244284, "learning_rate": 8.866985908347608e-06, "loss": 17.7114, "step": 13240 }, { "epoch": 0.24203484014842708, "grad_norm": 5.460241965285942, "learning_rate": 8.866798253415477e-06, "loss": 17.1375, "step": 13241 }, { "epoch": 0.2420531193448736, "grad_norm": 6.996102130263047, "learning_rate": 8.866610584930555e-06, "loss": 17.9595, "step": 13242 }, { "epoch": 0.24207139854132012, "grad_norm": 6.57622767178465, "learning_rate": 8.866422902893497e-06, "loss": 17.8044, "step": 13243 }, { "epoch": 0.24208967773776666, "grad_norm": 7.344064393129531, "learning_rate": 8.866235207304963e-06, "loss": 17.5162, "step": 13244 }, { "epoch": 0.24210795693421316, "grad_norm": 6.509075212063092, "learning_rate": 8.86604749816561e-06, "loss": 17.4359, "step": 13245 }, { "epoch": 0.2421262361306597, "grad_norm": 6.909143099713661, "learning_rate": 8.865859775476096e-06, "loss": 17.8215, "step": 13246 }, { "epoch": 0.2421445153271062, "grad_norm": 6.543960356519501, "learning_rate": 8.865672039237079e-06, "loss": 17.7186, "step": 13247 }, { "epoch": 0.24216279452355274, "grad_norm": 6.254846658838982, "learning_rate": 8.865484289449218e-06, "loss": 17.3325, "step": 13248 }, { "epoch": 0.24218107371999928, "grad_norm": 7.641910661522797, "learning_rate": 8.865296526113167e-06, "loss": 17.8863, "step": 13249 }, { "epoch": 0.2421993529164458, "grad_norm": 5.761708675709654, "learning_rate": 8.86510874922959e-06, "loss": 17.3231, "step": 13250 }, { "epoch": 0.24221763211289232, "grad_norm": 7.396955615086446, "learning_rate": 8.864920958799141e-06, "loss": 18.1548, "step": 13251 }, { "epoch": 0.24223591130933883, "grad_norm": 6.385589888431112, "learning_rate": 8.86473315482248e-06, "loss": 17.4871, "step": 13252 }, { "epoch": 0.24225419050578537, "grad_norm": 7.515137772539918, "learning_rate": 8.864545337300264e-06, "loss": 18.0275, "step": 13253 }, { "epoch": 0.2422724697022319, "grad_norm": 7.713963158824052, "learning_rate": 8.864357506233153e-06, "loss": 17.8809, "step": 13254 }, { "epoch": 0.2422907488986784, "grad_norm": 7.668300760106162, "learning_rate": 8.864169661621803e-06, "loss": 17.8319, "step": 13255 }, { "epoch": 0.24230902809512495, "grad_norm": 7.517969916661936, "learning_rate": 8.863981803466875e-06, "loss": 17.7307, "step": 13256 }, { "epoch": 0.24232730729157145, "grad_norm": 6.106088600959224, "learning_rate": 8.863793931769024e-06, "loss": 17.4672, "step": 13257 }, { "epoch": 0.242345586488018, "grad_norm": 8.690410270027261, "learning_rate": 8.863606046528911e-06, "loss": 17.8399, "step": 13258 }, { "epoch": 0.24236386568446452, "grad_norm": 5.9146146761704514, "learning_rate": 8.863418147747196e-06, "loss": 17.5012, "step": 13259 }, { "epoch": 0.24238214488091103, "grad_norm": 6.576900680122438, "learning_rate": 8.863230235424536e-06, "loss": 17.61, "step": 13260 }, { "epoch": 0.24240042407735757, "grad_norm": 6.039999456418752, "learning_rate": 8.863042309561587e-06, "loss": 17.3064, "step": 13261 }, { "epoch": 0.24241870327380408, "grad_norm": 6.466453689308442, "learning_rate": 8.86285437015901e-06, "loss": 17.4131, "step": 13262 }, { "epoch": 0.2424369824702506, "grad_norm": 8.318876179177755, "learning_rate": 8.862666417217465e-06, "loss": 18.016, "step": 13263 }, { "epoch": 0.24245526166669712, "grad_norm": 6.039915497893999, "learning_rate": 8.862478450737609e-06, "loss": 17.3117, "step": 13264 }, { "epoch": 0.24247354086314366, "grad_norm": 7.369563502714089, "learning_rate": 8.862290470720101e-06, "loss": 17.8927, "step": 13265 }, { "epoch": 0.2424918200595902, "grad_norm": 6.822151712253876, "learning_rate": 8.862102477165599e-06, "loss": 17.4322, "step": 13266 }, { "epoch": 0.2425100992560367, "grad_norm": 6.346122680891925, "learning_rate": 8.861914470074765e-06, "loss": 17.6086, "step": 13267 }, { "epoch": 0.24252837845248323, "grad_norm": 7.771126110246708, "learning_rate": 8.861726449448255e-06, "loss": 18.0334, "step": 13268 }, { "epoch": 0.24254665764892974, "grad_norm": 7.316897383884866, "learning_rate": 8.861538415286727e-06, "loss": 17.8483, "step": 13269 }, { "epoch": 0.24256493684537628, "grad_norm": 7.346397828902137, "learning_rate": 8.861350367590845e-06, "loss": 17.6931, "step": 13270 }, { "epoch": 0.2425832160418228, "grad_norm": 7.626975587403114, "learning_rate": 8.861162306361263e-06, "loss": 17.9159, "step": 13271 }, { "epoch": 0.24260149523826932, "grad_norm": 6.737947123073347, "learning_rate": 8.860974231598645e-06, "loss": 17.3233, "step": 13272 }, { "epoch": 0.24261977443471586, "grad_norm": 7.72127351728017, "learning_rate": 8.860786143303645e-06, "loss": 17.8567, "step": 13273 }, { "epoch": 0.24263805363116236, "grad_norm": 5.858346090633954, "learning_rate": 8.860598041476924e-06, "loss": 17.2809, "step": 13274 }, { "epoch": 0.2426563328276089, "grad_norm": 6.938778930396184, "learning_rate": 8.860409926119142e-06, "loss": 17.655, "step": 13275 }, { "epoch": 0.24267461202405544, "grad_norm": 7.698228884170294, "learning_rate": 8.86022179723096e-06, "loss": 17.669, "step": 13276 }, { "epoch": 0.24269289122050194, "grad_norm": 5.775058615370471, "learning_rate": 8.860033654813033e-06, "loss": 17.0366, "step": 13277 }, { "epoch": 0.24271117041694848, "grad_norm": 6.537461079637749, "learning_rate": 8.859845498866027e-06, "loss": 17.527, "step": 13278 }, { "epoch": 0.242729449613395, "grad_norm": 5.8875767795931475, "learning_rate": 8.859657329390595e-06, "loss": 17.2091, "step": 13279 }, { "epoch": 0.24274772880984152, "grad_norm": 8.352655247672722, "learning_rate": 8.859469146387399e-06, "loss": 18.5289, "step": 13280 }, { "epoch": 0.24276600800628803, "grad_norm": 7.035080451229858, "learning_rate": 8.859280949857098e-06, "loss": 17.9262, "step": 13281 }, { "epoch": 0.24278428720273457, "grad_norm": 7.949003052893934, "learning_rate": 8.859092739800353e-06, "loss": 17.8076, "step": 13282 }, { "epoch": 0.2428025663991811, "grad_norm": 6.352949564202136, "learning_rate": 8.858904516217821e-06, "loss": 17.4697, "step": 13283 }, { "epoch": 0.2428208455956276, "grad_norm": 6.412127243721164, "learning_rate": 8.858716279110166e-06, "loss": 17.2088, "step": 13284 }, { "epoch": 0.24283912479207415, "grad_norm": 7.182949874929911, "learning_rate": 8.858528028478044e-06, "loss": 17.8308, "step": 13285 }, { "epoch": 0.24285740398852065, "grad_norm": 6.6943952253064865, "learning_rate": 8.858339764322118e-06, "loss": 17.285, "step": 13286 }, { "epoch": 0.2428756831849672, "grad_norm": 7.803722863692137, "learning_rate": 8.858151486643043e-06, "loss": 18.2402, "step": 13287 }, { "epoch": 0.24289396238141372, "grad_norm": 5.810079677159859, "learning_rate": 8.857963195441483e-06, "loss": 17.1884, "step": 13288 }, { "epoch": 0.24291224157786023, "grad_norm": 8.499056624784934, "learning_rate": 8.857774890718098e-06, "loss": 18.0358, "step": 13289 }, { "epoch": 0.24293052077430677, "grad_norm": 5.607226767592612, "learning_rate": 8.857586572473544e-06, "loss": 16.9867, "step": 13290 }, { "epoch": 0.24294879997075328, "grad_norm": 7.082798162482926, "learning_rate": 8.857398240708487e-06, "loss": 17.9252, "step": 13291 }, { "epoch": 0.2429670791671998, "grad_norm": 6.928844891573724, "learning_rate": 8.857209895423582e-06, "loss": 17.8962, "step": 13292 }, { "epoch": 0.24298535836364635, "grad_norm": 6.79816833825027, "learning_rate": 8.857021536619493e-06, "loss": 17.7566, "step": 13293 }, { "epoch": 0.24300363756009286, "grad_norm": 8.708056121991014, "learning_rate": 8.856833164296877e-06, "loss": 17.7842, "step": 13294 }, { "epoch": 0.2430219167565394, "grad_norm": 6.249069389945501, "learning_rate": 8.856644778456394e-06, "loss": 17.4322, "step": 13295 }, { "epoch": 0.2430401959529859, "grad_norm": 5.2644819088888015, "learning_rate": 8.856456379098707e-06, "loss": 16.7531, "step": 13296 }, { "epoch": 0.24305847514943243, "grad_norm": 6.690942598412263, "learning_rate": 8.856267966224474e-06, "loss": 17.544, "step": 13297 }, { "epoch": 0.24307675434587894, "grad_norm": 8.679717368008, "learning_rate": 8.856079539834357e-06, "loss": 18.7665, "step": 13298 }, { "epoch": 0.24309503354232548, "grad_norm": 6.965027228531672, "learning_rate": 8.855891099929017e-06, "loss": 17.8125, "step": 13299 }, { "epoch": 0.243113312738772, "grad_norm": 6.9312897841704615, "learning_rate": 8.855702646509113e-06, "loss": 17.747, "step": 13300 }, { "epoch": 0.24313159193521852, "grad_norm": 7.540184567771135, "learning_rate": 8.855514179575305e-06, "loss": 18.2535, "step": 13301 }, { "epoch": 0.24314987113166506, "grad_norm": 6.7747784609436135, "learning_rate": 8.855325699128255e-06, "loss": 17.7905, "step": 13302 }, { "epoch": 0.24316815032811157, "grad_norm": 6.1467852029172985, "learning_rate": 8.855137205168623e-06, "loss": 17.6317, "step": 13303 }, { "epoch": 0.2431864295245581, "grad_norm": 6.450889722139722, "learning_rate": 8.854948697697068e-06, "loss": 17.5033, "step": 13304 }, { "epoch": 0.24320470872100464, "grad_norm": 6.690876513263371, "learning_rate": 8.854760176714254e-06, "loss": 17.7083, "step": 13305 }, { "epoch": 0.24322298791745114, "grad_norm": 7.881204537273559, "learning_rate": 8.854571642220839e-06, "loss": 17.9876, "step": 13306 }, { "epoch": 0.24324126711389768, "grad_norm": 6.481490924340865, "learning_rate": 8.854383094217485e-06, "loss": 17.5346, "step": 13307 }, { "epoch": 0.2432595463103442, "grad_norm": 6.515330290429976, "learning_rate": 8.854194532704854e-06, "loss": 17.0989, "step": 13308 }, { "epoch": 0.24327782550679072, "grad_norm": 6.822772907358152, "learning_rate": 8.854005957683604e-06, "loss": 17.7118, "step": 13309 }, { "epoch": 0.24329610470323726, "grad_norm": 7.0022039778280885, "learning_rate": 8.8538173691544e-06, "loss": 17.6289, "step": 13310 }, { "epoch": 0.24331438389968377, "grad_norm": 6.884454555826788, "learning_rate": 8.853628767117899e-06, "loss": 17.6373, "step": 13311 }, { "epoch": 0.2433326630961303, "grad_norm": 6.250198007933299, "learning_rate": 8.853440151574762e-06, "loss": 17.3452, "step": 13312 }, { "epoch": 0.2433509422925768, "grad_norm": 7.231128093485779, "learning_rate": 8.853251522525655e-06, "loss": 17.971, "step": 13313 }, { "epoch": 0.24336922148902335, "grad_norm": 7.839890967614947, "learning_rate": 8.853062879971232e-06, "loss": 18.1151, "step": 13314 }, { "epoch": 0.24338750068546985, "grad_norm": 6.908170487681074, "learning_rate": 8.85287422391216e-06, "loss": 17.5089, "step": 13315 }, { "epoch": 0.2434057798819164, "grad_norm": 7.084431505572852, "learning_rate": 8.8526855543491e-06, "loss": 17.7467, "step": 13316 }, { "epoch": 0.24342405907836293, "grad_norm": 6.4107654527837195, "learning_rate": 8.852496871282707e-06, "loss": 17.8434, "step": 13317 }, { "epoch": 0.24344233827480943, "grad_norm": 6.668920080219149, "learning_rate": 8.85230817471365e-06, "loss": 17.7183, "step": 13318 }, { "epoch": 0.24346061747125597, "grad_norm": 6.298989301834971, "learning_rate": 8.852119464642586e-06, "loss": 17.5342, "step": 13319 }, { "epoch": 0.24347889666770248, "grad_norm": 5.739422634792878, "learning_rate": 8.851930741070179e-06, "loss": 17.2153, "step": 13320 }, { "epoch": 0.243497175864149, "grad_norm": 5.214420217905649, "learning_rate": 8.851742003997088e-06, "loss": 17.0749, "step": 13321 }, { "epoch": 0.24351545506059555, "grad_norm": 8.661960000088818, "learning_rate": 8.851553253423974e-06, "loss": 17.9828, "step": 13322 }, { "epoch": 0.24353373425704206, "grad_norm": 6.674174401769316, "learning_rate": 8.851364489351504e-06, "loss": 17.4714, "step": 13323 }, { "epoch": 0.2435520134534886, "grad_norm": 6.187393832054604, "learning_rate": 8.851175711780331e-06, "loss": 17.2636, "step": 13324 }, { "epoch": 0.2435702926499351, "grad_norm": 6.536215441237082, "learning_rate": 8.850986920711124e-06, "loss": 17.3158, "step": 13325 }, { "epoch": 0.24358857184638164, "grad_norm": 6.333101863390559, "learning_rate": 8.850798116144542e-06, "loss": 17.4345, "step": 13326 }, { "epoch": 0.24360685104282817, "grad_norm": 7.629197284006749, "learning_rate": 8.850609298081247e-06, "loss": 18.2452, "step": 13327 }, { "epoch": 0.24362513023927468, "grad_norm": 7.707916341218809, "learning_rate": 8.8504204665219e-06, "loss": 18.0921, "step": 13328 }, { "epoch": 0.24364340943572121, "grad_norm": 6.931500141022469, "learning_rate": 8.850231621467162e-06, "loss": 17.552, "step": 13329 }, { "epoch": 0.24366168863216772, "grad_norm": 6.7577127924474265, "learning_rate": 8.850042762917698e-06, "loss": 17.6586, "step": 13330 }, { "epoch": 0.24367996782861426, "grad_norm": 6.71347104740245, "learning_rate": 8.849853890874168e-06, "loss": 17.7506, "step": 13331 }, { "epoch": 0.24369824702506077, "grad_norm": 6.60361960143841, "learning_rate": 8.849665005337234e-06, "loss": 17.6244, "step": 13332 }, { "epoch": 0.2437165262215073, "grad_norm": 6.404002992694197, "learning_rate": 8.849476106307558e-06, "loss": 17.5198, "step": 13333 }, { "epoch": 0.24373480541795384, "grad_norm": 6.740798468982144, "learning_rate": 8.849287193785803e-06, "loss": 17.7947, "step": 13334 }, { "epoch": 0.24375308461440034, "grad_norm": 7.478272733324508, "learning_rate": 8.84909826777263e-06, "loss": 17.9603, "step": 13335 }, { "epoch": 0.24377136381084688, "grad_norm": 5.8534147617358165, "learning_rate": 8.848909328268702e-06, "loss": 17.2796, "step": 13336 }, { "epoch": 0.2437896430072934, "grad_norm": 6.2500079619499305, "learning_rate": 8.84872037527468e-06, "loss": 17.4809, "step": 13337 }, { "epoch": 0.24380792220373992, "grad_norm": 7.060013727147254, "learning_rate": 8.848531408791226e-06, "loss": 17.8596, "step": 13338 }, { "epoch": 0.24382620140018646, "grad_norm": 6.129256935432026, "learning_rate": 8.848342428819006e-06, "loss": 17.5985, "step": 13339 }, { "epoch": 0.24384448059663297, "grad_norm": 7.852104461299691, "learning_rate": 8.848153435358678e-06, "loss": 17.7454, "step": 13340 }, { "epoch": 0.2438627597930795, "grad_norm": 7.179842200790302, "learning_rate": 8.847964428410907e-06, "loss": 17.709, "step": 13341 }, { "epoch": 0.243881038989526, "grad_norm": 7.099405078928812, "learning_rate": 8.847775407976353e-06, "loss": 17.9011, "step": 13342 }, { "epoch": 0.24389931818597255, "grad_norm": 6.579104355020609, "learning_rate": 8.84758637405568e-06, "loss": 17.4511, "step": 13343 }, { "epoch": 0.24391759738241908, "grad_norm": 6.361251746274071, "learning_rate": 8.847397326649553e-06, "loss": 17.3535, "step": 13344 }, { "epoch": 0.2439358765788656, "grad_norm": 6.134572598632472, "learning_rate": 8.847208265758633e-06, "loss": 17.4643, "step": 13345 }, { "epoch": 0.24395415577531213, "grad_norm": 7.10404281793083, "learning_rate": 8.84701919138358e-06, "loss": 17.8232, "step": 13346 }, { "epoch": 0.24397243497175863, "grad_norm": 6.672163379254932, "learning_rate": 8.846830103525056e-06, "loss": 17.4976, "step": 13347 }, { "epoch": 0.24399071416820517, "grad_norm": 7.540641617800829, "learning_rate": 8.84664100218373e-06, "loss": 17.9285, "step": 13348 }, { "epoch": 0.24400899336465168, "grad_norm": 8.027690292367463, "learning_rate": 8.84645188736026e-06, "loss": 18.2729, "step": 13349 }, { "epoch": 0.2440272725610982, "grad_norm": 7.01108329933926, "learning_rate": 8.846262759055311e-06, "loss": 17.7341, "step": 13350 }, { "epoch": 0.24404555175754475, "grad_norm": 7.618609732374895, "learning_rate": 8.846073617269542e-06, "loss": 17.7382, "step": 13351 }, { "epoch": 0.24406383095399126, "grad_norm": 7.669566130584318, "learning_rate": 8.84588446200362e-06, "loss": 17.8425, "step": 13352 }, { "epoch": 0.2440821101504378, "grad_norm": 7.6072715587573345, "learning_rate": 8.845695293258207e-06, "loss": 18.1951, "step": 13353 }, { "epoch": 0.2441003893468843, "grad_norm": 6.899448402243731, "learning_rate": 8.845506111033966e-06, "loss": 17.5865, "step": 13354 }, { "epoch": 0.24411866854333084, "grad_norm": 6.901240507089418, "learning_rate": 8.84531691533156e-06, "loss": 17.8634, "step": 13355 }, { "epoch": 0.24413694773977737, "grad_norm": 8.183313145335115, "learning_rate": 8.84512770615165e-06, "loss": 16.903, "step": 13356 }, { "epoch": 0.24415522693622388, "grad_norm": 7.5261891727210095, "learning_rate": 8.844938483494905e-06, "loss": 17.9907, "step": 13357 }, { "epoch": 0.24417350613267041, "grad_norm": 7.149270791029402, "learning_rate": 8.844749247361982e-06, "loss": 17.6361, "step": 13358 }, { "epoch": 0.24419178532911692, "grad_norm": 10.214463600002174, "learning_rate": 8.844559997753546e-06, "loss": 18.5494, "step": 13359 }, { "epoch": 0.24421006452556346, "grad_norm": 7.225317121006948, "learning_rate": 8.84437073467026e-06, "loss": 17.7026, "step": 13360 }, { "epoch": 0.24422834372201, "grad_norm": 7.632819658495426, "learning_rate": 8.844181458112791e-06, "loss": 17.9165, "step": 13361 }, { "epoch": 0.2442466229184565, "grad_norm": 7.972366299924804, "learning_rate": 8.843992168081796e-06, "loss": 18.0311, "step": 13362 }, { "epoch": 0.24426490211490304, "grad_norm": 6.2756417482546905, "learning_rate": 8.843802864577944e-06, "loss": 17.5389, "step": 13363 }, { "epoch": 0.24428318131134955, "grad_norm": 8.193139934904673, "learning_rate": 8.843613547601896e-06, "loss": 18.3795, "step": 13364 }, { "epoch": 0.24430146050779608, "grad_norm": 6.389396940466, "learning_rate": 8.843424217154318e-06, "loss": 17.7544, "step": 13365 }, { "epoch": 0.2443197397042426, "grad_norm": 7.160598777970506, "learning_rate": 8.843234873235869e-06, "loss": 17.7244, "step": 13366 }, { "epoch": 0.24433801890068912, "grad_norm": 6.64930968727387, "learning_rate": 8.843045515847217e-06, "loss": 17.5518, "step": 13367 }, { "epoch": 0.24435629809713566, "grad_norm": 5.671549508002502, "learning_rate": 8.842856144989023e-06, "loss": 17.1291, "step": 13368 }, { "epoch": 0.24437457729358217, "grad_norm": 7.902320979850315, "learning_rate": 8.842666760661951e-06, "loss": 17.6099, "step": 13369 }, { "epoch": 0.2443928564900287, "grad_norm": 5.965673149399645, "learning_rate": 8.842477362866667e-06, "loss": 17.2664, "step": 13370 }, { "epoch": 0.2444111356864752, "grad_norm": 5.644593567042512, "learning_rate": 8.842287951603833e-06, "loss": 17.3299, "step": 13371 }, { "epoch": 0.24442941488292175, "grad_norm": 6.662397886446397, "learning_rate": 8.842098526874113e-06, "loss": 17.4437, "step": 13372 }, { "epoch": 0.24444769407936828, "grad_norm": 7.231356628187275, "learning_rate": 8.841909088678172e-06, "loss": 17.954, "step": 13373 }, { "epoch": 0.2444659732758148, "grad_norm": 6.770349758066719, "learning_rate": 8.84171963701667e-06, "loss": 17.6923, "step": 13374 }, { "epoch": 0.24448425247226133, "grad_norm": 7.465032746595875, "learning_rate": 8.841530171890275e-06, "loss": 18.0996, "step": 13375 }, { "epoch": 0.24450253166870783, "grad_norm": 6.4393473099515575, "learning_rate": 8.841340693299653e-06, "loss": 17.4858, "step": 13376 }, { "epoch": 0.24452081086515437, "grad_norm": 7.163912942037377, "learning_rate": 8.841151201245462e-06, "loss": 17.8614, "step": 13377 }, { "epoch": 0.2445390900616009, "grad_norm": 7.405374688845051, "learning_rate": 8.840961695728372e-06, "loss": 17.8952, "step": 13378 }, { "epoch": 0.2445573692580474, "grad_norm": 5.637338579282636, "learning_rate": 8.840772176749042e-06, "loss": 17.0259, "step": 13379 }, { "epoch": 0.24457564845449395, "grad_norm": 6.369503905085804, "learning_rate": 8.84058264430814e-06, "loss": 17.3215, "step": 13380 }, { "epoch": 0.24459392765094046, "grad_norm": 5.335592655839366, "learning_rate": 8.84039309840633e-06, "loss": 17.1554, "step": 13381 }, { "epoch": 0.244612206847387, "grad_norm": 6.716870205319661, "learning_rate": 8.840203539044273e-06, "loss": 17.7323, "step": 13382 }, { "epoch": 0.2446304860438335, "grad_norm": 7.826668518376646, "learning_rate": 8.840013966222638e-06, "loss": 17.7759, "step": 13383 }, { "epoch": 0.24464876524028004, "grad_norm": 6.812892731960148, "learning_rate": 8.839824379942089e-06, "loss": 17.7177, "step": 13384 }, { "epoch": 0.24466704443672657, "grad_norm": 5.63299034570083, "learning_rate": 8.839634780203285e-06, "loss": 17.0862, "step": 13385 }, { "epoch": 0.24468532363317308, "grad_norm": 7.337248803763474, "learning_rate": 8.839445167006894e-06, "loss": 18.2525, "step": 13386 }, { "epoch": 0.24470360282961962, "grad_norm": 6.918295093298403, "learning_rate": 8.839255540353583e-06, "loss": 17.6056, "step": 13387 }, { "epoch": 0.24472188202606612, "grad_norm": 7.020567883012543, "learning_rate": 8.839065900244015e-06, "loss": 18.1106, "step": 13388 }, { "epoch": 0.24474016122251266, "grad_norm": 6.150241307155845, "learning_rate": 8.838876246678854e-06, "loss": 17.4877, "step": 13389 }, { "epoch": 0.2447584404189592, "grad_norm": 7.208694299041957, "learning_rate": 8.838686579658763e-06, "loss": 18.076, "step": 13390 }, { "epoch": 0.2447767196154057, "grad_norm": 5.86685802657085, "learning_rate": 8.83849689918441e-06, "loss": 17.3161, "step": 13391 }, { "epoch": 0.24479499881185224, "grad_norm": 7.405617494174632, "learning_rate": 8.838307205256458e-06, "loss": 17.7358, "step": 13392 }, { "epoch": 0.24481327800829875, "grad_norm": 8.410389976756395, "learning_rate": 8.838117497875572e-06, "loss": 18.2559, "step": 13393 }, { "epoch": 0.24483155720474528, "grad_norm": 7.882687619487962, "learning_rate": 8.83792777704242e-06, "loss": 17.7044, "step": 13394 }, { "epoch": 0.24484983640119182, "grad_norm": 5.986235546857629, "learning_rate": 8.83773804275766e-06, "loss": 17.1556, "step": 13395 }, { "epoch": 0.24486811559763833, "grad_norm": 6.177255456277056, "learning_rate": 8.837548295021963e-06, "loss": 17.2895, "step": 13396 }, { "epoch": 0.24488639479408486, "grad_norm": 6.904485474398583, "learning_rate": 8.837358533835992e-06, "loss": 17.6568, "step": 13397 }, { "epoch": 0.24490467399053137, "grad_norm": 7.28159790149412, "learning_rate": 8.837168759200413e-06, "loss": 18.0456, "step": 13398 }, { "epoch": 0.2449229531869779, "grad_norm": 7.663327211935231, "learning_rate": 8.83697897111589e-06, "loss": 18.1913, "step": 13399 }, { "epoch": 0.2449412323834244, "grad_norm": 8.820204283182152, "learning_rate": 8.836789169583089e-06, "loss": 18.0061, "step": 13400 }, { "epoch": 0.24495951157987095, "grad_norm": 8.317022410557518, "learning_rate": 8.836599354602674e-06, "loss": 17.9331, "step": 13401 }, { "epoch": 0.24497779077631748, "grad_norm": 6.1434665356788205, "learning_rate": 8.836409526175314e-06, "loss": 17.0856, "step": 13402 }, { "epoch": 0.244996069972764, "grad_norm": 6.046074046650978, "learning_rate": 8.836219684301667e-06, "loss": 17.3953, "step": 13403 }, { "epoch": 0.24501434916921053, "grad_norm": 6.058140723586201, "learning_rate": 8.836029828982407e-06, "loss": 17.609, "step": 13404 }, { "epoch": 0.24503262836565703, "grad_norm": 7.067027057229567, "learning_rate": 8.835839960218193e-06, "loss": 17.7556, "step": 13405 }, { "epoch": 0.24505090756210357, "grad_norm": 7.224133713204975, "learning_rate": 8.835650078009694e-06, "loss": 17.7724, "step": 13406 }, { "epoch": 0.2450691867585501, "grad_norm": 5.508434191393395, "learning_rate": 8.835460182357573e-06, "loss": 17.1609, "step": 13407 }, { "epoch": 0.24508746595499661, "grad_norm": 6.611146244016963, "learning_rate": 8.835270273262498e-06, "loss": 17.6224, "step": 13408 }, { "epoch": 0.24510574515144315, "grad_norm": 6.309339124168227, "learning_rate": 8.835080350725133e-06, "loss": 17.4649, "step": 13409 }, { "epoch": 0.24512402434788966, "grad_norm": 7.618887437895719, "learning_rate": 8.834890414746144e-06, "loss": 17.9619, "step": 13410 }, { "epoch": 0.2451423035443362, "grad_norm": 5.807315268732273, "learning_rate": 8.834700465326198e-06, "loss": 17.3501, "step": 13411 }, { "epoch": 0.24516058274078273, "grad_norm": 5.200649025143558, "learning_rate": 8.834510502465959e-06, "loss": 17.21, "step": 13412 }, { "epoch": 0.24517886193722924, "grad_norm": 7.39514446397721, "learning_rate": 8.834320526166092e-06, "loss": 17.6912, "step": 13413 }, { "epoch": 0.24519714113367577, "grad_norm": 7.080208674680301, "learning_rate": 8.834130536427266e-06, "loss": 17.8667, "step": 13414 }, { "epoch": 0.24521542033012228, "grad_norm": 6.669847957111236, "learning_rate": 8.833940533250146e-06, "loss": 17.5223, "step": 13415 }, { "epoch": 0.24523369952656882, "grad_norm": 6.999556313077716, "learning_rate": 8.833750516635395e-06, "loss": 17.6839, "step": 13416 }, { "epoch": 0.24525197872301532, "grad_norm": 7.493540137187102, "learning_rate": 8.83356048658368e-06, "loss": 17.5081, "step": 13417 }, { "epoch": 0.24527025791946186, "grad_norm": 5.980800051120309, "learning_rate": 8.833370443095671e-06, "loss": 17.5579, "step": 13418 }, { "epoch": 0.2452885371159084, "grad_norm": 6.675804185575348, "learning_rate": 8.833180386172032e-06, "loss": 17.6918, "step": 13419 }, { "epoch": 0.2453068163123549, "grad_norm": 5.669189846765466, "learning_rate": 8.832990315813425e-06, "loss": 16.9523, "step": 13420 }, { "epoch": 0.24532509550880144, "grad_norm": 8.225106534681808, "learning_rate": 8.832800232020521e-06, "loss": 18.0924, "step": 13421 }, { "epoch": 0.24534337470524795, "grad_norm": 6.543938118391621, "learning_rate": 8.832610134793985e-06, "loss": 17.3952, "step": 13422 }, { "epoch": 0.24536165390169448, "grad_norm": 6.19665585703796, "learning_rate": 8.832420024134484e-06, "loss": 17.3581, "step": 13423 }, { "epoch": 0.24537993309814102, "grad_norm": 7.108267611247485, "learning_rate": 8.83222990004268e-06, "loss": 17.6781, "step": 13424 }, { "epoch": 0.24539821229458753, "grad_norm": 7.422004670472646, "learning_rate": 8.832039762519247e-06, "loss": 18.1953, "step": 13425 }, { "epoch": 0.24541649149103406, "grad_norm": 6.954008379135574, "learning_rate": 8.831849611564845e-06, "loss": 17.3529, "step": 13426 }, { "epoch": 0.24543477068748057, "grad_norm": 5.8034365000088215, "learning_rate": 8.831659447180143e-06, "loss": 17.3271, "step": 13427 }, { "epoch": 0.2454530498839271, "grad_norm": 7.666960779097515, "learning_rate": 8.831469269365808e-06, "loss": 18.2337, "step": 13428 }, { "epoch": 0.24547132908037364, "grad_norm": 7.806620772604487, "learning_rate": 8.831279078122505e-06, "loss": 18.0384, "step": 13429 }, { "epoch": 0.24548960827682015, "grad_norm": 5.910725999860661, "learning_rate": 8.831088873450902e-06, "loss": 17.5923, "step": 13430 }, { "epoch": 0.24550788747326668, "grad_norm": 6.647740532577579, "learning_rate": 8.830898655351663e-06, "loss": 17.7018, "step": 13431 }, { "epoch": 0.2455261666697132, "grad_norm": 6.381486614235224, "learning_rate": 8.830708423825458e-06, "loss": 17.5859, "step": 13432 }, { "epoch": 0.24554444586615973, "grad_norm": 6.394719458885018, "learning_rate": 8.83051817887295e-06, "loss": 17.3302, "step": 13433 }, { "epoch": 0.24556272506260624, "grad_norm": 6.066904758483784, "learning_rate": 8.830327920494812e-06, "loss": 17.4341, "step": 13434 }, { "epoch": 0.24558100425905277, "grad_norm": 7.070178994718958, "learning_rate": 8.830137648691705e-06, "loss": 17.9941, "step": 13435 }, { "epoch": 0.2455992834554993, "grad_norm": 6.085682839386033, "learning_rate": 8.8299473634643e-06, "loss": 17.2157, "step": 13436 }, { "epoch": 0.24561756265194581, "grad_norm": 6.91618577276595, "learning_rate": 8.829757064813257e-06, "loss": 17.8577, "step": 13437 }, { "epoch": 0.24563584184839235, "grad_norm": 7.0115887406466975, "learning_rate": 8.829566752739252e-06, "loss": 17.8506, "step": 13438 }, { "epoch": 0.24565412104483886, "grad_norm": 6.654487020029335, "learning_rate": 8.829376427242948e-06, "loss": 17.6821, "step": 13439 }, { "epoch": 0.2456724002412854, "grad_norm": 7.002473609128466, "learning_rate": 8.82918608832501e-06, "loss": 17.5262, "step": 13440 }, { "epoch": 0.24569067943773193, "grad_norm": 6.400880240180771, "learning_rate": 8.828995735986107e-06, "loss": 17.5627, "step": 13441 }, { "epoch": 0.24570895863417844, "grad_norm": 7.434675361462428, "learning_rate": 8.828805370226906e-06, "loss": 18.0045, "step": 13442 }, { "epoch": 0.24572723783062497, "grad_norm": 7.331143894515387, "learning_rate": 8.828614991048076e-06, "loss": 17.7971, "step": 13443 }, { "epoch": 0.24574551702707148, "grad_norm": 7.619034032513552, "learning_rate": 8.828424598450282e-06, "loss": 18.0238, "step": 13444 }, { "epoch": 0.24576379622351802, "grad_norm": 7.39259731663535, "learning_rate": 8.828234192434192e-06, "loss": 17.5899, "step": 13445 }, { "epoch": 0.24578207541996455, "grad_norm": 6.776273380382308, "learning_rate": 8.828043773000474e-06, "loss": 17.765, "step": 13446 }, { "epoch": 0.24580035461641106, "grad_norm": 6.252730432829984, "learning_rate": 8.827853340149794e-06, "loss": 17.5249, "step": 13447 }, { "epoch": 0.2458186338128576, "grad_norm": 7.435038084033365, "learning_rate": 8.82766289388282e-06, "loss": 17.8393, "step": 13448 }, { "epoch": 0.2458369130093041, "grad_norm": 7.802496312818711, "learning_rate": 8.827472434200219e-06, "loss": 17.8409, "step": 13449 }, { "epoch": 0.24585519220575064, "grad_norm": 7.202308153481494, "learning_rate": 8.82728196110266e-06, "loss": 18.0122, "step": 13450 }, { "epoch": 0.24587347140219715, "grad_norm": 6.924654009065137, "learning_rate": 8.827091474590811e-06, "loss": 17.8219, "step": 13451 }, { "epoch": 0.24589175059864368, "grad_norm": 7.404016704344671, "learning_rate": 8.826900974665337e-06, "loss": 17.8862, "step": 13452 }, { "epoch": 0.24591002979509022, "grad_norm": 6.6462088088911875, "learning_rate": 8.826710461326908e-06, "loss": 17.6568, "step": 13453 }, { "epoch": 0.24592830899153673, "grad_norm": 7.39997599394969, "learning_rate": 8.826519934576192e-06, "loss": 17.5758, "step": 13454 }, { "epoch": 0.24594658818798326, "grad_norm": 6.15160971977275, "learning_rate": 8.826329394413855e-06, "loss": 17.3852, "step": 13455 }, { "epoch": 0.24596486738442977, "grad_norm": 7.913428805973735, "learning_rate": 8.826138840840563e-06, "loss": 17.6378, "step": 13456 }, { "epoch": 0.2459831465808763, "grad_norm": 8.847662828382003, "learning_rate": 8.825948273856988e-06, "loss": 18.1707, "step": 13457 }, { "epoch": 0.24600142577732284, "grad_norm": 5.690044873405773, "learning_rate": 8.825757693463797e-06, "loss": 17.1624, "step": 13458 }, { "epoch": 0.24601970497376935, "grad_norm": 7.173770483159854, "learning_rate": 8.825567099661656e-06, "loss": 17.9517, "step": 13459 }, { "epoch": 0.24603798417021588, "grad_norm": 5.671500016403455, "learning_rate": 8.825376492451237e-06, "loss": 17.2507, "step": 13460 }, { "epoch": 0.2460562633666624, "grad_norm": 5.760964048052998, "learning_rate": 8.825185871833203e-06, "loss": 17.3155, "step": 13461 }, { "epoch": 0.24607454256310893, "grad_norm": 6.8402574657905255, "learning_rate": 8.824995237808224e-06, "loss": 17.7137, "step": 13462 }, { "epoch": 0.24609282175955546, "grad_norm": 6.924451096801156, "learning_rate": 8.82480459037697e-06, "loss": 17.8487, "step": 13463 }, { "epoch": 0.24611110095600197, "grad_norm": 6.399619906996067, "learning_rate": 8.824613929540107e-06, "loss": 17.5705, "step": 13464 }, { "epoch": 0.2461293801524485, "grad_norm": 5.980177457081154, "learning_rate": 8.824423255298305e-06, "loss": 17.3176, "step": 13465 }, { "epoch": 0.24614765934889501, "grad_norm": 6.200361250393343, "learning_rate": 8.824232567652232e-06, "loss": 17.407, "step": 13466 }, { "epoch": 0.24616593854534155, "grad_norm": 7.278235109275367, "learning_rate": 8.824041866602554e-06, "loss": 17.62, "step": 13467 }, { "epoch": 0.24618421774178806, "grad_norm": 6.757109365456597, "learning_rate": 8.823851152149941e-06, "loss": 18.0304, "step": 13468 }, { "epoch": 0.2462024969382346, "grad_norm": 6.254819297715207, "learning_rate": 8.823660424295062e-06, "loss": 17.5373, "step": 13469 }, { "epoch": 0.24622077613468113, "grad_norm": 6.996033752640962, "learning_rate": 8.823469683038583e-06, "loss": 17.8082, "step": 13470 }, { "epoch": 0.24623905533112764, "grad_norm": 6.9903218827039915, "learning_rate": 8.823278928381179e-06, "loss": 17.9962, "step": 13471 }, { "epoch": 0.24625733452757417, "grad_norm": 6.974909060118697, "learning_rate": 8.82308816032351e-06, "loss": 17.6751, "step": 13472 }, { "epoch": 0.24627561372402068, "grad_norm": 7.311919385715046, "learning_rate": 8.822897378866252e-06, "loss": 17.8247, "step": 13473 }, { "epoch": 0.24629389292046722, "grad_norm": 8.234798677547689, "learning_rate": 8.822706584010068e-06, "loss": 18.0569, "step": 13474 }, { "epoch": 0.24631217211691375, "grad_norm": 7.492414245740768, "learning_rate": 8.82251577575563e-06, "loss": 17.7471, "step": 13475 }, { "epoch": 0.24633045131336026, "grad_norm": 6.416032807014928, "learning_rate": 8.822324954103606e-06, "loss": 17.2773, "step": 13476 }, { "epoch": 0.2463487305098068, "grad_norm": 6.605216545214185, "learning_rate": 8.822134119054665e-06, "loss": 17.3079, "step": 13477 }, { "epoch": 0.2463670097062533, "grad_norm": 7.473347962662394, "learning_rate": 8.821943270609475e-06, "loss": 18.0302, "step": 13478 }, { "epoch": 0.24638528890269984, "grad_norm": 6.399162085666547, "learning_rate": 8.821752408768706e-06, "loss": 17.5172, "step": 13479 }, { "epoch": 0.24640356809914638, "grad_norm": 7.468345587531287, "learning_rate": 8.821561533533026e-06, "loss": 17.6476, "step": 13480 }, { "epoch": 0.24642184729559288, "grad_norm": 7.025834326555146, "learning_rate": 8.821370644903105e-06, "loss": 17.8186, "step": 13481 }, { "epoch": 0.24644012649203942, "grad_norm": 6.223068864164905, "learning_rate": 8.821179742879611e-06, "loss": 17.3983, "step": 13482 }, { "epoch": 0.24645840568848593, "grad_norm": 7.284991771763925, "learning_rate": 8.820988827463213e-06, "loss": 17.8984, "step": 13483 }, { "epoch": 0.24647668488493246, "grad_norm": 5.936303452699871, "learning_rate": 8.820797898654581e-06, "loss": 17.1977, "step": 13484 }, { "epoch": 0.24649496408137897, "grad_norm": 5.221180502955405, "learning_rate": 8.820606956454386e-06, "loss": 16.9444, "step": 13485 }, { "epoch": 0.2465132432778255, "grad_norm": 6.811217939005427, "learning_rate": 8.820416000863292e-06, "loss": 17.898, "step": 13486 }, { "epoch": 0.24653152247427204, "grad_norm": 8.177478662926852, "learning_rate": 8.820225031881974e-06, "loss": 18.3779, "step": 13487 }, { "epoch": 0.24654980167071855, "grad_norm": 7.4405321701852145, "learning_rate": 8.820034049511097e-06, "loss": 18.1126, "step": 13488 }, { "epoch": 0.24656808086716508, "grad_norm": 6.192990290104176, "learning_rate": 8.819843053751334e-06, "loss": 17.7177, "step": 13489 }, { "epoch": 0.2465863600636116, "grad_norm": 6.153071832176295, "learning_rate": 8.81965204460335e-06, "loss": 17.5496, "step": 13490 }, { "epoch": 0.24660463926005813, "grad_norm": 6.078233706862764, "learning_rate": 8.81946102206782e-06, "loss": 17.4564, "step": 13491 }, { "epoch": 0.24662291845650466, "grad_norm": 8.38149022730245, "learning_rate": 8.819269986145407e-06, "loss": 18.0966, "step": 13492 }, { "epoch": 0.24664119765295117, "grad_norm": 6.9040545767910455, "learning_rate": 8.819078936836786e-06, "loss": 17.6468, "step": 13493 }, { "epoch": 0.2466594768493977, "grad_norm": 6.477670606043992, "learning_rate": 8.818887874142625e-06, "loss": 17.3782, "step": 13494 }, { "epoch": 0.24667775604584422, "grad_norm": 6.599624140942617, "learning_rate": 8.818696798063594e-06, "loss": 17.4778, "step": 13495 }, { "epoch": 0.24669603524229075, "grad_norm": 6.918902171740237, "learning_rate": 8.818505708600363e-06, "loss": 18.0571, "step": 13496 }, { "epoch": 0.2467143144387373, "grad_norm": 6.167114363379106, "learning_rate": 8.818314605753598e-06, "loss": 17.5789, "step": 13497 }, { "epoch": 0.2467325936351838, "grad_norm": 5.4991456389946025, "learning_rate": 8.818123489523973e-06, "loss": 17.0248, "step": 13498 }, { "epoch": 0.24675087283163033, "grad_norm": 10.464567584802637, "learning_rate": 8.817932359912156e-06, "loss": 17.6705, "step": 13499 }, { "epoch": 0.24676915202807684, "grad_norm": 6.058815033485125, "learning_rate": 8.817741216918818e-06, "loss": 17.3583, "step": 13500 }, { "epoch": 0.24678743122452337, "grad_norm": 7.607334550198356, "learning_rate": 8.81755006054463e-06, "loss": 17.8198, "step": 13501 }, { "epoch": 0.24680571042096988, "grad_norm": 6.781707046645076, "learning_rate": 8.817358890790256e-06, "loss": 17.5437, "step": 13502 }, { "epoch": 0.24682398961741642, "grad_norm": 6.236260480634458, "learning_rate": 8.817167707656373e-06, "loss": 17.4142, "step": 13503 }, { "epoch": 0.24684226881386295, "grad_norm": 7.156356990978949, "learning_rate": 8.816976511143648e-06, "loss": 17.8336, "step": 13504 }, { "epoch": 0.24686054801030946, "grad_norm": 7.385429118551683, "learning_rate": 8.816785301252752e-06, "loss": 17.8506, "step": 13505 }, { "epoch": 0.246878827206756, "grad_norm": 7.008744010798375, "learning_rate": 8.816594077984355e-06, "loss": 17.7951, "step": 13506 }, { "epoch": 0.2468971064032025, "grad_norm": 6.027277764001174, "learning_rate": 8.816402841339125e-06, "loss": 17.3543, "step": 13507 }, { "epoch": 0.24691538559964904, "grad_norm": 7.306801806072262, "learning_rate": 8.816211591317736e-06, "loss": 18.0441, "step": 13508 }, { "epoch": 0.24693366479609558, "grad_norm": 8.105265740723043, "learning_rate": 8.816020327920855e-06, "loss": 18.2235, "step": 13509 }, { "epoch": 0.24695194399254208, "grad_norm": 6.669435866966423, "learning_rate": 8.815829051149156e-06, "loss": 17.3582, "step": 13510 }, { "epoch": 0.24697022318898862, "grad_norm": 8.120380399622045, "learning_rate": 8.815637761003306e-06, "loss": 17.9397, "step": 13511 }, { "epoch": 0.24698850238543513, "grad_norm": 7.940693990461981, "learning_rate": 8.815446457483977e-06, "loss": 17.9778, "step": 13512 }, { "epoch": 0.24700678158188166, "grad_norm": 6.139624392103352, "learning_rate": 8.81525514059184e-06, "loss": 17.4869, "step": 13513 }, { "epoch": 0.2470250607783282, "grad_norm": 6.140321004105239, "learning_rate": 8.815063810327564e-06, "loss": 17.4739, "step": 13514 }, { "epoch": 0.2470433399747747, "grad_norm": 7.220012039096887, "learning_rate": 8.81487246669182e-06, "loss": 17.9978, "step": 13515 }, { "epoch": 0.24706161917122124, "grad_norm": 6.052610323392659, "learning_rate": 8.81468110968528e-06, "loss": 17.4726, "step": 13516 }, { "epoch": 0.24707989836766775, "grad_norm": 7.2392828573847945, "learning_rate": 8.814489739308613e-06, "loss": 17.749, "step": 13517 }, { "epoch": 0.24709817756411429, "grad_norm": 9.178255731900403, "learning_rate": 8.81429835556249e-06, "loss": 18.6995, "step": 13518 }, { "epoch": 0.2471164567605608, "grad_norm": 6.151055631501488, "learning_rate": 8.814106958447584e-06, "loss": 17.5311, "step": 13519 }, { "epoch": 0.24713473595700733, "grad_norm": 6.351774958778554, "learning_rate": 8.813915547964561e-06, "loss": 17.5953, "step": 13520 }, { "epoch": 0.24715301515345386, "grad_norm": 6.20297852456471, "learning_rate": 8.813724124114099e-06, "loss": 17.3503, "step": 13521 }, { "epoch": 0.24717129434990037, "grad_norm": 7.4313810432804575, "learning_rate": 8.813532686896861e-06, "loss": 18.0759, "step": 13522 }, { "epoch": 0.2471895735463469, "grad_norm": 5.945238925825888, "learning_rate": 8.813341236313527e-06, "loss": 17.5475, "step": 13523 }, { "epoch": 0.24720785274279342, "grad_norm": 8.513827079892803, "learning_rate": 8.813149772364758e-06, "loss": 18.2024, "step": 13524 }, { "epoch": 0.24722613193923995, "grad_norm": 5.6187932784378125, "learning_rate": 8.812958295051232e-06, "loss": 17.0056, "step": 13525 }, { "epoch": 0.2472444111356865, "grad_norm": 7.632836096581084, "learning_rate": 8.812766804373617e-06, "loss": 18.03, "step": 13526 }, { "epoch": 0.247262690332133, "grad_norm": 6.981690508258919, "learning_rate": 8.812575300332587e-06, "loss": 17.6715, "step": 13527 }, { "epoch": 0.24728096952857953, "grad_norm": 8.709129676610265, "learning_rate": 8.81238378292881e-06, "loss": 18.5582, "step": 13528 }, { "epoch": 0.24729924872502604, "grad_norm": 6.365832607157043, "learning_rate": 8.812192252162958e-06, "loss": 17.5791, "step": 13529 }, { "epoch": 0.24731752792147257, "grad_norm": 6.899931755926244, "learning_rate": 8.812000708035704e-06, "loss": 17.9722, "step": 13530 }, { "epoch": 0.2473358071179191, "grad_norm": 7.397138075266554, "learning_rate": 8.811809150547718e-06, "loss": 17.7979, "step": 13531 }, { "epoch": 0.24735408631436562, "grad_norm": 6.563167452099872, "learning_rate": 8.811617579699671e-06, "loss": 17.4955, "step": 13532 }, { "epoch": 0.24737236551081215, "grad_norm": 6.41796307114215, "learning_rate": 8.811425995492238e-06, "loss": 17.4837, "step": 13533 }, { "epoch": 0.24739064470725866, "grad_norm": 7.329600825860249, "learning_rate": 8.811234397926085e-06, "loss": 17.8683, "step": 13534 }, { "epoch": 0.2474089239037052, "grad_norm": 7.651120667298316, "learning_rate": 8.811042787001887e-06, "loss": 17.6583, "step": 13535 }, { "epoch": 0.2474272031001517, "grad_norm": 6.719738297247194, "learning_rate": 8.810851162720315e-06, "loss": 17.9221, "step": 13536 }, { "epoch": 0.24744548229659824, "grad_norm": 5.9192372353376745, "learning_rate": 8.81065952508204e-06, "loss": 17.3504, "step": 13537 }, { "epoch": 0.24746376149304478, "grad_norm": 6.505587578775084, "learning_rate": 8.810467874087733e-06, "loss": 17.4247, "step": 13538 }, { "epoch": 0.24748204068949128, "grad_norm": 6.073858449932765, "learning_rate": 8.810276209738069e-06, "loss": 17.3974, "step": 13539 }, { "epoch": 0.24750031988593782, "grad_norm": 7.17598401883612, "learning_rate": 8.810084532033715e-06, "loss": 17.7629, "step": 13540 }, { "epoch": 0.24751859908238433, "grad_norm": 6.072667004573831, "learning_rate": 8.809892840975347e-06, "loss": 17.5451, "step": 13541 }, { "epoch": 0.24753687827883086, "grad_norm": 5.8307657994823545, "learning_rate": 8.809701136563635e-06, "loss": 17.2472, "step": 13542 }, { "epoch": 0.2475551574752774, "grad_norm": 7.9425616507954215, "learning_rate": 8.80950941879925e-06, "loss": 18.4223, "step": 13543 }, { "epoch": 0.2475734366717239, "grad_norm": 6.61336833311643, "learning_rate": 8.809317687682865e-06, "loss": 17.6788, "step": 13544 }, { "epoch": 0.24759171586817044, "grad_norm": 7.353439073462062, "learning_rate": 8.809125943215153e-06, "loss": 17.7704, "step": 13545 }, { "epoch": 0.24760999506461695, "grad_norm": 6.87891029506922, "learning_rate": 8.808934185396787e-06, "loss": 17.5211, "step": 13546 }, { "epoch": 0.24762827426106349, "grad_norm": 7.2274130161211385, "learning_rate": 8.808742414228435e-06, "loss": 17.3957, "step": 13547 }, { "epoch": 0.24764655345751002, "grad_norm": 6.5308196450905225, "learning_rate": 8.808550629710772e-06, "loss": 17.7209, "step": 13548 }, { "epoch": 0.24766483265395653, "grad_norm": 5.982434903221757, "learning_rate": 8.808358831844468e-06, "loss": 17.1228, "step": 13549 }, { "epoch": 0.24768311185040306, "grad_norm": 6.148208212333493, "learning_rate": 8.808167020630198e-06, "loss": 17.524, "step": 13550 }, { "epoch": 0.24770139104684957, "grad_norm": 6.485957048327351, "learning_rate": 8.807975196068633e-06, "loss": 17.6687, "step": 13551 }, { "epoch": 0.2477196702432961, "grad_norm": 5.341858827419484, "learning_rate": 8.807783358160447e-06, "loss": 17.0652, "step": 13552 }, { "epoch": 0.24773794943974262, "grad_norm": 8.274691688700468, "learning_rate": 8.807591506906307e-06, "loss": 18.3032, "step": 13553 }, { "epoch": 0.24775622863618915, "grad_norm": 6.638551424156307, "learning_rate": 8.807399642306894e-06, "loss": 17.8078, "step": 13554 }, { "epoch": 0.2477745078326357, "grad_norm": 5.17861307736317, "learning_rate": 8.80720776436287e-06, "loss": 17.094, "step": 13555 }, { "epoch": 0.2477927870290822, "grad_norm": 7.188883248739621, "learning_rate": 8.807015873074918e-06, "loss": 17.9123, "step": 13556 }, { "epoch": 0.24781106622552873, "grad_norm": 6.701056752872817, "learning_rate": 8.806823968443704e-06, "loss": 17.7605, "step": 13557 }, { "epoch": 0.24782934542197524, "grad_norm": 6.839555891126196, "learning_rate": 8.806632050469901e-06, "loss": 17.6233, "step": 13558 }, { "epoch": 0.24784762461842177, "grad_norm": 7.8974902605052035, "learning_rate": 8.806440119154185e-06, "loss": 18.1118, "step": 13559 }, { "epoch": 0.2478659038148683, "grad_norm": 5.850233448254094, "learning_rate": 8.806248174497225e-06, "loss": 17.1914, "step": 13560 }, { "epoch": 0.24788418301131482, "grad_norm": 6.1803635027236, "learning_rate": 8.806056216499697e-06, "loss": 17.3134, "step": 13561 }, { "epoch": 0.24790246220776135, "grad_norm": 7.44515695266595, "learning_rate": 8.805864245162272e-06, "loss": 17.8741, "step": 13562 }, { "epoch": 0.24792074140420786, "grad_norm": 6.730564786986195, "learning_rate": 8.805672260485623e-06, "loss": 17.6039, "step": 13563 }, { "epoch": 0.2479390206006544, "grad_norm": 6.6264531741244195, "learning_rate": 8.805480262470422e-06, "loss": 17.7734, "step": 13564 }, { "epoch": 0.24795729979710093, "grad_norm": 6.879737015030938, "learning_rate": 8.805288251117343e-06, "loss": 17.3106, "step": 13565 }, { "epoch": 0.24797557899354744, "grad_norm": 6.596703873166253, "learning_rate": 8.805096226427059e-06, "loss": 17.6785, "step": 13566 }, { "epoch": 0.24799385818999398, "grad_norm": 6.330324217831363, "learning_rate": 8.804904188400243e-06, "loss": 17.4953, "step": 13567 }, { "epoch": 0.24801213738644048, "grad_norm": 6.476138941986639, "learning_rate": 8.80471213703757e-06, "loss": 17.8462, "step": 13568 }, { "epoch": 0.24803041658288702, "grad_norm": 7.202468307013269, "learning_rate": 8.804520072339709e-06, "loss": 17.9772, "step": 13569 }, { "epoch": 0.24804869577933353, "grad_norm": 6.487313451592617, "learning_rate": 8.804327994307335e-06, "loss": 17.4103, "step": 13570 }, { "epoch": 0.24806697497578006, "grad_norm": 5.396480993896546, "learning_rate": 8.804135902941121e-06, "loss": 16.9656, "step": 13571 }, { "epoch": 0.2480852541722266, "grad_norm": 6.209982091023376, "learning_rate": 8.803943798241744e-06, "loss": 17.1646, "step": 13572 }, { "epoch": 0.2481035333686731, "grad_norm": 6.259138045292799, "learning_rate": 8.80375168020987e-06, "loss": 17.5918, "step": 13573 }, { "epoch": 0.24812181256511964, "grad_norm": 6.4363935474218, "learning_rate": 8.803559548846178e-06, "loss": 17.6681, "step": 13574 }, { "epoch": 0.24814009176156615, "grad_norm": 8.033724951282872, "learning_rate": 8.803367404151341e-06, "loss": 18.1917, "step": 13575 }, { "epoch": 0.2481583709580127, "grad_norm": 8.919849199637072, "learning_rate": 8.803175246126032e-06, "loss": 18.8369, "step": 13576 }, { "epoch": 0.24817665015445922, "grad_norm": 8.243067495335772, "learning_rate": 8.802983074770922e-06, "loss": 18.2087, "step": 13577 }, { "epoch": 0.24819492935090573, "grad_norm": 6.223858430854021, "learning_rate": 8.802790890086686e-06, "loss": 17.5654, "step": 13578 }, { "epoch": 0.24821320854735227, "grad_norm": 7.387343307779693, "learning_rate": 8.802598692074e-06, "loss": 18.1791, "step": 13579 }, { "epoch": 0.24823148774379877, "grad_norm": 6.975913950209664, "learning_rate": 8.802406480733534e-06, "loss": 17.8566, "step": 13580 }, { "epoch": 0.2482497669402453, "grad_norm": 5.483926267998872, "learning_rate": 8.802214256065963e-06, "loss": 17.0997, "step": 13581 }, { "epoch": 0.24826804613669184, "grad_norm": 7.6291144664324415, "learning_rate": 8.802022018071961e-06, "loss": 18.0416, "step": 13582 }, { "epoch": 0.24828632533313835, "grad_norm": 6.39217728338996, "learning_rate": 8.801829766752203e-06, "loss": 17.6541, "step": 13583 }, { "epoch": 0.2483046045295849, "grad_norm": 6.595641814029959, "learning_rate": 8.80163750210736e-06, "loss": 17.8408, "step": 13584 }, { "epoch": 0.2483228837260314, "grad_norm": 7.254710124077112, "learning_rate": 8.80144522413811e-06, "loss": 17.797, "step": 13585 }, { "epoch": 0.24834116292247793, "grad_norm": 6.5113618090311345, "learning_rate": 8.801252932845122e-06, "loss": 17.67, "step": 13586 }, { "epoch": 0.24835944211892444, "grad_norm": 6.110250760314191, "learning_rate": 8.801060628229072e-06, "loss": 17.4549, "step": 13587 }, { "epoch": 0.24837772131537098, "grad_norm": 7.17153914014889, "learning_rate": 8.800868310290635e-06, "loss": 17.944, "step": 13588 }, { "epoch": 0.2483960005118175, "grad_norm": 6.6406924892044685, "learning_rate": 8.800675979030484e-06, "loss": 17.746, "step": 13589 }, { "epoch": 0.24841427970826402, "grad_norm": 7.9014519895766355, "learning_rate": 8.800483634449295e-06, "loss": 18.5655, "step": 13590 }, { "epoch": 0.24843255890471055, "grad_norm": 6.178339948853413, "learning_rate": 8.80029127654774e-06, "loss": 17.3984, "step": 13591 }, { "epoch": 0.24845083810115706, "grad_norm": 7.3444746228631095, "learning_rate": 8.800098905326493e-06, "loss": 18.1451, "step": 13592 }, { "epoch": 0.2484691172976036, "grad_norm": 7.1783347093880945, "learning_rate": 8.79990652078623e-06, "loss": 17.7128, "step": 13593 }, { "epoch": 0.24848739649405013, "grad_norm": 6.075669041088203, "learning_rate": 8.799714122927625e-06, "loss": 17.5606, "step": 13594 }, { "epoch": 0.24850567569049664, "grad_norm": 6.382625845225516, "learning_rate": 8.79952171175135e-06, "loss": 17.4704, "step": 13595 }, { "epoch": 0.24852395488694318, "grad_norm": 6.3752340265404355, "learning_rate": 8.799329287258083e-06, "loss": 17.5366, "step": 13596 }, { "epoch": 0.24854223408338968, "grad_norm": 6.915529798093468, "learning_rate": 8.799136849448496e-06, "loss": 17.659, "step": 13597 }, { "epoch": 0.24856051327983622, "grad_norm": 6.612852806965295, "learning_rate": 8.798944398323261e-06, "loss": 17.8432, "step": 13598 }, { "epoch": 0.24857879247628276, "grad_norm": 6.2407458376604055, "learning_rate": 8.798751933883058e-06, "loss": 17.5232, "step": 13599 }, { "epoch": 0.24859707167272926, "grad_norm": 6.711701644131944, "learning_rate": 8.79855945612856e-06, "loss": 17.7623, "step": 13600 }, { "epoch": 0.2486153508691758, "grad_norm": 7.128477037322055, "learning_rate": 8.79836696506044e-06, "loss": 18.0356, "step": 13601 }, { "epoch": 0.2486336300656223, "grad_norm": 5.804775169214023, "learning_rate": 8.798174460679374e-06, "loss": 17.2787, "step": 13602 }, { "epoch": 0.24865190926206884, "grad_norm": 5.70416433023589, "learning_rate": 8.797981942986035e-06, "loss": 17.5225, "step": 13603 }, { "epoch": 0.24867018845851535, "grad_norm": 7.346183355605944, "learning_rate": 8.797789411981098e-06, "loss": 18.0112, "step": 13604 }, { "epoch": 0.2486884676549619, "grad_norm": 8.26468927004845, "learning_rate": 8.797596867665241e-06, "loss": 18.4587, "step": 13605 }, { "epoch": 0.24870674685140842, "grad_norm": 8.255270705426234, "learning_rate": 8.797404310039135e-06, "loss": 18.4868, "step": 13606 }, { "epoch": 0.24872502604785493, "grad_norm": 8.923343128536514, "learning_rate": 8.797211739103458e-06, "loss": 18.3208, "step": 13607 }, { "epoch": 0.24874330524430147, "grad_norm": 5.397304363700296, "learning_rate": 8.797019154858881e-06, "loss": 16.9809, "step": 13608 }, { "epoch": 0.24876158444074797, "grad_norm": 7.430666195594035, "learning_rate": 8.796826557306083e-06, "loss": 18.066, "step": 13609 }, { "epoch": 0.2487798636371945, "grad_norm": 6.003755063812365, "learning_rate": 8.796633946445737e-06, "loss": 17.4161, "step": 13610 }, { "epoch": 0.24879814283364104, "grad_norm": 5.620294923954437, "learning_rate": 8.796441322278518e-06, "loss": 17.3661, "step": 13611 }, { "epoch": 0.24881642203008755, "grad_norm": 5.7806892935003615, "learning_rate": 8.796248684805103e-06, "loss": 17.2277, "step": 13612 }, { "epoch": 0.2488347012265341, "grad_norm": 6.581495995371712, "learning_rate": 8.796056034026164e-06, "loss": 17.485, "step": 13613 }, { "epoch": 0.2488529804229806, "grad_norm": 7.568553920028167, "learning_rate": 8.795863369942379e-06, "loss": 18.2171, "step": 13614 }, { "epoch": 0.24887125961942713, "grad_norm": 12.072031985212378, "learning_rate": 8.795670692554422e-06, "loss": 18.5831, "step": 13615 }, { "epoch": 0.24888953881587367, "grad_norm": 6.5694429939948185, "learning_rate": 8.79547800186297e-06, "loss": 17.3885, "step": 13616 }, { "epoch": 0.24890781801232018, "grad_norm": 7.464572581084682, "learning_rate": 8.795285297868695e-06, "loss": 18.21, "step": 13617 }, { "epoch": 0.2489260972087667, "grad_norm": 7.097192977191152, "learning_rate": 8.795092580572274e-06, "loss": 17.9665, "step": 13618 }, { "epoch": 0.24894437640521322, "grad_norm": 5.887254196280174, "learning_rate": 8.794899849974384e-06, "loss": 17.5091, "step": 13619 }, { "epoch": 0.24896265560165975, "grad_norm": 6.292638117710199, "learning_rate": 8.7947071060757e-06, "loss": 17.9647, "step": 13620 }, { "epoch": 0.24898093479810626, "grad_norm": 5.610482638041628, "learning_rate": 8.794514348876894e-06, "loss": 17.3419, "step": 13621 }, { "epoch": 0.2489992139945528, "grad_norm": 6.985345441765878, "learning_rate": 8.794321578378649e-06, "loss": 17.9621, "step": 13622 }, { "epoch": 0.24901749319099933, "grad_norm": 7.2929065272559646, "learning_rate": 8.794128794581634e-06, "loss": 18.2403, "step": 13623 }, { "epoch": 0.24903577238744584, "grad_norm": 6.574658771008031, "learning_rate": 8.793935997486525e-06, "loss": 17.8019, "step": 13624 }, { "epoch": 0.24905405158389238, "grad_norm": 5.88672130310422, "learning_rate": 8.793743187094002e-06, "loss": 17.4474, "step": 13625 }, { "epoch": 0.24907233078033889, "grad_norm": 8.240587812286298, "learning_rate": 8.793550363404737e-06, "loss": 18.3002, "step": 13626 }, { "epoch": 0.24909060997678542, "grad_norm": 7.550864745136357, "learning_rate": 8.793357526419406e-06, "loss": 17.6373, "step": 13627 }, { "epoch": 0.24910888917323196, "grad_norm": 7.53562386247126, "learning_rate": 8.793164676138687e-06, "loss": 17.9381, "step": 13628 }, { "epoch": 0.24912716836967846, "grad_norm": 6.208485718974186, "learning_rate": 8.792971812563258e-06, "loss": 17.4192, "step": 13629 }, { "epoch": 0.249145447566125, "grad_norm": 6.042275434645566, "learning_rate": 8.792778935693788e-06, "loss": 17.1798, "step": 13630 }, { "epoch": 0.2491637267625715, "grad_norm": 7.903588151094938, "learning_rate": 8.792586045530958e-06, "loss": 18.0765, "step": 13631 }, { "epoch": 0.24918200595901804, "grad_norm": 6.725425760282418, "learning_rate": 8.792393142075443e-06, "loss": 17.4452, "step": 13632 }, { "epoch": 0.24920028515546458, "grad_norm": 7.613950756375264, "learning_rate": 8.79220022532792e-06, "loss": 17.8006, "step": 13633 }, { "epoch": 0.2492185643519111, "grad_norm": 6.311147051785585, "learning_rate": 8.792007295289064e-06, "loss": 17.6108, "step": 13634 }, { "epoch": 0.24923684354835762, "grad_norm": 8.6277951319873, "learning_rate": 8.791814351959551e-06, "loss": 17.8419, "step": 13635 }, { "epoch": 0.24925512274480413, "grad_norm": 6.009905504629565, "learning_rate": 8.79162139534006e-06, "loss": 17.4084, "step": 13636 }, { "epoch": 0.24927340194125067, "grad_norm": 7.029158841169786, "learning_rate": 8.791428425431263e-06, "loss": 17.9272, "step": 13637 }, { "epoch": 0.24929168113769717, "grad_norm": 9.781101938345776, "learning_rate": 8.791235442233837e-06, "loss": 18.2717, "step": 13638 }, { "epoch": 0.2493099603341437, "grad_norm": 8.353990401220457, "learning_rate": 8.791042445748462e-06, "loss": 18.5817, "step": 13639 }, { "epoch": 0.24932823953059025, "grad_norm": 6.6549857354042885, "learning_rate": 8.790849435975813e-06, "loss": 17.6618, "step": 13640 }, { "epoch": 0.24934651872703675, "grad_norm": 6.2320122434893594, "learning_rate": 8.790656412916563e-06, "loss": 17.4033, "step": 13641 }, { "epoch": 0.2493647979234833, "grad_norm": 7.176249965546837, "learning_rate": 8.790463376571392e-06, "loss": 17.7429, "step": 13642 }, { "epoch": 0.2493830771199298, "grad_norm": 5.734651787696095, "learning_rate": 8.790270326940976e-06, "loss": 17.1494, "step": 13643 }, { "epoch": 0.24940135631637633, "grad_norm": 6.9045363113073455, "learning_rate": 8.790077264025992e-06, "loss": 17.9641, "step": 13644 }, { "epoch": 0.24941963551282287, "grad_norm": 5.184670303198655, "learning_rate": 8.789884187827116e-06, "loss": 17.0292, "step": 13645 }, { "epoch": 0.24943791470926938, "grad_norm": 7.711816299566391, "learning_rate": 8.789691098345023e-06, "loss": 17.9144, "step": 13646 }, { "epoch": 0.2494561939057159, "grad_norm": 7.719521491741951, "learning_rate": 8.789497995580395e-06, "loss": 18.3423, "step": 13647 }, { "epoch": 0.24947447310216242, "grad_norm": 5.627474291296823, "learning_rate": 8.789304879533901e-06, "loss": 17.2978, "step": 13648 }, { "epoch": 0.24949275229860896, "grad_norm": 6.158697685436154, "learning_rate": 8.789111750206224e-06, "loss": 17.36, "step": 13649 }, { "epoch": 0.2495110314950555, "grad_norm": 6.65429193676253, "learning_rate": 8.78891860759804e-06, "loss": 17.7113, "step": 13650 }, { "epoch": 0.249529310691502, "grad_norm": 5.166960750257122, "learning_rate": 8.788725451710026e-06, "loss": 16.8692, "step": 13651 }, { "epoch": 0.24954758988794853, "grad_norm": 6.376480383954239, "learning_rate": 8.788532282542857e-06, "loss": 17.4281, "step": 13652 }, { "epoch": 0.24956586908439504, "grad_norm": 5.847633936487089, "learning_rate": 8.788339100097209e-06, "loss": 17.232, "step": 13653 }, { "epoch": 0.24958414828084158, "grad_norm": 8.168623132831579, "learning_rate": 8.788145904373765e-06, "loss": 18.1684, "step": 13654 }, { "epoch": 0.24960242747728809, "grad_norm": 7.436815384741335, "learning_rate": 8.787952695373197e-06, "loss": 18.0997, "step": 13655 }, { "epoch": 0.24962070667373462, "grad_norm": 5.440164677593967, "learning_rate": 8.787759473096182e-06, "loss": 17.3317, "step": 13656 }, { "epoch": 0.24963898587018116, "grad_norm": 7.515954411219964, "learning_rate": 8.7875662375434e-06, "loss": 17.7748, "step": 13657 }, { "epoch": 0.24965726506662766, "grad_norm": 7.113133122298919, "learning_rate": 8.787372988715525e-06, "loss": 18.1626, "step": 13658 }, { "epoch": 0.2496755442630742, "grad_norm": 7.010804874667533, "learning_rate": 8.78717972661324e-06, "loss": 17.7421, "step": 13659 }, { "epoch": 0.2496938234595207, "grad_norm": 5.665932415627742, "learning_rate": 8.786986451237217e-06, "loss": 17.1337, "step": 13660 }, { "epoch": 0.24971210265596724, "grad_norm": 7.371647518398055, "learning_rate": 8.786793162588135e-06, "loss": 18.1092, "step": 13661 }, { "epoch": 0.24973038185241378, "grad_norm": 5.964545631329175, "learning_rate": 8.78659986066667e-06, "loss": 17.1018, "step": 13662 }, { "epoch": 0.2497486610488603, "grad_norm": 6.145087876449944, "learning_rate": 8.786406545473503e-06, "loss": 17.179, "step": 13663 }, { "epoch": 0.24976694024530682, "grad_norm": 7.724709929005375, "learning_rate": 8.786213217009309e-06, "loss": 17.7277, "step": 13664 }, { "epoch": 0.24978521944175333, "grad_norm": 5.767009279703939, "learning_rate": 8.786019875274764e-06, "loss": 17.0923, "step": 13665 }, { "epoch": 0.24980349863819987, "grad_norm": 5.577553796148244, "learning_rate": 8.785826520270553e-06, "loss": 17.1695, "step": 13666 }, { "epoch": 0.2498217778346464, "grad_norm": 7.324165428076282, "learning_rate": 8.785633151997343e-06, "loss": 17.6439, "step": 13667 }, { "epoch": 0.2498400570310929, "grad_norm": 7.22453251197266, "learning_rate": 8.785439770455821e-06, "loss": 17.5587, "step": 13668 }, { "epoch": 0.24985833622753945, "grad_norm": 6.3350418539347215, "learning_rate": 8.785246375646662e-06, "loss": 17.3439, "step": 13669 }, { "epoch": 0.24987661542398595, "grad_norm": 8.105900333242337, "learning_rate": 8.78505296757054e-06, "loss": 18.0721, "step": 13670 }, { "epoch": 0.2498948946204325, "grad_norm": 6.073732518571119, "learning_rate": 8.784859546228136e-06, "loss": 17.3716, "step": 13671 }, { "epoch": 0.249913173816879, "grad_norm": 7.028398136879655, "learning_rate": 8.78466611162013e-06, "loss": 17.7907, "step": 13672 }, { "epoch": 0.24993145301332553, "grad_norm": 7.816419386903626, "learning_rate": 8.784472663747195e-06, "loss": 17.9682, "step": 13673 }, { "epoch": 0.24994973220977207, "grad_norm": 7.585974508246072, "learning_rate": 8.784279202610012e-06, "loss": 17.9536, "step": 13674 }, { "epoch": 0.24996801140621858, "grad_norm": 6.368820454232451, "learning_rate": 8.784085728209261e-06, "loss": 17.5333, "step": 13675 }, { "epoch": 0.2499862906026651, "grad_norm": 6.801929375828293, "learning_rate": 8.783892240545618e-06, "loss": 17.6958, "step": 13676 }, { "epoch": 0.2500045697991116, "grad_norm": 7.71347140343787, "learning_rate": 8.783698739619759e-06, "loss": 18.0251, "step": 13677 }, { "epoch": 0.25002284899555816, "grad_norm": 7.172448762691464, "learning_rate": 8.783505225432364e-06, "loss": 17.8857, "step": 13678 }, { "epoch": 0.2500411281920047, "grad_norm": 7.8308999600022835, "learning_rate": 8.783311697984113e-06, "loss": 18.3854, "step": 13679 }, { "epoch": 0.2500594073884512, "grad_norm": 7.909860254259011, "learning_rate": 8.783118157275683e-06, "loss": 18.1653, "step": 13680 }, { "epoch": 0.2500776865848977, "grad_norm": 7.399177070632512, "learning_rate": 8.78292460330775e-06, "loss": 18.1998, "step": 13681 }, { "epoch": 0.25009596578134424, "grad_norm": 5.943759962856429, "learning_rate": 8.782731036080996e-06, "loss": 17.5429, "step": 13682 }, { "epoch": 0.2501142449777908, "grad_norm": 7.135033931021666, "learning_rate": 8.782537455596099e-06, "loss": 17.7688, "step": 13683 }, { "epoch": 0.2501325241742373, "grad_norm": 6.35122949967104, "learning_rate": 8.782343861853735e-06, "loss": 17.3839, "step": 13684 }, { "epoch": 0.25015080337068385, "grad_norm": 9.789846079296307, "learning_rate": 8.782150254854584e-06, "loss": 18.6432, "step": 13685 }, { "epoch": 0.25016908256713033, "grad_norm": 7.367962037787293, "learning_rate": 8.781956634599325e-06, "loss": 18.0811, "step": 13686 }, { "epoch": 0.25018736176357687, "grad_norm": 8.561889029472841, "learning_rate": 8.781763001088636e-06, "loss": 18.1209, "step": 13687 }, { "epoch": 0.2502056409600234, "grad_norm": 6.019826809806316, "learning_rate": 8.781569354323197e-06, "loss": 17.1458, "step": 13688 }, { "epoch": 0.25022392015646994, "grad_norm": 6.142316335916776, "learning_rate": 8.781375694303683e-06, "loss": 17.2524, "step": 13689 }, { "epoch": 0.2502421993529165, "grad_norm": 6.430875967650894, "learning_rate": 8.781182021030777e-06, "loss": 17.3811, "step": 13690 }, { "epoch": 0.25026047854936295, "grad_norm": 6.6397344390401845, "learning_rate": 8.780988334505156e-06, "loss": 17.4814, "step": 13691 }, { "epoch": 0.2502787577458095, "grad_norm": 6.656274131397097, "learning_rate": 8.7807946347275e-06, "loss": 17.7153, "step": 13692 }, { "epoch": 0.250297036942256, "grad_norm": 6.486317260392342, "learning_rate": 8.780600921698485e-06, "loss": 17.6734, "step": 13693 }, { "epoch": 0.25031531613870256, "grad_norm": 6.526161603655303, "learning_rate": 8.780407195418792e-06, "loss": 17.3982, "step": 13694 }, { "epoch": 0.25033359533514904, "grad_norm": 6.3537219414511, "learning_rate": 8.7802134558891e-06, "loss": 17.364, "step": 13695 }, { "epoch": 0.2503518745315956, "grad_norm": 6.729428797262897, "learning_rate": 8.78001970311009e-06, "loss": 17.9462, "step": 13696 }, { "epoch": 0.2503701537280421, "grad_norm": 6.65367901168732, "learning_rate": 8.779825937082436e-06, "loss": 17.4791, "step": 13697 }, { "epoch": 0.25038843292448865, "grad_norm": 5.8306443302054385, "learning_rate": 8.779632157806821e-06, "loss": 17.4159, "step": 13698 }, { "epoch": 0.2504067121209352, "grad_norm": 6.171992721075378, "learning_rate": 8.779438365283924e-06, "loss": 17.364, "step": 13699 }, { "epoch": 0.25042499131738166, "grad_norm": 7.253356684839225, "learning_rate": 8.779244559514424e-06, "loss": 17.9746, "step": 13700 }, { "epoch": 0.2504432705138282, "grad_norm": 6.053186052422697, "learning_rate": 8.779050740498998e-06, "loss": 17.4817, "step": 13701 }, { "epoch": 0.25046154971027473, "grad_norm": 6.230493315371202, "learning_rate": 8.77885690823833e-06, "loss": 17.1891, "step": 13702 }, { "epoch": 0.25047982890672127, "grad_norm": 6.227282377287225, "learning_rate": 8.778663062733093e-06, "loss": 17.286, "step": 13703 }, { "epoch": 0.2504981081031678, "grad_norm": 7.03631259111469, "learning_rate": 8.778469203983971e-06, "loss": 17.4506, "step": 13704 }, { "epoch": 0.2505163872996143, "grad_norm": 6.550702438917967, "learning_rate": 8.778275331991643e-06, "loss": 17.6496, "step": 13705 }, { "epoch": 0.2505346664960608, "grad_norm": 6.766698513387287, "learning_rate": 8.778081446756787e-06, "loss": 17.5792, "step": 13706 }, { "epoch": 0.25055294569250736, "grad_norm": 6.666839859058256, "learning_rate": 8.777887548280084e-06, "loss": 17.4515, "step": 13707 }, { "epoch": 0.2505712248889539, "grad_norm": 6.154213037421661, "learning_rate": 8.777693636562212e-06, "loss": 17.2226, "step": 13708 }, { "epoch": 0.2505895040854004, "grad_norm": 8.374144174894667, "learning_rate": 8.777499711603854e-06, "loss": 18.1339, "step": 13709 }, { "epoch": 0.2506077832818469, "grad_norm": 7.375942271841453, "learning_rate": 8.777305773405684e-06, "loss": 17.7976, "step": 13710 }, { "epoch": 0.25062606247829344, "grad_norm": 7.844301927949076, "learning_rate": 8.777111821968386e-06, "loss": 18.0166, "step": 13711 }, { "epoch": 0.25064434167474, "grad_norm": 6.245290524021204, "learning_rate": 8.776917857292641e-06, "loss": 17.3728, "step": 13712 }, { "epoch": 0.2506626208711865, "grad_norm": 7.027406029170456, "learning_rate": 8.776723879379126e-06, "loss": 18.0623, "step": 13713 }, { "epoch": 0.25068090006763305, "grad_norm": 6.170920509560837, "learning_rate": 8.77652988822852e-06, "loss": 17.3041, "step": 13714 }, { "epoch": 0.25069917926407953, "grad_norm": 7.877857309985771, "learning_rate": 8.776335883841504e-06, "loss": 18.2322, "step": 13715 }, { "epoch": 0.25071745846052607, "grad_norm": 7.326621885665798, "learning_rate": 8.776141866218761e-06, "loss": 17.8354, "step": 13716 }, { "epoch": 0.2507357376569726, "grad_norm": 7.026959857664446, "learning_rate": 8.775947835360967e-06, "loss": 17.7837, "step": 13717 }, { "epoch": 0.25075401685341914, "grad_norm": 7.637534254447708, "learning_rate": 8.775753791268804e-06, "loss": 18.2689, "step": 13718 }, { "epoch": 0.2507722960498657, "grad_norm": 7.447805613172726, "learning_rate": 8.775559733942952e-06, "loss": 17.8548, "step": 13719 }, { "epoch": 0.25079057524631215, "grad_norm": 6.978672598215624, "learning_rate": 8.775365663384088e-06, "loss": 17.8056, "step": 13720 }, { "epoch": 0.2508088544427587, "grad_norm": 6.789059085714313, "learning_rate": 8.775171579592898e-06, "loss": 17.8224, "step": 13721 }, { "epoch": 0.2508271336392052, "grad_norm": 6.528536194001972, "learning_rate": 8.774977482570058e-06, "loss": 17.9032, "step": 13722 }, { "epoch": 0.25084541283565176, "grad_norm": 5.894994552697368, "learning_rate": 8.77478337231625e-06, "loss": 17.3327, "step": 13723 }, { "epoch": 0.2508636920320983, "grad_norm": 6.889212073845134, "learning_rate": 8.774589248832153e-06, "loss": 17.5949, "step": 13724 }, { "epoch": 0.2508819712285448, "grad_norm": 5.663663159463208, "learning_rate": 8.77439511211845e-06, "loss": 17.4429, "step": 13725 }, { "epoch": 0.2509002504249913, "grad_norm": 6.10287318775543, "learning_rate": 8.774200962175816e-06, "loss": 17.5407, "step": 13726 }, { "epoch": 0.25091852962143785, "grad_norm": 7.309553641820336, "learning_rate": 8.77400679900494e-06, "loss": 17.8893, "step": 13727 }, { "epoch": 0.2509368088178844, "grad_norm": 6.064058751608981, "learning_rate": 8.773812622606494e-06, "loss": 17.4146, "step": 13728 }, { "epoch": 0.25095508801433086, "grad_norm": 6.750710304996495, "learning_rate": 8.773618432981163e-06, "loss": 17.7342, "step": 13729 }, { "epoch": 0.2509733672107774, "grad_norm": 6.2763482507359925, "learning_rate": 8.773424230129628e-06, "loss": 17.5536, "step": 13730 }, { "epoch": 0.25099164640722393, "grad_norm": 7.598059918733686, "learning_rate": 8.773230014052568e-06, "loss": 18.1678, "step": 13731 }, { "epoch": 0.25100992560367047, "grad_norm": 7.145922873989569, "learning_rate": 8.773035784750663e-06, "loss": 17.4477, "step": 13732 }, { "epoch": 0.251028204800117, "grad_norm": 6.458154817569209, "learning_rate": 8.772841542224596e-06, "loss": 17.4468, "step": 13733 }, { "epoch": 0.2510464839965635, "grad_norm": 7.087951311196237, "learning_rate": 8.772647286475047e-06, "loss": 18.3194, "step": 13734 }, { "epoch": 0.25106476319301, "grad_norm": 6.436018704490948, "learning_rate": 8.772453017502695e-06, "loss": 17.663, "step": 13735 }, { "epoch": 0.25108304238945656, "grad_norm": 6.977212610563405, "learning_rate": 8.772258735308225e-06, "loss": 17.5539, "step": 13736 }, { "epoch": 0.2511013215859031, "grad_norm": 6.239105916239187, "learning_rate": 8.772064439892314e-06, "loss": 17.4486, "step": 13737 }, { "epoch": 0.25111960078234963, "grad_norm": 6.015031005242834, "learning_rate": 8.771870131255646e-06, "loss": 17.2309, "step": 13738 }, { "epoch": 0.2511378799787961, "grad_norm": 6.721088540210563, "learning_rate": 8.771675809398898e-06, "loss": 17.5095, "step": 13739 }, { "epoch": 0.25115615917524264, "grad_norm": 6.41520292709623, "learning_rate": 8.771481474322755e-06, "loss": 17.5005, "step": 13740 }, { "epoch": 0.2511744383716892, "grad_norm": 6.2391058121266, "learning_rate": 8.771287126027897e-06, "loss": 17.1922, "step": 13741 }, { "epoch": 0.2511927175681357, "grad_norm": 7.028197784691112, "learning_rate": 8.771092764515006e-06, "loss": 17.6695, "step": 13742 }, { "epoch": 0.25121099676458225, "grad_norm": 6.927224606206834, "learning_rate": 8.77089838978476e-06, "loss": 17.3227, "step": 13743 }, { "epoch": 0.25122927596102873, "grad_norm": 5.501662333540958, "learning_rate": 8.770704001837843e-06, "loss": 17.1795, "step": 13744 }, { "epoch": 0.25124755515747527, "grad_norm": 7.98795049015665, "learning_rate": 8.770509600674934e-06, "loss": 17.6757, "step": 13745 }, { "epoch": 0.2512658343539218, "grad_norm": 6.386750052454475, "learning_rate": 8.770315186296719e-06, "loss": 17.5961, "step": 13746 }, { "epoch": 0.25128411355036834, "grad_norm": 5.7846685922114816, "learning_rate": 8.770120758703874e-06, "loss": 17.1132, "step": 13747 }, { "epoch": 0.2513023927468149, "grad_norm": 7.50890297590246, "learning_rate": 8.769926317897084e-06, "loss": 18.0496, "step": 13748 }, { "epoch": 0.25132067194326135, "grad_norm": 7.499891461814617, "learning_rate": 8.76973186387703e-06, "loss": 18.0797, "step": 13749 }, { "epoch": 0.2513389511397079, "grad_norm": 6.708457406673459, "learning_rate": 8.769537396644393e-06, "loss": 17.261, "step": 13750 }, { "epoch": 0.2513572303361544, "grad_norm": 5.309378532147494, "learning_rate": 8.769342916199854e-06, "loss": 17.024, "step": 13751 }, { "epoch": 0.25137550953260096, "grad_norm": 6.084734857501226, "learning_rate": 8.769148422544095e-06, "loss": 17.3959, "step": 13752 }, { "epoch": 0.2513937887290475, "grad_norm": 8.956039493326504, "learning_rate": 8.768953915677798e-06, "loss": 18.5505, "step": 13753 }, { "epoch": 0.251412067925494, "grad_norm": 6.96444705521746, "learning_rate": 8.768759395601645e-06, "loss": 17.9796, "step": 13754 }, { "epoch": 0.2514303471219405, "grad_norm": 6.557172749327448, "learning_rate": 8.768564862316316e-06, "loss": 17.5487, "step": 13755 }, { "epoch": 0.25144862631838705, "grad_norm": 6.0961799240180135, "learning_rate": 8.768370315822496e-06, "loss": 17.4884, "step": 13756 }, { "epoch": 0.2514669055148336, "grad_norm": 5.801513897231396, "learning_rate": 8.768175756120864e-06, "loss": 17.4211, "step": 13757 }, { "epoch": 0.2514851847112801, "grad_norm": 7.995947668387565, "learning_rate": 8.767981183212103e-06, "loss": 18.1506, "step": 13758 }, { "epoch": 0.2515034639077266, "grad_norm": 6.520774360548156, "learning_rate": 8.767786597096895e-06, "loss": 17.4924, "step": 13759 }, { "epoch": 0.25152174310417313, "grad_norm": 5.587148555102545, "learning_rate": 8.767591997775922e-06, "loss": 16.8752, "step": 13760 }, { "epoch": 0.25154002230061967, "grad_norm": 6.552470227336554, "learning_rate": 8.767397385249865e-06, "loss": 17.7395, "step": 13761 }, { "epoch": 0.2515583014970662, "grad_norm": 8.321874718686061, "learning_rate": 8.767202759519409e-06, "loss": 17.7483, "step": 13762 }, { "epoch": 0.2515765806935127, "grad_norm": 6.215825841701845, "learning_rate": 8.767008120585233e-06, "loss": 17.5189, "step": 13763 }, { "epoch": 0.2515948598899592, "grad_norm": 6.550420398143107, "learning_rate": 8.76681346844802e-06, "loss": 17.7779, "step": 13764 }, { "epoch": 0.25161313908640576, "grad_norm": 6.570276372280605, "learning_rate": 8.766618803108454e-06, "loss": 17.6302, "step": 13765 }, { "epoch": 0.2516314182828523, "grad_norm": 6.98919670996425, "learning_rate": 8.766424124567215e-06, "loss": 17.8021, "step": 13766 }, { "epoch": 0.25164969747929883, "grad_norm": 7.385807087426103, "learning_rate": 8.766229432824986e-06, "loss": 18.305, "step": 13767 }, { "epoch": 0.2516679766757453, "grad_norm": 6.239690219179112, "learning_rate": 8.76603472788245e-06, "loss": 17.2774, "step": 13768 }, { "epoch": 0.25168625587219184, "grad_norm": 7.529944805267041, "learning_rate": 8.765840009740289e-06, "loss": 17.7005, "step": 13769 }, { "epoch": 0.2517045350686384, "grad_norm": 6.707427014138782, "learning_rate": 8.765645278399187e-06, "loss": 17.8625, "step": 13770 }, { "epoch": 0.2517228142650849, "grad_norm": 6.376799995019203, "learning_rate": 8.765450533859823e-06, "loss": 17.7751, "step": 13771 }, { "epoch": 0.25174109346153145, "grad_norm": 7.842346926988082, "learning_rate": 8.765255776122884e-06, "loss": 18.1796, "step": 13772 }, { "epoch": 0.25175937265797793, "grad_norm": 7.827714310710927, "learning_rate": 8.765061005189048e-06, "loss": 17.8566, "step": 13773 }, { "epoch": 0.25177765185442447, "grad_norm": 9.714632895819868, "learning_rate": 8.764866221059e-06, "loss": 18.6999, "step": 13774 }, { "epoch": 0.251795931050871, "grad_norm": 5.1415860911994615, "learning_rate": 8.764671423733424e-06, "loss": 17.1295, "step": 13775 }, { "epoch": 0.25181421024731754, "grad_norm": 5.465218869950854, "learning_rate": 8.764476613213e-06, "loss": 17.1065, "step": 13776 }, { "epoch": 0.2518324894437641, "grad_norm": 7.352397364400496, "learning_rate": 8.764281789498412e-06, "loss": 17.879, "step": 13777 }, { "epoch": 0.25185076864021055, "grad_norm": 5.749608396722773, "learning_rate": 8.764086952590345e-06, "loss": 17.3095, "step": 13778 }, { "epoch": 0.2518690478366571, "grad_norm": 5.911839463727114, "learning_rate": 8.763892102489478e-06, "loss": 17.1792, "step": 13779 }, { "epoch": 0.2518873270331036, "grad_norm": 6.544310704774565, "learning_rate": 8.763697239196496e-06, "loss": 17.6827, "step": 13780 }, { "epoch": 0.25190560622955016, "grad_norm": 7.4383699086318815, "learning_rate": 8.763502362712082e-06, "loss": 17.9255, "step": 13781 }, { "epoch": 0.2519238854259967, "grad_norm": 5.8094518104292385, "learning_rate": 8.763307473036919e-06, "loss": 17.2015, "step": 13782 }, { "epoch": 0.2519421646224432, "grad_norm": 4.97326767951542, "learning_rate": 8.76311257017169e-06, "loss": 16.8358, "step": 13783 }, { "epoch": 0.2519604438188897, "grad_norm": 6.373518140325814, "learning_rate": 8.762917654117077e-06, "loss": 17.5874, "step": 13784 }, { "epoch": 0.25197872301533625, "grad_norm": 6.702180106813105, "learning_rate": 8.762722724873766e-06, "loss": 17.7037, "step": 13785 }, { "epoch": 0.2519970022117828, "grad_norm": 5.64079520470863, "learning_rate": 8.762527782442436e-06, "loss": 17.2127, "step": 13786 }, { "epoch": 0.2520152814082293, "grad_norm": 5.75393418504257, "learning_rate": 8.762332826823774e-06, "loss": 17.3287, "step": 13787 }, { "epoch": 0.2520335606046758, "grad_norm": 8.09878369793486, "learning_rate": 8.762137858018463e-06, "loss": 18.1549, "step": 13788 }, { "epoch": 0.25205183980112233, "grad_norm": 6.6744313033977445, "learning_rate": 8.761942876027185e-06, "loss": 17.7911, "step": 13789 }, { "epoch": 0.25207011899756887, "grad_norm": 5.864234424938747, "learning_rate": 8.761747880850622e-06, "loss": 17.4366, "step": 13790 }, { "epoch": 0.2520883981940154, "grad_norm": 5.890294583815388, "learning_rate": 8.76155287248946e-06, "loss": 17.2478, "step": 13791 }, { "epoch": 0.25210667739046194, "grad_norm": 6.613126406826565, "learning_rate": 8.76135785094438e-06, "loss": 17.4939, "step": 13792 }, { "epoch": 0.2521249565869084, "grad_norm": 5.950499194438246, "learning_rate": 8.76116281621607e-06, "loss": 17.2808, "step": 13793 }, { "epoch": 0.25214323578335496, "grad_norm": 6.577486756184717, "learning_rate": 8.760967768305208e-06, "loss": 17.8002, "step": 13794 }, { "epoch": 0.2521615149798015, "grad_norm": 7.997368502367322, "learning_rate": 8.760772707212483e-06, "loss": 18.0038, "step": 13795 }, { "epoch": 0.25217979417624803, "grad_norm": 6.024074301066061, "learning_rate": 8.760577632938574e-06, "loss": 17.196, "step": 13796 }, { "epoch": 0.2521980733726945, "grad_norm": 6.751303858541155, "learning_rate": 8.760382545484167e-06, "loss": 17.7404, "step": 13797 }, { "epoch": 0.25221635256914104, "grad_norm": 6.5231244162970095, "learning_rate": 8.760187444849946e-06, "loss": 17.4269, "step": 13798 }, { "epoch": 0.2522346317655876, "grad_norm": 6.231742603340174, "learning_rate": 8.759992331036595e-06, "loss": 17.4389, "step": 13799 }, { "epoch": 0.2522529109620341, "grad_norm": 6.846407027722996, "learning_rate": 8.759797204044796e-06, "loss": 17.6918, "step": 13800 }, { "epoch": 0.25227119015848065, "grad_norm": 5.93279747930007, "learning_rate": 8.759602063875234e-06, "loss": 17.2997, "step": 13801 }, { "epoch": 0.25228946935492713, "grad_norm": 6.880431100924478, "learning_rate": 8.759406910528595e-06, "loss": 17.5852, "step": 13802 }, { "epoch": 0.25230774855137367, "grad_norm": 6.464710572953449, "learning_rate": 8.759211744005558e-06, "loss": 17.5171, "step": 13803 }, { "epoch": 0.2523260277478202, "grad_norm": 6.6056523743240145, "learning_rate": 8.759016564306813e-06, "loss": 17.8632, "step": 13804 }, { "epoch": 0.25234430694426674, "grad_norm": 7.711844446399845, "learning_rate": 8.758821371433038e-06, "loss": 18.2702, "step": 13805 }, { "epoch": 0.2523625861407133, "grad_norm": 8.361546716754969, "learning_rate": 8.758626165384922e-06, "loss": 17.6389, "step": 13806 }, { "epoch": 0.25238086533715975, "grad_norm": 5.348057047525714, "learning_rate": 8.758430946163147e-06, "loss": 17.2781, "step": 13807 }, { "epoch": 0.2523991445336063, "grad_norm": 5.907196701423856, "learning_rate": 8.7582357137684e-06, "loss": 17.2221, "step": 13808 }, { "epoch": 0.2524174237300528, "grad_norm": 5.698606558156475, "learning_rate": 8.75804046820136e-06, "loss": 17.2344, "step": 13809 }, { "epoch": 0.25243570292649936, "grad_norm": 7.619353413647588, "learning_rate": 8.757845209462714e-06, "loss": 18.3026, "step": 13810 }, { "epoch": 0.2524539821229459, "grad_norm": 6.521224886232347, "learning_rate": 8.757649937553149e-06, "loss": 17.608, "step": 13811 }, { "epoch": 0.2524722613193924, "grad_norm": 7.291541060939861, "learning_rate": 8.757454652473345e-06, "loss": 17.6405, "step": 13812 }, { "epoch": 0.2524905405158389, "grad_norm": 10.104211927869024, "learning_rate": 8.75725935422399e-06, "loss": 17.9236, "step": 13813 }, { "epoch": 0.25250881971228545, "grad_norm": 5.76059508199635, "learning_rate": 8.757064042805767e-06, "loss": 17.3052, "step": 13814 }, { "epoch": 0.252527098908732, "grad_norm": 6.423065067097856, "learning_rate": 8.75686871821936e-06, "loss": 17.5747, "step": 13815 }, { "epoch": 0.2525453781051785, "grad_norm": 6.507071909941204, "learning_rate": 8.756673380465453e-06, "loss": 17.6979, "step": 13816 }, { "epoch": 0.252563657301625, "grad_norm": 5.826902914321585, "learning_rate": 8.756478029544733e-06, "loss": 17.3318, "step": 13817 }, { "epoch": 0.25258193649807154, "grad_norm": 5.9334082046521965, "learning_rate": 8.756282665457884e-06, "loss": 17.2388, "step": 13818 }, { "epoch": 0.25260021569451807, "grad_norm": 6.744000989321716, "learning_rate": 8.756087288205588e-06, "loss": 17.7638, "step": 13819 }, { "epoch": 0.2526184948909646, "grad_norm": 5.564590959550204, "learning_rate": 8.755891897788534e-06, "loss": 17.0945, "step": 13820 }, { "epoch": 0.25263677408741114, "grad_norm": 5.291913492772538, "learning_rate": 8.755696494207405e-06, "loss": 17.153, "step": 13821 }, { "epoch": 0.2526550532838576, "grad_norm": 6.384061545444824, "learning_rate": 8.755501077462885e-06, "loss": 17.267, "step": 13822 }, { "epoch": 0.25267333248030416, "grad_norm": 6.841622797234484, "learning_rate": 8.75530564755566e-06, "loss": 17.4829, "step": 13823 }, { "epoch": 0.2526916116767507, "grad_norm": 6.412165616148324, "learning_rate": 8.755110204486414e-06, "loss": 17.7288, "step": 13824 }, { "epoch": 0.25270989087319723, "grad_norm": 8.189640179660579, "learning_rate": 8.754914748255832e-06, "loss": 17.7499, "step": 13825 }, { "epoch": 0.25272817006964376, "grad_norm": 6.376840522198051, "learning_rate": 8.754719278864601e-06, "loss": 17.3897, "step": 13826 }, { "epoch": 0.25274644926609025, "grad_norm": 5.0816212245536345, "learning_rate": 8.754523796313404e-06, "loss": 16.8883, "step": 13827 }, { "epoch": 0.2527647284625368, "grad_norm": 10.482001907576482, "learning_rate": 8.754328300602928e-06, "loss": 18.3032, "step": 13828 }, { "epoch": 0.2527830076589833, "grad_norm": 6.792427911599967, "learning_rate": 8.754132791733856e-06, "loss": 17.7622, "step": 13829 }, { "epoch": 0.25280128685542985, "grad_norm": 6.72865970828854, "learning_rate": 8.753937269706873e-06, "loss": 17.924, "step": 13830 }, { "epoch": 0.25281956605187633, "grad_norm": 6.264580177397738, "learning_rate": 8.753741734522668e-06, "loss": 17.551, "step": 13831 }, { "epoch": 0.25283784524832287, "grad_norm": 7.223483013362514, "learning_rate": 8.753546186181924e-06, "loss": 17.7338, "step": 13832 }, { "epoch": 0.2528561244447694, "grad_norm": 5.476514762019108, "learning_rate": 8.753350624685325e-06, "loss": 17.0362, "step": 13833 }, { "epoch": 0.25287440364121594, "grad_norm": 6.736717364355989, "learning_rate": 8.753155050033558e-06, "loss": 17.6898, "step": 13834 }, { "epoch": 0.2528926828376625, "grad_norm": 6.826553700256823, "learning_rate": 8.752959462227308e-06, "loss": 17.6636, "step": 13835 }, { "epoch": 0.25291096203410895, "grad_norm": 6.110653589759968, "learning_rate": 8.752763861267262e-06, "loss": 17.4454, "step": 13836 }, { "epoch": 0.2529292412305555, "grad_norm": 8.146771509318832, "learning_rate": 8.752568247154103e-06, "loss": 18.3319, "step": 13837 }, { "epoch": 0.252947520427002, "grad_norm": 6.042919896918304, "learning_rate": 8.752372619888519e-06, "loss": 17.3727, "step": 13838 }, { "epoch": 0.25296579962344856, "grad_norm": 5.608386316499777, "learning_rate": 8.752176979471194e-06, "loss": 17.0876, "step": 13839 }, { "epoch": 0.2529840788198951, "grad_norm": 6.371974087607878, "learning_rate": 8.751981325902814e-06, "loss": 17.6724, "step": 13840 }, { "epoch": 0.2530023580163416, "grad_norm": 7.217163091063996, "learning_rate": 8.751785659184066e-06, "loss": 17.7458, "step": 13841 }, { "epoch": 0.2530206372127881, "grad_norm": 8.851814579077875, "learning_rate": 8.751589979315634e-06, "loss": 18.2688, "step": 13842 }, { "epoch": 0.25303891640923465, "grad_norm": 7.482599558175977, "learning_rate": 8.751394286298204e-06, "loss": 17.707, "step": 13843 }, { "epoch": 0.2530571956056812, "grad_norm": 6.040627743486871, "learning_rate": 8.751198580132464e-06, "loss": 17.5163, "step": 13844 }, { "epoch": 0.2530754748021277, "grad_norm": 8.717437003205008, "learning_rate": 8.751002860819098e-06, "loss": 18.3448, "step": 13845 }, { "epoch": 0.2530937539985742, "grad_norm": 5.543502707869062, "learning_rate": 8.750807128358792e-06, "loss": 17.028, "step": 13846 }, { "epoch": 0.25311203319502074, "grad_norm": 5.832826377516405, "learning_rate": 8.750611382752233e-06, "loss": 17.4799, "step": 13847 }, { "epoch": 0.25313031239146727, "grad_norm": 6.421644335761602, "learning_rate": 8.750415624000105e-06, "loss": 17.5648, "step": 13848 }, { "epoch": 0.2531485915879138, "grad_norm": 7.0638634413962205, "learning_rate": 8.750219852103098e-06, "loss": 17.6503, "step": 13849 }, { "epoch": 0.25316687078436034, "grad_norm": 6.496441590330903, "learning_rate": 8.750024067061895e-06, "loss": 17.3566, "step": 13850 }, { "epoch": 0.2531851499808068, "grad_norm": 7.657369861136381, "learning_rate": 8.749828268877182e-06, "loss": 18.2053, "step": 13851 }, { "epoch": 0.25320342917725336, "grad_norm": 5.9427225811770645, "learning_rate": 8.74963245754965e-06, "loss": 17.3519, "step": 13852 }, { "epoch": 0.2532217083736999, "grad_norm": 8.107318124705973, "learning_rate": 8.749436633079977e-06, "loss": 17.6699, "step": 13853 }, { "epoch": 0.25323998757014643, "grad_norm": 6.6438036051962825, "learning_rate": 8.749240795468856e-06, "loss": 17.5672, "step": 13854 }, { "epoch": 0.25325826676659297, "grad_norm": 6.85004338489899, "learning_rate": 8.749044944716972e-06, "loss": 17.7759, "step": 13855 }, { "epoch": 0.25327654596303945, "grad_norm": 5.702389014906411, "learning_rate": 8.748849080825011e-06, "loss": 17.2129, "step": 13856 }, { "epoch": 0.253294825159486, "grad_norm": 5.6610331545114, "learning_rate": 8.748653203793658e-06, "loss": 17.0836, "step": 13857 }, { "epoch": 0.2533131043559325, "grad_norm": 7.884955625477877, "learning_rate": 8.7484573136236e-06, "loss": 18.0809, "step": 13858 }, { "epoch": 0.25333138355237905, "grad_norm": 6.298036069861454, "learning_rate": 8.748261410315527e-06, "loss": 17.5352, "step": 13859 }, { "epoch": 0.2533496627488256, "grad_norm": 5.899677885519186, "learning_rate": 8.748065493870122e-06, "loss": 17.3049, "step": 13860 }, { "epoch": 0.25336794194527207, "grad_norm": 7.452406224920589, "learning_rate": 8.747869564288072e-06, "loss": 18.1885, "step": 13861 }, { "epoch": 0.2533862211417186, "grad_norm": 6.616031007667161, "learning_rate": 8.747673621570063e-06, "loss": 17.498, "step": 13862 }, { "epoch": 0.25340450033816514, "grad_norm": 6.190275743116507, "learning_rate": 8.747477665716786e-06, "loss": 17.239, "step": 13863 }, { "epoch": 0.2534227795346117, "grad_norm": 6.69834684601848, "learning_rate": 8.747281696728922e-06, "loss": 17.6468, "step": 13864 }, { "epoch": 0.25344105873105816, "grad_norm": 6.465176767968881, "learning_rate": 8.747085714607164e-06, "loss": 17.3516, "step": 13865 }, { "epoch": 0.2534593379275047, "grad_norm": 7.138906865721986, "learning_rate": 8.746889719352194e-06, "loss": 18.082, "step": 13866 }, { "epoch": 0.2534776171239512, "grad_norm": 7.035732221360487, "learning_rate": 8.746693710964702e-06, "loss": 17.628, "step": 13867 }, { "epoch": 0.25349589632039776, "grad_norm": 6.487640928438261, "learning_rate": 8.746497689445373e-06, "loss": 17.6296, "step": 13868 }, { "epoch": 0.2535141755168443, "grad_norm": 6.642219085040461, "learning_rate": 8.746301654794894e-06, "loss": 17.533, "step": 13869 }, { "epoch": 0.2535324547132908, "grad_norm": 6.657256827746655, "learning_rate": 8.746105607013952e-06, "loss": 17.4731, "step": 13870 }, { "epoch": 0.2535507339097373, "grad_norm": 8.098118528783477, "learning_rate": 8.745909546103237e-06, "loss": 17.325, "step": 13871 }, { "epoch": 0.25356901310618385, "grad_norm": 6.5370206159418425, "learning_rate": 8.745713472063432e-06, "loss": 17.4677, "step": 13872 }, { "epoch": 0.2535872923026304, "grad_norm": 5.555840875869279, "learning_rate": 8.745517384895228e-06, "loss": 17.1296, "step": 13873 }, { "epoch": 0.2536055714990769, "grad_norm": 5.61644510958449, "learning_rate": 8.745321284599311e-06, "loss": 17.238, "step": 13874 }, { "epoch": 0.2536238506955234, "grad_norm": 7.451073328635847, "learning_rate": 8.745125171176367e-06, "loss": 17.946, "step": 13875 }, { "epoch": 0.25364212989196994, "grad_norm": 6.068830688793433, "learning_rate": 8.744929044627084e-06, "loss": 17.3497, "step": 13876 }, { "epoch": 0.25366040908841647, "grad_norm": 6.33350173519903, "learning_rate": 8.74473290495215e-06, "loss": 17.3324, "step": 13877 }, { "epoch": 0.253678688284863, "grad_norm": 6.278681972251112, "learning_rate": 8.744536752152251e-06, "loss": 17.4805, "step": 13878 }, { "epoch": 0.25369696748130954, "grad_norm": 6.739404763073544, "learning_rate": 8.744340586228077e-06, "loss": 17.6275, "step": 13879 }, { "epoch": 0.253715246677756, "grad_norm": 5.301174165885163, "learning_rate": 8.744144407180315e-06, "loss": 16.9004, "step": 13880 }, { "epoch": 0.25373352587420256, "grad_norm": 5.948201083202224, "learning_rate": 8.74394821500965e-06, "loss": 17.1906, "step": 13881 }, { "epoch": 0.2537518050706491, "grad_norm": 6.435881926797897, "learning_rate": 8.743752009716772e-06, "loss": 17.597, "step": 13882 }, { "epoch": 0.25377008426709563, "grad_norm": 5.458443195542765, "learning_rate": 8.743555791302368e-06, "loss": 17.2105, "step": 13883 }, { "epoch": 0.25378836346354217, "grad_norm": 6.027649550947092, "learning_rate": 8.743359559767127e-06, "loss": 17.2979, "step": 13884 }, { "epoch": 0.25380664265998865, "grad_norm": 6.813964020203235, "learning_rate": 8.743163315111733e-06, "loss": 17.7843, "step": 13885 }, { "epoch": 0.2538249218564352, "grad_norm": 7.348223519766389, "learning_rate": 8.742967057336877e-06, "loss": 17.8382, "step": 13886 }, { "epoch": 0.2538432010528817, "grad_norm": 5.694232252564015, "learning_rate": 8.742770786443249e-06, "loss": 17.0623, "step": 13887 }, { "epoch": 0.25386148024932825, "grad_norm": 6.130295450701716, "learning_rate": 8.742574502431532e-06, "loss": 17.6467, "step": 13888 }, { "epoch": 0.2538797594457748, "grad_norm": 7.682631583749733, "learning_rate": 8.742378205302415e-06, "loss": 17.8717, "step": 13889 }, { "epoch": 0.25389803864222127, "grad_norm": 9.483293760520688, "learning_rate": 8.74218189505659e-06, "loss": 18.3314, "step": 13890 }, { "epoch": 0.2539163178386678, "grad_norm": 6.698039296153758, "learning_rate": 8.74198557169474e-06, "loss": 17.7914, "step": 13891 }, { "epoch": 0.25393459703511434, "grad_norm": 6.219013252527224, "learning_rate": 8.741789235217558e-06, "loss": 17.3265, "step": 13892 }, { "epoch": 0.2539528762315609, "grad_norm": 7.93899289527623, "learning_rate": 8.741592885625724e-06, "loss": 18.2619, "step": 13893 }, { "epoch": 0.2539711554280074, "grad_norm": 7.431414942399366, "learning_rate": 8.741396522919937e-06, "loss": 17.8052, "step": 13894 }, { "epoch": 0.2539894346244539, "grad_norm": 6.055708376786302, "learning_rate": 8.741200147100877e-06, "loss": 17.224, "step": 13895 }, { "epoch": 0.2540077138209004, "grad_norm": 7.543950459697201, "learning_rate": 8.741003758169236e-06, "loss": 17.7052, "step": 13896 }, { "epoch": 0.25402599301734696, "grad_norm": 6.290080852767708, "learning_rate": 8.740807356125702e-06, "loss": 17.3903, "step": 13897 }, { "epoch": 0.2540442722137935, "grad_norm": 6.140791835594011, "learning_rate": 8.740610940970962e-06, "loss": 17.4723, "step": 13898 }, { "epoch": 0.25406255141024, "grad_norm": 5.786914095509982, "learning_rate": 8.740414512705706e-06, "loss": 17.2791, "step": 13899 }, { "epoch": 0.2540808306066865, "grad_norm": 6.452620799877661, "learning_rate": 8.740218071330622e-06, "loss": 17.7083, "step": 13900 }, { "epoch": 0.25409910980313305, "grad_norm": 7.575533529925206, "learning_rate": 8.740021616846397e-06, "loss": 18.1212, "step": 13901 }, { "epoch": 0.2541173889995796, "grad_norm": 7.746599654235914, "learning_rate": 8.739825149253721e-06, "loss": 18.0708, "step": 13902 }, { "epoch": 0.2541356681960261, "grad_norm": 6.4143169159359, "learning_rate": 8.739628668553283e-06, "loss": 17.7577, "step": 13903 }, { "epoch": 0.2541539473924726, "grad_norm": 7.181830896596694, "learning_rate": 8.73943217474577e-06, "loss": 18.2878, "step": 13904 }, { "epoch": 0.25417222658891914, "grad_norm": 6.212225736628983, "learning_rate": 8.739235667831874e-06, "loss": 17.5087, "step": 13905 }, { "epoch": 0.2541905057853657, "grad_norm": 7.833271790484041, "learning_rate": 8.739039147812278e-06, "loss": 18.1298, "step": 13906 }, { "epoch": 0.2542087849818122, "grad_norm": 7.227486585626569, "learning_rate": 8.738842614687676e-06, "loss": 17.6637, "step": 13907 }, { "epoch": 0.25422706417825874, "grad_norm": 7.374676572810546, "learning_rate": 8.738646068458757e-06, "loss": 17.8617, "step": 13908 }, { "epoch": 0.2542453433747052, "grad_norm": 6.5000955280762245, "learning_rate": 8.738449509126205e-06, "loss": 17.7477, "step": 13909 }, { "epoch": 0.25426362257115176, "grad_norm": 6.593971964695243, "learning_rate": 8.738252936690713e-06, "loss": 17.4493, "step": 13910 }, { "epoch": 0.2542819017675983, "grad_norm": 7.818910279079011, "learning_rate": 8.73805635115297e-06, "loss": 18.1238, "step": 13911 }, { "epoch": 0.25430018096404483, "grad_norm": 7.10818976180064, "learning_rate": 8.737859752513661e-06, "loss": 17.6805, "step": 13912 }, { "epoch": 0.25431846016049137, "grad_norm": 6.605018965616754, "learning_rate": 8.73766314077348e-06, "loss": 17.3774, "step": 13913 }, { "epoch": 0.25433673935693785, "grad_norm": 6.737453418711159, "learning_rate": 8.737466515933116e-06, "loss": 17.7287, "step": 13914 }, { "epoch": 0.2543550185533844, "grad_norm": 6.726761825052466, "learning_rate": 8.737269877993254e-06, "loss": 17.3611, "step": 13915 }, { "epoch": 0.2543732977498309, "grad_norm": 6.686468349938027, "learning_rate": 8.737073226954585e-06, "loss": 17.5626, "step": 13916 }, { "epoch": 0.25439157694627745, "grad_norm": 9.721580061837525, "learning_rate": 8.736876562817798e-06, "loss": 18.8001, "step": 13917 }, { "epoch": 0.254409856142724, "grad_norm": 7.320850100564549, "learning_rate": 8.736679885583583e-06, "loss": 18.041, "step": 13918 }, { "epoch": 0.25442813533917047, "grad_norm": 7.4543355840587475, "learning_rate": 8.73648319525263e-06, "loss": 17.9807, "step": 13919 }, { "epoch": 0.254446414535617, "grad_norm": 7.569963979882603, "learning_rate": 8.736286491825627e-06, "loss": 17.8133, "step": 13920 }, { "epoch": 0.25446469373206354, "grad_norm": 6.071590080065675, "learning_rate": 8.736089775303266e-06, "loss": 17.4813, "step": 13921 }, { "epoch": 0.2544829729285101, "grad_norm": 6.161994516871227, "learning_rate": 8.735893045686233e-06, "loss": 17.3722, "step": 13922 }, { "epoch": 0.2545012521249566, "grad_norm": 7.862371069244069, "learning_rate": 8.735696302975219e-06, "loss": 18.0639, "step": 13923 }, { "epoch": 0.2545195313214031, "grad_norm": 6.276884236657523, "learning_rate": 8.735499547170914e-06, "loss": 17.3353, "step": 13924 }, { "epoch": 0.2545378105178496, "grad_norm": 5.967898857485332, "learning_rate": 8.735302778274009e-06, "loss": 17.3818, "step": 13925 }, { "epoch": 0.25455608971429616, "grad_norm": 7.193800609691263, "learning_rate": 8.73510599628519e-06, "loss": 18.0089, "step": 13926 }, { "epoch": 0.2545743689107427, "grad_norm": 6.54437382860853, "learning_rate": 8.734909201205148e-06, "loss": 17.6759, "step": 13927 }, { "epoch": 0.25459264810718923, "grad_norm": 5.981309927884284, "learning_rate": 8.734712393034574e-06, "loss": 17.073, "step": 13928 }, { "epoch": 0.2546109273036357, "grad_norm": 6.252306285214709, "learning_rate": 8.734515571774157e-06, "loss": 17.2187, "step": 13929 }, { "epoch": 0.25462920650008225, "grad_norm": 6.1530465363391915, "learning_rate": 8.734318737424588e-06, "loss": 17.4144, "step": 13930 }, { "epoch": 0.2546474856965288, "grad_norm": 7.53964874121549, "learning_rate": 8.734121889986555e-06, "loss": 17.6794, "step": 13931 }, { "epoch": 0.2546657648929753, "grad_norm": 6.32611595168549, "learning_rate": 8.733925029460747e-06, "loss": 17.4249, "step": 13932 }, { "epoch": 0.2546840440894218, "grad_norm": 6.687145282129531, "learning_rate": 8.733728155847858e-06, "loss": 17.9875, "step": 13933 }, { "epoch": 0.25470232328586834, "grad_norm": 6.882934764697469, "learning_rate": 8.733531269148576e-06, "loss": 17.7335, "step": 13934 }, { "epoch": 0.2547206024823149, "grad_norm": 7.678694872833678, "learning_rate": 8.73333436936359e-06, "loss": 18.0639, "step": 13935 }, { "epoch": 0.2547388816787614, "grad_norm": 6.626212475299726, "learning_rate": 8.733137456493593e-06, "loss": 17.6482, "step": 13936 }, { "epoch": 0.25475716087520794, "grad_norm": 7.1339521180408445, "learning_rate": 8.732940530539271e-06, "loss": 17.8985, "step": 13937 }, { "epoch": 0.2547754400716544, "grad_norm": 6.607697252050391, "learning_rate": 8.732743591501316e-06, "loss": 17.5549, "step": 13938 }, { "epoch": 0.25479371926810096, "grad_norm": 6.155478463975609, "learning_rate": 8.732546639380419e-06, "loss": 17.4897, "step": 13939 }, { "epoch": 0.2548119984645475, "grad_norm": 7.410949350228957, "learning_rate": 8.732349674177272e-06, "loss": 17.678, "step": 13940 }, { "epoch": 0.25483027766099403, "grad_norm": 5.955900673297592, "learning_rate": 8.732152695892562e-06, "loss": 17.3842, "step": 13941 }, { "epoch": 0.25484855685744057, "grad_norm": 7.429077063390365, "learning_rate": 8.73195570452698e-06, "loss": 17.7786, "step": 13942 }, { "epoch": 0.25486683605388705, "grad_norm": 6.964177608064067, "learning_rate": 8.731758700081217e-06, "loss": 17.8819, "step": 13943 }, { "epoch": 0.2548851152503336, "grad_norm": 7.494081046872606, "learning_rate": 8.731561682555965e-06, "loss": 18.024, "step": 13944 }, { "epoch": 0.2549033944467801, "grad_norm": 7.457489692667915, "learning_rate": 8.73136465195191e-06, "loss": 17.6645, "step": 13945 }, { "epoch": 0.25492167364322665, "grad_norm": 7.575199443544168, "learning_rate": 8.73116760826975e-06, "loss": 17.6916, "step": 13946 }, { "epoch": 0.2549399528396732, "grad_norm": 6.211321489064392, "learning_rate": 8.73097055151017e-06, "loss": 17.3002, "step": 13947 }, { "epoch": 0.25495823203611967, "grad_norm": 6.231393405979343, "learning_rate": 8.73077348167386e-06, "loss": 17.3247, "step": 13948 }, { "epoch": 0.2549765112325662, "grad_norm": 7.8589940161868475, "learning_rate": 8.730576398761514e-06, "loss": 17.8705, "step": 13949 }, { "epoch": 0.25499479042901274, "grad_norm": 6.092213046044782, "learning_rate": 8.730379302773822e-06, "loss": 17.2994, "step": 13950 }, { "epoch": 0.2550130696254593, "grad_norm": 6.144376781693334, "learning_rate": 8.730182193711472e-06, "loss": 17.2841, "step": 13951 }, { "epoch": 0.2550313488219058, "grad_norm": 6.309420077332737, "learning_rate": 8.729985071575158e-06, "loss": 17.5078, "step": 13952 }, { "epoch": 0.2550496280183523, "grad_norm": 6.982088765553154, "learning_rate": 8.729787936365572e-06, "loss": 17.7818, "step": 13953 }, { "epoch": 0.25506790721479883, "grad_norm": 7.710406989976027, "learning_rate": 8.729590788083403e-06, "loss": 17.8721, "step": 13954 }, { "epoch": 0.25508618641124536, "grad_norm": 6.559110564146433, "learning_rate": 8.72939362672934e-06, "loss": 17.9672, "step": 13955 }, { "epoch": 0.2551044656076919, "grad_norm": 6.982277674262234, "learning_rate": 8.729196452304076e-06, "loss": 17.608, "step": 13956 }, { "epoch": 0.25512274480413843, "grad_norm": 7.703209985365643, "learning_rate": 8.728999264808303e-06, "loss": 18.0226, "step": 13957 }, { "epoch": 0.2551410240005849, "grad_norm": 7.204138122688372, "learning_rate": 8.72880206424271e-06, "loss": 17.6252, "step": 13958 }, { "epoch": 0.25515930319703145, "grad_norm": 7.232827552461167, "learning_rate": 8.72860485060799e-06, "loss": 17.8974, "step": 13959 }, { "epoch": 0.255177582393478, "grad_norm": 6.758013803673432, "learning_rate": 8.728407623904833e-06, "loss": 17.7697, "step": 13960 }, { "epoch": 0.2551958615899245, "grad_norm": 7.997805217356011, "learning_rate": 8.728210384133932e-06, "loss": 18.4118, "step": 13961 }, { "epoch": 0.25521414078637106, "grad_norm": 6.200605591400633, "learning_rate": 8.728013131295976e-06, "loss": 17.4238, "step": 13962 }, { "epoch": 0.25523241998281754, "grad_norm": 6.175082033002962, "learning_rate": 8.727815865391657e-06, "loss": 17.2796, "step": 13963 }, { "epoch": 0.2552506991792641, "grad_norm": 6.141657054367813, "learning_rate": 8.727618586421669e-06, "loss": 17.1876, "step": 13964 }, { "epoch": 0.2552689783757106, "grad_norm": 6.099863422243416, "learning_rate": 8.7274212943867e-06, "loss": 17.4914, "step": 13965 }, { "epoch": 0.25528725757215714, "grad_norm": 5.8144270085021486, "learning_rate": 8.727223989287443e-06, "loss": 17.0464, "step": 13966 }, { "epoch": 0.2553055367686036, "grad_norm": 8.306539054197764, "learning_rate": 8.72702667112459e-06, "loss": 18.2899, "step": 13967 }, { "epoch": 0.25532381596505016, "grad_norm": 5.482667624083453, "learning_rate": 8.72682933989883e-06, "loss": 17.1317, "step": 13968 }, { "epoch": 0.2553420951614967, "grad_norm": 5.693117008850541, "learning_rate": 8.72663199561086e-06, "loss": 17.2629, "step": 13969 }, { "epoch": 0.25536037435794323, "grad_norm": 6.306026003840627, "learning_rate": 8.726434638261365e-06, "loss": 17.4804, "step": 13970 }, { "epoch": 0.25537865355438977, "grad_norm": 6.4212860185533716, "learning_rate": 8.726237267851041e-06, "loss": 17.4157, "step": 13971 }, { "epoch": 0.25539693275083625, "grad_norm": 6.958322437037328, "learning_rate": 8.726039884380579e-06, "loss": 17.7611, "step": 13972 }, { "epoch": 0.2554152119472828, "grad_norm": 5.555300668995391, "learning_rate": 8.72584248785067e-06, "loss": 17.1776, "step": 13973 }, { "epoch": 0.2554334911437293, "grad_norm": 6.687111880084589, "learning_rate": 8.725645078262007e-06, "loss": 17.6282, "step": 13974 }, { "epoch": 0.25545177034017585, "grad_norm": 6.638536443221294, "learning_rate": 8.72544765561528e-06, "loss": 17.5349, "step": 13975 }, { "epoch": 0.2554700495366224, "grad_norm": 5.914273252875999, "learning_rate": 8.725250219911184e-06, "loss": 17.1228, "step": 13976 }, { "epoch": 0.25548832873306887, "grad_norm": 6.425812346335787, "learning_rate": 8.725052771150409e-06, "loss": 17.4639, "step": 13977 }, { "epoch": 0.2555066079295154, "grad_norm": 5.8223807022443985, "learning_rate": 8.724855309333646e-06, "loss": 16.9239, "step": 13978 }, { "epoch": 0.25552488712596194, "grad_norm": 6.484578700575855, "learning_rate": 8.72465783446159e-06, "loss": 17.4878, "step": 13979 }, { "epoch": 0.2555431663224085, "grad_norm": 6.083479783688803, "learning_rate": 8.72446034653493e-06, "loss": 17.4781, "step": 13980 }, { "epoch": 0.255561445518855, "grad_norm": 8.393779815892302, "learning_rate": 8.72426284555436e-06, "loss": 18.4201, "step": 13981 }, { "epoch": 0.2555797247153015, "grad_norm": 7.917411502202908, "learning_rate": 8.724065331520572e-06, "loss": 18.1957, "step": 13982 }, { "epoch": 0.25559800391174803, "grad_norm": 6.664139696039288, "learning_rate": 8.723867804434259e-06, "loss": 17.3455, "step": 13983 }, { "epoch": 0.25561628310819456, "grad_norm": 6.308596894214555, "learning_rate": 8.723670264296111e-06, "loss": 17.3711, "step": 13984 }, { "epoch": 0.2556345623046411, "grad_norm": 8.337303492069122, "learning_rate": 8.723472711106825e-06, "loss": 18.7995, "step": 13985 }, { "epoch": 0.25565284150108764, "grad_norm": 7.278473652304813, "learning_rate": 8.723275144867086e-06, "loss": 18.0212, "step": 13986 }, { "epoch": 0.2556711206975341, "grad_norm": 5.6860856800645445, "learning_rate": 8.723077565577594e-06, "loss": 17.3226, "step": 13987 }, { "epoch": 0.25568939989398065, "grad_norm": 6.1040925281351885, "learning_rate": 8.722879973239035e-06, "loss": 17.3142, "step": 13988 }, { "epoch": 0.2557076790904272, "grad_norm": 7.849493113221841, "learning_rate": 8.722682367852107e-06, "loss": 17.8536, "step": 13989 }, { "epoch": 0.2557259582868737, "grad_norm": 7.419112982774977, "learning_rate": 8.722484749417502e-06, "loss": 18.091, "step": 13990 }, { "epoch": 0.25574423748332026, "grad_norm": 6.844159293020602, "learning_rate": 8.722287117935908e-06, "loss": 17.6159, "step": 13991 }, { "epoch": 0.25576251667976674, "grad_norm": 11.027302576659178, "learning_rate": 8.722089473408023e-06, "loss": 18.0748, "step": 13992 }, { "epoch": 0.2557807958762133, "grad_norm": 6.231287286717342, "learning_rate": 8.721891815834534e-06, "loss": 17.5187, "step": 13993 }, { "epoch": 0.2557990750726598, "grad_norm": 5.926319474170429, "learning_rate": 8.72169414521614e-06, "loss": 17.2324, "step": 13994 }, { "epoch": 0.25581735426910635, "grad_norm": 9.830976195430448, "learning_rate": 8.721496461553528e-06, "loss": 18.9214, "step": 13995 }, { "epoch": 0.2558356334655529, "grad_norm": 5.332807377255744, "learning_rate": 8.721298764847397e-06, "loss": 16.7563, "step": 13996 }, { "epoch": 0.25585391266199936, "grad_norm": 5.556155595301225, "learning_rate": 8.721101055098436e-06, "loss": 17.1854, "step": 13997 }, { "epoch": 0.2558721918584459, "grad_norm": 6.470278581951466, "learning_rate": 8.720903332307339e-06, "loss": 17.4058, "step": 13998 }, { "epoch": 0.25589047105489243, "grad_norm": 5.6971359483934965, "learning_rate": 8.720705596474797e-06, "loss": 17.1146, "step": 13999 }, { "epoch": 0.25590875025133897, "grad_norm": 7.508495639754144, "learning_rate": 8.720507847601508e-06, "loss": 17.8727, "step": 14000 }, { "epoch": 0.25592702944778545, "grad_norm": 6.073201872739274, "learning_rate": 8.720310085688158e-06, "loss": 17.1639, "step": 14001 }, { "epoch": 0.255945308644232, "grad_norm": 6.219751981885219, "learning_rate": 8.720112310735445e-06, "loss": 17.5806, "step": 14002 }, { "epoch": 0.2559635878406785, "grad_norm": 6.3722758522487135, "learning_rate": 8.719914522744063e-06, "loss": 17.4845, "step": 14003 }, { "epoch": 0.25598186703712505, "grad_norm": 6.430087771797434, "learning_rate": 8.719716721714702e-06, "loss": 17.2951, "step": 14004 }, { "epoch": 0.2560001462335716, "grad_norm": 5.954364206203981, "learning_rate": 8.719518907648057e-06, "loss": 17.0703, "step": 14005 }, { "epoch": 0.25601842543001807, "grad_norm": 7.3816256220639405, "learning_rate": 8.71932108054482e-06, "loss": 17.8422, "step": 14006 }, { "epoch": 0.2560367046264646, "grad_norm": 5.585556965955438, "learning_rate": 8.719123240405686e-06, "loss": 17.3696, "step": 14007 }, { "epoch": 0.25605498382291114, "grad_norm": 6.562984981021811, "learning_rate": 8.71892538723135e-06, "loss": 17.5934, "step": 14008 }, { "epoch": 0.2560732630193577, "grad_norm": 5.994157750088668, "learning_rate": 8.7187275210225e-06, "loss": 17.5556, "step": 14009 }, { "epoch": 0.2560915422158042, "grad_norm": 7.149441666470342, "learning_rate": 8.718529641779834e-06, "loss": 17.8358, "step": 14010 }, { "epoch": 0.2561098214122507, "grad_norm": 10.585428630174878, "learning_rate": 8.718331749504045e-06, "loss": 18.3699, "step": 14011 }, { "epoch": 0.25612810060869723, "grad_norm": 5.729931093024647, "learning_rate": 8.718133844195825e-06, "loss": 17.2859, "step": 14012 }, { "epoch": 0.25614637980514376, "grad_norm": 8.041743853428542, "learning_rate": 8.717935925855869e-06, "loss": 17.9278, "step": 14013 }, { "epoch": 0.2561646590015903, "grad_norm": 6.644423763100476, "learning_rate": 8.717737994484869e-06, "loss": 17.8461, "step": 14014 }, { "epoch": 0.25618293819803684, "grad_norm": 7.099340966536888, "learning_rate": 8.717540050083522e-06, "loss": 18.0045, "step": 14015 }, { "epoch": 0.2562012173944833, "grad_norm": 6.345347347165285, "learning_rate": 8.717342092652518e-06, "loss": 17.5001, "step": 14016 }, { "epoch": 0.25621949659092985, "grad_norm": 6.670132674655566, "learning_rate": 8.717144122192553e-06, "loss": 17.6985, "step": 14017 }, { "epoch": 0.2562377757873764, "grad_norm": 7.582148281771751, "learning_rate": 8.71694613870432e-06, "loss": 18.2426, "step": 14018 }, { "epoch": 0.2562560549838229, "grad_norm": 7.761620215830936, "learning_rate": 8.716748142188514e-06, "loss": 17.9051, "step": 14019 }, { "epoch": 0.25627433418026946, "grad_norm": 6.94165788412036, "learning_rate": 8.71655013264583e-06, "loss": 17.7852, "step": 14020 }, { "epoch": 0.25629261337671594, "grad_norm": 6.112493460876916, "learning_rate": 8.716352110076958e-06, "loss": 17.0819, "step": 14021 }, { "epoch": 0.2563108925731625, "grad_norm": 6.820209225329127, "learning_rate": 8.716154074482594e-06, "loss": 17.7083, "step": 14022 }, { "epoch": 0.256329171769609, "grad_norm": 6.890240706035527, "learning_rate": 8.715956025863433e-06, "loss": 17.4598, "step": 14023 }, { "epoch": 0.25634745096605555, "grad_norm": 5.865000848072777, "learning_rate": 8.71575796422017e-06, "loss": 17.1963, "step": 14024 }, { "epoch": 0.2563657301625021, "grad_norm": 5.9749062755893485, "learning_rate": 8.715559889553496e-06, "loss": 17.4812, "step": 14025 }, { "epoch": 0.25638400935894856, "grad_norm": 6.635796194940061, "learning_rate": 8.715361801864107e-06, "loss": 17.6757, "step": 14026 }, { "epoch": 0.2564022885553951, "grad_norm": 6.332782098815111, "learning_rate": 8.715163701152698e-06, "loss": 17.6062, "step": 14027 }, { "epoch": 0.25642056775184163, "grad_norm": 6.569707572976014, "learning_rate": 8.714965587419964e-06, "loss": 17.7143, "step": 14028 }, { "epoch": 0.25643884694828817, "grad_norm": 7.926658291308322, "learning_rate": 8.714767460666595e-06, "loss": 18.3433, "step": 14029 }, { "epoch": 0.2564571261447347, "grad_norm": 6.703570270573447, "learning_rate": 8.71456932089329e-06, "loss": 17.3925, "step": 14030 }, { "epoch": 0.2564754053411812, "grad_norm": 8.7198142927402, "learning_rate": 8.714371168100742e-06, "loss": 18.5173, "step": 14031 }, { "epoch": 0.2564936845376277, "grad_norm": 6.5711447764108, "learning_rate": 8.714173002289645e-06, "loss": 17.5438, "step": 14032 }, { "epoch": 0.25651196373407426, "grad_norm": 6.498513708490086, "learning_rate": 8.713974823460693e-06, "loss": 17.5866, "step": 14033 }, { "epoch": 0.2565302429305208, "grad_norm": 6.707410223345111, "learning_rate": 8.713776631614583e-06, "loss": 17.7918, "step": 14034 }, { "epoch": 0.25654852212696727, "grad_norm": 7.497156527708566, "learning_rate": 8.71357842675201e-06, "loss": 17.8623, "step": 14035 }, { "epoch": 0.2565668013234138, "grad_norm": 6.806956228926704, "learning_rate": 8.713380208873663e-06, "loss": 17.7035, "step": 14036 }, { "epoch": 0.25658508051986034, "grad_norm": 6.737806666072355, "learning_rate": 8.713181977980242e-06, "loss": 17.9184, "step": 14037 }, { "epoch": 0.2566033597163069, "grad_norm": 5.857737233930091, "learning_rate": 8.712983734072442e-06, "loss": 17.3217, "step": 14038 }, { "epoch": 0.2566216389127534, "grad_norm": 6.127108906197689, "learning_rate": 8.712785477150954e-06, "loss": 17.6265, "step": 14039 }, { "epoch": 0.2566399181091999, "grad_norm": 5.505189658724705, "learning_rate": 8.712587207216476e-06, "loss": 17.1689, "step": 14040 }, { "epoch": 0.25665819730564643, "grad_norm": 5.2633515517705955, "learning_rate": 8.712388924269701e-06, "loss": 17.2424, "step": 14041 }, { "epoch": 0.25667647650209297, "grad_norm": 7.385141693929254, "learning_rate": 8.712190628311327e-06, "loss": 17.9504, "step": 14042 }, { "epoch": 0.2566947556985395, "grad_norm": 7.28567741353326, "learning_rate": 8.711992319342047e-06, "loss": 17.8721, "step": 14043 }, { "epoch": 0.25671303489498604, "grad_norm": 7.0865725746620365, "learning_rate": 8.711793997362555e-06, "loss": 17.5467, "step": 14044 }, { "epoch": 0.2567313140914325, "grad_norm": 6.524143900895566, "learning_rate": 8.711595662373545e-06, "loss": 17.818, "step": 14045 }, { "epoch": 0.25674959328787905, "grad_norm": 7.689675182264577, "learning_rate": 8.711397314375717e-06, "loss": 17.8134, "step": 14046 }, { "epoch": 0.2567678724843256, "grad_norm": 7.314549210631258, "learning_rate": 8.711198953369763e-06, "loss": 17.7351, "step": 14047 }, { "epoch": 0.2567861516807721, "grad_norm": 7.349427702300283, "learning_rate": 8.711000579356379e-06, "loss": 17.9681, "step": 14048 }, { "epoch": 0.25680443087721866, "grad_norm": 5.522520021641442, "learning_rate": 8.710802192336258e-06, "loss": 17.2265, "step": 14049 }, { "epoch": 0.25682271007366514, "grad_norm": 6.809408078333556, "learning_rate": 8.7106037923101e-06, "loss": 17.8225, "step": 14050 }, { "epoch": 0.2568409892701117, "grad_norm": 5.658643531266042, "learning_rate": 8.710405379278597e-06, "loss": 17.1175, "step": 14051 }, { "epoch": 0.2568592684665582, "grad_norm": 7.664486081417579, "learning_rate": 8.710206953242444e-06, "loss": 18.2874, "step": 14052 }, { "epoch": 0.25687754766300475, "grad_norm": 6.86069447753021, "learning_rate": 8.710008514202336e-06, "loss": 17.63, "step": 14053 }, { "epoch": 0.2568958268594513, "grad_norm": 6.8516514476035715, "learning_rate": 8.709810062158974e-06, "loss": 17.5871, "step": 14054 }, { "epoch": 0.25691410605589776, "grad_norm": 6.834744715769432, "learning_rate": 8.709611597113048e-06, "loss": 17.7219, "step": 14055 }, { "epoch": 0.2569323852523443, "grad_norm": 6.19308693771849, "learning_rate": 8.709413119065255e-06, "loss": 17.4019, "step": 14056 }, { "epoch": 0.25695066444879083, "grad_norm": 5.851593282103616, "learning_rate": 8.709214628016292e-06, "loss": 17.3128, "step": 14057 }, { "epoch": 0.25696894364523737, "grad_norm": 6.868437862739134, "learning_rate": 8.709016123966851e-06, "loss": 17.3939, "step": 14058 }, { "epoch": 0.2569872228416839, "grad_norm": 5.822625513677289, "learning_rate": 8.708817606917633e-06, "loss": 17.2102, "step": 14059 }, { "epoch": 0.2570055020381304, "grad_norm": 7.895151638643454, "learning_rate": 8.70861907686933e-06, "loss": 18.2374, "step": 14060 }, { "epoch": 0.2570237812345769, "grad_norm": 6.178522074785444, "learning_rate": 8.70842053382264e-06, "loss": 17.3785, "step": 14061 }, { "epoch": 0.25704206043102346, "grad_norm": 6.946304226628333, "learning_rate": 8.708221977778256e-06, "loss": 17.5099, "step": 14062 }, { "epoch": 0.25706033962747, "grad_norm": 7.167277756268976, "learning_rate": 8.708023408736877e-06, "loss": 17.826, "step": 14063 }, { "epoch": 0.2570786188239165, "grad_norm": 6.693399938414784, "learning_rate": 8.707824826699199e-06, "loss": 17.3958, "step": 14064 }, { "epoch": 0.257096898020363, "grad_norm": 6.101790113077589, "learning_rate": 8.707626231665914e-06, "loss": 17.3234, "step": 14065 }, { "epoch": 0.25711517721680954, "grad_norm": 6.464939958515629, "learning_rate": 8.70742762363772e-06, "loss": 17.7037, "step": 14066 }, { "epoch": 0.2571334564132561, "grad_norm": 6.746677057749509, "learning_rate": 8.707229002615317e-06, "loss": 17.8575, "step": 14067 }, { "epoch": 0.2571517356097026, "grad_norm": 6.621445967947263, "learning_rate": 8.707030368599398e-06, "loss": 17.5681, "step": 14068 }, { "epoch": 0.2571700148061491, "grad_norm": 6.322926138021842, "learning_rate": 8.706831721590657e-06, "loss": 17.5165, "step": 14069 }, { "epoch": 0.25718829400259563, "grad_norm": 7.287228708380956, "learning_rate": 8.706633061589794e-06, "loss": 17.9319, "step": 14070 }, { "epoch": 0.25720657319904217, "grad_norm": 6.064790777506287, "learning_rate": 8.706434388597503e-06, "loss": 17.2462, "step": 14071 }, { "epoch": 0.2572248523954887, "grad_norm": 6.563810232322666, "learning_rate": 8.706235702614482e-06, "loss": 17.7225, "step": 14072 }, { "epoch": 0.25724313159193524, "grad_norm": 8.809152384498775, "learning_rate": 8.706037003641426e-06, "loss": 18.0658, "step": 14073 }, { "epoch": 0.2572614107883817, "grad_norm": 6.113233339539685, "learning_rate": 8.705838291679032e-06, "loss": 17.4984, "step": 14074 }, { "epoch": 0.25727968998482825, "grad_norm": 7.392843049875668, "learning_rate": 8.705639566727997e-06, "loss": 18.0276, "step": 14075 }, { "epoch": 0.2572979691812748, "grad_norm": 6.22602578544969, "learning_rate": 8.705440828789015e-06, "loss": 17.7045, "step": 14076 }, { "epoch": 0.2573162483777213, "grad_norm": 7.396244514051507, "learning_rate": 8.705242077862786e-06, "loss": 17.5306, "step": 14077 }, { "epoch": 0.25733452757416786, "grad_norm": 6.418514741742404, "learning_rate": 8.705043313950004e-06, "loss": 17.5961, "step": 14078 }, { "epoch": 0.25735280677061434, "grad_norm": 6.534272716960642, "learning_rate": 8.704844537051368e-06, "loss": 17.4244, "step": 14079 }, { "epoch": 0.2573710859670609, "grad_norm": 5.499315925609631, "learning_rate": 8.704645747167572e-06, "loss": 16.9318, "step": 14080 }, { "epoch": 0.2573893651635074, "grad_norm": 7.21959522451206, "learning_rate": 8.704446944299314e-06, "loss": 18.1147, "step": 14081 }, { "epoch": 0.25740764435995395, "grad_norm": 7.815593595945188, "learning_rate": 8.704248128447293e-06, "loss": 18.2904, "step": 14082 }, { "epoch": 0.2574259235564005, "grad_norm": 6.228024877521958, "learning_rate": 8.704049299612203e-06, "loss": 17.3302, "step": 14083 }, { "epoch": 0.25744420275284696, "grad_norm": 6.8981733996518235, "learning_rate": 8.70385045779474e-06, "loss": 17.8574, "step": 14084 }, { "epoch": 0.2574624819492935, "grad_norm": 7.958254587077419, "learning_rate": 8.703651602995605e-06, "loss": 18.485, "step": 14085 }, { "epoch": 0.25748076114574003, "grad_norm": 7.544062214252388, "learning_rate": 8.70345273521549e-06, "loss": 18.1102, "step": 14086 }, { "epoch": 0.25749904034218657, "grad_norm": 5.914422136425655, "learning_rate": 8.703253854455095e-06, "loss": 17.2718, "step": 14087 }, { "epoch": 0.2575173195386331, "grad_norm": 7.573257093818667, "learning_rate": 8.703054960715118e-06, "loss": 18.1964, "step": 14088 }, { "epoch": 0.2575355987350796, "grad_norm": 9.158315114811106, "learning_rate": 8.702856053996254e-06, "loss": 18.8402, "step": 14089 }, { "epoch": 0.2575538779315261, "grad_norm": 6.991139114717488, "learning_rate": 8.702657134299201e-06, "loss": 17.718, "step": 14090 }, { "epoch": 0.25757215712797266, "grad_norm": 7.629686502898165, "learning_rate": 8.702458201624657e-06, "loss": 17.9014, "step": 14091 }, { "epoch": 0.2575904363244192, "grad_norm": 5.873192405342047, "learning_rate": 8.702259255973315e-06, "loss": 17.5239, "step": 14092 }, { "epoch": 0.2576087155208657, "grad_norm": 8.36751721033347, "learning_rate": 8.70206029734588e-06, "loss": 17.4272, "step": 14093 }, { "epoch": 0.2576269947173122, "grad_norm": 6.438144667109233, "learning_rate": 8.701861325743043e-06, "loss": 17.3664, "step": 14094 }, { "epoch": 0.25764527391375874, "grad_norm": 5.985339058250906, "learning_rate": 8.701662341165502e-06, "loss": 17.5232, "step": 14095 }, { "epoch": 0.2576635531102053, "grad_norm": 7.386255272232896, "learning_rate": 8.701463343613957e-06, "loss": 17.9152, "step": 14096 }, { "epoch": 0.2576818323066518, "grad_norm": 6.83644269450758, "learning_rate": 8.701264333089104e-06, "loss": 17.8121, "step": 14097 }, { "epoch": 0.25770011150309835, "grad_norm": 6.551490461044074, "learning_rate": 8.70106530959164e-06, "loss": 17.7306, "step": 14098 }, { "epoch": 0.25771839069954483, "grad_norm": 7.642030726394805, "learning_rate": 8.700866273122264e-06, "loss": 18.1874, "step": 14099 }, { "epoch": 0.25773666989599137, "grad_norm": 6.977711470901913, "learning_rate": 8.700667223681672e-06, "loss": 17.7862, "step": 14100 }, { "epoch": 0.2577549490924379, "grad_norm": 7.207789253193888, "learning_rate": 8.700468161270563e-06, "loss": 17.9599, "step": 14101 }, { "epoch": 0.25777322828888444, "grad_norm": 6.095674515026532, "learning_rate": 8.700269085889634e-06, "loss": 17.4106, "step": 14102 }, { "epoch": 0.2577915074853309, "grad_norm": 7.4632469140407585, "learning_rate": 8.700069997539584e-06, "loss": 17.8782, "step": 14103 }, { "epoch": 0.25780978668177745, "grad_norm": 5.210845208316065, "learning_rate": 8.69987089622111e-06, "loss": 16.9404, "step": 14104 }, { "epoch": 0.257828065878224, "grad_norm": 8.190158864530341, "learning_rate": 8.699671781934907e-06, "loss": 18.2329, "step": 14105 }, { "epoch": 0.2578463450746705, "grad_norm": 8.530594742188688, "learning_rate": 8.699472654681677e-06, "loss": 18.5188, "step": 14106 }, { "epoch": 0.25786462427111706, "grad_norm": 5.757533253815192, "learning_rate": 8.699273514462116e-06, "loss": 17.2406, "step": 14107 }, { "epoch": 0.25788290346756354, "grad_norm": 7.059106984663106, "learning_rate": 8.699074361276923e-06, "loss": 17.765, "step": 14108 }, { "epoch": 0.2579011826640101, "grad_norm": 6.105087348487071, "learning_rate": 8.698875195126796e-06, "loss": 17.5095, "step": 14109 }, { "epoch": 0.2579194618604566, "grad_norm": 7.334075919128877, "learning_rate": 8.69867601601243e-06, "loss": 17.6755, "step": 14110 }, { "epoch": 0.25793774105690315, "grad_norm": 6.858709746550251, "learning_rate": 8.698476823934529e-06, "loss": 17.954, "step": 14111 }, { "epoch": 0.2579560202533497, "grad_norm": 5.552323518060006, "learning_rate": 8.698277618893784e-06, "loss": 17.2246, "step": 14112 }, { "epoch": 0.25797429944979616, "grad_norm": 7.004207031243522, "learning_rate": 8.6980784008909e-06, "loss": 17.7233, "step": 14113 }, { "epoch": 0.2579925786462427, "grad_norm": 6.8987513859118685, "learning_rate": 8.69787916992657e-06, "loss": 18.1197, "step": 14114 }, { "epoch": 0.25801085784268923, "grad_norm": 6.120743129137839, "learning_rate": 8.697679926001496e-06, "loss": 17.2854, "step": 14115 }, { "epoch": 0.25802913703913577, "grad_norm": 7.381495184158092, "learning_rate": 8.697480669116373e-06, "loss": 18.2839, "step": 14116 }, { "epoch": 0.2580474162355823, "grad_norm": 9.896775387834815, "learning_rate": 8.697281399271902e-06, "loss": 18.4107, "step": 14117 }, { "epoch": 0.2580656954320288, "grad_norm": 6.9585472563611654, "learning_rate": 8.69708211646878e-06, "loss": 17.7435, "step": 14118 }, { "epoch": 0.2580839746284753, "grad_norm": 8.22483594138805, "learning_rate": 8.696882820707708e-06, "loss": 18.5855, "step": 14119 }, { "epoch": 0.25810225382492186, "grad_norm": 7.280764723094892, "learning_rate": 8.696683511989381e-06, "loss": 17.8302, "step": 14120 }, { "epoch": 0.2581205330213684, "grad_norm": 8.321951653277008, "learning_rate": 8.696484190314498e-06, "loss": 18.6115, "step": 14121 }, { "epoch": 0.25813881221781493, "grad_norm": 6.6238999856305405, "learning_rate": 8.69628485568376e-06, "loss": 17.4643, "step": 14122 }, { "epoch": 0.2581570914142614, "grad_norm": 6.503710160939189, "learning_rate": 8.696085508097865e-06, "loss": 17.7514, "step": 14123 }, { "epoch": 0.25817537061070794, "grad_norm": 5.775511509425728, "learning_rate": 8.695886147557508e-06, "loss": 17.2477, "step": 14124 }, { "epoch": 0.2581936498071545, "grad_norm": 6.4818374067254485, "learning_rate": 8.695686774063394e-06, "loss": 17.3527, "step": 14125 }, { "epoch": 0.258211929003601, "grad_norm": 7.701902668089003, "learning_rate": 8.695487387616217e-06, "loss": 17.9372, "step": 14126 }, { "epoch": 0.25823020820004755, "grad_norm": 6.697959652546757, "learning_rate": 8.695287988216679e-06, "loss": 17.6515, "step": 14127 }, { "epoch": 0.25824848739649403, "grad_norm": 8.041435335901662, "learning_rate": 8.695088575865476e-06, "loss": 18.4181, "step": 14128 }, { "epoch": 0.25826676659294057, "grad_norm": 6.567403984090422, "learning_rate": 8.694889150563308e-06, "loss": 17.7741, "step": 14129 }, { "epoch": 0.2582850457893871, "grad_norm": 6.918198819549715, "learning_rate": 8.694689712310875e-06, "loss": 18.0391, "step": 14130 }, { "epoch": 0.25830332498583364, "grad_norm": 6.697927322725348, "learning_rate": 8.694490261108874e-06, "loss": 17.2621, "step": 14131 }, { "epoch": 0.2583216041822802, "grad_norm": 6.011527171912248, "learning_rate": 8.694290796958004e-06, "loss": 17.4086, "step": 14132 }, { "epoch": 0.25833988337872665, "grad_norm": 6.1745037575596475, "learning_rate": 8.694091319858968e-06, "loss": 17.41, "step": 14133 }, { "epoch": 0.2583581625751732, "grad_norm": 6.627972132134733, "learning_rate": 8.693891829812463e-06, "loss": 17.4986, "step": 14134 }, { "epoch": 0.2583764417716197, "grad_norm": 6.36915373828169, "learning_rate": 8.693692326819185e-06, "loss": 17.4596, "step": 14135 }, { "epoch": 0.25839472096806626, "grad_norm": 8.292436876517803, "learning_rate": 8.693492810879838e-06, "loss": 18.0977, "step": 14136 }, { "epoch": 0.25841300016451274, "grad_norm": 7.847096691742847, "learning_rate": 8.693293281995118e-06, "loss": 17.936, "step": 14137 }, { "epoch": 0.2584312793609593, "grad_norm": 6.133172170823049, "learning_rate": 8.693093740165725e-06, "loss": 17.4722, "step": 14138 }, { "epoch": 0.2584495585574058, "grad_norm": 8.876256817910992, "learning_rate": 8.69289418539236e-06, "loss": 18.5546, "step": 14139 }, { "epoch": 0.25846783775385235, "grad_norm": 6.003085546094814, "learning_rate": 8.692694617675721e-06, "loss": 17.5186, "step": 14140 }, { "epoch": 0.2584861169502989, "grad_norm": 6.202426404784945, "learning_rate": 8.692495037016509e-06, "loss": 17.2455, "step": 14141 }, { "epoch": 0.25850439614674536, "grad_norm": 8.097268470393931, "learning_rate": 8.692295443415422e-06, "loss": 17.5599, "step": 14142 }, { "epoch": 0.2585226753431919, "grad_norm": 5.829988422698184, "learning_rate": 8.692095836873159e-06, "loss": 17.1431, "step": 14143 }, { "epoch": 0.25854095453963843, "grad_norm": 6.237348241267024, "learning_rate": 8.691896217390421e-06, "loss": 17.3951, "step": 14144 }, { "epoch": 0.25855923373608497, "grad_norm": 6.28420789545548, "learning_rate": 8.691696584967905e-06, "loss": 17.5018, "step": 14145 }, { "epoch": 0.2585775129325315, "grad_norm": 6.371079497773243, "learning_rate": 8.691496939606315e-06, "loss": 17.581, "step": 14146 }, { "epoch": 0.258595792128978, "grad_norm": 5.7376703639207705, "learning_rate": 8.69129728130635e-06, "loss": 17.3224, "step": 14147 }, { "epoch": 0.2586140713254245, "grad_norm": 6.198038893127131, "learning_rate": 8.691097610068705e-06, "loss": 17.3983, "step": 14148 }, { "epoch": 0.25863235052187106, "grad_norm": 6.718181695319792, "learning_rate": 8.690897925894085e-06, "loss": 17.2986, "step": 14149 }, { "epoch": 0.2586506297183176, "grad_norm": 6.42715301445234, "learning_rate": 8.690698228783188e-06, "loss": 17.5038, "step": 14150 }, { "epoch": 0.25866890891476413, "grad_norm": 6.500218420373457, "learning_rate": 8.690498518736715e-06, "loss": 17.7886, "step": 14151 }, { "epoch": 0.2586871881112106, "grad_norm": 5.362641201212311, "learning_rate": 8.690298795755362e-06, "loss": 16.9478, "step": 14152 }, { "epoch": 0.25870546730765714, "grad_norm": 9.544536326229629, "learning_rate": 8.690099059839834e-06, "loss": 18.2867, "step": 14153 }, { "epoch": 0.2587237465041037, "grad_norm": 7.370924052965017, "learning_rate": 8.689899310990828e-06, "loss": 18.1317, "step": 14154 }, { "epoch": 0.2587420257005502, "grad_norm": 8.413257304783276, "learning_rate": 8.689699549209046e-06, "loss": 18.2081, "step": 14155 }, { "epoch": 0.25876030489699675, "grad_norm": 5.693286958941834, "learning_rate": 8.689499774495186e-06, "loss": 17.1675, "step": 14156 }, { "epoch": 0.25877858409344323, "grad_norm": 6.486964766282015, "learning_rate": 8.689299986849952e-06, "loss": 17.5089, "step": 14157 }, { "epoch": 0.25879686328988977, "grad_norm": 7.348963029950461, "learning_rate": 8.68910018627404e-06, "loss": 18.0337, "step": 14158 }, { "epoch": 0.2588151424863363, "grad_norm": 5.7470422693526, "learning_rate": 8.688900372768152e-06, "loss": 17.1518, "step": 14159 }, { "epoch": 0.25883342168278284, "grad_norm": 7.724256889793154, "learning_rate": 8.688700546332989e-06, "loss": 18.0078, "step": 14160 }, { "epoch": 0.2588517008792294, "grad_norm": 7.23284061844154, "learning_rate": 8.68850070696925e-06, "loss": 18.124, "step": 14161 }, { "epoch": 0.25886998007567585, "grad_norm": 6.394090609045597, "learning_rate": 8.688300854677636e-06, "loss": 17.4614, "step": 14162 }, { "epoch": 0.2588882592721224, "grad_norm": 6.993271961062448, "learning_rate": 8.688100989458848e-06, "loss": 17.6922, "step": 14163 }, { "epoch": 0.2589065384685689, "grad_norm": 6.712416485913673, "learning_rate": 8.687901111313587e-06, "loss": 17.6312, "step": 14164 }, { "epoch": 0.25892481766501546, "grad_norm": 7.060217217080197, "learning_rate": 8.68770122024255e-06, "loss": 17.6092, "step": 14165 }, { "epoch": 0.258943096861462, "grad_norm": 6.572287768613955, "learning_rate": 8.687501316246441e-06, "loss": 17.6795, "step": 14166 }, { "epoch": 0.2589613760579085, "grad_norm": 6.32235361034704, "learning_rate": 8.68730139932596e-06, "loss": 17.4758, "step": 14167 }, { "epoch": 0.258979655254355, "grad_norm": 6.795511266468757, "learning_rate": 8.687101469481809e-06, "loss": 17.9259, "step": 14168 }, { "epoch": 0.25899793445080155, "grad_norm": 6.103485003033015, "learning_rate": 8.686901526714686e-06, "loss": 17.1723, "step": 14169 }, { "epoch": 0.2590162136472481, "grad_norm": 8.167025485819599, "learning_rate": 8.686701571025293e-06, "loss": 18.2686, "step": 14170 }, { "epoch": 0.25903449284369456, "grad_norm": 8.928956404945458, "learning_rate": 8.686501602414332e-06, "loss": 18.4698, "step": 14171 }, { "epoch": 0.2590527720401411, "grad_norm": 6.538180948003185, "learning_rate": 8.686301620882502e-06, "loss": 17.5651, "step": 14172 }, { "epoch": 0.25907105123658764, "grad_norm": 8.051077866943555, "learning_rate": 8.686101626430505e-06, "loss": 18.3328, "step": 14173 }, { "epoch": 0.25908933043303417, "grad_norm": 7.46434567692638, "learning_rate": 8.685901619059041e-06, "loss": 17.6777, "step": 14174 }, { "epoch": 0.2591076096294807, "grad_norm": 6.7844431445493365, "learning_rate": 8.685701598768813e-06, "loss": 18.0084, "step": 14175 }, { "epoch": 0.2591258888259272, "grad_norm": 7.663998584431132, "learning_rate": 8.685501565560519e-06, "loss": 18.1988, "step": 14176 }, { "epoch": 0.2591441680223737, "grad_norm": 5.93982789782059, "learning_rate": 8.685301519434863e-06, "loss": 17.4027, "step": 14177 }, { "epoch": 0.25916244721882026, "grad_norm": 6.029004675891996, "learning_rate": 8.685101460392545e-06, "loss": 17.2887, "step": 14178 }, { "epoch": 0.2591807264152668, "grad_norm": 9.013383911709616, "learning_rate": 8.684901388434266e-06, "loss": 17.7601, "step": 14179 }, { "epoch": 0.25919900561171333, "grad_norm": 7.1253852801615984, "learning_rate": 8.684701303560728e-06, "loss": 17.8046, "step": 14180 }, { "epoch": 0.2592172848081598, "grad_norm": 6.100035498801834, "learning_rate": 8.68450120577263e-06, "loss": 17.3895, "step": 14181 }, { "epoch": 0.25923556400460634, "grad_norm": 5.189228718505733, "learning_rate": 8.684301095070677e-06, "loss": 17.1329, "step": 14182 }, { "epoch": 0.2592538432010529, "grad_norm": 6.665781143457299, "learning_rate": 8.684100971455567e-06, "loss": 17.7856, "step": 14183 }, { "epoch": 0.2592721223974994, "grad_norm": 7.47350467978439, "learning_rate": 8.683900834928003e-06, "loss": 18.08, "step": 14184 }, { "epoch": 0.25929040159394595, "grad_norm": 7.525122210015121, "learning_rate": 8.683700685488687e-06, "loss": 18.0142, "step": 14185 }, { "epoch": 0.25930868079039243, "grad_norm": 6.440948791857707, "learning_rate": 8.683500523138318e-06, "loss": 17.8687, "step": 14186 }, { "epoch": 0.25932695998683897, "grad_norm": 6.380271622063288, "learning_rate": 8.6833003478776e-06, "loss": 17.5271, "step": 14187 }, { "epoch": 0.2593452391832855, "grad_norm": 6.1840804899448765, "learning_rate": 8.683100159707235e-06, "loss": 17.2603, "step": 14188 }, { "epoch": 0.25936351837973204, "grad_norm": 7.107260335076593, "learning_rate": 8.682899958627922e-06, "loss": 18.0198, "step": 14189 }, { "epoch": 0.2593817975761786, "grad_norm": 7.361694465003641, "learning_rate": 8.682699744640365e-06, "loss": 17.6505, "step": 14190 }, { "epoch": 0.25940007677262505, "grad_norm": 9.544649755749758, "learning_rate": 8.682499517745266e-06, "loss": 18.7415, "step": 14191 }, { "epoch": 0.2594183559690716, "grad_norm": 7.23844238396064, "learning_rate": 8.682299277943325e-06, "loss": 17.8093, "step": 14192 }, { "epoch": 0.2594366351655181, "grad_norm": 7.331660525820289, "learning_rate": 8.682099025235244e-06, "loss": 17.6294, "step": 14193 }, { "epoch": 0.25945491436196466, "grad_norm": 6.918174831241119, "learning_rate": 8.681898759621726e-06, "loss": 17.565, "step": 14194 }, { "epoch": 0.2594731935584112, "grad_norm": 7.533312081848028, "learning_rate": 8.681698481103472e-06, "loss": 17.773, "step": 14195 }, { "epoch": 0.2594914727548577, "grad_norm": 6.6700859081693205, "learning_rate": 8.681498189681185e-06, "loss": 17.4458, "step": 14196 }, { "epoch": 0.2595097519513042, "grad_norm": 6.484848541244922, "learning_rate": 8.681297885355567e-06, "loss": 17.4729, "step": 14197 }, { "epoch": 0.25952803114775075, "grad_norm": 9.179260736816243, "learning_rate": 8.681097568127316e-06, "loss": 18.4558, "step": 14198 }, { "epoch": 0.2595463103441973, "grad_norm": 6.965157906115924, "learning_rate": 8.680897237997141e-06, "loss": 17.6901, "step": 14199 }, { "epoch": 0.2595645895406438, "grad_norm": 7.252637507695194, "learning_rate": 8.68069689496574e-06, "loss": 18.0804, "step": 14200 }, { "epoch": 0.2595828687370903, "grad_norm": 8.236373944975917, "learning_rate": 8.680496539033814e-06, "loss": 17.9463, "step": 14201 }, { "epoch": 0.25960114793353684, "grad_norm": 7.359481017944861, "learning_rate": 8.680296170202066e-06, "loss": 17.9905, "step": 14202 }, { "epoch": 0.25961942712998337, "grad_norm": 6.535176048230325, "learning_rate": 8.680095788471202e-06, "loss": 17.5322, "step": 14203 }, { "epoch": 0.2596377063264299, "grad_norm": 6.628991932210777, "learning_rate": 8.679895393841922e-06, "loss": 17.411, "step": 14204 }, { "epoch": 0.2596559855228764, "grad_norm": 6.523055493984605, "learning_rate": 8.679694986314926e-06, "loss": 17.4709, "step": 14205 }, { "epoch": 0.2596742647193229, "grad_norm": 7.371541668892755, "learning_rate": 8.67949456589092e-06, "loss": 17.7989, "step": 14206 }, { "epoch": 0.25969254391576946, "grad_norm": 6.085751981455777, "learning_rate": 8.679294132570604e-06, "loss": 17.2445, "step": 14207 }, { "epoch": 0.259710823112216, "grad_norm": 5.324822737500185, "learning_rate": 8.67909368635468e-06, "loss": 16.9973, "step": 14208 }, { "epoch": 0.25972910230866253, "grad_norm": 6.077237755113356, "learning_rate": 8.678893227243854e-06, "loss": 17.3057, "step": 14209 }, { "epoch": 0.259747381505109, "grad_norm": 6.874065151832448, "learning_rate": 8.678692755238827e-06, "loss": 17.8281, "step": 14210 }, { "epoch": 0.25976566070155555, "grad_norm": 6.495768176593526, "learning_rate": 8.678492270340299e-06, "loss": 17.7346, "step": 14211 }, { "epoch": 0.2597839398980021, "grad_norm": 7.387890247946272, "learning_rate": 8.678291772548975e-06, "loss": 17.9833, "step": 14212 }, { "epoch": 0.2598022190944486, "grad_norm": 6.996674065804481, "learning_rate": 8.67809126186556e-06, "loss": 17.7884, "step": 14213 }, { "epoch": 0.25982049829089515, "grad_norm": 6.281673670852194, "learning_rate": 8.67789073829075e-06, "loss": 17.3121, "step": 14214 }, { "epoch": 0.25983877748734163, "grad_norm": 7.452836805760374, "learning_rate": 8.677690201825255e-06, "loss": 17.789, "step": 14215 }, { "epoch": 0.25985705668378817, "grad_norm": 5.957035006205307, "learning_rate": 8.677489652469775e-06, "loss": 17.4055, "step": 14216 }, { "epoch": 0.2598753358802347, "grad_norm": 6.3458021800711615, "learning_rate": 8.677289090225013e-06, "loss": 17.3333, "step": 14217 }, { "epoch": 0.25989361507668124, "grad_norm": 6.118127226736724, "learning_rate": 8.677088515091671e-06, "loss": 17.3757, "step": 14218 }, { "epoch": 0.2599118942731278, "grad_norm": 6.751555649206739, "learning_rate": 8.676887927070453e-06, "loss": 17.5914, "step": 14219 }, { "epoch": 0.25993017346957425, "grad_norm": 6.897398110608393, "learning_rate": 8.676687326162063e-06, "loss": 17.4974, "step": 14220 }, { "epoch": 0.2599484526660208, "grad_norm": 5.837874375748976, "learning_rate": 8.676486712367201e-06, "loss": 17.2377, "step": 14221 }, { "epoch": 0.2599667318624673, "grad_norm": 7.752103161333534, "learning_rate": 8.676286085686573e-06, "loss": 18.2379, "step": 14222 }, { "epoch": 0.25998501105891386, "grad_norm": 5.741388568907354, "learning_rate": 8.676085446120882e-06, "loss": 17.176, "step": 14223 }, { "epoch": 0.2600032902553604, "grad_norm": 7.389644836119392, "learning_rate": 8.67588479367083e-06, "loss": 17.7166, "step": 14224 }, { "epoch": 0.2600215694518069, "grad_norm": 6.8645852808263825, "learning_rate": 8.675684128337121e-06, "loss": 17.7229, "step": 14225 }, { "epoch": 0.2600398486482534, "grad_norm": 6.655345409236313, "learning_rate": 8.675483450120456e-06, "loss": 17.732, "step": 14226 }, { "epoch": 0.26005812784469995, "grad_norm": 6.494098822622264, "learning_rate": 8.675282759021544e-06, "loss": 17.2915, "step": 14227 }, { "epoch": 0.2600764070411465, "grad_norm": 6.522231857490047, "learning_rate": 8.675082055041082e-06, "loss": 17.5535, "step": 14228 }, { "epoch": 0.260094686237593, "grad_norm": 6.703443328300827, "learning_rate": 8.674881338179778e-06, "loss": 17.7362, "step": 14229 }, { "epoch": 0.2601129654340395, "grad_norm": 7.665242439256601, "learning_rate": 8.674680608438332e-06, "loss": 17.1427, "step": 14230 }, { "epoch": 0.26013124463048604, "grad_norm": 5.979924565427676, "learning_rate": 8.674479865817452e-06, "loss": 17.3177, "step": 14231 }, { "epoch": 0.26014952382693257, "grad_norm": 6.298123909968524, "learning_rate": 8.674279110317837e-06, "loss": 17.3941, "step": 14232 }, { "epoch": 0.2601678030233791, "grad_norm": 6.134596837049285, "learning_rate": 8.674078341940193e-06, "loss": 17.3473, "step": 14233 }, { "epoch": 0.26018608221982564, "grad_norm": 6.690030188988819, "learning_rate": 8.673877560685225e-06, "loss": 17.6278, "step": 14234 }, { "epoch": 0.2602043614162721, "grad_norm": 7.04849350575925, "learning_rate": 8.673676766553632e-06, "loss": 17.5964, "step": 14235 }, { "epoch": 0.26022264061271866, "grad_norm": 5.807512367297847, "learning_rate": 8.673475959546122e-06, "loss": 17.1041, "step": 14236 }, { "epoch": 0.2602409198091652, "grad_norm": 7.804949548570082, "learning_rate": 8.673275139663397e-06, "loss": 18.0493, "step": 14237 }, { "epoch": 0.26025919900561173, "grad_norm": 5.631732771120599, "learning_rate": 8.673074306906162e-06, "loss": 17.1282, "step": 14238 }, { "epoch": 0.2602774782020582, "grad_norm": 6.623288987579542, "learning_rate": 8.67287346127512e-06, "loss": 17.4896, "step": 14239 }, { "epoch": 0.26029575739850475, "grad_norm": 7.451615977421559, "learning_rate": 8.672672602770976e-06, "loss": 17.6082, "step": 14240 }, { "epoch": 0.2603140365949513, "grad_norm": 6.66514506670663, "learning_rate": 8.672471731394432e-06, "loss": 17.5331, "step": 14241 }, { "epoch": 0.2603323157913978, "grad_norm": 6.899726283753662, "learning_rate": 8.672270847146193e-06, "loss": 17.6937, "step": 14242 }, { "epoch": 0.26035059498784435, "grad_norm": 7.7005808922718115, "learning_rate": 8.672069950026964e-06, "loss": 18.1453, "step": 14243 }, { "epoch": 0.26036887418429083, "grad_norm": 6.37171306374178, "learning_rate": 8.67186904003745e-06, "loss": 17.5089, "step": 14244 }, { "epoch": 0.26038715338073737, "grad_norm": 7.908325408627519, "learning_rate": 8.671668117178351e-06, "loss": 18.2844, "step": 14245 }, { "epoch": 0.2604054325771839, "grad_norm": 5.920169292135648, "learning_rate": 8.671467181450376e-06, "loss": 17.1884, "step": 14246 }, { "epoch": 0.26042371177363044, "grad_norm": 6.898063108802871, "learning_rate": 8.671266232854227e-06, "loss": 17.935, "step": 14247 }, { "epoch": 0.260441990970077, "grad_norm": 6.529362513605127, "learning_rate": 8.671065271390606e-06, "loss": 17.6221, "step": 14248 }, { "epoch": 0.26046027016652346, "grad_norm": 7.228320070578015, "learning_rate": 8.670864297060223e-06, "loss": 18.1665, "step": 14249 }, { "epoch": 0.26047854936297, "grad_norm": 5.948830610352112, "learning_rate": 8.670663309863778e-06, "loss": 17.3457, "step": 14250 }, { "epoch": 0.2604968285594165, "grad_norm": 6.602771669230802, "learning_rate": 8.670462309801976e-06, "loss": 17.6515, "step": 14251 }, { "epoch": 0.26051510775586306, "grad_norm": 7.077321993934937, "learning_rate": 8.670261296875521e-06, "loss": 17.8299, "step": 14252 }, { "epoch": 0.2605333869523096, "grad_norm": 4.706703887188802, "learning_rate": 8.670060271085122e-06, "loss": 16.854, "step": 14253 }, { "epoch": 0.2605516661487561, "grad_norm": 6.60047464851554, "learning_rate": 8.669859232431478e-06, "loss": 17.4978, "step": 14254 }, { "epoch": 0.2605699453452026, "grad_norm": 5.9323500391190285, "learning_rate": 8.669658180915295e-06, "loss": 17.3478, "step": 14255 }, { "epoch": 0.26058822454164915, "grad_norm": 6.901721871946896, "learning_rate": 8.669457116537281e-06, "loss": 17.6855, "step": 14256 }, { "epoch": 0.2606065037380957, "grad_norm": 6.578632990964375, "learning_rate": 8.669256039298136e-06, "loss": 17.3722, "step": 14257 }, { "epoch": 0.2606247829345422, "grad_norm": 5.39057658087867, "learning_rate": 8.669054949198567e-06, "loss": 16.9551, "step": 14258 }, { "epoch": 0.2606430621309887, "grad_norm": 6.211108870611746, "learning_rate": 8.66885384623928e-06, "loss": 17.3891, "step": 14259 }, { "epoch": 0.26066134132743524, "grad_norm": 6.384579728924579, "learning_rate": 8.668652730420979e-06, "loss": 17.6692, "step": 14260 }, { "epoch": 0.26067962052388177, "grad_norm": 5.313832268782017, "learning_rate": 8.668451601744367e-06, "loss": 16.9212, "step": 14261 }, { "epoch": 0.2606978997203283, "grad_norm": 6.327507524674901, "learning_rate": 8.668250460210152e-06, "loss": 17.2745, "step": 14262 }, { "epoch": 0.26071617891677484, "grad_norm": 7.276705564390637, "learning_rate": 8.668049305819038e-06, "loss": 17.9003, "step": 14263 }, { "epoch": 0.2607344581132213, "grad_norm": 6.954134425200045, "learning_rate": 8.667848138571727e-06, "loss": 17.9699, "step": 14264 }, { "epoch": 0.26075273730966786, "grad_norm": 6.360331324137992, "learning_rate": 8.667646958468928e-06, "loss": 17.4674, "step": 14265 }, { "epoch": 0.2607710165061144, "grad_norm": 6.5458612551608555, "learning_rate": 8.667445765511345e-06, "loss": 17.5799, "step": 14266 }, { "epoch": 0.26078929570256093, "grad_norm": 6.615452750596066, "learning_rate": 8.66724455969968e-06, "loss": 17.5238, "step": 14267 }, { "epoch": 0.26080757489900747, "grad_norm": 6.133766441116057, "learning_rate": 8.667043341034646e-06, "loss": 17.353, "step": 14268 }, { "epoch": 0.26082585409545395, "grad_norm": 6.822704766326976, "learning_rate": 8.66684210951694e-06, "loss": 17.8244, "step": 14269 }, { "epoch": 0.2608441332919005, "grad_norm": 6.510204609711557, "learning_rate": 8.66664086514727e-06, "loss": 17.4648, "step": 14270 }, { "epoch": 0.260862412488347, "grad_norm": 7.596246445747984, "learning_rate": 8.666439607926345e-06, "loss": 17.7896, "step": 14271 }, { "epoch": 0.26088069168479355, "grad_norm": 5.235495628093616, "learning_rate": 8.666238337854864e-06, "loss": 16.7636, "step": 14272 }, { "epoch": 0.26089897088124003, "grad_norm": 7.000231983092996, "learning_rate": 8.666037054933538e-06, "loss": 17.706, "step": 14273 }, { "epoch": 0.26091725007768657, "grad_norm": 6.577012523913233, "learning_rate": 8.66583575916307e-06, "loss": 17.6584, "step": 14274 }, { "epoch": 0.2609355292741331, "grad_norm": 6.546348910698961, "learning_rate": 8.665634450544165e-06, "loss": 17.2979, "step": 14275 }, { "epoch": 0.26095380847057964, "grad_norm": 6.821977935311509, "learning_rate": 8.66543312907753e-06, "loss": 17.4793, "step": 14276 }, { "epoch": 0.2609720876670262, "grad_norm": 7.523576969932582, "learning_rate": 8.665231794763868e-06, "loss": 18.2738, "step": 14277 }, { "epoch": 0.26099036686347266, "grad_norm": 5.151727795926751, "learning_rate": 8.66503044760389e-06, "loss": 16.9207, "step": 14278 }, { "epoch": 0.2610086460599192, "grad_norm": 6.743005251847526, "learning_rate": 8.664829087598297e-06, "loss": 17.8879, "step": 14279 }, { "epoch": 0.2610269252563657, "grad_norm": 7.27416254634992, "learning_rate": 8.664627714747796e-06, "loss": 17.6607, "step": 14280 }, { "epoch": 0.26104520445281226, "grad_norm": 5.7338638288078725, "learning_rate": 8.66442632905309e-06, "loss": 17.0997, "step": 14281 }, { "epoch": 0.2610634836492588, "grad_norm": 6.238847600389716, "learning_rate": 8.664224930514891e-06, "loss": 17.543, "step": 14282 }, { "epoch": 0.2610817628457053, "grad_norm": 6.262960496814813, "learning_rate": 8.664023519133901e-06, "loss": 17.1668, "step": 14283 }, { "epoch": 0.2611000420421518, "grad_norm": 6.312394038715761, "learning_rate": 8.663822094910826e-06, "loss": 17.2952, "step": 14284 }, { "epoch": 0.26111832123859835, "grad_norm": 7.314451336858432, "learning_rate": 8.663620657846372e-06, "loss": 17.9633, "step": 14285 }, { "epoch": 0.2611366004350449, "grad_norm": 7.068644350015441, "learning_rate": 8.663419207941248e-06, "loss": 17.6935, "step": 14286 }, { "epoch": 0.2611548796314914, "grad_norm": 6.731138266776832, "learning_rate": 8.663217745196155e-06, "loss": 17.6665, "step": 14287 }, { "epoch": 0.2611731588279379, "grad_norm": 7.2949333980703726, "learning_rate": 8.663016269611802e-06, "loss": 17.6784, "step": 14288 }, { "epoch": 0.26119143802438444, "grad_norm": 6.200160114403457, "learning_rate": 8.662814781188896e-06, "loss": 17.2703, "step": 14289 }, { "epoch": 0.261209717220831, "grad_norm": 7.511082518027258, "learning_rate": 8.662613279928141e-06, "loss": 17.5261, "step": 14290 }, { "epoch": 0.2612279964172775, "grad_norm": 5.669264255267169, "learning_rate": 8.662411765830245e-06, "loss": 17.1438, "step": 14291 }, { "epoch": 0.26124627561372404, "grad_norm": 5.7018480564985365, "learning_rate": 8.662210238895914e-06, "loss": 17.0191, "step": 14292 }, { "epoch": 0.2612645548101705, "grad_norm": 7.153404570753518, "learning_rate": 8.662008699125853e-06, "loss": 17.5494, "step": 14293 }, { "epoch": 0.26128283400661706, "grad_norm": 5.644824221333599, "learning_rate": 8.661807146520769e-06, "loss": 17.2817, "step": 14294 }, { "epoch": 0.2613011132030636, "grad_norm": 6.488183206471269, "learning_rate": 8.661605581081369e-06, "loss": 17.728, "step": 14295 }, { "epoch": 0.26131939239951013, "grad_norm": 6.725374470937904, "learning_rate": 8.66140400280836e-06, "loss": 17.5082, "step": 14296 }, { "epoch": 0.26133767159595667, "grad_norm": 6.779234937895279, "learning_rate": 8.661202411702448e-06, "loss": 17.4111, "step": 14297 }, { "epoch": 0.26135595079240315, "grad_norm": 7.621404763034471, "learning_rate": 8.661000807764338e-06, "loss": 17.9106, "step": 14298 }, { "epoch": 0.2613742299888497, "grad_norm": 6.2451423751081405, "learning_rate": 8.660799190994738e-06, "loss": 17.3519, "step": 14299 }, { "epoch": 0.2613925091852962, "grad_norm": 6.7033198164479, "learning_rate": 8.660597561394354e-06, "loss": 17.8168, "step": 14300 }, { "epoch": 0.26141078838174275, "grad_norm": 5.580826480366983, "learning_rate": 8.660395918963894e-06, "loss": 17.1418, "step": 14301 }, { "epoch": 0.2614290675781893, "grad_norm": 8.43652772775704, "learning_rate": 8.660194263704063e-06, "loss": 18.2843, "step": 14302 }, { "epoch": 0.26144734677463577, "grad_norm": 8.126025296361352, "learning_rate": 8.659992595615569e-06, "loss": 18.0668, "step": 14303 }, { "epoch": 0.2614656259710823, "grad_norm": 9.150277131661936, "learning_rate": 8.65979091469912e-06, "loss": 18.4163, "step": 14304 }, { "epoch": 0.26148390516752884, "grad_norm": 9.556173073928285, "learning_rate": 8.65958922095542e-06, "loss": 18.7502, "step": 14305 }, { "epoch": 0.2615021843639754, "grad_norm": 6.128587431759308, "learning_rate": 8.659387514385179e-06, "loss": 17.253, "step": 14306 }, { "epoch": 0.26152046356042186, "grad_norm": 6.943525255298647, "learning_rate": 8.6591857949891e-06, "loss": 17.7416, "step": 14307 }, { "epoch": 0.2615387427568684, "grad_norm": 6.444330711899049, "learning_rate": 8.658984062767893e-06, "loss": 17.4775, "step": 14308 }, { "epoch": 0.2615570219533149, "grad_norm": 8.10083039055212, "learning_rate": 8.658782317722265e-06, "loss": 18.2508, "step": 14309 }, { "epoch": 0.26157530114976146, "grad_norm": 6.220214814321148, "learning_rate": 8.658580559852922e-06, "loss": 17.3301, "step": 14310 }, { "epoch": 0.261593580346208, "grad_norm": 7.169790426415843, "learning_rate": 8.658378789160573e-06, "loss": 17.8293, "step": 14311 }, { "epoch": 0.2616118595426545, "grad_norm": 6.650129065417017, "learning_rate": 8.658177005645922e-06, "loss": 17.2107, "step": 14312 }, { "epoch": 0.261630138739101, "grad_norm": 5.8399668845782955, "learning_rate": 8.657975209309679e-06, "loss": 17.4181, "step": 14313 }, { "epoch": 0.26164841793554755, "grad_norm": 6.8481940107929455, "learning_rate": 8.657773400152549e-06, "loss": 17.5672, "step": 14314 }, { "epoch": 0.2616666971319941, "grad_norm": 6.905469070345408, "learning_rate": 8.657571578175243e-06, "loss": 18.1343, "step": 14315 }, { "epoch": 0.2616849763284406, "grad_norm": 6.860346363177648, "learning_rate": 8.657369743378464e-06, "loss": 17.866, "step": 14316 }, { "epoch": 0.2617032555248871, "grad_norm": 6.535592953025446, "learning_rate": 8.657167895762923e-06, "loss": 17.4644, "step": 14317 }, { "epoch": 0.26172153472133364, "grad_norm": 7.669681712929007, "learning_rate": 8.656966035329325e-06, "loss": 17.6827, "step": 14318 }, { "epoch": 0.2617398139177802, "grad_norm": 5.8820288919884876, "learning_rate": 8.656764162078377e-06, "loss": 17.1236, "step": 14319 }, { "epoch": 0.2617580931142267, "grad_norm": 6.043916132868095, "learning_rate": 8.65656227601079e-06, "loss": 17.5664, "step": 14320 }, { "epoch": 0.26177637231067324, "grad_norm": 7.263720419922502, "learning_rate": 8.656360377127269e-06, "loss": 18.2175, "step": 14321 }, { "epoch": 0.2617946515071197, "grad_norm": 8.64963241796326, "learning_rate": 8.656158465428523e-06, "loss": 17.9681, "step": 14322 }, { "epoch": 0.26181293070356626, "grad_norm": 5.8059009438770515, "learning_rate": 8.655956540915256e-06, "loss": 17.4548, "step": 14323 }, { "epoch": 0.2618312099000128, "grad_norm": 5.79451611257415, "learning_rate": 8.65575460358818e-06, "loss": 17.3694, "step": 14324 }, { "epoch": 0.26184948909645933, "grad_norm": 6.402302482213285, "learning_rate": 8.655552653448003e-06, "loss": 17.8392, "step": 14325 }, { "epoch": 0.26186776829290587, "grad_norm": 8.61783076396563, "learning_rate": 8.65535069049543e-06, "loss": 18.7223, "step": 14326 }, { "epoch": 0.26188604748935235, "grad_norm": 8.587924319046229, "learning_rate": 8.65514871473117e-06, "loss": 18.417, "step": 14327 }, { "epoch": 0.2619043266857989, "grad_norm": 5.952877435089593, "learning_rate": 8.654946726155931e-06, "loss": 17.1031, "step": 14328 }, { "epoch": 0.2619226058822454, "grad_norm": 6.014144087383838, "learning_rate": 8.65474472477042e-06, "loss": 17.2704, "step": 14329 }, { "epoch": 0.26194088507869195, "grad_norm": 5.634620473152719, "learning_rate": 8.654542710575348e-06, "loss": 17.1087, "step": 14330 }, { "epoch": 0.2619591642751385, "grad_norm": 7.144477716391988, "learning_rate": 8.654340683571418e-06, "loss": 17.6253, "step": 14331 }, { "epoch": 0.26197744347158497, "grad_norm": 6.951082058232852, "learning_rate": 8.654138643759344e-06, "loss": 17.5187, "step": 14332 }, { "epoch": 0.2619957226680315, "grad_norm": 6.539490304534041, "learning_rate": 8.653936591139829e-06, "loss": 17.753, "step": 14333 }, { "epoch": 0.26201400186447804, "grad_norm": 7.091311866726209, "learning_rate": 8.653734525713583e-06, "loss": 17.6396, "step": 14334 }, { "epoch": 0.2620322810609246, "grad_norm": 7.244588545356523, "learning_rate": 8.653532447481316e-06, "loss": 18.1019, "step": 14335 }, { "epoch": 0.2620505602573711, "grad_norm": 6.7605898758674625, "learning_rate": 8.653330356443735e-06, "loss": 18.0615, "step": 14336 }, { "epoch": 0.2620688394538176, "grad_norm": 6.235687289679575, "learning_rate": 8.653128252601547e-06, "loss": 17.3753, "step": 14337 }, { "epoch": 0.26208711865026413, "grad_norm": 6.913880155405421, "learning_rate": 8.652926135955464e-06, "loss": 17.8539, "step": 14338 }, { "epoch": 0.26210539784671066, "grad_norm": 8.72100510318729, "learning_rate": 8.652724006506189e-06, "loss": 18.2457, "step": 14339 }, { "epoch": 0.2621236770431572, "grad_norm": 7.547763899914024, "learning_rate": 8.652521864254435e-06, "loss": 18.258, "step": 14340 }, { "epoch": 0.2621419562396037, "grad_norm": 8.664727161749722, "learning_rate": 8.652319709200907e-06, "loss": 17.8626, "step": 14341 }, { "epoch": 0.2621602354360502, "grad_norm": 6.820000589447856, "learning_rate": 8.652117541346317e-06, "loss": 17.6796, "step": 14342 }, { "epoch": 0.26217851463249675, "grad_norm": 7.484719337981444, "learning_rate": 8.651915360691373e-06, "loss": 17.9818, "step": 14343 }, { "epoch": 0.2621967938289433, "grad_norm": 6.702311338215585, "learning_rate": 8.65171316723678e-06, "loss": 17.9814, "step": 14344 }, { "epoch": 0.2622150730253898, "grad_norm": 6.274173578402506, "learning_rate": 8.651510960983249e-06, "loss": 17.5691, "step": 14345 }, { "epoch": 0.2622333522218363, "grad_norm": 5.154677440741493, "learning_rate": 8.65130874193149e-06, "loss": 17.0477, "step": 14346 }, { "epoch": 0.26225163141828284, "grad_norm": 6.143556865771956, "learning_rate": 8.651106510082211e-06, "loss": 17.2127, "step": 14347 }, { "epoch": 0.2622699106147294, "grad_norm": 6.597435458258558, "learning_rate": 8.65090426543612e-06, "loss": 17.7153, "step": 14348 }, { "epoch": 0.2622881898111759, "grad_norm": 7.095482561854488, "learning_rate": 8.650702007993928e-06, "loss": 17.5848, "step": 14349 }, { "epoch": 0.26230646900762244, "grad_norm": 7.919294559570361, "learning_rate": 8.65049973775634e-06, "loss": 18.287, "step": 14350 }, { "epoch": 0.2623247482040689, "grad_norm": 6.989630058163078, "learning_rate": 8.650297454724069e-06, "loss": 17.9026, "step": 14351 }, { "epoch": 0.26234302740051546, "grad_norm": 7.142012838853306, "learning_rate": 8.65009515889782e-06, "loss": 17.6811, "step": 14352 }, { "epoch": 0.262361306596962, "grad_norm": 7.663580209790349, "learning_rate": 8.649892850278305e-06, "loss": 17.713, "step": 14353 }, { "epoch": 0.26237958579340853, "grad_norm": 6.804175445291541, "learning_rate": 8.649690528866234e-06, "loss": 17.546, "step": 14354 }, { "epoch": 0.26239786498985507, "grad_norm": 5.806853932241845, "learning_rate": 8.649488194662313e-06, "loss": 17.1717, "step": 14355 }, { "epoch": 0.26241614418630155, "grad_norm": 7.266232184784769, "learning_rate": 8.649285847667252e-06, "loss": 17.4944, "step": 14356 }, { "epoch": 0.2624344233827481, "grad_norm": 6.913518241050929, "learning_rate": 8.649083487881762e-06, "loss": 17.7306, "step": 14357 }, { "epoch": 0.2624527025791946, "grad_norm": 7.103733007219354, "learning_rate": 8.64888111530655e-06, "loss": 17.883, "step": 14358 }, { "epoch": 0.26247098177564115, "grad_norm": 7.970926037303044, "learning_rate": 8.648678729942329e-06, "loss": 18.3431, "step": 14359 }, { "epoch": 0.2624892609720877, "grad_norm": 6.871957535751437, "learning_rate": 8.648476331789802e-06, "loss": 17.7505, "step": 14360 }, { "epoch": 0.26250754016853417, "grad_norm": 6.039876635119999, "learning_rate": 8.648273920849684e-06, "loss": 17.4392, "step": 14361 }, { "epoch": 0.2625258193649807, "grad_norm": 7.349633870407833, "learning_rate": 8.648071497122681e-06, "loss": 17.7897, "step": 14362 }, { "epoch": 0.26254409856142724, "grad_norm": 6.617894636394728, "learning_rate": 8.647869060609506e-06, "loss": 17.5581, "step": 14363 }, { "epoch": 0.2625623777578738, "grad_norm": 7.306333841700882, "learning_rate": 8.647666611310865e-06, "loss": 17.6994, "step": 14364 }, { "epoch": 0.2625806569543203, "grad_norm": 7.493017193538997, "learning_rate": 8.647464149227469e-06, "loss": 17.8527, "step": 14365 }, { "epoch": 0.2625989361507668, "grad_norm": 6.956499612622345, "learning_rate": 8.647261674360029e-06, "loss": 17.6242, "step": 14366 }, { "epoch": 0.26261721534721333, "grad_norm": 9.324868903766106, "learning_rate": 8.647059186709252e-06, "loss": 17.9245, "step": 14367 }, { "epoch": 0.26263549454365986, "grad_norm": 6.0025171735272185, "learning_rate": 8.646856686275851e-06, "loss": 17.3891, "step": 14368 }, { "epoch": 0.2626537737401064, "grad_norm": 7.528251366992634, "learning_rate": 8.646654173060531e-06, "loss": 18.0916, "step": 14369 }, { "epoch": 0.26267205293655294, "grad_norm": 7.942889659972795, "learning_rate": 8.646451647064007e-06, "loss": 18.0743, "step": 14370 }, { "epoch": 0.2626903321329994, "grad_norm": 6.4762044959895695, "learning_rate": 8.646249108286985e-06, "loss": 17.4625, "step": 14371 }, { "epoch": 0.26270861132944595, "grad_norm": 6.371259526902738, "learning_rate": 8.646046556730176e-06, "loss": 17.4361, "step": 14372 }, { "epoch": 0.2627268905258925, "grad_norm": 8.00752586799204, "learning_rate": 8.645843992394291e-06, "loss": 18.4024, "step": 14373 }, { "epoch": 0.262745169722339, "grad_norm": 7.283049339422857, "learning_rate": 8.64564141528004e-06, "loss": 17.7196, "step": 14374 }, { "epoch": 0.2627634489187855, "grad_norm": 5.903938845138504, "learning_rate": 8.64543882538813e-06, "loss": 17.3946, "step": 14375 }, { "epoch": 0.26278172811523204, "grad_norm": 6.541995076847163, "learning_rate": 8.645236222719275e-06, "loss": 17.7814, "step": 14376 }, { "epoch": 0.2628000073116786, "grad_norm": 6.989811084200486, "learning_rate": 8.645033607274183e-06, "loss": 17.7339, "step": 14377 }, { "epoch": 0.2628182865081251, "grad_norm": 6.963482701141724, "learning_rate": 8.644830979053565e-06, "loss": 17.754, "step": 14378 }, { "epoch": 0.26283656570457165, "grad_norm": 6.733724655273646, "learning_rate": 8.64462833805813e-06, "loss": 17.6504, "step": 14379 }, { "epoch": 0.2628548449010181, "grad_norm": 6.9312705576119145, "learning_rate": 8.644425684288589e-06, "loss": 17.6847, "step": 14380 }, { "epoch": 0.26287312409746466, "grad_norm": 5.788289883011871, "learning_rate": 8.644223017745653e-06, "loss": 17.2606, "step": 14381 }, { "epoch": 0.2628914032939112, "grad_norm": 6.175323501223622, "learning_rate": 8.644020338430029e-06, "loss": 17.3564, "step": 14382 }, { "epoch": 0.26290968249035773, "grad_norm": 8.410916425732914, "learning_rate": 8.643817646342433e-06, "loss": 18.51, "step": 14383 }, { "epoch": 0.26292796168680427, "grad_norm": 7.237520964244881, "learning_rate": 8.64361494148357e-06, "loss": 17.6957, "step": 14384 }, { "epoch": 0.26294624088325075, "grad_norm": 7.886911415886462, "learning_rate": 8.643412223854154e-06, "loss": 18.1002, "step": 14385 }, { "epoch": 0.2629645200796973, "grad_norm": 6.373927767746825, "learning_rate": 8.643209493454893e-06, "loss": 17.5343, "step": 14386 }, { "epoch": 0.2629827992761438, "grad_norm": 4.982759648815514, "learning_rate": 8.6430067502865e-06, "loss": 16.9397, "step": 14387 }, { "epoch": 0.26300107847259035, "grad_norm": 7.369552463687001, "learning_rate": 8.642803994349686e-06, "loss": 17.7397, "step": 14388 }, { "epoch": 0.2630193576690369, "grad_norm": 6.155446198720482, "learning_rate": 8.642601225645158e-06, "loss": 17.3574, "step": 14389 }, { "epoch": 0.26303763686548337, "grad_norm": 6.270228967039644, "learning_rate": 8.64239844417363e-06, "loss": 17.4627, "step": 14390 }, { "epoch": 0.2630559160619299, "grad_norm": 6.196351432520529, "learning_rate": 8.64219564993581e-06, "loss": 17.2828, "step": 14391 }, { "epoch": 0.26307419525837644, "grad_norm": 6.101617643826063, "learning_rate": 8.641992842932411e-06, "loss": 17.301, "step": 14392 }, { "epoch": 0.263092474454823, "grad_norm": 6.358708273663958, "learning_rate": 8.641790023164146e-06, "loss": 17.3201, "step": 14393 }, { "epoch": 0.2631107536512695, "grad_norm": 6.078181863374826, "learning_rate": 8.641587190631719e-06, "loss": 17.1216, "step": 14394 }, { "epoch": 0.263129032847716, "grad_norm": 6.656127913610376, "learning_rate": 8.641384345335845e-06, "loss": 17.4851, "step": 14395 }, { "epoch": 0.26314731204416253, "grad_norm": 6.16934047646913, "learning_rate": 8.641181487277237e-06, "loss": 17.3669, "step": 14396 }, { "epoch": 0.26316559124060906, "grad_norm": 7.6829119752010495, "learning_rate": 8.640978616456604e-06, "loss": 17.688, "step": 14397 }, { "epoch": 0.2631838704370556, "grad_norm": 7.996643508772909, "learning_rate": 8.640775732874655e-06, "loss": 17.9074, "step": 14398 }, { "epoch": 0.26320214963350214, "grad_norm": 5.867890861865463, "learning_rate": 8.640572836532104e-06, "loss": 17.182, "step": 14399 }, { "epoch": 0.2632204288299486, "grad_norm": 6.574988478382767, "learning_rate": 8.64036992742966e-06, "loss": 17.6623, "step": 14400 }, { "epoch": 0.26323870802639515, "grad_norm": 7.9735831120235074, "learning_rate": 8.640167005568036e-06, "loss": 17.795, "step": 14401 }, { "epoch": 0.2632569872228417, "grad_norm": 6.9479004744618775, "learning_rate": 8.639964070947944e-06, "loss": 17.6584, "step": 14402 }, { "epoch": 0.2632752664192882, "grad_norm": 6.30627480439822, "learning_rate": 8.639761123570093e-06, "loss": 17.3807, "step": 14403 }, { "epoch": 0.26329354561573476, "grad_norm": 6.508813470371485, "learning_rate": 8.639558163435195e-06, "loss": 17.6887, "step": 14404 }, { "epoch": 0.26331182481218124, "grad_norm": 6.034621266277597, "learning_rate": 8.63935519054396e-06, "loss": 17.3219, "step": 14405 }, { "epoch": 0.2633301040086278, "grad_norm": 7.550417473570721, "learning_rate": 8.639152204897103e-06, "loss": 17.4877, "step": 14406 }, { "epoch": 0.2633483832050743, "grad_norm": 5.8813861967069725, "learning_rate": 8.63894920649533e-06, "loss": 17.2511, "step": 14407 }, { "epoch": 0.26336666240152085, "grad_norm": 6.541356247451769, "learning_rate": 8.63874619533936e-06, "loss": 17.7163, "step": 14408 }, { "epoch": 0.2633849415979673, "grad_norm": 6.868842271992227, "learning_rate": 8.638543171429898e-06, "loss": 17.5807, "step": 14409 }, { "epoch": 0.26340322079441386, "grad_norm": 8.45474998240171, "learning_rate": 8.638340134767658e-06, "loss": 17.9626, "step": 14410 }, { "epoch": 0.2634214999908604, "grad_norm": 8.6414741204504, "learning_rate": 8.638137085353353e-06, "loss": 18.3836, "step": 14411 }, { "epoch": 0.26343977918730693, "grad_norm": 8.247130410778336, "learning_rate": 8.63793402318769e-06, "loss": 18.1161, "step": 14412 }, { "epoch": 0.26345805838375347, "grad_norm": 6.67902185171827, "learning_rate": 8.637730948271388e-06, "loss": 17.6532, "step": 14413 }, { "epoch": 0.26347633758019995, "grad_norm": 6.451326450170129, "learning_rate": 8.637527860605153e-06, "loss": 17.7221, "step": 14414 }, { "epoch": 0.2634946167766465, "grad_norm": 7.756004704377866, "learning_rate": 8.637324760189698e-06, "loss": 17.9408, "step": 14415 }, { "epoch": 0.263512895973093, "grad_norm": 7.634821663966823, "learning_rate": 8.637121647025734e-06, "loss": 18.153, "step": 14416 }, { "epoch": 0.26353117516953956, "grad_norm": 7.6466337157005295, "learning_rate": 8.636918521113978e-06, "loss": 18.325, "step": 14417 }, { "epoch": 0.2635494543659861, "grad_norm": 6.582028684780957, "learning_rate": 8.636715382455136e-06, "loss": 17.5692, "step": 14418 }, { "epoch": 0.26356773356243257, "grad_norm": 7.190494107280675, "learning_rate": 8.636512231049921e-06, "loss": 17.8639, "step": 14419 }, { "epoch": 0.2635860127588791, "grad_norm": 6.38178429188656, "learning_rate": 8.636309066899049e-06, "loss": 17.5957, "step": 14420 }, { "epoch": 0.26360429195532564, "grad_norm": 7.037021519255821, "learning_rate": 8.636105890003226e-06, "loss": 17.8983, "step": 14421 }, { "epoch": 0.2636225711517722, "grad_norm": 7.051985242002053, "learning_rate": 8.63590270036317e-06, "loss": 17.7895, "step": 14422 }, { "epoch": 0.2636408503482187, "grad_norm": 8.026578241369034, "learning_rate": 8.635699497979589e-06, "loss": 18.2436, "step": 14423 }, { "epoch": 0.2636591295446652, "grad_norm": 5.57639793423407, "learning_rate": 8.635496282853198e-06, "loss": 17.2686, "step": 14424 }, { "epoch": 0.26367740874111173, "grad_norm": 6.7780316664429385, "learning_rate": 8.635293054984708e-06, "loss": 17.6475, "step": 14425 }, { "epoch": 0.26369568793755827, "grad_norm": 5.959951157596639, "learning_rate": 8.63508981437483e-06, "loss": 17.4411, "step": 14426 }, { "epoch": 0.2637139671340048, "grad_norm": 7.292027672746734, "learning_rate": 8.634886561024278e-06, "loss": 17.8111, "step": 14427 }, { "epoch": 0.26373224633045134, "grad_norm": 6.441026208286518, "learning_rate": 8.634683294933764e-06, "loss": 17.4282, "step": 14428 }, { "epoch": 0.2637505255268978, "grad_norm": 8.410255753178866, "learning_rate": 8.634480016104e-06, "loss": 18.1782, "step": 14429 }, { "epoch": 0.26376880472334435, "grad_norm": 6.094913465751023, "learning_rate": 8.6342767245357e-06, "loss": 17.4715, "step": 14430 }, { "epoch": 0.2637870839197909, "grad_norm": 8.85569749760153, "learning_rate": 8.634073420229576e-06, "loss": 18.1532, "step": 14431 }, { "epoch": 0.2638053631162374, "grad_norm": 5.934754241360311, "learning_rate": 8.63387010318634e-06, "loss": 17.4521, "step": 14432 }, { "epoch": 0.26382364231268396, "grad_norm": 5.7609081418518935, "learning_rate": 8.633666773406703e-06, "loss": 17.3163, "step": 14433 }, { "epoch": 0.26384192150913044, "grad_norm": 7.808406130301736, "learning_rate": 8.63346343089138e-06, "loss": 17.7417, "step": 14434 }, { "epoch": 0.263860200705577, "grad_norm": 7.9984500516837596, "learning_rate": 8.633260075641084e-06, "loss": 17.859, "step": 14435 }, { "epoch": 0.2638784799020235, "grad_norm": 7.958538072619891, "learning_rate": 8.633056707656524e-06, "loss": 17.8181, "step": 14436 }, { "epoch": 0.26389675909847005, "grad_norm": 6.342604782252801, "learning_rate": 8.632853326938417e-06, "loss": 17.5564, "step": 14437 }, { "epoch": 0.2639150382949166, "grad_norm": 6.403760931250704, "learning_rate": 8.632649933487476e-06, "loss": 17.3521, "step": 14438 }, { "epoch": 0.26393331749136306, "grad_norm": 5.973544886025513, "learning_rate": 8.63244652730441e-06, "loss": 17.3415, "step": 14439 }, { "epoch": 0.2639515966878096, "grad_norm": 7.24917701930819, "learning_rate": 8.632243108389935e-06, "loss": 17.8393, "step": 14440 }, { "epoch": 0.26396987588425613, "grad_norm": 6.79616203564851, "learning_rate": 8.632039676744764e-06, "loss": 17.7178, "step": 14441 }, { "epoch": 0.26398815508070267, "grad_norm": 5.588646667658552, "learning_rate": 8.631836232369607e-06, "loss": 17.0608, "step": 14442 }, { "epoch": 0.26400643427714915, "grad_norm": 5.409842674096702, "learning_rate": 8.63163277526518e-06, "loss": 16.9792, "step": 14443 }, { "epoch": 0.2640247134735957, "grad_norm": 8.08944987169309, "learning_rate": 8.631429305432196e-06, "loss": 18.0805, "step": 14444 }, { "epoch": 0.2640429926700422, "grad_norm": 5.948482874138709, "learning_rate": 8.631225822871368e-06, "loss": 17.1899, "step": 14445 }, { "epoch": 0.26406127186648876, "grad_norm": 6.837103304402086, "learning_rate": 8.631022327583407e-06, "loss": 17.5914, "step": 14446 }, { "epoch": 0.2640795510629353, "grad_norm": 5.961297593954145, "learning_rate": 8.630818819569028e-06, "loss": 17.2807, "step": 14447 }, { "epoch": 0.26409783025938177, "grad_norm": 8.932231364435959, "learning_rate": 8.630615298828946e-06, "loss": 18.4577, "step": 14448 }, { "epoch": 0.2641161094558283, "grad_norm": 7.367569768686041, "learning_rate": 8.63041176536387e-06, "loss": 17.9257, "step": 14449 }, { "epoch": 0.26413438865227484, "grad_norm": 7.204328541088552, "learning_rate": 8.630208219174516e-06, "loss": 17.8198, "step": 14450 }, { "epoch": 0.2641526678487214, "grad_norm": 6.21378854137978, "learning_rate": 8.630004660261598e-06, "loss": 17.2751, "step": 14451 }, { "epoch": 0.2641709470451679, "grad_norm": 6.513665379950189, "learning_rate": 8.629801088625829e-06, "loss": 17.2317, "step": 14452 }, { "epoch": 0.2641892262416144, "grad_norm": 7.386715292027365, "learning_rate": 8.629597504267922e-06, "loss": 17.7965, "step": 14453 }, { "epoch": 0.26420750543806093, "grad_norm": 7.460690865408325, "learning_rate": 8.62939390718859e-06, "loss": 17.8174, "step": 14454 }, { "epoch": 0.26422578463450747, "grad_norm": 7.629898885971991, "learning_rate": 8.629190297388548e-06, "loss": 18.054, "step": 14455 }, { "epoch": 0.264244063830954, "grad_norm": 8.84710712682071, "learning_rate": 8.62898667486851e-06, "loss": 17.9528, "step": 14456 }, { "epoch": 0.26426234302740054, "grad_norm": 6.6735793700097235, "learning_rate": 8.628783039629185e-06, "loss": 17.5621, "step": 14457 }, { "epoch": 0.264280622223847, "grad_norm": 6.3392768492060725, "learning_rate": 8.628579391671294e-06, "loss": 17.2754, "step": 14458 }, { "epoch": 0.26429890142029355, "grad_norm": 6.123077425313889, "learning_rate": 8.628375730995546e-06, "loss": 17.5178, "step": 14459 }, { "epoch": 0.2643171806167401, "grad_norm": 6.035249372008523, "learning_rate": 8.628172057602655e-06, "loss": 17.315, "step": 14460 }, { "epoch": 0.2643354598131866, "grad_norm": 6.471941189761329, "learning_rate": 8.627968371493337e-06, "loss": 17.3207, "step": 14461 }, { "epoch": 0.26435373900963316, "grad_norm": 6.960751916740965, "learning_rate": 8.627764672668303e-06, "loss": 17.8303, "step": 14462 }, { "epoch": 0.26437201820607964, "grad_norm": 7.260904605695864, "learning_rate": 8.62756096112827e-06, "loss": 17.9214, "step": 14463 }, { "epoch": 0.2643902974025262, "grad_norm": 7.283451103095669, "learning_rate": 8.62735723687395e-06, "loss": 17.9416, "step": 14464 }, { "epoch": 0.2644085765989727, "grad_norm": 8.309014198787658, "learning_rate": 8.627153499906058e-06, "loss": 18.113, "step": 14465 }, { "epoch": 0.26442685579541925, "grad_norm": 7.576112195195461, "learning_rate": 8.626949750225309e-06, "loss": 18.0264, "step": 14466 }, { "epoch": 0.2644451349918658, "grad_norm": 10.809108033541476, "learning_rate": 8.626745987832415e-06, "loss": 18.7501, "step": 14467 }, { "epoch": 0.26446341418831226, "grad_norm": 7.762386969248576, "learning_rate": 8.626542212728089e-06, "loss": 17.9793, "step": 14468 }, { "epoch": 0.2644816933847588, "grad_norm": 6.39395296041559, "learning_rate": 8.62633842491305e-06, "loss": 17.3432, "step": 14469 }, { "epoch": 0.26449997258120533, "grad_norm": 7.274541419351831, "learning_rate": 8.626134624388008e-06, "loss": 17.459, "step": 14470 }, { "epoch": 0.26451825177765187, "grad_norm": 8.082208749520747, "learning_rate": 8.625930811153679e-06, "loss": 18.1278, "step": 14471 }, { "epoch": 0.2645365309740984, "grad_norm": 7.935183376958134, "learning_rate": 8.625726985210778e-06, "loss": 17.8109, "step": 14472 }, { "epoch": 0.2645548101705449, "grad_norm": 6.301684228265136, "learning_rate": 8.625523146560017e-06, "loss": 17.5847, "step": 14473 }, { "epoch": 0.2645730893669914, "grad_norm": 7.030397305329106, "learning_rate": 8.625319295202113e-06, "loss": 17.9024, "step": 14474 }, { "epoch": 0.26459136856343796, "grad_norm": 6.374695690179215, "learning_rate": 8.625115431137779e-06, "loss": 17.9539, "step": 14475 }, { "epoch": 0.2646096477598845, "grad_norm": 6.083516775600971, "learning_rate": 8.62491155436773e-06, "loss": 17.3094, "step": 14476 }, { "epoch": 0.26462792695633097, "grad_norm": 6.625890657782554, "learning_rate": 8.624707664892683e-06, "loss": 17.8002, "step": 14477 }, { "epoch": 0.2646462061527775, "grad_norm": 5.524092202770874, "learning_rate": 8.624503762713347e-06, "loss": 17.1825, "step": 14478 }, { "epoch": 0.26466448534922404, "grad_norm": 6.2277588844392024, "learning_rate": 8.624299847830441e-06, "loss": 17.3205, "step": 14479 }, { "epoch": 0.2646827645456706, "grad_norm": 5.560057789845338, "learning_rate": 8.624095920244677e-06, "loss": 16.9911, "step": 14480 }, { "epoch": 0.2647010437421171, "grad_norm": 7.52043293493985, "learning_rate": 8.623891979956773e-06, "loss": 18.0707, "step": 14481 }, { "epoch": 0.2647193229385636, "grad_norm": 6.405890877516015, "learning_rate": 8.623688026967443e-06, "loss": 17.3705, "step": 14482 }, { "epoch": 0.26473760213501013, "grad_norm": 5.585600076849905, "learning_rate": 8.623484061277398e-06, "loss": 17.0289, "step": 14483 }, { "epoch": 0.26475588133145667, "grad_norm": 7.335419385052503, "learning_rate": 8.623280082887357e-06, "loss": 18.0216, "step": 14484 }, { "epoch": 0.2647741605279032, "grad_norm": 6.6583291029687715, "learning_rate": 8.623076091798036e-06, "loss": 17.7233, "step": 14485 }, { "epoch": 0.26479243972434974, "grad_norm": 5.613739837826254, "learning_rate": 8.622872088010145e-06, "loss": 17.111, "step": 14486 }, { "epoch": 0.2648107189207962, "grad_norm": 6.1139705866781835, "learning_rate": 8.622668071524403e-06, "loss": 17.5578, "step": 14487 }, { "epoch": 0.26482899811724275, "grad_norm": 8.973784823611156, "learning_rate": 8.622464042341523e-06, "loss": 18.1655, "step": 14488 }, { "epoch": 0.2648472773136893, "grad_norm": 6.036535448526162, "learning_rate": 8.622260000462222e-06, "loss": 17.6154, "step": 14489 }, { "epoch": 0.2648655565101358, "grad_norm": 8.662657752172933, "learning_rate": 8.622055945887213e-06, "loss": 17.9798, "step": 14490 }, { "epoch": 0.26488383570658236, "grad_norm": 6.563700174415634, "learning_rate": 8.621851878617212e-06, "loss": 17.4027, "step": 14491 }, { "epoch": 0.26490211490302884, "grad_norm": 7.4564540130171215, "learning_rate": 8.621647798652935e-06, "loss": 18.0004, "step": 14492 }, { "epoch": 0.2649203940994754, "grad_norm": 6.749344302101685, "learning_rate": 8.621443705995097e-06, "loss": 17.5439, "step": 14493 }, { "epoch": 0.2649386732959219, "grad_norm": 5.882199541576623, "learning_rate": 8.621239600644414e-06, "loss": 17.3339, "step": 14494 }, { "epoch": 0.26495695249236845, "grad_norm": 6.383130482277111, "learning_rate": 8.621035482601599e-06, "loss": 17.7137, "step": 14495 }, { "epoch": 0.264975231688815, "grad_norm": 7.484397129034903, "learning_rate": 8.620831351867368e-06, "loss": 18.2378, "step": 14496 }, { "epoch": 0.26499351088526146, "grad_norm": 5.502308298688108, "learning_rate": 8.620627208442439e-06, "loss": 17.0519, "step": 14497 }, { "epoch": 0.265011790081708, "grad_norm": 6.728858981392801, "learning_rate": 8.620423052327525e-06, "loss": 17.559, "step": 14498 }, { "epoch": 0.26503006927815453, "grad_norm": 6.985670955971041, "learning_rate": 8.620218883523342e-06, "loss": 17.6186, "step": 14499 }, { "epoch": 0.26504834847460107, "grad_norm": 6.758263002532076, "learning_rate": 8.620014702030607e-06, "loss": 17.909, "step": 14500 }, { "epoch": 0.2650666276710476, "grad_norm": 7.034051090086027, "learning_rate": 8.619810507850034e-06, "loss": 18.0054, "step": 14501 }, { "epoch": 0.2650849068674941, "grad_norm": 6.88773969867417, "learning_rate": 8.619606300982339e-06, "loss": 17.8061, "step": 14502 }, { "epoch": 0.2651031860639406, "grad_norm": 7.212959710152913, "learning_rate": 8.619402081428238e-06, "loss": 17.6886, "step": 14503 }, { "epoch": 0.26512146526038716, "grad_norm": 5.972178634538793, "learning_rate": 8.619197849188447e-06, "loss": 17.1257, "step": 14504 }, { "epoch": 0.2651397444568337, "grad_norm": 6.370199677002604, "learning_rate": 8.618993604263683e-06, "loss": 17.6626, "step": 14505 }, { "epoch": 0.26515802365328023, "grad_norm": 7.295011005090328, "learning_rate": 8.618789346654659e-06, "loss": 18.0519, "step": 14506 }, { "epoch": 0.2651763028497267, "grad_norm": 6.9344097714109365, "learning_rate": 8.618585076362092e-06, "loss": 17.8175, "step": 14507 }, { "epoch": 0.26519458204617324, "grad_norm": 6.1592932685006305, "learning_rate": 8.618380793386699e-06, "loss": 17.4501, "step": 14508 }, { "epoch": 0.2652128612426198, "grad_norm": 5.916675937894433, "learning_rate": 8.618176497729197e-06, "loss": 17.4744, "step": 14509 }, { "epoch": 0.2652311404390663, "grad_norm": 7.562135774921811, "learning_rate": 8.617972189390297e-06, "loss": 17.6995, "step": 14510 }, { "epoch": 0.2652494196355128, "grad_norm": 6.258737997475496, "learning_rate": 8.61776786837072e-06, "loss": 17.5063, "step": 14511 }, { "epoch": 0.26526769883195933, "grad_norm": 5.661577547064191, "learning_rate": 8.61756353467118e-06, "loss": 17.2176, "step": 14512 }, { "epoch": 0.26528597802840587, "grad_norm": 7.15404900491835, "learning_rate": 8.617359188292395e-06, "loss": 17.4497, "step": 14513 }, { "epoch": 0.2653042572248524, "grad_norm": 8.59396164793559, "learning_rate": 8.61715482923508e-06, "loss": 18.2819, "step": 14514 }, { "epoch": 0.26532253642129894, "grad_norm": 7.052255945748163, "learning_rate": 8.61695045749995e-06, "loss": 17.7761, "step": 14515 }, { "epoch": 0.2653408156177454, "grad_norm": 6.875029062853168, "learning_rate": 8.616746073087723e-06, "loss": 17.636, "step": 14516 }, { "epoch": 0.26535909481419195, "grad_norm": 6.914797687086098, "learning_rate": 8.616541675999114e-06, "loss": 17.8272, "step": 14517 }, { "epoch": 0.2653773740106385, "grad_norm": 7.397885008124451, "learning_rate": 8.616337266234841e-06, "loss": 18.0082, "step": 14518 }, { "epoch": 0.265395653207085, "grad_norm": 7.338120618208661, "learning_rate": 8.616132843795619e-06, "loss": 17.4697, "step": 14519 }, { "epoch": 0.26541393240353156, "grad_norm": 6.894638124573064, "learning_rate": 8.615928408682167e-06, "loss": 17.6686, "step": 14520 }, { "epoch": 0.26543221159997804, "grad_norm": 6.8820780687649785, "learning_rate": 8.615723960895198e-06, "loss": 17.5165, "step": 14521 }, { "epoch": 0.2654504907964246, "grad_norm": 9.896950902426656, "learning_rate": 8.615519500435432e-06, "loss": 17.7061, "step": 14522 }, { "epoch": 0.2654687699928711, "grad_norm": 6.559777394456265, "learning_rate": 8.615315027303582e-06, "loss": 17.9128, "step": 14523 }, { "epoch": 0.26548704918931765, "grad_norm": 6.956146471135961, "learning_rate": 8.615110541500366e-06, "loss": 17.4917, "step": 14524 }, { "epoch": 0.2655053283857642, "grad_norm": 7.823582414680977, "learning_rate": 8.614906043026503e-06, "loss": 18.1326, "step": 14525 }, { "epoch": 0.26552360758221066, "grad_norm": 7.476708248993483, "learning_rate": 8.614701531882708e-06, "loss": 17.8015, "step": 14526 }, { "epoch": 0.2655418867786572, "grad_norm": 7.054524244592589, "learning_rate": 8.614497008069697e-06, "loss": 17.6633, "step": 14527 }, { "epoch": 0.26556016597510373, "grad_norm": 6.199399833770583, "learning_rate": 8.614292471588188e-06, "loss": 17.1404, "step": 14528 }, { "epoch": 0.26557844517155027, "grad_norm": 5.429267886410322, "learning_rate": 8.614087922438899e-06, "loss": 16.9066, "step": 14529 }, { "epoch": 0.2655967243679968, "grad_norm": 6.745613806224077, "learning_rate": 8.613883360622543e-06, "loss": 17.7029, "step": 14530 }, { "epoch": 0.2656150035644433, "grad_norm": 7.164765055197569, "learning_rate": 8.61367878613984e-06, "loss": 17.398, "step": 14531 }, { "epoch": 0.2656332827608898, "grad_norm": 7.451254490299786, "learning_rate": 8.613474198991508e-06, "loss": 17.8344, "step": 14532 }, { "epoch": 0.26565156195733636, "grad_norm": 6.210960468432399, "learning_rate": 8.61326959917826e-06, "loss": 17.331, "step": 14533 }, { "epoch": 0.2656698411537829, "grad_norm": 6.761316470227943, "learning_rate": 8.613064986700817e-06, "loss": 17.7626, "step": 14534 }, { "epoch": 0.26568812035022943, "grad_norm": 7.566935423976791, "learning_rate": 8.612860361559895e-06, "loss": 17.8178, "step": 14535 }, { "epoch": 0.2657063995466759, "grad_norm": 8.833309756272051, "learning_rate": 8.61265572375621e-06, "loss": 18.8689, "step": 14536 }, { "epoch": 0.26572467874312244, "grad_norm": 6.8978068956033365, "learning_rate": 8.612451073290481e-06, "loss": 17.5856, "step": 14537 }, { "epoch": 0.265742957939569, "grad_norm": 6.444851151410214, "learning_rate": 8.612246410163426e-06, "loss": 17.4258, "step": 14538 }, { "epoch": 0.2657612371360155, "grad_norm": 6.46776566739074, "learning_rate": 8.612041734375758e-06, "loss": 17.569, "step": 14539 }, { "epoch": 0.26577951633246205, "grad_norm": 6.977395888144881, "learning_rate": 8.611837045928199e-06, "loss": 17.8813, "step": 14540 }, { "epoch": 0.26579779552890853, "grad_norm": 6.410358467372472, "learning_rate": 8.611632344821463e-06, "loss": 17.4866, "step": 14541 }, { "epoch": 0.26581607472535507, "grad_norm": 7.950135684060448, "learning_rate": 8.61142763105627e-06, "loss": 18.0721, "step": 14542 }, { "epoch": 0.2658343539218016, "grad_norm": 6.61809402317436, "learning_rate": 8.611222904633336e-06, "loss": 17.6161, "step": 14543 }, { "epoch": 0.26585263311824814, "grad_norm": 7.336321824901097, "learning_rate": 8.61101816555338e-06, "loss": 17.8365, "step": 14544 }, { "epoch": 0.2658709123146946, "grad_norm": 6.737137848233468, "learning_rate": 8.61081341381712e-06, "loss": 17.5722, "step": 14545 }, { "epoch": 0.26588919151114115, "grad_norm": 7.192786767385998, "learning_rate": 8.610608649425269e-06, "loss": 17.6626, "step": 14546 }, { "epoch": 0.2659074707075877, "grad_norm": 6.510261348040814, "learning_rate": 8.61040387237855e-06, "loss": 17.5307, "step": 14547 }, { "epoch": 0.2659257499040342, "grad_norm": 6.578354409706376, "learning_rate": 8.610199082677676e-06, "loss": 17.2704, "step": 14548 }, { "epoch": 0.26594402910048076, "grad_norm": 6.025007721595394, "learning_rate": 8.609994280323371e-06, "loss": 17.1077, "step": 14549 }, { "epoch": 0.26596230829692724, "grad_norm": 5.938124798069925, "learning_rate": 8.609789465316349e-06, "loss": 17.3366, "step": 14550 }, { "epoch": 0.2659805874933738, "grad_norm": 7.9793418290082325, "learning_rate": 8.609584637657327e-06, "loss": 17.2621, "step": 14551 }, { "epoch": 0.2659988666898203, "grad_norm": 7.599806684319761, "learning_rate": 8.609379797347025e-06, "loss": 18.1463, "step": 14552 }, { "epoch": 0.26601714588626685, "grad_norm": 6.192626430350422, "learning_rate": 8.609174944386158e-06, "loss": 17.5365, "step": 14553 }, { "epoch": 0.2660354250827134, "grad_norm": 7.024956925027557, "learning_rate": 8.608970078775448e-06, "loss": 17.8295, "step": 14554 }, { "epoch": 0.26605370427915986, "grad_norm": 7.1205064808478475, "learning_rate": 8.60876520051561e-06, "loss": 17.9813, "step": 14555 }, { "epoch": 0.2660719834756064, "grad_norm": 6.742292643754117, "learning_rate": 8.608560309607364e-06, "loss": 17.7733, "step": 14556 }, { "epoch": 0.26609026267205294, "grad_norm": 6.602844337637779, "learning_rate": 8.608355406051426e-06, "loss": 17.6018, "step": 14557 }, { "epoch": 0.26610854186849947, "grad_norm": 6.674092450200186, "learning_rate": 8.608150489848516e-06, "loss": 17.5333, "step": 14558 }, { "epoch": 0.266126821064946, "grad_norm": 6.330228946805338, "learning_rate": 8.607945560999351e-06, "loss": 17.2829, "step": 14559 }, { "epoch": 0.2661451002613925, "grad_norm": 5.141724277165614, "learning_rate": 8.607740619504651e-06, "loss": 16.9891, "step": 14560 }, { "epoch": 0.266163379457839, "grad_norm": 6.274518320128126, "learning_rate": 8.607535665365133e-06, "loss": 17.0762, "step": 14561 }, { "epoch": 0.26618165865428556, "grad_norm": 7.271786139082388, "learning_rate": 8.607330698581516e-06, "loss": 17.8042, "step": 14562 }, { "epoch": 0.2661999378507321, "grad_norm": 6.7092664226247525, "learning_rate": 8.607125719154517e-06, "loss": 17.6421, "step": 14563 }, { "epoch": 0.26621821704717863, "grad_norm": 7.780958231915097, "learning_rate": 8.606920727084856e-06, "loss": 17.9761, "step": 14564 }, { "epoch": 0.2662364962436251, "grad_norm": 10.250632342629785, "learning_rate": 8.60671572237325e-06, "loss": 18.1369, "step": 14565 }, { "epoch": 0.26625477544007164, "grad_norm": 5.938952848297895, "learning_rate": 8.606510705020418e-06, "loss": 17.3673, "step": 14566 }, { "epoch": 0.2662730546365182, "grad_norm": 6.644777949786896, "learning_rate": 8.606305675027081e-06, "loss": 17.4135, "step": 14567 }, { "epoch": 0.2662913338329647, "grad_norm": 6.9680846421093525, "learning_rate": 8.606100632393955e-06, "loss": 17.6119, "step": 14568 }, { "epoch": 0.26630961302941125, "grad_norm": 7.228445164925551, "learning_rate": 8.605895577121756e-06, "loss": 17.6872, "step": 14569 }, { "epoch": 0.26632789222585773, "grad_norm": 6.793796231481781, "learning_rate": 8.60569050921121e-06, "loss": 17.6463, "step": 14570 }, { "epoch": 0.26634617142230427, "grad_norm": 8.184261291908337, "learning_rate": 8.60548542866303e-06, "loss": 18.0862, "step": 14571 }, { "epoch": 0.2663644506187508, "grad_norm": 6.669093698274262, "learning_rate": 8.605280335477937e-06, "loss": 17.3625, "step": 14572 }, { "epoch": 0.26638272981519734, "grad_norm": 8.022540232457578, "learning_rate": 8.60507522965665e-06, "loss": 17.8668, "step": 14573 }, { "epoch": 0.2664010090116439, "grad_norm": 6.359887614228244, "learning_rate": 8.604870111199884e-06, "loss": 17.3532, "step": 14574 }, { "epoch": 0.26641928820809035, "grad_norm": 6.00556214434223, "learning_rate": 8.604664980108363e-06, "loss": 17.2011, "step": 14575 }, { "epoch": 0.2664375674045369, "grad_norm": 8.240097576013113, "learning_rate": 8.604459836382805e-06, "loss": 18.0064, "step": 14576 }, { "epoch": 0.2664558466009834, "grad_norm": 7.621136000245503, "learning_rate": 8.604254680023926e-06, "loss": 18.0655, "step": 14577 }, { "epoch": 0.26647412579742996, "grad_norm": 7.241932252630637, "learning_rate": 8.604049511032448e-06, "loss": 17.7946, "step": 14578 }, { "epoch": 0.26649240499387644, "grad_norm": 7.38691990934202, "learning_rate": 8.60384432940909e-06, "loss": 18.3702, "step": 14579 }, { "epoch": 0.266510684190323, "grad_norm": 5.840428221561425, "learning_rate": 8.603639135154571e-06, "loss": 17.2816, "step": 14580 }, { "epoch": 0.2665289633867695, "grad_norm": 5.866197550155619, "learning_rate": 8.603433928269607e-06, "loss": 17.1281, "step": 14581 }, { "epoch": 0.26654724258321605, "grad_norm": 5.739027440440411, "learning_rate": 8.603228708754923e-06, "loss": 17.2506, "step": 14582 }, { "epoch": 0.2665655217796626, "grad_norm": 7.235600044854483, "learning_rate": 8.603023476611231e-06, "loss": 18.0437, "step": 14583 }, { "epoch": 0.26658380097610906, "grad_norm": 5.410088168252828, "learning_rate": 8.602818231839258e-06, "loss": 17.0568, "step": 14584 }, { "epoch": 0.2666020801725556, "grad_norm": 5.259849600457909, "learning_rate": 8.602612974439719e-06, "loss": 17.0523, "step": 14585 }, { "epoch": 0.26662035936900214, "grad_norm": 7.771111353775161, "learning_rate": 8.602407704413333e-06, "loss": 18.318, "step": 14586 }, { "epoch": 0.26663863856544867, "grad_norm": 6.016256629883184, "learning_rate": 8.602202421760821e-06, "loss": 17.3069, "step": 14587 }, { "epoch": 0.2666569177618952, "grad_norm": 8.535257209161756, "learning_rate": 8.601997126482902e-06, "loss": 18.725, "step": 14588 }, { "epoch": 0.2666751969583417, "grad_norm": 6.675129458470847, "learning_rate": 8.601791818580296e-06, "loss": 17.8127, "step": 14589 }, { "epoch": 0.2666934761547882, "grad_norm": 7.876609683187753, "learning_rate": 8.60158649805372e-06, "loss": 18.1652, "step": 14590 }, { "epoch": 0.26671175535123476, "grad_norm": 17.921230096264836, "learning_rate": 8.6013811649039e-06, "loss": 17.9632, "step": 14591 }, { "epoch": 0.2667300345476813, "grad_norm": 6.598397901157439, "learning_rate": 8.601175819131548e-06, "loss": 17.505, "step": 14592 }, { "epoch": 0.26674831374412783, "grad_norm": 7.095345991456853, "learning_rate": 8.60097046073739e-06, "loss": 18.1829, "step": 14593 }, { "epoch": 0.2667665929405743, "grad_norm": 9.159927122606996, "learning_rate": 8.600765089722141e-06, "loss": 17.9415, "step": 14594 }, { "epoch": 0.26678487213702085, "grad_norm": 6.433372609963939, "learning_rate": 8.600559706086524e-06, "loss": 17.3582, "step": 14595 }, { "epoch": 0.2668031513334674, "grad_norm": 6.477033924801939, "learning_rate": 8.600354309831256e-06, "loss": 17.3693, "step": 14596 }, { "epoch": 0.2668214305299139, "grad_norm": 7.83575756565574, "learning_rate": 8.60014890095706e-06, "loss": 18.4398, "step": 14597 }, { "epoch": 0.26683970972636045, "grad_norm": 7.317859995714428, "learning_rate": 8.599943479464655e-06, "loss": 17.9391, "step": 14598 }, { "epoch": 0.26685798892280693, "grad_norm": 6.995436993292523, "learning_rate": 8.59973804535476e-06, "loss": 17.8577, "step": 14599 }, { "epoch": 0.26687626811925347, "grad_norm": 5.771502054950156, "learning_rate": 8.599532598628095e-06, "loss": 17.3081, "step": 14600 }, { "epoch": 0.2668945473157, "grad_norm": 6.9322850295424105, "learning_rate": 8.599327139285381e-06, "loss": 17.506, "step": 14601 }, { "epoch": 0.26691282651214654, "grad_norm": 7.608625176015776, "learning_rate": 8.599121667327336e-06, "loss": 17.9819, "step": 14602 }, { "epoch": 0.2669311057085931, "grad_norm": 6.300309634688053, "learning_rate": 8.598916182754684e-06, "loss": 17.4234, "step": 14603 }, { "epoch": 0.26694938490503956, "grad_norm": 6.457807239142948, "learning_rate": 8.598710685568144e-06, "loss": 17.3832, "step": 14604 }, { "epoch": 0.2669676641014861, "grad_norm": 6.317312787942724, "learning_rate": 8.598505175768433e-06, "loss": 17.5229, "step": 14605 }, { "epoch": 0.2669859432979326, "grad_norm": 6.8525647886978245, "learning_rate": 8.598299653356275e-06, "loss": 17.914, "step": 14606 }, { "epoch": 0.26700422249437916, "grad_norm": 6.08648184309682, "learning_rate": 8.598094118332387e-06, "loss": 17.3699, "step": 14607 }, { "epoch": 0.2670225016908257, "grad_norm": 7.168418025498164, "learning_rate": 8.597888570697493e-06, "loss": 17.8875, "step": 14608 }, { "epoch": 0.2670407808872722, "grad_norm": 6.978072260586743, "learning_rate": 8.597683010452313e-06, "loss": 17.8669, "step": 14609 }, { "epoch": 0.2670590600837187, "grad_norm": 5.68928948623838, "learning_rate": 8.597477437597564e-06, "loss": 17.4055, "step": 14610 }, { "epoch": 0.26707733928016525, "grad_norm": 6.7831148775858034, "learning_rate": 8.59727185213397e-06, "loss": 17.8555, "step": 14611 }, { "epoch": 0.2670956184766118, "grad_norm": 6.9694905142115395, "learning_rate": 8.59706625406225e-06, "loss": 17.8434, "step": 14612 }, { "epoch": 0.26711389767305826, "grad_norm": 6.878696532269876, "learning_rate": 8.596860643383124e-06, "loss": 17.6899, "step": 14613 }, { "epoch": 0.2671321768695048, "grad_norm": 5.971757207828341, "learning_rate": 8.596655020097314e-06, "loss": 17.3628, "step": 14614 }, { "epoch": 0.26715045606595134, "grad_norm": 5.842889536628196, "learning_rate": 8.596449384205541e-06, "loss": 17.2973, "step": 14615 }, { "epoch": 0.26716873526239787, "grad_norm": 7.238832715010462, "learning_rate": 8.596243735708525e-06, "loss": 17.9668, "step": 14616 }, { "epoch": 0.2671870144588444, "grad_norm": 7.1924676079170276, "learning_rate": 8.596038074606987e-06, "loss": 17.6888, "step": 14617 }, { "epoch": 0.2672052936552909, "grad_norm": 6.889844749206356, "learning_rate": 8.595832400901645e-06, "loss": 17.5642, "step": 14618 }, { "epoch": 0.2672235728517374, "grad_norm": 7.144385758465032, "learning_rate": 8.595626714593226e-06, "loss": 17.5755, "step": 14619 }, { "epoch": 0.26724185204818396, "grad_norm": 7.062425882232001, "learning_rate": 8.595421015682446e-06, "loss": 17.8704, "step": 14620 }, { "epoch": 0.2672601312446305, "grad_norm": 6.977461440582134, "learning_rate": 8.595215304170026e-06, "loss": 17.7499, "step": 14621 }, { "epoch": 0.26727841044107703, "grad_norm": 7.886577020516829, "learning_rate": 8.59500958005669e-06, "loss": 17.8037, "step": 14622 }, { "epoch": 0.2672966896375235, "grad_norm": 6.188005188647903, "learning_rate": 8.594803843343158e-06, "loss": 17.3182, "step": 14623 }, { "epoch": 0.26731496883397005, "grad_norm": 7.089846529298709, "learning_rate": 8.59459809403015e-06, "loss": 17.7172, "step": 14624 }, { "epoch": 0.2673332480304166, "grad_norm": 8.26504148083267, "learning_rate": 8.594392332118385e-06, "loss": 18.5234, "step": 14625 }, { "epoch": 0.2673515272268631, "grad_norm": 6.886041654859538, "learning_rate": 8.594186557608587e-06, "loss": 17.6315, "step": 14626 }, { "epoch": 0.26736980642330965, "grad_norm": 8.288044682399347, "learning_rate": 8.59398077050148e-06, "loss": 17.97, "step": 14627 }, { "epoch": 0.26738808561975613, "grad_norm": 7.169456483789491, "learning_rate": 8.59377497079778e-06, "loss": 18.0894, "step": 14628 }, { "epoch": 0.26740636481620267, "grad_norm": 5.904160673266783, "learning_rate": 8.593569158498213e-06, "loss": 17.3124, "step": 14629 }, { "epoch": 0.2674246440126492, "grad_norm": 6.192390315062664, "learning_rate": 8.593363333603498e-06, "loss": 17.4309, "step": 14630 }, { "epoch": 0.26744292320909574, "grad_norm": 6.815067021765786, "learning_rate": 8.593157496114353e-06, "loss": 17.8669, "step": 14631 }, { "epoch": 0.2674612024055423, "grad_norm": 9.663353977931491, "learning_rate": 8.592951646031504e-06, "loss": 18.0593, "step": 14632 }, { "epoch": 0.26747948160198876, "grad_norm": 7.027431175448057, "learning_rate": 8.592745783355673e-06, "loss": 17.511, "step": 14633 }, { "epoch": 0.2674977607984353, "grad_norm": 7.165705104412249, "learning_rate": 8.592539908087578e-06, "loss": 17.9487, "step": 14634 }, { "epoch": 0.2675160399948818, "grad_norm": 7.1448749636832725, "learning_rate": 8.592334020227943e-06, "loss": 17.998, "step": 14635 }, { "epoch": 0.26753431919132836, "grad_norm": 7.8146278493319, "learning_rate": 8.59212811977749e-06, "loss": 17.9695, "step": 14636 }, { "epoch": 0.2675525983877749, "grad_norm": 5.7202952675042384, "learning_rate": 8.591922206736937e-06, "loss": 17.4506, "step": 14637 }, { "epoch": 0.2675708775842214, "grad_norm": 6.279828032073376, "learning_rate": 8.591716281107013e-06, "loss": 17.7869, "step": 14638 }, { "epoch": 0.2675891567806679, "grad_norm": 5.402484171696131, "learning_rate": 8.59151034288843e-06, "loss": 17.0872, "step": 14639 }, { "epoch": 0.26760743597711445, "grad_norm": 6.172026518418237, "learning_rate": 8.591304392081917e-06, "loss": 17.471, "step": 14640 }, { "epoch": 0.267625715173561, "grad_norm": 6.823439932628753, "learning_rate": 8.591098428688194e-06, "loss": 18.1081, "step": 14641 }, { "epoch": 0.2676439943700075, "grad_norm": 6.5398142477364924, "learning_rate": 8.590892452707983e-06, "loss": 17.6611, "step": 14642 }, { "epoch": 0.267662273566454, "grad_norm": 5.766165614977785, "learning_rate": 8.590686464142006e-06, "loss": 17.4685, "step": 14643 }, { "epoch": 0.26768055276290054, "grad_norm": 6.625144022004418, "learning_rate": 8.590480462990983e-06, "loss": 18.028, "step": 14644 }, { "epoch": 0.26769883195934707, "grad_norm": 6.958001094920115, "learning_rate": 8.590274449255638e-06, "loss": 17.6994, "step": 14645 }, { "epoch": 0.2677171111557936, "grad_norm": 6.206127260729637, "learning_rate": 8.590068422936693e-06, "loss": 17.5663, "step": 14646 }, { "epoch": 0.2677353903522401, "grad_norm": 5.524311365369359, "learning_rate": 8.589862384034869e-06, "loss": 17.2548, "step": 14647 }, { "epoch": 0.2677536695486866, "grad_norm": 6.794287230566698, "learning_rate": 8.589656332550888e-06, "loss": 17.6116, "step": 14648 }, { "epoch": 0.26777194874513316, "grad_norm": 6.011601703938022, "learning_rate": 8.589450268485475e-06, "loss": 17.4124, "step": 14649 }, { "epoch": 0.2677902279415797, "grad_norm": 7.3348265994159, "learning_rate": 8.58924419183935e-06, "loss": 18.0039, "step": 14650 }, { "epoch": 0.26780850713802623, "grad_norm": 6.469191733093986, "learning_rate": 8.589038102613235e-06, "loss": 17.4543, "step": 14651 }, { "epoch": 0.2678267863344727, "grad_norm": 6.734930912931436, "learning_rate": 8.588832000807852e-06, "loss": 18.0038, "step": 14652 }, { "epoch": 0.26784506553091925, "grad_norm": 6.841974117358319, "learning_rate": 8.588625886423926e-06, "loss": 17.8994, "step": 14653 }, { "epoch": 0.2678633447273658, "grad_norm": 6.354695599013013, "learning_rate": 8.588419759462176e-06, "loss": 17.1604, "step": 14654 }, { "epoch": 0.2678816239238123, "grad_norm": 6.437731998455454, "learning_rate": 8.588213619923328e-06, "loss": 17.7682, "step": 14655 }, { "epoch": 0.26789990312025885, "grad_norm": 6.279993746879131, "learning_rate": 8.588007467808101e-06, "loss": 17.2485, "step": 14656 }, { "epoch": 0.26791818231670533, "grad_norm": 8.061355332527341, "learning_rate": 8.58780130311722e-06, "loss": 18.0273, "step": 14657 }, { "epoch": 0.26793646151315187, "grad_norm": 5.96166967807284, "learning_rate": 8.587595125851407e-06, "loss": 17.2501, "step": 14658 }, { "epoch": 0.2679547407095984, "grad_norm": 7.3753159021860055, "learning_rate": 8.587388936011386e-06, "loss": 18.1227, "step": 14659 }, { "epoch": 0.26797301990604494, "grad_norm": 6.456202514061895, "learning_rate": 8.587182733597874e-06, "loss": 17.478, "step": 14660 }, { "epoch": 0.2679912991024915, "grad_norm": 6.459535256272963, "learning_rate": 8.5869765186116e-06, "loss": 17.5381, "step": 14661 }, { "epoch": 0.26800957829893796, "grad_norm": 5.474720377856238, "learning_rate": 8.586770291053286e-06, "loss": 17.1773, "step": 14662 }, { "epoch": 0.2680278574953845, "grad_norm": 6.648757690869516, "learning_rate": 8.586564050923651e-06, "loss": 17.7136, "step": 14663 }, { "epoch": 0.268046136691831, "grad_norm": 6.966701301159261, "learning_rate": 8.586357798223421e-06, "loss": 17.5491, "step": 14664 }, { "epoch": 0.26806441588827756, "grad_norm": 8.349272246970072, "learning_rate": 8.586151532953319e-06, "loss": 18.0761, "step": 14665 }, { "epoch": 0.2680826950847241, "grad_norm": 8.472676963353326, "learning_rate": 8.585945255114065e-06, "loss": 18.4994, "step": 14666 }, { "epoch": 0.2681009742811706, "grad_norm": 6.414726136351401, "learning_rate": 8.585738964706387e-06, "loss": 17.297, "step": 14667 }, { "epoch": 0.2681192534776171, "grad_norm": 6.668931944589009, "learning_rate": 8.585532661731002e-06, "loss": 17.6723, "step": 14668 }, { "epoch": 0.26813753267406365, "grad_norm": 6.954940343528749, "learning_rate": 8.585326346188639e-06, "loss": 17.718, "step": 14669 }, { "epoch": 0.2681558118705102, "grad_norm": 5.9340071981898035, "learning_rate": 8.585120018080016e-06, "loss": 17.4597, "step": 14670 }, { "epoch": 0.2681740910669567, "grad_norm": 7.326870791647553, "learning_rate": 8.58491367740586e-06, "loss": 18.0068, "step": 14671 }, { "epoch": 0.2681923702634032, "grad_norm": 7.827022488528485, "learning_rate": 8.584707324166892e-06, "loss": 17.7907, "step": 14672 }, { "epoch": 0.26821064945984974, "grad_norm": 10.011623012769867, "learning_rate": 8.584500958363835e-06, "loss": 17.5316, "step": 14673 }, { "epoch": 0.2682289286562963, "grad_norm": 6.798564723673695, "learning_rate": 8.584294579997414e-06, "loss": 17.4025, "step": 14674 }, { "epoch": 0.2682472078527428, "grad_norm": 7.56727091434625, "learning_rate": 8.584088189068352e-06, "loss": 17.8541, "step": 14675 }, { "epoch": 0.26826548704918934, "grad_norm": 7.037791027692974, "learning_rate": 8.583881785577372e-06, "loss": 17.7043, "step": 14676 }, { "epoch": 0.2682837662456358, "grad_norm": 7.1995554718453185, "learning_rate": 8.583675369525196e-06, "loss": 17.6086, "step": 14677 }, { "epoch": 0.26830204544208236, "grad_norm": 7.771830931547811, "learning_rate": 8.583468940912549e-06, "loss": 18.2094, "step": 14678 }, { "epoch": 0.2683203246385289, "grad_norm": 6.473819895791835, "learning_rate": 8.583262499740156e-06, "loss": 17.6683, "step": 14679 }, { "epoch": 0.26833860383497543, "grad_norm": 5.336545610883745, "learning_rate": 8.583056046008738e-06, "loss": 17.047, "step": 14680 }, { "epoch": 0.2683568830314219, "grad_norm": 6.476933158971933, "learning_rate": 8.58284957971902e-06, "loss": 17.7432, "step": 14681 }, { "epoch": 0.26837516222786845, "grad_norm": 7.194844716455216, "learning_rate": 8.582643100871723e-06, "loss": 18.0126, "step": 14682 }, { "epoch": 0.268393441424315, "grad_norm": 9.837567698985303, "learning_rate": 8.582436609467574e-06, "loss": 18.3976, "step": 14683 }, { "epoch": 0.2684117206207615, "grad_norm": 6.302737098745762, "learning_rate": 8.582230105507297e-06, "loss": 17.3444, "step": 14684 }, { "epoch": 0.26842999981720805, "grad_norm": 6.031561994737572, "learning_rate": 8.582023588991612e-06, "loss": 17.243, "step": 14685 }, { "epoch": 0.26844827901365453, "grad_norm": 6.7063610725854135, "learning_rate": 8.581817059921246e-06, "loss": 17.7287, "step": 14686 }, { "epoch": 0.26846655821010107, "grad_norm": 5.076633321812692, "learning_rate": 8.581610518296923e-06, "loss": 17.0169, "step": 14687 }, { "epoch": 0.2684848374065476, "grad_norm": 7.5560594426511285, "learning_rate": 8.581403964119366e-06, "loss": 18.364, "step": 14688 }, { "epoch": 0.26850311660299414, "grad_norm": 6.224973921589403, "learning_rate": 8.581197397389299e-06, "loss": 17.4573, "step": 14689 }, { "epoch": 0.2685213957994407, "grad_norm": 7.531539240589316, "learning_rate": 8.580990818107444e-06, "loss": 18.3858, "step": 14690 }, { "epoch": 0.26853967499588716, "grad_norm": 8.32563910370348, "learning_rate": 8.580784226274527e-06, "loss": 18.243, "step": 14691 }, { "epoch": 0.2685579541923337, "grad_norm": 6.877877813170907, "learning_rate": 8.580577621891274e-06, "loss": 17.7593, "step": 14692 }, { "epoch": 0.26857623338878023, "grad_norm": 5.580909439182357, "learning_rate": 8.580371004958406e-06, "loss": 17.2123, "step": 14693 }, { "epoch": 0.26859451258522676, "grad_norm": 7.284307503234026, "learning_rate": 8.580164375476649e-06, "loss": 17.8056, "step": 14694 }, { "epoch": 0.2686127917816733, "grad_norm": 6.045374886150081, "learning_rate": 8.579957733446725e-06, "loss": 17.376, "step": 14695 }, { "epoch": 0.2686310709781198, "grad_norm": 5.784900666939494, "learning_rate": 8.57975107886936e-06, "loss": 16.9846, "step": 14696 }, { "epoch": 0.2686493501745663, "grad_norm": 5.744827589367595, "learning_rate": 8.579544411745279e-06, "loss": 17.1729, "step": 14697 }, { "epoch": 0.26866762937101285, "grad_norm": 6.333080325574875, "learning_rate": 8.579337732075207e-06, "loss": 17.565, "step": 14698 }, { "epoch": 0.2686859085674594, "grad_norm": 7.097610393011616, "learning_rate": 8.579131039859865e-06, "loss": 18.1011, "step": 14699 }, { "epoch": 0.2687041877639059, "grad_norm": 6.688524342749437, "learning_rate": 8.578924335099979e-06, "loss": 17.5679, "step": 14700 }, { "epoch": 0.2687224669603524, "grad_norm": 7.050501574071304, "learning_rate": 8.578717617796275e-06, "loss": 17.8998, "step": 14701 }, { "epoch": 0.26874074615679894, "grad_norm": 6.523816221993048, "learning_rate": 8.578510887949475e-06, "loss": 17.5661, "step": 14702 }, { "epoch": 0.2687590253532455, "grad_norm": 7.428933579013749, "learning_rate": 8.578304145560306e-06, "loss": 17.7928, "step": 14703 }, { "epoch": 0.268777304549692, "grad_norm": 6.858388721237472, "learning_rate": 8.578097390629491e-06, "loss": 17.6887, "step": 14704 }, { "epoch": 0.26879558374613854, "grad_norm": 6.412739863494405, "learning_rate": 8.577890623157754e-06, "loss": 17.45, "step": 14705 }, { "epoch": 0.268813862942585, "grad_norm": 6.470617535701904, "learning_rate": 8.577683843145823e-06, "loss": 17.5752, "step": 14706 }, { "epoch": 0.26883214213903156, "grad_norm": 6.746735318473387, "learning_rate": 8.577477050594419e-06, "loss": 17.7946, "step": 14707 }, { "epoch": 0.2688504213354781, "grad_norm": 5.971641974763012, "learning_rate": 8.57727024550427e-06, "loss": 17.4674, "step": 14708 }, { "epoch": 0.26886870053192463, "grad_norm": 6.175575672423403, "learning_rate": 8.577063427876096e-06, "loss": 17.384, "step": 14709 }, { "epoch": 0.26888697972837117, "grad_norm": 7.383734868781345, "learning_rate": 8.576856597710628e-06, "loss": 17.8635, "step": 14710 }, { "epoch": 0.26890525892481765, "grad_norm": 12.295733493486797, "learning_rate": 8.576649755008587e-06, "loss": 18.6965, "step": 14711 }, { "epoch": 0.2689235381212642, "grad_norm": 6.466883236775337, "learning_rate": 8.576442899770699e-06, "loss": 17.5949, "step": 14712 }, { "epoch": 0.2689418173177107, "grad_norm": 7.800539756039745, "learning_rate": 8.576236031997689e-06, "loss": 17.9315, "step": 14713 }, { "epoch": 0.26896009651415725, "grad_norm": 6.894007335467035, "learning_rate": 8.576029151690282e-06, "loss": 17.6541, "step": 14714 }, { "epoch": 0.26897837571060373, "grad_norm": 6.148859316110533, "learning_rate": 8.575822258849203e-06, "loss": 17.4111, "step": 14715 }, { "epoch": 0.26899665490705027, "grad_norm": 6.58256675453406, "learning_rate": 8.575615353475178e-06, "loss": 17.6077, "step": 14716 }, { "epoch": 0.2690149341034968, "grad_norm": 7.441271575092082, "learning_rate": 8.57540843556893e-06, "loss": 17.9539, "step": 14717 }, { "epoch": 0.26903321329994334, "grad_norm": 5.361833844903456, "learning_rate": 8.575201505131188e-06, "loss": 16.9696, "step": 14718 }, { "epoch": 0.2690514924963899, "grad_norm": 5.678366120117593, "learning_rate": 8.574994562162672e-06, "loss": 17.3139, "step": 14719 }, { "epoch": 0.26906977169283636, "grad_norm": 6.0893837231018715, "learning_rate": 8.574787606664115e-06, "loss": 17.4303, "step": 14720 }, { "epoch": 0.2690880508892829, "grad_norm": 6.978650869235168, "learning_rate": 8.574580638636233e-06, "loss": 17.7617, "step": 14721 }, { "epoch": 0.26910633008572943, "grad_norm": 6.399336943829046, "learning_rate": 8.574373658079758e-06, "loss": 17.4543, "step": 14722 }, { "epoch": 0.26912460928217596, "grad_norm": 6.7738837958731795, "learning_rate": 8.574166664995413e-06, "loss": 17.5596, "step": 14723 }, { "epoch": 0.2691428884786225, "grad_norm": 6.833367457168881, "learning_rate": 8.573959659383923e-06, "loss": 17.2438, "step": 14724 }, { "epoch": 0.269161167675069, "grad_norm": 6.853494356915995, "learning_rate": 8.573752641246018e-06, "loss": 17.9752, "step": 14725 }, { "epoch": 0.2691794468715155, "grad_norm": 6.813532429082994, "learning_rate": 8.573545610582416e-06, "loss": 17.7973, "step": 14726 }, { "epoch": 0.26919772606796205, "grad_norm": 7.404253275459462, "learning_rate": 8.57333856739385e-06, "loss": 18.1289, "step": 14727 }, { "epoch": 0.2692160052644086, "grad_norm": 5.67083780852939, "learning_rate": 8.57313151168104e-06, "loss": 17.1733, "step": 14728 }, { "epoch": 0.2692342844608551, "grad_norm": 6.618964024735214, "learning_rate": 8.572924443444714e-06, "loss": 17.6785, "step": 14729 }, { "epoch": 0.2692525636573016, "grad_norm": 4.780337707127116, "learning_rate": 8.572717362685601e-06, "loss": 16.988, "step": 14730 }, { "epoch": 0.26927084285374814, "grad_norm": 6.151126655675175, "learning_rate": 8.57251026940442e-06, "loss": 17.3929, "step": 14731 }, { "epoch": 0.2692891220501947, "grad_norm": 6.235799780952094, "learning_rate": 8.572303163601902e-06, "loss": 17.585, "step": 14732 }, { "epoch": 0.2693074012466412, "grad_norm": 5.437297085695997, "learning_rate": 8.572096045278773e-06, "loss": 17.1169, "step": 14733 }, { "epoch": 0.26932568044308774, "grad_norm": 7.3332844311428484, "learning_rate": 8.571888914435756e-06, "loss": 17.8667, "step": 14734 }, { "epoch": 0.2693439596395342, "grad_norm": 6.5775330738577305, "learning_rate": 8.571681771073576e-06, "loss": 17.3096, "step": 14735 }, { "epoch": 0.26936223883598076, "grad_norm": 6.474062535106741, "learning_rate": 8.571474615192965e-06, "loss": 17.3985, "step": 14736 }, { "epoch": 0.2693805180324273, "grad_norm": 6.667701680170781, "learning_rate": 8.571267446794643e-06, "loss": 17.4353, "step": 14737 }, { "epoch": 0.26939879722887383, "grad_norm": 6.9320060803068, "learning_rate": 8.57106026587934e-06, "loss": 17.827, "step": 14738 }, { "epoch": 0.26941707642532037, "grad_norm": 6.716519546581107, "learning_rate": 8.57085307244778e-06, "loss": 17.6117, "step": 14739 }, { "epoch": 0.26943535562176685, "grad_norm": 6.790569901350317, "learning_rate": 8.57064586650069e-06, "loss": 17.2713, "step": 14740 }, { "epoch": 0.2694536348182134, "grad_norm": 7.202666511234887, "learning_rate": 8.570438648038795e-06, "loss": 17.6958, "step": 14741 }, { "epoch": 0.2694719140146599, "grad_norm": 6.375522015279112, "learning_rate": 8.570231417062824e-06, "loss": 17.4966, "step": 14742 }, { "epoch": 0.26949019321110645, "grad_norm": 6.551655264635537, "learning_rate": 8.570024173573502e-06, "loss": 17.4394, "step": 14743 }, { "epoch": 0.269508472407553, "grad_norm": 7.195717947624498, "learning_rate": 8.569816917571553e-06, "loss": 17.6685, "step": 14744 }, { "epoch": 0.26952675160399947, "grad_norm": 5.360594593921079, "learning_rate": 8.569609649057706e-06, "loss": 16.7244, "step": 14745 }, { "epoch": 0.269545030800446, "grad_norm": 6.824536535650638, "learning_rate": 8.569402368032688e-06, "loss": 17.8826, "step": 14746 }, { "epoch": 0.26956330999689254, "grad_norm": 7.403124834009568, "learning_rate": 8.569195074497224e-06, "loss": 17.7747, "step": 14747 }, { "epoch": 0.2695815891933391, "grad_norm": 7.319336075058227, "learning_rate": 8.56898776845204e-06, "loss": 17.659, "step": 14748 }, { "epoch": 0.26959986838978556, "grad_norm": 5.614063931073716, "learning_rate": 8.568780449897866e-06, "loss": 17.0689, "step": 14749 }, { "epoch": 0.2696181475862321, "grad_norm": 6.691994166706363, "learning_rate": 8.568573118835423e-06, "loss": 17.5283, "step": 14750 }, { "epoch": 0.26963642678267863, "grad_norm": 8.027288059140304, "learning_rate": 8.568365775265443e-06, "loss": 17.7729, "step": 14751 }, { "epoch": 0.26965470597912516, "grad_norm": 5.625788602106047, "learning_rate": 8.56815841918865e-06, "loss": 17.3177, "step": 14752 }, { "epoch": 0.2696729851755717, "grad_norm": 6.418223505014459, "learning_rate": 8.567951050605771e-06, "loss": 17.3564, "step": 14753 }, { "epoch": 0.2696912643720182, "grad_norm": 6.191659165789929, "learning_rate": 8.567743669517534e-06, "loss": 17.565, "step": 14754 }, { "epoch": 0.2697095435684647, "grad_norm": 6.952676833930705, "learning_rate": 8.567536275924666e-06, "loss": 17.789, "step": 14755 }, { "epoch": 0.26972782276491125, "grad_norm": 6.728365928300831, "learning_rate": 8.567328869827891e-06, "loss": 17.4448, "step": 14756 }, { "epoch": 0.2697461019613578, "grad_norm": 6.9397302438025665, "learning_rate": 8.56712145122794e-06, "loss": 17.8716, "step": 14757 }, { "epoch": 0.2697643811578043, "grad_norm": 6.421020196663757, "learning_rate": 8.566914020125536e-06, "loss": 17.47, "step": 14758 }, { "epoch": 0.2697826603542508, "grad_norm": 6.311965000870438, "learning_rate": 8.566706576521407e-06, "loss": 17.5558, "step": 14759 }, { "epoch": 0.26980093955069734, "grad_norm": 6.596533882459412, "learning_rate": 8.566499120416283e-06, "loss": 17.5017, "step": 14760 }, { "epoch": 0.2698192187471439, "grad_norm": 6.058452312693287, "learning_rate": 8.566291651810887e-06, "loss": 16.9859, "step": 14761 }, { "epoch": 0.2698374979435904, "grad_norm": 8.718469231759896, "learning_rate": 8.56608417070595e-06, "loss": 18.3771, "step": 14762 }, { "epoch": 0.26985577714003695, "grad_norm": 5.622414403765055, "learning_rate": 8.565876677102198e-06, "loss": 17.2049, "step": 14763 }, { "epoch": 0.2698740563364834, "grad_norm": 6.467884336016666, "learning_rate": 8.565669171000357e-06, "loss": 17.454, "step": 14764 }, { "epoch": 0.26989233553292996, "grad_norm": 6.698985687010132, "learning_rate": 8.565461652401155e-06, "loss": 17.6975, "step": 14765 }, { "epoch": 0.2699106147293765, "grad_norm": 6.320444788794005, "learning_rate": 8.565254121305318e-06, "loss": 17.2421, "step": 14766 }, { "epoch": 0.26992889392582303, "grad_norm": 7.469850399429338, "learning_rate": 8.565046577713576e-06, "loss": 17.7894, "step": 14767 }, { "epoch": 0.26994717312226957, "grad_norm": 8.681383457387533, "learning_rate": 8.564839021626653e-06, "loss": 18.0657, "step": 14768 }, { "epoch": 0.26996545231871605, "grad_norm": 6.303465573667526, "learning_rate": 8.564631453045283e-06, "loss": 17.695, "step": 14769 }, { "epoch": 0.2699837315151626, "grad_norm": 6.587916498995225, "learning_rate": 8.564423871970185e-06, "loss": 17.5307, "step": 14770 }, { "epoch": 0.2700020107116091, "grad_norm": 8.782459938025932, "learning_rate": 8.564216278402092e-06, "loss": 18.4805, "step": 14771 }, { "epoch": 0.27002028990805566, "grad_norm": 6.264035444124062, "learning_rate": 8.564008672341731e-06, "loss": 17.5215, "step": 14772 }, { "epoch": 0.2700385691045022, "grad_norm": 6.1909731522116225, "learning_rate": 8.563801053789828e-06, "loss": 17.3149, "step": 14773 }, { "epoch": 0.27005684830094867, "grad_norm": 7.125301342293887, "learning_rate": 8.563593422747112e-06, "loss": 18.0346, "step": 14774 }, { "epoch": 0.2700751274973952, "grad_norm": 8.09012375487751, "learning_rate": 8.563385779214308e-06, "loss": 18.1556, "step": 14775 }, { "epoch": 0.27009340669384174, "grad_norm": 6.838611555632258, "learning_rate": 8.563178123192148e-06, "loss": 17.4825, "step": 14776 }, { "epoch": 0.2701116858902883, "grad_norm": 7.091593604846901, "learning_rate": 8.562970454681359e-06, "loss": 17.9692, "step": 14777 }, { "epoch": 0.2701299650867348, "grad_norm": 27.42424937940139, "learning_rate": 8.562762773682667e-06, "loss": 18.6833, "step": 14778 }, { "epoch": 0.2701482442831813, "grad_norm": 6.105921118984554, "learning_rate": 8.5625550801968e-06, "loss": 17.5999, "step": 14779 }, { "epoch": 0.27016652347962783, "grad_norm": 7.891322010519658, "learning_rate": 8.562347374224487e-06, "loss": 18.0696, "step": 14780 }, { "epoch": 0.27018480267607436, "grad_norm": 5.089558546095618, "learning_rate": 8.562139655766456e-06, "loss": 17.0662, "step": 14781 }, { "epoch": 0.2702030818725209, "grad_norm": 6.964422777174042, "learning_rate": 8.561931924823433e-06, "loss": 17.9785, "step": 14782 }, { "epoch": 0.2702213610689674, "grad_norm": 6.644066222632282, "learning_rate": 8.56172418139615e-06, "loss": 17.6049, "step": 14783 }, { "epoch": 0.2702396402654139, "grad_norm": 6.717271400281902, "learning_rate": 8.56151642548533e-06, "loss": 17.7115, "step": 14784 }, { "epoch": 0.27025791946186045, "grad_norm": 6.717954264561899, "learning_rate": 8.561308657091707e-06, "loss": 17.4734, "step": 14785 }, { "epoch": 0.270276198658307, "grad_norm": 7.859560363688032, "learning_rate": 8.561100876216004e-06, "loss": 18.0413, "step": 14786 }, { "epoch": 0.2702944778547535, "grad_norm": 6.054518782247669, "learning_rate": 8.560893082858952e-06, "loss": 17.5724, "step": 14787 }, { "epoch": 0.2703127570512, "grad_norm": 6.357519366751193, "learning_rate": 8.560685277021278e-06, "loss": 17.5095, "step": 14788 }, { "epoch": 0.27033103624764654, "grad_norm": 6.419423602347501, "learning_rate": 8.560477458703714e-06, "loss": 17.3069, "step": 14789 }, { "epoch": 0.2703493154440931, "grad_norm": 5.996425424062401, "learning_rate": 8.560269627906983e-06, "loss": 17.3726, "step": 14790 }, { "epoch": 0.2703675946405396, "grad_norm": 7.306677999264691, "learning_rate": 8.560061784631816e-06, "loss": 18.0985, "step": 14791 }, { "epoch": 0.27038587383698615, "grad_norm": 7.643579111586635, "learning_rate": 8.559853928878941e-06, "loss": 18.2205, "step": 14792 }, { "epoch": 0.2704041530334326, "grad_norm": 7.4952996707611, "learning_rate": 8.559646060649086e-06, "loss": 18.0018, "step": 14793 }, { "epoch": 0.27042243222987916, "grad_norm": 6.56709098086221, "learning_rate": 8.559438179942985e-06, "loss": 17.5855, "step": 14794 }, { "epoch": 0.2704407114263257, "grad_norm": 6.906590166390661, "learning_rate": 8.559230286761359e-06, "loss": 17.7104, "step": 14795 }, { "epoch": 0.27045899062277223, "grad_norm": 7.337185897035148, "learning_rate": 8.55902238110494e-06, "loss": 17.888, "step": 14796 }, { "epoch": 0.27047726981921877, "grad_norm": 7.5890777682589965, "learning_rate": 8.558814462974457e-06, "loss": 17.9747, "step": 14797 }, { "epoch": 0.27049554901566525, "grad_norm": 6.966595784185228, "learning_rate": 8.558606532370635e-06, "loss": 17.587, "step": 14798 }, { "epoch": 0.2705138282121118, "grad_norm": 6.432904766596979, "learning_rate": 8.55839858929421e-06, "loss": 17.7732, "step": 14799 }, { "epoch": 0.2705321074085583, "grad_norm": 6.468720967410889, "learning_rate": 8.558190633745904e-06, "loss": 17.4364, "step": 14800 }, { "epoch": 0.27055038660500486, "grad_norm": 7.185045518131907, "learning_rate": 8.55798266572645e-06, "loss": 17.6472, "step": 14801 }, { "epoch": 0.2705686658014514, "grad_norm": 5.51531722764198, "learning_rate": 8.557774685236575e-06, "loss": 17.1683, "step": 14802 }, { "epoch": 0.27058694499789787, "grad_norm": 8.265389929871407, "learning_rate": 8.557566692277008e-06, "loss": 18.4747, "step": 14803 }, { "epoch": 0.2706052241943444, "grad_norm": 6.174231177908045, "learning_rate": 8.55735868684848e-06, "loss": 17.317, "step": 14804 }, { "epoch": 0.27062350339079094, "grad_norm": 5.838048605328569, "learning_rate": 8.557150668951717e-06, "loss": 17.4088, "step": 14805 }, { "epoch": 0.2706417825872375, "grad_norm": 6.0436949025897615, "learning_rate": 8.55694263858745e-06, "loss": 17.2727, "step": 14806 }, { "epoch": 0.270660061783684, "grad_norm": 6.548063149899086, "learning_rate": 8.556734595756409e-06, "loss": 17.5251, "step": 14807 }, { "epoch": 0.2706783409801305, "grad_norm": 8.060561911075787, "learning_rate": 8.55652654045932e-06, "loss": 18.448, "step": 14808 }, { "epoch": 0.27069662017657703, "grad_norm": 6.624057521585169, "learning_rate": 8.556318472696915e-06, "loss": 17.572, "step": 14809 }, { "epoch": 0.27071489937302357, "grad_norm": 6.718039617867972, "learning_rate": 8.556110392469923e-06, "loss": 17.7396, "step": 14810 }, { "epoch": 0.2707331785694701, "grad_norm": 6.602064901695705, "learning_rate": 8.555902299779071e-06, "loss": 17.6202, "step": 14811 }, { "epoch": 0.27075145776591664, "grad_norm": 5.040929278256974, "learning_rate": 8.55569419462509e-06, "loss": 17.0216, "step": 14812 }, { "epoch": 0.2707697369623631, "grad_norm": 5.99684895534528, "learning_rate": 8.55548607700871e-06, "loss": 17.267, "step": 14813 }, { "epoch": 0.27078801615880965, "grad_norm": 7.119770017639812, "learning_rate": 8.55527794693066e-06, "loss": 18.0201, "step": 14814 }, { "epoch": 0.2708062953552562, "grad_norm": 6.668933993092999, "learning_rate": 8.55506980439167e-06, "loss": 17.7468, "step": 14815 }, { "epoch": 0.2708245745517027, "grad_norm": 7.787840570922672, "learning_rate": 8.554861649392468e-06, "loss": 17.9452, "step": 14816 }, { "epoch": 0.2708428537481492, "grad_norm": 9.98440755798373, "learning_rate": 8.554653481933784e-06, "loss": 18.604, "step": 14817 }, { "epoch": 0.27086113294459574, "grad_norm": 6.193246128929784, "learning_rate": 8.55444530201635e-06, "loss": 17.3345, "step": 14818 }, { "epoch": 0.2708794121410423, "grad_norm": 6.473158814119927, "learning_rate": 8.554237109640891e-06, "loss": 17.4942, "step": 14819 }, { "epoch": 0.2708976913374888, "grad_norm": 5.477792509880523, "learning_rate": 8.554028904808141e-06, "loss": 17.167, "step": 14820 }, { "epoch": 0.27091597053393535, "grad_norm": 5.824733941489929, "learning_rate": 8.553820687518828e-06, "loss": 17.1212, "step": 14821 }, { "epoch": 0.2709342497303818, "grad_norm": 6.969990034486477, "learning_rate": 8.553612457773681e-06, "loss": 17.816, "step": 14822 }, { "epoch": 0.27095252892682836, "grad_norm": 7.040876437065263, "learning_rate": 8.55340421557343e-06, "loss": 17.7973, "step": 14823 }, { "epoch": 0.2709708081232749, "grad_norm": 6.981931948666637, "learning_rate": 8.553195960918808e-06, "loss": 17.7812, "step": 14824 }, { "epoch": 0.27098908731972143, "grad_norm": 17.040261005387123, "learning_rate": 8.552987693810542e-06, "loss": 17.6161, "step": 14825 }, { "epoch": 0.27100736651616797, "grad_norm": 6.440303304400119, "learning_rate": 8.552779414249362e-06, "loss": 17.7299, "step": 14826 }, { "epoch": 0.27102564571261445, "grad_norm": 6.066347738116784, "learning_rate": 8.552571122235998e-06, "loss": 17.4761, "step": 14827 }, { "epoch": 0.271043924909061, "grad_norm": 6.101927145793574, "learning_rate": 8.55236281777118e-06, "loss": 17.3433, "step": 14828 }, { "epoch": 0.2710622041055075, "grad_norm": 7.106033455011255, "learning_rate": 8.55215450085564e-06, "loss": 18.0241, "step": 14829 }, { "epoch": 0.27108048330195406, "grad_norm": 5.42307002578256, "learning_rate": 8.551946171490107e-06, "loss": 17.2391, "step": 14830 }, { "epoch": 0.2710987624984006, "grad_norm": 5.149141178645675, "learning_rate": 8.551737829675309e-06, "loss": 16.998, "step": 14831 }, { "epoch": 0.27111704169484707, "grad_norm": 5.687623706865836, "learning_rate": 8.551529475411979e-06, "loss": 17.3505, "step": 14832 }, { "epoch": 0.2711353208912936, "grad_norm": 6.000635344331766, "learning_rate": 8.551321108700846e-06, "loss": 17.1844, "step": 14833 }, { "epoch": 0.27115360008774014, "grad_norm": 7.146910464294515, "learning_rate": 8.551112729542642e-06, "loss": 17.7004, "step": 14834 }, { "epoch": 0.2711718792841867, "grad_norm": 6.072344435017131, "learning_rate": 8.550904337938095e-06, "loss": 17.3035, "step": 14835 }, { "epoch": 0.2711901584806332, "grad_norm": 5.883160755130152, "learning_rate": 8.550695933887935e-06, "loss": 17.3569, "step": 14836 }, { "epoch": 0.2712084376770797, "grad_norm": 6.013574719384061, "learning_rate": 8.550487517392896e-06, "loss": 17.2092, "step": 14837 }, { "epoch": 0.27122671687352623, "grad_norm": 6.581247721001421, "learning_rate": 8.550279088453705e-06, "loss": 17.361, "step": 14838 }, { "epoch": 0.27124499606997277, "grad_norm": 7.229041708791945, "learning_rate": 8.550070647071095e-06, "loss": 17.9663, "step": 14839 }, { "epoch": 0.2712632752664193, "grad_norm": 9.233208225351778, "learning_rate": 8.549862193245795e-06, "loss": 17.8901, "step": 14840 }, { "epoch": 0.27128155446286584, "grad_norm": 7.9748289764601985, "learning_rate": 8.549653726978538e-06, "loss": 18.2539, "step": 14841 }, { "epoch": 0.2712998336593123, "grad_norm": 5.6688321120436385, "learning_rate": 8.549445248270049e-06, "loss": 17.2339, "step": 14842 }, { "epoch": 0.27131811285575885, "grad_norm": 6.736587469374453, "learning_rate": 8.549236757121065e-06, "loss": 17.5542, "step": 14843 }, { "epoch": 0.2713363920522054, "grad_norm": 7.316596212398285, "learning_rate": 8.549028253532313e-06, "loss": 18.1392, "step": 14844 }, { "epoch": 0.2713546712486519, "grad_norm": 7.007929928197967, "learning_rate": 8.548819737504525e-06, "loss": 17.7737, "step": 14845 }, { "epoch": 0.27137295044509846, "grad_norm": 6.586250496167358, "learning_rate": 8.548611209038433e-06, "loss": 17.5552, "step": 14846 }, { "epoch": 0.27139122964154494, "grad_norm": 7.821666686457381, "learning_rate": 8.548402668134766e-06, "loss": 18.471, "step": 14847 }, { "epoch": 0.2714095088379915, "grad_norm": 6.463512499320828, "learning_rate": 8.548194114794255e-06, "loss": 17.4083, "step": 14848 }, { "epoch": 0.271427788034438, "grad_norm": 5.91809828471948, "learning_rate": 8.547985549017633e-06, "loss": 17.3628, "step": 14849 }, { "epoch": 0.27144606723088455, "grad_norm": 5.691880825744473, "learning_rate": 8.547776970805627e-06, "loss": 17.0096, "step": 14850 }, { "epoch": 0.271464346427331, "grad_norm": 5.823969072739495, "learning_rate": 8.547568380158973e-06, "loss": 17.1374, "step": 14851 }, { "epoch": 0.27148262562377756, "grad_norm": 6.396999802354459, "learning_rate": 8.547359777078399e-06, "loss": 17.5505, "step": 14852 }, { "epoch": 0.2715009048202241, "grad_norm": 6.055306693350301, "learning_rate": 8.547151161564636e-06, "loss": 17.3091, "step": 14853 }, { "epoch": 0.27151918401667063, "grad_norm": 6.948563482297491, "learning_rate": 8.546942533618417e-06, "loss": 17.6344, "step": 14854 }, { "epoch": 0.27153746321311717, "grad_norm": 6.777693158596401, "learning_rate": 8.54673389324047e-06, "loss": 17.8987, "step": 14855 }, { "epoch": 0.27155574240956365, "grad_norm": 6.641151041194556, "learning_rate": 8.54652524043153e-06, "loss": 17.8, "step": 14856 }, { "epoch": 0.2715740216060102, "grad_norm": 7.462915438731451, "learning_rate": 8.546316575192328e-06, "loss": 18.3269, "step": 14857 }, { "epoch": 0.2715923008024567, "grad_norm": 6.2508430357152704, "learning_rate": 8.546107897523592e-06, "loss": 17.6674, "step": 14858 }, { "epoch": 0.27161057999890326, "grad_norm": 6.624476504090634, "learning_rate": 8.545899207426056e-06, "loss": 17.6947, "step": 14859 }, { "epoch": 0.2716288591953498, "grad_norm": 6.221072911878215, "learning_rate": 8.545690504900452e-06, "loss": 17.4292, "step": 14860 }, { "epoch": 0.2716471383917963, "grad_norm": 10.603644841939005, "learning_rate": 8.54548178994751e-06, "loss": 18.6189, "step": 14861 }, { "epoch": 0.2716654175882428, "grad_norm": 7.174358900772363, "learning_rate": 8.545273062567963e-06, "loss": 17.9352, "step": 14862 }, { "epoch": 0.27168369678468934, "grad_norm": 6.203643896967865, "learning_rate": 8.54506432276254e-06, "loss": 17.3654, "step": 14863 }, { "epoch": 0.2717019759811359, "grad_norm": 7.270744433630595, "learning_rate": 8.544855570531974e-06, "loss": 17.7381, "step": 14864 }, { "epoch": 0.2717202551775824, "grad_norm": 5.932433514868857, "learning_rate": 8.544646805876999e-06, "loss": 17.3567, "step": 14865 }, { "epoch": 0.2717385343740289, "grad_norm": 5.7418441762364765, "learning_rate": 8.544438028798342e-06, "loss": 16.9767, "step": 14866 }, { "epoch": 0.27175681357047543, "grad_norm": 5.8992892914146635, "learning_rate": 8.54422923929674e-06, "loss": 17.4688, "step": 14867 }, { "epoch": 0.27177509276692197, "grad_norm": 5.4741094203996665, "learning_rate": 8.544020437372919e-06, "loss": 17.1419, "step": 14868 }, { "epoch": 0.2717933719633685, "grad_norm": 8.441886739004868, "learning_rate": 8.543811623027616e-06, "loss": 18.798, "step": 14869 }, { "epoch": 0.27181165115981504, "grad_norm": 7.094430727346486, "learning_rate": 8.54360279626156e-06, "loss": 17.8168, "step": 14870 }, { "epoch": 0.2718299303562615, "grad_norm": 7.59321936874265, "learning_rate": 8.543393957075483e-06, "loss": 17.806, "step": 14871 }, { "epoch": 0.27184820955270805, "grad_norm": 7.263613009008624, "learning_rate": 8.54318510547012e-06, "loss": 18.1295, "step": 14872 }, { "epoch": 0.2718664887491546, "grad_norm": 6.984233747273519, "learning_rate": 8.5429762414462e-06, "loss": 17.5231, "step": 14873 }, { "epoch": 0.2718847679456011, "grad_norm": 5.080683098741494, "learning_rate": 8.542767365004454e-06, "loss": 16.8671, "step": 14874 }, { "epoch": 0.27190304714204766, "grad_norm": 6.434155928121867, "learning_rate": 8.542558476145616e-06, "loss": 17.6545, "step": 14875 }, { "epoch": 0.27192132633849414, "grad_norm": 8.879287344854903, "learning_rate": 8.54234957487042e-06, "loss": 18.6452, "step": 14876 }, { "epoch": 0.2719396055349407, "grad_norm": 6.323199241643357, "learning_rate": 8.542140661179594e-06, "loss": 17.7601, "step": 14877 }, { "epoch": 0.2719578847313872, "grad_norm": 6.301146414422232, "learning_rate": 8.541931735073872e-06, "loss": 17.6795, "step": 14878 }, { "epoch": 0.27197616392783375, "grad_norm": 6.785669327266767, "learning_rate": 8.541722796553988e-06, "loss": 17.5498, "step": 14879 }, { "epoch": 0.2719944431242803, "grad_norm": 5.844865942548047, "learning_rate": 8.541513845620672e-06, "loss": 17.3334, "step": 14880 }, { "epoch": 0.27201272232072676, "grad_norm": 22.634779033249952, "learning_rate": 8.54130488227466e-06, "loss": 17.8193, "step": 14881 }, { "epoch": 0.2720310015171733, "grad_norm": 8.142868092799928, "learning_rate": 8.54109590651668e-06, "loss": 18.1071, "step": 14882 }, { "epoch": 0.27204928071361983, "grad_norm": 8.427618812524283, "learning_rate": 8.540886918347465e-06, "loss": 17.4152, "step": 14883 }, { "epoch": 0.27206755991006637, "grad_norm": 6.93423553369138, "learning_rate": 8.540677917767749e-06, "loss": 17.6729, "step": 14884 }, { "epoch": 0.27208583910651285, "grad_norm": 7.193372060519649, "learning_rate": 8.540468904778265e-06, "loss": 17.6561, "step": 14885 }, { "epoch": 0.2721041183029594, "grad_norm": 8.280421848562657, "learning_rate": 8.540259879379744e-06, "loss": 18.4438, "step": 14886 }, { "epoch": 0.2721223974994059, "grad_norm": 5.945522270852512, "learning_rate": 8.54005084157292e-06, "loss": 17.331, "step": 14887 }, { "epoch": 0.27214067669585246, "grad_norm": 7.74093699090885, "learning_rate": 8.539841791358523e-06, "loss": 17.8281, "step": 14888 }, { "epoch": 0.272158955892299, "grad_norm": 7.664311082329237, "learning_rate": 8.539632728737291e-06, "loss": 17.9029, "step": 14889 }, { "epoch": 0.2721772350887455, "grad_norm": 5.472508028003336, "learning_rate": 8.53942365370995e-06, "loss": 17.1254, "step": 14890 }, { "epoch": 0.272195514285192, "grad_norm": 7.2114970326785395, "learning_rate": 8.539214566277239e-06, "loss": 17.8657, "step": 14891 }, { "epoch": 0.27221379348163854, "grad_norm": 5.760661427336524, "learning_rate": 8.539005466439886e-06, "loss": 17.424, "step": 14892 }, { "epoch": 0.2722320726780851, "grad_norm": 6.507112035057774, "learning_rate": 8.538796354198629e-06, "loss": 17.6589, "step": 14893 }, { "epoch": 0.2722503518745316, "grad_norm": 6.348619332970952, "learning_rate": 8.538587229554195e-06, "loss": 17.5345, "step": 14894 }, { "epoch": 0.2722686310709781, "grad_norm": 8.662099122948792, "learning_rate": 8.53837809250732e-06, "loss": 18.2038, "step": 14895 }, { "epoch": 0.27228691026742463, "grad_norm": 6.92314883051264, "learning_rate": 8.538168943058738e-06, "loss": 17.8408, "step": 14896 }, { "epoch": 0.27230518946387117, "grad_norm": 8.953082316841071, "learning_rate": 8.537959781209181e-06, "loss": 18.1729, "step": 14897 }, { "epoch": 0.2723234686603177, "grad_norm": 7.092600096105889, "learning_rate": 8.537750606959381e-06, "loss": 18.1305, "step": 14898 }, { "epoch": 0.27234174785676424, "grad_norm": 7.241337699361368, "learning_rate": 8.537541420310072e-06, "loss": 17.98, "step": 14899 }, { "epoch": 0.2723600270532107, "grad_norm": 8.802883025943345, "learning_rate": 8.537332221261988e-06, "loss": 18.5611, "step": 14900 }, { "epoch": 0.27237830624965725, "grad_norm": 7.593543267720216, "learning_rate": 8.537123009815861e-06, "loss": 18.0538, "step": 14901 }, { "epoch": 0.2723965854461038, "grad_norm": 5.630264023919765, "learning_rate": 8.536913785972424e-06, "loss": 17.1605, "step": 14902 }, { "epoch": 0.2724148646425503, "grad_norm": 6.380544619337826, "learning_rate": 8.536704549732413e-06, "loss": 17.7363, "step": 14903 }, { "epoch": 0.27243314383899686, "grad_norm": 6.157146247117923, "learning_rate": 8.536495301096559e-06, "loss": 17.3588, "step": 14904 }, { "epoch": 0.27245142303544334, "grad_norm": 6.743080721881028, "learning_rate": 8.536286040065595e-06, "loss": 17.7994, "step": 14905 }, { "epoch": 0.2724697022318899, "grad_norm": 7.064760121129258, "learning_rate": 8.536076766640258e-06, "loss": 18.0931, "step": 14906 }, { "epoch": 0.2724879814283364, "grad_norm": 7.407565010651201, "learning_rate": 8.535867480821275e-06, "loss": 17.8473, "step": 14907 }, { "epoch": 0.27250626062478295, "grad_norm": 7.81441975945342, "learning_rate": 8.535658182609386e-06, "loss": 18.1158, "step": 14908 }, { "epoch": 0.2725245398212295, "grad_norm": 6.045435312495618, "learning_rate": 8.53544887200532e-06, "loss": 17.3503, "step": 14909 }, { "epoch": 0.27254281901767596, "grad_norm": 5.707968340287296, "learning_rate": 8.535239549009813e-06, "loss": 17.099, "step": 14910 }, { "epoch": 0.2725610982141225, "grad_norm": 6.066638919619541, "learning_rate": 8.535030213623599e-06, "loss": 17.2943, "step": 14911 }, { "epoch": 0.27257937741056903, "grad_norm": 5.900745626891175, "learning_rate": 8.53482086584741e-06, "loss": 17.4147, "step": 14912 }, { "epoch": 0.27259765660701557, "grad_norm": 6.370327324649582, "learning_rate": 8.53461150568198e-06, "loss": 17.4563, "step": 14913 }, { "epoch": 0.2726159358034621, "grad_norm": 8.7428366840071, "learning_rate": 8.534402133128044e-06, "loss": 18.1068, "step": 14914 }, { "epoch": 0.2726342149999086, "grad_norm": 7.311969184989779, "learning_rate": 8.534192748186337e-06, "loss": 18.1549, "step": 14915 }, { "epoch": 0.2726524941963551, "grad_norm": 7.222571937555025, "learning_rate": 8.53398335085759e-06, "loss": 17.9561, "step": 14916 }, { "epoch": 0.27267077339280166, "grad_norm": 6.817968807815459, "learning_rate": 8.533773941142535e-06, "loss": 17.6033, "step": 14917 }, { "epoch": 0.2726890525892482, "grad_norm": 6.645826784715755, "learning_rate": 8.533564519041913e-06, "loss": 17.4718, "step": 14918 }, { "epoch": 0.2727073317856947, "grad_norm": 7.061940441232614, "learning_rate": 8.533355084556452e-06, "loss": 17.3461, "step": 14919 }, { "epoch": 0.2727256109821412, "grad_norm": 9.420514471152142, "learning_rate": 8.533145637686889e-06, "loss": 18.8242, "step": 14920 }, { "epoch": 0.27274389017858774, "grad_norm": 5.597796718640966, "learning_rate": 8.532936178433958e-06, "loss": 16.9579, "step": 14921 }, { "epoch": 0.2727621693750343, "grad_norm": 6.1370517575521735, "learning_rate": 8.53272670679839e-06, "loss": 17.1182, "step": 14922 }, { "epoch": 0.2727804485714808, "grad_norm": 5.938901037287833, "learning_rate": 8.532517222780922e-06, "loss": 17.2498, "step": 14923 }, { "epoch": 0.2727987277679273, "grad_norm": 7.485266218181458, "learning_rate": 8.532307726382288e-06, "loss": 18.0095, "step": 14924 }, { "epoch": 0.27281700696437383, "grad_norm": 7.9292761561775835, "learning_rate": 8.532098217603222e-06, "loss": 18.264, "step": 14925 }, { "epoch": 0.27283528616082037, "grad_norm": 8.333234729593757, "learning_rate": 8.53188869644446e-06, "loss": 17.9407, "step": 14926 }, { "epoch": 0.2728535653572669, "grad_norm": 7.081977271341544, "learning_rate": 8.531679162906732e-06, "loss": 17.4561, "step": 14927 }, { "epoch": 0.27287184455371344, "grad_norm": 6.116784005544873, "learning_rate": 8.531469616990776e-06, "loss": 17.3566, "step": 14928 }, { "epoch": 0.2728901237501599, "grad_norm": 5.5221505721350175, "learning_rate": 8.531260058697326e-06, "loss": 17.4277, "step": 14929 }, { "epoch": 0.27290840294660645, "grad_norm": 7.0237127520366425, "learning_rate": 8.531050488027115e-06, "loss": 17.7201, "step": 14930 }, { "epoch": 0.272926682143053, "grad_norm": 9.834204976154917, "learning_rate": 8.530840904980878e-06, "loss": 18.8974, "step": 14931 }, { "epoch": 0.2729449613394995, "grad_norm": 7.53462570626672, "learning_rate": 8.53063130955935e-06, "loss": 17.8331, "step": 14932 }, { "epoch": 0.27296324053594606, "grad_norm": 10.658949058256267, "learning_rate": 8.530421701763268e-06, "loss": 17.5252, "step": 14933 }, { "epoch": 0.27298151973239254, "grad_norm": 6.624393401300358, "learning_rate": 8.530212081593362e-06, "loss": 17.5098, "step": 14934 }, { "epoch": 0.2729997989288391, "grad_norm": 7.940640834054324, "learning_rate": 8.53000244905037e-06, "loss": 18.0564, "step": 14935 }, { "epoch": 0.2730180781252856, "grad_norm": 6.292335902678796, "learning_rate": 8.529792804135025e-06, "loss": 17.5755, "step": 14936 }, { "epoch": 0.27303635732173215, "grad_norm": 7.781507536910704, "learning_rate": 8.529583146848063e-06, "loss": 17.5398, "step": 14937 }, { "epoch": 0.2730546365181787, "grad_norm": 6.929292405792139, "learning_rate": 8.529373477190218e-06, "loss": 17.5629, "step": 14938 }, { "epoch": 0.27307291571462516, "grad_norm": 5.940024667463652, "learning_rate": 8.529163795162225e-06, "loss": 17.3973, "step": 14939 }, { "epoch": 0.2730911949110717, "grad_norm": 6.528807380553815, "learning_rate": 8.528954100764822e-06, "loss": 17.619, "step": 14940 }, { "epoch": 0.27310947410751824, "grad_norm": 8.385382036274383, "learning_rate": 8.528744393998736e-06, "loss": 18.0734, "step": 14941 }, { "epoch": 0.27312775330396477, "grad_norm": 6.400757498934926, "learning_rate": 8.52853467486471e-06, "loss": 17.0158, "step": 14942 }, { "epoch": 0.2731460325004113, "grad_norm": 7.138467067392454, "learning_rate": 8.528324943363477e-06, "loss": 17.5021, "step": 14943 }, { "epoch": 0.2731643116968578, "grad_norm": 5.918351364852236, "learning_rate": 8.52811519949577e-06, "loss": 17.325, "step": 14944 }, { "epoch": 0.2731825908933043, "grad_norm": 6.330543141395617, "learning_rate": 8.527905443262325e-06, "loss": 17.5151, "step": 14945 }, { "epoch": 0.27320087008975086, "grad_norm": 6.996704629960327, "learning_rate": 8.527695674663878e-06, "loss": 17.7408, "step": 14946 }, { "epoch": 0.2732191492861974, "grad_norm": 8.191466999963437, "learning_rate": 8.527485893701166e-06, "loss": 17.0555, "step": 14947 }, { "epoch": 0.27323742848264393, "grad_norm": 5.806599569310809, "learning_rate": 8.527276100374919e-06, "loss": 17.3758, "step": 14948 }, { "epoch": 0.2732557076790904, "grad_norm": 7.703322244771542, "learning_rate": 8.527066294685878e-06, "loss": 18.212, "step": 14949 }, { "epoch": 0.27327398687553695, "grad_norm": 20.52918467256652, "learning_rate": 8.526856476634773e-06, "loss": 18.5831, "step": 14950 }, { "epoch": 0.2732922660719835, "grad_norm": 6.8727490222692795, "learning_rate": 8.526646646222343e-06, "loss": 17.6496, "step": 14951 }, { "epoch": 0.27331054526843, "grad_norm": 7.175407514213813, "learning_rate": 8.526436803449323e-06, "loss": 17.4754, "step": 14952 }, { "epoch": 0.2733288244648765, "grad_norm": 6.653790660641926, "learning_rate": 8.526226948316447e-06, "loss": 17.6719, "step": 14953 }, { "epoch": 0.27334710366132303, "grad_norm": 5.722892147579569, "learning_rate": 8.526017080824452e-06, "loss": 17.2459, "step": 14954 }, { "epoch": 0.27336538285776957, "grad_norm": 6.279245110003837, "learning_rate": 8.525807200974074e-06, "loss": 17.3389, "step": 14955 }, { "epoch": 0.2733836620542161, "grad_norm": 7.746941569880032, "learning_rate": 8.525597308766047e-06, "loss": 17.9757, "step": 14956 }, { "epoch": 0.27340194125066264, "grad_norm": 5.989310738332474, "learning_rate": 8.525387404201108e-06, "loss": 17.2385, "step": 14957 }, { "epoch": 0.2734202204471091, "grad_norm": 6.196033537084806, "learning_rate": 8.525177487279992e-06, "loss": 17.2249, "step": 14958 }, { "epoch": 0.27343849964355565, "grad_norm": 7.11327945725399, "learning_rate": 8.524967558003434e-06, "loss": 18.0703, "step": 14959 }, { "epoch": 0.2734567788400022, "grad_norm": 8.4964544098655, "learning_rate": 8.52475761637217e-06, "loss": 17.5121, "step": 14960 }, { "epoch": 0.2734750580364487, "grad_norm": 6.516779618103367, "learning_rate": 8.524547662386937e-06, "loss": 17.4908, "step": 14961 }, { "epoch": 0.27349333723289526, "grad_norm": 9.044453293633982, "learning_rate": 8.52433769604847e-06, "loss": 18.6725, "step": 14962 }, { "epoch": 0.27351161642934174, "grad_norm": 6.709590072747863, "learning_rate": 8.524127717357506e-06, "loss": 17.2904, "step": 14963 }, { "epoch": 0.2735298956257883, "grad_norm": 6.880374789406854, "learning_rate": 8.52391772631478e-06, "loss": 17.7179, "step": 14964 }, { "epoch": 0.2735481748222348, "grad_norm": 7.765052854218366, "learning_rate": 8.52370772292103e-06, "loss": 17.9241, "step": 14965 }, { "epoch": 0.27356645401868135, "grad_norm": 7.258938768054528, "learning_rate": 8.523497707176987e-06, "loss": 17.9309, "step": 14966 }, { "epoch": 0.2735847332151279, "grad_norm": 7.668267933278983, "learning_rate": 8.523287679083393e-06, "loss": 18.2377, "step": 14967 }, { "epoch": 0.27360301241157436, "grad_norm": 6.0395295066238885, "learning_rate": 8.52307763864098e-06, "loss": 17.3118, "step": 14968 }, { "epoch": 0.2736212916080209, "grad_norm": 6.61967279349748, "learning_rate": 8.522867585850484e-06, "loss": 17.5734, "step": 14969 }, { "epoch": 0.27363957080446744, "grad_norm": 9.006013486169277, "learning_rate": 8.522657520712645e-06, "loss": 18.049, "step": 14970 }, { "epoch": 0.27365785000091397, "grad_norm": 6.182940380764518, "learning_rate": 8.522447443228196e-06, "loss": 17.3366, "step": 14971 }, { "epoch": 0.2736761291973605, "grad_norm": 7.791436530074532, "learning_rate": 8.522237353397876e-06, "loss": 18.1704, "step": 14972 }, { "epoch": 0.273694408393807, "grad_norm": 9.39990182708251, "learning_rate": 8.522027251222418e-06, "loss": 18.3011, "step": 14973 }, { "epoch": 0.2737126875902535, "grad_norm": 7.578812576891775, "learning_rate": 8.521817136702561e-06, "loss": 18.2556, "step": 14974 }, { "epoch": 0.27373096678670006, "grad_norm": 5.352122012319598, "learning_rate": 8.52160700983904e-06, "loss": 17.3055, "step": 14975 }, { "epoch": 0.2737492459831466, "grad_norm": 7.664265293018091, "learning_rate": 8.521396870632593e-06, "loss": 18.1554, "step": 14976 }, { "epoch": 0.27376752517959313, "grad_norm": 6.232458080284897, "learning_rate": 8.521186719083954e-06, "loss": 17.4724, "step": 14977 }, { "epoch": 0.2737858043760396, "grad_norm": 6.451326554195879, "learning_rate": 8.520976555193862e-06, "loss": 17.4553, "step": 14978 }, { "epoch": 0.27380408357248615, "grad_norm": 7.112308104893728, "learning_rate": 8.520766378963054e-06, "loss": 17.8498, "step": 14979 }, { "epoch": 0.2738223627689327, "grad_norm": 6.854962262606233, "learning_rate": 8.520556190392263e-06, "loss": 17.6454, "step": 14980 }, { "epoch": 0.2738406419653792, "grad_norm": 6.434217395773612, "learning_rate": 8.52034598948223e-06, "loss": 17.3864, "step": 14981 }, { "epoch": 0.27385892116182575, "grad_norm": 7.466742836596856, "learning_rate": 8.520135776233689e-06, "loss": 18.077, "step": 14982 }, { "epoch": 0.27387720035827223, "grad_norm": 8.569752531016313, "learning_rate": 8.519925550647377e-06, "loss": 17.762, "step": 14983 }, { "epoch": 0.27389547955471877, "grad_norm": 6.082117234849648, "learning_rate": 8.519715312724032e-06, "loss": 17.2923, "step": 14984 }, { "epoch": 0.2739137587511653, "grad_norm": 6.103727004459452, "learning_rate": 8.519505062464391e-06, "loss": 17.1734, "step": 14985 }, { "epoch": 0.27393203794761184, "grad_norm": 6.199682998443549, "learning_rate": 8.519294799869189e-06, "loss": 17.4157, "step": 14986 }, { "epoch": 0.2739503171440583, "grad_norm": 9.39093050723225, "learning_rate": 8.519084524939163e-06, "loss": 18.3416, "step": 14987 }, { "epoch": 0.27396859634050486, "grad_norm": 7.2478387446581545, "learning_rate": 8.518874237675053e-06, "loss": 18.1011, "step": 14988 }, { "epoch": 0.2739868755369514, "grad_norm": 6.429645863607817, "learning_rate": 8.518663938077594e-06, "loss": 17.7044, "step": 14989 }, { "epoch": 0.2740051547333979, "grad_norm": 7.608277744380132, "learning_rate": 8.518453626147522e-06, "loss": 17.5516, "step": 14990 }, { "epoch": 0.27402343392984446, "grad_norm": 7.515836219198456, "learning_rate": 8.518243301885577e-06, "loss": 18.1083, "step": 14991 }, { "epoch": 0.27404171312629094, "grad_norm": 7.008733753396864, "learning_rate": 8.518032965292494e-06, "loss": 17.7349, "step": 14992 }, { "epoch": 0.2740599923227375, "grad_norm": 6.56982189128168, "learning_rate": 8.51782261636901e-06, "loss": 17.6305, "step": 14993 }, { "epoch": 0.274078271519184, "grad_norm": 9.070740479244042, "learning_rate": 8.517612255115864e-06, "loss": 17.8937, "step": 14994 }, { "epoch": 0.27409655071563055, "grad_norm": 7.192224862176739, "learning_rate": 8.51740188153379e-06, "loss": 18.0503, "step": 14995 }, { "epoch": 0.2741148299120771, "grad_norm": 6.73445119174964, "learning_rate": 8.517191495623532e-06, "loss": 17.3977, "step": 14996 }, { "epoch": 0.27413310910852356, "grad_norm": 7.535594093343021, "learning_rate": 8.516981097385819e-06, "loss": 17.8374, "step": 14997 }, { "epoch": 0.2741513883049701, "grad_norm": 8.484477246434745, "learning_rate": 8.516770686821394e-06, "loss": 17.9086, "step": 14998 }, { "epoch": 0.27416966750141664, "grad_norm": 6.7194537965221395, "learning_rate": 8.516560263930994e-06, "loss": 17.6165, "step": 14999 }, { "epoch": 0.27418794669786317, "grad_norm": 6.20939998983172, "learning_rate": 8.516349828715354e-06, "loss": 17.3942, "step": 15000 }, { "epoch": 0.2742062258943097, "grad_norm": 6.63079232764498, "learning_rate": 8.516139381175212e-06, "loss": 17.3441, "step": 15001 }, { "epoch": 0.2742245050907562, "grad_norm": 6.460953619207861, "learning_rate": 8.515928921311309e-06, "loss": 17.4968, "step": 15002 }, { "epoch": 0.2742427842872027, "grad_norm": 7.039951232819122, "learning_rate": 8.515718449124378e-06, "loss": 17.429, "step": 15003 }, { "epoch": 0.27426106348364926, "grad_norm": 6.60295901253017, "learning_rate": 8.51550796461516e-06, "loss": 17.5674, "step": 15004 }, { "epoch": 0.2742793426800958, "grad_norm": 6.809811201680587, "learning_rate": 8.515297467784392e-06, "loss": 17.6928, "step": 15005 }, { "epoch": 0.27429762187654233, "grad_norm": 5.919394271802698, "learning_rate": 8.515086958632812e-06, "loss": 17.132, "step": 15006 }, { "epoch": 0.2743159010729888, "grad_norm": 9.171031124987774, "learning_rate": 8.514876437161154e-06, "loss": 18.0037, "step": 15007 }, { "epoch": 0.27433418026943535, "grad_norm": 6.5707004573525785, "learning_rate": 8.514665903370163e-06, "loss": 17.612, "step": 15008 }, { "epoch": 0.2743524594658819, "grad_norm": 7.942019601890254, "learning_rate": 8.514455357260572e-06, "loss": 18.1443, "step": 15009 }, { "epoch": 0.2743707386623284, "grad_norm": 7.361567440445024, "learning_rate": 8.514244798833118e-06, "loss": 17.7264, "step": 15010 }, { "epoch": 0.27438901785877495, "grad_norm": 7.443035909101035, "learning_rate": 8.514034228088542e-06, "loss": 17.4236, "step": 15011 }, { "epoch": 0.27440729705522143, "grad_norm": 6.680721693866458, "learning_rate": 8.513823645027581e-06, "loss": 17.6669, "step": 15012 }, { "epoch": 0.27442557625166797, "grad_norm": 7.444716603510005, "learning_rate": 8.513613049650972e-06, "loss": 17.6669, "step": 15013 }, { "epoch": 0.2744438554481145, "grad_norm": 5.649032063460323, "learning_rate": 8.513402441959457e-06, "loss": 17.1612, "step": 15014 }, { "epoch": 0.27446213464456104, "grad_norm": 7.1527954607750495, "learning_rate": 8.513191821953771e-06, "loss": 17.6386, "step": 15015 }, { "epoch": 0.2744804138410076, "grad_norm": 5.197978034091865, "learning_rate": 8.51298118963465e-06, "loss": 16.9496, "step": 15016 }, { "epoch": 0.27449869303745406, "grad_norm": 5.957658222387719, "learning_rate": 8.512770545002835e-06, "loss": 17.0909, "step": 15017 }, { "epoch": 0.2745169722339006, "grad_norm": 5.579661912686926, "learning_rate": 8.512559888059066e-06, "loss": 17.0729, "step": 15018 }, { "epoch": 0.2745352514303471, "grad_norm": 6.901410980448181, "learning_rate": 8.51234921880408e-06, "loss": 17.6866, "step": 15019 }, { "epoch": 0.27455353062679366, "grad_norm": 7.105586637830406, "learning_rate": 8.51213853723861e-06, "loss": 17.481, "step": 15020 }, { "epoch": 0.27457180982324014, "grad_norm": 6.268941979747861, "learning_rate": 8.511927843363403e-06, "loss": 17.5731, "step": 15021 }, { "epoch": 0.2745900890196867, "grad_norm": 6.8508749311522745, "learning_rate": 8.511717137179193e-06, "loss": 17.7081, "step": 15022 }, { "epoch": 0.2746083682161332, "grad_norm": 7.883185569147722, "learning_rate": 8.51150641868672e-06, "loss": 17.7303, "step": 15023 }, { "epoch": 0.27462664741257975, "grad_norm": 7.524898587945424, "learning_rate": 8.511295687886721e-06, "loss": 17.9761, "step": 15024 }, { "epoch": 0.2746449266090263, "grad_norm": 5.85153730137204, "learning_rate": 8.511084944779935e-06, "loss": 17.2036, "step": 15025 }, { "epoch": 0.27466320580547277, "grad_norm": 6.219150490546941, "learning_rate": 8.510874189367103e-06, "loss": 17.5252, "step": 15026 }, { "epoch": 0.2746814850019193, "grad_norm": 5.920963708816412, "learning_rate": 8.51066342164896e-06, "loss": 17.2042, "step": 15027 }, { "epoch": 0.27469976419836584, "grad_norm": 7.768715350254114, "learning_rate": 8.510452641626244e-06, "loss": 17.8859, "step": 15028 }, { "epoch": 0.2747180433948124, "grad_norm": 6.978706382257655, "learning_rate": 8.510241849299698e-06, "loss": 17.3981, "step": 15029 }, { "epoch": 0.2747363225912589, "grad_norm": 6.927354688887749, "learning_rate": 8.51003104467006e-06, "loss": 17.3634, "step": 15030 }, { "epoch": 0.2747546017877054, "grad_norm": 6.215084155147541, "learning_rate": 8.509820227738068e-06, "loss": 17.346, "step": 15031 }, { "epoch": 0.2747728809841519, "grad_norm": 7.464315358997942, "learning_rate": 8.50960939850446e-06, "loss": 17.6887, "step": 15032 }, { "epoch": 0.27479116018059846, "grad_norm": 8.080319942077573, "learning_rate": 8.509398556969975e-06, "loss": 18.0081, "step": 15033 }, { "epoch": 0.274809439377045, "grad_norm": 6.1164665928270185, "learning_rate": 8.509187703135352e-06, "loss": 17.149, "step": 15034 }, { "epoch": 0.27482771857349153, "grad_norm": 7.341736505958095, "learning_rate": 8.508976837001331e-06, "loss": 17.5834, "step": 15035 }, { "epoch": 0.274845997769938, "grad_norm": 5.674655937620607, "learning_rate": 8.508765958568651e-06, "loss": 17.2539, "step": 15036 }, { "epoch": 0.27486427696638455, "grad_norm": 6.70835978578212, "learning_rate": 8.508555067838051e-06, "loss": 17.6066, "step": 15037 }, { "epoch": 0.2748825561628311, "grad_norm": 9.383413690301147, "learning_rate": 8.50834416481027e-06, "loss": 18.3229, "step": 15038 }, { "epoch": 0.2749008353592776, "grad_norm": 6.253300911206716, "learning_rate": 8.508133249486048e-06, "loss": 17.358, "step": 15039 }, { "epoch": 0.27491911455572415, "grad_norm": 6.783213990413725, "learning_rate": 8.507922321866122e-06, "loss": 17.7181, "step": 15040 }, { "epoch": 0.27493739375217063, "grad_norm": 6.3578078618944875, "learning_rate": 8.507711381951234e-06, "loss": 17.2029, "step": 15041 }, { "epoch": 0.27495567294861717, "grad_norm": 6.230551192999519, "learning_rate": 8.50750042974212e-06, "loss": 17.4552, "step": 15042 }, { "epoch": 0.2749739521450637, "grad_norm": 7.4337802008058755, "learning_rate": 8.507289465239523e-06, "loss": 17.7231, "step": 15043 }, { "epoch": 0.27499223134151024, "grad_norm": 6.350096799832979, "learning_rate": 8.50707848844418e-06, "loss": 17.2701, "step": 15044 }, { "epoch": 0.2750105105379568, "grad_norm": 6.071829725294881, "learning_rate": 8.506867499356832e-06, "loss": 17.0834, "step": 15045 }, { "epoch": 0.27502878973440326, "grad_norm": 8.824764724261522, "learning_rate": 8.506656497978216e-06, "loss": 18.5188, "step": 15046 }, { "epoch": 0.2750470689308498, "grad_norm": 6.326763097824839, "learning_rate": 8.506445484309075e-06, "loss": 17.5691, "step": 15047 }, { "epoch": 0.2750653481272963, "grad_norm": 6.299871779090824, "learning_rate": 8.506234458350146e-06, "loss": 17.5947, "step": 15048 }, { "epoch": 0.27508362732374286, "grad_norm": 6.784974901995447, "learning_rate": 8.50602342010217e-06, "loss": 17.2225, "step": 15049 }, { "epoch": 0.2751019065201894, "grad_norm": 5.913411381000654, "learning_rate": 8.505812369565886e-06, "loss": 17.0461, "step": 15050 }, { "epoch": 0.2751201857166359, "grad_norm": 9.54384824095882, "learning_rate": 8.505601306742035e-06, "loss": 17.6065, "step": 15051 }, { "epoch": 0.2751384649130824, "grad_norm": 8.708857677923003, "learning_rate": 8.505390231631354e-06, "loss": 18.7173, "step": 15052 }, { "epoch": 0.27515674410952895, "grad_norm": 6.889078884155962, "learning_rate": 8.505179144234583e-06, "loss": 17.5667, "step": 15053 }, { "epoch": 0.2751750233059755, "grad_norm": 6.497347007870209, "learning_rate": 8.504968044552466e-06, "loss": 17.6546, "step": 15054 }, { "epoch": 0.27519330250242197, "grad_norm": 6.421667153551877, "learning_rate": 8.50475693258574e-06, "loss": 17.5946, "step": 15055 }, { "epoch": 0.2752115816988685, "grad_norm": 5.667448860596541, "learning_rate": 8.504545808335144e-06, "loss": 16.8418, "step": 15056 }, { "epoch": 0.27522986089531504, "grad_norm": 6.312400458371548, "learning_rate": 8.50433467180142e-06, "loss": 17.1851, "step": 15057 }, { "epoch": 0.2752481400917616, "grad_norm": 8.135814509613942, "learning_rate": 8.504123522985306e-06, "loss": 18.0047, "step": 15058 }, { "epoch": 0.2752664192882081, "grad_norm": 6.2714951204111165, "learning_rate": 8.503912361887543e-06, "loss": 17.4653, "step": 15059 }, { "epoch": 0.2752846984846546, "grad_norm": 6.249592788376225, "learning_rate": 8.503701188508872e-06, "loss": 17.3866, "step": 15060 }, { "epoch": 0.2753029776811011, "grad_norm": 9.173638770203379, "learning_rate": 8.503490002850032e-06, "loss": 18.9345, "step": 15061 }, { "epoch": 0.27532125687754766, "grad_norm": 6.725246630707651, "learning_rate": 8.503278804911763e-06, "loss": 17.5321, "step": 15062 }, { "epoch": 0.2753395360739942, "grad_norm": 6.304788185687925, "learning_rate": 8.503067594694807e-06, "loss": 17.5342, "step": 15063 }, { "epoch": 0.27535781527044073, "grad_norm": 6.899815264393031, "learning_rate": 8.502856372199903e-06, "loss": 17.7755, "step": 15064 }, { "epoch": 0.2753760944668872, "grad_norm": 7.4373658126487525, "learning_rate": 8.502645137427793e-06, "loss": 18.2004, "step": 15065 }, { "epoch": 0.27539437366333375, "grad_norm": 6.685823341875556, "learning_rate": 8.502433890379212e-06, "loss": 17.7411, "step": 15066 }, { "epoch": 0.2754126528597803, "grad_norm": 6.258873398752116, "learning_rate": 8.502222631054906e-06, "loss": 17.3507, "step": 15067 }, { "epoch": 0.2754309320562268, "grad_norm": 6.580744993073601, "learning_rate": 8.502011359455613e-06, "loss": 17.6622, "step": 15068 }, { "epoch": 0.27544921125267335, "grad_norm": 5.382922333129405, "learning_rate": 8.501800075582073e-06, "loss": 16.902, "step": 15069 }, { "epoch": 0.27546749044911983, "grad_norm": 6.387901150632027, "learning_rate": 8.501588779435031e-06, "loss": 17.3893, "step": 15070 }, { "epoch": 0.27548576964556637, "grad_norm": 6.282565559703935, "learning_rate": 8.501377471015222e-06, "loss": 17.4775, "step": 15071 }, { "epoch": 0.2755040488420129, "grad_norm": 8.347736709520138, "learning_rate": 8.501166150323389e-06, "loss": 18.3767, "step": 15072 }, { "epoch": 0.27552232803845944, "grad_norm": 6.133801029727497, "learning_rate": 8.500954817360271e-06, "loss": 17.4881, "step": 15073 }, { "epoch": 0.275540607234906, "grad_norm": 6.7079679931503025, "learning_rate": 8.500743472126613e-06, "loss": 17.2911, "step": 15074 }, { "epoch": 0.27555888643135246, "grad_norm": 6.837401980531877, "learning_rate": 8.500532114623152e-06, "loss": 17.5818, "step": 15075 }, { "epoch": 0.275577165627799, "grad_norm": 7.802182273194156, "learning_rate": 8.50032074485063e-06, "loss": 17.8849, "step": 15076 }, { "epoch": 0.27559544482424553, "grad_norm": 6.60104311402147, "learning_rate": 8.500109362809785e-06, "loss": 17.4672, "step": 15077 }, { "epoch": 0.27561372402069206, "grad_norm": 7.163784549768292, "learning_rate": 8.499897968501363e-06, "loss": 17.4801, "step": 15078 }, { "epoch": 0.2756320032171386, "grad_norm": 5.791770037994613, "learning_rate": 8.499686561926102e-06, "loss": 17.0722, "step": 15079 }, { "epoch": 0.2756502824135851, "grad_norm": 8.259606438480516, "learning_rate": 8.499475143084743e-06, "loss": 18.5193, "step": 15080 }, { "epoch": 0.2756685616100316, "grad_norm": 9.952855206016688, "learning_rate": 8.499263711978027e-06, "loss": 18.9349, "step": 15081 }, { "epoch": 0.27568684080647815, "grad_norm": 7.221818700686994, "learning_rate": 8.499052268606696e-06, "loss": 18.1122, "step": 15082 }, { "epoch": 0.2757051200029247, "grad_norm": 5.80364490686129, "learning_rate": 8.49884081297149e-06, "loss": 17.311, "step": 15083 }, { "epoch": 0.2757233991993712, "grad_norm": 6.391870139942319, "learning_rate": 8.49862934507315e-06, "loss": 17.7093, "step": 15084 }, { "epoch": 0.2757416783958177, "grad_norm": 11.325604702446954, "learning_rate": 8.498417864912418e-06, "loss": 18.6128, "step": 15085 }, { "epoch": 0.27575995759226424, "grad_norm": 6.693411345117484, "learning_rate": 8.498206372490036e-06, "loss": 17.5917, "step": 15086 }, { "epoch": 0.2757782367887108, "grad_norm": 6.856529657181099, "learning_rate": 8.497994867806743e-06, "loss": 17.7577, "step": 15087 }, { "epoch": 0.2757965159851573, "grad_norm": 6.169603565284105, "learning_rate": 8.497783350863282e-06, "loss": 17.3158, "step": 15088 }, { "epoch": 0.2758147951816038, "grad_norm": 6.683157507006591, "learning_rate": 8.497571821660393e-06, "loss": 17.6057, "step": 15089 }, { "epoch": 0.2758330743780503, "grad_norm": 6.776299964282804, "learning_rate": 8.497360280198819e-06, "loss": 17.5894, "step": 15090 }, { "epoch": 0.27585135357449686, "grad_norm": 6.663756349873055, "learning_rate": 8.497148726479302e-06, "loss": 17.6064, "step": 15091 }, { "epoch": 0.2758696327709434, "grad_norm": 5.606218917028794, "learning_rate": 8.49693716050258e-06, "loss": 17.1346, "step": 15092 }, { "epoch": 0.27588791196738993, "grad_norm": 6.761619243793991, "learning_rate": 8.496725582269399e-06, "loss": 17.8594, "step": 15093 }, { "epoch": 0.2759061911638364, "grad_norm": 7.383838866864371, "learning_rate": 8.496513991780496e-06, "loss": 17.9355, "step": 15094 }, { "epoch": 0.27592447036028295, "grad_norm": 5.878574225581679, "learning_rate": 8.496302389036618e-06, "loss": 17.3235, "step": 15095 }, { "epoch": 0.2759427495567295, "grad_norm": 7.045753771894859, "learning_rate": 8.4960907740385e-06, "loss": 17.9473, "step": 15096 }, { "epoch": 0.275961028753176, "grad_norm": 6.874159400949003, "learning_rate": 8.495879146786888e-06, "loss": 17.6823, "step": 15097 }, { "epoch": 0.27597930794962255, "grad_norm": 5.931777682275133, "learning_rate": 8.495667507282523e-06, "loss": 17.2954, "step": 15098 }, { "epoch": 0.27599758714606903, "grad_norm": 5.658249804303634, "learning_rate": 8.495455855526149e-06, "loss": 17.437, "step": 15099 }, { "epoch": 0.27601586634251557, "grad_norm": 7.709657203321893, "learning_rate": 8.495244191518503e-06, "loss": 18.0923, "step": 15100 }, { "epoch": 0.2760341455389621, "grad_norm": 6.419851040462697, "learning_rate": 8.495032515260332e-06, "loss": 17.3243, "step": 15101 }, { "epoch": 0.27605242473540864, "grad_norm": 6.235144609086725, "learning_rate": 8.494820826752373e-06, "loss": 17.1859, "step": 15102 }, { "epoch": 0.2760707039318552, "grad_norm": 6.224796415996176, "learning_rate": 8.49460912599537e-06, "loss": 17.5906, "step": 15103 }, { "epoch": 0.27608898312830166, "grad_norm": 6.818567391082437, "learning_rate": 8.494397412990064e-06, "loss": 17.7974, "step": 15104 }, { "epoch": 0.2761072623247482, "grad_norm": 6.345565752759843, "learning_rate": 8.494185687737202e-06, "loss": 17.3961, "step": 15105 }, { "epoch": 0.27612554152119473, "grad_norm": 6.181317681537005, "learning_rate": 8.49397395023752e-06, "loss": 17.3983, "step": 15106 }, { "epoch": 0.27614382071764126, "grad_norm": 5.922731987022923, "learning_rate": 8.493762200491764e-06, "loss": 17.3817, "step": 15107 }, { "epoch": 0.2761620999140878, "grad_norm": 7.005057570083185, "learning_rate": 8.493550438500674e-06, "loss": 17.7266, "step": 15108 }, { "epoch": 0.2761803791105343, "grad_norm": 6.056443598234483, "learning_rate": 8.493338664264994e-06, "loss": 17.4693, "step": 15109 }, { "epoch": 0.2761986583069808, "grad_norm": 6.870748091461127, "learning_rate": 8.493126877785462e-06, "loss": 17.5765, "step": 15110 }, { "epoch": 0.27621693750342735, "grad_norm": 6.255024517094979, "learning_rate": 8.492915079062825e-06, "loss": 17.3604, "step": 15111 }, { "epoch": 0.2762352166998739, "grad_norm": 7.560081869785773, "learning_rate": 8.492703268097826e-06, "loss": 17.9092, "step": 15112 }, { "epoch": 0.2762534958963204, "grad_norm": 6.943035687647867, "learning_rate": 8.492491444891202e-06, "loss": 17.582, "step": 15113 }, { "epoch": 0.2762717750927669, "grad_norm": 6.237283884131271, "learning_rate": 8.4922796094437e-06, "loss": 17.3341, "step": 15114 }, { "epoch": 0.27629005428921344, "grad_norm": 8.386729445077309, "learning_rate": 8.49206776175606e-06, "loss": 18.5546, "step": 15115 }, { "epoch": 0.27630833348566, "grad_norm": 6.636602832320478, "learning_rate": 8.491855901829028e-06, "loss": 17.4801, "step": 15116 }, { "epoch": 0.2763266126821065, "grad_norm": 5.739313425652385, "learning_rate": 8.491644029663342e-06, "loss": 17.1745, "step": 15117 }, { "epoch": 0.27634489187855305, "grad_norm": 5.728039251304245, "learning_rate": 8.491432145259746e-06, "loss": 17.0786, "step": 15118 }, { "epoch": 0.2763631710749995, "grad_norm": 6.301290758430111, "learning_rate": 8.491220248618985e-06, "loss": 17.5457, "step": 15119 }, { "epoch": 0.27638145027144606, "grad_norm": 7.303871962582155, "learning_rate": 8.4910083397418e-06, "loss": 18.0031, "step": 15120 }, { "epoch": 0.2763997294678926, "grad_norm": 6.767786507919163, "learning_rate": 8.490796418628933e-06, "loss": 17.4871, "step": 15121 }, { "epoch": 0.27641800866433913, "grad_norm": 7.215094564305718, "learning_rate": 8.490584485281126e-06, "loss": 17.8481, "step": 15122 }, { "epoch": 0.2764362878607856, "grad_norm": 6.125606787595317, "learning_rate": 8.490372539699125e-06, "loss": 17.2545, "step": 15123 }, { "epoch": 0.27645456705723215, "grad_norm": 7.274970598348587, "learning_rate": 8.49016058188367e-06, "loss": 17.6086, "step": 15124 }, { "epoch": 0.2764728462536787, "grad_norm": 7.0521901233773745, "learning_rate": 8.489948611835507e-06, "loss": 17.9983, "step": 15125 }, { "epoch": 0.2764911254501252, "grad_norm": 7.406438818444638, "learning_rate": 8.489736629555376e-06, "loss": 17.8855, "step": 15126 }, { "epoch": 0.27650940464657175, "grad_norm": 5.464736523236706, "learning_rate": 8.48952463504402e-06, "loss": 17.2808, "step": 15127 }, { "epoch": 0.27652768384301823, "grad_norm": 6.7985981641731925, "learning_rate": 8.489312628302184e-06, "loss": 17.553, "step": 15128 }, { "epoch": 0.27654596303946477, "grad_norm": 5.7880092429170755, "learning_rate": 8.489100609330611e-06, "loss": 17.5625, "step": 15129 }, { "epoch": 0.2765642422359113, "grad_norm": 7.5237098939421445, "learning_rate": 8.48888857813004e-06, "loss": 17.8427, "step": 15130 }, { "epoch": 0.27658252143235784, "grad_norm": 5.818770988021056, "learning_rate": 8.488676534701222e-06, "loss": 17.4699, "step": 15131 }, { "epoch": 0.2766008006288044, "grad_norm": 7.122951813383191, "learning_rate": 8.48846447904489e-06, "loss": 17.8285, "step": 15132 }, { "epoch": 0.27661907982525086, "grad_norm": 7.38715813034999, "learning_rate": 8.488252411161797e-06, "loss": 18.2292, "step": 15133 }, { "epoch": 0.2766373590216974, "grad_norm": 6.675865547873947, "learning_rate": 8.48804033105268e-06, "loss": 17.6094, "step": 15134 }, { "epoch": 0.27665563821814393, "grad_norm": 6.027580623893715, "learning_rate": 8.487828238718286e-06, "loss": 17.2813, "step": 15135 }, { "epoch": 0.27667391741459046, "grad_norm": 7.023246440827114, "learning_rate": 8.487616134159355e-06, "loss": 17.7784, "step": 15136 }, { "epoch": 0.276692196611037, "grad_norm": 6.092554966001453, "learning_rate": 8.487404017376632e-06, "loss": 17.2843, "step": 15137 }, { "epoch": 0.2767104758074835, "grad_norm": 5.93347495303618, "learning_rate": 8.48719188837086e-06, "loss": 17.4982, "step": 15138 }, { "epoch": 0.27672875500393, "grad_norm": 5.654297826826496, "learning_rate": 8.486979747142785e-06, "loss": 16.9761, "step": 15139 }, { "epoch": 0.27674703420037655, "grad_norm": 6.0145806159862, "learning_rate": 8.486767593693148e-06, "loss": 17.1847, "step": 15140 }, { "epoch": 0.2767653133968231, "grad_norm": 13.957909146183193, "learning_rate": 8.486555428022692e-06, "loss": 18.0061, "step": 15141 }, { "epoch": 0.2767835925932696, "grad_norm": 7.1051146284803135, "learning_rate": 8.486343250132163e-06, "loss": 17.6647, "step": 15142 }, { "epoch": 0.2768018717897161, "grad_norm": 6.318525136817179, "learning_rate": 8.486131060022303e-06, "loss": 17.2715, "step": 15143 }, { "epoch": 0.27682015098616264, "grad_norm": 6.556186221346598, "learning_rate": 8.485918857693855e-06, "loss": 17.885, "step": 15144 }, { "epoch": 0.2768384301826092, "grad_norm": 5.674162468492682, "learning_rate": 8.485706643147567e-06, "loss": 17.1208, "step": 15145 }, { "epoch": 0.2768567093790557, "grad_norm": 7.981543699737078, "learning_rate": 8.485494416384177e-06, "loss": 17.711, "step": 15146 }, { "epoch": 0.27687498857550225, "grad_norm": 5.898058900565279, "learning_rate": 8.485282177404433e-06, "loss": 17.3461, "step": 15147 }, { "epoch": 0.2768932677719487, "grad_norm": 7.49345719971531, "learning_rate": 8.485069926209076e-06, "loss": 17.9474, "step": 15148 }, { "epoch": 0.27691154696839526, "grad_norm": 6.075856025454034, "learning_rate": 8.484857662798853e-06, "loss": 17.3403, "step": 15149 }, { "epoch": 0.2769298261648418, "grad_norm": 8.057692135962697, "learning_rate": 8.484645387174505e-06, "loss": 18.4278, "step": 15150 }, { "epoch": 0.27694810536128833, "grad_norm": 5.72547415657414, "learning_rate": 8.484433099336778e-06, "loss": 17.1654, "step": 15151 }, { "epoch": 0.27696638455773487, "grad_norm": 8.258010290672802, "learning_rate": 8.484220799286414e-06, "loss": 18.2429, "step": 15152 }, { "epoch": 0.27698466375418135, "grad_norm": 6.320224459549836, "learning_rate": 8.484008487024159e-06, "loss": 17.6195, "step": 15153 }, { "epoch": 0.2770029429506279, "grad_norm": 5.809548766519819, "learning_rate": 8.483796162550756e-06, "loss": 17.1871, "step": 15154 }, { "epoch": 0.2770212221470744, "grad_norm": 6.292791614927964, "learning_rate": 8.483583825866952e-06, "loss": 17.5189, "step": 15155 }, { "epoch": 0.27703950134352096, "grad_norm": 7.2897651927097975, "learning_rate": 8.483371476973488e-06, "loss": 17.9668, "step": 15156 }, { "epoch": 0.27705778053996744, "grad_norm": 6.346951510267883, "learning_rate": 8.483159115871109e-06, "loss": 17.4059, "step": 15157 }, { "epoch": 0.27707605973641397, "grad_norm": 8.069388792248683, "learning_rate": 8.48294674256056e-06, "loss": 18.2912, "step": 15158 }, { "epoch": 0.2770943389328605, "grad_norm": 6.267336526982611, "learning_rate": 8.482734357042584e-06, "loss": 17.3161, "step": 15159 }, { "epoch": 0.27711261812930704, "grad_norm": 6.705459898303039, "learning_rate": 8.482521959317926e-06, "loss": 17.3089, "step": 15160 }, { "epoch": 0.2771308973257536, "grad_norm": 5.928807624867987, "learning_rate": 8.482309549387333e-06, "loss": 17.4545, "step": 15161 }, { "epoch": 0.27714917652220006, "grad_norm": 7.814767906037155, "learning_rate": 8.482097127251545e-06, "loss": 17.5292, "step": 15162 }, { "epoch": 0.2771674557186466, "grad_norm": 5.816355538136252, "learning_rate": 8.481884692911308e-06, "loss": 17.2492, "step": 15163 }, { "epoch": 0.27718573491509313, "grad_norm": 7.033786910236613, "learning_rate": 8.48167224636737e-06, "loss": 18.1615, "step": 15164 }, { "epoch": 0.27720401411153966, "grad_norm": 5.430195463635135, "learning_rate": 8.481459787620472e-06, "loss": 17.1092, "step": 15165 }, { "epoch": 0.2772222933079862, "grad_norm": 6.123967341476146, "learning_rate": 8.481247316671358e-06, "loss": 17.4193, "step": 15166 }, { "epoch": 0.2772405725044327, "grad_norm": 5.5224896668202765, "learning_rate": 8.481034833520776e-06, "loss": 17.1137, "step": 15167 }, { "epoch": 0.2772588517008792, "grad_norm": 5.863051389843126, "learning_rate": 8.480822338169468e-06, "loss": 17.2692, "step": 15168 }, { "epoch": 0.27727713089732575, "grad_norm": 6.441945504782519, "learning_rate": 8.48060983061818e-06, "loss": 17.6335, "step": 15169 }, { "epoch": 0.2772954100937723, "grad_norm": 6.34402376146172, "learning_rate": 8.480397310867657e-06, "loss": 17.4571, "step": 15170 }, { "epoch": 0.2773136892902188, "grad_norm": 8.353750088079025, "learning_rate": 8.480184778918644e-06, "loss": 18.0318, "step": 15171 }, { "epoch": 0.2773319684866653, "grad_norm": 7.132669222601691, "learning_rate": 8.479972234771883e-06, "loss": 17.8458, "step": 15172 }, { "epoch": 0.27735024768311184, "grad_norm": 6.210608983505865, "learning_rate": 8.479759678428123e-06, "loss": 17.5225, "step": 15173 }, { "epoch": 0.2773685268795584, "grad_norm": 6.017708530134503, "learning_rate": 8.479547109888108e-06, "loss": 17.2827, "step": 15174 }, { "epoch": 0.2773868060760049, "grad_norm": 7.086861207018966, "learning_rate": 8.479334529152582e-06, "loss": 17.6825, "step": 15175 }, { "epoch": 0.27740508527245145, "grad_norm": 6.989417468633682, "learning_rate": 8.479121936222288e-06, "loss": 17.7719, "step": 15176 }, { "epoch": 0.2774233644688979, "grad_norm": 6.034472674549152, "learning_rate": 8.478909331097975e-06, "loss": 17.4463, "step": 15177 }, { "epoch": 0.27744164366534446, "grad_norm": 6.788649532752917, "learning_rate": 8.478696713780388e-06, "loss": 17.771, "step": 15178 }, { "epoch": 0.277459922861791, "grad_norm": 6.179746451369153, "learning_rate": 8.47848408427027e-06, "loss": 17.2697, "step": 15179 }, { "epoch": 0.27747820205823753, "grad_norm": 6.021264277558732, "learning_rate": 8.478271442568366e-06, "loss": 17.1685, "step": 15180 }, { "epoch": 0.27749648125468407, "grad_norm": 6.211870963934204, "learning_rate": 8.478058788675424e-06, "loss": 17.2924, "step": 15181 }, { "epoch": 0.27751476045113055, "grad_norm": 6.08344944737126, "learning_rate": 8.477846122592188e-06, "loss": 17.3834, "step": 15182 }, { "epoch": 0.2775330396475771, "grad_norm": 7.322540912423537, "learning_rate": 8.477633444319401e-06, "loss": 17.6685, "step": 15183 }, { "epoch": 0.2775513188440236, "grad_norm": 6.987847075776526, "learning_rate": 8.477420753857813e-06, "loss": 17.7501, "step": 15184 }, { "epoch": 0.27756959804047016, "grad_norm": 7.145721719355926, "learning_rate": 8.477208051208166e-06, "loss": 17.8193, "step": 15185 }, { "epoch": 0.2775878772369167, "grad_norm": 7.465490201658691, "learning_rate": 8.476995336371207e-06, "loss": 17.9038, "step": 15186 }, { "epoch": 0.27760615643336317, "grad_norm": 6.450686128331409, "learning_rate": 8.47678260934768e-06, "loss": 17.7545, "step": 15187 }, { "epoch": 0.2776244356298097, "grad_norm": 5.831032254584482, "learning_rate": 8.476569870138332e-06, "loss": 17.2206, "step": 15188 }, { "epoch": 0.27764271482625624, "grad_norm": 6.9563310533709055, "learning_rate": 8.476357118743909e-06, "loss": 17.5481, "step": 15189 }, { "epoch": 0.2776609940227028, "grad_norm": 7.393013819431112, "learning_rate": 8.476144355165154e-06, "loss": 17.7355, "step": 15190 }, { "epoch": 0.27767927321914926, "grad_norm": 6.801743686131153, "learning_rate": 8.475931579402816e-06, "loss": 17.6852, "step": 15191 }, { "epoch": 0.2776975524155958, "grad_norm": 6.118024024183996, "learning_rate": 8.475718791457642e-06, "loss": 17.3726, "step": 15192 }, { "epoch": 0.27771583161204233, "grad_norm": 6.365383240770513, "learning_rate": 8.47550599133037e-06, "loss": 17.4786, "step": 15193 }, { "epoch": 0.27773411080848887, "grad_norm": 7.625483733167843, "learning_rate": 8.475293179021756e-06, "loss": 18.2475, "step": 15194 }, { "epoch": 0.2777523900049354, "grad_norm": 5.163126788299081, "learning_rate": 8.475080354532538e-06, "loss": 17.2556, "step": 15195 }, { "epoch": 0.2777706692013819, "grad_norm": 6.736192556992801, "learning_rate": 8.474867517863466e-06, "loss": 17.5523, "step": 15196 }, { "epoch": 0.2777889483978284, "grad_norm": 6.561879531359489, "learning_rate": 8.474654669015285e-06, "loss": 17.5083, "step": 15197 }, { "epoch": 0.27780722759427495, "grad_norm": 6.758920266786231, "learning_rate": 8.47444180798874e-06, "loss": 17.5761, "step": 15198 }, { "epoch": 0.2778255067907215, "grad_norm": 6.142855284877659, "learning_rate": 8.474228934784579e-06, "loss": 17.2216, "step": 15199 }, { "epoch": 0.277843785987168, "grad_norm": 6.649292497145365, "learning_rate": 8.474016049403547e-06, "loss": 17.4655, "step": 15200 }, { "epoch": 0.2778620651836145, "grad_norm": 5.50710130195196, "learning_rate": 8.473803151846389e-06, "loss": 17.1131, "step": 15201 }, { "epoch": 0.27788034438006104, "grad_norm": 5.051523217599611, "learning_rate": 8.473590242113854e-06, "loss": 16.9992, "step": 15202 }, { "epoch": 0.2778986235765076, "grad_norm": 7.419341418076164, "learning_rate": 8.473377320206685e-06, "loss": 18.1681, "step": 15203 }, { "epoch": 0.2779169027729541, "grad_norm": 6.583411215133172, "learning_rate": 8.47316438612563e-06, "loss": 17.6993, "step": 15204 }, { "epoch": 0.27793518196940065, "grad_norm": 10.621010132856448, "learning_rate": 8.472951439871437e-06, "loss": 17.979, "step": 15205 }, { "epoch": 0.2779534611658471, "grad_norm": 6.9761154458200485, "learning_rate": 8.472738481444849e-06, "loss": 17.7062, "step": 15206 }, { "epoch": 0.27797174036229366, "grad_norm": 8.09995108118209, "learning_rate": 8.472525510846615e-06, "loss": 18.1606, "step": 15207 }, { "epoch": 0.2779900195587402, "grad_norm": 6.237726067150974, "learning_rate": 8.472312528077478e-06, "loss": 17.2962, "step": 15208 }, { "epoch": 0.27800829875518673, "grad_norm": 8.064495181918465, "learning_rate": 8.472099533138189e-06, "loss": 18.4658, "step": 15209 }, { "epoch": 0.27802657795163327, "grad_norm": 5.936813021744895, "learning_rate": 8.471886526029494e-06, "loss": 17.1052, "step": 15210 }, { "epoch": 0.27804485714807975, "grad_norm": 6.116096489328429, "learning_rate": 8.471673506752135e-06, "loss": 17.3807, "step": 15211 }, { "epoch": 0.2780631363445263, "grad_norm": 6.622652093645998, "learning_rate": 8.471460475306862e-06, "loss": 17.6376, "step": 15212 }, { "epoch": 0.2780814155409728, "grad_norm": 6.51173487662506, "learning_rate": 8.471247431694422e-06, "loss": 17.6722, "step": 15213 }, { "epoch": 0.27809969473741936, "grad_norm": 6.6349494648415925, "learning_rate": 8.471034375915562e-06, "loss": 17.2563, "step": 15214 }, { "epoch": 0.2781179739338659, "grad_norm": 6.05963094836774, "learning_rate": 8.470821307971026e-06, "loss": 17.3188, "step": 15215 }, { "epoch": 0.27813625313031237, "grad_norm": 7.724124385030072, "learning_rate": 8.470608227861565e-06, "loss": 17.6223, "step": 15216 }, { "epoch": 0.2781545323267589, "grad_norm": 7.930717661901581, "learning_rate": 8.47039513558792e-06, "loss": 18.4473, "step": 15217 }, { "epoch": 0.27817281152320544, "grad_norm": 6.870144256274125, "learning_rate": 8.470182031150843e-06, "loss": 17.8599, "step": 15218 }, { "epoch": 0.278191090719652, "grad_norm": 6.339470043941988, "learning_rate": 8.46996891455108e-06, "loss": 17.6292, "step": 15219 }, { "epoch": 0.2782093699160985, "grad_norm": 6.876491826935089, "learning_rate": 8.469755785789375e-06, "loss": 17.8334, "step": 15220 }, { "epoch": 0.278227649112545, "grad_norm": 8.450256509774235, "learning_rate": 8.469542644866478e-06, "loss": 18.408, "step": 15221 }, { "epoch": 0.27824592830899153, "grad_norm": 7.71338462243798, "learning_rate": 8.469329491783136e-06, "loss": 17.814, "step": 15222 }, { "epoch": 0.27826420750543807, "grad_norm": 6.619253118741429, "learning_rate": 8.469116326540094e-06, "loss": 17.4123, "step": 15223 }, { "epoch": 0.2782824867018846, "grad_norm": 8.064322766637254, "learning_rate": 8.468903149138102e-06, "loss": 18.4396, "step": 15224 }, { "epoch": 0.2783007658983311, "grad_norm": 7.097520508723367, "learning_rate": 8.468689959577902e-06, "loss": 17.9868, "step": 15225 }, { "epoch": 0.2783190450947776, "grad_norm": 6.357331624950789, "learning_rate": 8.468476757860247e-06, "loss": 17.1258, "step": 15226 }, { "epoch": 0.27833732429122415, "grad_norm": 6.795499416133291, "learning_rate": 8.468263543985882e-06, "loss": 18.116, "step": 15227 }, { "epoch": 0.2783556034876707, "grad_norm": 5.91248706771861, "learning_rate": 8.468050317955554e-06, "loss": 17.3861, "step": 15228 }, { "epoch": 0.2783738826841172, "grad_norm": 6.875592199296723, "learning_rate": 8.467837079770012e-06, "loss": 17.7825, "step": 15229 }, { "epoch": 0.2783921618805637, "grad_norm": 7.0886399327734475, "learning_rate": 8.46762382943e-06, "loss": 17.9897, "step": 15230 }, { "epoch": 0.27841044107701024, "grad_norm": 6.236725079378205, "learning_rate": 8.467410566936267e-06, "loss": 17.7535, "step": 15231 }, { "epoch": 0.2784287202734568, "grad_norm": 7.666071532080812, "learning_rate": 8.467197292289562e-06, "loss": 18.0279, "step": 15232 }, { "epoch": 0.2784469994699033, "grad_norm": 6.587910759013121, "learning_rate": 8.46698400549063e-06, "loss": 17.2562, "step": 15233 }, { "epoch": 0.27846527866634985, "grad_norm": 6.868167550343062, "learning_rate": 8.466770706540222e-06, "loss": 17.8229, "step": 15234 }, { "epoch": 0.2784835578627963, "grad_norm": 5.841143692922496, "learning_rate": 8.466557395439083e-06, "loss": 17.2415, "step": 15235 }, { "epoch": 0.27850183705924286, "grad_norm": 6.171709736836082, "learning_rate": 8.46634407218796e-06, "loss": 17.4398, "step": 15236 }, { "epoch": 0.2785201162556894, "grad_norm": 8.425015007776755, "learning_rate": 8.466130736787603e-06, "loss": 18.005, "step": 15237 }, { "epoch": 0.27853839545213593, "grad_norm": 6.6812126239316125, "learning_rate": 8.465917389238757e-06, "loss": 17.6213, "step": 15238 }, { "epoch": 0.27855667464858247, "grad_norm": 5.946752238077004, "learning_rate": 8.465704029542173e-06, "loss": 17.321, "step": 15239 }, { "epoch": 0.27857495384502895, "grad_norm": 6.8249810466527405, "learning_rate": 8.465490657698596e-06, "loss": 17.6091, "step": 15240 }, { "epoch": 0.2785932330414755, "grad_norm": 6.257116990467253, "learning_rate": 8.465277273708777e-06, "loss": 17.5928, "step": 15241 }, { "epoch": 0.278611512237922, "grad_norm": 8.377971703319272, "learning_rate": 8.465063877573459e-06, "loss": 18.2086, "step": 15242 }, { "epoch": 0.27862979143436856, "grad_norm": 6.373933554991297, "learning_rate": 8.464850469293396e-06, "loss": 17.3858, "step": 15243 }, { "epoch": 0.2786480706308151, "grad_norm": 6.426640808081779, "learning_rate": 8.46463704886933e-06, "loss": 17.4808, "step": 15244 }, { "epoch": 0.2786663498272616, "grad_norm": 6.208908698906014, "learning_rate": 8.464423616302012e-06, "loss": 17.2587, "step": 15245 }, { "epoch": 0.2786846290237081, "grad_norm": 6.426478401626894, "learning_rate": 8.46421017159219e-06, "loss": 17.3913, "step": 15246 }, { "epoch": 0.27870290822015464, "grad_norm": 6.509789723463751, "learning_rate": 8.463996714740614e-06, "loss": 17.543, "step": 15247 }, { "epoch": 0.2787211874166012, "grad_norm": 6.309781055249573, "learning_rate": 8.463783245748028e-06, "loss": 17.9775, "step": 15248 }, { "epoch": 0.2787394666130477, "grad_norm": 7.551419698529556, "learning_rate": 8.463569764615183e-06, "loss": 18.1277, "step": 15249 }, { "epoch": 0.2787577458094942, "grad_norm": 5.969849642908827, "learning_rate": 8.463356271342826e-06, "loss": 17.3494, "step": 15250 }, { "epoch": 0.27877602500594073, "grad_norm": 6.701235421654121, "learning_rate": 8.463142765931706e-06, "loss": 17.9205, "step": 15251 }, { "epoch": 0.27879430420238727, "grad_norm": 5.386231186161452, "learning_rate": 8.462929248382572e-06, "loss": 17.265, "step": 15252 }, { "epoch": 0.2788125833988338, "grad_norm": 7.491369075230487, "learning_rate": 8.46271571869617e-06, "loss": 17.4947, "step": 15253 }, { "epoch": 0.27883086259528034, "grad_norm": 6.546424260274679, "learning_rate": 8.46250217687325e-06, "loss": 17.5241, "step": 15254 }, { "epoch": 0.2788491417917268, "grad_norm": 5.975339806240297, "learning_rate": 8.462288622914563e-06, "loss": 17.4023, "step": 15255 }, { "epoch": 0.27886742098817335, "grad_norm": 5.741204871009067, "learning_rate": 8.462075056820852e-06, "loss": 17.3395, "step": 15256 }, { "epoch": 0.2788857001846199, "grad_norm": 6.986842947076972, "learning_rate": 8.46186147859287e-06, "loss": 17.5549, "step": 15257 }, { "epoch": 0.2789039793810664, "grad_norm": 8.520068982359538, "learning_rate": 8.461647888231363e-06, "loss": 18.7468, "step": 15258 }, { "epoch": 0.2789222585775129, "grad_norm": 5.132084312559286, "learning_rate": 8.46143428573708e-06, "loss": 16.9836, "step": 15259 }, { "epoch": 0.27894053777395944, "grad_norm": 6.018714877199563, "learning_rate": 8.46122067111077e-06, "loss": 17.1421, "step": 15260 }, { "epoch": 0.278958816970406, "grad_norm": 8.834433967022733, "learning_rate": 8.461007044353184e-06, "loss": 18.7856, "step": 15261 }, { "epoch": 0.2789770961668525, "grad_norm": 6.720650682562521, "learning_rate": 8.460793405465066e-06, "loss": 17.8257, "step": 15262 }, { "epoch": 0.27899537536329905, "grad_norm": 5.181558983727919, "learning_rate": 8.46057975444717e-06, "loss": 16.9849, "step": 15263 }, { "epoch": 0.2790136545597455, "grad_norm": 6.330977568142363, "learning_rate": 8.460366091300241e-06, "loss": 17.465, "step": 15264 }, { "epoch": 0.27903193375619206, "grad_norm": 7.0797762558721935, "learning_rate": 8.460152416025029e-06, "loss": 17.7323, "step": 15265 }, { "epoch": 0.2790502129526386, "grad_norm": 6.604603805911028, "learning_rate": 8.459938728622284e-06, "loss": 17.4136, "step": 15266 }, { "epoch": 0.27906849214908513, "grad_norm": 5.953423256831014, "learning_rate": 8.459725029092754e-06, "loss": 17.2549, "step": 15267 }, { "epoch": 0.27908677134553167, "grad_norm": 5.211822564212015, "learning_rate": 8.459511317437185e-06, "loss": 17.1655, "step": 15268 }, { "epoch": 0.27910505054197815, "grad_norm": 7.650219297859471, "learning_rate": 8.459297593656333e-06, "loss": 18.0305, "step": 15269 }, { "epoch": 0.2791233297384247, "grad_norm": 5.4568084959639025, "learning_rate": 8.45908385775094e-06, "loss": 17.2651, "step": 15270 }, { "epoch": 0.2791416089348712, "grad_norm": 6.108865668184069, "learning_rate": 8.45887010972176e-06, "loss": 17.3542, "step": 15271 }, { "epoch": 0.27915988813131776, "grad_norm": 7.042935463620705, "learning_rate": 8.45865634956954e-06, "loss": 17.7592, "step": 15272 }, { "epoch": 0.2791781673277643, "grad_norm": 7.0895414363913165, "learning_rate": 8.45844257729503e-06, "loss": 17.6137, "step": 15273 }, { "epoch": 0.2791964465242108, "grad_norm": 5.97027588161131, "learning_rate": 8.45822879289898e-06, "loss": 17.2089, "step": 15274 }, { "epoch": 0.2792147257206573, "grad_norm": 6.328044998520751, "learning_rate": 8.458014996382136e-06, "loss": 17.2472, "step": 15275 }, { "epoch": 0.27923300491710384, "grad_norm": 8.500126130847903, "learning_rate": 8.457801187745249e-06, "loss": 18.5316, "step": 15276 }, { "epoch": 0.2792512841135504, "grad_norm": 5.9999936008840296, "learning_rate": 8.45758736698907e-06, "loss": 17.2843, "step": 15277 }, { "epoch": 0.2792695633099969, "grad_norm": 5.768989858580754, "learning_rate": 8.457373534114347e-06, "loss": 17.23, "step": 15278 }, { "epoch": 0.2792878425064434, "grad_norm": 5.676785352440452, "learning_rate": 8.457159689121832e-06, "loss": 17.2136, "step": 15279 }, { "epoch": 0.27930612170288993, "grad_norm": 5.623070110265875, "learning_rate": 8.45694583201227e-06, "loss": 17.233, "step": 15280 }, { "epoch": 0.27932440089933647, "grad_norm": 5.297347293676569, "learning_rate": 8.456731962786413e-06, "loss": 16.9945, "step": 15281 }, { "epoch": 0.279342680095783, "grad_norm": 5.718687791115308, "learning_rate": 8.456518081445012e-06, "loss": 17.4141, "step": 15282 }, { "epoch": 0.27936095929222954, "grad_norm": 7.002780859931119, "learning_rate": 8.456304187988813e-06, "loss": 17.5433, "step": 15283 }, { "epoch": 0.279379238488676, "grad_norm": 4.166955275633923, "learning_rate": 8.456090282418567e-06, "loss": 16.6323, "step": 15284 }, { "epoch": 0.27939751768512255, "grad_norm": 6.373873436434632, "learning_rate": 8.455876364735029e-06, "loss": 17.451, "step": 15285 }, { "epoch": 0.2794157968815691, "grad_norm": 6.079561982362909, "learning_rate": 8.45566243493894e-06, "loss": 17.389, "step": 15286 }, { "epoch": 0.2794340760780156, "grad_norm": 6.948018820344659, "learning_rate": 8.455448493031055e-06, "loss": 17.5749, "step": 15287 }, { "epoch": 0.27945235527446216, "grad_norm": 5.979548652802669, "learning_rate": 8.455234539012124e-06, "loss": 17.3088, "step": 15288 }, { "epoch": 0.27947063447090864, "grad_norm": 6.235225684157927, "learning_rate": 8.455020572882895e-06, "loss": 17.519, "step": 15289 }, { "epoch": 0.2794889136673552, "grad_norm": 5.57882309167049, "learning_rate": 8.454806594644118e-06, "loss": 16.9601, "step": 15290 }, { "epoch": 0.2795071928638017, "grad_norm": 8.709692939224485, "learning_rate": 8.454592604296544e-06, "loss": 18.4858, "step": 15291 }, { "epoch": 0.27952547206024825, "grad_norm": 5.684572081931391, "learning_rate": 8.454378601840924e-06, "loss": 17.139, "step": 15292 }, { "epoch": 0.27954375125669473, "grad_norm": 5.942677312137361, "learning_rate": 8.454164587278005e-06, "loss": 17.2136, "step": 15293 }, { "epoch": 0.27956203045314126, "grad_norm": 5.81287032481252, "learning_rate": 8.45395056060854e-06, "loss": 17.1906, "step": 15294 }, { "epoch": 0.2795803096495878, "grad_norm": 7.4421357602697, "learning_rate": 8.453736521833279e-06, "loss": 17.7026, "step": 15295 }, { "epoch": 0.27959858884603433, "grad_norm": 6.87095794038651, "learning_rate": 8.453522470952968e-06, "loss": 17.4585, "step": 15296 }, { "epoch": 0.27961686804248087, "grad_norm": 8.767742792641915, "learning_rate": 8.453308407968363e-06, "loss": 17.8881, "step": 15297 }, { "epoch": 0.27963514723892735, "grad_norm": 6.283689084853071, "learning_rate": 8.45309433288021e-06, "loss": 17.3557, "step": 15298 }, { "epoch": 0.2796534264353739, "grad_norm": 6.797083943751389, "learning_rate": 8.452880245689264e-06, "loss": 17.7693, "step": 15299 }, { "epoch": 0.2796717056318204, "grad_norm": 6.775824178656483, "learning_rate": 8.452666146396268e-06, "loss": 17.8483, "step": 15300 }, { "epoch": 0.27968998482826696, "grad_norm": 7.137447876229566, "learning_rate": 8.452452035001981e-06, "loss": 18.0176, "step": 15301 }, { "epoch": 0.2797082640247135, "grad_norm": 6.7099557814928055, "learning_rate": 8.452237911507147e-06, "loss": 17.6612, "step": 15302 }, { "epoch": 0.27972654322116, "grad_norm": 6.140429114030082, "learning_rate": 8.45202377591252e-06, "loss": 17.333, "step": 15303 }, { "epoch": 0.2797448224176065, "grad_norm": 5.7035182472633705, "learning_rate": 8.451809628218847e-06, "loss": 17.1685, "step": 15304 }, { "epoch": 0.27976310161405304, "grad_norm": 6.961475674515516, "learning_rate": 8.451595468426882e-06, "loss": 17.4019, "step": 15305 }, { "epoch": 0.2797813808104996, "grad_norm": 6.981320637514193, "learning_rate": 8.451381296537375e-06, "loss": 17.6227, "step": 15306 }, { "epoch": 0.2797996600069461, "grad_norm": 7.217081963757125, "learning_rate": 8.451167112551076e-06, "loss": 17.7034, "step": 15307 }, { "epoch": 0.2798179392033926, "grad_norm": 6.63198113376361, "learning_rate": 8.450952916468734e-06, "loss": 17.5213, "step": 15308 }, { "epoch": 0.27983621839983913, "grad_norm": 6.327382881871625, "learning_rate": 8.450738708291105e-06, "loss": 17.511, "step": 15309 }, { "epoch": 0.27985449759628567, "grad_norm": 6.813876586995132, "learning_rate": 8.450524488018933e-06, "loss": 17.7001, "step": 15310 }, { "epoch": 0.2798727767927322, "grad_norm": 6.38672657410487, "learning_rate": 8.450310255652972e-06, "loss": 17.3549, "step": 15311 }, { "epoch": 0.27989105598917874, "grad_norm": 6.390512941314512, "learning_rate": 8.450096011193975e-06, "loss": 17.6012, "step": 15312 }, { "epoch": 0.2799093351856252, "grad_norm": 5.632404099979596, "learning_rate": 8.44988175464269e-06, "loss": 16.9748, "step": 15313 }, { "epoch": 0.27992761438207175, "grad_norm": 7.337163444232834, "learning_rate": 8.449667485999868e-06, "loss": 17.5949, "step": 15314 }, { "epoch": 0.2799458935785183, "grad_norm": 6.18730547586552, "learning_rate": 8.449453205266262e-06, "loss": 17.6807, "step": 15315 }, { "epoch": 0.2799641727749648, "grad_norm": 7.3207355037896225, "learning_rate": 8.449238912442621e-06, "loss": 17.6556, "step": 15316 }, { "epoch": 0.27998245197141136, "grad_norm": 5.9872163701769265, "learning_rate": 8.449024607529696e-06, "loss": 17.3126, "step": 15317 }, { "epoch": 0.28000073116785784, "grad_norm": 5.858221998433997, "learning_rate": 8.448810290528242e-06, "loss": 17.4937, "step": 15318 }, { "epoch": 0.2800190103643044, "grad_norm": 6.761771220793409, "learning_rate": 8.448595961439006e-06, "loss": 17.6793, "step": 15319 }, { "epoch": 0.2800372895607509, "grad_norm": 5.870076405893251, "learning_rate": 8.44838162026274e-06, "loss": 17.0499, "step": 15320 }, { "epoch": 0.28005556875719745, "grad_norm": 7.916559103623928, "learning_rate": 8.448167267000195e-06, "loss": 18.4175, "step": 15321 }, { "epoch": 0.280073847953644, "grad_norm": 6.791354085807862, "learning_rate": 8.447952901652123e-06, "loss": 17.4933, "step": 15322 }, { "epoch": 0.28009212715009046, "grad_norm": 7.122444690494139, "learning_rate": 8.447738524219277e-06, "loss": 18.0934, "step": 15323 }, { "epoch": 0.280110406346537, "grad_norm": 5.3402499262137155, "learning_rate": 8.447524134702405e-06, "loss": 17.1074, "step": 15324 }, { "epoch": 0.28012868554298354, "grad_norm": 6.198544112878502, "learning_rate": 8.44730973310226e-06, "loss": 17.4524, "step": 15325 }, { "epoch": 0.28014696473943007, "grad_norm": 7.0907625758521675, "learning_rate": 8.447095319419594e-06, "loss": 17.8427, "step": 15326 }, { "epoch": 0.28016524393587655, "grad_norm": 5.831380913616366, "learning_rate": 8.446880893655158e-06, "loss": 17.3841, "step": 15327 }, { "epoch": 0.2801835231323231, "grad_norm": 7.4686457544120275, "learning_rate": 8.446666455809705e-06, "loss": 17.7447, "step": 15328 }, { "epoch": 0.2802018023287696, "grad_norm": 6.541385684027912, "learning_rate": 8.446452005883982e-06, "loss": 17.8826, "step": 15329 }, { "epoch": 0.28022008152521616, "grad_norm": 6.4541582637804, "learning_rate": 8.446237543878748e-06, "loss": 17.5701, "step": 15330 }, { "epoch": 0.2802383607216627, "grad_norm": 6.377299000804159, "learning_rate": 8.446023069794747e-06, "loss": 17.5911, "step": 15331 }, { "epoch": 0.2802566399181092, "grad_norm": 6.58921984015208, "learning_rate": 8.445808583632734e-06, "loss": 17.7445, "step": 15332 }, { "epoch": 0.2802749191145557, "grad_norm": 7.439213185690679, "learning_rate": 8.445594085393463e-06, "loss": 17.6031, "step": 15333 }, { "epoch": 0.28029319831100225, "grad_norm": 6.227352568603799, "learning_rate": 8.445379575077683e-06, "loss": 17.3409, "step": 15334 }, { "epoch": 0.2803114775074488, "grad_norm": 8.549842256884657, "learning_rate": 8.445165052686147e-06, "loss": 18.3006, "step": 15335 }, { "epoch": 0.2803297567038953, "grad_norm": 7.032400686381592, "learning_rate": 8.444950518219605e-06, "loss": 17.9843, "step": 15336 }, { "epoch": 0.2803480359003418, "grad_norm": 6.67063177388569, "learning_rate": 8.444735971678812e-06, "loss": 17.4586, "step": 15337 }, { "epoch": 0.28036631509678833, "grad_norm": 7.207074160898403, "learning_rate": 8.444521413064517e-06, "loss": 17.9207, "step": 15338 }, { "epoch": 0.28038459429323487, "grad_norm": 5.3906090184452085, "learning_rate": 8.444306842377474e-06, "loss": 16.894, "step": 15339 }, { "epoch": 0.2804028734896814, "grad_norm": 8.653548263979046, "learning_rate": 8.444092259618435e-06, "loss": 18.2815, "step": 15340 }, { "epoch": 0.28042115268612794, "grad_norm": 6.18004532586676, "learning_rate": 8.443877664788151e-06, "loss": 17.6296, "step": 15341 }, { "epoch": 0.2804394318825744, "grad_norm": 8.501764140211932, "learning_rate": 8.443663057887374e-06, "loss": 17.7249, "step": 15342 }, { "epoch": 0.28045771107902095, "grad_norm": 5.7218125891780325, "learning_rate": 8.44344843891686e-06, "loss": 16.9147, "step": 15343 }, { "epoch": 0.2804759902754675, "grad_norm": 5.913127454267365, "learning_rate": 8.443233807877353e-06, "loss": 17.2629, "step": 15344 }, { "epoch": 0.280494269471914, "grad_norm": 6.820627725264064, "learning_rate": 8.443019164769613e-06, "loss": 17.5698, "step": 15345 }, { "epoch": 0.28051254866836056, "grad_norm": 6.022789544760202, "learning_rate": 8.442804509594388e-06, "loss": 17.3107, "step": 15346 }, { "epoch": 0.28053082786480704, "grad_norm": 8.94514036680201, "learning_rate": 8.442589842352436e-06, "loss": 18.5076, "step": 15347 }, { "epoch": 0.2805491070612536, "grad_norm": 6.1553618854905485, "learning_rate": 8.442375163044502e-06, "loss": 17.2621, "step": 15348 }, { "epoch": 0.2805673862577001, "grad_norm": 5.815419277840977, "learning_rate": 8.442160471671341e-06, "loss": 17.3209, "step": 15349 }, { "epoch": 0.28058566545414665, "grad_norm": 7.022413121021528, "learning_rate": 8.441945768233709e-06, "loss": 17.7672, "step": 15350 }, { "epoch": 0.2806039446505932, "grad_norm": 6.314723020154745, "learning_rate": 8.441731052732354e-06, "loss": 17.2963, "step": 15351 }, { "epoch": 0.28062222384703966, "grad_norm": 6.0191116234295645, "learning_rate": 8.44151632516803e-06, "loss": 17.4075, "step": 15352 }, { "epoch": 0.2806405030434862, "grad_norm": 5.652747273261852, "learning_rate": 8.44130158554149e-06, "loss": 17.2558, "step": 15353 }, { "epoch": 0.28065878223993274, "grad_norm": 7.9446397636589765, "learning_rate": 8.441086833853489e-06, "loss": 17.9289, "step": 15354 }, { "epoch": 0.28067706143637927, "grad_norm": 7.053167863551996, "learning_rate": 8.440872070104776e-06, "loss": 17.8498, "step": 15355 }, { "epoch": 0.2806953406328258, "grad_norm": 7.827079397809964, "learning_rate": 8.440657294296103e-06, "loss": 17.8238, "step": 15356 }, { "epoch": 0.2807136198292723, "grad_norm": 6.41107055789248, "learning_rate": 8.440442506428225e-06, "loss": 17.5368, "step": 15357 }, { "epoch": 0.2807318990257188, "grad_norm": 6.278500525959484, "learning_rate": 8.440227706501897e-06, "loss": 17.7145, "step": 15358 }, { "epoch": 0.28075017822216536, "grad_norm": 7.4986718163922745, "learning_rate": 8.440012894517868e-06, "loss": 17.7521, "step": 15359 }, { "epoch": 0.2807684574186119, "grad_norm": 6.184931507062844, "learning_rate": 8.439798070476891e-06, "loss": 17.5835, "step": 15360 }, { "epoch": 0.2807867366150584, "grad_norm": 6.681625724115947, "learning_rate": 8.439583234379722e-06, "loss": 17.6689, "step": 15361 }, { "epoch": 0.2808050158115049, "grad_norm": 5.35760730682152, "learning_rate": 8.439368386227114e-06, "loss": 16.8947, "step": 15362 }, { "epoch": 0.28082329500795145, "grad_norm": 6.548154694259325, "learning_rate": 8.439153526019814e-06, "loss": 17.3629, "step": 15363 }, { "epoch": 0.280841574204398, "grad_norm": 5.17716918435155, "learning_rate": 8.438938653758583e-06, "loss": 16.9253, "step": 15364 }, { "epoch": 0.2808598534008445, "grad_norm": 6.725656835076324, "learning_rate": 8.438723769444167e-06, "loss": 17.6387, "step": 15365 }, { "epoch": 0.280878132597291, "grad_norm": 7.6716436303256605, "learning_rate": 8.438508873077326e-06, "loss": 17.9415, "step": 15366 }, { "epoch": 0.28089641179373753, "grad_norm": 6.062060340168047, "learning_rate": 8.438293964658808e-06, "loss": 17.4276, "step": 15367 }, { "epoch": 0.28091469099018407, "grad_norm": 6.5146708472749, "learning_rate": 8.438079044189369e-06, "loss": 17.4647, "step": 15368 }, { "epoch": 0.2809329701866306, "grad_norm": 6.356540002782304, "learning_rate": 8.43786411166976e-06, "loss": 17.5814, "step": 15369 }, { "epoch": 0.28095124938307714, "grad_norm": 7.0890911572255995, "learning_rate": 8.437649167100736e-06, "loss": 18.2637, "step": 15370 }, { "epoch": 0.2809695285795236, "grad_norm": 6.842492378927014, "learning_rate": 8.43743421048305e-06, "loss": 17.8266, "step": 15371 }, { "epoch": 0.28098780777597016, "grad_norm": 7.198672708326323, "learning_rate": 8.437219241817456e-06, "loss": 18.1389, "step": 15372 }, { "epoch": 0.2810060869724167, "grad_norm": 7.038255403981865, "learning_rate": 8.437004261104706e-06, "loss": 17.7662, "step": 15373 }, { "epoch": 0.2810243661688632, "grad_norm": 7.696241643414113, "learning_rate": 8.436789268345555e-06, "loss": 17.8438, "step": 15374 }, { "epoch": 0.28104264536530976, "grad_norm": 6.362290596275576, "learning_rate": 8.436574263540756e-06, "loss": 17.4141, "step": 15375 }, { "epoch": 0.28106092456175624, "grad_norm": 6.772775790329577, "learning_rate": 8.436359246691062e-06, "loss": 17.8197, "step": 15376 }, { "epoch": 0.2810792037582028, "grad_norm": 6.125749331631318, "learning_rate": 8.436144217797227e-06, "loss": 17.3761, "step": 15377 }, { "epoch": 0.2810974829546493, "grad_norm": 7.208241423222614, "learning_rate": 8.435929176860004e-06, "loss": 18.3448, "step": 15378 }, { "epoch": 0.28111576215109585, "grad_norm": 7.937853810954681, "learning_rate": 8.435714123880149e-06, "loss": 17.792, "step": 15379 }, { "epoch": 0.2811340413475424, "grad_norm": 6.1100040916416605, "learning_rate": 8.435499058858413e-06, "loss": 17.5025, "step": 15380 }, { "epoch": 0.28115232054398887, "grad_norm": 8.344809145928709, "learning_rate": 8.435283981795551e-06, "loss": 18.2036, "step": 15381 }, { "epoch": 0.2811705997404354, "grad_norm": 7.70988367137395, "learning_rate": 8.435068892692317e-06, "loss": 18.2193, "step": 15382 }, { "epoch": 0.28118887893688194, "grad_norm": 6.554828258743084, "learning_rate": 8.434853791549464e-06, "loss": 17.4626, "step": 15383 }, { "epoch": 0.28120715813332847, "grad_norm": 5.832048397802159, "learning_rate": 8.434638678367747e-06, "loss": 17.0833, "step": 15384 }, { "epoch": 0.281225437329775, "grad_norm": 6.280671045705303, "learning_rate": 8.43442355314792e-06, "loss": 17.5083, "step": 15385 }, { "epoch": 0.2812437165262215, "grad_norm": 7.426942848885526, "learning_rate": 8.434208415890738e-06, "loss": 18.0593, "step": 15386 }, { "epoch": 0.281261995722668, "grad_norm": 6.082815319521803, "learning_rate": 8.433993266596949e-06, "loss": 17.0419, "step": 15387 }, { "epoch": 0.28128027491911456, "grad_norm": 15.46209403251262, "learning_rate": 8.433778105267314e-06, "loss": 18.4867, "step": 15388 }, { "epoch": 0.2812985541155611, "grad_norm": 7.5902355214347255, "learning_rate": 8.433562931902585e-06, "loss": 17.9188, "step": 15389 }, { "epoch": 0.28131683331200763, "grad_norm": 6.61907768246289, "learning_rate": 8.433347746503516e-06, "loss": 17.4187, "step": 15390 }, { "epoch": 0.2813351125084541, "grad_norm": 5.5708106598423965, "learning_rate": 8.433132549070861e-06, "loss": 17.286, "step": 15391 }, { "epoch": 0.28135339170490065, "grad_norm": 7.01446519671279, "learning_rate": 8.432917339605375e-06, "loss": 17.9308, "step": 15392 }, { "epoch": 0.2813716709013472, "grad_norm": 7.418061223298455, "learning_rate": 8.43270211810781e-06, "loss": 17.9232, "step": 15393 }, { "epoch": 0.2813899500977937, "grad_norm": 6.169425131779138, "learning_rate": 8.432486884578922e-06, "loss": 17.5341, "step": 15394 }, { "epoch": 0.2814082292942402, "grad_norm": 7.663989986517547, "learning_rate": 8.432271639019466e-06, "loss": 17.9359, "step": 15395 }, { "epoch": 0.28142650849068673, "grad_norm": 6.417772884984047, "learning_rate": 8.432056381430196e-06, "loss": 17.4621, "step": 15396 }, { "epoch": 0.28144478768713327, "grad_norm": 7.620574417470514, "learning_rate": 8.431841111811864e-06, "loss": 17.4967, "step": 15397 }, { "epoch": 0.2814630668835798, "grad_norm": 5.205424429005494, "learning_rate": 8.43162583016523e-06, "loss": 16.956, "step": 15398 }, { "epoch": 0.28148134608002634, "grad_norm": 9.94632121770399, "learning_rate": 8.431410536491044e-06, "loss": 18.3539, "step": 15399 }, { "epoch": 0.2814996252764728, "grad_norm": 6.594947742304105, "learning_rate": 8.431195230790062e-06, "loss": 17.7453, "step": 15400 }, { "epoch": 0.28151790447291936, "grad_norm": 5.987288957161305, "learning_rate": 8.430979913063038e-06, "loss": 17.1959, "step": 15401 }, { "epoch": 0.2815361836693659, "grad_norm": 7.359071074054522, "learning_rate": 8.430764583310727e-06, "loss": 17.8892, "step": 15402 }, { "epoch": 0.2815544628658124, "grad_norm": 6.409411253387844, "learning_rate": 8.430549241533885e-06, "loss": 17.3761, "step": 15403 }, { "epoch": 0.28157274206225896, "grad_norm": 6.850570345828789, "learning_rate": 8.430333887733265e-06, "loss": 17.7419, "step": 15404 }, { "epoch": 0.28159102125870544, "grad_norm": 7.4295910040714634, "learning_rate": 8.430118521909623e-06, "loss": 18.2057, "step": 15405 }, { "epoch": 0.281609300455152, "grad_norm": 7.416582043114402, "learning_rate": 8.429903144063711e-06, "loss": 17.7305, "step": 15406 }, { "epoch": 0.2816275796515985, "grad_norm": 8.537220994678437, "learning_rate": 8.429687754196287e-06, "loss": 18.4273, "step": 15407 }, { "epoch": 0.28164585884804505, "grad_norm": 6.840629534012434, "learning_rate": 8.429472352308106e-06, "loss": 17.5493, "step": 15408 }, { "epoch": 0.2816641380444916, "grad_norm": 7.47801949131773, "learning_rate": 8.429256938399922e-06, "loss": 17.9558, "step": 15409 }, { "epoch": 0.28168241724093807, "grad_norm": 6.860487889768455, "learning_rate": 8.42904151247249e-06, "loss": 17.5536, "step": 15410 }, { "epoch": 0.2817006964373846, "grad_norm": 7.406907383950079, "learning_rate": 8.428826074526564e-06, "loss": 18.189, "step": 15411 }, { "epoch": 0.28171897563383114, "grad_norm": 6.839722894076168, "learning_rate": 8.428610624562901e-06, "loss": 17.9266, "step": 15412 }, { "epoch": 0.2817372548302777, "grad_norm": 6.394184142195576, "learning_rate": 8.428395162582255e-06, "loss": 17.7828, "step": 15413 }, { "epoch": 0.2817555340267242, "grad_norm": 6.365026512467458, "learning_rate": 8.428179688585381e-06, "loss": 17.3171, "step": 15414 }, { "epoch": 0.2817738132231707, "grad_norm": 6.239152923057427, "learning_rate": 8.427964202573035e-06, "loss": 17.1903, "step": 15415 }, { "epoch": 0.2817920924196172, "grad_norm": 6.800554911763028, "learning_rate": 8.427748704545973e-06, "loss": 17.5401, "step": 15416 }, { "epoch": 0.28181037161606376, "grad_norm": 6.606856572064669, "learning_rate": 8.427533194504947e-06, "loss": 17.622, "step": 15417 }, { "epoch": 0.2818286508125103, "grad_norm": 7.890341607230291, "learning_rate": 8.427317672450717e-06, "loss": 18.1035, "step": 15418 }, { "epoch": 0.28184693000895683, "grad_norm": 4.968144029529073, "learning_rate": 8.427102138384035e-06, "loss": 16.7538, "step": 15419 }, { "epoch": 0.2818652092054033, "grad_norm": 6.890989192427271, "learning_rate": 8.426886592305656e-06, "loss": 17.4269, "step": 15420 }, { "epoch": 0.28188348840184985, "grad_norm": 6.498491295188947, "learning_rate": 8.426671034216339e-06, "loss": 17.6894, "step": 15421 }, { "epoch": 0.2819017675982964, "grad_norm": 7.178414595108676, "learning_rate": 8.426455464116836e-06, "loss": 17.9236, "step": 15422 }, { "epoch": 0.2819200467947429, "grad_norm": 8.1254247136996, "learning_rate": 8.426239882007906e-06, "loss": 18.5536, "step": 15423 }, { "epoch": 0.28193832599118945, "grad_norm": 7.220048205570177, "learning_rate": 8.4260242878903e-06, "loss": 17.6249, "step": 15424 }, { "epoch": 0.28195660518763593, "grad_norm": 7.2183637462018515, "learning_rate": 8.425808681764776e-06, "loss": 17.3812, "step": 15425 }, { "epoch": 0.28197488438408247, "grad_norm": 6.614329901433477, "learning_rate": 8.425593063632092e-06, "loss": 17.5936, "step": 15426 }, { "epoch": 0.281993163580529, "grad_norm": 7.934813215507083, "learning_rate": 8.425377433493e-06, "loss": 17.8976, "step": 15427 }, { "epoch": 0.28201144277697554, "grad_norm": 7.4831568881450785, "learning_rate": 8.425161791348258e-06, "loss": 17.8004, "step": 15428 }, { "epoch": 0.282029721973422, "grad_norm": 6.702634420573207, "learning_rate": 8.424946137198621e-06, "loss": 17.7498, "step": 15429 }, { "epoch": 0.28204800116986856, "grad_norm": 6.774928563263164, "learning_rate": 8.424730471044845e-06, "loss": 17.8457, "step": 15430 }, { "epoch": 0.2820662803663151, "grad_norm": 6.819093186442741, "learning_rate": 8.424514792887686e-06, "loss": 17.7781, "step": 15431 }, { "epoch": 0.2820845595627616, "grad_norm": 5.697913517231326, "learning_rate": 8.424299102727899e-06, "loss": 17.3248, "step": 15432 }, { "epoch": 0.28210283875920816, "grad_norm": 7.531715739191498, "learning_rate": 8.424083400566243e-06, "loss": 17.9968, "step": 15433 }, { "epoch": 0.28212111795565464, "grad_norm": 6.673835150266609, "learning_rate": 8.42386768640347e-06, "loss": 17.8088, "step": 15434 }, { "epoch": 0.2821393971521012, "grad_norm": 5.569046211206729, "learning_rate": 8.423651960240339e-06, "loss": 17.2213, "step": 15435 }, { "epoch": 0.2821576763485477, "grad_norm": 6.4540470447502285, "learning_rate": 8.423436222077603e-06, "loss": 17.4253, "step": 15436 }, { "epoch": 0.28217595554499425, "grad_norm": 7.489503211144116, "learning_rate": 8.423220471916022e-06, "loss": 17.9254, "step": 15437 }, { "epoch": 0.2821942347414408, "grad_norm": 7.188916281016509, "learning_rate": 8.423004709756348e-06, "loss": 17.5623, "step": 15438 }, { "epoch": 0.28221251393788727, "grad_norm": 7.454564411197409, "learning_rate": 8.422788935599341e-06, "loss": 17.6314, "step": 15439 }, { "epoch": 0.2822307931343338, "grad_norm": 5.741675049568417, "learning_rate": 8.422573149445756e-06, "loss": 17.2068, "step": 15440 }, { "epoch": 0.28224907233078034, "grad_norm": 6.689218308835845, "learning_rate": 8.422357351296349e-06, "loss": 17.7591, "step": 15441 }, { "epoch": 0.2822673515272269, "grad_norm": 6.211700649871322, "learning_rate": 8.422141541151878e-06, "loss": 17.6763, "step": 15442 }, { "epoch": 0.2822856307236734, "grad_norm": 6.638265554748326, "learning_rate": 8.421925719013096e-06, "loss": 17.5908, "step": 15443 }, { "epoch": 0.2823039099201199, "grad_norm": 6.925772444016115, "learning_rate": 8.421709884880762e-06, "loss": 17.3167, "step": 15444 }, { "epoch": 0.2823221891165664, "grad_norm": 7.298203244186044, "learning_rate": 8.42149403875563e-06, "loss": 17.8008, "step": 15445 }, { "epoch": 0.28234046831301296, "grad_norm": 6.7739748624787195, "learning_rate": 8.42127818063846e-06, "loss": 17.886, "step": 15446 }, { "epoch": 0.2823587475094595, "grad_norm": 6.675542708786623, "learning_rate": 8.421062310530008e-06, "loss": 17.7136, "step": 15447 }, { "epoch": 0.28237702670590603, "grad_norm": 6.530552262727731, "learning_rate": 8.420846428431026e-06, "loss": 17.6029, "step": 15448 }, { "epoch": 0.2823953059023525, "grad_norm": 7.964362371597339, "learning_rate": 8.420630534342277e-06, "loss": 18.4993, "step": 15449 }, { "epoch": 0.28241358509879905, "grad_norm": 6.968830900129372, "learning_rate": 8.420414628264515e-06, "loss": 17.7445, "step": 15450 }, { "epoch": 0.2824318642952456, "grad_norm": 8.277947052614286, "learning_rate": 8.420198710198495e-06, "loss": 17.8483, "step": 15451 }, { "epoch": 0.2824501434916921, "grad_norm": 5.802198301028127, "learning_rate": 8.419982780144974e-06, "loss": 17.0768, "step": 15452 }, { "epoch": 0.28246842268813865, "grad_norm": 6.816259597711021, "learning_rate": 8.419766838104712e-06, "loss": 17.5801, "step": 15453 }, { "epoch": 0.28248670188458513, "grad_norm": 6.2967691082072035, "learning_rate": 8.419550884078465e-06, "loss": 17.6557, "step": 15454 }, { "epoch": 0.28250498108103167, "grad_norm": 6.258892931758237, "learning_rate": 8.419334918066987e-06, "loss": 17.4094, "step": 15455 }, { "epoch": 0.2825232602774782, "grad_norm": 6.862327542047562, "learning_rate": 8.419118940071039e-06, "loss": 17.9269, "step": 15456 }, { "epoch": 0.28254153947392474, "grad_norm": 6.718567381681014, "learning_rate": 8.418902950091374e-06, "loss": 17.5564, "step": 15457 }, { "epoch": 0.2825598186703713, "grad_norm": 6.717837238026571, "learning_rate": 8.418686948128752e-06, "loss": 17.4344, "step": 15458 }, { "epoch": 0.28257809786681776, "grad_norm": 11.766996616844157, "learning_rate": 8.418470934183927e-06, "loss": 18.5154, "step": 15459 }, { "epoch": 0.2825963770632643, "grad_norm": 5.102631544374815, "learning_rate": 8.418254908257659e-06, "loss": 16.8542, "step": 15460 }, { "epoch": 0.28261465625971083, "grad_norm": 6.080400088498884, "learning_rate": 8.418038870350706e-06, "loss": 17.3789, "step": 15461 }, { "epoch": 0.28263293545615736, "grad_norm": 6.188239214951652, "learning_rate": 8.417822820463822e-06, "loss": 17.4083, "step": 15462 }, { "epoch": 0.28265121465260384, "grad_norm": 5.514424312512443, "learning_rate": 8.417606758597765e-06, "loss": 17.0802, "step": 15463 }, { "epoch": 0.2826694938490504, "grad_norm": 5.558196896729641, "learning_rate": 8.417390684753292e-06, "loss": 17.2066, "step": 15464 }, { "epoch": 0.2826877730454969, "grad_norm": 5.907595343879084, "learning_rate": 8.417174598931163e-06, "loss": 17.2644, "step": 15465 }, { "epoch": 0.28270605224194345, "grad_norm": 6.63700062319701, "learning_rate": 8.416958501132133e-06, "loss": 17.699, "step": 15466 }, { "epoch": 0.28272433143839, "grad_norm": 5.6432284918911755, "learning_rate": 8.41674239135696e-06, "loss": 17.0312, "step": 15467 }, { "epoch": 0.28274261063483647, "grad_norm": 6.289393242218711, "learning_rate": 8.416526269606402e-06, "loss": 17.5631, "step": 15468 }, { "epoch": 0.282760889831283, "grad_norm": 7.5551513783786355, "learning_rate": 8.416310135881214e-06, "loss": 17.7653, "step": 15469 }, { "epoch": 0.28277916902772954, "grad_norm": 5.956620052584515, "learning_rate": 8.416093990182157e-06, "loss": 17.3785, "step": 15470 }, { "epoch": 0.2827974482241761, "grad_norm": 6.86001654615364, "learning_rate": 8.415877832509987e-06, "loss": 17.6636, "step": 15471 }, { "epoch": 0.2828157274206226, "grad_norm": 6.382719215755634, "learning_rate": 8.415661662865462e-06, "loss": 17.7263, "step": 15472 }, { "epoch": 0.2828340066170691, "grad_norm": 6.1246574539133585, "learning_rate": 8.415445481249339e-06, "loss": 17.2325, "step": 15473 }, { "epoch": 0.2828522858135156, "grad_norm": 7.390938265665142, "learning_rate": 8.415229287662375e-06, "loss": 17.7339, "step": 15474 }, { "epoch": 0.28287056500996216, "grad_norm": 5.215737196107322, "learning_rate": 8.415013082105328e-06, "loss": 17.035, "step": 15475 }, { "epoch": 0.2828888442064087, "grad_norm": 7.755466898336756, "learning_rate": 8.414796864578957e-06, "loss": 17.7883, "step": 15476 }, { "epoch": 0.28290712340285523, "grad_norm": 6.8293170026329255, "learning_rate": 8.41458063508402e-06, "loss": 17.6541, "step": 15477 }, { "epoch": 0.2829254025993017, "grad_norm": 6.517733841349658, "learning_rate": 8.414364393621274e-06, "loss": 17.5341, "step": 15478 }, { "epoch": 0.28294368179574825, "grad_norm": 5.862465874817472, "learning_rate": 8.414148140191479e-06, "loss": 17.1817, "step": 15479 }, { "epoch": 0.2829619609921948, "grad_norm": 7.381018863754203, "learning_rate": 8.41393187479539e-06, "loss": 17.7526, "step": 15480 }, { "epoch": 0.2829802401886413, "grad_norm": 6.833948647150858, "learning_rate": 8.413715597433764e-06, "loss": 17.6928, "step": 15481 }, { "epoch": 0.28299851938508785, "grad_norm": 6.521558147809372, "learning_rate": 8.413499308107363e-06, "loss": 17.5961, "step": 15482 }, { "epoch": 0.28301679858153433, "grad_norm": 5.946615365920805, "learning_rate": 8.413283006816943e-06, "loss": 17.3043, "step": 15483 }, { "epoch": 0.28303507777798087, "grad_norm": 6.485929179868215, "learning_rate": 8.413066693563262e-06, "loss": 17.5305, "step": 15484 }, { "epoch": 0.2830533569744274, "grad_norm": 5.945363387251339, "learning_rate": 8.412850368347077e-06, "loss": 17.0616, "step": 15485 }, { "epoch": 0.28307163617087394, "grad_norm": 6.54235154309566, "learning_rate": 8.412634031169148e-06, "loss": 17.3017, "step": 15486 }, { "epoch": 0.2830899153673205, "grad_norm": 5.686563129486015, "learning_rate": 8.412417682030234e-06, "loss": 17.4352, "step": 15487 }, { "epoch": 0.28310819456376696, "grad_norm": 6.595737961887192, "learning_rate": 8.412201320931092e-06, "loss": 17.5812, "step": 15488 }, { "epoch": 0.2831264737602135, "grad_norm": 6.398344498781888, "learning_rate": 8.41198494787248e-06, "loss": 17.2987, "step": 15489 }, { "epoch": 0.28314475295666003, "grad_norm": 6.557725214850864, "learning_rate": 8.411768562855157e-06, "loss": 17.7187, "step": 15490 }, { "epoch": 0.28316303215310656, "grad_norm": 6.50233786074878, "learning_rate": 8.411552165879881e-06, "loss": 17.3658, "step": 15491 }, { "epoch": 0.2831813113495531, "grad_norm": 6.1620822188746125, "learning_rate": 8.411335756947411e-06, "loss": 17.5341, "step": 15492 }, { "epoch": 0.2831995905459996, "grad_norm": 5.969493183820305, "learning_rate": 8.411119336058506e-06, "loss": 17.4539, "step": 15493 }, { "epoch": 0.2832178697424461, "grad_norm": 5.99854252879164, "learning_rate": 8.410902903213924e-06, "loss": 17.3071, "step": 15494 }, { "epoch": 0.28323614893889265, "grad_norm": 5.433585164652213, "learning_rate": 8.410686458414421e-06, "loss": 17.0368, "step": 15495 }, { "epoch": 0.2832544281353392, "grad_norm": 7.437621076301175, "learning_rate": 8.41047000166076e-06, "loss": 17.613, "step": 15496 }, { "epoch": 0.28327270733178567, "grad_norm": 6.106388244878, "learning_rate": 8.410253532953696e-06, "loss": 17.1693, "step": 15497 }, { "epoch": 0.2832909865282322, "grad_norm": 6.386802219835082, "learning_rate": 8.410037052293992e-06, "loss": 17.2761, "step": 15498 }, { "epoch": 0.28330926572467874, "grad_norm": 6.369372505903644, "learning_rate": 8.409820559682402e-06, "loss": 17.7474, "step": 15499 }, { "epoch": 0.2833275449211253, "grad_norm": 6.271390582198113, "learning_rate": 8.409604055119687e-06, "loss": 17.5237, "step": 15500 }, { "epoch": 0.2833458241175718, "grad_norm": 6.5256147398044835, "learning_rate": 8.409387538606605e-06, "loss": 17.1695, "step": 15501 }, { "epoch": 0.2833641033140183, "grad_norm": 5.786452002426799, "learning_rate": 8.409171010143916e-06, "loss": 17.4172, "step": 15502 }, { "epoch": 0.2833823825104648, "grad_norm": 8.84299267926821, "learning_rate": 8.40895446973238e-06, "loss": 18.0781, "step": 15503 }, { "epoch": 0.28340066170691136, "grad_norm": 5.691610922571614, "learning_rate": 8.408737917372751e-06, "loss": 17.104, "step": 15504 }, { "epoch": 0.2834189409033579, "grad_norm": 6.379030894126828, "learning_rate": 8.408521353065796e-06, "loss": 17.4691, "step": 15505 }, { "epoch": 0.28343722009980443, "grad_norm": 7.063629606285928, "learning_rate": 8.408304776812266e-06, "loss": 17.5241, "step": 15506 }, { "epoch": 0.2834554992962509, "grad_norm": 6.4461397749199545, "learning_rate": 8.408088188612923e-06, "loss": 17.4663, "step": 15507 }, { "epoch": 0.28347377849269745, "grad_norm": 7.239733713449572, "learning_rate": 8.407871588468527e-06, "loss": 17.5837, "step": 15508 }, { "epoch": 0.283492057689144, "grad_norm": 6.703860169477245, "learning_rate": 8.40765497637984e-06, "loss": 17.66, "step": 15509 }, { "epoch": 0.2835103368855905, "grad_norm": 6.8981920422858485, "learning_rate": 8.407438352347614e-06, "loss": 17.2924, "step": 15510 }, { "epoch": 0.28352861608203705, "grad_norm": 6.9308056613648485, "learning_rate": 8.407221716372615e-06, "loss": 17.6194, "step": 15511 }, { "epoch": 0.28354689527848354, "grad_norm": 8.630598151276228, "learning_rate": 8.4070050684556e-06, "loss": 18.5181, "step": 15512 }, { "epoch": 0.28356517447493007, "grad_norm": 6.140531931427257, "learning_rate": 8.406788408597324e-06, "loss": 17.1919, "step": 15513 }, { "epoch": 0.2835834536713766, "grad_norm": 6.105063839189512, "learning_rate": 8.406571736798554e-06, "loss": 17.3691, "step": 15514 }, { "epoch": 0.28360173286782314, "grad_norm": 5.6438490398010845, "learning_rate": 8.406355053060044e-06, "loss": 17.1524, "step": 15515 }, { "epoch": 0.2836200120642697, "grad_norm": 5.594531492297863, "learning_rate": 8.406138357382556e-06, "loss": 17.2135, "step": 15516 }, { "epoch": 0.28363829126071616, "grad_norm": 8.008643271750694, "learning_rate": 8.405921649766849e-06, "loss": 18.2914, "step": 15517 }, { "epoch": 0.2836565704571627, "grad_norm": 5.957476832027726, "learning_rate": 8.40570493021368e-06, "loss": 17.182, "step": 15518 }, { "epoch": 0.28367484965360923, "grad_norm": 6.777397209515814, "learning_rate": 8.405488198723813e-06, "loss": 17.5969, "step": 15519 }, { "epoch": 0.28369312885005576, "grad_norm": 6.620547159990193, "learning_rate": 8.405271455298005e-06, "loss": 17.5505, "step": 15520 }, { "epoch": 0.2837114080465023, "grad_norm": 6.648656132268679, "learning_rate": 8.405054699937014e-06, "loss": 17.4893, "step": 15521 }, { "epoch": 0.2837296872429488, "grad_norm": 5.712029974103151, "learning_rate": 8.404837932641604e-06, "loss": 17.2915, "step": 15522 }, { "epoch": 0.2837479664393953, "grad_norm": 7.236765825272202, "learning_rate": 8.404621153412532e-06, "loss": 17.6133, "step": 15523 }, { "epoch": 0.28376624563584185, "grad_norm": 8.10122431474489, "learning_rate": 8.404404362250558e-06, "loss": 18.0702, "step": 15524 }, { "epoch": 0.2837845248322884, "grad_norm": 7.144456215384271, "learning_rate": 8.404187559156443e-06, "loss": 17.7497, "step": 15525 }, { "epoch": 0.2838028040287349, "grad_norm": 6.659411430034418, "learning_rate": 8.403970744130945e-06, "loss": 17.6221, "step": 15526 }, { "epoch": 0.2838210832251814, "grad_norm": 7.175396214025024, "learning_rate": 8.403753917174825e-06, "loss": 17.8914, "step": 15527 }, { "epoch": 0.28383936242162794, "grad_norm": 6.189528774703327, "learning_rate": 8.403537078288843e-06, "loss": 17.5811, "step": 15528 }, { "epoch": 0.2838576416180745, "grad_norm": 5.856297634820472, "learning_rate": 8.403320227473759e-06, "loss": 17.1073, "step": 15529 }, { "epoch": 0.283875920814521, "grad_norm": 8.643114153220317, "learning_rate": 8.403103364730333e-06, "loss": 18.4561, "step": 15530 }, { "epoch": 0.2838942000109675, "grad_norm": 7.828290680158178, "learning_rate": 8.402886490059325e-06, "loss": 18.0083, "step": 15531 }, { "epoch": 0.283912479207414, "grad_norm": 6.445601967259433, "learning_rate": 8.402669603461495e-06, "loss": 17.2818, "step": 15532 }, { "epoch": 0.28393075840386056, "grad_norm": 8.173572547923358, "learning_rate": 8.402452704937602e-06, "loss": 18.4044, "step": 15533 }, { "epoch": 0.2839490376003071, "grad_norm": 7.3105068229246415, "learning_rate": 8.40223579448841e-06, "loss": 17.8771, "step": 15534 }, { "epoch": 0.28396731679675363, "grad_norm": 8.043554647759427, "learning_rate": 8.402018872114675e-06, "loss": 17.6716, "step": 15535 }, { "epoch": 0.2839855959932001, "grad_norm": 8.575846251880327, "learning_rate": 8.40180193781716e-06, "loss": 18.127, "step": 15536 }, { "epoch": 0.28400387518964665, "grad_norm": 8.448946370612017, "learning_rate": 8.401584991596623e-06, "loss": 18.2131, "step": 15537 }, { "epoch": 0.2840221543860932, "grad_norm": 6.12334406272895, "learning_rate": 8.401368033453827e-06, "loss": 17.3053, "step": 15538 }, { "epoch": 0.2840404335825397, "grad_norm": 6.517376042846156, "learning_rate": 8.401151063389533e-06, "loss": 17.3515, "step": 15539 }, { "epoch": 0.28405871277898626, "grad_norm": 6.126027630011614, "learning_rate": 8.400934081404497e-06, "loss": 17.2624, "step": 15540 }, { "epoch": 0.28407699197543274, "grad_norm": 6.057541862518091, "learning_rate": 8.400717087499483e-06, "loss": 17.1479, "step": 15541 }, { "epoch": 0.28409527117187927, "grad_norm": 6.5286989223360345, "learning_rate": 8.40050008167525e-06, "loss": 17.3018, "step": 15542 }, { "epoch": 0.2841135503683258, "grad_norm": 7.968956088806122, "learning_rate": 8.40028306393256e-06, "loss": 17.7986, "step": 15543 }, { "epoch": 0.28413182956477234, "grad_norm": 5.88287122620639, "learning_rate": 8.400066034272173e-06, "loss": 17.3807, "step": 15544 }, { "epoch": 0.2841501087612189, "grad_norm": 7.255389013799567, "learning_rate": 8.399848992694849e-06, "loss": 17.6915, "step": 15545 }, { "epoch": 0.28416838795766536, "grad_norm": 6.207393249947155, "learning_rate": 8.39963193920135e-06, "loss": 17.6513, "step": 15546 }, { "epoch": 0.2841866671541119, "grad_norm": 5.493874442691914, "learning_rate": 8.399414873792435e-06, "loss": 17.0561, "step": 15547 }, { "epoch": 0.28420494635055843, "grad_norm": 6.265429232395019, "learning_rate": 8.399197796468867e-06, "loss": 17.4793, "step": 15548 }, { "epoch": 0.28422322554700497, "grad_norm": 6.986659093185191, "learning_rate": 8.398980707231405e-06, "loss": 17.6765, "step": 15549 }, { "epoch": 0.2842415047434515, "grad_norm": 5.722656947055226, "learning_rate": 8.398763606080812e-06, "loss": 17.0972, "step": 15550 }, { "epoch": 0.284259783939898, "grad_norm": 6.6297669680583065, "learning_rate": 8.398546493017846e-06, "loss": 17.5315, "step": 15551 }, { "epoch": 0.2842780631363445, "grad_norm": 6.3911646778028155, "learning_rate": 8.39832936804327e-06, "loss": 17.3867, "step": 15552 }, { "epoch": 0.28429634233279105, "grad_norm": 6.234982800213382, "learning_rate": 8.398112231157844e-06, "loss": 17.3341, "step": 15553 }, { "epoch": 0.2843146215292376, "grad_norm": 7.501273893139183, "learning_rate": 8.39789508236233e-06, "loss": 18.1063, "step": 15554 }, { "epoch": 0.2843329007256841, "grad_norm": 5.731369411501362, "learning_rate": 8.397677921657488e-06, "loss": 17.0956, "step": 15555 }, { "epoch": 0.2843511799221306, "grad_norm": 5.81032010845882, "learning_rate": 8.397460749044079e-06, "loss": 17.3655, "step": 15556 }, { "epoch": 0.28436945911857714, "grad_norm": 7.082287812047839, "learning_rate": 8.397243564522867e-06, "loss": 17.5869, "step": 15557 }, { "epoch": 0.2843877383150237, "grad_norm": 6.594087648334816, "learning_rate": 8.39702636809461e-06, "loss": 17.4453, "step": 15558 }, { "epoch": 0.2844060175114702, "grad_norm": 6.80664865386882, "learning_rate": 8.39680915976007e-06, "loss": 17.7205, "step": 15559 }, { "epoch": 0.28442429670791675, "grad_norm": 7.412781021068577, "learning_rate": 8.39659193952001e-06, "loss": 17.7192, "step": 15560 }, { "epoch": 0.2844425759043632, "grad_norm": 5.867012061418011, "learning_rate": 8.39637470737519e-06, "loss": 17.2483, "step": 15561 }, { "epoch": 0.28446085510080976, "grad_norm": 9.014390204684132, "learning_rate": 8.39615746332637e-06, "loss": 18.6399, "step": 15562 }, { "epoch": 0.2844791342972563, "grad_norm": 7.759039131336317, "learning_rate": 8.395940207374314e-06, "loss": 17.5036, "step": 15563 }, { "epoch": 0.28449741349370283, "grad_norm": 6.503756645272427, "learning_rate": 8.395722939519782e-06, "loss": 17.5279, "step": 15564 }, { "epoch": 0.2845156926901493, "grad_norm": 7.171876314404471, "learning_rate": 8.395505659763534e-06, "loss": 17.8446, "step": 15565 }, { "epoch": 0.28453397188659585, "grad_norm": 6.245292427653352, "learning_rate": 8.395288368106334e-06, "loss": 17.5238, "step": 15566 }, { "epoch": 0.2845522510830424, "grad_norm": 6.8851167604665084, "learning_rate": 8.395071064548945e-06, "loss": 17.7864, "step": 15567 }, { "epoch": 0.2845705302794889, "grad_norm": 5.5415031339971925, "learning_rate": 8.394853749092125e-06, "loss": 17.0479, "step": 15568 }, { "epoch": 0.28458880947593546, "grad_norm": 5.524736013309662, "learning_rate": 8.394636421736637e-06, "loss": 17.1075, "step": 15569 }, { "epoch": 0.28460708867238194, "grad_norm": 5.053254637388772, "learning_rate": 8.394419082483242e-06, "loss": 16.9987, "step": 15570 }, { "epoch": 0.28462536786882847, "grad_norm": 6.435384113223739, "learning_rate": 8.394201731332705e-06, "loss": 17.3633, "step": 15571 }, { "epoch": 0.284643647065275, "grad_norm": 7.507276131995696, "learning_rate": 8.393984368285784e-06, "loss": 18.0902, "step": 15572 }, { "epoch": 0.28466192626172154, "grad_norm": 7.964726173875384, "learning_rate": 8.393766993343241e-06, "loss": 18.1453, "step": 15573 }, { "epoch": 0.2846802054581681, "grad_norm": 7.2337262115679914, "learning_rate": 8.393549606505842e-06, "loss": 17.8401, "step": 15574 }, { "epoch": 0.28469848465461456, "grad_norm": 5.573186046338195, "learning_rate": 8.393332207774345e-06, "loss": 17.1204, "step": 15575 }, { "epoch": 0.2847167638510611, "grad_norm": 8.1926876728909, "learning_rate": 8.393114797149513e-06, "loss": 18.4901, "step": 15576 }, { "epoch": 0.28473504304750763, "grad_norm": 6.337786905884441, "learning_rate": 8.392897374632107e-06, "loss": 17.5824, "step": 15577 }, { "epoch": 0.28475332224395417, "grad_norm": 6.646307893594947, "learning_rate": 8.392679940222893e-06, "loss": 17.3747, "step": 15578 }, { "epoch": 0.2847716014404007, "grad_norm": 5.773095729081687, "learning_rate": 8.392462493922629e-06, "loss": 17.1753, "step": 15579 }, { "epoch": 0.2847898806368472, "grad_norm": 7.033828873581546, "learning_rate": 8.392245035732077e-06, "loss": 17.9067, "step": 15580 }, { "epoch": 0.2848081598332937, "grad_norm": 6.589963825307848, "learning_rate": 8.392027565652001e-06, "loss": 17.4564, "step": 15581 }, { "epoch": 0.28482643902974025, "grad_norm": 6.496213127727764, "learning_rate": 8.391810083683163e-06, "loss": 17.5729, "step": 15582 }, { "epoch": 0.2848447182261868, "grad_norm": 6.22426655532525, "learning_rate": 8.391592589826325e-06, "loss": 17.6297, "step": 15583 }, { "epoch": 0.2848629974226333, "grad_norm": 7.330933426305583, "learning_rate": 8.391375084082249e-06, "loss": 17.9936, "step": 15584 }, { "epoch": 0.2848812766190798, "grad_norm": 5.628551605391713, "learning_rate": 8.391157566451697e-06, "loss": 16.9961, "step": 15585 }, { "epoch": 0.28489955581552634, "grad_norm": 5.046472674557297, "learning_rate": 8.390940036935433e-06, "loss": 16.9365, "step": 15586 }, { "epoch": 0.2849178350119729, "grad_norm": 6.914505875474923, "learning_rate": 8.39072249553422e-06, "loss": 17.3626, "step": 15587 }, { "epoch": 0.2849361142084194, "grad_norm": 7.25076524166895, "learning_rate": 8.390504942248817e-06, "loss": 17.9625, "step": 15588 }, { "epoch": 0.28495439340486595, "grad_norm": 7.699190980540354, "learning_rate": 8.390287377079989e-06, "loss": 17.5692, "step": 15589 }, { "epoch": 0.2849726726013124, "grad_norm": 6.518746589959383, "learning_rate": 8.390069800028497e-06, "loss": 17.3474, "step": 15590 }, { "epoch": 0.28499095179775896, "grad_norm": 6.774196109811676, "learning_rate": 8.389852211095104e-06, "loss": 17.5621, "step": 15591 }, { "epoch": 0.2850092309942055, "grad_norm": 7.007953092574309, "learning_rate": 8.389634610280576e-06, "loss": 17.7761, "step": 15592 }, { "epoch": 0.28502751019065203, "grad_norm": 7.785073531272435, "learning_rate": 8.38941699758567e-06, "loss": 18.2594, "step": 15593 }, { "epoch": 0.28504578938709857, "grad_norm": 5.737027901527635, "learning_rate": 8.389199373011151e-06, "loss": 16.9646, "step": 15594 }, { "epoch": 0.28506406858354505, "grad_norm": 6.1317094096416005, "learning_rate": 8.388981736557786e-06, "loss": 17.276, "step": 15595 }, { "epoch": 0.2850823477799916, "grad_norm": 5.552608549638589, "learning_rate": 8.388764088226332e-06, "loss": 17.1453, "step": 15596 }, { "epoch": 0.2851006269764381, "grad_norm": 6.541323176059032, "learning_rate": 8.388546428017553e-06, "loss": 17.572, "step": 15597 }, { "epoch": 0.28511890617288466, "grad_norm": 6.5776082966284495, "learning_rate": 8.388328755932213e-06, "loss": 17.3414, "step": 15598 }, { "epoch": 0.28513718536933114, "grad_norm": 5.859995221506433, "learning_rate": 8.388111071971077e-06, "loss": 17.3288, "step": 15599 }, { "epoch": 0.28515546456577767, "grad_norm": 6.283259630316175, "learning_rate": 8.387893376134903e-06, "loss": 17.615, "step": 15600 }, { "epoch": 0.2851737437622242, "grad_norm": 8.207639137259413, "learning_rate": 8.387675668424457e-06, "loss": 18.3846, "step": 15601 }, { "epoch": 0.28519202295867074, "grad_norm": 6.3159778451564, "learning_rate": 8.387457948840503e-06, "loss": 17.7309, "step": 15602 }, { "epoch": 0.2852103021551173, "grad_norm": 7.207083762354696, "learning_rate": 8.387240217383804e-06, "loss": 17.754, "step": 15603 }, { "epoch": 0.28522858135156376, "grad_norm": 6.95631087327876, "learning_rate": 8.38702247405512e-06, "loss": 17.6824, "step": 15604 }, { "epoch": 0.2852468605480103, "grad_norm": 7.055379196665715, "learning_rate": 8.386804718855217e-06, "loss": 17.7365, "step": 15605 }, { "epoch": 0.28526513974445683, "grad_norm": 6.141082121277028, "learning_rate": 8.386586951784857e-06, "loss": 17.2589, "step": 15606 }, { "epoch": 0.28528341894090337, "grad_norm": 6.2588554003648555, "learning_rate": 8.386369172844803e-06, "loss": 17.4175, "step": 15607 }, { "epoch": 0.2853016981373499, "grad_norm": 5.849630957286629, "learning_rate": 8.386151382035819e-06, "loss": 17.2994, "step": 15608 }, { "epoch": 0.2853199773337964, "grad_norm": 7.919239102329731, "learning_rate": 8.38593357935867e-06, "loss": 18.2311, "step": 15609 }, { "epoch": 0.2853382565302429, "grad_norm": 7.460346656682483, "learning_rate": 8.385715764814115e-06, "loss": 18.0355, "step": 15610 }, { "epoch": 0.28535653572668945, "grad_norm": 7.166477420381712, "learning_rate": 8.385497938402921e-06, "loss": 17.9849, "step": 15611 }, { "epoch": 0.285374814923136, "grad_norm": 7.811072381805779, "learning_rate": 8.385280100125852e-06, "loss": 17.874, "step": 15612 }, { "epoch": 0.2853930941195825, "grad_norm": 6.212841050772617, "learning_rate": 8.385062249983668e-06, "loss": 17.7724, "step": 15613 }, { "epoch": 0.285411373316029, "grad_norm": 8.286750865831028, "learning_rate": 8.384844387977136e-06, "loss": 17.7, "step": 15614 }, { "epoch": 0.28542965251247554, "grad_norm": 7.108771609697807, "learning_rate": 8.384626514107017e-06, "loss": 17.7998, "step": 15615 }, { "epoch": 0.2854479317089221, "grad_norm": 5.327994253042193, "learning_rate": 8.384408628374076e-06, "loss": 17.1421, "step": 15616 }, { "epoch": 0.2854662109053686, "grad_norm": 6.386484428558483, "learning_rate": 8.384190730779077e-06, "loss": 17.5845, "step": 15617 }, { "epoch": 0.28548449010181515, "grad_norm": 5.628792152596963, "learning_rate": 8.383972821322783e-06, "loss": 17.4626, "step": 15618 }, { "epoch": 0.2855027692982616, "grad_norm": 6.1920812252869855, "learning_rate": 8.383754900005958e-06, "loss": 17.5209, "step": 15619 }, { "epoch": 0.28552104849470816, "grad_norm": 7.196441400429767, "learning_rate": 8.383536966829365e-06, "loss": 17.8727, "step": 15620 }, { "epoch": 0.2855393276911547, "grad_norm": 6.394134189644871, "learning_rate": 8.38331902179377e-06, "loss": 17.3357, "step": 15621 }, { "epoch": 0.28555760688760123, "grad_norm": 7.351549259202254, "learning_rate": 8.383101064899934e-06, "loss": 18.0577, "step": 15622 }, { "epoch": 0.28557588608404777, "grad_norm": 8.885390520908818, "learning_rate": 8.382883096148623e-06, "loss": 17.5862, "step": 15623 }, { "epoch": 0.28559416528049425, "grad_norm": 7.79950581234333, "learning_rate": 8.382665115540601e-06, "loss": 18.0429, "step": 15624 }, { "epoch": 0.2856124444769408, "grad_norm": 7.142911245825157, "learning_rate": 8.38244712307663e-06, "loss": 17.7714, "step": 15625 }, { "epoch": 0.2856307236733873, "grad_norm": 7.328069278498716, "learning_rate": 8.382229118757475e-06, "loss": 17.7837, "step": 15626 }, { "epoch": 0.28564900286983386, "grad_norm": 6.390514179583239, "learning_rate": 8.382011102583903e-06, "loss": 17.5888, "step": 15627 }, { "epoch": 0.2856672820662804, "grad_norm": 6.003095180740606, "learning_rate": 8.381793074556673e-06, "loss": 17.6111, "step": 15628 }, { "epoch": 0.2856855612627269, "grad_norm": 7.090927574776756, "learning_rate": 8.38157503467655e-06, "loss": 17.7163, "step": 15629 }, { "epoch": 0.2857038404591734, "grad_norm": 10.159727596808448, "learning_rate": 8.381356982944304e-06, "loss": 18.8136, "step": 15630 }, { "epoch": 0.28572211965561994, "grad_norm": 6.746606551618541, "learning_rate": 8.381138919360693e-06, "loss": 17.4561, "step": 15631 }, { "epoch": 0.2857403988520665, "grad_norm": 8.895814824886411, "learning_rate": 8.380920843926485e-06, "loss": 18.0273, "step": 15632 }, { "epoch": 0.285758678048513, "grad_norm": 7.931643137647633, "learning_rate": 8.380702756642443e-06, "loss": 18.2132, "step": 15633 }, { "epoch": 0.2857769572449595, "grad_norm": 6.607209696763906, "learning_rate": 8.380484657509329e-06, "loss": 17.5462, "step": 15634 }, { "epoch": 0.28579523644140603, "grad_norm": 6.397507164123776, "learning_rate": 8.380266546527911e-06, "loss": 17.6273, "step": 15635 }, { "epoch": 0.28581351563785257, "grad_norm": 6.787735261967875, "learning_rate": 8.380048423698952e-06, "loss": 17.4471, "step": 15636 }, { "epoch": 0.2858317948342991, "grad_norm": 8.94183718488205, "learning_rate": 8.379830289023216e-06, "loss": 18.6105, "step": 15637 }, { "epoch": 0.2858500740307456, "grad_norm": 6.901059258190413, "learning_rate": 8.379612142501468e-06, "loss": 17.6378, "step": 15638 }, { "epoch": 0.2858683532271921, "grad_norm": 7.276057520784042, "learning_rate": 8.379393984134473e-06, "loss": 17.645, "step": 15639 }, { "epoch": 0.28588663242363865, "grad_norm": 6.7987410673042605, "learning_rate": 8.379175813922998e-06, "loss": 17.6281, "step": 15640 }, { "epoch": 0.2859049116200852, "grad_norm": 5.242972500649996, "learning_rate": 8.378957631867801e-06, "loss": 17.0635, "step": 15641 }, { "epoch": 0.2859231908165317, "grad_norm": 6.686899850055457, "learning_rate": 8.378739437969653e-06, "loss": 17.3893, "step": 15642 }, { "epoch": 0.2859414700129782, "grad_norm": 6.254068526072431, "learning_rate": 8.378521232229316e-06, "loss": 17.3998, "step": 15643 }, { "epoch": 0.28595974920942474, "grad_norm": 6.1499387344539915, "learning_rate": 8.378303014647555e-06, "loss": 17.3369, "step": 15644 }, { "epoch": 0.2859780284058713, "grad_norm": 7.888230663721426, "learning_rate": 8.378084785225134e-06, "loss": 18.1959, "step": 15645 }, { "epoch": 0.2859963076023178, "grad_norm": 6.289409435598981, "learning_rate": 8.37786654396282e-06, "loss": 17.5666, "step": 15646 }, { "epoch": 0.28601458679876435, "grad_norm": 7.090039170347586, "learning_rate": 8.377648290861377e-06, "loss": 17.9468, "step": 15647 }, { "epoch": 0.2860328659952108, "grad_norm": 6.036925531325897, "learning_rate": 8.37743002592157e-06, "loss": 17.1986, "step": 15648 }, { "epoch": 0.28605114519165736, "grad_norm": 6.868252158953648, "learning_rate": 8.377211749144165e-06, "loss": 17.752, "step": 15649 }, { "epoch": 0.2860694243881039, "grad_norm": 9.033682671133217, "learning_rate": 8.376993460529925e-06, "loss": 18.5495, "step": 15650 }, { "epoch": 0.28608770358455043, "grad_norm": 5.882631257234656, "learning_rate": 8.376775160079614e-06, "loss": 17.2984, "step": 15651 }, { "epoch": 0.28610598278099697, "grad_norm": 7.8151648561632845, "learning_rate": 8.376556847794001e-06, "loss": 18.2152, "step": 15652 }, { "epoch": 0.28612426197744345, "grad_norm": 5.1476495119161, "learning_rate": 8.376338523673848e-06, "loss": 16.9924, "step": 15653 }, { "epoch": 0.28614254117389, "grad_norm": 5.977907295560187, "learning_rate": 8.376120187719924e-06, "loss": 17.4418, "step": 15654 }, { "epoch": 0.2861608203703365, "grad_norm": 8.701996806963585, "learning_rate": 8.37590183993299e-06, "loss": 18.1837, "step": 15655 }, { "epoch": 0.28617909956678306, "grad_norm": 6.594309340151606, "learning_rate": 8.375683480313812e-06, "loss": 17.8694, "step": 15656 }, { "epoch": 0.2861973787632296, "grad_norm": 7.245940354928539, "learning_rate": 8.375465108863159e-06, "loss": 17.9216, "step": 15657 }, { "epoch": 0.2862156579596761, "grad_norm": 7.323500051559651, "learning_rate": 8.375246725581792e-06, "loss": 18.2038, "step": 15658 }, { "epoch": 0.2862339371561226, "grad_norm": 7.425768035062593, "learning_rate": 8.375028330470477e-06, "loss": 17.7077, "step": 15659 }, { "epoch": 0.28625221635256914, "grad_norm": 6.646181546882208, "learning_rate": 8.374809923529981e-06, "loss": 17.5014, "step": 15660 }, { "epoch": 0.2862704955490157, "grad_norm": 7.815951148838298, "learning_rate": 8.374591504761072e-06, "loss": 18.3835, "step": 15661 }, { "epoch": 0.2862887747454622, "grad_norm": 9.259074008802063, "learning_rate": 8.37437307416451e-06, "loss": 17.9353, "step": 15662 }, { "epoch": 0.2863070539419087, "grad_norm": 5.373786956405164, "learning_rate": 8.374154631741063e-06, "loss": 17.1724, "step": 15663 }, { "epoch": 0.28632533313835523, "grad_norm": 6.369099122450056, "learning_rate": 8.373936177491497e-06, "loss": 17.517, "step": 15664 }, { "epoch": 0.28634361233480177, "grad_norm": 6.033493817967538, "learning_rate": 8.373717711416578e-06, "loss": 17.278, "step": 15665 }, { "epoch": 0.2863618915312483, "grad_norm": 6.127814021510294, "learning_rate": 8.373499233517071e-06, "loss": 17.382, "step": 15666 }, { "epoch": 0.28638017072769484, "grad_norm": 5.894522159937451, "learning_rate": 8.373280743793741e-06, "loss": 17.2759, "step": 15667 }, { "epoch": 0.2863984499241413, "grad_norm": 12.338123559113056, "learning_rate": 8.373062242247358e-06, "loss": 17.7412, "step": 15668 }, { "epoch": 0.28641672912058785, "grad_norm": 7.369914316078894, "learning_rate": 8.372843728878681e-06, "loss": 17.9116, "step": 15669 }, { "epoch": 0.2864350083170344, "grad_norm": 6.947360513007213, "learning_rate": 8.37262520368848e-06, "loss": 17.5352, "step": 15670 }, { "epoch": 0.2864532875134809, "grad_norm": 6.656061287890204, "learning_rate": 8.372406666677521e-06, "loss": 17.6119, "step": 15671 }, { "epoch": 0.2864715667099274, "grad_norm": 5.5692997538235725, "learning_rate": 8.37218811784657e-06, "loss": 16.9218, "step": 15672 }, { "epoch": 0.28648984590637394, "grad_norm": 6.848217536842453, "learning_rate": 8.371969557196391e-06, "loss": 17.6411, "step": 15673 }, { "epoch": 0.2865081251028205, "grad_norm": 6.129048388804936, "learning_rate": 8.371750984727753e-06, "loss": 17.4972, "step": 15674 }, { "epoch": 0.286526404299267, "grad_norm": 7.013742207397539, "learning_rate": 8.37153240044142e-06, "loss": 17.6785, "step": 15675 }, { "epoch": 0.28654468349571355, "grad_norm": 7.179054398456335, "learning_rate": 8.371313804338156e-06, "loss": 17.6121, "step": 15676 }, { "epoch": 0.28656296269216003, "grad_norm": 7.800869695566196, "learning_rate": 8.371095196418731e-06, "loss": 18.1214, "step": 15677 }, { "epoch": 0.28658124188860656, "grad_norm": 6.261844071848223, "learning_rate": 8.370876576683913e-06, "loss": 17.2797, "step": 15678 }, { "epoch": 0.2865995210850531, "grad_norm": 6.810676162116741, "learning_rate": 8.37065794513446e-06, "loss": 17.4442, "step": 15679 }, { "epoch": 0.28661780028149964, "grad_norm": 7.497837283605271, "learning_rate": 8.370439301771146e-06, "loss": 17.9312, "step": 15680 }, { "epoch": 0.28663607947794617, "grad_norm": 6.724830039731928, "learning_rate": 8.370220646594736e-06, "loss": 17.5545, "step": 15681 }, { "epoch": 0.28665435867439265, "grad_norm": 6.520261805262163, "learning_rate": 8.370001979605993e-06, "loss": 17.6657, "step": 15682 }, { "epoch": 0.2866726378708392, "grad_norm": 7.030807258369695, "learning_rate": 8.369783300805685e-06, "loss": 17.7105, "step": 15683 }, { "epoch": 0.2866909170672857, "grad_norm": 9.927787256458823, "learning_rate": 8.36956461019458e-06, "loss": 17.9591, "step": 15684 }, { "epoch": 0.28670919626373226, "grad_norm": 6.033712258318712, "learning_rate": 8.369345907773444e-06, "loss": 17.3323, "step": 15685 }, { "epoch": 0.2867274754601788, "grad_norm": 6.195990386468086, "learning_rate": 8.369127193543044e-06, "loss": 17.4414, "step": 15686 }, { "epoch": 0.2867457546566253, "grad_norm": 6.896687238686422, "learning_rate": 8.368908467504142e-06, "loss": 17.5145, "step": 15687 }, { "epoch": 0.2867640338530718, "grad_norm": 6.95707880779647, "learning_rate": 8.368689729657511e-06, "loss": 17.7917, "step": 15688 }, { "epoch": 0.28678231304951834, "grad_norm": 7.336968328842924, "learning_rate": 8.368470980003914e-06, "loss": 17.6072, "step": 15689 }, { "epoch": 0.2868005922459649, "grad_norm": 6.1451909369189215, "learning_rate": 8.368252218544117e-06, "loss": 17.4768, "step": 15690 }, { "epoch": 0.2868188714424114, "grad_norm": 6.061638916616824, "learning_rate": 8.368033445278892e-06, "loss": 17.5442, "step": 15691 }, { "epoch": 0.2868371506388579, "grad_norm": 6.548939791459066, "learning_rate": 8.367814660208999e-06, "loss": 17.635, "step": 15692 }, { "epoch": 0.28685542983530443, "grad_norm": 7.527961122980897, "learning_rate": 8.367595863335208e-06, "loss": 17.7922, "step": 15693 }, { "epoch": 0.28687370903175097, "grad_norm": 6.5655215870551045, "learning_rate": 8.367377054658287e-06, "loss": 17.3734, "step": 15694 }, { "epoch": 0.2868919882281975, "grad_norm": 6.477341736143212, "learning_rate": 8.367158234179001e-06, "loss": 17.4314, "step": 15695 }, { "epoch": 0.28691026742464404, "grad_norm": 7.475140751791854, "learning_rate": 8.366939401898117e-06, "loss": 17.6992, "step": 15696 }, { "epoch": 0.2869285466210905, "grad_norm": 7.305250930061159, "learning_rate": 8.366720557816404e-06, "loss": 17.6783, "step": 15697 }, { "epoch": 0.28694682581753705, "grad_norm": 7.440542250326834, "learning_rate": 8.366501701934626e-06, "loss": 17.774, "step": 15698 }, { "epoch": 0.2869651050139836, "grad_norm": 7.427751760169241, "learning_rate": 8.366282834253553e-06, "loss": 17.9105, "step": 15699 }, { "epoch": 0.2869833842104301, "grad_norm": 7.019817900435624, "learning_rate": 8.366063954773949e-06, "loss": 17.6111, "step": 15700 }, { "epoch": 0.28700166340687666, "grad_norm": 5.732647211925649, "learning_rate": 8.365845063496585e-06, "loss": 17.1256, "step": 15701 }, { "epoch": 0.28701994260332314, "grad_norm": 7.037915434264202, "learning_rate": 8.365626160422226e-06, "loss": 17.7761, "step": 15702 }, { "epoch": 0.2870382217997697, "grad_norm": 5.67736410354966, "learning_rate": 8.365407245551638e-06, "loss": 17.062, "step": 15703 }, { "epoch": 0.2870565009962162, "grad_norm": 5.468890560092592, "learning_rate": 8.36518831888559e-06, "loss": 17.0763, "step": 15704 }, { "epoch": 0.28707478019266275, "grad_norm": 5.434551600521866, "learning_rate": 8.364969380424849e-06, "loss": 16.9165, "step": 15705 }, { "epoch": 0.28709305938910923, "grad_norm": 6.323974577584368, "learning_rate": 8.364750430170183e-06, "loss": 17.3111, "step": 15706 }, { "epoch": 0.28711133858555576, "grad_norm": 7.9254576665947365, "learning_rate": 8.36453146812236e-06, "loss": 18.2983, "step": 15707 }, { "epoch": 0.2871296177820023, "grad_norm": 7.312198334280671, "learning_rate": 8.364312494282143e-06, "loss": 17.672, "step": 15708 }, { "epoch": 0.28714789697844884, "grad_norm": 6.6101487821054565, "learning_rate": 8.364093508650304e-06, "loss": 17.6043, "step": 15709 }, { "epoch": 0.28716617617489537, "grad_norm": 6.559681846630912, "learning_rate": 8.36387451122761e-06, "loss": 17.525, "step": 15710 }, { "epoch": 0.28718445537134185, "grad_norm": 7.656501045636494, "learning_rate": 8.363655502014826e-06, "loss": 18.0472, "step": 15711 }, { "epoch": 0.2872027345677884, "grad_norm": 9.269767916265716, "learning_rate": 8.363436481012722e-06, "loss": 17.7054, "step": 15712 }, { "epoch": 0.2872210137642349, "grad_norm": 6.74409848397112, "learning_rate": 8.363217448222065e-06, "loss": 17.3151, "step": 15713 }, { "epoch": 0.28723929296068146, "grad_norm": 8.016435392112882, "learning_rate": 8.362998403643623e-06, "loss": 18.1044, "step": 15714 }, { "epoch": 0.287257572157128, "grad_norm": 6.570574622683178, "learning_rate": 8.362779347278163e-06, "loss": 17.3039, "step": 15715 }, { "epoch": 0.2872758513535745, "grad_norm": 7.366678450115921, "learning_rate": 8.362560279126454e-06, "loss": 17.9155, "step": 15716 }, { "epoch": 0.287294130550021, "grad_norm": 7.345328132863208, "learning_rate": 8.362341199189264e-06, "loss": 18.1081, "step": 15717 }, { "epoch": 0.28731240974646755, "grad_norm": 6.579812525042959, "learning_rate": 8.362122107467357e-06, "loss": 17.4539, "step": 15718 }, { "epoch": 0.2873306889429141, "grad_norm": 6.677578022932323, "learning_rate": 8.361903003961507e-06, "loss": 17.6364, "step": 15719 }, { "epoch": 0.2873489681393606, "grad_norm": 7.310175659897703, "learning_rate": 8.361683888672475e-06, "loss": 17.9238, "step": 15720 }, { "epoch": 0.2873672473358071, "grad_norm": 7.462718364959586, "learning_rate": 8.361464761601036e-06, "loss": 17.6278, "step": 15721 }, { "epoch": 0.28738552653225363, "grad_norm": 7.227925124793609, "learning_rate": 8.361245622747954e-06, "loss": 17.6803, "step": 15722 }, { "epoch": 0.28740380572870017, "grad_norm": 5.185760579323815, "learning_rate": 8.361026472113997e-06, "loss": 16.9618, "step": 15723 }, { "epoch": 0.2874220849251467, "grad_norm": 6.827928535280947, "learning_rate": 8.360807309699934e-06, "loss": 17.7473, "step": 15724 }, { "epoch": 0.28744036412159324, "grad_norm": 7.476993260591754, "learning_rate": 8.360588135506532e-06, "loss": 17.9636, "step": 15725 }, { "epoch": 0.2874586433180397, "grad_norm": 6.337144269226875, "learning_rate": 8.360368949534562e-06, "loss": 17.6345, "step": 15726 }, { "epoch": 0.28747692251448626, "grad_norm": 6.127933451755372, "learning_rate": 8.360149751784789e-06, "loss": 17.37, "step": 15727 }, { "epoch": 0.2874952017109328, "grad_norm": 6.0698608722058935, "learning_rate": 8.359930542257984e-06, "loss": 17.1567, "step": 15728 }, { "epoch": 0.2875134809073793, "grad_norm": 5.84965151610325, "learning_rate": 8.359711320954913e-06, "loss": 17.3871, "step": 15729 }, { "epoch": 0.28753176010382586, "grad_norm": 7.0989870416467875, "learning_rate": 8.359492087876346e-06, "loss": 17.6362, "step": 15730 }, { "epoch": 0.28755003930027234, "grad_norm": 7.949534533597411, "learning_rate": 8.359272843023049e-06, "loss": 18.5204, "step": 15731 }, { "epoch": 0.2875683184967189, "grad_norm": 8.35695095818438, "learning_rate": 8.359053586395796e-06, "loss": 18.2508, "step": 15732 }, { "epoch": 0.2875865976931654, "grad_norm": 5.440921624763122, "learning_rate": 8.358834317995349e-06, "loss": 17.1725, "step": 15733 }, { "epoch": 0.28760487688961195, "grad_norm": 7.033113579652731, "learning_rate": 8.35861503782248e-06, "loss": 17.7829, "step": 15734 }, { "epoch": 0.2876231560860585, "grad_norm": 6.064502051210512, "learning_rate": 8.358395745877956e-06, "loss": 17.2337, "step": 15735 }, { "epoch": 0.28764143528250496, "grad_norm": 6.502516987498833, "learning_rate": 8.358176442162545e-06, "loss": 17.3482, "step": 15736 }, { "epoch": 0.2876597144789515, "grad_norm": 7.3532004121531, "learning_rate": 8.35795712667702e-06, "loss": 18.0178, "step": 15737 }, { "epoch": 0.28767799367539804, "grad_norm": 7.212506742678808, "learning_rate": 8.357737799422144e-06, "loss": 17.9006, "step": 15738 }, { "epoch": 0.28769627287184457, "grad_norm": 5.851440011004375, "learning_rate": 8.35751846039869e-06, "loss": 17.3012, "step": 15739 }, { "epoch": 0.28771455206829105, "grad_norm": 7.668508686807459, "learning_rate": 8.357299109607425e-06, "loss": 18.4711, "step": 15740 }, { "epoch": 0.2877328312647376, "grad_norm": 8.088217389170984, "learning_rate": 8.357079747049116e-06, "loss": 18.3095, "step": 15741 }, { "epoch": 0.2877511104611841, "grad_norm": 6.679334674361961, "learning_rate": 8.356860372724538e-06, "loss": 17.736, "step": 15742 }, { "epoch": 0.28776938965763066, "grad_norm": 6.996640799362128, "learning_rate": 8.356640986634453e-06, "loss": 17.7252, "step": 15743 }, { "epoch": 0.2877876688540772, "grad_norm": 6.84840280288548, "learning_rate": 8.356421588779633e-06, "loss": 17.6636, "step": 15744 }, { "epoch": 0.2878059480505237, "grad_norm": 6.754510511269283, "learning_rate": 8.356202179160847e-06, "loss": 17.4008, "step": 15745 }, { "epoch": 0.2878242272469702, "grad_norm": 5.698967045748068, "learning_rate": 8.355982757778861e-06, "loss": 17.262, "step": 15746 }, { "epoch": 0.28784250644341675, "grad_norm": 6.573891907933512, "learning_rate": 8.35576332463445e-06, "loss": 17.4271, "step": 15747 }, { "epoch": 0.2878607856398633, "grad_norm": 6.243566704387631, "learning_rate": 8.355543879728378e-06, "loss": 17.4468, "step": 15748 }, { "epoch": 0.2878790648363098, "grad_norm": 6.31556660161982, "learning_rate": 8.355324423061415e-06, "loss": 17.5753, "step": 15749 }, { "epoch": 0.2878973440327563, "grad_norm": 5.770455445087101, "learning_rate": 8.355104954634334e-06, "loss": 17.0861, "step": 15750 }, { "epoch": 0.28791562322920283, "grad_norm": 7.50727004029445, "learning_rate": 8.3548854744479e-06, "loss": 18.0645, "step": 15751 }, { "epoch": 0.28793390242564937, "grad_norm": 8.677714095973903, "learning_rate": 8.354665982502883e-06, "loss": 18.379, "step": 15752 }, { "epoch": 0.2879521816220959, "grad_norm": 4.7307551149284945, "learning_rate": 8.354446478800053e-06, "loss": 16.738, "step": 15753 }, { "epoch": 0.28797046081854244, "grad_norm": 6.695736112471265, "learning_rate": 8.35422696334018e-06, "loss": 17.8252, "step": 15754 }, { "epoch": 0.2879887400149889, "grad_norm": 6.32623048304772, "learning_rate": 8.354007436124031e-06, "loss": 17.4677, "step": 15755 }, { "epoch": 0.28800701921143546, "grad_norm": 5.641460203140147, "learning_rate": 8.353787897152377e-06, "loss": 17.2396, "step": 15756 }, { "epoch": 0.288025298407882, "grad_norm": 5.224213157568367, "learning_rate": 8.353568346425989e-06, "loss": 17.1579, "step": 15757 }, { "epoch": 0.2880435776043285, "grad_norm": 6.951452327045251, "learning_rate": 8.353348783945633e-06, "loss": 17.7508, "step": 15758 }, { "epoch": 0.28806185680077506, "grad_norm": 6.943798294859284, "learning_rate": 8.353129209712084e-06, "loss": 17.5634, "step": 15759 }, { "epoch": 0.28808013599722154, "grad_norm": 8.345537359122252, "learning_rate": 8.352909623726105e-06, "loss": 18.1797, "step": 15760 }, { "epoch": 0.2880984151936681, "grad_norm": 5.59276660194879, "learning_rate": 8.352690025988468e-06, "loss": 17.2527, "step": 15761 }, { "epoch": 0.2881166943901146, "grad_norm": 6.721423592195531, "learning_rate": 8.352470416499945e-06, "loss": 17.5115, "step": 15762 }, { "epoch": 0.28813497358656115, "grad_norm": 6.34198069378264, "learning_rate": 8.352250795261304e-06, "loss": 17.4371, "step": 15763 }, { "epoch": 0.2881532527830077, "grad_norm": 6.625762213620717, "learning_rate": 8.352031162273316e-06, "loss": 17.8069, "step": 15764 }, { "epoch": 0.28817153197945417, "grad_norm": 7.369267609233369, "learning_rate": 8.351811517536748e-06, "loss": 17.8182, "step": 15765 }, { "epoch": 0.2881898111759007, "grad_norm": 6.127724232807258, "learning_rate": 8.351591861052371e-06, "loss": 17.4318, "step": 15766 }, { "epoch": 0.28820809037234724, "grad_norm": 5.957781267863218, "learning_rate": 8.351372192820956e-06, "loss": 17.2612, "step": 15767 }, { "epoch": 0.28822636956879377, "grad_norm": 7.40018453570937, "learning_rate": 8.351152512843273e-06, "loss": 18.0546, "step": 15768 }, { "epoch": 0.2882446487652403, "grad_norm": 7.588501590064388, "learning_rate": 8.350932821120093e-06, "loss": 17.5848, "step": 15769 }, { "epoch": 0.2882629279616868, "grad_norm": 6.9466494681157505, "learning_rate": 8.35071311765218e-06, "loss": 17.8109, "step": 15770 }, { "epoch": 0.2882812071581333, "grad_norm": 8.034972029273105, "learning_rate": 8.350493402440312e-06, "loss": 18.2194, "step": 15771 }, { "epoch": 0.28829948635457986, "grad_norm": 6.376879871939102, "learning_rate": 8.350273675485251e-06, "loss": 17.489, "step": 15772 }, { "epoch": 0.2883177655510264, "grad_norm": 6.655720185303255, "learning_rate": 8.350053936787777e-06, "loss": 17.5294, "step": 15773 }, { "epoch": 0.2883360447474729, "grad_norm": 6.714952995024809, "learning_rate": 8.349834186348652e-06, "loss": 17.5506, "step": 15774 }, { "epoch": 0.2883543239439194, "grad_norm": 6.856311084096559, "learning_rate": 8.349614424168649e-06, "loss": 17.8293, "step": 15775 }, { "epoch": 0.28837260314036595, "grad_norm": 5.681622234775624, "learning_rate": 8.349394650248537e-06, "loss": 17.2425, "step": 15776 }, { "epoch": 0.2883908823368125, "grad_norm": 5.712181956332461, "learning_rate": 8.349174864589088e-06, "loss": 17.1507, "step": 15777 }, { "epoch": 0.288409161533259, "grad_norm": 5.832129861901588, "learning_rate": 8.348955067191071e-06, "loss": 17.1996, "step": 15778 }, { "epoch": 0.2884274407297055, "grad_norm": 7.35506781927844, "learning_rate": 8.348735258055258e-06, "loss": 17.5264, "step": 15779 }, { "epoch": 0.28844571992615203, "grad_norm": 8.321872434461017, "learning_rate": 8.34851543718242e-06, "loss": 17.928, "step": 15780 }, { "epoch": 0.28846399912259857, "grad_norm": 7.331634591493145, "learning_rate": 8.348295604573324e-06, "loss": 17.6809, "step": 15781 }, { "epoch": 0.2884822783190451, "grad_norm": 6.64089174685003, "learning_rate": 8.348075760228744e-06, "loss": 17.5471, "step": 15782 }, { "epoch": 0.28850055751549164, "grad_norm": 6.379516298623369, "learning_rate": 8.347855904149447e-06, "loss": 17.4211, "step": 15783 }, { "epoch": 0.2885188367119381, "grad_norm": 5.813734280984642, "learning_rate": 8.347636036336207e-06, "loss": 17.1827, "step": 15784 }, { "epoch": 0.28853711590838466, "grad_norm": 7.679506660534976, "learning_rate": 8.347416156789791e-06, "loss": 18.2738, "step": 15785 }, { "epoch": 0.2885553951048312, "grad_norm": 6.070713105725202, "learning_rate": 8.347196265510976e-06, "loss": 17.6549, "step": 15786 }, { "epoch": 0.2885736743012777, "grad_norm": 8.341650233524906, "learning_rate": 8.346976362500526e-06, "loss": 17.9239, "step": 15787 }, { "epoch": 0.28859195349772426, "grad_norm": 6.223210640444305, "learning_rate": 8.346756447759215e-06, "loss": 17.5592, "step": 15788 }, { "epoch": 0.28861023269417074, "grad_norm": 7.440064451023787, "learning_rate": 8.346536521287812e-06, "loss": 18.4392, "step": 15789 }, { "epoch": 0.2886285118906173, "grad_norm": 6.827000463227836, "learning_rate": 8.346316583087088e-06, "loss": 17.6463, "step": 15790 }, { "epoch": 0.2886467910870638, "grad_norm": 7.2689956750364555, "learning_rate": 8.346096633157816e-06, "loss": 17.6978, "step": 15791 }, { "epoch": 0.28866507028351035, "grad_norm": 9.560304213282526, "learning_rate": 8.345876671500766e-06, "loss": 18.0112, "step": 15792 }, { "epoch": 0.2886833494799569, "grad_norm": 6.371597099476278, "learning_rate": 8.345656698116708e-06, "loss": 17.3692, "step": 15793 }, { "epoch": 0.28870162867640337, "grad_norm": 5.686262289011201, "learning_rate": 8.345436713006416e-06, "loss": 16.9762, "step": 15794 }, { "epoch": 0.2887199078728499, "grad_norm": 7.621295718506177, "learning_rate": 8.345216716170656e-06, "loss": 17.5674, "step": 15795 }, { "epoch": 0.28873818706929644, "grad_norm": 7.620971429766454, "learning_rate": 8.344996707610202e-06, "loss": 17.7524, "step": 15796 }, { "epoch": 0.288756466265743, "grad_norm": 6.191796140829016, "learning_rate": 8.344776687325825e-06, "loss": 17.313, "step": 15797 }, { "epoch": 0.2887747454621895, "grad_norm": 6.181568030098211, "learning_rate": 8.344556655318296e-06, "loss": 17.5748, "step": 15798 }, { "epoch": 0.288793024658636, "grad_norm": 6.268887034304854, "learning_rate": 8.344336611588385e-06, "loss": 17.3379, "step": 15799 }, { "epoch": 0.2888113038550825, "grad_norm": 6.341147994667579, "learning_rate": 8.344116556136867e-06, "loss": 17.4737, "step": 15800 }, { "epoch": 0.28882958305152906, "grad_norm": 6.033665314323493, "learning_rate": 8.34389648896451e-06, "loss": 17.6287, "step": 15801 }, { "epoch": 0.2888478622479756, "grad_norm": 6.986899797204273, "learning_rate": 8.343676410072086e-06, "loss": 17.2298, "step": 15802 }, { "epoch": 0.28886614144442213, "grad_norm": 8.022259512284283, "learning_rate": 8.343456319460365e-06, "loss": 18.4381, "step": 15803 }, { "epoch": 0.2888844206408686, "grad_norm": 7.335846744323855, "learning_rate": 8.34323621713012e-06, "loss": 18.1217, "step": 15804 }, { "epoch": 0.28890269983731515, "grad_norm": 5.4181572914117195, "learning_rate": 8.343016103082122e-06, "loss": 17.2419, "step": 15805 }, { "epoch": 0.2889209790337617, "grad_norm": 7.668717511255242, "learning_rate": 8.342795977317144e-06, "loss": 18.0679, "step": 15806 }, { "epoch": 0.2889392582302082, "grad_norm": 6.007362244453671, "learning_rate": 8.342575839835954e-06, "loss": 17.324, "step": 15807 }, { "epoch": 0.2889575374266547, "grad_norm": 5.766159683698141, "learning_rate": 8.342355690639329e-06, "loss": 17.1475, "step": 15808 }, { "epoch": 0.28897581662310123, "grad_norm": 7.18569407857634, "learning_rate": 8.342135529728036e-06, "loss": 18.1369, "step": 15809 }, { "epoch": 0.28899409581954777, "grad_norm": 8.230697519035877, "learning_rate": 8.341915357102846e-06, "loss": 18.0357, "step": 15810 }, { "epoch": 0.2890123750159943, "grad_norm": 6.858098229288025, "learning_rate": 8.341695172764533e-06, "loss": 17.6922, "step": 15811 }, { "epoch": 0.28903065421244084, "grad_norm": 6.996834134548224, "learning_rate": 8.34147497671387e-06, "loss": 17.7166, "step": 15812 }, { "epoch": 0.2890489334088873, "grad_norm": 7.044271065411338, "learning_rate": 8.341254768951627e-06, "loss": 17.9316, "step": 15813 }, { "epoch": 0.28906721260533386, "grad_norm": 6.186792034925866, "learning_rate": 8.341034549478575e-06, "loss": 17.378, "step": 15814 }, { "epoch": 0.2890854918017804, "grad_norm": 6.568810237734127, "learning_rate": 8.340814318295488e-06, "loss": 17.3772, "step": 15815 }, { "epoch": 0.2891037709982269, "grad_norm": 7.044154342767684, "learning_rate": 8.340594075403137e-06, "loss": 17.8485, "step": 15816 }, { "epoch": 0.28912205019467346, "grad_norm": 5.307773036124306, "learning_rate": 8.340373820802292e-06, "loss": 17.0261, "step": 15817 }, { "epoch": 0.28914032939111994, "grad_norm": 6.701566476623037, "learning_rate": 8.340153554493727e-06, "loss": 17.7325, "step": 15818 }, { "epoch": 0.2891586085875665, "grad_norm": 7.622078532809152, "learning_rate": 8.339933276478215e-06, "loss": 17.7651, "step": 15819 }, { "epoch": 0.289176887784013, "grad_norm": 7.24709114490203, "learning_rate": 8.339712986756524e-06, "loss": 18.1042, "step": 15820 }, { "epoch": 0.28919516698045955, "grad_norm": 6.313386930704077, "learning_rate": 8.339492685329431e-06, "loss": 17.5091, "step": 15821 }, { "epoch": 0.2892134461769061, "grad_norm": 7.392391100618513, "learning_rate": 8.339272372197707e-06, "loss": 17.9825, "step": 15822 }, { "epoch": 0.28923172537335257, "grad_norm": 6.142106388509246, "learning_rate": 8.339052047362122e-06, "loss": 17.5634, "step": 15823 }, { "epoch": 0.2892500045697991, "grad_norm": 5.677965210339009, "learning_rate": 8.338831710823448e-06, "loss": 17.2084, "step": 15824 }, { "epoch": 0.28926828376624564, "grad_norm": 6.277885906577304, "learning_rate": 8.338611362582458e-06, "loss": 17.598, "step": 15825 }, { "epoch": 0.2892865629626922, "grad_norm": 6.359339975831886, "learning_rate": 8.338391002639927e-06, "loss": 17.3931, "step": 15826 }, { "epoch": 0.2893048421591387, "grad_norm": 6.134284777287734, "learning_rate": 8.338170630996625e-06, "loss": 17.5104, "step": 15827 }, { "epoch": 0.2893231213555852, "grad_norm": 7.572483903418803, "learning_rate": 8.337950247653323e-06, "loss": 17.9657, "step": 15828 }, { "epoch": 0.2893414005520317, "grad_norm": 6.421304801092773, "learning_rate": 8.337729852610797e-06, "loss": 17.6365, "step": 15829 }, { "epoch": 0.28935967974847826, "grad_norm": 8.723364308966541, "learning_rate": 8.337509445869818e-06, "loss": 18.4818, "step": 15830 }, { "epoch": 0.2893779589449248, "grad_norm": 6.2155569847895515, "learning_rate": 8.337289027431156e-06, "loss": 17.3669, "step": 15831 }, { "epoch": 0.28939623814137133, "grad_norm": 6.416649729972007, "learning_rate": 8.337068597295585e-06, "loss": 17.4181, "step": 15832 }, { "epoch": 0.2894145173378178, "grad_norm": 8.053614383827934, "learning_rate": 8.33684815546388e-06, "loss": 18.5884, "step": 15833 }, { "epoch": 0.28943279653426435, "grad_norm": 7.767476813720748, "learning_rate": 8.336627701936813e-06, "loss": 17.7285, "step": 15834 }, { "epoch": 0.2894510757307109, "grad_norm": 7.30343678621806, "learning_rate": 8.336407236715152e-06, "loss": 17.9958, "step": 15835 }, { "epoch": 0.2894693549271574, "grad_norm": 6.469256363024108, "learning_rate": 8.336186759799675e-06, "loss": 17.4907, "step": 15836 }, { "epoch": 0.28948763412360395, "grad_norm": 7.4002692280470175, "learning_rate": 8.335966271191154e-06, "loss": 17.884, "step": 15837 }, { "epoch": 0.28950591332005043, "grad_norm": 7.020862962918426, "learning_rate": 8.335745770890359e-06, "loss": 17.6649, "step": 15838 }, { "epoch": 0.28952419251649697, "grad_norm": 5.913564074879372, "learning_rate": 8.335525258898065e-06, "loss": 17.2542, "step": 15839 }, { "epoch": 0.2895424717129435, "grad_norm": 6.964672161097862, "learning_rate": 8.335304735215044e-06, "loss": 17.5509, "step": 15840 }, { "epoch": 0.28956075090939004, "grad_norm": 7.032762438823331, "learning_rate": 8.33508419984207e-06, "loss": 18.202, "step": 15841 }, { "epoch": 0.2895790301058365, "grad_norm": 7.0709440844314, "learning_rate": 8.334863652779914e-06, "loss": 17.6258, "step": 15842 }, { "epoch": 0.28959730930228306, "grad_norm": 7.833117486528488, "learning_rate": 8.334643094029354e-06, "loss": 17.5652, "step": 15843 }, { "epoch": 0.2896155884987296, "grad_norm": 7.707687027103323, "learning_rate": 8.334422523591154e-06, "loss": 17.7136, "step": 15844 }, { "epoch": 0.28963386769517613, "grad_norm": 8.420145673153188, "learning_rate": 8.334201941466096e-06, "loss": 18.1864, "step": 15845 }, { "epoch": 0.28965214689162266, "grad_norm": 5.708632640111591, "learning_rate": 8.333981347654947e-06, "loss": 17.0223, "step": 15846 }, { "epoch": 0.28967042608806914, "grad_norm": 5.904381181598544, "learning_rate": 8.333760742158485e-06, "loss": 17.3161, "step": 15847 }, { "epoch": 0.2896887052845157, "grad_norm": 6.625647036338524, "learning_rate": 8.333540124977482e-06, "loss": 17.3811, "step": 15848 }, { "epoch": 0.2897069844809622, "grad_norm": 8.40824991275657, "learning_rate": 8.333319496112707e-06, "loss": 18.5501, "step": 15849 }, { "epoch": 0.28972526367740875, "grad_norm": 5.080051963454226, "learning_rate": 8.333098855564938e-06, "loss": 16.8455, "step": 15850 }, { "epoch": 0.2897435428738553, "grad_norm": 6.884719320562312, "learning_rate": 8.332878203334946e-06, "loss": 17.5062, "step": 15851 }, { "epoch": 0.28976182207030177, "grad_norm": 8.473160270872027, "learning_rate": 8.332657539423505e-06, "loss": 17.6787, "step": 15852 }, { "epoch": 0.2897801012667483, "grad_norm": 6.920592389283835, "learning_rate": 8.33243686383139e-06, "loss": 17.5786, "step": 15853 }, { "epoch": 0.28979838046319484, "grad_norm": 7.619456747101325, "learning_rate": 8.332216176559371e-06, "loss": 18.207, "step": 15854 }, { "epoch": 0.2898166596596414, "grad_norm": 7.784304950394402, "learning_rate": 8.331995477608225e-06, "loss": 18.1824, "step": 15855 }, { "epoch": 0.2898349388560879, "grad_norm": 8.779377212254373, "learning_rate": 8.331774766978723e-06, "loss": 18.0307, "step": 15856 }, { "epoch": 0.2898532180525344, "grad_norm": 7.566250554103422, "learning_rate": 8.331554044671641e-06, "loss": 17.8003, "step": 15857 }, { "epoch": 0.2898714972489809, "grad_norm": 6.331108699259542, "learning_rate": 8.331333310687751e-06, "loss": 17.5381, "step": 15858 }, { "epoch": 0.28988977644542746, "grad_norm": 5.620430241750999, "learning_rate": 8.331112565027825e-06, "loss": 17.1739, "step": 15859 }, { "epoch": 0.289908055641874, "grad_norm": 7.172176970203313, "learning_rate": 8.33089180769264e-06, "loss": 17.8289, "step": 15860 }, { "epoch": 0.28992633483832053, "grad_norm": 6.1488761230885824, "learning_rate": 8.330671038682967e-06, "loss": 17.2673, "step": 15861 }, { "epoch": 0.289944614034767, "grad_norm": 6.707370241170042, "learning_rate": 8.330450257999582e-06, "loss": 17.4739, "step": 15862 }, { "epoch": 0.28996289323121355, "grad_norm": 7.522112889595386, "learning_rate": 8.330229465643257e-06, "loss": 17.9497, "step": 15863 }, { "epoch": 0.2899811724276601, "grad_norm": 7.0785085349866925, "learning_rate": 8.330008661614769e-06, "loss": 17.644, "step": 15864 }, { "epoch": 0.2899994516241066, "grad_norm": 5.762830020144023, "learning_rate": 8.329787845914888e-06, "loss": 17.4525, "step": 15865 }, { "epoch": 0.29001773082055315, "grad_norm": 6.201282452027288, "learning_rate": 8.32956701854439e-06, "loss": 17.4657, "step": 15866 }, { "epoch": 0.29003601001699963, "grad_norm": 6.9848531405467815, "learning_rate": 8.329346179504046e-06, "loss": 17.948, "step": 15867 }, { "epoch": 0.29005428921344617, "grad_norm": 6.343855021026234, "learning_rate": 8.329125328794635e-06, "loss": 17.1982, "step": 15868 }, { "epoch": 0.2900725684098927, "grad_norm": 7.5193987271791585, "learning_rate": 8.328904466416929e-06, "loss": 18.4442, "step": 15869 }, { "epoch": 0.29009084760633924, "grad_norm": 6.16276905751907, "learning_rate": 8.3286835923717e-06, "loss": 17.5355, "step": 15870 }, { "epoch": 0.2901091268027858, "grad_norm": 6.806165075539182, "learning_rate": 8.328462706659726e-06, "loss": 17.5512, "step": 15871 }, { "epoch": 0.29012740599923226, "grad_norm": 7.470899654380862, "learning_rate": 8.328241809281776e-06, "loss": 17.7824, "step": 15872 }, { "epoch": 0.2901456851956788, "grad_norm": 7.926610709436997, "learning_rate": 8.32802090023863e-06, "loss": 18.0969, "step": 15873 }, { "epoch": 0.29016396439212533, "grad_norm": 5.75081515714187, "learning_rate": 8.327799979531058e-06, "loss": 17.2268, "step": 15874 }, { "epoch": 0.29018224358857186, "grad_norm": 6.982746896800702, "learning_rate": 8.327579047159837e-06, "loss": 17.4291, "step": 15875 }, { "epoch": 0.29020052278501834, "grad_norm": 5.570328508642877, "learning_rate": 8.32735810312574e-06, "loss": 17.2382, "step": 15876 }, { "epoch": 0.2902188019814649, "grad_norm": 6.244755244437659, "learning_rate": 8.32713714742954e-06, "loss": 17.2208, "step": 15877 }, { "epoch": 0.2902370811779114, "grad_norm": 6.844615777860132, "learning_rate": 8.326916180072015e-06, "loss": 17.7951, "step": 15878 }, { "epoch": 0.29025536037435795, "grad_norm": 6.211093470232108, "learning_rate": 8.326695201053937e-06, "loss": 17.3398, "step": 15879 }, { "epoch": 0.2902736395708045, "grad_norm": 7.295018932536425, "learning_rate": 8.32647421037608e-06, "loss": 17.6137, "step": 15880 }, { "epoch": 0.29029191876725097, "grad_norm": 6.186315851716792, "learning_rate": 8.326253208039222e-06, "loss": 17.4425, "step": 15881 }, { "epoch": 0.2903101979636975, "grad_norm": 6.9896372933184345, "learning_rate": 8.326032194044132e-06, "loss": 17.6139, "step": 15882 }, { "epoch": 0.29032847716014404, "grad_norm": 5.796558153213868, "learning_rate": 8.325811168391589e-06, "loss": 17.2714, "step": 15883 }, { "epoch": 0.2903467563565906, "grad_norm": 5.973745328774716, "learning_rate": 8.325590131082367e-06, "loss": 17.4456, "step": 15884 }, { "epoch": 0.2903650355530371, "grad_norm": 7.042997243532951, "learning_rate": 8.32536908211724e-06, "loss": 17.5358, "step": 15885 }, { "epoch": 0.2903833147494836, "grad_norm": 6.664916227130399, "learning_rate": 8.325148021496982e-06, "loss": 17.671, "step": 15886 }, { "epoch": 0.2904015939459301, "grad_norm": 6.326458415158664, "learning_rate": 8.32492694922237e-06, "loss": 17.5055, "step": 15887 }, { "epoch": 0.29041987314237666, "grad_norm": 6.8060649565236435, "learning_rate": 8.324705865294178e-06, "loss": 17.7007, "step": 15888 }, { "epoch": 0.2904381523388232, "grad_norm": 6.149219705617901, "learning_rate": 8.324484769713179e-06, "loss": 17.4992, "step": 15889 }, { "epoch": 0.29045643153526973, "grad_norm": 6.0369480476305455, "learning_rate": 8.32426366248015e-06, "loss": 17.3001, "step": 15890 }, { "epoch": 0.2904747107317162, "grad_norm": 6.877425771103101, "learning_rate": 8.324042543595866e-06, "loss": 17.8269, "step": 15891 }, { "epoch": 0.29049298992816275, "grad_norm": 5.443113205173727, "learning_rate": 8.3238214130611e-06, "loss": 17.36, "step": 15892 }, { "epoch": 0.2905112691246093, "grad_norm": 6.024407802871284, "learning_rate": 8.323600270876628e-06, "loss": 17.4058, "step": 15893 }, { "epoch": 0.2905295483210558, "grad_norm": 6.59169659842555, "learning_rate": 8.323379117043226e-06, "loss": 17.6154, "step": 15894 }, { "epoch": 0.29054782751750236, "grad_norm": 8.11732513575511, "learning_rate": 8.32315795156167e-06, "loss": 18.2892, "step": 15895 }, { "epoch": 0.29056610671394884, "grad_norm": 6.8012147171376975, "learning_rate": 8.322936774432733e-06, "loss": 17.4193, "step": 15896 }, { "epoch": 0.29058438591039537, "grad_norm": 6.4210491553833124, "learning_rate": 8.322715585657191e-06, "loss": 17.922, "step": 15897 }, { "epoch": 0.2906026651068419, "grad_norm": 6.96120011220932, "learning_rate": 8.322494385235818e-06, "loss": 17.9317, "step": 15898 }, { "epoch": 0.29062094430328844, "grad_norm": 6.308022217355439, "learning_rate": 8.322273173169392e-06, "loss": 17.5599, "step": 15899 }, { "epoch": 0.290639223499735, "grad_norm": 6.59785364956779, "learning_rate": 8.322051949458686e-06, "loss": 17.4036, "step": 15900 }, { "epoch": 0.29065750269618146, "grad_norm": 6.912208280964955, "learning_rate": 8.321830714104476e-06, "loss": 17.7087, "step": 15901 }, { "epoch": 0.290675781892628, "grad_norm": 7.600567631357769, "learning_rate": 8.321609467107538e-06, "loss": 18.1136, "step": 15902 }, { "epoch": 0.29069406108907453, "grad_norm": 5.760471125677956, "learning_rate": 8.321388208468647e-06, "loss": 17.3006, "step": 15903 }, { "epoch": 0.29071234028552106, "grad_norm": 6.084253569765747, "learning_rate": 8.321166938188578e-06, "loss": 17.0782, "step": 15904 }, { "epoch": 0.2907306194819676, "grad_norm": 5.168974005331681, "learning_rate": 8.320945656268109e-06, "loss": 17.0572, "step": 15905 }, { "epoch": 0.2907488986784141, "grad_norm": 5.221581518003498, "learning_rate": 8.320724362708013e-06, "loss": 17.0476, "step": 15906 }, { "epoch": 0.2907671778748606, "grad_norm": 7.866374632689439, "learning_rate": 8.320503057509064e-06, "loss": 17.6504, "step": 15907 }, { "epoch": 0.29078545707130715, "grad_norm": 6.143672742519724, "learning_rate": 8.320281740672042e-06, "loss": 17.6234, "step": 15908 }, { "epoch": 0.2908037362677537, "grad_norm": 6.609740979769964, "learning_rate": 8.32006041219772e-06, "loss": 17.6301, "step": 15909 }, { "epoch": 0.29082201546420017, "grad_norm": 6.571069191355662, "learning_rate": 8.319839072086876e-06, "loss": 17.6319, "step": 15910 }, { "epoch": 0.2908402946606467, "grad_norm": 7.908554049421495, "learning_rate": 8.31961772034028e-06, "loss": 17.9841, "step": 15911 }, { "epoch": 0.29085857385709324, "grad_norm": 6.285597972319262, "learning_rate": 8.319396356958716e-06, "loss": 17.3774, "step": 15912 }, { "epoch": 0.2908768530535398, "grad_norm": 7.75251298088019, "learning_rate": 8.319174981942955e-06, "loss": 18.1743, "step": 15913 }, { "epoch": 0.2908951322499863, "grad_norm": 6.516258591895095, "learning_rate": 8.318953595293772e-06, "loss": 17.6466, "step": 15914 }, { "epoch": 0.2909134114464328, "grad_norm": 6.639266152220219, "learning_rate": 8.318732197011945e-06, "loss": 17.7367, "step": 15915 }, { "epoch": 0.2909316906428793, "grad_norm": 5.321623586949735, "learning_rate": 8.318510787098252e-06, "loss": 17.0874, "step": 15916 }, { "epoch": 0.29094996983932586, "grad_norm": 6.611776542798462, "learning_rate": 8.318289365553465e-06, "loss": 17.68, "step": 15917 }, { "epoch": 0.2909682490357724, "grad_norm": 7.1980416374132945, "learning_rate": 8.318067932378361e-06, "loss": 17.6493, "step": 15918 }, { "epoch": 0.29098652823221893, "grad_norm": 6.15059107590305, "learning_rate": 8.317846487573717e-06, "loss": 17.3689, "step": 15919 }, { "epoch": 0.2910048074286654, "grad_norm": 6.84114950642885, "learning_rate": 8.31762503114031e-06, "loss": 17.456, "step": 15920 }, { "epoch": 0.29102308662511195, "grad_norm": 5.920430701908309, "learning_rate": 8.317403563078915e-06, "loss": 17.1576, "step": 15921 }, { "epoch": 0.2910413658215585, "grad_norm": 6.375604895983556, "learning_rate": 8.317182083390307e-06, "loss": 17.4953, "step": 15922 }, { "epoch": 0.291059645018005, "grad_norm": 6.330529357697266, "learning_rate": 8.316960592075267e-06, "loss": 17.5169, "step": 15923 }, { "epoch": 0.29107792421445156, "grad_norm": 7.647725504781784, "learning_rate": 8.316739089134564e-06, "loss": 18.0041, "step": 15924 }, { "epoch": 0.29109620341089804, "grad_norm": 6.060455983108166, "learning_rate": 8.316517574568981e-06, "loss": 17.3912, "step": 15925 }, { "epoch": 0.29111448260734457, "grad_norm": 7.1133631092179135, "learning_rate": 8.31629604837929e-06, "loss": 17.7133, "step": 15926 }, { "epoch": 0.2911327618037911, "grad_norm": 4.92741934288932, "learning_rate": 8.31607451056627e-06, "loss": 16.7877, "step": 15927 }, { "epoch": 0.29115104100023764, "grad_norm": 5.807936016586364, "learning_rate": 8.315852961130697e-06, "loss": 17.3348, "step": 15928 }, { "epoch": 0.2911693201966842, "grad_norm": 6.720183569198894, "learning_rate": 8.315631400073346e-06, "loss": 17.5512, "step": 15929 }, { "epoch": 0.29118759939313066, "grad_norm": 6.337216179019596, "learning_rate": 8.315409827394996e-06, "loss": 17.5489, "step": 15930 }, { "epoch": 0.2912058785895772, "grad_norm": 8.794982143667239, "learning_rate": 8.315188243096421e-06, "loss": 19.0212, "step": 15931 }, { "epoch": 0.29122415778602373, "grad_norm": 7.48505748242081, "learning_rate": 8.3149666471784e-06, "loss": 17.9526, "step": 15932 }, { "epoch": 0.29124243698247027, "grad_norm": 5.8641357013429, "learning_rate": 8.314745039641708e-06, "loss": 17.4658, "step": 15933 }, { "epoch": 0.2912607161789168, "grad_norm": 6.536095360010347, "learning_rate": 8.314523420487122e-06, "loss": 17.5087, "step": 15934 }, { "epoch": 0.2912789953753633, "grad_norm": 6.992953286405548, "learning_rate": 8.314301789715419e-06, "loss": 17.6882, "step": 15935 }, { "epoch": 0.2912972745718098, "grad_norm": 5.702328158462788, "learning_rate": 8.314080147327376e-06, "loss": 17.1967, "step": 15936 }, { "epoch": 0.29131555376825635, "grad_norm": 6.203032349576431, "learning_rate": 8.31385849332377e-06, "loss": 17.5671, "step": 15937 }, { "epoch": 0.2913338329647029, "grad_norm": 7.527407895431283, "learning_rate": 8.313636827705376e-06, "loss": 17.6928, "step": 15938 }, { "epoch": 0.2913521121611494, "grad_norm": 6.583331630723015, "learning_rate": 8.313415150472974e-06, "loss": 17.6124, "step": 15939 }, { "epoch": 0.2913703913575959, "grad_norm": 5.763251944549388, "learning_rate": 8.31319346162734e-06, "loss": 17.0309, "step": 15940 }, { "epoch": 0.29138867055404244, "grad_norm": 5.955730027311099, "learning_rate": 8.31297176116925e-06, "loss": 17.1212, "step": 15941 }, { "epoch": 0.291406949750489, "grad_norm": 9.129691374230369, "learning_rate": 8.31275004909948e-06, "loss": 18.2103, "step": 15942 }, { "epoch": 0.2914252289469355, "grad_norm": 5.801987910334633, "learning_rate": 8.31252832541881e-06, "loss": 17.2921, "step": 15943 }, { "epoch": 0.291443508143382, "grad_norm": 5.2325612262252825, "learning_rate": 8.312306590128015e-06, "loss": 16.7652, "step": 15944 }, { "epoch": 0.2914617873398285, "grad_norm": 6.368534594401016, "learning_rate": 8.312084843227873e-06, "loss": 17.443, "step": 15945 }, { "epoch": 0.29148006653627506, "grad_norm": 6.093266259069604, "learning_rate": 8.311863084719161e-06, "loss": 17.4423, "step": 15946 }, { "epoch": 0.2914983457327216, "grad_norm": 7.184285949649789, "learning_rate": 8.311641314602657e-06, "loss": 17.6637, "step": 15947 }, { "epoch": 0.29151662492916813, "grad_norm": 6.639613039608739, "learning_rate": 8.311419532879137e-06, "loss": 17.3799, "step": 15948 }, { "epoch": 0.2915349041256146, "grad_norm": 6.936285137221981, "learning_rate": 8.311197739549378e-06, "loss": 18.0556, "step": 15949 }, { "epoch": 0.29155318332206115, "grad_norm": 7.292562960656055, "learning_rate": 8.31097593461416e-06, "loss": 18.0502, "step": 15950 }, { "epoch": 0.2915714625185077, "grad_norm": 7.423271528676154, "learning_rate": 8.310754118074258e-06, "loss": 17.7295, "step": 15951 }, { "epoch": 0.2915897417149542, "grad_norm": 6.857453059461951, "learning_rate": 8.310532289930449e-06, "loss": 17.9657, "step": 15952 }, { "epoch": 0.29160802091140076, "grad_norm": 6.849705261001428, "learning_rate": 8.310310450183512e-06, "loss": 17.6968, "step": 15953 }, { "epoch": 0.29162630010784724, "grad_norm": 6.846373698840621, "learning_rate": 8.310088598834226e-06, "loss": 17.7055, "step": 15954 }, { "epoch": 0.29164457930429377, "grad_norm": 5.983680416740459, "learning_rate": 8.309866735883365e-06, "loss": 17.2364, "step": 15955 }, { "epoch": 0.2916628585007403, "grad_norm": 7.717862622944557, "learning_rate": 8.309644861331707e-06, "loss": 18.4288, "step": 15956 }, { "epoch": 0.29168113769718684, "grad_norm": 6.166111719059242, "learning_rate": 8.309422975180036e-06, "loss": 17.4886, "step": 15957 }, { "epoch": 0.2916994168936334, "grad_norm": 6.259664873126532, "learning_rate": 8.30920107742912e-06, "loss": 17.6488, "step": 15958 }, { "epoch": 0.29171769609007986, "grad_norm": 5.640824133826954, "learning_rate": 8.308979168079742e-06, "loss": 17.5109, "step": 15959 }, { "epoch": 0.2917359752865264, "grad_norm": 5.6084383085018, "learning_rate": 8.308757247132679e-06, "loss": 17.3469, "step": 15960 }, { "epoch": 0.29175425448297293, "grad_norm": 5.7750588523022515, "learning_rate": 8.30853531458871e-06, "loss": 17.3453, "step": 15961 }, { "epoch": 0.29177253367941947, "grad_norm": 7.083811626591957, "learning_rate": 8.308313370448611e-06, "loss": 17.7419, "step": 15962 }, { "epoch": 0.291790812875866, "grad_norm": 7.035315389410472, "learning_rate": 8.308091414713162e-06, "loss": 17.8954, "step": 15963 }, { "epoch": 0.2918090920723125, "grad_norm": 6.371758361429908, "learning_rate": 8.307869447383139e-06, "loss": 17.5146, "step": 15964 }, { "epoch": 0.291827371268759, "grad_norm": 4.803194078634297, "learning_rate": 8.30764746845932e-06, "loss": 16.8787, "step": 15965 }, { "epoch": 0.29184565046520555, "grad_norm": 6.802235942204773, "learning_rate": 8.307425477942485e-06, "loss": 17.6656, "step": 15966 }, { "epoch": 0.2918639296616521, "grad_norm": 7.262053207815283, "learning_rate": 8.30720347583341e-06, "loss": 18.0788, "step": 15967 }, { "epoch": 0.2918822088580986, "grad_norm": 7.844899505760226, "learning_rate": 8.306981462132873e-06, "loss": 18.1277, "step": 15968 }, { "epoch": 0.2919004880545451, "grad_norm": 6.267639267917648, "learning_rate": 8.306759436841653e-06, "loss": 17.4688, "step": 15969 }, { "epoch": 0.29191876725099164, "grad_norm": 6.941059769945796, "learning_rate": 8.306537399960528e-06, "loss": 17.8929, "step": 15970 }, { "epoch": 0.2919370464474382, "grad_norm": 7.0941521241078, "learning_rate": 8.306315351490279e-06, "loss": 17.9915, "step": 15971 }, { "epoch": 0.2919553256438847, "grad_norm": 6.878623015133455, "learning_rate": 8.30609329143168e-06, "loss": 17.9059, "step": 15972 }, { "epoch": 0.29197360484033125, "grad_norm": 5.563252523447362, "learning_rate": 8.305871219785509e-06, "loss": 17.025, "step": 15973 }, { "epoch": 0.2919918840367777, "grad_norm": 5.711905719455599, "learning_rate": 8.30564913655255e-06, "loss": 17.2821, "step": 15974 }, { "epoch": 0.29201016323322426, "grad_norm": 6.946104489699604, "learning_rate": 8.305427041733573e-06, "loss": 17.7724, "step": 15975 }, { "epoch": 0.2920284424296708, "grad_norm": 5.92126910155695, "learning_rate": 8.305204935329365e-06, "loss": 17.3468, "step": 15976 }, { "epoch": 0.29204672162611733, "grad_norm": 9.455120274795474, "learning_rate": 8.304982817340699e-06, "loss": 18.5487, "step": 15977 }, { "epoch": 0.2920650008225638, "grad_norm": 6.930638307766184, "learning_rate": 8.304760687768355e-06, "loss": 17.6471, "step": 15978 }, { "epoch": 0.29208328001901035, "grad_norm": 7.163525702393523, "learning_rate": 8.304538546613111e-06, "loss": 18.0668, "step": 15979 }, { "epoch": 0.2921015592154569, "grad_norm": 6.304269962437889, "learning_rate": 8.304316393875746e-06, "loss": 17.4759, "step": 15980 }, { "epoch": 0.2921198384119034, "grad_norm": 6.6073744214449395, "learning_rate": 8.304094229557041e-06, "loss": 17.7584, "step": 15981 }, { "epoch": 0.29213811760834996, "grad_norm": 6.917237855040101, "learning_rate": 8.30387205365777e-06, "loss": 17.6626, "step": 15982 }, { "epoch": 0.29215639680479644, "grad_norm": 7.831925164171523, "learning_rate": 8.303649866178716e-06, "loss": 18.0621, "step": 15983 }, { "epoch": 0.29217467600124297, "grad_norm": 7.766589484273645, "learning_rate": 8.303427667120655e-06, "loss": 18.0887, "step": 15984 }, { "epoch": 0.2921929551976895, "grad_norm": 7.177838681952092, "learning_rate": 8.303205456484367e-06, "loss": 17.45, "step": 15985 }, { "epoch": 0.29221123439413604, "grad_norm": 6.036841186751471, "learning_rate": 8.30298323427063e-06, "loss": 17.4499, "step": 15986 }, { "epoch": 0.2922295135905826, "grad_norm": 7.082155768465045, "learning_rate": 8.302761000480223e-06, "loss": 17.826, "step": 15987 }, { "epoch": 0.29224779278702906, "grad_norm": 6.580597836497529, "learning_rate": 8.302538755113927e-06, "loss": 17.5262, "step": 15988 }, { "epoch": 0.2922660719834756, "grad_norm": 8.288081416192268, "learning_rate": 8.302316498172518e-06, "loss": 17.7942, "step": 15989 }, { "epoch": 0.29228435117992213, "grad_norm": 6.33771745536431, "learning_rate": 8.302094229656776e-06, "loss": 17.4197, "step": 15990 }, { "epoch": 0.29230263037636867, "grad_norm": 7.04313423853878, "learning_rate": 8.30187194956748e-06, "loss": 17.6455, "step": 15991 }, { "epoch": 0.2923209095728152, "grad_norm": 5.712160325010722, "learning_rate": 8.30164965790541e-06, "loss": 17.2575, "step": 15992 }, { "epoch": 0.2923391887692617, "grad_norm": 7.276121630883649, "learning_rate": 8.301427354671345e-06, "loss": 18.0208, "step": 15993 }, { "epoch": 0.2923574679657082, "grad_norm": 6.383029706669919, "learning_rate": 8.301205039866063e-06, "loss": 17.774, "step": 15994 }, { "epoch": 0.29237574716215475, "grad_norm": 6.627319589607053, "learning_rate": 8.300982713490344e-06, "loss": 17.5862, "step": 15995 }, { "epoch": 0.2923940263586013, "grad_norm": 7.220860590953939, "learning_rate": 8.300760375544967e-06, "loss": 17.9415, "step": 15996 }, { "epoch": 0.2924123055550478, "grad_norm": 6.978643408968633, "learning_rate": 8.300538026030712e-06, "loss": 17.569, "step": 15997 }, { "epoch": 0.2924305847514943, "grad_norm": 6.531278179387211, "learning_rate": 8.300315664948355e-06, "loss": 17.5871, "step": 15998 }, { "epoch": 0.29244886394794084, "grad_norm": 6.357891325954263, "learning_rate": 8.300093292298681e-06, "loss": 17.3975, "step": 15999 }, { "epoch": 0.2924671431443874, "grad_norm": 8.095016958306415, "learning_rate": 8.299870908082465e-06, "loss": 18.3559, "step": 16000 }, { "epoch": 0.2924854223408339, "grad_norm": 7.719323042506764, "learning_rate": 8.299648512300487e-06, "loss": 18.284, "step": 16001 }, { "epoch": 0.29250370153728045, "grad_norm": 7.570844006464962, "learning_rate": 8.29942610495353e-06, "loss": 17.994, "step": 16002 }, { "epoch": 0.2925219807337269, "grad_norm": 7.601757116027508, "learning_rate": 8.299203686042367e-06, "loss": 17.9397, "step": 16003 }, { "epoch": 0.29254025993017346, "grad_norm": 6.740646743610164, "learning_rate": 8.298981255567785e-06, "loss": 17.8283, "step": 16004 }, { "epoch": 0.29255853912662, "grad_norm": 6.308900735552201, "learning_rate": 8.298758813530559e-06, "loss": 17.4854, "step": 16005 }, { "epoch": 0.29257681832306653, "grad_norm": 7.362631597178887, "learning_rate": 8.298536359931469e-06, "loss": 18.0763, "step": 16006 }, { "epoch": 0.29259509751951307, "grad_norm": 5.7634680549419235, "learning_rate": 8.298313894771294e-06, "loss": 17.2389, "step": 16007 }, { "epoch": 0.29261337671595955, "grad_norm": 6.239958089881529, "learning_rate": 8.298091418050817e-06, "loss": 17.3514, "step": 16008 }, { "epoch": 0.2926316559124061, "grad_norm": 9.225766618544824, "learning_rate": 8.297868929770815e-06, "loss": 18.6185, "step": 16009 }, { "epoch": 0.2926499351088526, "grad_norm": 5.553300930458291, "learning_rate": 8.29764642993207e-06, "loss": 17.0238, "step": 16010 }, { "epoch": 0.29266821430529916, "grad_norm": 8.737045883777931, "learning_rate": 8.29742391853536e-06, "loss": 17.7353, "step": 16011 }, { "epoch": 0.29268649350174564, "grad_norm": 8.113213857813278, "learning_rate": 8.297201395581463e-06, "loss": 18.0144, "step": 16012 }, { "epoch": 0.2927047726981922, "grad_norm": 6.930256829166329, "learning_rate": 8.296978861071163e-06, "loss": 17.8505, "step": 16013 }, { "epoch": 0.2927230518946387, "grad_norm": 5.76729045647949, "learning_rate": 8.296756315005237e-06, "loss": 17.3446, "step": 16014 }, { "epoch": 0.29274133109108524, "grad_norm": 6.852138600941832, "learning_rate": 8.296533757384467e-06, "loss": 17.7116, "step": 16015 }, { "epoch": 0.2927596102875318, "grad_norm": 7.1098600478062535, "learning_rate": 8.296311188209634e-06, "loss": 17.5953, "step": 16016 }, { "epoch": 0.29277788948397826, "grad_norm": 6.152991216930885, "learning_rate": 8.296088607481514e-06, "loss": 17.6729, "step": 16017 }, { "epoch": 0.2927961686804248, "grad_norm": 6.699902350562617, "learning_rate": 8.295866015200889e-06, "loss": 17.5917, "step": 16018 }, { "epoch": 0.29281444787687133, "grad_norm": 6.718593645829086, "learning_rate": 8.29564341136854e-06, "loss": 17.4667, "step": 16019 }, { "epoch": 0.29283272707331787, "grad_norm": 6.238437528064015, "learning_rate": 8.29542079598525e-06, "loss": 17.4897, "step": 16020 }, { "epoch": 0.2928510062697644, "grad_norm": 7.803161615971059, "learning_rate": 8.295198169051792e-06, "loss": 17.9566, "step": 16021 }, { "epoch": 0.2928692854662109, "grad_norm": 5.851969317334456, "learning_rate": 8.294975530568952e-06, "loss": 17.2075, "step": 16022 }, { "epoch": 0.2928875646626574, "grad_norm": 6.903866718538288, "learning_rate": 8.29475288053751e-06, "loss": 17.684, "step": 16023 }, { "epoch": 0.29290584385910395, "grad_norm": 9.073972148485757, "learning_rate": 8.294530218958243e-06, "loss": 18.4389, "step": 16024 }, { "epoch": 0.2929241230555505, "grad_norm": 5.92501062690928, "learning_rate": 8.294307545831935e-06, "loss": 17.5402, "step": 16025 }, { "epoch": 0.292942402251997, "grad_norm": 5.2315268846003695, "learning_rate": 8.294084861159363e-06, "loss": 16.9985, "step": 16026 }, { "epoch": 0.2929606814484435, "grad_norm": 6.86822506179636, "learning_rate": 8.293862164941311e-06, "loss": 17.5747, "step": 16027 }, { "epoch": 0.29297896064489004, "grad_norm": 5.756534441651972, "learning_rate": 8.293639457178557e-06, "loss": 17.2154, "step": 16028 }, { "epoch": 0.2929972398413366, "grad_norm": 8.803201480989898, "learning_rate": 8.293416737871882e-06, "loss": 18.1632, "step": 16029 }, { "epoch": 0.2930155190377831, "grad_norm": 7.056439052161994, "learning_rate": 8.29319400702207e-06, "loss": 18.0589, "step": 16030 }, { "epoch": 0.29303379823422965, "grad_norm": 7.8384031209301375, "learning_rate": 8.292971264629895e-06, "loss": 18.3924, "step": 16031 }, { "epoch": 0.29305207743067613, "grad_norm": 5.30923451141389, "learning_rate": 8.292748510696144e-06, "loss": 17.0437, "step": 16032 }, { "epoch": 0.29307035662712266, "grad_norm": 5.420403371338054, "learning_rate": 8.292525745221595e-06, "loss": 17.1702, "step": 16033 }, { "epoch": 0.2930886358235692, "grad_norm": 8.135822152292993, "learning_rate": 8.292302968207028e-06, "loss": 17.872, "step": 16034 }, { "epoch": 0.29310691502001573, "grad_norm": 4.77519323958378, "learning_rate": 8.292080179653225e-06, "loss": 16.8895, "step": 16035 }, { "epoch": 0.29312519421646227, "grad_norm": 6.505618821002592, "learning_rate": 8.291857379560968e-06, "loss": 17.8212, "step": 16036 }, { "epoch": 0.29314347341290875, "grad_norm": 7.792316429728093, "learning_rate": 8.291634567931036e-06, "loss": 17.7504, "step": 16037 }, { "epoch": 0.2931617526093553, "grad_norm": 6.299430618102456, "learning_rate": 8.291411744764209e-06, "loss": 17.4734, "step": 16038 }, { "epoch": 0.2931800318058018, "grad_norm": 7.2576532554125235, "learning_rate": 8.29118891006127e-06, "loss": 17.9509, "step": 16039 }, { "epoch": 0.29319831100224836, "grad_norm": 8.165665908077704, "learning_rate": 8.290966063823e-06, "loss": 18.0751, "step": 16040 }, { "epoch": 0.2932165901986949, "grad_norm": 7.610675332319149, "learning_rate": 8.29074320605018e-06, "loss": 17.8204, "step": 16041 }, { "epoch": 0.2932348693951414, "grad_norm": 6.104090895652702, "learning_rate": 8.290520336743589e-06, "loss": 17.5176, "step": 16042 }, { "epoch": 0.2932531485915879, "grad_norm": 7.843404621359061, "learning_rate": 8.290297455904011e-06, "loss": 18.1972, "step": 16043 }, { "epoch": 0.29327142778803444, "grad_norm": 8.210727392862546, "learning_rate": 8.290074563532227e-06, "loss": 18.4069, "step": 16044 }, { "epoch": 0.293289706984481, "grad_norm": 5.698596075513861, "learning_rate": 8.289851659629014e-06, "loss": 17.1512, "step": 16045 }, { "epoch": 0.29330798618092746, "grad_norm": 7.1254844095808565, "learning_rate": 8.28962874419516e-06, "loss": 17.9916, "step": 16046 }, { "epoch": 0.293326265377374, "grad_norm": 6.408700500381423, "learning_rate": 8.289405817231439e-06, "loss": 17.7296, "step": 16047 }, { "epoch": 0.29334454457382053, "grad_norm": 6.294001819723074, "learning_rate": 8.28918287873864e-06, "loss": 17.5453, "step": 16048 }, { "epoch": 0.29336282377026707, "grad_norm": 8.891507157521445, "learning_rate": 8.288959928717538e-06, "loss": 17.6735, "step": 16049 }, { "epoch": 0.2933811029667136, "grad_norm": 6.445046404182921, "learning_rate": 8.288736967168917e-06, "loss": 17.4465, "step": 16050 }, { "epoch": 0.2933993821631601, "grad_norm": 6.512188189130023, "learning_rate": 8.288513994093558e-06, "loss": 18.1263, "step": 16051 }, { "epoch": 0.2934176613596066, "grad_norm": 6.035160251880754, "learning_rate": 8.288291009492245e-06, "loss": 17.4996, "step": 16052 }, { "epoch": 0.29343594055605315, "grad_norm": 5.99175988250994, "learning_rate": 8.288068013365755e-06, "loss": 17.3569, "step": 16053 }, { "epoch": 0.2934542197524997, "grad_norm": 6.9243016139231015, "learning_rate": 8.287845005714872e-06, "loss": 17.4461, "step": 16054 }, { "epoch": 0.2934724989489462, "grad_norm": 7.028727291165785, "learning_rate": 8.287621986540379e-06, "loss": 17.3805, "step": 16055 }, { "epoch": 0.2934907781453927, "grad_norm": 4.964826885366487, "learning_rate": 8.287398955843056e-06, "loss": 16.9784, "step": 16056 }, { "epoch": 0.29350905734183924, "grad_norm": 5.559467036698685, "learning_rate": 8.287175913623683e-06, "loss": 17.0553, "step": 16057 }, { "epoch": 0.2935273365382858, "grad_norm": 5.561341548724766, "learning_rate": 8.286952859883046e-06, "loss": 17.0916, "step": 16058 }, { "epoch": 0.2935456157347323, "grad_norm": 8.510322733422155, "learning_rate": 8.286729794621924e-06, "loss": 18.8003, "step": 16059 }, { "epoch": 0.29356389493117885, "grad_norm": 7.407605325077076, "learning_rate": 8.286506717841098e-06, "loss": 17.7896, "step": 16060 }, { "epoch": 0.29358217412762533, "grad_norm": 6.351108811363999, "learning_rate": 8.286283629541354e-06, "loss": 17.6701, "step": 16061 }, { "epoch": 0.29360045332407186, "grad_norm": 6.637702791225121, "learning_rate": 8.286060529723467e-06, "loss": 17.6626, "step": 16062 }, { "epoch": 0.2936187325205184, "grad_norm": 6.9148735357227755, "learning_rate": 8.285837418388225e-06, "loss": 17.7434, "step": 16063 }, { "epoch": 0.29363701171696494, "grad_norm": 6.5891439576524755, "learning_rate": 8.285614295536408e-06, "loss": 17.6507, "step": 16064 }, { "epoch": 0.29365529091341147, "grad_norm": 7.0251678746844615, "learning_rate": 8.285391161168798e-06, "loss": 17.5474, "step": 16065 }, { "epoch": 0.29367357010985795, "grad_norm": 7.031143782963527, "learning_rate": 8.285168015286177e-06, "loss": 17.8143, "step": 16066 }, { "epoch": 0.2936918493063045, "grad_norm": 7.893692081698446, "learning_rate": 8.284944857889327e-06, "loss": 18.5228, "step": 16067 }, { "epoch": 0.293710128502751, "grad_norm": 6.516554887842644, "learning_rate": 8.284721688979032e-06, "loss": 17.3555, "step": 16068 }, { "epoch": 0.29372840769919756, "grad_norm": 6.303948742692822, "learning_rate": 8.284498508556072e-06, "loss": 17.4553, "step": 16069 }, { "epoch": 0.2937466868956441, "grad_norm": 8.522657992212642, "learning_rate": 8.284275316621227e-06, "loss": 18.4694, "step": 16070 }, { "epoch": 0.2937649660920906, "grad_norm": 6.756765906445806, "learning_rate": 8.284052113175285e-06, "loss": 17.8483, "step": 16071 }, { "epoch": 0.2937832452885371, "grad_norm": 6.263059521885723, "learning_rate": 8.283828898219025e-06, "loss": 17.4964, "step": 16072 }, { "epoch": 0.29380152448498364, "grad_norm": 6.480726018118758, "learning_rate": 8.283605671753228e-06, "loss": 17.6313, "step": 16073 }, { "epoch": 0.2938198036814302, "grad_norm": 6.316229642215812, "learning_rate": 8.283382433778678e-06, "loss": 17.733, "step": 16074 }, { "epoch": 0.2938380828778767, "grad_norm": 5.5538384810390005, "learning_rate": 8.283159184296158e-06, "loss": 16.9584, "step": 16075 }, { "epoch": 0.2938563620743232, "grad_norm": 6.588095026098765, "learning_rate": 8.282935923306452e-06, "loss": 17.619, "step": 16076 }, { "epoch": 0.29387464127076973, "grad_norm": 6.07862577927372, "learning_rate": 8.282712650810339e-06, "loss": 17.302, "step": 16077 }, { "epoch": 0.29389292046721627, "grad_norm": 7.78484540887961, "learning_rate": 8.282489366808603e-06, "loss": 18.1963, "step": 16078 }, { "epoch": 0.2939111996636628, "grad_norm": 6.954039498244128, "learning_rate": 8.282266071302025e-06, "loss": 17.9513, "step": 16079 }, { "epoch": 0.2939294788601093, "grad_norm": 6.391492039911019, "learning_rate": 8.282042764291392e-06, "loss": 17.5255, "step": 16080 }, { "epoch": 0.2939477580565558, "grad_norm": 6.943123103228119, "learning_rate": 8.281819445777483e-06, "loss": 17.8471, "step": 16081 }, { "epoch": 0.29396603725300235, "grad_norm": 4.913061747853871, "learning_rate": 8.281596115761082e-06, "loss": 16.976, "step": 16082 }, { "epoch": 0.2939843164494489, "grad_norm": 6.9758949576322165, "learning_rate": 8.281372774242968e-06, "loss": 17.8159, "step": 16083 }, { "epoch": 0.2940025956458954, "grad_norm": 6.624762103518529, "learning_rate": 8.28114942122393e-06, "loss": 17.527, "step": 16084 }, { "epoch": 0.2940208748423419, "grad_norm": 7.075738927009336, "learning_rate": 8.28092605670475e-06, "loss": 17.815, "step": 16085 }, { "epoch": 0.29403915403878844, "grad_norm": 6.632446800746286, "learning_rate": 8.280702680686206e-06, "loss": 17.7209, "step": 16086 }, { "epoch": 0.294057433235235, "grad_norm": 7.477268407655306, "learning_rate": 8.280479293169083e-06, "loss": 17.8343, "step": 16087 }, { "epoch": 0.2940757124316815, "grad_norm": 7.735048943739937, "learning_rate": 8.280255894154167e-06, "loss": 17.6479, "step": 16088 }, { "epoch": 0.29409399162812805, "grad_norm": 6.944062305694892, "learning_rate": 8.280032483642238e-06, "loss": 18.0583, "step": 16089 }, { "epoch": 0.29411227082457453, "grad_norm": 6.292512509475764, "learning_rate": 8.27980906163408e-06, "loss": 17.3399, "step": 16090 }, { "epoch": 0.29413055002102106, "grad_norm": 6.856975000258546, "learning_rate": 8.279585628130476e-06, "loss": 17.7249, "step": 16091 }, { "epoch": 0.2941488292174676, "grad_norm": 6.587628718956014, "learning_rate": 8.279362183132208e-06, "loss": 17.5801, "step": 16092 }, { "epoch": 0.29416710841391414, "grad_norm": 6.10922530217729, "learning_rate": 8.27913872664006e-06, "loss": 17.3902, "step": 16093 }, { "epoch": 0.29418538761036067, "grad_norm": 5.769721979891175, "learning_rate": 8.278915258654816e-06, "loss": 17.4178, "step": 16094 }, { "epoch": 0.29420366680680715, "grad_norm": 8.461191822315362, "learning_rate": 8.27869177917726e-06, "loss": 18.7004, "step": 16095 }, { "epoch": 0.2942219460032537, "grad_norm": 5.960529254388798, "learning_rate": 8.278468288208173e-06, "loss": 17.2166, "step": 16096 }, { "epoch": 0.2942402251997002, "grad_norm": 6.0029111031728135, "learning_rate": 8.278244785748337e-06, "loss": 17.3049, "step": 16097 }, { "epoch": 0.29425850439614676, "grad_norm": 7.2388768396319, "learning_rate": 8.27802127179854e-06, "loss": 17.8213, "step": 16098 }, { "epoch": 0.2942767835925933, "grad_norm": 6.964455211596097, "learning_rate": 8.277797746359562e-06, "loss": 17.8663, "step": 16099 }, { "epoch": 0.2942950627890398, "grad_norm": 7.143780228227318, "learning_rate": 8.277574209432187e-06, "loss": 17.6374, "step": 16100 }, { "epoch": 0.2943133419854863, "grad_norm": 7.404051063205473, "learning_rate": 8.277350661017198e-06, "loss": 18.0763, "step": 16101 }, { "epoch": 0.29433162118193285, "grad_norm": 5.681771181830303, "learning_rate": 8.277127101115381e-06, "loss": 17.1563, "step": 16102 }, { "epoch": 0.2943499003783794, "grad_norm": 6.541284403697504, "learning_rate": 8.276903529727517e-06, "loss": 17.4682, "step": 16103 }, { "epoch": 0.2943681795748259, "grad_norm": 6.463094141425445, "learning_rate": 8.276679946854392e-06, "loss": 17.3894, "step": 16104 }, { "epoch": 0.2943864587712724, "grad_norm": 7.1175319393264305, "learning_rate": 8.276456352496785e-06, "loss": 17.7138, "step": 16105 }, { "epoch": 0.29440473796771893, "grad_norm": 7.0808893139446605, "learning_rate": 8.276232746655485e-06, "loss": 17.5719, "step": 16106 }, { "epoch": 0.29442301716416547, "grad_norm": 5.617139148213048, "learning_rate": 8.276009129331273e-06, "loss": 17.3385, "step": 16107 }, { "epoch": 0.294441296360612, "grad_norm": 6.638985159144182, "learning_rate": 8.275785500524933e-06, "loss": 17.6396, "step": 16108 }, { "epoch": 0.29445957555705854, "grad_norm": 6.126398449831048, "learning_rate": 8.275561860237252e-06, "loss": 17.4253, "step": 16109 }, { "epoch": 0.294477854753505, "grad_norm": 7.9825261159233625, "learning_rate": 8.275338208469007e-06, "loss": 18.7895, "step": 16110 }, { "epoch": 0.29449613394995156, "grad_norm": 6.348114834009812, "learning_rate": 8.275114545220986e-06, "loss": 17.5388, "step": 16111 }, { "epoch": 0.2945144131463981, "grad_norm": 6.363968832816221, "learning_rate": 8.274890870493975e-06, "loss": 17.4444, "step": 16112 }, { "epoch": 0.2945326923428446, "grad_norm": 6.605263007879188, "learning_rate": 8.274667184288755e-06, "loss": 17.6027, "step": 16113 }, { "epoch": 0.2945509715392911, "grad_norm": 5.957730997015338, "learning_rate": 8.27444348660611e-06, "loss": 17.288, "step": 16114 }, { "epoch": 0.29456925073573764, "grad_norm": 7.155158853752062, "learning_rate": 8.274219777446826e-06, "loss": 18.0591, "step": 16115 }, { "epoch": 0.2945875299321842, "grad_norm": 5.690349804479167, "learning_rate": 8.273996056811684e-06, "loss": 17.1678, "step": 16116 }, { "epoch": 0.2946058091286307, "grad_norm": 8.759875273515886, "learning_rate": 8.27377232470147e-06, "loss": 18.4531, "step": 16117 }, { "epoch": 0.29462408832507725, "grad_norm": 7.071037803884969, "learning_rate": 8.27354858111697e-06, "loss": 17.9133, "step": 16118 }, { "epoch": 0.29464236752152373, "grad_norm": 5.211525432522588, "learning_rate": 8.273324826058966e-06, "loss": 17.1834, "step": 16119 }, { "epoch": 0.29466064671797026, "grad_norm": 6.635936234020447, "learning_rate": 8.273101059528242e-06, "loss": 17.4478, "step": 16120 }, { "epoch": 0.2946789259144168, "grad_norm": 5.075378179443972, "learning_rate": 8.272877281525581e-06, "loss": 16.7821, "step": 16121 }, { "epoch": 0.29469720511086334, "grad_norm": 7.059489589531912, "learning_rate": 8.27265349205177e-06, "loss": 18.1215, "step": 16122 }, { "epoch": 0.29471548430730987, "grad_norm": 9.00772735041651, "learning_rate": 8.272429691107595e-06, "loss": 17.2768, "step": 16123 }, { "epoch": 0.29473376350375635, "grad_norm": 6.863778251720272, "learning_rate": 8.272205878693835e-06, "loss": 18.007, "step": 16124 }, { "epoch": 0.2947520427002029, "grad_norm": 5.685395242343081, "learning_rate": 8.271982054811279e-06, "loss": 17.2372, "step": 16125 }, { "epoch": 0.2947703218966494, "grad_norm": 7.226228088489144, "learning_rate": 8.271758219460708e-06, "loss": 17.967, "step": 16126 }, { "epoch": 0.29478860109309596, "grad_norm": 6.642752145506515, "learning_rate": 8.27153437264291e-06, "loss": 17.7786, "step": 16127 }, { "epoch": 0.2948068802895425, "grad_norm": 7.521831132525992, "learning_rate": 8.271310514358667e-06, "loss": 18.4123, "step": 16128 }, { "epoch": 0.294825159485989, "grad_norm": 7.055337495464242, "learning_rate": 8.271086644608766e-06, "loss": 17.9119, "step": 16129 }, { "epoch": 0.2948434386824355, "grad_norm": 6.715668028490851, "learning_rate": 8.27086276339399e-06, "loss": 17.9008, "step": 16130 }, { "epoch": 0.29486171787888205, "grad_norm": 6.226074771096643, "learning_rate": 8.270638870715122e-06, "loss": 17.6105, "step": 16131 }, { "epoch": 0.2948799970753286, "grad_norm": 7.970005508418957, "learning_rate": 8.27041496657295e-06, "loss": 18.0794, "step": 16132 }, { "epoch": 0.2948982762717751, "grad_norm": 7.312806125808853, "learning_rate": 8.270191050968257e-06, "loss": 17.7882, "step": 16133 }, { "epoch": 0.2949165554682216, "grad_norm": 7.343051756783496, "learning_rate": 8.269967123901828e-06, "loss": 17.7306, "step": 16134 }, { "epoch": 0.29493483466466813, "grad_norm": 6.028786414168179, "learning_rate": 8.269743185374449e-06, "loss": 17.3196, "step": 16135 }, { "epoch": 0.29495311386111467, "grad_norm": 7.446803114877071, "learning_rate": 8.269519235386902e-06, "loss": 18.0951, "step": 16136 }, { "epoch": 0.2949713930575612, "grad_norm": 5.85371185484508, "learning_rate": 8.269295273939974e-06, "loss": 17.2621, "step": 16137 }, { "epoch": 0.29498967225400774, "grad_norm": 6.221291944454979, "learning_rate": 8.26907130103445e-06, "loss": 17.3017, "step": 16138 }, { "epoch": 0.2950079514504542, "grad_norm": 6.240179982039564, "learning_rate": 8.268847316671116e-06, "loss": 17.4279, "step": 16139 }, { "epoch": 0.29502623064690076, "grad_norm": 7.634717676856093, "learning_rate": 8.268623320850755e-06, "loss": 17.6661, "step": 16140 }, { "epoch": 0.2950445098433473, "grad_norm": 7.903036835138847, "learning_rate": 8.268399313574154e-06, "loss": 18.2374, "step": 16141 }, { "epoch": 0.2950627890397938, "grad_norm": 6.865211273666724, "learning_rate": 8.268175294842096e-06, "loss": 17.7922, "step": 16142 }, { "epoch": 0.29508106823624036, "grad_norm": 6.0218696280341355, "learning_rate": 8.267951264655367e-06, "loss": 17.1883, "step": 16143 }, { "epoch": 0.29509934743268684, "grad_norm": 5.3851609690899656, "learning_rate": 8.267727223014752e-06, "loss": 17.3298, "step": 16144 }, { "epoch": 0.2951176266291334, "grad_norm": 7.415122471618471, "learning_rate": 8.267503169921037e-06, "loss": 17.9095, "step": 16145 }, { "epoch": 0.2951359058255799, "grad_norm": 7.143329963730976, "learning_rate": 8.267279105375007e-06, "loss": 17.428, "step": 16146 }, { "epoch": 0.29515418502202645, "grad_norm": 6.871888122379926, "learning_rate": 8.267055029377448e-06, "loss": 17.5288, "step": 16147 }, { "epoch": 0.29517246421847293, "grad_norm": 8.243891775073257, "learning_rate": 8.266830941929144e-06, "loss": 17.8825, "step": 16148 }, { "epoch": 0.29519074341491947, "grad_norm": 6.784484797692967, "learning_rate": 8.26660684303088e-06, "loss": 17.7391, "step": 16149 }, { "epoch": 0.295209022611366, "grad_norm": 9.112605991119066, "learning_rate": 8.266382732683445e-06, "loss": 18.6377, "step": 16150 }, { "epoch": 0.29522730180781254, "grad_norm": 6.986089492669639, "learning_rate": 8.26615861088762e-06, "loss": 17.7572, "step": 16151 }, { "epoch": 0.2952455810042591, "grad_norm": 5.6192299528481175, "learning_rate": 8.265934477644193e-06, "loss": 17.2478, "step": 16152 }, { "epoch": 0.29526386020070555, "grad_norm": 6.16900873742378, "learning_rate": 8.265710332953949e-06, "loss": 17.5938, "step": 16153 }, { "epoch": 0.2952821393971521, "grad_norm": 7.15423716363989, "learning_rate": 8.265486176817675e-06, "loss": 17.6952, "step": 16154 }, { "epoch": 0.2953004185935986, "grad_norm": 5.2767902577615695, "learning_rate": 8.265262009236152e-06, "loss": 17.137, "step": 16155 }, { "epoch": 0.29531869779004516, "grad_norm": 6.146251710364802, "learning_rate": 8.265037830210172e-06, "loss": 17.4041, "step": 16156 }, { "epoch": 0.2953369769864917, "grad_norm": 6.758768374203726, "learning_rate": 8.264813639740517e-06, "loss": 17.8532, "step": 16157 }, { "epoch": 0.2953552561829382, "grad_norm": 6.4511848223802195, "learning_rate": 8.264589437827971e-06, "loss": 17.5946, "step": 16158 }, { "epoch": 0.2953735353793847, "grad_norm": 6.447252327774815, "learning_rate": 8.264365224473327e-06, "loss": 17.6131, "step": 16159 }, { "epoch": 0.29539181457583125, "grad_norm": 6.5147614417505775, "learning_rate": 8.264140999677363e-06, "loss": 17.4324, "step": 16160 }, { "epoch": 0.2954100937722778, "grad_norm": 5.917691681255722, "learning_rate": 8.26391676344087e-06, "loss": 17.3937, "step": 16161 }, { "epoch": 0.2954283729687243, "grad_norm": 5.627604957337583, "learning_rate": 8.26369251576463e-06, "loss": 17.1035, "step": 16162 }, { "epoch": 0.2954466521651708, "grad_norm": 6.813752414980774, "learning_rate": 8.263468256649432e-06, "loss": 17.7017, "step": 16163 }, { "epoch": 0.29546493136161733, "grad_norm": 7.170450657184277, "learning_rate": 8.26324398609606e-06, "loss": 17.8039, "step": 16164 }, { "epoch": 0.29548321055806387, "grad_norm": 6.794922461702556, "learning_rate": 8.263019704105301e-06, "loss": 17.6664, "step": 16165 }, { "epoch": 0.2955014897545104, "grad_norm": 9.545740247540214, "learning_rate": 8.262795410677942e-06, "loss": 17.8033, "step": 16166 }, { "epoch": 0.29551976895095694, "grad_norm": 5.873040185505237, "learning_rate": 8.262571105814768e-06, "loss": 17.3408, "step": 16167 }, { "epoch": 0.2955380481474034, "grad_norm": 7.591817860083055, "learning_rate": 8.262346789516567e-06, "loss": 18.2472, "step": 16168 }, { "epoch": 0.29555632734384996, "grad_norm": 6.756312239314254, "learning_rate": 8.262122461784121e-06, "loss": 17.5218, "step": 16169 }, { "epoch": 0.2955746065402965, "grad_norm": 6.458097026716254, "learning_rate": 8.26189812261822e-06, "loss": 17.6256, "step": 16170 }, { "epoch": 0.295592885736743, "grad_norm": 6.966415218401644, "learning_rate": 8.261673772019649e-06, "loss": 17.4215, "step": 16171 }, { "epoch": 0.29561116493318956, "grad_norm": 6.382851441378164, "learning_rate": 8.261449409989194e-06, "loss": 17.7032, "step": 16172 }, { "epoch": 0.29562944412963604, "grad_norm": 5.805456529681644, "learning_rate": 8.261225036527642e-06, "loss": 17.3363, "step": 16173 }, { "epoch": 0.2956477233260826, "grad_norm": 6.408086235868578, "learning_rate": 8.26100065163578e-06, "loss": 17.4591, "step": 16174 }, { "epoch": 0.2956660025225291, "grad_norm": 5.1312227955501895, "learning_rate": 8.260776255314394e-06, "loss": 17.0203, "step": 16175 }, { "epoch": 0.29568428171897565, "grad_norm": 6.232576056759392, "learning_rate": 8.260551847564268e-06, "loss": 17.4782, "step": 16176 }, { "epoch": 0.2957025609154222, "grad_norm": 6.546301101195819, "learning_rate": 8.260327428386191e-06, "loss": 17.5471, "step": 16177 }, { "epoch": 0.29572084011186867, "grad_norm": 7.393112461830841, "learning_rate": 8.260102997780952e-06, "loss": 18.0688, "step": 16178 }, { "epoch": 0.2957391193083152, "grad_norm": 5.768559251810734, "learning_rate": 8.259878555749332e-06, "loss": 17.2527, "step": 16179 }, { "epoch": 0.29575739850476174, "grad_norm": 6.482737775699103, "learning_rate": 8.259654102292123e-06, "loss": 17.5944, "step": 16180 }, { "epoch": 0.2957756777012083, "grad_norm": 8.191801246187003, "learning_rate": 8.259429637410108e-06, "loss": 17.6921, "step": 16181 }, { "epoch": 0.29579395689765475, "grad_norm": 6.895390327766547, "learning_rate": 8.259205161104075e-06, "loss": 17.6604, "step": 16182 }, { "epoch": 0.2958122360941013, "grad_norm": 6.8447549875082805, "learning_rate": 8.25898067337481e-06, "loss": 17.6201, "step": 16183 }, { "epoch": 0.2958305152905478, "grad_norm": 6.061554104175592, "learning_rate": 8.258756174223101e-06, "loss": 17.4606, "step": 16184 }, { "epoch": 0.29584879448699436, "grad_norm": 10.694557749786986, "learning_rate": 8.258531663649735e-06, "loss": 19.1512, "step": 16185 }, { "epoch": 0.2958670736834409, "grad_norm": 7.995631273483882, "learning_rate": 8.258307141655499e-06, "loss": 18.1532, "step": 16186 }, { "epoch": 0.2958853528798874, "grad_norm": 6.339244400609893, "learning_rate": 8.258082608241177e-06, "loss": 17.1876, "step": 16187 }, { "epoch": 0.2959036320763339, "grad_norm": 7.27091552991875, "learning_rate": 8.25785806340756e-06, "loss": 17.7918, "step": 16188 }, { "epoch": 0.29592191127278045, "grad_norm": 7.415439253092029, "learning_rate": 8.257633507155431e-06, "loss": 17.6777, "step": 16189 }, { "epoch": 0.295940190469227, "grad_norm": 5.553385193729211, "learning_rate": 8.257408939485582e-06, "loss": 17.097, "step": 16190 }, { "epoch": 0.2959584696656735, "grad_norm": 6.635004356880178, "learning_rate": 8.257184360398796e-06, "loss": 17.4957, "step": 16191 }, { "epoch": 0.29597674886212, "grad_norm": 7.517149146968689, "learning_rate": 8.256959769895861e-06, "loss": 17.5058, "step": 16192 }, { "epoch": 0.29599502805856653, "grad_norm": 6.089891699890243, "learning_rate": 8.256735167977566e-06, "loss": 17.3818, "step": 16193 }, { "epoch": 0.29601330725501307, "grad_norm": 5.788348349656729, "learning_rate": 8.256510554644696e-06, "loss": 17.1936, "step": 16194 }, { "epoch": 0.2960315864514596, "grad_norm": 4.823369899702506, "learning_rate": 8.25628592989804e-06, "loss": 16.9966, "step": 16195 }, { "epoch": 0.29604986564790614, "grad_norm": 7.191284231122164, "learning_rate": 8.256061293738382e-06, "loss": 17.8501, "step": 16196 }, { "epoch": 0.2960681448443526, "grad_norm": 7.338598339373476, "learning_rate": 8.255836646166512e-06, "loss": 17.7325, "step": 16197 }, { "epoch": 0.29608642404079916, "grad_norm": 5.882320845772196, "learning_rate": 8.255611987183218e-06, "loss": 17.3934, "step": 16198 }, { "epoch": 0.2961047032372457, "grad_norm": 7.050088584913749, "learning_rate": 8.255387316789289e-06, "loss": 17.8632, "step": 16199 }, { "epoch": 0.29612298243369223, "grad_norm": 7.354316577228631, "learning_rate": 8.255162634985508e-06, "loss": 17.7189, "step": 16200 }, { "epoch": 0.29614126163013876, "grad_norm": 6.596201647834317, "learning_rate": 8.254937941772663e-06, "loss": 17.8398, "step": 16201 }, { "epoch": 0.29615954082658524, "grad_norm": 5.997520771429841, "learning_rate": 8.254713237151546e-06, "loss": 17.3732, "step": 16202 }, { "epoch": 0.2961778200230318, "grad_norm": 5.567684933785494, "learning_rate": 8.254488521122937e-06, "loss": 17.0583, "step": 16203 }, { "epoch": 0.2961960992194783, "grad_norm": 6.988167398003105, "learning_rate": 8.25426379368763e-06, "loss": 17.4395, "step": 16204 }, { "epoch": 0.29621437841592485, "grad_norm": 6.334472459987664, "learning_rate": 8.254039054846413e-06, "loss": 17.3746, "step": 16205 }, { "epoch": 0.2962326576123714, "grad_norm": 6.792457675376119, "learning_rate": 8.25381430460007e-06, "loss": 17.6924, "step": 16206 }, { "epoch": 0.29625093680881787, "grad_norm": 6.543604059497521, "learning_rate": 8.253589542949391e-06, "loss": 17.4972, "step": 16207 }, { "epoch": 0.2962692160052644, "grad_norm": 8.213450685786313, "learning_rate": 8.25336476989516e-06, "loss": 18.0698, "step": 16208 }, { "epoch": 0.29628749520171094, "grad_norm": 6.590426678547136, "learning_rate": 8.253139985438172e-06, "loss": 17.5321, "step": 16209 }, { "epoch": 0.2963057743981575, "grad_norm": 5.826370838699314, "learning_rate": 8.252915189579209e-06, "loss": 17.3427, "step": 16210 }, { "epoch": 0.296324053594604, "grad_norm": 6.142322848068911, "learning_rate": 8.25269038231906e-06, "loss": 17.4707, "step": 16211 }, { "epoch": 0.2963423327910505, "grad_norm": 6.313431713920389, "learning_rate": 8.252465563658514e-06, "loss": 17.3804, "step": 16212 }, { "epoch": 0.296360611987497, "grad_norm": 6.7735844023314815, "learning_rate": 8.252240733598357e-06, "loss": 17.5166, "step": 16213 }, { "epoch": 0.29637889118394356, "grad_norm": 6.634904623775015, "learning_rate": 8.25201589213938e-06, "loss": 17.3804, "step": 16214 }, { "epoch": 0.2963971703803901, "grad_norm": 6.574053095469506, "learning_rate": 8.251791039282369e-06, "loss": 17.6128, "step": 16215 }, { "epoch": 0.2964154495768366, "grad_norm": 6.523212168595939, "learning_rate": 8.251566175028114e-06, "loss": 17.2457, "step": 16216 }, { "epoch": 0.2964337287732831, "grad_norm": 7.227468536478085, "learning_rate": 8.2513412993774e-06, "loss": 17.6916, "step": 16217 }, { "epoch": 0.29645200796972965, "grad_norm": 6.522893347865626, "learning_rate": 8.251116412331017e-06, "loss": 17.553, "step": 16218 }, { "epoch": 0.2964702871661762, "grad_norm": 5.442161858411724, "learning_rate": 8.250891513889754e-06, "loss": 17.1043, "step": 16219 }, { "epoch": 0.2964885663626227, "grad_norm": 6.756243611043344, "learning_rate": 8.250666604054396e-06, "loss": 17.7974, "step": 16220 }, { "epoch": 0.2965068455590692, "grad_norm": 6.723419642373557, "learning_rate": 8.250441682825736e-06, "loss": 17.526, "step": 16221 }, { "epoch": 0.29652512475551573, "grad_norm": 6.723178081527187, "learning_rate": 8.250216750204559e-06, "loss": 17.7818, "step": 16222 }, { "epoch": 0.29654340395196227, "grad_norm": 6.47743698662654, "learning_rate": 8.249991806191656e-06, "loss": 17.6314, "step": 16223 }, { "epoch": 0.2965616831484088, "grad_norm": 6.9977355079827674, "learning_rate": 8.249766850787811e-06, "loss": 17.6568, "step": 16224 }, { "epoch": 0.29657996234485534, "grad_norm": 6.719055048688414, "learning_rate": 8.249541883993816e-06, "loss": 17.4425, "step": 16225 }, { "epoch": 0.2965982415413018, "grad_norm": 7.75375840497314, "learning_rate": 8.24931690581046e-06, "loss": 18.2105, "step": 16226 }, { "epoch": 0.29661652073774836, "grad_norm": 6.969221779509029, "learning_rate": 8.24909191623853e-06, "loss": 17.887, "step": 16227 }, { "epoch": 0.2966347999341949, "grad_norm": 7.026830897623245, "learning_rate": 8.248866915278814e-06, "loss": 17.8746, "step": 16228 }, { "epoch": 0.29665307913064143, "grad_norm": 6.494742477544057, "learning_rate": 8.248641902932102e-06, "loss": 17.6973, "step": 16229 }, { "epoch": 0.29667135832708796, "grad_norm": 7.425141486599022, "learning_rate": 8.248416879199182e-06, "loss": 17.9923, "step": 16230 }, { "epoch": 0.29668963752353444, "grad_norm": 6.136964101879413, "learning_rate": 8.248191844080841e-06, "loss": 17.4555, "step": 16231 }, { "epoch": 0.296707916719981, "grad_norm": 6.159605954960472, "learning_rate": 8.247966797577871e-06, "loss": 17.4765, "step": 16232 }, { "epoch": 0.2967261959164275, "grad_norm": 6.623923583884193, "learning_rate": 8.24774173969106e-06, "loss": 17.7391, "step": 16233 }, { "epoch": 0.29674447511287405, "grad_norm": 6.149862095375403, "learning_rate": 8.247516670421195e-06, "loss": 17.3587, "step": 16234 }, { "epoch": 0.2967627543093206, "grad_norm": 7.031136387036252, "learning_rate": 8.247291589769065e-06, "loss": 17.9162, "step": 16235 }, { "epoch": 0.29678103350576707, "grad_norm": 6.355655195530405, "learning_rate": 8.24706649773546e-06, "loss": 17.638, "step": 16236 }, { "epoch": 0.2967993127022136, "grad_norm": 6.13200706403997, "learning_rate": 8.246841394321172e-06, "loss": 17.454, "step": 16237 }, { "epoch": 0.29681759189866014, "grad_norm": 5.62872690399162, "learning_rate": 8.246616279526982e-06, "loss": 17.1959, "step": 16238 }, { "epoch": 0.2968358710951067, "grad_norm": 7.210139026661183, "learning_rate": 8.246391153353687e-06, "loss": 17.5475, "step": 16239 }, { "epoch": 0.2968541502915532, "grad_norm": 6.75275509676336, "learning_rate": 8.24616601580207e-06, "loss": 17.5892, "step": 16240 }, { "epoch": 0.2968724294879997, "grad_norm": 6.834470207428036, "learning_rate": 8.245940866872925e-06, "loss": 17.3088, "step": 16241 }, { "epoch": 0.2968907086844462, "grad_norm": 7.7373090466346275, "learning_rate": 8.245715706567038e-06, "loss": 18.2506, "step": 16242 }, { "epoch": 0.29690898788089276, "grad_norm": 6.253828221575055, "learning_rate": 8.2454905348852e-06, "loss": 17.335, "step": 16243 }, { "epoch": 0.2969272670773393, "grad_norm": 7.914715304597217, "learning_rate": 8.245265351828197e-06, "loss": 18.1206, "step": 16244 }, { "epoch": 0.29694554627378583, "grad_norm": 6.230357335525374, "learning_rate": 8.245040157396824e-06, "loss": 17.6677, "step": 16245 }, { "epoch": 0.2969638254702323, "grad_norm": 6.535313520280362, "learning_rate": 8.244814951591864e-06, "loss": 17.6605, "step": 16246 }, { "epoch": 0.29698210466667885, "grad_norm": 6.625128264606999, "learning_rate": 8.244589734414112e-06, "loss": 17.7093, "step": 16247 }, { "epoch": 0.2970003838631254, "grad_norm": 7.679745526749129, "learning_rate": 8.244364505864351e-06, "loss": 18.37, "step": 16248 }, { "epoch": 0.2970186630595719, "grad_norm": 7.535364743788393, "learning_rate": 8.244139265943376e-06, "loss": 18.0094, "step": 16249 }, { "epoch": 0.2970369422560184, "grad_norm": 6.396184890742791, "learning_rate": 8.243914014651975e-06, "loss": 17.475, "step": 16250 }, { "epoch": 0.29705522145246493, "grad_norm": 6.868777124854813, "learning_rate": 8.243688751990935e-06, "loss": 17.7841, "step": 16251 }, { "epoch": 0.29707350064891147, "grad_norm": 6.31584488355534, "learning_rate": 8.243463477961048e-06, "loss": 17.452, "step": 16252 }, { "epoch": 0.297091779845358, "grad_norm": 6.467455494339748, "learning_rate": 8.243238192563103e-06, "loss": 17.4342, "step": 16253 }, { "epoch": 0.29711005904180454, "grad_norm": 5.442241482256332, "learning_rate": 8.243012895797891e-06, "loss": 17.1801, "step": 16254 }, { "epoch": 0.297128338238251, "grad_norm": 6.469735857165421, "learning_rate": 8.242787587666198e-06, "loss": 17.731, "step": 16255 }, { "epoch": 0.29714661743469756, "grad_norm": 6.322748020825891, "learning_rate": 8.242562268168817e-06, "loss": 17.2883, "step": 16256 }, { "epoch": 0.2971648966311441, "grad_norm": 6.610727907010459, "learning_rate": 8.242336937306536e-06, "loss": 17.8377, "step": 16257 }, { "epoch": 0.29718317582759063, "grad_norm": 5.088755091306931, "learning_rate": 8.242111595080146e-06, "loss": 17.1507, "step": 16258 }, { "epoch": 0.29720145502403716, "grad_norm": 8.340190513043664, "learning_rate": 8.241886241490438e-06, "loss": 18.0603, "step": 16259 }, { "epoch": 0.29721973422048364, "grad_norm": 6.459625104554761, "learning_rate": 8.241660876538198e-06, "loss": 17.7535, "step": 16260 }, { "epoch": 0.2972380134169302, "grad_norm": 5.587747198410822, "learning_rate": 8.241435500224217e-06, "loss": 17.0845, "step": 16261 }, { "epoch": 0.2972562926133767, "grad_norm": 6.732006582626895, "learning_rate": 8.241210112549287e-06, "loss": 17.7029, "step": 16262 }, { "epoch": 0.29727457180982325, "grad_norm": 7.984138342881745, "learning_rate": 8.240984713514198e-06, "loss": 17.9232, "step": 16263 }, { "epoch": 0.2972928510062698, "grad_norm": 7.675562147358658, "learning_rate": 8.240759303119736e-06, "loss": 18.595, "step": 16264 }, { "epoch": 0.29731113020271627, "grad_norm": 6.360932458947583, "learning_rate": 8.240533881366696e-06, "loss": 17.6675, "step": 16265 }, { "epoch": 0.2973294093991628, "grad_norm": 6.27721314278734, "learning_rate": 8.240308448255866e-06, "loss": 17.6947, "step": 16266 }, { "epoch": 0.29734768859560934, "grad_norm": 6.977908367355837, "learning_rate": 8.240083003788036e-06, "loss": 17.6504, "step": 16267 }, { "epoch": 0.2973659677920559, "grad_norm": 6.049040808097772, "learning_rate": 8.239857547963995e-06, "loss": 17.3966, "step": 16268 }, { "epoch": 0.2973842469885024, "grad_norm": 7.1086045804802795, "learning_rate": 8.239632080784535e-06, "loss": 17.7306, "step": 16269 }, { "epoch": 0.2974025261849489, "grad_norm": 6.054916242819913, "learning_rate": 8.239406602250447e-06, "loss": 17.3749, "step": 16270 }, { "epoch": 0.2974208053813954, "grad_norm": 6.9152581995438895, "learning_rate": 8.239181112362517e-06, "loss": 17.6033, "step": 16271 }, { "epoch": 0.29743908457784196, "grad_norm": 6.459269066450177, "learning_rate": 8.238955611121541e-06, "loss": 17.4815, "step": 16272 }, { "epoch": 0.2974573637742885, "grad_norm": 6.8023051578082026, "learning_rate": 8.238730098528306e-06, "loss": 17.6099, "step": 16273 }, { "epoch": 0.29747564297073503, "grad_norm": 8.092449666154756, "learning_rate": 8.2385045745836e-06, "loss": 17.9357, "step": 16274 }, { "epoch": 0.2974939221671815, "grad_norm": 7.398706690100068, "learning_rate": 8.238279039288222e-06, "loss": 17.9046, "step": 16275 }, { "epoch": 0.29751220136362805, "grad_norm": 5.909733446999433, "learning_rate": 8.238053492642954e-06, "loss": 17.2984, "step": 16276 }, { "epoch": 0.2975304805600746, "grad_norm": 6.661307944514938, "learning_rate": 8.23782793464859e-06, "loss": 17.5077, "step": 16277 }, { "epoch": 0.2975487597565211, "grad_norm": 5.915677565068317, "learning_rate": 8.23760236530592e-06, "loss": 17.0839, "step": 16278 }, { "epoch": 0.29756703895296766, "grad_norm": 6.2602485757742645, "learning_rate": 8.237376784615734e-06, "loss": 17.6571, "step": 16279 }, { "epoch": 0.29758531814941414, "grad_norm": 6.740954056650279, "learning_rate": 8.237151192578823e-06, "loss": 17.648, "step": 16280 }, { "epoch": 0.29760359734586067, "grad_norm": 6.573906646349148, "learning_rate": 8.236925589195978e-06, "loss": 17.4232, "step": 16281 }, { "epoch": 0.2976218765423072, "grad_norm": 7.325810720028958, "learning_rate": 8.236699974467993e-06, "loss": 17.9357, "step": 16282 }, { "epoch": 0.29764015573875374, "grad_norm": 7.240468148216298, "learning_rate": 8.236474348395651e-06, "loss": 17.394, "step": 16283 }, { "epoch": 0.2976584349352002, "grad_norm": 6.918691931463542, "learning_rate": 8.23624871097975e-06, "loss": 17.3358, "step": 16284 }, { "epoch": 0.29767671413164676, "grad_norm": 6.13736298047117, "learning_rate": 8.236023062221077e-06, "loss": 17.4781, "step": 16285 }, { "epoch": 0.2976949933280933, "grad_norm": 7.117920319422106, "learning_rate": 8.235797402120425e-06, "loss": 17.8098, "step": 16286 }, { "epoch": 0.29771327252453983, "grad_norm": 6.822319572920566, "learning_rate": 8.235571730678583e-06, "loss": 17.5788, "step": 16287 }, { "epoch": 0.29773155172098636, "grad_norm": 5.6378570072738, "learning_rate": 8.235346047896342e-06, "loss": 17.246, "step": 16288 }, { "epoch": 0.29774983091743285, "grad_norm": 6.837267184240421, "learning_rate": 8.235120353774494e-06, "loss": 17.9497, "step": 16289 }, { "epoch": 0.2977681101138794, "grad_norm": 6.56729590587665, "learning_rate": 8.234894648313832e-06, "loss": 17.8588, "step": 16290 }, { "epoch": 0.2977863893103259, "grad_norm": 7.694086139703035, "learning_rate": 8.234668931515143e-06, "loss": 18.0188, "step": 16291 }, { "epoch": 0.29780466850677245, "grad_norm": 7.2056602639730345, "learning_rate": 8.23444320337922e-06, "loss": 17.957, "step": 16292 }, { "epoch": 0.297822947703219, "grad_norm": 6.620513928103384, "learning_rate": 8.234217463906857e-06, "loss": 17.4882, "step": 16293 }, { "epoch": 0.29784122689966547, "grad_norm": 6.255504383389368, "learning_rate": 8.233991713098839e-06, "loss": 17.3204, "step": 16294 }, { "epoch": 0.297859506096112, "grad_norm": 5.402672496530524, "learning_rate": 8.233765950955963e-06, "loss": 17.1263, "step": 16295 }, { "epoch": 0.29787778529255854, "grad_norm": 7.434840654253358, "learning_rate": 8.233540177479016e-06, "loss": 17.7537, "step": 16296 }, { "epoch": 0.2978960644890051, "grad_norm": 6.053828911386629, "learning_rate": 8.233314392668794e-06, "loss": 17.3347, "step": 16297 }, { "epoch": 0.2979143436854516, "grad_norm": 8.279375604491499, "learning_rate": 8.233088596526082e-06, "loss": 18.3454, "step": 16298 }, { "epoch": 0.2979326228818981, "grad_norm": 7.516060747930949, "learning_rate": 8.232862789051678e-06, "loss": 18.3192, "step": 16299 }, { "epoch": 0.2979509020783446, "grad_norm": 5.65837228236491, "learning_rate": 8.232636970246371e-06, "loss": 17.2985, "step": 16300 }, { "epoch": 0.29796918127479116, "grad_norm": 7.764916684762759, "learning_rate": 8.232411140110948e-06, "loss": 18.1349, "step": 16301 }, { "epoch": 0.2979874604712377, "grad_norm": 6.2881691007591485, "learning_rate": 8.232185298646209e-06, "loss": 17.316, "step": 16302 }, { "epoch": 0.29800573966768423, "grad_norm": 5.6482328866201525, "learning_rate": 8.231959445852937e-06, "loss": 17.1771, "step": 16303 }, { "epoch": 0.2980240188641307, "grad_norm": 5.602594649059896, "learning_rate": 8.23173358173193e-06, "loss": 17.1372, "step": 16304 }, { "epoch": 0.29804229806057725, "grad_norm": 7.803580560883558, "learning_rate": 8.231507706283976e-06, "loss": 17.8065, "step": 16305 }, { "epoch": 0.2980605772570238, "grad_norm": 7.578179986971631, "learning_rate": 8.231281819509869e-06, "loss": 18.0388, "step": 16306 }, { "epoch": 0.2980788564534703, "grad_norm": 6.970383338331267, "learning_rate": 8.2310559214104e-06, "loss": 17.875, "step": 16307 }, { "epoch": 0.29809713564991686, "grad_norm": 5.25820880547372, "learning_rate": 8.23083001198636e-06, "loss": 17.0394, "step": 16308 }, { "epoch": 0.29811541484636334, "grad_norm": 5.943940907953198, "learning_rate": 8.230604091238542e-06, "loss": 17.5735, "step": 16309 }, { "epoch": 0.29813369404280987, "grad_norm": 6.077625711083019, "learning_rate": 8.230378159167733e-06, "loss": 17.4468, "step": 16310 }, { "epoch": 0.2981519732392564, "grad_norm": 6.435290454954497, "learning_rate": 8.230152215774731e-06, "loss": 17.3735, "step": 16311 }, { "epoch": 0.29817025243570294, "grad_norm": 5.978024970966035, "learning_rate": 8.229926261060328e-06, "loss": 17.0599, "step": 16312 }, { "epoch": 0.2981885316321495, "grad_norm": 7.255333358452495, "learning_rate": 8.229700295025311e-06, "loss": 18.2076, "step": 16313 }, { "epoch": 0.29820681082859596, "grad_norm": 5.767137796494151, "learning_rate": 8.229474317670476e-06, "loss": 17.2848, "step": 16314 }, { "epoch": 0.2982250900250425, "grad_norm": 5.74214433296508, "learning_rate": 8.229248328996615e-06, "loss": 17.396, "step": 16315 }, { "epoch": 0.29824336922148903, "grad_norm": 7.15303670243752, "learning_rate": 8.229022329004518e-06, "loss": 17.8744, "step": 16316 }, { "epoch": 0.29826164841793557, "grad_norm": 6.630547596158621, "learning_rate": 8.228796317694976e-06, "loss": 17.6934, "step": 16317 }, { "epoch": 0.29827992761438205, "grad_norm": 6.012624207429132, "learning_rate": 8.228570295068785e-06, "loss": 17.3032, "step": 16318 }, { "epoch": 0.2982982068108286, "grad_norm": 7.6240797394970325, "learning_rate": 8.228344261126735e-06, "loss": 18.0754, "step": 16319 }, { "epoch": 0.2983164860072751, "grad_norm": 7.414234900991309, "learning_rate": 8.228118215869619e-06, "loss": 18.1664, "step": 16320 }, { "epoch": 0.29833476520372165, "grad_norm": 7.557201962615848, "learning_rate": 8.227892159298228e-06, "loss": 17.6655, "step": 16321 }, { "epoch": 0.2983530444001682, "grad_norm": 6.3213765342509, "learning_rate": 8.227666091413354e-06, "loss": 17.3342, "step": 16322 }, { "epoch": 0.29837132359661467, "grad_norm": 7.023726114079908, "learning_rate": 8.22744001221579e-06, "loss": 17.743, "step": 16323 }, { "epoch": 0.2983896027930612, "grad_norm": 6.289313939971966, "learning_rate": 8.227213921706332e-06, "loss": 17.5194, "step": 16324 }, { "epoch": 0.29840788198950774, "grad_norm": 5.8862396089787845, "learning_rate": 8.226987819885767e-06, "loss": 17.2599, "step": 16325 }, { "epoch": 0.2984261611859543, "grad_norm": 7.5430086296374474, "learning_rate": 8.226761706754891e-06, "loss": 18.1224, "step": 16326 }, { "epoch": 0.2984444403824008, "grad_norm": 8.393691036336323, "learning_rate": 8.226535582314494e-06, "loss": 18.579, "step": 16327 }, { "epoch": 0.2984627195788473, "grad_norm": 5.713785414661842, "learning_rate": 8.226309446565371e-06, "loss": 17.2607, "step": 16328 }, { "epoch": 0.2984809987752938, "grad_norm": 6.678237594393372, "learning_rate": 8.226083299508312e-06, "loss": 17.6117, "step": 16329 }, { "epoch": 0.29849927797174036, "grad_norm": 7.120928405474422, "learning_rate": 8.225857141144111e-06, "loss": 17.8109, "step": 16330 }, { "epoch": 0.2985175571681869, "grad_norm": 6.400104494718224, "learning_rate": 8.225630971473561e-06, "loss": 17.559, "step": 16331 }, { "epoch": 0.29853583636463343, "grad_norm": 6.076709500381016, "learning_rate": 8.225404790497456e-06, "loss": 17.2295, "step": 16332 }, { "epoch": 0.2985541155610799, "grad_norm": 6.556860148667879, "learning_rate": 8.225178598216586e-06, "loss": 17.7958, "step": 16333 }, { "epoch": 0.29857239475752645, "grad_norm": 6.5912662165209746, "learning_rate": 8.224952394631744e-06, "loss": 17.3398, "step": 16334 }, { "epoch": 0.298590673953973, "grad_norm": 7.461391451472866, "learning_rate": 8.224726179743726e-06, "loss": 17.8425, "step": 16335 }, { "epoch": 0.2986089531504195, "grad_norm": 6.171684837161076, "learning_rate": 8.22449995355332e-06, "loss": 17.5799, "step": 16336 }, { "epoch": 0.29862723234686606, "grad_norm": 7.531757010209729, "learning_rate": 8.224273716061321e-06, "loss": 17.9508, "step": 16337 }, { "epoch": 0.29864551154331254, "grad_norm": 6.823430736037084, "learning_rate": 8.224047467268524e-06, "loss": 17.7491, "step": 16338 }, { "epoch": 0.29866379073975907, "grad_norm": 5.504777292052472, "learning_rate": 8.22382120717572e-06, "loss": 17.0871, "step": 16339 }, { "epoch": 0.2986820699362056, "grad_norm": 6.439319508459507, "learning_rate": 8.223594935783703e-06, "loss": 17.5903, "step": 16340 }, { "epoch": 0.29870034913265214, "grad_norm": 7.342933055695366, "learning_rate": 8.223368653093267e-06, "loss": 17.9005, "step": 16341 }, { "epoch": 0.2987186283290987, "grad_norm": 6.640712689129402, "learning_rate": 8.2231423591052e-06, "loss": 17.3471, "step": 16342 }, { "epoch": 0.29873690752554516, "grad_norm": 5.821954409355846, "learning_rate": 8.2229160538203e-06, "loss": 17.3971, "step": 16343 }, { "epoch": 0.2987551867219917, "grad_norm": 5.710282016845329, "learning_rate": 8.22268973723936e-06, "loss": 17.2268, "step": 16344 }, { "epoch": 0.29877346591843823, "grad_norm": 5.344218447819602, "learning_rate": 8.222463409363171e-06, "loss": 17.2301, "step": 16345 }, { "epoch": 0.29879174511488477, "grad_norm": 6.029696883942533, "learning_rate": 8.222237070192528e-06, "loss": 17.2053, "step": 16346 }, { "epoch": 0.2988100243113313, "grad_norm": 10.209855700168081, "learning_rate": 8.222010719728225e-06, "loss": 18.889, "step": 16347 }, { "epoch": 0.2988283035077778, "grad_norm": 6.3143922895963716, "learning_rate": 8.221784357971053e-06, "loss": 17.282, "step": 16348 }, { "epoch": 0.2988465827042243, "grad_norm": 6.721410765336859, "learning_rate": 8.221557984921803e-06, "loss": 17.7417, "step": 16349 }, { "epoch": 0.29886486190067085, "grad_norm": 6.411746297474031, "learning_rate": 8.221331600581276e-06, "loss": 17.4306, "step": 16350 }, { "epoch": 0.2988831410971174, "grad_norm": 6.359058951813595, "learning_rate": 8.221105204950259e-06, "loss": 17.7775, "step": 16351 }, { "epoch": 0.29890142029356387, "grad_norm": 9.464212744525556, "learning_rate": 8.22087879802955e-06, "loss": 18.5987, "step": 16352 }, { "epoch": 0.2989196994900104, "grad_norm": 7.341539248348361, "learning_rate": 8.220652379819939e-06, "loss": 18.0818, "step": 16353 }, { "epoch": 0.29893797868645694, "grad_norm": 5.992769917037668, "learning_rate": 8.220425950322222e-06, "loss": 17.4905, "step": 16354 }, { "epoch": 0.2989562578829035, "grad_norm": 6.541544820443978, "learning_rate": 8.22019950953719e-06, "loss": 17.6631, "step": 16355 }, { "epoch": 0.29897453707935, "grad_norm": 6.882540981182489, "learning_rate": 8.219973057465638e-06, "loss": 17.9089, "step": 16356 }, { "epoch": 0.2989928162757965, "grad_norm": 6.2809731953307955, "learning_rate": 8.21974659410836e-06, "loss": 17.573, "step": 16357 }, { "epoch": 0.299011095472243, "grad_norm": 6.748124825600281, "learning_rate": 8.219520119466152e-06, "loss": 17.7794, "step": 16358 }, { "epoch": 0.29902937466868956, "grad_norm": 6.647893390589946, "learning_rate": 8.219293633539803e-06, "loss": 17.3586, "step": 16359 }, { "epoch": 0.2990476538651361, "grad_norm": 6.014042612491038, "learning_rate": 8.219067136330107e-06, "loss": 17.3431, "step": 16360 }, { "epoch": 0.29906593306158263, "grad_norm": 5.92188614220963, "learning_rate": 8.218840627837866e-06, "loss": 17.2457, "step": 16361 }, { "epoch": 0.2990842122580291, "grad_norm": 7.5322215277244355, "learning_rate": 8.218614108063863e-06, "loss": 17.8183, "step": 16362 }, { "epoch": 0.29910249145447565, "grad_norm": 6.318718321398887, "learning_rate": 8.218387577008896e-06, "loss": 17.3413, "step": 16363 }, { "epoch": 0.2991207706509222, "grad_norm": 8.63721541871255, "learning_rate": 8.218161034673763e-06, "loss": 18.7411, "step": 16364 }, { "epoch": 0.2991390498473687, "grad_norm": 5.896134279393408, "learning_rate": 8.217934481059255e-06, "loss": 17.4976, "step": 16365 }, { "epoch": 0.29915732904381526, "grad_norm": 6.614586099824656, "learning_rate": 8.217707916166165e-06, "loss": 17.6771, "step": 16366 }, { "epoch": 0.29917560824026174, "grad_norm": 7.594683145901173, "learning_rate": 8.217481339995288e-06, "loss": 17.923, "step": 16367 }, { "epoch": 0.2991938874367083, "grad_norm": 7.1342326422791515, "learning_rate": 8.217254752547419e-06, "loss": 17.8564, "step": 16368 }, { "epoch": 0.2992121666331548, "grad_norm": 8.750236790492691, "learning_rate": 8.21702815382335e-06, "loss": 18.7921, "step": 16369 }, { "epoch": 0.29923044582960134, "grad_norm": 7.5708330389398855, "learning_rate": 8.216801543823875e-06, "loss": 18.0739, "step": 16370 }, { "epoch": 0.2992487250260479, "grad_norm": 6.925853287739947, "learning_rate": 8.216574922549794e-06, "loss": 17.4019, "step": 16371 }, { "epoch": 0.29926700422249436, "grad_norm": 8.817725573612206, "learning_rate": 8.216348290001893e-06, "loss": 18.5605, "step": 16372 }, { "epoch": 0.2992852834189409, "grad_norm": 6.942994691492411, "learning_rate": 8.216121646180973e-06, "loss": 17.6855, "step": 16373 }, { "epoch": 0.29930356261538743, "grad_norm": 5.870285652412425, "learning_rate": 8.215894991087823e-06, "loss": 17.1564, "step": 16374 }, { "epoch": 0.29932184181183397, "grad_norm": 7.281625574319899, "learning_rate": 8.215668324723242e-06, "loss": 18.0881, "step": 16375 }, { "epoch": 0.2993401210082805, "grad_norm": 6.719466972932332, "learning_rate": 8.215441647088023e-06, "loss": 17.588, "step": 16376 }, { "epoch": 0.299358400204727, "grad_norm": 7.8788804222029265, "learning_rate": 8.215214958182959e-06, "loss": 17.8596, "step": 16377 }, { "epoch": 0.2993766794011735, "grad_norm": 6.53591483455379, "learning_rate": 8.214988258008845e-06, "loss": 17.5788, "step": 16378 }, { "epoch": 0.29939495859762005, "grad_norm": 6.936515196876394, "learning_rate": 8.214761546566478e-06, "loss": 17.7465, "step": 16379 }, { "epoch": 0.2994132377940666, "grad_norm": 8.580979463421398, "learning_rate": 8.21453482385665e-06, "loss": 18.464, "step": 16380 }, { "epoch": 0.2994315169905131, "grad_norm": 6.168998388455126, "learning_rate": 8.214308089880156e-06, "loss": 17.3842, "step": 16381 }, { "epoch": 0.2994497961869596, "grad_norm": 7.31217079621152, "learning_rate": 8.214081344637792e-06, "loss": 17.7132, "step": 16382 }, { "epoch": 0.29946807538340614, "grad_norm": 9.892344239339227, "learning_rate": 8.21385458813035e-06, "loss": 17.5744, "step": 16383 }, { "epoch": 0.2994863545798527, "grad_norm": 6.62911889642519, "learning_rate": 8.213627820358627e-06, "loss": 17.6832, "step": 16384 }, { "epoch": 0.2995046337762992, "grad_norm": 6.143767905709122, "learning_rate": 8.213401041323418e-06, "loss": 17.6862, "step": 16385 }, { "epoch": 0.2995229129727457, "grad_norm": 7.191470006510474, "learning_rate": 8.213174251025517e-06, "loss": 17.8029, "step": 16386 }, { "epoch": 0.2995411921691922, "grad_norm": 5.654302385320921, "learning_rate": 8.21294744946572e-06, "loss": 17.2512, "step": 16387 }, { "epoch": 0.29955947136563876, "grad_norm": 6.370351161826269, "learning_rate": 8.21272063664482e-06, "loss": 17.2756, "step": 16388 }, { "epoch": 0.2995777505620853, "grad_norm": 6.32073824523956, "learning_rate": 8.212493812563613e-06, "loss": 17.5443, "step": 16389 }, { "epoch": 0.29959602975853183, "grad_norm": 6.537775541813034, "learning_rate": 8.212266977222893e-06, "loss": 17.6793, "step": 16390 }, { "epoch": 0.2996143089549783, "grad_norm": 8.518777316757442, "learning_rate": 8.212040130623458e-06, "loss": 18.5988, "step": 16391 }, { "epoch": 0.29963258815142485, "grad_norm": 6.986553902632954, "learning_rate": 8.2118132727661e-06, "loss": 17.9079, "step": 16392 }, { "epoch": 0.2996508673478714, "grad_norm": 7.459135995463893, "learning_rate": 8.211586403651616e-06, "loss": 18.0181, "step": 16393 }, { "epoch": 0.2996691465443179, "grad_norm": 6.109765355923692, "learning_rate": 8.2113595232808e-06, "loss": 17.2701, "step": 16394 }, { "epoch": 0.29968742574076446, "grad_norm": 7.112389133833306, "learning_rate": 8.211132631654447e-06, "loss": 17.8923, "step": 16395 }, { "epoch": 0.29970570493721094, "grad_norm": 5.862526486348486, "learning_rate": 8.210905728773353e-06, "loss": 17.3055, "step": 16396 }, { "epoch": 0.2997239841336575, "grad_norm": 6.354541865061286, "learning_rate": 8.210678814638313e-06, "loss": 17.4199, "step": 16397 }, { "epoch": 0.299742263330104, "grad_norm": 6.5336974846492035, "learning_rate": 8.210451889250121e-06, "loss": 17.5184, "step": 16398 }, { "epoch": 0.29976054252655054, "grad_norm": 8.431012648038225, "learning_rate": 8.210224952609575e-06, "loss": 17.8148, "step": 16399 }, { "epoch": 0.2997788217229971, "grad_norm": 7.255367776565569, "learning_rate": 8.20999800471747e-06, "loss": 17.6895, "step": 16400 }, { "epoch": 0.29979710091944356, "grad_norm": 5.651987035069994, "learning_rate": 8.209771045574599e-06, "loss": 17.1961, "step": 16401 }, { "epoch": 0.2998153801158901, "grad_norm": 6.4202266438013655, "learning_rate": 8.20954407518176e-06, "loss": 17.5035, "step": 16402 }, { "epoch": 0.29983365931233663, "grad_norm": 7.965755841062741, "learning_rate": 8.209317093539748e-06, "loss": 18.2732, "step": 16403 }, { "epoch": 0.29985193850878317, "grad_norm": 7.526054502795034, "learning_rate": 8.209090100649357e-06, "loss": 17.8266, "step": 16404 }, { "epoch": 0.2998702177052297, "grad_norm": 7.239781023690425, "learning_rate": 8.208863096511385e-06, "loss": 17.6852, "step": 16405 }, { "epoch": 0.2998884969016762, "grad_norm": 7.012467979271183, "learning_rate": 8.208636081126625e-06, "loss": 17.8401, "step": 16406 }, { "epoch": 0.2999067760981227, "grad_norm": 6.49402094062138, "learning_rate": 8.208409054495874e-06, "loss": 17.4207, "step": 16407 }, { "epoch": 0.29992505529456925, "grad_norm": 6.415587472946477, "learning_rate": 8.208182016619928e-06, "loss": 17.5619, "step": 16408 }, { "epoch": 0.2999433344910158, "grad_norm": 5.439534581469668, "learning_rate": 8.207954967499583e-06, "loss": 17.0143, "step": 16409 }, { "epoch": 0.2999616136874623, "grad_norm": 6.0970909843815475, "learning_rate": 8.207727907135634e-06, "loss": 17.4049, "step": 16410 }, { "epoch": 0.2999798928839088, "grad_norm": 6.199160967168086, "learning_rate": 8.207500835528877e-06, "loss": 17.5505, "step": 16411 }, { "epoch": 0.29999817208035534, "grad_norm": 6.3804577425469065, "learning_rate": 8.207273752680107e-06, "loss": 17.4565, "step": 16412 }, { "epoch": 0.3000164512768019, "grad_norm": 6.856210443440484, "learning_rate": 8.207046658590121e-06, "loss": 17.643, "step": 16413 }, { "epoch": 0.3000347304732484, "grad_norm": 7.289911227589726, "learning_rate": 8.206819553259716e-06, "loss": 17.8324, "step": 16414 }, { "epoch": 0.30005300966969495, "grad_norm": 6.578725571598782, "learning_rate": 8.206592436689686e-06, "loss": 17.4686, "step": 16415 }, { "epoch": 0.30007128886614143, "grad_norm": 7.295758397619474, "learning_rate": 8.206365308880828e-06, "loss": 17.6599, "step": 16416 }, { "epoch": 0.30008956806258796, "grad_norm": 7.035031104762345, "learning_rate": 8.206138169833938e-06, "loss": 17.7502, "step": 16417 }, { "epoch": 0.3001078472590345, "grad_norm": 7.635415994651476, "learning_rate": 8.20591101954981e-06, "loss": 17.8712, "step": 16418 }, { "epoch": 0.30012612645548103, "grad_norm": 6.0513149454420505, "learning_rate": 8.205683858029244e-06, "loss": 17.5575, "step": 16419 }, { "epoch": 0.3001444056519275, "grad_norm": 7.690193874636865, "learning_rate": 8.205456685273035e-06, "loss": 18.0409, "step": 16420 }, { "epoch": 0.30016268484837405, "grad_norm": 6.337069261755316, "learning_rate": 8.205229501281976e-06, "loss": 17.4, "step": 16421 }, { "epoch": 0.3001809640448206, "grad_norm": 7.08058405623771, "learning_rate": 8.205002306056865e-06, "loss": 17.9945, "step": 16422 }, { "epoch": 0.3001992432412671, "grad_norm": 5.790310461273087, "learning_rate": 8.204775099598503e-06, "loss": 17.2035, "step": 16423 }, { "epoch": 0.30021752243771366, "grad_norm": 6.460634673234715, "learning_rate": 8.20454788190768e-06, "loss": 17.6697, "step": 16424 }, { "epoch": 0.30023580163416014, "grad_norm": 7.638564574355802, "learning_rate": 8.204320652985195e-06, "loss": 17.8488, "step": 16425 }, { "epoch": 0.3002540808306067, "grad_norm": 6.639336955554244, "learning_rate": 8.204093412831845e-06, "loss": 17.6881, "step": 16426 }, { "epoch": 0.3002723600270532, "grad_norm": 6.42767590274851, "learning_rate": 8.203866161448425e-06, "loss": 17.4481, "step": 16427 }, { "epoch": 0.30029063922349974, "grad_norm": 7.207889403731094, "learning_rate": 8.20363889883573e-06, "loss": 18.0579, "step": 16428 }, { "epoch": 0.3003089184199463, "grad_norm": 6.107333428761575, "learning_rate": 8.203411624994561e-06, "loss": 17.398, "step": 16429 }, { "epoch": 0.30032719761639276, "grad_norm": 5.7498502185620115, "learning_rate": 8.203184339925714e-06, "loss": 17.209, "step": 16430 }, { "epoch": 0.3003454768128393, "grad_norm": 6.980368445476265, "learning_rate": 8.202957043629981e-06, "loss": 17.6543, "step": 16431 }, { "epoch": 0.30036375600928583, "grad_norm": 6.612427473663922, "learning_rate": 8.202729736108163e-06, "loss": 17.503, "step": 16432 }, { "epoch": 0.30038203520573237, "grad_norm": 6.970997891185264, "learning_rate": 8.202502417361053e-06, "loss": 17.7627, "step": 16433 }, { "epoch": 0.3004003144021789, "grad_norm": 6.73318970432507, "learning_rate": 8.202275087389452e-06, "loss": 17.5573, "step": 16434 }, { "epoch": 0.3004185935986254, "grad_norm": 7.305255101930601, "learning_rate": 8.202047746194155e-06, "loss": 17.8708, "step": 16435 }, { "epoch": 0.3004368727950719, "grad_norm": 6.06333359743604, "learning_rate": 8.201820393775957e-06, "loss": 17.3008, "step": 16436 }, { "epoch": 0.30045515199151845, "grad_norm": 8.089268148461798, "learning_rate": 8.201593030135657e-06, "loss": 18.2991, "step": 16437 }, { "epoch": 0.300473431187965, "grad_norm": 5.927432331427687, "learning_rate": 8.201365655274051e-06, "loss": 17.5833, "step": 16438 }, { "epoch": 0.3004917103844115, "grad_norm": 5.144567413931674, "learning_rate": 8.201138269191937e-06, "loss": 17.0671, "step": 16439 }, { "epoch": 0.300509989580858, "grad_norm": 5.5994736216512315, "learning_rate": 8.20091087189011e-06, "loss": 17.1244, "step": 16440 }, { "epoch": 0.30052826877730454, "grad_norm": 7.242150273910535, "learning_rate": 8.20068346336937e-06, "loss": 17.9618, "step": 16441 }, { "epoch": 0.3005465479737511, "grad_norm": 6.085807824257408, "learning_rate": 8.20045604363051e-06, "loss": 17.5531, "step": 16442 }, { "epoch": 0.3005648271701976, "grad_norm": 5.901827548152579, "learning_rate": 8.20022861267433e-06, "loss": 17.2967, "step": 16443 }, { "epoch": 0.30058310636664415, "grad_norm": 6.5713707264465375, "learning_rate": 8.200001170501627e-06, "loss": 17.168, "step": 16444 }, { "epoch": 0.30060138556309063, "grad_norm": 6.241234340365578, "learning_rate": 8.199773717113198e-06, "loss": 17.3489, "step": 16445 }, { "epoch": 0.30061966475953716, "grad_norm": 7.99825696017237, "learning_rate": 8.199546252509838e-06, "loss": 18.1971, "step": 16446 }, { "epoch": 0.3006379439559837, "grad_norm": 5.952035644458607, "learning_rate": 8.199318776692347e-06, "loss": 17.4981, "step": 16447 }, { "epoch": 0.30065622315243024, "grad_norm": 6.8341513635009425, "learning_rate": 8.199091289661522e-06, "loss": 17.498, "step": 16448 }, { "epoch": 0.30067450234887677, "grad_norm": 6.460937181230472, "learning_rate": 8.198863791418159e-06, "loss": 17.712, "step": 16449 }, { "epoch": 0.30069278154532325, "grad_norm": 7.310936784672614, "learning_rate": 8.198636281963055e-06, "loss": 17.8312, "step": 16450 }, { "epoch": 0.3007110607417698, "grad_norm": 5.779597444089757, "learning_rate": 8.19840876129701e-06, "loss": 17.1703, "step": 16451 }, { "epoch": 0.3007293399382163, "grad_norm": 7.292474928284607, "learning_rate": 8.198181229420819e-06, "loss": 17.6399, "step": 16452 }, { "epoch": 0.30074761913466286, "grad_norm": 5.460471257386947, "learning_rate": 8.197953686335281e-06, "loss": 17.1148, "step": 16453 }, { "epoch": 0.30076589833110934, "grad_norm": 7.682451591408236, "learning_rate": 8.197726132041194e-06, "loss": 18.075, "step": 16454 }, { "epoch": 0.3007841775275559, "grad_norm": 7.517908304719064, "learning_rate": 8.19749856653935e-06, "loss": 18.0855, "step": 16455 }, { "epoch": 0.3008024567240024, "grad_norm": 7.706676260127519, "learning_rate": 8.197270989830554e-06, "loss": 17.9831, "step": 16456 }, { "epoch": 0.30082073592044895, "grad_norm": 8.199037568674859, "learning_rate": 8.197043401915601e-06, "loss": 17.9961, "step": 16457 }, { "epoch": 0.3008390151168955, "grad_norm": 6.534789165792984, "learning_rate": 8.196815802795288e-06, "loss": 17.4348, "step": 16458 }, { "epoch": 0.30085729431334196, "grad_norm": 6.904812089585506, "learning_rate": 8.196588192470412e-06, "loss": 17.5515, "step": 16459 }, { "epoch": 0.3008755735097885, "grad_norm": 5.8144758827684795, "learning_rate": 8.196360570941773e-06, "loss": 17.4175, "step": 16460 }, { "epoch": 0.30089385270623503, "grad_norm": 13.19151471823941, "learning_rate": 8.196132938210166e-06, "loss": 18.2953, "step": 16461 }, { "epoch": 0.30091213190268157, "grad_norm": 7.153700518477827, "learning_rate": 8.195905294276392e-06, "loss": 17.8263, "step": 16462 }, { "epoch": 0.3009304110991281, "grad_norm": 6.088575872454362, "learning_rate": 8.195677639141247e-06, "loss": 17.5734, "step": 16463 }, { "epoch": 0.3009486902955746, "grad_norm": 7.01020178943264, "learning_rate": 8.195449972805529e-06, "loss": 17.3798, "step": 16464 }, { "epoch": 0.3009669694920211, "grad_norm": 6.772620443157715, "learning_rate": 8.195222295270035e-06, "loss": 17.3864, "step": 16465 }, { "epoch": 0.30098524868846765, "grad_norm": 7.187676691443884, "learning_rate": 8.194994606535566e-06, "loss": 17.8388, "step": 16466 }, { "epoch": 0.3010035278849142, "grad_norm": 6.564834304727205, "learning_rate": 8.194766906602916e-06, "loss": 17.4207, "step": 16467 }, { "epoch": 0.3010218070813607, "grad_norm": 7.6128025116026885, "learning_rate": 8.194539195472888e-06, "loss": 18.0054, "step": 16468 }, { "epoch": 0.3010400862778072, "grad_norm": 7.329839875497806, "learning_rate": 8.194311473146274e-06, "loss": 17.9493, "step": 16469 }, { "epoch": 0.30105836547425374, "grad_norm": 6.162042508545888, "learning_rate": 8.19408373962388e-06, "loss": 17.4555, "step": 16470 }, { "epoch": 0.3010766446707003, "grad_norm": 6.166557836595641, "learning_rate": 8.193855994906497e-06, "loss": 17.5689, "step": 16471 }, { "epoch": 0.3010949238671468, "grad_norm": 6.038105628137205, "learning_rate": 8.193628238994924e-06, "loss": 17.223, "step": 16472 }, { "epoch": 0.30111320306359335, "grad_norm": 5.687321592545057, "learning_rate": 8.193400471889965e-06, "loss": 17.3932, "step": 16473 }, { "epoch": 0.30113148226003983, "grad_norm": 6.6969656967844085, "learning_rate": 8.19317269359241e-06, "loss": 17.6497, "step": 16474 }, { "epoch": 0.30114976145648636, "grad_norm": 8.598318612771129, "learning_rate": 8.192944904103065e-06, "loss": 18.3712, "step": 16475 }, { "epoch": 0.3011680406529329, "grad_norm": 6.489485945335258, "learning_rate": 8.192717103422725e-06, "loss": 17.5235, "step": 16476 }, { "epoch": 0.30118631984937944, "grad_norm": 7.399255963939922, "learning_rate": 8.192489291552188e-06, "loss": 17.6362, "step": 16477 }, { "epoch": 0.30120459904582597, "grad_norm": 12.276893317189007, "learning_rate": 8.192261468492252e-06, "loss": 18.7239, "step": 16478 }, { "epoch": 0.30122287824227245, "grad_norm": 6.283561086603921, "learning_rate": 8.19203363424372e-06, "loss": 17.6668, "step": 16479 }, { "epoch": 0.301241157438719, "grad_norm": 8.687683138104715, "learning_rate": 8.191805788807383e-06, "loss": 18.1202, "step": 16480 }, { "epoch": 0.3012594366351655, "grad_norm": 6.722267533928825, "learning_rate": 8.191577932184045e-06, "loss": 17.5651, "step": 16481 }, { "epoch": 0.30127771583161206, "grad_norm": 7.543462823867828, "learning_rate": 8.191350064374505e-06, "loss": 17.8648, "step": 16482 }, { "epoch": 0.3012959950280586, "grad_norm": 7.695502028635188, "learning_rate": 8.19112218537956e-06, "loss": 17.7225, "step": 16483 }, { "epoch": 0.3013142742245051, "grad_norm": 7.016309067710564, "learning_rate": 8.190894295200006e-06, "loss": 17.6521, "step": 16484 }, { "epoch": 0.3013325534209516, "grad_norm": 5.179047886656715, "learning_rate": 8.190666393836646e-06, "loss": 16.982, "step": 16485 }, { "epoch": 0.30135083261739815, "grad_norm": 7.388548267853332, "learning_rate": 8.190438481290278e-06, "loss": 17.9728, "step": 16486 }, { "epoch": 0.3013691118138447, "grad_norm": 5.749814460472834, "learning_rate": 8.190210557561698e-06, "loss": 17.4074, "step": 16487 }, { "epoch": 0.30138739101029116, "grad_norm": 7.430658950912746, "learning_rate": 8.189982622651707e-06, "loss": 17.75, "step": 16488 }, { "epoch": 0.3014056702067377, "grad_norm": 8.619400391358544, "learning_rate": 8.189754676561105e-06, "loss": 17.8165, "step": 16489 }, { "epoch": 0.30142394940318423, "grad_norm": 7.090906533322632, "learning_rate": 8.189526719290688e-06, "loss": 17.644, "step": 16490 }, { "epoch": 0.30144222859963077, "grad_norm": 6.997694260141088, "learning_rate": 8.18929875084126e-06, "loss": 17.5898, "step": 16491 }, { "epoch": 0.3014605077960773, "grad_norm": 6.926362107786558, "learning_rate": 8.189070771213614e-06, "loss": 18.1243, "step": 16492 }, { "epoch": 0.3014787869925238, "grad_norm": 7.318321542607569, "learning_rate": 8.188842780408551e-06, "loss": 17.595, "step": 16493 }, { "epoch": 0.3014970661889703, "grad_norm": 6.952968749692479, "learning_rate": 8.188614778426871e-06, "loss": 17.9359, "step": 16494 }, { "epoch": 0.30151534538541686, "grad_norm": 6.615195948124856, "learning_rate": 8.188386765269376e-06, "loss": 17.546, "step": 16495 }, { "epoch": 0.3015336245818634, "grad_norm": 8.695163492420042, "learning_rate": 8.188158740936859e-06, "loss": 18.9347, "step": 16496 }, { "epoch": 0.3015519037783099, "grad_norm": 5.85640219611583, "learning_rate": 8.187930705430123e-06, "loss": 17.3828, "step": 16497 }, { "epoch": 0.3015701829747564, "grad_norm": 6.906288848652323, "learning_rate": 8.187702658749966e-06, "loss": 17.544, "step": 16498 }, { "epoch": 0.30158846217120294, "grad_norm": 8.21743779413207, "learning_rate": 8.18747460089719e-06, "loss": 18.0989, "step": 16499 }, { "epoch": 0.3016067413676495, "grad_norm": 6.508468927991813, "learning_rate": 8.187246531872588e-06, "loss": 17.5793, "step": 16500 }, { "epoch": 0.301625020564096, "grad_norm": 6.812483683032258, "learning_rate": 8.187018451676967e-06, "loss": 17.7585, "step": 16501 }, { "epoch": 0.30164329976054255, "grad_norm": 7.712239528760078, "learning_rate": 8.186790360311123e-06, "loss": 18.1059, "step": 16502 }, { "epoch": 0.30166157895698903, "grad_norm": 5.874934679959864, "learning_rate": 8.186562257775853e-06, "loss": 17.2574, "step": 16503 }, { "epoch": 0.30167985815343556, "grad_norm": 6.555338602061579, "learning_rate": 8.18633414407196e-06, "loss": 17.6712, "step": 16504 }, { "epoch": 0.3016981373498821, "grad_norm": 7.227967719558234, "learning_rate": 8.186106019200242e-06, "loss": 17.7025, "step": 16505 }, { "epoch": 0.30171641654632864, "grad_norm": 5.869019190080841, "learning_rate": 8.185877883161499e-06, "loss": 17.3396, "step": 16506 }, { "epoch": 0.30173469574277517, "grad_norm": 7.133923522152701, "learning_rate": 8.185649735956532e-06, "loss": 17.9019, "step": 16507 }, { "epoch": 0.30175297493922165, "grad_norm": 7.245080198261911, "learning_rate": 8.185421577586136e-06, "loss": 17.7752, "step": 16508 }, { "epoch": 0.3017712541356682, "grad_norm": 4.810273064943183, "learning_rate": 8.185193408051117e-06, "loss": 16.9043, "step": 16509 }, { "epoch": 0.3017895333321147, "grad_norm": 7.7115591365479546, "learning_rate": 8.184965227352269e-06, "loss": 18.181, "step": 16510 }, { "epoch": 0.30180781252856126, "grad_norm": 8.173721821529089, "learning_rate": 8.184737035490395e-06, "loss": 18.4305, "step": 16511 }, { "epoch": 0.3018260917250078, "grad_norm": 7.775223499109105, "learning_rate": 8.184508832466296e-06, "loss": 17.6237, "step": 16512 }, { "epoch": 0.3018443709214543, "grad_norm": 6.893432384644545, "learning_rate": 8.184280618280767e-06, "loss": 17.5622, "step": 16513 }, { "epoch": 0.3018626501179008, "grad_norm": 6.232925317226751, "learning_rate": 8.184052392934612e-06, "loss": 17.6704, "step": 16514 }, { "epoch": 0.30188092931434735, "grad_norm": 6.098056554058219, "learning_rate": 8.18382415642863e-06, "loss": 17.7114, "step": 16515 }, { "epoch": 0.3018992085107939, "grad_norm": 6.588516230814564, "learning_rate": 8.183595908763621e-06, "loss": 17.5474, "step": 16516 }, { "epoch": 0.3019174877072404, "grad_norm": 6.028432713556956, "learning_rate": 8.183367649940383e-06, "loss": 17.2659, "step": 16517 }, { "epoch": 0.3019357669036869, "grad_norm": 6.720741996752916, "learning_rate": 8.18313937995972e-06, "loss": 17.7225, "step": 16518 }, { "epoch": 0.30195404610013343, "grad_norm": 7.538141356641681, "learning_rate": 8.182911098822429e-06, "loss": 17.6945, "step": 16519 }, { "epoch": 0.30197232529657997, "grad_norm": 7.364066713971182, "learning_rate": 8.182682806529308e-06, "loss": 17.9775, "step": 16520 }, { "epoch": 0.3019906044930265, "grad_norm": 5.94866339604873, "learning_rate": 8.182454503081163e-06, "loss": 17.3491, "step": 16521 }, { "epoch": 0.302008883689473, "grad_norm": 6.276540962611812, "learning_rate": 8.182226188478789e-06, "loss": 17.4954, "step": 16522 }, { "epoch": 0.3020271628859195, "grad_norm": 7.731017932608166, "learning_rate": 8.18199786272299e-06, "loss": 18.1973, "step": 16523 }, { "epoch": 0.30204544208236606, "grad_norm": 7.919062207011995, "learning_rate": 8.181769525814564e-06, "loss": 18.2331, "step": 16524 }, { "epoch": 0.3020637212788126, "grad_norm": 5.786780000985477, "learning_rate": 8.181541177754313e-06, "loss": 17.3974, "step": 16525 }, { "epoch": 0.3020820004752591, "grad_norm": 4.780131828598741, "learning_rate": 8.181312818543035e-06, "loss": 16.9365, "step": 16526 }, { "epoch": 0.3021002796717056, "grad_norm": 6.2531493406756935, "learning_rate": 8.18108444818153e-06, "loss": 17.4062, "step": 16527 }, { "epoch": 0.30211855886815214, "grad_norm": 7.22452000001973, "learning_rate": 8.180856066670601e-06, "loss": 17.8213, "step": 16528 }, { "epoch": 0.3021368380645987, "grad_norm": 7.399980142162305, "learning_rate": 8.18062767401105e-06, "loss": 17.8293, "step": 16529 }, { "epoch": 0.3021551172610452, "grad_norm": 6.711907461664344, "learning_rate": 8.180399270203674e-06, "loss": 17.6821, "step": 16530 }, { "epoch": 0.30217339645749175, "grad_norm": 6.115761496396272, "learning_rate": 8.180170855249273e-06, "loss": 17.4398, "step": 16531 }, { "epoch": 0.30219167565393823, "grad_norm": 6.0219889397981445, "learning_rate": 8.17994242914865e-06, "loss": 17.4822, "step": 16532 }, { "epoch": 0.30220995485038477, "grad_norm": 7.016598860551536, "learning_rate": 8.179713991902604e-06, "loss": 17.8632, "step": 16533 }, { "epoch": 0.3022282340468313, "grad_norm": 6.277178289562638, "learning_rate": 8.179485543511937e-06, "loss": 17.5471, "step": 16534 }, { "epoch": 0.30224651324327784, "grad_norm": 6.154732914126383, "learning_rate": 8.17925708397745e-06, "loss": 17.2966, "step": 16535 }, { "epoch": 0.3022647924397244, "grad_norm": 5.984522907203976, "learning_rate": 8.179028613299942e-06, "loss": 17.2695, "step": 16536 }, { "epoch": 0.30228307163617085, "grad_norm": 6.073259677347897, "learning_rate": 8.178800131480215e-06, "loss": 17.3777, "step": 16537 }, { "epoch": 0.3023013508326174, "grad_norm": 6.778560795947439, "learning_rate": 8.17857163851907e-06, "loss": 17.6691, "step": 16538 }, { "epoch": 0.3023196300290639, "grad_norm": 5.072466790177338, "learning_rate": 8.178343134417305e-06, "loss": 17.0627, "step": 16539 }, { "epoch": 0.30233790922551046, "grad_norm": 6.49635799588229, "learning_rate": 8.178114619175725e-06, "loss": 17.7353, "step": 16540 }, { "epoch": 0.302356188421957, "grad_norm": 6.225165267747494, "learning_rate": 8.17788609279513e-06, "loss": 17.3818, "step": 16541 }, { "epoch": 0.3023744676184035, "grad_norm": 5.528245605871067, "learning_rate": 8.177657555276316e-06, "loss": 17.1716, "step": 16542 }, { "epoch": 0.30239274681485, "grad_norm": 7.403188748676723, "learning_rate": 8.17742900662009e-06, "loss": 18.0589, "step": 16543 }, { "epoch": 0.30241102601129655, "grad_norm": 7.47989327310321, "learning_rate": 8.177200446827253e-06, "loss": 18.2505, "step": 16544 }, { "epoch": 0.3024293052077431, "grad_norm": 6.608004154482326, "learning_rate": 8.176971875898602e-06, "loss": 17.7651, "step": 16545 }, { "epoch": 0.3024475844041896, "grad_norm": 6.595024618848987, "learning_rate": 8.176743293834942e-06, "loss": 17.5882, "step": 16546 }, { "epoch": 0.3024658636006361, "grad_norm": 6.83054755805045, "learning_rate": 8.17651470063707e-06, "loss": 17.783, "step": 16547 }, { "epoch": 0.30248414279708263, "grad_norm": 6.688101712968924, "learning_rate": 8.176286096305791e-06, "loss": 17.5782, "step": 16548 }, { "epoch": 0.30250242199352917, "grad_norm": 7.122155042754724, "learning_rate": 8.176057480841905e-06, "loss": 17.7173, "step": 16549 }, { "epoch": 0.3025207011899757, "grad_norm": 5.9652240692357, "learning_rate": 8.175828854246213e-06, "loss": 17.0419, "step": 16550 }, { "epoch": 0.30253898038642224, "grad_norm": 6.229676225581576, "learning_rate": 8.175600216519518e-06, "loss": 17.3498, "step": 16551 }, { "epoch": 0.3025572595828687, "grad_norm": 6.822616507251027, "learning_rate": 8.175371567662617e-06, "loss": 17.6579, "step": 16552 }, { "epoch": 0.30257553877931526, "grad_norm": 6.216293568132132, "learning_rate": 8.175142907676314e-06, "loss": 17.3725, "step": 16553 }, { "epoch": 0.3025938179757618, "grad_norm": 6.257030541385044, "learning_rate": 8.174914236561413e-06, "loss": 17.5881, "step": 16554 }, { "epoch": 0.3026120971722083, "grad_norm": 6.7557453311657545, "learning_rate": 8.17468555431871e-06, "loss": 17.7954, "step": 16555 }, { "epoch": 0.3026303763686548, "grad_norm": 7.772523328786312, "learning_rate": 8.174456860949013e-06, "loss": 18.0306, "step": 16556 }, { "epoch": 0.30264865556510134, "grad_norm": 5.725160385162716, "learning_rate": 8.174228156453118e-06, "loss": 17.2693, "step": 16557 }, { "epoch": 0.3026669347615479, "grad_norm": 6.027807594635172, "learning_rate": 8.173999440831832e-06, "loss": 17.1753, "step": 16558 }, { "epoch": 0.3026852139579944, "grad_norm": 6.579440995997506, "learning_rate": 8.17377071408595e-06, "loss": 17.7444, "step": 16559 }, { "epoch": 0.30270349315444095, "grad_norm": 7.149903712173893, "learning_rate": 8.173541976216278e-06, "loss": 17.7759, "step": 16560 }, { "epoch": 0.30272177235088743, "grad_norm": 8.21693445222621, "learning_rate": 8.173313227223618e-06, "loss": 18.1841, "step": 16561 }, { "epoch": 0.30274005154733397, "grad_norm": 5.965184469782462, "learning_rate": 8.173084467108768e-06, "loss": 17.167, "step": 16562 }, { "epoch": 0.3027583307437805, "grad_norm": 6.5322324471294095, "learning_rate": 8.172855695872535e-06, "loss": 17.8392, "step": 16563 }, { "epoch": 0.30277660994022704, "grad_norm": 6.699074543118785, "learning_rate": 8.172626913515716e-06, "loss": 17.4916, "step": 16564 }, { "epoch": 0.3027948891366736, "grad_norm": 7.690518849392542, "learning_rate": 8.172398120039115e-06, "loss": 18.1277, "step": 16565 }, { "epoch": 0.30281316833312005, "grad_norm": 6.593627364055679, "learning_rate": 8.172169315443536e-06, "loss": 17.7265, "step": 16566 }, { "epoch": 0.3028314475295666, "grad_norm": 7.555344403148906, "learning_rate": 8.171940499729776e-06, "loss": 17.5391, "step": 16567 }, { "epoch": 0.3028497267260131, "grad_norm": 6.291748527130846, "learning_rate": 8.171711672898642e-06, "loss": 17.4901, "step": 16568 }, { "epoch": 0.30286800592245966, "grad_norm": 6.284059474720727, "learning_rate": 8.171482834950932e-06, "loss": 17.5326, "step": 16569 }, { "epoch": 0.3028862851189062, "grad_norm": 6.193792799554601, "learning_rate": 8.171253985887452e-06, "loss": 17.4754, "step": 16570 }, { "epoch": 0.3029045643153527, "grad_norm": 5.946877100607961, "learning_rate": 8.171025125709002e-06, "loss": 17.2342, "step": 16571 }, { "epoch": 0.3029228435117992, "grad_norm": 6.540139571152314, "learning_rate": 8.170796254416382e-06, "loss": 17.6354, "step": 16572 }, { "epoch": 0.30294112270824575, "grad_norm": 7.6721978967782665, "learning_rate": 8.170567372010396e-06, "loss": 18.2392, "step": 16573 }, { "epoch": 0.3029594019046923, "grad_norm": 7.625256006654974, "learning_rate": 8.170338478491849e-06, "loss": 17.9819, "step": 16574 }, { "epoch": 0.3029776811011388, "grad_norm": 6.890673777461589, "learning_rate": 8.17010957386154e-06, "loss": 17.7003, "step": 16575 }, { "epoch": 0.3029959602975853, "grad_norm": 5.740488014371495, "learning_rate": 8.169880658120271e-06, "loss": 17.1102, "step": 16576 }, { "epoch": 0.30301423949403183, "grad_norm": 6.974765187209739, "learning_rate": 8.169651731268846e-06, "loss": 17.6724, "step": 16577 }, { "epoch": 0.30303251869047837, "grad_norm": 7.644279023603949, "learning_rate": 8.169422793308067e-06, "loss": 18.1045, "step": 16578 }, { "epoch": 0.3030507978869249, "grad_norm": 6.8224864213141005, "learning_rate": 8.169193844238735e-06, "loss": 18.0005, "step": 16579 }, { "epoch": 0.30306907708337144, "grad_norm": 7.407862070402675, "learning_rate": 8.168964884061654e-06, "loss": 18.038, "step": 16580 }, { "epoch": 0.3030873562798179, "grad_norm": 7.445578759311784, "learning_rate": 8.168735912777626e-06, "loss": 17.8016, "step": 16581 }, { "epoch": 0.30310563547626446, "grad_norm": 7.453146616106986, "learning_rate": 8.168506930387455e-06, "loss": 17.6925, "step": 16582 }, { "epoch": 0.303123914672711, "grad_norm": 6.647364772448283, "learning_rate": 8.16827793689194e-06, "loss": 17.9154, "step": 16583 }, { "epoch": 0.30314219386915753, "grad_norm": 6.6805973224818604, "learning_rate": 8.168048932291887e-06, "loss": 17.7983, "step": 16584 }, { "epoch": 0.30316047306560406, "grad_norm": 6.81124673399441, "learning_rate": 8.167819916588098e-06, "loss": 17.4131, "step": 16585 }, { "epoch": 0.30317875226205054, "grad_norm": 5.71800434990808, "learning_rate": 8.167590889781374e-06, "loss": 17.193, "step": 16586 }, { "epoch": 0.3031970314584971, "grad_norm": 6.357336002746887, "learning_rate": 8.16736185187252e-06, "loss": 17.6707, "step": 16587 }, { "epoch": 0.3032153106549436, "grad_norm": 6.7051551075638995, "learning_rate": 8.167132802862337e-06, "loss": 17.7506, "step": 16588 }, { "epoch": 0.30323358985139015, "grad_norm": 6.0656603212703715, "learning_rate": 8.166903742751629e-06, "loss": 17.3446, "step": 16589 }, { "epoch": 0.30325186904783663, "grad_norm": 6.234430686785868, "learning_rate": 8.166674671541197e-06, "loss": 17.5529, "step": 16590 }, { "epoch": 0.30327014824428317, "grad_norm": 6.07744793907776, "learning_rate": 8.166445589231844e-06, "loss": 17.3579, "step": 16591 }, { "epoch": 0.3032884274407297, "grad_norm": 7.458944463186073, "learning_rate": 8.166216495824377e-06, "loss": 18.1661, "step": 16592 }, { "epoch": 0.30330670663717624, "grad_norm": 7.237524505725243, "learning_rate": 8.165987391319595e-06, "loss": 17.9962, "step": 16593 }, { "epoch": 0.3033249858336228, "grad_norm": 6.211035357940235, "learning_rate": 8.165758275718299e-06, "loss": 17.4706, "step": 16594 }, { "epoch": 0.30334326503006925, "grad_norm": 6.169519490500913, "learning_rate": 8.1655291490213e-06, "loss": 17.4826, "step": 16595 }, { "epoch": 0.3033615442265158, "grad_norm": 7.016402761120805, "learning_rate": 8.165300011229391e-06, "loss": 17.6625, "step": 16596 }, { "epoch": 0.3033798234229623, "grad_norm": 7.2425316947153355, "learning_rate": 8.165070862343383e-06, "loss": 17.8298, "step": 16597 }, { "epoch": 0.30339810261940886, "grad_norm": 5.969118681307772, "learning_rate": 8.164841702364074e-06, "loss": 17.277, "step": 16598 }, { "epoch": 0.3034163818158554, "grad_norm": 6.9113068676432805, "learning_rate": 8.164612531292272e-06, "loss": 17.8815, "step": 16599 }, { "epoch": 0.3034346610123019, "grad_norm": 6.00061687960561, "learning_rate": 8.164383349128778e-06, "loss": 17.2717, "step": 16600 }, { "epoch": 0.3034529402087484, "grad_norm": 6.190804065741893, "learning_rate": 8.164154155874392e-06, "loss": 17.3708, "step": 16601 }, { "epoch": 0.30347121940519495, "grad_norm": 5.90428780466989, "learning_rate": 8.163924951529922e-06, "loss": 17.2192, "step": 16602 }, { "epoch": 0.3034894986016415, "grad_norm": 8.428949101455649, "learning_rate": 8.16369573609617e-06, "loss": 18.2044, "step": 16603 }, { "epoch": 0.303507777798088, "grad_norm": 6.087154641374202, "learning_rate": 8.163466509573938e-06, "loss": 17.4781, "step": 16604 }, { "epoch": 0.3035260569945345, "grad_norm": 5.548964191141877, "learning_rate": 8.163237271964032e-06, "loss": 17.1363, "step": 16605 }, { "epoch": 0.30354433619098103, "grad_norm": 6.528545508294717, "learning_rate": 8.163008023267253e-06, "loss": 17.6114, "step": 16606 }, { "epoch": 0.30356261538742757, "grad_norm": 5.317325238136297, "learning_rate": 8.162778763484405e-06, "loss": 16.9295, "step": 16607 }, { "epoch": 0.3035808945838741, "grad_norm": 6.628272614822982, "learning_rate": 8.162549492616292e-06, "loss": 17.7807, "step": 16608 }, { "epoch": 0.30359917378032064, "grad_norm": 6.85699704842018, "learning_rate": 8.162320210663717e-06, "loss": 17.6457, "step": 16609 }, { "epoch": 0.3036174529767671, "grad_norm": 6.036235766142873, "learning_rate": 8.162090917627486e-06, "loss": 17.0974, "step": 16610 }, { "epoch": 0.30363573217321366, "grad_norm": 6.257521317624505, "learning_rate": 8.161861613508399e-06, "loss": 17.6131, "step": 16611 }, { "epoch": 0.3036540113696602, "grad_norm": 7.3301207660809515, "learning_rate": 8.161632298307261e-06, "loss": 17.7923, "step": 16612 }, { "epoch": 0.30367229056610673, "grad_norm": 8.590125791099593, "learning_rate": 8.161402972024876e-06, "loss": 18.2708, "step": 16613 }, { "epoch": 0.30369056976255326, "grad_norm": 6.756733202026823, "learning_rate": 8.16117363466205e-06, "loss": 17.8269, "step": 16614 }, { "epoch": 0.30370884895899974, "grad_norm": 7.909718751814012, "learning_rate": 8.160944286219582e-06, "loss": 17.9376, "step": 16615 }, { "epoch": 0.3037271281554463, "grad_norm": 5.841152948775874, "learning_rate": 8.160714926698281e-06, "loss": 17.2749, "step": 16616 }, { "epoch": 0.3037454073518928, "grad_norm": 6.303060380406051, "learning_rate": 8.160485556098948e-06, "loss": 17.6905, "step": 16617 }, { "epoch": 0.30376368654833935, "grad_norm": 7.849903925154078, "learning_rate": 8.160256174422387e-06, "loss": 18.0157, "step": 16618 }, { "epoch": 0.3037819657447859, "grad_norm": 6.219801675497526, "learning_rate": 8.160026781669401e-06, "loss": 17.5778, "step": 16619 }, { "epoch": 0.30380024494123237, "grad_norm": 6.340471740537694, "learning_rate": 8.159797377840799e-06, "loss": 17.2766, "step": 16620 }, { "epoch": 0.3038185241376789, "grad_norm": 6.8026489423000065, "learning_rate": 8.159567962937379e-06, "loss": 17.823, "step": 16621 }, { "epoch": 0.30383680333412544, "grad_norm": 7.274009078158674, "learning_rate": 8.159338536959946e-06, "loss": 18.1152, "step": 16622 }, { "epoch": 0.303855082530572, "grad_norm": 8.867369198726927, "learning_rate": 8.159109099909309e-06, "loss": 18.6322, "step": 16623 }, { "epoch": 0.30387336172701845, "grad_norm": 7.441710985970655, "learning_rate": 8.158879651786266e-06, "loss": 18.0164, "step": 16624 }, { "epoch": 0.303891640923465, "grad_norm": 5.665058720299332, "learning_rate": 8.158650192591625e-06, "loss": 17.2601, "step": 16625 }, { "epoch": 0.3039099201199115, "grad_norm": 5.330355568794583, "learning_rate": 8.158420722326188e-06, "loss": 17.0789, "step": 16626 }, { "epoch": 0.30392819931635806, "grad_norm": 6.799221602475565, "learning_rate": 8.158191240990761e-06, "loss": 17.7676, "step": 16627 }, { "epoch": 0.3039464785128046, "grad_norm": 7.771883530264951, "learning_rate": 8.157961748586149e-06, "loss": 17.9648, "step": 16628 }, { "epoch": 0.3039647577092511, "grad_norm": 5.87215653021683, "learning_rate": 8.157732245113153e-06, "loss": 17.3832, "step": 16629 }, { "epoch": 0.3039830369056976, "grad_norm": 7.254285161872083, "learning_rate": 8.157502730572581e-06, "loss": 17.9605, "step": 16630 }, { "epoch": 0.30400131610214415, "grad_norm": 5.982002777119766, "learning_rate": 8.157273204965238e-06, "loss": 17.2095, "step": 16631 }, { "epoch": 0.3040195952985907, "grad_norm": 5.96001871350836, "learning_rate": 8.157043668291922e-06, "loss": 17.3055, "step": 16632 }, { "epoch": 0.3040378744950372, "grad_norm": 6.435863027618191, "learning_rate": 8.156814120553445e-06, "loss": 17.7032, "step": 16633 }, { "epoch": 0.3040561536914837, "grad_norm": 7.57322633761336, "learning_rate": 8.156584561750606e-06, "loss": 18.0216, "step": 16634 }, { "epoch": 0.30407443288793023, "grad_norm": 6.696133293513736, "learning_rate": 8.156354991884214e-06, "loss": 17.692, "step": 16635 }, { "epoch": 0.30409271208437677, "grad_norm": 8.727030605321485, "learning_rate": 8.156125410955071e-06, "loss": 18.3051, "step": 16636 }, { "epoch": 0.3041109912808233, "grad_norm": 6.733621858197244, "learning_rate": 8.155895818963982e-06, "loss": 17.498, "step": 16637 }, { "epoch": 0.30412927047726984, "grad_norm": 5.976496155872423, "learning_rate": 8.155666215911754e-06, "loss": 17.279, "step": 16638 }, { "epoch": 0.3041475496737163, "grad_norm": 6.118867408043492, "learning_rate": 8.155436601799187e-06, "loss": 17.4757, "step": 16639 }, { "epoch": 0.30416582887016286, "grad_norm": 5.735956527341406, "learning_rate": 8.15520697662709e-06, "loss": 17.1856, "step": 16640 }, { "epoch": 0.3041841080666094, "grad_norm": 7.487874252516413, "learning_rate": 8.154977340396264e-06, "loss": 18.0411, "step": 16641 }, { "epoch": 0.30420238726305593, "grad_norm": 7.761509509625328, "learning_rate": 8.154747693107518e-06, "loss": 18.2016, "step": 16642 }, { "epoch": 0.30422066645950246, "grad_norm": 6.191408684603873, "learning_rate": 8.154518034761657e-06, "loss": 17.5018, "step": 16643 }, { "epoch": 0.30423894565594894, "grad_norm": 7.728201685651844, "learning_rate": 8.154288365359483e-06, "loss": 17.9976, "step": 16644 }, { "epoch": 0.3042572248523955, "grad_norm": 5.864159128704303, "learning_rate": 8.1540586849018e-06, "loss": 17.3021, "step": 16645 }, { "epoch": 0.304275504048842, "grad_norm": 5.7828330681318105, "learning_rate": 8.153828993389417e-06, "loss": 17.1839, "step": 16646 }, { "epoch": 0.30429378324528855, "grad_norm": 6.2154094997417895, "learning_rate": 8.153599290823136e-06, "loss": 17.5577, "step": 16647 }, { "epoch": 0.3043120624417351, "grad_norm": 5.964364786629598, "learning_rate": 8.153369577203764e-06, "loss": 17.4375, "step": 16648 }, { "epoch": 0.30433034163818157, "grad_norm": 6.622789388812934, "learning_rate": 8.153139852532104e-06, "loss": 17.3699, "step": 16649 }, { "epoch": 0.3043486208346281, "grad_norm": 8.46486387940904, "learning_rate": 8.152910116808962e-06, "loss": 18.9387, "step": 16650 }, { "epoch": 0.30436690003107464, "grad_norm": 6.588297848353495, "learning_rate": 8.152680370035146e-06, "loss": 17.6354, "step": 16651 }, { "epoch": 0.3043851792275212, "grad_norm": 7.363797111774982, "learning_rate": 8.152450612211457e-06, "loss": 17.824, "step": 16652 }, { "epoch": 0.3044034584239677, "grad_norm": 7.877980197925042, "learning_rate": 8.152220843338704e-06, "loss": 18.4074, "step": 16653 }, { "epoch": 0.3044217376204142, "grad_norm": 5.609245018958935, "learning_rate": 8.15199106341769e-06, "loss": 17.25, "step": 16654 }, { "epoch": 0.3044400168168607, "grad_norm": 6.194752741290725, "learning_rate": 8.151761272449219e-06, "loss": 17.7048, "step": 16655 }, { "epoch": 0.30445829601330726, "grad_norm": 5.858072727717588, "learning_rate": 8.151531470434099e-06, "loss": 17.364, "step": 16656 }, { "epoch": 0.3044765752097538, "grad_norm": 7.032068695583198, "learning_rate": 8.151301657373136e-06, "loss": 17.9112, "step": 16657 }, { "epoch": 0.3044948544062003, "grad_norm": 5.809396707838711, "learning_rate": 8.151071833267135e-06, "loss": 17.2292, "step": 16658 }, { "epoch": 0.3045131336026468, "grad_norm": 6.118237197029155, "learning_rate": 8.150841998116898e-06, "loss": 17.5482, "step": 16659 }, { "epoch": 0.30453141279909335, "grad_norm": 11.032475752883897, "learning_rate": 8.150612151923234e-06, "loss": 18.1286, "step": 16660 }, { "epoch": 0.3045496919955399, "grad_norm": 5.7063388817630285, "learning_rate": 8.150382294686948e-06, "loss": 17.4102, "step": 16661 }, { "epoch": 0.3045679711919864, "grad_norm": 6.531306214638482, "learning_rate": 8.150152426408845e-06, "loss": 17.4352, "step": 16662 }, { "epoch": 0.3045862503884329, "grad_norm": 5.562829033755462, "learning_rate": 8.14992254708973e-06, "loss": 17.2006, "step": 16663 }, { "epoch": 0.30460452958487944, "grad_norm": 6.371054500796451, "learning_rate": 8.149692656730413e-06, "loss": 17.6054, "step": 16664 }, { "epoch": 0.30462280878132597, "grad_norm": 7.696357414683173, "learning_rate": 8.149462755331695e-06, "loss": 18.2222, "step": 16665 }, { "epoch": 0.3046410879777725, "grad_norm": 6.145415972817905, "learning_rate": 8.149232842894384e-06, "loss": 17.449, "step": 16666 }, { "epoch": 0.30465936717421904, "grad_norm": 7.071202653939831, "learning_rate": 8.149002919419282e-06, "loss": 18.3085, "step": 16667 }, { "epoch": 0.3046776463706655, "grad_norm": 7.551379793858554, "learning_rate": 8.148772984907203e-06, "loss": 17.9634, "step": 16668 }, { "epoch": 0.30469592556711206, "grad_norm": 8.186109212308564, "learning_rate": 8.148543039358944e-06, "loss": 18.2248, "step": 16669 }, { "epoch": 0.3047142047635586, "grad_norm": 6.466871249323418, "learning_rate": 8.148313082775316e-06, "loss": 17.5463, "step": 16670 }, { "epoch": 0.30473248396000513, "grad_norm": 6.293822843931664, "learning_rate": 8.148083115157124e-06, "loss": 17.3767, "step": 16671 }, { "epoch": 0.30475076315645167, "grad_norm": 6.636580017643419, "learning_rate": 8.147853136505175e-06, "loss": 17.6899, "step": 16672 }, { "epoch": 0.30476904235289815, "grad_norm": 6.777519905813917, "learning_rate": 8.147623146820272e-06, "loss": 17.7548, "step": 16673 }, { "epoch": 0.3047873215493447, "grad_norm": 7.441594029204132, "learning_rate": 8.147393146103224e-06, "loss": 17.8708, "step": 16674 }, { "epoch": 0.3048056007457912, "grad_norm": 6.356874956270289, "learning_rate": 8.147163134354836e-06, "loss": 17.5734, "step": 16675 }, { "epoch": 0.30482387994223775, "grad_norm": 7.140009436584371, "learning_rate": 8.146933111575915e-06, "loss": 17.7291, "step": 16676 }, { "epoch": 0.3048421591386843, "grad_norm": 6.992353068374214, "learning_rate": 8.146703077767265e-06, "loss": 17.9955, "step": 16677 }, { "epoch": 0.30486043833513077, "grad_norm": 6.299840670557331, "learning_rate": 8.146473032929693e-06, "loss": 17.4187, "step": 16678 }, { "epoch": 0.3048787175315773, "grad_norm": 5.890386436433404, "learning_rate": 8.146242977064009e-06, "loss": 17.3146, "step": 16679 }, { "epoch": 0.30489699672802384, "grad_norm": 7.095378126234456, "learning_rate": 8.146012910171014e-06, "loss": 17.591, "step": 16680 }, { "epoch": 0.3049152759244704, "grad_norm": 5.414916195517052, "learning_rate": 8.14578283225152e-06, "loss": 17.2094, "step": 16681 }, { "epoch": 0.3049335551209169, "grad_norm": 8.092887418734362, "learning_rate": 8.145552743306327e-06, "loss": 17.7922, "step": 16682 }, { "epoch": 0.3049518343173634, "grad_norm": 7.124788941677222, "learning_rate": 8.145322643336245e-06, "loss": 17.6115, "step": 16683 }, { "epoch": 0.3049701135138099, "grad_norm": 6.079544236882442, "learning_rate": 8.14509253234208e-06, "loss": 17.7165, "step": 16684 }, { "epoch": 0.30498839271025646, "grad_norm": 7.861638541511168, "learning_rate": 8.14486241032464e-06, "loss": 17.9554, "step": 16685 }, { "epoch": 0.305006671906703, "grad_norm": 7.415013676567563, "learning_rate": 8.14463227728473e-06, "loss": 17.8059, "step": 16686 }, { "epoch": 0.30502495110314953, "grad_norm": 6.172542416710036, "learning_rate": 8.144402133223155e-06, "loss": 17.3136, "step": 16687 }, { "epoch": 0.305043230299596, "grad_norm": 7.380340395382709, "learning_rate": 8.144171978140725e-06, "loss": 17.1493, "step": 16688 }, { "epoch": 0.30506150949604255, "grad_norm": 6.811544579927895, "learning_rate": 8.143941812038244e-06, "loss": 17.8914, "step": 16689 }, { "epoch": 0.3050797886924891, "grad_norm": 6.0034266004955095, "learning_rate": 8.14371163491652e-06, "loss": 17.4623, "step": 16690 }, { "epoch": 0.3050980678889356, "grad_norm": 6.050861975044013, "learning_rate": 8.14348144677636e-06, "loss": 17.5379, "step": 16691 }, { "epoch": 0.3051163470853821, "grad_norm": 5.647014412074355, "learning_rate": 8.14325124761857e-06, "loss": 17.3413, "step": 16692 }, { "epoch": 0.30513462628182864, "grad_norm": 7.545296878365928, "learning_rate": 8.143021037443956e-06, "loss": 17.9563, "step": 16693 }, { "epoch": 0.30515290547827517, "grad_norm": 6.106851345102743, "learning_rate": 8.142790816253327e-06, "loss": 17.6058, "step": 16694 }, { "epoch": 0.3051711846747217, "grad_norm": 6.0532280414861015, "learning_rate": 8.14256058404749e-06, "loss": 17.3859, "step": 16695 }, { "epoch": 0.30518946387116824, "grad_norm": 5.322284477396805, "learning_rate": 8.14233034082725e-06, "loss": 17.4024, "step": 16696 }, { "epoch": 0.3052077430676147, "grad_norm": 8.295172495342939, "learning_rate": 8.142100086593414e-06, "loss": 18.5029, "step": 16697 }, { "epoch": 0.30522602226406126, "grad_norm": 8.014620110297049, "learning_rate": 8.141869821346791e-06, "loss": 18.4967, "step": 16698 }, { "epoch": 0.3052443014605078, "grad_norm": 7.109719991357483, "learning_rate": 8.141639545088189e-06, "loss": 17.7804, "step": 16699 }, { "epoch": 0.30526258065695433, "grad_norm": 7.570967708371013, "learning_rate": 8.141409257818409e-06, "loss": 17.7589, "step": 16700 }, { "epoch": 0.30528085985340087, "grad_norm": 5.743573043057227, "learning_rate": 8.141178959538263e-06, "loss": 17.3379, "step": 16701 }, { "epoch": 0.30529913904984735, "grad_norm": 5.60616088144606, "learning_rate": 8.140948650248559e-06, "loss": 17.2385, "step": 16702 }, { "epoch": 0.3053174182462939, "grad_norm": 5.142308309232406, "learning_rate": 8.140718329950101e-06, "loss": 17.0075, "step": 16703 }, { "epoch": 0.3053356974427404, "grad_norm": 6.547997382321389, "learning_rate": 8.140487998643699e-06, "loss": 17.8636, "step": 16704 }, { "epoch": 0.30535397663918695, "grad_norm": 5.658739944947381, "learning_rate": 8.140257656330159e-06, "loss": 17.2068, "step": 16705 }, { "epoch": 0.3053722558356335, "grad_norm": 6.732443229239918, "learning_rate": 8.140027303010288e-06, "loss": 17.6035, "step": 16706 }, { "epoch": 0.30539053503207997, "grad_norm": 7.543038726133314, "learning_rate": 8.139796938684892e-06, "loss": 18.0054, "step": 16707 }, { "epoch": 0.3054088142285265, "grad_norm": 5.737792449591821, "learning_rate": 8.139566563354782e-06, "loss": 17.1012, "step": 16708 }, { "epoch": 0.30542709342497304, "grad_norm": 5.283208433592343, "learning_rate": 8.139336177020765e-06, "loss": 17.0553, "step": 16709 }, { "epoch": 0.3054453726214196, "grad_norm": 4.807319283842524, "learning_rate": 8.139105779683645e-06, "loss": 16.9905, "step": 16710 }, { "epoch": 0.3054636518178661, "grad_norm": 7.485844959479744, "learning_rate": 8.138875371344232e-06, "loss": 17.9085, "step": 16711 }, { "epoch": 0.3054819310143126, "grad_norm": 5.759929392525538, "learning_rate": 8.138644952003334e-06, "loss": 17.2418, "step": 16712 }, { "epoch": 0.3055002102107591, "grad_norm": 6.841986283055872, "learning_rate": 8.138414521661758e-06, "loss": 17.6657, "step": 16713 }, { "epoch": 0.30551848940720566, "grad_norm": 7.699958157519718, "learning_rate": 8.13818408032031e-06, "loss": 18.2383, "step": 16714 }, { "epoch": 0.3055367686036522, "grad_norm": 5.396068550385644, "learning_rate": 8.1379536279798e-06, "loss": 17.256, "step": 16715 }, { "epoch": 0.30555504780009873, "grad_norm": 7.920276591533406, "learning_rate": 8.137723164641034e-06, "loss": 18.6347, "step": 16716 }, { "epoch": 0.3055733269965452, "grad_norm": 7.267905648980075, "learning_rate": 8.137492690304823e-06, "loss": 17.924, "step": 16717 }, { "epoch": 0.30559160619299175, "grad_norm": 5.882535555137481, "learning_rate": 8.13726220497197e-06, "loss": 17.3439, "step": 16718 }, { "epoch": 0.3056098853894383, "grad_norm": 7.564212895553327, "learning_rate": 8.137031708643283e-06, "loss": 17.7634, "step": 16719 }, { "epoch": 0.3056281645858848, "grad_norm": 8.278958609025626, "learning_rate": 8.136801201319578e-06, "loss": 17.9191, "step": 16720 }, { "epoch": 0.30564644378233136, "grad_norm": 5.837226528619861, "learning_rate": 8.136570683001652e-06, "loss": 17.2523, "step": 16721 }, { "epoch": 0.30566472297877784, "grad_norm": 7.32327415796826, "learning_rate": 8.136340153690321e-06, "loss": 17.8673, "step": 16722 }, { "epoch": 0.30568300217522437, "grad_norm": 10.108315129156512, "learning_rate": 8.13610961338639e-06, "loss": 18.5179, "step": 16723 }, { "epoch": 0.3057012813716709, "grad_norm": 6.933209439932622, "learning_rate": 8.135879062090663e-06, "loss": 17.9609, "step": 16724 }, { "epoch": 0.30571956056811744, "grad_norm": 7.230965566714261, "learning_rate": 8.135648499803956e-06, "loss": 17.7497, "step": 16725 }, { "epoch": 0.3057378397645639, "grad_norm": 6.254464458688985, "learning_rate": 8.135417926527072e-06, "loss": 17.5044, "step": 16726 }, { "epoch": 0.30575611896101046, "grad_norm": 6.747843528117833, "learning_rate": 8.135187342260819e-06, "loss": 17.9859, "step": 16727 }, { "epoch": 0.305774398157457, "grad_norm": 6.7380015745969875, "learning_rate": 8.134956747006009e-06, "loss": 17.922, "step": 16728 }, { "epoch": 0.30579267735390353, "grad_norm": 6.965720999503236, "learning_rate": 8.134726140763445e-06, "loss": 17.6706, "step": 16729 }, { "epoch": 0.30581095655035007, "grad_norm": 5.699536573914778, "learning_rate": 8.134495523533939e-06, "loss": 17.3331, "step": 16730 }, { "epoch": 0.30582923574679655, "grad_norm": 7.264395494959649, "learning_rate": 8.134264895318298e-06, "loss": 17.9404, "step": 16731 }, { "epoch": 0.3058475149432431, "grad_norm": 10.733387802977195, "learning_rate": 8.134034256117332e-06, "loss": 17.9636, "step": 16732 }, { "epoch": 0.3058657941396896, "grad_norm": 5.704574170589308, "learning_rate": 8.133803605931847e-06, "loss": 17.1899, "step": 16733 }, { "epoch": 0.30588407333613615, "grad_norm": 6.687750961798256, "learning_rate": 8.133572944762651e-06, "loss": 17.6118, "step": 16734 }, { "epoch": 0.3059023525325827, "grad_norm": 8.285488445118865, "learning_rate": 8.133342272610553e-06, "loss": 17.1306, "step": 16735 }, { "epoch": 0.30592063172902917, "grad_norm": 6.001814260037967, "learning_rate": 8.133111589476366e-06, "loss": 17.4356, "step": 16736 }, { "epoch": 0.3059389109254757, "grad_norm": 7.99769116871961, "learning_rate": 8.132880895360893e-06, "loss": 18.3703, "step": 16737 }, { "epoch": 0.30595719012192224, "grad_norm": 7.802336244065987, "learning_rate": 8.132650190264944e-06, "loss": 18.3243, "step": 16738 }, { "epoch": 0.3059754693183688, "grad_norm": 6.642160089090536, "learning_rate": 8.132419474189328e-06, "loss": 17.7333, "step": 16739 }, { "epoch": 0.3059937485148153, "grad_norm": 7.335260573506195, "learning_rate": 8.132188747134852e-06, "loss": 17.6557, "step": 16740 }, { "epoch": 0.3060120277112618, "grad_norm": 6.055280801158583, "learning_rate": 8.131958009102327e-06, "loss": 17.3898, "step": 16741 }, { "epoch": 0.3060303069077083, "grad_norm": 5.184181120365097, "learning_rate": 8.131727260092564e-06, "loss": 16.957, "step": 16742 }, { "epoch": 0.30604858610415486, "grad_norm": 7.012052898024959, "learning_rate": 8.131496500106366e-06, "loss": 18.0213, "step": 16743 }, { "epoch": 0.3060668653006014, "grad_norm": 7.012947203763594, "learning_rate": 8.131265729144544e-06, "loss": 17.6844, "step": 16744 }, { "epoch": 0.30608514449704793, "grad_norm": 6.594685830215474, "learning_rate": 8.131034947207909e-06, "loss": 17.6969, "step": 16745 }, { "epoch": 0.3061034236934944, "grad_norm": 7.3559014298241046, "learning_rate": 8.130804154297268e-06, "loss": 17.7295, "step": 16746 }, { "epoch": 0.30612170288994095, "grad_norm": 7.191641221787167, "learning_rate": 8.130573350413428e-06, "loss": 18.1313, "step": 16747 }, { "epoch": 0.3061399820863875, "grad_norm": 6.974589058037992, "learning_rate": 8.130342535557202e-06, "loss": 17.8844, "step": 16748 }, { "epoch": 0.306158261282834, "grad_norm": 6.325822738509391, "learning_rate": 8.130111709729396e-06, "loss": 17.3803, "step": 16749 }, { "epoch": 0.30617654047928056, "grad_norm": 5.97383920703589, "learning_rate": 8.129880872930822e-06, "loss": 17.5041, "step": 16750 }, { "epoch": 0.30619481967572704, "grad_norm": 5.784921630829452, "learning_rate": 8.129650025162285e-06, "loss": 17.3294, "step": 16751 }, { "epoch": 0.3062130988721736, "grad_norm": 5.714783397676278, "learning_rate": 8.129419166424597e-06, "loss": 17.3295, "step": 16752 }, { "epoch": 0.3062313780686201, "grad_norm": 5.472971448192495, "learning_rate": 8.129188296718566e-06, "loss": 17.1211, "step": 16753 }, { "epoch": 0.30624965726506664, "grad_norm": 8.371551177531051, "learning_rate": 8.128957416045003e-06, "loss": 17.8683, "step": 16754 }, { "epoch": 0.3062679364615132, "grad_norm": 6.703373792499423, "learning_rate": 8.128726524404715e-06, "loss": 17.5168, "step": 16755 }, { "epoch": 0.30628621565795966, "grad_norm": 6.402467016581645, "learning_rate": 8.128495621798511e-06, "loss": 17.5197, "step": 16756 }, { "epoch": 0.3063044948544062, "grad_norm": 6.957447476972262, "learning_rate": 8.128264708227203e-06, "loss": 17.8857, "step": 16757 }, { "epoch": 0.30632277405085273, "grad_norm": 7.614934722454362, "learning_rate": 8.128033783691598e-06, "loss": 18.1989, "step": 16758 }, { "epoch": 0.30634105324729927, "grad_norm": 6.2255533879289615, "learning_rate": 8.127802848192506e-06, "loss": 17.4933, "step": 16759 }, { "epoch": 0.30635933244374575, "grad_norm": 6.811878880010115, "learning_rate": 8.127571901730736e-06, "loss": 17.951, "step": 16760 }, { "epoch": 0.3063776116401923, "grad_norm": 6.885163504999443, "learning_rate": 8.127340944307099e-06, "loss": 17.8237, "step": 16761 }, { "epoch": 0.3063958908366388, "grad_norm": 6.169290540256493, "learning_rate": 8.127109975922402e-06, "loss": 17.4573, "step": 16762 }, { "epoch": 0.30641417003308535, "grad_norm": 6.963155307268487, "learning_rate": 8.126878996577456e-06, "loss": 17.8927, "step": 16763 }, { "epoch": 0.3064324492295319, "grad_norm": 7.110075754402128, "learning_rate": 8.12664800627307e-06, "loss": 18.0326, "step": 16764 }, { "epoch": 0.30645072842597837, "grad_norm": 7.334729002026811, "learning_rate": 8.126417005010056e-06, "loss": 17.9197, "step": 16765 }, { "epoch": 0.3064690076224249, "grad_norm": 5.815015540294881, "learning_rate": 8.12618599278922e-06, "loss": 17.5021, "step": 16766 }, { "epoch": 0.30648728681887144, "grad_norm": 6.733952977149314, "learning_rate": 8.125954969611373e-06, "loss": 17.5858, "step": 16767 }, { "epoch": 0.306505566015318, "grad_norm": 6.286434035949967, "learning_rate": 8.125723935477328e-06, "loss": 17.5503, "step": 16768 }, { "epoch": 0.3065238452117645, "grad_norm": 7.330028539524488, "learning_rate": 8.12549289038789e-06, "loss": 17.8128, "step": 16769 }, { "epoch": 0.306542124408211, "grad_norm": 6.148307060165565, "learning_rate": 8.12526183434387e-06, "loss": 17.4417, "step": 16770 }, { "epoch": 0.3065604036046575, "grad_norm": 6.539274763971553, "learning_rate": 8.125030767346081e-06, "loss": 17.5556, "step": 16771 }, { "epoch": 0.30657868280110406, "grad_norm": 7.796567227685249, "learning_rate": 8.124799689395328e-06, "loss": 18.2581, "step": 16772 }, { "epoch": 0.3065969619975506, "grad_norm": 6.881588784470505, "learning_rate": 8.124568600492421e-06, "loss": 17.586, "step": 16773 }, { "epoch": 0.30661524119399713, "grad_norm": 6.720588951388159, "learning_rate": 8.124337500638175e-06, "loss": 17.687, "step": 16774 }, { "epoch": 0.3066335203904436, "grad_norm": 6.651211194075465, "learning_rate": 8.124106389833397e-06, "loss": 17.6702, "step": 16775 }, { "epoch": 0.30665179958689015, "grad_norm": 5.27799086682439, "learning_rate": 8.123875268078898e-06, "loss": 16.9764, "step": 16776 }, { "epoch": 0.3066700787833367, "grad_norm": 5.655025221938781, "learning_rate": 8.123644135375487e-06, "loss": 17.2942, "step": 16777 }, { "epoch": 0.3066883579797832, "grad_norm": 9.086716992701747, "learning_rate": 8.123412991723975e-06, "loss": 18.2968, "step": 16778 }, { "epoch": 0.30670663717622976, "grad_norm": 7.865888714452238, "learning_rate": 8.123181837125169e-06, "loss": 18.2855, "step": 16779 }, { "epoch": 0.30672491637267624, "grad_norm": 6.064853050171648, "learning_rate": 8.122950671579884e-06, "loss": 17.5467, "step": 16780 }, { "epoch": 0.3067431955691228, "grad_norm": 5.548607974608034, "learning_rate": 8.122719495088926e-06, "loss": 17.1425, "step": 16781 }, { "epoch": 0.3067614747655693, "grad_norm": 5.511890499092476, "learning_rate": 8.12248830765311e-06, "loss": 17.3113, "step": 16782 }, { "epoch": 0.30677975396201584, "grad_norm": 5.745980862167241, "learning_rate": 8.12225710927324e-06, "loss": 17.3077, "step": 16783 }, { "epoch": 0.3067980331584624, "grad_norm": 6.277237563776308, "learning_rate": 8.12202589995013e-06, "loss": 17.7336, "step": 16784 }, { "epoch": 0.30681631235490886, "grad_norm": 6.482625000784745, "learning_rate": 8.121794679684593e-06, "loss": 17.507, "step": 16785 }, { "epoch": 0.3068345915513554, "grad_norm": 6.1322004915697095, "learning_rate": 8.121563448477434e-06, "loss": 17.5198, "step": 16786 }, { "epoch": 0.30685287074780193, "grad_norm": 7.408403820256594, "learning_rate": 8.121332206329468e-06, "loss": 17.9982, "step": 16787 }, { "epoch": 0.30687114994424847, "grad_norm": 6.359961007502059, "learning_rate": 8.121100953241501e-06, "loss": 17.3535, "step": 16788 }, { "epoch": 0.306889429140695, "grad_norm": 6.706687458532863, "learning_rate": 8.120869689214349e-06, "loss": 17.4008, "step": 16789 }, { "epoch": 0.3069077083371415, "grad_norm": 6.510804917349421, "learning_rate": 8.120638414248819e-06, "loss": 17.6156, "step": 16790 }, { "epoch": 0.306925987533588, "grad_norm": 6.008520202802392, "learning_rate": 8.12040712834572e-06, "loss": 17.4768, "step": 16791 }, { "epoch": 0.30694426673003455, "grad_norm": 6.809351771785792, "learning_rate": 8.120175831505865e-06, "loss": 17.5423, "step": 16792 }, { "epoch": 0.3069625459264811, "grad_norm": 8.908918303842555, "learning_rate": 8.119944523730065e-06, "loss": 18.2396, "step": 16793 }, { "epoch": 0.30698082512292757, "grad_norm": 6.16648566698405, "learning_rate": 8.119713205019131e-06, "loss": 17.4055, "step": 16794 }, { "epoch": 0.3069991043193741, "grad_norm": 6.690468500161972, "learning_rate": 8.119481875373874e-06, "loss": 17.5125, "step": 16795 }, { "epoch": 0.30701738351582064, "grad_norm": 7.690193087772361, "learning_rate": 8.1192505347951e-06, "loss": 18.2434, "step": 16796 }, { "epoch": 0.3070356627122672, "grad_norm": 6.21081997330213, "learning_rate": 8.119019183283627e-06, "loss": 17.406, "step": 16797 }, { "epoch": 0.3070539419087137, "grad_norm": 6.7458423446584534, "learning_rate": 8.118787820840261e-06, "loss": 17.6053, "step": 16798 }, { "epoch": 0.3070722211051602, "grad_norm": 6.222093098054909, "learning_rate": 8.118556447465815e-06, "loss": 17.4989, "step": 16799 }, { "epoch": 0.30709050030160673, "grad_norm": 7.178904037819462, "learning_rate": 8.118325063161099e-06, "loss": 17.8731, "step": 16800 }, { "epoch": 0.30710877949805326, "grad_norm": 7.297540371704384, "learning_rate": 8.118093667926923e-06, "loss": 17.6484, "step": 16801 }, { "epoch": 0.3071270586944998, "grad_norm": 6.5991204891772135, "learning_rate": 8.1178622617641e-06, "loss": 17.5603, "step": 16802 }, { "epoch": 0.30714533789094633, "grad_norm": 8.665284978980097, "learning_rate": 8.11763084467344e-06, "loss": 17.5106, "step": 16803 }, { "epoch": 0.3071636170873928, "grad_norm": 6.996091273311296, "learning_rate": 8.117399416655758e-06, "loss": 17.8823, "step": 16804 }, { "epoch": 0.30718189628383935, "grad_norm": 6.669947849826685, "learning_rate": 8.117167977711858e-06, "loss": 17.6966, "step": 16805 }, { "epoch": 0.3072001754802859, "grad_norm": 7.306246721884619, "learning_rate": 8.116936527842556e-06, "loss": 17.6553, "step": 16806 }, { "epoch": 0.3072184546767324, "grad_norm": 6.424525242590785, "learning_rate": 8.11670506704866e-06, "loss": 17.3905, "step": 16807 }, { "epoch": 0.30723673387317896, "grad_norm": 6.4793656402982025, "learning_rate": 8.116473595330985e-06, "loss": 17.6348, "step": 16808 }, { "epoch": 0.30725501306962544, "grad_norm": 7.7622202772727755, "learning_rate": 8.116242112690341e-06, "loss": 18.0591, "step": 16809 }, { "epoch": 0.307273292266072, "grad_norm": 5.689732872215455, "learning_rate": 8.116010619127537e-06, "loss": 17.0951, "step": 16810 }, { "epoch": 0.3072915714625185, "grad_norm": 7.211639722715736, "learning_rate": 8.115779114643386e-06, "loss": 17.9286, "step": 16811 }, { "epoch": 0.30730985065896504, "grad_norm": 5.708995603719906, "learning_rate": 8.1155475992387e-06, "loss": 17.0867, "step": 16812 }, { "epoch": 0.3073281298554116, "grad_norm": 6.614224500834429, "learning_rate": 8.115316072914292e-06, "loss": 17.563, "step": 16813 }, { "epoch": 0.30734640905185806, "grad_norm": 5.890866472730227, "learning_rate": 8.11508453567097e-06, "loss": 17.3403, "step": 16814 }, { "epoch": 0.3073646882483046, "grad_norm": 10.282356418660505, "learning_rate": 8.114852987509546e-06, "loss": 18.3379, "step": 16815 }, { "epoch": 0.30738296744475113, "grad_norm": 6.636957431138423, "learning_rate": 8.114621428430834e-06, "loss": 17.5146, "step": 16816 }, { "epoch": 0.30740124664119767, "grad_norm": 8.012350806778004, "learning_rate": 8.114389858435643e-06, "loss": 18.3006, "step": 16817 }, { "epoch": 0.3074195258376442, "grad_norm": 5.57934032195251, "learning_rate": 8.114158277524788e-06, "loss": 17.2041, "step": 16818 }, { "epoch": 0.3074378050340907, "grad_norm": 5.903979757135749, "learning_rate": 8.113926685699076e-06, "loss": 17.2361, "step": 16819 }, { "epoch": 0.3074560842305372, "grad_norm": 5.890769605037617, "learning_rate": 8.113695082959323e-06, "loss": 17.2154, "step": 16820 }, { "epoch": 0.30747436342698375, "grad_norm": 6.167782280137336, "learning_rate": 8.113463469306338e-06, "loss": 17.605, "step": 16821 }, { "epoch": 0.3074926426234303, "grad_norm": 6.041483161343982, "learning_rate": 8.113231844740934e-06, "loss": 17.276, "step": 16822 }, { "epoch": 0.3075109218198768, "grad_norm": 6.384100256896727, "learning_rate": 8.113000209263923e-06, "loss": 17.6484, "step": 16823 }, { "epoch": 0.3075292010163233, "grad_norm": 7.4010222801699, "learning_rate": 8.112768562876115e-06, "loss": 17.938, "step": 16824 }, { "epoch": 0.30754748021276984, "grad_norm": 5.227382045976373, "learning_rate": 8.112536905578324e-06, "loss": 17.0111, "step": 16825 }, { "epoch": 0.3075657594092164, "grad_norm": 6.248957610211808, "learning_rate": 8.112305237371363e-06, "loss": 17.6124, "step": 16826 }, { "epoch": 0.3075840386056629, "grad_norm": 8.505216408345236, "learning_rate": 8.11207355825604e-06, "loss": 18.6358, "step": 16827 }, { "epoch": 0.3076023178021094, "grad_norm": 5.564588254452631, "learning_rate": 8.111841868233169e-06, "loss": 17.1388, "step": 16828 }, { "epoch": 0.30762059699855593, "grad_norm": 6.53574604524088, "learning_rate": 8.111610167303564e-06, "loss": 17.4384, "step": 16829 }, { "epoch": 0.30763887619500246, "grad_norm": 5.688050011488321, "learning_rate": 8.111378455468033e-06, "loss": 17.2404, "step": 16830 }, { "epoch": 0.307657155391449, "grad_norm": 7.501892039648677, "learning_rate": 8.111146732727393e-06, "loss": 18.3043, "step": 16831 }, { "epoch": 0.30767543458789554, "grad_norm": 7.138462604852312, "learning_rate": 8.110914999082453e-06, "loss": 18.0898, "step": 16832 }, { "epoch": 0.307693713784342, "grad_norm": 7.377713723181878, "learning_rate": 8.110683254534026e-06, "loss": 18.0345, "step": 16833 }, { "epoch": 0.30771199298078855, "grad_norm": 6.676124166974646, "learning_rate": 8.110451499082923e-06, "loss": 17.518, "step": 16834 }, { "epoch": 0.3077302721772351, "grad_norm": 6.495347737747358, "learning_rate": 8.110219732729958e-06, "loss": 17.639, "step": 16835 }, { "epoch": 0.3077485513736816, "grad_norm": 7.532118493682625, "learning_rate": 8.109987955475943e-06, "loss": 18.0588, "step": 16836 }, { "epoch": 0.30776683057012816, "grad_norm": 6.460262745617369, "learning_rate": 8.10975616732169e-06, "loss": 17.4713, "step": 16837 }, { "epoch": 0.30778510976657464, "grad_norm": 6.988802108291433, "learning_rate": 8.109524368268011e-06, "loss": 17.7515, "step": 16838 }, { "epoch": 0.3078033889630212, "grad_norm": 7.09823848055091, "learning_rate": 8.10929255831572e-06, "loss": 17.3706, "step": 16839 }, { "epoch": 0.3078216681594677, "grad_norm": 6.16473466216064, "learning_rate": 8.109060737465628e-06, "loss": 17.6117, "step": 16840 }, { "epoch": 0.30783994735591425, "grad_norm": 6.333954307177859, "learning_rate": 8.108828905718547e-06, "loss": 17.2888, "step": 16841 }, { "epoch": 0.3078582265523608, "grad_norm": 7.885699569862376, "learning_rate": 8.10859706307529e-06, "loss": 18.4863, "step": 16842 }, { "epoch": 0.30787650574880726, "grad_norm": 6.5957527771697055, "learning_rate": 8.108365209536672e-06, "loss": 17.7022, "step": 16843 }, { "epoch": 0.3078947849452538, "grad_norm": 6.928343747044289, "learning_rate": 8.108133345103505e-06, "loss": 17.6217, "step": 16844 }, { "epoch": 0.30791306414170033, "grad_norm": 5.832568707890773, "learning_rate": 8.107901469776595e-06, "loss": 17.4022, "step": 16845 }, { "epoch": 0.30793134333814687, "grad_norm": 6.3452969762109435, "learning_rate": 8.107669583556763e-06, "loss": 17.3455, "step": 16846 }, { "epoch": 0.3079496225345934, "grad_norm": 7.294542601090386, "learning_rate": 8.10743768644482e-06, "loss": 18.1352, "step": 16847 }, { "epoch": 0.3079679017310399, "grad_norm": 6.319800452694817, "learning_rate": 8.107205778441576e-06, "loss": 17.7499, "step": 16848 }, { "epoch": 0.3079861809274864, "grad_norm": 5.88348935253275, "learning_rate": 8.106973859547847e-06, "loss": 17.2997, "step": 16849 }, { "epoch": 0.30800446012393295, "grad_norm": 6.2635753404745325, "learning_rate": 8.106741929764443e-06, "loss": 17.7991, "step": 16850 }, { "epoch": 0.3080227393203795, "grad_norm": 5.887526511443674, "learning_rate": 8.106509989092179e-06, "loss": 17.331, "step": 16851 }, { "epoch": 0.308041018516826, "grad_norm": 5.916620641359955, "learning_rate": 8.106278037531864e-06, "loss": 17.3204, "step": 16852 }, { "epoch": 0.3080592977132725, "grad_norm": 9.593246848857893, "learning_rate": 8.106046075084317e-06, "loss": 18.2151, "step": 16853 }, { "epoch": 0.30807757690971904, "grad_norm": 7.7937880274598745, "learning_rate": 8.105814101750349e-06, "loss": 17.7964, "step": 16854 }, { "epoch": 0.3080958561061656, "grad_norm": 6.027667395664885, "learning_rate": 8.10558211753077e-06, "loss": 17.1767, "step": 16855 }, { "epoch": 0.3081141353026121, "grad_norm": 6.311521404106981, "learning_rate": 8.105350122426393e-06, "loss": 17.51, "step": 16856 }, { "epoch": 0.30813241449905865, "grad_norm": 6.497565444424784, "learning_rate": 8.105118116438037e-06, "loss": 17.3181, "step": 16857 }, { "epoch": 0.30815069369550513, "grad_norm": 6.621864204055428, "learning_rate": 8.104886099566511e-06, "loss": 17.764, "step": 16858 }, { "epoch": 0.30816897289195166, "grad_norm": 7.942768946301585, "learning_rate": 8.104654071812629e-06, "loss": 17.9489, "step": 16859 }, { "epoch": 0.3081872520883982, "grad_norm": 7.0570350180681745, "learning_rate": 8.104422033177201e-06, "loss": 17.6731, "step": 16860 }, { "epoch": 0.30820553128484474, "grad_norm": 6.918981765212633, "learning_rate": 8.104189983661047e-06, "loss": 17.8462, "step": 16861 }, { "epoch": 0.3082238104812912, "grad_norm": 7.644666345858515, "learning_rate": 8.103957923264974e-06, "loss": 17.7347, "step": 16862 }, { "epoch": 0.30824208967773775, "grad_norm": 7.955988581314848, "learning_rate": 8.1037258519898e-06, "loss": 18.2606, "step": 16863 }, { "epoch": 0.3082603688741843, "grad_norm": 6.1781673866031745, "learning_rate": 8.103493769836332e-06, "loss": 17.3356, "step": 16864 }, { "epoch": 0.3082786480706308, "grad_norm": 8.316612046608348, "learning_rate": 8.103261676805392e-06, "loss": 18.5851, "step": 16865 }, { "epoch": 0.30829692726707736, "grad_norm": 7.471215637665413, "learning_rate": 8.103029572897787e-06, "loss": 18.0248, "step": 16866 }, { "epoch": 0.30831520646352384, "grad_norm": 5.831401656742476, "learning_rate": 8.102797458114332e-06, "loss": 17.2288, "step": 16867 }, { "epoch": 0.3083334856599704, "grad_norm": 6.277362015516548, "learning_rate": 8.102565332455843e-06, "loss": 17.3635, "step": 16868 }, { "epoch": 0.3083517648564169, "grad_norm": 7.113790971566835, "learning_rate": 8.102333195923131e-06, "loss": 17.7055, "step": 16869 }, { "epoch": 0.30837004405286345, "grad_norm": 6.126092734369032, "learning_rate": 8.10210104851701e-06, "loss": 17.5085, "step": 16870 }, { "epoch": 0.30838832324931, "grad_norm": 6.734911255508399, "learning_rate": 8.101868890238294e-06, "loss": 17.8684, "step": 16871 }, { "epoch": 0.30840660244575646, "grad_norm": 6.176690289964088, "learning_rate": 8.101636721087799e-06, "loss": 17.6733, "step": 16872 }, { "epoch": 0.308424881642203, "grad_norm": 6.466825704602649, "learning_rate": 8.101404541066331e-06, "loss": 18.0116, "step": 16873 }, { "epoch": 0.30844316083864953, "grad_norm": 8.540871578912435, "learning_rate": 8.101172350174713e-06, "loss": 18.262, "step": 16874 }, { "epoch": 0.30846144003509607, "grad_norm": 6.476487326471872, "learning_rate": 8.100940148413755e-06, "loss": 17.787, "step": 16875 }, { "epoch": 0.3084797192315426, "grad_norm": 6.543371046583851, "learning_rate": 8.100707935784271e-06, "loss": 17.5618, "step": 16876 }, { "epoch": 0.3084979984279891, "grad_norm": 5.191648147949043, "learning_rate": 8.100475712287074e-06, "loss": 17.1827, "step": 16877 }, { "epoch": 0.3085162776244356, "grad_norm": 8.132230026553794, "learning_rate": 8.10024347792298e-06, "loss": 18.4899, "step": 16878 }, { "epoch": 0.30853455682088216, "grad_norm": 7.1031659533241465, "learning_rate": 8.100011232692799e-06, "loss": 17.8651, "step": 16879 }, { "epoch": 0.3085528360173287, "grad_norm": 7.425041626183908, "learning_rate": 8.09977897659735e-06, "loss": 17.7215, "step": 16880 }, { "epoch": 0.3085711152137752, "grad_norm": 7.556925177089032, "learning_rate": 8.099546709637444e-06, "loss": 18.1119, "step": 16881 }, { "epoch": 0.3085893944102217, "grad_norm": 6.519700635836287, "learning_rate": 8.099314431813895e-06, "loss": 17.8108, "step": 16882 }, { "epoch": 0.30860767360666824, "grad_norm": 6.792229799249035, "learning_rate": 8.099082143127518e-06, "loss": 17.4976, "step": 16883 }, { "epoch": 0.3086259528031148, "grad_norm": 5.103482726645891, "learning_rate": 8.098849843579128e-06, "loss": 17.0982, "step": 16884 }, { "epoch": 0.3086442319995613, "grad_norm": 6.815483530135922, "learning_rate": 8.098617533169538e-06, "loss": 17.8734, "step": 16885 }, { "epoch": 0.30866251119600785, "grad_norm": 6.605615636643606, "learning_rate": 8.098385211899562e-06, "loss": 17.7568, "step": 16886 }, { "epoch": 0.30868079039245433, "grad_norm": 6.773876397373886, "learning_rate": 8.098152879770015e-06, "loss": 17.5579, "step": 16887 }, { "epoch": 0.30869906958890087, "grad_norm": 6.326966428213546, "learning_rate": 8.09792053678171e-06, "loss": 17.5555, "step": 16888 }, { "epoch": 0.3087173487853474, "grad_norm": 7.177618927533809, "learning_rate": 8.097688182935463e-06, "loss": 17.9881, "step": 16889 }, { "epoch": 0.30873562798179394, "grad_norm": 6.649670489629989, "learning_rate": 8.097455818232089e-06, "loss": 17.6745, "step": 16890 }, { "epoch": 0.30875390717824047, "grad_norm": 7.538466543305304, "learning_rate": 8.097223442672399e-06, "loss": 17.9156, "step": 16891 }, { "epoch": 0.30877218637468695, "grad_norm": 7.372745503381064, "learning_rate": 8.096991056257212e-06, "loss": 17.7921, "step": 16892 }, { "epoch": 0.3087904655711335, "grad_norm": 7.89383907410312, "learning_rate": 8.096758658987339e-06, "loss": 18.3288, "step": 16893 }, { "epoch": 0.30880874476758, "grad_norm": 6.242170963309946, "learning_rate": 8.096526250863594e-06, "loss": 17.5095, "step": 16894 }, { "epoch": 0.30882702396402656, "grad_norm": 6.301612936572135, "learning_rate": 8.096293831886795e-06, "loss": 17.3264, "step": 16895 }, { "epoch": 0.30884530316047304, "grad_norm": 5.8358432367779836, "learning_rate": 8.096061402057755e-06, "loss": 17.351, "step": 16896 }, { "epoch": 0.3088635823569196, "grad_norm": 5.0439347738848, "learning_rate": 8.095828961377287e-06, "loss": 16.9685, "step": 16897 }, { "epoch": 0.3088818615533661, "grad_norm": 5.497715837812876, "learning_rate": 8.095596509846209e-06, "loss": 17.1605, "step": 16898 }, { "epoch": 0.30890014074981265, "grad_norm": 6.384977825418099, "learning_rate": 8.095364047465333e-06, "loss": 17.379, "step": 16899 }, { "epoch": 0.3089184199462592, "grad_norm": 6.976100335057817, "learning_rate": 8.095131574235473e-06, "loss": 17.9314, "step": 16900 }, { "epoch": 0.30893669914270566, "grad_norm": 6.306299971912668, "learning_rate": 8.094899090157447e-06, "loss": 17.5288, "step": 16901 }, { "epoch": 0.3089549783391522, "grad_norm": 10.584265253117026, "learning_rate": 8.094666595232067e-06, "loss": 17.918, "step": 16902 }, { "epoch": 0.30897325753559873, "grad_norm": 5.874533561464786, "learning_rate": 8.094434089460152e-06, "loss": 17.2669, "step": 16903 }, { "epoch": 0.30899153673204527, "grad_norm": 6.055407560537636, "learning_rate": 8.094201572842511e-06, "loss": 16.9438, "step": 16904 }, { "epoch": 0.3090098159284918, "grad_norm": 7.248798402285025, "learning_rate": 8.093969045379964e-06, "loss": 18.0245, "step": 16905 }, { "epoch": 0.3090280951249383, "grad_norm": 6.966026641683622, "learning_rate": 8.093736507073325e-06, "loss": 17.3831, "step": 16906 }, { "epoch": 0.3090463743213848, "grad_norm": 7.450958004208825, "learning_rate": 8.093503957923404e-06, "loss": 18.1105, "step": 16907 }, { "epoch": 0.30906465351783136, "grad_norm": 6.392306557549609, "learning_rate": 8.093271397931022e-06, "loss": 17.6111, "step": 16908 }, { "epoch": 0.3090829327142779, "grad_norm": 5.670538844079594, "learning_rate": 8.093038827096993e-06, "loss": 17.0552, "step": 16909 }, { "epoch": 0.3091012119107244, "grad_norm": 7.512646870486651, "learning_rate": 8.092806245422131e-06, "loss": 17.7541, "step": 16910 }, { "epoch": 0.3091194911071709, "grad_norm": 6.903661762816213, "learning_rate": 8.092573652907252e-06, "loss": 17.8163, "step": 16911 }, { "epoch": 0.30913777030361744, "grad_norm": 7.138344336056069, "learning_rate": 8.092341049553168e-06, "loss": 17.9178, "step": 16912 }, { "epoch": 0.309156049500064, "grad_norm": 6.014316542066885, "learning_rate": 8.0921084353607e-06, "loss": 17.5349, "step": 16913 }, { "epoch": 0.3091743286965105, "grad_norm": 6.103595161617867, "learning_rate": 8.091875810330658e-06, "loss": 17.3808, "step": 16914 }, { "epoch": 0.30919260789295705, "grad_norm": 5.635502138542043, "learning_rate": 8.09164317446386e-06, "loss": 17.4035, "step": 16915 }, { "epoch": 0.30921088708940353, "grad_norm": 6.6603190080422365, "learning_rate": 8.091410527761123e-06, "loss": 17.4741, "step": 16916 }, { "epoch": 0.30922916628585007, "grad_norm": 7.082998085470822, "learning_rate": 8.09117787022326e-06, "loss": 17.6634, "step": 16917 }, { "epoch": 0.3092474454822966, "grad_norm": 7.799159186861457, "learning_rate": 8.090945201851086e-06, "loss": 18.1, "step": 16918 }, { "epoch": 0.30926572467874314, "grad_norm": 7.639306056104836, "learning_rate": 8.090712522645417e-06, "loss": 17.9958, "step": 16919 }, { "epoch": 0.3092840038751897, "grad_norm": 6.07550528023007, "learning_rate": 8.090479832607069e-06, "loss": 17.4038, "step": 16920 }, { "epoch": 0.30930228307163615, "grad_norm": 7.2802138718397345, "learning_rate": 8.090247131736857e-06, "loss": 18.0411, "step": 16921 }, { "epoch": 0.3093205622680827, "grad_norm": 7.460016928913056, "learning_rate": 8.090014420035597e-06, "loss": 18.155, "step": 16922 }, { "epoch": 0.3093388414645292, "grad_norm": 6.719145207687469, "learning_rate": 8.089781697504105e-06, "loss": 17.7742, "step": 16923 }, { "epoch": 0.30935712066097576, "grad_norm": 6.374124792853929, "learning_rate": 8.089548964143196e-06, "loss": 17.6679, "step": 16924 }, { "epoch": 0.3093753998574223, "grad_norm": 6.882657522479925, "learning_rate": 8.089316219953687e-06, "loss": 17.5267, "step": 16925 }, { "epoch": 0.3093936790538688, "grad_norm": 7.636590366002228, "learning_rate": 8.089083464936392e-06, "loss": 17.5369, "step": 16926 }, { "epoch": 0.3094119582503153, "grad_norm": 6.799639629794637, "learning_rate": 8.088850699092127e-06, "loss": 17.92, "step": 16927 }, { "epoch": 0.30943023744676185, "grad_norm": 6.764508225258617, "learning_rate": 8.08861792242171e-06, "loss": 17.5539, "step": 16928 }, { "epoch": 0.3094485166432084, "grad_norm": 7.029403941083494, "learning_rate": 8.088385134925953e-06, "loss": 17.6364, "step": 16929 }, { "epoch": 0.30946679583965486, "grad_norm": 6.558432469824841, "learning_rate": 8.088152336605674e-06, "loss": 17.4582, "step": 16930 }, { "epoch": 0.3094850750361014, "grad_norm": 6.7557714165906955, "learning_rate": 8.08791952746169e-06, "loss": 17.7139, "step": 16931 }, { "epoch": 0.30950335423254793, "grad_norm": 6.897953373824674, "learning_rate": 8.087686707494817e-06, "loss": 17.8887, "step": 16932 }, { "epoch": 0.30952163342899447, "grad_norm": 6.63615805785581, "learning_rate": 8.087453876705868e-06, "loss": 17.7007, "step": 16933 }, { "epoch": 0.309539912625441, "grad_norm": 6.515605327772047, "learning_rate": 8.087221035095662e-06, "loss": 17.4209, "step": 16934 }, { "epoch": 0.3095581918218875, "grad_norm": 5.315636922119088, "learning_rate": 8.086988182665016e-06, "loss": 16.9972, "step": 16935 }, { "epoch": 0.309576471018334, "grad_norm": 6.8677499121840775, "learning_rate": 8.086755319414743e-06, "loss": 17.692, "step": 16936 }, { "epoch": 0.30959475021478056, "grad_norm": 7.371046058676606, "learning_rate": 8.08652244534566e-06, "loss": 18.0294, "step": 16937 }, { "epoch": 0.3096130294112271, "grad_norm": 7.992511094900772, "learning_rate": 8.086289560458583e-06, "loss": 17.8508, "step": 16938 }, { "epoch": 0.3096313086076736, "grad_norm": 6.380923803072536, "learning_rate": 8.086056664754328e-06, "loss": 17.919, "step": 16939 }, { "epoch": 0.3096495878041201, "grad_norm": 6.475349880439513, "learning_rate": 8.085823758233716e-06, "loss": 17.7402, "step": 16940 }, { "epoch": 0.30966786700056664, "grad_norm": 6.118767857981034, "learning_rate": 8.085590840897558e-06, "loss": 17.4519, "step": 16941 }, { "epoch": 0.3096861461970132, "grad_norm": 6.533876579597839, "learning_rate": 8.08535791274667e-06, "loss": 17.2101, "step": 16942 }, { "epoch": 0.3097044253934597, "grad_norm": 6.731862506967125, "learning_rate": 8.085124973781872e-06, "loss": 17.3575, "step": 16943 }, { "epoch": 0.30972270458990625, "grad_norm": 7.39391206942118, "learning_rate": 8.084892024003978e-06, "loss": 17.8833, "step": 16944 }, { "epoch": 0.30974098378635273, "grad_norm": 6.661848016390472, "learning_rate": 8.084659063413805e-06, "loss": 17.6427, "step": 16945 }, { "epoch": 0.30975926298279927, "grad_norm": 6.019351705212323, "learning_rate": 8.08442609201217e-06, "loss": 17.5766, "step": 16946 }, { "epoch": 0.3097775421792458, "grad_norm": 6.303805987317659, "learning_rate": 8.084193109799889e-06, "loss": 17.7708, "step": 16947 }, { "epoch": 0.30979582137569234, "grad_norm": 8.651229911106629, "learning_rate": 8.08396011677778e-06, "loss": 18.3284, "step": 16948 }, { "epoch": 0.3098141005721389, "grad_norm": 7.170146505652532, "learning_rate": 8.083727112946657e-06, "loss": 17.5075, "step": 16949 }, { "epoch": 0.30983237976858535, "grad_norm": 5.813648845249317, "learning_rate": 8.083494098307338e-06, "loss": 17.4887, "step": 16950 }, { "epoch": 0.3098506589650319, "grad_norm": 5.079115814545762, "learning_rate": 8.08326107286064e-06, "loss": 17.0961, "step": 16951 }, { "epoch": 0.3098689381614784, "grad_norm": 6.967427799136651, "learning_rate": 8.08302803660738e-06, "loss": 17.8371, "step": 16952 }, { "epoch": 0.30988721735792496, "grad_norm": 5.887776100616813, "learning_rate": 8.082794989548372e-06, "loss": 17.3094, "step": 16953 }, { "epoch": 0.3099054965543715, "grad_norm": 5.590935783414069, "learning_rate": 8.08256193168444e-06, "loss": 17.3391, "step": 16954 }, { "epoch": 0.309923775750818, "grad_norm": 5.9681028865587935, "learning_rate": 8.082328863016392e-06, "loss": 17.4024, "step": 16955 }, { "epoch": 0.3099420549472645, "grad_norm": 7.224395964372872, "learning_rate": 8.08209578354505e-06, "loss": 17.9612, "step": 16956 }, { "epoch": 0.30996033414371105, "grad_norm": 6.998488998901023, "learning_rate": 8.081862693271228e-06, "loss": 17.6941, "step": 16957 }, { "epoch": 0.3099786133401576, "grad_norm": 6.834681750089619, "learning_rate": 8.081629592195748e-06, "loss": 17.6734, "step": 16958 }, { "epoch": 0.3099968925366041, "grad_norm": 6.348617706162432, "learning_rate": 8.08139648031942e-06, "loss": 17.6922, "step": 16959 }, { "epoch": 0.3100151717330506, "grad_norm": 6.926850762857286, "learning_rate": 8.081163357643067e-06, "loss": 17.7738, "step": 16960 }, { "epoch": 0.31003345092949713, "grad_norm": 5.575484399543014, "learning_rate": 8.080930224167505e-06, "loss": 17.1479, "step": 16961 }, { "epoch": 0.31005173012594367, "grad_norm": 6.297593836898856, "learning_rate": 8.080697079893547e-06, "loss": 17.2397, "step": 16962 }, { "epoch": 0.3100700093223902, "grad_norm": 5.373837482153073, "learning_rate": 8.080463924822016e-06, "loss": 17.2421, "step": 16963 }, { "epoch": 0.3100882885188367, "grad_norm": 6.251755688410515, "learning_rate": 8.080230758953725e-06, "loss": 17.6207, "step": 16964 }, { "epoch": 0.3101065677152832, "grad_norm": 5.887693642675731, "learning_rate": 8.07999758228949e-06, "loss": 17.246, "step": 16965 }, { "epoch": 0.31012484691172976, "grad_norm": 6.622014302673911, "learning_rate": 8.079764394830132e-06, "loss": 17.7236, "step": 16966 }, { "epoch": 0.3101431261081763, "grad_norm": 6.26840513401978, "learning_rate": 8.079531196576468e-06, "loss": 17.4721, "step": 16967 }, { "epoch": 0.31016140530462283, "grad_norm": 7.617899115028561, "learning_rate": 8.079297987529315e-06, "loss": 18.0188, "step": 16968 }, { "epoch": 0.3101796845010693, "grad_norm": 6.255883472661476, "learning_rate": 8.079064767689489e-06, "loss": 17.4223, "step": 16969 }, { "epoch": 0.31019796369751584, "grad_norm": 5.44702046069408, "learning_rate": 8.078831537057809e-06, "loss": 17.2657, "step": 16970 }, { "epoch": 0.3102162428939624, "grad_norm": 6.8017262707599375, "learning_rate": 8.07859829563509e-06, "loss": 17.8041, "step": 16971 }, { "epoch": 0.3102345220904089, "grad_norm": 7.582731197635068, "learning_rate": 8.078365043422153e-06, "loss": 17.7339, "step": 16972 }, { "epoch": 0.31025280128685545, "grad_norm": 6.277249513335438, "learning_rate": 8.078131780419811e-06, "loss": 17.3556, "step": 16973 }, { "epoch": 0.31027108048330193, "grad_norm": 6.069702885550678, "learning_rate": 8.077898506628887e-06, "loss": 17.4744, "step": 16974 }, { "epoch": 0.31028935967974847, "grad_norm": 6.704836121399122, "learning_rate": 8.077665222050195e-06, "loss": 17.6548, "step": 16975 }, { "epoch": 0.310307638876195, "grad_norm": 6.304636426754897, "learning_rate": 8.077431926684552e-06, "loss": 17.4647, "step": 16976 }, { "epoch": 0.31032591807264154, "grad_norm": 6.967132063925525, "learning_rate": 8.077198620532779e-06, "loss": 17.773, "step": 16977 }, { "epoch": 0.3103441972690881, "grad_norm": 7.629948796757115, "learning_rate": 8.076965303595692e-06, "loss": 17.9232, "step": 16978 }, { "epoch": 0.31036247646553455, "grad_norm": 7.415482202614117, "learning_rate": 8.076731975874107e-06, "loss": 18.1143, "step": 16979 }, { "epoch": 0.3103807556619811, "grad_norm": 6.065671198356489, "learning_rate": 8.076498637368844e-06, "loss": 17.5238, "step": 16980 }, { "epoch": 0.3103990348584276, "grad_norm": 6.143485099025842, "learning_rate": 8.07626528808072e-06, "loss": 17.4993, "step": 16981 }, { "epoch": 0.31041731405487416, "grad_norm": 6.107688265313014, "learning_rate": 8.076031928010554e-06, "loss": 17.4624, "step": 16982 }, { "epoch": 0.3104355932513207, "grad_norm": 9.391764640027931, "learning_rate": 8.075798557159163e-06, "loss": 18.4911, "step": 16983 }, { "epoch": 0.3104538724477672, "grad_norm": 6.056483632792479, "learning_rate": 8.075565175527365e-06, "loss": 17.4349, "step": 16984 }, { "epoch": 0.3104721516442137, "grad_norm": 5.903959445175416, "learning_rate": 8.075331783115977e-06, "loss": 17.3011, "step": 16985 }, { "epoch": 0.31049043084066025, "grad_norm": 5.315413721020948, "learning_rate": 8.075098379925818e-06, "loss": 17.0139, "step": 16986 }, { "epoch": 0.3105087100371068, "grad_norm": 7.277900017474505, "learning_rate": 8.074864965957706e-06, "loss": 17.9004, "step": 16987 }, { "epoch": 0.3105269892335533, "grad_norm": 6.132393360020543, "learning_rate": 8.07463154121246e-06, "loss": 17.6193, "step": 16988 }, { "epoch": 0.3105452684299998, "grad_norm": 5.281859849348049, "learning_rate": 8.074398105690897e-06, "loss": 17.1315, "step": 16989 }, { "epoch": 0.31056354762644633, "grad_norm": 7.063814869684254, "learning_rate": 8.074164659393834e-06, "loss": 17.4937, "step": 16990 }, { "epoch": 0.31058182682289287, "grad_norm": 6.327726563585208, "learning_rate": 8.073931202322092e-06, "loss": 17.6545, "step": 16991 }, { "epoch": 0.3106001060193394, "grad_norm": 6.2286458775949285, "learning_rate": 8.073697734476489e-06, "loss": 17.3014, "step": 16992 }, { "epoch": 0.31061838521578594, "grad_norm": 6.557859461936198, "learning_rate": 8.07346425585784e-06, "loss": 17.4907, "step": 16993 }, { "epoch": 0.3106366644122324, "grad_norm": 6.85632582671068, "learning_rate": 8.073230766466966e-06, "loss": 17.8222, "step": 16994 }, { "epoch": 0.31065494360867896, "grad_norm": 7.178275517134839, "learning_rate": 8.072997266304686e-06, "loss": 17.7144, "step": 16995 }, { "epoch": 0.3106732228051255, "grad_norm": 5.980530187088951, "learning_rate": 8.072763755371816e-06, "loss": 17.7154, "step": 16996 }, { "epoch": 0.31069150200157203, "grad_norm": 6.507905260479088, "learning_rate": 8.072530233669176e-06, "loss": 17.8949, "step": 16997 }, { "epoch": 0.3107097811980185, "grad_norm": 7.817112705747621, "learning_rate": 8.072296701197584e-06, "loss": 18.3349, "step": 16998 }, { "epoch": 0.31072806039446504, "grad_norm": 6.768726074333093, "learning_rate": 8.07206315795786e-06, "loss": 18.1172, "step": 16999 }, { "epoch": 0.3107463395909116, "grad_norm": 7.226745209650806, "learning_rate": 8.071829603950821e-06, "loss": 18.185, "step": 17000 }, { "epoch": 0.3107646187873581, "grad_norm": 5.905469271144069, "learning_rate": 8.071596039177284e-06, "loss": 17.3604, "step": 17001 }, { "epoch": 0.31078289798380465, "grad_norm": 6.731758686543261, "learning_rate": 8.071362463638071e-06, "loss": 17.4995, "step": 17002 }, { "epoch": 0.31080117718025113, "grad_norm": 7.016546984059646, "learning_rate": 8.071128877333999e-06, "loss": 17.7842, "step": 17003 }, { "epoch": 0.31081945637669767, "grad_norm": 6.972725771772177, "learning_rate": 8.070895280265884e-06, "loss": 18.0233, "step": 17004 }, { "epoch": 0.3108377355731442, "grad_norm": 6.619120149588867, "learning_rate": 8.07066167243455e-06, "loss": 17.742, "step": 17005 }, { "epoch": 0.31085601476959074, "grad_norm": 7.179735912054314, "learning_rate": 8.070428053840816e-06, "loss": 17.9021, "step": 17006 }, { "epoch": 0.3108742939660373, "grad_norm": 6.547854175047619, "learning_rate": 8.070194424485494e-06, "loss": 17.6715, "step": 17007 }, { "epoch": 0.31089257316248375, "grad_norm": 6.141519537589516, "learning_rate": 8.069960784369407e-06, "loss": 17.4044, "step": 17008 }, { "epoch": 0.3109108523589303, "grad_norm": 6.107340651125197, "learning_rate": 8.069727133493376e-06, "loss": 17.4647, "step": 17009 }, { "epoch": 0.3109291315553768, "grad_norm": 6.769323403249451, "learning_rate": 8.069493471858216e-06, "loss": 17.6992, "step": 17010 }, { "epoch": 0.31094741075182336, "grad_norm": 5.643737975840195, "learning_rate": 8.06925979946475e-06, "loss": 17.2691, "step": 17011 }, { "epoch": 0.3109656899482699, "grad_norm": 6.782297245436195, "learning_rate": 8.069026116313791e-06, "loss": 17.8015, "step": 17012 }, { "epoch": 0.3109839691447164, "grad_norm": 6.4714854196737175, "learning_rate": 8.068792422406167e-06, "loss": 17.439, "step": 17013 }, { "epoch": 0.3110022483411629, "grad_norm": 6.747759203050445, "learning_rate": 8.068558717742688e-06, "loss": 17.7828, "step": 17014 }, { "epoch": 0.31102052753760945, "grad_norm": 7.009226825907778, "learning_rate": 8.068325002324177e-06, "loss": 17.5213, "step": 17015 }, { "epoch": 0.311038806734056, "grad_norm": 5.484043950856844, "learning_rate": 8.068091276151454e-06, "loss": 17.116, "step": 17016 }, { "epoch": 0.3110570859305025, "grad_norm": 6.943562688903507, "learning_rate": 8.067857539225338e-06, "loss": 17.909, "step": 17017 }, { "epoch": 0.311075365126949, "grad_norm": 7.288566353969405, "learning_rate": 8.067623791546646e-06, "loss": 17.8718, "step": 17018 }, { "epoch": 0.31109364432339554, "grad_norm": 8.31020159525137, "learning_rate": 8.0673900331162e-06, "loss": 18.0491, "step": 17019 }, { "epoch": 0.31111192351984207, "grad_norm": 6.679492595636824, "learning_rate": 8.067156263934818e-06, "loss": 17.6752, "step": 17020 }, { "epoch": 0.3111302027162886, "grad_norm": 6.486200578548156, "learning_rate": 8.066922484003319e-06, "loss": 17.8335, "step": 17021 }, { "epoch": 0.31114848191273514, "grad_norm": 6.565739699048459, "learning_rate": 8.066688693322523e-06, "loss": 17.5224, "step": 17022 }, { "epoch": 0.3111667611091816, "grad_norm": 6.113042705635595, "learning_rate": 8.06645489189325e-06, "loss": 17.7832, "step": 17023 }, { "epoch": 0.31118504030562816, "grad_norm": 6.835543180942098, "learning_rate": 8.066221079716317e-06, "loss": 17.4553, "step": 17024 }, { "epoch": 0.3112033195020747, "grad_norm": 5.97030877167702, "learning_rate": 8.065987256792547e-06, "loss": 17.1874, "step": 17025 }, { "epoch": 0.31122159869852123, "grad_norm": 6.18769337467142, "learning_rate": 8.065753423122755e-06, "loss": 17.3374, "step": 17026 }, { "epoch": 0.31123987789496776, "grad_norm": 6.025111695620645, "learning_rate": 8.065519578707766e-06, "loss": 17.1555, "step": 17027 }, { "epoch": 0.31125815709141424, "grad_norm": 6.695901983329723, "learning_rate": 8.065285723548398e-06, "loss": 17.8727, "step": 17028 }, { "epoch": 0.3112764362878608, "grad_norm": 8.023883962901643, "learning_rate": 8.065051857645466e-06, "loss": 18.1035, "step": 17029 }, { "epoch": 0.3112947154843073, "grad_norm": 6.898508469798952, "learning_rate": 8.064817980999794e-06, "loss": 17.9484, "step": 17030 }, { "epoch": 0.31131299468075385, "grad_norm": 6.922422156492689, "learning_rate": 8.064584093612203e-06, "loss": 17.7561, "step": 17031 }, { "epoch": 0.31133127387720033, "grad_norm": 6.895024630554713, "learning_rate": 8.064350195483509e-06, "loss": 17.9243, "step": 17032 }, { "epoch": 0.31134955307364687, "grad_norm": 9.128823275667488, "learning_rate": 8.064116286614535e-06, "loss": 18.3941, "step": 17033 }, { "epoch": 0.3113678322700934, "grad_norm": 6.110840694777033, "learning_rate": 8.063882367006098e-06, "loss": 17.4976, "step": 17034 }, { "epoch": 0.31138611146653994, "grad_norm": 6.416235495914686, "learning_rate": 8.06364843665902e-06, "loss": 17.8703, "step": 17035 }, { "epoch": 0.3114043906629865, "grad_norm": 6.764230592310325, "learning_rate": 8.063414495574118e-06, "loss": 17.8869, "step": 17036 }, { "epoch": 0.31142266985943295, "grad_norm": 6.669468797976102, "learning_rate": 8.063180543752216e-06, "loss": 17.8181, "step": 17037 }, { "epoch": 0.3114409490558795, "grad_norm": 6.2584630470038904, "learning_rate": 8.062946581194131e-06, "loss": 17.4431, "step": 17038 }, { "epoch": 0.311459228252326, "grad_norm": 9.804785148077373, "learning_rate": 8.062712607900685e-06, "loss": 18.5277, "step": 17039 }, { "epoch": 0.31147750744877256, "grad_norm": 7.235891141499928, "learning_rate": 8.062478623872698e-06, "loss": 17.916, "step": 17040 }, { "epoch": 0.3114957866452191, "grad_norm": 7.355908158041542, "learning_rate": 8.062244629110986e-06, "loss": 17.6022, "step": 17041 }, { "epoch": 0.3115140658416656, "grad_norm": 7.182154954017314, "learning_rate": 8.062010623616375e-06, "loss": 17.7456, "step": 17042 }, { "epoch": 0.3115323450381121, "grad_norm": 10.286129801372665, "learning_rate": 8.06177660738968e-06, "loss": 17.9494, "step": 17043 }, { "epoch": 0.31155062423455865, "grad_norm": 6.156373082601276, "learning_rate": 8.061542580431726e-06, "loss": 17.349, "step": 17044 }, { "epoch": 0.3115689034310052, "grad_norm": 6.827769349808465, "learning_rate": 8.06130854274333e-06, "loss": 17.6985, "step": 17045 }, { "epoch": 0.3115871826274517, "grad_norm": 7.673966913137476, "learning_rate": 8.061074494325315e-06, "loss": 17.7679, "step": 17046 }, { "epoch": 0.3116054618238982, "grad_norm": 9.078005760136998, "learning_rate": 8.060840435178498e-06, "loss": 18.4449, "step": 17047 }, { "epoch": 0.31162374102034474, "grad_norm": 5.808936845800925, "learning_rate": 8.0606063653037e-06, "loss": 17.1379, "step": 17048 }, { "epoch": 0.31164202021679127, "grad_norm": 10.06711807599597, "learning_rate": 8.060372284701743e-06, "loss": 18.286, "step": 17049 }, { "epoch": 0.3116602994132378, "grad_norm": 5.761124065838537, "learning_rate": 8.060138193373446e-06, "loss": 17.0214, "step": 17050 }, { "epoch": 0.31167857860968434, "grad_norm": 6.014642946448462, "learning_rate": 8.059904091319633e-06, "loss": 17.4176, "step": 17051 }, { "epoch": 0.3116968578061308, "grad_norm": 5.110580803887255, "learning_rate": 8.059669978541118e-06, "loss": 16.9478, "step": 17052 }, { "epoch": 0.31171513700257736, "grad_norm": 6.358176665756766, "learning_rate": 8.059435855038727e-06, "loss": 17.3045, "step": 17053 }, { "epoch": 0.3117334161990239, "grad_norm": 6.821562150145217, "learning_rate": 8.05920172081328e-06, "loss": 17.5357, "step": 17054 }, { "epoch": 0.31175169539547043, "grad_norm": 7.502017033052161, "learning_rate": 8.058967575865593e-06, "loss": 17.9821, "step": 17055 }, { "epoch": 0.31176997459191697, "grad_norm": 6.404123994690587, "learning_rate": 8.058733420196492e-06, "loss": 17.3785, "step": 17056 }, { "epoch": 0.31178825378836345, "grad_norm": 5.824909042286776, "learning_rate": 8.058499253806797e-06, "loss": 17.0588, "step": 17057 }, { "epoch": 0.31180653298481, "grad_norm": 8.893768353879862, "learning_rate": 8.058265076697327e-06, "loss": 18.3984, "step": 17058 }, { "epoch": 0.3118248121812565, "grad_norm": 8.36422672697942, "learning_rate": 8.058030888868902e-06, "loss": 18.1194, "step": 17059 }, { "epoch": 0.31184309137770305, "grad_norm": 7.707476457887245, "learning_rate": 8.057796690322345e-06, "loss": 18.1633, "step": 17060 }, { "epoch": 0.3118613705741496, "grad_norm": 5.794932126433034, "learning_rate": 8.057562481058476e-06, "loss": 17.2252, "step": 17061 }, { "epoch": 0.31187964977059607, "grad_norm": 6.579128480537068, "learning_rate": 8.057328261078116e-06, "loss": 17.6532, "step": 17062 }, { "epoch": 0.3118979289670426, "grad_norm": 5.953515237473084, "learning_rate": 8.057094030382084e-06, "loss": 17.3097, "step": 17063 }, { "epoch": 0.31191620816348914, "grad_norm": 7.242998926533677, "learning_rate": 8.056859788971206e-06, "loss": 17.637, "step": 17064 }, { "epoch": 0.3119344873599357, "grad_norm": 6.669121683718374, "learning_rate": 8.056625536846297e-06, "loss": 17.7112, "step": 17065 }, { "epoch": 0.31195276655638216, "grad_norm": 6.234045316484745, "learning_rate": 8.056391274008182e-06, "loss": 17.5276, "step": 17066 }, { "epoch": 0.3119710457528287, "grad_norm": 5.99918087491853, "learning_rate": 8.05615700045768e-06, "loss": 17.3289, "step": 17067 }, { "epoch": 0.3119893249492752, "grad_norm": 7.395409281321343, "learning_rate": 8.055922716195614e-06, "loss": 17.9555, "step": 17068 }, { "epoch": 0.31200760414572176, "grad_norm": 6.560024466253384, "learning_rate": 8.055688421222802e-06, "loss": 17.689, "step": 17069 }, { "epoch": 0.3120258833421683, "grad_norm": 6.8100888844054595, "learning_rate": 8.05545411554007e-06, "loss": 17.4748, "step": 17070 }, { "epoch": 0.3120441625386148, "grad_norm": 6.8086225586593345, "learning_rate": 8.055219799148236e-06, "loss": 17.5875, "step": 17071 }, { "epoch": 0.3120624417350613, "grad_norm": 7.076630506537384, "learning_rate": 8.05498547204812e-06, "loss": 17.9189, "step": 17072 }, { "epoch": 0.31208072093150785, "grad_norm": 7.134796225580065, "learning_rate": 8.054751134240545e-06, "loss": 17.9532, "step": 17073 }, { "epoch": 0.3120990001279544, "grad_norm": 6.903890762980463, "learning_rate": 8.054516785726333e-06, "loss": 17.9626, "step": 17074 }, { "epoch": 0.3121172793244009, "grad_norm": 6.892699558993286, "learning_rate": 8.054282426506306e-06, "loss": 17.8961, "step": 17075 }, { "epoch": 0.3121355585208474, "grad_norm": 6.06007985064659, "learning_rate": 8.054048056581283e-06, "loss": 17.4805, "step": 17076 }, { "epoch": 0.31215383771729394, "grad_norm": 6.5185241906590194, "learning_rate": 8.053813675952085e-06, "loss": 17.4921, "step": 17077 }, { "epoch": 0.31217211691374047, "grad_norm": 7.249273040241578, "learning_rate": 8.053579284619538e-06, "loss": 17.394, "step": 17078 }, { "epoch": 0.312190396110187, "grad_norm": 6.445044732571578, "learning_rate": 8.05334488258446e-06, "loss": 17.4595, "step": 17079 }, { "epoch": 0.31220867530663354, "grad_norm": 6.187669512889358, "learning_rate": 8.053110469847671e-06, "loss": 17.4792, "step": 17080 }, { "epoch": 0.31222695450308, "grad_norm": 6.122343555267942, "learning_rate": 8.052876046409997e-06, "loss": 17.619, "step": 17081 }, { "epoch": 0.31224523369952656, "grad_norm": 6.058224000590358, "learning_rate": 8.052641612272255e-06, "loss": 17.3695, "step": 17082 }, { "epoch": 0.3122635128959731, "grad_norm": 6.11590720237535, "learning_rate": 8.052407167435271e-06, "loss": 17.5494, "step": 17083 }, { "epoch": 0.31228179209241963, "grad_norm": 5.560146633478227, "learning_rate": 8.052172711899864e-06, "loss": 17.3819, "step": 17084 }, { "epoch": 0.31230007128886617, "grad_norm": 5.705357822499922, "learning_rate": 8.051938245666857e-06, "loss": 17.4352, "step": 17085 }, { "epoch": 0.31231835048531265, "grad_norm": 5.918455183105127, "learning_rate": 8.051703768737072e-06, "loss": 17.2833, "step": 17086 }, { "epoch": 0.3123366296817592, "grad_norm": 7.623831503881604, "learning_rate": 8.051469281111329e-06, "loss": 18.0024, "step": 17087 }, { "epoch": 0.3123549088782057, "grad_norm": 7.182707774727692, "learning_rate": 8.05123478279045e-06, "loss": 17.6483, "step": 17088 }, { "epoch": 0.31237318807465225, "grad_norm": 8.123794902176977, "learning_rate": 8.05100027377526e-06, "loss": 18.1358, "step": 17089 }, { "epoch": 0.3123914672710988, "grad_norm": 7.305505632125513, "learning_rate": 8.050765754066577e-06, "loss": 17.5472, "step": 17090 }, { "epoch": 0.31240974646754527, "grad_norm": 10.310046563194174, "learning_rate": 8.050531223665226e-06, "loss": 18.3201, "step": 17091 }, { "epoch": 0.3124280256639918, "grad_norm": 7.293663182369382, "learning_rate": 8.050296682572028e-06, "loss": 17.6776, "step": 17092 }, { "epoch": 0.31244630486043834, "grad_norm": 7.711378309642685, "learning_rate": 8.050062130787803e-06, "loss": 18.0298, "step": 17093 }, { "epoch": 0.3124645840568849, "grad_norm": 8.998707061510684, "learning_rate": 8.049827568313377e-06, "loss": 18.6374, "step": 17094 }, { "epoch": 0.3124828632533314, "grad_norm": 6.572040902501415, "learning_rate": 8.049592995149568e-06, "loss": 17.5308, "step": 17095 }, { "epoch": 0.3125011424497779, "grad_norm": 7.654196776411626, "learning_rate": 8.049358411297203e-06, "loss": 18.1515, "step": 17096 }, { "epoch": 0.3125194216462244, "grad_norm": 5.888627620214243, "learning_rate": 8.049123816757098e-06, "loss": 17.5023, "step": 17097 }, { "epoch": 0.31253770084267096, "grad_norm": 5.840214153242996, "learning_rate": 8.04888921153008e-06, "loss": 17.5331, "step": 17098 }, { "epoch": 0.3125559800391175, "grad_norm": 6.845758804187963, "learning_rate": 8.048654595616972e-06, "loss": 17.4805, "step": 17099 }, { "epoch": 0.312574259235564, "grad_norm": 6.997364797480822, "learning_rate": 8.04841996901859e-06, "loss": 18.0936, "step": 17100 }, { "epoch": 0.3125925384320105, "grad_norm": 7.516464323446667, "learning_rate": 8.048185331735764e-06, "loss": 17.8479, "step": 17101 }, { "epoch": 0.31261081762845705, "grad_norm": 7.688519225306918, "learning_rate": 8.047950683769312e-06, "loss": 17.6508, "step": 17102 }, { "epoch": 0.3126290968249036, "grad_norm": 6.727504229055415, "learning_rate": 8.047716025120058e-06, "loss": 17.6344, "step": 17103 }, { "epoch": 0.3126473760213501, "grad_norm": 6.331388539219552, "learning_rate": 8.047481355788822e-06, "loss": 17.6653, "step": 17104 }, { "epoch": 0.3126656552177966, "grad_norm": 6.8055450743979415, "learning_rate": 8.047246675776428e-06, "loss": 17.6641, "step": 17105 }, { "epoch": 0.31268393441424314, "grad_norm": 7.724344148380758, "learning_rate": 8.047011985083701e-06, "loss": 18.4167, "step": 17106 }, { "epoch": 0.31270221361068967, "grad_norm": 6.457607376909295, "learning_rate": 8.04677728371146e-06, "loss": 17.8015, "step": 17107 }, { "epoch": 0.3127204928071362, "grad_norm": 10.292345069602899, "learning_rate": 8.04654257166053e-06, "loss": 18.1682, "step": 17108 }, { "epoch": 0.31273877200358274, "grad_norm": 7.2397095021676385, "learning_rate": 8.046307848931733e-06, "loss": 17.7562, "step": 17109 }, { "epoch": 0.3127570512000292, "grad_norm": 7.833608664386632, "learning_rate": 8.04607311552589e-06, "loss": 17.8266, "step": 17110 }, { "epoch": 0.31277533039647576, "grad_norm": 9.656642046194163, "learning_rate": 8.045838371443826e-06, "loss": 17.5523, "step": 17111 }, { "epoch": 0.3127936095929223, "grad_norm": 7.2585431274803085, "learning_rate": 8.045603616686362e-06, "loss": 17.7699, "step": 17112 }, { "epoch": 0.31281188878936883, "grad_norm": 6.917059958259703, "learning_rate": 8.045368851254322e-06, "loss": 17.547, "step": 17113 }, { "epoch": 0.31283016798581537, "grad_norm": 7.219994130213293, "learning_rate": 8.04513407514853e-06, "loss": 17.5624, "step": 17114 }, { "epoch": 0.31284844718226185, "grad_norm": 9.500891634754796, "learning_rate": 8.044899288369804e-06, "loss": 18.6027, "step": 17115 }, { "epoch": 0.3128667263787084, "grad_norm": 7.287378731024665, "learning_rate": 8.044664490918972e-06, "loss": 17.9434, "step": 17116 }, { "epoch": 0.3128850055751549, "grad_norm": 7.479277181987813, "learning_rate": 8.044429682796855e-06, "loss": 18.0854, "step": 17117 }, { "epoch": 0.31290328477160145, "grad_norm": 5.413721045579754, "learning_rate": 8.044194864004276e-06, "loss": 17.1874, "step": 17118 }, { "epoch": 0.312921563968048, "grad_norm": 8.020478219000237, "learning_rate": 8.043960034542058e-06, "loss": 18.4478, "step": 17119 }, { "epoch": 0.31293984316449447, "grad_norm": 6.171454756272949, "learning_rate": 8.043725194411025e-06, "loss": 17.6453, "step": 17120 }, { "epoch": 0.312958122360941, "grad_norm": 7.381828581592629, "learning_rate": 8.043490343612e-06, "loss": 17.5644, "step": 17121 }, { "epoch": 0.31297640155738754, "grad_norm": 5.4404574332533775, "learning_rate": 8.043255482145804e-06, "loss": 17.1108, "step": 17122 }, { "epoch": 0.3129946807538341, "grad_norm": 8.279403270053606, "learning_rate": 8.043020610013261e-06, "loss": 18.4835, "step": 17123 }, { "epoch": 0.3130129599502806, "grad_norm": 6.7496167025061675, "learning_rate": 8.042785727215196e-06, "loss": 17.4873, "step": 17124 }, { "epoch": 0.3130312391467271, "grad_norm": 6.016379324428997, "learning_rate": 8.042550833752431e-06, "loss": 17.5257, "step": 17125 }, { "epoch": 0.3130495183431736, "grad_norm": 5.828238094728282, "learning_rate": 8.042315929625789e-06, "loss": 17.3178, "step": 17126 }, { "epoch": 0.31306779753962016, "grad_norm": 6.058292426814321, "learning_rate": 8.042081014836094e-06, "loss": 17.5104, "step": 17127 }, { "epoch": 0.3130860767360667, "grad_norm": 5.795678889764795, "learning_rate": 8.041846089384169e-06, "loss": 17.2683, "step": 17128 }, { "epoch": 0.31310435593251323, "grad_norm": 6.838414188100603, "learning_rate": 8.041611153270837e-06, "loss": 17.5137, "step": 17129 }, { "epoch": 0.3131226351289597, "grad_norm": 7.508957516597827, "learning_rate": 8.041376206496922e-06, "loss": 18.0325, "step": 17130 }, { "epoch": 0.31314091432540625, "grad_norm": 5.947618787572319, "learning_rate": 8.041141249063249e-06, "loss": 17.4239, "step": 17131 }, { "epoch": 0.3131591935218528, "grad_norm": 6.988214297935935, "learning_rate": 8.040906280970637e-06, "loss": 17.8497, "step": 17132 }, { "epoch": 0.3131774727182993, "grad_norm": 7.0904501986832065, "learning_rate": 8.040671302219915e-06, "loss": 17.9897, "step": 17133 }, { "epoch": 0.3131957519147458, "grad_norm": 7.551212912243727, "learning_rate": 8.040436312811902e-06, "loss": 18.2153, "step": 17134 }, { "epoch": 0.31321403111119234, "grad_norm": 7.66120681501316, "learning_rate": 8.040201312747425e-06, "loss": 17.5864, "step": 17135 }, { "epoch": 0.3132323103076389, "grad_norm": 6.689147304740825, "learning_rate": 8.039966302027305e-06, "loss": 17.8764, "step": 17136 }, { "epoch": 0.3132505895040854, "grad_norm": 6.95603386516969, "learning_rate": 8.039731280652368e-06, "loss": 17.8554, "step": 17137 }, { "epoch": 0.31326886870053194, "grad_norm": 7.202105499569859, "learning_rate": 8.039496248623436e-06, "loss": 18.0454, "step": 17138 }, { "epoch": 0.3132871478969784, "grad_norm": 7.411083961802374, "learning_rate": 8.03926120594133e-06, "loss": 17.7921, "step": 17139 }, { "epoch": 0.31330542709342496, "grad_norm": 7.784527847208358, "learning_rate": 8.039026152606883e-06, "loss": 18.0465, "step": 17140 }, { "epoch": 0.3133237062898715, "grad_norm": 6.618673361962217, "learning_rate": 8.038791088620909e-06, "loss": 17.4875, "step": 17141 }, { "epoch": 0.31334198548631803, "grad_norm": 5.812691420482996, "learning_rate": 8.038556013984239e-06, "loss": 17.3837, "step": 17142 }, { "epoch": 0.31336026468276457, "grad_norm": 7.11314530167224, "learning_rate": 8.038320928697691e-06, "loss": 17.7925, "step": 17143 }, { "epoch": 0.31337854387921105, "grad_norm": 7.7265097697341, "learning_rate": 8.038085832762095e-06, "loss": 18.3554, "step": 17144 }, { "epoch": 0.3133968230756576, "grad_norm": 5.447993047607807, "learning_rate": 8.037850726178269e-06, "loss": 17.1864, "step": 17145 }, { "epoch": 0.3134151022721041, "grad_norm": 6.728960990362715, "learning_rate": 8.037615608947041e-06, "loss": 17.7867, "step": 17146 }, { "epoch": 0.31343338146855065, "grad_norm": 7.495857224980962, "learning_rate": 8.037380481069234e-06, "loss": 17.8913, "step": 17147 }, { "epoch": 0.3134516606649972, "grad_norm": 7.049243686139036, "learning_rate": 8.037145342545671e-06, "loss": 17.8628, "step": 17148 }, { "epoch": 0.31346993986144367, "grad_norm": 4.711646748515058, "learning_rate": 8.036910193377178e-06, "loss": 16.9872, "step": 17149 }, { "epoch": 0.3134882190578902, "grad_norm": 5.493751215946904, "learning_rate": 8.036675033564579e-06, "loss": 17.0835, "step": 17150 }, { "epoch": 0.31350649825433674, "grad_norm": 5.888501284564506, "learning_rate": 8.036439863108696e-06, "loss": 17.3825, "step": 17151 }, { "epoch": 0.3135247774507833, "grad_norm": 6.046177868777849, "learning_rate": 8.036204682010355e-06, "loss": 17.332, "step": 17152 }, { "epoch": 0.3135430566472298, "grad_norm": 6.575959579527793, "learning_rate": 8.03596949027038e-06, "loss": 17.8634, "step": 17153 }, { "epoch": 0.3135613358436763, "grad_norm": 6.607339601223778, "learning_rate": 8.035734287889597e-06, "loss": 17.6993, "step": 17154 }, { "epoch": 0.3135796150401228, "grad_norm": 6.441259428163981, "learning_rate": 8.035499074868827e-06, "loss": 17.6536, "step": 17155 }, { "epoch": 0.31359789423656936, "grad_norm": 6.082592525515024, "learning_rate": 8.035263851208897e-06, "loss": 17.4241, "step": 17156 }, { "epoch": 0.3136161734330159, "grad_norm": 6.054719549805286, "learning_rate": 8.03502861691063e-06, "loss": 17.4872, "step": 17157 }, { "epoch": 0.31363445262946243, "grad_norm": 6.86476340893, "learning_rate": 8.034793371974851e-06, "loss": 17.5494, "step": 17158 }, { "epoch": 0.3136527318259089, "grad_norm": 6.060835396448324, "learning_rate": 8.034558116402386e-06, "loss": 17.4649, "step": 17159 }, { "epoch": 0.31367101102235545, "grad_norm": 7.512181779187408, "learning_rate": 8.034322850194056e-06, "loss": 17.8525, "step": 17160 }, { "epoch": 0.313689290218802, "grad_norm": 7.245479470438559, "learning_rate": 8.034087573350689e-06, "loss": 17.899, "step": 17161 }, { "epoch": 0.3137075694152485, "grad_norm": 7.1684862366743936, "learning_rate": 8.03385228587311e-06, "loss": 17.7725, "step": 17162 }, { "epoch": 0.31372584861169506, "grad_norm": 8.30358114454457, "learning_rate": 8.033616987762138e-06, "loss": 17.719, "step": 17163 }, { "epoch": 0.31374412780814154, "grad_norm": 6.560198161467762, "learning_rate": 8.033381679018605e-06, "loss": 17.4197, "step": 17164 }, { "epoch": 0.3137624070045881, "grad_norm": 7.377941426853494, "learning_rate": 8.033146359643332e-06, "loss": 17.6037, "step": 17165 }, { "epoch": 0.3137806862010346, "grad_norm": 5.557929775709622, "learning_rate": 8.032911029637143e-06, "loss": 17.1505, "step": 17166 }, { "epoch": 0.31379896539748114, "grad_norm": 6.899013606209825, "learning_rate": 8.032675689000864e-06, "loss": 17.6968, "step": 17167 }, { "epoch": 0.3138172445939276, "grad_norm": 6.5191416595380485, "learning_rate": 8.032440337735322e-06, "loss": 17.5975, "step": 17168 }, { "epoch": 0.31383552379037416, "grad_norm": 7.606407530181519, "learning_rate": 8.032204975841337e-06, "loss": 17.8917, "step": 17169 }, { "epoch": 0.3138538029868207, "grad_norm": 7.05771477842777, "learning_rate": 8.031969603319737e-06, "loss": 17.4604, "step": 17170 }, { "epoch": 0.31387208218326723, "grad_norm": 5.3322142199112195, "learning_rate": 8.031734220171349e-06, "loss": 17.1174, "step": 17171 }, { "epoch": 0.31389036137971377, "grad_norm": 7.02892773669566, "learning_rate": 8.031498826396992e-06, "loss": 17.8542, "step": 17172 }, { "epoch": 0.31390864057616025, "grad_norm": 9.054141376018855, "learning_rate": 8.031263421997497e-06, "loss": 18.8066, "step": 17173 }, { "epoch": 0.3139269197726068, "grad_norm": 6.130279888692038, "learning_rate": 8.031028006973686e-06, "loss": 17.7783, "step": 17174 }, { "epoch": 0.3139451989690533, "grad_norm": 5.995388164254831, "learning_rate": 8.030792581326388e-06, "loss": 17.4762, "step": 17175 }, { "epoch": 0.31396347816549985, "grad_norm": 5.885826294439638, "learning_rate": 8.030557145056421e-06, "loss": 17.3577, "step": 17176 }, { "epoch": 0.3139817573619464, "grad_norm": 6.60999595930151, "learning_rate": 8.030321698164616e-06, "loss": 17.6923, "step": 17177 }, { "epoch": 0.31400003655839287, "grad_norm": 6.843984019058763, "learning_rate": 8.030086240651796e-06, "loss": 17.6676, "step": 17178 }, { "epoch": 0.3140183157548394, "grad_norm": 5.883915114984477, "learning_rate": 8.029850772518787e-06, "loss": 17.2554, "step": 17179 }, { "epoch": 0.31403659495128594, "grad_norm": 6.892014099833698, "learning_rate": 8.029615293766413e-06, "loss": 17.8833, "step": 17180 }, { "epoch": 0.3140548741477325, "grad_norm": 6.339557791548552, "learning_rate": 8.029379804395501e-06, "loss": 17.4732, "step": 17181 }, { "epoch": 0.314073153344179, "grad_norm": 7.260205023241176, "learning_rate": 8.029144304406875e-06, "loss": 17.8128, "step": 17182 }, { "epoch": 0.3140914325406255, "grad_norm": 5.817146925384382, "learning_rate": 8.028908793801362e-06, "loss": 17.3207, "step": 17183 }, { "epoch": 0.31410971173707203, "grad_norm": 6.626224046216491, "learning_rate": 8.028673272579786e-06, "loss": 17.6731, "step": 17184 }, { "epoch": 0.31412799093351856, "grad_norm": 8.369654425397433, "learning_rate": 8.028437740742974e-06, "loss": 18.2265, "step": 17185 }, { "epoch": 0.3141462701299651, "grad_norm": 9.432428565195123, "learning_rate": 8.028202198291749e-06, "loss": 18.692, "step": 17186 }, { "epoch": 0.31416454932641164, "grad_norm": 4.921619769724041, "learning_rate": 8.02796664522694e-06, "loss": 16.8349, "step": 17187 }, { "epoch": 0.3141828285228581, "grad_norm": 8.626537819541797, "learning_rate": 8.027731081549368e-06, "loss": 18.0901, "step": 17188 }, { "epoch": 0.31420110771930465, "grad_norm": 7.517223221411115, "learning_rate": 8.027495507259863e-06, "loss": 17.8854, "step": 17189 }, { "epoch": 0.3142193869157512, "grad_norm": 6.503608465111523, "learning_rate": 8.027259922359248e-06, "loss": 17.7484, "step": 17190 }, { "epoch": 0.3142376661121977, "grad_norm": 6.030001566174185, "learning_rate": 8.02702432684835e-06, "loss": 17.3925, "step": 17191 }, { "epoch": 0.31425594530864426, "grad_norm": 6.216164772313948, "learning_rate": 8.026788720727997e-06, "loss": 17.5722, "step": 17192 }, { "epoch": 0.31427422450509074, "grad_norm": 5.947817891194216, "learning_rate": 8.026553103999009e-06, "loss": 17.5399, "step": 17193 }, { "epoch": 0.3142925037015373, "grad_norm": 6.695638931461703, "learning_rate": 8.026317476662215e-06, "loss": 17.8152, "step": 17194 }, { "epoch": 0.3143107828979838, "grad_norm": 7.016799302824753, "learning_rate": 8.026081838718442e-06, "loss": 17.458, "step": 17195 }, { "epoch": 0.31432906209443034, "grad_norm": 5.769310967622843, "learning_rate": 8.025846190168515e-06, "loss": 17.3009, "step": 17196 }, { "epoch": 0.3143473412908769, "grad_norm": 5.257616943946488, "learning_rate": 8.02561053101326e-06, "loss": 16.9891, "step": 17197 }, { "epoch": 0.31436562048732336, "grad_norm": 6.439660213076309, "learning_rate": 8.0253748612535e-06, "loss": 17.6261, "step": 17198 }, { "epoch": 0.3143838996837699, "grad_norm": 7.616382543381116, "learning_rate": 8.025139180890066e-06, "loss": 17.825, "step": 17199 }, { "epoch": 0.31440217888021643, "grad_norm": 6.859843867417869, "learning_rate": 8.02490348992378e-06, "loss": 17.8512, "step": 17200 }, { "epoch": 0.31442045807666297, "grad_norm": 6.012777268791715, "learning_rate": 8.024667788355473e-06, "loss": 17.4365, "step": 17201 }, { "epoch": 0.31443873727310945, "grad_norm": 7.34281656927429, "learning_rate": 8.024432076185967e-06, "loss": 18.0848, "step": 17202 }, { "epoch": 0.314457016469556, "grad_norm": 7.1363276984549735, "learning_rate": 8.024196353416085e-06, "loss": 17.8839, "step": 17203 }, { "epoch": 0.3144752956660025, "grad_norm": 6.619445406609291, "learning_rate": 8.023960620046661e-06, "loss": 17.512, "step": 17204 }, { "epoch": 0.31449357486244905, "grad_norm": 5.901356843278337, "learning_rate": 8.023724876078517e-06, "loss": 17.5128, "step": 17205 }, { "epoch": 0.3145118540588956, "grad_norm": 5.377358834635803, "learning_rate": 8.023489121512479e-06, "loss": 17.0316, "step": 17206 }, { "epoch": 0.31453013325534207, "grad_norm": 9.555391015931994, "learning_rate": 8.023253356349375e-06, "loss": 18.6005, "step": 17207 }, { "epoch": 0.3145484124517886, "grad_norm": 6.7078403560945405, "learning_rate": 8.023017580590029e-06, "loss": 17.5261, "step": 17208 }, { "epoch": 0.31456669164823514, "grad_norm": 6.47132507059488, "learning_rate": 8.022781794235268e-06, "loss": 17.8296, "step": 17209 }, { "epoch": 0.3145849708446817, "grad_norm": 8.109703898154141, "learning_rate": 8.02254599728592e-06, "loss": 18.1307, "step": 17210 }, { "epoch": 0.3146032500411282, "grad_norm": 6.5760029145144525, "learning_rate": 8.022310189742812e-06, "loss": 17.5782, "step": 17211 }, { "epoch": 0.3146215292375747, "grad_norm": 8.482911353667058, "learning_rate": 8.022074371606767e-06, "loss": 18.6695, "step": 17212 }, { "epoch": 0.31463980843402123, "grad_norm": 6.977732439286522, "learning_rate": 8.021838542878616e-06, "loss": 17.9351, "step": 17213 }, { "epoch": 0.31465808763046776, "grad_norm": 5.743098806772147, "learning_rate": 8.02160270355918e-06, "loss": 17.3204, "step": 17214 }, { "epoch": 0.3146763668269143, "grad_norm": 7.267947435405922, "learning_rate": 8.02136685364929e-06, "loss": 17.5826, "step": 17215 }, { "epoch": 0.31469464602336084, "grad_norm": 7.912004941148593, "learning_rate": 8.021130993149773e-06, "loss": 17.9818, "step": 17216 }, { "epoch": 0.3147129252198073, "grad_norm": 5.280419012227969, "learning_rate": 8.020895122061454e-06, "loss": 16.951, "step": 17217 }, { "epoch": 0.31473120441625385, "grad_norm": 6.873611055670704, "learning_rate": 8.020659240385157e-06, "loss": 17.6168, "step": 17218 }, { "epoch": 0.3147494836127004, "grad_norm": 8.206195024040627, "learning_rate": 8.020423348121713e-06, "loss": 18.5465, "step": 17219 }, { "epoch": 0.3147677628091469, "grad_norm": 6.962185452648408, "learning_rate": 8.020187445271947e-06, "loss": 17.6953, "step": 17220 }, { "epoch": 0.31478604200559346, "grad_norm": 7.088755955462132, "learning_rate": 8.019951531836686e-06, "loss": 17.9777, "step": 17221 }, { "epoch": 0.31480432120203994, "grad_norm": 6.190448969393758, "learning_rate": 8.019715607816758e-06, "loss": 17.3383, "step": 17222 }, { "epoch": 0.3148226003984865, "grad_norm": 6.980175547078554, "learning_rate": 8.019479673212988e-06, "loss": 17.8941, "step": 17223 }, { "epoch": 0.314840879594933, "grad_norm": 6.766442043943911, "learning_rate": 8.019243728026204e-06, "loss": 17.7019, "step": 17224 }, { "epoch": 0.31485915879137955, "grad_norm": 8.86342108336854, "learning_rate": 8.019007772257233e-06, "loss": 18.7092, "step": 17225 }, { "epoch": 0.3148774379878261, "grad_norm": 6.873461870005551, "learning_rate": 8.0187718059069e-06, "loss": 17.7726, "step": 17226 }, { "epoch": 0.31489571718427256, "grad_norm": 7.5290291440852535, "learning_rate": 8.018535828976035e-06, "loss": 18.0024, "step": 17227 }, { "epoch": 0.3149139963807191, "grad_norm": 7.28258231935665, "learning_rate": 8.018299841465464e-06, "loss": 17.7803, "step": 17228 }, { "epoch": 0.31493227557716563, "grad_norm": 5.672384628255509, "learning_rate": 8.018063843376014e-06, "loss": 17.1077, "step": 17229 }, { "epoch": 0.31495055477361217, "grad_norm": 6.763207668043426, "learning_rate": 8.017827834708513e-06, "loss": 17.8968, "step": 17230 }, { "epoch": 0.3149688339700587, "grad_norm": 6.352084908004501, "learning_rate": 8.017591815463785e-06, "loss": 17.9149, "step": 17231 }, { "epoch": 0.3149871131665052, "grad_norm": 5.401660204723154, "learning_rate": 8.017355785642661e-06, "loss": 17.0605, "step": 17232 }, { "epoch": 0.3150053923629517, "grad_norm": 6.3157623324548124, "learning_rate": 8.017119745245969e-06, "loss": 17.4241, "step": 17233 }, { "epoch": 0.31502367155939826, "grad_norm": 7.187095964596448, "learning_rate": 8.01688369427453e-06, "loss": 17.8613, "step": 17234 }, { "epoch": 0.3150419507558448, "grad_norm": 6.45123413937246, "learning_rate": 8.016647632729177e-06, "loss": 17.6179, "step": 17235 }, { "epoch": 0.31506022995229127, "grad_norm": 6.373516121494759, "learning_rate": 8.016411560610737e-06, "loss": 17.4736, "step": 17236 }, { "epoch": 0.3150785091487378, "grad_norm": 7.561806774375053, "learning_rate": 8.016175477920036e-06, "loss": 17.8736, "step": 17237 }, { "epoch": 0.31509678834518434, "grad_norm": 8.012843117739664, "learning_rate": 8.015939384657901e-06, "loss": 18.4852, "step": 17238 }, { "epoch": 0.3151150675416309, "grad_norm": 6.270369077479934, "learning_rate": 8.015703280825158e-06, "loss": 17.4573, "step": 17239 }, { "epoch": 0.3151333467380774, "grad_norm": 7.8125620468099095, "learning_rate": 8.015467166422641e-06, "loss": 18.0803, "step": 17240 }, { "epoch": 0.3151516259345239, "grad_norm": 6.951899482123191, "learning_rate": 8.01523104145117e-06, "loss": 17.6458, "step": 17241 }, { "epoch": 0.31516990513097043, "grad_norm": 7.537431181425768, "learning_rate": 8.014994905911577e-06, "loss": 17.6453, "step": 17242 }, { "epoch": 0.31518818432741696, "grad_norm": 7.574629766724625, "learning_rate": 8.014758759804688e-06, "loss": 17.9085, "step": 17243 }, { "epoch": 0.3152064635238635, "grad_norm": 6.280790363627346, "learning_rate": 8.014522603131332e-06, "loss": 17.7072, "step": 17244 }, { "epoch": 0.31522474272031004, "grad_norm": 6.663546949785355, "learning_rate": 8.014286435892335e-06, "loss": 17.7714, "step": 17245 }, { "epoch": 0.3152430219167565, "grad_norm": 5.644993620118886, "learning_rate": 8.014050258088527e-06, "loss": 17.1598, "step": 17246 }, { "epoch": 0.31526130111320305, "grad_norm": 6.549006480041142, "learning_rate": 8.013814069720733e-06, "loss": 17.5526, "step": 17247 }, { "epoch": 0.3152795803096496, "grad_norm": 6.563431303899967, "learning_rate": 8.013577870789783e-06, "loss": 17.3868, "step": 17248 }, { "epoch": 0.3152978595060961, "grad_norm": 5.361171220750779, "learning_rate": 8.013341661296503e-06, "loss": 17.0847, "step": 17249 }, { "epoch": 0.31531613870254266, "grad_norm": 8.254859259003663, "learning_rate": 8.013105441241722e-06, "loss": 18.2838, "step": 17250 }, { "epoch": 0.31533441789898914, "grad_norm": 7.036847900780292, "learning_rate": 8.01286921062627e-06, "loss": 17.8468, "step": 17251 }, { "epoch": 0.3153526970954357, "grad_norm": 9.258121444683413, "learning_rate": 8.012632969450971e-06, "loss": 18.5679, "step": 17252 }, { "epoch": 0.3153709762918822, "grad_norm": 7.0027051711961015, "learning_rate": 8.012396717716655e-06, "loss": 17.6236, "step": 17253 }, { "epoch": 0.31538925548832875, "grad_norm": 7.772629492817567, "learning_rate": 8.01216045542415e-06, "loss": 18.0685, "step": 17254 }, { "epoch": 0.3154075346847753, "grad_norm": 6.821788964207853, "learning_rate": 8.011924182574285e-06, "loss": 17.4284, "step": 17255 }, { "epoch": 0.31542581388122176, "grad_norm": 8.243962954711291, "learning_rate": 8.011687899167885e-06, "loss": 17.9809, "step": 17256 }, { "epoch": 0.3154440930776683, "grad_norm": 5.941002081332321, "learning_rate": 8.011451605205782e-06, "loss": 17.2933, "step": 17257 }, { "epoch": 0.31546237227411483, "grad_norm": 5.7085955853111665, "learning_rate": 8.011215300688803e-06, "loss": 17.4081, "step": 17258 }, { "epoch": 0.31548065147056137, "grad_norm": 7.489969880342292, "learning_rate": 8.010978985617775e-06, "loss": 18.0581, "step": 17259 }, { "epoch": 0.3154989306670079, "grad_norm": 6.170305090550007, "learning_rate": 8.010742659993525e-06, "loss": 17.3531, "step": 17260 }, { "epoch": 0.3155172098634544, "grad_norm": 5.761930461774254, "learning_rate": 8.010506323816886e-06, "loss": 17.1373, "step": 17261 }, { "epoch": 0.3155354890599009, "grad_norm": 7.579984190464123, "learning_rate": 8.010269977088684e-06, "loss": 18.2122, "step": 17262 }, { "epoch": 0.31555376825634746, "grad_norm": 8.04920914814461, "learning_rate": 8.010033619809744e-06, "loss": 18.2227, "step": 17263 }, { "epoch": 0.315572047452794, "grad_norm": 6.980271621424535, "learning_rate": 8.009797251980898e-06, "loss": 17.4735, "step": 17264 }, { "epoch": 0.3155903266492405, "grad_norm": 7.203646358916323, "learning_rate": 8.009560873602976e-06, "loss": 17.9982, "step": 17265 }, { "epoch": 0.315608605845687, "grad_norm": 7.014703836732413, "learning_rate": 8.009324484676801e-06, "loss": 17.7615, "step": 17266 }, { "epoch": 0.31562688504213354, "grad_norm": 8.962230500198741, "learning_rate": 8.009088085203207e-06, "loss": 18.1978, "step": 17267 }, { "epoch": 0.3156451642385801, "grad_norm": 6.798689249987276, "learning_rate": 8.00885167518302e-06, "loss": 17.7435, "step": 17268 }, { "epoch": 0.3156634434350266, "grad_norm": 6.71692463745285, "learning_rate": 8.00861525461707e-06, "loss": 17.6106, "step": 17269 }, { "epoch": 0.3156817226314731, "grad_norm": 6.0212322090815045, "learning_rate": 8.008378823506183e-06, "loss": 17.4011, "step": 17270 }, { "epoch": 0.31570000182791963, "grad_norm": 6.323795571367101, "learning_rate": 8.008142381851191e-06, "loss": 17.339, "step": 17271 }, { "epoch": 0.31571828102436617, "grad_norm": 5.162207674821056, "learning_rate": 8.007905929652919e-06, "loss": 16.9637, "step": 17272 }, { "epoch": 0.3157365602208127, "grad_norm": 6.40622354301321, "learning_rate": 8.007669466912197e-06, "loss": 17.2422, "step": 17273 }, { "epoch": 0.31575483941725924, "grad_norm": 6.380792081899297, "learning_rate": 8.007432993629857e-06, "loss": 17.3429, "step": 17274 }, { "epoch": 0.3157731186137057, "grad_norm": 7.435868585263863, "learning_rate": 8.007196509806724e-06, "loss": 17.7877, "step": 17275 }, { "epoch": 0.31579139781015225, "grad_norm": 9.257972846005018, "learning_rate": 8.00696001544363e-06, "loss": 18.0455, "step": 17276 }, { "epoch": 0.3158096770065988, "grad_norm": 6.444610897364533, "learning_rate": 8.006723510541401e-06, "loss": 17.6732, "step": 17277 }, { "epoch": 0.3158279562030453, "grad_norm": 6.669997196515607, "learning_rate": 8.006486995100866e-06, "loss": 17.6559, "step": 17278 }, { "epoch": 0.31584623539949186, "grad_norm": 5.975750319203101, "learning_rate": 8.006250469122857e-06, "loss": 17.3374, "step": 17279 }, { "epoch": 0.31586451459593834, "grad_norm": 6.378632875562156, "learning_rate": 8.0060139326082e-06, "loss": 17.4521, "step": 17280 }, { "epoch": 0.3158827937923849, "grad_norm": 6.1339635490609234, "learning_rate": 8.005777385557723e-06, "loss": 17.3795, "step": 17281 }, { "epoch": 0.3159010729888314, "grad_norm": 7.2323692727419315, "learning_rate": 8.005540827972259e-06, "loss": 17.9336, "step": 17282 }, { "epoch": 0.31591935218527795, "grad_norm": 5.936761222618097, "learning_rate": 8.005304259852636e-06, "loss": 17.2524, "step": 17283 }, { "epoch": 0.3159376313817245, "grad_norm": 6.986959874057104, "learning_rate": 8.00506768119968e-06, "loss": 17.6888, "step": 17284 }, { "epoch": 0.31595591057817096, "grad_norm": 6.618194378270937, "learning_rate": 8.004831092014224e-06, "loss": 17.7176, "step": 17285 }, { "epoch": 0.3159741897746175, "grad_norm": 6.618019920452158, "learning_rate": 8.004594492297095e-06, "loss": 17.5945, "step": 17286 }, { "epoch": 0.31599246897106403, "grad_norm": 6.229218484130583, "learning_rate": 8.004357882049125e-06, "loss": 17.3467, "step": 17287 }, { "epoch": 0.31601074816751057, "grad_norm": 6.583558412131839, "learning_rate": 8.004121261271139e-06, "loss": 17.7141, "step": 17288 }, { "epoch": 0.3160290273639571, "grad_norm": 5.570932105293516, "learning_rate": 8.00388462996397e-06, "loss": 17.0904, "step": 17289 }, { "epoch": 0.3160473065604036, "grad_norm": 6.773103782373799, "learning_rate": 8.003647988128447e-06, "loss": 17.823, "step": 17290 }, { "epoch": 0.3160655857568501, "grad_norm": 7.140574485216784, "learning_rate": 8.003411335765397e-06, "loss": 17.8718, "step": 17291 }, { "epoch": 0.31608386495329666, "grad_norm": 7.65391096461442, "learning_rate": 8.00317467287565e-06, "loss": 18.1112, "step": 17292 }, { "epoch": 0.3161021441497432, "grad_norm": 6.4821818518534196, "learning_rate": 8.002937999460038e-06, "loss": 17.4704, "step": 17293 }, { "epoch": 0.3161204233461897, "grad_norm": 4.8915957927763465, "learning_rate": 8.002701315519388e-06, "loss": 16.8415, "step": 17294 }, { "epoch": 0.3161387025426362, "grad_norm": 6.98314453855389, "learning_rate": 8.002464621054531e-06, "loss": 17.8192, "step": 17295 }, { "epoch": 0.31615698173908274, "grad_norm": 5.695664433405737, "learning_rate": 8.002227916066297e-06, "loss": 17.4068, "step": 17296 }, { "epoch": 0.3161752609355293, "grad_norm": 7.0937901152485265, "learning_rate": 8.001991200555512e-06, "loss": 18.2263, "step": 17297 }, { "epoch": 0.3161935401319758, "grad_norm": 6.613123201057216, "learning_rate": 8.00175447452301e-06, "loss": 17.7912, "step": 17298 }, { "epoch": 0.31621181932842235, "grad_norm": 5.779613572039592, "learning_rate": 8.00151773796962e-06, "loss": 17.1359, "step": 17299 }, { "epoch": 0.31623009852486883, "grad_norm": 5.56224046592532, "learning_rate": 8.00128099089617e-06, "loss": 17.1956, "step": 17300 }, { "epoch": 0.31624837772131537, "grad_norm": 5.978160083714933, "learning_rate": 8.00104423330349e-06, "loss": 17.1161, "step": 17301 }, { "epoch": 0.3162666569177619, "grad_norm": 7.990643806813121, "learning_rate": 8.000807465192411e-06, "loss": 18.4523, "step": 17302 }, { "epoch": 0.31628493611420844, "grad_norm": 5.567845383398115, "learning_rate": 8.00057068656376e-06, "loss": 17.1607, "step": 17303 }, { "epoch": 0.3163032153106549, "grad_norm": 5.922321863482638, "learning_rate": 8.000333897418372e-06, "loss": 17.5144, "step": 17304 }, { "epoch": 0.31632149450710145, "grad_norm": 5.705829061206794, "learning_rate": 8.000097097757072e-06, "loss": 17.2625, "step": 17305 }, { "epoch": 0.316339773703548, "grad_norm": 6.227613307187678, "learning_rate": 7.999860287580694e-06, "loss": 17.6444, "step": 17306 }, { "epoch": 0.3163580528999945, "grad_norm": 7.018679087373992, "learning_rate": 7.999623466890065e-06, "loss": 17.5985, "step": 17307 }, { "epoch": 0.31637633209644106, "grad_norm": 6.960499383597473, "learning_rate": 7.999386635686016e-06, "loss": 17.6521, "step": 17308 }, { "epoch": 0.31639461129288754, "grad_norm": 6.187277667793289, "learning_rate": 7.999149793969377e-06, "loss": 17.3097, "step": 17309 }, { "epoch": 0.3164128904893341, "grad_norm": 6.838715198564892, "learning_rate": 7.99891294174098e-06, "loss": 17.6548, "step": 17310 }, { "epoch": 0.3164311696857806, "grad_norm": 7.189376662082501, "learning_rate": 7.998676079001651e-06, "loss": 17.9041, "step": 17311 }, { "epoch": 0.31644944888222715, "grad_norm": 6.9285400299559585, "learning_rate": 7.998439205752222e-06, "loss": 17.7576, "step": 17312 }, { "epoch": 0.3164677280786737, "grad_norm": 6.837096459838117, "learning_rate": 7.998202321993527e-06, "loss": 17.5272, "step": 17313 }, { "epoch": 0.31648600727512016, "grad_norm": 6.5943505560026425, "learning_rate": 7.997965427726391e-06, "loss": 17.3491, "step": 17314 }, { "epoch": 0.3165042864715667, "grad_norm": 6.8997158881354235, "learning_rate": 7.997728522951646e-06, "loss": 18.0895, "step": 17315 }, { "epoch": 0.31652256566801323, "grad_norm": 5.475364935696605, "learning_rate": 7.997491607670123e-06, "loss": 17.2759, "step": 17316 }, { "epoch": 0.31654084486445977, "grad_norm": 6.76389765174802, "learning_rate": 7.997254681882652e-06, "loss": 17.7249, "step": 17317 }, { "epoch": 0.3165591240609063, "grad_norm": 6.563208550847732, "learning_rate": 7.997017745590064e-06, "loss": 17.7053, "step": 17318 }, { "epoch": 0.3165774032573528, "grad_norm": 6.285419678293115, "learning_rate": 7.996780798793187e-06, "loss": 17.2764, "step": 17319 }, { "epoch": 0.3165956824537993, "grad_norm": 6.2225713999306125, "learning_rate": 7.996543841492857e-06, "loss": 17.4828, "step": 17320 }, { "epoch": 0.31661396165024586, "grad_norm": 6.265834466442455, "learning_rate": 7.996306873689899e-06, "loss": 17.2636, "step": 17321 }, { "epoch": 0.3166322408466924, "grad_norm": 6.726600881578628, "learning_rate": 7.996069895385143e-06, "loss": 17.4049, "step": 17322 }, { "epoch": 0.3166505200431389, "grad_norm": 6.485672045106972, "learning_rate": 7.995832906579426e-06, "loss": 17.3534, "step": 17323 }, { "epoch": 0.3166687992395854, "grad_norm": 8.904328244583729, "learning_rate": 7.995595907273573e-06, "loss": 18.7114, "step": 17324 }, { "epoch": 0.31668707843603194, "grad_norm": 6.148101540336492, "learning_rate": 7.995358897468414e-06, "loss": 17.4741, "step": 17325 }, { "epoch": 0.3167053576324785, "grad_norm": 6.932768245039868, "learning_rate": 7.995121877164784e-06, "loss": 17.4276, "step": 17326 }, { "epoch": 0.316723636828925, "grad_norm": 7.669086120789412, "learning_rate": 7.994884846363513e-06, "loss": 17.7421, "step": 17327 }, { "epoch": 0.31674191602537155, "grad_norm": 5.84113389394122, "learning_rate": 7.99464780506543e-06, "loss": 17.3566, "step": 17328 }, { "epoch": 0.31676019522181803, "grad_norm": 6.880726002467942, "learning_rate": 7.994410753271365e-06, "loss": 17.6299, "step": 17329 }, { "epoch": 0.31677847441826457, "grad_norm": 6.734761939125572, "learning_rate": 7.99417369098215e-06, "loss": 17.988, "step": 17330 }, { "epoch": 0.3167967536147111, "grad_norm": 6.721574535046607, "learning_rate": 7.993936618198616e-06, "loss": 17.5829, "step": 17331 }, { "epoch": 0.31681503281115764, "grad_norm": 6.424721329456516, "learning_rate": 7.993699534921594e-06, "loss": 17.7047, "step": 17332 }, { "epoch": 0.3168333120076042, "grad_norm": 7.5887974576988615, "learning_rate": 7.993462441151918e-06, "loss": 17.8649, "step": 17333 }, { "epoch": 0.31685159120405065, "grad_norm": 5.820842922740516, "learning_rate": 7.993225336890414e-06, "loss": 17.3976, "step": 17334 }, { "epoch": 0.3168698704004972, "grad_norm": 6.607944164689301, "learning_rate": 7.992988222137914e-06, "loss": 17.5775, "step": 17335 }, { "epoch": 0.3168881495969437, "grad_norm": 6.239762600115004, "learning_rate": 7.99275109689525e-06, "loss": 17.5336, "step": 17336 }, { "epoch": 0.31690642879339026, "grad_norm": 6.274828033999698, "learning_rate": 7.992513961163253e-06, "loss": 17.3763, "step": 17337 }, { "epoch": 0.31692470798983674, "grad_norm": 7.08519694139076, "learning_rate": 7.992276814942756e-06, "loss": 17.5813, "step": 17338 }, { "epoch": 0.3169429871862833, "grad_norm": 7.3140485087203375, "learning_rate": 7.992039658234586e-06, "loss": 17.8942, "step": 17339 }, { "epoch": 0.3169612663827298, "grad_norm": 7.207227940097351, "learning_rate": 7.99180249103958e-06, "loss": 18.0972, "step": 17340 }, { "epoch": 0.31697954557917635, "grad_norm": 6.858963748711504, "learning_rate": 7.991565313358562e-06, "loss": 17.7875, "step": 17341 }, { "epoch": 0.3169978247756229, "grad_norm": 6.556687275015834, "learning_rate": 7.991328125192368e-06, "loss": 17.3149, "step": 17342 }, { "epoch": 0.31701610397206936, "grad_norm": 6.490040996250598, "learning_rate": 7.99109092654183e-06, "loss": 18.3425, "step": 17343 }, { "epoch": 0.3170343831685159, "grad_norm": 5.979798621442256, "learning_rate": 7.990853717407778e-06, "loss": 17.4498, "step": 17344 }, { "epoch": 0.31705266236496243, "grad_norm": 7.520527607230535, "learning_rate": 7.990616497791043e-06, "loss": 18.1362, "step": 17345 }, { "epoch": 0.31707094156140897, "grad_norm": 8.00210194451691, "learning_rate": 7.990379267692455e-06, "loss": 17.9182, "step": 17346 }, { "epoch": 0.3170892207578555, "grad_norm": 6.0069612982067415, "learning_rate": 7.990142027112849e-06, "loss": 17.13, "step": 17347 }, { "epoch": 0.317107499954302, "grad_norm": 7.431075068686617, "learning_rate": 7.989904776053054e-06, "loss": 17.8591, "step": 17348 }, { "epoch": 0.3171257791507485, "grad_norm": 7.194017830536698, "learning_rate": 7.989667514513903e-06, "loss": 17.9881, "step": 17349 }, { "epoch": 0.31714405834719506, "grad_norm": 7.2560760693483894, "learning_rate": 7.989430242496226e-06, "loss": 18.1042, "step": 17350 }, { "epoch": 0.3171623375436416, "grad_norm": 6.127488058005536, "learning_rate": 7.989192960000855e-06, "loss": 17.3964, "step": 17351 }, { "epoch": 0.31718061674008813, "grad_norm": 6.820769982935315, "learning_rate": 7.988955667028622e-06, "loss": 17.7718, "step": 17352 }, { "epoch": 0.3171988959365346, "grad_norm": 5.801515730191406, "learning_rate": 7.988718363580359e-06, "loss": 17.4517, "step": 17353 }, { "epoch": 0.31721717513298114, "grad_norm": 6.102441621651423, "learning_rate": 7.9884810496569e-06, "loss": 17.2964, "step": 17354 }, { "epoch": 0.3172354543294277, "grad_norm": 5.961007345755837, "learning_rate": 7.988243725259071e-06, "loss": 17.3146, "step": 17355 }, { "epoch": 0.3172537335258742, "grad_norm": 6.451354412838017, "learning_rate": 7.988006390387707e-06, "loss": 17.4721, "step": 17356 }, { "epoch": 0.31727201272232075, "grad_norm": 6.165619916006797, "learning_rate": 7.98776904504364e-06, "loss": 17.3525, "step": 17357 }, { "epoch": 0.31729029191876723, "grad_norm": 6.448672229159587, "learning_rate": 7.987531689227705e-06, "loss": 17.5783, "step": 17358 }, { "epoch": 0.31730857111521377, "grad_norm": 6.547768556287841, "learning_rate": 7.987294322940728e-06, "loss": 17.629, "step": 17359 }, { "epoch": 0.3173268503116603, "grad_norm": 6.2012472626810355, "learning_rate": 7.987056946183544e-06, "loss": 17.5342, "step": 17360 }, { "epoch": 0.31734512950810684, "grad_norm": 7.712075728461025, "learning_rate": 7.986819558956984e-06, "loss": 18.4234, "step": 17361 }, { "epoch": 0.3173634087045534, "grad_norm": 7.181988503389922, "learning_rate": 7.986582161261881e-06, "loss": 17.7457, "step": 17362 }, { "epoch": 0.31738168790099985, "grad_norm": 5.440387236226897, "learning_rate": 7.986344753099067e-06, "loss": 17.1153, "step": 17363 }, { "epoch": 0.3173999670974464, "grad_norm": 5.29858840328652, "learning_rate": 7.986107334469374e-06, "loss": 17.1398, "step": 17364 }, { "epoch": 0.3174182462938929, "grad_norm": 6.48987557252202, "learning_rate": 7.985869905373635e-06, "loss": 17.6772, "step": 17365 }, { "epoch": 0.31743652549033946, "grad_norm": 7.141359406982921, "learning_rate": 7.985632465812679e-06, "loss": 17.8821, "step": 17366 }, { "epoch": 0.317454804686786, "grad_norm": 6.660119386170312, "learning_rate": 7.98539501578734e-06, "loss": 17.6982, "step": 17367 }, { "epoch": 0.3174730838832325, "grad_norm": 5.7470754973462554, "learning_rate": 7.985157555298453e-06, "loss": 17.2834, "step": 17368 }, { "epoch": 0.317491363079679, "grad_norm": 5.582415356984512, "learning_rate": 7.984920084346845e-06, "loss": 17.2948, "step": 17369 }, { "epoch": 0.31750964227612555, "grad_norm": 7.13506128815316, "learning_rate": 7.984682602933353e-06, "loss": 17.8519, "step": 17370 }, { "epoch": 0.3175279214725721, "grad_norm": 7.337779130993448, "learning_rate": 7.984445111058807e-06, "loss": 18.0208, "step": 17371 }, { "epoch": 0.31754620066901856, "grad_norm": 5.21294512449078, "learning_rate": 7.98420760872404e-06, "loss": 17.0952, "step": 17372 }, { "epoch": 0.3175644798654651, "grad_norm": 6.4250015353459276, "learning_rate": 7.983970095929884e-06, "loss": 17.4149, "step": 17373 }, { "epoch": 0.31758275906191163, "grad_norm": 5.239606804881995, "learning_rate": 7.983732572677172e-06, "loss": 16.8856, "step": 17374 }, { "epoch": 0.31760103825835817, "grad_norm": 7.069788966658681, "learning_rate": 7.983495038966735e-06, "loss": 17.5749, "step": 17375 }, { "epoch": 0.3176193174548047, "grad_norm": 6.574379653563129, "learning_rate": 7.98325749479941e-06, "loss": 17.8001, "step": 17376 }, { "epoch": 0.3176375966512512, "grad_norm": 6.548469582548624, "learning_rate": 7.983019940176024e-06, "loss": 17.4517, "step": 17377 }, { "epoch": 0.3176558758476977, "grad_norm": 5.8811889627432805, "learning_rate": 7.982782375097412e-06, "loss": 17.2188, "step": 17378 }, { "epoch": 0.31767415504414426, "grad_norm": 6.428892726108311, "learning_rate": 7.982544799564407e-06, "loss": 17.5082, "step": 17379 }, { "epoch": 0.3176924342405908, "grad_norm": 9.126127628108254, "learning_rate": 7.98230721357784e-06, "loss": 18.6344, "step": 17380 }, { "epoch": 0.31771071343703733, "grad_norm": 5.123403464589389, "learning_rate": 7.982069617138545e-06, "loss": 16.9785, "step": 17381 }, { "epoch": 0.3177289926334838, "grad_norm": 5.754014486785964, "learning_rate": 7.981832010247358e-06, "loss": 17.3393, "step": 17382 }, { "epoch": 0.31774727182993034, "grad_norm": 6.143688327756162, "learning_rate": 7.981594392905105e-06, "loss": 17.4204, "step": 17383 }, { "epoch": 0.3177655510263769, "grad_norm": 5.7456749514990175, "learning_rate": 7.981356765112624e-06, "loss": 17.1715, "step": 17384 }, { "epoch": 0.3177838302228234, "grad_norm": 7.125977071006484, "learning_rate": 7.981119126870747e-06, "loss": 17.6395, "step": 17385 }, { "epoch": 0.31780210941926995, "grad_norm": 6.775925131844454, "learning_rate": 7.980881478180305e-06, "loss": 17.5778, "step": 17386 }, { "epoch": 0.31782038861571643, "grad_norm": 5.346890123465364, "learning_rate": 7.980643819042132e-06, "loss": 16.9998, "step": 17387 }, { "epoch": 0.31783866781216297, "grad_norm": 6.44156051332979, "learning_rate": 7.980406149457062e-06, "loss": 17.615, "step": 17388 }, { "epoch": 0.3178569470086095, "grad_norm": 6.542473041592614, "learning_rate": 7.980168469425926e-06, "loss": 17.9314, "step": 17389 }, { "epoch": 0.31787522620505604, "grad_norm": 7.14022172765695, "learning_rate": 7.979930778949559e-06, "loss": 17.8826, "step": 17390 }, { "epoch": 0.3178935054015026, "grad_norm": 6.3171456407064035, "learning_rate": 7.979693078028792e-06, "loss": 17.7095, "step": 17391 }, { "epoch": 0.31791178459794905, "grad_norm": 7.237654991147253, "learning_rate": 7.979455366664461e-06, "loss": 17.7842, "step": 17392 }, { "epoch": 0.3179300637943956, "grad_norm": 6.316166156805975, "learning_rate": 7.979217644857395e-06, "loss": 17.4043, "step": 17393 }, { "epoch": 0.3179483429908421, "grad_norm": 6.222195360899406, "learning_rate": 7.978979912608432e-06, "loss": 17.3792, "step": 17394 }, { "epoch": 0.31796662218728866, "grad_norm": 7.086724627567828, "learning_rate": 7.978742169918403e-06, "loss": 17.4918, "step": 17395 }, { "epoch": 0.3179849013837352, "grad_norm": 6.821847337797812, "learning_rate": 7.97850441678814e-06, "loss": 17.6306, "step": 17396 }, { "epoch": 0.3180031805801817, "grad_norm": 6.894452783898032, "learning_rate": 7.978266653218478e-06, "loss": 17.6678, "step": 17397 }, { "epoch": 0.3180214597766282, "grad_norm": 7.5576865526535055, "learning_rate": 7.978028879210249e-06, "loss": 17.8944, "step": 17398 }, { "epoch": 0.31803973897307475, "grad_norm": 6.537276779955941, "learning_rate": 7.977791094764288e-06, "loss": 17.436, "step": 17399 }, { "epoch": 0.3180580181695213, "grad_norm": 5.298225272967506, "learning_rate": 7.977553299881428e-06, "loss": 17.0772, "step": 17400 }, { "epoch": 0.3180762973659678, "grad_norm": 7.239544698165914, "learning_rate": 7.9773154945625e-06, "loss": 17.998, "step": 17401 }, { "epoch": 0.3180945765624143, "grad_norm": 6.289866225021799, "learning_rate": 7.977077678808342e-06, "loss": 17.462, "step": 17402 }, { "epoch": 0.31811285575886084, "grad_norm": 6.5872078956297475, "learning_rate": 7.976839852619785e-06, "loss": 17.5857, "step": 17403 }, { "epoch": 0.31813113495530737, "grad_norm": 7.102906806243845, "learning_rate": 7.976602015997662e-06, "loss": 17.8424, "step": 17404 }, { "epoch": 0.3181494141517539, "grad_norm": 6.1827037640816975, "learning_rate": 7.976364168942807e-06, "loss": 17.3523, "step": 17405 }, { "epoch": 0.3181676933482004, "grad_norm": 7.042763867266987, "learning_rate": 7.976126311456054e-06, "loss": 17.7008, "step": 17406 }, { "epoch": 0.3181859725446469, "grad_norm": 6.073728287871449, "learning_rate": 7.975888443538235e-06, "loss": 17.3641, "step": 17407 }, { "epoch": 0.31820425174109346, "grad_norm": 5.714845719217169, "learning_rate": 7.975650565190187e-06, "loss": 17.4318, "step": 17408 }, { "epoch": 0.31822253093754, "grad_norm": 7.3403681460001025, "learning_rate": 7.975412676412742e-06, "loss": 17.9309, "step": 17409 }, { "epoch": 0.31824081013398653, "grad_norm": 6.26588164655034, "learning_rate": 7.975174777206733e-06, "loss": 17.543, "step": 17410 }, { "epoch": 0.318259089330433, "grad_norm": 5.042416906249586, "learning_rate": 7.974936867572995e-06, "loss": 16.921, "step": 17411 }, { "epoch": 0.31827736852687954, "grad_norm": 5.8848114121785455, "learning_rate": 7.974698947512362e-06, "loss": 17.5731, "step": 17412 }, { "epoch": 0.3182956477233261, "grad_norm": 7.517702316834048, "learning_rate": 7.974461017025667e-06, "loss": 17.9577, "step": 17413 }, { "epoch": 0.3183139269197726, "grad_norm": 5.918598179588317, "learning_rate": 7.974223076113744e-06, "loss": 17.278, "step": 17414 }, { "epoch": 0.31833220611621915, "grad_norm": 5.42916953972551, "learning_rate": 7.973985124777427e-06, "loss": 17.1337, "step": 17415 }, { "epoch": 0.31835048531266563, "grad_norm": 10.107034878062095, "learning_rate": 7.973747163017552e-06, "loss": 18.4012, "step": 17416 }, { "epoch": 0.31836876450911217, "grad_norm": 6.690150743389067, "learning_rate": 7.97350919083495e-06, "loss": 17.5545, "step": 17417 }, { "epoch": 0.3183870437055587, "grad_norm": 6.481417714103426, "learning_rate": 7.973271208230454e-06, "loss": 17.5627, "step": 17418 }, { "epoch": 0.31840532290200524, "grad_norm": 6.523033117753645, "learning_rate": 7.973033215204902e-06, "loss": 17.5143, "step": 17419 }, { "epoch": 0.3184236020984518, "grad_norm": 7.0409480642755655, "learning_rate": 7.972795211759129e-06, "loss": 17.7209, "step": 17420 }, { "epoch": 0.31844188129489825, "grad_norm": 6.998002046816529, "learning_rate": 7.972557197893964e-06, "loss": 17.6533, "step": 17421 }, { "epoch": 0.3184601604913448, "grad_norm": 7.4785771956664435, "learning_rate": 7.972319173610243e-06, "loss": 18.058, "step": 17422 }, { "epoch": 0.3184784396877913, "grad_norm": 6.994765865188953, "learning_rate": 7.972081138908805e-06, "loss": 17.7197, "step": 17423 }, { "epoch": 0.31849671888423786, "grad_norm": 7.442863291258745, "learning_rate": 7.971843093790477e-06, "loss": 17.7491, "step": 17424 }, { "epoch": 0.3185149980806844, "grad_norm": 6.4550573992928895, "learning_rate": 7.971605038256098e-06, "loss": 17.6854, "step": 17425 }, { "epoch": 0.3185332772771309, "grad_norm": 6.770599267949571, "learning_rate": 7.971366972306503e-06, "loss": 17.6583, "step": 17426 }, { "epoch": 0.3185515564735774, "grad_norm": 7.6163586371788154, "learning_rate": 7.971128895942522e-06, "loss": 17.9685, "step": 17427 }, { "epoch": 0.31856983567002395, "grad_norm": 7.577717258632709, "learning_rate": 7.970890809164992e-06, "loss": 18.0588, "step": 17428 }, { "epoch": 0.3185881148664705, "grad_norm": 8.542688583545747, "learning_rate": 7.97065271197475e-06, "loss": 18.0899, "step": 17429 }, { "epoch": 0.318606394062917, "grad_norm": 6.05510887570353, "learning_rate": 7.970414604372627e-06, "loss": 17.2955, "step": 17430 }, { "epoch": 0.3186246732593635, "grad_norm": 6.308549937419515, "learning_rate": 7.970176486359457e-06, "loss": 17.3666, "step": 17431 }, { "epoch": 0.31864295245581004, "grad_norm": 6.004841029169363, "learning_rate": 7.969938357936078e-06, "loss": 17.2362, "step": 17432 }, { "epoch": 0.31866123165225657, "grad_norm": 5.99162523397113, "learning_rate": 7.969700219103323e-06, "loss": 17.3407, "step": 17433 }, { "epoch": 0.3186795108487031, "grad_norm": 6.420638244051939, "learning_rate": 7.969462069862025e-06, "loss": 17.5247, "step": 17434 }, { "epoch": 0.31869779004514964, "grad_norm": 6.14558719914021, "learning_rate": 7.96922391021302e-06, "loss": 17.4104, "step": 17435 }, { "epoch": 0.3187160692415961, "grad_norm": 7.010187718192945, "learning_rate": 7.968985740157144e-06, "loss": 17.8984, "step": 17436 }, { "epoch": 0.31873434843804266, "grad_norm": 7.00973642192062, "learning_rate": 7.968747559695232e-06, "loss": 18.0226, "step": 17437 }, { "epoch": 0.3187526276344892, "grad_norm": 7.942679197149888, "learning_rate": 7.968509368828115e-06, "loss": 17.8861, "step": 17438 }, { "epoch": 0.31877090683093573, "grad_norm": 6.691062041059063, "learning_rate": 7.968271167556629e-06, "loss": 17.5306, "step": 17439 }, { "epoch": 0.3187891860273822, "grad_norm": 7.305086467569272, "learning_rate": 7.968032955881612e-06, "loss": 17.8021, "step": 17440 }, { "epoch": 0.31880746522382875, "grad_norm": 7.282716776920097, "learning_rate": 7.967794733803899e-06, "loss": 17.9196, "step": 17441 }, { "epoch": 0.3188257444202753, "grad_norm": 6.124799570346934, "learning_rate": 7.96755650132432e-06, "loss": 17.5765, "step": 17442 }, { "epoch": 0.3188440236167218, "grad_norm": 6.351901912431719, "learning_rate": 7.967318258443715e-06, "loss": 17.266, "step": 17443 }, { "epoch": 0.31886230281316835, "grad_norm": 7.462993494413, "learning_rate": 7.967080005162915e-06, "loss": 17.7182, "step": 17444 }, { "epoch": 0.31888058200961483, "grad_norm": 5.1388979891743425, "learning_rate": 7.966841741482757e-06, "loss": 17.0286, "step": 17445 }, { "epoch": 0.31889886120606137, "grad_norm": 7.108736057028803, "learning_rate": 7.966603467404079e-06, "loss": 17.679, "step": 17446 }, { "epoch": 0.3189171404025079, "grad_norm": 6.703201495150327, "learning_rate": 7.966365182927712e-06, "loss": 17.7453, "step": 17447 }, { "epoch": 0.31893541959895444, "grad_norm": 5.855150371783629, "learning_rate": 7.966126888054491e-06, "loss": 17.398, "step": 17448 }, { "epoch": 0.318953698795401, "grad_norm": 6.85273940483492, "learning_rate": 7.965888582785254e-06, "loss": 17.8339, "step": 17449 }, { "epoch": 0.31897197799184746, "grad_norm": 6.410416172121891, "learning_rate": 7.965650267120834e-06, "loss": 17.4644, "step": 17450 }, { "epoch": 0.318990257188294, "grad_norm": 7.057073891279669, "learning_rate": 7.96541194106207e-06, "loss": 17.5452, "step": 17451 }, { "epoch": 0.3190085363847405, "grad_norm": 7.045457363541464, "learning_rate": 7.96517360460979e-06, "loss": 17.6048, "step": 17452 }, { "epoch": 0.31902681558118706, "grad_norm": 6.154527110652669, "learning_rate": 7.964935257764836e-06, "loss": 17.3925, "step": 17453 }, { "epoch": 0.3190450947776336, "grad_norm": 7.464159396080561, "learning_rate": 7.964696900528042e-06, "loss": 18.3016, "step": 17454 }, { "epoch": 0.3190633739740801, "grad_norm": 5.749208152804058, "learning_rate": 7.964458532900242e-06, "loss": 17.3589, "step": 17455 }, { "epoch": 0.3190816531705266, "grad_norm": 8.00658019945167, "learning_rate": 7.96422015488227e-06, "loss": 18.218, "step": 17456 }, { "epoch": 0.31909993236697315, "grad_norm": 8.344669067885857, "learning_rate": 7.963981766474966e-06, "loss": 18.2281, "step": 17457 }, { "epoch": 0.3191182115634197, "grad_norm": 7.415546398600031, "learning_rate": 7.963743367679163e-06, "loss": 17.9363, "step": 17458 }, { "epoch": 0.3191364907598662, "grad_norm": 7.5905559663552005, "learning_rate": 7.963504958495695e-06, "loss": 17.9948, "step": 17459 }, { "epoch": 0.3191547699563127, "grad_norm": 7.352105628538097, "learning_rate": 7.963266538925401e-06, "loss": 18.1612, "step": 17460 }, { "epoch": 0.31917304915275924, "grad_norm": 6.36190368461596, "learning_rate": 7.963028108969115e-06, "loss": 17.6291, "step": 17461 }, { "epoch": 0.31919132834920577, "grad_norm": 6.576523290862874, "learning_rate": 7.962789668627672e-06, "loss": 17.451, "step": 17462 }, { "epoch": 0.3192096075456523, "grad_norm": 8.733940482155006, "learning_rate": 7.962551217901909e-06, "loss": 17.718, "step": 17463 }, { "epoch": 0.31922788674209884, "grad_norm": 7.95138733543129, "learning_rate": 7.962312756792659e-06, "loss": 18.3766, "step": 17464 }, { "epoch": 0.3192461659385453, "grad_norm": 7.864199906412713, "learning_rate": 7.962074285300763e-06, "loss": 18.3167, "step": 17465 }, { "epoch": 0.31926444513499186, "grad_norm": 6.142260208460528, "learning_rate": 7.96183580342705e-06, "loss": 17.4108, "step": 17466 }, { "epoch": 0.3192827243314384, "grad_norm": 7.183732960893861, "learning_rate": 7.961597311172361e-06, "loss": 17.479, "step": 17467 }, { "epoch": 0.31930100352788493, "grad_norm": 7.74294349359482, "learning_rate": 7.96135880853753e-06, "loss": 18.2792, "step": 17468 }, { "epoch": 0.31931928272433147, "grad_norm": 6.047273233634122, "learning_rate": 7.961120295523397e-06, "loss": 17.4092, "step": 17469 }, { "epoch": 0.31933756192077795, "grad_norm": 4.820243801648722, "learning_rate": 7.960881772130791e-06, "loss": 16.9432, "step": 17470 }, { "epoch": 0.3193558411172245, "grad_norm": 7.203332010453183, "learning_rate": 7.960643238360552e-06, "loss": 17.6185, "step": 17471 }, { "epoch": 0.319374120313671, "grad_norm": 5.7546710407216874, "learning_rate": 7.960404694213514e-06, "loss": 17.4795, "step": 17472 }, { "epoch": 0.31939239951011755, "grad_norm": 6.916722004121082, "learning_rate": 7.960166139690516e-06, "loss": 17.6398, "step": 17473 }, { "epoch": 0.31941067870656403, "grad_norm": 5.269629728558581, "learning_rate": 7.959927574792393e-06, "loss": 17.0616, "step": 17474 }, { "epoch": 0.31942895790301057, "grad_norm": 7.214591716120327, "learning_rate": 7.959688999519979e-06, "loss": 17.8556, "step": 17475 }, { "epoch": 0.3194472370994571, "grad_norm": 5.938926725577914, "learning_rate": 7.959450413874112e-06, "loss": 17.4313, "step": 17476 }, { "epoch": 0.31946551629590364, "grad_norm": 6.393551342787462, "learning_rate": 7.95921181785563e-06, "loss": 17.747, "step": 17477 }, { "epoch": 0.3194837954923502, "grad_norm": 6.970582896186193, "learning_rate": 7.958973211465366e-06, "loss": 17.4394, "step": 17478 }, { "epoch": 0.31950207468879666, "grad_norm": 6.540716048527766, "learning_rate": 7.958734594704158e-06, "loss": 17.6077, "step": 17479 }, { "epoch": 0.3195203538852432, "grad_norm": 6.393892115170374, "learning_rate": 7.958495967572842e-06, "loss": 17.7184, "step": 17480 }, { "epoch": 0.3195386330816897, "grad_norm": 6.378799462589449, "learning_rate": 7.958257330072255e-06, "loss": 17.5559, "step": 17481 }, { "epoch": 0.31955691227813626, "grad_norm": 6.667832837994266, "learning_rate": 7.95801868220323e-06, "loss": 17.6016, "step": 17482 }, { "epoch": 0.3195751914745828, "grad_norm": 6.471674145106354, "learning_rate": 7.95778002396661e-06, "loss": 17.5118, "step": 17483 }, { "epoch": 0.3195934706710293, "grad_norm": 6.955999185552674, "learning_rate": 7.957541355363225e-06, "loss": 17.8465, "step": 17484 }, { "epoch": 0.3196117498674758, "grad_norm": 8.602094459095065, "learning_rate": 7.957302676393916e-06, "loss": 18.5077, "step": 17485 }, { "epoch": 0.31963002906392235, "grad_norm": 7.357329600585394, "learning_rate": 7.957063987059517e-06, "loss": 17.6965, "step": 17486 }, { "epoch": 0.3196483082603689, "grad_norm": 6.773284659372345, "learning_rate": 7.956825287360864e-06, "loss": 17.6129, "step": 17487 }, { "epoch": 0.3196665874568154, "grad_norm": 7.487286498452414, "learning_rate": 7.956586577298798e-06, "loss": 18.1413, "step": 17488 }, { "epoch": 0.3196848666532619, "grad_norm": 7.478385783668679, "learning_rate": 7.95634785687415e-06, "loss": 17.8402, "step": 17489 }, { "epoch": 0.31970314584970844, "grad_norm": 6.650266303301079, "learning_rate": 7.956109126087759e-06, "loss": 17.7921, "step": 17490 }, { "epoch": 0.319721425046155, "grad_norm": 8.307400356261565, "learning_rate": 7.955870384940463e-06, "loss": 18.2952, "step": 17491 }, { "epoch": 0.3197397042426015, "grad_norm": 5.796790357817366, "learning_rate": 7.955631633433099e-06, "loss": 17.3616, "step": 17492 }, { "epoch": 0.31975798343904804, "grad_norm": 7.4729494518302815, "learning_rate": 7.955392871566501e-06, "loss": 18.1274, "step": 17493 }, { "epoch": 0.3197762626354945, "grad_norm": 7.555378140360098, "learning_rate": 7.955154099341509e-06, "loss": 17.7562, "step": 17494 }, { "epoch": 0.31979454183194106, "grad_norm": 5.510249169967757, "learning_rate": 7.954915316758955e-06, "loss": 17.403, "step": 17495 }, { "epoch": 0.3198128210283876, "grad_norm": 7.153243771245886, "learning_rate": 7.954676523819682e-06, "loss": 17.3068, "step": 17496 }, { "epoch": 0.31983110022483413, "grad_norm": 6.570997957736306, "learning_rate": 7.954437720524524e-06, "loss": 17.472, "step": 17497 }, { "epoch": 0.31984937942128067, "grad_norm": 7.388369987731288, "learning_rate": 7.954198906874318e-06, "loss": 17.7279, "step": 17498 }, { "epoch": 0.31986765861772715, "grad_norm": 5.970687406083828, "learning_rate": 7.953960082869901e-06, "loss": 17.2817, "step": 17499 }, { "epoch": 0.3198859378141737, "grad_norm": 5.885900804608633, "learning_rate": 7.95372124851211e-06, "loss": 17.4165, "step": 17500 }, { "epoch": 0.3199042170106202, "grad_norm": 5.55017902287351, "learning_rate": 7.953482403801782e-06, "loss": 17.2828, "step": 17501 }, { "epoch": 0.31992249620706675, "grad_norm": 8.790762996276861, "learning_rate": 7.953243548739756e-06, "loss": 18.4818, "step": 17502 }, { "epoch": 0.3199407754035133, "grad_norm": 6.927184560188266, "learning_rate": 7.953004683326867e-06, "loss": 17.8649, "step": 17503 }, { "epoch": 0.31995905459995977, "grad_norm": 5.5768742281237955, "learning_rate": 7.952765807563952e-06, "loss": 17.3118, "step": 17504 }, { "epoch": 0.3199773337964063, "grad_norm": 6.95477515024145, "learning_rate": 7.952526921451849e-06, "loss": 18.0654, "step": 17505 }, { "epoch": 0.31999561299285284, "grad_norm": 6.230271429962698, "learning_rate": 7.952288024991398e-06, "loss": 17.417, "step": 17506 }, { "epoch": 0.3200138921892994, "grad_norm": 6.747843091399649, "learning_rate": 7.952049118183429e-06, "loss": 17.6285, "step": 17507 }, { "epoch": 0.32003217138574586, "grad_norm": 7.916312733795419, "learning_rate": 7.951810201028787e-06, "loss": 18.3359, "step": 17508 }, { "epoch": 0.3200504505821924, "grad_norm": 6.5808985123475665, "learning_rate": 7.951571273528307e-06, "loss": 17.5606, "step": 17509 }, { "epoch": 0.3200687297786389, "grad_norm": 6.519203520268625, "learning_rate": 7.951332335682823e-06, "loss": 17.5328, "step": 17510 }, { "epoch": 0.32008700897508546, "grad_norm": 5.964109407732359, "learning_rate": 7.951093387493179e-06, "loss": 17.5441, "step": 17511 }, { "epoch": 0.320105288171532, "grad_norm": 7.177621814938111, "learning_rate": 7.950854428960207e-06, "loss": 17.819, "step": 17512 }, { "epoch": 0.3201235673679785, "grad_norm": 6.044403665427599, "learning_rate": 7.950615460084745e-06, "loss": 17.5882, "step": 17513 }, { "epoch": 0.320141846564425, "grad_norm": 7.707237807010002, "learning_rate": 7.950376480867633e-06, "loss": 17.7754, "step": 17514 }, { "epoch": 0.32016012576087155, "grad_norm": 6.508442052553958, "learning_rate": 7.950137491309708e-06, "loss": 17.3819, "step": 17515 }, { "epoch": 0.3201784049573181, "grad_norm": 6.812862237743727, "learning_rate": 7.949898491411807e-06, "loss": 17.7334, "step": 17516 }, { "epoch": 0.3201966841537646, "grad_norm": 5.788706744289365, "learning_rate": 7.949659481174768e-06, "loss": 17.2861, "step": 17517 }, { "epoch": 0.3202149633502111, "grad_norm": 6.495981215022219, "learning_rate": 7.949420460599425e-06, "loss": 17.8605, "step": 17518 }, { "epoch": 0.32023324254665764, "grad_norm": 7.151300394292834, "learning_rate": 7.949181429686624e-06, "loss": 18.113, "step": 17519 }, { "epoch": 0.3202515217431042, "grad_norm": 5.538760547001581, "learning_rate": 7.948942388437195e-06, "loss": 17.3418, "step": 17520 }, { "epoch": 0.3202698009395507, "grad_norm": 6.04893879279129, "learning_rate": 7.94870333685198e-06, "loss": 17.2436, "step": 17521 }, { "epoch": 0.32028808013599724, "grad_norm": 6.735375023901454, "learning_rate": 7.948464274931816e-06, "loss": 18.0333, "step": 17522 }, { "epoch": 0.3203063593324437, "grad_norm": 6.345674222165929, "learning_rate": 7.94822520267754e-06, "loss": 17.7165, "step": 17523 }, { "epoch": 0.32032463852889026, "grad_norm": 6.817369136315513, "learning_rate": 7.94798612008999e-06, "loss": 17.7406, "step": 17524 }, { "epoch": 0.3203429177253368, "grad_norm": 5.850563317955968, "learning_rate": 7.947747027170005e-06, "loss": 17.3355, "step": 17525 }, { "epoch": 0.32036119692178333, "grad_norm": 6.867214062864711, "learning_rate": 7.947507923918423e-06, "loss": 17.3336, "step": 17526 }, { "epoch": 0.32037947611822987, "grad_norm": 6.268486565383152, "learning_rate": 7.94726881033608e-06, "loss": 17.3672, "step": 17527 }, { "epoch": 0.32039775531467635, "grad_norm": 6.667181512188055, "learning_rate": 7.947029686423818e-06, "loss": 17.8645, "step": 17528 }, { "epoch": 0.3204160345111229, "grad_norm": 7.25862817451171, "learning_rate": 7.94679055218247e-06, "loss": 18.0934, "step": 17529 }, { "epoch": 0.3204343137075694, "grad_norm": 6.6850707239734835, "learning_rate": 7.94655140761288e-06, "loss": 17.288, "step": 17530 }, { "epoch": 0.32045259290401595, "grad_norm": 7.096240594452438, "learning_rate": 7.94631225271588e-06, "loss": 18.0013, "step": 17531 }, { "epoch": 0.3204708721004625, "grad_norm": 6.55634688373912, "learning_rate": 7.946073087492311e-06, "loss": 17.6222, "step": 17532 }, { "epoch": 0.32048915129690897, "grad_norm": 6.717447999377284, "learning_rate": 7.945833911943013e-06, "loss": 17.8644, "step": 17533 }, { "epoch": 0.3205074304933555, "grad_norm": 6.8972307919127305, "learning_rate": 7.945594726068823e-06, "loss": 17.9629, "step": 17534 }, { "epoch": 0.32052570968980204, "grad_norm": 6.469995544451611, "learning_rate": 7.945355529870578e-06, "loss": 17.4238, "step": 17535 }, { "epoch": 0.3205439888862486, "grad_norm": 5.857223663272877, "learning_rate": 7.945116323349119e-06, "loss": 17.3366, "step": 17536 }, { "epoch": 0.3205622680826951, "grad_norm": 7.036488914584075, "learning_rate": 7.944877106505282e-06, "loss": 17.7789, "step": 17537 }, { "epoch": 0.3205805472791416, "grad_norm": 6.284735969025044, "learning_rate": 7.944637879339907e-06, "loss": 17.4463, "step": 17538 }, { "epoch": 0.32059882647558813, "grad_norm": 5.871437555277933, "learning_rate": 7.94439864185383e-06, "loss": 17.4446, "step": 17539 }, { "epoch": 0.32061710567203466, "grad_norm": 6.1560330642973335, "learning_rate": 7.944159394047893e-06, "loss": 17.5608, "step": 17540 }, { "epoch": 0.3206353848684812, "grad_norm": 5.920449784092316, "learning_rate": 7.943920135922932e-06, "loss": 17.4074, "step": 17541 }, { "epoch": 0.3206536640649277, "grad_norm": 6.87684239240657, "learning_rate": 7.943680867479786e-06, "loss": 17.8309, "step": 17542 }, { "epoch": 0.3206719432613742, "grad_norm": 6.122805539581964, "learning_rate": 7.943441588719294e-06, "loss": 17.3828, "step": 17543 }, { "epoch": 0.32069022245782075, "grad_norm": 8.136131342916752, "learning_rate": 7.943202299642297e-06, "loss": 18.3353, "step": 17544 }, { "epoch": 0.3207085016542673, "grad_norm": 8.59593021747093, "learning_rate": 7.942963000249628e-06, "loss": 18.373, "step": 17545 }, { "epoch": 0.3207267808507138, "grad_norm": 6.971687487346667, "learning_rate": 7.94272369054213e-06, "loss": 17.6022, "step": 17546 }, { "epoch": 0.3207450600471603, "grad_norm": 5.31227976173666, "learning_rate": 7.942484370520643e-06, "loss": 17.1144, "step": 17547 }, { "epoch": 0.32076333924360684, "grad_norm": 6.780034685837173, "learning_rate": 7.942245040186e-06, "loss": 17.7526, "step": 17548 }, { "epoch": 0.3207816184400534, "grad_norm": 5.263989698779791, "learning_rate": 7.942005699539046e-06, "loss": 17.1315, "step": 17549 }, { "epoch": 0.3207998976364999, "grad_norm": 7.301086640136754, "learning_rate": 7.941766348580617e-06, "loss": 17.9126, "step": 17550 }, { "epoch": 0.32081817683294644, "grad_norm": 6.426251101219799, "learning_rate": 7.941526987311552e-06, "loss": 17.6605, "step": 17551 }, { "epoch": 0.3208364560293929, "grad_norm": 6.434784824376673, "learning_rate": 7.941287615732689e-06, "loss": 17.6406, "step": 17552 }, { "epoch": 0.32085473522583946, "grad_norm": 7.409805954713263, "learning_rate": 7.94104823384487e-06, "loss": 18.2674, "step": 17553 }, { "epoch": 0.320873014422286, "grad_norm": 5.58741255493611, "learning_rate": 7.940808841648932e-06, "loss": 17.3585, "step": 17554 }, { "epoch": 0.32089129361873253, "grad_norm": 7.277771121319455, "learning_rate": 7.940569439145714e-06, "loss": 18.0897, "step": 17555 }, { "epoch": 0.32090957281517907, "grad_norm": 6.72962489040817, "learning_rate": 7.940330026336055e-06, "loss": 17.694, "step": 17556 }, { "epoch": 0.32092785201162555, "grad_norm": 5.180622443703075, "learning_rate": 7.940090603220793e-06, "loss": 16.8818, "step": 17557 }, { "epoch": 0.3209461312080721, "grad_norm": 5.899524899822945, "learning_rate": 7.93985116980077e-06, "loss": 17.3021, "step": 17558 }, { "epoch": 0.3209644104045186, "grad_norm": 6.69713386327496, "learning_rate": 7.939611726076823e-06, "loss": 17.6658, "step": 17559 }, { "epoch": 0.32098268960096515, "grad_norm": 7.072623495168213, "learning_rate": 7.939372272049792e-06, "loss": 17.8161, "step": 17560 }, { "epoch": 0.3210009687974117, "grad_norm": 6.802736690860457, "learning_rate": 7.939132807720518e-06, "loss": 17.676, "step": 17561 }, { "epoch": 0.32101924799385817, "grad_norm": 4.864000088094914, "learning_rate": 7.938893333089837e-06, "loss": 16.9136, "step": 17562 }, { "epoch": 0.3210375271903047, "grad_norm": 5.849446161457291, "learning_rate": 7.93865384815859e-06, "loss": 17.5182, "step": 17563 }, { "epoch": 0.32105580638675124, "grad_norm": 6.494365172480418, "learning_rate": 7.938414352927618e-06, "loss": 17.6016, "step": 17564 }, { "epoch": 0.3210740855831978, "grad_norm": 6.639194863257415, "learning_rate": 7.938174847397758e-06, "loss": 17.6989, "step": 17565 }, { "epoch": 0.3210923647796443, "grad_norm": 7.944189344042548, "learning_rate": 7.937935331569848e-06, "loss": 18.2166, "step": 17566 }, { "epoch": 0.3211106439760908, "grad_norm": 7.286882368237209, "learning_rate": 7.93769580544473e-06, "loss": 18.2911, "step": 17567 }, { "epoch": 0.32112892317253733, "grad_norm": 6.926221020780543, "learning_rate": 7.937456269023245e-06, "loss": 17.6126, "step": 17568 }, { "epoch": 0.32114720236898386, "grad_norm": 6.692531238903858, "learning_rate": 7.93721672230623e-06, "loss": 17.605, "step": 17569 }, { "epoch": 0.3211654815654304, "grad_norm": 5.020600207226189, "learning_rate": 7.936977165294525e-06, "loss": 16.8796, "step": 17570 }, { "epoch": 0.32118376076187694, "grad_norm": 6.703917254732207, "learning_rate": 7.93673759798897e-06, "loss": 17.6535, "step": 17571 }, { "epoch": 0.3212020399583234, "grad_norm": 6.488071635110896, "learning_rate": 7.936498020390404e-06, "loss": 17.7118, "step": 17572 }, { "epoch": 0.32122031915476995, "grad_norm": 7.154042169177971, "learning_rate": 7.936258432499669e-06, "loss": 17.6759, "step": 17573 }, { "epoch": 0.3212385983512165, "grad_norm": 8.49823162600307, "learning_rate": 7.9360188343176e-06, "loss": 18.0933, "step": 17574 }, { "epoch": 0.321256877547663, "grad_norm": 8.807821376726649, "learning_rate": 7.935779225845042e-06, "loss": 18.4572, "step": 17575 }, { "epoch": 0.3212751567441095, "grad_norm": 6.9730719662996, "learning_rate": 7.935539607082832e-06, "loss": 17.8311, "step": 17576 }, { "epoch": 0.32129343594055604, "grad_norm": 6.399781052021891, "learning_rate": 7.935299978031811e-06, "loss": 17.7642, "step": 17577 }, { "epoch": 0.3213117151370026, "grad_norm": 6.923258449578971, "learning_rate": 7.935060338692817e-06, "loss": 17.9463, "step": 17578 }, { "epoch": 0.3213299943334491, "grad_norm": 5.661210167097001, "learning_rate": 7.934820689066693e-06, "loss": 17.1465, "step": 17579 }, { "epoch": 0.32134827352989564, "grad_norm": 6.156655129376134, "learning_rate": 7.934581029154276e-06, "loss": 17.6847, "step": 17580 }, { "epoch": 0.3213665527263421, "grad_norm": 6.773443607982263, "learning_rate": 7.934341358956409e-06, "loss": 17.7063, "step": 17581 }, { "epoch": 0.32138483192278866, "grad_norm": 6.826103831707497, "learning_rate": 7.934101678473926e-06, "loss": 17.7705, "step": 17582 }, { "epoch": 0.3214031111192352, "grad_norm": 5.698977585287464, "learning_rate": 7.933861987707675e-06, "loss": 17.18, "step": 17583 }, { "epoch": 0.32142139031568173, "grad_norm": 6.760545354895173, "learning_rate": 7.93362228665849e-06, "loss": 17.9606, "step": 17584 }, { "epoch": 0.32143966951212827, "grad_norm": 6.4923707747208566, "learning_rate": 7.933382575327216e-06, "loss": 17.5494, "step": 17585 }, { "epoch": 0.32145794870857475, "grad_norm": 5.761927297438746, "learning_rate": 7.933142853714689e-06, "loss": 17.4916, "step": 17586 }, { "epoch": 0.3214762279050213, "grad_norm": 7.406766303230393, "learning_rate": 7.932903121821749e-06, "loss": 17.8947, "step": 17587 }, { "epoch": 0.3214945071014678, "grad_norm": 5.591415923018626, "learning_rate": 7.93266337964924e-06, "loss": 17.3074, "step": 17588 }, { "epoch": 0.32151278629791435, "grad_norm": 6.4557917353124585, "learning_rate": 7.932423627198e-06, "loss": 17.693, "step": 17589 }, { "epoch": 0.3215310654943609, "grad_norm": 5.755025346279173, "learning_rate": 7.932183864468872e-06, "loss": 17.3218, "step": 17590 }, { "epoch": 0.32154934469080737, "grad_norm": 6.200545828607429, "learning_rate": 7.93194409146269e-06, "loss": 17.5762, "step": 17591 }, { "epoch": 0.3215676238872539, "grad_norm": 5.663094746456993, "learning_rate": 7.931704308180302e-06, "loss": 17.2843, "step": 17592 }, { "epoch": 0.32158590308370044, "grad_norm": 6.467995005492069, "learning_rate": 7.931464514622543e-06, "loss": 17.4465, "step": 17593 }, { "epoch": 0.321604182280147, "grad_norm": 5.806689306895789, "learning_rate": 7.931224710790256e-06, "loss": 17.2962, "step": 17594 }, { "epoch": 0.3216224614765935, "grad_norm": 6.154533597171496, "learning_rate": 7.93098489668428e-06, "loss": 17.3486, "step": 17595 }, { "epoch": 0.32164074067304, "grad_norm": 6.699972068362086, "learning_rate": 7.930745072305455e-06, "loss": 17.5267, "step": 17596 }, { "epoch": 0.32165901986948653, "grad_norm": 7.016208914675771, "learning_rate": 7.930505237654624e-06, "loss": 17.5877, "step": 17597 }, { "epoch": 0.32167729906593306, "grad_norm": 6.944053439932251, "learning_rate": 7.930265392732627e-06, "loss": 17.6566, "step": 17598 }, { "epoch": 0.3216955782623796, "grad_norm": 6.254643436663572, "learning_rate": 7.930025537540304e-06, "loss": 17.6416, "step": 17599 }, { "epoch": 0.32171385745882614, "grad_norm": 6.85258363988787, "learning_rate": 7.929785672078496e-06, "loss": 17.6077, "step": 17600 }, { "epoch": 0.3217321366552726, "grad_norm": 6.205739028806184, "learning_rate": 7.929545796348041e-06, "loss": 17.5122, "step": 17601 }, { "epoch": 0.32175041585171915, "grad_norm": 6.310597843555779, "learning_rate": 7.929305910349786e-06, "loss": 17.2942, "step": 17602 }, { "epoch": 0.3217686950481657, "grad_norm": 7.499436544521757, "learning_rate": 7.929066014084566e-06, "loss": 17.749, "step": 17603 }, { "epoch": 0.3217869742446122, "grad_norm": 6.284919539679724, "learning_rate": 7.928826107553224e-06, "loss": 17.513, "step": 17604 }, { "epoch": 0.32180525344105876, "grad_norm": 7.584473562317276, "learning_rate": 7.9285861907566e-06, "loss": 17.8977, "step": 17605 }, { "epoch": 0.32182353263750524, "grad_norm": 7.055192555998346, "learning_rate": 7.928346263695537e-06, "loss": 17.7729, "step": 17606 }, { "epoch": 0.3218418118339518, "grad_norm": 8.069726630065611, "learning_rate": 7.928106326370872e-06, "loss": 18.356, "step": 17607 }, { "epoch": 0.3218600910303983, "grad_norm": 5.606543136716627, "learning_rate": 7.92786637878345e-06, "loss": 17.06, "step": 17608 }, { "epoch": 0.32187837022684485, "grad_norm": 7.076976047593216, "learning_rate": 7.927626420934112e-06, "loss": 17.3924, "step": 17609 }, { "epoch": 0.3218966494232913, "grad_norm": 6.147490011363302, "learning_rate": 7.927386452823695e-06, "loss": 17.3598, "step": 17610 }, { "epoch": 0.32191492861973786, "grad_norm": 6.665162055800956, "learning_rate": 7.927146474453042e-06, "loss": 17.9429, "step": 17611 }, { "epoch": 0.3219332078161844, "grad_norm": 5.36051715475057, "learning_rate": 7.926906485822998e-06, "loss": 17.1047, "step": 17612 }, { "epoch": 0.32195148701263093, "grad_norm": 11.761879936366626, "learning_rate": 7.926666486934398e-06, "loss": 18.6834, "step": 17613 }, { "epoch": 0.32196976620907747, "grad_norm": 5.695281167138567, "learning_rate": 7.926426477788087e-06, "loss": 17.3025, "step": 17614 }, { "epoch": 0.32198804540552395, "grad_norm": 5.154080125994498, "learning_rate": 7.926186458384904e-06, "loss": 17.0674, "step": 17615 }, { "epoch": 0.3220063246019705, "grad_norm": 6.038050596825892, "learning_rate": 7.925946428725693e-06, "loss": 17.3455, "step": 17616 }, { "epoch": 0.322024603798417, "grad_norm": 5.41747520107593, "learning_rate": 7.925706388811293e-06, "loss": 17.1618, "step": 17617 }, { "epoch": 0.32204288299486356, "grad_norm": 5.824629910042442, "learning_rate": 7.925466338642545e-06, "loss": 17.137, "step": 17618 }, { "epoch": 0.3220611621913101, "grad_norm": 6.258364118403654, "learning_rate": 7.925226278220292e-06, "loss": 17.6541, "step": 17619 }, { "epoch": 0.32207944138775657, "grad_norm": 6.918020430505713, "learning_rate": 7.924986207545376e-06, "loss": 17.6106, "step": 17620 }, { "epoch": 0.3220977205842031, "grad_norm": 5.210507486396455, "learning_rate": 7.924746126618635e-06, "loss": 17.0473, "step": 17621 }, { "epoch": 0.32211599978064964, "grad_norm": 8.335482754109005, "learning_rate": 7.924506035440914e-06, "loss": 18.4042, "step": 17622 }, { "epoch": 0.3221342789770962, "grad_norm": 5.207931457502078, "learning_rate": 7.924265934013054e-06, "loss": 16.9483, "step": 17623 }, { "epoch": 0.3221525581735427, "grad_norm": 8.92538790907973, "learning_rate": 7.924025822335895e-06, "loss": 18.117, "step": 17624 }, { "epoch": 0.3221708373699892, "grad_norm": 6.976151972593718, "learning_rate": 7.923785700410276e-06, "loss": 18.046, "step": 17625 }, { "epoch": 0.32218911656643573, "grad_norm": 7.820570273505529, "learning_rate": 7.923545568237046e-06, "loss": 17.9882, "step": 17626 }, { "epoch": 0.32220739576288226, "grad_norm": 6.724563730350931, "learning_rate": 7.92330542581704e-06, "loss": 17.5323, "step": 17627 }, { "epoch": 0.3222256749593288, "grad_norm": 6.692164008823655, "learning_rate": 7.923065273151103e-06, "loss": 17.6357, "step": 17628 }, { "epoch": 0.32224395415577534, "grad_norm": 5.977387225591005, "learning_rate": 7.922825110240078e-06, "loss": 17.2419, "step": 17629 }, { "epoch": 0.3222622333522218, "grad_norm": 6.459570328727786, "learning_rate": 7.922584937084802e-06, "loss": 17.4161, "step": 17630 }, { "epoch": 0.32228051254866835, "grad_norm": 6.683616062881609, "learning_rate": 7.922344753686119e-06, "loss": 17.7386, "step": 17631 }, { "epoch": 0.3222987917451149, "grad_norm": 5.1633507876631, "learning_rate": 7.922104560044872e-06, "loss": 17.0346, "step": 17632 }, { "epoch": 0.3223170709415614, "grad_norm": 6.779004434736978, "learning_rate": 7.921864356161904e-06, "loss": 17.5749, "step": 17633 }, { "epoch": 0.32233535013800796, "grad_norm": 6.6448000556715705, "learning_rate": 7.921624142038053e-06, "loss": 17.4547, "step": 17634 }, { "epoch": 0.32235362933445444, "grad_norm": 6.9512427461943025, "learning_rate": 7.921383917674164e-06, "loss": 17.5671, "step": 17635 }, { "epoch": 0.322371908530901, "grad_norm": 7.072902040991003, "learning_rate": 7.921143683071076e-06, "loss": 17.8704, "step": 17636 }, { "epoch": 0.3223901877273475, "grad_norm": 6.973465940446415, "learning_rate": 7.920903438229635e-06, "loss": 17.8397, "step": 17637 }, { "epoch": 0.32240846692379405, "grad_norm": 5.468724935219691, "learning_rate": 7.920663183150679e-06, "loss": 17.2447, "step": 17638 }, { "epoch": 0.3224267461202406, "grad_norm": 8.139759119222475, "learning_rate": 7.920422917835054e-06, "loss": 18.1, "step": 17639 }, { "epoch": 0.32244502531668706, "grad_norm": 5.998672892071717, "learning_rate": 7.920182642283598e-06, "loss": 17.4788, "step": 17640 }, { "epoch": 0.3224633045131336, "grad_norm": 5.916220502412665, "learning_rate": 7.919942356497157e-06, "loss": 17.305, "step": 17641 }, { "epoch": 0.32248158370958013, "grad_norm": 6.748695405789091, "learning_rate": 7.91970206047657e-06, "loss": 17.831, "step": 17642 }, { "epoch": 0.32249986290602667, "grad_norm": 6.75706245539897, "learning_rate": 7.91946175422268e-06, "loss": 17.7215, "step": 17643 }, { "epoch": 0.32251814210247315, "grad_norm": 5.84519637319865, "learning_rate": 7.919221437736333e-06, "loss": 17.45, "step": 17644 }, { "epoch": 0.3225364212989197, "grad_norm": 5.426434874888759, "learning_rate": 7.918981111018365e-06, "loss": 17.0958, "step": 17645 }, { "epoch": 0.3225547004953662, "grad_norm": 8.686339613866407, "learning_rate": 7.918740774069623e-06, "loss": 18.4381, "step": 17646 }, { "epoch": 0.32257297969181276, "grad_norm": 6.0229604203282525, "learning_rate": 7.91850042689095e-06, "loss": 17.4061, "step": 17647 }, { "epoch": 0.3225912588882593, "grad_norm": 7.045232420341901, "learning_rate": 7.918260069483182e-06, "loss": 17.9396, "step": 17648 }, { "epoch": 0.32260953808470577, "grad_norm": 7.3879756288542335, "learning_rate": 7.918019701847168e-06, "loss": 18.1053, "step": 17649 }, { "epoch": 0.3226278172811523, "grad_norm": 7.417586721580354, "learning_rate": 7.917779323983748e-06, "loss": 18.019, "step": 17650 }, { "epoch": 0.32264609647759884, "grad_norm": 7.810734338045706, "learning_rate": 7.917538935893765e-06, "loss": 18.1668, "step": 17651 }, { "epoch": 0.3226643756740454, "grad_norm": 5.962226528997857, "learning_rate": 7.91729853757806e-06, "loss": 17.1979, "step": 17652 }, { "epoch": 0.3226826548704919, "grad_norm": 6.282755958032387, "learning_rate": 7.917058129037478e-06, "loss": 17.5572, "step": 17653 }, { "epoch": 0.3227009340669384, "grad_norm": 7.2950753549441245, "learning_rate": 7.91681771027286e-06, "loss": 17.8905, "step": 17654 }, { "epoch": 0.32271921326338493, "grad_norm": 6.718396391577625, "learning_rate": 7.916577281285048e-06, "loss": 17.5792, "step": 17655 }, { "epoch": 0.32273749245983147, "grad_norm": 5.407450209892771, "learning_rate": 7.916336842074888e-06, "loss": 17.3437, "step": 17656 }, { "epoch": 0.322755771656278, "grad_norm": 5.827207139776879, "learning_rate": 7.916096392643218e-06, "loss": 17.417, "step": 17657 }, { "epoch": 0.32277405085272454, "grad_norm": 6.742161879018509, "learning_rate": 7.915855932990884e-06, "loss": 17.5526, "step": 17658 }, { "epoch": 0.322792330049171, "grad_norm": 6.818318909808149, "learning_rate": 7.915615463118729e-06, "loss": 17.5256, "step": 17659 }, { "epoch": 0.32281060924561755, "grad_norm": 6.824640501856777, "learning_rate": 7.915374983027593e-06, "loss": 17.8469, "step": 17660 }, { "epoch": 0.3228288884420641, "grad_norm": 6.830626955606592, "learning_rate": 7.915134492718323e-06, "loss": 17.6978, "step": 17661 }, { "epoch": 0.3228471676385106, "grad_norm": 7.148219238642276, "learning_rate": 7.914893992191759e-06, "loss": 17.8267, "step": 17662 }, { "epoch": 0.32286544683495716, "grad_norm": 7.88725752829819, "learning_rate": 7.914653481448742e-06, "loss": 18.2131, "step": 17663 }, { "epoch": 0.32288372603140364, "grad_norm": 6.4780235233255095, "learning_rate": 7.914412960490118e-06, "loss": 17.4028, "step": 17664 }, { "epoch": 0.3229020052278502, "grad_norm": 5.945918683748106, "learning_rate": 7.914172429316733e-06, "loss": 17.4281, "step": 17665 }, { "epoch": 0.3229202844242967, "grad_norm": 6.167771083235213, "learning_rate": 7.913931887929423e-06, "loss": 17.312, "step": 17666 }, { "epoch": 0.32293856362074325, "grad_norm": 8.232554697348833, "learning_rate": 7.913691336329037e-06, "loss": 18.3315, "step": 17667 }, { "epoch": 0.3229568428171898, "grad_norm": 6.99789188139659, "learning_rate": 7.913450774516415e-06, "loss": 17.6496, "step": 17668 }, { "epoch": 0.32297512201363626, "grad_norm": 6.172405494613258, "learning_rate": 7.9132102024924e-06, "loss": 17.3793, "step": 17669 }, { "epoch": 0.3229934012100828, "grad_norm": 6.875727755040785, "learning_rate": 7.912969620257835e-06, "loss": 17.3903, "step": 17670 }, { "epoch": 0.32301168040652933, "grad_norm": 8.725162881721227, "learning_rate": 7.912729027813568e-06, "loss": 18.6348, "step": 17671 }, { "epoch": 0.32302995960297587, "grad_norm": 5.532490516428547, "learning_rate": 7.912488425160436e-06, "loss": 17.2522, "step": 17672 }, { "epoch": 0.3230482387994224, "grad_norm": 5.84360916027288, "learning_rate": 7.912247812299283e-06, "loss": 17.2939, "step": 17673 }, { "epoch": 0.3230665179958689, "grad_norm": 8.111220523515845, "learning_rate": 7.912007189230957e-06, "loss": 18.5528, "step": 17674 }, { "epoch": 0.3230847971923154, "grad_norm": 7.7266110967014425, "learning_rate": 7.911766555956297e-06, "loss": 18.3292, "step": 17675 }, { "epoch": 0.32310307638876196, "grad_norm": 6.113899516838553, "learning_rate": 7.91152591247615e-06, "loss": 17.3183, "step": 17676 }, { "epoch": 0.3231213555852085, "grad_norm": 4.764840947501727, "learning_rate": 7.911285258791355e-06, "loss": 17.0315, "step": 17677 }, { "epoch": 0.32313963478165497, "grad_norm": 5.930207843666761, "learning_rate": 7.91104459490276e-06, "loss": 17.4221, "step": 17678 }, { "epoch": 0.3231579139781015, "grad_norm": 6.575485128174704, "learning_rate": 7.910803920811203e-06, "loss": 17.528, "step": 17679 }, { "epoch": 0.32317619317454804, "grad_norm": 7.029879562926375, "learning_rate": 7.910563236517534e-06, "loss": 17.6705, "step": 17680 }, { "epoch": 0.3231944723709946, "grad_norm": 7.354106194436128, "learning_rate": 7.910322542022591e-06, "loss": 17.7459, "step": 17681 }, { "epoch": 0.3232127515674411, "grad_norm": 6.873088354184548, "learning_rate": 7.91008183732722e-06, "loss": 17.7918, "step": 17682 }, { "epoch": 0.3232310307638876, "grad_norm": 6.241485631431879, "learning_rate": 7.909841122432269e-06, "loss": 17.7582, "step": 17683 }, { "epoch": 0.32324930996033413, "grad_norm": 6.642653081716574, "learning_rate": 7.909600397338573e-06, "loss": 17.5774, "step": 17684 }, { "epoch": 0.32326758915678067, "grad_norm": 7.312009719374648, "learning_rate": 7.909359662046983e-06, "loss": 17.7914, "step": 17685 }, { "epoch": 0.3232858683532272, "grad_norm": 7.6530235791674, "learning_rate": 7.909118916558338e-06, "loss": 18.1013, "step": 17686 }, { "epoch": 0.32330414754967374, "grad_norm": 5.896039758083264, "learning_rate": 7.908878160873483e-06, "loss": 17.2169, "step": 17687 }, { "epoch": 0.3233224267461202, "grad_norm": 7.229472362374189, "learning_rate": 7.908637394993265e-06, "loss": 17.7046, "step": 17688 }, { "epoch": 0.32334070594256675, "grad_norm": 6.181143582624684, "learning_rate": 7.90839661891852e-06, "loss": 17.3552, "step": 17689 }, { "epoch": 0.3233589851390133, "grad_norm": 6.751168869331026, "learning_rate": 7.908155832650103e-06, "loss": 17.6183, "step": 17690 }, { "epoch": 0.3233772643354598, "grad_norm": 5.3818619076095136, "learning_rate": 7.90791503618885e-06, "loss": 17.0947, "step": 17691 }, { "epoch": 0.32339554353190636, "grad_norm": 7.173813864648602, "learning_rate": 7.907674229535606e-06, "loss": 17.7175, "step": 17692 }, { "epoch": 0.32341382272835284, "grad_norm": 6.849343874448063, "learning_rate": 7.907433412691218e-06, "loss": 17.6358, "step": 17693 }, { "epoch": 0.3234321019247994, "grad_norm": 5.811953914809876, "learning_rate": 7.907192585656528e-06, "loss": 17.3599, "step": 17694 }, { "epoch": 0.3234503811212459, "grad_norm": 6.406612554200852, "learning_rate": 7.90695174843238e-06, "loss": 17.3569, "step": 17695 }, { "epoch": 0.32346866031769245, "grad_norm": 5.3506577758741125, "learning_rate": 7.906710901019618e-06, "loss": 17.1093, "step": 17696 }, { "epoch": 0.323486939514139, "grad_norm": 7.877884365220087, "learning_rate": 7.906470043419086e-06, "loss": 18.4213, "step": 17697 }, { "epoch": 0.32350521871058546, "grad_norm": 6.736546281178132, "learning_rate": 7.90622917563163e-06, "loss": 17.8381, "step": 17698 }, { "epoch": 0.323523497907032, "grad_norm": 7.589485775589006, "learning_rate": 7.905988297658093e-06, "loss": 18.1189, "step": 17699 }, { "epoch": 0.32354177710347853, "grad_norm": 5.904388765821344, "learning_rate": 7.905747409499318e-06, "loss": 17.4213, "step": 17700 }, { "epoch": 0.32356005629992507, "grad_norm": 7.315703403965148, "learning_rate": 7.905506511156151e-06, "loss": 17.8598, "step": 17701 }, { "epoch": 0.3235783354963716, "grad_norm": 7.147717361011801, "learning_rate": 7.905265602629435e-06, "loss": 18.0147, "step": 17702 }, { "epoch": 0.3235966146928181, "grad_norm": 8.356688813804935, "learning_rate": 7.905024683920018e-06, "loss": 18.8452, "step": 17703 }, { "epoch": 0.3236148938892646, "grad_norm": 5.914228540617025, "learning_rate": 7.904783755028738e-06, "loss": 17.2188, "step": 17704 }, { "epoch": 0.32363317308571116, "grad_norm": 5.47241798190702, "learning_rate": 7.904542815956444e-06, "loss": 17.2184, "step": 17705 }, { "epoch": 0.3236514522821577, "grad_norm": 5.4748724835386104, "learning_rate": 7.90430186670398e-06, "loss": 17.1407, "step": 17706 }, { "epoch": 0.32366973147860423, "grad_norm": 7.256228296101664, "learning_rate": 7.90406090727219e-06, "loss": 17.7795, "step": 17707 }, { "epoch": 0.3236880106750507, "grad_norm": 7.323442326843513, "learning_rate": 7.90381993766192e-06, "loss": 18.3803, "step": 17708 }, { "epoch": 0.32370628987149724, "grad_norm": 6.627845023069605, "learning_rate": 7.903578957874012e-06, "loss": 17.4024, "step": 17709 }, { "epoch": 0.3237245690679438, "grad_norm": 7.420892493312514, "learning_rate": 7.90333796790931e-06, "loss": 18.1122, "step": 17710 }, { "epoch": 0.3237428482643903, "grad_norm": 7.30268088293728, "learning_rate": 7.903096967768662e-06, "loss": 18.3343, "step": 17711 }, { "epoch": 0.3237611274608368, "grad_norm": 6.287198503197101, "learning_rate": 7.902855957452911e-06, "loss": 17.4339, "step": 17712 }, { "epoch": 0.32377940665728333, "grad_norm": 6.465856027196392, "learning_rate": 7.902614936962902e-06, "loss": 17.6532, "step": 17713 }, { "epoch": 0.32379768585372987, "grad_norm": 6.308087695928773, "learning_rate": 7.902373906299479e-06, "loss": 17.4322, "step": 17714 }, { "epoch": 0.3238159650501764, "grad_norm": 6.605016306857635, "learning_rate": 7.902132865463487e-06, "loss": 17.5501, "step": 17715 }, { "epoch": 0.32383424424662294, "grad_norm": 5.748431837972686, "learning_rate": 7.901891814455772e-06, "loss": 17.503, "step": 17716 }, { "epoch": 0.3238525234430694, "grad_norm": 7.569285201614199, "learning_rate": 7.901650753277177e-06, "loss": 18.0239, "step": 17717 }, { "epoch": 0.32387080263951595, "grad_norm": 8.720107472605482, "learning_rate": 7.901409681928548e-06, "loss": 18.1893, "step": 17718 }, { "epoch": 0.3238890818359625, "grad_norm": 6.799628458198373, "learning_rate": 7.90116860041073e-06, "loss": 18.034, "step": 17719 }, { "epoch": 0.323907361032409, "grad_norm": 7.660477877385908, "learning_rate": 7.90092750872457e-06, "loss": 18.111, "step": 17720 }, { "epoch": 0.32392564022885556, "grad_norm": 6.272439255187087, "learning_rate": 7.900686406870908e-06, "loss": 17.3672, "step": 17721 }, { "epoch": 0.32394391942530204, "grad_norm": 6.880732583845493, "learning_rate": 7.900445294850591e-06, "loss": 17.4492, "step": 17722 }, { "epoch": 0.3239621986217486, "grad_norm": 6.76794043338143, "learning_rate": 7.900204172664468e-06, "loss": 17.4717, "step": 17723 }, { "epoch": 0.3239804778181951, "grad_norm": 6.342803464392752, "learning_rate": 7.89996304031338e-06, "loss": 17.3245, "step": 17724 }, { "epoch": 0.32399875701464165, "grad_norm": 8.656431088842632, "learning_rate": 7.899721897798172e-06, "loss": 18.5138, "step": 17725 }, { "epoch": 0.3240170362110882, "grad_norm": 6.08280644000809, "learning_rate": 7.899480745119693e-06, "loss": 17.5171, "step": 17726 }, { "epoch": 0.32403531540753466, "grad_norm": 6.808667970082966, "learning_rate": 7.899239582278783e-06, "loss": 17.5517, "step": 17727 }, { "epoch": 0.3240535946039812, "grad_norm": 6.885294328924785, "learning_rate": 7.898998409276291e-06, "loss": 17.6877, "step": 17728 }, { "epoch": 0.32407187380042773, "grad_norm": 6.599931734145278, "learning_rate": 7.89875722611306e-06, "loss": 17.6172, "step": 17729 }, { "epoch": 0.32409015299687427, "grad_norm": 5.370720150179787, "learning_rate": 7.898516032789937e-06, "loss": 17.0975, "step": 17730 }, { "epoch": 0.3241084321933208, "grad_norm": 5.709689429617202, "learning_rate": 7.898274829307769e-06, "loss": 17.2393, "step": 17731 }, { "epoch": 0.3241267113897673, "grad_norm": 6.0834948297296645, "learning_rate": 7.898033615667395e-06, "loss": 17.4599, "step": 17732 }, { "epoch": 0.3241449905862138, "grad_norm": 6.197853953703129, "learning_rate": 7.897792391869668e-06, "loss": 17.4502, "step": 17733 }, { "epoch": 0.32416326978266036, "grad_norm": 6.903159381393967, "learning_rate": 7.89755115791543e-06, "loss": 17.6691, "step": 17734 }, { "epoch": 0.3241815489791069, "grad_norm": 6.520885784867376, "learning_rate": 7.897309913805525e-06, "loss": 17.7851, "step": 17735 }, { "epoch": 0.32419982817555343, "grad_norm": 6.8901319836075805, "learning_rate": 7.8970686595408e-06, "loss": 17.6095, "step": 17736 }, { "epoch": 0.3242181073719999, "grad_norm": 6.546169171931813, "learning_rate": 7.896827395122102e-06, "loss": 17.2313, "step": 17737 }, { "epoch": 0.32423638656844644, "grad_norm": 7.915518776957184, "learning_rate": 7.896586120550276e-06, "loss": 17.9864, "step": 17738 }, { "epoch": 0.324254665764893, "grad_norm": 6.9312848548921435, "learning_rate": 7.896344835826166e-06, "loss": 17.8592, "step": 17739 }, { "epoch": 0.3242729449613395, "grad_norm": 5.485400336636975, "learning_rate": 7.89610354095062e-06, "loss": 17.3208, "step": 17740 }, { "epoch": 0.32429122415778605, "grad_norm": 7.078439224696487, "learning_rate": 7.895862235924481e-06, "loss": 18.073, "step": 17741 }, { "epoch": 0.32430950335423253, "grad_norm": 6.53713902367002, "learning_rate": 7.895620920748594e-06, "loss": 17.5156, "step": 17742 }, { "epoch": 0.32432778255067907, "grad_norm": 6.786474194080707, "learning_rate": 7.895379595423809e-06, "loss": 17.6559, "step": 17743 }, { "epoch": 0.3243460617471256, "grad_norm": 6.124202454779582, "learning_rate": 7.895138259950972e-06, "loss": 17.5843, "step": 17744 }, { "epoch": 0.32436434094357214, "grad_norm": 6.803613532007667, "learning_rate": 7.894896914330925e-06, "loss": 17.6096, "step": 17745 }, { "epoch": 0.3243826201400186, "grad_norm": 5.626643891123522, "learning_rate": 7.894655558564514e-06, "loss": 17.1485, "step": 17746 }, { "epoch": 0.32440089933646515, "grad_norm": 5.9320916818188785, "learning_rate": 7.894414192652589e-06, "loss": 17.1957, "step": 17747 }, { "epoch": 0.3244191785329117, "grad_norm": 7.825310368883438, "learning_rate": 7.89417281659599e-06, "loss": 18.514, "step": 17748 }, { "epoch": 0.3244374577293582, "grad_norm": 5.832333198813054, "learning_rate": 7.89393143039557e-06, "loss": 17.4828, "step": 17749 }, { "epoch": 0.32445573692580476, "grad_norm": 7.19840251586273, "learning_rate": 7.893690034052167e-06, "loss": 18.1829, "step": 17750 }, { "epoch": 0.32447401612225124, "grad_norm": 6.9550916640503875, "learning_rate": 7.893448627566637e-06, "loss": 17.4698, "step": 17751 }, { "epoch": 0.3244922953186978, "grad_norm": 6.196420070907171, "learning_rate": 7.893207210939817e-06, "loss": 17.5809, "step": 17752 }, { "epoch": 0.3245105745151443, "grad_norm": 6.916335868485267, "learning_rate": 7.892965784172558e-06, "loss": 17.642, "step": 17753 }, { "epoch": 0.32452885371159085, "grad_norm": 7.6881850398519616, "learning_rate": 7.892724347265706e-06, "loss": 18.3688, "step": 17754 }, { "epoch": 0.3245471329080374, "grad_norm": 6.320467909081312, "learning_rate": 7.892482900220105e-06, "loss": 17.5321, "step": 17755 }, { "epoch": 0.32456541210448386, "grad_norm": 5.970622163186657, "learning_rate": 7.892241443036601e-06, "loss": 17.6554, "step": 17756 }, { "epoch": 0.3245836913009304, "grad_norm": 6.518388630649442, "learning_rate": 7.891999975716043e-06, "loss": 17.7403, "step": 17757 }, { "epoch": 0.32460197049737693, "grad_norm": 8.581149135678286, "learning_rate": 7.891758498259277e-06, "loss": 18.1474, "step": 17758 }, { "epoch": 0.32462024969382347, "grad_norm": 6.430791470956686, "learning_rate": 7.891517010667147e-06, "loss": 17.4294, "step": 17759 }, { "epoch": 0.32463852889027, "grad_norm": 6.747363986693666, "learning_rate": 7.891275512940502e-06, "loss": 17.7416, "step": 17760 }, { "epoch": 0.3246568080867165, "grad_norm": 6.648622315508402, "learning_rate": 7.891034005080188e-06, "loss": 17.8707, "step": 17761 }, { "epoch": 0.324675087283163, "grad_norm": 7.107497351581862, "learning_rate": 7.890792487087049e-06, "loss": 17.6806, "step": 17762 }, { "epoch": 0.32469336647960956, "grad_norm": 6.519000431193044, "learning_rate": 7.890550958961933e-06, "loss": 17.4322, "step": 17763 }, { "epoch": 0.3247116456760561, "grad_norm": 6.180686488773445, "learning_rate": 7.890309420705686e-06, "loss": 17.4595, "step": 17764 }, { "epoch": 0.32472992487250263, "grad_norm": 6.813454925467863, "learning_rate": 7.890067872319158e-06, "loss": 17.3925, "step": 17765 }, { "epoch": 0.3247482040689491, "grad_norm": 7.281394317189339, "learning_rate": 7.88982631380319e-06, "loss": 17.4958, "step": 17766 }, { "epoch": 0.32476648326539564, "grad_norm": 6.22838325671392, "learning_rate": 7.889584745158634e-06, "loss": 17.5235, "step": 17767 }, { "epoch": 0.3247847624618422, "grad_norm": 7.7460781767421345, "learning_rate": 7.889343166386334e-06, "loss": 17.707, "step": 17768 }, { "epoch": 0.3248030416582887, "grad_norm": 6.367076966095275, "learning_rate": 7.889101577487134e-06, "loss": 17.6144, "step": 17769 }, { "epoch": 0.32482132085473525, "grad_norm": 6.420686888235853, "learning_rate": 7.888859978461887e-06, "loss": 17.7745, "step": 17770 }, { "epoch": 0.32483960005118173, "grad_norm": 6.926027327496974, "learning_rate": 7.888618369311436e-06, "loss": 17.5875, "step": 17771 }, { "epoch": 0.32485787924762827, "grad_norm": 6.695980001231234, "learning_rate": 7.888376750036626e-06, "loss": 17.4798, "step": 17772 }, { "epoch": 0.3248761584440748, "grad_norm": 6.401478752069769, "learning_rate": 7.888135120638309e-06, "loss": 17.4129, "step": 17773 }, { "epoch": 0.32489443764052134, "grad_norm": 6.690387221084487, "learning_rate": 7.887893481117327e-06, "loss": 17.6467, "step": 17774 }, { "epoch": 0.3249127168369679, "grad_norm": 6.288090431810262, "learning_rate": 7.88765183147453e-06, "loss": 17.5896, "step": 17775 }, { "epoch": 0.32493099603341435, "grad_norm": 6.856296727872927, "learning_rate": 7.887410171710764e-06, "loss": 17.6377, "step": 17776 }, { "epoch": 0.3249492752298609, "grad_norm": 7.220036562946701, "learning_rate": 7.887168501826874e-06, "loss": 18.1338, "step": 17777 }, { "epoch": 0.3249675544263074, "grad_norm": 6.856851184796004, "learning_rate": 7.88692682182371e-06, "loss": 17.7401, "step": 17778 }, { "epoch": 0.32498583362275396, "grad_norm": 8.166159479680575, "learning_rate": 7.886685131702118e-06, "loss": 18.38, "step": 17779 }, { "epoch": 0.32500411281920044, "grad_norm": 7.24517999511875, "learning_rate": 7.886443431462946e-06, "loss": 17.9074, "step": 17780 }, { "epoch": 0.325022392015647, "grad_norm": 7.729271856325883, "learning_rate": 7.886201721107041e-06, "loss": 18.1386, "step": 17781 }, { "epoch": 0.3250406712120935, "grad_norm": 6.002140416107779, "learning_rate": 7.885960000635247e-06, "loss": 17.3604, "step": 17782 }, { "epoch": 0.32505895040854005, "grad_norm": 6.484218390540682, "learning_rate": 7.885718270048414e-06, "loss": 17.3894, "step": 17783 }, { "epoch": 0.3250772296049866, "grad_norm": 6.5765930619465, "learning_rate": 7.885476529347391e-06, "loss": 17.8014, "step": 17784 }, { "epoch": 0.32509550880143306, "grad_norm": 7.923834403163036, "learning_rate": 7.885234778533022e-06, "loss": 18.1465, "step": 17785 }, { "epoch": 0.3251137879978796, "grad_norm": 6.430724149334308, "learning_rate": 7.884993017606155e-06, "loss": 17.4932, "step": 17786 }, { "epoch": 0.32513206719432614, "grad_norm": 6.77751500942763, "learning_rate": 7.884751246567637e-06, "loss": 18.0722, "step": 17787 }, { "epoch": 0.32515034639077267, "grad_norm": 6.186405548945096, "learning_rate": 7.884509465418318e-06, "loss": 17.6653, "step": 17788 }, { "epoch": 0.3251686255872192, "grad_norm": 7.268120331299511, "learning_rate": 7.884267674159043e-06, "loss": 17.9533, "step": 17789 }, { "epoch": 0.3251869047836657, "grad_norm": 7.245866146128734, "learning_rate": 7.884025872790661e-06, "loss": 18.1388, "step": 17790 }, { "epoch": 0.3252051839801122, "grad_norm": 6.500089085904076, "learning_rate": 7.883784061314017e-06, "loss": 17.6386, "step": 17791 }, { "epoch": 0.32522346317655876, "grad_norm": 6.76226009888217, "learning_rate": 7.88354223972996e-06, "loss": 17.5732, "step": 17792 }, { "epoch": 0.3252417423730053, "grad_norm": 5.529002913461683, "learning_rate": 7.883300408039338e-06, "loss": 17.024, "step": 17793 }, { "epoch": 0.32526002156945183, "grad_norm": 6.280103600881916, "learning_rate": 7.883058566243e-06, "loss": 17.3343, "step": 17794 }, { "epoch": 0.3252783007658983, "grad_norm": 5.537614534548059, "learning_rate": 7.88281671434179e-06, "loss": 17.201, "step": 17795 }, { "epoch": 0.32529657996234485, "grad_norm": 6.842566450357201, "learning_rate": 7.882574852336558e-06, "loss": 17.7165, "step": 17796 }, { "epoch": 0.3253148591587914, "grad_norm": 6.62388762569798, "learning_rate": 7.882332980228151e-06, "loss": 17.5779, "step": 17797 }, { "epoch": 0.3253331383552379, "grad_norm": 7.81704242010747, "learning_rate": 7.882091098017417e-06, "loss": 18.1819, "step": 17798 }, { "epoch": 0.32535141755168445, "grad_norm": 8.17155499574659, "learning_rate": 7.881849205705206e-06, "loss": 18.1628, "step": 17799 }, { "epoch": 0.32536969674813093, "grad_norm": 5.228344861810051, "learning_rate": 7.881607303292361e-06, "loss": 17.1929, "step": 17800 }, { "epoch": 0.32538797594457747, "grad_norm": 6.478130051775579, "learning_rate": 7.881365390779734e-06, "loss": 17.7408, "step": 17801 }, { "epoch": 0.325406255141024, "grad_norm": 7.205583581384945, "learning_rate": 7.881123468168169e-06, "loss": 17.7589, "step": 17802 }, { "epoch": 0.32542453433747054, "grad_norm": 8.659392100726214, "learning_rate": 7.880881535458519e-06, "loss": 18.608, "step": 17803 }, { "epoch": 0.3254428135339171, "grad_norm": 6.691558341576043, "learning_rate": 7.880639592651628e-06, "loss": 17.315, "step": 17804 }, { "epoch": 0.32546109273036355, "grad_norm": 6.824648938193877, "learning_rate": 7.880397639748346e-06, "loss": 17.7161, "step": 17805 }, { "epoch": 0.3254793719268101, "grad_norm": 5.681769825030666, "learning_rate": 7.88015567674952e-06, "loss": 17.378, "step": 17806 }, { "epoch": 0.3254976511232566, "grad_norm": 7.590131225088655, "learning_rate": 7.879913703655997e-06, "loss": 17.7448, "step": 17807 }, { "epoch": 0.32551593031970316, "grad_norm": 6.873404346881098, "learning_rate": 7.879671720468626e-06, "loss": 17.6415, "step": 17808 }, { "epoch": 0.3255342095161497, "grad_norm": 8.40510328880946, "learning_rate": 7.879429727188257e-06, "loss": 17.7219, "step": 17809 }, { "epoch": 0.3255524887125962, "grad_norm": 6.477972532371386, "learning_rate": 7.879187723815737e-06, "loss": 17.7117, "step": 17810 }, { "epoch": 0.3255707679090427, "grad_norm": 7.337673120293114, "learning_rate": 7.878945710351913e-06, "loss": 17.9361, "step": 17811 }, { "epoch": 0.32558904710548925, "grad_norm": 6.719375226765658, "learning_rate": 7.878703686797634e-06, "loss": 17.4639, "step": 17812 }, { "epoch": 0.3256073263019358, "grad_norm": 5.282211402681556, "learning_rate": 7.878461653153749e-06, "loss": 17.2107, "step": 17813 }, { "epoch": 0.32562560549838226, "grad_norm": 6.322777752947682, "learning_rate": 7.878219609421105e-06, "loss": 17.5931, "step": 17814 }, { "epoch": 0.3256438846948288, "grad_norm": 6.335835715646217, "learning_rate": 7.87797755560055e-06, "loss": 17.7482, "step": 17815 }, { "epoch": 0.32566216389127534, "grad_norm": 6.695294309727545, "learning_rate": 7.877735491692937e-06, "loss": 17.9087, "step": 17816 }, { "epoch": 0.32568044308772187, "grad_norm": 6.898742884933421, "learning_rate": 7.877493417699109e-06, "loss": 17.8053, "step": 17817 }, { "epoch": 0.3256987222841684, "grad_norm": 5.513199510821228, "learning_rate": 7.877251333619916e-06, "loss": 17.1758, "step": 17818 }, { "epoch": 0.3257170014806149, "grad_norm": 5.876307084156893, "learning_rate": 7.877009239456206e-06, "loss": 17.3925, "step": 17819 }, { "epoch": 0.3257352806770614, "grad_norm": 7.402268251489208, "learning_rate": 7.876767135208829e-06, "loss": 17.6846, "step": 17820 }, { "epoch": 0.32575355987350796, "grad_norm": 6.426810501598508, "learning_rate": 7.876525020878632e-06, "loss": 17.7161, "step": 17821 }, { "epoch": 0.3257718390699545, "grad_norm": 6.662116041025357, "learning_rate": 7.876282896466465e-06, "loss": 17.7204, "step": 17822 }, { "epoch": 0.32579011826640103, "grad_norm": 5.774182478325658, "learning_rate": 7.876040761973179e-06, "loss": 17.3396, "step": 17823 }, { "epoch": 0.3258083974628475, "grad_norm": 6.616518385284222, "learning_rate": 7.875798617399614e-06, "loss": 17.5979, "step": 17824 }, { "epoch": 0.32582667665929405, "grad_norm": 6.846705875031634, "learning_rate": 7.875556462746628e-06, "loss": 17.9477, "step": 17825 }, { "epoch": 0.3258449558557406, "grad_norm": 5.693870640415677, "learning_rate": 7.875314298015065e-06, "loss": 17.2138, "step": 17826 }, { "epoch": 0.3258632350521871, "grad_norm": 6.969038422382312, "learning_rate": 7.875072123205776e-06, "loss": 18.0013, "step": 17827 }, { "epoch": 0.32588151424863365, "grad_norm": 6.1486335454418874, "learning_rate": 7.874829938319608e-06, "loss": 17.4233, "step": 17828 }, { "epoch": 0.32589979344508013, "grad_norm": 6.132878567881788, "learning_rate": 7.87458774335741e-06, "loss": 17.4989, "step": 17829 }, { "epoch": 0.32591807264152667, "grad_norm": 6.537418428041828, "learning_rate": 7.874345538320033e-06, "loss": 17.5308, "step": 17830 }, { "epoch": 0.3259363518379732, "grad_norm": 7.185045793392967, "learning_rate": 7.874103323208323e-06, "loss": 17.5923, "step": 17831 }, { "epoch": 0.32595463103441974, "grad_norm": 7.792166666071645, "learning_rate": 7.87386109802313e-06, "loss": 18.4338, "step": 17832 }, { "epoch": 0.3259729102308663, "grad_norm": 5.88940091915513, "learning_rate": 7.873618862765305e-06, "loss": 17.4111, "step": 17833 }, { "epoch": 0.32599118942731276, "grad_norm": 5.0957149338845635, "learning_rate": 7.873376617435693e-06, "loss": 17.0923, "step": 17834 }, { "epoch": 0.3260094686237593, "grad_norm": 6.947892487435096, "learning_rate": 7.873134362035147e-06, "loss": 17.7021, "step": 17835 }, { "epoch": 0.3260277478202058, "grad_norm": 6.767897076797359, "learning_rate": 7.872892096564512e-06, "loss": 17.6243, "step": 17836 }, { "epoch": 0.32604602701665236, "grad_norm": 5.5962489430574625, "learning_rate": 7.872649821024642e-06, "loss": 17.0655, "step": 17837 }, { "epoch": 0.3260643062130989, "grad_norm": 7.541190100036091, "learning_rate": 7.872407535416384e-06, "loss": 18.0704, "step": 17838 }, { "epoch": 0.3260825854095454, "grad_norm": 7.35367908788049, "learning_rate": 7.872165239740585e-06, "loss": 18.224, "step": 17839 }, { "epoch": 0.3261008646059919, "grad_norm": 6.867253190417915, "learning_rate": 7.871922933998098e-06, "loss": 17.7978, "step": 17840 }, { "epoch": 0.32611914380243845, "grad_norm": 7.990110756551321, "learning_rate": 7.871680618189768e-06, "loss": 17.9894, "step": 17841 }, { "epoch": 0.326137422998885, "grad_norm": 5.920388531886041, "learning_rate": 7.871438292316448e-06, "loss": 17.292, "step": 17842 }, { "epoch": 0.3261557021953315, "grad_norm": 5.815565367184524, "learning_rate": 7.871195956378985e-06, "loss": 17.4709, "step": 17843 }, { "epoch": 0.326173981391778, "grad_norm": 5.669661110095847, "learning_rate": 7.870953610378231e-06, "loss": 17.2476, "step": 17844 }, { "epoch": 0.32619226058822454, "grad_norm": 6.000059489859444, "learning_rate": 7.870711254315031e-06, "loss": 17.153, "step": 17845 }, { "epoch": 0.32621053978467107, "grad_norm": 6.574262859048824, "learning_rate": 7.870468888190239e-06, "loss": 17.281, "step": 17846 }, { "epoch": 0.3262288189811176, "grad_norm": 6.777443581943335, "learning_rate": 7.870226512004704e-06, "loss": 17.566, "step": 17847 }, { "epoch": 0.3262470981775641, "grad_norm": 7.712616665092374, "learning_rate": 7.869984125759272e-06, "loss": 18.2918, "step": 17848 }, { "epoch": 0.3262653773740106, "grad_norm": 7.503483333333809, "learning_rate": 7.869741729454797e-06, "loss": 17.9509, "step": 17849 }, { "epoch": 0.32628365657045716, "grad_norm": 6.6588036093839165, "learning_rate": 7.869499323092122e-06, "loss": 17.5324, "step": 17850 }, { "epoch": 0.3263019357669037, "grad_norm": 6.430826262770365, "learning_rate": 7.869256906672104e-06, "loss": 17.451, "step": 17851 }, { "epoch": 0.32632021496335023, "grad_norm": 7.02650368230658, "learning_rate": 7.869014480195589e-06, "loss": 17.9679, "step": 17852 }, { "epoch": 0.3263384941597967, "grad_norm": 6.164166018539878, "learning_rate": 7.868772043663429e-06, "loss": 17.3597, "step": 17853 }, { "epoch": 0.32635677335624325, "grad_norm": 6.413715747359366, "learning_rate": 7.868529597076469e-06, "loss": 17.8651, "step": 17854 }, { "epoch": 0.3263750525526898, "grad_norm": 6.470010343951983, "learning_rate": 7.868287140435564e-06, "loss": 17.5322, "step": 17855 }, { "epoch": 0.3263933317491363, "grad_norm": 6.276310738559672, "learning_rate": 7.86804467374156e-06, "loss": 17.6477, "step": 17856 }, { "epoch": 0.32641161094558285, "grad_norm": 6.24266804974688, "learning_rate": 7.867802196995308e-06, "loss": 17.4857, "step": 17857 }, { "epoch": 0.32642989014202933, "grad_norm": 6.496002462917033, "learning_rate": 7.867559710197658e-06, "loss": 17.4417, "step": 17858 }, { "epoch": 0.32644816933847587, "grad_norm": 7.161516651354113, "learning_rate": 7.867317213349461e-06, "loss": 18.0931, "step": 17859 }, { "epoch": 0.3264664485349224, "grad_norm": 6.511777079840866, "learning_rate": 7.867074706451567e-06, "loss": 17.4908, "step": 17860 }, { "epoch": 0.32648472773136894, "grad_norm": 7.188666024752628, "learning_rate": 7.866832189504823e-06, "loss": 17.5326, "step": 17861 }, { "epoch": 0.3265030069278155, "grad_norm": 7.008425777054819, "learning_rate": 7.866589662510083e-06, "loss": 17.9847, "step": 17862 }, { "epoch": 0.32652128612426196, "grad_norm": 7.068713305265701, "learning_rate": 7.866347125468192e-06, "loss": 17.8885, "step": 17863 }, { "epoch": 0.3265395653207085, "grad_norm": 5.917749223184772, "learning_rate": 7.866104578380005e-06, "loss": 17.7064, "step": 17864 }, { "epoch": 0.326557844517155, "grad_norm": 6.621049855079859, "learning_rate": 7.86586202124637e-06, "loss": 17.6552, "step": 17865 }, { "epoch": 0.32657612371360156, "grad_norm": 7.758840166314253, "learning_rate": 7.865619454068137e-06, "loss": 17.9226, "step": 17866 }, { "epoch": 0.3265944029100481, "grad_norm": 7.331408698946266, "learning_rate": 7.865376876846158e-06, "loss": 17.9984, "step": 17867 }, { "epoch": 0.3266126821064946, "grad_norm": 6.837829300824041, "learning_rate": 7.86513428958128e-06, "loss": 17.4893, "step": 17868 }, { "epoch": 0.3266309613029411, "grad_norm": 6.4049637784235545, "learning_rate": 7.864891692274355e-06, "loss": 17.5133, "step": 17869 }, { "epoch": 0.32664924049938765, "grad_norm": 6.273073302128767, "learning_rate": 7.864649084926232e-06, "loss": 17.2967, "step": 17870 }, { "epoch": 0.3266675196958342, "grad_norm": 6.344309302924921, "learning_rate": 7.864406467537764e-06, "loss": 17.439, "step": 17871 }, { "epoch": 0.3266857988922807, "grad_norm": 6.449355080338526, "learning_rate": 7.864163840109802e-06, "loss": 17.1538, "step": 17872 }, { "epoch": 0.3267040780887272, "grad_norm": 5.920602117673282, "learning_rate": 7.863921202643192e-06, "loss": 17.258, "step": 17873 }, { "epoch": 0.32672235728517374, "grad_norm": 6.259629210157948, "learning_rate": 7.863678555138786e-06, "loss": 17.3877, "step": 17874 }, { "epoch": 0.3267406364816203, "grad_norm": 6.69678860967342, "learning_rate": 7.863435897597437e-06, "loss": 17.802, "step": 17875 }, { "epoch": 0.3267589156780668, "grad_norm": 6.340828147922386, "learning_rate": 7.863193230019991e-06, "loss": 17.6317, "step": 17876 }, { "epoch": 0.32677719487451334, "grad_norm": 6.991458089088453, "learning_rate": 7.862950552407304e-06, "loss": 18.0541, "step": 17877 }, { "epoch": 0.3267954740709598, "grad_norm": 6.0945809284360495, "learning_rate": 7.862707864760225e-06, "loss": 17.2963, "step": 17878 }, { "epoch": 0.32681375326740636, "grad_norm": 7.682440158745301, "learning_rate": 7.862465167079599e-06, "loss": 17.6765, "step": 17879 }, { "epoch": 0.3268320324638529, "grad_norm": 6.554405377487383, "learning_rate": 7.862222459366283e-06, "loss": 17.4388, "step": 17880 }, { "epoch": 0.32685031166029943, "grad_norm": 5.860101075469898, "learning_rate": 7.861979741621126e-06, "loss": 17.3062, "step": 17881 }, { "epoch": 0.3268685908567459, "grad_norm": 5.921788599011734, "learning_rate": 7.86173701384498e-06, "loss": 17.421, "step": 17882 }, { "epoch": 0.32688687005319245, "grad_norm": 6.729473933777559, "learning_rate": 7.86149427603869e-06, "loss": 18.0369, "step": 17883 }, { "epoch": 0.326905149249639, "grad_norm": 7.171624055665262, "learning_rate": 7.861251528203113e-06, "loss": 17.7876, "step": 17884 }, { "epoch": 0.3269234284460855, "grad_norm": 5.78811924291365, "learning_rate": 7.861008770339098e-06, "loss": 17.2695, "step": 17885 }, { "epoch": 0.32694170764253205, "grad_norm": 7.386439276230386, "learning_rate": 7.860766002447495e-06, "loss": 17.7566, "step": 17886 }, { "epoch": 0.32695998683897853, "grad_norm": 5.864195043080574, "learning_rate": 7.860523224529156e-06, "loss": 17.34, "step": 17887 }, { "epoch": 0.32697826603542507, "grad_norm": 5.371148963013059, "learning_rate": 7.86028043658493e-06, "loss": 17.0583, "step": 17888 }, { "epoch": 0.3269965452318716, "grad_norm": 7.595197440764273, "learning_rate": 7.860037638615671e-06, "loss": 17.8264, "step": 17889 }, { "epoch": 0.32701482442831814, "grad_norm": 5.525909084266543, "learning_rate": 7.859794830622227e-06, "loss": 17.1311, "step": 17890 }, { "epoch": 0.3270331036247647, "grad_norm": 7.350189973649129, "learning_rate": 7.859552012605452e-06, "loss": 18.1613, "step": 17891 }, { "epoch": 0.32705138282121116, "grad_norm": 6.313772679875273, "learning_rate": 7.859309184566193e-06, "loss": 17.7864, "step": 17892 }, { "epoch": 0.3270696620176577, "grad_norm": 5.807894059462818, "learning_rate": 7.859066346505305e-06, "loss": 17.3594, "step": 17893 }, { "epoch": 0.3270879412141042, "grad_norm": 5.577269715782767, "learning_rate": 7.858823498423637e-06, "loss": 17.5178, "step": 17894 }, { "epoch": 0.32710622041055076, "grad_norm": 5.881722427483263, "learning_rate": 7.85858064032204e-06, "loss": 17.5096, "step": 17895 }, { "epoch": 0.3271244996069973, "grad_norm": 7.551792391781151, "learning_rate": 7.858337772201368e-06, "loss": 18.3451, "step": 17896 }, { "epoch": 0.3271427788034438, "grad_norm": 7.199569481845562, "learning_rate": 7.858094894062468e-06, "loss": 17.7249, "step": 17897 }, { "epoch": 0.3271610579998903, "grad_norm": 6.221756197581803, "learning_rate": 7.857852005906195e-06, "loss": 17.5991, "step": 17898 }, { "epoch": 0.32717933719633685, "grad_norm": 7.165307209689358, "learning_rate": 7.857609107733398e-06, "loss": 17.9371, "step": 17899 }, { "epoch": 0.3271976163927834, "grad_norm": 6.4143827619662614, "learning_rate": 7.857366199544929e-06, "loss": 17.3668, "step": 17900 }, { "epoch": 0.3272158955892299, "grad_norm": 6.634976745565556, "learning_rate": 7.857123281341639e-06, "loss": 17.635, "step": 17901 }, { "epoch": 0.3272341747856764, "grad_norm": 6.445272136131572, "learning_rate": 7.85688035312438e-06, "loss": 17.7784, "step": 17902 }, { "epoch": 0.32725245398212294, "grad_norm": 6.627760389550447, "learning_rate": 7.856637414894003e-06, "loss": 17.7706, "step": 17903 }, { "epoch": 0.3272707331785695, "grad_norm": 7.047530424947791, "learning_rate": 7.85639446665136e-06, "loss": 17.7615, "step": 17904 }, { "epoch": 0.327289012375016, "grad_norm": 6.242491228600791, "learning_rate": 7.856151508397303e-06, "loss": 17.2843, "step": 17905 }, { "epoch": 0.32730729157146254, "grad_norm": 7.56601766773708, "learning_rate": 7.855908540132682e-06, "loss": 18.0979, "step": 17906 }, { "epoch": 0.327325570767909, "grad_norm": 6.057493735827574, "learning_rate": 7.85566556185835e-06, "loss": 17.554, "step": 17907 }, { "epoch": 0.32734384996435556, "grad_norm": 6.36774798328385, "learning_rate": 7.855422573575158e-06, "loss": 17.4506, "step": 17908 }, { "epoch": 0.3273621291608021, "grad_norm": 5.150073712766874, "learning_rate": 7.855179575283958e-06, "loss": 16.9747, "step": 17909 }, { "epoch": 0.32738040835724863, "grad_norm": 6.624202698198208, "learning_rate": 7.8549365669856e-06, "loss": 17.5623, "step": 17910 }, { "epoch": 0.32739868755369517, "grad_norm": 6.897513006872821, "learning_rate": 7.854693548680939e-06, "loss": 17.84, "step": 17911 }, { "epoch": 0.32741696675014165, "grad_norm": 6.807505608479961, "learning_rate": 7.854450520370823e-06, "loss": 17.7713, "step": 17912 }, { "epoch": 0.3274352459465882, "grad_norm": 8.995518507179327, "learning_rate": 7.854207482056106e-06, "loss": 18.9268, "step": 17913 }, { "epoch": 0.3274535251430347, "grad_norm": 6.933558453854653, "learning_rate": 7.85396443373764e-06, "loss": 17.8008, "step": 17914 }, { "epoch": 0.32747180433948125, "grad_norm": 6.034555834357745, "learning_rate": 7.853721375416276e-06, "loss": 17.4894, "step": 17915 }, { "epoch": 0.32749008353592773, "grad_norm": 6.874334781188533, "learning_rate": 7.853478307092867e-06, "loss": 17.7075, "step": 17916 }, { "epoch": 0.32750836273237427, "grad_norm": 6.582386838434732, "learning_rate": 7.853235228768263e-06, "loss": 17.7948, "step": 17917 }, { "epoch": 0.3275266419288208, "grad_norm": 5.708070149466229, "learning_rate": 7.852992140443317e-06, "loss": 17.3767, "step": 17918 }, { "epoch": 0.32754492112526734, "grad_norm": 5.705497787883412, "learning_rate": 7.852749042118882e-06, "loss": 17.1096, "step": 17919 }, { "epoch": 0.3275632003217139, "grad_norm": 6.898629192703969, "learning_rate": 7.85250593379581e-06, "loss": 17.7545, "step": 17920 }, { "epoch": 0.32758147951816036, "grad_norm": 5.666294156284441, "learning_rate": 7.85226281547495e-06, "loss": 17.4826, "step": 17921 }, { "epoch": 0.3275997587146069, "grad_norm": 5.759539364835163, "learning_rate": 7.852019687157158e-06, "loss": 17.1894, "step": 17922 }, { "epoch": 0.32761803791105343, "grad_norm": 6.0697805940568275, "learning_rate": 7.851776548843285e-06, "loss": 17.3675, "step": 17923 }, { "epoch": 0.32763631710749996, "grad_norm": 6.1392823783558494, "learning_rate": 7.851533400534179e-06, "loss": 17.5242, "step": 17924 }, { "epoch": 0.3276545963039465, "grad_norm": 8.411186952250112, "learning_rate": 7.8512902422307e-06, "loss": 18.1547, "step": 17925 }, { "epoch": 0.327672875500393, "grad_norm": 8.022695074463817, "learning_rate": 7.851047073933693e-06, "loss": 17.9314, "step": 17926 }, { "epoch": 0.3276911546968395, "grad_norm": 5.887575318032095, "learning_rate": 7.850803895644017e-06, "loss": 17.4752, "step": 17927 }, { "epoch": 0.32770943389328605, "grad_norm": 7.175985913344132, "learning_rate": 7.850560707362518e-06, "loss": 17.9756, "step": 17928 }, { "epoch": 0.3277277130897326, "grad_norm": 6.701434753654602, "learning_rate": 7.85031750909005e-06, "loss": 17.622, "step": 17929 }, { "epoch": 0.3277459922861791, "grad_norm": 5.9431661773785684, "learning_rate": 7.85007430082747e-06, "loss": 17.4444, "step": 17930 }, { "epoch": 0.3277642714826256, "grad_norm": 5.460614712466154, "learning_rate": 7.849831082575625e-06, "loss": 17.2491, "step": 17931 }, { "epoch": 0.32778255067907214, "grad_norm": 6.449065531617945, "learning_rate": 7.849587854335369e-06, "loss": 17.6549, "step": 17932 }, { "epoch": 0.3278008298755187, "grad_norm": 6.505054635099538, "learning_rate": 7.849344616107554e-06, "loss": 17.4446, "step": 17933 }, { "epoch": 0.3278191090719652, "grad_norm": 6.711412197228129, "learning_rate": 7.849101367893037e-06, "loss": 17.407, "step": 17934 }, { "epoch": 0.32783738826841174, "grad_norm": 7.0024387822301275, "learning_rate": 7.848858109692663e-06, "loss": 17.6073, "step": 17935 }, { "epoch": 0.3278556674648582, "grad_norm": 5.9135186739116845, "learning_rate": 7.84861484150729e-06, "loss": 17.4377, "step": 17936 }, { "epoch": 0.32787394666130476, "grad_norm": 7.451437561929218, "learning_rate": 7.848371563337771e-06, "loss": 17.9172, "step": 17937 }, { "epoch": 0.3278922258577513, "grad_norm": 7.453435976355388, "learning_rate": 7.848128275184954e-06, "loss": 17.9466, "step": 17938 }, { "epoch": 0.32791050505419783, "grad_norm": 6.787862427824031, "learning_rate": 7.847884977049695e-06, "loss": 17.8055, "step": 17939 }, { "epoch": 0.32792878425064437, "grad_norm": 5.684888889559415, "learning_rate": 7.847641668932848e-06, "loss": 17.0676, "step": 17940 }, { "epoch": 0.32794706344709085, "grad_norm": 6.670039474136435, "learning_rate": 7.847398350835263e-06, "loss": 17.7334, "step": 17941 }, { "epoch": 0.3279653426435374, "grad_norm": 6.450659337918838, "learning_rate": 7.847155022757793e-06, "loss": 17.2774, "step": 17942 }, { "epoch": 0.3279836218399839, "grad_norm": 7.099834262384581, "learning_rate": 7.846911684701293e-06, "loss": 18.0099, "step": 17943 }, { "epoch": 0.32800190103643045, "grad_norm": 8.16204715184103, "learning_rate": 7.846668336666616e-06, "loss": 17.9374, "step": 17944 }, { "epoch": 0.328020180232877, "grad_norm": 5.679050610946927, "learning_rate": 7.84642497865461e-06, "loss": 17.0642, "step": 17945 }, { "epoch": 0.32803845942932347, "grad_norm": 6.36842780726683, "learning_rate": 7.846181610666134e-06, "loss": 17.358, "step": 17946 }, { "epoch": 0.32805673862577, "grad_norm": 7.540779931147121, "learning_rate": 7.845938232702037e-06, "loss": 18.0322, "step": 17947 }, { "epoch": 0.32807501782221654, "grad_norm": 6.828821345241826, "learning_rate": 7.845694844763174e-06, "loss": 17.5905, "step": 17948 }, { "epoch": 0.3280932970186631, "grad_norm": 6.156397289374125, "learning_rate": 7.845451446850399e-06, "loss": 17.3741, "step": 17949 }, { "epoch": 0.32811157621510956, "grad_norm": 8.289089484106746, "learning_rate": 7.845208038964562e-06, "loss": 17.8755, "step": 17950 }, { "epoch": 0.3281298554115561, "grad_norm": 6.832392891004793, "learning_rate": 7.844964621106518e-06, "loss": 17.6991, "step": 17951 }, { "epoch": 0.32814813460800263, "grad_norm": 5.67463719989777, "learning_rate": 7.844721193277118e-06, "loss": 17.4293, "step": 17952 }, { "epoch": 0.32816641380444916, "grad_norm": 6.881617909594826, "learning_rate": 7.84447775547722e-06, "loss": 17.6258, "step": 17953 }, { "epoch": 0.3281846930008957, "grad_norm": 6.887026728402564, "learning_rate": 7.844234307707673e-06, "loss": 17.658, "step": 17954 }, { "epoch": 0.3282029721973422, "grad_norm": 6.028614365375526, "learning_rate": 7.843990849969332e-06, "loss": 17.4576, "step": 17955 }, { "epoch": 0.3282212513937887, "grad_norm": 6.630595707426664, "learning_rate": 7.84374738226305e-06, "loss": 17.7347, "step": 17956 }, { "epoch": 0.32823953059023525, "grad_norm": 6.54165765514594, "learning_rate": 7.843503904589678e-06, "loss": 17.6441, "step": 17957 }, { "epoch": 0.3282578097866818, "grad_norm": 7.652646522711401, "learning_rate": 7.843260416950073e-06, "loss": 18.535, "step": 17958 }, { "epoch": 0.3282760889831283, "grad_norm": 6.696123866533352, "learning_rate": 7.843016919345088e-06, "loss": 17.6987, "step": 17959 }, { "epoch": 0.3282943681795748, "grad_norm": 6.198056922095434, "learning_rate": 7.842773411775575e-06, "loss": 17.2978, "step": 17960 }, { "epoch": 0.32831264737602134, "grad_norm": 9.442932381923036, "learning_rate": 7.842529894242387e-06, "loss": 18.1727, "step": 17961 }, { "epoch": 0.3283309265724679, "grad_norm": 6.890377807218648, "learning_rate": 7.84228636674638e-06, "loss": 17.2305, "step": 17962 }, { "epoch": 0.3283492057689144, "grad_norm": 7.8386009702386445, "learning_rate": 7.842042829288405e-06, "loss": 18.2672, "step": 17963 }, { "epoch": 0.32836748496536095, "grad_norm": 5.3862193100565, "learning_rate": 7.841799281869316e-06, "loss": 16.9004, "step": 17964 }, { "epoch": 0.3283857641618074, "grad_norm": 6.426333063664287, "learning_rate": 7.841555724489968e-06, "loss": 17.3328, "step": 17965 }, { "epoch": 0.32840404335825396, "grad_norm": 6.454542323521426, "learning_rate": 7.841312157151214e-06, "loss": 17.7325, "step": 17966 }, { "epoch": 0.3284223225547005, "grad_norm": 6.364717940158, "learning_rate": 7.841068579853905e-06, "loss": 17.4542, "step": 17967 }, { "epoch": 0.32844060175114703, "grad_norm": 8.143825905159051, "learning_rate": 7.8408249925989e-06, "loss": 18.5515, "step": 17968 }, { "epoch": 0.32845888094759357, "grad_norm": 6.231896144861432, "learning_rate": 7.840581395387049e-06, "loss": 17.5316, "step": 17969 }, { "epoch": 0.32847716014404005, "grad_norm": 6.312618339014489, "learning_rate": 7.840337788219206e-06, "loss": 17.3823, "step": 17970 }, { "epoch": 0.3284954393404866, "grad_norm": 7.612973403971207, "learning_rate": 7.840094171096227e-06, "loss": 17.9676, "step": 17971 }, { "epoch": 0.3285137185369331, "grad_norm": 7.0474817026375955, "learning_rate": 7.839850544018963e-06, "loss": 17.5598, "step": 17972 }, { "epoch": 0.32853199773337965, "grad_norm": 7.233474172101555, "learning_rate": 7.839606906988269e-06, "loss": 17.6773, "step": 17973 }, { "epoch": 0.3285502769298262, "grad_norm": 7.354578322300646, "learning_rate": 7.839363260005e-06, "loss": 18.029, "step": 17974 }, { "epoch": 0.32856855612627267, "grad_norm": 6.074226772905845, "learning_rate": 7.839119603070009e-06, "loss": 17.4109, "step": 17975 }, { "epoch": 0.3285868353227192, "grad_norm": 6.238306304417973, "learning_rate": 7.83887593618415e-06, "loss": 17.4351, "step": 17976 }, { "epoch": 0.32860511451916574, "grad_norm": 7.7263793962581655, "learning_rate": 7.838632259348277e-06, "loss": 18.1237, "step": 17977 }, { "epoch": 0.3286233937156123, "grad_norm": 7.757291919193158, "learning_rate": 7.838388572563245e-06, "loss": 18.1212, "step": 17978 }, { "epoch": 0.3286416729120588, "grad_norm": 5.371021798274012, "learning_rate": 7.838144875829907e-06, "loss": 17.1414, "step": 17979 }, { "epoch": 0.3286599521085053, "grad_norm": 5.787143922859928, "learning_rate": 7.837901169149118e-06, "loss": 17.484, "step": 17980 }, { "epoch": 0.32867823130495183, "grad_norm": 5.58306720618807, "learning_rate": 7.837657452521731e-06, "loss": 17.1153, "step": 17981 }, { "epoch": 0.32869651050139836, "grad_norm": 6.215042796250209, "learning_rate": 7.837413725948601e-06, "loss": 17.2298, "step": 17982 }, { "epoch": 0.3287147896978449, "grad_norm": 6.972794338186463, "learning_rate": 7.837169989430582e-06, "loss": 17.7384, "step": 17983 }, { "epoch": 0.3287330688942914, "grad_norm": 5.574196475197414, "learning_rate": 7.836926242968528e-06, "loss": 17.1791, "step": 17984 }, { "epoch": 0.3287513480907379, "grad_norm": 6.929002251117667, "learning_rate": 7.836682486563296e-06, "loss": 17.7906, "step": 17985 }, { "epoch": 0.32876962728718445, "grad_norm": 6.137476365263559, "learning_rate": 7.836438720215736e-06, "loss": 17.0248, "step": 17986 }, { "epoch": 0.328787906483631, "grad_norm": 6.542484529930226, "learning_rate": 7.836194943926704e-06, "loss": 17.8205, "step": 17987 }, { "epoch": 0.3288061856800775, "grad_norm": 6.055576458847637, "learning_rate": 7.835951157697055e-06, "loss": 17.1962, "step": 17988 }, { "epoch": 0.328824464876524, "grad_norm": 5.382599772472619, "learning_rate": 7.835707361527644e-06, "loss": 17.0756, "step": 17989 }, { "epoch": 0.32884274407297054, "grad_norm": 8.919955577730727, "learning_rate": 7.835463555419324e-06, "loss": 18.5505, "step": 17990 }, { "epoch": 0.3288610232694171, "grad_norm": 6.047143064185945, "learning_rate": 7.835219739372952e-06, "loss": 17.4779, "step": 17991 }, { "epoch": 0.3288793024658636, "grad_norm": 7.568418447047422, "learning_rate": 7.83497591338938e-06, "loss": 17.8594, "step": 17992 }, { "epoch": 0.32889758166231015, "grad_norm": 6.442770499078313, "learning_rate": 7.834732077469463e-06, "loss": 17.6591, "step": 17993 }, { "epoch": 0.3289158608587566, "grad_norm": 5.93445607341471, "learning_rate": 7.834488231614056e-06, "loss": 17.4359, "step": 17994 }, { "epoch": 0.32893414005520316, "grad_norm": 5.694514030385999, "learning_rate": 7.834244375824013e-06, "loss": 17.1629, "step": 17995 }, { "epoch": 0.3289524192516497, "grad_norm": 7.673753865343592, "learning_rate": 7.834000510100192e-06, "loss": 17.9861, "step": 17996 }, { "epoch": 0.32897069844809623, "grad_norm": 5.615634763335271, "learning_rate": 7.833756634443442e-06, "loss": 17.3676, "step": 17997 }, { "epoch": 0.32898897764454277, "grad_norm": 6.668802536505096, "learning_rate": 7.833512748854622e-06, "loss": 17.5947, "step": 17998 }, { "epoch": 0.32900725684098925, "grad_norm": 6.918821112867181, "learning_rate": 7.833268853334587e-06, "loss": 17.8065, "step": 17999 }, { "epoch": 0.3290255360374358, "grad_norm": 6.840251713117992, "learning_rate": 7.83302494788419e-06, "loss": 17.9558, "step": 18000 }, { "epoch": 0.3290438152338823, "grad_norm": 6.965153888851557, "learning_rate": 7.832781032504285e-06, "loss": 17.3834, "step": 18001 }, { "epoch": 0.32906209443032886, "grad_norm": 6.2277298638382845, "learning_rate": 7.832537107195729e-06, "loss": 17.7506, "step": 18002 }, { "epoch": 0.3290803736267754, "grad_norm": 7.37426000360595, "learning_rate": 7.832293171959378e-06, "loss": 17.9153, "step": 18003 }, { "epoch": 0.32909865282322187, "grad_norm": 6.85056256944876, "learning_rate": 7.832049226796083e-06, "loss": 17.7379, "step": 18004 }, { "epoch": 0.3291169320196684, "grad_norm": 6.888727972434795, "learning_rate": 7.831805271706701e-06, "loss": 17.7978, "step": 18005 }, { "epoch": 0.32913521121611494, "grad_norm": 5.792005203325986, "learning_rate": 7.83156130669209e-06, "loss": 17.4617, "step": 18006 }, { "epoch": 0.3291534904125615, "grad_norm": 6.5035961323595215, "learning_rate": 7.831317331753099e-06, "loss": 17.6357, "step": 18007 }, { "epoch": 0.329171769609008, "grad_norm": 7.231552584455718, "learning_rate": 7.831073346890588e-06, "loss": 18.0991, "step": 18008 }, { "epoch": 0.3291900488054545, "grad_norm": 5.947293522693324, "learning_rate": 7.830829352105412e-06, "loss": 17.2566, "step": 18009 }, { "epoch": 0.32920832800190103, "grad_norm": 5.742591906200253, "learning_rate": 7.830585347398423e-06, "loss": 17.2973, "step": 18010 }, { "epoch": 0.32922660719834757, "grad_norm": 7.13675380760017, "learning_rate": 7.830341332770477e-06, "loss": 17.7959, "step": 18011 }, { "epoch": 0.3292448863947941, "grad_norm": 7.440563667754312, "learning_rate": 7.830097308222431e-06, "loss": 18.1438, "step": 18012 }, { "epoch": 0.32926316559124064, "grad_norm": 6.20282260730255, "learning_rate": 7.829853273755142e-06, "loss": 17.369, "step": 18013 }, { "epoch": 0.3292814447876871, "grad_norm": 5.730776245466025, "learning_rate": 7.82960922936946e-06, "loss": 17.1559, "step": 18014 }, { "epoch": 0.32929972398413365, "grad_norm": 6.100881290165984, "learning_rate": 7.829365175066244e-06, "loss": 17.5479, "step": 18015 }, { "epoch": 0.3293180031805802, "grad_norm": 6.817386754238109, "learning_rate": 7.82912111084635e-06, "loss": 17.8857, "step": 18016 }, { "epoch": 0.3293362823770267, "grad_norm": 7.652466360647009, "learning_rate": 7.828877036710629e-06, "loss": 17.7605, "step": 18017 }, { "epoch": 0.3293545615734732, "grad_norm": 6.025793143133479, "learning_rate": 7.828632952659942e-06, "loss": 17.3684, "step": 18018 }, { "epoch": 0.32937284076991974, "grad_norm": 5.6804646863167445, "learning_rate": 7.82838885869514e-06, "loss": 17.1107, "step": 18019 }, { "epoch": 0.3293911199663663, "grad_norm": 6.310527485746265, "learning_rate": 7.828144754817083e-06, "loss": 17.2968, "step": 18020 }, { "epoch": 0.3294093991628128, "grad_norm": 6.532576936114714, "learning_rate": 7.827900641026622e-06, "loss": 17.3347, "step": 18021 }, { "epoch": 0.32942767835925935, "grad_norm": 5.82792366400588, "learning_rate": 7.827656517324616e-06, "loss": 17.3119, "step": 18022 }, { "epoch": 0.3294459575557058, "grad_norm": 6.7218850160504235, "learning_rate": 7.827412383711919e-06, "loss": 17.6072, "step": 18023 }, { "epoch": 0.32946423675215236, "grad_norm": 5.941080781099494, "learning_rate": 7.827168240189385e-06, "loss": 17.3296, "step": 18024 }, { "epoch": 0.3294825159485989, "grad_norm": 5.940669637323221, "learning_rate": 7.826924086757873e-06, "loss": 17.3381, "step": 18025 }, { "epoch": 0.32950079514504543, "grad_norm": 6.334301727038886, "learning_rate": 7.826679923418238e-06, "loss": 17.4331, "step": 18026 }, { "epoch": 0.32951907434149197, "grad_norm": 6.434928643502983, "learning_rate": 7.826435750171336e-06, "loss": 17.6518, "step": 18027 }, { "epoch": 0.32953735353793845, "grad_norm": 5.795639760284889, "learning_rate": 7.82619156701802e-06, "loss": 17.3047, "step": 18028 }, { "epoch": 0.329555632734385, "grad_norm": 6.196272877079253, "learning_rate": 7.825947373959147e-06, "loss": 17.5228, "step": 18029 }, { "epoch": 0.3295739119308315, "grad_norm": 5.278615878184858, "learning_rate": 7.825703170995576e-06, "loss": 17.0388, "step": 18030 }, { "epoch": 0.32959219112727806, "grad_norm": 7.92969487264405, "learning_rate": 7.82545895812816e-06, "loss": 17.9639, "step": 18031 }, { "epoch": 0.3296104703237246, "grad_norm": 7.482819457997297, "learning_rate": 7.825214735357754e-06, "loss": 17.8671, "step": 18032 }, { "epoch": 0.32962874952017107, "grad_norm": 5.786874699951977, "learning_rate": 7.824970502685218e-06, "loss": 17.2433, "step": 18033 }, { "epoch": 0.3296470287166176, "grad_norm": 6.134673803576229, "learning_rate": 7.824726260111403e-06, "loss": 17.4821, "step": 18034 }, { "epoch": 0.32966530791306414, "grad_norm": 6.769792504409528, "learning_rate": 7.824482007637171e-06, "loss": 17.7747, "step": 18035 }, { "epoch": 0.3296835871095107, "grad_norm": 6.461015525984091, "learning_rate": 7.824237745263372e-06, "loss": 17.3478, "step": 18036 }, { "epoch": 0.3297018663059572, "grad_norm": 8.139266660952575, "learning_rate": 7.823993472990866e-06, "loss": 18.0593, "step": 18037 }, { "epoch": 0.3297201455024037, "grad_norm": 5.728976451364784, "learning_rate": 7.823749190820507e-06, "loss": 17.1549, "step": 18038 }, { "epoch": 0.32973842469885023, "grad_norm": 6.000148853719646, "learning_rate": 7.823504898753152e-06, "loss": 17.5285, "step": 18039 }, { "epoch": 0.32975670389529677, "grad_norm": 5.398455405227888, "learning_rate": 7.823260596789657e-06, "loss": 17.1715, "step": 18040 }, { "epoch": 0.3297749830917433, "grad_norm": 7.0204903803390675, "learning_rate": 7.82301628493088e-06, "loss": 17.6809, "step": 18041 }, { "epoch": 0.32979326228818984, "grad_norm": 6.668100476576282, "learning_rate": 7.822771963177676e-06, "loss": 17.872, "step": 18042 }, { "epoch": 0.3298115414846363, "grad_norm": 7.371328529126587, "learning_rate": 7.822527631530898e-06, "loss": 18.0666, "step": 18043 }, { "epoch": 0.32982982068108285, "grad_norm": 8.178018564594312, "learning_rate": 7.822283289991411e-06, "loss": 18.3436, "step": 18044 }, { "epoch": 0.3298480998775294, "grad_norm": 11.580222952895815, "learning_rate": 7.822038938560061e-06, "loss": 18.7833, "step": 18045 }, { "epoch": 0.3298663790739759, "grad_norm": 7.480236483365055, "learning_rate": 7.82179457723771e-06, "loss": 17.4975, "step": 18046 }, { "epoch": 0.32988465827042246, "grad_norm": 6.546495003471312, "learning_rate": 7.821550206025218e-06, "loss": 17.6675, "step": 18047 }, { "epoch": 0.32990293746686894, "grad_norm": 7.540433245344673, "learning_rate": 7.821305824923434e-06, "loss": 17.9526, "step": 18048 }, { "epoch": 0.3299212166633155, "grad_norm": 6.398733916800262, "learning_rate": 7.821061433933217e-06, "loss": 17.98, "step": 18049 }, { "epoch": 0.329939495859762, "grad_norm": 6.002220326238243, "learning_rate": 7.820817033055426e-06, "loss": 17.5718, "step": 18050 }, { "epoch": 0.32995777505620855, "grad_norm": 5.524521422577539, "learning_rate": 7.820572622290916e-06, "loss": 17.1192, "step": 18051 }, { "epoch": 0.329976054252655, "grad_norm": 7.543780459250276, "learning_rate": 7.820328201640545e-06, "loss": 17.9918, "step": 18052 }, { "epoch": 0.32999433344910156, "grad_norm": 7.530696739431696, "learning_rate": 7.820083771105166e-06, "loss": 18.4403, "step": 18053 }, { "epoch": 0.3300126126455481, "grad_norm": 6.880084319685398, "learning_rate": 7.819839330685641e-06, "loss": 17.7529, "step": 18054 }, { "epoch": 0.33003089184199463, "grad_norm": 5.9957187798217, "learning_rate": 7.819594880382822e-06, "loss": 17.5537, "step": 18055 }, { "epoch": 0.33004917103844117, "grad_norm": 8.015415195987858, "learning_rate": 7.819350420197566e-06, "loss": 18.0474, "step": 18056 }, { "epoch": 0.33006745023488765, "grad_norm": 6.748262435062119, "learning_rate": 7.819105950130734e-06, "loss": 17.699, "step": 18057 }, { "epoch": 0.3300857294313342, "grad_norm": 6.842770845866191, "learning_rate": 7.81886147018318e-06, "loss": 17.9296, "step": 18058 }, { "epoch": 0.3301040086277807, "grad_norm": 7.048283201148179, "learning_rate": 7.818616980355759e-06, "loss": 18.0855, "step": 18059 }, { "epoch": 0.33012228782422726, "grad_norm": 5.679185944674213, "learning_rate": 7.818372480649332e-06, "loss": 17.4917, "step": 18060 }, { "epoch": 0.3301405670206738, "grad_norm": 8.87379436962972, "learning_rate": 7.818127971064755e-06, "loss": 18.5461, "step": 18061 }, { "epoch": 0.33015884621712027, "grad_norm": 7.260242380332696, "learning_rate": 7.817883451602884e-06, "loss": 17.8184, "step": 18062 }, { "epoch": 0.3301771254135668, "grad_norm": 6.120923486287874, "learning_rate": 7.817638922264572e-06, "loss": 17.292, "step": 18063 }, { "epoch": 0.33019540461001334, "grad_norm": 6.204902826118205, "learning_rate": 7.817394383050683e-06, "loss": 17.5321, "step": 18064 }, { "epoch": 0.3302136838064599, "grad_norm": 6.371457833218362, "learning_rate": 7.817149833962073e-06, "loss": 17.5532, "step": 18065 }, { "epoch": 0.3302319630029064, "grad_norm": 6.202345362612025, "learning_rate": 7.816905274999594e-06, "loss": 17.5803, "step": 18066 }, { "epoch": 0.3302502421993529, "grad_norm": 5.772474384306617, "learning_rate": 7.816660706164107e-06, "loss": 17.3038, "step": 18067 }, { "epoch": 0.33026852139579943, "grad_norm": 6.2731523091285215, "learning_rate": 7.81641612745647e-06, "loss": 17.4522, "step": 18068 }, { "epoch": 0.33028680059224597, "grad_norm": 6.030460793230085, "learning_rate": 7.81617153887754e-06, "loss": 17.4709, "step": 18069 }, { "epoch": 0.3303050797886925, "grad_norm": 7.152867573673834, "learning_rate": 7.815926940428169e-06, "loss": 18.0158, "step": 18070 }, { "epoch": 0.33032335898513904, "grad_norm": 6.988290298304303, "learning_rate": 7.815682332109221e-06, "loss": 18.0304, "step": 18071 }, { "epoch": 0.3303416381815855, "grad_norm": 5.60823683949017, "learning_rate": 7.815437713921553e-06, "loss": 17.0719, "step": 18072 }, { "epoch": 0.33035991737803205, "grad_norm": 6.508456643510161, "learning_rate": 7.815193085866017e-06, "loss": 17.7962, "step": 18073 }, { "epoch": 0.3303781965744786, "grad_norm": 5.333315064822724, "learning_rate": 7.814948447943474e-06, "loss": 17.0398, "step": 18074 }, { "epoch": 0.3303964757709251, "grad_norm": 6.06665473651248, "learning_rate": 7.814703800154781e-06, "loss": 17.2698, "step": 18075 }, { "epoch": 0.33041475496737166, "grad_norm": 6.011162302757721, "learning_rate": 7.814459142500795e-06, "loss": 17.2969, "step": 18076 }, { "epoch": 0.33043303416381814, "grad_norm": 6.284279519153075, "learning_rate": 7.814214474982374e-06, "loss": 17.3795, "step": 18077 }, { "epoch": 0.3304513133602647, "grad_norm": 6.412640893166762, "learning_rate": 7.813969797600377e-06, "loss": 17.8153, "step": 18078 }, { "epoch": 0.3304695925567112, "grad_norm": 6.086706312108125, "learning_rate": 7.813725110355658e-06, "loss": 17.514, "step": 18079 }, { "epoch": 0.33048787175315775, "grad_norm": 6.369750439259867, "learning_rate": 7.813480413249078e-06, "loss": 17.4404, "step": 18080 }, { "epoch": 0.3305061509496043, "grad_norm": 6.714898930401324, "learning_rate": 7.813235706281493e-06, "loss": 17.7209, "step": 18081 }, { "epoch": 0.33052443014605076, "grad_norm": 5.085908452935506, "learning_rate": 7.812990989453762e-06, "loss": 17.2132, "step": 18082 }, { "epoch": 0.3305427093424973, "grad_norm": 7.636258516957033, "learning_rate": 7.81274626276674e-06, "loss": 18.3384, "step": 18083 }, { "epoch": 0.33056098853894383, "grad_norm": 6.3478744830122045, "learning_rate": 7.812501526221286e-06, "loss": 17.6347, "step": 18084 }, { "epoch": 0.33057926773539037, "grad_norm": 6.298719686003601, "learning_rate": 7.812256779818262e-06, "loss": 17.2591, "step": 18085 }, { "epoch": 0.33059754693183685, "grad_norm": 6.323332639605773, "learning_rate": 7.812012023558517e-06, "loss": 17.4948, "step": 18086 }, { "epoch": 0.3306158261282834, "grad_norm": 6.769834675958703, "learning_rate": 7.811767257442917e-06, "loss": 17.6773, "step": 18087 }, { "epoch": 0.3306341053247299, "grad_norm": 7.566896663553592, "learning_rate": 7.811522481472316e-06, "loss": 18.1479, "step": 18088 }, { "epoch": 0.33065238452117646, "grad_norm": 7.349644500255119, "learning_rate": 7.811277695647573e-06, "loss": 17.9977, "step": 18089 }, { "epoch": 0.330670663717623, "grad_norm": 6.247972811162064, "learning_rate": 7.811032899969545e-06, "loss": 17.5106, "step": 18090 }, { "epoch": 0.3306889429140695, "grad_norm": 8.004581912619546, "learning_rate": 7.81078809443909e-06, "loss": 18.3166, "step": 18091 }, { "epoch": 0.330707222110516, "grad_norm": 7.109596586974475, "learning_rate": 7.810543279057068e-06, "loss": 17.8664, "step": 18092 }, { "epoch": 0.33072550130696254, "grad_norm": 5.738651723051288, "learning_rate": 7.810298453824336e-06, "loss": 17.3457, "step": 18093 }, { "epoch": 0.3307437805034091, "grad_norm": 7.220865070391188, "learning_rate": 7.81005361874175e-06, "loss": 17.6067, "step": 18094 }, { "epoch": 0.3307620596998556, "grad_norm": 8.148402427707252, "learning_rate": 7.80980877381017e-06, "loss": 18.197, "step": 18095 }, { "epoch": 0.3307803388963021, "grad_norm": 5.588565291030363, "learning_rate": 7.809563919030456e-06, "loss": 17.1872, "step": 18096 }, { "epoch": 0.33079861809274863, "grad_norm": 6.75662550492222, "learning_rate": 7.809319054403463e-06, "loss": 17.1131, "step": 18097 }, { "epoch": 0.33081689728919517, "grad_norm": 7.431033782768452, "learning_rate": 7.80907417993005e-06, "loss": 17.975, "step": 18098 }, { "epoch": 0.3308351764856417, "grad_norm": 7.685822219913446, "learning_rate": 7.808829295611078e-06, "loss": 17.7454, "step": 18099 }, { "epoch": 0.33085345568208824, "grad_norm": 6.228433468252308, "learning_rate": 7.8085844014474e-06, "loss": 17.2792, "step": 18100 }, { "epoch": 0.3308717348785347, "grad_norm": 7.236931279106639, "learning_rate": 7.808339497439881e-06, "loss": 17.8093, "step": 18101 }, { "epoch": 0.33089001407498125, "grad_norm": 4.747904350524478, "learning_rate": 7.808094583589372e-06, "loss": 16.8722, "step": 18102 }, { "epoch": 0.3309082932714278, "grad_norm": 5.943061297250326, "learning_rate": 7.807849659896738e-06, "loss": 17.5772, "step": 18103 }, { "epoch": 0.3309265724678743, "grad_norm": 8.433176466603951, "learning_rate": 7.807604726362833e-06, "loss": 18.0154, "step": 18104 }, { "epoch": 0.33094485166432086, "grad_norm": 7.371157211341919, "learning_rate": 7.807359782988517e-06, "loss": 17.5609, "step": 18105 }, { "epoch": 0.33096313086076734, "grad_norm": 7.081159680112228, "learning_rate": 7.80711482977465e-06, "loss": 17.9772, "step": 18106 }, { "epoch": 0.3309814100572139, "grad_norm": 8.509748744179188, "learning_rate": 7.806869866722087e-06, "loss": 18.6014, "step": 18107 }, { "epoch": 0.3309996892536604, "grad_norm": 6.176470547847755, "learning_rate": 7.806624893831692e-06, "loss": 17.4069, "step": 18108 }, { "epoch": 0.33101796845010695, "grad_norm": 5.95420010238592, "learning_rate": 7.806379911104316e-06, "loss": 17.4311, "step": 18109 }, { "epoch": 0.3310362476465535, "grad_norm": 5.677335869781743, "learning_rate": 7.806134918540825e-06, "loss": 17.4766, "step": 18110 }, { "epoch": 0.33105452684299996, "grad_norm": 5.634643249307986, "learning_rate": 7.805889916142073e-06, "loss": 17.2223, "step": 18111 }, { "epoch": 0.3310728060394465, "grad_norm": 6.59916300705919, "learning_rate": 7.805644903908922e-06, "loss": 17.4556, "step": 18112 }, { "epoch": 0.33109108523589303, "grad_norm": 5.978686613423198, "learning_rate": 7.805399881842227e-06, "loss": 17.1401, "step": 18113 }, { "epoch": 0.33110936443233957, "grad_norm": 6.213357174843528, "learning_rate": 7.805154849942851e-06, "loss": 17.2441, "step": 18114 }, { "epoch": 0.3311276436287861, "grad_norm": 6.805620757747374, "learning_rate": 7.804909808211649e-06, "loss": 17.5737, "step": 18115 }, { "epoch": 0.3311459228252326, "grad_norm": 6.61973807849275, "learning_rate": 7.804664756649483e-06, "loss": 17.7328, "step": 18116 }, { "epoch": 0.3311642020216791, "grad_norm": 7.081637541097287, "learning_rate": 7.80441969525721e-06, "loss": 18.1021, "step": 18117 }, { "epoch": 0.33118248121812566, "grad_norm": 6.1570293838340024, "learning_rate": 7.804174624035687e-06, "loss": 17.2987, "step": 18118 }, { "epoch": 0.3312007604145722, "grad_norm": 7.137962111086096, "learning_rate": 7.803929542985778e-06, "loss": 18.09, "step": 18119 }, { "epoch": 0.3312190396110187, "grad_norm": 6.630613522949249, "learning_rate": 7.80368445210834e-06, "loss": 17.2999, "step": 18120 }, { "epoch": 0.3312373188074652, "grad_norm": 6.5951159747846795, "learning_rate": 7.80343935140423e-06, "loss": 17.5798, "step": 18121 }, { "epoch": 0.33125559800391174, "grad_norm": 5.678767297055565, "learning_rate": 7.803194240874307e-06, "loss": 17.2323, "step": 18122 }, { "epoch": 0.3312738772003583, "grad_norm": 6.177090873093481, "learning_rate": 7.802949120519433e-06, "loss": 17.4886, "step": 18123 }, { "epoch": 0.3312921563968048, "grad_norm": 6.948735445214337, "learning_rate": 7.802703990340465e-06, "loss": 17.9592, "step": 18124 }, { "epoch": 0.3313104355932513, "grad_norm": 6.245303136428903, "learning_rate": 7.802458850338262e-06, "loss": 17.5357, "step": 18125 }, { "epoch": 0.33132871478969783, "grad_norm": 5.0947277965446975, "learning_rate": 7.802213700513686e-06, "loss": 16.7877, "step": 18126 }, { "epoch": 0.33134699398614437, "grad_norm": 6.178992615043774, "learning_rate": 7.801968540867593e-06, "loss": 17.5462, "step": 18127 }, { "epoch": 0.3313652731825909, "grad_norm": 6.400873647242991, "learning_rate": 7.801723371400842e-06, "loss": 17.5993, "step": 18128 }, { "epoch": 0.33138355237903744, "grad_norm": 7.180080291465306, "learning_rate": 7.801478192114294e-06, "loss": 17.7825, "step": 18129 }, { "epoch": 0.3314018315754839, "grad_norm": 8.426975854213334, "learning_rate": 7.80123300300881e-06, "loss": 18.0174, "step": 18130 }, { "epoch": 0.33142011077193045, "grad_norm": 9.678813909641246, "learning_rate": 7.800987804085248e-06, "loss": 18.6294, "step": 18131 }, { "epoch": 0.331438389968377, "grad_norm": 6.117208571758629, "learning_rate": 7.800742595344464e-06, "loss": 17.3539, "step": 18132 }, { "epoch": 0.3314566691648235, "grad_norm": 8.796654775465436, "learning_rate": 7.800497376787322e-06, "loss": 18.2088, "step": 18133 }, { "epoch": 0.33147494836127006, "grad_norm": 6.591865191776443, "learning_rate": 7.80025214841468e-06, "loss": 17.7186, "step": 18134 }, { "epoch": 0.33149322755771654, "grad_norm": 12.247619296007418, "learning_rate": 7.800006910227395e-06, "loss": 18.1093, "step": 18135 }, { "epoch": 0.3315115067541631, "grad_norm": 6.145603212567427, "learning_rate": 7.79976166222633e-06, "loss": 17.3947, "step": 18136 }, { "epoch": 0.3315297859506096, "grad_norm": 5.775018551611026, "learning_rate": 7.799516404412344e-06, "loss": 17.327, "step": 18137 }, { "epoch": 0.33154806514705615, "grad_norm": 7.1323289170806365, "learning_rate": 7.799271136786294e-06, "loss": 17.8676, "step": 18138 }, { "epoch": 0.3315663443435027, "grad_norm": 6.461974829962352, "learning_rate": 7.799025859349043e-06, "loss": 17.6025, "step": 18139 }, { "epoch": 0.33158462353994916, "grad_norm": 7.838103949478371, "learning_rate": 7.79878057210145e-06, "loss": 18.3584, "step": 18140 }, { "epoch": 0.3316029027363957, "grad_norm": 8.413702494573815, "learning_rate": 7.798535275044374e-06, "loss": 18.6791, "step": 18141 }, { "epoch": 0.33162118193284223, "grad_norm": 5.8768644266598145, "learning_rate": 7.798289968178674e-06, "loss": 17.3626, "step": 18142 }, { "epoch": 0.33163946112928877, "grad_norm": 5.994574320590915, "learning_rate": 7.79804465150521e-06, "loss": 17.5051, "step": 18143 }, { "epoch": 0.3316577403257353, "grad_norm": 6.586087063590682, "learning_rate": 7.797799325024842e-06, "loss": 17.736, "step": 18144 }, { "epoch": 0.3316760195221818, "grad_norm": 5.638852904875247, "learning_rate": 7.797553988738432e-06, "loss": 17.2101, "step": 18145 }, { "epoch": 0.3316942987186283, "grad_norm": 5.736074742061226, "learning_rate": 7.797308642646836e-06, "loss": 17.1432, "step": 18146 }, { "epoch": 0.33171257791507486, "grad_norm": 7.090918000663191, "learning_rate": 7.797063286750916e-06, "loss": 17.6971, "step": 18147 }, { "epoch": 0.3317308571115214, "grad_norm": 6.206276526643924, "learning_rate": 7.796817921051534e-06, "loss": 17.3684, "step": 18148 }, { "epoch": 0.33174913630796793, "grad_norm": 6.436787675555288, "learning_rate": 7.796572545549546e-06, "loss": 17.7381, "step": 18149 }, { "epoch": 0.3317674155044144, "grad_norm": 5.870395325684857, "learning_rate": 7.796327160245814e-06, "loss": 17.257, "step": 18150 }, { "epoch": 0.33178569470086094, "grad_norm": 7.163311270417848, "learning_rate": 7.796081765141198e-06, "loss": 17.813, "step": 18151 }, { "epoch": 0.3318039738973075, "grad_norm": 6.897218684159557, "learning_rate": 7.795836360236559e-06, "loss": 17.6345, "step": 18152 }, { "epoch": 0.331822253093754, "grad_norm": 6.524512327566224, "learning_rate": 7.795590945532757e-06, "loss": 17.5572, "step": 18153 }, { "epoch": 0.3318405322902005, "grad_norm": 6.146439310580656, "learning_rate": 7.79534552103065e-06, "loss": 17.2985, "step": 18154 }, { "epoch": 0.33185881148664703, "grad_norm": 6.068391168257, "learning_rate": 7.7951000867311e-06, "loss": 17.6694, "step": 18155 }, { "epoch": 0.33187709068309357, "grad_norm": 7.338505430554289, "learning_rate": 7.794854642634964e-06, "loss": 17.6184, "step": 18156 }, { "epoch": 0.3318953698795401, "grad_norm": 6.143827448923468, "learning_rate": 7.794609188743108e-06, "loss": 17.3543, "step": 18157 }, { "epoch": 0.33191364907598664, "grad_norm": 7.009805038282566, "learning_rate": 7.79436372505639e-06, "loss": 17.7452, "step": 18158 }, { "epoch": 0.3319319282724331, "grad_norm": 5.642670319090825, "learning_rate": 7.794118251575666e-06, "loss": 17.3011, "step": 18159 }, { "epoch": 0.33195020746887965, "grad_norm": 7.102703592615145, "learning_rate": 7.793872768301802e-06, "loss": 17.8441, "step": 18160 }, { "epoch": 0.3319684866653262, "grad_norm": 7.125349431069596, "learning_rate": 7.793627275235658e-06, "loss": 17.5985, "step": 18161 }, { "epoch": 0.3319867658617727, "grad_norm": 5.280807767518307, "learning_rate": 7.793381772378091e-06, "loss": 16.9641, "step": 18162 }, { "epoch": 0.33200504505821926, "grad_norm": 7.904987467500512, "learning_rate": 7.793136259729963e-06, "loss": 18.7803, "step": 18163 }, { "epoch": 0.33202332425466574, "grad_norm": 5.918237343746448, "learning_rate": 7.792890737292135e-06, "loss": 17.1779, "step": 18164 }, { "epoch": 0.3320416034511123, "grad_norm": 6.076838668528555, "learning_rate": 7.792645205065469e-06, "loss": 17.3053, "step": 18165 }, { "epoch": 0.3320598826475588, "grad_norm": 5.958843514612012, "learning_rate": 7.792399663050822e-06, "loss": 17.4298, "step": 18166 }, { "epoch": 0.33207816184400535, "grad_norm": 6.130864054531944, "learning_rate": 7.792154111249057e-06, "loss": 17.1949, "step": 18167 }, { "epoch": 0.3320964410404519, "grad_norm": 7.954768819297425, "learning_rate": 7.791908549661036e-06, "loss": 18.3735, "step": 18168 }, { "epoch": 0.33211472023689836, "grad_norm": 8.298811300794295, "learning_rate": 7.791662978287616e-06, "loss": 18.0898, "step": 18169 }, { "epoch": 0.3321329994333449, "grad_norm": 6.303685131205644, "learning_rate": 7.791417397129659e-06, "loss": 17.1811, "step": 18170 }, { "epoch": 0.33215127862979144, "grad_norm": 7.206124607693227, "learning_rate": 7.791171806188027e-06, "loss": 18.0821, "step": 18171 }, { "epoch": 0.33216955782623797, "grad_norm": 8.314122606992065, "learning_rate": 7.79092620546358e-06, "loss": 17.6907, "step": 18172 }, { "epoch": 0.3321878370226845, "grad_norm": 5.033311332280101, "learning_rate": 7.790680594957179e-06, "loss": 16.9447, "step": 18173 }, { "epoch": 0.332206116219131, "grad_norm": 6.494315316107332, "learning_rate": 7.790434974669685e-06, "loss": 17.2337, "step": 18174 }, { "epoch": 0.3322243954155775, "grad_norm": 6.53056235395755, "learning_rate": 7.790189344601957e-06, "loss": 17.7898, "step": 18175 }, { "epoch": 0.33224267461202406, "grad_norm": 6.62133568231179, "learning_rate": 7.789943704754859e-06, "loss": 17.6237, "step": 18176 }, { "epoch": 0.3322609538084706, "grad_norm": 7.443861748373182, "learning_rate": 7.789698055129248e-06, "loss": 17.912, "step": 18177 }, { "epoch": 0.33227923300491713, "grad_norm": 6.973394307184902, "learning_rate": 7.789452395725991e-06, "loss": 17.7439, "step": 18178 }, { "epoch": 0.3322975122013636, "grad_norm": 6.785497824905413, "learning_rate": 7.789206726545944e-06, "loss": 17.5712, "step": 18179 }, { "epoch": 0.33231579139781015, "grad_norm": 5.727668255250328, "learning_rate": 7.788961047589968e-06, "loss": 17.3711, "step": 18180 }, { "epoch": 0.3323340705942567, "grad_norm": 7.234093853885038, "learning_rate": 7.788715358858927e-06, "loss": 17.8033, "step": 18181 }, { "epoch": 0.3323523497907032, "grad_norm": 7.431557595833951, "learning_rate": 7.78846966035368e-06, "loss": 18.0144, "step": 18182 }, { "epoch": 0.33237062898714975, "grad_norm": 5.669439493474292, "learning_rate": 7.78822395207509e-06, "loss": 17.175, "step": 18183 }, { "epoch": 0.33238890818359623, "grad_norm": 5.84385140341315, "learning_rate": 7.787978234024014e-06, "loss": 17.4927, "step": 18184 }, { "epoch": 0.33240718738004277, "grad_norm": 6.780583032607836, "learning_rate": 7.78773250620132e-06, "loss": 17.8106, "step": 18185 }, { "epoch": 0.3324254665764893, "grad_norm": 7.255141407721134, "learning_rate": 7.787486768607864e-06, "loss": 17.8612, "step": 18186 }, { "epoch": 0.33244374577293584, "grad_norm": 6.983175297532281, "learning_rate": 7.787241021244509e-06, "loss": 17.8476, "step": 18187 }, { "epoch": 0.3324620249693823, "grad_norm": 7.714506912999816, "learning_rate": 7.786995264112113e-06, "loss": 17.8431, "step": 18188 }, { "epoch": 0.33248030416582885, "grad_norm": 6.635305280111997, "learning_rate": 7.786749497211545e-06, "loss": 17.844, "step": 18189 }, { "epoch": 0.3324985833622754, "grad_norm": 6.495138931650477, "learning_rate": 7.78650372054366e-06, "loss": 17.572, "step": 18190 }, { "epoch": 0.3325168625587219, "grad_norm": 6.49733321429841, "learning_rate": 7.786257934109321e-06, "loss": 17.6245, "step": 18191 }, { "epoch": 0.33253514175516846, "grad_norm": 7.198187102241127, "learning_rate": 7.78601213790939e-06, "loss": 17.7093, "step": 18192 }, { "epoch": 0.33255342095161494, "grad_norm": 6.14606092043527, "learning_rate": 7.785766331944729e-06, "loss": 17.6105, "step": 18193 }, { "epoch": 0.3325717001480615, "grad_norm": 5.960404881106066, "learning_rate": 7.785520516216196e-06, "loss": 17.5927, "step": 18194 }, { "epoch": 0.332589979344508, "grad_norm": 6.351337550024658, "learning_rate": 7.785274690724657e-06, "loss": 17.8266, "step": 18195 }, { "epoch": 0.33260825854095455, "grad_norm": 5.717737567289761, "learning_rate": 7.785028855470973e-06, "loss": 17.3423, "step": 18196 }, { "epoch": 0.3326265377374011, "grad_norm": 8.886182938977878, "learning_rate": 7.784783010456002e-06, "loss": 18.6777, "step": 18197 }, { "epoch": 0.33264481693384756, "grad_norm": 6.677696799615567, "learning_rate": 7.784537155680611e-06, "loss": 17.6191, "step": 18198 }, { "epoch": 0.3326630961302941, "grad_norm": 5.338467567289512, "learning_rate": 7.784291291145657e-06, "loss": 16.9954, "step": 18199 }, { "epoch": 0.33268137532674064, "grad_norm": 5.759579456797714, "learning_rate": 7.784045416852007e-06, "loss": 17.2598, "step": 18200 }, { "epoch": 0.33269965452318717, "grad_norm": 6.395268915261202, "learning_rate": 7.783799532800516e-06, "loss": 17.4145, "step": 18201 }, { "epoch": 0.3327179337196337, "grad_norm": 6.0361347411754505, "learning_rate": 7.78355363899205e-06, "loss": 17.2654, "step": 18202 }, { "epoch": 0.3327362129160802, "grad_norm": 6.208967974543009, "learning_rate": 7.78330773542747e-06, "loss": 17.4756, "step": 18203 }, { "epoch": 0.3327544921125267, "grad_norm": 7.052348690253904, "learning_rate": 7.783061822107637e-06, "loss": 17.6805, "step": 18204 }, { "epoch": 0.33277277130897326, "grad_norm": 8.153610445614932, "learning_rate": 7.782815899033415e-06, "loss": 18.003, "step": 18205 }, { "epoch": 0.3327910505054198, "grad_norm": 6.652723108848424, "learning_rate": 7.782569966205664e-06, "loss": 17.6864, "step": 18206 }, { "epoch": 0.33280932970186633, "grad_norm": 7.3738920706327615, "learning_rate": 7.782324023625247e-06, "loss": 17.7472, "step": 18207 }, { "epoch": 0.3328276088983128, "grad_norm": 5.475350667601087, "learning_rate": 7.782078071293026e-06, "loss": 17.132, "step": 18208 }, { "epoch": 0.33284588809475935, "grad_norm": 6.62703705335512, "learning_rate": 7.781832109209864e-06, "loss": 17.2494, "step": 18209 }, { "epoch": 0.3328641672912059, "grad_norm": 5.713141076718879, "learning_rate": 7.78158613737662e-06, "loss": 17.2371, "step": 18210 }, { "epoch": 0.3328824464876524, "grad_norm": 5.889611034039129, "learning_rate": 7.781340155794159e-06, "loss": 17.2982, "step": 18211 }, { "epoch": 0.33290072568409895, "grad_norm": 6.832460942448049, "learning_rate": 7.78109416446334e-06, "loss": 17.4735, "step": 18212 }, { "epoch": 0.33291900488054543, "grad_norm": 7.148140644955894, "learning_rate": 7.78084816338503e-06, "loss": 17.9011, "step": 18213 }, { "epoch": 0.33293728407699197, "grad_norm": 6.981509351000349, "learning_rate": 7.780602152560089e-06, "loss": 17.4855, "step": 18214 }, { "epoch": 0.3329555632734385, "grad_norm": 5.84717279478008, "learning_rate": 7.780356131989375e-06, "loss": 17.1238, "step": 18215 }, { "epoch": 0.33297384246988504, "grad_norm": 6.5923059057619104, "learning_rate": 7.780110101673758e-06, "loss": 17.6541, "step": 18216 }, { "epoch": 0.3329921216663316, "grad_norm": 7.385514389434467, "learning_rate": 7.779864061614094e-06, "loss": 17.5661, "step": 18217 }, { "epoch": 0.33301040086277806, "grad_norm": 6.295521554971102, "learning_rate": 7.779618011811248e-06, "loss": 17.6539, "step": 18218 }, { "epoch": 0.3330286800592246, "grad_norm": 6.767173193562697, "learning_rate": 7.779371952266082e-06, "loss": 17.3043, "step": 18219 }, { "epoch": 0.3330469592556711, "grad_norm": 5.589786289988833, "learning_rate": 7.779125882979458e-06, "loss": 17.3223, "step": 18220 }, { "epoch": 0.33306523845211766, "grad_norm": 7.416346672681761, "learning_rate": 7.778879803952242e-06, "loss": 17.8023, "step": 18221 }, { "epoch": 0.33308351764856414, "grad_norm": 5.774653161170778, "learning_rate": 7.77863371518529e-06, "loss": 17.1647, "step": 18222 }, { "epoch": 0.3331017968450107, "grad_norm": 7.831824156077151, "learning_rate": 7.77838761667947e-06, "loss": 17.9389, "step": 18223 }, { "epoch": 0.3331200760414572, "grad_norm": 5.982720017870738, "learning_rate": 7.778141508435641e-06, "loss": 17.2224, "step": 18224 }, { "epoch": 0.33313835523790375, "grad_norm": 7.700784768558035, "learning_rate": 7.777895390454669e-06, "loss": 17.8543, "step": 18225 }, { "epoch": 0.3331566344343503, "grad_norm": 8.116481306875674, "learning_rate": 7.777649262737412e-06, "loss": 18.4105, "step": 18226 }, { "epoch": 0.33317491363079677, "grad_norm": 6.267324299825966, "learning_rate": 7.777403125284737e-06, "loss": 17.1391, "step": 18227 }, { "epoch": 0.3331931928272433, "grad_norm": 6.52838322101029, "learning_rate": 7.777156978097505e-06, "loss": 17.8091, "step": 18228 }, { "epoch": 0.33321147202368984, "grad_norm": 6.199658199303566, "learning_rate": 7.776910821176578e-06, "loss": 17.317, "step": 18229 }, { "epoch": 0.33322975122013637, "grad_norm": 7.757411540484732, "learning_rate": 7.77666465452282e-06, "loss": 18.0858, "step": 18230 }, { "epoch": 0.3332480304165829, "grad_norm": 6.168299341975133, "learning_rate": 7.776418478137095e-06, "loss": 17.5393, "step": 18231 }, { "epoch": 0.3332663096130294, "grad_norm": 6.299672138464206, "learning_rate": 7.776172292020262e-06, "loss": 17.2522, "step": 18232 }, { "epoch": 0.3332845888094759, "grad_norm": 5.580060240216484, "learning_rate": 7.775926096173187e-06, "loss": 17.2081, "step": 18233 }, { "epoch": 0.33330286800592246, "grad_norm": 6.557004094769657, "learning_rate": 7.775679890596731e-06, "loss": 17.5013, "step": 18234 }, { "epoch": 0.333321147202369, "grad_norm": 8.150564436436557, "learning_rate": 7.77543367529176e-06, "loss": 17.8301, "step": 18235 }, { "epoch": 0.33333942639881553, "grad_norm": 6.586964449226526, "learning_rate": 7.775187450259132e-06, "loss": 17.5818, "step": 18236 }, { "epoch": 0.333357705595262, "grad_norm": 5.918227388140869, "learning_rate": 7.774941215499715e-06, "loss": 17.1627, "step": 18237 }, { "epoch": 0.33337598479170855, "grad_norm": 5.814865474293571, "learning_rate": 7.774694971014366e-06, "loss": 17.4967, "step": 18238 }, { "epoch": 0.3333942639881551, "grad_norm": 6.3666921474352005, "learning_rate": 7.774448716803957e-06, "loss": 17.9214, "step": 18239 }, { "epoch": 0.3334125431846016, "grad_norm": 5.805887642846262, "learning_rate": 7.774202452869343e-06, "loss": 17.3668, "step": 18240 }, { "epoch": 0.33343082238104815, "grad_norm": 6.024066042154094, "learning_rate": 7.773956179211392e-06, "loss": 17.3197, "step": 18241 }, { "epoch": 0.33344910157749463, "grad_norm": 6.4865101154431555, "learning_rate": 7.773709895830964e-06, "loss": 17.2924, "step": 18242 }, { "epoch": 0.33346738077394117, "grad_norm": 6.695215422941599, "learning_rate": 7.773463602728923e-06, "loss": 17.669, "step": 18243 }, { "epoch": 0.3334856599703877, "grad_norm": 6.694233011803048, "learning_rate": 7.773217299906134e-06, "loss": 17.7665, "step": 18244 }, { "epoch": 0.33350393916683424, "grad_norm": 5.820866749295414, "learning_rate": 7.772970987363458e-06, "loss": 17.1573, "step": 18245 }, { "epoch": 0.3335222183632808, "grad_norm": 6.240684066895211, "learning_rate": 7.772724665101761e-06, "loss": 17.4504, "step": 18246 }, { "epoch": 0.33354049755972726, "grad_norm": 6.125521233582662, "learning_rate": 7.772478333121904e-06, "loss": 17.2956, "step": 18247 }, { "epoch": 0.3335587767561738, "grad_norm": 7.79943240506785, "learning_rate": 7.77223199142475e-06, "loss": 18.0713, "step": 18248 }, { "epoch": 0.3335770559526203, "grad_norm": 8.366673337653188, "learning_rate": 7.771985640011163e-06, "loss": 18.4496, "step": 18249 }, { "epoch": 0.33359533514906686, "grad_norm": 7.069364827523432, "learning_rate": 7.771739278882009e-06, "loss": 17.8833, "step": 18250 }, { "epoch": 0.3336136143455134, "grad_norm": 5.865046767256241, "learning_rate": 7.771492908038147e-06, "loss": 17.2996, "step": 18251 }, { "epoch": 0.3336318935419599, "grad_norm": 7.32448449067599, "learning_rate": 7.771246527480446e-06, "loss": 17.7859, "step": 18252 }, { "epoch": 0.3336501727384064, "grad_norm": 6.180994553510096, "learning_rate": 7.771000137209763e-06, "loss": 17.5074, "step": 18253 }, { "epoch": 0.33366845193485295, "grad_norm": 6.730369171626773, "learning_rate": 7.770753737226965e-06, "loss": 17.6742, "step": 18254 }, { "epoch": 0.3336867311312995, "grad_norm": 7.494552997378219, "learning_rate": 7.77050732753292e-06, "loss": 17.7578, "step": 18255 }, { "epoch": 0.33370501032774597, "grad_norm": 6.277815491673119, "learning_rate": 7.770260908128481e-06, "loss": 17.4783, "step": 18256 }, { "epoch": 0.3337232895241925, "grad_norm": 6.2061717937115715, "learning_rate": 7.770014479014523e-06, "loss": 17.5916, "step": 18257 }, { "epoch": 0.33374156872063904, "grad_norm": 7.7996121002910055, "learning_rate": 7.769768040191904e-06, "loss": 18.2667, "step": 18258 }, { "epoch": 0.3337598479170856, "grad_norm": 8.257618892263123, "learning_rate": 7.769521591661487e-06, "loss": 18.329, "step": 18259 }, { "epoch": 0.3337781271135321, "grad_norm": 7.268356090039945, "learning_rate": 7.769275133424135e-06, "loss": 18.227, "step": 18260 }, { "epoch": 0.3337964063099786, "grad_norm": 6.236784550460681, "learning_rate": 7.769028665480718e-06, "loss": 17.495, "step": 18261 }, { "epoch": 0.3338146855064251, "grad_norm": 5.826446793854041, "learning_rate": 7.768782187832094e-06, "loss": 17.0138, "step": 18262 }, { "epoch": 0.33383296470287166, "grad_norm": 5.045773038751963, "learning_rate": 7.768535700479128e-06, "loss": 17.0794, "step": 18263 }, { "epoch": 0.3338512438993182, "grad_norm": 6.339256647183979, "learning_rate": 7.768289203422685e-06, "loss": 17.3311, "step": 18264 }, { "epoch": 0.33386952309576473, "grad_norm": 6.1668381429627415, "learning_rate": 7.768042696663629e-06, "loss": 17.2583, "step": 18265 }, { "epoch": 0.3338878022922112, "grad_norm": 6.779129137618555, "learning_rate": 7.767796180202823e-06, "loss": 17.6344, "step": 18266 }, { "epoch": 0.33390608148865775, "grad_norm": 7.8809435520336955, "learning_rate": 7.767549654041132e-06, "loss": 17.9554, "step": 18267 }, { "epoch": 0.3339243606851043, "grad_norm": 7.234332435835093, "learning_rate": 7.767303118179422e-06, "loss": 17.8294, "step": 18268 }, { "epoch": 0.3339426398815508, "grad_norm": 5.744768987872191, "learning_rate": 7.76705657261855e-06, "loss": 17.2128, "step": 18269 }, { "epoch": 0.33396091907799735, "grad_norm": 7.181589881128322, "learning_rate": 7.766810017359387e-06, "loss": 17.7517, "step": 18270 }, { "epoch": 0.33397919827444383, "grad_norm": 7.6324903882375645, "learning_rate": 7.766563452402796e-06, "loss": 18.0289, "step": 18271 }, { "epoch": 0.33399747747089037, "grad_norm": 7.045548758291174, "learning_rate": 7.766316877749641e-06, "loss": 17.5929, "step": 18272 }, { "epoch": 0.3340157566673369, "grad_norm": 5.48986111322997, "learning_rate": 7.766070293400783e-06, "loss": 17.0748, "step": 18273 }, { "epoch": 0.33403403586378344, "grad_norm": 6.950841627839012, "learning_rate": 7.765823699357089e-06, "loss": 17.7548, "step": 18274 }, { "epoch": 0.33405231506023, "grad_norm": 7.359007079523373, "learning_rate": 7.765577095619423e-06, "loss": 17.6903, "step": 18275 }, { "epoch": 0.33407059425667646, "grad_norm": 6.8479912684776725, "learning_rate": 7.765330482188649e-06, "loss": 17.1306, "step": 18276 }, { "epoch": 0.334088873453123, "grad_norm": 7.270761509348165, "learning_rate": 7.765083859065631e-06, "loss": 17.8807, "step": 18277 }, { "epoch": 0.3341071526495695, "grad_norm": 6.29190187688376, "learning_rate": 7.764837226251237e-06, "loss": 17.6153, "step": 18278 }, { "epoch": 0.33412543184601606, "grad_norm": 6.576013848161763, "learning_rate": 7.764590583746328e-06, "loss": 17.4124, "step": 18279 }, { "epoch": 0.3341437110424626, "grad_norm": 6.887204970856277, "learning_rate": 7.764343931551765e-06, "loss": 17.745, "step": 18280 }, { "epoch": 0.3341619902389091, "grad_norm": 7.233508854667866, "learning_rate": 7.76409726966842e-06, "loss": 18.0759, "step": 18281 }, { "epoch": 0.3341802694353556, "grad_norm": 7.782221930944987, "learning_rate": 7.763850598097154e-06, "loss": 18.2327, "step": 18282 }, { "epoch": 0.33419854863180215, "grad_norm": 5.774946535351223, "learning_rate": 7.76360391683883e-06, "loss": 17.1124, "step": 18283 }, { "epoch": 0.3342168278282487, "grad_norm": 7.142259606946519, "learning_rate": 7.763357225894314e-06, "loss": 17.9635, "step": 18284 }, { "epoch": 0.3342351070246952, "grad_norm": 6.552254980874395, "learning_rate": 7.763110525264471e-06, "loss": 17.6825, "step": 18285 }, { "epoch": 0.3342533862211417, "grad_norm": 7.166106033103709, "learning_rate": 7.762863814950165e-06, "loss": 17.816, "step": 18286 }, { "epoch": 0.33427166541758824, "grad_norm": 6.908900588446828, "learning_rate": 7.762617094952262e-06, "loss": 18.2036, "step": 18287 }, { "epoch": 0.3342899446140348, "grad_norm": 7.4750743296947375, "learning_rate": 7.762370365271625e-06, "loss": 18.0611, "step": 18288 }, { "epoch": 0.3343082238104813, "grad_norm": 6.420174060591953, "learning_rate": 7.76212362590912e-06, "loss": 17.6022, "step": 18289 }, { "epoch": 0.3343265030069278, "grad_norm": 6.725124424115284, "learning_rate": 7.761876876865612e-06, "loss": 17.3032, "step": 18290 }, { "epoch": 0.3343447822033743, "grad_norm": 6.210333346303373, "learning_rate": 7.761630118141966e-06, "loss": 17.2584, "step": 18291 }, { "epoch": 0.33436306139982086, "grad_norm": 7.46718203655446, "learning_rate": 7.761383349739045e-06, "loss": 18.3899, "step": 18292 }, { "epoch": 0.3343813405962674, "grad_norm": 5.809202179301146, "learning_rate": 7.761136571657714e-06, "loss": 17.3044, "step": 18293 }, { "epoch": 0.33439961979271393, "grad_norm": 7.969665938088734, "learning_rate": 7.760889783898839e-06, "loss": 18.0716, "step": 18294 }, { "epoch": 0.3344178989891604, "grad_norm": 7.580848556915434, "learning_rate": 7.760642986463284e-06, "loss": 18.0002, "step": 18295 }, { "epoch": 0.33443617818560695, "grad_norm": 5.325450503699952, "learning_rate": 7.760396179351919e-06, "loss": 17.0275, "step": 18296 }, { "epoch": 0.3344544573820535, "grad_norm": 7.614931020495053, "learning_rate": 7.760149362565602e-06, "loss": 18.0945, "step": 18297 }, { "epoch": 0.3344727365785, "grad_norm": 6.624587523393241, "learning_rate": 7.7599025361052e-06, "loss": 17.1637, "step": 18298 }, { "epoch": 0.33449101577494655, "grad_norm": 6.067403262655608, "learning_rate": 7.759655699971581e-06, "loss": 17.3723, "step": 18299 }, { "epoch": 0.33450929497139303, "grad_norm": 7.451357832709255, "learning_rate": 7.759408854165608e-06, "loss": 17.8072, "step": 18300 }, { "epoch": 0.33452757416783957, "grad_norm": 5.248124535174485, "learning_rate": 7.759161998688145e-06, "loss": 16.988, "step": 18301 }, { "epoch": 0.3345458533642861, "grad_norm": 5.894392508469301, "learning_rate": 7.758915133540059e-06, "loss": 17.2724, "step": 18302 }, { "epoch": 0.33456413256073264, "grad_norm": 8.490995476710541, "learning_rate": 7.758668258722217e-06, "loss": 17.5968, "step": 18303 }, { "epoch": 0.3345824117571792, "grad_norm": 6.3621774839835705, "learning_rate": 7.758421374235481e-06, "loss": 17.3985, "step": 18304 }, { "epoch": 0.33460069095362566, "grad_norm": 6.348004245125481, "learning_rate": 7.758174480080717e-06, "loss": 17.4257, "step": 18305 }, { "epoch": 0.3346189701500722, "grad_norm": 6.258615994091, "learning_rate": 7.75792757625879e-06, "loss": 17.6457, "step": 18306 }, { "epoch": 0.33463724934651873, "grad_norm": 6.986781705578218, "learning_rate": 7.757680662770568e-06, "loss": 17.6274, "step": 18307 }, { "epoch": 0.33465552854296526, "grad_norm": 6.621352092591684, "learning_rate": 7.757433739616913e-06, "loss": 17.8456, "step": 18308 }, { "epoch": 0.3346738077394118, "grad_norm": 5.42694561819176, "learning_rate": 7.757186806798693e-06, "loss": 17.1311, "step": 18309 }, { "epoch": 0.3346920869358583, "grad_norm": 6.145164943286487, "learning_rate": 7.756939864316773e-06, "loss": 17.3195, "step": 18310 }, { "epoch": 0.3347103661323048, "grad_norm": 6.99898461330753, "learning_rate": 7.756692912172017e-06, "loss": 17.8293, "step": 18311 }, { "epoch": 0.33472864532875135, "grad_norm": 7.338612922482191, "learning_rate": 7.756445950365292e-06, "loss": 17.5407, "step": 18312 }, { "epoch": 0.3347469245251979, "grad_norm": 6.485677018272688, "learning_rate": 7.756198978897463e-06, "loss": 17.7522, "step": 18313 }, { "epoch": 0.3347652037216444, "grad_norm": 7.882693990131259, "learning_rate": 7.755951997769395e-06, "loss": 17.9094, "step": 18314 }, { "epoch": 0.3347834829180909, "grad_norm": 6.346007494365597, "learning_rate": 7.755705006981955e-06, "loss": 17.3265, "step": 18315 }, { "epoch": 0.33480176211453744, "grad_norm": 6.206414249888963, "learning_rate": 7.75545800653601e-06, "loss": 17.2567, "step": 18316 }, { "epoch": 0.334820041310984, "grad_norm": 6.55249888096095, "learning_rate": 7.755210996432421e-06, "loss": 17.4971, "step": 18317 }, { "epoch": 0.3348383205074305, "grad_norm": 6.964432634711099, "learning_rate": 7.754963976672056e-06, "loss": 17.7342, "step": 18318 }, { "epoch": 0.33485659970387704, "grad_norm": 7.916931176316435, "learning_rate": 7.754716947255784e-06, "loss": 17.8617, "step": 18319 }, { "epoch": 0.3348748789003235, "grad_norm": 6.461639288916398, "learning_rate": 7.754469908184467e-06, "loss": 17.347, "step": 18320 }, { "epoch": 0.33489315809677006, "grad_norm": 5.057605319358128, "learning_rate": 7.754222859458973e-06, "loss": 16.8883, "step": 18321 }, { "epoch": 0.3349114372932166, "grad_norm": 6.832876065541013, "learning_rate": 7.753975801080165e-06, "loss": 17.5447, "step": 18322 }, { "epoch": 0.33492971648966313, "grad_norm": 6.864591411239356, "learning_rate": 7.753728733048911e-06, "loss": 17.6954, "step": 18323 }, { "epoch": 0.3349479956861096, "grad_norm": 6.001523010916419, "learning_rate": 7.753481655366077e-06, "loss": 17.3145, "step": 18324 }, { "epoch": 0.33496627488255615, "grad_norm": 5.667306709259325, "learning_rate": 7.75323456803253e-06, "loss": 17.0702, "step": 18325 }, { "epoch": 0.3349845540790027, "grad_norm": 6.639306807494685, "learning_rate": 7.752987471049133e-06, "loss": 17.4848, "step": 18326 }, { "epoch": 0.3350028332754492, "grad_norm": 6.2005682970654865, "learning_rate": 7.752740364416756e-06, "loss": 17.4231, "step": 18327 }, { "epoch": 0.33502111247189575, "grad_norm": 6.852004423139621, "learning_rate": 7.752493248136262e-06, "loss": 17.8458, "step": 18328 }, { "epoch": 0.33503939166834223, "grad_norm": 6.242489731981976, "learning_rate": 7.752246122208515e-06, "loss": 17.3899, "step": 18329 }, { "epoch": 0.33505767086478877, "grad_norm": 6.761276698732926, "learning_rate": 7.751998986634388e-06, "loss": 17.3753, "step": 18330 }, { "epoch": 0.3350759500612353, "grad_norm": 8.4697101881472, "learning_rate": 7.751751841414742e-06, "loss": 18.2433, "step": 18331 }, { "epoch": 0.33509422925768184, "grad_norm": 5.98340590415751, "learning_rate": 7.751504686550444e-06, "loss": 17.3087, "step": 18332 }, { "epoch": 0.3351125084541284, "grad_norm": 4.63306568016994, "learning_rate": 7.75125752204236e-06, "loss": 16.8643, "step": 18333 }, { "epoch": 0.33513078765057486, "grad_norm": 6.874276635523743, "learning_rate": 7.751010347891361e-06, "loss": 17.6956, "step": 18334 }, { "epoch": 0.3351490668470214, "grad_norm": 7.056705825769277, "learning_rate": 7.750763164098308e-06, "loss": 18.1054, "step": 18335 }, { "epoch": 0.33516734604346793, "grad_norm": 6.022894070645962, "learning_rate": 7.750515970664066e-06, "loss": 17.2611, "step": 18336 }, { "epoch": 0.33518562523991446, "grad_norm": 6.4601713256358595, "learning_rate": 7.750268767589507e-06, "loss": 17.4091, "step": 18337 }, { "epoch": 0.335203904436361, "grad_norm": 7.631234410012835, "learning_rate": 7.750021554875493e-06, "loss": 18.0535, "step": 18338 }, { "epoch": 0.3352221836328075, "grad_norm": 6.5660887452535865, "learning_rate": 7.749774332522894e-06, "loss": 17.5306, "step": 18339 }, { "epoch": 0.335240462829254, "grad_norm": 6.38797809805494, "learning_rate": 7.749527100532572e-06, "loss": 17.5954, "step": 18340 }, { "epoch": 0.33525874202570055, "grad_norm": 6.6464765680954505, "learning_rate": 7.749279858905398e-06, "loss": 17.9632, "step": 18341 }, { "epoch": 0.3352770212221471, "grad_norm": 5.948301317242268, "learning_rate": 7.749032607642237e-06, "loss": 17.1922, "step": 18342 }, { "epoch": 0.3352953004185936, "grad_norm": 5.775275108449763, "learning_rate": 7.748785346743955e-06, "loss": 17.3549, "step": 18343 }, { "epoch": 0.3353135796150401, "grad_norm": 5.930169817187332, "learning_rate": 7.748538076211418e-06, "loss": 17.362, "step": 18344 }, { "epoch": 0.33533185881148664, "grad_norm": 6.230980678229091, "learning_rate": 7.748290796045493e-06, "loss": 17.4745, "step": 18345 }, { "epoch": 0.3353501380079332, "grad_norm": 6.814774819011009, "learning_rate": 7.74804350624705e-06, "loss": 17.7425, "step": 18346 }, { "epoch": 0.3353684172043797, "grad_norm": 7.52974247559615, "learning_rate": 7.74779620681695e-06, "loss": 17.9699, "step": 18347 }, { "epoch": 0.33538669640082625, "grad_norm": 6.217013645338138, "learning_rate": 7.747548897756063e-06, "loss": 17.175, "step": 18348 }, { "epoch": 0.3354049755972727, "grad_norm": 6.00741862194668, "learning_rate": 7.747301579065256e-06, "loss": 17.1926, "step": 18349 }, { "epoch": 0.33542325479371926, "grad_norm": 6.8412301633342105, "learning_rate": 7.747054250745396e-06, "loss": 17.6055, "step": 18350 }, { "epoch": 0.3354415339901658, "grad_norm": 6.638726934444085, "learning_rate": 7.746806912797349e-06, "loss": 17.6016, "step": 18351 }, { "epoch": 0.33545981318661233, "grad_norm": 7.583313276990292, "learning_rate": 7.746559565221983e-06, "loss": 18.0188, "step": 18352 }, { "epoch": 0.33547809238305887, "grad_norm": 7.2380370876110645, "learning_rate": 7.746312208020164e-06, "loss": 18.0414, "step": 18353 }, { "epoch": 0.33549637157950535, "grad_norm": 7.097301550884373, "learning_rate": 7.746064841192757e-06, "loss": 17.6619, "step": 18354 }, { "epoch": 0.3355146507759519, "grad_norm": 6.277958463243101, "learning_rate": 7.745817464740633e-06, "loss": 17.5832, "step": 18355 }, { "epoch": 0.3355329299723984, "grad_norm": 6.045685301953929, "learning_rate": 7.745570078664655e-06, "loss": 17.2695, "step": 18356 }, { "epoch": 0.33555120916884495, "grad_norm": 7.749337621853986, "learning_rate": 7.745322682965693e-06, "loss": 17.9238, "step": 18357 }, { "epoch": 0.33556948836529144, "grad_norm": 7.15648722064878, "learning_rate": 7.745075277644615e-06, "loss": 18.0479, "step": 18358 }, { "epoch": 0.33558776756173797, "grad_norm": 6.481833282438767, "learning_rate": 7.744827862702284e-06, "loss": 17.5936, "step": 18359 }, { "epoch": 0.3356060467581845, "grad_norm": 7.27669269267047, "learning_rate": 7.744580438139571e-06, "loss": 17.9004, "step": 18360 }, { "epoch": 0.33562432595463104, "grad_norm": 6.198573375094083, "learning_rate": 7.744333003957341e-06, "loss": 17.412, "step": 18361 }, { "epoch": 0.3356426051510776, "grad_norm": 6.3126170267845465, "learning_rate": 7.744085560156462e-06, "loss": 17.565, "step": 18362 }, { "epoch": 0.33566088434752406, "grad_norm": 6.722386020348753, "learning_rate": 7.743838106737802e-06, "loss": 17.2823, "step": 18363 }, { "epoch": 0.3356791635439706, "grad_norm": 8.532039182448148, "learning_rate": 7.743590643702227e-06, "loss": 18.4358, "step": 18364 }, { "epoch": 0.33569744274041713, "grad_norm": 7.019269928101385, "learning_rate": 7.743343171050604e-06, "loss": 17.7279, "step": 18365 }, { "epoch": 0.33571572193686366, "grad_norm": 6.2363754879825795, "learning_rate": 7.743095688783803e-06, "loss": 17.4868, "step": 18366 }, { "epoch": 0.3357340011333102, "grad_norm": 6.547608551667183, "learning_rate": 7.742848196902688e-06, "loss": 17.6869, "step": 18367 }, { "epoch": 0.3357522803297567, "grad_norm": 6.524650132021672, "learning_rate": 7.742600695408128e-06, "loss": 17.6394, "step": 18368 }, { "epoch": 0.3357705595262032, "grad_norm": 7.580916121783052, "learning_rate": 7.742353184300992e-06, "loss": 17.504, "step": 18369 }, { "epoch": 0.33578883872264975, "grad_norm": 7.134162781073809, "learning_rate": 7.742105663582145e-06, "loss": 17.9606, "step": 18370 }, { "epoch": 0.3358071179190963, "grad_norm": 6.742971936651327, "learning_rate": 7.741858133252456e-06, "loss": 17.4696, "step": 18371 }, { "epoch": 0.3358253971155428, "grad_norm": 5.756909069664118, "learning_rate": 7.741610593312794e-06, "loss": 17.2435, "step": 18372 }, { "epoch": 0.3358436763119893, "grad_norm": 6.655582882159083, "learning_rate": 7.741363043764023e-06, "loss": 17.3407, "step": 18373 }, { "epoch": 0.33586195550843584, "grad_norm": 5.649762551970552, "learning_rate": 7.741115484607011e-06, "loss": 17.1598, "step": 18374 }, { "epoch": 0.3358802347048824, "grad_norm": 5.345426745039483, "learning_rate": 7.74086791584263e-06, "loss": 17.0716, "step": 18375 }, { "epoch": 0.3358985139013289, "grad_norm": 6.107630936871481, "learning_rate": 7.740620337471743e-06, "loss": 17.4788, "step": 18376 }, { "epoch": 0.33591679309777545, "grad_norm": 6.792138438895729, "learning_rate": 7.74037274949522e-06, "loss": 17.4798, "step": 18377 }, { "epoch": 0.3359350722942219, "grad_norm": 5.853056285933311, "learning_rate": 7.740125151913927e-06, "loss": 17.2303, "step": 18378 }, { "epoch": 0.33595335149066846, "grad_norm": 6.464523229272147, "learning_rate": 7.739877544728734e-06, "loss": 17.5476, "step": 18379 }, { "epoch": 0.335971630687115, "grad_norm": 8.422027448603725, "learning_rate": 7.739629927940508e-06, "loss": 18.2603, "step": 18380 }, { "epoch": 0.33598990988356153, "grad_norm": 6.288018775709371, "learning_rate": 7.739382301550117e-06, "loss": 17.6214, "step": 18381 }, { "epoch": 0.33600818908000807, "grad_norm": 6.475994648492222, "learning_rate": 7.73913466555843e-06, "loss": 17.4556, "step": 18382 }, { "epoch": 0.33602646827645455, "grad_norm": 5.701573680999045, "learning_rate": 7.738887019966312e-06, "loss": 17.106, "step": 18383 }, { "epoch": 0.3360447474729011, "grad_norm": 7.0780042943270125, "learning_rate": 7.738639364774633e-06, "loss": 17.6859, "step": 18384 }, { "epoch": 0.3360630266693476, "grad_norm": 7.001881184847415, "learning_rate": 7.73839169998426e-06, "loss": 18.0078, "step": 18385 }, { "epoch": 0.33608130586579416, "grad_norm": 7.605615146116568, "learning_rate": 7.738144025596063e-06, "loss": 18.2703, "step": 18386 }, { "epoch": 0.3360995850622407, "grad_norm": 6.939675288723311, "learning_rate": 7.737896341610908e-06, "loss": 17.482, "step": 18387 }, { "epoch": 0.33611786425868717, "grad_norm": 6.544616756770822, "learning_rate": 7.737648648029664e-06, "loss": 17.7247, "step": 18388 }, { "epoch": 0.3361361434551337, "grad_norm": 5.560292907012687, "learning_rate": 7.737400944853201e-06, "loss": 17.3386, "step": 18389 }, { "epoch": 0.33615442265158024, "grad_norm": 6.072849495003731, "learning_rate": 7.737153232082383e-06, "loss": 17.3119, "step": 18390 }, { "epoch": 0.3361727018480268, "grad_norm": 7.224700545550032, "learning_rate": 7.73690550971808e-06, "loss": 17.9829, "step": 18391 }, { "epoch": 0.33619098104447326, "grad_norm": 7.312227418371407, "learning_rate": 7.736657777761164e-06, "loss": 17.7287, "step": 18392 }, { "epoch": 0.3362092602409198, "grad_norm": 8.927086043083467, "learning_rate": 7.736410036212497e-06, "loss": 18.3927, "step": 18393 }, { "epoch": 0.33622753943736633, "grad_norm": 6.14345139295062, "learning_rate": 7.73616228507295e-06, "loss": 17.3107, "step": 18394 }, { "epoch": 0.33624581863381287, "grad_norm": 8.08745349602045, "learning_rate": 7.735914524343393e-06, "loss": 18.4474, "step": 18395 }, { "epoch": 0.3362640978302594, "grad_norm": 7.518355698459249, "learning_rate": 7.735666754024692e-06, "loss": 18.0125, "step": 18396 }, { "epoch": 0.3362823770267059, "grad_norm": 7.670188487304936, "learning_rate": 7.735418974117716e-06, "loss": 18.2755, "step": 18397 }, { "epoch": 0.3363006562231524, "grad_norm": 7.363685793543698, "learning_rate": 7.735171184623336e-06, "loss": 17.5832, "step": 18398 }, { "epoch": 0.33631893541959895, "grad_norm": 7.797323413026644, "learning_rate": 7.734923385542417e-06, "loss": 18.2299, "step": 18399 }, { "epoch": 0.3363372146160455, "grad_norm": 6.175064421290564, "learning_rate": 7.734675576875828e-06, "loss": 17.5104, "step": 18400 }, { "epoch": 0.336355493812492, "grad_norm": 6.925918699168115, "learning_rate": 7.73442775862444e-06, "loss": 17.8916, "step": 18401 }, { "epoch": 0.3363737730089385, "grad_norm": 6.313385351587444, "learning_rate": 7.734179930789119e-06, "loss": 17.4044, "step": 18402 }, { "epoch": 0.33639205220538504, "grad_norm": 6.600660588623588, "learning_rate": 7.733932093370735e-06, "loss": 17.4565, "step": 18403 }, { "epoch": 0.3364103314018316, "grad_norm": 6.081680885442603, "learning_rate": 7.733684246370156e-06, "loss": 17.4435, "step": 18404 }, { "epoch": 0.3364286105982781, "grad_norm": 6.066761271916142, "learning_rate": 7.733436389788252e-06, "loss": 17.3945, "step": 18405 }, { "epoch": 0.33644688979472465, "grad_norm": 5.79555519497916, "learning_rate": 7.73318852362589e-06, "loss": 17.3575, "step": 18406 }, { "epoch": 0.3364651689911711, "grad_norm": 7.031841822332528, "learning_rate": 7.732940647883939e-06, "loss": 17.3679, "step": 18407 }, { "epoch": 0.33648344818761766, "grad_norm": 5.652500007778597, "learning_rate": 7.732692762563267e-06, "loss": 17.395, "step": 18408 }, { "epoch": 0.3365017273840642, "grad_norm": 7.356023526358955, "learning_rate": 7.732444867664746e-06, "loss": 18.0134, "step": 18409 }, { "epoch": 0.33652000658051073, "grad_norm": 6.5753188549896775, "learning_rate": 7.732196963189243e-06, "loss": 17.7415, "step": 18410 }, { "epoch": 0.33653828577695727, "grad_norm": 6.340180504691334, "learning_rate": 7.731949049137627e-06, "loss": 17.2667, "step": 18411 }, { "epoch": 0.33655656497340375, "grad_norm": 6.867459569486875, "learning_rate": 7.731701125510764e-06, "loss": 17.7586, "step": 18412 }, { "epoch": 0.3365748441698503, "grad_norm": 9.345006651836984, "learning_rate": 7.731453192309529e-06, "loss": 18.6728, "step": 18413 }, { "epoch": 0.3365931233662968, "grad_norm": 6.909630227126561, "learning_rate": 7.731205249534785e-06, "loss": 17.6986, "step": 18414 }, { "epoch": 0.33661140256274336, "grad_norm": 7.7838930350249935, "learning_rate": 7.730957297187403e-06, "loss": 17.6255, "step": 18415 }, { "epoch": 0.3366296817591899, "grad_norm": 6.265011349717121, "learning_rate": 7.730709335268256e-06, "loss": 17.4874, "step": 18416 }, { "epoch": 0.33664796095563637, "grad_norm": 8.02861059803375, "learning_rate": 7.730461363778206e-06, "loss": 18.3071, "step": 18417 }, { "epoch": 0.3366662401520829, "grad_norm": 5.596711905591294, "learning_rate": 7.730213382718129e-06, "loss": 17.1153, "step": 18418 }, { "epoch": 0.33668451934852944, "grad_norm": 7.972988658366918, "learning_rate": 7.72996539208889e-06, "loss": 18.1299, "step": 18419 }, { "epoch": 0.336702798544976, "grad_norm": 8.120943580975988, "learning_rate": 7.729717391891358e-06, "loss": 18.3888, "step": 18420 }, { "epoch": 0.3367210777414225, "grad_norm": 6.731044068945436, "learning_rate": 7.729469382126405e-06, "loss": 17.5747, "step": 18421 }, { "epoch": 0.336739356937869, "grad_norm": 7.289569065858472, "learning_rate": 7.729221362794897e-06, "loss": 17.9513, "step": 18422 }, { "epoch": 0.33675763613431553, "grad_norm": 5.743911337736639, "learning_rate": 7.728973333897707e-06, "loss": 17.2202, "step": 18423 }, { "epoch": 0.33677591533076207, "grad_norm": 6.662068868190871, "learning_rate": 7.728725295435701e-06, "loss": 17.6026, "step": 18424 }, { "epoch": 0.3367941945272086, "grad_norm": 9.093729319320211, "learning_rate": 7.72847724740975e-06, "loss": 18.9263, "step": 18425 }, { "epoch": 0.3368124737236551, "grad_norm": 6.455217427397297, "learning_rate": 7.728229189820721e-06, "loss": 17.7361, "step": 18426 }, { "epoch": 0.3368307529201016, "grad_norm": 7.146877314145098, "learning_rate": 7.72798112266949e-06, "loss": 17.7652, "step": 18427 }, { "epoch": 0.33684903211654815, "grad_norm": 7.404027428581642, "learning_rate": 7.727733045956919e-06, "loss": 17.6531, "step": 18428 }, { "epoch": 0.3368673113129947, "grad_norm": 6.469316020147925, "learning_rate": 7.72748495968388e-06, "loss": 17.7893, "step": 18429 }, { "epoch": 0.3368855905094412, "grad_norm": 7.740519309697861, "learning_rate": 7.727236863851243e-06, "loss": 17.9784, "step": 18430 }, { "epoch": 0.3369038697058877, "grad_norm": 7.096828766894045, "learning_rate": 7.726988758459877e-06, "loss": 17.7158, "step": 18431 }, { "epoch": 0.33692214890233424, "grad_norm": 6.821265096886873, "learning_rate": 7.726740643510654e-06, "loss": 17.6382, "step": 18432 }, { "epoch": 0.3369404280987808, "grad_norm": 7.026946958908574, "learning_rate": 7.72649251900444e-06, "loss": 18.1321, "step": 18433 }, { "epoch": 0.3369587072952273, "grad_norm": 6.347502405851839, "learning_rate": 7.726244384942108e-06, "loss": 17.4784, "step": 18434 }, { "epoch": 0.33697698649167385, "grad_norm": 5.277570150694714, "learning_rate": 7.725996241324524e-06, "loss": 17.0442, "step": 18435 }, { "epoch": 0.3369952656881203, "grad_norm": 5.521964833824245, "learning_rate": 7.725748088152561e-06, "loss": 17.1314, "step": 18436 }, { "epoch": 0.33701354488456686, "grad_norm": 7.197662323340944, "learning_rate": 7.725499925427086e-06, "loss": 17.6402, "step": 18437 }, { "epoch": 0.3370318240810134, "grad_norm": 6.006474924625444, "learning_rate": 7.725251753148972e-06, "loss": 17.4828, "step": 18438 }, { "epoch": 0.33705010327745993, "grad_norm": 5.059700720043585, "learning_rate": 7.725003571319086e-06, "loss": 16.9115, "step": 18439 }, { "epoch": 0.33706838247390647, "grad_norm": 6.864063054870559, "learning_rate": 7.7247553799383e-06, "loss": 17.577, "step": 18440 }, { "epoch": 0.33708666167035295, "grad_norm": 6.52387554693364, "learning_rate": 7.724507179007484e-06, "loss": 17.5513, "step": 18441 }, { "epoch": 0.3371049408667995, "grad_norm": 5.43570866174775, "learning_rate": 7.724258968527503e-06, "loss": 17.2563, "step": 18442 }, { "epoch": 0.337123220063246, "grad_norm": 7.3973909691611395, "learning_rate": 7.724010748499232e-06, "loss": 17.9174, "step": 18443 }, { "epoch": 0.33714149925969256, "grad_norm": 6.778630546710854, "learning_rate": 7.72376251892354e-06, "loss": 17.6066, "step": 18444 }, { "epoch": 0.3371597784561391, "grad_norm": 5.961436839492804, "learning_rate": 7.723514279801298e-06, "loss": 17.3852, "step": 18445 }, { "epoch": 0.33717805765258557, "grad_norm": 7.062129788464218, "learning_rate": 7.723266031133373e-06, "loss": 17.9826, "step": 18446 }, { "epoch": 0.3371963368490321, "grad_norm": 7.432303147374123, "learning_rate": 7.72301777292064e-06, "loss": 17.7383, "step": 18447 }, { "epoch": 0.33721461604547864, "grad_norm": 7.742647669234327, "learning_rate": 7.722769505163963e-06, "loss": 18.1091, "step": 18448 }, { "epoch": 0.3372328952419252, "grad_norm": 7.893233484369064, "learning_rate": 7.722521227864216e-06, "loss": 18.2455, "step": 18449 }, { "epoch": 0.3372511744383717, "grad_norm": 7.142750209709399, "learning_rate": 7.722272941022268e-06, "loss": 17.8689, "step": 18450 }, { "epoch": 0.3372694536348182, "grad_norm": 7.571102108735381, "learning_rate": 7.72202464463899e-06, "loss": 18.0523, "step": 18451 }, { "epoch": 0.33728773283126473, "grad_norm": 7.237628522781843, "learning_rate": 7.721776338715252e-06, "loss": 17.7742, "step": 18452 }, { "epoch": 0.33730601202771127, "grad_norm": 8.014417650877423, "learning_rate": 7.721528023251924e-06, "loss": 18.1643, "step": 18453 }, { "epoch": 0.3373242912241578, "grad_norm": 5.603909557078515, "learning_rate": 7.721279698249878e-06, "loss": 17.1089, "step": 18454 }, { "epoch": 0.33734257042060434, "grad_norm": 8.166787237695967, "learning_rate": 7.72103136370998e-06, "loss": 18.3125, "step": 18455 }, { "epoch": 0.3373608496170508, "grad_norm": 7.54873919290871, "learning_rate": 7.720783019633103e-06, "loss": 17.8197, "step": 18456 }, { "epoch": 0.33737912881349735, "grad_norm": 5.9202994619846, "learning_rate": 7.720534666020119e-06, "loss": 17.3548, "step": 18457 }, { "epoch": 0.3373974080099439, "grad_norm": 6.260292847986377, "learning_rate": 7.720286302871899e-06, "loss": 17.5368, "step": 18458 }, { "epoch": 0.3374156872063904, "grad_norm": 5.300475724027984, "learning_rate": 7.720037930189308e-06, "loss": 17.0986, "step": 18459 }, { "epoch": 0.3374339664028369, "grad_norm": 8.137722655610194, "learning_rate": 7.719789547973222e-06, "loss": 18.1161, "step": 18460 }, { "epoch": 0.33745224559928344, "grad_norm": 6.534889342544086, "learning_rate": 7.719541156224509e-06, "loss": 17.3363, "step": 18461 }, { "epoch": 0.33747052479573, "grad_norm": 6.652638240476339, "learning_rate": 7.71929275494404e-06, "loss": 17.3188, "step": 18462 }, { "epoch": 0.3374888039921765, "grad_norm": 6.130865054693788, "learning_rate": 7.719044344132687e-06, "loss": 17.2704, "step": 18463 }, { "epoch": 0.33750708318862305, "grad_norm": 6.9481372715060985, "learning_rate": 7.718795923791318e-06, "loss": 17.679, "step": 18464 }, { "epoch": 0.3375253623850695, "grad_norm": 6.577487798955367, "learning_rate": 7.718547493920808e-06, "loss": 17.4429, "step": 18465 }, { "epoch": 0.33754364158151606, "grad_norm": 6.712775286732302, "learning_rate": 7.718299054522023e-06, "loss": 17.616, "step": 18466 }, { "epoch": 0.3375619207779626, "grad_norm": 6.309615343063586, "learning_rate": 7.718050605595834e-06, "loss": 17.5656, "step": 18467 }, { "epoch": 0.33758019997440913, "grad_norm": 7.976188969781821, "learning_rate": 7.717802147143116e-06, "loss": 17.9397, "step": 18468 }, { "epoch": 0.33759847917085567, "grad_norm": 6.056843727085684, "learning_rate": 7.717553679164736e-06, "loss": 17.6374, "step": 18469 }, { "epoch": 0.33761675836730215, "grad_norm": 5.79688689723586, "learning_rate": 7.717305201661567e-06, "loss": 17.2079, "step": 18470 }, { "epoch": 0.3376350375637487, "grad_norm": 8.007233412783789, "learning_rate": 7.717056714634478e-06, "loss": 18.2403, "step": 18471 }, { "epoch": 0.3376533167601952, "grad_norm": 8.340269402764084, "learning_rate": 7.716808218084344e-06, "loss": 18.3294, "step": 18472 }, { "epoch": 0.33767159595664176, "grad_norm": 5.789205177314982, "learning_rate": 7.716559712012029e-06, "loss": 17.2842, "step": 18473 }, { "epoch": 0.3376898751530883, "grad_norm": 6.961346009129132, "learning_rate": 7.716311196418409e-06, "loss": 18.0157, "step": 18474 }, { "epoch": 0.3377081543495348, "grad_norm": 6.816075453202713, "learning_rate": 7.716062671304356e-06, "loss": 17.5105, "step": 18475 }, { "epoch": 0.3377264335459813, "grad_norm": 7.1519747347555525, "learning_rate": 7.715814136670738e-06, "loss": 17.6979, "step": 18476 }, { "epoch": 0.33774471274242784, "grad_norm": 7.397342958436656, "learning_rate": 7.715565592518426e-06, "loss": 18.2724, "step": 18477 }, { "epoch": 0.3377629919388744, "grad_norm": 7.2484185914721415, "learning_rate": 7.715317038848294e-06, "loss": 18.0584, "step": 18478 }, { "epoch": 0.3377812711353209, "grad_norm": 6.753753332473047, "learning_rate": 7.71506847566121e-06, "loss": 17.6873, "step": 18479 }, { "epoch": 0.3377995503317674, "grad_norm": 5.910713944546503, "learning_rate": 7.714819902958047e-06, "loss": 17.2845, "step": 18480 }, { "epoch": 0.33781782952821393, "grad_norm": 6.040055410360147, "learning_rate": 7.714571320739674e-06, "loss": 17.6741, "step": 18481 }, { "epoch": 0.33783610872466047, "grad_norm": 6.16224240871509, "learning_rate": 7.714322729006968e-06, "loss": 17.4705, "step": 18482 }, { "epoch": 0.337854387921107, "grad_norm": 6.786275507486063, "learning_rate": 7.714074127760793e-06, "loss": 17.6599, "step": 18483 }, { "epoch": 0.33787266711755354, "grad_norm": 5.052748271895253, "learning_rate": 7.713825517002025e-06, "loss": 16.891, "step": 18484 }, { "epoch": 0.337890946314, "grad_norm": 6.548327809552189, "learning_rate": 7.713576896731534e-06, "loss": 17.8533, "step": 18485 }, { "epoch": 0.33790922551044655, "grad_norm": 6.809651591047302, "learning_rate": 7.713328266950192e-06, "loss": 17.5008, "step": 18486 }, { "epoch": 0.3379275047068931, "grad_norm": 6.811358222407411, "learning_rate": 7.71307962765887e-06, "loss": 17.4791, "step": 18487 }, { "epoch": 0.3379457839033396, "grad_norm": 6.732612926720186, "learning_rate": 7.712830978858437e-06, "loss": 17.6251, "step": 18488 }, { "epoch": 0.33796406309978616, "grad_norm": 6.0661694502438674, "learning_rate": 7.71258232054977e-06, "loss": 17.433, "step": 18489 }, { "epoch": 0.33798234229623264, "grad_norm": 7.741720293416946, "learning_rate": 7.712333652733736e-06, "loss": 17.7191, "step": 18490 }, { "epoch": 0.3380006214926792, "grad_norm": 5.94148528406997, "learning_rate": 7.712084975411207e-06, "loss": 17.4314, "step": 18491 }, { "epoch": 0.3380189006891257, "grad_norm": 5.503236881276483, "learning_rate": 7.711836288583056e-06, "loss": 17.3177, "step": 18492 }, { "epoch": 0.33803717988557225, "grad_norm": 6.227555450615774, "learning_rate": 7.711587592250157e-06, "loss": 17.5322, "step": 18493 }, { "epoch": 0.3380554590820187, "grad_norm": 6.5307639282098044, "learning_rate": 7.711338886413375e-06, "loss": 17.6392, "step": 18494 }, { "epoch": 0.33807373827846526, "grad_norm": 8.25031259660908, "learning_rate": 7.711090171073585e-06, "loss": 18.2677, "step": 18495 }, { "epoch": 0.3380920174749118, "grad_norm": 7.919697683464512, "learning_rate": 7.710841446231662e-06, "loss": 17.9129, "step": 18496 }, { "epoch": 0.33811029667135833, "grad_norm": 6.672298055295145, "learning_rate": 7.710592711888473e-06, "loss": 17.546, "step": 18497 }, { "epoch": 0.33812857586780487, "grad_norm": 5.8147908282243925, "learning_rate": 7.710343968044893e-06, "loss": 17.1957, "step": 18498 }, { "epoch": 0.33814685506425135, "grad_norm": 7.871496156274192, "learning_rate": 7.710095214701792e-06, "loss": 17.8094, "step": 18499 }, { "epoch": 0.3381651342606979, "grad_norm": 8.4109188978225, "learning_rate": 7.709846451860044e-06, "loss": 18.0162, "step": 18500 }, { "epoch": 0.3381834134571444, "grad_norm": 5.2937680106235705, "learning_rate": 7.709597679520517e-06, "loss": 17.0771, "step": 18501 }, { "epoch": 0.33820169265359096, "grad_norm": 6.379321046963044, "learning_rate": 7.709348897684087e-06, "loss": 17.7056, "step": 18502 }, { "epoch": 0.3382199718500375, "grad_norm": 7.265112552027738, "learning_rate": 7.709100106351624e-06, "loss": 17.5608, "step": 18503 }, { "epoch": 0.338238251046484, "grad_norm": 6.226972182198487, "learning_rate": 7.708851305523999e-06, "loss": 17.03, "step": 18504 }, { "epoch": 0.3382565302429305, "grad_norm": 6.515094573049991, "learning_rate": 7.708602495202085e-06, "loss": 17.4419, "step": 18505 }, { "epoch": 0.33827480943937704, "grad_norm": 5.828203314056155, "learning_rate": 7.708353675386756e-06, "loss": 17.4523, "step": 18506 }, { "epoch": 0.3382930886358236, "grad_norm": 7.433264374469968, "learning_rate": 7.70810484607888e-06, "loss": 17.8551, "step": 18507 }, { "epoch": 0.3383113678322701, "grad_norm": 6.452021201753046, "learning_rate": 7.707856007279336e-06, "loss": 17.5515, "step": 18508 }, { "epoch": 0.3383296470287166, "grad_norm": 6.927308323353563, "learning_rate": 7.707607158988989e-06, "loss": 17.6572, "step": 18509 }, { "epoch": 0.33834792622516313, "grad_norm": 7.59364801967737, "learning_rate": 7.707358301208711e-06, "loss": 18.0751, "step": 18510 }, { "epoch": 0.33836620542160967, "grad_norm": 8.481590889263641, "learning_rate": 7.70710943393938e-06, "loss": 18.4459, "step": 18511 }, { "epoch": 0.3383844846180562, "grad_norm": 6.218440313046197, "learning_rate": 7.706860557181865e-06, "loss": 17.4924, "step": 18512 }, { "epoch": 0.33840276381450274, "grad_norm": 7.873215963953774, "learning_rate": 7.70661167093704e-06, "loss": 17.8562, "step": 18513 }, { "epoch": 0.3384210430109492, "grad_norm": 5.718721767784067, "learning_rate": 7.706362775205775e-06, "loss": 17.4298, "step": 18514 }, { "epoch": 0.33843932220739575, "grad_norm": 6.855804305720729, "learning_rate": 7.706113869988942e-06, "loss": 17.7369, "step": 18515 }, { "epoch": 0.3384576014038423, "grad_norm": 6.285208999963029, "learning_rate": 7.705864955287417e-06, "loss": 17.4317, "step": 18516 }, { "epoch": 0.3384758806002888, "grad_norm": 6.876274856684372, "learning_rate": 7.705616031102067e-06, "loss": 17.7482, "step": 18517 }, { "epoch": 0.33849415979673536, "grad_norm": 6.551674808949516, "learning_rate": 7.70536709743377e-06, "loss": 17.5721, "step": 18518 }, { "epoch": 0.33851243899318184, "grad_norm": 7.1792626870622795, "learning_rate": 7.705118154283395e-06, "loss": 17.81, "step": 18519 }, { "epoch": 0.3385307181896284, "grad_norm": 6.532119534364582, "learning_rate": 7.704869201651817e-06, "loss": 17.4447, "step": 18520 }, { "epoch": 0.3385489973860749, "grad_norm": 7.836061959256069, "learning_rate": 7.704620239539907e-06, "loss": 18.0364, "step": 18521 }, { "epoch": 0.33856727658252145, "grad_norm": 7.278440436296791, "learning_rate": 7.704371267948537e-06, "loss": 17.9914, "step": 18522 }, { "epoch": 0.338585555778968, "grad_norm": 9.443688365625592, "learning_rate": 7.704122286878582e-06, "loss": 19.0627, "step": 18523 }, { "epoch": 0.33860383497541446, "grad_norm": 6.7285697217231775, "learning_rate": 7.703873296330911e-06, "loss": 17.7257, "step": 18524 }, { "epoch": 0.338622114171861, "grad_norm": 6.832079061969127, "learning_rate": 7.703624296306398e-06, "loss": 17.8432, "step": 18525 }, { "epoch": 0.33864039336830754, "grad_norm": 7.372634881950305, "learning_rate": 7.703375286805918e-06, "loss": 18.1717, "step": 18526 }, { "epoch": 0.33865867256475407, "grad_norm": 6.671263012834701, "learning_rate": 7.703126267830341e-06, "loss": 17.832, "step": 18527 }, { "epoch": 0.33867695176120055, "grad_norm": 8.064692606205229, "learning_rate": 7.702877239380541e-06, "loss": 18.3962, "step": 18528 }, { "epoch": 0.3386952309576471, "grad_norm": 5.9140910909686655, "learning_rate": 7.702628201457393e-06, "loss": 17.543, "step": 18529 }, { "epoch": 0.3387135101540936, "grad_norm": 7.536298645706425, "learning_rate": 7.702379154061766e-06, "loss": 18.0075, "step": 18530 }, { "epoch": 0.33873178935054016, "grad_norm": 6.903730155055247, "learning_rate": 7.702130097194536e-06, "loss": 17.737, "step": 18531 }, { "epoch": 0.3387500685469867, "grad_norm": 6.892784737799518, "learning_rate": 7.701881030856573e-06, "loss": 17.8239, "step": 18532 }, { "epoch": 0.3387683477434332, "grad_norm": 7.989731384844175, "learning_rate": 7.701631955048751e-06, "loss": 18.1867, "step": 18533 }, { "epoch": 0.3387866269398797, "grad_norm": 6.75314577527341, "learning_rate": 7.701382869771944e-06, "loss": 17.6316, "step": 18534 }, { "epoch": 0.33880490613632624, "grad_norm": 6.425552269052934, "learning_rate": 7.701133775027026e-06, "loss": 17.7064, "step": 18535 }, { "epoch": 0.3388231853327728, "grad_norm": 6.475145211434733, "learning_rate": 7.700884670814867e-06, "loss": 17.5869, "step": 18536 }, { "epoch": 0.3388414645292193, "grad_norm": 7.979887064014127, "learning_rate": 7.700635557136342e-06, "loss": 17.9246, "step": 18537 }, { "epoch": 0.3388597437256658, "grad_norm": 7.1966159183684155, "learning_rate": 7.700386433992325e-06, "loss": 17.6055, "step": 18538 }, { "epoch": 0.33887802292211233, "grad_norm": 5.8361565872888335, "learning_rate": 7.700137301383685e-06, "loss": 17.3603, "step": 18539 }, { "epoch": 0.33889630211855887, "grad_norm": 6.800728664016093, "learning_rate": 7.6998881593113e-06, "loss": 17.8276, "step": 18540 }, { "epoch": 0.3389145813150054, "grad_norm": 6.969027315962583, "learning_rate": 7.69963900777604e-06, "loss": 18.0461, "step": 18541 }, { "epoch": 0.33893286051145194, "grad_norm": 7.748699637922752, "learning_rate": 7.699389846778781e-06, "loss": 18.1258, "step": 18542 }, { "epoch": 0.3389511397078984, "grad_norm": 7.299936552345248, "learning_rate": 7.699140676320394e-06, "loss": 17.5742, "step": 18543 }, { "epoch": 0.33896941890434495, "grad_norm": 5.406616937955294, "learning_rate": 7.698891496401753e-06, "loss": 17.1552, "step": 18544 }, { "epoch": 0.3389876981007915, "grad_norm": 6.105185122653779, "learning_rate": 7.698642307023732e-06, "loss": 17.3877, "step": 18545 }, { "epoch": 0.339005977297238, "grad_norm": 6.070445457980878, "learning_rate": 7.698393108187203e-06, "loss": 17.433, "step": 18546 }, { "epoch": 0.33902425649368456, "grad_norm": 7.327904658034851, "learning_rate": 7.698143899893042e-06, "loss": 18.2264, "step": 18547 }, { "epoch": 0.33904253569013104, "grad_norm": 5.811399519134845, "learning_rate": 7.697894682142119e-06, "loss": 17.2927, "step": 18548 }, { "epoch": 0.3390608148865776, "grad_norm": 8.371710481814059, "learning_rate": 7.697645454935311e-06, "loss": 18.5259, "step": 18549 }, { "epoch": 0.3390790940830241, "grad_norm": 6.162236942618617, "learning_rate": 7.697396218273488e-06, "loss": 17.4977, "step": 18550 }, { "epoch": 0.33909737327947065, "grad_norm": 5.667929160973008, "learning_rate": 7.697146972157527e-06, "loss": 17.2238, "step": 18551 }, { "epoch": 0.3391156524759172, "grad_norm": 6.3630864987232725, "learning_rate": 7.696897716588299e-06, "loss": 17.5051, "step": 18552 }, { "epoch": 0.33913393167236366, "grad_norm": 5.547956065392964, "learning_rate": 7.696648451566678e-06, "loss": 17.2606, "step": 18553 }, { "epoch": 0.3391522108688102, "grad_norm": 5.693365817578991, "learning_rate": 7.696399177093539e-06, "loss": 17.2777, "step": 18554 }, { "epoch": 0.33917049006525674, "grad_norm": 7.699578961192763, "learning_rate": 7.696149893169756e-06, "loss": 18.1077, "step": 18555 }, { "epoch": 0.33918876926170327, "grad_norm": 7.370075956908817, "learning_rate": 7.6959005997962e-06, "loss": 17.7865, "step": 18556 }, { "epoch": 0.3392070484581498, "grad_norm": 6.320802877160347, "learning_rate": 7.695651296973745e-06, "loss": 17.6819, "step": 18557 }, { "epoch": 0.3392253276545963, "grad_norm": 7.172380059789501, "learning_rate": 7.695401984703269e-06, "loss": 17.9064, "step": 18558 }, { "epoch": 0.3392436068510428, "grad_norm": 5.8204236789504575, "learning_rate": 7.695152662985641e-06, "loss": 17.3202, "step": 18559 }, { "epoch": 0.33926188604748936, "grad_norm": 6.671811365467112, "learning_rate": 7.694903331821736e-06, "loss": 17.4557, "step": 18560 }, { "epoch": 0.3392801652439359, "grad_norm": 5.506696943152934, "learning_rate": 7.694653991212431e-06, "loss": 17.1238, "step": 18561 }, { "epoch": 0.3392984444403824, "grad_norm": 6.6946185875147926, "learning_rate": 7.694404641158597e-06, "loss": 18.0571, "step": 18562 }, { "epoch": 0.3393167236368289, "grad_norm": 5.7400999723436295, "learning_rate": 7.694155281661108e-06, "loss": 17.1185, "step": 18563 }, { "epoch": 0.33933500283327545, "grad_norm": 4.560350280967302, "learning_rate": 7.69390591272084e-06, "loss": 16.7462, "step": 18564 }, { "epoch": 0.339353282029722, "grad_norm": 6.779489856870191, "learning_rate": 7.693656534338665e-06, "loss": 17.9682, "step": 18565 }, { "epoch": 0.3393715612261685, "grad_norm": 6.055479736217653, "learning_rate": 7.693407146515455e-06, "loss": 17.3872, "step": 18566 }, { "epoch": 0.339389840422615, "grad_norm": 7.063314999171396, "learning_rate": 7.693157749252089e-06, "loss": 17.8399, "step": 18567 }, { "epoch": 0.33940811961906153, "grad_norm": 6.485390388814588, "learning_rate": 7.692908342549439e-06, "loss": 17.3437, "step": 18568 }, { "epoch": 0.33942639881550807, "grad_norm": 7.67912906130451, "learning_rate": 7.692658926408378e-06, "loss": 18.3269, "step": 18569 }, { "epoch": 0.3394446780119546, "grad_norm": 6.66874415783317, "learning_rate": 7.692409500829781e-06, "loss": 17.6165, "step": 18570 }, { "epoch": 0.33946295720840114, "grad_norm": 5.525745694311952, "learning_rate": 7.692160065814522e-06, "loss": 16.9587, "step": 18571 }, { "epoch": 0.3394812364048476, "grad_norm": 6.5776478903528135, "learning_rate": 7.691910621363479e-06, "loss": 17.9475, "step": 18572 }, { "epoch": 0.33949951560129416, "grad_norm": 6.735479974909261, "learning_rate": 7.691661167477519e-06, "loss": 17.3685, "step": 18573 }, { "epoch": 0.3395177947977407, "grad_norm": 6.103561192639743, "learning_rate": 7.691411704157521e-06, "loss": 17.369, "step": 18574 }, { "epoch": 0.3395360739941872, "grad_norm": 5.988450917113393, "learning_rate": 7.69116223140436e-06, "loss": 17.2464, "step": 18575 }, { "epoch": 0.33955435319063376, "grad_norm": 5.907624695401685, "learning_rate": 7.690912749218908e-06, "loss": 17.3719, "step": 18576 }, { "epoch": 0.33957263238708024, "grad_norm": 6.418615476685507, "learning_rate": 7.69066325760204e-06, "loss": 17.6757, "step": 18577 }, { "epoch": 0.3395909115835268, "grad_norm": 6.390374719797928, "learning_rate": 7.69041375655463e-06, "loss": 17.4991, "step": 18578 }, { "epoch": 0.3396091907799733, "grad_norm": 7.513673674936005, "learning_rate": 7.690164246077553e-06, "loss": 17.6994, "step": 18579 }, { "epoch": 0.33962746997641985, "grad_norm": 7.370235617724177, "learning_rate": 7.689914726171685e-06, "loss": 17.7465, "step": 18580 }, { "epoch": 0.3396457491728664, "grad_norm": 5.776963263010866, "learning_rate": 7.6896651968379e-06, "loss": 17.1914, "step": 18581 }, { "epoch": 0.33966402836931286, "grad_norm": 7.480861992419118, "learning_rate": 7.689415658077074e-06, "loss": 18.0238, "step": 18582 }, { "epoch": 0.3396823075657594, "grad_norm": 7.51474730627037, "learning_rate": 7.689166109890073e-06, "loss": 17.8101, "step": 18583 }, { "epoch": 0.33970058676220594, "grad_norm": 5.94084192894617, "learning_rate": 7.688916552277783e-06, "loss": 17.2131, "step": 18584 }, { "epoch": 0.33971886595865247, "grad_norm": 5.7606416869487616, "learning_rate": 7.688666985241074e-06, "loss": 17.5015, "step": 18585 }, { "epoch": 0.339737145155099, "grad_norm": 7.528853867532281, "learning_rate": 7.688417408780817e-06, "loss": 18.0463, "step": 18586 }, { "epoch": 0.3397554243515455, "grad_norm": 6.374259758307279, "learning_rate": 7.688167822897894e-06, "loss": 17.3193, "step": 18587 }, { "epoch": 0.339773703547992, "grad_norm": 7.187857336463657, "learning_rate": 7.687918227593174e-06, "loss": 17.9579, "step": 18588 }, { "epoch": 0.33979198274443856, "grad_norm": 6.771650370125248, "learning_rate": 7.687668622867535e-06, "loss": 17.7377, "step": 18589 }, { "epoch": 0.3398102619408851, "grad_norm": 6.478670704204277, "learning_rate": 7.687419008721848e-06, "loss": 17.6423, "step": 18590 }, { "epoch": 0.33982854113733163, "grad_norm": 5.801317083485911, "learning_rate": 7.687169385156994e-06, "loss": 17.2358, "step": 18591 }, { "epoch": 0.3398468203337781, "grad_norm": 5.778391106065269, "learning_rate": 7.686919752173842e-06, "loss": 17.1861, "step": 18592 }, { "epoch": 0.33986509953022465, "grad_norm": 5.4130668068485495, "learning_rate": 7.686670109773271e-06, "loss": 17.1547, "step": 18593 }, { "epoch": 0.3398833787266712, "grad_norm": 7.179857244364473, "learning_rate": 7.686420457956153e-06, "loss": 18.2276, "step": 18594 }, { "epoch": 0.3399016579231177, "grad_norm": 7.545135015636312, "learning_rate": 7.686170796723365e-06, "loss": 17.9744, "step": 18595 }, { "epoch": 0.3399199371195642, "grad_norm": 5.992206441478063, "learning_rate": 7.685921126075783e-06, "loss": 17.4739, "step": 18596 }, { "epoch": 0.33993821631601073, "grad_norm": 7.753257465418856, "learning_rate": 7.685671446014281e-06, "loss": 17.8907, "step": 18597 }, { "epoch": 0.33995649551245727, "grad_norm": 5.220488384198819, "learning_rate": 7.68542175653973e-06, "loss": 17.2379, "step": 18598 }, { "epoch": 0.3399747747089038, "grad_norm": 6.756246510999741, "learning_rate": 7.68517205765301e-06, "loss": 17.9904, "step": 18599 }, { "epoch": 0.33999305390535034, "grad_norm": 6.6665878497981295, "learning_rate": 7.684922349354997e-06, "loss": 17.666, "step": 18600 }, { "epoch": 0.3400113331017968, "grad_norm": 6.7635936038448685, "learning_rate": 7.684672631646561e-06, "loss": 17.6601, "step": 18601 }, { "epoch": 0.34002961229824336, "grad_norm": 6.019484555741808, "learning_rate": 7.684422904528584e-06, "loss": 17.4302, "step": 18602 }, { "epoch": 0.3400478914946899, "grad_norm": 6.587729455162058, "learning_rate": 7.684173168001936e-06, "loss": 17.6979, "step": 18603 }, { "epoch": 0.3400661706911364, "grad_norm": 6.695329373211095, "learning_rate": 7.683923422067492e-06, "loss": 17.7751, "step": 18604 }, { "epoch": 0.34008444988758296, "grad_norm": 5.599427833123826, "learning_rate": 7.683673666726133e-06, "loss": 17.3286, "step": 18605 }, { "epoch": 0.34010272908402944, "grad_norm": 6.655312439568152, "learning_rate": 7.683423901978729e-06, "loss": 17.6809, "step": 18606 }, { "epoch": 0.340121008280476, "grad_norm": 5.213666939523555, "learning_rate": 7.683174127826156e-06, "loss": 17.0193, "step": 18607 }, { "epoch": 0.3401392874769225, "grad_norm": 6.082647708732131, "learning_rate": 7.682924344269292e-06, "loss": 17.4358, "step": 18608 }, { "epoch": 0.34015756667336905, "grad_norm": 6.728008690159649, "learning_rate": 7.682674551309008e-06, "loss": 17.8505, "step": 18609 }, { "epoch": 0.3401758458698156, "grad_norm": 6.666065553449126, "learning_rate": 7.682424748946187e-06, "loss": 17.8174, "step": 18610 }, { "epoch": 0.34019412506626207, "grad_norm": 5.802111281189128, "learning_rate": 7.682174937181696e-06, "loss": 17.2524, "step": 18611 }, { "epoch": 0.3402124042627086, "grad_norm": 7.474876302335836, "learning_rate": 7.681925116016417e-06, "loss": 18.1691, "step": 18612 }, { "epoch": 0.34023068345915514, "grad_norm": 6.281967462298568, "learning_rate": 7.681675285451223e-06, "loss": 17.3999, "step": 18613 }, { "epoch": 0.34024896265560167, "grad_norm": 7.110678994979965, "learning_rate": 7.68142544548699e-06, "loss": 17.7144, "step": 18614 }, { "epoch": 0.3402672418520482, "grad_norm": 6.398574156336081, "learning_rate": 7.681175596124592e-06, "loss": 17.4233, "step": 18615 }, { "epoch": 0.3402855210484947, "grad_norm": 6.289630212886747, "learning_rate": 7.680925737364908e-06, "loss": 17.3901, "step": 18616 }, { "epoch": 0.3403038002449412, "grad_norm": 5.8380846989527155, "learning_rate": 7.680675869208811e-06, "loss": 17.1604, "step": 18617 }, { "epoch": 0.34032207944138776, "grad_norm": 5.569289614347202, "learning_rate": 7.680425991657177e-06, "loss": 17.0017, "step": 18618 }, { "epoch": 0.3403403586378343, "grad_norm": 6.144307323923875, "learning_rate": 7.680176104710883e-06, "loss": 17.4247, "step": 18619 }, { "epoch": 0.34035863783428083, "grad_norm": 6.459215526872056, "learning_rate": 7.679926208370807e-06, "loss": 17.6302, "step": 18620 }, { "epoch": 0.3403769170307273, "grad_norm": 5.93392102819098, "learning_rate": 7.679676302637818e-06, "loss": 17.2792, "step": 18621 }, { "epoch": 0.34039519622717385, "grad_norm": 7.4960631324250215, "learning_rate": 7.679426387512799e-06, "loss": 17.7464, "step": 18622 }, { "epoch": 0.3404134754236204, "grad_norm": 8.31714979088079, "learning_rate": 7.679176462996622e-06, "loss": 18.3638, "step": 18623 }, { "epoch": 0.3404317546200669, "grad_norm": 5.6869215349779525, "learning_rate": 7.678926529090164e-06, "loss": 17.3644, "step": 18624 }, { "epoch": 0.34045003381651345, "grad_norm": 9.132516392454006, "learning_rate": 7.6786765857943e-06, "loss": 18.0032, "step": 18625 }, { "epoch": 0.34046831301295993, "grad_norm": 6.52480229562743, "learning_rate": 7.678426633109908e-06, "loss": 17.6034, "step": 18626 }, { "epoch": 0.34048659220940647, "grad_norm": 8.39836950754598, "learning_rate": 7.678176671037864e-06, "loss": 18.007, "step": 18627 }, { "epoch": 0.340504871405853, "grad_norm": 5.796424602940962, "learning_rate": 7.677926699579042e-06, "loss": 17.2748, "step": 18628 }, { "epoch": 0.34052315060229954, "grad_norm": 6.627960521855117, "learning_rate": 7.677676718734319e-06, "loss": 17.6418, "step": 18629 }, { "epoch": 0.340541429798746, "grad_norm": 7.443410950212804, "learning_rate": 7.677426728504572e-06, "loss": 18.0215, "step": 18630 }, { "epoch": 0.34055970899519256, "grad_norm": 6.474408461785655, "learning_rate": 7.677176728890677e-06, "loss": 17.5713, "step": 18631 }, { "epoch": 0.3405779881916391, "grad_norm": 7.853435968222706, "learning_rate": 7.676926719893509e-06, "loss": 18.0385, "step": 18632 }, { "epoch": 0.3405962673880856, "grad_norm": 7.4119833741467795, "learning_rate": 7.676676701513945e-06, "loss": 17.9976, "step": 18633 }, { "epoch": 0.34061454658453216, "grad_norm": 7.26715653733841, "learning_rate": 7.676426673752862e-06, "loss": 18.1636, "step": 18634 }, { "epoch": 0.34063282578097864, "grad_norm": 7.406503317562208, "learning_rate": 7.676176636611137e-06, "loss": 17.742, "step": 18635 }, { "epoch": 0.3406511049774252, "grad_norm": 6.3999819331489585, "learning_rate": 7.675926590089643e-06, "loss": 17.6915, "step": 18636 }, { "epoch": 0.3406693841738717, "grad_norm": 7.352261573316151, "learning_rate": 7.675676534189261e-06, "loss": 17.7272, "step": 18637 }, { "epoch": 0.34068766337031825, "grad_norm": 6.6079238737001695, "learning_rate": 7.675426468910862e-06, "loss": 17.4509, "step": 18638 }, { "epoch": 0.3407059425667648, "grad_norm": 5.703601467050324, "learning_rate": 7.675176394255326e-06, "loss": 17.4343, "step": 18639 }, { "epoch": 0.34072422176321127, "grad_norm": 7.165661530921426, "learning_rate": 7.67492631022353e-06, "loss": 17.945, "step": 18640 }, { "epoch": 0.3407425009596578, "grad_norm": 5.0921407244045165, "learning_rate": 7.67467621681635e-06, "loss": 16.9564, "step": 18641 }, { "epoch": 0.34076078015610434, "grad_norm": 7.055686350188954, "learning_rate": 7.674426114034662e-06, "loss": 18.0199, "step": 18642 }, { "epoch": 0.3407790593525509, "grad_norm": 6.803755903484869, "learning_rate": 7.67417600187934e-06, "loss": 17.4164, "step": 18643 }, { "epoch": 0.3407973385489974, "grad_norm": 7.2652829283862514, "learning_rate": 7.673925880351266e-06, "loss": 17.7704, "step": 18644 }, { "epoch": 0.3408156177454439, "grad_norm": 7.275087787321198, "learning_rate": 7.673675749451312e-06, "loss": 17.8544, "step": 18645 }, { "epoch": 0.3408338969418904, "grad_norm": 6.113747157958731, "learning_rate": 7.673425609180356e-06, "loss": 17.4012, "step": 18646 }, { "epoch": 0.34085217613833696, "grad_norm": 5.939387750543694, "learning_rate": 7.673175459539277e-06, "loss": 17.0685, "step": 18647 }, { "epoch": 0.3408704553347835, "grad_norm": 5.721475154394008, "learning_rate": 7.672925300528949e-06, "loss": 17.1819, "step": 18648 }, { "epoch": 0.34088873453123003, "grad_norm": 6.697699313686008, "learning_rate": 7.672675132150249e-06, "loss": 17.3943, "step": 18649 }, { "epoch": 0.3409070137276765, "grad_norm": 6.393508358343627, "learning_rate": 7.672424954404057e-06, "loss": 17.5696, "step": 18650 }, { "epoch": 0.34092529292412305, "grad_norm": 8.07438924970571, "learning_rate": 7.672174767291246e-06, "loss": 17.8587, "step": 18651 }, { "epoch": 0.3409435721205696, "grad_norm": 6.642235116480529, "learning_rate": 7.671924570812694e-06, "loss": 17.6111, "step": 18652 }, { "epoch": 0.3409618513170161, "grad_norm": 6.590922537754951, "learning_rate": 7.671674364969277e-06, "loss": 17.4407, "step": 18653 }, { "epoch": 0.34098013051346265, "grad_norm": 5.966974561777243, "learning_rate": 7.671424149761878e-06, "loss": 17.4277, "step": 18654 }, { "epoch": 0.34099840970990913, "grad_norm": 7.563680802508155, "learning_rate": 7.671173925191364e-06, "loss": 17.7721, "step": 18655 }, { "epoch": 0.34101668890635567, "grad_norm": 6.201240177848574, "learning_rate": 7.670923691258619e-06, "loss": 17.2041, "step": 18656 }, { "epoch": 0.3410349681028022, "grad_norm": 6.99645951902213, "learning_rate": 7.670673447964518e-06, "loss": 17.6827, "step": 18657 }, { "epoch": 0.34105324729924874, "grad_norm": 6.715923558615538, "learning_rate": 7.67042319530994e-06, "loss": 17.2222, "step": 18658 }, { "epoch": 0.3410715264956953, "grad_norm": 9.084080103438504, "learning_rate": 7.670172933295758e-06, "loss": 18.4638, "step": 18659 }, { "epoch": 0.34108980569214176, "grad_norm": 6.348419974733283, "learning_rate": 7.669922661922853e-06, "loss": 17.379, "step": 18660 }, { "epoch": 0.3411080848885883, "grad_norm": 8.42640356163809, "learning_rate": 7.6696723811921e-06, "loss": 18.2419, "step": 18661 }, { "epoch": 0.34112636408503483, "grad_norm": 6.340744017100043, "learning_rate": 7.669422091104377e-06, "loss": 17.2227, "step": 18662 }, { "epoch": 0.34114464328148136, "grad_norm": 6.583856869218941, "learning_rate": 7.669171791660562e-06, "loss": 17.5243, "step": 18663 }, { "epoch": 0.34116292247792784, "grad_norm": 6.765985311311577, "learning_rate": 7.668921482861531e-06, "loss": 17.6967, "step": 18664 }, { "epoch": 0.3411812016743744, "grad_norm": 7.855367461729398, "learning_rate": 7.668671164708163e-06, "loss": 18.0338, "step": 18665 }, { "epoch": 0.3411994808708209, "grad_norm": 5.842699478441956, "learning_rate": 7.668420837201331e-06, "loss": 17.3604, "step": 18666 }, { "epoch": 0.34121776006726745, "grad_norm": 5.647097215030044, "learning_rate": 7.668170500341918e-06, "loss": 17.3587, "step": 18667 }, { "epoch": 0.341236039263714, "grad_norm": 6.117666474122066, "learning_rate": 7.6679201541308e-06, "loss": 17.5845, "step": 18668 }, { "epoch": 0.34125431846016047, "grad_norm": 6.599540683177623, "learning_rate": 7.667669798568852e-06, "loss": 17.6067, "step": 18669 }, { "epoch": 0.341272597656607, "grad_norm": 6.819116057737852, "learning_rate": 7.667419433656953e-06, "loss": 17.8113, "step": 18670 }, { "epoch": 0.34129087685305354, "grad_norm": 6.718181840431996, "learning_rate": 7.667169059395978e-06, "loss": 17.6825, "step": 18671 }, { "epoch": 0.3413091560495001, "grad_norm": 6.805742840354403, "learning_rate": 7.66691867578681e-06, "loss": 17.7864, "step": 18672 }, { "epoch": 0.3413274352459466, "grad_norm": 7.512941050983491, "learning_rate": 7.666668282830323e-06, "loss": 18.1333, "step": 18673 }, { "epoch": 0.3413457144423931, "grad_norm": 6.473133563595723, "learning_rate": 7.666417880527395e-06, "loss": 17.5756, "step": 18674 }, { "epoch": 0.3413639936388396, "grad_norm": 6.226849326535687, "learning_rate": 7.666167468878902e-06, "loss": 17.576, "step": 18675 }, { "epoch": 0.34138227283528616, "grad_norm": 6.436126671583582, "learning_rate": 7.665917047885724e-06, "loss": 17.4472, "step": 18676 }, { "epoch": 0.3414005520317327, "grad_norm": 5.675453461661069, "learning_rate": 7.665666617548739e-06, "loss": 17.3, "step": 18677 }, { "epoch": 0.34141883122817923, "grad_norm": 6.821599837399781, "learning_rate": 7.665416177868825e-06, "loss": 17.8204, "step": 18678 }, { "epoch": 0.3414371104246257, "grad_norm": 7.783969477680731, "learning_rate": 7.665165728846857e-06, "loss": 18.1205, "step": 18679 }, { "epoch": 0.34145538962107225, "grad_norm": 6.7448408250391045, "learning_rate": 7.664915270483713e-06, "loss": 17.6446, "step": 18680 }, { "epoch": 0.3414736688175188, "grad_norm": 5.673997915867822, "learning_rate": 7.664664802780275e-06, "loss": 17.0912, "step": 18681 }, { "epoch": 0.3414919480139653, "grad_norm": 6.27327755016016, "learning_rate": 7.664414325737417e-06, "loss": 17.4395, "step": 18682 }, { "epoch": 0.34151022721041185, "grad_norm": 7.16947317453992, "learning_rate": 7.664163839356017e-06, "loss": 18.1252, "step": 18683 }, { "epoch": 0.34152850640685833, "grad_norm": 5.995978383754804, "learning_rate": 7.663913343636955e-06, "loss": 17.2703, "step": 18684 }, { "epoch": 0.34154678560330487, "grad_norm": 6.606834029652045, "learning_rate": 7.663662838581107e-06, "loss": 17.4672, "step": 18685 }, { "epoch": 0.3415650647997514, "grad_norm": 5.280484714656908, "learning_rate": 7.663412324189353e-06, "loss": 16.9884, "step": 18686 }, { "epoch": 0.34158334399619794, "grad_norm": 7.013647159691276, "learning_rate": 7.663161800462569e-06, "loss": 17.7756, "step": 18687 }, { "epoch": 0.3416016231926445, "grad_norm": 7.220249667939323, "learning_rate": 7.662911267401634e-06, "loss": 17.4257, "step": 18688 }, { "epoch": 0.34161990238909096, "grad_norm": 6.068767553887193, "learning_rate": 7.662660725007427e-06, "loss": 17.4268, "step": 18689 }, { "epoch": 0.3416381815855375, "grad_norm": 6.104009893408434, "learning_rate": 7.662410173280825e-06, "loss": 17.5837, "step": 18690 }, { "epoch": 0.34165646078198403, "grad_norm": 6.684856497049956, "learning_rate": 7.662159612222706e-06, "loss": 17.843, "step": 18691 }, { "epoch": 0.34167473997843056, "grad_norm": 6.654655871948643, "learning_rate": 7.661909041833951e-06, "loss": 17.4879, "step": 18692 }, { "epoch": 0.3416930191748771, "grad_norm": 5.581902799086931, "learning_rate": 7.661658462115431e-06, "loss": 17.2192, "step": 18693 }, { "epoch": 0.3417112983713236, "grad_norm": 6.939167635113642, "learning_rate": 7.661407873068031e-06, "loss": 17.7619, "step": 18694 }, { "epoch": 0.3417295775677701, "grad_norm": 6.485193863234857, "learning_rate": 7.661157274692628e-06, "loss": 17.4223, "step": 18695 }, { "epoch": 0.34174785676421665, "grad_norm": 6.644593736895018, "learning_rate": 7.660906666990102e-06, "loss": 17.7662, "step": 18696 }, { "epoch": 0.3417661359606632, "grad_norm": 5.50455822128651, "learning_rate": 7.660656049961326e-06, "loss": 17.0274, "step": 18697 }, { "epoch": 0.34178441515710967, "grad_norm": 6.24026387829265, "learning_rate": 7.66040542360718e-06, "loss": 17.3114, "step": 18698 }, { "epoch": 0.3418026943535562, "grad_norm": 6.941206257786055, "learning_rate": 7.660154787928546e-06, "loss": 18.088, "step": 18699 }, { "epoch": 0.34182097355000274, "grad_norm": 7.010808293183774, "learning_rate": 7.659904142926302e-06, "loss": 17.6137, "step": 18700 }, { "epoch": 0.3418392527464493, "grad_norm": 5.953407799406823, "learning_rate": 7.659653488601322e-06, "loss": 17.2847, "step": 18701 }, { "epoch": 0.3418575319428958, "grad_norm": 6.3305612939514555, "learning_rate": 7.659402824954488e-06, "loss": 17.7074, "step": 18702 }, { "epoch": 0.3418758111393423, "grad_norm": 6.868297075420189, "learning_rate": 7.659152151986679e-06, "loss": 17.529, "step": 18703 }, { "epoch": 0.3418940903357888, "grad_norm": 6.883515930230535, "learning_rate": 7.658901469698771e-06, "loss": 17.5927, "step": 18704 }, { "epoch": 0.34191236953223536, "grad_norm": 5.387613705367663, "learning_rate": 7.658650778091645e-06, "loss": 17.0186, "step": 18705 }, { "epoch": 0.3419306487286819, "grad_norm": 7.590193402357873, "learning_rate": 7.658400077166178e-06, "loss": 18.2854, "step": 18706 }, { "epoch": 0.34194892792512843, "grad_norm": 5.918681654641845, "learning_rate": 7.658149366923249e-06, "loss": 17.1875, "step": 18707 }, { "epoch": 0.3419672071215749, "grad_norm": 6.065267697725173, "learning_rate": 7.65789864736374e-06, "loss": 17.3768, "step": 18708 }, { "epoch": 0.34198548631802145, "grad_norm": 6.035988272592032, "learning_rate": 7.657647918488523e-06, "loss": 17.473, "step": 18709 }, { "epoch": 0.342003765514468, "grad_norm": 7.538219994187357, "learning_rate": 7.657397180298483e-06, "loss": 17.6449, "step": 18710 }, { "epoch": 0.3420220447109145, "grad_norm": 8.440697458735569, "learning_rate": 7.657146432794496e-06, "loss": 18.4291, "step": 18711 }, { "epoch": 0.34204032390736105, "grad_norm": 7.200546142422039, "learning_rate": 7.65689567597744e-06, "loss": 17.6517, "step": 18712 }, { "epoch": 0.34205860310380753, "grad_norm": 7.796034028720834, "learning_rate": 7.656644909848198e-06, "loss": 17.9615, "step": 18713 }, { "epoch": 0.34207688230025407, "grad_norm": 5.797347903855568, "learning_rate": 7.656394134407646e-06, "loss": 17.3534, "step": 18714 }, { "epoch": 0.3420951614967006, "grad_norm": 6.328034515254491, "learning_rate": 7.656143349656661e-06, "loss": 17.3153, "step": 18715 }, { "epoch": 0.34211344069314714, "grad_norm": 8.291240342910957, "learning_rate": 7.655892555596124e-06, "loss": 18.3262, "step": 18716 }, { "epoch": 0.3421317198895937, "grad_norm": 7.436553844779456, "learning_rate": 7.655641752226915e-06, "loss": 17.8486, "step": 18717 }, { "epoch": 0.34214999908604016, "grad_norm": 6.385686728345363, "learning_rate": 7.655390939549911e-06, "loss": 17.9412, "step": 18718 }, { "epoch": 0.3421682782824867, "grad_norm": 7.330562144678844, "learning_rate": 7.655140117565995e-06, "loss": 17.9242, "step": 18719 }, { "epoch": 0.34218655747893323, "grad_norm": 7.543387211435891, "learning_rate": 7.65488928627604e-06, "loss": 18.0056, "step": 18720 }, { "epoch": 0.34220483667537976, "grad_norm": 7.81850260618639, "learning_rate": 7.65463844568093e-06, "loss": 17.9146, "step": 18721 }, { "epoch": 0.3422231158718263, "grad_norm": 5.5160302544833995, "learning_rate": 7.654387595781542e-06, "loss": 17.0875, "step": 18722 }, { "epoch": 0.3422413950682728, "grad_norm": 6.8386345834937305, "learning_rate": 7.654136736578759e-06, "loss": 17.4698, "step": 18723 }, { "epoch": 0.3422596742647193, "grad_norm": 6.12700821599637, "learning_rate": 7.653885868073454e-06, "loss": 17.195, "step": 18724 }, { "epoch": 0.34227795346116585, "grad_norm": 5.048266219225038, "learning_rate": 7.653634990266507e-06, "loss": 17.0995, "step": 18725 }, { "epoch": 0.3422962326576124, "grad_norm": 7.152460909063966, "learning_rate": 7.653384103158802e-06, "loss": 17.6174, "step": 18726 }, { "epoch": 0.3423145118540589, "grad_norm": 5.968102892944309, "learning_rate": 7.653133206751218e-06, "loss": 17.3426, "step": 18727 }, { "epoch": 0.3423327910505054, "grad_norm": 8.25861894424999, "learning_rate": 7.65288230104463e-06, "loss": 18.1985, "step": 18728 }, { "epoch": 0.34235107024695194, "grad_norm": 7.8130738972777465, "learning_rate": 7.652631386039921e-06, "loss": 17.7325, "step": 18729 }, { "epoch": 0.3423693494433985, "grad_norm": 7.035678726252646, "learning_rate": 7.65238046173797e-06, "loss": 18.0212, "step": 18730 }, { "epoch": 0.342387628639845, "grad_norm": 6.6801113104921805, "learning_rate": 7.652129528139654e-06, "loss": 17.6916, "step": 18731 }, { "epoch": 0.3424059078362915, "grad_norm": 7.156947819969368, "learning_rate": 7.651878585245853e-06, "loss": 18.0317, "step": 18732 }, { "epoch": 0.342424187032738, "grad_norm": 7.582022893941064, "learning_rate": 7.65162763305745e-06, "loss": 17.6248, "step": 18733 }, { "epoch": 0.34244246622918456, "grad_norm": 7.9573283114692295, "learning_rate": 7.65137667157532e-06, "loss": 18.3825, "step": 18734 }, { "epoch": 0.3424607454256311, "grad_norm": 5.623300634353293, "learning_rate": 7.651125700800346e-06, "loss": 17.1096, "step": 18735 }, { "epoch": 0.34247902462207763, "grad_norm": 6.6412611985181265, "learning_rate": 7.650874720733407e-06, "loss": 17.4568, "step": 18736 }, { "epoch": 0.3424973038185241, "grad_norm": 5.04614504841508, "learning_rate": 7.650623731375381e-06, "loss": 16.9781, "step": 18737 }, { "epoch": 0.34251558301497065, "grad_norm": 6.510875943565755, "learning_rate": 7.65037273272715e-06, "loss": 17.628, "step": 18738 }, { "epoch": 0.3425338622114172, "grad_norm": 6.4333687956937515, "learning_rate": 7.650121724789592e-06, "loss": 17.8449, "step": 18739 }, { "epoch": 0.3425521414078637, "grad_norm": 6.963383029694077, "learning_rate": 7.649870707563588e-06, "loss": 17.674, "step": 18740 }, { "epoch": 0.34257042060431026, "grad_norm": 7.182635781943536, "learning_rate": 7.649619681050015e-06, "loss": 17.9128, "step": 18741 }, { "epoch": 0.34258869980075674, "grad_norm": 8.654101903838043, "learning_rate": 7.649368645249757e-06, "loss": 18.2864, "step": 18742 }, { "epoch": 0.34260697899720327, "grad_norm": 6.562410099438556, "learning_rate": 7.649117600163691e-06, "loss": 17.5895, "step": 18743 }, { "epoch": 0.3426252581936498, "grad_norm": 7.61451345911573, "learning_rate": 7.648866545792699e-06, "loss": 17.8557, "step": 18744 }, { "epoch": 0.34264353739009634, "grad_norm": 7.150796271708187, "learning_rate": 7.648615482137658e-06, "loss": 17.9896, "step": 18745 }, { "epoch": 0.3426618165865429, "grad_norm": 6.534132170946157, "learning_rate": 7.64836440919945e-06, "loss": 17.8412, "step": 18746 }, { "epoch": 0.34268009578298936, "grad_norm": 6.674528048805177, "learning_rate": 7.648113326978954e-06, "loss": 17.5252, "step": 18747 }, { "epoch": 0.3426983749794359, "grad_norm": 5.492376445780663, "learning_rate": 7.647862235477053e-06, "loss": 17.1936, "step": 18748 }, { "epoch": 0.34271665417588243, "grad_norm": 5.219124691380864, "learning_rate": 7.647611134694621e-06, "loss": 17.003, "step": 18749 }, { "epoch": 0.34273493337232896, "grad_norm": 7.492574678745267, "learning_rate": 7.647360024632544e-06, "loss": 17.9828, "step": 18750 }, { "epoch": 0.3427532125687755, "grad_norm": 5.298951135931565, "learning_rate": 7.6471089052917e-06, "loss": 16.8713, "step": 18751 }, { "epoch": 0.342771491765222, "grad_norm": 6.2397662614587945, "learning_rate": 7.646857776672968e-06, "loss": 17.5835, "step": 18752 }, { "epoch": 0.3427897709616685, "grad_norm": 5.97676886984631, "learning_rate": 7.64660663877723e-06, "loss": 17.6145, "step": 18753 }, { "epoch": 0.34280805015811505, "grad_norm": 5.447429576276824, "learning_rate": 7.646355491605367e-06, "loss": 17.1826, "step": 18754 }, { "epoch": 0.3428263293545616, "grad_norm": 5.293841771378851, "learning_rate": 7.646104335158254e-06, "loss": 17.2044, "step": 18755 }, { "epoch": 0.3428446085510081, "grad_norm": 5.762019716466999, "learning_rate": 7.645853169436777e-06, "loss": 17.3198, "step": 18756 }, { "epoch": 0.3428628877474546, "grad_norm": 6.274008540598406, "learning_rate": 7.645601994441813e-06, "loss": 17.3941, "step": 18757 }, { "epoch": 0.34288116694390114, "grad_norm": 6.880523485854208, "learning_rate": 7.645350810174245e-06, "loss": 17.8719, "step": 18758 }, { "epoch": 0.3428994461403477, "grad_norm": 8.122983849837068, "learning_rate": 7.64509961663495e-06, "loss": 17.9244, "step": 18759 }, { "epoch": 0.3429177253367942, "grad_norm": 6.032228247568009, "learning_rate": 7.644848413824812e-06, "loss": 17.1918, "step": 18760 }, { "epoch": 0.34293600453324075, "grad_norm": 6.7114781849518925, "learning_rate": 7.64459720174471e-06, "loss": 17.6641, "step": 18761 }, { "epoch": 0.3429542837296872, "grad_norm": 5.099540818065632, "learning_rate": 7.644345980395524e-06, "loss": 16.9746, "step": 18762 }, { "epoch": 0.34297256292613376, "grad_norm": 7.0862622411338645, "learning_rate": 7.644094749778134e-06, "loss": 17.9195, "step": 18763 }, { "epoch": 0.3429908421225803, "grad_norm": 6.6467858690198405, "learning_rate": 7.643843509893423e-06, "loss": 17.7148, "step": 18764 }, { "epoch": 0.34300912131902683, "grad_norm": 6.181743736946362, "learning_rate": 7.64359226074227e-06, "loss": 17.5138, "step": 18765 }, { "epoch": 0.3430274005154733, "grad_norm": 7.299730676973299, "learning_rate": 7.643341002325553e-06, "loss": 17.8965, "step": 18766 }, { "epoch": 0.34304567971191985, "grad_norm": 6.007275784693551, "learning_rate": 7.643089734644157e-06, "loss": 17.513, "step": 18767 }, { "epoch": 0.3430639589083664, "grad_norm": 6.887004519072852, "learning_rate": 7.64283845769896e-06, "loss": 17.5114, "step": 18768 }, { "epoch": 0.3430822381048129, "grad_norm": 7.213322080699672, "learning_rate": 7.642587171490846e-06, "loss": 17.6282, "step": 18769 }, { "epoch": 0.34310051730125946, "grad_norm": 7.240584778713403, "learning_rate": 7.64233587602069e-06, "loss": 17.7969, "step": 18770 }, { "epoch": 0.34311879649770594, "grad_norm": 7.131732296849313, "learning_rate": 7.642084571289376e-06, "loss": 17.5967, "step": 18771 }, { "epoch": 0.34313707569415247, "grad_norm": 5.8936483822808405, "learning_rate": 7.641833257297788e-06, "loss": 17.2392, "step": 18772 }, { "epoch": 0.343155354890599, "grad_norm": 7.3028334644621475, "learning_rate": 7.641581934046802e-06, "loss": 17.7516, "step": 18773 }, { "epoch": 0.34317363408704554, "grad_norm": 6.259976548532674, "learning_rate": 7.6413306015373e-06, "loss": 17.2937, "step": 18774 }, { "epoch": 0.3431919132834921, "grad_norm": 7.36073248310213, "learning_rate": 7.641079259770163e-06, "loss": 17.9214, "step": 18775 }, { "epoch": 0.34321019247993856, "grad_norm": 7.563428694144747, "learning_rate": 7.640827908746274e-06, "loss": 17.849, "step": 18776 }, { "epoch": 0.3432284716763851, "grad_norm": 5.960784738086114, "learning_rate": 7.640576548466512e-06, "loss": 17.4236, "step": 18777 }, { "epoch": 0.34324675087283163, "grad_norm": 6.341646836969284, "learning_rate": 7.640325178931757e-06, "loss": 17.4697, "step": 18778 }, { "epoch": 0.34326503006927817, "grad_norm": 6.240868076821658, "learning_rate": 7.640073800142892e-06, "loss": 17.0821, "step": 18779 }, { "epoch": 0.3432833092657247, "grad_norm": 6.089649298515942, "learning_rate": 7.639822412100798e-06, "loss": 17.3529, "step": 18780 }, { "epoch": 0.3433015884621712, "grad_norm": 5.793944393176278, "learning_rate": 7.639571014806356e-06, "loss": 17.3753, "step": 18781 }, { "epoch": 0.3433198676586177, "grad_norm": 6.461406824451548, "learning_rate": 7.639319608260446e-06, "loss": 17.5282, "step": 18782 }, { "epoch": 0.34333814685506425, "grad_norm": 6.998204049448984, "learning_rate": 7.63906819246395e-06, "loss": 17.8295, "step": 18783 }, { "epoch": 0.3433564260515108, "grad_norm": 8.130001995420912, "learning_rate": 7.638816767417746e-06, "loss": 18.1857, "step": 18784 }, { "epoch": 0.3433747052479573, "grad_norm": 6.0315541071302485, "learning_rate": 7.638565333122721e-06, "loss": 17.1067, "step": 18785 }, { "epoch": 0.3433929844444038, "grad_norm": 8.11687880415702, "learning_rate": 7.638313889579754e-06, "loss": 18.4706, "step": 18786 }, { "epoch": 0.34341126364085034, "grad_norm": 6.595413154594915, "learning_rate": 7.638062436789726e-06, "loss": 17.3251, "step": 18787 }, { "epoch": 0.3434295428372969, "grad_norm": 6.549313801078654, "learning_rate": 7.637810974753517e-06, "loss": 17.6947, "step": 18788 }, { "epoch": 0.3434478220337434, "grad_norm": 5.937545933649113, "learning_rate": 7.637559503472009e-06, "loss": 17.1814, "step": 18789 }, { "epoch": 0.34346610123018995, "grad_norm": 6.059549672670603, "learning_rate": 7.637308022946084e-06, "loss": 17.2336, "step": 18790 }, { "epoch": 0.3434843804266364, "grad_norm": 6.964066984769655, "learning_rate": 7.637056533176625e-06, "loss": 17.7251, "step": 18791 }, { "epoch": 0.34350265962308296, "grad_norm": 8.46609148214759, "learning_rate": 7.636805034164511e-06, "loss": 18.3873, "step": 18792 }, { "epoch": 0.3435209388195295, "grad_norm": 5.574070086308546, "learning_rate": 7.636553525910621e-06, "loss": 17.1388, "step": 18793 }, { "epoch": 0.34353921801597603, "grad_norm": 7.062336492708427, "learning_rate": 7.636302008415844e-06, "loss": 17.7763, "step": 18794 }, { "epoch": 0.34355749721242257, "grad_norm": 6.918785307699996, "learning_rate": 7.636050481681055e-06, "loss": 17.5145, "step": 18795 }, { "epoch": 0.34357577640886905, "grad_norm": 6.681621492836319, "learning_rate": 7.63579894570714e-06, "loss": 17.6554, "step": 18796 }, { "epoch": 0.3435940556053156, "grad_norm": 8.249625305925028, "learning_rate": 7.635547400494976e-06, "loss": 18.4605, "step": 18797 }, { "epoch": 0.3436123348017621, "grad_norm": 8.073382903177853, "learning_rate": 7.635295846045447e-06, "loss": 18.4458, "step": 18798 }, { "epoch": 0.34363061399820866, "grad_norm": 6.880827109514999, "learning_rate": 7.635044282359437e-06, "loss": 17.6348, "step": 18799 }, { "epoch": 0.34364889319465514, "grad_norm": 6.763693486197934, "learning_rate": 7.634792709437822e-06, "loss": 17.4167, "step": 18800 }, { "epoch": 0.34366717239110167, "grad_norm": 6.2756764284531386, "learning_rate": 7.63454112728149e-06, "loss": 17.4985, "step": 18801 }, { "epoch": 0.3436854515875482, "grad_norm": 5.653630459512056, "learning_rate": 7.634289535891319e-06, "loss": 17.3694, "step": 18802 }, { "epoch": 0.34370373078399474, "grad_norm": 7.489377884641946, "learning_rate": 7.634037935268191e-06, "loss": 17.7952, "step": 18803 }, { "epoch": 0.3437220099804413, "grad_norm": 5.682485521663398, "learning_rate": 7.63378632541299e-06, "loss": 17.2305, "step": 18804 }, { "epoch": 0.34374028917688776, "grad_norm": 6.994947420661549, "learning_rate": 7.633534706326596e-06, "loss": 17.6213, "step": 18805 }, { "epoch": 0.3437585683733343, "grad_norm": 7.111023801184497, "learning_rate": 7.633283078009892e-06, "loss": 17.9773, "step": 18806 }, { "epoch": 0.34377684756978083, "grad_norm": 9.382723296691937, "learning_rate": 7.633031440463757e-06, "loss": 18.3125, "step": 18807 }, { "epoch": 0.34379512676622737, "grad_norm": 9.081819864001538, "learning_rate": 7.632779793689077e-06, "loss": 18.6075, "step": 18808 }, { "epoch": 0.3438134059626739, "grad_norm": 7.903707217962754, "learning_rate": 7.632528137686732e-06, "loss": 17.8067, "step": 18809 }, { "epoch": 0.3438316851591204, "grad_norm": 6.8143214832862204, "learning_rate": 7.632276472457604e-06, "loss": 17.8137, "step": 18810 }, { "epoch": 0.3438499643555669, "grad_norm": 8.528808663813097, "learning_rate": 7.632024798002577e-06, "loss": 18.0433, "step": 18811 }, { "epoch": 0.34386824355201345, "grad_norm": 5.970033498778536, "learning_rate": 7.631773114322529e-06, "loss": 17.3655, "step": 18812 }, { "epoch": 0.34388652274846, "grad_norm": 7.185904186886038, "learning_rate": 7.631521421418348e-06, "loss": 17.7229, "step": 18813 }, { "epoch": 0.3439048019449065, "grad_norm": 6.002420221406456, "learning_rate": 7.63126971929091e-06, "loss": 17.6412, "step": 18814 }, { "epoch": 0.343923081141353, "grad_norm": 5.6499855524419145, "learning_rate": 7.631018007941101e-06, "loss": 17.1009, "step": 18815 }, { "epoch": 0.34394136033779954, "grad_norm": 5.970341578576657, "learning_rate": 7.6307662873698e-06, "loss": 17.3979, "step": 18816 }, { "epoch": 0.3439596395342461, "grad_norm": 5.279621444832671, "learning_rate": 7.630514557577895e-06, "loss": 17.3045, "step": 18817 }, { "epoch": 0.3439779187306926, "grad_norm": 6.6552676311017365, "learning_rate": 7.630262818566264e-06, "loss": 17.5556, "step": 18818 }, { "epoch": 0.34399619792713915, "grad_norm": 7.680684311602772, "learning_rate": 7.630011070335788e-06, "loss": 18.5134, "step": 18819 }, { "epoch": 0.3440144771235856, "grad_norm": 5.6916536488339355, "learning_rate": 7.629759312887353e-06, "loss": 17.4569, "step": 18820 }, { "epoch": 0.34403275632003216, "grad_norm": 7.07473974257135, "learning_rate": 7.62950754622184e-06, "loss": 18.1428, "step": 18821 }, { "epoch": 0.3440510355164787, "grad_norm": 6.386525174074634, "learning_rate": 7.62925577034013e-06, "loss": 17.6783, "step": 18822 }, { "epoch": 0.34406931471292523, "grad_norm": 7.169004794316508, "learning_rate": 7.629003985243108e-06, "loss": 17.8882, "step": 18823 }, { "epoch": 0.34408759390937177, "grad_norm": 6.911734582211646, "learning_rate": 7.628752190931654e-06, "loss": 18.1529, "step": 18824 }, { "epoch": 0.34410587310581825, "grad_norm": 5.576247646965134, "learning_rate": 7.628500387406652e-06, "loss": 17.0755, "step": 18825 }, { "epoch": 0.3441241523022648, "grad_norm": 7.021326916468734, "learning_rate": 7.6282485746689835e-06, "loss": 17.8135, "step": 18826 }, { "epoch": 0.3441424314987113, "grad_norm": 5.888912413575095, "learning_rate": 7.627996752719533e-06, "loss": 17.1671, "step": 18827 }, { "epoch": 0.34416071069515786, "grad_norm": 7.174716073977307, "learning_rate": 7.627744921559183e-06, "loss": 17.7786, "step": 18828 }, { "epoch": 0.3441789898916044, "grad_norm": 6.440076200999903, "learning_rate": 7.627493081188813e-06, "loss": 17.3864, "step": 18829 }, { "epoch": 0.3441972690880509, "grad_norm": 6.371314035709741, "learning_rate": 7.627241231609308e-06, "loss": 17.3974, "step": 18830 }, { "epoch": 0.3442155482844974, "grad_norm": 7.178583371291788, "learning_rate": 7.626989372821552e-06, "loss": 17.9502, "step": 18831 }, { "epoch": 0.34423382748094394, "grad_norm": 5.43836292811123, "learning_rate": 7.6267375048264245e-06, "loss": 17.1827, "step": 18832 }, { "epoch": 0.3442521066773905, "grad_norm": 6.061526697036359, "learning_rate": 7.62648562762481e-06, "loss": 17.364, "step": 18833 }, { "epoch": 0.34427038587383696, "grad_norm": 7.416400119145123, "learning_rate": 7.626233741217592e-06, "loss": 17.9628, "step": 18834 }, { "epoch": 0.3442886650702835, "grad_norm": 6.155932104665575, "learning_rate": 7.625981845605652e-06, "loss": 17.6379, "step": 18835 }, { "epoch": 0.34430694426673003, "grad_norm": 6.286729678642646, "learning_rate": 7.625729940789875e-06, "loss": 17.0942, "step": 18836 }, { "epoch": 0.34432522346317657, "grad_norm": 5.570072005269807, "learning_rate": 7.625478026771143e-06, "loss": 17.1133, "step": 18837 }, { "epoch": 0.3443435026596231, "grad_norm": 8.294658198108628, "learning_rate": 7.625226103550334e-06, "loss": 18.2501, "step": 18838 }, { "epoch": 0.3443617818560696, "grad_norm": 6.554547636739679, "learning_rate": 7.6249741711283385e-06, "loss": 17.6861, "step": 18839 }, { "epoch": 0.3443800610525161, "grad_norm": 5.81957898550504, "learning_rate": 7.624722229506036e-06, "loss": 17.3066, "step": 18840 }, { "epoch": 0.34439834024896265, "grad_norm": 6.623162130971828, "learning_rate": 7.624470278684311e-06, "loss": 17.4684, "step": 18841 }, { "epoch": 0.3444166194454092, "grad_norm": 7.694048787608055, "learning_rate": 7.624218318664044e-06, "loss": 18.2676, "step": 18842 }, { "epoch": 0.3444348986418557, "grad_norm": 6.292099350719881, "learning_rate": 7.6239663494461195e-06, "loss": 17.409, "step": 18843 }, { "epoch": 0.3444531778383022, "grad_norm": 6.2170305037463836, "learning_rate": 7.623714371031421e-06, "loss": 17.2171, "step": 18844 }, { "epoch": 0.34447145703474874, "grad_norm": 7.276994272151025, "learning_rate": 7.623462383420831e-06, "loss": 17.9421, "step": 18845 }, { "epoch": 0.3444897362311953, "grad_norm": 6.682340604025584, "learning_rate": 7.6232103866152325e-06, "loss": 17.2774, "step": 18846 }, { "epoch": 0.3445080154276418, "grad_norm": 7.655083146438742, "learning_rate": 7.622958380615511e-06, "loss": 18.1831, "step": 18847 }, { "epoch": 0.34452629462408835, "grad_norm": 6.375371933391231, "learning_rate": 7.622706365422545e-06, "loss": 17.7205, "step": 18848 }, { "epoch": 0.3445445738205348, "grad_norm": 7.460218703284634, "learning_rate": 7.622454341037224e-06, "loss": 18.0615, "step": 18849 }, { "epoch": 0.34456285301698136, "grad_norm": 7.557403389145596, "learning_rate": 7.622202307460426e-06, "loss": 17.9129, "step": 18850 }, { "epoch": 0.3445811322134279, "grad_norm": 5.463903761896323, "learning_rate": 7.621950264693039e-06, "loss": 17.2442, "step": 18851 }, { "epoch": 0.34459941140987443, "grad_norm": 5.320197632858207, "learning_rate": 7.62169821273594e-06, "loss": 17.2238, "step": 18852 }, { "epoch": 0.34461769060632097, "grad_norm": 5.893030144033991, "learning_rate": 7.621446151590018e-06, "loss": 17.4173, "step": 18853 }, { "epoch": 0.34463596980276745, "grad_norm": 6.252265876898372, "learning_rate": 7.6211940812561555e-06, "loss": 17.5848, "step": 18854 }, { "epoch": 0.344654248999214, "grad_norm": 7.255444106176171, "learning_rate": 7.620942001735235e-06, "loss": 17.8887, "step": 18855 }, { "epoch": 0.3446725281956605, "grad_norm": 5.76311644733677, "learning_rate": 7.620689913028141e-06, "loss": 17.1523, "step": 18856 }, { "epoch": 0.34469080739210706, "grad_norm": 6.070883116721132, "learning_rate": 7.620437815135753e-06, "loss": 17.1883, "step": 18857 }, { "epoch": 0.3447090865885536, "grad_norm": 6.704219160795553, "learning_rate": 7.620185708058962e-06, "loss": 17.6405, "step": 18858 }, { "epoch": 0.3447273657850001, "grad_norm": 5.39117974837999, "learning_rate": 7.619933591798645e-06, "loss": 17.2863, "step": 18859 }, { "epoch": 0.3447456449814466, "grad_norm": 5.623336980396304, "learning_rate": 7.619681466355688e-06, "loss": 17.4557, "step": 18860 }, { "epoch": 0.34476392417789314, "grad_norm": 6.1063789323148985, "learning_rate": 7.619429331730977e-06, "loss": 17.6039, "step": 18861 }, { "epoch": 0.3447822033743397, "grad_norm": 7.56351926677025, "learning_rate": 7.6191771879253905e-06, "loss": 17.9513, "step": 18862 }, { "epoch": 0.3448004825707862, "grad_norm": 6.106402972925933, "learning_rate": 7.618925034939817e-06, "loss": 17.3865, "step": 18863 }, { "epoch": 0.3448187617672327, "grad_norm": 5.920384771196159, "learning_rate": 7.618672872775138e-06, "loss": 17.3956, "step": 18864 }, { "epoch": 0.34483704096367923, "grad_norm": 6.670774222241027, "learning_rate": 7.618420701432238e-06, "loss": 17.9289, "step": 18865 }, { "epoch": 0.34485532016012577, "grad_norm": 7.524320600353264, "learning_rate": 7.618168520912001e-06, "loss": 18.0164, "step": 18866 }, { "epoch": 0.3448735993565723, "grad_norm": 6.578151056101703, "learning_rate": 7.617916331215309e-06, "loss": 17.4418, "step": 18867 }, { "epoch": 0.3448918785530188, "grad_norm": 6.111321466298664, "learning_rate": 7.61766413234305e-06, "loss": 17.4151, "step": 18868 }, { "epoch": 0.3449101577494653, "grad_norm": 6.647655970034263, "learning_rate": 7.617411924296103e-06, "loss": 17.8468, "step": 18869 }, { "epoch": 0.34492843694591185, "grad_norm": 5.391290134991107, "learning_rate": 7.617159707075355e-06, "loss": 17.1188, "step": 18870 }, { "epoch": 0.3449467161423584, "grad_norm": 5.836604081616722, "learning_rate": 7.61690748068169e-06, "loss": 17.4615, "step": 18871 }, { "epoch": 0.3449649953388049, "grad_norm": 5.828869638207708, "learning_rate": 7.6166552451159914e-06, "loss": 17.298, "step": 18872 }, { "epoch": 0.3449832745352514, "grad_norm": 6.268811339034368, "learning_rate": 7.6164030003791424e-06, "loss": 17.5558, "step": 18873 }, { "epoch": 0.34500155373169794, "grad_norm": 7.453674579298899, "learning_rate": 7.6161507464720285e-06, "loss": 17.7549, "step": 18874 }, { "epoch": 0.3450198329281445, "grad_norm": 6.651998662950934, "learning_rate": 7.615898483395534e-06, "loss": 17.8893, "step": 18875 }, { "epoch": 0.345038112124591, "grad_norm": 6.128656015172041, "learning_rate": 7.61564621115054e-06, "loss": 17.6188, "step": 18876 }, { "epoch": 0.34505639132103755, "grad_norm": 6.3756299741129485, "learning_rate": 7.615393929737935e-06, "loss": 17.6619, "step": 18877 }, { "epoch": 0.34507467051748403, "grad_norm": 5.825326039746997, "learning_rate": 7.615141639158601e-06, "loss": 17.3186, "step": 18878 }, { "epoch": 0.34509294971393056, "grad_norm": 6.891541289228385, "learning_rate": 7.614889339413422e-06, "loss": 17.9987, "step": 18879 }, { "epoch": 0.3451112289103771, "grad_norm": 5.671312721659657, "learning_rate": 7.614637030503282e-06, "loss": 16.9284, "step": 18880 }, { "epoch": 0.34512950810682363, "grad_norm": 6.622144873740716, "learning_rate": 7.614384712429068e-06, "loss": 17.5671, "step": 18881 }, { "epoch": 0.34514778730327017, "grad_norm": 7.310112596708961, "learning_rate": 7.614132385191661e-06, "loss": 17.9785, "step": 18882 }, { "epoch": 0.34516606649971665, "grad_norm": 5.69360839671099, "learning_rate": 7.613880048791948e-06, "loss": 17.1646, "step": 18883 }, { "epoch": 0.3451843456961632, "grad_norm": 6.170253810983289, "learning_rate": 7.61362770323081e-06, "loss": 17.4712, "step": 18884 }, { "epoch": 0.3452026248926097, "grad_norm": 5.50022870992592, "learning_rate": 7.613375348509135e-06, "loss": 17.2666, "step": 18885 }, { "epoch": 0.34522090408905626, "grad_norm": 7.188684396091385, "learning_rate": 7.613122984627808e-06, "loss": 17.6921, "step": 18886 }, { "epoch": 0.3452391832855028, "grad_norm": 7.81632667876437, "learning_rate": 7.61287061158771e-06, "loss": 18.0837, "step": 18887 }, { "epoch": 0.3452574624819493, "grad_norm": 6.656226293490472, "learning_rate": 7.612618229389728e-06, "loss": 17.4341, "step": 18888 }, { "epoch": 0.3452757416783958, "grad_norm": 6.449742042322299, "learning_rate": 7.6123658380347446e-06, "loss": 17.4351, "step": 18889 }, { "epoch": 0.34529402087484234, "grad_norm": 6.712440839298584, "learning_rate": 7.612113437523646e-06, "loss": 17.6861, "step": 18890 }, { "epoch": 0.3453123000712889, "grad_norm": 8.251373663963818, "learning_rate": 7.611861027857317e-06, "loss": 18.1807, "step": 18891 }, { "epoch": 0.3453305792677354, "grad_norm": 6.296892881834908, "learning_rate": 7.61160860903664e-06, "loss": 17.5629, "step": 18892 }, { "epoch": 0.3453488584641819, "grad_norm": 6.264489853355421, "learning_rate": 7.611356181062503e-06, "loss": 17.5003, "step": 18893 }, { "epoch": 0.34536713766062843, "grad_norm": 6.240038453594531, "learning_rate": 7.61110374393579e-06, "loss": 17.3175, "step": 18894 }, { "epoch": 0.34538541685707497, "grad_norm": 6.152512085827735, "learning_rate": 7.610851297657383e-06, "loss": 17.3498, "step": 18895 }, { "epoch": 0.3454036960535215, "grad_norm": 6.005694368651467, "learning_rate": 7.6105988422281715e-06, "loss": 17.0651, "step": 18896 }, { "epoch": 0.34542197524996804, "grad_norm": 5.959610914219258, "learning_rate": 7.610346377649034e-06, "loss": 17.4591, "step": 18897 }, { "epoch": 0.3454402544464145, "grad_norm": 6.33804627651429, "learning_rate": 7.610093903920861e-06, "loss": 17.6828, "step": 18898 }, { "epoch": 0.34545853364286105, "grad_norm": 7.439112451385574, "learning_rate": 7.609841421044537e-06, "loss": 17.9568, "step": 18899 }, { "epoch": 0.3454768128393076, "grad_norm": 7.210065475661961, "learning_rate": 7.609588929020944e-06, "loss": 17.7448, "step": 18900 }, { "epoch": 0.3454950920357541, "grad_norm": 7.720746780965516, "learning_rate": 7.609336427850966e-06, "loss": 17.9802, "step": 18901 }, { "epoch": 0.3455133712322006, "grad_norm": 5.498515695973799, "learning_rate": 7.609083917535491e-06, "loss": 17.3154, "step": 18902 }, { "epoch": 0.34553165042864714, "grad_norm": 5.962879130467125, "learning_rate": 7.608831398075406e-06, "loss": 17.3829, "step": 18903 }, { "epoch": 0.3455499296250937, "grad_norm": 6.846620968994786, "learning_rate": 7.608578869471591e-06, "loss": 17.8339, "step": 18904 }, { "epoch": 0.3455682088215402, "grad_norm": 7.6018391661958145, "learning_rate": 7.6083263317249335e-06, "loss": 18.1102, "step": 18905 }, { "epoch": 0.34558648801798675, "grad_norm": 5.784234635221302, "learning_rate": 7.6080737848363195e-06, "loss": 17.2917, "step": 18906 }, { "epoch": 0.34560476721443323, "grad_norm": 5.752357111200658, "learning_rate": 7.6078212288066335e-06, "loss": 17.2057, "step": 18907 }, { "epoch": 0.34562304641087976, "grad_norm": 6.459778009405557, "learning_rate": 7.60756866363676e-06, "loss": 17.1771, "step": 18908 }, { "epoch": 0.3456413256073263, "grad_norm": 7.0444922758887865, "learning_rate": 7.6073160893275834e-06, "loss": 17.8434, "step": 18909 }, { "epoch": 0.34565960480377284, "grad_norm": 6.265177353041961, "learning_rate": 7.6070635058799915e-06, "loss": 17.1887, "step": 18910 }, { "epoch": 0.34567788400021937, "grad_norm": 7.053831506175236, "learning_rate": 7.606810913294866e-06, "loss": 17.7498, "step": 18911 }, { "epoch": 0.34569616319666585, "grad_norm": 5.4893544285976725, "learning_rate": 7.606558311573097e-06, "loss": 17.1869, "step": 18912 }, { "epoch": 0.3457144423931124, "grad_norm": 6.470420202776055, "learning_rate": 7.606305700715567e-06, "loss": 17.6234, "step": 18913 }, { "epoch": 0.3457327215895589, "grad_norm": 5.8681958086487915, "learning_rate": 7.606053080723161e-06, "loss": 17.1226, "step": 18914 }, { "epoch": 0.34575100078600546, "grad_norm": 7.992891775574533, "learning_rate": 7.605800451596765e-06, "loss": 17.9576, "step": 18915 }, { "epoch": 0.345769279982452, "grad_norm": 5.892657778749329, "learning_rate": 7.605547813337264e-06, "loss": 17.2621, "step": 18916 }, { "epoch": 0.3457875591788985, "grad_norm": 5.501337991493749, "learning_rate": 7.605295165945546e-06, "loss": 17.1447, "step": 18917 }, { "epoch": 0.345805838375345, "grad_norm": 7.913320990572355, "learning_rate": 7.605042509422493e-06, "loss": 18.0964, "step": 18918 }, { "epoch": 0.34582411757179154, "grad_norm": 6.4001766127926905, "learning_rate": 7.604789843768993e-06, "loss": 17.3341, "step": 18919 }, { "epoch": 0.3458423967682381, "grad_norm": 7.205033271136617, "learning_rate": 7.60453716898593e-06, "loss": 17.9212, "step": 18920 }, { "epoch": 0.3458606759646846, "grad_norm": 5.8284231389009395, "learning_rate": 7.6042844850741905e-06, "loss": 17.1386, "step": 18921 }, { "epoch": 0.3458789551611311, "grad_norm": 7.97500368082429, "learning_rate": 7.604031792034659e-06, "loss": 18.2105, "step": 18922 }, { "epoch": 0.34589723435757763, "grad_norm": 6.882976466923342, "learning_rate": 7.603779089868224e-06, "loss": 17.7936, "step": 18923 }, { "epoch": 0.34591551355402417, "grad_norm": 6.071329614133939, "learning_rate": 7.603526378575767e-06, "loss": 17.3471, "step": 18924 }, { "epoch": 0.3459337927504707, "grad_norm": 7.596351184938982, "learning_rate": 7.603273658158177e-06, "loss": 18.3059, "step": 18925 }, { "epoch": 0.34595207194691724, "grad_norm": 7.818145529326444, "learning_rate": 7.603020928616338e-06, "loss": 17.9933, "step": 18926 }, { "epoch": 0.3459703511433637, "grad_norm": 5.9681859650846, "learning_rate": 7.602768189951138e-06, "loss": 17.3911, "step": 18927 }, { "epoch": 0.34598863033981025, "grad_norm": 7.141417352971058, "learning_rate": 7.602515442163461e-06, "loss": 17.5778, "step": 18928 }, { "epoch": 0.3460069095362568, "grad_norm": 5.776967542647376, "learning_rate": 7.602262685254192e-06, "loss": 17.2206, "step": 18929 }, { "epoch": 0.3460251887327033, "grad_norm": 7.337196629646757, "learning_rate": 7.602009919224219e-06, "loss": 18.1074, "step": 18930 }, { "epoch": 0.34604346792914986, "grad_norm": 6.295462201743125, "learning_rate": 7.6017571440744264e-06, "loss": 17.4926, "step": 18931 }, { "epoch": 0.34606174712559634, "grad_norm": 6.889041399251884, "learning_rate": 7.601504359805702e-06, "loss": 17.6455, "step": 18932 }, { "epoch": 0.3460800263220429, "grad_norm": 8.463787252771924, "learning_rate": 7.60125156641893e-06, "loss": 18.1351, "step": 18933 }, { "epoch": 0.3460983055184894, "grad_norm": 6.931909202236026, "learning_rate": 7.600998763914996e-06, "loss": 17.6481, "step": 18934 }, { "epoch": 0.34611658471493595, "grad_norm": 6.67161552163744, "learning_rate": 7.6007459522947875e-06, "loss": 17.763, "step": 18935 }, { "epoch": 0.34613486391138243, "grad_norm": 6.180953002498059, "learning_rate": 7.60049313155919e-06, "loss": 17.3172, "step": 18936 }, { "epoch": 0.34615314310782896, "grad_norm": 5.520944988557824, "learning_rate": 7.600240301709092e-06, "loss": 17.0942, "step": 18937 }, { "epoch": 0.3461714223042755, "grad_norm": 5.072948752035825, "learning_rate": 7.599987462745375e-06, "loss": 16.8876, "step": 18938 }, { "epoch": 0.34618970150072204, "grad_norm": 8.091342686028737, "learning_rate": 7.599734614668928e-06, "loss": 17.9227, "step": 18939 }, { "epoch": 0.34620798069716857, "grad_norm": 5.525980838938255, "learning_rate": 7.599481757480636e-06, "loss": 17.1579, "step": 18940 }, { "epoch": 0.34622625989361505, "grad_norm": 6.154743454988897, "learning_rate": 7.599228891181389e-06, "loss": 17.4544, "step": 18941 }, { "epoch": 0.3462445390900616, "grad_norm": 7.132020764960109, "learning_rate": 7.5989760157720675e-06, "loss": 17.8731, "step": 18942 }, { "epoch": 0.3462628182865081, "grad_norm": 5.503747199022232, "learning_rate": 7.59872313125356e-06, "loss": 17.073, "step": 18943 }, { "epoch": 0.34628109748295466, "grad_norm": 6.0161469480476795, "learning_rate": 7.598470237626756e-06, "loss": 17.2366, "step": 18944 }, { "epoch": 0.3462993766794012, "grad_norm": 6.026055133355524, "learning_rate": 7.598217334892537e-06, "loss": 17.3908, "step": 18945 }, { "epoch": 0.3463176558758477, "grad_norm": 6.8960372076668035, "learning_rate": 7.597964423051794e-06, "loss": 17.6786, "step": 18946 }, { "epoch": 0.3463359350722942, "grad_norm": 7.113678456554318, "learning_rate": 7.5977115021054096e-06, "loss": 18.041, "step": 18947 }, { "epoch": 0.34635421426874075, "grad_norm": 6.122004662433077, "learning_rate": 7.597458572054272e-06, "loss": 17.2704, "step": 18948 }, { "epoch": 0.3463724934651873, "grad_norm": 8.464978046311984, "learning_rate": 7.597205632899267e-06, "loss": 18.6798, "step": 18949 }, { "epoch": 0.3463907726616338, "grad_norm": 6.166113285087843, "learning_rate": 7.596952684641282e-06, "loss": 17.5786, "step": 18950 }, { "epoch": 0.3464090518580803, "grad_norm": 8.464100664878849, "learning_rate": 7.596699727281205e-06, "loss": 18.4814, "step": 18951 }, { "epoch": 0.34642733105452683, "grad_norm": 6.51298013944387, "learning_rate": 7.596446760819918e-06, "loss": 17.5248, "step": 18952 }, { "epoch": 0.34644561025097337, "grad_norm": 8.071762299248826, "learning_rate": 7.596193785258311e-06, "loss": 18.2478, "step": 18953 }, { "epoch": 0.3464638894474199, "grad_norm": 6.861047489719463, "learning_rate": 7.59594080059727e-06, "loss": 17.6007, "step": 18954 }, { "epoch": 0.34648216864386644, "grad_norm": 7.388227593370416, "learning_rate": 7.595687806837683e-06, "loss": 17.9192, "step": 18955 }, { "epoch": 0.3465004478403129, "grad_norm": 6.022438807693688, "learning_rate": 7.595434803980436e-06, "loss": 17.35, "step": 18956 }, { "epoch": 0.34651872703675946, "grad_norm": 6.939677496048242, "learning_rate": 7.595181792026414e-06, "loss": 17.8307, "step": 18957 }, { "epoch": 0.346537006233206, "grad_norm": 6.9813038396306135, "learning_rate": 7.594928770976505e-06, "loss": 17.8424, "step": 18958 }, { "epoch": 0.3465552854296525, "grad_norm": 5.442763769373437, "learning_rate": 7.594675740831597e-06, "loss": 17.1987, "step": 18959 }, { "epoch": 0.34657356462609906, "grad_norm": 6.688560567220221, "learning_rate": 7.594422701592574e-06, "loss": 17.663, "step": 18960 }, { "epoch": 0.34659184382254554, "grad_norm": 7.418697154261923, "learning_rate": 7.5941696532603246e-06, "loss": 17.8478, "step": 18961 }, { "epoch": 0.3466101230189921, "grad_norm": 6.555275826485624, "learning_rate": 7.5939165958357365e-06, "loss": 17.7269, "step": 18962 }, { "epoch": 0.3466284022154386, "grad_norm": 6.872575812728452, "learning_rate": 7.593663529319695e-06, "loss": 17.8606, "step": 18963 }, { "epoch": 0.34664668141188515, "grad_norm": 6.927386186000394, "learning_rate": 7.593410453713088e-06, "loss": 17.8418, "step": 18964 }, { "epoch": 0.3466649606083317, "grad_norm": 6.69084749891412, "learning_rate": 7.593157369016804e-06, "loss": 17.454, "step": 18965 }, { "epoch": 0.34668323980477816, "grad_norm": 6.755837995512869, "learning_rate": 7.592904275231727e-06, "loss": 17.8124, "step": 18966 }, { "epoch": 0.3467015190012247, "grad_norm": 6.100701960625412, "learning_rate": 7.592651172358746e-06, "loss": 17.5114, "step": 18967 }, { "epoch": 0.34671979819767124, "grad_norm": 6.9029266075070135, "learning_rate": 7.592398060398749e-06, "loss": 17.7986, "step": 18968 }, { "epoch": 0.34673807739411777, "grad_norm": 6.1624585317135745, "learning_rate": 7.592144939352619e-06, "loss": 17.2991, "step": 18969 }, { "epoch": 0.34675635659056425, "grad_norm": 5.21458580299733, "learning_rate": 7.591891809221247e-06, "loss": 17.201, "step": 18970 }, { "epoch": 0.3467746357870108, "grad_norm": 7.3290017937401455, "learning_rate": 7.591638670005519e-06, "loss": 18.0177, "step": 18971 }, { "epoch": 0.3467929149834573, "grad_norm": 5.548219698809922, "learning_rate": 7.591385521706324e-06, "loss": 17.2977, "step": 18972 }, { "epoch": 0.34681119417990386, "grad_norm": 6.618569731965456, "learning_rate": 7.591132364324545e-06, "loss": 17.7773, "step": 18973 }, { "epoch": 0.3468294733763504, "grad_norm": 5.670441696830813, "learning_rate": 7.5908791978610745e-06, "loss": 17.1742, "step": 18974 }, { "epoch": 0.3468477525727969, "grad_norm": 6.266394248876967, "learning_rate": 7.590626022316796e-06, "loss": 17.4635, "step": 18975 }, { "epoch": 0.3468660317692434, "grad_norm": 7.1009250742474554, "learning_rate": 7.590372837692597e-06, "loss": 17.8548, "step": 18976 }, { "epoch": 0.34688431096568995, "grad_norm": 8.017534823453387, "learning_rate": 7.5901196439893664e-06, "loss": 18.4484, "step": 18977 }, { "epoch": 0.3469025901621365, "grad_norm": 9.423774676795624, "learning_rate": 7.589866441207993e-06, "loss": 18.6049, "step": 18978 }, { "epoch": 0.346920869358583, "grad_norm": 5.964929523147536, "learning_rate": 7.589613229349361e-06, "loss": 17.4383, "step": 18979 }, { "epoch": 0.3469391485550295, "grad_norm": 6.44123440510207, "learning_rate": 7.5893600084143595e-06, "loss": 17.6667, "step": 18980 }, { "epoch": 0.34695742775147603, "grad_norm": 6.673572921483574, "learning_rate": 7.589106778403876e-06, "loss": 17.7146, "step": 18981 }, { "epoch": 0.34697570694792257, "grad_norm": 5.71994061700808, "learning_rate": 7.588853539318799e-06, "loss": 17.4912, "step": 18982 }, { "epoch": 0.3469939861443691, "grad_norm": 6.24984709488286, "learning_rate": 7.588600291160013e-06, "loss": 17.6651, "step": 18983 }, { "epoch": 0.34701226534081564, "grad_norm": 6.899227373441953, "learning_rate": 7.588347033928408e-06, "loss": 18.0038, "step": 18984 }, { "epoch": 0.3470305445372621, "grad_norm": 7.093099139063351, "learning_rate": 7.588093767624871e-06, "loss": 17.9797, "step": 18985 }, { "epoch": 0.34704882373370866, "grad_norm": 6.660768228404228, "learning_rate": 7.587840492250293e-06, "loss": 17.3602, "step": 18986 }, { "epoch": 0.3470671029301552, "grad_norm": 6.04878390469459, "learning_rate": 7.587587207805555e-06, "loss": 17.2671, "step": 18987 }, { "epoch": 0.3470853821266017, "grad_norm": 8.783789370315844, "learning_rate": 7.587333914291551e-06, "loss": 19.0311, "step": 18988 }, { "epoch": 0.34710366132304826, "grad_norm": 6.75572416519923, "learning_rate": 7.587080611709163e-06, "loss": 18.0498, "step": 18989 }, { "epoch": 0.34712194051949474, "grad_norm": 6.152852386853728, "learning_rate": 7.586827300059285e-06, "loss": 17.3344, "step": 18990 }, { "epoch": 0.3471402197159413, "grad_norm": 8.027940366040578, "learning_rate": 7.586573979342799e-06, "loss": 18.1787, "step": 18991 }, { "epoch": 0.3471584989123878, "grad_norm": 7.033464796487611, "learning_rate": 7.586320649560599e-06, "loss": 17.2953, "step": 18992 }, { "epoch": 0.34717677810883435, "grad_norm": 6.151192848005572, "learning_rate": 7.586067310713567e-06, "loss": 17.5295, "step": 18993 }, { "epoch": 0.3471950573052809, "grad_norm": 6.775272982301498, "learning_rate": 7.585813962802595e-06, "loss": 17.9858, "step": 18994 }, { "epoch": 0.34721333650172737, "grad_norm": 7.107263174528969, "learning_rate": 7.585560605828568e-06, "loss": 17.8125, "step": 18995 }, { "epoch": 0.3472316156981739, "grad_norm": 8.009063603799497, "learning_rate": 7.585307239792377e-06, "loss": 18.1853, "step": 18996 }, { "epoch": 0.34724989489462044, "grad_norm": 5.769403736100324, "learning_rate": 7.585053864694907e-06, "loss": 17.463, "step": 18997 }, { "epoch": 0.347268174091067, "grad_norm": 5.6762921224754175, "learning_rate": 7.5848004805370475e-06, "loss": 17.2014, "step": 18998 }, { "epoch": 0.3472864532875135, "grad_norm": 7.577787481676225, "learning_rate": 7.584547087319689e-06, "loss": 18.1574, "step": 18999 }, { "epoch": 0.34730473248396, "grad_norm": 5.820216814608337, "learning_rate": 7.584293685043716e-06, "loss": 17.3679, "step": 19000 }, { "epoch": 0.3473230116804065, "grad_norm": 6.389470657618189, "learning_rate": 7.584040273710016e-06, "loss": 17.6025, "step": 19001 }, { "epoch": 0.34734129087685306, "grad_norm": 6.825169449048844, "learning_rate": 7.583786853319479e-06, "loss": 17.7989, "step": 19002 }, { "epoch": 0.3473595700732996, "grad_norm": 5.567763698087035, "learning_rate": 7.583533423872997e-06, "loss": 17.2088, "step": 19003 }, { "epoch": 0.3473778492697461, "grad_norm": 6.1100989530468, "learning_rate": 7.583279985371452e-06, "loss": 17.6809, "step": 19004 }, { "epoch": 0.3473961284661926, "grad_norm": 7.535361986395495, "learning_rate": 7.583026537815734e-06, "loss": 17.9317, "step": 19005 }, { "epoch": 0.34741440766263915, "grad_norm": 7.958814589469808, "learning_rate": 7.582773081206733e-06, "loss": 18.0453, "step": 19006 }, { "epoch": 0.3474326868590857, "grad_norm": 7.2911889967958015, "learning_rate": 7.582519615545339e-06, "loss": 18.1261, "step": 19007 }, { "epoch": 0.3474509660555322, "grad_norm": 8.145035509363272, "learning_rate": 7.582266140832435e-06, "loss": 18.7788, "step": 19008 }, { "epoch": 0.3474692452519787, "grad_norm": 7.024365272537474, "learning_rate": 7.582012657068912e-06, "loss": 17.6573, "step": 19009 }, { "epoch": 0.34748752444842523, "grad_norm": 5.462961894964153, "learning_rate": 7.58175916425566e-06, "loss": 17.2726, "step": 19010 }, { "epoch": 0.34750580364487177, "grad_norm": 7.841845760232381, "learning_rate": 7.581505662393564e-06, "loss": 18.0179, "step": 19011 }, { "epoch": 0.3475240828413183, "grad_norm": 5.8154820249981745, "learning_rate": 7.581252151483518e-06, "loss": 17.4179, "step": 19012 }, { "epoch": 0.34754236203776484, "grad_norm": 5.71731248489601, "learning_rate": 7.580998631526406e-06, "loss": 17.18, "step": 19013 }, { "epoch": 0.3475606412342113, "grad_norm": 6.907962212338977, "learning_rate": 7.580745102523117e-06, "loss": 17.7363, "step": 19014 }, { "epoch": 0.34757892043065786, "grad_norm": 8.04364219464597, "learning_rate": 7.580491564474542e-06, "loss": 18.3559, "step": 19015 }, { "epoch": 0.3475971996271044, "grad_norm": 7.898696894901157, "learning_rate": 7.580238017381565e-06, "loss": 18.4485, "step": 19016 }, { "epoch": 0.3476154788235509, "grad_norm": 7.3644667197822535, "learning_rate": 7.57998446124508e-06, "loss": 17.9994, "step": 19017 }, { "epoch": 0.34763375801999746, "grad_norm": 7.964761199509911, "learning_rate": 7.579730896065974e-06, "loss": 18.6357, "step": 19018 }, { "epoch": 0.34765203721644394, "grad_norm": 7.69745785077081, "learning_rate": 7.5794773218451324e-06, "loss": 17.9703, "step": 19019 }, { "epoch": 0.3476703164128905, "grad_norm": 5.963685721172786, "learning_rate": 7.579223738583448e-06, "loss": 17.3082, "step": 19020 }, { "epoch": 0.347688595609337, "grad_norm": 5.4843169921823245, "learning_rate": 7.5789701462818075e-06, "loss": 17.3481, "step": 19021 }, { "epoch": 0.34770687480578355, "grad_norm": 7.035984081381331, "learning_rate": 7.578716544941102e-06, "loss": 17.8914, "step": 19022 }, { "epoch": 0.3477251540022301, "grad_norm": 8.248334072918139, "learning_rate": 7.578462934562217e-06, "loss": 17.92, "step": 19023 }, { "epoch": 0.34774343319867657, "grad_norm": 6.501565335749314, "learning_rate": 7.5782093151460435e-06, "loss": 17.3995, "step": 19024 }, { "epoch": 0.3477617123951231, "grad_norm": 7.022827693322409, "learning_rate": 7.5779556866934715e-06, "loss": 18.0678, "step": 19025 }, { "epoch": 0.34777999159156964, "grad_norm": 8.056792785808573, "learning_rate": 7.577702049205387e-06, "loss": 18.0544, "step": 19026 }, { "epoch": 0.3477982707880162, "grad_norm": 5.813902440435188, "learning_rate": 7.577448402682682e-06, "loss": 17.2232, "step": 19027 }, { "epoch": 0.3478165499844627, "grad_norm": 6.1137023407954185, "learning_rate": 7.577194747126244e-06, "loss": 17.3333, "step": 19028 }, { "epoch": 0.3478348291809092, "grad_norm": 5.989409712986414, "learning_rate": 7.57694108253696e-06, "loss": 17.2316, "step": 19029 }, { "epoch": 0.3478531083773557, "grad_norm": 5.130493179645548, "learning_rate": 7.576687408915723e-06, "loss": 16.9334, "step": 19030 }, { "epoch": 0.34787138757380226, "grad_norm": 6.070497004139617, "learning_rate": 7.576433726263419e-06, "loss": 17.4544, "step": 19031 }, { "epoch": 0.3478896667702488, "grad_norm": 6.206023807432817, "learning_rate": 7.57618003458094e-06, "loss": 17.7142, "step": 19032 }, { "epoch": 0.34790794596669533, "grad_norm": 6.3894035943645315, "learning_rate": 7.575926333869172e-06, "loss": 17.2279, "step": 19033 }, { "epoch": 0.3479262251631418, "grad_norm": 4.903387349998527, "learning_rate": 7.575672624129006e-06, "loss": 16.7663, "step": 19034 }, { "epoch": 0.34794450435958835, "grad_norm": 6.085588517306378, "learning_rate": 7.575418905361331e-06, "loss": 17.1808, "step": 19035 }, { "epoch": 0.3479627835560349, "grad_norm": 8.329898932990947, "learning_rate": 7.575165177567036e-06, "loss": 17.7279, "step": 19036 }, { "epoch": 0.3479810627524814, "grad_norm": 5.714696455360687, "learning_rate": 7.574911440747013e-06, "loss": 17.2421, "step": 19037 }, { "epoch": 0.3479993419489279, "grad_norm": 7.4415930191479, "learning_rate": 7.574657694902146e-06, "loss": 18.241, "step": 19038 }, { "epoch": 0.34801762114537443, "grad_norm": 7.682200406043389, "learning_rate": 7.574403940033326e-06, "loss": 17.6827, "step": 19039 }, { "epoch": 0.34803590034182097, "grad_norm": 7.373737036423542, "learning_rate": 7.574150176141446e-06, "loss": 18.2338, "step": 19040 }, { "epoch": 0.3480541795382675, "grad_norm": 6.582039258523045, "learning_rate": 7.573896403227395e-06, "loss": 17.7226, "step": 19041 }, { "epoch": 0.34807245873471404, "grad_norm": 5.455502816831152, "learning_rate": 7.573642621292056e-06, "loss": 17.3168, "step": 19042 }, { "epoch": 0.3480907379311605, "grad_norm": 10.111280711634576, "learning_rate": 7.5733888303363235e-06, "loss": 18.9197, "step": 19043 }, { "epoch": 0.34810901712760706, "grad_norm": 7.355693970955116, "learning_rate": 7.57313503036109e-06, "loss": 17.8706, "step": 19044 }, { "epoch": 0.3481272963240536, "grad_norm": 5.411445701675955, "learning_rate": 7.572881221367239e-06, "loss": 17.137, "step": 19045 }, { "epoch": 0.34814557552050013, "grad_norm": 7.883532814525666, "learning_rate": 7.572627403355664e-06, "loss": 18.134, "step": 19046 }, { "epoch": 0.34816385471694666, "grad_norm": 6.0318294601231495, "learning_rate": 7.572373576327251e-06, "loss": 17.2574, "step": 19047 }, { "epoch": 0.34818213391339314, "grad_norm": 7.86051881596752, "learning_rate": 7.572119740282893e-06, "loss": 18.2593, "step": 19048 }, { "epoch": 0.3482004131098397, "grad_norm": 5.075490205629728, "learning_rate": 7.571865895223478e-06, "loss": 16.8729, "step": 19049 }, { "epoch": 0.3482186923062862, "grad_norm": 8.147328538856263, "learning_rate": 7.5716120411498965e-06, "loss": 18.4634, "step": 19050 }, { "epoch": 0.34823697150273275, "grad_norm": 5.504469117713465, "learning_rate": 7.571358178063039e-06, "loss": 17.1509, "step": 19051 }, { "epoch": 0.3482552506991793, "grad_norm": 6.705735662361387, "learning_rate": 7.571104305963792e-06, "loss": 17.7934, "step": 19052 }, { "epoch": 0.34827352989562577, "grad_norm": 5.3881058828175705, "learning_rate": 7.570850424853049e-06, "loss": 16.9258, "step": 19053 }, { "epoch": 0.3482918090920723, "grad_norm": 6.8951777748095635, "learning_rate": 7.570596534731699e-06, "loss": 17.8119, "step": 19054 }, { "epoch": 0.34831008828851884, "grad_norm": 5.937914246025563, "learning_rate": 7.570342635600632e-06, "loss": 17.2293, "step": 19055 }, { "epoch": 0.3483283674849654, "grad_norm": 6.921626001753688, "learning_rate": 7.570088727460735e-06, "loss": 17.7745, "step": 19056 }, { "epoch": 0.3483466466814119, "grad_norm": 8.85207934715673, "learning_rate": 7.5698348103129e-06, "loss": 18.2198, "step": 19057 }, { "epoch": 0.3483649258778584, "grad_norm": 5.969737430063275, "learning_rate": 7.569580884158017e-06, "loss": 17.1383, "step": 19058 }, { "epoch": 0.3483832050743049, "grad_norm": 7.2601443315016585, "learning_rate": 7.569326948996978e-06, "loss": 17.7702, "step": 19059 }, { "epoch": 0.34840148427075146, "grad_norm": 7.75588757033667, "learning_rate": 7.569073004830669e-06, "loss": 17.9613, "step": 19060 }, { "epoch": 0.348419763467198, "grad_norm": 5.469352595144674, "learning_rate": 7.568819051659983e-06, "loss": 17.4016, "step": 19061 }, { "epoch": 0.34843804266364453, "grad_norm": 6.614436525311249, "learning_rate": 7.568565089485809e-06, "loss": 17.7536, "step": 19062 }, { "epoch": 0.348456321860091, "grad_norm": 5.400787195704766, "learning_rate": 7.568311118309035e-06, "loss": 17.1057, "step": 19063 }, { "epoch": 0.34847460105653755, "grad_norm": 6.276006433668197, "learning_rate": 7.5680571381305555e-06, "loss": 17.4418, "step": 19064 }, { "epoch": 0.3484928802529841, "grad_norm": 5.90012870807546, "learning_rate": 7.567803148951259e-06, "loss": 17.2305, "step": 19065 }, { "epoch": 0.3485111594494306, "grad_norm": 9.806247923713876, "learning_rate": 7.5675491507720355e-06, "loss": 18.9236, "step": 19066 }, { "epoch": 0.34852943864587715, "grad_norm": 5.767030138984717, "learning_rate": 7.567295143593774e-06, "loss": 17.2684, "step": 19067 }, { "epoch": 0.34854771784232363, "grad_norm": 7.542900090242737, "learning_rate": 7.567041127417367e-06, "loss": 18.4578, "step": 19068 }, { "epoch": 0.34856599703877017, "grad_norm": 5.341792812164293, "learning_rate": 7.5667871022437025e-06, "loss": 17.1259, "step": 19069 }, { "epoch": 0.3485842762352167, "grad_norm": 7.385116990771949, "learning_rate": 7.566533068073671e-06, "loss": 18.0127, "step": 19070 }, { "epoch": 0.34860255543166324, "grad_norm": 7.847511809859978, "learning_rate": 7.566279024908165e-06, "loss": 18.119, "step": 19071 }, { "epoch": 0.3486208346281097, "grad_norm": 6.816524743102691, "learning_rate": 7.566024972748074e-06, "loss": 17.5523, "step": 19072 }, { "epoch": 0.34863911382455626, "grad_norm": 7.3187033101902355, "learning_rate": 7.565770911594288e-06, "loss": 18.1721, "step": 19073 }, { "epoch": 0.3486573930210028, "grad_norm": 7.181470743501008, "learning_rate": 7.565516841447698e-06, "loss": 17.9119, "step": 19074 }, { "epoch": 0.34867567221744933, "grad_norm": 8.020648660161886, "learning_rate": 7.565262762309194e-06, "loss": 18.5753, "step": 19075 }, { "epoch": 0.34869395141389586, "grad_norm": 7.215914984143746, "learning_rate": 7.565008674179666e-06, "loss": 17.8941, "step": 19076 }, { "epoch": 0.34871223061034234, "grad_norm": 6.445584663850671, "learning_rate": 7.564754577060005e-06, "loss": 17.6232, "step": 19077 }, { "epoch": 0.3487305098067889, "grad_norm": 7.515743718445765, "learning_rate": 7.5645004709511015e-06, "loss": 17.7808, "step": 19078 }, { "epoch": 0.3487487890032354, "grad_norm": 7.9065855217742165, "learning_rate": 7.564246355853846e-06, "loss": 17.7412, "step": 19079 }, { "epoch": 0.34876706819968195, "grad_norm": 7.175318809610735, "learning_rate": 7.563992231769131e-06, "loss": 18.122, "step": 19080 }, { "epoch": 0.3487853473961285, "grad_norm": 6.167743180577843, "learning_rate": 7.563738098697846e-06, "loss": 17.301, "step": 19081 }, { "epoch": 0.34880362659257497, "grad_norm": 7.036348573682763, "learning_rate": 7.56348395664088e-06, "loss": 17.7892, "step": 19082 }, { "epoch": 0.3488219057890215, "grad_norm": 7.608559451051648, "learning_rate": 7.563229805599126e-06, "loss": 18.3402, "step": 19083 }, { "epoch": 0.34884018498546804, "grad_norm": 5.978868292873253, "learning_rate": 7.562975645573473e-06, "loss": 17.487, "step": 19084 }, { "epoch": 0.3488584641819146, "grad_norm": 6.081142939739899, "learning_rate": 7.5627214765648134e-06, "loss": 17.2917, "step": 19085 }, { "epoch": 0.3488767433783611, "grad_norm": 6.354029078628353, "learning_rate": 7.562467298574039e-06, "loss": 17.5563, "step": 19086 }, { "epoch": 0.3488950225748076, "grad_norm": 6.797968928490002, "learning_rate": 7.562213111602037e-06, "loss": 17.316, "step": 19087 }, { "epoch": 0.3489133017712541, "grad_norm": 5.105401632683817, "learning_rate": 7.5619589156497004e-06, "loss": 17.0003, "step": 19088 }, { "epoch": 0.34893158096770066, "grad_norm": 6.572664449949223, "learning_rate": 7.561704710717921e-06, "loss": 17.4657, "step": 19089 }, { "epoch": 0.3489498601641472, "grad_norm": 6.692513585854958, "learning_rate": 7.5614504968075895e-06, "loss": 17.6388, "step": 19090 }, { "epoch": 0.34896813936059373, "grad_norm": 6.308701198746419, "learning_rate": 7.561196273919596e-06, "loss": 17.6996, "step": 19091 }, { "epoch": 0.3489864185570402, "grad_norm": 8.232221482867809, "learning_rate": 7.560942042054831e-06, "loss": 18.2663, "step": 19092 }, { "epoch": 0.34900469775348675, "grad_norm": 5.985624416470622, "learning_rate": 7.560687801214186e-06, "loss": 17.2788, "step": 19093 }, { "epoch": 0.3490229769499333, "grad_norm": 8.288715722382547, "learning_rate": 7.560433551398553e-06, "loss": 18.4856, "step": 19094 }, { "epoch": 0.3490412561463798, "grad_norm": 6.881455031953275, "learning_rate": 7.560179292608823e-06, "loss": 17.5496, "step": 19095 }, { "epoch": 0.34905953534282635, "grad_norm": 6.606205236103944, "learning_rate": 7.5599250248458864e-06, "loss": 17.4679, "step": 19096 }, { "epoch": 0.34907781453927283, "grad_norm": 6.94821201997258, "learning_rate": 7.559670748110634e-06, "loss": 17.885, "step": 19097 }, { "epoch": 0.34909609373571937, "grad_norm": 7.050968024514505, "learning_rate": 7.559416462403959e-06, "loss": 18.0848, "step": 19098 }, { "epoch": 0.3491143729321659, "grad_norm": 6.783862967584317, "learning_rate": 7.559162167726751e-06, "loss": 17.2696, "step": 19099 }, { "epoch": 0.34913265212861244, "grad_norm": 6.884680949415629, "learning_rate": 7.558907864079901e-06, "loss": 17.5352, "step": 19100 }, { "epoch": 0.349150931325059, "grad_norm": 7.496809186050319, "learning_rate": 7.558653551464301e-06, "loss": 18.0601, "step": 19101 }, { "epoch": 0.34916921052150546, "grad_norm": 7.9261805044850115, "learning_rate": 7.558399229880843e-06, "loss": 18.2118, "step": 19102 }, { "epoch": 0.349187489717952, "grad_norm": 8.338104394501705, "learning_rate": 7.558144899330418e-06, "loss": 18.584, "step": 19103 }, { "epoch": 0.34920576891439853, "grad_norm": 6.305445602880384, "learning_rate": 7.557890559813916e-06, "loss": 17.5534, "step": 19104 }, { "epoch": 0.34922404811084506, "grad_norm": 6.472521795684011, "learning_rate": 7.557636211332231e-06, "loss": 17.5505, "step": 19105 }, { "epoch": 0.34924232730729154, "grad_norm": 6.9698569968533866, "learning_rate": 7.557381853886252e-06, "loss": 17.612, "step": 19106 }, { "epoch": 0.3492606065037381, "grad_norm": 6.984919849468305, "learning_rate": 7.557127487476872e-06, "loss": 17.6495, "step": 19107 }, { "epoch": 0.3492788857001846, "grad_norm": 6.338068856962003, "learning_rate": 7.556873112104981e-06, "loss": 17.7788, "step": 19108 }, { "epoch": 0.34929716489663115, "grad_norm": 7.984494388826529, "learning_rate": 7.556618727771473e-06, "loss": 18.0341, "step": 19109 }, { "epoch": 0.3493154440930777, "grad_norm": 5.431516007249899, "learning_rate": 7.5563643344772385e-06, "loss": 17.1211, "step": 19110 }, { "epoch": 0.34933372328952417, "grad_norm": 6.056252371829921, "learning_rate": 7.5561099322231665e-06, "loss": 17.3791, "step": 19111 }, { "epoch": 0.3493520024859707, "grad_norm": 7.158640698573915, "learning_rate": 7.555855521010153e-06, "loss": 17.7733, "step": 19112 }, { "epoch": 0.34937028168241724, "grad_norm": 7.090888065074658, "learning_rate": 7.555601100839087e-06, "loss": 17.8602, "step": 19113 }, { "epoch": 0.3493885608788638, "grad_norm": 6.705794721948667, "learning_rate": 7.555346671710861e-06, "loss": 17.6568, "step": 19114 }, { "epoch": 0.3494068400753103, "grad_norm": 6.4504616451705346, "learning_rate": 7.555092233626367e-06, "loss": 17.4211, "step": 19115 }, { "epoch": 0.3494251192717568, "grad_norm": 7.092298629616406, "learning_rate": 7.5548377865864955e-06, "loss": 17.7163, "step": 19116 }, { "epoch": 0.3494433984682033, "grad_norm": 7.2102206459298746, "learning_rate": 7.554583330592141e-06, "loss": 18.0103, "step": 19117 }, { "epoch": 0.34946167766464986, "grad_norm": 7.2977341125012325, "learning_rate": 7.554328865644193e-06, "loss": 17.9424, "step": 19118 }, { "epoch": 0.3494799568610964, "grad_norm": 6.037901914704069, "learning_rate": 7.5540743917435435e-06, "loss": 17.3669, "step": 19119 }, { "epoch": 0.34949823605754293, "grad_norm": 6.128530930107559, "learning_rate": 7.553819908891085e-06, "loss": 17.3773, "step": 19120 }, { "epoch": 0.3495165152539894, "grad_norm": 7.217781933883795, "learning_rate": 7.55356541708771e-06, "loss": 18.0618, "step": 19121 }, { "epoch": 0.34953479445043595, "grad_norm": 7.532066289037688, "learning_rate": 7.553310916334307e-06, "loss": 17.9471, "step": 19122 }, { "epoch": 0.3495530736468825, "grad_norm": 7.884880601530532, "learning_rate": 7.553056406631773e-06, "loss": 18.0946, "step": 19123 }, { "epoch": 0.349571352843329, "grad_norm": 6.906685803731557, "learning_rate": 7.5528018879809985e-06, "loss": 17.5602, "step": 19124 }, { "epoch": 0.34958963203977556, "grad_norm": 6.31357951210493, "learning_rate": 7.552547360382873e-06, "loss": 17.8895, "step": 19125 }, { "epoch": 0.34960791123622204, "grad_norm": 5.962814846618306, "learning_rate": 7.552292823838292e-06, "loss": 17.2481, "step": 19126 }, { "epoch": 0.34962619043266857, "grad_norm": 5.733791137215473, "learning_rate": 7.552038278348146e-06, "loss": 17.3406, "step": 19127 }, { "epoch": 0.3496444696291151, "grad_norm": 6.127260787737624, "learning_rate": 7.5517837239133275e-06, "loss": 17.4258, "step": 19128 }, { "epoch": 0.34966274882556164, "grad_norm": 5.498357279762782, "learning_rate": 7.551529160534727e-06, "loss": 17.0711, "step": 19129 }, { "epoch": 0.3496810280220082, "grad_norm": 7.139696871622537, "learning_rate": 7.55127458821324e-06, "loss": 17.6185, "step": 19130 }, { "epoch": 0.34969930721845466, "grad_norm": 5.361238809589072, "learning_rate": 7.551020006949756e-06, "loss": 17.1274, "step": 19131 }, { "epoch": 0.3497175864149012, "grad_norm": 5.661615351669901, "learning_rate": 7.5507654167451684e-06, "loss": 17.1587, "step": 19132 }, { "epoch": 0.34973586561134773, "grad_norm": 7.1297421996471515, "learning_rate": 7.550510817600369e-06, "loss": 17.9598, "step": 19133 }, { "epoch": 0.34975414480779426, "grad_norm": 6.251832945188568, "learning_rate": 7.5502562095162516e-06, "loss": 17.431, "step": 19134 }, { "epoch": 0.3497724240042408, "grad_norm": 6.8688043884264065, "learning_rate": 7.550001592493705e-06, "loss": 17.7732, "step": 19135 }, { "epoch": 0.3497907032006873, "grad_norm": 7.889183889188873, "learning_rate": 7.549746966533627e-06, "loss": 17.9789, "step": 19136 }, { "epoch": 0.3498089823971338, "grad_norm": 5.877786857054396, "learning_rate": 7.5494923316369075e-06, "loss": 17.3148, "step": 19137 }, { "epoch": 0.34982726159358035, "grad_norm": 6.4262491059760976, "learning_rate": 7.549237687804436e-06, "loss": 17.6071, "step": 19138 }, { "epoch": 0.3498455407900269, "grad_norm": 8.282359710148388, "learning_rate": 7.548983035037107e-06, "loss": 18.281, "step": 19139 }, { "epoch": 0.34986381998647337, "grad_norm": 7.108549038253616, "learning_rate": 7.548728373335816e-06, "loss": 17.8754, "step": 19140 }, { "epoch": 0.3498820991829199, "grad_norm": 8.155913885820201, "learning_rate": 7.548473702701453e-06, "loss": 17.8797, "step": 19141 }, { "epoch": 0.34990037837936644, "grad_norm": 6.6169587638016925, "learning_rate": 7.54821902313491e-06, "loss": 17.6922, "step": 19142 }, { "epoch": 0.349918657575813, "grad_norm": 7.043672133519509, "learning_rate": 7.547964334637081e-06, "loss": 18.2744, "step": 19143 }, { "epoch": 0.3499369367722595, "grad_norm": 9.829544045235695, "learning_rate": 7.547709637208859e-06, "loss": 17.7204, "step": 19144 }, { "epoch": 0.349955215968706, "grad_norm": 6.093992746556152, "learning_rate": 7.547454930851135e-06, "loss": 17.446, "step": 19145 }, { "epoch": 0.3499734951651525, "grad_norm": 6.498279843625785, "learning_rate": 7.5472002155648015e-06, "loss": 17.5197, "step": 19146 }, { "epoch": 0.34999177436159906, "grad_norm": 6.192123225655105, "learning_rate": 7.5469454913507534e-06, "loss": 17.5905, "step": 19147 }, { "epoch": 0.3500100535580456, "grad_norm": 7.238979418550019, "learning_rate": 7.546690758209883e-06, "loss": 17.7479, "step": 19148 }, { "epoch": 0.35002833275449213, "grad_norm": 7.023267655072233, "learning_rate": 7.5464360161430805e-06, "loss": 17.6625, "step": 19149 }, { "epoch": 0.3500466119509386, "grad_norm": 6.64545949456632, "learning_rate": 7.546181265151241e-06, "loss": 17.5238, "step": 19150 }, { "epoch": 0.35006489114738515, "grad_norm": 7.66921051515604, "learning_rate": 7.545926505235258e-06, "loss": 18.1588, "step": 19151 }, { "epoch": 0.3500831703438317, "grad_norm": 7.1549883339917955, "learning_rate": 7.545671736396023e-06, "loss": 17.8666, "step": 19152 }, { "epoch": 0.3501014495402782, "grad_norm": 7.647805578815517, "learning_rate": 7.545416958634431e-06, "loss": 18.2531, "step": 19153 }, { "epoch": 0.35011972873672476, "grad_norm": 4.906920746033703, "learning_rate": 7.5451621719513725e-06, "loss": 16.9204, "step": 19154 }, { "epoch": 0.35013800793317124, "grad_norm": 6.69855203244943, "learning_rate": 7.544907376347741e-06, "loss": 17.762, "step": 19155 }, { "epoch": 0.35015628712961777, "grad_norm": 6.688795121293713, "learning_rate": 7.544652571824429e-06, "loss": 17.6316, "step": 19156 }, { "epoch": 0.3501745663260643, "grad_norm": 7.152484763408261, "learning_rate": 7.544397758382331e-06, "loss": 17.6217, "step": 19157 }, { "epoch": 0.35019284552251084, "grad_norm": 5.769786048137639, "learning_rate": 7.544142936022342e-06, "loss": 17.3694, "step": 19158 }, { "epoch": 0.3502111247189574, "grad_norm": 5.942327537010234, "learning_rate": 7.543888104745352e-06, "loss": 17.4199, "step": 19159 }, { "epoch": 0.35022940391540386, "grad_norm": 8.713755063742017, "learning_rate": 7.543633264552253e-06, "loss": 18.8002, "step": 19160 }, { "epoch": 0.3502476831118504, "grad_norm": 6.111676468636414, "learning_rate": 7.5433784154439425e-06, "loss": 17.347, "step": 19161 }, { "epoch": 0.35026596230829693, "grad_norm": 5.696435753875319, "learning_rate": 7.5431235574213104e-06, "loss": 17.2735, "step": 19162 }, { "epoch": 0.35028424150474347, "grad_norm": 6.897497797903919, "learning_rate": 7.54286869048525e-06, "loss": 17.7057, "step": 19163 }, { "epoch": 0.35030252070119, "grad_norm": 7.618375172339585, "learning_rate": 7.542613814636655e-06, "loss": 18.0204, "step": 19164 }, { "epoch": 0.3503207998976365, "grad_norm": 6.664557194664785, "learning_rate": 7.542358929876421e-06, "loss": 17.899, "step": 19165 }, { "epoch": 0.350339079094083, "grad_norm": 6.527456188713896, "learning_rate": 7.5421040362054385e-06, "loss": 17.7848, "step": 19166 }, { "epoch": 0.35035735829052955, "grad_norm": 6.481927320564349, "learning_rate": 7.541849133624603e-06, "loss": 17.5153, "step": 19167 }, { "epoch": 0.3503756374869761, "grad_norm": 6.508228343221474, "learning_rate": 7.541594222134807e-06, "loss": 17.4626, "step": 19168 }, { "epoch": 0.3503939166834226, "grad_norm": 6.364981374422483, "learning_rate": 7.541339301736943e-06, "loss": 17.3569, "step": 19169 }, { "epoch": 0.3504121958798691, "grad_norm": 7.083302689962924, "learning_rate": 7.541084372431904e-06, "loss": 17.8332, "step": 19170 }, { "epoch": 0.35043047507631564, "grad_norm": 7.3664542017646895, "learning_rate": 7.5408294342205866e-06, "loss": 17.9209, "step": 19171 }, { "epoch": 0.3504487542727622, "grad_norm": 6.621397043851026, "learning_rate": 7.540574487103882e-06, "loss": 17.7306, "step": 19172 }, { "epoch": 0.3504670334692087, "grad_norm": 6.072460447216706, "learning_rate": 7.540319531082685e-06, "loss": 17.151, "step": 19173 }, { "epoch": 0.3504853126656552, "grad_norm": 6.006454141057217, "learning_rate": 7.540064566157887e-06, "loss": 17.1267, "step": 19174 }, { "epoch": 0.3505035918621017, "grad_norm": 5.747981494818808, "learning_rate": 7.539809592330385e-06, "loss": 17.1149, "step": 19175 }, { "epoch": 0.35052187105854826, "grad_norm": 6.813425080652564, "learning_rate": 7.539554609601069e-06, "loss": 17.6495, "step": 19176 }, { "epoch": 0.3505401502549948, "grad_norm": 7.750637941850223, "learning_rate": 7.539299617970834e-06, "loss": 17.9969, "step": 19177 }, { "epoch": 0.35055842945144133, "grad_norm": 7.154887239422486, "learning_rate": 7.539044617440576e-06, "loss": 17.5975, "step": 19178 }, { "epoch": 0.3505767086478878, "grad_norm": 5.696516250028021, "learning_rate": 7.538789608011185e-06, "loss": 17.2689, "step": 19179 }, { "epoch": 0.35059498784433435, "grad_norm": 6.376758840470761, "learning_rate": 7.53853458968356e-06, "loss": 17.5849, "step": 19180 }, { "epoch": 0.3506132670407809, "grad_norm": 5.269383120275941, "learning_rate": 7.538279562458588e-06, "loss": 17.1573, "step": 19181 }, { "epoch": 0.3506315462372274, "grad_norm": 7.144242085682909, "learning_rate": 7.538024526337169e-06, "loss": 18.1725, "step": 19182 }, { "epoch": 0.35064982543367396, "grad_norm": 6.782469407083449, "learning_rate": 7.537769481320194e-06, "loss": 17.6427, "step": 19183 }, { "epoch": 0.35066810463012044, "grad_norm": 6.493324414981373, "learning_rate": 7.537514427408555e-06, "loss": 17.4828, "step": 19184 }, { "epoch": 0.35068638382656697, "grad_norm": 6.67935934419827, "learning_rate": 7.537259364603149e-06, "loss": 17.6403, "step": 19185 }, { "epoch": 0.3507046630230135, "grad_norm": 6.6564380539045604, "learning_rate": 7.53700429290487e-06, "loss": 17.708, "step": 19186 }, { "epoch": 0.35072294221946004, "grad_norm": 7.4398656587598895, "learning_rate": 7.536749212314611e-06, "loss": 17.5967, "step": 19187 }, { "epoch": 0.3507412214159066, "grad_norm": 7.525312261833701, "learning_rate": 7.5364941228332655e-06, "loss": 17.6407, "step": 19188 }, { "epoch": 0.35075950061235306, "grad_norm": 6.326946912306059, "learning_rate": 7.536239024461729e-06, "loss": 17.5081, "step": 19189 }, { "epoch": 0.3507777798087996, "grad_norm": 6.086980055709366, "learning_rate": 7.535983917200895e-06, "loss": 17.3881, "step": 19190 }, { "epoch": 0.35079605900524613, "grad_norm": 5.687678816216126, "learning_rate": 7.535728801051656e-06, "loss": 17.2807, "step": 19191 }, { "epoch": 0.35081433820169267, "grad_norm": 6.775249668295368, "learning_rate": 7.5354736760149085e-06, "loss": 17.5426, "step": 19192 }, { "epoch": 0.3508326173981392, "grad_norm": 4.997770310487413, "learning_rate": 7.535218542091546e-06, "loss": 16.8495, "step": 19193 }, { "epoch": 0.3508508965945857, "grad_norm": 8.67709245403442, "learning_rate": 7.534963399282462e-06, "loss": 17.9259, "step": 19194 }, { "epoch": 0.3508691757910322, "grad_norm": 7.081679979038991, "learning_rate": 7.5347082475885515e-06, "loss": 17.7819, "step": 19195 }, { "epoch": 0.35088745498747875, "grad_norm": 6.15128860725821, "learning_rate": 7.534453087010709e-06, "loss": 17.4622, "step": 19196 }, { "epoch": 0.3509057341839253, "grad_norm": 9.454014416296994, "learning_rate": 7.534197917549827e-06, "loss": 18.5688, "step": 19197 }, { "epoch": 0.3509240133803718, "grad_norm": 5.653357758714179, "learning_rate": 7.533942739206802e-06, "loss": 17.0181, "step": 19198 }, { "epoch": 0.3509422925768183, "grad_norm": 8.371572714131837, "learning_rate": 7.533687551982529e-06, "loss": 18.4663, "step": 19199 }, { "epoch": 0.35096057177326484, "grad_norm": 5.903768986161092, "learning_rate": 7.533432355877899e-06, "loss": 17.2126, "step": 19200 }, { "epoch": 0.3509788509697114, "grad_norm": 7.8339977387852615, "learning_rate": 7.533177150893809e-06, "loss": 17.8432, "step": 19201 }, { "epoch": 0.3509971301661579, "grad_norm": 5.658744341429674, "learning_rate": 7.532921937031153e-06, "loss": 17.3167, "step": 19202 }, { "epoch": 0.35101540936260445, "grad_norm": 6.03939753372727, "learning_rate": 7.532666714290826e-06, "loss": 16.9948, "step": 19203 }, { "epoch": 0.3510336885590509, "grad_norm": 6.768835174965798, "learning_rate": 7.532411482673721e-06, "loss": 17.5395, "step": 19204 }, { "epoch": 0.35105196775549746, "grad_norm": 5.418941345037018, "learning_rate": 7.532156242180734e-06, "loss": 17.0491, "step": 19205 }, { "epoch": 0.351070246951944, "grad_norm": 6.822958325551641, "learning_rate": 7.531900992812759e-06, "loss": 17.5348, "step": 19206 }, { "epoch": 0.35108852614839053, "grad_norm": 7.108892069534108, "learning_rate": 7.531645734570689e-06, "loss": 17.9473, "step": 19207 }, { "epoch": 0.351106805344837, "grad_norm": 6.974757906404995, "learning_rate": 7.531390467455424e-06, "loss": 18.0174, "step": 19208 }, { "epoch": 0.35112508454128355, "grad_norm": 7.23199396986483, "learning_rate": 7.531135191467852e-06, "loss": 17.9606, "step": 19209 }, { "epoch": 0.3511433637377301, "grad_norm": 6.282276272652066, "learning_rate": 7.530879906608873e-06, "loss": 17.4516, "step": 19210 }, { "epoch": 0.3511616429341766, "grad_norm": 6.7981872820744895, "learning_rate": 7.530624612879378e-06, "loss": 17.8349, "step": 19211 }, { "epoch": 0.35117992213062316, "grad_norm": 7.898551184926161, "learning_rate": 7.5303693102802635e-06, "loss": 17.8937, "step": 19212 }, { "epoch": 0.35119820132706964, "grad_norm": 5.46829284366765, "learning_rate": 7.530113998812425e-06, "loss": 17.1842, "step": 19213 }, { "epoch": 0.3512164805235162, "grad_norm": 6.301265062633721, "learning_rate": 7.529858678476756e-06, "loss": 17.6048, "step": 19214 }, { "epoch": 0.3512347597199627, "grad_norm": 7.420612252023315, "learning_rate": 7.529603349274152e-06, "loss": 17.5682, "step": 19215 }, { "epoch": 0.35125303891640924, "grad_norm": 6.836806153714951, "learning_rate": 7.529348011205506e-06, "loss": 17.6979, "step": 19216 }, { "epoch": 0.3512713181128558, "grad_norm": 7.2037295493918085, "learning_rate": 7.529092664271718e-06, "loss": 17.8771, "step": 19217 }, { "epoch": 0.35128959730930226, "grad_norm": 6.491421191766841, "learning_rate": 7.528837308473678e-06, "loss": 17.7311, "step": 19218 }, { "epoch": 0.3513078765057488, "grad_norm": 6.726707260941049, "learning_rate": 7.5285819438122805e-06, "loss": 17.7142, "step": 19219 }, { "epoch": 0.35132615570219533, "grad_norm": 6.129149885948185, "learning_rate": 7.528326570288427e-06, "loss": 17.4959, "step": 19220 }, { "epoch": 0.35134443489864187, "grad_norm": 5.846540043871445, "learning_rate": 7.5280711879030055e-06, "loss": 17.2388, "step": 19221 }, { "epoch": 0.3513627140950884, "grad_norm": 8.91938466865475, "learning_rate": 7.527815796656914e-06, "loss": 18.7418, "step": 19222 }, { "epoch": 0.3513809932915349, "grad_norm": 8.019711184974327, "learning_rate": 7.527560396551048e-06, "loss": 18.2656, "step": 19223 }, { "epoch": 0.3513992724879814, "grad_norm": 6.768107675353159, "learning_rate": 7.527304987586301e-06, "loss": 17.799, "step": 19224 }, { "epoch": 0.35141755168442795, "grad_norm": 5.558738259085692, "learning_rate": 7.527049569763571e-06, "loss": 17.122, "step": 19225 }, { "epoch": 0.3514358308808745, "grad_norm": 7.115017050290701, "learning_rate": 7.526794143083751e-06, "loss": 17.7497, "step": 19226 }, { "epoch": 0.351454110077321, "grad_norm": 6.686023231461101, "learning_rate": 7.526538707547737e-06, "loss": 17.4914, "step": 19227 }, { "epoch": 0.3514723892737675, "grad_norm": 7.005102727356235, "learning_rate": 7.526283263156424e-06, "loss": 17.7012, "step": 19228 }, { "epoch": 0.35149066847021404, "grad_norm": 6.39245729381603, "learning_rate": 7.526027809910706e-06, "loss": 17.5251, "step": 19229 }, { "epoch": 0.3515089476666606, "grad_norm": 6.653919114430062, "learning_rate": 7.525772347811482e-06, "loss": 17.686, "step": 19230 }, { "epoch": 0.3515272268631071, "grad_norm": 6.929301038300203, "learning_rate": 7.5255168768596444e-06, "loss": 17.8971, "step": 19231 }, { "epoch": 0.35154550605955365, "grad_norm": 5.498475126930915, "learning_rate": 7.52526139705609e-06, "loss": 17.2416, "step": 19232 }, { "epoch": 0.3515637852560001, "grad_norm": 6.282802389198806, "learning_rate": 7.525005908401711e-06, "loss": 17.5791, "step": 19233 }, { "epoch": 0.35158206445244666, "grad_norm": 6.470919523886569, "learning_rate": 7.5247504108974075e-06, "loss": 17.6202, "step": 19234 }, { "epoch": 0.3516003436488932, "grad_norm": 5.620344683425913, "learning_rate": 7.524494904544072e-06, "loss": 17.3058, "step": 19235 }, { "epoch": 0.35161862284533973, "grad_norm": 5.405100648432202, "learning_rate": 7.524239389342601e-06, "loss": 17.2389, "step": 19236 }, { "epoch": 0.35163690204178627, "grad_norm": 6.056074402243854, "learning_rate": 7.523983865293891e-06, "loss": 17.3652, "step": 19237 }, { "epoch": 0.35165518123823275, "grad_norm": 6.845948434940423, "learning_rate": 7.523728332398834e-06, "loss": 17.7525, "step": 19238 }, { "epoch": 0.3516734604346793, "grad_norm": 5.678162363390758, "learning_rate": 7.523472790658331e-06, "loss": 17.2832, "step": 19239 }, { "epoch": 0.3516917396311258, "grad_norm": 6.276029721847072, "learning_rate": 7.523217240073273e-06, "loss": 17.2178, "step": 19240 }, { "epoch": 0.35171001882757236, "grad_norm": 6.771556626423765, "learning_rate": 7.52296168064456e-06, "loss": 17.5713, "step": 19241 }, { "epoch": 0.35172829802401884, "grad_norm": 6.9140068797528595, "learning_rate": 7.522706112373083e-06, "loss": 17.5612, "step": 19242 }, { "epoch": 0.3517465772204654, "grad_norm": 5.268949832379009, "learning_rate": 7.5224505352597395e-06, "loss": 17.1115, "step": 19243 }, { "epoch": 0.3517648564169119, "grad_norm": 8.420649134444037, "learning_rate": 7.522194949305428e-06, "loss": 17.0724, "step": 19244 }, { "epoch": 0.35178313561335844, "grad_norm": 6.6024701703151845, "learning_rate": 7.5219393545110406e-06, "loss": 17.6513, "step": 19245 }, { "epoch": 0.351801414809805, "grad_norm": 5.5397313205401115, "learning_rate": 7.521683750877475e-06, "loss": 17.143, "step": 19246 }, { "epoch": 0.35181969400625146, "grad_norm": 7.0635908472449245, "learning_rate": 7.521428138405626e-06, "loss": 17.8985, "step": 19247 }, { "epoch": 0.351837973202698, "grad_norm": 6.690937779179455, "learning_rate": 7.521172517096391e-06, "loss": 17.5753, "step": 19248 }, { "epoch": 0.35185625239914453, "grad_norm": 5.162150717931989, "learning_rate": 7.520916886950664e-06, "loss": 16.9441, "step": 19249 }, { "epoch": 0.35187453159559107, "grad_norm": 7.542734166513863, "learning_rate": 7.520661247969343e-06, "loss": 17.9402, "step": 19250 }, { "epoch": 0.3518928107920376, "grad_norm": 6.2321172952593455, "learning_rate": 7.520405600153324e-06, "loss": 17.4276, "step": 19251 }, { "epoch": 0.3519110899884841, "grad_norm": 6.189843951485729, "learning_rate": 7.520149943503501e-06, "loss": 17.4252, "step": 19252 }, { "epoch": 0.3519293691849306, "grad_norm": 6.775092305787856, "learning_rate": 7.5198942780207705e-06, "loss": 17.6616, "step": 19253 }, { "epoch": 0.35194764838137715, "grad_norm": 5.865178419230509, "learning_rate": 7.519638603706029e-06, "loss": 17.066, "step": 19254 }, { "epoch": 0.3519659275778237, "grad_norm": 7.121064231289921, "learning_rate": 7.519382920560175e-06, "loss": 17.9656, "step": 19255 }, { "epoch": 0.3519842067742702, "grad_norm": 7.603032854869226, "learning_rate": 7.519127228584101e-06, "loss": 17.9382, "step": 19256 }, { "epoch": 0.3520024859707167, "grad_norm": 6.441960309050005, "learning_rate": 7.5188715277787035e-06, "loss": 18.0542, "step": 19257 }, { "epoch": 0.35202076516716324, "grad_norm": 6.6675001435748245, "learning_rate": 7.518615818144883e-06, "loss": 17.1855, "step": 19258 }, { "epoch": 0.3520390443636098, "grad_norm": 7.671842857573003, "learning_rate": 7.51836009968353e-06, "loss": 17.9289, "step": 19259 }, { "epoch": 0.3520573235600563, "grad_norm": 5.406344839963338, "learning_rate": 7.518104372395545e-06, "loss": 17.2156, "step": 19260 }, { "epoch": 0.35207560275650285, "grad_norm": 7.403033251888348, "learning_rate": 7.5178486362818215e-06, "loss": 18.0308, "step": 19261 }, { "epoch": 0.35209388195294933, "grad_norm": 7.1430028300022945, "learning_rate": 7.517592891343258e-06, "loss": 17.698, "step": 19262 }, { "epoch": 0.35211216114939586, "grad_norm": 9.051370413187286, "learning_rate": 7.517337137580749e-06, "loss": 18.4366, "step": 19263 }, { "epoch": 0.3521304403458424, "grad_norm": 6.552307942762419, "learning_rate": 7.517081374995192e-06, "loss": 17.4133, "step": 19264 }, { "epoch": 0.35214871954228893, "grad_norm": 6.255774953362444, "learning_rate": 7.516825603587483e-06, "loss": 17.5453, "step": 19265 }, { "epoch": 0.35216699873873547, "grad_norm": 6.4441058435343725, "learning_rate": 7.516569823358519e-06, "loss": 17.49, "step": 19266 }, { "epoch": 0.35218527793518195, "grad_norm": 7.242224969692995, "learning_rate": 7.516314034309197e-06, "loss": 18.3363, "step": 19267 }, { "epoch": 0.3522035571316285, "grad_norm": 6.158568205970019, "learning_rate": 7.516058236440412e-06, "loss": 17.394, "step": 19268 }, { "epoch": 0.352221836328075, "grad_norm": 7.229432679066626, "learning_rate": 7.515802429753061e-06, "loss": 17.947, "step": 19269 }, { "epoch": 0.35224011552452156, "grad_norm": 6.939608841008816, "learning_rate": 7.515546614248039e-06, "loss": 17.6991, "step": 19270 }, { "epoch": 0.3522583947209681, "grad_norm": 5.790529395723205, "learning_rate": 7.515290789926248e-06, "loss": 17.3236, "step": 19271 }, { "epoch": 0.3522766739174146, "grad_norm": 5.226285675220542, "learning_rate": 7.515034956788579e-06, "loss": 17.0536, "step": 19272 }, { "epoch": 0.3522949531138611, "grad_norm": 6.510252246174526, "learning_rate": 7.514779114835931e-06, "loss": 17.729, "step": 19273 }, { "epoch": 0.35231323231030764, "grad_norm": 6.164202259826949, "learning_rate": 7.5145232640692e-06, "loss": 17.4468, "step": 19274 }, { "epoch": 0.3523315115067542, "grad_norm": 5.663787925761229, "learning_rate": 7.514267404489284e-06, "loss": 17.3572, "step": 19275 }, { "epoch": 0.35234979070320066, "grad_norm": 6.53771164186853, "learning_rate": 7.514011536097079e-06, "loss": 17.3409, "step": 19276 }, { "epoch": 0.3523680698996472, "grad_norm": 5.923042661465154, "learning_rate": 7.51375565889348e-06, "loss": 17.332, "step": 19277 }, { "epoch": 0.35238634909609373, "grad_norm": 6.503639029233199, "learning_rate": 7.513499772879387e-06, "loss": 17.5271, "step": 19278 }, { "epoch": 0.35240462829254027, "grad_norm": 6.303507979073592, "learning_rate": 7.513243878055696e-06, "loss": 17.5725, "step": 19279 }, { "epoch": 0.3524229074889868, "grad_norm": 6.265970875824671, "learning_rate": 7.512987974423303e-06, "loss": 17.4912, "step": 19280 }, { "epoch": 0.3524411866854333, "grad_norm": 5.95214388881223, "learning_rate": 7.512732061983103e-06, "loss": 17.37, "step": 19281 }, { "epoch": 0.3524594658818798, "grad_norm": 6.7964495082100855, "learning_rate": 7.512476140735998e-06, "loss": 17.4483, "step": 19282 }, { "epoch": 0.35247774507832635, "grad_norm": 5.8637732923041135, "learning_rate": 7.51222021068288e-06, "loss": 17.5728, "step": 19283 }, { "epoch": 0.3524960242747729, "grad_norm": 16.598796165901515, "learning_rate": 7.51196427182465e-06, "loss": 17.1121, "step": 19284 }, { "epoch": 0.3525143034712194, "grad_norm": 20.40456920183395, "learning_rate": 7.5117083241622014e-06, "loss": 17.6317, "step": 19285 }, { "epoch": 0.3525325826676659, "grad_norm": 29.364647554813363, "learning_rate": 7.511452367696434e-06, "loss": 17.4675, "step": 19286 }, { "epoch": 0.35255086186411244, "grad_norm": 5.421552533264433, "learning_rate": 7.511196402428244e-06, "loss": 17.0389, "step": 19287 }, { "epoch": 0.352569141060559, "grad_norm": 6.622874636689023, "learning_rate": 7.510940428358529e-06, "loss": 17.6747, "step": 19288 }, { "epoch": 0.3525874202570055, "grad_norm": 6.4841809629340625, "learning_rate": 7.510684445488186e-06, "loss": 17.6812, "step": 19289 }, { "epoch": 0.35260569945345205, "grad_norm": 6.109972943276097, "learning_rate": 7.51042845381811e-06, "loss": 17.2907, "step": 19290 }, { "epoch": 0.35262397864989853, "grad_norm": 5.171756038966166, "learning_rate": 7.5101724533492025e-06, "loss": 16.9398, "step": 19291 }, { "epoch": 0.35264225784634506, "grad_norm": 6.6507230063221625, "learning_rate": 7.509916444082357e-06, "loss": 17.5161, "step": 19292 }, { "epoch": 0.3526605370427916, "grad_norm": 5.773007882989047, "learning_rate": 7.509660426018473e-06, "loss": 17.1385, "step": 19293 }, { "epoch": 0.35267881623923814, "grad_norm": 8.749319481203136, "learning_rate": 7.509404399158445e-06, "loss": 18.1071, "step": 19294 }, { "epoch": 0.35269709543568467, "grad_norm": 8.197171997068704, "learning_rate": 7.509148363503174e-06, "loss": 18.0026, "step": 19295 }, { "epoch": 0.35271537463213115, "grad_norm": 12.50595201301948, "learning_rate": 7.508892319053555e-06, "loss": 17.443, "step": 19296 }, { "epoch": 0.3527336538285777, "grad_norm": 6.516132376949015, "learning_rate": 7.508636265810486e-06, "loss": 17.3995, "step": 19297 }, { "epoch": 0.3527519330250242, "grad_norm": 6.613174558109078, "learning_rate": 7.508380203774865e-06, "loss": 17.5949, "step": 19298 }, { "epoch": 0.35277021222147076, "grad_norm": 7.868352150229157, "learning_rate": 7.508124132947589e-06, "loss": 17.9619, "step": 19299 }, { "epoch": 0.3527884914179173, "grad_norm": 7.004765055019461, "learning_rate": 7.507868053329557e-06, "loss": 17.6702, "step": 19300 }, { "epoch": 0.3528067706143638, "grad_norm": 5.577969945626082, "learning_rate": 7.507611964921664e-06, "loss": 17.0379, "step": 19301 }, { "epoch": 0.3528250498108103, "grad_norm": 5.206648365877945, "learning_rate": 7.507355867724807e-06, "loss": 16.8953, "step": 19302 }, { "epoch": 0.35284332900725685, "grad_norm": 7.115165103580579, "learning_rate": 7.5070997617398875e-06, "loss": 17.6558, "step": 19303 }, { "epoch": 0.3528616082037034, "grad_norm": 8.738741552797054, "learning_rate": 7.5068436469678e-06, "loss": 17.2167, "step": 19304 }, { "epoch": 0.3528798874001499, "grad_norm": 6.129962169898521, "learning_rate": 7.506587523409443e-06, "loss": 17.1931, "step": 19305 }, { "epoch": 0.3528981665965964, "grad_norm": 7.6024967870376905, "learning_rate": 7.506331391065714e-06, "loss": 17.9997, "step": 19306 }, { "epoch": 0.35291644579304293, "grad_norm": 7.450007025171821, "learning_rate": 7.5060752499375125e-06, "loss": 17.8514, "step": 19307 }, { "epoch": 0.35293472498948947, "grad_norm": 8.420767894245751, "learning_rate": 7.505819100025733e-06, "loss": 18.2729, "step": 19308 }, { "epoch": 0.352953004185936, "grad_norm": 6.280470968026568, "learning_rate": 7.5055629413312745e-06, "loss": 17.4036, "step": 19309 }, { "epoch": 0.3529712833823825, "grad_norm": 7.887634315817519, "learning_rate": 7.505306773855036e-06, "loss": 18.1748, "step": 19310 }, { "epoch": 0.352989562578829, "grad_norm": 6.721258238027715, "learning_rate": 7.505050597597916e-06, "loss": 17.559, "step": 19311 }, { "epoch": 0.35300784177527555, "grad_norm": 7.314230183760376, "learning_rate": 7.50479441256081e-06, "loss": 17.8109, "step": 19312 }, { "epoch": 0.3530261209717221, "grad_norm": 6.0105027388851155, "learning_rate": 7.504538218744617e-06, "loss": 17.4281, "step": 19313 }, { "epoch": 0.3530444001681686, "grad_norm": 6.292850551068649, "learning_rate": 7.504282016150235e-06, "loss": 17.6397, "step": 19314 }, { "epoch": 0.3530626793646151, "grad_norm": 7.011075822188665, "learning_rate": 7.504025804778561e-06, "loss": 17.8955, "step": 19315 }, { "epoch": 0.35308095856106164, "grad_norm": 6.904144514818356, "learning_rate": 7.503769584630495e-06, "loss": 17.6697, "step": 19316 }, { "epoch": 0.3530992377575082, "grad_norm": 7.152090942929883, "learning_rate": 7.503513355706934e-06, "loss": 18.0321, "step": 19317 }, { "epoch": 0.3531175169539547, "grad_norm": 6.4690979081874485, "learning_rate": 7.5032571180087756e-06, "loss": 17.5206, "step": 19318 }, { "epoch": 0.35313579615040125, "grad_norm": 6.742357135324228, "learning_rate": 7.5030008715369175e-06, "loss": 17.6227, "step": 19319 }, { "epoch": 0.35315407534684773, "grad_norm": 6.9498528970034394, "learning_rate": 7.502744616292259e-06, "loss": 17.718, "step": 19320 }, { "epoch": 0.35317235454329426, "grad_norm": 5.8268840292535256, "learning_rate": 7.502488352275697e-06, "loss": 17.4104, "step": 19321 }, { "epoch": 0.3531906337397408, "grad_norm": 7.403693368819702, "learning_rate": 7.502232079488132e-06, "loss": 17.4577, "step": 19322 }, { "epoch": 0.35320891293618734, "grad_norm": 6.124948314523103, "learning_rate": 7.5019757979304594e-06, "loss": 17.3621, "step": 19323 }, { "epoch": 0.35322719213263387, "grad_norm": 5.397086258924638, "learning_rate": 7.50171950760358e-06, "loss": 17.0504, "step": 19324 }, { "epoch": 0.35324547132908035, "grad_norm": 6.130934468575067, "learning_rate": 7.5014632085083905e-06, "loss": 17.3744, "step": 19325 }, { "epoch": 0.3532637505255269, "grad_norm": 5.84133944166528, "learning_rate": 7.5012069006457876e-06, "loss": 17.518, "step": 19326 }, { "epoch": 0.3532820297219734, "grad_norm": 6.435103065004983, "learning_rate": 7.500950584016675e-06, "loss": 17.6368, "step": 19327 }, { "epoch": 0.35330030891841996, "grad_norm": 6.361098036967545, "learning_rate": 7.500694258621946e-06, "loss": 17.4724, "step": 19328 }, { "epoch": 0.3533185881148665, "grad_norm": 6.2022596023931325, "learning_rate": 7.500437924462498e-06, "loss": 17.6998, "step": 19329 }, { "epoch": 0.353336867311313, "grad_norm": 5.95565398908481, "learning_rate": 7.500181581539236e-06, "loss": 17.5403, "step": 19330 }, { "epoch": 0.3533551465077595, "grad_norm": 6.4444870534955765, "learning_rate": 7.4999252298530536e-06, "loss": 17.6668, "step": 19331 }, { "epoch": 0.35337342570420605, "grad_norm": 7.226446417006764, "learning_rate": 7.4996688694048496e-06, "loss": 17.999, "step": 19332 }, { "epoch": 0.3533917049006526, "grad_norm": 5.130752996598166, "learning_rate": 7.499412500195522e-06, "loss": 16.8309, "step": 19333 }, { "epoch": 0.3534099840970991, "grad_norm": 6.091716027701422, "learning_rate": 7.499156122225972e-06, "loss": 17.1431, "step": 19334 }, { "epoch": 0.3534282632935456, "grad_norm": 5.577210815893012, "learning_rate": 7.498899735497096e-06, "loss": 17.2686, "step": 19335 }, { "epoch": 0.35344654248999213, "grad_norm": 5.420874767043154, "learning_rate": 7.498643340009793e-06, "loss": 17.2443, "step": 19336 }, { "epoch": 0.35346482168643867, "grad_norm": 7.005754200492961, "learning_rate": 7.498386935764964e-06, "loss": 17.7823, "step": 19337 }, { "epoch": 0.3534831008828852, "grad_norm": 7.014225587626757, "learning_rate": 7.498130522763503e-06, "loss": 17.7229, "step": 19338 }, { "epoch": 0.35350138007933174, "grad_norm": 6.815981578344918, "learning_rate": 7.497874101006312e-06, "loss": 17.8219, "step": 19339 }, { "epoch": 0.3535196592757782, "grad_norm": 6.287199660882772, "learning_rate": 7.497617670494289e-06, "loss": 17.5265, "step": 19340 }, { "epoch": 0.35353793847222476, "grad_norm": 7.457545237236673, "learning_rate": 7.497361231228334e-06, "loss": 18.0378, "step": 19341 }, { "epoch": 0.3535562176686713, "grad_norm": 7.807867387794018, "learning_rate": 7.497104783209343e-06, "loss": 18.1411, "step": 19342 }, { "epoch": 0.3535744968651178, "grad_norm": 5.939464419722728, "learning_rate": 7.496848326438218e-06, "loss": 17.237, "step": 19343 }, { "epoch": 0.3535927760615643, "grad_norm": 5.94933347974999, "learning_rate": 7.496591860915855e-06, "loss": 17.6885, "step": 19344 }, { "epoch": 0.35361105525801084, "grad_norm": 7.62622439160612, "learning_rate": 7.496335386643155e-06, "loss": 18.0454, "step": 19345 }, { "epoch": 0.3536293344544574, "grad_norm": 7.654420929465917, "learning_rate": 7.496078903621016e-06, "loss": 17.9037, "step": 19346 }, { "epoch": 0.3536476136509039, "grad_norm": 6.9677134942444, "learning_rate": 7.495822411850335e-06, "loss": 17.8292, "step": 19347 }, { "epoch": 0.35366589284735045, "grad_norm": 7.397537831439291, "learning_rate": 7.495565911332015e-06, "loss": 18.5911, "step": 19348 }, { "epoch": 0.35368417204379693, "grad_norm": 6.928067092409122, "learning_rate": 7.495309402066954e-06, "loss": 17.9352, "step": 19349 }, { "epoch": 0.35370245124024347, "grad_norm": 6.730765795853856, "learning_rate": 7.495052884056048e-06, "loss": 17.6145, "step": 19350 }, { "epoch": 0.35372073043669, "grad_norm": 8.10738099050842, "learning_rate": 7.4947963573001995e-06, "loss": 17.8017, "step": 19351 }, { "epoch": 0.35373900963313654, "grad_norm": 7.93727278428228, "learning_rate": 7.494539821800305e-06, "loss": 17.7669, "step": 19352 }, { "epoch": 0.35375728882958307, "grad_norm": 7.339848526157841, "learning_rate": 7.494283277557266e-06, "loss": 17.6792, "step": 19353 }, { "epoch": 0.35377556802602955, "grad_norm": 7.201721940676704, "learning_rate": 7.49402672457198e-06, "loss": 17.8781, "step": 19354 }, { "epoch": 0.3537938472224761, "grad_norm": 6.697310098952394, "learning_rate": 7.493770162845348e-06, "loss": 17.5622, "step": 19355 }, { "epoch": 0.3538121264189226, "grad_norm": 8.084516537100795, "learning_rate": 7.493513592378265e-06, "loss": 18.0973, "step": 19356 }, { "epoch": 0.35383040561536916, "grad_norm": 5.1961317299465675, "learning_rate": 7.493257013171636e-06, "loss": 17.1162, "step": 19357 }, { "epoch": 0.3538486848118157, "grad_norm": 6.731553770253061, "learning_rate": 7.493000425226358e-06, "loss": 17.816, "step": 19358 }, { "epoch": 0.3538669640082622, "grad_norm": 6.091521486875263, "learning_rate": 7.492743828543327e-06, "loss": 17.4561, "step": 19359 }, { "epoch": 0.3538852432047087, "grad_norm": 6.8667630201488725, "learning_rate": 7.492487223123448e-06, "loss": 17.7971, "step": 19360 }, { "epoch": 0.35390352240115525, "grad_norm": 6.19592515380013, "learning_rate": 7.492230608967614e-06, "loss": 17.2811, "step": 19361 }, { "epoch": 0.3539218015976018, "grad_norm": 5.807384444053767, "learning_rate": 7.491973986076733e-06, "loss": 17.3417, "step": 19362 }, { "epoch": 0.3539400807940483, "grad_norm": 7.155386757335756, "learning_rate": 7.491717354451695e-06, "loss": 17.7106, "step": 19363 }, { "epoch": 0.3539583599904948, "grad_norm": 6.538612560506778, "learning_rate": 7.491460714093406e-06, "loss": 17.2755, "step": 19364 }, { "epoch": 0.35397663918694133, "grad_norm": 6.458033663280672, "learning_rate": 7.491204065002763e-06, "loss": 17.4271, "step": 19365 }, { "epoch": 0.35399491838338787, "grad_norm": 5.739066204118389, "learning_rate": 7.4909474071806665e-06, "loss": 17.1759, "step": 19366 }, { "epoch": 0.3540131975798344, "grad_norm": 6.692072568599566, "learning_rate": 7.490690740628015e-06, "loss": 17.6206, "step": 19367 }, { "epoch": 0.35403147677628094, "grad_norm": 8.267619576579627, "learning_rate": 7.4904340653457086e-06, "loss": 18.2098, "step": 19368 }, { "epoch": 0.3540497559727274, "grad_norm": 7.1204361353633825, "learning_rate": 7.4901773813346465e-06, "loss": 17.9392, "step": 19369 }, { "epoch": 0.35406803516917396, "grad_norm": 6.691643134926981, "learning_rate": 7.489920688595729e-06, "loss": 17.7207, "step": 19370 }, { "epoch": 0.3540863143656205, "grad_norm": 6.25705543192282, "learning_rate": 7.489663987129855e-06, "loss": 17.4774, "step": 19371 }, { "epoch": 0.354104593562067, "grad_norm": 7.4853794775293885, "learning_rate": 7.489407276937927e-06, "loss": 18.0386, "step": 19372 }, { "epoch": 0.35412287275851356, "grad_norm": 7.476864122292723, "learning_rate": 7.48915055802084e-06, "loss": 18.1652, "step": 19373 }, { "epoch": 0.35414115195496004, "grad_norm": 9.351362439238002, "learning_rate": 7.488893830379498e-06, "loss": 18.3186, "step": 19374 }, { "epoch": 0.3541594311514066, "grad_norm": 8.284016530529174, "learning_rate": 7.4886370940147975e-06, "loss": 18.4558, "step": 19375 }, { "epoch": 0.3541777103478531, "grad_norm": 4.6921299243565455, "learning_rate": 7.4883803489276404e-06, "loss": 16.9383, "step": 19376 }, { "epoch": 0.35419598954429965, "grad_norm": 5.42530176855745, "learning_rate": 7.4881235951189265e-06, "loss": 17.0558, "step": 19377 }, { "epoch": 0.35421426874074613, "grad_norm": 7.705040512252523, "learning_rate": 7.487866832589555e-06, "loss": 17.8432, "step": 19378 }, { "epoch": 0.35423254793719267, "grad_norm": 6.561441805437232, "learning_rate": 7.487610061340427e-06, "loss": 17.3472, "step": 19379 }, { "epoch": 0.3542508271336392, "grad_norm": 5.070628575814306, "learning_rate": 7.48735328137244e-06, "loss": 16.7991, "step": 19380 }, { "epoch": 0.35426910633008574, "grad_norm": 5.672514503446029, "learning_rate": 7.487096492686498e-06, "loss": 16.9956, "step": 19381 }, { "epoch": 0.3542873855265323, "grad_norm": 6.737898024418183, "learning_rate": 7.486839695283497e-06, "loss": 17.5229, "step": 19382 }, { "epoch": 0.35430566472297875, "grad_norm": 6.182176476803175, "learning_rate": 7.486582889164338e-06, "loss": 17.5448, "step": 19383 }, { "epoch": 0.3543239439194253, "grad_norm": 8.33687576037172, "learning_rate": 7.486326074329923e-06, "loss": 18.6737, "step": 19384 }, { "epoch": 0.3543422231158718, "grad_norm": 5.595973665793096, "learning_rate": 7.48606925078115e-06, "loss": 17.1997, "step": 19385 }, { "epoch": 0.35436050231231836, "grad_norm": 5.926973625388309, "learning_rate": 7.4858124185189215e-06, "loss": 17.1603, "step": 19386 }, { "epoch": 0.3543787815087649, "grad_norm": 5.435840466787997, "learning_rate": 7.485555577544136e-06, "loss": 17.1007, "step": 19387 }, { "epoch": 0.3543970607052114, "grad_norm": 7.000065925848345, "learning_rate": 7.4852987278576915e-06, "loss": 17.7634, "step": 19388 }, { "epoch": 0.3544153399016579, "grad_norm": 6.415895711266425, "learning_rate": 7.485041869460493e-06, "loss": 17.4346, "step": 19389 }, { "epoch": 0.35443361909810445, "grad_norm": 6.9250974739440645, "learning_rate": 7.4847850023534375e-06, "loss": 17.7694, "step": 19390 }, { "epoch": 0.354451898294551, "grad_norm": 6.543538726229283, "learning_rate": 7.484528126537426e-06, "loss": 17.6611, "step": 19391 }, { "epoch": 0.3544701774909975, "grad_norm": 5.240844858653111, "learning_rate": 7.484271242013359e-06, "loss": 16.8667, "step": 19392 }, { "epoch": 0.354488456687444, "grad_norm": 6.863256791947771, "learning_rate": 7.484014348782138e-06, "loss": 17.4832, "step": 19393 }, { "epoch": 0.35450673588389053, "grad_norm": 6.642713304065364, "learning_rate": 7.483757446844661e-06, "loss": 17.7014, "step": 19394 }, { "epoch": 0.35452501508033707, "grad_norm": 6.849159659362434, "learning_rate": 7.48350053620183e-06, "loss": 17.4561, "step": 19395 }, { "epoch": 0.3545432942767836, "grad_norm": 5.535302093579434, "learning_rate": 7.4832436168545466e-06, "loss": 17.1929, "step": 19396 }, { "epoch": 0.35456157347323014, "grad_norm": 6.1383064259059, "learning_rate": 7.4829866888037065e-06, "loss": 17.3695, "step": 19397 }, { "epoch": 0.3545798526696766, "grad_norm": 6.16842294986674, "learning_rate": 7.482729752050215e-06, "loss": 17.5157, "step": 19398 }, { "epoch": 0.35459813186612316, "grad_norm": 6.435334878102475, "learning_rate": 7.4824728065949735e-06, "loss": 17.6294, "step": 19399 }, { "epoch": 0.3546164110625697, "grad_norm": 7.209909555953054, "learning_rate": 7.482215852438878e-06, "loss": 17.7161, "step": 19400 }, { "epoch": 0.3546346902590162, "grad_norm": 5.889956488909761, "learning_rate": 7.481958889582832e-06, "loss": 17.2855, "step": 19401 }, { "epoch": 0.35465296945546276, "grad_norm": 7.656774016906944, "learning_rate": 7.481701918027734e-06, "loss": 18.104, "step": 19402 }, { "epoch": 0.35467124865190924, "grad_norm": 8.024126407579462, "learning_rate": 7.481444937774488e-06, "loss": 18.0196, "step": 19403 }, { "epoch": 0.3546895278483558, "grad_norm": 7.285299984162822, "learning_rate": 7.4811879488239915e-06, "loss": 17.7149, "step": 19404 }, { "epoch": 0.3547078070448023, "grad_norm": 6.37127872968481, "learning_rate": 7.480930951177148e-06, "loss": 17.4418, "step": 19405 }, { "epoch": 0.35472608624124885, "grad_norm": 7.470571138470041, "learning_rate": 7.480673944834856e-06, "loss": 17.9877, "step": 19406 }, { "epoch": 0.3547443654376954, "grad_norm": 8.185439832267216, "learning_rate": 7.480416929798016e-06, "loss": 17.9194, "step": 19407 }, { "epoch": 0.35476264463414187, "grad_norm": 5.043994100916689, "learning_rate": 7.480159906067531e-06, "loss": 17.0113, "step": 19408 }, { "epoch": 0.3547809238305884, "grad_norm": 6.712227558010779, "learning_rate": 7.479902873644301e-06, "loss": 17.8045, "step": 19409 }, { "epoch": 0.35479920302703494, "grad_norm": 7.727220735557964, "learning_rate": 7.479645832529225e-06, "loss": 18.0885, "step": 19410 }, { "epoch": 0.3548174822234815, "grad_norm": 7.878353648153295, "learning_rate": 7.479388782723208e-06, "loss": 17.9703, "step": 19411 }, { "epoch": 0.35483576141992795, "grad_norm": 7.830284306740343, "learning_rate": 7.479131724227147e-06, "loss": 17.9463, "step": 19412 }, { "epoch": 0.3548540406163745, "grad_norm": 6.335659283153983, "learning_rate": 7.4788746570419454e-06, "loss": 17.2787, "step": 19413 }, { "epoch": 0.354872319812821, "grad_norm": 6.30472829977409, "learning_rate": 7.478617581168503e-06, "loss": 17.2966, "step": 19414 }, { "epoch": 0.35489059900926756, "grad_norm": 6.574696312691399, "learning_rate": 7.478360496607719e-06, "loss": 17.8703, "step": 19415 }, { "epoch": 0.3549088782057141, "grad_norm": 6.392912622945608, "learning_rate": 7.478103403360498e-06, "loss": 17.1885, "step": 19416 }, { "epoch": 0.3549271574021606, "grad_norm": 6.15821031640931, "learning_rate": 7.477846301427741e-06, "loss": 17.4841, "step": 19417 }, { "epoch": 0.3549454365986071, "grad_norm": 5.8032289235700905, "learning_rate": 7.4775891908103456e-06, "loss": 17.2369, "step": 19418 }, { "epoch": 0.35496371579505365, "grad_norm": 7.8978308302965, "learning_rate": 7.477332071509217e-06, "loss": 18.2384, "step": 19419 }, { "epoch": 0.3549819949915002, "grad_norm": 7.281175734484118, "learning_rate": 7.477074943525253e-06, "loss": 17.8954, "step": 19420 }, { "epoch": 0.3550002741879467, "grad_norm": 6.277923838885113, "learning_rate": 7.476817806859357e-06, "loss": 17.3499, "step": 19421 }, { "epoch": 0.3550185533843932, "grad_norm": 7.200947809816378, "learning_rate": 7.476560661512429e-06, "loss": 17.6936, "step": 19422 }, { "epoch": 0.35503683258083973, "grad_norm": 7.9194955302842756, "learning_rate": 7.476303507485371e-06, "loss": 17.7159, "step": 19423 }, { "epoch": 0.35505511177728627, "grad_norm": 5.766785049922964, "learning_rate": 7.4760463447790844e-06, "loss": 17.3214, "step": 19424 }, { "epoch": 0.3550733909737328, "grad_norm": 7.358756659132915, "learning_rate": 7.47578917339447e-06, "loss": 18.048, "step": 19425 }, { "epoch": 0.35509167017017934, "grad_norm": 7.4608818189334, "learning_rate": 7.475531993332429e-06, "loss": 17.7924, "step": 19426 }, { "epoch": 0.3551099493666258, "grad_norm": 6.007732917341111, "learning_rate": 7.475274804593864e-06, "loss": 17.2599, "step": 19427 }, { "epoch": 0.35512822856307236, "grad_norm": 7.804206020385914, "learning_rate": 7.475017607179676e-06, "loss": 18.2085, "step": 19428 }, { "epoch": 0.3551465077595189, "grad_norm": 6.8839181908048666, "learning_rate": 7.474760401090764e-06, "loss": 17.9312, "step": 19429 }, { "epoch": 0.35516478695596543, "grad_norm": 7.304411432808902, "learning_rate": 7.474503186328033e-06, "loss": 17.8743, "step": 19430 }, { "epoch": 0.35518306615241196, "grad_norm": 7.846221445858971, "learning_rate": 7.474245962892382e-06, "loss": 18.1887, "step": 19431 }, { "epoch": 0.35520134534885844, "grad_norm": 6.601758236885788, "learning_rate": 7.4739887307847145e-06, "loss": 17.3935, "step": 19432 }, { "epoch": 0.355219624545305, "grad_norm": 5.638614361581859, "learning_rate": 7.473731490005931e-06, "loss": 17.2594, "step": 19433 }, { "epoch": 0.3552379037417515, "grad_norm": 8.1558423597002, "learning_rate": 7.473474240556934e-06, "loss": 18.1723, "step": 19434 }, { "epoch": 0.35525618293819805, "grad_norm": 6.212145101632636, "learning_rate": 7.473216982438624e-06, "loss": 17.3857, "step": 19435 }, { "epoch": 0.3552744621346446, "grad_norm": 6.131519220006103, "learning_rate": 7.472959715651902e-06, "loss": 17.4338, "step": 19436 }, { "epoch": 0.35529274133109107, "grad_norm": 5.871994721306829, "learning_rate": 7.472702440197672e-06, "loss": 17.0883, "step": 19437 }, { "epoch": 0.3553110205275376, "grad_norm": 6.403552947923738, "learning_rate": 7.472445156076834e-06, "loss": 17.5859, "step": 19438 }, { "epoch": 0.35532929972398414, "grad_norm": 8.193769561087173, "learning_rate": 7.47218786329029e-06, "loss": 18.3522, "step": 19439 }, { "epoch": 0.3553475789204307, "grad_norm": 5.705550408018775, "learning_rate": 7.471930561838943e-06, "loss": 17.3153, "step": 19440 }, { "epoch": 0.3553658581168772, "grad_norm": 5.943444147922853, "learning_rate": 7.471673251723694e-06, "loss": 17.3869, "step": 19441 }, { "epoch": 0.3553841373133237, "grad_norm": 6.622891918132139, "learning_rate": 7.471415932945443e-06, "loss": 17.8957, "step": 19442 }, { "epoch": 0.3554024165097702, "grad_norm": 7.742108703211141, "learning_rate": 7.4711586055050944e-06, "loss": 18.1735, "step": 19443 }, { "epoch": 0.35542069570621676, "grad_norm": 5.842591222976607, "learning_rate": 7.470901269403551e-06, "loss": 17.3396, "step": 19444 }, { "epoch": 0.3554389749026633, "grad_norm": 5.9068511967150705, "learning_rate": 7.470643924641712e-06, "loss": 17.2643, "step": 19445 }, { "epoch": 0.3554572540991098, "grad_norm": 6.018712598958013, "learning_rate": 7.47038657122048e-06, "loss": 17.3815, "step": 19446 }, { "epoch": 0.3554755332955563, "grad_norm": 7.499780665216952, "learning_rate": 7.470129209140756e-06, "loss": 18.296, "step": 19447 }, { "epoch": 0.35549381249200285, "grad_norm": 9.515142214114233, "learning_rate": 7.469871838403446e-06, "loss": 17.596, "step": 19448 }, { "epoch": 0.3555120916884494, "grad_norm": 5.112666074246406, "learning_rate": 7.469614459009449e-06, "loss": 16.9713, "step": 19449 }, { "epoch": 0.3555303708848959, "grad_norm": 7.224288794638123, "learning_rate": 7.469357070959667e-06, "loss": 17.7355, "step": 19450 }, { "epoch": 0.3555486500813424, "grad_norm": 7.842872194239781, "learning_rate": 7.469099674255002e-06, "loss": 17.8648, "step": 19451 }, { "epoch": 0.35556692927778893, "grad_norm": 6.537318214223713, "learning_rate": 7.468842268896359e-06, "loss": 17.4829, "step": 19452 }, { "epoch": 0.35558520847423547, "grad_norm": 5.624756200968691, "learning_rate": 7.468584854884636e-06, "loss": 17.1853, "step": 19453 }, { "epoch": 0.355603487670682, "grad_norm": 6.1704492959040245, "learning_rate": 7.468327432220739e-06, "loss": 17.1741, "step": 19454 }, { "epoch": 0.35562176686712854, "grad_norm": 5.6975740937579635, "learning_rate": 7.468070000905568e-06, "loss": 17.1753, "step": 19455 }, { "epoch": 0.355640046063575, "grad_norm": 7.813950691810959, "learning_rate": 7.467812560940025e-06, "loss": 18.2112, "step": 19456 }, { "epoch": 0.35565832526002156, "grad_norm": 5.335502955682161, "learning_rate": 7.467555112325013e-06, "loss": 17.0313, "step": 19457 }, { "epoch": 0.3556766044564681, "grad_norm": 6.446712609176233, "learning_rate": 7.467297655061437e-06, "loss": 17.4101, "step": 19458 }, { "epoch": 0.35569488365291463, "grad_norm": 6.1874619697485365, "learning_rate": 7.467040189150194e-06, "loss": 17.3346, "step": 19459 }, { "epoch": 0.35571316284936116, "grad_norm": 6.018356849341548, "learning_rate": 7.466782714592191e-06, "loss": 17.0546, "step": 19460 }, { "epoch": 0.35573144204580764, "grad_norm": 5.294735414932873, "learning_rate": 7.466525231388327e-06, "loss": 17.1645, "step": 19461 }, { "epoch": 0.3557497212422542, "grad_norm": 6.469328490918808, "learning_rate": 7.4662677395395074e-06, "loss": 17.5357, "step": 19462 }, { "epoch": 0.3557680004387007, "grad_norm": 6.282228377007154, "learning_rate": 7.466010239046632e-06, "loss": 17.6503, "step": 19463 }, { "epoch": 0.35578627963514725, "grad_norm": 6.608266845938125, "learning_rate": 7.465752729910607e-06, "loss": 17.4305, "step": 19464 }, { "epoch": 0.3558045588315938, "grad_norm": 6.311324031626701, "learning_rate": 7.465495212132331e-06, "loss": 17.7267, "step": 19465 }, { "epoch": 0.35582283802804027, "grad_norm": 6.094071624844103, "learning_rate": 7.465237685712708e-06, "loss": 17.3744, "step": 19466 }, { "epoch": 0.3558411172244868, "grad_norm": 7.621146830460536, "learning_rate": 7.464980150652642e-06, "loss": 17.8176, "step": 19467 }, { "epoch": 0.35585939642093334, "grad_norm": 7.217791900838364, "learning_rate": 7.464722606953034e-06, "loss": 18.1047, "step": 19468 }, { "epoch": 0.3558776756173799, "grad_norm": 6.719686606133383, "learning_rate": 7.4644650546147875e-06, "loss": 17.65, "step": 19469 }, { "epoch": 0.3558959548138264, "grad_norm": 5.859634824966476, "learning_rate": 7.464207493638803e-06, "loss": 17.4257, "step": 19470 }, { "epoch": 0.3559142340102729, "grad_norm": 7.003694273833317, "learning_rate": 7.463949924025987e-06, "loss": 17.9359, "step": 19471 }, { "epoch": 0.3559325132067194, "grad_norm": 6.715987847591053, "learning_rate": 7.463692345777241e-06, "loss": 17.4042, "step": 19472 }, { "epoch": 0.35595079240316596, "grad_norm": 6.6440791916900235, "learning_rate": 7.463434758893465e-06, "loss": 17.2459, "step": 19473 }, { "epoch": 0.3559690715996125, "grad_norm": 6.703365635526031, "learning_rate": 7.4631771633755645e-06, "loss": 17.6477, "step": 19474 }, { "epoch": 0.35598735079605903, "grad_norm": 7.42266547463853, "learning_rate": 7.462919559224442e-06, "loss": 18.0435, "step": 19475 }, { "epoch": 0.3560056299925055, "grad_norm": 6.53594389417586, "learning_rate": 7.462661946441001e-06, "loss": 17.4178, "step": 19476 }, { "epoch": 0.35602390918895205, "grad_norm": 5.858822877464721, "learning_rate": 7.462404325026142e-06, "loss": 17.5212, "step": 19477 }, { "epoch": 0.3560421883853986, "grad_norm": 6.326782648560232, "learning_rate": 7.46214669498077e-06, "loss": 17.1662, "step": 19478 }, { "epoch": 0.3560604675818451, "grad_norm": 7.593968420428665, "learning_rate": 7.461889056305789e-06, "loss": 17.7736, "step": 19479 }, { "epoch": 0.3560787467782916, "grad_norm": 5.912228012846317, "learning_rate": 7.4616314090020995e-06, "loss": 17.0636, "step": 19480 }, { "epoch": 0.35609702597473814, "grad_norm": 6.369144309951476, "learning_rate": 7.461373753070605e-06, "loss": 17.5882, "step": 19481 }, { "epoch": 0.35611530517118467, "grad_norm": 8.272102536758139, "learning_rate": 7.4611160885122105e-06, "loss": 17.848, "step": 19482 }, { "epoch": 0.3561335843676312, "grad_norm": 6.165283653648815, "learning_rate": 7.460858415327815e-06, "loss": 17.2829, "step": 19483 }, { "epoch": 0.35615186356407774, "grad_norm": 6.53035454580862, "learning_rate": 7.460600733518326e-06, "loss": 17.4869, "step": 19484 }, { "epoch": 0.3561701427605242, "grad_norm": 6.209285409291938, "learning_rate": 7.460343043084645e-06, "loss": 17.4252, "step": 19485 }, { "epoch": 0.35618842195697076, "grad_norm": 6.619422600433882, "learning_rate": 7.460085344027675e-06, "loss": 17.3904, "step": 19486 }, { "epoch": 0.3562067011534173, "grad_norm": 6.954155104690954, "learning_rate": 7.45982763634832e-06, "loss": 17.5735, "step": 19487 }, { "epoch": 0.35622498034986383, "grad_norm": 8.274983654871239, "learning_rate": 7.45956992004748e-06, "loss": 18.6415, "step": 19488 }, { "epoch": 0.35624325954631036, "grad_norm": 6.702428815338919, "learning_rate": 7.459312195126064e-06, "loss": 17.6304, "step": 19489 }, { "epoch": 0.35626153874275684, "grad_norm": 5.848455807410714, "learning_rate": 7.459054461584971e-06, "loss": 17.2761, "step": 19490 }, { "epoch": 0.3562798179392034, "grad_norm": 6.183570962589923, "learning_rate": 7.458796719425106e-06, "loss": 17.489, "step": 19491 }, { "epoch": 0.3562980971356499, "grad_norm": 6.0344839279626195, "learning_rate": 7.458538968647371e-06, "loss": 17.5043, "step": 19492 }, { "epoch": 0.35631637633209645, "grad_norm": 7.7679328374348735, "learning_rate": 7.45828120925267e-06, "loss": 18.2713, "step": 19493 }, { "epoch": 0.356334655528543, "grad_norm": 7.188328960680923, "learning_rate": 7.458023441241907e-06, "loss": 17.872, "step": 19494 }, { "epoch": 0.35635293472498947, "grad_norm": 6.708241420470331, "learning_rate": 7.457765664615986e-06, "loss": 17.8736, "step": 19495 }, { "epoch": 0.356371213921436, "grad_norm": 5.880791325612288, "learning_rate": 7.4575078793758095e-06, "loss": 17.4544, "step": 19496 }, { "epoch": 0.35638949311788254, "grad_norm": 7.228093828812865, "learning_rate": 7.4572500855222795e-06, "loss": 17.7935, "step": 19497 }, { "epoch": 0.3564077723143291, "grad_norm": 7.389873482180641, "learning_rate": 7.456992283056302e-06, "loss": 17.7376, "step": 19498 }, { "epoch": 0.3564260515107756, "grad_norm": 6.079701279278257, "learning_rate": 7.456734471978782e-06, "loss": 17.4537, "step": 19499 }, { "epoch": 0.3564443307072221, "grad_norm": 6.49756636901589, "learning_rate": 7.456476652290619e-06, "loss": 17.8027, "step": 19500 }, { "epoch": 0.3564626099036686, "grad_norm": 8.343409896559569, "learning_rate": 7.456218823992718e-06, "loss": 18.3613, "step": 19501 }, { "epoch": 0.35648088910011516, "grad_norm": 7.275233240696608, "learning_rate": 7.455960987085982e-06, "loss": 18.1414, "step": 19502 }, { "epoch": 0.3564991682965617, "grad_norm": 5.746313584875896, "learning_rate": 7.4557031415713185e-06, "loss": 17.3823, "step": 19503 }, { "epoch": 0.35651744749300823, "grad_norm": 7.209214987742282, "learning_rate": 7.455445287449627e-06, "loss": 17.9968, "step": 19504 }, { "epoch": 0.3565357266894547, "grad_norm": 5.615786379902083, "learning_rate": 7.455187424721814e-06, "loss": 17.1986, "step": 19505 }, { "epoch": 0.35655400588590125, "grad_norm": 6.919847812923979, "learning_rate": 7.454929553388781e-06, "loss": 17.5477, "step": 19506 }, { "epoch": 0.3565722850823478, "grad_norm": 6.458199993844874, "learning_rate": 7.454671673451434e-06, "loss": 17.3889, "step": 19507 }, { "epoch": 0.3565905642787943, "grad_norm": 5.492010412363909, "learning_rate": 7.454413784910675e-06, "loss": 17.189, "step": 19508 }, { "epoch": 0.35660884347524086, "grad_norm": 5.620456516038945, "learning_rate": 7.454155887767409e-06, "loss": 17.3132, "step": 19509 }, { "epoch": 0.35662712267168734, "grad_norm": 7.333231458293356, "learning_rate": 7.453897982022539e-06, "loss": 17.9381, "step": 19510 }, { "epoch": 0.35664540186813387, "grad_norm": 5.895555401955035, "learning_rate": 7.453640067676971e-06, "loss": 17.6563, "step": 19511 }, { "epoch": 0.3566636810645804, "grad_norm": 6.095911156600389, "learning_rate": 7.4533821447316045e-06, "loss": 17.2515, "step": 19512 }, { "epoch": 0.35668196026102694, "grad_norm": 5.91007587085715, "learning_rate": 7.4531242131873505e-06, "loss": 17.2879, "step": 19513 }, { "epoch": 0.3567002394574734, "grad_norm": 7.874624189571801, "learning_rate": 7.452866273045106e-06, "loss": 17.926, "step": 19514 }, { "epoch": 0.35671851865391996, "grad_norm": 6.741908116251173, "learning_rate": 7.452608324305779e-06, "loss": 17.5684, "step": 19515 }, { "epoch": 0.3567367978503665, "grad_norm": 5.734303574903024, "learning_rate": 7.452350366970273e-06, "loss": 16.9621, "step": 19516 }, { "epoch": 0.35675507704681303, "grad_norm": 6.827994662104138, "learning_rate": 7.452092401039491e-06, "loss": 17.8539, "step": 19517 }, { "epoch": 0.35677335624325957, "grad_norm": 5.554487343322766, "learning_rate": 7.451834426514339e-06, "loss": 17.2516, "step": 19518 }, { "epoch": 0.35679163543970605, "grad_norm": 8.996661982365572, "learning_rate": 7.4515764433957195e-06, "loss": 18.6347, "step": 19519 }, { "epoch": 0.3568099146361526, "grad_norm": 5.777592153915319, "learning_rate": 7.4513184516845376e-06, "loss": 17.1962, "step": 19520 }, { "epoch": 0.3568281938325991, "grad_norm": 6.6244143733216205, "learning_rate": 7.451060451381696e-06, "loss": 17.6402, "step": 19521 }, { "epoch": 0.35684647302904565, "grad_norm": 5.736463429171136, "learning_rate": 7.450802442488101e-06, "loss": 17.448, "step": 19522 }, { "epoch": 0.3568647522254922, "grad_norm": 6.225464066752719, "learning_rate": 7.450544425004657e-06, "loss": 17.4283, "step": 19523 }, { "epoch": 0.35688303142193867, "grad_norm": 5.557820621065601, "learning_rate": 7.450286398932266e-06, "loss": 17.2756, "step": 19524 }, { "epoch": 0.3569013106183852, "grad_norm": 7.261448883357131, "learning_rate": 7.450028364271834e-06, "loss": 17.9538, "step": 19525 }, { "epoch": 0.35691958981483174, "grad_norm": 7.418123239235833, "learning_rate": 7.449770321024265e-06, "loss": 17.8033, "step": 19526 }, { "epoch": 0.3569378690112783, "grad_norm": 6.38774604313727, "learning_rate": 7.449512269190466e-06, "loss": 17.5972, "step": 19527 }, { "epoch": 0.3569561482077248, "grad_norm": 7.360832041667508, "learning_rate": 7.449254208771337e-06, "loss": 18.2713, "step": 19528 }, { "epoch": 0.3569744274041713, "grad_norm": 5.408830883075092, "learning_rate": 7.4489961397677835e-06, "loss": 17.1253, "step": 19529 }, { "epoch": 0.3569927066006178, "grad_norm": 6.239119904201538, "learning_rate": 7.4487380621807125e-06, "loss": 17.3757, "step": 19530 }, { "epoch": 0.35701098579706436, "grad_norm": 5.891247481157093, "learning_rate": 7.4484799760110285e-06, "loss": 17.1889, "step": 19531 }, { "epoch": 0.3570292649935109, "grad_norm": 6.40253611384729, "learning_rate": 7.448221881259633e-06, "loss": 17.7897, "step": 19532 }, { "epoch": 0.35704754418995743, "grad_norm": 6.509498408766673, "learning_rate": 7.4479637779274315e-06, "loss": 17.5238, "step": 19533 }, { "epoch": 0.3570658233864039, "grad_norm": 5.435814259320647, "learning_rate": 7.44770566601533e-06, "loss": 16.9849, "step": 19534 }, { "epoch": 0.35708410258285045, "grad_norm": 6.373180457230543, "learning_rate": 7.447447545524234e-06, "loss": 17.5635, "step": 19535 }, { "epoch": 0.357102381779297, "grad_norm": 6.064176241219916, "learning_rate": 7.447189416455045e-06, "loss": 17.3874, "step": 19536 }, { "epoch": 0.3571206609757435, "grad_norm": 7.94021393627077, "learning_rate": 7.44693127880867e-06, "loss": 18.064, "step": 19537 }, { "epoch": 0.35713894017219006, "grad_norm": 6.717605353286808, "learning_rate": 7.446673132586013e-06, "loss": 17.7664, "step": 19538 }, { "epoch": 0.35715721936863654, "grad_norm": 5.421782166907727, "learning_rate": 7.446414977787979e-06, "loss": 17.1765, "step": 19539 }, { "epoch": 0.35717549856508307, "grad_norm": 6.563266448417799, "learning_rate": 7.446156814415472e-06, "loss": 17.6065, "step": 19540 }, { "epoch": 0.3571937777615296, "grad_norm": 5.986476597498721, "learning_rate": 7.4458986424694e-06, "loss": 17.385, "step": 19541 }, { "epoch": 0.35721205695797614, "grad_norm": 6.529516511890925, "learning_rate": 7.445640461950664e-06, "loss": 17.4231, "step": 19542 }, { "epoch": 0.3572303361544227, "grad_norm": 7.215770537126524, "learning_rate": 7.4453822728601695e-06, "loss": 17.7901, "step": 19543 }, { "epoch": 0.35724861535086916, "grad_norm": 6.153997687868142, "learning_rate": 7.445124075198824e-06, "loss": 17.3847, "step": 19544 }, { "epoch": 0.3572668945473157, "grad_norm": 6.406699556092609, "learning_rate": 7.44486586896753e-06, "loss": 17.5249, "step": 19545 }, { "epoch": 0.35728517374376223, "grad_norm": 5.122915638185464, "learning_rate": 7.444607654167194e-06, "loss": 17.0643, "step": 19546 }, { "epoch": 0.35730345294020877, "grad_norm": 6.14367321722611, "learning_rate": 7.444349430798718e-06, "loss": 17.3365, "step": 19547 }, { "epoch": 0.35732173213665525, "grad_norm": 6.8760673202196125, "learning_rate": 7.444091198863012e-06, "loss": 17.8723, "step": 19548 }, { "epoch": 0.3573400113331018, "grad_norm": 9.740963472137814, "learning_rate": 7.4438329583609785e-06, "loss": 19.0003, "step": 19549 }, { "epoch": 0.3573582905295483, "grad_norm": 6.189180487315558, "learning_rate": 7.443574709293522e-06, "loss": 17.4863, "step": 19550 }, { "epoch": 0.35737656972599485, "grad_norm": 7.1232218886265555, "learning_rate": 7.443316451661546e-06, "loss": 17.9244, "step": 19551 }, { "epoch": 0.3573948489224414, "grad_norm": 6.644580298661404, "learning_rate": 7.4430581854659615e-06, "loss": 17.6901, "step": 19552 }, { "epoch": 0.35741312811888787, "grad_norm": 7.33589492496623, "learning_rate": 7.442799910707667e-06, "loss": 17.77, "step": 19553 }, { "epoch": 0.3574314073153344, "grad_norm": 7.316198263494221, "learning_rate": 7.442541627387572e-06, "loss": 18.1297, "step": 19554 }, { "epoch": 0.35744968651178094, "grad_norm": 7.166057386822801, "learning_rate": 7.442283335506582e-06, "loss": 17.8345, "step": 19555 }, { "epoch": 0.3574679657082275, "grad_norm": 6.646128805745733, "learning_rate": 7.442025035065598e-06, "loss": 17.4866, "step": 19556 }, { "epoch": 0.357486244904674, "grad_norm": 6.057776358116597, "learning_rate": 7.441766726065529e-06, "loss": 17.4085, "step": 19557 }, { "epoch": 0.3575045241011205, "grad_norm": 6.231824837767921, "learning_rate": 7.441508408507281e-06, "loss": 17.4139, "step": 19558 }, { "epoch": 0.357522803297567, "grad_norm": 6.982239309763197, "learning_rate": 7.441250082391756e-06, "loss": 18.093, "step": 19559 }, { "epoch": 0.35754108249401356, "grad_norm": 7.365303753528583, "learning_rate": 7.440991747719863e-06, "loss": 17.7381, "step": 19560 }, { "epoch": 0.3575593616904601, "grad_norm": 6.102411056837014, "learning_rate": 7.440733404492504e-06, "loss": 17.3601, "step": 19561 }, { "epoch": 0.35757764088690663, "grad_norm": 8.092326201907957, "learning_rate": 7.4404750527105885e-06, "loss": 18.417, "step": 19562 }, { "epoch": 0.3575959200833531, "grad_norm": 5.923153029325546, "learning_rate": 7.440216692375017e-06, "loss": 17.0967, "step": 19563 }, { "epoch": 0.35761419927979965, "grad_norm": 6.1980079939452795, "learning_rate": 7.4399583234867005e-06, "loss": 17.4764, "step": 19564 }, { "epoch": 0.3576324784762462, "grad_norm": 6.10555734087733, "learning_rate": 7.43969994604654e-06, "loss": 17.4802, "step": 19565 }, { "epoch": 0.3576507576726927, "grad_norm": 5.356375823423826, "learning_rate": 7.439441560055443e-06, "loss": 17.0387, "step": 19566 }, { "epoch": 0.35766903686913926, "grad_norm": 5.8036224360158135, "learning_rate": 7.4391831655143155e-06, "loss": 17.2135, "step": 19567 }, { "epoch": 0.35768731606558574, "grad_norm": 6.727786201025457, "learning_rate": 7.4389247624240635e-06, "loss": 17.4191, "step": 19568 }, { "epoch": 0.35770559526203227, "grad_norm": 6.942808237998604, "learning_rate": 7.43866635078559e-06, "loss": 17.7536, "step": 19569 }, { "epoch": 0.3577238744584788, "grad_norm": 7.271246304521421, "learning_rate": 7.438407930599802e-06, "loss": 17.9878, "step": 19570 }, { "epoch": 0.35774215365492534, "grad_norm": 7.343102527902273, "learning_rate": 7.438149501867609e-06, "loss": 17.6091, "step": 19571 }, { "epoch": 0.3577604328513719, "grad_norm": 7.014358727902585, "learning_rate": 7.437891064589912e-06, "loss": 17.4978, "step": 19572 }, { "epoch": 0.35777871204781836, "grad_norm": 6.316023574272729, "learning_rate": 7.437632618767619e-06, "loss": 17.6849, "step": 19573 }, { "epoch": 0.3577969912442649, "grad_norm": 7.1173021736225, "learning_rate": 7.437374164401632e-06, "loss": 18.0485, "step": 19574 }, { "epoch": 0.35781527044071143, "grad_norm": 6.922034267789044, "learning_rate": 7.437115701492863e-06, "loss": 17.6598, "step": 19575 }, { "epoch": 0.35783354963715797, "grad_norm": 6.941606087333205, "learning_rate": 7.436857230042215e-06, "loss": 17.7257, "step": 19576 }, { "epoch": 0.3578518288336045, "grad_norm": 5.806717517611175, "learning_rate": 7.436598750050593e-06, "loss": 17.2348, "step": 19577 }, { "epoch": 0.357870108030051, "grad_norm": 7.204718588209834, "learning_rate": 7.436340261518904e-06, "loss": 17.6794, "step": 19578 }, { "epoch": 0.3578883872264975, "grad_norm": 5.228357199531201, "learning_rate": 7.436081764448054e-06, "loss": 16.9076, "step": 19579 }, { "epoch": 0.35790666642294405, "grad_norm": 8.135319780959174, "learning_rate": 7.4358232588389475e-06, "loss": 18.3858, "step": 19580 }, { "epoch": 0.3579249456193906, "grad_norm": 6.395997239594845, "learning_rate": 7.435564744692494e-06, "loss": 17.4644, "step": 19581 }, { "epoch": 0.35794322481583707, "grad_norm": 6.621546714164927, "learning_rate": 7.435306222009597e-06, "loss": 17.5496, "step": 19582 }, { "epoch": 0.3579615040122836, "grad_norm": 7.346892370167702, "learning_rate": 7.435047690791162e-06, "loss": 17.5626, "step": 19583 }, { "epoch": 0.35797978320873014, "grad_norm": 5.783952019402652, "learning_rate": 7.434789151038097e-06, "loss": 17.3614, "step": 19584 }, { "epoch": 0.3579980624051767, "grad_norm": 5.760549772703515, "learning_rate": 7.434530602751307e-06, "loss": 17.2567, "step": 19585 }, { "epoch": 0.3580163416016232, "grad_norm": 6.478047570788055, "learning_rate": 7.434272045931698e-06, "loss": 17.4649, "step": 19586 }, { "epoch": 0.3580346207980697, "grad_norm": 6.221514341852532, "learning_rate": 7.434013480580178e-06, "loss": 17.2979, "step": 19587 }, { "epoch": 0.3580528999945162, "grad_norm": 8.038249083522063, "learning_rate": 7.43375490669765e-06, "loss": 18.2009, "step": 19588 }, { "epoch": 0.35807117919096276, "grad_norm": 7.153169603424881, "learning_rate": 7.433496324285023e-06, "loss": 17.8616, "step": 19589 }, { "epoch": 0.3580894583874093, "grad_norm": 6.140821138713422, "learning_rate": 7.433237733343204e-06, "loss": 17.2862, "step": 19590 }, { "epoch": 0.35810773758385583, "grad_norm": 6.303164117025946, "learning_rate": 7.432979133873096e-06, "loss": 17.1239, "step": 19591 }, { "epoch": 0.3581260167803023, "grad_norm": 7.40032118331771, "learning_rate": 7.432720525875608e-06, "loss": 17.7626, "step": 19592 }, { "epoch": 0.35814429597674885, "grad_norm": 5.211355925358332, "learning_rate": 7.432461909351646e-06, "loss": 17.0628, "step": 19593 }, { "epoch": 0.3581625751731954, "grad_norm": 7.288194341296274, "learning_rate": 7.432203284302115e-06, "loss": 18.2211, "step": 19594 }, { "epoch": 0.3581808543696419, "grad_norm": 7.43978040314632, "learning_rate": 7.431944650727924e-06, "loss": 17.9704, "step": 19595 }, { "epoch": 0.35819913356608846, "grad_norm": 8.187597270207503, "learning_rate": 7.431686008629978e-06, "loss": 18.0001, "step": 19596 }, { "epoch": 0.35821741276253494, "grad_norm": 7.247317235545807, "learning_rate": 7.431427358009182e-06, "loss": 17.9215, "step": 19597 }, { "epoch": 0.3582356919589815, "grad_norm": 7.942615379347667, "learning_rate": 7.431168698866444e-06, "loss": 17.7566, "step": 19598 }, { "epoch": 0.358253971155428, "grad_norm": 5.567173124767692, "learning_rate": 7.430910031202673e-06, "loss": 17.1107, "step": 19599 }, { "epoch": 0.35827225035187454, "grad_norm": 6.038390410749337, "learning_rate": 7.430651355018772e-06, "loss": 17.3829, "step": 19600 }, { "epoch": 0.3582905295483211, "grad_norm": 6.185621499809661, "learning_rate": 7.4303926703156484e-06, "loss": 17.3401, "step": 19601 }, { "epoch": 0.35830880874476756, "grad_norm": 6.998107385390362, "learning_rate": 7.43013397709421e-06, "loss": 17.6478, "step": 19602 }, { "epoch": 0.3583270879412141, "grad_norm": 6.423917792766361, "learning_rate": 7.429875275355364e-06, "loss": 17.5313, "step": 19603 }, { "epoch": 0.35834536713766063, "grad_norm": 6.190849407069878, "learning_rate": 7.429616565100014e-06, "loss": 17.6826, "step": 19604 }, { "epoch": 0.35836364633410717, "grad_norm": 6.864104331432642, "learning_rate": 7.429357846329069e-06, "loss": 17.4815, "step": 19605 }, { "epoch": 0.3583819255305537, "grad_norm": 5.633235072213729, "learning_rate": 7.429099119043437e-06, "loss": 17.101, "step": 19606 }, { "epoch": 0.3584002047270002, "grad_norm": 7.173497231752361, "learning_rate": 7.428840383244023e-06, "loss": 17.5816, "step": 19607 }, { "epoch": 0.3584184839234467, "grad_norm": 6.824575995155842, "learning_rate": 7.428581638931734e-06, "loss": 17.7751, "step": 19608 }, { "epoch": 0.35843676311989325, "grad_norm": 7.294082566289569, "learning_rate": 7.428322886107475e-06, "loss": 17.9937, "step": 19609 }, { "epoch": 0.3584550423163398, "grad_norm": 6.336024020206503, "learning_rate": 7.428064124772158e-06, "loss": 17.5486, "step": 19610 }, { "epoch": 0.3584733215127863, "grad_norm": 6.8384419425680045, "learning_rate": 7.4278053549266845e-06, "loss": 17.8061, "step": 19611 }, { "epoch": 0.3584916007092328, "grad_norm": 6.8563408704511835, "learning_rate": 7.427546576571966e-06, "loss": 17.8507, "step": 19612 }, { "epoch": 0.35850987990567934, "grad_norm": 5.672177529612705, "learning_rate": 7.427287789708907e-06, "loss": 17.2885, "step": 19613 }, { "epoch": 0.3585281591021259, "grad_norm": 6.383419232609466, "learning_rate": 7.427028994338414e-06, "loss": 17.3135, "step": 19614 }, { "epoch": 0.3585464382985724, "grad_norm": 7.179162938060904, "learning_rate": 7.426770190461394e-06, "loss": 17.9906, "step": 19615 }, { "epoch": 0.3585647174950189, "grad_norm": 5.885327218747135, "learning_rate": 7.4265113780787575e-06, "loss": 17.1926, "step": 19616 }, { "epoch": 0.3585829966914654, "grad_norm": 6.841343611008343, "learning_rate": 7.426252557191409e-06, "loss": 17.5999, "step": 19617 }, { "epoch": 0.35860127588791196, "grad_norm": 5.706515078920045, "learning_rate": 7.425993727800255e-06, "loss": 17.1247, "step": 19618 }, { "epoch": 0.3586195550843585, "grad_norm": 5.059189919687213, "learning_rate": 7.425734889906203e-06, "loss": 16.9361, "step": 19619 }, { "epoch": 0.35863783428080503, "grad_norm": 5.500086691594506, "learning_rate": 7.425476043510161e-06, "loss": 17.0845, "step": 19620 }, { "epoch": 0.3586561134772515, "grad_norm": 6.27450390437792, "learning_rate": 7.4252171886130365e-06, "loss": 17.4172, "step": 19621 }, { "epoch": 0.35867439267369805, "grad_norm": 5.902742990740609, "learning_rate": 7.424958325215736e-06, "loss": 17.596, "step": 19622 }, { "epoch": 0.3586926718701446, "grad_norm": 6.644123805087707, "learning_rate": 7.424699453319166e-06, "loss": 17.4616, "step": 19623 }, { "epoch": 0.3587109510665911, "grad_norm": 7.793181924074182, "learning_rate": 7.424440572924236e-06, "loss": 17.9932, "step": 19624 }, { "epoch": 0.35872923026303766, "grad_norm": 9.1000473577842, "learning_rate": 7.424181684031853e-06, "loss": 18.4022, "step": 19625 }, { "epoch": 0.35874750945948414, "grad_norm": 6.450341745546176, "learning_rate": 7.423922786642922e-06, "loss": 17.4903, "step": 19626 }, { "epoch": 0.3587657886559307, "grad_norm": 5.945848969936562, "learning_rate": 7.423663880758354e-06, "loss": 17.0628, "step": 19627 }, { "epoch": 0.3587840678523772, "grad_norm": 5.344817447692296, "learning_rate": 7.423404966379052e-06, "loss": 17.0436, "step": 19628 }, { "epoch": 0.35880234704882374, "grad_norm": 6.273733286302565, "learning_rate": 7.4231460435059255e-06, "loss": 17.3068, "step": 19629 }, { "epoch": 0.3588206262452703, "grad_norm": 6.762580780150825, "learning_rate": 7.422887112139884e-06, "loss": 17.5117, "step": 19630 }, { "epoch": 0.35883890544171676, "grad_norm": 12.948570375745069, "learning_rate": 7.422628172281834e-06, "loss": 18.4008, "step": 19631 }, { "epoch": 0.3588571846381633, "grad_norm": 7.659120306546429, "learning_rate": 7.422369223932682e-06, "loss": 18.1467, "step": 19632 }, { "epoch": 0.35887546383460983, "grad_norm": 6.1455289098079176, "learning_rate": 7.422110267093334e-06, "loss": 17.3468, "step": 19633 }, { "epoch": 0.35889374303105637, "grad_norm": 7.165817662349481, "learning_rate": 7.421851301764702e-06, "loss": 17.7761, "step": 19634 }, { "epoch": 0.3589120222275029, "grad_norm": 6.38528561482254, "learning_rate": 7.421592327947691e-06, "loss": 17.6262, "step": 19635 }, { "epoch": 0.3589303014239494, "grad_norm": 8.56793241169269, "learning_rate": 7.421333345643208e-06, "loss": 17.9467, "step": 19636 }, { "epoch": 0.3589485806203959, "grad_norm": 6.218560703794342, "learning_rate": 7.4210743548521625e-06, "loss": 17.5763, "step": 19637 }, { "epoch": 0.35896685981684245, "grad_norm": 7.398152881594439, "learning_rate": 7.4208153555754615e-06, "loss": 17.7664, "step": 19638 }, { "epoch": 0.358985139013289, "grad_norm": 8.148209656499118, "learning_rate": 7.420556347814012e-06, "loss": 18.0681, "step": 19639 }, { "epoch": 0.3590034182097355, "grad_norm": 5.627743029720429, "learning_rate": 7.420297331568723e-06, "loss": 17.3774, "step": 19640 }, { "epoch": 0.359021697406182, "grad_norm": 7.478425081380414, "learning_rate": 7.420038306840503e-06, "loss": 18.0255, "step": 19641 }, { "epoch": 0.35903997660262854, "grad_norm": 6.609427342782146, "learning_rate": 7.419779273630256e-06, "loss": 17.5987, "step": 19642 }, { "epoch": 0.3590582557990751, "grad_norm": 7.193156920052047, "learning_rate": 7.419520231938895e-06, "loss": 17.4684, "step": 19643 }, { "epoch": 0.3590765349955216, "grad_norm": 5.748449793544554, "learning_rate": 7.419261181767326e-06, "loss": 17.293, "step": 19644 }, { "epoch": 0.35909481419196815, "grad_norm": 8.081670635816513, "learning_rate": 7.419002123116456e-06, "loss": 18.1125, "step": 19645 }, { "epoch": 0.35911309338841463, "grad_norm": 5.235679311443473, "learning_rate": 7.418743055987192e-06, "loss": 16.8445, "step": 19646 }, { "epoch": 0.35913137258486116, "grad_norm": 7.671628058202542, "learning_rate": 7.418483980380444e-06, "loss": 18.4453, "step": 19647 }, { "epoch": 0.3591496517813077, "grad_norm": 5.769573530475302, "learning_rate": 7.418224896297121e-06, "loss": 17.2692, "step": 19648 }, { "epoch": 0.35916793097775424, "grad_norm": 6.803533960951056, "learning_rate": 7.417965803738127e-06, "loss": 17.6479, "step": 19649 }, { "epoch": 0.3591862101742007, "grad_norm": 6.928022572209373, "learning_rate": 7.417706702704375e-06, "loss": 17.8475, "step": 19650 }, { "epoch": 0.35920448937064725, "grad_norm": 6.335909893000433, "learning_rate": 7.4174475931967705e-06, "loss": 17.6418, "step": 19651 }, { "epoch": 0.3592227685670938, "grad_norm": 9.00018364645467, "learning_rate": 7.417188475216222e-06, "loss": 17.9307, "step": 19652 }, { "epoch": 0.3592410477635403, "grad_norm": 5.782366549676033, "learning_rate": 7.416929348763636e-06, "loss": 17.0915, "step": 19653 }, { "epoch": 0.35925932695998686, "grad_norm": 7.1325422603754, "learning_rate": 7.416670213839924e-06, "loss": 17.5685, "step": 19654 }, { "epoch": 0.35927760615643334, "grad_norm": 6.932598907319226, "learning_rate": 7.416411070445992e-06, "loss": 17.8335, "step": 19655 }, { "epoch": 0.3592958853528799, "grad_norm": 7.340339314323664, "learning_rate": 7.416151918582748e-06, "loss": 17.9561, "step": 19656 }, { "epoch": 0.3593141645493264, "grad_norm": 7.09431420271154, "learning_rate": 7.415892758251102e-06, "loss": 17.8449, "step": 19657 }, { "epoch": 0.35933244374577294, "grad_norm": 6.842800698365112, "learning_rate": 7.415633589451963e-06, "loss": 17.6761, "step": 19658 }, { "epoch": 0.3593507229422195, "grad_norm": 6.4437469966055225, "learning_rate": 7.4153744121862356e-06, "loss": 17.432, "step": 19659 }, { "epoch": 0.35936900213866596, "grad_norm": 6.043227159248423, "learning_rate": 7.4151152264548325e-06, "loss": 17.367, "step": 19660 }, { "epoch": 0.3593872813351125, "grad_norm": 5.698312052736918, "learning_rate": 7.414856032258657e-06, "loss": 17.2209, "step": 19661 }, { "epoch": 0.35940556053155903, "grad_norm": 5.7025338404511485, "learning_rate": 7.414596829598624e-06, "loss": 17.1254, "step": 19662 }, { "epoch": 0.35942383972800557, "grad_norm": 7.415967652311182, "learning_rate": 7.414337618475638e-06, "loss": 17.956, "step": 19663 }, { "epoch": 0.3594421189244521, "grad_norm": 7.879265535384837, "learning_rate": 7.414078398890607e-06, "loss": 18.2849, "step": 19664 }, { "epoch": 0.3594603981208986, "grad_norm": 6.156486612932872, "learning_rate": 7.413819170844441e-06, "loss": 17.5195, "step": 19665 }, { "epoch": 0.3594786773173451, "grad_norm": 6.833262405969314, "learning_rate": 7.4135599343380485e-06, "loss": 17.6161, "step": 19666 }, { "epoch": 0.35949695651379165, "grad_norm": 5.941007952575729, "learning_rate": 7.413300689372338e-06, "loss": 17.3599, "step": 19667 }, { "epoch": 0.3595152357102382, "grad_norm": 7.484804191904205, "learning_rate": 7.413041435948218e-06, "loss": 18.2024, "step": 19668 }, { "epoch": 0.3595335149066847, "grad_norm": 5.704438021847672, "learning_rate": 7.412782174066596e-06, "loss": 17.3, "step": 19669 }, { "epoch": 0.3595517941031312, "grad_norm": 6.185017125210529, "learning_rate": 7.412522903728383e-06, "loss": 17.2473, "step": 19670 }, { "epoch": 0.35957007329957774, "grad_norm": 5.671060004639427, "learning_rate": 7.412263624934486e-06, "loss": 17.3157, "step": 19671 }, { "epoch": 0.3595883524960243, "grad_norm": 6.884825009983396, "learning_rate": 7.412004337685817e-06, "loss": 17.9186, "step": 19672 }, { "epoch": 0.3596066316924708, "grad_norm": 7.112803035888674, "learning_rate": 7.411745041983279e-06, "loss": 17.7299, "step": 19673 }, { "epoch": 0.35962491088891735, "grad_norm": 7.537168009725052, "learning_rate": 7.411485737827784e-06, "loss": 17.6849, "step": 19674 }, { "epoch": 0.35964319008536383, "grad_norm": 8.076727056397893, "learning_rate": 7.411226425220241e-06, "loss": 17.8502, "step": 19675 }, { "epoch": 0.35966146928181036, "grad_norm": 7.422706306015047, "learning_rate": 7.41096710416156e-06, "loss": 17.7832, "step": 19676 }, { "epoch": 0.3596797484782569, "grad_norm": 6.264827269575593, "learning_rate": 7.410707774652648e-06, "loss": 17.4166, "step": 19677 }, { "epoch": 0.35969802767470344, "grad_norm": 6.39577954566566, "learning_rate": 7.410448436694413e-06, "loss": 17.6863, "step": 19678 }, { "epoch": 0.35971630687114997, "grad_norm": 6.806574691474655, "learning_rate": 7.410189090287767e-06, "loss": 17.4876, "step": 19679 }, { "epoch": 0.35973458606759645, "grad_norm": 5.805553498268519, "learning_rate": 7.4099297354336165e-06, "loss": 17.1755, "step": 19680 }, { "epoch": 0.359752865264043, "grad_norm": 6.210606853116422, "learning_rate": 7.4096703721328724e-06, "loss": 17.4134, "step": 19681 }, { "epoch": 0.3597711444604895, "grad_norm": 7.514337354941543, "learning_rate": 7.4094110003864425e-06, "loss": 18.1328, "step": 19682 }, { "epoch": 0.35978942365693606, "grad_norm": 7.110195460046373, "learning_rate": 7.409151620195234e-06, "loss": 17.6775, "step": 19683 }, { "epoch": 0.35980770285338254, "grad_norm": 6.409499865083177, "learning_rate": 7.4088922315601605e-06, "loss": 17.4388, "step": 19684 }, { "epoch": 0.3598259820498291, "grad_norm": 6.406989787801191, "learning_rate": 7.408632834482128e-06, "loss": 17.4169, "step": 19685 }, { "epoch": 0.3598442612462756, "grad_norm": 8.533000130720591, "learning_rate": 7.408373428962048e-06, "loss": 17.9988, "step": 19686 }, { "epoch": 0.35986254044272215, "grad_norm": 7.258823821723709, "learning_rate": 7.408114015000826e-06, "loss": 18.2898, "step": 19687 }, { "epoch": 0.3598808196391687, "grad_norm": 6.27707598990602, "learning_rate": 7.407854592599373e-06, "loss": 17.4093, "step": 19688 }, { "epoch": 0.35989909883561516, "grad_norm": 6.565035756992462, "learning_rate": 7.407595161758601e-06, "loss": 17.8111, "step": 19689 }, { "epoch": 0.3599173780320617, "grad_norm": 7.039274284803448, "learning_rate": 7.407335722479415e-06, "loss": 17.7455, "step": 19690 }, { "epoch": 0.35993565722850823, "grad_norm": 6.696144712301817, "learning_rate": 7.407076274762727e-06, "loss": 17.4365, "step": 19691 }, { "epoch": 0.35995393642495477, "grad_norm": 6.206007733573061, "learning_rate": 7.406816818609445e-06, "loss": 17.5794, "step": 19692 }, { "epoch": 0.3599722156214013, "grad_norm": 8.493781781946268, "learning_rate": 7.406557354020478e-06, "loss": 18.0151, "step": 19693 }, { "epoch": 0.3599904948178478, "grad_norm": 7.626843398616922, "learning_rate": 7.406297880996738e-06, "loss": 18.0236, "step": 19694 }, { "epoch": 0.3600087740142943, "grad_norm": 6.942237055756598, "learning_rate": 7.406038399539133e-06, "loss": 17.555, "step": 19695 }, { "epoch": 0.36002705321074085, "grad_norm": 6.615219864669865, "learning_rate": 7.405778909648571e-06, "loss": 17.548, "step": 19696 }, { "epoch": 0.3600453324071874, "grad_norm": 6.089896410559852, "learning_rate": 7.405519411325962e-06, "loss": 17.6716, "step": 19697 }, { "epoch": 0.3600636116036339, "grad_norm": 5.7815032990880715, "learning_rate": 7.405259904572218e-06, "loss": 17.4445, "step": 19698 }, { "epoch": 0.3600818908000804, "grad_norm": 7.686329575038699, "learning_rate": 7.405000389388246e-06, "loss": 17.9146, "step": 19699 }, { "epoch": 0.36010016999652694, "grad_norm": 9.216626247454519, "learning_rate": 7.404740865774956e-06, "loss": 18.9646, "step": 19700 }, { "epoch": 0.3601184491929735, "grad_norm": 6.141974855088552, "learning_rate": 7.404481333733258e-06, "loss": 17.4394, "step": 19701 }, { "epoch": 0.36013672838942, "grad_norm": 6.143262419367804, "learning_rate": 7.404221793264062e-06, "loss": 17.4181, "step": 19702 }, { "epoch": 0.36015500758586655, "grad_norm": 5.082826702009226, "learning_rate": 7.403962244368278e-06, "loss": 16.8467, "step": 19703 }, { "epoch": 0.36017328678231303, "grad_norm": 7.543973184689073, "learning_rate": 7.403702687046813e-06, "loss": 18.0566, "step": 19704 }, { "epoch": 0.36019156597875956, "grad_norm": 6.4485019873172496, "learning_rate": 7.40344312130058e-06, "loss": 17.5813, "step": 19705 }, { "epoch": 0.3602098451752061, "grad_norm": 6.611479357754433, "learning_rate": 7.4031835471304865e-06, "loss": 18.2218, "step": 19706 }, { "epoch": 0.36022812437165264, "grad_norm": 6.891696190007479, "learning_rate": 7.402923964537444e-06, "loss": 17.6386, "step": 19707 }, { "epoch": 0.36024640356809917, "grad_norm": 6.611510491468658, "learning_rate": 7.4026643735223615e-06, "loss": 17.9912, "step": 19708 }, { "epoch": 0.36026468276454565, "grad_norm": 5.8347357933883925, "learning_rate": 7.402404774086149e-06, "loss": 17.4233, "step": 19709 }, { "epoch": 0.3602829619609922, "grad_norm": 5.884081020716994, "learning_rate": 7.402145166229715e-06, "loss": 17.4612, "step": 19710 }, { "epoch": 0.3603012411574387, "grad_norm": 4.938148253713487, "learning_rate": 7.401885549953972e-06, "loss": 16.6768, "step": 19711 }, { "epoch": 0.36031952035388526, "grad_norm": 6.006304997933859, "learning_rate": 7.401625925259828e-06, "loss": 17.3801, "step": 19712 }, { "epoch": 0.3603377995503318, "grad_norm": 5.800407938715674, "learning_rate": 7.401366292148195e-06, "loss": 17.4753, "step": 19713 }, { "epoch": 0.3603560787467783, "grad_norm": 6.698856927273207, "learning_rate": 7.40110665061998e-06, "loss": 17.4803, "step": 19714 }, { "epoch": 0.3603743579432248, "grad_norm": 5.772585813016475, "learning_rate": 7.400847000676094e-06, "loss": 17.2274, "step": 19715 }, { "epoch": 0.36039263713967135, "grad_norm": 6.647914607446682, "learning_rate": 7.400587342317448e-06, "loss": 17.5091, "step": 19716 }, { "epoch": 0.3604109163361179, "grad_norm": 8.63686879543823, "learning_rate": 7.400327675544953e-06, "loss": 18.5806, "step": 19717 }, { "epoch": 0.36042919553256436, "grad_norm": 7.582196247257612, "learning_rate": 7.400068000359517e-06, "loss": 18.1169, "step": 19718 }, { "epoch": 0.3604474747290109, "grad_norm": 6.340555182234325, "learning_rate": 7.39980831676205e-06, "loss": 17.6335, "step": 19719 }, { "epoch": 0.36046575392545743, "grad_norm": 6.6482684547007675, "learning_rate": 7.399548624753464e-06, "loss": 17.6152, "step": 19720 }, { "epoch": 0.36048403312190397, "grad_norm": 5.909967438297958, "learning_rate": 7.399288924334669e-06, "loss": 17.4626, "step": 19721 }, { "epoch": 0.3605023123183505, "grad_norm": 6.005151272205654, "learning_rate": 7.399029215506573e-06, "loss": 17.4018, "step": 19722 }, { "epoch": 0.360520591514797, "grad_norm": 7.12043830715943, "learning_rate": 7.398769498270089e-06, "loss": 17.5324, "step": 19723 }, { "epoch": 0.3605388707112435, "grad_norm": 5.024518202423369, "learning_rate": 7.398509772626125e-06, "loss": 16.9543, "step": 19724 }, { "epoch": 0.36055714990769006, "grad_norm": 6.777717028682303, "learning_rate": 7.398250038575592e-06, "loss": 17.6355, "step": 19725 }, { "epoch": 0.3605754291041366, "grad_norm": 6.549358447587129, "learning_rate": 7.397990296119402e-06, "loss": 17.546, "step": 19726 }, { "epoch": 0.3605937083005831, "grad_norm": 6.683853120093345, "learning_rate": 7.397730545258465e-06, "loss": 17.6049, "step": 19727 }, { "epoch": 0.3606119874970296, "grad_norm": 6.851753428347234, "learning_rate": 7.3974707859936875e-06, "loss": 17.3464, "step": 19728 }, { "epoch": 0.36063026669347614, "grad_norm": 4.364639334883509, "learning_rate": 7.397211018325985e-06, "loss": 16.718, "step": 19729 }, { "epoch": 0.3606485458899227, "grad_norm": 7.298185805244727, "learning_rate": 7.396951242256265e-06, "loss": 18.0385, "step": 19730 }, { "epoch": 0.3606668250863692, "grad_norm": 6.818757348557367, "learning_rate": 7.396691457785441e-06, "loss": 17.7227, "step": 19731 }, { "epoch": 0.36068510428281575, "grad_norm": 6.243456883073716, "learning_rate": 7.39643166491442e-06, "loss": 17.4463, "step": 19732 }, { "epoch": 0.36070338347926223, "grad_norm": 7.381302393938972, "learning_rate": 7.396171863644112e-06, "loss": 17.7082, "step": 19733 }, { "epoch": 0.36072166267570877, "grad_norm": 7.6168563506880975, "learning_rate": 7.395912053975432e-06, "loss": 18.3822, "step": 19734 }, { "epoch": 0.3607399418721553, "grad_norm": 7.06514164437329, "learning_rate": 7.395652235909287e-06, "loss": 17.9721, "step": 19735 }, { "epoch": 0.36075822106860184, "grad_norm": 6.251879282186458, "learning_rate": 7.39539240944659e-06, "loss": 17.3321, "step": 19736 }, { "epoch": 0.36077650026504837, "grad_norm": 8.067429593127484, "learning_rate": 7.3951325745882495e-06, "loss": 17.7748, "step": 19737 }, { "epoch": 0.36079477946149485, "grad_norm": 6.340137944056408, "learning_rate": 7.394872731335177e-06, "loss": 17.0545, "step": 19738 }, { "epoch": 0.3608130586579414, "grad_norm": 7.021389629665991, "learning_rate": 7.394612879688285e-06, "loss": 17.594, "step": 19739 }, { "epoch": 0.3608313378543879, "grad_norm": 6.903716597008266, "learning_rate": 7.3943530196484815e-06, "loss": 17.6848, "step": 19740 }, { "epoch": 0.36084961705083446, "grad_norm": 7.48213399992343, "learning_rate": 7.394093151216679e-06, "loss": 18.2631, "step": 19741 }, { "epoch": 0.360867896247281, "grad_norm": 5.734938813390531, "learning_rate": 7.393833274393786e-06, "loss": 17.0842, "step": 19742 }, { "epoch": 0.3608861754437275, "grad_norm": 6.20768880816114, "learning_rate": 7.393573389180716e-06, "loss": 17.5858, "step": 19743 }, { "epoch": 0.360904454640174, "grad_norm": 6.778458834230044, "learning_rate": 7.393313495578381e-06, "loss": 17.7546, "step": 19744 }, { "epoch": 0.36092273383662055, "grad_norm": 5.675230261919206, "learning_rate": 7.393053593587688e-06, "loss": 17.2096, "step": 19745 }, { "epoch": 0.3609410130330671, "grad_norm": 5.775884309642294, "learning_rate": 7.392793683209549e-06, "loss": 17.075, "step": 19746 }, { "epoch": 0.3609592922295136, "grad_norm": 6.958690337990445, "learning_rate": 7.392533764444876e-06, "loss": 17.7834, "step": 19747 }, { "epoch": 0.3609775714259601, "grad_norm": 7.253467290962211, "learning_rate": 7.392273837294581e-06, "loss": 17.8554, "step": 19748 }, { "epoch": 0.36099585062240663, "grad_norm": 6.391119217486749, "learning_rate": 7.3920139017595735e-06, "loss": 17.5906, "step": 19749 }, { "epoch": 0.36101412981885317, "grad_norm": 7.451194477337203, "learning_rate": 7.391753957840765e-06, "loss": 17.5156, "step": 19750 }, { "epoch": 0.3610324090152997, "grad_norm": 7.478802256143311, "learning_rate": 7.391494005539066e-06, "loss": 17.6013, "step": 19751 }, { "epoch": 0.3610506882117462, "grad_norm": 6.444095772568898, "learning_rate": 7.391234044855388e-06, "loss": 17.4123, "step": 19752 }, { "epoch": 0.3610689674081927, "grad_norm": 6.233308545837031, "learning_rate": 7.390974075790643e-06, "loss": 17.3002, "step": 19753 }, { "epoch": 0.36108724660463926, "grad_norm": 6.2841775562864495, "learning_rate": 7.390714098345739e-06, "loss": 17.6364, "step": 19754 }, { "epoch": 0.3611055258010858, "grad_norm": 6.062349416057556, "learning_rate": 7.390454112521592e-06, "loss": 17.3652, "step": 19755 }, { "epoch": 0.3611238049975323, "grad_norm": 6.675362618049272, "learning_rate": 7.3901941183191095e-06, "loss": 17.7102, "step": 19756 }, { "epoch": 0.3611420841939788, "grad_norm": 5.779234254884737, "learning_rate": 7.389934115739204e-06, "loss": 17.1497, "step": 19757 }, { "epoch": 0.36116036339042534, "grad_norm": 6.523897847285966, "learning_rate": 7.389674104782789e-06, "loss": 17.7741, "step": 19758 }, { "epoch": 0.3611786425868719, "grad_norm": 6.619671923116873, "learning_rate": 7.389414085450772e-06, "loss": 17.6544, "step": 19759 }, { "epoch": 0.3611969217833184, "grad_norm": 7.02593384430311, "learning_rate": 7.389154057744066e-06, "loss": 17.8876, "step": 19760 }, { "epoch": 0.36121520097976495, "grad_norm": 7.397148017211546, "learning_rate": 7.388894021663581e-06, "loss": 17.9427, "step": 19761 }, { "epoch": 0.36123348017621143, "grad_norm": 6.055158587464948, "learning_rate": 7.388633977210231e-06, "loss": 17.3072, "step": 19762 }, { "epoch": 0.36125175937265797, "grad_norm": 5.619037949473037, "learning_rate": 7.388373924384926e-06, "loss": 17.247, "step": 19763 }, { "epoch": 0.3612700385691045, "grad_norm": 6.593007971613619, "learning_rate": 7.388113863188579e-06, "loss": 17.7705, "step": 19764 }, { "epoch": 0.36128831776555104, "grad_norm": 6.80874070597616, "learning_rate": 7.387853793622099e-06, "loss": 17.6813, "step": 19765 }, { "epoch": 0.3613065969619976, "grad_norm": 8.362611850273602, "learning_rate": 7.387593715686399e-06, "loss": 18.5448, "step": 19766 }, { "epoch": 0.36132487615844405, "grad_norm": 6.786788258346048, "learning_rate": 7.387333629382388e-06, "loss": 17.598, "step": 19767 }, { "epoch": 0.3613431553548906, "grad_norm": 5.684770298024673, "learning_rate": 7.387073534710982e-06, "loss": 17.2738, "step": 19768 }, { "epoch": 0.3613614345513371, "grad_norm": 6.338173972597751, "learning_rate": 7.38681343167309e-06, "loss": 17.5468, "step": 19769 }, { "epoch": 0.36137971374778366, "grad_norm": 8.451045048153572, "learning_rate": 7.386553320269625e-06, "loss": 18.3305, "step": 19770 }, { "epoch": 0.3613979929442302, "grad_norm": 7.1551800330201125, "learning_rate": 7.386293200501495e-06, "loss": 17.7602, "step": 19771 }, { "epoch": 0.3614162721406767, "grad_norm": 6.1481840629691025, "learning_rate": 7.386033072369619e-06, "loss": 17.4353, "step": 19772 }, { "epoch": 0.3614345513371232, "grad_norm": 8.949106858349216, "learning_rate": 7.3857729358749e-06, "loss": 17.7564, "step": 19773 }, { "epoch": 0.36145283053356975, "grad_norm": 7.304820160434771, "learning_rate": 7.385512791018255e-06, "loss": 17.9803, "step": 19774 }, { "epoch": 0.3614711097300163, "grad_norm": 7.3603973195622086, "learning_rate": 7.3852526378005955e-06, "loss": 18.0031, "step": 19775 }, { "epoch": 0.3614893889264628, "grad_norm": 5.999500649438277, "learning_rate": 7.384992476222832e-06, "loss": 17.3192, "step": 19776 }, { "epoch": 0.3615076681229093, "grad_norm": 7.537500465756095, "learning_rate": 7.384732306285875e-06, "loss": 17.9028, "step": 19777 }, { "epoch": 0.36152594731935583, "grad_norm": 6.726759715914112, "learning_rate": 7.384472127990641e-06, "loss": 17.7602, "step": 19778 }, { "epoch": 0.36154422651580237, "grad_norm": 6.548891995168657, "learning_rate": 7.384211941338038e-06, "loss": 17.719, "step": 19779 }, { "epoch": 0.3615625057122489, "grad_norm": 5.862858606696997, "learning_rate": 7.383951746328979e-06, "loss": 17.2742, "step": 19780 }, { "epoch": 0.36158078490869544, "grad_norm": 6.781399042218442, "learning_rate": 7.383691542964376e-06, "loss": 18.1223, "step": 19781 }, { "epoch": 0.3615990641051419, "grad_norm": 6.087743190092235, "learning_rate": 7.383431331245142e-06, "loss": 17.3174, "step": 19782 }, { "epoch": 0.36161734330158846, "grad_norm": 6.2910179355940254, "learning_rate": 7.383171111172186e-06, "loss": 17.2643, "step": 19783 }, { "epoch": 0.361635622498035, "grad_norm": 6.218268411467764, "learning_rate": 7.382910882746424e-06, "loss": 17.3961, "step": 19784 }, { "epoch": 0.3616539016944815, "grad_norm": 5.730710339353957, "learning_rate": 7.382650645968764e-06, "loss": 16.9912, "step": 19785 }, { "epoch": 0.361672180890928, "grad_norm": 6.022333613509842, "learning_rate": 7.382390400840123e-06, "loss": 17.1482, "step": 19786 }, { "epoch": 0.36169046008737454, "grad_norm": 7.196930519841331, "learning_rate": 7.382130147361408e-06, "loss": 17.9276, "step": 19787 }, { "epoch": 0.3617087392838211, "grad_norm": 6.0683435637162395, "learning_rate": 7.381869885533534e-06, "loss": 17.3092, "step": 19788 }, { "epoch": 0.3617270184802676, "grad_norm": 7.343149765349085, "learning_rate": 7.381609615357414e-06, "loss": 18.0614, "step": 19789 }, { "epoch": 0.36174529767671415, "grad_norm": 5.5653076492380675, "learning_rate": 7.381349336833958e-06, "loss": 17.3303, "step": 19790 }, { "epoch": 0.36176357687316063, "grad_norm": 5.080857582127793, "learning_rate": 7.381089049964078e-06, "loss": 17.013, "step": 19791 }, { "epoch": 0.36178185606960717, "grad_norm": 7.169992833963652, "learning_rate": 7.38082875474869e-06, "loss": 17.5835, "step": 19792 }, { "epoch": 0.3618001352660537, "grad_norm": 6.034150606404791, "learning_rate": 7.380568451188702e-06, "loss": 17.4684, "step": 19793 }, { "epoch": 0.36181841446250024, "grad_norm": 7.196883464812891, "learning_rate": 7.3803081392850286e-06, "loss": 17.6147, "step": 19794 }, { "epoch": 0.3618366936589468, "grad_norm": 7.120685145067033, "learning_rate": 7.380047819038583e-06, "loss": 17.9046, "step": 19795 }, { "epoch": 0.36185497285539325, "grad_norm": 5.37868830159824, "learning_rate": 7.3797874904502744e-06, "loss": 17.0191, "step": 19796 }, { "epoch": 0.3618732520518398, "grad_norm": 7.297146558207694, "learning_rate": 7.3795271535210175e-06, "loss": 17.8612, "step": 19797 }, { "epoch": 0.3618915312482863, "grad_norm": 6.835203495703539, "learning_rate": 7.379266808251725e-06, "loss": 17.6131, "step": 19798 }, { "epoch": 0.36190981044473286, "grad_norm": 6.051211154481783, "learning_rate": 7.3790064546433096e-06, "loss": 17.3336, "step": 19799 }, { "epoch": 0.3619280896411794, "grad_norm": 9.885438003116352, "learning_rate": 7.378746092696682e-06, "loss": 17.6307, "step": 19800 }, { "epoch": 0.3619463688376259, "grad_norm": 9.13616426789303, "learning_rate": 7.378485722412756e-06, "loss": 17.9851, "step": 19801 }, { "epoch": 0.3619646480340724, "grad_norm": 6.680500048478998, "learning_rate": 7.3782253437924434e-06, "loss": 17.4634, "step": 19802 }, { "epoch": 0.36198292723051895, "grad_norm": 7.965927476562711, "learning_rate": 7.377964956836658e-06, "loss": 18.0622, "step": 19803 }, { "epoch": 0.3620012064269655, "grad_norm": 6.075774584032624, "learning_rate": 7.377704561546311e-06, "loss": 17.4991, "step": 19804 }, { "epoch": 0.362019485623412, "grad_norm": 5.394030899585471, "learning_rate": 7.377444157922318e-06, "loss": 17.1865, "step": 19805 }, { "epoch": 0.3620377648198585, "grad_norm": 6.494119032171042, "learning_rate": 7.377183745965587e-06, "loss": 17.2804, "step": 19806 }, { "epoch": 0.36205604401630503, "grad_norm": 6.971741388121176, "learning_rate": 7.3769233256770346e-06, "loss": 17.7178, "step": 19807 }, { "epoch": 0.36207432321275157, "grad_norm": 7.57425777775483, "learning_rate": 7.3766628970575716e-06, "loss": 17.7694, "step": 19808 }, { "epoch": 0.3620926024091981, "grad_norm": 6.448874817621395, "learning_rate": 7.3764024601081105e-06, "loss": 17.3395, "step": 19809 }, { "epoch": 0.36211088160564464, "grad_norm": 7.60523362352674, "learning_rate": 7.376142014829566e-06, "loss": 17.8962, "step": 19810 }, { "epoch": 0.3621291608020911, "grad_norm": 6.5349476392425885, "learning_rate": 7.3758815612228505e-06, "loss": 17.5655, "step": 19811 }, { "epoch": 0.36214743999853766, "grad_norm": 6.597569027456001, "learning_rate": 7.375621099288875e-06, "loss": 17.5343, "step": 19812 }, { "epoch": 0.3621657191949842, "grad_norm": 8.332972762924195, "learning_rate": 7.375360629028556e-06, "loss": 18.2068, "step": 19813 }, { "epoch": 0.36218399839143073, "grad_norm": 6.058549509102406, "learning_rate": 7.375100150442802e-06, "loss": 17.3782, "step": 19814 }, { "epoch": 0.36220227758787726, "grad_norm": 6.656909869088897, "learning_rate": 7.3748396635325284e-06, "loss": 17.7636, "step": 19815 }, { "epoch": 0.36222055678432374, "grad_norm": 8.63263441180892, "learning_rate": 7.374579168298648e-06, "loss": 18.1313, "step": 19816 }, { "epoch": 0.3622388359807703, "grad_norm": 6.344480129066718, "learning_rate": 7.374318664742075e-06, "loss": 17.5452, "step": 19817 }, { "epoch": 0.3622571151772168, "grad_norm": 8.058106029602351, "learning_rate": 7.374058152863719e-06, "loss": 18.2145, "step": 19818 }, { "epoch": 0.36227539437366335, "grad_norm": 7.686560264536356, "learning_rate": 7.373797632664497e-06, "loss": 18.156, "step": 19819 }, { "epoch": 0.36229367357010983, "grad_norm": 6.453915937680067, "learning_rate": 7.373537104145318e-06, "loss": 17.696, "step": 19820 }, { "epoch": 0.36231195276655637, "grad_norm": 6.338557925126497, "learning_rate": 7.373276567307099e-06, "loss": 17.6531, "step": 19821 }, { "epoch": 0.3623302319630029, "grad_norm": 7.44949599558676, "learning_rate": 7.373016022150752e-06, "loss": 17.9402, "step": 19822 }, { "epoch": 0.36234851115944944, "grad_norm": 5.511826999001526, "learning_rate": 7.372755468677188e-06, "loss": 17.1546, "step": 19823 }, { "epoch": 0.362366790355896, "grad_norm": 6.730621061072508, "learning_rate": 7.372494906887324e-06, "loss": 17.5534, "step": 19824 }, { "epoch": 0.36238506955234245, "grad_norm": 6.597743266117303, "learning_rate": 7.37223433678207e-06, "loss": 17.886, "step": 19825 }, { "epoch": 0.362403348748789, "grad_norm": 6.424877650774905, "learning_rate": 7.371973758362341e-06, "loss": 17.7819, "step": 19826 }, { "epoch": 0.3624216279452355, "grad_norm": 6.081115763625361, "learning_rate": 7.371713171629051e-06, "loss": 17.4009, "step": 19827 }, { "epoch": 0.36243990714168206, "grad_norm": 6.027815519647787, "learning_rate": 7.371452576583109e-06, "loss": 17.4071, "step": 19828 }, { "epoch": 0.3624581863381286, "grad_norm": 6.908653643836321, "learning_rate": 7.371191973225433e-06, "loss": 18.0503, "step": 19829 }, { "epoch": 0.3624764655345751, "grad_norm": 9.657029465211973, "learning_rate": 7.370931361556936e-06, "loss": 18.0939, "step": 19830 }, { "epoch": 0.3624947447310216, "grad_norm": 6.602626637139704, "learning_rate": 7.37067074157853e-06, "loss": 17.6102, "step": 19831 }, { "epoch": 0.36251302392746815, "grad_norm": 5.74620240840169, "learning_rate": 7.370410113291129e-06, "loss": 17.0892, "step": 19832 }, { "epoch": 0.3625313031239147, "grad_norm": 5.600716101090447, "learning_rate": 7.370149476695644e-06, "loss": 17.1531, "step": 19833 }, { "epoch": 0.3625495823203612, "grad_norm": 6.647988825642275, "learning_rate": 7.3698888317929924e-06, "loss": 17.6309, "step": 19834 }, { "epoch": 0.3625678615168077, "grad_norm": 7.462942426129583, "learning_rate": 7.369628178584087e-06, "loss": 18.1157, "step": 19835 }, { "epoch": 0.36258614071325423, "grad_norm": 6.950999798039686, "learning_rate": 7.369367517069839e-06, "loss": 17.818, "step": 19836 }, { "epoch": 0.36260441990970077, "grad_norm": 5.928236702927465, "learning_rate": 7.369106847251164e-06, "loss": 17.324, "step": 19837 }, { "epoch": 0.3626226991061473, "grad_norm": 8.025803946220389, "learning_rate": 7.368846169128975e-06, "loss": 17.9005, "step": 19838 }, { "epoch": 0.36264097830259384, "grad_norm": 7.188681587313006, "learning_rate": 7.368585482704186e-06, "loss": 17.7076, "step": 19839 }, { "epoch": 0.3626592574990403, "grad_norm": 5.946788418432133, "learning_rate": 7.3683247879777094e-06, "loss": 17.3071, "step": 19840 }, { "epoch": 0.36267753669548686, "grad_norm": 8.783183882784503, "learning_rate": 7.368064084950461e-06, "loss": 18.1716, "step": 19841 }, { "epoch": 0.3626958158919334, "grad_norm": 8.32619851133781, "learning_rate": 7.367803373623352e-06, "loss": 18.4285, "step": 19842 }, { "epoch": 0.36271409508837993, "grad_norm": 5.768828605822886, "learning_rate": 7.3675426539973e-06, "loss": 17.2132, "step": 19843 }, { "epoch": 0.36273237428482646, "grad_norm": 7.295614907201202, "learning_rate": 7.3672819260732155e-06, "loss": 17.9062, "step": 19844 }, { "epoch": 0.36275065348127294, "grad_norm": 7.601003625095559, "learning_rate": 7.367021189852013e-06, "loss": 17.755, "step": 19845 }, { "epoch": 0.3627689326777195, "grad_norm": 7.25728850681366, "learning_rate": 7.366760445334607e-06, "loss": 17.588, "step": 19846 }, { "epoch": 0.362787211874166, "grad_norm": 7.435540056191759, "learning_rate": 7.366499692521909e-06, "loss": 17.9894, "step": 19847 }, { "epoch": 0.36280549107061255, "grad_norm": 6.575627358714186, "learning_rate": 7.3662389314148375e-06, "loss": 17.651, "step": 19848 }, { "epoch": 0.3628237702670591, "grad_norm": 6.107729087499006, "learning_rate": 7.3659781620143035e-06, "loss": 17.3608, "step": 19849 }, { "epoch": 0.36284204946350557, "grad_norm": 7.461839777289414, "learning_rate": 7.36571738432122e-06, "loss": 17.6875, "step": 19850 }, { "epoch": 0.3628603286599521, "grad_norm": 6.015353779103699, "learning_rate": 7.365456598336504e-06, "loss": 17.3833, "step": 19851 }, { "epoch": 0.36287860785639864, "grad_norm": 7.613494342322678, "learning_rate": 7.3651958040610675e-06, "loss": 18.0758, "step": 19852 }, { "epoch": 0.3628968870528452, "grad_norm": 8.27991365474283, "learning_rate": 7.364935001495823e-06, "loss": 18.2216, "step": 19853 }, { "epoch": 0.36291516624929165, "grad_norm": 7.206607014140489, "learning_rate": 7.364674190641688e-06, "loss": 17.7533, "step": 19854 }, { "epoch": 0.3629334454457382, "grad_norm": 6.2258669349276, "learning_rate": 7.3644133714995754e-06, "loss": 17.4918, "step": 19855 }, { "epoch": 0.3629517246421847, "grad_norm": 5.830931908101433, "learning_rate": 7.364152544070399e-06, "loss": 17.4619, "step": 19856 }, { "epoch": 0.36297000383863126, "grad_norm": 6.744025648768919, "learning_rate": 7.363891708355074e-06, "loss": 17.8524, "step": 19857 }, { "epoch": 0.3629882830350778, "grad_norm": 5.33544559027682, "learning_rate": 7.363630864354513e-06, "loss": 17.122, "step": 19858 }, { "epoch": 0.3630065622315243, "grad_norm": 7.120475255009561, "learning_rate": 7.363370012069631e-06, "loss": 17.8644, "step": 19859 }, { "epoch": 0.3630248414279708, "grad_norm": 6.415446205178347, "learning_rate": 7.36310915150134e-06, "loss": 17.4951, "step": 19860 }, { "epoch": 0.36304312062441735, "grad_norm": 5.1713900764061735, "learning_rate": 7.362848282650559e-06, "loss": 16.9743, "step": 19861 }, { "epoch": 0.3630613998208639, "grad_norm": 6.582510831952927, "learning_rate": 7.362587405518199e-06, "loss": 17.458, "step": 19862 }, { "epoch": 0.3630796790173104, "grad_norm": 7.14838707725858, "learning_rate": 7.3623265201051755e-06, "loss": 17.4343, "step": 19863 }, { "epoch": 0.3630979582137569, "grad_norm": 6.030890566137289, "learning_rate": 7.362065626412402e-06, "loss": 17.5821, "step": 19864 }, { "epoch": 0.36311623741020344, "grad_norm": 6.132200128666458, "learning_rate": 7.361804724440793e-06, "loss": 17.5141, "step": 19865 }, { "epoch": 0.36313451660664997, "grad_norm": 6.755722703473308, "learning_rate": 7.361543814191266e-06, "loss": 17.4266, "step": 19866 }, { "epoch": 0.3631527958030965, "grad_norm": 5.901362078986602, "learning_rate": 7.36128289566473e-06, "loss": 17.4349, "step": 19867 }, { "epoch": 0.36317107499954304, "grad_norm": 7.375293085928717, "learning_rate": 7.361021968862103e-06, "loss": 18.1263, "step": 19868 }, { "epoch": 0.3631893541959895, "grad_norm": 7.949065423837825, "learning_rate": 7.3607610337842995e-06, "loss": 18.0082, "step": 19869 }, { "epoch": 0.36320763339243606, "grad_norm": 7.529454143613692, "learning_rate": 7.360500090432232e-06, "loss": 18.2168, "step": 19870 }, { "epoch": 0.3632259125888826, "grad_norm": 5.57568043384929, "learning_rate": 7.360239138806818e-06, "loss": 17.1047, "step": 19871 }, { "epoch": 0.36324419178532913, "grad_norm": 5.331238942134828, "learning_rate": 7.359978178908972e-06, "loss": 16.9609, "step": 19872 }, { "epoch": 0.36326247098177566, "grad_norm": 5.665163282601551, "learning_rate": 7.359717210739605e-06, "loss": 17.2756, "step": 19873 }, { "epoch": 0.36328075017822214, "grad_norm": 6.414277189172481, "learning_rate": 7.3594562342996335e-06, "loss": 17.6562, "step": 19874 }, { "epoch": 0.3632990293746687, "grad_norm": 6.986932385311707, "learning_rate": 7.359195249589974e-06, "loss": 17.5632, "step": 19875 }, { "epoch": 0.3633173085711152, "grad_norm": 6.595780216320281, "learning_rate": 7.3589342566115406e-06, "loss": 17.6591, "step": 19876 }, { "epoch": 0.36333558776756175, "grad_norm": 9.75605735647986, "learning_rate": 7.358673255365245e-06, "loss": 19.0344, "step": 19877 }, { "epoch": 0.3633538669640083, "grad_norm": 9.233791121504725, "learning_rate": 7.358412245852007e-06, "loss": 18.404, "step": 19878 }, { "epoch": 0.36337214616045477, "grad_norm": 7.148687535834478, "learning_rate": 7.358151228072736e-06, "loss": 17.9507, "step": 19879 }, { "epoch": 0.3633904253569013, "grad_norm": 6.465007259481233, "learning_rate": 7.357890202028351e-06, "loss": 17.4709, "step": 19880 }, { "epoch": 0.36340870455334784, "grad_norm": 6.286347588352254, "learning_rate": 7.3576291677197655e-06, "loss": 17.3946, "step": 19881 }, { "epoch": 0.3634269837497944, "grad_norm": 4.960147903218949, "learning_rate": 7.357368125147895e-06, "loss": 16.9839, "step": 19882 }, { "epoch": 0.3634452629462409, "grad_norm": 7.9545658929064755, "learning_rate": 7.3571070743136515e-06, "loss": 18.2448, "step": 19883 }, { "epoch": 0.3634635421426874, "grad_norm": 5.9500038086809495, "learning_rate": 7.3568460152179545e-06, "loss": 17.5407, "step": 19884 }, { "epoch": 0.3634818213391339, "grad_norm": 8.391662365068228, "learning_rate": 7.356584947861716e-06, "loss": 18.2017, "step": 19885 }, { "epoch": 0.36350010053558046, "grad_norm": 8.43666846600439, "learning_rate": 7.356323872245852e-06, "loss": 18.4896, "step": 19886 }, { "epoch": 0.363518379732027, "grad_norm": 6.651142502786835, "learning_rate": 7.356062788371277e-06, "loss": 17.5591, "step": 19887 }, { "epoch": 0.3635366589284735, "grad_norm": 6.580105254687807, "learning_rate": 7.355801696238906e-06, "loss": 17.5632, "step": 19888 }, { "epoch": 0.36355493812492, "grad_norm": 6.534458383635875, "learning_rate": 7.3555405958496555e-06, "loss": 17.4418, "step": 19889 }, { "epoch": 0.36357321732136655, "grad_norm": 7.116823096185072, "learning_rate": 7.3552794872044385e-06, "loss": 17.8299, "step": 19890 }, { "epoch": 0.3635914965178131, "grad_norm": 6.319830041837158, "learning_rate": 7.355018370304172e-06, "loss": 17.7226, "step": 19891 }, { "epoch": 0.3636097757142596, "grad_norm": 5.650490274725987, "learning_rate": 7.35475724514977e-06, "loss": 17.1894, "step": 19892 }, { "epoch": 0.3636280549107061, "grad_norm": 6.31767404409286, "learning_rate": 7.354496111742149e-06, "loss": 17.6089, "step": 19893 }, { "epoch": 0.36364633410715264, "grad_norm": 7.319743799716542, "learning_rate": 7.354234970082223e-06, "loss": 17.6796, "step": 19894 }, { "epoch": 0.36366461330359917, "grad_norm": 7.574336442661001, "learning_rate": 7.353973820170906e-06, "loss": 17.582, "step": 19895 }, { "epoch": 0.3636828925000457, "grad_norm": 6.055400818121726, "learning_rate": 7.353712662009118e-06, "loss": 17.562, "step": 19896 }, { "epoch": 0.36370117169649224, "grad_norm": 5.941978445605599, "learning_rate": 7.35345149559777e-06, "loss": 17.2626, "step": 19897 }, { "epoch": 0.3637194508929387, "grad_norm": 5.331945161886824, "learning_rate": 7.353190320937778e-06, "loss": 17.1074, "step": 19898 }, { "epoch": 0.36373773008938526, "grad_norm": 7.415834593382804, "learning_rate": 7.352929138030059e-06, "loss": 18.073, "step": 19899 }, { "epoch": 0.3637560092858318, "grad_norm": 7.025386381980766, "learning_rate": 7.352667946875528e-06, "loss": 17.3585, "step": 19900 }, { "epoch": 0.36377428848227833, "grad_norm": 6.2390589600593245, "learning_rate": 7.352406747475098e-06, "loss": 17.484, "step": 19901 }, { "epoch": 0.36379256767872487, "grad_norm": 7.228486929245648, "learning_rate": 7.352145539829688e-06, "loss": 17.651, "step": 19902 }, { "epoch": 0.36381084687517135, "grad_norm": 5.342575900198373, "learning_rate": 7.351884323940212e-06, "loss": 17.0671, "step": 19903 }, { "epoch": 0.3638291260716179, "grad_norm": 6.952441270837234, "learning_rate": 7.351623099807587e-06, "loss": 17.7346, "step": 19904 }, { "epoch": 0.3638474052680644, "grad_norm": 5.874155239855435, "learning_rate": 7.351361867432725e-06, "loss": 17.4286, "step": 19905 }, { "epoch": 0.36386568446451095, "grad_norm": 6.257066167438005, "learning_rate": 7.351100626816544e-06, "loss": 17.5954, "step": 19906 }, { "epoch": 0.3638839636609575, "grad_norm": 6.795264780061543, "learning_rate": 7.350839377959959e-06, "loss": 17.6907, "step": 19907 }, { "epoch": 0.36390224285740397, "grad_norm": 10.194669551376359, "learning_rate": 7.350578120863887e-06, "loss": 18.3099, "step": 19908 }, { "epoch": 0.3639205220538505, "grad_norm": 5.402891267281715, "learning_rate": 7.350316855529243e-06, "loss": 17.2023, "step": 19909 }, { "epoch": 0.36393880125029704, "grad_norm": 6.010307659767309, "learning_rate": 7.35005558195694e-06, "loss": 17.5061, "step": 19910 }, { "epoch": 0.3639570804467436, "grad_norm": 5.967855881697756, "learning_rate": 7.3497943001478975e-06, "loss": 17.4228, "step": 19911 }, { "epoch": 0.3639753596431901, "grad_norm": 6.897902739331548, "learning_rate": 7.34953301010303e-06, "loss": 17.7083, "step": 19912 }, { "epoch": 0.3639936388396366, "grad_norm": 7.135653523181321, "learning_rate": 7.349271711823255e-06, "loss": 17.8317, "step": 19913 }, { "epoch": 0.3640119180360831, "grad_norm": 6.64145514082436, "learning_rate": 7.3490104053094845e-06, "loss": 17.4163, "step": 19914 }, { "epoch": 0.36403019723252966, "grad_norm": 6.376695234039627, "learning_rate": 7.348749090562636e-06, "loss": 17.5942, "step": 19915 }, { "epoch": 0.3640484764289762, "grad_norm": 4.790855748747493, "learning_rate": 7.348487767583625e-06, "loss": 16.8697, "step": 19916 }, { "epoch": 0.36406675562542273, "grad_norm": 8.041839296634185, "learning_rate": 7.348226436373371e-06, "loss": 17.9628, "step": 19917 }, { "epoch": 0.3640850348218692, "grad_norm": 7.822606379322255, "learning_rate": 7.347965096932785e-06, "loss": 18.3822, "step": 19918 }, { "epoch": 0.36410331401831575, "grad_norm": 5.845155756889289, "learning_rate": 7.347703749262787e-06, "loss": 17.2784, "step": 19919 }, { "epoch": 0.3641215932147623, "grad_norm": 6.453666345030061, "learning_rate": 7.3474423933642895e-06, "loss": 17.7771, "step": 19920 }, { "epoch": 0.3641398724112088, "grad_norm": 6.952256037911246, "learning_rate": 7.34718102923821e-06, "loss": 17.9244, "step": 19921 }, { "epoch": 0.3641581516076553, "grad_norm": 5.726640790921278, "learning_rate": 7.3469196568854654e-06, "loss": 17.2561, "step": 19922 }, { "epoch": 0.36417643080410184, "grad_norm": 6.158958516929366, "learning_rate": 7.346658276306971e-06, "loss": 17.3897, "step": 19923 }, { "epoch": 0.36419471000054837, "grad_norm": 6.7314552207787, "learning_rate": 7.3463968875036415e-06, "loss": 17.5779, "step": 19924 }, { "epoch": 0.3642129891969949, "grad_norm": 5.953983189035276, "learning_rate": 7.346135490476396e-06, "loss": 17.1681, "step": 19925 }, { "epoch": 0.36423126839344144, "grad_norm": 7.448260645280754, "learning_rate": 7.345874085226149e-06, "loss": 18.071, "step": 19926 }, { "epoch": 0.3642495475898879, "grad_norm": 7.135589902163268, "learning_rate": 7.3456126717538165e-06, "loss": 17.9099, "step": 19927 }, { "epoch": 0.36426782678633446, "grad_norm": 7.811948117017589, "learning_rate": 7.3453512500603155e-06, "loss": 18.1919, "step": 19928 }, { "epoch": 0.364286105982781, "grad_norm": 6.4819789518858775, "learning_rate": 7.345089820146561e-06, "loss": 17.391, "step": 19929 }, { "epoch": 0.36430438517922753, "grad_norm": 6.573790459434949, "learning_rate": 7.3448283820134714e-06, "loss": 17.4223, "step": 19930 }, { "epoch": 0.36432266437567407, "grad_norm": 7.142233545797336, "learning_rate": 7.344566935661963e-06, "loss": 17.9212, "step": 19931 }, { "epoch": 0.36434094357212055, "grad_norm": 7.815281670207444, "learning_rate": 7.344305481092948e-06, "loss": 18.5594, "step": 19932 }, { "epoch": 0.3643592227685671, "grad_norm": 6.389263015084949, "learning_rate": 7.344044018307347e-06, "loss": 17.6672, "step": 19933 }, { "epoch": 0.3643775019650136, "grad_norm": 6.363778951246994, "learning_rate": 7.3437825473060756e-06, "loss": 17.5988, "step": 19934 }, { "epoch": 0.36439578116146015, "grad_norm": 7.878011337387179, "learning_rate": 7.34352106809005e-06, "loss": 18.1643, "step": 19935 }, { "epoch": 0.3644140603579067, "grad_norm": 8.285066118286098, "learning_rate": 7.343259580660185e-06, "loss": 18.3539, "step": 19936 }, { "epoch": 0.36443233955435317, "grad_norm": 6.443267824682111, "learning_rate": 7.3429980850173985e-06, "loss": 17.3013, "step": 19937 }, { "epoch": 0.3644506187507997, "grad_norm": 6.650738875390846, "learning_rate": 7.342736581162608e-06, "loss": 17.6297, "step": 19938 }, { "epoch": 0.36446889794724624, "grad_norm": 8.323867892700868, "learning_rate": 7.342475069096729e-06, "loss": 18.2189, "step": 19939 }, { "epoch": 0.3644871771436928, "grad_norm": 8.029657663627717, "learning_rate": 7.342213548820678e-06, "loss": 17.9917, "step": 19940 }, { "epoch": 0.3645054563401393, "grad_norm": 5.902466295141075, "learning_rate": 7.3419520203353736e-06, "loss": 17.5369, "step": 19941 }, { "epoch": 0.3645237355365858, "grad_norm": 6.6950706084737, "learning_rate": 7.341690483641727e-06, "loss": 17.6478, "step": 19942 }, { "epoch": 0.3645420147330323, "grad_norm": 7.414295409257129, "learning_rate": 7.34142893874066e-06, "loss": 18.0049, "step": 19943 }, { "epoch": 0.36456029392947886, "grad_norm": 6.441432619775502, "learning_rate": 7.341167385633089e-06, "loss": 17.3596, "step": 19944 }, { "epoch": 0.3645785731259254, "grad_norm": 7.565466444403028, "learning_rate": 7.340905824319928e-06, "loss": 17.9493, "step": 19945 }, { "epoch": 0.36459685232237193, "grad_norm": 6.884218125613176, "learning_rate": 7.3406442548020965e-06, "loss": 17.9367, "step": 19946 }, { "epoch": 0.3646151315188184, "grad_norm": 8.205436379367864, "learning_rate": 7.340382677080509e-06, "loss": 18.2755, "step": 19947 }, { "epoch": 0.36463341071526495, "grad_norm": 7.479586387863585, "learning_rate": 7.3401210911560825e-06, "loss": 18.084, "step": 19948 }, { "epoch": 0.3646516899117115, "grad_norm": 5.456945197338324, "learning_rate": 7.3398594970297365e-06, "loss": 17.2475, "step": 19949 }, { "epoch": 0.364669969108158, "grad_norm": 7.155232689620144, "learning_rate": 7.339597894702385e-06, "loss": 18.057, "step": 19950 }, { "epoch": 0.36468824830460456, "grad_norm": 6.315182426164225, "learning_rate": 7.339336284174946e-06, "loss": 17.4557, "step": 19951 }, { "epoch": 0.36470652750105104, "grad_norm": 6.869441070444373, "learning_rate": 7.339074665448336e-06, "loss": 17.7521, "step": 19952 }, { "epoch": 0.36472480669749757, "grad_norm": 5.832337154707694, "learning_rate": 7.338813038523473e-06, "loss": 17.2121, "step": 19953 }, { "epoch": 0.3647430858939441, "grad_norm": 7.771324661108997, "learning_rate": 7.338551403401273e-06, "loss": 18.1004, "step": 19954 }, { "epoch": 0.36476136509039064, "grad_norm": 5.4355066147730575, "learning_rate": 7.338289760082653e-06, "loss": 17.1734, "step": 19955 }, { "epoch": 0.3647796442868371, "grad_norm": 6.067806744073409, "learning_rate": 7.33802810856853e-06, "loss": 17.3221, "step": 19956 }, { "epoch": 0.36479792348328366, "grad_norm": 5.627941981217168, "learning_rate": 7.337766448859822e-06, "loss": 17.2232, "step": 19957 }, { "epoch": 0.3648162026797302, "grad_norm": 7.304743637762822, "learning_rate": 7.337504780957446e-06, "loss": 17.9585, "step": 19958 }, { "epoch": 0.36483448187617673, "grad_norm": 6.371991764224059, "learning_rate": 7.337243104862317e-06, "loss": 17.4396, "step": 19959 }, { "epoch": 0.36485276107262327, "grad_norm": 7.272901108269467, "learning_rate": 7.336981420575354e-06, "loss": 17.7912, "step": 19960 }, { "epoch": 0.36487104026906975, "grad_norm": 6.709934676998213, "learning_rate": 7.336719728097475e-06, "loss": 17.6917, "step": 19961 }, { "epoch": 0.3648893194655163, "grad_norm": 7.241521777883362, "learning_rate": 7.336458027429596e-06, "loss": 17.9274, "step": 19962 }, { "epoch": 0.3649075986619628, "grad_norm": 6.401168922310908, "learning_rate": 7.336196318572635e-06, "loss": 17.6308, "step": 19963 }, { "epoch": 0.36492587785840935, "grad_norm": 5.128963487143801, "learning_rate": 7.335934601527507e-06, "loss": 17.2691, "step": 19964 }, { "epoch": 0.3649441570548559, "grad_norm": 6.057947574781268, "learning_rate": 7.335672876295131e-06, "loss": 17.4541, "step": 19965 }, { "epoch": 0.36496243625130237, "grad_norm": 8.199589475868523, "learning_rate": 7.335411142876425e-06, "loss": 17.8986, "step": 19966 }, { "epoch": 0.3649807154477489, "grad_norm": 5.152101468473128, "learning_rate": 7.335149401272306e-06, "loss": 16.9409, "step": 19967 }, { "epoch": 0.36499899464419544, "grad_norm": 7.64284625076132, "learning_rate": 7.33488765148369e-06, "loss": 17.8299, "step": 19968 }, { "epoch": 0.365017273840642, "grad_norm": 6.694127243468344, "learning_rate": 7.334625893511496e-06, "loss": 17.6618, "step": 19969 }, { "epoch": 0.3650355530370885, "grad_norm": 6.137374494704486, "learning_rate": 7.33436412735664e-06, "loss": 17.1838, "step": 19970 }, { "epoch": 0.365053832233535, "grad_norm": 6.492198993151892, "learning_rate": 7.334102353020041e-06, "loss": 17.8877, "step": 19971 }, { "epoch": 0.3650721114299815, "grad_norm": 5.802896208555929, "learning_rate": 7.333840570502616e-06, "loss": 17.2948, "step": 19972 }, { "epoch": 0.36509039062642806, "grad_norm": 7.132221431844595, "learning_rate": 7.333578779805282e-06, "loss": 17.6006, "step": 19973 }, { "epoch": 0.3651086698228746, "grad_norm": 6.093304534442225, "learning_rate": 7.333316980928956e-06, "loss": 17.3567, "step": 19974 }, { "epoch": 0.36512694901932113, "grad_norm": 6.436127130333897, "learning_rate": 7.333055173874558e-06, "loss": 17.4512, "step": 19975 }, { "epoch": 0.3651452282157676, "grad_norm": 6.954979280215868, "learning_rate": 7.332793358643003e-06, "loss": 17.6193, "step": 19976 }, { "epoch": 0.36516350741221415, "grad_norm": 5.556852294777728, "learning_rate": 7.33253153523521e-06, "loss": 17.1742, "step": 19977 }, { "epoch": 0.3651817866086607, "grad_norm": 6.905918436637122, "learning_rate": 7.332269703652098e-06, "loss": 17.6606, "step": 19978 }, { "epoch": 0.3652000658051072, "grad_norm": 7.1322970346039885, "learning_rate": 7.332007863894582e-06, "loss": 17.18, "step": 19979 }, { "epoch": 0.36521834500155376, "grad_norm": 6.8587204657493555, "learning_rate": 7.331746015963579e-06, "loss": 17.7386, "step": 19980 }, { "epoch": 0.36523662419800024, "grad_norm": 6.772290370521658, "learning_rate": 7.331484159860009e-06, "loss": 17.7342, "step": 19981 }, { "epoch": 0.3652549033944468, "grad_norm": 7.231616881412896, "learning_rate": 7.331222295584791e-06, "loss": 17.6909, "step": 19982 }, { "epoch": 0.3652731825908933, "grad_norm": 7.793296884804323, "learning_rate": 7.33096042313884e-06, "loss": 18.1183, "step": 19983 }, { "epoch": 0.36529146178733984, "grad_norm": 6.483927942294078, "learning_rate": 7.330698542523076e-06, "loss": 17.2938, "step": 19984 }, { "epoch": 0.3653097409837864, "grad_norm": 6.057767800886469, "learning_rate": 7.330436653738415e-06, "loss": 17.5713, "step": 19985 }, { "epoch": 0.36532802018023286, "grad_norm": 6.1906657356421215, "learning_rate": 7.330174756785777e-06, "loss": 17.2538, "step": 19986 }, { "epoch": 0.3653462993766794, "grad_norm": 7.278490927171888, "learning_rate": 7.329912851666077e-06, "loss": 17.6953, "step": 19987 }, { "epoch": 0.36536457857312593, "grad_norm": 5.9615259558238, "learning_rate": 7.329650938380235e-06, "loss": 17.3261, "step": 19988 }, { "epoch": 0.36538285776957247, "grad_norm": 5.550292146699225, "learning_rate": 7.329389016929169e-06, "loss": 17.1694, "step": 19989 }, { "epoch": 0.36540113696601895, "grad_norm": 6.412002416771385, "learning_rate": 7.3291270873137964e-06, "loss": 17.7491, "step": 19990 }, { "epoch": 0.3654194161624655, "grad_norm": 6.143702555565244, "learning_rate": 7.328865149535036e-06, "loss": 17.4818, "step": 19991 }, { "epoch": 0.365437695358912, "grad_norm": 7.844841567040807, "learning_rate": 7.328603203593805e-06, "loss": 17.7499, "step": 19992 }, { "epoch": 0.36545597455535855, "grad_norm": 8.015703335755985, "learning_rate": 7.328341249491022e-06, "loss": 17.7027, "step": 19993 }, { "epoch": 0.3654742537518051, "grad_norm": 6.263571151195266, "learning_rate": 7.3280792872276055e-06, "loss": 17.3522, "step": 19994 }, { "epoch": 0.36549253294825157, "grad_norm": 7.541199669687731, "learning_rate": 7.327817316804472e-06, "loss": 17.7727, "step": 19995 }, { "epoch": 0.3655108121446981, "grad_norm": 7.378806630387118, "learning_rate": 7.3275553382225405e-06, "loss": 17.955, "step": 19996 }, { "epoch": 0.36552909134114464, "grad_norm": 6.7678685546807085, "learning_rate": 7.32729335148273e-06, "loss": 17.4339, "step": 19997 }, { "epoch": 0.3655473705375912, "grad_norm": 7.12057927862782, "learning_rate": 7.327031356585959e-06, "loss": 17.3648, "step": 19998 }, { "epoch": 0.3655656497340377, "grad_norm": 8.71501300379711, "learning_rate": 7.326769353533144e-06, "loss": 17.7721, "step": 19999 }, { "epoch": 0.3655839289304842, "grad_norm": 7.299752176009435, "learning_rate": 7.326507342325206e-06, "loss": 18.0457, "step": 20000 }, { "epoch": 0.36560220812693073, "grad_norm": 7.3599804513870914, "learning_rate": 7.32624532296306e-06, "loss": 17.6514, "step": 20001 }, { "epoch": 0.36562048732337726, "grad_norm": 5.3486958687936745, "learning_rate": 7.325983295447626e-06, "loss": 16.9642, "step": 20002 }, { "epoch": 0.3656387665198238, "grad_norm": 6.557372272729643, "learning_rate": 7.325721259779824e-06, "loss": 17.619, "step": 20003 }, { "epoch": 0.36565704571627033, "grad_norm": 5.416973195391767, "learning_rate": 7.325459215960569e-06, "loss": 17.2945, "step": 20004 }, { "epoch": 0.3656753249127168, "grad_norm": 5.989822328414677, "learning_rate": 7.325197163990783e-06, "loss": 17.1684, "step": 20005 }, { "epoch": 0.36569360410916335, "grad_norm": 6.011230896835203, "learning_rate": 7.32493510387138e-06, "loss": 17.2764, "step": 20006 }, { "epoch": 0.3657118833056099, "grad_norm": 6.343896300084955, "learning_rate": 7.324673035603283e-06, "loss": 17.2743, "step": 20007 }, { "epoch": 0.3657301625020564, "grad_norm": 8.424063219074782, "learning_rate": 7.324410959187409e-06, "loss": 18.5565, "step": 20008 }, { "epoch": 0.36574844169850296, "grad_norm": 5.454175634280242, "learning_rate": 7.324148874624676e-06, "loss": 17.1443, "step": 20009 }, { "epoch": 0.36576672089494944, "grad_norm": 6.962785575902715, "learning_rate": 7.323886781916003e-06, "loss": 17.6528, "step": 20010 }, { "epoch": 0.365785000091396, "grad_norm": 6.398342119715435, "learning_rate": 7.323624681062308e-06, "loss": 17.469, "step": 20011 }, { "epoch": 0.3658032792878425, "grad_norm": 6.982471194952962, "learning_rate": 7.323362572064509e-06, "loss": 17.7279, "step": 20012 }, { "epoch": 0.36582155848428904, "grad_norm": 5.722768016888922, "learning_rate": 7.323100454923529e-06, "loss": 17.3035, "step": 20013 }, { "epoch": 0.3658398376807356, "grad_norm": 6.207848962254209, "learning_rate": 7.322838329640281e-06, "loss": 17.4449, "step": 20014 }, { "epoch": 0.36585811687718206, "grad_norm": 5.52371366053853, "learning_rate": 7.322576196215687e-06, "loss": 17.0434, "step": 20015 }, { "epoch": 0.3658763960736286, "grad_norm": 6.447198465491182, "learning_rate": 7.322314054650664e-06, "loss": 17.4525, "step": 20016 }, { "epoch": 0.36589467527007513, "grad_norm": 5.772447503822162, "learning_rate": 7.322051904946134e-06, "loss": 17.204, "step": 20017 }, { "epoch": 0.36591295446652167, "grad_norm": 5.598459571819927, "learning_rate": 7.321789747103012e-06, "loss": 17.2012, "step": 20018 }, { "epoch": 0.3659312336629682, "grad_norm": 6.772302513544603, "learning_rate": 7.321527581122218e-06, "loss": 17.5079, "step": 20019 }, { "epoch": 0.3659495128594147, "grad_norm": 8.395492804046095, "learning_rate": 7.3212654070046715e-06, "loss": 18.0994, "step": 20020 }, { "epoch": 0.3659677920558612, "grad_norm": 5.442244433328405, "learning_rate": 7.321003224751292e-06, "loss": 16.9976, "step": 20021 }, { "epoch": 0.36598607125230775, "grad_norm": 4.894323867458015, "learning_rate": 7.320741034362997e-06, "loss": 16.9411, "step": 20022 }, { "epoch": 0.3660043504487543, "grad_norm": 6.376324414241229, "learning_rate": 7.3204788358407065e-06, "loss": 17.4495, "step": 20023 }, { "epoch": 0.36602262964520077, "grad_norm": 7.014361650828988, "learning_rate": 7.320216629185339e-06, "loss": 17.6281, "step": 20024 }, { "epoch": 0.3660409088416473, "grad_norm": 6.3606585805402585, "learning_rate": 7.319954414397813e-06, "loss": 17.4682, "step": 20025 }, { "epoch": 0.36605918803809384, "grad_norm": 6.883518029555075, "learning_rate": 7.3196921914790485e-06, "loss": 17.637, "step": 20026 }, { "epoch": 0.3660774672345404, "grad_norm": 7.022462850259167, "learning_rate": 7.319429960429965e-06, "loss": 17.7141, "step": 20027 }, { "epoch": 0.3660957464309869, "grad_norm": 6.239354392444819, "learning_rate": 7.319167721251479e-06, "loss": 17.3864, "step": 20028 }, { "epoch": 0.3661140256274334, "grad_norm": 4.969791983872103, "learning_rate": 7.318905473944512e-06, "loss": 16.8889, "step": 20029 }, { "epoch": 0.36613230482387993, "grad_norm": 6.228127877753289, "learning_rate": 7.318643218509982e-06, "loss": 17.5305, "step": 20030 }, { "epoch": 0.36615058402032646, "grad_norm": 10.772890395687297, "learning_rate": 7.31838095494881e-06, "loss": 18.2895, "step": 20031 }, { "epoch": 0.366168863216773, "grad_norm": 7.2522352411852555, "learning_rate": 7.318118683261915e-06, "loss": 17.7977, "step": 20032 }, { "epoch": 0.36618714241321954, "grad_norm": 7.486696173260041, "learning_rate": 7.317856403450212e-06, "loss": 18.1596, "step": 20033 }, { "epoch": 0.366205421609666, "grad_norm": 6.07141209553588, "learning_rate": 7.317594115514626e-06, "loss": 17.4972, "step": 20034 }, { "epoch": 0.36622370080611255, "grad_norm": 6.372265499279799, "learning_rate": 7.317331819456073e-06, "loss": 17.5198, "step": 20035 }, { "epoch": 0.3662419800025591, "grad_norm": 8.42351503298585, "learning_rate": 7.317069515275472e-06, "loss": 18.2196, "step": 20036 }, { "epoch": 0.3662602591990056, "grad_norm": 7.328440958729051, "learning_rate": 7.316807202973744e-06, "loss": 18.0784, "step": 20037 }, { "epoch": 0.36627853839545216, "grad_norm": 5.967004691863931, "learning_rate": 7.316544882551808e-06, "loss": 17.1939, "step": 20038 }, { "epoch": 0.36629681759189864, "grad_norm": 6.792899665896095, "learning_rate": 7.3162825540105834e-06, "loss": 17.5523, "step": 20039 }, { "epoch": 0.3663150967883452, "grad_norm": 6.016111889403537, "learning_rate": 7.31602021735099e-06, "loss": 17.2861, "step": 20040 }, { "epoch": 0.3663333759847917, "grad_norm": 5.728117369895846, "learning_rate": 7.315757872573946e-06, "loss": 17.3807, "step": 20041 }, { "epoch": 0.36635165518123824, "grad_norm": 9.82960673985697, "learning_rate": 7.315495519680371e-06, "loss": 19.2907, "step": 20042 }, { "epoch": 0.3663699343776848, "grad_norm": 6.578645153903663, "learning_rate": 7.315233158671186e-06, "loss": 17.7264, "step": 20043 }, { "epoch": 0.36638821357413126, "grad_norm": 7.27288412308479, "learning_rate": 7.3149707895473096e-06, "loss": 17.8259, "step": 20044 }, { "epoch": 0.3664064927705778, "grad_norm": 7.088161976089492, "learning_rate": 7.314708412309661e-06, "loss": 17.7448, "step": 20045 }, { "epoch": 0.36642477196702433, "grad_norm": 9.611423527380586, "learning_rate": 7.314446026959161e-06, "loss": 17.7218, "step": 20046 }, { "epoch": 0.36644305116347087, "grad_norm": 7.10924466864736, "learning_rate": 7.314183633496728e-06, "loss": 17.8319, "step": 20047 }, { "epoch": 0.3664613303599174, "grad_norm": 6.762878693982524, "learning_rate": 7.313921231923282e-06, "loss": 17.8862, "step": 20048 }, { "epoch": 0.3664796095563639, "grad_norm": 7.040128425206224, "learning_rate": 7.313658822239743e-06, "loss": 17.5381, "step": 20049 }, { "epoch": 0.3664978887528104, "grad_norm": 6.2413025502073864, "learning_rate": 7.31339640444703e-06, "loss": 17.2786, "step": 20050 }, { "epoch": 0.36651616794925695, "grad_norm": 5.821097701028158, "learning_rate": 7.313133978546065e-06, "loss": 17.2855, "step": 20051 }, { "epoch": 0.3665344471457035, "grad_norm": 6.420080152777128, "learning_rate": 7.312871544537765e-06, "loss": 17.5517, "step": 20052 }, { "epoch": 0.36655272634215, "grad_norm": 6.687833941744186, "learning_rate": 7.312609102423052e-06, "loss": 17.0203, "step": 20053 }, { "epoch": 0.3665710055385965, "grad_norm": 7.4913413588200575, "learning_rate": 7.312346652202843e-06, "loss": 18.13, "step": 20054 }, { "epoch": 0.36658928473504304, "grad_norm": 6.378464109102114, "learning_rate": 7.312084193878061e-06, "loss": 17.5577, "step": 20055 }, { "epoch": 0.3666075639314896, "grad_norm": 6.46936764170632, "learning_rate": 7.311821727449624e-06, "loss": 17.4391, "step": 20056 }, { "epoch": 0.3666258431279361, "grad_norm": 6.938693382391579, "learning_rate": 7.311559252918452e-06, "loss": 17.7051, "step": 20057 }, { "epoch": 0.3666441223243826, "grad_norm": 6.756617300125149, "learning_rate": 7.311296770285467e-06, "loss": 17.748, "step": 20058 }, { "epoch": 0.36666240152082913, "grad_norm": 7.063743985939497, "learning_rate": 7.3110342795515865e-06, "loss": 17.5138, "step": 20059 }, { "epoch": 0.36668068071727566, "grad_norm": 6.830333458443167, "learning_rate": 7.310771780717729e-06, "loss": 17.8553, "step": 20060 }, { "epoch": 0.3666989599137222, "grad_norm": 7.060236435351416, "learning_rate": 7.310509273784821e-06, "loss": 17.798, "step": 20061 }, { "epoch": 0.36671723911016874, "grad_norm": 6.933666121958821, "learning_rate": 7.310246758753776e-06, "loss": 17.826, "step": 20062 }, { "epoch": 0.3667355183066152, "grad_norm": 8.6564575951896, "learning_rate": 7.309984235625518e-06, "loss": 18.0475, "step": 20063 }, { "epoch": 0.36675379750306175, "grad_norm": 6.021194253401691, "learning_rate": 7.309721704400965e-06, "loss": 17.2236, "step": 20064 }, { "epoch": 0.3667720766995083, "grad_norm": 8.03573630844235, "learning_rate": 7.309459165081037e-06, "loss": 18.3012, "step": 20065 }, { "epoch": 0.3667903558959548, "grad_norm": 6.409128167831567, "learning_rate": 7.309196617666657e-06, "loss": 17.5102, "step": 20066 }, { "epoch": 0.36680863509240136, "grad_norm": 5.691077165759169, "learning_rate": 7.308934062158741e-06, "loss": 17.4877, "step": 20067 }, { "epoch": 0.36682691428884784, "grad_norm": 7.946724345294653, "learning_rate": 7.308671498558213e-06, "loss": 18.0304, "step": 20068 }, { "epoch": 0.3668451934852944, "grad_norm": 6.276899429007991, "learning_rate": 7.308408926865991e-06, "loss": 17.5556, "step": 20069 }, { "epoch": 0.3668634726817409, "grad_norm": 8.054993608283159, "learning_rate": 7.308146347082996e-06, "loss": 18.115, "step": 20070 }, { "epoch": 0.36688175187818745, "grad_norm": 6.73741360233731, "learning_rate": 7.307883759210148e-06, "loss": 17.7996, "step": 20071 }, { "epoch": 0.366900031074634, "grad_norm": 6.637461375419106, "learning_rate": 7.3076211632483695e-06, "loss": 17.5056, "step": 20072 }, { "epoch": 0.36691831027108046, "grad_norm": 6.408352045887416, "learning_rate": 7.307358559198578e-06, "loss": 17.5988, "step": 20073 }, { "epoch": 0.366936589467527, "grad_norm": 7.497068300084706, "learning_rate": 7.307095947061694e-06, "loss": 17.4475, "step": 20074 }, { "epoch": 0.36695486866397353, "grad_norm": 7.083633864362394, "learning_rate": 7.306833326838641e-06, "loss": 17.7188, "step": 20075 }, { "epoch": 0.36697314786042007, "grad_norm": 5.704502153833018, "learning_rate": 7.306570698530336e-06, "loss": 17.2738, "step": 20076 }, { "epoch": 0.3669914270568666, "grad_norm": 6.596272202193835, "learning_rate": 7.306308062137702e-06, "loss": 17.5896, "step": 20077 }, { "epoch": 0.3670097062533131, "grad_norm": 5.400517468672541, "learning_rate": 7.3060454176616555e-06, "loss": 17.1224, "step": 20078 }, { "epoch": 0.3670279854497596, "grad_norm": 7.2614347856671815, "learning_rate": 7.3057827651031225e-06, "loss": 17.918, "step": 20079 }, { "epoch": 0.36704626464620616, "grad_norm": 5.905622146665631, "learning_rate": 7.305520104463019e-06, "loss": 17.1705, "step": 20080 }, { "epoch": 0.3670645438426527, "grad_norm": 5.873681135369336, "learning_rate": 7.30525743574227e-06, "loss": 17.1603, "step": 20081 }, { "epoch": 0.3670828230390992, "grad_norm": 5.945053141843443, "learning_rate": 7.304994758941792e-06, "loss": 17.3761, "step": 20082 }, { "epoch": 0.3671011022355457, "grad_norm": 8.528893648065019, "learning_rate": 7.304732074062508e-06, "loss": 18.5653, "step": 20083 }, { "epoch": 0.36711938143199224, "grad_norm": 6.767456007725406, "learning_rate": 7.3044693811053395e-06, "loss": 17.8274, "step": 20084 }, { "epoch": 0.3671376606284388, "grad_norm": 7.488586284702607, "learning_rate": 7.304206680071204e-06, "loss": 17.8009, "step": 20085 }, { "epoch": 0.3671559398248853, "grad_norm": 6.5844851393308925, "learning_rate": 7.303943970961025e-06, "loss": 17.4793, "step": 20086 }, { "epoch": 0.36717421902133185, "grad_norm": 5.177449408518992, "learning_rate": 7.303681253775721e-06, "loss": 17.1093, "step": 20087 }, { "epoch": 0.36719249821777833, "grad_norm": 7.252141770205381, "learning_rate": 7.303418528516214e-06, "loss": 17.6935, "step": 20088 }, { "epoch": 0.36721077741422486, "grad_norm": 5.407830787162609, "learning_rate": 7.303155795183427e-06, "loss": 17.032, "step": 20089 }, { "epoch": 0.3672290566106714, "grad_norm": 5.407756889535193, "learning_rate": 7.302893053778277e-06, "loss": 17.2391, "step": 20090 }, { "epoch": 0.36724733580711794, "grad_norm": 5.398190716370125, "learning_rate": 7.302630304301688e-06, "loss": 17.0918, "step": 20091 }, { "epoch": 0.3672656150035644, "grad_norm": 6.664543300439525, "learning_rate": 7.302367546754577e-06, "loss": 17.6589, "step": 20092 }, { "epoch": 0.36728389420001095, "grad_norm": 7.744316272679232, "learning_rate": 7.302104781137871e-06, "loss": 18.0601, "step": 20093 }, { "epoch": 0.3673021733964575, "grad_norm": 7.400419416615188, "learning_rate": 7.3018420074524856e-06, "loss": 17.8153, "step": 20094 }, { "epoch": 0.367320452592904, "grad_norm": 7.730658448004205, "learning_rate": 7.301579225699344e-06, "loss": 17.9133, "step": 20095 }, { "epoch": 0.36733873178935056, "grad_norm": 7.870582895323564, "learning_rate": 7.301316435879366e-06, "loss": 18.2224, "step": 20096 }, { "epoch": 0.36735701098579704, "grad_norm": 5.7632077535216215, "learning_rate": 7.301053637993476e-06, "loss": 17.2209, "step": 20097 }, { "epoch": 0.3673752901822436, "grad_norm": 6.723134070566391, "learning_rate": 7.30079083204259e-06, "loss": 17.5405, "step": 20098 }, { "epoch": 0.3673935693786901, "grad_norm": 8.821324588463753, "learning_rate": 7.300528018027634e-06, "loss": 18.5008, "step": 20099 }, { "epoch": 0.36741184857513665, "grad_norm": 6.1028228266027895, "learning_rate": 7.300265195949526e-06, "loss": 17.4973, "step": 20100 }, { "epoch": 0.3674301277715832, "grad_norm": 7.136781649492445, "learning_rate": 7.300002365809187e-06, "loss": 17.7945, "step": 20101 }, { "epoch": 0.36744840696802966, "grad_norm": 6.748100371495393, "learning_rate": 7.299739527607541e-06, "loss": 17.5044, "step": 20102 }, { "epoch": 0.3674666861644762, "grad_norm": 7.662613441268769, "learning_rate": 7.299476681345508e-06, "loss": 18.1845, "step": 20103 }, { "epoch": 0.36748496536092273, "grad_norm": 6.700378445390445, "learning_rate": 7.299213827024007e-06, "loss": 17.564, "step": 20104 }, { "epoch": 0.36750324455736927, "grad_norm": 6.7273354272376, "learning_rate": 7.298950964643961e-06, "loss": 17.5001, "step": 20105 }, { "epoch": 0.3675215237538158, "grad_norm": 8.701715193046008, "learning_rate": 7.298688094206292e-06, "loss": 18.1433, "step": 20106 }, { "epoch": 0.3675398029502623, "grad_norm": 7.906115467473902, "learning_rate": 7.298425215711922e-06, "loss": 17.9331, "step": 20107 }, { "epoch": 0.3675580821467088, "grad_norm": 5.709904934285042, "learning_rate": 7.298162329161769e-06, "loss": 17.3205, "step": 20108 }, { "epoch": 0.36757636134315536, "grad_norm": 6.515815760470419, "learning_rate": 7.297899434556757e-06, "loss": 17.5524, "step": 20109 }, { "epoch": 0.3675946405396019, "grad_norm": 5.267983022408444, "learning_rate": 7.297636531897807e-06, "loss": 16.9202, "step": 20110 }, { "epoch": 0.3676129197360484, "grad_norm": 6.826460841783871, "learning_rate": 7.2973736211858404e-06, "loss": 17.4843, "step": 20111 }, { "epoch": 0.3676311989324949, "grad_norm": 5.052098701405337, "learning_rate": 7.297110702421779e-06, "loss": 16.9792, "step": 20112 }, { "epoch": 0.36764947812894144, "grad_norm": 6.565128374893936, "learning_rate": 7.2968477756065446e-06, "loss": 17.3362, "step": 20113 }, { "epoch": 0.367667757325388, "grad_norm": 6.128296352871817, "learning_rate": 7.296584840741055e-06, "loss": 17.4375, "step": 20114 }, { "epoch": 0.3676860365218345, "grad_norm": 6.125640252298581, "learning_rate": 7.2963218978262375e-06, "loss": 17.266, "step": 20115 }, { "epoch": 0.36770431571828105, "grad_norm": 6.902902395075871, "learning_rate": 7.296058946863011e-06, "loss": 17.6244, "step": 20116 }, { "epoch": 0.36772259491472753, "grad_norm": 7.64173330540598, "learning_rate": 7.295795987852297e-06, "loss": 18.2167, "step": 20117 }, { "epoch": 0.36774087411117407, "grad_norm": 6.884629072605797, "learning_rate": 7.295533020795017e-06, "loss": 17.5537, "step": 20118 }, { "epoch": 0.3677591533076206, "grad_norm": 6.1274511937644185, "learning_rate": 7.295270045692091e-06, "loss": 17.3439, "step": 20119 }, { "epoch": 0.36777743250406714, "grad_norm": 6.622061460834097, "learning_rate": 7.295007062544446e-06, "loss": 17.4974, "step": 20120 }, { "epoch": 0.36779571170051367, "grad_norm": 6.366456852787825, "learning_rate": 7.294744071352999e-06, "loss": 17.3657, "step": 20121 }, { "epoch": 0.36781399089696015, "grad_norm": 6.999359129834391, "learning_rate": 7.294481072118673e-06, "loss": 17.7983, "step": 20122 }, { "epoch": 0.3678322700934067, "grad_norm": 6.185835948923508, "learning_rate": 7.2942180648423885e-06, "loss": 17.4683, "step": 20123 }, { "epoch": 0.3678505492898532, "grad_norm": 6.919377538042241, "learning_rate": 7.293955049525071e-06, "loss": 17.8182, "step": 20124 }, { "epoch": 0.36786882848629976, "grad_norm": 8.598542348260848, "learning_rate": 7.2936920261676395e-06, "loss": 18.0788, "step": 20125 }, { "epoch": 0.36788710768274624, "grad_norm": 6.762175903862375, "learning_rate": 7.293428994771017e-06, "loss": 17.6727, "step": 20126 }, { "epoch": 0.3679053868791928, "grad_norm": 6.078292989964342, "learning_rate": 7.293165955336125e-06, "loss": 17.3059, "step": 20127 }, { "epoch": 0.3679236660756393, "grad_norm": 6.885064048666509, "learning_rate": 7.292902907863883e-06, "loss": 17.7859, "step": 20128 }, { "epoch": 0.36794194527208585, "grad_norm": 6.8245340079871175, "learning_rate": 7.292639852355216e-06, "loss": 17.6658, "step": 20129 }, { "epoch": 0.3679602244685324, "grad_norm": 5.562108315960352, "learning_rate": 7.292376788811047e-06, "loss": 17.0449, "step": 20130 }, { "epoch": 0.36797850366497886, "grad_norm": 7.151082116488049, "learning_rate": 7.292113717232297e-06, "loss": 17.8598, "step": 20131 }, { "epoch": 0.3679967828614254, "grad_norm": 6.569337289212714, "learning_rate": 7.291850637619884e-06, "loss": 17.7569, "step": 20132 }, { "epoch": 0.36801506205787193, "grad_norm": 6.592275684741269, "learning_rate": 7.291587549974734e-06, "loss": 17.6656, "step": 20133 }, { "epoch": 0.36803334125431847, "grad_norm": 7.178208409049657, "learning_rate": 7.291324454297771e-06, "loss": 17.6371, "step": 20134 }, { "epoch": 0.368051620450765, "grad_norm": 7.485265496700924, "learning_rate": 7.291061350589913e-06, "loss": 18.1561, "step": 20135 }, { "epoch": 0.3680698996472115, "grad_norm": 7.948896447649479, "learning_rate": 7.290798238852084e-06, "loss": 17.9248, "step": 20136 }, { "epoch": 0.368088178843658, "grad_norm": 7.168305311563071, "learning_rate": 7.2905351190852055e-06, "loss": 17.8386, "step": 20137 }, { "epoch": 0.36810645804010456, "grad_norm": 5.664485484315749, "learning_rate": 7.290271991290201e-06, "loss": 17.13, "step": 20138 }, { "epoch": 0.3681247372365511, "grad_norm": 6.137259040566241, "learning_rate": 7.290008855467992e-06, "loss": 17.4592, "step": 20139 }, { "epoch": 0.3681430164329976, "grad_norm": 7.451550629648147, "learning_rate": 7.289745711619499e-06, "loss": 18.3272, "step": 20140 }, { "epoch": 0.3681612956294441, "grad_norm": 5.842546717482064, "learning_rate": 7.2894825597456475e-06, "loss": 17.2294, "step": 20141 }, { "epoch": 0.36817957482589064, "grad_norm": 7.217392933405763, "learning_rate": 7.289219399847358e-06, "loss": 18.2428, "step": 20142 }, { "epoch": 0.3681978540223372, "grad_norm": 7.084181205845155, "learning_rate": 7.288956231925552e-06, "loss": 17.5697, "step": 20143 }, { "epoch": 0.3682161332187837, "grad_norm": 5.796022375836719, "learning_rate": 7.288693055981156e-06, "loss": 17.2733, "step": 20144 }, { "epoch": 0.36823441241523025, "grad_norm": 6.625334101804693, "learning_rate": 7.288429872015087e-06, "loss": 17.5939, "step": 20145 }, { "epoch": 0.36825269161167673, "grad_norm": 6.874146281867213, "learning_rate": 7.28816668002827e-06, "loss": 17.5714, "step": 20146 }, { "epoch": 0.36827097080812327, "grad_norm": 6.428597716907134, "learning_rate": 7.287903480021627e-06, "loss": 17.5581, "step": 20147 }, { "epoch": 0.3682892500045698, "grad_norm": 5.285284881012578, "learning_rate": 7.287640271996082e-06, "loss": 16.989, "step": 20148 }, { "epoch": 0.36830752920101634, "grad_norm": 5.255396001001805, "learning_rate": 7.287377055952557e-06, "loss": 16.8976, "step": 20149 }, { "epoch": 0.3683258083974629, "grad_norm": 6.133955867592134, "learning_rate": 7.287113831891972e-06, "loss": 17.2857, "step": 20150 }, { "epoch": 0.36834408759390935, "grad_norm": 5.460204999978745, "learning_rate": 7.286850599815253e-06, "loss": 17.1928, "step": 20151 }, { "epoch": 0.3683623667903559, "grad_norm": 9.398947934144394, "learning_rate": 7.286587359723321e-06, "loss": 17.856, "step": 20152 }, { "epoch": 0.3683806459868024, "grad_norm": 6.430024112866958, "learning_rate": 7.286324111617098e-06, "loss": 17.398, "step": 20153 }, { "epoch": 0.36839892518324896, "grad_norm": 6.933947022277681, "learning_rate": 7.286060855497508e-06, "loss": 17.4871, "step": 20154 }, { "epoch": 0.3684172043796955, "grad_norm": 6.49910817450385, "learning_rate": 7.285797591365471e-06, "loss": 17.3752, "step": 20155 }, { "epoch": 0.368435483576142, "grad_norm": 6.501522383444161, "learning_rate": 7.285534319221914e-06, "loss": 17.5474, "step": 20156 }, { "epoch": 0.3684537627725885, "grad_norm": 6.8706472101460205, "learning_rate": 7.285271039067758e-06, "loss": 17.6611, "step": 20157 }, { "epoch": 0.36847204196903505, "grad_norm": 11.028480464771858, "learning_rate": 7.285007750903924e-06, "loss": 18.1285, "step": 20158 }, { "epoch": 0.3684903211654816, "grad_norm": 6.970470342012089, "learning_rate": 7.284744454731336e-06, "loss": 17.9104, "step": 20159 }, { "epoch": 0.36850860036192806, "grad_norm": 6.760825199052605, "learning_rate": 7.284481150550917e-06, "loss": 17.8917, "step": 20160 }, { "epoch": 0.3685268795583746, "grad_norm": 6.481066904222357, "learning_rate": 7.28421783836359e-06, "loss": 17.732, "step": 20161 }, { "epoch": 0.36854515875482113, "grad_norm": 5.683108799358094, "learning_rate": 7.283954518170279e-06, "loss": 17.3968, "step": 20162 }, { "epoch": 0.36856343795126767, "grad_norm": 5.7027389137600535, "learning_rate": 7.283691189971905e-06, "loss": 17.5697, "step": 20163 }, { "epoch": 0.3685817171477142, "grad_norm": 7.586150772960724, "learning_rate": 7.28342785376939e-06, "loss": 17.9641, "step": 20164 }, { "epoch": 0.3685999963441607, "grad_norm": 7.0170276217612555, "learning_rate": 7.28316450956366e-06, "loss": 17.6283, "step": 20165 }, { "epoch": 0.3686182755406072, "grad_norm": 8.547247398630253, "learning_rate": 7.282901157355635e-06, "loss": 17.8078, "step": 20166 }, { "epoch": 0.36863655473705376, "grad_norm": 6.7076971943592865, "learning_rate": 7.282637797146241e-06, "loss": 17.8502, "step": 20167 }, { "epoch": 0.3686548339335003, "grad_norm": 7.7097343986682505, "learning_rate": 7.2823744289364e-06, "loss": 17.678, "step": 20168 }, { "epoch": 0.36867311312994683, "grad_norm": 6.749417805144167, "learning_rate": 7.282111052727033e-06, "loss": 17.5471, "step": 20169 }, { "epoch": 0.3686913923263933, "grad_norm": 5.925547427443137, "learning_rate": 7.281847668519066e-06, "loss": 17.0655, "step": 20170 }, { "epoch": 0.36870967152283984, "grad_norm": 5.014707464621083, "learning_rate": 7.28158427631342e-06, "loss": 16.8923, "step": 20171 }, { "epoch": 0.3687279507192864, "grad_norm": 8.34346511944614, "learning_rate": 7.281320876111021e-06, "loss": 18.1639, "step": 20172 }, { "epoch": 0.3687462299157329, "grad_norm": 6.665248434135376, "learning_rate": 7.2810574679127886e-06, "loss": 17.5314, "step": 20173 }, { "epoch": 0.36876450911217945, "grad_norm": 6.58588464299712, "learning_rate": 7.280794051719647e-06, "loss": 17.7183, "step": 20174 }, { "epoch": 0.36878278830862593, "grad_norm": 6.760345052735897, "learning_rate": 7.280530627532521e-06, "loss": 17.6791, "step": 20175 }, { "epoch": 0.36880106750507247, "grad_norm": 6.586517927225698, "learning_rate": 7.280267195352335e-06, "loss": 17.5181, "step": 20176 }, { "epoch": 0.368819346701519, "grad_norm": 4.754023736789508, "learning_rate": 7.280003755180009e-06, "loss": 16.9561, "step": 20177 }, { "epoch": 0.36883762589796554, "grad_norm": 8.604932407798067, "learning_rate": 7.279740307016468e-06, "loss": 18.5146, "step": 20178 }, { "epoch": 0.3688559050944121, "grad_norm": 5.268657647543221, "learning_rate": 7.279476850862634e-06, "loss": 16.9788, "step": 20179 }, { "epoch": 0.36887418429085855, "grad_norm": 7.057785945547185, "learning_rate": 7.2792133867194314e-06, "loss": 17.814, "step": 20180 }, { "epoch": 0.3688924634873051, "grad_norm": 7.236374121449109, "learning_rate": 7.278949914587784e-06, "loss": 17.9099, "step": 20181 }, { "epoch": 0.3689107426837516, "grad_norm": 6.654765517746662, "learning_rate": 7.278686434468615e-06, "loss": 17.8197, "step": 20182 }, { "epoch": 0.36892902188019816, "grad_norm": 6.710378083383677, "learning_rate": 7.278422946362847e-06, "loss": 17.7071, "step": 20183 }, { "epoch": 0.3689473010766447, "grad_norm": 7.2149529704784925, "learning_rate": 7.2781594502714056e-06, "loss": 17.7761, "step": 20184 }, { "epoch": 0.3689655802730912, "grad_norm": 6.455532537998574, "learning_rate": 7.277895946195213e-06, "loss": 17.4167, "step": 20185 }, { "epoch": 0.3689838594695377, "grad_norm": 5.737671359493939, "learning_rate": 7.2776324341351925e-06, "loss": 17.2511, "step": 20186 }, { "epoch": 0.36900213866598425, "grad_norm": 7.3158062405044495, "learning_rate": 7.277368914092266e-06, "loss": 17.8465, "step": 20187 }, { "epoch": 0.3690204178624308, "grad_norm": 6.451035583476881, "learning_rate": 7.277105386067361e-06, "loss": 17.4541, "step": 20188 }, { "epoch": 0.3690386970588773, "grad_norm": 6.621442986884995, "learning_rate": 7.2768418500614e-06, "loss": 17.8096, "step": 20189 }, { "epoch": 0.3690569762553238, "grad_norm": 6.519777440519619, "learning_rate": 7.276578306075306e-06, "loss": 17.4998, "step": 20190 }, { "epoch": 0.36907525545177033, "grad_norm": 6.84816196470274, "learning_rate": 7.276314754110001e-06, "loss": 17.5994, "step": 20191 }, { "epoch": 0.36909353464821687, "grad_norm": 6.731177467542887, "learning_rate": 7.276051194166409e-06, "loss": 17.458, "step": 20192 }, { "epoch": 0.3691118138446634, "grad_norm": 9.501986421561949, "learning_rate": 7.275787626245459e-06, "loss": 18.2521, "step": 20193 }, { "epoch": 0.3691300930411099, "grad_norm": 6.501485311118727, "learning_rate": 7.2755240503480685e-06, "loss": 17.5143, "step": 20194 }, { "epoch": 0.3691483722375564, "grad_norm": 8.309874503389043, "learning_rate": 7.2752604664751634e-06, "loss": 18.1989, "step": 20195 }, { "epoch": 0.36916665143400296, "grad_norm": 7.4206115730466164, "learning_rate": 7.274996874627669e-06, "loss": 17.9945, "step": 20196 }, { "epoch": 0.3691849306304495, "grad_norm": 5.68336568933742, "learning_rate": 7.274733274806507e-06, "loss": 17.2302, "step": 20197 }, { "epoch": 0.36920320982689603, "grad_norm": 5.460141761208392, "learning_rate": 7.274469667012603e-06, "loss": 17.1825, "step": 20198 }, { "epoch": 0.3692214890233425, "grad_norm": 6.54903313345841, "learning_rate": 7.274206051246879e-06, "loss": 17.2857, "step": 20199 }, { "epoch": 0.36923976821978904, "grad_norm": 5.60409017180462, "learning_rate": 7.273942427510262e-06, "loss": 17.296, "step": 20200 }, { "epoch": 0.3692580474162356, "grad_norm": 5.352966850503021, "learning_rate": 7.273678795803671e-06, "loss": 16.9111, "step": 20201 }, { "epoch": 0.3692763266126821, "grad_norm": 7.657158870126983, "learning_rate": 7.273415156128037e-06, "loss": 18.1799, "step": 20202 }, { "epoch": 0.36929460580912865, "grad_norm": 6.281812384549631, "learning_rate": 7.273151508484278e-06, "loss": 17.1461, "step": 20203 }, { "epoch": 0.36931288500557513, "grad_norm": 5.657290687537265, "learning_rate": 7.2728878528733205e-06, "loss": 17.268, "step": 20204 }, { "epoch": 0.36933116420202167, "grad_norm": 7.615048088581312, "learning_rate": 7.272624189296088e-06, "loss": 17.7176, "step": 20205 }, { "epoch": 0.3693494433984682, "grad_norm": 8.85994191056542, "learning_rate": 7.272360517753505e-06, "loss": 18.8301, "step": 20206 }, { "epoch": 0.36936772259491474, "grad_norm": 6.823672506279992, "learning_rate": 7.272096838246496e-06, "loss": 17.9613, "step": 20207 }, { "epoch": 0.3693860017913613, "grad_norm": 6.95551203175244, "learning_rate": 7.271833150775984e-06, "loss": 17.7593, "step": 20208 }, { "epoch": 0.36940428098780775, "grad_norm": 5.5484462019423795, "learning_rate": 7.271569455342895e-06, "loss": 17.0792, "step": 20209 }, { "epoch": 0.3694225601842543, "grad_norm": 7.258949293892397, "learning_rate": 7.271305751948152e-06, "loss": 17.8223, "step": 20210 }, { "epoch": 0.3694408393807008, "grad_norm": 7.2421793358182995, "learning_rate": 7.2710420405926795e-06, "loss": 18.0179, "step": 20211 }, { "epoch": 0.36945911857714736, "grad_norm": 5.539469075281623, "learning_rate": 7.270778321277401e-06, "loss": 17.1364, "step": 20212 }, { "epoch": 0.3694773977735939, "grad_norm": 8.741657716263415, "learning_rate": 7.270514594003243e-06, "loss": 18.4418, "step": 20213 }, { "epoch": 0.3694956769700404, "grad_norm": 6.51638394524961, "learning_rate": 7.270250858771126e-06, "loss": 17.5268, "step": 20214 }, { "epoch": 0.3695139561664869, "grad_norm": 5.658198226211729, "learning_rate": 7.2699871155819775e-06, "loss": 17.0006, "step": 20215 }, { "epoch": 0.36953223536293345, "grad_norm": 5.933392965481522, "learning_rate": 7.269723364436721e-06, "loss": 17.113, "step": 20216 }, { "epoch": 0.36955051455938, "grad_norm": 7.0326731791723684, "learning_rate": 7.2694596053362834e-06, "loss": 17.9364, "step": 20217 }, { "epoch": 0.3695687937558265, "grad_norm": 7.2282587953716515, "learning_rate": 7.269195838281585e-06, "loss": 17.5891, "step": 20218 }, { "epoch": 0.369587072952273, "grad_norm": 6.463010505382674, "learning_rate": 7.268932063273552e-06, "loss": 17.4782, "step": 20219 }, { "epoch": 0.36960535214871953, "grad_norm": 7.062369766509262, "learning_rate": 7.26866828031311e-06, "loss": 17.5808, "step": 20220 }, { "epoch": 0.36962363134516607, "grad_norm": 7.207672875590722, "learning_rate": 7.2684044894011805e-06, "loss": 17.8386, "step": 20221 }, { "epoch": 0.3696419105416126, "grad_norm": 4.822325848945059, "learning_rate": 7.268140690538692e-06, "loss": 16.8335, "step": 20222 }, { "epoch": 0.36966018973805914, "grad_norm": 5.3225809086744755, "learning_rate": 7.267876883726567e-06, "loss": 17.1389, "step": 20223 }, { "epoch": 0.3696784689345056, "grad_norm": 5.3704522860734105, "learning_rate": 7.267613068965729e-06, "loss": 17.0194, "step": 20224 }, { "epoch": 0.36969674813095216, "grad_norm": 6.869480950513082, "learning_rate": 7.267349246257105e-06, "loss": 18.2319, "step": 20225 }, { "epoch": 0.3697150273273987, "grad_norm": 6.397992614038071, "learning_rate": 7.267085415601618e-06, "loss": 17.5737, "step": 20226 }, { "epoch": 0.36973330652384523, "grad_norm": 6.558547566995884, "learning_rate": 7.266821577000195e-06, "loss": 17.7717, "step": 20227 }, { "epoch": 0.3697515857202917, "grad_norm": 6.4273194172281585, "learning_rate": 7.266557730453757e-06, "loss": 17.454, "step": 20228 }, { "epoch": 0.36976986491673824, "grad_norm": 7.414612660435543, "learning_rate": 7.266293875963232e-06, "loss": 17.8947, "step": 20229 }, { "epoch": 0.3697881441131848, "grad_norm": 6.833536317395566, "learning_rate": 7.266030013529544e-06, "loss": 17.7028, "step": 20230 }, { "epoch": 0.3698064233096313, "grad_norm": 6.376908683057129, "learning_rate": 7.265766143153617e-06, "loss": 17.7087, "step": 20231 }, { "epoch": 0.36982470250607785, "grad_norm": 6.2602699004023385, "learning_rate": 7.265502264836376e-06, "loss": 17.3338, "step": 20232 }, { "epoch": 0.36984298170252433, "grad_norm": 5.588023044502968, "learning_rate": 7.265238378578745e-06, "loss": 17.2379, "step": 20233 }, { "epoch": 0.36986126089897087, "grad_norm": 4.989465197818141, "learning_rate": 7.264974484381653e-06, "loss": 16.9015, "step": 20234 }, { "epoch": 0.3698795400954174, "grad_norm": 6.612200306580514, "learning_rate": 7.26471058224602e-06, "loss": 17.569, "step": 20235 }, { "epoch": 0.36989781929186394, "grad_norm": 7.340673453322384, "learning_rate": 7.264446672172772e-06, "loss": 17.7228, "step": 20236 }, { "epoch": 0.3699160984883105, "grad_norm": 5.468142572189995, "learning_rate": 7.264182754162836e-06, "loss": 17.0734, "step": 20237 }, { "epoch": 0.36993437768475695, "grad_norm": 7.07678047529603, "learning_rate": 7.263918828217137e-06, "loss": 17.6607, "step": 20238 }, { "epoch": 0.3699526568812035, "grad_norm": 7.675859352430443, "learning_rate": 7.263654894336598e-06, "loss": 18.2184, "step": 20239 }, { "epoch": 0.36997093607765, "grad_norm": 5.954324255706961, "learning_rate": 7.263390952522145e-06, "loss": 17.0799, "step": 20240 }, { "epoch": 0.36998921527409656, "grad_norm": 6.029026241074232, "learning_rate": 7.263127002774703e-06, "loss": 17.3228, "step": 20241 }, { "epoch": 0.3700074944705431, "grad_norm": 5.535211916569911, "learning_rate": 7.262863045095197e-06, "loss": 17.2121, "step": 20242 }, { "epoch": 0.3700257736669896, "grad_norm": 7.536357040008187, "learning_rate": 7.262599079484554e-06, "loss": 18.1031, "step": 20243 }, { "epoch": 0.3700440528634361, "grad_norm": 6.40201864864051, "learning_rate": 7.262335105943696e-06, "loss": 17.8633, "step": 20244 }, { "epoch": 0.37006233205988265, "grad_norm": 7.2861604702502065, "learning_rate": 7.262071124473551e-06, "loss": 17.5257, "step": 20245 }, { "epoch": 0.3700806112563292, "grad_norm": 6.315008338704994, "learning_rate": 7.261807135075041e-06, "loss": 17.5906, "step": 20246 }, { "epoch": 0.3700988904527757, "grad_norm": 8.925224656046367, "learning_rate": 7.261543137749094e-06, "loss": 19.0537, "step": 20247 }, { "epoch": 0.3701171696492222, "grad_norm": 5.802604624840978, "learning_rate": 7.261279132496636e-06, "loss": 17.1504, "step": 20248 }, { "epoch": 0.37013544884566874, "grad_norm": 8.186105514082048, "learning_rate": 7.261015119318589e-06, "loss": 18.017, "step": 20249 }, { "epoch": 0.37015372804211527, "grad_norm": 6.326322714453996, "learning_rate": 7.260751098215881e-06, "loss": 17.5734, "step": 20250 }, { "epoch": 0.3701720072385618, "grad_norm": 7.162270440843001, "learning_rate": 7.260487069189437e-06, "loss": 17.9879, "step": 20251 }, { "epoch": 0.37019028643500834, "grad_norm": 6.534782206402165, "learning_rate": 7.260223032240181e-06, "loss": 17.5544, "step": 20252 }, { "epoch": 0.3702085656314548, "grad_norm": 6.059176468142338, "learning_rate": 7.25995898736904e-06, "loss": 17.3778, "step": 20253 }, { "epoch": 0.37022684482790136, "grad_norm": 5.622668196541799, "learning_rate": 7.259694934576939e-06, "loss": 17.4539, "step": 20254 }, { "epoch": 0.3702451240243479, "grad_norm": 7.751454172468496, "learning_rate": 7.259430873864804e-06, "loss": 17.8333, "step": 20255 }, { "epoch": 0.37026340322079443, "grad_norm": 6.797918019734457, "learning_rate": 7.259166805233559e-06, "loss": 17.8342, "step": 20256 }, { "epoch": 0.37028168241724096, "grad_norm": 7.335166529034762, "learning_rate": 7.25890272868413e-06, "loss": 17.7724, "step": 20257 }, { "epoch": 0.37029996161368745, "grad_norm": 5.964816393323727, "learning_rate": 7.258638644217444e-06, "loss": 17.3451, "step": 20258 }, { "epoch": 0.370318240810134, "grad_norm": 9.362367560733293, "learning_rate": 7.258374551834425e-06, "loss": 17.8653, "step": 20259 }, { "epoch": 0.3703365200065805, "grad_norm": 6.005702624129726, "learning_rate": 7.258110451535998e-06, "loss": 17.2804, "step": 20260 }, { "epoch": 0.37035479920302705, "grad_norm": 6.918011032469916, "learning_rate": 7.257846343323091e-06, "loss": 17.6388, "step": 20261 }, { "epoch": 0.37037307839947353, "grad_norm": 6.683525037046206, "learning_rate": 7.257582227196629e-06, "loss": 17.9775, "step": 20262 }, { "epoch": 0.37039135759592007, "grad_norm": 6.844561692522498, "learning_rate": 7.257318103157537e-06, "loss": 17.5909, "step": 20263 }, { "epoch": 0.3704096367923666, "grad_norm": 6.407991545432797, "learning_rate": 7.25705397120674e-06, "loss": 17.442, "step": 20264 }, { "epoch": 0.37042791598881314, "grad_norm": 7.4644311069466385, "learning_rate": 7.256789831345166e-06, "loss": 17.7972, "step": 20265 }, { "epoch": 0.3704461951852597, "grad_norm": 6.029558736818221, "learning_rate": 7.256525683573739e-06, "loss": 17.319, "step": 20266 }, { "epoch": 0.37046447438170615, "grad_norm": 6.868138842885967, "learning_rate": 7.2562615278933845e-06, "loss": 17.7122, "step": 20267 }, { "epoch": 0.3704827535781527, "grad_norm": 6.01779397169704, "learning_rate": 7.255997364305028e-06, "loss": 17.2451, "step": 20268 }, { "epoch": 0.3705010327745992, "grad_norm": 6.623586650363315, "learning_rate": 7.255733192809598e-06, "loss": 17.6405, "step": 20269 }, { "epoch": 0.37051931197104576, "grad_norm": 5.949821133058551, "learning_rate": 7.2554690134080195e-06, "loss": 17.2411, "step": 20270 }, { "epoch": 0.3705375911674923, "grad_norm": 5.943184825420087, "learning_rate": 7.255204826101218e-06, "loss": 17.2515, "step": 20271 }, { "epoch": 0.3705558703639388, "grad_norm": 7.694644087135006, "learning_rate": 7.254940630890119e-06, "loss": 17.9962, "step": 20272 }, { "epoch": 0.3705741495603853, "grad_norm": 7.380378328384874, "learning_rate": 7.254676427775648e-06, "loss": 17.4382, "step": 20273 }, { "epoch": 0.37059242875683185, "grad_norm": 7.042658148811812, "learning_rate": 7.254412216758731e-06, "loss": 17.4113, "step": 20274 }, { "epoch": 0.3706107079532784, "grad_norm": 7.525244056436113, "learning_rate": 7.254147997840297e-06, "loss": 18.1133, "step": 20275 }, { "epoch": 0.3706289871497249, "grad_norm": 7.00185530585026, "learning_rate": 7.25388377102127e-06, "loss": 17.8023, "step": 20276 }, { "epoch": 0.3706472663461714, "grad_norm": 6.562099782250633, "learning_rate": 7.253619536302574e-06, "loss": 17.2893, "step": 20277 }, { "epoch": 0.37066554554261794, "grad_norm": 6.358206370988475, "learning_rate": 7.253355293685137e-06, "loss": 17.6039, "step": 20278 }, { "epoch": 0.37068382473906447, "grad_norm": 6.083152868347161, "learning_rate": 7.2530910431698876e-06, "loss": 17.4665, "step": 20279 }, { "epoch": 0.370702103935511, "grad_norm": 8.2756852119704, "learning_rate": 7.252826784757747e-06, "loss": 18.5358, "step": 20280 }, { "epoch": 0.37072038313195754, "grad_norm": 6.258825911969038, "learning_rate": 7.252562518449646e-06, "loss": 17.2119, "step": 20281 }, { "epoch": 0.370738662328404, "grad_norm": 7.742175015108623, "learning_rate": 7.252298244246507e-06, "loss": 17.6524, "step": 20282 }, { "epoch": 0.37075694152485056, "grad_norm": 7.474971190019222, "learning_rate": 7.252033962149259e-06, "loss": 17.9957, "step": 20283 }, { "epoch": 0.3707752207212971, "grad_norm": 5.816660347635074, "learning_rate": 7.251769672158828e-06, "loss": 17.118, "step": 20284 }, { "epoch": 0.37079349991774363, "grad_norm": 5.944782867154556, "learning_rate": 7.25150537427614e-06, "loss": 17.4711, "step": 20285 }, { "epoch": 0.37081177911419017, "grad_norm": 7.211325359244409, "learning_rate": 7.251241068502121e-06, "loss": 17.7895, "step": 20286 }, { "epoch": 0.37083005831063665, "grad_norm": 7.37474166400793, "learning_rate": 7.250976754837695e-06, "loss": 17.4837, "step": 20287 }, { "epoch": 0.3708483375070832, "grad_norm": 6.2830496208268, "learning_rate": 7.250712433283793e-06, "loss": 17.4363, "step": 20288 }, { "epoch": 0.3708666167035297, "grad_norm": 8.058595380782732, "learning_rate": 7.250448103841339e-06, "loss": 18.1716, "step": 20289 }, { "epoch": 0.37088489589997625, "grad_norm": 5.1949873569851635, "learning_rate": 7.250183766511259e-06, "loss": 17.067, "step": 20290 }, { "epoch": 0.3709031750964228, "grad_norm": 7.460335858185023, "learning_rate": 7.249919421294481e-06, "loss": 17.7925, "step": 20291 }, { "epoch": 0.37092145429286927, "grad_norm": 8.332337387334707, "learning_rate": 7.2496550681919295e-06, "loss": 18.2483, "step": 20292 }, { "epoch": 0.3709397334893158, "grad_norm": 5.447324642704994, "learning_rate": 7.249390707204533e-06, "loss": 17.0831, "step": 20293 }, { "epoch": 0.37095801268576234, "grad_norm": 6.048356023286317, "learning_rate": 7.249126338333218e-06, "loss": 17.4379, "step": 20294 }, { "epoch": 0.3709762918822089, "grad_norm": 6.806845187645089, "learning_rate": 7.2488619615789095e-06, "loss": 17.7563, "step": 20295 }, { "epoch": 0.37099457107865536, "grad_norm": 5.968497371252764, "learning_rate": 7.248597576942534e-06, "loss": 17.3476, "step": 20296 }, { "epoch": 0.3710128502751019, "grad_norm": 7.537556683206155, "learning_rate": 7.248333184425021e-06, "loss": 17.8649, "step": 20297 }, { "epoch": 0.3710311294715484, "grad_norm": 6.298372576893895, "learning_rate": 7.2480687840272935e-06, "loss": 17.2882, "step": 20298 }, { "epoch": 0.37104940866799496, "grad_norm": 6.362553137479139, "learning_rate": 7.247804375750281e-06, "loss": 17.6443, "step": 20299 }, { "epoch": 0.3710676878644415, "grad_norm": 4.55298761071668, "learning_rate": 7.2475399595949105e-06, "loss": 16.6209, "step": 20300 }, { "epoch": 0.371085967060888, "grad_norm": 5.750610391656214, "learning_rate": 7.2472755355621045e-06, "loss": 17.1992, "step": 20301 }, { "epoch": 0.3711042462573345, "grad_norm": 6.369967866465619, "learning_rate": 7.247011103652794e-06, "loss": 17.8721, "step": 20302 }, { "epoch": 0.37112252545378105, "grad_norm": 7.08208857214891, "learning_rate": 7.246746663867906e-06, "loss": 17.9226, "step": 20303 }, { "epoch": 0.3711408046502276, "grad_norm": 6.477355633600419, "learning_rate": 7.246482216208365e-06, "loss": 17.4098, "step": 20304 }, { "epoch": 0.3711590838466741, "grad_norm": 6.497567825869114, "learning_rate": 7.246217760675098e-06, "loss": 17.623, "step": 20305 }, { "epoch": 0.3711773630431206, "grad_norm": 6.477044126668947, "learning_rate": 7.245953297269033e-06, "loss": 17.3895, "step": 20306 }, { "epoch": 0.37119564223956714, "grad_norm": 5.643879089142931, "learning_rate": 7.2456888259910975e-06, "loss": 16.9544, "step": 20307 }, { "epoch": 0.37121392143601367, "grad_norm": 6.8606448765339945, "learning_rate": 7.245424346842217e-06, "loss": 17.5219, "step": 20308 }, { "epoch": 0.3712322006324602, "grad_norm": 7.416814314734445, "learning_rate": 7.2451598598233184e-06, "loss": 17.3286, "step": 20309 }, { "epoch": 0.37125047982890674, "grad_norm": 7.233693659450961, "learning_rate": 7.244895364935329e-06, "loss": 17.6946, "step": 20310 }, { "epoch": 0.3712687590253532, "grad_norm": 6.991765959399882, "learning_rate": 7.244630862179178e-06, "loss": 17.9366, "step": 20311 }, { "epoch": 0.37128703822179976, "grad_norm": 5.725040707089489, "learning_rate": 7.244366351555789e-06, "loss": 17.3533, "step": 20312 }, { "epoch": 0.3713053174182463, "grad_norm": 5.299920044235636, "learning_rate": 7.244101833066093e-06, "loss": 16.9887, "step": 20313 }, { "epoch": 0.37132359661469283, "grad_norm": 6.651243925486612, "learning_rate": 7.243837306711011e-06, "loss": 17.7768, "step": 20314 }, { "epoch": 0.37134187581113937, "grad_norm": 6.195642278522273, "learning_rate": 7.243572772491476e-06, "loss": 17.2241, "step": 20315 }, { "epoch": 0.37136015500758585, "grad_norm": 7.474750560470644, "learning_rate": 7.243308230408413e-06, "loss": 17.6936, "step": 20316 }, { "epoch": 0.3713784342040324, "grad_norm": 6.453715122510833, "learning_rate": 7.243043680462751e-06, "loss": 17.6156, "step": 20317 }, { "epoch": 0.3713967134004789, "grad_norm": 4.693457419653771, "learning_rate": 7.2427791226554136e-06, "loss": 16.9029, "step": 20318 }, { "epoch": 0.37141499259692545, "grad_norm": 7.411761556224165, "learning_rate": 7.24251455698733e-06, "loss": 17.5067, "step": 20319 }, { "epoch": 0.371433271793372, "grad_norm": 6.894335782006021, "learning_rate": 7.242249983459429e-06, "loss": 17.7946, "step": 20320 }, { "epoch": 0.37145155098981847, "grad_norm": 8.265264598175682, "learning_rate": 7.241985402072634e-06, "loss": 17.5989, "step": 20321 }, { "epoch": 0.371469830186265, "grad_norm": 6.0522965571151515, "learning_rate": 7.241720812827876e-06, "loss": 17.4702, "step": 20322 }, { "epoch": 0.37148810938271154, "grad_norm": 5.793673229110889, "learning_rate": 7.241456215726082e-06, "loss": 17.4635, "step": 20323 }, { "epoch": 0.3715063885791581, "grad_norm": 6.144375277382662, "learning_rate": 7.241191610768177e-06, "loss": 17.5922, "step": 20324 }, { "epoch": 0.3715246677756046, "grad_norm": 5.6056628885740265, "learning_rate": 7.24092699795509e-06, "loss": 17.2037, "step": 20325 }, { "epoch": 0.3715429469720511, "grad_norm": 5.406300344384662, "learning_rate": 7.240662377287748e-06, "loss": 17.1828, "step": 20326 }, { "epoch": 0.3715612261684976, "grad_norm": 7.063140556306277, "learning_rate": 7.240397748767081e-06, "loss": 17.6718, "step": 20327 }, { "epoch": 0.37157950536494416, "grad_norm": 7.660404107492753, "learning_rate": 7.240133112394012e-06, "loss": 18.1827, "step": 20328 }, { "epoch": 0.3715977845613907, "grad_norm": 6.445982472041131, "learning_rate": 7.239868468169471e-06, "loss": 17.5548, "step": 20329 }, { "epoch": 0.3716160637578372, "grad_norm": 6.112940911948984, "learning_rate": 7.239603816094387e-06, "loss": 17.2363, "step": 20330 }, { "epoch": 0.3716343429542837, "grad_norm": 5.734139377635783, "learning_rate": 7.239339156169686e-06, "loss": 17.1976, "step": 20331 }, { "epoch": 0.37165262215073025, "grad_norm": 5.476184692509491, "learning_rate": 7.239074488396294e-06, "loss": 17.2108, "step": 20332 }, { "epoch": 0.3716709013471768, "grad_norm": 6.127131572197858, "learning_rate": 7.238809812775139e-06, "loss": 17.3148, "step": 20333 }, { "epoch": 0.3716891805436233, "grad_norm": 7.332164080361569, "learning_rate": 7.238545129307153e-06, "loss": 17.7033, "step": 20334 }, { "epoch": 0.3717074597400698, "grad_norm": 6.19730544508833, "learning_rate": 7.2382804379932595e-06, "loss": 17.3996, "step": 20335 }, { "epoch": 0.37172573893651634, "grad_norm": 7.430300612267093, "learning_rate": 7.238015738834388e-06, "loss": 18.2496, "step": 20336 }, { "epoch": 0.3717440181329629, "grad_norm": 6.067064644869495, "learning_rate": 7.237751031831464e-06, "loss": 17.2441, "step": 20337 }, { "epoch": 0.3717622973294094, "grad_norm": 6.331459560734785, "learning_rate": 7.2374863169854175e-06, "loss": 17.3189, "step": 20338 }, { "epoch": 0.37178057652585594, "grad_norm": 5.788864823371587, "learning_rate": 7.237221594297175e-06, "loss": 17.0798, "step": 20339 }, { "epoch": 0.3717988557223024, "grad_norm": 6.677458964288095, "learning_rate": 7.236956863767665e-06, "loss": 17.7383, "step": 20340 }, { "epoch": 0.37181713491874896, "grad_norm": 6.999870699390516, "learning_rate": 7.2366921253978165e-06, "loss": 17.6734, "step": 20341 }, { "epoch": 0.3718354141151955, "grad_norm": 8.180484547018644, "learning_rate": 7.236427379188556e-06, "loss": 18.4928, "step": 20342 }, { "epoch": 0.37185369331164203, "grad_norm": 7.760925417708203, "learning_rate": 7.2361626251408105e-06, "loss": 18.3223, "step": 20343 }, { "epoch": 0.37187197250808857, "grad_norm": 6.124893382275461, "learning_rate": 7.235897863255509e-06, "loss": 17.5096, "step": 20344 }, { "epoch": 0.37189025170453505, "grad_norm": 5.905706033508136, "learning_rate": 7.23563309353358e-06, "loss": 17.3197, "step": 20345 }, { "epoch": 0.3719085309009816, "grad_norm": 6.6805730546934505, "learning_rate": 7.235368315975951e-06, "loss": 17.679, "step": 20346 }, { "epoch": 0.3719268100974281, "grad_norm": 6.794605647027376, "learning_rate": 7.23510353058355e-06, "loss": 17.8838, "step": 20347 }, { "epoch": 0.37194508929387465, "grad_norm": 5.693706186866842, "learning_rate": 7.234838737357306e-06, "loss": 17.549, "step": 20348 }, { "epoch": 0.3719633684903212, "grad_norm": 7.74429790113914, "learning_rate": 7.234573936298146e-06, "loss": 17.9132, "step": 20349 }, { "epoch": 0.37198164768676767, "grad_norm": 7.1510684608011825, "learning_rate": 7.234309127406998e-06, "loss": 17.5058, "step": 20350 }, { "epoch": 0.3719999268832142, "grad_norm": 5.94800289132471, "learning_rate": 7.234044310684789e-06, "loss": 17.368, "step": 20351 }, { "epoch": 0.37201820607966074, "grad_norm": 7.601307345453096, "learning_rate": 7.233779486132451e-06, "loss": 17.7545, "step": 20352 }, { "epoch": 0.3720364852761073, "grad_norm": 5.93708836301738, "learning_rate": 7.233514653750907e-06, "loss": 17.4703, "step": 20353 }, { "epoch": 0.3720547644725538, "grad_norm": 6.089745149104948, "learning_rate": 7.23324981354109e-06, "loss": 17.1858, "step": 20354 }, { "epoch": 0.3720730436690003, "grad_norm": 5.428649211988015, "learning_rate": 7.232984965503925e-06, "loss": 17.0951, "step": 20355 }, { "epoch": 0.3720913228654468, "grad_norm": 7.580011344726705, "learning_rate": 7.232720109640342e-06, "loss": 17.7734, "step": 20356 }, { "epoch": 0.37210960206189336, "grad_norm": 7.538724121483692, "learning_rate": 7.232455245951269e-06, "loss": 17.9085, "step": 20357 }, { "epoch": 0.3721278812583399, "grad_norm": 6.5290370401590705, "learning_rate": 7.232190374437634e-06, "loss": 17.5595, "step": 20358 }, { "epoch": 0.37214616045478643, "grad_norm": 7.6163367269095374, "learning_rate": 7.231925495100365e-06, "loss": 18.1394, "step": 20359 }, { "epoch": 0.3721644396512329, "grad_norm": 7.053694094974511, "learning_rate": 7.231660607940391e-06, "loss": 17.8886, "step": 20360 }, { "epoch": 0.37218271884767945, "grad_norm": 5.9060435750609335, "learning_rate": 7.23139571295864e-06, "loss": 17.3318, "step": 20361 }, { "epoch": 0.372200998044126, "grad_norm": 7.689619564427998, "learning_rate": 7.231130810156042e-06, "loss": 17.9838, "step": 20362 }, { "epoch": 0.3722192772405725, "grad_norm": 8.4709035566881, "learning_rate": 7.230865899533522e-06, "loss": 17.8392, "step": 20363 }, { "epoch": 0.372237556437019, "grad_norm": 5.693540796749584, "learning_rate": 7.230600981092012e-06, "loss": 17.215, "step": 20364 }, { "epoch": 0.37225583563346554, "grad_norm": 5.698863212612251, "learning_rate": 7.230336054832438e-06, "loss": 17.0566, "step": 20365 }, { "epoch": 0.3722741148299121, "grad_norm": 7.070801136228155, "learning_rate": 7.230071120755732e-06, "loss": 17.548, "step": 20366 }, { "epoch": 0.3722923940263586, "grad_norm": 8.326772885066012, "learning_rate": 7.229806178862818e-06, "loss": 17.5586, "step": 20367 }, { "epoch": 0.37231067322280514, "grad_norm": 5.982651802352122, "learning_rate": 7.229541229154627e-06, "loss": 17.277, "step": 20368 }, { "epoch": 0.3723289524192516, "grad_norm": 6.496205803133922, "learning_rate": 7.2292762716320886e-06, "loss": 17.6187, "step": 20369 }, { "epoch": 0.37234723161569816, "grad_norm": 6.161779591690141, "learning_rate": 7.229011306296129e-06, "loss": 17.5687, "step": 20370 }, { "epoch": 0.3723655108121447, "grad_norm": 7.006544113911659, "learning_rate": 7.2287463331476795e-06, "loss": 17.7012, "step": 20371 }, { "epoch": 0.37238379000859123, "grad_norm": 5.95469046391852, "learning_rate": 7.228481352187668e-06, "loss": 17.1852, "step": 20372 }, { "epoch": 0.37240206920503777, "grad_norm": 8.285553807565886, "learning_rate": 7.2282163634170196e-06, "loss": 17.919, "step": 20373 }, { "epoch": 0.37242034840148425, "grad_norm": 6.3174512701254, "learning_rate": 7.2279513668366696e-06, "loss": 17.2932, "step": 20374 }, { "epoch": 0.3724386275979308, "grad_norm": 5.365878806247166, "learning_rate": 7.2276863624475414e-06, "loss": 17.0177, "step": 20375 }, { "epoch": 0.3724569067943773, "grad_norm": 7.849594451486141, "learning_rate": 7.227421350250568e-06, "loss": 18.0845, "step": 20376 }, { "epoch": 0.37247518599082385, "grad_norm": 7.173790348339584, "learning_rate": 7.227156330246674e-06, "loss": 17.7624, "step": 20377 }, { "epoch": 0.3724934651872704, "grad_norm": 6.732504055398928, "learning_rate": 7.226891302436789e-06, "loss": 17.5077, "step": 20378 }, { "epoch": 0.37251174438371687, "grad_norm": 7.776751079739892, "learning_rate": 7.226626266821847e-06, "loss": 17.9832, "step": 20379 }, { "epoch": 0.3725300235801634, "grad_norm": 7.164123401789497, "learning_rate": 7.226361223402771e-06, "loss": 17.8237, "step": 20380 }, { "epoch": 0.37254830277660994, "grad_norm": 5.495777387559704, "learning_rate": 7.226096172180492e-06, "loss": 17.0427, "step": 20381 }, { "epoch": 0.3725665819730565, "grad_norm": 7.497811476326853, "learning_rate": 7.225831113155939e-06, "loss": 17.7845, "step": 20382 }, { "epoch": 0.372584861169503, "grad_norm": 6.126258756148357, "learning_rate": 7.225566046330041e-06, "loss": 17.292, "step": 20383 }, { "epoch": 0.3726031403659495, "grad_norm": 7.5891907319413425, "learning_rate": 7.225300971703728e-06, "loss": 17.7541, "step": 20384 }, { "epoch": 0.37262141956239603, "grad_norm": 5.681311559330811, "learning_rate": 7.225035889277928e-06, "loss": 17.2156, "step": 20385 }, { "epoch": 0.37263969875884256, "grad_norm": 5.389113341989961, "learning_rate": 7.224770799053571e-06, "loss": 17.2472, "step": 20386 }, { "epoch": 0.3726579779552891, "grad_norm": 7.119291474765218, "learning_rate": 7.224505701031584e-06, "loss": 17.9081, "step": 20387 }, { "epoch": 0.37267625715173563, "grad_norm": 7.550679998764515, "learning_rate": 7.224240595212898e-06, "loss": 17.8024, "step": 20388 }, { "epoch": 0.3726945363481821, "grad_norm": 7.120317814639238, "learning_rate": 7.223975481598443e-06, "loss": 17.6353, "step": 20389 }, { "epoch": 0.37271281554462865, "grad_norm": 6.534661234696521, "learning_rate": 7.223710360189145e-06, "loss": 17.6243, "step": 20390 }, { "epoch": 0.3727310947410752, "grad_norm": 6.674445844875846, "learning_rate": 7.223445230985936e-06, "loss": 17.8813, "step": 20391 }, { "epoch": 0.3727493739375217, "grad_norm": 6.958357293000667, "learning_rate": 7.223180093989743e-06, "loss": 17.6807, "step": 20392 }, { "epoch": 0.37276765313396826, "grad_norm": 6.307849016021199, "learning_rate": 7.2229149492015e-06, "loss": 17.414, "step": 20393 }, { "epoch": 0.37278593233041474, "grad_norm": 9.402225556016122, "learning_rate": 7.2226497966221295e-06, "loss": 18.8113, "step": 20394 }, { "epoch": 0.3728042115268613, "grad_norm": 6.158146935391387, "learning_rate": 7.222384636252566e-06, "loss": 17.4431, "step": 20395 }, { "epoch": 0.3728224907233078, "grad_norm": 8.180275466524071, "learning_rate": 7.2221194680937375e-06, "loss": 18.4592, "step": 20396 }, { "epoch": 0.37284076991975434, "grad_norm": 6.246114860222931, "learning_rate": 7.221854292146573e-06, "loss": 17.2941, "step": 20397 }, { "epoch": 0.3728590491162008, "grad_norm": 6.15569543233691, "learning_rate": 7.221589108412001e-06, "loss": 17.5038, "step": 20398 }, { "epoch": 0.37287732831264736, "grad_norm": 7.643497344732373, "learning_rate": 7.221323916890952e-06, "loss": 17.9917, "step": 20399 }, { "epoch": 0.3728956075090939, "grad_norm": 6.504358251889061, "learning_rate": 7.221058717584357e-06, "loss": 17.5867, "step": 20400 }, { "epoch": 0.37291388670554043, "grad_norm": 6.410339312678638, "learning_rate": 7.2207935104931425e-06, "loss": 17.5052, "step": 20401 }, { "epoch": 0.37293216590198697, "grad_norm": 8.280785292636546, "learning_rate": 7.22052829561824e-06, "loss": 17.466, "step": 20402 }, { "epoch": 0.37295044509843345, "grad_norm": 6.071215727603237, "learning_rate": 7.2202630729605794e-06, "loss": 17.3962, "step": 20403 }, { "epoch": 0.37296872429488, "grad_norm": 7.037387699823428, "learning_rate": 7.219997842521088e-06, "loss": 18.0676, "step": 20404 }, { "epoch": 0.3729870034913265, "grad_norm": 6.362228304268196, "learning_rate": 7.2197326043006965e-06, "loss": 17.452, "step": 20405 }, { "epoch": 0.37300528268777305, "grad_norm": 4.923762259060005, "learning_rate": 7.219467358300335e-06, "loss": 16.9562, "step": 20406 }, { "epoch": 0.3730235618842196, "grad_norm": 5.680735934454968, "learning_rate": 7.219202104520935e-06, "loss": 17.1766, "step": 20407 }, { "epoch": 0.37304184108066607, "grad_norm": 6.010009134758298, "learning_rate": 7.218936842963422e-06, "loss": 17.3163, "step": 20408 }, { "epoch": 0.3730601202771126, "grad_norm": 6.572640723154287, "learning_rate": 7.218671573628729e-06, "loss": 17.5496, "step": 20409 }, { "epoch": 0.37307839947355914, "grad_norm": 5.927443830371127, "learning_rate": 7.218406296517785e-06, "loss": 17.3753, "step": 20410 }, { "epoch": 0.3730966786700057, "grad_norm": 7.44410725698587, "learning_rate": 7.218141011631518e-06, "loss": 17.9294, "step": 20411 }, { "epoch": 0.3731149578664522, "grad_norm": 5.054380719914831, "learning_rate": 7.21787571897086e-06, "loss": 16.8668, "step": 20412 }, { "epoch": 0.3731332370628987, "grad_norm": 6.755970480299233, "learning_rate": 7.21761041853674e-06, "loss": 17.6978, "step": 20413 }, { "epoch": 0.37315151625934523, "grad_norm": 6.60742547686864, "learning_rate": 7.217345110330088e-06, "loss": 17.3586, "step": 20414 }, { "epoch": 0.37316979545579176, "grad_norm": 6.517868095519433, "learning_rate": 7.217079794351833e-06, "loss": 17.6383, "step": 20415 }, { "epoch": 0.3731880746522383, "grad_norm": 7.533480740334832, "learning_rate": 7.216814470602907e-06, "loss": 18.0985, "step": 20416 }, { "epoch": 0.37320635384868484, "grad_norm": 6.71196017452529, "learning_rate": 7.216549139084239e-06, "loss": 17.6497, "step": 20417 }, { "epoch": 0.3732246330451313, "grad_norm": 5.921607526423101, "learning_rate": 7.216283799796758e-06, "loss": 17.4144, "step": 20418 }, { "epoch": 0.37324291224157785, "grad_norm": 8.118812396137619, "learning_rate": 7.216018452741393e-06, "loss": 18.1792, "step": 20419 }, { "epoch": 0.3732611914380244, "grad_norm": 8.533555423039589, "learning_rate": 7.215753097919078e-06, "loss": 18.1658, "step": 20420 }, { "epoch": 0.3732794706344709, "grad_norm": 9.005233588612297, "learning_rate": 7.215487735330739e-06, "loss": 18.7296, "step": 20421 }, { "epoch": 0.37329774983091746, "grad_norm": 7.1531783299493465, "learning_rate": 7.215222364977309e-06, "loss": 17.8506, "step": 20422 }, { "epoch": 0.37331602902736394, "grad_norm": 6.62261073381144, "learning_rate": 7.2149569868597156e-06, "loss": 17.3995, "step": 20423 }, { "epoch": 0.3733343082238105, "grad_norm": 7.694517476480957, "learning_rate": 7.214691600978891e-06, "loss": 18.037, "step": 20424 }, { "epoch": 0.373352587420257, "grad_norm": 5.286063429211789, "learning_rate": 7.214426207335765e-06, "loss": 16.9748, "step": 20425 }, { "epoch": 0.37337086661670355, "grad_norm": 6.853043960827987, "learning_rate": 7.2141608059312665e-06, "loss": 17.6959, "step": 20426 }, { "epoch": 0.3733891458131501, "grad_norm": 7.6643730515163275, "learning_rate": 7.213895396766327e-06, "loss": 17.8825, "step": 20427 }, { "epoch": 0.37340742500959656, "grad_norm": 5.409856853628547, "learning_rate": 7.213629979841875e-06, "loss": 16.9521, "step": 20428 }, { "epoch": 0.3734257042060431, "grad_norm": 5.500316520841786, "learning_rate": 7.213364555158843e-06, "loss": 17.1085, "step": 20429 }, { "epoch": 0.37344398340248963, "grad_norm": 6.484131949159568, "learning_rate": 7.21309912271816e-06, "loss": 17.4931, "step": 20430 }, { "epoch": 0.37346226259893617, "grad_norm": 7.160145232373765, "learning_rate": 7.212833682520758e-06, "loss": 17.8472, "step": 20431 }, { "epoch": 0.37348054179538265, "grad_norm": 8.808054033558152, "learning_rate": 7.212568234567563e-06, "loss": 17.7341, "step": 20432 }, { "epoch": 0.3734988209918292, "grad_norm": 7.0031468822222935, "learning_rate": 7.21230277885951e-06, "loss": 17.9265, "step": 20433 }, { "epoch": 0.3735171001882757, "grad_norm": 6.6477486261601335, "learning_rate": 7.212037315397528e-06, "loss": 17.5194, "step": 20434 }, { "epoch": 0.37353537938472225, "grad_norm": 6.296791146161914, "learning_rate": 7.2117718441825475e-06, "loss": 17.4754, "step": 20435 }, { "epoch": 0.3735536585811688, "grad_norm": 5.909369086372829, "learning_rate": 7.211506365215499e-06, "loss": 17.3268, "step": 20436 }, { "epoch": 0.37357193777761527, "grad_norm": 7.5901947275425625, "learning_rate": 7.21124087849731e-06, "loss": 17.8533, "step": 20437 }, { "epoch": 0.3735902169740618, "grad_norm": 6.5267304256580365, "learning_rate": 7.210975384028917e-06, "loss": 17.5819, "step": 20438 }, { "epoch": 0.37360849617050834, "grad_norm": 8.301980468284086, "learning_rate": 7.210709881811245e-06, "loss": 18.2158, "step": 20439 }, { "epoch": 0.3736267753669549, "grad_norm": 6.438116920575476, "learning_rate": 7.210444371845227e-06, "loss": 17.5852, "step": 20440 }, { "epoch": 0.3736450545634014, "grad_norm": 5.890955174083961, "learning_rate": 7.210178854131793e-06, "loss": 17.1586, "step": 20441 }, { "epoch": 0.3736633337598479, "grad_norm": 6.658707114323633, "learning_rate": 7.2099133286718744e-06, "loss": 17.5105, "step": 20442 }, { "epoch": 0.37368161295629443, "grad_norm": 7.20389531042741, "learning_rate": 7.209647795466401e-06, "loss": 17.5541, "step": 20443 }, { "epoch": 0.37369989215274096, "grad_norm": 6.692437785833514, "learning_rate": 7.209382254516304e-06, "loss": 17.5992, "step": 20444 }, { "epoch": 0.3737181713491875, "grad_norm": 7.156500761876589, "learning_rate": 7.209116705822516e-06, "loss": 17.7442, "step": 20445 }, { "epoch": 0.37373645054563404, "grad_norm": 10.273160095679948, "learning_rate": 7.208851149385963e-06, "loss": 17.881, "step": 20446 }, { "epoch": 0.3737547297420805, "grad_norm": 7.652218591225395, "learning_rate": 7.208585585207578e-06, "loss": 18.0469, "step": 20447 }, { "epoch": 0.37377300893852705, "grad_norm": 6.983571133024348, "learning_rate": 7.208320013288295e-06, "loss": 17.4955, "step": 20448 }, { "epoch": 0.3737912881349736, "grad_norm": 8.059806225719282, "learning_rate": 7.2080544336290395e-06, "loss": 18.0089, "step": 20449 }, { "epoch": 0.3738095673314201, "grad_norm": 7.542634018960026, "learning_rate": 7.2077888462307456e-06, "loss": 18.0137, "step": 20450 }, { "epoch": 0.37382784652786666, "grad_norm": 5.932502408738489, "learning_rate": 7.207523251094344e-06, "loss": 17.358, "step": 20451 }, { "epoch": 0.37384612572431314, "grad_norm": 6.534574091013455, "learning_rate": 7.207257648220763e-06, "loss": 17.3377, "step": 20452 }, { "epoch": 0.3738644049207597, "grad_norm": 5.875549277401524, "learning_rate": 7.206992037610937e-06, "loss": 17.3167, "step": 20453 }, { "epoch": 0.3738826841172062, "grad_norm": 7.025478727291052, "learning_rate": 7.206726419265795e-06, "loss": 17.4492, "step": 20454 }, { "epoch": 0.37390096331365275, "grad_norm": 6.612097043680163, "learning_rate": 7.206460793186268e-06, "loss": 17.4482, "step": 20455 }, { "epoch": 0.3739192425100993, "grad_norm": 6.267549369740469, "learning_rate": 7.206195159373288e-06, "loss": 17.2456, "step": 20456 }, { "epoch": 0.37393752170654576, "grad_norm": 6.401926259821656, "learning_rate": 7.205929517827785e-06, "loss": 17.4843, "step": 20457 }, { "epoch": 0.3739558009029923, "grad_norm": 6.560521431492206, "learning_rate": 7.205663868550693e-06, "loss": 17.5084, "step": 20458 }, { "epoch": 0.37397408009943883, "grad_norm": 5.626893536183918, "learning_rate": 7.205398211542938e-06, "loss": 17.1795, "step": 20459 }, { "epoch": 0.37399235929588537, "grad_norm": 5.339963679579897, "learning_rate": 7.205132546805454e-06, "loss": 17.0069, "step": 20460 }, { "epoch": 0.3740106384923319, "grad_norm": 7.4430745967816385, "learning_rate": 7.204866874339172e-06, "loss": 17.921, "step": 20461 }, { "epoch": 0.3740289176887784, "grad_norm": 5.656506238986444, "learning_rate": 7.2046011941450225e-06, "loss": 17.4381, "step": 20462 }, { "epoch": 0.3740471968852249, "grad_norm": 6.401459680619209, "learning_rate": 7.204335506223937e-06, "loss": 17.2807, "step": 20463 }, { "epoch": 0.37406547608167146, "grad_norm": 6.163360997915802, "learning_rate": 7.204069810576848e-06, "loss": 17.3161, "step": 20464 }, { "epoch": 0.374083755278118, "grad_norm": 6.956249254360118, "learning_rate": 7.203804107204684e-06, "loss": 17.3061, "step": 20465 }, { "epoch": 0.37410203447456447, "grad_norm": 7.914146130191101, "learning_rate": 7.203538396108378e-06, "loss": 18.0533, "step": 20466 }, { "epoch": 0.374120313671011, "grad_norm": 6.022921740089152, "learning_rate": 7.203272677288863e-06, "loss": 17.1129, "step": 20467 }, { "epoch": 0.37413859286745754, "grad_norm": 5.345696553436056, "learning_rate": 7.2030069507470665e-06, "loss": 17.0964, "step": 20468 }, { "epoch": 0.3741568720639041, "grad_norm": 5.8767713802306165, "learning_rate": 7.202741216483923e-06, "loss": 17.344, "step": 20469 }, { "epoch": 0.3741751512603506, "grad_norm": 7.090472714638295, "learning_rate": 7.202475474500361e-06, "loss": 17.8596, "step": 20470 }, { "epoch": 0.3741934304567971, "grad_norm": 6.851878326223762, "learning_rate": 7.202209724797316e-06, "loss": 17.7297, "step": 20471 }, { "epoch": 0.37421170965324363, "grad_norm": 6.653875934548672, "learning_rate": 7.201943967375716e-06, "loss": 17.8149, "step": 20472 }, { "epoch": 0.37422998884969016, "grad_norm": 6.095453224164434, "learning_rate": 7.201678202236493e-06, "loss": 17.4917, "step": 20473 }, { "epoch": 0.3742482680461367, "grad_norm": 6.747793075429817, "learning_rate": 7.201412429380579e-06, "loss": 17.7392, "step": 20474 }, { "epoch": 0.37426654724258324, "grad_norm": 5.7077639193355285, "learning_rate": 7.201146648808906e-06, "loss": 17.2055, "step": 20475 }, { "epoch": 0.3742848264390297, "grad_norm": 5.588325452177898, "learning_rate": 7.200880860522405e-06, "loss": 17.1681, "step": 20476 }, { "epoch": 0.37430310563547625, "grad_norm": 7.116280070737461, "learning_rate": 7.2006150645220075e-06, "loss": 17.9519, "step": 20477 }, { "epoch": 0.3743213848319228, "grad_norm": 7.354552839910853, "learning_rate": 7.200349260808644e-06, "loss": 17.9828, "step": 20478 }, { "epoch": 0.3743396640283693, "grad_norm": 5.0117794482627955, "learning_rate": 7.200083449383248e-06, "loss": 17.0739, "step": 20479 }, { "epoch": 0.37435794322481586, "grad_norm": 6.110387257820735, "learning_rate": 7.199817630246751e-06, "loss": 17.245, "step": 20480 }, { "epoch": 0.37437622242126234, "grad_norm": 6.26316331195001, "learning_rate": 7.1995518034000836e-06, "loss": 17.5018, "step": 20481 }, { "epoch": 0.3743945016177089, "grad_norm": 5.416961702897103, "learning_rate": 7.199285968844178e-06, "loss": 17.0941, "step": 20482 }, { "epoch": 0.3744127808141554, "grad_norm": 7.296340194253948, "learning_rate": 7.199020126579966e-06, "loss": 18.0503, "step": 20483 }, { "epoch": 0.37443106001060195, "grad_norm": 8.16897791809317, "learning_rate": 7.19875427660838e-06, "loss": 18.1047, "step": 20484 }, { "epoch": 0.3744493392070485, "grad_norm": 5.496261649783343, "learning_rate": 7.1984884189303495e-06, "loss": 17.1197, "step": 20485 }, { "epoch": 0.37446761840349496, "grad_norm": 6.9872561177916594, "learning_rate": 7.19822255354681e-06, "loss": 17.7445, "step": 20486 }, { "epoch": 0.3744858975999415, "grad_norm": 5.224986596449498, "learning_rate": 7.197956680458689e-06, "loss": 16.9281, "step": 20487 }, { "epoch": 0.37450417679638803, "grad_norm": 5.579414671273383, "learning_rate": 7.197690799666921e-06, "loss": 17.0586, "step": 20488 }, { "epoch": 0.37452245599283457, "grad_norm": 6.510955389162127, "learning_rate": 7.197424911172439e-06, "loss": 17.3088, "step": 20489 }, { "epoch": 0.3745407351892811, "grad_norm": 7.3533702767898, "learning_rate": 7.197159014976172e-06, "loss": 17.8623, "step": 20490 }, { "epoch": 0.3745590143857276, "grad_norm": 6.424771511305801, "learning_rate": 7.196893111079054e-06, "loss": 17.471, "step": 20491 }, { "epoch": 0.3745772935821741, "grad_norm": 7.165256769964428, "learning_rate": 7.196627199482015e-06, "loss": 17.5574, "step": 20492 }, { "epoch": 0.37459557277862066, "grad_norm": 6.464879066796956, "learning_rate": 7.19636128018599e-06, "loss": 17.5617, "step": 20493 }, { "epoch": 0.3746138519750672, "grad_norm": 5.3519671094987675, "learning_rate": 7.196095353191909e-06, "loss": 16.9451, "step": 20494 }, { "epoch": 0.3746321311715137, "grad_norm": 7.342403173271185, "learning_rate": 7.195829418500704e-06, "loss": 17.6891, "step": 20495 }, { "epoch": 0.3746504103679602, "grad_norm": 7.397482331241367, "learning_rate": 7.195563476113306e-06, "loss": 17.8364, "step": 20496 }, { "epoch": 0.37466868956440674, "grad_norm": 7.601953773384147, "learning_rate": 7.19529752603065e-06, "loss": 17.7663, "step": 20497 }, { "epoch": 0.3746869687608533, "grad_norm": 6.155741447949556, "learning_rate": 7.195031568253667e-06, "loss": 17.4188, "step": 20498 }, { "epoch": 0.3747052479572998, "grad_norm": 8.528566994652131, "learning_rate": 7.194765602783288e-06, "loss": 18.6777, "step": 20499 }, { "epoch": 0.3747235271537463, "grad_norm": 5.999514942833209, "learning_rate": 7.194499629620446e-06, "loss": 17.3963, "step": 20500 }, { "epoch": 0.37474180635019283, "grad_norm": 5.988950183033299, "learning_rate": 7.194233648766073e-06, "loss": 17.3092, "step": 20501 }, { "epoch": 0.37476008554663937, "grad_norm": 6.778376306879646, "learning_rate": 7.193967660221103e-06, "loss": 17.6975, "step": 20502 }, { "epoch": 0.3747783647430859, "grad_norm": 6.825540751643079, "learning_rate": 7.1937016639864665e-06, "loss": 17.9962, "step": 20503 }, { "epoch": 0.37479664393953244, "grad_norm": 6.92133841962337, "learning_rate": 7.193435660063095e-06, "loss": 17.7729, "step": 20504 }, { "epoch": 0.3748149231359789, "grad_norm": 6.629127129797609, "learning_rate": 7.193169648451921e-06, "loss": 17.5761, "step": 20505 }, { "epoch": 0.37483320233242545, "grad_norm": 6.247274377170819, "learning_rate": 7.192903629153879e-06, "loss": 17.6489, "step": 20506 }, { "epoch": 0.374851481528872, "grad_norm": 4.864723505686175, "learning_rate": 7.192637602169901e-06, "loss": 16.8646, "step": 20507 }, { "epoch": 0.3748697607253185, "grad_norm": 7.351974906030962, "learning_rate": 7.192371567500917e-06, "loss": 17.8657, "step": 20508 }, { "epoch": 0.37488803992176506, "grad_norm": 6.279121183760727, "learning_rate": 7.192105525147861e-06, "loss": 17.553, "step": 20509 }, { "epoch": 0.37490631911821154, "grad_norm": 6.4870410422967, "learning_rate": 7.191839475111666e-06, "loss": 17.7006, "step": 20510 }, { "epoch": 0.3749245983146581, "grad_norm": 6.529975507119786, "learning_rate": 7.191573417393264e-06, "loss": 17.4445, "step": 20511 }, { "epoch": 0.3749428775111046, "grad_norm": 5.882872064621326, "learning_rate": 7.191307351993586e-06, "loss": 17.4472, "step": 20512 }, { "epoch": 0.37496115670755115, "grad_norm": 6.89285895607024, "learning_rate": 7.191041278913566e-06, "loss": 18.0626, "step": 20513 }, { "epoch": 0.3749794359039977, "grad_norm": 7.402257841543211, "learning_rate": 7.190775198154139e-06, "loss": 17.7354, "step": 20514 }, { "epoch": 0.37499771510044416, "grad_norm": 6.9256213267346975, "learning_rate": 7.190509109716232e-06, "loss": 17.7617, "step": 20515 }, { "epoch": 0.3750159942968907, "grad_norm": 6.055825591041182, "learning_rate": 7.190243013600782e-06, "loss": 16.9858, "step": 20516 }, { "epoch": 0.37503427349333723, "grad_norm": 6.9973535582198245, "learning_rate": 7.189976909808721e-06, "loss": 17.9019, "step": 20517 }, { "epoch": 0.37505255268978377, "grad_norm": 5.49296416870087, "learning_rate": 7.189710798340981e-06, "loss": 17.0977, "step": 20518 }, { "epoch": 0.3750708318862303, "grad_norm": 5.5940893216151295, "learning_rate": 7.189444679198492e-06, "loss": 17.1064, "step": 20519 }, { "epoch": 0.3750891110826768, "grad_norm": 6.803714787433599, "learning_rate": 7.189178552382192e-06, "loss": 17.8292, "step": 20520 }, { "epoch": 0.3751073902791233, "grad_norm": 6.706811302961145, "learning_rate": 7.18891241789301e-06, "loss": 17.8327, "step": 20521 }, { "epoch": 0.37512566947556986, "grad_norm": 6.087035417363352, "learning_rate": 7.188646275731881e-06, "loss": 17.5479, "step": 20522 }, { "epoch": 0.3751439486720164, "grad_norm": 5.825170628716262, "learning_rate": 7.188380125899736e-06, "loss": 17.5318, "step": 20523 }, { "epoch": 0.3751622278684629, "grad_norm": 7.386143292146687, "learning_rate": 7.188113968397508e-06, "loss": 17.8499, "step": 20524 }, { "epoch": 0.3751805070649094, "grad_norm": 5.670855151232415, "learning_rate": 7.1878478032261314e-06, "loss": 17.154, "step": 20525 }, { "epoch": 0.37519878626135594, "grad_norm": 6.059875690237442, "learning_rate": 7.187581630386538e-06, "loss": 17.3713, "step": 20526 }, { "epoch": 0.3752170654578025, "grad_norm": 5.732884623881086, "learning_rate": 7.187315449879659e-06, "loss": 17.1756, "step": 20527 }, { "epoch": 0.375235344654249, "grad_norm": 7.308732349846579, "learning_rate": 7.187049261706431e-06, "loss": 18.1432, "step": 20528 }, { "epoch": 0.37525362385069555, "grad_norm": 6.459090289358423, "learning_rate": 7.186783065867785e-06, "loss": 17.3665, "step": 20529 }, { "epoch": 0.37527190304714203, "grad_norm": 5.737021132272651, "learning_rate": 7.1865168623646546e-06, "loss": 17.4221, "step": 20530 }, { "epoch": 0.37529018224358857, "grad_norm": 6.279016688668446, "learning_rate": 7.186250651197971e-06, "loss": 17.5875, "step": 20531 }, { "epoch": 0.3753084614400351, "grad_norm": 7.238675373326088, "learning_rate": 7.185984432368669e-06, "loss": 17.5531, "step": 20532 }, { "epoch": 0.37532674063648164, "grad_norm": 5.654569050953152, "learning_rate": 7.185718205877681e-06, "loss": 17.1147, "step": 20533 }, { "epoch": 0.3753450198329281, "grad_norm": 6.4257274957213575, "learning_rate": 7.1854519717259416e-06, "loss": 17.4075, "step": 20534 }, { "epoch": 0.37536329902937465, "grad_norm": 6.637054571580879, "learning_rate": 7.185185729914383e-06, "loss": 17.5743, "step": 20535 }, { "epoch": 0.3753815782258212, "grad_norm": 7.637741798668585, "learning_rate": 7.184919480443936e-06, "loss": 17.9811, "step": 20536 }, { "epoch": 0.3753998574222677, "grad_norm": 6.407290892107951, "learning_rate": 7.184653223315535e-06, "loss": 17.5998, "step": 20537 }, { "epoch": 0.37541813661871426, "grad_norm": 7.749589762691165, "learning_rate": 7.184386958530117e-06, "loss": 17.7453, "step": 20538 }, { "epoch": 0.37543641581516074, "grad_norm": 6.905829872121333, "learning_rate": 7.184120686088612e-06, "loss": 17.4916, "step": 20539 }, { "epoch": 0.3754546950116073, "grad_norm": 5.7683542321228, "learning_rate": 7.183854405991952e-06, "loss": 17.1817, "step": 20540 }, { "epoch": 0.3754729742080538, "grad_norm": 5.981327578864603, "learning_rate": 7.183588118241072e-06, "loss": 17.3861, "step": 20541 }, { "epoch": 0.37549125340450035, "grad_norm": 7.5470328785879515, "learning_rate": 7.183321822836906e-06, "loss": 18.2527, "step": 20542 }, { "epoch": 0.3755095326009469, "grad_norm": 7.341944203848078, "learning_rate": 7.183055519780385e-06, "loss": 18.0454, "step": 20543 }, { "epoch": 0.37552781179739336, "grad_norm": 7.321986932501018, "learning_rate": 7.182789209072445e-06, "loss": 17.8002, "step": 20544 }, { "epoch": 0.3755460909938399, "grad_norm": 7.018625649196589, "learning_rate": 7.182522890714018e-06, "loss": 17.887, "step": 20545 }, { "epoch": 0.37556437019028643, "grad_norm": 6.703391618043084, "learning_rate": 7.182256564706039e-06, "loss": 17.6745, "step": 20546 }, { "epoch": 0.37558264938673297, "grad_norm": 7.104778415264163, "learning_rate": 7.181990231049437e-06, "loss": 17.6805, "step": 20547 }, { "epoch": 0.3756009285831795, "grad_norm": 8.684152435477817, "learning_rate": 7.181723889745151e-06, "loss": 18.46, "step": 20548 }, { "epoch": 0.375619207779626, "grad_norm": 7.3817123782854175, "learning_rate": 7.181457540794112e-06, "loss": 17.6573, "step": 20549 }, { "epoch": 0.3756374869760725, "grad_norm": 6.750336302395451, "learning_rate": 7.181191184197254e-06, "loss": 17.623, "step": 20550 }, { "epoch": 0.37565576617251906, "grad_norm": 5.881837910867297, "learning_rate": 7.180924819955508e-06, "loss": 17.1445, "step": 20551 }, { "epoch": 0.3756740453689656, "grad_norm": 7.534997164159258, "learning_rate": 7.180658448069811e-06, "loss": 17.9065, "step": 20552 }, { "epoch": 0.37569232456541213, "grad_norm": 9.022394311527165, "learning_rate": 7.180392068541095e-06, "loss": 18.0997, "step": 20553 }, { "epoch": 0.3757106037618586, "grad_norm": 7.275141752031852, "learning_rate": 7.180125681370296e-06, "loss": 17.596, "step": 20554 }, { "epoch": 0.37572888295830514, "grad_norm": 6.640547035363546, "learning_rate": 7.1798592865583425e-06, "loss": 17.6874, "step": 20555 }, { "epoch": 0.3757471621547517, "grad_norm": 5.768911845931946, "learning_rate": 7.179592884106174e-06, "loss": 17.4228, "step": 20556 }, { "epoch": 0.3757654413511982, "grad_norm": 6.99321745048219, "learning_rate": 7.179326474014721e-06, "loss": 17.7309, "step": 20557 }, { "epoch": 0.37578372054764475, "grad_norm": 7.839414507814109, "learning_rate": 7.179060056284917e-06, "loss": 17.9429, "step": 20558 }, { "epoch": 0.37580199974409123, "grad_norm": 5.920105089765995, "learning_rate": 7.178793630917696e-06, "loss": 17.2724, "step": 20559 }, { "epoch": 0.37582027894053777, "grad_norm": 5.438035983080217, "learning_rate": 7.178527197913994e-06, "loss": 17.1341, "step": 20560 }, { "epoch": 0.3758385581369843, "grad_norm": 6.888576926240088, "learning_rate": 7.178260757274742e-06, "loss": 17.8441, "step": 20561 }, { "epoch": 0.37585683733343084, "grad_norm": 6.94751405345727, "learning_rate": 7.177994309000876e-06, "loss": 17.1709, "step": 20562 }, { "epoch": 0.3758751165298774, "grad_norm": 7.568423208404413, "learning_rate": 7.1777278530933295e-06, "loss": 17.9601, "step": 20563 }, { "epoch": 0.37589339572632385, "grad_norm": 7.295671286163825, "learning_rate": 7.177461389553033e-06, "loss": 17.7052, "step": 20564 }, { "epoch": 0.3759116749227704, "grad_norm": 5.163917432288961, "learning_rate": 7.177194918380926e-06, "loss": 17.0495, "step": 20565 }, { "epoch": 0.3759299541192169, "grad_norm": 6.35829451976066, "learning_rate": 7.176928439577939e-06, "loss": 17.3866, "step": 20566 }, { "epoch": 0.37594823331566346, "grad_norm": 5.945944096813592, "learning_rate": 7.176661953145007e-06, "loss": 17.2316, "step": 20567 }, { "epoch": 0.37596651251210994, "grad_norm": 6.3620505848576006, "learning_rate": 7.176395459083063e-06, "loss": 17.3621, "step": 20568 }, { "epoch": 0.3759847917085565, "grad_norm": 8.249472346758372, "learning_rate": 7.1761289573930425e-06, "loss": 18.04, "step": 20569 }, { "epoch": 0.376003070905003, "grad_norm": 7.075767540520091, "learning_rate": 7.1758624480758776e-06, "loss": 17.421, "step": 20570 }, { "epoch": 0.37602135010144955, "grad_norm": 5.894654845925681, "learning_rate": 7.175595931132505e-06, "loss": 17.2072, "step": 20571 }, { "epoch": 0.3760396292978961, "grad_norm": 7.297821368762594, "learning_rate": 7.175329406563858e-06, "loss": 17.847, "step": 20572 }, { "epoch": 0.37605790849434256, "grad_norm": 7.334149966670582, "learning_rate": 7.175062874370868e-06, "loss": 17.9298, "step": 20573 }, { "epoch": 0.3760761876907891, "grad_norm": 6.938856531852848, "learning_rate": 7.174796334554473e-06, "loss": 17.8027, "step": 20574 }, { "epoch": 0.37609446688723563, "grad_norm": 5.923019276067725, "learning_rate": 7.174529787115605e-06, "loss": 17.2943, "step": 20575 }, { "epoch": 0.37611274608368217, "grad_norm": 6.37713460699952, "learning_rate": 7.174263232055198e-06, "loss": 17.4922, "step": 20576 }, { "epoch": 0.3761310252801287, "grad_norm": 6.8539348980493395, "learning_rate": 7.1739966693741894e-06, "loss": 17.7022, "step": 20577 }, { "epoch": 0.3761493044765752, "grad_norm": 7.476638742288511, "learning_rate": 7.1737300990735085e-06, "loss": 17.9302, "step": 20578 }, { "epoch": 0.3761675836730217, "grad_norm": 5.942880873420569, "learning_rate": 7.173463521154094e-06, "loss": 17.3077, "step": 20579 }, { "epoch": 0.37618586286946826, "grad_norm": 8.375990830522591, "learning_rate": 7.173196935616877e-06, "loss": 18.302, "step": 20580 }, { "epoch": 0.3762041420659148, "grad_norm": 6.2401076669671, "learning_rate": 7.172930342462795e-06, "loss": 17.3708, "step": 20581 }, { "epoch": 0.37622242126236133, "grad_norm": 5.788733961449134, "learning_rate": 7.17266374169278e-06, "loss": 17.364, "step": 20582 }, { "epoch": 0.3762407004588078, "grad_norm": 6.731599080527994, "learning_rate": 7.172397133307767e-06, "loss": 17.6133, "step": 20583 }, { "epoch": 0.37625897965525434, "grad_norm": 5.956999934736685, "learning_rate": 7.172130517308691e-06, "loss": 17.4725, "step": 20584 }, { "epoch": 0.3762772588517009, "grad_norm": 7.831714054322711, "learning_rate": 7.171863893696485e-06, "loss": 17.9058, "step": 20585 }, { "epoch": 0.3762955380481474, "grad_norm": 6.823454541730952, "learning_rate": 7.171597262472085e-06, "loss": 17.7316, "step": 20586 }, { "epoch": 0.37631381724459395, "grad_norm": 5.638160627563298, "learning_rate": 7.171330623636426e-06, "loss": 17.3128, "step": 20587 }, { "epoch": 0.37633209644104043, "grad_norm": 7.103100444250907, "learning_rate": 7.17106397719044e-06, "loss": 17.8114, "step": 20588 }, { "epoch": 0.37635037563748697, "grad_norm": 5.456728586507418, "learning_rate": 7.170797323135065e-06, "loss": 17.0995, "step": 20589 }, { "epoch": 0.3763686548339335, "grad_norm": 5.971389395298871, "learning_rate": 7.170530661471232e-06, "loss": 17.4495, "step": 20590 }, { "epoch": 0.37638693403038004, "grad_norm": 6.927627408702738, "learning_rate": 7.170263992199878e-06, "loss": 17.7794, "step": 20591 }, { "epoch": 0.3764052132268266, "grad_norm": 6.10809545916464, "learning_rate": 7.169997315321936e-06, "loss": 17.6278, "step": 20592 }, { "epoch": 0.37642349242327305, "grad_norm": 7.840387278475167, "learning_rate": 7.169730630838344e-06, "loss": 17.4599, "step": 20593 }, { "epoch": 0.3764417716197196, "grad_norm": 5.808072367256486, "learning_rate": 7.169463938750033e-06, "loss": 17.2614, "step": 20594 }, { "epoch": 0.3764600508161661, "grad_norm": 6.6618112247851675, "learning_rate": 7.169197239057939e-06, "loss": 17.4821, "step": 20595 }, { "epoch": 0.37647833001261266, "grad_norm": 6.159097442050367, "learning_rate": 7.168930531762998e-06, "loss": 17.3987, "step": 20596 }, { "epoch": 0.3764966092090592, "grad_norm": 5.5933978247039855, "learning_rate": 7.1686638168661425e-06, "loss": 17.1157, "step": 20597 }, { "epoch": 0.3765148884055057, "grad_norm": 6.900925113958627, "learning_rate": 7.168397094368309e-06, "loss": 17.7539, "step": 20598 }, { "epoch": 0.3765331676019522, "grad_norm": 6.619261728696811, "learning_rate": 7.168130364270431e-06, "loss": 17.4448, "step": 20599 }, { "epoch": 0.37655144679839875, "grad_norm": 7.609053670754561, "learning_rate": 7.167863626573446e-06, "loss": 18.3832, "step": 20600 }, { "epoch": 0.3765697259948453, "grad_norm": 5.803236237706797, "learning_rate": 7.167596881278285e-06, "loss": 17.4427, "step": 20601 }, { "epoch": 0.37658800519129176, "grad_norm": 7.273994144546687, "learning_rate": 7.167330128385886e-06, "loss": 17.775, "step": 20602 }, { "epoch": 0.3766062843877383, "grad_norm": 7.463503272074474, "learning_rate": 7.167063367897184e-06, "loss": 17.6611, "step": 20603 }, { "epoch": 0.37662456358418483, "grad_norm": 7.0457114727194945, "learning_rate": 7.1667965998131124e-06, "loss": 17.7985, "step": 20604 }, { "epoch": 0.37664284278063137, "grad_norm": 6.895245411245836, "learning_rate": 7.166529824134606e-06, "loss": 17.6429, "step": 20605 }, { "epoch": 0.3766611219770779, "grad_norm": 4.420823489714812, "learning_rate": 7.1662630408626e-06, "loss": 16.8286, "step": 20606 }, { "epoch": 0.3766794011735244, "grad_norm": 5.763056319838112, "learning_rate": 7.165996249998033e-06, "loss": 17.3129, "step": 20607 }, { "epoch": 0.3766976803699709, "grad_norm": 6.765938780753487, "learning_rate": 7.165729451541834e-06, "loss": 17.5902, "step": 20608 }, { "epoch": 0.37671595956641746, "grad_norm": 6.233005257901531, "learning_rate": 7.165462645494943e-06, "loss": 17.1538, "step": 20609 }, { "epoch": 0.376734238762864, "grad_norm": 6.994044793039864, "learning_rate": 7.165195831858293e-06, "loss": 17.6151, "step": 20610 }, { "epoch": 0.37675251795931053, "grad_norm": 4.5691847886265275, "learning_rate": 7.164929010632818e-06, "loss": 16.8158, "step": 20611 }, { "epoch": 0.376770797155757, "grad_norm": 5.113949836479145, "learning_rate": 7.164662181819456e-06, "loss": 16.9211, "step": 20612 }, { "epoch": 0.37678907635220354, "grad_norm": 6.731286088024672, "learning_rate": 7.164395345419141e-06, "loss": 17.5905, "step": 20613 }, { "epoch": 0.3768073555486501, "grad_norm": 5.779281914613035, "learning_rate": 7.164128501432808e-06, "loss": 17.4421, "step": 20614 }, { "epoch": 0.3768256347450966, "grad_norm": 7.339046452482347, "learning_rate": 7.163861649861392e-06, "loss": 18.3369, "step": 20615 }, { "epoch": 0.37684391394154315, "grad_norm": 6.884621453446428, "learning_rate": 7.163594790705829e-06, "loss": 17.8277, "step": 20616 }, { "epoch": 0.37686219313798963, "grad_norm": 5.655586210170619, "learning_rate": 7.163327923967055e-06, "loss": 17.1877, "step": 20617 }, { "epoch": 0.37688047233443617, "grad_norm": 8.771523681859211, "learning_rate": 7.163061049646003e-06, "loss": 18.0978, "step": 20618 }, { "epoch": 0.3768987515308827, "grad_norm": 5.9379482109307595, "learning_rate": 7.1627941677436085e-06, "loss": 17.5278, "step": 20619 }, { "epoch": 0.37691703072732924, "grad_norm": 5.382141824677348, "learning_rate": 7.162527278260811e-06, "loss": 17.233, "step": 20620 }, { "epoch": 0.3769353099237758, "grad_norm": 6.069812859379751, "learning_rate": 7.162260381198541e-06, "loss": 17.3415, "step": 20621 }, { "epoch": 0.37695358912022225, "grad_norm": 5.784579558492788, "learning_rate": 7.161993476557737e-06, "loss": 17.3677, "step": 20622 }, { "epoch": 0.3769718683166688, "grad_norm": 7.2001587613957065, "learning_rate": 7.161726564339333e-06, "loss": 18.0532, "step": 20623 }, { "epoch": 0.3769901475131153, "grad_norm": 6.8242809603122145, "learning_rate": 7.161459644544265e-06, "loss": 17.3681, "step": 20624 }, { "epoch": 0.37700842670956186, "grad_norm": 7.612123828849303, "learning_rate": 7.161192717173469e-06, "loss": 18.4676, "step": 20625 }, { "epoch": 0.3770267059060084, "grad_norm": 5.312600056096935, "learning_rate": 7.16092578222788e-06, "loss": 17.2113, "step": 20626 }, { "epoch": 0.3770449851024549, "grad_norm": 7.309212073447616, "learning_rate": 7.160658839708433e-06, "loss": 17.8747, "step": 20627 }, { "epoch": 0.3770632642989014, "grad_norm": 7.697322528872857, "learning_rate": 7.1603918896160655e-06, "loss": 18.2026, "step": 20628 }, { "epoch": 0.37708154349534795, "grad_norm": 9.588027842999113, "learning_rate": 7.160124931951711e-06, "loss": 18.5526, "step": 20629 }, { "epoch": 0.3770998226917945, "grad_norm": 5.977328879269785, "learning_rate": 7.1598579667163045e-06, "loss": 17.5158, "step": 20630 }, { "epoch": 0.377118101888241, "grad_norm": 6.09204584799851, "learning_rate": 7.1595909939107864e-06, "loss": 17.3506, "step": 20631 }, { "epoch": 0.3771363810846875, "grad_norm": 6.83840505088421, "learning_rate": 7.159324013536086e-06, "loss": 17.6528, "step": 20632 }, { "epoch": 0.37715466028113404, "grad_norm": 5.7778449402272765, "learning_rate": 7.159057025593145e-06, "loss": 17.2363, "step": 20633 }, { "epoch": 0.37717293947758057, "grad_norm": 6.64526238744069, "learning_rate": 7.158790030082896e-06, "loss": 17.5401, "step": 20634 }, { "epoch": 0.3771912186740271, "grad_norm": 5.803148609951093, "learning_rate": 7.158523027006275e-06, "loss": 17.439, "step": 20635 }, { "epoch": 0.3772094978704736, "grad_norm": 6.60449995898493, "learning_rate": 7.158256016364218e-06, "loss": 17.4628, "step": 20636 }, { "epoch": 0.3772277770669201, "grad_norm": 5.7326802198899705, "learning_rate": 7.157988998157659e-06, "loss": 17.0555, "step": 20637 }, { "epoch": 0.37724605626336666, "grad_norm": 7.460395265804034, "learning_rate": 7.157721972387539e-06, "loss": 17.85, "step": 20638 }, { "epoch": 0.3772643354598132, "grad_norm": 6.627173442440888, "learning_rate": 7.157454939054788e-06, "loss": 17.5353, "step": 20639 }, { "epoch": 0.37728261465625973, "grad_norm": 5.914285399030155, "learning_rate": 7.157187898160347e-06, "loss": 17.3655, "step": 20640 }, { "epoch": 0.3773008938527062, "grad_norm": 5.620032876235361, "learning_rate": 7.156920849705149e-06, "loss": 17.2645, "step": 20641 }, { "epoch": 0.37731917304915275, "grad_norm": 7.513128525274539, "learning_rate": 7.156653793690129e-06, "loss": 17.9416, "step": 20642 }, { "epoch": 0.3773374522455993, "grad_norm": 6.12097987736912, "learning_rate": 7.156386730116227e-06, "loss": 17.4376, "step": 20643 }, { "epoch": 0.3773557314420458, "grad_norm": 6.206280891072118, "learning_rate": 7.156119658984375e-06, "loss": 17.4708, "step": 20644 }, { "epoch": 0.37737401063849235, "grad_norm": 6.309603134178015, "learning_rate": 7.155852580295513e-06, "loss": 17.4303, "step": 20645 }, { "epoch": 0.37739228983493883, "grad_norm": 6.091201990564179, "learning_rate": 7.155585494050571e-06, "loss": 17.1456, "step": 20646 }, { "epoch": 0.37741056903138537, "grad_norm": 6.393127162720256, "learning_rate": 7.15531840025049e-06, "loss": 17.6871, "step": 20647 }, { "epoch": 0.3774288482278319, "grad_norm": 5.684010274567867, "learning_rate": 7.155051298896207e-06, "loss": 17.1029, "step": 20648 }, { "epoch": 0.37744712742427844, "grad_norm": 5.859678024083746, "learning_rate": 7.154784189988655e-06, "loss": 17.3521, "step": 20649 }, { "epoch": 0.377465406620725, "grad_norm": 6.493662822507453, "learning_rate": 7.154517073528772e-06, "loss": 17.4507, "step": 20650 }, { "epoch": 0.37748368581717145, "grad_norm": 7.461636475787436, "learning_rate": 7.154249949517493e-06, "loss": 18.1048, "step": 20651 }, { "epoch": 0.377501965013618, "grad_norm": 8.337347370485924, "learning_rate": 7.153982817955755e-06, "loss": 18.4426, "step": 20652 }, { "epoch": 0.3775202442100645, "grad_norm": 7.217298307508891, "learning_rate": 7.153715678844494e-06, "loss": 17.9003, "step": 20653 }, { "epoch": 0.37753852340651106, "grad_norm": 6.72731521926569, "learning_rate": 7.153448532184646e-06, "loss": 17.7426, "step": 20654 }, { "epoch": 0.3775568026029576, "grad_norm": 7.578537547101339, "learning_rate": 7.153181377977148e-06, "loss": 17.8402, "step": 20655 }, { "epoch": 0.3775750817994041, "grad_norm": 6.645035765757099, "learning_rate": 7.152914216222937e-06, "loss": 17.6881, "step": 20656 }, { "epoch": 0.3775933609958506, "grad_norm": 7.351151228534585, "learning_rate": 7.152647046922947e-06, "loss": 18.1416, "step": 20657 }, { "epoch": 0.37761164019229715, "grad_norm": 6.366196253922736, "learning_rate": 7.152379870078119e-06, "loss": 17.3985, "step": 20658 }, { "epoch": 0.3776299193887437, "grad_norm": 7.008910741742748, "learning_rate": 7.152112685689383e-06, "loss": 17.9082, "step": 20659 }, { "epoch": 0.3776481985851902, "grad_norm": 6.443878939319213, "learning_rate": 7.151845493757679e-06, "loss": 17.4953, "step": 20660 }, { "epoch": 0.3776664777816367, "grad_norm": 8.356011214369085, "learning_rate": 7.151578294283944e-06, "loss": 18.0463, "step": 20661 }, { "epoch": 0.37768475697808324, "grad_norm": 5.906777639110561, "learning_rate": 7.151311087269115e-06, "loss": 17.2742, "step": 20662 }, { "epoch": 0.37770303617452977, "grad_norm": 7.20076130382401, "learning_rate": 7.151043872714126e-06, "loss": 17.7422, "step": 20663 }, { "epoch": 0.3777213153709763, "grad_norm": 6.3545204646164954, "learning_rate": 7.150776650619915e-06, "loss": 17.4427, "step": 20664 }, { "epoch": 0.37773959456742284, "grad_norm": 6.162254158058019, "learning_rate": 7.15050942098742e-06, "loss": 17.161, "step": 20665 }, { "epoch": 0.3777578737638693, "grad_norm": 7.150157393780649, "learning_rate": 7.150242183817574e-06, "loss": 18.031, "step": 20666 }, { "epoch": 0.37777615296031586, "grad_norm": 6.03951850120098, "learning_rate": 7.149974939111316e-06, "loss": 17.2216, "step": 20667 }, { "epoch": 0.3777944321567624, "grad_norm": 8.161372667305617, "learning_rate": 7.149707686869582e-06, "loss": 18.3367, "step": 20668 }, { "epoch": 0.37781271135320893, "grad_norm": 6.55974120489108, "learning_rate": 7.149440427093311e-06, "loss": 17.6477, "step": 20669 }, { "epoch": 0.3778309905496554, "grad_norm": 5.8649574848977215, "learning_rate": 7.149173159783437e-06, "loss": 17.6777, "step": 20670 }, { "epoch": 0.37784926974610195, "grad_norm": 7.815085048357885, "learning_rate": 7.148905884940898e-06, "loss": 17.8094, "step": 20671 }, { "epoch": 0.3778675489425485, "grad_norm": 5.209538297226656, "learning_rate": 7.14863860256663e-06, "loss": 17.1547, "step": 20672 }, { "epoch": 0.377885828138995, "grad_norm": 6.423224718314327, "learning_rate": 7.1483713126615685e-06, "loss": 17.8088, "step": 20673 }, { "epoch": 0.37790410733544155, "grad_norm": 5.678146266476477, "learning_rate": 7.148104015226653e-06, "loss": 17.3624, "step": 20674 }, { "epoch": 0.37792238653188803, "grad_norm": 6.250913610206943, "learning_rate": 7.14783671026282e-06, "loss": 17.5371, "step": 20675 }, { "epoch": 0.37794066572833457, "grad_norm": 7.075915479019987, "learning_rate": 7.147569397771006e-06, "loss": 17.9878, "step": 20676 }, { "epoch": 0.3779589449247811, "grad_norm": 5.2718456219478655, "learning_rate": 7.1473020777521474e-06, "loss": 16.95, "step": 20677 }, { "epoch": 0.37797722412122764, "grad_norm": 6.692447361100275, "learning_rate": 7.1470347502071804e-06, "loss": 17.5211, "step": 20678 }, { "epoch": 0.3779955033176742, "grad_norm": 5.869065536809518, "learning_rate": 7.146767415137044e-06, "loss": 17.4061, "step": 20679 }, { "epoch": 0.37801378251412066, "grad_norm": 5.385633023882873, "learning_rate": 7.146500072542672e-06, "loss": 17.283, "step": 20680 }, { "epoch": 0.3780320617105672, "grad_norm": 6.259513688434857, "learning_rate": 7.146232722425006e-06, "loss": 17.4676, "step": 20681 }, { "epoch": 0.3780503409070137, "grad_norm": 7.565017404182663, "learning_rate": 7.145965364784979e-06, "loss": 17.7473, "step": 20682 }, { "epoch": 0.37806862010346026, "grad_norm": 6.2425939611643475, "learning_rate": 7.145697999623531e-06, "loss": 17.3411, "step": 20683 }, { "epoch": 0.3780868992999068, "grad_norm": 6.515403647706122, "learning_rate": 7.145430626941596e-06, "loss": 17.5335, "step": 20684 }, { "epoch": 0.3781051784963533, "grad_norm": 5.913250439289875, "learning_rate": 7.145163246740114e-06, "loss": 17.2398, "step": 20685 }, { "epoch": 0.3781234576927998, "grad_norm": 6.178161015518975, "learning_rate": 7.144895859020022e-06, "loss": 17.678, "step": 20686 }, { "epoch": 0.37814173688924635, "grad_norm": 6.245587199046832, "learning_rate": 7.144628463782254e-06, "loss": 17.3644, "step": 20687 }, { "epoch": 0.3781600160856929, "grad_norm": 9.519394063770232, "learning_rate": 7.14436106102775e-06, "loss": 18.4064, "step": 20688 }, { "epoch": 0.3781782952821394, "grad_norm": 8.162139299355768, "learning_rate": 7.144093650757448e-06, "loss": 18.1407, "step": 20689 }, { "epoch": 0.3781965744785859, "grad_norm": 8.207625117832002, "learning_rate": 7.1438262329722816e-06, "loss": 17.5477, "step": 20690 }, { "epoch": 0.37821485367503244, "grad_norm": 6.423680900033887, "learning_rate": 7.143558807673191e-06, "loss": 17.316, "step": 20691 }, { "epoch": 0.37823313287147897, "grad_norm": 6.368076140486672, "learning_rate": 7.143291374861113e-06, "loss": 17.5855, "step": 20692 }, { "epoch": 0.3782514120679255, "grad_norm": 6.50904391012658, "learning_rate": 7.143023934536986e-06, "loss": 17.2793, "step": 20693 }, { "epoch": 0.37826969126437204, "grad_norm": 7.412766496640174, "learning_rate": 7.142756486701744e-06, "loss": 17.9571, "step": 20694 }, { "epoch": 0.3782879704608185, "grad_norm": 6.464074321628694, "learning_rate": 7.142489031356328e-06, "loss": 17.6333, "step": 20695 }, { "epoch": 0.37830624965726506, "grad_norm": 6.848667173751901, "learning_rate": 7.1422215685016725e-06, "loss": 17.7922, "step": 20696 }, { "epoch": 0.3783245288537116, "grad_norm": 6.335291254994485, "learning_rate": 7.141954098138717e-06, "loss": 17.5243, "step": 20697 }, { "epoch": 0.37834280805015813, "grad_norm": 5.87701609881068, "learning_rate": 7.1416866202683975e-06, "loss": 17.2739, "step": 20698 }, { "epoch": 0.37836108724660467, "grad_norm": 6.276542507763416, "learning_rate": 7.141419134891654e-06, "loss": 17.4585, "step": 20699 }, { "epoch": 0.37837936644305115, "grad_norm": 5.95125091657514, "learning_rate": 7.141151642009421e-06, "loss": 17.2629, "step": 20700 }, { "epoch": 0.3783976456394977, "grad_norm": 6.099762326585936, "learning_rate": 7.1408841416226375e-06, "loss": 17.2621, "step": 20701 }, { "epoch": 0.3784159248359442, "grad_norm": 6.472944595990493, "learning_rate": 7.14061663373224e-06, "loss": 17.6128, "step": 20702 }, { "epoch": 0.37843420403239075, "grad_norm": 7.422113554229637, "learning_rate": 7.1403491183391695e-06, "loss": 17.966, "step": 20703 }, { "epoch": 0.37845248322883723, "grad_norm": 6.347877429952918, "learning_rate": 7.140081595444359e-06, "loss": 17.2928, "step": 20704 }, { "epoch": 0.37847076242528377, "grad_norm": 6.016001465431868, "learning_rate": 7.139814065048748e-06, "loss": 17.2871, "step": 20705 }, { "epoch": 0.3784890416217303, "grad_norm": 6.682169672155171, "learning_rate": 7.139546527153275e-06, "loss": 17.435, "step": 20706 }, { "epoch": 0.37850732081817684, "grad_norm": 6.188439116903879, "learning_rate": 7.139278981758878e-06, "loss": 17.6462, "step": 20707 }, { "epoch": 0.3785256000146234, "grad_norm": 6.600287770503797, "learning_rate": 7.139011428866493e-06, "loss": 17.3779, "step": 20708 }, { "epoch": 0.37854387921106986, "grad_norm": 5.474955385579385, "learning_rate": 7.1387438684770585e-06, "loss": 16.9861, "step": 20709 }, { "epoch": 0.3785621584075164, "grad_norm": 5.5985919635142745, "learning_rate": 7.138476300591511e-06, "loss": 17.2531, "step": 20710 }, { "epoch": 0.3785804376039629, "grad_norm": 5.726156526690359, "learning_rate": 7.138208725210791e-06, "loss": 17.3002, "step": 20711 }, { "epoch": 0.37859871680040946, "grad_norm": 6.581332163686492, "learning_rate": 7.137941142335836e-06, "loss": 17.4097, "step": 20712 }, { "epoch": 0.378616995996856, "grad_norm": 7.045607385744825, "learning_rate": 7.137673551967581e-06, "loss": 17.8435, "step": 20713 }, { "epoch": 0.3786352751933025, "grad_norm": 6.638400931802244, "learning_rate": 7.1374059541069665e-06, "loss": 17.755, "step": 20714 }, { "epoch": 0.378653554389749, "grad_norm": 7.596801101096576, "learning_rate": 7.1371383487549296e-06, "loss": 17.7562, "step": 20715 }, { "epoch": 0.37867183358619555, "grad_norm": 9.420613093821757, "learning_rate": 7.136870735912408e-06, "loss": 18.0275, "step": 20716 }, { "epoch": 0.3786901127826421, "grad_norm": 6.237802438989488, "learning_rate": 7.13660311558034e-06, "loss": 17.5701, "step": 20717 }, { "epoch": 0.3787083919790886, "grad_norm": 8.624445529734015, "learning_rate": 7.136335487759664e-06, "loss": 18.3155, "step": 20718 }, { "epoch": 0.3787266711755351, "grad_norm": 7.878952553669444, "learning_rate": 7.136067852451316e-06, "loss": 18.0405, "step": 20719 }, { "epoch": 0.37874495037198164, "grad_norm": 4.998626687964603, "learning_rate": 7.135800209656238e-06, "loss": 16.9398, "step": 20720 }, { "epoch": 0.3787632295684282, "grad_norm": 6.383723286774549, "learning_rate": 7.135532559375364e-06, "loss": 17.6559, "step": 20721 }, { "epoch": 0.3787815087648747, "grad_norm": 8.815173822229411, "learning_rate": 7.135264901609633e-06, "loss": 18.904, "step": 20722 }, { "epoch": 0.37879978796132124, "grad_norm": 7.22992635093191, "learning_rate": 7.134997236359984e-06, "loss": 17.6525, "step": 20723 }, { "epoch": 0.3788180671577677, "grad_norm": 5.9283624355310165, "learning_rate": 7.134729563627356e-06, "loss": 17.2961, "step": 20724 }, { "epoch": 0.37883634635421426, "grad_norm": 7.719535222571975, "learning_rate": 7.134461883412686e-06, "loss": 17.9128, "step": 20725 }, { "epoch": 0.3788546255506608, "grad_norm": 7.733402061159149, "learning_rate": 7.134194195716912e-06, "loss": 17.5171, "step": 20726 }, { "epoch": 0.37887290474710733, "grad_norm": 6.445478746516123, "learning_rate": 7.133926500540973e-06, "loss": 17.4731, "step": 20727 }, { "epoch": 0.37889118394355387, "grad_norm": 6.5167505852322565, "learning_rate": 7.133658797885806e-06, "loss": 17.68, "step": 20728 }, { "epoch": 0.37890946314000035, "grad_norm": 5.421027084675409, "learning_rate": 7.1333910877523505e-06, "loss": 17.1143, "step": 20729 }, { "epoch": 0.3789277423364469, "grad_norm": 6.159280335343325, "learning_rate": 7.1331233701415445e-06, "loss": 17.4021, "step": 20730 }, { "epoch": 0.3789460215328934, "grad_norm": 6.331667450781991, "learning_rate": 7.132855645054326e-06, "loss": 17.3254, "step": 20731 }, { "epoch": 0.37896430072933995, "grad_norm": 5.962973632759887, "learning_rate": 7.1325879124916335e-06, "loss": 17.3799, "step": 20732 }, { "epoch": 0.3789825799257865, "grad_norm": 9.026509699935183, "learning_rate": 7.1323201724544054e-06, "loss": 17.743, "step": 20733 }, { "epoch": 0.37900085912223297, "grad_norm": 7.870083216701158, "learning_rate": 7.13205242494358e-06, "loss": 17.9164, "step": 20734 }, { "epoch": 0.3790191383186795, "grad_norm": 6.315996277779612, "learning_rate": 7.131784669960097e-06, "loss": 17.4389, "step": 20735 }, { "epoch": 0.37903741751512604, "grad_norm": 6.230132327519647, "learning_rate": 7.1315169075048925e-06, "loss": 17.5464, "step": 20736 }, { "epoch": 0.3790556967115726, "grad_norm": 6.512243512904325, "learning_rate": 7.131249137578906e-06, "loss": 17.4722, "step": 20737 }, { "epoch": 0.37907397590801906, "grad_norm": 6.23420620772002, "learning_rate": 7.130981360183078e-06, "loss": 17.5325, "step": 20738 }, { "epoch": 0.3790922551044656, "grad_norm": 6.393638833110737, "learning_rate": 7.130713575318343e-06, "loss": 17.6389, "step": 20739 }, { "epoch": 0.3791105343009121, "grad_norm": 6.95356601842442, "learning_rate": 7.130445782985643e-06, "loss": 17.6849, "step": 20740 }, { "epoch": 0.37912881349735866, "grad_norm": 7.321391171174789, "learning_rate": 7.1301779831859155e-06, "loss": 17.7158, "step": 20741 }, { "epoch": 0.3791470926938052, "grad_norm": 6.270581262878128, "learning_rate": 7.129910175920098e-06, "loss": 17.5712, "step": 20742 }, { "epoch": 0.3791653718902517, "grad_norm": 5.7544699683778076, "learning_rate": 7.129642361189131e-06, "loss": 17.1531, "step": 20743 }, { "epoch": 0.3791836510866982, "grad_norm": 5.94292761915387, "learning_rate": 7.129374538993952e-06, "loss": 17.3732, "step": 20744 }, { "epoch": 0.37920193028314475, "grad_norm": 6.793645401089376, "learning_rate": 7.129106709335502e-06, "loss": 17.7743, "step": 20745 }, { "epoch": 0.3792202094795913, "grad_norm": 5.669400652861999, "learning_rate": 7.128838872214714e-06, "loss": 17.4094, "step": 20746 }, { "epoch": 0.3792384886760378, "grad_norm": 6.0989341256543135, "learning_rate": 7.128571027632533e-06, "loss": 17.5634, "step": 20747 }, { "epoch": 0.3792567678724843, "grad_norm": 6.06441229343946, "learning_rate": 7.128303175589894e-06, "loss": 17.439, "step": 20748 }, { "epoch": 0.37927504706893084, "grad_norm": 6.35104589498538, "learning_rate": 7.128035316087738e-06, "loss": 17.6118, "step": 20749 }, { "epoch": 0.3792933262653774, "grad_norm": 7.39298736376242, "learning_rate": 7.127767449127003e-06, "loss": 17.9437, "step": 20750 }, { "epoch": 0.3793116054618239, "grad_norm": 9.08367253886659, "learning_rate": 7.127499574708626e-06, "loss": 18.5506, "step": 20751 }, { "epoch": 0.37932988465827044, "grad_norm": 6.919256850079278, "learning_rate": 7.12723169283355e-06, "loss": 17.8666, "step": 20752 }, { "epoch": 0.3793481638547169, "grad_norm": 7.263917032680766, "learning_rate": 7.126963803502711e-06, "loss": 17.9848, "step": 20753 }, { "epoch": 0.37936644305116346, "grad_norm": 6.694937443856486, "learning_rate": 7.126695906717047e-06, "loss": 17.6934, "step": 20754 }, { "epoch": 0.37938472224761, "grad_norm": 7.969832870115532, "learning_rate": 7.1264280024775e-06, "loss": 18.1185, "step": 20755 }, { "epoch": 0.37940300144405653, "grad_norm": 6.4036621065899, "learning_rate": 7.126160090785006e-06, "loss": 17.5468, "step": 20756 }, { "epoch": 0.37942128064050307, "grad_norm": 5.827606480174566, "learning_rate": 7.125892171640506e-06, "loss": 17.217, "step": 20757 }, { "epoch": 0.37943955983694955, "grad_norm": 5.506119830132652, "learning_rate": 7.12562424504494e-06, "loss": 17.0894, "step": 20758 }, { "epoch": 0.3794578390333961, "grad_norm": 7.0339410972391745, "learning_rate": 7.125356310999243e-06, "loss": 17.8281, "step": 20759 }, { "epoch": 0.3794761182298426, "grad_norm": 6.1191833339710815, "learning_rate": 7.125088369504357e-06, "loss": 17.2776, "step": 20760 }, { "epoch": 0.37949439742628915, "grad_norm": 6.140600884159622, "learning_rate": 7.124820420561221e-06, "loss": 17.3743, "step": 20761 }, { "epoch": 0.3795126766227357, "grad_norm": 7.652709346580124, "learning_rate": 7.124552464170775e-06, "loss": 18.0969, "step": 20762 }, { "epoch": 0.37953095581918217, "grad_norm": 5.975453658052, "learning_rate": 7.124284500333955e-06, "loss": 17.5479, "step": 20763 }, { "epoch": 0.3795492350156287, "grad_norm": 5.893502341608432, "learning_rate": 7.124016529051703e-06, "loss": 17.3345, "step": 20764 }, { "epoch": 0.37956751421207524, "grad_norm": 6.661394297806554, "learning_rate": 7.1237485503249585e-06, "loss": 17.5253, "step": 20765 }, { "epoch": 0.3795857934085218, "grad_norm": 5.5381015433825596, "learning_rate": 7.123480564154659e-06, "loss": 17.033, "step": 20766 }, { "epoch": 0.3796040726049683, "grad_norm": 6.238925559423806, "learning_rate": 7.123212570541743e-06, "loss": 17.5199, "step": 20767 }, { "epoch": 0.3796223518014148, "grad_norm": 6.2780918759583795, "learning_rate": 7.122944569487153e-06, "loss": 17.4589, "step": 20768 }, { "epoch": 0.37964063099786133, "grad_norm": 5.659419884527585, "learning_rate": 7.122676560991826e-06, "loss": 17.1041, "step": 20769 }, { "epoch": 0.37965891019430786, "grad_norm": 7.404834297493874, "learning_rate": 7.1224085450567e-06, "loss": 17.856, "step": 20770 }, { "epoch": 0.3796771893907544, "grad_norm": 6.210689993677177, "learning_rate": 7.122140521682719e-06, "loss": 17.309, "step": 20771 }, { "epoch": 0.3796954685872009, "grad_norm": 4.809070659389534, "learning_rate": 7.121872490870818e-06, "loss": 16.861, "step": 20772 }, { "epoch": 0.3797137477836474, "grad_norm": 7.589711016124053, "learning_rate": 7.1216044526219375e-06, "loss": 17.6736, "step": 20773 }, { "epoch": 0.37973202698009395, "grad_norm": 6.303364912652323, "learning_rate": 7.121336406937018e-06, "loss": 17.6306, "step": 20774 }, { "epoch": 0.3797503061765405, "grad_norm": 7.443107831532645, "learning_rate": 7.121068353817e-06, "loss": 18.2774, "step": 20775 }, { "epoch": 0.379768585372987, "grad_norm": 6.809614136229891, "learning_rate": 7.120800293262821e-06, "loss": 17.7834, "step": 20776 }, { "epoch": 0.3797868645694335, "grad_norm": 5.856678594502468, "learning_rate": 7.1205322252754206e-06, "loss": 17.2173, "step": 20777 }, { "epoch": 0.37980514376588004, "grad_norm": 7.222293962955577, "learning_rate": 7.120264149855738e-06, "loss": 17.7268, "step": 20778 }, { "epoch": 0.3798234229623266, "grad_norm": 5.724796100158445, "learning_rate": 7.119996067004714e-06, "loss": 17.3644, "step": 20779 }, { "epoch": 0.3798417021587731, "grad_norm": 5.224426821291531, "learning_rate": 7.119727976723289e-06, "loss": 17.0199, "step": 20780 }, { "epoch": 0.37985998135521964, "grad_norm": 6.378530739003206, "learning_rate": 7.119459879012399e-06, "loss": 17.5581, "step": 20781 }, { "epoch": 0.3798782605516661, "grad_norm": 7.493453941456023, "learning_rate": 7.119191773872988e-06, "loss": 17.8533, "step": 20782 }, { "epoch": 0.37989653974811266, "grad_norm": 7.181188667677754, "learning_rate": 7.118923661305992e-06, "loss": 17.936, "step": 20783 }, { "epoch": 0.3799148189445592, "grad_norm": 6.825187861916647, "learning_rate": 7.118655541312354e-06, "loss": 17.8693, "step": 20784 }, { "epoch": 0.37993309814100573, "grad_norm": 6.078822640548004, "learning_rate": 7.118387413893011e-06, "loss": 17.4699, "step": 20785 }, { "epoch": 0.37995137733745227, "grad_norm": 6.949376025687311, "learning_rate": 7.118119279048905e-06, "loss": 17.6847, "step": 20786 }, { "epoch": 0.37996965653389875, "grad_norm": 6.2332728126778605, "learning_rate": 7.117851136780974e-06, "loss": 17.6239, "step": 20787 }, { "epoch": 0.3799879357303453, "grad_norm": 10.07927838175929, "learning_rate": 7.1175829870901595e-06, "loss": 18.2738, "step": 20788 }, { "epoch": 0.3800062149267918, "grad_norm": 6.965522917367423, "learning_rate": 7.1173148299774e-06, "loss": 17.8364, "step": 20789 }, { "epoch": 0.38002449412323835, "grad_norm": 6.24263074851901, "learning_rate": 7.117046665443635e-06, "loss": 17.5051, "step": 20790 }, { "epoch": 0.3800427733196849, "grad_norm": 6.529845780698912, "learning_rate": 7.1167784934898044e-06, "loss": 17.8506, "step": 20791 }, { "epoch": 0.38006105251613137, "grad_norm": 6.713118413498703, "learning_rate": 7.116510314116851e-06, "loss": 17.42, "step": 20792 }, { "epoch": 0.3800793317125779, "grad_norm": 5.7589875249438425, "learning_rate": 7.116242127325712e-06, "loss": 17.3183, "step": 20793 }, { "epoch": 0.38009761090902444, "grad_norm": 5.604574003598077, "learning_rate": 7.115973933117328e-06, "loss": 17.4576, "step": 20794 }, { "epoch": 0.380115890105471, "grad_norm": 5.863492297018321, "learning_rate": 7.115705731492638e-06, "loss": 17.2632, "step": 20795 }, { "epoch": 0.3801341693019175, "grad_norm": 7.712852658319658, "learning_rate": 7.115437522452584e-06, "loss": 17.8007, "step": 20796 }, { "epoch": 0.380152448498364, "grad_norm": 5.439655829759279, "learning_rate": 7.115169305998104e-06, "loss": 16.9575, "step": 20797 }, { "epoch": 0.38017072769481053, "grad_norm": 6.5774090196801, "learning_rate": 7.1149010821301404e-06, "loss": 17.4319, "step": 20798 }, { "epoch": 0.38018900689125706, "grad_norm": 6.539854924193836, "learning_rate": 7.1146328508496325e-06, "loss": 17.3212, "step": 20799 }, { "epoch": 0.3802072860877036, "grad_norm": 6.136157464951033, "learning_rate": 7.1143646121575185e-06, "loss": 17.4212, "step": 20800 }, { "epoch": 0.38022556528415014, "grad_norm": 5.321499503310928, "learning_rate": 7.1140963660547394e-06, "loss": 17.2692, "step": 20801 }, { "epoch": 0.3802438444805966, "grad_norm": 5.402814100503582, "learning_rate": 7.113828112542237e-06, "loss": 17.0028, "step": 20802 }, { "epoch": 0.38026212367704315, "grad_norm": 7.2308965458430885, "learning_rate": 7.1135598516209515e-06, "loss": 17.8786, "step": 20803 }, { "epoch": 0.3802804028734897, "grad_norm": 9.61500681048284, "learning_rate": 7.113291583291821e-06, "loss": 18.4204, "step": 20804 }, { "epoch": 0.3802986820699362, "grad_norm": 6.106425624782197, "learning_rate": 7.113023307555786e-06, "loss": 17.2703, "step": 20805 }, { "epoch": 0.3803169612663827, "grad_norm": 6.99745351777501, "learning_rate": 7.1127550244137885e-06, "loss": 17.6546, "step": 20806 }, { "epoch": 0.38033524046282924, "grad_norm": 6.538338269757374, "learning_rate": 7.112486733866769e-06, "loss": 17.8167, "step": 20807 }, { "epoch": 0.3803535196592758, "grad_norm": 7.364576068447406, "learning_rate": 7.112218435915667e-06, "loss": 17.8228, "step": 20808 }, { "epoch": 0.3803717988557223, "grad_norm": 6.757236656425216, "learning_rate": 7.111950130561421e-06, "loss": 17.7835, "step": 20809 }, { "epoch": 0.38039007805216885, "grad_norm": 5.933350921832651, "learning_rate": 7.111681817804974e-06, "loss": 17.1217, "step": 20810 }, { "epoch": 0.3804083572486153, "grad_norm": 6.232631490358939, "learning_rate": 7.111413497647265e-06, "loss": 17.4871, "step": 20811 }, { "epoch": 0.38042663644506186, "grad_norm": 6.275613415622698, "learning_rate": 7.1111451700892355e-06, "loss": 17.231, "step": 20812 }, { "epoch": 0.3804449156415084, "grad_norm": 6.23796317998585, "learning_rate": 7.110876835131824e-06, "loss": 17.502, "step": 20813 }, { "epoch": 0.38046319483795493, "grad_norm": 6.2602286933265905, "learning_rate": 7.110608492775974e-06, "loss": 17.3774, "step": 20814 }, { "epoch": 0.38048147403440147, "grad_norm": 6.931666805961042, "learning_rate": 7.1103401430226225e-06, "loss": 17.7166, "step": 20815 }, { "epoch": 0.38049975323084795, "grad_norm": 6.035905670956372, "learning_rate": 7.1100717858727145e-06, "loss": 17.2214, "step": 20816 }, { "epoch": 0.3805180324272945, "grad_norm": 5.7084629547077235, "learning_rate": 7.109803421327187e-06, "loss": 17.2142, "step": 20817 }, { "epoch": 0.380536311623741, "grad_norm": 8.142998894952107, "learning_rate": 7.1095350493869795e-06, "loss": 18.3317, "step": 20818 }, { "epoch": 0.38055459082018755, "grad_norm": 6.762787633424514, "learning_rate": 7.109266670053036e-06, "loss": 17.8856, "step": 20819 }, { "epoch": 0.3805728700166341, "grad_norm": 5.078895769171451, "learning_rate": 7.108998283326298e-06, "loss": 17.0001, "step": 20820 }, { "epoch": 0.38059114921308057, "grad_norm": 6.911115253979119, "learning_rate": 7.1087298892077015e-06, "loss": 17.8457, "step": 20821 }, { "epoch": 0.3806094284095271, "grad_norm": 6.12260812313701, "learning_rate": 7.108461487698192e-06, "loss": 17.577, "step": 20822 }, { "epoch": 0.38062770760597364, "grad_norm": 6.335144243529556, "learning_rate": 7.108193078798704e-06, "loss": 17.1269, "step": 20823 }, { "epoch": 0.3806459868024202, "grad_norm": 6.537416389310044, "learning_rate": 7.107924662510186e-06, "loss": 17.5166, "step": 20824 }, { "epoch": 0.3806642659988667, "grad_norm": 7.255772304525292, "learning_rate": 7.107656238833573e-06, "loss": 17.5818, "step": 20825 }, { "epoch": 0.3806825451953132, "grad_norm": 6.587597096182242, "learning_rate": 7.10738780776981e-06, "loss": 17.718, "step": 20826 }, { "epoch": 0.38070082439175973, "grad_norm": 6.450749813985822, "learning_rate": 7.107119369319834e-06, "loss": 17.4572, "step": 20827 }, { "epoch": 0.38071910358820626, "grad_norm": 9.730322542962446, "learning_rate": 7.106850923484587e-06, "loss": 18.7782, "step": 20828 }, { "epoch": 0.3807373827846528, "grad_norm": 6.702658270530379, "learning_rate": 7.106582470265011e-06, "loss": 17.5062, "step": 20829 }, { "epoch": 0.38075566198109934, "grad_norm": 6.741417519344889, "learning_rate": 7.1063140096620455e-06, "loss": 17.8569, "step": 20830 }, { "epoch": 0.3807739411775458, "grad_norm": 6.835549574482659, "learning_rate": 7.106045541676634e-06, "loss": 17.7487, "step": 20831 }, { "epoch": 0.38079222037399235, "grad_norm": 5.835097889779242, "learning_rate": 7.105777066309714e-06, "loss": 17.3014, "step": 20832 }, { "epoch": 0.3808104995704389, "grad_norm": 8.640088946506342, "learning_rate": 7.105508583562227e-06, "loss": 18.1967, "step": 20833 }, { "epoch": 0.3808287787668854, "grad_norm": 6.4034437865262674, "learning_rate": 7.105240093435118e-06, "loss": 17.1212, "step": 20834 }, { "epoch": 0.38084705796333196, "grad_norm": 5.855337970033611, "learning_rate": 7.104971595929324e-06, "loss": 17.2914, "step": 20835 }, { "epoch": 0.38086533715977844, "grad_norm": 6.656582309527312, "learning_rate": 7.1047030910457874e-06, "loss": 17.8303, "step": 20836 }, { "epoch": 0.380883616356225, "grad_norm": 6.657019482079074, "learning_rate": 7.104434578785448e-06, "loss": 17.9462, "step": 20837 }, { "epoch": 0.3809018955526715, "grad_norm": 6.17664984070476, "learning_rate": 7.104166059149249e-06, "loss": 17.4421, "step": 20838 }, { "epoch": 0.38092017474911805, "grad_norm": 5.880778338538877, "learning_rate": 7.10389753213813e-06, "loss": 17.3208, "step": 20839 }, { "epoch": 0.3809384539455645, "grad_norm": 4.631100250135552, "learning_rate": 7.103628997753033e-06, "loss": 16.809, "step": 20840 }, { "epoch": 0.38095673314201106, "grad_norm": 7.10246331981957, "learning_rate": 7.1033604559948985e-06, "loss": 17.8819, "step": 20841 }, { "epoch": 0.3809750123384576, "grad_norm": 6.2676708374018135, "learning_rate": 7.103091906864669e-06, "loss": 17.5006, "step": 20842 }, { "epoch": 0.38099329153490413, "grad_norm": 6.668340103982635, "learning_rate": 7.102823350363283e-06, "loss": 17.5777, "step": 20843 }, { "epoch": 0.38101157073135067, "grad_norm": 6.365695726242811, "learning_rate": 7.102554786491687e-06, "loss": 17.5829, "step": 20844 }, { "epoch": 0.38102984992779715, "grad_norm": 6.769414572402143, "learning_rate": 7.102286215250815e-06, "loss": 17.6714, "step": 20845 }, { "epoch": 0.3810481291242437, "grad_norm": 6.226691924059086, "learning_rate": 7.102017636641615e-06, "loss": 17.8598, "step": 20846 }, { "epoch": 0.3810664083206902, "grad_norm": 5.4387356711989865, "learning_rate": 7.101749050665024e-06, "loss": 17.2436, "step": 20847 }, { "epoch": 0.38108468751713676, "grad_norm": 6.852695570490196, "learning_rate": 7.101480457321987e-06, "loss": 17.6438, "step": 20848 }, { "epoch": 0.3811029667135833, "grad_norm": 6.629804302980361, "learning_rate": 7.101211856613442e-06, "loss": 17.5632, "step": 20849 }, { "epoch": 0.38112124591002977, "grad_norm": 6.657351809764172, "learning_rate": 7.100943248540331e-06, "loss": 17.7071, "step": 20850 }, { "epoch": 0.3811395251064763, "grad_norm": 7.011585648378357, "learning_rate": 7.100674633103597e-06, "loss": 17.6329, "step": 20851 }, { "epoch": 0.38115780430292284, "grad_norm": 7.160553696981467, "learning_rate": 7.100406010304182e-06, "loss": 17.6871, "step": 20852 }, { "epoch": 0.3811760834993694, "grad_norm": 7.141567162756561, "learning_rate": 7.100137380143025e-06, "loss": 18.0261, "step": 20853 }, { "epoch": 0.3811943626958159, "grad_norm": 6.259878955116255, "learning_rate": 7.099868742621069e-06, "loss": 17.3837, "step": 20854 }, { "epoch": 0.3812126418922624, "grad_norm": 5.492887614629066, "learning_rate": 7.099600097739254e-06, "loss": 17.3764, "step": 20855 }, { "epoch": 0.38123092108870893, "grad_norm": 7.144588748872126, "learning_rate": 7.099331445498524e-06, "loss": 17.9633, "step": 20856 }, { "epoch": 0.38124920028515547, "grad_norm": 7.159360110593641, "learning_rate": 7.0990627858998195e-06, "loss": 17.9482, "step": 20857 }, { "epoch": 0.381267479481602, "grad_norm": 7.675817932055794, "learning_rate": 7.098794118944083e-06, "loss": 18.113, "step": 20858 }, { "epoch": 0.38128575867804854, "grad_norm": 5.633699805040702, "learning_rate": 7.098525444632253e-06, "loss": 17.3153, "step": 20859 }, { "epoch": 0.381304037874495, "grad_norm": 6.470043418287717, "learning_rate": 7.0982567629652744e-06, "loss": 17.4714, "step": 20860 }, { "epoch": 0.38132231707094155, "grad_norm": 6.768151848349427, "learning_rate": 7.0979880739440876e-06, "loss": 17.6138, "step": 20861 }, { "epoch": 0.3813405962673881, "grad_norm": 6.104392527320515, "learning_rate": 7.0977193775696366e-06, "loss": 17.4135, "step": 20862 }, { "epoch": 0.3813588754638346, "grad_norm": 5.745199755207529, "learning_rate": 7.097450673842859e-06, "loss": 17.2317, "step": 20863 }, { "epoch": 0.38137715466028116, "grad_norm": 6.5674161376538756, "learning_rate": 7.097181962764699e-06, "loss": 17.8981, "step": 20864 }, { "epoch": 0.38139543385672764, "grad_norm": 7.043972020853318, "learning_rate": 7.0969132443361e-06, "loss": 17.7503, "step": 20865 }, { "epoch": 0.3814137130531742, "grad_norm": 6.367059956679154, "learning_rate": 7.096644518558e-06, "loss": 17.639, "step": 20866 }, { "epoch": 0.3814319922496207, "grad_norm": 6.702691715465437, "learning_rate": 7.096375785431344e-06, "loss": 17.6277, "step": 20867 }, { "epoch": 0.38145027144606725, "grad_norm": 6.207568160627037, "learning_rate": 7.096107044957072e-06, "loss": 17.6149, "step": 20868 }, { "epoch": 0.3814685506425138, "grad_norm": 6.595650471552085, "learning_rate": 7.095838297136127e-06, "loss": 17.7398, "step": 20869 }, { "epoch": 0.38148682983896026, "grad_norm": 6.134431622447001, "learning_rate": 7.095569541969451e-06, "loss": 17.3198, "step": 20870 }, { "epoch": 0.3815051090354068, "grad_norm": 6.07535366359923, "learning_rate": 7.095300779457986e-06, "loss": 17.5319, "step": 20871 }, { "epoch": 0.38152338823185333, "grad_norm": 5.8705506077538585, "learning_rate": 7.0950320096026735e-06, "loss": 17.4445, "step": 20872 }, { "epoch": 0.38154166742829987, "grad_norm": 6.168112308933465, "learning_rate": 7.094763232404454e-06, "loss": 17.3161, "step": 20873 }, { "epoch": 0.38155994662474635, "grad_norm": 5.920086473077339, "learning_rate": 7.094494447864272e-06, "loss": 17.2226, "step": 20874 }, { "epoch": 0.3815782258211929, "grad_norm": 6.572426053992683, "learning_rate": 7.094225655983069e-06, "loss": 17.3873, "step": 20875 }, { "epoch": 0.3815965050176394, "grad_norm": 7.497605754510795, "learning_rate": 7.093956856761788e-06, "loss": 17.387, "step": 20876 }, { "epoch": 0.38161478421408596, "grad_norm": 7.672030592204335, "learning_rate": 7.0936880502013685e-06, "loss": 17.9081, "step": 20877 }, { "epoch": 0.3816330634105325, "grad_norm": 4.961913623652882, "learning_rate": 7.093419236302753e-06, "loss": 16.7968, "step": 20878 }, { "epoch": 0.38165134260697897, "grad_norm": 5.825915175452671, "learning_rate": 7.093150415066887e-06, "loss": 17.1826, "step": 20879 }, { "epoch": 0.3816696218034255, "grad_norm": 5.778189478281201, "learning_rate": 7.09288158649471e-06, "loss": 17.192, "step": 20880 }, { "epoch": 0.38168790099987204, "grad_norm": 6.757521273809351, "learning_rate": 7.092612750587164e-06, "loss": 17.7086, "step": 20881 }, { "epoch": 0.3817061801963186, "grad_norm": 7.279928892138955, "learning_rate": 7.092343907345191e-06, "loss": 18.1617, "step": 20882 }, { "epoch": 0.3817244593927651, "grad_norm": 6.173932016935703, "learning_rate": 7.092075056769735e-06, "loss": 17.9202, "step": 20883 }, { "epoch": 0.3817427385892116, "grad_norm": 6.879067491651368, "learning_rate": 7.0918061988617386e-06, "loss": 17.9197, "step": 20884 }, { "epoch": 0.38176101778565813, "grad_norm": 6.638190201616912, "learning_rate": 7.091537333622142e-06, "loss": 17.7333, "step": 20885 }, { "epoch": 0.38177929698210467, "grad_norm": 5.592561299929222, "learning_rate": 7.0912684610518876e-06, "loss": 17.1227, "step": 20886 }, { "epoch": 0.3817975761785512, "grad_norm": 6.838690555546561, "learning_rate": 7.090999581151919e-06, "loss": 17.5876, "step": 20887 }, { "epoch": 0.38181585537499774, "grad_norm": 7.727658466566001, "learning_rate": 7.09073069392318e-06, "loss": 17.8524, "step": 20888 }, { "epoch": 0.3818341345714442, "grad_norm": 7.9605758252771865, "learning_rate": 7.09046179936661e-06, "loss": 18.4245, "step": 20889 }, { "epoch": 0.38185241376789075, "grad_norm": 4.929522106376273, "learning_rate": 7.090192897483154e-06, "loss": 16.8717, "step": 20890 }, { "epoch": 0.3818706929643373, "grad_norm": 7.145315426100771, "learning_rate": 7.089923988273752e-06, "loss": 17.7391, "step": 20891 }, { "epoch": 0.3818889721607838, "grad_norm": 5.938780832852384, "learning_rate": 7.089655071739347e-06, "loss": 17.4467, "step": 20892 }, { "epoch": 0.38190725135723036, "grad_norm": 6.16574190594442, "learning_rate": 7.089386147880885e-06, "loss": 17.5801, "step": 20893 }, { "epoch": 0.38192553055367684, "grad_norm": 6.957619290412913, "learning_rate": 7.089117216699304e-06, "loss": 17.8385, "step": 20894 }, { "epoch": 0.3819438097501234, "grad_norm": 5.634910750663889, "learning_rate": 7.088848278195548e-06, "loss": 17.4732, "step": 20895 }, { "epoch": 0.3819620889465699, "grad_norm": 12.784091532845126, "learning_rate": 7.088579332370561e-06, "loss": 18.0073, "step": 20896 }, { "epoch": 0.38198036814301645, "grad_norm": 5.366099207301944, "learning_rate": 7.088310379225285e-06, "loss": 17.1493, "step": 20897 }, { "epoch": 0.381998647339463, "grad_norm": 7.263435265105196, "learning_rate": 7.088041418760662e-06, "loss": 17.5792, "step": 20898 }, { "epoch": 0.38201692653590946, "grad_norm": 5.88373008069427, "learning_rate": 7.087772450977634e-06, "loss": 17.4534, "step": 20899 }, { "epoch": 0.382035205732356, "grad_norm": 7.455630310783019, "learning_rate": 7.087503475877145e-06, "loss": 17.9119, "step": 20900 }, { "epoch": 0.38205348492880253, "grad_norm": 5.745101603164724, "learning_rate": 7.087234493460139e-06, "loss": 17.0636, "step": 20901 }, { "epoch": 0.38207176412524907, "grad_norm": 6.147402314200255, "learning_rate": 7.086965503727556e-06, "loss": 17.4028, "step": 20902 }, { "epoch": 0.3820900433216956, "grad_norm": 7.93139180122912, "learning_rate": 7.086696506680342e-06, "loss": 18.0001, "step": 20903 }, { "epoch": 0.3821083225181421, "grad_norm": 5.751434835443962, "learning_rate": 7.086427502319435e-06, "loss": 17.4825, "step": 20904 }, { "epoch": 0.3821266017145886, "grad_norm": 10.527463834833005, "learning_rate": 7.0861584906457805e-06, "loss": 18.3152, "step": 20905 }, { "epoch": 0.38214488091103516, "grad_norm": 6.088116082350334, "learning_rate": 7.085889471660323e-06, "loss": 17.3969, "step": 20906 }, { "epoch": 0.3821631601074817, "grad_norm": 5.788142043169116, "learning_rate": 7.085620445364005e-06, "loss": 17.5298, "step": 20907 }, { "epoch": 0.38218143930392817, "grad_norm": 7.462126406792685, "learning_rate": 7.085351411757766e-06, "loss": 18.252, "step": 20908 }, { "epoch": 0.3821997185003747, "grad_norm": 6.409991001895903, "learning_rate": 7.085082370842553e-06, "loss": 17.4893, "step": 20909 }, { "epoch": 0.38221799769682124, "grad_norm": 6.865954809362901, "learning_rate": 7.084813322619306e-06, "loss": 17.9921, "step": 20910 }, { "epoch": 0.3822362768932678, "grad_norm": 6.856510409482297, "learning_rate": 7.08454426708897e-06, "loss": 17.4194, "step": 20911 }, { "epoch": 0.3822545560897143, "grad_norm": 4.615319540814306, "learning_rate": 7.0842752042524865e-06, "loss": 16.9605, "step": 20912 }, { "epoch": 0.3822728352861608, "grad_norm": 5.526644847407925, "learning_rate": 7.084006134110799e-06, "loss": 17.1899, "step": 20913 }, { "epoch": 0.38229111448260733, "grad_norm": 8.99895679572845, "learning_rate": 7.0837370566648525e-06, "loss": 17.4682, "step": 20914 }, { "epoch": 0.38230939367905387, "grad_norm": 6.301254732162111, "learning_rate": 7.0834679719155876e-06, "loss": 17.1955, "step": 20915 }, { "epoch": 0.3823276728755004, "grad_norm": 7.629743795603142, "learning_rate": 7.083198879863947e-06, "loss": 17.5275, "step": 20916 }, { "epoch": 0.38234595207194694, "grad_norm": 6.202724535045338, "learning_rate": 7.082929780510877e-06, "loss": 17.3454, "step": 20917 }, { "epoch": 0.3823642312683934, "grad_norm": 6.982099796758858, "learning_rate": 7.0826606738573175e-06, "loss": 17.5226, "step": 20918 }, { "epoch": 0.38238251046483995, "grad_norm": 6.245180649736514, "learning_rate": 7.082391559904213e-06, "loss": 17.4011, "step": 20919 }, { "epoch": 0.3824007896612865, "grad_norm": 6.604345590638832, "learning_rate": 7.082122438652508e-06, "loss": 17.5027, "step": 20920 }, { "epoch": 0.382419068857733, "grad_norm": 5.691258786852814, "learning_rate": 7.081853310103145e-06, "loss": 17.1142, "step": 20921 }, { "epoch": 0.38243734805417956, "grad_norm": 6.314341060201153, "learning_rate": 7.081584174257066e-06, "loss": 17.4148, "step": 20922 }, { "epoch": 0.38245562725062604, "grad_norm": 6.277514237526559, "learning_rate": 7.081315031115214e-06, "loss": 17.5835, "step": 20923 }, { "epoch": 0.3824739064470726, "grad_norm": 5.968445440806513, "learning_rate": 7.081045880678534e-06, "loss": 17.3819, "step": 20924 }, { "epoch": 0.3824921856435191, "grad_norm": 5.913309017293552, "learning_rate": 7.08077672294797e-06, "loss": 17.2318, "step": 20925 }, { "epoch": 0.38251046483996565, "grad_norm": 7.212968971941925, "learning_rate": 7.080507557924463e-06, "loss": 17.874, "step": 20926 }, { "epoch": 0.3825287440364122, "grad_norm": 5.659362249790801, "learning_rate": 7.080238385608958e-06, "loss": 17.3396, "step": 20927 }, { "epoch": 0.38254702323285866, "grad_norm": 7.303117907501463, "learning_rate": 7.079969206002397e-06, "loss": 17.5264, "step": 20928 }, { "epoch": 0.3825653024293052, "grad_norm": 7.148331468273093, "learning_rate": 7.079700019105725e-06, "loss": 18.0166, "step": 20929 }, { "epoch": 0.38258358162575173, "grad_norm": 6.2101605807832945, "learning_rate": 7.079430824919885e-06, "loss": 17.5685, "step": 20930 }, { "epoch": 0.38260186082219827, "grad_norm": 6.126608292160563, "learning_rate": 7.0791616234458215e-06, "loss": 17.3098, "step": 20931 }, { "epoch": 0.3826201400186448, "grad_norm": 6.2424096754175515, "learning_rate": 7.078892414684475e-06, "loss": 17.2135, "step": 20932 }, { "epoch": 0.3826384192150913, "grad_norm": 7.893451554677613, "learning_rate": 7.078623198636792e-06, "loss": 17.8848, "step": 20933 }, { "epoch": 0.3826566984115378, "grad_norm": 6.27278983088745, "learning_rate": 7.078353975303716e-06, "loss": 17.4218, "step": 20934 }, { "epoch": 0.38267497760798436, "grad_norm": 7.5959714124403614, "learning_rate": 7.078084744686189e-06, "loss": 18.019, "step": 20935 }, { "epoch": 0.3826932568044309, "grad_norm": 6.751028770933, "learning_rate": 7.077815506785154e-06, "loss": 17.5426, "step": 20936 }, { "epoch": 0.38271153600087743, "grad_norm": 5.352088787704339, "learning_rate": 7.077546261601556e-06, "loss": 17.0635, "step": 20937 }, { "epoch": 0.3827298151973239, "grad_norm": 6.524017412712978, "learning_rate": 7.077277009136341e-06, "loss": 17.6072, "step": 20938 }, { "epoch": 0.38274809439377044, "grad_norm": 5.923411217835149, "learning_rate": 7.077007749390448e-06, "loss": 17.1601, "step": 20939 }, { "epoch": 0.382766373590217, "grad_norm": 6.382600647904093, "learning_rate": 7.076738482364825e-06, "loss": 17.797, "step": 20940 }, { "epoch": 0.3827846527866635, "grad_norm": 7.349333360894832, "learning_rate": 7.076469208060412e-06, "loss": 18.4183, "step": 20941 }, { "epoch": 0.38280293198311, "grad_norm": 7.886549704741756, "learning_rate": 7.076199926478155e-06, "loss": 17.9815, "step": 20942 }, { "epoch": 0.38282121117955653, "grad_norm": 6.920208388927554, "learning_rate": 7.075930637618998e-06, "loss": 17.764, "step": 20943 }, { "epoch": 0.38283949037600307, "grad_norm": 8.381040262324833, "learning_rate": 7.075661341483884e-06, "loss": 18.1572, "step": 20944 }, { "epoch": 0.3828577695724496, "grad_norm": 6.618339656691645, "learning_rate": 7.0753920380737564e-06, "loss": 17.5145, "step": 20945 }, { "epoch": 0.38287604876889614, "grad_norm": 6.021925121301376, "learning_rate": 7.075122727389561e-06, "loss": 17.5195, "step": 20946 }, { "epoch": 0.3828943279653426, "grad_norm": 7.302842556870371, "learning_rate": 7.07485340943224e-06, "loss": 17.9682, "step": 20947 }, { "epoch": 0.38291260716178915, "grad_norm": 6.476085190963245, "learning_rate": 7.074584084202739e-06, "loss": 17.7174, "step": 20948 }, { "epoch": 0.3829308863582357, "grad_norm": 7.060712497037543, "learning_rate": 7.074314751702e-06, "loss": 17.7902, "step": 20949 }, { "epoch": 0.3829491655546822, "grad_norm": 7.439936953729566, "learning_rate": 7.0740454119309655e-06, "loss": 17.8011, "step": 20950 }, { "epoch": 0.38296744475112876, "grad_norm": 5.748107972841419, "learning_rate": 7.073776064890584e-06, "loss": 17.2943, "step": 20951 }, { "epoch": 0.38298572394757524, "grad_norm": 7.592641996812257, "learning_rate": 7.073506710581798e-06, "loss": 18.1216, "step": 20952 }, { "epoch": 0.3830040031440218, "grad_norm": 5.639567552391562, "learning_rate": 7.073237349005551e-06, "loss": 17.2927, "step": 20953 }, { "epoch": 0.3830222823404683, "grad_norm": 6.2018678368437605, "learning_rate": 7.072967980162785e-06, "loss": 17.395, "step": 20954 }, { "epoch": 0.38304056153691485, "grad_norm": 5.873436213169519, "learning_rate": 7.072698604054448e-06, "loss": 17.2147, "step": 20955 }, { "epoch": 0.3830588407333614, "grad_norm": 6.504260340309082, "learning_rate": 7.07242922068148e-06, "loss": 17.9237, "step": 20956 }, { "epoch": 0.38307711992980786, "grad_norm": 6.911863760797188, "learning_rate": 7.072159830044829e-06, "loss": 17.8015, "step": 20957 }, { "epoch": 0.3830953991262544, "grad_norm": 6.0197496330229265, "learning_rate": 7.071890432145438e-06, "loss": 17.5761, "step": 20958 }, { "epoch": 0.38311367832270093, "grad_norm": 5.974001537225322, "learning_rate": 7.07162102698425e-06, "loss": 17.3282, "step": 20959 }, { "epoch": 0.38313195751914747, "grad_norm": 7.8089125046639625, "learning_rate": 7.071351614562211e-06, "loss": 18.4462, "step": 20960 }, { "epoch": 0.383150236715594, "grad_norm": 6.679126497544561, "learning_rate": 7.071082194880263e-06, "loss": 17.5977, "step": 20961 }, { "epoch": 0.3831685159120405, "grad_norm": 7.571452136221873, "learning_rate": 7.070812767939353e-06, "loss": 17.9026, "step": 20962 }, { "epoch": 0.383186795108487, "grad_norm": 5.488059508087825, "learning_rate": 7.0705433337404235e-06, "loss": 17.1057, "step": 20963 }, { "epoch": 0.38320507430493356, "grad_norm": 5.754784046064573, "learning_rate": 7.070273892284418e-06, "loss": 17.1884, "step": 20964 }, { "epoch": 0.3832233535013801, "grad_norm": 6.562704278530298, "learning_rate": 7.0700044435722845e-06, "loss": 17.3791, "step": 20965 }, { "epoch": 0.38324163269782663, "grad_norm": 6.10723289146391, "learning_rate": 7.069734987604964e-06, "loss": 17.1413, "step": 20966 }, { "epoch": 0.3832599118942731, "grad_norm": 9.181396417149502, "learning_rate": 7.069465524383401e-06, "loss": 17.8588, "step": 20967 }, { "epoch": 0.38327819109071964, "grad_norm": 5.816902113994662, "learning_rate": 7.069196053908541e-06, "loss": 17.29, "step": 20968 }, { "epoch": 0.3832964702871662, "grad_norm": 7.247253740811549, "learning_rate": 7.0689265761813295e-06, "loss": 17.9448, "step": 20969 }, { "epoch": 0.3833147494836127, "grad_norm": 5.846369869770649, "learning_rate": 7.0686570912027095e-06, "loss": 17.5586, "step": 20970 }, { "epoch": 0.38333302868005925, "grad_norm": 6.9528831305859, "learning_rate": 7.068387598973626e-06, "loss": 17.7971, "step": 20971 }, { "epoch": 0.38335130787650573, "grad_norm": 5.561074919618863, "learning_rate": 7.068118099495023e-06, "loss": 16.9145, "step": 20972 }, { "epoch": 0.38336958707295227, "grad_norm": 6.119739518076784, "learning_rate": 7.067848592767845e-06, "loss": 17.3776, "step": 20973 }, { "epoch": 0.3833878662693988, "grad_norm": 7.926793988714482, "learning_rate": 7.0675790787930384e-06, "loss": 18.1583, "step": 20974 }, { "epoch": 0.38340614546584534, "grad_norm": 7.415874693286228, "learning_rate": 7.067309557571546e-06, "loss": 17.8907, "step": 20975 }, { "epoch": 0.3834244246622918, "grad_norm": 5.336722379137145, "learning_rate": 7.067040029104314e-06, "loss": 16.8849, "step": 20976 }, { "epoch": 0.38344270385873835, "grad_norm": 6.731954946186777, "learning_rate": 7.066770493392284e-06, "loss": 17.6902, "step": 20977 }, { "epoch": 0.3834609830551849, "grad_norm": 6.256213226265865, "learning_rate": 7.066500950436404e-06, "loss": 17.5907, "step": 20978 }, { "epoch": 0.3834792622516314, "grad_norm": 6.945786522373672, "learning_rate": 7.066231400237619e-06, "loss": 17.4622, "step": 20979 }, { "epoch": 0.38349754144807796, "grad_norm": 5.882857721699971, "learning_rate": 7.06596184279687e-06, "loss": 17.3546, "step": 20980 }, { "epoch": 0.38351582064452444, "grad_norm": 5.737339459003288, "learning_rate": 7.065692278115105e-06, "loss": 17.4939, "step": 20981 }, { "epoch": 0.383534099840971, "grad_norm": 6.914059546544488, "learning_rate": 7.065422706193266e-06, "loss": 17.869, "step": 20982 }, { "epoch": 0.3835523790374175, "grad_norm": 7.15039442679996, "learning_rate": 7.065153127032303e-06, "loss": 17.7811, "step": 20983 }, { "epoch": 0.38357065823386405, "grad_norm": 7.5436196844894905, "learning_rate": 7.064883540633155e-06, "loss": 17.8848, "step": 20984 }, { "epoch": 0.3835889374303106, "grad_norm": 8.22823033911563, "learning_rate": 7.06461394699677e-06, "loss": 18.5603, "step": 20985 }, { "epoch": 0.38360721662675706, "grad_norm": 6.363131251137945, "learning_rate": 7.064344346124092e-06, "loss": 17.3607, "step": 20986 }, { "epoch": 0.3836254958232036, "grad_norm": 5.632763651606355, "learning_rate": 7.064074738016067e-06, "loss": 17.3302, "step": 20987 }, { "epoch": 0.38364377501965014, "grad_norm": 6.683524758785473, "learning_rate": 7.063805122673638e-06, "loss": 17.463, "step": 20988 }, { "epoch": 0.38366205421609667, "grad_norm": 5.601327133997698, "learning_rate": 7.0635355000977525e-06, "loss": 17.3258, "step": 20989 }, { "epoch": 0.3836803334125432, "grad_norm": 5.834606755959688, "learning_rate": 7.063265870289353e-06, "loss": 17.2437, "step": 20990 }, { "epoch": 0.3836986126089897, "grad_norm": 7.335505367654194, "learning_rate": 7.062996233249385e-06, "loss": 17.736, "step": 20991 }, { "epoch": 0.3837168918054362, "grad_norm": 7.605727190558551, "learning_rate": 7.062726588978795e-06, "loss": 17.6628, "step": 20992 }, { "epoch": 0.38373517100188276, "grad_norm": 6.677220834372723, "learning_rate": 7.062456937478529e-06, "loss": 17.5839, "step": 20993 }, { "epoch": 0.3837534501983293, "grad_norm": 5.9987708403078175, "learning_rate": 7.062187278749528e-06, "loss": 17.1523, "step": 20994 }, { "epoch": 0.38377172939477583, "grad_norm": 5.824464208607177, "learning_rate": 7.0619176127927416e-06, "loss": 17.3179, "step": 20995 }, { "epoch": 0.3837900085912223, "grad_norm": 6.46492619817191, "learning_rate": 7.0616479396091105e-06, "loss": 17.6594, "step": 20996 }, { "epoch": 0.38380828778766884, "grad_norm": 7.486001052481146, "learning_rate": 7.061378259199585e-06, "loss": 18.256, "step": 20997 }, { "epoch": 0.3838265669841154, "grad_norm": 6.76620453305171, "learning_rate": 7.061108571565105e-06, "loss": 17.6748, "step": 20998 }, { "epoch": 0.3838448461805619, "grad_norm": 6.107454163429668, "learning_rate": 7.06083887670662e-06, "loss": 17.2595, "step": 20999 }, { "epoch": 0.38386312537700845, "grad_norm": 6.861673884222232, "learning_rate": 7.060569174625074e-06, "loss": 17.6715, "step": 21000 }, { "epoch": 0.38388140457345493, "grad_norm": 7.388862690006889, "learning_rate": 7.060299465321409e-06, "loss": 17.999, "step": 21001 }, { "epoch": 0.38389968376990147, "grad_norm": 6.952039206444979, "learning_rate": 7.060029748796575e-06, "loss": 17.8205, "step": 21002 }, { "epoch": 0.383917962966348, "grad_norm": 8.046804842311497, "learning_rate": 7.059760025051517e-06, "loss": 17.9271, "step": 21003 }, { "epoch": 0.38393624216279454, "grad_norm": 6.753743435534955, "learning_rate": 7.059490294087178e-06, "loss": 17.7938, "step": 21004 }, { "epoch": 0.3839545213592411, "grad_norm": 7.518158013897047, "learning_rate": 7.059220555904503e-06, "loss": 17.8388, "step": 21005 }, { "epoch": 0.38397280055568755, "grad_norm": 6.211908699222352, "learning_rate": 7.058950810504439e-06, "loss": 17.3581, "step": 21006 }, { "epoch": 0.3839910797521341, "grad_norm": 7.248228724588168, "learning_rate": 7.058681057887932e-06, "loss": 17.9582, "step": 21007 }, { "epoch": 0.3840093589485806, "grad_norm": 6.761188584964818, "learning_rate": 7.058411298055925e-06, "loss": 17.8539, "step": 21008 }, { "epoch": 0.38402763814502716, "grad_norm": 6.668129533612861, "learning_rate": 7.058141531009366e-06, "loss": 17.4327, "step": 21009 }, { "epoch": 0.38404591734147364, "grad_norm": 6.720147588197332, "learning_rate": 7.057871756749199e-06, "loss": 17.5705, "step": 21010 }, { "epoch": 0.3840641965379202, "grad_norm": 6.025626867685157, "learning_rate": 7.057601975276372e-06, "loss": 17.2125, "step": 21011 }, { "epoch": 0.3840824757343667, "grad_norm": 6.418059717440586, "learning_rate": 7.057332186591827e-06, "loss": 17.2908, "step": 21012 }, { "epoch": 0.38410075493081325, "grad_norm": 5.208297016391685, "learning_rate": 7.057062390696511e-06, "loss": 16.9275, "step": 21013 }, { "epoch": 0.3841190341272598, "grad_norm": 6.7694717826112605, "learning_rate": 7.05679258759137e-06, "loss": 17.4841, "step": 21014 }, { "epoch": 0.38413731332370626, "grad_norm": 5.318957700949014, "learning_rate": 7.056522777277349e-06, "loss": 16.9798, "step": 21015 }, { "epoch": 0.3841555925201528, "grad_norm": 6.8537494940230745, "learning_rate": 7.056252959755396e-06, "loss": 17.4746, "step": 21016 }, { "epoch": 0.38417387171659934, "grad_norm": 6.9875700824701035, "learning_rate": 7.055983135026454e-06, "loss": 17.7363, "step": 21017 }, { "epoch": 0.38419215091304587, "grad_norm": 6.606386917732638, "learning_rate": 7.055713303091467e-06, "loss": 17.5111, "step": 21018 }, { "epoch": 0.3842104301094924, "grad_norm": 5.8560429944783605, "learning_rate": 7.055443463951386e-06, "loss": 17.2994, "step": 21019 }, { "epoch": 0.3842287093059389, "grad_norm": 5.899752883657581, "learning_rate": 7.0551736176071525e-06, "loss": 17.3462, "step": 21020 }, { "epoch": 0.3842469885023854, "grad_norm": 5.719587758213765, "learning_rate": 7.054903764059716e-06, "loss": 17.4546, "step": 21021 }, { "epoch": 0.38426526769883196, "grad_norm": 6.360485085785115, "learning_rate": 7.054633903310018e-06, "loss": 17.416, "step": 21022 }, { "epoch": 0.3842835468952785, "grad_norm": 6.2388700986330194, "learning_rate": 7.054364035359007e-06, "loss": 17.3875, "step": 21023 }, { "epoch": 0.38430182609172503, "grad_norm": 7.730507789397206, "learning_rate": 7.05409416020763e-06, "loss": 18.2943, "step": 21024 }, { "epoch": 0.3843201052881715, "grad_norm": 7.365054977760105, "learning_rate": 7.053824277856829e-06, "loss": 18.2068, "step": 21025 }, { "epoch": 0.38433838448461805, "grad_norm": 7.222826076162248, "learning_rate": 7.053554388307553e-06, "loss": 17.8555, "step": 21026 }, { "epoch": 0.3843566636810646, "grad_norm": 7.403992404066004, "learning_rate": 7.0532844915607464e-06, "loss": 17.6077, "step": 21027 }, { "epoch": 0.3843749428775111, "grad_norm": 6.646387237157337, "learning_rate": 7.053014587617357e-06, "loss": 17.6762, "step": 21028 }, { "epoch": 0.38439322207395765, "grad_norm": 5.383995280193755, "learning_rate": 7.052744676478329e-06, "loss": 17.0154, "step": 21029 }, { "epoch": 0.38441150127040413, "grad_norm": 5.231586236215032, "learning_rate": 7.05247475814461e-06, "loss": 17.0177, "step": 21030 }, { "epoch": 0.38442978046685067, "grad_norm": 6.022003660024721, "learning_rate": 7.0522048326171446e-06, "loss": 17.5111, "step": 21031 }, { "epoch": 0.3844480596632972, "grad_norm": 6.961967428506571, "learning_rate": 7.051934899896877e-06, "loss": 17.5252, "step": 21032 }, { "epoch": 0.38446633885974374, "grad_norm": 6.121851983504221, "learning_rate": 7.051664959984757e-06, "loss": 17.6957, "step": 21033 }, { "epoch": 0.3844846180561903, "grad_norm": 7.502628993346572, "learning_rate": 7.051395012881732e-06, "loss": 18.1055, "step": 21034 }, { "epoch": 0.38450289725263675, "grad_norm": 6.937053189562557, "learning_rate": 7.051125058588744e-06, "loss": 17.5169, "step": 21035 }, { "epoch": 0.3845211764490833, "grad_norm": 6.425997547010063, "learning_rate": 7.0508550971067395e-06, "loss": 17.5367, "step": 21036 }, { "epoch": 0.3845394556455298, "grad_norm": 7.543885421665368, "learning_rate": 7.050585128436664e-06, "loss": 17.9488, "step": 21037 }, { "epoch": 0.38455773484197636, "grad_norm": 5.393873116929426, "learning_rate": 7.05031515257947e-06, "loss": 17.1293, "step": 21038 }, { "epoch": 0.3845760140384229, "grad_norm": 7.6006476187830305, "learning_rate": 7.0500451695360974e-06, "loss": 18.0129, "step": 21039 }, { "epoch": 0.3845942932348694, "grad_norm": 6.896454748473229, "learning_rate": 7.049775179307494e-06, "loss": 17.8704, "step": 21040 }, { "epoch": 0.3846125724313159, "grad_norm": 7.847156974161075, "learning_rate": 7.049505181894607e-06, "loss": 17.9775, "step": 21041 }, { "epoch": 0.38463085162776245, "grad_norm": 7.167280215081267, "learning_rate": 7.049235177298381e-06, "loss": 17.628, "step": 21042 }, { "epoch": 0.384649130824209, "grad_norm": 4.947757102027752, "learning_rate": 7.048965165519764e-06, "loss": 16.8362, "step": 21043 }, { "epoch": 0.38466741002065546, "grad_norm": 6.1773599780632, "learning_rate": 7.048695146559703e-06, "loss": 17.6304, "step": 21044 }, { "epoch": 0.384685689217102, "grad_norm": 7.392166611133998, "learning_rate": 7.048425120419142e-06, "loss": 17.799, "step": 21045 }, { "epoch": 0.38470396841354854, "grad_norm": 7.947934466412802, "learning_rate": 7.04815508709903e-06, "loss": 17.9185, "step": 21046 }, { "epoch": 0.38472224760999507, "grad_norm": 6.3371784457134375, "learning_rate": 7.047885046600311e-06, "loss": 17.5585, "step": 21047 }, { "epoch": 0.3847405268064416, "grad_norm": 6.35844089344265, "learning_rate": 7.047614998923934e-06, "loss": 17.6056, "step": 21048 }, { "epoch": 0.3847588060028881, "grad_norm": 6.371209181819641, "learning_rate": 7.047344944070843e-06, "loss": 17.772, "step": 21049 }, { "epoch": 0.3847770851993346, "grad_norm": 9.318432427827792, "learning_rate": 7.047074882041986e-06, "loss": 18.2345, "step": 21050 }, { "epoch": 0.38479536439578116, "grad_norm": 6.422751603195068, "learning_rate": 7.046804812838308e-06, "loss": 17.4365, "step": 21051 }, { "epoch": 0.3848136435922277, "grad_norm": 5.859817860410559, "learning_rate": 7.046534736460758e-06, "loss": 17.2159, "step": 21052 }, { "epoch": 0.38483192278867423, "grad_norm": 7.197963911762869, "learning_rate": 7.046264652910282e-06, "loss": 17.7393, "step": 21053 }, { "epoch": 0.3848502019851207, "grad_norm": 7.672084740455577, "learning_rate": 7.045994562187825e-06, "loss": 17.8146, "step": 21054 }, { "epoch": 0.38486848118156725, "grad_norm": 6.565158908516528, "learning_rate": 7.045724464294335e-06, "loss": 17.7477, "step": 21055 }, { "epoch": 0.3848867603780138, "grad_norm": 7.8717063409463055, "learning_rate": 7.045454359230757e-06, "loss": 18.0792, "step": 21056 }, { "epoch": 0.3849050395744603, "grad_norm": 5.79232056568044, "learning_rate": 7.04518424699804e-06, "loss": 17.4529, "step": 21057 }, { "epoch": 0.38492331877090685, "grad_norm": 6.33561194468226, "learning_rate": 7.044914127597131e-06, "loss": 17.6643, "step": 21058 }, { "epoch": 0.38494159796735333, "grad_norm": 6.785048526549523, "learning_rate": 7.044644001028973e-06, "loss": 17.7654, "step": 21059 }, { "epoch": 0.38495987716379987, "grad_norm": 8.698879900998087, "learning_rate": 7.044373867294516e-06, "loss": 18.3851, "step": 21060 }, { "epoch": 0.3849781563602464, "grad_norm": 6.081322171523658, "learning_rate": 7.044103726394706e-06, "loss": 17.4652, "step": 21061 }, { "epoch": 0.38499643555669294, "grad_norm": 7.096132129346076, "learning_rate": 7.04383357833049e-06, "loss": 17.8868, "step": 21062 }, { "epoch": 0.3850147147531395, "grad_norm": 6.499055407332533, "learning_rate": 7.043563423102815e-06, "loss": 17.4621, "step": 21063 }, { "epoch": 0.38503299394958596, "grad_norm": 5.897318297174493, "learning_rate": 7.043293260712627e-06, "loss": 17.4509, "step": 21064 }, { "epoch": 0.3850512731460325, "grad_norm": 7.0679735364353835, "learning_rate": 7.043023091160875e-06, "loss": 17.8111, "step": 21065 }, { "epoch": 0.385069552342479, "grad_norm": 5.8669612247424086, "learning_rate": 7.042752914448502e-06, "loss": 17.293, "step": 21066 }, { "epoch": 0.38508783153892556, "grad_norm": 7.397229059389791, "learning_rate": 7.042482730576459e-06, "loss": 17.7818, "step": 21067 }, { "epoch": 0.3851061107353721, "grad_norm": 6.583045133857047, "learning_rate": 7.04221253954569e-06, "loss": 17.7877, "step": 21068 }, { "epoch": 0.3851243899318186, "grad_norm": 6.318748984029918, "learning_rate": 7.041942341357144e-06, "loss": 17.4063, "step": 21069 }, { "epoch": 0.3851426691282651, "grad_norm": 6.166478127521871, "learning_rate": 7.0416721360117666e-06, "loss": 17.7328, "step": 21070 }, { "epoch": 0.38516094832471165, "grad_norm": 5.73594155656501, "learning_rate": 7.041401923510505e-06, "loss": 17.2638, "step": 21071 }, { "epoch": 0.3851792275211582, "grad_norm": 7.858864102035168, "learning_rate": 7.0411317038543095e-06, "loss": 17.9374, "step": 21072 }, { "epoch": 0.3851975067176047, "grad_norm": 5.148517394924511, "learning_rate": 7.040861477044122e-06, "loss": 16.8221, "step": 21073 }, { "epoch": 0.3852157859140512, "grad_norm": 7.215845543892201, "learning_rate": 7.040591243080893e-06, "loss": 18.0409, "step": 21074 }, { "epoch": 0.38523406511049774, "grad_norm": 7.545392211958562, "learning_rate": 7.040321001965569e-06, "loss": 17.8653, "step": 21075 }, { "epoch": 0.38525234430694427, "grad_norm": 6.430969382890963, "learning_rate": 7.040050753699097e-06, "loss": 17.3215, "step": 21076 }, { "epoch": 0.3852706235033908, "grad_norm": 6.494384680228185, "learning_rate": 7.039780498282422e-06, "loss": 17.5879, "step": 21077 }, { "epoch": 0.3852889026998373, "grad_norm": 6.95596397269451, "learning_rate": 7.039510235716496e-06, "loss": 17.7674, "step": 21078 }, { "epoch": 0.3853071818962838, "grad_norm": 5.629432299225228, "learning_rate": 7.039239966002264e-06, "loss": 17.2038, "step": 21079 }, { "epoch": 0.38532546109273036, "grad_norm": 6.775178413315715, "learning_rate": 7.038969689140671e-06, "loss": 17.683, "step": 21080 }, { "epoch": 0.3853437402891769, "grad_norm": 6.248519359035783, "learning_rate": 7.038699405132668e-06, "loss": 17.2174, "step": 21081 }, { "epoch": 0.38536201948562343, "grad_norm": 6.120305207682711, "learning_rate": 7.0384291139791975e-06, "loss": 17.4106, "step": 21082 }, { "epoch": 0.3853802986820699, "grad_norm": 5.493229917477497, "learning_rate": 7.038158815681213e-06, "loss": 17.1191, "step": 21083 }, { "epoch": 0.38539857787851645, "grad_norm": 7.91059832809418, "learning_rate": 7.037888510239657e-06, "loss": 18.056, "step": 21084 }, { "epoch": 0.385416857074963, "grad_norm": 5.522442157148008, "learning_rate": 7.037618197655479e-06, "loss": 17.2022, "step": 21085 }, { "epoch": 0.3854351362714095, "grad_norm": 6.181231408716889, "learning_rate": 7.037347877929626e-06, "loss": 17.4109, "step": 21086 }, { "epoch": 0.38545341546785605, "grad_norm": 6.892857205373257, "learning_rate": 7.037077551063045e-06, "loss": 17.3751, "step": 21087 }, { "epoch": 0.38547169466430253, "grad_norm": 5.573036233274179, "learning_rate": 7.036807217056685e-06, "loss": 17.1242, "step": 21088 }, { "epoch": 0.38548997386074907, "grad_norm": 5.633173271976434, "learning_rate": 7.036536875911492e-06, "loss": 16.9751, "step": 21089 }, { "epoch": 0.3855082530571956, "grad_norm": 7.079818367249521, "learning_rate": 7.036266527628415e-06, "loss": 18.0067, "step": 21090 }, { "epoch": 0.38552653225364214, "grad_norm": 6.997568015876288, "learning_rate": 7.035996172208398e-06, "loss": 17.6373, "step": 21091 }, { "epoch": 0.3855448114500887, "grad_norm": 7.057284878058664, "learning_rate": 7.035725809652392e-06, "loss": 17.9884, "step": 21092 }, { "epoch": 0.38556309064653516, "grad_norm": 6.205975896431654, "learning_rate": 7.035455439961345e-06, "loss": 17.2431, "step": 21093 }, { "epoch": 0.3855813698429817, "grad_norm": 10.777434619190368, "learning_rate": 7.035185063136203e-06, "loss": 17.7487, "step": 21094 }, { "epoch": 0.3855996490394282, "grad_norm": 7.723381474482577, "learning_rate": 7.034914679177913e-06, "loss": 17.8022, "step": 21095 }, { "epoch": 0.38561792823587476, "grad_norm": 6.358376912339185, "learning_rate": 7.034644288087424e-06, "loss": 17.643, "step": 21096 }, { "epoch": 0.3856362074323213, "grad_norm": 7.280527488684475, "learning_rate": 7.034373889865683e-06, "loss": 17.6818, "step": 21097 }, { "epoch": 0.3856544866287678, "grad_norm": 7.806657770907371, "learning_rate": 7.034103484513639e-06, "loss": 18.6375, "step": 21098 }, { "epoch": 0.3856727658252143, "grad_norm": 6.957910146235624, "learning_rate": 7.033833072032238e-06, "loss": 17.8476, "step": 21099 }, { "epoch": 0.38569104502166085, "grad_norm": 8.754076732392846, "learning_rate": 7.033562652422428e-06, "loss": 18.5968, "step": 21100 }, { "epoch": 0.3857093242181074, "grad_norm": 7.531241236820884, "learning_rate": 7.033292225685159e-06, "loss": 17.4104, "step": 21101 }, { "epoch": 0.3857276034145539, "grad_norm": 5.7801749191855345, "learning_rate": 7.0330217918213765e-06, "loss": 17.1987, "step": 21102 }, { "epoch": 0.3857458826110004, "grad_norm": 5.485070074298556, "learning_rate": 7.03275135083203e-06, "loss": 17.1562, "step": 21103 }, { "epoch": 0.38576416180744694, "grad_norm": 6.9288162598212315, "learning_rate": 7.032480902718064e-06, "loss": 17.8123, "step": 21104 }, { "epoch": 0.3857824410038935, "grad_norm": 6.468959941191855, "learning_rate": 7.03221044748043e-06, "loss": 17.5832, "step": 21105 }, { "epoch": 0.38580072020034, "grad_norm": 7.959014596218402, "learning_rate": 7.0319399851200754e-06, "loss": 17.4631, "step": 21106 }, { "epoch": 0.38581899939678654, "grad_norm": 6.044343220531988, "learning_rate": 7.0316695156379475e-06, "loss": 17.3057, "step": 21107 }, { "epoch": 0.385837278593233, "grad_norm": 8.2256849637106, "learning_rate": 7.031399039034994e-06, "loss": 18.4641, "step": 21108 }, { "epoch": 0.38585555778967956, "grad_norm": 5.699225958830676, "learning_rate": 7.031128555312161e-06, "loss": 17.1867, "step": 21109 }, { "epoch": 0.3858738369861261, "grad_norm": 6.672498573674858, "learning_rate": 7.030858064470402e-06, "loss": 17.3648, "step": 21110 }, { "epoch": 0.38589211618257263, "grad_norm": 4.735959575691783, "learning_rate": 7.03058756651066e-06, "loss": 16.7964, "step": 21111 }, { "epoch": 0.3859103953790191, "grad_norm": 7.371837255640524, "learning_rate": 7.030317061433884e-06, "loss": 17.9339, "step": 21112 }, { "epoch": 0.38592867457546565, "grad_norm": 6.87418183915726, "learning_rate": 7.0300465492410256e-06, "loss": 17.7446, "step": 21113 }, { "epoch": 0.3859469537719122, "grad_norm": 5.61363960267249, "learning_rate": 7.029776029933027e-06, "loss": 17.2526, "step": 21114 }, { "epoch": 0.3859652329683587, "grad_norm": 8.675659705115105, "learning_rate": 7.029505503510842e-06, "loss": 18.4315, "step": 21115 }, { "epoch": 0.38598351216480525, "grad_norm": 6.314659400689751, "learning_rate": 7.029234969975415e-06, "loss": 17.6072, "step": 21116 }, { "epoch": 0.38600179136125173, "grad_norm": 7.281348543329399, "learning_rate": 7.028964429327697e-06, "loss": 17.6171, "step": 21117 }, { "epoch": 0.38602007055769827, "grad_norm": 6.886107038104054, "learning_rate": 7.028693881568632e-06, "loss": 17.8502, "step": 21118 }, { "epoch": 0.3860383497541448, "grad_norm": 6.362316523667407, "learning_rate": 7.028423326699173e-06, "loss": 17.5793, "step": 21119 }, { "epoch": 0.38605662895059134, "grad_norm": 7.459611734937078, "learning_rate": 7.028152764720265e-06, "loss": 17.6337, "step": 21120 }, { "epoch": 0.3860749081470379, "grad_norm": 5.5907760913335265, "learning_rate": 7.027882195632861e-06, "loss": 17.2374, "step": 21121 }, { "epoch": 0.38609318734348436, "grad_norm": 5.973703927348647, "learning_rate": 7.027611619437902e-06, "loss": 17.1285, "step": 21122 }, { "epoch": 0.3861114665399309, "grad_norm": 4.924313880101816, "learning_rate": 7.02734103613634e-06, "loss": 16.8294, "step": 21123 }, { "epoch": 0.3861297457363774, "grad_norm": 7.181231802780088, "learning_rate": 7.027070445729127e-06, "loss": 17.681, "step": 21124 }, { "epoch": 0.38614802493282396, "grad_norm": 6.990090253560784, "learning_rate": 7.026799848217206e-06, "loss": 17.8761, "step": 21125 }, { "epoch": 0.3861663041292705, "grad_norm": 6.184005663463678, "learning_rate": 7.026529243601528e-06, "loss": 17.3393, "step": 21126 }, { "epoch": 0.386184583325717, "grad_norm": 6.267227339306514, "learning_rate": 7.02625863188304e-06, "loss": 17.6497, "step": 21127 }, { "epoch": 0.3862028625221635, "grad_norm": 6.75662883580309, "learning_rate": 7.025988013062691e-06, "loss": 17.5729, "step": 21128 }, { "epoch": 0.38622114171861005, "grad_norm": 9.296670103182908, "learning_rate": 7.025717387141431e-06, "loss": 18.15, "step": 21129 }, { "epoch": 0.3862394209150566, "grad_norm": 4.852905765168992, "learning_rate": 7.025446754120206e-06, "loss": 16.86, "step": 21130 }, { "epoch": 0.3862577001115031, "grad_norm": 5.890617171437006, "learning_rate": 7.0251761139999674e-06, "loss": 17.1843, "step": 21131 }, { "epoch": 0.3862759793079496, "grad_norm": 7.53836892579514, "learning_rate": 7.024905466781662e-06, "loss": 17.8045, "step": 21132 }, { "epoch": 0.38629425850439614, "grad_norm": 7.142241216227472, "learning_rate": 7.0246348124662375e-06, "loss": 17.5758, "step": 21133 }, { "epoch": 0.3863125377008427, "grad_norm": 5.6274541869633925, "learning_rate": 7.024364151054646e-06, "loss": 17.1649, "step": 21134 }, { "epoch": 0.3863308168972892, "grad_norm": 5.0354058954229926, "learning_rate": 7.024093482547831e-06, "loss": 16.9671, "step": 21135 }, { "epoch": 0.38634909609373574, "grad_norm": 6.676414643799981, "learning_rate": 7.023822806946745e-06, "loss": 17.4365, "step": 21136 }, { "epoch": 0.3863673752901822, "grad_norm": 6.334598866258384, "learning_rate": 7.023552124252335e-06, "loss": 17.5582, "step": 21137 }, { "epoch": 0.38638565448662876, "grad_norm": 5.145320711120825, "learning_rate": 7.023281434465553e-06, "loss": 16.9881, "step": 21138 }, { "epoch": 0.3864039336830753, "grad_norm": 8.21784938964477, "learning_rate": 7.0230107375873435e-06, "loss": 17.9658, "step": 21139 }, { "epoch": 0.38642221287952183, "grad_norm": 6.194250296436442, "learning_rate": 7.022740033618657e-06, "loss": 17.2713, "step": 21140 }, { "epoch": 0.38644049207596837, "grad_norm": 7.4350060971970615, "learning_rate": 7.0224693225604415e-06, "loss": 17.8246, "step": 21141 }, { "epoch": 0.38645877127241485, "grad_norm": 6.433020845599466, "learning_rate": 7.022198604413647e-06, "loss": 17.5145, "step": 21142 }, { "epoch": 0.3864770504688614, "grad_norm": 7.612530599796842, "learning_rate": 7.0219278791792225e-06, "loss": 17.9481, "step": 21143 }, { "epoch": 0.3864953296653079, "grad_norm": 6.7636431475299235, "learning_rate": 7.021657146858115e-06, "loss": 17.6575, "step": 21144 }, { "epoch": 0.38651360886175445, "grad_norm": 7.789390469904421, "learning_rate": 7.021386407451276e-06, "loss": 17.4399, "step": 21145 }, { "epoch": 0.38653188805820093, "grad_norm": 5.859613362592341, "learning_rate": 7.021115660959652e-06, "loss": 17.052, "step": 21146 }, { "epoch": 0.38655016725464747, "grad_norm": 7.707885355718643, "learning_rate": 7.020844907384193e-06, "loss": 17.7728, "step": 21147 }, { "epoch": 0.386568446451094, "grad_norm": 6.576837103363616, "learning_rate": 7.020574146725849e-06, "loss": 17.473, "step": 21148 }, { "epoch": 0.38658672564754054, "grad_norm": 6.165204627767476, "learning_rate": 7.020303378985568e-06, "loss": 17.5712, "step": 21149 }, { "epoch": 0.3866050048439871, "grad_norm": 6.642705530824343, "learning_rate": 7.020032604164297e-06, "loss": 17.3808, "step": 21150 }, { "epoch": 0.38662328404043356, "grad_norm": 4.789453489011865, "learning_rate": 7.019761822262988e-06, "loss": 16.6734, "step": 21151 }, { "epoch": 0.3866415632368801, "grad_norm": 7.416221651627426, "learning_rate": 7.019491033282591e-06, "loss": 17.8973, "step": 21152 }, { "epoch": 0.38665984243332663, "grad_norm": 6.346482425134587, "learning_rate": 7.019220237224051e-06, "loss": 17.6587, "step": 21153 }, { "epoch": 0.38667812162977316, "grad_norm": 5.579443119528855, "learning_rate": 7.01894943408832e-06, "loss": 16.9855, "step": 21154 }, { "epoch": 0.3866964008262197, "grad_norm": 6.875463484691944, "learning_rate": 7.018678623876346e-06, "loss": 17.9065, "step": 21155 }, { "epoch": 0.3867146800226662, "grad_norm": 8.57757477707898, "learning_rate": 7.0184078065890785e-06, "loss": 17.675, "step": 21156 }, { "epoch": 0.3867329592191127, "grad_norm": 8.57415771607685, "learning_rate": 7.018136982227467e-06, "loss": 18.3249, "step": 21157 }, { "epoch": 0.38675123841555925, "grad_norm": 8.106472151018053, "learning_rate": 7.017866150792461e-06, "loss": 18.364, "step": 21158 }, { "epoch": 0.3867695176120058, "grad_norm": 5.573386292208885, "learning_rate": 7.017595312285008e-06, "loss": 16.8987, "step": 21159 }, { "epoch": 0.3867877968084523, "grad_norm": 7.469690580880193, "learning_rate": 7.0173244667060606e-06, "loss": 18.009, "step": 21160 }, { "epoch": 0.3868060760048988, "grad_norm": 7.218461469271216, "learning_rate": 7.017053614056564e-06, "loss": 18.0056, "step": 21161 }, { "epoch": 0.38682435520134534, "grad_norm": 6.64697296731165, "learning_rate": 7.016782754337471e-06, "loss": 17.7236, "step": 21162 }, { "epoch": 0.3868426343977919, "grad_norm": 6.469455702339826, "learning_rate": 7.016511887549729e-06, "loss": 17.52, "step": 21163 }, { "epoch": 0.3868609135942384, "grad_norm": 5.620907433957624, "learning_rate": 7.016241013694287e-06, "loss": 17.2523, "step": 21164 }, { "epoch": 0.38687919279068494, "grad_norm": 5.6795585345674375, "learning_rate": 7.015970132772097e-06, "loss": 17.3294, "step": 21165 }, { "epoch": 0.3868974719871314, "grad_norm": 7.020711561287148, "learning_rate": 7.015699244784104e-06, "loss": 17.7057, "step": 21166 }, { "epoch": 0.38691575118357796, "grad_norm": 5.718392094795572, "learning_rate": 7.015428349731261e-06, "loss": 17.165, "step": 21167 }, { "epoch": 0.3869340303800245, "grad_norm": 6.738196681250109, "learning_rate": 7.015157447614518e-06, "loss": 17.3594, "step": 21168 }, { "epoch": 0.38695230957647103, "grad_norm": 6.514408171224374, "learning_rate": 7.014886538434822e-06, "loss": 17.7885, "step": 21169 }, { "epoch": 0.38697058877291757, "grad_norm": 6.240929938726908, "learning_rate": 7.014615622193124e-06, "loss": 17.3936, "step": 21170 }, { "epoch": 0.38698886796936405, "grad_norm": 6.488269955690865, "learning_rate": 7.0143446988903714e-06, "loss": 17.4412, "step": 21171 }, { "epoch": 0.3870071471658106, "grad_norm": 7.510448804413826, "learning_rate": 7.014073768527517e-06, "loss": 17.868, "step": 21172 }, { "epoch": 0.3870254263622571, "grad_norm": 6.568964597754895, "learning_rate": 7.013802831105508e-06, "loss": 17.8103, "step": 21173 }, { "epoch": 0.38704370555870365, "grad_norm": 6.347534864591465, "learning_rate": 7.0135318866252955e-06, "loss": 17.3994, "step": 21174 }, { "epoch": 0.3870619847551502, "grad_norm": 6.076323680890524, "learning_rate": 7.0132609350878285e-06, "loss": 17.266, "step": 21175 }, { "epoch": 0.38708026395159667, "grad_norm": 6.32945252994887, "learning_rate": 7.012989976494057e-06, "loss": 17.3789, "step": 21176 }, { "epoch": 0.3870985431480432, "grad_norm": 5.935308236977482, "learning_rate": 7.012719010844928e-06, "loss": 17.4507, "step": 21177 }, { "epoch": 0.38711682234448974, "grad_norm": 6.854667051747896, "learning_rate": 7.012448038141396e-06, "loss": 17.7286, "step": 21178 }, { "epoch": 0.3871351015409363, "grad_norm": 6.164314235251316, "learning_rate": 7.012177058384408e-06, "loss": 17.3948, "step": 21179 }, { "epoch": 0.38715338073738276, "grad_norm": 6.860808195260509, "learning_rate": 7.0119060715749146e-06, "loss": 17.8231, "step": 21180 }, { "epoch": 0.3871716599338293, "grad_norm": 5.204002291292977, "learning_rate": 7.011635077713863e-06, "loss": 17.0888, "step": 21181 }, { "epoch": 0.38718993913027583, "grad_norm": 5.552994605069955, "learning_rate": 7.0113640768022055e-06, "loss": 17.0881, "step": 21182 }, { "epoch": 0.38720821832672236, "grad_norm": 7.288976439343021, "learning_rate": 7.0110930688408915e-06, "loss": 17.6469, "step": 21183 }, { "epoch": 0.3872264975231689, "grad_norm": 7.121803750697378, "learning_rate": 7.010822053830872e-06, "loss": 17.7485, "step": 21184 }, { "epoch": 0.3872447767196154, "grad_norm": 7.171010355826255, "learning_rate": 7.010551031773094e-06, "loss": 18.1696, "step": 21185 }, { "epoch": 0.3872630559160619, "grad_norm": 6.215532642912796, "learning_rate": 7.01028000266851e-06, "loss": 17.5473, "step": 21186 }, { "epoch": 0.38728133511250845, "grad_norm": 6.009141313573754, "learning_rate": 7.010008966518069e-06, "loss": 17.534, "step": 21187 }, { "epoch": 0.387299614308955, "grad_norm": 7.382995024123719, "learning_rate": 7.009737923322722e-06, "loss": 17.8126, "step": 21188 }, { "epoch": 0.3873178935054015, "grad_norm": 7.008409613622167, "learning_rate": 7.009466873083416e-06, "loss": 17.8726, "step": 21189 }, { "epoch": 0.387336172701848, "grad_norm": 6.326699647289833, "learning_rate": 7.009195815801105e-06, "loss": 17.2901, "step": 21190 }, { "epoch": 0.38735445189829454, "grad_norm": 6.816441338042101, "learning_rate": 7.008924751476734e-06, "loss": 17.8958, "step": 21191 }, { "epoch": 0.3873727310947411, "grad_norm": 6.4113341488070965, "learning_rate": 7.0086536801112595e-06, "loss": 17.5528, "step": 21192 }, { "epoch": 0.3873910102911876, "grad_norm": 6.098204645303003, "learning_rate": 7.008382601705626e-06, "loss": 17.3523, "step": 21193 }, { "epoch": 0.38740928948763415, "grad_norm": 5.224449357427487, "learning_rate": 7.008111516260787e-06, "loss": 17.1505, "step": 21194 }, { "epoch": 0.3874275686840806, "grad_norm": 7.138159137774924, "learning_rate": 7.007840423777691e-06, "loss": 18.0406, "step": 21195 }, { "epoch": 0.38744584788052716, "grad_norm": 7.110189495810454, "learning_rate": 7.0075693242572885e-06, "loss": 17.7023, "step": 21196 }, { "epoch": 0.3874641270769737, "grad_norm": 8.82591569452853, "learning_rate": 7.007298217700529e-06, "loss": 18.3154, "step": 21197 }, { "epoch": 0.38748240627342023, "grad_norm": 9.124803495482377, "learning_rate": 7.0070271041083635e-06, "loss": 18.4117, "step": 21198 }, { "epoch": 0.38750068546986677, "grad_norm": 6.888477199115898, "learning_rate": 7.006755983481744e-06, "loss": 17.9521, "step": 21199 }, { "epoch": 0.38751896466631325, "grad_norm": 7.585323345702857, "learning_rate": 7.006484855821617e-06, "loss": 17.995, "step": 21200 }, { "epoch": 0.3875372438627598, "grad_norm": 5.733917607894738, "learning_rate": 7.0062137211289346e-06, "loss": 17.2265, "step": 21201 }, { "epoch": 0.3875555230592063, "grad_norm": 7.162694575830329, "learning_rate": 7.0059425794046485e-06, "loss": 17.1872, "step": 21202 }, { "epoch": 0.38757380225565286, "grad_norm": 6.972112316308997, "learning_rate": 7.005671430649708e-06, "loss": 17.7391, "step": 21203 }, { "epoch": 0.3875920814520994, "grad_norm": 5.907092817545116, "learning_rate": 7.005400274865062e-06, "loss": 17.129, "step": 21204 }, { "epoch": 0.38761036064854587, "grad_norm": 6.014288258255214, "learning_rate": 7.005129112051662e-06, "loss": 17.1242, "step": 21205 }, { "epoch": 0.3876286398449924, "grad_norm": 7.994252068301885, "learning_rate": 7.004857942210459e-06, "loss": 18.3423, "step": 21206 }, { "epoch": 0.38764691904143894, "grad_norm": 5.43324849083137, "learning_rate": 7.004586765342403e-06, "loss": 17.2776, "step": 21207 }, { "epoch": 0.3876651982378855, "grad_norm": 6.869744228197584, "learning_rate": 7.004315581448444e-06, "loss": 17.8097, "step": 21208 }, { "epoch": 0.387683477434332, "grad_norm": 8.093794571431726, "learning_rate": 7.004044390529532e-06, "loss": 18.0262, "step": 21209 }, { "epoch": 0.3877017566307785, "grad_norm": 6.808434764031609, "learning_rate": 7.00377319258662e-06, "loss": 18.0233, "step": 21210 }, { "epoch": 0.38772003582722503, "grad_norm": 6.4851077505687025, "learning_rate": 7.003501987620655e-06, "loss": 17.6019, "step": 21211 }, { "epoch": 0.38773831502367156, "grad_norm": 5.779791405467095, "learning_rate": 7.003230775632591e-06, "loss": 17.2765, "step": 21212 }, { "epoch": 0.3877565942201181, "grad_norm": 5.9871841761264095, "learning_rate": 7.002959556623376e-06, "loss": 17.2801, "step": 21213 }, { "epoch": 0.3877748734165646, "grad_norm": 8.093114902296954, "learning_rate": 7.002688330593963e-06, "loss": 18.6881, "step": 21214 }, { "epoch": 0.3877931526130111, "grad_norm": 9.376799662022735, "learning_rate": 7.0024170975453e-06, "loss": 19.07, "step": 21215 }, { "epoch": 0.38781143180945765, "grad_norm": 8.387256154251714, "learning_rate": 7.002145857478338e-06, "loss": 18.239, "step": 21216 }, { "epoch": 0.3878297110059042, "grad_norm": 7.62922656084555, "learning_rate": 7.00187461039403e-06, "loss": 18.0096, "step": 21217 }, { "epoch": 0.3878479902023507, "grad_norm": 5.985392864732103, "learning_rate": 7.001603356293325e-06, "loss": 17.3189, "step": 21218 }, { "epoch": 0.3878662693987972, "grad_norm": 7.650112208586723, "learning_rate": 7.001332095177173e-06, "loss": 18.0754, "step": 21219 }, { "epoch": 0.38788454859524374, "grad_norm": 6.035053943900861, "learning_rate": 7.001060827046527e-06, "loss": 17.51, "step": 21220 }, { "epoch": 0.3879028277916903, "grad_norm": 6.216438855753022, "learning_rate": 7.0007895519023364e-06, "loss": 17.2839, "step": 21221 }, { "epoch": 0.3879211069881368, "grad_norm": 5.130572402721616, "learning_rate": 7.000518269745551e-06, "loss": 17.2857, "step": 21222 }, { "epoch": 0.38793938618458335, "grad_norm": 5.96959733512535, "learning_rate": 7.000246980577121e-06, "loss": 17.3203, "step": 21223 }, { "epoch": 0.3879576653810298, "grad_norm": 6.889450543395383, "learning_rate": 6.999975684398002e-06, "loss": 17.8088, "step": 21224 }, { "epoch": 0.38797594457747636, "grad_norm": 6.029957558576468, "learning_rate": 6.9997043812091415e-06, "loss": 17.6056, "step": 21225 }, { "epoch": 0.3879942237739229, "grad_norm": 6.906724005829455, "learning_rate": 6.99943307101149e-06, "loss": 17.6793, "step": 21226 }, { "epoch": 0.38801250297036943, "grad_norm": 5.340520529071106, "learning_rate": 6.999161753805999e-06, "loss": 17.1273, "step": 21227 }, { "epoch": 0.38803078216681597, "grad_norm": 6.47837350521822, "learning_rate": 6.998890429593619e-06, "loss": 17.6201, "step": 21228 }, { "epoch": 0.38804906136326245, "grad_norm": 6.268382853033523, "learning_rate": 6.9986190983753015e-06, "loss": 17.4448, "step": 21229 }, { "epoch": 0.388067340559709, "grad_norm": 5.7812184180378745, "learning_rate": 6.9983477601519975e-06, "loss": 17.2134, "step": 21230 }, { "epoch": 0.3880856197561555, "grad_norm": 7.476410771269091, "learning_rate": 6.9980764149246594e-06, "loss": 17.9308, "step": 21231 }, { "epoch": 0.38810389895260206, "grad_norm": 7.042216423096148, "learning_rate": 6.997805062694235e-06, "loss": 17.8468, "step": 21232 }, { "epoch": 0.3881221781490486, "grad_norm": 7.155437551364444, "learning_rate": 6.997533703461679e-06, "loss": 17.9161, "step": 21233 }, { "epoch": 0.38814045734549507, "grad_norm": 7.6242148961471905, "learning_rate": 6.99726233722794e-06, "loss": 17.8064, "step": 21234 }, { "epoch": 0.3881587365419416, "grad_norm": 6.409742456396555, "learning_rate": 6.996990963993971e-06, "loss": 17.5401, "step": 21235 }, { "epoch": 0.38817701573838814, "grad_norm": 4.970076298313822, "learning_rate": 6.9967195837607184e-06, "loss": 16.9293, "step": 21236 }, { "epoch": 0.3881952949348347, "grad_norm": 6.927960706911375, "learning_rate": 6.99644819652914e-06, "loss": 17.711, "step": 21237 }, { "epoch": 0.3882135741312812, "grad_norm": 7.612483565007718, "learning_rate": 6.996176802300183e-06, "loss": 17.8934, "step": 21238 }, { "epoch": 0.3882318533277277, "grad_norm": 7.882187425633395, "learning_rate": 6.9959054010748e-06, "loss": 17.8482, "step": 21239 }, { "epoch": 0.38825013252417423, "grad_norm": 9.240745684312687, "learning_rate": 6.9956339928539406e-06, "loss": 18.8445, "step": 21240 }, { "epoch": 0.38826841172062077, "grad_norm": 7.712105683195305, "learning_rate": 6.995362577638558e-06, "loss": 17.8008, "step": 21241 }, { "epoch": 0.3882866909170673, "grad_norm": 10.050560138755184, "learning_rate": 6.995091155429603e-06, "loss": 18.0613, "step": 21242 }, { "epoch": 0.38830497011351384, "grad_norm": 6.018862137503063, "learning_rate": 6.994819726228026e-06, "loss": 17.3277, "step": 21243 }, { "epoch": 0.3883232493099603, "grad_norm": 6.199811183230868, "learning_rate": 6.994548290034779e-06, "loss": 17.444, "step": 21244 }, { "epoch": 0.38834152850640685, "grad_norm": 6.255313750780701, "learning_rate": 6.9942768468508134e-06, "loss": 17.5624, "step": 21245 }, { "epoch": 0.3883598077028534, "grad_norm": 5.6463831568429645, "learning_rate": 6.994005396677082e-06, "loss": 16.9848, "step": 21246 }, { "epoch": 0.3883780868992999, "grad_norm": 6.679686702760674, "learning_rate": 6.993733939514534e-06, "loss": 17.4929, "step": 21247 }, { "epoch": 0.3883963660957464, "grad_norm": 6.9273344663011285, "learning_rate": 6.993462475364121e-06, "loss": 17.7167, "step": 21248 }, { "epoch": 0.38841464529219294, "grad_norm": 6.811534633973879, "learning_rate": 6.993191004226795e-06, "loss": 17.6195, "step": 21249 }, { "epoch": 0.3884329244886395, "grad_norm": 6.014366310528886, "learning_rate": 6.992919526103507e-06, "loss": 17.5811, "step": 21250 }, { "epoch": 0.388451203685086, "grad_norm": 6.22316949754385, "learning_rate": 6.992648040995209e-06, "loss": 17.4077, "step": 21251 }, { "epoch": 0.38846948288153255, "grad_norm": 5.558124965916242, "learning_rate": 6.9923765489028535e-06, "loss": 17.3147, "step": 21252 }, { "epoch": 0.388487762077979, "grad_norm": 6.249392869860253, "learning_rate": 6.9921050498273915e-06, "loss": 17.476, "step": 21253 }, { "epoch": 0.38850604127442556, "grad_norm": 7.6245737784902055, "learning_rate": 6.9918335437697725e-06, "loss": 17.9288, "step": 21254 }, { "epoch": 0.3885243204708721, "grad_norm": 6.749996569070155, "learning_rate": 6.99156203073095e-06, "loss": 17.5503, "step": 21255 }, { "epoch": 0.38854259966731863, "grad_norm": 6.65555231658176, "learning_rate": 6.991290510711877e-06, "loss": 17.3065, "step": 21256 }, { "epoch": 0.38856087886376517, "grad_norm": 5.966298807431138, "learning_rate": 6.991018983713502e-06, "loss": 17.3712, "step": 21257 }, { "epoch": 0.38857915806021165, "grad_norm": 6.940957382615158, "learning_rate": 6.990747449736779e-06, "loss": 17.5398, "step": 21258 }, { "epoch": 0.3885974372566582, "grad_norm": 7.656777098521687, "learning_rate": 6.990475908782659e-06, "loss": 17.7731, "step": 21259 }, { "epoch": 0.3886157164531047, "grad_norm": 5.286748892777709, "learning_rate": 6.990204360852093e-06, "loss": 17.0508, "step": 21260 }, { "epoch": 0.38863399564955126, "grad_norm": 5.743004171762552, "learning_rate": 6.989932805946033e-06, "loss": 17.3825, "step": 21261 }, { "epoch": 0.3886522748459978, "grad_norm": 7.176283377532258, "learning_rate": 6.989661244065433e-06, "loss": 17.9225, "step": 21262 }, { "epoch": 0.38867055404244427, "grad_norm": 5.632309989099968, "learning_rate": 6.989389675211241e-06, "loss": 17.1852, "step": 21263 }, { "epoch": 0.3886888332388908, "grad_norm": 6.291288617650993, "learning_rate": 6.989118099384412e-06, "loss": 17.4721, "step": 21264 }, { "epoch": 0.38870711243533734, "grad_norm": 5.919406410920219, "learning_rate": 6.9888465165858974e-06, "loss": 17.304, "step": 21265 }, { "epoch": 0.3887253916317839, "grad_norm": 7.682794152487566, "learning_rate": 6.988574926816647e-06, "loss": 18.1165, "step": 21266 }, { "epoch": 0.3887436708282304, "grad_norm": 5.4366959645772654, "learning_rate": 6.988303330077615e-06, "loss": 17.1581, "step": 21267 }, { "epoch": 0.3887619500246769, "grad_norm": 7.521380821342717, "learning_rate": 6.988031726369751e-06, "loss": 17.8559, "step": 21268 }, { "epoch": 0.38878022922112343, "grad_norm": 5.808104123312755, "learning_rate": 6.987760115694009e-06, "loss": 17.1664, "step": 21269 }, { "epoch": 0.38879850841756997, "grad_norm": 5.958179556614933, "learning_rate": 6.987488498051341e-06, "loss": 17.3527, "step": 21270 }, { "epoch": 0.3888167876140165, "grad_norm": 7.133793472041586, "learning_rate": 6.987216873442697e-06, "loss": 17.6617, "step": 21271 }, { "epoch": 0.38883506681046304, "grad_norm": 8.031273334361943, "learning_rate": 6.986945241869032e-06, "loss": 18.4279, "step": 21272 }, { "epoch": 0.3888533460069095, "grad_norm": 8.503223922463164, "learning_rate": 6.986673603331295e-06, "loss": 17.3892, "step": 21273 }, { "epoch": 0.38887162520335605, "grad_norm": 5.965962258022024, "learning_rate": 6.9864019578304395e-06, "loss": 17.3279, "step": 21274 }, { "epoch": 0.3888899043998026, "grad_norm": 6.595645195422536, "learning_rate": 6.9861303053674175e-06, "loss": 17.6504, "step": 21275 }, { "epoch": 0.3889081835962491, "grad_norm": 7.262360912825414, "learning_rate": 6.985858645943182e-06, "loss": 17.438, "step": 21276 }, { "epoch": 0.38892646279269566, "grad_norm": 6.147591248388254, "learning_rate": 6.9855869795586826e-06, "loss": 17.2427, "step": 21277 }, { "epoch": 0.38894474198914214, "grad_norm": 5.633766962180324, "learning_rate": 6.9853153062148746e-06, "loss": 17.1198, "step": 21278 }, { "epoch": 0.3889630211855887, "grad_norm": 6.725508579829761, "learning_rate": 6.9850436259127096e-06, "loss": 17.775, "step": 21279 }, { "epoch": 0.3889813003820352, "grad_norm": 5.49034085767427, "learning_rate": 6.984771938653138e-06, "loss": 17.1014, "step": 21280 }, { "epoch": 0.38899957957848175, "grad_norm": 7.925168542674946, "learning_rate": 6.984500244437112e-06, "loss": 18.0358, "step": 21281 }, { "epoch": 0.3890178587749282, "grad_norm": 8.082796156974405, "learning_rate": 6.9842285432655845e-06, "loss": 18.4971, "step": 21282 }, { "epoch": 0.38903613797137476, "grad_norm": 7.126770318695678, "learning_rate": 6.9839568351395095e-06, "loss": 17.8173, "step": 21283 }, { "epoch": 0.3890544171678213, "grad_norm": 5.307411454487401, "learning_rate": 6.983685120059838e-06, "loss": 17.0083, "step": 21284 }, { "epoch": 0.38907269636426783, "grad_norm": 6.385024001756688, "learning_rate": 6.983413398027522e-06, "loss": 17.6907, "step": 21285 }, { "epoch": 0.38909097556071437, "grad_norm": 7.627134498652095, "learning_rate": 6.983141669043514e-06, "loss": 17.709, "step": 21286 }, { "epoch": 0.38910925475716085, "grad_norm": 7.2880895484751695, "learning_rate": 6.982869933108766e-06, "loss": 17.7053, "step": 21287 }, { "epoch": 0.3891275339536074, "grad_norm": 5.2131421028496865, "learning_rate": 6.982598190224233e-06, "loss": 17.0961, "step": 21288 }, { "epoch": 0.3891458131500539, "grad_norm": 5.955602767712263, "learning_rate": 6.982326440390863e-06, "loss": 17.3809, "step": 21289 }, { "epoch": 0.38916409234650046, "grad_norm": 6.879215689190057, "learning_rate": 6.982054683609613e-06, "loss": 17.786, "step": 21290 }, { "epoch": 0.389182371542947, "grad_norm": 6.564078461155945, "learning_rate": 6.98178291988143e-06, "loss": 17.4708, "step": 21291 }, { "epoch": 0.38920065073939347, "grad_norm": 7.3157794080072795, "learning_rate": 6.981511149207272e-06, "loss": 17.9549, "step": 21292 }, { "epoch": 0.38921892993584, "grad_norm": 7.74486846707566, "learning_rate": 6.981239371588091e-06, "loss": 18.1465, "step": 21293 }, { "epoch": 0.38923720913228654, "grad_norm": 6.72366683820151, "learning_rate": 6.980967587024836e-06, "loss": 17.9271, "step": 21294 }, { "epoch": 0.3892554883287331, "grad_norm": 5.436869165037432, "learning_rate": 6.980695795518462e-06, "loss": 17.1761, "step": 21295 }, { "epoch": 0.3892737675251796, "grad_norm": 6.483646507250865, "learning_rate": 6.980423997069921e-06, "loss": 17.6019, "step": 21296 }, { "epoch": 0.3892920467216261, "grad_norm": 5.810803759790037, "learning_rate": 6.980152191680165e-06, "loss": 17.2279, "step": 21297 }, { "epoch": 0.38931032591807263, "grad_norm": 8.216575660666162, "learning_rate": 6.979880379350148e-06, "loss": 18.4252, "step": 21298 }, { "epoch": 0.38932860511451917, "grad_norm": 6.958361073056182, "learning_rate": 6.979608560080822e-06, "loss": 17.8733, "step": 21299 }, { "epoch": 0.3893468843109657, "grad_norm": 6.095786815181945, "learning_rate": 6.979336733873139e-06, "loss": 17.3995, "step": 21300 }, { "epoch": 0.38936516350741224, "grad_norm": 6.236943758539881, "learning_rate": 6.9790649007280544e-06, "loss": 17.507, "step": 21301 }, { "epoch": 0.3893834427038587, "grad_norm": 7.616428036504893, "learning_rate": 6.978793060646517e-06, "loss": 17.8563, "step": 21302 }, { "epoch": 0.38940172190030525, "grad_norm": 7.625061378673789, "learning_rate": 6.9785212136294835e-06, "loss": 18.0646, "step": 21303 }, { "epoch": 0.3894200010967518, "grad_norm": 7.409375462993235, "learning_rate": 6.978249359677903e-06, "loss": 17.8779, "step": 21304 }, { "epoch": 0.3894382802931983, "grad_norm": 6.573970239933467, "learning_rate": 6.977977498792732e-06, "loss": 17.907, "step": 21305 }, { "epoch": 0.38945655948964486, "grad_norm": 8.660433654350012, "learning_rate": 6.97770563097492e-06, "loss": 18.4487, "step": 21306 }, { "epoch": 0.38947483868609134, "grad_norm": 8.063613108737854, "learning_rate": 6.977433756225422e-06, "loss": 18.5591, "step": 21307 }, { "epoch": 0.3894931178825379, "grad_norm": 11.406531230982122, "learning_rate": 6.9771618745451905e-06, "loss": 18.5781, "step": 21308 }, { "epoch": 0.3895113970789844, "grad_norm": 6.6462230803461875, "learning_rate": 6.976889985935178e-06, "loss": 17.8404, "step": 21309 }, { "epoch": 0.38952967627543095, "grad_norm": 7.66327701944068, "learning_rate": 6.976618090396339e-06, "loss": 18.0304, "step": 21310 }, { "epoch": 0.3895479554718775, "grad_norm": 6.727846858984811, "learning_rate": 6.976346187929623e-06, "loss": 17.8418, "step": 21311 }, { "epoch": 0.38956623466832396, "grad_norm": 8.182694833695164, "learning_rate": 6.976074278535986e-06, "loss": 18.2975, "step": 21312 }, { "epoch": 0.3895845138647705, "grad_norm": 6.926322785438558, "learning_rate": 6.97580236221638e-06, "loss": 17.9299, "step": 21313 }, { "epoch": 0.38960279306121703, "grad_norm": 8.550408556159267, "learning_rate": 6.975530438971759e-06, "loss": 18.1579, "step": 21314 }, { "epoch": 0.38962107225766357, "grad_norm": 6.021824361613114, "learning_rate": 6.975258508803073e-06, "loss": 17.5287, "step": 21315 }, { "epoch": 0.38963935145411005, "grad_norm": 6.385797779918753, "learning_rate": 6.974986571711279e-06, "loss": 17.7935, "step": 21316 }, { "epoch": 0.3896576306505566, "grad_norm": 5.637782107208922, "learning_rate": 6.9747146276973285e-06, "loss": 17.0898, "step": 21317 }, { "epoch": 0.3896759098470031, "grad_norm": 5.861024354538965, "learning_rate": 6.9744426767621745e-06, "loss": 17.3149, "step": 21318 }, { "epoch": 0.38969418904344966, "grad_norm": 7.489401160585184, "learning_rate": 6.97417071890677e-06, "loss": 18.1764, "step": 21319 }, { "epoch": 0.3897124682398962, "grad_norm": 6.662047481926622, "learning_rate": 6.973898754132068e-06, "loss": 17.5862, "step": 21320 }, { "epoch": 0.3897307474363427, "grad_norm": 6.698633197460809, "learning_rate": 6.9736267824390235e-06, "loss": 17.4519, "step": 21321 }, { "epoch": 0.3897490266327892, "grad_norm": 5.485635347009831, "learning_rate": 6.973354803828587e-06, "loss": 17.1254, "step": 21322 }, { "epoch": 0.38976730582923574, "grad_norm": 7.2791840642705035, "learning_rate": 6.973082818301713e-06, "loss": 17.8611, "step": 21323 }, { "epoch": 0.3897855850256823, "grad_norm": 6.988091609826114, "learning_rate": 6.972810825859357e-06, "loss": 17.9001, "step": 21324 }, { "epoch": 0.3898038642221288, "grad_norm": 7.631402227675237, "learning_rate": 6.972538826502468e-06, "loss": 17.6979, "step": 21325 }, { "epoch": 0.3898221434185753, "grad_norm": 6.259318480862302, "learning_rate": 6.972266820232002e-06, "loss": 17.4767, "step": 21326 }, { "epoch": 0.38984042261502183, "grad_norm": 6.440930814892935, "learning_rate": 6.971994807048913e-06, "loss": 17.3391, "step": 21327 }, { "epoch": 0.38985870181146837, "grad_norm": 7.124678437374745, "learning_rate": 6.971722786954153e-06, "loss": 17.4527, "step": 21328 }, { "epoch": 0.3898769810079149, "grad_norm": 6.218475416507136, "learning_rate": 6.971450759948675e-06, "loss": 17.5786, "step": 21329 }, { "epoch": 0.38989526020436144, "grad_norm": 6.110012833418725, "learning_rate": 6.971178726033434e-06, "loss": 17.4619, "step": 21330 }, { "epoch": 0.3899135394008079, "grad_norm": 5.964802041121985, "learning_rate": 6.970906685209382e-06, "loss": 17.4164, "step": 21331 }, { "epoch": 0.38993181859725445, "grad_norm": 8.95644489526695, "learning_rate": 6.9706346374774725e-06, "loss": 18.7085, "step": 21332 }, { "epoch": 0.389950097793701, "grad_norm": 6.611856409956911, "learning_rate": 6.970362582838661e-06, "loss": 17.3819, "step": 21333 }, { "epoch": 0.3899683769901475, "grad_norm": 8.3973359875057, "learning_rate": 6.9700905212938995e-06, "loss": 18.4749, "step": 21334 }, { "epoch": 0.38998665618659406, "grad_norm": 7.78691454636137, "learning_rate": 6.969818452844141e-06, "loss": 17.9705, "step": 21335 }, { "epoch": 0.39000493538304054, "grad_norm": 7.820378788277538, "learning_rate": 6.96954637749034e-06, "loss": 18.2859, "step": 21336 }, { "epoch": 0.3900232145794871, "grad_norm": 7.308608328921452, "learning_rate": 6.969274295233449e-06, "loss": 17.9314, "step": 21337 }, { "epoch": 0.3900414937759336, "grad_norm": 5.854718803924013, "learning_rate": 6.969002206074425e-06, "loss": 17.4876, "step": 21338 }, { "epoch": 0.39005977297238015, "grad_norm": 7.528831837381876, "learning_rate": 6.968730110014217e-06, "loss": 17.8921, "step": 21339 }, { "epoch": 0.3900780521688267, "grad_norm": 5.982696355296637, "learning_rate": 6.968458007053781e-06, "loss": 17.5032, "step": 21340 }, { "epoch": 0.39009633136527316, "grad_norm": 5.648254364233851, "learning_rate": 6.968185897194071e-06, "loss": 17.1825, "step": 21341 }, { "epoch": 0.3901146105617197, "grad_norm": 7.828135749879042, "learning_rate": 6.96791378043604e-06, "loss": 17.9979, "step": 21342 }, { "epoch": 0.39013288975816623, "grad_norm": 5.338550953651234, "learning_rate": 6.967641656780641e-06, "loss": 16.9122, "step": 21343 }, { "epoch": 0.39015116895461277, "grad_norm": 6.0180414000488165, "learning_rate": 6.9673695262288295e-06, "loss": 17.5922, "step": 21344 }, { "epoch": 0.3901694481510593, "grad_norm": 6.810479249087184, "learning_rate": 6.967097388781558e-06, "loss": 17.3728, "step": 21345 }, { "epoch": 0.3901877273475058, "grad_norm": 6.766345601358699, "learning_rate": 6.9668252444397825e-06, "loss": 17.5683, "step": 21346 }, { "epoch": 0.3902060065439523, "grad_norm": 5.059032446363334, "learning_rate": 6.966553093204455e-06, "loss": 17.015, "step": 21347 }, { "epoch": 0.39022428574039886, "grad_norm": 6.346645110894162, "learning_rate": 6.966280935076529e-06, "loss": 17.3645, "step": 21348 }, { "epoch": 0.3902425649368454, "grad_norm": 7.672018816807479, "learning_rate": 6.966008770056959e-06, "loss": 18.1503, "step": 21349 }, { "epoch": 0.3902608441332919, "grad_norm": 7.026915302430959, "learning_rate": 6.965736598146698e-06, "loss": 18.068, "step": 21350 }, { "epoch": 0.3902791233297384, "grad_norm": 6.5049983056414495, "learning_rate": 6.9654644193467e-06, "loss": 17.5476, "step": 21351 }, { "epoch": 0.39029740252618494, "grad_norm": 8.315180938519537, "learning_rate": 6.965192233657923e-06, "loss": 17.9425, "step": 21352 }, { "epoch": 0.3903156817226315, "grad_norm": 6.170959138669452, "learning_rate": 6.964920041081315e-06, "loss": 17.2836, "step": 21353 }, { "epoch": 0.390333960919078, "grad_norm": 6.832621635727959, "learning_rate": 6.964647841617834e-06, "loss": 18.0796, "step": 21354 }, { "epoch": 0.3903522401155245, "grad_norm": 6.560517745910997, "learning_rate": 6.964375635268432e-06, "loss": 17.8781, "step": 21355 }, { "epoch": 0.39037051931197103, "grad_norm": 5.484001868073417, "learning_rate": 6.964103422034065e-06, "loss": 17.0136, "step": 21356 }, { "epoch": 0.39038879850841757, "grad_norm": 5.99839690517561, "learning_rate": 6.963831201915685e-06, "loss": 17.2651, "step": 21357 }, { "epoch": 0.3904070777048641, "grad_norm": 6.872484539048409, "learning_rate": 6.963558974914248e-06, "loss": 17.5205, "step": 21358 }, { "epoch": 0.39042535690131064, "grad_norm": 5.506546562774649, "learning_rate": 6.963286741030706e-06, "loss": 16.9392, "step": 21359 }, { "epoch": 0.3904436360977571, "grad_norm": 6.476657809631774, "learning_rate": 6.963014500266015e-06, "loss": 17.3503, "step": 21360 }, { "epoch": 0.39046191529420365, "grad_norm": 5.417844657813247, "learning_rate": 6.962742252621128e-06, "loss": 17.1027, "step": 21361 }, { "epoch": 0.3904801944906502, "grad_norm": 6.559373381981851, "learning_rate": 6.962469998097001e-06, "loss": 17.842, "step": 21362 }, { "epoch": 0.3904984736870967, "grad_norm": 7.3015999342370455, "learning_rate": 6.962197736694585e-06, "loss": 17.8833, "step": 21363 }, { "epoch": 0.39051675288354326, "grad_norm": 5.2950704084144, "learning_rate": 6.961925468414838e-06, "loss": 17.1602, "step": 21364 }, { "epoch": 0.39053503207998974, "grad_norm": 6.701239897971625, "learning_rate": 6.9616531932587115e-06, "loss": 17.4443, "step": 21365 }, { "epoch": 0.3905533112764363, "grad_norm": 6.224332576366943, "learning_rate": 6.961380911227161e-06, "loss": 17.3525, "step": 21366 }, { "epoch": 0.3905715904728828, "grad_norm": 6.300876541429538, "learning_rate": 6.961108622321141e-06, "loss": 17.5501, "step": 21367 }, { "epoch": 0.39058986966932935, "grad_norm": 4.867193414597315, "learning_rate": 6.960836326541605e-06, "loss": 16.8688, "step": 21368 }, { "epoch": 0.3906081488657759, "grad_norm": 6.014989360490721, "learning_rate": 6.960564023889508e-06, "loss": 17.4641, "step": 21369 }, { "epoch": 0.39062642806222236, "grad_norm": 6.288902481815039, "learning_rate": 6.960291714365804e-06, "loss": 17.2468, "step": 21370 }, { "epoch": 0.3906447072586689, "grad_norm": 7.155918954696706, "learning_rate": 6.960019397971448e-06, "loss": 17.9502, "step": 21371 }, { "epoch": 0.39066298645511544, "grad_norm": 8.354754346094992, "learning_rate": 6.9597470747073936e-06, "loss": 18.3131, "step": 21372 }, { "epoch": 0.39068126565156197, "grad_norm": 5.953499741152448, "learning_rate": 6.959474744574596e-06, "loss": 16.9842, "step": 21373 }, { "epoch": 0.3906995448480085, "grad_norm": 6.550937126367873, "learning_rate": 6.95920240757401e-06, "loss": 17.5067, "step": 21374 }, { "epoch": 0.390717824044455, "grad_norm": 6.852875182419491, "learning_rate": 6.958930063706588e-06, "loss": 17.9154, "step": 21375 }, { "epoch": 0.3907361032409015, "grad_norm": 6.767881163629693, "learning_rate": 6.958657712973289e-06, "loss": 17.5089, "step": 21376 }, { "epoch": 0.39075438243734806, "grad_norm": 6.7261822161174045, "learning_rate": 6.958385355375062e-06, "loss": 17.4858, "step": 21377 }, { "epoch": 0.3907726616337946, "grad_norm": 5.7532822623251345, "learning_rate": 6.958112990912865e-06, "loss": 17.4299, "step": 21378 }, { "epoch": 0.39079094083024113, "grad_norm": 6.290265203103681, "learning_rate": 6.957840619587653e-06, "loss": 17.6446, "step": 21379 }, { "epoch": 0.3908092200266876, "grad_norm": 7.53805699223157, "learning_rate": 6.957568241400378e-06, "loss": 18.0118, "step": 21380 }, { "epoch": 0.39082749922313414, "grad_norm": 6.14553882754755, "learning_rate": 6.957295856351997e-06, "loss": 17.5025, "step": 21381 }, { "epoch": 0.3908457784195807, "grad_norm": 6.223444201479475, "learning_rate": 6.957023464443462e-06, "loss": 17.4475, "step": 21382 }, { "epoch": 0.3908640576160272, "grad_norm": 6.902839466662411, "learning_rate": 6.956751065675732e-06, "loss": 17.7503, "step": 21383 }, { "epoch": 0.3908823368124737, "grad_norm": 6.001556486376833, "learning_rate": 6.956478660049759e-06, "loss": 17.3742, "step": 21384 }, { "epoch": 0.39090061600892023, "grad_norm": 7.430794110392538, "learning_rate": 6.956206247566497e-06, "loss": 18.3194, "step": 21385 }, { "epoch": 0.39091889520536677, "grad_norm": 6.880564037341758, "learning_rate": 6.955933828226903e-06, "loss": 17.5664, "step": 21386 }, { "epoch": 0.3909371744018133, "grad_norm": 6.151128109271224, "learning_rate": 6.95566140203193e-06, "loss": 17.5719, "step": 21387 }, { "epoch": 0.39095545359825984, "grad_norm": 6.9677245026585215, "learning_rate": 6.955388968982533e-06, "loss": 17.7612, "step": 21388 }, { "epoch": 0.3909737327947063, "grad_norm": 6.084702929661392, "learning_rate": 6.955116529079668e-06, "loss": 17.3963, "step": 21389 }, { "epoch": 0.39099201199115285, "grad_norm": 5.680522535403873, "learning_rate": 6.9548440823242894e-06, "loss": 17.2712, "step": 21390 }, { "epoch": 0.3910102911875994, "grad_norm": 6.214114351366993, "learning_rate": 6.954571628717352e-06, "loss": 17.3423, "step": 21391 }, { "epoch": 0.3910285703840459, "grad_norm": 6.662398299688071, "learning_rate": 6.95429916825981e-06, "loss": 17.4247, "step": 21392 }, { "epoch": 0.39104684958049246, "grad_norm": 8.062051943231458, "learning_rate": 6.9540267009526195e-06, "loss": 18.1852, "step": 21393 }, { "epoch": 0.39106512877693894, "grad_norm": 6.170123012231771, "learning_rate": 6.953754226796735e-06, "loss": 17.4669, "step": 21394 }, { "epoch": 0.3910834079733855, "grad_norm": 6.779457287130032, "learning_rate": 6.9534817457931106e-06, "loss": 17.3266, "step": 21395 }, { "epoch": 0.391101687169832, "grad_norm": 6.6416770898103845, "learning_rate": 6.953209257942703e-06, "loss": 17.7326, "step": 21396 }, { "epoch": 0.39111996636627855, "grad_norm": 5.499987273354564, "learning_rate": 6.9529367632464675e-06, "loss": 17.1806, "step": 21397 }, { "epoch": 0.3911382455627251, "grad_norm": 6.564647982097723, "learning_rate": 6.952664261705357e-06, "loss": 17.5584, "step": 21398 }, { "epoch": 0.39115652475917156, "grad_norm": 4.950783792128595, "learning_rate": 6.9523917533203264e-06, "loss": 16.7491, "step": 21399 }, { "epoch": 0.3911748039556181, "grad_norm": 5.412107465896945, "learning_rate": 6.952119238092334e-06, "loss": 17.1481, "step": 21400 }, { "epoch": 0.39119308315206464, "grad_norm": 6.919772189678604, "learning_rate": 6.951846716022333e-06, "loss": 17.8896, "step": 21401 }, { "epoch": 0.39121136234851117, "grad_norm": 6.4788537182646175, "learning_rate": 6.9515741871112765e-06, "loss": 17.4922, "step": 21402 }, { "epoch": 0.3912296415449577, "grad_norm": 6.40365381631644, "learning_rate": 6.951301651360125e-06, "loss": 17.3484, "step": 21403 }, { "epoch": 0.3912479207414042, "grad_norm": 10.072362100586338, "learning_rate": 6.951029108769828e-06, "loss": 17.8765, "step": 21404 }, { "epoch": 0.3912661999378507, "grad_norm": 7.4249881377671265, "learning_rate": 6.950756559341344e-06, "loss": 17.793, "step": 21405 }, { "epoch": 0.39128447913429726, "grad_norm": 5.574785088089333, "learning_rate": 6.950484003075627e-06, "loss": 17.0611, "step": 21406 }, { "epoch": 0.3913027583307438, "grad_norm": 8.11320467579111, "learning_rate": 6.950211439973635e-06, "loss": 17.8801, "step": 21407 }, { "epoch": 0.39132103752719033, "grad_norm": 5.957545539376082, "learning_rate": 6.949938870036319e-06, "loss": 17.5233, "step": 21408 }, { "epoch": 0.3913393167236368, "grad_norm": 8.042419256128051, "learning_rate": 6.949666293264636e-06, "loss": 18.249, "step": 21409 }, { "epoch": 0.39135759592008335, "grad_norm": 6.69497399498759, "learning_rate": 6.949393709659545e-06, "loss": 17.7397, "step": 21410 }, { "epoch": 0.3913758751165299, "grad_norm": 5.444913876067578, "learning_rate": 6.949121119221996e-06, "loss": 17.1343, "step": 21411 }, { "epoch": 0.3913941543129764, "grad_norm": 6.117132649027419, "learning_rate": 6.948848521952946e-06, "loss": 17.2501, "step": 21412 }, { "epoch": 0.39141243350942295, "grad_norm": 7.002488487919857, "learning_rate": 6.948575917853353e-06, "loss": 17.3118, "step": 21413 }, { "epoch": 0.39143071270586943, "grad_norm": 7.05743563768558, "learning_rate": 6.948303306924169e-06, "loss": 17.9495, "step": 21414 }, { "epoch": 0.39144899190231597, "grad_norm": 6.8819713184902245, "learning_rate": 6.9480306891663506e-06, "loss": 17.7145, "step": 21415 }, { "epoch": 0.3914672710987625, "grad_norm": 7.144076480747118, "learning_rate": 6.947758064580854e-06, "loss": 17.8562, "step": 21416 }, { "epoch": 0.39148555029520904, "grad_norm": 5.910437964592411, "learning_rate": 6.947485433168637e-06, "loss": 17.4718, "step": 21417 }, { "epoch": 0.3915038294916555, "grad_norm": 7.346846741473479, "learning_rate": 6.947212794930649e-06, "loss": 17.8076, "step": 21418 }, { "epoch": 0.39152210868810206, "grad_norm": 5.877024921883323, "learning_rate": 6.94694014986785e-06, "loss": 17.3449, "step": 21419 }, { "epoch": 0.3915403878845486, "grad_norm": 7.500077927696738, "learning_rate": 6.946667497981195e-06, "loss": 17.7087, "step": 21420 }, { "epoch": 0.3915586670809951, "grad_norm": 6.33079279859324, "learning_rate": 6.946394839271641e-06, "loss": 17.1952, "step": 21421 }, { "epoch": 0.39157694627744166, "grad_norm": 7.604093867593265, "learning_rate": 6.946122173740139e-06, "loss": 18.1463, "step": 21422 }, { "epoch": 0.39159522547388814, "grad_norm": 6.961778367265984, "learning_rate": 6.945849501387649e-06, "loss": 17.9901, "step": 21423 }, { "epoch": 0.3916135046703347, "grad_norm": 8.235848893700775, "learning_rate": 6.945576822215127e-06, "loss": 18.8584, "step": 21424 }, { "epoch": 0.3916317838667812, "grad_norm": 7.0501785748067345, "learning_rate": 6.945304136223525e-06, "loss": 17.8166, "step": 21425 }, { "epoch": 0.39165006306322775, "grad_norm": 7.336789474080342, "learning_rate": 6.945031443413801e-06, "loss": 18.0062, "step": 21426 }, { "epoch": 0.3916683422596743, "grad_norm": 8.57694652541582, "learning_rate": 6.944758743786912e-06, "loss": 18.2202, "step": 21427 }, { "epoch": 0.39168662145612076, "grad_norm": 6.263094385257538, "learning_rate": 6.944486037343812e-06, "loss": 17.2668, "step": 21428 }, { "epoch": 0.3917049006525673, "grad_norm": 5.130306232587203, "learning_rate": 6.944213324085456e-06, "loss": 16.9621, "step": 21429 }, { "epoch": 0.39172317984901384, "grad_norm": 6.770502139624869, "learning_rate": 6.943940604012801e-06, "loss": 17.6095, "step": 21430 }, { "epoch": 0.39174145904546037, "grad_norm": 6.802158937988266, "learning_rate": 6.943667877126803e-06, "loss": 17.7199, "step": 21431 }, { "epoch": 0.3917597382419069, "grad_norm": 9.665343205258099, "learning_rate": 6.943395143428418e-06, "loss": 17.8363, "step": 21432 }, { "epoch": 0.3917780174383534, "grad_norm": 4.94746228459059, "learning_rate": 6.943122402918603e-06, "loss": 16.8839, "step": 21433 }, { "epoch": 0.3917962966347999, "grad_norm": 7.560672697460262, "learning_rate": 6.942849655598312e-06, "loss": 17.9669, "step": 21434 }, { "epoch": 0.39181457583124646, "grad_norm": 5.195320813019755, "learning_rate": 6.942576901468501e-06, "loss": 17.116, "step": 21435 }, { "epoch": 0.391832855027693, "grad_norm": 5.082459973250335, "learning_rate": 6.942304140530125e-06, "loss": 16.927, "step": 21436 }, { "epoch": 0.39185113422413953, "grad_norm": 5.687533246753722, "learning_rate": 6.942031372784143e-06, "loss": 17.1209, "step": 21437 }, { "epoch": 0.391869413420586, "grad_norm": 6.92106101088448, "learning_rate": 6.94175859823151e-06, "loss": 17.5381, "step": 21438 }, { "epoch": 0.39188769261703255, "grad_norm": 7.247306842169459, "learning_rate": 6.9414858168731814e-06, "loss": 17.9658, "step": 21439 }, { "epoch": 0.3919059718134791, "grad_norm": 6.322222866411631, "learning_rate": 6.941213028710113e-06, "loss": 17.4772, "step": 21440 }, { "epoch": 0.3919242510099256, "grad_norm": 6.506007597978849, "learning_rate": 6.940940233743262e-06, "loss": 17.5034, "step": 21441 }, { "epoch": 0.39194253020637215, "grad_norm": 6.427767140365889, "learning_rate": 6.9406674319735835e-06, "loss": 17.938, "step": 21442 }, { "epoch": 0.39196080940281863, "grad_norm": 8.099055649396236, "learning_rate": 6.940394623402033e-06, "loss": 17.9091, "step": 21443 }, { "epoch": 0.39197908859926517, "grad_norm": 5.871450044502728, "learning_rate": 6.940121808029569e-06, "loss": 17.2447, "step": 21444 }, { "epoch": 0.3919973677957117, "grad_norm": 6.802340027753673, "learning_rate": 6.9398489858571475e-06, "loss": 17.7163, "step": 21445 }, { "epoch": 0.39201564699215824, "grad_norm": 6.690952033889844, "learning_rate": 6.939576156885722e-06, "loss": 17.4323, "step": 21446 }, { "epoch": 0.3920339261886048, "grad_norm": 6.167617635270627, "learning_rate": 6.9393033211162505e-06, "loss": 17.3473, "step": 21447 }, { "epoch": 0.39205220538505126, "grad_norm": 5.846650263525677, "learning_rate": 6.939030478549691e-06, "loss": 17.3274, "step": 21448 }, { "epoch": 0.3920704845814978, "grad_norm": 5.950837418407083, "learning_rate": 6.938757629186996e-06, "loss": 17.0843, "step": 21449 }, { "epoch": 0.3920887637779443, "grad_norm": 5.836430906477098, "learning_rate": 6.938484773029123e-06, "loss": 17.4089, "step": 21450 }, { "epoch": 0.39210704297439086, "grad_norm": 6.608261446021528, "learning_rate": 6.938211910077031e-06, "loss": 17.6756, "step": 21451 }, { "epoch": 0.39212532217083734, "grad_norm": 6.531429575224675, "learning_rate": 6.937939040331674e-06, "loss": 17.2479, "step": 21452 }, { "epoch": 0.3921436013672839, "grad_norm": 6.041658607655284, "learning_rate": 6.937666163794008e-06, "loss": 17.2461, "step": 21453 }, { "epoch": 0.3921618805637304, "grad_norm": 9.557082172843252, "learning_rate": 6.9373932804649915e-06, "loss": 18.5878, "step": 21454 }, { "epoch": 0.39218015976017695, "grad_norm": 6.205925750888624, "learning_rate": 6.937120390345579e-06, "loss": 17.3433, "step": 21455 }, { "epoch": 0.3921984389566235, "grad_norm": 5.817668641710071, "learning_rate": 6.936847493436727e-06, "loss": 17.1638, "step": 21456 }, { "epoch": 0.39221671815306997, "grad_norm": 5.751541819348251, "learning_rate": 6.9365745897393935e-06, "loss": 17.2232, "step": 21457 }, { "epoch": 0.3922349973495165, "grad_norm": 5.882162509912774, "learning_rate": 6.936301679254533e-06, "loss": 16.9619, "step": 21458 }, { "epoch": 0.39225327654596304, "grad_norm": 6.3118147297515215, "learning_rate": 6.9360287619831035e-06, "loss": 17.4475, "step": 21459 }, { "epoch": 0.3922715557424096, "grad_norm": 5.410386490362895, "learning_rate": 6.9357558379260615e-06, "loss": 16.971, "step": 21460 }, { "epoch": 0.3922898349388561, "grad_norm": 5.477369075142672, "learning_rate": 6.9354829070843635e-06, "loss": 17.1283, "step": 21461 }, { "epoch": 0.3923081141353026, "grad_norm": 6.588263923847018, "learning_rate": 6.935209969458967e-06, "loss": 17.66, "step": 21462 }, { "epoch": 0.3923263933317491, "grad_norm": 6.770909018164611, "learning_rate": 6.934937025050826e-06, "loss": 17.7483, "step": 21463 }, { "epoch": 0.39234467252819566, "grad_norm": 6.5732233956474415, "learning_rate": 6.9346640738608975e-06, "loss": 17.818, "step": 21464 }, { "epoch": 0.3923629517246422, "grad_norm": 6.378638997720948, "learning_rate": 6.934391115890142e-06, "loss": 17.5235, "step": 21465 }, { "epoch": 0.39238123092108873, "grad_norm": 5.609027096527029, "learning_rate": 6.934118151139512e-06, "loss": 17.1316, "step": 21466 }, { "epoch": 0.3923995101175352, "grad_norm": 8.59040276509835, "learning_rate": 6.933845179609966e-06, "loss": 18.5035, "step": 21467 }, { "epoch": 0.39241778931398175, "grad_norm": 6.018861877900512, "learning_rate": 6.933572201302459e-06, "loss": 17.3577, "step": 21468 }, { "epoch": 0.3924360685104283, "grad_norm": 7.895633672450876, "learning_rate": 6.933299216217952e-06, "loss": 18.2459, "step": 21469 }, { "epoch": 0.3924543477068748, "grad_norm": 7.084441441369362, "learning_rate": 6.933026224357397e-06, "loss": 17.7863, "step": 21470 }, { "epoch": 0.39247262690332135, "grad_norm": 5.888197528605958, "learning_rate": 6.932753225721753e-06, "loss": 17.2933, "step": 21471 }, { "epoch": 0.39249090609976783, "grad_norm": 5.328045823701662, "learning_rate": 6.9324802203119766e-06, "loss": 17.3001, "step": 21472 }, { "epoch": 0.39250918529621437, "grad_norm": 5.873985335509079, "learning_rate": 6.9322072081290245e-06, "loss": 17.1989, "step": 21473 }, { "epoch": 0.3925274644926609, "grad_norm": 6.382507901772117, "learning_rate": 6.931934189173854e-06, "loss": 17.2451, "step": 21474 }, { "epoch": 0.39254574368910744, "grad_norm": 6.416150137299243, "learning_rate": 6.931661163447423e-06, "loss": 17.5899, "step": 21475 }, { "epoch": 0.392564022885554, "grad_norm": 5.733098397335905, "learning_rate": 6.931388130950688e-06, "loss": 17.3838, "step": 21476 }, { "epoch": 0.39258230208200046, "grad_norm": 5.390450152159793, "learning_rate": 6.931115091684603e-06, "loss": 17.1822, "step": 21477 }, { "epoch": 0.392600581278447, "grad_norm": 7.565431469379208, "learning_rate": 6.930842045650127e-06, "loss": 17.9168, "step": 21478 }, { "epoch": 0.3926188604748935, "grad_norm": 6.791175985526804, "learning_rate": 6.930568992848219e-06, "loss": 17.561, "step": 21479 }, { "epoch": 0.39263713967134006, "grad_norm": 6.845110080529336, "learning_rate": 6.930295933279833e-06, "loss": 17.6659, "step": 21480 }, { "epoch": 0.3926554188677866, "grad_norm": 5.9062726952214835, "learning_rate": 6.930022866945928e-06, "loss": 17.3578, "step": 21481 }, { "epoch": 0.3926736980642331, "grad_norm": 9.224183488796932, "learning_rate": 6.929749793847459e-06, "loss": 17.6501, "step": 21482 }, { "epoch": 0.3926919772606796, "grad_norm": 6.297355400920313, "learning_rate": 6.929476713985386e-06, "loss": 17.4091, "step": 21483 }, { "epoch": 0.39271025645712615, "grad_norm": 6.22445551869341, "learning_rate": 6.9292036273606635e-06, "loss": 17.293, "step": 21484 }, { "epoch": 0.3927285356535727, "grad_norm": 6.79711374166879, "learning_rate": 6.92893053397425e-06, "loss": 17.6671, "step": 21485 }, { "epoch": 0.39274681485001917, "grad_norm": 7.058928683656009, "learning_rate": 6.928657433827102e-06, "loss": 17.8986, "step": 21486 }, { "epoch": 0.3927650940464657, "grad_norm": 5.999522263824738, "learning_rate": 6.928384326920178e-06, "loss": 17.5663, "step": 21487 }, { "epoch": 0.39278337324291224, "grad_norm": 5.60797011067639, "learning_rate": 6.928111213254434e-06, "loss": 17.1847, "step": 21488 }, { "epoch": 0.3928016524393588, "grad_norm": 6.880346185719323, "learning_rate": 6.927838092830826e-06, "loss": 17.7063, "step": 21489 }, { "epoch": 0.3928199316358053, "grad_norm": 6.67807673595115, "learning_rate": 6.927564965650315e-06, "loss": 17.7538, "step": 21490 }, { "epoch": 0.3928382108322518, "grad_norm": 6.885612102038492, "learning_rate": 6.927291831713855e-06, "loss": 17.6307, "step": 21491 }, { "epoch": 0.3928564900286983, "grad_norm": 5.380259452494266, "learning_rate": 6.927018691022403e-06, "loss": 17.0805, "step": 21492 }, { "epoch": 0.39287476922514486, "grad_norm": 5.43083297474748, "learning_rate": 6.926745543576921e-06, "loss": 17.1407, "step": 21493 }, { "epoch": 0.3928930484215914, "grad_norm": 7.756199643110793, "learning_rate": 6.926472389378361e-06, "loss": 17.8113, "step": 21494 }, { "epoch": 0.39291132761803793, "grad_norm": 10.660610872866012, "learning_rate": 6.926199228427681e-06, "loss": 17.8452, "step": 21495 }, { "epoch": 0.3929296068144844, "grad_norm": 8.26578635522386, "learning_rate": 6.925926060725843e-06, "loss": 18.2691, "step": 21496 }, { "epoch": 0.39294788601093095, "grad_norm": 6.998510645103447, "learning_rate": 6.925652886273799e-06, "loss": 17.9226, "step": 21497 }, { "epoch": 0.3929661652073775, "grad_norm": 6.001831030454143, "learning_rate": 6.9253797050725104e-06, "loss": 17.4689, "step": 21498 }, { "epoch": 0.392984444403824, "grad_norm": 7.423516301738987, "learning_rate": 6.925106517122932e-06, "loss": 18.0992, "step": 21499 }, { "epoch": 0.39300272360027055, "grad_norm": 6.370552452597878, "learning_rate": 6.9248333224260226e-06, "loss": 17.4909, "step": 21500 }, { "epoch": 0.39302100279671703, "grad_norm": 5.862008907473447, "learning_rate": 6.924560120982739e-06, "loss": 17.4139, "step": 21501 }, { "epoch": 0.39303928199316357, "grad_norm": 5.935590540687427, "learning_rate": 6.924286912794039e-06, "loss": 17.1046, "step": 21502 }, { "epoch": 0.3930575611896101, "grad_norm": 6.926847413856296, "learning_rate": 6.924013697860882e-06, "loss": 17.7685, "step": 21503 }, { "epoch": 0.39307584038605664, "grad_norm": 5.970307707596561, "learning_rate": 6.923740476184222e-06, "loss": 17.6556, "step": 21504 }, { "epoch": 0.3930941195825032, "grad_norm": 6.713674199500309, "learning_rate": 6.923467247765019e-06, "loss": 17.6955, "step": 21505 }, { "epoch": 0.39311239877894966, "grad_norm": 5.577756592376602, "learning_rate": 6.923194012604231e-06, "loss": 17.3995, "step": 21506 }, { "epoch": 0.3931306779753962, "grad_norm": 5.9016469407663825, "learning_rate": 6.9229207707028146e-06, "loss": 17.5544, "step": 21507 }, { "epoch": 0.39314895717184273, "grad_norm": 6.434933392640182, "learning_rate": 6.922647522061727e-06, "loss": 17.6404, "step": 21508 }, { "epoch": 0.39316723636828926, "grad_norm": 6.4413342933589925, "learning_rate": 6.922374266681927e-06, "loss": 17.9014, "step": 21509 }, { "epoch": 0.3931855155647358, "grad_norm": 6.065567085535461, "learning_rate": 6.922101004564373e-06, "loss": 17.1599, "step": 21510 }, { "epoch": 0.3932037947611823, "grad_norm": 7.44199599360796, "learning_rate": 6.92182773571002e-06, "loss": 17.7583, "step": 21511 }, { "epoch": 0.3932220739576288, "grad_norm": 7.304037294401231, "learning_rate": 6.92155446011983e-06, "loss": 17.6521, "step": 21512 }, { "epoch": 0.39324035315407535, "grad_norm": 6.205163333011176, "learning_rate": 6.9212811777947565e-06, "loss": 17.4399, "step": 21513 }, { "epoch": 0.3932586323505219, "grad_norm": 6.609599701956432, "learning_rate": 6.92100788873576e-06, "loss": 17.4695, "step": 21514 }, { "epoch": 0.3932769115469684, "grad_norm": 6.584113271674233, "learning_rate": 6.920734592943796e-06, "loss": 17.5762, "step": 21515 }, { "epoch": 0.3932951907434149, "grad_norm": 6.800658262301761, "learning_rate": 6.920461290419825e-06, "loss": 17.6977, "step": 21516 }, { "epoch": 0.39331346993986144, "grad_norm": 6.457193982312407, "learning_rate": 6.920187981164804e-06, "loss": 17.6557, "step": 21517 }, { "epoch": 0.393331749136308, "grad_norm": 7.853754503596599, "learning_rate": 6.919914665179691e-06, "loss": 18.1926, "step": 21518 }, { "epoch": 0.3933500283327545, "grad_norm": 6.6955447511823225, "learning_rate": 6.919641342465444e-06, "loss": 17.4948, "step": 21519 }, { "epoch": 0.393368307529201, "grad_norm": 5.966810488350211, "learning_rate": 6.91936801302302e-06, "loss": 17.3947, "step": 21520 }, { "epoch": 0.3933865867256475, "grad_norm": 6.077602715754855, "learning_rate": 6.919094676853378e-06, "loss": 17.0745, "step": 21521 }, { "epoch": 0.39340486592209406, "grad_norm": 5.584685044400404, "learning_rate": 6.918821333957475e-06, "loss": 17.0942, "step": 21522 }, { "epoch": 0.3934231451185406, "grad_norm": 7.001524006806588, "learning_rate": 6.91854798433627e-06, "loss": 17.9249, "step": 21523 }, { "epoch": 0.39344142431498713, "grad_norm": 6.599745661110747, "learning_rate": 6.918274627990722e-06, "loss": 17.7185, "step": 21524 }, { "epoch": 0.3934597035114336, "grad_norm": 8.165704272755427, "learning_rate": 6.918001264921786e-06, "loss": 18.4602, "step": 21525 }, { "epoch": 0.39347798270788015, "grad_norm": 6.1992574624140095, "learning_rate": 6.917727895130423e-06, "loss": 17.4023, "step": 21526 }, { "epoch": 0.3934962619043267, "grad_norm": 7.073284774296896, "learning_rate": 6.917454518617589e-06, "loss": 17.572, "step": 21527 }, { "epoch": 0.3935145411007732, "grad_norm": 7.292081190745441, "learning_rate": 6.917181135384246e-06, "loss": 17.8471, "step": 21528 }, { "epoch": 0.39353282029721975, "grad_norm": 7.072267248965862, "learning_rate": 6.9169077454313475e-06, "loss": 17.8776, "step": 21529 }, { "epoch": 0.39355109949366623, "grad_norm": 5.322622559874622, "learning_rate": 6.916634348759853e-06, "loss": 17.1781, "step": 21530 }, { "epoch": 0.39356937869011277, "grad_norm": 7.077310316001486, "learning_rate": 6.916360945370722e-06, "loss": 17.94, "step": 21531 }, { "epoch": 0.3935876578865593, "grad_norm": 6.440467751651259, "learning_rate": 6.916087535264913e-06, "loss": 17.45, "step": 21532 }, { "epoch": 0.39360593708300584, "grad_norm": 7.40705158499139, "learning_rate": 6.915814118443383e-06, "loss": 17.6429, "step": 21533 }, { "epoch": 0.3936242162794524, "grad_norm": 5.276780521040269, "learning_rate": 6.915540694907092e-06, "loss": 16.977, "step": 21534 }, { "epoch": 0.39364249547589886, "grad_norm": 7.254487865106271, "learning_rate": 6.9152672646569955e-06, "loss": 17.8337, "step": 21535 }, { "epoch": 0.3936607746723454, "grad_norm": 5.914857845336416, "learning_rate": 6.914993827694053e-06, "loss": 17.2768, "step": 21536 }, { "epoch": 0.39367905386879193, "grad_norm": 5.750367364362483, "learning_rate": 6.9147203840192246e-06, "loss": 17.3195, "step": 21537 }, { "epoch": 0.39369733306523846, "grad_norm": 6.5440410320419105, "learning_rate": 6.914446933633467e-06, "loss": 17.4559, "step": 21538 }, { "epoch": 0.393715612261685, "grad_norm": 5.75955858533619, "learning_rate": 6.914173476537739e-06, "loss": 17.282, "step": 21539 }, { "epoch": 0.3937338914581315, "grad_norm": 7.3803077544723585, "learning_rate": 6.913900012732999e-06, "loss": 17.4063, "step": 21540 }, { "epoch": 0.393752170654578, "grad_norm": 6.739488790118494, "learning_rate": 6.913626542220205e-06, "loss": 17.7722, "step": 21541 }, { "epoch": 0.39377044985102455, "grad_norm": 5.871982181584669, "learning_rate": 6.913353065000317e-06, "loss": 16.9883, "step": 21542 }, { "epoch": 0.3937887290474711, "grad_norm": 6.849382124331771, "learning_rate": 6.913079581074293e-06, "loss": 17.9869, "step": 21543 }, { "epoch": 0.3938070082439176, "grad_norm": 5.594692598085732, "learning_rate": 6.91280609044309e-06, "loss": 17.0854, "step": 21544 }, { "epoch": 0.3938252874403641, "grad_norm": 5.758206562655694, "learning_rate": 6.912532593107667e-06, "loss": 17.2974, "step": 21545 }, { "epoch": 0.39384356663681064, "grad_norm": 5.677117188934752, "learning_rate": 6.912259089068984e-06, "loss": 17.1663, "step": 21546 }, { "epoch": 0.3938618458332572, "grad_norm": 6.861772832897799, "learning_rate": 6.911985578327999e-06, "loss": 17.7252, "step": 21547 }, { "epoch": 0.3938801250297037, "grad_norm": 7.841222096859772, "learning_rate": 6.911712060885672e-06, "loss": 17.629, "step": 21548 }, { "epoch": 0.39389840422615024, "grad_norm": 7.624450085180011, "learning_rate": 6.9114385367429585e-06, "loss": 18.033, "step": 21549 }, { "epoch": 0.3939166834225967, "grad_norm": 7.868388483174876, "learning_rate": 6.911165005900817e-06, "loss": 18.2352, "step": 21550 }, { "epoch": 0.39393496261904326, "grad_norm": 7.593997323363449, "learning_rate": 6.91089146836021e-06, "loss": 18.1603, "step": 21551 }, { "epoch": 0.3939532418154898, "grad_norm": 9.515579700243576, "learning_rate": 6.910617924122094e-06, "loss": 17.5219, "step": 21552 }, { "epoch": 0.39397152101193633, "grad_norm": 6.462431585095287, "learning_rate": 6.9103443731874286e-06, "loss": 17.7226, "step": 21553 }, { "epoch": 0.3939898002083828, "grad_norm": 6.071070491291092, "learning_rate": 6.9100708155571705e-06, "loss": 17.3667, "step": 21554 }, { "epoch": 0.39400807940482935, "grad_norm": 7.148145689259566, "learning_rate": 6.909797251232282e-06, "loss": 17.588, "step": 21555 }, { "epoch": 0.3940263586012759, "grad_norm": 6.939347550435989, "learning_rate": 6.9095236802137174e-06, "loss": 17.5267, "step": 21556 }, { "epoch": 0.3940446377977224, "grad_norm": 5.705199918354399, "learning_rate": 6.909250102502439e-06, "loss": 17.3792, "step": 21557 }, { "epoch": 0.39406291699416895, "grad_norm": 7.28107304514705, "learning_rate": 6.908976518099405e-06, "loss": 18.0734, "step": 21558 }, { "epoch": 0.39408119619061543, "grad_norm": 8.205423068008912, "learning_rate": 6.908702927005574e-06, "loss": 18.3236, "step": 21559 }, { "epoch": 0.39409947538706197, "grad_norm": 6.7778303874948715, "learning_rate": 6.9084293292219055e-06, "loss": 17.8698, "step": 21560 }, { "epoch": 0.3941177545835085, "grad_norm": 5.884320065261258, "learning_rate": 6.908155724749357e-06, "loss": 17.2822, "step": 21561 }, { "epoch": 0.39413603377995504, "grad_norm": 6.104454965401443, "learning_rate": 6.907882113588889e-06, "loss": 17.4723, "step": 21562 }, { "epoch": 0.3941543129764016, "grad_norm": 5.783308774319683, "learning_rate": 6.907608495741458e-06, "loss": 17.4832, "step": 21563 }, { "epoch": 0.39417259217284806, "grad_norm": 6.2913806012946605, "learning_rate": 6.907334871208024e-06, "loss": 17.5201, "step": 21564 }, { "epoch": 0.3941908713692946, "grad_norm": 5.327093358403174, "learning_rate": 6.907061239989551e-06, "loss": 16.931, "step": 21565 }, { "epoch": 0.39420915056574113, "grad_norm": 5.1719985979508865, "learning_rate": 6.9067876020869905e-06, "loss": 17.1255, "step": 21566 }, { "epoch": 0.39422742976218766, "grad_norm": 6.54416253618085, "learning_rate": 6.906513957501306e-06, "loss": 17.6631, "step": 21567 }, { "epoch": 0.3942457089586342, "grad_norm": 6.253670207973528, "learning_rate": 6.9062403062334545e-06, "loss": 17.1095, "step": 21568 }, { "epoch": 0.3942639881550807, "grad_norm": 7.244370332448828, "learning_rate": 6.905966648284398e-06, "loss": 17.801, "step": 21569 }, { "epoch": 0.3942822673515272, "grad_norm": 6.038511238306077, "learning_rate": 6.905692983655092e-06, "loss": 17.4237, "step": 21570 }, { "epoch": 0.39430054654797375, "grad_norm": 7.0794140402264265, "learning_rate": 6.905419312346499e-06, "loss": 17.909, "step": 21571 }, { "epoch": 0.3943188257444203, "grad_norm": 6.083436443068745, "learning_rate": 6.905145634359576e-06, "loss": 17.4752, "step": 21572 }, { "epoch": 0.3943371049408668, "grad_norm": 7.731608014735897, "learning_rate": 6.904871949695282e-06, "loss": 18.1243, "step": 21573 }, { "epoch": 0.3943553841373133, "grad_norm": 6.637765956525025, "learning_rate": 6.9045982583545775e-06, "loss": 17.7536, "step": 21574 }, { "epoch": 0.39437366333375984, "grad_norm": 6.532880660508335, "learning_rate": 6.904324560338422e-06, "loss": 17.4753, "step": 21575 }, { "epoch": 0.3943919425302064, "grad_norm": 6.107409359039475, "learning_rate": 6.904050855647775e-06, "loss": 17.5301, "step": 21576 }, { "epoch": 0.3944102217266529, "grad_norm": 5.78441930579743, "learning_rate": 6.903777144283593e-06, "loss": 17.3339, "step": 21577 }, { "epoch": 0.39442850092309945, "grad_norm": 5.36158213128983, "learning_rate": 6.9035034262468385e-06, "loss": 17.2054, "step": 21578 }, { "epoch": 0.3944467801195459, "grad_norm": 7.98591769113503, "learning_rate": 6.903229701538469e-06, "loss": 17.8269, "step": 21579 }, { "epoch": 0.39446505931599246, "grad_norm": 6.972481701306595, "learning_rate": 6.902955970159446e-06, "loss": 17.9411, "step": 21580 }, { "epoch": 0.394483338512439, "grad_norm": 7.447622898864084, "learning_rate": 6.902682232110727e-06, "loss": 17.9744, "step": 21581 }, { "epoch": 0.39450161770888553, "grad_norm": 6.104928288760058, "learning_rate": 6.90240848739327e-06, "loss": 17.5479, "step": 21582 }, { "epoch": 0.39451989690533207, "grad_norm": 6.141051315020905, "learning_rate": 6.9021347360080385e-06, "loss": 17.4445, "step": 21583 }, { "epoch": 0.39453817610177855, "grad_norm": 5.794997374883734, "learning_rate": 6.901860977955989e-06, "loss": 17.3963, "step": 21584 }, { "epoch": 0.3945564552982251, "grad_norm": 6.731518293898058, "learning_rate": 6.901587213238081e-06, "loss": 17.715, "step": 21585 }, { "epoch": 0.3945747344946716, "grad_norm": 7.191380738474616, "learning_rate": 6.901313441855275e-06, "loss": 17.5182, "step": 21586 }, { "epoch": 0.39459301369111816, "grad_norm": 6.365568178045943, "learning_rate": 6.901039663808531e-06, "loss": 17.5003, "step": 21587 }, { "epoch": 0.39461129288756464, "grad_norm": 7.978823305473955, "learning_rate": 6.9007658790988086e-06, "loss": 17.7458, "step": 21588 }, { "epoch": 0.39462957208401117, "grad_norm": 6.411446773921178, "learning_rate": 6.900492087727065e-06, "loss": 17.4705, "step": 21589 }, { "epoch": 0.3946478512804577, "grad_norm": 7.511496973229927, "learning_rate": 6.900218289694262e-06, "loss": 18.0328, "step": 21590 }, { "epoch": 0.39466613047690424, "grad_norm": 6.437665494260655, "learning_rate": 6.8999444850013604e-06, "loss": 17.6575, "step": 21591 }, { "epoch": 0.3946844096733508, "grad_norm": 9.144353950375423, "learning_rate": 6.899670673649317e-06, "loss": 18.3534, "step": 21592 }, { "epoch": 0.39470268886979726, "grad_norm": 6.14218081702421, "learning_rate": 6.8993968556390945e-06, "loss": 17.2321, "step": 21593 }, { "epoch": 0.3947209680662438, "grad_norm": 5.523745245072925, "learning_rate": 6.899123030971648e-06, "loss": 16.9665, "step": 21594 }, { "epoch": 0.39473924726269033, "grad_norm": 5.152206424640226, "learning_rate": 6.8988491996479414e-06, "loss": 17.0409, "step": 21595 }, { "epoch": 0.39475752645913686, "grad_norm": 6.574388933840838, "learning_rate": 6.8985753616689335e-06, "loss": 17.6122, "step": 21596 }, { "epoch": 0.3947758056555834, "grad_norm": 5.76128631986071, "learning_rate": 6.898301517035584e-06, "loss": 17.3483, "step": 21597 }, { "epoch": 0.3947940848520299, "grad_norm": 6.052356447756394, "learning_rate": 6.8980276657488505e-06, "loss": 17.416, "step": 21598 }, { "epoch": 0.3948123640484764, "grad_norm": 6.493268264946741, "learning_rate": 6.897753807809696e-06, "loss": 17.8602, "step": 21599 }, { "epoch": 0.39483064324492295, "grad_norm": 6.095529945679339, "learning_rate": 6.897479943219079e-06, "loss": 17.2012, "step": 21600 }, { "epoch": 0.3948489224413695, "grad_norm": 6.111414826188851, "learning_rate": 6.89720607197796e-06, "loss": 17.449, "step": 21601 }, { "epoch": 0.394867201637816, "grad_norm": 7.318563016598272, "learning_rate": 6.896932194087298e-06, "loss": 17.6215, "step": 21602 }, { "epoch": 0.3948854808342625, "grad_norm": 6.858298435815988, "learning_rate": 6.896658309548053e-06, "loss": 17.624, "step": 21603 }, { "epoch": 0.39490376003070904, "grad_norm": 6.4539344725539065, "learning_rate": 6.896384418361185e-06, "loss": 17.7222, "step": 21604 }, { "epoch": 0.3949220392271556, "grad_norm": 6.983623354502389, "learning_rate": 6.896110520527655e-06, "loss": 17.5526, "step": 21605 }, { "epoch": 0.3949403184236021, "grad_norm": 8.105170382444221, "learning_rate": 6.8958366160484214e-06, "loss": 18.1537, "step": 21606 }, { "epoch": 0.39495859762004865, "grad_norm": 6.996364276421232, "learning_rate": 6.895562704924446e-06, "loss": 18.1652, "step": 21607 }, { "epoch": 0.3949768768164951, "grad_norm": 6.259866267692265, "learning_rate": 6.895288787156687e-06, "loss": 17.491, "step": 21608 }, { "epoch": 0.39499515601294166, "grad_norm": 5.608185112770575, "learning_rate": 6.895014862746103e-06, "loss": 17.0852, "step": 21609 }, { "epoch": 0.3950134352093882, "grad_norm": 7.386344062264315, "learning_rate": 6.89474093169366e-06, "loss": 17.7781, "step": 21610 }, { "epoch": 0.39503171440583473, "grad_norm": 7.345399211896186, "learning_rate": 6.894466994000313e-06, "loss": 18.0912, "step": 21611 }, { "epoch": 0.39504999360228127, "grad_norm": 5.4158628072361, "learning_rate": 6.894193049667024e-06, "loss": 17.2701, "step": 21612 }, { "epoch": 0.39506827279872775, "grad_norm": 5.333877593585367, "learning_rate": 6.893919098694752e-06, "loss": 17.0123, "step": 21613 }, { "epoch": 0.3950865519951743, "grad_norm": 8.665147609340739, "learning_rate": 6.893645141084458e-06, "loss": 18.3743, "step": 21614 }, { "epoch": 0.3951048311916208, "grad_norm": 7.752494504262045, "learning_rate": 6.893371176837103e-06, "loss": 18.1383, "step": 21615 }, { "epoch": 0.39512311038806736, "grad_norm": 5.804033550700176, "learning_rate": 6.893097205953645e-06, "loss": 17.3385, "step": 21616 }, { "epoch": 0.3951413895845139, "grad_norm": 5.224038404367742, "learning_rate": 6.8928232284350474e-06, "loss": 16.9664, "step": 21617 }, { "epoch": 0.39515966878096037, "grad_norm": 6.568917726489629, "learning_rate": 6.892549244282267e-06, "loss": 17.9612, "step": 21618 }, { "epoch": 0.3951779479774069, "grad_norm": 5.9351522721466585, "learning_rate": 6.8922752534962656e-06, "loss": 17.5031, "step": 21619 }, { "epoch": 0.39519622717385344, "grad_norm": 6.6544735398237975, "learning_rate": 6.892001256078005e-06, "loss": 17.6356, "step": 21620 }, { "epoch": 0.3952145063703, "grad_norm": 7.363304186122785, "learning_rate": 6.891727252028444e-06, "loss": 17.6523, "step": 21621 }, { "epoch": 0.39523278556674646, "grad_norm": 6.160753133854578, "learning_rate": 6.8914532413485415e-06, "loss": 17.2715, "step": 21622 }, { "epoch": 0.395251064763193, "grad_norm": 7.278316791103219, "learning_rate": 6.89117922403926e-06, "loss": 17.9876, "step": 21623 }, { "epoch": 0.39526934395963953, "grad_norm": 6.2495401952620275, "learning_rate": 6.8909052001015616e-06, "loss": 17.3047, "step": 21624 }, { "epoch": 0.39528762315608607, "grad_norm": 8.317036432905699, "learning_rate": 6.8906311695364035e-06, "loss": 18.1389, "step": 21625 }, { "epoch": 0.3953059023525326, "grad_norm": 6.982178598956792, "learning_rate": 6.890357132344746e-06, "loss": 17.6884, "step": 21626 }, { "epoch": 0.3953241815489791, "grad_norm": 7.886219479599872, "learning_rate": 6.890083088527551e-06, "loss": 17.9141, "step": 21627 }, { "epoch": 0.3953424607454256, "grad_norm": 6.750399332128435, "learning_rate": 6.8898090380857795e-06, "loss": 17.7167, "step": 21628 }, { "epoch": 0.39536073994187215, "grad_norm": 5.841837478245158, "learning_rate": 6.889534981020392e-06, "loss": 17.3937, "step": 21629 }, { "epoch": 0.3953790191383187, "grad_norm": 5.332917163119414, "learning_rate": 6.889260917332347e-06, "loss": 17.2659, "step": 21630 }, { "epoch": 0.3953972983347652, "grad_norm": 6.827078039101366, "learning_rate": 6.888986847022607e-06, "loss": 17.7501, "step": 21631 }, { "epoch": 0.3954155775312117, "grad_norm": 6.815880046059335, "learning_rate": 6.888712770092132e-06, "loss": 17.9065, "step": 21632 }, { "epoch": 0.39543385672765824, "grad_norm": 5.806367709665783, "learning_rate": 6.8884386865418825e-06, "loss": 17.2579, "step": 21633 }, { "epoch": 0.3954521359241048, "grad_norm": 6.373527468823904, "learning_rate": 6.888164596372819e-06, "loss": 17.1608, "step": 21634 }, { "epoch": 0.3954704151205513, "grad_norm": 7.91456275985367, "learning_rate": 6.887890499585903e-06, "loss": 18.3949, "step": 21635 }, { "epoch": 0.39548869431699785, "grad_norm": 6.545420292366348, "learning_rate": 6.887616396182094e-06, "loss": 17.4711, "step": 21636 }, { "epoch": 0.3955069735134443, "grad_norm": 6.150967724446916, "learning_rate": 6.887342286162354e-06, "loss": 17.3862, "step": 21637 }, { "epoch": 0.39552525270989086, "grad_norm": 6.086142113223638, "learning_rate": 6.887068169527643e-06, "loss": 17.2927, "step": 21638 }, { "epoch": 0.3955435319063374, "grad_norm": 6.752372891295724, "learning_rate": 6.886794046278922e-06, "loss": 17.7975, "step": 21639 }, { "epoch": 0.39556181110278393, "grad_norm": 5.734532996593663, "learning_rate": 6.886519916417152e-06, "loss": 17.3136, "step": 21640 }, { "epoch": 0.39558009029923047, "grad_norm": 6.179620272208543, "learning_rate": 6.886245779943291e-06, "loss": 17.3563, "step": 21641 }, { "epoch": 0.39559836949567695, "grad_norm": 6.746739964031116, "learning_rate": 6.885971636858304e-06, "loss": 17.4968, "step": 21642 }, { "epoch": 0.3956166486921235, "grad_norm": 5.582131068210862, "learning_rate": 6.88569748716315e-06, "loss": 17.0617, "step": 21643 }, { "epoch": 0.39563492788857, "grad_norm": 7.099942387867566, "learning_rate": 6.8854233308587905e-06, "loss": 17.7158, "step": 21644 }, { "epoch": 0.39565320708501656, "grad_norm": 7.8912448083419, "learning_rate": 6.885149167946185e-06, "loss": 18.0542, "step": 21645 }, { "epoch": 0.3956714862814631, "grad_norm": 5.404480522763143, "learning_rate": 6.884874998426296e-06, "loss": 16.9571, "step": 21646 }, { "epoch": 0.39568976547790957, "grad_norm": 6.247336627791287, "learning_rate": 6.8846008223000825e-06, "loss": 17.5024, "step": 21647 }, { "epoch": 0.3957080446743561, "grad_norm": 7.941220192674957, "learning_rate": 6.884326639568508e-06, "loss": 17.8973, "step": 21648 }, { "epoch": 0.39572632387080264, "grad_norm": 6.379053972758212, "learning_rate": 6.88405245023253e-06, "loss": 17.4956, "step": 21649 }, { "epoch": 0.3957446030672492, "grad_norm": 8.387081659392715, "learning_rate": 6.883778254293113e-06, "loss": 18.1086, "step": 21650 }, { "epoch": 0.3957628822636957, "grad_norm": 7.3948425362543375, "learning_rate": 6.883504051751217e-06, "loss": 17.8278, "step": 21651 }, { "epoch": 0.3957811614601422, "grad_norm": 8.063472087585946, "learning_rate": 6.883229842607804e-06, "loss": 18.4844, "step": 21652 }, { "epoch": 0.39579944065658873, "grad_norm": 6.357610352214988, "learning_rate": 6.882955626863832e-06, "loss": 17.4236, "step": 21653 }, { "epoch": 0.39581771985303527, "grad_norm": 5.873895105905679, "learning_rate": 6.882681404520263e-06, "loss": 17.1778, "step": 21654 }, { "epoch": 0.3958359990494818, "grad_norm": 7.428820280037442, "learning_rate": 6.882407175578061e-06, "loss": 18.0671, "step": 21655 }, { "epoch": 0.3958542782459283, "grad_norm": 7.706670152117786, "learning_rate": 6.8821329400381844e-06, "loss": 17.9579, "step": 21656 }, { "epoch": 0.3958725574423748, "grad_norm": 7.5884015572154855, "learning_rate": 6.881858697901596e-06, "loss": 17.7861, "step": 21657 }, { "epoch": 0.39589083663882135, "grad_norm": 7.288277868186172, "learning_rate": 6.881584449169254e-06, "loss": 18.0603, "step": 21658 }, { "epoch": 0.3959091158352679, "grad_norm": 5.498976681355198, "learning_rate": 6.881310193842123e-06, "loss": 17.179, "step": 21659 }, { "epoch": 0.3959273950317144, "grad_norm": 6.68694624285585, "learning_rate": 6.881035931921164e-06, "loss": 17.604, "step": 21660 }, { "epoch": 0.3959456742281609, "grad_norm": 5.453455096321566, "learning_rate": 6.880761663407336e-06, "loss": 17.2405, "step": 21661 }, { "epoch": 0.39596395342460744, "grad_norm": 6.035074401662799, "learning_rate": 6.880487388301603e-06, "loss": 17.5, "step": 21662 }, { "epoch": 0.395982232621054, "grad_norm": 9.092492814031782, "learning_rate": 6.880213106604923e-06, "loss": 17.4662, "step": 21663 }, { "epoch": 0.3960005118175005, "grad_norm": 5.664062192085136, "learning_rate": 6.87993881831826e-06, "loss": 17.2395, "step": 21664 }, { "epoch": 0.39601879101394705, "grad_norm": 7.052725853189261, "learning_rate": 6.879664523442575e-06, "loss": 17.7064, "step": 21665 }, { "epoch": 0.3960370702103935, "grad_norm": 7.638767286611265, "learning_rate": 6.87939022197883e-06, "loss": 18.2496, "step": 21666 }, { "epoch": 0.39605534940684006, "grad_norm": 5.9105272706982594, "learning_rate": 6.879115913927984e-06, "loss": 17.2275, "step": 21667 }, { "epoch": 0.3960736286032866, "grad_norm": 5.679793639716042, "learning_rate": 6.878841599290998e-06, "loss": 17.4539, "step": 21668 }, { "epoch": 0.39609190779973313, "grad_norm": 6.795324756701742, "learning_rate": 6.878567278068838e-06, "loss": 17.7713, "step": 21669 }, { "epoch": 0.39611018699617967, "grad_norm": 5.698620339964075, "learning_rate": 6.8782929502624615e-06, "loss": 17.2789, "step": 21670 }, { "epoch": 0.39612846619262615, "grad_norm": 6.147221977014783, "learning_rate": 6.878018615872832e-06, "loss": 17.3817, "step": 21671 }, { "epoch": 0.3961467453890727, "grad_norm": 6.531815043610964, "learning_rate": 6.877744274900911e-06, "loss": 17.4807, "step": 21672 }, { "epoch": 0.3961650245855192, "grad_norm": 6.115114026166975, "learning_rate": 6.8774699273476576e-06, "loss": 17.2749, "step": 21673 }, { "epoch": 0.39618330378196576, "grad_norm": 7.012860703434272, "learning_rate": 6.877195573214035e-06, "loss": 17.7193, "step": 21674 }, { "epoch": 0.3962015829784123, "grad_norm": 6.41914177435871, "learning_rate": 6.876921212501004e-06, "loss": 17.3767, "step": 21675 }, { "epoch": 0.3962198621748588, "grad_norm": 6.946961562007794, "learning_rate": 6.876646845209529e-06, "loss": 17.9227, "step": 21676 }, { "epoch": 0.3962381413713053, "grad_norm": 6.488254765236719, "learning_rate": 6.876372471340569e-06, "loss": 17.7707, "step": 21677 }, { "epoch": 0.39625642056775184, "grad_norm": 7.564070027824658, "learning_rate": 6.876098090895086e-06, "loss": 18.2453, "step": 21678 }, { "epoch": 0.3962746997641984, "grad_norm": 6.780838371917999, "learning_rate": 6.875823703874043e-06, "loss": 17.5928, "step": 21679 }, { "epoch": 0.3962929789606449, "grad_norm": 7.090326527895478, "learning_rate": 6.8755493102784e-06, "loss": 17.6829, "step": 21680 }, { "epoch": 0.3963112581570914, "grad_norm": 6.033272197405425, "learning_rate": 6.875274910109117e-06, "loss": 17.3839, "step": 21681 }, { "epoch": 0.39632953735353793, "grad_norm": 5.981024208762855, "learning_rate": 6.875000503367162e-06, "loss": 17.2835, "step": 21682 }, { "epoch": 0.39634781654998447, "grad_norm": 6.067834244801076, "learning_rate": 6.874726090053492e-06, "loss": 17.361, "step": 21683 }, { "epoch": 0.396366095746431, "grad_norm": 24.24281158832486, "learning_rate": 6.874451670169069e-06, "loss": 17.6624, "step": 21684 }, { "epoch": 0.39638437494287754, "grad_norm": 5.5553463901806985, "learning_rate": 6.874177243714856e-06, "loss": 17.2165, "step": 21685 }, { "epoch": 0.396402654139324, "grad_norm": 6.062006121777724, "learning_rate": 6.8739028106918135e-06, "loss": 17.4607, "step": 21686 }, { "epoch": 0.39642093333577055, "grad_norm": 7.17781354196199, "learning_rate": 6.873628371100906e-06, "loss": 18.1326, "step": 21687 }, { "epoch": 0.3964392125322171, "grad_norm": 7.301042531092424, "learning_rate": 6.873353924943091e-06, "loss": 17.8408, "step": 21688 }, { "epoch": 0.3964574917286636, "grad_norm": 7.357280041350993, "learning_rate": 6.873079472219335e-06, "loss": 18.2311, "step": 21689 }, { "epoch": 0.3964757709251101, "grad_norm": 5.448759267322615, "learning_rate": 6.8728050129305975e-06, "loss": 17.1483, "step": 21690 }, { "epoch": 0.39649405012155664, "grad_norm": 5.594364101224168, "learning_rate": 6.872530547077841e-06, "loss": 17.3468, "step": 21691 }, { "epoch": 0.3965123293180032, "grad_norm": 5.505243172890195, "learning_rate": 6.872256074662027e-06, "loss": 17.0655, "step": 21692 }, { "epoch": 0.3965306085144497, "grad_norm": 5.557656220302946, "learning_rate": 6.8719815956841196e-06, "loss": 17.3279, "step": 21693 }, { "epoch": 0.39654888771089625, "grad_norm": 6.88965028193292, "learning_rate": 6.8717071101450785e-06, "loss": 17.6607, "step": 21694 }, { "epoch": 0.3965671669073427, "grad_norm": 5.57957657299959, "learning_rate": 6.871432618045864e-06, "loss": 17.1442, "step": 21695 }, { "epoch": 0.39658544610378926, "grad_norm": 6.508591444589206, "learning_rate": 6.871158119387442e-06, "loss": 17.5329, "step": 21696 }, { "epoch": 0.3966037253002358, "grad_norm": 7.373474480849557, "learning_rate": 6.870883614170774e-06, "loss": 17.3962, "step": 21697 }, { "epoch": 0.39662200449668233, "grad_norm": 5.911328579089407, "learning_rate": 6.8706091023968215e-06, "loss": 17.3305, "step": 21698 }, { "epoch": 0.39664028369312887, "grad_norm": 5.494601161598914, "learning_rate": 6.870334584066546e-06, "loss": 17.2719, "step": 21699 }, { "epoch": 0.39665856288957535, "grad_norm": 7.0876504129624385, "learning_rate": 6.87006005918091e-06, "loss": 17.8883, "step": 21700 }, { "epoch": 0.3966768420860219, "grad_norm": 6.69949522211074, "learning_rate": 6.8697855277408756e-06, "loss": 17.6359, "step": 21701 }, { "epoch": 0.3966951212824684, "grad_norm": 7.254559317539576, "learning_rate": 6.869510989747404e-06, "loss": 17.7826, "step": 21702 }, { "epoch": 0.39671340047891496, "grad_norm": 7.387419531232121, "learning_rate": 6.869236445201462e-06, "loss": 17.9149, "step": 21703 }, { "epoch": 0.3967316796753615, "grad_norm": 6.903547161426229, "learning_rate": 6.8689618941040045e-06, "loss": 17.7069, "step": 21704 }, { "epoch": 0.396749958871808, "grad_norm": 6.6084817588078755, "learning_rate": 6.868687336455999e-06, "loss": 17.6637, "step": 21705 }, { "epoch": 0.3967682380682545, "grad_norm": 6.407653100612558, "learning_rate": 6.868412772258407e-06, "loss": 17.4011, "step": 21706 }, { "epoch": 0.39678651726470104, "grad_norm": 6.483539091650716, "learning_rate": 6.868138201512191e-06, "loss": 17.5027, "step": 21707 }, { "epoch": 0.3968047964611476, "grad_norm": 6.656166325296128, "learning_rate": 6.867863624218313e-06, "loss": 17.9308, "step": 21708 }, { "epoch": 0.3968230756575941, "grad_norm": 6.37100398672029, "learning_rate": 6.867589040377734e-06, "loss": 17.5063, "step": 21709 }, { "epoch": 0.3968413548540406, "grad_norm": 6.419081622356355, "learning_rate": 6.867314449991418e-06, "loss": 17.6277, "step": 21710 }, { "epoch": 0.39685963405048713, "grad_norm": 7.351393233610686, "learning_rate": 6.867039853060326e-06, "loss": 17.8415, "step": 21711 }, { "epoch": 0.39687791324693367, "grad_norm": 8.340268986995127, "learning_rate": 6.866765249585422e-06, "loss": 18.6467, "step": 21712 }, { "epoch": 0.3968961924433802, "grad_norm": 5.347399172394467, "learning_rate": 6.866490639567667e-06, "loss": 16.9904, "step": 21713 }, { "epoch": 0.39691447163982674, "grad_norm": 6.333002298780255, "learning_rate": 6.8662160230080254e-06, "loss": 17.5916, "step": 21714 }, { "epoch": 0.3969327508362732, "grad_norm": 6.659012769856985, "learning_rate": 6.8659413999074574e-06, "loss": 17.5251, "step": 21715 }, { "epoch": 0.39695103003271975, "grad_norm": 6.221801363568825, "learning_rate": 6.865666770266928e-06, "loss": 17.5295, "step": 21716 }, { "epoch": 0.3969693092291663, "grad_norm": 7.381124587032561, "learning_rate": 6.865392134087398e-06, "loss": 17.8972, "step": 21717 }, { "epoch": 0.3969875884256128, "grad_norm": 7.784792840951576, "learning_rate": 6.86511749136983e-06, "loss": 18.134, "step": 21718 }, { "epoch": 0.39700586762205936, "grad_norm": 5.986191625320928, "learning_rate": 6.864842842115187e-06, "loss": 17.208, "step": 21719 }, { "epoch": 0.39702414681850584, "grad_norm": 6.727676440293558, "learning_rate": 6.864568186324432e-06, "loss": 17.936, "step": 21720 }, { "epoch": 0.3970424260149524, "grad_norm": 5.727698637854074, "learning_rate": 6.864293523998529e-06, "loss": 17.2057, "step": 21721 }, { "epoch": 0.3970607052113989, "grad_norm": 6.279105869482758, "learning_rate": 6.864018855138436e-06, "loss": 17.5633, "step": 21722 }, { "epoch": 0.39707898440784545, "grad_norm": 6.476167616880967, "learning_rate": 6.86374417974512e-06, "loss": 17.5121, "step": 21723 }, { "epoch": 0.39709726360429193, "grad_norm": 7.9602930994660746, "learning_rate": 6.8634694978195436e-06, "loss": 18.1208, "step": 21724 }, { "epoch": 0.39711554280073846, "grad_norm": 7.379102487241958, "learning_rate": 6.863194809362666e-06, "loss": 17.898, "step": 21725 }, { "epoch": 0.397133821997185, "grad_norm": 8.96666518078975, "learning_rate": 6.862920114375453e-06, "loss": 18.7539, "step": 21726 }, { "epoch": 0.39715210119363153, "grad_norm": 6.3219931753442, "learning_rate": 6.862645412858867e-06, "loss": 17.4428, "step": 21727 }, { "epoch": 0.39717038039007807, "grad_norm": 6.353760273395351, "learning_rate": 6.862370704813871e-06, "loss": 17.3138, "step": 21728 }, { "epoch": 0.39718865958652455, "grad_norm": 5.657058150834073, "learning_rate": 6.862095990241426e-06, "loss": 17.1173, "step": 21729 }, { "epoch": 0.3972069387829711, "grad_norm": 6.049000545032745, "learning_rate": 6.861821269142498e-06, "loss": 17.4315, "step": 21730 }, { "epoch": 0.3972252179794176, "grad_norm": 8.969556896900917, "learning_rate": 6.8615465415180475e-06, "loss": 18.502, "step": 21731 }, { "epoch": 0.39724349717586416, "grad_norm": 6.173650364088976, "learning_rate": 6.861271807369037e-06, "loss": 17.4822, "step": 21732 }, { "epoch": 0.3972617763723107, "grad_norm": 6.580220131505144, "learning_rate": 6.86099706669643e-06, "loss": 17.5246, "step": 21733 }, { "epoch": 0.3972800555687572, "grad_norm": 6.1567155291020494, "learning_rate": 6.8607223195011915e-06, "loss": 17.4647, "step": 21734 }, { "epoch": 0.3972983347652037, "grad_norm": 7.324932987615221, "learning_rate": 6.860447565784281e-06, "loss": 18.156, "step": 21735 }, { "epoch": 0.39731661396165024, "grad_norm": 8.103877960545196, "learning_rate": 6.8601728055466635e-06, "loss": 18.2347, "step": 21736 }, { "epoch": 0.3973348931580968, "grad_norm": 6.70610024045635, "learning_rate": 6.859898038789301e-06, "loss": 17.7807, "step": 21737 }, { "epoch": 0.3973531723545433, "grad_norm": 5.7562338435786415, "learning_rate": 6.85962326551316e-06, "loss": 17.2446, "step": 21738 }, { "epoch": 0.3973714515509898, "grad_norm": 7.755936289396021, "learning_rate": 6.8593484857192e-06, "loss": 17.9505, "step": 21739 }, { "epoch": 0.39738973074743633, "grad_norm": 6.438809062253919, "learning_rate": 6.859073699408383e-06, "loss": 17.6234, "step": 21740 }, { "epoch": 0.39740800994388287, "grad_norm": 6.267715065425261, "learning_rate": 6.858798906581675e-06, "loss": 17.6766, "step": 21741 }, { "epoch": 0.3974262891403294, "grad_norm": 7.4280055700738306, "learning_rate": 6.858524107240039e-06, "loss": 17.7466, "step": 21742 }, { "epoch": 0.39744456833677594, "grad_norm": 6.236803296639654, "learning_rate": 6.858249301384437e-06, "loss": 17.3597, "step": 21743 }, { "epoch": 0.3974628475332224, "grad_norm": 6.489969263565319, "learning_rate": 6.8579744890158305e-06, "loss": 17.6892, "step": 21744 }, { "epoch": 0.39748112672966895, "grad_norm": 5.842214122340492, "learning_rate": 6.857699670135186e-06, "loss": 17.2405, "step": 21745 }, { "epoch": 0.3974994059261155, "grad_norm": 7.86180178100099, "learning_rate": 6.857424844743465e-06, "loss": 18.4659, "step": 21746 }, { "epoch": 0.397517685122562, "grad_norm": 7.684834211311984, "learning_rate": 6.857150012841633e-06, "loss": 18.1623, "step": 21747 }, { "epoch": 0.39753596431900856, "grad_norm": 4.927592761924374, "learning_rate": 6.8568751744306505e-06, "loss": 16.9394, "step": 21748 }, { "epoch": 0.39755424351545504, "grad_norm": 6.420163071271816, "learning_rate": 6.85660032951148e-06, "loss": 17.5458, "step": 21749 }, { "epoch": 0.3975725227119016, "grad_norm": 6.5515169109406415, "learning_rate": 6.856325478085087e-06, "loss": 17.6797, "step": 21750 }, { "epoch": 0.3975908019083481, "grad_norm": 6.616783364994676, "learning_rate": 6.856050620152435e-06, "loss": 17.6536, "step": 21751 }, { "epoch": 0.39760908110479465, "grad_norm": 6.223348051198621, "learning_rate": 6.8557757557144874e-06, "loss": 17.3968, "step": 21752 }, { "epoch": 0.3976273603012412, "grad_norm": 6.569699690737651, "learning_rate": 6.855500884772206e-06, "loss": 17.5949, "step": 21753 }, { "epoch": 0.39764563949768766, "grad_norm": 6.85853425702792, "learning_rate": 6.855226007326554e-06, "loss": 17.9053, "step": 21754 }, { "epoch": 0.3976639186941342, "grad_norm": 6.80916540211321, "learning_rate": 6.854951123378497e-06, "loss": 17.6419, "step": 21755 }, { "epoch": 0.39768219789058074, "grad_norm": 5.705231847810381, "learning_rate": 6.854676232928997e-06, "loss": 17.3183, "step": 21756 }, { "epoch": 0.39770047708702727, "grad_norm": 6.397163156391924, "learning_rate": 6.854401335979019e-06, "loss": 17.4293, "step": 21757 }, { "epoch": 0.39771875628347375, "grad_norm": 7.026852961160377, "learning_rate": 6.854126432529523e-06, "loss": 17.9778, "step": 21758 }, { "epoch": 0.3977370354799203, "grad_norm": 6.489656750077679, "learning_rate": 6.853851522581476e-06, "loss": 17.6702, "step": 21759 }, { "epoch": 0.3977553146763668, "grad_norm": 5.046174196276279, "learning_rate": 6.85357660613584e-06, "loss": 16.9933, "step": 21760 }, { "epoch": 0.39777359387281336, "grad_norm": 5.231458690561238, "learning_rate": 6.853301683193579e-06, "loss": 17.0756, "step": 21761 }, { "epoch": 0.3977918730692599, "grad_norm": 5.865646842255912, "learning_rate": 6.853026753755656e-06, "loss": 17.4636, "step": 21762 }, { "epoch": 0.3978101522657064, "grad_norm": 5.577294626414456, "learning_rate": 6.852751817823035e-06, "loss": 17.1527, "step": 21763 }, { "epoch": 0.3978284314621529, "grad_norm": 6.030721027048702, "learning_rate": 6.85247687539668e-06, "loss": 17.4301, "step": 21764 }, { "epoch": 0.39784671065859945, "grad_norm": 5.51079709212565, "learning_rate": 6.8522019264775544e-06, "loss": 17.3503, "step": 21765 }, { "epoch": 0.397864989855046, "grad_norm": 6.2641449921534775, "learning_rate": 6.851926971066623e-06, "loss": 17.3794, "step": 21766 }, { "epoch": 0.3978832690514925, "grad_norm": 6.393330466474008, "learning_rate": 6.851652009164846e-06, "loss": 17.3104, "step": 21767 }, { "epoch": 0.397901548247939, "grad_norm": 5.537558818735297, "learning_rate": 6.851377040773189e-06, "loss": 17.3696, "step": 21768 }, { "epoch": 0.39791982744438553, "grad_norm": 6.64380040405659, "learning_rate": 6.851102065892618e-06, "loss": 17.5596, "step": 21769 }, { "epoch": 0.39793810664083207, "grad_norm": 6.084923088989286, "learning_rate": 6.850827084524094e-06, "loss": 17.4808, "step": 21770 }, { "epoch": 0.3979563858372786, "grad_norm": 6.786046481175895, "learning_rate": 6.850552096668583e-06, "loss": 16.8464, "step": 21771 }, { "epoch": 0.39797466503372514, "grad_norm": 7.921359138492886, "learning_rate": 6.850277102327047e-06, "loss": 17.8138, "step": 21772 }, { "epoch": 0.3979929442301716, "grad_norm": 6.3057469313954275, "learning_rate": 6.8500021015004495e-06, "loss": 17.6966, "step": 21773 }, { "epoch": 0.39801122342661815, "grad_norm": 5.978307516896791, "learning_rate": 6.849727094189755e-06, "loss": 17.4407, "step": 21774 }, { "epoch": 0.3980295026230647, "grad_norm": 5.361668479899914, "learning_rate": 6.849452080395928e-06, "loss": 16.9982, "step": 21775 }, { "epoch": 0.3980477818195112, "grad_norm": 5.436084862090296, "learning_rate": 6.849177060119931e-06, "loss": 17.0593, "step": 21776 }, { "epoch": 0.39806606101595776, "grad_norm": 7.219011796924425, "learning_rate": 6.84890203336273e-06, "loss": 17.4976, "step": 21777 }, { "epoch": 0.39808434021240424, "grad_norm": 6.542009008742553, "learning_rate": 6.848627000125288e-06, "loss": 17.3236, "step": 21778 }, { "epoch": 0.3981026194088508, "grad_norm": 6.471159757398091, "learning_rate": 6.8483519604085695e-06, "loss": 17.8908, "step": 21779 }, { "epoch": 0.3981208986052973, "grad_norm": 8.547507575517846, "learning_rate": 6.848076914213536e-06, "loss": 18.24, "step": 21780 }, { "epoch": 0.39813917780174385, "grad_norm": 5.05408154870597, "learning_rate": 6.847801861541154e-06, "loss": 16.8201, "step": 21781 }, { "epoch": 0.3981574569981904, "grad_norm": 6.572162405922015, "learning_rate": 6.847526802392386e-06, "loss": 17.6547, "step": 21782 }, { "epoch": 0.39817573619463686, "grad_norm": 5.963744775318282, "learning_rate": 6.847251736768199e-06, "loss": 17.384, "step": 21783 }, { "epoch": 0.3981940153910834, "grad_norm": 5.2809965346776515, "learning_rate": 6.846976664669553e-06, "loss": 17.0352, "step": 21784 }, { "epoch": 0.39821229458752994, "grad_norm": 7.468870171175749, "learning_rate": 6.846701586097415e-06, "loss": 17.8976, "step": 21785 }, { "epoch": 0.39823057378397647, "grad_norm": 5.501253658388204, "learning_rate": 6.846426501052748e-06, "loss": 17.2988, "step": 21786 }, { "epoch": 0.398248852980423, "grad_norm": 8.552318000196248, "learning_rate": 6.846151409536516e-06, "loss": 18.2101, "step": 21787 }, { "epoch": 0.3982671321768695, "grad_norm": 5.161676492978516, "learning_rate": 6.845876311549684e-06, "loss": 17.0767, "step": 21788 }, { "epoch": 0.398285411373316, "grad_norm": 6.1466095472310425, "learning_rate": 6.845601207093215e-06, "loss": 17.4433, "step": 21789 }, { "epoch": 0.39830369056976256, "grad_norm": 6.246326853145241, "learning_rate": 6.845326096168074e-06, "loss": 17.5201, "step": 21790 }, { "epoch": 0.3983219697662091, "grad_norm": 7.085188300824559, "learning_rate": 6.8450509787752255e-06, "loss": 18.0179, "step": 21791 }, { "epoch": 0.3983402489626556, "grad_norm": 6.9875426997890075, "learning_rate": 6.844775854915633e-06, "loss": 17.6406, "step": 21792 }, { "epoch": 0.3983585281591021, "grad_norm": 4.239942086341225, "learning_rate": 6.8445007245902625e-06, "loss": 16.7594, "step": 21793 }, { "epoch": 0.39837680735554865, "grad_norm": 6.171533553468372, "learning_rate": 6.844225587800077e-06, "loss": 17.6613, "step": 21794 }, { "epoch": 0.3983950865519952, "grad_norm": 6.560016173062775, "learning_rate": 6.843950444546039e-06, "loss": 17.5954, "step": 21795 }, { "epoch": 0.3984133657484417, "grad_norm": 6.237759203957921, "learning_rate": 6.843675294829115e-06, "loss": 17.7816, "step": 21796 }, { "epoch": 0.3984316449448882, "grad_norm": 7.600823146795452, "learning_rate": 6.843400138650271e-06, "loss": 17.7881, "step": 21797 }, { "epoch": 0.39844992414133473, "grad_norm": 8.109419437101522, "learning_rate": 6.843124976010469e-06, "loss": 17.6862, "step": 21798 }, { "epoch": 0.39846820333778127, "grad_norm": 6.655250879466462, "learning_rate": 6.842849806910672e-06, "loss": 17.8075, "step": 21799 }, { "epoch": 0.3984864825342278, "grad_norm": 5.3213564636102735, "learning_rate": 6.8425746313518485e-06, "loss": 17.0101, "step": 21800 }, { "epoch": 0.39850476173067434, "grad_norm": 8.147329827383535, "learning_rate": 6.842299449334959e-06, "loss": 17.7536, "step": 21801 }, { "epoch": 0.3985230409271208, "grad_norm": 5.804868824451375, "learning_rate": 6.842024260860971e-06, "loss": 17.4383, "step": 21802 }, { "epoch": 0.39854132012356736, "grad_norm": 6.1724928884144665, "learning_rate": 6.841749065930847e-06, "loss": 17.2015, "step": 21803 }, { "epoch": 0.3985595993200139, "grad_norm": 6.715634459644148, "learning_rate": 6.841473864545553e-06, "loss": 17.6493, "step": 21804 }, { "epoch": 0.3985778785164604, "grad_norm": 6.679022305429887, "learning_rate": 6.841198656706053e-06, "loss": 17.8111, "step": 21805 }, { "epoch": 0.39859615771290696, "grad_norm": 7.257315299953628, "learning_rate": 6.840923442413311e-06, "loss": 17.1689, "step": 21806 }, { "epoch": 0.39861443690935344, "grad_norm": 8.573541639910738, "learning_rate": 6.8406482216682934e-06, "loss": 18.0391, "step": 21807 }, { "epoch": 0.3986327161058, "grad_norm": 6.4849479840258555, "learning_rate": 6.840372994471961e-06, "loss": 17.541, "step": 21808 }, { "epoch": 0.3986509953022465, "grad_norm": 8.349189848762084, "learning_rate": 6.8400977608252814e-06, "loss": 18.3481, "step": 21809 }, { "epoch": 0.39866927449869305, "grad_norm": 10.121523300115816, "learning_rate": 6.839822520729221e-06, "loss": 18.6284, "step": 21810 }, { "epoch": 0.3986875536951396, "grad_norm": 7.817159093429263, "learning_rate": 6.839547274184741e-06, "loss": 17.7238, "step": 21811 }, { "epoch": 0.39870583289158606, "grad_norm": 7.011889955934559, "learning_rate": 6.8392720211928075e-06, "loss": 17.7431, "step": 21812 }, { "epoch": 0.3987241120880326, "grad_norm": 6.156123959449216, "learning_rate": 6.838996761754384e-06, "loss": 17.685, "step": 21813 }, { "epoch": 0.39874239128447914, "grad_norm": 6.015380396421697, "learning_rate": 6.838721495870438e-06, "loss": 17.2928, "step": 21814 }, { "epoch": 0.39876067048092567, "grad_norm": 5.2348778933493145, "learning_rate": 6.838446223541933e-06, "loss": 17.0707, "step": 21815 }, { "epoch": 0.3987789496773722, "grad_norm": 5.51539712070171, "learning_rate": 6.838170944769833e-06, "loss": 17.2077, "step": 21816 }, { "epoch": 0.3987972288738187, "grad_norm": 6.562161452212529, "learning_rate": 6.837895659555103e-06, "loss": 17.5551, "step": 21817 }, { "epoch": 0.3988155080702652, "grad_norm": 5.654587977776979, "learning_rate": 6.837620367898708e-06, "loss": 17.2969, "step": 21818 }, { "epoch": 0.39883378726671176, "grad_norm": 7.475892527216948, "learning_rate": 6.837345069801613e-06, "loss": 17.2382, "step": 21819 }, { "epoch": 0.3988520664631583, "grad_norm": 7.995946912514331, "learning_rate": 6.837069765264783e-06, "loss": 18.3482, "step": 21820 }, { "epoch": 0.39887034565960483, "grad_norm": 7.497912190028149, "learning_rate": 6.8367944542891854e-06, "loss": 18.0156, "step": 21821 }, { "epoch": 0.3988886248560513, "grad_norm": 6.730666641172929, "learning_rate": 6.836519136875779e-06, "loss": 17.6704, "step": 21822 }, { "epoch": 0.39890690405249785, "grad_norm": 6.453252477793417, "learning_rate": 6.836243813025534e-06, "loss": 17.8398, "step": 21823 }, { "epoch": 0.3989251832489444, "grad_norm": 6.75137426284157, "learning_rate": 6.835968482739415e-06, "loss": 17.5854, "step": 21824 }, { "epoch": 0.3989434624453909, "grad_norm": 8.37417003563625, "learning_rate": 6.835693146018384e-06, "loss": 18.5165, "step": 21825 }, { "epoch": 0.3989617416418374, "grad_norm": 6.2616493045157124, "learning_rate": 6.8354178028634084e-06, "loss": 17.6568, "step": 21826 }, { "epoch": 0.39898002083828393, "grad_norm": 8.738659647010948, "learning_rate": 6.8351424532754515e-06, "loss": 18.6223, "step": 21827 }, { "epoch": 0.39899830003473047, "grad_norm": 6.4634686275494095, "learning_rate": 6.834867097255482e-06, "loss": 17.3887, "step": 21828 }, { "epoch": 0.399016579231177, "grad_norm": 6.354428864865202, "learning_rate": 6.834591734804461e-06, "loss": 17.5772, "step": 21829 }, { "epoch": 0.39903485842762354, "grad_norm": 7.608988584982392, "learning_rate": 6.834316365923355e-06, "loss": 18.2136, "step": 21830 }, { "epoch": 0.39905313762407, "grad_norm": 5.989923457364409, "learning_rate": 6.834040990613129e-06, "loss": 17.2518, "step": 21831 }, { "epoch": 0.39907141682051656, "grad_norm": 7.747319358810052, "learning_rate": 6.83376560887475e-06, "loss": 18.2965, "step": 21832 }, { "epoch": 0.3990896960169631, "grad_norm": 7.6421741538121095, "learning_rate": 6.8334902207091804e-06, "loss": 17.9195, "step": 21833 }, { "epoch": 0.3991079752134096, "grad_norm": 6.326266384956976, "learning_rate": 6.833214826117387e-06, "loss": 17.6251, "step": 21834 }, { "epoch": 0.39912625440985616, "grad_norm": 5.960369971268348, "learning_rate": 6.832939425100336e-06, "loss": 17.3625, "step": 21835 }, { "epoch": 0.39914453360630264, "grad_norm": 5.766202353274833, "learning_rate": 6.832664017658988e-06, "loss": 17.0969, "step": 21836 }, { "epoch": 0.3991628128027492, "grad_norm": 7.035487552730544, "learning_rate": 6.832388603794315e-06, "loss": 17.8479, "step": 21837 }, { "epoch": 0.3991810919991957, "grad_norm": 5.930590917364918, "learning_rate": 6.832113183507278e-06, "loss": 17.346, "step": 21838 }, { "epoch": 0.39919937119564225, "grad_norm": 5.197521251504096, "learning_rate": 6.831837756798842e-06, "loss": 16.8392, "step": 21839 }, { "epoch": 0.3992176503920888, "grad_norm": 5.994831294123699, "learning_rate": 6.831562323669976e-06, "loss": 17.6677, "step": 21840 }, { "epoch": 0.39923592958853527, "grad_norm": 8.895594948925588, "learning_rate": 6.831286884121642e-06, "loss": 17.7985, "step": 21841 }, { "epoch": 0.3992542087849818, "grad_norm": 6.367047764293951, "learning_rate": 6.831011438154805e-06, "loss": 17.4813, "step": 21842 }, { "epoch": 0.39927248798142834, "grad_norm": 6.123505316100374, "learning_rate": 6.8307359857704324e-06, "loss": 17.084, "step": 21843 }, { "epoch": 0.3992907671778749, "grad_norm": 4.732171505471359, "learning_rate": 6.8304605269694904e-06, "loss": 16.8162, "step": 21844 }, { "epoch": 0.3993090463743214, "grad_norm": 6.73229843461191, "learning_rate": 6.8301850617529405e-06, "loss": 17.8678, "step": 21845 }, { "epoch": 0.3993273255707679, "grad_norm": 7.1852536067024175, "learning_rate": 6.829909590121752e-06, "loss": 18.1702, "step": 21846 }, { "epoch": 0.3993456047672144, "grad_norm": 7.180394346522405, "learning_rate": 6.82963411207689e-06, "loss": 17.9493, "step": 21847 }, { "epoch": 0.39936388396366096, "grad_norm": 4.71416536056585, "learning_rate": 6.82935862761932e-06, "loss": 16.8741, "step": 21848 }, { "epoch": 0.3993821631601075, "grad_norm": 6.544736596933013, "learning_rate": 6.8290831367500055e-06, "loss": 17.6977, "step": 21849 }, { "epoch": 0.39940044235655403, "grad_norm": 5.202792661730387, "learning_rate": 6.828807639469914e-06, "loss": 17.0273, "step": 21850 }, { "epoch": 0.3994187215530005, "grad_norm": 5.843070617820656, "learning_rate": 6.828532135780008e-06, "loss": 17.3643, "step": 21851 }, { "epoch": 0.39943700074944705, "grad_norm": 7.029189764478648, "learning_rate": 6.8282566256812584e-06, "loss": 17.6621, "step": 21852 }, { "epoch": 0.3994552799458936, "grad_norm": 10.042292797677177, "learning_rate": 6.827981109174627e-06, "loss": 18.8844, "step": 21853 }, { "epoch": 0.3994735591423401, "grad_norm": 5.584993858822853, "learning_rate": 6.82770558626108e-06, "loss": 17.2078, "step": 21854 }, { "epoch": 0.39949183833878665, "grad_norm": 7.134501801572942, "learning_rate": 6.8274300569415845e-06, "loss": 17.7828, "step": 21855 }, { "epoch": 0.39951011753523313, "grad_norm": 6.869150866445629, "learning_rate": 6.827154521217104e-06, "loss": 17.4381, "step": 21856 }, { "epoch": 0.39952839673167967, "grad_norm": 6.065259526390333, "learning_rate": 6.826878979088607e-06, "loss": 17.2889, "step": 21857 }, { "epoch": 0.3995466759281262, "grad_norm": 7.516231241164341, "learning_rate": 6.826603430557056e-06, "loss": 18.2564, "step": 21858 }, { "epoch": 0.39956495512457274, "grad_norm": 7.000265027217838, "learning_rate": 6.826327875623419e-06, "loss": 17.8884, "step": 21859 }, { "epoch": 0.3995832343210192, "grad_norm": 5.977487675097315, "learning_rate": 6.826052314288662e-06, "loss": 17.355, "step": 21860 }, { "epoch": 0.39960151351746576, "grad_norm": 6.090132413359348, "learning_rate": 6.8257767465537496e-06, "loss": 17.4178, "step": 21861 }, { "epoch": 0.3996197927139123, "grad_norm": 6.309985701706487, "learning_rate": 6.825501172419649e-06, "loss": 17.4804, "step": 21862 }, { "epoch": 0.3996380719103588, "grad_norm": 6.169446371465011, "learning_rate": 6.825225591887323e-06, "loss": 17.4456, "step": 21863 }, { "epoch": 0.39965635110680536, "grad_norm": 5.208395078947912, "learning_rate": 6.824950004957741e-06, "loss": 16.8539, "step": 21864 }, { "epoch": 0.39967463030325184, "grad_norm": 6.798308654497195, "learning_rate": 6.824674411631868e-06, "loss": 17.6094, "step": 21865 }, { "epoch": 0.3996929094996984, "grad_norm": 6.468971865610881, "learning_rate": 6.82439881191067e-06, "loss": 17.5193, "step": 21866 }, { "epoch": 0.3997111886961449, "grad_norm": 6.129154578278078, "learning_rate": 6.824123205795111e-06, "loss": 17.3764, "step": 21867 }, { "epoch": 0.39972946789259145, "grad_norm": 6.4687796845339545, "learning_rate": 6.823847593286159e-06, "loss": 17.591, "step": 21868 }, { "epoch": 0.399747747089038, "grad_norm": 7.197323925496892, "learning_rate": 6.8235719743847795e-06, "loss": 17.6638, "step": 21869 }, { "epoch": 0.39976602628548447, "grad_norm": 5.736916691002986, "learning_rate": 6.823296349091939e-06, "loss": 17.1347, "step": 21870 }, { "epoch": 0.399784305481931, "grad_norm": 7.0601501110054645, "learning_rate": 6.823020717408603e-06, "loss": 17.9473, "step": 21871 }, { "epoch": 0.39980258467837754, "grad_norm": 7.433267916188017, "learning_rate": 6.822745079335736e-06, "loss": 17.7632, "step": 21872 }, { "epoch": 0.3998208638748241, "grad_norm": 5.870480293815363, "learning_rate": 6.822469434874307e-06, "loss": 17.1841, "step": 21873 }, { "epoch": 0.3998391430712706, "grad_norm": 6.783657703056416, "learning_rate": 6.8221937840252805e-06, "loss": 17.1817, "step": 21874 }, { "epoch": 0.3998574222677171, "grad_norm": 8.472302679485521, "learning_rate": 6.8219181267896225e-06, "loss": 18.1618, "step": 21875 }, { "epoch": 0.3998757014641636, "grad_norm": 7.146008847435934, "learning_rate": 6.821642463168301e-06, "loss": 18.0327, "step": 21876 }, { "epoch": 0.39989398066061016, "grad_norm": 6.834847710285364, "learning_rate": 6.821366793162279e-06, "loss": 17.6565, "step": 21877 }, { "epoch": 0.3999122598570567, "grad_norm": 5.498396193659899, "learning_rate": 6.8210911167725256e-06, "loss": 17.1138, "step": 21878 }, { "epoch": 0.39993053905350323, "grad_norm": 6.8552746884165146, "learning_rate": 6.820815434000007e-06, "loss": 17.6906, "step": 21879 }, { "epoch": 0.3999488182499497, "grad_norm": 6.639894723167643, "learning_rate": 6.8205397448456865e-06, "loss": 17.4614, "step": 21880 }, { "epoch": 0.39996709744639625, "grad_norm": 6.37637902059711, "learning_rate": 6.820264049310531e-06, "loss": 17.404, "step": 21881 }, { "epoch": 0.3999853766428428, "grad_norm": 6.069914179531985, "learning_rate": 6.8199883473955094e-06, "loss": 17.3451, "step": 21882 }, { "epoch": 0.4000036558392893, "grad_norm": 7.420894879857651, "learning_rate": 6.819712639101588e-06, "loss": 18.2089, "step": 21883 }, { "epoch": 0.40002193503573585, "grad_norm": 8.62891725858039, "learning_rate": 6.8194369244297294e-06, "loss": 18.5057, "step": 21884 }, { "epoch": 0.40004021423218233, "grad_norm": 6.593069105903263, "learning_rate": 6.819161203380903e-06, "loss": 17.5398, "step": 21885 }, { "epoch": 0.40005849342862887, "grad_norm": 7.625733575239709, "learning_rate": 6.818885475956075e-06, "loss": 18.1123, "step": 21886 }, { "epoch": 0.4000767726250754, "grad_norm": 6.542194823489286, "learning_rate": 6.81860974215621e-06, "loss": 17.9121, "step": 21887 }, { "epoch": 0.40009505182152194, "grad_norm": 5.649682944877462, "learning_rate": 6.8183340019822774e-06, "loss": 17.164, "step": 21888 }, { "epoch": 0.4001133310179685, "grad_norm": 8.18719447903527, "learning_rate": 6.81805825543524e-06, "loss": 18.4074, "step": 21889 }, { "epoch": 0.40013161021441496, "grad_norm": 5.971442482873152, "learning_rate": 6.817782502516068e-06, "loss": 17.4632, "step": 21890 }, { "epoch": 0.4001498894108615, "grad_norm": 7.544173298075092, "learning_rate": 6.817506743225725e-06, "loss": 18.1639, "step": 21891 }, { "epoch": 0.40016816860730803, "grad_norm": 6.585524425248248, "learning_rate": 6.817230977565179e-06, "loss": 17.309, "step": 21892 }, { "epoch": 0.40018644780375456, "grad_norm": 6.47608704865308, "learning_rate": 6.816955205535397e-06, "loss": 17.5814, "step": 21893 }, { "epoch": 0.40020472700020104, "grad_norm": 5.466854866379673, "learning_rate": 6.8166794271373425e-06, "loss": 17.2101, "step": 21894 }, { "epoch": 0.4002230061966476, "grad_norm": 5.8153603545612915, "learning_rate": 6.816403642371985e-06, "loss": 17.5636, "step": 21895 }, { "epoch": 0.4002412853930941, "grad_norm": 6.9351026441863315, "learning_rate": 6.816127851240291e-06, "loss": 17.5025, "step": 21896 }, { "epoch": 0.40025956458954065, "grad_norm": 7.636826122033852, "learning_rate": 6.815852053743227e-06, "loss": 18.0502, "step": 21897 }, { "epoch": 0.4002778437859872, "grad_norm": 6.464902345553054, "learning_rate": 6.815576249881758e-06, "loss": 17.5888, "step": 21898 }, { "epoch": 0.40029612298243367, "grad_norm": 6.749419499798225, "learning_rate": 6.815300439656852e-06, "loss": 17.5443, "step": 21899 }, { "epoch": 0.4003144021788802, "grad_norm": 6.58827908461623, "learning_rate": 6.815024623069476e-06, "loss": 17.5441, "step": 21900 }, { "epoch": 0.40033268137532674, "grad_norm": 5.761502253496959, "learning_rate": 6.8147488001205965e-06, "loss": 17.4418, "step": 21901 }, { "epoch": 0.4003509605717733, "grad_norm": 6.863697161354168, "learning_rate": 6.814472970811179e-06, "loss": 17.8328, "step": 21902 }, { "epoch": 0.4003692397682198, "grad_norm": 5.95862440492216, "learning_rate": 6.814197135142191e-06, "loss": 17.1633, "step": 21903 }, { "epoch": 0.4003875189646663, "grad_norm": 6.644524654873513, "learning_rate": 6.8139212931145995e-06, "loss": 17.3927, "step": 21904 }, { "epoch": 0.4004057981611128, "grad_norm": 6.860794251018615, "learning_rate": 6.813645444729372e-06, "loss": 18.0471, "step": 21905 }, { "epoch": 0.40042407735755936, "grad_norm": 6.001006930176777, "learning_rate": 6.813369589987474e-06, "loss": 17.3834, "step": 21906 }, { "epoch": 0.4004423565540059, "grad_norm": 6.18695976509492, "learning_rate": 6.813093728889874e-06, "loss": 17.2937, "step": 21907 }, { "epoch": 0.40046063575045243, "grad_norm": 6.4642207578544495, "learning_rate": 6.812817861437536e-06, "loss": 17.6252, "step": 21908 }, { "epoch": 0.4004789149468989, "grad_norm": 6.632278544468539, "learning_rate": 6.8125419876314295e-06, "loss": 17.5642, "step": 21909 }, { "epoch": 0.40049719414334545, "grad_norm": 7.109454895608768, "learning_rate": 6.812266107472522e-06, "loss": 17.7552, "step": 21910 }, { "epoch": 0.400515473339792, "grad_norm": 8.508668659805597, "learning_rate": 6.811990220961779e-06, "loss": 18.43, "step": 21911 }, { "epoch": 0.4005337525362385, "grad_norm": 7.185601574581772, "learning_rate": 6.811714328100165e-06, "loss": 17.9087, "step": 21912 }, { "epoch": 0.40055203173268505, "grad_norm": 5.920070425212109, "learning_rate": 6.81143842888865e-06, "loss": 17.1764, "step": 21913 }, { "epoch": 0.40057031092913153, "grad_norm": 6.966815940783827, "learning_rate": 6.811162523328203e-06, "loss": 17.8286, "step": 21914 }, { "epoch": 0.40058859012557807, "grad_norm": 6.586401880593733, "learning_rate": 6.810886611419787e-06, "loss": 17.8632, "step": 21915 }, { "epoch": 0.4006068693220246, "grad_norm": 6.063754430848439, "learning_rate": 6.81061069316437e-06, "loss": 17.4085, "step": 21916 }, { "epoch": 0.40062514851847114, "grad_norm": 6.32509247933547, "learning_rate": 6.810334768562921e-06, "loss": 17.5364, "step": 21917 }, { "epoch": 0.4006434277149177, "grad_norm": 6.724629586613398, "learning_rate": 6.8100588376164036e-06, "loss": 17.5292, "step": 21918 }, { "epoch": 0.40066170691136416, "grad_norm": 7.667001702775765, "learning_rate": 6.809782900325789e-06, "loss": 18.004, "step": 21919 }, { "epoch": 0.4006799861078107, "grad_norm": 7.118537808752815, "learning_rate": 6.809506956692041e-06, "loss": 17.9814, "step": 21920 }, { "epoch": 0.40069826530425723, "grad_norm": 6.9903226744488745, "learning_rate": 6.809231006716131e-06, "loss": 17.4455, "step": 21921 }, { "epoch": 0.40071654450070376, "grad_norm": 6.728524686486218, "learning_rate": 6.808955050399018e-06, "loss": 17.4874, "step": 21922 }, { "epoch": 0.4007348236971503, "grad_norm": 7.310463906228096, "learning_rate": 6.808679087741679e-06, "loss": 18.2281, "step": 21923 }, { "epoch": 0.4007531028935968, "grad_norm": 7.433994389988249, "learning_rate": 6.808403118745076e-06, "loss": 17.8506, "step": 21924 }, { "epoch": 0.4007713820900433, "grad_norm": 6.082764194422455, "learning_rate": 6.808127143410177e-06, "loss": 17.3403, "step": 21925 }, { "epoch": 0.40078966128648985, "grad_norm": 6.904402641846164, "learning_rate": 6.8078511617379485e-06, "loss": 17.7018, "step": 21926 }, { "epoch": 0.4008079404829364, "grad_norm": 7.1843567484424575, "learning_rate": 6.8075751737293575e-06, "loss": 18.1391, "step": 21927 }, { "epoch": 0.40082621967938287, "grad_norm": 8.693140802240887, "learning_rate": 6.8072991793853734e-06, "loss": 17.9093, "step": 21928 }, { "epoch": 0.4008444988758294, "grad_norm": 7.323779640948805, "learning_rate": 6.807023178706964e-06, "loss": 18.4544, "step": 21929 }, { "epoch": 0.40086277807227594, "grad_norm": 8.73760270559256, "learning_rate": 6.8067471716950935e-06, "loss": 18.1625, "step": 21930 }, { "epoch": 0.4008810572687225, "grad_norm": 8.111036553412072, "learning_rate": 6.8064711583507315e-06, "loss": 18.7604, "step": 21931 }, { "epoch": 0.400899336465169, "grad_norm": 7.337649671092504, "learning_rate": 6.806195138674845e-06, "loss": 17.4618, "step": 21932 }, { "epoch": 0.4009176156616155, "grad_norm": 6.14522667941267, "learning_rate": 6.8059191126684e-06, "loss": 17.2946, "step": 21933 }, { "epoch": 0.400935894858062, "grad_norm": 7.186178180742369, "learning_rate": 6.805643080332366e-06, "loss": 17.9071, "step": 21934 }, { "epoch": 0.40095417405450856, "grad_norm": 7.029971897302851, "learning_rate": 6.80536704166771e-06, "loss": 17.6502, "step": 21935 }, { "epoch": 0.4009724532509551, "grad_norm": 9.513560196022905, "learning_rate": 6.805090996675399e-06, "loss": 18.5135, "step": 21936 }, { "epoch": 0.40099073244740163, "grad_norm": 7.605591732838555, "learning_rate": 6.804814945356401e-06, "loss": 17.6754, "step": 21937 }, { "epoch": 0.4010090116438481, "grad_norm": 6.2261058022499824, "learning_rate": 6.804538887711684e-06, "loss": 17.4833, "step": 21938 }, { "epoch": 0.40102729084029465, "grad_norm": 6.843279889744721, "learning_rate": 6.804262823742214e-06, "loss": 18.0529, "step": 21939 }, { "epoch": 0.4010455700367412, "grad_norm": 6.811844571700934, "learning_rate": 6.803986753448956e-06, "loss": 17.7461, "step": 21940 }, { "epoch": 0.4010638492331877, "grad_norm": 6.2373444016166015, "learning_rate": 6.803710676832887e-06, "loss": 17.7564, "step": 21941 }, { "epoch": 0.40108212842963425, "grad_norm": 7.451427159131176, "learning_rate": 6.803434593894965e-06, "loss": 18.0171, "step": 21942 }, { "epoch": 0.40110040762608073, "grad_norm": 7.006701312109924, "learning_rate": 6.803158504636162e-06, "loss": 17.8858, "step": 21943 }, { "epoch": 0.40111868682252727, "grad_norm": 5.912711778513443, "learning_rate": 6.8028824090574455e-06, "loss": 17.4637, "step": 21944 }, { "epoch": 0.4011369660189738, "grad_norm": 5.719679315294928, "learning_rate": 6.802606307159782e-06, "loss": 17.2627, "step": 21945 }, { "epoch": 0.40115524521542034, "grad_norm": 6.126713852624104, "learning_rate": 6.802330198944138e-06, "loss": 17.4433, "step": 21946 }, { "epoch": 0.4011735244118669, "grad_norm": 5.778026223407568, "learning_rate": 6.802054084411486e-06, "loss": 17.0654, "step": 21947 }, { "epoch": 0.40119180360831336, "grad_norm": 6.839335613346102, "learning_rate": 6.80177796356279e-06, "loss": 17.4037, "step": 21948 }, { "epoch": 0.4012100828047599, "grad_norm": 6.688918250827921, "learning_rate": 6.801501836399017e-06, "loss": 17.4062, "step": 21949 }, { "epoch": 0.40122836200120643, "grad_norm": 5.431568159693095, "learning_rate": 6.801225702921138e-06, "loss": 17.3574, "step": 21950 }, { "epoch": 0.40124664119765296, "grad_norm": 6.668323124251579, "learning_rate": 6.800949563130119e-06, "loss": 17.6149, "step": 21951 }, { "epoch": 0.4012649203940995, "grad_norm": 6.999382372145549, "learning_rate": 6.8006734170269284e-06, "loss": 17.7897, "step": 21952 }, { "epoch": 0.401283199590546, "grad_norm": 6.190362614469207, "learning_rate": 6.800397264612533e-06, "loss": 17.9581, "step": 21953 }, { "epoch": 0.4013014787869925, "grad_norm": 5.57772871319978, "learning_rate": 6.8001211058879e-06, "loss": 17.3105, "step": 21954 }, { "epoch": 0.40131975798343905, "grad_norm": 5.700006419530701, "learning_rate": 6.799844940854002e-06, "loss": 17.1836, "step": 21955 }, { "epoch": 0.4013380371798856, "grad_norm": 6.858696783415418, "learning_rate": 6.799568769511802e-06, "loss": 17.7469, "step": 21956 }, { "epoch": 0.4013563163763321, "grad_norm": 6.797137546460257, "learning_rate": 6.7992925918622696e-06, "loss": 17.7856, "step": 21957 }, { "epoch": 0.4013745955727786, "grad_norm": 5.138139047181146, "learning_rate": 6.799016407906372e-06, "loss": 16.875, "step": 21958 }, { "epoch": 0.40139287476922514, "grad_norm": 7.769990713000648, "learning_rate": 6.79874021764508e-06, "loss": 17.2655, "step": 21959 }, { "epoch": 0.4014111539656717, "grad_norm": 5.793944453016079, "learning_rate": 6.7984640210793586e-06, "loss": 17.2418, "step": 21960 }, { "epoch": 0.4014294331621182, "grad_norm": 6.6119078654034515, "learning_rate": 6.798187818210176e-06, "loss": 17.6804, "step": 21961 }, { "epoch": 0.4014477123585647, "grad_norm": 6.897665729366046, "learning_rate": 6.797911609038503e-06, "loss": 17.5899, "step": 21962 }, { "epoch": 0.4014659915550112, "grad_norm": 6.964162749019687, "learning_rate": 6.797635393565304e-06, "loss": 17.151, "step": 21963 }, { "epoch": 0.40148427075145776, "grad_norm": 5.771019717862557, "learning_rate": 6.797359171791549e-06, "loss": 17.306, "step": 21964 }, { "epoch": 0.4015025499479043, "grad_norm": 6.331485271180629, "learning_rate": 6.797082943718207e-06, "loss": 17.2906, "step": 21965 }, { "epoch": 0.40152082914435083, "grad_norm": 5.79659720268587, "learning_rate": 6.796806709346246e-06, "loss": 17.4377, "step": 21966 }, { "epoch": 0.4015391083407973, "grad_norm": 7.211064151600302, "learning_rate": 6.796530468676632e-06, "loss": 17.4476, "step": 21967 }, { "epoch": 0.40155738753724385, "grad_norm": 7.455823922053474, "learning_rate": 6.796254221710335e-06, "loss": 17.9749, "step": 21968 }, { "epoch": 0.4015756667336904, "grad_norm": 5.516866774663918, "learning_rate": 6.795977968448323e-06, "loss": 17.2624, "step": 21969 }, { "epoch": 0.4015939459301369, "grad_norm": 5.234600543069211, "learning_rate": 6.795701708891563e-06, "loss": 17.0726, "step": 21970 }, { "epoch": 0.40161222512658346, "grad_norm": 5.393485636893945, "learning_rate": 6.795425443041026e-06, "loss": 16.8985, "step": 21971 }, { "epoch": 0.40163050432302994, "grad_norm": 7.261848924659315, "learning_rate": 6.795149170897677e-06, "loss": 18.2308, "step": 21972 }, { "epoch": 0.40164878351947647, "grad_norm": 5.681941304243241, "learning_rate": 6.794872892462487e-06, "loss": 17.3867, "step": 21973 }, { "epoch": 0.401667062715923, "grad_norm": 6.784338920543298, "learning_rate": 6.794596607736423e-06, "loss": 17.6111, "step": 21974 }, { "epoch": 0.40168534191236954, "grad_norm": 6.381183413292857, "learning_rate": 6.794320316720453e-06, "loss": 17.5785, "step": 21975 }, { "epoch": 0.4017036211088161, "grad_norm": 6.472831603511827, "learning_rate": 6.794044019415547e-06, "loss": 17.6216, "step": 21976 }, { "epoch": 0.40172190030526256, "grad_norm": 8.033889329602548, "learning_rate": 6.793767715822672e-06, "loss": 17.621, "step": 21977 }, { "epoch": 0.4017401795017091, "grad_norm": 7.652435122747328, "learning_rate": 6.793491405942797e-06, "loss": 18.1178, "step": 21978 }, { "epoch": 0.40175845869815563, "grad_norm": 5.7795189774144795, "learning_rate": 6.7932150897768914e-06, "loss": 17.304, "step": 21979 }, { "epoch": 0.40177673789460217, "grad_norm": 7.5296212022012545, "learning_rate": 6.792938767325921e-06, "loss": 18.2468, "step": 21980 }, { "epoch": 0.4017950170910487, "grad_norm": 7.349797483946474, "learning_rate": 6.792662438590854e-06, "loss": 17.8214, "step": 21981 }, { "epoch": 0.4018132962874952, "grad_norm": 7.531457348741944, "learning_rate": 6.792386103572663e-06, "loss": 17.9705, "step": 21982 }, { "epoch": 0.4018315754839417, "grad_norm": 5.983560340242937, "learning_rate": 6.792109762272315e-06, "loss": 17.3376, "step": 21983 }, { "epoch": 0.40184985468038825, "grad_norm": 8.365451107347186, "learning_rate": 6.791833414690776e-06, "loss": 18.2873, "step": 21984 }, { "epoch": 0.4018681338768348, "grad_norm": 6.06164443050326, "learning_rate": 6.791557060829017e-06, "loss": 17.4937, "step": 21985 }, { "epoch": 0.4018864130732813, "grad_norm": 5.389488352470851, "learning_rate": 6.791280700688006e-06, "loss": 17.1742, "step": 21986 }, { "epoch": 0.4019046922697278, "grad_norm": 6.035099688578071, "learning_rate": 6.7910043342687124e-06, "loss": 17.2362, "step": 21987 }, { "epoch": 0.40192297146617434, "grad_norm": 6.240104426224746, "learning_rate": 6.790727961572103e-06, "loss": 17.3403, "step": 21988 }, { "epoch": 0.4019412506626209, "grad_norm": 4.771364297154467, "learning_rate": 6.790451582599148e-06, "loss": 16.7517, "step": 21989 }, { "epoch": 0.4019595298590674, "grad_norm": 6.414069119573712, "learning_rate": 6.790175197350814e-06, "loss": 17.4858, "step": 21990 }, { "epoch": 0.40197780905551395, "grad_norm": 5.394892539709728, "learning_rate": 6.789898805828074e-06, "loss": 16.9978, "step": 21991 }, { "epoch": 0.4019960882519604, "grad_norm": 5.559454598292766, "learning_rate": 6.789622408031893e-06, "loss": 17.1492, "step": 21992 }, { "epoch": 0.40201436744840696, "grad_norm": 7.353267904120058, "learning_rate": 6.7893460039632404e-06, "loss": 18.0896, "step": 21993 }, { "epoch": 0.4020326466448535, "grad_norm": 6.918933038129437, "learning_rate": 6.789069593623085e-06, "loss": 17.9874, "step": 21994 }, { "epoch": 0.40205092584130003, "grad_norm": 5.892259725242188, "learning_rate": 6.788793177012396e-06, "loss": 17.4803, "step": 21995 }, { "epoch": 0.4020692050377465, "grad_norm": 6.42627628804412, "learning_rate": 6.788516754132142e-06, "loss": 17.584, "step": 21996 }, { "epoch": 0.40208748423419305, "grad_norm": 6.86353923249927, "learning_rate": 6.788240324983293e-06, "loss": 17.9212, "step": 21997 }, { "epoch": 0.4021057634306396, "grad_norm": 8.310368469293014, "learning_rate": 6.7879638895668165e-06, "loss": 17.9964, "step": 21998 }, { "epoch": 0.4021240426270861, "grad_norm": 6.562962555960447, "learning_rate": 6.787687447883682e-06, "loss": 17.6401, "step": 21999 }, { "epoch": 0.40214232182353266, "grad_norm": 7.473633228695363, "learning_rate": 6.787410999934857e-06, "loss": 18.0085, "step": 22000 }, { "epoch": 0.40216060101997914, "grad_norm": 5.169612281976841, "learning_rate": 6.787134545721312e-06, "loss": 16.9001, "step": 22001 }, { "epoch": 0.40217888021642567, "grad_norm": 5.74281798225418, "learning_rate": 6.786858085244015e-06, "loss": 17.1053, "step": 22002 }, { "epoch": 0.4021971594128722, "grad_norm": 6.11722418767506, "learning_rate": 6.786581618503936e-06, "loss": 17.4146, "step": 22003 }, { "epoch": 0.40221543860931874, "grad_norm": 6.453345918976262, "learning_rate": 6.786305145502043e-06, "loss": 17.4624, "step": 22004 }, { "epoch": 0.4022337178057653, "grad_norm": 5.904459705572975, "learning_rate": 6.786028666239306e-06, "loss": 17.1647, "step": 22005 }, { "epoch": 0.40225199700221176, "grad_norm": 7.64038538335007, "learning_rate": 6.785752180716694e-06, "loss": 17.9963, "step": 22006 }, { "epoch": 0.4022702761986583, "grad_norm": 6.0691043780964105, "learning_rate": 6.785475688935176e-06, "loss": 17.3344, "step": 22007 }, { "epoch": 0.40228855539510483, "grad_norm": 7.882165262024196, "learning_rate": 6.785199190895719e-06, "loss": 18.2275, "step": 22008 }, { "epoch": 0.40230683459155137, "grad_norm": 6.959207615504242, "learning_rate": 6.784922686599295e-06, "loss": 17.6494, "step": 22009 }, { "epoch": 0.4023251137879979, "grad_norm": 6.859123166489989, "learning_rate": 6.7846461760468714e-06, "loss": 17.4516, "step": 22010 }, { "epoch": 0.4023433929844444, "grad_norm": 7.235208318542025, "learning_rate": 6.784369659239418e-06, "loss": 17.4774, "step": 22011 }, { "epoch": 0.4023616721808909, "grad_norm": 5.988047756592985, "learning_rate": 6.784093136177903e-06, "loss": 17.3887, "step": 22012 }, { "epoch": 0.40237995137733745, "grad_norm": 6.956947575994767, "learning_rate": 6.783816606863296e-06, "loss": 17.7987, "step": 22013 }, { "epoch": 0.402398230573784, "grad_norm": 7.0798289717982765, "learning_rate": 6.783540071296568e-06, "loss": 17.8944, "step": 22014 }, { "epoch": 0.4024165097702305, "grad_norm": 6.891298784286834, "learning_rate": 6.783263529478686e-06, "loss": 17.8773, "step": 22015 }, { "epoch": 0.402434788966677, "grad_norm": 5.762533932607662, "learning_rate": 6.782986981410621e-06, "loss": 17.1087, "step": 22016 }, { "epoch": 0.40245306816312354, "grad_norm": 8.80927131911007, "learning_rate": 6.782710427093341e-06, "loss": 18.2545, "step": 22017 }, { "epoch": 0.4024713473595701, "grad_norm": 5.843840912648421, "learning_rate": 6.782433866527815e-06, "loss": 17.2093, "step": 22018 }, { "epoch": 0.4024896265560166, "grad_norm": 8.975227642255565, "learning_rate": 6.782157299715013e-06, "loss": 18.5843, "step": 22019 }, { "epoch": 0.40250790575246315, "grad_norm": 6.92250061229194, "learning_rate": 6.781880726655905e-06, "loss": 17.6077, "step": 22020 }, { "epoch": 0.4025261849489096, "grad_norm": 6.820599787468956, "learning_rate": 6.7816041473514606e-06, "loss": 17.688, "step": 22021 }, { "epoch": 0.40254446414535616, "grad_norm": 6.747177678529493, "learning_rate": 6.781327561802645e-06, "loss": 17.5352, "step": 22022 }, { "epoch": 0.4025627433418027, "grad_norm": 5.259387240651166, "learning_rate": 6.781050970010433e-06, "loss": 17.1437, "step": 22023 }, { "epoch": 0.40258102253824923, "grad_norm": 6.830641097745921, "learning_rate": 6.780774371975794e-06, "loss": 17.8107, "step": 22024 }, { "epoch": 0.40259930173469577, "grad_norm": 6.203491879441497, "learning_rate": 6.780497767699692e-06, "loss": 17.7428, "step": 22025 }, { "epoch": 0.40261758093114225, "grad_norm": 7.405202023530535, "learning_rate": 6.780221157183101e-06, "loss": 17.8754, "step": 22026 }, { "epoch": 0.4026358601275888, "grad_norm": 6.102518656227493, "learning_rate": 6.779944540426988e-06, "loss": 17.5551, "step": 22027 }, { "epoch": 0.4026541393240353, "grad_norm": 6.421375108809336, "learning_rate": 6.7796679174323265e-06, "loss": 17.4135, "step": 22028 }, { "epoch": 0.40267241852048186, "grad_norm": 5.960106085602953, "learning_rate": 6.7793912882000815e-06, "loss": 17.4722, "step": 22029 }, { "epoch": 0.40269069771692834, "grad_norm": 7.350397311128247, "learning_rate": 6.779114652731224e-06, "loss": 17.739, "step": 22030 }, { "epoch": 0.40270897691337487, "grad_norm": 7.4286504026203595, "learning_rate": 6.778838011026726e-06, "loss": 18.2491, "step": 22031 }, { "epoch": 0.4027272561098214, "grad_norm": 6.304389322318602, "learning_rate": 6.778561363087555e-06, "loss": 17.5771, "step": 22032 }, { "epoch": 0.40274553530626794, "grad_norm": 6.152669886805366, "learning_rate": 6.778284708914679e-06, "loss": 17.4633, "step": 22033 }, { "epoch": 0.4027638145027145, "grad_norm": 6.005907317098, "learning_rate": 6.778008048509071e-06, "loss": 17.727, "step": 22034 }, { "epoch": 0.40278209369916096, "grad_norm": 6.711324495143864, "learning_rate": 6.7777313818716974e-06, "loss": 17.7124, "step": 22035 }, { "epoch": 0.4028003728956075, "grad_norm": 6.923061489646611, "learning_rate": 6.77745470900353e-06, "loss": 18.0881, "step": 22036 }, { "epoch": 0.40281865209205403, "grad_norm": 6.470315405504754, "learning_rate": 6.777178029905539e-06, "loss": 17.5918, "step": 22037 }, { "epoch": 0.40283693128850057, "grad_norm": 6.143189444036186, "learning_rate": 6.776901344578694e-06, "loss": 17.4019, "step": 22038 }, { "epoch": 0.4028552104849471, "grad_norm": 6.553847836721387, "learning_rate": 6.776624653023962e-06, "loss": 17.5724, "step": 22039 }, { "epoch": 0.4028734896813936, "grad_norm": 7.314977848349203, "learning_rate": 6.776347955242315e-06, "loss": 17.931, "step": 22040 }, { "epoch": 0.4028917688778401, "grad_norm": 7.257845640885772, "learning_rate": 6.776071251234724e-06, "loss": 18.0227, "step": 22041 }, { "epoch": 0.40291004807428665, "grad_norm": 7.347759081709609, "learning_rate": 6.7757945410021565e-06, "loss": 17.9248, "step": 22042 }, { "epoch": 0.4029283272707332, "grad_norm": 7.469395251740494, "learning_rate": 6.775517824545583e-06, "loss": 17.7553, "step": 22043 }, { "epoch": 0.4029466064671797, "grad_norm": 6.61346924708778, "learning_rate": 6.775241101865975e-06, "loss": 17.6877, "step": 22044 }, { "epoch": 0.4029648856636262, "grad_norm": 9.551444592270448, "learning_rate": 6.774964372964299e-06, "loss": 18.2672, "step": 22045 }, { "epoch": 0.40298316486007274, "grad_norm": 5.8401528513055885, "learning_rate": 6.7746876378415286e-06, "loss": 17.421, "step": 22046 }, { "epoch": 0.4030014440565193, "grad_norm": 6.567797173097257, "learning_rate": 6.77441089649863e-06, "loss": 17.3372, "step": 22047 }, { "epoch": 0.4030197232529658, "grad_norm": 6.330795420549813, "learning_rate": 6.774134148936578e-06, "loss": 17.6241, "step": 22048 }, { "epoch": 0.40303800244941235, "grad_norm": 4.981431999872036, "learning_rate": 6.773857395156337e-06, "loss": 16.9476, "step": 22049 }, { "epoch": 0.4030562816458588, "grad_norm": 7.565433370051487, "learning_rate": 6.7735806351588805e-06, "loss": 18.1421, "step": 22050 }, { "epoch": 0.40307456084230536, "grad_norm": 7.49806402746438, "learning_rate": 6.773303868945178e-06, "loss": 18.1754, "step": 22051 }, { "epoch": 0.4030928400387519, "grad_norm": 7.030163964983321, "learning_rate": 6.773027096516201e-06, "loss": 17.6668, "step": 22052 }, { "epoch": 0.40311111923519843, "grad_norm": 7.069973587782146, "learning_rate": 6.772750317872916e-06, "loss": 17.7277, "step": 22053 }, { "epoch": 0.40312939843164497, "grad_norm": 6.569088556625905, "learning_rate": 6.772473533016294e-06, "loss": 17.4102, "step": 22054 }, { "epoch": 0.40314767762809145, "grad_norm": 6.114960161641345, "learning_rate": 6.772196741947308e-06, "loss": 17.4188, "step": 22055 }, { "epoch": 0.403165956824538, "grad_norm": 6.1939415525572254, "learning_rate": 6.771919944666926e-06, "loss": 17.467, "step": 22056 }, { "epoch": 0.4031842360209845, "grad_norm": 6.43608017716079, "learning_rate": 6.771643141176118e-06, "loss": 17.6242, "step": 22057 }, { "epoch": 0.40320251521743106, "grad_norm": 6.264800078105397, "learning_rate": 6.771366331475854e-06, "loss": 17.4917, "step": 22058 }, { "epoch": 0.4032207944138776, "grad_norm": 6.575052766783038, "learning_rate": 6.771089515567105e-06, "loss": 17.8639, "step": 22059 }, { "epoch": 0.4032390736103241, "grad_norm": 7.931746975453965, "learning_rate": 6.770812693450841e-06, "loss": 18.1557, "step": 22060 }, { "epoch": 0.4032573528067706, "grad_norm": 4.861686580927656, "learning_rate": 6.770535865128033e-06, "loss": 16.9002, "step": 22061 }, { "epoch": 0.40327563200321714, "grad_norm": 6.135266402479794, "learning_rate": 6.7702590305996485e-06, "loss": 17.1294, "step": 22062 }, { "epoch": 0.4032939111996637, "grad_norm": 6.025703055757253, "learning_rate": 6.769982189866662e-06, "loss": 17.4299, "step": 22063 }, { "epoch": 0.40331219039611016, "grad_norm": 5.818448677965238, "learning_rate": 6.7697053429300395e-06, "loss": 17.1443, "step": 22064 }, { "epoch": 0.4033304695925567, "grad_norm": 6.500125203523118, "learning_rate": 6.769428489790755e-06, "loss": 17.5327, "step": 22065 }, { "epoch": 0.40334874878900323, "grad_norm": 6.401033609103858, "learning_rate": 6.7691516304497775e-06, "loss": 17.3931, "step": 22066 }, { "epoch": 0.40336702798544977, "grad_norm": 7.247341161607492, "learning_rate": 6.768874764908074e-06, "loss": 17.8451, "step": 22067 }, { "epoch": 0.4033853071818963, "grad_norm": 9.345615282623246, "learning_rate": 6.7685978931666204e-06, "loss": 18.7439, "step": 22068 }, { "epoch": 0.4034035863783428, "grad_norm": 6.891614580638836, "learning_rate": 6.768321015226385e-06, "loss": 17.4206, "step": 22069 }, { "epoch": 0.4034218655747893, "grad_norm": 4.931124718854705, "learning_rate": 6.768044131088337e-06, "loss": 17.0932, "step": 22070 }, { "epoch": 0.40344014477123585, "grad_norm": 6.823202517333984, "learning_rate": 6.767767240753448e-06, "loss": 17.6827, "step": 22071 }, { "epoch": 0.4034584239676824, "grad_norm": 5.133011928741689, "learning_rate": 6.767490344222687e-06, "loss": 16.9361, "step": 22072 }, { "epoch": 0.4034767031641289, "grad_norm": 6.51980297676806, "learning_rate": 6.767213441497028e-06, "loss": 17.5075, "step": 22073 }, { "epoch": 0.4034949823605754, "grad_norm": 6.3817933788838435, "learning_rate": 6.766936532577438e-06, "loss": 17.2291, "step": 22074 }, { "epoch": 0.40351326155702194, "grad_norm": 6.940379698871074, "learning_rate": 6.766659617464889e-06, "loss": 17.7432, "step": 22075 }, { "epoch": 0.4035315407534685, "grad_norm": 7.381287030269214, "learning_rate": 6.766382696160351e-06, "loss": 17.8526, "step": 22076 }, { "epoch": 0.403549819949915, "grad_norm": 7.3041508553010015, "learning_rate": 6.766105768664795e-06, "loss": 17.9687, "step": 22077 }, { "epoch": 0.40356809914636155, "grad_norm": 6.578276686090134, "learning_rate": 6.765828834979191e-06, "loss": 17.6986, "step": 22078 }, { "epoch": 0.403586378342808, "grad_norm": 6.286582031636894, "learning_rate": 6.765551895104512e-06, "loss": 17.4138, "step": 22079 }, { "epoch": 0.40360465753925456, "grad_norm": 6.793606781535274, "learning_rate": 6.765274949041726e-06, "loss": 17.7358, "step": 22080 }, { "epoch": 0.4036229367357011, "grad_norm": 8.42867156786083, "learning_rate": 6.764997996791803e-06, "loss": 17.849, "step": 22081 }, { "epoch": 0.40364121593214763, "grad_norm": 6.2925320194200465, "learning_rate": 6.764721038355716e-06, "loss": 17.3358, "step": 22082 }, { "epoch": 0.40365949512859417, "grad_norm": 6.380862232199877, "learning_rate": 6.764444073734436e-06, "loss": 17.5864, "step": 22083 }, { "epoch": 0.40367777432504065, "grad_norm": 5.93191903512372, "learning_rate": 6.764167102928932e-06, "loss": 17.2688, "step": 22084 }, { "epoch": 0.4036960535214872, "grad_norm": 6.916394760447434, "learning_rate": 6.7638901259401755e-06, "loss": 17.6485, "step": 22085 }, { "epoch": 0.4037143327179337, "grad_norm": 7.711118810005729, "learning_rate": 6.763613142769137e-06, "loss": 18.2313, "step": 22086 }, { "epoch": 0.40373261191438026, "grad_norm": 7.187299408147425, "learning_rate": 6.763336153416787e-06, "loss": 17.5883, "step": 22087 }, { "epoch": 0.4037508911108268, "grad_norm": 6.224547537223937, "learning_rate": 6.763059157884098e-06, "loss": 17.4435, "step": 22088 }, { "epoch": 0.4037691703072733, "grad_norm": 5.817792774023097, "learning_rate": 6.762782156172037e-06, "loss": 17.4436, "step": 22089 }, { "epoch": 0.4037874495037198, "grad_norm": 7.816523837537163, "learning_rate": 6.76250514828158e-06, "loss": 17.8937, "step": 22090 }, { "epoch": 0.40380572870016634, "grad_norm": 6.207199183236863, "learning_rate": 6.762228134213695e-06, "loss": 17.5285, "step": 22091 }, { "epoch": 0.4038240078966129, "grad_norm": 6.839655499113275, "learning_rate": 6.761951113969353e-06, "loss": 17.7867, "step": 22092 }, { "epoch": 0.4038422870930594, "grad_norm": 7.786225975050639, "learning_rate": 6.761674087549526e-06, "loss": 18.0969, "step": 22093 }, { "epoch": 0.4038605662895059, "grad_norm": 6.049391074637899, "learning_rate": 6.761397054955182e-06, "loss": 17.4082, "step": 22094 }, { "epoch": 0.40387884548595243, "grad_norm": 6.449817757699847, "learning_rate": 6.761120016187296e-06, "loss": 17.5026, "step": 22095 }, { "epoch": 0.40389712468239897, "grad_norm": 6.807746451894597, "learning_rate": 6.760842971246837e-06, "loss": 17.4275, "step": 22096 }, { "epoch": 0.4039154038788455, "grad_norm": 5.916084772017239, "learning_rate": 6.760565920134776e-06, "loss": 17.326, "step": 22097 }, { "epoch": 0.403933683075292, "grad_norm": 7.6902640515359915, "learning_rate": 6.760288862852085e-06, "loss": 17.8501, "step": 22098 }, { "epoch": 0.4039519622717385, "grad_norm": 7.595915409993594, "learning_rate": 6.760011799399732e-06, "loss": 18.0188, "step": 22099 }, { "epoch": 0.40397024146818505, "grad_norm": 5.399474979429258, "learning_rate": 6.759734729778693e-06, "loss": 17.0223, "step": 22100 }, { "epoch": 0.4039885206646316, "grad_norm": 6.034056826555642, "learning_rate": 6.759457653989936e-06, "loss": 17.1478, "step": 22101 }, { "epoch": 0.4040067998610781, "grad_norm": 5.237464031625341, "learning_rate": 6.759180572034432e-06, "loss": 17.1195, "step": 22102 }, { "epoch": 0.4040250790575246, "grad_norm": 6.949943876542296, "learning_rate": 6.758903483913152e-06, "loss": 17.529, "step": 22103 }, { "epoch": 0.40404335825397114, "grad_norm": 5.775795031745005, "learning_rate": 6.758626389627068e-06, "loss": 17.3392, "step": 22104 }, { "epoch": 0.4040616374504177, "grad_norm": 6.04868043272047, "learning_rate": 6.7583492891771516e-06, "loss": 17.0395, "step": 22105 }, { "epoch": 0.4040799166468642, "grad_norm": 6.370321786337064, "learning_rate": 6.758072182564374e-06, "loss": 17.5069, "step": 22106 }, { "epoch": 0.40409819584331075, "grad_norm": 6.813322830272538, "learning_rate": 6.757795069789706e-06, "loss": 17.2654, "step": 22107 }, { "epoch": 0.40411647503975723, "grad_norm": 6.144323791285378, "learning_rate": 6.757517950854118e-06, "loss": 17.619, "step": 22108 }, { "epoch": 0.40413475423620376, "grad_norm": 5.941140413558077, "learning_rate": 6.757240825758582e-06, "loss": 17.5066, "step": 22109 }, { "epoch": 0.4041530334326503, "grad_norm": 6.320242546936558, "learning_rate": 6.756963694504071e-06, "loss": 17.3604, "step": 22110 }, { "epoch": 0.40417131262909683, "grad_norm": 6.601216207740825, "learning_rate": 6.756686557091554e-06, "loss": 17.4847, "step": 22111 }, { "epoch": 0.40418959182554337, "grad_norm": 6.657458883317337, "learning_rate": 6.756409413522002e-06, "loss": 17.4861, "step": 22112 }, { "epoch": 0.40420787102198985, "grad_norm": 7.012513906515419, "learning_rate": 6.7561322637963865e-06, "loss": 17.7998, "step": 22113 }, { "epoch": 0.4042261502184364, "grad_norm": 5.949843811360443, "learning_rate": 6.755855107915683e-06, "loss": 17.0085, "step": 22114 }, { "epoch": 0.4042444294148829, "grad_norm": 5.812895215797421, "learning_rate": 6.755577945880858e-06, "loss": 17.3518, "step": 22115 }, { "epoch": 0.40426270861132946, "grad_norm": 5.973497965673985, "learning_rate": 6.755300777692885e-06, "loss": 17.3914, "step": 22116 }, { "epoch": 0.404280987807776, "grad_norm": 5.411971787295952, "learning_rate": 6.755023603352735e-06, "loss": 17.1019, "step": 22117 }, { "epoch": 0.4042992670042225, "grad_norm": 5.909867899448887, "learning_rate": 6.754746422861379e-06, "loss": 17.1723, "step": 22118 }, { "epoch": 0.404317546200669, "grad_norm": 7.245289313835801, "learning_rate": 6.754469236219789e-06, "loss": 17.984, "step": 22119 }, { "epoch": 0.40433582539711554, "grad_norm": 6.789279055749452, "learning_rate": 6.754192043428938e-06, "loss": 17.5433, "step": 22120 }, { "epoch": 0.4043541045935621, "grad_norm": 5.120970386248282, "learning_rate": 6.753914844489795e-06, "loss": 16.965, "step": 22121 }, { "epoch": 0.4043723837900086, "grad_norm": 6.566986376265424, "learning_rate": 6.753637639403332e-06, "loss": 17.2193, "step": 22122 }, { "epoch": 0.4043906629864551, "grad_norm": 6.546714540317617, "learning_rate": 6.753360428170523e-06, "loss": 17.2887, "step": 22123 }, { "epoch": 0.40440894218290163, "grad_norm": 8.074619644270074, "learning_rate": 6.753083210792337e-06, "loss": 17.7437, "step": 22124 }, { "epoch": 0.40442722137934817, "grad_norm": 6.365788857589704, "learning_rate": 6.752805987269746e-06, "loss": 17.5323, "step": 22125 }, { "epoch": 0.4044455005757947, "grad_norm": 5.925189280272979, "learning_rate": 6.752528757603722e-06, "loss": 17.4087, "step": 22126 }, { "epoch": 0.40446377977224124, "grad_norm": 7.873084083845511, "learning_rate": 6.752251521795236e-06, "loss": 18.2124, "step": 22127 }, { "epoch": 0.4044820589686877, "grad_norm": 6.146300131736766, "learning_rate": 6.751974279845264e-06, "loss": 17.4707, "step": 22128 }, { "epoch": 0.40450033816513425, "grad_norm": 5.7523359568742265, "learning_rate": 6.751697031754772e-06, "loss": 17.4216, "step": 22129 }, { "epoch": 0.4045186173615808, "grad_norm": 6.698152489926536, "learning_rate": 6.751419777524734e-06, "loss": 17.41, "step": 22130 }, { "epoch": 0.4045368965580273, "grad_norm": 6.921879800943967, "learning_rate": 6.7511425171561205e-06, "loss": 17.8194, "step": 22131 }, { "epoch": 0.4045551757544738, "grad_norm": 6.662011338095347, "learning_rate": 6.750865250649906e-06, "loss": 17.5918, "step": 22132 }, { "epoch": 0.40457345495092034, "grad_norm": 6.260882181315775, "learning_rate": 6.75058797800706e-06, "loss": 17.3202, "step": 22133 }, { "epoch": 0.4045917341473669, "grad_norm": 5.924199633577894, "learning_rate": 6.750310699228555e-06, "loss": 17.4094, "step": 22134 }, { "epoch": 0.4046100133438134, "grad_norm": 6.184279968253344, "learning_rate": 6.750033414315363e-06, "loss": 17.2393, "step": 22135 }, { "epoch": 0.40462829254025995, "grad_norm": 9.379302029088581, "learning_rate": 6.749756123268456e-06, "loss": 18.2848, "step": 22136 }, { "epoch": 0.40464657173670643, "grad_norm": 6.544399335645791, "learning_rate": 6.749478826088806e-06, "loss": 17.6162, "step": 22137 }, { "epoch": 0.40466485093315296, "grad_norm": 5.786205780710364, "learning_rate": 6.749201522777385e-06, "loss": 17.2665, "step": 22138 }, { "epoch": 0.4046831301295995, "grad_norm": 8.181619392120167, "learning_rate": 6.748924213335163e-06, "loss": 18.2582, "step": 22139 }, { "epoch": 0.40470140932604604, "grad_norm": 6.457207279081686, "learning_rate": 6.7486468977631126e-06, "loss": 17.1033, "step": 22140 }, { "epoch": 0.40471968852249257, "grad_norm": 7.855967774278722, "learning_rate": 6.748369576062208e-06, "loss": 18.189, "step": 22141 }, { "epoch": 0.40473796771893905, "grad_norm": 4.969703936150911, "learning_rate": 6.74809224823342e-06, "loss": 16.9735, "step": 22142 }, { "epoch": 0.4047562469153856, "grad_norm": 7.140640634446646, "learning_rate": 6.74781491427772e-06, "loss": 17.6187, "step": 22143 }, { "epoch": 0.4047745261118321, "grad_norm": 6.037265655891585, "learning_rate": 6.74753757419608e-06, "loss": 17.2544, "step": 22144 }, { "epoch": 0.40479280530827866, "grad_norm": 7.297147796514037, "learning_rate": 6.747260227989473e-06, "loss": 17.9993, "step": 22145 }, { "epoch": 0.4048110845047252, "grad_norm": 9.024807169459095, "learning_rate": 6.7469828756588694e-06, "loss": 18.2871, "step": 22146 }, { "epoch": 0.4048293637011717, "grad_norm": 5.0270602565412945, "learning_rate": 6.746705517205244e-06, "loss": 16.9534, "step": 22147 }, { "epoch": 0.4048476428976182, "grad_norm": 5.270923813606672, "learning_rate": 6.746428152629567e-06, "loss": 16.99, "step": 22148 }, { "epoch": 0.40486592209406475, "grad_norm": 6.167460161072971, "learning_rate": 6.746150781932809e-06, "loss": 17.4296, "step": 22149 }, { "epoch": 0.4048842012905113, "grad_norm": 7.3482352376413695, "learning_rate": 6.745873405115946e-06, "loss": 18.2224, "step": 22150 }, { "epoch": 0.4049024804869578, "grad_norm": 6.671407230482707, "learning_rate": 6.7455960221799475e-06, "loss": 17.0869, "step": 22151 }, { "epoch": 0.4049207596834043, "grad_norm": 7.619510605552885, "learning_rate": 6.745318633125788e-06, "loss": 17.9083, "step": 22152 }, { "epoch": 0.40493903887985083, "grad_norm": 7.018422401100924, "learning_rate": 6.745041237954437e-06, "loss": 17.8694, "step": 22153 }, { "epoch": 0.40495731807629737, "grad_norm": 5.185636416302049, "learning_rate": 6.744763836666866e-06, "loss": 16.768, "step": 22154 }, { "epoch": 0.4049755972727439, "grad_norm": 5.460020134601508, "learning_rate": 6.7444864292640525e-06, "loss": 17.1994, "step": 22155 }, { "epoch": 0.40499387646919044, "grad_norm": 5.8555374893900485, "learning_rate": 6.744209015746963e-06, "loss": 17.2155, "step": 22156 }, { "epoch": 0.4050121556656369, "grad_norm": 5.443443031534633, "learning_rate": 6.743931596116573e-06, "loss": 17.1019, "step": 22157 }, { "epoch": 0.40503043486208345, "grad_norm": 6.232530723447787, "learning_rate": 6.743654170373855e-06, "loss": 17.3289, "step": 22158 }, { "epoch": 0.40504871405853, "grad_norm": 6.257351557302948, "learning_rate": 6.743376738519779e-06, "loss": 17.3444, "step": 22159 }, { "epoch": 0.4050669932549765, "grad_norm": 6.7702966690245034, "learning_rate": 6.743099300555319e-06, "loss": 17.714, "step": 22160 }, { "epoch": 0.40508527245142306, "grad_norm": 7.751807796514337, "learning_rate": 6.742821856481448e-06, "loss": 17.7319, "step": 22161 }, { "epoch": 0.40510355164786954, "grad_norm": 6.509454448143292, "learning_rate": 6.742544406299137e-06, "loss": 17.506, "step": 22162 }, { "epoch": 0.4051218308443161, "grad_norm": 7.400758878878089, "learning_rate": 6.742266950009359e-06, "loss": 17.9424, "step": 22163 }, { "epoch": 0.4051401100407626, "grad_norm": 5.882275896544902, "learning_rate": 6.741989487613087e-06, "loss": 17.4201, "step": 22164 }, { "epoch": 0.40515838923720915, "grad_norm": 7.6409091347877975, "learning_rate": 6.741712019111293e-06, "loss": 17.9477, "step": 22165 }, { "epoch": 0.40517666843365563, "grad_norm": 6.4758461795965205, "learning_rate": 6.74143454450495e-06, "loss": 17.6987, "step": 22166 }, { "epoch": 0.40519494763010216, "grad_norm": 6.2413095213566905, "learning_rate": 6.741157063795028e-06, "loss": 17.3144, "step": 22167 }, { "epoch": 0.4052132268265487, "grad_norm": 6.9565224842103675, "learning_rate": 6.740879576982505e-06, "loss": 17.714, "step": 22168 }, { "epoch": 0.40523150602299524, "grad_norm": 6.0106981750372634, "learning_rate": 6.740602084068349e-06, "loss": 17.2819, "step": 22169 }, { "epoch": 0.40524978521944177, "grad_norm": 5.607878065581783, "learning_rate": 6.740324585053532e-06, "loss": 16.992, "step": 22170 }, { "epoch": 0.40526806441588825, "grad_norm": 6.215494327541819, "learning_rate": 6.740047079939028e-06, "loss": 17.2968, "step": 22171 }, { "epoch": 0.4052863436123348, "grad_norm": 6.622060494217935, "learning_rate": 6.7397695687258115e-06, "loss": 17.5319, "step": 22172 }, { "epoch": 0.4053046228087813, "grad_norm": 6.28169817228459, "learning_rate": 6.7394920514148535e-06, "loss": 17.4997, "step": 22173 }, { "epoch": 0.40532290200522786, "grad_norm": 6.7644607074438605, "learning_rate": 6.739214528007126e-06, "loss": 17.8697, "step": 22174 }, { "epoch": 0.4053411812016744, "grad_norm": 5.951257001626463, "learning_rate": 6.738936998503603e-06, "loss": 17.2977, "step": 22175 }, { "epoch": 0.4053594603981209, "grad_norm": 5.930150434365645, "learning_rate": 6.738659462905257e-06, "loss": 17.2478, "step": 22176 }, { "epoch": 0.4053777395945674, "grad_norm": 5.6784804525390715, "learning_rate": 6.738381921213061e-06, "loss": 17.4433, "step": 22177 }, { "epoch": 0.40539601879101395, "grad_norm": 6.64308669008464, "learning_rate": 6.738104373427986e-06, "loss": 17.3754, "step": 22178 }, { "epoch": 0.4054142979874605, "grad_norm": 5.741868228596022, "learning_rate": 6.737826819551008e-06, "loss": 17.1142, "step": 22179 }, { "epoch": 0.405432577183907, "grad_norm": 5.997290038820609, "learning_rate": 6.737549259583096e-06, "loss": 17.3825, "step": 22180 }, { "epoch": 0.4054508563803535, "grad_norm": 5.844866164401639, "learning_rate": 6.7372716935252235e-06, "loss": 17.267, "step": 22181 }, { "epoch": 0.40546913557680003, "grad_norm": 6.673007568731624, "learning_rate": 6.7369941213783664e-06, "loss": 17.7447, "step": 22182 }, { "epoch": 0.40548741477324657, "grad_norm": 6.79963815163332, "learning_rate": 6.736716543143496e-06, "loss": 17.7124, "step": 22183 }, { "epoch": 0.4055056939696931, "grad_norm": 6.457810547830599, "learning_rate": 6.736438958821584e-06, "loss": 17.3048, "step": 22184 }, { "epoch": 0.40552397316613964, "grad_norm": 6.999599851720819, "learning_rate": 6.736161368413604e-06, "loss": 17.8615, "step": 22185 }, { "epoch": 0.4055422523625861, "grad_norm": 5.936760149888992, "learning_rate": 6.735883771920528e-06, "loss": 17.3027, "step": 22186 }, { "epoch": 0.40556053155903266, "grad_norm": 6.800880808213196, "learning_rate": 6.7356061693433314e-06, "loss": 17.8916, "step": 22187 }, { "epoch": 0.4055788107554792, "grad_norm": 6.662290234587748, "learning_rate": 6.7353285606829855e-06, "loss": 17.6892, "step": 22188 }, { "epoch": 0.4055970899519257, "grad_norm": 6.0424886929919355, "learning_rate": 6.7350509459404644e-06, "loss": 17.3845, "step": 22189 }, { "epoch": 0.40561536914837226, "grad_norm": 8.329936546732132, "learning_rate": 6.734773325116739e-06, "loss": 18.0189, "step": 22190 }, { "epoch": 0.40563364834481874, "grad_norm": 6.792306073485588, "learning_rate": 6.734495698212784e-06, "loss": 17.8954, "step": 22191 }, { "epoch": 0.4056519275412653, "grad_norm": 5.311830982957135, "learning_rate": 6.734218065229572e-06, "loss": 17.1164, "step": 22192 }, { "epoch": 0.4056702067377118, "grad_norm": 6.775960255611034, "learning_rate": 6.7339404261680775e-06, "loss": 17.197, "step": 22193 }, { "epoch": 0.40568848593415835, "grad_norm": 7.348697310052713, "learning_rate": 6.733662781029271e-06, "loss": 17.8498, "step": 22194 }, { "epoch": 0.4057067651306049, "grad_norm": 8.793370791530828, "learning_rate": 6.733385129814126e-06, "loss": 17.9595, "step": 22195 }, { "epoch": 0.40572504432705137, "grad_norm": 7.5304583369701, "learning_rate": 6.733107472523618e-06, "loss": 17.9314, "step": 22196 }, { "epoch": 0.4057433235234979, "grad_norm": 7.494023003913464, "learning_rate": 6.732829809158719e-06, "loss": 17.8105, "step": 22197 }, { "epoch": 0.40576160271994444, "grad_norm": 6.0353004471521405, "learning_rate": 6.7325521397204005e-06, "loss": 17.3592, "step": 22198 }, { "epoch": 0.40577988191639097, "grad_norm": 6.462822894306157, "learning_rate": 6.732274464209637e-06, "loss": 17.5024, "step": 22199 }, { "epoch": 0.40579816111283745, "grad_norm": 7.389732610475244, "learning_rate": 6.731996782627404e-06, "loss": 17.8692, "step": 22200 }, { "epoch": 0.405816440309284, "grad_norm": 7.098776795984079, "learning_rate": 6.731719094974671e-06, "loss": 17.963, "step": 22201 }, { "epoch": 0.4058347195057305, "grad_norm": 7.033700437195739, "learning_rate": 6.7314414012524135e-06, "loss": 17.4693, "step": 22202 }, { "epoch": 0.40585299870217706, "grad_norm": 5.605984450205299, "learning_rate": 6.731163701461603e-06, "loss": 17.1704, "step": 22203 }, { "epoch": 0.4058712778986236, "grad_norm": 6.494532560138076, "learning_rate": 6.730885995603215e-06, "loss": 17.3485, "step": 22204 }, { "epoch": 0.4058895570950701, "grad_norm": 6.795673206135465, "learning_rate": 6.730608283678222e-06, "loss": 17.7579, "step": 22205 }, { "epoch": 0.4059078362915166, "grad_norm": 6.104057564248977, "learning_rate": 6.730330565687596e-06, "loss": 17.2064, "step": 22206 }, { "epoch": 0.40592611548796315, "grad_norm": 7.248129920360663, "learning_rate": 6.730052841632313e-06, "loss": 17.7974, "step": 22207 }, { "epoch": 0.4059443946844097, "grad_norm": 7.279123128963967, "learning_rate": 6.729775111513342e-06, "loss": 17.781, "step": 22208 }, { "epoch": 0.4059626738808562, "grad_norm": 6.649230320363981, "learning_rate": 6.729497375331662e-06, "loss": 17.9351, "step": 22209 }, { "epoch": 0.4059809530773027, "grad_norm": 5.495569677274187, "learning_rate": 6.729219633088244e-06, "loss": 17.0028, "step": 22210 }, { "epoch": 0.40599923227374923, "grad_norm": 6.623289284082819, "learning_rate": 6.72894188478406e-06, "loss": 17.7299, "step": 22211 }, { "epoch": 0.40601751147019577, "grad_norm": 7.003139447383208, "learning_rate": 6.728664130420085e-06, "loss": 17.71, "step": 22212 }, { "epoch": 0.4060357906666423, "grad_norm": 5.921916466668357, "learning_rate": 6.728386369997292e-06, "loss": 17.101, "step": 22213 }, { "epoch": 0.40605406986308884, "grad_norm": 5.181627189879955, "learning_rate": 6.728108603516655e-06, "loss": 17.0198, "step": 22214 }, { "epoch": 0.4060723490595353, "grad_norm": 8.467760984784013, "learning_rate": 6.727830830979148e-06, "loss": 18.6448, "step": 22215 }, { "epoch": 0.40609062825598186, "grad_norm": 6.226974569968773, "learning_rate": 6.727553052385742e-06, "loss": 17.2704, "step": 22216 }, { "epoch": 0.4061089074524284, "grad_norm": 7.620899612529509, "learning_rate": 6.727275267737414e-06, "loss": 17.6003, "step": 22217 }, { "epoch": 0.4061271866488749, "grad_norm": 5.428206503269267, "learning_rate": 6.726997477035137e-06, "loss": 17.1095, "step": 22218 }, { "epoch": 0.40614546584532146, "grad_norm": 6.234512034152487, "learning_rate": 6.7267196802798814e-06, "loss": 17.4033, "step": 22219 }, { "epoch": 0.40616374504176794, "grad_norm": 5.91543801171165, "learning_rate": 6.726441877472625e-06, "loss": 17.2918, "step": 22220 }, { "epoch": 0.4061820242382145, "grad_norm": 5.962861017427411, "learning_rate": 6.726164068614338e-06, "loss": 17.1846, "step": 22221 }, { "epoch": 0.406200303434661, "grad_norm": 5.507652938256123, "learning_rate": 6.725886253705996e-06, "loss": 17.1938, "step": 22222 }, { "epoch": 0.40621858263110755, "grad_norm": 6.847664016963516, "learning_rate": 6.7256084327485735e-06, "loss": 17.7769, "step": 22223 }, { "epoch": 0.4062368618275541, "grad_norm": 6.7659342386056025, "learning_rate": 6.725330605743043e-06, "loss": 17.3308, "step": 22224 }, { "epoch": 0.40625514102400057, "grad_norm": 7.855577634776607, "learning_rate": 6.725052772690379e-06, "loss": 17.9193, "step": 22225 }, { "epoch": 0.4062734202204471, "grad_norm": 10.47053549414686, "learning_rate": 6.7247749335915526e-06, "loss": 18.4262, "step": 22226 }, { "epoch": 0.40629169941689364, "grad_norm": 6.279469691492723, "learning_rate": 6.724497088447541e-06, "loss": 17.613, "step": 22227 }, { "epoch": 0.4063099786133402, "grad_norm": 6.6329844426406765, "learning_rate": 6.724219237259318e-06, "loss": 17.5052, "step": 22228 }, { "epoch": 0.4063282578097867, "grad_norm": 5.967539116521782, "learning_rate": 6.723941380027854e-06, "loss": 17.4117, "step": 22229 }, { "epoch": 0.4063465370062332, "grad_norm": 6.275821818703162, "learning_rate": 6.723663516754126e-06, "loss": 17.66, "step": 22230 }, { "epoch": 0.4063648162026797, "grad_norm": 6.574200769598269, "learning_rate": 6.723385647439108e-06, "loss": 17.7173, "step": 22231 }, { "epoch": 0.40638309539912626, "grad_norm": 8.875882950778484, "learning_rate": 6.7231077720837714e-06, "loss": 18.4791, "step": 22232 }, { "epoch": 0.4064013745955728, "grad_norm": 7.896599020986848, "learning_rate": 6.722829890689092e-06, "loss": 18.1513, "step": 22233 }, { "epoch": 0.4064196537920193, "grad_norm": 6.937869112786421, "learning_rate": 6.722552003256043e-06, "loss": 17.5613, "step": 22234 }, { "epoch": 0.4064379329884658, "grad_norm": 8.10140539035616, "learning_rate": 6.722274109785599e-06, "loss": 18.0932, "step": 22235 }, { "epoch": 0.40645621218491235, "grad_norm": 5.71491763157779, "learning_rate": 6.721996210278734e-06, "loss": 17.2551, "step": 22236 }, { "epoch": 0.4064744913813589, "grad_norm": 5.936790365388158, "learning_rate": 6.721718304736421e-06, "loss": 17.4498, "step": 22237 }, { "epoch": 0.4064927705778054, "grad_norm": 6.032687222202416, "learning_rate": 6.721440393159636e-06, "loss": 17.2518, "step": 22238 }, { "epoch": 0.4065110497742519, "grad_norm": 6.356822217101505, "learning_rate": 6.721162475549351e-06, "loss": 17.4824, "step": 22239 }, { "epoch": 0.40652932897069843, "grad_norm": 6.64672910711208, "learning_rate": 6.72088455190654e-06, "loss": 17.798, "step": 22240 }, { "epoch": 0.40654760816714497, "grad_norm": 6.381776877968177, "learning_rate": 6.720606622232179e-06, "loss": 17.3612, "step": 22241 }, { "epoch": 0.4065658873635915, "grad_norm": 6.884852765220903, "learning_rate": 6.720328686527242e-06, "loss": 17.6881, "step": 22242 }, { "epoch": 0.40658416656003804, "grad_norm": 6.519872071151057, "learning_rate": 6.720050744792701e-06, "loss": 17.3681, "step": 22243 }, { "epoch": 0.4066024457564845, "grad_norm": 6.4910420545005465, "learning_rate": 6.719772797029531e-06, "loss": 17.542, "step": 22244 }, { "epoch": 0.40662072495293106, "grad_norm": 6.679339522996079, "learning_rate": 6.719494843238707e-06, "loss": 17.6046, "step": 22245 }, { "epoch": 0.4066390041493776, "grad_norm": 6.380391764448618, "learning_rate": 6.7192168834212036e-06, "loss": 17.5827, "step": 22246 }, { "epoch": 0.4066572833458241, "grad_norm": 8.933926730953658, "learning_rate": 6.718938917577993e-06, "loss": 18.9389, "step": 22247 }, { "epoch": 0.40667556254227066, "grad_norm": 6.058289307292574, "learning_rate": 6.718660945710052e-06, "loss": 17.395, "step": 22248 }, { "epoch": 0.40669384173871714, "grad_norm": 6.329572252191765, "learning_rate": 6.718382967818352e-06, "loss": 17.6975, "step": 22249 }, { "epoch": 0.4067121209351637, "grad_norm": 7.076438856369958, "learning_rate": 6.718104983903869e-06, "loss": 17.8666, "step": 22250 }, { "epoch": 0.4067304001316102, "grad_norm": 6.23451601461986, "learning_rate": 6.717826993967578e-06, "loss": 17.6653, "step": 22251 }, { "epoch": 0.40674867932805675, "grad_norm": 6.995557174581337, "learning_rate": 6.717548998010454e-06, "loss": 17.8749, "step": 22252 }, { "epoch": 0.4067669585245033, "grad_norm": 7.147895900000048, "learning_rate": 6.717270996033467e-06, "loss": 17.6818, "step": 22253 }, { "epoch": 0.40678523772094977, "grad_norm": 5.875860858663323, "learning_rate": 6.716992988037594e-06, "loss": 17.5146, "step": 22254 }, { "epoch": 0.4068035169173963, "grad_norm": 7.237372281884785, "learning_rate": 6.7167149740238125e-06, "loss": 17.839, "step": 22255 }, { "epoch": 0.40682179611384284, "grad_norm": 7.264890108518054, "learning_rate": 6.716436953993092e-06, "loss": 17.7052, "step": 22256 }, { "epoch": 0.4068400753102894, "grad_norm": 6.724171135001746, "learning_rate": 6.716158927946408e-06, "loss": 17.6778, "step": 22257 }, { "epoch": 0.4068583545067359, "grad_norm": 5.578468251230984, "learning_rate": 6.715880895884738e-06, "loss": 17.4131, "step": 22258 }, { "epoch": 0.4068766337031824, "grad_norm": 6.560338080352419, "learning_rate": 6.715602857809052e-06, "loss": 17.6146, "step": 22259 }, { "epoch": 0.4068949128996289, "grad_norm": 6.9180291916264975, "learning_rate": 6.715324813720329e-06, "loss": 17.9467, "step": 22260 }, { "epoch": 0.40691319209607546, "grad_norm": 5.622453941909864, "learning_rate": 6.715046763619541e-06, "loss": 17.2836, "step": 22261 }, { "epoch": 0.406931471292522, "grad_norm": 8.176080449579668, "learning_rate": 6.714768707507662e-06, "loss": 18.2418, "step": 22262 }, { "epoch": 0.40694975048896853, "grad_norm": 6.274976231881443, "learning_rate": 6.714490645385667e-06, "loss": 17.7055, "step": 22263 }, { "epoch": 0.406968029685415, "grad_norm": 7.577819589305502, "learning_rate": 6.714212577254533e-06, "loss": 18.304, "step": 22264 }, { "epoch": 0.40698630888186155, "grad_norm": 6.573095639871201, "learning_rate": 6.713934503115232e-06, "loss": 17.6393, "step": 22265 }, { "epoch": 0.4070045880783081, "grad_norm": 6.295120292715528, "learning_rate": 6.713656422968739e-06, "loss": 17.264, "step": 22266 }, { "epoch": 0.4070228672747546, "grad_norm": 6.889377350472357, "learning_rate": 6.7133783368160275e-06, "loss": 17.6572, "step": 22267 }, { "epoch": 0.4070411464712011, "grad_norm": 6.40510907772885, "learning_rate": 6.713100244658075e-06, "loss": 17.3039, "step": 22268 }, { "epoch": 0.40705942566764763, "grad_norm": 7.383304609218675, "learning_rate": 6.7128221464958565e-06, "loss": 18.0031, "step": 22269 }, { "epoch": 0.40707770486409417, "grad_norm": 6.5984351382322215, "learning_rate": 6.712544042330342e-06, "loss": 17.3374, "step": 22270 }, { "epoch": 0.4070959840605407, "grad_norm": 6.941330714665212, "learning_rate": 6.7122659321625115e-06, "loss": 17.5744, "step": 22271 }, { "epoch": 0.40711426325698724, "grad_norm": 5.798958432628722, "learning_rate": 6.711987815993335e-06, "loss": 17.1436, "step": 22272 }, { "epoch": 0.4071325424534337, "grad_norm": 7.03506626909677, "learning_rate": 6.711709693823793e-06, "loss": 17.9521, "step": 22273 }, { "epoch": 0.40715082164988026, "grad_norm": 6.350586760113581, "learning_rate": 6.7114315656548554e-06, "loss": 17.3257, "step": 22274 }, { "epoch": 0.4071691008463268, "grad_norm": 5.637832901288479, "learning_rate": 6.711153431487498e-06, "loss": 17.2569, "step": 22275 }, { "epoch": 0.40718738004277333, "grad_norm": 5.1169365619830165, "learning_rate": 6.710875291322697e-06, "loss": 16.9187, "step": 22276 }, { "epoch": 0.40720565923921986, "grad_norm": 6.370177226031568, "learning_rate": 6.710597145161427e-06, "loss": 17.438, "step": 22277 }, { "epoch": 0.40722393843566634, "grad_norm": 5.952175061029821, "learning_rate": 6.710318993004662e-06, "loss": 17.1577, "step": 22278 }, { "epoch": 0.4072422176321129, "grad_norm": 6.831569909064983, "learning_rate": 6.710040834853377e-06, "loss": 17.7127, "step": 22279 }, { "epoch": 0.4072604968285594, "grad_norm": 6.590500632536857, "learning_rate": 6.709762670708548e-06, "loss": 17.4051, "step": 22280 }, { "epoch": 0.40727877602500595, "grad_norm": 7.026313355604036, "learning_rate": 6.709484500571148e-06, "loss": 17.7467, "step": 22281 }, { "epoch": 0.4072970552214525, "grad_norm": 5.977094423814589, "learning_rate": 6.709206324442154e-06, "loss": 17.2317, "step": 22282 }, { "epoch": 0.40731533441789897, "grad_norm": 5.483432737920128, "learning_rate": 6.708928142322542e-06, "loss": 17.2138, "step": 22283 }, { "epoch": 0.4073336136143455, "grad_norm": 6.767125319549047, "learning_rate": 6.708649954213282e-06, "loss": 17.7343, "step": 22284 }, { "epoch": 0.40735189281079204, "grad_norm": 6.409230829732946, "learning_rate": 6.708371760115354e-06, "loss": 17.5576, "step": 22285 }, { "epoch": 0.4073701720072386, "grad_norm": 6.469226127075871, "learning_rate": 6.7080935600297306e-06, "loss": 17.5401, "step": 22286 }, { "epoch": 0.4073884512036851, "grad_norm": 6.399617496700894, "learning_rate": 6.7078153539573874e-06, "loss": 17.5286, "step": 22287 }, { "epoch": 0.4074067304001316, "grad_norm": 6.639113903558887, "learning_rate": 6.7075371418993e-06, "loss": 17.3282, "step": 22288 }, { "epoch": 0.4074250095965781, "grad_norm": 6.1714005240139596, "learning_rate": 6.707258923856442e-06, "loss": 17.3113, "step": 22289 }, { "epoch": 0.40744328879302466, "grad_norm": 8.751944703567593, "learning_rate": 6.706980699829791e-06, "loss": 18.5986, "step": 22290 }, { "epoch": 0.4074615679894712, "grad_norm": 8.630337043098338, "learning_rate": 6.70670246982032e-06, "loss": 18.6933, "step": 22291 }, { "epoch": 0.40747984718591773, "grad_norm": 6.414471102860324, "learning_rate": 6.7064242338290055e-06, "loss": 17.7053, "step": 22292 }, { "epoch": 0.4074981263823642, "grad_norm": 5.758931406556247, "learning_rate": 6.706145991856823e-06, "loss": 17.2899, "step": 22293 }, { "epoch": 0.40751640557881075, "grad_norm": 5.454292115298666, "learning_rate": 6.705867743904744e-06, "loss": 17.2698, "step": 22294 }, { "epoch": 0.4075346847752573, "grad_norm": 6.140112711077134, "learning_rate": 6.705589489973748e-06, "loss": 17.2706, "step": 22295 }, { "epoch": 0.4075529639717038, "grad_norm": 6.569895549115014, "learning_rate": 6.705311230064809e-06, "loss": 17.3748, "step": 22296 }, { "epoch": 0.40757124316815035, "grad_norm": 5.985081258725076, "learning_rate": 6.705032964178903e-06, "loss": 17.5517, "step": 22297 }, { "epoch": 0.40758952236459683, "grad_norm": 8.285743999725959, "learning_rate": 6.704754692317004e-06, "loss": 18.3401, "step": 22298 }, { "epoch": 0.40760780156104337, "grad_norm": 6.897872490167506, "learning_rate": 6.7044764144800865e-06, "loss": 17.6131, "step": 22299 }, { "epoch": 0.4076260807574899, "grad_norm": 6.236834655809541, "learning_rate": 6.704198130669128e-06, "loss": 17.6524, "step": 22300 }, { "epoch": 0.40764435995393644, "grad_norm": 6.979269261389349, "learning_rate": 6.703919840885104e-06, "loss": 17.826, "step": 22301 }, { "epoch": 0.4076626391503829, "grad_norm": 7.847914823017599, "learning_rate": 6.703641545128987e-06, "loss": 17.8131, "step": 22302 }, { "epoch": 0.40768091834682946, "grad_norm": 7.6746039941179545, "learning_rate": 6.703363243401755e-06, "loss": 18.1945, "step": 22303 }, { "epoch": 0.407699197543276, "grad_norm": 7.115684433826195, "learning_rate": 6.703084935704383e-06, "loss": 18.0043, "step": 22304 }, { "epoch": 0.40771747673972253, "grad_norm": 5.602538289522343, "learning_rate": 6.7028066220378455e-06, "loss": 17.1549, "step": 22305 }, { "epoch": 0.40773575593616906, "grad_norm": 5.5303830787105435, "learning_rate": 6.702528302403118e-06, "loss": 17.3422, "step": 22306 }, { "epoch": 0.40775403513261554, "grad_norm": 6.849153816525019, "learning_rate": 6.702249976801179e-06, "loss": 17.395, "step": 22307 }, { "epoch": 0.4077723143290621, "grad_norm": 6.642799615662462, "learning_rate": 6.701971645232998e-06, "loss": 17.6234, "step": 22308 }, { "epoch": 0.4077905935255086, "grad_norm": 7.963889722521995, "learning_rate": 6.701693307699556e-06, "loss": 18.139, "step": 22309 }, { "epoch": 0.40780887272195515, "grad_norm": 5.621074242739525, "learning_rate": 6.701414964201828e-06, "loss": 17.2245, "step": 22310 }, { "epoch": 0.4078271519184017, "grad_norm": 5.369137339102429, "learning_rate": 6.701136614740786e-06, "loss": 17.2033, "step": 22311 }, { "epoch": 0.40784543111484817, "grad_norm": 6.241584735197689, "learning_rate": 6.700858259317409e-06, "loss": 17.3039, "step": 22312 }, { "epoch": 0.4078637103112947, "grad_norm": 6.024766009967673, "learning_rate": 6.700579897932669e-06, "loss": 17.3971, "step": 22313 }, { "epoch": 0.40788198950774124, "grad_norm": 5.472173140584315, "learning_rate": 6.700301530587547e-06, "loss": 16.9745, "step": 22314 }, { "epoch": 0.4079002687041878, "grad_norm": 6.249329870649448, "learning_rate": 6.700023157283014e-06, "loss": 17.3968, "step": 22315 }, { "epoch": 0.4079185479006343, "grad_norm": 6.131460890280038, "learning_rate": 6.6997447780200484e-06, "loss": 17.4808, "step": 22316 }, { "epoch": 0.4079368270970808, "grad_norm": 6.929633668418552, "learning_rate": 6.699466392799624e-06, "loss": 17.867, "step": 22317 }, { "epoch": 0.4079551062935273, "grad_norm": 7.152533941069182, "learning_rate": 6.6991880016227185e-06, "loss": 17.9286, "step": 22318 }, { "epoch": 0.40797338548997386, "grad_norm": 5.60451819455061, "learning_rate": 6.698909604490304e-06, "loss": 17.3974, "step": 22319 }, { "epoch": 0.4079916646864204, "grad_norm": 5.387246173205216, "learning_rate": 6.698631201403362e-06, "loss": 17.1566, "step": 22320 }, { "epoch": 0.40800994388286693, "grad_norm": 7.480777182546839, "learning_rate": 6.698352792362863e-06, "loss": 17.9302, "step": 22321 }, { "epoch": 0.4080282230793134, "grad_norm": 5.409482611641295, "learning_rate": 6.698074377369786e-06, "loss": 17.2391, "step": 22322 }, { "epoch": 0.40804650227575995, "grad_norm": 6.5794473185928055, "learning_rate": 6.697795956425104e-06, "loss": 17.4969, "step": 22323 }, { "epoch": 0.4080647814722065, "grad_norm": 6.755483181362662, "learning_rate": 6.6975175295297964e-06, "loss": 17.7137, "step": 22324 }, { "epoch": 0.408083060668653, "grad_norm": 7.968078334194961, "learning_rate": 6.697239096684837e-06, "loss": 18.395, "step": 22325 }, { "epoch": 0.40810133986509955, "grad_norm": 5.974473819363313, "learning_rate": 6.696960657891199e-06, "loss": 17.447, "step": 22326 }, { "epoch": 0.40811961906154604, "grad_norm": 6.771331882821818, "learning_rate": 6.696682213149864e-06, "loss": 17.6519, "step": 22327 }, { "epoch": 0.40813789825799257, "grad_norm": 6.071063538193178, "learning_rate": 6.696403762461805e-06, "loss": 17.2803, "step": 22328 }, { "epoch": 0.4081561774544391, "grad_norm": 5.61989192197024, "learning_rate": 6.6961253058279975e-06, "loss": 17.1686, "step": 22329 }, { "epoch": 0.40817445665088564, "grad_norm": 7.3972282820148205, "learning_rate": 6.695846843249418e-06, "loss": 17.6974, "step": 22330 }, { "epoch": 0.4081927358473322, "grad_norm": 6.140066467892387, "learning_rate": 6.695568374727042e-06, "loss": 17.3713, "step": 22331 }, { "epoch": 0.40821101504377866, "grad_norm": 7.578357768826225, "learning_rate": 6.695289900261847e-06, "loss": 17.9596, "step": 22332 }, { "epoch": 0.4082292942402252, "grad_norm": 6.018563344980862, "learning_rate": 6.6950114198548065e-06, "loss": 17.2115, "step": 22333 }, { "epoch": 0.40824757343667173, "grad_norm": 5.8043348744124135, "learning_rate": 6.694732933506899e-06, "loss": 17.4287, "step": 22334 }, { "epoch": 0.40826585263311826, "grad_norm": 6.321735976651865, "learning_rate": 6.694454441219099e-06, "loss": 17.6359, "step": 22335 }, { "epoch": 0.40828413182956474, "grad_norm": 5.996166355033848, "learning_rate": 6.694175942992385e-06, "loss": 17.3165, "step": 22336 }, { "epoch": 0.4083024110260113, "grad_norm": 6.517665764543585, "learning_rate": 6.69389743882773e-06, "loss": 17.792, "step": 22337 }, { "epoch": 0.4083206902224578, "grad_norm": 6.617217406232964, "learning_rate": 6.693618928726112e-06, "loss": 17.7017, "step": 22338 }, { "epoch": 0.40833896941890435, "grad_norm": 6.2426880409963506, "learning_rate": 6.693340412688506e-06, "loss": 17.3849, "step": 22339 }, { "epoch": 0.4083572486153509, "grad_norm": 6.739043792869497, "learning_rate": 6.6930618907158885e-06, "loss": 17.6292, "step": 22340 }, { "epoch": 0.40837552781179737, "grad_norm": 6.193059522850531, "learning_rate": 6.692783362809237e-06, "loss": 17.6269, "step": 22341 }, { "epoch": 0.4083938070082439, "grad_norm": 8.52446269237601, "learning_rate": 6.692504828969526e-06, "loss": 18.5218, "step": 22342 }, { "epoch": 0.40841208620469044, "grad_norm": 8.91671047362675, "learning_rate": 6.692226289197732e-06, "loss": 18.3849, "step": 22343 }, { "epoch": 0.408430365401137, "grad_norm": 7.340285727156134, "learning_rate": 6.691947743494834e-06, "loss": 18.0694, "step": 22344 }, { "epoch": 0.4084486445975835, "grad_norm": 7.706295893171392, "learning_rate": 6.691669191861803e-06, "loss": 17.5781, "step": 22345 }, { "epoch": 0.40846692379403, "grad_norm": 5.679200024921107, "learning_rate": 6.691390634299619e-06, "loss": 17.2387, "step": 22346 }, { "epoch": 0.4084852029904765, "grad_norm": 6.6261268333057055, "learning_rate": 6.691112070809258e-06, "loss": 17.3913, "step": 22347 }, { "epoch": 0.40850348218692306, "grad_norm": 6.510059106698121, "learning_rate": 6.690833501391697e-06, "loss": 17.3602, "step": 22348 }, { "epoch": 0.4085217613833696, "grad_norm": 6.95710940300285, "learning_rate": 6.69055492604791e-06, "loss": 17.7947, "step": 22349 }, { "epoch": 0.40854004057981613, "grad_norm": 7.624081240706674, "learning_rate": 6.690276344778875e-06, "loss": 17.8705, "step": 22350 }, { "epoch": 0.4085583197762626, "grad_norm": 7.093745915585537, "learning_rate": 6.689997757585568e-06, "loss": 17.3084, "step": 22351 }, { "epoch": 0.40857659897270915, "grad_norm": 5.625715074646376, "learning_rate": 6.689719164468967e-06, "loss": 17.3643, "step": 22352 }, { "epoch": 0.4085948781691557, "grad_norm": 6.754054896581014, "learning_rate": 6.689440565430044e-06, "loss": 17.7577, "step": 22353 }, { "epoch": 0.4086131573656022, "grad_norm": 6.159552515619195, "learning_rate": 6.68916196046978e-06, "loss": 17.4922, "step": 22354 }, { "epoch": 0.40863143656204876, "grad_norm": 6.114677035172412, "learning_rate": 6.688883349589151e-06, "loss": 17.4217, "step": 22355 }, { "epoch": 0.40864971575849524, "grad_norm": 6.560570286473523, "learning_rate": 6.688604732789131e-06, "loss": 17.772, "step": 22356 }, { "epoch": 0.40866799495494177, "grad_norm": 5.437434771663608, "learning_rate": 6.6883261100706985e-06, "loss": 17.0448, "step": 22357 }, { "epoch": 0.4086862741513883, "grad_norm": 6.938271909855448, "learning_rate": 6.6880474814348285e-06, "loss": 17.8065, "step": 22358 }, { "epoch": 0.40870455334783484, "grad_norm": 7.007961642145968, "learning_rate": 6.687768846882501e-06, "loss": 17.8035, "step": 22359 }, { "epoch": 0.4087228325442814, "grad_norm": 6.780869384648125, "learning_rate": 6.687490206414689e-06, "loss": 17.6165, "step": 22360 }, { "epoch": 0.40874111174072786, "grad_norm": 6.877968688405431, "learning_rate": 6.687211560032368e-06, "loss": 17.798, "step": 22361 }, { "epoch": 0.4087593909371744, "grad_norm": 5.986241171382858, "learning_rate": 6.686932907736518e-06, "loss": 17.3656, "step": 22362 }, { "epoch": 0.40877767013362093, "grad_norm": 6.660632585050558, "learning_rate": 6.686654249528116e-06, "loss": 17.5422, "step": 22363 }, { "epoch": 0.40879594933006747, "grad_norm": 6.241903161658502, "learning_rate": 6.686375585408137e-06, "loss": 17.5906, "step": 22364 }, { "epoch": 0.408814228526514, "grad_norm": 5.598750220896844, "learning_rate": 6.686096915377557e-06, "loss": 17.164, "step": 22365 }, { "epoch": 0.4088325077229605, "grad_norm": 6.348137366423458, "learning_rate": 6.685818239437355e-06, "loss": 17.6996, "step": 22366 }, { "epoch": 0.408850786919407, "grad_norm": 6.0425301995955, "learning_rate": 6.685539557588504e-06, "loss": 17.452, "step": 22367 }, { "epoch": 0.40886906611585355, "grad_norm": 6.012375619513796, "learning_rate": 6.685260869831984e-06, "loss": 17.445, "step": 22368 }, { "epoch": 0.4088873453123001, "grad_norm": 6.472431702978844, "learning_rate": 6.684982176168773e-06, "loss": 17.4102, "step": 22369 }, { "epoch": 0.40890562450874657, "grad_norm": 6.952900079004711, "learning_rate": 6.684703476599844e-06, "loss": 17.6931, "step": 22370 }, { "epoch": 0.4089239037051931, "grad_norm": 6.195808589104972, "learning_rate": 6.684424771126176e-06, "loss": 17.4166, "step": 22371 }, { "epoch": 0.40894218290163964, "grad_norm": 6.543064768324731, "learning_rate": 6.684146059748743e-06, "loss": 17.9098, "step": 22372 }, { "epoch": 0.4089604620980862, "grad_norm": 4.7435375951410075, "learning_rate": 6.683867342468528e-06, "loss": 16.8005, "step": 22373 }, { "epoch": 0.4089787412945327, "grad_norm": 5.4370720825517616, "learning_rate": 6.683588619286501e-06, "loss": 17.1992, "step": 22374 }, { "epoch": 0.4089970204909792, "grad_norm": 6.171659769049901, "learning_rate": 6.683309890203643e-06, "loss": 17.4659, "step": 22375 }, { "epoch": 0.4090152996874257, "grad_norm": 5.8467812409448845, "learning_rate": 6.683031155220931e-06, "loss": 17.288, "step": 22376 }, { "epoch": 0.40903357888387226, "grad_norm": 7.396621978798048, "learning_rate": 6.682752414339339e-06, "loss": 18.0265, "step": 22377 }, { "epoch": 0.4090518580803188, "grad_norm": 6.049778369575566, "learning_rate": 6.682473667559847e-06, "loss": 17.7492, "step": 22378 }, { "epoch": 0.40907013727676533, "grad_norm": 6.371293358168965, "learning_rate": 6.682194914883431e-06, "loss": 17.5876, "step": 22379 }, { "epoch": 0.4090884164732118, "grad_norm": 6.525303404567865, "learning_rate": 6.681916156311068e-06, "loss": 17.7739, "step": 22380 }, { "epoch": 0.40910669566965835, "grad_norm": 7.041370823694498, "learning_rate": 6.681637391843732e-06, "loss": 17.8203, "step": 22381 }, { "epoch": 0.4091249748661049, "grad_norm": 6.3008761005745155, "learning_rate": 6.681358621482405e-06, "loss": 17.5252, "step": 22382 }, { "epoch": 0.4091432540625514, "grad_norm": 7.765356271583535, "learning_rate": 6.6810798452280635e-06, "loss": 18.2771, "step": 22383 }, { "epoch": 0.40916153325899796, "grad_norm": 6.365913360250412, "learning_rate": 6.680801063081681e-06, "loss": 17.4948, "step": 22384 }, { "epoch": 0.40917981245544444, "grad_norm": 6.318444715530758, "learning_rate": 6.6805222750442366e-06, "loss": 17.4903, "step": 22385 }, { "epoch": 0.40919809165189097, "grad_norm": 6.2602657409691345, "learning_rate": 6.680243481116708e-06, "loss": 17.344, "step": 22386 }, { "epoch": 0.4092163708483375, "grad_norm": 7.076340057322319, "learning_rate": 6.679964681300073e-06, "loss": 17.7203, "step": 22387 }, { "epoch": 0.40923465004478404, "grad_norm": 7.4483993347309765, "learning_rate": 6.679685875595305e-06, "loss": 18.0464, "step": 22388 }, { "epoch": 0.4092529292412306, "grad_norm": 8.177442978394508, "learning_rate": 6.679407064003386e-06, "loss": 18.2687, "step": 22389 }, { "epoch": 0.40927120843767706, "grad_norm": 5.67031421466407, "learning_rate": 6.6791282465252895e-06, "loss": 17.2061, "step": 22390 }, { "epoch": 0.4092894876341236, "grad_norm": 6.073479551777875, "learning_rate": 6.678849423161995e-06, "loss": 17.29, "step": 22391 }, { "epoch": 0.40930776683057013, "grad_norm": 5.908413856599679, "learning_rate": 6.678570593914478e-06, "loss": 17.412, "step": 22392 }, { "epoch": 0.40932604602701667, "grad_norm": 6.850784777996676, "learning_rate": 6.678291758783719e-06, "loss": 17.8967, "step": 22393 }, { "epoch": 0.4093443252234632, "grad_norm": 7.8328601566665, "learning_rate": 6.6780129177706895e-06, "loss": 18.6999, "step": 22394 }, { "epoch": 0.4093626044199097, "grad_norm": 6.272933520527842, "learning_rate": 6.677734070876373e-06, "loss": 17.6513, "step": 22395 }, { "epoch": 0.4093808836163562, "grad_norm": 6.514965334561195, "learning_rate": 6.677455218101743e-06, "loss": 17.4764, "step": 22396 }, { "epoch": 0.40939916281280275, "grad_norm": 6.005791253197453, "learning_rate": 6.67717635944778e-06, "loss": 17.0802, "step": 22397 }, { "epoch": 0.4094174420092493, "grad_norm": 6.7383995685433895, "learning_rate": 6.676897494915457e-06, "loss": 17.8608, "step": 22398 }, { "epoch": 0.4094357212056958, "grad_norm": 8.845025797527725, "learning_rate": 6.6766186245057544e-06, "loss": 17.5709, "step": 22399 }, { "epoch": 0.4094540004021423, "grad_norm": 6.539898475467322, "learning_rate": 6.67633974821965e-06, "loss": 17.4487, "step": 22400 }, { "epoch": 0.40947227959858884, "grad_norm": 6.476897413372039, "learning_rate": 6.67606086605812e-06, "loss": 17.3881, "step": 22401 }, { "epoch": 0.4094905587950354, "grad_norm": 6.139891906179132, "learning_rate": 6.675781978022141e-06, "loss": 17.5398, "step": 22402 }, { "epoch": 0.4095088379914819, "grad_norm": 6.82874764899981, "learning_rate": 6.675503084112692e-06, "loss": 17.5812, "step": 22403 }, { "epoch": 0.4095271171879284, "grad_norm": 5.798182208057855, "learning_rate": 6.675224184330751e-06, "loss": 17.6063, "step": 22404 }, { "epoch": 0.4095453963843749, "grad_norm": 6.5577458900205485, "learning_rate": 6.674945278677294e-06, "loss": 17.4989, "step": 22405 }, { "epoch": 0.40956367558082146, "grad_norm": 6.039979097365193, "learning_rate": 6.674666367153299e-06, "loss": 17.5457, "step": 22406 }, { "epoch": 0.409581954777268, "grad_norm": 7.492943735372451, "learning_rate": 6.674387449759744e-06, "loss": 18.4872, "step": 22407 }, { "epoch": 0.40960023397371453, "grad_norm": 5.967778949376407, "learning_rate": 6.674108526497605e-06, "loss": 17.2267, "step": 22408 }, { "epoch": 0.409618513170161, "grad_norm": 6.551757154325815, "learning_rate": 6.673829597367862e-06, "loss": 17.5845, "step": 22409 }, { "epoch": 0.40963679236660755, "grad_norm": 6.43448251059287, "learning_rate": 6.673550662371491e-06, "loss": 17.5492, "step": 22410 }, { "epoch": 0.4096550715630541, "grad_norm": 5.390735848441046, "learning_rate": 6.673271721509471e-06, "loss": 17.1281, "step": 22411 }, { "epoch": 0.4096733507595006, "grad_norm": 5.562757320291179, "learning_rate": 6.672992774782779e-06, "loss": 17.3111, "step": 22412 }, { "epoch": 0.40969162995594716, "grad_norm": 7.848481641711909, "learning_rate": 6.672713822192392e-06, "loss": 17.7411, "step": 22413 }, { "epoch": 0.40970990915239364, "grad_norm": 5.705962222544973, "learning_rate": 6.672434863739288e-06, "loss": 17.0998, "step": 22414 }, { "epoch": 0.40972818834884017, "grad_norm": 8.514607143462618, "learning_rate": 6.672155899424445e-06, "loss": 18.5722, "step": 22415 }, { "epoch": 0.4097464675452867, "grad_norm": 5.8988134022637535, "learning_rate": 6.6718769292488406e-06, "loss": 17.197, "step": 22416 }, { "epoch": 0.40976474674173324, "grad_norm": 6.690551317855109, "learning_rate": 6.6715979532134535e-06, "loss": 17.8692, "step": 22417 }, { "epoch": 0.4097830259381798, "grad_norm": 7.3224953502855, "learning_rate": 6.67131897131926e-06, "loss": 18.1417, "step": 22418 }, { "epoch": 0.40980130513462626, "grad_norm": 7.578450397318651, "learning_rate": 6.671039983567238e-06, "loss": 17.9822, "step": 22419 }, { "epoch": 0.4098195843310728, "grad_norm": 7.047372506277028, "learning_rate": 6.670760989958366e-06, "loss": 17.5138, "step": 22420 }, { "epoch": 0.40983786352751933, "grad_norm": 6.500958699879609, "learning_rate": 6.670481990493621e-06, "loss": 17.5217, "step": 22421 }, { "epoch": 0.40985614272396587, "grad_norm": 6.591173975816295, "learning_rate": 6.670202985173983e-06, "loss": 17.4485, "step": 22422 }, { "epoch": 0.4098744219204124, "grad_norm": 6.497177345976398, "learning_rate": 6.669923974000429e-06, "loss": 17.5386, "step": 22423 }, { "epoch": 0.4098927011168589, "grad_norm": 5.355490015264795, "learning_rate": 6.669644956973935e-06, "loss": 17.0498, "step": 22424 }, { "epoch": 0.4099109803133054, "grad_norm": 6.149706652710731, "learning_rate": 6.6693659340954804e-06, "loss": 17.5281, "step": 22425 }, { "epoch": 0.40992925950975195, "grad_norm": 5.630134258395935, "learning_rate": 6.669086905366043e-06, "loss": 17.379, "step": 22426 }, { "epoch": 0.4099475387061985, "grad_norm": 6.22113460738467, "learning_rate": 6.668807870786601e-06, "loss": 17.5218, "step": 22427 }, { "epoch": 0.409965817902645, "grad_norm": 6.225699203355149, "learning_rate": 6.668528830358134e-06, "loss": 17.3983, "step": 22428 }, { "epoch": 0.4099840970990915, "grad_norm": 6.5146956328387775, "learning_rate": 6.668249784081616e-06, "loss": 17.6844, "step": 22429 }, { "epoch": 0.41000237629553804, "grad_norm": 7.210863668792879, "learning_rate": 6.667970731958029e-06, "loss": 17.667, "step": 22430 }, { "epoch": 0.4100206554919846, "grad_norm": 7.243398059648909, "learning_rate": 6.667691673988348e-06, "loss": 17.4272, "step": 22431 }, { "epoch": 0.4100389346884311, "grad_norm": 5.536042701374809, "learning_rate": 6.667412610173552e-06, "loss": 17.3022, "step": 22432 }, { "epoch": 0.41005721388487765, "grad_norm": 6.944375066789045, "learning_rate": 6.667133540514621e-06, "loss": 17.4673, "step": 22433 }, { "epoch": 0.4100754930813241, "grad_norm": 6.800971125614902, "learning_rate": 6.66685446501253e-06, "loss": 17.7474, "step": 22434 }, { "epoch": 0.41009377227777066, "grad_norm": 5.855007406131806, "learning_rate": 6.666575383668259e-06, "loss": 17.2631, "step": 22435 }, { "epoch": 0.4101120514742172, "grad_norm": 7.458053534102518, "learning_rate": 6.666296296482787e-06, "loss": 17.7256, "step": 22436 }, { "epoch": 0.41013033067066373, "grad_norm": 6.04020474663933, "learning_rate": 6.66601720345709e-06, "loss": 17.214, "step": 22437 }, { "epoch": 0.4101486098671102, "grad_norm": 7.315070036328204, "learning_rate": 6.665738104592149e-06, "loss": 17.8686, "step": 22438 }, { "epoch": 0.41016688906355675, "grad_norm": 6.991834659225314, "learning_rate": 6.66545899988894e-06, "loss": 17.6729, "step": 22439 }, { "epoch": 0.4101851682600033, "grad_norm": 6.1356000894297935, "learning_rate": 6.665179889348438e-06, "loss": 17.457, "step": 22440 }, { "epoch": 0.4102034474564498, "grad_norm": 7.530821124138062, "learning_rate": 6.66490077297163e-06, "loss": 18.1345, "step": 22441 }, { "epoch": 0.41022172665289636, "grad_norm": 7.91404525806891, "learning_rate": 6.664621650759487e-06, "loss": 18.0275, "step": 22442 }, { "epoch": 0.41024000584934284, "grad_norm": 6.957079326907108, "learning_rate": 6.66434252271299e-06, "loss": 17.6925, "step": 22443 }, { "epoch": 0.4102582850457894, "grad_norm": 6.195271870605417, "learning_rate": 6.664063388833116e-06, "loss": 17.381, "step": 22444 }, { "epoch": 0.4102765642422359, "grad_norm": 6.692512343516527, "learning_rate": 6.663784249120846e-06, "loss": 17.422, "step": 22445 }, { "epoch": 0.41029484343868244, "grad_norm": 4.662301471960729, "learning_rate": 6.663505103577155e-06, "loss": 16.8443, "step": 22446 }, { "epoch": 0.410313122635129, "grad_norm": 6.161177716814195, "learning_rate": 6.663225952203023e-06, "loss": 17.5143, "step": 22447 }, { "epoch": 0.41033140183157546, "grad_norm": 7.114737601813586, "learning_rate": 6.66294679499943e-06, "loss": 17.5671, "step": 22448 }, { "epoch": 0.410349681028022, "grad_norm": 6.490208520600908, "learning_rate": 6.662667631967351e-06, "loss": 17.5853, "step": 22449 }, { "epoch": 0.41036796022446853, "grad_norm": 6.788196061136019, "learning_rate": 6.6623884631077664e-06, "loss": 17.6763, "step": 22450 }, { "epoch": 0.41038623942091507, "grad_norm": 7.399119123919406, "learning_rate": 6.6621092884216555e-06, "loss": 18.0581, "step": 22451 }, { "epoch": 0.4104045186173616, "grad_norm": 5.616390064467137, "learning_rate": 6.661830107909996e-06, "loss": 17.32, "step": 22452 }, { "epoch": 0.4104227978138081, "grad_norm": 8.262323097739829, "learning_rate": 6.661550921573764e-06, "loss": 18.1139, "step": 22453 }, { "epoch": 0.4104410770102546, "grad_norm": 6.9899293270499285, "learning_rate": 6.661271729413942e-06, "loss": 17.7583, "step": 22454 }, { "epoch": 0.41045935620670115, "grad_norm": 7.651297782205858, "learning_rate": 6.660992531431507e-06, "loss": 18.151, "step": 22455 }, { "epoch": 0.4104776354031477, "grad_norm": 6.046370442855944, "learning_rate": 6.660713327627437e-06, "loss": 17.24, "step": 22456 }, { "epoch": 0.4104959145995942, "grad_norm": 6.8742298685763, "learning_rate": 6.66043411800271e-06, "loss": 17.7414, "step": 22457 }, { "epoch": 0.4105141937960407, "grad_norm": 8.311854084271925, "learning_rate": 6.660154902558304e-06, "loss": 18.2518, "step": 22458 }, { "epoch": 0.41053247299248724, "grad_norm": 7.01890204473356, "learning_rate": 6.6598756812952026e-06, "loss": 17.697, "step": 22459 }, { "epoch": 0.4105507521889338, "grad_norm": 6.646090362366263, "learning_rate": 6.65959645421438e-06, "loss": 17.7471, "step": 22460 }, { "epoch": 0.4105690313853803, "grad_norm": 5.416482398601893, "learning_rate": 6.659317221316815e-06, "loss": 17.3344, "step": 22461 }, { "epoch": 0.41058731058182685, "grad_norm": 5.019278069723802, "learning_rate": 6.659037982603488e-06, "loss": 16.9551, "step": 22462 }, { "epoch": 0.4106055897782733, "grad_norm": 6.676420183258173, "learning_rate": 6.658758738075376e-06, "loss": 17.7794, "step": 22463 }, { "epoch": 0.41062386897471986, "grad_norm": 6.048997414497438, "learning_rate": 6.658479487733459e-06, "loss": 17.308, "step": 22464 }, { "epoch": 0.4106421481711664, "grad_norm": 7.98950980278614, "learning_rate": 6.6582002315787155e-06, "loss": 17.9655, "step": 22465 }, { "epoch": 0.41066042736761293, "grad_norm": 5.991122565262289, "learning_rate": 6.657920969612124e-06, "loss": 17.2028, "step": 22466 }, { "epoch": 0.41067870656405947, "grad_norm": 7.535391185851954, "learning_rate": 6.657641701834663e-06, "loss": 18.0067, "step": 22467 }, { "epoch": 0.41069698576050595, "grad_norm": 7.395234063393955, "learning_rate": 6.657362428247311e-06, "loss": 17.7825, "step": 22468 }, { "epoch": 0.4107152649569525, "grad_norm": 6.148243444602376, "learning_rate": 6.65708314885105e-06, "loss": 17.6456, "step": 22469 }, { "epoch": 0.410733544153399, "grad_norm": 6.940659798109537, "learning_rate": 6.656803863646855e-06, "loss": 17.3144, "step": 22470 }, { "epoch": 0.41075182334984556, "grad_norm": 5.984508152192915, "learning_rate": 6.656524572635705e-06, "loss": 17.3747, "step": 22471 }, { "epoch": 0.41077010254629204, "grad_norm": 8.984030284838962, "learning_rate": 6.65624527581858e-06, "loss": 18.2754, "step": 22472 }, { "epoch": 0.4107883817427386, "grad_norm": 6.874324901779238, "learning_rate": 6.655965973196461e-06, "loss": 17.8326, "step": 22473 }, { "epoch": 0.4108066609391851, "grad_norm": 6.649152355038819, "learning_rate": 6.655686664770324e-06, "loss": 17.6961, "step": 22474 }, { "epoch": 0.41082494013563164, "grad_norm": 8.136475477998427, "learning_rate": 6.6554073505411495e-06, "loss": 17.9943, "step": 22475 }, { "epoch": 0.4108432193320782, "grad_norm": 6.43012584151685, "learning_rate": 6.655128030509915e-06, "loss": 17.5833, "step": 22476 }, { "epoch": 0.41086149852852466, "grad_norm": 7.342883675959253, "learning_rate": 6.654848704677601e-06, "loss": 17.7023, "step": 22477 }, { "epoch": 0.4108797777249712, "grad_norm": 8.305732709692458, "learning_rate": 6.654569373045185e-06, "loss": 18.3501, "step": 22478 }, { "epoch": 0.41089805692141773, "grad_norm": 5.804968513432319, "learning_rate": 6.654290035613649e-06, "loss": 17.3626, "step": 22479 }, { "epoch": 0.41091633611786427, "grad_norm": 6.065199581638211, "learning_rate": 6.654010692383967e-06, "loss": 17.1871, "step": 22480 }, { "epoch": 0.4109346153143108, "grad_norm": 5.798473532906206, "learning_rate": 6.653731343357123e-06, "loss": 17.3126, "step": 22481 }, { "epoch": 0.4109528945107573, "grad_norm": 6.415907022619494, "learning_rate": 6.653451988534094e-06, "loss": 17.5288, "step": 22482 }, { "epoch": 0.4109711737072038, "grad_norm": 7.033875247971466, "learning_rate": 6.6531726279158595e-06, "loss": 17.7119, "step": 22483 }, { "epoch": 0.41098945290365035, "grad_norm": 8.652061811686448, "learning_rate": 6.652893261503398e-06, "loss": 18.2471, "step": 22484 }, { "epoch": 0.4110077321000969, "grad_norm": 6.00480941005815, "learning_rate": 6.6526138892976875e-06, "loss": 17.4825, "step": 22485 }, { "epoch": 0.4110260112965434, "grad_norm": 6.899236062989019, "learning_rate": 6.652334511299712e-06, "loss": 17.4574, "step": 22486 }, { "epoch": 0.4110442904929899, "grad_norm": 6.592482875960514, "learning_rate": 6.652055127510445e-06, "loss": 17.6955, "step": 22487 }, { "epoch": 0.41106256968943644, "grad_norm": 5.799762215987938, "learning_rate": 6.651775737930869e-06, "loss": 17.2804, "step": 22488 }, { "epoch": 0.411080848885883, "grad_norm": 6.8970588756919256, "learning_rate": 6.651496342561962e-06, "loss": 17.3843, "step": 22489 }, { "epoch": 0.4110991280823295, "grad_norm": 6.318700315418928, "learning_rate": 6.651216941404703e-06, "loss": 17.7247, "step": 22490 }, { "epoch": 0.41111740727877605, "grad_norm": 6.513451357168261, "learning_rate": 6.650937534460074e-06, "loss": 17.484, "step": 22491 }, { "epoch": 0.41113568647522253, "grad_norm": 6.453364609413719, "learning_rate": 6.65065812172905e-06, "loss": 17.474, "step": 22492 }, { "epoch": 0.41115396567166906, "grad_norm": 5.901676580688798, "learning_rate": 6.650378703212614e-06, "loss": 17.3427, "step": 22493 }, { "epoch": 0.4111722448681156, "grad_norm": 6.035202696120195, "learning_rate": 6.650099278911742e-06, "loss": 17.5816, "step": 22494 }, { "epoch": 0.41119052406456214, "grad_norm": 6.328909776131016, "learning_rate": 6.649819848827417e-06, "loss": 17.407, "step": 22495 }, { "epoch": 0.41120880326100867, "grad_norm": 5.54156689266959, "learning_rate": 6.649540412960616e-06, "loss": 17.1902, "step": 22496 }, { "epoch": 0.41122708245745515, "grad_norm": 6.867040320968979, "learning_rate": 6.649260971312319e-06, "loss": 17.8289, "step": 22497 }, { "epoch": 0.4112453616539017, "grad_norm": 5.781974445936039, "learning_rate": 6.648981523883506e-06, "loss": 17.2794, "step": 22498 }, { "epoch": 0.4112636408503482, "grad_norm": 6.866986400203878, "learning_rate": 6.6487020706751535e-06, "loss": 17.5703, "step": 22499 }, { "epoch": 0.41128192004679476, "grad_norm": 6.25314561124634, "learning_rate": 6.648422611688247e-06, "loss": 17.3703, "step": 22500 }, { "epoch": 0.4113001992432413, "grad_norm": 5.527027959992802, "learning_rate": 6.64814314692376e-06, "loss": 17.0699, "step": 22501 }, { "epoch": 0.4113184784396878, "grad_norm": 9.991418748524842, "learning_rate": 6.6478636763826745e-06, "loss": 17.6793, "step": 22502 }, { "epoch": 0.4113367576361343, "grad_norm": 6.23830929250894, "learning_rate": 6.647584200065971e-06, "loss": 17.2719, "step": 22503 }, { "epoch": 0.41135503683258084, "grad_norm": 7.572286135394841, "learning_rate": 6.647304717974626e-06, "loss": 18.0421, "step": 22504 }, { "epoch": 0.4113733160290274, "grad_norm": 5.261262791016685, "learning_rate": 6.647025230109622e-06, "loss": 17.0682, "step": 22505 }, { "epoch": 0.41139159522547386, "grad_norm": 6.678915311011433, "learning_rate": 6.646745736471936e-06, "loss": 17.4591, "step": 22506 }, { "epoch": 0.4114098744219204, "grad_norm": 5.753913399849667, "learning_rate": 6.646466237062551e-06, "loss": 17.1814, "step": 22507 }, { "epoch": 0.41142815361836693, "grad_norm": 9.14217943399806, "learning_rate": 6.646186731882444e-06, "loss": 18.6355, "step": 22508 }, { "epoch": 0.41144643281481347, "grad_norm": 6.604336182941888, "learning_rate": 6.645907220932595e-06, "loss": 17.7191, "step": 22509 }, { "epoch": 0.41146471201126, "grad_norm": 5.813419040298178, "learning_rate": 6.645627704213985e-06, "loss": 17.2996, "step": 22510 }, { "epoch": 0.4114829912077065, "grad_norm": 6.552131724035272, "learning_rate": 6.645348181727594e-06, "loss": 17.4184, "step": 22511 }, { "epoch": 0.411501270404153, "grad_norm": 6.598285296345405, "learning_rate": 6.645068653474396e-06, "loss": 17.5236, "step": 22512 }, { "epoch": 0.41151954960059955, "grad_norm": 5.676660695960994, "learning_rate": 6.644789119455377e-06, "loss": 16.8865, "step": 22513 }, { "epoch": 0.4115378287970461, "grad_norm": 5.061233180440918, "learning_rate": 6.644509579671517e-06, "loss": 16.8625, "step": 22514 }, { "epoch": 0.4115561079934926, "grad_norm": 6.514822594679821, "learning_rate": 6.644230034123792e-06, "loss": 17.4267, "step": 22515 }, { "epoch": 0.4115743871899391, "grad_norm": 4.932665565234066, "learning_rate": 6.643950482813184e-06, "loss": 17.12, "step": 22516 }, { "epoch": 0.41159266638638564, "grad_norm": 6.643103736082339, "learning_rate": 6.643670925740672e-06, "loss": 17.9247, "step": 22517 }, { "epoch": 0.4116109455828322, "grad_norm": 6.605073545749215, "learning_rate": 6.643391362907235e-06, "loss": 17.5981, "step": 22518 }, { "epoch": 0.4116292247792787, "grad_norm": 7.392426403764373, "learning_rate": 6.643111794313855e-06, "loss": 17.9218, "step": 22519 }, { "epoch": 0.41164750397572525, "grad_norm": 6.84728674931774, "learning_rate": 6.6428322199615106e-06, "loss": 17.656, "step": 22520 }, { "epoch": 0.41166578317217173, "grad_norm": 5.647548514640339, "learning_rate": 6.64255263985118e-06, "loss": 17.2701, "step": 22521 }, { "epoch": 0.41168406236861826, "grad_norm": 5.552168435205575, "learning_rate": 6.642273053983848e-06, "loss": 17.0794, "step": 22522 }, { "epoch": 0.4117023415650648, "grad_norm": 6.78696796718912, "learning_rate": 6.64199346236049e-06, "loss": 17.6392, "step": 22523 }, { "epoch": 0.41172062076151134, "grad_norm": 7.092229779419543, "learning_rate": 6.641713864982088e-06, "loss": 17.7869, "step": 22524 }, { "epoch": 0.41173889995795787, "grad_norm": 5.618666480432428, "learning_rate": 6.641434261849621e-06, "loss": 17.4101, "step": 22525 }, { "epoch": 0.41175717915440435, "grad_norm": 6.476638868176701, "learning_rate": 6.641154652964068e-06, "loss": 17.6316, "step": 22526 }, { "epoch": 0.4117754583508509, "grad_norm": 7.067195271851774, "learning_rate": 6.640875038326411e-06, "loss": 18.0322, "step": 22527 }, { "epoch": 0.4117937375472974, "grad_norm": 6.82609681223036, "learning_rate": 6.640595417937631e-06, "loss": 17.7255, "step": 22528 }, { "epoch": 0.41181201674374396, "grad_norm": 7.708298233999794, "learning_rate": 6.640315791798705e-06, "loss": 17.8175, "step": 22529 }, { "epoch": 0.4118302959401905, "grad_norm": 5.429983974169377, "learning_rate": 6.640036159910614e-06, "loss": 17.2022, "step": 22530 }, { "epoch": 0.411848575136637, "grad_norm": 6.477489049801571, "learning_rate": 6.639756522274341e-06, "loss": 17.4828, "step": 22531 }, { "epoch": 0.4118668543330835, "grad_norm": 7.084888439645354, "learning_rate": 6.639476878890862e-06, "loss": 17.7816, "step": 22532 }, { "epoch": 0.41188513352953005, "grad_norm": 6.6279879840863645, "learning_rate": 6.639197229761158e-06, "loss": 17.7472, "step": 22533 }, { "epoch": 0.4119034127259766, "grad_norm": 6.007976655677513, "learning_rate": 6.638917574886211e-06, "loss": 17.5309, "step": 22534 }, { "epoch": 0.4119216919224231, "grad_norm": 6.343015233689347, "learning_rate": 6.6386379142669996e-06, "loss": 17.784, "step": 22535 }, { "epoch": 0.4119399711188696, "grad_norm": 6.820796339191304, "learning_rate": 6.638358247904505e-06, "loss": 17.9703, "step": 22536 }, { "epoch": 0.41195825031531613, "grad_norm": 5.85890992128528, "learning_rate": 6.638078575799707e-06, "loss": 17.3764, "step": 22537 }, { "epoch": 0.41197652951176267, "grad_norm": 7.074606037217186, "learning_rate": 6.637798897953585e-06, "loss": 17.8454, "step": 22538 }, { "epoch": 0.4119948087082092, "grad_norm": 5.784132235908096, "learning_rate": 6.637519214367121e-06, "loss": 17.1954, "step": 22539 }, { "epoch": 0.4120130879046557, "grad_norm": 6.025932167870897, "learning_rate": 6.637239525041293e-06, "loss": 17.3466, "step": 22540 }, { "epoch": 0.4120313671011022, "grad_norm": 6.169035862579544, "learning_rate": 6.636959829977083e-06, "loss": 17.4327, "step": 22541 }, { "epoch": 0.41204964629754876, "grad_norm": 6.650060216978216, "learning_rate": 6.636680129175472e-06, "loss": 17.3783, "step": 22542 }, { "epoch": 0.4120679254939953, "grad_norm": 5.154164921642721, "learning_rate": 6.636400422637439e-06, "loss": 16.9665, "step": 22543 }, { "epoch": 0.4120862046904418, "grad_norm": 7.4621241910548815, "learning_rate": 6.636120710363964e-06, "loss": 18.1499, "step": 22544 }, { "epoch": 0.4121044838868883, "grad_norm": 7.432794839233043, "learning_rate": 6.635840992356026e-06, "loss": 17.9092, "step": 22545 }, { "epoch": 0.41212276308333484, "grad_norm": 7.170814548989489, "learning_rate": 6.63556126861461e-06, "loss": 17.5366, "step": 22546 }, { "epoch": 0.4121410422797814, "grad_norm": 5.518909409711679, "learning_rate": 6.635281539140692e-06, "loss": 17.0978, "step": 22547 }, { "epoch": 0.4121593214762279, "grad_norm": 6.302062730524471, "learning_rate": 6.635001803935255e-06, "loss": 17.6605, "step": 22548 }, { "epoch": 0.41217760067267445, "grad_norm": 7.471207997707816, "learning_rate": 6.634722062999278e-06, "loss": 17.9399, "step": 22549 }, { "epoch": 0.41219587986912093, "grad_norm": 5.324470003932659, "learning_rate": 6.634442316333742e-06, "loss": 17.2086, "step": 22550 }, { "epoch": 0.41221415906556746, "grad_norm": 5.9771071974674905, "learning_rate": 6.634162563939628e-06, "loss": 17.3712, "step": 22551 }, { "epoch": 0.412232438262014, "grad_norm": 6.7774724499917225, "learning_rate": 6.633882805817917e-06, "loss": 17.7359, "step": 22552 }, { "epoch": 0.41225071745846054, "grad_norm": 7.551931887991059, "learning_rate": 6.6336030419695866e-06, "loss": 17.8439, "step": 22553 }, { "epoch": 0.41226899665490707, "grad_norm": 5.753257938800097, "learning_rate": 6.63332327239562e-06, "loss": 17.4086, "step": 22554 }, { "epoch": 0.41228727585135355, "grad_norm": 5.866038870768499, "learning_rate": 6.633043497096998e-06, "loss": 17.457, "step": 22555 }, { "epoch": 0.4123055550478001, "grad_norm": 6.0536502638895, "learning_rate": 6.632763716074699e-06, "loss": 17.304, "step": 22556 }, { "epoch": 0.4123238342442466, "grad_norm": 7.563518144366697, "learning_rate": 6.632483929329705e-06, "loss": 17.8347, "step": 22557 }, { "epoch": 0.41234211344069316, "grad_norm": 8.167575832029081, "learning_rate": 6.6322041368629965e-06, "loss": 18.2212, "step": 22558 }, { "epoch": 0.4123603926371397, "grad_norm": 6.886400060823418, "learning_rate": 6.631924338675555e-06, "loss": 18.0066, "step": 22559 }, { "epoch": 0.4123786718335862, "grad_norm": 5.423171002706399, "learning_rate": 6.63164453476836e-06, "loss": 17.2012, "step": 22560 }, { "epoch": 0.4123969510300327, "grad_norm": 6.1445295548696315, "learning_rate": 6.631364725142392e-06, "loss": 17.5894, "step": 22561 }, { "epoch": 0.41241523022647925, "grad_norm": 7.212889865114428, "learning_rate": 6.631084909798632e-06, "loss": 17.8984, "step": 22562 }, { "epoch": 0.4124335094229258, "grad_norm": 9.16653149833562, "learning_rate": 6.630805088738061e-06, "loss": 17.6354, "step": 22563 }, { "epoch": 0.4124517886193723, "grad_norm": 6.325290511361483, "learning_rate": 6.63052526196166e-06, "loss": 17.5876, "step": 22564 }, { "epoch": 0.4124700678158188, "grad_norm": 5.763074934803377, "learning_rate": 6.63024542947041e-06, "loss": 17.3363, "step": 22565 }, { "epoch": 0.41248834701226533, "grad_norm": 5.958746031648419, "learning_rate": 6.629965591265292e-06, "loss": 17.223, "step": 22566 }, { "epoch": 0.41250662620871187, "grad_norm": 6.515658349181106, "learning_rate": 6.629685747347283e-06, "loss": 17.6717, "step": 22567 }, { "epoch": 0.4125249054051584, "grad_norm": 4.946023024333121, "learning_rate": 6.629405897717368e-06, "loss": 16.9446, "step": 22568 }, { "epoch": 0.41254318460160494, "grad_norm": 5.283187874761963, "learning_rate": 6.629126042376528e-06, "loss": 17.1899, "step": 22569 }, { "epoch": 0.4125614637980514, "grad_norm": 7.248605819013886, "learning_rate": 6.628846181325742e-06, "loss": 17.7481, "step": 22570 }, { "epoch": 0.41257974299449796, "grad_norm": 7.405447399579446, "learning_rate": 6.628566314565992e-06, "loss": 17.8948, "step": 22571 }, { "epoch": 0.4125980221909445, "grad_norm": 6.6967074034598175, "learning_rate": 6.628286442098256e-06, "loss": 17.7341, "step": 22572 }, { "epoch": 0.412616301387391, "grad_norm": 5.734497313651601, "learning_rate": 6.62800656392352e-06, "loss": 17.2537, "step": 22573 }, { "epoch": 0.4126345805838375, "grad_norm": 7.758596269655583, "learning_rate": 6.627726680042762e-06, "loss": 17.9411, "step": 22574 }, { "epoch": 0.41265285978028404, "grad_norm": 5.201702593488365, "learning_rate": 6.6274467904569615e-06, "loss": 16.9402, "step": 22575 }, { "epoch": 0.4126711389767306, "grad_norm": 5.9675413553645065, "learning_rate": 6.627166895167103e-06, "loss": 17.3452, "step": 22576 }, { "epoch": 0.4126894181731771, "grad_norm": 7.291423129238422, "learning_rate": 6.626886994174165e-06, "loss": 17.779, "step": 22577 }, { "epoch": 0.41270769736962365, "grad_norm": 5.968892468616462, "learning_rate": 6.626607087479129e-06, "loss": 17.5132, "step": 22578 }, { "epoch": 0.41272597656607013, "grad_norm": 7.170439939026732, "learning_rate": 6.6263271750829775e-06, "loss": 17.7694, "step": 22579 }, { "epoch": 0.41274425576251667, "grad_norm": 7.318670095737872, "learning_rate": 6.626047256986688e-06, "loss": 17.9435, "step": 22580 }, { "epoch": 0.4127625349589632, "grad_norm": 6.036756291847952, "learning_rate": 6.625767333191247e-06, "loss": 17.398, "step": 22581 }, { "epoch": 0.41278081415540974, "grad_norm": 6.053125609980945, "learning_rate": 6.62548740369763e-06, "loss": 17.3572, "step": 22582 }, { "epoch": 0.41279909335185627, "grad_norm": 6.199717892438728, "learning_rate": 6.625207468506822e-06, "loss": 17.5492, "step": 22583 }, { "epoch": 0.41281737254830275, "grad_norm": 6.698844190195537, "learning_rate": 6.624927527619803e-06, "loss": 17.5677, "step": 22584 }, { "epoch": 0.4128356517447493, "grad_norm": 5.988876906425944, "learning_rate": 6.624647581037553e-06, "loss": 17.344, "step": 22585 }, { "epoch": 0.4128539309411958, "grad_norm": 6.045429597912229, "learning_rate": 6.624367628761056e-06, "loss": 17.4643, "step": 22586 }, { "epoch": 0.41287221013764236, "grad_norm": 6.42182730756578, "learning_rate": 6.624087670791291e-06, "loss": 17.6234, "step": 22587 }, { "epoch": 0.4128904893340889, "grad_norm": 6.986066200367678, "learning_rate": 6.623807707129237e-06, "loss": 17.4393, "step": 22588 }, { "epoch": 0.4129087685305354, "grad_norm": 5.657003922026771, "learning_rate": 6.623527737775881e-06, "loss": 17.2285, "step": 22589 }, { "epoch": 0.4129270477269819, "grad_norm": 6.25354650988634, "learning_rate": 6.623247762732199e-06, "loss": 17.6109, "step": 22590 }, { "epoch": 0.41294532692342845, "grad_norm": 6.1299618904758315, "learning_rate": 6.622967781999175e-06, "loss": 17.2798, "step": 22591 }, { "epoch": 0.412963606119875, "grad_norm": 6.8638793367227775, "learning_rate": 6.622687795577792e-06, "loss": 17.8593, "step": 22592 }, { "epoch": 0.4129818853163215, "grad_norm": 6.796283468696494, "learning_rate": 6.622407803469027e-06, "loss": 17.9025, "step": 22593 }, { "epoch": 0.413000164512768, "grad_norm": 7.761046709983375, "learning_rate": 6.622127805673863e-06, "loss": 17.8826, "step": 22594 }, { "epoch": 0.41301844370921453, "grad_norm": 5.004190494024626, "learning_rate": 6.621847802193282e-06, "loss": 17.0119, "step": 22595 }, { "epoch": 0.41303672290566107, "grad_norm": 7.200378132739967, "learning_rate": 6.621567793028265e-06, "loss": 17.9817, "step": 22596 }, { "epoch": 0.4130550021021076, "grad_norm": 5.330772260455991, "learning_rate": 6.621287778179795e-06, "loss": 17.1643, "step": 22597 }, { "epoch": 0.41307328129855414, "grad_norm": 6.535466779561626, "learning_rate": 6.621007757648852e-06, "loss": 17.729, "step": 22598 }, { "epoch": 0.4130915604950006, "grad_norm": 5.811170757742771, "learning_rate": 6.620727731436416e-06, "loss": 17.0422, "step": 22599 }, { "epoch": 0.41310983969144716, "grad_norm": 6.824860401716977, "learning_rate": 6.62044769954347e-06, "loss": 17.7706, "step": 22600 }, { "epoch": 0.4131281188878937, "grad_norm": 5.996010384567158, "learning_rate": 6.620167661970998e-06, "loss": 17.5312, "step": 22601 }, { "epoch": 0.4131463980843402, "grad_norm": 7.0419128847561, "learning_rate": 6.619887618719977e-06, "loss": 17.9146, "step": 22602 }, { "epoch": 0.41316467728078676, "grad_norm": 6.673918239797463, "learning_rate": 6.61960756979139e-06, "loss": 17.6691, "step": 22603 }, { "epoch": 0.41318295647723324, "grad_norm": 5.993621475464369, "learning_rate": 6.619327515186219e-06, "loss": 17.352, "step": 22604 }, { "epoch": 0.4132012356736798, "grad_norm": 6.092149993554768, "learning_rate": 6.619047454905446e-06, "loss": 17.5204, "step": 22605 }, { "epoch": 0.4132195148701263, "grad_norm": 6.303515551780367, "learning_rate": 6.618767388950052e-06, "loss": 17.4342, "step": 22606 }, { "epoch": 0.41323779406657285, "grad_norm": 7.357594247325871, "learning_rate": 6.6184873173210194e-06, "loss": 17.9901, "step": 22607 }, { "epoch": 0.41325607326301933, "grad_norm": 8.321893973483117, "learning_rate": 6.61820724001933e-06, "loss": 18.4359, "step": 22608 }, { "epoch": 0.41327435245946587, "grad_norm": 5.0807070169529585, "learning_rate": 6.6179271570459625e-06, "loss": 17.0118, "step": 22609 }, { "epoch": 0.4132926316559124, "grad_norm": 7.203675890097184, "learning_rate": 6.617647068401902e-06, "loss": 18.179, "step": 22610 }, { "epoch": 0.41331091085235894, "grad_norm": 5.735684038570406, "learning_rate": 6.61736697408813e-06, "loss": 17.46, "step": 22611 }, { "epoch": 0.4133291900488055, "grad_norm": 6.064406790187848, "learning_rate": 6.6170868741056235e-06, "loss": 17.3494, "step": 22612 }, { "epoch": 0.41334746924525195, "grad_norm": 6.664329183395076, "learning_rate": 6.616806768455371e-06, "loss": 17.6179, "step": 22613 }, { "epoch": 0.4133657484416985, "grad_norm": 6.3572287918374055, "learning_rate": 6.61652665713835e-06, "loss": 17.4921, "step": 22614 }, { "epoch": 0.413384027638145, "grad_norm": 6.907970743755561, "learning_rate": 6.616246540155544e-06, "loss": 17.6579, "step": 22615 }, { "epoch": 0.41340230683459156, "grad_norm": 6.736745296231206, "learning_rate": 6.615966417507933e-06, "loss": 17.8245, "step": 22616 }, { "epoch": 0.4134205860310381, "grad_norm": 6.102058842223797, "learning_rate": 6.615686289196501e-06, "loss": 17.5284, "step": 22617 }, { "epoch": 0.4134388652274846, "grad_norm": 6.820874290130761, "learning_rate": 6.615406155222228e-06, "loss": 17.5447, "step": 22618 }, { "epoch": 0.4134571444239311, "grad_norm": 6.451908386244689, "learning_rate": 6.615126015586097e-06, "loss": 17.7669, "step": 22619 }, { "epoch": 0.41347542362037765, "grad_norm": 8.368993765242212, "learning_rate": 6.614845870289089e-06, "loss": 18.1534, "step": 22620 }, { "epoch": 0.4134937028168242, "grad_norm": 6.6094965163326735, "learning_rate": 6.614565719332187e-06, "loss": 17.5565, "step": 22621 }, { "epoch": 0.4135119820132707, "grad_norm": 8.729927958239205, "learning_rate": 6.614285562716372e-06, "loss": 18.5272, "step": 22622 }, { "epoch": 0.4135302612097172, "grad_norm": 7.685832120660212, "learning_rate": 6.614005400442625e-06, "loss": 17.6718, "step": 22623 }, { "epoch": 0.41354854040616373, "grad_norm": 5.114941706725837, "learning_rate": 6.613725232511931e-06, "loss": 17.1031, "step": 22624 }, { "epoch": 0.41356681960261027, "grad_norm": 7.073382172132256, "learning_rate": 6.613445058925271e-06, "loss": 17.626, "step": 22625 }, { "epoch": 0.4135850987990568, "grad_norm": 6.521550957369006, "learning_rate": 6.613164879683622e-06, "loss": 17.5834, "step": 22626 }, { "epoch": 0.41360337799550334, "grad_norm": 5.709561474608981, "learning_rate": 6.612884694787973e-06, "loss": 17.4551, "step": 22627 }, { "epoch": 0.4136216571919498, "grad_norm": 6.014032259728497, "learning_rate": 6.612604504239304e-06, "loss": 17.3407, "step": 22628 }, { "epoch": 0.41363993638839636, "grad_norm": 6.536297970602517, "learning_rate": 6.612324308038595e-06, "loss": 17.4338, "step": 22629 }, { "epoch": 0.4136582155848429, "grad_norm": 6.439188938365965, "learning_rate": 6.612044106186829e-06, "loss": 17.4706, "step": 22630 }, { "epoch": 0.4136764947812894, "grad_norm": 6.860891132068323, "learning_rate": 6.611763898684989e-06, "loss": 17.84, "step": 22631 }, { "epoch": 0.41369477397773596, "grad_norm": 6.780814567751688, "learning_rate": 6.611483685534054e-06, "loss": 17.4697, "step": 22632 }, { "epoch": 0.41371305317418244, "grad_norm": 7.741936908099513, "learning_rate": 6.61120346673501e-06, "loss": 18.2421, "step": 22633 }, { "epoch": 0.413731332370629, "grad_norm": 8.460448600785817, "learning_rate": 6.610923242288838e-06, "loss": 18.2521, "step": 22634 }, { "epoch": 0.4137496115670755, "grad_norm": 9.804278378568787, "learning_rate": 6.6106430121965206e-06, "loss": 18.8881, "step": 22635 }, { "epoch": 0.41376789076352205, "grad_norm": 6.469584170545846, "learning_rate": 6.610362776459038e-06, "loss": 17.5042, "step": 22636 }, { "epoch": 0.4137861699599686, "grad_norm": 6.491743775339879, "learning_rate": 6.610082535077373e-06, "loss": 17.6091, "step": 22637 }, { "epoch": 0.41380444915641507, "grad_norm": 6.723904337174401, "learning_rate": 6.6098022880525114e-06, "loss": 17.8345, "step": 22638 }, { "epoch": 0.4138227283528616, "grad_norm": 6.348440816127864, "learning_rate": 6.609522035385429e-06, "loss": 17.4658, "step": 22639 }, { "epoch": 0.41384100754930814, "grad_norm": 6.6049482488077915, "learning_rate": 6.6092417770771135e-06, "loss": 17.7139, "step": 22640 }, { "epoch": 0.4138592867457547, "grad_norm": 7.664834529279627, "learning_rate": 6.608961513128544e-06, "loss": 18.1336, "step": 22641 }, { "epoch": 0.41387756594220115, "grad_norm": 8.67237503839013, "learning_rate": 6.608681243540706e-06, "loss": 18.5005, "step": 22642 }, { "epoch": 0.4138958451386477, "grad_norm": 6.90125030437421, "learning_rate": 6.608400968314578e-06, "loss": 17.5234, "step": 22643 }, { "epoch": 0.4139141243350942, "grad_norm": 6.710162254357177, "learning_rate": 6.608120687451144e-06, "loss": 17.7885, "step": 22644 }, { "epoch": 0.41393240353154076, "grad_norm": 7.94811464687793, "learning_rate": 6.607840400951387e-06, "loss": 18.3258, "step": 22645 }, { "epoch": 0.4139506827279873, "grad_norm": 6.864843794923079, "learning_rate": 6.60756010881629e-06, "loss": 17.9661, "step": 22646 }, { "epoch": 0.4139689619244338, "grad_norm": 5.348178509871798, "learning_rate": 6.607279811046834e-06, "loss": 17.0711, "step": 22647 }, { "epoch": 0.4139872411208803, "grad_norm": 5.843528393840664, "learning_rate": 6.6069995076440004e-06, "loss": 17.3442, "step": 22648 }, { "epoch": 0.41400552031732685, "grad_norm": 7.0493975084425164, "learning_rate": 6.606719198608775e-06, "loss": 17.751, "step": 22649 }, { "epoch": 0.4140237995137734, "grad_norm": 6.850352787803978, "learning_rate": 6.606438883942136e-06, "loss": 17.8465, "step": 22650 }, { "epoch": 0.4140420787102199, "grad_norm": 6.794192267071566, "learning_rate": 6.606158563645069e-06, "loss": 18.0543, "step": 22651 }, { "epoch": 0.4140603579066664, "grad_norm": 5.6260671342242246, "learning_rate": 6.605878237718557e-06, "loss": 17.2493, "step": 22652 }, { "epoch": 0.41407863710311293, "grad_norm": 5.133847299703579, "learning_rate": 6.605597906163579e-06, "loss": 17.0291, "step": 22653 }, { "epoch": 0.41409691629955947, "grad_norm": 6.321677314254794, "learning_rate": 6.60531756898112e-06, "loss": 17.601, "step": 22654 }, { "epoch": 0.414115195496006, "grad_norm": 7.713653231749315, "learning_rate": 6.605037226172164e-06, "loss": 18.0653, "step": 22655 }, { "epoch": 0.41413347469245254, "grad_norm": 7.827450559057984, "learning_rate": 6.60475687773769e-06, "loss": 18.1411, "step": 22656 }, { "epoch": 0.414151753888899, "grad_norm": 7.096833323457897, "learning_rate": 6.604476523678682e-06, "loss": 17.9774, "step": 22657 }, { "epoch": 0.41417003308534556, "grad_norm": 8.250085366544415, "learning_rate": 6.604196163996124e-06, "loss": 18.4161, "step": 22658 }, { "epoch": 0.4141883122817921, "grad_norm": 6.781771733149326, "learning_rate": 6.603915798690999e-06, "loss": 17.7022, "step": 22659 }, { "epoch": 0.41420659147823863, "grad_norm": 6.757840342237743, "learning_rate": 6.603635427764286e-06, "loss": 17.8375, "step": 22660 }, { "epoch": 0.41422487067468516, "grad_norm": 6.5419938792567, "learning_rate": 6.603355051216971e-06, "loss": 17.7905, "step": 22661 }, { "epoch": 0.41424314987113164, "grad_norm": 5.537596873236283, "learning_rate": 6.603074669050036e-06, "loss": 17.2593, "step": 22662 }, { "epoch": 0.4142614290675782, "grad_norm": 7.3491561295609324, "learning_rate": 6.602794281264462e-06, "loss": 17.7703, "step": 22663 }, { "epoch": 0.4142797082640247, "grad_norm": 6.3830916393239745, "learning_rate": 6.602513887861235e-06, "loss": 17.7261, "step": 22664 }, { "epoch": 0.41429798746047125, "grad_norm": 4.750540918325534, "learning_rate": 6.6022334888413345e-06, "loss": 16.8368, "step": 22665 }, { "epoch": 0.4143162666569178, "grad_norm": 6.487918736243572, "learning_rate": 6.601953084205745e-06, "loss": 17.3947, "step": 22666 }, { "epoch": 0.41433454585336427, "grad_norm": 8.488969122297817, "learning_rate": 6.601672673955449e-06, "loss": 17.4989, "step": 22667 }, { "epoch": 0.4143528250498108, "grad_norm": 5.959678194243318, "learning_rate": 6.601392258091429e-06, "loss": 17.2292, "step": 22668 }, { "epoch": 0.41437110424625734, "grad_norm": 6.752733173416814, "learning_rate": 6.60111183661467e-06, "loss": 17.5716, "step": 22669 }, { "epoch": 0.4143893834427039, "grad_norm": 7.461818859216035, "learning_rate": 6.600831409526152e-06, "loss": 18.3179, "step": 22670 }, { "epoch": 0.4144076626391504, "grad_norm": 6.52799862030602, "learning_rate": 6.6005509768268575e-06, "loss": 17.829, "step": 22671 }, { "epoch": 0.4144259418355969, "grad_norm": 8.700369849679246, "learning_rate": 6.60027053851777e-06, "loss": 18.6796, "step": 22672 }, { "epoch": 0.4144442210320434, "grad_norm": 6.722078435566233, "learning_rate": 6.599990094599875e-06, "loss": 17.6187, "step": 22673 }, { "epoch": 0.41446250022848996, "grad_norm": 8.761588404165707, "learning_rate": 6.599709645074154e-06, "loss": 18.3351, "step": 22674 }, { "epoch": 0.4144807794249365, "grad_norm": 5.366673880496607, "learning_rate": 6.599429189941589e-06, "loss": 16.8164, "step": 22675 }, { "epoch": 0.414499058621383, "grad_norm": 5.554265745940292, "learning_rate": 6.599148729203162e-06, "loss": 17.0912, "step": 22676 }, { "epoch": 0.4145173378178295, "grad_norm": 8.969641571116686, "learning_rate": 6.598868262859859e-06, "loss": 18.5551, "step": 22677 }, { "epoch": 0.41453561701427605, "grad_norm": 5.718663842826723, "learning_rate": 6.598587790912661e-06, "loss": 17.3056, "step": 22678 }, { "epoch": 0.4145538962107226, "grad_norm": 5.6854851969374405, "learning_rate": 6.598307313362552e-06, "loss": 17.4263, "step": 22679 }, { "epoch": 0.4145721754071691, "grad_norm": 7.126189282650898, "learning_rate": 6.598026830210513e-06, "loss": 17.6173, "step": 22680 }, { "epoch": 0.4145904546036156, "grad_norm": 6.867647480876624, "learning_rate": 6.597746341457531e-06, "loss": 17.8448, "step": 22681 }, { "epoch": 0.41460873380006213, "grad_norm": 7.245180556532616, "learning_rate": 6.597465847104585e-06, "loss": 17.9264, "step": 22682 }, { "epoch": 0.41462701299650867, "grad_norm": 5.698173578017259, "learning_rate": 6.597185347152661e-06, "loss": 17.2355, "step": 22683 }, { "epoch": 0.4146452921929552, "grad_norm": 7.678229885503245, "learning_rate": 6.596904841602741e-06, "loss": 18.1914, "step": 22684 }, { "epoch": 0.41466357138940174, "grad_norm": 6.946669557539084, "learning_rate": 6.596624330455805e-06, "loss": 17.7522, "step": 22685 }, { "epoch": 0.4146818505858482, "grad_norm": 8.112722763877004, "learning_rate": 6.596343813712843e-06, "loss": 18.1864, "step": 22686 }, { "epoch": 0.41470012978229476, "grad_norm": 7.077645673920443, "learning_rate": 6.5960632913748334e-06, "loss": 17.761, "step": 22687 }, { "epoch": 0.4147184089787413, "grad_norm": 5.948136834264524, "learning_rate": 6.595782763442759e-06, "loss": 17.4198, "step": 22688 }, { "epoch": 0.41473668817518783, "grad_norm": 5.479383944276968, "learning_rate": 6.595502229917608e-06, "loss": 17.3015, "step": 22689 }, { "epoch": 0.41475496737163436, "grad_norm": 5.630273377241858, "learning_rate": 6.595221690800356e-06, "loss": 17.2719, "step": 22690 }, { "epoch": 0.41477324656808084, "grad_norm": 7.345218275703665, "learning_rate": 6.594941146091993e-06, "loss": 18.4604, "step": 22691 }, { "epoch": 0.4147915257645274, "grad_norm": 5.511659026344551, "learning_rate": 6.594660595793498e-06, "loss": 17.2525, "step": 22692 }, { "epoch": 0.4148098049609739, "grad_norm": 5.954239280317081, "learning_rate": 6.5943800399058586e-06, "loss": 17.4721, "step": 22693 }, { "epoch": 0.41482808415742045, "grad_norm": 8.52838192229829, "learning_rate": 6.594099478430052e-06, "loss": 18.3876, "step": 22694 }, { "epoch": 0.414846363353867, "grad_norm": 5.610287645105502, "learning_rate": 6.593818911367067e-06, "loss": 17.1563, "step": 22695 }, { "epoch": 0.41486464255031347, "grad_norm": 5.801369980537789, "learning_rate": 6.593538338717885e-06, "loss": 17.0791, "step": 22696 }, { "epoch": 0.41488292174676, "grad_norm": 5.399986477203266, "learning_rate": 6.59325776048349e-06, "loss": 17.1426, "step": 22697 }, { "epoch": 0.41490120094320654, "grad_norm": 5.841316509387635, "learning_rate": 6.5929771766648646e-06, "loss": 17.4869, "step": 22698 }, { "epoch": 0.4149194801396531, "grad_norm": 7.118676761819252, "learning_rate": 6.59269658726299e-06, "loss": 18.0046, "step": 22699 }, { "epoch": 0.4149377593360996, "grad_norm": 6.532402113897623, "learning_rate": 6.592415992278855e-06, "loss": 17.5097, "step": 22700 }, { "epoch": 0.4149560385325461, "grad_norm": 5.9490599045365, "learning_rate": 6.59213539171344e-06, "loss": 17.435, "step": 22701 }, { "epoch": 0.4149743177289926, "grad_norm": 6.041935462363598, "learning_rate": 6.591854785567727e-06, "loss": 17.2972, "step": 22702 }, { "epoch": 0.41499259692543916, "grad_norm": 7.19247230647167, "learning_rate": 6.591574173842702e-06, "loss": 17.942, "step": 22703 }, { "epoch": 0.4150108761218857, "grad_norm": 6.734515529317652, "learning_rate": 6.591293556539348e-06, "loss": 17.7786, "step": 22704 }, { "epoch": 0.41502915531833223, "grad_norm": 6.395805860776399, "learning_rate": 6.591012933658647e-06, "loss": 17.2242, "step": 22705 }, { "epoch": 0.4150474345147787, "grad_norm": 7.439591270775899, "learning_rate": 6.5907323052015846e-06, "loss": 18.0942, "step": 22706 }, { "epoch": 0.41506571371122525, "grad_norm": 7.104558951426564, "learning_rate": 6.590451671169143e-06, "loss": 18.0514, "step": 22707 }, { "epoch": 0.4150839929076718, "grad_norm": 6.619138140788436, "learning_rate": 6.590171031562307e-06, "loss": 17.6459, "step": 22708 }, { "epoch": 0.4151022721041183, "grad_norm": 7.68805210518015, "learning_rate": 6.589890386382058e-06, "loss": 18.1722, "step": 22709 }, { "epoch": 0.4151205513005648, "grad_norm": 6.3781541007056, "learning_rate": 6.589609735629383e-06, "loss": 17.4187, "step": 22710 }, { "epoch": 0.41513883049701134, "grad_norm": 6.485791575507058, "learning_rate": 6.589329079305265e-06, "loss": 17.3851, "step": 22711 }, { "epoch": 0.41515710969345787, "grad_norm": 7.265329608831831, "learning_rate": 6.589048417410683e-06, "loss": 18.2836, "step": 22712 }, { "epoch": 0.4151753888899044, "grad_norm": 6.106993705205944, "learning_rate": 6.5887677499466255e-06, "loss": 17.276, "step": 22713 }, { "epoch": 0.41519366808635094, "grad_norm": 8.007759577422881, "learning_rate": 6.588487076914076e-06, "loss": 17.994, "step": 22714 }, { "epoch": 0.4152119472827974, "grad_norm": 6.874602017566855, "learning_rate": 6.588206398314017e-06, "loss": 17.7814, "step": 22715 }, { "epoch": 0.41523022647924396, "grad_norm": 7.192935282340736, "learning_rate": 6.58792571414743e-06, "loss": 17.6344, "step": 22716 }, { "epoch": 0.4152485056756905, "grad_norm": 7.48865060419624, "learning_rate": 6.587645024415304e-06, "loss": 17.8313, "step": 22717 }, { "epoch": 0.41526678487213703, "grad_norm": 6.263113228871017, "learning_rate": 6.587364329118619e-06, "loss": 17.3444, "step": 22718 }, { "epoch": 0.41528506406858356, "grad_norm": 7.208517383989723, "learning_rate": 6.587083628258358e-06, "loss": 17.7908, "step": 22719 }, { "epoch": 0.41530334326503004, "grad_norm": 8.204107080778781, "learning_rate": 6.586802921835509e-06, "loss": 18.3743, "step": 22720 }, { "epoch": 0.4153216224614766, "grad_norm": 6.759058520351196, "learning_rate": 6.586522209851053e-06, "loss": 17.6838, "step": 22721 }, { "epoch": 0.4153399016579231, "grad_norm": 5.367233973973414, "learning_rate": 6.586241492305974e-06, "loss": 17.1537, "step": 22722 }, { "epoch": 0.41535818085436965, "grad_norm": 5.641765597733447, "learning_rate": 6.585960769201256e-06, "loss": 17.2739, "step": 22723 }, { "epoch": 0.4153764600508162, "grad_norm": 5.603372240086101, "learning_rate": 6.585680040537884e-06, "loss": 17.1906, "step": 22724 }, { "epoch": 0.41539473924726267, "grad_norm": 6.509496739115115, "learning_rate": 6.58539930631684e-06, "loss": 17.7058, "step": 22725 }, { "epoch": 0.4154130184437092, "grad_norm": 7.650496191049718, "learning_rate": 6.585118566539108e-06, "loss": 17.7613, "step": 22726 }, { "epoch": 0.41543129764015574, "grad_norm": 6.774446900572114, "learning_rate": 6.584837821205675e-06, "loss": 18.0853, "step": 22727 }, { "epoch": 0.4154495768366023, "grad_norm": 8.229439411922518, "learning_rate": 6.584557070317523e-06, "loss": 18.2243, "step": 22728 }, { "epoch": 0.4154678560330488, "grad_norm": 5.692487977891022, "learning_rate": 6.584276313875635e-06, "loss": 17.2809, "step": 22729 }, { "epoch": 0.4154861352294953, "grad_norm": 7.541651090569733, "learning_rate": 6.583995551880996e-06, "loss": 17.8468, "step": 22730 }, { "epoch": 0.4155044144259418, "grad_norm": 7.593822362366229, "learning_rate": 6.58371478433459e-06, "loss": 17.8409, "step": 22731 }, { "epoch": 0.41552269362238836, "grad_norm": 6.341984989300643, "learning_rate": 6.5834340112374015e-06, "loss": 17.7052, "step": 22732 }, { "epoch": 0.4155409728188349, "grad_norm": 6.75870241976922, "learning_rate": 6.583153232590415e-06, "loss": 17.8381, "step": 22733 }, { "epoch": 0.41555925201528143, "grad_norm": 6.017906222199273, "learning_rate": 6.5828724483946124e-06, "loss": 17.5057, "step": 22734 }, { "epoch": 0.4155775312117279, "grad_norm": 6.575012576757038, "learning_rate": 6.58259165865098e-06, "loss": 17.4874, "step": 22735 }, { "epoch": 0.41559581040817445, "grad_norm": 6.8646238780668005, "learning_rate": 6.582310863360501e-06, "loss": 17.6847, "step": 22736 }, { "epoch": 0.415614089604621, "grad_norm": 6.004205502592466, "learning_rate": 6.58203006252416e-06, "loss": 17.261, "step": 22737 }, { "epoch": 0.4156323688010675, "grad_norm": 6.6996546292894354, "learning_rate": 6.581749256142941e-06, "loss": 17.5514, "step": 22738 }, { "epoch": 0.41565064799751406, "grad_norm": 5.002377555622047, "learning_rate": 6.581468444217827e-06, "loss": 16.9799, "step": 22739 }, { "epoch": 0.41566892719396054, "grad_norm": 5.442555686252805, "learning_rate": 6.581187626749803e-06, "loss": 17.1699, "step": 22740 }, { "epoch": 0.41568720639040707, "grad_norm": 5.511346881169742, "learning_rate": 6.580906803739855e-06, "loss": 17.1863, "step": 22741 }, { "epoch": 0.4157054855868536, "grad_norm": 5.8617931785889565, "learning_rate": 6.580625975188966e-06, "loss": 17.1286, "step": 22742 }, { "epoch": 0.41572376478330014, "grad_norm": 6.571820235000502, "learning_rate": 6.58034514109812e-06, "loss": 17.4238, "step": 22743 }, { "epoch": 0.4157420439797466, "grad_norm": 7.297712380885935, "learning_rate": 6.5800643014683e-06, "loss": 17.8347, "step": 22744 }, { "epoch": 0.41576032317619316, "grad_norm": 7.361179552002683, "learning_rate": 6.579783456300494e-06, "loss": 17.7443, "step": 22745 }, { "epoch": 0.4157786023726397, "grad_norm": 5.652602800604153, "learning_rate": 6.579502605595682e-06, "loss": 17.3332, "step": 22746 }, { "epoch": 0.41579688156908623, "grad_norm": 6.973782360010165, "learning_rate": 6.579221749354851e-06, "loss": 17.2906, "step": 22747 }, { "epoch": 0.41581516076553277, "grad_norm": 5.04874986374777, "learning_rate": 6.578940887578985e-06, "loss": 16.7602, "step": 22748 }, { "epoch": 0.41583343996197925, "grad_norm": 6.617958056138184, "learning_rate": 6.578660020269069e-06, "loss": 17.5798, "step": 22749 }, { "epoch": 0.4158517191584258, "grad_norm": 5.884115906716348, "learning_rate": 6.578379147426085e-06, "loss": 17.076, "step": 22750 }, { "epoch": 0.4158699983548723, "grad_norm": 5.746617761475577, "learning_rate": 6.5780982690510195e-06, "loss": 17.3955, "step": 22751 }, { "epoch": 0.41588827755131885, "grad_norm": 5.915119519887098, "learning_rate": 6.577817385144858e-06, "loss": 17.3731, "step": 22752 }, { "epoch": 0.4159065567477654, "grad_norm": 5.217123786820856, "learning_rate": 6.577536495708582e-06, "loss": 16.9362, "step": 22753 }, { "epoch": 0.41592483594421187, "grad_norm": 7.274446557520727, "learning_rate": 6.577255600743178e-06, "loss": 17.6814, "step": 22754 }, { "epoch": 0.4159431151406584, "grad_norm": 7.1672588174974035, "learning_rate": 6.57697470024963e-06, "loss": 17.6601, "step": 22755 }, { "epoch": 0.41596139433710494, "grad_norm": 5.915564077256306, "learning_rate": 6.5766937942289236e-06, "loss": 17.3083, "step": 22756 }, { "epoch": 0.4159796735335515, "grad_norm": 6.479994222430836, "learning_rate": 6.5764128826820404e-06, "loss": 17.5307, "step": 22757 }, { "epoch": 0.415997952729998, "grad_norm": 7.006907757023098, "learning_rate": 6.5761319656099665e-06, "loss": 17.5083, "step": 22758 }, { "epoch": 0.4160162319264445, "grad_norm": 6.909314029790856, "learning_rate": 6.575851043013688e-06, "loss": 17.5981, "step": 22759 }, { "epoch": 0.416034511122891, "grad_norm": 8.133590289608975, "learning_rate": 6.575570114894189e-06, "loss": 18.3469, "step": 22760 }, { "epoch": 0.41605279031933756, "grad_norm": 7.532132414079868, "learning_rate": 6.575289181252452e-06, "loss": 17.8717, "step": 22761 }, { "epoch": 0.4160710695157841, "grad_norm": 8.058429272216127, "learning_rate": 6.575008242089463e-06, "loss": 17.4815, "step": 22762 }, { "epoch": 0.41608934871223063, "grad_norm": 6.538101077384175, "learning_rate": 6.574727297406208e-06, "loss": 17.3078, "step": 22763 }, { "epoch": 0.4161076279086771, "grad_norm": 7.684220028388074, "learning_rate": 6.5744463472036705e-06, "loss": 17.8021, "step": 22764 }, { "epoch": 0.41612590710512365, "grad_norm": 6.244223985643519, "learning_rate": 6.574165391482834e-06, "loss": 17.3872, "step": 22765 }, { "epoch": 0.4161441863015702, "grad_norm": 6.987439625975591, "learning_rate": 6.573884430244686e-06, "loss": 17.4608, "step": 22766 }, { "epoch": 0.4161624654980167, "grad_norm": 6.891764479283446, "learning_rate": 6.573603463490208e-06, "loss": 17.8429, "step": 22767 }, { "epoch": 0.41618074469446326, "grad_norm": 7.666691658856379, "learning_rate": 6.573322491220387e-06, "loss": 17.587, "step": 22768 }, { "epoch": 0.41619902389090974, "grad_norm": 5.806290877069214, "learning_rate": 6.573041513436208e-06, "loss": 17.4055, "step": 22769 }, { "epoch": 0.41621730308735627, "grad_norm": 5.16293829467992, "learning_rate": 6.572760530138654e-06, "loss": 17.1096, "step": 22770 }, { "epoch": 0.4162355822838028, "grad_norm": 7.348799653268203, "learning_rate": 6.572479541328711e-06, "loss": 17.7816, "step": 22771 }, { "epoch": 0.41625386148024934, "grad_norm": 6.755542268569418, "learning_rate": 6.5721985470073635e-06, "loss": 17.3061, "step": 22772 }, { "epoch": 0.4162721406766959, "grad_norm": 7.217781189562275, "learning_rate": 6.571917547175598e-06, "loss": 18.0527, "step": 22773 }, { "epoch": 0.41629041987314236, "grad_norm": 6.833506095546124, "learning_rate": 6.571636541834396e-06, "loss": 17.7029, "step": 22774 }, { "epoch": 0.4163086990695889, "grad_norm": 5.849803002947453, "learning_rate": 6.571355530984746e-06, "loss": 17.3694, "step": 22775 }, { "epoch": 0.41632697826603543, "grad_norm": 5.765323378988717, "learning_rate": 6.571074514627629e-06, "loss": 17.3174, "step": 22776 }, { "epoch": 0.41634525746248197, "grad_norm": 6.690528133298416, "learning_rate": 6.570793492764033e-06, "loss": 17.779, "step": 22777 }, { "epoch": 0.41636353665892845, "grad_norm": 5.990251398528859, "learning_rate": 6.570512465394943e-06, "loss": 17.3365, "step": 22778 }, { "epoch": 0.416381815855375, "grad_norm": 7.25207053808171, "learning_rate": 6.570231432521344e-06, "loss": 17.7794, "step": 22779 }, { "epoch": 0.4164000950518215, "grad_norm": 5.545128358764208, "learning_rate": 6.5699503941442176e-06, "loss": 17.218, "step": 22780 }, { "epoch": 0.41641837424826805, "grad_norm": 7.852032475184119, "learning_rate": 6.569669350264553e-06, "loss": 17.7779, "step": 22781 }, { "epoch": 0.4164366534447146, "grad_norm": 7.142927567560632, "learning_rate": 6.569388300883332e-06, "loss": 17.912, "step": 22782 }, { "epoch": 0.41645493264116107, "grad_norm": 8.292464851180304, "learning_rate": 6.569107246001542e-06, "loss": 18.0269, "step": 22783 }, { "epoch": 0.4164732118376076, "grad_norm": 7.474122331540772, "learning_rate": 6.568826185620169e-06, "loss": 17.2395, "step": 22784 }, { "epoch": 0.41649149103405414, "grad_norm": 7.301032832584391, "learning_rate": 6.568545119740193e-06, "loss": 17.9761, "step": 22785 }, { "epoch": 0.4165097702305007, "grad_norm": 6.912074852409992, "learning_rate": 6.568264048362605e-06, "loss": 17.6828, "step": 22786 }, { "epoch": 0.4165280494269472, "grad_norm": 5.650365942879607, "learning_rate": 6.567982971488387e-06, "loss": 17.1882, "step": 22787 }, { "epoch": 0.4165463286233937, "grad_norm": 6.1543212301739105, "learning_rate": 6.5677018891185255e-06, "loss": 17.501, "step": 22788 }, { "epoch": 0.4165646078198402, "grad_norm": 5.830334058663455, "learning_rate": 6.567420801254003e-06, "loss": 17.3876, "step": 22789 }, { "epoch": 0.41658288701628676, "grad_norm": 6.832920466203908, "learning_rate": 6.567139707895808e-06, "loss": 17.5439, "step": 22790 }, { "epoch": 0.4166011662127333, "grad_norm": 7.110551234129028, "learning_rate": 6.566858609044924e-06, "loss": 17.7816, "step": 22791 }, { "epoch": 0.41661944540917983, "grad_norm": 7.744163628442694, "learning_rate": 6.5665775047023365e-06, "loss": 18.228, "step": 22792 }, { "epoch": 0.4166377246056263, "grad_norm": 8.667665539047851, "learning_rate": 6.566296394869032e-06, "loss": 18.431, "step": 22793 }, { "epoch": 0.41665600380207285, "grad_norm": 6.7923573073427415, "learning_rate": 6.566015279545991e-06, "loss": 17.6889, "step": 22794 }, { "epoch": 0.4166742829985194, "grad_norm": 6.236571850196767, "learning_rate": 6.565734158734205e-06, "loss": 17.5213, "step": 22795 }, { "epoch": 0.4166925621949659, "grad_norm": 6.12475517112729, "learning_rate": 6.565453032434657e-06, "loss": 17.6715, "step": 22796 }, { "epoch": 0.41671084139141246, "grad_norm": 5.582975984442605, "learning_rate": 6.5651719006483304e-06, "loss": 17.1853, "step": 22797 }, { "epoch": 0.41672912058785894, "grad_norm": 7.714019679154534, "learning_rate": 6.564890763376212e-06, "loss": 18.1422, "step": 22798 }, { "epoch": 0.4167473997843055, "grad_norm": 6.2442493922056475, "learning_rate": 6.564609620619289e-06, "loss": 17.3394, "step": 22799 }, { "epoch": 0.416765678980752, "grad_norm": 6.482635908781921, "learning_rate": 6.564328472378545e-06, "loss": 17.7275, "step": 22800 }, { "epoch": 0.41678395817719854, "grad_norm": 5.819707570598144, "learning_rate": 6.564047318654965e-06, "loss": 17.1874, "step": 22801 }, { "epoch": 0.4168022373736451, "grad_norm": 7.5473504574752734, "learning_rate": 6.563766159449534e-06, "loss": 18.2899, "step": 22802 }, { "epoch": 0.41682051657009156, "grad_norm": 9.299562735722224, "learning_rate": 6.563484994763238e-06, "loss": 18.1879, "step": 22803 }, { "epoch": 0.4168387957665381, "grad_norm": 6.709706486727593, "learning_rate": 6.563203824597064e-06, "loss": 17.5875, "step": 22804 }, { "epoch": 0.41685707496298463, "grad_norm": 6.495214829325099, "learning_rate": 6.562922648951997e-06, "loss": 17.5701, "step": 22805 }, { "epoch": 0.41687535415943117, "grad_norm": 6.4480004868956415, "learning_rate": 6.562641467829021e-06, "loss": 17.1504, "step": 22806 }, { "epoch": 0.4168936333558777, "grad_norm": 6.590807356068329, "learning_rate": 6.562360281229121e-06, "loss": 17.6114, "step": 22807 }, { "epoch": 0.4169119125523242, "grad_norm": 5.205104114154562, "learning_rate": 6.562079089153285e-06, "loss": 16.862, "step": 22808 }, { "epoch": 0.4169301917487707, "grad_norm": 7.197480642816911, "learning_rate": 6.561797891602496e-06, "loss": 18.1306, "step": 22809 }, { "epoch": 0.41694847094521725, "grad_norm": 8.322559946414186, "learning_rate": 6.561516688577743e-06, "loss": 18.0837, "step": 22810 }, { "epoch": 0.4169667501416638, "grad_norm": 6.383365600979941, "learning_rate": 6.561235480080008e-06, "loss": 17.2967, "step": 22811 }, { "epoch": 0.41698502933811027, "grad_norm": 5.639384376893032, "learning_rate": 6.560954266110278e-06, "loss": 17.1241, "step": 22812 }, { "epoch": 0.4170033085345568, "grad_norm": 7.4142106095428435, "learning_rate": 6.560673046669539e-06, "loss": 17.7709, "step": 22813 }, { "epoch": 0.41702158773100334, "grad_norm": 5.281954945023397, "learning_rate": 6.560391821758778e-06, "loss": 16.9594, "step": 22814 }, { "epoch": 0.4170398669274499, "grad_norm": 8.812066905483043, "learning_rate": 6.560110591378978e-06, "loss": 18.5953, "step": 22815 }, { "epoch": 0.4170581461238964, "grad_norm": 6.876947558740086, "learning_rate": 6.559829355531125e-06, "loss": 18.0454, "step": 22816 }, { "epoch": 0.4170764253203429, "grad_norm": 7.917584726064076, "learning_rate": 6.5595481142162055e-06, "loss": 17.8833, "step": 22817 }, { "epoch": 0.4170947045167894, "grad_norm": 5.738162967416068, "learning_rate": 6.559266867435207e-06, "loss": 17.3356, "step": 22818 }, { "epoch": 0.41711298371323596, "grad_norm": 6.922606770731424, "learning_rate": 6.558985615189112e-06, "loss": 17.8734, "step": 22819 }, { "epoch": 0.4171312629096825, "grad_norm": 8.63541636951145, "learning_rate": 6.5587043574789065e-06, "loss": 18.5934, "step": 22820 }, { "epoch": 0.41714954210612903, "grad_norm": 7.578627984517951, "learning_rate": 6.55842309430558e-06, "loss": 17.95, "step": 22821 }, { "epoch": 0.4171678213025755, "grad_norm": 7.858886689256134, "learning_rate": 6.558141825670114e-06, "loss": 18.0093, "step": 22822 }, { "epoch": 0.41718610049902205, "grad_norm": 7.001485042822444, "learning_rate": 6.5578605515734964e-06, "loss": 17.706, "step": 22823 }, { "epoch": 0.4172043796954686, "grad_norm": 6.834582829360226, "learning_rate": 6.557579272016714e-06, "loss": 17.5111, "step": 22824 }, { "epoch": 0.4172226588919151, "grad_norm": 8.267023838670658, "learning_rate": 6.55729798700075e-06, "loss": 18.0063, "step": 22825 }, { "epoch": 0.41724093808836166, "grad_norm": 6.703518452516018, "learning_rate": 6.557016696526592e-06, "loss": 18.0998, "step": 22826 }, { "epoch": 0.41725921728480814, "grad_norm": 8.303254247812571, "learning_rate": 6.556735400595225e-06, "loss": 18.0793, "step": 22827 }, { "epoch": 0.4172774964812547, "grad_norm": 6.049871087523901, "learning_rate": 6.556454099207638e-06, "loss": 17.2825, "step": 22828 }, { "epoch": 0.4172957756777012, "grad_norm": 7.0593863672495205, "learning_rate": 6.5561727923648124e-06, "loss": 17.8252, "step": 22829 }, { "epoch": 0.41731405487414774, "grad_norm": 7.749978588804397, "learning_rate": 6.555891480067736e-06, "loss": 18.0291, "step": 22830 }, { "epoch": 0.4173323340705943, "grad_norm": 6.377548209145335, "learning_rate": 6.5556101623173966e-06, "loss": 17.8959, "step": 22831 }, { "epoch": 0.41735061326704076, "grad_norm": 7.416872017395103, "learning_rate": 6.555328839114776e-06, "loss": 18.2488, "step": 22832 }, { "epoch": 0.4173688924634873, "grad_norm": 5.627472921197626, "learning_rate": 6.555047510460866e-06, "loss": 17.2994, "step": 22833 }, { "epoch": 0.41738717165993383, "grad_norm": 7.142209636209356, "learning_rate": 6.554766176356646e-06, "loss": 17.8598, "step": 22834 }, { "epoch": 0.41740545085638037, "grad_norm": 6.156923175015818, "learning_rate": 6.554484836803108e-06, "loss": 17.411, "step": 22835 }, { "epoch": 0.4174237300528269, "grad_norm": 7.262796757388549, "learning_rate": 6.554203491801235e-06, "loss": 17.9443, "step": 22836 }, { "epoch": 0.4174420092492734, "grad_norm": 6.31230909615951, "learning_rate": 6.553922141352012e-06, "loss": 17.5397, "step": 22837 }, { "epoch": 0.4174602884457199, "grad_norm": 5.799852278081229, "learning_rate": 6.55364078545643e-06, "loss": 17.0805, "step": 22838 }, { "epoch": 0.41747856764216645, "grad_norm": 6.628159449081681, "learning_rate": 6.553359424115468e-06, "loss": 17.6692, "step": 22839 }, { "epoch": 0.417496846838613, "grad_norm": 6.820717034896563, "learning_rate": 6.553078057330118e-06, "loss": 17.6101, "step": 22840 }, { "epoch": 0.4175151260350595, "grad_norm": 6.089442215659412, "learning_rate": 6.552796685101364e-06, "loss": 17.4498, "step": 22841 }, { "epoch": 0.417533405231506, "grad_norm": 6.499586675908634, "learning_rate": 6.552515307430194e-06, "loss": 17.801, "step": 22842 }, { "epoch": 0.41755168442795254, "grad_norm": 5.901833702082617, "learning_rate": 6.55223392431759e-06, "loss": 17.4328, "step": 22843 }, { "epoch": 0.4175699636243991, "grad_norm": 8.705715413606889, "learning_rate": 6.551952535764541e-06, "loss": 18.5282, "step": 22844 }, { "epoch": 0.4175882428208456, "grad_norm": 6.320404062671469, "learning_rate": 6.5516711417720355e-06, "loss": 17.5892, "step": 22845 }, { "epoch": 0.4176065220172921, "grad_norm": 7.178860213931443, "learning_rate": 6.551389742341055e-06, "loss": 17.9891, "step": 22846 }, { "epoch": 0.41762480121373863, "grad_norm": 7.656582368173924, "learning_rate": 6.551108337472589e-06, "loss": 18.0129, "step": 22847 }, { "epoch": 0.41764308041018516, "grad_norm": 6.109532819932893, "learning_rate": 6.550826927167623e-06, "loss": 17.4247, "step": 22848 }, { "epoch": 0.4176613596066317, "grad_norm": 6.092443474453851, "learning_rate": 6.5505455114271424e-06, "loss": 17.3379, "step": 22849 }, { "epoch": 0.41767963880307823, "grad_norm": 6.133743134607131, "learning_rate": 6.550264090252134e-06, "loss": 17.7763, "step": 22850 }, { "epoch": 0.4176979179995247, "grad_norm": 8.763945002107015, "learning_rate": 6.549982663643586e-06, "loss": 18.1602, "step": 22851 }, { "epoch": 0.41771619719597125, "grad_norm": 6.210400200575394, "learning_rate": 6.549701231602484e-06, "loss": 17.2319, "step": 22852 }, { "epoch": 0.4177344763924178, "grad_norm": 6.661533010185124, "learning_rate": 6.54941979412981e-06, "loss": 17.7869, "step": 22853 }, { "epoch": 0.4177527555888643, "grad_norm": 6.898743035864414, "learning_rate": 6.549138351226556e-06, "loss": 17.7566, "step": 22854 }, { "epoch": 0.41777103478531086, "grad_norm": 6.548996978112739, "learning_rate": 6.548856902893708e-06, "loss": 17.5209, "step": 22855 }, { "epoch": 0.41778931398175734, "grad_norm": 7.238878738760886, "learning_rate": 6.5485754491322494e-06, "loss": 17.9588, "step": 22856 }, { "epoch": 0.4178075931782039, "grad_norm": 6.436757042608861, "learning_rate": 6.548293989943168e-06, "loss": 17.5837, "step": 22857 }, { "epoch": 0.4178258723746504, "grad_norm": 5.661168070102067, "learning_rate": 6.5480125253274505e-06, "loss": 17.3673, "step": 22858 }, { "epoch": 0.41784415157109694, "grad_norm": 6.605295512255456, "learning_rate": 6.547731055286085e-06, "loss": 17.3872, "step": 22859 }, { "epoch": 0.4178624307675435, "grad_norm": 7.909887497474025, "learning_rate": 6.5474495798200555e-06, "loss": 18.1409, "step": 22860 }, { "epoch": 0.41788070996398996, "grad_norm": 6.512755013831273, "learning_rate": 6.5471680989303495e-06, "loss": 17.5697, "step": 22861 }, { "epoch": 0.4178989891604365, "grad_norm": 5.761497015408156, "learning_rate": 6.546886612617953e-06, "loss": 17.1833, "step": 22862 }, { "epoch": 0.41791726835688303, "grad_norm": 5.362426299524855, "learning_rate": 6.546605120883854e-06, "loss": 17.2209, "step": 22863 }, { "epoch": 0.41793554755332957, "grad_norm": 5.837311405647309, "learning_rate": 6.546323623729038e-06, "loss": 17.453, "step": 22864 }, { "epoch": 0.4179538267497761, "grad_norm": 7.939985383396437, "learning_rate": 6.546042121154492e-06, "loss": 17.9857, "step": 22865 }, { "epoch": 0.4179721059462226, "grad_norm": 6.930859774128972, "learning_rate": 6.545760613161202e-06, "loss": 17.5704, "step": 22866 }, { "epoch": 0.4179903851426691, "grad_norm": 6.103396396617472, "learning_rate": 6.545479099750156e-06, "loss": 17.1374, "step": 22867 }, { "epoch": 0.41800866433911565, "grad_norm": 5.99368123074367, "learning_rate": 6.545197580922339e-06, "loss": 17.5591, "step": 22868 }, { "epoch": 0.4180269435355622, "grad_norm": 6.182348943063241, "learning_rate": 6.54491605667874e-06, "loss": 17.5682, "step": 22869 }, { "epoch": 0.4180452227320087, "grad_norm": 6.729086066286519, "learning_rate": 6.544634527020343e-06, "loss": 17.3765, "step": 22870 }, { "epoch": 0.4180635019284552, "grad_norm": 7.225283574756662, "learning_rate": 6.5443529919481355e-06, "loss": 17.8946, "step": 22871 }, { "epoch": 0.41808178112490174, "grad_norm": 7.6669910653718825, "learning_rate": 6.5440714514631056e-06, "loss": 18.1302, "step": 22872 }, { "epoch": 0.4181000603213483, "grad_norm": 5.850875944637185, "learning_rate": 6.54378990556624e-06, "loss": 17.4011, "step": 22873 }, { "epoch": 0.4181183395177948, "grad_norm": 7.086829435337445, "learning_rate": 6.5435083542585235e-06, "loss": 17.5344, "step": 22874 }, { "epoch": 0.41813661871424135, "grad_norm": 7.1525906763665885, "learning_rate": 6.543226797540945e-06, "loss": 18.6293, "step": 22875 }, { "epoch": 0.41815489791068783, "grad_norm": 7.118951815704408, "learning_rate": 6.542945235414489e-06, "loss": 17.6044, "step": 22876 }, { "epoch": 0.41817317710713436, "grad_norm": 6.265518363771976, "learning_rate": 6.542663667880145e-06, "loss": 17.3717, "step": 22877 }, { "epoch": 0.4181914563035809, "grad_norm": 6.87913448403044, "learning_rate": 6.5423820949388995e-06, "loss": 17.4798, "step": 22878 }, { "epoch": 0.41820973550002744, "grad_norm": 5.81084269600853, "learning_rate": 6.542100516591737e-06, "loss": 17.2091, "step": 22879 }, { "epoch": 0.4182280146964739, "grad_norm": 6.3975723906987625, "learning_rate": 6.541818932839646e-06, "loss": 17.7207, "step": 22880 }, { "epoch": 0.41824629389292045, "grad_norm": 6.452800317594072, "learning_rate": 6.541537343683615e-06, "loss": 17.413, "step": 22881 }, { "epoch": 0.418264573089367, "grad_norm": 6.346871107329755, "learning_rate": 6.541255749124629e-06, "loss": 17.3771, "step": 22882 }, { "epoch": 0.4182828522858135, "grad_norm": 6.923827081749899, "learning_rate": 6.5409741491636746e-06, "loss": 17.3506, "step": 22883 }, { "epoch": 0.41830113148226006, "grad_norm": 7.067361672916809, "learning_rate": 6.54069254380174e-06, "loss": 17.9107, "step": 22884 }, { "epoch": 0.41831941067870654, "grad_norm": 6.980080056759994, "learning_rate": 6.54041093303981e-06, "loss": 17.8602, "step": 22885 }, { "epoch": 0.4183376898751531, "grad_norm": 7.087277520644656, "learning_rate": 6.540129316878876e-06, "loss": 17.9097, "step": 22886 }, { "epoch": 0.4183559690715996, "grad_norm": 5.600155285651116, "learning_rate": 6.539847695319922e-06, "loss": 17.1198, "step": 22887 }, { "epoch": 0.41837424826804614, "grad_norm": 6.921820599430229, "learning_rate": 6.539566068363934e-06, "loss": 17.7032, "step": 22888 }, { "epoch": 0.4183925274644927, "grad_norm": 7.571194767346009, "learning_rate": 6.539284436011901e-06, "loss": 17.7819, "step": 22889 }, { "epoch": 0.41841080666093916, "grad_norm": 6.197019217095146, "learning_rate": 6.539002798264811e-06, "loss": 17.2801, "step": 22890 }, { "epoch": 0.4184290858573857, "grad_norm": 6.59662714682823, "learning_rate": 6.5387211551236485e-06, "loss": 17.7627, "step": 22891 }, { "epoch": 0.41844736505383223, "grad_norm": 5.962347021512397, "learning_rate": 6.538439506589401e-06, "loss": 17.2985, "step": 22892 }, { "epoch": 0.41846564425027877, "grad_norm": 6.084208894677302, "learning_rate": 6.538157852663059e-06, "loss": 17.3511, "step": 22893 }, { "epoch": 0.4184839234467253, "grad_norm": 8.792003026134429, "learning_rate": 6.537876193345605e-06, "loss": 18.112, "step": 22894 }, { "epoch": 0.4185022026431718, "grad_norm": 5.730695756311766, "learning_rate": 6.537594528638028e-06, "loss": 17.2505, "step": 22895 }, { "epoch": 0.4185204818396183, "grad_norm": 5.973897008972709, "learning_rate": 6.537312858541317e-06, "loss": 17.3319, "step": 22896 }, { "epoch": 0.41853876103606485, "grad_norm": 5.26638274908141, "learning_rate": 6.537031183056459e-06, "loss": 17.4098, "step": 22897 }, { "epoch": 0.4185570402325114, "grad_norm": 5.63597513759987, "learning_rate": 6.536749502184437e-06, "loss": 17.3671, "step": 22898 }, { "epoch": 0.4185753194289579, "grad_norm": 6.58035156249633, "learning_rate": 6.536467815926243e-06, "loss": 17.7771, "step": 22899 }, { "epoch": 0.4185935986254044, "grad_norm": 7.699676029301902, "learning_rate": 6.5361861242828635e-06, "loss": 18.0079, "step": 22900 }, { "epoch": 0.41861187782185094, "grad_norm": 5.7441541297691785, "learning_rate": 6.535904427255284e-06, "loss": 17.2305, "step": 22901 }, { "epoch": 0.4186301570182975, "grad_norm": 7.929346601580015, "learning_rate": 6.535622724844492e-06, "loss": 17.9838, "step": 22902 }, { "epoch": 0.418648436214744, "grad_norm": 5.295553888626519, "learning_rate": 6.535341017051477e-06, "loss": 17.1963, "step": 22903 }, { "epoch": 0.41866671541119055, "grad_norm": 9.134796239102027, "learning_rate": 6.535059303877224e-06, "loss": 17.7302, "step": 22904 }, { "epoch": 0.41868499460763703, "grad_norm": 7.049232928319126, "learning_rate": 6.534777585322722e-06, "loss": 17.7632, "step": 22905 }, { "epoch": 0.41870327380408356, "grad_norm": 6.862135989837325, "learning_rate": 6.5344958613889575e-06, "loss": 17.8258, "step": 22906 }, { "epoch": 0.4187215530005301, "grad_norm": 7.14700917026873, "learning_rate": 6.534214132076918e-06, "loss": 17.7554, "step": 22907 }, { "epoch": 0.41873983219697664, "grad_norm": 6.623554884593093, "learning_rate": 6.533932397387591e-06, "loss": 17.637, "step": 22908 }, { "epoch": 0.41875811139342317, "grad_norm": 7.292976780208033, "learning_rate": 6.533650657321965e-06, "loss": 18.3185, "step": 22909 }, { "epoch": 0.41877639058986965, "grad_norm": 6.2192238774027775, "learning_rate": 6.5333689118810265e-06, "loss": 17.295, "step": 22910 }, { "epoch": 0.4187946697863162, "grad_norm": 6.987670920154048, "learning_rate": 6.533087161065762e-06, "loss": 17.6673, "step": 22911 }, { "epoch": 0.4188129489827627, "grad_norm": 6.243765544688094, "learning_rate": 6.5328054048771594e-06, "loss": 17.4971, "step": 22912 }, { "epoch": 0.41883122817920926, "grad_norm": 7.625280996094554, "learning_rate": 6.5325236433162084e-06, "loss": 18.003, "step": 22913 }, { "epoch": 0.41884950737565574, "grad_norm": 6.994930904578712, "learning_rate": 6.5322418763838954e-06, "loss": 17.5114, "step": 22914 }, { "epoch": 0.4188677865721023, "grad_norm": 7.214030486165409, "learning_rate": 6.531960104081206e-06, "loss": 17.4588, "step": 22915 }, { "epoch": 0.4188860657685488, "grad_norm": 6.766063991825144, "learning_rate": 6.53167832640913e-06, "loss": 17.5853, "step": 22916 }, { "epoch": 0.41890434496499535, "grad_norm": 5.607349715613173, "learning_rate": 6.531396543368653e-06, "loss": 17.0989, "step": 22917 }, { "epoch": 0.4189226241614419, "grad_norm": 6.568315322497222, "learning_rate": 6.531114754960767e-06, "loss": 17.6328, "step": 22918 }, { "epoch": 0.41894090335788836, "grad_norm": 6.6660130541892455, "learning_rate": 6.5308329611864555e-06, "loss": 17.5281, "step": 22919 }, { "epoch": 0.4189591825543349, "grad_norm": 5.385308447069871, "learning_rate": 6.5305511620467065e-06, "loss": 17.34, "step": 22920 }, { "epoch": 0.41897746175078143, "grad_norm": 5.628795621941321, "learning_rate": 6.530269357542509e-06, "loss": 16.9445, "step": 22921 }, { "epoch": 0.41899574094722797, "grad_norm": 6.669207417037738, "learning_rate": 6.52998754767485e-06, "loss": 17.6181, "step": 22922 }, { "epoch": 0.4190140201436745, "grad_norm": 7.998858964954297, "learning_rate": 6.529705732444716e-06, "loss": 17.9669, "step": 22923 }, { "epoch": 0.419032299340121, "grad_norm": 8.60196555676147, "learning_rate": 6.529423911853099e-06, "loss": 18.0825, "step": 22924 }, { "epoch": 0.4190505785365675, "grad_norm": 6.788464748694883, "learning_rate": 6.529142085900981e-06, "loss": 17.6837, "step": 22925 }, { "epoch": 0.41906885773301406, "grad_norm": 6.576786036431059, "learning_rate": 6.528860254589356e-06, "loss": 17.4223, "step": 22926 }, { "epoch": 0.4190871369294606, "grad_norm": 6.463190432849364, "learning_rate": 6.528578417919206e-06, "loss": 17.5257, "step": 22927 }, { "epoch": 0.4191054161259071, "grad_norm": 6.992534174268173, "learning_rate": 6.528296575891523e-06, "loss": 17.8828, "step": 22928 }, { "epoch": 0.4191236953223536, "grad_norm": 7.223467350509907, "learning_rate": 6.5280147285072915e-06, "loss": 17.7576, "step": 22929 }, { "epoch": 0.41914197451880014, "grad_norm": 6.330170602348665, "learning_rate": 6.527732875767501e-06, "loss": 17.417, "step": 22930 }, { "epoch": 0.4191602537152467, "grad_norm": 6.005615070281294, "learning_rate": 6.527451017673141e-06, "loss": 17.3151, "step": 22931 }, { "epoch": 0.4191785329116932, "grad_norm": 6.5658764577434, "learning_rate": 6.527169154225196e-06, "loss": 17.2052, "step": 22932 }, { "epoch": 0.41919681210813975, "grad_norm": 7.586881698945212, "learning_rate": 6.526887285424657e-06, "loss": 18.1262, "step": 22933 }, { "epoch": 0.41921509130458623, "grad_norm": 8.520250697467581, "learning_rate": 6.526605411272509e-06, "loss": 18.5186, "step": 22934 }, { "epoch": 0.41923337050103276, "grad_norm": 6.0149379766618445, "learning_rate": 6.5263235317697425e-06, "loss": 17.394, "step": 22935 }, { "epoch": 0.4192516496974793, "grad_norm": 7.677557024289154, "learning_rate": 6.526041646917344e-06, "loss": 18.3367, "step": 22936 }, { "epoch": 0.41926992889392584, "grad_norm": 8.267497217504378, "learning_rate": 6.525759756716302e-06, "loss": 18.0889, "step": 22937 }, { "epoch": 0.41928820809037237, "grad_norm": 6.730750142506911, "learning_rate": 6.525477861167606e-06, "loss": 17.9213, "step": 22938 }, { "epoch": 0.41930648728681885, "grad_norm": 5.9702144330438385, "learning_rate": 6.52519596027224e-06, "loss": 17.4348, "step": 22939 }, { "epoch": 0.4193247664832654, "grad_norm": 6.446330953569598, "learning_rate": 6.524914054031195e-06, "loss": 17.8092, "step": 22940 }, { "epoch": 0.4193430456797119, "grad_norm": 6.5551284079364756, "learning_rate": 6.52463214244546e-06, "loss": 17.7906, "step": 22941 }, { "epoch": 0.41936132487615846, "grad_norm": 5.762021855641645, "learning_rate": 6.524350225516022e-06, "loss": 17.2061, "step": 22942 }, { "epoch": 0.419379604072605, "grad_norm": 7.679490353746419, "learning_rate": 6.5240683032438665e-06, "loss": 17.8234, "step": 22943 }, { "epoch": 0.4193978832690515, "grad_norm": 5.921225542284149, "learning_rate": 6.5237863756299845e-06, "loss": 17.2429, "step": 22944 }, { "epoch": 0.419416162465498, "grad_norm": 7.252687262905585, "learning_rate": 6.523504442675366e-06, "loss": 17.5584, "step": 22945 }, { "epoch": 0.41943444166194455, "grad_norm": 5.835753477697853, "learning_rate": 6.523222504380994e-06, "loss": 17.2518, "step": 22946 }, { "epoch": 0.4194527208583911, "grad_norm": 5.739807876177038, "learning_rate": 6.522940560747859e-06, "loss": 17.1743, "step": 22947 }, { "epoch": 0.41947100005483756, "grad_norm": 7.819202206090441, "learning_rate": 6.5226586117769504e-06, "loss": 18.5037, "step": 22948 }, { "epoch": 0.4194892792512841, "grad_norm": 6.0028511634436015, "learning_rate": 6.522376657469256e-06, "loss": 17.5147, "step": 22949 }, { "epoch": 0.41950755844773063, "grad_norm": 5.319944123196091, "learning_rate": 6.522094697825763e-06, "loss": 16.9886, "step": 22950 }, { "epoch": 0.41952583764417717, "grad_norm": 5.881423393874906, "learning_rate": 6.52181273284746e-06, "loss": 17.3217, "step": 22951 }, { "epoch": 0.4195441168406237, "grad_norm": 5.815097478620257, "learning_rate": 6.521530762535336e-06, "loss": 17.3631, "step": 22952 }, { "epoch": 0.4195623960370702, "grad_norm": 5.740822827179228, "learning_rate": 6.521248786890377e-06, "loss": 17.2605, "step": 22953 }, { "epoch": 0.4195806752335167, "grad_norm": 6.358788451252137, "learning_rate": 6.5209668059135755e-06, "loss": 17.606, "step": 22954 }, { "epoch": 0.41959895442996326, "grad_norm": 5.87558205935959, "learning_rate": 6.520684819605917e-06, "loss": 17.2332, "step": 22955 }, { "epoch": 0.4196172336264098, "grad_norm": 6.093464300381178, "learning_rate": 6.520402827968389e-06, "loss": 17.5618, "step": 22956 }, { "epoch": 0.4196355128228563, "grad_norm": 5.730160457463107, "learning_rate": 6.5201208310019815e-06, "loss": 17.323, "step": 22957 }, { "epoch": 0.4196537920193028, "grad_norm": 6.346543923820535, "learning_rate": 6.51983882870768e-06, "loss": 17.4396, "step": 22958 }, { "epoch": 0.41967207121574934, "grad_norm": 7.25686662683186, "learning_rate": 6.519556821086479e-06, "loss": 17.8619, "step": 22959 }, { "epoch": 0.4196903504121959, "grad_norm": 6.531134559914157, "learning_rate": 6.519274808139362e-06, "loss": 17.317, "step": 22960 }, { "epoch": 0.4197086296086424, "grad_norm": 17.641543513914314, "learning_rate": 6.5189927898673174e-06, "loss": 18.4942, "step": 22961 }, { "epoch": 0.41972690880508895, "grad_norm": 7.338485644450107, "learning_rate": 6.518710766271337e-06, "loss": 17.8047, "step": 22962 }, { "epoch": 0.41974518800153543, "grad_norm": 7.69381003070154, "learning_rate": 6.518428737352406e-06, "loss": 18.1808, "step": 22963 }, { "epoch": 0.41976346719798197, "grad_norm": 5.71042703009896, "learning_rate": 6.518146703111513e-06, "loss": 17.1054, "step": 22964 }, { "epoch": 0.4197817463944285, "grad_norm": 5.663902545869665, "learning_rate": 6.517864663549649e-06, "loss": 17.2465, "step": 22965 }, { "epoch": 0.41980002559087504, "grad_norm": 6.541940914774427, "learning_rate": 6.5175826186678e-06, "loss": 17.487, "step": 22966 }, { "epoch": 0.4198183047873216, "grad_norm": 5.950104287109007, "learning_rate": 6.517300568466956e-06, "loss": 17.3955, "step": 22967 }, { "epoch": 0.41983658398376805, "grad_norm": 6.896729789912742, "learning_rate": 6.517018512948106e-06, "loss": 17.9357, "step": 22968 }, { "epoch": 0.4198548631802146, "grad_norm": 5.469552240937213, "learning_rate": 6.516736452112238e-06, "loss": 17.0821, "step": 22969 }, { "epoch": 0.4198731423766611, "grad_norm": 4.997359057882777, "learning_rate": 6.51645438596034e-06, "loss": 16.9295, "step": 22970 }, { "epoch": 0.41989142157310766, "grad_norm": 5.3222105377112765, "learning_rate": 6.516172314493399e-06, "loss": 17.0438, "step": 22971 }, { "epoch": 0.4199097007695542, "grad_norm": 5.246128332099832, "learning_rate": 6.515890237712408e-06, "loss": 16.9717, "step": 22972 }, { "epoch": 0.4199279799660007, "grad_norm": 5.515928106494591, "learning_rate": 6.515608155618353e-06, "loss": 17.2534, "step": 22973 }, { "epoch": 0.4199462591624472, "grad_norm": 8.869645870466277, "learning_rate": 6.515326068212222e-06, "loss": 17.8627, "step": 22974 }, { "epoch": 0.41996453835889375, "grad_norm": 6.34706392084448, "learning_rate": 6.515043975495005e-06, "loss": 17.3602, "step": 22975 }, { "epoch": 0.4199828175553403, "grad_norm": 9.963344186945106, "learning_rate": 6.5147618774676905e-06, "loss": 18.176, "step": 22976 }, { "epoch": 0.4200010967517868, "grad_norm": 5.851374040972541, "learning_rate": 6.514479774131266e-06, "loss": 17.4657, "step": 22977 }, { "epoch": 0.4200193759482333, "grad_norm": 4.702057656452562, "learning_rate": 6.514197665486723e-06, "loss": 16.7217, "step": 22978 }, { "epoch": 0.42003765514467983, "grad_norm": 5.870093732208084, "learning_rate": 6.513915551535047e-06, "loss": 17.2361, "step": 22979 }, { "epoch": 0.42005593434112637, "grad_norm": 5.632922764412533, "learning_rate": 6.513633432277229e-06, "loss": 17.2576, "step": 22980 }, { "epoch": 0.4200742135375729, "grad_norm": 8.060190828372876, "learning_rate": 6.513351307714257e-06, "loss": 18.1932, "step": 22981 }, { "epoch": 0.4200924927340194, "grad_norm": 6.025354070894363, "learning_rate": 6.51306917784712e-06, "loss": 17.4459, "step": 22982 }, { "epoch": 0.4201107719304659, "grad_norm": 6.708815475795794, "learning_rate": 6.512787042676808e-06, "loss": 17.6669, "step": 22983 }, { "epoch": 0.42012905112691246, "grad_norm": 7.122797819201618, "learning_rate": 6.512504902204309e-06, "loss": 17.6847, "step": 22984 }, { "epoch": 0.420147330323359, "grad_norm": 5.589616164174937, "learning_rate": 6.512222756430609e-06, "loss": 17.0379, "step": 22985 }, { "epoch": 0.4201656095198055, "grad_norm": 6.83522881316481, "learning_rate": 6.5119406053567e-06, "loss": 17.559, "step": 22986 }, { "epoch": 0.420183888716252, "grad_norm": 7.503206816650065, "learning_rate": 6.511658448983572e-06, "loss": 17.8493, "step": 22987 }, { "epoch": 0.42020216791269854, "grad_norm": 6.789844118966189, "learning_rate": 6.511376287312212e-06, "loss": 17.7367, "step": 22988 }, { "epoch": 0.4202204471091451, "grad_norm": 7.627172290430774, "learning_rate": 6.511094120343608e-06, "loss": 17.6574, "step": 22989 }, { "epoch": 0.4202387263055916, "grad_norm": 7.249181799702012, "learning_rate": 6.510811948078751e-06, "loss": 17.883, "step": 22990 }, { "epoch": 0.42025700550203815, "grad_norm": 6.714112096450881, "learning_rate": 6.51052977051863e-06, "loss": 17.7014, "step": 22991 }, { "epoch": 0.42027528469848463, "grad_norm": 6.294913210124984, "learning_rate": 6.510247587664231e-06, "loss": 17.3123, "step": 22992 }, { "epoch": 0.42029356389493117, "grad_norm": 7.10322660420361, "learning_rate": 6.509965399516547e-06, "loss": 17.5675, "step": 22993 }, { "epoch": 0.4203118430913777, "grad_norm": 9.548721430594611, "learning_rate": 6.509683206076565e-06, "loss": 18.3707, "step": 22994 }, { "epoch": 0.42033012228782424, "grad_norm": 5.962714972520607, "learning_rate": 6.509401007345275e-06, "loss": 17.2836, "step": 22995 }, { "epoch": 0.4203484014842708, "grad_norm": 6.639399571622893, "learning_rate": 6.509118803323664e-06, "loss": 17.5949, "step": 22996 }, { "epoch": 0.42036668068071725, "grad_norm": 7.976306646430064, "learning_rate": 6.508836594012724e-06, "loss": 18.7225, "step": 22997 }, { "epoch": 0.4203849598771638, "grad_norm": 6.963961481132685, "learning_rate": 6.508554379413441e-06, "loss": 17.7807, "step": 22998 }, { "epoch": 0.4204032390736103, "grad_norm": 6.820626992447245, "learning_rate": 6.508272159526807e-06, "loss": 17.661, "step": 22999 }, { "epoch": 0.42042151827005686, "grad_norm": 5.6887619432817855, "learning_rate": 6.507989934353811e-06, "loss": 17.4962, "step": 23000 }, { "epoch": 0.4204397974665034, "grad_norm": 6.520621347198876, "learning_rate": 6.507707703895441e-06, "loss": 17.683, "step": 23001 }, { "epoch": 0.4204580766629499, "grad_norm": 7.230178250681007, "learning_rate": 6.507425468152684e-06, "loss": 17.8178, "step": 23002 }, { "epoch": 0.4204763558593964, "grad_norm": 7.004311385823891, "learning_rate": 6.5071432271265325e-06, "loss": 17.6068, "step": 23003 }, { "epoch": 0.42049463505584295, "grad_norm": 6.563694562402713, "learning_rate": 6.506860980817975e-06, "loss": 17.8427, "step": 23004 }, { "epoch": 0.4205129142522895, "grad_norm": 5.298064693816009, "learning_rate": 6.506578729228002e-06, "loss": 17.1916, "step": 23005 }, { "epoch": 0.420531193448736, "grad_norm": 5.726445908275986, "learning_rate": 6.5062964723575984e-06, "loss": 17.3356, "step": 23006 }, { "epoch": 0.4205494726451825, "grad_norm": 7.583442717564308, "learning_rate": 6.506014210207758e-06, "loss": 18.0529, "step": 23007 }, { "epoch": 0.42056775184162903, "grad_norm": 6.949887057451262, "learning_rate": 6.505731942779469e-06, "loss": 17.7024, "step": 23008 }, { "epoch": 0.42058603103807557, "grad_norm": 9.40189290605422, "learning_rate": 6.505449670073719e-06, "loss": 18.596, "step": 23009 }, { "epoch": 0.4206043102345221, "grad_norm": 6.785372381138408, "learning_rate": 6.505167392091499e-06, "loss": 17.7004, "step": 23010 }, { "epoch": 0.42062258943096864, "grad_norm": 5.254013375883744, "learning_rate": 6.5048851088338e-06, "loss": 17.0435, "step": 23011 }, { "epoch": 0.4206408686274151, "grad_norm": 6.722683697860286, "learning_rate": 6.5046028203016056e-06, "loss": 17.5095, "step": 23012 }, { "epoch": 0.42065914782386166, "grad_norm": 11.684252960700595, "learning_rate": 6.50432052649591e-06, "loss": 19.2351, "step": 23013 }, { "epoch": 0.4206774270203082, "grad_norm": 5.700003385395421, "learning_rate": 6.504038227417703e-06, "loss": 17.2616, "step": 23014 }, { "epoch": 0.42069570621675473, "grad_norm": 6.3408082485463115, "learning_rate": 6.503755923067972e-06, "loss": 17.5155, "step": 23015 }, { "epoch": 0.4207139854132012, "grad_norm": 6.399662701364488, "learning_rate": 6.5034736134477064e-06, "loss": 17.48, "step": 23016 }, { "epoch": 0.42073226460964774, "grad_norm": 7.063894437669993, "learning_rate": 6.503191298557895e-06, "loss": 17.5484, "step": 23017 }, { "epoch": 0.4207505438060943, "grad_norm": 5.456594985238224, "learning_rate": 6.502908978399531e-06, "loss": 17.2491, "step": 23018 }, { "epoch": 0.4207688230025408, "grad_norm": 6.251009222255237, "learning_rate": 6.502626652973601e-06, "loss": 17.478, "step": 23019 }, { "epoch": 0.42078710219898735, "grad_norm": 5.105701868192694, "learning_rate": 6.502344322281093e-06, "loss": 16.9507, "step": 23020 }, { "epoch": 0.42080538139543383, "grad_norm": 5.973386292605992, "learning_rate": 6.502061986323001e-06, "loss": 17.4321, "step": 23021 }, { "epoch": 0.42082366059188037, "grad_norm": 6.355501402620701, "learning_rate": 6.50177964510031e-06, "loss": 17.5487, "step": 23022 }, { "epoch": 0.4208419397883269, "grad_norm": 6.529065987468653, "learning_rate": 6.501497298614012e-06, "loss": 17.4421, "step": 23023 }, { "epoch": 0.42086021898477344, "grad_norm": 4.734227154551184, "learning_rate": 6.501214946865099e-06, "loss": 16.8256, "step": 23024 }, { "epoch": 0.42087849818122, "grad_norm": 6.475746937751447, "learning_rate": 6.500932589854554e-06, "loss": 17.6178, "step": 23025 }, { "epoch": 0.42089677737766645, "grad_norm": 7.898971635939524, "learning_rate": 6.500650227583373e-06, "loss": 17.9597, "step": 23026 }, { "epoch": 0.420915056574113, "grad_norm": 5.868163200725381, "learning_rate": 6.500367860052542e-06, "loss": 17.55, "step": 23027 }, { "epoch": 0.4209333357705595, "grad_norm": 6.305679871062329, "learning_rate": 6.500085487263054e-06, "loss": 17.4224, "step": 23028 }, { "epoch": 0.42095161496700606, "grad_norm": 7.460606064130908, "learning_rate": 6.4998031092158945e-06, "loss": 17.7353, "step": 23029 }, { "epoch": 0.4209698941634526, "grad_norm": 7.432347668733622, "learning_rate": 6.4995207259120545e-06, "loss": 18.1434, "step": 23030 }, { "epoch": 0.4209881733598991, "grad_norm": 7.0641466773516255, "learning_rate": 6.499238337352526e-06, "loss": 17.4237, "step": 23031 }, { "epoch": 0.4210064525563456, "grad_norm": 5.934801043033059, "learning_rate": 6.498955943538296e-06, "loss": 17.3854, "step": 23032 }, { "epoch": 0.42102473175279215, "grad_norm": 5.9483563809520374, "learning_rate": 6.498673544470357e-06, "loss": 17.0176, "step": 23033 }, { "epoch": 0.4210430109492387, "grad_norm": 6.40693209554785, "learning_rate": 6.498391140149697e-06, "loss": 17.5485, "step": 23034 }, { "epoch": 0.4210612901456852, "grad_norm": 6.269989729490269, "learning_rate": 6.498108730577305e-06, "loss": 17.344, "step": 23035 }, { "epoch": 0.4210795693421317, "grad_norm": 5.74404053994302, "learning_rate": 6.4978263157541724e-06, "loss": 17.3322, "step": 23036 }, { "epoch": 0.42109784853857823, "grad_norm": 6.126397048386426, "learning_rate": 6.497543895681289e-06, "loss": 17.4026, "step": 23037 }, { "epoch": 0.42111612773502477, "grad_norm": 7.4906858377145245, "learning_rate": 6.497261470359645e-06, "loss": 17.9141, "step": 23038 }, { "epoch": 0.4211344069314713, "grad_norm": 7.246093139301408, "learning_rate": 6.496979039790228e-06, "loss": 17.6428, "step": 23039 }, { "epoch": 0.42115268612791784, "grad_norm": 5.9149978582310725, "learning_rate": 6.496696603974029e-06, "loss": 17.2904, "step": 23040 }, { "epoch": 0.4211709653243643, "grad_norm": 5.73176407449862, "learning_rate": 6.496414162912039e-06, "loss": 17.5765, "step": 23041 }, { "epoch": 0.42118924452081086, "grad_norm": 6.743509820661, "learning_rate": 6.496131716605247e-06, "loss": 17.6755, "step": 23042 }, { "epoch": 0.4212075237172574, "grad_norm": 21.24521701980779, "learning_rate": 6.495849265054645e-06, "loss": 17.8667, "step": 23043 }, { "epoch": 0.42122580291370393, "grad_norm": 7.545617373943087, "learning_rate": 6.495566808261218e-06, "loss": 17.9976, "step": 23044 }, { "epoch": 0.42124408211015046, "grad_norm": 7.0451922621308105, "learning_rate": 6.4952843462259605e-06, "loss": 17.7555, "step": 23045 }, { "epoch": 0.42126236130659694, "grad_norm": 6.393711946691229, "learning_rate": 6.495001878949862e-06, "loss": 17.5035, "step": 23046 }, { "epoch": 0.4212806405030435, "grad_norm": 7.00282923224376, "learning_rate": 6.4947194064339106e-06, "loss": 17.6665, "step": 23047 }, { "epoch": 0.42129891969949, "grad_norm": 5.760467812965505, "learning_rate": 6.494436928679098e-06, "loss": 17.2732, "step": 23048 }, { "epoch": 0.42131719889593655, "grad_norm": 7.456025998569738, "learning_rate": 6.494154445686413e-06, "loss": 17.6858, "step": 23049 }, { "epoch": 0.42133547809238303, "grad_norm": 5.678786638497476, "learning_rate": 6.493871957456847e-06, "loss": 17.263, "step": 23050 }, { "epoch": 0.42135375728882957, "grad_norm": 6.27811710201321, "learning_rate": 6.493589463991389e-06, "loss": 17.6338, "step": 23051 }, { "epoch": 0.4213720364852761, "grad_norm": 6.141284749832784, "learning_rate": 6.4933069652910286e-06, "loss": 17.4214, "step": 23052 }, { "epoch": 0.42139031568172264, "grad_norm": 6.329698450530063, "learning_rate": 6.4930244613567585e-06, "loss": 17.258, "step": 23053 }, { "epoch": 0.4214085948781692, "grad_norm": 5.062037776340806, "learning_rate": 6.492741952189566e-06, "loss": 16.6702, "step": 23054 }, { "epoch": 0.42142687407461565, "grad_norm": 6.427858729614545, "learning_rate": 6.492459437790444e-06, "loss": 17.6709, "step": 23055 }, { "epoch": 0.4214451532710622, "grad_norm": 8.082825206280688, "learning_rate": 6.49217691816038e-06, "loss": 17.9099, "step": 23056 }, { "epoch": 0.4214634324675087, "grad_norm": 6.148529649176768, "learning_rate": 6.4918943933003654e-06, "loss": 17.223, "step": 23057 }, { "epoch": 0.42148171166395526, "grad_norm": 6.107731647693613, "learning_rate": 6.49161186321139e-06, "loss": 17.5471, "step": 23058 }, { "epoch": 0.4214999908604018, "grad_norm": 6.598753730395351, "learning_rate": 6.491329327894447e-06, "loss": 17.6031, "step": 23059 }, { "epoch": 0.4215182700568483, "grad_norm": 6.269006757437165, "learning_rate": 6.4910467873505215e-06, "loss": 17.3866, "step": 23060 }, { "epoch": 0.4215365492532948, "grad_norm": 8.63377623937528, "learning_rate": 6.490764241580607e-06, "loss": 18.8411, "step": 23061 }, { "epoch": 0.42155482844974135, "grad_norm": 6.131700349978271, "learning_rate": 6.490481690585694e-06, "loss": 17.1781, "step": 23062 }, { "epoch": 0.4215731076461879, "grad_norm": 5.9149467235687485, "learning_rate": 6.49019913436677e-06, "loss": 17.239, "step": 23063 }, { "epoch": 0.4215913868426344, "grad_norm": 7.234875253028634, "learning_rate": 6.489916572924829e-06, "loss": 17.8849, "step": 23064 }, { "epoch": 0.4216096660390809, "grad_norm": 6.330188920281069, "learning_rate": 6.4896340062608595e-06, "loss": 17.2995, "step": 23065 }, { "epoch": 0.42162794523552743, "grad_norm": 7.635884789761982, "learning_rate": 6.489351434375852e-06, "loss": 18.158, "step": 23066 }, { "epoch": 0.42164622443197397, "grad_norm": 7.506853994220852, "learning_rate": 6.4890688572707975e-06, "loss": 17.9646, "step": 23067 }, { "epoch": 0.4216645036284205, "grad_norm": 7.183464164255513, "learning_rate": 6.488786274946684e-06, "loss": 17.7851, "step": 23068 }, { "epoch": 0.42168278282486704, "grad_norm": 6.9744005775285, "learning_rate": 6.488503687404506e-06, "loss": 18.1321, "step": 23069 }, { "epoch": 0.4217010620213135, "grad_norm": 5.343039344496768, "learning_rate": 6.4882210946452515e-06, "loss": 17.066, "step": 23070 }, { "epoch": 0.42171934121776006, "grad_norm": 7.2410374871601615, "learning_rate": 6.48793849666991e-06, "loss": 17.9285, "step": 23071 }, { "epoch": 0.4217376204142066, "grad_norm": 6.736478462705318, "learning_rate": 6.487655893479473e-06, "loss": 17.6948, "step": 23072 }, { "epoch": 0.42175589961065313, "grad_norm": 8.07099306422754, "learning_rate": 6.487373285074933e-06, "loss": 17.7591, "step": 23073 }, { "epoch": 0.42177417880709966, "grad_norm": 8.15454074081976, "learning_rate": 6.487090671457278e-06, "loss": 17.8959, "step": 23074 }, { "epoch": 0.42179245800354614, "grad_norm": 5.813501670173463, "learning_rate": 6.4868080526274975e-06, "loss": 17.3495, "step": 23075 }, { "epoch": 0.4218107371999927, "grad_norm": 6.051453057483406, "learning_rate": 6.4865254285865855e-06, "loss": 17.4222, "step": 23076 }, { "epoch": 0.4218290163964392, "grad_norm": 5.7059688922023994, "learning_rate": 6.4862427993355315e-06, "loss": 17.3903, "step": 23077 }, { "epoch": 0.42184729559288575, "grad_norm": 7.380393933533788, "learning_rate": 6.485960164875323e-06, "loss": 17.5945, "step": 23078 }, { "epoch": 0.4218655747893323, "grad_norm": 5.822752000654732, "learning_rate": 6.485677525206955e-06, "loss": 17.3908, "step": 23079 }, { "epoch": 0.42188385398577877, "grad_norm": 6.866617406183222, "learning_rate": 6.485394880331416e-06, "loss": 17.6554, "step": 23080 }, { "epoch": 0.4219021331822253, "grad_norm": 6.685040144882246, "learning_rate": 6.485112230249696e-06, "loss": 17.6405, "step": 23081 }, { "epoch": 0.42192041237867184, "grad_norm": 5.7851308625382645, "learning_rate": 6.484829574962788e-06, "loss": 17.2046, "step": 23082 }, { "epoch": 0.4219386915751184, "grad_norm": 7.830516847175503, "learning_rate": 6.484546914471681e-06, "loss": 18.1744, "step": 23083 }, { "epoch": 0.42195697077156485, "grad_norm": 7.663114401718743, "learning_rate": 6.484264248777365e-06, "loss": 17.722, "step": 23084 }, { "epoch": 0.4219752499680114, "grad_norm": 6.8836285566600095, "learning_rate": 6.48398157788083e-06, "loss": 17.803, "step": 23085 }, { "epoch": 0.4219935291644579, "grad_norm": 6.639189576399866, "learning_rate": 6.4836989017830705e-06, "loss": 17.9358, "step": 23086 }, { "epoch": 0.42201180836090446, "grad_norm": 6.4209760802856835, "learning_rate": 6.483416220485076e-06, "loss": 17.5268, "step": 23087 }, { "epoch": 0.422030087557351, "grad_norm": 6.811218033578101, "learning_rate": 6.4831335339878355e-06, "loss": 18.1312, "step": 23088 }, { "epoch": 0.4220483667537975, "grad_norm": 8.252758684835596, "learning_rate": 6.4828508422923394e-06, "loss": 17.8009, "step": 23089 }, { "epoch": 0.422066645950244, "grad_norm": 5.580805128115323, "learning_rate": 6.4825681453995805e-06, "loss": 17.2021, "step": 23090 }, { "epoch": 0.42208492514669055, "grad_norm": 6.77878781847524, "learning_rate": 6.482285443310549e-06, "loss": 17.3954, "step": 23091 }, { "epoch": 0.4221032043431371, "grad_norm": 5.443725019425856, "learning_rate": 6.482002736026236e-06, "loss": 17.1061, "step": 23092 }, { "epoch": 0.4221214835395836, "grad_norm": 7.432860429111075, "learning_rate": 6.481720023547631e-06, "loss": 17.858, "step": 23093 }, { "epoch": 0.4221397627360301, "grad_norm": 7.115159419646255, "learning_rate": 6.481437305875727e-06, "loss": 17.8522, "step": 23094 }, { "epoch": 0.42215804193247664, "grad_norm": 6.601983738113477, "learning_rate": 6.481154583011513e-06, "loss": 17.8641, "step": 23095 }, { "epoch": 0.42217632112892317, "grad_norm": 6.441212479276569, "learning_rate": 6.4808718549559815e-06, "loss": 17.6611, "step": 23096 }, { "epoch": 0.4221946003253697, "grad_norm": 5.8398323228293, "learning_rate": 6.480589121710123e-06, "loss": 17.3993, "step": 23097 }, { "epoch": 0.42221287952181624, "grad_norm": 5.15225786937002, "learning_rate": 6.480306383274926e-06, "loss": 17.003, "step": 23098 }, { "epoch": 0.4222311587182627, "grad_norm": 6.037309898579251, "learning_rate": 6.480023639651385e-06, "loss": 17.1447, "step": 23099 }, { "epoch": 0.42224943791470926, "grad_norm": 7.347191776281626, "learning_rate": 6.47974089084049e-06, "loss": 18.1314, "step": 23100 }, { "epoch": 0.4222677171111558, "grad_norm": 7.158394307901473, "learning_rate": 6.479458136843232e-06, "loss": 17.8036, "step": 23101 }, { "epoch": 0.42228599630760233, "grad_norm": 6.573547552557479, "learning_rate": 6.479175377660601e-06, "loss": 17.4141, "step": 23102 }, { "epoch": 0.42230427550404886, "grad_norm": 6.198939740718497, "learning_rate": 6.478892613293586e-06, "loss": 17.4501, "step": 23103 }, { "epoch": 0.42232255470049535, "grad_norm": 6.512008325165434, "learning_rate": 6.4786098437431845e-06, "loss": 17.7391, "step": 23104 }, { "epoch": 0.4223408338969419, "grad_norm": 7.510655871600087, "learning_rate": 6.478327069010381e-06, "loss": 17.8093, "step": 23105 }, { "epoch": 0.4223591130933884, "grad_norm": 6.438621615415397, "learning_rate": 6.478044289096173e-06, "loss": 17.5805, "step": 23106 }, { "epoch": 0.42237739228983495, "grad_norm": 7.336705497786379, "learning_rate": 6.477761504001545e-06, "loss": 17.7835, "step": 23107 }, { "epoch": 0.4223956714862815, "grad_norm": 5.87458767097228, "learning_rate": 6.477478713727492e-06, "loss": 17.4316, "step": 23108 }, { "epoch": 0.42241395068272797, "grad_norm": 6.552670239449119, "learning_rate": 6.477195918275003e-06, "loss": 17.901, "step": 23109 }, { "epoch": 0.4224322298791745, "grad_norm": 6.2558059341346475, "learning_rate": 6.476913117645073e-06, "loss": 17.6013, "step": 23110 }, { "epoch": 0.42245050907562104, "grad_norm": 6.159191106187636, "learning_rate": 6.47663031183869e-06, "loss": 17.4801, "step": 23111 }, { "epoch": 0.4224687882720676, "grad_norm": 6.3639036507049935, "learning_rate": 6.476347500856844e-06, "loss": 17.7147, "step": 23112 }, { "epoch": 0.4224870674685141, "grad_norm": 5.495717279870066, "learning_rate": 6.476064684700529e-06, "loss": 17.1045, "step": 23113 }, { "epoch": 0.4225053466649606, "grad_norm": 5.334120959304089, "learning_rate": 6.475781863370738e-06, "loss": 17.3345, "step": 23114 }, { "epoch": 0.4225236258614071, "grad_norm": 7.442688966357905, "learning_rate": 6.4754990368684565e-06, "loss": 17.7983, "step": 23115 }, { "epoch": 0.42254190505785366, "grad_norm": 7.859035112315611, "learning_rate": 6.475216205194681e-06, "loss": 17.8951, "step": 23116 }, { "epoch": 0.4225601842543002, "grad_norm": 6.195672053441522, "learning_rate": 6.474933368350398e-06, "loss": 17.2802, "step": 23117 }, { "epoch": 0.4225784634507467, "grad_norm": 6.771734234262354, "learning_rate": 6.4746505263366045e-06, "loss": 17.7595, "step": 23118 }, { "epoch": 0.4225967426471932, "grad_norm": 5.870869540255059, "learning_rate": 6.4743676791542874e-06, "loss": 17.2879, "step": 23119 }, { "epoch": 0.42261502184363975, "grad_norm": 8.189513975913615, "learning_rate": 6.474084826804438e-06, "loss": 17.9759, "step": 23120 }, { "epoch": 0.4226333010400863, "grad_norm": 6.303489517903496, "learning_rate": 6.473801969288052e-06, "loss": 17.3265, "step": 23121 }, { "epoch": 0.4226515802365328, "grad_norm": 7.0375939108654695, "learning_rate": 6.473519106606117e-06, "loss": 17.8507, "step": 23122 }, { "epoch": 0.4226698594329793, "grad_norm": 6.59593245315403, "learning_rate": 6.473236238759625e-06, "loss": 17.6573, "step": 23123 }, { "epoch": 0.42268813862942584, "grad_norm": 6.561789534776478, "learning_rate": 6.472953365749569e-06, "loss": 17.5769, "step": 23124 }, { "epoch": 0.42270641782587237, "grad_norm": 6.425788824945835, "learning_rate": 6.472670487576937e-06, "loss": 17.762, "step": 23125 }, { "epoch": 0.4227246970223189, "grad_norm": 6.233744604495483, "learning_rate": 6.4723876042427245e-06, "loss": 17.5841, "step": 23126 }, { "epoch": 0.42274297621876544, "grad_norm": 6.3898368059332755, "learning_rate": 6.47210471574792e-06, "loss": 17.5965, "step": 23127 }, { "epoch": 0.4227612554152119, "grad_norm": 5.853990276212833, "learning_rate": 6.471821822093518e-06, "loss": 17.4561, "step": 23128 }, { "epoch": 0.42277953461165846, "grad_norm": 6.057939282826052, "learning_rate": 6.471538923280507e-06, "loss": 17.346, "step": 23129 }, { "epoch": 0.422797813808105, "grad_norm": 6.8457531698245715, "learning_rate": 6.471256019309879e-06, "loss": 17.6576, "step": 23130 }, { "epoch": 0.42281609300455153, "grad_norm": 6.489701519716401, "learning_rate": 6.470973110182629e-06, "loss": 17.5682, "step": 23131 }, { "epoch": 0.42283437220099807, "grad_norm": 6.988440599621172, "learning_rate": 6.470690195899744e-06, "loss": 17.9877, "step": 23132 }, { "epoch": 0.42285265139744455, "grad_norm": 6.231174844169285, "learning_rate": 6.470407276462217e-06, "loss": 17.6179, "step": 23133 }, { "epoch": 0.4228709305938911, "grad_norm": 8.562516756371206, "learning_rate": 6.470124351871041e-06, "loss": 17.6676, "step": 23134 }, { "epoch": 0.4228892097903376, "grad_norm": 6.440482517126097, "learning_rate": 6.4698414221272066e-06, "loss": 17.6784, "step": 23135 }, { "epoch": 0.42290748898678415, "grad_norm": 6.348356776678403, "learning_rate": 6.469558487231706e-06, "loss": 17.545, "step": 23136 }, { "epoch": 0.4229257681832307, "grad_norm": 12.258602007158364, "learning_rate": 6.469275547185529e-06, "loss": 18.8457, "step": 23137 }, { "epoch": 0.42294404737967717, "grad_norm": 7.0072444440524455, "learning_rate": 6.468992601989671e-06, "loss": 17.976, "step": 23138 }, { "epoch": 0.4229623265761237, "grad_norm": 6.128780314200569, "learning_rate": 6.468709651645119e-06, "loss": 17.4697, "step": 23139 }, { "epoch": 0.42298060577257024, "grad_norm": 6.223880414362575, "learning_rate": 6.4684266961528675e-06, "loss": 17.4181, "step": 23140 }, { "epoch": 0.4229988849690168, "grad_norm": 5.158827222271919, "learning_rate": 6.468143735513908e-06, "loss": 16.8935, "step": 23141 }, { "epoch": 0.4230171641654633, "grad_norm": 6.220023788164625, "learning_rate": 6.467860769729234e-06, "loss": 17.5723, "step": 23142 }, { "epoch": 0.4230354433619098, "grad_norm": 6.199615221312094, "learning_rate": 6.467577798799834e-06, "loss": 17.3186, "step": 23143 }, { "epoch": 0.4230537225583563, "grad_norm": 5.38133794882142, "learning_rate": 6.4672948227267e-06, "loss": 17.0014, "step": 23144 }, { "epoch": 0.42307200175480286, "grad_norm": 6.8549333842776585, "learning_rate": 6.467011841510827e-06, "loss": 18.0086, "step": 23145 }, { "epoch": 0.4230902809512494, "grad_norm": 5.703526497242912, "learning_rate": 6.466728855153203e-06, "loss": 17.295, "step": 23146 }, { "epoch": 0.42310856014769593, "grad_norm": 7.304360598495538, "learning_rate": 6.466445863654823e-06, "loss": 17.7328, "step": 23147 }, { "epoch": 0.4231268393441424, "grad_norm": 7.816409379870154, "learning_rate": 6.466162867016677e-06, "loss": 17.6701, "step": 23148 }, { "epoch": 0.42314511854058895, "grad_norm": 6.000202311919672, "learning_rate": 6.465879865239757e-06, "loss": 17.4872, "step": 23149 }, { "epoch": 0.4231633977370355, "grad_norm": 7.076186929357036, "learning_rate": 6.465596858325056e-06, "loss": 17.9496, "step": 23150 }, { "epoch": 0.423181676933482, "grad_norm": 6.291260848980936, "learning_rate": 6.465313846273566e-06, "loss": 17.1886, "step": 23151 }, { "epoch": 0.4231999561299285, "grad_norm": 4.861121498229915, "learning_rate": 6.465030829086276e-06, "loss": 16.8271, "step": 23152 }, { "epoch": 0.42321823532637504, "grad_norm": 7.000038974411, "learning_rate": 6.464747806764181e-06, "loss": 17.7604, "step": 23153 }, { "epoch": 0.42323651452282157, "grad_norm": 6.553291930245339, "learning_rate": 6.4644647793082725e-06, "loss": 17.5454, "step": 23154 }, { "epoch": 0.4232547937192681, "grad_norm": 6.337761010366317, "learning_rate": 6.464181746719541e-06, "loss": 17.578, "step": 23155 }, { "epoch": 0.42327307291571464, "grad_norm": 6.479008818263093, "learning_rate": 6.463898708998981e-06, "loss": 17.6494, "step": 23156 }, { "epoch": 0.4232913521121611, "grad_norm": 6.435850887126168, "learning_rate": 6.463615666147581e-06, "loss": 17.4268, "step": 23157 }, { "epoch": 0.42330963130860766, "grad_norm": 7.474651685203027, "learning_rate": 6.463332618166337e-06, "loss": 17.697, "step": 23158 }, { "epoch": 0.4233279105050542, "grad_norm": 7.888308945496125, "learning_rate": 6.463049565056239e-06, "loss": 18.1867, "step": 23159 }, { "epoch": 0.42334618970150073, "grad_norm": 5.471285991459773, "learning_rate": 6.462766506818279e-06, "loss": 17.1011, "step": 23160 }, { "epoch": 0.42336446889794727, "grad_norm": 6.5632578482274875, "learning_rate": 6.462483443453449e-06, "loss": 17.5202, "step": 23161 }, { "epoch": 0.42338274809439375, "grad_norm": 6.3339588163787255, "learning_rate": 6.4622003749627415e-06, "loss": 17.4519, "step": 23162 }, { "epoch": 0.4234010272908403, "grad_norm": 7.567082923147876, "learning_rate": 6.461917301347148e-06, "loss": 17.8977, "step": 23163 }, { "epoch": 0.4234193064872868, "grad_norm": 7.367710063713969, "learning_rate": 6.461634222607662e-06, "loss": 18.1403, "step": 23164 }, { "epoch": 0.42343758568373335, "grad_norm": 8.318156710560396, "learning_rate": 6.461351138745275e-06, "loss": 18.1321, "step": 23165 }, { "epoch": 0.4234558648801799, "grad_norm": 7.474583299570602, "learning_rate": 6.461068049760978e-06, "loss": 18.057, "step": 23166 }, { "epoch": 0.42347414407662637, "grad_norm": 6.9254902409955275, "learning_rate": 6.460784955655766e-06, "loss": 17.8518, "step": 23167 }, { "epoch": 0.4234924232730729, "grad_norm": 7.831107012584451, "learning_rate": 6.4605018564306275e-06, "loss": 18.1655, "step": 23168 }, { "epoch": 0.42351070246951944, "grad_norm": 5.2637812020058234, "learning_rate": 6.460218752086559e-06, "loss": 17.1073, "step": 23169 }, { "epoch": 0.423528981665966, "grad_norm": 6.165313486475107, "learning_rate": 6.459935642624549e-06, "loss": 17.4921, "step": 23170 }, { "epoch": 0.4235472608624125, "grad_norm": 5.610198829730439, "learning_rate": 6.45965252804559e-06, "loss": 17.2784, "step": 23171 }, { "epoch": 0.423565540058859, "grad_norm": 6.1106798370772415, "learning_rate": 6.459369408350677e-06, "loss": 17.2397, "step": 23172 }, { "epoch": 0.4235838192553055, "grad_norm": 7.268801561247627, "learning_rate": 6.459086283540802e-06, "loss": 18.0769, "step": 23173 }, { "epoch": 0.42360209845175206, "grad_norm": 5.950001126618713, "learning_rate": 6.458803153616955e-06, "loss": 17.5477, "step": 23174 }, { "epoch": 0.4236203776481986, "grad_norm": 5.950580894332745, "learning_rate": 6.45852001858013e-06, "loss": 17.0636, "step": 23175 }, { "epoch": 0.42363865684464513, "grad_norm": 6.151153173015378, "learning_rate": 6.458236878431317e-06, "loss": 17.3583, "step": 23176 }, { "epoch": 0.4236569360410916, "grad_norm": 6.353086558370284, "learning_rate": 6.457953733171513e-06, "loss": 17.2193, "step": 23177 }, { "epoch": 0.42367521523753815, "grad_norm": 6.90804130787632, "learning_rate": 6.457670582801706e-06, "loss": 17.4209, "step": 23178 }, { "epoch": 0.4236934944339847, "grad_norm": 6.916016700328348, "learning_rate": 6.457387427322889e-06, "loss": 17.4882, "step": 23179 }, { "epoch": 0.4237117736304312, "grad_norm": 6.390139397516349, "learning_rate": 6.4571042667360585e-06, "loss": 17.3082, "step": 23180 }, { "epoch": 0.42373005282687776, "grad_norm": 7.211340731285922, "learning_rate": 6.4568211010422025e-06, "loss": 17.9054, "step": 23181 }, { "epoch": 0.42374833202332424, "grad_norm": 7.222845905313327, "learning_rate": 6.456537930242315e-06, "loss": 17.7328, "step": 23182 }, { "epoch": 0.4237666112197708, "grad_norm": 6.221297555481, "learning_rate": 6.45625475433739e-06, "loss": 17.6305, "step": 23183 }, { "epoch": 0.4237848904162173, "grad_norm": 6.447643288950181, "learning_rate": 6.455971573328415e-06, "loss": 17.6717, "step": 23184 }, { "epoch": 0.42380316961266384, "grad_norm": 8.04998433861434, "learning_rate": 6.4556883872163875e-06, "loss": 18.1202, "step": 23185 }, { "epoch": 0.4238214488091103, "grad_norm": 7.751375134420809, "learning_rate": 6.4554051960023e-06, "loss": 18.5387, "step": 23186 }, { "epoch": 0.42383972800555686, "grad_norm": 7.467149403090195, "learning_rate": 6.455121999687143e-06, "loss": 17.7701, "step": 23187 }, { "epoch": 0.4238580072020034, "grad_norm": 7.1647614363195515, "learning_rate": 6.454838798271909e-06, "loss": 17.3247, "step": 23188 }, { "epoch": 0.42387628639844993, "grad_norm": 8.03890099407871, "learning_rate": 6.45455559175759e-06, "loss": 18.1342, "step": 23189 }, { "epoch": 0.42389456559489647, "grad_norm": 7.077863520526551, "learning_rate": 6.454272380145183e-06, "loss": 17.4789, "step": 23190 }, { "epoch": 0.42391284479134295, "grad_norm": 7.8605563705078465, "learning_rate": 6.453989163435676e-06, "loss": 18.283, "step": 23191 }, { "epoch": 0.4239311239877895, "grad_norm": 7.136978425686276, "learning_rate": 6.453705941630062e-06, "loss": 17.8003, "step": 23192 }, { "epoch": 0.423949403184236, "grad_norm": 5.420422828767463, "learning_rate": 6.453422714729336e-06, "loss": 17.055, "step": 23193 }, { "epoch": 0.42396768238068255, "grad_norm": 8.036881457389141, "learning_rate": 6.453139482734489e-06, "loss": 17.8265, "step": 23194 }, { "epoch": 0.4239859615771291, "grad_norm": 6.376078633630916, "learning_rate": 6.452856245646515e-06, "loss": 17.5076, "step": 23195 }, { "epoch": 0.42400424077357557, "grad_norm": 7.055613800982587, "learning_rate": 6.4525730034664046e-06, "loss": 17.6407, "step": 23196 }, { "epoch": 0.4240225199700221, "grad_norm": 5.7198785669912136, "learning_rate": 6.4522897561951536e-06, "loss": 17.2559, "step": 23197 }, { "epoch": 0.42404079916646864, "grad_norm": 7.325935359629535, "learning_rate": 6.452006503833752e-06, "loss": 17.8513, "step": 23198 }, { "epoch": 0.4240590783629152, "grad_norm": 6.532728512763269, "learning_rate": 6.451723246383194e-06, "loss": 17.2482, "step": 23199 }, { "epoch": 0.4240773575593617, "grad_norm": 6.1520616411847495, "learning_rate": 6.451439983844472e-06, "loss": 17.5248, "step": 23200 }, { "epoch": 0.4240956367558082, "grad_norm": 5.030040310422997, "learning_rate": 6.451156716218579e-06, "loss": 16.9401, "step": 23201 }, { "epoch": 0.4241139159522547, "grad_norm": 6.108694950567766, "learning_rate": 6.450873443506507e-06, "loss": 17.6505, "step": 23202 }, { "epoch": 0.42413219514870126, "grad_norm": 6.204057434994778, "learning_rate": 6.450590165709248e-06, "loss": 17.585, "step": 23203 }, { "epoch": 0.4241504743451478, "grad_norm": 9.611802540482145, "learning_rate": 6.4503068828277994e-06, "loss": 17.6319, "step": 23204 }, { "epoch": 0.42416875354159433, "grad_norm": 5.975477673986726, "learning_rate": 6.450023594863149e-06, "loss": 17.2863, "step": 23205 }, { "epoch": 0.4241870327380408, "grad_norm": 6.75798405226483, "learning_rate": 6.449740301816292e-06, "loss": 17.4903, "step": 23206 }, { "epoch": 0.42420531193448735, "grad_norm": 6.041848193441785, "learning_rate": 6.449457003688222e-06, "loss": 17.4501, "step": 23207 }, { "epoch": 0.4242235911309339, "grad_norm": 5.974743847344579, "learning_rate": 6.4491737004799305e-06, "loss": 17.2646, "step": 23208 }, { "epoch": 0.4242418703273804, "grad_norm": 6.457713787705749, "learning_rate": 6.44889039219241e-06, "loss": 17.4153, "step": 23209 }, { "epoch": 0.42426014952382696, "grad_norm": 5.316422577175712, "learning_rate": 6.448607078826655e-06, "loss": 17.2499, "step": 23210 }, { "epoch": 0.42427842872027344, "grad_norm": 6.224997073142436, "learning_rate": 6.448323760383659e-06, "loss": 17.7254, "step": 23211 }, { "epoch": 0.42429670791672, "grad_norm": 6.81697345358101, "learning_rate": 6.448040436864412e-06, "loss": 17.4188, "step": 23212 }, { "epoch": 0.4243149871131665, "grad_norm": 7.262940284252221, "learning_rate": 6.447757108269911e-06, "loss": 17.9658, "step": 23213 }, { "epoch": 0.42433326630961304, "grad_norm": 6.028857721532317, "learning_rate": 6.4474737746011465e-06, "loss": 17.2204, "step": 23214 }, { "epoch": 0.4243515455060596, "grad_norm": 5.812748220060766, "learning_rate": 6.447190435859111e-06, "loss": 17.2248, "step": 23215 }, { "epoch": 0.42436982470250606, "grad_norm": 7.9293828470095615, "learning_rate": 6.446907092044799e-06, "loss": 17.979, "step": 23216 }, { "epoch": 0.4243881038989526, "grad_norm": 6.89259415779758, "learning_rate": 6.446623743159203e-06, "loss": 17.6296, "step": 23217 }, { "epoch": 0.42440638309539913, "grad_norm": 7.8202367995967, "learning_rate": 6.446340389203317e-06, "loss": 18.0758, "step": 23218 }, { "epoch": 0.42442466229184567, "grad_norm": 5.132927930426186, "learning_rate": 6.446057030178132e-06, "loss": 17.0155, "step": 23219 }, { "epoch": 0.42444294148829215, "grad_norm": 4.992649609231906, "learning_rate": 6.445773666084645e-06, "loss": 16.9942, "step": 23220 }, { "epoch": 0.4244612206847387, "grad_norm": 7.880871310450647, "learning_rate": 6.445490296923844e-06, "loss": 17.6014, "step": 23221 }, { "epoch": 0.4244794998811852, "grad_norm": 6.238049813757726, "learning_rate": 6.445206922696727e-06, "loss": 17.4601, "step": 23222 }, { "epoch": 0.42449777907763175, "grad_norm": 6.126625090422114, "learning_rate": 6.444923543404285e-06, "loss": 17.3324, "step": 23223 }, { "epoch": 0.4245160582740783, "grad_norm": 6.958085470379649, "learning_rate": 6.444640159047511e-06, "loss": 17.7785, "step": 23224 }, { "epoch": 0.42453433747052477, "grad_norm": 6.814057729507913, "learning_rate": 6.444356769627398e-06, "loss": 17.8646, "step": 23225 }, { "epoch": 0.4245526166669713, "grad_norm": 5.994456734825881, "learning_rate": 6.4440733751449396e-06, "loss": 17.3985, "step": 23226 }, { "epoch": 0.42457089586341784, "grad_norm": 8.369769332719311, "learning_rate": 6.443789975601129e-06, "loss": 17.7328, "step": 23227 }, { "epoch": 0.4245891750598644, "grad_norm": 8.509696638553095, "learning_rate": 6.443506570996962e-06, "loss": 18.0756, "step": 23228 }, { "epoch": 0.4246074542563109, "grad_norm": 5.68080480769699, "learning_rate": 6.4432231613334295e-06, "loss": 17.3222, "step": 23229 }, { "epoch": 0.4246257334527574, "grad_norm": 5.864578603069478, "learning_rate": 6.442939746611523e-06, "loss": 17.4366, "step": 23230 }, { "epoch": 0.42464401264920393, "grad_norm": 6.695005158913941, "learning_rate": 6.44265632683224e-06, "loss": 17.4122, "step": 23231 }, { "epoch": 0.42466229184565046, "grad_norm": 7.886751378773805, "learning_rate": 6.44237290199657e-06, "loss": 18.1189, "step": 23232 }, { "epoch": 0.424680571042097, "grad_norm": 5.667682538075096, "learning_rate": 6.4420894721055094e-06, "loss": 17.2764, "step": 23233 }, { "epoch": 0.42469885023854353, "grad_norm": 6.566721020308051, "learning_rate": 6.44180603716005e-06, "loss": 17.4361, "step": 23234 }, { "epoch": 0.42471712943499, "grad_norm": 6.160303505709899, "learning_rate": 6.441522597161185e-06, "loss": 17.2469, "step": 23235 }, { "epoch": 0.42473540863143655, "grad_norm": 6.889576470886445, "learning_rate": 6.4412391521099084e-06, "loss": 17.6904, "step": 23236 }, { "epoch": 0.4247536878278831, "grad_norm": 5.488787147695676, "learning_rate": 6.4409557020072145e-06, "loss": 16.8651, "step": 23237 }, { "epoch": 0.4247719670243296, "grad_norm": 8.267265947832813, "learning_rate": 6.440672246854096e-06, "loss": 18.3391, "step": 23238 }, { "epoch": 0.42479024622077616, "grad_norm": 6.037747717693437, "learning_rate": 6.4403887866515445e-06, "loss": 17.3174, "step": 23239 }, { "epoch": 0.42480852541722264, "grad_norm": 6.060987718727519, "learning_rate": 6.440105321400556e-06, "loss": 17.6085, "step": 23240 }, { "epoch": 0.4248268046136692, "grad_norm": 5.91925489394884, "learning_rate": 6.439821851102124e-06, "loss": 17.2479, "step": 23241 }, { "epoch": 0.4248450838101157, "grad_norm": 5.738825810714, "learning_rate": 6.439538375757243e-06, "loss": 17.2108, "step": 23242 }, { "epoch": 0.42486336300656224, "grad_norm": 7.070132764479361, "learning_rate": 6.439254895366902e-06, "loss": 17.7795, "step": 23243 }, { "epoch": 0.4248816422030088, "grad_norm": 5.287897605428229, "learning_rate": 6.4389714099320975e-06, "loss": 17.1248, "step": 23244 }, { "epoch": 0.42489992139945526, "grad_norm": 7.782209619870981, "learning_rate": 6.438687919453826e-06, "loss": 18.2702, "step": 23245 }, { "epoch": 0.4249182005959018, "grad_norm": 6.820480185191707, "learning_rate": 6.438404423933076e-06, "loss": 17.3446, "step": 23246 }, { "epoch": 0.42493647979234833, "grad_norm": 5.73377650808498, "learning_rate": 6.438120923370843e-06, "loss": 17.2321, "step": 23247 }, { "epoch": 0.42495475898879487, "grad_norm": 7.453244852160705, "learning_rate": 6.437837417768123e-06, "loss": 18.1756, "step": 23248 }, { "epoch": 0.4249730381852414, "grad_norm": 5.9834094339976, "learning_rate": 6.437553907125905e-06, "loss": 17.3006, "step": 23249 }, { "epoch": 0.4249913173816879, "grad_norm": 5.715656527767392, "learning_rate": 6.437270391445186e-06, "loss": 17.3641, "step": 23250 }, { "epoch": 0.4250095965781344, "grad_norm": 5.991435802699906, "learning_rate": 6.43698687072696e-06, "loss": 17.5296, "step": 23251 }, { "epoch": 0.42502787577458095, "grad_norm": 5.914436881039171, "learning_rate": 6.436703344972219e-06, "loss": 17.3627, "step": 23252 }, { "epoch": 0.4250461549710275, "grad_norm": 7.27600022634788, "learning_rate": 6.436419814181958e-06, "loss": 17.9451, "step": 23253 }, { "epoch": 0.42506443416747397, "grad_norm": 6.39585124336612, "learning_rate": 6.43613627835717e-06, "loss": 17.7013, "step": 23254 }, { "epoch": 0.4250827133639205, "grad_norm": 7.95961009723974, "learning_rate": 6.435852737498849e-06, "loss": 17.8516, "step": 23255 }, { "epoch": 0.42510099256036704, "grad_norm": 7.026971734875077, "learning_rate": 6.435569191607989e-06, "loss": 17.6718, "step": 23256 }, { "epoch": 0.4251192717568136, "grad_norm": 6.698563438269797, "learning_rate": 6.435285640685582e-06, "loss": 17.7578, "step": 23257 }, { "epoch": 0.4251375509532601, "grad_norm": 6.703836671505481, "learning_rate": 6.435002084732625e-06, "loss": 17.5404, "step": 23258 }, { "epoch": 0.4251558301497066, "grad_norm": 5.346873724366964, "learning_rate": 6.4347185237501095e-06, "loss": 17.004, "step": 23259 }, { "epoch": 0.42517410934615313, "grad_norm": 5.8769726319864874, "learning_rate": 6.4344349577390306e-06, "loss": 16.9058, "step": 23260 }, { "epoch": 0.42519238854259966, "grad_norm": 4.379997502804807, "learning_rate": 6.434151386700382e-06, "loss": 16.8215, "step": 23261 }, { "epoch": 0.4252106677390462, "grad_norm": 6.778903453990103, "learning_rate": 6.433867810635156e-06, "loss": 17.7149, "step": 23262 }, { "epoch": 0.42522894693549274, "grad_norm": 8.234971800394172, "learning_rate": 6.433584229544348e-06, "loss": 17.7708, "step": 23263 }, { "epoch": 0.4252472261319392, "grad_norm": 7.117066939009308, "learning_rate": 6.4333006434289525e-06, "loss": 17.7574, "step": 23264 }, { "epoch": 0.42526550532838575, "grad_norm": 7.891690304306368, "learning_rate": 6.433017052289963e-06, "loss": 18.013, "step": 23265 }, { "epoch": 0.4252837845248323, "grad_norm": 6.682338327057211, "learning_rate": 6.432733456128371e-06, "loss": 17.53, "step": 23266 }, { "epoch": 0.4253020637212788, "grad_norm": 7.742684286834764, "learning_rate": 6.432449854945174e-06, "loss": 18.009, "step": 23267 }, { "epoch": 0.42532034291772536, "grad_norm": 7.87572476066597, "learning_rate": 6.4321662487413634e-06, "loss": 18.0306, "step": 23268 }, { "epoch": 0.42533862211417184, "grad_norm": 12.092941117262523, "learning_rate": 6.431882637517937e-06, "loss": 18.5198, "step": 23269 }, { "epoch": 0.4253569013106184, "grad_norm": 6.577591419740454, "learning_rate": 6.431599021275885e-06, "loss": 17.8837, "step": 23270 }, { "epoch": 0.4253751805070649, "grad_norm": 5.7233591927392276, "learning_rate": 6.4313154000162e-06, "loss": 17.2162, "step": 23271 }, { "epoch": 0.42539345970351145, "grad_norm": 5.9142626601897765, "learning_rate": 6.431031773739882e-06, "loss": 17.2328, "step": 23272 }, { "epoch": 0.425411738899958, "grad_norm": 6.981238667079544, "learning_rate": 6.430748142447921e-06, "loss": 17.9035, "step": 23273 }, { "epoch": 0.42543001809640446, "grad_norm": 7.6050498953989765, "learning_rate": 6.430464506141312e-06, "loss": 18.0571, "step": 23274 }, { "epoch": 0.425448297292851, "grad_norm": 6.8033061858957655, "learning_rate": 6.430180864821048e-06, "loss": 17.6216, "step": 23275 }, { "epoch": 0.42546657648929753, "grad_norm": 8.662456425833138, "learning_rate": 6.4298972184881255e-06, "loss": 18.2785, "step": 23276 }, { "epoch": 0.42548485568574407, "grad_norm": 5.844497092877724, "learning_rate": 6.4296135671435365e-06, "loss": 17.3414, "step": 23277 }, { "epoch": 0.4255031348821906, "grad_norm": 7.497452862917624, "learning_rate": 6.429329910788276e-06, "loss": 17.8492, "step": 23278 }, { "epoch": 0.4255214140786371, "grad_norm": 7.102280082484249, "learning_rate": 6.429046249423339e-06, "loss": 17.7186, "step": 23279 }, { "epoch": 0.4255396932750836, "grad_norm": 7.334603403675517, "learning_rate": 6.428762583049718e-06, "loss": 17.8792, "step": 23280 }, { "epoch": 0.42555797247153015, "grad_norm": 5.9367013808119005, "learning_rate": 6.428478911668408e-06, "loss": 17.5629, "step": 23281 }, { "epoch": 0.4255762516679767, "grad_norm": 6.119055864556667, "learning_rate": 6.428195235280403e-06, "loss": 17.2329, "step": 23282 }, { "epoch": 0.4255945308644232, "grad_norm": 5.894820412562195, "learning_rate": 6.4279115538867e-06, "loss": 17.3982, "step": 23283 }, { "epoch": 0.4256128100608697, "grad_norm": 6.780257747702837, "learning_rate": 6.427627867488289e-06, "loss": 18.1553, "step": 23284 }, { "epoch": 0.42563108925731624, "grad_norm": 6.77157361870574, "learning_rate": 6.427344176086166e-06, "loss": 17.9649, "step": 23285 }, { "epoch": 0.4256493684537628, "grad_norm": 7.303543310573126, "learning_rate": 6.427060479681326e-06, "loss": 17.8533, "step": 23286 }, { "epoch": 0.4256676476502093, "grad_norm": 6.705371479411704, "learning_rate": 6.426776778274763e-06, "loss": 17.9653, "step": 23287 }, { "epoch": 0.4256859268466558, "grad_norm": 5.539826475065183, "learning_rate": 6.426493071867472e-06, "loss": 17.0326, "step": 23288 }, { "epoch": 0.42570420604310233, "grad_norm": 6.732033702819237, "learning_rate": 6.426209360460445e-06, "loss": 17.5677, "step": 23289 }, { "epoch": 0.42572248523954886, "grad_norm": 5.35049837540552, "learning_rate": 6.425925644054679e-06, "loss": 16.9009, "step": 23290 }, { "epoch": 0.4257407644359954, "grad_norm": 6.842504751471586, "learning_rate": 6.425641922651167e-06, "loss": 17.8086, "step": 23291 }, { "epoch": 0.42575904363244194, "grad_norm": 6.861016117580617, "learning_rate": 6.425358196250904e-06, "loss": 17.7915, "step": 23292 }, { "epoch": 0.4257773228288884, "grad_norm": 5.9472844534362315, "learning_rate": 6.4250744648548835e-06, "loss": 17.2729, "step": 23293 }, { "epoch": 0.42579560202533495, "grad_norm": 5.451052307127618, "learning_rate": 6.4247907284641005e-06, "loss": 17.1485, "step": 23294 }, { "epoch": 0.4258138812217815, "grad_norm": 4.984651170296348, "learning_rate": 6.424506987079551e-06, "loss": 16.9197, "step": 23295 }, { "epoch": 0.425832160418228, "grad_norm": 7.692813012204413, "learning_rate": 6.4242232407022274e-06, "loss": 18.1744, "step": 23296 }, { "epoch": 0.42585043961467456, "grad_norm": 6.741708956330376, "learning_rate": 6.423939489333126e-06, "loss": 17.6719, "step": 23297 }, { "epoch": 0.42586871881112104, "grad_norm": 6.629877763655567, "learning_rate": 6.423655732973237e-06, "loss": 17.5006, "step": 23298 }, { "epoch": 0.4258869980075676, "grad_norm": 5.856492412922344, "learning_rate": 6.423371971623562e-06, "loss": 17.4823, "step": 23299 }, { "epoch": 0.4259052772040141, "grad_norm": 5.609827729623355, "learning_rate": 6.423088205285091e-06, "loss": 16.9767, "step": 23300 }, { "epoch": 0.42592355640046065, "grad_norm": 6.290690897885583, "learning_rate": 6.422804433958818e-06, "loss": 17.642, "step": 23301 }, { "epoch": 0.4259418355969072, "grad_norm": 5.601023951618471, "learning_rate": 6.422520657645739e-06, "loss": 17.2755, "step": 23302 }, { "epoch": 0.42596011479335366, "grad_norm": 6.052452128632869, "learning_rate": 6.422236876346848e-06, "loss": 17.4081, "step": 23303 }, { "epoch": 0.4259783939898002, "grad_norm": 6.388773808185296, "learning_rate": 6.4219530900631425e-06, "loss": 17.6369, "step": 23304 }, { "epoch": 0.42599667318624673, "grad_norm": 5.870620735000374, "learning_rate": 6.421669298795613e-06, "loss": 17.2354, "step": 23305 }, { "epoch": 0.42601495238269327, "grad_norm": 5.390375085752392, "learning_rate": 6.4213855025452565e-06, "loss": 17.1514, "step": 23306 }, { "epoch": 0.4260332315791398, "grad_norm": 5.589035895055516, "learning_rate": 6.421101701313067e-06, "loss": 17.3054, "step": 23307 }, { "epoch": 0.4260515107755863, "grad_norm": 6.425361073746728, "learning_rate": 6.420817895100039e-06, "loss": 17.4956, "step": 23308 }, { "epoch": 0.4260697899720328, "grad_norm": 6.037017043839415, "learning_rate": 6.420534083907169e-06, "loss": 17.4951, "step": 23309 }, { "epoch": 0.42608806916847936, "grad_norm": 6.237122949554329, "learning_rate": 6.4202502677354485e-06, "loss": 17.4349, "step": 23310 }, { "epoch": 0.4261063483649259, "grad_norm": 6.083998987840913, "learning_rate": 6.419966446585875e-06, "loss": 17.135, "step": 23311 }, { "epoch": 0.4261246275613724, "grad_norm": 6.549137200299183, "learning_rate": 6.419682620459442e-06, "loss": 17.6685, "step": 23312 }, { "epoch": 0.4261429067578189, "grad_norm": 7.026041518216922, "learning_rate": 6.419398789357144e-06, "loss": 17.681, "step": 23313 }, { "epoch": 0.42616118595426544, "grad_norm": 8.113965958966368, "learning_rate": 6.419114953279979e-06, "loss": 18.2747, "step": 23314 }, { "epoch": 0.426179465150712, "grad_norm": 5.402074650973205, "learning_rate": 6.418831112228937e-06, "loss": 16.9344, "step": 23315 }, { "epoch": 0.4261977443471585, "grad_norm": 6.291374197533516, "learning_rate": 6.418547266205014e-06, "loss": 17.5045, "step": 23316 }, { "epoch": 0.42621602354360505, "grad_norm": 6.783662187149835, "learning_rate": 6.418263415209207e-06, "loss": 17.9824, "step": 23317 }, { "epoch": 0.42623430274005153, "grad_norm": 6.520332513714922, "learning_rate": 6.417979559242512e-06, "loss": 17.5565, "step": 23318 }, { "epoch": 0.42625258193649807, "grad_norm": 6.2588971858846625, "learning_rate": 6.417695698305919e-06, "loss": 17.3797, "step": 23319 }, { "epoch": 0.4262708611329446, "grad_norm": 6.113436550965818, "learning_rate": 6.417411832400427e-06, "loss": 17.4842, "step": 23320 }, { "epoch": 0.42628914032939114, "grad_norm": 4.839899465251502, "learning_rate": 6.417127961527029e-06, "loss": 16.9643, "step": 23321 }, { "epoch": 0.4263074195258376, "grad_norm": 6.913841491735516, "learning_rate": 6.41684408568672e-06, "loss": 17.7049, "step": 23322 }, { "epoch": 0.42632569872228415, "grad_norm": 6.253215869966576, "learning_rate": 6.4165602048804964e-06, "loss": 17.3477, "step": 23323 }, { "epoch": 0.4263439779187307, "grad_norm": 6.160457300128599, "learning_rate": 6.416276319109351e-06, "loss": 17.4272, "step": 23324 }, { "epoch": 0.4263622571151772, "grad_norm": 6.605787098097573, "learning_rate": 6.415992428374281e-06, "loss": 17.5389, "step": 23325 }, { "epoch": 0.42638053631162376, "grad_norm": 5.904636517213099, "learning_rate": 6.41570853267628e-06, "loss": 17.3321, "step": 23326 }, { "epoch": 0.42639881550807024, "grad_norm": 5.849282282109855, "learning_rate": 6.4154246320163435e-06, "loss": 17.2366, "step": 23327 }, { "epoch": 0.4264170947045168, "grad_norm": 5.530342027558702, "learning_rate": 6.415140726395468e-06, "loss": 17.1125, "step": 23328 }, { "epoch": 0.4264353739009633, "grad_norm": 6.067278157116494, "learning_rate": 6.414856815814645e-06, "loss": 17.5748, "step": 23329 }, { "epoch": 0.42645365309740985, "grad_norm": 8.310868332090848, "learning_rate": 6.414572900274871e-06, "loss": 18.2586, "step": 23330 }, { "epoch": 0.4264719322938564, "grad_norm": 5.3566791051364175, "learning_rate": 6.414288979777145e-06, "loss": 17.2356, "step": 23331 }, { "epoch": 0.42649021149030286, "grad_norm": 5.551640164710231, "learning_rate": 6.414005054322456e-06, "loss": 17.1504, "step": 23332 }, { "epoch": 0.4265084906867494, "grad_norm": 8.486077590578063, "learning_rate": 6.413721123911803e-06, "loss": 17.9259, "step": 23333 }, { "epoch": 0.42652676988319593, "grad_norm": 7.864505064176411, "learning_rate": 6.41343718854618e-06, "loss": 17.8304, "step": 23334 }, { "epoch": 0.42654504907964247, "grad_norm": 6.703655426624963, "learning_rate": 6.413153248226583e-06, "loss": 17.6709, "step": 23335 }, { "epoch": 0.426563328276089, "grad_norm": 6.9711473328119515, "learning_rate": 6.412869302954005e-06, "loss": 16.8692, "step": 23336 }, { "epoch": 0.4265816074725355, "grad_norm": 6.087758990966172, "learning_rate": 6.412585352729443e-06, "loss": 17.3385, "step": 23337 }, { "epoch": 0.426599886668982, "grad_norm": 6.883960524960858, "learning_rate": 6.412301397553893e-06, "loss": 17.7104, "step": 23338 }, { "epoch": 0.42661816586542856, "grad_norm": 5.557755489306266, "learning_rate": 6.412017437428348e-06, "loss": 17.3387, "step": 23339 }, { "epoch": 0.4266364450618751, "grad_norm": 5.728770330626398, "learning_rate": 6.411733472353805e-06, "loss": 17.3812, "step": 23340 }, { "epoch": 0.4266547242583216, "grad_norm": 5.894866565703461, "learning_rate": 6.411449502331258e-06, "loss": 17.2593, "step": 23341 }, { "epoch": 0.4266730034547681, "grad_norm": 7.00927155631741, "learning_rate": 6.411165527361705e-06, "loss": 17.556, "step": 23342 }, { "epoch": 0.42669128265121464, "grad_norm": 7.27315518515396, "learning_rate": 6.410881547446137e-06, "loss": 17.6639, "step": 23343 }, { "epoch": 0.4267095618476612, "grad_norm": 7.228428613284711, "learning_rate": 6.410597562585552e-06, "loss": 18.053, "step": 23344 }, { "epoch": 0.4267278410441077, "grad_norm": 6.687136055615292, "learning_rate": 6.4103135727809465e-06, "loss": 17.5049, "step": 23345 }, { "epoch": 0.42674612024055425, "grad_norm": 6.764895121356559, "learning_rate": 6.410029578033313e-06, "loss": 17.7955, "step": 23346 }, { "epoch": 0.42676439943700073, "grad_norm": 6.520786108739674, "learning_rate": 6.4097455783436495e-06, "loss": 17.4612, "step": 23347 }, { "epoch": 0.42678267863344727, "grad_norm": 8.915873416299993, "learning_rate": 6.409461573712947e-06, "loss": 18.2511, "step": 23348 }, { "epoch": 0.4268009578298938, "grad_norm": 6.260238044671665, "learning_rate": 6.409177564142207e-06, "loss": 17.3599, "step": 23349 }, { "epoch": 0.42681923702634034, "grad_norm": 8.116313773124174, "learning_rate": 6.408893549632421e-06, "loss": 18.0807, "step": 23350 }, { "epoch": 0.4268375162227869, "grad_norm": 5.993751166289965, "learning_rate": 6.408609530184585e-06, "loss": 17.2234, "step": 23351 }, { "epoch": 0.42685579541923335, "grad_norm": 6.319804179162196, "learning_rate": 6.4083255057996954e-06, "loss": 17.6066, "step": 23352 }, { "epoch": 0.4268740746156799, "grad_norm": 6.584059094697491, "learning_rate": 6.408041476478747e-06, "loss": 17.5112, "step": 23353 }, { "epoch": 0.4268923538121264, "grad_norm": 6.5244990030763494, "learning_rate": 6.407757442222735e-06, "loss": 17.6516, "step": 23354 }, { "epoch": 0.42691063300857296, "grad_norm": 5.883039291502872, "learning_rate": 6.407473403032656e-06, "loss": 17.0701, "step": 23355 }, { "epoch": 0.42692891220501944, "grad_norm": 6.736911400804352, "learning_rate": 6.407189358909505e-06, "loss": 17.6317, "step": 23356 }, { "epoch": 0.426947191401466, "grad_norm": 7.641623068033282, "learning_rate": 6.406905309854275e-06, "loss": 17.6438, "step": 23357 }, { "epoch": 0.4269654705979125, "grad_norm": 6.224456284307733, "learning_rate": 6.406621255867966e-06, "loss": 17.3261, "step": 23358 }, { "epoch": 0.42698374979435905, "grad_norm": 5.5787801716904655, "learning_rate": 6.406337196951573e-06, "loss": 17.2173, "step": 23359 }, { "epoch": 0.4270020289908056, "grad_norm": 6.001649194149242, "learning_rate": 6.406053133106088e-06, "loss": 17.1758, "step": 23360 }, { "epoch": 0.42702030818725206, "grad_norm": 6.336602194065833, "learning_rate": 6.40576906433251e-06, "loss": 17.4226, "step": 23361 }, { "epoch": 0.4270385873836986, "grad_norm": 5.5915735460026985, "learning_rate": 6.405484990631831e-06, "loss": 17.441, "step": 23362 }, { "epoch": 0.42705686658014513, "grad_norm": 5.60167724582844, "learning_rate": 6.405200912005052e-06, "loss": 17.2534, "step": 23363 }, { "epoch": 0.42707514577659167, "grad_norm": 6.76547289832025, "learning_rate": 6.404916828453165e-06, "loss": 17.6248, "step": 23364 }, { "epoch": 0.4270934249730382, "grad_norm": 6.751197872198745, "learning_rate": 6.404632739977166e-06, "loss": 17.728, "step": 23365 }, { "epoch": 0.4271117041694847, "grad_norm": 5.4264654123146085, "learning_rate": 6.40434864657805e-06, "loss": 17.008, "step": 23366 }, { "epoch": 0.4271299833659312, "grad_norm": 5.563911891125588, "learning_rate": 6.404064548256815e-06, "loss": 17.1815, "step": 23367 }, { "epoch": 0.42714826256237776, "grad_norm": 7.417629212616718, "learning_rate": 6.403780445014456e-06, "loss": 17.7103, "step": 23368 }, { "epoch": 0.4271665417588243, "grad_norm": 6.1007423515647, "learning_rate": 6.403496336851969e-06, "loss": 17.426, "step": 23369 }, { "epoch": 0.4271848209552708, "grad_norm": 6.109197967976358, "learning_rate": 6.403212223770348e-06, "loss": 17.6285, "step": 23370 }, { "epoch": 0.4272031001517173, "grad_norm": 6.768557816439791, "learning_rate": 6.402928105770588e-06, "loss": 17.5708, "step": 23371 }, { "epoch": 0.42722137934816384, "grad_norm": 8.242561548441238, "learning_rate": 6.402643982853689e-06, "loss": 17.5663, "step": 23372 }, { "epoch": 0.4272396585446104, "grad_norm": 6.9426266384781385, "learning_rate": 6.402359855020645e-06, "loss": 17.9845, "step": 23373 }, { "epoch": 0.4272579377410569, "grad_norm": 6.324919899163823, "learning_rate": 6.402075722272451e-06, "loss": 17.673, "step": 23374 }, { "epoch": 0.42727621693750345, "grad_norm": 5.840632425260436, "learning_rate": 6.401791584610103e-06, "loss": 17.3264, "step": 23375 }, { "epoch": 0.42729449613394993, "grad_norm": 5.941959213094946, "learning_rate": 6.401507442034597e-06, "loss": 17.3341, "step": 23376 }, { "epoch": 0.42731277533039647, "grad_norm": 5.083564261165598, "learning_rate": 6.401223294546929e-06, "loss": 17.1065, "step": 23377 }, { "epoch": 0.427331054526843, "grad_norm": 7.467611811641787, "learning_rate": 6.400939142148095e-06, "loss": 18.23, "step": 23378 }, { "epoch": 0.42734933372328954, "grad_norm": 5.464582705786741, "learning_rate": 6.400654984839091e-06, "loss": 17.0183, "step": 23379 }, { "epoch": 0.4273676129197361, "grad_norm": 7.229755104089995, "learning_rate": 6.4003708226209116e-06, "loss": 18.0654, "step": 23380 }, { "epoch": 0.42738589211618255, "grad_norm": 6.632513083793697, "learning_rate": 6.400086655494555e-06, "loss": 18.0036, "step": 23381 }, { "epoch": 0.4274041713126291, "grad_norm": 7.4728082978196815, "learning_rate": 6.399802483461017e-06, "loss": 17.9919, "step": 23382 }, { "epoch": 0.4274224505090756, "grad_norm": 7.787181386131671, "learning_rate": 6.399518306521293e-06, "loss": 17.7279, "step": 23383 }, { "epoch": 0.42744072970552216, "grad_norm": 5.880190777567864, "learning_rate": 6.399234124676376e-06, "loss": 17.3932, "step": 23384 }, { "epoch": 0.4274590089019687, "grad_norm": 6.543591168829177, "learning_rate": 6.3989499379272665e-06, "loss": 17.7307, "step": 23385 }, { "epoch": 0.4274772880984152, "grad_norm": 7.560626540131132, "learning_rate": 6.398665746274959e-06, "loss": 18.2747, "step": 23386 }, { "epoch": 0.4274955672948617, "grad_norm": 7.082023980148686, "learning_rate": 6.398381549720452e-06, "loss": 17.8332, "step": 23387 }, { "epoch": 0.42751384649130825, "grad_norm": 7.786927866663507, "learning_rate": 6.398097348264736e-06, "loss": 18.5009, "step": 23388 }, { "epoch": 0.4275321256877548, "grad_norm": 5.70308358439869, "learning_rate": 6.397813141908809e-06, "loss": 17.2726, "step": 23389 }, { "epoch": 0.42755040488420126, "grad_norm": 5.99044632759544, "learning_rate": 6.3975289306536704e-06, "loss": 17.4928, "step": 23390 }, { "epoch": 0.4275686840806478, "grad_norm": 6.975416516493688, "learning_rate": 6.397244714500313e-06, "loss": 17.9719, "step": 23391 }, { "epoch": 0.42758696327709433, "grad_norm": 6.339173005272153, "learning_rate": 6.396960493449735e-06, "loss": 17.4774, "step": 23392 }, { "epoch": 0.42760524247354087, "grad_norm": 6.574409026294816, "learning_rate": 6.396676267502931e-06, "loss": 17.6055, "step": 23393 }, { "epoch": 0.4276235216699874, "grad_norm": 7.330237821050877, "learning_rate": 6.396392036660899e-06, "loss": 17.5929, "step": 23394 }, { "epoch": 0.4276418008664339, "grad_norm": 12.230810864601223, "learning_rate": 6.396107800924634e-06, "loss": 18.3993, "step": 23395 }, { "epoch": 0.4276600800628804, "grad_norm": 7.356162325461706, "learning_rate": 6.395823560295131e-06, "loss": 17.8599, "step": 23396 }, { "epoch": 0.42767835925932696, "grad_norm": 6.247100254242655, "learning_rate": 6.3955393147733895e-06, "loss": 17.4883, "step": 23397 }, { "epoch": 0.4276966384557735, "grad_norm": 6.020560638533876, "learning_rate": 6.395255064360401e-06, "loss": 17.5034, "step": 23398 }, { "epoch": 0.42771491765222003, "grad_norm": 5.884288937345549, "learning_rate": 6.394970809057166e-06, "loss": 17.1348, "step": 23399 }, { "epoch": 0.4277331968486665, "grad_norm": 5.84602641285627, "learning_rate": 6.394686548864681e-06, "loss": 17.1399, "step": 23400 }, { "epoch": 0.42775147604511304, "grad_norm": 7.170843289748093, "learning_rate": 6.394402283783938e-06, "loss": 17.8178, "step": 23401 }, { "epoch": 0.4277697552415596, "grad_norm": 6.12561822244916, "learning_rate": 6.394118013815938e-06, "loss": 17.493, "step": 23402 }, { "epoch": 0.4277880344380061, "grad_norm": 7.458365081773975, "learning_rate": 6.393833738961672e-06, "loss": 17.832, "step": 23403 }, { "epoch": 0.42780631363445265, "grad_norm": 5.824320554959616, "learning_rate": 6.3935494592221435e-06, "loss": 17.1904, "step": 23404 }, { "epoch": 0.42782459283089913, "grad_norm": 6.063267217156814, "learning_rate": 6.3932651745983444e-06, "loss": 17.5405, "step": 23405 }, { "epoch": 0.42784287202734567, "grad_norm": 7.899359840208344, "learning_rate": 6.39298088509127e-06, "loss": 17.8857, "step": 23406 }, { "epoch": 0.4278611512237922, "grad_norm": 5.7405543562282615, "learning_rate": 6.3926965907019205e-06, "loss": 17.3189, "step": 23407 }, { "epoch": 0.42787943042023874, "grad_norm": 5.869797000359069, "learning_rate": 6.3924122914312895e-06, "loss": 17.2573, "step": 23408 }, { "epoch": 0.4278977096166853, "grad_norm": 5.407481655789767, "learning_rate": 6.392127987280373e-06, "loss": 17.1029, "step": 23409 }, { "epoch": 0.42791598881313175, "grad_norm": 5.415827292402781, "learning_rate": 6.39184367825017e-06, "loss": 17.3705, "step": 23410 }, { "epoch": 0.4279342680095783, "grad_norm": 6.435835641604202, "learning_rate": 6.391559364341675e-06, "loss": 17.6047, "step": 23411 }, { "epoch": 0.4279525472060248, "grad_norm": 6.857008590435317, "learning_rate": 6.391275045555886e-06, "loss": 18.0624, "step": 23412 }, { "epoch": 0.42797082640247136, "grad_norm": 6.466399128208936, "learning_rate": 6.3909907218937985e-06, "loss": 17.6225, "step": 23413 }, { "epoch": 0.4279891055989179, "grad_norm": 5.721083240196607, "learning_rate": 6.39070639335641e-06, "loss": 17.2616, "step": 23414 }, { "epoch": 0.4280073847953644, "grad_norm": 7.764525729143078, "learning_rate": 6.390422059944716e-06, "loss": 17.9437, "step": 23415 }, { "epoch": 0.4280256639918109, "grad_norm": 7.267272213174589, "learning_rate": 6.390137721659711e-06, "loss": 17.7327, "step": 23416 }, { "epoch": 0.42804394318825745, "grad_norm": 5.616225426549465, "learning_rate": 6.389853378502395e-06, "loss": 17.248, "step": 23417 }, { "epoch": 0.428062222384704, "grad_norm": 6.208328680412768, "learning_rate": 6.389569030473765e-06, "loss": 17.5197, "step": 23418 }, { "epoch": 0.4280805015811505, "grad_norm": 6.186402087249811, "learning_rate": 6.389284677574815e-06, "loss": 17.6012, "step": 23419 }, { "epoch": 0.428098780777597, "grad_norm": 5.891358819072858, "learning_rate": 6.389000319806543e-06, "loss": 17.622, "step": 23420 }, { "epoch": 0.42811705997404353, "grad_norm": 6.162886707761934, "learning_rate": 6.388715957169947e-06, "loss": 17.5174, "step": 23421 }, { "epoch": 0.42813533917049007, "grad_norm": 5.778054424604584, "learning_rate": 6.38843158966602e-06, "loss": 17.3119, "step": 23422 }, { "epoch": 0.4281536183669366, "grad_norm": 6.824577888808088, "learning_rate": 6.3881472172957606e-06, "loss": 17.694, "step": 23423 }, { "epoch": 0.4281718975633831, "grad_norm": 7.722819229871561, "learning_rate": 6.387862840060166e-06, "loss": 17.8384, "step": 23424 }, { "epoch": 0.4281901767598296, "grad_norm": 5.206806597121236, "learning_rate": 6.387578457960233e-06, "loss": 16.9836, "step": 23425 }, { "epoch": 0.42820845595627616, "grad_norm": 6.920501065603111, "learning_rate": 6.3872940709969575e-06, "loss": 17.7597, "step": 23426 }, { "epoch": 0.4282267351527227, "grad_norm": 6.899161944954252, "learning_rate": 6.387009679171336e-06, "loss": 17.8887, "step": 23427 }, { "epoch": 0.42824501434916923, "grad_norm": 7.294812669355296, "learning_rate": 6.386725282484369e-06, "loss": 18.0346, "step": 23428 }, { "epoch": 0.4282632935456157, "grad_norm": 6.217190853006324, "learning_rate": 6.3864408809370484e-06, "loss": 17.4678, "step": 23429 }, { "epoch": 0.42828157274206224, "grad_norm": 6.162584665430932, "learning_rate": 6.386156474530372e-06, "loss": 17.5032, "step": 23430 }, { "epoch": 0.4282998519385088, "grad_norm": 6.444946592469472, "learning_rate": 6.385872063265338e-06, "loss": 17.6911, "step": 23431 }, { "epoch": 0.4283181311349553, "grad_norm": 7.7693908600627095, "learning_rate": 6.385587647142944e-06, "loss": 17.8688, "step": 23432 }, { "epoch": 0.42833641033140185, "grad_norm": 5.589450438592937, "learning_rate": 6.385303226164183e-06, "loss": 17.3504, "step": 23433 }, { "epoch": 0.42835468952784833, "grad_norm": 6.525097342409039, "learning_rate": 6.385018800330056e-06, "loss": 17.321, "step": 23434 }, { "epoch": 0.42837296872429487, "grad_norm": 6.754817938260677, "learning_rate": 6.384734369641558e-06, "loss": 17.5407, "step": 23435 }, { "epoch": 0.4283912479207414, "grad_norm": 5.851656400971432, "learning_rate": 6.384449934099686e-06, "loss": 17.4082, "step": 23436 }, { "epoch": 0.42840952711718794, "grad_norm": 6.852300756559189, "learning_rate": 6.384165493705437e-06, "loss": 17.7443, "step": 23437 }, { "epoch": 0.4284278063136345, "grad_norm": 7.375661965669248, "learning_rate": 6.383881048459808e-06, "loss": 18.0115, "step": 23438 }, { "epoch": 0.42844608551008095, "grad_norm": 5.550086568877446, "learning_rate": 6.383596598363796e-06, "loss": 17.2862, "step": 23439 }, { "epoch": 0.4284643647065275, "grad_norm": 5.993033378286841, "learning_rate": 6.383312143418399e-06, "loss": 17.3555, "step": 23440 }, { "epoch": 0.428482643902974, "grad_norm": 5.820840740675721, "learning_rate": 6.383027683624612e-06, "loss": 17.1568, "step": 23441 }, { "epoch": 0.42850092309942056, "grad_norm": 8.607797647673888, "learning_rate": 6.382743218983434e-06, "loss": 18.587, "step": 23442 }, { "epoch": 0.4285192022958671, "grad_norm": 5.81046941666761, "learning_rate": 6.382458749495859e-06, "loss": 17.13, "step": 23443 }, { "epoch": 0.4285374814923136, "grad_norm": 7.615414587016948, "learning_rate": 6.382174275162887e-06, "loss": 17.9039, "step": 23444 }, { "epoch": 0.4285557606887601, "grad_norm": 6.909462525828415, "learning_rate": 6.381889795985515e-06, "loss": 17.6704, "step": 23445 }, { "epoch": 0.42857403988520665, "grad_norm": 5.694158058507743, "learning_rate": 6.3816053119647395e-06, "loss": 17.2406, "step": 23446 }, { "epoch": 0.4285923190816532, "grad_norm": 6.002936666070136, "learning_rate": 6.381320823101556e-06, "loss": 17.4432, "step": 23447 }, { "epoch": 0.4286105982780997, "grad_norm": 6.189362379272837, "learning_rate": 6.3810363293969615e-06, "loss": 17.3398, "step": 23448 }, { "epoch": 0.4286288774745462, "grad_norm": 5.539747891510892, "learning_rate": 6.3807518308519575e-06, "loss": 17.2422, "step": 23449 }, { "epoch": 0.42864715667099273, "grad_norm": 6.804636414467735, "learning_rate": 6.380467327467537e-06, "loss": 17.7654, "step": 23450 }, { "epoch": 0.42866543586743927, "grad_norm": 7.243815335154215, "learning_rate": 6.380182819244698e-06, "loss": 17.8922, "step": 23451 }, { "epoch": 0.4286837150638858, "grad_norm": 6.036758226415079, "learning_rate": 6.379898306184438e-06, "loss": 17.4417, "step": 23452 }, { "epoch": 0.42870199426033234, "grad_norm": 6.990790364334782, "learning_rate": 6.379613788287754e-06, "loss": 18.0702, "step": 23453 }, { "epoch": 0.4287202734567788, "grad_norm": 6.70366083248654, "learning_rate": 6.379329265555644e-06, "loss": 17.7388, "step": 23454 }, { "epoch": 0.42873855265322536, "grad_norm": 5.785569135151232, "learning_rate": 6.379044737989104e-06, "loss": 17.183, "step": 23455 }, { "epoch": 0.4287568318496719, "grad_norm": 6.619216894772767, "learning_rate": 6.378760205589134e-06, "loss": 17.9898, "step": 23456 }, { "epoch": 0.42877511104611843, "grad_norm": 6.623487871309558, "learning_rate": 6.3784756683567265e-06, "loss": 17.5719, "step": 23457 }, { "epoch": 0.42879339024256496, "grad_norm": 7.345545332429029, "learning_rate": 6.378191126292881e-06, "loss": 17.6108, "step": 23458 }, { "epoch": 0.42881166943901144, "grad_norm": 6.003816686915262, "learning_rate": 6.377906579398598e-06, "loss": 17.4517, "step": 23459 }, { "epoch": 0.428829948635458, "grad_norm": 6.564628632479459, "learning_rate": 6.37762202767487e-06, "loss": 17.5803, "step": 23460 }, { "epoch": 0.4288482278319045, "grad_norm": 5.4720816589631625, "learning_rate": 6.377337471122698e-06, "loss": 17.1215, "step": 23461 }, { "epoch": 0.42886650702835105, "grad_norm": 6.655273116118058, "learning_rate": 6.377052909743075e-06, "loss": 17.5229, "step": 23462 }, { "epoch": 0.42888478622479753, "grad_norm": 6.984137184578219, "learning_rate": 6.376768343537003e-06, "loss": 17.6818, "step": 23463 }, { "epoch": 0.42890306542124407, "grad_norm": 6.047080046835342, "learning_rate": 6.376483772505477e-06, "loss": 17.4843, "step": 23464 }, { "epoch": 0.4289213446176906, "grad_norm": 5.805838726085733, "learning_rate": 6.376199196649494e-06, "loss": 17.2506, "step": 23465 }, { "epoch": 0.42893962381413714, "grad_norm": 7.52425932776498, "learning_rate": 6.375914615970054e-06, "loss": 18.106, "step": 23466 }, { "epoch": 0.4289579030105837, "grad_norm": 5.908963140598852, "learning_rate": 6.37563003046815e-06, "loss": 17.5495, "step": 23467 }, { "epoch": 0.42897618220703015, "grad_norm": 6.53915540978699, "learning_rate": 6.3753454401447845e-06, "loss": 17.4231, "step": 23468 }, { "epoch": 0.4289944614034767, "grad_norm": 6.07557074565523, "learning_rate": 6.375060845000953e-06, "loss": 17.3704, "step": 23469 }, { "epoch": 0.4290127405999232, "grad_norm": 5.44013386682845, "learning_rate": 6.37477624503765e-06, "loss": 17.157, "step": 23470 }, { "epoch": 0.42903101979636976, "grad_norm": 8.099400722174494, "learning_rate": 6.3744916402558775e-06, "loss": 18.2346, "step": 23471 }, { "epoch": 0.4290492989928163, "grad_norm": 6.548503239278486, "learning_rate": 6.37420703065663e-06, "loss": 17.7201, "step": 23472 }, { "epoch": 0.4290675781892628, "grad_norm": 6.781953401474344, "learning_rate": 6.373922416240907e-06, "loss": 17.6232, "step": 23473 }, { "epoch": 0.4290858573857093, "grad_norm": 5.94086083175987, "learning_rate": 6.373637797009706e-06, "loss": 17.1802, "step": 23474 }, { "epoch": 0.42910413658215585, "grad_norm": 6.625007104623523, "learning_rate": 6.373353172964021e-06, "loss": 17.666, "step": 23475 }, { "epoch": 0.4291224157786024, "grad_norm": 7.0803730896948105, "learning_rate": 6.3730685441048545e-06, "loss": 17.8875, "step": 23476 }, { "epoch": 0.4291406949750489, "grad_norm": 7.0384334182787285, "learning_rate": 6.372783910433202e-06, "loss": 17.8789, "step": 23477 }, { "epoch": 0.4291589741714954, "grad_norm": 6.710562453004961, "learning_rate": 6.37249927195006e-06, "loss": 17.7144, "step": 23478 }, { "epoch": 0.42917725336794194, "grad_norm": 7.008047150991366, "learning_rate": 6.372214628656427e-06, "loss": 17.8009, "step": 23479 }, { "epoch": 0.42919553256438847, "grad_norm": 5.735284382354732, "learning_rate": 6.371929980553302e-06, "loss": 17.407, "step": 23480 }, { "epoch": 0.429213811760835, "grad_norm": 6.266421602105894, "learning_rate": 6.37164532764168e-06, "loss": 17.5883, "step": 23481 }, { "epoch": 0.42923209095728154, "grad_norm": 6.604179934218001, "learning_rate": 6.37136066992256e-06, "loss": 17.4246, "step": 23482 }, { "epoch": 0.429250370153728, "grad_norm": 6.678183162723747, "learning_rate": 6.371076007396942e-06, "loss": 17.809, "step": 23483 }, { "epoch": 0.42926864935017456, "grad_norm": 6.645304757768106, "learning_rate": 6.370791340065819e-06, "loss": 17.4831, "step": 23484 }, { "epoch": 0.4292869285466211, "grad_norm": 7.229561117047641, "learning_rate": 6.370506667930193e-06, "loss": 17.8044, "step": 23485 }, { "epoch": 0.42930520774306763, "grad_norm": 7.211862745666257, "learning_rate": 6.370221990991059e-06, "loss": 17.7365, "step": 23486 }, { "epoch": 0.42932348693951417, "grad_norm": 7.6654712734821855, "learning_rate": 6.3699373092494185e-06, "loss": 17.8216, "step": 23487 }, { "epoch": 0.42934176613596065, "grad_norm": 6.927591734126307, "learning_rate": 6.369652622706264e-06, "loss": 17.8755, "step": 23488 }, { "epoch": 0.4293600453324072, "grad_norm": 5.89864838012858, "learning_rate": 6.3693679313625955e-06, "loss": 17.4478, "step": 23489 }, { "epoch": 0.4293783245288537, "grad_norm": 9.105643662289777, "learning_rate": 6.369083235219413e-06, "loss": 18.2581, "step": 23490 }, { "epoch": 0.42939660372530025, "grad_norm": 6.688581317154519, "learning_rate": 6.3687985342777115e-06, "loss": 17.6537, "step": 23491 }, { "epoch": 0.4294148829217468, "grad_norm": 6.440538128411903, "learning_rate": 6.368513828538491e-06, "loss": 17.3301, "step": 23492 }, { "epoch": 0.42943316211819327, "grad_norm": 6.289178477966556, "learning_rate": 6.368229118002746e-06, "loss": 17.5724, "step": 23493 }, { "epoch": 0.4294514413146398, "grad_norm": 5.8884013131330795, "learning_rate": 6.367944402671479e-06, "loss": 17.3068, "step": 23494 }, { "epoch": 0.42946972051108634, "grad_norm": 7.201165273310998, "learning_rate": 6.367659682545685e-06, "loss": 18.0724, "step": 23495 }, { "epoch": 0.4294879997075329, "grad_norm": 6.013576105633485, "learning_rate": 6.367374957626362e-06, "loss": 17.3887, "step": 23496 }, { "epoch": 0.42950627890397935, "grad_norm": 5.994183588812937, "learning_rate": 6.36709022791451e-06, "loss": 17.4696, "step": 23497 }, { "epoch": 0.4295245581004259, "grad_norm": 5.918961348027402, "learning_rate": 6.366805493411122e-06, "loss": 17.4055, "step": 23498 }, { "epoch": 0.4295428372968724, "grad_norm": 6.664741140326103, "learning_rate": 6.366520754117201e-06, "loss": 17.3546, "step": 23499 }, { "epoch": 0.42956111649331896, "grad_norm": 6.489303887878604, "learning_rate": 6.366236010033745e-06, "loss": 17.6144, "step": 23500 }, { "epoch": 0.4295793956897655, "grad_norm": 6.184314106609628, "learning_rate": 6.365951261161749e-06, "loss": 17.7604, "step": 23501 }, { "epoch": 0.429597674886212, "grad_norm": 7.7819691350396845, "learning_rate": 6.365666507502213e-06, "loss": 18.0776, "step": 23502 }, { "epoch": 0.4296159540826585, "grad_norm": 7.812641912064223, "learning_rate": 6.365381749056132e-06, "loss": 17.8831, "step": 23503 }, { "epoch": 0.42963423327910505, "grad_norm": 6.9714881228046695, "learning_rate": 6.365096985824509e-06, "loss": 17.7983, "step": 23504 }, { "epoch": 0.4296525124755516, "grad_norm": 5.83690403922629, "learning_rate": 6.364812217808339e-06, "loss": 17.2189, "step": 23505 }, { "epoch": 0.4296707916719981, "grad_norm": 8.227452114088974, "learning_rate": 6.36452744500862e-06, "loss": 18.3677, "step": 23506 }, { "epoch": 0.4296890708684446, "grad_norm": 5.292856627022733, "learning_rate": 6.364242667426351e-06, "loss": 17.0268, "step": 23507 }, { "epoch": 0.42970735006489114, "grad_norm": 7.44315320327994, "learning_rate": 6.3639578850625305e-06, "loss": 18.1939, "step": 23508 }, { "epoch": 0.42972562926133767, "grad_norm": 8.495513373816406, "learning_rate": 6.363673097918155e-06, "loss": 18.83, "step": 23509 }, { "epoch": 0.4297439084577842, "grad_norm": 5.843429288037855, "learning_rate": 6.3633883059942246e-06, "loss": 17.4012, "step": 23510 }, { "epoch": 0.42976218765423074, "grad_norm": 5.653508601303307, "learning_rate": 6.363103509291735e-06, "loss": 17.3988, "step": 23511 }, { "epoch": 0.4297804668506772, "grad_norm": 5.9362537578750905, "learning_rate": 6.362818707811687e-06, "loss": 17.495, "step": 23512 }, { "epoch": 0.42979874604712376, "grad_norm": 5.418770510451064, "learning_rate": 6.362533901555078e-06, "loss": 17.0801, "step": 23513 }, { "epoch": 0.4298170252435703, "grad_norm": 7.009895044952832, "learning_rate": 6.362249090522906e-06, "loss": 17.8629, "step": 23514 }, { "epoch": 0.42983530444001683, "grad_norm": 7.28416435772863, "learning_rate": 6.361964274716168e-06, "loss": 18.1369, "step": 23515 }, { "epoch": 0.42985358363646337, "grad_norm": 6.379897454566321, "learning_rate": 6.361679454135863e-06, "loss": 17.4548, "step": 23516 }, { "epoch": 0.42987186283290985, "grad_norm": 6.540674653398182, "learning_rate": 6.361394628782991e-06, "loss": 17.6663, "step": 23517 }, { "epoch": 0.4298901420293564, "grad_norm": 6.393240692581243, "learning_rate": 6.361109798658549e-06, "loss": 17.5582, "step": 23518 }, { "epoch": 0.4299084212258029, "grad_norm": 5.943618125336763, "learning_rate": 6.360824963763535e-06, "loss": 17.4567, "step": 23519 }, { "epoch": 0.42992670042224945, "grad_norm": 5.73368450021337, "learning_rate": 6.3605401240989485e-06, "loss": 17.2038, "step": 23520 }, { "epoch": 0.429944979618696, "grad_norm": 6.206420853309497, "learning_rate": 6.360255279665785e-06, "loss": 17.5609, "step": 23521 }, { "epoch": 0.42996325881514247, "grad_norm": 7.4257023033097305, "learning_rate": 6.359970430465045e-06, "loss": 18.1641, "step": 23522 }, { "epoch": 0.429981538011589, "grad_norm": 6.678247074651477, "learning_rate": 6.359685576497727e-06, "loss": 17.5187, "step": 23523 }, { "epoch": 0.42999981720803554, "grad_norm": 5.184782187513316, "learning_rate": 6.35940071776483e-06, "loss": 17.003, "step": 23524 }, { "epoch": 0.4300180964044821, "grad_norm": 6.271490413621714, "learning_rate": 6.359115854267351e-06, "loss": 17.4747, "step": 23525 }, { "epoch": 0.4300363756009286, "grad_norm": 7.708380800894812, "learning_rate": 6.358830986006288e-06, "loss": 17.8523, "step": 23526 }, { "epoch": 0.4300546547973751, "grad_norm": 6.687626777871401, "learning_rate": 6.358546112982642e-06, "loss": 17.3331, "step": 23527 }, { "epoch": 0.4300729339938216, "grad_norm": 6.3734659973425885, "learning_rate": 6.358261235197409e-06, "loss": 17.7526, "step": 23528 }, { "epoch": 0.43009121319026816, "grad_norm": 6.961532529100626, "learning_rate": 6.357976352651588e-06, "loss": 18.1992, "step": 23529 }, { "epoch": 0.4301094923867147, "grad_norm": 7.342318940314222, "learning_rate": 6.357691465346176e-06, "loss": 17.9878, "step": 23530 }, { "epoch": 0.4301277715831612, "grad_norm": 4.868647472607592, "learning_rate": 6.357406573282177e-06, "loss": 16.9925, "step": 23531 }, { "epoch": 0.4301460507796077, "grad_norm": 7.720392733134378, "learning_rate": 6.3571216764605834e-06, "loss": 18.1204, "step": 23532 }, { "epoch": 0.43016432997605425, "grad_norm": 6.0582095173374935, "learning_rate": 6.356836774882395e-06, "loss": 17.3617, "step": 23533 }, { "epoch": 0.4301826091725008, "grad_norm": 6.294570966416092, "learning_rate": 6.356551868548614e-06, "loss": 17.5776, "step": 23534 }, { "epoch": 0.4302008883689473, "grad_norm": 6.149519870982751, "learning_rate": 6.356266957460235e-06, "loss": 17.7005, "step": 23535 }, { "epoch": 0.4302191675653938, "grad_norm": 6.666495115279745, "learning_rate": 6.355982041618258e-06, "loss": 17.6561, "step": 23536 }, { "epoch": 0.43023744676184034, "grad_norm": 5.5696277194670225, "learning_rate": 6.355697121023681e-06, "loss": 17.2011, "step": 23537 }, { "epoch": 0.43025572595828687, "grad_norm": 6.57652460180183, "learning_rate": 6.355412195677505e-06, "loss": 17.9103, "step": 23538 }, { "epoch": 0.4302740051547334, "grad_norm": 5.353515828827387, "learning_rate": 6.355127265580726e-06, "loss": 17.2027, "step": 23539 }, { "epoch": 0.43029228435117994, "grad_norm": 6.7153313542503605, "learning_rate": 6.354842330734343e-06, "loss": 17.6947, "step": 23540 }, { "epoch": 0.4303105635476264, "grad_norm": 6.61578978784584, "learning_rate": 6.354557391139356e-06, "loss": 17.4198, "step": 23541 }, { "epoch": 0.43032884274407296, "grad_norm": 7.698702025652604, "learning_rate": 6.354272446796763e-06, "loss": 18.6244, "step": 23542 }, { "epoch": 0.4303471219405195, "grad_norm": 5.020434350955654, "learning_rate": 6.353987497707561e-06, "loss": 17.1536, "step": 23543 }, { "epoch": 0.43036540113696603, "grad_norm": 6.580445026362194, "learning_rate": 6.353702543872752e-06, "loss": 17.8923, "step": 23544 }, { "epoch": 0.43038368033341257, "grad_norm": 6.9063397978375365, "learning_rate": 6.353417585293333e-06, "loss": 17.7039, "step": 23545 }, { "epoch": 0.43040195952985905, "grad_norm": 5.466611397350951, "learning_rate": 6.353132621970302e-06, "loss": 17.0673, "step": 23546 }, { "epoch": 0.4304202387263056, "grad_norm": 6.117090436525762, "learning_rate": 6.352847653904659e-06, "loss": 17.4691, "step": 23547 }, { "epoch": 0.4304385179227521, "grad_norm": 8.191070441242521, "learning_rate": 6.352562681097402e-06, "loss": 18.4792, "step": 23548 }, { "epoch": 0.43045679711919865, "grad_norm": 5.184905925924164, "learning_rate": 6.352277703549532e-06, "loss": 17.1745, "step": 23549 }, { "epoch": 0.4304750763156452, "grad_norm": 6.102763577036956, "learning_rate": 6.351992721262044e-06, "loss": 17.6115, "step": 23550 }, { "epoch": 0.43049335551209167, "grad_norm": 7.700583774581458, "learning_rate": 6.351707734235939e-06, "loss": 17.936, "step": 23551 }, { "epoch": 0.4305116347085382, "grad_norm": 6.013999561214821, "learning_rate": 6.351422742472215e-06, "loss": 17.4293, "step": 23552 }, { "epoch": 0.43052991390498474, "grad_norm": 6.4355427076619325, "learning_rate": 6.351137745971874e-06, "loss": 17.4397, "step": 23553 }, { "epoch": 0.4305481931014313, "grad_norm": 6.155661478665161, "learning_rate": 6.35085274473591e-06, "loss": 17.3839, "step": 23554 }, { "epoch": 0.4305664722978778, "grad_norm": 6.448929461968095, "learning_rate": 6.350567738765325e-06, "loss": 17.6711, "step": 23555 }, { "epoch": 0.4305847514943243, "grad_norm": 5.863524683486678, "learning_rate": 6.350282728061119e-06, "loss": 17.3323, "step": 23556 }, { "epoch": 0.4306030306907708, "grad_norm": 6.371609691187254, "learning_rate": 6.349997712624287e-06, "loss": 17.302, "step": 23557 }, { "epoch": 0.43062130988721736, "grad_norm": 7.076900386101137, "learning_rate": 6.34971269245583e-06, "loss": 17.5653, "step": 23558 }, { "epoch": 0.4306395890836639, "grad_norm": 7.148225022313409, "learning_rate": 6.34942766755675e-06, "loss": 17.9312, "step": 23559 }, { "epoch": 0.43065786828011043, "grad_norm": 7.5029938852357025, "learning_rate": 6.349142637928041e-06, "loss": 17.7849, "step": 23560 }, { "epoch": 0.4306761474765569, "grad_norm": 5.972651678416425, "learning_rate": 6.348857603570704e-06, "loss": 17.5773, "step": 23561 }, { "epoch": 0.43069442667300345, "grad_norm": 6.354691840650409, "learning_rate": 6.3485725644857375e-06, "loss": 17.5921, "step": 23562 }, { "epoch": 0.43071270586945, "grad_norm": 7.019444950097048, "learning_rate": 6.348287520674144e-06, "loss": 17.8997, "step": 23563 }, { "epoch": 0.4307309850658965, "grad_norm": 5.534087623630858, "learning_rate": 6.3480024721369175e-06, "loss": 17.2789, "step": 23564 }, { "epoch": 0.430749264262343, "grad_norm": 6.998121395021827, "learning_rate": 6.347717418875059e-06, "loss": 18.0811, "step": 23565 }, { "epoch": 0.43076754345878954, "grad_norm": 6.394144033047483, "learning_rate": 6.347432360889569e-06, "loss": 17.8028, "step": 23566 }, { "epoch": 0.4307858226552361, "grad_norm": 6.197785823895567, "learning_rate": 6.3471472981814455e-06, "loss": 17.3168, "step": 23567 }, { "epoch": 0.4308041018516826, "grad_norm": 7.082447720214877, "learning_rate": 6.346862230751687e-06, "loss": 18.2345, "step": 23568 }, { "epoch": 0.43082238104812914, "grad_norm": 6.853089912572116, "learning_rate": 6.346577158601295e-06, "loss": 17.658, "step": 23569 }, { "epoch": 0.4308406602445756, "grad_norm": 6.759346800709279, "learning_rate": 6.346292081731263e-06, "loss": 17.6147, "step": 23570 }, { "epoch": 0.43085893944102216, "grad_norm": 5.888435230387981, "learning_rate": 6.346007000142597e-06, "loss": 17.5343, "step": 23571 }, { "epoch": 0.4308772186374687, "grad_norm": 6.506146373043827, "learning_rate": 6.345721913836293e-06, "loss": 17.7227, "step": 23572 }, { "epoch": 0.43089549783391523, "grad_norm": 6.400574972811731, "learning_rate": 6.345436822813351e-06, "loss": 17.5505, "step": 23573 }, { "epoch": 0.43091377703036177, "grad_norm": 6.925905266755376, "learning_rate": 6.345151727074769e-06, "loss": 17.981, "step": 23574 }, { "epoch": 0.43093205622680825, "grad_norm": 6.203663754888339, "learning_rate": 6.344866626621545e-06, "loss": 17.6513, "step": 23575 }, { "epoch": 0.4309503354232548, "grad_norm": 9.25854042975842, "learning_rate": 6.3445815214546835e-06, "loss": 17.6923, "step": 23576 }, { "epoch": 0.4309686146197013, "grad_norm": 5.340424561620956, "learning_rate": 6.34429641157518e-06, "loss": 17.0434, "step": 23577 }, { "epoch": 0.43098689381614785, "grad_norm": 7.223076131341303, "learning_rate": 6.344011296984032e-06, "loss": 17.7422, "step": 23578 }, { "epoch": 0.4310051730125944, "grad_norm": 7.421662114646333, "learning_rate": 6.343726177682242e-06, "loss": 18.237, "step": 23579 }, { "epoch": 0.43102345220904087, "grad_norm": 6.412888405338289, "learning_rate": 6.343441053670809e-06, "loss": 17.768, "step": 23580 }, { "epoch": 0.4310417314054874, "grad_norm": 5.686539067838539, "learning_rate": 6.343155924950731e-06, "loss": 17.561, "step": 23581 }, { "epoch": 0.43106001060193394, "grad_norm": 6.2577164798291305, "learning_rate": 6.3428707915230084e-06, "loss": 17.3655, "step": 23582 }, { "epoch": 0.4310782897983805, "grad_norm": 6.478114533256353, "learning_rate": 6.342585653388641e-06, "loss": 17.7416, "step": 23583 }, { "epoch": 0.431096568994827, "grad_norm": 6.864987302960307, "learning_rate": 6.3423005105486255e-06, "loss": 17.835, "step": 23584 }, { "epoch": 0.4311148481912735, "grad_norm": 7.418753310101508, "learning_rate": 6.342015363003964e-06, "loss": 17.3938, "step": 23585 }, { "epoch": 0.43113312738772, "grad_norm": 6.927264000389076, "learning_rate": 6.341730210755656e-06, "loss": 17.8835, "step": 23586 }, { "epoch": 0.43115140658416656, "grad_norm": 6.540024975991596, "learning_rate": 6.3414450538047e-06, "loss": 17.5499, "step": 23587 }, { "epoch": 0.4311696857806131, "grad_norm": 6.128366513281492, "learning_rate": 6.341159892152094e-06, "loss": 17.327, "step": 23588 }, { "epoch": 0.43118796497705963, "grad_norm": 6.531556646376057, "learning_rate": 6.340874725798839e-06, "loss": 17.3907, "step": 23589 }, { "epoch": 0.4312062441735061, "grad_norm": 5.874498961371777, "learning_rate": 6.340589554745936e-06, "loss": 17.3448, "step": 23590 }, { "epoch": 0.43122452336995265, "grad_norm": 6.398861232659166, "learning_rate": 6.340304378994382e-06, "loss": 17.3126, "step": 23591 }, { "epoch": 0.4312428025663992, "grad_norm": 6.822006102054159, "learning_rate": 6.340019198545177e-06, "loss": 17.5016, "step": 23592 }, { "epoch": 0.4312610817628457, "grad_norm": 5.416352710818658, "learning_rate": 6.339734013399323e-06, "loss": 17.344, "step": 23593 }, { "epoch": 0.43127936095929226, "grad_norm": 5.854315251684952, "learning_rate": 6.339448823557816e-06, "loss": 17.5095, "step": 23594 }, { "epoch": 0.43129764015573874, "grad_norm": 4.862848523644214, "learning_rate": 6.339163629021656e-06, "loss": 16.6493, "step": 23595 }, { "epoch": 0.4313159193521853, "grad_norm": 7.724796247332725, "learning_rate": 6.338878429791846e-06, "loss": 18.062, "step": 23596 }, { "epoch": 0.4313341985486318, "grad_norm": 6.703452027022384, "learning_rate": 6.338593225869382e-06, "loss": 17.9162, "step": 23597 }, { "epoch": 0.43135247774507834, "grad_norm": 6.360853912618733, "learning_rate": 6.338308017255265e-06, "loss": 17.6445, "step": 23598 }, { "epoch": 0.4313707569415248, "grad_norm": 5.146724697823028, "learning_rate": 6.338022803950495e-06, "loss": 16.985, "step": 23599 }, { "epoch": 0.43138903613797136, "grad_norm": 6.493735869695229, "learning_rate": 6.337737585956072e-06, "loss": 17.4252, "step": 23600 }, { "epoch": 0.4314073153344179, "grad_norm": 6.177591743726019, "learning_rate": 6.337452363272994e-06, "loss": 17.499, "step": 23601 }, { "epoch": 0.43142559453086443, "grad_norm": 5.310687994515556, "learning_rate": 6.3371671359022595e-06, "loss": 16.9643, "step": 23602 }, { "epoch": 0.43144387372731097, "grad_norm": 6.164661027840266, "learning_rate": 6.336881903844872e-06, "loss": 17.3908, "step": 23603 }, { "epoch": 0.43146215292375745, "grad_norm": 6.258563344737975, "learning_rate": 6.33659666710183e-06, "loss": 17.2719, "step": 23604 }, { "epoch": 0.431480432120204, "grad_norm": 6.074955619878194, "learning_rate": 6.336311425674132e-06, "loss": 17.3799, "step": 23605 }, { "epoch": 0.4314987113166505, "grad_norm": 6.905842635532751, "learning_rate": 6.336026179562777e-06, "loss": 17.6053, "step": 23606 }, { "epoch": 0.43151699051309705, "grad_norm": 5.6531995983639165, "learning_rate": 6.335740928768769e-06, "loss": 17.2047, "step": 23607 }, { "epoch": 0.4315352697095436, "grad_norm": 6.341681698187891, "learning_rate": 6.335455673293102e-06, "loss": 17.4688, "step": 23608 }, { "epoch": 0.43155354890599007, "grad_norm": 6.6960972846629, "learning_rate": 6.335170413136782e-06, "loss": 17.6311, "step": 23609 }, { "epoch": 0.4315718281024366, "grad_norm": 5.425467928272902, "learning_rate": 6.3348851483008034e-06, "loss": 17.2715, "step": 23610 }, { "epoch": 0.43159010729888314, "grad_norm": 5.786997763853015, "learning_rate": 6.334599878786169e-06, "loss": 17.0508, "step": 23611 }, { "epoch": 0.4316083864953297, "grad_norm": 7.2793864873006315, "learning_rate": 6.334314604593877e-06, "loss": 17.8548, "step": 23612 }, { "epoch": 0.4316266656917762, "grad_norm": 7.507989362870152, "learning_rate": 6.334029325724928e-06, "loss": 17.5372, "step": 23613 }, { "epoch": 0.4316449448882227, "grad_norm": 6.007074653945293, "learning_rate": 6.333744042180324e-06, "loss": 17.4673, "step": 23614 }, { "epoch": 0.43166322408466923, "grad_norm": 7.066863931780278, "learning_rate": 6.3334587539610616e-06, "loss": 17.6358, "step": 23615 }, { "epoch": 0.43168150328111576, "grad_norm": 5.730963674864685, "learning_rate": 6.33317346106814e-06, "loss": 17.2837, "step": 23616 }, { "epoch": 0.4316997824775623, "grad_norm": 6.995700824601071, "learning_rate": 6.3328881635025645e-06, "loss": 17.9608, "step": 23617 }, { "epoch": 0.43171806167400884, "grad_norm": 7.683474690210917, "learning_rate": 6.33260286126533e-06, "loss": 17.9472, "step": 23618 }, { "epoch": 0.4317363408704553, "grad_norm": 5.3997313453108005, "learning_rate": 6.332317554357439e-06, "loss": 17.3005, "step": 23619 }, { "epoch": 0.43175462006690185, "grad_norm": 7.647251536137354, "learning_rate": 6.332032242779888e-06, "loss": 18.0896, "step": 23620 }, { "epoch": 0.4317728992633484, "grad_norm": 6.0922444223845025, "learning_rate": 6.3317469265336825e-06, "loss": 17.2557, "step": 23621 }, { "epoch": 0.4317911784597949, "grad_norm": 6.41205927824415, "learning_rate": 6.331461605619819e-06, "loss": 17.6117, "step": 23622 }, { "epoch": 0.43180945765624146, "grad_norm": 6.974559947009274, "learning_rate": 6.331176280039297e-06, "loss": 17.9107, "step": 23623 }, { "epoch": 0.43182773685268794, "grad_norm": 6.111808477909253, "learning_rate": 6.330890949793118e-06, "loss": 17.6125, "step": 23624 }, { "epoch": 0.4318460160491345, "grad_norm": 6.386831922776062, "learning_rate": 6.330605614882282e-06, "loss": 17.3546, "step": 23625 }, { "epoch": 0.431864295245581, "grad_norm": 5.6752321721912455, "learning_rate": 6.330320275307788e-06, "loss": 17.4309, "step": 23626 }, { "epoch": 0.43188257444202754, "grad_norm": 6.8700104537621645, "learning_rate": 6.3300349310706385e-06, "loss": 17.7857, "step": 23627 }, { "epoch": 0.4319008536384741, "grad_norm": 5.587201181943335, "learning_rate": 6.329749582171831e-06, "loss": 17.4384, "step": 23628 }, { "epoch": 0.43191913283492056, "grad_norm": 7.180760901175978, "learning_rate": 6.329464228612366e-06, "loss": 17.8585, "step": 23629 }, { "epoch": 0.4319374120313671, "grad_norm": 6.486207732412409, "learning_rate": 6.329178870393245e-06, "loss": 17.4598, "step": 23630 }, { "epoch": 0.43195569122781363, "grad_norm": 5.825725309102074, "learning_rate": 6.328893507515469e-06, "loss": 17.2494, "step": 23631 }, { "epoch": 0.43197397042426017, "grad_norm": 6.5699124501434065, "learning_rate": 6.328608139980035e-06, "loss": 17.4834, "step": 23632 }, { "epoch": 0.43199224962070665, "grad_norm": 5.819090964177129, "learning_rate": 6.328322767787944e-06, "loss": 17.1533, "step": 23633 }, { "epoch": 0.4320105288171532, "grad_norm": 5.410143388116245, "learning_rate": 6.328037390940196e-06, "loss": 17.1068, "step": 23634 }, { "epoch": 0.4320288080135997, "grad_norm": 5.6812501974864675, "learning_rate": 6.327752009437795e-06, "loss": 17.2393, "step": 23635 }, { "epoch": 0.43204708721004625, "grad_norm": 5.394884613321816, "learning_rate": 6.327466623281737e-06, "loss": 17.105, "step": 23636 }, { "epoch": 0.4320653664064928, "grad_norm": 6.279848727586088, "learning_rate": 6.3271812324730246e-06, "loss": 17.5912, "step": 23637 }, { "epoch": 0.43208364560293927, "grad_norm": 7.0200001625514865, "learning_rate": 6.326895837012657e-06, "loss": 17.8785, "step": 23638 }, { "epoch": 0.4321019247993858, "grad_norm": 6.6011727904737, "learning_rate": 6.326610436901633e-06, "loss": 17.6225, "step": 23639 }, { "epoch": 0.43212020399583234, "grad_norm": 9.000078204813386, "learning_rate": 6.3263250321409565e-06, "loss": 18.1729, "step": 23640 }, { "epoch": 0.4321384831922789, "grad_norm": 6.494816833204111, "learning_rate": 6.326039622731625e-06, "loss": 17.5466, "step": 23641 }, { "epoch": 0.4321567623887254, "grad_norm": 7.552311891076966, "learning_rate": 6.325754208674639e-06, "loss": 18.2418, "step": 23642 }, { "epoch": 0.4321750415851719, "grad_norm": 4.660648391971336, "learning_rate": 6.325468789971e-06, "loss": 17.0135, "step": 23643 }, { "epoch": 0.43219332078161843, "grad_norm": 7.794188448478198, "learning_rate": 6.325183366621708e-06, "loss": 18.2133, "step": 23644 }, { "epoch": 0.43221159997806496, "grad_norm": 6.958218183860368, "learning_rate": 6.324897938627764e-06, "loss": 17.6694, "step": 23645 }, { "epoch": 0.4322298791745115, "grad_norm": 5.308723604304931, "learning_rate": 6.3246125059901675e-06, "loss": 17.1629, "step": 23646 }, { "epoch": 0.43224815837095804, "grad_norm": 6.898854752428574, "learning_rate": 6.324327068709919e-06, "loss": 17.6634, "step": 23647 }, { "epoch": 0.4322664375674045, "grad_norm": 5.657987460756355, "learning_rate": 6.3240416267880176e-06, "loss": 17.4149, "step": 23648 }, { "epoch": 0.43228471676385105, "grad_norm": 6.514028981464109, "learning_rate": 6.323756180225467e-06, "loss": 17.4828, "step": 23649 }, { "epoch": 0.4323029959602976, "grad_norm": 6.3316402606485065, "learning_rate": 6.323470729023265e-06, "loss": 17.4596, "step": 23650 }, { "epoch": 0.4323212751567441, "grad_norm": 6.1611782907205495, "learning_rate": 6.323185273182414e-06, "loss": 17.7437, "step": 23651 }, { "epoch": 0.43233955435319066, "grad_norm": 6.204323257004265, "learning_rate": 6.322899812703912e-06, "loss": 17.7884, "step": 23652 }, { "epoch": 0.43235783354963714, "grad_norm": 6.031031266139402, "learning_rate": 6.3226143475887615e-06, "loss": 17.5877, "step": 23653 }, { "epoch": 0.4323761127460837, "grad_norm": 6.642578101931615, "learning_rate": 6.322328877837962e-06, "loss": 17.8141, "step": 23654 }, { "epoch": 0.4323943919425302, "grad_norm": 6.232290290975836, "learning_rate": 6.322043403452516e-06, "loss": 17.6875, "step": 23655 }, { "epoch": 0.43241267113897675, "grad_norm": 6.447425512472471, "learning_rate": 6.321757924433423e-06, "loss": 17.7279, "step": 23656 }, { "epoch": 0.4324309503354233, "grad_norm": 5.546854156608528, "learning_rate": 6.32147244078168e-06, "loss": 17.178, "step": 23657 }, { "epoch": 0.43244922953186976, "grad_norm": 7.348328477775804, "learning_rate": 6.321186952498292e-06, "loss": 18.0173, "step": 23658 }, { "epoch": 0.4324675087283163, "grad_norm": 6.889108306857017, "learning_rate": 6.320901459584261e-06, "loss": 17.8659, "step": 23659 }, { "epoch": 0.43248578792476283, "grad_norm": 6.132608183396902, "learning_rate": 6.320615962040582e-06, "loss": 17.4422, "step": 23660 }, { "epoch": 0.43250406712120937, "grad_norm": 6.72385031976017, "learning_rate": 6.32033045986826e-06, "loss": 17.7272, "step": 23661 }, { "epoch": 0.4325223463176559, "grad_norm": 4.912147979234194, "learning_rate": 6.320044953068292e-06, "loss": 16.9572, "step": 23662 }, { "epoch": 0.4325406255141024, "grad_norm": 6.492503014424679, "learning_rate": 6.319759441641684e-06, "loss": 17.6422, "step": 23663 }, { "epoch": 0.4325589047105489, "grad_norm": 6.552009588590618, "learning_rate": 6.319473925589434e-06, "loss": 17.6427, "step": 23664 }, { "epoch": 0.43257718390699545, "grad_norm": 5.223481546395908, "learning_rate": 6.319188404912539e-06, "loss": 16.9528, "step": 23665 }, { "epoch": 0.432595463103442, "grad_norm": 6.261079903315186, "learning_rate": 6.3189028796120064e-06, "loss": 17.7824, "step": 23666 }, { "epoch": 0.43261374229988847, "grad_norm": 7.699282425401338, "learning_rate": 6.318617349688833e-06, "loss": 18.1409, "step": 23667 }, { "epoch": 0.432632021496335, "grad_norm": 6.000838364614353, "learning_rate": 6.3183318151440185e-06, "loss": 17.6232, "step": 23668 }, { "epoch": 0.43265030069278154, "grad_norm": 7.386893603469021, "learning_rate": 6.318046275978568e-06, "loss": 17.883, "step": 23669 }, { "epoch": 0.4326685798892281, "grad_norm": 5.803587478337489, "learning_rate": 6.317760732193476e-06, "loss": 17.1153, "step": 23670 }, { "epoch": 0.4326868590856746, "grad_norm": 5.739835343185507, "learning_rate": 6.317475183789749e-06, "loss": 17.4805, "step": 23671 }, { "epoch": 0.4327051382821211, "grad_norm": 6.590862740531634, "learning_rate": 6.317189630768387e-06, "loss": 17.5096, "step": 23672 }, { "epoch": 0.43272341747856763, "grad_norm": 6.227370853172004, "learning_rate": 6.31690407313039e-06, "loss": 17.3566, "step": 23673 }, { "epoch": 0.43274169667501416, "grad_norm": 7.471506976174249, "learning_rate": 6.316618510876756e-06, "loss": 17.8985, "step": 23674 }, { "epoch": 0.4327599758714607, "grad_norm": 6.930991009726059, "learning_rate": 6.316332944008489e-06, "loss": 17.7353, "step": 23675 }, { "epoch": 0.43277825506790724, "grad_norm": 8.710427864066192, "learning_rate": 6.31604737252659e-06, "loss": 17.9213, "step": 23676 }, { "epoch": 0.4327965342643537, "grad_norm": 7.255891957310312, "learning_rate": 6.315761796432059e-06, "loss": 18.1452, "step": 23677 }, { "epoch": 0.43281481346080025, "grad_norm": 7.331456921649562, "learning_rate": 6.315476215725898e-06, "loss": 18.0464, "step": 23678 }, { "epoch": 0.4328330926572468, "grad_norm": 5.865152578344326, "learning_rate": 6.3151906304091044e-06, "loss": 17.3125, "step": 23679 }, { "epoch": 0.4328513718536933, "grad_norm": 6.2557696130679314, "learning_rate": 6.314905040482684e-06, "loss": 17.2814, "step": 23680 }, { "epoch": 0.43286965105013986, "grad_norm": 6.979713538355563, "learning_rate": 6.314619445947635e-06, "loss": 17.9718, "step": 23681 }, { "epoch": 0.43288793024658634, "grad_norm": 6.949967968444671, "learning_rate": 6.314333846804958e-06, "loss": 17.8274, "step": 23682 }, { "epoch": 0.4329062094430329, "grad_norm": 7.340115865364658, "learning_rate": 6.3140482430556575e-06, "loss": 18.0253, "step": 23683 }, { "epoch": 0.4329244886394794, "grad_norm": 6.784555261414443, "learning_rate": 6.3137626347007285e-06, "loss": 17.7889, "step": 23684 }, { "epoch": 0.43294276783592595, "grad_norm": 7.6079277560130905, "learning_rate": 6.313477021741177e-06, "loss": 18.6101, "step": 23685 }, { "epoch": 0.4329610470323725, "grad_norm": 7.2347278507334245, "learning_rate": 6.313191404178003e-06, "loss": 17.5927, "step": 23686 }, { "epoch": 0.43297932622881896, "grad_norm": 6.8709107838501025, "learning_rate": 6.312905782012208e-06, "loss": 17.9347, "step": 23687 }, { "epoch": 0.4329976054252655, "grad_norm": 6.193055591317017, "learning_rate": 6.312620155244791e-06, "loss": 18.3008, "step": 23688 }, { "epoch": 0.43301588462171203, "grad_norm": 7.119030086685474, "learning_rate": 6.312334523876753e-06, "loss": 17.7394, "step": 23689 }, { "epoch": 0.43303416381815857, "grad_norm": 8.173876144565408, "learning_rate": 6.312048887909098e-06, "loss": 18.3403, "step": 23690 }, { "epoch": 0.4330524430146051, "grad_norm": 4.51053767601419, "learning_rate": 6.311763247342824e-06, "loss": 16.9699, "step": 23691 }, { "epoch": 0.4330707222110516, "grad_norm": 5.552189046937376, "learning_rate": 6.311477602178936e-06, "loss": 17.2013, "step": 23692 }, { "epoch": 0.4330890014074981, "grad_norm": 5.192470755949138, "learning_rate": 6.31119195241843e-06, "loss": 16.9921, "step": 23693 }, { "epoch": 0.43310728060394466, "grad_norm": 5.885915132582066, "learning_rate": 6.310906298062313e-06, "loss": 17.48, "step": 23694 }, { "epoch": 0.4331255598003912, "grad_norm": 6.116542001462024, "learning_rate": 6.310620639111581e-06, "loss": 17.4715, "step": 23695 }, { "epoch": 0.4331438389968377, "grad_norm": 7.1573774379069155, "learning_rate": 6.310334975567238e-06, "loss": 18.1411, "step": 23696 }, { "epoch": 0.4331621181932842, "grad_norm": 6.87937979468646, "learning_rate": 6.310049307430285e-06, "loss": 17.7575, "step": 23697 }, { "epoch": 0.43318039738973074, "grad_norm": 6.805774397697574, "learning_rate": 6.309763634701722e-06, "loss": 17.7559, "step": 23698 }, { "epoch": 0.4331986765861773, "grad_norm": 6.354744043821025, "learning_rate": 6.309477957382551e-06, "loss": 17.8381, "step": 23699 }, { "epoch": 0.4332169557826238, "grad_norm": 6.263618172154704, "learning_rate": 6.309192275473776e-06, "loss": 17.4542, "step": 23700 }, { "epoch": 0.4332352349790703, "grad_norm": 5.9982633904496945, "learning_rate": 6.308906588976393e-06, "loss": 17.245, "step": 23701 }, { "epoch": 0.43325351417551683, "grad_norm": 6.403164628163186, "learning_rate": 6.3086208978914055e-06, "loss": 17.5769, "step": 23702 }, { "epoch": 0.43327179337196337, "grad_norm": 5.2043635896948865, "learning_rate": 6.3083352022198176e-06, "loss": 16.9292, "step": 23703 }, { "epoch": 0.4332900725684099, "grad_norm": 5.8461032049150266, "learning_rate": 6.308049501962628e-06, "loss": 17.3624, "step": 23704 }, { "epoch": 0.43330835176485644, "grad_norm": 6.958185011318118, "learning_rate": 6.3077637971208376e-06, "loss": 17.3023, "step": 23705 }, { "epoch": 0.4333266309613029, "grad_norm": 6.964875055633574, "learning_rate": 6.307478087695448e-06, "loss": 17.7401, "step": 23706 }, { "epoch": 0.43334491015774945, "grad_norm": 7.906333688005333, "learning_rate": 6.307192373687462e-06, "loss": 18.0221, "step": 23707 }, { "epoch": 0.433363189354196, "grad_norm": 6.303503673044512, "learning_rate": 6.3069066550978795e-06, "loss": 17.6923, "step": 23708 }, { "epoch": 0.4333814685506425, "grad_norm": 7.393071072362138, "learning_rate": 6.306620931927702e-06, "loss": 17.565, "step": 23709 }, { "epoch": 0.43339974774708906, "grad_norm": 6.959454472348956, "learning_rate": 6.306335204177933e-06, "loss": 17.7927, "step": 23710 }, { "epoch": 0.43341802694353554, "grad_norm": 6.3942945657447545, "learning_rate": 6.306049471849572e-06, "loss": 17.4261, "step": 23711 }, { "epoch": 0.4334363061399821, "grad_norm": 7.0891884242612795, "learning_rate": 6.305763734943622e-06, "loss": 18.0542, "step": 23712 }, { "epoch": 0.4334545853364286, "grad_norm": 5.988720333390101, "learning_rate": 6.3054779934610825e-06, "loss": 17.3924, "step": 23713 }, { "epoch": 0.43347286453287515, "grad_norm": 8.290340208252422, "learning_rate": 6.305192247402956e-06, "loss": 18.3084, "step": 23714 }, { "epoch": 0.4334911437293217, "grad_norm": 5.889952083587758, "learning_rate": 6.304906496770244e-06, "loss": 17.2681, "step": 23715 }, { "epoch": 0.43350942292576816, "grad_norm": 6.112561906215684, "learning_rate": 6.304620741563946e-06, "loss": 17.4354, "step": 23716 }, { "epoch": 0.4335277021222147, "grad_norm": 5.97949838288502, "learning_rate": 6.304334981785067e-06, "loss": 17.4104, "step": 23717 }, { "epoch": 0.43354598131866123, "grad_norm": 6.254784076973419, "learning_rate": 6.3040492174346095e-06, "loss": 17.7769, "step": 23718 }, { "epoch": 0.43356426051510777, "grad_norm": 4.91749993441896, "learning_rate": 6.303763448513569e-06, "loss": 16.818, "step": 23719 }, { "epoch": 0.4335825397115543, "grad_norm": 6.556698095861072, "learning_rate": 6.303477675022952e-06, "loss": 17.5761, "step": 23720 }, { "epoch": 0.4336008189080008, "grad_norm": 7.580466775949854, "learning_rate": 6.3031918969637595e-06, "loss": 17.9656, "step": 23721 }, { "epoch": 0.4336190981044473, "grad_norm": 7.378613834067667, "learning_rate": 6.302906114336992e-06, "loss": 17.9768, "step": 23722 }, { "epoch": 0.43363737730089386, "grad_norm": 6.573427134354408, "learning_rate": 6.302620327143652e-06, "loss": 17.6758, "step": 23723 }, { "epoch": 0.4336556564973404, "grad_norm": 7.381865920222753, "learning_rate": 6.3023345353847395e-06, "loss": 17.8383, "step": 23724 }, { "epoch": 0.4336739356937869, "grad_norm": 7.178408294525828, "learning_rate": 6.302048739061258e-06, "loss": 18.1773, "step": 23725 }, { "epoch": 0.4336922148902334, "grad_norm": 6.765000874438852, "learning_rate": 6.30176293817421e-06, "loss": 17.7684, "step": 23726 }, { "epoch": 0.43371049408667994, "grad_norm": 7.228686473083321, "learning_rate": 6.301477132724594e-06, "loss": 17.7485, "step": 23727 }, { "epoch": 0.4337287732831265, "grad_norm": 7.387191203986933, "learning_rate": 6.301191322713416e-06, "loss": 17.9596, "step": 23728 }, { "epoch": 0.433747052479573, "grad_norm": 6.741546671682292, "learning_rate": 6.300905508141672e-06, "loss": 17.6733, "step": 23729 }, { "epoch": 0.43376533167601955, "grad_norm": 6.506066753363428, "learning_rate": 6.30061968901037e-06, "loss": 17.6092, "step": 23730 }, { "epoch": 0.43378361087246603, "grad_norm": 6.6999741709423315, "learning_rate": 6.300333865320507e-06, "loss": 17.7757, "step": 23731 }, { "epoch": 0.43380189006891257, "grad_norm": 6.947325471826711, "learning_rate": 6.300048037073089e-06, "loss": 17.9511, "step": 23732 }, { "epoch": 0.4338201692653591, "grad_norm": 5.874680522608939, "learning_rate": 6.299762204269113e-06, "loss": 17.4567, "step": 23733 }, { "epoch": 0.43383844846180564, "grad_norm": 6.361205809933727, "learning_rate": 6.299476366909583e-06, "loss": 17.7777, "step": 23734 }, { "epoch": 0.4338567276582521, "grad_norm": 8.225822282790286, "learning_rate": 6.299190524995503e-06, "loss": 18.1674, "step": 23735 }, { "epoch": 0.43387500685469865, "grad_norm": 6.356940099536296, "learning_rate": 6.298904678527873e-06, "loss": 17.5669, "step": 23736 }, { "epoch": 0.4338932860511452, "grad_norm": 6.480234310564968, "learning_rate": 6.2986188275076945e-06, "loss": 17.5864, "step": 23737 }, { "epoch": 0.4339115652475917, "grad_norm": 6.6292184884954555, "learning_rate": 6.298332971935968e-06, "loss": 17.2966, "step": 23738 }, { "epoch": 0.43392984444403826, "grad_norm": 6.738738912862232, "learning_rate": 6.298047111813699e-06, "loss": 18.0916, "step": 23739 }, { "epoch": 0.43394812364048474, "grad_norm": 7.26931032448823, "learning_rate": 6.297761247141886e-06, "loss": 18.0442, "step": 23740 }, { "epoch": 0.4339664028369313, "grad_norm": 5.247154909447839, "learning_rate": 6.297475377921534e-06, "loss": 16.9678, "step": 23741 }, { "epoch": 0.4339846820333778, "grad_norm": 6.959207063024884, "learning_rate": 6.297189504153642e-06, "loss": 17.497, "step": 23742 }, { "epoch": 0.43400296122982435, "grad_norm": 6.192873902998268, "learning_rate": 6.296903625839214e-06, "loss": 17.2276, "step": 23743 }, { "epoch": 0.4340212404262709, "grad_norm": 6.8783072443713555, "learning_rate": 6.296617742979251e-06, "loss": 17.8566, "step": 23744 }, { "epoch": 0.43403951962271736, "grad_norm": 8.055878202472636, "learning_rate": 6.296331855574757e-06, "loss": 18.0506, "step": 23745 }, { "epoch": 0.4340577988191639, "grad_norm": 6.284688876324532, "learning_rate": 6.29604596362673e-06, "loss": 17.8828, "step": 23746 }, { "epoch": 0.43407607801561043, "grad_norm": 5.41551190715901, "learning_rate": 6.295760067136177e-06, "loss": 17.1019, "step": 23747 }, { "epoch": 0.43409435721205697, "grad_norm": 6.578470511427789, "learning_rate": 6.295474166104093e-06, "loss": 17.4602, "step": 23748 }, { "epoch": 0.4341126364085035, "grad_norm": 7.0332412085414715, "learning_rate": 6.295188260531488e-06, "loss": 17.862, "step": 23749 }, { "epoch": 0.43413091560495, "grad_norm": 6.817839618855633, "learning_rate": 6.294902350419361e-06, "loss": 17.7536, "step": 23750 }, { "epoch": 0.4341491948013965, "grad_norm": 6.1622146518976315, "learning_rate": 6.2946164357687115e-06, "loss": 17.4708, "step": 23751 }, { "epoch": 0.43416747399784306, "grad_norm": 8.014778122356413, "learning_rate": 6.294330516580545e-06, "loss": 18.0563, "step": 23752 }, { "epoch": 0.4341857531942896, "grad_norm": 6.252125107404907, "learning_rate": 6.294044592855861e-06, "loss": 17.3551, "step": 23753 }, { "epoch": 0.4342040323907361, "grad_norm": 5.6135930489602375, "learning_rate": 6.293758664595664e-06, "loss": 17.2825, "step": 23754 }, { "epoch": 0.4342223115871826, "grad_norm": 7.749024116360044, "learning_rate": 6.2934727318009555e-06, "loss": 17.9782, "step": 23755 }, { "epoch": 0.43424059078362914, "grad_norm": 7.256405670159264, "learning_rate": 6.293186794472736e-06, "loss": 18.049, "step": 23756 }, { "epoch": 0.4342588699800757, "grad_norm": 7.390234266504027, "learning_rate": 6.2929008526120106e-06, "loss": 17.8325, "step": 23757 }, { "epoch": 0.4342771491765222, "grad_norm": 6.2910071211717975, "learning_rate": 6.292614906219778e-06, "loss": 17.4764, "step": 23758 }, { "epoch": 0.43429542837296875, "grad_norm": 6.114919999794618, "learning_rate": 6.292328955297046e-06, "loss": 17.6181, "step": 23759 }, { "epoch": 0.43431370756941523, "grad_norm": 5.737394877965056, "learning_rate": 6.292042999844809e-06, "loss": 17.2908, "step": 23760 }, { "epoch": 0.43433198676586177, "grad_norm": 6.138453097840232, "learning_rate": 6.2917570398640746e-06, "loss": 17.6269, "step": 23761 }, { "epoch": 0.4343502659623083, "grad_norm": 6.406815295454056, "learning_rate": 6.291471075355845e-06, "loss": 17.5962, "step": 23762 }, { "epoch": 0.43436854515875484, "grad_norm": 7.716533458941187, "learning_rate": 6.291185106321121e-06, "loss": 18.1808, "step": 23763 }, { "epoch": 0.4343868243552014, "grad_norm": 7.1176732978572765, "learning_rate": 6.290899132760906e-06, "loss": 17.9156, "step": 23764 }, { "epoch": 0.43440510355164785, "grad_norm": 5.961970929296037, "learning_rate": 6.2906131546761996e-06, "loss": 17.4415, "step": 23765 }, { "epoch": 0.4344233827480944, "grad_norm": 5.569594998420012, "learning_rate": 6.290327172068007e-06, "loss": 17.1737, "step": 23766 }, { "epoch": 0.4344416619445409, "grad_norm": 7.591474637343277, "learning_rate": 6.29004118493733e-06, "loss": 18.2131, "step": 23767 }, { "epoch": 0.43445994114098746, "grad_norm": 13.932519950208663, "learning_rate": 6.28975519328517e-06, "loss": 17.9378, "step": 23768 }, { "epoch": 0.43447822033743394, "grad_norm": 7.058536196305738, "learning_rate": 6.289469197112531e-06, "loss": 17.882, "step": 23769 }, { "epoch": 0.4344964995338805, "grad_norm": 6.365668185885612, "learning_rate": 6.2891831964204116e-06, "loss": 17.5266, "step": 23770 }, { "epoch": 0.434514778730327, "grad_norm": 6.198416145919927, "learning_rate": 6.28889719120982e-06, "loss": 17.3559, "step": 23771 }, { "epoch": 0.43453305792677355, "grad_norm": 7.714164527855773, "learning_rate": 6.288611181481754e-06, "loss": 18.0403, "step": 23772 }, { "epoch": 0.4345513371232201, "grad_norm": 6.179672552393671, "learning_rate": 6.288325167237219e-06, "loss": 17.4772, "step": 23773 }, { "epoch": 0.43456961631966656, "grad_norm": 6.9156131398090235, "learning_rate": 6.2880391484772166e-06, "loss": 17.4294, "step": 23774 }, { "epoch": 0.4345878955161131, "grad_norm": 8.725267812372703, "learning_rate": 6.287753125202744e-06, "loss": 18.3998, "step": 23775 }, { "epoch": 0.43460617471255963, "grad_norm": 7.209562219500126, "learning_rate": 6.287467097414815e-06, "loss": 17.7836, "step": 23776 }, { "epoch": 0.43462445390900617, "grad_norm": 6.774363317910752, "learning_rate": 6.287181065114421e-06, "loss": 17.3588, "step": 23777 }, { "epoch": 0.4346427331054527, "grad_norm": 7.268566592381338, "learning_rate": 6.286895028302571e-06, "loss": 17.9112, "step": 23778 }, { "epoch": 0.4346610123018992, "grad_norm": 7.904588477619206, "learning_rate": 6.286608986980265e-06, "loss": 18.0713, "step": 23779 }, { "epoch": 0.4346792914983457, "grad_norm": 6.923864533436848, "learning_rate": 6.2863229411485064e-06, "loss": 18.0276, "step": 23780 }, { "epoch": 0.43469757069479226, "grad_norm": 6.622258261032361, "learning_rate": 6.286036890808297e-06, "loss": 17.4968, "step": 23781 }, { "epoch": 0.4347158498912388, "grad_norm": 6.380302552900819, "learning_rate": 6.28575083596064e-06, "loss": 17.4397, "step": 23782 }, { "epoch": 0.43473412908768533, "grad_norm": 4.829702097401405, "learning_rate": 6.2854647766065395e-06, "loss": 16.9652, "step": 23783 }, { "epoch": 0.4347524082841318, "grad_norm": 6.179167919777229, "learning_rate": 6.2851787127469935e-06, "loss": 17.3528, "step": 23784 }, { "epoch": 0.43477068748057834, "grad_norm": 6.818738640862289, "learning_rate": 6.284892644383009e-06, "loss": 17.8505, "step": 23785 }, { "epoch": 0.4347889666770249, "grad_norm": 6.829626493712859, "learning_rate": 6.284606571515588e-06, "loss": 17.664, "step": 23786 }, { "epoch": 0.4348072458734714, "grad_norm": 6.360443457777423, "learning_rate": 6.284320494145732e-06, "loss": 17.6461, "step": 23787 }, { "epoch": 0.43482552506991795, "grad_norm": 6.773893232326095, "learning_rate": 6.284034412274445e-06, "loss": 17.6809, "step": 23788 }, { "epoch": 0.43484380426636443, "grad_norm": 6.30655400159841, "learning_rate": 6.283748325902726e-06, "loss": 17.5792, "step": 23789 }, { "epoch": 0.43486208346281097, "grad_norm": 11.005317080610869, "learning_rate": 6.283462235031583e-06, "loss": 17.8486, "step": 23790 }, { "epoch": 0.4348803626592575, "grad_norm": 7.58798293626356, "learning_rate": 6.283176139662016e-06, "loss": 18.1948, "step": 23791 }, { "epoch": 0.43489864185570404, "grad_norm": 6.606259008986435, "learning_rate": 6.282890039795027e-06, "loss": 17.6139, "step": 23792 }, { "epoch": 0.4349169210521506, "grad_norm": 7.7573673955048825, "learning_rate": 6.28260393543162e-06, "loss": 17.9213, "step": 23793 }, { "epoch": 0.43493520024859705, "grad_norm": 5.020639260069999, "learning_rate": 6.282317826572799e-06, "loss": 16.9778, "step": 23794 }, { "epoch": 0.4349534794450436, "grad_norm": 5.935445833306718, "learning_rate": 6.282031713219563e-06, "loss": 17.347, "step": 23795 }, { "epoch": 0.4349717586414901, "grad_norm": 6.553196914098264, "learning_rate": 6.281745595372919e-06, "loss": 17.6077, "step": 23796 }, { "epoch": 0.43499003783793666, "grad_norm": 6.682440196137849, "learning_rate": 6.281459473033867e-06, "loss": 17.7438, "step": 23797 }, { "epoch": 0.4350083170343832, "grad_norm": 5.695638352358544, "learning_rate": 6.2811733462034105e-06, "loss": 17.3181, "step": 23798 }, { "epoch": 0.4350265962308297, "grad_norm": 6.105545382268568, "learning_rate": 6.280887214882553e-06, "loss": 17.5573, "step": 23799 }, { "epoch": 0.4350448754272762, "grad_norm": 5.335979083358556, "learning_rate": 6.280601079072298e-06, "loss": 17.1656, "step": 23800 }, { "epoch": 0.43506315462372275, "grad_norm": 5.198904730747608, "learning_rate": 6.2803149387736464e-06, "loss": 17.2512, "step": 23801 }, { "epoch": 0.4350814338201693, "grad_norm": 7.323196392213511, "learning_rate": 6.2800287939876e-06, "loss": 18.1471, "step": 23802 }, { "epoch": 0.43509971301661576, "grad_norm": 8.272485293683951, "learning_rate": 6.279742644715166e-06, "loss": 18.2761, "step": 23803 }, { "epoch": 0.4351179922130623, "grad_norm": 6.603923717364079, "learning_rate": 6.279456490957346e-06, "loss": 17.671, "step": 23804 }, { "epoch": 0.43513627140950883, "grad_norm": 5.854976550362765, "learning_rate": 6.279170332715141e-06, "loss": 17.2637, "step": 23805 }, { "epoch": 0.43515455060595537, "grad_norm": 7.574194909508295, "learning_rate": 6.2788841699895545e-06, "loss": 18.2218, "step": 23806 }, { "epoch": 0.4351728298024019, "grad_norm": 7.174977581973314, "learning_rate": 6.278598002781591e-06, "loss": 17.6843, "step": 23807 }, { "epoch": 0.4351911089988484, "grad_norm": 6.157702531852071, "learning_rate": 6.278311831092251e-06, "loss": 17.5328, "step": 23808 }, { "epoch": 0.4352093881952949, "grad_norm": 6.641045909320454, "learning_rate": 6.278025654922539e-06, "loss": 17.6615, "step": 23809 }, { "epoch": 0.43522766739174146, "grad_norm": 6.868937306954469, "learning_rate": 6.2777394742734585e-06, "loss": 17.6829, "step": 23810 }, { "epoch": 0.435245946588188, "grad_norm": 6.527845149836127, "learning_rate": 6.277453289146013e-06, "loss": 17.4385, "step": 23811 }, { "epoch": 0.43526422578463453, "grad_norm": 6.613864051417975, "learning_rate": 6.277167099541204e-06, "loss": 17.6872, "step": 23812 }, { "epoch": 0.435282504981081, "grad_norm": 6.637978323084171, "learning_rate": 6.276880905460034e-06, "loss": 17.6415, "step": 23813 }, { "epoch": 0.43530078417752754, "grad_norm": 6.945530451501778, "learning_rate": 6.276594706903509e-06, "loss": 17.9313, "step": 23814 }, { "epoch": 0.4353190633739741, "grad_norm": 7.2642954581539145, "learning_rate": 6.276308503872629e-06, "loss": 17.8815, "step": 23815 }, { "epoch": 0.4353373425704206, "grad_norm": 6.325217827060266, "learning_rate": 6.2760222963683985e-06, "loss": 17.6147, "step": 23816 }, { "epoch": 0.43535562176686715, "grad_norm": 5.012434633454674, "learning_rate": 6.2757360843918204e-06, "loss": 17.1863, "step": 23817 }, { "epoch": 0.43537390096331363, "grad_norm": 6.864615915355431, "learning_rate": 6.2754498679438995e-06, "loss": 17.6262, "step": 23818 }, { "epoch": 0.43539218015976017, "grad_norm": 6.14360434336811, "learning_rate": 6.275163647025638e-06, "loss": 17.7933, "step": 23819 }, { "epoch": 0.4354104593562067, "grad_norm": 5.812737460701002, "learning_rate": 6.274877421638036e-06, "loss": 17.4681, "step": 23820 }, { "epoch": 0.43542873855265324, "grad_norm": 5.59308688335235, "learning_rate": 6.2745911917821e-06, "loss": 17.2346, "step": 23821 }, { "epoch": 0.4354470177490998, "grad_norm": 6.675402886746384, "learning_rate": 6.274304957458833e-06, "loss": 17.7082, "step": 23822 }, { "epoch": 0.43546529694554625, "grad_norm": 5.454848945652792, "learning_rate": 6.274018718669237e-06, "loss": 17.4234, "step": 23823 }, { "epoch": 0.4354835761419928, "grad_norm": 6.667585564411321, "learning_rate": 6.273732475414317e-06, "loss": 17.5829, "step": 23824 }, { "epoch": 0.4355018553384393, "grad_norm": 7.194824399986449, "learning_rate": 6.273446227695074e-06, "loss": 17.6354, "step": 23825 }, { "epoch": 0.43552013453488586, "grad_norm": 5.50968680104873, "learning_rate": 6.273159975512514e-06, "loss": 17.2895, "step": 23826 }, { "epoch": 0.4355384137313324, "grad_norm": 5.744403015190538, "learning_rate": 6.272873718867638e-06, "loss": 17.4208, "step": 23827 }, { "epoch": 0.4355566929277789, "grad_norm": 6.447591341865886, "learning_rate": 6.272587457761451e-06, "loss": 17.4899, "step": 23828 }, { "epoch": 0.4355749721242254, "grad_norm": 6.886341073026387, "learning_rate": 6.272301192194952e-06, "loss": 17.7471, "step": 23829 }, { "epoch": 0.43559325132067195, "grad_norm": 6.4278439276045605, "learning_rate": 6.272014922169151e-06, "loss": 17.5109, "step": 23830 }, { "epoch": 0.4356115305171185, "grad_norm": 6.051503140524521, "learning_rate": 6.271728647685047e-06, "loss": 17.3598, "step": 23831 }, { "epoch": 0.435629809713565, "grad_norm": 5.103946563957025, "learning_rate": 6.271442368743645e-06, "loss": 16.9667, "step": 23832 }, { "epoch": 0.4356480889100115, "grad_norm": 6.8230330003778095, "learning_rate": 6.271156085345949e-06, "loss": 17.5054, "step": 23833 }, { "epoch": 0.43566636810645804, "grad_norm": 6.123612548656846, "learning_rate": 6.270869797492958e-06, "loss": 17.6386, "step": 23834 }, { "epoch": 0.43568464730290457, "grad_norm": 6.524032878715726, "learning_rate": 6.270583505185681e-06, "loss": 17.4919, "step": 23835 }, { "epoch": 0.4357029264993511, "grad_norm": 8.047292619036462, "learning_rate": 6.270297208425119e-06, "loss": 18.374, "step": 23836 }, { "epoch": 0.4357212056957976, "grad_norm": 5.43341562292553, "learning_rate": 6.270010907212275e-06, "loss": 17.1471, "step": 23837 }, { "epoch": 0.4357394848922441, "grad_norm": 5.482138443011934, "learning_rate": 6.269724601548152e-06, "loss": 17.303, "step": 23838 }, { "epoch": 0.43575776408869066, "grad_norm": 5.984226886374791, "learning_rate": 6.269438291433756e-06, "loss": 17.2762, "step": 23839 }, { "epoch": 0.4357760432851372, "grad_norm": 7.463098994122314, "learning_rate": 6.269151976870088e-06, "loss": 17.9539, "step": 23840 }, { "epoch": 0.43579432248158373, "grad_norm": 5.454584015390502, "learning_rate": 6.268865657858153e-06, "loss": 17.3301, "step": 23841 }, { "epoch": 0.4358126016780302, "grad_norm": 6.536505420193189, "learning_rate": 6.268579334398954e-06, "loss": 17.5903, "step": 23842 }, { "epoch": 0.43583088087447674, "grad_norm": 7.288094103806399, "learning_rate": 6.268293006493493e-06, "loss": 17.8827, "step": 23843 }, { "epoch": 0.4358491600709233, "grad_norm": 7.773616907676997, "learning_rate": 6.268006674142777e-06, "loss": 17.6444, "step": 23844 }, { "epoch": 0.4358674392673698, "grad_norm": 7.666929283601415, "learning_rate": 6.2677203373478075e-06, "loss": 17.8615, "step": 23845 }, { "epoch": 0.43588571846381635, "grad_norm": 5.472988303768162, "learning_rate": 6.267433996109589e-06, "loss": 17.2007, "step": 23846 }, { "epoch": 0.43590399766026283, "grad_norm": 6.383812497393178, "learning_rate": 6.267147650429122e-06, "loss": 17.7053, "step": 23847 }, { "epoch": 0.43592227685670937, "grad_norm": 5.908331424186044, "learning_rate": 6.266861300307412e-06, "loss": 17.3104, "step": 23848 }, { "epoch": 0.4359405560531559, "grad_norm": 7.414114736635272, "learning_rate": 6.266574945745466e-06, "loss": 17.7896, "step": 23849 }, { "epoch": 0.43595883524960244, "grad_norm": 6.630469700512484, "learning_rate": 6.266288586744283e-06, "loss": 17.8471, "step": 23850 }, { "epoch": 0.435977114446049, "grad_norm": 6.498282735265571, "learning_rate": 6.266002223304869e-06, "loss": 17.6131, "step": 23851 }, { "epoch": 0.43599539364249545, "grad_norm": 5.1313976864739645, "learning_rate": 6.265715855428227e-06, "loss": 17.0579, "step": 23852 }, { "epoch": 0.436013672838942, "grad_norm": 6.155423500859613, "learning_rate": 6.26542948311536e-06, "loss": 17.4741, "step": 23853 }, { "epoch": 0.4360319520353885, "grad_norm": 9.077812198192008, "learning_rate": 6.265143106367273e-06, "loss": 18.3364, "step": 23854 }, { "epoch": 0.43605023123183506, "grad_norm": 6.2349520297556404, "learning_rate": 6.264856725184969e-06, "loss": 17.7364, "step": 23855 }, { "epoch": 0.4360685104282816, "grad_norm": 4.7269954324097725, "learning_rate": 6.264570339569452e-06, "loss": 16.9981, "step": 23856 }, { "epoch": 0.4360867896247281, "grad_norm": 7.295275529568781, "learning_rate": 6.264283949521725e-06, "loss": 17.8293, "step": 23857 }, { "epoch": 0.4361050688211746, "grad_norm": 6.532155770873544, "learning_rate": 6.263997555042793e-06, "loss": 17.7705, "step": 23858 }, { "epoch": 0.43612334801762115, "grad_norm": 8.064467076302083, "learning_rate": 6.263711156133662e-06, "loss": 18.0814, "step": 23859 }, { "epoch": 0.4361416272140677, "grad_norm": 7.302257298135736, "learning_rate": 6.263424752795331e-06, "loss": 17.988, "step": 23860 }, { "epoch": 0.4361599064105142, "grad_norm": 6.190354554040243, "learning_rate": 6.263138345028803e-06, "loss": 17.4445, "step": 23861 }, { "epoch": 0.4361781856069607, "grad_norm": 6.427026740535259, "learning_rate": 6.2628519328350876e-06, "loss": 17.4506, "step": 23862 }, { "epoch": 0.43619646480340724, "grad_norm": 6.372205394798012, "learning_rate": 6.262565516215187e-06, "loss": 17.6951, "step": 23863 }, { "epoch": 0.43621474399985377, "grad_norm": 8.06059915581161, "learning_rate": 6.2622790951701006e-06, "loss": 17.9144, "step": 23864 }, { "epoch": 0.4362330231963003, "grad_norm": 6.949122154778351, "learning_rate": 6.261992669700838e-06, "loss": 17.8636, "step": 23865 }, { "epoch": 0.43625130239274684, "grad_norm": 6.709881056579819, "learning_rate": 6.2617062398084e-06, "loss": 17.7407, "step": 23866 }, { "epoch": 0.4362695815891933, "grad_norm": 5.416059978746071, "learning_rate": 6.26141980549379e-06, "loss": 17.1129, "step": 23867 }, { "epoch": 0.43628786078563986, "grad_norm": 6.833323550308938, "learning_rate": 6.261133366758014e-06, "loss": 17.3961, "step": 23868 }, { "epoch": 0.4363061399820864, "grad_norm": 6.178535823968487, "learning_rate": 6.260846923602076e-06, "loss": 17.0604, "step": 23869 }, { "epoch": 0.43632441917853293, "grad_norm": 5.307022416040542, "learning_rate": 6.2605604760269755e-06, "loss": 17.076, "step": 23870 }, { "epoch": 0.4363426983749794, "grad_norm": 7.152625980662234, "learning_rate": 6.260274024033724e-06, "loss": 17.9072, "step": 23871 }, { "epoch": 0.43636097757142595, "grad_norm": 5.831215618894096, "learning_rate": 6.259987567623318e-06, "loss": 17.2684, "step": 23872 }, { "epoch": 0.4363792567678725, "grad_norm": 7.49625516486939, "learning_rate": 6.2597011067967674e-06, "loss": 18.1041, "step": 23873 }, { "epoch": 0.436397535964319, "grad_norm": 8.868963467919887, "learning_rate": 6.259414641555072e-06, "loss": 18.0595, "step": 23874 }, { "epoch": 0.43641581516076555, "grad_norm": 7.660085715575635, "learning_rate": 6.259128171899238e-06, "loss": 18.0545, "step": 23875 }, { "epoch": 0.43643409435721203, "grad_norm": 5.3908779914879235, "learning_rate": 6.258841697830271e-06, "loss": 17.1993, "step": 23876 }, { "epoch": 0.43645237355365857, "grad_norm": 5.633074670352394, "learning_rate": 6.2585552193491715e-06, "loss": 17.0302, "step": 23877 }, { "epoch": 0.4364706527501051, "grad_norm": 6.96628031102791, "learning_rate": 6.258268736456945e-06, "loss": 17.6736, "step": 23878 }, { "epoch": 0.43648893194655164, "grad_norm": 7.111966315222163, "learning_rate": 6.257982249154596e-06, "loss": 17.6331, "step": 23879 }, { "epoch": 0.4365072111429982, "grad_norm": 6.989383445396583, "learning_rate": 6.257695757443128e-06, "loss": 17.7052, "step": 23880 }, { "epoch": 0.43652549033944466, "grad_norm": 6.529517624966005, "learning_rate": 6.257409261323546e-06, "loss": 17.5059, "step": 23881 }, { "epoch": 0.4365437695358912, "grad_norm": 5.992859896160147, "learning_rate": 6.257122760796853e-06, "loss": 17.1186, "step": 23882 }, { "epoch": 0.4365620487323377, "grad_norm": 7.781229654134481, "learning_rate": 6.256836255864054e-06, "loss": 17.7622, "step": 23883 }, { "epoch": 0.43658032792878426, "grad_norm": 6.4979118999299725, "learning_rate": 6.256549746526154e-06, "loss": 17.6442, "step": 23884 }, { "epoch": 0.4365986071252308, "grad_norm": 8.421286888859038, "learning_rate": 6.2562632327841545e-06, "loss": 17.7855, "step": 23885 }, { "epoch": 0.4366168863216773, "grad_norm": 9.54913063909957, "learning_rate": 6.2559767146390626e-06, "loss": 18.0372, "step": 23886 }, { "epoch": 0.4366351655181238, "grad_norm": 6.965151729683479, "learning_rate": 6.255690192091882e-06, "loss": 17.5536, "step": 23887 }, { "epoch": 0.43665344471457035, "grad_norm": 10.681850626360164, "learning_rate": 6.255403665143615e-06, "loss": 17.6952, "step": 23888 }, { "epoch": 0.4366717239110169, "grad_norm": 5.258765641273252, "learning_rate": 6.255117133795266e-06, "loss": 17.033, "step": 23889 }, { "epoch": 0.4366900031074634, "grad_norm": 6.665458575687888, "learning_rate": 6.254830598047843e-06, "loss": 17.5997, "step": 23890 }, { "epoch": 0.4367082823039099, "grad_norm": 6.035690424981967, "learning_rate": 6.254544057902347e-06, "loss": 17.4735, "step": 23891 }, { "epoch": 0.43672656150035644, "grad_norm": 6.474421956015781, "learning_rate": 6.254257513359781e-06, "loss": 17.5113, "step": 23892 }, { "epoch": 0.43674484069680297, "grad_norm": 8.314695596987976, "learning_rate": 6.253970964421152e-06, "loss": 18.2821, "step": 23893 }, { "epoch": 0.4367631198932495, "grad_norm": 6.256334127436358, "learning_rate": 6.253684411087465e-06, "loss": 17.3606, "step": 23894 }, { "epoch": 0.43678139908969604, "grad_norm": 7.6796115714427895, "learning_rate": 6.253397853359723e-06, "loss": 18.5128, "step": 23895 }, { "epoch": 0.4367996782861425, "grad_norm": 6.795941357264282, "learning_rate": 6.253111291238929e-06, "loss": 17.7538, "step": 23896 }, { "epoch": 0.43681795748258906, "grad_norm": 7.25199643861381, "learning_rate": 6.2528247247260885e-06, "loss": 17.594, "step": 23897 }, { "epoch": 0.4368362366790356, "grad_norm": 6.372996474514302, "learning_rate": 6.252538153822206e-06, "loss": 17.2522, "step": 23898 }, { "epoch": 0.43685451587548213, "grad_norm": 6.026701428622392, "learning_rate": 6.252251578528287e-06, "loss": 17.3368, "step": 23899 }, { "epoch": 0.43687279507192867, "grad_norm": 4.761477883977416, "learning_rate": 6.2519649988453345e-06, "loss": 16.8937, "step": 23900 }, { "epoch": 0.43689107426837515, "grad_norm": 6.334667458315501, "learning_rate": 6.251678414774354e-06, "loss": 17.3993, "step": 23901 }, { "epoch": 0.4369093534648217, "grad_norm": 8.493483046830853, "learning_rate": 6.251391826316348e-06, "loss": 18.5209, "step": 23902 }, { "epoch": 0.4369276326612682, "grad_norm": 5.988722862784205, "learning_rate": 6.2511052334723225e-06, "loss": 17.2655, "step": 23903 }, { "epoch": 0.43694591185771475, "grad_norm": 7.315354498610283, "learning_rate": 6.250818636243283e-06, "loss": 17.8763, "step": 23904 }, { "epoch": 0.43696419105416123, "grad_norm": 6.248714930340277, "learning_rate": 6.250532034630231e-06, "loss": 17.2859, "step": 23905 }, { "epoch": 0.43698247025060777, "grad_norm": 5.514940662980491, "learning_rate": 6.250245428634174e-06, "loss": 17.0152, "step": 23906 }, { "epoch": 0.4370007494470543, "grad_norm": 5.657675618127949, "learning_rate": 6.249958818256115e-06, "loss": 17.3315, "step": 23907 }, { "epoch": 0.43701902864350084, "grad_norm": 6.360635433025995, "learning_rate": 6.249672203497058e-06, "loss": 17.7566, "step": 23908 }, { "epoch": 0.4370373078399474, "grad_norm": 6.008786704469323, "learning_rate": 6.249385584358009e-06, "loss": 17.3999, "step": 23909 }, { "epoch": 0.43705558703639386, "grad_norm": 7.33142682619034, "learning_rate": 6.249098960839972e-06, "loss": 17.7921, "step": 23910 }, { "epoch": 0.4370738662328404, "grad_norm": 6.371861169270015, "learning_rate": 6.248812332943951e-06, "loss": 17.5232, "step": 23911 }, { "epoch": 0.4370921454292869, "grad_norm": 5.869601600415608, "learning_rate": 6.248525700670951e-06, "loss": 17.4915, "step": 23912 }, { "epoch": 0.43711042462573346, "grad_norm": 5.525187376485091, "learning_rate": 6.248239064021977e-06, "loss": 17.2771, "step": 23913 }, { "epoch": 0.43712870382218, "grad_norm": 6.488219510405763, "learning_rate": 6.247952422998035e-06, "loss": 17.3501, "step": 23914 }, { "epoch": 0.4371469830186265, "grad_norm": 5.909557666655518, "learning_rate": 6.247665777600127e-06, "loss": 17.2575, "step": 23915 }, { "epoch": 0.437165262215073, "grad_norm": 5.06380006176535, "learning_rate": 6.247379127829257e-06, "loss": 16.8698, "step": 23916 }, { "epoch": 0.43718354141151955, "grad_norm": 5.905539963323596, "learning_rate": 6.247092473686432e-06, "loss": 17.1606, "step": 23917 }, { "epoch": 0.4372018206079661, "grad_norm": 6.807182451339311, "learning_rate": 6.246805815172659e-06, "loss": 17.4176, "step": 23918 }, { "epoch": 0.4372200998044126, "grad_norm": 6.221123397001821, "learning_rate": 6.246519152288937e-06, "loss": 17.6172, "step": 23919 }, { "epoch": 0.4372383790008591, "grad_norm": 6.212224300995334, "learning_rate": 6.246232485036275e-06, "loss": 17.3482, "step": 23920 }, { "epoch": 0.43725665819730564, "grad_norm": 7.823034045524622, "learning_rate": 6.2459458134156745e-06, "loss": 18.1664, "step": 23921 }, { "epoch": 0.43727493739375217, "grad_norm": 7.033783182189167, "learning_rate": 6.2456591374281435e-06, "loss": 17.8423, "step": 23922 }, { "epoch": 0.4372932165901987, "grad_norm": 5.502804655149759, "learning_rate": 6.245372457074685e-06, "loss": 17.0922, "step": 23923 }, { "epoch": 0.43731149578664524, "grad_norm": 6.576363494862701, "learning_rate": 6.245085772356304e-06, "loss": 17.6938, "step": 23924 }, { "epoch": 0.4373297749830917, "grad_norm": 6.067144789740839, "learning_rate": 6.244799083274004e-06, "loss": 17.3229, "step": 23925 }, { "epoch": 0.43734805417953826, "grad_norm": 6.394717174482385, "learning_rate": 6.244512389828794e-06, "loss": 17.4762, "step": 23926 }, { "epoch": 0.4373663333759848, "grad_norm": 5.516036442517155, "learning_rate": 6.244225692021675e-06, "loss": 17.2713, "step": 23927 }, { "epoch": 0.43738461257243133, "grad_norm": 8.086540536894024, "learning_rate": 6.243938989853653e-06, "loss": 17.7486, "step": 23928 }, { "epoch": 0.43740289176887787, "grad_norm": 5.642024582760483, "learning_rate": 6.2436522833257314e-06, "loss": 17.1427, "step": 23929 }, { "epoch": 0.43742117096532435, "grad_norm": 7.3292519349256535, "learning_rate": 6.2433655724389175e-06, "loss": 17.8424, "step": 23930 }, { "epoch": 0.4374394501617709, "grad_norm": 5.842609832264661, "learning_rate": 6.243078857194215e-06, "loss": 17.1244, "step": 23931 }, { "epoch": 0.4374577293582174, "grad_norm": 8.550158140001454, "learning_rate": 6.24279213759263e-06, "loss": 18.3504, "step": 23932 }, { "epoch": 0.43747600855466395, "grad_norm": 6.4760633544487956, "learning_rate": 6.242505413635166e-06, "loss": 17.5759, "step": 23933 }, { "epoch": 0.4374942877511105, "grad_norm": 6.731350216811516, "learning_rate": 6.242218685322826e-06, "loss": 17.5102, "step": 23934 }, { "epoch": 0.43751256694755697, "grad_norm": 5.788240053547558, "learning_rate": 6.24193195265662e-06, "loss": 17.1085, "step": 23935 }, { "epoch": 0.4375308461440035, "grad_norm": 7.5396417647391445, "learning_rate": 6.24164521563755e-06, "loss": 18.0382, "step": 23936 }, { "epoch": 0.43754912534045004, "grad_norm": 4.642640229587409, "learning_rate": 6.241358474266621e-06, "loss": 16.8847, "step": 23937 }, { "epoch": 0.4375674045368966, "grad_norm": 4.759367897182057, "learning_rate": 6.241071728544837e-06, "loss": 16.7752, "step": 23938 }, { "epoch": 0.43758568373334306, "grad_norm": 6.08158858127298, "learning_rate": 6.240784978473206e-06, "loss": 17.1645, "step": 23939 }, { "epoch": 0.4376039629297896, "grad_norm": 5.450517691313575, "learning_rate": 6.2404982240527305e-06, "loss": 17.2829, "step": 23940 }, { "epoch": 0.4376222421262361, "grad_norm": 7.3670376610069255, "learning_rate": 6.240211465284416e-06, "loss": 18.0665, "step": 23941 }, { "epoch": 0.43764052132268266, "grad_norm": 6.087789442483752, "learning_rate": 6.23992470216927e-06, "loss": 17.3223, "step": 23942 }, { "epoch": 0.4376588005191292, "grad_norm": 7.9833410695256575, "learning_rate": 6.2396379347082925e-06, "loss": 18.3517, "step": 23943 }, { "epoch": 0.4376770797155757, "grad_norm": 5.848633507256055, "learning_rate": 6.239351162902493e-06, "loss": 17.4503, "step": 23944 }, { "epoch": 0.4376953589120222, "grad_norm": 5.796874455548671, "learning_rate": 6.239064386752876e-06, "loss": 17.2623, "step": 23945 }, { "epoch": 0.43771363810846875, "grad_norm": 7.67556505670471, "learning_rate": 6.2387776062604454e-06, "loss": 18.3013, "step": 23946 }, { "epoch": 0.4377319173049153, "grad_norm": 5.207616322576824, "learning_rate": 6.238490821426206e-06, "loss": 16.9911, "step": 23947 }, { "epoch": 0.4377501965013618, "grad_norm": 6.335479282535727, "learning_rate": 6.238204032251163e-06, "loss": 17.1, "step": 23948 }, { "epoch": 0.4377684756978083, "grad_norm": 9.483746020749363, "learning_rate": 6.237917238736325e-06, "loss": 18.4088, "step": 23949 }, { "epoch": 0.43778675489425484, "grad_norm": 6.349133558818311, "learning_rate": 6.237630440882693e-06, "loss": 17.7754, "step": 23950 }, { "epoch": 0.4378050340907014, "grad_norm": 7.326353212586243, "learning_rate": 6.237343638691273e-06, "loss": 17.7348, "step": 23951 }, { "epoch": 0.4378233132871479, "grad_norm": 5.381147450195405, "learning_rate": 6.237056832163072e-06, "loss": 16.8319, "step": 23952 }, { "epoch": 0.43784159248359444, "grad_norm": 7.024108480865005, "learning_rate": 6.236770021299093e-06, "loss": 17.6267, "step": 23953 }, { "epoch": 0.4378598716800409, "grad_norm": 5.617575326960354, "learning_rate": 6.236483206100344e-06, "loss": 17.1481, "step": 23954 }, { "epoch": 0.43787815087648746, "grad_norm": 11.712267076978886, "learning_rate": 6.236196386567828e-06, "loss": 18.2683, "step": 23955 }, { "epoch": 0.437896430072934, "grad_norm": 8.004055374621178, "learning_rate": 6.23590956270255e-06, "loss": 18.5959, "step": 23956 }, { "epoch": 0.43791470926938053, "grad_norm": 6.343183840251039, "learning_rate": 6.2356227345055175e-06, "loss": 17.7577, "step": 23957 }, { "epoch": 0.43793298846582707, "grad_norm": 7.599136735195646, "learning_rate": 6.2353359019777335e-06, "loss": 17.7117, "step": 23958 }, { "epoch": 0.43795126766227355, "grad_norm": 5.9030588525894885, "learning_rate": 6.235049065120207e-06, "loss": 17.3532, "step": 23959 }, { "epoch": 0.4379695468587201, "grad_norm": 8.139557562651724, "learning_rate": 6.2347622239339376e-06, "loss": 17.592, "step": 23960 }, { "epoch": 0.4379878260551666, "grad_norm": 6.3013355629707055, "learning_rate": 6.234475378419934e-06, "loss": 17.6954, "step": 23961 }, { "epoch": 0.43800610525161315, "grad_norm": 5.630351907541568, "learning_rate": 6.234188528579202e-06, "loss": 17.0036, "step": 23962 }, { "epoch": 0.4380243844480597, "grad_norm": 5.78054772288933, "learning_rate": 6.233901674412748e-06, "loss": 17.3421, "step": 23963 }, { "epoch": 0.43804266364450617, "grad_norm": 5.959559681237487, "learning_rate": 6.2336148159215735e-06, "loss": 17.3405, "step": 23964 }, { "epoch": 0.4380609428409527, "grad_norm": 6.7847804685119, "learning_rate": 6.233327953106687e-06, "loss": 17.6707, "step": 23965 }, { "epoch": 0.43807922203739924, "grad_norm": 7.25773349149983, "learning_rate": 6.233041085969092e-06, "loss": 17.8941, "step": 23966 }, { "epoch": 0.4380975012338458, "grad_norm": 7.789015459486722, "learning_rate": 6.232754214509796e-06, "loss": 18.152, "step": 23967 }, { "epoch": 0.4381157804302923, "grad_norm": 5.943583740901636, "learning_rate": 6.232467338729803e-06, "loss": 17.2675, "step": 23968 }, { "epoch": 0.4381340596267388, "grad_norm": 7.1440296579959455, "learning_rate": 6.232180458630119e-06, "loss": 17.6182, "step": 23969 }, { "epoch": 0.43815233882318533, "grad_norm": 6.2490591830604805, "learning_rate": 6.231893574211749e-06, "loss": 17.4543, "step": 23970 }, { "epoch": 0.43817061801963186, "grad_norm": 6.5830397561092004, "learning_rate": 6.231606685475701e-06, "loss": 17.7279, "step": 23971 }, { "epoch": 0.4381888972160784, "grad_norm": 7.437304042880089, "learning_rate": 6.231319792422977e-06, "loss": 17.8313, "step": 23972 }, { "epoch": 0.4382071764125249, "grad_norm": 6.352752017127395, "learning_rate": 6.231032895054584e-06, "loss": 17.6606, "step": 23973 }, { "epoch": 0.4382254556089714, "grad_norm": 5.6922561812502535, "learning_rate": 6.230745993371528e-06, "loss": 17.383, "step": 23974 }, { "epoch": 0.43824373480541795, "grad_norm": 5.680137833276108, "learning_rate": 6.2304590873748115e-06, "loss": 17.2924, "step": 23975 }, { "epoch": 0.4382620140018645, "grad_norm": 6.695873536494616, "learning_rate": 6.230172177065445e-06, "loss": 17.8808, "step": 23976 }, { "epoch": 0.438280293198311, "grad_norm": 6.708773107718349, "learning_rate": 6.229885262444433e-06, "loss": 17.6513, "step": 23977 }, { "epoch": 0.4382985723947575, "grad_norm": 6.676344055924407, "learning_rate": 6.229598343512777e-06, "loss": 17.6905, "step": 23978 }, { "epoch": 0.43831685159120404, "grad_norm": 6.201313962599765, "learning_rate": 6.229311420271488e-06, "loss": 17.547, "step": 23979 }, { "epoch": 0.4383351307876506, "grad_norm": 6.183536383194596, "learning_rate": 6.229024492721567e-06, "loss": 17.4816, "step": 23980 }, { "epoch": 0.4383534099840971, "grad_norm": 5.639793513166326, "learning_rate": 6.228737560864024e-06, "loss": 17.3323, "step": 23981 }, { "epoch": 0.43837168918054364, "grad_norm": 7.02600896370297, "learning_rate": 6.22845062469986e-06, "loss": 17.6568, "step": 23982 }, { "epoch": 0.4383899683769901, "grad_norm": 5.201563695186877, "learning_rate": 6.228163684230084e-06, "loss": 17.1166, "step": 23983 }, { "epoch": 0.43840824757343666, "grad_norm": 6.3069927927281295, "learning_rate": 6.227876739455702e-06, "loss": 17.5435, "step": 23984 }, { "epoch": 0.4384265267698832, "grad_norm": 6.684435791059988, "learning_rate": 6.227589790377717e-06, "loss": 17.5835, "step": 23985 }, { "epoch": 0.43844480596632973, "grad_norm": 6.139683812962959, "learning_rate": 6.2273028369971375e-06, "loss": 17.2079, "step": 23986 }, { "epoch": 0.43846308516277627, "grad_norm": 6.744759916293599, "learning_rate": 6.2270158793149696e-06, "loss": 17.8278, "step": 23987 }, { "epoch": 0.43848136435922275, "grad_norm": 6.103049014534535, "learning_rate": 6.226728917332215e-06, "loss": 17.4248, "step": 23988 }, { "epoch": 0.4384996435556693, "grad_norm": 6.857580617955428, "learning_rate": 6.226441951049882e-06, "loss": 17.6062, "step": 23989 }, { "epoch": 0.4385179227521158, "grad_norm": 8.286891018914536, "learning_rate": 6.226154980468978e-06, "loss": 18.1121, "step": 23990 }, { "epoch": 0.43853620194856235, "grad_norm": 6.281215225087562, "learning_rate": 6.225868005590506e-06, "loss": 17.5584, "step": 23991 }, { "epoch": 0.4385544811450089, "grad_norm": 7.280953450911573, "learning_rate": 6.225581026415473e-06, "loss": 17.7032, "step": 23992 }, { "epoch": 0.43857276034145537, "grad_norm": 6.979399398605442, "learning_rate": 6.225294042944884e-06, "loss": 17.6486, "step": 23993 }, { "epoch": 0.4385910395379019, "grad_norm": 6.638458640357061, "learning_rate": 6.225007055179748e-06, "loss": 17.723, "step": 23994 }, { "epoch": 0.43860931873434844, "grad_norm": 6.057993657988643, "learning_rate": 6.224720063121067e-06, "loss": 17.2632, "step": 23995 }, { "epoch": 0.438627597930795, "grad_norm": 6.21037187523833, "learning_rate": 6.224433066769849e-06, "loss": 17.6976, "step": 23996 }, { "epoch": 0.4386458771272415, "grad_norm": 6.595796145271207, "learning_rate": 6.224146066127099e-06, "loss": 17.8447, "step": 23997 }, { "epoch": 0.438664156323688, "grad_norm": 8.278190147439984, "learning_rate": 6.2238590611938234e-06, "loss": 17.9456, "step": 23998 }, { "epoch": 0.43868243552013453, "grad_norm": 5.362890817208312, "learning_rate": 6.223572051971027e-06, "loss": 17.3774, "step": 23999 }, { "epoch": 0.43870071471658106, "grad_norm": 5.991677612466831, "learning_rate": 6.223285038459719e-06, "loss": 17.4217, "step": 24000 }, { "epoch": 0.4387189939130276, "grad_norm": 6.0251617144174405, "learning_rate": 6.222998020660903e-06, "loss": 17.4952, "step": 24001 }, { "epoch": 0.43873727310947414, "grad_norm": 6.764376032096751, "learning_rate": 6.222710998575583e-06, "loss": 17.6979, "step": 24002 }, { "epoch": 0.4387555523059206, "grad_norm": 5.86926991320708, "learning_rate": 6.222423972204768e-06, "loss": 17.1538, "step": 24003 }, { "epoch": 0.43877383150236715, "grad_norm": 7.762880804805859, "learning_rate": 6.222136941549464e-06, "loss": 17.8571, "step": 24004 }, { "epoch": 0.4387921106988137, "grad_norm": 7.059405127136602, "learning_rate": 6.221849906610674e-06, "loss": 17.3909, "step": 24005 }, { "epoch": 0.4388103898952602, "grad_norm": 6.2189132962688705, "learning_rate": 6.221562867389408e-06, "loss": 17.4762, "step": 24006 }, { "epoch": 0.4388286690917067, "grad_norm": 7.639833437389585, "learning_rate": 6.221275823886669e-06, "loss": 18.0506, "step": 24007 }, { "epoch": 0.43884694828815324, "grad_norm": 5.459067405090136, "learning_rate": 6.220988776103465e-06, "loss": 17.1166, "step": 24008 }, { "epoch": 0.4388652274845998, "grad_norm": 6.609410233825979, "learning_rate": 6.220701724040801e-06, "loss": 17.6673, "step": 24009 }, { "epoch": 0.4388835066810463, "grad_norm": 6.431944664243485, "learning_rate": 6.220414667699682e-06, "loss": 17.5546, "step": 24010 }, { "epoch": 0.43890178587749284, "grad_norm": 6.221324915602247, "learning_rate": 6.220127607081117e-06, "loss": 17.1842, "step": 24011 }, { "epoch": 0.4389200650739393, "grad_norm": 7.07345777971569, "learning_rate": 6.219840542186111e-06, "loss": 17.7193, "step": 24012 }, { "epoch": 0.43893834427038586, "grad_norm": 7.0063190363877865, "learning_rate": 6.219553473015668e-06, "loss": 17.4253, "step": 24013 }, { "epoch": 0.4389566234668324, "grad_norm": 6.2546168603984595, "learning_rate": 6.219266399570798e-06, "loss": 17.2838, "step": 24014 }, { "epoch": 0.43897490266327893, "grad_norm": 6.258635538417954, "learning_rate": 6.218979321852503e-06, "loss": 17.4274, "step": 24015 }, { "epoch": 0.43899318185972547, "grad_norm": 7.169503195365026, "learning_rate": 6.218692239861793e-06, "loss": 17.9354, "step": 24016 }, { "epoch": 0.43901146105617195, "grad_norm": 6.601886440297481, "learning_rate": 6.218405153599671e-06, "loss": 17.5558, "step": 24017 }, { "epoch": 0.4390297402526185, "grad_norm": 6.373022725769502, "learning_rate": 6.218118063067147e-06, "loss": 17.4031, "step": 24018 }, { "epoch": 0.439048019449065, "grad_norm": 6.5719079781718115, "learning_rate": 6.2178309682652235e-06, "loss": 17.7041, "step": 24019 }, { "epoch": 0.43906629864551155, "grad_norm": 7.017985430227371, "learning_rate": 6.2175438691949065e-06, "loss": 17.6017, "step": 24020 }, { "epoch": 0.4390845778419581, "grad_norm": 6.251060702893441, "learning_rate": 6.217256765857207e-06, "loss": 17.2571, "step": 24021 }, { "epoch": 0.43910285703840457, "grad_norm": 6.367375870634299, "learning_rate": 6.216969658253125e-06, "loss": 17.5228, "step": 24022 }, { "epoch": 0.4391211362348511, "grad_norm": 6.377043967704567, "learning_rate": 6.216682546383672e-06, "loss": 17.8246, "step": 24023 }, { "epoch": 0.43913941543129764, "grad_norm": 7.771306717169053, "learning_rate": 6.216395430249852e-06, "loss": 18.4007, "step": 24024 }, { "epoch": 0.4391576946277442, "grad_norm": 6.000973498365739, "learning_rate": 6.216108309852672e-06, "loss": 17.0641, "step": 24025 }, { "epoch": 0.4391759738241907, "grad_norm": 7.008358668382307, "learning_rate": 6.215821185193137e-06, "loss": 17.7032, "step": 24026 }, { "epoch": 0.4391942530206372, "grad_norm": 6.928903847663157, "learning_rate": 6.215534056272254e-06, "loss": 17.4494, "step": 24027 }, { "epoch": 0.43921253221708373, "grad_norm": 5.625535482139369, "learning_rate": 6.215246923091032e-06, "loss": 17.5746, "step": 24028 }, { "epoch": 0.43923081141353026, "grad_norm": 6.9614181128989445, "learning_rate": 6.214959785650472e-06, "loss": 18.0524, "step": 24029 }, { "epoch": 0.4392490906099768, "grad_norm": 7.409461275741407, "learning_rate": 6.214672643951584e-06, "loss": 18.0821, "step": 24030 }, { "epoch": 0.43926736980642334, "grad_norm": 6.029300780715063, "learning_rate": 6.214385497995374e-06, "loss": 17.2179, "step": 24031 }, { "epoch": 0.4392856490028698, "grad_norm": 6.11037002292352, "learning_rate": 6.214098347782849e-06, "loss": 17.4044, "step": 24032 }, { "epoch": 0.43930392819931635, "grad_norm": 5.834711278241937, "learning_rate": 6.213811193315015e-06, "loss": 17.2023, "step": 24033 }, { "epoch": 0.4393222073957629, "grad_norm": 6.200046138205611, "learning_rate": 6.213524034592875e-06, "loss": 17.4785, "step": 24034 }, { "epoch": 0.4393404865922094, "grad_norm": 8.000951197088606, "learning_rate": 6.213236871617442e-06, "loss": 18.1242, "step": 24035 }, { "epoch": 0.43935876578865596, "grad_norm": 5.27557532248393, "learning_rate": 6.212949704389718e-06, "loss": 17.0796, "step": 24036 }, { "epoch": 0.43937704498510244, "grad_norm": 5.58697925314592, "learning_rate": 6.21266253291071e-06, "loss": 17.3713, "step": 24037 }, { "epoch": 0.439395324181549, "grad_norm": 6.952475108885502, "learning_rate": 6.212375357181426e-06, "loss": 17.6035, "step": 24038 }, { "epoch": 0.4394136033779955, "grad_norm": 6.9391452999085015, "learning_rate": 6.21208817720287e-06, "loss": 17.688, "step": 24039 }, { "epoch": 0.43943188257444205, "grad_norm": 6.731781520110059, "learning_rate": 6.211800992976051e-06, "loss": 17.5839, "step": 24040 }, { "epoch": 0.4394501617708885, "grad_norm": 6.403488127370576, "learning_rate": 6.211513804501975e-06, "loss": 17.6752, "step": 24041 }, { "epoch": 0.43946844096733506, "grad_norm": 5.8170076131586255, "learning_rate": 6.211226611781649e-06, "loss": 17.202, "step": 24042 }, { "epoch": 0.4394867201637816, "grad_norm": 6.704862089039464, "learning_rate": 6.2109394148160774e-06, "loss": 17.8916, "step": 24043 }, { "epoch": 0.43950499936022813, "grad_norm": 6.6130829714901695, "learning_rate": 6.210652213606269e-06, "loss": 17.7099, "step": 24044 }, { "epoch": 0.43952327855667467, "grad_norm": 5.804196571886088, "learning_rate": 6.21036500815323e-06, "loss": 17.314, "step": 24045 }, { "epoch": 0.43954155775312115, "grad_norm": 5.870295302515808, "learning_rate": 6.2100777984579655e-06, "loss": 17.4325, "step": 24046 }, { "epoch": 0.4395598369495677, "grad_norm": 5.406245643499944, "learning_rate": 6.209790584521483e-06, "loss": 17.091, "step": 24047 }, { "epoch": 0.4395781161460142, "grad_norm": 5.301351690539633, "learning_rate": 6.20950336634479e-06, "loss": 16.9648, "step": 24048 }, { "epoch": 0.43959639534246076, "grad_norm": 6.142972290332052, "learning_rate": 6.209216143928895e-06, "loss": 17.133, "step": 24049 }, { "epoch": 0.4396146745389073, "grad_norm": 5.772682067366621, "learning_rate": 6.208928917274799e-06, "loss": 17.4193, "step": 24050 }, { "epoch": 0.43963295373535377, "grad_norm": 6.561133445600708, "learning_rate": 6.2086416863835145e-06, "loss": 17.5559, "step": 24051 }, { "epoch": 0.4396512329318003, "grad_norm": 7.225885480856654, "learning_rate": 6.2083544512560434e-06, "loss": 17.5924, "step": 24052 }, { "epoch": 0.43966951212824684, "grad_norm": 6.301709506262397, "learning_rate": 6.208067211893396e-06, "loss": 17.5383, "step": 24053 }, { "epoch": 0.4396877913246934, "grad_norm": 6.2750822658107355, "learning_rate": 6.207779968296578e-06, "loss": 17.3671, "step": 24054 }, { "epoch": 0.4397060705211399, "grad_norm": 5.6883360896097, "learning_rate": 6.207492720466596e-06, "loss": 17.2641, "step": 24055 }, { "epoch": 0.4397243497175864, "grad_norm": 5.160428857090913, "learning_rate": 6.207205468404457e-06, "loss": 17.0549, "step": 24056 }, { "epoch": 0.43974262891403293, "grad_norm": 6.177780461799774, "learning_rate": 6.206918212111167e-06, "loss": 17.3782, "step": 24057 }, { "epoch": 0.43976090811047946, "grad_norm": 6.555941508063238, "learning_rate": 6.2066309515877334e-06, "loss": 17.5858, "step": 24058 }, { "epoch": 0.439779187306926, "grad_norm": 7.982207387034825, "learning_rate": 6.206343686835165e-06, "loss": 18.1021, "step": 24059 }, { "epoch": 0.43979746650337254, "grad_norm": 6.827920589372808, "learning_rate": 6.206056417854464e-06, "loss": 17.5228, "step": 24060 }, { "epoch": 0.439815745699819, "grad_norm": 5.243004926423027, "learning_rate": 6.205769144646641e-06, "loss": 17.1173, "step": 24061 }, { "epoch": 0.43983402489626555, "grad_norm": 6.091493836617262, "learning_rate": 6.205481867212701e-06, "loss": 17.4518, "step": 24062 }, { "epoch": 0.4398523040927121, "grad_norm": 5.630099018895974, "learning_rate": 6.205194585553653e-06, "loss": 17.106, "step": 24063 }, { "epoch": 0.4398705832891586, "grad_norm": 5.488712327693138, "learning_rate": 6.204907299670502e-06, "loss": 17.0807, "step": 24064 }, { "epoch": 0.43988886248560516, "grad_norm": 8.329318893114538, "learning_rate": 6.204620009564255e-06, "loss": 18.4138, "step": 24065 }, { "epoch": 0.43990714168205164, "grad_norm": 6.201264196117154, "learning_rate": 6.20433271523592e-06, "loss": 17.3556, "step": 24066 }, { "epoch": 0.4399254208784982, "grad_norm": 7.803245184763667, "learning_rate": 6.204045416686503e-06, "loss": 18.0564, "step": 24067 }, { "epoch": 0.4399437000749447, "grad_norm": 5.848519091575874, "learning_rate": 6.203758113917011e-06, "loss": 17.3502, "step": 24068 }, { "epoch": 0.43996197927139125, "grad_norm": 5.450613093420574, "learning_rate": 6.2034708069284525e-06, "loss": 17.0774, "step": 24069 }, { "epoch": 0.4399802584678378, "grad_norm": 7.0861272134943984, "learning_rate": 6.2031834957218314e-06, "loss": 17.7915, "step": 24070 }, { "epoch": 0.43999853766428426, "grad_norm": 6.2486276449081695, "learning_rate": 6.202896180298158e-06, "loss": 17.4001, "step": 24071 }, { "epoch": 0.4400168168607308, "grad_norm": 5.976318337781716, "learning_rate": 6.202608860658438e-06, "loss": 17.5905, "step": 24072 }, { "epoch": 0.44003509605717733, "grad_norm": 4.9373641132000525, "learning_rate": 6.2023215368036785e-06, "loss": 16.9522, "step": 24073 }, { "epoch": 0.44005337525362387, "grad_norm": 6.556857284850879, "learning_rate": 6.2020342087348854e-06, "loss": 17.6768, "step": 24074 }, { "epoch": 0.44007165445007035, "grad_norm": 7.061485093126709, "learning_rate": 6.201746876453066e-06, "loss": 18.1163, "step": 24075 }, { "epoch": 0.4400899336465169, "grad_norm": 6.027018540386245, "learning_rate": 6.201459539959229e-06, "loss": 17.4606, "step": 24076 }, { "epoch": 0.4401082128429634, "grad_norm": 6.039365159304191, "learning_rate": 6.2011721992543814e-06, "loss": 17.695, "step": 24077 }, { "epoch": 0.44012649203940996, "grad_norm": 7.1681031907290125, "learning_rate": 6.200884854339529e-06, "loss": 17.8596, "step": 24078 }, { "epoch": 0.4401447712358565, "grad_norm": 6.082145313149376, "learning_rate": 6.2005975052156784e-06, "loss": 17.4135, "step": 24079 }, { "epoch": 0.44016305043230297, "grad_norm": 6.230506107679308, "learning_rate": 6.200310151883838e-06, "loss": 17.5036, "step": 24080 }, { "epoch": 0.4401813296287495, "grad_norm": 5.464460759241678, "learning_rate": 6.200022794345015e-06, "loss": 17.2063, "step": 24081 }, { "epoch": 0.44019960882519604, "grad_norm": 6.622869756721586, "learning_rate": 6.199735432600216e-06, "loss": 17.5457, "step": 24082 }, { "epoch": 0.4402178880216426, "grad_norm": 7.554716121718214, "learning_rate": 6.1994480666504484e-06, "loss": 17.9225, "step": 24083 }, { "epoch": 0.4402361672180891, "grad_norm": 5.31264702707009, "learning_rate": 6.19916069649672e-06, "loss": 17.0408, "step": 24084 }, { "epoch": 0.4402544464145356, "grad_norm": 5.89945498771339, "learning_rate": 6.198873322140038e-06, "loss": 17.3624, "step": 24085 }, { "epoch": 0.44027272561098213, "grad_norm": 7.644977464635734, "learning_rate": 6.198585943581407e-06, "loss": 17.8964, "step": 24086 }, { "epoch": 0.44029100480742867, "grad_norm": 6.783231163989308, "learning_rate": 6.198298560821838e-06, "loss": 17.6885, "step": 24087 }, { "epoch": 0.4403092840038752, "grad_norm": 5.355605410047858, "learning_rate": 6.198011173862335e-06, "loss": 17.2444, "step": 24088 }, { "epoch": 0.44032756320032174, "grad_norm": 5.804702837279948, "learning_rate": 6.197723782703908e-06, "loss": 17.1333, "step": 24089 }, { "epoch": 0.4403458423967682, "grad_norm": 7.146224777024456, "learning_rate": 6.197436387347564e-06, "loss": 17.8382, "step": 24090 }, { "epoch": 0.44036412159321475, "grad_norm": 7.206326932353275, "learning_rate": 6.197148987794308e-06, "loss": 17.8005, "step": 24091 }, { "epoch": 0.4403824007896613, "grad_norm": 6.633553428716376, "learning_rate": 6.196861584045149e-06, "loss": 17.7012, "step": 24092 }, { "epoch": 0.4404006799861078, "grad_norm": 6.150559907538199, "learning_rate": 6.196574176101093e-06, "loss": 17.3696, "step": 24093 }, { "epoch": 0.44041895918255436, "grad_norm": 5.471955820876253, "learning_rate": 6.19628676396315e-06, "loss": 17.1405, "step": 24094 }, { "epoch": 0.44043723837900084, "grad_norm": 7.408212062568318, "learning_rate": 6.195999347632324e-06, "loss": 17.8337, "step": 24095 }, { "epoch": 0.4404555175754474, "grad_norm": 6.785862576964398, "learning_rate": 6.195711927109626e-06, "loss": 17.7062, "step": 24096 }, { "epoch": 0.4404737967718939, "grad_norm": 6.0664998422025045, "learning_rate": 6.19542450239606e-06, "loss": 17.7203, "step": 24097 }, { "epoch": 0.44049207596834045, "grad_norm": 6.928755566027269, "learning_rate": 6.1951370734926355e-06, "loss": 17.978, "step": 24098 }, { "epoch": 0.440510355164787, "grad_norm": 7.80758531994904, "learning_rate": 6.194849640400359e-06, "loss": 17.9731, "step": 24099 }, { "epoch": 0.44052863436123346, "grad_norm": 6.613035027127089, "learning_rate": 6.194562203120238e-06, "loss": 17.6502, "step": 24100 }, { "epoch": 0.44054691355768, "grad_norm": 7.56195043778486, "learning_rate": 6.194274761653281e-06, "loss": 18.3339, "step": 24101 }, { "epoch": 0.44056519275412653, "grad_norm": 6.171567868539247, "learning_rate": 6.1939873160004935e-06, "loss": 17.5275, "step": 24102 }, { "epoch": 0.44058347195057307, "grad_norm": 8.35353216457608, "learning_rate": 6.193699866162884e-06, "loss": 17.7445, "step": 24103 }, { "epoch": 0.4406017511470196, "grad_norm": 7.110938906108972, "learning_rate": 6.193412412141462e-06, "loss": 17.7643, "step": 24104 }, { "epoch": 0.4406200303434661, "grad_norm": 6.212587681886543, "learning_rate": 6.193124953937232e-06, "loss": 17.2705, "step": 24105 }, { "epoch": 0.4406383095399126, "grad_norm": 8.214394566138274, "learning_rate": 6.1928374915512024e-06, "loss": 17.8221, "step": 24106 }, { "epoch": 0.44065658873635916, "grad_norm": 5.751694848503822, "learning_rate": 6.192550024984381e-06, "loss": 17.581, "step": 24107 }, { "epoch": 0.4406748679328057, "grad_norm": 5.798670043369045, "learning_rate": 6.192262554237774e-06, "loss": 17.3722, "step": 24108 }, { "epoch": 0.44069314712925217, "grad_norm": 5.719614869280944, "learning_rate": 6.191975079312391e-06, "loss": 17.3738, "step": 24109 }, { "epoch": 0.4407114263256987, "grad_norm": 6.068344991425805, "learning_rate": 6.1916876002092394e-06, "loss": 17.2393, "step": 24110 }, { "epoch": 0.44072970552214524, "grad_norm": 5.9317090430344095, "learning_rate": 6.191400116929326e-06, "loss": 17.3586, "step": 24111 }, { "epoch": 0.4407479847185918, "grad_norm": 8.087094429335183, "learning_rate": 6.191112629473658e-06, "loss": 17.7309, "step": 24112 }, { "epoch": 0.4407662639150383, "grad_norm": 5.378131859592571, "learning_rate": 6.1908251378432434e-06, "loss": 17.1388, "step": 24113 }, { "epoch": 0.4407845431114848, "grad_norm": 6.349659146169226, "learning_rate": 6.190537642039092e-06, "loss": 17.5838, "step": 24114 }, { "epoch": 0.44080282230793133, "grad_norm": 6.523370246758639, "learning_rate": 6.1902501420622066e-06, "loss": 17.6528, "step": 24115 }, { "epoch": 0.44082110150437787, "grad_norm": 7.487310635175428, "learning_rate": 6.1899626379135995e-06, "loss": 17.5962, "step": 24116 }, { "epoch": 0.4408393807008244, "grad_norm": 7.361289340010666, "learning_rate": 6.189675129594276e-06, "loss": 18.0879, "step": 24117 }, { "epoch": 0.44085765989727094, "grad_norm": 7.610033682556638, "learning_rate": 6.189387617105246e-06, "loss": 17.7131, "step": 24118 }, { "epoch": 0.4408759390937174, "grad_norm": 5.350356172600791, "learning_rate": 6.1891001004475135e-06, "loss": 17.0352, "step": 24119 }, { "epoch": 0.44089421829016395, "grad_norm": 7.939890278819534, "learning_rate": 6.188812579622089e-06, "loss": 17.9314, "step": 24120 }, { "epoch": 0.4409124974866105, "grad_norm": 6.983534295055088, "learning_rate": 6.1885250546299805e-06, "loss": 17.6833, "step": 24121 }, { "epoch": 0.440930776683057, "grad_norm": 6.748967627266763, "learning_rate": 6.188237525472194e-06, "loss": 17.6455, "step": 24122 }, { "epoch": 0.44094905587950356, "grad_norm": 7.317423161030537, "learning_rate": 6.187949992149737e-06, "loss": 18.1427, "step": 24123 }, { "epoch": 0.44096733507595004, "grad_norm": 7.7601246543196645, "learning_rate": 6.18766245466362e-06, "loss": 17.9725, "step": 24124 }, { "epoch": 0.4409856142723966, "grad_norm": 6.938177549182238, "learning_rate": 6.187374913014849e-06, "loss": 17.5571, "step": 24125 }, { "epoch": 0.4410038934688431, "grad_norm": 6.565980904736275, "learning_rate": 6.187087367204431e-06, "loss": 17.5904, "step": 24126 }, { "epoch": 0.44102217266528965, "grad_norm": 7.039811303293514, "learning_rate": 6.186799817233376e-06, "loss": 17.7007, "step": 24127 }, { "epoch": 0.4410404518617362, "grad_norm": 6.225445666144468, "learning_rate": 6.186512263102691e-06, "loss": 17.3729, "step": 24128 }, { "epoch": 0.44105873105818266, "grad_norm": 4.526645758632178, "learning_rate": 6.18622470481338e-06, "loss": 16.7223, "step": 24129 }, { "epoch": 0.4410770102546292, "grad_norm": 5.020622148618269, "learning_rate": 6.1859371423664576e-06, "loss": 16.7954, "step": 24130 }, { "epoch": 0.44109528945107573, "grad_norm": 7.681872407141381, "learning_rate": 6.185649575762927e-06, "loss": 18.0012, "step": 24131 }, { "epoch": 0.44111356864752227, "grad_norm": 6.0825591877760985, "learning_rate": 6.1853620050038e-06, "loss": 17.4602, "step": 24132 }, { "epoch": 0.4411318478439688, "grad_norm": 7.331425433942787, "learning_rate": 6.18507443009008e-06, "loss": 17.8376, "step": 24133 }, { "epoch": 0.4411501270404153, "grad_norm": 7.5145986448707145, "learning_rate": 6.184786851022776e-06, "loss": 17.823, "step": 24134 }, { "epoch": 0.4411684062368618, "grad_norm": 8.883725417509767, "learning_rate": 6.184499267802899e-06, "loss": 18.034, "step": 24135 }, { "epoch": 0.44118668543330836, "grad_norm": 5.554931324624878, "learning_rate": 6.184211680431453e-06, "loss": 16.9853, "step": 24136 }, { "epoch": 0.4412049646297549, "grad_norm": 6.7366515615716205, "learning_rate": 6.1839240889094494e-06, "loss": 17.6005, "step": 24137 }, { "epoch": 0.44122324382620143, "grad_norm": 7.613902781998414, "learning_rate": 6.183636493237895e-06, "loss": 18.2018, "step": 24138 }, { "epoch": 0.4412415230226479, "grad_norm": 6.248567696616156, "learning_rate": 6.1833488934177956e-06, "loss": 17.35, "step": 24139 }, { "epoch": 0.44125980221909444, "grad_norm": 6.427620493390211, "learning_rate": 6.183061289450162e-06, "loss": 17.4878, "step": 24140 }, { "epoch": 0.441278081415541, "grad_norm": 6.949019104185665, "learning_rate": 6.182773681336e-06, "loss": 17.7226, "step": 24141 }, { "epoch": 0.4412963606119875, "grad_norm": 6.536886009697157, "learning_rate": 6.18248606907632e-06, "loss": 17.4815, "step": 24142 }, { "epoch": 0.441314639808434, "grad_norm": 6.913409699567346, "learning_rate": 6.182198452672129e-06, "loss": 17.379, "step": 24143 }, { "epoch": 0.44133291900488053, "grad_norm": 6.160386367775416, "learning_rate": 6.181910832124435e-06, "loss": 17.5737, "step": 24144 }, { "epoch": 0.44135119820132707, "grad_norm": 6.536423006612148, "learning_rate": 6.181623207434246e-06, "loss": 17.535, "step": 24145 }, { "epoch": 0.4413694773977736, "grad_norm": 4.932389707799513, "learning_rate": 6.1813355786025705e-06, "loss": 16.9306, "step": 24146 }, { "epoch": 0.44138775659422014, "grad_norm": 6.3232867337010665, "learning_rate": 6.181047945630415e-06, "loss": 17.3533, "step": 24147 }, { "epoch": 0.4414060357906666, "grad_norm": 6.136195358541279, "learning_rate": 6.18076030851879e-06, "loss": 17.2286, "step": 24148 }, { "epoch": 0.44142431498711315, "grad_norm": 6.138399060310465, "learning_rate": 6.180472667268703e-06, "loss": 17.3879, "step": 24149 }, { "epoch": 0.4414425941835597, "grad_norm": 4.486653669905141, "learning_rate": 6.180185021881161e-06, "loss": 16.8849, "step": 24150 }, { "epoch": 0.4414608733800062, "grad_norm": 5.733280473822919, "learning_rate": 6.179897372357173e-06, "loss": 17.1547, "step": 24151 }, { "epoch": 0.44147915257645276, "grad_norm": 5.648891036525072, "learning_rate": 6.179609718697748e-06, "loss": 17.3554, "step": 24152 }, { "epoch": 0.44149743177289924, "grad_norm": 7.292753873653095, "learning_rate": 6.179322060903892e-06, "loss": 17.5932, "step": 24153 }, { "epoch": 0.4415157109693458, "grad_norm": 6.400696449672667, "learning_rate": 6.1790343989766155e-06, "loss": 17.6274, "step": 24154 }, { "epoch": 0.4415339901657923, "grad_norm": 6.882805693823392, "learning_rate": 6.1787467329169245e-06, "loss": 17.4463, "step": 24155 }, { "epoch": 0.44155226936223885, "grad_norm": 6.98579812805766, "learning_rate": 6.178459062725829e-06, "loss": 17.4662, "step": 24156 }, { "epoch": 0.4415705485586854, "grad_norm": 5.993690162660362, "learning_rate": 6.178171388404337e-06, "loss": 17.3647, "step": 24157 }, { "epoch": 0.44158882775513186, "grad_norm": 5.645586666406506, "learning_rate": 6.177883709953457e-06, "loss": 17.2167, "step": 24158 }, { "epoch": 0.4416071069515784, "grad_norm": 6.216929702676079, "learning_rate": 6.177596027374197e-06, "loss": 17.6003, "step": 24159 }, { "epoch": 0.44162538614802493, "grad_norm": 8.872753679417796, "learning_rate": 6.177308340667565e-06, "loss": 18.4974, "step": 24160 }, { "epoch": 0.44164366534447147, "grad_norm": 7.430306716374661, "learning_rate": 6.177020649834567e-06, "loss": 18.0911, "step": 24161 }, { "epoch": 0.441661944540918, "grad_norm": 7.063357979590903, "learning_rate": 6.176732954876215e-06, "loss": 17.4794, "step": 24162 }, { "epoch": 0.4416802237373645, "grad_norm": 5.1590089963925445, "learning_rate": 6.1764452557935185e-06, "loss": 16.9837, "step": 24163 }, { "epoch": 0.441698502933811, "grad_norm": 6.164279204045742, "learning_rate": 6.176157552587481e-06, "loss": 17.3539, "step": 24164 }, { "epoch": 0.44171678213025756, "grad_norm": 9.5639257842599, "learning_rate": 6.175869845259115e-06, "loss": 18.3954, "step": 24165 }, { "epoch": 0.4417350613267041, "grad_norm": 5.901771352074644, "learning_rate": 6.175582133809426e-06, "loss": 17.3821, "step": 24166 }, { "epoch": 0.44175334052315063, "grad_norm": 7.029474309152619, "learning_rate": 6.175294418239424e-06, "loss": 17.1903, "step": 24167 }, { "epoch": 0.4417716197195971, "grad_norm": 7.983708765747276, "learning_rate": 6.175006698550117e-06, "loss": 17.8349, "step": 24168 }, { "epoch": 0.44178989891604364, "grad_norm": 5.763698744975223, "learning_rate": 6.174718974742513e-06, "loss": 17.1935, "step": 24169 }, { "epoch": 0.4418081781124902, "grad_norm": 6.64625075210769, "learning_rate": 6.174431246817621e-06, "loss": 17.4247, "step": 24170 }, { "epoch": 0.4418264573089367, "grad_norm": 6.923610002131327, "learning_rate": 6.17414351477645e-06, "loss": 17.6074, "step": 24171 }, { "epoch": 0.44184473650538325, "grad_norm": 8.948947891851802, "learning_rate": 6.173855778620007e-06, "loss": 18.4662, "step": 24172 }, { "epoch": 0.44186301570182973, "grad_norm": 7.043904495888951, "learning_rate": 6.173568038349304e-06, "loss": 17.8628, "step": 24173 }, { "epoch": 0.44188129489827627, "grad_norm": 5.500254369039936, "learning_rate": 6.173280293965343e-06, "loss": 16.8723, "step": 24174 }, { "epoch": 0.4418995740947228, "grad_norm": 6.490909643950598, "learning_rate": 6.172992545469139e-06, "loss": 17.4127, "step": 24175 }, { "epoch": 0.44191785329116934, "grad_norm": 5.883672564127929, "learning_rate": 6.172704792861698e-06, "loss": 17.1861, "step": 24176 }, { "epoch": 0.4419361324876158, "grad_norm": 6.70316592529986, "learning_rate": 6.172417036144027e-06, "loss": 17.4034, "step": 24177 }, { "epoch": 0.44195441168406235, "grad_norm": 6.224850651602484, "learning_rate": 6.172129275317137e-06, "loss": 17.2891, "step": 24178 }, { "epoch": 0.4419726908805089, "grad_norm": 7.706510807772626, "learning_rate": 6.171841510382034e-06, "loss": 18.1552, "step": 24179 }, { "epoch": 0.4419909700769554, "grad_norm": 6.183663533753914, "learning_rate": 6.17155374133973e-06, "loss": 17.4231, "step": 24180 }, { "epoch": 0.44200924927340196, "grad_norm": 7.01547013991577, "learning_rate": 6.171265968191231e-06, "loss": 17.8055, "step": 24181 }, { "epoch": 0.44202752846984844, "grad_norm": 7.553437793710265, "learning_rate": 6.170978190937547e-06, "loss": 18.0711, "step": 24182 }, { "epoch": 0.442045807666295, "grad_norm": 6.4266653424142115, "learning_rate": 6.170690409579685e-06, "loss": 17.5941, "step": 24183 }, { "epoch": 0.4420640868627415, "grad_norm": 5.947258359679659, "learning_rate": 6.170402624118655e-06, "loss": 17.1631, "step": 24184 }, { "epoch": 0.44208236605918805, "grad_norm": 5.5195636614023575, "learning_rate": 6.170114834555466e-06, "loss": 17.259, "step": 24185 }, { "epoch": 0.4421006452556346, "grad_norm": 7.218084105817804, "learning_rate": 6.1698270408911266e-06, "loss": 17.7056, "step": 24186 }, { "epoch": 0.44211892445208106, "grad_norm": 6.099651480618411, "learning_rate": 6.169539243126644e-06, "loss": 17.5093, "step": 24187 }, { "epoch": 0.4421372036485276, "grad_norm": 6.663853645288097, "learning_rate": 6.169251441263028e-06, "loss": 17.769, "step": 24188 }, { "epoch": 0.44215548284497413, "grad_norm": 5.849334981384076, "learning_rate": 6.168963635301287e-06, "loss": 17.1934, "step": 24189 }, { "epoch": 0.44217376204142067, "grad_norm": 5.993995202711705, "learning_rate": 6.168675825242431e-06, "loss": 16.9622, "step": 24190 }, { "epoch": 0.4421920412378672, "grad_norm": 5.630983754426648, "learning_rate": 6.168388011087466e-06, "loss": 17.2036, "step": 24191 }, { "epoch": 0.4422103204343137, "grad_norm": 7.195487849986069, "learning_rate": 6.168100192837403e-06, "loss": 18.0233, "step": 24192 }, { "epoch": 0.4422285996307602, "grad_norm": 7.121234997380964, "learning_rate": 6.167812370493249e-06, "loss": 17.7001, "step": 24193 }, { "epoch": 0.44224687882720676, "grad_norm": 7.362998605201447, "learning_rate": 6.167524544056018e-06, "loss": 18.134, "step": 24194 }, { "epoch": 0.4422651580236533, "grad_norm": 7.772638600808954, "learning_rate": 6.167236713526711e-06, "loss": 17.8615, "step": 24195 }, { "epoch": 0.44228343722009983, "grad_norm": 7.129707872885745, "learning_rate": 6.166948878906341e-06, "loss": 17.7359, "step": 24196 }, { "epoch": 0.4423017164165463, "grad_norm": 5.58093023938167, "learning_rate": 6.166661040195917e-06, "loss": 17.0113, "step": 24197 }, { "epoch": 0.44231999561299284, "grad_norm": 5.2716662428945185, "learning_rate": 6.166373197396448e-06, "loss": 17.0409, "step": 24198 }, { "epoch": 0.4423382748094394, "grad_norm": 6.236117649285141, "learning_rate": 6.166085350508941e-06, "loss": 17.4686, "step": 24199 }, { "epoch": 0.4423565540058859, "grad_norm": 6.92155220381434, "learning_rate": 6.165797499534407e-06, "loss": 17.9482, "step": 24200 }, { "epoch": 0.44237483320233245, "grad_norm": 6.806176269382211, "learning_rate": 6.165509644473855e-06, "loss": 18.0436, "step": 24201 }, { "epoch": 0.44239311239877893, "grad_norm": 6.325650007245598, "learning_rate": 6.165221785328289e-06, "loss": 17.4151, "step": 24202 }, { "epoch": 0.44241139159522547, "grad_norm": 5.469717470043323, "learning_rate": 6.164933922098725e-06, "loss": 17.1739, "step": 24203 }, { "epoch": 0.442429670791672, "grad_norm": 7.086379925307295, "learning_rate": 6.164646054786168e-06, "loss": 17.9885, "step": 24204 }, { "epoch": 0.44244794998811854, "grad_norm": 6.558634207594864, "learning_rate": 6.164358183391628e-06, "loss": 17.3538, "step": 24205 }, { "epoch": 0.4424662291845651, "grad_norm": 9.546454805546572, "learning_rate": 6.164070307916113e-06, "loss": 18.1258, "step": 24206 }, { "epoch": 0.44248450838101155, "grad_norm": 7.320380266248527, "learning_rate": 6.1637824283606314e-06, "loss": 18.2428, "step": 24207 }, { "epoch": 0.4425027875774581, "grad_norm": 7.917832202990051, "learning_rate": 6.163494544726195e-06, "loss": 17.5665, "step": 24208 }, { "epoch": 0.4425210667739046, "grad_norm": 7.062214357481698, "learning_rate": 6.163206657013811e-06, "loss": 17.871, "step": 24209 }, { "epoch": 0.44253934597035116, "grad_norm": 7.780263464878502, "learning_rate": 6.162918765224488e-06, "loss": 17.4316, "step": 24210 }, { "epoch": 0.44255762516679764, "grad_norm": 6.833201342721086, "learning_rate": 6.162630869359236e-06, "loss": 17.5612, "step": 24211 }, { "epoch": 0.4425759043632442, "grad_norm": 6.497135885491084, "learning_rate": 6.162342969419064e-06, "loss": 17.6416, "step": 24212 }, { "epoch": 0.4425941835596907, "grad_norm": 5.856626806966802, "learning_rate": 6.162055065404981e-06, "loss": 17.2533, "step": 24213 }, { "epoch": 0.44261246275613725, "grad_norm": 6.313626717112335, "learning_rate": 6.161767157317996e-06, "loss": 17.6512, "step": 24214 }, { "epoch": 0.4426307419525838, "grad_norm": 7.22027848710395, "learning_rate": 6.161479245159115e-06, "loss": 17.7917, "step": 24215 }, { "epoch": 0.44264902114903026, "grad_norm": 6.204356458413, "learning_rate": 6.161191328929354e-06, "loss": 17.5948, "step": 24216 }, { "epoch": 0.4426673003454768, "grad_norm": 7.258930959726999, "learning_rate": 6.160903408629716e-06, "loss": 17.3764, "step": 24217 }, { "epoch": 0.44268557954192334, "grad_norm": 8.039318793161836, "learning_rate": 6.160615484261213e-06, "loss": 17.8878, "step": 24218 }, { "epoch": 0.44270385873836987, "grad_norm": 5.714886397409613, "learning_rate": 6.160327555824853e-06, "loss": 17.2314, "step": 24219 }, { "epoch": 0.4427221379348164, "grad_norm": 5.866988746985408, "learning_rate": 6.160039623321645e-06, "loss": 17.38, "step": 24220 }, { "epoch": 0.4427404171312629, "grad_norm": 5.424162144011653, "learning_rate": 6.159751686752601e-06, "loss": 17.0537, "step": 24221 }, { "epoch": 0.4427586963277094, "grad_norm": 6.119947301657276, "learning_rate": 6.159463746118726e-06, "loss": 17.5312, "step": 24222 }, { "epoch": 0.44277697552415596, "grad_norm": 5.83570647973294, "learning_rate": 6.159175801421031e-06, "loss": 17.1355, "step": 24223 }, { "epoch": 0.4427952547206025, "grad_norm": 7.079606293471452, "learning_rate": 6.1588878526605265e-06, "loss": 17.6112, "step": 24224 }, { "epoch": 0.44281353391704903, "grad_norm": 6.662479255800925, "learning_rate": 6.15859989983822e-06, "loss": 17.9218, "step": 24225 }, { "epoch": 0.4428318131134955, "grad_norm": 5.76213768489833, "learning_rate": 6.158311942955122e-06, "loss": 17.2206, "step": 24226 }, { "epoch": 0.44285009230994204, "grad_norm": 7.465509608667722, "learning_rate": 6.1580239820122414e-06, "loss": 17.8642, "step": 24227 }, { "epoch": 0.4428683715063886, "grad_norm": 7.247346089799373, "learning_rate": 6.157736017010587e-06, "loss": 17.7723, "step": 24228 }, { "epoch": 0.4428866507028351, "grad_norm": 4.697584188159765, "learning_rate": 6.157448047951166e-06, "loss": 16.8858, "step": 24229 }, { "epoch": 0.44290492989928165, "grad_norm": 8.43424148909306, "learning_rate": 6.157160074834992e-06, "loss": 18.319, "step": 24230 }, { "epoch": 0.44292320909572813, "grad_norm": 6.109837674523738, "learning_rate": 6.156872097663073e-06, "loss": 17.1626, "step": 24231 }, { "epoch": 0.44294148829217467, "grad_norm": 6.568676897566294, "learning_rate": 6.1565841164364185e-06, "loss": 17.2407, "step": 24232 }, { "epoch": 0.4429597674886212, "grad_norm": 5.922361169144793, "learning_rate": 6.156296131156036e-06, "loss": 17.0792, "step": 24233 }, { "epoch": 0.44297804668506774, "grad_norm": 7.670628458841614, "learning_rate": 6.156008141822933e-06, "loss": 18.2302, "step": 24234 }, { "epoch": 0.4429963258815143, "grad_norm": 5.4385676906398555, "learning_rate": 6.155720148438126e-06, "loss": 17.2284, "step": 24235 }, { "epoch": 0.44301460507796075, "grad_norm": 6.069382087305104, "learning_rate": 6.155432151002618e-06, "loss": 17.2016, "step": 24236 }, { "epoch": 0.4430328842744073, "grad_norm": 5.568902653557678, "learning_rate": 6.15514414951742e-06, "loss": 17.0489, "step": 24237 }, { "epoch": 0.4430511634708538, "grad_norm": 6.347852184975662, "learning_rate": 6.154856143983544e-06, "loss": 17.5024, "step": 24238 }, { "epoch": 0.44306944266730036, "grad_norm": 6.4876102673276534, "learning_rate": 6.154568134401996e-06, "loss": 17.4501, "step": 24239 }, { "epoch": 0.4430877218637469, "grad_norm": 5.946846618681404, "learning_rate": 6.154280120773787e-06, "loss": 17.3774, "step": 24240 }, { "epoch": 0.4431060010601934, "grad_norm": 6.819805116977419, "learning_rate": 6.1539921030999276e-06, "loss": 17.2599, "step": 24241 }, { "epoch": 0.4431242802566399, "grad_norm": 6.4322762761306285, "learning_rate": 6.153704081381424e-06, "loss": 17.499, "step": 24242 }, { "epoch": 0.44314255945308645, "grad_norm": 7.1095276575998, "learning_rate": 6.153416055619289e-06, "loss": 17.6187, "step": 24243 }, { "epoch": 0.443160838649533, "grad_norm": 5.3262334183648665, "learning_rate": 6.15312802581453e-06, "loss": 16.8974, "step": 24244 }, { "epoch": 0.44317911784597946, "grad_norm": 6.892633490560132, "learning_rate": 6.152839991968159e-06, "loss": 17.5396, "step": 24245 }, { "epoch": 0.443197397042426, "grad_norm": 6.6684426234079615, "learning_rate": 6.152551954081183e-06, "loss": 17.5883, "step": 24246 }, { "epoch": 0.44321567623887254, "grad_norm": 5.857402012260117, "learning_rate": 6.152263912154611e-06, "loss": 17.3895, "step": 24247 }, { "epoch": 0.44323395543531907, "grad_norm": 6.58462638691802, "learning_rate": 6.151975866189455e-06, "loss": 17.6756, "step": 24248 }, { "epoch": 0.4432522346317656, "grad_norm": 8.599941133848265, "learning_rate": 6.151687816186725e-06, "loss": 18.5927, "step": 24249 }, { "epoch": 0.4432705138282121, "grad_norm": 6.610459640862656, "learning_rate": 6.151399762147428e-06, "loss": 17.3349, "step": 24250 }, { "epoch": 0.4432887930246586, "grad_norm": 6.320284833880291, "learning_rate": 6.151111704072574e-06, "loss": 17.5565, "step": 24251 }, { "epoch": 0.44330707222110516, "grad_norm": 6.754158347300993, "learning_rate": 6.150823641963174e-06, "loss": 17.9968, "step": 24252 }, { "epoch": 0.4433253514175517, "grad_norm": 6.009291161879157, "learning_rate": 6.150535575820237e-06, "loss": 17.4278, "step": 24253 }, { "epoch": 0.44334363061399823, "grad_norm": 6.194526641385256, "learning_rate": 6.150247505644773e-06, "loss": 17.327, "step": 24254 }, { "epoch": 0.4433619098104447, "grad_norm": 8.857128349580567, "learning_rate": 6.149959431437791e-06, "loss": 18.4417, "step": 24255 }, { "epoch": 0.44338018900689125, "grad_norm": 5.916802344099659, "learning_rate": 6.149671353200301e-06, "loss": 17.2905, "step": 24256 }, { "epoch": 0.4433984682033378, "grad_norm": 5.3010549313235895, "learning_rate": 6.149383270933311e-06, "loss": 17.1912, "step": 24257 }, { "epoch": 0.4434167473997843, "grad_norm": 6.769126587055146, "learning_rate": 6.149095184637834e-06, "loss": 18.0512, "step": 24258 }, { "epoch": 0.44343502659623085, "grad_norm": 6.034267195322498, "learning_rate": 6.148807094314879e-06, "loss": 17.3947, "step": 24259 }, { "epoch": 0.44345330579267733, "grad_norm": 6.846131522263476, "learning_rate": 6.148518999965454e-06, "loss": 17.6969, "step": 24260 }, { "epoch": 0.44347158498912387, "grad_norm": 6.654444558424248, "learning_rate": 6.148230901590568e-06, "loss": 17.5979, "step": 24261 }, { "epoch": 0.4434898641855704, "grad_norm": 5.927856863203363, "learning_rate": 6.147942799191235e-06, "loss": 17.3457, "step": 24262 }, { "epoch": 0.44350814338201694, "grad_norm": 5.881571836399706, "learning_rate": 6.147654692768461e-06, "loss": 17.3193, "step": 24263 }, { "epoch": 0.4435264225784635, "grad_norm": 6.040106129984872, "learning_rate": 6.1473665823232565e-06, "loss": 17.1444, "step": 24264 }, { "epoch": 0.44354470177490996, "grad_norm": 6.189774380879795, "learning_rate": 6.147078467856632e-06, "loss": 17.3509, "step": 24265 }, { "epoch": 0.4435629809713565, "grad_norm": 6.826063409108439, "learning_rate": 6.146790349369597e-06, "loss": 17.5885, "step": 24266 }, { "epoch": 0.443581260167803, "grad_norm": 6.001597727562739, "learning_rate": 6.146502226863161e-06, "loss": 17.4777, "step": 24267 }, { "epoch": 0.44359953936424956, "grad_norm": 8.585103529418781, "learning_rate": 6.146214100338335e-06, "loss": 18.3625, "step": 24268 }, { "epoch": 0.4436178185606961, "grad_norm": 6.588201065544275, "learning_rate": 6.1459259697961275e-06, "loss": 17.3728, "step": 24269 }, { "epoch": 0.4436360977571426, "grad_norm": 6.145629592495774, "learning_rate": 6.145637835237549e-06, "loss": 17.6649, "step": 24270 }, { "epoch": 0.4436543769535891, "grad_norm": 6.569078665093694, "learning_rate": 6.145349696663608e-06, "loss": 17.7227, "step": 24271 }, { "epoch": 0.44367265615003565, "grad_norm": 7.128844505822352, "learning_rate": 6.145061554075318e-06, "loss": 17.815, "step": 24272 }, { "epoch": 0.4436909353464822, "grad_norm": 5.4845654340425956, "learning_rate": 6.144773407473686e-06, "loss": 17.1778, "step": 24273 }, { "epoch": 0.4437092145429287, "grad_norm": 5.535464992401839, "learning_rate": 6.144485256859722e-06, "loss": 17.23, "step": 24274 }, { "epoch": 0.4437274937393752, "grad_norm": 5.522928988988442, "learning_rate": 6.144197102234436e-06, "loss": 17.0113, "step": 24275 }, { "epoch": 0.44374577293582174, "grad_norm": 6.9864170491744995, "learning_rate": 6.14390894359884e-06, "loss": 17.6287, "step": 24276 }, { "epoch": 0.44376405213226827, "grad_norm": 7.583486048710066, "learning_rate": 6.143620780953941e-06, "loss": 17.9127, "step": 24277 }, { "epoch": 0.4437823313287148, "grad_norm": 6.465241793642631, "learning_rate": 6.143332614300751e-06, "loss": 17.4757, "step": 24278 }, { "epoch": 0.4438006105251613, "grad_norm": 7.142216981775892, "learning_rate": 6.143044443640278e-06, "loss": 17.6552, "step": 24279 }, { "epoch": 0.4438188897216078, "grad_norm": 5.9695328808129435, "learning_rate": 6.142756268973536e-06, "loss": 17.2828, "step": 24280 }, { "epoch": 0.44383716891805436, "grad_norm": 6.9498493225935025, "learning_rate": 6.142468090301531e-06, "loss": 17.6509, "step": 24281 }, { "epoch": 0.4438554481145009, "grad_norm": 5.187061746346155, "learning_rate": 6.142179907625274e-06, "loss": 16.9073, "step": 24282 }, { "epoch": 0.44387372731094743, "grad_norm": 6.568881953882317, "learning_rate": 6.141891720945776e-06, "loss": 17.2912, "step": 24283 }, { "epoch": 0.4438920065073939, "grad_norm": 10.60555002158264, "learning_rate": 6.141603530264046e-06, "loss": 17.8106, "step": 24284 }, { "epoch": 0.44391028570384045, "grad_norm": 8.03583234211266, "learning_rate": 6.141315335581096e-06, "loss": 17.9401, "step": 24285 }, { "epoch": 0.443928564900287, "grad_norm": 7.243956753975113, "learning_rate": 6.141027136897935e-06, "loss": 17.7223, "step": 24286 }, { "epoch": 0.4439468440967335, "grad_norm": 7.500720564815997, "learning_rate": 6.140738934215572e-06, "loss": 17.6781, "step": 24287 }, { "epoch": 0.44396512329318005, "grad_norm": 6.193149165372164, "learning_rate": 6.140450727535018e-06, "loss": 17.6141, "step": 24288 }, { "epoch": 0.44398340248962653, "grad_norm": 7.03293458091303, "learning_rate": 6.140162516857283e-06, "loss": 17.5132, "step": 24289 }, { "epoch": 0.44400168168607307, "grad_norm": 7.1255967726294385, "learning_rate": 6.139874302183379e-06, "loss": 17.7558, "step": 24290 }, { "epoch": 0.4440199608825196, "grad_norm": 6.825648015376803, "learning_rate": 6.1395860835143125e-06, "loss": 17.4193, "step": 24291 }, { "epoch": 0.44403824007896614, "grad_norm": 7.630204429301696, "learning_rate": 6.139297860851097e-06, "loss": 17.326, "step": 24292 }, { "epoch": 0.4440565192754127, "grad_norm": 6.944416049690852, "learning_rate": 6.139009634194739e-06, "loss": 17.7461, "step": 24293 }, { "epoch": 0.44407479847185916, "grad_norm": 6.009494017367086, "learning_rate": 6.138721403546252e-06, "loss": 17.4291, "step": 24294 }, { "epoch": 0.4440930776683057, "grad_norm": 6.813643389771506, "learning_rate": 6.1384331689066475e-06, "loss": 17.8385, "step": 24295 }, { "epoch": 0.4441113568647522, "grad_norm": 7.091083405823428, "learning_rate": 6.138144930276931e-06, "loss": 17.6301, "step": 24296 }, { "epoch": 0.44412963606119876, "grad_norm": 7.7125590736864815, "learning_rate": 6.137856687658117e-06, "loss": 17.3912, "step": 24297 }, { "epoch": 0.4441479152576453, "grad_norm": 6.410050452714684, "learning_rate": 6.137568441051214e-06, "loss": 17.7583, "step": 24298 }, { "epoch": 0.4441661944540918, "grad_norm": 6.996499164391479, "learning_rate": 6.137280190457231e-06, "loss": 17.683, "step": 24299 }, { "epoch": 0.4441844736505383, "grad_norm": 5.371767244130282, "learning_rate": 6.1369919358771805e-06, "loss": 17.1344, "step": 24300 }, { "epoch": 0.44420275284698485, "grad_norm": 6.341875127461749, "learning_rate": 6.136703677312071e-06, "loss": 17.2414, "step": 24301 }, { "epoch": 0.4442210320434314, "grad_norm": 6.628741169783307, "learning_rate": 6.136415414762915e-06, "loss": 17.8268, "step": 24302 }, { "epoch": 0.4442393112398779, "grad_norm": 5.533741679658401, "learning_rate": 6.13612714823072e-06, "loss": 17.3195, "step": 24303 }, { "epoch": 0.4442575904363244, "grad_norm": 6.661046720425952, "learning_rate": 6.1358388777165e-06, "loss": 17.4677, "step": 24304 }, { "epoch": 0.44427586963277094, "grad_norm": 5.47171707160309, "learning_rate": 6.1355506032212635e-06, "loss": 17.1375, "step": 24305 }, { "epoch": 0.4442941488292175, "grad_norm": 6.598453224838951, "learning_rate": 6.135262324746017e-06, "loss": 17.418, "step": 24306 }, { "epoch": 0.444312428025664, "grad_norm": 6.932034174488093, "learning_rate": 6.1349740422917785e-06, "loss": 17.4595, "step": 24307 }, { "epoch": 0.44433070722211054, "grad_norm": 6.785210180932653, "learning_rate": 6.134685755859553e-06, "loss": 17.5935, "step": 24308 }, { "epoch": 0.444348986418557, "grad_norm": 6.757859135595257, "learning_rate": 6.134397465450353e-06, "loss": 17.3384, "step": 24309 }, { "epoch": 0.44436726561500356, "grad_norm": 6.447510426771404, "learning_rate": 6.1341091710651866e-06, "loss": 17.4765, "step": 24310 }, { "epoch": 0.4443855448114501, "grad_norm": 7.09644440450274, "learning_rate": 6.133820872705068e-06, "loss": 17.3937, "step": 24311 }, { "epoch": 0.44440382400789663, "grad_norm": 5.429525403196993, "learning_rate": 6.133532570371005e-06, "loss": 17.2287, "step": 24312 }, { "epoch": 0.4444221032043431, "grad_norm": 10.01770545589729, "learning_rate": 6.133244264064007e-06, "loss": 17.9477, "step": 24313 }, { "epoch": 0.44444038240078965, "grad_norm": 5.171950826862728, "learning_rate": 6.132955953785089e-06, "loss": 16.983, "step": 24314 }, { "epoch": 0.4444586615972362, "grad_norm": 7.792712040468101, "learning_rate": 6.132667639535257e-06, "loss": 17.5489, "step": 24315 }, { "epoch": 0.4444769407936827, "grad_norm": 6.560204077919434, "learning_rate": 6.132379321315522e-06, "loss": 17.6316, "step": 24316 }, { "epoch": 0.44449521999012925, "grad_norm": 5.694203920575719, "learning_rate": 6.1320909991268984e-06, "loss": 17.1112, "step": 24317 }, { "epoch": 0.44451349918657573, "grad_norm": 5.907759863783654, "learning_rate": 6.131802672970394e-06, "loss": 17.4768, "step": 24318 }, { "epoch": 0.44453177838302227, "grad_norm": 5.786420166290249, "learning_rate": 6.131514342847018e-06, "loss": 17.2686, "step": 24319 }, { "epoch": 0.4445500575794688, "grad_norm": 5.972920879570193, "learning_rate": 6.131226008757781e-06, "loss": 17.2622, "step": 24320 }, { "epoch": 0.44456833677591534, "grad_norm": 7.229629167659487, "learning_rate": 6.1309376707036986e-06, "loss": 17.5661, "step": 24321 }, { "epoch": 0.4445866159723619, "grad_norm": 8.328817984606994, "learning_rate": 6.130649328685776e-06, "loss": 17.6267, "step": 24322 }, { "epoch": 0.44460489516880836, "grad_norm": 6.499535407976775, "learning_rate": 6.130360982705026e-06, "loss": 17.4157, "step": 24323 }, { "epoch": 0.4446231743652549, "grad_norm": 7.7690598525152765, "learning_rate": 6.130072632762458e-06, "loss": 18.2608, "step": 24324 }, { "epoch": 0.4446414535617014, "grad_norm": 8.653909364164772, "learning_rate": 6.129784278859083e-06, "loss": 18.9854, "step": 24325 }, { "epoch": 0.44465973275814796, "grad_norm": 6.04688392428587, "learning_rate": 6.129495920995913e-06, "loss": 17.3662, "step": 24326 }, { "epoch": 0.4446780119545945, "grad_norm": 5.861343213502779, "learning_rate": 6.129207559173958e-06, "loss": 17.3296, "step": 24327 }, { "epoch": 0.444696291151041, "grad_norm": 7.353721394555597, "learning_rate": 6.128919193394231e-06, "loss": 17.9263, "step": 24328 }, { "epoch": 0.4447145703474875, "grad_norm": 6.899622820319202, "learning_rate": 6.128630823657735e-06, "loss": 17.7727, "step": 24329 }, { "epoch": 0.44473284954393405, "grad_norm": 7.335581877620247, "learning_rate": 6.128342449965488e-06, "loss": 17.8733, "step": 24330 }, { "epoch": 0.4447511287403806, "grad_norm": 5.702965774210911, "learning_rate": 6.1280540723185e-06, "loss": 17.1145, "step": 24331 }, { "epoch": 0.4447694079368271, "grad_norm": 6.254112132783163, "learning_rate": 6.127765690717781e-06, "loss": 17.3912, "step": 24332 }, { "epoch": 0.4447876871332736, "grad_norm": 6.160431164341467, "learning_rate": 6.127477305164339e-06, "loss": 17.5638, "step": 24333 }, { "epoch": 0.44480596632972014, "grad_norm": 4.911584992933277, "learning_rate": 6.127188915659186e-06, "loss": 16.855, "step": 24334 }, { "epoch": 0.4448242455261667, "grad_norm": 7.166647929728161, "learning_rate": 6.126900522203336e-06, "loss": 17.8046, "step": 24335 }, { "epoch": 0.4448425247226132, "grad_norm": 7.861798426693862, "learning_rate": 6.126612124797797e-06, "loss": 18.479, "step": 24336 }, { "epoch": 0.44486080391905974, "grad_norm": 6.754642858429004, "learning_rate": 6.12632372344358e-06, "loss": 17.7218, "step": 24337 }, { "epoch": 0.4448790831155062, "grad_norm": 7.082069116742022, "learning_rate": 6.126035318141694e-06, "loss": 17.8935, "step": 24338 }, { "epoch": 0.44489736231195276, "grad_norm": 5.984734050673375, "learning_rate": 6.1257469088931556e-06, "loss": 17.2154, "step": 24339 }, { "epoch": 0.4449156415083993, "grad_norm": 6.255999503580697, "learning_rate": 6.125458495698971e-06, "loss": 17.597, "step": 24340 }, { "epoch": 0.44493392070484583, "grad_norm": 5.921269898866655, "learning_rate": 6.12517007856015e-06, "loss": 17.6401, "step": 24341 }, { "epoch": 0.44495219990129237, "grad_norm": 8.369976768902918, "learning_rate": 6.124881657477707e-06, "loss": 17.893, "step": 24342 }, { "epoch": 0.44497047909773885, "grad_norm": 6.758463596355022, "learning_rate": 6.124593232452652e-06, "loss": 17.5642, "step": 24343 }, { "epoch": 0.4449887582941854, "grad_norm": 5.48942460446099, "learning_rate": 6.124304803485994e-06, "loss": 17.0318, "step": 24344 }, { "epoch": 0.4450070374906319, "grad_norm": 7.397430182434983, "learning_rate": 6.124016370578747e-06, "loss": 17.5278, "step": 24345 }, { "epoch": 0.44502531668707845, "grad_norm": 6.604856253075043, "learning_rate": 6.123727933731918e-06, "loss": 17.6773, "step": 24346 }, { "epoch": 0.44504359588352493, "grad_norm": 6.016318657053202, "learning_rate": 6.1234394929465206e-06, "loss": 17.0158, "step": 24347 }, { "epoch": 0.44506187507997147, "grad_norm": 5.993272945402518, "learning_rate": 6.123151048223565e-06, "loss": 17.2964, "step": 24348 }, { "epoch": 0.445080154276418, "grad_norm": 7.895884841218638, "learning_rate": 6.1228625995640645e-06, "loss": 18.2877, "step": 24349 }, { "epoch": 0.44509843347286454, "grad_norm": 7.822172859346337, "learning_rate": 6.122574146969026e-06, "loss": 17.9766, "step": 24350 }, { "epoch": 0.4451167126693111, "grad_norm": 4.894234732054747, "learning_rate": 6.122285690439464e-06, "loss": 16.969, "step": 24351 }, { "epoch": 0.44513499186575756, "grad_norm": 5.545973586692187, "learning_rate": 6.121997229976387e-06, "loss": 17.0296, "step": 24352 }, { "epoch": 0.4451532710622041, "grad_norm": 7.603804127291949, "learning_rate": 6.121708765580807e-06, "loss": 17.7488, "step": 24353 }, { "epoch": 0.44517155025865063, "grad_norm": 6.185939227796631, "learning_rate": 6.121420297253735e-06, "loss": 17.534, "step": 24354 }, { "epoch": 0.44518982945509716, "grad_norm": 5.910913781325826, "learning_rate": 6.121131824996183e-06, "loss": 17.3078, "step": 24355 }, { "epoch": 0.4452081086515437, "grad_norm": 5.5531843054562, "learning_rate": 6.1208433488091604e-06, "loss": 17.0702, "step": 24356 }, { "epoch": 0.4452263878479902, "grad_norm": 6.177239257868246, "learning_rate": 6.12055486869368e-06, "loss": 17.5976, "step": 24357 }, { "epoch": 0.4452446670444367, "grad_norm": 6.5302201683004935, "learning_rate": 6.1202663846507505e-06, "loss": 17.3438, "step": 24358 }, { "epoch": 0.44526294624088325, "grad_norm": 6.506877468055951, "learning_rate": 6.119977896681387e-06, "loss": 17.9006, "step": 24359 }, { "epoch": 0.4452812254373298, "grad_norm": 7.115421182349851, "learning_rate": 6.1196894047865964e-06, "loss": 17.5706, "step": 24360 }, { "epoch": 0.4452995046337763, "grad_norm": 6.869706757941675, "learning_rate": 6.119400908967391e-06, "loss": 17.8241, "step": 24361 }, { "epoch": 0.4453177838302228, "grad_norm": 6.170737916919833, "learning_rate": 6.119112409224783e-06, "loss": 17.3096, "step": 24362 }, { "epoch": 0.44533606302666934, "grad_norm": 6.320784406038538, "learning_rate": 6.118823905559785e-06, "loss": 17.5181, "step": 24363 }, { "epoch": 0.4453543422231159, "grad_norm": 7.279107281238489, "learning_rate": 6.1185353979734055e-06, "loss": 17.5462, "step": 24364 }, { "epoch": 0.4453726214195624, "grad_norm": 6.995689565213354, "learning_rate": 6.118246886466655e-06, "loss": 17.8895, "step": 24365 }, { "epoch": 0.44539090061600894, "grad_norm": 7.393614652143189, "learning_rate": 6.117958371040548e-06, "loss": 17.9326, "step": 24366 }, { "epoch": 0.4454091798124554, "grad_norm": 7.519159012904411, "learning_rate": 6.1176698516960916e-06, "loss": 17.8713, "step": 24367 }, { "epoch": 0.44542745900890196, "grad_norm": 5.349614964005685, "learning_rate": 6.117381328434302e-06, "loss": 17.0897, "step": 24368 }, { "epoch": 0.4454457382053485, "grad_norm": 5.568668906025615, "learning_rate": 6.117092801256186e-06, "loss": 17.0726, "step": 24369 }, { "epoch": 0.44546401740179503, "grad_norm": 5.97316198049961, "learning_rate": 6.1168042701627574e-06, "loss": 17.3952, "step": 24370 }, { "epoch": 0.44548229659824157, "grad_norm": 5.999665770267773, "learning_rate": 6.116515735155026e-06, "loss": 17.5104, "step": 24371 }, { "epoch": 0.44550057579468805, "grad_norm": 7.194786978556543, "learning_rate": 6.116227196234005e-06, "loss": 17.5944, "step": 24372 }, { "epoch": 0.4455188549911346, "grad_norm": 5.967959370906969, "learning_rate": 6.115938653400705e-06, "loss": 17.2163, "step": 24373 }, { "epoch": 0.4455371341875811, "grad_norm": 6.690550135980042, "learning_rate": 6.115650106656134e-06, "loss": 17.3666, "step": 24374 }, { "epoch": 0.44555541338402765, "grad_norm": 5.803152695053935, "learning_rate": 6.115361556001308e-06, "loss": 17.4216, "step": 24375 }, { "epoch": 0.4455736925804742, "grad_norm": 6.895514055682177, "learning_rate": 6.1150730014372375e-06, "loss": 17.557, "step": 24376 }, { "epoch": 0.44559197177692067, "grad_norm": 6.313370438085639, "learning_rate": 6.114784442964932e-06, "loss": 17.2118, "step": 24377 }, { "epoch": 0.4456102509733672, "grad_norm": 6.0538085555126235, "learning_rate": 6.114495880585404e-06, "loss": 17.1377, "step": 24378 }, { "epoch": 0.44562853016981374, "grad_norm": 6.642174924296055, "learning_rate": 6.114207314299662e-06, "loss": 17.888, "step": 24379 }, { "epoch": 0.4456468093662603, "grad_norm": 7.1705988045603, "learning_rate": 6.1139187441087246e-06, "loss": 17.6493, "step": 24380 }, { "epoch": 0.44566508856270676, "grad_norm": 6.5318335551960915, "learning_rate": 6.113630170013596e-06, "loss": 17.3605, "step": 24381 }, { "epoch": 0.4456833677591533, "grad_norm": 7.349949186536805, "learning_rate": 6.11334159201529e-06, "loss": 18.0314, "step": 24382 }, { "epoch": 0.44570164695559983, "grad_norm": 6.590783839230621, "learning_rate": 6.11305301011482e-06, "loss": 17.5504, "step": 24383 }, { "epoch": 0.44571992615204636, "grad_norm": 6.166497304071057, "learning_rate": 6.1127644243131945e-06, "loss": 17.3252, "step": 24384 }, { "epoch": 0.4457382053484929, "grad_norm": 7.171042314064051, "learning_rate": 6.112475834611426e-06, "loss": 17.6472, "step": 24385 }, { "epoch": 0.4457564845449394, "grad_norm": 7.9448358019926735, "learning_rate": 6.112187241010527e-06, "loss": 18.3846, "step": 24386 }, { "epoch": 0.4457747637413859, "grad_norm": 6.725274435039454, "learning_rate": 6.111898643511509e-06, "loss": 17.7046, "step": 24387 }, { "epoch": 0.44579304293783245, "grad_norm": 6.3831455941825554, "learning_rate": 6.111610042115381e-06, "loss": 17.5592, "step": 24388 }, { "epoch": 0.445811322134279, "grad_norm": 5.259620222601401, "learning_rate": 6.111321436823157e-06, "loss": 17.1178, "step": 24389 }, { "epoch": 0.4458296013307255, "grad_norm": 5.712018163076865, "learning_rate": 6.11103282763585e-06, "loss": 17.2385, "step": 24390 }, { "epoch": 0.445847880527172, "grad_norm": 6.489322288991205, "learning_rate": 6.110744214554467e-06, "loss": 17.3642, "step": 24391 }, { "epoch": 0.44586615972361854, "grad_norm": 6.893296290587616, "learning_rate": 6.110455597580022e-06, "loss": 17.5147, "step": 24392 }, { "epoch": 0.4458844389200651, "grad_norm": 5.634470184937584, "learning_rate": 6.110166976713525e-06, "loss": 17.1702, "step": 24393 }, { "epoch": 0.4459027181165116, "grad_norm": 7.946888191806957, "learning_rate": 6.109878351955992e-06, "loss": 18.0497, "step": 24394 }, { "epoch": 0.44592099731295815, "grad_norm": 7.873532003521282, "learning_rate": 6.10958972330843e-06, "loss": 18.1097, "step": 24395 }, { "epoch": 0.4459392765094046, "grad_norm": 8.331721818258567, "learning_rate": 6.109301090771853e-06, "loss": 17.5412, "step": 24396 }, { "epoch": 0.44595755570585116, "grad_norm": 6.31519033379773, "learning_rate": 6.109012454347272e-06, "loss": 17.3509, "step": 24397 }, { "epoch": 0.4459758349022977, "grad_norm": 8.558776793205876, "learning_rate": 6.108723814035697e-06, "loss": 18.4301, "step": 24398 }, { "epoch": 0.44599411409874423, "grad_norm": 6.507503840989197, "learning_rate": 6.108435169838143e-06, "loss": 17.6548, "step": 24399 }, { "epoch": 0.44601239329519077, "grad_norm": 5.944197870978429, "learning_rate": 6.108146521755619e-06, "loss": 17.2269, "step": 24400 }, { "epoch": 0.44603067249163725, "grad_norm": 5.1432332411241815, "learning_rate": 6.107857869789139e-06, "loss": 17.158, "step": 24401 }, { "epoch": 0.4460489516880838, "grad_norm": 6.723672584918994, "learning_rate": 6.107569213939712e-06, "loss": 17.4827, "step": 24402 }, { "epoch": 0.4460672308845303, "grad_norm": 8.294100445087008, "learning_rate": 6.107280554208351e-06, "loss": 17.8669, "step": 24403 }, { "epoch": 0.44608551008097685, "grad_norm": 7.0036918377540776, "learning_rate": 6.106991890596069e-06, "loss": 17.6244, "step": 24404 }, { "epoch": 0.4461037892774234, "grad_norm": 6.762475181686122, "learning_rate": 6.106703223103876e-06, "loss": 17.6889, "step": 24405 }, { "epoch": 0.44612206847386987, "grad_norm": 7.7299651463420425, "learning_rate": 6.106414551732782e-06, "loss": 17.7296, "step": 24406 }, { "epoch": 0.4461403476703164, "grad_norm": 5.927754323053762, "learning_rate": 6.1061258764838025e-06, "loss": 17.4274, "step": 24407 }, { "epoch": 0.44615862686676294, "grad_norm": 7.176967531030489, "learning_rate": 6.105837197357949e-06, "loss": 17.7028, "step": 24408 }, { "epoch": 0.4461769060632095, "grad_norm": 8.012934018334555, "learning_rate": 6.105548514356232e-06, "loss": 18.5327, "step": 24409 }, { "epoch": 0.446195185259656, "grad_norm": 7.480270741527926, "learning_rate": 6.105259827479662e-06, "loss": 17.9369, "step": 24410 }, { "epoch": 0.4462134644561025, "grad_norm": 6.2645173973984996, "learning_rate": 6.104971136729253e-06, "loss": 17.4705, "step": 24411 }, { "epoch": 0.44623174365254903, "grad_norm": 6.237915343950155, "learning_rate": 6.104682442106016e-06, "loss": 17.6889, "step": 24412 }, { "epoch": 0.44625002284899556, "grad_norm": 5.41888111081362, "learning_rate": 6.1043937436109626e-06, "loss": 17.0488, "step": 24413 }, { "epoch": 0.4462683020454421, "grad_norm": 5.957470474484199, "learning_rate": 6.104105041245106e-06, "loss": 17.3516, "step": 24414 }, { "epoch": 0.4462865812418886, "grad_norm": 5.838828191115028, "learning_rate": 6.103816335009455e-06, "loss": 17.174, "step": 24415 }, { "epoch": 0.4463048604383351, "grad_norm": 6.069141699449847, "learning_rate": 6.1035276249050246e-06, "loss": 17.0595, "step": 24416 }, { "epoch": 0.44632313963478165, "grad_norm": 7.87473920273802, "learning_rate": 6.103238910932825e-06, "loss": 17.7671, "step": 24417 }, { "epoch": 0.4463414188312282, "grad_norm": 6.3488164438989685, "learning_rate": 6.102950193093871e-06, "loss": 17.5785, "step": 24418 }, { "epoch": 0.4463596980276747, "grad_norm": 5.646912176149879, "learning_rate": 6.102661471389171e-06, "loss": 17.1245, "step": 24419 }, { "epoch": 0.4463779772241212, "grad_norm": 5.528284778019849, "learning_rate": 6.1023727458197355e-06, "loss": 17.166, "step": 24420 }, { "epoch": 0.44639625642056774, "grad_norm": 6.485533940654361, "learning_rate": 6.102084016386583e-06, "loss": 17.707, "step": 24421 }, { "epoch": 0.4464145356170143, "grad_norm": 7.741099090791554, "learning_rate": 6.101795283090721e-06, "loss": 18.1211, "step": 24422 }, { "epoch": 0.4464328148134608, "grad_norm": 6.9229104341689025, "learning_rate": 6.101506545933161e-06, "loss": 17.6524, "step": 24423 }, { "epoch": 0.44645109400990735, "grad_norm": 6.377767345759849, "learning_rate": 6.101217804914917e-06, "loss": 17.6627, "step": 24424 }, { "epoch": 0.4464693732063538, "grad_norm": 5.490576776855622, "learning_rate": 6.1009290600369995e-06, "loss": 17.3771, "step": 24425 }, { "epoch": 0.44648765240280036, "grad_norm": 5.651945789549618, "learning_rate": 6.100640311300421e-06, "loss": 17.0331, "step": 24426 }, { "epoch": 0.4465059315992469, "grad_norm": 5.449373409515362, "learning_rate": 6.100351558706194e-06, "loss": 17.0476, "step": 24427 }, { "epoch": 0.44652421079569343, "grad_norm": 8.799801754872561, "learning_rate": 6.100062802255331e-06, "loss": 17.9051, "step": 24428 }, { "epoch": 0.44654248999213997, "grad_norm": 5.901880458321972, "learning_rate": 6.099774041948843e-06, "loss": 17.3573, "step": 24429 }, { "epoch": 0.44656076918858645, "grad_norm": 8.634143831220134, "learning_rate": 6.099485277787741e-06, "loss": 18.4925, "step": 24430 }, { "epoch": 0.446579048385033, "grad_norm": 7.463467214231075, "learning_rate": 6.09919650977304e-06, "loss": 17.6599, "step": 24431 }, { "epoch": 0.4465973275814795, "grad_norm": 5.546747571313554, "learning_rate": 6.0989077379057516e-06, "loss": 17.301, "step": 24432 }, { "epoch": 0.44661560677792606, "grad_norm": 6.397673045441741, "learning_rate": 6.098618962186884e-06, "loss": 17.5818, "step": 24433 }, { "epoch": 0.4466338859743726, "grad_norm": 6.2708999992759145, "learning_rate": 6.098330182617453e-06, "loss": 17.3073, "step": 24434 }, { "epoch": 0.44665216517081907, "grad_norm": 6.074037112045683, "learning_rate": 6.098041399198473e-06, "loss": 17.3755, "step": 24435 }, { "epoch": 0.4466704443672656, "grad_norm": 5.556667156349534, "learning_rate": 6.097752611930951e-06, "loss": 17.13, "step": 24436 }, { "epoch": 0.44668872356371214, "grad_norm": 6.359632585710636, "learning_rate": 6.097463820815901e-06, "loss": 17.6939, "step": 24437 }, { "epoch": 0.4467070027601587, "grad_norm": 6.201687773142437, "learning_rate": 6.0971750258543346e-06, "loss": 17.4796, "step": 24438 }, { "epoch": 0.4467252819566052, "grad_norm": 5.467661989849664, "learning_rate": 6.096886227047267e-06, "loss": 16.8875, "step": 24439 }, { "epoch": 0.4467435611530517, "grad_norm": 7.009486146427265, "learning_rate": 6.0965974243957086e-06, "loss": 17.646, "step": 24440 }, { "epoch": 0.44676184034949823, "grad_norm": 7.013412644793047, "learning_rate": 6.09630861790067e-06, "loss": 17.7202, "step": 24441 }, { "epoch": 0.44678011954594476, "grad_norm": 6.289208607952947, "learning_rate": 6.096019807563165e-06, "loss": 17.4421, "step": 24442 }, { "epoch": 0.4467983987423913, "grad_norm": 6.966372039594387, "learning_rate": 6.0957309933842065e-06, "loss": 17.7467, "step": 24443 }, { "epoch": 0.44681667793883784, "grad_norm": 6.863189672967621, "learning_rate": 6.0954421753648056e-06, "loss": 17.4474, "step": 24444 }, { "epoch": 0.4468349571352843, "grad_norm": 4.46617239274621, "learning_rate": 6.095153353505976e-06, "loss": 16.6909, "step": 24445 }, { "epoch": 0.44685323633173085, "grad_norm": 5.825656605819203, "learning_rate": 6.094864527808727e-06, "loss": 17.0685, "step": 24446 }, { "epoch": 0.4468715155281774, "grad_norm": 4.714162601204519, "learning_rate": 6.0945756982740725e-06, "loss": 16.9861, "step": 24447 }, { "epoch": 0.4468897947246239, "grad_norm": 6.777702908399464, "learning_rate": 6.094286864903026e-06, "loss": 17.6843, "step": 24448 }, { "epoch": 0.4469080739210704, "grad_norm": 6.834913335229522, "learning_rate": 6.0939980276966e-06, "loss": 17.7261, "step": 24449 }, { "epoch": 0.44692635311751694, "grad_norm": 6.745095667864185, "learning_rate": 6.093709186655805e-06, "loss": 17.845, "step": 24450 }, { "epoch": 0.4469446323139635, "grad_norm": 5.955249358373833, "learning_rate": 6.093420341781655e-06, "loss": 17.3205, "step": 24451 }, { "epoch": 0.44696291151041, "grad_norm": 7.981867678401039, "learning_rate": 6.0931314930751606e-06, "loss": 17.9401, "step": 24452 }, { "epoch": 0.44698119070685655, "grad_norm": 7.955055418987271, "learning_rate": 6.092842640537336e-06, "loss": 18.6077, "step": 24453 }, { "epoch": 0.446999469903303, "grad_norm": 8.27575083597538, "learning_rate": 6.0925537841691906e-06, "loss": 18.028, "step": 24454 }, { "epoch": 0.44701774909974956, "grad_norm": 6.397660865664258, "learning_rate": 6.092264923971742e-06, "loss": 17.3809, "step": 24455 }, { "epoch": 0.4470360282961961, "grad_norm": 6.529630593388632, "learning_rate": 6.091976059945998e-06, "loss": 17.7047, "step": 24456 }, { "epoch": 0.44705430749264263, "grad_norm": 6.15868748906537, "learning_rate": 6.091687192092972e-06, "loss": 17.0119, "step": 24457 }, { "epoch": 0.44707258668908917, "grad_norm": 7.1100094140139865, "learning_rate": 6.091398320413679e-06, "loss": 17.6149, "step": 24458 }, { "epoch": 0.44709086588553565, "grad_norm": 7.13538518829046, "learning_rate": 6.091109444909129e-06, "loss": 17.9354, "step": 24459 }, { "epoch": 0.4471091450819822, "grad_norm": 10.354510507253659, "learning_rate": 6.090820565580333e-06, "loss": 18.9472, "step": 24460 }, { "epoch": 0.4471274242784287, "grad_norm": 7.783071261002511, "learning_rate": 6.090531682428306e-06, "loss": 18.0399, "step": 24461 }, { "epoch": 0.44714570347487526, "grad_norm": 6.401935706870166, "learning_rate": 6.090242795454062e-06, "loss": 17.436, "step": 24462 }, { "epoch": 0.4471639826713218, "grad_norm": 4.558443586842041, "learning_rate": 6.089953904658612e-06, "loss": 16.6771, "step": 24463 }, { "epoch": 0.44718226186776827, "grad_norm": 6.583926730349258, "learning_rate": 6.089665010042968e-06, "loss": 17.4754, "step": 24464 }, { "epoch": 0.4472005410642148, "grad_norm": 8.632820674805483, "learning_rate": 6.089376111608141e-06, "loss": 18.8808, "step": 24465 }, { "epoch": 0.44721882026066134, "grad_norm": 7.053539542181739, "learning_rate": 6.089087209355147e-06, "loss": 17.3104, "step": 24466 }, { "epoch": 0.4472370994571079, "grad_norm": 7.452025530751659, "learning_rate": 6.088798303284995e-06, "loss": 17.9288, "step": 24467 }, { "epoch": 0.4472553786535544, "grad_norm": 6.336280300716763, "learning_rate": 6.088509393398701e-06, "loss": 17.484, "step": 24468 }, { "epoch": 0.4472736578500009, "grad_norm": 6.085966663598267, "learning_rate": 6.088220479697274e-06, "loss": 16.9977, "step": 24469 }, { "epoch": 0.44729193704644743, "grad_norm": 5.044427867867533, "learning_rate": 6.087931562181731e-06, "loss": 16.9515, "step": 24470 }, { "epoch": 0.44731021624289397, "grad_norm": 7.232597980795171, "learning_rate": 6.087642640853081e-06, "loss": 17.8035, "step": 24471 }, { "epoch": 0.4473284954393405, "grad_norm": 5.801893114607026, "learning_rate": 6.087353715712337e-06, "loss": 17.2479, "step": 24472 }, { "epoch": 0.44734677463578704, "grad_norm": 8.394168635213568, "learning_rate": 6.087064786760516e-06, "loss": 18.3203, "step": 24473 }, { "epoch": 0.4473650538322335, "grad_norm": 6.320306523985644, "learning_rate": 6.086775853998623e-06, "loss": 17.3289, "step": 24474 }, { "epoch": 0.44738333302868005, "grad_norm": 8.609223173205471, "learning_rate": 6.086486917427678e-06, "loss": 18.4674, "step": 24475 }, { "epoch": 0.4474016122251266, "grad_norm": 6.310444867558349, "learning_rate": 6.086197977048689e-06, "loss": 17.4766, "step": 24476 }, { "epoch": 0.4474198914215731, "grad_norm": 6.807482670978645, "learning_rate": 6.085909032862671e-06, "loss": 17.4981, "step": 24477 }, { "epoch": 0.44743817061801966, "grad_norm": 7.996647139645574, "learning_rate": 6.0856200848706375e-06, "loss": 17.9355, "step": 24478 }, { "epoch": 0.44745644981446614, "grad_norm": 5.510315706969526, "learning_rate": 6.085331133073596e-06, "loss": 17.2577, "step": 24479 }, { "epoch": 0.4474747290109127, "grad_norm": 6.080993520086858, "learning_rate": 6.085042177472567e-06, "loss": 17.5098, "step": 24480 }, { "epoch": 0.4474930082073592, "grad_norm": 6.2517235184031215, "learning_rate": 6.084753218068557e-06, "loss": 17.4185, "step": 24481 }, { "epoch": 0.44751128740380575, "grad_norm": 5.88677226674453, "learning_rate": 6.084464254862582e-06, "loss": 17.347, "step": 24482 }, { "epoch": 0.4475295666002522, "grad_norm": 5.411578797994535, "learning_rate": 6.084175287855654e-06, "loss": 16.9384, "step": 24483 }, { "epoch": 0.44754784579669876, "grad_norm": 6.045058578793596, "learning_rate": 6.0838863170487846e-06, "loss": 17.1605, "step": 24484 }, { "epoch": 0.4475661249931453, "grad_norm": 6.455967636022619, "learning_rate": 6.083597342442989e-06, "loss": 17.7157, "step": 24485 }, { "epoch": 0.44758440418959183, "grad_norm": 6.179426288420043, "learning_rate": 6.083308364039279e-06, "loss": 17.6575, "step": 24486 }, { "epoch": 0.44760268338603837, "grad_norm": 5.652262027740546, "learning_rate": 6.083019381838666e-06, "loss": 17.0972, "step": 24487 }, { "epoch": 0.44762096258248485, "grad_norm": 5.54168902868514, "learning_rate": 6.082730395842165e-06, "loss": 16.751, "step": 24488 }, { "epoch": 0.4476392417789314, "grad_norm": 5.32191399402457, "learning_rate": 6.0824414060507865e-06, "loss": 16.9165, "step": 24489 }, { "epoch": 0.4476575209753779, "grad_norm": 5.216734970115658, "learning_rate": 6.082152412465546e-06, "loss": 16.8919, "step": 24490 }, { "epoch": 0.44767580017182446, "grad_norm": 5.817938937234099, "learning_rate": 6.0818634150874554e-06, "loss": 17.2652, "step": 24491 }, { "epoch": 0.447694079368271, "grad_norm": 7.522501099547367, "learning_rate": 6.081574413917527e-06, "loss": 17.6549, "step": 24492 }, { "epoch": 0.44771235856471747, "grad_norm": 5.931479171545933, "learning_rate": 6.081285408956773e-06, "loss": 17.3432, "step": 24493 }, { "epoch": 0.447730637761164, "grad_norm": 4.41491681837655, "learning_rate": 6.08099640020621e-06, "loss": 16.6533, "step": 24494 }, { "epoch": 0.44774891695761054, "grad_norm": 5.703115331221978, "learning_rate": 6.080707387666847e-06, "loss": 17.5828, "step": 24495 }, { "epoch": 0.4477671961540571, "grad_norm": 5.684241655245052, "learning_rate": 6.080418371339698e-06, "loss": 17.2495, "step": 24496 }, { "epoch": 0.4477854753505036, "grad_norm": 5.922163493167484, "learning_rate": 6.0801293512257765e-06, "loss": 17.3535, "step": 24497 }, { "epoch": 0.4478037545469501, "grad_norm": 6.7646213945996605, "learning_rate": 6.079840327326095e-06, "loss": 17.7324, "step": 24498 }, { "epoch": 0.44782203374339663, "grad_norm": 7.056552852321804, "learning_rate": 6.079551299641667e-06, "loss": 17.7943, "step": 24499 }, { "epoch": 0.44784031293984317, "grad_norm": 6.695332757115799, "learning_rate": 6.079262268173506e-06, "loss": 17.8052, "step": 24500 }, { "epoch": 0.4478585921362897, "grad_norm": 6.137749675423967, "learning_rate": 6.078973232922625e-06, "loss": 17.4122, "step": 24501 }, { "epoch": 0.44787687133273624, "grad_norm": 6.932955082810483, "learning_rate": 6.078684193890036e-06, "loss": 17.692, "step": 24502 }, { "epoch": 0.4478951505291827, "grad_norm": 7.238481088020135, "learning_rate": 6.078395151076751e-06, "loss": 17.6842, "step": 24503 }, { "epoch": 0.44791342972562925, "grad_norm": 6.353353648148848, "learning_rate": 6.078106104483787e-06, "loss": 17.4092, "step": 24504 }, { "epoch": 0.4479317089220758, "grad_norm": 5.394112992320563, "learning_rate": 6.077817054112153e-06, "loss": 17.0963, "step": 24505 }, { "epoch": 0.4479499881185223, "grad_norm": 6.6213831348714125, "learning_rate": 6.077527999962863e-06, "loss": 17.4994, "step": 24506 }, { "epoch": 0.44796826731496886, "grad_norm": 6.238180867135258, "learning_rate": 6.0772389420369315e-06, "loss": 17.5379, "step": 24507 }, { "epoch": 0.44798654651141534, "grad_norm": 6.918233957899457, "learning_rate": 6.076949880335373e-06, "loss": 17.6738, "step": 24508 }, { "epoch": 0.4480048257078619, "grad_norm": 5.578343451360341, "learning_rate": 6.0766608148591965e-06, "loss": 17.0726, "step": 24509 }, { "epoch": 0.4480231049043084, "grad_norm": 6.591828819491328, "learning_rate": 6.0763717456094185e-06, "loss": 17.4182, "step": 24510 }, { "epoch": 0.44804138410075495, "grad_norm": 5.434818719766534, "learning_rate": 6.0760826725870506e-06, "loss": 17.2368, "step": 24511 }, { "epoch": 0.4480596632972015, "grad_norm": 6.644255578742306, "learning_rate": 6.075793595793106e-06, "loss": 17.6142, "step": 24512 }, { "epoch": 0.44807794249364796, "grad_norm": 6.334112684827274, "learning_rate": 6.075504515228597e-06, "loss": 17.2672, "step": 24513 }, { "epoch": 0.4480962216900945, "grad_norm": 5.539732552447868, "learning_rate": 6.075215430894541e-06, "loss": 17.2791, "step": 24514 }, { "epoch": 0.44811450088654103, "grad_norm": 6.359708444980066, "learning_rate": 6.074926342791945e-06, "loss": 17.3534, "step": 24515 }, { "epoch": 0.44813278008298757, "grad_norm": 6.455018272419638, "learning_rate": 6.0746372509218264e-06, "loss": 17.3637, "step": 24516 }, { "epoch": 0.44815105927943405, "grad_norm": 4.894673736300759, "learning_rate": 6.074348155285198e-06, "loss": 16.7497, "step": 24517 }, { "epoch": 0.4481693384758806, "grad_norm": 6.136956746908434, "learning_rate": 6.074059055883074e-06, "loss": 17.667, "step": 24518 }, { "epoch": 0.4481876176723271, "grad_norm": 7.35503859221064, "learning_rate": 6.073769952716465e-06, "loss": 17.8807, "step": 24519 }, { "epoch": 0.44820589686877366, "grad_norm": 7.248474537555424, "learning_rate": 6.073480845786384e-06, "loss": 17.9835, "step": 24520 }, { "epoch": 0.4482241760652202, "grad_norm": 7.084398277839027, "learning_rate": 6.073191735093848e-06, "loss": 18.3454, "step": 24521 }, { "epoch": 0.4482424552616667, "grad_norm": 5.520930994534029, "learning_rate": 6.072902620639867e-06, "loss": 17.1094, "step": 24522 }, { "epoch": 0.4482607344581132, "grad_norm": 6.646691813263984, "learning_rate": 6.0726135024254555e-06, "loss": 17.3124, "step": 24523 }, { "epoch": 0.44827901365455974, "grad_norm": 6.089075594839369, "learning_rate": 6.072324380451626e-06, "loss": 17.284, "step": 24524 }, { "epoch": 0.4482972928510063, "grad_norm": 5.835320371397225, "learning_rate": 6.072035254719394e-06, "loss": 17.2227, "step": 24525 }, { "epoch": 0.4483155720474528, "grad_norm": 7.1010148801621185, "learning_rate": 6.0717461252297706e-06, "loss": 17.6833, "step": 24526 }, { "epoch": 0.4483338512438993, "grad_norm": 7.871774532027448, "learning_rate": 6.071456991983771e-06, "loss": 17.5686, "step": 24527 }, { "epoch": 0.44835213044034583, "grad_norm": 6.50344425580137, "learning_rate": 6.071167854982406e-06, "loss": 17.5485, "step": 24528 }, { "epoch": 0.44837040963679237, "grad_norm": 5.7587372362784865, "learning_rate": 6.070878714226691e-06, "loss": 17.1437, "step": 24529 }, { "epoch": 0.4483886888332389, "grad_norm": 7.974223587236758, "learning_rate": 6.07058956971764e-06, "loss": 17.9765, "step": 24530 }, { "epoch": 0.44840696802968544, "grad_norm": 5.616838625710783, "learning_rate": 6.070300421456264e-06, "loss": 17.3922, "step": 24531 }, { "epoch": 0.4484252472261319, "grad_norm": 6.365724697268386, "learning_rate": 6.070011269443581e-06, "loss": 17.5502, "step": 24532 }, { "epoch": 0.44844352642257845, "grad_norm": 5.381085605556312, "learning_rate": 6.0697221136805975e-06, "loss": 17.1453, "step": 24533 }, { "epoch": 0.448461805619025, "grad_norm": 7.3249294609347375, "learning_rate": 6.069432954168333e-06, "loss": 17.647, "step": 24534 }, { "epoch": 0.4484800848154715, "grad_norm": 6.86413334020217, "learning_rate": 6.069143790907799e-06, "loss": 17.7492, "step": 24535 }, { "epoch": 0.44849836401191806, "grad_norm": 6.857971680791453, "learning_rate": 6.068854623900008e-06, "loss": 17.7351, "step": 24536 }, { "epoch": 0.44851664320836454, "grad_norm": 6.371194485979414, "learning_rate": 6.068565453145975e-06, "loss": 17.5003, "step": 24537 }, { "epoch": 0.4485349224048111, "grad_norm": 7.202973564893784, "learning_rate": 6.068276278646711e-06, "loss": 17.9177, "step": 24538 }, { "epoch": 0.4485532016012576, "grad_norm": 6.680424328765258, "learning_rate": 6.067987100403233e-06, "loss": 17.7232, "step": 24539 }, { "epoch": 0.44857148079770415, "grad_norm": 6.039204159314783, "learning_rate": 6.067697918416553e-06, "loss": 17.2954, "step": 24540 }, { "epoch": 0.4485897599941507, "grad_norm": 5.480267338243896, "learning_rate": 6.067408732687684e-06, "loss": 17.2093, "step": 24541 }, { "epoch": 0.44860803919059716, "grad_norm": 7.426914394730756, "learning_rate": 6.06711954321764e-06, "loss": 17.863, "step": 24542 }, { "epoch": 0.4486263183870437, "grad_norm": 6.464402359165225, "learning_rate": 6.066830350007435e-06, "loss": 17.5623, "step": 24543 }, { "epoch": 0.44864459758349023, "grad_norm": 6.824830301997114, "learning_rate": 6.066541153058081e-06, "loss": 17.7391, "step": 24544 }, { "epoch": 0.44866287677993677, "grad_norm": 7.5960022816052595, "learning_rate": 6.066251952370594e-06, "loss": 18.0109, "step": 24545 }, { "epoch": 0.4486811559763833, "grad_norm": 5.759280762000812, "learning_rate": 6.0659627479459856e-06, "loss": 17.2945, "step": 24546 }, { "epoch": 0.4486994351728298, "grad_norm": 6.07739225476777, "learning_rate": 6.065673539785271e-06, "loss": 17.3572, "step": 24547 }, { "epoch": 0.4487177143692763, "grad_norm": 7.522781197636879, "learning_rate": 6.065384327889462e-06, "loss": 17.8289, "step": 24548 }, { "epoch": 0.44873599356572286, "grad_norm": 5.416111713084062, "learning_rate": 6.065095112259575e-06, "loss": 17.0497, "step": 24549 }, { "epoch": 0.4487542727621694, "grad_norm": 6.2261954311888505, "learning_rate": 6.064805892896621e-06, "loss": 17.4546, "step": 24550 }, { "epoch": 0.4487725519586159, "grad_norm": 6.698559746903378, "learning_rate": 6.0645166698016145e-06, "loss": 17.4948, "step": 24551 }, { "epoch": 0.4487908311550624, "grad_norm": 5.989076779748443, "learning_rate": 6.06422744297557e-06, "loss": 17.2271, "step": 24552 }, { "epoch": 0.44880911035150894, "grad_norm": 6.786639717361195, "learning_rate": 6.063938212419501e-06, "loss": 17.6503, "step": 24553 }, { "epoch": 0.4488273895479555, "grad_norm": 7.987285151832187, "learning_rate": 6.06364897813442e-06, "loss": 17.8774, "step": 24554 }, { "epoch": 0.448845668744402, "grad_norm": 7.534178762851669, "learning_rate": 6.063359740121342e-06, "loss": 18.2498, "step": 24555 }, { "epoch": 0.4488639479408485, "grad_norm": 7.818544921555647, "learning_rate": 6.063070498381281e-06, "loss": 18.225, "step": 24556 }, { "epoch": 0.44888222713729503, "grad_norm": 6.150402645822434, "learning_rate": 6.0627812529152496e-06, "loss": 17.2284, "step": 24557 }, { "epoch": 0.44890050633374157, "grad_norm": 6.452872580875381, "learning_rate": 6.062492003724262e-06, "loss": 17.5387, "step": 24558 }, { "epoch": 0.4489187855301881, "grad_norm": 5.609052918892873, "learning_rate": 6.0622027508093325e-06, "loss": 17.2485, "step": 24559 }, { "epoch": 0.44893706472663464, "grad_norm": 6.278336245370838, "learning_rate": 6.061913494171474e-06, "loss": 17.2655, "step": 24560 }, { "epoch": 0.4489553439230811, "grad_norm": 8.9152948236935, "learning_rate": 6.0616242338117005e-06, "loss": 18.1979, "step": 24561 }, { "epoch": 0.44897362311952765, "grad_norm": 6.088876698717248, "learning_rate": 6.0613349697310275e-06, "loss": 17.317, "step": 24562 }, { "epoch": 0.4489919023159742, "grad_norm": 5.977949774786967, "learning_rate": 6.061045701930468e-06, "loss": 17.5265, "step": 24563 }, { "epoch": 0.4490101815124207, "grad_norm": 6.291955535729224, "learning_rate": 6.060756430411033e-06, "loss": 17.6719, "step": 24564 }, { "epoch": 0.44902846070886726, "grad_norm": 5.83042788133474, "learning_rate": 6.060467155173739e-06, "loss": 17.2278, "step": 24565 }, { "epoch": 0.44904673990531374, "grad_norm": 9.914272324998482, "learning_rate": 6.0601778762196016e-06, "loss": 17.3401, "step": 24566 }, { "epoch": 0.4490650191017603, "grad_norm": 5.88376955223234, "learning_rate": 6.059888593549632e-06, "loss": 17.3663, "step": 24567 }, { "epoch": 0.4490832982982068, "grad_norm": 6.75817276328447, "learning_rate": 6.059599307164845e-06, "loss": 17.7442, "step": 24568 }, { "epoch": 0.44910157749465335, "grad_norm": 5.464164571011008, "learning_rate": 6.059310017066254e-06, "loss": 17.2119, "step": 24569 }, { "epoch": 0.4491198566910999, "grad_norm": 5.066354716612397, "learning_rate": 6.059020723254874e-06, "loss": 17.247, "step": 24570 }, { "epoch": 0.44913813588754636, "grad_norm": 6.02637680820274, "learning_rate": 6.058731425731716e-06, "loss": 17.4971, "step": 24571 }, { "epoch": 0.4491564150839929, "grad_norm": 6.5116680623856835, "learning_rate": 6.058442124497799e-06, "loss": 17.6569, "step": 24572 }, { "epoch": 0.44917469428043943, "grad_norm": 7.547813129000829, "learning_rate": 6.058152819554134e-06, "loss": 17.994, "step": 24573 }, { "epoch": 0.44919297347688597, "grad_norm": 5.78438643898109, "learning_rate": 6.057863510901733e-06, "loss": 17.3803, "step": 24574 }, { "epoch": 0.4492112526733325, "grad_norm": 6.207095392725289, "learning_rate": 6.057574198541614e-06, "loss": 17.4389, "step": 24575 }, { "epoch": 0.449229531869779, "grad_norm": 7.965840733566035, "learning_rate": 6.057284882474788e-06, "loss": 18.28, "step": 24576 }, { "epoch": 0.4492478110662255, "grad_norm": 5.2206629645349265, "learning_rate": 6.056995562702271e-06, "loss": 17.176, "step": 24577 }, { "epoch": 0.44926609026267206, "grad_norm": 6.88168788536299, "learning_rate": 6.056706239225076e-06, "loss": 18.1214, "step": 24578 }, { "epoch": 0.4492843694591186, "grad_norm": 6.376165550817576, "learning_rate": 6.056416912044217e-06, "loss": 17.6409, "step": 24579 }, { "epoch": 0.44930264865556513, "grad_norm": 7.4322640736029, "learning_rate": 6.0561275811607104e-06, "loss": 17.9083, "step": 24580 }, { "epoch": 0.4493209278520116, "grad_norm": 6.9442375255901405, "learning_rate": 6.055838246575566e-06, "loss": 17.6621, "step": 24581 }, { "epoch": 0.44933920704845814, "grad_norm": 5.6862610391781265, "learning_rate": 6.055548908289801e-06, "loss": 17.0563, "step": 24582 }, { "epoch": 0.4493574862449047, "grad_norm": 6.955649417451342, "learning_rate": 6.055259566304429e-06, "loss": 17.5888, "step": 24583 }, { "epoch": 0.4493757654413512, "grad_norm": 6.470692818883817, "learning_rate": 6.054970220620463e-06, "loss": 17.3649, "step": 24584 }, { "epoch": 0.4493940446377977, "grad_norm": 9.814720207131511, "learning_rate": 6.054680871238918e-06, "loss": 18.5805, "step": 24585 }, { "epoch": 0.44941232383424423, "grad_norm": 6.234637690888976, "learning_rate": 6.054391518160808e-06, "loss": 17.7208, "step": 24586 }, { "epoch": 0.44943060303069077, "grad_norm": 5.97628720273616, "learning_rate": 6.054102161387147e-06, "loss": 17.4372, "step": 24587 }, { "epoch": 0.4494488822271373, "grad_norm": 7.366581984919246, "learning_rate": 6.053812800918951e-06, "loss": 17.609, "step": 24588 }, { "epoch": 0.44946716142358384, "grad_norm": 6.589630624354999, "learning_rate": 6.053523436757232e-06, "loss": 17.6204, "step": 24589 }, { "epoch": 0.4494854406200303, "grad_norm": 6.370969219120875, "learning_rate": 6.053234068903004e-06, "loss": 17.3953, "step": 24590 }, { "epoch": 0.44950371981647685, "grad_norm": 5.747775020938252, "learning_rate": 6.052944697357283e-06, "loss": 17.2095, "step": 24591 }, { "epoch": 0.4495219990129234, "grad_norm": 5.984339777999843, "learning_rate": 6.052655322121081e-06, "loss": 17.479, "step": 24592 }, { "epoch": 0.4495402782093699, "grad_norm": 6.84481552137433, "learning_rate": 6.052365943195413e-06, "loss": 17.8045, "step": 24593 }, { "epoch": 0.44955855740581646, "grad_norm": 7.8522367904267405, "learning_rate": 6.0520765605812956e-06, "loss": 18.0193, "step": 24594 }, { "epoch": 0.44957683660226294, "grad_norm": 5.309549928513325, "learning_rate": 6.051787174279741e-06, "loss": 17.0146, "step": 24595 }, { "epoch": 0.4495951157987095, "grad_norm": 5.985642368716371, "learning_rate": 6.051497784291762e-06, "loss": 17.5111, "step": 24596 }, { "epoch": 0.449613394995156, "grad_norm": 6.398311933102849, "learning_rate": 6.051208390618375e-06, "loss": 17.4885, "step": 24597 }, { "epoch": 0.44963167419160255, "grad_norm": 6.720363386418574, "learning_rate": 6.050918993260595e-06, "loss": 17.4864, "step": 24598 }, { "epoch": 0.4496499533880491, "grad_norm": 6.057645664109545, "learning_rate": 6.050629592219434e-06, "loss": 17.5053, "step": 24599 }, { "epoch": 0.44966823258449556, "grad_norm": 6.683413351050948, "learning_rate": 6.050340187495908e-06, "loss": 17.4866, "step": 24600 }, { "epoch": 0.4496865117809421, "grad_norm": 6.5973144254902, "learning_rate": 6.05005077909103e-06, "loss": 17.6612, "step": 24601 }, { "epoch": 0.44970479097738864, "grad_norm": 5.997397751800434, "learning_rate": 6.049761367005815e-06, "loss": 17.5349, "step": 24602 }, { "epoch": 0.44972307017383517, "grad_norm": 6.240040430748273, "learning_rate": 6.049471951241279e-06, "loss": 17.4376, "step": 24603 }, { "epoch": 0.4497413493702817, "grad_norm": 5.208798792321488, "learning_rate": 6.049182531798434e-06, "loss": 17.0191, "step": 24604 }, { "epoch": 0.4497596285667282, "grad_norm": 5.919284012950762, "learning_rate": 6.048893108678295e-06, "loss": 17.2049, "step": 24605 }, { "epoch": 0.4497779077631747, "grad_norm": 6.7964267950890385, "learning_rate": 6.0486036818818775e-06, "loss": 17.5385, "step": 24606 }, { "epoch": 0.44979618695962126, "grad_norm": 7.811179831176289, "learning_rate": 6.048314251410193e-06, "loss": 17.5783, "step": 24607 }, { "epoch": 0.4498144661560678, "grad_norm": 6.408143347933946, "learning_rate": 6.048024817264261e-06, "loss": 17.7159, "step": 24608 }, { "epoch": 0.44983274535251433, "grad_norm": 7.157801644792308, "learning_rate": 6.047735379445092e-06, "loss": 17.6406, "step": 24609 }, { "epoch": 0.4498510245489608, "grad_norm": 6.714112451459579, "learning_rate": 6.047445937953701e-06, "loss": 17.6858, "step": 24610 }, { "epoch": 0.44986930374540735, "grad_norm": 5.638619957228446, "learning_rate": 6.047156492791102e-06, "loss": 17.0898, "step": 24611 }, { "epoch": 0.4498875829418539, "grad_norm": 6.660373547952849, "learning_rate": 6.046867043958311e-06, "loss": 18.0357, "step": 24612 }, { "epoch": 0.4499058621383004, "grad_norm": 5.914512935480703, "learning_rate": 6.046577591456343e-06, "loss": 17.0058, "step": 24613 }, { "epoch": 0.44992414133474695, "grad_norm": 6.288417485868926, "learning_rate": 6.0462881352862115e-06, "loss": 17.5398, "step": 24614 }, { "epoch": 0.44994242053119343, "grad_norm": 5.8650711695290525, "learning_rate": 6.045998675448927e-06, "loss": 17.2077, "step": 24615 }, { "epoch": 0.44996069972763997, "grad_norm": 5.990436894644584, "learning_rate": 6.045709211945512e-06, "loss": 17.223, "step": 24616 }, { "epoch": 0.4499789789240865, "grad_norm": 7.495889648121055, "learning_rate": 6.045419744776976e-06, "loss": 18.0186, "step": 24617 }, { "epoch": 0.44999725812053304, "grad_norm": 8.213885327731585, "learning_rate": 6.045130273944334e-06, "loss": 18.2507, "step": 24618 }, { "epoch": 0.4500155373169795, "grad_norm": 6.141209230567598, "learning_rate": 6.044840799448602e-06, "loss": 17.2438, "step": 24619 }, { "epoch": 0.45003381651342605, "grad_norm": 5.854929778397421, "learning_rate": 6.044551321290791e-06, "loss": 17.3367, "step": 24620 }, { "epoch": 0.4500520957098726, "grad_norm": 8.045059826317729, "learning_rate": 6.044261839471921e-06, "loss": 17.5542, "step": 24621 }, { "epoch": 0.4500703749063191, "grad_norm": 5.220343005622515, "learning_rate": 6.043972353993004e-06, "loss": 17.014, "step": 24622 }, { "epoch": 0.45008865410276566, "grad_norm": 5.407247716345242, "learning_rate": 6.043682864855053e-06, "loss": 17.0192, "step": 24623 }, { "epoch": 0.45010693329921214, "grad_norm": 6.97528893326854, "learning_rate": 6.0433933720590845e-06, "loss": 17.7547, "step": 24624 }, { "epoch": 0.4501252124956587, "grad_norm": 6.01521567472642, "learning_rate": 6.0431038756061135e-06, "loss": 17.1485, "step": 24625 }, { "epoch": 0.4501434916921052, "grad_norm": 5.651430175005524, "learning_rate": 6.0428143754971526e-06, "loss": 17.0379, "step": 24626 }, { "epoch": 0.45016177088855175, "grad_norm": 6.778124048190056, "learning_rate": 6.042524871733218e-06, "loss": 17.4037, "step": 24627 }, { "epoch": 0.4501800500849983, "grad_norm": 5.674727814042923, "learning_rate": 6.042235364315325e-06, "loss": 17.1581, "step": 24628 }, { "epoch": 0.45019832928144476, "grad_norm": 6.423002753043599, "learning_rate": 6.0419458532444875e-06, "loss": 17.5022, "step": 24629 }, { "epoch": 0.4502166084778913, "grad_norm": 7.302713204386245, "learning_rate": 6.04165633852172e-06, "loss": 17.8249, "step": 24630 }, { "epoch": 0.45023488767433784, "grad_norm": 7.458820722608766, "learning_rate": 6.041366820148037e-06, "loss": 18.0166, "step": 24631 }, { "epoch": 0.45025316687078437, "grad_norm": 6.194179028173427, "learning_rate": 6.0410772981244555e-06, "loss": 17.6362, "step": 24632 }, { "epoch": 0.4502714460672309, "grad_norm": 7.4911200748010245, "learning_rate": 6.040787772451986e-06, "loss": 17.8867, "step": 24633 }, { "epoch": 0.4502897252636774, "grad_norm": 6.057510729245927, "learning_rate": 6.040498243131646e-06, "loss": 17.2536, "step": 24634 }, { "epoch": 0.4503080044601239, "grad_norm": 8.547528988042247, "learning_rate": 6.040208710164451e-06, "loss": 17.9104, "step": 24635 }, { "epoch": 0.45032628365657046, "grad_norm": 6.650799585296418, "learning_rate": 6.0399191735514154e-06, "loss": 17.4979, "step": 24636 }, { "epoch": 0.450344562853017, "grad_norm": 5.931705650138957, "learning_rate": 6.039629633293552e-06, "loss": 17.0645, "step": 24637 }, { "epoch": 0.45036284204946353, "grad_norm": 6.823833292013225, "learning_rate": 6.039340089391876e-06, "loss": 17.4026, "step": 24638 }, { "epoch": 0.45038112124591, "grad_norm": 6.347875714287686, "learning_rate": 6.039050541847405e-06, "loss": 17.552, "step": 24639 }, { "epoch": 0.45039940044235655, "grad_norm": 6.138020966787515, "learning_rate": 6.038760990661151e-06, "loss": 17.2043, "step": 24640 }, { "epoch": 0.4504176796388031, "grad_norm": 6.06862788956139, "learning_rate": 6.03847143583413e-06, "loss": 17.286, "step": 24641 }, { "epoch": 0.4504359588352496, "grad_norm": 6.661539874719218, "learning_rate": 6.038181877367358e-06, "loss": 17.328, "step": 24642 }, { "epoch": 0.45045423803169615, "grad_norm": 6.225700155227202, "learning_rate": 6.037892315261847e-06, "loss": 17.5815, "step": 24643 }, { "epoch": 0.45047251722814263, "grad_norm": 6.21853811677487, "learning_rate": 6.037602749518614e-06, "loss": 17.4411, "step": 24644 }, { "epoch": 0.45049079642458917, "grad_norm": 6.902656552265081, "learning_rate": 6.0373131801386734e-06, "loss": 17.6042, "step": 24645 }, { "epoch": 0.4505090756210357, "grad_norm": 7.846094296385334, "learning_rate": 6.0370236071230414e-06, "loss": 17.962, "step": 24646 }, { "epoch": 0.45052735481748224, "grad_norm": 5.717781758447652, "learning_rate": 6.036734030472729e-06, "loss": 17.1185, "step": 24647 }, { "epoch": 0.4505456340139288, "grad_norm": 4.763707573529443, "learning_rate": 6.036444450188755e-06, "loss": 16.8422, "step": 24648 }, { "epoch": 0.45056391321037526, "grad_norm": 6.732233461299591, "learning_rate": 6.036154866272135e-06, "loss": 17.6115, "step": 24649 }, { "epoch": 0.4505821924068218, "grad_norm": 5.251545990872237, "learning_rate": 6.03586527872388e-06, "loss": 17.0011, "step": 24650 }, { "epoch": 0.4506004716032683, "grad_norm": 6.707542053326857, "learning_rate": 6.035575687545008e-06, "loss": 17.874, "step": 24651 }, { "epoch": 0.45061875079971486, "grad_norm": 5.854115273020799, "learning_rate": 6.035286092736532e-06, "loss": 17.3612, "step": 24652 }, { "epoch": 0.45063702999616134, "grad_norm": 6.948975364826289, "learning_rate": 6.0349964942994685e-06, "loss": 17.805, "step": 24653 }, { "epoch": 0.4506553091926079, "grad_norm": 6.999709866000698, "learning_rate": 6.034706892234833e-06, "loss": 17.9041, "step": 24654 }, { "epoch": 0.4506735883890544, "grad_norm": 7.182306356613054, "learning_rate": 6.034417286543639e-06, "loss": 17.9241, "step": 24655 }, { "epoch": 0.45069186758550095, "grad_norm": 6.594254229495999, "learning_rate": 6.034127677226902e-06, "loss": 17.443, "step": 24656 }, { "epoch": 0.4507101467819475, "grad_norm": 5.478485975266605, "learning_rate": 6.033838064285638e-06, "loss": 17.2909, "step": 24657 }, { "epoch": 0.45072842597839397, "grad_norm": 6.41011908912437, "learning_rate": 6.03354844772086e-06, "loss": 17.5707, "step": 24658 }, { "epoch": 0.4507467051748405, "grad_norm": 8.414537097297394, "learning_rate": 6.033258827533586e-06, "loss": 18.035, "step": 24659 }, { "epoch": 0.45076498437128704, "grad_norm": 7.001185991744438, "learning_rate": 6.032969203724828e-06, "loss": 17.7875, "step": 24660 }, { "epoch": 0.45078326356773357, "grad_norm": 6.089805242115694, "learning_rate": 6.032679576295603e-06, "loss": 17.1514, "step": 24661 }, { "epoch": 0.4508015427641801, "grad_norm": 5.927690599311975, "learning_rate": 6.032389945246925e-06, "loss": 17.3021, "step": 24662 }, { "epoch": 0.4508198219606266, "grad_norm": 6.095567075875497, "learning_rate": 6.032100310579812e-06, "loss": 17.4224, "step": 24663 }, { "epoch": 0.4508381011570731, "grad_norm": 4.9729292296281935, "learning_rate": 6.031810672295275e-06, "loss": 16.9422, "step": 24664 }, { "epoch": 0.45085638035351966, "grad_norm": 7.196499713694455, "learning_rate": 6.03152103039433e-06, "loss": 18.2755, "step": 24665 }, { "epoch": 0.4508746595499662, "grad_norm": 6.45490234704856, "learning_rate": 6.0312313848779965e-06, "loss": 17.451, "step": 24666 }, { "epoch": 0.45089293874641273, "grad_norm": 6.236169856996404, "learning_rate": 6.030941735747285e-06, "loss": 17.2154, "step": 24667 }, { "epoch": 0.4509112179428592, "grad_norm": 6.194245727873289, "learning_rate": 6.0306520830032124e-06, "loss": 17.4885, "step": 24668 }, { "epoch": 0.45092949713930575, "grad_norm": 6.4940184288310565, "learning_rate": 6.030362426646793e-06, "loss": 17.4401, "step": 24669 }, { "epoch": 0.4509477763357523, "grad_norm": 6.039570888625534, "learning_rate": 6.030072766679044e-06, "loss": 17.329, "step": 24670 }, { "epoch": 0.4509660555321988, "grad_norm": 7.073103273252511, "learning_rate": 6.029783103100978e-06, "loss": 17.642, "step": 24671 }, { "epoch": 0.45098433472864535, "grad_norm": 8.236292636508232, "learning_rate": 6.029493435913611e-06, "loss": 18.2369, "step": 24672 }, { "epoch": 0.45100261392509183, "grad_norm": 7.841646819805741, "learning_rate": 6.029203765117961e-06, "loss": 17.8693, "step": 24673 }, { "epoch": 0.45102089312153837, "grad_norm": 5.856147196076327, "learning_rate": 6.02891409071504e-06, "loss": 17.5565, "step": 24674 }, { "epoch": 0.4510391723179849, "grad_norm": 7.68190603764835, "learning_rate": 6.028624412705863e-06, "loss": 17.8626, "step": 24675 }, { "epoch": 0.45105745151443144, "grad_norm": 5.343253256839253, "learning_rate": 6.0283347310914485e-06, "loss": 17.0762, "step": 24676 }, { "epoch": 0.451075730710878, "grad_norm": 5.886742429105178, "learning_rate": 6.028045045872811e-06, "loss": 17.1906, "step": 24677 }, { "epoch": 0.45109400990732446, "grad_norm": 7.367978120929744, "learning_rate": 6.027755357050964e-06, "loss": 17.5718, "step": 24678 }, { "epoch": 0.451112289103771, "grad_norm": 7.035319286772534, "learning_rate": 6.0274656646269215e-06, "loss": 17.4689, "step": 24679 }, { "epoch": 0.4511305683002175, "grad_norm": 5.245110714669578, "learning_rate": 6.027175968601704e-06, "loss": 16.8598, "step": 24680 }, { "epoch": 0.45114884749666406, "grad_norm": 7.052759396033848, "learning_rate": 6.026886268976322e-06, "loss": 17.6692, "step": 24681 }, { "epoch": 0.4511671266931106, "grad_norm": 6.680488552848285, "learning_rate": 6.026596565751794e-06, "loss": 17.6147, "step": 24682 }, { "epoch": 0.4511854058895571, "grad_norm": 8.496915746510703, "learning_rate": 6.026306858929133e-06, "loss": 18.4555, "step": 24683 }, { "epoch": 0.4512036850860036, "grad_norm": 6.630456798807599, "learning_rate": 6.026017148509355e-06, "loss": 17.6023, "step": 24684 }, { "epoch": 0.45122196428245015, "grad_norm": 8.436450617612355, "learning_rate": 6.025727434493477e-06, "loss": 17.9402, "step": 24685 }, { "epoch": 0.4512402434788967, "grad_norm": 6.927448939750945, "learning_rate": 6.025437716882513e-06, "loss": 17.8359, "step": 24686 }, { "epoch": 0.45125852267534317, "grad_norm": 6.830529479138461, "learning_rate": 6.02514799567748e-06, "loss": 17.8449, "step": 24687 }, { "epoch": 0.4512768018717897, "grad_norm": 4.461113871469783, "learning_rate": 6.02485827087939e-06, "loss": 16.8073, "step": 24688 }, { "epoch": 0.45129508106823624, "grad_norm": 6.770820637432052, "learning_rate": 6.024568542489262e-06, "loss": 17.6159, "step": 24689 }, { "epoch": 0.4513133602646828, "grad_norm": 6.644486299362443, "learning_rate": 6.0242788105081106e-06, "loss": 17.5468, "step": 24690 }, { "epoch": 0.4513316394611293, "grad_norm": 7.095053551366182, "learning_rate": 6.023989074936951e-06, "loss": 17.7514, "step": 24691 }, { "epoch": 0.4513499186575758, "grad_norm": 5.998084138138065, "learning_rate": 6.0236993357767955e-06, "loss": 17.3063, "step": 24692 }, { "epoch": 0.4513681978540223, "grad_norm": 7.585781276479587, "learning_rate": 6.023409593028666e-06, "loss": 18.1675, "step": 24693 }, { "epoch": 0.45138647705046886, "grad_norm": 6.2665781415529365, "learning_rate": 6.0231198466935745e-06, "loss": 17.5643, "step": 24694 }, { "epoch": 0.4514047562469154, "grad_norm": 7.750558703464514, "learning_rate": 6.0228300967725365e-06, "loss": 17.747, "step": 24695 }, { "epoch": 0.45142303544336193, "grad_norm": 7.602132480396472, "learning_rate": 6.022540343266566e-06, "loss": 17.7959, "step": 24696 }, { "epoch": 0.4514413146398084, "grad_norm": 7.7349738302042566, "learning_rate": 6.022250586176683e-06, "loss": 17.7687, "step": 24697 }, { "epoch": 0.45145959383625495, "grad_norm": 6.355945855864465, "learning_rate": 6.021960825503897e-06, "loss": 17.4123, "step": 24698 }, { "epoch": 0.4514778730327015, "grad_norm": 6.6578356518514, "learning_rate": 6.021671061249229e-06, "loss": 17.4073, "step": 24699 }, { "epoch": 0.451496152229148, "grad_norm": 5.713505249244532, "learning_rate": 6.021381293413693e-06, "loss": 17.1641, "step": 24700 }, { "epoch": 0.45151443142559455, "grad_norm": 5.624758350692372, "learning_rate": 6.021091521998304e-06, "loss": 17.1406, "step": 24701 }, { "epoch": 0.45153271062204103, "grad_norm": 5.948590414234612, "learning_rate": 6.020801747004077e-06, "loss": 17.2232, "step": 24702 }, { "epoch": 0.45155098981848757, "grad_norm": 6.155157389616313, "learning_rate": 6.020511968432029e-06, "loss": 17.4346, "step": 24703 }, { "epoch": 0.4515692690149341, "grad_norm": 5.474975628502764, "learning_rate": 6.020222186283175e-06, "loss": 17.1878, "step": 24704 }, { "epoch": 0.45158754821138064, "grad_norm": 5.084933908634606, "learning_rate": 6.019932400558531e-06, "loss": 17.007, "step": 24705 }, { "epoch": 0.4516058274078272, "grad_norm": 5.23240276581489, "learning_rate": 6.019642611259111e-06, "loss": 17.048, "step": 24706 }, { "epoch": 0.45162410660427366, "grad_norm": 6.451885620662344, "learning_rate": 6.019352818385934e-06, "loss": 17.3354, "step": 24707 }, { "epoch": 0.4516423858007202, "grad_norm": 8.407424004543389, "learning_rate": 6.019063021940014e-06, "loss": 17.9548, "step": 24708 }, { "epoch": 0.4516606649971667, "grad_norm": 5.788778329765934, "learning_rate": 6.018773221922366e-06, "loss": 17.2667, "step": 24709 }, { "epoch": 0.45167894419361326, "grad_norm": 6.064469964372707, "learning_rate": 6.018483418334006e-06, "loss": 17.3951, "step": 24710 }, { "epoch": 0.4516972233900598, "grad_norm": 8.66837895395117, "learning_rate": 6.01819361117595e-06, "loss": 18.1703, "step": 24711 }, { "epoch": 0.4517155025865063, "grad_norm": 6.830167778453176, "learning_rate": 6.0179038004492144e-06, "loss": 17.4709, "step": 24712 }, { "epoch": 0.4517337817829528, "grad_norm": 6.965211136666187, "learning_rate": 6.017613986154813e-06, "loss": 17.6896, "step": 24713 }, { "epoch": 0.45175206097939935, "grad_norm": 6.262331316183803, "learning_rate": 6.017324168293763e-06, "loss": 17.2691, "step": 24714 }, { "epoch": 0.4517703401758459, "grad_norm": 8.443516784987569, "learning_rate": 6.017034346867081e-06, "loss": 17.7252, "step": 24715 }, { "epoch": 0.4517886193722924, "grad_norm": 6.064474784741955, "learning_rate": 6.016744521875782e-06, "loss": 17.6041, "step": 24716 }, { "epoch": 0.4518068985687389, "grad_norm": 7.668158594365066, "learning_rate": 6.01645469332088e-06, "loss": 18.2217, "step": 24717 }, { "epoch": 0.45182517776518544, "grad_norm": 6.322105330152482, "learning_rate": 6.016164861203395e-06, "loss": 17.5224, "step": 24718 }, { "epoch": 0.451843456961632, "grad_norm": 7.438910802051069, "learning_rate": 6.015875025524338e-06, "loss": 18.0336, "step": 24719 }, { "epoch": 0.4518617361580785, "grad_norm": 6.704452739777389, "learning_rate": 6.015585186284728e-06, "loss": 17.4795, "step": 24720 }, { "epoch": 0.451880015354525, "grad_norm": 6.718555993230533, "learning_rate": 6.015295343485581e-06, "loss": 17.8028, "step": 24721 }, { "epoch": 0.4518982945509715, "grad_norm": 5.925313793829689, "learning_rate": 6.015005497127911e-06, "loss": 17.2797, "step": 24722 }, { "epoch": 0.45191657374741806, "grad_norm": 6.746912088623491, "learning_rate": 6.014715647212736e-06, "loss": 17.4472, "step": 24723 }, { "epoch": 0.4519348529438646, "grad_norm": 6.928342340989377, "learning_rate": 6.014425793741068e-06, "loss": 17.8301, "step": 24724 }, { "epoch": 0.45195313214031113, "grad_norm": 6.042923310933138, "learning_rate": 6.014135936713928e-06, "loss": 17.3002, "step": 24725 }, { "epoch": 0.4519714113367576, "grad_norm": 8.578425695372713, "learning_rate": 6.013846076132329e-06, "loss": 18.3965, "step": 24726 }, { "epoch": 0.45198969053320415, "grad_norm": 6.111551031387784, "learning_rate": 6.013556211997286e-06, "loss": 17.0901, "step": 24727 }, { "epoch": 0.4520079697296507, "grad_norm": 5.5082290783231045, "learning_rate": 6.01326634430982e-06, "loss": 17.04, "step": 24728 }, { "epoch": 0.4520262489260972, "grad_norm": 4.90541389879834, "learning_rate": 6.01297647307094e-06, "loss": 17.063, "step": 24729 }, { "epoch": 0.45204452812254375, "grad_norm": 5.598993418730989, "learning_rate": 6.012686598281666e-06, "loss": 17.3291, "step": 24730 }, { "epoch": 0.45206280731899023, "grad_norm": 7.057060538631161, "learning_rate": 6.012396719943014e-06, "loss": 17.4468, "step": 24731 }, { "epoch": 0.45208108651543677, "grad_norm": 6.644628032127668, "learning_rate": 6.012106838056001e-06, "loss": 17.9096, "step": 24732 }, { "epoch": 0.4520993657118833, "grad_norm": 6.235272183127271, "learning_rate": 6.011816952621639e-06, "loss": 17.4477, "step": 24733 }, { "epoch": 0.45211764490832984, "grad_norm": 7.395452813459593, "learning_rate": 6.011527063640946e-06, "loss": 18.0175, "step": 24734 }, { "epoch": 0.4521359241047764, "grad_norm": 6.41941944832454, "learning_rate": 6.011237171114941e-06, "loss": 17.355, "step": 24735 }, { "epoch": 0.45215420330122286, "grad_norm": 6.074373312847202, "learning_rate": 6.010947275044635e-06, "loss": 17.3462, "step": 24736 }, { "epoch": 0.4521724824976694, "grad_norm": 7.89500808482335, "learning_rate": 6.010657375431047e-06, "loss": 18.4106, "step": 24737 }, { "epoch": 0.45219076169411593, "grad_norm": 7.327090196334108, "learning_rate": 6.010367472275192e-06, "loss": 17.7476, "step": 24738 }, { "epoch": 0.45220904089056246, "grad_norm": 5.418767125407057, "learning_rate": 6.010077565578088e-06, "loss": 17.0018, "step": 24739 }, { "epoch": 0.452227320087009, "grad_norm": 5.740834123665435, "learning_rate": 6.009787655340751e-06, "loss": 17.1866, "step": 24740 }, { "epoch": 0.4522455992834555, "grad_norm": 5.682718616324278, "learning_rate": 6.009497741564194e-06, "loss": 17.313, "step": 24741 }, { "epoch": 0.452263878479902, "grad_norm": 5.373888396178508, "learning_rate": 6.009207824249435e-06, "loss": 17.0231, "step": 24742 }, { "epoch": 0.45228215767634855, "grad_norm": 6.381700126000964, "learning_rate": 6.008917903397491e-06, "loss": 17.6324, "step": 24743 }, { "epoch": 0.4523004368727951, "grad_norm": 5.812652938728273, "learning_rate": 6.008627979009376e-06, "loss": 17.3316, "step": 24744 }, { "epoch": 0.4523187160692416, "grad_norm": 8.068093935386853, "learning_rate": 6.008338051086109e-06, "loss": 18.2421, "step": 24745 }, { "epoch": 0.4523369952656881, "grad_norm": 6.458507344262246, "learning_rate": 6.008048119628705e-06, "loss": 17.3915, "step": 24746 }, { "epoch": 0.45235527446213464, "grad_norm": 6.150373670861808, "learning_rate": 6.007758184638177e-06, "loss": 17.2885, "step": 24747 }, { "epoch": 0.4523735536585812, "grad_norm": 5.755699726914589, "learning_rate": 6.007468246115545e-06, "loss": 17.2085, "step": 24748 }, { "epoch": 0.4523918328550277, "grad_norm": 7.056788174499082, "learning_rate": 6.007178304061827e-06, "loss": 17.987, "step": 24749 }, { "epoch": 0.45241011205147424, "grad_norm": 6.359862268820703, "learning_rate": 6.0068883584780336e-06, "loss": 17.4381, "step": 24750 }, { "epoch": 0.4524283912479207, "grad_norm": 6.187298269703934, "learning_rate": 6.006598409365185e-06, "loss": 17.451, "step": 24751 }, { "epoch": 0.45244667044436726, "grad_norm": 5.255808732439555, "learning_rate": 6.006308456724296e-06, "loss": 17.0127, "step": 24752 }, { "epoch": 0.4524649496408138, "grad_norm": 5.954886878936499, "learning_rate": 6.006018500556383e-06, "loss": 17.5082, "step": 24753 }, { "epoch": 0.45248322883726033, "grad_norm": 7.1927315341034905, "learning_rate": 6.005728540862462e-06, "loss": 17.5332, "step": 24754 }, { "epoch": 0.4525015080337068, "grad_norm": 7.240682473726163, "learning_rate": 6.005438577643551e-06, "loss": 17.4798, "step": 24755 }, { "epoch": 0.45251978723015335, "grad_norm": 7.584413677100589, "learning_rate": 6.005148610900664e-06, "loss": 18.0481, "step": 24756 }, { "epoch": 0.4525380664265999, "grad_norm": 5.44395975670243, "learning_rate": 6.004858640634819e-06, "loss": 16.9502, "step": 24757 }, { "epoch": 0.4525563456230464, "grad_norm": 6.467104458156888, "learning_rate": 6.00456866684703e-06, "loss": 17.4797, "step": 24758 }, { "epoch": 0.45257462481949295, "grad_norm": 6.479292435086554, "learning_rate": 6.004278689538319e-06, "loss": 17.5469, "step": 24759 }, { "epoch": 0.45259290401593943, "grad_norm": 6.200094013034941, "learning_rate": 6.003988708709694e-06, "loss": 17.6831, "step": 24760 }, { "epoch": 0.45261118321238597, "grad_norm": 6.3042271961249305, "learning_rate": 6.003698724362177e-06, "loss": 17.4735, "step": 24761 }, { "epoch": 0.4526294624088325, "grad_norm": 6.796098419122103, "learning_rate": 6.003408736496784e-06, "loss": 17.4488, "step": 24762 }, { "epoch": 0.45264774160527904, "grad_norm": 6.787563180152649, "learning_rate": 6.0031187451145314e-06, "loss": 17.9, "step": 24763 }, { "epoch": 0.4526660208017256, "grad_norm": 5.483334221140463, "learning_rate": 6.002828750216433e-06, "loss": 17.3169, "step": 24764 }, { "epoch": 0.45268429999817206, "grad_norm": 5.447648707127053, "learning_rate": 6.002538751803505e-06, "loss": 17.0144, "step": 24765 }, { "epoch": 0.4527025791946186, "grad_norm": 7.920318139622635, "learning_rate": 6.002248749876769e-06, "loss": 17.6761, "step": 24766 }, { "epoch": 0.45272085839106513, "grad_norm": 6.695960442495997, "learning_rate": 6.001958744437237e-06, "loss": 17.4054, "step": 24767 }, { "epoch": 0.45273913758751166, "grad_norm": 5.701863915732816, "learning_rate": 6.001668735485926e-06, "loss": 17.2778, "step": 24768 }, { "epoch": 0.4527574167839582, "grad_norm": 6.932504052318536, "learning_rate": 6.001378723023854e-06, "loss": 17.5723, "step": 24769 }, { "epoch": 0.4527756959804047, "grad_norm": 7.430116870954375, "learning_rate": 6.001088707052035e-06, "loss": 17.4729, "step": 24770 }, { "epoch": 0.4527939751768512, "grad_norm": 5.385734016428016, "learning_rate": 6.000798687571487e-06, "loss": 17.1351, "step": 24771 }, { "epoch": 0.45281225437329775, "grad_norm": 5.242606336630852, "learning_rate": 6.0005086645832276e-06, "loss": 16.9974, "step": 24772 }, { "epoch": 0.4528305335697443, "grad_norm": 6.5685110889576475, "learning_rate": 6.000218638088273e-06, "loss": 17.6213, "step": 24773 }, { "epoch": 0.4528488127661908, "grad_norm": 5.716236414320557, "learning_rate": 5.999928608087637e-06, "loss": 17.3367, "step": 24774 }, { "epoch": 0.4528670919626373, "grad_norm": 8.564552218972441, "learning_rate": 5.999638574582338e-06, "loss": 18.2329, "step": 24775 }, { "epoch": 0.45288537115908384, "grad_norm": 7.301717290044244, "learning_rate": 5.999348537573394e-06, "loss": 17.4011, "step": 24776 }, { "epoch": 0.4529036503555304, "grad_norm": 6.38934496240644, "learning_rate": 5.99905849706182e-06, "loss": 17.6334, "step": 24777 }, { "epoch": 0.4529219295519769, "grad_norm": 5.2947761883209905, "learning_rate": 5.998768453048632e-06, "loss": 17.122, "step": 24778 }, { "epoch": 0.45294020874842345, "grad_norm": 8.074965858889485, "learning_rate": 5.998478405534845e-06, "loss": 17.8879, "step": 24779 }, { "epoch": 0.4529584879448699, "grad_norm": 6.699026954328017, "learning_rate": 5.998188354521481e-06, "loss": 17.7634, "step": 24780 }, { "epoch": 0.45297676714131646, "grad_norm": 6.182307564133946, "learning_rate": 5.997898300009554e-06, "loss": 17.4829, "step": 24781 }, { "epoch": 0.452995046337763, "grad_norm": 6.368049414384359, "learning_rate": 5.997608242000078e-06, "loss": 17.3981, "step": 24782 }, { "epoch": 0.45301332553420953, "grad_norm": 6.191705584338558, "learning_rate": 5.997318180494071e-06, "loss": 17.2676, "step": 24783 }, { "epoch": 0.45303160473065607, "grad_norm": 5.93275458193394, "learning_rate": 5.997028115492552e-06, "loss": 17.4764, "step": 24784 }, { "epoch": 0.45304988392710255, "grad_norm": 4.5247068484507444, "learning_rate": 5.996738046996535e-06, "loss": 16.7954, "step": 24785 }, { "epoch": 0.4530681631235491, "grad_norm": 7.902685526078171, "learning_rate": 5.99644797500704e-06, "loss": 17.3129, "step": 24786 }, { "epoch": 0.4530864423199956, "grad_norm": 6.06325082023824, "learning_rate": 5.996157899525078e-06, "loss": 17.2844, "step": 24787 }, { "epoch": 0.45310472151644215, "grad_norm": 7.628112177345854, "learning_rate": 5.995867820551671e-06, "loss": 17.9469, "step": 24788 }, { "epoch": 0.45312300071288864, "grad_norm": 7.873043759885314, "learning_rate": 5.995577738087832e-06, "loss": 17.643, "step": 24789 }, { "epoch": 0.45314127990933517, "grad_norm": 6.797267503432025, "learning_rate": 5.995287652134583e-06, "loss": 17.6177, "step": 24790 }, { "epoch": 0.4531595591057817, "grad_norm": 6.406551669096651, "learning_rate": 5.994997562692934e-06, "loss": 17.3904, "step": 24791 }, { "epoch": 0.45317783830222824, "grad_norm": 5.9040284706172335, "learning_rate": 5.994707469763904e-06, "loss": 17.4223, "step": 24792 }, { "epoch": 0.4531961174986748, "grad_norm": 7.130237826587623, "learning_rate": 5.9944173733485125e-06, "loss": 17.6801, "step": 24793 }, { "epoch": 0.45321439669512126, "grad_norm": 6.8328317591233825, "learning_rate": 5.994127273447775e-06, "loss": 17.64, "step": 24794 }, { "epoch": 0.4532326758915678, "grad_norm": 8.590765023395658, "learning_rate": 5.993837170062708e-06, "loss": 18.3209, "step": 24795 }, { "epoch": 0.45325095508801433, "grad_norm": 6.8864481963997015, "learning_rate": 5.993547063194326e-06, "loss": 17.5819, "step": 24796 }, { "epoch": 0.45326923428446086, "grad_norm": 5.636764255944445, "learning_rate": 5.993256952843648e-06, "loss": 17.1017, "step": 24797 }, { "epoch": 0.4532875134809074, "grad_norm": 5.544907470650895, "learning_rate": 5.992966839011691e-06, "loss": 17.2787, "step": 24798 }, { "epoch": 0.4533057926773539, "grad_norm": 6.258601438049628, "learning_rate": 5.992676721699472e-06, "loss": 17.5772, "step": 24799 }, { "epoch": 0.4533240718738004, "grad_norm": 7.644932221665382, "learning_rate": 5.992386600908007e-06, "loss": 17.9644, "step": 24800 }, { "epoch": 0.45334235107024695, "grad_norm": 6.548807081424216, "learning_rate": 5.9920964766383114e-06, "loss": 17.5698, "step": 24801 }, { "epoch": 0.4533606302666935, "grad_norm": 6.928747559619665, "learning_rate": 5.991806348891406e-06, "loss": 17.5953, "step": 24802 }, { "epoch": 0.45337890946314, "grad_norm": 6.24419831583202, "learning_rate": 5.991516217668304e-06, "loss": 17.7199, "step": 24803 }, { "epoch": 0.4533971886595865, "grad_norm": 4.78474495283371, "learning_rate": 5.991226082970025e-06, "loss": 17.0338, "step": 24804 }, { "epoch": 0.45341546785603304, "grad_norm": 5.934566945253231, "learning_rate": 5.9909359447975845e-06, "loss": 17.2925, "step": 24805 }, { "epoch": 0.4534337470524796, "grad_norm": 6.538932399172217, "learning_rate": 5.990645803151998e-06, "loss": 17.7339, "step": 24806 }, { "epoch": 0.4534520262489261, "grad_norm": 7.162529326065977, "learning_rate": 5.990355658034285e-06, "loss": 17.5858, "step": 24807 }, { "epoch": 0.45347030544537265, "grad_norm": 7.854321443510832, "learning_rate": 5.990065509445462e-06, "loss": 18.0197, "step": 24808 }, { "epoch": 0.4534885846418191, "grad_norm": 8.937205810721213, "learning_rate": 5.989775357386544e-06, "loss": 18.1315, "step": 24809 }, { "epoch": 0.45350686383826566, "grad_norm": 5.685135286358327, "learning_rate": 5.989485201858549e-06, "loss": 17.2512, "step": 24810 }, { "epoch": 0.4535251430347122, "grad_norm": 6.0497262468242425, "learning_rate": 5.989195042862495e-06, "loss": 17.3527, "step": 24811 }, { "epoch": 0.45354342223115873, "grad_norm": 7.0295077846462934, "learning_rate": 5.988904880399398e-06, "loss": 17.764, "step": 24812 }, { "epoch": 0.45356170142760527, "grad_norm": 6.518605241306475, "learning_rate": 5.988614714470276e-06, "loss": 17.5902, "step": 24813 }, { "epoch": 0.45357998062405175, "grad_norm": 5.870952484450353, "learning_rate": 5.988324545076144e-06, "loss": 17.538, "step": 24814 }, { "epoch": 0.4535982598204983, "grad_norm": 5.238022253591621, "learning_rate": 5.988034372218021e-06, "loss": 17.2232, "step": 24815 }, { "epoch": 0.4536165390169448, "grad_norm": 6.168754075001521, "learning_rate": 5.987744195896923e-06, "loss": 17.3962, "step": 24816 }, { "epoch": 0.45363481821339136, "grad_norm": 6.673533910286118, "learning_rate": 5.987454016113867e-06, "loss": 17.8792, "step": 24817 }, { "epoch": 0.4536530974098379, "grad_norm": 7.751066565956767, "learning_rate": 5.9871638328698725e-06, "loss": 18.1194, "step": 24818 }, { "epoch": 0.45367137660628437, "grad_norm": 6.507359471953103, "learning_rate": 5.986873646165951e-06, "loss": 17.6494, "step": 24819 }, { "epoch": 0.4536896558027309, "grad_norm": 8.920332758130165, "learning_rate": 5.986583456003124e-06, "loss": 18.2551, "step": 24820 }, { "epoch": 0.45370793499917744, "grad_norm": 7.692489007374717, "learning_rate": 5.98629326238241e-06, "loss": 18.1621, "step": 24821 }, { "epoch": 0.453726214195624, "grad_norm": 6.3461456362686635, "learning_rate": 5.986003065304822e-06, "loss": 17.6561, "step": 24822 }, { "epoch": 0.45374449339207046, "grad_norm": 7.554931960332167, "learning_rate": 5.985712864771378e-06, "loss": 17.7931, "step": 24823 }, { "epoch": 0.453762772588517, "grad_norm": 5.62834503111923, "learning_rate": 5.9854226607830955e-06, "loss": 17.187, "step": 24824 }, { "epoch": 0.45378105178496353, "grad_norm": 7.30912711086053, "learning_rate": 5.985132453340995e-06, "loss": 17.793, "step": 24825 }, { "epoch": 0.45379933098141007, "grad_norm": 6.51726859739539, "learning_rate": 5.9848422424460895e-06, "loss": 17.8442, "step": 24826 }, { "epoch": 0.4538176101778566, "grad_norm": 7.22422985826368, "learning_rate": 5.984552028099396e-06, "loss": 17.9982, "step": 24827 }, { "epoch": 0.4538358893743031, "grad_norm": 7.219433123503709, "learning_rate": 5.984261810301935e-06, "loss": 17.3976, "step": 24828 }, { "epoch": 0.4538541685707496, "grad_norm": 6.654532646470851, "learning_rate": 5.98397158905472e-06, "loss": 17.8668, "step": 24829 }, { "epoch": 0.45387244776719615, "grad_norm": 6.292491359443311, "learning_rate": 5.983681364358771e-06, "loss": 17.4518, "step": 24830 }, { "epoch": 0.4538907269636427, "grad_norm": 7.276505528291034, "learning_rate": 5.983391136215104e-06, "loss": 17.998, "step": 24831 }, { "epoch": 0.4539090061600892, "grad_norm": 7.740058123791623, "learning_rate": 5.983100904624737e-06, "loss": 18.0715, "step": 24832 }, { "epoch": 0.4539272853565357, "grad_norm": 6.70546100657501, "learning_rate": 5.982810669588685e-06, "loss": 17.5629, "step": 24833 }, { "epoch": 0.45394556455298224, "grad_norm": 6.932532836554571, "learning_rate": 5.982520431107968e-06, "loss": 17.6811, "step": 24834 }, { "epoch": 0.4539638437494288, "grad_norm": 5.7676651583805025, "learning_rate": 5.982230189183602e-06, "loss": 17.3768, "step": 24835 }, { "epoch": 0.4539821229458753, "grad_norm": 8.112885332345646, "learning_rate": 5.981939943816605e-06, "loss": 17.9777, "step": 24836 }, { "epoch": 0.45400040214232185, "grad_norm": 5.899663538561533, "learning_rate": 5.981649695007993e-06, "loss": 17.3216, "step": 24837 }, { "epoch": 0.4540186813387683, "grad_norm": 7.48592461627228, "learning_rate": 5.981359442758783e-06, "loss": 17.8734, "step": 24838 }, { "epoch": 0.45403696053521486, "grad_norm": 7.987240402143579, "learning_rate": 5.981069187069996e-06, "loss": 18.1016, "step": 24839 }, { "epoch": 0.4540552397316614, "grad_norm": 5.7988877873438724, "learning_rate": 5.980778927942644e-06, "loss": 17.1847, "step": 24840 }, { "epoch": 0.45407351892810793, "grad_norm": 7.3804228309967135, "learning_rate": 5.980488665377748e-06, "loss": 17.8344, "step": 24841 }, { "epoch": 0.45409179812455447, "grad_norm": 5.739488152069684, "learning_rate": 5.980198399376325e-06, "loss": 17.1668, "step": 24842 }, { "epoch": 0.45411007732100095, "grad_norm": 6.449068281897217, "learning_rate": 5.979908129939391e-06, "loss": 17.4461, "step": 24843 }, { "epoch": 0.4541283565174475, "grad_norm": 6.511989432260754, "learning_rate": 5.979617857067964e-06, "loss": 17.6251, "step": 24844 }, { "epoch": 0.454146635713894, "grad_norm": 7.184755907382845, "learning_rate": 5.979327580763062e-06, "loss": 17.7948, "step": 24845 }, { "epoch": 0.45416491491034056, "grad_norm": 7.397502154523116, "learning_rate": 5.979037301025701e-06, "loss": 17.9996, "step": 24846 }, { "epoch": 0.4541831941067871, "grad_norm": 7.44664159389226, "learning_rate": 5.978747017856898e-06, "loss": 17.6774, "step": 24847 }, { "epoch": 0.45420147330323357, "grad_norm": 6.625820805484883, "learning_rate": 5.978456731257674e-06, "loss": 17.3918, "step": 24848 }, { "epoch": 0.4542197524996801, "grad_norm": 7.660520868003206, "learning_rate": 5.978166441229044e-06, "loss": 17.7056, "step": 24849 }, { "epoch": 0.45423803169612664, "grad_norm": 6.622079502102471, "learning_rate": 5.977876147772025e-06, "loss": 17.1085, "step": 24850 }, { "epoch": 0.4542563108925732, "grad_norm": 7.282386391227921, "learning_rate": 5.977585850887634e-06, "loss": 18.1855, "step": 24851 }, { "epoch": 0.4542745900890197, "grad_norm": 5.043317870253002, "learning_rate": 5.97729555057689e-06, "loss": 16.9436, "step": 24852 }, { "epoch": 0.4542928692854662, "grad_norm": 6.813848243736479, "learning_rate": 5.97700524684081e-06, "loss": 17.655, "step": 24853 }, { "epoch": 0.45431114848191273, "grad_norm": 5.891577703227318, "learning_rate": 5.976714939680412e-06, "loss": 17.1782, "step": 24854 }, { "epoch": 0.45432942767835927, "grad_norm": 7.105356369781019, "learning_rate": 5.976424629096712e-06, "loss": 17.4883, "step": 24855 }, { "epoch": 0.4543477068748058, "grad_norm": 7.454305661770091, "learning_rate": 5.976134315090729e-06, "loss": 17.7958, "step": 24856 }, { "epoch": 0.4543659860712523, "grad_norm": 7.516791832633677, "learning_rate": 5.97584399766348e-06, "loss": 18.2962, "step": 24857 }, { "epoch": 0.4543842652676988, "grad_norm": 7.0906485532158605, "learning_rate": 5.975553676815982e-06, "loss": 17.6407, "step": 24858 }, { "epoch": 0.45440254446414535, "grad_norm": 7.128028939234638, "learning_rate": 5.975263352549253e-06, "loss": 17.8622, "step": 24859 }, { "epoch": 0.4544208236605919, "grad_norm": 6.907942211710578, "learning_rate": 5.97497302486431e-06, "loss": 17.9482, "step": 24860 }, { "epoch": 0.4544391028570384, "grad_norm": 6.368527253496372, "learning_rate": 5.974682693762172e-06, "loss": 17.4452, "step": 24861 }, { "epoch": 0.4544573820534849, "grad_norm": 8.097922008715956, "learning_rate": 5.9743923592438555e-06, "loss": 17.9609, "step": 24862 }, { "epoch": 0.45447566124993144, "grad_norm": 6.807453022908108, "learning_rate": 5.97410202131038e-06, "loss": 17.8506, "step": 24863 }, { "epoch": 0.454493940446378, "grad_norm": 6.664126366238458, "learning_rate": 5.973811679962759e-06, "loss": 17.4324, "step": 24864 }, { "epoch": 0.4545122196428245, "grad_norm": 4.72076247400394, "learning_rate": 5.973521335202013e-06, "loss": 16.7962, "step": 24865 }, { "epoch": 0.45453049883927105, "grad_norm": 5.934770211234447, "learning_rate": 5.97323098702916e-06, "loss": 17.1794, "step": 24866 }, { "epoch": 0.4545487780357175, "grad_norm": 7.472014628053679, "learning_rate": 5.972940635445217e-06, "loss": 17.7042, "step": 24867 }, { "epoch": 0.45456705723216406, "grad_norm": 5.461609249343249, "learning_rate": 5.9726502804512e-06, "loss": 17.0504, "step": 24868 }, { "epoch": 0.4545853364286106, "grad_norm": 6.322879836658449, "learning_rate": 5.97235992204813e-06, "loss": 17.5128, "step": 24869 }, { "epoch": 0.45460361562505713, "grad_norm": 5.059208558620764, "learning_rate": 5.9720695602370215e-06, "loss": 16.9362, "step": 24870 }, { "epoch": 0.45462189482150367, "grad_norm": 7.565480030308469, "learning_rate": 5.971779195018894e-06, "loss": 18.1777, "step": 24871 }, { "epoch": 0.45464017401795015, "grad_norm": 7.677649071696698, "learning_rate": 5.971488826394764e-06, "loss": 17.9081, "step": 24872 }, { "epoch": 0.4546584532143967, "grad_norm": 6.457744832161168, "learning_rate": 5.971198454365652e-06, "loss": 17.5226, "step": 24873 }, { "epoch": 0.4546767324108432, "grad_norm": 8.486369762666998, "learning_rate": 5.970908078932571e-06, "loss": 18.1025, "step": 24874 }, { "epoch": 0.45469501160728976, "grad_norm": 7.373906463484746, "learning_rate": 5.9706177000965434e-06, "loss": 17.8832, "step": 24875 }, { "epoch": 0.4547132908037363, "grad_norm": 7.7853096676110285, "learning_rate": 5.970327317858584e-06, "loss": 18.1932, "step": 24876 }, { "epoch": 0.45473157000018277, "grad_norm": 7.584841019866278, "learning_rate": 5.970036932219714e-06, "loss": 18.2716, "step": 24877 }, { "epoch": 0.4547498491966293, "grad_norm": 6.319330515581081, "learning_rate": 5.9697465431809455e-06, "loss": 17.874, "step": 24878 }, { "epoch": 0.45476812839307584, "grad_norm": 6.481658860565889, "learning_rate": 5.9694561507433e-06, "loss": 17.3555, "step": 24879 }, { "epoch": 0.4547864075895224, "grad_norm": 6.38656394726239, "learning_rate": 5.969165754907796e-06, "loss": 17.7083, "step": 24880 }, { "epoch": 0.4548046867859689, "grad_norm": 5.605613709880605, "learning_rate": 5.96887535567545e-06, "loss": 17.2987, "step": 24881 }, { "epoch": 0.4548229659824154, "grad_norm": 6.6044745124806585, "learning_rate": 5.9685849530472795e-06, "loss": 17.4623, "step": 24882 }, { "epoch": 0.45484124517886193, "grad_norm": 5.850295887623415, "learning_rate": 5.968294547024303e-06, "loss": 17.1722, "step": 24883 }, { "epoch": 0.45485952437530847, "grad_norm": 6.675313290934436, "learning_rate": 5.968004137607538e-06, "loss": 17.9645, "step": 24884 }, { "epoch": 0.454877803571755, "grad_norm": 7.694681496036081, "learning_rate": 5.967713724798003e-06, "loss": 18.1642, "step": 24885 }, { "epoch": 0.45489608276820154, "grad_norm": 7.106606312621698, "learning_rate": 5.9674233085967145e-06, "loss": 17.8321, "step": 24886 }, { "epoch": 0.454914361964648, "grad_norm": 7.192350868472989, "learning_rate": 5.967132889004692e-06, "loss": 17.7427, "step": 24887 }, { "epoch": 0.45493264116109455, "grad_norm": 7.930449156621662, "learning_rate": 5.966842466022952e-06, "loss": 18.1769, "step": 24888 }, { "epoch": 0.4549509203575411, "grad_norm": 5.3265330405979405, "learning_rate": 5.9665520396525135e-06, "loss": 16.9592, "step": 24889 }, { "epoch": 0.4549691995539876, "grad_norm": 7.141694489175982, "learning_rate": 5.966261609894395e-06, "loss": 17.9663, "step": 24890 }, { "epoch": 0.4549874787504341, "grad_norm": 7.5992211434718655, "learning_rate": 5.965971176749612e-06, "loss": 18.0856, "step": 24891 }, { "epoch": 0.45500575794688064, "grad_norm": 6.695374744834487, "learning_rate": 5.965680740219183e-06, "loss": 17.4817, "step": 24892 }, { "epoch": 0.4550240371433272, "grad_norm": 6.740658620386362, "learning_rate": 5.965390300304128e-06, "loss": 17.5219, "step": 24893 }, { "epoch": 0.4550423163397737, "grad_norm": 6.3009442508213445, "learning_rate": 5.965099857005464e-06, "loss": 17.2256, "step": 24894 }, { "epoch": 0.45506059553622025, "grad_norm": 5.805336415765952, "learning_rate": 5.9648094103242096e-06, "loss": 17.1404, "step": 24895 }, { "epoch": 0.4550788747326667, "grad_norm": 6.654660750961497, "learning_rate": 5.96451896026138e-06, "loss": 18.0301, "step": 24896 }, { "epoch": 0.45509715392911326, "grad_norm": 6.159094027491954, "learning_rate": 5.964228506817996e-06, "loss": 17.8566, "step": 24897 }, { "epoch": 0.4551154331255598, "grad_norm": 5.617800758583667, "learning_rate": 5.963938049995075e-06, "loss": 17.1717, "step": 24898 }, { "epoch": 0.45513371232200633, "grad_norm": 5.684078828624881, "learning_rate": 5.963647589793634e-06, "loss": 17.1505, "step": 24899 }, { "epoch": 0.45515199151845287, "grad_norm": 6.164305056396494, "learning_rate": 5.963357126214692e-06, "loss": 17.1597, "step": 24900 }, { "epoch": 0.45517027071489935, "grad_norm": 5.661822102120397, "learning_rate": 5.963066659259267e-06, "loss": 17.461, "step": 24901 }, { "epoch": 0.4551885499113459, "grad_norm": 6.6575164760084835, "learning_rate": 5.962776188928377e-06, "loss": 17.5022, "step": 24902 }, { "epoch": 0.4552068291077924, "grad_norm": 5.634278910422919, "learning_rate": 5.962485715223041e-06, "loss": 17.1694, "step": 24903 }, { "epoch": 0.45522510830423896, "grad_norm": 6.8679635270014705, "learning_rate": 5.962195238144275e-06, "loss": 17.8086, "step": 24904 }, { "epoch": 0.4552433875006855, "grad_norm": 6.154759540486805, "learning_rate": 5.961904757693099e-06, "loss": 17.3594, "step": 24905 }, { "epoch": 0.455261666697132, "grad_norm": 7.605995253946768, "learning_rate": 5.961614273870528e-06, "loss": 17.8852, "step": 24906 }, { "epoch": 0.4552799458935785, "grad_norm": 6.513969623764981, "learning_rate": 5.9613237866775845e-06, "loss": 17.4292, "step": 24907 }, { "epoch": 0.45529822509002504, "grad_norm": 5.226876028951783, "learning_rate": 5.961033296115285e-06, "loss": 17.0332, "step": 24908 }, { "epoch": 0.4553165042864716, "grad_norm": 6.233897614045115, "learning_rate": 5.960742802184646e-06, "loss": 17.5373, "step": 24909 }, { "epoch": 0.4553347834829181, "grad_norm": 6.6655302509229, "learning_rate": 5.9604523048866865e-06, "loss": 17.2439, "step": 24910 }, { "epoch": 0.4553530626793646, "grad_norm": 6.8305913291420985, "learning_rate": 5.960161804222427e-06, "loss": 17.7038, "step": 24911 }, { "epoch": 0.45537134187581113, "grad_norm": 5.805860333489474, "learning_rate": 5.959871300192882e-06, "loss": 17.3428, "step": 24912 }, { "epoch": 0.45538962107225767, "grad_norm": 6.114688002868169, "learning_rate": 5.959580792799071e-06, "loss": 17.515, "step": 24913 }, { "epoch": 0.4554079002687042, "grad_norm": 6.3102515482798776, "learning_rate": 5.959290282042014e-06, "loss": 17.3935, "step": 24914 }, { "epoch": 0.45542617946515074, "grad_norm": 6.762663799426242, "learning_rate": 5.958999767922726e-06, "loss": 17.5704, "step": 24915 }, { "epoch": 0.4554444586615972, "grad_norm": 6.7378267892211126, "learning_rate": 5.95870925044223e-06, "loss": 17.7129, "step": 24916 }, { "epoch": 0.45546273785804375, "grad_norm": 6.833504878076066, "learning_rate": 5.958418729601538e-06, "loss": 17.692, "step": 24917 }, { "epoch": 0.4554810170544903, "grad_norm": 5.306666325362931, "learning_rate": 5.958128205401674e-06, "loss": 17.1075, "step": 24918 }, { "epoch": 0.4554992962509368, "grad_norm": 5.969029578562306, "learning_rate": 5.957837677843652e-06, "loss": 17.4495, "step": 24919 }, { "epoch": 0.45551757544738336, "grad_norm": 5.976336859409207, "learning_rate": 5.957547146928493e-06, "loss": 17.3549, "step": 24920 }, { "epoch": 0.45553585464382984, "grad_norm": 5.806214475317869, "learning_rate": 5.957256612657215e-06, "loss": 17.2555, "step": 24921 }, { "epoch": 0.4555541338402764, "grad_norm": 8.016121191859519, "learning_rate": 5.956966075030834e-06, "loss": 18.102, "step": 24922 }, { "epoch": 0.4555724130367229, "grad_norm": 8.230586744775685, "learning_rate": 5.956675534050371e-06, "loss": 18.5998, "step": 24923 }, { "epoch": 0.45559069223316945, "grad_norm": 6.598971293214389, "learning_rate": 5.956384989716842e-06, "loss": 17.5706, "step": 24924 }, { "epoch": 0.4556089714296159, "grad_norm": 6.186971604231284, "learning_rate": 5.956094442031269e-06, "loss": 17.494, "step": 24925 }, { "epoch": 0.45562725062606246, "grad_norm": 7.136106957598433, "learning_rate": 5.955803890994667e-06, "loss": 17.9298, "step": 24926 }, { "epoch": 0.455645529822509, "grad_norm": 5.9943177520249, "learning_rate": 5.9555133366080545e-06, "loss": 17.3705, "step": 24927 }, { "epoch": 0.45566380901895553, "grad_norm": 7.222490292310578, "learning_rate": 5.95522277887245e-06, "loss": 17.8616, "step": 24928 }, { "epoch": 0.45568208821540207, "grad_norm": 7.608678769348252, "learning_rate": 5.954932217788875e-06, "loss": 17.9525, "step": 24929 }, { "epoch": 0.45570036741184855, "grad_norm": 7.804162846358708, "learning_rate": 5.954641653358343e-06, "loss": 17.8717, "step": 24930 }, { "epoch": 0.4557186466082951, "grad_norm": 6.6028374641371625, "learning_rate": 5.954351085581876e-06, "loss": 17.573, "step": 24931 }, { "epoch": 0.4557369258047416, "grad_norm": 5.98152414111835, "learning_rate": 5.954060514460492e-06, "loss": 17.2916, "step": 24932 }, { "epoch": 0.45575520500118816, "grad_norm": 6.051014682198928, "learning_rate": 5.953769939995206e-06, "loss": 17.3404, "step": 24933 }, { "epoch": 0.4557734841976347, "grad_norm": 6.7063809868234925, "learning_rate": 5.953479362187041e-06, "loss": 17.6344, "step": 24934 }, { "epoch": 0.4557917633940812, "grad_norm": 6.408814761826627, "learning_rate": 5.953188781037015e-06, "loss": 17.1016, "step": 24935 }, { "epoch": 0.4558100425905277, "grad_norm": 7.052933799014533, "learning_rate": 5.952898196546144e-06, "loss": 17.4834, "step": 24936 }, { "epoch": 0.45582832178697424, "grad_norm": 7.127083901926689, "learning_rate": 5.952607608715447e-06, "loss": 17.5944, "step": 24937 }, { "epoch": 0.4558466009834208, "grad_norm": 6.907721348468742, "learning_rate": 5.952317017545941e-06, "loss": 17.5721, "step": 24938 }, { "epoch": 0.4558648801798673, "grad_norm": 4.945476134036083, "learning_rate": 5.952026423038651e-06, "loss": 16.7368, "step": 24939 }, { "epoch": 0.4558831593763138, "grad_norm": 7.127582004948006, "learning_rate": 5.951735825194588e-06, "loss": 18.0291, "step": 24940 }, { "epoch": 0.45590143857276033, "grad_norm": 7.289376816504818, "learning_rate": 5.951445224014773e-06, "loss": 17.4337, "step": 24941 }, { "epoch": 0.45591971776920687, "grad_norm": 7.195187834258994, "learning_rate": 5.951154619500227e-06, "loss": 17.8671, "step": 24942 }, { "epoch": 0.4559379969656534, "grad_norm": 6.080345119539842, "learning_rate": 5.9508640116519656e-06, "loss": 17.2807, "step": 24943 }, { "epoch": 0.45595627616209994, "grad_norm": 7.27655914283795, "learning_rate": 5.950573400471008e-06, "loss": 17.7143, "step": 24944 }, { "epoch": 0.4559745553585464, "grad_norm": 5.197007266471913, "learning_rate": 5.950282785958373e-06, "loss": 16.8949, "step": 24945 }, { "epoch": 0.45599283455499295, "grad_norm": 5.502133312801913, "learning_rate": 5.949992168115081e-06, "loss": 17.0616, "step": 24946 }, { "epoch": 0.4560111137514395, "grad_norm": 7.636449685803365, "learning_rate": 5.949701546942147e-06, "loss": 17.619, "step": 24947 }, { "epoch": 0.456029392947886, "grad_norm": 6.895739600713596, "learning_rate": 5.949410922440592e-06, "loss": 17.6776, "step": 24948 }, { "epoch": 0.45604767214433256, "grad_norm": 7.892914071186963, "learning_rate": 5.9491202946114355e-06, "loss": 17.7504, "step": 24949 }, { "epoch": 0.45606595134077904, "grad_norm": 5.7805011601384315, "learning_rate": 5.948829663455694e-06, "loss": 17.2404, "step": 24950 }, { "epoch": 0.4560842305372256, "grad_norm": 6.332919356755202, "learning_rate": 5.948539028974385e-06, "loss": 17.2974, "step": 24951 }, { "epoch": 0.4561025097336721, "grad_norm": 9.809507334768115, "learning_rate": 5.9482483911685316e-06, "loss": 18.2558, "step": 24952 }, { "epoch": 0.45612078893011865, "grad_norm": 6.073160875066537, "learning_rate": 5.947957750039148e-06, "loss": 17.3897, "step": 24953 }, { "epoch": 0.4561390681265652, "grad_norm": 6.531167093761964, "learning_rate": 5.947667105587256e-06, "loss": 17.439, "step": 24954 }, { "epoch": 0.45615734732301166, "grad_norm": 7.527818403529953, "learning_rate": 5.947376457813873e-06, "loss": 17.7602, "step": 24955 }, { "epoch": 0.4561756265194582, "grad_norm": 8.235916091449743, "learning_rate": 5.947085806720017e-06, "loss": 17.6098, "step": 24956 }, { "epoch": 0.45619390571590474, "grad_norm": 7.187161374228202, "learning_rate": 5.946795152306708e-06, "loss": 17.783, "step": 24957 }, { "epoch": 0.45621218491235127, "grad_norm": 6.41587946281288, "learning_rate": 5.946504494574963e-06, "loss": 17.2936, "step": 24958 }, { "epoch": 0.45623046410879775, "grad_norm": 5.618748298564456, "learning_rate": 5.946213833525805e-06, "loss": 17.2479, "step": 24959 }, { "epoch": 0.4562487433052443, "grad_norm": 5.766928935455178, "learning_rate": 5.945923169160245e-06, "loss": 17.3264, "step": 24960 }, { "epoch": 0.4562670225016908, "grad_norm": 7.84812429726118, "learning_rate": 5.945632501479309e-06, "loss": 17.9254, "step": 24961 }, { "epoch": 0.45628530169813736, "grad_norm": 7.181081642174104, "learning_rate": 5.945341830484012e-06, "loss": 17.5853, "step": 24962 }, { "epoch": 0.4563035808945839, "grad_norm": 6.726035651839546, "learning_rate": 5.945051156175377e-06, "loss": 17.7259, "step": 24963 }, { "epoch": 0.4563218600910304, "grad_norm": 5.917198199955994, "learning_rate": 5.944760478554416e-06, "loss": 17.3732, "step": 24964 }, { "epoch": 0.4563401392874769, "grad_norm": 6.127464928487141, "learning_rate": 5.9444697976221525e-06, "loss": 17.0653, "step": 24965 }, { "epoch": 0.45635841848392344, "grad_norm": 8.204374634512021, "learning_rate": 5.944179113379606e-06, "loss": 18.1827, "step": 24966 }, { "epoch": 0.45637669768037, "grad_norm": 6.621224850233595, "learning_rate": 5.943888425827793e-06, "loss": 17.4072, "step": 24967 }, { "epoch": 0.4563949768768165, "grad_norm": 6.47726189934075, "learning_rate": 5.943597734967732e-06, "loss": 17.6277, "step": 24968 }, { "epoch": 0.456413256073263, "grad_norm": 6.388248961482317, "learning_rate": 5.943307040800443e-06, "loss": 17.3201, "step": 24969 }, { "epoch": 0.45643153526970953, "grad_norm": 6.423716528385374, "learning_rate": 5.943016343326945e-06, "loss": 17.679, "step": 24970 }, { "epoch": 0.45644981446615607, "grad_norm": 5.6762539725291195, "learning_rate": 5.942725642548256e-06, "loss": 17.3484, "step": 24971 }, { "epoch": 0.4564680936626026, "grad_norm": 6.9716001442144675, "learning_rate": 5.942434938465396e-06, "loss": 17.838, "step": 24972 }, { "epoch": 0.45648637285904914, "grad_norm": 5.593409809155962, "learning_rate": 5.942144231079383e-06, "loss": 17.1198, "step": 24973 }, { "epoch": 0.4565046520554956, "grad_norm": 7.224772400014191, "learning_rate": 5.941853520391237e-06, "loss": 17.8676, "step": 24974 }, { "epoch": 0.45652293125194215, "grad_norm": 7.4132826230866735, "learning_rate": 5.941562806401975e-06, "loss": 18.2126, "step": 24975 }, { "epoch": 0.4565412104483887, "grad_norm": 5.8980284137638455, "learning_rate": 5.941272089112617e-06, "loss": 17.1877, "step": 24976 }, { "epoch": 0.4565594896448352, "grad_norm": 6.153005448015386, "learning_rate": 5.940981368524184e-06, "loss": 17.4887, "step": 24977 }, { "epoch": 0.45657776884128176, "grad_norm": 5.92254038898274, "learning_rate": 5.940690644637691e-06, "loss": 17.3755, "step": 24978 }, { "epoch": 0.45659604803772824, "grad_norm": 6.557337023734522, "learning_rate": 5.940399917454159e-06, "loss": 17.6272, "step": 24979 }, { "epoch": 0.4566143272341748, "grad_norm": 5.132409964638729, "learning_rate": 5.940109186974609e-06, "loss": 17.1082, "step": 24980 }, { "epoch": 0.4566326064306213, "grad_norm": 4.745751923636569, "learning_rate": 5.939818453200056e-06, "loss": 16.8106, "step": 24981 }, { "epoch": 0.45665088562706785, "grad_norm": 6.50431922714319, "learning_rate": 5.939527716131521e-06, "loss": 17.2606, "step": 24982 }, { "epoch": 0.4566691648235144, "grad_norm": 7.071391533159738, "learning_rate": 5.939236975770022e-06, "loss": 17.753, "step": 24983 }, { "epoch": 0.45668744401996086, "grad_norm": 6.161093548966815, "learning_rate": 5.938946232116581e-06, "loss": 17.1366, "step": 24984 }, { "epoch": 0.4567057232164074, "grad_norm": 5.539176743544879, "learning_rate": 5.9386554851722134e-06, "loss": 17.2145, "step": 24985 }, { "epoch": 0.45672400241285394, "grad_norm": 5.763589022567573, "learning_rate": 5.938364734937941e-06, "loss": 17.2451, "step": 24986 }, { "epoch": 0.45674228160930047, "grad_norm": 6.188618538411882, "learning_rate": 5.9380739814147805e-06, "loss": 17.569, "step": 24987 }, { "epoch": 0.456760560805747, "grad_norm": 7.275887982283109, "learning_rate": 5.937783224603753e-06, "loss": 18.0389, "step": 24988 }, { "epoch": 0.4567788400021935, "grad_norm": 5.9095840773301, "learning_rate": 5.937492464505875e-06, "loss": 17.1005, "step": 24989 }, { "epoch": 0.45679711919864, "grad_norm": 6.170282123101501, "learning_rate": 5.937201701122171e-06, "loss": 17.5119, "step": 24990 }, { "epoch": 0.45681539839508656, "grad_norm": 6.781463448853069, "learning_rate": 5.936910934453652e-06, "loss": 17.6081, "step": 24991 }, { "epoch": 0.4568336775915331, "grad_norm": 6.42650410498322, "learning_rate": 5.9366201645013435e-06, "loss": 17.6133, "step": 24992 }, { "epoch": 0.4568519567879796, "grad_norm": 6.944239694729154, "learning_rate": 5.936329391266261e-06, "loss": 17.68, "step": 24993 }, { "epoch": 0.4568702359844261, "grad_norm": 9.275047584937695, "learning_rate": 5.936038614749429e-06, "loss": 18.3214, "step": 24994 }, { "epoch": 0.45688851518087265, "grad_norm": 6.422879899323243, "learning_rate": 5.9357478349518595e-06, "loss": 17.5967, "step": 24995 }, { "epoch": 0.4569067943773192, "grad_norm": 7.40083545125522, "learning_rate": 5.935457051874575e-06, "loss": 18.1773, "step": 24996 }, { "epoch": 0.4569250735737657, "grad_norm": 7.582751640422174, "learning_rate": 5.935166265518597e-06, "loss": 17.8034, "step": 24997 }, { "epoch": 0.4569433527702122, "grad_norm": 5.281919500133999, "learning_rate": 5.934875475884942e-06, "loss": 17.1753, "step": 24998 }, { "epoch": 0.45696163196665873, "grad_norm": 5.054218994977413, "learning_rate": 5.9345846829746275e-06, "loss": 16.9593, "step": 24999 }, { "epoch": 0.45697991116310527, "grad_norm": 5.185175175409264, "learning_rate": 5.934293886788676e-06, "loss": 16.984, "step": 25000 }, { "epoch": 0.4569981903595518, "grad_norm": 6.5781400783196835, "learning_rate": 5.934003087328105e-06, "loss": 17.9447, "step": 25001 }, { "epoch": 0.45701646955599834, "grad_norm": 5.665450611948805, "learning_rate": 5.933712284593936e-06, "loss": 17.1155, "step": 25002 }, { "epoch": 0.4570347487524448, "grad_norm": 6.668786084884377, "learning_rate": 5.933421478587184e-06, "loss": 17.3732, "step": 25003 }, { "epoch": 0.45705302794889135, "grad_norm": 5.047739528366748, "learning_rate": 5.933130669308873e-06, "loss": 16.9366, "step": 25004 }, { "epoch": 0.4570713071453379, "grad_norm": 5.7553338101981835, "learning_rate": 5.932839856760018e-06, "loss": 17.3289, "step": 25005 }, { "epoch": 0.4570895863417844, "grad_norm": 5.205437676452599, "learning_rate": 5.932549040941641e-06, "loss": 16.904, "step": 25006 }, { "epoch": 0.45710786553823096, "grad_norm": 6.927558186739496, "learning_rate": 5.932258221854761e-06, "loss": 17.8677, "step": 25007 }, { "epoch": 0.45712614473467744, "grad_norm": 6.593786348312188, "learning_rate": 5.931967399500397e-06, "loss": 17.5772, "step": 25008 }, { "epoch": 0.457144423931124, "grad_norm": 8.053694123470054, "learning_rate": 5.931676573879568e-06, "loss": 17.9786, "step": 25009 }, { "epoch": 0.4571627031275705, "grad_norm": 5.583726033411606, "learning_rate": 5.931385744993292e-06, "loss": 17.0944, "step": 25010 }, { "epoch": 0.45718098232401705, "grad_norm": 5.1863500794669015, "learning_rate": 5.931094912842592e-06, "loss": 16.9153, "step": 25011 }, { "epoch": 0.4571992615204636, "grad_norm": 6.834577803973572, "learning_rate": 5.930804077428484e-06, "loss": 17.7712, "step": 25012 }, { "epoch": 0.45721754071691006, "grad_norm": 5.897365251803732, "learning_rate": 5.930513238751988e-06, "loss": 17.4395, "step": 25013 }, { "epoch": 0.4572358199133566, "grad_norm": 5.8493365305636535, "learning_rate": 5.930222396814125e-06, "loss": 17.4145, "step": 25014 }, { "epoch": 0.45725409910980314, "grad_norm": 5.287957136092569, "learning_rate": 5.929931551615912e-06, "loss": 17.0397, "step": 25015 }, { "epoch": 0.45727237830624967, "grad_norm": 7.513279019333759, "learning_rate": 5.9296407031583705e-06, "loss": 18.2377, "step": 25016 }, { "epoch": 0.4572906575026962, "grad_norm": 7.1404263870812175, "learning_rate": 5.929349851442519e-06, "loss": 17.5738, "step": 25017 }, { "epoch": 0.4573089366991427, "grad_norm": 5.902897723117464, "learning_rate": 5.929058996469377e-06, "loss": 17.2764, "step": 25018 }, { "epoch": 0.4573272158955892, "grad_norm": 6.950774681367328, "learning_rate": 5.928768138239962e-06, "loss": 17.6763, "step": 25019 }, { "epoch": 0.45734549509203576, "grad_norm": 5.235636441851553, "learning_rate": 5.928477276755297e-06, "loss": 16.9038, "step": 25020 }, { "epoch": 0.4573637742884823, "grad_norm": 5.592681693440445, "learning_rate": 5.9281864120164e-06, "loss": 17.149, "step": 25021 }, { "epoch": 0.45738205348492883, "grad_norm": 6.098654773706037, "learning_rate": 5.927895544024289e-06, "loss": 17.6138, "step": 25022 }, { "epoch": 0.4574003326813753, "grad_norm": 5.711515865679313, "learning_rate": 5.927604672779985e-06, "loss": 17.1243, "step": 25023 }, { "epoch": 0.45741861187782185, "grad_norm": 7.063529120006668, "learning_rate": 5.927313798284507e-06, "loss": 17.8486, "step": 25024 }, { "epoch": 0.4574368910742684, "grad_norm": 7.12987208995607, "learning_rate": 5.927022920538876e-06, "loss": 18.1592, "step": 25025 }, { "epoch": 0.4574551702707149, "grad_norm": 6.505539588273021, "learning_rate": 5.926732039544109e-06, "loss": 17.3972, "step": 25026 }, { "epoch": 0.4574734494671614, "grad_norm": 6.7104988310585645, "learning_rate": 5.926441155301226e-06, "loss": 17.385, "step": 25027 }, { "epoch": 0.45749172866360793, "grad_norm": 6.991219655943665, "learning_rate": 5.926150267811248e-06, "loss": 17.8121, "step": 25028 }, { "epoch": 0.45751000786005447, "grad_norm": 5.934969451134591, "learning_rate": 5.9258593770751935e-06, "loss": 17.486, "step": 25029 }, { "epoch": 0.457528287056501, "grad_norm": 9.175652058688627, "learning_rate": 5.925568483094081e-06, "loss": 17.9355, "step": 25030 }, { "epoch": 0.45754656625294754, "grad_norm": 6.92586047309848, "learning_rate": 5.925277585868934e-06, "loss": 17.6992, "step": 25031 }, { "epoch": 0.457564845449394, "grad_norm": 7.093206602569276, "learning_rate": 5.9249866854007685e-06, "loss": 17.9072, "step": 25032 }, { "epoch": 0.45758312464584056, "grad_norm": 5.559411962234882, "learning_rate": 5.924695781690604e-06, "loss": 17.046, "step": 25033 }, { "epoch": 0.4576014038422871, "grad_norm": 7.940295854282515, "learning_rate": 5.92440487473946e-06, "loss": 17.6611, "step": 25034 }, { "epoch": 0.4576196830387336, "grad_norm": 9.122628018645774, "learning_rate": 5.924113964548361e-06, "loss": 17.8055, "step": 25035 }, { "epoch": 0.45763796223518016, "grad_norm": 6.720342112232046, "learning_rate": 5.923823051118319e-06, "loss": 17.6522, "step": 25036 }, { "epoch": 0.45765624143162664, "grad_norm": 6.209130475710021, "learning_rate": 5.923532134450358e-06, "loss": 17.2298, "step": 25037 }, { "epoch": 0.4576745206280732, "grad_norm": 4.943226370519743, "learning_rate": 5.923241214545496e-06, "loss": 16.8489, "step": 25038 }, { "epoch": 0.4576927998245197, "grad_norm": 6.909209484041424, "learning_rate": 5.9229502914047565e-06, "loss": 17.4053, "step": 25039 }, { "epoch": 0.45771107902096625, "grad_norm": 5.227227420071569, "learning_rate": 5.922659365029156e-06, "loss": 17.0036, "step": 25040 }, { "epoch": 0.4577293582174128, "grad_norm": 6.522922057325364, "learning_rate": 5.922368435419713e-06, "loss": 17.774, "step": 25041 }, { "epoch": 0.45774763741385927, "grad_norm": 6.219062308287128, "learning_rate": 5.922077502577449e-06, "loss": 17.3931, "step": 25042 }, { "epoch": 0.4577659166103058, "grad_norm": 6.206941750695732, "learning_rate": 5.921786566503384e-06, "loss": 17.4327, "step": 25043 }, { "epoch": 0.45778419580675234, "grad_norm": 8.247628077256223, "learning_rate": 5.921495627198537e-06, "loss": 18.1207, "step": 25044 }, { "epoch": 0.45780247500319887, "grad_norm": 5.742348481906552, "learning_rate": 5.921204684663927e-06, "loss": 17.5051, "step": 25045 }, { "epoch": 0.4578207541996454, "grad_norm": 7.0053822967011214, "learning_rate": 5.920913738900575e-06, "loss": 17.9018, "step": 25046 }, { "epoch": 0.4578390333960919, "grad_norm": 6.3120630670410005, "learning_rate": 5.920622789909499e-06, "loss": 17.2657, "step": 25047 }, { "epoch": 0.4578573125925384, "grad_norm": 6.638560846178379, "learning_rate": 5.920331837691722e-06, "loss": 17.3365, "step": 25048 }, { "epoch": 0.45787559178898496, "grad_norm": 6.913310899836669, "learning_rate": 5.920040882248261e-06, "loss": 17.3952, "step": 25049 }, { "epoch": 0.4578938709854315, "grad_norm": 5.8329902562283165, "learning_rate": 5.919749923580137e-06, "loss": 17.53, "step": 25050 }, { "epoch": 0.45791215018187803, "grad_norm": 7.880407501848141, "learning_rate": 5.919458961688368e-06, "loss": 17.8751, "step": 25051 }, { "epoch": 0.4579304293783245, "grad_norm": 7.8721008769038985, "learning_rate": 5.919167996573975e-06, "loss": 17.7918, "step": 25052 }, { "epoch": 0.45794870857477105, "grad_norm": 6.212387184352187, "learning_rate": 5.918877028237982e-06, "loss": 17.2153, "step": 25053 }, { "epoch": 0.4579669877712176, "grad_norm": 7.679853254937747, "learning_rate": 5.9185860566814005e-06, "loss": 18.0365, "step": 25054 }, { "epoch": 0.4579852669676641, "grad_norm": 5.9041637537375316, "learning_rate": 5.9182950819052554e-06, "loss": 16.9869, "step": 25055 }, { "epoch": 0.45800354616411065, "grad_norm": 7.22318367184783, "learning_rate": 5.9180041039105664e-06, "loss": 18.0856, "step": 25056 }, { "epoch": 0.45802182536055713, "grad_norm": 8.581666754043548, "learning_rate": 5.917713122698352e-06, "loss": 18.331, "step": 25057 }, { "epoch": 0.45804010455700367, "grad_norm": 5.827562596542903, "learning_rate": 5.9174221382696325e-06, "loss": 17.3273, "step": 25058 }, { "epoch": 0.4580583837534502, "grad_norm": 5.2291750829573775, "learning_rate": 5.917131150625431e-06, "loss": 17.1274, "step": 25059 }, { "epoch": 0.45807666294989674, "grad_norm": 7.002193119910955, "learning_rate": 5.91684015976676e-06, "loss": 17.8088, "step": 25060 }, { "epoch": 0.4580949421463432, "grad_norm": 5.064940999586588, "learning_rate": 5.916549165694646e-06, "loss": 16.8917, "step": 25061 }, { "epoch": 0.45811322134278976, "grad_norm": 6.07487078474585, "learning_rate": 5.9162581684101065e-06, "loss": 17.2801, "step": 25062 }, { "epoch": 0.4581315005392363, "grad_norm": 6.100885615395288, "learning_rate": 5.915967167914163e-06, "loss": 17.3161, "step": 25063 }, { "epoch": 0.4581497797356828, "grad_norm": 6.365595687647508, "learning_rate": 5.915676164207833e-06, "loss": 17.3906, "step": 25064 }, { "epoch": 0.45816805893212936, "grad_norm": 8.370558908105483, "learning_rate": 5.915385157292135e-06, "loss": 17.9775, "step": 25065 }, { "epoch": 0.45818633812857584, "grad_norm": 4.743868565626321, "learning_rate": 5.915094147168096e-06, "loss": 16.7813, "step": 25066 }, { "epoch": 0.4582046173250224, "grad_norm": 6.470216535288608, "learning_rate": 5.914803133836729e-06, "loss": 17.4377, "step": 25067 }, { "epoch": 0.4582228965214689, "grad_norm": 5.208946094185995, "learning_rate": 5.914512117299056e-06, "loss": 16.9007, "step": 25068 }, { "epoch": 0.45824117571791545, "grad_norm": 6.511039458565328, "learning_rate": 5.914221097556097e-06, "loss": 17.6099, "step": 25069 }, { "epoch": 0.458259454914362, "grad_norm": 6.199890150444937, "learning_rate": 5.913930074608873e-06, "loss": 17.3178, "step": 25070 }, { "epoch": 0.45827773411080847, "grad_norm": 6.396809693260093, "learning_rate": 5.913639048458404e-06, "loss": 17.4528, "step": 25071 }, { "epoch": 0.458296013307255, "grad_norm": 5.383985903555381, "learning_rate": 5.913348019105709e-06, "loss": 17.1753, "step": 25072 }, { "epoch": 0.45831429250370154, "grad_norm": 8.01317385967738, "learning_rate": 5.913056986551809e-06, "loss": 18.2772, "step": 25073 }, { "epoch": 0.4583325717001481, "grad_norm": 5.055525897880802, "learning_rate": 5.912765950797723e-06, "loss": 17.0403, "step": 25074 }, { "epoch": 0.4583508508965946, "grad_norm": 5.834932092271731, "learning_rate": 5.912474911844471e-06, "loss": 17.349, "step": 25075 }, { "epoch": 0.4583691300930411, "grad_norm": 7.3257768903772265, "learning_rate": 5.912183869693074e-06, "loss": 17.8751, "step": 25076 }, { "epoch": 0.4583874092894876, "grad_norm": 7.538587155687487, "learning_rate": 5.911892824344554e-06, "loss": 18.3766, "step": 25077 }, { "epoch": 0.45840568848593416, "grad_norm": 7.160764942228313, "learning_rate": 5.911601775799925e-06, "loss": 17.8963, "step": 25078 }, { "epoch": 0.4584239676823807, "grad_norm": 7.378645955279105, "learning_rate": 5.911310724060213e-06, "loss": 17.7232, "step": 25079 }, { "epoch": 0.45844224687882723, "grad_norm": 5.049835034722711, "learning_rate": 5.9110196691264365e-06, "loss": 17.0266, "step": 25080 }, { "epoch": 0.4584605260752737, "grad_norm": 6.34414887518242, "learning_rate": 5.9107286109996135e-06, "loss": 17.4845, "step": 25081 }, { "epoch": 0.45847880527172025, "grad_norm": 5.977932616193766, "learning_rate": 5.910437549680766e-06, "loss": 17.3735, "step": 25082 }, { "epoch": 0.4584970844681668, "grad_norm": 6.308358394565755, "learning_rate": 5.910146485170914e-06, "loss": 17.6364, "step": 25083 }, { "epoch": 0.4585153636646133, "grad_norm": 5.86124418007269, "learning_rate": 5.9098554174710785e-06, "loss": 17.3927, "step": 25084 }, { "epoch": 0.45853364286105985, "grad_norm": 6.138031504468722, "learning_rate": 5.909564346582279e-06, "loss": 17.458, "step": 25085 }, { "epoch": 0.45855192205750633, "grad_norm": 5.4553383569489755, "learning_rate": 5.909273272505534e-06, "loss": 17.2117, "step": 25086 }, { "epoch": 0.45857020125395287, "grad_norm": 7.030659033261773, "learning_rate": 5.908982195241865e-06, "loss": 17.5818, "step": 25087 }, { "epoch": 0.4585884804503994, "grad_norm": 5.483053980848989, "learning_rate": 5.908691114792293e-06, "loss": 17.0663, "step": 25088 }, { "epoch": 0.45860675964684594, "grad_norm": 6.1136181010802275, "learning_rate": 5.908400031157837e-06, "loss": 17.1744, "step": 25089 }, { "epoch": 0.4586250388432925, "grad_norm": 5.69582017469816, "learning_rate": 5.908108944339519e-06, "loss": 17.0884, "step": 25090 }, { "epoch": 0.45864331803973896, "grad_norm": 5.736528975665548, "learning_rate": 5.907817854338357e-06, "loss": 17.1045, "step": 25091 }, { "epoch": 0.4586615972361855, "grad_norm": 7.6264465897051, "learning_rate": 5.907526761155371e-06, "loss": 17.871, "step": 25092 }, { "epoch": 0.458679876432632, "grad_norm": 6.128675020624806, "learning_rate": 5.907235664791583e-06, "loss": 17.5724, "step": 25093 }, { "epoch": 0.45869815562907856, "grad_norm": 5.955904531388647, "learning_rate": 5.906944565248015e-06, "loss": 17.3476, "step": 25094 }, { "epoch": 0.45871643482552504, "grad_norm": 7.08895749649592, "learning_rate": 5.9066534625256836e-06, "loss": 17.7101, "step": 25095 }, { "epoch": 0.4587347140219716, "grad_norm": 4.708930776386075, "learning_rate": 5.90636235662561e-06, "loss": 16.7383, "step": 25096 }, { "epoch": 0.4587529932184181, "grad_norm": 7.568027262060807, "learning_rate": 5.906071247548814e-06, "loss": 18.0702, "step": 25097 }, { "epoch": 0.45877127241486465, "grad_norm": 6.160265851589954, "learning_rate": 5.90578013529632e-06, "loss": 17.3222, "step": 25098 }, { "epoch": 0.4587895516113112, "grad_norm": 5.221119349192646, "learning_rate": 5.905489019869142e-06, "loss": 16.8484, "step": 25099 }, { "epoch": 0.45880783080775767, "grad_norm": 5.777141960437828, "learning_rate": 5.905197901268305e-06, "loss": 17.3222, "step": 25100 }, { "epoch": 0.4588261100042042, "grad_norm": 6.447375505580861, "learning_rate": 5.9049067794948275e-06, "loss": 17.2996, "step": 25101 }, { "epoch": 0.45884438920065074, "grad_norm": 6.997710347061462, "learning_rate": 5.904615654549732e-06, "loss": 17.3454, "step": 25102 }, { "epoch": 0.4588626683970973, "grad_norm": 7.18329244941718, "learning_rate": 5.904324526434035e-06, "loss": 17.8857, "step": 25103 }, { "epoch": 0.4588809475935438, "grad_norm": 6.85829402999007, "learning_rate": 5.904033395148761e-06, "loss": 17.6004, "step": 25104 }, { "epoch": 0.4588992267899903, "grad_norm": 6.094213590941932, "learning_rate": 5.903742260694926e-06, "loss": 17.2283, "step": 25105 }, { "epoch": 0.4589175059864368, "grad_norm": 7.3716304976459135, "learning_rate": 5.903451123073554e-06, "loss": 17.8677, "step": 25106 }, { "epoch": 0.45893578518288336, "grad_norm": 5.238332336507377, "learning_rate": 5.903159982285663e-06, "loss": 17.0321, "step": 25107 }, { "epoch": 0.4589540643793299, "grad_norm": 6.935188174699001, "learning_rate": 5.902868838332277e-06, "loss": 17.7228, "step": 25108 }, { "epoch": 0.45897234357577643, "grad_norm": 6.557339988151756, "learning_rate": 5.9025776912144125e-06, "loss": 17.7469, "step": 25109 }, { "epoch": 0.4589906227722229, "grad_norm": 7.055907107137074, "learning_rate": 5.902286540933091e-06, "loss": 17.8525, "step": 25110 }, { "epoch": 0.45900890196866945, "grad_norm": 5.214122725260912, "learning_rate": 5.901995387489335e-06, "loss": 17.1292, "step": 25111 }, { "epoch": 0.459027181165116, "grad_norm": 6.4066059822303485, "learning_rate": 5.9017042308841635e-06, "loss": 17.475, "step": 25112 }, { "epoch": 0.4590454603615625, "grad_norm": 6.777180689280732, "learning_rate": 5.901413071118596e-06, "loss": 17.4952, "step": 25113 }, { "epoch": 0.45906373955800905, "grad_norm": 6.146671576227362, "learning_rate": 5.901121908193654e-06, "loss": 17.554, "step": 25114 }, { "epoch": 0.45908201875445553, "grad_norm": 6.734242849027381, "learning_rate": 5.900830742110358e-06, "loss": 17.8993, "step": 25115 }, { "epoch": 0.45910029795090207, "grad_norm": 5.438781339591436, "learning_rate": 5.900539572869728e-06, "loss": 17.2216, "step": 25116 }, { "epoch": 0.4591185771473486, "grad_norm": 5.51344483745984, "learning_rate": 5.900248400472786e-06, "loss": 17.3257, "step": 25117 }, { "epoch": 0.45913685634379514, "grad_norm": 6.841229462459413, "learning_rate": 5.899957224920551e-06, "loss": 17.4026, "step": 25118 }, { "epoch": 0.4591551355402417, "grad_norm": 7.015296903704281, "learning_rate": 5.899666046214043e-06, "loss": 17.751, "step": 25119 }, { "epoch": 0.45917341473668816, "grad_norm": 6.198221657390064, "learning_rate": 5.899374864354284e-06, "loss": 17.2304, "step": 25120 }, { "epoch": 0.4591916939331347, "grad_norm": 5.665737824164518, "learning_rate": 5.899083679342296e-06, "loss": 17.0227, "step": 25121 }, { "epoch": 0.45920997312958123, "grad_norm": 7.453643756704126, "learning_rate": 5.898792491179096e-06, "loss": 17.9597, "step": 25122 }, { "epoch": 0.45922825232602776, "grad_norm": 8.077937538556672, "learning_rate": 5.898501299865707e-06, "loss": 18.109, "step": 25123 }, { "epoch": 0.4592465315224743, "grad_norm": 8.610773605697204, "learning_rate": 5.898210105403147e-06, "loss": 18.0171, "step": 25124 }, { "epoch": 0.4592648107189208, "grad_norm": 6.443037608981733, "learning_rate": 5.897918907792442e-06, "loss": 17.3407, "step": 25125 }, { "epoch": 0.4592830899153673, "grad_norm": 6.200238922158968, "learning_rate": 5.897627707034606e-06, "loss": 17.6508, "step": 25126 }, { "epoch": 0.45930136911181385, "grad_norm": 9.187035784712638, "learning_rate": 5.897336503130664e-06, "loss": 18.0105, "step": 25127 }, { "epoch": 0.4593196483082604, "grad_norm": 7.111063903992415, "learning_rate": 5.897045296081636e-06, "loss": 17.4023, "step": 25128 }, { "epoch": 0.45933792750470687, "grad_norm": 6.1989517543475054, "learning_rate": 5.896754085888541e-06, "loss": 17.3739, "step": 25129 }, { "epoch": 0.4593562067011534, "grad_norm": 5.6610742089571335, "learning_rate": 5.896462872552401e-06, "loss": 17.3181, "step": 25130 }, { "epoch": 0.45937448589759994, "grad_norm": 6.512710182304373, "learning_rate": 5.896171656074237e-06, "loss": 17.3586, "step": 25131 }, { "epoch": 0.4593927650940465, "grad_norm": 6.210866625513039, "learning_rate": 5.895880436455068e-06, "loss": 17.3805, "step": 25132 }, { "epoch": 0.459411044290493, "grad_norm": 6.937713023128045, "learning_rate": 5.895589213695917e-06, "loss": 17.7345, "step": 25133 }, { "epoch": 0.4594293234869395, "grad_norm": 9.909806208021777, "learning_rate": 5.895297987797803e-06, "loss": 17.8982, "step": 25134 }, { "epoch": 0.459447602683386, "grad_norm": 5.917635441195128, "learning_rate": 5.895006758761749e-06, "loss": 17.294, "step": 25135 }, { "epoch": 0.45946588187983256, "grad_norm": 6.053944887018149, "learning_rate": 5.894715526588771e-06, "loss": 17.3829, "step": 25136 }, { "epoch": 0.4594841610762791, "grad_norm": 6.8268018748677575, "learning_rate": 5.8944242912798935e-06, "loss": 17.483, "step": 25137 }, { "epoch": 0.45950244027272563, "grad_norm": 5.369844108200249, "learning_rate": 5.894133052836138e-06, "loss": 17.2398, "step": 25138 }, { "epoch": 0.4595207194691721, "grad_norm": 13.63973965013231, "learning_rate": 5.8938418112585225e-06, "loss": 18.634, "step": 25139 }, { "epoch": 0.45953899866561865, "grad_norm": 5.121883898150083, "learning_rate": 5.8935505665480695e-06, "loss": 17.051, "step": 25140 }, { "epoch": 0.4595572778620652, "grad_norm": 6.89128015628205, "learning_rate": 5.893259318705799e-06, "loss": 17.4457, "step": 25141 }, { "epoch": 0.4595755570585117, "grad_norm": 6.989561441812156, "learning_rate": 5.892968067732731e-06, "loss": 17.7903, "step": 25142 }, { "epoch": 0.45959383625495825, "grad_norm": 5.770252141612089, "learning_rate": 5.892676813629889e-06, "loss": 17.1302, "step": 25143 }, { "epoch": 0.45961211545140473, "grad_norm": 6.059587407676246, "learning_rate": 5.892385556398292e-06, "loss": 17.501, "step": 25144 }, { "epoch": 0.45963039464785127, "grad_norm": 6.491129225958094, "learning_rate": 5.892094296038961e-06, "loss": 17.6745, "step": 25145 }, { "epoch": 0.4596486738442978, "grad_norm": 7.600274561858066, "learning_rate": 5.891803032552916e-06, "loss": 17.8503, "step": 25146 }, { "epoch": 0.45966695304074434, "grad_norm": 7.128989380532426, "learning_rate": 5.89151176594118e-06, "loss": 17.8868, "step": 25147 }, { "epoch": 0.4596852322371909, "grad_norm": 6.210565817815751, "learning_rate": 5.891220496204772e-06, "loss": 17.5276, "step": 25148 }, { "epoch": 0.45970351143363736, "grad_norm": 5.885705935678427, "learning_rate": 5.890929223344715e-06, "loss": 17.3678, "step": 25149 }, { "epoch": 0.4597217906300839, "grad_norm": 6.667881055100639, "learning_rate": 5.890637947362027e-06, "loss": 17.8547, "step": 25150 }, { "epoch": 0.45974006982653043, "grad_norm": 6.99481690001039, "learning_rate": 5.890346668257729e-06, "loss": 17.7187, "step": 25151 }, { "epoch": 0.45975834902297696, "grad_norm": 7.695289062844708, "learning_rate": 5.890055386032845e-06, "loss": 18.2169, "step": 25152 }, { "epoch": 0.4597766282194235, "grad_norm": 6.9050078947645925, "learning_rate": 5.889764100688394e-06, "loss": 17.721, "step": 25153 }, { "epoch": 0.45979490741587, "grad_norm": 6.58402216988812, "learning_rate": 5.8894728122253965e-06, "loss": 17.4457, "step": 25154 }, { "epoch": 0.4598131866123165, "grad_norm": 6.988560929739352, "learning_rate": 5.889181520644874e-06, "loss": 17.2455, "step": 25155 }, { "epoch": 0.45983146580876305, "grad_norm": 6.206599477481273, "learning_rate": 5.888890225947848e-06, "loss": 17.4213, "step": 25156 }, { "epoch": 0.4598497450052096, "grad_norm": 5.6008437350115985, "learning_rate": 5.888598928135338e-06, "loss": 17.0786, "step": 25157 }, { "epoch": 0.4598680242016561, "grad_norm": 7.791893690638781, "learning_rate": 5.888307627208366e-06, "loss": 18.0871, "step": 25158 }, { "epoch": 0.4598863033981026, "grad_norm": 5.786612455520085, "learning_rate": 5.888016323167954e-06, "loss": 17.1307, "step": 25159 }, { "epoch": 0.45990458259454914, "grad_norm": 6.026676468863097, "learning_rate": 5.88772501601512e-06, "loss": 17.2374, "step": 25160 }, { "epoch": 0.4599228617909957, "grad_norm": 9.161378002187389, "learning_rate": 5.887433705750889e-06, "loss": 18.0735, "step": 25161 }, { "epoch": 0.4599411409874422, "grad_norm": 7.409644025610671, "learning_rate": 5.887142392376279e-06, "loss": 17.5688, "step": 25162 }, { "epoch": 0.4599594201838887, "grad_norm": 5.614609338578762, "learning_rate": 5.886851075892311e-06, "loss": 17.2005, "step": 25163 }, { "epoch": 0.4599776993803352, "grad_norm": 6.605912678841178, "learning_rate": 5.886559756300008e-06, "loss": 17.5157, "step": 25164 }, { "epoch": 0.45999597857678176, "grad_norm": 4.981666970189918, "learning_rate": 5.886268433600388e-06, "loss": 16.7706, "step": 25165 }, { "epoch": 0.4600142577732283, "grad_norm": 6.723853799327433, "learning_rate": 5.885977107794477e-06, "loss": 17.6284, "step": 25166 }, { "epoch": 0.46003253696967483, "grad_norm": 4.767315444881347, "learning_rate": 5.885685778883292e-06, "loss": 16.9863, "step": 25167 }, { "epoch": 0.4600508161661213, "grad_norm": 5.883460762423431, "learning_rate": 5.885394446867855e-06, "loss": 17.5723, "step": 25168 }, { "epoch": 0.46006909536256785, "grad_norm": 6.7623460547856915, "learning_rate": 5.885103111749186e-06, "loss": 17.5193, "step": 25169 }, { "epoch": 0.4600873745590144, "grad_norm": 5.578138536650387, "learning_rate": 5.884811773528309e-06, "loss": 17.1289, "step": 25170 }, { "epoch": 0.4601056537554609, "grad_norm": 7.633891638786964, "learning_rate": 5.884520432206243e-06, "loss": 17.5063, "step": 25171 }, { "epoch": 0.46012393295190746, "grad_norm": 7.294758059690856, "learning_rate": 5.88422908778401e-06, "loss": 17.6058, "step": 25172 }, { "epoch": 0.46014221214835394, "grad_norm": 6.53870452695131, "learning_rate": 5.883937740262631e-06, "loss": 17.7105, "step": 25173 }, { "epoch": 0.46016049134480047, "grad_norm": 7.234411483567285, "learning_rate": 5.883646389643126e-06, "loss": 17.8144, "step": 25174 }, { "epoch": 0.460178770541247, "grad_norm": 5.87520245327371, "learning_rate": 5.883355035926518e-06, "loss": 17.2566, "step": 25175 }, { "epoch": 0.46019704973769354, "grad_norm": 7.127310428675387, "learning_rate": 5.8830636791138265e-06, "loss": 17.3071, "step": 25176 }, { "epoch": 0.4602153289341401, "grad_norm": 7.0579754506303205, "learning_rate": 5.8827723192060745e-06, "loss": 17.7469, "step": 25177 }, { "epoch": 0.46023360813058656, "grad_norm": 7.4672521690141584, "learning_rate": 5.882480956204281e-06, "loss": 18.2015, "step": 25178 }, { "epoch": 0.4602518873270331, "grad_norm": 5.607334949849727, "learning_rate": 5.882189590109468e-06, "loss": 17.1994, "step": 25179 }, { "epoch": 0.46027016652347963, "grad_norm": 6.617843704706007, "learning_rate": 5.881898220922658e-06, "loss": 17.7119, "step": 25180 }, { "epoch": 0.46028844571992616, "grad_norm": 8.233597757977755, "learning_rate": 5.881606848644872e-06, "loss": 18.2433, "step": 25181 }, { "epoch": 0.4603067249163727, "grad_norm": 6.630516398476976, "learning_rate": 5.881315473277129e-06, "loss": 17.459, "step": 25182 }, { "epoch": 0.4603250041128192, "grad_norm": 6.928581051265842, "learning_rate": 5.881024094820451e-06, "loss": 17.697, "step": 25183 }, { "epoch": 0.4603432833092657, "grad_norm": 6.309244438724938, "learning_rate": 5.880732713275863e-06, "loss": 17.4451, "step": 25184 }, { "epoch": 0.46036156250571225, "grad_norm": 10.785039141572657, "learning_rate": 5.880441328644381e-06, "loss": 17.9353, "step": 25185 }, { "epoch": 0.4603798417021588, "grad_norm": 5.6893484008183135, "learning_rate": 5.880149940927029e-06, "loss": 17.4092, "step": 25186 }, { "epoch": 0.4603981208986053, "grad_norm": 12.42222377509774, "learning_rate": 5.879858550124827e-06, "loss": 17.9524, "step": 25187 }, { "epoch": 0.4604164000950518, "grad_norm": 6.33427230977597, "learning_rate": 5.879567156238799e-06, "loss": 17.2989, "step": 25188 }, { "epoch": 0.46043467929149834, "grad_norm": 7.729890168576504, "learning_rate": 5.879275759269963e-06, "loss": 18.2629, "step": 25189 }, { "epoch": 0.4604529584879449, "grad_norm": 7.295201757649203, "learning_rate": 5.878984359219343e-06, "loss": 17.7452, "step": 25190 }, { "epoch": 0.4604712376843914, "grad_norm": 6.160423920275361, "learning_rate": 5.878692956087959e-06, "loss": 17.3426, "step": 25191 }, { "epoch": 0.46048951688083795, "grad_norm": 8.42585663647094, "learning_rate": 5.87840154987683e-06, "loss": 18.0182, "step": 25192 }, { "epoch": 0.4605077960772844, "grad_norm": 7.534976925029885, "learning_rate": 5.878110140586981e-06, "loss": 18.3795, "step": 25193 }, { "epoch": 0.46052607527373096, "grad_norm": 7.314838726838559, "learning_rate": 5.877818728219434e-06, "loss": 17.5525, "step": 25194 }, { "epoch": 0.4605443544701775, "grad_norm": 5.671522377539959, "learning_rate": 5.877527312775207e-06, "loss": 17.1415, "step": 25195 }, { "epoch": 0.46056263366662403, "grad_norm": 6.277055034352918, "learning_rate": 5.877235894255323e-06, "loss": 17.2759, "step": 25196 }, { "epoch": 0.4605809128630705, "grad_norm": 5.267296296698131, "learning_rate": 5.876944472660803e-06, "loss": 17.0471, "step": 25197 }, { "epoch": 0.46059919205951705, "grad_norm": 7.532153553065105, "learning_rate": 5.87665304799267e-06, "loss": 18.3739, "step": 25198 }, { "epoch": 0.4606174712559636, "grad_norm": 6.211914389122407, "learning_rate": 5.8763616202519435e-06, "loss": 17.4008, "step": 25199 }, { "epoch": 0.4606357504524101, "grad_norm": 4.745888638877521, "learning_rate": 5.876070189439645e-06, "loss": 16.9409, "step": 25200 }, { "epoch": 0.46065402964885666, "grad_norm": 7.994165483051935, "learning_rate": 5.875778755556797e-06, "loss": 18.1199, "step": 25201 }, { "epoch": 0.46067230884530314, "grad_norm": 7.676489971327513, "learning_rate": 5.8754873186044205e-06, "loss": 17.8826, "step": 25202 }, { "epoch": 0.46069058804174967, "grad_norm": 6.803787940231963, "learning_rate": 5.875195878583536e-06, "loss": 17.6777, "step": 25203 }, { "epoch": 0.4607088672381962, "grad_norm": 6.612945588168071, "learning_rate": 5.874904435495168e-06, "loss": 17.4038, "step": 25204 }, { "epoch": 0.46072714643464274, "grad_norm": 5.49287347466228, "learning_rate": 5.874612989340334e-06, "loss": 16.9628, "step": 25205 }, { "epoch": 0.4607454256310893, "grad_norm": 6.219369992367084, "learning_rate": 5.874321540120057e-06, "loss": 17.4567, "step": 25206 }, { "epoch": 0.46076370482753576, "grad_norm": 5.8198794907322515, "learning_rate": 5.87403008783536e-06, "loss": 17.366, "step": 25207 }, { "epoch": 0.4607819840239823, "grad_norm": 7.891903307016829, "learning_rate": 5.873738632487265e-06, "loss": 18.1387, "step": 25208 }, { "epoch": 0.46080026322042883, "grad_norm": 5.462459582829687, "learning_rate": 5.873447174076789e-06, "loss": 16.8464, "step": 25209 }, { "epoch": 0.46081854241687537, "grad_norm": 5.379997424751426, "learning_rate": 5.873155712604956e-06, "loss": 16.9951, "step": 25210 }, { "epoch": 0.4608368216133219, "grad_norm": 5.334205765095013, "learning_rate": 5.8728642480727915e-06, "loss": 17.0456, "step": 25211 }, { "epoch": 0.4608551008097684, "grad_norm": 6.22783642163609, "learning_rate": 5.8725727804813115e-06, "loss": 17.4138, "step": 25212 }, { "epoch": 0.4608733800062149, "grad_norm": 4.8363785858796575, "learning_rate": 5.87228130983154e-06, "loss": 16.8598, "step": 25213 }, { "epoch": 0.46089165920266145, "grad_norm": 7.366645808860496, "learning_rate": 5.871989836124498e-06, "loss": 17.8513, "step": 25214 }, { "epoch": 0.460909938399108, "grad_norm": 6.301782432938622, "learning_rate": 5.871698359361207e-06, "loss": 17.4138, "step": 25215 }, { "epoch": 0.4609282175955545, "grad_norm": 6.958742772704289, "learning_rate": 5.871406879542688e-06, "loss": 17.8689, "step": 25216 }, { "epoch": 0.460946496792001, "grad_norm": 6.499677068371723, "learning_rate": 5.871115396669965e-06, "loss": 17.2869, "step": 25217 }, { "epoch": 0.46096477598844754, "grad_norm": 6.879665208187112, "learning_rate": 5.870823910744059e-06, "loss": 17.3627, "step": 25218 }, { "epoch": 0.4609830551848941, "grad_norm": 7.381367835499502, "learning_rate": 5.8705324217659886e-06, "loss": 17.6932, "step": 25219 }, { "epoch": 0.4610013343813406, "grad_norm": 5.87604450242848, "learning_rate": 5.870240929736778e-06, "loss": 17.2681, "step": 25220 }, { "epoch": 0.46101961357778715, "grad_norm": 7.899233988968155, "learning_rate": 5.869949434657449e-06, "loss": 17.809, "step": 25221 }, { "epoch": 0.4610378927742336, "grad_norm": 5.016263511686514, "learning_rate": 5.869657936529023e-06, "loss": 16.9404, "step": 25222 }, { "epoch": 0.46105617197068016, "grad_norm": 6.545002506397308, "learning_rate": 5.869366435352521e-06, "loss": 17.3206, "step": 25223 }, { "epoch": 0.4610744511671267, "grad_norm": 7.149939246215548, "learning_rate": 5.869074931128964e-06, "loss": 17.4842, "step": 25224 }, { "epoch": 0.46109273036357323, "grad_norm": 6.060316208116345, "learning_rate": 5.868783423859378e-06, "loss": 17.4576, "step": 25225 }, { "epoch": 0.46111100956001977, "grad_norm": 5.562265651496669, "learning_rate": 5.868491913544779e-06, "loss": 17.0999, "step": 25226 }, { "epoch": 0.46112928875646625, "grad_norm": 6.356959184287293, "learning_rate": 5.868200400186191e-06, "loss": 17.4441, "step": 25227 }, { "epoch": 0.4611475679529128, "grad_norm": 6.443925494825827, "learning_rate": 5.867908883784637e-06, "loss": 17.124, "step": 25228 }, { "epoch": 0.4611658471493593, "grad_norm": 6.061029860344893, "learning_rate": 5.867617364341137e-06, "loss": 17.1974, "step": 25229 }, { "epoch": 0.46118412634580586, "grad_norm": 5.775275949768164, "learning_rate": 5.8673258418567134e-06, "loss": 17.0047, "step": 25230 }, { "epoch": 0.46120240554225234, "grad_norm": 5.633981491550323, "learning_rate": 5.867034316332389e-06, "loss": 17.2366, "step": 25231 }, { "epoch": 0.46122068473869887, "grad_norm": 6.859929030125392, "learning_rate": 5.8667427877691825e-06, "loss": 17.5946, "step": 25232 }, { "epoch": 0.4612389639351454, "grad_norm": 7.569064890538536, "learning_rate": 5.86645125616812e-06, "loss": 17.7965, "step": 25233 }, { "epoch": 0.46125724313159194, "grad_norm": 6.642838065807771, "learning_rate": 5.86615972153022e-06, "loss": 17.7194, "step": 25234 }, { "epoch": 0.4612755223280385, "grad_norm": 6.321309871406881, "learning_rate": 5.8658681838565065e-06, "loss": 17.1463, "step": 25235 }, { "epoch": 0.46129380152448496, "grad_norm": 6.560191692175135, "learning_rate": 5.865576643147999e-06, "loss": 17.7838, "step": 25236 }, { "epoch": 0.4613120807209315, "grad_norm": 10.608667819433098, "learning_rate": 5.8652850994057184e-06, "loss": 17.9546, "step": 25237 }, { "epoch": 0.46133035991737803, "grad_norm": 5.644111811817615, "learning_rate": 5.8649935526306915e-06, "loss": 17.4048, "step": 25238 }, { "epoch": 0.46134863911382457, "grad_norm": 5.340542725307962, "learning_rate": 5.864702002823938e-06, "loss": 17.1213, "step": 25239 }, { "epoch": 0.4613669183102711, "grad_norm": 6.584077175225929, "learning_rate": 5.864410449986478e-06, "loss": 17.4067, "step": 25240 }, { "epoch": 0.4613851975067176, "grad_norm": 7.2018494393832295, "learning_rate": 5.864118894119333e-06, "loss": 17.9102, "step": 25241 }, { "epoch": 0.4614034767031641, "grad_norm": 5.720497312017708, "learning_rate": 5.863827335223526e-06, "loss": 17.1027, "step": 25242 }, { "epoch": 0.46142175589961065, "grad_norm": 6.62557114176098, "learning_rate": 5.863535773300081e-06, "loss": 17.6402, "step": 25243 }, { "epoch": 0.4614400350960572, "grad_norm": 6.843530139692724, "learning_rate": 5.863244208350017e-06, "loss": 17.7313, "step": 25244 }, { "epoch": 0.4614583142925037, "grad_norm": 6.098550955711443, "learning_rate": 5.862952640374358e-06, "loss": 17.4683, "step": 25245 }, { "epoch": 0.4614765934889502, "grad_norm": 5.8955470014577465, "learning_rate": 5.862661069374123e-06, "loss": 17.3704, "step": 25246 }, { "epoch": 0.46149487268539674, "grad_norm": 5.348734500097625, "learning_rate": 5.862369495350337e-06, "loss": 16.8836, "step": 25247 }, { "epoch": 0.4615131518818433, "grad_norm": 7.734146139843284, "learning_rate": 5.862077918304021e-06, "loss": 18.1261, "step": 25248 }, { "epoch": 0.4615314310782898, "grad_norm": 6.757837059362166, "learning_rate": 5.861786338236198e-06, "loss": 17.9775, "step": 25249 }, { "epoch": 0.46154971027473635, "grad_norm": 7.728993264104784, "learning_rate": 5.861494755147887e-06, "loss": 18.1239, "step": 25250 }, { "epoch": 0.4615679894711828, "grad_norm": 5.6011239945637, "learning_rate": 5.86120316904011e-06, "loss": 17.2771, "step": 25251 }, { "epoch": 0.46158626866762936, "grad_norm": 6.037583735673573, "learning_rate": 5.8609115799138925e-06, "loss": 17.5366, "step": 25252 }, { "epoch": 0.4616045478640759, "grad_norm": 7.21085759175853, "learning_rate": 5.860619987770256e-06, "loss": 18.182, "step": 25253 }, { "epoch": 0.46162282706052243, "grad_norm": 6.658784762118231, "learning_rate": 5.86032839261022e-06, "loss": 17.7426, "step": 25254 }, { "epoch": 0.46164110625696897, "grad_norm": 5.8194026285300415, "learning_rate": 5.860036794434807e-06, "loss": 17.4987, "step": 25255 }, { "epoch": 0.46165938545341545, "grad_norm": 6.869381633538686, "learning_rate": 5.85974519324504e-06, "loss": 17.8578, "step": 25256 }, { "epoch": 0.461677664649862, "grad_norm": 6.092164871274417, "learning_rate": 5.8594535890419405e-06, "loss": 17.3048, "step": 25257 }, { "epoch": 0.4616959438463085, "grad_norm": 5.755045555872947, "learning_rate": 5.859161981826531e-06, "loss": 17.2976, "step": 25258 }, { "epoch": 0.46171422304275506, "grad_norm": 6.588632099795179, "learning_rate": 5.858870371599833e-06, "loss": 17.2774, "step": 25259 }, { "epoch": 0.4617325022392016, "grad_norm": 6.464079790384099, "learning_rate": 5.858578758362869e-06, "loss": 17.6903, "step": 25260 }, { "epoch": 0.46175078143564807, "grad_norm": 6.743054818292839, "learning_rate": 5.858287142116661e-06, "loss": 17.5171, "step": 25261 }, { "epoch": 0.4617690606320946, "grad_norm": 8.15978190782185, "learning_rate": 5.8579955228622305e-06, "loss": 17.9251, "step": 25262 }, { "epoch": 0.46178733982854114, "grad_norm": 7.962239552499945, "learning_rate": 5.857703900600602e-06, "loss": 17.6902, "step": 25263 }, { "epoch": 0.4618056190249877, "grad_norm": 6.622267708487954, "learning_rate": 5.857412275332795e-06, "loss": 17.5825, "step": 25264 }, { "epoch": 0.46182389822143416, "grad_norm": 6.302864673146543, "learning_rate": 5.8571206470598304e-06, "loss": 17.3838, "step": 25265 }, { "epoch": 0.4618421774178807, "grad_norm": 6.524106147421126, "learning_rate": 5.856829015782734e-06, "loss": 17.4938, "step": 25266 }, { "epoch": 0.46186045661432723, "grad_norm": 6.8560153214061295, "learning_rate": 5.856537381502527e-06, "loss": 17.8109, "step": 25267 }, { "epoch": 0.46187873581077377, "grad_norm": 5.7603696737380785, "learning_rate": 5.85624574422023e-06, "loss": 17.306, "step": 25268 }, { "epoch": 0.4618970150072203, "grad_norm": 6.245738680757659, "learning_rate": 5.8559541039368654e-06, "loss": 17.4456, "step": 25269 }, { "epoch": 0.4619152942036668, "grad_norm": 9.23885331020808, "learning_rate": 5.855662460653457e-06, "loss": 17.7326, "step": 25270 }, { "epoch": 0.4619335734001133, "grad_norm": 4.698139770850545, "learning_rate": 5.855370814371024e-06, "loss": 16.9017, "step": 25271 }, { "epoch": 0.46195185259655985, "grad_norm": 6.815264462425667, "learning_rate": 5.8550791650905925e-06, "loss": 17.996, "step": 25272 }, { "epoch": 0.4619701317930064, "grad_norm": 5.414622819904023, "learning_rate": 5.854787512813183e-06, "loss": 16.9644, "step": 25273 }, { "epoch": 0.4619884109894529, "grad_norm": 8.818899491645826, "learning_rate": 5.854495857539816e-06, "loss": 18.2126, "step": 25274 }, { "epoch": 0.4620066901858994, "grad_norm": 8.150483084597855, "learning_rate": 5.854204199271515e-06, "loss": 17.9843, "step": 25275 }, { "epoch": 0.46202496938234594, "grad_norm": 7.73954821080085, "learning_rate": 5.853912538009303e-06, "loss": 18.2651, "step": 25276 }, { "epoch": 0.4620432485787925, "grad_norm": 6.402496847467016, "learning_rate": 5.853620873754202e-06, "loss": 17.48, "step": 25277 }, { "epoch": 0.462061527775239, "grad_norm": 6.084215762293935, "learning_rate": 5.853329206507234e-06, "loss": 17.6089, "step": 25278 }, { "epoch": 0.46207980697168555, "grad_norm": 7.669596020238035, "learning_rate": 5.85303753626942e-06, "loss": 18.0464, "step": 25279 }, { "epoch": 0.462098086168132, "grad_norm": 7.3662375825774316, "learning_rate": 5.852745863041786e-06, "loss": 17.9197, "step": 25280 }, { "epoch": 0.46211636536457856, "grad_norm": 7.026272663659752, "learning_rate": 5.85245418682535e-06, "loss": 17.8691, "step": 25281 }, { "epoch": 0.4621346445610251, "grad_norm": 5.535689058701501, "learning_rate": 5.852162507621135e-06, "loss": 17.3449, "step": 25282 }, { "epoch": 0.46215292375747163, "grad_norm": 6.74302090668274, "learning_rate": 5.851870825430165e-06, "loss": 17.4523, "step": 25283 }, { "epoch": 0.46217120295391817, "grad_norm": 6.12401175446677, "learning_rate": 5.851579140253463e-06, "loss": 17.1992, "step": 25284 }, { "epoch": 0.46218948215036465, "grad_norm": 5.439906872526443, "learning_rate": 5.851287452092048e-06, "loss": 17.3654, "step": 25285 }, { "epoch": 0.4622077613468112, "grad_norm": 7.301342799027574, "learning_rate": 5.850995760946946e-06, "loss": 17.372, "step": 25286 }, { "epoch": 0.4622260405432577, "grad_norm": 7.031633964986034, "learning_rate": 5.850704066819177e-06, "loss": 17.9206, "step": 25287 }, { "epoch": 0.46224431973970426, "grad_norm": 5.336220084573948, "learning_rate": 5.850412369709764e-06, "loss": 17.0533, "step": 25288 }, { "epoch": 0.4622625989361508, "grad_norm": 5.539355997825748, "learning_rate": 5.8501206696197296e-06, "loss": 17.2112, "step": 25289 }, { "epoch": 0.4622808781325973, "grad_norm": 7.1189937992224275, "learning_rate": 5.849828966550098e-06, "loss": 17.7607, "step": 25290 }, { "epoch": 0.4622991573290438, "grad_norm": 6.133941928886113, "learning_rate": 5.849537260501886e-06, "loss": 17.5482, "step": 25291 }, { "epoch": 0.46231743652549034, "grad_norm": 6.225187836437039, "learning_rate": 5.849245551476122e-06, "loss": 17.4509, "step": 25292 }, { "epoch": 0.4623357157219369, "grad_norm": 7.658657985499206, "learning_rate": 5.8489538394738245e-06, "loss": 17.999, "step": 25293 }, { "epoch": 0.4623539949183834, "grad_norm": 5.98802306611786, "learning_rate": 5.84866212449602e-06, "loss": 17.285, "step": 25294 }, { "epoch": 0.4623722741148299, "grad_norm": 7.570375819435523, "learning_rate": 5.848370406543727e-06, "loss": 18.1253, "step": 25295 }, { "epoch": 0.46239055331127643, "grad_norm": 8.033185384736198, "learning_rate": 5.848078685617967e-06, "loss": 18.0514, "step": 25296 }, { "epoch": 0.46240883250772297, "grad_norm": 6.587730052328041, "learning_rate": 5.847786961719768e-06, "loss": 17.3455, "step": 25297 }, { "epoch": 0.4624271117041695, "grad_norm": 7.186198500662436, "learning_rate": 5.847495234850148e-06, "loss": 18.0893, "step": 25298 }, { "epoch": 0.462445390900616, "grad_norm": 6.613017319394958, "learning_rate": 5.8472035050101305e-06, "loss": 17.8159, "step": 25299 }, { "epoch": 0.4624636700970625, "grad_norm": 7.119548517905186, "learning_rate": 5.846911772200738e-06, "loss": 18.079, "step": 25300 }, { "epoch": 0.46248194929350905, "grad_norm": 5.0024120341206295, "learning_rate": 5.846620036422994e-06, "loss": 16.9131, "step": 25301 }, { "epoch": 0.4625002284899556, "grad_norm": 5.990592914536484, "learning_rate": 5.84632829767792e-06, "loss": 17.4368, "step": 25302 }, { "epoch": 0.4625185076864021, "grad_norm": 6.52735317668258, "learning_rate": 5.8460365559665385e-06, "loss": 17.2994, "step": 25303 }, { "epoch": 0.4625367868828486, "grad_norm": 6.455628074021686, "learning_rate": 5.845744811289874e-06, "loss": 17.6648, "step": 25304 }, { "epoch": 0.46255506607929514, "grad_norm": 7.138957096937389, "learning_rate": 5.845453063648945e-06, "loss": 17.9178, "step": 25305 }, { "epoch": 0.4625733452757417, "grad_norm": 6.196540200439552, "learning_rate": 5.845161313044777e-06, "loss": 17.5285, "step": 25306 }, { "epoch": 0.4625916244721882, "grad_norm": 6.210113427208343, "learning_rate": 5.844869559478392e-06, "loss": 17.61, "step": 25307 }, { "epoch": 0.46260990366863475, "grad_norm": 7.13312957663764, "learning_rate": 5.844577802950815e-06, "loss": 17.6366, "step": 25308 }, { "epoch": 0.46262818286508123, "grad_norm": 6.990945895824653, "learning_rate": 5.844286043463063e-06, "loss": 17.8473, "step": 25309 }, { "epoch": 0.46264646206152776, "grad_norm": 5.8883079948583665, "learning_rate": 5.843994281016161e-06, "loss": 17.3688, "step": 25310 }, { "epoch": 0.4626647412579743, "grad_norm": 6.139783268220673, "learning_rate": 5.843702515611136e-06, "loss": 17.429, "step": 25311 }, { "epoch": 0.46268302045442083, "grad_norm": 6.756691423272204, "learning_rate": 5.843410747249004e-06, "loss": 17.6165, "step": 25312 }, { "epoch": 0.46270129965086737, "grad_norm": 6.564538204662522, "learning_rate": 5.843118975930792e-06, "loss": 17.2877, "step": 25313 }, { "epoch": 0.46271957884731385, "grad_norm": 5.636229527122742, "learning_rate": 5.8428272016575196e-06, "loss": 17.2892, "step": 25314 }, { "epoch": 0.4627378580437604, "grad_norm": 6.562940087437146, "learning_rate": 5.8425354244302116e-06, "loss": 17.2753, "step": 25315 }, { "epoch": 0.4627561372402069, "grad_norm": 5.201640249630994, "learning_rate": 5.84224364424989e-06, "loss": 17.1833, "step": 25316 }, { "epoch": 0.46277441643665346, "grad_norm": 6.451804603219493, "learning_rate": 5.841951861117578e-06, "loss": 17.3407, "step": 25317 }, { "epoch": 0.4627926956331, "grad_norm": 6.161920593051882, "learning_rate": 5.8416600750342985e-06, "loss": 17.5662, "step": 25318 }, { "epoch": 0.4628109748295465, "grad_norm": 6.2909873660523905, "learning_rate": 5.8413682860010715e-06, "loss": 17.4847, "step": 25319 }, { "epoch": 0.462829254025993, "grad_norm": 6.678807603183959, "learning_rate": 5.841076494018922e-06, "loss": 17.5152, "step": 25320 }, { "epoch": 0.46284753322243954, "grad_norm": 6.201930651209116, "learning_rate": 5.840784699088873e-06, "loss": 17.5612, "step": 25321 }, { "epoch": 0.4628658124188861, "grad_norm": 6.736141269907265, "learning_rate": 5.840492901211949e-06, "loss": 17.7945, "step": 25322 }, { "epoch": 0.4628840916153326, "grad_norm": 6.219596559882062, "learning_rate": 5.8402011003891665e-06, "loss": 17.289, "step": 25323 }, { "epoch": 0.4629023708117791, "grad_norm": 6.894170696110101, "learning_rate": 5.839909296621553e-06, "loss": 17.5829, "step": 25324 }, { "epoch": 0.46292065000822563, "grad_norm": 6.866118240618525, "learning_rate": 5.839617489910132e-06, "loss": 17.7614, "step": 25325 }, { "epoch": 0.46293892920467217, "grad_norm": 7.584770837758176, "learning_rate": 5.839325680255923e-06, "loss": 18.1404, "step": 25326 }, { "epoch": 0.4629572084011187, "grad_norm": 6.497741617928724, "learning_rate": 5.839033867659951e-06, "loss": 17.4433, "step": 25327 }, { "epoch": 0.46297548759756524, "grad_norm": 6.794961528294284, "learning_rate": 5.8387420521232375e-06, "loss": 17.5179, "step": 25328 }, { "epoch": 0.4629937667940117, "grad_norm": 6.43904306414136, "learning_rate": 5.8384502336468065e-06, "loss": 17.4582, "step": 25329 }, { "epoch": 0.46301204599045825, "grad_norm": 9.384331646384522, "learning_rate": 5.838158412231679e-06, "loss": 18.4846, "step": 25330 }, { "epoch": 0.4630303251869048, "grad_norm": 7.290277001149687, "learning_rate": 5.83786658787888e-06, "loss": 17.895, "step": 25331 }, { "epoch": 0.4630486043833513, "grad_norm": 7.414494454667557, "learning_rate": 5.8375747605894305e-06, "loss": 17.7033, "step": 25332 }, { "epoch": 0.4630668835797978, "grad_norm": 5.793884845650024, "learning_rate": 5.837282930364355e-06, "loss": 17.3952, "step": 25333 }, { "epoch": 0.46308516277624434, "grad_norm": 7.216531494080016, "learning_rate": 5.836991097204676e-06, "loss": 18.011, "step": 25334 }, { "epoch": 0.4631034419726909, "grad_norm": 6.921636301413677, "learning_rate": 5.836699261111416e-06, "loss": 17.6392, "step": 25335 }, { "epoch": 0.4631217211691374, "grad_norm": 6.148023916485857, "learning_rate": 5.836407422085597e-06, "loss": 17.5761, "step": 25336 }, { "epoch": 0.46314000036558395, "grad_norm": 6.454094203469489, "learning_rate": 5.836115580128241e-06, "loss": 17.5949, "step": 25337 }, { "epoch": 0.46315827956203043, "grad_norm": 7.478081110918355, "learning_rate": 5.835823735240374e-06, "loss": 17.8644, "step": 25338 }, { "epoch": 0.46317655875847696, "grad_norm": 6.6794437943755565, "learning_rate": 5.835531887423018e-06, "loss": 17.6262, "step": 25339 }, { "epoch": 0.4631948379549235, "grad_norm": 6.636371695461813, "learning_rate": 5.835240036677195e-06, "loss": 17.4355, "step": 25340 }, { "epoch": 0.46321311715137004, "grad_norm": 6.4468585853660105, "learning_rate": 5.834948183003927e-06, "loss": 17.5162, "step": 25341 }, { "epoch": 0.46323139634781657, "grad_norm": 6.415564959924538, "learning_rate": 5.8346563264042376e-06, "loss": 17.4264, "step": 25342 }, { "epoch": 0.46324967554426305, "grad_norm": 6.0162155921609255, "learning_rate": 5.8343644668791525e-06, "loss": 17.5026, "step": 25343 }, { "epoch": 0.4632679547407096, "grad_norm": 5.887120949260308, "learning_rate": 5.83407260442969e-06, "loss": 17.2072, "step": 25344 }, { "epoch": 0.4632862339371561, "grad_norm": 7.261838044243021, "learning_rate": 5.833780739056877e-06, "loss": 17.6527, "step": 25345 }, { "epoch": 0.46330451313360266, "grad_norm": 8.076103650438975, "learning_rate": 5.833488870761734e-06, "loss": 18.1868, "step": 25346 }, { "epoch": 0.4633227923300492, "grad_norm": 6.297252061462196, "learning_rate": 5.833196999545285e-06, "loss": 17.7289, "step": 25347 }, { "epoch": 0.4633410715264957, "grad_norm": 7.313819585494033, "learning_rate": 5.832905125408553e-06, "loss": 18.4109, "step": 25348 }, { "epoch": 0.4633593507229422, "grad_norm": 7.601253779940887, "learning_rate": 5.832613248352562e-06, "loss": 17.9015, "step": 25349 }, { "epoch": 0.46337762991938874, "grad_norm": 6.179775210548726, "learning_rate": 5.832321368378333e-06, "loss": 17.3932, "step": 25350 }, { "epoch": 0.4633959091158353, "grad_norm": 6.334473440110927, "learning_rate": 5.832029485486888e-06, "loss": 17.4904, "step": 25351 }, { "epoch": 0.4634141883122818, "grad_norm": 6.247934125522594, "learning_rate": 5.831737599679254e-06, "loss": 17.3286, "step": 25352 }, { "epoch": 0.4634324675087283, "grad_norm": 7.355119546749972, "learning_rate": 5.831445710956452e-06, "loss": 18.326, "step": 25353 }, { "epoch": 0.46345074670517483, "grad_norm": 6.091327128978012, "learning_rate": 5.831153819319504e-06, "loss": 17.6493, "step": 25354 }, { "epoch": 0.46346902590162137, "grad_norm": 6.971561419558052, "learning_rate": 5.830861924769433e-06, "loss": 17.8274, "step": 25355 }, { "epoch": 0.4634873050980679, "grad_norm": 5.619793787453107, "learning_rate": 5.830570027307265e-06, "loss": 17.236, "step": 25356 }, { "epoch": 0.46350558429451444, "grad_norm": 5.792039180591749, "learning_rate": 5.83027812693402e-06, "loss": 17.3111, "step": 25357 }, { "epoch": 0.4635238634909609, "grad_norm": 7.829832211260318, "learning_rate": 5.829986223650722e-06, "loss": 18.3417, "step": 25358 }, { "epoch": 0.46354214268740745, "grad_norm": 6.0999711054193675, "learning_rate": 5.8296943174583955e-06, "loss": 17.3464, "step": 25359 }, { "epoch": 0.463560421883854, "grad_norm": 6.605675552514442, "learning_rate": 5.829402408358061e-06, "loss": 17.3732, "step": 25360 }, { "epoch": 0.4635787010803005, "grad_norm": 5.863169034171898, "learning_rate": 5.829110496350744e-06, "loss": 17.1284, "step": 25361 }, { "epoch": 0.46359698027674706, "grad_norm": 7.091978667450804, "learning_rate": 5.828818581437467e-06, "loss": 17.7498, "step": 25362 }, { "epoch": 0.46361525947319354, "grad_norm": 6.241537816369766, "learning_rate": 5.828526663619253e-06, "loss": 17.4417, "step": 25363 }, { "epoch": 0.4636335386696401, "grad_norm": 7.3549456243561, "learning_rate": 5.8282347428971235e-06, "loss": 17.5559, "step": 25364 }, { "epoch": 0.4636518178660866, "grad_norm": 5.578223423588588, "learning_rate": 5.8279428192721035e-06, "loss": 16.9686, "step": 25365 }, { "epoch": 0.46367009706253315, "grad_norm": 7.081109685546922, "learning_rate": 5.8276508927452165e-06, "loss": 17.5608, "step": 25366 }, { "epoch": 0.46368837625897963, "grad_norm": 5.8333112989986144, "learning_rate": 5.827358963317485e-06, "loss": 17.3277, "step": 25367 }, { "epoch": 0.46370665545542616, "grad_norm": 6.273552577904575, "learning_rate": 5.827067030989931e-06, "loss": 17.3467, "step": 25368 }, { "epoch": 0.4637249346518727, "grad_norm": 5.6332370903147675, "learning_rate": 5.826775095763578e-06, "loss": 17.3149, "step": 25369 }, { "epoch": 0.46374321384831924, "grad_norm": 5.125673178882156, "learning_rate": 5.826483157639453e-06, "loss": 16.8984, "step": 25370 }, { "epoch": 0.46376149304476577, "grad_norm": 5.542666792038269, "learning_rate": 5.826191216618574e-06, "loss": 17.3516, "step": 25371 }, { "epoch": 0.46377977224121225, "grad_norm": 5.973093200492799, "learning_rate": 5.825899272701968e-06, "loss": 17.3629, "step": 25372 }, { "epoch": 0.4637980514376588, "grad_norm": 7.58868573698555, "learning_rate": 5.825607325890655e-06, "loss": 17.8242, "step": 25373 }, { "epoch": 0.4638163306341053, "grad_norm": 6.26002998800929, "learning_rate": 5.825315376185662e-06, "loss": 17.1687, "step": 25374 }, { "epoch": 0.46383460983055186, "grad_norm": 5.446282005113881, "learning_rate": 5.825023423588009e-06, "loss": 17.0268, "step": 25375 }, { "epoch": 0.4638528890269984, "grad_norm": 6.700845822706817, "learning_rate": 5.82473146809872e-06, "loss": 17.542, "step": 25376 }, { "epoch": 0.4638711682234449, "grad_norm": 5.392900742683065, "learning_rate": 5.82443950971882e-06, "loss": 16.9712, "step": 25377 }, { "epoch": 0.4638894474198914, "grad_norm": 8.590892151220999, "learning_rate": 5.824147548449329e-06, "loss": 18.2489, "step": 25378 }, { "epoch": 0.46390772661633795, "grad_norm": 6.090399025769546, "learning_rate": 5.823855584291274e-06, "loss": 17.465, "step": 25379 }, { "epoch": 0.4639260058127845, "grad_norm": 7.01374334481513, "learning_rate": 5.823563617245678e-06, "loss": 17.891, "step": 25380 }, { "epoch": 0.463944285009231, "grad_norm": 8.63108118246894, "learning_rate": 5.8232716473135605e-06, "loss": 17.4832, "step": 25381 }, { "epoch": 0.4639625642056775, "grad_norm": 6.861777262285128, "learning_rate": 5.8229796744959485e-06, "loss": 17.8547, "step": 25382 }, { "epoch": 0.46398084340212403, "grad_norm": 6.175446626925928, "learning_rate": 5.822687698793863e-06, "loss": 17.1716, "step": 25383 }, { "epoch": 0.46399912259857057, "grad_norm": 4.995754632381177, "learning_rate": 5.82239572020833e-06, "loss": 16.9765, "step": 25384 }, { "epoch": 0.4640174017950171, "grad_norm": 6.297137853896752, "learning_rate": 5.8221037387403715e-06, "loss": 17.3626, "step": 25385 }, { "epoch": 0.46403568099146364, "grad_norm": 5.997894405928749, "learning_rate": 5.82181175439101e-06, "loss": 17.3152, "step": 25386 }, { "epoch": 0.4640539601879101, "grad_norm": 9.412705800128995, "learning_rate": 5.821519767161269e-06, "loss": 17.9353, "step": 25387 }, { "epoch": 0.46407223938435666, "grad_norm": 5.846131578774165, "learning_rate": 5.821227777052173e-06, "loss": 17.7164, "step": 25388 }, { "epoch": 0.4640905185808032, "grad_norm": 6.777301688189443, "learning_rate": 5.820935784064745e-06, "loss": 17.3869, "step": 25389 }, { "epoch": 0.4641087977772497, "grad_norm": 7.3930174262124675, "learning_rate": 5.820643788200009e-06, "loss": 17.6377, "step": 25390 }, { "epoch": 0.46412707697369626, "grad_norm": 6.179387443757268, "learning_rate": 5.8203517894589865e-06, "loss": 17.3686, "step": 25391 }, { "epoch": 0.46414535617014274, "grad_norm": 6.3682285265058525, "learning_rate": 5.820059787842702e-06, "loss": 17.4647, "step": 25392 }, { "epoch": 0.4641636353665893, "grad_norm": 6.687257072835672, "learning_rate": 5.8197677833521805e-06, "loss": 17.3832, "step": 25393 }, { "epoch": 0.4641819145630358, "grad_norm": 8.697887815710882, "learning_rate": 5.819475775988445e-06, "loss": 18.3869, "step": 25394 }, { "epoch": 0.46420019375948235, "grad_norm": 6.373782285258622, "learning_rate": 5.819183765752516e-06, "loss": 17.4581, "step": 25395 }, { "epoch": 0.4642184729559289, "grad_norm": 5.93386617840029, "learning_rate": 5.818891752645418e-06, "loss": 17.1143, "step": 25396 }, { "epoch": 0.46423675215237536, "grad_norm": 5.622582804489323, "learning_rate": 5.818599736668178e-06, "loss": 17.1578, "step": 25397 }, { "epoch": 0.4642550313488219, "grad_norm": 5.715015822660491, "learning_rate": 5.8183077178218166e-06, "loss": 17.1295, "step": 25398 }, { "epoch": 0.46427331054526844, "grad_norm": 6.563659242416588, "learning_rate": 5.8180156961073566e-06, "loss": 17.4553, "step": 25399 }, { "epoch": 0.46429158974171497, "grad_norm": 5.851177216489818, "learning_rate": 5.817723671525822e-06, "loss": 17.2711, "step": 25400 }, { "epoch": 0.46430986893816145, "grad_norm": 7.7478468610353, "learning_rate": 5.8174316440782375e-06, "loss": 17.9985, "step": 25401 }, { "epoch": 0.464328148134608, "grad_norm": 7.663742962480571, "learning_rate": 5.8171396137656265e-06, "loss": 18.1791, "step": 25402 }, { "epoch": 0.4643464273310545, "grad_norm": 5.864716069555697, "learning_rate": 5.8168475805890125e-06, "loss": 17.3009, "step": 25403 }, { "epoch": 0.46436470652750106, "grad_norm": 7.865864842195836, "learning_rate": 5.816555544549418e-06, "loss": 18.2962, "step": 25404 }, { "epoch": 0.4643829857239476, "grad_norm": 5.933238752604491, "learning_rate": 5.8162635056478665e-06, "loss": 17.2437, "step": 25405 }, { "epoch": 0.4644012649203941, "grad_norm": 6.257970487848118, "learning_rate": 5.815971463885383e-06, "loss": 17.3387, "step": 25406 }, { "epoch": 0.4644195441168406, "grad_norm": 5.873520787476609, "learning_rate": 5.815679419262989e-06, "loss": 17.2506, "step": 25407 }, { "epoch": 0.46443782331328715, "grad_norm": 5.236429270078546, "learning_rate": 5.815387371781713e-06, "loss": 16.9959, "step": 25408 }, { "epoch": 0.4644561025097337, "grad_norm": 6.122325410689532, "learning_rate": 5.815095321442572e-06, "loss": 17.189, "step": 25409 }, { "epoch": 0.4644743817061802, "grad_norm": 6.761716591140318, "learning_rate": 5.814803268246593e-06, "loss": 18.0985, "step": 25410 }, { "epoch": 0.4644926609026267, "grad_norm": 6.242224172683804, "learning_rate": 5.8145112121948e-06, "loss": 17.6028, "step": 25411 }, { "epoch": 0.46451094009907323, "grad_norm": 7.912110355549454, "learning_rate": 5.814219153288215e-06, "loss": 17.3915, "step": 25412 }, { "epoch": 0.46452921929551977, "grad_norm": 7.727862681621994, "learning_rate": 5.813927091527864e-06, "loss": 17.9699, "step": 25413 }, { "epoch": 0.4645474984919663, "grad_norm": 6.419234879825803, "learning_rate": 5.813635026914767e-06, "loss": 17.6021, "step": 25414 }, { "epoch": 0.46456577768841284, "grad_norm": 6.38559028325601, "learning_rate": 5.813342959449951e-06, "loss": 17.4399, "step": 25415 }, { "epoch": 0.4645840568848593, "grad_norm": 6.663499737680368, "learning_rate": 5.813050889134438e-06, "loss": 17.4291, "step": 25416 }, { "epoch": 0.46460233608130586, "grad_norm": 7.055634140752238, "learning_rate": 5.812758815969253e-06, "loss": 17.7337, "step": 25417 }, { "epoch": 0.4646206152777524, "grad_norm": 5.865997723408139, "learning_rate": 5.812466739955418e-06, "loss": 17.0736, "step": 25418 }, { "epoch": 0.4646388944741989, "grad_norm": 7.534559974340752, "learning_rate": 5.8121746610939575e-06, "loss": 17.673, "step": 25419 }, { "epoch": 0.46465717367064546, "grad_norm": 7.6623166898443555, "learning_rate": 5.811882579385897e-06, "loss": 18.3083, "step": 25420 }, { "epoch": 0.46467545286709194, "grad_norm": 7.165215723988402, "learning_rate": 5.8115904948322565e-06, "loss": 17.5709, "step": 25421 }, { "epoch": 0.4646937320635385, "grad_norm": 8.324835671437848, "learning_rate": 5.811298407434064e-06, "loss": 18.0978, "step": 25422 }, { "epoch": 0.464712011259985, "grad_norm": 7.465843154080268, "learning_rate": 5.811006317192338e-06, "loss": 18.0381, "step": 25423 }, { "epoch": 0.46473029045643155, "grad_norm": 6.404398169454394, "learning_rate": 5.810714224108107e-06, "loss": 17.35, "step": 25424 }, { "epoch": 0.4647485696528781, "grad_norm": 5.558068167350239, "learning_rate": 5.810422128182393e-06, "loss": 17.0357, "step": 25425 }, { "epoch": 0.46476684884932457, "grad_norm": 7.94388708563255, "learning_rate": 5.810130029416221e-06, "loss": 18.3155, "step": 25426 }, { "epoch": 0.4647851280457711, "grad_norm": 10.070031384060563, "learning_rate": 5.809837927810612e-06, "loss": 18.129, "step": 25427 }, { "epoch": 0.46480340724221764, "grad_norm": 6.813759307518242, "learning_rate": 5.8095458233665915e-06, "loss": 17.3762, "step": 25428 }, { "epoch": 0.46482168643866417, "grad_norm": 8.24614842268811, "learning_rate": 5.8092537160851825e-06, "loss": 18.2599, "step": 25429 }, { "epoch": 0.4648399656351107, "grad_norm": 7.886666816792465, "learning_rate": 5.80896160596741e-06, "loss": 17.9468, "step": 25430 }, { "epoch": 0.4648582448315572, "grad_norm": 6.734079013518263, "learning_rate": 5.808669493014297e-06, "loss": 17.51, "step": 25431 }, { "epoch": 0.4648765240280037, "grad_norm": 5.77974965198436, "learning_rate": 5.808377377226868e-06, "loss": 17.169, "step": 25432 }, { "epoch": 0.46489480322445026, "grad_norm": 7.621575056975452, "learning_rate": 5.808085258606146e-06, "loss": 17.8084, "step": 25433 }, { "epoch": 0.4649130824208968, "grad_norm": 5.757923524112118, "learning_rate": 5.807793137153156e-06, "loss": 17.0912, "step": 25434 }, { "epoch": 0.4649313616173433, "grad_norm": 5.969163553293595, "learning_rate": 5.8075010128689226e-06, "loss": 17.1662, "step": 25435 }, { "epoch": 0.4649496408137898, "grad_norm": 7.14085071453925, "learning_rate": 5.807208885754466e-06, "loss": 17.5669, "step": 25436 }, { "epoch": 0.46496792001023635, "grad_norm": 6.7941607972969935, "learning_rate": 5.806916755810812e-06, "loss": 17.5949, "step": 25437 }, { "epoch": 0.4649861992066829, "grad_norm": 7.7719566649871785, "learning_rate": 5.806624623038985e-06, "loss": 17.5299, "step": 25438 }, { "epoch": 0.4650044784031294, "grad_norm": 6.084064617451337, "learning_rate": 5.80633248744001e-06, "loss": 17.7187, "step": 25439 }, { "epoch": 0.4650227575995759, "grad_norm": 6.3385224137895415, "learning_rate": 5.806040349014908e-06, "loss": 17.3292, "step": 25440 }, { "epoch": 0.46504103679602243, "grad_norm": 5.967159038319389, "learning_rate": 5.805748207764707e-06, "loss": 17.1518, "step": 25441 }, { "epoch": 0.46505931599246897, "grad_norm": 7.480021498899485, "learning_rate": 5.805456063690426e-06, "loss": 17.6674, "step": 25442 }, { "epoch": 0.4650775951889155, "grad_norm": 5.934298317671547, "learning_rate": 5.805163916793092e-06, "loss": 17.2422, "step": 25443 }, { "epoch": 0.46509587438536204, "grad_norm": 5.419061835388019, "learning_rate": 5.804871767073729e-06, "loss": 17.2889, "step": 25444 }, { "epoch": 0.4651141535818085, "grad_norm": 6.091419538500869, "learning_rate": 5.804579614533359e-06, "loss": 17.4997, "step": 25445 }, { "epoch": 0.46513243277825506, "grad_norm": 5.920096238098323, "learning_rate": 5.804287459173008e-06, "loss": 17.3415, "step": 25446 }, { "epoch": 0.4651507119747016, "grad_norm": 8.725667131244787, "learning_rate": 5.8039953009937e-06, "loss": 17.7631, "step": 25447 }, { "epoch": 0.4651689911711481, "grad_norm": 6.398170107632234, "learning_rate": 5.803703139996457e-06, "loss": 17.2332, "step": 25448 }, { "epoch": 0.46518727036759466, "grad_norm": 6.594791005756539, "learning_rate": 5.803410976182306e-06, "loss": 17.3875, "step": 25449 }, { "epoch": 0.46520554956404114, "grad_norm": 5.356000716930894, "learning_rate": 5.803118809552268e-06, "loss": 17.138, "step": 25450 }, { "epoch": 0.4652238287604877, "grad_norm": 6.044378429430535, "learning_rate": 5.802826640107367e-06, "loss": 17.1697, "step": 25451 }, { "epoch": 0.4652421079569342, "grad_norm": 7.951520853661495, "learning_rate": 5.802534467848629e-06, "loss": 18.0788, "step": 25452 }, { "epoch": 0.46526038715338075, "grad_norm": 6.48639673924519, "learning_rate": 5.80224229277708e-06, "loss": 17.3391, "step": 25453 }, { "epoch": 0.4652786663498273, "grad_norm": 5.80189892277259, "learning_rate": 5.80195011489374e-06, "loss": 17.255, "step": 25454 }, { "epoch": 0.46529694554627377, "grad_norm": 6.871948405060312, "learning_rate": 5.801657934199633e-06, "loss": 17.7307, "step": 25455 }, { "epoch": 0.4653152247427203, "grad_norm": 6.067583718378898, "learning_rate": 5.801365750695786e-06, "loss": 17.0874, "step": 25456 }, { "epoch": 0.46533350393916684, "grad_norm": 6.5609355757583945, "learning_rate": 5.801073564383219e-06, "loss": 17.272, "step": 25457 }, { "epoch": 0.4653517831356134, "grad_norm": 6.271905588458909, "learning_rate": 5.800781375262962e-06, "loss": 17.386, "step": 25458 }, { "epoch": 0.4653700623320599, "grad_norm": 7.648823117763026, "learning_rate": 5.800489183336033e-06, "loss": 17.7272, "step": 25459 }, { "epoch": 0.4653883415285064, "grad_norm": 8.224700023842017, "learning_rate": 5.800196988603461e-06, "loss": 17.8575, "step": 25460 }, { "epoch": 0.4654066207249529, "grad_norm": 7.975275975421861, "learning_rate": 5.799904791066266e-06, "loss": 17.7064, "step": 25461 }, { "epoch": 0.46542489992139946, "grad_norm": 7.692745960079071, "learning_rate": 5.799612590725477e-06, "loss": 18.0154, "step": 25462 }, { "epoch": 0.465443179117846, "grad_norm": 6.198587248863337, "learning_rate": 5.799320387582113e-06, "loss": 17.4678, "step": 25463 }, { "epoch": 0.46546145831429253, "grad_norm": 5.770454018950131, "learning_rate": 5.7990281816372e-06, "loss": 17.1268, "step": 25464 }, { "epoch": 0.465479737510739, "grad_norm": 8.128152895873397, "learning_rate": 5.798735972891764e-06, "loss": 18.4631, "step": 25465 }, { "epoch": 0.46549801670718555, "grad_norm": 4.776212786610156, "learning_rate": 5.798443761346828e-06, "loss": 16.8338, "step": 25466 }, { "epoch": 0.4655162959036321, "grad_norm": 8.268703185539234, "learning_rate": 5.798151547003416e-06, "loss": 17.7133, "step": 25467 }, { "epoch": 0.4655345751000786, "grad_norm": 6.109843705524758, "learning_rate": 5.797859329862551e-06, "loss": 17.4378, "step": 25468 }, { "epoch": 0.4655528542965251, "grad_norm": 6.166562862592454, "learning_rate": 5.7975671099252575e-06, "loss": 17.7501, "step": 25469 }, { "epoch": 0.46557113349297163, "grad_norm": 5.4298879723126285, "learning_rate": 5.797274887192562e-06, "loss": 17.3158, "step": 25470 }, { "epoch": 0.46558941268941817, "grad_norm": 6.311308844092306, "learning_rate": 5.796982661665487e-06, "loss": 17.1825, "step": 25471 }, { "epoch": 0.4656076918858647, "grad_norm": 6.356383502204275, "learning_rate": 5.796690433345056e-06, "loss": 17.1621, "step": 25472 }, { "epoch": 0.46562597108231124, "grad_norm": 6.694012136081018, "learning_rate": 5.796398202232295e-06, "loss": 17.6111, "step": 25473 }, { "epoch": 0.4656442502787577, "grad_norm": 5.5338284597810485, "learning_rate": 5.796105968328227e-06, "loss": 17.1924, "step": 25474 }, { "epoch": 0.46566252947520426, "grad_norm": 6.168919143212222, "learning_rate": 5.795813731633877e-06, "loss": 17.3206, "step": 25475 }, { "epoch": 0.4656808086716508, "grad_norm": 5.722622995627302, "learning_rate": 5.795521492150269e-06, "loss": 17.2056, "step": 25476 }, { "epoch": 0.46569908786809733, "grad_norm": 5.637556516954348, "learning_rate": 5.795229249878427e-06, "loss": 17.1989, "step": 25477 }, { "epoch": 0.46571736706454386, "grad_norm": 6.9322596818191276, "learning_rate": 5.794937004819374e-06, "loss": 17.6157, "step": 25478 }, { "epoch": 0.46573564626099034, "grad_norm": 6.298532134776038, "learning_rate": 5.794644756974138e-06, "loss": 17.4987, "step": 25479 }, { "epoch": 0.4657539254574369, "grad_norm": 5.948370314153637, "learning_rate": 5.7943525063437415e-06, "loss": 17.3412, "step": 25480 }, { "epoch": 0.4657722046538834, "grad_norm": 6.1791203787472515, "learning_rate": 5.7940602529292065e-06, "loss": 17.5173, "step": 25481 }, { "epoch": 0.46579048385032995, "grad_norm": 7.65046005754977, "learning_rate": 5.793767996731561e-06, "loss": 17.9836, "step": 25482 }, { "epoch": 0.4658087630467765, "grad_norm": 6.968752098824836, "learning_rate": 5.793475737751825e-06, "loss": 17.7423, "step": 25483 }, { "epoch": 0.46582704224322297, "grad_norm": 5.743834748829708, "learning_rate": 5.793183475991028e-06, "loss": 17.2186, "step": 25484 }, { "epoch": 0.4658453214396695, "grad_norm": 6.34696715960195, "learning_rate": 5.79289121145019e-06, "loss": 17.4219, "step": 25485 }, { "epoch": 0.46586360063611604, "grad_norm": 11.808143119530456, "learning_rate": 5.792598944130338e-06, "loss": 17.6976, "step": 25486 }, { "epoch": 0.4658818798325626, "grad_norm": 8.46839004191002, "learning_rate": 5.7923066740324954e-06, "loss": 18.5328, "step": 25487 }, { "epoch": 0.4659001590290091, "grad_norm": 7.399257564711092, "learning_rate": 5.792014401157686e-06, "loss": 17.7309, "step": 25488 }, { "epoch": 0.4659184382254556, "grad_norm": 7.342365479060027, "learning_rate": 5.791722125506935e-06, "loss": 17.8551, "step": 25489 }, { "epoch": 0.4659367174219021, "grad_norm": 6.414269926068148, "learning_rate": 5.791429847081268e-06, "loss": 17.376, "step": 25490 }, { "epoch": 0.46595499661834866, "grad_norm": 6.692070420332744, "learning_rate": 5.791137565881706e-06, "loss": 17.1239, "step": 25491 }, { "epoch": 0.4659732758147952, "grad_norm": 5.941832870729837, "learning_rate": 5.790845281909278e-06, "loss": 17.342, "step": 25492 }, { "epoch": 0.46599155501124173, "grad_norm": 7.054093746543174, "learning_rate": 5.790552995165003e-06, "loss": 17.82, "step": 25493 }, { "epoch": 0.4660098342076882, "grad_norm": 7.1926783141619035, "learning_rate": 5.790260705649912e-06, "loss": 17.8253, "step": 25494 }, { "epoch": 0.46602811340413475, "grad_norm": 7.290353227204587, "learning_rate": 5.789968413365022e-06, "loss": 17.8883, "step": 25495 }, { "epoch": 0.4660463926005813, "grad_norm": 8.000022510651304, "learning_rate": 5.789676118311362e-06, "loss": 17.6913, "step": 25496 }, { "epoch": 0.4660646717970278, "grad_norm": 5.7223660166521855, "learning_rate": 5.789383820489958e-06, "loss": 17.2571, "step": 25497 }, { "epoch": 0.46608295099347435, "grad_norm": 6.014089842940683, "learning_rate": 5.7890915199018305e-06, "loss": 17.3348, "step": 25498 }, { "epoch": 0.46610123018992083, "grad_norm": 7.728944553249146, "learning_rate": 5.788799216548007e-06, "loss": 18.1525, "step": 25499 }, { "epoch": 0.46611950938636737, "grad_norm": 6.740202405766377, "learning_rate": 5.788506910429509e-06, "loss": 18.0152, "step": 25500 }, { "epoch": 0.4661377885828139, "grad_norm": 5.878783018698597, "learning_rate": 5.7882146015473635e-06, "loss": 17.3935, "step": 25501 }, { "epoch": 0.46615606777926044, "grad_norm": 5.514175149194307, "learning_rate": 5.787922289902594e-06, "loss": 17.1685, "step": 25502 }, { "epoch": 0.4661743469757069, "grad_norm": 5.575428012857195, "learning_rate": 5.787629975496225e-06, "loss": 17.3216, "step": 25503 }, { "epoch": 0.46619262617215346, "grad_norm": 6.837583545419483, "learning_rate": 5.787337658329283e-06, "loss": 17.6341, "step": 25504 }, { "epoch": 0.4662109053686, "grad_norm": 6.86522844280634, "learning_rate": 5.787045338402788e-06, "loss": 17.6928, "step": 25505 }, { "epoch": 0.46622918456504653, "grad_norm": 6.6057154665080375, "learning_rate": 5.7867530157177695e-06, "loss": 17.4884, "step": 25506 }, { "epoch": 0.46624746376149306, "grad_norm": 6.234282463089636, "learning_rate": 5.786460690275248e-06, "loss": 17.4245, "step": 25507 }, { "epoch": 0.46626574295793954, "grad_norm": 5.2487149707511795, "learning_rate": 5.786168362076253e-06, "loss": 17.1315, "step": 25508 }, { "epoch": 0.4662840221543861, "grad_norm": 6.327196159722178, "learning_rate": 5.785876031121804e-06, "loss": 17.3609, "step": 25509 }, { "epoch": 0.4663023013508326, "grad_norm": 4.98513399112824, "learning_rate": 5.7855836974129275e-06, "loss": 16.8159, "step": 25510 }, { "epoch": 0.46632058054727915, "grad_norm": 7.033274968593185, "learning_rate": 5.78529136095065e-06, "loss": 17.8629, "step": 25511 }, { "epoch": 0.4663388597437257, "grad_norm": 6.4682641751182395, "learning_rate": 5.784999021735994e-06, "loss": 17.729, "step": 25512 }, { "epoch": 0.46635713894017217, "grad_norm": 6.061961259786219, "learning_rate": 5.7847066797699835e-06, "loss": 17.3876, "step": 25513 }, { "epoch": 0.4663754181366187, "grad_norm": 7.0861948553921135, "learning_rate": 5.784414335053645e-06, "loss": 17.6794, "step": 25514 }, { "epoch": 0.46639369733306524, "grad_norm": 5.939268965139865, "learning_rate": 5.7841219875880014e-06, "loss": 17.4165, "step": 25515 }, { "epoch": 0.4664119765295118, "grad_norm": 7.868548777318356, "learning_rate": 5.783829637374079e-06, "loss": 17.8753, "step": 25516 }, { "epoch": 0.4664302557259583, "grad_norm": 5.904137056078681, "learning_rate": 5.783537284412901e-06, "loss": 17.0714, "step": 25517 }, { "epoch": 0.4664485349224048, "grad_norm": 8.465949297581712, "learning_rate": 5.783244928705494e-06, "loss": 18.3392, "step": 25518 }, { "epoch": 0.4664668141188513, "grad_norm": 5.678356745077976, "learning_rate": 5.782952570252881e-06, "loss": 17.1089, "step": 25519 }, { "epoch": 0.46648509331529786, "grad_norm": 6.723494075443567, "learning_rate": 5.782660209056087e-06, "loss": 17.9239, "step": 25520 }, { "epoch": 0.4665033725117444, "grad_norm": 6.949571521758994, "learning_rate": 5.782367845116137e-06, "loss": 17.7583, "step": 25521 }, { "epoch": 0.46652165170819093, "grad_norm": 7.673193173161843, "learning_rate": 5.782075478434056e-06, "loss": 18.2279, "step": 25522 }, { "epoch": 0.4665399309046374, "grad_norm": 7.222699785340751, "learning_rate": 5.7817831090108665e-06, "loss": 17.7451, "step": 25523 }, { "epoch": 0.46655821010108395, "grad_norm": 8.3256639385663, "learning_rate": 5.781490736847597e-06, "loss": 18.2498, "step": 25524 }, { "epoch": 0.4665764892975305, "grad_norm": 6.864595074317109, "learning_rate": 5.78119836194527e-06, "loss": 17.7798, "step": 25525 }, { "epoch": 0.466594768493977, "grad_norm": 7.300946696061787, "learning_rate": 5.780905984304911e-06, "loss": 17.8623, "step": 25526 }, { "epoch": 0.46661304769042355, "grad_norm": 7.140533085292159, "learning_rate": 5.780613603927543e-06, "loss": 17.791, "step": 25527 }, { "epoch": 0.46663132688687003, "grad_norm": 6.553813815456046, "learning_rate": 5.7803212208141925e-06, "loss": 17.4941, "step": 25528 }, { "epoch": 0.46664960608331657, "grad_norm": 6.372771379056917, "learning_rate": 5.780028834965884e-06, "loss": 17.5054, "step": 25529 }, { "epoch": 0.4666678852797631, "grad_norm": 6.650607337775036, "learning_rate": 5.779736446383642e-06, "loss": 17.7421, "step": 25530 }, { "epoch": 0.46668616447620964, "grad_norm": 5.45317082491413, "learning_rate": 5.7794440550684914e-06, "loss": 17.1432, "step": 25531 }, { "epoch": 0.4667044436726562, "grad_norm": 6.692476524637104, "learning_rate": 5.779151661021457e-06, "loss": 17.5515, "step": 25532 }, { "epoch": 0.46672272286910266, "grad_norm": 6.150310827331175, "learning_rate": 5.778859264243564e-06, "loss": 17.3746, "step": 25533 }, { "epoch": 0.4667410020655492, "grad_norm": 5.5649850611619085, "learning_rate": 5.778566864735836e-06, "loss": 17.3589, "step": 25534 }, { "epoch": 0.46675928126199573, "grad_norm": 6.7430881717107, "learning_rate": 5.778274462499301e-06, "loss": 18.0345, "step": 25535 }, { "epoch": 0.46677756045844226, "grad_norm": 6.655482925491027, "learning_rate": 5.777982057534978e-06, "loss": 17.6676, "step": 25536 }, { "epoch": 0.46679583965488874, "grad_norm": 8.495092585597954, "learning_rate": 5.777689649843897e-06, "loss": 18.0385, "step": 25537 }, { "epoch": 0.4668141188513353, "grad_norm": 7.124209805019028, "learning_rate": 5.777397239427081e-06, "loss": 17.7627, "step": 25538 }, { "epoch": 0.4668323980477818, "grad_norm": 6.998044890956389, "learning_rate": 5.7771048262855565e-06, "loss": 17.7135, "step": 25539 }, { "epoch": 0.46685067724422835, "grad_norm": 6.909892355272215, "learning_rate": 5.776812410420347e-06, "loss": 18.0957, "step": 25540 }, { "epoch": 0.4668689564406749, "grad_norm": 6.803884410587914, "learning_rate": 5.7765199918324766e-06, "loss": 17.3433, "step": 25541 }, { "epoch": 0.46688723563712137, "grad_norm": 6.327646757334009, "learning_rate": 5.77622757052297e-06, "loss": 17.2302, "step": 25542 }, { "epoch": 0.4669055148335679, "grad_norm": 6.224291355273158, "learning_rate": 5.775935146492855e-06, "loss": 17.4444, "step": 25543 }, { "epoch": 0.46692379403001444, "grad_norm": 6.495749539530074, "learning_rate": 5.775642719743153e-06, "loss": 17.742, "step": 25544 }, { "epoch": 0.466942073226461, "grad_norm": 5.932850909083503, "learning_rate": 5.7753502902748915e-06, "loss": 17.1484, "step": 25545 }, { "epoch": 0.4669603524229075, "grad_norm": 6.508198917908852, "learning_rate": 5.775057858089094e-06, "loss": 17.3845, "step": 25546 }, { "epoch": 0.466978631619354, "grad_norm": 8.117538089044235, "learning_rate": 5.774765423186786e-06, "loss": 17.6249, "step": 25547 }, { "epoch": 0.4669969108158005, "grad_norm": 6.58849137660269, "learning_rate": 5.774472985568993e-06, "loss": 17.6773, "step": 25548 }, { "epoch": 0.46701519001224706, "grad_norm": 8.029901195463289, "learning_rate": 5.7741805452367395e-06, "loss": 17.898, "step": 25549 }, { "epoch": 0.4670334692086936, "grad_norm": 5.626040265528789, "learning_rate": 5.77388810219105e-06, "loss": 17.1408, "step": 25550 }, { "epoch": 0.46705174840514013, "grad_norm": 7.145792048078933, "learning_rate": 5.773595656432949e-06, "loss": 17.8677, "step": 25551 }, { "epoch": 0.4670700276015866, "grad_norm": 6.079540984990817, "learning_rate": 5.773303207963463e-06, "loss": 17.4458, "step": 25552 }, { "epoch": 0.46708830679803315, "grad_norm": 7.3454322016886815, "learning_rate": 5.773010756783618e-06, "loss": 17.6739, "step": 25553 }, { "epoch": 0.4671065859944797, "grad_norm": 7.135559965269335, "learning_rate": 5.772718302894436e-06, "loss": 17.7609, "step": 25554 }, { "epoch": 0.4671248651909262, "grad_norm": 6.4889447700450615, "learning_rate": 5.772425846296942e-06, "loss": 17.4732, "step": 25555 }, { "epoch": 0.46714314438737276, "grad_norm": 6.333431867166509, "learning_rate": 5.772133386992164e-06, "loss": 17.6238, "step": 25556 }, { "epoch": 0.46716142358381924, "grad_norm": 6.435345424507448, "learning_rate": 5.771840924981126e-06, "loss": 17.1634, "step": 25557 }, { "epoch": 0.46717970278026577, "grad_norm": 7.149523412735398, "learning_rate": 5.771548460264851e-06, "loss": 17.5551, "step": 25558 }, { "epoch": 0.4671979819767123, "grad_norm": 6.234826849885168, "learning_rate": 5.771255992844367e-06, "loss": 17.5132, "step": 25559 }, { "epoch": 0.46721626117315884, "grad_norm": 8.309589306755823, "learning_rate": 5.770963522720696e-06, "loss": 18.2804, "step": 25560 }, { "epoch": 0.4672345403696054, "grad_norm": 6.5399304772071725, "learning_rate": 5.770671049894866e-06, "loss": 17.3866, "step": 25561 }, { "epoch": 0.46725281956605186, "grad_norm": 5.692793393220293, "learning_rate": 5.7703785743679005e-06, "loss": 17.2978, "step": 25562 }, { "epoch": 0.4672710987624984, "grad_norm": 9.360252230875442, "learning_rate": 5.770086096140826e-06, "loss": 18.3529, "step": 25563 }, { "epoch": 0.46728937795894493, "grad_norm": 7.343001489445058, "learning_rate": 5.769793615214665e-06, "loss": 18.1661, "step": 25564 }, { "epoch": 0.46730765715539146, "grad_norm": 5.8445637134500386, "learning_rate": 5.769501131590445e-06, "loss": 17.3307, "step": 25565 }, { "epoch": 0.467325936351838, "grad_norm": 5.3673205644296695, "learning_rate": 5.769208645269191e-06, "loss": 16.9959, "step": 25566 }, { "epoch": 0.4673442155482845, "grad_norm": 5.995327681747548, "learning_rate": 5.768916156251926e-06, "loss": 17.3419, "step": 25567 }, { "epoch": 0.467362494744731, "grad_norm": 5.9037602333119095, "learning_rate": 5.768623664539677e-06, "loss": 17.4071, "step": 25568 }, { "epoch": 0.46738077394117755, "grad_norm": 7.740053557973855, "learning_rate": 5.7683311701334675e-06, "loss": 18.268, "step": 25569 }, { "epoch": 0.4673990531376241, "grad_norm": 5.596008801615207, "learning_rate": 5.7680386730343265e-06, "loss": 17.4299, "step": 25570 }, { "epoch": 0.46741733233407057, "grad_norm": 6.375225596961252, "learning_rate": 5.767746173243276e-06, "loss": 17.421, "step": 25571 }, { "epoch": 0.4674356115305171, "grad_norm": 6.040953522506135, "learning_rate": 5.7674536707613404e-06, "loss": 17.4077, "step": 25572 }, { "epoch": 0.46745389072696364, "grad_norm": 5.330775525693102, "learning_rate": 5.767161165589548e-06, "loss": 17.1028, "step": 25573 }, { "epoch": 0.4674721699234102, "grad_norm": 7.585647974726364, "learning_rate": 5.766868657728921e-06, "loss": 17.7483, "step": 25574 }, { "epoch": 0.4674904491198567, "grad_norm": 6.346581416172484, "learning_rate": 5.766576147180486e-06, "loss": 17.537, "step": 25575 }, { "epoch": 0.4675087283163032, "grad_norm": 6.514677848062042, "learning_rate": 5.766283633945269e-06, "loss": 17.7364, "step": 25576 }, { "epoch": 0.4675270075127497, "grad_norm": 6.8426307403884, "learning_rate": 5.765991118024294e-06, "loss": 17.6425, "step": 25577 }, { "epoch": 0.46754528670919626, "grad_norm": 6.11212857663012, "learning_rate": 5.765698599418585e-06, "loss": 17.4694, "step": 25578 }, { "epoch": 0.4675635659056428, "grad_norm": 6.029309951634189, "learning_rate": 5.76540607812917e-06, "loss": 17.3648, "step": 25579 }, { "epoch": 0.46758184510208933, "grad_norm": 7.3512979418722075, "learning_rate": 5.7651135541570745e-06, "loss": 17.4534, "step": 25580 }, { "epoch": 0.4676001242985358, "grad_norm": 6.478351434879791, "learning_rate": 5.764821027503321e-06, "loss": 17.1912, "step": 25581 }, { "epoch": 0.46761840349498235, "grad_norm": 6.2906457901331825, "learning_rate": 5.764528498168935e-06, "loss": 17.4239, "step": 25582 }, { "epoch": 0.4676366826914289, "grad_norm": 5.0143946326574875, "learning_rate": 5.764235966154945e-06, "loss": 17.0215, "step": 25583 }, { "epoch": 0.4676549618878754, "grad_norm": 6.367645216377919, "learning_rate": 5.763943431462375e-06, "loss": 17.4521, "step": 25584 }, { "epoch": 0.46767324108432196, "grad_norm": 5.883589161899149, "learning_rate": 5.763650894092247e-06, "loss": 17.3561, "step": 25585 }, { "epoch": 0.46769152028076844, "grad_norm": 7.519895788858407, "learning_rate": 5.763358354045591e-06, "loss": 17.4102, "step": 25586 }, { "epoch": 0.46770979947721497, "grad_norm": 7.090251417467326, "learning_rate": 5.763065811323429e-06, "loss": 17.4349, "step": 25587 }, { "epoch": 0.4677280786736615, "grad_norm": 6.605999878783802, "learning_rate": 5.762773265926788e-06, "loss": 17.434, "step": 25588 }, { "epoch": 0.46774635787010804, "grad_norm": 5.274989284856472, "learning_rate": 5.762480717856692e-06, "loss": 16.9382, "step": 25589 }, { "epoch": 0.4677646370665546, "grad_norm": 6.385074581300302, "learning_rate": 5.762188167114168e-06, "loss": 17.5914, "step": 25590 }, { "epoch": 0.46778291626300106, "grad_norm": 5.097482259909812, "learning_rate": 5.76189561370024e-06, "loss": 17.1079, "step": 25591 }, { "epoch": 0.4678011954594476, "grad_norm": 6.112364499099624, "learning_rate": 5.761603057615936e-06, "loss": 17.45, "step": 25592 }, { "epoch": 0.46781947465589413, "grad_norm": 6.6746320042266545, "learning_rate": 5.761310498862277e-06, "loss": 17.5868, "step": 25593 }, { "epoch": 0.46783775385234067, "grad_norm": 6.196454607512979, "learning_rate": 5.761017937440293e-06, "loss": 17.5181, "step": 25594 }, { "epoch": 0.4678560330487872, "grad_norm": 7.1611120361721925, "learning_rate": 5.760725373351006e-06, "loss": 17.6064, "step": 25595 }, { "epoch": 0.4678743122452337, "grad_norm": 6.581741303080483, "learning_rate": 5.760432806595441e-06, "loss": 17.7773, "step": 25596 }, { "epoch": 0.4678925914416802, "grad_norm": 6.500205512880502, "learning_rate": 5.760140237174627e-06, "loss": 17.6136, "step": 25597 }, { "epoch": 0.46791087063812675, "grad_norm": 6.2234080923482935, "learning_rate": 5.759847665089587e-06, "loss": 17.37, "step": 25598 }, { "epoch": 0.4679291498345733, "grad_norm": 5.453934784457371, "learning_rate": 5.759555090341347e-06, "loss": 17.2709, "step": 25599 }, { "epoch": 0.4679474290310198, "grad_norm": 5.221289181546808, "learning_rate": 5.759262512930932e-06, "loss": 17.1167, "step": 25600 }, { "epoch": 0.4679657082274663, "grad_norm": 7.9108630108612665, "learning_rate": 5.7589699328593675e-06, "loss": 18.2024, "step": 25601 }, { "epoch": 0.46798398742391284, "grad_norm": 7.952453852758127, "learning_rate": 5.758677350127679e-06, "loss": 17.6896, "step": 25602 }, { "epoch": 0.4680022666203594, "grad_norm": 4.938859191373233, "learning_rate": 5.758384764736893e-06, "loss": 17.1023, "step": 25603 }, { "epoch": 0.4680205458168059, "grad_norm": 7.937993365598106, "learning_rate": 5.758092176688035e-06, "loss": 18.5179, "step": 25604 }, { "epoch": 0.4680388250132524, "grad_norm": 6.450681511813594, "learning_rate": 5.757799585982128e-06, "loss": 17.8099, "step": 25605 }, { "epoch": 0.4680571042096989, "grad_norm": 6.683748311075554, "learning_rate": 5.7575069926201995e-06, "loss": 17.5805, "step": 25606 }, { "epoch": 0.46807538340614546, "grad_norm": 6.964070926733987, "learning_rate": 5.757214396603275e-06, "loss": 18.0081, "step": 25607 }, { "epoch": 0.468093662602592, "grad_norm": 6.770428775611666, "learning_rate": 5.756921797932381e-06, "loss": 17.1792, "step": 25608 }, { "epoch": 0.46811194179903853, "grad_norm": 5.657804233910794, "learning_rate": 5.756629196608541e-06, "loss": 17.0043, "step": 25609 }, { "epoch": 0.468130220995485, "grad_norm": 5.359336554783504, "learning_rate": 5.756336592632779e-06, "loss": 16.9921, "step": 25610 }, { "epoch": 0.46814850019193155, "grad_norm": 6.278354616573513, "learning_rate": 5.756043986006127e-06, "loss": 17.5775, "step": 25611 }, { "epoch": 0.4681667793883781, "grad_norm": 8.526799560984587, "learning_rate": 5.7557513767296035e-06, "loss": 17.9176, "step": 25612 }, { "epoch": 0.4681850585848246, "grad_norm": 5.399386204186027, "learning_rate": 5.7554587648042384e-06, "loss": 17.0852, "step": 25613 }, { "epoch": 0.46820333778127116, "grad_norm": 8.148357792618592, "learning_rate": 5.755166150231055e-06, "loss": 17.9017, "step": 25614 }, { "epoch": 0.46822161697771764, "grad_norm": 5.188454402782272, "learning_rate": 5.754873533011079e-06, "loss": 17.1485, "step": 25615 }, { "epoch": 0.46823989617416417, "grad_norm": 6.797866173821247, "learning_rate": 5.754580913145339e-06, "loss": 17.8525, "step": 25616 }, { "epoch": 0.4682581753706107, "grad_norm": 6.606843059171352, "learning_rate": 5.754288290634856e-06, "loss": 17.5798, "step": 25617 }, { "epoch": 0.46827645456705724, "grad_norm": 6.558975467364231, "learning_rate": 5.753995665480659e-06, "loss": 17.4101, "step": 25618 }, { "epoch": 0.4682947337635038, "grad_norm": 6.4949767267599166, "learning_rate": 5.753703037683774e-06, "loss": 17.6557, "step": 25619 }, { "epoch": 0.46831301295995026, "grad_norm": 6.863121463205504, "learning_rate": 5.753410407245224e-06, "loss": 17.7778, "step": 25620 }, { "epoch": 0.4683312921563968, "grad_norm": 6.479891724331871, "learning_rate": 5.753117774166036e-06, "loss": 17.263, "step": 25621 }, { "epoch": 0.46834957135284333, "grad_norm": 5.386820293998476, "learning_rate": 5.7528251384472365e-06, "loss": 16.9541, "step": 25622 }, { "epoch": 0.46836785054928987, "grad_norm": 6.605155191840947, "learning_rate": 5.752532500089848e-06, "loss": 17.2899, "step": 25623 }, { "epoch": 0.4683861297457364, "grad_norm": 5.857745284694996, "learning_rate": 5.7522398590948995e-06, "loss": 17.3025, "step": 25624 }, { "epoch": 0.4684044089421829, "grad_norm": 5.650735443579671, "learning_rate": 5.7519472154634174e-06, "loss": 17.2117, "step": 25625 }, { "epoch": 0.4684226881386294, "grad_norm": 7.130100135051834, "learning_rate": 5.751654569196423e-06, "loss": 17.9528, "step": 25626 }, { "epoch": 0.46844096733507595, "grad_norm": 8.278536021762902, "learning_rate": 5.751361920294946e-06, "loss": 18.1778, "step": 25627 }, { "epoch": 0.4684592465315225, "grad_norm": 5.957601076068427, "learning_rate": 5.751069268760008e-06, "loss": 17.4728, "step": 25628 }, { "epoch": 0.468477525727969, "grad_norm": 6.7556101549333585, "learning_rate": 5.750776614592641e-06, "loss": 17.7987, "step": 25629 }, { "epoch": 0.4684958049244155, "grad_norm": 6.83338256167171, "learning_rate": 5.750483957793865e-06, "loss": 17.4064, "step": 25630 }, { "epoch": 0.46851408412086204, "grad_norm": 7.054059252306268, "learning_rate": 5.750191298364709e-06, "loss": 17.7911, "step": 25631 }, { "epoch": 0.4685323633173086, "grad_norm": 7.719577776442061, "learning_rate": 5.749898636306197e-06, "loss": 17.7469, "step": 25632 }, { "epoch": 0.4685506425137551, "grad_norm": 5.074989754595142, "learning_rate": 5.749605971619355e-06, "loss": 16.9432, "step": 25633 }, { "epoch": 0.46856892171020165, "grad_norm": 5.9451183656073985, "learning_rate": 5.74931330430521e-06, "loss": 17.2048, "step": 25634 }, { "epoch": 0.4685872009066481, "grad_norm": 6.742594603455647, "learning_rate": 5.749020634364787e-06, "loss": 17.5849, "step": 25635 }, { "epoch": 0.46860548010309466, "grad_norm": 6.11718331539814, "learning_rate": 5.748727961799111e-06, "loss": 17.7196, "step": 25636 }, { "epoch": 0.4686237592995412, "grad_norm": 6.610508876323689, "learning_rate": 5.7484352866092075e-06, "loss": 17.4668, "step": 25637 }, { "epoch": 0.46864203849598773, "grad_norm": 7.691559971277722, "learning_rate": 5.748142608796105e-06, "loss": 17.7219, "step": 25638 }, { "epoch": 0.4686603176924342, "grad_norm": 6.308453846473192, "learning_rate": 5.747849928360827e-06, "loss": 17.3177, "step": 25639 }, { "epoch": 0.46867859688888075, "grad_norm": 5.393722225054013, "learning_rate": 5.7475572453044e-06, "loss": 17.0605, "step": 25640 }, { "epoch": 0.4686968760853273, "grad_norm": 9.16376237447455, "learning_rate": 5.7472645596278495e-06, "loss": 17.6939, "step": 25641 }, { "epoch": 0.4687151552817738, "grad_norm": 7.240996906399217, "learning_rate": 5.7469718713322024e-06, "loss": 18.1665, "step": 25642 }, { "epoch": 0.46873343447822036, "grad_norm": 7.507610472592406, "learning_rate": 5.7466791804184815e-06, "loss": 18.2205, "step": 25643 }, { "epoch": 0.46875171367466684, "grad_norm": 5.822709102917425, "learning_rate": 5.746386486887718e-06, "loss": 17.2523, "step": 25644 }, { "epoch": 0.4687699928711134, "grad_norm": 6.237261321188969, "learning_rate": 5.746093790740932e-06, "loss": 17.6036, "step": 25645 }, { "epoch": 0.4687882720675599, "grad_norm": 6.15218480557225, "learning_rate": 5.745801091979153e-06, "loss": 17.4224, "step": 25646 }, { "epoch": 0.46880655126400644, "grad_norm": 6.24742142239045, "learning_rate": 5.745508390603405e-06, "loss": 17.301, "step": 25647 }, { "epoch": 0.468824830460453, "grad_norm": 6.127466908515292, "learning_rate": 5.7452156866147155e-06, "loss": 17.361, "step": 25648 }, { "epoch": 0.46884310965689946, "grad_norm": 6.378781447889848, "learning_rate": 5.744922980014111e-06, "loss": 17.5905, "step": 25649 }, { "epoch": 0.468861388853346, "grad_norm": 6.266970334193923, "learning_rate": 5.744630270802614e-06, "loss": 17.6054, "step": 25650 }, { "epoch": 0.46887966804979253, "grad_norm": 5.714962090082019, "learning_rate": 5.744337558981253e-06, "loss": 17.4403, "step": 25651 }, { "epoch": 0.46889794724623907, "grad_norm": 6.36557250001768, "learning_rate": 5.744044844551053e-06, "loss": 17.1341, "step": 25652 }, { "epoch": 0.4689162264426856, "grad_norm": 7.188647325522799, "learning_rate": 5.743752127513042e-06, "loss": 17.8325, "step": 25653 }, { "epoch": 0.4689345056391321, "grad_norm": 7.547680872870857, "learning_rate": 5.743459407868245e-06, "loss": 17.8632, "step": 25654 }, { "epoch": 0.4689527848355786, "grad_norm": 6.92933596374259, "learning_rate": 5.743166685617685e-06, "loss": 17.7693, "step": 25655 }, { "epoch": 0.46897106403202515, "grad_norm": 7.349112223008469, "learning_rate": 5.742873960762392e-06, "loss": 18.1488, "step": 25656 }, { "epoch": 0.4689893432284717, "grad_norm": 6.8542570673454755, "learning_rate": 5.7425812333033884e-06, "loss": 17.6014, "step": 25657 }, { "epoch": 0.4690076224249182, "grad_norm": 6.339414208739852, "learning_rate": 5.742288503241703e-06, "loss": 17.6208, "step": 25658 }, { "epoch": 0.4690259016213647, "grad_norm": 6.584263814777369, "learning_rate": 5.741995770578362e-06, "loss": 17.9069, "step": 25659 }, { "epoch": 0.46904418081781124, "grad_norm": 5.819636579535961, "learning_rate": 5.741703035314388e-06, "loss": 17.5112, "step": 25660 }, { "epoch": 0.4690624600142578, "grad_norm": 6.241513009919817, "learning_rate": 5.74141029745081e-06, "loss": 17.5247, "step": 25661 }, { "epoch": 0.4690807392107043, "grad_norm": 6.614371455467719, "learning_rate": 5.741117556988653e-06, "loss": 17.8962, "step": 25662 }, { "epoch": 0.46909901840715085, "grad_norm": 5.626675146884744, "learning_rate": 5.740824813928945e-06, "loss": 17.2283, "step": 25663 }, { "epoch": 0.4691172976035973, "grad_norm": 6.17983942813688, "learning_rate": 5.740532068272708e-06, "loss": 17.5033, "step": 25664 }, { "epoch": 0.46913557680004386, "grad_norm": 6.929165717542995, "learning_rate": 5.740239320020972e-06, "loss": 17.4919, "step": 25665 }, { "epoch": 0.4691538559964904, "grad_norm": 7.643820412534104, "learning_rate": 5.739946569174761e-06, "loss": 18.0734, "step": 25666 }, { "epoch": 0.46917213519293693, "grad_norm": 6.86572819422133, "learning_rate": 5.739653815735101e-06, "loss": 17.5621, "step": 25667 }, { "epoch": 0.46919041438938347, "grad_norm": 6.634938317137737, "learning_rate": 5.7393610597030195e-06, "loss": 17.7162, "step": 25668 }, { "epoch": 0.46920869358582995, "grad_norm": 5.9285304036871045, "learning_rate": 5.739068301079539e-06, "loss": 17.4825, "step": 25669 }, { "epoch": 0.4692269727822765, "grad_norm": 6.362775910391573, "learning_rate": 5.738775539865692e-06, "loss": 17.9573, "step": 25670 }, { "epoch": 0.469245251978723, "grad_norm": 5.632427385560699, "learning_rate": 5.738482776062499e-06, "loss": 17.141, "step": 25671 }, { "epoch": 0.46926353117516956, "grad_norm": 5.64359341938006, "learning_rate": 5.738190009670987e-06, "loss": 17.3118, "step": 25672 }, { "epoch": 0.46928181037161604, "grad_norm": 7.628796705740088, "learning_rate": 5.737897240692185e-06, "loss": 18.1154, "step": 25673 }, { "epoch": 0.4693000895680626, "grad_norm": 6.6720638000211006, "learning_rate": 5.737604469127116e-06, "loss": 17.6352, "step": 25674 }, { "epoch": 0.4693183687645091, "grad_norm": 7.118285106718248, "learning_rate": 5.737311694976807e-06, "loss": 17.8439, "step": 25675 }, { "epoch": 0.46933664796095564, "grad_norm": 6.679490191867098, "learning_rate": 5.737018918242285e-06, "loss": 17.4833, "step": 25676 }, { "epoch": 0.4693549271574022, "grad_norm": 6.56219098604823, "learning_rate": 5.7367261389245765e-06, "loss": 17.4497, "step": 25677 }, { "epoch": 0.46937320635384866, "grad_norm": 5.93748186058725, "learning_rate": 5.736433357024706e-06, "loss": 17.2334, "step": 25678 }, { "epoch": 0.4693914855502952, "grad_norm": 5.548583926147259, "learning_rate": 5.7361405725437005e-06, "loss": 17.118, "step": 25679 }, { "epoch": 0.46940976474674173, "grad_norm": 7.320166665768787, "learning_rate": 5.735847785482587e-06, "loss": 17.8548, "step": 25680 }, { "epoch": 0.46942804394318827, "grad_norm": 6.010595277612539, "learning_rate": 5.73555499584239e-06, "loss": 16.8336, "step": 25681 }, { "epoch": 0.4694463231396348, "grad_norm": 5.908111636604885, "learning_rate": 5.7352622036241356e-06, "loss": 17.4006, "step": 25682 }, { "epoch": 0.4694646023360813, "grad_norm": 8.56743637344712, "learning_rate": 5.734969408828852e-06, "loss": 18.1486, "step": 25683 }, { "epoch": 0.4694828815325278, "grad_norm": 8.068802025596723, "learning_rate": 5.734676611457566e-06, "loss": 18.1027, "step": 25684 }, { "epoch": 0.46950116072897435, "grad_norm": 6.568334598477962, "learning_rate": 5.734383811511301e-06, "loss": 17.4182, "step": 25685 }, { "epoch": 0.4695194399254209, "grad_norm": 6.602268560068675, "learning_rate": 5.734091008991083e-06, "loss": 17.3948, "step": 25686 }, { "epoch": 0.4695377191218674, "grad_norm": 8.575644626828332, "learning_rate": 5.733798203897941e-06, "loss": 18.5066, "step": 25687 }, { "epoch": 0.4695559983183139, "grad_norm": 5.9666038254173435, "learning_rate": 5.7335053962329e-06, "loss": 17.3666, "step": 25688 }, { "epoch": 0.46957427751476044, "grad_norm": 7.61695503473196, "learning_rate": 5.7332125859969856e-06, "loss": 17.5837, "step": 25689 }, { "epoch": 0.469592556711207, "grad_norm": 6.1250613603963835, "learning_rate": 5.7329197731912256e-06, "loss": 17.5394, "step": 25690 }, { "epoch": 0.4696108359076535, "grad_norm": 6.446822408970243, "learning_rate": 5.732626957816645e-06, "loss": 17.4589, "step": 25691 }, { "epoch": 0.46962911510410005, "grad_norm": 5.94007934733352, "learning_rate": 5.7323341398742706e-06, "loss": 17.2912, "step": 25692 }, { "epoch": 0.46964739430054653, "grad_norm": 6.384094508784263, "learning_rate": 5.732041319365128e-06, "loss": 17.5478, "step": 25693 }, { "epoch": 0.46966567349699306, "grad_norm": 7.089703335800537, "learning_rate": 5.731748496290246e-06, "loss": 17.5673, "step": 25694 }, { "epoch": 0.4696839526934396, "grad_norm": 4.857902234470191, "learning_rate": 5.731455670650647e-06, "loss": 16.9854, "step": 25695 }, { "epoch": 0.46970223188988613, "grad_norm": 6.634609408981984, "learning_rate": 5.731162842447359e-06, "loss": 17.2928, "step": 25696 }, { "epoch": 0.46972051108633267, "grad_norm": 6.032974300249462, "learning_rate": 5.730870011681411e-06, "loss": 17.3983, "step": 25697 }, { "epoch": 0.46973879028277915, "grad_norm": 5.073998822564762, "learning_rate": 5.730577178353824e-06, "loss": 16.9713, "step": 25698 }, { "epoch": 0.4697570694792257, "grad_norm": 6.252144364596876, "learning_rate": 5.7302843424656294e-06, "loss": 17.2702, "step": 25699 }, { "epoch": 0.4697753486756722, "grad_norm": 6.534455522100211, "learning_rate": 5.729991504017851e-06, "loss": 17.7289, "step": 25700 }, { "epoch": 0.46979362787211876, "grad_norm": 7.048492106480271, "learning_rate": 5.729698663011516e-06, "loss": 17.7036, "step": 25701 }, { "epoch": 0.4698119070685653, "grad_norm": 7.787887934353227, "learning_rate": 5.729405819447651e-06, "loss": 17.8927, "step": 25702 }, { "epoch": 0.4698301862650118, "grad_norm": 6.843305471009764, "learning_rate": 5.729112973327281e-06, "loss": 17.6049, "step": 25703 }, { "epoch": 0.4698484654614583, "grad_norm": 6.609894127921328, "learning_rate": 5.728820124651435e-06, "loss": 17.4264, "step": 25704 }, { "epoch": 0.46986674465790484, "grad_norm": 5.988494520884227, "learning_rate": 5.728527273421135e-06, "loss": 17.3569, "step": 25705 }, { "epoch": 0.4698850238543514, "grad_norm": 6.828862597846488, "learning_rate": 5.728234419637411e-06, "loss": 17.5556, "step": 25706 }, { "epoch": 0.46990330305079786, "grad_norm": 7.474874776494301, "learning_rate": 5.7279415633012895e-06, "loss": 18.2851, "step": 25707 }, { "epoch": 0.4699215822472444, "grad_norm": 7.734436070776567, "learning_rate": 5.7276487044137964e-06, "loss": 18.4307, "step": 25708 }, { "epoch": 0.46993986144369093, "grad_norm": 7.963822028095857, "learning_rate": 5.727355842975956e-06, "loss": 18.0831, "step": 25709 }, { "epoch": 0.46995814064013747, "grad_norm": 7.74876160533145, "learning_rate": 5.7270629789887966e-06, "loss": 17.859, "step": 25710 }, { "epoch": 0.469976419836584, "grad_norm": 5.603722211222619, "learning_rate": 5.7267701124533455e-06, "loss": 17.1839, "step": 25711 }, { "epoch": 0.4699946990330305, "grad_norm": 7.350472567641458, "learning_rate": 5.7264772433706284e-06, "loss": 17.7715, "step": 25712 }, { "epoch": 0.470012978229477, "grad_norm": 8.48293326398749, "learning_rate": 5.726184371741671e-06, "loss": 17.9233, "step": 25713 }, { "epoch": 0.47003125742592355, "grad_norm": 7.3769601066609365, "learning_rate": 5.7258914975675e-06, "loss": 18.0175, "step": 25714 }, { "epoch": 0.4700495366223701, "grad_norm": 5.47695415709752, "learning_rate": 5.725598620849144e-06, "loss": 16.9339, "step": 25715 }, { "epoch": 0.4700678158188166, "grad_norm": 6.340040965722052, "learning_rate": 5.725305741587627e-06, "loss": 17.6432, "step": 25716 }, { "epoch": 0.4700860950152631, "grad_norm": 6.050953636640179, "learning_rate": 5.725012859783975e-06, "loss": 17.3825, "step": 25717 }, { "epoch": 0.47010437421170964, "grad_norm": 6.8669465037179975, "learning_rate": 5.724719975439217e-06, "loss": 17.7668, "step": 25718 }, { "epoch": 0.4701226534081562, "grad_norm": 5.122800605776736, "learning_rate": 5.724427088554379e-06, "loss": 17.0787, "step": 25719 }, { "epoch": 0.4701409326046027, "grad_norm": 6.818098158107216, "learning_rate": 5.7241341991304855e-06, "loss": 17.5973, "step": 25720 }, { "epoch": 0.47015921180104925, "grad_norm": 7.719653622645311, "learning_rate": 5.723841307168565e-06, "loss": 17.8661, "step": 25721 }, { "epoch": 0.47017749099749573, "grad_norm": 7.152130450960114, "learning_rate": 5.723548412669644e-06, "loss": 17.8253, "step": 25722 }, { "epoch": 0.47019577019394226, "grad_norm": 8.988701786963508, "learning_rate": 5.723255515634747e-06, "loss": 17.6978, "step": 25723 }, { "epoch": 0.4702140493903888, "grad_norm": 5.22453220394301, "learning_rate": 5.722962616064903e-06, "loss": 17.0157, "step": 25724 }, { "epoch": 0.47023232858683534, "grad_norm": 5.863474360587279, "learning_rate": 5.72266971396114e-06, "loss": 17.3891, "step": 25725 }, { "epoch": 0.47025060778328187, "grad_norm": 8.334234473614273, "learning_rate": 5.72237680932448e-06, "loss": 18.2978, "step": 25726 }, { "epoch": 0.47026888697972835, "grad_norm": 7.895708930511546, "learning_rate": 5.722083902155952e-06, "loss": 18.1887, "step": 25727 }, { "epoch": 0.4702871661761749, "grad_norm": 5.5779541338091025, "learning_rate": 5.7217909924565825e-06, "loss": 17.3675, "step": 25728 }, { "epoch": 0.4703054453726214, "grad_norm": 7.120368008290413, "learning_rate": 5.7214980802274e-06, "loss": 17.6981, "step": 25729 }, { "epoch": 0.47032372456906796, "grad_norm": 6.318338417013921, "learning_rate": 5.721205165469428e-06, "loss": 17.3642, "step": 25730 }, { "epoch": 0.4703420037655145, "grad_norm": 6.455983042945042, "learning_rate": 5.7209122481836944e-06, "loss": 17.5759, "step": 25731 }, { "epoch": 0.470360282961961, "grad_norm": 7.351994669905593, "learning_rate": 5.720619328371226e-06, "loss": 18.0706, "step": 25732 }, { "epoch": 0.4703785621584075, "grad_norm": 6.5837840471987, "learning_rate": 5.72032640603305e-06, "loss": 17.7664, "step": 25733 }, { "epoch": 0.47039684135485405, "grad_norm": 8.831748245630628, "learning_rate": 5.720033481170192e-06, "loss": 18.2569, "step": 25734 }, { "epoch": 0.4704151205513006, "grad_norm": 6.8952771330461395, "learning_rate": 5.719740553783679e-06, "loss": 17.6438, "step": 25735 }, { "epoch": 0.4704333997477471, "grad_norm": 5.684297049286084, "learning_rate": 5.719447623874539e-06, "loss": 17.1094, "step": 25736 }, { "epoch": 0.4704516789441936, "grad_norm": 6.4742925462084004, "learning_rate": 5.719154691443795e-06, "loss": 17.4488, "step": 25737 }, { "epoch": 0.47046995814064013, "grad_norm": 4.955174616071838, "learning_rate": 5.718861756492478e-06, "loss": 17.0022, "step": 25738 }, { "epoch": 0.47048823733708667, "grad_norm": 6.669365353904177, "learning_rate": 5.718568819021612e-06, "loss": 17.7681, "step": 25739 }, { "epoch": 0.4705065165335332, "grad_norm": 6.576506940733695, "learning_rate": 5.718275879032226e-06, "loss": 17.3518, "step": 25740 }, { "epoch": 0.4705247957299797, "grad_norm": 6.843285743590159, "learning_rate": 5.717982936525345e-06, "loss": 17.7304, "step": 25741 }, { "epoch": 0.4705430749264262, "grad_norm": 6.1425211194349005, "learning_rate": 5.717689991501996e-06, "loss": 17.572, "step": 25742 }, { "epoch": 0.47056135412287275, "grad_norm": 6.259091147614705, "learning_rate": 5.7173970439632044e-06, "loss": 17.2392, "step": 25743 }, { "epoch": 0.4705796333193193, "grad_norm": 6.56324687763438, "learning_rate": 5.71710409391e-06, "loss": 17.699, "step": 25744 }, { "epoch": 0.4705979125157658, "grad_norm": 6.4870259587971075, "learning_rate": 5.716811141343407e-06, "loss": 17.575, "step": 25745 }, { "epoch": 0.4706161917122123, "grad_norm": 7.0280958866553185, "learning_rate": 5.716518186264454e-06, "loss": 17.7948, "step": 25746 }, { "epoch": 0.47063447090865884, "grad_norm": 7.43835303131793, "learning_rate": 5.716225228674166e-06, "loss": 17.4661, "step": 25747 }, { "epoch": 0.4706527501051054, "grad_norm": 5.680874544795799, "learning_rate": 5.715932268573572e-06, "loss": 17.3705, "step": 25748 }, { "epoch": 0.4706710293015519, "grad_norm": 11.223624627611429, "learning_rate": 5.715639305963697e-06, "loss": 18.3935, "step": 25749 }, { "epoch": 0.47068930849799845, "grad_norm": 7.106054349598446, "learning_rate": 5.715346340845568e-06, "loss": 17.6383, "step": 25750 }, { "epoch": 0.47070758769444493, "grad_norm": 5.927422855884639, "learning_rate": 5.7150533732202115e-06, "loss": 17.346, "step": 25751 }, { "epoch": 0.47072586689089146, "grad_norm": 6.295244208832737, "learning_rate": 5.714760403088656e-06, "loss": 17.5507, "step": 25752 }, { "epoch": 0.470744146087338, "grad_norm": 9.004936621510755, "learning_rate": 5.7144674304519275e-06, "loss": 17.6309, "step": 25753 }, { "epoch": 0.47076242528378454, "grad_norm": 8.922715486165398, "learning_rate": 5.714174455311052e-06, "loss": 17.9797, "step": 25754 }, { "epoch": 0.47078070448023107, "grad_norm": 7.6902900504639415, "learning_rate": 5.713881477667057e-06, "loss": 18.4467, "step": 25755 }, { "epoch": 0.47079898367667755, "grad_norm": 10.561340904277566, "learning_rate": 5.713588497520971e-06, "loss": 18.5204, "step": 25756 }, { "epoch": 0.4708172628731241, "grad_norm": 6.6134355482343175, "learning_rate": 5.7132955148738174e-06, "loss": 17.7061, "step": 25757 }, { "epoch": 0.4708355420695706, "grad_norm": 6.8893242974843885, "learning_rate": 5.713002529726626e-06, "loss": 17.7588, "step": 25758 }, { "epoch": 0.47085382126601716, "grad_norm": 7.474456619203395, "learning_rate": 5.712709542080421e-06, "loss": 18.0075, "step": 25759 }, { "epoch": 0.4708721004624637, "grad_norm": 6.948855013586783, "learning_rate": 5.712416551936232e-06, "loss": 17.8254, "step": 25760 }, { "epoch": 0.4708903796589102, "grad_norm": 6.50457562766823, "learning_rate": 5.7121235592950855e-06, "loss": 17.751, "step": 25761 }, { "epoch": 0.4709086588553567, "grad_norm": 5.83894267974012, "learning_rate": 5.711830564158006e-06, "loss": 17.2684, "step": 25762 }, { "epoch": 0.47092693805180325, "grad_norm": 7.653457693003221, "learning_rate": 5.711537566526024e-06, "loss": 17.9138, "step": 25763 }, { "epoch": 0.4709452172482498, "grad_norm": 5.1323006430626785, "learning_rate": 5.711244566400163e-06, "loss": 16.9562, "step": 25764 }, { "epoch": 0.4709634964446963, "grad_norm": 8.25864421776104, "learning_rate": 5.710951563781452e-06, "loss": 17.7398, "step": 25765 }, { "epoch": 0.4709817756411428, "grad_norm": 7.69456264534389, "learning_rate": 5.710658558670919e-06, "loss": 18.1504, "step": 25766 }, { "epoch": 0.47100005483758933, "grad_norm": 6.088654055819759, "learning_rate": 5.710365551069588e-06, "loss": 17.5746, "step": 25767 }, { "epoch": 0.47101833403403587, "grad_norm": 5.194843577707228, "learning_rate": 5.710072540978487e-06, "loss": 17.1521, "step": 25768 }, { "epoch": 0.4710366132304824, "grad_norm": 6.697547190304205, "learning_rate": 5.709779528398643e-06, "loss": 17.759, "step": 25769 }, { "epoch": 0.47105489242692894, "grad_norm": 6.429279863825913, "learning_rate": 5.709486513331085e-06, "loss": 17.1669, "step": 25770 }, { "epoch": 0.4710731716233754, "grad_norm": 6.441112590700953, "learning_rate": 5.709193495776837e-06, "loss": 17.7022, "step": 25771 }, { "epoch": 0.47109145081982196, "grad_norm": 6.625448028730175, "learning_rate": 5.708900475736928e-06, "loss": 17.4705, "step": 25772 }, { "epoch": 0.4711097300162685, "grad_norm": 6.305411184269652, "learning_rate": 5.708607453212385e-06, "loss": 17.5364, "step": 25773 }, { "epoch": 0.471128009212715, "grad_norm": 6.290464556869714, "learning_rate": 5.708314428204233e-06, "loss": 17.5009, "step": 25774 }, { "epoch": 0.4711462884091615, "grad_norm": 7.906891643381622, "learning_rate": 5.708021400713501e-06, "loss": 17.8957, "step": 25775 }, { "epoch": 0.47116456760560804, "grad_norm": 6.183705673203319, "learning_rate": 5.707728370741215e-06, "loss": 17.5608, "step": 25776 }, { "epoch": 0.4711828468020546, "grad_norm": 7.540136204940559, "learning_rate": 5.707435338288403e-06, "loss": 18.1934, "step": 25777 }, { "epoch": 0.4712011259985011, "grad_norm": 6.791809613177821, "learning_rate": 5.707142303356092e-06, "loss": 17.4112, "step": 25778 }, { "epoch": 0.47121940519494765, "grad_norm": 5.89670348103306, "learning_rate": 5.706849265945307e-06, "loss": 17.3449, "step": 25779 }, { "epoch": 0.47123768439139413, "grad_norm": 7.1241957625525085, "learning_rate": 5.706556226057078e-06, "loss": 17.7355, "step": 25780 }, { "epoch": 0.47125596358784066, "grad_norm": 7.153960642312598, "learning_rate": 5.706263183692431e-06, "loss": 17.6717, "step": 25781 }, { "epoch": 0.4712742427842872, "grad_norm": 6.438701194873751, "learning_rate": 5.70597013885239e-06, "loss": 17.5661, "step": 25782 }, { "epoch": 0.47129252198073374, "grad_norm": 7.390245278411702, "learning_rate": 5.705677091537988e-06, "loss": 18.0024, "step": 25783 }, { "epoch": 0.47131080117718027, "grad_norm": 5.88308197225255, "learning_rate": 5.705384041750249e-06, "loss": 17.3385, "step": 25784 }, { "epoch": 0.47132908037362675, "grad_norm": 5.560797798888823, "learning_rate": 5.705090989490199e-06, "loss": 17.2345, "step": 25785 }, { "epoch": 0.4713473595700733, "grad_norm": 6.386049160659695, "learning_rate": 5.7047979347588665e-06, "loss": 17.8416, "step": 25786 }, { "epoch": 0.4713656387665198, "grad_norm": 6.9627073599178715, "learning_rate": 5.704504877557279e-06, "loss": 18.0885, "step": 25787 }, { "epoch": 0.47138391796296636, "grad_norm": 6.204843270334688, "learning_rate": 5.704211817886463e-06, "loss": 17.7679, "step": 25788 }, { "epoch": 0.4714021971594129, "grad_norm": 6.480918566703043, "learning_rate": 5.703918755747444e-06, "loss": 17.6984, "step": 25789 }, { "epoch": 0.4714204763558594, "grad_norm": 7.127484284389984, "learning_rate": 5.703625691141252e-06, "loss": 17.9324, "step": 25790 }, { "epoch": 0.4714387555523059, "grad_norm": 6.2940842817535, "learning_rate": 5.703332624068914e-06, "loss": 17.2779, "step": 25791 }, { "epoch": 0.47145703474875245, "grad_norm": 6.574987717773518, "learning_rate": 5.703039554531454e-06, "loss": 17.7116, "step": 25792 }, { "epoch": 0.471475313945199, "grad_norm": 7.253700012881749, "learning_rate": 5.7027464825299024e-06, "loss": 17.8144, "step": 25793 }, { "epoch": 0.4714935931416455, "grad_norm": 6.939616955610362, "learning_rate": 5.702453408065287e-06, "loss": 17.7171, "step": 25794 }, { "epoch": 0.471511872338092, "grad_norm": 7.077047832098274, "learning_rate": 5.702160331138632e-06, "loss": 17.4779, "step": 25795 }, { "epoch": 0.47153015153453853, "grad_norm": 8.067816705056266, "learning_rate": 5.7018672517509645e-06, "loss": 18.0141, "step": 25796 }, { "epoch": 0.47154843073098507, "grad_norm": 4.89697354386606, "learning_rate": 5.7015741699033144e-06, "loss": 16.8199, "step": 25797 }, { "epoch": 0.4715667099274316, "grad_norm": 6.599982962770622, "learning_rate": 5.701281085596709e-06, "loss": 17.2145, "step": 25798 }, { "epoch": 0.47158498912387814, "grad_norm": 6.756677056868582, "learning_rate": 5.700987998832173e-06, "loss": 17.5423, "step": 25799 }, { "epoch": 0.4716032683203246, "grad_norm": 6.4648199917368325, "learning_rate": 5.700694909610736e-06, "loss": 17.7403, "step": 25800 }, { "epoch": 0.47162154751677116, "grad_norm": 6.397784271903643, "learning_rate": 5.700401817933422e-06, "loss": 17.6628, "step": 25801 }, { "epoch": 0.4716398267132177, "grad_norm": 5.149442698610189, "learning_rate": 5.700108723801262e-06, "loss": 17.0186, "step": 25802 }, { "epoch": 0.4716581059096642, "grad_norm": 7.4748213213920796, "learning_rate": 5.69981562721528e-06, "loss": 17.7124, "step": 25803 }, { "epoch": 0.47167638510611076, "grad_norm": 7.234924381618311, "learning_rate": 5.699522528176506e-06, "loss": 18.0197, "step": 25804 }, { "epoch": 0.47169466430255724, "grad_norm": 6.348990477810652, "learning_rate": 5.699229426685967e-06, "loss": 17.305, "step": 25805 }, { "epoch": 0.4717129434990038, "grad_norm": 5.804665038058139, "learning_rate": 5.698936322744689e-06, "loss": 17.1312, "step": 25806 }, { "epoch": 0.4717312226954503, "grad_norm": 5.685403952588046, "learning_rate": 5.6986432163537e-06, "loss": 17.1033, "step": 25807 }, { "epoch": 0.47174950189189685, "grad_norm": 5.6113171001790985, "learning_rate": 5.698350107514028e-06, "loss": 17.089, "step": 25808 }, { "epoch": 0.47176778108834333, "grad_norm": 6.010913038377717, "learning_rate": 5.698056996226697e-06, "loss": 17.4232, "step": 25809 }, { "epoch": 0.47178606028478987, "grad_norm": 7.38772038955717, "learning_rate": 5.6977638824927385e-06, "loss": 17.7902, "step": 25810 }, { "epoch": 0.4718043394812364, "grad_norm": 6.259717543016462, "learning_rate": 5.697470766313179e-06, "loss": 17.4974, "step": 25811 }, { "epoch": 0.47182261867768294, "grad_norm": 7.24695507568173, "learning_rate": 5.697177647689044e-06, "loss": 18.066, "step": 25812 }, { "epoch": 0.4718408978741295, "grad_norm": 6.138165958681334, "learning_rate": 5.696884526621361e-06, "loss": 17.2708, "step": 25813 }, { "epoch": 0.47185917707057595, "grad_norm": 5.908285647364228, "learning_rate": 5.696591403111158e-06, "loss": 17.0526, "step": 25814 }, { "epoch": 0.4718774562670225, "grad_norm": 5.9555887823675135, "learning_rate": 5.6962982771594645e-06, "loss": 17.3364, "step": 25815 }, { "epoch": 0.471895735463469, "grad_norm": 6.547020115876095, "learning_rate": 5.696005148767305e-06, "loss": 17.6849, "step": 25816 }, { "epoch": 0.47191401465991556, "grad_norm": 5.5296341109417115, "learning_rate": 5.695712017935707e-06, "loss": 17.2958, "step": 25817 }, { "epoch": 0.4719322938563621, "grad_norm": 5.914017653103802, "learning_rate": 5.6954188846657e-06, "loss": 17.355, "step": 25818 }, { "epoch": 0.4719505730528086, "grad_norm": 6.347253209844765, "learning_rate": 5.69512574895831e-06, "loss": 17.6088, "step": 25819 }, { "epoch": 0.4719688522492551, "grad_norm": 6.8219558055636105, "learning_rate": 5.694832610814565e-06, "loss": 17.7155, "step": 25820 }, { "epoch": 0.47198713144570165, "grad_norm": 6.475568929341647, "learning_rate": 5.694539470235491e-06, "loss": 17.4291, "step": 25821 }, { "epoch": 0.4720054106421482, "grad_norm": 6.238996419924572, "learning_rate": 5.694246327222117e-06, "loss": 17.3621, "step": 25822 }, { "epoch": 0.4720236898385947, "grad_norm": 5.822179323273069, "learning_rate": 5.693953181775469e-06, "loss": 17.1884, "step": 25823 }, { "epoch": 0.4720419690350412, "grad_norm": 7.282504982699104, "learning_rate": 5.6936600338965755e-06, "loss": 17.8156, "step": 25824 }, { "epoch": 0.47206024823148773, "grad_norm": 6.595031253693095, "learning_rate": 5.693366883586465e-06, "loss": 17.5496, "step": 25825 }, { "epoch": 0.47207852742793427, "grad_norm": 6.624650194623883, "learning_rate": 5.693073730846162e-06, "loss": 17.6315, "step": 25826 }, { "epoch": 0.4720968066243808, "grad_norm": 6.576914094424634, "learning_rate": 5.6927805756766975e-06, "loss": 17.4683, "step": 25827 }, { "epoch": 0.47211508582082734, "grad_norm": 6.415511974670059, "learning_rate": 5.692487418079096e-06, "loss": 17.4982, "step": 25828 }, { "epoch": 0.4721333650172738, "grad_norm": 6.482107892871176, "learning_rate": 5.692194258054387e-06, "loss": 17.8158, "step": 25829 }, { "epoch": 0.47215164421372036, "grad_norm": 6.574487703205978, "learning_rate": 5.691901095603596e-06, "loss": 17.4369, "step": 25830 }, { "epoch": 0.4721699234101669, "grad_norm": 8.795784141479906, "learning_rate": 5.6916079307277525e-06, "loss": 17.8158, "step": 25831 }, { "epoch": 0.4721882026066134, "grad_norm": 8.248989669523736, "learning_rate": 5.691314763427883e-06, "loss": 17.8837, "step": 25832 }, { "epoch": 0.47220648180305996, "grad_norm": 6.722758123023115, "learning_rate": 5.691021593705015e-06, "loss": 17.8966, "step": 25833 }, { "epoch": 0.47222476099950644, "grad_norm": 7.033595405632596, "learning_rate": 5.690728421560177e-06, "loss": 17.7698, "step": 25834 }, { "epoch": 0.472243040195953, "grad_norm": 5.509576586719086, "learning_rate": 5.690435246994397e-06, "loss": 17.2702, "step": 25835 }, { "epoch": 0.4722613193923995, "grad_norm": 6.454036418745367, "learning_rate": 5.6901420700086985e-06, "loss": 17.6068, "step": 25836 }, { "epoch": 0.47227959858884605, "grad_norm": 6.5781206231900065, "learning_rate": 5.689848890604113e-06, "loss": 17.5991, "step": 25837 }, { "epoch": 0.4722978777852926, "grad_norm": 6.2061747849983675, "learning_rate": 5.689555708781667e-06, "loss": 17.3686, "step": 25838 }, { "epoch": 0.47231615698173907, "grad_norm": 5.537459395375557, "learning_rate": 5.689262524542389e-06, "loss": 17.2471, "step": 25839 }, { "epoch": 0.4723344361781856, "grad_norm": 6.814166713417278, "learning_rate": 5.688969337887305e-06, "loss": 17.7222, "step": 25840 }, { "epoch": 0.47235271537463214, "grad_norm": 6.132540921005035, "learning_rate": 5.688676148817442e-06, "loss": 17.0324, "step": 25841 }, { "epoch": 0.4723709945710787, "grad_norm": 5.9814842542380395, "learning_rate": 5.688382957333831e-06, "loss": 17.4957, "step": 25842 }, { "epoch": 0.47238927376752515, "grad_norm": 6.635167267324463, "learning_rate": 5.688089763437498e-06, "loss": 17.6083, "step": 25843 }, { "epoch": 0.4724075529639717, "grad_norm": 6.772207805176339, "learning_rate": 5.687796567129468e-06, "loss": 18.0892, "step": 25844 }, { "epoch": 0.4724258321604182, "grad_norm": 6.155238518836094, "learning_rate": 5.687503368410772e-06, "loss": 17.3286, "step": 25845 }, { "epoch": 0.47244411135686476, "grad_norm": 6.557655040142976, "learning_rate": 5.687210167282435e-06, "loss": 17.805, "step": 25846 }, { "epoch": 0.4724623905533113, "grad_norm": 5.832099023514787, "learning_rate": 5.686916963745487e-06, "loss": 17.4085, "step": 25847 }, { "epoch": 0.4724806697497578, "grad_norm": 5.806232684173829, "learning_rate": 5.686623757800954e-06, "loss": 17.4479, "step": 25848 }, { "epoch": 0.4724989489462043, "grad_norm": 8.043928830639969, "learning_rate": 5.686330549449866e-06, "loss": 17.8331, "step": 25849 }, { "epoch": 0.47251722814265085, "grad_norm": 6.535933902088941, "learning_rate": 5.6860373386932465e-06, "loss": 17.6848, "step": 25850 }, { "epoch": 0.4725355073390974, "grad_norm": 7.168594583730302, "learning_rate": 5.685744125532127e-06, "loss": 17.5545, "step": 25851 }, { "epoch": 0.4725537865355439, "grad_norm": 5.374707794732045, "learning_rate": 5.685450909967533e-06, "loss": 17.1791, "step": 25852 }, { "epoch": 0.4725720657319904, "grad_norm": 6.280479088854206, "learning_rate": 5.685157692000494e-06, "loss": 17.7383, "step": 25853 }, { "epoch": 0.47259034492843693, "grad_norm": 6.757402852042405, "learning_rate": 5.6848644716320365e-06, "loss": 17.4559, "step": 25854 }, { "epoch": 0.47260862412488347, "grad_norm": 6.907323659330623, "learning_rate": 5.684571248863188e-06, "loss": 17.5365, "step": 25855 }, { "epoch": 0.47262690332133, "grad_norm": 6.153775305233588, "learning_rate": 5.684278023694978e-06, "loss": 17.3031, "step": 25856 }, { "epoch": 0.47264518251777654, "grad_norm": 13.665868608285148, "learning_rate": 5.683984796128432e-06, "loss": 18.349, "step": 25857 }, { "epoch": 0.472663461714223, "grad_norm": 6.817935297092144, "learning_rate": 5.683691566164579e-06, "loss": 17.463, "step": 25858 }, { "epoch": 0.47268174091066956, "grad_norm": 6.551114817335246, "learning_rate": 5.683398333804446e-06, "loss": 17.8015, "step": 25859 }, { "epoch": 0.4727000201071161, "grad_norm": 5.18392331601108, "learning_rate": 5.683105099049061e-06, "loss": 17.0122, "step": 25860 }, { "epoch": 0.47271829930356263, "grad_norm": 6.0659547084974585, "learning_rate": 5.682811861899452e-06, "loss": 17.4556, "step": 25861 }, { "epoch": 0.47273657850000916, "grad_norm": 6.1119645321856195, "learning_rate": 5.682518622356647e-06, "loss": 17.3908, "step": 25862 }, { "epoch": 0.47275485769645564, "grad_norm": 7.284642014411717, "learning_rate": 5.682225380421674e-06, "loss": 17.9496, "step": 25863 }, { "epoch": 0.4727731368929022, "grad_norm": 6.929019893916304, "learning_rate": 5.681932136095558e-06, "loss": 17.4113, "step": 25864 }, { "epoch": 0.4727914160893487, "grad_norm": 6.6258985334680025, "learning_rate": 5.681638889379331e-06, "loss": 17.316, "step": 25865 }, { "epoch": 0.47280969528579525, "grad_norm": 5.242997829500244, "learning_rate": 5.681345640274018e-06, "loss": 16.9227, "step": 25866 }, { "epoch": 0.4728279744822418, "grad_norm": 6.577576620433062, "learning_rate": 5.681052388780649e-06, "loss": 17.6209, "step": 25867 }, { "epoch": 0.47284625367868827, "grad_norm": 5.71879287588394, "learning_rate": 5.680759134900249e-06, "loss": 17.5261, "step": 25868 }, { "epoch": 0.4728645328751348, "grad_norm": 6.196560471184433, "learning_rate": 5.680465878633848e-06, "loss": 17.5856, "step": 25869 }, { "epoch": 0.47288281207158134, "grad_norm": 6.68080064698184, "learning_rate": 5.680172619982474e-06, "loss": 17.7189, "step": 25870 }, { "epoch": 0.4729010912680279, "grad_norm": 6.161544304228171, "learning_rate": 5.6798793589471526e-06, "loss": 17.308, "step": 25871 }, { "epoch": 0.4729193704644744, "grad_norm": 8.332024594696001, "learning_rate": 5.679586095528913e-06, "loss": 17.9046, "step": 25872 }, { "epoch": 0.4729376496609209, "grad_norm": 12.59299099131102, "learning_rate": 5.679292829728783e-06, "loss": 18.5357, "step": 25873 }, { "epoch": 0.4729559288573674, "grad_norm": 7.905383411209632, "learning_rate": 5.678999561547791e-06, "loss": 18.2884, "step": 25874 }, { "epoch": 0.47297420805381396, "grad_norm": 6.661492886445078, "learning_rate": 5.678706290986964e-06, "loss": 17.5146, "step": 25875 }, { "epoch": 0.4729924872502605, "grad_norm": 4.781496507417505, "learning_rate": 5.678413018047331e-06, "loss": 16.9854, "step": 25876 }, { "epoch": 0.473010766446707, "grad_norm": 7.208361767350122, "learning_rate": 5.678119742729919e-06, "loss": 17.4394, "step": 25877 }, { "epoch": 0.4730290456431535, "grad_norm": 6.994738151235073, "learning_rate": 5.6778264650357565e-06, "loss": 17.588, "step": 25878 }, { "epoch": 0.47304732483960005, "grad_norm": 7.306192444508178, "learning_rate": 5.677533184965871e-06, "loss": 17.7977, "step": 25879 }, { "epoch": 0.4730656040360466, "grad_norm": 6.0981017799010075, "learning_rate": 5.677239902521291e-06, "loss": 17.4439, "step": 25880 }, { "epoch": 0.4730838832324931, "grad_norm": 6.438982786471811, "learning_rate": 5.676946617703043e-06, "loss": 17.5578, "step": 25881 }, { "epoch": 0.4731021624289396, "grad_norm": 6.134441006378765, "learning_rate": 5.676653330512155e-06, "loss": 17.29, "step": 25882 }, { "epoch": 0.47312044162538613, "grad_norm": 6.184028519930327, "learning_rate": 5.676360040949656e-06, "loss": 17.6106, "step": 25883 }, { "epoch": 0.47313872082183267, "grad_norm": 6.186597448277817, "learning_rate": 5.676066749016577e-06, "loss": 17.1757, "step": 25884 }, { "epoch": 0.4731570000182792, "grad_norm": 6.887298996952183, "learning_rate": 5.67577345471394e-06, "loss": 17.5008, "step": 25885 }, { "epoch": 0.47317527921472574, "grad_norm": 5.17511996340504, "learning_rate": 5.6754801580427755e-06, "loss": 17.1795, "step": 25886 }, { "epoch": 0.4731935584111722, "grad_norm": 7.466202467595964, "learning_rate": 5.6751868590041125e-06, "loss": 17.8165, "step": 25887 }, { "epoch": 0.47321183760761876, "grad_norm": 7.265150142125658, "learning_rate": 5.6748935575989775e-06, "loss": 17.9262, "step": 25888 }, { "epoch": 0.4732301168040653, "grad_norm": 7.85296419328889, "learning_rate": 5.6746002538284e-06, "loss": 18.3895, "step": 25889 }, { "epoch": 0.47324839600051183, "grad_norm": 6.851732004003136, "learning_rate": 5.674306947693406e-06, "loss": 17.6846, "step": 25890 }, { "epoch": 0.47326667519695836, "grad_norm": 6.713183541630558, "learning_rate": 5.674013639195025e-06, "loss": 17.7856, "step": 25891 }, { "epoch": 0.47328495439340484, "grad_norm": 8.11777700522724, "learning_rate": 5.6737203283342846e-06, "loss": 18.2853, "step": 25892 }, { "epoch": 0.4733032335898514, "grad_norm": 5.807472787390272, "learning_rate": 5.673427015112214e-06, "loss": 17.3082, "step": 25893 }, { "epoch": 0.4733215127862979, "grad_norm": 6.708169580046064, "learning_rate": 5.673133699529841e-06, "loss": 17.7547, "step": 25894 }, { "epoch": 0.47333979198274445, "grad_norm": 5.90589898079177, "learning_rate": 5.67284038158819e-06, "loss": 17.3468, "step": 25895 }, { "epoch": 0.473358071179191, "grad_norm": 6.762469218667008, "learning_rate": 5.672547061288292e-06, "loss": 17.6666, "step": 25896 }, { "epoch": 0.47337635037563747, "grad_norm": 6.57452511513548, "learning_rate": 5.672253738631176e-06, "loss": 17.4593, "step": 25897 }, { "epoch": 0.473394629572084, "grad_norm": 4.857877928913913, "learning_rate": 5.671960413617869e-06, "loss": 16.9493, "step": 25898 }, { "epoch": 0.47341290876853054, "grad_norm": 6.762618836487802, "learning_rate": 5.671667086249399e-06, "loss": 17.3922, "step": 25899 }, { "epoch": 0.4734311879649771, "grad_norm": 7.510592587270578, "learning_rate": 5.671373756526795e-06, "loss": 17.6653, "step": 25900 }, { "epoch": 0.4734494671614236, "grad_norm": 6.607209235175018, "learning_rate": 5.671080424451083e-06, "loss": 17.8866, "step": 25901 }, { "epoch": 0.4734677463578701, "grad_norm": 5.585064724011794, "learning_rate": 5.670787090023293e-06, "loss": 17.0745, "step": 25902 }, { "epoch": 0.4734860255543166, "grad_norm": 6.124320979701702, "learning_rate": 5.670493753244452e-06, "loss": 17.4572, "step": 25903 }, { "epoch": 0.47350430475076316, "grad_norm": 6.563376709963528, "learning_rate": 5.670200414115589e-06, "loss": 17.6055, "step": 25904 }, { "epoch": 0.4735225839472097, "grad_norm": 6.343559243910293, "learning_rate": 5.66990707263773e-06, "loss": 17.398, "step": 25905 }, { "epoch": 0.47354086314365623, "grad_norm": 6.588324721187836, "learning_rate": 5.669613728811907e-06, "loss": 17.7244, "step": 25906 }, { "epoch": 0.4735591423401027, "grad_norm": 7.63767999502871, "learning_rate": 5.669320382639145e-06, "loss": 17.9324, "step": 25907 }, { "epoch": 0.47357742153654925, "grad_norm": 4.932081138185488, "learning_rate": 5.669027034120474e-06, "loss": 16.8443, "step": 25908 }, { "epoch": 0.4735957007329958, "grad_norm": 6.400489033653264, "learning_rate": 5.66873368325692e-06, "loss": 17.6367, "step": 25909 }, { "epoch": 0.4736139799294423, "grad_norm": 5.927089896266275, "learning_rate": 5.6684403300495135e-06, "loss": 17.129, "step": 25910 }, { "epoch": 0.4736322591258888, "grad_norm": 6.13647123586034, "learning_rate": 5.668146974499282e-06, "loss": 17.468, "step": 25911 }, { "epoch": 0.47365053832233533, "grad_norm": 6.247460530699331, "learning_rate": 5.667853616607253e-06, "loss": 17.3167, "step": 25912 }, { "epoch": 0.47366881751878187, "grad_norm": 6.628616623180669, "learning_rate": 5.667560256374455e-06, "loss": 17.5867, "step": 25913 }, { "epoch": 0.4736870967152284, "grad_norm": 6.181169806865533, "learning_rate": 5.667266893801915e-06, "loss": 17.2199, "step": 25914 }, { "epoch": 0.47370537591167494, "grad_norm": 7.61247354414544, "learning_rate": 5.6669735288906634e-06, "loss": 17.9029, "step": 25915 }, { "epoch": 0.4737236551081214, "grad_norm": 7.861975335849096, "learning_rate": 5.666680161641728e-06, "loss": 17.6708, "step": 25916 }, { "epoch": 0.47374193430456796, "grad_norm": 6.866773229149948, "learning_rate": 5.6663867920561354e-06, "loss": 17.6993, "step": 25917 }, { "epoch": 0.4737602135010145, "grad_norm": 8.542882237612853, "learning_rate": 5.666093420134916e-06, "loss": 18.2594, "step": 25918 }, { "epoch": 0.47377849269746103, "grad_norm": 6.402801659901995, "learning_rate": 5.665800045879097e-06, "loss": 17.5634, "step": 25919 }, { "epoch": 0.47379677189390756, "grad_norm": 6.7766241834966525, "learning_rate": 5.665506669289705e-06, "loss": 17.5705, "step": 25920 }, { "epoch": 0.47381505109035404, "grad_norm": 9.444608585705058, "learning_rate": 5.66521329036777e-06, "loss": 18.2186, "step": 25921 }, { "epoch": 0.4738333302868006, "grad_norm": 5.785322703644276, "learning_rate": 5.664919909114322e-06, "loss": 17.4296, "step": 25922 }, { "epoch": 0.4738516094832471, "grad_norm": 6.267172285029286, "learning_rate": 5.664626525530385e-06, "loss": 17.4039, "step": 25923 }, { "epoch": 0.47386988867969365, "grad_norm": 5.974093653947869, "learning_rate": 5.664333139616992e-06, "loss": 17.5757, "step": 25924 }, { "epoch": 0.4738881678761402, "grad_norm": 7.500747167628114, "learning_rate": 5.66403975137517e-06, "loss": 17.7725, "step": 25925 }, { "epoch": 0.47390644707258667, "grad_norm": 5.964027363515949, "learning_rate": 5.663746360805944e-06, "loss": 17.081, "step": 25926 }, { "epoch": 0.4739247262690332, "grad_norm": 6.290251241856698, "learning_rate": 5.663452967910345e-06, "loss": 17.6068, "step": 25927 }, { "epoch": 0.47394300546547974, "grad_norm": 5.208218032142328, "learning_rate": 5.6631595726894e-06, "loss": 17.0189, "step": 25928 }, { "epoch": 0.4739612846619263, "grad_norm": 7.29574622467421, "learning_rate": 5.662866175144139e-06, "loss": 17.9344, "step": 25929 }, { "epoch": 0.4739795638583728, "grad_norm": 5.434198716830011, "learning_rate": 5.662572775275591e-06, "loss": 17.1999, "step": 25930 }, { "epoch": 0.4739978430548193, "grad_norm": 6.895067401429106, "learning_rate": 5.662279373084782e-06, "loss": 18.0767, "step": 25931 }, { "epoch": 0.4740161222512658, "grad_norm": 6.052100713760119, "learning_rate": 5.661985968572741e-06, "loss": 17.3477, "step": 25932 }, { "epoch": 0.47403440144771236, "grad_norm": 7.534649191841099, "learning_rate": 5.6616925617404965e-06, "loss": 17.4309, "step": 25933 }, { "epoch": 0.4740526806441589, "grad_norm": 5.7560926047448335, "learning_rate": 5.661399152589077e-06, "loss": 17.3121, "step": 25934 }, { "epoch": 0.47407095984060543, "grad_norm": 5.212578901397439, "learning_rate": 5.6611057411195115e-06, "loss": 17.0144, "step": 25935 }, { "epoch": 0.4740892390370519, "grad_norm": 6.813388999902722, "learning_rate": 5.660812327332825e-06, "loss": 17.7488, "step": 25936 }, { "epoch": 0.47410751823349845, "grad_norm": 5.903166788230887, "learning_rate": 5.6605189112300515e-06, "loss": 17.2933, "step": 25937 }, { "epoch": 0.474125797429945, "grad_norm": 5.312244703909885, "learning_rate": 5.660225492812216e-06, "loss": 17.2784, "step": 25938 }, { "epoch": 0.4741440766263915, "grad_norm": 7.317861567662505, "learning_rate": 5.659932072080349e-06, "loss": 18.0144, "step": 25939 }, { "epoch": 0.47416235582283806, "grad_norm": 5.920722899051549, "learning_rate": 5.6596386490354745e-06, "loss": 17.3487, "step": 25940 }, { "epoch": 0.47418063501928454, "grad_norm": 6.411628454375291, "learning_rate": 5.659345223678624e-06, "loss": 17.5412, "step": 25941 }, { "epoch": 0.47419891421573107, "grad_norm": 7.406774194138147, "learning_rate": 5.659051796010828e-06, "loss": 18.295, "step": 25942 }, { "epoch": 0.4742171934121776, "grad_norm": 5.98952482355676, "learning_rate": 5.658758366033111e-06, "loss": 17.0943, "step": 25943 }, { "epoch": 0.47423547260862414, "grad_norm": 6.0568664036181366, "learning_rate": 5.6584649337465036e-06, "loss": 17.3889, "step": 25944 }, { "epoch": 0.4742537518050706, "grad_norm": 7.333534487383448, "learning_rate": 5.6581714991520335e-06, "loss": 18.02, "step": 25945 }, { "epoch": 0.47427203100151716, "grad_norm": 5.345981274064981, "learning_rate": 5.657878062250729e-06, "loss": 17.1343, "step": 25946 }, { "epoch": 0.4742903101979637, "grad_norm": 7.045236121429757, "learning_rate": 5.657584623043619e-06, "loss": 17.6542, "step": 25947 }, { "epoch": 0.47430858939441023, "grad_norm": 5.870662473700976, "learning_rate": 5.657291181531732e-06, "loss": 17.3836, "step": 25948 }, { "epoch": 0.47432686859085677, "grad_norm": 6.906669538816162, "learning_rate": 5.6569977377160985e-06, "loss": 17.6594, "step": 25949 }, { "epoch": 0.47434514778730325, "grad_norm": 5.133526592293263, "learning_rate": 5.656704291597742e-06, "loss": 16.8409, "step": 25950 }, { "epoch": 0.4743634269837498, "grad_norm": 6.365411223856171, "learning_rate": 5.656410843177695e-06, "loss": 17.318, "step": 25951 }, { "epoch": 0.4743817061801963, "grad_norm": 5.788346236899196, "learning_rate": 5.656117392456986e-06, "loss": 17.2644, "step": 25952 }, { "epoch": 0.47439998537664285, "grad_norm": 6.593894142725526, "learning_rate": 5.655823939436642e-06, "loss": 17.4617, "step": 25953 }, { "epoch": 0.4744182645730894, "grad_norm": 7.152965042730411, "learning_rate": 5.655530484117691e-06, "loss": 17.8426, "step": 25954 }, { "epoch": 0.47443654376953587, "grad_norm": 5.5782092289664575, "learning_rate": 5.655237026501162e-06, "loss": 17.3581, "step": 25955 }, { "epoch": 0.4744548229659824, "grad_norm": 6.861717928090626, "learning_rate": 5.654943566588087e-06, "loss": 17.9868, "step": 25956 }, { "epoch": 0.47447310216242894, "grad_norm": 6.025530385063021, "learning_rate": 5.65465010437949e-06, "loss": 17.5004, "step": 25957 }, { "epoch": 0.4744913813588755, "grad_norm": 5.756300202233745, "learning_rate": 5.654356639876401e-06, "loss": 16.9075, "step": 25958 }, { "epoch": 0.474509660555322, "grad_norm": 5.906484596240676, "learning_rate": 5.654063173079849e-06, "loss": 17.4008, "step": 25959 }, { "epoch": 0.4745279397517685, "grad_norm": 6.307032854882647, "learning_rate": 5.6537697039908616e-06, "loss": 17.4723, "step": 25960 }, { "epoch": 0.474546218948215, "grad_norm": 7.954707887473592, "learning_rate": 5.653476232610469e-06, "loss": 18.1062, "step": 25961 }, { "epoch": 0.47456449814466156, "grad_norm": 8.200524458621782, "learning_rate": 5.653182758939698e-06, "loss": 18.505, "step": 25962 }, { "epoch": 0.4745827773411081, "grad_norm": 6.6348159359859515, "learning_rate": 5.652889282979579e-06, "loss": 17.3301, "step": 25963 }, { "epoch": 0.47460105653755463, "grad_norm": 5.168494641857537, "learning_rate": 5.652595804731139e-06, "loss": 17.0669, "step": 25964 }, { "epoch": 0.4746193357340011, "grad_norm": 5.690613203882012, "learning_rate": 5.6523023241954076e-06, "loss": 17.1961, "step": 25965 }, { "epoch": 0.47463761493044765, "grad_norm": 6.898315220190865, "learning_rate": 5.652008841373413e-06, "loss": 17.6733, "step": 25966 }, { "epoch": 0.4746558941268942, "grad_norm": 6.814659740351716, "learning_rate": 5.651715356266187e-06, "loss": 17.678, "step": 25967 }, { "epoch": 0.4746741733233407, "grad_norm": 6.526756873649642, "learning_rate": 5.65142186887475e-06, "loss": 17.4209, "step": 25968 }, { "epoch": 0.47469245251978726, "grad_norm": 6.556606698719368, "learning_rate": 5.651128379200139e-06, "loss": 17.5862, "step": 25969 }, { "epoch": 0.47471073171623374, "grad_norm": 5.146666597874986, "learning_rate": 5.650834887243379e-06, "loss": 16.9093, "step": 25970 }, { "epoch": 0.47472901091268027, "grad_norm": 7.757187958135804, "learning_rate": 5.6505413930055e-06, "loss": 17.8769, "step": 25971 }, { "epoch": 0.4747472901091268, "grad_norm": 6.710453664682444, "learning_rate": 5.650247896487528e-06, "loss": 17.8814, "step": 25972 }, { "epoch": 0.47476556930557334, "grad_norm": 7.123010685500772, "learning_rate": 5.649954397690496e-06, "loss": 17.6994, "step": 25973 }, { "epoch": 0.4747838485020199, "grad_norm": 4.969359930302907, "learning_rate": 5.649660896615428e-06, "loss": 16.9672, "step": 25974 }, { "epoch": 0.47480212769846636, "grad_norm": 6.9708902897066185, "learning_rate": 5.6493673932633555e-06, "loss": 17.9988, "step": 25975 }, { "epoch": 0.4748204068949129, "grad_norm": 5.975754995200632, "learning_rate": 5.6490738876353066e-06, "loss": 17.502, "step": 25976 }, { "epoch": 0.47483868609135943, "grad_norm": 5.851913605484984, "learning_rate": 5.64878037973231e-06, "loss": 17.3717, "step": 25977 }, { "epoch": 0.47485696528780597, "grad_norm": 5.6586834549541125, "learning_rate": 5.648486869555395e-06, "loss": 17.2946, "step": 25978 }, { "epoch": 0.47487524448425245, "grad_norm": 7.157862049440766, "learning_rate": 5.64819335710559e-06, "loss": 17.7344, "step": 25979 }, { "epoch": 0.474893523680699, "grad_norm": 8.014626993561148, "learning_rate": 5.647899842383923e-06, "loss": 18.4884, "step": 25980 }, { "epoch": 0.4749118028771455, "grad_norm": 6.046323901293027, "learning_rate": 5.647606325391425e-06, "loss": 17.1595, "step": 25981 }, { "epoch": 0.47493008207359205, "grad_norm": 6.691217919244507, "learning_rate": 5.647312806129119e-06, "loss": 17.7766, "step": 25982 }, { "epoch": 0.4749483612700386, "grad_norm": 6.359084571375429, "learning_rate": 5.647019284598041e-06, "loss": 17.3846, "step": 25983 }, { "epoch": 0.47496664046648507, "grad_norm": 6.427892313531851, "learning_rate": 5.646725760799216e-06, "loss": 17.4413, "step": 25984 }, { "epoch": 0.4749849196629316, "grad_norm": 6.274193033345976, "learning_rate": 5.646432234733674e-06, "loss": 17.6532, "step": 25985 }, { "epoch": 0.47500319885937814, "grad_norm": 6.768050627752301, "learning_rate": 5.646138706402442e-06, "loss": 17.5241, "step": 25986 }, { "epoch": 0.4750214780558247, "grad_norm": 5.819678207070442, "learning_rate": 5.64584517580655e-06, "loss": 17.5962, "step": 25987 }, { "epoch": 0.4750397572522712, "grad_norm": 5.951836988982997, "learning_rate": 5.645551642947027e-06, "loss": 17.3161, "step": 25988 }, { "epoch": 0.4750580364487177, "grad_norm": 6.789371409648295, "learning_rate": 5.6452581078249006e-06, "loss": 17.5762, "step": 25989 }, { "epoch": 0.4750763156451642, "grad_norm": 9.374394706643287, "learning_rate": 5.644964570441202e-06, "loss": 18.3603, "step": 25990 }, { "epoch": 0.47509459484161076, "grad_norm": 7.474719096925224, "learning_rate": 5.644671030796958e-06, "loss": 18.0784, "step": 25991 }, { "epoch": 0.4751128740380573, "grad_norm": 5.8469241580465985, "learning_rate": 5.6443774888931964e-06, "loss": 16.9541, "step": 25992 }, { "epoch": 0.47513115323450383, "grad_norm": 6.610767546635071, "learning_rate": 5.644083944730949e-06, "loss": 17.4141, "step": 25993 }, { "epoch": 0.4751494324309503, "grad_norm": 5.82041686861602, "learning_rate": 5.643790398311244e-06, "loss": 17.0751, "step": 25994 }, { "epoch": 0.47516771162739685, "grad_norm": 6.541205305517173, "learning_rate": 5.64349684963511e-06, "loss": 17.5317, "step": 25995 }, { "epoch": 0.4751859908238434, "grad_norm": 5.705654479397359, "learning_rate": 5.643203298703572e-06, "loss": 17.2416, "step": 25996 }, { "epoch": 0.4752042700202899, "grad_norm": 5.929132218688094, "learning_rate": 5.642909745517665e-06, "loss": 17.0134, "step": 25997 }, { "epoch": 0.47522254921673646, "grad_norm": 6.505326066339509, "learning_rate": 5.6426161900784146e-06, "loss": 17.3633, "step": 25998 }, { "epoch": 0.47524082841318294, "grad_norm": 6.685730152464692, "learning_rate": 5.642322632386851e-06, "loss": 17.5658, "step": 25999 }, { "epoch": 0.47525910760962947, "grad_norm": 6.376676619157217, "learning_rate": 5.642029072443999e-06, "loss": 17.6068, "step": 26000 }, { "epoch": 0.475277386806076, "grad_norm": 7.857717160114505, "learning_rate": 5.641735510250895e-06, "loss": 18.0532, "step": 26001 }, { "epoch": 0.47529566600252254, "grad_norm": 6.084575919193199, "learning_rate": 5.641441945808562e-06, "loss": 17.3179, "step": 26002 }, { "epoch": 0.4753139451989691, "grad_norm": 5.884143566256101, "learning_rate": 5.641148379118031e-06, "loss": 17.3732, "step": 26003 }, { "epoch": 0.47533222439541556, "grad_norm": 7.197660095175341, "learning_rate": 5.64085481018033e-06, "loss": 17.9315, "step": 26004 }, { "epoch": 0.4753505035918621, "grad_norm": 7.2089115755654625, "learning_rate": 5.640561238996489e-06, "loss": 17.9124, "step": 26005 }, { "epoch": 0.47536878278830863, "grad_norm": 6.665101417994158, "learning_rate": 5.640267665567536e-06, "loss": 17.5239, "step": 26006 }, { "epoch": 0.47538706198475517, "grad_norm": 6.039014754649297, "learning_rate": 5.6399740898944995e-06, "loss": 17.5545, "step": 26007 }, { "epoch": 0.4754053411812017, "grad_norm": 5.889816194995486, "learning_rate": 5.6396805119784125e-06, "loss": 17.4227, "step": 26008 }, { "epoch": 0.4754236203776482, "grad_norm": 6.266189007191372, "learning_rate": 5.639386931820298e-06, "loss": 17.5186, "step": 26009 }, { "epoch": 0.4754418995740947, "grad_norm": 5.6734377472976485, "learning_rate": 5.639093349421187e-06, "loss": 17.2666, "step": 26010 }, { "epoch": 0.47546017877054125, "grad_norm": 5.059160481785002, "learning_rate": 5.638799764782113e-06, "loss": 16.9287, "step": 26011 }, { "epoch": 0.4754784579669878, "grad_norm": 7.62269200425769, "learning_rate": 5.6385061779040986e-06, "loss": 17.7419, "step": 26012 }, { "epoch": 0.47549673716343427, "grad_norm": 7.4300084586237745, "learning_rate": 5.638212588788175e-06, "loss": 17.9371, "step": 26013 }, { "epoch": 0.4755150163598808, "grad_norm": 6.190862055253693, "learning_rate": 5.6379189974353724e-06, "loss": 17.4832, "step": 26014 }, { "epoch": 0.47553329555632734, "grad_norm": 6.148301697943547, "learning_rate": 5.63762540384672e-06, "loss": 17.6687, "step": 26015 }, { "epoch": 0.4755515747527739, "grad_norm": 6.783249319787577, "learning_rate": 5.6373318080232455e-06, "loss": 17.8364, "step": 26016 }, { "epoch": 0.4755698539492204, "grad_norm": 9.257873471346558, "learning_rate": 5.637038209965977e-06, "loss": 18.4764, "step": 26017 }, { "epoch": 0.4755881331456669, "grad_norm": 6.158810833202474, "learning_rate": 5.636744609675946e-06, "loss": 17.564, "step": 26018 }, { "epoch": 0.4756064123421134, "grad_norm": 6.401517358591553, "learning_rate": 5.63645100715418e-06, "loss": 17.5254, "step": 26019 }, { "epoch": 0.47562469153855996, "grad_norm": 5.391545148250154, "learning_rate": 5.6361574024017085e-06, "loss": 17.2427, "step": 26020 }, { "epoch": 0.4756429707350065, "grad_norm": 7.675663362478772, "learning_rate": 5.635863795419561e-06, "loss": 18.3336, "step": 26021 }, { "epoch": 0.47566124993145303, "grad_norm": 4.807485419133941, "learning_rate": 5.6355701862087665e-06, "loss": 16.82, "step": 26022 }, { "epoch": 0.4756795291278995, "grad_norm": 6.581052885289084, "learning_rate": 5.635276574770352e-06, "loss": 17.4988, "step": 26023 }, { "epoch": 0.47569780832434605, "grad_norm": 5.08216685605317, "learning_rate": 5.634982961105349e-06, "loss": 17.0414, "step": 26024 }, { "epoch": 0.4757160875207926, "grad_norm": 6.988364270643437, "learning_rate": 5.634689345214787e-06, "loss": 18.0667, "step": 26025 }, { "epoch": 0.4757343667172391, "grad_norm": 6.381569064303758, "learning_rate": 5.634395727099692e-06, "loss": 17.3598, "step": 26026 }, { "epoch": 0.47575264591368566, "grad_norm": 6.380849986204152, "learning_rate": 5.634102106761095e-06, "loss": 17.548, "step": 26027 }, { "epoch": 0.47577092511013214, "grad_norm": 7.404830013781351, "learning_rate": 5.633808484200026e-06, "loss": 17.8752, "step": 26028 }, { "epoch": 0.4757892043065787, "grad_norm": 6.500785528836138, "learning_rate": 5.633514859417514e-06, "loss": 17.56, "step": 26029 }, { "epoch": 0.4758074835030252, "grad_norm": 6.321136405646386, "learning_rate": 5.633221232414587e-06, "loss": 17.3529, "step": 26030 }, { "epoch": 0.47582576269947174, "grad_norm": 5.165881963417881, "learning_rate": 5.632927603192274e-06, "loss": 17.038, "step": 26031 }, { "epoch": 0.4758440418959183, "grad_norm": 6.385283406981002, "learning_rate": 5.632633971751604e-06, "loss": 17.1728, "step": 26032 }, { "epoch": 0.47586232109236476, "grad_norm": 7.2731084059304365, "learning_rate": 5.632340338093608e-06, "loss": 17.9146, "step": 26033 }, { "epoch": 0.4758806002888113, "grad_norm": 6.42716275932108, "learning_rate": 5.632046702219314e-06, "loss": 17.2662, "step": 26034 }, { "epoch": 0.47589887948525783, "grad_norm": 6.9941749540297105, "learning_rate": 5.631753064129751e-06, "loss": 17.4194, "step": 26035 }, { "epoch": 0.47591715868170437, "grad_norm": 6.033991187815937, "learning_rate": 5.631459423825948e-06, "loss": 17.294, "step": 26036 }, { "epoch": 0.4759354378781509, "grad_norm": 5.936301524659878, "learning_rate": 5.631165781308934e-06, "loss": 17.7412, "step": 26037 }, { "epoch": 0.4759537170745974, "grad_norm": 6.1020200600307835, "learning_rate": 5.630872136579739e-06, "loss": 17.3488, "step": 26038 }, { "epoch": 0.4759719962710439, "grad_norm": 6.877421053511342, "learning_rate": 5.630578489639393e-06, "loss": 17.6673, "step": 26039 }, { "epoch": 0.47599027546749045, "grad_norm": 4.975494633060373, "learning_rate": 5.6302848404889245e-06, "loss": 16.8864, "step": 26040 }, { "epoch": 0.476008554663937, "grad_norm": 6.525103130084275, "learning_rate": 5.62999118912936e-06, "loss": 17.4129, "step": 26041 }, { "epoch": 0.4760268338603835, "grad_norm": 5.996496404997624, "learning_rate": 5.629697535561733e-06, "loss": 17.3319, "step": 26042 }, { "epoch": 0.47604511305683, "grad_norm": 7.351909418351181, "learning_rate": 5.62940387978707e-06, "loss": 17.9268, "step": 26043 }, { "epoch": 0.47606339225327654, "grad_norm": 5.526687594872396, "learning_rate": 5.6291102218064005e-06, "loss": 17.299, "step": 26044 }, { "epoch": 0.4760816714497231, "grad_norm": 5.425425747437962, "learning_rate": 5.628816561620755e-06, "loss": 16.9579, "step": 26045 }, { "epoch": 0.4760999506461696, "grad_norm": 6.642975514835074, "learning_rate": 5.628522899231163e-06, "loss": 17.9246, "step": 26046 }, { "epoch": 0.4761182298426161, "grad_norm": 5.958625007699751, "learning_rate": 5.62822923463865e-06, "loss": 17.1009, "step": 26047 }, { "epoch": 0.4761365090390626, "grad_norm": 7.628080431055862, "learning_rate": 5.62793556784425e-06, "loss": 18.0556, "step": 26048 }, { "epoch": 0.47615478823550916, "grad_norm": 5.801207100153378, "learning_rate": 5.6276418988489916e-06, "loss": 17.2965, "step": 26049 }, { "epoch": 0.4761730674319557, "grad_norm": 9.79241349374093, "learning_rate": 5.627348227653899e-06, "loss": 18.6229, "step": 26050 }, { "epoch": 0.47619134662840223, "grad_norm": 6.781462717629474, "learning_rate": 5.627054554260008e-06, "loss": 17.4345, "step": 26051 }, { "epoch": 0.4762096258248487, "grad_norm": 5.812752241444554, "learning_rate": 5.626760878668344e-06, "loss": 17.3331, "step": 26052 }, { "epoch": 0.47622790502129525, "grad_norm": 6.147366690483685, "learning_rate": 5.62646720087994e-06, "loss": 17.7013, "step": 26053 }, { "epoch": 0.4762461842177418, "grad_norm": 5.122933959756373, "learning_rate": 5.626173520895821e-06, "loss": 17.1301, "step": 26054 }, { "epoch": 0.4762644634141883, "grad_norm": 7.401273026911979, "learning_rate": 5.6258798387170165e-06, "loss": 17.7734, "step": 26055 }, { "epoch": 0.47628274261063486, "grad_norm": 4.933972472532199, "learning_rate": 5.6255861543445615e-06, "loss": 16.9932, "step": 26056 }, { "epoch": 0.47630102180708134, "grad_norm": 4.427933087749209, "learning_rate": 5.625292467779479e-06, "loss": 16.7622, "step": 26057 }, { "epoch": 0.4763193010035279, "grad_norm": 5.7662417096621965, "learning_rate": 5.6249987790228e-06, "loss": 17.4404, "step": 26058 }, { "epoch": 0.4763375801999744, "grad_norm": 6.577856752167029, "learning_rate": 5.624705088075555e-06, "loss": 17.7029, "step": 26059 }, { "epoch": 0.47635585939642094, "grad_norm": 6.67163525678445, "learning_rate": 5.624411394938772e-06, "loss": 17.5504, "step": 26060 }, { "epoch": 0.4763741385928675, "grad_norm": 6.360925961932458, "learning_rate": 5.624117699613483e-06, "loss": 17.6593, "step": 26061 }, { "epoch": 0.47639241778931396, "grad_norm": 7.586802719897017, "learning_rate": 5.6238240021007155e-06, "loss": 17.9586, "step": 26062 }, { "epoch": 0.4764106969857605, "grad_norm": 4.899688214664459, "learning_rate": 5.6235303024014975e-06, "loss": 16.7137, "step": 26063 }, { "epoch": 0.47642897618220703, "grad_norm": 7.138013485464661, "learning_rate": 5.623236600516861e-06, "loss": 18.2766, "step": 26064 }, { "epoch": 0.47644725537865357, "grad_norm": 7.056691352433283, "learning_rate": 5.622942896447834e-06, "loss": 17.9555, "step": 26065 }, { "epoch": 0.4764655345751001, "grad_norm": 6.519716635122897, "learning_rate": 5.622649190195446e-06, "loss": 17.7805, "step": 26066 }, { "epoch": 0.4764838137715466, "grad_norm": 6.2039302734864465, "learning_rate": 5.622355481760728e-06, "loss": 17.3564, "step": 26067 }, { "epoch": 0.4765020929679931, "grad_norm": 6.708680452902, "learning_rate": 5.622061771144706e-06, "loss": 17.8124, "step": 26068 }, { "epoch": 0.47652037216443965, "grad_norm": 6.146022834553187, "learning_rate": 5.621768058348413e-06, "loss": 17.3903, "step": 26069 }, { "epoch": 0.4765386513608862, "grad_norm": 7.644073505989182, "learning_rate": 5.621474343372877e-06, "loss": 17.859, "step": 26070 }, { "epoch": 0.4765569305573327, "grad_norm": 6.31873404662728, "learning_rate": 5.621180626219126e-06, "loss": 17.6161, "step": 26071 }, { "epoch": 0.4765752097537792, "grad_norm": 6.857044687447573, "learning_rate": 5.620886906888191e-06, "loss": 17.5395, "step": 26072 }, { "epoch": 0.47659348895022574, "grad_norm": 6.538786504817792, "learning_rate": 5.620593185381102e-06, "loss": 17.6519, "step": 26073 }, { "epoch": 0.4766117681466723, "grad_norm": 6.460891049638645, "learning_rate": 5.6202994616988884e-06, "loss": 17.6651, "step": 26074 }, { "epoch": 0.4766300473431188, "grad_norm": 5.771273994807442, "learning_rate": 5.620005735842577e-06, "loss": 17.2586, "step": 26075 }, { "epoch": 0.47664832653956535, "grad_norm": 6.34127645290423, "learning_rate": 5.6197120078132005e-06, "loss": 17.2723, "step": 26076 }, { "epoch": 0.47666660573601183, "grad_norm": 7.6002476847563525, "learning_rate": 5.619418277611788e-06, "loss": 17.7249, "step": 26077 }, { "epoch": 0.47668488493245836, "grad_norm": 5.6028576690747185, "learning_rate": 5.619124545239366e-06, "loss": 17.1663, "step": 26078 }, { "epoch": 0.4767031641289049, "grad_norm": 6.8009010872796845, "learning_rate": 5.618830810696968e-06, "loss": 17.3903, "step": 26079 }, { "epoch": 0.47672144332535143, "grad_norm": 6.2757260823554475, "learning_rate": 5.6185370739856226e-06, "loss": 17.5399, "step": 26080 }, { "epoch": 0.4767397225217979, "grad_norm": 7.007343328296595, "learning_rate": 5.618243335106357e-06, "loss": 17.8199, "step": 26081 }, { "epoch": 0.47675800171824445, "grad_norm": 7.868802959252958, "learning_rate": 5.6179495940602005e-06, "loss": 17.9974, "step": 26082 }, { "epoch": 0.476776280914691, "grad_norm": 7.3481225366603, "learning_rate": 5.617655850848186e-06, "loss": 18.1017, "step": 26083 }, { "epoch": 0.4767945601111375, "grad_norm": 6.664199757996422, "learning_rate": 5.617362105471342e-06, "loss": 18.04, "step": 26084 }, { "epoch": 0.47681283930758406, "grad_norm": 8.498700739774147, "learning_rate": 5.617068357930697e-06, "loss": 17.8875, "step": 26085 }, { "epoch": 0.47683111850403054, "grad_norm": 6.243252142764387, "learning_rate": 5.61677460822728e-06, "loss": 17.4461, "step": 26086 }, { "epoch": 0.4768493977004771, "grad_norm": 6.937977061291332, "learning_rate": 5.616480856362123e-06, "loss": 17.905, "step": 26087 }, { "epoch": 0.4768676768969236, "grad_norm": 6.865585270736471, "learning_rate": 5.616187102336252e-06, "loss": 17.6613, "step": 26088 }, { "epoch": 0.47688595609337014, "grad_norm": 4.784125178394753, "learning_rate": 5.6158933461507e-06, "loss": 16.9239, "step": 26089 }, { "epoch": 0.4769042352898167, "grad_norm": 6.590838836207453, "learning_rate": 5.615599587806496e-06, "loss": 17.2997, "step": 26090 }, { "epoch": 0.47692251448626316, "grad_norm": 6.671508649687292, "learning_rate": 5.615305827304668e-06, "loss": 17.68, "step": 26091 }, { "epoch": 0.4769407936827097, "grad_norm": 7.76100921275125, "learning_rate": 5.615012064646247e-06, "loss": 18.0952, "step": 26092 }, { "epoch": 0.47695907287915623, "grad_norm": 6.088363358254355, "learning_rate": 5.614718299832262e-06, "loss": 17.33, "step": 26093 }, { "epoch": 0.47697735207560277, "grad_norm": 7.614245279204956, "learning_rate": 5.614424532863743e-06, "loss": 17.8937, "step": 26094 }, { "epoch": 0.4769956312720493, "grad_norm": 6.900345840051787, "learning_rate": 5.614130763741717e-06, "loss": 17.6068, "step": 26095 }, { "epoch": 0.4770139104684958, "grad_norm": 6.964787476654074, "learning_rate": 5.613836992467217e-06, "loss": 17.4893, "step": 26096 }, { "epoch": 0.4770321896649423, "grad_norm": 6.002296108303925, "learning_rate": 5.613543219041273e-06, "loss": 17.4203, "step": 26097 }, { "epoch": 0.47705046886138885, "grad_norm": 6.834447217700692, "learning_rate": 5.613249443464913e-06, "loss": 17.9869, "step": 26098 }, { "epoch": 0.4770687480578354, "grad_norm": 6.104831198554275, "learning_rate": 5.612955665739167e-06, "loss": 17.3504, "step": 26099 }, { "epoch": 0.4770870272542819, "grad_norm": 6.449593406950984, "learning_rate": 5.612661885865063e-06, "loss": 17.4109, "step": 26100 }, { "epoch": 0.4771053064507284, "grad_norm": 7.0224103024352615, "learning_rate": 5.612368103843634e-06, "loss": 17.5904, "step": 26101 }, { "epoch": 0.47712358564717494, "grad_norm": 6.135187132870457, "learning_rate": 5.612074319675907e-06, "loss": 17.4386, "step": 26102 }, { "epoch": 0.4771418648436215, "grad_norm": 5.0770678705615495, "learning_rate": 5.611780533362913e-06, "loss": 17.1951, "step": 26103 }, { "epoch": 0.477160144040068, "grad_norm": 5.778558709472459, "learning_rate": 5.611486744905681e-06, "loss": 17.3517, "step": 26104 }, { "epoch": 0.47717842323651455, "grad_norm": 6.9569184192284546, "learning_rate": 5.611192954305241e-06, "loss": 17.6013, "step": 26105 }, { "epoch": 0.47719670243296103, "grad_norm": 5.554985824878484, "learning_rate": 5.610899161562623e-06, "loss": 17.3215, "step": 26106 }, { "epoch": 0.47721498162940756, "grad_norm": 7.256870840413215, "learning_rate": 5.6106053666788566e-06, "loss": 18.0335, "step": 26107 }, { "epoch": 0.4772332608258541, "grad_norm": 6.578082303262216, "learning_rate": 5.610311569654972e-06, "loss": 17.5233, "step": 26108 }, { "epoch": 0.47725154002230064, "grad_norm": 7.0026002953920905, "learning_rate": 5.610017770491995e-06, "loss": 17.642, "step": 26109 }, { "epoch": 0.47726981921874717, "grad_norm": 8.634285905142995, "learning_rate": 5.609723969190963e-06, "loss": 18.8466, "step": 26110 }, { "epoch": 0.47728809841519365, "grad_norm": 5.591100147596347, "learning_rate": 5.6094301657529e-06, "loss": 16.9774, "step": 26111 }, { "epoch": 0.4773063776116402, "grad_norm": 6.155160576115009, "learning_rate": 5.609136360178837e-06, "loss": 17.5168, "step": 26112 }, { "epoch": 0.4773246568080867, "grad_norm": 7.1575398367811545, "learning_rate": 5.608842552469802e-06, "loss": 17.7671, "step": 26113 }, { "epoch": 0.47734293600453326, "grad_norm": 7.510841169697521, "learning_rate": 5.608548742626827e-06, "loss": 18.2355, "step": 26114 }, { "epoch": 0.47736121520097974, "grad_norm": 5.402852436341276, "learning_rate": 5.608254930650944e-06, "loss": 17.061, "step": 26115 }, { "epoch": 0.4773794943974263, "grad_norm": 5.388457298037819, "learning_rate": 5.607961116543179e-06, "loss": 16.9783, "step": 26116 }, { "epoch": 0.4773977735938728, "grad_norm": 6.750534929649605, "learning_rate": 5.607667300304563e-06, "loss": 17.8664, "step": 26117 }, { "epoch": 0.47741605279031935, "grad_norm": 7.515028893326799, "learning_rate": 5.607373481936126e-06, "loss": 17.9722, "step": 26118 }, { "epoch": 0.4774343319867659, "grad_norm": 6.097286987623301, "learning_rate": 5.607079661438897e-06, "loss": 17.3201, "step": 26119 }, { "epoch": 0.47745261118321236, "grad_norm": 5.582122826765157, "learning_rate": 5.606785838813907e-06, "loss": 17.1418, "step": 26120 }, { "epoch": 0.4774708903796589, "grad_norm": 5.719784680253939, "learning_rate": 5.6064920140621846e-06, "loss": 17.3713, "step": 26121 }, { "epoch": 0.47748916957610543, "grad_norm": 5.744226850913215, "learning_rate": 5.606198187184762e-06, "loss": 17.2014, "step": 26122 }, { "epoch": 0.47750744877255197, "grad_norm": 7.999450040902364, "learning_rate": 5.605904358182666e-06, "loss": 18.2115, "step": 26123 }, { "epoch": 0.4775257279689985, "grad_norm": 6.8892260325088435, "learning_rate": 5.605610527056927e-06, "loss": 18.0052, "step": 26124 }, { "epoch": 0.477544007165445, "grad_norm": 6.857778199194525, "learning_rate": 5.605316693808578e-06, "loss": 17.5765, "step": 26125 }, { "epoch": 0.4775622863618915, "grad_norm": 5.734045895742806, "learning_rate": 5.6050228584386456e-06, "loss": 17.4257, "step": 26126 }, { "epoch": 0.47758056555833805, "grad_norm": 6.357934785959785, "learning_rate": 5.604729020948158e-06, "loss": 17.6335, "step": 26127 }, { "epoch": 0.4775988447547846, "grad_norm": 7.308077097943614, "learning_rate": 5.604435181338151e-06, "loss": 17.7287, "step": 26128 }, { "epoch": 0.4776171239512311, "grad_norm": 8.951314214338932, "learning_rate": 5.6041413396096515e-06, "loss": 17.9758, "step": 26129 }, { "epoch": 0.4776354031476776, "grad_norm": 5.470556974764741, "learning_rate": 5.603847495763687e-06, "loss": 17.1528, "step": 26130 }, { "epoch": 0.47765368234412414, "grad_norm": 6.183881165989675, "learning_rate": 5.60355364980129e-06, "loss": 17.3334, "step": 26131 }, { "epoch": 0.4776719615405707, "grad_norm": 7.066583803623028, "learning_rate": 5.603259801723489e-06, "loss": 17.8592, "step": 26132 }, { "epoch": 0.4776902407370172, "grad_norm": 6.293945421294758, "learning_rate": 5.602965951531316e-06, "loss": 17.4171, "step": 26133 }, { "epoch": 0.47770851993346375, "grad_norm": 8.633406434286142, "learning_rate": 5.6026720992258e-06, "loss": 17.9421, "step": 26134 }, { "epoch": 0.47772679912991023, "grad_norm": 5.846218605556748, "learning_rate": 5.602378244807969e-06, "loss": 17.2044, "step": 26135 }, { "epoch": 0.47774507832635676, "grad_norm": 5.629178104367023, "learning_rate": 5.602084388278856e-06, "loss": 17.2794, "step": 26136 }, { "epoch": 0.4777633575228033, "grad_norm": 6.874864594145727, "learning_rate": 5.601790529639488e-06, "loss": 17.5492, "step": 26137 }, { "epoch": 0.47778163671924984, "grad_norm": 8.096302984276473, "learning_rate": 5.601496668890898e-06, "loss": 17.9595, "step": 26138 }, { "epoch": 0.47779991591569637, "grad_norm": 6.4229967012341795, "learning_rate": 5.601202806034114e-06, "loss": 17.5692, "step": 26139 }, { "epoch": 0.47781819511214285, "grad_norm": 7.275263196865297, "learning_rate": 5.600908941070167e-06, "loss": 17.7041, "step": 26140 }, { "epoch": 0.4778364743085894, "grad_norm": 6.078711589615375, "learning_rate": 5.6006150740000835e-06, "loss": 17.4678, "step": 26141 }, { "epoch": 0.4778547535050359, "grad_norm": 5.8877548028571685, "learning_rate": 5.600321204824899e-06, "loss": 17.2552, "step": 26142 }, { "epoch": 0.47787303270148246, "grad_norm": 6.159760499766907, "learning_rate": 5.60002733354564e-06, "loss": 17.4516, "step": 26143 }, { "epoch": 0.477891311897929, "grad_norm": 6.668883774809692, "learning_rate": 5.5997334601633365e-06, "loss": 17.5266, "step": 26144 }, { "epoch": 0.4779095910943755, "grad_norm": 6.346232467794673, "learning_rate": 5.5994395846790194e-06, "loss": 17.2999, "step": 26145 }, { "epoch": 0.477927870290822, "grad_norm": 6.279314627877693, "learning_rate": 5.599145707093719e-06, "loss": 17.4918, "step": 26146 }, { "epoch": 0.47794614948726855, "grad_norm": 6.055620613960243, "learning_rate": 5.598851827408466e-06, "loss": 17.2214, "step": 26147 }, { "epoch": 0.4779644286837151, "grad_norm": 8.800387241080244, "learning_rate": 5.598557945624288e-06, "loss": 17.9734, "step": 26148 }, { "epoch": 0.47798270788016156, "grad_norm": 5.879055359319588, "learning_rate": 5.598264061742217e-06, "loss": 17.4847, "step": 26149 }, { "epoch": 0.4780009870766081, "grad_norm": 7.840857060702316, "learning_rate": 5.597970175763281e-06, "loss": 17.9681, "step": 26150 }, { "epoch": 0.47801926627305463, "grad_norm": 7.390125956957079, "learning_rate": 5.5976762876885114e-06, "loss": 17.9516, "step": 26151 }, { "epoch": 0.47803754546950117, "grad_norm": 7.886241989784929, "learning_rate": 5.59738239751894e-06, "loss": 18.1274, "step": 26152 }, { "epoch": 0.4780558246659477, "grad_norm": 7.6358127734958305, "learning_rate": 5.597088505255596e-06, "loss": 18.33, "step": 26153 }, { "epoch": 0.4780741038623942, "grad_norm": 5.8727765713036115, "learning_rate": 5.596794610899507e-06, "loss": 17.2083, "step": 26154 }, { "epoch": 0.4780923830588407, "grad_norm": 5.719505322466552, "learning_rate": 5.596500714451703e-06, "loss": 17.1283, "step": 26155 }, { "epoch": 0.47811066225528726, "grad_norm": 5.249643386733936, "learning_rate": 5.596206815913217e-06, "loss": 17.0474, "step": 26156 }, { "epoch": 0.4781289414517338, "grad_norm": 5.9512499632323586, "learning_rate": 5.595912915285079e-06, "loss": 17.1633, "step": 26157 }, { "epoch": 0.4781472206481803, "grad_norm": 5.539861525542652, "learning_rate": 5.595619012568318e-06, "loss": 17.0582, "step": 26158 }, { "epoch": 0.4781654998446268, "grad_norm": 6.746955695472212, "learning_rate": 5.595325107763963e-06, "loss": 17.6486, "step": 26159 }, { "epoch": 0.47818377904107334, "grad_norm": 6.372113047626275, "learning_rate": 5.595031200873045e-06, "loss": 17.3573, "step": 26160 }, { "epoch": 0.4782020582375199, "grad_norm": 5.8737005821916535, "learning_rate": 5.594737291896594e-06, "loss": 17.3061, "step": 26161 }, { "epoch": 0.4782203374339664, "grad_norm": 6.239354686610514, "learning_rate": 5.594443380835642e-06, "loss": 17.6471, "step": 26162 }, { "epoch": 0.47823861663041295, "grad_norm": 6.4618147944618105, "learning_rate": 5.5941494676912165e-06, "loss": 17.5302, "step": 26163 }, { "epoch": 0.47825689582685943, "grad_norm": 6.8845883561397265, "learning_rate": 5.593855552464348e-06, "loss": 17.5376, "step": 26164 }, { "epoch": 0.47827517502330597, "grad_norm": 6.808300698691769, "learning_rate": 5.593561635156068e-06, "loss": 17.4821, "step": 26165 }, { "epoch": 0.4782934542197525, "grad_norm": 5.980524909712036, "learning_rate": 5.593267715767406e-06, "loss": 17.0913, "step": 26166 }, { "epoch": 0.47831173341619904, "grad_norm": 5.126226771227641, "learning_rate": 5.592973794299393e-06, "loss": 17.0242, "step": 26167 }, { "epoch": 0.47833001261264557, "grad_norm": 6.136243881740379, "learning_rate": 5.592679870753057e-06, "loss": 17.3082, "step": 26168 }, { "epoch": 0.47834829180909205, "grad_norm": 6.093848300755434, "learning_rate": 5.59238594512943e-06, "loss": 17.2224, "step": 26169 }, { "epoch": 0.4783665710055386, "grad_norm": 5.297080451154436, "learning_rate": 5.592092017429543e-06, "loss": 17.2881, "step": 26170 }, { "epoch": 0.4783848502019851, "grad_norm": 6.05727917693309, "learning_rate": 5.5917980876544235e-06, "loss": 17.2438, "step": 26171 }, { "epoch": 0.47840312939843166, "grad_norm": 7.863517824378537, "learning_rate": 5.591504155805103e-06, "loss": 18.2442, "step": 26172 }, { "epoch": 0.4784214085948782, "grad_norm": 8.058677058472782, "learning_rate": 5.591210221882611e-06, "loss": 17.993, "step": 26173 }, { "epoch": 0.4784396877913247, "grad_norm": 7.495887265653726, "learning_rate": 5.590916285887979e-06, "loss": 18.1109, "step": 26174 }, { "epoch": 0.4784579669877712, "grad_norm": 6.267948127689959, "learning_rate": 5.590622347822238e-06, "loss": 17.4014, "step": 26175 }, { "epoch": 0.47847624618421775, "grad_norm": 6.949861333830357, "learning_rate": 5.590328407686415e-06, "loss": 17.9126, "step": 26176 }, { "epoch": 0.4784945253806643, "grad_norm": 9.55325244105705, "learning_rate": 5.590034465481544e-06, "loss": 17.4078, "step": 26177 }, { "epoch": 0.4785128045771108, "grad_norm": 7.621456997317617, "learning_rate": 5.589740521208652e-06, "loss": 17.6216, "step": 26178 }, { "epoch": 0.4785310837735573, "grad_norm": 6.386817809825003, "learning_rate": 5.589446574868771e-06, "loss": 17.2495, "step": 26179 }, { "epoch": 0.47854936297000383, "grad_norm": 5.700295189066984, "learning_rate": 5.589152626462933e-06, "loss": 17.2569, "step": 26180 }, { "epoch": 0.47856764216645037, "grad_norm": 5.936096719381702, "learning_rate": 5.588858675992164e-06, "loss": 17.2734, "step": 26181 }, { "epoch": 0.4785859213628969, "grad_norm": 6.098904815202431, "learning_rate": 5.5885647234574946e-06, "loss": 17.3035, "step": 26182 }, { "epoch": 0.4786042005593434, "grad_norm": 7.148858141497193, "learning_rate": 5.588270768859959e-06, "loss": 17.6, "step": 26183 }, { "epoch": 0.4786224797557899, "grad_norm": 6.168080790795707, "learning_rate": 5.587976812200587e-06, "loss": 17.3124, "step": 26184 }, { "epoch": 0.47864075895223646, "grad_norm": 5.496299195925315, "learning_rate": 5.587682853480405e-06, "loss": 17.1208, "step": 26185 }, { "epoch": 0.478659038148683, "grad_norm": 6.916940082572477, "learning_rate": 5.587388892700446e-06, "loss": 17.675, "step": 26186 }, { "epoch": 0.4786773173451295, "grad_norm": 6.065550357992969, "learning_rate": 5.587094929861741e-06, "loss": 17.3024, "step": 26187 }, { "epoch": 0.478695596541576, "grad_norm": 5.804851435310755, "learning_rate": 5.586800964965318e-06, "loss": 17.29, "step": 26188 }, { "epoch": 0.47871387573802254, "grad_norm": 6.033602700131579, "learning_rate": 5.586506998012209e-06, "loss": 17.2692, "step": 26189 }, { "epoch": 0.4787321549344691, "grad_norm": 6.663829582698504, "learning_rate": 5.586213029003443e-06, "loss": 17.7129, "step": 26190 }, { "epoch": 0.4787504341309156, "grad_norm": 6.951364507725383, "learning_rate": 5.5859190579400526e-06, "loss": 17.7884, "step": 26191 }, { "epoch": 0.47876871332736215, "grad_norm": 9.055546153627796, "learning_rate": 5.585625084823066e-06, "loss": 18.6385, "step": 26192 }, { "epoch": 0.47878699252380863, "grad_norm": 6.9846995043625855, "learning_rate": 5.5853311096535145e-06, "loss": 17.5807, "step": 26193 }, { "epoch": 0.47880527172025517, "grad_norm": 6.732692477602404, "learning_rate": 5.585037132432429e-06, "loss": 17.4702, "step": 26194 }, { "epoch": 0.4788235509167017, "grad_norm": 5.727818276585686, "learning_rate": 5.584743153160837e-06, "loss": 17.4588, "step": 26195 }, { "epoch": 0.47884183011314824, "grad_norm": 8.451719454867723, "learning_rate": 5.584449171839772e-06, "loss": 18.1792, "step": 26196 }, { "epoch": 0.4788601093095948, "grad_norm": 6.900812623289093, "learning_rate": 5.584155188470264e-06, "loss": 17.6951, "step": 26197 }, { "epoch": 0.47887838850604125, "grad_norm": 6.976842411357199, "learning_rate": 5.583861203053344e-06, "loss": 17.7989, "step": 26198 }, { "epoch": 0.4788966677024878, "grad_norm": 5.910355358761967, "learning_rate": 5.583567215590039e-06, "loss": 17.2434, "step": 26199 }, { "epoch": 0.4789149468989343, "grad_norm": 6.175191662204462, "learning_rate": 5.583273226081381e-06, "loss": 17.4369, "step": 26200 }, { "epoch": 0.47893322609538086, "grad_norm": 6.404927060162966, "learning_rate": 5.5829792345284025e-06, "loss": 17.2073, "step": 26201 }, { "epoch": 0.4789515052918274, "grad_norm": 7.6688649647097415, "learning_rate": 5.5826852409321316e-06, "loss": 18.2065, "step": 26202 }, { "epoch": 0.4789697844882739, "grad_norm": 7.757099773467878, "learning_rate": 5.5823912452936e-06, "loss": 18.0641, "step": 26203 }, { "epoch": 0.4789880636847204, "grad_norm": 5.8128483224136795, "learning_rate": 5.582097247613838e-06, "loss": 17.2436, "step": 26204 }, { "epoch": 0.47900634288116695, "grad_norm": 6.405755405230303, "learning_rate": 5.581803247893876e-06, "loss": 17.6274, "step": 26205 }, { "epoch": 0.4790246220776135, "grad_norm": 8.570545696432772, "learning_rate": 5.581509246134742e-06, "loss": 18.6291, "step": 26206 }, { "epoch": 0.47904290127406, "grad_norm": 6.156591387375951, "learning_rate": 5.58121524233747e-06, "loss": 17.24, "step": 26207 }, { "epoch": 0.4790611804705065, "grad_norm": 6.623992225506808, "learning_rate": 5.58092123650309e-06, "loss": 17.5872, "step": 26208 }, { "epoch": 0.47907945966695303, "grad_norm": 5.216852011450072, "learning_rate": 5.580627228632629e-06, "loss": 16.8767, "step": 26209 }, { "epoch": 0.47909773886339957, "grad_norm": 7.510683482527872, "learning_rate": 5.580333218727121e-06, "loss": 17.9773, "step": 26210 }, { "epoch": 0.4791160180598461, "grad_norm": 5.75592348346135, "learning_rate": 5.580039206787597e-06, "loss": 17.1502, "step": 26211 }, { "epoch": 0.47913429725629264, "grad_norm": 5.907665321789651, "learning_rate": 5.579745192815085e-06, "loss": 17.0974, "step": 26212 }, { "epoch": 0.4791525764527391, "grad_norm": 6.292182669194808, "learning_rate": 5.579451176810615e-06, "loss": 17.2361, "step": 26213 }, { "epoch": 0.47917085564918566, "grad_norm": 7.204820704388497, "learning_rate": 5.5791571587752195e-06, "loss": 18.0221, "step": 26214 }, { "epoch": 0.4791891348456322, "grad_norm": 5.484157167412335, "learning_rate": 5.578863138709929e-06, "loss": 17.1521, "step": 26215 }, { "epoch": 0.4792074140420787, "grad_norm": 5.744855348578453, "learning_rate": 5.578569116615773e-06, "loss": 17.4221, "step": 26216 }, { "epoch": 0.4792256932385252, "grad_norm": 7.220814378367754, "learning_rate": 5.578275092493783e-06, "loss": 17.7878, "step": 26217 }, { "epoch": 0.47924397243497174, "grad_norm": 6.328796412840449, "learning_rate": 5.577981066344988e-06, "loss": 17.4949, "step": 26218 }, { "epoch": 0.4792622516314183, "grad_norm": 6.5617144161473515, "learning_rate": 5.577687038170421e-06, "loss": 17.2807, "step": 26219 }, { "epoch": 0.4792805308278648, "grad_norm": 6.495003736669981, "learning_rate": 5.5773930079711105e-06, "loss": 17.3757, "step": 26220 }, { "epoch": 0.47929881002431135, "grad_norm": 6.865996755807059, "learning_rate": 5.5770989757480865e-06, "loss": 17.6212, "step": 26221 }, { "epoch": 0.47931708922075783, "grad_norm": 6.020183196247202, "learning_rate": 5.576804941502382e-06, "loss": 17.2654, "step": 26222 }, { "epoch": 0.47933536841720437, "grad_norm": 6.099741301700175, "learning_rate": 5.576510905235025e-06, "loss": 17.3339, "step": 26223 }, { "epoch": 0.4793536476136509, "grad_norm": 5.4067937833161235, "learning_rate": 5.576216866947048e-06, "loss": 17.0633, "step": 26224 }, { "epoch": 0.47937192681009744, "grad_norm": 7.247404031681719, "learning_rate": 5.575922826639483e-06, "loss": 17.6743, "step": 26225 }, { "epoch": 0.479390206006544, "grad_norm": 6.605198334954851, "learning_rate": 5.575628784313356e-06, "loss": 17.6374, "step": 26226 }, { "epoch": 0.47940848520299045, "grad_norm": 7.6240696437546935, "learning_rate": 5.575334739969699e-06, "loss": 18.268, "step": 26227 }, { "epoch": 0.479426764399437, "grad_norm": 7.378431079986668, "learning_rate": 5.5750406936095445e-06, "loss": 17.9383, "step": 26228 }, { "epoch": 0.4794450435958835, "grad_norm": 5.394760685278859, "learning_rate": 5.574746645233924e-06, "loss": 17.1321, "step": 26229 }, { "epoch": 0.47946332279233006, "grad_norm": 5.730823542829997, "learning_rate": 5.574452594843865e-06, "loss": 17.4686, "step": 26230 }, { "epoch": 0.4794816019887766, "grad_norm": 6.081860419800946, "learning_rate": 5.5741585424404e-06, "loss": 17.7798, "step": 26231 }, { "epoch": 0.4794998811852231, "grad_norm": 6.530243472618639, "learning_rate": 5.57386448802456e-06, "loss": 17.6541, "step": 26232 }, { "epoch": 0.4795181603816696, "grad_norm": 6.6227555503740385, "learning_rate": 5.573570431597373e-06, "loss": 17.4864, "step": 26233 }, { "epoch": 0.47953643957811615, "grad_norm": 4.923635154528669, "learning_rate": 5.573276373159872e-06, "loss": 16.9674, "step": 26234 }, { "epoch": 0.4795547187745627, "grad_norm": 5.603616071046356, "learning_rate": 5.572982312713087e-06, "loss": 17.2972, "step": 26235 }, { "epoch": 0.4795729979710092, "grad_norm": 6.893350260820897, "learning_rate": 5.572688250258048e-06, "loss": 17.7171, "step": 26236 }, { "epoch": 0.4795912771674557, "grad_norm": 5.620651305897151, "learning_rate": 5.572394185795787e-06, "loss": 17.3509, "step": 26237 }, { "epoch": 0.47960955636390223, "grad_norm": 6.51119153220209, "learning_rate": 5.572100119327335e-06, "loss": 17.7156, "step": 26238 }, { "epoch": 0.47962783556034877, "grad_norm": 5.026318614404321, "learning_rate": 5.571806050853722e-06, "loss": 17.0207, "step": 26239 }, { "epoch": 0.4796461147567953, "grad_norm": 5.384751585291172, "learning_rate": 5.571511980375977e-06, "loss": 17.1038, "step": 26240 }, { "epoch": 0.47966439395324184, "grad_norm": 6.795563613774651, "learning_rate": 5.5712179078951325e-06, "loss": 17.2896, "step": 26241 }, { "epoch": 0.4796826731496883, "grad_norm": 6.604792047533484, "learning_rate": 5.5709238334122194e-06, "loss": 17.6891, "step": 26242 }, { "epoch": 0.47970095234613486, "grad_norm": 5.920415462727743, "learning_rate": 5.570629756928267e-06, "loss": 17.2459, "step": 26243 }, { "epoch": 0.4797192315425814, "grad_norm": 7.0716277174869555, "learning_rate": 5.570335678444308e-06, "loss": 17.948, "step": 26244 }, { "epoch": 0.47973751073902793, "grad_norm": 7.241241562813814, "learning_rate": 5.57004159796137e-06, "loss": 17.8781, "step": 26245 }, { "epoch": 0.47975578993547446, "grad_norm": 6.244955681893017, "learning_rate": 5.569747515480487e-06, "loss": 17.6744, "step": 26246 }, { "epoch": 0.47977406913192094, "grad_norm": 5.550364666379726, "learning_rate": 5.569453431002687e-06, "loss": 17.098, "step": 26247 }, { "epoch": 0.4797923483283675, "grad_norm": 6.5128953084245085, "learning_rate": 5.569159344529004e-06, "loss": 17.5295, "step": 26248 }, { "epoch": 0.479810627524814, "grad_norm": 6.53858048588471, "learning_rate": 5.568865256060466e-06, "loss": 17.5188, "step": 26249 }, { "epoch": 0.47982890672126055, "grad_norm": 5.346609906075889, "learning_rate": 5.568571165598104e-06, "loss": 17.0783, "step": 26250 }, { "epoch": 0.47984718591770703, "grad_norm": 7.49948746741147, "learning_rate": 5.56827707314295e-06, "loss": 18.0951, "step": 26251 }, { "epoch": 0.47986546511415357, "grad_norm": 9.32404418977323, "learning_rate": 5.567982978696035e-06, "loss": 19.117, "step": 26252 }, { "epoch": 0.4798837443106001, "grad_norm": 6.0266094390333045, "learning_rate": 5.5676888822583884e-06, "loss": 17.4078, "step": 26253 }, { "epoch": 0.47990202350704664, "grad_norm": 6.420340507834126, "learning_rate": 5.567394783831041e-06, "loss": 17.7002, "step": 26254 }, { "epoch": 0.4799203027034932, "grad_norm": 6.72038526407255, "learning_rate": 5.567100683415025e-06, "loss": 17.562, "step": 26255 }, { "epoch": 0.47993858189993965, "grad_norm": 6.125386102652748, "learning_rate": 5.56680658101137e-06, "loss": 17.4068, "step": 26256 }, { "epoch": 0.4799568610963862, "grad_norm": 7.086812921516251, "learning_rate": 5.566512476621106e-06, "loss": 17.5719, "step": 26257 }, { "epoch": 0.4799751402928327, "grad_norm": 5.5893929247853675, "learning_rate": 5.5662183702452665e-06, "loss": 17.3398, "step": 26258 }, { "epoch": 0.47999341948927926, "grad_norm": 5.584706944612587, "learning_rate": 5.5659242618848785e-06, "loss": 16.944, "step": 26259 }, { "epoch": 0.4800116986857258, "grad_norm": 5.61440570770011, "learning_rate": 5.565630151540978e-06, "loss": 17.2652, "step": 26260 }, { "epoch": 0.4800299778821723, "grad_norm": 7.016142058233312, "learning_rate": 5.5653360392145914e-06, "loss": 18.0383, "step": 26261 }, { "epoch": 0.4800482570786188, "grad_norm": 6.160932323324327, "learning_rate": 5.5650419249067514e-06, "loss": 17.2671, "step": 26262 }, { "epoch": 0.48006653627506535, "grad_norm": 8.763301565907854, "learning_rate": 5.564747808618488e-06, "loss": 18.2968, "step": 26263 }, { "epoch": 0.4800848154715119, "grad_norm": 5.798588579926526, "learning_rate": 5.564453690350833e-06, "loss": 17.0508, "step": 26264 }, { "epoch": 0.4801030946679584, "grad_norm": 5.1590241961317655, "learning_rate": 5.564159570104817e-06, "loss": 16.9717, "step": 26265 }, { "epoch": 0.4801213738644049, "grad_norm": 6.837219230012546, "learning_rate": 5.56386544788147e-06, "loss": 17.3461, "step": 26266 }, { "epoch": 0.48013965306085143, "grad_norm": 6.810932047800776, "learning_rate": 5.563571323681825e-06, "loss": 17.9143, "step": 26267 }, { "epoch": 0.48015793225729797, "grad_norm": 5.903412998805857, "learning_rate": 5.5632771975069085e-06, "loss": 17.2265, "step": 26268 }, { "epoch": 0.4801762114537445, "grad_norm": 7.445933863773643, "learning_rate": 5.562983069357757e-06, "loss": 17.8089, "step": 26269 }, { "epoch": 0.48019449065019104, "grad_norm": 5.859780627223239, "learning_rate": 5.562688939235398e-06, "loss": 17.1932, "step": 26270 }, { "epoch": 0.4802127698466375, "grad_norm": 7.763470278693377, "learning_rate": 5.562394807140863e-06, "loss": 17.9588, "step": 26271 }, { "epoch": 0.48023104904308406, "grad_norm": 7.187215510049689, "learning_rate": 5.5621006730751825e-06, "loss": 17.8856, "step": 26272 }, { "epoch": 0.4802493282395306, "grad_norm": 6.174061876007868, "learning_rate": 5.561806537039388e-06, "loss": 17.5122, "step": 26273 }, { "epoch": 0.48026760743597713, "grad_norm": 4.817893880664546, "learning_rate": 5.561512399034511e-06, "loss": 16.8486, "step": 26274 }, { "epoch": 0.48028588663242366, "grad_norm": 5.405630905271116, "learning_rate": 5.5612182590615815e-06, "loss": 17.0686, "step": 26275 }, { "epoch": 0.48030416582887014, "grad_norm": 5.733040557614441, "learning_rate": 5.56092411712163e-06, "loss": 17.444, "step": 26276 }, { "epoch": 0.4803224450253167, "grad_norm": 7.022229118587633, "learning_rate": 5.560629973215688e-06, "loss": 17.797, "step": 26277 }, { "epoch": 0.4803407242217632, "grad_norm": 7.0566013658705575, "learning_rate": 5.5603358273447886e-06, "loss": 17.8137, "step": 26278 }, { "epoch": 0.48035900341820975, "grad_norm": 6.799484733560736, "learning_rate": 5.560041679509959e-06, "loss": 17.6116, "step": 26279 }, { "epoch": 0.4803772826146563, "grad_norm": 4.995210153192633, "learning_rate": 5.559747529712234e-06, "loss": 16.7933, "step": 26280 }, { "epoch": 0.48039556181110277, "grad_norm": 11.211654346671791, "learning_rate": 5.559453377952641e-06, "loss": 17.997, "step": 26281 }, { "epoch": 0.4804138410075493, "grad_norm": 5.8848111032914066, "learning_rate": 5.55915922423221e-06, "loss": 17.2252, "step": 26282 }, { "epoch": 0.48043212020399584, "grad_norm": 5.2625305885375075, "learning_rate": 5.558865068551978e-06, "loss": 17.0976, "step": 26283 }, { "epoch": 0.4804503994004424, "grad_norm": 6.16618166177873, "learning_rate": 5.558570910912971e-06, "loss": 17.6266, "step": 26284 }, { "epoch": 0.48046867859688885, "grad_norm": 6.671207237377224, "learning_rate": 5.558276751316222e-06, "loss": 17.9212, "step": 26285 }, { "epoch": 0.4804869577933354, "grad_norm": 6.5691155807881945, "learning_rate": 5.55798258976276e-06, "loss": 17.658, "step": 26286 }, { "epoch": 0.4805052369897819, "grad_norm": 5.148630452808944, "learning_rate": 5.557688426253619e-06, "loss": 16.9711, "step": 26287 }, { "epoch": 0.48052351618622846, "grad_norm": 6.101104961735287, "learning_rate": 5.557394260789828e-06, "loss": 17.5516, "step": 26288 }, { "epoch": 0.480541795382675, "grad_norm": 6.030352061449858, "learning_rate": 5.557100093372418e-06, "loss": 17.1955, "step": 26289 }, { "epoch": 0.4805600745791215, "grad_norm": 6.568361226260854, "learning_rate": 5.556805924002421e-06, "loss": 17.5732, "step": 26290 }, { "epoch": 0.480578353775568, "grad_norm": 7.811923646920016, "learning_rate": 5.5565117526808675e-06, "loss": 18.1357, "step": 26291 }, { "epoch": 0.48059663297201455, "grad_norm": 6.420016928709315, "learning_rate": 5.556217579408789e-06, "loss": 17.7828, "step": 26292 }, { "epoch": 0.4806149121684611, "grad_norm": 7.340951696991889, "learning_rate": 5.555923404187216e-06, "loss": 18.1184, "step": 26293 }, { "epoch": 0.4806331913649076, "grad_norm": 6.170264613948992, "learning_rate": 5.5556292270171796e-06, "loss": 17.3478, "step": 26294 }, { "epoch": 0.4806514705613541, "grad_norm": 7.3686494293047, "learning_rate": 5.5553350478997105e-06, "loss": 18.1051, "step": 26295 }, { "epoch": 0.48066974975780064, "grad_norm": 6.20315733126409, "learning_rate": 5.55504086683584e-06, "loss": 17.4688, "step": 26296 }, { "epoch": 0.48068802895424717, "grad_norm": 6.279347818895089, "learning_rate": 5.5547466838265995e-06, "loss": 17.2298, "step": 26297 }, { "epoch": 0.4807063081506937, "grad_norm": 7.263587220888498, "learning_rate": 5.554452498873022e-06, "loss": 17.6596, "step": 26298 }, { "epoch": 0.48072458734714024, "grad_norm": 5.879391009640084, "learning_rate": 5.5541583119761345e-06, "loss": 17.3132, "step": 26299 }, { "epoch": 0.4807428665435867, "grad_norm": 5.67632039881883, "learning_rate": 5.55386412313697e-06, "loss": 17.0772, "step": 26300 }, { "epoch": 0.48076114574003326, "grad_norm": 5.4965637625749135, "learning_rate": 5.553569932356561e-06, "loss": 17.4225, "step": 26301 }, { "epoch": 0.4807794249364798, "grad_norm": 7.718166568010055, "learning_rate": 5.553275739635938e-06, "loss": 17.6912, "step": 26302 }, { "epoch": 0.48079770413292633, "grad_norm": 7.331301042963287, "learning_rate": 5.55298154497613e-06, "loss": 17.9998, "step": 26303 }, { "epoch": 0.48081598332937286, "grad_norm": 7.30181352093863, "learning_rate": 5.552687348378171e-06, "loss": 17.9985, "step": 26304 }, { "epoch": 0.48083426252581934, "grad_norm": 6.1782655559328425, "learning_rate": 5.552393149843089e-06, "loss": 17.6233, "step": 26305 }, { "epoch": 0.4808525417222659, "grad_norm": 6.840889730160085, "learning_rate": 5.552098949371918e-06, "loss": 17.8455, "step": 26306 }, { "epoch": 0.4808708209187124, "grad_norm": 6.504445684199956, "learning_rate": 5.551804746965689e-06, "loss": 17.5115, "step": 26307 }, { "epoch": 0.48088910011515895, "grad_norm": 6.329447353544942, "learning_rate": 5.551510542625433e-06, "loss": 17.6087, "step": 26308 }, { "epoch": 0.4809073793116055, "grad_norm": 5.539137613769063, "learning_rate": 5.551216336352176e-06, "loss": 17.0746, "step": 26309 }, { "epoch": 0.48092565850805197, "grad_norm": 6.315593296203523, "learning_rate": 5.550922128146957e-06, "loss": 17.6492, "step": 26310 }, { "epoch": 0.4809439377044985, "grad_norm": 6.286549505811321, "learning_rate": 5.550627918010804e-06, "loss": 17.3756, "step": 26311 }, { "epoch": 0.48096221690094504, "grad_norm": 7.438101299101056, "learning_rate": 5.550333705944747e-06, "loss": 17.9737, "step": 26312 }, { "epoch": 0.4809804960973916, "grad_norm": 5.016704469711039, "learning_rate": 5.550039491949818e-06, "loss": 16.9689, "step": 26313 }, { "epoch": 0.4809987752938381, "grad_norm": 5.832696566777145, "learning_rate": 5.549745276027047e-06, "loss": 17.6528, "step": 26314 }, { "epoch": 0.4810170544902846, "grad_norm": 5.482277567660579, "learning_rate": 5.54945105817747e-06, "loss": 16.9582, "step": 26315 }, { "epoch": 0.4810353336867311, "grad_norm": 6.803837274527475, "learning_rate": 5.5491568384021125e-06, "loss": 17.9034, "step": 26316 }, { "epoch": 0.48105361288317766, "grad_norm": 6.9583309770867885, "learning_rate": 5.548862616702008e-06, "loss": 17.8081, "step": 26317 }, { "epoch": 0.4810718920796242, "grad_norm": 8.33771885405574, "learning_rate": 5.548568393078188e-06, "loss": 18.432, "step": 26318 }, { "epoch": 0.4810901712760707, "grad_norm": 5.976827166786, "learning_rate": 5.548274167531682e-06, "loss": 17.3933, "step": 26319 }, { "epoch": 0.4811084504725172, "grad_norm": 7.0006852407067495, "learning_rate": 5.547979940063524e-06, "loss": 17.5429, "step": 26320 }, { "epoch": 0.48112672966896375, "grad_norm": 5.994212461893175, "learning_rate": 5.547685710674744e-06, "loss": 17.2477, "step": 26321 }, { "epoch": 0.4811450088654103, "grad_norm": 5.40182894286935, "learning_rate": 5.547391479366372e-06, "loss": 17.0529, "step": 26322 }, { "epoch": 0.4811632880618568, "grad_norm": 6.683537961834204, "learning_rate": 5.547097246139441e-06, "loss": 17.5788, "step": 26323 }, { "epoch": 0.4811815672583033, "grad_norm": 6.985561446856629, "learning_rate": 5.546803010994982e-06, "loss": 17.7265, "step": 26324 }, { "epoch": 0.48119984645474984, "grad_norm": 6.762442212134518, "learning_rate": 5.546508773934026e-06, "loss": 17.568, "step": 26325 }, { "epoch": 0.48121812565119637, "grad_norm": 6.166687649428789, "learning_rate": 5.5462145349576046e-06, "loss": 17.3994, "step": 26326 }, { "epoch": 0.4812364048476429, "grad_norm": 5.78866726007588, "learning_rate": 5.545920294066747e-06, "loss": 17.1952, "step": 26327 }, { "epoch": 0.48125468404408944, "grad_norm": 7.8089709936525855, "learning_rate": 5.545626051262486e-06, "loss": 17.8199, "step": 26328 }, { "epoch": 0.4812729632405359, "grad_norm": 5.730696785860018, "learning_rate": 5.545331806545855e-06, "loss": 17.2917, "step": 26329 }, { "epoch": 0.48129124243698246, "grad_norm": 6.010027176161514, "learning_rate": 5.545037559917883e-06, "loss": 17.3959, "step": 26330 }, { "epoch": 0.481309521633429, "grad_norm": 6.660744577614647, "learning_rate": 5.5447433113796e-06, "loss": 17.3296, "step": 26331 }, { "epoch": 0.48132780082987553, "grad_norm": 6.7999284135408775, "learning_rate": 5.54444906093204e-06, "loss": 17.5348, "step": 26332 }, { "epoch": 0.48134608002632207, "grad_norm": 6.717869793306183, "learning_rate": 5.544154808576235e-06, "loss": 17.8168, "step": 26333 }, { "epoch": 0.48136435922276855, "grad_norm": 7.7918479956045354, "learning_rate": 5.543860554313212e-06, "loss": 17.5697, "step": 26334 }, { "epoch": 0.4813826384192151, "grad_norm": 5.3385532128671915, "learning_rate": 5.543566298144005e-06, "loss": 17.0516, "step": 26335 }, { "epoch": 0.4814009176156616, "grad_norm": 5.446983480464271, "learning_rate": 5.543272040069646e-06, "loss": 17.1035, "step": 26336 }, { "epoch": 0.48141919681210815, "grad_norm": 7.361994189029218, "learning_rate": 5.542977780091166e-06, "loss": 17.4346, "step": 26337 }, { "epoch": 0.4814374760085547, "grad_norm": 5.831175749993607, "learning_rate": 5.542683518209596e-06, "loss": 17.2801, "step": 26338 }, { "epoch": 0.48145575520500117, "grad_norm": 5.320647097754033, "learning_rate": 5.5423892544259685e-06, "loss": 17.0893, "step": 26339 }, { "epoch": 0.4814740344014477, "grad_norm": 4.948055325539974, "learning_rate": 5.542094988741311e-06, "loss": 16.8273, "step": 26340 }, { "epoch": 0.48149231359789424, "grad_norm": 6.744933085245579, "learning_rate": 5.541800721156658e-06, "loss": 17.8702, "step": 26341 }, { "epoch": 0.4815105927943408, "grad_norm": 6.903769133505602, "learning_rate": 5.541506451673043e-06, "loss": 17.9442, "step": 26342 }, { "epoch": 0.4815288719907873, "grad_norm": 5.061867908918671, "learning_rate": 5.541212180291493e-06, "loss": 17.1038, "step": 26343 }, { "epoch": 0.4815471511872338, "grad_norm": 7.448512656050445, "learning_rate": 5.540917907013041e-06, "loss": 18.2131, "step": 26344 }, { "epoch": 0.4815654303836803, "grad_norm": 6.354973975214358, "learning_rate": 5.54062363183872e-06, "loss": 17.4036, "step": 26345 }, { "epoch": 0.48158370958012686, "grad_norm": 6.487779165859003, "learning_rate": 5.540329354769559e-06, "loss": 17.4813, "step": 26346 }, { "epoch": 0.4816019887765734, "grad_norm": 6.461643148634538, "learning_rate": 5.540035075806591e-06, "loss": 17.2318, "step": 26347 }, { "epoch": 0.48162026797301993, "grad_norm": 6.7665501526774685, "learning_rate": 5.539740794950846e-06, "loss": 17.7662, "step": 26348 }, { "epoch": 0.4816385471694664, "grad_norm": 7.327919960970281, "learning_rate": 5.539446512203358e-06, "loss": 17.7267, "step": 26349 }, { "epoch": 0.48165682636591295, "grad_norm": 7.199502002435817, "learning_rate": 5.5391522275651555e-06, "loss": 17.7265, "step": 26350 }, { "epoch": 0.4816751055623595, "grad_norm": 6.1694956664444325, "learning_rate": 5.538857941037272e-06, "loss": 17.4255, "step": 26351 }, { "epoch": 0.481693384758806, "grad_norm": 7.261349513143847, "learning_rate": 5.538563652620738e-06, "loss": 18.0629, "step": 26352 }, { "epoch": 0.4817116639552525, "grad_norm": 7.2562196600869715, "learning_rate": 5.538269362316585e-06, "loss": 17.7093, "step": 26353 }, { "epoch": 0.48172994315169904, "grad_norm": 7.815595500800447, "learning_rate": 5.537975070125844e-06, "loss": 18.2068, "step": 26354 }, { "epoch": 0.48174822234814557, "grad_norm": 8.912954167386891, "learning_rate": 5.537680776049547e-06, "loss": 17.9742, "step": 26355 }, { "epoch": 0.4817665015445921, "grad_norm": 5.379099899561392, "learning_rate": 5.537386480088728e-06, "loss": 17.3032, "step": 26356 }, { "epoch": 0.48178478074103864, "grad_norm": 5.833575098529954, "learning_rate": 5.537092182244414e-06, "loss": 17.1715, "step": 26357 }, { "epoch": 0.4818030599374851, "grad_norm": 5.944524551931755, "learning_rate": 5.536797882517639e-06, "loss": 17.4551, "step": 26358 }, { "epoch": 0.48182133913393166, "grad_norm": 4.406158916859859, "learning_rate": 5.5365035809094315e-06, "loss": 16.7808, "step": 26359 }, { "epoch": 0.4818396183303782, "grad_norm": 6.482350089546045, "learning_rate": 5.536209277420829e-06, "loss": 17.5317, "step": 26360 }, { "epoch": 0.48185789752682473, "grad_norm": 8.447406297336544, "learning_rate": 5.5359149720528586e-06, "loss": 17.653, "step": 26361 }, { "epoch": 0.48187617672327127, "grad_norm": 7.80788670441757, "learning_rate": 5.535620664806551e-06, "loss": 18.283, "step": 26362 }, { "epoch": 0.48189445591971775, "grad_norm": 5.911683566350858, "learning_rate": 5.535326355682942e-06, "loss": 17.5112, "step": 26363 }, { "epoch": 0.4819127351161643, "grad_norm": 6.646164979700029, "learning_rate": 5.5350320446830585e-06, "loss": 17.9519, "step": 26364 }, { "epoch": 0.4819310143126108, "grad_norm": 5.971546840920929, "learning_rate": 5.534737731807935e-06, "loss": 17.2452, "step": 26365 }, { "epoch": 0.48194929350905735, "grad_norm": 6.922578573049142, "learning_rate": 5.534443417058602e-06, "loss": 17.9595, "step": 26366 }, { "epoch": 0.4819675727055039, "grad_norm": 7.729067852830623, "learning_rate": 5.534149100436092e-06, "loss": 17.5947, "step": 26367 }, { "epoch": 0.48198585190195037, "grad_norm": 5.921488170399348, "learning_rate": 5.533854781941435e-06, "loss": 17.4843, "step": 26368 }, { "epoch": 0.4820041310983969, "grad_norm": 6.099905150420779, "learning_rate": 5.533560461575663e-06, "loss": 17.4936, "step": 26369 }, { "epoch": 0.48202241029484344, "grad_norm": 5.2078788116215, "learning_rate": 5.533266139339809e-06, "loss": 17.0594, "step": 26370 }, { "epoch": 0.48204068949129, "grad_norm": 5.344458387455696, "learning_rate": 5.5329718152349036e-06, "loss": 17.1222, "step": 26371 }, { "epoch": 0.4820589686877365, "grad_norm": 5.759422879294768, "learning_rate": 5.532677489261976e-06, "loss": 17.1989, "step": 26372 }, { "epoch": 0.482077247884183, "grad_norm": 6.27210077383447, "learning_rate": 5.532383161422061e-06, "loss": 17.4011, "step": 26373 }, { "epoch": 0.4820955270806295, "grad_norm": 6.975288594122639, "learning_rate": 5.532088831716191e-06, "loss": 17.5764, "step": 26374 }, { "epoch": 0.48211380627707606, "grad_norm": 7.2013189700389875, "learning_rate": 5.531794500145394e-06, "loss": 17.7035, "step": 26375 }, { "epoch": 0.4821320854735226, "grad_norm": 5.93673221939514, "learning_rate": 5.531500166710704e-06, "loss": 17.3782, "step": 26376 }, { "epoch": 0.48215036466996913, "grad_norm": 6.2951549386612635, "learning_rate": 5.5312058314131515e-06, "loss": 17.537, "step": 26377 }, { "epoch": 0.4821686438664156, "grad_norm": 5.095930937214379, "learning_rate": 5.530911494253769e-06, "loss": 17.0357, "step": 26378 }, { "epoch": 0.48218692306286215, "grad_norm": 6.595069307076094, "learning_rate": 5.530617155233588e-06, "loss": 17.6362, "step": 26379 }, { "epoch": 0.4822052022593087, "grad_norm": 5.227657714040226, "learning_rate": 5.530322814353641e-06, "loss": 16.9308, "step": 26380 }, { "epoch": 0.4822234814557552, "grad_norm": 5.635523305027515, "learning_rate": 5.530028471614955e-06, "loss": 17.263, "step": 26381 }, { "epoch": 0.48224176065220176, "grad_norm": 5.44696137512229, "learning_rate": 5.529734127018568e-06, "loss": 17.0705, "step": 26382 }, { "epoch": 0.48226003984864824, "grad_norm": 5.997484849082562, "learning_rate": 5.529439780565509e-06, "loss": 17.4066, "step": 26383 }, { "epoch": 0.48227831904509477, "grad_norm": 6.628561715389882, "learning_rate": 5.529145432256809e-06, "loss": 17.7799, "step": 26384 }, { "epoch": 0.4822965982415413, "grad_norm": 6.430481647033819, "learning_rate": 5.5288510820935005e-06, "loss": 17.3636, "step": 26385 }, { "epoch": 0.48231487743798784, "grad_norm": 7.03515482701866, "learning_rate": 5.528556730076613e-06, "loss": 17.7708, "step": 26386 }, { "epoch": 0.4823331566344343, "grad_norm": 6.0235610521984135, "learning_rate": 5.528262376207182e-06, "loss": 17.2265, "step": 26387 }, { "epoch": 0.48235143583088086, "grad_norm": 6.569044577328092, "learning_rate": 5.527968020486237e-06, "loss": 17.2592, "step": 26388 }, { "epoch": 0.4823697150273274, "grad_norm": 6.5548474387297855, "learning_rate": 5.52767366291481e-06, "loss": 17.3231, "step": 26389 }, { "epoch": 0.48238799422377393, "grad_norm": 6.309354915714875, "learning_rate": 5.527379303493932e-06, "loss": 17.197, "step": 26390 }, { "epoch": 0.48240627342022047, "grad_norm": 6.195226704317913, "learning_rate": 5.527084942224635e-06, "loss": 17.8131, "step": 26391 }, { "epoch": 0.48242455261666695, "grad_norm": 5.912727609028651, "learning_rate": 5.526790579107951e-06, "loss": 17.1175, "step": 26392 }, { "epoch": 0.4824428318131135, "grad_norm": 7.107358197025265, "learning_rate": 5.526496214144912e-06, "loss": 17.9344, "step": 26393 }, { "epoch": 0.48246111100956, "grad_norm": 7.684102386305336, "learning_rate": 5.526201847336551e-06, "loss": 17.9892, "step": 26394 }, { "epoch": 0.48247939020600655, "grad_norm": 6.474374355161299, "learning_rate": 5.525907478683895e-06, "loss": 17.4138, "step": 26395 }, { "epoch": 0.4824976694024531, "grad_norm": 5.991150146906878, "learning_rate": 5.525613108187982e-06, "loss": 17.1133, "step": 26396 }, { "epoch": 0.48251594859889957, "grad_norm": 7.153046771873008, "learning_rate": 5.5253187358498385e-06, "loss": 17.7382, "step": 26397 }, { "epoch": 0.4825342277953461, "grad_norm": 5.927026542319743, "learning_rate": 5.5250243616705005e-06, "loss": 17.0766, "step": 26398 }, { "epoch": 0.48255250699179264, "grad_norm": 6.171271561810203, "learning_rate": 5.524729985650996e-06, "loss": 17.6482, "step": 26399 }, { "epoch": 0.4825707861882392, "grad_norm": 6.893844430349866, "learning_rate": 5.524435607792358e-06, "loss": 17.6515, "step": 26400 }, { "epoch": 0.4825890653846857, "grad_norm": 7.439381565053576, "learning_rate": 5.524141228095621e-06, "loss": 17.8956, "step": 26401 }, { "epoch": 0.4826073445811322, "grad_norm": 7.373664106598826, "learning_rate": 5.5238468465618135e-06, "loss": 17.9442, "step": 26402 }, { "epoch": 0.4826256237775787, "grad_norm": 6.9939627799868695, "learning_rate": 5.5235524631919664e-06, "loss": 17.7618, "step": 26403 }, { "epoch": 0.48264390297402526, "grad_norm": 6.217449746692742, "learning_rate": 5.523258077987116e-06, "loss": 17.2366, "step": 26404 }, { "epoch": 0.4826621821704718, "grad_norm": 6.475407706996417, "learning_rate": 5.5229636909482895e-06, "loss": 17.453, "step": 26405 }, { "epoch": 0.48268046136691833, "grad_norm": 6.771248412325351, "learning_rate": 5.522669302076522e-06, "loss": 17.6682, "step": 26406 }, { "epoch": 0.4826987405633648, "grad_norm": 7.932638380024133, "learning_rate": 5.522374911372843e-06, "loss": 17.7972, "step": 26407 }, { "epoch": 0.48271701975981135, "grad_norm": 5.398758511775816, "learning_rate": 5.522080518838286e-06, "loss": 17.0619, "step": 26408 }, { "epoch": 0.4827352989562579, "grad_norm": 7.134467064725874, "learning_rate": 5.521786124473881e-06, "loss": 17.676, "step": 26409 }, { "epoch": 0.4827535781527044, "grad_norm": 5.467138954842728, "learning_rate": 5.521491728280661e-06, "loss": 17.079, "step": 26410 }, { "epoch": 0.48277185734915096, "grad_norm": 7.448593383359493, "learning_rate": 5.52119733025966e-06, "loss": 17.7377, "step": 26411 }, { "epoch": 0.48279013654559744, "grad_norm": 6.706712332785757, "learning_rate": 5.5209029304119055e-06, "loss": 17.542, "step": 26412 }, { "epoch": 0.482808415742044, "grad_norm": 8.079782595318413, "learning_rate": 5.520608528738431e-06, "loss": 18.13, "step": 26413 }, { "epoch": 0.4828266949384905, "grad_norm": 6.180117578392903, "learning_rate": 5.520314125240269e-06, "loss": 17.2135, "step": 26414 }, { "epoch": 0.48284497413493704, "grad_norm": 6.413793734003815, "learning_rate": 5.520019719918454e-06, "loss": 17.4822, "step": 26415 }, { "epoch": 0.4828632533313836, "grad_norm": 5.416052383810696, "learning_rate": 5.519725312774012e-06, "loss": 16.9619, "step": 26416 }, { "epoch": 0.48288153252783006, "grad_norm": 8.515404032131901, "learning_rate": 5.519430903807979e-06, "loss": 18.2006, "step": 26417 }, { "epoch": 0.4828998117242766, "grad_norm": 6.718324835533906, "learning_rate": 5.519136493021385e-06, "loss": 17.7451, "step": 26418 }, { "epoch": 0.48291809092072313, "grad_norm": 7.557157321472496, "learning_rate": 5.518842080415263e-06, "loss": 18.0379, "step": 26419 }, { "epoch": 0.48293637011716967, "grad_norm": 5.491356103339672, "learning_rate": 5.518547665990644e-06, "loss": 16.8837, "step": 26420 }, { "epoch": 0.48295464931361615, "grad_norm": 6.039783839028251, "learning_rate": 5.518253249748562e-06, "loss": 17.4438, "step": 26421 }, { "epoch": 0.4829729285100627, "grad_norm": 7.234507615296453, "learning_rate": 5.517958831690047e-06, "loss": 17.5323, "step": 26422 }, { "epoch": 0.4829912077065092, "grad_norm": 7.147886885993875, "learning_rate": 5.517664411816129e-06, "loss": 17.9488, "step": 26423 }, { "epoch": 0.48300948690295575, "grad_norm": 6.7792975085443254, "learning_rate": 5.517369990127844e-06, "loss": 17.9162, "step": 26424 }, { "epoch": 0.4830277660994023, "grad_norm": 6.366299688558848, "learning_rate": 5.517075566626223e-06, "loss": 17.3968, "step": 26425 }, { "epoch": 0.48304604529584877, "grad_norm": 6.671544908143495, "learning_rate": 5.516781141312296e-06, "loss": 17.6329, "step": 26426 }, { "epoch": 0.4830643244922953, "grad_norm": 6.147532588853179, "learning_rate": 5.516486714187095e-06, "loss": 17.3035, "step": 26427 }, { "epoch": 0.48308260368874184, "grad_norm": 6.5213750406529005, "learning_rate": 5.516192285251654e-06, "loss": 17.7173, "step": 26428 }, { "epoch": 0.4831008828851884, "grad_norm": 6.299153225964926, "learning_rate": 5.515897854507004e-06, "loss": 17.5168, "step": 26429 }, { "epoch": 0.4831191620816349, "grad_norm": 6.884270250099057, "learning_rate": 5.5156034219541765e-06, "loss": 17.6303, "step": 26430 }, { "epoch": 0.4831374412780814, "grad_norm": 6.791941452825686, "learning_rate": 5.515308987594204e-06, "loss": 17.6293, "step": 26431 }, { "epoch": 0.4831557204745279, "grad_norm": 5.425015410045171, "learning_rate": 5.515014551428117e-06, "loss": 16.9689, "step": 26432 }, { "epoch": 0.48317399967097446, "grad_norm": 6.327286698870973, "learning_rate": 5.514720113456949e-06, "loss": 17.4335, "step": 26433 }, { "epoch": 0.483192278867421, "grad_norm": 5.751877844138184, "learning_rate": 5.514425673681732e-06, "loss": 17.1089, "step": 26434 }, { "epoch": 0.48321055806386753, "grad_norm": 7.826855071819306, "learning_rate": 5.514131232103498e-06, "loss": 17.887, "step": 26435 }, { "epoch": 0.483228837260314, "grad_norm": 5.554974774813623, "learning_rate": 5.513836788723279e-06, "loss": 17.195, "step": 26436 }, { "epoch": 0.48324711645676055, "grad_norm": 6.460077041217687, "learning_rate": 5.513542343542105e-06, "loss": 17.619, "step": 26437 }, { "epoch": 0.4832653956532071, "grad_norm": 6.559002295310229, "learning_rate": 5.513247896561011e-06, "loss": 17.7913, "step": 26438 }, { "epoch": 0.4832836748496536, "grad_norm": 6.33195986367585, "learning_rate": 5.5129534477810285e-06, "loss": 17.4724, "step": 26439 }, { "epoch": 0.48330195404610016, "grad_norm": 5.912952227299503, "learning_rate": 5.512658997203187e-06, "loss": 17.3697, "step": 26440 }, { "epoch": 0.48332023324254664, "grad_norm": 5.983003719290201, "learning_rate": 5.51236454482852e-06, "loss": 17.4935, "step": 26441 }, { "epoch": 0.4833385124389932, "grad_norm": 6.084286934451618, "learning_rate": 5.5120700906580614e-06, "loss": 17.2255, "step": 26442 }, { "epoch": 0.4833567916354397, "grad_norm": 5.918171952761315, "learning_rate": 5.5117756346928406e-06, "loss": 17.3408, "step": 26443 }, { "epoch": 0.48337507083188624, "grad_norm": 7.875499809776562, "learning_rate": 5.51148117693389e-06, "loss": 17.8208, "step": 26444 }, { "epoch": 0.4833933500283328, "grad_norm": 5.9383213103735315, "learning_rate": 5.511186717382244e-06, "loss": 17.3032, "step": 26445 }, { "epoch": 0.48341162922477926, "grad_norm": 9.484592910129143, "learning_rate": 5.510892256038932e-06, "loss": 18.4924, "step": 26446 }, { "epoch": 0.4834299084212258, "grad_norm": 6.985124629907305, "learning_rate": 5.510597792904987e-06, "loss": 17.5429, "step": 26447 }, { "epoch": 0.48344818761767233, "grad_norm": 5.5846440055655755, "learning_rate": 5.51030332798144e-06, "loss": 17.1491, "step": 26448 }, { "epoch": 0.48346646681411887, "grad_norm": 5.508266345734633, "learning_rate": 5.510008861269325e-06, "loss": 17.0911, "step": 26449 }, { "epoch": 0.4834847460105654, "grad_norm": 6.573095767583956, "learning_rate": 5.509714392769674e-06, "loss": 17.2324, "step": 26450 }, { "epoch": 0.4835030252070119, "grad_norm": 7.010458442249363, "learning_rate": 5.509419922483516e-06, "loss": 17.8, "step": 26451 }, { "epoch": 0.4835213044034584, "grad_norm": 6.951120102039227, "learning_rate": 5.509125450411888e-06, "loss": 17.402, "step": 26452 }, { "epoch": 0.48353958359990495, "grad_norm": 6.355362812699377, "learning_rate": 5.508830976555819e-06, "loss": 17.0563, "step": 26453 }, { "epoch": 0.4835578627963515, "grad_norm": 6.308735858351048, "learning_rate": 5.5085365009163394e-06, "loss": 17.3058, "step": 26454 }, { "epoch": 0.48357614199279797, "grad_norm": 6.79619632781194, "learning_rate": 5.508242023494486e-06, "loss": 17.7997, "step": 26455 }, { "epoch": 0.4835944211892445, "grad_norm": 6.67685624025841, "learning_rate": 5.507947544291288e-06, "loss": 17.1824, "step": 26456 }, { "epoch": 0.48361270038569104, "grad_norm": 7.292173648165463, "learning_rate": 5.507653063307777e-06, "loss": 17.9333, "step": 26457 }, { "epoch": 0.4836309795821376, "grad_norm": 6.360337230182274, "learning_rate": 5.507358580544986e-06, "loss": 17.4501, "step": 26458 }, { "epoch": 0.4836492587785841, "grad_norm": 8.908950701616417, "learning_rate": 5.507064096003947e-06, "loss": 18.7293, "step": 26459 }, { "epoch": 0.4836675379750306, "grad_norm": 6.081792855704345, "learning_rate": 5.506769609685694e-06, "loss": 17.4807, "step": 26460 }, { "epoch": 0.48368581717147713, "grad_norm": 5.310649674090544, "learning_rate": 5.506475121591256e-06, "loss": 16.9886, "step": 26461 }, { "epoch": 0.48370409636792366, "grad_norm": 6.463605555093732, "learning_rate": 5.5061806317216675e-06, "loss": 17.5928, "step": 26462 }, { "epoch": 0.4837223755643702, "grad_norm": 6.645141703076254, "learning_rate": 5.505886140077959e-06, "loss": 17.6156, "step": 26463 }, { "epoch": 0.48374065476081674, "grad_norm": 7.919740673887846, "learning_rate": 5.505591646661163e-06, "loss": 17.6663, "step": 26464 }, { "epoch": 0.4837589339572632, "grad_norm": 6.380795144980014, "learning_rate": 5.505297151472314e-06, "loss": 17.497, "step": 26465 }, { "epoch": 0.48377721315370975, "grad_norm": 5.354570423059798, "learning_rate": 5.505002654512442e-06, "loss": 17.2725, "step": 26466 }, { "epoch": 0.4837954923501563, "grad_norm": 7.920450163534305, "learning_rate": 5.504708155782579e-06, "loss": 18.361, "step": 26467 }, { "epoch": 0.4838137715466028, "grad_norm": 6.797506938434176, "learning_rate": 5.504413655283757e-06, "loss": 17.4863, "step": 26468 }, { "epoch": 0.48383205074304936, "grad_norm": 6.1863490212084455, "learning_rate": 5.504119153017009e-06, "loss": 17.4748, "step": 26469 }, { "epoch": 0.48385032993949584, "grad_norm": 9.94859428382597, "learning_rate": 5.503824648983369e-06, "loss": 18.8714, "step": 26470 }, { "epoch": 0.4838686091359424, "grad_norm": 5.984985549829618, "learning_rate": 5.503530143183865e-06, "loss": 17.3481, "step": 26471 }, { "epoch": 0.4838868883323889, "grad_norm": 7.570580355804501, "learning_rate": 5.5032356356195325e-06, "loss": 18.2996, "step": 26472 }, { "epoch": 0.48390516752883544, "grad_norm": 8.095710540796654, "learning_rate": 5.502941126291402e-06, "loss": 17.7676, "step": 26473 }, { "epoch": 0.483923446725282, "grad_norm": 6.442904414187398, "learning_rate": 5.502646615200509e-06, "loss": 17.41, "step": 26474 }, { "epoch": 0.48394172592172846, "grad_norm": 5.8999603205240385, "learning_rate": 5.502352102347881e-06, "loss": 17.0339, "step": 26475 }, { "epoch": 0.483960005118175, "grad_norm": 7.594679173460728, "learning_rate": 5.502057587734553e-06, "loss": 17.767, "step": 26476 }, { "epoch": 0.48397828431462153, "grad_norm": 6.339421754426708, "learning_rate": 5.501763071361557e-06, "loss": 17.576, "step": 26477 }, { "epoch": 0.48399656351106807, "grad_norm": 5.493731792588541, "learning_rate": 5.501468553229924e-06, "loss": 17.3342, "step": 26478 }, { "epoch": 0.4840148427075146, "grad_norm": 5.890005557295245, "learning_rate": 5.501174033340687e-06, "loss": 17.1531, "step": 26479 }, { "epoch": 0.4840331219039611, "grad_norm": 8.516081426871658, "learning_rate": 5.500879511694881e-06, "loss": 18.3053, "step": 26480 }, { "epoch": 0.4840514011004076, "grad_norm": 6.46748852795383, "learning_rate": 5.500584988293534e-06, "loss": 17.4105, "step": 26481 }, { "epoch": 0.48406968029685415, "grad_norm": 6.58473814722474, "learning_rate": 5.50029046313768e-06, "loss": 17.5878, "step": 26482 }, { "epoch": 0.4840879594933007, "grad_norm": 5.820451167813538, "learning_rate": 5.49999593622835e-06, "loss": 17.3362, "step": 26483 }, { "epoch": 0.4841062386897472, "grad_norm": 6.398707813036087, "learning_rate": 5.499701407566581e-06, "loss": 17.45, "step": 26484 }, { "epoch": 0.4841245178861937, "grad_norm": 6.657717437355664, "learning_rate": 5.499406877153401e-06, "loss": 17.7112, "step": 26485 }, { "epoch": 0.48414279708264024, "grad_norm": 5.654215096790829, "learning_rate": 5.49911234498984e-06, "loss": 17.0652, "step": 26486 }, { "epoch": 0.4841610762790868, "grad_norm": 6.9519255804161, "learning_rate": 5.498817811076938e-06, "loss": 17.8553, "step": 26487 }, { "epoch": 0.4841793554755333, "grad_norm": 6.242247399787906, "learning_rate": 5.49852327541572e-06, "loss": 17.5432, "step": 26488 }, { "epoch": 0.4841976346719798, "grad_norm": 6.713974957489091, "learning_rate": 5.498228738007222e-06, "loss": 17.3966, "step": 26489 }, { "epoch": 0.48421591386842633, "grad_norm": 6.439268771521275, "learning_rate": 5.497934198852475e-06, "loss": 17.7379, "step": 26490 }, { "epoch": 0.48423419306487286, "grad_norm": 6.713643240167457, "learning_rate": 5.497639657952513e-06, "loss": 17.6121, "step": 26491 }, { "epoch": 0.4842524722613194, "grad_norm": 6.863243618818315, "learning_rate": 5.497345115308366e-06, "loss": 17.7486, "step": 26492 }, { "epoch": 0.48427075145776594, "grad_norm": 6.463231846157784, "learning_rate": 5.497050570921067e-06, "loss": 17.3952, "step": 26493 }, { "epoch": 0.4842890306542124, "grad_norm": 7.895602202068241, "learning_rate": 5.4967560247916516e-06, "loss": 17.902, "step": 26494 }, { "epoch": 0.48430730985065895, "grad_norm": 7.699725467897229, "learning_rate": 5.496461476921147e-06, "loss": 18.0572, "step": 26495 }, { "epoch": 0.4843255890471055, "grad_norm": 6.638513230121819, "learning_rate": 5.4961669273105875e-06, "loss": 17.6035, "step": 26496 }, { "epoch": 0.484343868243552, "grad_norm": 6.281047227787639, "learning_rate": 5.495872375961008e-06, "loss": 16.9826, "step": 26497 }, { "epoch": 0.48436214743999856, "grad_norm": 6.55216091310133, "learning_rate": 5.495577822873439e-06, "loss": 17.9349, "step": 26498 }, { "epoch": 0.48438042663644504, "grad_norm": 5.6852815409141435, "learning_rate": 5.495283268048912e-06, "loss": 16.9378, "step": 26499 }, { "epoch": 0.4843987058328916, "grad_norm": 7.084329263200316, "learning_rate": 5.494988711488458e-06, "loss": 17.6849, "step": 26500 }, { "epoch": 0.4844169850293381, "grad_norm": 4.411131277470196, "learning_rate": 5.4946941531931146e-06, "loss": 16.6473, "step": 26501 }, { "epoch": 0.48443526422578465, "grad_norm": 6.360299954623239, "learning_rate": 5.49439959316391e-06, "loss": 17.5655, "step": 26502 }, { "epoch": 0.4844535434222312, "grad_norm": 7.005715812781231, "learning_rate": 5.494105031401877e-06, "loss": 17.8182, "step": 26503 }, { "epoch": 0.48447182261867766, "grad_norm": 7.065328420310809, "learning_rate": 5.49381046790805e-06, "loss": 17.8284, "step": 26504 }, { "epoch": 0.4844901018151242, "grad_norm": 7.66959744448296, "learning_rate": 5.493515902683459e-06, "loss": 18.0271, "step": 26505 }, { "epoch": 0.48450838101157073, "grad_norm": 6.852720664283129, "learning_rate": 5.493221335729139e-06, "loss": 17.7273, "step": 26506 }, { "epoch": 0.48452666020801727, "grad_norm": 6.1968679770201005, "learning_rate": 5.49292676704612e-06, "loss": 17.4665, "step": 26507 }, { "epoch": 0.4845449394044638, "grad_norm": 6.000632562630498, "learning_rate": 5.492632196635436e-06, "loss": 17.4382, "step": 26508 }, { "epoch": 0.4845632186009103, "grad_norm": 6.07520775388825, "learning_rate": 5.492337624498117e-06, "loss": 17.476, "step": 26509 }, { "epoch": 0.4845814977973568, "grad_norm": 6.671538107506655, "learning_rate": 5.4920430506351995e-06, "loss": 17.4663, "step": 26510 }, { "epoch": 0.48459977699380336, "grad_norm": 5.957206393544445, "learning_rate": 5.491748475047714e-06, "loss": 17.3816, "step": 26511 }, { "epoch": 0.4846180561902499, "grad_norm": 5.550194755608161, "learning_rate": 5.491453897736692e-06, "loss": 17.0009, "step": 26512 }, { "epoch": 0.4846363353866964, "grad_norm": 6.1428089729205695, "learning_rate": 5.491159318703165e-06, "loss": 17.1129, "step": 26513 }, { "epoch": 0.4846546145831429, "grad_norm": 6.285517326065865, "learning_rate": 5.490864737948169e-06, "loss": 17.4101, "step": 26514 }, { "epoch": 0.48467289377958944, "grad_norm": 6.65362113295435, "learning_rate": 5.4905701554727365e-06, "loss": 17.5512, "step": 26515 }, { "epoch": 0.484691172976036, "grad_norm": 6.620467063218983, "learning_rate": 5.490275571277896e-06, "loss": 17.8127, "step": 26516 }, { "epoch": 0.4847094521724825, "grad_norm": 5.786860733968187, "learning_rate": 5.489980985364682e-06, "loss": 17.1823, "step": 26517 }, { "epoch": 0.48472773136892905, "grad_norm": 6.009540133094905, "learning_rate": 5.4896863977341275e-06, "loss": 17.1732, "step": 26518 }, { "epoch": 0.48474601056537553, "grad_norm": 5.942866457705427, "learning_rate": 5.489391808387265e-06, "loss": 17.2876, "step": 26519 }, { "epoch": 0.48476428976182206, "grad_norm": 6.147859989217325, "learning_rate": 5.489097217325127e-06, "loss": 17.4091, "step": 26520 }, { "epoch": 0.4847825689582686, "grad_norm": 5.839743379545554, "learning_rate": 5.4888026245487444e-06, "loss": 17.1816, "step": 26521 }, { "epoch": 0.48480084815471514, "grad_norm": 6.987820665477988, "learning_rate": 5.488508030059152e-06, "loss": 17.4251, "step": 26522 }, { "epoch": 0.4848191273511616, "grad_norm": 6.279484115563279, "learning_rate": 5.488213433857381e-06, "loss": 17.4336, "step": 26523 }, { "epoch": 0.48483740654760815, "grad_norm": 7.064761281317023, "learning_rate": 5.487918835944465e-06, "loss": 17.8791, "step": 26524 }, { "epoch": 0.4848556857440547, "grad_norm": 8.05195396523413, "learning_rate": 5.487624236321435e-06, "loss": 17.53, "step": 26525 }, { "epoch": 0.4848739649405012, "grad_norm": 7.037036320561439, "learning_rate": 5.487329634989325e-06, "loss": 17.7999, "step": 26526 }, { "epoch": 0.48489224413694776, "grad_norm": 5.150846998602031, "learning_rate": 5.487035031949165e-06, "loss": 16.9384, "step": 26527 }, { "epoch": 0.48491052333339424, "grad_norm": 5.853581615381309, "learning_rate": 5.486740427201991e-06, "loss": 17.1325, "step": 26528 }, { "epoch": 0.4849288025298408, "grad_norm": 7.924993206481882, "learning_rate": 5.486445820748835e-06, "loss": 17.6764, "step": 26529 }, { "epoch": 0.4849470817262873, "grad_norm": 7.8504844587924, "learning_rate": 5.486151212590728e-06, "loss": 18.0291, "step": 26530 }, { "epoch": 0.48496536092273385, "grad_norm": 5.809219210938385, "learning_rate": 5.485856602728702e-06, "loss": 17.1463, "step": 26531 }, { "epoch": 0.4849836401191804, "grad_norm": 8.660822147117912, "learning_rate": 5.485561991163791e-06, "loss": 18.2508, "step": 26532 }, { "epoch": 0.48500191931562686, "grad_norm": 6.271816895138902, "learning_rate": 5.485267377897029e-06, "loss": 17.5188, "step": 26533 }, { "epoch": 0.4850201985120734, "grad_norm": 7.230829243055291, "learning_rate": 5.484972762929446e-06, "loss": 17.5166, "step": 26534 }, { "epoch": 0.48503847770851993, "grad_norm": 5.104340002457307, "learning_rate": 5.484678146262075e-06, "loss": 17.0811, "step": 26535 }, { "epoch": 0.48505675690496647, "grad_norm": 6.016658967529224, "learning_rate": 5.484383527895949e-06, "loss": 17.5314, "step": 26536 }, { "epoch": 0.485075036101413, "grad_norm": 6.387333265667369, "learning_rate": 5.484088907832102e-06, "loss": 17.7089, "step": 26537 }, { "epoch": 0.4850933152978595, "grad_norm": 7.233007439047168, "learning_rate": 5.483794286071565e-06, "loss": 17.8259, "step": 26538 }, { "epoch": 0.485111594494306, "grad_norm": 6.07590553003209, "learning_rate": 5.483499662615371e-06, "loss": 17.429, "step": 26539 }, { "epoch": 0.48512987369075256, "grad_norm": 6.550549145304181, "learning_rate": 5.483205037464552e-06, "loss": 17.4891, "step": 26540 }, { "epoch": 0.4851481528871991, "grad_norm": 5.359384545009174, "learning_rate": 5.4829104106201415e-06, "loss": 17.0303, "step": 26541 }, { "epoch": 0.4851664320836456, "grad_norm": 5.337846074159136, "learning_rate": 5.4826157820831715e-06, "loss": 17.0801, "step": 26542 }, { "epoch": 0.4851847112800921, "grad_norm": 6.631213815437908, "learning_rate": 5.482321151854677e-06, "loss": 17.6556, "step": 26543 }, { "epoch": 0.48520299047653864, "grad_norm": 7.722108684275236, "learning_rate": 5.482026519935687e-06, "loss": 17.1154, "step": 26544 }, { "epoch": 0.4852212696729852, "grad_norm": 6.857963631684011, "learning_rate": 5.481731886327235e-06, "loss": 17.8986, "step": 26545 }, { "epoch": 0.4852395488694317, "grad_norm": 5.79207267534587, "learning_rate": 5.481437251030357e-06, "loss": 17.3041, "step": 26546 }, { "epoch": 0.48525782806587825, "grad_norm": 6.3787164053305245, "learning_rate": 5.4811426140460825e-06, "loss": 17.3586, "step": 26547 }, { "epoch": 0.48527610726232473, "grad_norm": 8.393542094603822, "learning_rate": 5.480847975375444e-06, "loss": 18.3381, "step": 26548 }, { "epoch": 0.48529438645877127, "grad_norm": 6.574024373888554, "learning_rate": 5.480553335019475e-06, "loss": 17.5777, "step": 26549 }, { "epoch": 0.4853126656552178, "grad_norm": 5.98329693173344, "learning_rate": 5.4802586929792086e-06, "loss": 17.3316, "step": 26550 }, { "epoch": 0.48533094485166434, "grad_norm": 6.985402588071238, "learning_rate": 5.479964049255677e-06, "loss": 17.6135, "step": 26551 }, { "epoch": 0.48534922404811087, "grad_norm": 6.4466575683269935, "learning_rate": 5.479669403849913e-06, "loss": 17.6279, "step": 26552 }, { "epoch": 0.48536750324455735, "grad_norm": 5.189915395214789, "learning_rate": 5.4793747567629504e-06, "loss": 17.1822, "step": 26553 }, { "epoch": 0.4853857824410039, "grad_norm": 6.067557612326647, "learning_rate": 5.47908010799582e-06, "loss": 17.5608, "step": 26554 }, { "epoch": 0.4854040616374504, "grad_norm": 7.594750565730956, "learning_rate": 5.478785457549555e-06, "loss": 17.7572, "step": 26555 }, { "epoch": 0.48542234083389696, "grad_norm": 6.435276649579518, "learning_rate": 5.478490805425191e-06, "loss": 17.6635, "step": 26556 }, { "epoch": 0.48544062003034344, "grad_norm": 7.11158491200244, "learning_rate": 5.4781961516237555e-06, "loss": 17.7047, "step": 26557 }, { "epoch": 0.48545889922679, "grad_norm": 4.733950175664281, "learning_rate": 5.477901496146285e-06, "loss": 16.7589, "step": 26558 }, { "epoch": 0.4854771784232365, "grad_norm": 7.0080832196589276, "learning_rate": 5.47760683899381e-06, "loss": 17.9338, "step": 26559 }, { "epoch": 0.48549545761968305, "grad_norm": 5.0607686591002965, "learning_rate": 5.477312180167366e-06, "loss": 16.995, "step": 26560 }, { "epoch": 0.4855137368161296, "grad_norm": 6.137308209246509, "learning_rate": 5.4770175196679845e-06, "loss": 17.2181, "step": 26561 }, { "epoch": 0.48553201601257606, "grad_norm": 6.7271861283140755, "learning_rate": 5.4767228574966965e-06, "loss": 17.5663, "step": 26562 }, { "epoch": 0.4855502952090226, "grad_norm": 5.148819457273331, "learning_rate": 5.476428193654537e-06, "loss": 16.822, "step": 26563 }, { "epoch": 0.48556857440546913, "grad_norm": 7.003527994582009, "learning_rate": 5.4761335281425375e-06, "loss": 17.6496, "step": 26564 }, { "epoch": 0.48558685360191567, "grad_norm": 8.40255245863197, "learning_rate": 5.475838860961732e-06, "loss": 17.9476, "step": 26565 }, { "epoch": 0.4856051327983622, "grad_norm": 6.653719698463278, "learning_rate": 5.475544192113152e-06, "loss": 17.3842, "step": 26566 }, { "epoch": 0.4856234119948087, "grad_norm": 5.443807941247819, "learning_rate": 5.475249521597833e-06, "loss": 16.907, "step": 26567 }, { "epoch": 0.4856416911912552, "grad_norm": 5.86720973065785, "learning_rate": 5.474954849416802e-06, "loss": 17.2419, "step": 26568 }, { "epoch": 0.48565997038770176, "grad_norm": 6.2895047620896065, "learning_rate": 5.474660175571098e-06, "loss": 17.3104, "step": 26569 }, { "epoch": 0.4856782495841483, "grad_norm": 7.6818301270678155, "learning_rate": 5.4743655000617515e-06, "loss": 18.2199, "step": 26570 }, { "epoch": 0.4856965287805948, "grad_norm": 7.084022234762375, "learning_rate": 5.474070822889795e-06, "loss": 18.0019, "step": 26571 }, { "epoch": 0.4857148079770413, "grad_norm": 6.868505220168283, "learning_rate": 5.47377614405626e-06, "loss": 17.9504, "step": 26572 }, { "epoch": 0.48573308717348784, "grad_norm": 6.6229483247928735, "learning_rate": 5.47348146356218e-06, "loss": 17.7776, "step": 26573 }, { "epoch": 0.4857513663699344, "grad_norm": 6.440645746533691, "learning_rate": 5.473186781408591e-06, "loss": 17.5428, "step": 26574 }, { "epoch": 0.4857696455663809, "grad_norm": 7.515916814424151, "learning_rate": 5.4728920975965214e-06, "loss": 17.715, "step": 26575 }, { "epoch": 0.48578792476282745, "grad_norm": 6.886688046334952, "learning_rate": 5.472597412127008e-06, "loss": 17.5188, "step": 26576 }, { "epoch": 0.48580620395927393, "grad_norm": 6.25423326557669, "learning_rate": 5.47230272500108e-06, "loss": 17.1894, "step": 26577 }, { "epoch": 0.48582448315572047, "grad_norm": 5.149576858402395, "learning_rate": 5.472008036219772e-06, "loss": 17.0235, "step": 26578 }, { "epoch": 0.485842762352167, "grad_norm": 5.221973513778705, "learning_rate": 5.471713345784118e-06, "loss": 16.9643, "step": 26579 }, { "epoch": 0.48586104154861354, "grad_norm": 5.60671239177601, "learning_rate": 5.471418653695149e-06, "loss": 17.1331, "step": 26580 }, { "epoch": 0.4858793207450601, "grad_norm": 8.106985399703962, "learning_rate": 5.471123959953898e-06, "loss": 17.6359, "step": 26581 }, { "epoch": 0.48589759994150655, "grad_norm": 7.437369590931693, "learning_rate": 5.4708292645613995e-06, "loss": 17.9801, "step": 26582 }, { "epoch": 0.4859158791379531, "grad_norm": 5.063156408596951, "learning_rate": 5.470534567518684e-06, "loss": 16.9018, "step": 26583 }, { "epoch": 0.4859341583343996, "grad_norm": 9.493058215283343, "learning_rate": 5.470239868826788e-06, "loss": 17.5201, "step": 26584 }, { "epoch": 0.48595243753084616, "grad_norm": 6.557149756489445, "learning_rate": 5.46994516848674e-06, "loss": 17.3902, "step": 26585 }, { "epoch": 0.4859707167272927, "grad_norm": 7.111068340274383, "learning_rate": 5.469650466499574e-06, "loss": 17.8025, "step": 26586 }, { "epoch": 0.4859889959237392, "grad_norm": 7.015342299448457, "learning_rate": 5.469355762866327e-06, "loss": 17.7311, "step": 26587 }, { "epoch": 0.4860072751201857, "grad_norm": 5.7164801045758695, "learning_rate": 5.469061057588027e-06, "loss": 17.3174, "step": 26588 }, { "epoch": 0.48602555431663225, "grad_norm": 5.642670381011369, "learning_rate": 5.468766350665709e-06, "loss": 17.0686, "step": 26589 }, { "epoch": 0.4860438335130788, "grad_norm": 8.514006608723344, "learning_rate": 5.468471642100404e-06, "loss": 17.354, "step": 26590 }, { "epoch": 0.48606211270952526, "grad_norm": 6.285525586919754, "learning_rate": 5.468176931893149e-06, "loss": 17.5392, "step": 26591 }, { "epoch": 0.4860803919059718, "grad_norm": 4.87026401636559, "learning_rate": 5.467882220044973e-06, "loss": 17.0176, "step": 26592 }, { "epoch": 0.48609867110241833, "grad_norm": 6.604639662487142, "learning_rate": 5.467587506556911e-06, "loss": 17.5402, "step": 26593 }, { "epoch": 0.48611695029886487, "grad_norm": 7.691647893381333, "learning_rate": 5.467292791429997e-06, "loss": 17.6207, "step": 26594 }, { "epoch": 0.4861352294953114, "grad_norm": 5.898843312344089, "learning_rate": 5.466998074665259e-06, "loss": 17.2536, "step": 26595 }, { "epoch": 0.4861535086917579, "grad_norm": 7.573039342632927, "learning_rate": 5.466703356263734e-06, "loss": 17.5114, "step": 26596 }, { "epoch": 0.4861717878882044, "grad_norm": 5.101249641526701, "learning_rate": 5.466408636226455e-06, "loss": 16.9151, "step": 26597 }, { "epoch": 0.48619006708465096, "grad_norm": 5.8520661688430575, "learning_rate": 5.466113914554455e-06, "loss": 17.3912, "step": 26598 }, { "epoch": 0.4862083462810975, "grad_norm": 6.020245283050912, "learning_rate": 5.465819191248766e-06, "loss": 17.3466, "step": 26599 }, { "epoch": 0.486226625477544, "grad_norm": 5.594927094598268, "learning_rate": 5.465524466310419e-06, "loss": 17.1538, "step": 26600 }, { "epoch": 0.4862449046739905, "grad_norm": 7.47649543567741, "learning_rate": 5.465229739740452e-06, "loss": 18.611, "step": 26601 }, { "epoch": 0.48626318387043704, "grad_norm": 5.2141971011979855, "learning_rate": 5.464935011539894e-06, "loss": 17.0901, "step": 26602 }, { "epoch": 0.4862814630668836, "grad_norm": 6.845905298465474, "learning_rate": 5.464640281709779e-06, "loss": 17.6799, "step": 26603 }, { "epoch": 0.4862997422633301, "grad_norm": 6.394076981508338, "learning_rate": 5.46434555025114e-06, "loss": 17.6783, "step": 26604 }, { "epoch": 0.48631802145977665, "grad_norm": 5.506820170598393, "learning_rate": 5.464050817165009e-06, "loss": 16.9771, "step": 26605 }, { "epoch": 0.48633630065622313, "grad_norm": 7.344541839171006, "learning_rate": 5.463756082452423e-06, "loss": 17.163, "step": 26606 }, { "epoch": 0.48635457985266967, "grad_norm": 6.527649923861331, "learning_rate": 5.463461346114409e-06, "loss": 17.3594, "step": 26607 }, { "epoch": 0.4863728590491162, "grad_norm": 5.226281276789873, "learning_rate": 5.463166608152005e-06, "loss": 17.0518, "step": 26608 }, { "epoch": 0.48639113824556274, "grad_norm": 5.9010886221186505, "learning_rate": 5.462871868566242e-06, "loss": 17.4303, "step": 26609 }, { "epoch": 0.4864094174420093, "grad_norm": 7.05978247832892, "learning_rate": 5.462577127358154e-06, "loss": 17.7988, "step": 26610 }, { "epoch": 0.48642769663845575, "grad_norm": 7.197769770156895, "learning_rate": 5.462282384528773e-06, "loss": 18.1668, "step": 26611 }, { "epoch": 0.4864459758349023, "grad_norm": 7.142218245417265, "learning_rate": 5.461987640079132e-06, "loss": 17.8786, "step": 26612 }, { "epoch": 0.4864642550313488, "grad_norm": 6.449349589955589, "learning_rate": 5.461692894010263e-06, "loss": 17.7673, "step": 26613 }, { "epoch": 0.48648253422779536, "grad_norm": 6.426401988205395, "learning_rate": 5.461398146323202e-06, "loss": 17.8083, "step": 26614 }, { "epoch": 0.4865008134242419, "grad_norm": 7.282501321645925, "learning_rate": 5.461103397018981e-06, "loss": 17.8547, "step": 26615 }, { "epoch": 0.4865190926206884, "grad_norm": 6.3285525513507555, "learning_rate": 5.4608086460986324e-06, "loss": 17.5695, "step": 26616 }, { "epoch": 0.4865373718171349, "grad_norm": 6.616748535093236, "learning_rate": 5.460513893563189e-06, "loss": 17.4545, "step": 26617 }, { "epoch": 0.48655565101358145, "grad_norm": 6.015220408534, "learning_rate": 5.460219139413684e-06, "loss": 17.2147, "step": 26618 }, { "epoch": 0.486573930210028, "grad_norm": 6.001736263614366, "learning_rate": 5.4599243836511516e-06, "loss": 17.3096, "step": 26619 }, { "epoch": 0.4865922094064745, "grad_norm": 7.029744272317311, "learning_rate": 5.459629626276624e-06, "loss": 17.556, "step": 26620 }, { "epoch": 0.486610488602921, "grad_norm": 6.365931427236614, "learning_rate": 5.459334867291134e-06, "loss": 17.6441, "step": 26621 }, { "epoch": 0.48662876779936753, "grad_norm": 7.774243933733844, "learning_rate": 5.459040106695716e-06, "loss": 18.2137, "step": 26622 }, { "epoch": 0.48664704699581407, "grad_norm": 5.9603038412986376, "learning_rate": 5.4587453444914e-06, "loss": 17.4149, "step": 26623 }, { "epoch": 0.4866653261922606, "grad_norm": 5.659576186668394, "learning_rate": 5.458450580679223e-06, "loss": 17.3379, "step": 26624 }, { "epoch": 0.4866836053887071, "grad_norm": 6.196018730642399, "learning_rate": 5.458155815260219e-06, "loss": 17.4753, "step": 26625 }, { "epoch": 0.4867018845851536, "grad_norm": 7.024416450972192, "learning_rate": 5.457861048235416e-06, "loss": 17.7739, "step": 26626 }, { "epoch": 0.48672016378160016, "grad_norm": 5.400957036917106, "learning_rate": 5.457566279605848e-06, "loss": 17.0448, "step": 26627 }, { "epoch": 0.4867384429780467, "grad_norm": 5.8155318177455575, "learning_rate": 5.4572715093725515e-06, "loss": 17.4156, "step": 26628 }, { "epoch": 0.48675672217449323, "grad_norm": 5.562979741031682, "learning_rate": 5.45697673753656e-06, "loss": 17.1216, "step": 26629 }, { "epoch": 0.4867750013709397, "grad_norm": 7.12242779394732, "learning_rate": 5.456681964098902e-06, "loss": 17.4697, "step": 26630 }, { "epoch": 0.48679328056738624, "grad_norm": 5.889235737164289, "learning_rate": 5.4563871890606145e-06, "loss": 17.4843, "step": 26631 }, { "epoch": 0.4868115597638328, "grad_norm": 6.642818958598797, "learning_rate": 5.45609241242273e-06, "loss": 17.4714, "step": 26632 }, { "epoch": 0.4868298389602793, "grad_norm": 5.585845806917995, "learning_rate": 5.45579763418628e-06, "loss": 17.255, "step": 26633 }, { "epoch": 0.48684811815672585, "grad_norm": 5.393327362257041, "learning_rate": 5.455502854352299e-06, "loss": 17.2974, "step": 26634 }, { "epoch": 0.48686639735317233, "grad_norm": 7.314095707053459, "learning_rate": 5.455208072921821e-06, "loss": 17.8221, "step": 26635 }, { "epoch": 0.48688467654961887, "grad_norm": 7.0350601960001535, "learning_rate": 5.454913289895878e-06, "loss": 17.6477, "step": 26636 }, { "epoch": 0.4869029557460654, "grad_norm": 6.931914639525556, "learning_rate": 5.454618505275503e-06, "loss": 17.7597, "step": 26637 }, { "epoch": 0.48692123494251194, "grad_norm": 7.275927073570928, "learning_rate": 5.454323719061729e-06, "loss": 17.9571, "step": 26638 }, { "epoch": 0.4869395141389585, "grad_norm": 5.102907014372395, "learning_rate": 5.454028931255592e-06, "loss": 16.9075, "step": 26639 }, { "epoch": 0.48695779333540495, "grad_norm": 5.730347467741487, "learning_rate": 5.45373414185812e-06, "loss": 17.3043, "step": 26640 }, { "epoch": 0.4869760725318515, "grad_norm": 7.3836310643565914, "learning_rate": 5.453439350870351e-06, "loss": 17.4446, "step": 26641 }, { "epoch": 0.486994351728298, "grad_norm": 5.020498286082509, "learning_rate": 5.453144558293315e-06, "loss": 16.9975, "step": 26642 }, { "epoch": 0.48701263092474456, "grad_norm": 8.013904751478776, "learning_rate": 5.452849764128049e-06, "loss": 18.1751, "step": 26643 }, { "epoch": 0.4870309101211911, "grad_norm": 7.2824823067803175, "learning_rate": 5.452554968375583e-06, "loss": 17.982, "step": 26644 }, { "epoch": 0.4870491893176376, "grad_norm": 6.719829505022645, "learning_rate": 5.452260171036949e-06, "loss": 17.6701, "step": 26645 }, { "epoch": 0.4870674685140841, "grad_norm": 5.584415576101518, "learning_rate": 5.451965372113185e-06, "loss": 17.0621, "step": 26646 }, { "epoch": 0.48708574771053065, "grad_norm": 6.707432748749226, "learning_rate": 5.451670571605321e-06, "loss": 17.4253, "step": 26647 }, { "epoch": 0.4871040269069772, "grad_norm": 7.062764180772722, "learning_rate": 5.45137576951439e-06, "loss": 17.8118, "step": 26648 }, { "epoch": 0.4871223061034237, "grad_norm": 9.202990816780765, "learning_rate": 5.451080965841427e-06, "loss": 17.9016, "step": 26649 }, { "epoch": 0.4871405852998702, "grad_norm": 6.32233053160526, "learning_rate": 5.450786160587463e-06, "loss": 17.4882, "step": 26650 }, { "epoch": 0.48715886449631673, "grad_norm": 6.069334385436446, "learning_rate": 5.450491353753533e-06, "loss": 17.2714, "step": 26651 }, { "epoch": 0.48717714369276327, "grad_norm": 6.147846392541621, "learning_rate": 5.450196545340671e-06, "loss": 17.4155, "step": 26652 }, { "epoch": 0.4871954228892098, "grad_norm": 6.07501178327118, "learning_rate": 5.4499017353499095e-06, "loss": 17.3031, "step": 26653 }, { "epoch": 0.48721370208565634, "grad_norm": 4.661875821745856, "learning_rate": 5.449606923782279e-06, "loss": 16.7665, "step": 26654 }, { "epoch": 0.4872319812821028, "grad_norm": 6.967972881934136, "learning_rate": 5.449312110638817e-06, "loss": 17.7271, "step": 26655 }, { "epoch": 0.48725026047854936, "grad_norm": 8.872333170762872, "learning_rate": 5.449017295920556e-06, "loss": 17.9727, "step": 26656 }, { "epoch": 0.4872685396749959, "grad_norm": 6.082674160655565, "learning_rate": 5.448722479628528e-06, "loss": 17.541, "step": 26657 }, { "epoch": 0.48728681887144243, "grad_norm": 5.8644745001181775, "learning_rate": 5.448427661763766e-06, "loss": 17.274, "step": 26658 }, { "epoch": 0.4873050980678889, "grad_norm": 8.562532643994132, "learning_rate": 5.448132842327302e-06, "loss": 18.3781, "step": 26659 }, { "epoch": 0.48732337726433544, "grad_norm": 7.318551635687446, "learning_rate": 5.447838021320173e-06, "loss": 17.994, "step": 26660 }, { "epoch": 0.487341656460782, "grad_norm": 7.048067486365951, "learning_rate": 5.447543198743411e-06, "loss": 17.6905, "step": 26661 }, { "epoch": 0.4873599356572285, "grad_norm": 7.088445178297393, "learning_rate": 5.447248374598049e-06, "loss": 17.5954, "step": 26662 }, { "epoch": 0.48737821485367505, "grad_norm": 6.056738429756884, "learning_rate": 5.44695354888512e-06, "loss": 17.2938, "step": 26663 }, { "epoch": 0.48739649405012153, "grad_norm": 5.597497715489482, "learning_rate": 5.446658721605657e-06, "loss": 17.4139, "step": 26664 }, { "epoch": 0.48741477324656807, "grad_norm": 7.8833620968787725, "learning_rate": 5.446363892760694e-06, "loss": 18.2431, "step": 26665 }, { "epoch": 0.4874330524430146, "grad_norm": 5.829542433580899, "learning_rate": 5.446069062351265e-06, "loss": 17.4375, "step": 26666 }, { "epoch": 0.48745133163946114, "grad_norm": 7.326167912127498, "learning_rate": 5.445774230378402e-06, "loss": 18.1475, "step": 26667 }, { "epoch": 0.4874696108359077, "grad_norm": 8.159616676986518, "learning_rate": 5.44547939684314e-06, "loss": 17.8864, "step": 26668 }, { "epoch": 0.48748789003235415, "grad_norm": 6.733878280865458, "learning_rate": 5.445184561746511e-06, "loss": 17.5365, "step": 26669 }, { "epoch": 0.4875061692288007, "grad_norm": 6.150416170308892, "learning_rate": 5.444889725089548e-06, "loss": 17.3496, "step": 26670 }, { "epoch": 0.4875244484252472, "grad_norm": 5.852591058993894, "learning_rate": 5.444594886873286e-06, "loss": 17.3277, "step": 26671 }, { "epoch": 0.48754272762169376, "grad_norm": 7.050355774784577, "learning_rate": 5.444300047098756e-06, "loss": 17.8252, "step": 26672 }, { "epoch": 0.4875610068181403, "grad_norm": 5.101136321964566, "learning_rate": 5.4440052057669934e-06, "loss": 16.9262, "step": 26673 }, { "epoch": 0.4875792860145868, "grad_norm": 6.171134819997972, "learning_rate": 5.443710362879033e-06, "loss": 17.3672, "step": 26674 }, { "epoch": 0.4875975652110333, "grad_norm": 8.424747184849217, "learning_rate": 5.443415518435905e-06, "loss": 18.4778, "step": 26675 }, { "epoch": 0.48761584440747985, "grad_norm": 6.663080208183747, "learning_rate": 5.443120672438643e-06, "loss": 17.5707, "step": 26676 }, { "epoch": 0.4876341236039264, "grad_norm": 5.795464634344631, "learning_rate": 5.442825824888283e-06, "loss": 17.2108, "step": 26677 }, { "epoch": 0.4876524028003729, "grad_norm": 5.469798270017111, "learning_rate": 5.442530975785855e-06, "loss": 17.1255, "step": 26678 }, { "epoch": 0.4876706819968194, "grad_norm": 7.341864107316959, "learning_rate": 5.442236125132397e-06, "loss": 17.7743, "step": 26679 }, { "epoch": 0.48768896119326594, "grad_norm": 6.446259241788843, "learning_rate": 5.441941272928939e-06, "loss": 17.5795, "step": 26680 }, { "epoch": 0.48770724038971247, "grad_norm": 6.32346501154039, "learning_rate": 5.441646419176514e-06, "loss": 17.399, "step": 26681 }, { "epoch": 0.487725519586159, "grad_norm": 7.253957061840832, "learning_rate": 5.441351563876157e-06, "loss": 17.7322, "step": 26682 }, { "epoch": 0.48774379878260554, "grad_norm": 8.251096577384583, "learning_rate": 5.441056707028901e-06, "loss": 17.5844, "step": 26683 }, { "epoch": 0.487762077979052, "grad_norm": 8.309287226384034, "learning_rate": 5.440761848635781e-06, "loss": 18.3265, "step": 26684 }, { "epoch": 0.48778035717549856, "grad_norm": 6.478423372830788, "learning_rate": 5.440466988697828e-06, "loss": 17.7607, "step": 26685 }, { "epoch": 0.4877986363719451, "grad_norm": 6.46679214652241, "learning_rate": 5.440172127216076e-06, "loss": 17.3941, "step": 26686 }, { "epoch": 0.48781691556839163, "grad_norm": 6.592282836976066, "learning_rate": 5.43987726419156e-06, "loss": 17.6032, "step": 26687 }, { "epoch": 0.48783519476483816, "grad_norm": 6.687011106745191, "learning_rate": 5.439582399625311e-06, "loss": 17.6042, "step": 26688 }, { "epoch": 0.48785347396128464, "grad_norm": 6.168841621335481, "learning_rate": 5.439287533518364e-06, "loss": 17.363, "step": 26689 }, { "epoch": 0.4878717531577312, "grad_norm": 5.826934054760655, "learning_rate": 5.438992665871753e-06, "loss": 17.2163, "step": 26690 }, { "epoch": 0.4878900323541777, "grad_norm": 6.508074954201721, "learning_rate": 5.438697796686511e-06, "loss": 17.7818, "step": 26691 }, { "epoch": 0.48790831155062425, "grad_norm": 5.347730389877579, "learning_rate": 5.438402925963669e-06, "loss": 17.0873, "step": 26692 }, { "epoch": 0.48792659074707073, "grad_norm": 5.735453219002497, "learning_rate": 5.4381080537042655e-06, "loss": 17.0578, "step": 26693 }, { "epoch": 0.48794486994351727, "grad_norm": 5.871442657524678, "learning_rate": 5.437813179909332e-06, "loss": 17.3047, "step": 26694 }, { "epoch": 0.4879631491399638, "grad_norm": 6.4683578382875435, "learning_rate": 5.437518304579898e-06, "loss": 17.4914, "step": 26695 }, { "epoch": 0.48798142833641034, "grad_norm": 5.744793784154497, "learning_rate": 5.437223427717001e-06, "loss": 17.2997, "step": 26696 }, { "epoch": 0.4879997075328569, "grad_norm": 5.867712906536801, "learning_rate": 5.436928549321675e-06, "loss": 16.928, "step": 26697 }, { "epoch": 0.48801798672930335, "grad_norm": 6.391154608621728, "learning_rate": 5.436633669394953e-06, "loss": 17.5694, "step": 26698 }, { "epoch": 0.4880362659257499, "grad_norm": 5.955561226736121, "learning_rate": 5.436338787937867e-06, "loss": 17.1313, "step": 26699 }, { "epoch": 0.4880545451221964, "grad_norm": 5.836660560356295, "learning_rate": 5.43604390495145e-06, "loss": 17.2105, "step": 26700 }, { "epoch": 0.48807282431864296, "grad_norm": 6.2484085719067295, "learning_rate": 5.43574902043674e-06, "loss": 17.5134, "step": 26701 }, { "epoch": 0.4880911035150895, "grad_norm": 7.114590802442527, "learning_rate": 5.435454134394765e-06, "loss": 17.5665, "step": 26702 }, { "epoch": 0.488109382711536, "grad_norm": 7.373815281828023, "learning_rate": 5.4351592468265625e-06, "loss": 17.5261, "step": 26703 }, { "epoch": 0.4881276619079825, "grad_norm": 8.493552962876162, "learning_rate": 5.434864357733164e-06, "loss": 18.0959, "step": 26704 }, { "epoch": 0.48814594110442905, "grad_norm": 6.985858148092461, "learning_rate": 5.434569467115604e-06, "loss": 17.6583, "step": 26705 }, { "epoch": 0.4881642203008756, "grad_norm": 6.05518898261649, "learning_rate": 5.434274574974915e-06, "loss": 17.3173, "step": 26706 }, { "epoch": 0.4881824994973221, "grad_norm": 7.075867296689358, "learning_rate": 5.433979681312131e-06, "loss": 17.7777, "step": 26707 }, { "epoch": 0.4882007786937686, "grad_norm": 7.078355166872703, "learning_rate": 5.433684786128287e-06, "loss": 18.022, "step": 26708 }, { "epoch": 0.48821905789021514, "grad_norm": 6.155883790054182, "learning_rate": 5.433389889424416e-06, "loss": 17.5531, "step": 26709 }, { "epoch": 0.48823733708666167, "grad_norm": 7.2286511400033175, "learning_rate": 5.433094991201549e-06, "loss": 17.9021, "step": 26710 }, { "epoch": 0.4882556162831082, "grad_norm": 4.98670601372043, "learning_rate": 5.432800091460723e-06, "loss": 16.9237, "step": 26711 }, { "epoch": 0.48827389547955474, "grad_norm": 7.6114791374891695, "learning_rate": 5.43250519020297e-06, "loss": 18.0662, "step": 26712 }, { "epoch": 0.4882921746760012, "grad_norm": 7.510468745586594, "learning_rate": 5.432210287429324e-06, "loss": 17.7714, "step": 26713 }, { "epoch": 0.48831045387244776, "grad_norm": 6.251969574222383, "learning_rate": 5.4319153831408176e-06, "loss": 17.5097, "step": 26714 }, { "epoch": 0.4883287330688943, "grad_norm": 7.079435065952679, "learning_rate": 5.4316204773384865e-06, "loss": 17.6973, "step": 26715 }, { "epoch": 0.48834701226534083, "grad_norm": 7.955138314761039, "learning_rate": 5.431325570023362e-06, "loss": 17.8779, "step": 26716 }, { "epoch": 0.48836529146178737, "grad_norm": 6.933085599949644, "learning_rate": 5.431030661196481e-06, "loss": 17.819, "step": 26717 }, { "epoch": 0.48838357065823385, "grad_norm": 6.230553472893436, "learning_rate": 5.430735750858872e-06, "loss": 17.508, "step": 26718 }, { "epoch": 0.4884018498546804, "grad_norm": 5.705069411832746, "learning_rate": 5.430440839011572e-06, "loss": 17.1437, "step": 26719 }, { "epoch": 0.4884201290511269, "grad_norm": 7.904679814923992, "learning_rate": 5.430145925655616e-06, "loss": 18.4188, "step": 26720 }, { "epoch": 0.48843840824757345, "grad_norm": 5.547515061694185, "learning_rate": 5.4298510107920345e-06, "loss": 17.1844, "step": 26721 }, { "epoch": 0.48845668744402, "grad_norm": 5.882950277228873, "learning_rate": 5.429556094421863e-06, "loss": 17.1431, "step": 26722 }, { "epoch": 0.48847496664046647, "grad_norm": 5.959155236936194, "learning_rate": 5.429261176546134e-06, "loss": 17.1748, "step": 26723 }, { "epoch": 0.488493245836913, "grad_norm": 6.085907969524742, "learning_rate": 5.428966257165882e-06, "loss": 17.637, "step": 26724 }, { "epoch": 0.48851152503335954, "grad_norm": 5.442096106554661, "learning_rate": 5.428671336282142e-06, "loss": 17.1573, "step": 26725 }, { "epoch": 0.4885298042298061, "grad_norm": 6.044868232271571, "learning_rate": 5.428376413895945e-06, "loss": 17.719, "step": 26726 }, { "epoch": 0.48854808342625256, "grad_norm": 6.206093193676755, "learning_rate": 5.428081490008325e-06, "loss": 17.208, "step": 26727 }, { "epoch": 0.4885663626226991, "grad_norm": 4.799014605211223, "learning_rate": 5.427786564620318e-06, "loss": 16.8918, "step": 26728 }, { "epoch": 0.4885846418191456, "grad_norm": 6.832359469861618, "learning_rate": 5.4274916377329556e-06, "loss": 17.8295, "step": 26729 }, { "epoch": 0.48860292101559216, "grad_norm": 6.656490306847209, "learning_rate": 5.427196709347272e-06, "loss": 17.855, "step": 26730 }, { "epoch": 0.4886212002120387, "grad_norm": 5.792625739089165, "learning_rate": 5.4269017794643006e-06, "loss": 17.1727, "step": 26731 }, { "epoch": 0.4886394794084852, "grad_norm": 6.971258159023458, "learning_rate": 5.426606848085076e-06, "loss": 17.5696, "step": 26732 }, { "epoch": 0.4886577586049317, "grad_norm": 5.25932462163262, "learning_rate": 5.426311915210633e-06, "loss": 16.901, "step": 26733 }, { "epoch": 0.48867603780137825, "grad_norm": 7.260817596924251, "learning_rate": 5.426016980842002e-06, "loss": 18.1982, "step": 26734 }, { "epoch": 0.4886943169978248, "grad_norm": 8.153119422459952, "learning_rate": 5.425722044980217e-06, "loss": 18.3199, "step": 26735 }, { "epoch": 0.4887125961942713, "grad_norm": 6.048124673936369, "learning_rate": 5.425427107626316e-06, "loss": 17.2398, "step": 26736 }, { "epoch": 0.4887308753907178, "grad_norm": 7.214994033609668, "learning_rate": 5.425132168781328e-06, "loss": 17.6843, "step": 26737 }, { "epoch": 0.48874915458716434, "grad_norm": 6.063501555656492, "learning_rate": 5.424837228446289e-06, "loss": 17.4596, "step": 26738 }, { "epoch": 0.48876743378361087, "grad_norm": 5.529805401716335, "learning_rate": 5.424542286622234e-06, "loss": 17.1915, "step": 26739 }, { "epoch": 0.4887857129800574, "grad_norm": 5.316192857659175, "learning_rate": 5.424247343310193e-06, "loss": 17.0905, "step": 26740 }, { "epoch": 0.48880399217650394, "grad_norm": 7.14134986856988, "learning_rate": 5.423952398511202e-06, "loss": 17.7756, "step": 26741 }, { "epoch": 0.4888222713729504, "grad_norm": 5.6608215942570155, "learning_rate": 5.4236574522262955e-06, "loss": 17.1622, "step": 26742 }, { "epoch": 0.48884055056939696, "grad_norm": 7.754954075097137, "learning_rate": 5.4233625044565075e-06, "loss": 17.5948, "step": 26743 }, { "epoch": 0.4888588297658435, "grad_norm": 6.304006626992727, "learning_rate": 5.4230675552028686e-06, "loss": 17.4356, "step": 26744 }, { "epoch": 0.48887710896229003, "grad_norm": 7.6598036781548435, "learning_rate": 5.4227726044664154e-06, "loss": 18.1548, "step": 26745 }, { "epoch": 0.48889538815873657, "grad_norm": 5.043567632997474, "learning_rate": 5.4224776522481815e-06, "loss": 16.9123, "step": 26746 }, { "epoch": 0.48891366735518305, "grad_norm": 7.4474356120256795, "learning_rate": 5.4221826985491984e-06, "loss": 17.4138, "step": 26747 }, { "epoch": 0.4889319465516296, "grad_norm": 6.580050828900662, "learning_rate": 5.421887743370503e-06, "loss": 17.678, "step": 26748 }, { "epoch": 0.4889502257480761, "grad_norm": 5.914436484098127, "learning_rate": 5.421592786713128e-06, "loss": 17.3221, "step": 26749 }, { "epoch": 0.48896850494452265, "grad_norm": 7.040745101047383, "learning_rate": 5.421297828578106e-06, "loss": 18.0544, "step": 26750 }, { "epoch": 0.4889867841409692, "grad_norm": 5.439251177141843, "learning_rate": 5.4210028689664715e-06, "loss": 17.0645, "step": 26751 }, { "epoch": 0.48900506333741567, "grad_norm": 7.458356758409796, "learning_rate": 5.420707907879259e-06, "loss": 17.536, "step": 26752 }, { "epoch": 0.4890233425338622, "grad_norm": 6.508491627841003, "learning_rate": 5.420412945317502e-06, "loss": 17.4326, "step": 26753 }, { "epoch": 0.48904162173030874, "grad_norm": 6.51150504386778, "learning_rate": 5.420117981282234e-06, "loss": 17.5608, "step": 26754 }, { "epoch": 0.4890599009267553, "grad_norm": 6.116798502026182, "learning_rate": 5.419823015774488e-06, "loss": 17.3379, "step": 26755 }, { "epoch": 0.4890781801232018, "grad_norm": 7.176310260618286, "learning_rate": 5.419528048795301e-06, "loss": 17.9868, "step": 26756 }, { "epoch": 0.4890964593196483, "grad_norm": 6.910287149539001, "learning_rate": 5.419233080345702e-06, "loss": 17.8103, "step": 26757 }, { "epoch": 0.4891147385160948, "grad_norm": 6.065923872504756, "learning_rate": 5.418938110426729e-06, "loss": 17.4823, "step": 26758 }, { "epoch": 0.48913301771254136, "grad_norm": 5.272226781807781, "learning_rate": 5.4186431390394124e-06, "loss": 16.9866, "step": 26759 }, { "epoch": 0.4891512969089879, "grad_norm": 6.766457150853006, "learning_rate": 5.41834816618479e-06, "loss": 17.6785, "step": 26760 }, { "epoch": 0.4891695761054344, "grad_norm": 5.990333341937009, "learning_rate": 5.418053191863893e-06, "loss": 17.3857, "step": 26761 }, { "epoch": 0.4891878553018809, "grad_norm": 4.845768868473213, "learning_rate": 5.417758216077756e-06, "loss": 16.8998, "step": 26762 }, { "epoch": 0.48920613449832745, "grad_norm": 6.316455854819885, "learning_rate": 5.417463238827413e-06, "loss": 17.5497, "step": 26763 }, { "epoch": 0.489224413694774, "grad_norm": 5.425386887917177, "learning_rate": 5.417168260113896e-06, "loss": 16.943, "step": 26764 }, { "epoch": 0.4892426928912205, "grad_norm": 6.336866161571187, "learning_rate": 5.416873279938241e-06, "loss": 17.459, "step": 26765 }, { "epoch": 0.489260972087667, "grad_norm": 7.594505821619773, "learning_rate": 5.4165782983014825e-06, "loss": 17.9707, "step": 26766 }, { "epoch": 0.48927925128411354, "grad_norm": 6.511043350662532, "learning_rate": 5.416283315204652e-06, "loss": 17.4195, "step": 26767 }, { "epoch": 0.4892975304805601, "grad_norm": 6.222490785112991, "learning_rate": 5.415988330648785e-06, "loss": 17.5351, "step": 26768 }, { "epoch": 0.4893158096770066, "grad_norm": 6.5235654266904755, "learning_rate": 5.415693344634916e-06, "loss": 17.4953, "step": 26769 }, { "epoch": 0.48933408887345314, "grad_norm": 7.537106258040323, "learning_rate": 5.415398357164078e-06, "loss": 17.7087, "step": 26770 }, { "epoch": 0.4893523680698996, "grad_norm": 6.006885768011372, "learning_rate": 5.4151033682373035e-06, "loss": 17.4608, "step": 26771 }, { "epoch": 0.48937064726634616, "grad_norm": 5.3133142659177635, "learning_rate": 5.414808377855626e-06, "loss": 17.1791, "step": 26772 }, { "epoch": 0.4893889264627927, "grad_norm": 6.612993465288934, "learning_rate": 5.414513386020084e-06, "loss": 18.1677, "step": 26773 }, { "epoch": 0.48940720565923923, "grad_norm": 6.834958480775478, "learning_rate": 5.414218392731708e-06, "loss": 17.8332, "step": 26774 }, { "epoch": 0.48942548485568577, "grad_norm": 6.685691737836291, "learning_rate": 5.413923397991532e-06, "loss": 17.0545, "step": 26775 }, { "epoch": 0.48944376405213225, "grad_norm": 6.097640583030748, "learning_rate": 5.413628401800591e-06, "loss": 17.3799, "step": 26776 }, { "epoch": 0.4894620432485788, "grad_norm": 6.762595586009303, "learning_rate": 5.413333404159917e-06, "loss": 17.6671, "step": 26777 }, { "epoch": 0.4894803224450253, "grad_norm": 5.749003583807096, "learning_rate": 5.413038405070547e-06, "loss": 17.1119, "step": 26778 }, { "epoch": 0.48949860164147185, "grad_norm": 5.646286438711893, "learning_rate": 5.412743404533512e-06, "loss": 17.493, "step": 26779 }, { "epoch": 0.4895168808379184, "grad_norm": 7.391000562632388, "learning_rate": 5.412448402549848e-06, "loss": 17.8051, "step": 26780 }, { "epoch": 0.48953516003436487, "grad_norm": 5.408967309528288, "learning_rate": 5.4121533991205875e-06, "loss": 17.2397, "step": 26781 }, { "epoch": 0.4895534392308114, "grad_norm": 6.601831321266045, "learning_rate": 5.411858394246765e-06, "loss": 17.4203, "step": 26782 }, { "epoch": 0.48957171842725794, "grad_norm": 5.480369766172453, "learning_rate": 5.411563387929415e-06, "loss": 17.2981, "step": 26783 }, { "epoch": 0.4895899976237045, "grad_norm": 6.8605239151113695, "learning_rate": 5.411268380169572e-06, "loss": 17.771, "step": 26784 }, { "epoch": 0.489608276820151, "grad_norm": 6.873494468844328, "learning_rate": 5.410973370968268e-06, "loss": 17.6408, "step": 26785 }, { "epoch": 0.4896265560165975, "grad_norm": 5.889293413290619, "learning_rate": 5.410678360326537e-06, "loss": 17.2507, "step": 26786 }, { "epoch": 0.489644835213044, "grad_norm": 5.892544846947069, "learning_rate": 5.410383348245416e-06, "loss": 17.0728, "step": 26787 }, { "epoch": 0.48966311440949056, "grad_norm": 7.835531562358622, "learning_rate": 5.4100883347259355e-06, "loss": 18.115, "step": 26788 }, { "epoch": 0.4896813936059371, "grad_norm": 6.599834990120285, "learning_rate": 5.4097933197691325e-06, "loss": 17.5977, "step": 26789 }, { "epoch": 0.48969967280238363, "grad_norm": 5.751483476441311, "learning_rate": 5.409498303376038e-06, "loss": 17.186, "step": 26790 }, { "epoch": 0.4897179519988301, "grad_norm": 5.943528801227008, "learning_rate": 5.409203285547687e-06, "loss": 17.4003, "step": 26791 }, { "epoch": 0.48973623119527665, "grad_norm": 6.631362999406427, "learning_rate": 5.408908266285116e-06, "loss": 17.3434, "step": 26792 }, { "epoch": 0.4897545103917232, "grad_norm": 8.261633001616998, "learning_rate": 5.408613245589354e-06, "loss": 18.0013, "step": 26793 }, { "epoch": 0.4897727895881697, "grad_norm": 9.316514163925088, "learning_rate": 5.408318223461441e-06, "loss": 18.3173, "step": 26794 }, { "epoch": 0.4897910687846162, "grad_norm": 5.632237732045882, "learning_rate": 5.408023199902407e-06, "loss": 17.2808, "step": 26795 }, { "epoch": 0.48980934798106274, "grad_norm": 7.090079701011441, "learning_rate": 5.407728174913287e-06, "loss": 17.7669, "step": 26796 }, { "epoch": 0.4898276271775093, "grad_norm": 7.082274922460304, "learning_rate": 5.407433148495115e-06, "loss": 17.6108, "step": 26797 }, { "epoch": 0.4898459063739558, "grad_norm": 5.921376319212949, "learning_rate": 5.407138120648926e-06, "loss": 17.4497, "step": 26798 }, { "epoch": 0.48986418557040234, "grad_norm": 5.088669060802675, "learning_rate": 5.406843091375752e-06, "loss": 17.1594, "step": 26799 }, { "epoch": 0.4898824647668488, "grad_norm": 9.112998331090383, "learning_rate": 5.406548060676629e-06, "loss": 18.656, "step": 26800 }, { "epoch": 0.48990074396329536, "grad_norm": 6.64795311345859, "learning_rate": 5.40625302855259e-06, "loss": 17.4863, "step": 26801 }, { "epoch": 0.4899190231597419, "grad_norm": 6.893498065757439, "learning_rate": 5.40595799500467e-06, "loss": 17.5541, "step": 26802 }, { "epoch": 0.48993730235618843, "grad_norm": 8.16370549613025, "learning_rate": 5.405662960033902e-06, "loss": 18.6281, "step": 26803 }, { "epoch": 0.48995558155263497, "grad_norm": 6.470659436684596, "learning_rate": 5.405367923641319e-06, "loss": 17.524, "step": 26804 }, { "epoch": 0.48997386074908145, "grad_norm": 6.170261979075298, "learning_rate": 5.40507288582796e-06, "loss": 17.555, "step": 26805 }, { "epoch": 0.489992139945528, "grad_norm": 4.9576388587498315, "learning_rate": 5.404777846594853e-06, "loss": 16.8701, "step": 26806 }, { "epoch": 0.4900104191419745, "grad_norm": 6.438231315488346, "learning_rate": 5.4044828059430355e-06, "loss": 17.6076, "step": 26807 }, { "epoch": 0.49002869833842105, "grad_norm": 7.5781883995260255, "learning_rate": 5.4041877638735405e-06, "loss": 17.563, "step": 26808 }, { "epoch": 0.4900469775348676, "grad_norm": 6.578265249848708, "learning_rate": 5.403892720387404e-06, "loss": 17.3995, "step": 26809 }, { "epoch": 0.49006525673131407, "grad_norm": 5.285911452760043, "learning_rate": 5.403597675485657e-06, "loss": 17.0243, "step": 26810 }, { "epoch": 0.4900835359277606, "grad_norm": 5.795312727202839, "learning_rate": 5.403302629169336e-06, "loss": 17.2022, "step": 26811 }, { "epoch": 0.49010181512420714, "grad_norm": 9.374189870011783, "learning_rate": 5.403007581439475e-06, "loss": 17.405, "step": 26812 }, { "epoch": 0.4901200943206537, "grad_norm": 7.492876513342417, "learning_rate": 5.4027125322971045e-06, "loss": 18.029, "step": 26813 }, { "epoch": 0.4901383735171002, "grad_norm": 5.4262779633071165, "learning_rate": 5.4024174817432624e-06, "loss": 17.2035, "step": 26814 }, { "epoch": 0.4901566527135467, "grad_norm": 6.46063539353455, "learning_rate": 5.402122429778985e-06, "loss": 17.7318, "step": 26815 }, { "epoch": 0.49017493190999323, "grad_norm": 6.131022203123187, "learning_rate": 5.401827376405301e-06, "loss": 17.5596, "step": 26816 }, { "epoch": 0.49019321110643976, "grad_norm": 6.43573483444271, "learning_rate": 5.401532321623247e-06, "loss": 17.4997, "step": 26817 }, { "epoch": 0.4902114903028863, "grad_norm": 6.267315419116112, "learning_rate": 5.401237265433857e-06, "loss": 17.2701, "step": 26818 }, { "epoch": 0.49022976949933283, "grad_norm": 8.349651440043182, "learning_rate": 5.400942207838166e-06, "loss": 18.0184, "step": 26819 }, { "epoch": 0.4902480486957793, "grad_norm": 5.849351097855939, "learning_rate": 5.400647148837207e-06, "loss": 17.2519, "step": 26820 }, { "epoch": 0.49026632789222585, "grad_norm": 5.557082938829403, "learning_rate": 5.400352088432014e-06, "loss": 17.2449, "step": 26821 }, { "epoch": 0.4902846070886724, "grad_norm": 7.659280914258292, "learning_rate": 5.400057026623622e-06, "loss": 17.6655, "step": 26822 }, { "epoch": 0.4903028862851189, "grad_norm": 6.293562110702888, "learning_rate": 5.399761963413065e-06, "loss": 17.3693, "step": 26823 }, { "epoch": 0.49032116548156546, "grad_norm": 5.320283229261041, "learning_rate": 5.399466898801377e-06, "loss": 17.1344, "step": 26824 }, { "epoch": 0.49033944467801194, "grad_norm": 5.3788484838259, "learning_rate": 5.3991718327895925e-06, "loss": 17.0159, "step": 26825 }, { "epoch": 0.4903577238744585, "grad_norm": 7.665377172028155, "learning_rate": 5.398876765378744e-06, "loss": 18.0574, "step": 26826 }, { "epoch": 0.490376003070905, "grad_norm": 6.049932660681751, "learning_rate": 5.398581696569868e-06, "loss": 17.2714, "step": 26827 }, { "epoch": 0.49039428226735154, "grad_norm": 6.2361116173369044, "learning_rate": 5.398286626363996e-06, "loss": 17.4638, "step": 26828 }, { "epoch": 0.490412561463798, "grad_norm": 6.112522394228724, "learning_rate": 5.397991554762167e-06, "loss": 17.3001, "step": 26829 }, { "epoch": 0.49043084066024456, "grad_norm": 5.640500088782503, "learning_rate": 5.3976964817654106e-06, "loss": 17.0323, "step": 26830 }, { "epoch": 0.4904491198566911, "grad_norm": 7.223058265261146, "learning_rate": 5.397401407374763e-06, "loss": 17.7531, "step": 26831 }, { "epoch": 0.49046739905313763, "grad_norm": 10.137905839670566, "learning_rate": 5.3971063315912575e-06, "loss": 17.808, "step": 26832 }, { "epoch": 0.49048567824958417, "grad_norm": 5.570050306643148, "learning_rate": 5.396811254415929e-06, "loss": 17.0955, "step": 26833 }, { "epoch": 0.49050395744603065, "grad_norm": 6.471424733456603, "learning_rate": 5.396516175849812e-06, "loss": 17.4727, "step": 26834 }, { "epoch": 0.4905222366424772, "grad_norm": 7.28869555481653, "learning_rate": 5.396221095893938e-06, "loss": 18.055, "step": 26835 }, { "epoch": 0.4905405158389237, "grad_norm": 6.113599979603523, "learning_rate": 5.395926014549347e-06, "loss": 17.2481, "step": 26836 }, { "epoch": 0.49055879503537025, "grad_norm": 6.477612647925058, "learning_rate": 5.395630931817066e-06, "loss": 17.579, "step": 26837 }, { "epoch": 0.4905770742318168, "grad_norm": 5.570082626655907, "learning_rate": 5.3953358476981355e-06, "loss": 17.0483, "step": 26838 }, { "epoch": 0.49059535342826327, "grad_norm": 6.5939035359793685, "learning_rate": 5.395040762193587e-06, "loss": 17.5565, "step": 26839 }, { "epoch": 0.4906136326247098, "grad_norm": 5.889521432332619, "learning_rate": 5.394745675304453e-06, "loss": 17.3782, "step": 26840 }, { "epoch": 0.49063191182115634, "grad_norm": 5.810556861786461, "learning_rate": 5.394450587031771e-06, "loss": 17.1271, "step": 26841 }, { "epoch": 0.4906501910176029, "grad_norm": 7.603444045410603, "learning_rate": 5.3941554973765745e-06, "loss": 18.1476, "step": 26842 }, { "epoch": 0.4906684702140494, "grad_norm": 8.524801879786166, "learning_rate": 5.393860406339897e-06, "loss": 18.5666, "step": 26843 }, { "epoch": 0.4906867494104959, "grad_norm": 6.091587868190396, "learning_rate": 5.3935653139227714e-06, "loss": 17.3453, "step": 26844 }, { "epoch": 0.49070502860694243, "grad_norm": 5.373096735902344, "learning_rate": 5.393270220126235e-06, "loss": 17.1939, "step": 26845 }, { "epoch": 0.49072330780338896, "grad_norm": 6.959521133381509, "learning_rate": 5.39297512495132e-06, "loss": 17.8511, "step": 26846 }, { "epoch": 0.4907415869998355, "grad_norm": 5.851506965114323, "learning_rate": 5.392680028399062e-06, "loss": 17.2155, "step": 26847 }, { "epoch": 0.49075986619628204, "grad_norm": 7.532761371224967, "learning_rate": 5.392384930470493e-06, "loss": 17.6946, "step": 26848 }, { "epoch": 0.4907781453927285, "grad_norm": 6.0838953034197365, "learning_rate": 5.39208983116665e-06, "loss": 17.4837, "step": 26849 }, { "epoch": 0.49079642458917505, "grad_norm": 7.050171855548302, "learning_rate": 5.391794730488567e-06, "loss": 18.0847, "step": 26850 }, { "epoch": 0.4908147037856216, "grad_norm": 5.740280096755192, "learning_rate": 5.3914996284372756e-06, "loss": 17.4159, "step": 26851 }, { "epoch": 0.4908329829820681, "grad_norm": 7.279768457602669, "learning_rate": 5.391204525013814e-06, "loss": 17.7201, "step": 26852 }, { "epoch": 0.49085126217851466, "grad_norm": 6.894222463107142, "learning_rate": 5.390909420219213e-06, "loss": 17.1891, "step": 26853 }, { "epoch": 0.49086954137496114, "grad_norm": 5.797836351655732, "learning_rate": 5.390614314054509e-06, "loss": 17.4896, "step": 26854 }, { "epoch": 0.4908878205714077, "grad_norm": 6.95220083501087, "learning_rate": 5.390319206520734e-06, "loss": 17.8335, "step": 26855 }, { "epoch": 0.4909060997678542, "grad_norm": 6.383801639281826, "learning_rate": 5.3900240976189275e-06, "loss": 17.5907, "step": 26856 }, { "epoch": 0.49092437896430074, "grad_norm": 6.0585700664970075, "learning_rate": 5.389728987350118e-06, "loss": 17.3967, "step": 26857 }, { "epoch": 0.4909426581607473, "grad_norm": 5.4990854133711515, "learning_rate": 5.389433875715343e-06, "loss": 17.2939, "step": 26858 }, { "epoch": 0.49096093735719376, "grad_norm": 6.031399634538206, "learning_rate": 5.389138762715634e-06, "loss": 17.4824, "step": 26859 }, { "epoch": 0.4909792165536403, "grad_norm": 5.960467649815992, "learning_rate": 5.38884364835203e-06, "loss": 17.5735, "step": 26860 }, { "epoch": 0.49099749575008683, "grad_norm": 6.363918885303285, "learning_rate": 5.388548532625562e-06, "loss": 17.4231, "step": 26861 }, { "epoch": 0.49101577494653337, "grad_norm": 6.799854083715129, "learning_rate": 5.388253415537266e-06, "loss": 17.46, "step": 26862 }, { "epoch": 0.49103405414297985, "grad_norm": 5.419690651476267, "learning_rate": 5.387958297088174e-06, "loss": 17.1368, "step": 26863 }, { "epoch": 0.4910523333394264, "grad_norm": 5.6065718145011365, "learning_rate": 5.387663177279322e-06, "loss": 17.1557, "step": 26864 }, { "epoch": 0.4910706125358729, "grad_norm": 7.392491276085907, "learning_rate": 5.3873680561117444e-06, "loss": 18.1115, "step": 26865 }, { "epoch": 0.49108889173231945, "grad_norm": 5.540634482165595, "learning_rate": 5.387072933586476e-06, "loss": 17.0831, "step": 26866 }, { "epoch": 0.491107170928766, "grad_norm": 6.49316476778211, "learning_rate": 5.386777809704549e-06, "loss": 17.6583, "step": 26867 }, { "epoch": 0.49112545012521247, "grad_norm": 5.272607120352341, "learning_rate": 5.386482684467e-06, "loss": 17.3372, "step": 26868 }, { "epoch": 0.491143729321659, "grad_norm": 7.280203647564744, "learning_rate": 5.386187557874864e-06, "loss": 17.0905, "step": 26869 }, { "epoch": 0.49116200851810554, "grad_norm": 6.901638087373839, "learning_rate": 5.385892429929174e-06, "loss": 17.4576, "step": 26870 }, { "epoch": 0.4911802877145521, "grad_norm": 6.4241817384312006, "learning_rate": 5.385597300630964e-06, "loss": 17.5416, "step": 26871 }, { "epoch": 0.4911985669109986, "grad_norm": 7.21240344430058, "learning_rate": 5.385302169981267e-06, "loss": 18.199, "step": 26872 }, { "epoch": 0.4912168461074451, "grad_norm": 6.461777851863109, "learning_rate": 5.385007037981122e-06, "loss": 17.4993, "step": 26873 }, { "epoch": 0.49123512530389163, "grad_norm": 6.32818213034626, "learning_rate": 5.38471190463156e-06, "loss": 17.6206, "step": 26874 }, { "epoch": 0.49125340450033816, "grad_norm": 6.541881916707459, "learning_rate": 5.384416769933616e-06, "loss": 17.6468, "step": 26875 }, { "epoch": 0.4912716836967847, "grad_norm": 5.788408433951197, "learning_rate": 5.384121633888324e-06, "loss": 17.363, "step": 26876 }, { "epoch": 0.49128996289323124, "grad_norm": 7.48705751623344, "learning_rate": 5.38382649649672e-06, "loss": 17.7163, "step": 26877 }, { "epoch": 0.4913082420896777, "grad_norm": 6.054534722697527, "learning_rate": 5.383531357759837e-06, "loss": 17.2928, "step": 26878 }, { "epoch": 0.49132652128612425, "grad_norm": 5.686697007101629, "learning_rate": 5.383236217678709e-06, "loss": 16.9443, "step": 26879 }, { "epoch": 0.4913448004825708, "grad_norm": 5.154853867221452, "learning_rate": 5.382941076254372e-06, "loss": 17.0009, "step": 26880 }, { "epoch": 0.4913630796790173, "grad_norm": 9.232364566283492, "learning_rate": 5.38264593348786e-06, "loss": 18.3791, "step": 26881 }, { "epoch": 0.49138135887546386, "grad_norm": 5.6116698874649895, "learning_rate": 5.382350789380207e-06, "loss": 17.5436, "step": 26882 }, { "epoch": 0.49139963807191034, "grad_norm": 5.797979642885264, "learning_rate": 5.3820556439324476e-06, "loss": 17.2975, "step": 26883 }, { "epoch": 0.4914179172683569, "grad_norm": 6.644032793815194, "learning_rate": 5.381760497145617e-06, "loss": 17.4729, "step": 26884 }, { "epoch": 0.4914361964648034, "grad_norm": 6.307802392799059, "learning_rate": 5.381465349020749e-06, "loss": 17.5992, "step": 26885 }, { "epoch": 0.49145447566124995, "grad_norm": 6.2488141686992265, "learning_rate": 5.381170199558876e-06, "loss": 17.6261, "step": 26886 }, { "epoch": 0.4914727548576965, "grad_norm": 6.195354067715704, "learning_rate": 5.380875048761038e-06, "loss": 17.6299, "step": 26887 }, { "epoch": 0.49149103405414296, "grad_norm": 7.809291432483063, "learning_rate": 5.380579896628263e-06, "loss": 18.0773, "step": 26888 }, { "epoch": 0.4915093132505895, "grad_norm": 7.261728012854263, "learning_rate": 5.38028474316159e-06, "loss": 18.1113, "step": 26889 }, { "epoch": 0.49152759244703603, "grad_norm": 6.174152541252177, "learning_rate": 5.379989588362052e-06, "loss": 17.3604, "step": 26890 }, { "epoch": 0.49154587164348257, "grad_norm": 7.053717253465543, "learning_rate": 5.379694432230682e-06, "loss": 17.4238, "step": 26891 }, { "epoch": 0.4915641508399291, "grad_norm": 6.163030691150242, "learning_rate": 5.379399274768517e-06, "loss": 17.7023, "step": 26892 }, { "epoch": 0.4915824300363756, "grad_norm": 7.085142398701097, "learning_rate": 5.37910411597659e-06, "loss": 18.0284, "step": 26893 }, { "epoch": 0.4916007092328221, "grad_norm": 7.466436844686998, "learning_rate": 5.378808955855937e-06, "loss": 18.1061, "step": 26894 }, { "epoch": 0.49161898842926866, "grad_norm": 6.054243917307078, "learning_rate": 5.378513794407591e-06, "loss": 17.3603, "step": 26895 }, { "epoch": 0.4916372676257152, "grad_norm": 6.703002079725329, "learning_rate": 5.378218631632585e-06, "loss": 17.7708, "step": 26896 }, { "epoch": 0.49165554682216167, "grad_norm": 7.644869572406129, "learning_rate": 5.377923467531958e-06, "loss": 18.1343, "step": 26897 }, { "epoch": 0.4916738260186082, "grad_norm": 6.135790011801075, "learning_rate": 5.3776283021067435e-06, "loss": 17.4194, "step": 26898 }, { "epoch": 0.49169210521505474, "grad_norm": 5.248274801099542, "learning_rate": 5.37733313535797e-06, "loss": 16.8469, "step": 26899 }, { "epoch": 0.4917103844115013, "grad_norm": 5.1752921146963855, "learning_rate": 5.3770379672866805e-06, "loss": 16.9588, "step": 26900 }, { "epoch": 0.4917286636079478, "grad_norm": 6.878734534908685, "learning_rate": 5.376742797893905e-06, "loss": 17.795, "step": 26901 }, { "epoch": 0.4917469428043943, "grad_norm": 6.765103844627631, "learning_rate": 5.376447627180677e-06, "loss": 17.5713, "step": 26902 }, { "epoch": 0.49176522200084083, "grad_norm": 7.373925014002523, "learning_rate": 5.376152455148034e-06, "loss": 17.9319, "step": 26903 }, { "epoch": 0.49178350119728736, "grad_norm": 6.974871017734401, "learning_rate": 5.375857281797008e-06, "loss": 17.7886, "step": 26904 }, { "epoch": 0.4918017803937339, "grad_norm": 7.047464959961935, "learning_rate": 5.375562107128636e-06, "loss": 17.8694, "step": 26905 }, { "epoch": 0.49182005959018044, "grad_norm": 6.203225308572396, "learning_rate": 5.375266931143951e-06, "loss": 17.5135, "step": 26906 }, { "epoch": 0.4918383387866269, "grad_norm": 5.6146931985286495, "learning_rate": 5.374971753843988e-06, "loss": 17.0993, "step": 26907 }, { "epoch": 0.49185661798307345, "grad_norm": 5.788173869878248, "learning_rate": 5.374676575229782e-06, "loss": 17.3109, "step": 26908 }, { "epoch": 0.49187489717952, "grad_norm": 6.580879383156743, "learning_rate": 5.374381395302367e-06, "loss": 17.4365, "step": 26909 }, { "epoch": 0.4918931763759665, "grad_norm": 5.662605690686466, "learning_rate": 5.374086214062777e-06, "loss": 17.2884, "step": 26910 }, { "epoch": 0.49191145557241306, "grad_norm": 7.698035315986826, "learning_rate": 5.373791031512048e-06, "loss": 18.2263, "step": 26911 }, { "epoch": 0.49192973476885954, "grad_norm": 6.156024129877999, "learning_rate": 5.373495847651214e-06, "loss": 17.47, "step": 26912 }, { "epoch": 0.4919480139653061, "grad_norm": 6.802171153383905, "learning_rate": 5.373200662481308e-06, "loss": 17.6954, "step": 26913 }, { "epoch": 0.4919662931617526, "grad_norm": 6.636026493710453, "learning_rate": 5.372905476003368e-06, "loss": 17.6593, "step": 26914 }, { "epoch": 0.49198457235819915, "grad_norm": 6.484582377149246, "learning_rate": 5.372610288218426e-06, "loss": 17.4478, "step": 26915 }, { "epoch": 0.4920028515546457, "grad_norm": 6.15595223056846, "learning_rate": 5.3723150991275175e-06, "loss": 17.363, "step": 26916 }, { "epoch": 0.49202113075109216, "grad_norm": 7.695587024505982, "learning_rate": 5.372019908731676e-06, "loss": 18.4037, "step": 26917 }, { "epoch": 0.4920394099475387, "grad_norm": 6.798574774437424, "learning_rate": 5.371724717031938e-06, "loss": 17.4892, "step": 26918 }, { "epoch": 0.49205768914398523, "grad_norm": 6.815633024814039, "learning_rate": 5.371429524029337e-06, "loss": 17.726, "step": 26919 }, { "epoch": 0.49207596834043177, "grad_norm": 6.759571646412667, "learning_rate": 5.3711343297249065e-06, "loss": 17.4205, "step": 26920 }, { "epoch": 0.4920942475368783, "grad_norm": 7.2400862585694945, "learning_rate": 5.370839134119683e-06, "loss": 17.8043, "step": 26921 }, { "epoch": 0.4921125267333248, "grad_norm": 7.084659912512047, "learning_rate": 5.370543937214701e-06, "loss": 17.9527, "step": 26922 }, { "epoch": 0.4921308059297713, "grad_norm": 6.862495073162513, "learning_rate": 5.370248739010995e-06, "loss": 17.8831, "step": 26923 }, { "epoch": 0.49214908512621786, "grad_norm": 5.968072786265508, "learning_rate": 5.369953539509598e-06, "loss": 17.5994, "step": 26924 }, { "epoch": 0.4921673643226644, "grad_norm": 6.376872240610247, "learning_rate": 5.369658338711547e-06, "loss": 17.3389, "step": 26925 }, { "epoch": 0.4921856435191109, "grad_norm": 6.258884787702053, "learning_rate": 5.369363136617875e-06, "loss": 17.4756, "step": 26926 }, { "epoch": 0.4922039227155574, "grad_norm": 5.857006205686026, "learning_rate": 5.369067933229617e-06, "loss": 17.5055, "step": 26927 }, { "epoch": 0.49222220191200394, "grad_norm": 6.134324532429734, "learning_rate": 5.368772728547809e-06, "loss": 17.4956, "step": 26928 }, { "epoch": 0.4922404811084505, "grad_norm": 6.484942194754365, "learning_rate": 5.368477522573484e-06, "loss": 17.6194, "step": 26929 }, { "epoch": 0.492258760304897, "grad_norm": 5.237759670925472, "learning_rate": 5.368182315307677e-06, "loss": 16.8716, "step": 26930 }, { "epoch": 0.4922770395013435, "grad_norm": 6.164913411336027, "learning_rate": 5.367887106751423e-06, "loss": 17.584, "step": 26931 }, { "epoch": 0.49229531869779003, "grad_norm": 4.863338566559338, "learning_rate": 5.367591896905757e-06, "loss": 16.9111, "step": 26932 }, { "epoch": 0.49231359789423657, "grad_norm": 6.055456709587119, "learning_rate": 5.367296685771713e-06, "loss": 17.3858, "step": 26933 }, { "epoch": 0.4923318770906831, "grad_norm": 4.91780885037892, "learning_rate": 5.367001473350327e-06, "loss": 16.8793, "step": 26934 }, { "epoch": 0.49235015628712964, "grad_norm": 7.4618920387291565, "learning_rate": 5.366706259642631e-06, "loss": 17.7985, "step": 26935 }, { "epoch": 0.4923684354835761, "grad_norm": 6.8229652616455425, "learning_rate": 5.366411044649663e-06, "loss": 17.5554, "step": 26936 }, { "epoch": 0.49238671468002265, "grad_norm": 6.244515700441109, "learning_rate": 5.366115828372454e-06, "loss": 17.2898, "step": 26937 }, { "epoch": 0.4924049938764692, "grad_norm": 5.985505017798252, "learning_rate": 5.365820610812042e-06, "loss": 17.4043, "step": 26938 }, { "epoch": 0.4924232730729157, "grad_norm": 5.535320221329392, "learning_rate": 5.365525391969463e-06, "loss": 17.3785, "step": 26939 }, { "epoch": 0.49244155226936226, "grad_norm": 7.899752924980689, "learning_rate": 5.365230171845746e-06, "loss": 18.0108, "step": 26940 }, { "epoch": 0.49245983146580874, "grad_norm": 6.514524779167289, "learning_rate": 5.364934950441929e-06, "loss": 17.4425, "step": 26941 }, { "epoch": 0.4924781106622553, "grad_norm": 6.0603258145956, "learning_rate": 5.364639727759047e-06, "loss": 17.1233, "step": 26942 }, { "epoch": 0.4924963898587018, "grad_norm": 6.512603450804955, "learning_rate": 5.364344503798136e-06, "loss": 17.5404, "step": 26943 }, { "epoch": 0.49251466905514835, "grad_norm": 8.761079644741475, "learning_rate": 5.3640492785602285e-06, "loss": 17.5778, "step": 26944 }, { "epoch": 0.4925329482515949, "grad_norm": 6.417406889031707, "learning_rate": 5.363754052046357e-06, "loss": 17.3514, "step": 26945 }, { "epoch": 0.49255122744804136, "grad_norm": 5.894893632438456, "learning_rate": 5.3634588242575625e-06, "loss": 17.3609, "step": 26946 }, { "epoch": 0.4925695066444879, "grad_norm": 6.449317455126374, "learning_rate": 5.363163595194876e-06, "loss": 17.5889, "step": 26947 }, { "epoch": 0.49258778584093443, "grad_norm": 7.35069890110503, "learning_rate": 5.362868364859332e-06, "loss": 17.8098, "step": 26948 }, { "epoch": 0.49260606503738097, "grad_norm": 7.126230057637257, "learning_rate": 5.362573133251965e-06, "loss": 17.7459, "step": 26949 }, { "epoch": 0.4926243442338275, "grad_norm": 6.235345778518111, "learning_rate": 5.362277900373811e-06, "loss": 17.3931, "step": 26950 }, { "epoch": 0.492642623430274, "grad_norm": 6.933436583356739, "learning_rate": 5.361982666225905e-06, "loss": 17.5829, "step": 26951 }, { "epoch": 0.4926609026267205, "grad_norm": 7.389122953462225, "learning_rate": 5.361687430809282e-06, "loss": 18.1202, "step": 26952 }, { "epoch": 0.49267918182316706, "grad_norm": 7.1692490912883144, "learning_rate": 5.361392194124973e-06, "loss": 17.9091, "step": 26953 }, { "epoch": 0.4926974610196136, "grad_norm": 8.591531879847942, "learning_rate": 5.361096956174018e-06, "loss": 17.9691, "step": 26954 }, { "epoch": 0.4927157402160601, "grad_norm": 6.527397017102309, "learning_rate": 5.360801716957449e-06, "loss": 17.7767, "step": 26955 }, { "epoch": 0.4927340194125066, "grad_norm": 6.639954028183422, "learning_rate": 5.360506476476303e-06, "loss": 17.6142, "step": 26956 }, { "epoch": 0.49275229860895314, "grad_norm": 6.465569336463757, "learning_rate": 5.360211234731612e-06, "loss": 17.6567, "step": 26957 }, { "epoch": 0.4927705778053997, "grad_norm": 7.161281066801649, "learning_rate": 5.359915991724409e-06, "loss": 18.3439, "step": 26958 }, { "epoch": 0.4927888570018462, "grad_norm": 5.810592162886852, "learning_rate": 5.359620747455734e-06, "loss": 17.2661, "step": 26959 }, { "epoch": 0.49280713619829275, "grad_norm": 6.639986391388215, "learning_rate": 5.359325501926621e-06, "loss": 17.945, "step": 26960 }, { "epoch": 0.49282541539473923, "grad_norm": 5.685433823811523, "learning_rate": 5.359030255138101e-06, "loss": 17.2936, "step": 26961 }, { "epoch": 0.49284369459118577, "grad_norm": 5.729852340163938, "learning_rate": 5.358735007091212e-06, "loss": 17.2307, "step": 26962 }, { "epoch": 0.4928619737876323, "grad_norm": 6.187144348372903, "learning_rate": 5.358439757786989e-06, "loss": 17.2237, "step": 26963 }, { "epoch": 0.49288025298407884, "grad_norm": 4.332550610332634, "learning_rate": 5.358144507226464e-06, "loss": 16.6847, "step": 26964 }, { "epoch": 0.4928985321805253, "grad_norm": 4.863894359925461, "learning_rate": 5.357849255410674e-06, "loss": 16.7782, "step": 26965 }, { "epoch": 0.49291681137697185, "grad_norm": 6.182878907863092, "learning_rate": 5.357554002340654e-06, "loss": 17.2337, "step": 26966 }, { "epoch": 0.4929350905734184, "grad_norm": 5.526272701074921, "learning_rate": 5.357258748017437e-06, "loss": 17.3214, "step": 26967 }, { "epoch": 0.4929533697698649, "grad_norm": 7.613455418716324, "learning_rate": 5.3569634924420595e-06, "loss": 18.3149, "step": 26968 }, { "epoch": 0.49297164896631146, "grad_norm": 6.67763283935045, "learning_rate": 5.356668235615557e-06, "loss": 17.8365, "step": 26969 }, { "epoch": 0.49298992816275794, "grad_norm": 5.913927222305072, "learning_rate": 5.356372977538964e-06, "loss": 17.6074, "step": 26970 }, { "epoch": 0.4930082073592045, "grad_norm": 8.135030482059443, "learning_rate": 5.356077718213313e-06, "loss": 18.0704, "step": 26971 }, { "epoch": 0.493026486555651, "grad_norm": 5.8973378186105725, "learning_rate": 5.35578245763964e-06, "loss": 17.1145, "step": 26972 }, { "epoch": 0.49304476575209755, "grad_norm": 14.303282257554455, "learning_rate": 5.35548719581898e-06, "loss": 17.8257, "step": 26973 }, { "epoch": 0.4930630449485441, "grad_norm": 5.7261448376667765, "learning_rate": 5.35519193275237e-06, "loss": 17.0845, "step": 26974 }, { "epoch": 0.49308132414499056, "grad_norm": 5.512857713231104, "learning_rate": 5.354896668440843e-06, "loss": 17.2451, "step": 26975 }, { "epoch": 0.4930996033414371, "grad_norm": 5.209464948503143, "learning_rate": 5.354601402885434e-06, "loss": 16.8178, "step": 26976 }, { "epoch": 0.49311788253788363, "grad_norm": 6.831800009675839, "learning_rate": 5.3543061360871774e-06, "loss": 17.8316, "step": 26977 }, { "epoch": 0.49313616173433017, "grad_norm": 6.310170617315658, "learning_rate": 5.3540108680471085e-06, "loss": 17.6085, "step": 26978 }, { "epoch": 0.4931544409307767, "grad_norm": 7.249291216101069, "learning_rate": 5.353715598766262e-06, "loss": 18.0408, "step": 26979 }, { "epoch": 0.4931727201272232, "grad_norm": 5.835547523451604, "learning_rate": 5.353420328245673e-06, "loss": 17.244, "step": 26980 }, { "epoch": 0.4931909993236697, "grad_norm": 6.747052172227247, "learning_rate": 5.353125056486377e-06, "loss": 17.5503, "step": 26981 }, { "epoch": 0.49320927852011626, "grad_norm": 5.787707208088663, "learning_rate": 5.352829783489407e-06, "loss": 17.3794, "step": 26982 }, { "epoch": 0.4932275577165628, "grad_norm": 7.077291862170868, "learning_rate": 5.3525345092558e-06, "loss": 17.6171, "step": 26983 }, { "epoch": 0.49324583691300933, "grad_norm": 8.164761075820138, "learning_rate": 5.352239233786592e-06, "loss": 17.6073, "step": 26984 }, { "epoch": 0.4932641161094558, "grad_norm": 7.161564110869427, "learning_rate": 5.351943957082813e-06, "loss": 17.7869, "step": 26985 }, { "epoch": 0.49328239530590234, "grad_norm": 7.7619675751750625, "learning_rate": 5.351648679145502e-06, "loss": 18.0181, "step": 26986 }, { "epoch": 0.4933006745023489, "grad_norm": 5.668140917595582, "learning_rate": 5.351353399975694e-06, "loss": 17.3054, "step": 26987 }, { "epoch": 0.4933189536987954, "grad_norm": 6.3856548032983405, "learning_rate": 5.351058119574423e-06, "loss": 17.4187, "step": 26988 }, { "epoch": 0.49333723289524195, "grad_norm": 5.526415424467501, "learning_rate": 5.3507628379427225e-06, "loss": 17.179, "step": 26989 }, { "epoch": 0.49335551209168843, "grad_norm": 6.346497051201486, "learning_rate": 5.3504675550816286e-06, "loss": 17.575, "step": 26990 }, { "epoch": 0.49337379128813497, "grad_norm": 6.980587862419724, "learning_rate": 5.350172270992177e-06, "loss": 17.946, "step": 26991 }, { "epoch": 0.4933920704845815, "grad_norm": 7.803761180025331, "learning_rate": 5.3498769856754e-06, "loss": 17.7162, "step": 26992 }, { "epoch": 0.49341034968102804, "grad_norm": 6.661552136135507, "learning_rate": 5.349581699132337e-06, "loss": 17.7463, "step": 26993 }, { "epoch": 0.4934286288774746, "grad_norm": 5.6460867041461125, "learning_rate": 5.34928641136402e-06, "loss": 17.1434, "step": 26994 }, { "epoch": 0.49344690807392105, "grad_norm": 6.079734936911856, "learning_rate": 5.348991122371484e-06, "loss": 17.5019, "step": 26995 }, { "epoch": 0.4934651872703676, "grad_norm": 5.589034062581712, "learning_rate": 5.3486958321557645e-06, "loss": 17.1942, "step": 26996 }, { "epoch": 0.4934834664668141, "grad_norm": 6.288894942471698, "learning_rate": 5.348400540717896e-06, "loss": 17.3222, "step": 26997 }, { "epoch": 0.49350174566326066, "grad_norm": 5.079433526195749, "learning_rate": 5.3481052480589154e-06, "loss": 16.911, "step": 26998 }, { "epoch": 0.49352002485970714, "grad_norm": 5.801648085214462, "learning_rate": 5.347809954179853e-06, "loss": 17.2667, "step": 26999 }, { "epoch": 0.4935383040561537, "grad_norm": 6.783307260324162, "learning_rate": 5.34751465908175e-06, "loss": 17.6292, "step": 27000 }, { "epoch": 0.4935565832526002, "grad_norm": 7.3165225948765995, "learning_rate": 5.347219362765637e-06, "loss": 17.8569, "step": 27001 }, { "epoch": 0.49357486244904675, "grad_norm": 6.571744864750892, "learning_rate": 5.34692406523255e-06, "loss": 17.5595, "step": 27002 }, { "epoch": 0.4935931416454933, "grad_norm": 6.7247155088644375, "learning_rate": 5.3466287664835255e-06, "loss": 17.9137, "step": 27003 }, { "epoch": 0.49361142084193976, "grad_norm": 5.6204778914317535, "learning_rate": 5.3463334665195954e-06, "loss": 17.0609, "step": 27004 }, { "epoch": 0.4936297000383863, "grad_norm": 6.321485899260976, "learning_rate": 5.346038165341798e-06, "loss": 17.4114, "step": 27005 }, { "epoch": 0.49364797923483283, "grad_norm": 5.684925732587549, "learning_rate": 5.3457428629511665e-06, "loss": 17.2745, "step": 27006 }, { "epoch": 0.49366625843127937, "grad_norm": 6.471030352199631, "learning_rate": 5.345447559348736e-06, "loss": 17.2311, "step": 27007 }, { "epoch": 0.4936845376277259, "grad_norm": 6.386545510890274, "learning_rate": 5.345152254535542e-06, "loss": 17.4082, "step": 27008 }, { "epoch": 0.4937028168241724, "grad_norm": 6.72417967649249, "learning_rate": 5.344856948512619e-06, "loss": 17.8282, "step": 27009 }, { "epoch": 0.4937210960206189, "grad_norm": 6.224589284089164, "learning_rate": 5.344561641281003e-06, "loss": 17.2626, "step": 27010 }, { "epoch": 0.49373937521706546, "grad_norm": 6.127257620504393, "learning_rate": 5.344266332841728e-06, "loss": 17.2126, "step": 27011 }, { "epoch": 0.493757654413512, "grad_norm": 7.429251140891445, "learning_rate": 5.343971023195829e-06, "loss": 18.1963, "step": 27012 }, { "epoch": 0.49377593360995853, "grad_norm": 6.348298199733702, "learning_rate": 5.343675712344342e-06, "loss": 17.7133, "step": 27013 }, { "epoch": 0.493794212806405, "grad_norm": 6.316016192402913, "learning_rate": 5.343380400288301e-06, "loss": 17.4648, "step": 27014 }, { "epoch": 0.49381249200285154, "grad_norm": 7.458585796933602, "learning_rate": 5.343085087028742e-06, "loss": 18.0657, "step": 27015 }, { "epoch": 0.4938307711992981, "grad_norm": 6.686006564372484, "learning_rate": 5.3427897725667e-06, "loss": 17.5928, "step": 27016 }, { "epoch": 0.4938490503957446, "grad_norm": 7.845928481052563, "learning_rate": 5.342494456903208e-06, "loss": 18.0915, "step": 27017 }, { "epoch": 0.49386732959219115, "grad_norm": 7.656002154062699, "learning_rate": 5.342199140039303e-06, "loss": 17.3364, "step": 27018 }, { "epoch": 0.49388560878863763, "grad_norm": 5.391532742953372, "learning_rate": 5.3419038219760215e-06, "loss": 17.1496, "step": 27019 }, { "epoch": 0.49390388798508417, "grad_norm": 5.359108971593141, "learning_rate": 5.341608502714395e-06, "loss": 17.1226, "step": 27020 }, { "epoch": 0.4939221671815307, "grad_norm": 7.004731176418986, "learning_rate": 5.34131318225546e-06, "loss": 17.8174, "step": 27021 }, { "epoch": 0.49394044637797724, "grad_norm": 5.484664541987583, "learning_rate": 5.341017860600253e-06, "loss": 17.1402, "step": 27022 }, { "epoch": 0.4939587255744238, "grad_norm": 6.371429794989805, "learning_rate": 5.340722537749807e-06, "loss": 17.494, "step": 27023 }, { "epoch": 0.49397700477087025, "grad_norm": 7.01453052158817, "learning_rate": 5.340427213705159e-06, "loss": 17.7144, "step": 27024 }, { "epoch": 0.4939952839673168, "grad_norm": 6.078159804931989, "learning_rate": 5.340131888467344e-06, "loss": 17.478, "step": 27025 }, { "epoch": 0.4940135631637633, "grad_norm": 6.5826178827557875, "learning_rate": 5.3398365620373935e-06, "loss": 17.4035, "step": 27026 }, { "epoch": 0.49403184236020986, "grad_norm": 6.41085703611692, "learning_rate": 5.339541234416348e-06, "loss": 17.4888, "step": 27027 }, { "epoch": 0.4940501215566564, "grad_norm": 6.471313237406635, "learning_rate": 5.3392459056052385e-06, "loss": 17.6197, "step": 27028 }, { "epoch": 0.4940684007531029, "grad_norm": 6.759620568912845, "learning_rate": 5.338950575605103e-06, "loss": 17.5151, "step": 27029 }, { "epoch": 0.4940866799495494, "grad_norm": 5.726145059844037, "learning_rate": 5.338655244416974e-06, "loss": 17.1632, "step": 27030 }, { "epoch": 0.49410495914599595, "grad_norm": 7.367143100038594, "learning_rate": 5.3383599120418865e-06, "loss": 17.8241, "step": 27031 }, { "epoch": 0.4941232383424425, "grad_norm": 8.401285293444744, "learning_rate": 5.338064578480879e-06, "loss": 18.5157, "step": 27032 }, { "epoch": 0.49414151753888896, "grad_norm": 7.261683111609722, "learning_rate": 5.3377692437349835e-06, "loss": 17.8886, "step": 27033 }, { "epoch": 0.4941597967353355, "grad_norm": 6.532766534634735, "learning_rate": 5.3374739078052366e-06, "loss": 17.487, "step": 27034 }, { "epoch": 0.49417807593178203, "grad_norm": 5.855344901640332, "learning_rate": 5.3371785706926725e-06, "loss": 17.1961, "step": 27035 }, { "epoch": 0.49419635512822857, "grad_norm": 6.000774744402495, "learning_rate": 5.336883232398326e-06, "loss": 17.4634, "step": 27036 }, { "epoch": 0.4942146343246751, "grad_norm": 6.4939483382731416, "learning_rate": 5.336587892923235e-06, "loss": 17.3718, "step": 27037 }, { "epoch": 0.4942329135211216, "grad_norm": 8.90333019290219, "learning_rate": 5.336292552268432e-06, "loss": 18.2913, "step": 27038 }, { "epoch": 0.4942511927175681, "grad_norm": 5.384420330075554, "learning_rate": 5.335997210434953e-06, "loss": 17.1865, "step": 27039 }, { "epoch": 0.49426947191401466, "grad_norm": 6.191513432394301, "learning_rate": 5.33570186742383e-06, "loss": 17.3732, "step": 27040 }, { "epoch": 0.4942877511104612, "grad_norm": 6.0282047564718555, "learning_rate": 5.335406523236104e-06, "loss": 17.5049, "step": 27041 }, { "epoch": 0.49430603030690773, "grad_norm": 7.34642269136078, "learning_rate": 5.3351111778728075e-06, "loss": 17.7227, "step": 27042 }, { "epoch": 0.4943243095033542, "grad_norm": 6.347294744552038, "learning_rate": 5.334815831334975e-06, "loss": 17.4325, "step": 27043 }, { "epoch": 0.49434258869980074, "grad_norm": 5.738275868115558, "learning_rate": 5.334520483623641e-06, "loss": 17.1608, "step": 27044 }, { "epoch": 0.4943608678962473, "grad_norm": 6.983004953804565, "learning_rate": 5.334225134739841e-06, "loss": 17.4832, "step": 27045 }, { "epoch": 0.4943791470926938, "grad_norm": 5.8536804263533, "learning_rate": 5.333929784684614e-06, "loss": 17.2471, "step": 27046 }, { "epoch": 0.49439742628914035, "grad_norm": 6.337725509820828, "learning_rate": 5.333634433458989e-06, "loss": 17.3965, "step": 27047 }, { "epoch": 0.49441570548558683, "grad_norm": 5.4616529150627215, "learning_rate": 5.333339081064006e-06, "loss": 17.2323, "step": 27048 }, { "epoch": 0.49443398468203337, "grad_norm": 6.1375526015468305, "learning_rate": 5.3330437275006985e-06, "loss": 17.3927, "step": 27049 }, { "epoch": 0.4944522638784799, "grad_norm": 6.460811066663535, "learning_rate": 5.3327483727701015e-06, "loss": 17.4909, "step": 27050 }, { "epoch": 0.49447054307492644, "grad_norm": 7.0460329858769954, "learning_rate": 5.3324530168732495e-06, "loss": 17.8015, "step": 27051 }, { "epoch": 0.494488822271373, "grad_norm": 6.81539654948131, "learning_rate": 5.332157659811179e-06, "loss": 17.6895, "step": 27052 }, { "epoch": 0.49450710146781945, "grad_norm": 5.905972951436398, "learning_rate": 5.331862301584925e-06, "loss": 17.2996, "step": 27053 }, { "epoch": 0.494525380664266, "grad_norm": 6.84792720692525, "learning_rate": 5.331566942195522e-06, "loss": 17.8233, "step": 27054 }, { "epoch": 0.4945436598607125, "grad_norm": 6.546110597837149, "learning_rate": 5.331271581644005e-06, "loss": 17.1846, "step": 27055 }, { "epoch": 0.49456193905715906, "grad_norm": 6.034470999865652, "learning_rate": 5.3309762199314115e-06, "loss": 17.1926, "step": 27056 }, { "epoch": 0.4945802182536056, "grad_norm": 6.678728078849492, "learning_rate": 5.330680857058774e-06, "loss": 17.3395, "step": 27057 }, { "epoch": 0.4945984974500521, "grad_norm": 5.890798850145908, "learning_rate": 5.330385493027128e-06, "loss": 17.2562, "step": 27058 }, { "epoch": 0.4946167766464986, "grad_norm": 5.659396926385291, "learning_rate": 5.330090127837511e-06, "loss": 17.1656, "step": 27059 }, { "epoch": 0.49463505584294515, "grad_norm": 6.4122559835855, "learning_rate": 5.329794761490957e-06, "loss": 17.6316, "step": 27060 }, { "epoch": 0.4946533350393917, "grad_norm": 7.444983281994059, "learning_rate": 5.329499393988501e-06, "loss": 17.8522, "step": 27061 }, { "epoch": 0.4946716142358382, "grad_norm": 5.868080111913281, "learning_rate": 5.3292040253311774e-06, "loss": 17.0549, "step": 27062 }, { "epoch": 0.4946898934322847, "grad_norm": 6.43927321499535, "learning_rate": 5.328908655520022e-06, "loss": 17.5224, "step": 27063 }, { "epoch": 0.49470817262873124, "grad_norm": 6.275055806201355, "learning_rate": 5.32861328455607e-06, "loss": 17.5626, "step": 27064 }, { "epoch": 0.49472645182517777, "grad_norm": 5.874244766501496, "learning_rate": 5.328317912440358e-06, "loss": 17.3449, "step": 27065 }, { "epoch": 0.4947447310216243, "grad_norm": 6.002735749726299, "learning_rate": 5.328022539173919e-06, "loss": 17.4166, "step": 27066 }, { "epoch": 0.4947630102180708, "grad_norm": 7.078313016909611, "learning_rate": 5.327727164757791e-06, "loss": 17.8288, "step": 27067 }, { "epoch": 0.4947812894145173, "grad_norm": 7.211479667554601, "learning_rate": 5.3274317891930075e-06, "loss": 17.7986, "step": 27068 }, { "epoch": 0.49479956861096386, "grad_norm": 6.49780210498228, "learning_rate": 5.327136412480603e-06, "loss": 17.5079, "step": 27069 }, { "epoch": 0.4948178478074104, "grad_norm": 4.962388526502773, "learning_rate": 5.3268410346216146e-06, "loss": 17.0274, "step": 27070 }, { "epoch": 0.49483612700385693, "grad_norm": 8.588909027113303, "learning_rate": 5.326545655617077e-06, "loss": 18.572, "step": 27071 }, { "epoch": 0.4948544062003034, "grad_norm": 7.909936946394288, "learning_rate": 5.326250275468023e-06, "loss": 17.8187, "step": 27072 }, { "epoch": 0.49487268539674995, "grad_norm": 7.79164215084259, "learning_rate": 5.325954894175491e-06, "loss": 18.0844, "step": 27073 }, { "epoch": 0.4948909645931965, "grad_norm": 6.507166316716315, "learning_rate": 5.325659511740518e-06, "loss": 17.4294, "step": 27074 }, { "epoch": 0.494909243789643, "grad_norm": 7.045042200629096, "learning_rate": 5.325364128164134e-06, "loss": 17.4291, "step": 27075 }, { "epoch": 0.49492752298608955, "grad_norm": 6.5913797952025615, "learning_rate": 5.325068743447378e-06, "loss": 17.5557, "step": 27076 }, { "epoch": 0.49494580218253603, "grad_norm": 7.157139359265984, "learning_rate": 5.324773357591284e-06, "loss": 17.8728, "step": 27077 }, { "epoch": 0.49496408137898257, "grad_norm": 6.286609264768648, "learning_rate": 5.324477970596887e-06, "loss": 17.2988, "step": 27078 }, { "epoch": 0.4949823605754291, "grad_norm": 6.139125567440642, "learning_rate": 5.324182582465224e-06, "loss": 17.442, "step": 27079 }, { "epoch": 0.49500063977187564, "grad_norm": 5.286657223187777, "learning_rate": 5.323887193197328e-06, "loss": 17.0214, "step": 27080 }, { "epoch": 0.4950189189683222, "grad_norm": 6.0313338055455175, "learning_rate": 5.323591802794237e-06, "loss": 17.1108, "step": 27081 }, { "epoch": 0.49503719816476865, "grad_norm": 6.663827894606453, "learning_rate": 5.323296411256983e-06, "loss": 17.822, "step": 27082 }, { "epoch": 0.4950554773612152, "grad_norm": 5.749880403608699, "learning_rate": 5.323001018586604e-06, "loss": 17.1773, "step": 27083 }, { "epoch": 0.4950737565576617, "grad_norm": 5.834371956386388, "learning_rate": 5.322705624784136e-06, "loss": 17.2421, "step": 27084 }, { "epoch": 0.49509203575410826, "grad_norm": 6.237005739814264, "learning_rate": 5.32241022985061e-06, "loss": 17.2866, "step": 27085 }, { "epoch": 0.4951103149505548, "grad_norm": 5.823408136601172, "learning_rate": 5.322114833787066e-06, "loss": 17.2565, "step": 27086 }, { "epoch": 0.4951285941470013, "grad_norm": 7.479932089479094, "learning_rate": 5.321819436594536e-06, "loss": 17.7947, "step": 27087 }, { "epoch": 0.4951468733434478, "grad_norm": 6.271746162433983, "learning_rate": 5.321524038274058e-06, "loss": 17.3551, "step": 27088 }, { "epoch": 0.49516515253989435, "grad_norm": 5.516386190837085, "learning_rate": 5.321228638826667e-06, "loss": 17.2787, "step": 27089 }, { "epoch": 0.4951834317363409, "grad_norm": 6.51806980194292, "learning_rate": 5.320933238253394e-06, "loss": 17.2764, "step": 27090 }, { "epoch": 0.4952017109327874, "grad_norm": 6.505095705765514, "learning_rate": 5.320637836555282e-06, "loss": 17.3872, "step": 27091 }, { "epoch": 0.4952199901292339, "grad_norm": 6.008812418764353, "learning_rate": 5.32034243373336e-06, "loss": 17.4496, "step": 27092 }, { "epoch": 0.49523826932568044, "grad_norm": 6.132994346673277, "learning_rate": 5.320047029788665e-06, "loss": 17.3415, "step": 27093 }, { "epoch": 0.49525654852212697, "grad_norm": 6.76828084705253, "learning_rate": 5.319751624722235e-06, "loss": 17.7391, "step": 27094 }, { "epoch": 0.4952748277185735, "grad_norm": 4.852920399836946, "learning_rate": 5.319456218535102e-06, "loss": 16.8498, "step": 27095 }, { "epoch": 0.49529310691502004, "grad_norm": 8.37845213462456, "learning_rate": 5.3191608112283026e-06, "loss": 18.22, "step": 27096 }, { "epoch": 0.4953113861114665, "grad_norm": 5.510644493946002, "learning_rate": 5.318865402802872e-06, "loss": 17.079, "step": 27097 }, { "epoch": 0.49532966530791306, "grad_norm": 5.823363752835767, "learning_rate": 5.318569993259848e-06, "loss": 17.3107, "step": 27098 }, { "epoch": 0.4953479445043596, "grad_norm": 5.763510154128105, "learning_rate": 5.31827458260026e-06, "loss": 17.5461, "step": 27099 }, { "epoch": 0.49536622370080613, "grad_norm": 6.918820810921189, "learning_rate": 5.317979170825149e-06, "loss": 17.4873, "step": 27100 }, { "epoch": 0.4953845028972526, "grad_norm": 7.989744572692749, "learning_rate": 5.31768375793555e-06, "loss": 18.4143, "step": 27101 }, { "epoch": 0.49540278209369915, "grad_norm": 7.214879095416795, "learning_rate": 5.317388343932497e-06, "loss": 17.8851, "step": 27102 }, { "epoch": 0.4954210612901457, "grad_norm": 5.8630533633346245, "learning_rate": 5.3170929288170235e-06, "loss": 17.2316, "step": 27103 }, { "epoch": 0.4954393404865922, "grad_norm": 5.838223225066707, "learning_rate": 5.316797512590166e-06, "loss": 17.0889, "step": 27104 }, { "epoch": 0.49545761968303875, "grad_norm": 8.490065189147296, "learning_rate": 5.316502095252964e-06, "loss": 17.939, "step": 27105 }, { "epoch": 0.49547589887948523, "grad_norm": 6.947018162341639, "learning_rate": 5.316206676806448e-06, "loss": 17.8263, "step": 27106 }, { "epoch": 0.49549417807593177, "grad_norm": 6.530436899887638, "learning_rate": 5.315911257251655e-06, "loss": 17.5459, "step": 27107 }, { "epoch": 0.4955124572723783, "grad_norm": 5.139972165119779, "learning_rate": 5.31561583658962e-06, "loss": 16.8613, "step": 27108 }, { "epoch": 0.49553073646882484, "grad_norm": 5.007554461914014, "learning_rate": 5.315320414821379e-06, "loss": 16.9549, "step": 27109 }, { "epoch": 0.4955490156652714, "grad_norm": 6.531020566255451, "learning_rate": 5.3150249919479676e-06, "loss": 17.5633, "step": 27110 }, { "epoch": 0.49556729486171786, "grad_norm": 6.7268024899273655, "learning_rate": 5.314729567970421e-06, "loss": 17.2107, "step": 27111 }, { "epoch": 0.4955855740581644, "grad_norm": 5.763513281309919, "learning_rate": 5.314434142889776e-06, "loss": 17.3185, "step": 27112 }, { "epoch": 0.4956038532546109, "grad_norm": 5.735780970376321, "learning_rate": 5.314138716707063e-06, "loss": 16.6206, "step": 27113 }, { "epoch": 0.49562213245105746, "grad_norm": 6.094915498236935, "learning_rate": 5.313843289423324e-06, "loss": 17.5662, "step": 27114 }, { "epoch": 0.495640411647504, "grad_norm": 7.193393884445647, "learning_rate": 5.313547861039592e-06, "loss": 17.6936, "step": 27115 }, { "epoch": 0.4956586908439505, "grad_norm": 6.116919446868163, "learning_rate": 5.3132524315569e-06, "loss": 17.6923, "step": 27116 }, { "epoch": 0.495676970040397, "grad_norm": 6.071783068354307, "learning_rate": 5.3129570009762864e-06, "loss": 17.4997, "step": 27117 }, { "epoch": 0.49569524923684355, "grad_norm": 7.36498956045436, "learning_rate": 5.312661569298784e-06, "loss": 17.9898, "step": 27118 }, { "epoch": 0.4957135284332901, "grad_norm": 6.513731500749691, "learning_rate": 5.312366136525433e-06, "loss": 17.376, "step": 27119 }, { "epoch": 0.4957318076297366, "grad_norm": 5.943945414772952, "learning_rate": 5.312070702657264e-06, "loss": 16.9633, "step": 27120 }, { "epoch": 0.4957500868261831, "grad_norm": 7.317799352604877, "learning_rate": 5.311775267695314e-06, "loss": 17.541, "step": 27121 }, { "epoch": 0.49576836602262964, "grad_norm": 6.678806429324938, "learning_rate": 5.311479831640619e-06, "loss": 17.6554, "step": 27122 }, { "epoch": 0.49578664521907617, "grad_norm": 6.422061144880449, "learning_rate": 5.311184394494214e-06, "loss": 17.6898, "step": 27123 }, { "epoch": 0.4958049244155227, "grad_norm": 7.257743893993381, "learning_rate": 5.310888956257135e-06, "loss": 17.7341, "step": 27124 }, { "epoch": 0.49582320361196924, "grad_norm": 5.938111724825104, "learning_rate": 5.310593516930418e-06, "loss": 17.3035, "step": 27125 }, { "epoch": 0.4958414828084157, "grad_norm": 6.100282441446354, "learning_rate": 5.310298076515096e-06, "loss": 17.5533, "step": 27126 }, { "epoch": 0.49585976200486226, "grad_norm": 6.40958138518579, "learning_rate": 5.310002635012207e-06, "loss": 17.5581, "step": 27127 }, { "epoch": 0.4958780412013088, "grad_norm": 6.904282606317481, "learning_rate": 5.309707192422786e-06, "loss": 17.818, "step": 27128 }, { "epoch": 0.49589632039775533, "grad_norm": 8.793520044325492, "learning_rate": 5.309411748747869e-06, "loss": 17.8181, "step": 27129 }, { "epoch": 0.49591459959420187, "grad_norm": 6.398017389891622, "learning_rate": 5.309116303988488e-06, "loss": 17.5039, "step": 27130 }, { "epoch": 0.49593287879064835, "grad_norm": 7.251971439581628, "learning_rate": 5.308820858145682e-06, "loss": 17.5048, "step": 27131 }, { "epoch": 0.4959511579870949, "grad_norm": 6.404215997284142, "learning_rate": 5.308525411220488e-06, "loss": 17.4336, "step": 27132 }, { "epoch": 0.4959694371835414, "grad_norm": 7.729486686990656, "learning_rate": 5.3082299632139375e-06, "loss": 17.9189, "step": 27133 }, { "epoch": 0.49598771637998795, "grad_norm": 5.949653232636323, "learning_rate": 5.307934514127068e-06, "loss": 17.5042, "step": 27134 }, { "epoch": 0.49600599557643443, "grad_norm": 5.5871038029272935, "learning_rate": 5.3076390639609146e-06, "loss": 17.4189, "step": 27135 }, { "epoch": 0.49602427477288097, "grad_norm": 6.966548112460395, "learning_rate": 5.307343612716512e-06, "loss": 17.5541, "step": 27136 }, { "epoch": 0.4960425539693275, "grad_norm": 7.007021036075989, "learning_rate": 5.307048160394899e-06, "loss": 17.6978, "step": 27137 }, { "epoch": 0.49606083316577404, "grad_norm": 8.397875547905214, "learning_rate": 5.306752706997107e-06, "loss": 18.4, "step": 27138 }, { "epoch": 0.4960791123622206, "grad_norm": 6.045377634488545, "learning_rate": 5.306457252524176e-06, "loss": 17.4589, "step": 27139 }, { "epoch": 0.49609739155866706, "grad_norm": 6.331855818214131, "learning_rate": 5.306161796977134e-06, "loss": 17.2416, "step": 27140 }, { "epoch": 0.4961156707551136, "grad_norm": 7.340035417544372, "learning_rate": 5.305866340357024e-06, "loss": 17.6641, "step": 27141 }, { "epoch": 0.4961339499515601, "grad_norm": 6.171788505845319, "learning_rate": 5.30557088266488e-06, "loss": 17.1048, "step": 27142 }, { "epoch": 0.49615222914800666, "grad_norm": 5.457816041538035, "learning_rate": 5.305275423901737e-06, "loss": 17.3584, "step": 27143 }, { "epoch": 0.4961705083444532, "grad_norm": 7.613004108036592, "learning_rate": 5.304979964068628e-06, "loss": 18.1483, "step": 27144 }, { "epoch": 0.4961887875408997, "grad_norm": 5.481668594229304, "learning_rate": 5.3046845031665915e-06, "loss": 17.0934, "step": 27145 }, { "epoch": 0.4962070667373462, "grad_norm": 7.39994442611417, "learning_rate": 5.304389041196664e-06, "loss": 17.4173, "step": 27146 }, { "epoch": 0.49622534593379275, "grad_norm": 5.7025844735581455, "learning_rate": 5.304093578159877e-06, "loss": 17.2323, "step": 27147 }, { "epoch": 0.4962436251302393, "grad_norm": 7.30143375176874, "learning_rate": 5.303798114057269e-06, "loss": 18.0775, "step": 27148 }, { "epoch": 0.4962619043266858, "grad_norm": 5.696227811299683, "learning_rate": 5.3035026488898754e-06, "loss": 17.4022, "step": 27149 }, { "epoch": 0.4962801835231323, "grad_norm": 6.530643656227143, "learning_rate": 5.30320718265873e-06, "loss": 17.6637, "step": 27150 }, { "epoch": 0.49629846271957884, "grad_norm": 6.822978267413106, "learning_rate": 5.302911715364871e-06, "loss": 17.8557, "step": 27151 }, { "epoch": 0.4963167419160254, "grad_norm": 7.268598916926087, "learning_rate": 5.3026162470093335e-06, "loss": 17.7663, "step": 27152 }, { "epoch": 0.4963350211124719, "grad_norm": 7.921854843630647, "learning_rate": 5.30232077759315e-06, "loss": 18.2357, "step": 27153 }, { "epoch": 0.49635330030891844, "grad_norm": 6.040172703825464, "learning_rate": 5.302025307117361e-06, "loss": 17.5834, "step": 27154 }, { "epoch": 0.4963715795053649, "grad_norm": 6.33443303987812, "learning_rate": 5.301729835582998e-06, "loss": 17.5087, "step": 27155 }, { "epoch": 0.49638985870181146, "grad_norm": 6.399351837295014, "learning_rate": 5.301434362991099e-06, "loss": 17.5478, "step": 27156 }, { "epoch": 0.496408137898258, "grad_norm": 7.275944016496439, "learning_rate": 5.301138889342698e-06, "loss": 18.0614, "step": 27157 }, { "epoch": 0.49642641709470453, "grad_norm": 7.654295357271992, "learning_rate": 5.300843414638831e-06, "loss": 17.9039, "step": 27158 }, { "epoch": 0.49644469629115107, "grad_norm": 6.381527232148551, "learning_rate": 5.3005479388805335e-06, "loss": 17.747, "step": 27159 }, { "epoch": 0.49646297548759755, "grad_norm": 7.172154294659397, "learning_rate": 5.300252462068845e-06, "loss": 17.5675, "step": 27160 }, { "epoch": 0.4964812546840441, "grad_norm": 5.899237087590126, "learning_rate": 5.299956984204794e-06, "loss": 17.3641, "step": 27161 }, { "epoch": 0.4964995338804906, "grad_norm": 6.293315909695069, "learning_rate": 5.299661505289421e-06, "loss": 17.1642, "step": 27162 }, { "epoch": 0.49651781307693715, "grad_norm": 7.239253128344299, "learning_rate": 5.29936602532376e-06, "loss": 17.3627, "step": 27163 }, { "epoch": 0.4965360922733837, "grad_norm": 7.336105727193535, "learning_rate": 5.299070544308847e-06, "loss": 18.1346, "step": 27164 }, { "epoch": 0.49655437146983017, "grad_norm": 6.574231614732913, "learning_rate": 5.298775062245719e-06, "loss": 17.4154, "step": 27165 }, { "epoch": 0.4965726506662767, "grad_norm": 4.892042150079644, "learning_rate": 5.298479579135409e-06, "loss": 17.0709, "step": 27166 }, { "epoch": 0.49659092986272324, "grad_norm": 6.696837834447044, "learning_rate": 5.2981840949789546e-06, "loss": 17.3769, "step": 27167 }, { "epoch": 0.4966092090591698, "grad_norm": 7.3244109929235295, "learning_rate": 5.297888609777391e-06, "loss": 17.7439, "step": 27168 }, { "epoch": 0.49662748825561626, "grad_norm": 5.75513126188906, "learning_rate": 5.2975931235317525e-06, "loss": 17.3198, "step": 27169 }, { "epoch": 0.4966457674520628, "grad_norm": 6.675387476127053, "learning_rate": 5.297297636243077e-06, "loss": 17.27, "step": 27170 }, { "epoch": 0.4966640466485093, "grad_norm": 6.81580139447009, "learning_rate": 5.2970021479124e-06, "loss": 17.6338, "step": 27171 }, { "epoch": 0.49668232584495586, "grad_norm": 6.732773707828087, "learning_rate": 5.296706658540753e-06, "loss": 17.3781, "step": 27172 }, { "epoch": 0.4967006050414024, "grad_norm": 5.441919668441327, "learning_rate": 5.296411168129177e-06, "loss": 17.0593, "step": 27173 }, { "epoch": 0.4967188842378489, "grad_norm": 6.315453747522237, "learning_rate": 5.296115676678707e-06, "loss": 17.4357, "step": 27174 }, { "epoch": 0.4967371634342954, "grad_norm": 7.056538431335537, "learning_rate": 5.2958201841903754e-06, "loss": 17.6652, "step": 27175 }, { "epoch": 0.49675544263074195, "grad_norm": 5.821744196774932, "learning_rate": 5.295524690665221e-06, "loss": 17.2166, "step": 27176 }, { "epoch": 0.4967737218271885, "grad_norm": 7.502599944919671, "learning_rate": 5.295229196104277e-06, "loss": 18.0384, "step": 27177 }, { "epoch": 0.496792001023635, "grad_norm": 5.019815368437581, "learning_rate": 5.2949337005085795e-06, "loss": 17.0715, "step": 27178 }, { "epoch": 0.4968102802200815, "grad_norm": 6.0498780514001815, "learning_rate": 5.294638203879167e-06, "loss": 17.3159, "step": 27179 }, { "epoch": 0.49682855941652804, "grad_norm": 6.728427255070438, "learning_rate": 5.294342706217072e-06, "loss": 18.0954, "step": 27180 }, { "epoch": 0.4968468386129746, "grad_norm": 6.383029542631247, "learning_rate": 5.294047207523332e-06, "loss": 17.4091, "step": 27181 }, { "epoch": 0.4968651178094211, "grad_norm": 5.8332534112216745, "learning_rate": 5.293751707798981e-06, "loss": 17.1511, "step": 27182 }, { "epoch": 0.49688339700586764, "grad_norm": 5.88775996750707, "learning_rate": 5.293456207045056e-06, "loss": 17.2777, "step": 27183 }, { "epoch": 0.4969016762023141, "grad_norm": 7.75262316026169, "learning_rate": 5.293160705262594e-06, "loss": 17.8964, "step": 27184 }, { "epoch": 0.49691995539876066, "grad_norm": 6.520555094380181, "learning_rate": 5.292865202452628e-06, "loss": 17.6149, "step": 27185 }, { "epoch": 0.4969382345952072, "grad_norm": 6.358301365429968, "learning_rate": 5.2925696986161935e-06, "loss": 17.1833, "step": 27186 }, { "epoch": 0.49695651379165373, "grad_norm": 5.566572725779698, "learning_rate": 5.2922741937543294e-06, "loss": 17.2679, "step": 27187 }, { "epoch": 0.49697479298810027, "grad_norm": 6.276243680080803, "learning_rate": 5.2919786878680705e-06, "loss": 17.5834, "step": 27188 }, { "epoch": 0.49699307218454675, "grad_norm": 5.914734083121124, "learning_rate": 5.29168318095845e-06, "loss": 17.2533, "step": 27189 }, { "epoch": 0.4970113513809933, "grad_norm": 7.3116436347415945, "learning_rate": 5.291387673026505e-06, "loss": 17.3719, "step": 27190 }, { "epoch": 0.4970296305774398, "grad_norm": 6.664224589792451, "learning_rate": 5.291092164073273e-06, "loss": 17.7188, "step": 27191 }, { "epoch": 0.49704790977388635, "grad_norm": 7.468512860219787, "learning_rate": 5.290796654099787e-06, "loss": 17.5344, "step": 27192 }, { "epoch": 0.4970661889703329, "grad_norm": 5.7942198892057695, "learning_rate": 5.2905011431070845e-06, "loss": 17.0086, "step": 27193 }, { "epoch": 0.49708446816677937, "grad_norm": 7.135569698803995, "learning_rate": 5.2902056310962005e-06, "loss": 17.7024, "step": 27194 }, { "epoch": 0.4971027473632259, "grad_norm": 8.383877195301391, "learning_rate": 5.28991011806817e-06, "loss": 17.6119, "step": 27195 }, { "epoch": 0.49712102655967244, "grad_norm": 6.165860670145469, "learning_rate": 5.2896146040240305e-06, "loss": 17.3282, "step": 27196 }, { "epoch": 0.497139305756119, "grad_norm": 7.3823915368426185, "learning_rate": 5.289319088964817e-06, "loss": 17.7051, "step": 27197 }, { "epoch": 0.4971575849525655, "grad_norm": 5.654115886268558, "learning_rate": 5.289023572891567e-06, "loss": 17.1707, "step": 27198 }, { "epoch": 0.497175864149012, "grad_norm": 7.867968063707022, "learning_rate": 5.288728055805311e-06, "loss": 18.0214, "step": 27199 }, { "epoch": 0.49719414334545853, "grad_norm": 7.252312509773188, "learning_rate": 5.28843253770709e-06, "loss": 17.5912, "step": 27200 }, { "epoch": 0.49721242254190506, "grad_norm": 6.8954169769120055, "learning_rate": 5.288137018597939e-06, "loss": 17.8047, "step": 27201 }, { "epoch": 0.4972307017383516, "grad_norm": 7.892652262769014, "learning_rate": 5.287841498478892e-06, "loss": 17.9997, "step": 27202 }, { "epoch": 0.4972489809347981, "grad_norm": 5.391683692320803, "learning_rate": 5.287545977350985e-06, "loss": 17.2399, "step": 27203 }, { "epoch": 0.4972672601312446, "grad_norm": 7.162900747616963, "learning_rate": 5.287250455215254e-06, "loss": 17.7604, "step": 27204 }, { "epoch": 0.49728553932769115, "grad_norm": 6.745080042105251, "learning_rate": 5.2869549320727355e-06, "loss": 17.527, "step": 27205 }, { "epoch": 0.4973038185241377, "grad_norm": 7.4739556135528, "learning_rate": 5.286659407924465e-06, "loss": 17.9076, "step": 27206 }, { "epoch": 0.4973220977205842, "grad_norm": 6.454025515691937, "learning_rate": 5.286363882771478e-06, "loss": 17.8487, "step": 27207 }, { "epoch": 0.4973403769170307, "grad_norm": 5.179353665563951, "learning_rate": 5.2860683566148105e-06, "loss": 17.1066, "step": 27208 }, { "epoch": 0.49735865611347724, "grad_norm": 5.0208671925141815, "learning_rate": 5.285772829455499e-06, "loss": 17.0062, "step": 27209 }, { "epoch": 0.4973769353099238, "grad_norm": 6.331156785180926, "learning_rate": 5.285477301294577e-06, "loss": 17.5236, "step": 27210 }, { "epoch": 0.4973952145063703, "grad_norm": 6.954132012161971, "learning_rate": 5.2851817721330835e-06, "loss": 17.7691, "step": 27211 }, { "epoch": 0.49741349370281684, "grad_norm": 7.436982095131251, "learning_rate": 5.284886241972051e-06, "loss": 17.9956, "step": 27212 }, { "epoch": 0.4974317728992633, "grad_norm": 6.021238119244414, "learning_rate": 5.284590710812519e-06, "loss": 17.1315, "step": 27213 }, { "epoch": 0.49745005209570986, "grad_norm": 5.331703064694421, "learning_rate": 5.284295178655518e-06, "loss": 16.984, "step": 27214 }, { "epoch": 0.4974683312921564, "grad_norm": 6.639442860072865, "learning_rate": 5.283999645502091e-06, "loss": 17.5802, "step": 27215 }, { "epoch": 0.49748661048860293, "grad_norm": 6.296819913586566, "learning_rate": 5.283704111353267e-06, "loss": 17.7537, "step": 27216 }, { "epoch": 0.49750488968504947, "grad_norm": 5.949604943756755, "learning_rate": 5.283408576210085e-06, "loss": 17.375, "step": 27217 }, { "epoch": 0.49752316888149595, "grad_norm": 5.874507368310843, "learning_rate": 5.283113040073581e-06, "loss": 17.2806, "step": 27218 }, { "epoch": 0.4975414480779425, "grad_norm": 5.321096736482824, "learning_rate": 5.282817502944791e-06, "loss": 16.9946, "step": 27219 }, { "epoch": 0.497559727274389, "grad_norm": 4.867627458878209, "learning_rate": 5.28252196482475e-06, "loss": 16.8596, "step": 27220 }, { "epoch": 0.49757800647083555, "grad_norm": 5.657588405964617, "learning_rate": 5.282226425714494e-06, "loss": 17.1736, "step": 27221 }, { "epoch": 0.4975962856672821, "grad_norm": 7.688874444755954, "learning_rate": 5.281930885615059e-06, "loss": 17.4771, "step": 27222 }, { "epoch": 0.49761456486372857, "grad_norm": 7.27819314632887, "learning_rate": 5.28163534452748e-06, "loss": 18.0137, "step": 27223 }, { "epoch": 0.4976328440601751, "grad_norm": 7.227627704917986, "learning_rate": 5.281339802452794e-06, "loss": 17.845, "step": 27224 }, { "epoch": 0.49765112325662164, "grad_norm": 6.586557711148001, "learning_rate": 5.281044259392038e-06, "loss": 17.5847, "step": 27225 }, { "epoch": 0.4976694024530682, "grad_norm": 6.729547602989797, "learning_rate": 5.280748715346242e-06, "loss": 17.7012, "step": 27226 }, { "epoch": 0.4976876816495147, "grad_norm": 6.039794973030868, "learning_rate": 5.28045317031645e-06, "loss": 17.4433, "step": 27227 }, { "epoch": 0.4977059608459612, "grad_norm": 5.757563907346949, "learning_rate": 5.280157624303692e-06, "loss": 17.0479, "step": 27228 }, { "epoch": 0.49772424004240773, "grad_norm": 7.145382119795324, "learning_rate": 5.279862077309007e-06, "loss": 17.6854, "step": 27229 }, { "epoch": 0.49774251923885426, "grad_norm": 6.90161787296418, "learning_rate": 5.27956652933343e-06, "loss": 17.8546, "step": 27230 }, { "epoch": 0.4977607984353008, "grad_norm": 5.636887504816268, "learning_rate": 5.279270980377994e-06, "loss": 17.2021, "step": 27231 }, { "epoch": 0.49777907763174734, "grad_norm": 7.027245822017659, "learning_rate": 5.27897543044374e-06, "loss": 17.7732, "step": 27232 }, { "epoch": 0.4977973568281938, "grad_norm": 6.361708975918521, "learning_rate": 5.278679879531701e-06, "loss": 17.2399, "step": 27233 }, { "epoch": 0.49781563602464035, "grad_norm": 6.389524564784658, "learning_rate": 5.278384327642912e-06, "loss": 17.7114, "step": 27234 }, { "epoch": 0.4978339152210869, "grad_norm": 6.159050623032668, "learning_rate": 5.278088774778412e-06, "loss": 17.4076, "step": 27235 }, { "epoch": 0.4978521944175334, "grad_norm": 7.697747232418964, "learning_rate": 5.277793220939233e-06, "loss": 18.5743, "step": 27236 }, { "epoch": 0.4978704736139799, "grad_norm": 6.099365168059452, "learning_rate": 5.277497666126413e-06, "loss": 17.5432, "step": 27237 }, { "epoch": 0.49788875281042644, "grad_norm": 7.462158905698103, "learning_rate": 5.277202110340989e-06, "loss": 18.1144, "step": 27238 }, { "epoch": 0.497907032006873, "grad_norm": 5.544124810711395, "learning_rate": 5.276906553583996e-06, "loss": 17.2428, "step": 27239 }, { "epoch": 0.4979253112033195, "grad_norm": 7.329429172628634, "learning_rate": 5.276610995856468e-06, "loss": 17.5655, "step": 27240 }, { "epoch": 0.49794359039976605, "grad_norm": 5.533087384747263, "learning_rate": 5.276315437159443e-06, "loss": 17.2499, "step": 27241 }, { "epoch": 0.4979618695962125, "grad_norm": 6.646939876218621, "learning_rate": 5.2760198774939565e-06, "loss": 17.5805, "step": 27242 }, { "epoch": 0.49798014879265906, "grad_norm": 7.168023518725268, "learning_rate": 5.275724316861045e-06, "loss": 18.0732, "step": 27243 }, { "epoch": 0.4979984279891056, "grad_norm": 7.253311223750868, "learning_rate": 5.275428755261742e-06, "loss": 17.8587, "step": 27244 }, { "epoch": 0.49801670718555213, "grad_norm": 6.635024711834854, "learning_rate": 5.275133192697086e-06, "loss": 17.6585, "step": 27245 }, { "epoch": 0.49803498638199867, "grad_norm": 5.967339873264225, "learning_rate": 5.274837629168112e-06, "loss": 17.2748, "step": 27246 }, { "epoch": 0.49805326557844515, "grad_norm": 5.594154402885207, "learning_rate": 5.274542064675857e-06, "loss": 17.3248, "step": 27247 }, { "epoch": 0.4980715447748917, "grad_norm": 6.088278735186422, "learning_rate": 5.274246499221355e-06, "loss": 17.6099, "step": 27248 }, { "epoch": 0.4980898239713382, "grad_norm": 5.448986816313254, "learning_rate": 5.273950932805641e-06, "loss": 17.2054, "step": 27249 }, { "epoch": 0.49810810316778475, "grad_norm": 6.941485267383951, "learning_rate": 5.273655365429756e-06, "loss": 17.5758, "step": 27250 }, { "epoch": 0.4981263823642313, "grad_norm": 7.093893261235679, "learning_rate": 5.273359797094731e-06, "loss": 17.6851, "step": 27251 }, { "epoch": 0.49814466156067777, "grad_norm": 6.0737254445571365, "learning_rate": 5.273064227801604e-06, "loss": 17.2736, "step": 27252 }, { "epoch": 0.4981629407571243, "grad_norm": 6.223150827510893, "learning_rate": 5.272768657551411e-06, "loss": 17.375, "step": 27253 }, { "epoch": 0.49818121995357084, "grad_norm": 6.778711264125992, "learning_rate": 5.272473086345187e-06, "loss": 17.4212, "step": 27254 }, { "epoch": 0.4981994991500174, "grad_norm": 5.4685402624857025, "learning_rate": 5.272177514183967e-06, "loss": 17.1399, "step": 27255 }, { "epoch": 0.4982177783464639, "grad_norm": 5.971388206290414, "learning_rate": 5.271881941068792e-06, "loss": 17.3266, "step": 27256 }, { "epoch": 0.4982360575429104, "grad_norm": 5.697162623851452, "learning_rate": 5.271586367000692e-06, "loss": 17.3548, "step": 27257 }, { "epoch": 0.49825433673935693, "grad_norm": 6.088845435619034, "learning_rate": 5.271290791980704e-06, "loss": 17.3779, "step": 27258 }, { "epoch": 0.49827261593580346, "grad_norm": 5.853067754879511, "learning_rate": 5.270995216009867e-06, "loss": 17.323, "step": 27259 }, { "epoch": 0.49829089513225, "grad_norm": 6.57190388598174, "learning_rate": 5.2706996390892166e-06, "loss": 17.6874, "step": 27260 }, { "epoch": 0.49830917432869654, "grad_norm": 5.603114077511451, "learning_rate": 5.270404061219786e-06, "loss": 17.0561, "step": 27261 }, { "epoch": 0.498327453525143, "grad_norm": 6.171017351257926, "learning_rate": 5.270108482402612e-06, "loss": 17.3007, "step": 27262 }, { "epoch": 0.49834573272158955, "grad_norm": 6.350938266900552, "learning_rate": 5.269812902638733e-06, "loss": 17.8039, "step": 27263 }, { "epoch": 0.4983640119180361, "grad_norm": 5.562161501121355, "learning_rate": 5.2695173219291805e-06, "loss": 17.1186, "step": 27264 }, { "epoch": 0.4983822911144826, "grad_norm": 5.4989385623513565, "learning_rate": 5.269221740274996e-06, "loss": 17.3501, "step": 27265 }, { "epoch": 0.49840057031092916, "grad_norm": 6.814186356177413, "learning_rate": 5.268926157677211e-06, "loss": 17.681, "step": 27266 }, { "epoch": 0.49841884950737564, "grad_norm": 6.32709050709167, "learning_rate": 5.268630574136864e-06, "loss": 17.4067, "step": 27267 }, { "epoch": 0.4984371287038222, "grad_norm": 6.0456350719203, "learning_rate": 5.268334989654988e-06, "loss": 17.6149, "step": 27268 }, { "epoch": 0.4984554079002687, "grad_norm": 5.299140660827133, "learning_rate": 5.268039404232624e-06, "loss": 17.1577, "step": 27269 }, { "epoch": 0.49847368709671525, "grad_norm": 5.4418613551047965, "learning_rate": 5.267743817870805e-06, "loss": 17.1168, "step": 27270 }, { "epoch": 0.4984919662931617, "grad_norm": 7.413473483414581, "learning_rate": 5.267448230570565e-06, "loss": 17.9511, "step": 27271 }, { "epoch": 0.49851024548960826, "grad_norm": 6.447941037758599, "learning_rate": 5.267152642332943e-06, "loss": 17.6765, "step": 27272 }, { "epoch": 0.4985285246860548, "grad_norm": 8.662117696687469, "learning_rate": 5.266857053158975e-06, "loss": 18.0849, "step": 27273 }, { "epoch": 0.49854680388250133, "grad_norm": 4.784470459934603, "learning_rate": 5.2665614630496965e-06, "loss": 16.8672, "step": 27274 }, { "epoch": 0.49856508307894787, "grad_norm": 7.212260733700813, "learning_rate": 5.2662658720061424e-06, "loss": 17.6389, "step": 27275 }, { "epoch": 0.49858336227539435, "grad_norm": 6.082691345885144, "learning_rate": 5.265970280029349e-06, "loss": 17.5206, "step": 27276 }, { "epoch": 0.4986016414718409, "grad_norm": 6.361255605534515, "learning_rate": 5.265674687120354e-06, "loss": 17.3961, "step": 27277 }, { "epoch": 0.4986199206682874, "grad_norm": 7.882584150565434, "learning_rate": 5.265379093280191e-06, "loss": 18.135, "step": 27278 }, { "epoch": 0.49863819986473396, "grad_norm": 6.803867565955744, "learning_rate": 5.265083498509898e-06, "loss": 17.8072, "step": 27279 }, { "epoch": 0.4986564790611805, "grad_norm": 6.734315518552827, "learning_rate": 5.26478790281051e-06, "loss": 17.3493, "step": 27280 }, { "epoch": 0.49867475825762697, "grad_norm": 5.94951606986175, "learning_rate": 5.264492306183063e-06, "loss": 17.4249, "step": 27281 }, { "epoch": 0.4986930374540735, "grad_norm": 7.496664659887621, "learning_rate": 5.264196708628595e-06, "loss": 17.6521, "step": 27282 }, { "epoch": 0.49871131665052004, "grad_norm": 5.084969949782511, "learning_rate": 5.26390111014814e-06, "loss": 16.9039, "step": 27283 }, { "epoch": 0.4987295958469666, "grad_norm": 7.350533788688811, "learning_rate": 5.263605510742734e-06, "loss": 17.7732, "step": 27284 }, { "epoch": 0.4987478750434131, "grad_norm": 5.92753706762238, "learning_rate": 5.263309910413412e-06, "loss": 17.1627, "step": 27285 }, { "epoch": 0.4987661542398596, "grad_norm": 8.021800227382037, "learning_rate": 5.263014309161214e-06, "loss": 18.1532, "step": 27286 }, { "epoch": 0.49878443343630613, "grad_norm": 6.557198343835686, "learning_rate": 5.262718706987172e-06, "loss": 17.678, "step": 27287 }, { "epoch": 0.49880271263275267, "grad_norm": 6.704476399119188, "learning_rate": 5.262423103892327e-06, "loss": 17.7313, "step": 27288 }, { "epoch": 0.4988209918291992, "grad_norm": 8.38404822779253, "learning_rate": 5.262127499877708e-06, "loss": 18.4438, "step": 27289 }, { "epoch": 0.49883927102564574, "grad_norm": 6.1932640372868715, "learning_rate": 5.261831894944356e-06, "loss": 17.103, "step": 27290 }, { "epoch": 0.4988575502220922, "grad_norm": 6.626956857118013, "learning_rate": 5.261536289093308e-06, "loss": 17.4115, "step": 27291 }, { "epoch": 0.49887582941853875, "grad_norm": 5.9304365371352965, "learning_rate": 5.261240682325595e-06, "loss": 17.4598, "step": 27292 }, { "epoch": 0.4988941086149853, "grad_norm": 7.531357573936381, "learning_rate": 5.260945074642257e-06, "loss": 18.1339, "step": 27293 }, { "epoch": 0.4989123878114318, "grad_norm": 6.873920083080513, "learning_rate": 5.26064946604433e-06, "loss": 17.749, "step": 27294 }, { "epoch": 0.49893066700787836, "grad_norm": 5.743669766851879, "learning_rate": 5.260353856532848e-06, "loss": 17.2678, "step": 27295 }, { "epoch": 0.49894894620432484, "grad_norm": 6.520042487393192, "learning_rate": 5.260058246108849e-06, "loss": 17.7258, "step": 27296 }, { "epoch": 0.4989672254007714, "grad_norm": 6.582487933064542, "learning_rate": 5.259762634773369e-06, "loss": 17.6029, "step": 27297 }, { "epoch": 0.4989855045972179, "grad_norm": 6.714649967128315, "learning_rate": 5.259467022527443e-06, "loss": 17.6026, "step": 27298 }, { "epoch": 0.49900378379366445, "grad_norm": 6.081654655488484, "learning_rate": 5.259171409372107e-06, "loss": 17.3182, "step": 27299 }, { "epoch": 0.499022062990111, "grad_norm": 5.695502752793096, "learning_rate": 5.258875795308398e-06, "loss": 17.404, "step": 27300 }, { "epoch": 0.49904034218655746, "grad_norm": 5.562535014051476, "learning_rate": 5.258580180337353e-06, "loss": 17.538, "step": 27301 }, { "epoch": 0.499058621383004, "grad_norm": 5.945981359883965, "learning_rate": 5.258284564460006e-06, "loss": 17.2567, "step": 27302 }, { "epoch": 0.49907690057945053, "grad_norm": 8.16154827655044, "learning_rate": 5.2579889476773936e-06, "loss": 18.235, "step": 27303 }, { "epoch": 0.49909517977589707, "grad_norm": 7.341600633851559, "learning_rate": 5.257693329990552e-06, "loss": 18.1669, "step": 27304 }, { "epoch": 0.49911345897234355, "grad_norm": 5.323704261626477, "learning_rate": 5.257397711400519e-06, "loss": 17.2324, "step": 27305 }, { "epoch": 0.4991317381687901, "grad_norm": 5.860128598907489, "learning_rate": 5.2571020919083294e-06, "loss": 17.3605, "step": 27306 }, { "epoch": 0.4991500173652366, "grad_norm": 4.738935459635471, "learning_rate": 5.256806471515018e-06, "loss": 16.9251, "step": 27307 }, { "epoch": 0.49916829656168316, "grad_norm": 6.472101664606162, "learning_rate": 5.2565108502216225e-06, "loss": 17.2316, "step": 27308 }, { "epoch": 0.4991865757581297, "grad_norm": 7.973558479732446, "learning_rate": 5.256215228029179e-06, "loss": 18.1083, "step": 27309 }, { "epoch": 0.49920485495457617, "grad_norm": 4.894069680357392, "learning_rate": 5.255919604938723e-06, "loss": 16.889, "step": 27310 }, { "epoch": 0.4992231341510227, "grad_norm": 5.8727078370170185, "learning_rate": 5.255623980951292e-06, "loss": 17.3891, "step": 27311 }, { "epoch": 0.49924141334746924, "grad_norm": 6.632408257021762, "learning_rate": 5.2553283560679205e-06, "loss": 17.6655, "step": 27312 }, { "epoch": 0.4992596925439158, "grad_norm": 6.405787045312934, "learning_rate": 5.255032730289644e-06, "loss": 17.5536, "step": 27313 }, { "epoch": 0.4992779717403623, "grad_norm": 7.02220431101913, "learning_rate": 5.254737103617502e-06, "loss": 17.9237, "step": 27314 }, { "epoch": 0.4992962509368088, "grad_norm": 6.160457284452359, "learning_rate": 5.254441476052529e-06, "loss": 17.2988, "step": 27315 }, { "epoch": 0.49931453013325533, "grad_norm": 6.7947684550883745, "learning_rate": 5.254145847595758e-06, "loss": 17.7638, "step": 27316 }, { "epoch": 0.49933280932970187, "grad_norm": 6.777068010215756, "learning_rate": 5.253850218248228e-06, "loss": 17.8532, "step": 27317 }, { "epoch": 0.4993510885261484, "grad_norm": 7.243763457221405, "learning_rate": 5.2535545880109775e-06, "loss": 17.7889, "step": 27318 }, { "epoch": 0.49936936772259494, "grad_norm": 5.3918795999495, "learning_rate": 5.25325895688504e-06, "loss": 17.0649, "step": 27319 }, { "epoch": 0.4993876469190414, "grad_norm": 5.413404390198723, "learning_rate": 5.25296332487145e-06, "loss": 17.1639, "step": 27320 }, { "epoch": 0.49940592611548795, "grad_norm": 5.72906473494947, "learning_rate": 5.252667691971247e-06, "loss": 17.2953, "step": 27321 }, { "epoch": 0.4994242053119345, "grad_norm": 5.193161058333442, "learning_rate": 5.252372058185465e-06, "loss": 17.0099, "step": 27322 }, { "epoch": 0.499442484508381, "grad_norm": 7.039314401305383, "learning_rate": 5.25207642351514e-06, "loss": 17.5882, "step": 27323 }, { "epoch": 0.49946076370482756, "grad_norm": 8.025612850124404, "learning_rate": 5.2517807879613105e-06, "loss": 17.9741, "step": 27324 }, { "epoch": 0.49947904290127404, "grad_norm": 5.330340399919986, "learning_rate": 5.251485151525011e-06, "loss": 16.956, "step": 27325 }, { "epoch": 0.4994973220977206, "grad_norm": 7.545316536692668, "learning_rate": 5.251189514207276e-06, "loss": 18.2837, "step": 27326 }, { "epoch": 0.4995156012941671, "grad_norm": 6.0748830026656515, "learning_rate": 5.250893876009146e-06, "loss": 17.2829, "step": 27327 }, { "epoch": 0.49953388049061365, "grad_norm": 5.65142999078361, "learning_rate": 5.2505982369316525e-06, "loss": 17.2648, "step": 27328 }, { "epoch": 0.4995521596870602, "grad_norm": 6.562824523907876, "learning_rate": 5.250302596975836e-06, "loss": 17.387, "step": 27329 }, { "epoch": 0.49957043888350666, "grad_norm": 6.622548580334553, "learning_rate": 5.25000695614273e-06, "loss": 17.3598, "step": 27330 }, { "epoch": 0.4995887180799532, "grad_norm": 7.550808300661673, "learning_rate": 5.24971131443337e-06, "loss": 18.1236, "step": 27331 }, { "epoch": 0.49960699727639973, "grad_norm": 5.670013093144835, "learning_rate": 5.2494156718487955e-06, "loss": 17.1048, "step": 27332 }, { "epoch": 0.49962527647284627, "grad_norm": 4.981376117498401, "learning_rate": 5.249120028390039e-06, "loss": 17.1096, "step": 27333 }, { "epoch": 0.4996435556692928, "grad_norm": 7.2170066049910115, "learning_rate": 5.2488243840581395e-06, "loss": 17.6629, "step": 27334 }, { "epoch": 0.4996618348657393, "grad_norm": 6.390773456586503, "learning_rate": 5.248528738854132e-06, "loss": 17.352, "step": 27335 }, { "epoch": 0.4996801140621858, "grad_norm": 5.998306482182172, "learning_rate": 5.248233092779053e-06, "loss": 17.4651, "step": 27336 }, { "epoch": 0.49969839325863236, "grad_norm": 5.664445245901735, "learning_rate": 5.247937445833937e-06, "loss": 17.337, "step": 27337 }, { "epoch": 0.4997166724550789, "grad_norm": 7.0876287427138704, "learning_rate": 5.247641798019824e-06, "loss": 17.7832, "step": 27338 }, { "epoch": 0.49973495165152537, "grad_norm": 7.7887961983554534, "learning_rate": 5.247346149337746e-06, "loss": 17.8716, "step": 27339 }, { "epoch": 0.4997532308479719, "grad_norm": 6.913824805197927, "learning_rate": 5.247050499788742e-06, "loss": 17.7465, "step": 27340 }, { "epoch": 0.49977151004441844, "grad_norm": 6.70493938546642, "learning_rate": 5.246754849373848e-06, "loss": 17.6515, "step": 27341 }, { "epoch": 0.499789789240865, "grad_norm": 7.240895239359535, "learning_rate": 5.246459198094098e-06, "loss": 17.7322, "step": 27342 }, { "epoch": 0.4998080684373115, "grad_norm": 7.6092188518292, "learning_rate": 5.246163545950532e-06, "loss": 17.7282, "step": 27343 }, { "epoch": 0.499826347633758, "grad_norm": 8.32095780022321, "learning_rate": 5.245867892944183e-06, "loss": 18.1291, "step": 27344 }, { "epoch": 0.49984462683020453, "grad_norm": 5.387775197233404, "learning_rate": 5.245572239076089e-06, "loss": 17.1777, "step": 27345 }, { "epoch": 0.49986290602665107, "grad_norm": 5.71156203244884, "learning_rate": 5.245276584347285e-06, "loss": 17.2225, "step": 27346 }, { "epoch": 0.4998811852230976, "grad_norm": 5.826846297576714, "learning_rate": 5.2449809287588086e-06, "loss": 16.9412, "step": 27347 }, { "epoch": 0.49989946441954414, "grad_norm": 5.894183395356666, "learning_rate": 5.2446852723116945e-06, "loss": 17.2698, "step": 27348 }, { "epoch": 0.4999177436159906, "grad_norm": 7.939433203270985, "learning_rate": 5.24438961500698e-06, "loss": 17.8702, "step": 27349 }, { "epoch": 0.49993602281243715, "grad_norm": 7.17360554657337, "learning_rate": 5.244093956845701e-06, "loss": 17.8907, "step": 27350 }, { "epoch": 0.4999543020088837, "grad_norm": 7.083243086783728, "learning_rate": 5.2437982978288935e-06, "loss": 17.8503, "step": 27351 }, { "epoch": 0.4999725812053302, "grad_norm": 6.267525803147073, "learning_rate": 5.2435026379575945e-06, "loss": 17.5359, "step": 27352 }, { "epoch": 0.49999086040177676, "grad_norm": 5.68135934114697, "learning_rate": 5.243206977232841e-06, "loss": 17.2727, "step": 27353 }, { "epoch": 0.5000091395982232, "grad_norm": 6.0711168002188565, "learning_rate": 5.242911315655667e-06, "loss": 17.3524, "step": 27354 }, { "epoch": 0.5000274187946698, "grad_norm": 8.019120929226196, "learning_rate": 5.24261565322711e-06, "loss": 18.4906, "step": 27355 }, { "epoch": 0.5000456979911163, "grad_norm": 6.1078030558693905, "learning_rate": 5.242319989948207e-06, "loss": 17.3165, "step": 27356 }, { "epoch": 0.5000639771875628, "grad_norm": 6.110272340357689, "learning_rate": 5.242024325819993e-06, "loss": 17.1111, "step": 27357 }, { "epoch": 0.5000822563840094, "grad_norm": 6.387111648612196, "learning_rate": 5.241728660843504e-06, "loss": 17.4546, "step": 27358 }, { "epoch": 0.5001005355804559, "grad_norm": 6.958135010754308, "learning_rate": 5.241432995019776e-06, "loss": 17.8441, "step": 27359 }, { "epoch": 0.5001188147769025, "grad_norm": 5.9604794133716466, "learning_rate": 5.241137328349849e-06, "loss": 17.4998, "step": 27360 }, { "epoch": 0.5001370939733489, "grad_norm": 7.191712253563262, "learning_rate": 5.240841660834756e-06, "loss": 17.6892, "step": 27361 }, { "epoch": 0.5001553731697954, "grad_norm": 8.089367139206132, "learning_rate": 5.240545992475533e-06, "loss": 17.9973, "step": 27362 }, { "epoch": 0.500173652366242, "grad_norm": 6.500798907545879, "learning_rate": 5.240250323273217e-06, "loss": 17.9538, "step": 27363 }, { "epoch": 0.5001919315626885, "grad_norm": 7.202789932386763, "learning_rate": 5.239954653228845e-06, "loss": 17.4273, "step": 27364 }, { "epoch": 0.5002102107591351, "grad_norm": 6.917364605155819, "learning_rate": 5.239658982343453e-06, "loss": 17.7761, "step": 27365 }, { "epoch": 0.5002284899555816, "grad_norm": 6.394389360957413, "learning_rate": 5.239363310618076e-06, "loss": 17.4169, "step": 27366 }, { "epoch": 0.500246769152028, "grad_norm": 5.216270305462959, "learning_rate": 5.239067638053752e-06, "loss": 16.9941, "step": 27367 }, { "epoch": 0.5002650483484746, "grad_norm": 5.8359567468384945, "learning_rate": 5.238771964651517e-06, "loss": 17.0607, "step": 27368 }, { "epoch": 0.5002833275449211, "grad_norm": 7.552960228153321, "learning_rate": 5.238476290412407e-06, "loss": 18.4582, "step": 27369 }, { "epoch": 0.5003016067413677, "grad_norm": 6.996073524475762, "learning_rate": 5.238180615337459e-06, "loss": 17.414, "step": 27370 }, { "epoch": 0.5003198859378142, "grad_norm": 5.943166499036538, "learning_rate": 5.237884939427707e-06, "loss": 17.4045, "step": 27371 }, { "epoch": 0.5003381651342607, "grad_norm": 6.822133846742642, "learning_rate": 5.237589262684188e-06, "loss": 17.3984, "step": 27372 }, { "epoch": 0.5003564443307073, "grad_norm": 4.917184385497025, "learning_rate": 5.237293585107942e-06, "loss": 16.8352, "step": 27373 }, { "epoch": 0.5003747235271537, "grad_norm": 5.67334271032968, "learning_rate": 5.236997906700002e-06, "loss": 17.2446, "step": 27374 }, { "epoch": 0.5003930027236003, "grad_norm": 5.8042370374908785, "learning_rate": 5.236702227461404e-06, "loss": 17.2292, "step": 27375 }, { "epoch": 0.5004112819200468, "grad_norm": 7.816240683140112, "learning_rate": 5.236406547393185e-06, "loss": 18.1633, "step": 27376 }, { "epoch": 0.5004295611164933, "grad_norm": 4.794110991390027, "learning_rate": 5.236110866496383e-06, "loss": 16.9117, "step": 27377 }, { "epoch": 0.5004478403129399, "grad_norm": 6.997077513138833, "learning_rate": 5.2358151847720315e-06, "loss": 17.6769, "step": 27378 }, { "epoch": 0.5004661195093864, "grad_norm": 7.3662656254532335, "learning_rate": 5.2355195022211695e-06, "loss": 17.7404, "step": 27379 }, { "epoch": 0.500484398705833, "grad_norm": 6.483880158315783, "learning_rate": 5.235223818844832e-06, "loss": 17.8944, "step": 27380 }, { "epoch": 0.5005026779022794, "grad_norm": 7.234928813027564, "learning_rate": 5.234928134644054e-06, "loss": 17.5484, "step": 27381 }, { "epoch": 0.5005209570987259, "grad_norm": 7.261600480159182, "learning_rate": 5.234632449619873e-06, "loss": 17.8835, "step": 27382 }, { "epoch": 0.5005392362951725, "grad_norm": 6.238666607560452, "learning_rate": 5.234336763773326e-06, "loss": 17.2874, "step": 27383 }, { "epoch": 0.500557515491619, "grad_norm": 6.396814720909468, "learning_rate": 5.234041077105451e-06, "loss": 17.2575, "step": 27384 }, { "epoch": 0.5005757946880656, "grad_norm": 6.363811787678032, "learning_rate": 5.233745389617281e-06, "loss": 17.7276, "step": 27385 }, { "epoch": 0.500594073884512, "grad_norm": 5.8440359777593365, "learning_rate": 5.233449701309853e-06, "loss": 17.3754, "step": 27386 }, { "epoch": 0.5006123530809585, "grad_norm": 6.120363833446666, "learning_rate": 5.233154012184205e-06, "loss": 17.3495, "step": 27387 }, { "epoch": 0.5006306322774051, "grad_norm": 7.57091183675726, "learning_rate": 5.232858322241373e-06, "loss": 17.618, "step": 27388 }, { "epoch": 0.5006489114738516, "grad_norm": 5.809761818613197, "learning_rate": 5.232562631482392e-06, "loss": 17.3301, "step": 27389 }, { "epoch": 0.5006671906702981, "grad_norm": 5.935096577307627, "learning_rate": 5.232266939908298e-06, "loss": 17.3575, "step": 27390 }, { "epoch": 0.5006854698667447, "grad_norm": 5.683414427115721, "learning_rate": 5.23197124752013e-06, "loss": 17.4249, "step": 27391 }, { "epoch": 0.5007037490631912, "grad_norm": 7.672901328723398, "learning_rate": 5.231675554318923e-06, "loss": 18.2358, "step": 27392 }, { "epoch": 0.5007220282596377, "grad_norm": 5.51855032814968, "learning_rate": 5.2313798603057135e-06, "loss": 17.2993, "step": 27393 }, { "epoch": 0.5007403074560842, "grad_norm": 4.902807267884883, "learning_rate": 5.2310841654815355e-06, "loss": 17.0361, "step": 27394 }, { "epoch": 0.5007585866525307, "grad_norm": 5.574659021420579, "learning_rate": 5.23078846984743e-06, "loss": 17.4636, "step": 27395 }, { "epoch": 0.5007768658489773, "grad_norm": 6.5940863560363825, "learning_rate": 5.2304927734044295e-06, "loss": 17.5887, "step": 27396 }, { "epoch": 0.5007951450454238, "grad_norm": 5.592720218805488, "learning_rate": 5.2301970761535725e-06, "loss": 17.3221, "step": 27397 }, { "epoch": 0.5008134242418704, "grad_norm": 5.166934862750008, "learning_rate": 5.229901378095895e-06, "loss": 17.1445, "step": 27398 }, { "epoch": 0.5008317034383168, "grad_norm": 7.109736810296239, "learning_rate": 5.229605679232432e-06, "loss": 17.6091, "step": 27399 }, { "epoch": 0.5008499826347633, "grad_norm": 6.148882070907928, "learning_rate": 5.229309979564221e-06, "loss": 17.6021, "step": 27400 }, { "epoch": 0.5008682618312099, "grad_norm": 7.179005321709902, "learning_rate": 5.2290142790923e-06, "loss": 17.991, "step": 27401 }, { "epoch": 0.5008865410276564, "grad_norm": 5.461564279100281, "learning_rate": 5.228718577817703e-06, "loss": 17.2314, "step": 27402 }, { "epoch": 0.500904820224103, "grad_norm": 6.55343778634408, "learning_rate": 5.228422875741467e-06, "loss": 17.3584, "step": 27403 }, { "epoch": 0.5009230994205495, "grad_norm": 5.462835394014223, "learning_rate": 5.228127172864627e-06, "loss": 17.1289, "step": 27404 }, { "epoch": 0.500941378616996, "grad_norm": 6.905041544785173, "learning_rate": 5.227831469188225e-06, "loss": 17.823, "step": 27405 }, { "epoch": 0.5009596578134425, "grad_norm": 7.102553762331364, "learning_rate": 5.227535764713291e-06, "loss": 17.7111, "step": 27406 }, { "epoch": 0.500977937009889, "grad_norm": 7.158427242021332, "learning_rate": 5.227240059440865e-06, "loss": 17.4464, "step": 27407 }, { "epoch": 0.5009962162063356, "grad_norm": 7.132729504524786, "learning_rate": 5.2269443533719814e-06, "loss": 17.7173, "step": 27408 }, { "epoch": 0.5010144954027821, "grad_norm": 7.428577864623479, "learning_rate": 5.226648646507677e-06, "loss": 18.198, "step": 27409 }, { "epoch": 0.5010327745992286, "grad_norm": 6.282317811470806, "learning_rate": 5.2263529388489885e-06, "loss": 17.3669, "step": 27410 }, { "epoch": 0.5010510537956752, "grad_norm": 6.973185316094404, "learning_rate": 5.226057230396953e-06, "loss": 17.2822, "step": 27411 }, { "epoch": 0.5010693329921216, "grad_norm": 5.760768121814484, "learning_rate": 5.225761521152608e-06, "loss": 17.1108, "step": 27412 }, { "epoch": 0.5010876121885682, "grad_norm": 6.267662872351457, "learning_rate": 5.225465811116988e-06, "loss": 17.8989, "step": 27413 }, { "epoch": 0.5011058913850147, "grad_norm": 6.161751745976709, "learning_rate": 5.225170100291129e-06, "loss": 17.6454, "step": 27414 }, { "epoch": 0.5011241705814612, "grad_norm": 6.946668774645795, "learning_rate": 5.22487438867607e-06, "loss": 17.4902, "step": 27415 }, { "epoch": 0.5011424497779078, "grad_norm": 7.8009518378293246, "learning_rate": 5.224578676272844e-06, "loss": 18.0471, "step": 27416 }, { "epoch": 0.5011607289743543, "grad_norm": 8.100631348561828, "learning_rate": 5.2242829630824885e-06, "loss": 18.1581, "step": 27417 }, { "epoch": 0.5011790081708009, "grad_norm": 7.944855927452899, "learning_rate": 5.223987249106042e-06, "loss": 18.2589, "step": 27418 }, { "epoch": 0.5011972873672473, "grad_norm": 5.1268255941988405, "learning_rate": 5.2236915343445404e-06, "loss": 17.0776, "step": 27419 }, { "epoch": 0.5012155665636938, "grad_norm": 7.138204920686909, "learning_rate": 5.223395818799019e-06, "loss": 18.2867, "step": 27420 }, { "epoch": 0.5012338457601404, "grad_norm": 7.551431483059387, "learning_rate": 5.223100102470513e-06, "loss": 17.6356, "step": 27421 }, { "epoch": 0.5012521249565869, "grad_norm": 6.532834147764727, "learning_rate": 5.222804385360062e-06, "loss": 17.6956, "step": 27422 }, { "epoch": 0.5012704041530335, "grad_norm": 7.167663155300319, "learning_rate": 5.222508667468701e-06, "loss": 17.2327, "step": 27423 }, { "epoch": 0.50128868334948, "grad_norm": 6.731071205187084, "learning_rate": 5.222212948797466e-06, "loss": 17.6541, "step": 27424 }, { "epoch": 0.5013069625459264, "grad_norm": 5.914110127603212, "learning_rate": 5.221917229347395e-06, "loss": 17.4401, "step": 27425 }, { "epoch": 0.501325241742373, "grad_norm": 6.690376471128805, "learning_rate": 5.221621509119521e-06, "loss": 17.3074, "step": 27426 }, { "epoch": 0.5013435209388195, "grad_norm": 6.600748489199835, "learning_rate": 5.221325788114884e-06, "loss": 17.7168, "step": 27427 }, { "epoch": 0.5013618001352661, "grad_norm": 6.278334865841477, "learning_rate": 5.22103006633452e-06, "loss": 17.583, "step": 27428 }, { "epoch": 0.5013800793317126, "grad_norm": 6.644632845121036, "learning_rate": 5.220734343779465e-06, "loss": 17.7029, "step": 27429 }, { "epoch": 0.5013983585281591, "grad_norm": 7.3252409032340005, "learning_rate": 5.220438620450754e-06, "loss": 17.3549, "step": 27430 }, { "epoch": 0.5014166377246057, "grad_norm": 5.853935514389421, "learning_rate": 5.220142896349424e-06, "loss": 17.3847, "step": 27431 }, { "epoch": 0.5014349169210521, "grad_norm": 4.588873686307592, "learning_rate": 5.219847171476515e-06, "loss": 16.8352, "step": 27432 }, { "epoch": 0.5014531961174987, "grad_norm": 6.392448853003408, "learning_rate": 5.2195514458330585e-06, "loss": 17.4353, "step": 27433 }, { "epoch": 0.5014714753139452, "grad_norm": 7.0629340071061835, "learning_rate": 5.219255719420095e-06, "loss": 18.0061, "step": 27434 }, { "epoch": 0.5014897545103917, "grad_norm": 9.441199465210085, "learning_rate": 5.218959992238658e-06, "loss": 17.6992, "step": 27435 }, { "epoch": 0.5015080337068383, "grad_norm": 6.942934271494722, "learning_rate": 5.218664264289786e-06, "loss": 17.6394, "step": 27436 }, { "epoch": 0.5015263129032848, "grad_norm": 6.086372515573157, "learning_rate": 5.2183685355745126e-06, "loss": 17.388, "step": 27437 }, { "epoch": 0.5015445920997313, "grad_norm": 6.790859367633346, "learning_rate": 5.218072806093879e-06, "loss": 17.5098, "step": 27438 }, { "epoch": 0.5015628712961778, "grad_norm": 6.377041254405387, "learning_rate": 5.217777075848918e-06, "loss": 17.4578, "step": 27439 }, { "epoch": 0.5015811504926243, "grad_norm": 8.609211270087672, "learning_rate": 5.217481344840667e-06, "loss": 17.4983, "step": 27440 }, { "epoch": 0.5015994296890709, "grad_norm": 5.416409304891662, "learning_rate": 5.217185613070164e-06, "loss": 17.1505, "step": 27441 }, { "epoch": 0.5016177088855174, "grad_norm": 6.184904764132652, "learning_rate": 5.2168898805384424e-06, "loss": 17.5776, "step": 27442 }, { "epoch": 0.501635988081964, "grad_norm": 6.135180400266057, "learning_rate": 5.216594147246543e-06, "loss": 17.5401, "step": 27443 }, { "epoch": 0.5016542672784104, "grad_norm": 7.028843924673126, "learning_rate": 5.216298413195497e-06, "loss": 17.506, "step": 27444 }, { "epoch": 0.5016725464748569, "grad_norm": 5.641817449703513, "learning_rate": 5.216002678386346e-06, "loss": 17.2133, "step": 27445 }, { "epoch": 0.5016908256713035, "grad_norm": 5.1560586955278405, "learning_rate": 5.215706942820124e-06, "loss": 17.0432, "step": 27446 }, { "epoch": 0.50170910486775, "grad_norm": 6.549285818228365, "learning_rate": 5.215411206497868e-06, "loss": 17.5745, "step": 27447 }, { "epoch": 0.5017273840641966, "grad_norm": 7.7095342333496815, "learning_rate": 5.215115469420614e-06, "loss": 18.2654, "step": 27448 }, { "epoch": 0.5017456632606431, "grad_norm": 5.941606628883112, "learning_rate": 5.214819731589398e-06, "loss": 17.1361, "step": 27449 }, { "epoch": 0.5017639424570896, "grad_norm": 7.8957610329637165, "learning_rate": 5.214523993005259e-06, "loss": 17.9532, "step": 27450 }, { "epoch": 0.5017822216535361, "grad_norm": 6.372576546619049, "learning_rate": 5.214228253669232e-06, "loss": 17.3834, "step": 27451 }, { "epoch": 0.5018005008499826, "grad_norm": 7.6097126215648245, "learning_rate": 5.213932513582353e-06, "loss": 18.1436, "step": 27452 }, { "epoch": 0.5018187800464292, "grad_norm": 5.852300587894222, "learning_rate": 5.2136367727456595e-06, "loss": 17.1411, "step": 27453 }, { "epoch": 0.5018370592428757, "grad_norm": 9.691714048624561, "learning_rate": 5.2133410311601875e-06, "loss": 18.9537, "step": 27454 }, { "epoch": 0.5018553384393222, "grad_norm": 7.197165582650849, "learning_rate": 5.2130452888269725e-06, "loss": 17.9285, "step": 27455 }, { "epoch": 0.5018736176357688, "grad_norm": 6.790190706934791, "learning_rate": 5.212749545747053e-06, "loss": 17.5413, "step": 27456 }, { "epoch": 0.5018918968322152, "grad_norm": 5.920475628504475, "learning_rate": 5.212453801921467e-06, "loss": 17.2972, "step": 27457 }, { "epoch": 0.5019101760286617, "grad_norm": 5.7450883295921695, "learning_rate": 5.2121580573512456e-06, "loss": 17.2121, "step": 27458 }, { "epoch": 0.5019284552251083, "grad_norm": 6.129887306500934, "learning_rate": 5.21186231203743e-06, "loss": 17.3798, "step": 27459 }, { "epoch": 0.5019467344215548, "grad_norm": 6.389799105005755, "learning_rate": 5.2115665659810555e-06, "loss": 17.4598, "step": 27460 }, { "epoch": 0.5019650136180014, "grad_norm": 7.487780545050386, "learning_rate": 5.211270819183159e-06, "loss": 17.9528, "step": 27461 }, { "epoch": 0.5019832928144479, "grad_norm": 6.545697454607325, "learning_rate": 5.210975071644776e-06, "loss": 17.2926, "step": 27462 }, { "epoch": 0.5020015720108943, "grad_norm": 5.724842236748844, "learning_rate": 5.210679323366943e-06, "loss": 17.2105, "step": 27463 }, { "epoch": 0.5020198512073409, "grad_norm": 5.7749935514895725, "learning_rate": 5.210383574350698e-06, "loss": 17.0421, "step": 27464 }, { "epoch": 0.5020381304037874, "grad_norm": 5.319072944307387, "learning_rate": 5.210087824597076e-06, "loss": 16.9669, "step": 27465 }, { "epoch": 0.502056409600234, "grad_norm": 6.916192023352253, "learning_rate": 5.209792074107116e-06, "loss": 17.661, "step": 27466 }, { "epoch": 0.5020746887966805, "grad_norm": 6.52839170697784, "learning_rate": 5.209496322881852e-06, "loss": 17.5856, "step": 27467 }, { "epoch": 0.502092967993127, "grad_norm": 5.7575835279806, "learning_rate": 5.209200570922322e-06, "loss": 17.2502, "step": 27468 }, { "epoch": 0.5021112471895736, "grad_norm": 5.21937368770072, "learning_rate": 5.208904818229561e-06, "loss": 17.2321, "step": 27469 }, { "epoch": 0.50212952638602, "grad_norm": 8.122590314842842, "learning_rate": 5.2086090648046096e-06, "loss": 18.061, "step": 27470 }, { "epoch": 0.5021478055824666, "grad_norm": 5.781673897216666, "learning_rate": 5.2083133106484986e-06, "loss": 17.1307, "step": 27471 }, { "epoch": 0.5021660847789131, "grad_norm": 6.9870143769618895, "learning_rate": 5.208017555762268e-06, "loss": 17.7875, "step": 27472 }, { "epoch": 0.5021843639753596, "grad_norm": 6.684202125132718, "learning_rate": 5.207721800146954e-06, "loss": 17.4909, "step": 27473 }, { "epoch": 0.5022026431718062, "grad_norm": 6.565595420450179, "learning_rate": 5.2074260438035954e-06, "loss": 17.5612, "step": 27474 }, { "epoch": 0.5022209223682527, "grad_norm": 6.897436635906595, "learning_rate": 5.207130286733224e-06, "loss": 17.6166, "step": 27475 }, { "epoch": 0.5022392015646993, "grad_norm": 6.051510521759804, "learning_rate": 5.206834528936878e-06, "loss": 17.5767, "step": 27476 }, { "epoch": 0.5022574807611457, "grad_norm": 4.660172711916454, "learning_rate": 5.206538770415598e-06, "loss": 16.8226, "step": 27477 }, { "epoch": 0.5022757599575922, "grad_norm": 5.502986954221638, "learning_rate": 5.206243011170415e-06, "loss": 17.023, "step": 27478 }, { "epoch": 0.5022940391540388, "grad_norm": 5.978417104720057, "learning_rate": 5.205947251202369e-06, "loss": 17.5388, "step": 27479 }, { "epoch": 0.5023123183504853, "grad_norm": 6.8248904751761765, "learning_rate": 5.205651490512496e-06, "loss": 17.613, "step": 27480 }, { "epoch": 0.5023305975469319, "grad_norm": 4.8724367239266, "learning_rate": 5.205355729101833e-06, "loss": 17.0065, "step": 27481 }, { "epoch": 0.5023488767433784, "grad_norm": 5.961520441985069, "learning_rate": 5.205059966971415e-06, "loss": 17.6608, "step": 27482 }, { "epoch": 0.5023671559398248, "grad_norm": 5.884050484409216, "learning_rate": 5.204764204122279e-06, "loss": 17.1868, "step": 27483 }, { "epoch": 0.5023854351362714, "grad_norm": 6.253560070917388, "learning_rate": 5.2044684405554645e-06, "loss": 17.4443, "step": 27484 }, { "epoch": 0.5024037143327179, "grad_norm": 8.408789749844162, "learning_rate": 5.204172676272003e-06, "loss": 18.3138, "step": 27485 }, { "epoch": 0.5024219935291645, "grad_norm": 6.558372316727946, "learning_rate": 5.203876911272936e-06, "loss": 17.6935, "step": 27486 }, { "epoch": 0.502440272725611, "grad_norm": 6.158160751081932, "learning_rate": 5.203581145559298e-06, "loss": 17.2859, "step": 27487 }, { "epoch": 0.5024585519220575, "grad_norm": 6.208609058868916, "learning_rate": 5.2032853791321255e-06, "loss": 17.3936, "step": 27488 }, { "epoch": 0.502476831118504, "grad_norm": 7.848355301758456, "learning_rate": 5.202989611992455e-06, "loss": 17.9317, "step": 27489 }, { "epoch": 0.5024951103149505, "grad_norm": 6.685016959708192, "learning_rate": 5.202693844141322e-06, "loss": 17.299, "step": 27490 }, { "epoch": 0.5025133895113971, "grad_norm": 5.288702752183807, "learning_rate": 5.202398075579767e-06, "loss": 17.035, "step": 27491 }, { "epoch": 0.5025316687078436, "grad_norm": 8.106273503780569, "learning_rate": 5.202102306308825e-06, "loss": 17.9173, "step": 27492 }, { "epoch": 0.5025499479042901, "grad_norm": 6.0398167216408885, "learning_rate": 5.2018065363295304e-06, "loss": 17.4087, "step": 27493 }, { "epoch": 0.5025682271007367, "grad_norm": 7.557718648557547, "learning_rate": 5.201510765642922e-06, "loss": 17.7971, "step": 27494 }, { "epoch": 0.5025865062971832, "grad_norm": 6.054882255550186, "learning_rate": 5.201214994250034e-06, "loss": 17.3972, "step": 27495 }, { "epoch": 0.5026047854936297, "grad_norm": 7.194471640590093, "learning_rate": 5.200919222151908e-06, "loss": 17.9227, "step": 27496 }, { "epoch": 0.5026230646900762, "grad_norm": 7.479360695694727, "learning_rate": 5.200623449349575e-06, "loss": 18.0845, "step": 27497 }, { "epoch": 0.5026413438865227, "grad_norm": 6.683254102833924, "learning_rate": 5.200327675844076e-06, "loss": 17.4318, "step": 27498 }, { "epoch": 0.5026596230829693, "grad_norm": 6.748717926325936, "learning_rate": 5.200031901636444e-06, "loss": 17.6456, "step": 27499 }, { "epoch": 0.5026779022794158, "grad_norm": 6.117365829160261, "learning_rate": 5.199736126727719e-06, "loss": 17.3438, "step": 27500 }, { "epoch": 0.5026961814758624, "grad_norm": 6.642969667594831, "learning_rate": 5.199440351118936e-06, "loss": 17.4521, "step": 27501 }, { "epoch": 0.5027144606723088, "grad_norm": 6.546327008119256, "learning_rate": 5.199144574811132e-06, "loss": 17.5044, "step": 27502 }, { "epoch": 0.5027327398687553, "grad_norm": 5.349614452347511, "learning_rate": 5.198848797805343e-06, "loss": 16.9952, "step": 27503 }, { "epoch": 0.5027510190652019, "grad_norm": 6.806322501820001, "learning_rate": 5.198553020102606e-06, "loss": 17.7437, "step": 27504 }, { "epoch": 0.5027692982616484, "grad_norm": 6.504506895365117, "learning_rate": 5.198257241703959e-06, "loss": 17.6166, "step": 27505 }, { "epoch": 0.502787577458095, "grad_norm": 6.8308968273401645, "learning_rate": 5.1979614626104365e-06, "loss": 17.7568, "step": 27506 }, { "epoch": 0.5028058566545415, "grad_norm": 6.600709319039463, "learning_rate": 5.197665682823076e-06, "loss": 17.5428, "step": 27507 }, { "epoch": 0.502824135850988, "grad_norm": 6.2470534123693895, "learning_rate": 5.197369902342916e-06, "loss": 17.3073, "step": 27508 }, { "epoch": 0.5028424150474345, "grad_norm": 6.976116280348958, "learning_rate": 5.197074121170991e-06, "loss": 17.7634, "step": 27509 }, { "epoch": 0.502860694243881, "grad_norm": 6.1875155221474625, "learning_rate": 5.196778339308338e-06, "loss": 17.3329, "step": 27510 }, { "epoch": 0.5028789734403276, "grad_norm": 6.46088522994846, "learning_rate": 5.196482556755994e-06, "loss": 17.599, "step": 27511 }, { "epoch": 0.5028972526367741, "grad_norm": 8.997033569978639, "learning_rate": 5.196186773514995e-06, "loss": 18.3507, "step": 27512 }, { "epoch": 0.5029155318332206, "grad_norm": 5.922053130112032, "learning_rate": 5.19589098958638e-06, "loss": 17.4276, "step": 27513 }, { "epoch": 0.5029338110296672, "grad_norm": 5.7468085657305625, "learning_rate": 5.195595204971182e-06, "loss": 17.3735, "step": 27514 }, { "epoch": 0.5029520902261136, "grad_norm": 6.405522064361517, "learning_rate": 5.195299419670442e-06, "loss": 17.8147, "step": 27515 }, { "epoch": 0.5029703694225602, "grad_norm": 7.037740565033126, "learning_rate": 5.195003633685194e-06, "loss": 17.6155, "step": 27516 }, { "epoch": 0.5029886486190067, "grad_norm": 5.7461921632343325, "learning_rate": 5.194707847016474e-06, "loss": 17.2436, "step": 27517 }, { "epoch": 0.5030069278154532, "grad_norm": 6.768562562646901, "learning_rate": 5.19441205966532e-06, "loss": 17.821, "step": 27518 }, { "epoch": 0.5030252070118998, "grad_norm": 6.994355393123101, "learning_rate": 5.194116271632769e-06, "loss": 17.6248, "step": 27519 }, { "epoch": 0.5030434862083463, "grad_norm": 5.945406372738656, "learning_rate": 5.193820482919858e-06, "loss": 17.2435, "step": 27520 }, { "epoch": 0.5030617654047929, "grad_norm": 5.974668663293852, "learning_rate": 5.193524693527623e-06, "loss": 17.3625, "step": 27521 }, { "epoch": 0.5030800446012393, "grad_norm": 6.9194982401268526, "learning_rate": 5.1932289034571e-06, "loss": 18.2391, "step": 27522 }, { "epoch": 0.5030983237976858, "grad_norm": 7.315381405321791, "learning_rate": 5.192933112709326e-06, "loss": 17.9562, "step": 27523 }, { "epoch": 0.5031166029941324, "grad_norm": 6.594120925526298, "learning_rate": 5.1926373212853385e-06, "loss": 17.5046, "step": 27524 }, { "epoch": 0.5031348821905789, "grad_norm": 7.304940661604103, "learning_rate": 5.192341529186175e-06, "loss": 17.7933, "step": 27525 }, { "epoch": 0.5031531613870254, "grad_norm": 5.483380648253276, "learning_rate": 5.19204573641287e-06, "loss": 17.1572, "step": 27526 }, { "epoch": 0.503171440583472, "grad_norm": 6.890729723949929, "learning_rate": 5.191749942966462e-06, "loss": 17.8273, "step": 27527 }, { "epoch": 0.5031897197799184, "grad_norm": 5.600074806366154, "learning_rate": 5.191454148847986e-06, "loss": 17.2443, "step": 27528 }, { "epoch": 0.503207998976365, "grad_norm": 5.512225238396698, "learning_rate": 5.191158354058482e-06, "loss": 17.1353, "step": 27529 }, { "epoch": 0.5032262781728115, "grad_norm": 6.097103260232165, "learning_rate": 5.190862558598983e-06, "loss": 17.3884, "step": 27530 }, { "epoch": 0.503244557369258, "grad_norm": 5.2372114164909735, "learning_rate": 5.190566762470527e-06, "loss": 16.9618, "step": 27531 }, { "epoch": 0.5032628365657046, "grad_norm": 7.533783967663297, "learning_rate": 5.190270965674152e-06, "loss": 18.1905, "step": 27532 }, { "epoch": 0.5032811157621511, "grad_norm": 5.798360449902278, "learning_rate": 5.189975168210893e-06, "loss": 17.3989, "step": 27533 }, { "epoch": 0.5032993949585977, "grad_norm": 6.640908870126543, "learning_rate": 5.189679370081789e-06, "loss": 17.4577, "step": 27534 }, { "epoch": 0.5033176741550441, "grad_norm": 5.080109637563325, "learning_rate": 5.189383571287872e-06, "loss": 16.9935, "step": 27535 }, { "epoch": 0.5033359533514906, "grad_norm": 6.912073437888923, "learning_rate": 5.189087771830186e-06, "loss": 17.6632, "step": 27536 }, { "epoch": 0.5033542325479372, "grad_norm": 6.575914356282273, "learning_rate": 5.188791971709761e-06, "loss": 17.4028, "step": 27537 }, { "epoch": 0.5033725117443837, "grad_norm": 5.540881853619029, "learning_rate": 5.188496170927637e-06, "loss": 17.2044, "step": 27538 }, { "epoch": 0.5033907909408303, "grad_norm": 7.219808193319258, "learning_rate": 5.1882003694848515e-06, "loss": 17.7435, "step": 27539 }, { "epoch": 0.5034090701372768, "grad_norm": 7.271662511375215, "learning_rate": 5.187904567382439e-06, "loss": 18.1021, "step": 27540 }, { "epoch": 0.5034273493337232, "grad_norm": 6.789408787706083, "learning_rate": 5.187608764621437e-06, "loss": 17.6288, "step": 27541 }, { "epoch": 0.5034456285301698, "grad_norm": 6.929807585433291, "learning_rate": 5.187312961202882e-06, "loss": 17.5508, "step": 27542 }, { "epoch": 0.5034639077266163, "grad_norm": 5.757440779569821, "learning_rate": 5.187017157127815e-06, "loss": 17.2604, "step": 27543 }, { "epoch": 0.5034821869230629, "grad_norm": 6.736364591735982, "learning_rate": 5.186721352397265e-06, "loss": 17.5449, "step": 27544 }, { "epoch": 0.5035004661195094, "grad_norm": 5.680884503028648, "learning_rate": 5.186425547012275e-06, "loss": 17.0263, "step": 27545 }, { "epoch": 0.5035187453159559, "grad_norm": 4.685044570264193, "learning_rate": 5.18612974097388e-06, "loss": 16.8804, "step": 27546 }, { "epoch": 0.5035370245124025, "grad_norm": 7.224721385841569, "learning_rate": 5.185833934283114e-06, "loss": 17.9704, "step": 27547 }, { "epoch": 0.5035553037088489, "grad_norm": 6.933162705042278, "learning_rate": 5.185538126941019e-06, "loss": 17.9275, "step": 27548 }, { "epoch": 0.5035735829052955, "grad_norm": 5.864375061142779, "learning_rate": 5.1852423189486256e-06, "loss": 17.6488, "step": 27549 }, { "epoch": 0.503591862101742, "grad_norm": 7.11665122116013, "learning_rate": 5.184946510306977e-06, "loss": 17.5661, "step": 27550 }, { "epoch": 0.5036101412981885, "grad_norm": 6.304113181307043, "learning_rate": 5.184650701017105e-06, "loss": 17.5131, "step": 27551 }, { "epoch": 0.5036284204946351, "grad_norm": 5.737618692572437, "learning_rate": 5.18435489108005e-06, "loss": 16.9711, "step": 27552 }, { "epoch": 0.5036466996910816, "grad_norm": 5.700244822219272, "learning_rate": 5.184059080496846e-06, "loss": 17.1295, "step": 27553 }, { "epoch": 0.5036649788875281, "grad_norm": 5.862370704788984, "learning_rate": 5.183763269268531e-06, "loss": 17.1625, "step": 27554 }, { "epoch": 0.5036832580839746, "grad_norm": 5.909596797048763, "learning_rate": 5.183467457396142e-06, "loss": 17.1662, "step": 27555 }, { "epoch": 0.5037015372804211, "grad_norm": 8.056279847860345, "learning_rate": 5.183171644880714e-06, "loss": 17.7257, "step": 27556 }, { "epoch": 0.5037198164768677, "grad_norm": 6.257912896267873, "learning_rate": 5.182875831723288e-06, "loss": 17.5899, "step": 27557 }, { "epoch": 0.5037380956733142, "grad_norm": 7.619659666477824, "learning_rate": 5.1825800179248964e-06, "loss": 17.6511, "step": 27558 }, { "epoch": 0.5037563748697608, "grad_norm": 8.25092517858032, "learning_rate": 5.182284203486577e-06, "loss": 18.1767, "step": 27559 }, { "epoch": 0.5037746540662073, "grad_norm": 5.824274287405943, "learning_rate": 5.1819883884093705e-06, "loss": 17.31, "step": 27560 }, { "epoch": 0.5037929332626537, "grad_norm": 6.923456020475565, "learning_rate": 5.181692572694308e-06, "loss": 17.7274, "step": 27561 }, { "epoch": 0.5038112124591003, "grad_norm": 7.702178478700586, "learning_rate": 5.181396756342428e-06, "loss": 17.6612, "step": 27562 }, { "epoch": 0.5038294916555468, "grad_norm": 5.526224845757836, "learning_rate": 5.18110093935477e-06, "loss": 17.2442, "step": 27563 }, { "epoch": 0.5038477708519934, "grad_norm": 5.925765686166306, "learning_rate": 5.180805121732367e-06, "loss": 17.246, "step": 27564 }, { "epoch": 0.5038660500484399, "grad_norm": 6.314698857761973, "learning_rate": 5.18050930347626e-06, "loss": 17.4263, "step": 27565 }, { "epoch": 0.5038843292448864, "grad_norm": 6.400760567150828, "learning_rate": 5.180213484587482e-06, "loss": 17.4009, "step": 27566 }, { "epoch": 0.5039026084413329, "grad_norm": 6.066075711125857, "learning_rate": 5.179917665067072e-06, "loss": 17.5616, "step": 27567 }, { "epoch": 0.5039208876377794, "grad_norm": 6.74359468662774, "learning_rate": 5.179621844916065e-06, "loss": 17.6614, "step": 27568 }, { "epoch": 0.503939166834226, "grad_norm": 6.5324995219543736, "learning_rate": 5.1793260241355e-06, "loss": 17.2444, "step": 27569 }, { "epoch": 0.5039574460306725, "grad_norm": 6.916770101727999, "learning_rate": 5.179030202726414e-06, "loss": 18.1773, "step": 27570 }, { "epoch": 0.503975725227119, "grad_norm": 5.983715099934033, "learning_rate": 5.17873438068984e-06, "loss": 17.2402, "step": 27571 }, { "epoch": 0.5039940044235656, "grad_norm": 5.256466208380804, "learning_rate": 5.178438558026819e-06, "loss": 17.3271, "step": 27572 }, { "epoch": 0.504012283620012, "grad_norm": 4.865510599169104, "learning_rate": 5.178142734738386e-06, "loss": 16.9278, "step": 27573 }, { "epoch": 0.5040305628164586, "grad_norm": 6.009582623410873, "learning_rate": 5.17784691082558e-06, "loss": 17.3797, "step": 27574 }, { "epoch": 0.5040488420129051, "grad_norm": 6.029082880362614, "learning_rate": 5.177551086289434e-06, "loss": 17.3101, "step": 27575 }, { "epoch": 0.5040671212093516, "grad_norm": 6.645481189205907, "learning_rate": 5.177255261130987e-06, "loss": 17.4678, "step": 27576 }, { "epoch": 0.5040854004057982, "grad_norm": 6.868423411372092, "learning_rate": 5.1769594353512765e-06, "loss": 17.8212, "step": 27577 }, { "epoch": 0.5041036796022447, "grad_norm": 6.7974656322543865, "learning_rate": 5.1766636089513375e-06, "loss": 17.5799, "step": 27578 }, { "epoch": 0.5041219587986913, "grad_norm": 9.202097818861546, "learning_rate": 5.176367781932209e-06, "loss": 17.9019, "step": 27579 }, { "epoch": 0.5041402379951377, "grad_norm": 6.546695558542694, "learning_rate": 5.176071954294926e-06, "loss": 17.2939, "step": 27580 }, { "epoch": 0.5041585171915842, "grad_norm": 4.114361230779656, "learning_rate": 5.175776126040526e-06, "loss": 16.5922, "step": 27581 }, { "epoch": 0.5041767963880308, "grad_norm": 5.446068971978248, "learning_rate": 5.175480297170047e-06, "loss": 17.1501, "step": 27582 }, { "epoch": 0.5041950755844773, "grad_norm": 6.528465725908221, "learning_rate": 5.1751844676845234e-06, "loss": 17.6874, "step": 27583 }, { "epoch": 0.5042133547809239, "grad_norm": 6.263069703194509, "learning_rate": 5.174888637584995e-06, "loss": 17.3593, "step": 27584 }, { "epoch": 0.5042316339773704, "grad_norm": 7.089100425270701, "learning_rate": 5.174592806872495e-06, "loss": 17.7325, "step": 27585 }, { "epoch": 0.5042499131738168, "grad_norm": 6.952406788040247, "learning_rate": 5.174296975548063e-06, "loss": 17.8632, "step": 27586 }, { "epoch": 0.5042681923702634, "grad_norm": 6.702582100217042, "learning_rate": 5.1740011436127355e-06, "loss": 17.5542, "step": 27587 }, { "epoch": 0.5042864715667099, "grad_norm": 6.415982961407396, "learning_rate": 5.1737053110675505e-06, "loss": 17.8005, "step": 27588 }, { "epoch": 0.5043047507631565, "grad_norm": 6.657096792187528, "learning_rate": 5.173409477913543e-06, "loss": 17.6757, "step": 27589 }, { "epoch": 0.504323029959603, "grad_norm": 7.168338115829753, "learning_rate": 5.173113644151748e-06, "loss": 17.7773, "step": 27590 }, { "epoch": 0.5043413091560495, "grad_norm": 4.892969233631025, "learning_rate": 5.172817809783207e-06, "loss": 16.8434, "step": 27591 }, { "epoch": 0.5043595883524961, "grad_norm": 5.473425059303431, "learning_rate": 5.172521974808954e-06, "loss": 17.2269, "step": 27592 }, { "epoch": 0.5043778675489425, "grad_norm": 6.62752555391626, "learning_rate": 5.1722261392300265e-06, "loss": 17.6366, "step": 27593 }, { "epoch": 0.504396146745389, "grad_norm": 5.984820671045974, "learning_rate": 5.171930303047461e-06, "loss": 17.1321, "step": 27594 }, { "epoch": 0.5044144259418356, "grad_norm": 7.448722269027972, "learning_rate": 5.171634466262294e-06, "loss": 17.9497, "step": 27595 }, { "epoch": 0.5044327051382821, "grad_norm": 5.5853500569532955, "learning_rate": 5.171338628875564e-06, "loss": 17.2253, "step": 27596 }, { "epoch": 0.5044509843347287, "grad_norm": 6.771563707319088, "learning_rate": 5.171042790888308e-06, "loss": 17.549, "step": 27597 }, { "epoch": 0.5044692635311752, "grad_norm": 5.871899978894757, "learning_rate": 5.170746952301559e-06, "loss": 17.4473, "step": 27598 }, { "epoch": 0.5044875427276216, "grad_norm": 6.274713350608566, "learning_rate": 5.170451113116359e-06, "loss": 17.7429, "step": 27599 }, { "epoch": 0.5045058219240682, "grad_norm": 7.110533040292356, "learning_rate": 5.170155273333743e-06, "loss": 17.9841, "step": 27600 }, { "epoch": 0.5045241011205147, "grad_norm": 7.803695683487732, "learning_rate": 5.169859432954747e-06, "loss": 18.2242, "step": 27601 }, { "epoch": 0.5045423803169613, "grad_norm": 5.916541655408195, "learning_rate": 5.169563591980409e-06, "loss": 17.0129, "step": 27602 }, { "epoch": 0.5045606595134078, "grad_norm": 6.182438632464134, "learning_rate": 5.169267750411763e-06, "loss": 17.5817, "step": 27603 }, { "epoch": 0.5045789387098543, "grad_norm": 5.900076900146689, "learning_rate": 5.1689719082498494e-06, "loss": 17.128, "step": 27604 }, { "epoch": 0.5045972179063009, "grad_norm": 7.172817506058974, "learning_rate": 5.168676065495705e-06, "loss": 17.5771, "step": 27605 }, { "epoch": 0.5046154971027473, "grad_norm": 7.6274939751249615, "learning_rate": 5.168380222150364e-06, "loss": 17.8769, "step": 27606 }, { "epoch": 0.5046337762991939, "grad_norm": 5.404809349830576, "learning_rate": 5.1680843782148656e-06, "loss": 17.0407, "step": 27607 }, { "epoch": 0.5046520554956404, "grad_norm": 7.394237278623086, "learning_rate": 5.167788533690247e-06, "loss": 17.8788, "step": 27608 }, { "epoch": 0.5046703346920869, "grad_norm": 7.492467063448779, "learning_rate": 5.167492688577543e-06, "loss": 17.9748, "step": 27609 }, { "epoch": 0.5046886138885335, "grad_norm": 6.8790718888237, "learning_rate": 5.167196842877792e-06, "loss": 17.5047, "step": 27610 }, { "epoch": 0.50470689308498, "grad_norm": 6.597736836157785, "learning_rate": 5.1669009965920305e-06, "loss": 17.8067, "step": 27611 }, { "epoch": 0.5047251722814265, "grad_norm": 5.16417093993006, "learning_rate": 5.166605149721296e-06, "loss": 16.864, "step": 27612 }, { "epoch": 0.504743451477873, "grad_norm": 6.790951932401917, "learning_rate": 5.166309302266624e-06, "loss": 17.5408, "step": 27613 }, { "epoch": 0.5047617306743195, "grad_norm": 7.320218145959628, "learning_rate": 5.166013454229053e-06, "loss": 17.9132, "step": 27614 }, { "epoch": 0.5047800098707661, "grad_norm": 5.540957011762305, "learning_rate": 5.16571760560962e-06, "loss": 17.3871, "step": 27615 }, { "epoch": 0.5047982890672126, "grad_norm": 7.778922745046559, "learning_rate": 5.16542175640936e-06, "loss": 18.0185, "step": 27616 }, { "epoch": 0.5048165682636592, "grad_norm": 6.193097886338422, "learning_rate": 5.16512590662931e-06, "loss": 17.3566, "step": 27617 }, { "epoch": 0.5048348474601057, "grad_norm": 5.811583558003684, "learning_rate": 5.164830056270509e-06, "loss": 17.2262, "step": 27618 }, { "epoch": 0.5048531266565521, "grad_norm": 7.898633495034506, "learning_rate": 5.164534205333995e-06, "loss": 18.3214, "step": 27619 }, { "epoch": 0.5048714058529987, "grad_norm": 8.030837492772608, "learning_rate": 5.1642383538208005e-06, "loss": 18.3775, "step": 27620 }, { "epoch": 0.5048896850494452, "grad_norm": 5.723929271515542, "learning_rate": 5.163942501731966e-06, "loss": 17.4789, "step": 27621 }, { "epoch": 0.5049079642458918, "grad_norm": 5.230468112841281, "learning_rate": 5.163646649068527e-06, "loss": 16.9984, "step": 27622 }, { "epoch": 0.5049262434423383, "grad_norm": 6.128025454370273, "learning_rate": 5.16335079583152e-06, "loss": 17.399, "step": 27623 }, { "epoch": 0.5049445226387848, "grad_norm": 5.866331204761387, "learning_rate": 5.163054942021983e-06, "loss": 17.6067, "step": 27624 }, { "epoch": 0.5049628018352313, "grad_norm": 5.760854215968586, "learning_rate": 5.162759087640953e-06, "loss": 17.3237, "step": 27625 }, { "epoch": 0.5049810810316778, "grad_norm": 7.015165463723453, "learning_rate": 5.162463232689465e-06, "loss": 18.1369, "step": 27626 }, { "epoch": 0.5049993602281244, "grad_norm": 6.416469001773678, "learning_rate": 5.162167377168559e-06, "loss": 17.7597, "step": 27627 }, { "epoch": 0.5050176394245709, "grad_norm": 6.994677546386976, "learning_rate": 5.1618715210792704e-06, "loss": 18.0055, "step": 27628 }, { "epoch": 0.5050359186210174, "grad_norm": 7.063157435887147, "learning_rate": 5.161575664422637e-06, "loss": 17.8699, "step": 27629 }, { "epoch": 0.505054197817464, "grad_norm": 7.2920732637401215, "learning_rate": 5.161279807199692e-06, "loss": 18.198, "step": 27630 }, { "epoch": 0.5050724770139104, "grad_norm": 5.055173979027419, "learning_rate": 5.160983949411478e-06, "loss": 16.9205, "step": 27631 }, { "epoch": 0.505090756210357, "grad_norm": 5.647088359036415, "learning_rate": 5.1606880910590285e-06, "loss": 17.2178, "step": 27632 }, { "epoch": 0.5051090354068035, "grad_norm": 5.928155576463627, "learning_rate": 5.160392232143381e-06, "loss": 17.5289, "step": 27633 }, { "epoch": 0.50512731460325, "grad_norm": 6.396122099132778, "learning_rate": 5.160096372665573e-06, "loss": 17.3092, "step": 27634 }, { "epoch": 0.5051455937996966, "grad_norm": 5.2811810268772446, "learning_rate": 5.1598005126266395e-06, "loss": 17.0258, "step": 27635 }, { "epoch": 0.5051638729961431, "grad_norm": 6.033285483416313, "learning_rate": 5.159504652027621e-06, "loss": 17.4595, "step": 27636 }, { "epoch": 0.5051821521925897, "grad_norm": 6.461835547461277, "learning_rate": 5.159208790869552e-06, "loss": 17.6768, "step": 27637 }, { "epoch": 0.5052004313890361, "grad_norm": 6.103894956407447, "learning_rate": 5.158912929153469e-06, "loss": 17.0627, "step": 27638 }, { "epoch": 0.5052187105854826, "grad_norm": 5.478906843384525, "learning_rate": 5.158617066880411e-06, "loss": 16.9255, "step": 27639 }, { "epoch": 0.5052369897819292, "grad_norm": 7.640017289504977, "learning_rate": 5.158321204051414e-06, "loss": 17.9303, "step": 27640 }, { "epoch": 0.5052552689783757, "grad_norm": 8.514597003385521, "learning_rate": 5.158025340667514e-06, "loss": 18.0154, "step": 27641 }, { "epoch": 0.5052735481748223, "grad_norm": 5.810975657165897, "learning_rate": 5.157729476729749e-06, "loss": 17.0998, "step": 27642 }, { "epoch": 0.5052918273712688, "grad_norm": 5.553983838062384, "learning_rate": 5.1574336122391575e-06, "loss": 17.1545, "step": 27643 }, { "epoch": 0.5053101065677152, "grad_norm": 5.185153497954882, "learning_rate": 5.157137747196773e-06, "loss": 17.1105, "step": 27644 }, { "epoch": 0.5053283857641618, "grad_norm": 7.312979692958259, "learning_rate": 5.156841881603635e-06, "loss": 17.5566, "step": 27645 }, { "epoch": 0.5053466649606083, "grad_norm": 7.308497903257434, "learning_rate": 5.156546015460782e-06, "loss": 18.2654, "step": 27646 }, { "epoch": 0.5053649441570549, "grad_norm": 6.652847675639543, "learning_rate": 5.156250148769247e-06, "loss": 17.6146, "step": 27647 }, { "epoch": 0.5053832233535014, "grad_norm": 6.041361608402879, "learning_rate": 5.155954281530066e-06, "loss": 17.3519, "step": 27648 }, { "epoch": 0.5054015025499479, "grad_norm": 6.277524567147896, "learning_rate": 5.155658413744281e-06, "loss": 17.147, "step": 27649 }, { "epoch": 0.5054197817463945, "grad_norm": 5.30187731898524, "learning_rate": 5.155362545412928e-06, "loss": 17.1637, "step": 27650 }, { "epoch": 0.5054380609428409, "grad_norm": 6.06369297579374, "learning_rate": 5.1550666765370416e-06, "loss": 17.3441, "step": 27651 }, { "epoch": 0.5054563401392875, "grad_norm": 6.125803243534805, "learning_rate": 5.15477080711766e-06, "loss": 17.4234, "step": 27652 }, { "epoch": 0.505474619335734, "grad_norm": 6.902841414799843, "learning_rate": 5.1544749371558214e-06, "loss": 17.7937, "step": 27653 }, { "epoch": 0.5054928985321805, "grad_norm": 9.121673690437543, "learning_rate": 5.154179066652559e-06, "loss": 17.8539, "step": 27654 }, { "epoch": 0.5055111777286271, "grad_norm": 5.513348411451003, "learning_rate": 5.153883195608914e-06, "loss": 17.23, "step": 27655 }, { "epoch": 0.5055294569250736, "grad_norm": 7.644708440959603, "learning_rate": 5.153587324025921e-06, "loss": 18.2627, "step": 27656 }, { "epoch": 0.5055477361215202, "grad_norm": 5.872963130275385, "learning_rate": 5.153291451904621e-06, "loss": 17.2168, "step": 27657 }, { "epoch": 0.5055660153179666, "grad_norm": 6.047861589930542, "learning_rate": 5.1529955792460425e-06, "loss": 17.3844, "step": 27658 }, { "epoch": 0.5055842945144131, "grad_norm": 6.697298712603379, "learning_rate": 5.1526997060512305e-06, "loss": 17.7653, "step": 27659 }, { "epoch": 0.5056025737108597, "grad_norm": 5.831644306619482, "learning_rate": 5.1524038323212215e-06, "loss": 17.4758, "step": 27660 }, { "epoch": 0.5056208529073062, "grad_norm": 7.811983398180582, "learning_rate": 5.1521079580570464e-06, "loss": 17.9186, "step": 27661 }, { "epoch": 0.5056391321037527, "grad_norm": 8.365875514911632, "learning_rate": 5.151812083259747e-06, "loss": 18.0911, "step": 27662 }, { "epoch": 0.5056574113001993, "grad_norm": 7.142597196886949, "learning_rate": 5.151516207930361e-06, "loss": 17.9224, "step": 27663 }, { "epoch": 0.5056756904966457, "grad_norm": 7.144160869364189, "learning_rate": 5.151220332069923e-06, "loss": 17.8455, "step": 27664 }, { "epoch": 0.5056939696930923, "grad_norm": 5.225125562203611, "learning_rate": 5.15092445567947e-06, "loss": 17.0537, "step": 27665 }, { "epoch": 0.5057122488895388, "grad_norm": 6.164546195115452, "learning_rate": 5.1506285787600405e-06, "loss": 17.3785, "step": 27666 }, { "epoch": 0.5057305280859853, "grad_norm": 5.844485109282541, "learning_rate": 5.150332701312672e-06, "loss": 17.5462, "step": 27667 }, { "epoch": 0.5057488072824319, "grad_norm": 5.7195059066051215, "learning_rate": 5.150036823338399e-06, "loss": 17.2419, "step": 27668 }, { "epoch": 0.5057670864788784, "grad_norm": 7.8681767607495745, "learning_rate": 5.1497409448382605e-06, "loss": 17.7197, "step": 27669 }, { "epoch": 0.505785365675325, "grad_norm": 3.9105518559744006, "learning_rate": 5.149445065813294e-06, "loss": 16.5977, "step": 27670 }, { "epoch": 0.5058036448717714, "grad_norm": 6.170131995520045, "learning_rate": 5.1491491862645325e-06, "loss": 17.2897, "step": 27671 }, { "epoch": 0.5058219240682179, "grad_norm": 5.1358430281321175, "learning_rate": 5.148853306193018e-06, "loss": 17.7214, "step": 27672 }, { "epoch": 0.5058402032646645, "grad_norm": 5.856023897165747, "learning_rate": 5.148557425599786e-06, "loss": 17.4302, "step": 27673 }, { "epoch": 0.505858482461111, "grad_norm": 6.09834731008439, "learning_rate": 5.148261544485873e-06, "loss": 17.6248, "step": 27674 }, { "epoch": 0.5058767616575576, "grad_norm": 5.32953497411772, "learning_rate": 5.1479656628523166e-06, "loss": 16.9663, "step": 27675 }, { "epoch": 0.505895040854004, "grad_norm": 5.791288210493317, "learning_rate": 5.147669780700151e-06, "loss": 17.4501, "step": 27676 }, { "epoch": 0.5059133200504505, "grad_norm": 4.674975822255367, "learning_rate": 5.147373898030419e-06, "loss": 17.5492, "step": 27677 }, { "epoch": 0.5059315992468971, "grad_norm": 6.4795540493638315, "learning_rate": 5.147078014844152e-06, "loss": 17.4952, "step": 27678 }, { "epoch": 0.5059498784433436, "grad_norm": 5.87615287630539, "learning_rate": 5.14678213114239e-06, "loss": 17.3138, "step": 27679 }, { "epoch": 0.5059681576397902, "grad_norm": 6.823351620249338, "learning_rate": 5.146486246926169e-06, "loss": 17.7274, "step": 27680 }, { "epoch": 0.5059864368362367, "grad_norm": 6.636215937774113, "learning_rate": 5.1461903621965256e-06, "loss": 17.8749, "step": 27681 }, { "epoch": 0.5060047160326832, "grad_norm": 5.570020480201589, "learning_rate": 5.145894476954499e-06, "loss": 17.1821, "step": 27682 }, { "epoch": 0.5060229952291297, "grad_norm": 6.308932897503492, "learning_rate": 5.145598591201124e-06, "loss": 17.7802, "step": 27683 }, { "epoch": 0.5060412744255762, "grad_norm": 7.644739061584473, "learning_rate": 5.14530270493744e-06, "loss": 17.6652, "step": 27684 }, { "epoch": 0.5060595536220228, "grad_norm": 5.5404416303762725, "learning_rate": 5.145006818164482e-06, "loss": 17.2558, "step": 27685 }, { "epoch": 0.5060778328184693, "grad_norm": 5.951095898646831, "learning_rate": 5.1447109308832865e-06, "loss": 17.2144, "step": 27686 }, { "epoch": 0.5060961120149158, "grad_norm": 5.955982864622808, "learning_rate": 5.144415043094892e-06, "loss": 17.585, "step": 27687 }, { "epoch": 0.5061143912113624, "grad_norm": 6.180952227977285, "learning_rate": 5.144119154800338e-06, "loss": 17.5469, "step": 27688 }, { "epoch": 0.5061326704078088, "grad_norm": 7.116905280800295, "learning_rate": 5.143823266000657e-06, "loss": 17.9486, "step": 27689 }, { "epoch": 0.5061509496042554, "grad_norm": 6.0854189530902625, "learning_rate": 5.143527376696886e-06, "loss": 17.3287, "step": 27690 }, { "epoch": 0.5061692288007019, "grad_norm": 5.9056177128777305, "learning_rate": 5.1432314868900675e-06, "loss": 17.273, "step": 27691 }, { "epoch": 0.5061875079971484, "grad_norm": 7.1257202352464475, "learning_rate": 5.1429355965812335e-06, "loss": 17.7816, "step": 27692 }, { "epoch": 0.506205787193595, "grad_norm": 4.9526104789537975, "learning_rate": 5.142639705771422e-06, "loss": 16.9168, "step": 27693 }, { "epoch": 0.5062240663900415, "grad_norm": 6.925540441649807, "learning_rate": 5.142343814461671e-06, "loss": 18.026, "step": 27694 }, { "epoch": 0.5062423455864881, "grad_norm": 5.057242785752675, "learning_rate": 5.1420479226530176e-06, "loss": 16.9973, "step": 27695 }, { "epoch": 0.5062606247829345, "grad_norm": 5.298186320413527, "learning_rate": 5.141752030346499e-06, "loss": 17.0309, "step": 27696 }, { "epoch": 0.506278903979381, "grad_norm": 6.590194470556828, "learning_rate": 5.141456137543151e-06, "loss": 17.6833, "step": 27697 }, { "epoch": 0.5062971831758276, "grad_norm": 6.984955925175951, "learning_rate": 5.141160244244011e-06, "loss": 17.9011, "step": 27698 }, { "epoch": 0.5063154623722741, "grad_norm": 4.831846667844356, "learning_rate": 5.140864350450117e-06, "loss": 16.9374, "step": 27699 }, { "epoch": 0.5063337415687207, "grad_norm": 6.681990776830355, "learning_rate": 5.140568456162507e-06, "loss": 17.773, "step": 27700 }, { "epoch": 0.5063520207651672, "grad_norm": 7.553439780878484, "learning_rate": 5.1402725613822165e-06, "loss": 18.0232, "step": 27701 }, { "epoch": 0.5063702999616136, "grad_norm": 7.438364030658903, "learning_rate": 5.139976666110283e-06, "loss": 17.9461, "step": 27702 }, { "epoch": 0.5063885791580602, "grad_norm": 5.842131301653922, "learning_rate": 5.139680770347741e-06, "loss": 17.4337, "step": 27703 }, { "epoch": 0.5064068583545067, "grad_norm": 6.235689119762629, "learning_rate": 5.139384874095631e-06, "loss": 17.5769, "step": 27704 }, { "epoch": 0.5064251375509533, "grad_norm": 6.5957393003374305, "learning_rate": 5.139088977354991e-06, "loss": 17.4528, "step": 27705 }, { "epoch": 0.5064434167473998, "grad_norm": 8.876219451068485, "learning_rate": 5.138793080126855e-06, "loss": 17.9884, "step": 27706 }, { "epoch": 0.5064616959438463, "grad_norm": 6.536162880783229, "learning_rate": 5.138497182412261e-06, "loss": 17.8436, "step": 27707 }, { "epoch": 0.5064799751402929, "grad_norm": 5.346922379373697, "learning_rate": 5.138201284212246e-06, "loss": 17.1215, "step": 27708 }, { "epoch": 0.5064982543367393, "grad_norm": 6.920789085254473, "learning_rate": 5.13790538552785e-06, "loss": 18.0832, "step": 27709 }, { "epoch": 0.5065165335331859, "grad_norm": 5.38868080758509, "learning_rate": 5.137609486360105e-06, "loss": 17.1127, "step": 27710 }, { "epoch": 0.5065348127296324, "grad_norm": 7.3354816946597525, "learning_rate": 5.137313586710051e-06, "loss": 17.5267, "step": 27711 }, { "epoch": 0.5065530919260789, "grad_norm": 5.713671445638332, "learning_rate": 5.137017686578724e-06, "loss": 17.0097, "step": 27712 }, { "epoch": 0.5065713711225255, "grad_norm": 7.486510099193616, "learning_rate": 5.136721785967165e-06, "loss": 17.7021, "step": 27713 }, { "epoch": 0.506589650318972, "grad_norm": 5.746439996288859, "learning_rate": 5.136425884876405e-06, "loss": 17.2224, "step": 27714 }, { "epoch": 0.5066079295154186, "grad_norm": 7.3379061579999965, "learning_rate": 5.136129983307486e-06, "loss": 17.9784, "step": 27715 }, { "epoch": 0.506626208711865, "grad_norm": 6.008121010187761, "learning_rate": 5.135834081261443e-06, "loss": 17.6975, "step": 27716 }, { "epoch": 0.5066444879083115, "grad_norm": 7.21445062747721, "learning_rate": 5.135538178739311e-06, "loss": 17.6537, "step": 27717 }, { "epoch": 0.5066627671047581, "grad_norm": 5.362687134513164, "learning_rate": 5.135242275742132e-06, "loss": 17.1599, "step": 27718 }, { "epoch": 0.5066810463012046, "grad_norm": 7.813650131621309, "learning_rate": 5.13494637227094e-06, "loss": 18.3504, "step": 27719 }, { "epoch": 0.5066993254976512, "grad_norm": 6.092689226364962, "learning_rate": 5.134650468326773e-06, "loss": 17.5533, "step": 27720 }, { "epoch": 0.5067176046940977, "grad_norm": 7.587971222214896, "learning_rate": 5.134354563910667e-06, "loss": 18.0693, "step": 27721 }, { "epoch": 0.5067358838905441, "grad_norm": 6.330113621846811, "learning_rate": 5.134058659023661e-06, "loss": 17.2376, "step": 27722 }, { "epoch": 0.5067541630869907, "grad_norm": 6.32979980405475, "learning_rate": 5.133762753666789e-06, "loss": 17.7372, "step": 27723 }, { "epoch": 0.5067724422834372, "grad_norm": 5.110404489032517, "learning_rate": 5.1334668478410925e-06, "loss": 16.9661, "step": 27724 }, { "epoch": 0.5067907214798838, "grad_norm": 6.623456977007643, "learning_rate": 5.133170941547604e-06, "loss": 17.2745, "step": 27725 }, { "epoch": 0.5068090006763303, "grad_norm": 8.008242050881773, "learning_rate": 5.132875034787365e-06, "loss": 18.2314, "step": 27726 }, { "epoch": 0.5068272798727768, "grad_norm": 5.7253805568384895, "learning_rate": 5.132579127561409e-06, "loss": 17.1657, "step": 27727 }, { "epoch": 0.5068455590692234, "grad_norm": 9.219274905737294, "learning_rate": 5.132283219870775e-06, "loss": 18.0161, "step": 27728 }, { "epoch": 0.5068638382656698, "grad_norm": 7.70803399074323, "learning_rate": 5.1319873117165005e-06, "loss": 17.7997, "step": 27729 }, { "epoch": 0.5068821174621163, "grad_norm": 6.875985284577539, "learning_rate": 5.131691403099621e-06, "loss": 17.5223, "step": 27730 }, { "epoch": 0.5069003966585629, "grad_norm": 5.026856926452195, "learning_rate": 5.1313954940211755e-06, "loss": 17.085, "step": 27731 }, { "epoch": 0.5069186758550094, "grad_norm": 6.312322182507129, "learning_rate": 5.1310995844822e-06, "loss": 17.629, "step": 27732 }, { "epoch": 0.506936955051456, "grad_norm": 5.544649978169023, "learning_rate": 5.130803674483732e-06, "loss": 17.2875, "step": 27733 }, { "epoch": 0.5069552342479025, "grad_norm": 6.679378183058156, "learning_rate": 5.130507764026808e-06, "loss": 17.8269, "step": 27734 }, { "epoch": 0.5069735134443489, "grad_norm": 6.886195116724657, "learning_rate": 5.130211853112463e-06, "loss": 17.8946, "step": 27735 }, { "epoch": 0.5069917926407955, "grad_norm": 5.442584452922059, "learning_rate": 5.129915941741741e-06, "loss": 17.1328, "step": 27736 }, { "epoch": 0.507010071837242, "grad_norm": 6.613305630433648, "learning_rate": 5.129620029915674e-06, "loss": 17.8886, "step": 27737 }, { "epoch": 0.5070283510336886, "grad_norm": 6.267162436401069, "learning_rate": 5.1293241176353e-06, "loss": 17.4957, "step": 27738 }, { "epoch": 0.5070466302301351, "grad_norm": 5.707869374733119, "learning_rate": 5.129028204901654e-06, "loss": 17.1091, "step": 27739 }, { "epoch": 0.5070649094265816, "grad_norm": 6.481475256922889, "learning_rate": 5.128732291715777e-06, "loss": 17.533, "step": 27740 }, { "epoch": 0.5070831886230281, "grad_norm": 5.59205507786949, "learning_rate": 5.128436378078704e-06, "loss": 17.2637, "step": 27741 }, { "epoch": 0.5071014678194746, "grad_norm": 8.04507234687692, "learning_rate": 5.128140463991473e-06, "loss": 18.3353, "step": 27742 }, { "epoch": 0.5071197470159212, "grad_norm": 5.860938477511177, "learning_rate": 5.127844549455122e-06, "loss": 17.3962, "step": 27743 }, { "epoch": 0.5071380262123677, "grad_norm": 6.096136693855123, "learning_rate": 5.127548634470685e-06, "loss": 17.3681, "step": 27744 }, { "epoch": 0.5071563054088142, "grad_norm": 6.991027932920232, "learning_rate": 5.127252719039202e-06, "loss": 17.618, "step": 27745 }, { "epoch": 0.5071745846052608, "grad_norm": 6.212866849705913, "learning_rate": 5.126956803161709e-06, "loss": 17.4926, "step": 27746 }, { "epoch": 0.5071928638017072, "grad_norm": 5.892368860593251, "learning_rate": 5.126660886839244e-06, "loss": 17.3287, "step": 27747 }, { "epoch": 0.5072111429981538, "grad_norm": 7.190833850718565, "learning_rate": 5.126364970072843e-06, "loss": 17.5176, "step": 27748 }, { "epoch": 0.5072294221946003, "grad_norm": 7.364418141602431, "learning_rate": 5.126069052863542e-06, "loss": 17.8325, "step": 27749 }, { "epoch": 0.5072477013910468, "grad_norm": 8.195818727530257, "learning_rate": 5.125773135212383e-06, "loss": 18.4358, "step": 27750 }, { "epoch": 0.5072659805874934, "grad_norm": 6.889646480412214, "learning_rate": 5.125477217120399e-06, "loss": 17.9033, "step": 27751 }, { "epoch": 0.5072842597839399, "grad_norm": 5.628395766056375, "learning_rate": 5.125181298588629e-06, "loss": 17.3565, "step": 27752 }, { "epoch": 0.5073025389803865, "grad_norm": 4.957846213430797, "learning_rate": 5.124885379618107e-06, "loss": 16.8323, "step": 27753 }, { "epoch": 0.5073208181768329, "grad_norm": 8.043322179811897, "learning_rate": 5.124589460209875e-06, "loss": 17.9384, "step": 27754 }, { "epoch": 0.5073390973732794, "grad_norm": 5.643776303822953, "learning_rate": 5.124293540364966e-06, "loss": 17.4568, "step": 27755 }, { "epoch": 0.507357376569726, "grad_norm": 6.823357145909125, "learning_rate": 5.12399762008442e-06, "loss": 17.6151, "step": 27756 }, { "epoch": 0.5073756557661725, "grad_norm": 6.848050043748216, "learning_rate": 5.1237016993692726e-06, "loss": 17.5411, "step": 27757 }, { "epoch": 0.5073939349626191, "grad_norm": 6.818873049251554, "learning_rate": 5.123405778220562e-06, "loss": 17.5898, "step": 27758 }, { "epoch": 0.5074122141590656, "grad_norm": 6.891246948640802, "learning_rate": 5.123109856639325e-06, "loss": 17.724, "step": 27759 }, { "epoch": 0.507430493355512, "grad_norm": 5.22646842897986, "learning_rate": 5.122813934626598e-06, "loss": 17.3879, "step": 27760 }, { "epoch": 0.5074487725519586, "grad_norm": 6.241375253225352, "learning_rate": 5.122518012183419e-06, "loss": 17.4592, "step": 27761 }, { "epoch": 0.5074670517484051, "grad_norm": 7.011943657573289, "learning_rate": 5.1222220893108245e-06, "loss": 17.7635, "step": 27762 }, { "epoch": 0.5074853309448517, "grad_norm": 8.340489642847533, "learning_rate": 5.121926166009854e-06, "loss": 17.9439, "step": 27763 }, { "epoch": 0.5075036101412982, "grad_norm": 7.020032119133576, "learning_rate": 5.1216302422815415e-06, "loss": 18.0195, "step": 27764 }, { "epoch": 0.5075218893377447, "grad_norm": 7.319892816728759, "learning_rate": 5.121334318126925e-06, "loss": 17.8734, "step": 27765 }, { "epoch": 0.5075401685341913, "grad_norm": 6.401731890036808, "learning_rate": 5.121038393547043e-06, "loss": 17.5819, "step": 27766 }, { "epoch": 0.5075584477306377, "grad_norm": 4.869051610432103, "learning_rate": 5.120742468542932e-06, "loss": 17.0804, "step": 27767 }, { "epoch": 0.5075767269270843, "grad_norm": 7.001114839435052, "learning_rate": 5.120446543115629e-06, "loss": 17.6885, "step": 27768 }, { "epoch": 0.5075950061235308, "grad_norm": 5.985950871215604, "learning_rate": 5.12015061726617e-06, "loss": 17.2141, "step": 27769 }, { "epoch": 0.5076132853199773, "grad_norm": 6.203530235471378, "learning_rate": 5.119854690995597e-06, "loss": 17.4852, "step": 27770 }, { "epoch": 0.5076315645164239, "grad_norm": 6.345810166297624, "learning_rate": 5.11955876430494e-06, "loss": 17.5605, "step": 27771 }, { "epoch": 0.5076498437128704, "grad_norm": 7.017718859447296, "learning_rate": 5.119262837195241e-06, "loss": 18.0204, "step": 27772 }, { "epoch": 0.507668122909317, "grad_norm": 5.915548862643262, "learning_rate": 5.118966909667536e-06, "loss": 17.4062, "step": 27773 }, { "epoch": 0.5076864021057634, "grad_norm": 8.459241006330435, "learning_rate": 5.118670981722864e-06, "loss": 18.7702, "step": 27774 }, { "epoch": 0.5077046813022099, "grad_norm": 5.26648824627864, "learning_rate": 5.11837505336226e-06, "loss": 17.0453, "step": 27775 }, { "epoch": 0.5077229604986565, "grad_norm": 6.546008264375168, "learning_rate": 5.11807912458676e-06, "loss": 17.4485, "step": 27776 }, { "epoch": 0.507741239695103, "grad_norm": 6.065952069195552, "learning_rate": 5.117783195397405e-06, "loss": 17.2764, "step": 27777 }, { "epoch": 0.5077595188915496, "grad_norm": 6.844432593900314, "learning_rate": 5.117487265795229e-06, "loss": 17.4527, "step": 27778 }, { "epoch": 0.5077777980879961, "grad_norm": 5.670818472573281, "learning_rate": 5.11719133578127e-06, "loss": 17.3438, "step": 27779 }, { "epoch": 0.5077960772844425, "grad_norm": 7.971437219958533, "learning_rate": 5.1168954053565655e-06, "loss": 18.0089, "step": 27780 }, { "epoch": 0.5078143564808891, "grad_norm": 6.268244555676373, "learning_rate": 5.116599474522153e-06, "loss": 17.5396, "step": 27781 }, { "epoch": 0.5078326356773356, "grad_norm": 6.215729356350821, "learning_rate": 5.11630354327907e-06, "loss": 17.5865, "step": 27782 }, { "epoch": 0.5078509148737822, "grad_norm": 6.288194553442732, "learning_rate": 5.116007611628353e-06, "loss": 17.5758, "step": 27783 }, { "epoch": 0.5078691940702287, "grad_norm": 6.14978246789447, "learning_rate": 5.115711679571038e-06, "loss": 17.4051, "step": 27784 }, { "epoch": 0.5078874732666752, "grad_norm": 6.612922377398286, "learning_rate": 5.115415747108166e-06, "loss": 17.4096, "step": 27785 }, { "epoch": 0.5079057524631218, "grad_norm": 6.47498327904835, "learning_rate": 5.11511981424077e-06, "loss": 17.2669, "step": 27786 }, { "epoch": 0.5079240316595682, "grad_norm": 7.595785899509144, "learning_rate": 5.114823880969889e-06, "loss": 17.6638, "step": 27787 }, { "epoch": 0.5079423108560148, "grad_norm": 6.23259983046272, "learning_rate": 5.114527947296563e-06, "loss": 17.4832, "step": 27788 }, { "epoch": 0.5079605900524613, "grad_norm": 7.519498838984317, "learning_rate": 5.114232013221823e-06, "loss": 17.7527, "step": 27789 }, { "epoch": 0.5079788692489078, "grad_norm": 7.11893307367405, "learning_rate": 5.1139360787467104e-06, "loss": 17.8125, "step": 27790 }, { "epoch": 0.5079971484453544, "grad_norm": 7.572962276243854, "learning_rate": 5.113640143872264e-06, "loss": 17.6688, "step": 27791 }, { "epoch": 0.5080154276418009, "grad_norm": 5.910023689579945, "learning_rate": 5.1133442085995165e-06, "loss": 17.5413, "step": 27792 }, { "epoch": 0.5080337068382474, "grad_norm": 6.737547699899249, "learning_rate": 5.113048272929508e-06, "loss": 17.8257, "step": 27793 }, { "epoch": 0.5080519860346939, "grad_norm": 6.569180814096426, "learning_rate": 5.112752336863275e-06, "loss": 17.3771, "step": 27794 }, { "epoch": 0.5080702652311404, "grad_norm": 6.509063902180465, "learning_rate": 5.112456400401855e-06, "loss": 17.4601, "step": 27795 }, { "epoch": 0.508088544427587, "grad_norm": 6.216367487588847, "learning_rate": 5.112160463546285e-06, "loss": 17.2877, "step": 27796 }, { "epoch": 0.5081068236240335, "grad_norm": 5.924333410370937, "learning_rate": 5.111864526297603e-06, "loss": 17.0676, "step": 27797 }, { "epoch": 0.50812510282048, "grad_norm": 6.347197197891492, "learning_rate": 5.111568588656845e-06, "loss": 17.7076, "step": 27798 }, { "epoch": 0.5081433820169265, "grad_norm": 6.201500369875943, "learning_rate": 5.111272650625049e-06, "loss": 17.4921, "step": 27799 }, { "epoch": 0.508161661213373, "grad_norm": 5.864006941659548, "learning_rate": 5.110976712203251e-06, "loss": 17.397, "step": 27800 }, { "epoch": 0.5081799404098196, "grad_norm": 6.527750122750077, "learning_rate": 5.110680773392491e-06, "loss": 17.7126, "step": 27801 }, { "epoch": 0.5081982196062661, "grad_norm": 5.978759562287268, "learning_rate": 5.110384834193804e-06, "loss": 17.5569, "step": 27802 }, { "epoch": 0.5082164988027126, "grad_norm": 5.653941790257011, "learning_rate": 5.110088894608226e-06, "loss": 17.2645, "step": 27803 }, { "epoch": 0.5082347779991592, "grad_norm": 7.111886366714862, "learning_rate": 5.109792954636796e-06, "loss": 17.7092, "step": 27804 }, { "epoch": 0.5082530571956057, "grad_norm": 5.887329362261316, "learning_rate": 5.109497014280555e-06, "loss": 17.5025, "step": 27805 }, { "epoch": 0.5082713363920522, "grad_norm": 6.316006213197096, "learning_rate": 5.1092010735405325e-06, "loss": 17.4514, "step": 27806 }, { "epoch": 0.5082896155884987, "grad_norm": 5.425393787878941, "learning_rate": 5.108905132417772e-06, "loss": 17.1413, "step": 27807 }, { "epoch": 0.5083078947849452, "grad_norm": 6.179089818058358, "learning_rate": 5.1086091909133075e-06, "loss": 17.3279, "step": 27808 }, { "epoch": 0.5083261739813918, "grad_norm": 5.75494897087057, "learning_rate": 5.108313249028177e-06, "loss": 17.2535, "step": 27809 }, { "epoch": 0.5083444531778383, "grad_norm": 7.413869531581902, "learning_rate": 5.108017306763417e-06, "loss": 17.927, "step": 27810 }, { "epoch": 0.5083627323742849, "grad_norm": 6.531126158633522, "learning_rate": 5.107721364120067e-06, "loss": 17.4851, "step": 27811 }, { "epoch": 0.5083810115707313, "grad_norm": 7.495061218472021, "learning_rate": 5.107425421099163e-06, "loss": 18.0959, "step": 27812 }, { "epoch": 0.5083992907671778, "grad_norm": 7.345337631125079, "learning_rate": 5.107129477701743e-06, "loss": 17.8673, "step": 27813 }, { "epoch": 0.5084175699636244, "grad_norm": 5.881337538549764, "learning_rate": 5.106833533928842e-06, "loss": 17.3385, "step": 27814 }, { "epoch": 0.5084358491600709, "grad_norm": 6.806397927155145, "learning_rate": 5.106537589781501e-06, "loss": 17.442, "step": 27815 }, { "epoch": 0.5084541283565175, "grad_norm": 7.2959632017956695, "learning_rate": 5.106241645260754e-06, "loss": 17.8208, "step": 27816 }, { "epoch": 0.508472407552964, "grad_norm": 5.475619954715328, "learning_rate": 5.105945700367636e-06, "loss": 16.8526, "step": 27817 }, { "epoch": 0.5084906867494104, "grad_norm": 8.234228650583185, "learning_rate": 5.105649755103191e-06, "loss": 18.1657, "step": 27818 }, { "epoch": 0.508508965945857, "grad_norm": 8.355068089071333, "learning_rate": 5.1053538094684515e-06, "loss": 17.6617, "step": 27819 }, { "epoch": 0.5085272451423035, "grad_norm": 6.819178272566638, "learning_rate": 5.105057863464458e-06, "loss": 17.7844, "step": 27820 }, { "epoch": 0.5085455243387501, "grad_norm": 6.923034621610501, "learning_rate": 5.104761917092243e-06, "loss": 17.6915, "step": 27821 }, { "epoch": 0.5085638035351966, "grad_norm": 6.931761091764257, "learning_rate": 5.104465970352848e-06, "loss": 17.6312, "step": 27822 }, { "epoch": 0.5085820827316431, "grad_norm": 5.682705060024975, "learning_rate": 5.1041700232473095e-06, "loss": 17.3104, "step": 27823 }, { "epoch": 0.5086003619280897, "grad_norm": 6.657548656800469, "learning_rate": 5.103874075776663e-06, "loss": 17.6736, "step": 27824 }, { "epoch": 0.5086186411245361, "grad_norm": 7.483931887010073, "learning_rate": 5.103578127941946e-06, "loss": 17.8158, "step": 27825 }, { "epoch": 0.5086369203209827, "grad_norm": 6.676413809934157, "learning_rate": 5.103282179744198e-06, "loss": 17.5908, "step": 27826 }, { "epoch": 0.5086551995174292, "grad_norm": 7.886278071909366, "learning_rate": 5.102986231184455e-06, "loss": 17.9176, "step": 27827 }, { "epoch": 0.5086734787138757, "grad_norm": 7.839605698319246, "learning_rate": 5.102690282263754e-06, "loss": 17.8862, "step": 27828 }, { "epoch": 0.5086917579103223, "grad_norm": 4.9164327604959315, "learning_rate": 5.102394332983132e-06, "loss": 17.0036, "step": 27829 }, { "epoch": 0.5087100371067688, "grad_norm": 6.177818813364183, "learning_rate": 5.1020983833436264e-06, "loss": 17.1324, "step": 27830 }, { "epoch": 0.5087283163032154, "grad_norm": 8.576030724323914, "learning_rate": 5.101802433346275e-06, "loss": 17.9203, "step": 27831 }, { "epoch": 0.5087465954996618, "grad_norm": 7.2500642547772225, "learning_rate": 5.101506482992118e-06, "loss": 18.0675, "step": 27832 }, { "epoch": 0.5087648746961083, "grad_norm": 6.52184673889023, "learning_rate": 5.101210532282187e-06, "loss": 17.4957, "step": 27833 }, { "epoch": 0.5087831538925549, "grad_norm": 6.68907586183205, "learning_rate": 5.100914581217521e-06, "loss": 17.5087, "step": 27834 }, { "epoch": 0.5088014330890014, "grad_norm": 6.575152285909569, "learning_rate": 5.1006186297991574e-06, "loss": 17.392, "step": 27835 }, { "epoch": 0.508819712285448, "grad_norm": 11.465419141079181, "learning_rate": 5.100322678028138e-06, "loss": 19.344, "step": 27836 }, { "epoch": 0.5088379914818945, "grad_norm": 6.067945112157284, "learning_rate": 5.100026725905494e-06, "loss": 17.4425, "step": 27837 }, { "epoch": 0.5088562706783409, "grad_norm": 5.802755459897126, "learning_rate": 5.099730773432266e-06, "loss": 17.0646, "step": 27838 }, { "epoch": 0.5088745498747875, "grad_norm": 6.309037106321797, "learning_rate": 5.099434820609488e-06, "loss": 17.2151, "step": 27839 }, { "epoch": 0.508892829071234, "grad_norm": 6.163295659381445, "learning_rate": 5.099138867438201e-06, "loss": 17.281, "step": 27840 }, { "epoch": 0.5089111082676806, "grad_norm": 8.354820996571528, "learning_rate": 5.098842913919442e-06, "loss": 18.0639, "step": 27841 }, { "epoch": 0.5089293874641271, "grad_norm": 7.64688041787203, "learning_rate": 5.098546960054246e-06, "loss": 17.6232, "step": 27842 }, { "epoch": 0.5089476666605736, "grad_norm": 6.7545021547324, "learning_rate": 5.098251005843652e-06, "loss": 17.6928, "step": 27843 }, { "epoch": 0.5089659458570202, "grad_norm": 5.143487498292757, "learning_rate": 5.097955051288696e-06, "loss": 17.1643, "step": 27844 }, { "epoch": 0.5089842250534666, "grad_norm": 5.53427588550187, "learning_rate": 5.097659096390416e-06, "loss": 17.2131, "step": 27845 }, { "epoch": 0.5090025042499132, "grad_norm": 6.30860754506615, "learning_rate": 5.09736314114985e-06, "loss": 17.7132, "step": 27846 }, { "epoch": 0.5090207834463597, "grad_norm": 8.563713722008549, "learning_rate": 5.0970671855680344e-06, "loss": 17.6117, "step": 27847 }, { "epoch": 0.5090390626428062, "grad_norm": 7.1524516519094306, "learning_rate": 5.096771229646007e-06, "loss": 17.6969, "step": 27848 }, { "epoch": 0.5090573418392528, "grad_norm": 5.634445277772277, "learning_rate": 5.0964752733848035e-06, "loss": 17.332, "step": 27849 }, { "epoch": 0.5090756210356993, "grad_norm": 10.846335040821502, "learning_rate": 5.096179316785464e-06, "loss": 18.1933, "step": 27850 }, { "epoch": 0.5090939002321458, "grad_norm": 5.813532846858103, "learning_rate": 5.095883359849024e-06, "loss": 17.3308, "step": 27851 }, { "epoch": 0.5091121794285923, "grad_norm": 6.807114437097919, "learning_rate": 5.095587402576521e-06, "loss": 17.7462, "step": 27852 }, { "epoch": 0.5091304586250388, "grad_norm": 5.423405840084081, "learning_rate": 5.095291444968993e-06, "loss": 17.0539, "step": 27853 }, { "epoch": 0.5091487378214854, "grad_norm": 8.369617524169517, "learning_rate": 5.094995487027475e-06, "loss": 17.9208, "step": 27854 }, { "epoch": 0.5091670170179319, "grad_norm": 5.734025767614059, "learning_rate": 5.094699528753008e-06, "loss": 17.2728, "step": 27855 }, { "epoch": 0.5091852962143785, "grad_norm": 7.37603727772448, "learning_rate": 5.094403570146626e-06, "loss": 17.5804, "step": 27856 }, { "epoch": 0.509203575410825, "grad_norm": 5.523727202934449, "learning_rate": 5.0941076112093694e-06, "loss": 17.057, "step": 27857 }, { "epoch": 0.5092218546072714, "grad_norm": 6.575651794090517, "learning_rate": 5.093811651942272e-06, "loss": 17.824, "step": 27858 }, { "epoch": 0.509240133803718, "grad_norm": 7.08990373527558, "learning_rate": 5.093515692346373e-06, "loss": 17.8624, "step": 27859 }, { "epoch": 0.5092584130001645, "grad_norm": 6.252322738583567, "learning_rate": 5.093219732422712e-06, "loss": 17.2543, "step": 27860 }, { "epoch": 0.5092766921966111, "grad_norm": 7.274000395594801, "learning_rate": 5.092923772172322e-06, "loss": 17.5227, "step": 27861 }, { "epoch": 0.5092949713930576, "grad_norm": 7.363211891902213, "learning_rate": 5.092627811596241e-06, "loss": 17.73, "step": 27862 }, { "epoch": 0.509313250589504, "grad_norm": 7.440880837814436, "learning_rate": 5.0923318506955086e-06, "loss": 17.8423, "step": 27863 }, { "epoch": 0.5093315297859506, "grad_norm": 5.093079881812766, "learning_rate": 5.092035889471163e-06, "loss": 17.0842, "step": 27864 }, { "epoch": 0.5093498089823971, "grad_norm": 6.865865467142303, "learning_rate": 5.091739927924239e-06, "loss": 17.9005, "step": 27865 }, { "epoch": 0.5093680881788436, "grad_norm": 9.030087018564162, "learning_rate": 5.0914439660557726e-06, "loss": 18.2221, "step": 27866 }, { "epoch": 0.5093863673752902, "grad_norm": 6.753652388079484, "learning_rate": 5.0911480038668036e-06, "loss": 17.7345, "step": 27867 }, { "epoch": 0.5094046465717367, "grad_norm": 7.056262798115523, "learning_rate": 5.090852041358369e-06, "loss": 17.5722, "step": 27868 }, { "epoch": 0.5094229257681833, "grad_norm": 6.356815609070242, "learning_rate": 5.0905560785315065e-06, "loss": 17.3165, "step": 27869 }, { "epoch": 0.5094412049646297, "grad_norm": 6.512300669371463, "learning_rate": 5.090260115387253e-06, "loss": 17.2421, "step": 27870 }, { "epoch": 0.5094594841610762, "grad_norm": 8.32693455713865, "learning_rate": 5.089964151926644e-06, "loss": 18.4941, "step": 27871 }, { "epoch": 0.5094777633575228, "grad_norm": 5.997102400857258, "learning_rate": 5.089668188150719e-06, "loss": 17.2529, "step": 27872 }, { "epoch": 0.5094960425539693, "grad_norm": 6.3895848858132105, "learning_rate": 5.089372224060516e-06, "loss": 17.4529, "step": 27873 }, { "epoch": 0.5095143217504159, "grad_norm": 8.13476791743883, "learning_rate": 5.089076259657071e-06, "loss": 18.2045, "step": 27874 }, { "epoch": 0.5095326009468624, "grad_norm": 5.361835074480378, "learning_rate": 5.088780294941421e-06, "loss": 17.0467, "step": 27875 }, { "epoch": 0.5095508801433088, "grad_norm": 6.250313742437329, "learning_rate": 5.088484329914602e-06, "loss": 17.5089, "step": 27876 }, { "epoch": 0.5095691593397554, "grad_norm": 6.288506411961067, "learning_rate": 5.0881883645776565e-06, "loss": 17.7503, "step": 27877 }, { "epoch": 0.5095874385362019, "grad_norm": 6.757143820103346, "learning_rate": 5.087892398931616e-06, "loss": 17.5151, "step": 27878 }, { "epoch": 0.5096057177326485, "grad_norm": 8.195882530412849, "learning_rate": 5.087596432977521e-06, "loss": 17.9789, "step": 27879 }, { "epoch": 0.509623996929095, "grad_norm": 7.861485717426822, "learning_rate": 5.087300466716407e-06, "loss": 17.7657, "step": 27880 }, { "epoch": 0.5096422761255415, "grad_norm": 6.36459832657322, "learning_rate": 5.087004500149314e-06, "loss": 17.5838, "step": 27881 }, { "epoch": 0.5096605553219881, "grad_norm": 5.4643637952944415, "learning_rate": 5.086708533277277e-06, "loss": 17.012, "step": 27882 }, { "epoch": 0.5096788345184345, "grad_norm": 5.439422791490859, "learning_rate": 5.086412566101334e-06, "loss": 17.1715, "step": 27883 }, { "epoch": 0.5096971137148811, "grad_norm": 6.050986215713007, "learning_rate": 5.086116598622522e-06, "loss": 17.3001, "step": 27884 }, { "epoch": 0.5097153929113276, "grad_norm": 7.2932790019798235, "learning_rate": 5.08582063084188e-06, "loss": 17.6906, "step": 27885 }, { "epoch": 0.5097336721077741, "grad_norm": 6.866716266787876, "learning_rate": 5.085524662760444e-06, "loss": 17.6706, "step": 27886 }, { "epoch": 0.5097519513042207, "grad_norm": 6.237292491224625, "learning_rate": 5.085228694379251e-06, "loss": 17.4173, "step": 27887 }, { "epoch": 0.5097702305006672, "grad_norm": 9.178059618595595, "learning_rate": 5.0849327256993394e-06, "loss": 18.0714, "step": 27888 }, { "epoch": 0.5097885096971138, "grad_norm": 6.494638280082455, "learning_rate": 5.084636756721744e-06, "loss": 17.8371, "step": 27889 }, { "epoch": 0.5098067888935602, "grad_norm": 8.03154022239092, "learning_rate": 5.084340787447506e-06, "loss": 18.1014, "step": 27890 }, { "epoch": 0.5098250680900067, "grad_norm": 8.497256806005296, "learning_rate": 5.084044817877663e-06, "loss": 17.6061, "step": 27891 }, { "epoch": 0.5098433472864533, "grad_norm": 6.4522589549367675, "learning_rate": 5.083748848013247e-06, "loss": 17.4162, "step": 27892 }, { "epoch": 0.5098616264828998, "grad_norm": 6.124167891150617, "learning_rate": 5.083452877855298e-06, "loss": 17.3322, "step": 27893 }, { "epoch": 0.5098799056793464, "grad_norm": 6.070776568095202, "learning_rate": 5.083156907404855e-06, "loss": 17.1881, "step": 27894 }, { "epoch": 0.5098981848757929, "grad_norm": 6.869976134582212, "learning_rate": 5.0828609366629556e-06, "loss": 17.7099, "step": 27895 }, { "epoch": 0.5099164640722393, "grad_norm": 6.624390682490424, "learning_rate": 5.082564965630634e-06, "loss": 17.6734, "step": 27896 }, { "epoch": 0.5099347432686859, "grad_norm": 6.250022675612942, "learning_rate": 5.0822689943089294e-06, "loss": 17.5516, "step": 27897 }, { "epoch": 0.5099530224651324, "grad_norm": 6.681294668058565, "learning_rate": 5.0819730226988805e-06, "loss": 17.3986, "step": 27898 }, { "epoch": 0.509971301661579, "grad_norm": 8.381762746719547, "learning_rate": 5.081677050801522e-06, "loss": 18.1535, "step": 27899 }, { "epoch": 0.5099895808580255, "grad_norm": 6.578829196788574, "learning_rate": 5.081381078617893e-06, "loss": 17.7318, "step": 27900 }, { "epoch": 0.510007860054472, "grad_norm": 7.0951003053675334, "learning_rate": 5.0810851061490315e-06, "loss": 17.6282, "step": 27901 }, { "epoch": 0.5100261392509186, "grad_norm": 5.103586841876317, "learning_rate": 5.080789133395973e-06, "loss": 17.1156, "step": 27902 }, { "epoch": 0.510044418447365, "grad_norm": 7.460144193023806, "learning_rate": 5.080493160359754e-06, "loss": 17.6798, "step": 27903 }, { "epoch": 0.5100626976438116, "grad_norm": 7.090248826311705, "learning_rate": 5.080197187041415e-06, "loss": 17.0903, "step": 27904 }, { "epoch": 0.5100809768402581, "grad_norm": 6.8042642548949885, "learning_rate": 5.079901213441992e-06, "loss": 17.5889, "step": 27905 }, { "epoch": 0.5100992560367046, "grad_norm": 6.165145144153961, "learning_rate": 5.079605239562522e-06, "loss": 16.9309, "step": 27906 }, { "epoch": 0.5101175352331512, "grad_norm": 6.504810586479322, "learning_rate": 5.079309265404042e-06, "loss": 17.6423, "step": 27907 }, { "epoch": 0.5101358144295977, "grad_norm": 5.552455567232766, "learning_rate": 5.079013290967589e-06, "loss": 17.2008, "step": 27908 }, { "epoch": 0.5101540936260442, "grad_norm": 7.036312304543531, "learning_rate": 5.078717316254202e-06, "loss": 17.0784, "step": 27909 }, { "epoch": 0.5101723728224907, "grad_norm": 6.5684040882431285, "learning_rate": 5.078421341264919e-06, "loss": 17.5405, "step": 27910 }, { "epoch": 0.5101906520189372, "grad_norm": 6.3598182509281775, "learning_rate": 5.078125366000775e-06, "loss": 17.3858, "step": 27911 }, { "epoch": 0.5102089312153838, "grad_norm": 6.950337943610434, "learning_rate": 5.077829390462809e-06, "loss": 17.6734, "step": 27912 }, { "epoch": 0.5102272104118303, "grad_norm": 6.392436753818627, "learning_rate": 5.077533414652056e-06, "loss": 17.6908, "step": 27913 }, { "epoch": 0.5102454896082769, "grad_norm": 6.37787231520368, "learning_rate": 5.077237438569557e-06, "loss": 17.3778, "step": 27914 }, { "epoch": 0.5102637688047233, "grad_norm": 5.703003146742271, "learning_rate": 5.076941462216347e-06, "loss": 17.2388, "step": 27915 }, { "epoch": 0.5102820480011698, "grad_norm": 5.423843335529613, "learning_rate": 5.076645485593462e-06, "loss": 17.035, "step": 27916 }, { "epoch": 0.5103003271976164, "grad_norm": 8.167982766498403, "learning_rate": 5.076349508701943e-06, "loss": 18.0702, "step": 27917 }, { "epoch": 0.5103186063940629, "grad_norm": 6.480664865267102, "learning_rate": 5.076053531542826e-06, "loss": 17.2405, "step": 27918 }, { "epoch": 0.5103368855905095, "grad_norm": 6.1631597582761195, "learning_rate": 5.075757554117148e-06, "loss": 17.2679, "step": 27919 }, { "epoch": 0.510355164786956, "grad_norm": 7.117681822102847, "learning_rate": 5.075461576425946e-06, "loss": 17.6283, "step": 27920 }, { "epoch": 0.5103734439834025, "grad_norm": 5.114716886469767, "learning_rate": 5.075165598470257e-06, "loss": 16.8313, "step": 27921 }, { "epoch": 0.510391723179849, "grad_norm": 7.345253415797447, "learning_rate": 5.07486962025112e-06, "loss": 17.9692, "step": 27922 }, { "epoch": 0.5104100023762955, "grad_norm": 5.800808293392524, "learning_rate": 5.07457364176957e-06, "loss": 17.2526, "step": 27923 }, { "epoch": 0.5104282815727421, "grad_norm": 6.642269203657916, "learning_rate": 5.0742776630266475e-06, "loss": 17.4973, "step": 27924 }, { "epoch": 0.5104465607691886, "grad_norm": 6.6416064789316875, "learning_rate": 5.073981684023388e-06, "loss": 17.5644, "step": 27925 }, { "epoch": 0.5104648399656351, "grad_norm": 6.927755807980413, "learning_rate": 5.073685704760828e-06, "loss": 17.3729, "step": 27926 }, { "epoch": 0.5104831191620817, "grad_norm": 6.033087824890685, "learning_rate": 5.073389725240006e-06, "loss": 17.3631, "step": 27927 }, { "epoch": 0.5105013983585281, "grad_norm": 5.796741823708957, "learning_rate": 5.073093745461961e-06, "loss": 17.2888, "step": 27928 }, { "epoch": 0.5105196775549747, "grad_norm": 6.489427966014684, "learning_rate": 5.072797765427729e-06, "loss": 17.6568, "step": 27929 }, { "epoch": 0.5105379567514212, "grad_norm": 5.861282253986606, "learning_rate": 5.072501785138345e-06, "loss": 17.0678, "step": 27930 }, { "epoch": 0.5105562359478677, "grad_norm": 6.958764261470338, "learning_rate": 5.07220580459485e-06, "loss": 17.4967, "step": 27931 }, { "epoch": 0.5105745151443143, "grad_norm": 7.584696063777194, "learning_rate": 5.07190982379828e-06, "loss": 17.7249, "step": 27932 }, { "epoch": 0.5105927943407608, "grad_norm": 5.35595648986422, "learning_rate": 5.071613842749672e-06, "loss": 17.1567, "step": 27933 }, { "epoch": 0.5106110735372072, "grad_norm": 6.061092903293324, "learning_rate": 5.071317861450063e-06, "loss": 17.5192, "step": 27934 }, { "epoch": 0.5106293527336538, "grad_norm": 5.365330897197779, "learning_rate": 5.0710218799004906e-06, "loss": 17.0581, "step": 27935 }, { "epoch": 0.5106476319301003, "grad_norm": 5.838573082680509, "learning_rate": 5.070725898101995e-06, "loss": 17.5544, "step": 27936 }, { "epoch": 0.5106659111265469, "grad_norm": 8.596949097713447, "learning_rate": 5.070429916055609e-06, "loss": 17.6689, "step": 27937 }, { "epoch": 0.5106841903229934, "grad_norm": 8.097615772411954, "learning_rate": 5.070133933762373e-06, "loss": 18.4352, "step": 27938 }, { "epoch": 0.5107024695194399, "grad_norm": 6.61105770284266, "learning_rate": 5.069837951223324e-06, "loss": 17.6067, "step": 27939 }, { "epoch": 0.5107207487158865, "grad_norm": 7.06885343616716, "learning_rate": 5.069541968439498e-06, "loss": 17.7823, "step": 27940 }, { "epoch": 0.5107390279123329, "grad_norm": 7.714779778663963, "learning_rate": 5.069245985411935e-06, "loss": 17.3728, "step": 27941 }, { "epoch": 0.5107573071087795, "grad_norm": 5.374890110205253, "learning_rate": 5.06895000214167e-06, "loss": 17.0761, "step": 27942 }, { "epoch": 0.510775586305226, "grad_norm": 6.779089121408302, "learning_rate": 5.068654018629743e-06, "loss": 17.2467, "step": 27943 }, { "epoch": 0.5107938655016725, "grad_norm": 6.6904896821342055, "learning_rate": 5.068358034877187e-06, "loss": 17.8552, "step": 27944 }, { "epoch": 0.5108121446981191, "grad_norm": 7.544460945651414, "learning_rate": 5.068062050885042e-06, "loss": 18.0192, "step": 27945 }, { "epoch": 0.5108304238945656, "grad_norm": 7.3778038152276055, "learning_rate": 5.0677660666543486e-06, "loss": 17.8556, "step": 27946 }, { "epoch": 0.5108487030910122, "grad_norm": 6.147904216098145, "learning_rate": 5.067470082186138e-06, "loss": 17.287, "step": 27947 }, { "epoch": 0.5108669822874586, "grad_norm": 6.036334583470327, "learning_rate": 5.067174097481451e-06, "loss": 17.5303, "step": 27948 }, { "epoch": 0.5108852614839051, "grad_norm": 6.3812375777208175, "learning_rate": 5.0668781125413235e-06, "loss": 17.2747, "step": 27949 }, { "epoch": 0.5109035406803517, "grad_norm": 7.0846058593199945, "learning_rate": 5.066582127366797e-06, "loss": 17.9406, "step": 27950 }, { "epoch": 0.5109218198767982, "grad_norm": 5.995581704388138, "learning_rate": 5.0662861419589045e-06, "loss": 17.5552, "step": 27951 }, { "epoch": 0.5109400990732448, "grad_norm": 8.079681787989854, "learning_rate": 5.065990156318686e-06, "loss": 17.5316, "step": 27952 }, { "epoch": 0.5109583782696913, "grad_norm": 6.394805775513728, "learning_rate": 5.065694170447175e-06, "loss": 17.3691, "step": 27953 }, { "epoch": 0.5109766574661377, "grad_norm": 9.04358904856608, "learning_rate": 5.065398184345413e-06, "loss": 18.2654, "step": 27954 }, { "epoch": 0.5109949366625843, "grad_norm": 5.919043342764798, "learning_rate": 5.0651021980144366e-06, "loss": 17.2662, "step": 27955 }, { "epoch": 0.5110132158590308, "grad_norm": 6.8232770058589205, "learning_rate": 5.064806211455282e-06, "loss": 17.3929, "step": 27956 }, { "epoch": 0.5110314950554774, "grad_norm": 6.3081512598405, "learning_rate": 5.0645102246689885e-06, "loss": 17.3551, "step": 27957 }, { "epoch": 0.5110497742519239, "grad_norm": 7.207245941402502, "learning_rate": 5.064214237656591e-06, "loss": 17.8506, "step": 27958 }, { "epoch": 0.5110680534483704, "grad_norm": 6.225357130711153, "learning_rate": 5.063918250419128e-06, "loss": 17.3574, "step": 27959 }, { "epoch": 0.511086332644817, "grad_norm": 6.2018937939581145, "learning_rate": 5.063622262957638e-06, "loss": 17.3057, "step": 27960 }, { "epoch": 0.5111046118412634, "grad_norm": 6.3001218579894065, "learning_rate": 5.063326275273157e-06, "loss": 17.3587, "step": 27961 }, { "epoch": 0.51112289103771, "grad_norm": 6.048242051160997, "learning_rate": 5.063030287366723e-06, "loss": 17.4052, "step": 27962 }, { "epoch": 0.5111411702341565, "grad_norm": 6.06499117173175, "learning_rate": 5.062734299239372e-06, "loss": 17.3983, "step": 27963 }, { "epoch": 0.511159449430603, "grad_norm": 5.785315620005533, "learning_rate": 5.0624383108921445e-06, "loss": 17.4715, "step": 27964 }, { "epoch": 0.5111777286270496, "grad_norm": 6.231404459331465, "learning_rate": 5.0621423223260765e-06, "loss": 17.6666, "step": 27965 }, { "epoch": 0.5111960078234961, "grad_norm": 6.198098887849262, "learning_rate": 5.061846333542204e-06, "loss": 17.3928, "step": 27966 }, { "epoch": 0.5112142870199426, "grad_norm": 6.256331047675148, "learning_rate": 5.061550344541566e-06, "loss": 17.4104, "step": 27967 }, { "epoch": 0.5112325662163891, "grad_norm": 6.070585188329285, "learning_rate": 5.061254355325198e-06, "loss": 17.4089, "step": 27968 }, { "epoch": 0.5112508454128356, "grad_norm": 5.648920168004417, "learning_rate": 5.06095836589414e-06, "loss": 17.2043, "step": 27969 }, { "epoch": 0.5112691246092822, "grad_norm": 7.33495141010944, "learning_rate": 5.060662376249429e-06, "loss": 17.926, "step": 27970 }, { "epoch": 0.5112874038057287, "grad_norm": 6.145325637030012, "learning_rate": 5.0603663863921e-06, "loss": 17.5622, "step": 27971 }, { "epoch": 0.5113056830021753, "grad_norm": 7.409558314805561, "learning_rate": 5.060070396323192e-06, "loss": 17.6996, "step": 27972 }, { "epoch": 0.5113239621986218, "grad_norm": 6.537365255117251, "learning_rate": 5.059774406043744e-06, "loss": 17.6237, "step": 27973 }, { "epoch": 0.5113422413950682, "grad_norm": 7.200750731269138, "learning_rate": 5.059478415554792e-06, "loss": 17.9639, "step": 27974 }, { "epoch": 0.5113605205915148, "grad_norm": 6.421165575310187, "learning_rate": 5.059182424857371e-06, "loss": 17.411, "step": 27975 }, { "epoch": 0.5113787997879613, "grad_norm": 5.277040088558584, "learning_rate": 5.058886433952521e-06, "loss": 17.0703, "step": 27976 }, { "epoch": 0.5113970789844079, "grad_norm": 6.936336264939123, "learning_rate": 5.0585904428412824e-06, "loss": 17.7754, "step": 27977 }, { "epoch": 0.5114153581808544, "grad_norm": 6.855191478628995, "learning_rate": 5.058294451524687e-06, "loss": 17.6812, "step": 27978 }, { "epoch": 0.5114336373773009, "grad_norm": 6.050774172441768, "learning_rate": 5.057998460003775e-06, "loss": 17.4065, "step": 27979 }, { "epoch": 0.5114519165737474, "grad_norm": 6.863536531195133, "learning_rate": 5.057702468279583e-06, "loss": 17.816, "step": 27980 }, { "epoch": 0.5114701957701939, "grad_norm": 5.223811086374738, "learning_rate": 5.0574064763531495e-06, "loss": 16.9572, "step": 27981 }, { "epoch": 0.5114884749666405, "grad_norm": 6.796207410027425, "learning_rate": 5.05711048422551e-06, "loss": 17.7347, "step": 27982 }, { "epoch": 0.511506754163087, "grad_norm": 6.777248411355415, "learning_rate": 5.056814491897705e-06, "loss": 17.7146, "step": 27983 }, { "epoch": 0.5115250333595335, "grad_norm": 6.3883882989031235, "learning_rate": 5.056518499370768e-06, "loss": 17.5044, "step": 27984 }, { "epoch": 0.5115433125559801, "grad_norm": 4.973791532447568, "learning_rate": 5.056222506645741e-06, "loss": 17.2173, "step": 27985 }, { "epoch": 0.5115615917524265, "grad_norm": 7.69910450381834, "learning_rate": 5.055926513723657e-06, "loss": 18.153, "step": 27986 }, { "epoch": 0.5115798709488731, "grad_norm": 6.213555465878709, "learning_rate": 5.055630520605557e-06, "loss": 17.3883, "step": 27987 }, { "epoch": 0.5115981501453196, "grad_norm": 6.102399203148746, "learning_rate": 5.055334527292477e-06, "loss": 17.4289, "step": 27988 }, { "epoch": 0.5116164293417661, "grad_norm": 5.704482747869875, "learning_rate": 5.055038533785451e-06, "loss": 17.3771, "step": 27989 }, { "epoch": 0.5116347085382127, "grad_norm": 7.720373131835971, "learning_rate": 5.054742540085523e-06, "loss": 17.9982, "step": 27990 }, { "epoch": 0.5116529877346592, "grad_norm": 5.988599421806966, "learning_rate": 5.054446546193728e-06, "loss": 17.4281, "step": 27991 }, { "epoch": 0.5116712669311058, "grad_norm": 6.250533702797249, "learning_rate": 5.0541505521111e-06, "loss": 17.2882, "step": 27992 }, { "epoch": 0.5116895461275522, "grad_norm": 5.697124985304141, "learning_rate": 5.05385455783868e-06, "loss": 17.2617, "step": 27993 }, { "epoch": 0.5117078253239987, "grad_norm": 8.295971595841486, "learning_rate": 5.053558563377503e-06, "loss": 18.3817, "step": 27994 }, { "epoch": 0.5117261045204453, "grad_norm": 7.837309151991971, "learning_rate": 5.053262568728609e-06, "loss": 17.877, "step": 27995 }, { "epoch": 0.5117443837168918, "grad_norm": 6.498182727342684, "learning_rate": 5.052966573893036e-06, "loss": 17.6076, "step": 27996 }, { "epoch": 0.5117626629133384, "grad_norm": 7.2434474881906, "learning_rate": 5.052670578871818e-06, "loss": 17.802, "step": 27997 }, { "epoch": 0.5117809421097849, "grad_norm": 5.303841371811925, "learning_rate": 5.052374583665994e-06, "loss": 17.1082, "step": 27998 }, { "epoch": 0.5117992213062313, "grad_norm": 5.851803379670491, "learning_rate": 5.052078588276602e-06, "loss": 17.4313, "step": 27999 }, { "epoch": 0.5118175005026779, "grad_norm": 8.61954880884424, "learning_rate": 5.051782592704679e-06, "loss": 18.0882, "step": 28000 }, { "epoch": 0.5118357796991244, "grad_norm": 6.047523950834138, "learning_rate": 5.051486596951264e-06, "loss": 17.4648, "step": 28001 }, { "epoch": 0.5118540588955709, "grad_norm": 6.244999563783573, "learning_rate": 5.051190601017391e-06, "loss": 17.2835, "step": 28002 }, { "epoch": 0.5118723380920175, "grad_norm": 9.220879058723916, "learning_rate": 5.0508946049041e-06, "loss": 18.5216, "step": 28003 }, { "epoch": 0.511890617288464, "grad_norm": 5.911686970152043, "learning_rate": 5.050598608612427e-06, "loss": 17.3586, "step": 28004 }, { "epoch": 0.5119088964849106, "grad_norm": 5.395181574700731, "learning_rate": 5.0503026121434115e-06, "loss": 17.2908, "step": 28005 }, { "epoch": 0.511927175681357, "grad_norm": 7.199897142317298, "learning_rate": 5.05000661549809e-06, "loss": 17.8668, "step": 28006 }, { "epoch": 0.5119454548778035, "grad_norm": 5.713902850634102, "learning_rate": 5.049710618677499e-06, "loss": 17.3038, "step": 28007 }, { "epoch": 0.5119637340742501, "grad_norm": 5.945746328317656, "learning_rate": 5.049414621682677e-06, "loss": 17.2434, "step": 28008 }, { "epoch": 0.5119820132706966, "grad_norm": 6.800970587928478, "learning_rate": 5.049118624514659e-06, "loss": 17.7604, "step": 28009 }, { "epoch": 0.5120002924671432, "grad_norm": 5.014364988281461, "learning_rate": 5.048822627174487e-06, "loss": 17.2556, "step": 28010 }, { "epoch": 0.5120185716635897, "grad_norm": 5.646783329337935, "learning_rate": 5.048526629663194e-06, "loss": 17.1364, "step": 28011 }, { "epoch": 0.5120368508600361, "grad_norm": 5.238593346776299, "learning_rate": 5.0482306319818205e-06, "loss": 17.0546, "step": 28012 }, { "epoch": 0.5120551300564827, "grad_norm": 5.867916078013198, "learning_rate": 5.047934634131403e-06, "loss": 17.257, "step": 28013 }, { "epoch": 0.5120734092529292, "grad_norm": 6.349823261825295, "learning_rate": 5.047638636112978e-06, "loss": 17.6975, "step": 28014 }, { "epoch": 0.5120916884493758, "grad_norm": 8.712592750857338, "learning_rate": 5.047342637927586e-06, "loss": 18.8455, "step": 28015 }, { "epoch": 0.5121099676458223, "grad_norm": 6.66391801475898, "learning_rate": 5.047046639576258e-06, "loss": 17.4625, "step": 28016 }, { "epoch": 0.5121282468422688, "grad_norm": 6.62869295930528, "learning_rate": 5.046750641060038e-06, "loss": 17.7247, "step": 28017 }, { "epoch": 0.5121465260387154, "grad_norm": 5.373166233411081, "learning_rate": 5.046454642379962e-06, "loss": 17.1545, "step": 28018 }, { "epoch": 0.5121648052351618, "grad_norm": 6.501967979661488, "learning_rate": 5.0461586435370656e-06, "loss": 17.5092, "step": 28019 }, { "epoch": 0.5121830844316084, "grad_norm": 6.33903576693256, "learning_rate": 5.045862644532388e-06, "loss": 17.8611, "step": 28020 }, { "epoch": 0.5122013636280549, "grad_norm": 5.994603276352138, "learning_rate": 5.045566645366963e-06, "loss": 17.3117, "step": 28021 }, { "epoch": 0.5122196428245014, "grad_norm": 5.630383901243195, "learning_rate": 5.045270646041834e-06, "loss": 17.291, "step": 28022 }, { "epoch": 0.512237922020948, "grad_norm": 5.532876931544379, "learning_rate": 5.044974646558034e-06, "loss": 17.0049, "step": 28023 }, { "epoch": 0.5122562012173945, "grad_norm": 5.008223902516464, "learning_rate": 5.044678646916602e-06, "loss": 17.0178, "step": 28024 }, { "epoch": 0.512274480413841, "grad_norm": 6.615996521253628, "learning_rate": 5.044382647118574e-06, "loss": 17.6848, "step": 28025 }, { "epoch": 0.5122927596102875, "grad_norm": 6.074262935974503, "learning_rate": 5.044086647164991e-06, "loss": 17.2916, "step": 28026 }, { "epoch": 0.512311038806734, "grad_norm": 5.6487417065577405, "learning_rate": 5.043790647056886e-06, "loss": 17.1785, "step": 28027 }, { "epoch": 0.5123293180031806, "grad_norm": 6.049154975666557, "learning_rate": 5.043494646795299e-06, "loss": 17.1928, "step": 28028 }, { "epoch": 0.5123475971996271, "grad_norm": 5.1185673294355, "learning_rate": 5.043198646381269e-06, "loss": 16.8799, "step": 28029 }, { "epoch": 0.5123658763960737, "grad_norm": 5.770846082516518, "learning_rate": 5.042902645815829e-06, "loss": 17.1606, "step": 28030 }, { "epoch": 0.5123841555925202, "grad_norm": 6.910684220598062, "learning_rate": 5.04260664510002e-06, "loss": 17.4591, "step": 28031 }, { "epoch": 0.5124024347889666, "grad_norm": 6.628891492238753, "learning_rate": 5.042310644234878e-06, "loss": 17.422, "step": 28032 }, { "epoch": 0.5124207139854132, "grad_norm": 5.3739276066214705, "learning_rate": 5.042014643221442e-06, "loss": 17.0926, "step": 28033 }, { "epoch": 0.5124389931818597, "grad_norm": 4.709496993209792, "learning_rate": 5.0417186420607475e-06, "loss": 16.9556, "step": 28034 }, { "epoch": 0.5124572723783063, "grad_norm": 6.570254315115292, "learning_rate": 5.041422640753831e-06, "loss": 17.3542, "step": 28035 }, { "epoch": 0.5124755515747528, "grad_norm": 5.702979363851849, "learning_rate": 5.041126639301736e-06, "loss": 17.0444, "step": 28036 }, { "epoch": 0.5124938307711993, "grad_norm": 6.134215999578505, "learning_rate": 5.040830637705493e-06, "loss": 17.3444, "step": 28037 }, { "epoch": 0.5125121099676458, "grad_norm": 5.53583089154324, "learning_rate": 5.040534635966141e-06, "loss": 17.1026, "step": 28038 }, { "epoch": 0.5125303891640923, "grad_norm": 6.692634199078798, "learning_rate": 5.040238634084721e-06, "loss": 17.1582, "step": 28039 }, { "epoch": 0.5125486683605389, "grad_norm": 9.112706744573707, "learning_rate": 5.0399426320622665e-06, "loss": 17.7881, "step": 28040 }, { "epoch": 0.5125669475569854, "grad_norm": 5.436373246965209, "learning_rate": 5.039646629899817e-06, "loss": 17.1567, "step": 28041 }, { "epoch": 0.5125852267534319, "grad_norm": 6.351125943732603, "learning_rate": 5.0393506275984094e-06, "loss": 17.621, "step": 28042 }, { "epoch": 0.5126035059498785, "grad_norm": 5.806286890615243, "learning_rate": 5.039054625159081e-06, "loss": 17.3372, "step": 28043 }, { "epoch": 0.512621785146325, "grad_norm": 5.160793728004603, "learning_rate": 5.03875862258287e-06, "loss": 16.8742, "step": 28044 }, { "epoch": 0.5126400643427715, "grad_norm": 6.267331369127202, "learning_rate": 5.038462619870814e-06, "loss": 17.3996, "step": 28045 }, { "epoch": 0.512658343539218, "grad_norm": 6.085634046898352, "learning_rate": 5.03816661702395e-06, "loss": 17.5692, "step": 28046 }, { "epoch": 0.5126766227356645, "grad_norm": 6.61253265012242, "learning_rate": 5.037870614043314e-06, "loss": 17.3804, "step": 28047 }, { "epoch": 0.5126949019321111, "grad_norm": 4.96611423998561, "learning_rate": 5.037574610929945e-06, "loss": 16.8542, "step": 28048 }, { "epoch": 0.5127131811285576, "grad_norm": 6.320321578904927, "learning_rate": 5.03727860768488e-06, "loss": 17.5675, "step": 28049 }, { "epoch": 0.5127314603250042, "grad_norm": 7.157330893384386, "learning_rate": 5.036982604309159e-06, "loss": 17.466, "step": 28050 }, { "epoch": 0.5127497395214506, "grad_norm": 5.3084310590091635, "learning_rate": 5.036686600803815e-06, "loss": 17.1899, "step": 28051 }, { "epoch": 0.5127680187178971, "grad_norm": 7.373330017990298, "learning_rate": 5.036390597169888e-06, "loss": 17.9253, "step": 28052 }, { "epoch": 0.5127862979143437, "grad_norm": 6.65378399298599, "learning_rate": 5.036094593408415e-06, "loss": 17.6431, "step": 28053 }, { "epoch": 0.5128045771107902, "grad_norm": 5.858154925957364, "learning_rate": 5.035798589520434e-06, "loss": 17.3389, "step": 28054 }, { "epoch": 0.5128228563072368, "grad_norm": 5.330596774222364, "learning_rate": 5.035502585506981e-06, "loss": 17.1273, "step": 28055 }, { "epoch": 0.5128411355036833, "grad_norm": 6.735683133722009, "learning_rate": 5.035206581369097e-06, "loss": 17.3526, "step": 28056 }, { "epoch": 0.5128594147001297, "grad_norm": 5.310218738370323, "learning_rate": 5.034910577107814e-06, "loss": 17.2729, "step": 28057 }, { "epoch": 0.5128776938965763, "grad_norm": 5.661281842404663, "learning_rate": 5.034614572724175e-06, "loss": 17.3244, "step": 28058 }, { "epoch": 0.5128959730930228, "grad_norm": 5.7079937216885845, "learning_rate": 5.034318568219213e-06, "loss": 17.4455, "step": 28059 }, { "epoch": 0.5129142522894694, "grad_norm": 5.33893828694486, "learning_rate": 5.0340225635939685e-06, "loss": 17.1936, "step": 28060 }, { "epoch": 0.5129325314859159, "grad_norm": 6.991607716437704, "learning_rate": 5.033726558849479e-06, "loss": 17.6457, "step": 28061 }, { "epoch": 0.5129508106823624, "grad_norm": 7.173764685392336, "learning_rate": 5.0334305539867774e-06, "loss": 18.1706, "step": 28062 }, { "epoch": 0.512969089878809, "grad_norm": 7.119662542488564, "learning_rate": 5.033134549006906e-06, "loss": 17.7013, "step": 28063 }, { "epoch": 0.5129873690752554, "grad_norm": 7.890981854441498, "learning_rate": 5.032838543910903e-06, "loss": 18.2298, "step": 28064 }, { "epoch": 0.513005648271702, "grad_norm": 6.941025290307531, "learning_rate": 5.032542538699803e-06, "loss": 17.8069, "step": 28065 }, { "epoch": 0.5130239274681485, "grad_norm": 7.707036799576286, "learning_rate": 5.032246533374643e-06, "loss": 17.4969, "step": 28066 }, { "epoch": 0.513042206664595, "grad_norm": 5.8355286680754626, "learning_rate": 5.031950527936462e-06, "loss": 17.2879, "step": 28067 }, { "epoch": 0.5130604858610416, "grad_norm": 6.097625669021527, "learning_rate": 5.031654522386297e-06, "loss": 17.4816, "step": 28068 }, { "epoch": 0.5130787650574881, "grad_norm": 6.391443302246504, "learning_rate": 5.031358516725185e-06, "loss": 17.5693, "step": 28069 }, { "epoch": 0.5130970442539345, "grad_norm": 5.853587011187509, "learning_rate": 5.031062510954166e-06, "loss": 17.2966, "step": 28070 }, { "epoch": 0.5131153234503811, "grad_norm": 6.888218201777283, "learning_rate": 5.030766505074275e-06, "loss": 17.6219, "step": 28071 }, { "epoch": 0.5131336026468276, "grad_norm": 7.078410786984801, "learning_rate": 5.030470499086549e-06, "loss": 17.7244, "step": 28072 }, { "epoch": 0.5131518818432742, "grad_norm": 5.892402288292222, "learning_rate": 5.030174492992027e-06, "loss": 17.2036, "step": 28073 }, { "epoch": 0.5131701610397207, "grad_norm": 6.045014744014676, "learning_rate": 5.029878486791748e-06, "loss": 17.318, "step": 28074 }, { "epoch": 0.5131884402361672, "grad_norm": 5.76539178992317, "learning_rate": 5.029582480486744e-06, "loss": 17.2788, "step": 28075 }, { "epoch": 0.5132067194326138, "grad_norm": 6.317937925272435, "learning_rate": 5.029286474078058e-06, "loss": 17.3094, "step": 28076 }, { "epoch": 0.5132249986290602, "grad_norm": 5.9402884217697665, "learning_rate": 5.028990467566727e-06, "loss": 17.1369, "step": 28077 }, { "epoch": 0.5132432778255068, "grad_norm": 6.658072244458772, "learning_rate": 5.028694460953785e-06, "loss": 17.6295, "step": 28078 }, { "epoch": 0.5132615570219533, "grad_norm": 6.548053989903735, "learning_rate": 5.028398454240271e-06, "loss": 17.6661, "step": 28079 }, { "epoch": 0.5132798362183998, "grad_norm": 7.370262231792316, "learning_rate": 5.0281024474272225e-06, "loss": 18.1196, "step": 28080 }, { "epoch": 0.5132981154148464, "grad_norm": 5.794905417242664, "learning_rate": 5.027806440515679e-06, "loss": 17.0295, "step": 28081 }, { "epoch": 0.5133163946112929, "grad_norm": 6.559785798742597, "learning_rate": 5.027510433506676e-06, "loss": 17.4465, "step": 28082 }, { "epoch": 0.5133346738077394, "grad_norm": 6.161346157356899, "learning_rate": 5.02721442640125e-06, "loss": 17.6543, "step": 28083 }, { "epoch": 0.5133529530041859, "grad_norm": 4.745825669070633, "learning_rate": 5.026918419200442e-06, "loss": 16.9587, "step": 28084 }, { "epoch": 0.5133712322006324, "grad_norm": 7.6563757111778985, "learning_rate": 5.0266224119052855e-06, "loss": 18.0366, "step": 28085 }, { "epoch": 0.513389511397079, "grad_norm": 7.5577163342264315, "learning_rate": 5.026326404516821e-06, "loss": 18.04, "step": 28086 }, { "epoch": 0.5134077905935255, "grad_norm": 5.61846200718234, "learning_rate": 5.0260303970360835e-06, "loss": 17.259, "step": 28087 }, { "epoch": 0.5134260697899721, "grad_norm": 7.114267771998251, "learning_rate": 5.025734389464113e-06, "loss": 17.5694, "step": 28088 }, { "epoch": 0.5134443489864186, "grad_norm": 5.122244468492323, "learning_rate": 5.025438381801943e-06, "loss": 16.9749, "step": 28089 }, { "epoch": 0.513462628182865, "grad_norm": 8.060971334515285, "learning_rate": 5.0251423740506155e-06, "loss": 18.7725, "step": 28090 }, { "epoch": 0.5134809073793116, "grad_norm": 6.072602028199104, "learning_rate": 5.024846366211168e-06, "loss": 17.5009, "step": 28091 }, { "epoch": 0.5134991865757581, "grad_norm": 7.245636367424293, "learning_rate": 5.024550358284634e-06, "loss": 17.753, "step": 28092 }, { "epoch": 0.5135174657722047, "grad_norm": 6.0389132423800636, "learning_rate": 5.024254350272054e-06, "loss": 17.379, "step": 28093 }, { "epoch": 0.5135357449686512, "grad_norm": 5.406577581521393, "learning_rate": 5.023958342174463e-06, "loss": 17.1855, "step": 28094 }, { "epoch": 0.5135540241650977, "grad_norm": 5.828234473307503, "learning_rate": 5.0236623339929025e-06, "loss": 17.0713, "step": 28095 }, { "epoch": 0.5135723033615442, "grad_norm": 4.6051927541259134, "learning_rate": 5.023366325728406e-06, "loss": 16.7456, "step": 28096 }, { "epoch": 0.5135905825579907, "grad_norm": 6.234438137695905, "learning_rate": 5.023070317382013e-06, "loss": 17.4698, "step": 28097 }, { "epoch": 0.5136088617544373, "grad_norm": 6.745582056480888, "learning_rate": 5.0227743089547594e-06, "loss": 17.3086, "step": 28098 }, { "epoch": 0.5136271409508838, "grad_norm": 5.385427269965797, "learning_rate": 5.022478300447685e-06, "loss": 17.2957, "step": 28099 }, { "epoch": 0.5136454201473303, "grad_norm": 8.091292903650018, "learning_rate": 5.022182291861826e-06, "loss": 17.9107, "step": 28100 }, { "epoch": 0.5136636993437769, "grad_norm": 6.26059130775774, "learning_rate": 5.021886283198221e-06, "loss": 17.4496, "step": 28101 }, { "epoch": 0.5136819785402233, "grad_norm": 6.137892618674586, "learning_rate": 5.021590274457906e-06, "loss": 17.6514, "step": 28102 }, { "epoch": 0.5137002577366699, "grad_norm": 7.384613148894318, "learning_rate": 5.0212942656419175e-06, "loss": 17.8735, "step": 28103 }, { "epoch": 0.5137185369331164, "grad_norm": 6.224373819966781, "learning_rate": 5.020998256751295e-06, "loss": 17.6275, "step": 28104 }, { "epoch": 0.5137368161295629, "grad_norm": 6.561541920810067, "learning_rate": 5.020702247787076e-06, "loss": 17.4027, "step": 28105 }, { "epoch": 0.5137550953260095, "grad_norm": 6.348724462220238, "learning_rate": 5.020406238750297e-06, "loss": 17.2009, "step": 28106 }, { "epoch": 0.513773374522456, "grad_norm": 5.6274317742546, "learning_rate": 5.020110229641997e-06, "loss": 17.1539, "step": 28107 }, { "epoch": 0.5137916537189026, "grad_norm": 5.402416735629333, "learning_rate": 5.01981422046321e-06, "loss": 16.9529, "step": 28108 }, { "epoch": 0.513809932915349, "grad_norm": 7.337176309987599, "learning_rate": 5.019518211214978e-06, "loss": 17.723, "step": 28109 }, { "epoch": 0.5138282121117955, "grad_norm": 6.865097560876604, "learning_rate": 5.019222201898336e-06, "loss": 17.5339, "step": 28110 }, { "epoch": 0.5138464913082421, "grad_norm": 6.23455616923348, "learning_rate": 5.0189261925143214e-06, "loss": 17.2609, "step": 28111 }, { "epoch": 0.5138647705046886, "grad_norm": 6.809861678289743, "learning_rate": 5.018630183063972e-06, "loss": 17.4722, "step": 28112 }, { "epoch": 0.5138830497011352, "grad_norm": 6.1133301907662245, "learning_rate": 5.018334173548326e-06, "loss": 17.4801, "step": 28113 }, { "epoch": 0.5139013288975817, "grad_norm": 11.552232517083285, "learning_rate": 5.018038163968419e-06, "loss": 18.6332, "step": 28114 }, { "epoch": 0.5139196080940281, "grad_norm": 6.335392764099568, "learning_rate": 5.0177421543252925e-06, "loss": 17.5472, "step": 28115 }, { "epoch": 0.5139378872904747, "grad_norm": 5.9829507015151036, "learning_rate": 5.017446144619978e-06, "loss": 17.6113, "step": 28116 }, { "epoch": 0.5139561664869212, "grad_norm": 5.410154458145175, "learning_rate": 5.017150134853518e-06, "loss": 17.0227, "step": 28117 }, { "epoch": 0.5139744456833678, "grad_norm": 5.83075024866596, "learning_rate": 5.016854125026949e-06, "loss": 17.3264, "step": 28118 }, { "epoch": 0.5139927248798143, "grad_norm": 5.460150176460602, "learning_rate": 5.016558115141308e-06, "loss": 17.1501, "step": 28119 }, { "epoch": 0.5140110040762608, "grad_norm": 7.476150179442159, "learning_rate": 5.01626210519763e-06, "loss": 18.2651, "step": 28120 }, { "epoch": 0.5140292832727074, "grad_norm": 8.293272410674218, "learning_rate": 5.015966095196956e-06, "loss": 18.1733, "step": 28121 }, { "epoch": 0.5140475624691538, "grad_norm": 8.932934328206157, "learning_rate": 5.015670085140323e-06, "loss": 18.0572, "step": 28122 }, { "epoch": 0.5140658416656004, "grad_norm": 6.376393939232121, "learning_rate": 5.0153740750287665e-06, "loss": 17.6159, "step": 28123 }, { "epoch": 0.5140841208620469, "grad_norm": 4.975699133719373, "learning_rate": 5.015078064863325e-06, "loss": 16.8964, "step": 28124 }, { "epoch": 0.5141024000584934, "grad_norm": 5.524051630978833, "learning_rate": 5.014782054645037e-06, "loss": 17.229, "step": 28125 }, { "epoch": 0.51412067925494, "grad_norm": 6.733322352927828, "learning_rate": 5.014486044374939e-06, "loss": 17.7211, "step": 28126 }, { "epoch": 0.5141389584513865, "grad_norm": 6.145482932736864, "learning_rate": 5.014190034054068e-06, "loss": 17.4748, "step": 28127 }, { "epoch": 0.514157237647833, "grad_norm": 10.969824636140316, "learning_rate": 5.013894023683463e-06, "loss": 18.5218, "step": 28128 }, { "epoch": 0.5141755168442795, "grad_norm": 6.207129862453142, "learning_rate": 5.013598013264162e-06, "loss": 17.3163, "step": 28129 }, { "epoch": 0.514193796040726, "grad_norm": 8.738833173940085, "learning_rate": 5.013302002797198e-06, "loss": 18.1539, "step": 28130 }, { "epoch": 0.5142120752371726, "grad_norm": 6.69535504124619, "learning_rate": 5.013005992283613e-06, "loss": 17.2844, "step": 28131 }, { "epoch": 0.5142303544336191, "grad_norm": 6.870859011575733, "learning_rate": 5.012709981724443e-06, "loss": 17.4839, "step": 28132 }, { "epoch": 0.5142486336300657, "grad_norm": 5.880878576065578, "learning_rate": 5.012413971120726e-06, "loss": 17.4608, "step": 28133 }, { "epoch": 0.5142669128265122, "grad_norm": 7.412049525932334, "learning_rate": 5.0121179604735005e-06, "loss": 17.7907, "step": 28134 }, { "epoch": 0.5142851920229586, "grad_norm": 5.182481876257259, "learning_rate": 5.011821949783799e-06, "loss": 17.1516, "step": 28135 }, { "epoch": 0.5143034712194052, "grad_norm": 7.256991660617841, "learning_rate": 5.011525939052666e-06, "loss": 17.9211, "step": 28136 }, { "epoch": 0.5143217504158517, "grad_norm": 6.612943637546306, "learning_rate": 5.011229928281134e-06, "loss": 17.7423, "step": 28137 }, { "epoch": 0.5143400296122982, "grad_norm": 6.236537944236401, "learning_rate": 5.010933917470243e-06, "loss": 17.4842, "step": 28138 }, { "epoch": 0.5143583088087448, "grad_norm": 6.912204897544808, "learning_rate": 5.0106379066210285e-06, "loss": 17.5455, "step": 28139 }, { "epoch": 0.5143765880051913, "grad_norm": 5.305372202814259, "learning_rate": 5.010341895734529e-06, "loss": 17.1269, "step": 28140 }, { "epoch": 0.5143948672016379, "grad_norm": 5.715634971383142, "learning_rate": 5.010045884811783e-06, "loss": 17.015, "step": 28141 }, { "epoch": 0.5144131463980843, "grad_norm": 8.049630120347942, "learning_rate": 5.009749873853827e-06, "loss": 18.0077, "step": 28142 }, { "epoch": 0.5144314255945308, "grad_norm": 5.197981675803811, "learning_rate": 5.009453862861697e-06, "loss": 16.8769, "step": 28143 }, { "epoch": 0.5144497047909774, "grad_norm": 6.366521019383396, "learning_rate": 5.009157851836434e-06, "loss": 17.6728, "step": 28144 }, { "epoch": 0.5144679839874239, "grad_norm": 4.922210672212978, "learning_rate": 5.008861840779072e-06, "loss": 16.7652, "step": 28145 }, { "epoch": 0.5144862631838705, "grad_norm": 6.174499497399777, "learning_rate": 5.008565829690652e-06, "loss": 17.2349, "step": 28146 }, { "epoch": 0.514504542380317, "grad_norm": 6.432555622617168, "learning_rate": 5.0082698185722076e-06, "loss": 17.6648, "step": 28147 }, { "epoch": 0.5145228215767634, "grad_norm": 6.826354523350087, "learning_rate": 5.007973807424778e-06, "loss": 17.4025, "step": 28148 }, { "epoch": 0.51454110077321, "grad_norm": 5.885592308033598, "learning_rate": 5.007677796249402e-06, "loss": 17.3885, "step": 28149 }, { "epoch": 0.5145593799696565, "grad_norm": 5.243712760308505, "learning_rate": 5.007381785047116e-06, "loss": 17.0312, "step": 28150 }, { "epoch": 0.5145776591661031, "grad_norm": 6.797253314513713, "learning_rate": 5.007085773818958e-06, "loss": 17.6553, "step": 28151 }, { "epoch": 0.5145959383625496, "grad_norm": 5.585097237420028, "learning_rate": 5.006789762565964e-06, "loss": 17.2602, "step": 28152 }, { "epoch": 0.5146142175589961, "grad_norm": 6.464299392483597, "learning_rate": 5.006493751289172e-06, "loss": 17.3206, "step": 28153 }, { "epoch": 0.5146324967554426, "grad_norm": 5.9594289792473, "learning_rate": 5.006197739989621e-06, "loss": 17.4366, "step": 28154 }, { "epoch": 0.5146507759518891, "grad_norm": 5.19228241604465, "learning_rate": 5.005901728668346e-06, "loss": 16.9556, "step": 28155 }, { "epoch": 0.5146690551483357, "grad_norm": 5.6669730647734475, "learning_rate": 5.005605717326388e-06, "loss": 17.3045, "step": 28156 }, { "epoch": 0.5146873343447822, "grad_norm": 4.970170824432851, "learning_rate": 5.005309705964782e-06, "loss": 16.9029, "step": 28157 }, { "epoch": 0.5147056135412287, "grad_norm": 6.8288527551437115, "learning_rate": 5.005013694584565e-06, "loss": 17.6727, "step": 28158 }, { "epoch": 0.5147238927376753, "grad_norm": 6.65212379126175, "learning_rate": 5.004717683186775e-06, "loss": 17.6678, "step": 28159 }, { "epoch": 0.5147421719341218, "grad_norm": 6.49324925634759, "learning_rate": 5.004421671772453e-06, "loss": 17.4102, "step": 28160 }, { "epoch": 0.5147604511305683, "grad_norm": 6.6435775706349505, "learning_rate": 5.00412566034263e-06, "loss": 17.6286, "step": 28161 }, { "epoch": 0.5147787303270148, "grad_norm": 5.318601132790432, "learning_rate": 5.003829648898347e-06, "loss": 17.1369, "step": 28162 }, { "epoch": 0.5147970095234613, "grad_norm": 5.851364989000181, "learning_rate": 5.003533637440643e-06, "loss": 17.1015, "step": 28163 }, { "epoch": 0.5148152887199079, "grad_norm": 5.97345678470819, "learning_rate": 5.003237625970554e-06, "loss": 17.3515, "step": 28164 }, { "epoch": 0.5148335679163544, "grad_norm": 4.319079791050368, "learning_rate": 5.002941614489117e-06, "loss": 16.7136, "step": 28165 }, { "epoch": 0.514851847112801, "grad_norm": 6.030998116732727, "learning_rate": 5.0026456029973705e-06, "loss": 17.2514, "step": 28166 }, { "epoch": 0.5148701263092474, "grad_norm": 7.039669797362035, "learning_rate": 5.002349591496349e-06, "loss": 17.6927, "step": 28167 }, { "epoch": 0.5148884055056939, "grad_norm": 7.865756664491041, "learning_rate": 5.002053579987095e-06, "loss": 17.9537, "step": 28168 }, { "epoch": 0.5149066847021405, "grad_norm": 6.962113674781745, "learning_rate": 5.001757568470642e-06, "loss": 17.5078, "step": 28169 }, { "epoch": 0.514924963898587, "grad_norm": 6.504396415023693, "learning_rate": 5.00146155694803e-06, "loss": 17.4157, "step": 28170 }, { "epoch": 0.5149432430950336, "grad_norm": 5.863818599382739, "learning_rate": 5.001165545420293e-06, "loss": 17.2288, "step": 28171 }, { "epoch": 0.5149615222914801, "grad_norm": 6.319205856870767, "learning_rate": 5.0008695338884725e-06, "loss": 17.4342, "step": 28172 }, { "epoch": 0.5149798014879265, "grad_norm": 5.943042318046097, "learning_rate": 5.000573522353604e-06, "loss": 17.356, "step": 28173 }, { "epoch": 0.5149980806843731, "grad_norm": 6.129856488068772, "learning_rate": 5.000277510816728e-06, "loss": 17.3474, "step": 28174 }, { "epoch": 0.5150163598808196, "grad_norm": 5.945116040970834, "learning_rate": 4.999981499278876e-06, "loss": 17.1864, "step": 28175 }, { "epoch": 0.5150346390772662, "grad_norm": 8.265559736040654, "learning_rate": 4.9996854877410905e-06, "loss": 17.8946, "step": 28176 }, { "epoch": 0.5150529182737127, "grad_norm": 6.437334872176177, "learning_rate": 4.999389476204406e-06, "loss": 17.5352, "step": 28177 }, { "epoch": 0.5150711974701592, "grad_norm": 5.419875494669513, "learning_rate": 4.999093464669863e-06, "loss": 16.9175, "step": 28178 }, { "epoch": 0.5150894766666058, "grad_norm": 6.770252063586818, "learning_rate": 4.998797453138496e-06, "loss": 17.6959, "step": 28179 }, { "epoch": 0.5151077558630522, "grad_norm": 5.81401931216123, "learning_rate": 4.998501441611343e-06, "loss": 17.1223, "step": 28180 }, { "epoch": 0.5151260350594988, "grad_norm": 6.229354007716394, "learning_rate": 4.998205430089445e-06, "loss": 17.4315, "step": 28181 }, { "epoch": 0.5151443142559453, "grad_norm": 6.00189037519563, "learning_rate": 4.9979094185738344e-06, "loss": 17.3048, "step": 28182 }, { "epoch": 0.5151625934523918, "grad_norm": 6.072229788591644, "learning_rate": 4.997613407065552e-06, "loss": 17.2411, "step": 28183 }, { "epoch": 0.5151808726488384, "grad_norm": 4.97116775481753, "learning_rate": 4.997317395565635e-06, "loss": 16.9966, "step": 28184 }, { "epoch": 0.5151991518452849, "grad_norm": 5.731876597442955, "learning_rate": 4.9970213840751185e-06, "loss": 17.3688, "step": 28185 }, { "epoch": 0.5152174310417315, "grad_norm": 6.139715199829413, "learning_rate": 4.996725372595044e-06, "loss": 17.1598, "step": 28186 }, { "epoch": 0.5152357102381779, "grad_norm": 6.436362192283496, "learning_rate": 4.996429361126447e-06, "loss": 17.4544, "step": 28187 }, { "epoch": 0.5152539894346244, "grad_norm": 7.375691130758968, "learning_rate": 4.996133349670362e-06, "loss": 17.7593, "step": 28188 }, { "epoch": 0.515272268631071, "grad_norm": 6.085832382220902, "learning_rate": 4.995837338227832e-06, "loss": 17.4433, "step": 28189 }, { "epoch": 0.5152905478275175, "grad_norm": 6.663826732315967, "learning_rate": 4.9955413267998905e-06, "loss": 17.5097, "step": 28190 }, { "epoch": 0.5153088270239641, "grad_norm": 5.015545658554212, "learning_rate": 4.995245315387575e-06, "loss": 16.7537, "step": 28191 }, { "epoch": 0.5153271062204106, "grad_norm": 5.838505174851261, "learning_rate": 4.994949303991928e-06, "loss": 17.252, "step": 28192 }, { "epoch": 0.515345385416857, "grad_norm": 8.249928955669725, "learning_rate": 4.9946532926139805e-06, "loss": 18.2775, "step": 28193 }, { "epoch": 0.5153636646133036, "grad_norm": 6.7158528747227955, "learning_rate": 4.994357281254772e-06, "loss": 17.6138, "step": 28194 }, { "epoch": 0.5153819438097501, "grad_norm": 5.824144166676193, "learning_rate": 4.994061269915343e-06, "loss": 17.2477, "step": 28195 }, { "epoch": 0.5154002230061967, "grad_norm": 5.463913434287644, "learning_rate": 4.993765258596728e-06, "loss": 17.3145, "step": 28196 }, { "epoch": 0.5154185022026432, "grad_norm": 6.420409486988014, "learning_rate": 4.993469247299964e-06, "loss": 17.47, "step": 28197 }, { "epoch": 0.5154367813990897, "grad_norm": 5.579108004881442, "learning_rate": 4.993173236026091e-06, "loss": 17.1566, "step": 28198 }, { "epoch": 0.5154550605955363, "grad_norm": 6.457916538417718, "learning_rate": 4.9928772247761435e-06, "loss": 17.6163, "step": 28199 }, { "epoch": 0.5154733397919827, "grad_norm": 6.571864744552564, "learning_rate": 4.992581213551163e-06, "loss": 17.651, "step": 28200 }, { "epoch": 0.5154916189884293, "grad_norm": 6.43539111382244, "learning_rate": 4.992285202352184e-06, "loss": 17.5781, "step": 28201 }, { "epoch": 0.5155098981848758, "grad_norm": 5.1435792108081895, "learning_rate": 4.9919891911802445e-06, "loss": 17.0905, "step": 28202 }, { "epoch": 0.5155281773813223, "grad_norm": 7.134187654591497, "learning_rate": 4.991693180036382e-06, "loss": 17.9438, "step": 28203 }, { "epoch": 0.5155464565777689, "grad_norm": 6.490637936095741, "learning_rate": 4.9913971689216355e-06, "loss": 17.351, "step": 28204 }, { "epoch": 0.5155647357742154, "grad_norm": 7.956565135064702, "learning_rate": 4.991101157837038e-06, "loss": 17.9674, "step": 28205 }, { "epoch": 0.5155830149706618, "grad_norm": 6.678808826906036, "learning_rate": 4.990805146783633e-06, "loss": 17.649, "step": 28206 }, { "epoch": 0.5156012941671084, "grad_norm": 6.412975551902454, "learning_rate": 4.990509135762455e-06, "loss": 17.5338, "step": 28207 }, { "epoch": 0.5156195733635549, "grad_norm": 8.295048085068535, "learning_rate": 4.9902131247745395e-06, "loss": 17.9047, "step": 28208 }, { "epoch": 0.5156378525600015, "grad_norm": 6.91658108633665, "learning_rate": 4.989917113820928e-06, "loss": 17.6383, "step": 28209 }, { "epoch": 0.515656131756448, "grad_norm": 5.576102802165591, "learning_rate": 4.989621102902658e-06, "loss": 17.3871, "step": 28210 }, { "epoch": 0.5156744109528945, "grad_norm": 8.235191717878612, "learning_rate": 4.9893250920207606e-06, "loss": 18.031, "step": 28211 }, { "epoch": 0.515692690149341, "grad_norm": 5.481930958457845, "learning_rate": 4.98902908117628e-06, "loss": 17.0455, "step": 28212 }, { "epoch": 0.5157109693457875, "grad_norm": 6.837430098374462, "learning_rate": 4.988733070370251e-06, "loss": 17.7349, "step": 28213 }, { "epoch": 0.5157292485422341, "grad_norm": 7.129773801985469, "learning_rate": 4.988437059603713e-06, "loss": 17.821, "step": 28214 }, { "epoch": 0.5157475277386806, "grad_norm": 8.401615798316545, "learning_rate": 4.988141048877703e-06, "loss": 18.8932, "step": 28215 }, { "epoch": 0.5157658069351271, "grad_norm": 6.682211248851416, "learning_rate": 4.987845038193254e-06, "loss": 17.6356, "step": 28216 }, { "epoch": 0.5157840861315737, "grad_norm": 6.583499921389675, "learning_rate": 4.987549027551409e-06, "loss": 17.3783, "step": 28217 }, { "epoch": 0.5158023653280202, "grad_norm": 5.521369248951098, "learning_rate": 4.987253016953205e-06, "loss": 17.1592, "step": 28218 }, { "epoch": 0.5158206445244667, "grad_norm": 6.762331827717847, "learning_rate": 4.986957006399675e-06, "loss": 17.2717, "step": 28219 }, { "epoch": 0.5158389237209132, "grad_norm": 6.7441140402099, "learning_rate": 4.986660995891862e-06, "loss": 17.633, "step": 28220 }, { "epoch": 0.5158572029173597, "grad_norm": 7.605188296975337, "learning_rate": 4.986364985430801e-06, "loss": 17.9664, "step": 28221 }, { "epoch": 0.5158754821138063, "grad_norm": 5.954533950344529, "learning_rate": 4.986068975017527e-06, "loss": 17.1513, "step": 28222 }, { "epoch": 0.5158937613102528, "grad_norm": 5.19384164719807, "learning_rate": 4.985772964653083e-06, "loss": 17.0377, "step": 28223 }, { "epoch": 0.5159120405066994, "grad_norm": 5.709840000751817, "learning_rate": 4.985476954338504e-06, "loss": 17.0539, "step": 28224 }, { "epoch": 0.5159303197031458, "grad_norm": 5.467003263646111, "learning_rate": 4.985180944074824e-06, "loss": 17.1586, "step": 28225 }, { "epoch": 0.5159485988995923, "grad_norm": 6.818836571595896, "learning_rate": 4.984884933863085e-06, "loss": 17.7478, "step": 28226 }, { "epoch": 0.5159668780960389, "grad_norm": 6.905771052252174, "learning_rate": 4.984588923704323e-06, "loss": 18.1019, "step": 28227 }, { "epoch": 0.5159851572924854, "grad_norm": 5.661404374163345, "learning_rate": 4.984292913599575e-06, "loss": 17.0274, "step": 28228 }, { "epoch": 0.516003436488932, "grad_norm": 7.014521349377153, "learning_rate": 4.983996903549881e-06, "loss": 17.7912, "step": 28229 }, { "epoch": 0.5160217156853785, "grad_norm": 6.232023919122138, "learning_rate": 4.983700893556273e-06, "loss": 17.6686, "step": 28230 }, { "epoch": 0.516039994881825, "grad_norm": 7.890911013477252, "learning_rate": 4.983404883619794e-06, "loss": 17.769, "step": 28231 }, { "epoch": 0.5160582740782715, "grad_norm": 6.872587574323388, "learning_rate": 4.98310887374148e-06, "loss": 17.5923, "step": 28232 }, { "epoch": 0.516076553274718, "grad_norm": 5.201490169639113, "learning_rate": 4.982812863922366e-06, "loss": 17.0947, "step": 28233 }, { "epoch": 0.5160948324711646, "grad_norm": 5.763744767295851, "learning_rate": 4.982516854163494e-06, "loss": 17.3743, "step": 28234 }, { "epoch": 0.5161131116676111, "grad_norm": 6.914334473628715, "learning_rate": 4.982220844465897e-06, "loss": 18.0751, "step": 28235 }, { "epoch": 0.5161313908640576, "grad_norm": 6.620543844060205, "learning_rate": 4.981924834830614e-06, "loss": 17.7356, "step": 28236 }, { "epoch": 0.5161496700605042, "grad_norm": 6.557555013743278, "learning_rate": 4.9816288252586844e-06, "loss": 17.4938, "step": 28237 }, { "epoch": 0.5161679492569506, "grad_norm": 6.9004998332954015, "learning_rate": 4.981332815751144e-06, "loss": 17.7065, "step": 28238 }, { "epoch": 0.5161862284533972, "grad_norm": 6.3165920019418875, "learning_rate": 4.98103680630903e-06, "loss": 17.3589, "step": 28239 }, { "epoch": 0.5162045076498437, "grad_norm": 6.509015542500228, "learning_rate": 4.98074079693338e-06, "loss": 17.6352, "step": 28240 }, { "epoch": 0.5162227868462902, "grad_norm": 7.113184527318696, "learning_rate": 4.980444787625233e-06, "loss": 17.6092, "step": 28241 }, { "epoch": 0.5162410660427368, "grad_norm": 6.14557656788362, "learning_rate": 4.980148778385623e-06, "loss": 17.1574, "step": 28242 }, { "epoch": 0.5162593452391833, "grad_norm": 7.213181570738609, "learning_rate": 4.9798527692155915e-06, "loss": 18.0375, "step": 28243 }, { "epoch": 0.5162776244356299, "grad_norm": 5.639323384051671, "learning_rate": 4.9795567601161735e-06, "loss": 17.1832, "step": 28244 }, { "epoch": 0.5162959036320763, "grad_norm": 6.137465459690317, "learning_rate": 4.979260751088409e-06, "loss": 17.5277, "step": 28245 }, { "epoch": 0.5163141828285228, "grad_norm": 7.096317856002897, "learning_rate": 4.9789647421333335e-06, "loss": 17.6459, "step": 28246 }, { "epoch": 0.5163324620249694, "grad_norm": 6.791427737996549, "learning_rate": 4.978668733251982e-06, "loss": 17.8373, "step": 28247 }, { "epoch": 0.5163507412214159, "grad_norm": 10.550594804003643, "learning_rate": 4.978372724445397e-06, "loss": 18.8802, "step": 28248 }, { "epoch": 0.5163690204178625, "grad_norm": 5.836382127207683, "learning_rate": 4.978076715714614e-06, "loss": 17.6371, "step": 28249 }, { "epoch": 0.516387299614309, "grad_norm": 6.107905509674293, "learning_rate": 4.977780707060668e-06, "loss": 17.2012, "step": 28250 }, { "epoch": 0.5164055788107554, "grad_norm": 6.893817051738435, "learning_rate": 4.977484698484602e-06, "loss": 17.5091, "step": 28251 }, { "epoch": 0.516423858007202, "grad_norm": 5.507606081739157, "learning_rate": 4.9771886899874485e-06, "loss": 17.087, "step": 28252 }, { "epoch": 0.5164421372036485, "grad_norm": 5.895304799896382, "learning_rate": 4.976892681570246e-06, "loss": 17.3784, "step": 28253 }, { "epoch": 0.5164604164000951, "grad_norm": 6.530205478059318, "learning_rate": 4.9765966732340335e-06, "loss": 17.5561, "step": 28254 }, { "epoch": 0.5164786955965416, "grad_norm": 6.872070323979689, "learning_rate": 4.9763006649798485e-06, "loss": 17.7354, "step": 28255 }, { "epoch": 0.5164969747929881, "grad_norm": 6.304183298980531, "learning_rate": 4.976004656808725e-06, "loss": 17.3701, "step": 28256 }, { "epoch": 0.5165152539894347, "grad_norm": 6.000271605623386, "learning_rate": 4.975708648721705e-06, "loss": 17.3946, "step": 28257 }, { "epoch": 0.5165335331858811, "grad_norm": 6.491954323026376, "learning_rate": 4.975412640719825e-06, "loss": 17.6466, "step": 28258 }, { "epoch": 0.5165518123823277, "grad_norm": 5.061908246575599, "learning_rate": 4.975116632804119e-06, "loss": 16.9296, "step": 28259 }, { "epoch": 0.5165700915787742, "grad_norm": 6.976247778399507, "learning_rate": 4.974820624975629e-06, "loss": 17.6008, "step": 28260 }, { "epoch": 0.5165883707752207, "grad_norm": 6.742763888324338, "learning_rate": 4.974524617235389e-06, "loss": 17.6102, "step": 28261 }, { "epoch": 0.5166066499716673, "grad_norm": 6.6387729698208275, "learning_rate": 4.974228609584438e-06, "loss": 17.6382, "step": 28262 }, { "epoch": 0.5166249291681138, "grad_norm": 6.2449695980180415, "learning_rate": 4.973932602023816e-06, "loss": 17.5073, "step": 28263 }, { "epoch": 0.5166432083645603, "grad_norm": 6.450636685086195, "learning_rate": 4.973636594554555e-06, "loss": 17.3292, "step": 28264 }, { "epoch": 0.5166614875610068, "grad_norm": 7.9278652694344975, "learning_rate": 4.973340587177698e-06, "loss": 18.0148, "step": 28265 }, { "epoch": 0.5166797667574533, "grad_norm": 7.312880169912325, "learning_rate": 4.9730445798942784e-06, "loss": 17.8211, "step": 28266 }, { "epoch": 0.5166980459538999, "grad_norm": 5.976868121343258, "learning_rate": 4.972748572705334e-06, "loss": 17.2071, "step": 28267 }, { "epoch": 0.5167163251503464, "grad_norm": 6.468444778454015, "learning_rate": 4.972452565611906e-06, "loss": 17.235, "step": 28268 }, { "epoch": 0.516734604346793, "grad_norm": 5.1413800935468235, "learning_rate": 4.9721565586150295e-06, "loss": 16.8773, "step": 28269 }, { "epoch": 0.5167528835432394, "grad_norm": 7.270722378199894, "learning_rate": 4.971860551715739e-06, "loss": 17.472, "step": 28270 }, { "epoch": 0.5167711627396859, "grad_norm": 7.904071788550602, "learning_rate": 4.971564544915077e-06, "loss": 18.0134, "step": 28271 }, { "epoch": 0.5167894419361325, "grad_norm": 5.7469197328987995, "learning_rate": 4.971268538214079e-06, "loss": 17.2567, "step": 28272 }, { "epoch": 0.516807721132579, "grad_norm": 5.030550165373328, "learning_rate": 4.97097253161378e-06, "loss": 16.8883, "step": 28273 }, { "epoch": 0.5168260003290255, "grad_norm": 6.871964276897015, "learning_rate": 4.970676525115223e-06, "loss": 17.7691, "step": 28274 }, { "epoch": 0.5168442795254721, "grad_norm": 4.606991548838814, "learning_rate": 4.970380518719439e-06, "loss": 16.7543, "step": 28275 }, { "epoch": 0.5168625587219186, "grad_norm": 5.56013897016377, "learning_rate": 4.97008451242747e-06, "loss": 17.3125, "step": 28276 }, { "epoch": 0.5168808379183651, "grad_norm": 6.780150570995449, "learning_rate": 4.969788506240354e-06, "loss": 17.4703, "step": 28277 }, { "epoch": 0.5168991171148116, "grad_norm": 6.311010318448981, "learning_rate": 4.9694925001591235e-06, "loss": 17.3542, "step": 28278 }, { "epoch": 0.5169173963112581, "grad_norm": 6.854886493991408, "learning_rate": 4.969196494184822e-06, "loss": 17.5825, "step": 28279 }, { "epoch": 0.5169356755077047, "grad_norm": 8.301282717597386, "learning_rate": 4.968900488318483e-06, "loss": 18.3161, "step": 28280 }, { "epoch": 0.5169539547041512, "grad_norm": 6.235898453284987, "learning_rate": 4.968604482561143e-06, "loss": 17.5212, "step": 28281 }, { "epoch": 0.5169722339005978, "grad_norm": 6.341026571008973, "learning_rate": 4.968308476913845e-06, "loss": 17.5553, "step": 28282 }, { "epoch": 0.5169905130970442, "grad_norm": 7.778073061671004, "learning_rate": 4.968012471377623e-06, "loss": 18.2273, "step": 28283 }, { "epoch": 0.5170087922934907, "grad_norm": 5.256761677275139, "learning_rate": 4.967716465953512e-06, "loss": 17.1107, "step": 28284 }, { "epoch": 0.5170270714899373, "grad_norm": 5.232657715473453, "learning_rate": 4.967420460642553e-06, "loss": 17.0836, "step": 28285 }, { "epoch": 0.5170453506863838, "grad_norm": 6.372387967753305, "learning_rate": 4.967124455445783e-06, "loss": 17.3634, "step": 28286 }, { "epoch": 0.5170636298828304, "grad_norm": 7.721831486024882, "learning_rate": 4.966828450364238e-06, "loss": 17.7065, "step": 28287 }, { "epoch": 0.5170819090792769, "grad_norm": 6.690076463553113, "learning_rate": 4.966532445398958e-06, "loss": 17.6673, "step": 28288 }, { "epoch": 0.5171001882757233, "grad_norm": 7.281341350508655, "learning_rate": 4.966236440550977e-06, "loss": 17.7145, "step": 28289 }, { "epoch": 0.5171184674721699, "grad_norm": 5.559529328863653, "learning_rate": 4.965940435821334e-06, "loss": 17.0352, "step": 28290 }, { "epoch": 0.5171367466686164, "grad_norm": 6.753979170901499, "learning_rate": 4.965644431211069e-06, "loss": 17.2227, "step": 28291 }, { "epoch": 0.517155025865063, "grad_norm": 6.501264889097688, "learning_rate": 4.9653484267212145e-06, "loss": 17.5962, "step": 28292 }, { "epoch": 0.5171733050615095, "grad_norm": 8.685964218239112, "learning_rate": 4.965052422352814e-06, "loss": 18.3715, "step": 28293 }, { "epoch": 0.517191584257956, "grad_norm": 6.520037121411414, "learning_rate": 4.9647564181069e-06, "loss": 17.9027, "step": 28294 }, { "epoch": 0.5172098634544026, "grad_norm": 5.006900722549799, "learning_rate": 4.9644604139845106e-06, "loss": 17.0317, "step": 28295 }, { "epoch": 0.517228142650849, "grad_norm": 6.79245027887876, "learning_rate": 4.964164409986687e-06, "loss": 17.8067, "step": 28296 }, { "epoch": 0.5172464218472956, "grad_norm": 6.425147652690195, "learning_rate": 4.963868406114463e-06, "loss": 17.0715, "step": 28297 }, { "epoch": 0.5172647010437421, "grad_norm": 7.592396400924471, "learning_rate": 4.963572402368877e-06, "loss": 17.6252, "step": 28298 }, { "epoch": 0.5172829802401886, "grad_norm": 4.9459803310968, "learning_rate": 4.9632763987509656e-06, "loss": 16.8748, "step": 28299 }, { "epoch": 0.5173012594366352, "grad_norm": 5.637228125645112, "learning_rate": 4.962980395261769e-06, "loss": 17.0424, "step": 28300 }, { "epoch": 0.5173195386330817, "grad_norm": 6.5955010881627985, "learning_rate": 4.96268439190232e-06, "loss": 17.5066, "step": 28301 }, { "epoch": 0.5173378178295283, "grad_norm": 7.152268273930854, "learning_rate": 4.962388388673661e-06, "loss": 17.9691, "step": 28302 }, { "epoch": 0.5173560970259747, "grad_norm": 6.301281888975413, "learning_rate": 4.962092385576828e-06, "loss": 17.3616, "step": 28303 }, { "epoch": 0.5173743762224212, "grad_norm": 7.623465819388862, "learning_rate": 4.961796382612857e-06, "loss": 18.2055, "step": 28304 }, { "epoch": 0.5173926554188678, "grad_norm": 6.844232173095886, "learning_rate": 4.961500379782787e-06, "loss": 17.559, "step": 28305 }, { "epoch": 0.5174109346153143, "grad_norm": 5.103228395683297, "learning_rate": 4.961204377087654e-06, "loss": 17.0642, "step": 28306 }, { "epoch": 0.5174292138117609, "grad_norm": 5.862556345814915, "learning_rate": 4.9609083745284955e-06, "loss": 17.403, "step": 28307 }, { "epoch": 0.5174474930082074, "grad_norm": 6.84776664844964, "learning_rate": 4.960612372106352e-06, "loss": 17.7825, "step": 28308 }, { "epoch": 0.5174657722046538, "grad_norm": 7.289848890162367, "learning_rate": 4.9603163698222565e-06, "loss": 17.8208, "step": 28309 }, { "epoch": 0.5174840514011004, "grad_norm": 5.779740391739088, "learning_rate": 4.960020367677251e-06, "loss": 17.2748, "step": 28310 }, { "epoch": 0.5175023305975469, "grad_norm": 6.937638054976089, "learning_rate": 4.959724365672369e-06, "loss": 17.5835, "step": 28311 }, { "epoch": 0.5175206097939935, "grad_norm": 5.821591776952739, "learning_rate": 4.95942836380865e-06, "loss": 17.0297, "step": 28312 }, { "epoch": 0.51753888899044, "grad_norm": 7.870391038015023, "learning_rate": 4.959132362087131e-06, "loss": 17.5907, "step": 28313 }, { "epoch": 0.5175571681868865, "grad_norm": 5.963559402433707, "learning_rate": 4.958836360508851e-06, "loss": 17.0072, "step": 28314 }, { "epoch": 0.517575447383333, "grad_norm": 6.138604551563362, "learning_rate": 4.958540359074843e-06, "loss": 17.4719, "step": 28315 }, { "epoch": 0.5175937265797795, "grad_norm": 5.55085953010945, "learning_rate": 4.958244357786149e-06, "loss": 17.0418, "step": 28316 }, { "epoch": 0.5176120057762261, "grad_norm": 6.216162786051773, "learning_rate": 4.957948356643806e-06, "loss": 17.5679, "step": 28317 }, { "epoch": 0.5176302849726726, "grad_norm": 6.1428693500892, "learning_rate": 4.9576523556488485e-06, "loss": 17.5301, "step": 28318 }, { "epoch": 0.5176485641691191, "grad_norm": 5.828606178745438, "learning_rate": 4.957356354802318e-06, "loss": 17.1704, "step": 28319 }, { "epoch": 0.5176668433655657, "grad_norm": 7.199583396032562, "learning_rate": 4.957060354105247e-06, "loss": 17.832, "step": 28320 }, { "epoch": 0.5176851225620122, "grad_norm": 6.743918145778942, "learning_rate": 4.956764353558677e-06, "loss": 17.7326, "step": 28321 }, { "epoch": 0.5177034017584587, "grad_norm": 6.830053726994922, "learning_rate": 4.956468353163646e-06, "loss": 17.6931, "step": 28322 }, { "epoch": 0.5177216809549052, "grad_norm": 5.917704872181462, "learning_rate": 4.956172352921186e-06, "loss": 17.2226, "step": 28323 }, { "epoch": 0.5177399601513517, "grad_norm": 5.256594909885979, "learning_rate": 4.955876352832342e-06, "loss": 17.0001, "step": 28324 }, { "epoch": 0.5177582393477983, "grad_norm": 5.406433740918442, "learning_rate": 4.955580352898145e-06, "loss": 17.2431, "step": 28325 }, { "epoch": 0.5177765185442448, "grad_norm": 5.8918516094395965, "learning_rate": 4.955284353119635e-06, "loss": 17.2941, "step": 28326 }, { "epoch": 0.5177947977406914, "grad_norm": 5.17406305535719, "learning_rate": 4.954988353497851e-06, "loss": 16.8681, "step": 28327 }, { "epoch": 0.5178130769371379, "grad_norm": 5.742955895787697, "learning_rate": 4.954692354033829e-06, "loss": 17.2397, "step": 28328 }, { "epoch": 0.5178313561335843, "grad_norm": 7.038128444420002, "learning_rate": 4.954396354728604e-06, "loss": 17.6647, "step": 28329 }, { "epoch": 0.5178496353300309, "grad_norm": 7.029265376873107, "learning_rate": 4.954100355583217e-06, "loss": 18.0109, "step": 28330 }, { "epoch": 0.5178679145264774, "grad_norm": 5.6742373272957884, "learning_rate": 4.953804356598706e-06, "loss": 17.1585, "step": 28331 }, { "epoch": 0.517886193722924, "grad_norm": 5.766574209184498, "learning_rate": 4.953508357776104e-06, "loss": 17.4039, "step": 28332 }, { "epoch": 0.5179044729193705, "grad_norm": 6.3053895933968525, "learning_rate": 4.953212359116453e-06, "loss": 17.4309, "step": 28333 }, { "epoch": 0.517922752115817, "grad_norm": 7.950056890711074, "learning_rate": 4.9529163606207884e-06, "loss": 17.8308, "step": 28334 }, { "epoch": 0.5179410313122635, "grad_norm": 7.35631073458575, "learning_rate": 4.952620362290146e-06, "loss": 17.8463, "step": 28335 }, { "epoch": 0.51795931050871, "grad_norm": 5.043105007380679, "learning_rate": 4.952324364125567e-06, "loss": 16.6986, "step": 28336 }, { "epoch": 0.5179775897051566, "grad_norm": 4.6970956307817815, "learning_rate": 4.952028366128086e-06, "loss": 16.8207, "step": 28337 }, { "epoch": 0.5179958689016031, "grad_norm": 7.161634770000542, "learning_rate": 4.951732368298743e-06, "loss": 17.5895, "step": 28338 }, { "epoch": 0.5180141480980496, "grad_norm": 6.896067591252059, "learning_rate": 4.951436370638572e-06, "loss": 17.2049, "step": 28339 }, { "epoch": 0.5180324272944962, "grad_norm": 5.907005539784071, "learning_rate": 4.951140373148613e-06, "loss": 17.4387, "step": 28340 }, { "epoch": 0.5180507064909426, "grad_norm": 6.669907739938093, "learning_rate": 4.950844375829903e-06, "loss": 17.532, "step": 28341 }, { "epoch": 0.5180689856873891, "grad_norm": 6.386297552721186, "learning_rate": 4.9505483786834804e-06, "loss": 17.4779, "step": 28342 }, { "epoch": 0.5180872648838357, "grad_norm": 6.555596657950641, "learning_rate": 4.950252381710379e-06, "loss": 17.1734, "step": 28343 }, { "epoch": 0.5181055440802822, "grad_norm": 8.248701041644166, "learning_rate": 4.94995638491164e-06, "loss": 18.5298, "step": 28344 }, { "epoch": 0.5181238232767288, "grad_norm": 5.926592105592358, "learning_rate": 4.9496603882883005e-06, "loss": 17.32, "step": 28345 }, { "epoch": 0.5181421024731753, "grad_norm": 7.876904087450707, "learning_rate": 4.949364391841395e-06, "loss": 17.803, "step": 28346 }, { "epoch": 0.5181603816696218, "grad_norm": 8.87604005395616, "learning_rate": 4.9490683955719645e-06, "loss": 18.6382, "step": 28347 }, { "epoch": 0.5181786608660683, "grad_norm": 6.327237224683834, "learning_rate": 4.948772399481044e-06, "loss": 17.1739, "step": 28348 }, { "epoch": 0.5181969400625148, "grad_norm": 7.09737098314194, "learning_rate": 4.9484764035696705e-06, "loss": 17.6734, "step": 28349 }, { "epoch": 0.5182152192589614, "grad_norm": 12.50251472700511, "learning_rate": 4.9481804078388854e-06, "loss": 17.6391, "step": 28350 }, { "epoch": 0.5182334984554079, "grad_norm": 5.556826248447641, "learning_rate": 4.94788441228972e-06, "loss": 17.31, "step": 28351 }, { "epoch": 0.5182517776518544, "grad_norm": 6.169112154080561, "learning_rate": 4.9475884169232195e-06, "loss": 17.3714, "step": 28352 }, { "epoch": 0.518270056848301, "grad_norm": 6.149369658046421, "learning_rate": 4.947292421740415e-06, "loss": 17.459, "step": 28353 }, { "epoch": 0.5182883360447474, "grad_norm": 7.25027977926675, "learning_rate": 4.9469964267423445e-06, "loss": 17.6783, "step": 28354 }, { "epoch": 0.518306615241194, "grad_norm": 5.705919113560786, "learning_rate": 4.946700431930049e-06, "loss": 17.2594, "step": 28355 }, { "epoch": 0.5183248944376405, "grad_norm": 7.103057322642434, "learning_rate": 4.946404437304565e-06, "loss": 17.7569, "step": 28356 }, { "epoch": 0.518343173634087, "grad_norm": 6.324290224428657, "learning_rate": 4.946108442866925e-06, "loss": 17.4654, "step": 28357 }, { "epoch": 0.5183614528305336, "grad_norm": 5.6742389865285245, "learning_rate": 4.945812448618173e-06, "loss": 17.2231, "step": 28358 }, { "epoch": 0.5183797320269801, "grad_norm": 6.67351229223035, "learning_rate": 4.945516454559343e-06, "loss": 17.6726, "step": 28359 }, { "epoch": 0.5183980112234267, "grad_norm": 5.985127165617485, "learning_rate": 4.945220460691473e-06, "loss": 17.2501, "step": 28360 }, { "epoch": 0.5184162904198731, "grad_norm": 6.790971164288386, "learning_rate": 4.944924467015601e-06, "loss": 17.6443, "step": 28361 }, { "epoch": 0.5184345696163196, "grad_norm": 5.156384856233661, "learning_rate": 4.944628473532763e-06, "loss": 17.082, "step": 28362 }, { "epoch": 0.5184528488127662, "grad_norm": 5.804276280583565, "learning_rate": 4.9443324802439975e-06, "loss": 17.3728, "step": 28363 }, { "epoch": 0.5184711280092127, "grad_norm": 6.887342527283665, "learning_rate": 4.944036487150343e-06, "loss": 18.011, "step": 28364 }, { "epoch": 0.5184894072056593, "grad_norm": 4.909795921075503, "learning_rate": 4.943740494252835e-06, "loss": 16.7328, "step": 28365 }, { "epoch": 0.5185076864021058, "grad_norm": 5.793876673440153, "learning_rate": 4.94344450155251e-06, "loss": 17.0952, "step": 28366 }, { "epoch": 0.5185259655985522, "grad_norm": 5.643399042905578, "learning_rate": 4.94314850905041e-06, "loss": 17.1263, "step": 28367 }, { "epoch": 0.5185442447949988, "grad_norm": 6.337080276201196, "learning_rate": 4.942852516747567e-06, "loss": 17.6168, "step": 28368 }, { "epoch": 0.5185625239914453, "grad_norm": 4.43384728185298, "learning_rate": 4.942556524645023e-06, "loss": 16.618, "step": 28369 }, { "epoch": 0.5185808031878919, "grad_norm": 6.31907727904341, "learning_rate": 4.942260532743813e-06, "loss": 17.5743, "step": 28370 }, { "epoch": 0.5185990823843384, "grad_norm": 6.269384186412205, "learning_rate": 4.9419645410449735e-06, "loss": 17.3451, "step": 28371 }, { "epoch": 0.5186173615807849, "grad_norm": 6.723354654080379, "learning_rate": 4.9416685495495454e-06, "loss": 17.5516, "step": 28372 }, { "epoch": 0.5186356407772315, "grad_norm": 5.644872574713539, "learning_rate": 4.941372558258564e-06, "loss": 17.238, "step": 28373 }, { "epoch": 0.5186539199736779, "grad_norm": 8.335516295278675, "learning_rate": 4.941076567173064e-06, "loss": 17.8968, "step": 28374 }, { "epoch": 0.5186721991701245, "grad_norm": 7.392830132433686, "learning_rate": 4.940780576294087e-06, "loss": 17.701, "step": 28375 }, { "epoch": 0.518690478366571, "grad_norm": 5.7589566603408615, "learning_rate": 4.94048458562267e-06, "loss": 17.3986, "step": 28376 }, { "epoch": 0.5187087575630175, "grad_norm": 6.828373995474195, "learning_rate": 4.940188595159848e-06, "loss": 17.8689, "step": 28377 }, { "epoch": 0.5187270367594641, "grad_norm": 6.074302009423401, "learning_rate": 4.939892604906661e-06, "loss": 17.3894, "step": 28378 }, { "epoch": 0.5187453159559106, "grad_norm": 5.395728480787896, "learning_rate": 4.939596614864144e-06, "loss": 17.1342, "step": 28379 }, { "epoch": 0.5187635951523571, "grad_norm": 7.098541162613564, "learning_rate": 4.9393006250333345e-06, "loss": 17.6606, "step": 28380 }, { "epoch": 0.5187818743488036, "grad_norm": 6.19940468455215, "learning_rate": 4.939004635415274e-06, "loss": 17.1753, "step": 28381 }, { "epoch": 0.5188001535452501, "grad_norm": 5.574690490203115, "learning_rate": 4.938708646010994e-06, "loss": 17.4, "step": 28382 }, { "epoch": 0.5188184327416967, "grad_norm": 7.557797584270141, "learning_rate": 4.9384126568215374e-06, "loss": 18.0713, "step": 28383 }, { "epoch": 0.5188367119381432, "grad_norm": 5.5071146457900335, "learning_rate": 4.938116667847938e-06, "loss": 17.1174, "step": 28384 }, { "epoch": 0.5188549911345898, "grad_norm": 6.239175399354861, "learning_rate": 4.937820679091233e-06, "loss": 17.6445, "step": 28385 }, { "epoch": 0.5188732703310363, "grad_norm": 7.55498510327374, "learning_rate": 4.937524690552464e-06, "loss": 17.7631, "step": 28386 }, { "epoch": 0.5188915495274827, "grad_norm": 6.593389107555483, "learning_rate": 4.937228702232665e-06, "loss": 17.4847, "step": 28387 }, { "epoch": 0.5189098287239293, "grad_norm": 7.227918407171986, "learning_rate": 4.9369327141328715e-06, "loss": 18.1397, "step": 28388 }, { "epoch": 0.5189281079203758, "grad_norm": 7.193422353039811, "learning_rate": 4.936636726254125e-06, "loss": 17.5931, "step": 28389 }, { "epoch": 0.5189463871168224, "grad_norm": 6.521969771777463, "learning_rate": 4.936340738597462e-06, "loss": 17.5764, "step": 28390 }, { "epoch": 0.5189646663132689, "grad_norm": 6.147235511859382, "learning_rate": 4.936044751163917e-06, "loss": 17.5021, "step": 28391 }, { "epoch": 0.5189829455097154, "grad_norm": 4.990153548976036, "learning_rate": 4.9357487639545324e-06, "loss": 16.7748, "step": 28392 }, { "epoch": 0.519001224706162, "grad_norm": 5.926882449078139, "learning_rate": 4.935452776970341e-06, "loss": 17.1332, "step": 28393 }, { "epoch": 0.5190195039026084, "grad_norm": 8.228062422646142, "learning_rate": 4.935156790212381e-06, "loss": 18.3323, "step": 28394 }, { "epoch": 0.519037783099055, "grad_norm": 7.999383086747716, "learning_rate": 4.934860803681693e-06, "loss": 18.1071, "step": 28395 }, { "epoch": 0.5190560622955015, "grad_norm": 5.932543343354322, "learning_rate": 4.934564817379312e-06, "loss": 17.1878, "step": 28396 }, { "epoch": 0.519074341491948, "grad_norm": 5.780339840906645, "learning_rate": 4.934268831306274e-06, "loss": 17.0861, "step": 28397 }, { "epoch": 0.5190926206883946, "grad_norm": 5.877545939180377, "learning_rate": 4.9339728454636194e-06, "loss": 17.6133, "step": 28398 }, { "epoch": 0.519110899884841, "grad_norm": 6.929590337227377, "learning_rate": 4.933676859852383e-06, "loss": 18.037, "step": 28399 }, { "epoch": 0.5191291790812876, "grad_norm": 6.550692562746869, "learning_rate": 4.933380874473605e-06, "loss": 17.5974, "step": 28400 }, { "epoch": 0.5191474582777341, "grad_norm": 7.504334160162672, "learning_rate": 4.933084889328322e-06, "loss": 17.6812, "step": 28401 }, { "epoch": 0.5191657374741806, "grad_norm": 6.277629082262044, "learning_rate": 4.932788904417568e-06, "loss": 17.4072, "step": 28402 }, { "epoch": 0.5191840166706272, "grad_norm": 5.911238000245223, "learning_rate": 4.932492919742384e-06, "loss": 17.0953, "step": 28403 }, { "epoch": 0.5192022958670737, "grad_norm": 5.204696671213171, "learning_rate": 4.932196935303808e-06, "loss": 17.13, "step": 28404 }, { "epoch": 0.5192205750635203, "grad_norm": 5.964727338725323, "learning_rate": 4.931900951102873e-06, "loss": 17.5791, "step": 28405 }, { "epoch": 0.5192388542599667, "grad_norm": 6.512404584723284, "learning_rate": 4.931604967140622e-06, "loss": 17.5078, "step": 28406 }, { "epoch": 0.5192571334564132, "grad_norm": 4.910562497753882, "learning_rate": 4.9313089834180885e-06, "loss": 16.6523, "step": 28407 }, { "epoch": 0.5192754126528598, "grad_norm": 5.419494529033941, "learning_rate": 4.9310129999363095e-06, "loss": 17.4113, "step": 28408 }, { "epoch": 0.5192936918493063, "grad_norm": 7.950816500233542, "learning_rate": 4.930717016696327e-06, "loss": 17.9876, "step": 28409 }, { "epoch": 0.5193119710457528, "grad_norm": 5.0251182567233466, "learning_rate": 4.930421033699175e-06, "loss": 16.789, "step": 28410 }, { "epoch": 0.5193302502421994, "grad_norm": 7.994435891045464, "learning_rate": 4.930125050945889e-06, "loss": 18.0434, "step": 28411 }, { "epoch": 0.5193485294386458, "grad_norm": 6.410436598654899, "learning_rate": 4.929829068437509e-06, "loss": 17.6376, "step": 28412 }, { "epoch": 0.5193668086350924, "grad_norm": 5.663290499919785, "learning_rate": 4.929533086175072e-06, "loss": 17.2351, "step": 28413 }, { "epoch": 0.5193850878315389, "grad_norm": 8.776582529292156, "learning_rate": 4.9292371041596175e-06, "loss": 18.4499, "step": 28414 }, { "epoch": 0.5194033670279854, "grad_norm": 7.041122472572309, "learning_rate": 4.928941122392181e-06, "loss": 17.6232, "step": 28415 }, { "epoch": 0.519421646224432, "grad_norm": 5.731336055102409, "learning_rate": 4.928645140873797e-06, "loss": 17.2532, "step": 28416 }, { "epoch": 0.5194399254208785, "grad_norm": 5.400588933769152, "learning_rate": 4.928349159605506e-06, "loss": 17.0085, "step": 28417 }, { "epoch": 0.5194582046173251, "grad_norm": 7.707068384184171, "learning_rate": 4.928053178588347e-06, "loss": 17.8172, "step": 28418 }, { "epoch": 0.5194764838137715, "grad_norm": 7.437717160223488, "learning_rate": 4.9277571978233526e-06, "loss": 17.7059, "step": 28419 }, { "epoch": 0.519494763010218, "grad_norm": 5.959425894062813, "learning_rate": 4.927461217311566e-06, "loss": 17.3244, "step": 28420 }, { "epoch": 0.5195130422066646, "grad_norm": 5.688353393018982, "learning_rate": 4.92716523705402e-06, "loss": 17.136, "step": 28421 }, { "epoch": 0.5195313214031111, "grad_norm": 7.921413404049765, "learning_rate": 4.926869257051752e-06, "loss": 18.2318, "step": 28422 }, { "epoch": 0.5195496005995577, "grad_norm": 6.218524229888885, "learning_rate": 4.926573277305804e-06, "loss": 17.5586, "step": 28423 }, { "epoch": 0.5195678797960042, "grad_norm": 5.7370631366777385, "learning_rate": 4.926277297817209e-06, "loss": 17.2098, "step": 28424 }, { "epoch": 0.5195861589924506, "grad_norm": 6.801426371435264, "learning_rate": 4.925981318587005e-06, "loss": 17.5821, "step": 28425 }, { "epoch": 0.5196044381888972, "grad_norm": 6.884217335038451, "learning_rate": 4.9256853396162304e-06, "loss": 17.5537, "step": 28426 }, { "epoch": 0.5196227173853437, "grad_norm": 6.938959073811555, "learning_rate": 4.925389360905924e-06, "loss": 17.7079, "step": 28427 }, { "epoch": 0.5196409965817903, "grad_norm": 6.3840040994419835, "learning_rate": 4.925093382457118e-06, "loss": 17.6047, "step": 28428 }, { "epoch": 0.5196592757782368, "grad_norm": 5.493911569407914, "learning_rate": 4.924797404270854e-06, "loss": 17.1116, "step": 28429 }, { "epoch": 0.5196775549746833, "grad_norm": 6.806618236692073, "learning_rate": 4.92450142634817e-06, "loss": 17.8762, "step": 28430 }, { "epoch": 0.5196958341711299, "grad_norm": 6.596538830081444, "learning_rate": 4.924205448690101e-06, "loss": 17.4036, "step": 28431 }, { "epoch": 0.5197141133675763, "grad_norm": 6.1625061284571006, "learning_rate": 4.923909471297687e-06, "loss": 17.5077, "step": 28432 }, { "epoch": 0.5197323925640229, "grad_norm": 5.321676442797134, "learning_rate": 4.923613494171962e-06, "loss": 17.0904, "step": 28433 }, { "epoch": 0.5197506717604694, "grad_norm": 13.33556481310796, "learning_rate": 4.923317517313965e-06, "loss": 19.3512, "step": 28434 }, { "epoch": 0.5197689509569159, "grad_norm": 6.584603473348195, "learning_rate": 4.923021540724735e-06, "loss": 17.5803, "step": 28435 }, { "epoch": 0.5197872301533625, "grad_norm": 7.97873784956204, "learning_rate": 4.9227255644053056e-06, "loss": 17.7348, "step": 28436 }, { "epoch": 0.519805509349809, "grad_norm": 5.609333406609034, "learning_rate": 4.9224295883567185e-06, "loss": 17.2715, "step": 28437 }, { "epoch": 0.5198237885462555, "grad_norm": 6.283377946211638, "learning_rate": 4.922133612580009e-06, "loss": 17.4622, "step": 28438 }, { "epoch": 0.519842067742702, "grad_norm": 7.13110081138636, "learning_rate": 4.921837637076212e-06, "loss": 17.4589, "step": 28439 }, { "epoch": 0.5198603469391485, "grad_norm": 8.620049344265254, "learning_rate": 4.921541661846369e-06, "loss": 18.0009, "step": 28440 }, { "epoch": 0.5198786261355951, "grad_norm": 5.458156654294497, "learning_rate": 4.921245686891517e-06, "loss": 17.1189, "step": 28441 }, { "epoch": 0.5198969053320416, "grad_norm": 5.461605257200562, "learning_rate": 4.92094971221269e-06, "loss": 17.2679, "step": 28442 }, { "epoch": 0.5199151845284882, "grad_norm": 6.077526445575021, "learning_rate": 4.920653737810927e-06, "loss": 17.7665, "step": 28443 }, { "epoch": 0.5199334637249347, "grad_norm": 7.17578423592594, "learning_rate": 4.920357763687265e-06, "loss": 17.5324, "step": 28444 }, { "epoch": 0.5199517429213811, "grad_norm": 5.584351044297349, "learning_rate": 4.920061789842745e-06, "loss": 17.3489, "step": 28445 }, { "epoch": 0.5199700221178277, "grad_norm": 6.857244895720754, "learning_rate": 4.9197658162784015e-06, "loss": 17.819, "step": 28446 }, { "epoch": 0.5199883013142742, "grad_norm": 6.717729242088374, "learning_rate": 4.919469842995269e-06, "loss": 17.7314, "step": 28447 }, { "epoch": 0.5200065805107208, "grad_norm": 6.559535453075689, "learning_rate": 4.91917386999439e-06, "loss": 17.2234, "step": 28448 }, { "epoch": 0.5200248597071673, "grad_norm": 7.06374343637183, "learning_rate": 4.9188778972767996e-06, "loss": 17.3341, "step": 28449 }, { "epoch": 0.5200431389036138, "grad_norm": 5.7664473779447, "learning_rate": 4.918581924843534e-06, "loss": 17.1368, "step": 28450 }, { "epoch": 0.5200614181000603, "grad_norm": 5.871158813757223, "learning_rate": 4.9182859526956324e-06, "loss": 17.2258, "step": 28451 }, { "epoch": 0.5200796972965068, "grad_norm": 5.98428605017562, "learning_rate": 4.917989980834132e-06, "loss": 17.5082, "step": 28452 }, { "epoch": 0.5200979764929534, "grad_norm": 6.733955259138203, "learning_rate": 4.917694009260067e-06, "loss": 17.4531, "step": 28453 }, { "epoch": 0.5201162556893999, "grad_norm": 6.591630512321672, "learning_rate": 4.91739803797448e-06, "loss": 17.7313, "step": 28454 }, { "epoch": 0.5201345348858464, "grad_norm": 7.072901599462475, "learning_rate": 4.9171020669784065e-06, "loss": 17.7049, "step": 28455 }, { "epoch": 0.520152814082293, "grad_norm": 5.093333802295993, "learning_rate": 4.9168060962728795e-06, "loss": 17.0145, "step": 28456 }, { "epoch": 0.5201710932787394, "grad_norm": 6.398399630976842, "learning_rate": 4.916510125858942e-06, "loss": 17.7248, "step": 28457 }, { "epoch": 0.520189372475186, "grad_norm": 7.420138996746665, "learning_rate": 4.91621415573763e-06, "loss": 17.7913, "step": 28458 }, { "epoch": 0.5202076516716325, "grad_norm": 6.666231696793718, "learning_rate": 4.915918185909978e-06, "loss": 17.4272, "step": 28459 }, { "epoch": 0.520225930868079, "grad_norm": 6.8041289241111835, "learning_rate": 4.915622216377028e-06, "loss": 17.7081, "step": 28460 }, { "epoch": 0.5202442100645256, "grad_norm": 5.951706159791261, "learning_rate": 4.915326247139812e-06, "loss": 17.4126, "step": 28461 }, { "epoch": 0.5202624892609721, "grad_norm": 5.490469716778039, "learning_rate": 4.9150302781993715e-06, "loss": 17.169, "step": 28462 }, { "epoch": 0.5202807684574187, "grad_norm": 7.478933155531026, "learning_rate": 4.914734309556744e-06, "loss": 17.8273, "step": 28463 }, { "epoch": 0.5202990476538651, "grad_norm": 5.376059672743149, "learning_rate": 4.914438341212963e-06, "loss": 17.1553, "step": 28464 }, { "epoch": 0.5203173268503116, "grad_norm": 6.858406977413497, "learning_rate": 4.91414237316907e-06, "loss": 17.5945, "step": 28465 }, { "epoch": 0.5203356060467582, "grad_norm": 6.077786951193615, "learning_rate": 4.9138464054261e-06, "loss": 17.3616, "step": 28466 }, { "epoch": 0.5203538852432047, "grad_norm": 6.812812721856576, "learning_rate": 4.913550437985089e-06, "loss": 17.8304, "step": 28467 }, { "epoch": 0.5203721644396513, "grad_norm": 6.2142037331805025, "learning_rate": 4.913254470847079e-06, "loss": 17.566, "step": 28468 }, { "epoch": 0.5203904436360978, "grad_norm": 7.039103235915634, "learning_rate": 4.912958504013104e-06, "loss": 17.8699, "step": 28469 }, { "epoch": 0.5204087228325442, "grad_norm": 7.681828737673268, "learning_rate": 4.9126625374842e-06, "loss": 18.2374, "step": 28470 }, { "epoch": 0.5204270020289908, "grad_norm": 13.307296266008041, "learning_rate": 4.912366571261408e-06, "loss": 17.6047, "step": 28471 }, { "epoch": 0.5204452812254373, "grad_norm": 7.231720035665516, "learning_rate": 4.912070605345764e-06, "loss": 18.2681, "step": 28472 }, { "epoch": 0.5204635604218839, "grad_norm": 6.602924443524286, "learning_rate": 4.911774639738303e-06, "loss": 17.5021, "step": 28473 }, { "epoch": 0.5204818396183304, "grad_norm": 7.883868020494509, "learning_rate": 4.911478674440066e-06, "loss": 18.2676, "step": 28474 }, { "epoch": 0.5205001188147769, "grad_norm": 6.159561162809547, "learning_rate": 4.911182709452086e-06, "loss": 17.2755, "step": 28475 }, { "epoch": 0.5205183980112235, "grad_norm": 5.514469339714249, "learning_rate": 4.910886744775405e-06, "loss": 17.2943, "step": 28476 }, { "epoch": 0.5205366772076699, "grad_norm": 6.324586802480088, "learning_rate": 4.910590780411058e-06, "loss": 17.3471, "step": 28477 }, { "epoch": 0.5205549564041164, "grad_norm": 6.5277273182511175, "learning_rate": 4.910294816360081e-06, "loss": 17.519, "step": 28478 }, { "epoch": 0.520573235600563, "grad_norm": 6.6522125621361115, "learning_rate": 4.909998852623516e-06, "loss": 17.6845, "step": 28479 }, { "epoch": 0.5205915147970095, "grad_norm": 5.550169158965691, "learning_rate": 4.9097028892023955e-06, "loss": 17.0065, "step": 28480 }, { "epoch": 0.5206097939934561, "grad_norm": 5.80747317245644, "learning_rate": 4.909406926097758e-06, "loss": 17.1229, "step": 28481 }, { "epoch": 0.5206280731899026, "grad_norm": 6.619435338361883, "learning_rate": 4.909110963310642e-06, "loss": 17.6486, "step": 28482 }, { "epoch": 0.520646352386349, "grad_norm": 7.424707776529631, "learning_rate": 4.908815000842085e-06, "loss": 17.9898, "step": 28483 }, { "epoch": 0.5206646315827956, "grad_norm": 6.1872392938948035, "learning_rate": 4.908519038693122e-06, "loss": 17.5199, "step": 28484 }, { "epoch": 0.5206829107792421, "grad_norm": 7.727224028795232, "learning_rate": 4.908223076864792e-06, "loss": 17.9094, "step": 28485 }, { "epoch": 0.5207011899756887, "grad_norm": 5.480193590507775, "learning_rate": 4.907927115358133e-06, "loss": 17.1923, "step": 28486 }, { "epoch": 0.5207194691721352, "grad_norm": 6.266386773671442, "learning_rate": 4.907631154174181e-06, "loss": 17.7293, "step": 28487 }, { "epoch": 0.5207377483685817, "grad_norm": 7.352862448510087, "learning_rate": 4.9073351933139744e-06, "loss": 17.8787, "step": 28488 }, { "epoch": 0.5207560275650283, "grad_norm": 6.142563812239842, "learning_rate": 4.9070392327785484e-06, "loss": 17.1949, "step": 28489 }, { "epoch": 0.5207743067614747, "grad_norm": 5.701055702495349, "learning_rate": 4.906743272568942e-06, "loss": 16.9904, "step": 28490 }, { "epoch": 0.5207925859579213, "grad_norm": 5.679934440740015, "learning_rate": 4.906447312686195e-06, "loss": 17.0599, "step": 28491 }, { "epoch": 0.5208108651543678, "grad_norm": 6.686150930820088, "learning_rate": 4.906151353131339e-06, "loss": 17.4449, "step": 28492 }, { "epoch": 0.5208291443508143, "grad_norm": 6.696901803619023, "learning_rate": 4.905855393905415e-06, "loss": 17.3549, "step": 28493 }, { "epoch": 0.5208474235472609, "grad_norm": 6.195181244263033, "learning_rate": 4.905559435009462e-06, "loss": 17.5175, "step": 28494 }, { "epoch": 0.5208657027437074, "grad_norm": 6.59450788418369, "learning_rate": 4.905263476444511e-06, "loss": 17.1538, "step": 28495 }, { "epoch": 0.520883981940154, "grad_norm": 7.408468682901097, "learning_rate": 4.904967518211607e-06, "loss": 17.9047, "step": 28496 }, { "epoch": 0.5209022611366004, "grad_norm": 6.412189614504112, "learning_rate": 4.904671560311782e-06, "loss": 17.4363, "step": 28497 }, { "epoch": 0.5209205403330469, "grad_norm": 6.7656715290992455, "learning_rate": 4.904375602746074e-06, "loss": 17.9125, "step": 28498 }, { "epoch": 0.5209388195294935, "grad_norm": 6.4924780465885945, "learning_rate": 4.904079645515523e-06, "loss": 17.2634, "step": 28499 }, { "epoch": 0.52095709872594, "grad_norm": 6.285596636750341, "learning_rate": 4.9037836886211645e-06, "loss": 17.5109, "step": 28500 }, { "epoch": 0.5209753779223866, "grad_norm": 6.7154283657399025, "learning_rate": 4.903487732064034e-06, "loss": 17.6013, "step": 28501 }, { "epoch": 0.520993657118833, "grad_norm": 6.548987268343913, "learning_rate": 4.903191775845171e-06, "loss": 17.78, "step": 28502 }, { "epoch": 0.5210119363152795, "grad_norm": 5.626496958756712, "learning_rate": 4.9028958199656145e-06, "loss": 17.1568, "step": 28503 }, { "epoch": 0.5210302155117261, "grad_norm": 6.430132484776041, "learning_rate": 4.902599864426397e-06, "loss": 17.5608, "step": 28504 }, { "epoch": 0.5210484947081726, "grad_norm": 6.821738962030881, "learning_rate": 4.902303909228561e-06, "loss": 17.402, "step": 28505 }, { "epoch": 0.5210667739046192, "grad_norm": 7.8250467364491145, "learning_rate": 4.902007954373139e-06, "loss": 18.2186, "step": 28506 }, { "epoch": 0.5210850531010657, "grad_norm": 7.19755452109373, "learning_rate": 4.901711999861172e-06, "loss": 18.0846, "step": 28507 }, { "epoch": 0.5211033322975122, "grad_norm": 8.471044242296147, "learning_rate": 4.901416045693697e-06, "loss": 18.6619, "step": 28508 }, { "epoch": 0.5211216114939587, "grad_norm": 7.159979382803887, "learning_rate": 4.901120091871747e-06, "loss": 18.2145, "step": 28509 }, { "epoch": 0.5211398906904052, "grad_norm": 8.182553049643214, "learning_rate": 4.9008241383963655e-06, "loss": 17.5352, "step": 28510 }, { "epoch": 0.5211581698868518, "grad_norm": 7.407998109603498, "learning_rate": 4.900528185268586e-06, "loss": 18.4521, "step": 28511 }, { "epoch": 0.5211764490832983, "grad_norm": 6.260601819204853, "learning_rate": 4.900232232489445e-06, "loss": 17.4589, "step": 28512 }, { "epoch": 0.5211947282797448, "grad_norm": 5.3650323782935825, "learning_rate": 4.899936280059983e-06, "loss": 17.1716, "step": 28513 }, { "epoch": 0.5212130074761914, "grad_norm": 5.452966579857079, "learning_rate": 4.899640327981237e-06, "loss": 17.1096, "step": 28514 }, { "epoch": 0.5212312866726379, "grad_norm": 6.8104556465352815, "learning_rate": 4.899344376254239e-06, "loss": 17.8224, "step": 28515 }, { "epoch": 0.5212495658690844, "grad_norm": 7.543379328766232, "learning_rate": 4.899048424880033e-06, "loss": 18.0693, "step": 28516 }, { "epoch": 0.5212678450655309, "grad_norm": 5.315562135630225, "learning_rate": 4.898752473859654e-06, "loss": 17.234, "step": 28517 }, { "epoch": 0.5212861242619774, "grad_norm": 7.1049195512248735, "learning_rate": 4.898456523194136e-06, "loss": 17.4867, "step": 28518 }, { "epoch": 0.521304403458424, "grad_norm": 7.272050415416022, "learning_rate": 4.898160572884522e-06, "loss": 17.7739, "step": 28519 }, { "epoch": 0.5213226826548705, "grad_norm": 8.401022379990874, "learning_rate": 4.897864622931845e-06, "loss": 18.3357, "step": 28520 }, { "epoch": 0.5213409618513171, "grad_norm": 8.378815935319116, "learning_rate": 4.897568673337143e-06, "loss": 17.6349, "step": 28521 }, { "epoch": 0.5213592410477635, "grad_norm": 7.1869115979672165, "learning_rate": 4.897272724101456e-06, "loss": 17.6035, "step": 28522 }, { "epoch": 0.52137752024421, "grad_norm": 6.674780567610482, "learning_rate": 4.8969767752258165e-06, "loss": 17.3377, "step": 28523 }, { "epoch": 0.5213957994406566, "grad_norm": 5.367316091649678, "learning_rate": 4.896680826711267e-06, "loss": 17.2292, "step": 28524 }, { "epoch": 0.5214140786371031, "grad_norm": 6.794283867337464, "learning_rate": 4.896384878558841e-06, "loss": 17.968, "step": 28525 }, { "epoch": 0.5214323578335497, "grad_norm": 6.77990135169107, "learning_rate": 4.896088930769576e-06, "loss": 17.4489, "step": 28526 }, { "epoch": 0.5214506370299962, "grad_norm": 8.045400185883338, "learning_rate": 4.895792983344512e-06, "loss": 17.8203, "step": 28527 }, { "epoch": 0.5214689162264426, "grad_norm": 4.902725783251568, "learning_rate": 4.895497036284685e-06, "loss": 16.9511, "step": 28528 }, { "epoch": 0.5214871954228892, "grad_norm": 5.70237133267022, "learning_rate": 4.89520108959113e-06, "loss": 17.1397, "step": 28529 }, { "epoch": 0.5215054746193357, "grad_norm": 6.144524863815836, "learning_rate": 4.894905143264887e-06, "loss": 17.3851, "step": 28530 }, { "epoch": 0.5215237538157823, "grad_norm": 5.899723755150293, "learning_rate": 4.8946091973069935e-06, "loss": 17.5855, "step": 28531 }, { "epoch": 0.5215420330122288, "grad_norm": 5.603984848018888, "learning_rate": 4.894313251718483e-06, "loss": 17.1923, "step": 28532 }, { "epoch": 0.5215603122086753, "grad_norm": 6.277892233137457, "learning_rate": 4.894017306500397e-06, "loss": 17.2843, "step": 28533 }, { "epoch": 0.5215785914051219, "grad_norm": 5.936613727806103, "learning_rate": 4.893721361653771e-06, "loss": 17.5003, "step": 28534 }, { "epoch": 0.5215968706015683, "grad_norm": 7.027533804174499, "learning_rate": 4.893425417179641e-06, "loss": 17.649, "step": 28535 }, { "epoch": 0.5216151497980149, "grad_norm": 6.102147359780212, "learning_rate": 4.893129473079048e-06, "loss": 17.1189, "step": 28536 }, { "epoch": 0.5216334289944614, "grad_norm": 6.998842791815334, "learning_rate": 4.892833529353025e-06, "loss": 17.8941, "step": 28537 }, { "epoch": 0.5216517081909079, "grad_norm": 5.774873116056154, "learning_rate": 4.892537586002613e-06, "loss": 17.2359, "step": 28538 }, { "epoch": 0.5216699873873545, "grad_norm": 5.992399578919574, "learning_rate": 4.8922416430288465e-06, "loss": 17.6229, "step": 28539 }, { "epoch": 0.521688266583801, "grad_norm": 5.756647339843333, "learning_rate": 4.891945700432762e-06, "loss": 17.437, "step": 28540 }, { "epoch": 0.5217065457802476, "grad_norm": 7.315163324355285, "learning_rate": 4.8916497582154015e-06, "loss": 17.5926, "step": 28541 }, { "epoch": 0.521724824976694, "grad_norm": 6.881487766683682, "learning_rate": 4.891353816377798e-06, "loss": 17.6835, "step": 28542 }, { "epoch": 0.5217431041731405, "grad_norm": 5.535896958764953, "learning_rate": 4.891057874920989e-06, "loss": 17.2321, "step": 28543 }, { "epoch": 0.5217613833695871, "grad_norm": 7.536160248991853, "learning_rate": 4.890761933846014e-06, "loss": 18.0044, "step": 28544 }, { "epoch": 0.5217796625660336, "grad_norm": 6.413927128110615, "learning_rate": 4.890465993153909e-06, "loss": 17.4872, "step": 28545 }, { "epoch": 0.5217979417624801, "grad_norm": 7.2266632095213605, "learning_rate": 4.8901700528457094e-06, "loss": 17.9273, "step": 28546 }, { "epoch": 0.5218162209589267, "grad_norm": 5.267232293352199, "learning_rate": 4.889874112922457e-06, "loss": 17.1158, "step": 28547 }, { "epoch": 0.5218345001553731, "grad_norm": 5.643571977378045, "learning_rate": 4.889578173385184e-06, "loss": 17.1832, "step": 28548 }, { "epoch": 0.5218527793518197, "grad_norm": 6.05130260401698, "learning_rate": 4.889282234234929e-06, "loss": 17.6666, "step": 28549 }, { "epoch": 0.5218710585482662, "grad_norm": 7.328747357004344, "learning_rate": 4.8889862954727325e-06, "loss": 17.2468, "step": 28550 }, { "epoch": 0.5218893377447127, "grad_norm": 5.400184968760882, "learning_rate": 4.888690357099628e-06, "loss": 17.0291, "step": 28551 }, { "epoch": 0.5219076169411593, "grad_norm": 7.448530732505496, "learning_rate": 4.888394419116656e-06, "loss": 18.0796, "step": 28552 }, { "epoch": 0.5219258961376058, "grad_norm": 7.59018770133678, "learning_rate": 4.88809848152485e-06, "loss": 18.0727, "step": 28553 }, { "epoch": 0.5219441753340524, "grad_norm": 8.839733041001011, "learning_rate": 4.887802544325249e-06, "loss": 18.1013, "step": 28554 }, { "epoch": 0.5219624545304988, "grad_norm": 5.991160633309579, "learning_rate": 4.887506607518892e-06, "loss": 17.3717, "step": 28555 }, { "epoch": 0.5219807337269453, "grad_norm": 5.253203731855516, "learning_rate": 4.887210671106814e-06, "loss": 16.9025, "step": 28556 }, { "epoch": 0.5219990129233919, "grad_norm": 6.76671894110189, "learning_rate": 4.886914735090053e-06, "loss": 17.3081, "step": 28557 }, { "epoch": 0.5220172921198384, "grad_norm": 7.6298661703180715, "learning_rate": 4.886618799469644e-06, "loss": 17.8484, "step": 28558 }, { "epoch": 0.522035571316285, "grad_norm": 7.2541321687389235, "learning_rate": 4.88632286424663e-06, "loss": 17.596, "step": 28559 }, { "epoch": 0.5220538505127315, "grad_norm": 6.760886726101878, "learning_rate": 4.886026929422041e-06, "loss": 17.3026, "step": 28560 }, { "epoch": 0.5220721297091779, "grad_norm": 5.676519567750497, "learning_rate": 4.885730994996919e-06, "loss": 17.1263, "step": 28561 }, { "epoch": 0.5220904089056245, "grad_norm": 7.21428551073307, "learning_rate": 4.8854350609723e-06, "loss": 17.8924, "step": 28562 }, { "epoch": 0.522108688102071, "grad_norm": 6.416671753889042, "learning_rate": 4.885139127349221e-06, "loss": 17.4339, "step": 28563 }, { "epoch": 0.5221269672985176, "grad_norm": 8.085624888739334, "learning_rate": 4.88484319412872e-06, "loss": 17.1029, "step": 28564 }, { "epoch": 0.5221452464949641, "grad_norm": 6.661496340731978, "learning_rate": 4.884547261311833e-06, "loss": 17.5654, "step": 28565 }, { "epoch": 0.5221635256914106, "grad_norm": 6.730843745277235, "learning_rate": 4.884251328899598e-06, "loss": 17.7507, "step": 28566 }, { "epoch": 0.5221818048878571, "grad_norm": 5.203887170605507, "learning_rate": 4.883955396893053e-06, "loss": 17.069, "step": 28567 }, { "epoch": 0.5222000840843036, "grad_norm": 5.702194749382778, "learning_rate": 4.883659465293231e-06, "loss": 17.133, "step": 28568 }, { "epoch": 0.5222183632807502, "grad_norm": 6.531802448548897, "learning_rate": 4.883363534101176e-06, "loss": 17.7237, "step": 28569 }, { "epoch": 0.5222366424771967, "grad_norm": 5.5281364685397865, "learning_rate": 4.8830676033179205e-06, "loss": 17.1396, "step": 28570 }, { "epoch": 0.5222549216736432, "grad_norm": 7.158311541678934, "learning_rate": 4.882771672944502e-06, "loss": 17.2343, "step": 28571 }, { "epoch": 0.5222732008700898, "grad_norm": 6.385483693936126, "learning_rate": 4.88247574298196e-06, "loss": 17.3761, "step": 28572 }, { "epoch": 0.5222914800665363, "grad_norm": 6.575685810924026, "learning_rate": 4.882179813431331e-06, "loss": 17.5746, "step": 28573 }, { "epoch": 0.5223097592629828, "grad_norm": 5.620502338011076, "learning_rate": 4.88188388429365e-06, "loss": 17.1279, "step": 28574 }, { "epoch": 0.5223280384594293, "grad_norm": 5.458200354751128, "learning_rate": 4.881587955569955e-06, "loss": 17.1399, "step": 28575 }, { "epoch": 0.5223463176558758, "grad_norm": 6.698885551026733, "learning_rate": 4.881292027261286e-06, "loss": 17.4983, "step": 28576 }, { "epoch": 0.5223645968523224, "grad_norm": 5.175300885480785, "learning_rate": 4.880996099368677e-06, "loss": 17.091, "step": 28577 }, { "epoch": 0.5223828760487689, "grad_norm": 5.456395267418592, "learning_rate": 4.880700171893167e-06, "loss": 16.9812, "step": 28578 }, { "epoch": 0.5224011552452155, "grad_norm": 6.832448428618826, "learning_rate": 4.880404244835792e-06, "loss": 17.8258, "step": 28579 }, { "epoch": 0.522419434441662, "grad_norm": 6.287832994819546, "learning_rate": 4.880108318197588e-06, "loss": 17.7479, "step": 28580 }, { "epoch": 0.5224377136381084, "grad_norm": 5.5577425560173435, "learning_rate": 4.879812391979598e-06, "loss": 17.223, "step": 28581 }, { "epoch": 0.522455992834555, "grad_norm": 6.121673541690735, "learning_rate": 4.8795164661828505e-06, "loss": 17.3787, "step": 28582 }, { "epoch": 0.5224742720310015, "grad_norm": 5.665615467954876, "learning_rate": 4.8792205408083915e-06, "loss": 17.0907, "step": 28583 }, { "epoch": 0.5224925512274481, "grad_norm": 8.032322676181629, "learning_rate": 4.878924615857252e-06, "loss": 18.0102, "step": 28584 }, { "epoch": 0.5225108304238946, "grad_norm": 6.827180904425209, "learning_rate": 4.87862869133047e-06, "loss": 17.6581, "step": 28585 }, { "epoch": 0.522529109620341, "grad_norm": 5.908271524091968, "learning_rate": 4.878332767229086e-06, "loss": 17.2425, "step": 28586 }, { "epoch": 0.5225473888167876, "grad_norm": 5.935408688879286, "learning_rate": 4.878036843554136e-06, "loss": 17.4134, "step": 28587 }, { "epoch": 0.5225656680132341, "grad_norm": 5.779912781779714, "learning_rate": 4.877740920306654e-06, "loss": 17.2493, "step": 28588 }, { "epoch": 0.5225839472096807, "grad_norm": 6.2411674179792795, "learning_rate": 4.87744499748768e-06, "loss": 17.5038, "step": 28589 }, { "epoch": 0.5226022264061272, "grad_norm": 6.749663808532885, "learning_rate": 4.877149075098251e-06, "loss": 17.7859, "step": 28590 }, { "epoch": 0.5226205056025737, "grad_norm": 6.399509929554015, "learning_rate": 4.8768531531394035e-06, "loss": 17.3995, "step": 28591 }, { "epoch": 0.5226387847990203, "grad_norm": 5.718918710110861, "learning_rate": 4.876557231612176e-06, "loss": 17.365, "step": 28592 }, { "epoch": 0.5226570639954667, "grad_norm": 6.330719956222392, "learning_rate": 4.876261310517604e-06, "loss": 17.404, "step": 28593 }, { "epoch": 0.5226753431919133, "grad_norm": 6.570372225377883, "learning_rate": 4.875965389856724e-06, "loss": 17.7979, "step": 28594 }, { "epoch": 0.5226936223883598, "grad_norm": 6.830668800842742, "learning_rate": 4.875669469630577e-06, "loss": 17.7618, "step": 28595 }, { "epoch": 0.5227119015848063, "grad_norm": 7.904864307882776, "learning_rate": 4.875373549840197e-06, "loss": 18.4144, "step": 28596 }, { "epoch": 0.5227301807812529, "grad_norm": 5.606781549605206, "learning_rate": 4.87507763048662e-06, "loss": 17.2982, "step": 28597 }, { "epoch": 0.5227484599776994, "grad_norm": 4.72959210553153, "learning_rate": 4.874781711570886e-06, "loss": 16.8386, "step": 28598 }, { "epoch": 0.522766739174146, "grad_norm": 6.458947639581319, "learning_rate": 4.874485793094031e-06, "loss": 17.894, "step": 28599 }, { "epoch": 0.5227850183705924, "grad_norm": 5.920819838995126, "learning_rate": 4.874189875057094e-06, "loss": 17.1354, "step": 28600 }, { "epoch": 0.5228032975670389, "grad_norm": 6.349242723956153, "learning_rate": 4.873893957461111e-06, "loss": 17.4207, "step": 28601 }, { "epoch": 0.5228215767634855, "grad_norm": 6.208539077079403, "learning_rate": 4.873598040307116e-06, "loss": 17.4685, "step": 28602 }, { "epoch": 0.522839855959932, "grad_norm": 7.317078508463828, "learning_rate": 4.87330212359615e-06, "loss": 17.7244, "step": 28603 }, { "epoch": 0.5228581351563786, "grad_norm": 6.723673178658345, "learning_rate": 4.873006207329251e-06, "loss": 17.4472, "step": 28604 }, { "epoch": 0.5228764143528251, "grad_norm": 6.0828055766578695, "learning_rate": 4.872710291507452e-06, "loss": 17.403, "step": 28605 }, { "epoch": 0.5228946935492715, "grad_norm": 5.220242569578839, "learning_rate": 4.872414376131793e-06, "loss": 17.1341, "step": 28606 }, { "epoch": 0.5229129727457181, "grad_norm": 5.79877143035889, "learning_rate": 4.872118461203311e-06, "loss": 17.2194, "step": 28607 }, { "epoch": 0.5229312519421646, "grad_norm": 6.908868440391969, "learning_rate": 4.871822546723041e-06, "loss": 17.7, "step": 28608 }, { "epoch": 0.5229495311386112, "grad_norm": 5.969050187671405, "learning_rate": 4.871526632692024e-06, "loss": 17.2985, "step": 28609 }, { "epoch": 0.5229678103350577, "grad_norm": 6.39895815571215, "learning_rate": 4.871230719111295e-06, "loss": 17.659, "step": 28610 }, { "epoch": 0.5229860895315042, "grad_norm": 6.329881816727737, "learning_rate": 4.870934805981889e-06, "loss": 17.436, "step": 28611 }, { "epoch": 0.5230043687279508, "grad_norm": 5.993809150442772, "learning_rate": 4.870638893304846e-06, "loss": 17.5049, "step": 28612 }, { "epoch": 0.5230226479243972, "grad_norm": 8.424532172633825, "learning_rate": 4.870342981081202e-06, "loss": 17.9168, "step": 28613 }, { "epoch": 0.5230409271208437, "grad_norm": 7.646587726971442, "learning_rate": 4.870047069311997e-06, "loss": 17.668, "step": 28614 }, { "epoch": 0.5230592063172903, "grad_norm": 5.686314147004758, "learning_rate": 4.869751157998264e-06, "loss": 17.1126, "step": 28615 }, { "epoch": 0.5230774855137368, "grad_norm": 6.036282928313144, "learning_rate": 4.8694552471410425e-06, "loss": 17.433, "step": 28616 }, { "epoch": 0.5230957647101834, "grad_norm": 6.994720058312239, "learning_rate": 4.869159336741369e-06, "loss": 17.9136, "step": 28617 }, { "epoch": 0.5231140439066299, "grad_norm": 5.934655269276345, "learning_rate": 4.868863426800281e-06, "loss": 17.4217, "step": 28618 }, { "epoch": 0.5231323231030763, "grad_norm": 5.382820492933522, "learning_rate": 4.868567517318813e-06, "loss": 17.0695, "step": 28619 }, { "epoch": 0.5231506022995229, "grad_norm": 6.584344094198239, "learning_rate": 4.8682716082980065e-06, "loss": 17.3006, "step": 28620 }, { "epoch": 0.5231688814959694, "grad_norm": 6.533655367891564, "learning_rate": 4.867975699738897e-06, "loss": 17.6225, "step": 28621 }, { "epoch": 0.523187160692416, "grad_norm": 5.519509156261628, "learning_rate": 4.8676797916425194e-06, "loss": 17.1227, "step": 28622 }, { "epoch": 0.5232054398888625, "grad_norm": 4.932508531538405, "learning_rate": 4.867383884009915e-06, "loss": 16.9027, "step": 28623 }, { "epoch": 0.523223719085309, "grad_norm": 7.522018420607516, "learning_rate": 4.8670879768421176e-06, "loss": 17.8257, "step": 28624 }, { "epoch": 0.5232419982817555, "grad_norm": 6.990985213410393, "learning_rate": 4.866792070140164e-06, "loss": 17.6341, "step": 28625 }, { "epoch": 0.523260277478202, "grad_norm": 6.369858785666854, "learning_rate": 4.866496163905094e-06, "loss": 17.4816, "step": 28626 }, { "epoch": 0.5232785566746486, "grad_norm": 7.279355469306909, "learning_rate": 4.866200258137944e-06, "loss": 17.7265, "step": 28627 }, { "epoch": 0.5232968358710951, "grad_norm": 6.3077888297320435, "learning_rate": 4.8659043528397484e-06, "loss": 17.2586, "step": 28628 }, { "epoch": 0.5233151150675416, "grad_norm": 6.007628964170743, "learning_rate": 4.865608448011547e-06, "loss": 17.5715, "step": 28629 }, { "epoch": 0.5233333942639882, "grad_norm": 7.651909580589494, "learning_rate": 4.865312543654376e-06, "loss": 17.7631, "step": 28630 }, { "epoch": 0.5233516734604347, "grad_norm": 5.473384197437844, "learning_rate": 4.865016639769275e-06, "loss": 17.071, "step": 28631 }, { "epoch": 0.5233699526568812, "grad_norm": 6.458577390802869, "learning_rate": 4.8647207363572785e-06, "loss": 17.3749, "step": 28632 }, { "epoch": 0.5233882318533277, "grad_norm": 4.945032555018459, "learning_rate": 4.864424833419422e-06, "loss": 17.1227, "step": 28633 }, { "epoch": 0.5234065110497742, "grad_norm": 7.19139568278267, "learning_rate": 4.864128930956746e-06, "loss": 17.7834, "step": 28634 }, { "epoch": 0.5234247902462208, "grad_norm": 5.468476556153672, "learning_rate": 4.863833028970287e-06, "loss": 16.9363, "step": 28635 }, { "epoch": 0.5234430694426673, "grad_norm": 5.719976840796176, "learning_rate": 4.86353712746108e-06, "loss": 17.2282, "step": 28636 }, { "epoch": 0.5234613486391139, "grad_norm": 5.927814075489786, "learning_rate": 4.8632412264301645e-06, "loss": 17.2721, "step": 28637 }, { "epoch": 0.5234796278355603, "grad_norm": 5.846840195343381, "learning_rate": 4.862945325878576e-06, "loss": 16.983, "step": 28638 }, { "epoch": 0.5234979070320068, "grad_norm": 6.206621822029001, "learning_rate": 4.862649425807352e-06, "loss": 17.364, "step": 28639 }, { "epoch": 0.5235161862284534, "grad_norm": 6.532029385335391, "learning_rate": 4.862353526217531e-06, "loss": 17.4888, "step": 28640 }, { "epoch": 0.5235344654248999, "grad_norm": 5.1130084455203635, "learning_rate": 4.86205762711015e-06, "loss": 17.0225, "step": 28641 }, { "epoch": 0.5235527446213465, "grad_norm": 6.754188500007389, "learning_rate": 4.861761728486242e-06, "loss": 17.8322, "step": 28642 }, { "epoch": 0.523571023817793, "grad_norm": 6.50366454538655, "learning_rate": 4.861465830346848e-06, "loss": 17.4779, "step": 28643 }, { "epoch": 0.5235893030142394, "grad_norm": 9.49622504661568, "learning_rate": 4.861169932693004e-06, "loss": 18.3106, "step": 28644 }, { "epoch": 0.523607582210686, "grad_norm": 5.73110752522359, "learning_rate": 4.8608740355257485e-06, "loss": 17.0343, "step": 28645 }, { "epoch": 0.5236258614071325, "grad_norm": 6.458243870159093, "learning_rate": 4.860578138846119e-06, "loss": 17.5289, "step": 28646 }, { "epoch": 0.5236441406035791, "grad_norm": 6.777740444972973, "learning_rate": 4.860282242655147e-06, "loss": 17.5169, "step": 28647 }, { "epoch": 0.5236624198000256, "grad_norm": 7.175666162289647, "learning_rate": 4.859986346953876e-06, "loss": 17.5742, "step": 28648 }, { "epoch": 0.5236806989964721, "grad_norm": 6.058726642855046, "learning_rate": 4.859690451743341e-06, "loss": 17.0269, "step": 28649 }, { "epoch": 0.5236989781929187, "grad_norm": 7.504244029549141, "learning_rate": 4.8593945570245776e-06, "loss": 17.9352, "step": 28650 }, { "epoch": 0.5237172573893651, "grad_norm": 5.674543523973336, "learning_rate": 4.859098662798625e-06, "loss": 17.101, "step": 28651 }, { "epoch": 0.5237355365858117, "grad_norm": 5.386776452218993, "learning_rate": 4.858802769066519e-06, "loss": 16.987, "step": 28652 }, { "epoch": 0.5237538157822582, "grad_norm": 5.170912022530876, "learning_rate": 4.858506875829297e-06, "loss": 16.9947, "step": 28653 }, { "epoch": 0.5237720949787047, "grad_norm": 6.497979812330839, "learning_rate": 4.858210983087997e-06, "loss": 17.4098, "step": 28654 }, { "epoch": 0.5237903741751513, "grad_norm": 5.396850166875037, "learning_rate": 4.857915090843655e-06, "loss": 16.8639, "step": 28655 }, { "epoch": 0.5238086533715978, "grad_norm": 8.161069659580226, "learning_rate": 4.857619199097307e-06, "loss": 18.3507, "step": 28656 }, { "epoch": 0.5238269325680444, "grad_norm": 7.427932251449674, "learning_rate": 4.857323307849992e-06, "loss": 18.1129, "step": 28657 }, { "epoch": 0.5238452117644908, "grad_norm": 7.23740741971263, "learning_rate": 4.857027417102744e-06, "loss": 17.7084, "step": 28658 }, { "epoch": 0.5238634909609373, "grad_norm": 6.949649628765428, "learning_rate": 4.856731526856607e-06, "loss": 17.7779, "step": 28659 }, { "epoch": 0.5238817701573839, "grad_norm": 6.939305662249475, "learning_rate": 4.856435637112612e-06, "loss": 17.4437, "step": 28660 }, { "epoch": 0.5239000493538304, "grad_norm": 8.040513767106534, "learning_rate": 4.856139747871796e-06, "loss": 17.8194, "step": 28661 }, { "epoch": 0.523918328550277, "grad_norm": 5.234498276910926, "learning_rate": 4.8558438591351984e-06, "loss": 16.9883, "step": 28662 }, { "epoch": 0.5239366077467235, "grad_norm": 5.6882015636197725, "learning_rate": 4.8555479709038575e-06, "loss": 17.1606, "step": 28663 }, { "epoch": 0.5239548869431699, "grad_norm": 5.740407646631591, "learning_rate": 4.855252083178806e-06, "loss": 17.3319, "step": 28664 }, { "epoch": 0.5239731661396165, "grad_norm": 5.718372727511673, "learning_rate": 4.854956195961085e-06, "loss": 16.933, "step": 28665 }, { "epoch": 0.523991445336063, "grad_norm": 6.6420807768311025, "learning_rate": 4.854660309251729e-06, "loss": 17.8021, "step": 28666 }, { "epoch": 0.5240097245325096, "grad_norm": 6.545572116505617, "learning_rate": 4.854364423051775e-06, "loss": 17.6041, "step": 28667 }, { "epoch": 0.5240280037289561, "grad_norm": 7.023776003366043, "learning_rate": 4.854068537362264e-06, "loss": 17.6512, "step": 28668 }, { "epoch": 0.5240462829254026, "grad_norm": 6.3335694738473, "learning_rate": 4.853772652184229e-06, "loss": 17.7214, "step": 28669 }, { "epoch": 0.5240645621218492, "grad_norm": 8.615002949894881, "learning_rate": 4.853476767518706e-06, "loss": 18.0568, "step": 28670 }, { "epoch": 0.5240828413182956, "grad_norm": 5.987409289706991, "learning_rate": 4.853180883366736e-06, "loss": 17.322, "step": 28671 }, { "epoch": 0.5241011205147422, "grad_norm": 14.72844723554283, "learning_rate": 4.8528849997293556e-06, "loss": 17.903, "step": 28672 }, { "epoch": 0.5241193997111887, "grad_norm": 7.502380063868678, "learning_rate": 4.852589116607597e-06, "loss": 17.6554, "step": 28673 }, { "epoch": 0.5241376789076352, "grad_norm": 5.156945823290601, "learning_rate": 4.852293234002505e-06, "loss": 16.9923, "step": 28674 }, { "epoch": 0.5241559581040818, "grad_norm": 6.0739783576531305, "learning_rate": 4.851997351915108e-06, "loss": 17.3553, "step": 28675 }, { "epoch": 0.5241742373005283, "grad_norm": 6.034195832265106, "learning_rate": 4.851701470346449e-06, "loss": 17.1783, "step": 28676 }, { "epoch": 0.5241925164969748, "grad_norm": 5.664087929063006, "learning_rate": 4.851405589297566e-06, "loss": 17.241, "step": 28677 }, { "epoch": 0.5242107956934213, "grad_norm": 6.075340909033961, "learning_rate": 4.85110970876949e-06, "loss": 17.2628, "step": 28678 }, { "epoch": 0.5242290748898678, "grad_norm": 6.765987722082619, "learning_rate": 4.850813828763264e-06, "loss": 17.7013, "step": 28679 }, { "epoch": 0.5242473540863144, "grad_norm": 5.71274771896953, "learning_rate": 4.850517949279922e-06, "loss": 17.2508, "step": 28680 }, { "epoch": 0.5242656332827609, "grad_norm": 6.519198357361352, "learning_rate": 4.8502220703205e-06, "loss": 17.5417, "step": 28681 }, { "epoch": 0.5242839124792074, "grad_norm": 6.3193823278258, "learning_rate": 4.849926191886039e-06, "loss": 17.4188, "step": 28682 }, { "epoch": 0.524302191675654, "grad_norm": 5.507761081687611, "learning_rate": 4.849630313977573e-06, "loss": 17.1608, "step": 28683 }, { "epoch": 0.5243204708721004, "grad_norm": 5.776666122232264, "learning_rate": 4.849334436596139e-06, "loss": 16.9994, "step": 28684 }, { "epoch": 0.524338750068547, "grad_norm": 7.7390542660983765, "learning_rate": 4.8490385597427745e-06, "loss": 18.1266, "step": 28685 }, { "epoch": 0.5243570292649935, "grad_norm": 6.413963591555267, "learning_rate": 4.848742683418519e-06, "loss": 17.3086, "step": 28686 }, { "epoch": 0.52437530846144, "grad_norm": 6.415781633774691, "learning_rate": 4.848446807624404e-06, "loss": 17.5619, "step": 28687 }, { "epoch": 0.5243935876578866, "grad_norm": 6.41271687883417, "learning_rate": 4.848150932361471e-06, "loss": 17.5939, "step": 28688 }, { "epoch": 0.524411866854333, "grad_norm": 6.594850199120288, "learning_rate": 4.847855057630756e-06, "loss": 17.723, "step": 28689 }, { "epoch": 0.5244301460507796, "grad_norm": 6.550198528890269, "learning_rate": 4.847559183433296e-06, "loss": 17.7492, "step": 28690 }, { "epoch": 0.5244484252472261, "grad_norm": 7.220340798436696, "learning_rate": 4.847263309770129e-06, "loss": 17.8352, "step": 28691 }, { "epoch": 0.5244667044436726, "grad_norm": 5.511075521973555, "learning_rate": 4.8469674366422885e-06, "loss": 17.3616, "step": 28692 }, { "epoch": 0.5244849836401192, "grad_norm": 6.07969437251479, "learning_rate": 4.846671564050815e-06, "loss": 17.2891, "step": 28693 }, { "epoch": 0.5245032628365657, "grad_norm": 6.277042181576031, "learning_rate": 4.846375691996745e-06, "loss": 17.4882, "step": 28694 }, { "epoch": 0.5245215420330123, "grad_norm": 8.152968328070358, "learning_rate": 4.846079820481113e-06, "loss": 17.9782, "step": 28695 }, { "epoch": 0.5245398212294587, "grad_norm": 6.791582980957194, "learning_rate": 4.845783949504961e-06, "loss": 17.5009, "step": 28696 }, { "epoch": 0.5245581004259052, "grad_norm": 6.489858402076207, "learning_rate": 4.84548807906932e-06, "loss": 17.6425, "step": 28697 }, { "epoch": 0.5245763796223518, "grad_norm": 6.854646747146825, "learning_rate": 4.84519220917523e-06, "loss": 17.766, "step": 28698 }, { "epoch": 0.5245946588187983, "grad_norm": 7.4948507071382275, "learning_rate": 4.844896339823731e-06, "loss": 17.9889, "step": 28699 }, { "epoch": 0.5246129380152449, "grad_norm": 7.274832398975454, "learning_rate": 4.844600471015855e-06, "loss": 17.7356, "step": 28700 }, { "epoch": 0.5246312172116914, "grad_norm": 7.592434054060814, "learning_rate": 4.8443046027526395e-06, "loss": 17.7565, "step": 28701 }, { "epoch": 0.5246494964081378, "grad_norm": 7.306628034976432, "learning_rate": 4.844008735035124e-06, "loss": 17.8487, "step": 28702 }, { "epoch": 0.5246677756045844, "grad_norm": 8.127512072239806, "learning_rate": 4.843712867864345e-06, "loss": 17.7852, "step": 28703 }, { "epoch": 0.5246860548010309, "grad_norm": 6.608577482738582, "learning_rate": 4.843417001241336e-06, "loss": 17.7183, "step": 28704 }, { "epoch": 0.5247043339974775, "grad_norm": 5.1392686140284765, "learning_rate": 4.84312113516714e-06, "loss": 16.972, "step": 28705 }, { "epoch": 0.524722613193924, "grad_norm": 5.622460930992014, "learning_rate": 4.8428252696427884e-06, "loss": 17.2805, "step": 28706 }, { "epoch": 0.5247408923903705, "grad_norm": 6.728131733473552, "learning_rate": 4.842529404669322e-06, "loss": 17.5892, "step": 28707 }, { "epoch": 0.5247591715868171, "grad_norm": 6.667787341536187, "learning_rate": 4.842233540247777e-06, "loss": 17.4749, "step": 28708 }, { "epoch": 0.5247774507832635, "grad_norm": 6.431972698734262, "learning_rate": 4.841937676379188e-06, "loss": 17.8282, "step": 28709 }, { "epoch": 0.5247957299797101, "grad_norm": 6.122433842841607, "learning_rate": 4.841641813064596e-06, "loss": 17.6201, "step": 28710 }, { "epoch": 0.5248140091761566, "grad_norm": 5.797297230345632, "learning_rate": 4.841345950305034e-06, "loss": 17.2922, "step": 28711 }, { "epoch": 0.5248322883726031, "grad_norm": 5.655518893695661, "learning_rate": 4.84105008810154e-06, "loss": 17.2077, "step": 28712 }, { "epoch": 0.5248505675690497, "grad_norm": 6.053768863837141, "learning_rate": 4.840754226455154e-06, "loss": 17.6577, "step": 28713 }, { "epoch": 0.5248688467654962, "grad_norm": 5.519281016861334, "learning_rate": 4.84045836536691e-06, "loss": 17.2577, "step": 28714 }, { "epoch": 0.5248871259619428, "grad_norm": 6.821674791896626, "learning_rate": 4.840162504837844e-06, "loss": 17.5838, "step": 28715 }, { "epoch": 0.5249054051583892, "grad_norm": 7.131856104619915, "learning_rate": 4.839866644868995e-06, "loss": 17.3524, "step": 28716 }, { "epoch": 0.5249236843548357, "grad_norm": 6.5408899826240345, "learning_rate": 4.839570785461401e-06, "loss": 17.4512, "step": 28717 }, { "epoch": 0.5249419635512823, "grad_norm": 5.77154805686396, "learning_rate": 4.839274926616096e-06, "loss": 17.1084, "step": 28718 }, { "epoch": 0.5249602427477288, "grad_norm": 5.1531795568693, "learning_rate": 4.838979068334119e-06, "loss": 17.1768, "step": 28719 }, { "epoch": 0.5249785219441754, "grad_norm": 5.985471990358824, "learning_rate": 4.838683210616505e-06, "loss": 17.449, "step": 28720 }, { "epoch": 0.5249968011406219, "grad_norm": 8.892860157466224, "learning_rate": 4.838387353464295e-06, "loss": 17.9445, "step": 28721 }, { "epoch": 0.5250150803370683, "grad_norm": 6.0620276417726435, "learning_rate": 4.838091496878522e-06, "loss": 17.66, "step": 28722 }, { "epoch": 0.5250333595335149, "grad_norm": 6.106682141346665, "learning_rate": 4.837795640860224e-06, "loss": 17.2064, "step": 28723 }, { "epoch": 0.5250516387299614, "grad_norm": 6.211250277119349, "learning_rate": 4.8374997854104385e-06, "loss": 17.2898, "step": 28724 }, { "epoch": 0.525069917926408, "grad_norm": 6.518622661517581, "learning_rate": 4.8372039305302025e-06, "loss": 17.5631, "step": 28725 }, { "epoch": 0.5250881971228545, "grad_norm": 6.1429629713118485, "learning_rate": 4.836908076220551e-06, "loss": 17.5944, "step": 28726 }, { "epoch": 0.525106476319301, "grad_norm": 6.793614605633182, "learning_rate": 4.836612222482524e-06, "loss": 17.6654, "step": 28727 }, { "epoch": 0.5251247555157476, "grad_norm": 5.079446359635824, "learning_rate": 4.836316369317158e-06, "loss": 16.9357, "step": 28728 }, { "epoch": 0.525143034712194, "grad_norm": 7.305594957389146, "learning_rate": 4.8360205167254865e-06, "loss": 17.7642, "step": 28729 }, { "epoch": 0.5251613139086406, "grad_norm": 7.259000192502282, "learning_rate": 4.83572466470855e-06, "loss": 18.3577, "step": 28730 }, { "epoch": 0.5251795931050871, "grad_norm": 5.5184028071688385, "learning_rate": 4.835428813267385e-06, "loss": 17.3325, "step": 28731 }, { "epoch": 0.5251978723015336, "grad_norm": 5.251727641357589, "learning_rate": 4.8351329624030255e-06, "loss": 16.9968, "step": 28732 }, { "epoch": 0.5252161514979802, "grad_norm": 5.448063703813754, "learning_rate": 4.834837112116514e-06, "loss": 17.137, "step": 28733 }, { "epoch": 0.5252344306944267, "grad_norm": 5.831568472431994, "learning_rate": 4.8345412624088814e-06, "loss": 17.2769, "step": 28734 }, { "epoch": 0.5252527098908732, "grad_norm": 4.8287999413733305, "learning_rate": 4.834245413281167e-06, "loss": 16.7295, "step": 28735 }, { "epoch": 0.5252709890873197, "grad_norm": 5.193900537815957, "learning_rate": 4.83394956473441e-06, "loss": 16.9692, "step": 28736 }, { "epoch": 0.5252892682837662, "grad_norm": 7.100448094310603, "learning_rate": 4.833653716769644e-06, "loss": 17.5512, "step": 28737 }, { "epoch": 0.5253075474802128, "grad_norm": 8.357750688280527, "learning_rate": 4.8333578693879095e-06, "loss": 17.7029, "step": 28738 }, { "epoch": 0.5253258266766593, "grad_norm": 6.653940056829294, "learning_rate": 4.833062022590239e-06, "loss": 17.5639, "step": 28739 }, { "epoch": 0.5253441058731059, "grad_norm": 5.77919097964156, "learning_rate": 4.832766176377671e-06, "loss": 17.1731, "step": 28740 }, { "epoch": 0.5253623850695524, "grad_norm": 6.792309666282276, "learning_rate": 4.832470330751245e-06, "loss": 17.4062, "step": 28741 }, { "epoch": 0.5253806642659988, "grad_norm": 5.637323340332414, "learning_rate": 4.832174485711995e-06, "loss": 17.2939, "step": 28742 }, { "epoch": 0.5253989434624454, "grad_norm": 5.088149111599136, "learning_rate": 4.831878641260959e-06, "loss": 17.0118, "step": 28743 }, { "epoch": 0.5254172226588919, "grad_norm": 6.665107883465812, "learning_rate": 4.831582797399173e-06, "loss": 17.8735, "step": 28744 }, { "epoch": 0.5254355018553385, "grad_norm": 5.934339297536312, "learning_rate": 4.831286954127677e-06, "loss": 17.3122, "step": 28745 }, { "epoch": 0.525453781051785, "grad_norm": 5.522877423594949, "learning_rate": 4.830991111447503e-06, "loss": 16.9918, "step": 28746 }, { "epoch": 0.5254720602482315, "grad_norm": 6.352460929737988, "learning_rate": 4.8306952693596916e-06, "loss": 17.5666, "step": 28747 }, { "epoch": 0.525490339444678, "grad_norm": 6.520708069835449, "learning_rate": 4.83039942786528e-06, "loss": 17.8456, "step": 28748 }, { "epoch": 0.5255086186411245, "grad_norm": 6.266852236870579, "learning_rate": 4.8301035869653006e-06, "loss": 17.4629, "step": 28749 }, { "epoch": 0.525526897837571, "grad_norm": 5.3717780443408, "learning_rate": 4.8298077466607965e-06, "loss": 17.0446, "step": 28750 }, { "epoch": 0.5255451770340176, "grad_norm": 8.064369400960787, "learning_rate": 4.8295119069527984e-06, "loss": 18.0764, "step": 28751 }, { "epoch": 0.5255634562304641, "grad_norm": 5.994749865910325, "learning_rate": 4.829216067842347e-06, "loss": 17.3672, "step": 28752 }, { "epoch": 0.5255817354269107, "grad_norm": 5.7969456856365715, "learning_rate": 4.828920229330482e-06, "loss": 17.4399, "step": 28753 }, { "epoch": 0.5256000146233571, "grad_norm": 7.31493624844024, "learning_rate": 4.8286243914182326e-06, "loss": 18.06, "step": 28754 }, { "epoch": 0.5256182938198036, "grad_norm": 5.753582751787242, "learning_rate": 4.828328554106642e-06, "loss": 17.3053, "step": 28755 }, { "epoch": 0.5256365730162502, "grad_norm": 6.150802565740242, "learning_rate": 4.828032717396744e-06, "loss": 17.4962, "step": 28756 }, { "epoch": 0.5256548522126967, "grad_norm": 6.206272552471612, "learning_rate": 4.827736881289575e-06, "loss": 17.394, "step": 28757 }, { "epoch": 0.5256731314091433, "grad_norm": 6.901267374559965, "learning_rate": 4.8274410457861764e-06, "loss": 17.513, "step": 28758 }, { "epoch": 0.5256914106055898, "grad_norm": 5.536056620931555, "learning_rate": 4.8271452108875815e-06, "loss": 17.0095, "step": 28759 }, { "epoch": 0.5257096898020363, "grad_norm": 4.967540766595819, "learning_rate": 4.826849376594825e-06, "loss": 16.812, "step": 28760 }, { "epoch": 0.5257279689984828, "grad_norm": 5.979739505266952, "learning_rate": 4.826553542908948e-06, "loss": 17.1347, "step": 28761 }, { "epoch": 0.5257462481949293, "grad_norm": 5.699134976416894, "learning_rate": 4.826257709830987e-06, "loss": 17.4132, "step": 28762 }, { "epoch": 0.5257645273913759, "grad_norm": 5.727878449317979, "learning_rate": 4.825961877361975e-06, "loss": 17.2631, "step": 28763 }, { "epoch": 0.5257828065878224, "grad_norm": 7.249990447239445, "learning_rate": 4.825666045502954e-06, "loss": 17.3154, "step": 28764 }, { "epoch": 0.5258010857842689, "grad_norm": 5.949054223126825, "learning_rate": 4.825370214254958e-06, "loss": 17.3387, "step": 28765 }, { "epoch": 0.5258193649807155, "grad_norm": 6.098863907929176, "learning_rate": 4.825074383619022e-06, "loss": 17.4397, "step": 28766 }, { "epoch": 0.525837644177162, "grad_norm": 6.686424863194278, "learning_rate": 4.824778553596188e-06, "loss": 17.3564, "step": 28767 }, { "epoch": 0.5258559233736085, "grad_norm": 5.192021215463849, "learning_rate": 4.824482724187488e-06, "loss": 17.0585, "step": 28768 }, { "epoch": 0.525874202570055, "grad_norm": 5.950977882136102, "learning_rate": 4.8241868953939626e-06, "loss": 17.1144, "step": 28769 }, { "epoch": 0.5258924817665015, "grad_norm": 5.34094818584719, "learning_rate": 4.823891067216645e-06, "loss": 17.1881, "step": 28770 }, { "epoch": 0.5259107609629481, "grad_norm": 6.856814190634476, "learning_rate": 4.823595239656573e-06, "loss": 17.6609, "step": 28771 }, { "epoch": 0.5259290401593946, "grad_norm": 5.639556277583762, "learning_rate": 4.823299412714788e-06, "loss": 17.0494, "step": 28772 }, { "epoch": 0.5259473193558412, "grad_norm": 6.296680540643699, "learning_rate": 4.823003586392322e-06, "loss": 17.4254, "step": 28773 }, { "epoch": 0.5259655985522876, "grad_norm": 6.73354810613954, "learning_rate": 4.822707760690211e-06, "loss": 17.8162, "step": 28774 }, { "epoch": 0.5259838777487341, "grad_norm": 6.5525951676966345, "learning_rate": 4.822411935609496e-06, "loss": 17.1992, "step": 28775 }, { "epoch": 0.5260021569451807, "grad_norm": 8.408037551361863, "learning_rate": 4.822116111151212e-06, "loss": 18.426, "step": 28776 }, { "epoch": 0.5260204361416272, "grad_norm": 6.5854702943143675, "learning_rate": 4.821820287316394e-06, "loss": 17.6198, "step": 28777 }, { "epoch": 0.5260387153380738, "grad_norm": 6.318287058444489, "learning_rate": 4.821524464106082e-06, "loss": 17.3501, "step": 28778 }, { "epoch": 0.5260569945345203, "grad_norm": 5.133348364285311, "learning_rate": 4.8212286415213095e-06, "loss": 17.1092, "step": 28779 }, { "epoch": 0.5260752737309667, "grad_norm": 7.3323036109316515, "learning_rate": 4.820932819563115e-06, "loss": 17.8787, "step": 28780 }, { "epoch": 0.5260935529274133, "grad_norm": 7.388183691942203, "learning_rate": 4.8206369982325375e-06, "loss": 17.8646, "step": 28781 }, { "epoch": 0.5261118321238598, "grad_norm": 5.480323312940068, "learning_rate": 4.820341177530609e-06, "loss": 17.3548, "step": 28782 }, { "epoch": 0.5261301113203064, "grad_norm": 7.074633615873922, "learning_rate": 4.820045357458372e-06, "loss": 17.7369, "step": 28783 }, { "epoch": 0.5261483905167529, "grad_norm": 7.247532050009481, "learning_rate": 4.819749538016859e-06, "loss": 18.0397, "step": 28784 }, { "epoch": 0.5261666697131994, "grad_norm": 6.678261021602981, "learning_rate": 4.819453719207107e-06, "loss": 17.4542, "step": 28785 }, { "epoch": 0.526184948909646, "grad_norm": 5.309826273261194, "learning_rate": 4.819157901030156e-06, "loss": 17.1165, "step": 28786 }, { "epoch": 0.5262032281060924, "grad_norm": 7.008534551252991, "learning_rate": 4.818862083487042e-06, "loss": 18.041, "step": 28787 }, { "epoch": 0.526221507302539, "grad_norm": 5.676848863987564, "learning_rate": 4.818566266578797e-06, "loss": 17.2307, "step": 28788 }, { "epoch": 0.5262397864989855, "grad_norm": 6.501034094267944, "learning_rate": 4.818270450306464e-06, "loss": 17.7014, "step": 28789 }, { "epoch": 0.526258065695432, "grad_norm": 5.961937982620032, "learning_rate": 4.8179746346710775e-06, "loss": 17.4626, "step": 28790 }, { "epoch": 0.5262763448918786, "grad_norm": 5.322464779133777, "learning_rate": 4.817678819673672e-06, "loss": 17.1798, "step": 28791 }, { "epoch": 0.5262946240883251, "grad_norm": 5.726952271737116, "learning_rate": 4.817383005315289e-06, "loss": 17.2633, "step": 28792 }, { "epoch": 0.5263129032847716, "grad_norm": 7.5566700966965765, "learning_rate": 4.8170871915969615e-06, "loss": 18.2115, "step": 28793 }, { "epoch": 0.5263311824812181, "grad_norm": 7.582494015258839, "learning_rate": 4.816791378519726e-06, "loss": 18.143, "step": 28794 }, { "epoch": 0.5263494616776646, "grad_norm": 5.774045152665932, "learning_rate": 4.8164955660846234e-06, "loss": 17.3002, "step": 28795 }, { "epoch": 0.5263677408741112, "grad_norm": 6.097955632273698, "learning_rate": 4.816199754292688e-06, "loss": 17.2827, "step": 28796 }, { "epoch": 0.5263860200705577, "grad_norm": 6.6630289248265795, "learning_rate": 4.815903943144955e-06, "loss": 17.2041, "step": 28797 }, { "epoch": 0.5264042992670043, "grad_norm": 5.932300072459454, "learning_rate": 4.815608132642462e-06, "loss": 17.4194, "step": 28798 }, { "epoch": 0.5264225784634508, "grad_norm": 4.999133806791074, "learning_rate": 4.815312322786247e-06, "loss": 17.0654, "step": 28799 }, { "epoch": 0.5264408576598972, "grad_norm": 6.751328673344176, "learning_rate": 4.8150165135773475e-06, "loss": 17.9876, "step": 28800 }, { "epoch": 0.5264591368563438, "grad_norm": 6.192437654670182, "learning_rate": 4.814720705016799e-06, "loss": 17.6106, "step": 28801 }, { "epoch": 0.5264774160527903, "grad_norm": 7.794274628215912, "learning_rate": 4.814424897105638e-06, "loss": 17.8192, "step": 28802 }, { "epoch": 0.5264956952492369, "grad_norm": 5.323126925697858, "learning_rate": 4.8141290898449e-06, "loss": 17.3072, "step": 28803 }, { "epoch": 0.5265139744456834, "grad_norm": 7.398138475483236, "learning_rate": 4.813833283235626e-06, "loss": 18.1937, "step": 28804 }, { "epoch": 0.5265322536421299, "grad_norm": 6.105252246422766, "learning_rate": 4.8135374772788475e-06, "loss": 17.2926, "step": 28805 }, { "epoch": 0.5265505328385764, "grad_norm": 6.645956131776079, "learning_rate": 4.813241671975607e-06, "loss": 17.6453, "step": 28806 }, { "epoch": 0.5265688120350229, "grad_norm": 5.998042468674564, "learning_rate": 4.812945867326937e-06, "loss": 17.3839, "step": 28807 }, { "epoch": 0.5265870912314695, "grad_norm": 5.410285365863696, "learning_rate": 4.812650063333874e-06, "loss": 17.1879, "step": 28808 }, { "epoch": 0.526605370427916, "grad_norm": 6.562702465446166, "learning_rate": 4.8123542599974584e-06, "loss": 17.4391, "step": 28809 }, { "epoch": 0.5266236496243625, "grad_norm": 7.70544683800876, "learning_rate": 4.812058457318724e-06, "loss": 18.02, "step": 28810 }, { "epoch": 0.5266419288208091, "grad_norm": 6.833309188303262, "learning_rate": 4.811762655298707e-06, "loss": 17.8452, "step": 28811 }, { "epoch": 0.5266602080172555, "grad_norm": 5.996346710112719, "learning_rate": 4.811466853938448e-06, "loss": 17.3942, "step": 28812 }, { "epoch": 0.5266784872137021, "grad_norm": 7.732941784560871, "learning_rate": 4.811171053238978e-06, "loss": 17.989, "step": 28813 }, { "epoch": 0.5266967664101486, "grad_norm": 6.3369818548016354, "learning_rate": 4.8108752532013405e-06, "loss": 17.3566, "step": 28814 }, { "epoch": 0.5267150456065951, "grad_norm": 7.1037375989936455, "learning_rate": 4.810579453826568e-06, "loss": 17.7607, "step": 28815 }, { "epoch": 0.5267333248030417, "grad_norm": 5.608068240597165, "learning_rate": 4.810283655115697e-06, "loss": 17.1247, "step": 28816 }, { "epoch": 0.5267516039994882, "grad_norm": 5.750561928620749, "learning_rate": 4.809987857069766e-06, "loss": 17.0579, "step": 28817 }, { "epoch": 0.5267698831959347, "grad_norm": 7.964696187803064, "learning_rate": 4.809692059689813e-06, "loss": 18.233, "step": 28818 }, { "epoch": 0.5267881623923812, "grad_norm": 5.694170250881407, "learning_rate": 4.809396262976869e-06, "loss": 17.548, "step": 28819 }, { "epoch": 0.5268064415888277, "grad_norm": 5.420672596971406, "learning_rate": 4.809100466931976e-06, "loss": 17.0734, "step": 28820 }, { "epoch": 0.5268247207852743, "grad_norm": 6.091166944828002, "learning_rate": 4.808804671556171e-06, "loss": 17.3475, "step": 28821 }, { "epoch": 0.5268429999817208, "grad_norm": 7.1580787462604, "learning_rate": 4.8085088768504865e-06, "loss": 17.9794, "step": 28822 }, { "epoch": 0.5268612791781673, "grad_norm": 6.468301608303433, "learning_rate": 4.808213082815964e-06, "loss": 17.3555, "step": 28823 }, { "epoch": 0.5268795583746139, "grad_norm": 6.112134604455029, "learning_rate": 4.807917289453637e-06, "loss": 17.3798, "step": 28824 }, { "epoch": 0.5268978375710603, "grad_norm": 5.614336961777815, "learning_rate": 4.807621496764542e-06, "loss": 17.2696, "step": 28825 }, { "epoch": 0.5269161167675069, "grad_norm": 5.986367120200508, "learning_rate": 4.807325704749719e-06, "loss": 17.4735, "step": 28826 }, { "epoch": 0.5269343959639534, "grad_norm": 5.879731673740824, "learning_rate": 4.8070299134102006e-06, "loss": 17.4008, "step": 28827 }, { "epoch": 0.5269526751603999, "grad_norm": 5.533548854367254, "learning_rate": 4.806734122747028e-06, "loss": 17.1912, "step": 28828 }, { "epoch": 0.5269709543568465, "grad_norm": 7.511973988798581, "learning_rate": 4.806438332761234e-06, "loss": 17.5786, "step": 28829 }, { "epoch": 0.526989233553293, "grad_norm": 6.706040907550385, "learning_rate": 4.806142543453857e-06, "loss": 17.6139, "step": 28830 }, { "epoch": 0.5270075127497396, "grad_norm": 7.714620301900427, "learning_rate": 4.805846754825934e-06, "loss": 18.1718, "step": 28831 }, { "epoch": 0.527025791946186, "grad_norm": 6.39458512092261, "learning_rate": 4.805550966878502e-06, "loss": 17.6294, "step": 28832 }, { "epoch": 0.5270440711426325, "grad_norm": 6.84977405622963, "learning_rate": 4.805255179612595e-06, "loss": 17.7036, "step": 28833 }, { "epoch": 0.5270623503390791, "grad_norm": 5.206935118393221, "learning_rate": 4.804959393029253e-06, "loss": 17.0615, "step": 28834 }, { "epoch": 0.5270806295355256, "grad_norm": 6.176374157997757, "learning_rate": 4.804663607129512e-06, "loss": 17.5081, "step": 28835 }, { "epoch": 0.5270989087319722, "grad_norm": 7.04104096253257, "learning_rate": 4.804367821914406e-06, "loss": 17.7588, "step": 28836 }, { "epoch": 0.5271171879284187, "grad_norm": 6.64607566359931, "learning_rate": 4.804072037384976e-06, "loss": 17.6263, "step": 28837 }, { "epoch": 0.5271354671248651, "grad_norm": 6.695391060144659, "learning_rate": 4.803776253542256e-06, "loss": 17.929, "step": 28838 }, { "epoch": 0.5271537463213117, "grad_norm": 5.7556417819184755, "learning_rate": 4.803480470387282e-06, "loss": 17.2578, "step": 28839 }, { "epoch": 0.5271720255177582, "grad_norm": 4.93780507451977, "learning_rate": 4.803184687921093e-06, "loss": 16.9088, "step": 28840 }, { "epoch": 0.5271903047142048, "grad_norm": 6.562483807808549, "learning_rate": 4.802888906144726e-06, "loss": 17.7105, "step": 28841 }, { "epoch": 0.5272085839106513, "grad_norm": 5.767657012647988, "learning_rate": 4.8025931250592135e-06, "loss": 17.2964, "step": 28842 }, { "epoch": 0.5272268631070978, "grad_norm": 5.772017979605431, "learning_rate": 4.802297344665595e-06, "loss": 17.3725, "step": 28843 }, { "epoch": 0.5272451423035444, "grad_norm": 6.119249020066546, "learning_rate": 4.802001564964908e-06, "loss": 17.3731, "step": 28844 }, { "epoch": 0.5272634214999908, "grad_norm": 6.198138569624086, "learning_rate": 4.801705785958189e-06, "loss": 17.6388, "step": 28845 }, { "epoch": 0.5272817006964374, "grad_norm": 7.0646354782869265, "learning_rate": 4.801410007646475e-06, "loss": 17.3207, "step": 28846 }, { "epoch": 0.5272999798928839, "grad_norm": 5.609250364045369, "learning_rate": 4.801114230030799e-06, "loss": 17.3197, "step": 28847 }, { "epoch": 0.5273182590893304, "grad_norm": 5.920393323699768, "learning_rate": 4.800818453112201e-06, "loss": 17.3523, "step": 28848 }, { "epoch": 0.527336538285777, "grad_norm": 6.555831430885845, "learning_rate": 4.800522676891719e-06, "loss": 17.5858, "step": 28849 }, { "epoch": 0.5273548174822235, "grad_norm": 6.300141261394385, "learning_rate": 4.800226901370385e-06, "loss": 17.531, "step": 28850 }, { "epoch": 0.52737309667867, "grad_norm": 6.621617771442162, "learning_rate": 4.799931126549241e-06, "loss": 17.3024, "step": 28851 }, { "epoch": 0.5273913758751165, "grad_norm": 6.508352025331976, "learning_rate": 4.79963535242932e-06, "loss": 17.7978, "step": 28852 }, { "epoch": 0.527409655071563, "grad_norm": 6.263305196475308, "learning_rate": 4.799339579011658e-06, "loss": 17.3442, "step": 28853 }, { "epoch": 0.5274279342680096, "grad_norm": 5.870789556753149, "learning_rate": 4.799043806297296e-06, "loss": 17.2287, "step": 28854 }, { "epoch": 0.5274462134644561, "grad_norm": 7.02265397337617, "learning_rate": 4.798748034287268e-06, "loss": 17.6731, "step": 28855 }, { "epoch": 0.5274644926609027, "grad_norm": 5.772823389684692, "learning_rate": 4.798452262982608e-06, "loss": 16.9305, "step": 28856 }, { "epoch": 0.5274827718573492, "grad_norm": 5.5524350645130145, "learning_rate": 4.7981564923843575e-06, "loss": 17.316, "step": 28857 }, { "epoch": 0.5275010510537956, "grad_norm": 5.601942665543992, "learning_rate": 4.797860722493549e-06, "loss": 17.0819, "step": 28858 }, { "epoch": 0.5275193302502422, "grad_norm": 6.174825587999008, "learning_rate": 4.797564953311223e-06, "loss": 17.2222, "step": 28859 }, { "epoch": 0.5275376094466887, "grad_norm": 7.139356368902403, "learning_rate": 4.797269184838415e-06, "loss": 17.5805, "step": 28860 }, { "epoch": 0.5275558886431353, "grad_norm": 7.096687345433783, "learning_rate": 4.796973417076158e-06, "loss": 17.9524, "step": 28861 }, { "epoch": 0.5275741678395818, "grad_norm": 6.824117822901115, "learning_rate": 4.796677650025493e-06, "loss": 17.9426, "step": 28862 }, { "epoch": 0.5275924470360283, "grad_norm": 4.971978942798934, "learning_rate": 4.796381883687457e-06, "loss": 17.0492, "step": 28863 }, { "epoch": 0.5276107262324748, "grad_norm": 7.267242743867442, "learning_rate": 4.7960861180630815e-06, "loss": 17.8284, "step": 28864 }, { "epoch": 0.5276290054289213, "grad_norm": 7.140995282182595, "learning_rate": 4.7957903531534095e-06, "loss": 17.4981, "step": 28865 }, { "epoch": 0.5276472846253679, "grad_norm": 6.044278588423279, "learning_rate": 4.7954945889594735e-06, "loss": 17.3575, "step": 28866 }, { "epoch": 0.5276655638218144, "grad_norm": 6.165296503275143, "learning_rate": 4.79519882548231e-06, "loss": 17.7388, "step": 28867 }, { "epoch": 0.5276838430182609, "grad_norm": 7.251876738000928, "learning_rate": 4.794903062722959e-06, "loss": 17.8625, "step": 28868 }, { "epoch": 0.5277021222147075, "grad_norm": 5.316511572719626, "learning_rate": 4.794607300682453e-06, "loss": 17.2733, "step": 28869 }, { "epoch": 0.527720401411154, "grad_norm": 4.96127738508365, "learning_rate": 4.794311539361832e-06, "loss": 16.9464, "step": 28870 }, { "epoch": 0.5277386806076005, "grad_norm": 6.128694443346111, "learning_rate": 4.79401577876213e-06, "loss": 17.5274, "step": 28871 }, { "epoch": 0.527756959804047, "grad_norm": 6.106806971745361, "learning_rate": 4.793720018884387e-06, "loss": 17.3225, "step": 28872 }, { "epoch": 0.5277752390004935, "grad_norm": 7.468414650130117, "learning_rate": 4.793424259729634e-06, "loss": 17.8283, "step": 28873 }, { "epoch": 0.5277935181969401, "grad_norm": 7.132019563148453, "learning_rate": 4.7931285012989135e-06, "loss": 17.6442, "step": 28874 }, { "epoch": 0.5278117973933866, "grad_norm": 5.131383202438949, "learning_rate": 4.7928327435932584e-06, "loss": 17.0155, "step": 28875 }, { "epoch": 0.5278300765898332, "grad_norm": 5.292756276479835, "learning_rate": 4.792536986613707e-06, "loss": 17.2849, "step": 28876 }, { "epoch": 0.5278483557862796, "grad_norm": 6.986892692955527, "learning_rate": 4.792241230361297e-06, "loss": 17.7004, "step": 28877 }, { "epoch": 0.5278666349827261, "grad_norm": 6.412828363572758, "learning_rate": 4.791945474837061e-06, "loss": 17.4445, "step": 28878 }, { "epoch": 0.5278849141791727, "grad_norm": 7.324417043389137, "learning_rate": 4.791649720042039e-06, "loss": 17.9936, "step": 28879 }, { "epoch": 0.5279031933756192, "grad_norm": 5.866238112140212, "learning_rate": 4.791353965977268e-06, "loss": 17.3683, "step": 28880 }, { "epoch": 0.5279214725720658, "grad_norm": 6.977715905323436, "learning_rate": 4.791058212643781e-06, "loss": 17.33, "step": 28881 }, { "epoch": 0.5279397517685123, "grad_norm": 7.285617705414452, "learning_rate": 4.790762460042619e-06, "loss": 18.2784, "step": 28882 }, { "epoch": 0.5279580309649587, "grad_norm": 6.124274183913281, "learning_rate": 4.790466708174815e-06, "loss": 17.406, "step": 28883 }, { "epoch": 0.5279763101614053, "grad_norm": 5.342439893740732, "learning_rate": 4.790170957041406e-06, "loss": 16.9759, "step": 28884 }, { "epoch": 0.5279945893578518, "grad_norm": 6.44921547563559, "learning_rate": 4.789875206643432e-06, "loss": 17.5331, "step": 28885 }, { "epoch": 0.5280128685542983, "grad_norm": 6.408641977064421, "learning_rate": 4.789579456981927e-06, "loss": 17.7002, "step": 28886 }, { "epoch": 0.5280311477507449, "grad_norm": 7.119874164463317, "learning_rate": 4.789283708057926e-06, "loss": 18.0914, "step": 28887 }, { "epoch": 0.5280494269471914, "grad_norm": 6.315073171129628, "learning_rate": 4.788987959872468e-06, "loss": 17.3811, "step": 28888 }, { "epoch": 0.528067706143638, "grad_norm": 5.316694993406047, "learning_rate": 4.7886922124265875e-06, "loss": 17.1931, "step": 28889 }, { "epoch": 0.5280859853400844, "grad_norm": 6.151973430956692, "learning_rate": 4.788396465721326e-06, "loss": 17.3438, "step": 28890 }, { "epoch": 0.5281042645365309, "grad_norm": 5.531067691821519, "learning_rate": 4.788100719757715e-06, "loss": 17.2357, "step": 28891 }, { "epoch": 0.5281225437329775, "grad_norm": 8.132472525597233, "learning_rate": 4.787804974536791e-06, "loss": 18.3136, "step": 28892 }, { "epoch": 0.528140822929424, "grad_norm": 6.2304121069382274, "learning_rate": 4.787509230059593e-06, "loss": 17.4463, "step": 28893 }, { "epoch": 0.5281591021258706, "grad_norm": 5.938465286065155, "learning_rate": 4.787213486327158e-06, "loss": 17.3088, "step": 28894 }, { "epoch": 0.5281773813223171, "grad_norm": 6.935893332228987, "learning_rate": 4.786917743340519e-06, "loss": 17.7446, "step": 28895 }, { "epoch": 0.5281956605187635, "grad_norm": 7.08577763734086, "learning_rate": 4.786622001100718e-06, "loss": 17.3099, "step": 28896 }, { "epoch": 0.5282139397152101, "grad_norm": 7.63743022798368, "learning_rate": 4.786326259608785e-06, "loss": 17.802, "step": 28897 }, { "epoch": 0.5282322189116566, "grad_norm": 6.003127141033391, "learning_rate": 4.78603051886576e-06, "loss": 17.5072, "step": 28898 }, { "epoch": 0.5282504981081032, "grad_norm": 6.017089453144707, "learning_rate": 4.785734778872682e-06, "loss": 17.4595, "step": 28899 }, { "epoch": 0.5282687773045497, "grad_norm": 5.361848091579653, "learning_rate": 4.785439039630585e-06, "loss": 17.0804, "step": 28900 }, { "epoch": 0.5282870565009962, "grad_norm": 6.2754717078019375, "learning_rate": 4.785143301140504e-06, "loss": 17.4889, "step": 28901 }, { "epoch": 0.5283053356974428, "grad_norm": 6.365149984308117, "learning_rate": 4.784847563403477e-06, "loss": 17.3507, "step": 28902 }, { "epoch": 0.5283236148938892, "grad_norm": 6.818204922468601, "learning_rate": 4.784551826420542e-06, "loss": 17.5514, "step": 28903 }, { "epoch": 0.5283418940903358, "grad_norm": 5.779721557854343, "learning_rate": 4.784256090192732e-06, "loss": 17.3461, "step": 28904 }, { "epoch": 0.5283601732867823, "grad_norm": 6.774582861024068, "learning_rate": 4.783960354721089e-06, "loss": 17.8016, "step": 28905 }, { "epoch": 0.5283784524832288, "grad_norm": 6.594676569936102, "learning_rate": 4.783664620006642e-06, "loss": 17.6608, "step": 28906 }, { "epoch": 0.5283967316796754, "grad_norm": 6.839665753232639, "learning_rate": 4.783368886050434e-06, "loss": 17.4051, "step": 28907 }, { "epoch": 0.5284150108761219, "grad_norm": 5.488721248285436, "learning_rate": 4.7830731528535e-06, "loss": 17.0675, "step": 28908 }, { "epoch": 0.5284332900725685, "grad_norm": 7.588622775282102, "learning_rate": 4.782777420416874e-06, "loss": 17.9841, "step": 28909 }, { "epoch": 0.5284515692690149, "grad_norm": 5.7470798935263385, "learning_rate": 4.782481688741596e-06, "loss": 17.0947, "step": 28910 }, { "epoch": 0.5284698484654614, "grad_norm": 5.238533635509501, "learning_rate": 4.7821859578287e-06, "loss": 17.1855, "step": 28911 }, { "epoch": 0.528488127661908, "grad_norm": 5.2659604937854985, "learning_rate": 4.781890227679222e-06, "loss": 17.0248, "step": 28912 }, { "epoch": 0.5285064068583545, "grad_norm": 7.881360674751614, "learning_rate": 4.781594498294202e-06, "loss": 17.9199, "step": 28913 }, { "epoch": 0.5285246860548011, "grad_norm": 7.4174814585036595, "learning_rate": 4.781298769674675e-06, "loss": 18.175, "step": 28914 }, { "epoch": 0.5285429652512476, "grad_norm": 7.3494059635963, "learning_rate": 4.7810030418216744e-06, "loss": 17.924, "step": 28915 }, { "epoch": 0.528561244447694, "grad_norm": 5.8011214444685555, "learning_rate": 4.780707314736239e-06, "loss": 17.2422, "step": 28916 }, { "epoch": 0.5285795236441406, "grad_norm": 5.654016475660515, "learning_rate": 4.780411588419408e-06, "loss": 17.0475, "step": 28917 }, { "epoch": 0.5285978028405871, "grad_norm": 5.827169075505775, "learning_rate": 4.780115862872213e-06, "loss": 17.4755, "step": 28918 }, { "epoch": 0.5286160820370337, "grad_norm": 6.205762913996296, "learning_rate": 4.779820138095694e-06, "loss": 17.5518, "step": 28919 }, { "epoch": 0.5286343612334802, "grad_norm": 6.311415581467795, "learning_rate": 4.7795244140908845e-06, "loss": 17.6736, "step": 28920 }, { "epoch": 0.5286526404299267, "grad_norm": 6.67700343394037, "learning_rate": 4.779228690858825e-06, "loss": 17.8366, "step": 28921 }, { "epoch": 0.5286709196263732, "grad_norm": 4.573150850930606, "learning_rate": 4.7789329684005494e-06, "loss": 16.7168, "step": 28922 }, { "epoch": 0.5286891988228197, "grad_norm": 5.597287935428627, "learning_rate": 4.778637246717093e-06, "loss": 17.0395, "step": 28923 }, { "epoch": 0.5287074780192663, "grad_norm": 5.664359525650026, "learning_rate": 4.778341525809496e-06, "loss": 17.1411, "step": 28924 }, { "epoch": 0.5287257572157128, "grad_norm": 5.99321077912104, "learning_rate": 4.778045805678792e-06, "loss": 17.6294, "step": 28925 }, { "epoch": 0.5287440364121593, "grad_norm": 6.917453724948669, "learning_rate": 4.777750086326017e-06, "loss": 17.4843, "step": 28926 }, { "epoch": 0.5287623156086059, "grad_norm": 6.605667314495165, "learning_rate": 4.77745436775221e-06, "loss": 17.5499, "step": 28927 }, { "epoch": 0.5287805948050524, "grad_norm": 5.684851313266677, "learning_rate": 4.777158649958407e-06, "loss": 17.1861, "step": 28928 }, { "epoch": 0.5287988740014989, "grad_norm": 5.556901423251663, "learning_rate": 4.776862932945641e-06, "loss": 17.0909, "step": 28929 }, { "epoch": 0.5288171531979454, "grad_norm": 7.655543176993401, "learning_rate": 4.776567216714952e-06, "loss": 17.8859, "step": 28930 }, { "epoch": 0.5288354323943919, "grad_norm": 6.340572271328863, "learning_rate": 4.776271501267377e-06, "loss": 17.4977, "step": 28931 }, { "epoch": 0.5288537115908385, "grad_norm": 6.857807800546154, "learning_rate": 4.775975786603949e-06, "loss": 17.8677, "step": 28932 }, { "epoch": 0.528871990787285, "grad_norm": 6.873222102734115, "learning_rate": 4.775680072725708e-06, "loss": 17.9672, "step": 28933 }, { "epoch": 0.5288902699837316, "grad_norm": 5.399505905359774, "learning_rate": 4.775384359633688e-06, "loss": 17.1648, "step": 28934 }, { "epoch": 0.528908549180178, "grad_norm": 7.014687734891507, "learning_rate": 4.775088647328925e-06, "loss": 17.6098, "step": 28935 }, { "epoch": 0.5289268283766245, "grad_norm": 6.4533415281168285, "learning_rate": 4.7747929358124595e-06, "loss": 17.6374, "step": 28936 }, { "epoch": 0.5289451075730711, "grad_norm": 5.176733574346096, "learning_rate": 4.774497225085323e-06, "loss": 17.2231, "step": 28937 }, { "epoch": 0.5289633867695176, "grad_norm": 5.686831397646956, "learning_rate": 4.774201515148556e-06, "loss": 17.0373, "step": 28938 }, { "epoch": 0.5289816659659642, "grad_norm": 6.436715675593961, "learning_rate": 4.773905806003193e-06, "loss": 17.3613, "step": 28939 }, { "epoch": 0.5289999451624107, "grad_norm": 5.688844862169735, "learning_rate": 4.773610097650268e-06, "loss": 17.2999, "step": 28940 }, { "epoch": 0.5290182243588571, "grad_norm": 6.76112525919375, "learning_rate": 4.773314390090823e-06, "loss": 17.5339, "step": 28941 }, { "epoch": 0.5290365035553037, "grad_norm": 6.521896327749828, "learning_rate": 4.77301868332589e-06, "loss": 17.6273, "step": 28942 }, { "epoch": 0.5290547827517502, "grad_norm": 6.340256283989966, "learning_rate": 4.772722977356507e-06, "loss": 17.3061, "step": 28943 }, { "epoch": 0.5290730619481968, "grad_norm": 7.579506806029583, "learning_rate": 4.77242727218371e-06, "loss": 18.4786, "step": 28944 }, { "epoch": 0.5290913411446433, "grad_norm": 5.748663241019761, "learning_rate": 4.7721315678085364e-06, "loss": 17.0845, "step": 28945 }, { "epoch": 0.5291096203410898, "grad_norm": 7.145918408295226, "learning_rate": 4.77183586423202e-06, "loss": 17.7638, "step": 28946 }, { "epoch": 0.5291278995375364, "grad_norm": 7.635723725813082, "learning_rate": 4.7715401614552e-06, "loss": 17.8344, "step": 28947 }, { "epoch": 0.5291461787339828, "grad_norm": 7.6730095577825885, "learning_rate": 4.771244459479114e-06, "loss": 17.984, "step": 28948 }, { "epoch": 0.5291644579304294, "grad_norm": 6.272690059928203, "learning_rate": 4.770948758304793e-06, "loss": 17.4534, "step": 28949 }, { "epoch": 0.5291827371268759, "grad_norm": 6.233938132678369, "learning_rate": 4.77065305793328e-06, "loss": 17.4707, "step": 28950 }, { "epoch": 0.5292010163233224, "grad_norm": 6.405967934080573, "learning_rate": 4.770357358365605e-06, "loss": 17.6681, "step": 28951 }, { "epoch": 0.529219295519769, "grad_norm": 6.115772521647002, "learning_rate": 4.770061659602809e-06, "loss": 17.5208, "step": 28952 }, { "epoch": 0.5292375747162155, "grad_norm": 9.511511786835976, "learning_rate": 4.769765961645928e-06, "loss": 18.0441, "step": 28953 }, { "epoch": 0.5292558539126619, "grad_norm": 7.4884185891622845, "learning_rate": 4.769470264495995e-06, "loss": 17.8858, "step": 28954 }, { "epoch": 0.5292741331091085, "grad_norm": 6.190588943265005, "learning_rate": 4.769174568154052e-06, "loss": 17.4059, "step": 28955 }, { "epoch": 0.529292412305555, "grad_norm": 5.628795109903128, "learning_rate": 4.768878872621129e-06, "loss": 17.2416, "step": 28956 }, { "epoch": 0.5293106915020016, "grad_norm": 5.738085228999053, "learning_rate": 4.7685831778982656e-06, "loss": 17.4048, "step": 28957 }, { "epoch": 0.5293289706984481, "grad_norm": 6.150061708058466, "learning_rate": 4.7682874839865005e-06, "loss": 17.4161, "step": 28958 }, { "epoch": 0.5293472498948946, "grad_norm": 6.365318945287678, "learning_rate": 4.767991790886866e-06, "loss": 17.4476, "step": 28959 }, { "epoch": 0.5293655290913412, "grad_norm": 6.330916463834369, "learning_rate": 4.7676960986004e-06, "loss": 17.4031, "step": 28960 }, { "epoch": 0.5293838082877876, "grad_norm": 5.79164312744024, "learning_rate": 4.76740040712814e-06, "loss": 17.3763, "step": 28961 }, { "epoch": 0.5294020874842342, "grad_norm": 6.62474071391404, "learning_rate": 4.767104716471122e-06, "loss": 17.5816, "step": 28962 }, { "epoch": 0.5294203666806807, "grad_norm": 6.355841527629139, "learning_rate": 4.766809026630378e-06, "loss": 17.5715, "step": 28963 }, { "epoch": 0.5294386458771272, "grad_norm": 5.728055133066767, "learning_rate": 4.766513337606952e-06, "loss": 17.4855, "step": 28964 }, { "epoch": 0.5294569250735738, "grad_norm": 6.528204563701519, "learning_rate": 4.766217649401875e-06, "loss": 17.4504, "step": 28965 }, { "epoch": 0.5294752042700203, "grad_norm": 5.622772939513933, "learning_rate": 4.7659219620161845e-06, "loss": 17.205, "step": 28966 }, { "epoch": 0.5294934834664669, "grad_norm": 5.9634311597967535, "learning_rate": 4.765626275450918e-06, "loss": 17.397, "step": 28967 }, { "epoch": 0.5295117626629133, "grad_norm": 6.9254273620727655, "learning_rate": 4.76533058970711e-06, "loss": 17.3841, "step": 28968 }, { "epoch": 0.5295300418593598, "grad_norm": 5.182212194444598, "learning_rate": 4.7650349047858e-06, "loss": 16.9626, "step": 28969 }, { "epoch": 0.5295483210558064, "grad_norm": 5.5897143284923665, "learning_rate": 4.76473922068802e-06, "loss": 17.1623, "step": 28970 }, { "epoch": 0.5295666002522529, "grad_norm": 6.547672645916321, "learning_rate": 4.764443537414809e-06, "loss": 17.294, "step": 28971 }, { "epoch": 0.5295848794486995, "grad_norm": 6.335219448620369, "learning_rate": 4.764147854967205e-06, "loss": 17.1706, "step": 28972 }, { "epoch": 0.529603158645146, "grad_norm": 6.512345272851514, "learning_rate": 4.763852173346242e-06, "loss": 17.5477, "step": 28973 }, { "epoch": 0.5296214378415924, "grad_norm": 7.126635933444644, "learning_rate": 4.763556492552954e-06, "loss": 17.4364, "step": 28974 }, { "epoch": 0.529639717038039, "grad_norm": 6.699586223759229, "learning_rate": 4.763260812588381e-06, "loss": 17.5306, "step": 28975 }, { "epoch": 0.5296579962344855, "grad_norm": 6.2770136583172516, "learning_rate": 4.76296513345356e-06, "loss": 17.2066, "step": 28976 }, { "epoch": 0.5296762754309321, "grad_norm": 6.664630342097273, "learning_rate": 4.762669455149523e-06, "loss": 17.6957, "step": 28977 }, { "epoch": 0.5296945546273786, "grad_norm": 6.334087876642642, "learning_rate": 4.7623737776773125e-06, "loss": 17.3825, "step": 28978 }, { "epoch": 0.5297128338238251, "grad_norm": 7.664843603961732, "learning_rate": 4.762078101037959e-06, "loss": 17.9372, "step": 28979 }, { "epoch": 0.5297311130202716, "grad_norm": 6.2353192056173405, "learning_rate": 4.7617824252324995e-06, "loss": 17.274, "step": 28980 }, { "epoch": 0.5297493922167181, "grad_norm": 9.97945252779588, "learning_rate": 4.761486750261975e-06, "loss": 19.1664, "step": 28981 }, { "epoch": 0.5297676714131647, "grad_norm": 7.069265974511238, "learning_rate": 4.761191076127416e-06, "loss": 17.753, "step": 28982 }, { "epoch": 0.5297859506096112, "grad_norm": 6.040235588954766, "learning_rate": 4.760895402829864e-06, "loss": 17.5188, "step": 28983 }, { "epoch": 0.5298042298060577, "grad_norm": 7.13098589882815, "learning_rate": 4.760599730370352e-06, "loss": 17.8517, "step": 28984 }, { "epoch": 0.5298225090025043, "grad_norm": 6.561225109909866, "learning_rate": 4.7603040587499165e-06, "loss": 17.468, "step": 28985 }, { "epoch": 0.5298407881989508, "grad_norm": 6.197798029895168, "learning_rate": 4.7600083879695954e-06, "loss": 17.4974, "step": 28986 }, { "epoch": 0.5298590673953973, "grad_norm": 6.746513134685152, "learning_rate": 4.759712718030425e-06, "loss": 17.6516, "step": 28987 }, { "epoch": 0.5298773465918438, "grad_norm": 8.384064048282763, "learning_rate": 4.759417048933438e-06, "loss": 18.0163, "step": 28988 }, { "epoch": 0.5298956257882903, "grad_norm": 7.545582877617902, "learning_rate": 4.759121380679674e-06, "loss": 17.6561, "step": 28989 }, { "epoch": 0.5299139049847369, "grad_norm": 7.380961876726133, "learning_rate": 4.758825713270171e-06, "loss": 17.7723, "step": 28990 }, { "epoch": 0.5299321841811834, "grad_norm": 7.403514377235956, "learning_rate": 4.758530046705961e-06, "loss": 17.621, "step": 28991 }, { "epoch": 0.52995046337763, "grad_norm": 7.6176915257901765, "learning_rate": 4.758234380988083e-06, "loss": 17.9377, "step": 28992 }, { "epoch": 0.5299687425740764, "grad_norm": 5.851218543396473, "learning_rate": 4.757938716117572e-06, "loss": 17.5295, "step": 28993 }, { "epoch": 0.5299870217705229, "grad_norm": 5.357925665484355, "learning_rate": 4.757643052095464e-06, "loss": 17.1055, "step": 28994 }, { "epoch": 0.5300053009669695, "grad_norm": 6.815637251884852, "learning_rate": 4.757347388922797e-06, "loss": 17.5178, "step": 28995 }, { "epoch": 0.530023580163416, "grad_norm": 5.519619079182495, "learning_rate": 4.757051726600606e-06, "loss": 17.2082, "step": 28996 }, { "epoch": 0.5300418593598626, "grad_norm": 6.284457709423418, "learning_rate": 4.756756065129929e-06, "loss": 17.5382, "step": 28997 }, { "epoch": 0.5300601385563091, "grad_norm": 6.20993452893053, "learning_rate": 4.756460404511799e-06, "loss": 17.1413, "step": 28998 }, { "epoch": 0.5300784177527555, "grad_norm": 6.393283012192629, "learning_rate": 4.7561647447472545e-06, "loss": 17.4477, "step": 28999 }, { "epoch": 0.5300966969492021, "grad_norm": 5.622959309911061, "learning_rate": 4.755869085837333e-06, "loss": 17.1082, "step": 29000 }, { "epoch": 0.5301149761456486, "grad_norm": 5.020867332350183, "learning_rate": 4.755573427783068e-06, "loss": 17.0118, "step": 29001 }, { "epoch": 0.5301332553420952, "grad_norm": 5.03247731379233, "learning_rate": 4.755277770585496e-06, "loss": 16.8708, "step": 29002 }, { "epoch": 0.5301515345385417, "grad_norm": 7.435931180725855, "learning_rate": 4.754982114245655e-06, "loss": 17.8667, "step": 29003 }, { "epoch": 0.5301698137349882, "grad_norm": 6.99032306287987, "learning_rate": 4.754686458764582e-06, "loss": 17.8947, "step": 29004 }, { "epoch": 0.5301880929314348, "grad_norm": 7.200105281796414, "learning_rate": 4.754390804143309e-06, "loss": 18.0311, "step": 29005 }, { "epoch": 0.5302063721278812, "grad_norm": 4.986183366678657, "learning_rate": 4.754095150382876e-06, "loss": 16.965, "step": 29006 }, { "epoch": 0.5302246513243278, "grad_norm": 6.233305622340141, "learning_rate": 4.753799497484319e-06, "loss": 17.4663, "step": 29007 }, { "epoch": 0.5302429305207743, "grad_norm": 5.673532705218475, "learning_rate": 4.753503845448672e-06, "loss": 17.0611, "step": 29008 }, { "epoch": 0.5302612097172208, "grad_norm": 5.690178376425513, "learning_rate": 4.753208194276974e-06, "loss": 17.1658, "step": 29009 }, { "epoch": 0.5302794889136674, "grad_norm": 6.673679623648136, "learning_rate": 4.7529125439702594e-06, "loss": 17.9059, "step": 29010 }, { "epoch": 0.5302977681101139, "grad_norm": 6.457653997418939, "learning_rate": 4.752616894529564e-06, "loss": 17.2834, "step": 29011 }, { "epoch": 0.5303160473065605, "grad_norm": 5.788563805288343, "learning_rate": 4.752321245955927e-06, "loss": 17.2688, "step": 29012 }, { "epoch": 0.5303343265030069, "grad_norm": 7.02269511892852, "learning_rate": 4.752025598250379e-06, "loss": 17.782, "step": 29013 }, { "epoch": 0.5303526056994534, "grad_norm": 6.536583697592861, "learning_rate": 4.751729951413963e-06, "loss": 17.5823, "step": 29014 }, { "epoch": 0.5303708848959, "grad_norm": 6.175956006660992, "learning_rate": 4.7514343054477105e-06, "loss": 17.1392, "step": 29015 }, { "epoch": 0.5303891640923465, "grad_norm": 6.52542404024492, "learning_rate": 4.751138660352659e-06, "loss": 17.2988, "step": 29016 }, { "epoch": 0.5304074432887931, "grad_norm": 5.389273970877506, "learning_rate": 4.750843016129846e-06, "loss": 17.0757, "step": 29017 }, { "epoch": 0.5304257224852396, "grad_norm": 6.227759632651717, "learning_rate": 4.750547372780308e-06, "loss": 17.5329, "step": 29018 }, { "epoch": 0.530444001681686, "grad_norm": 7.032243624524673, "learning_rate": 4.750251730305077e-06, "loss": 18.0772, "step": 29019 }, { "epoch": 0.5304622808781326, "grad_norm": 8.65552082768172, "learning_rate": 4.749956088705192e-06, "loss": 17.8541, "step": 29020 }, { "epoch": 0.5304805600745791, "grad_norm": 7.130319468662974, "learning_rate": 4.749660447981691e-06, "loss": 17.8018, "step": 29021 }, { "epoch": 0.5304988392710256, "grad_norm": 7.080552813274937, "learning_rate": 4.749364808135607e-06, "loss": 17.7129, "step": 29022 }, { "epoch": 0.5305171184674722, "grad_norm": 5.794984351054141, "learning_rate": 4.749069169167979e-06, "loss": 17.3477, "step": 29023 }, { "epoch": 0.5305353976639187, "grad_norm": 5.624996959233557, "learning_rate": 4.7487735310798405e-06, "loss": 17.1875, "step": 29024 }, { "epoch": 0.5305536768603653, "grad_norm": 5.469560987892032, "learning_rate": 4.7484778938722285e-06, "loss": 17.2285, "step": 29025 }, { "epoch": 0.5305719560568117, "grad_norm": 5.890981220322474, "learning_rate": 4.748182257546181e-06, "loss": 16.9253, "step": 29026 }, { "epoch": 0.5305902352532582, "grad_norm": 8.778371523133774, "learning_rate": 4.747886622102731e-06, "loss": 18.3411, "step": 29027 }, { "epoch": 0.5306085144497048, "grad_norm": 5.650137184782942, "learning_rate": 4.747590987542919e-06, "loss": 17.1306, "step": 29028 }, { "epoch": 0.5306267936461513, "grad_norm": 6.961102745151707, "learning_rate": 4.747295353867778e-06, "loss": 18.2216, "step": 29029 }, { "epoch": 0.5306450728425979, "grad_norm": 6.470423668539671, "learning_rate": 4.7469997210783435e-06, "loss": 17.6332, "step": 29030 }, { "epoch": 0.5306633520390444, "grad_norm": 7.04058675641339, "learning_rate": 4.746704089175655e-06, "loss": 17.6692, "step": 29031 }, { "epoch": 0.5306816312354908, "grad_norm": 7.120824994109516, "learning_rate": 4.7464084581607465e-06, "loss": 17.4945, "step": 29032 }, { "epoch": 0.5306999104319374, "grad_norm": 7.813392016753592, "learning_rate": 4.746112828034653e-06, "loss": 17.6192, "step": 29033 }, { "epoch": 0.5307181896283839, "grad_norm": 6.513456589941889, "learning_rate": 4.745817198798412e-06, "loss": 17.3292, "step": 29034 }, { "epoch": 0.5307364688248305, "grad_norm": 6.6167338676562135, "learning_rate": 4.745521570453061e-06, "loss": 17.394, "step": 29035 }, { "epoch": 0.530754748021277, "grad_norm": 8.437965719621715, "learning_rate": 4.745225942999633e-06, "loss": 18.5355, "step": 29036 }, { "epoch": 0.5307730272177235, "grad_norm": 8.917662320778536, "learning_rate": 4.744930316439168e-06, "loss": 18.2374, "step": 29037 }, { "epoch": 0.53079130641417, "grad_norm": 6.990175887722325, "learning_rate": 4.744634690772699e-06, "loss": 17.5489, "step": 29038 }, { "epoch": 0.5308095856106165, "grad_norm": 6.7485057341990675, "learning_rate": 4.744339066001262e-06, "loss": 17.3745, "step": 29039 }, { "epoch": 0.5308278648070631, "grad_norm": 7.143291266208228, "learning_rate": 4.744043442125897e-06, "loss": 17.8604, "step": 29040 }, { "epoch": 0.5308461440035096, "grad_norm": 6.971640177308801, "learning_rate": 4.743747819147637e-06, "loss": 17.62, "step": 29041 }, { "epoch": 0.5308644231999561, "grad_norm": 7.976051122820207, "learning_rate": 4.743452197067516e-06, "loss": 17.6011, "step": 29042 }, { "epoch": 0.5308827023964027, "grad_norm": 6.391408805176334, "learning_rate": 4.743156575886575e-06, "loss": 17.0996, "step": 29043 }, { "epoch": 0.5309009815928492, "grad_norm": 4.956450882770411, "learning_rate": 4.742860955605846e-06, "loss": 16.9487, "step": 29044 }, { "epoch": 0.5309192607892957, "grad_norm": 7.895859674510243, "learning_rate": 4.74256533622637e-06, "loss": 17.883, "step": 29045 }, { "epoch": 0.5309375399857422, "grad_norm": 7.879627065172488, "learning_rate": 4.74226971774918e-06, "loss": 17.886, "step": 29046 }, { "epoch": 0.5309558191821887, "grad_norm": 7.166142253316326, "learning_rate": 4.74197410017531e-06, "loss": 17.5091, "step": 29047 }, { "epoch": 0.5309740983786353, "grad_norm": 6.595705922538593, "learning_rate": 4.741678483505799e-06, "loss": 17.5326, "step": 29048 }, { "epoch": 0.5309923775750818, "grad_norm": 7.3835527577926845, "learning_rate": 4.741382867741684e-06, "loss": 18.1946, "step": 29049 }, { "epoch": 0.5310106567715284, "grad_norm": 6.338145646823406, "learning_rate": 4.741087252883998e-06, "loss": 17.4536, "step": 29050 }, { "epoch": 0.5310289359679748, "grad_norm": 11.185395669690315, "learning_rate": 4.74079163893378e-06, "loss": 17.9135, "step": 29051 }, { "epoch": 0.5310472151644213, "grad_norm": 7.885925422033358, "learning_rate": 4.740496025892064e-06, "loss": 18.2055, "step": 29052 }, { "epoch": 0.5310654943608679, "grad_norm": 6.817404549241904, "learning_rate": 4.740200413759886e-06, "loss": 17.429, "step": 29053 }, { "epoch": 0.5310837735573144, "grad_norm": 6.044258696525604, "learning_rate": 4.739904802538284e-06, "loss": 17.6657, "step": 29054 }, { "epoch": 0.531102052753761, "grad_norm": 9.037642839197277, "learning_rate": 4.739609192228295e-06, "loss": 17.7689, "step": 29055 }, { "epoch": 0.5311203319502075, "grad_norm": 4.846138987507346, "learning_rate": 4.7393135828309495e-06, "loss": 16.8368, "step": 29056 }, { "epoch": 0.531138611146654, "grad_norm": 10.03554536509534, "learning_rate": 4.7390179743472895e-06, "loss": 17.9387, "step": 29057 }, { "epoch": 0.5311568903431005, "grad_norm": 6.869602546938215, "learning_rate": 4.738722366778346e-06, "loss": 17.7339, "step": 29058 }, { "epoch": 0.531175169539547, "grad_norm": 5.454949928664412, "learning_rate": 4.738426760125162e-06, "loss": 17.0717, "step": 29059 }, { "epoch": 0.5311934487359936, "grad_norm": 5.857091816580592, "learning_rate": 4.738131154388768e-06, "loss": 17.1165, "step": 29060 }, { "epoch": 0.5312117279324401, "grad_norm": 6.930273201413045, "learning_rate": 4.737835549570201e-06, "loss": 17.7204, "step": 29061 }, { "epoch": 0.5312300071288866, "grad_norm": 5.431037714773593, "learning_rate": 4.737539945670498e-06, "loss": 17.1813, "step": 29062 }, { "epoch": 0.5312482863253332, "grad_norm": 6.5420044744411845, "learning_rate": 4.737244342690696e-06, "loss": 17.4828, "step": 29063 }, { "epoch": 0.5312665655217796, "grad_norm": 7.036392217452092, "learning_rate": 4.736948740631827e-06, "loss": 17.8612, "step": 29064 }, { "epoch": 0.5312848447182262, "grad_norm": 7.429120135752747, "learning_rate": 4.736653139494933e-06, "loss": 18.002, "step": 29065 }, { "epoch": 0.5313031239146727, "grad_norm": 7.647763917020927, "learning_rate": 4.736357539281045e-06, "loss": 18.3326, "step": 29066 }, { "epoch": 0.5313214031111192, "grad_norm": 6.714500069030933, "learning_rate": 4.7360619399912e-06, "loss": 17.7183, "step": 29067 }, { "epoch": 0.5313396823075658, "grad_norm": 6.437689205093623, "learning_rate": 4.735766341626437e-06, "loss": 17.5725, "step": 29068 }, { "epoch": 0.5313579615040123, "grad_norm": 5.896080567498599, "learning_rate": 4.735470744187789e-06, "loss": 17.1384, "step": 29069 }, { "epoch": 0.5313762407004589, "grad_norm": 5.706152266296976, "learning_rate": 4.735175147676294e-06, "loss": 17.4059, "step": 29070 }, { "epoch": 0.5313945198969053, "grad_norm": 6.558307824820367, "learning_rate": 4.734879552092986e-06, "loss": 17.5545, "step": 29071 }, { "epoch": 0.5314127990933518, "grad_norm": 6.6419850563836444, "learning_rate": 4.734583957438903e-06, "loss": 17.7131, "step": 29072 }, { "epoch": 0.5314310782897984, "grad_norm": 5.426127406448526, "learning_rate": 4.7342883637150796e-06, "loss": 17.0622, "step": 29073 }, { "epoch": 0.5314493574862449, "grad_norm": 6.008733891505808, "learning_rate": 4.7339927709225524e-06, "loss": 17.4839, "step": 29074 }, { "epoch": 0.5314676366826915, "grad_norm": 5.623738962763688, "learning_rate": 4.733697179062356e-06, "loss": 17.3007, "step": 29075 }, { "epoch": 0.531485915879138, "grad_norm": 5.616239444795882, "learning_rate": 4.733401588135531e-06, "loss": 17.0733, "step": 29076 }, { "epoch": 0.5315041950755844, "grad_norm": 7.862884441361074, "learning_rate": 4.73310599814311e-06, "loss": 17.9129, "step": 29077 }, { "epoch": 0.531522474272031, "grad_norm": 5.009030678254554, "learning_rate": 4.732810409086127e-06, "loss": 16.9358, "step": 29078 }, { "epoch": 0.5315407534684775, "grad_norm": 5.334322342518181, "learning_rate": 4.732514820965621e-06, "loss": 17.1485, "step": 29079 }, { "epoch": 0.5315590326649241, "grad_norm": 6.000890507008084, "learning_rate": 4.73221923378263e-06, "loss": 17.4316, "step": 29080 }, { "epoch": 0.5315773118613706, "grad_norm": 6.659016598136487, "learning_rate": 4.731923647538184e-06, "loss": 17.3543, "step": 29081 }, { "epoch": 0.5315955910578171, "grad_norm": 6.61833062327974, "learning_rate": 4.731628062233325e-06, "loss": 17.6285, "step": 29082 }, { "epoch": 0.5316138702542637, "grad_norm": 5.462550395014614, "learning_rate": 4.731332477869084e-06, "loss": 16.8888, "step": 29083 }, { "epoch": 0.5316321494507101, "grad_norm": 8.559596390802898, "learning_rate": 4.731036894446499e-06, "loss": 18.3411, "step": 29084 }, { "epoch": 0.5316504286471567, "grad_norm": 5.5304459159182136, "learning_rate": 4.730741311966609e-06, "loss": 17.2945, "step": 29085 }, { "epoch": 0.5316687078436032, "grad_norm": 6.4386947966121255, "learning_rate": 4.730445730430447e-06, "loss": 17.6291, "step": 29086 }, { "epoch": 0.5316869870400497, "grad_norm": 6.924546856510563, "learning_rate": 4.730150149839047e-06, "loss": 17.9036, "step": 29087 }, { "epoch": 0.5317052662364963, "grad_norm": 8.26086054422182, "learning_rate": 4.729854570193448e-06, "loss": 18.4739, "step": 29088 }, { "epoch": 0.5317235454329428, "grad_norm": 5.911957173509042, "learning_rate": 4.729558991494685e-06, "loss": 17.5701, "step": 29089 }, { "epoch": 0.5317418246293892, "grad_norm": 7.3927788013912155, "learning_rate": 4.7292634137437965e-06, "loss": 17.8532, "step": 29090 }, { "epoch": 0.5317601038258358, "grad_norm": 6.669605504379684, "learning_rate": 4.728967836941816e-06, "loss": 17.3569, "step": 29091 }, { "epoch": 0.5317783830222823, "grad_norm": 7.796781218527904, "learning_rate": 4.728672261089777e-06, "loss": 17.879, "step": 29092 }, { "epoch": 0.5317966622187289, "grad_norm": 6.581204812453982, "learning_rate": 4.72837668618872e-06, "loss": 17.4378, "step": 29093 }, { "epoch": 0.5318149414151754, "grad_norm": 5.786141109773239, "learning_rate": 4.72808111223968e-06, "loss": 17.3518, "step": 29094 }, { "epoch": 0.5318332206116219, "grad_norm": 5.580146707013943, "learning_rate": 4.72778553924369e-06, "loss": 17.1563, "step": 29095 }, { "epoch": 0.5318514998080685, "grad_norm": 6.7681026442740535, "learning_rate": 4.72748996720179e-06, "loss": 17.6506, "step": 29096 }, { "epoch": 0.5318697790045149, "grad_norm": 6.325593306072917, "learning_rate": 4.727194396115013e-06, "loss": 17.4579, "step": 29097 }, { "epoch": 0.5318880582009615, "grad_norm": 7.750348884085031, "learning_rate": 4.7268988259843945e-06, "loss": 18.1183, "step": 29098 }, { "epoch": 0.531906337397408, "grad_norm": 7.378992994794502, "learning_rate": 4.7266032568109745e-06, "loss": 18.222, "step": 29099 }, { "epoch": 0.5319246165938545, "grad_norm": 7.214803875822746, "learning_rate": 4.726307688595787e-06, "loss": 17.9459, "step": 29100 }, { "epoch": 0.5319428957903011, "grad_norm": 6.175586950398687, "learning_rate": 4.726012121339864e-06, "loss": 17.5421, "step": 29101 }, { "epoch": 0.5319611749867476, "grad_norm": 5.526292871774039, "learning_rate": 4.725716555044246e-06, "loss": 17.3515, "step": 29102 }, { "epoch": 0.5319794541831941, "grad_norm": 6.173101937782035, "learning_rate": 4.72542098970997e-06, "loss": 17.2722, "step": 29103 }, { "epoch": 0.5319977333796406, "grad_norm": 7.939720874584466, "learning_rate": 4.725125425338066e-06, "loss": 17.6554, "step": 29104 }, { "epoch": 0.5320160125760871, "grad_norm": 7.102072818870398, "learning_rate": 4.724829861929576e-06, "loss": 18.0799, "step": 29105 }, { "epoch": 0.5320342917725337, "grad_norm": 6.193719341303695, "learning_rate": 4.724534299485532e-06, "loss": 17.3817, "step": 29106 }, { "epoch": 0.5320525709689802, "grad_norm": 6.710891402253766, "learning_rate": 4.724238738006972e-06, "loss": 17.5157, "step": 29107 }, { "epoch": 0.5320708501654268, "grad_norm": 6.289553488924047, "learning_rate": 4.723943177494932e-06, "loss": 17.2921, "step": 29108 }, { "epoch": 0.5320891293618732, "grad_norm": 7.320087808991515, "learning_rate": 4.723647617950446e-06, "loss": 18.1539, "step": 29109 }, { "epoch": 0.5321074085583197, "grad_norm": 5.961258353746868, "learning_rate": 4.723352059374552e-06, "loss": 17.329, "step": 29110 }, { "epoch": 0.5321256877547663, "grad_norm": 6.051518226625935, "learning_rate": 4.723056501768285e-06, "loss": 17.4943, "step": 29111 }, { "epoch": 0.5321439669512128, "grad_norm": 6.3104499872196715, "learning_rate": 4.722760945132679e-06, "loss": 17.4186, "step": 29112 }, { "epoch": 0.5321622461476594, "grad_norm": 6.005834493972101, "learning_rate": 4.722465389468775e-06, "loss": 17.5438, "step": 29113 }, { "epoch": 0.5321805253441059, "grad_norm": 6.753467752413835, "learning_rate": 4.722169834777605e-06, "loss": 18.0328, "step": 29114 }, { "epoch": 0.5321988045405524, "grad_norm": 6.127705081696299, "learning_rate": 4.7218742810602035e-06, "loss": 17.5204, "step": 29115 }, { "epoch": 0.5322170837369989, "grad_norm": 5.364277239290237, "learning_rate": 4.72157872831761e-06, "loss": 17.2414, "step": 29116 }, { "epoch": 0.5322353629334454, "grad_norm": 4.817707614412788, "learning_rate": 4.72128317655086e-06, "loss": 16.9568, "step": 29117 }, { "epoch": 0.532253642129892, "grad_norm": 5.5802778958409025, "learning_rate": 4.720987625760985e-06, "loss": 17.2708, "step": 29118 }, { "epoch": 0.5322719213263385, "grad_norm": 5.350329158205401, "learning_rate": 4.720692075949027e-06, "loss": 17.2927, "step": 29119 }, { "epoch": 0.532290200522785, "grad_norm": 6.25668171940735, "learning_rate": 4.720396527116018e-06, "loss": 17.4023, "step": 29120 }, { "epoch": 0.5323084797192316, "grad_norm": 6.244412090560745, "learning_rate": 4.720100979262995e-06, "loss": 17.5831, "step": 29121 }, { "epoch": 0.532326758915678, "grad_norm": 7.59218018480341, "learning_rate": 4.719805432390995e-06, "loss": 17.7089, "step": 29122 }, { "epoch": 0.5323450381121246, "grad_norm": 9.935650537819685, "learning_rate": 4.7195098865010504e-06, "loss": 18.3908, "step": 29123 }, { "epoch": 0.5323633173085711, "grad_norm": 6.495075807465171, "learning_rate": 4.719214341594201e-06, "loss": 17.6079, "step": 29124 }, { "epoch": 0.5323815965050176, "grad_norm": 7.914936385801918, "learning_rate": 4.7189187976714804e-06, "loss": 18.2558, "step": 29125 }, { "epoch": 0.5323998757014642, "grad_norm": 6.230754304976618, "learning_rate": 4.718623254733924e-06, "loss": 17.5205, "step": 29126 }, { "epoch": 0.5324181548979107, "grad_norm": 5.421609703568032, "learning_rate": 4.71832771278257e-06, "loss": 17.2301, "step": 29127 }, { "epoch": 0.5324364340943573, "grad_norm": 6.478292233647558, "learning_rate": 4.718032171818453e-06, "loss": 17.5168, "step": 29128 }, { "epoch": 0.5324547132908037, "grad_norm": 6.688539822736038, "learning_rate": 4.717736631842608e-06, "loss": 17.6626, "step": 29129 }, { "epoch": 0.5324729924872502, "grad_norm": 6.928218580494495, "learning_rate": 4.717441092856072e-06, "loss": 18.171, "step": 29130 }, { "epoch": 0.5324912716836968, "grad_norm": 6.56043645920163, "learning_rate": 4.7171455548598816e-06, "loss": 17.7995, "step": 29131 }, { "epoch": 0.5325095508801433, "grad_norm": 6.150982416930782, "learning_rate": 4.7168500178550695e-06, "loss": 17.4851, "step": 29132 }, { "epoch": 0.5325278300765899, "grad_norm": 6.320931657999938, "learning_rate": 4.716554481842674e-06, "loss": 17.5192, "step": 29133 }, { "epoch": 0.5325461092730364, "grad_norm": 4.759395082450143, "learning_rate": 4.716258946823732e-06, "loss": 16.7456, "step": 29134 }, { "epoch": 0.5325643884694828, "grad_norm": 7.486771062381638, "learning_rate": 4.715963412799276e-06, "loss": 17.7593, "step": 29135 }, { "epoch": 0.5325826676659294, "grad_norm": 5.549348222321404, "learning_rate": 4.715667879770345e-06, "loss": 17.1854, "step": 29136 }, { "epoch": 0.5326009468623759, "grad_norm": 5.915789294304772, "learning_rate": 4.715372347737971e-06, "loss": 17.4329, "step": 29137 }, { "epoch": 0.5326192260588225, "grad_norm": 9.867560162165196, "learning_rate": 4.715076816703194e-06, "loss": 17.8107, "step": 29138 }, { "epoch": 0.532637505255269, "grad_norm": 6.532373079707325, "learning_rate": 4.714781286667048e-06, "loss": 17.5884, "step": 29139 }, { "epoch": 0.5326557844517155, "grad_norm": 5.020319197667657, "learning_rate": 4.714485757630568e-06, "loss": 16.8819, "step": 29140 }, { "epoch": 0.532674063648162, "grad_norm": 7.2737297847563696, "learning_rate": 4.714190229594792e-06, "loss": 17.9487, "step": 29141 }, { "epoch": 0.5326923428446085, "grad_norm": 6.0278678970210535, "learning_rate": 4.713894702560754e-06, "loss": 17.3564, "step": 29142 }, { "epoch": 0.5327106220410551, "grad_norm": 8.863413400127481, "learning_rate": 4.713599176529488e-06, "loss": 18.677, "step": 29143 }, { "epoch": 0.5327289012375016, "grad_norm": 6.13073041808526, "learning_rate": 4.713303651502036e-06, "loss": 17.609, "step": 29144 }, { "epoch": 0.5327471804339481, "grad_norm": 7.0548541991269555, "learning_rate": 4.713008127479429e-06, "loss": 17.8444, "step": 29145 }, { "epoch": 0.5327654596303947, "grad_norm": 6.3594455768943385, "learning_rate": 4.712712604462701e-06, "loss": 17.6913, "step": 29146 }, { "epoch": 0.5327837388268412, "grad_norm": 7.358024674587688, "learning_rate": 4.712417082452892e-06, "loss": 18.1915, "step": 29147 }, { "epoch": 0.5328020180232877, "grad_norm": 7.27916880003968, "learning_rate": 4.7121215614510365e-06, "loss": 17.7422, "step": 29148 }, { "epoch": 0.5328202972197342, "grad_norm": 6.10328131209803, "learning_rate": 4.711826041458169e-06, "loss": 17.4295, "step": 29149 }, { "epoch": 0.5328385764161807, "grad_norm": 6.993145169384505, "learning_rate": 4.711530522475327e-06, "loss": 17.9042, "step": 29150 }, { "epoch": 0.5328568556126273, "grad_norm": 6.224101834741438, "learning_rate": 4.711235004503544e-06, "loss": 17.4397, "step": 29151 }, { "epoch": 0.5328751348090738, "grad_norm": 6.784740796828709, "learning_rate": 4.7109394875438585e-06, "loss": 17.503, "step": 29152 }, { "epoch": 0.5328934140055204, "grad_norm": 5.454179536820799, "learning_rate": 4.710643971597306e-06, "loss": 17.3178, "step": 29153 }, { "epoch": 0.5329116932019669, "grad_norm": 5.419832965479272, "learning_rate": 4.710348456664919e-06, "loss": 17.1325, "step": 29154 }, { "epoch": 0.5329299723984133, "grad_norm": 6.468622716265971, "learning_rate": 4.710052942747738e-06, "loss": 17.3501, "step": 29155 }, { "epoch": 0.5329482515948599, "grad_norm": 5.677443523185989, "learning_rate": 4.709757429846795e-06, "loss": 17.5229, "step": 29156 }, { "epoch": 0.5329665307913064, "grad_norm": 6.436444698835134, "learning_rate": 4.709461917963126e-06, "loss": 17.6191, "step": 29157 }, { "epoch": 0.5329848099877529, "grad_norm": 6.253066463006159, "learning_rate": 4.709166407097769e-06, "loss": 17.4264, "step": 29158 }, { "epoch": 0.5330030891841995, "grad_norm": 5.261050823052316, "learning_rate": 4.70887089725176e-06, "loss": 17.2113, "step": 29159 }, { "epoch": 0.533021368380646, "grad_norm": 6.257819167140041, "learning_rate": 4.708575388426131e-06, "loss": 17.4203, "step": 29160 }, { "epoch": 0.5330396475770925, "grad_norm": 6.513401227549751, "learning_rate": 4.70827988062192e-06, "loss": 17.6654, "step": 29161 }, { "epoch": 0.533057926773539, "grad_norm": 5.635081993814778, "learning_rate": 4.707984373840164e-06, "loss": 17.2523, "step": 29162 }, { "epoch": 0.5330762059699855, "grad_norm": 8.04649228287663, "learning_rate": 4.707688868081896e-06, "loss": 17.7352, "step": 29163 }, { "epoch": 0.5330944851664321, "grad_norm": 5.31590699965306, "learning_rate": 4.707393363348154e-06, "loss": 17.1076, "step": 29164 }, { "epoch": 0.5331127643628786, "grad_norm": 5.554912643104351, "learning_rate": 4.707097859639972e-06, "loss": 17.2083, "step": 29165 }, { "epoch": 0.5331310435593252, "grad_norm": 7.071426242434656, "learning_rate": 4.7068023569583865e-06, "loss": 17.7656, "step": 29166 }, { "epoch": 0.5331493227557716, "grad_norm": 5.76104893770655, "learning_rate": 4.706506855304435e-06, "loss": 17.2488, "step": 29167 }, { "epoch": 0.5331676019522181, "grad_norm": 6.712165285165031, "learning_rate": 4.7062113546791496e-06, "loss": 17.3628, "step": 29168 }, { "epoch": 0.5331858811486647, "grad_norm": 6.7751911834935195, "learning_rate": 4.70591585508357e-06, "loss": 17.6679, "step": 29169 }, { "epoch": 0.5332041603451112, "grad_norm": 6.4036925971669225, "learning_rate": 4.705620356518729e-06, "loss": 17.7652, "step": 29170 }, { "epoch": 0.5332224395415578, "grad_norm": 5.620878637163123, "learning_rate": 4.705324858985662e-06, "loss": 17.3258, "step": 29171 }, { "epoch": 0.5332407187380043, "grad_norm": 6.484494676186936, "learning_rate": 4.705029362485407e-06, "loss": 17.6879, "step": 29172 }, { "epoch": 0.5332589979344508, "grad_norm": 6.126985357715734, "learning_rate": 4.704733867018999e-06, "loss": 17.608, "step": 29173 }, { "epoch": 0.5332772771308973, "grad_norm": 6.061217895217802, "learning_rate": 4.704438372587471e-06, "loss": 17.3942, "step": 29174 }, { "epoch": 0.5332955563273438, "grad_norm": 6.91946908138377, "learning_rate": 4.704142879191862e-06, "loss": 17.885, "step": 29175 }, { "epoch": 0.5333138355237904, "grad_norm": 5.638756120268821, "learning_rate": 4.703847386833207e-06, "loss": 17.4492, "step": 29176 }, { "epoch": 0.5333321147202369, "grad_norm": 7.360688988167568, "learning_rate": 4.70355189551254e-06, "loss": 17.8167, "step": 29177 }, { "epoch": 0.5333503939166834, "grad_norm": 5.554017683149972, "learning_rate": 4.7032564052309e-06, "loss": 17.3239, "step": 29178 }, { "epoch": 0.53336867311313, "grad_norm": 7.220440437032762, "learning_rate": 4.7029609159893196e-06, "loss": 17.9182, "step": 29179 }, { "epoch": 0.5333869523095764, "grad_norm": 5.617028454349031, "learning_rate": 4.702665427788833e-06, "loss": 17.1134, "step": 29180 }, { "epoch": 0.533405231506023, "grad_norm": 7.210935894788144, "learning_rate": 4.702369940630482e-06, "loss": 17.7359, "step": 29181 }, { "epoch": 0.5334235107024695, "grad_norm": 5.442315779787362, "learning_rate": 4.702074454515296e-06, "loss": 17.3984, "step": 29182 }, { "epoch": 0.533441789898916, "grad_norm": 5.732815744973244, "learning_rate": 4.701778969444315e-06, "loss": 17.2546, "step": 29183 }, { "epoch": 0.5334600690953626, "grad_norm": 6.875816693462245, "learning_rate": 4.701483485418571e-06, "loss": 17.7802, "step": 29184 }, { "epoch": 0.5334783482918091, "grad_norm": 5.564279309972907, "learning_rate": 4.701188002439101e-06, "loss": 17.452, "step": 29185 }, { "epoch": 0.5334966274882557, "grad_norm": 5.306153078862511, "learning_rate": 4.700892520506944e-06, "loss": 17.2651, "step": 29186 }, { "epoch": 0.5335149066847021, "grad_norm": 14.53928973098706, "learning_rate": 4.700597039623133e-06, "loss": 17.6077, "step": 29187 }, { "epoch": 0.5335331858811486, "grad_norm": 6.369984681166308, "learning_rate": 4.7003015597887e-06, "loss": 17.5793, "step": 29188 }, { "epoch": 0.5335514650775952, "grad_norm": 7.481302517731318, "learning_rate": 4.700006081004685e-06, "loss": 18.1637, "step": 29189 }, { "epoch": 0.5335697442740417, "grad_norm": 6.582023609889075, "learning_rate": 4.699710603272125e-06, "loss": 17.6102, "step": 29190 }, { "epoch": 0.5335880234704883, "grad_norm": 6.980630320109185, "learning_rate": 4.699415126592051e-06, "loss": 17.8523, "step": 29191 }, { "epoch": 0.5336063026669348, "grad_norm": 5.382278433913923, "learning_rate": 4.699119650965502e-06, "loss": 16.9737, "step": 29192 }, { "epoch": 0.5336245818633812, "grad_norm": 5.500432465719919, "learning_rate": 4.698824176393512e-06, "loss": 17.2951, "step": 29193 }, { "epoch": 0.5336428610598278, "grad_norm": 5.730394696563894, "learning_rate": 4.698528702877116e-06, "loss": 17.4959, "step": 29194 }, { "epoch": 0.5336611402562743, "grad_norm": 5.30363362092714, "learning_rate": 4.6982332304173524e-06, "loss": 17.0591, "step": 29195 }, { "epoch": 0.5336794194527209, "grad_norm": 5.358670228367869, "learning_rate": 4.697937759015254e-06, "loss": 17.1578, "step": 29196 }, { "epoch": 0.5336976986491674, "grad_norm": 8.413072866315387, "learning_rate": 4.697642288671858e-06, "loss": 18.0521, "step": 29197 }, { "epoch": 0.5337159778456139, "grad_norm": 4.688684094204592, "learning_rate": 4.697346819388201e-06, "loss": 16.8042, "step": 29198 }, { "epoch": 0.5337342570420605, "grad_norm": 6.053872229728858, "learning_rate": 4.697051351165314e-06, "loss": 17.4931, "step": 29199 }, { "epoch": 0.5337525362385069, "grad_norm": 5.765015298125997, "learning_rate": 4.6967558840042395e-06, "loss": 17.0297, "step": 29200 }, { "epoch": 0.5337708154349535, "grad_norm": 5.818301652851261, "learning_rate": 4.696460417906007e-06, "loss": 17.0998, "step": 29201 }, { "epoch": 0.5337890946314, "grad_norm": 5.126136644576303, "learning_rate": 4.696164952871654e-06, "loss": 17.2133, "step": 29202 }, { "epoch": 0.5338073738278465, "grad_norm": 5.614543121003322, "learning_rate": 4.695869488902218e-06, "loss": 17.2043, "step": 29203 }, { "epoch": 0.5338256530242931, "grad_norm": 5.950383837368383, "learning_rate": 4.695574025998733e-06, "loss": 17.3344, "step": 29204 }, { "epoch": 0.5338439322207396, "grad_norm": 6.149957103878116, "learning_rate": 4.6952785641622326e-06, "loss": 17.3925, "step": 29205 }, { "epoch": 0.5338622114171861, "grad_norm": 5.709616959971137, "learning_rate": 4.694983103393756e-06, "loss": 17.1373, "step": 29206 }, { "epoch": 0.5338804906136326, "grad_norm": 7.2074863095903705, "learning_rate": 4.694687643694338e-06, "loss": 17.7656, "step": 29207 }, { "epoch": 0.5338987698100791, "grad_norm": 6.052629056621195, "learning_rate": 4.694392185065011e-06, "loss": 17.3853, "step": 29208 }, { "epoch": 0.5339170490065257, "grad_norm": 6.6373186264798445, "learning_rate": 4.694096727506815e-06, "loss": 17.3346, "step": 29209 }, { "epoch": 0.5339353282029722, "grad_norm": 6.387124795224236, "learning_rate": 4.693801271020783e-06, "loss": 17.9033, "step": 29210 }, { "epoch": 0.5339536073994188, "grad_norm": 6.223251258431593, "learning_rate": 4.693505815607949e-06, "loss": 17.4301, "step": 29211 }, { "epoch": 0.5339718865958653, "grad_norm": 6.576451745258645, "learning_rate": 4.693210361269352e-06, "loss": 17.6064, "step": 29212 }, { "epoch": 0.5339901657923117, "grad_norm": 6.325741136202451, "learning_rate": 4.692914908006026e-06, "loss": 17.3868, "step": 29213 }, { "epoch": 0.5340084449887583, "grad_norm": 5.453538743658066, "learning_rate": 4.692619455819008e-06, "loss": 17.3604, "step": 29214 }, { "epoch": 0.5340267241852048, "grad_norm": 6.395024201833934, "learning_rate": 4.69232400470933e-06, "loss": 17.4291, "step": 29215 }, { "epoch": 0.5340450033816514, "grad_norm": 5.261791959142509, "learning_rate": 4.692028554678029e-06, "loss": 17.1114, "step": 29216 }, { "epoch": 0.5340632825780979, "grad_norm": 6.792270631815648, "learning_rate": 4.691733105726144e-06, "loss": 17.5678, "step": 29217 }, { "epoch": 0.5340815617745444, "grad_norm": 6.80209010429963, "learning_rate": 4.691437657854707e-06, "loss": 17.6784, "step": 29218 }, { "epoch": 0.534099840970991, "grad_norm": 6.498573399814959, "learning_rate": 4.691142211064753e-06, "loss": 17.5499, "step": 29219 }, { "epoch": 0.5341181201674374, "grad_norm": 6.3261239509177045, "learning_rate": 4.690846765357319e-06, "loss": 17.424, "step": 29220 }, { "epoch": 0.534136399363884, "grad_norm": 6.458664907486099, "learning_rate": 4.690551320733442e-06, "loss": 17.4369, "step": 29221 }, { "epoch": 0.5341546785603305, "grad_norm": 6.926856980821773, "learning_rate": 4.690255877194152e-06, "loss": 17.9459, "step": 29222 }, { "epoch": 0.534172957756777, "grad_norm": 5.482993153007825, "learning_rate": 4.6899604347404925e-06, "loss": 17.0677, "step": 29223 }, { "epoch": 0.5341912369532236, "grad_norm": 5.95549853159849, "learning_rate": 4.689664993373493e-06, "loss": 17.4105, "step": 29224 }, { "epoch": 0.53420951614967, "grad_norm": 5.7026105701250716, "learning_rate": 4.689369553094189e-06, "loss": 17.3671, "step": 29225 }, { "epoch": 0.5342277953461165, "grad_norm": 7.3686976538510685, "learning_rate": 4.689074113903621e-06, "loss": 17.7183, "step": 29226 }, { "epoch": 0.5342460745425631, "grad_norm": 6.252171421143555, "learning_rate": 4.688778675802818e-06, "loss": 17.5999, "step": 29227 }, { "epoch": 0.5342643537390096, "grad_norm": 5.786479766304065, "learning_rate": 4.688483238792822e-06, "loss": 17.4693, "step": 29228 }, { "epoch": 0.5342826329354562, "grad_norm": 6.364261824826348, "learning_rate": 4.688187802874663e-06, "loss": 17.3688, "step": 29229 }, { "epoch": 0.5343009121319027, "grad_norm": 6.8465039000472165, "learning_rate": 4.6878923680493785e-06, "loss": 17.9025, "step": 29230 }, { "epoch": 0.5343191913283492, "grad_norm": 7.833967474762711, "learning_rate": 4.687596934318006e-06, "loss": 17.7981, "step": 29231 }, { "epoch": 0.5343374705247957, "grad_norm": 6.664933874321917, "learning_rate": 4.687301501681579e-06, "loss": 17.7476, "step": 29232 }, { "epoch": 0.5343557497212422, "grad_norm": 6.091551360699175, "learning_rate": 4.687006070141131e-06, "loss": 17.5468, "step": 29233 }, { "epoch": 0.5343740289176888, "grad_norm": 6.8923573712614585, "learning_rate": 4.686710639697701e-06, "loss": 17.692, "step": 29234 }, { "epoch": 0.5343923081141353, "grad_norm": 6.937381768672017, "learning_rate": 4.686415210352324e-06, "loss": 17.6177, "step": 29235 }, { "epoch": 0.5344105873105818, "grad_norm": 6.084408330732266, "learning_rate": 4.6861197821060315e-06, "loss": 17.4911, "step": 29236 }, { "epoch": 0.5344288665070284, "grad_norm": 7.44522130052418, "learning_rate": 4.685824354959865e-06, "loss": 17.7589, "step": 29237 }, { "epoch": 0.5344471457034748, "grad_norm": 4.454374701917882, "learning_rate": 4.685528928914855e-06, "loss": 16.7307, "step": 29238 }, { "epoch": 0.5344654248999214, "grad_norm": 6.715146535940895, "learning_rate": 4.685233503972039e-06, "loss": 17.6364, "step": 29239 }, { "epoch": 0.5344837040963679, "grad_norm": 7.0344452900829735, "learning_rate": 4.684938080132454e-06, "loss": 17.7868, "step": 29240 }, { "epoch": 0.5345019832928144, "grad_norm": 6.1756329523202345, "learning_rate": 4.684642657397132e-06, "loss": 17.5986, "step": 29241 }, { "epoch": 0.534520262489261, "grad_norm": 5.955811314738884, "learning_rate": 4.684347235767111e-06, "loss": 17.3416, "step": 29242 }, { "epoch": 0.5345385416857075, "grad_norm": 5.508464569377075, "learning_rate": 4.6840518152434245e-06, "loss": 17.2384, "step": 29243 }, { "epoch": 0.5345568208821541, "grad_norm": 6.255405359310729, "learning_rate": 4.6837563958271085e-06, "loss": 17.5851, "step": 29244 }, { "epoch": 0.5345751000786005, "grad_norm": 5.702873854884792, "learning_rate": 4.6834609775192e-06, "loss": 17.2959, "step": 29245 }, { "epoch": 0.534593379275047, "grad_norm": 5.607666762210169, "learning_rate": 4.683165560320735e-06, "loss": 17.1606, "step": 29246 }, { "epoch": 0.5346116584714936, "grad_norm": 5.07753553773357, "learning_rate": 4.682870144232744e-06, "loss": 17.0299, "step": 29247 }, { "epoch": 0.5346299376679401, "grad_norm": 5.812528319347235, "learning_rate": 4.682574729256266e-06, "loss": 17.3234, "step": 29248 }, { "epoch": 0.5346482168643867, "grad_norm": 5.261893376053922, "learning_rate": 4.682279315392339e-06, "loss": 17.1088, "step": 29249 }, { "epoch": 0.5346664960608332, "grad_norm": 8.080050959436333, "learning_rate": 4.681983902641992e-06, "loss": 17.9734, "step": 29250 }, { "epoch": 0.5346847752572796, "grad_norm": 6.2807987098320295, "learning_rate": 4.681688491006267e-06, "loss": 17.4539, "step": 29251 }, { "epoch": 0.5347030544537262, "grad_norm": 7.599171447651194, "learning_rate": 4.681393080486194e-06, "loss": 18.1529, "step": 29252 }, { "epoch": 0.5347213336501727, "grad_norm": 6.1447741984974575, "learning_rate": 4.68109767108281e-06, "loss": 17.4327, "step": 29253 }, { "epoch": 0.5347396128466193, "grad_norm": 6.389357331246572, "learning_rate": 4.680802262797153e-06, "loss": 17.5275, "step": 29254 }, { "epoch": 0.5347578920430658, "grad_norm": 5.745397360469146, "learning_rate": 4.6805068556302555e-06, "loss": 17.2144, "step": 29255 }, { "epoch": 0.5347761712395123, "grad_norm": 5.640009675799337, "learning_rate": 4.680211449583153e-06, "loss": 17.225, "step": 29256 }, { "epoch": 0.5347944504359589, "grad_norm": 6.701663513066303, "learning_rate": 4.679916044656883e-06, "loss": 17.5104, "step": 29257 }, { "epoch": 0.5348127296324053, "grad_norm": 6.758906431610483, "learning_rate": 4.679620640852477e-06, "loss": 17.8155, "step": 29258 }, { "epoch": 0.5348310088288519, "grad_norm": 5.804176758180699, "learning_rate": 4.679325238170977e-06, "loss": 17.1583, "step": 29259 }, { "epoch": 0.5348492880252984, "grad_norm": 6.456709927031515, "learning_rate": 4.679029836613411e-06, "loss": 17.827, "step": 29260 }, { "epoch": 0.5348675672217449, "grad_norm": 5.544919597973098, "learning_rate": 4.678734436180818e-06, "loss": 17.2689, "step": 29261 }, { "epoch": 0.5348858464181915, "grad_norm": 6.694230767897984, "learning_rate": 4.678439036874234e-06, "loss": 17.6911, "step": 29262 }, { "epoch": 0.534904125614638, "grad_norm": 7.383500252683421, "learning_rate": 4.6781436386946935e-06, "loss": 17.6856, "step": 29263 }, { "epoch": 0.5349224048110846, "grad_norm": 6.969245483848144, "learning_rate": 4.6778482416432295e-06, "loss": 17.3148, "step": 29264 }, { "epoch": 0.534940684007531, "grad_norm": 6.455067519609029, "learning_rate": 4.677552845720881e-06, "loss": 17.6421, "step": 29265 }, { "epoch": 0.5349589632039775, "grad_norm": 5.647349330468523, "learning_rate": 4.677257450928682e-06, "loss": 17.0891, "step": 29266 }, { "epoch": 0.5349772424004241, "grad_norm": 5.590856780988894, "learning_rate": 4.676962057267667e-06, "loss": 17.1274, "step": 29267 }, { "epoch": 0.5349955215968706, "grad_norm": 8.717455084846353, "learning_rate": 4.676666664738873e-06, "loss": 17.8971, "step": 29268 }, { "epoch": 0.5350138007933172, "grad_norm": 7.189686572730251, "learning_rate": 4.676371273343333e-06, "loss": 17.9889, "step": 29269 }, { "epoch": 0.5350320799897637, "grad_norm": 7.806460932605605, "learning_rate": 4.676075883082083e-06, "loss": 18.0796, "step": 29270 }, { "epoch": 0.5350503591862101, "grad_norm": 7.254536934137691, "learning_rate": 4.675780493956161e-06, "loss": 17.829, "step": 29271 }, { "epoch": 0.5350686383826567, "grad_norm": 8.41694626434146, "learning_rate": 4.6754851059666e-06, "loss": 18.4648, "step": 29272 }, { "epoch": 0.5350869175791032, "grad_norm": 5.5703289626038055, "learning_rate": 4.6751897191144335e-06, "loss": 17.2874, "step": 29273 }, { "epoch": 0.5351051967755498, "grad_norm": 5.408001157120569, "learning_rate": 4.6748943334007e-06, "loss": 17.1824, "step": 29274 }, { "epoch": 0.5351234759719963, "grad_norm": 5.930717778294631, "learning_rate": 4.674598948826432e-06, "loss": 16.822, "step": 29275 }, { "epoch": 0.5351417551684428, "grad_norm": 6.642122104741229, "learning_rate": 4.67430356539267e-06, "loss": 17.3118, "step": 29276 }, { "epoch": 0.5351600343648893, "grad_norm": 6.719707459775767, "learning_rate": 4.674008183100445e-06, "loss": 17.6993, "step": 29277 }, { "epoch": 0.5351783135613358, "grad_norm": 6.797813782800309, "learning_rate": 4.67371280195079e-06, "loss": 17.8031, "step": 29278 }, { "epoch": 0.5351965927577824, "grad_norm": 5.928483262475636, "learning_rate": 4.673417421944746e-06, "loss": 17.1513, "step": 29279 }, { "epoch": 0.5352148719542289, "grad_norm": 5.708362227727168, "learning_rate": 4.673122043083345e-06, "loss": 17.2476, "step": 29280 }, { "epoch": 0.5352331511506754, "grad_norm": 6.456114185780829, "learning_rate": 4.6728266653676225e-06, "loss": 17.3815, "step": 29281 }, { "epoch": 0.535251430347122, "grad_norm": 6.649650343554105, "learning_rate": 4.6725312887986154e-06, "loss": 17.2747, "step": 29282 }, { "epoch": 0.5352697095435685, "grad_norm": 5.574568616832653, "learning_rate": 4.672235913377357e-06, "loss": 17.35, "step": 29283 }, { "epoch": 0.535287988740015, "grad_norm": 5.443705554389813, "learning_rate": 4.671940539104881e-06, "loss": 17.3226, "step": 29284 }, { "epoch": 0.5353062679364615, "grad_norm": 5.73942444897739, "learning_rate": 4.6716451659822284e-06, "loss": 17.2158, "step": 29285 }, { "epoch": 0.535324547132908, "grad_norm": 6.801894375705081, "learning_rate": 4.67134979401043e-06, "loss": 17.5671, "step": 29286 }, { "epoch": 0.5353428263293546, "grad_norm": 6.290705725065143, "learning_rate": 4.671054423190521e-06, "loss": 17.4168, "step": 29287 }, { "epoch": 0.5353611055258011, "grad_norm": 5.743885114201012, "learning_rate": 4.670759053523538e-06, "loss": 17.2939, "step": 29288 }, { "epoch": 0.5353793847222477, "grad_norm": 5.7248347951083245, "learning_rate": 4.670463685010514e-06, "loss": 17.2367, "step": 29289 }, { "epoch": 0.5353976639186941, "grad_norm": 7.111293871021303, "learning_rate": 4.670168317652491e-06, "loss": 17.7536, "step": 29290 }, { "epoch": 0.5354159431151406, "grad_norm": 8.535309486135604, "learning_rate": 4.669872951450497e-06, "loss": 18.1195, "step": 29291 }, { "epoch": 0.5354342223115872, "grad_norm": 5.840101600532595, "learning_rate": 4.669577586405568e-06, "loss": 17.271, "step": 29292 }, { "epoch": 0.5354525015080337, "grad_norm": 6.973643897993268, "learning_rate": 4.669282222518743e-06, "loss": 17.6406, "step": 29293 }, { "epoch": 0.5354707807044802, "grad_norm": 6.14925160338331, "learning_rate": 4.6689868597910555e-06, "loss": 17.2322, "step": 29294 }, { "epoch": 0.5354890599009268, "grad_norm": 6.997981700307357, "learning_rate": 4.668691498223537e-06, "loss": 17.5883, "step": 29295 }, { "epoch": 0.5355073390973732, "grad_norm": 7.53567212091816, "learning_rate": 4.668396137817231e-06, "loss": 17.8659, "step": 29296 }, { "epoch": 0.5355256182938198, "grad_norm": 5.8336664721152065, "learning_rate": 4.668100778573164e-06, "loss": 17.2724, "step": 29297 }, { "epoch": 0.5355438974902663, "grad_norm": 5.951920364049022, "learning_rate": 4.667805420492375e-06, "loss": 17.2534, "step": 29298 }, { "epoch": 0.5355621766867128, "grad_norm": 5.349078049617566, "learning_rate": 4.6675100635759015e-06, "loss": 17.33, "step": 29299 }, { "epoch": 0.5355804558831594, "grad_norm": 5.181484811256063, "learning_rate": 4.667214707824776e-06, "loss": 16.9174, "step": 29300 }, { "epoch": 0.5355987350796059, "grad_norm": 8.621212699436843, "learning_rate": 4.6669193532400326e-06, "loss": 17.8028, "step": 29301 }, { "epoch": 0.5356170142760525, "grad_norm": 6.008305903796612, "learning_rate": 4.666623999822708e-06, "loss": 17.0339, "step": 29302 }, { "epoch": 0.5356352934724989, "grad_norm": 7.774715545069141, "learning_rate": 4.666328647573837e-06, "loss": 17.7492, "step": 29303 }, { "epoch": 0.5356535726689454, "grad_norm": 6.096256932202169, "learning_rate": 4.666033296494457e-06, "loss": 17.556, "step": 29304 }, { "epoch": 0.535671851865392, "grad_norm": 6.884931342989107, "learning_rate": 4.665737946585602e-06, "loss": 17.2523, "step": 29305 }, { "epoch": 0.5356901310618385, "grad_norm": 6.185112709333423, "learning_rate": 4.665442597848303e-06, "loss": 17.6004, "step": 29306 }, { "epoch": 0.5357084102582851, "grad_norm": 7.236608712566107, "learning_rate": 4.665147250283601e-06, "loss": 17.9588, "step": 29307 }, { "epoch": 0.5357266894547316, "grad_norm": 5.671268378623509, "learning_rate": 4.6648519038925285e-06, "loss": 17.0656, "step": 29308 }, { "epoch": 0.535744968651178, "grad_norm": 8.802022106221187, "learning_rate": 4.66455655867612e-06, "loss": 17.9557, "step": 29309 }, { "epoch": 0.5357632478476246, "grad_norm": 5.864360534832958, "learning_rate": 4.664261214635414e-06, "loss": 17.3295, "step": 29310 }, { "epoch": 0.5357815270440711, "grad_norm": 7.331905969731185, "learning_rate": 4.663965871771441e-06, "loss": 18.0535, "step": 29311 }, { "epoch": 0.5357998062405177, "grad_norm": 7.08138413712087, "learning_rate": 4.663670530085239e-06, "loss": 17.7963, "step": 29312 }, { "epoch": 0.5358180854369642, "grad_norm": 5.424243891684073, "learning_rate": 4.663375189577843e-06, "loss": 17.1795, "step": 29313 }, { "epoch": 0.5358363646334107, "grad_norm": 6.239865055155216, "learning_rate": 4.66307985025029e-06, "loss": 17.2509, "step": 29314 }, { "epoch": 0.5358546438298573, "grad_norm": 5.735952050430516, "learning_rate": 4.6627845121036084e-06, "loss": 17.1316, "step": 29315 }, { "epoch": 0.5358729230263037, "grad_norm": 5.565925724658268, "learning_rate": 4.66248917513884e-06, "loss": 17.1876, "step": 29316 }, { "epoch": 0.5358912022227503, "grad_norm": 6.106526816844541, "learning_rate": 4.662193839357019e-06, "loss": 17.2983, "step": 29317 }, { "epoch": 0.5359094814191968, "grad_norm": 6.100422188196055, "learning_rate": 4.661898504759176e-06, "loss": 17.3339, "step": 29318 }, { "epoch": 0.5359277606156433, "grad_norm": 9.067345685594086, "learning_rate": 4.661603171346352e-06, "loss": 18.2016, "step": 29319 }, { "epoch": 0.5359460398120899, "grad_norm": 5.759043408835573, "learning_rate": 4.661307839119579e-06, "loss": 17.36, "step": 29320 }, { "epoch": 0.5359643190085364, "grad_norm": 7.074325023844372, "learning_rate": 4.661012508079892e-06, "loss": 17.9006, "step": 29321 }, { "epoch": 0.535982598204983, "grad_norm": 4.682188078011207, "learning_rate": 4.660717178228328e-06, "loss": 16.8074, "step": 29322 }, { "epoch": 0.5360008774014294, "grad_norm": 5.932596754632673, "learning_rate": 4.660421849565919e-06, "loss": 17.3143, "step": 29323 }, { "epoch": 0.5360191565978759, "grad_norm": 5.032828189967548, "learning_rate": 4.660126522093704e-06, "loss": 17.1526, "step": 29324 }, { "epoch": 0.5360374357943225, "grad_norm": 5.933551125599635, "learning_rate": 4.659831195812716e-06, "loss": 17.634, "step": 29325 }, { "epoch": 0.536055714990769, "grad_norm": 5.250106738628413, "learning_rate": 4.659535870723988e-06, "loss": 16.9836, "step": 29326 }, { "epoch": 0.5360739941872156, "grad_norm": 7.230189637800085, "learning_rate": 4.65924054682856e-06, "loss": 17.9238, "step": 29327 }, { "epoch": 0.536092273383662, "grad_norm": 7.002282883504601, "learning_rate": 4.658945224127462e-06, "loss": 17.5911, "step": 29328 }, { "epoch": 0.5361105525801085, "grad_norm": 5.822483441629879, "learning_rate": 4.65864990262173e-06, "loss": 17.3025, "step": 29329 }, { "epoch": 0.5361288317765551, "grad_norm": 6.458043066053483, "learning_rate": 4.658354582312405e-06, "loss": 17.4127, "step": 29330 }, { "epoch": 0.5361471109730016, "grad_norm": 7.808095657410022, "learning_rate": 4.658059263200516e-06, "loss": 18.3442, "step": 29331 }, { "epoch": 0.5361653901694482, "grad_norm": 5.579122407499592, "learning_rate": 4.657763945287097e-06, "loss": 17.3155, "step": 29332 }, { "epoch": 0.5361836693658947, "grad_norm": 5.269806281522528, "learning_rate": 4.657468628573188e-06, "loss": 17.0414, "step": 29333 }, { "epoch": 0.5362019485623412, "grad_norm": 7.400005842810225, "learning_rate": 4.65717331305982e-06, "loss": 18.1298, "step": 29334 }, { "epoch": 0.5362202277587877, "grad_norm": 8.19494904366371, "learning_rate": 4.656877998748032e-06, "loss": 17.7911, "step": 29335 }, { "epoch": 0.5362385069552342, "grad_norm": 5.395202795013228, "learning_rate": 4.656582685638857e-06, "loss": 17.2497, "step": 29336 }, { "epoch": 0.5362567861516808, "grad_norm": 5.708600503897889, "learning_rate": 4.656287373733328e-06, "loss": 17.2469, "step": 29337 }, { "epoch": 0.5362750653481273, "grad_norm": 5.47260203117835, "learning_rate": 4.6559920630324825e-06, "loss": 17.0903, "step": 29338 }, { "epoch": 0.5362933445445738, "grad_norm": 6.778494362597934, "learning_rate": 4.655696753537356e-06, "loss": 17.8149, "step": 29339 }, { "epoch": 0.5363116237410204, "grad_norm": 7.044672217424278, "learning_rate": 4.655401445248981e-06, "loss": 17.8323, "step": 29340 }, { "epoch": 0.5363299029374669, "grad_norm": 5.916865508538785, "learning_rate": 4.655106138168395e-06, "loss": 17.3129, "step": 29341 }, { "epoch": 0.5363481821339134, "grad_norm": 6.2823091343334525, "learning_rate": 4.654810832296632e-06, "loss": 17.1717, "step": 29342 }, { "epoch": 0.5363664613303599, "grad_norm": 10.37997974822832, "learning_rate": 4.654515527634725e-06, "loss": 17.3777, "step": 29343 }, { "epoch": 0.5363847405268064, "grad_norm": 6.325962697313978, "learning_rate": 4.654220224183715e-06, "loss": 17.6281, "step": 29344 }, { "epoch": 0.536403019723253, "grad_norm": 6.839796889503244, "learning_rate": 4.653924921944631e-06, "loss": 17.6442, "step": 29345 }, { "epoch": 0.5364212989196995, "grad_norm": 5.720004386686231, "learning_rate": 4.653629620918509e-06, "loss": 17.0727, "step": 29346 }, { "epoch": 0.5364395781161461, "grad_norm": 7.202606278330624, "learning_rate": 4.653334321106387e-06, "loss": 17.9296, "step": 29347 }, { "epoch": 0.5364578573125925, "grad_norm": 5.618062835825139, "learning_rate": 4.653039022509298e-06, "loss": 17.0657, "step": 29348 }, { "epoch": 0.536476136509039, "grad_norm": 5.433691777755176, "learning_rate": 4.652743725128275e-06, "loss": 17.0539, "step": 29349 }, { "epoch": 0.5364944157054856, "grad_norm": 6.402264889054256, "learning_rate": 4.652448428964358e-06, "loss": 17.3862, "step": 29350 }, { "epoch": 0.5365126949019321, "grad_norm": 7.8667460832180724, "learning_rate": 4.652153134018577e-06, "loss": 18.1236, "step": 29351 }, { "epoch": 0.5365309740983787, "grad_norm": 6.508666560921784, "learning_rate": 4.651857840291969e-06, "loss": 17.5245, "step": 29352 }, { "epoch": 0.5365492532948252, "grad_norm": 7.228950229054558, "learning_rate": 4.651562547785571e-06, "loss": 18.0504, "step": 29353 }, { "epoch": 0.5365675324912716, "grad_norm": 6.948779196738871, "learning_rate": 4.651267256500413e-06, "loss": 17.7902, "step": 29354 }, { "epoch": 0.5365858116877182, "grad_norm": 5.746321231918124, "learning_rate": 4.650971966437537e-06, "loss": 17.2835, "step": 29355 }, { "epoch": 0.5366040908841647, "grad_norm": 6.279429001653878, "learning_rate": 4.650676677597972e-06, "loss": 17.4677, "step": 29356 }, { "epoch": 0.5366223700806113, "grad_norm": 7.049565430130494, "learning_rate": 4.650381389982754e-06, "loss": 17.4014, "step": 29357 }, { "epoch": 0.5366406492770578, "grad_norm": 6.241784604979106, "learning_rate": 4.6500861035929195e-06, "loss": 17.1587, "step": 29358 }, { "epoch": 0.5366589284735043, "grad_norm": 5.489016065724621, "learning_rate": 4.6497908184295044e-06, "loss": 17.1332, "step": 29359 }, { "epoch": 0.5366772076699509, "grad_norm": 6.850144707783792, "learning_rate": 4.649495534493539e-06, "loss": 18.0111, "step": 29360 }, { "epoch": 0.5366954868663973, "grad_norm": 6.81380763397933, "learning_rate": 4.649200251786063e-06, "loss": 17.415, "step": 29361 }, { "epoch": 0.5367137660628438, "grad_norm": 9.109713823789997, "learning_rate": 4.648904970308111e-06, "loss": 18.4835, "step": 29362 }, { "epoch": 0.5367320452592904, "grad_norm": 6.870680971815996, "learning_rate": 4.6486096900607145e-06, "loss": 17.4854, "step": 29363 }, { "epoch": 0.5367503244557369, "grad_norm": 7.654470596092475, "learning_rate": 4.648314411044912e-06, "loss": 17.8558, "step": 29364 }, { "epoch": 0.5367686036521835, "grad_norm": 6.540666871477112, "learning_rate": 4.648019133261735e-06, "loss": 17.4551, "step": 29365 }, { "epoch": 0.53678688284863, "grad_norm": 7.060905809863112, "learning_rate": 4.647723856712222e-06, "loss": 17.41, "step": 29366 }, { "epoch": 0.5368051620450764, "grad_norm": 6.528052422282426, "learning_rate": 4.6474285813974075e-06, "loss": 17.6751, "step": 29367 }, { "epoch": 0.536823441241523, "grad_norm": 5.822993964637325, "learning_rate": 4.647133307318322e-06, "loss": 17.2885, "step": 29368 }, { "epoch": 0.5368417204379695, "grad_norm": 7.361284339128492, "learning_rate": 4.646838034476007e-06, "loss": 17.6867, "step": 29369 }, { "epoch": 0.5368599996344161, "grad_norm": 8.702266070282723, "learning_rate": 4.646542762871492e-06, "loss": 18.0663, "step": 29370 }, { "epoch": 0.5368782788308626, "grad_norm": 7.680265628120438, "learning_rate": 4.646247492505813e-06, "loss": 17.5257, "step": 29371 }, { "epoch": 0.5368965580273091, "grad_norm": 6.756423681184469, "learning_rate": 4.645952223380009e-06, "loss": 17.6017, "step": 29372 }, { "epoch": 0.5369148372237557, "grad_norm": 5.657934085083988, "learning_rate": 4.645656955495111e-06, "loss": 17.3026, "step": 29373 }, { "epoch": 0.5369331164202021, "grad_norm": 5.797227153979704, "learning_rate": 4.645361688852153e-06, "loss": 17.2116, "step": 29374 }, { "epoch": 0.5369513956166487, "grad_norm": 7.409386454521562, "learning_rate": 4.645066423452171e-06, "loss": 17.8287, "step": 29375 }, { "epoch": 0.5369696748130952, "grad_norm": 6.785129146466672, "learning_rate": 4.644771159296203e-06, "loss": 17.5434, "step": 29376 }, { "epoch": 0.5369879540095417, "grad_norm": 7.335645828702823, "learning_rate": 4.644475896385278e-06, "loss": 17.5533, "step": 29377 }, { "epoch": 0.5370062332059883, "grad_norm": 5.843956419485704, "learning_rate": 4.644180634720437e-06, "loss": 17.1225, "step": 29378 }, { "epoch": 0.5370245124024348, "grad_norm": 5.8218643680485815, "learning_rate": 4.643885374302711e-06, "loss": 17.4456, "step": 29379 }, { "epoch": 0.5370427915988814, "grad_norm": 6.76200635501882, "learning_rate": 4.643590115133135e-06, "loss": 17.3823, "step": 29380 }, { "epoch": 0.5370610707953278, "grad_norm": 5.566491416400289, "learning_rate": 4.643294857212746e-06, "loss": 17.1918, "step": 29381 }, { "epoch": 0.5370793499917743, "grad_norm": 6.260268604229605, "learning_rate": 4.642999600542576e-06, "loss": 17.474, "step": 29382 }, { "epoch": 0.5370976291882209, "grad_norm": 6.0218129939675835, "learning_rate": 4.642704345123664e-06, "loss": 17.4573, "step": 29383 }, { "epoch": 0.5371159083846674, "grad_norm": 6.530548656154416, "learning_rate": 4.64240909095704e-06, "loss": 17.5706, "step": 29384 }, { "epoch": 0.537134187581114, "grad_norm": 6.071058171613956, "learning_rate": 4.642113838043741e-06, "loss": 17.3123, "step": 29385 }, { "epoch": 0.5371524667775605, "grad_norm": 5.5482326896115595, "learning_rate": 4.641818586384803e-06, "loss": 17.1153, "step": 29386 }, { "epoch": 0.5371707459740069, "grad_norm": 6.211957718250558, "learning_rate": 4.64152333598126e-06, "loss": 17.2345, "step": 29387 }, { "epoch": 0.5371890251704535, "grad_norm": 5.699175348587522, "learning_rate": 4.641228086834146e-06, "loss": 17.0164, "step": 29388 }, { "epoch": 0.5372073043669, "grad_norm": 6.010587503966536, "learning_rate": 4.640932838944497e-06, "loss": 17.2308, "step": 29389 }, { "epoch": 0.5372255835633466, "grad_norm": 6.244570599895282, "learning_rate": 4.640637592313347e-06, "loss": 17.351, "step": 29390 }, { "epoch": 0.5372438627597931, "grad_norm": 6.78333587828515, "learning_rate": 4.64034234694173e-06, "loss": 17.6876, "step": 29391 }, { "epoch": 0.5372621419562396, "grad_norm": 6.678429894941688, "learning_rate": 4.640047102830683e-06, "loss": 17.5395, "step": 29392 }, { "epoch": 0.5372804211526861, "grad_norm": 7.160387268459865, "learning_rate": 4.63975185998124e-06, "loss": 17.7461, "step": 29393 }, { "epoch": 0.5372987003491326, "grad_norm": 6.526628635386126, "learning_rate": 4.639456618394434e-06, "loss": 17.4234, "step": 29394 }, { "epoch": 0.5373169795455792, "grad_norm": 5.693856248389524, "learning_rate": 4.639161378071303e-06, "loss": 17.178, "step": 29395 }, { "epoch": 0.5373352587420257, "grad_norm": 6.264029564666308, "learning_rate": 4.638866139012879e-06, "loss": 17.2177, "step": 29396 }, { "epoch": 0.5373535379384722, "grad_norm": 5.949745882098424, "learning_rate": 4.638570901220197e-06, "loss": 17.4326, "step": 29397 }, { "epoch": 0.5373718171349188, "grad_norm": 8.495532980691907, "learning_rate": 4.638275664694295e-06, "loss": 17.5507, "step": 29398 }, { "epoch": 0.5373900963313653, "grad_norm": 8.76437569547379, "learning_rate": 4.637980429436203e-06, "loss": 18.227, "step": 29399 }, { "epoch": 0.5374083755278118, "grad_norm": 5.973602951977648, "learning_rate": 4.63768519544696e-06, "loss": 17.5578, "step": 29400 }, { "epoch": 0.5374266547242583, "grad_norm": 4.959636867896732, "learning_rate": 4.637389962727598e-06, "loss": 16.8575, "step": 29401 }, { "epoch": 0.5374449339207048, "grad_norm": 6.858806662125633, "learning_rate": 4.637094731279152e-06, "loss": 18.0463, "step": 29402 }, { "epoch": 0.5374632131171514, "grad_norm": 5.708286998701389, "learning_rate": 4.6367995011026596e-06, "loss": 17.1434, "step": 29403 }, { "epoch": 0.5374814923135979, "grad_norm": 6.168848984205421, "learning_rate": 4.636504272199153e-06, "loss": 17.3139, "step": 29404 }, { "epoch": 0.5374997715100445, "grad_norm": 5.401303803194577, "learning_rate": 4.636209044569665e-06, "loss": 17.1362, "step": 29405 }, { "epoch": 0.537518050706491, "grad_norm": 7.160912187803219, "learning_rate": 4.635913818215234e-06, "loss": 17.6565, "step": 29406 }, { "epoch": 0.5375363299029374, "grad_norm": 6.024653480766796, "learning_rate": 4.635618593136896e-06, "loss": 17.083, "step": 29407 }, { "epoch": 0.537554609099384, "grad_norm": 6.465684858822787, "learning_rate": 4.63532336933568e-06, "loss": 17.5774, "step": 29408 }, { "epoch": 0.5375728882958305, "grad_norm": 6.705385820450828, "learning_rate": 4.6350281468126255e-06, "loss": 17.3118, "step": 29409 }, { "epoch": 0.5375911674922771, "grad_norm": 6.960883044363678, "learning_rate": 4.6347329255687654e-06, "loss": 17.7671, "step": 29410 }, { "epoch": 0.5376094466887236, "grad_norm": 5.949739040229357, "learning_rate": 4.634437705605133e-06, "loss": 17.1729, "step": 29411 }, { "epoch": 0.53762772588517, "grad_norm": 8.008650712910079, "learning_rate": 4.634142486922767e-06, "loss": 18.3337, "step": 29412 }, { "epoch": 0.5376460050816166, "grad_norm": 6.381688136674593, "learning_rate": 4.633847269522698e-06, "loss": 17.7893, "step": 29413 }, { "epoch": 0.5376642842780631, "grad_norm": 6.129539403024211, "learning_rate": 4.633552053405965e-06, "loss": 17.2959, "step": 29414 }, { "epoch": 0.5376825634745097, "grad_norm": 6.8010730349317345, "learning_rate": 4.6332568385735985e-06, "loss": 17.4192, "step": 29415 }, { "epoch": 0.5377008426709562, "grad_norm": 7.749552261047914, "learning_rate": 4.632961625026634e-06, "loss": 17.8023, "step": 29416 }, { "epoch": 0.5377191218674027, "grad_norm": 7.227173298445376, "learning_rate": 4.632666412766109e-06, "loss": 17.5013, "step": 29417 }, { "epoch": 0.5377374010638493, "grad_norm": 6.589930903699898, "learning_rate": 4.632371201793057e-06, "loss": 18.0213, "step": 29418 }, { "epoch": 0.5377556802602957, "grad_norm": 7.1108711693286955, "learning_rate": 4.63207599210851e-06, "loss": 17.7495, "step": 29419 }, { "epoch": 0.5377739594567423, "grad_norm": 5.756677243444879, "learning_rate": 4.6317807837135055e-06, "loss": 17.1539, "step": 29420 }, { "epoch": 0.5377922386531888, "grad_norm": 5.851982032084623, "learning_rate": 4.631485576609078e-06, "loss": 17.285, "step": 29421 }, { "epoch": 0.5378105178496353, "grad_norm": 5.69020733797831, "learning_rate": 4.63119037079626e-06, "loss": 17.2473, "step": 29422 }, { "epoch": 0.5378287970460819, "grad_norm": 6.562588678630628, "learning_rate": 4.63089516627609e-06, "loss": 17.6663, "step": 29423 }, { "epoch": 0.5378470762425284, "grad_norm": 5.940777519911705, "learning_rate": 4.630599963049599e-06, "loss": 17.0966, "step": 29424 }, { "epoch": 0.537865355438975, "grad_norm": 5.076806333792547, "learning_rate": 4.630304761117822e-06, "loss": 17.0302, "step": 29425 }, { "epoch": 0.5378836346354214, "grad_norm": 5.255430406037884, "learning_rate": 4.630009560481797e-06, "loss": 17.2251, "step": 29426 }, { "epoch": 0.5379019138318679, "grad_norm": 7.216822730025074, "learning_rate": 4.629714361142555e-06, "loss": 17.9002, "step": 29427 }, { "epoch": 0.5379201930283145, "grad_norm": 6.092894704779971, "learning_rate": 4.629419163101133e-06, "loss": 17.4818, "step": 29428 }, { "epoch": 0.537938472224761, "grad_norm": 5.758073355618983, "learning_rate": 4.629123966358564e-06, "loss": 17.1897, "step": 29429 }, { "epoch": 0.5379567514212075, "grad_norm": 7.515799397134103, "learning_rate": 4.628828770915882e-06, "loss": 17.6309, "step": 29430 }, { "epoch": 0.5379750306176541, "grad_norm": 6.877101783486477, "learning_rate": 4.628533576774125e-06, "loss": 17.9767, "step": 29431 }, { "epoch": 0.5379933098141005, "grad_norm": 5.612593666034816, "learning_rate": 4.628238383934326e-06, "loss": 17.1924, "step": 29432 }, { "epoch": 0.5380115890105471, "grad_norm": 6.022177368941253, "learning_rate": 4.627943192397517e-06, "loss": 17.4696, "step": 29433 }, { "epoch": 0.5380298682069936, "grad_norm": 6.01082937675309, "learning_rate": 4.627648002164736e-06, "loss": 17.101, "step": 29434 }, { "epoch": 0.5380481474034401, "grad_norm": 6.908720313695068, "learning_rate": 4.627352813237017e-06, "loss": 17.7684, "step": 29435 }, { "epoch": 0.5380664265998867, "grad_norm": 6.934726264491708, "learning_rate": 4.627057625615392e-06, "loss": 17.4942, "step": 29436 }, { "epoch": 0.5380847057963332, "grad_norm": 6.767730609440472, "learning_rate": 4.6267624393009e-06, "loss": 17.6882, "step": 29437 }, { "epoch": 0.5381029849927798, "grad_norm": 5.363970931806698, "learning_rate": 4.626467254294572e-06, "loss": 17.1753, "step": 29438 }, { "epoch": 0.5381212641892262, "grad_norm": 5.659846540324078, "learning_rate": 4.626172070597444e-06, "loss": 17.3222, "step": 29439 }, { "epoch": 0.5381395433856727, "grad_norm": 6.044542077164771, "learning_rate": 4.625876888210551e-06, "loss": 17.1571, "step": 29440 }, { "epoch": 0.5381578225821193, "grad_norm": 6.570344416048931, "learning_rate": 4.625581707134928e-06, "loss": 17.7369, "step": 29441 }, { "epoch": 0.5381761017785658, "grad_norm": 6.120390875854275, "learning_rate": 4.625286527371606e-06, "loss": 17.0608, "step": 29442 }, { "epoch": 0.5381943809750124, "grad_norm": 6.09306400427329, "learning_rate": 4.6249913489216245e-06, "loss": 17.3157, "step": 29443 }, { "epoch": 0.5382126601714589, "grad_norm": 6.790203034486313, "learning_rate": 4.624696171786012e-06, "loss": 17.7011, "step": 29444 }, { "epoch": 0.5382309393679053, "grad_norm": 7.639083428217476, "learning_rate": 4.624400995965812e-06, "loss": 17.3712, "step": 29445 }, { "epoch": 0.5382492185643519, "grad_norm": 7.516845162250205, "learning_rate": 4.6241058214620516e-06, "loss": 17.8654, "step": 29446 }, { "epoch": 0.5382674977607984, "grad_norm": 7.301650308646706, "learning_rate": 4.623810648275767e-06, "loss": 17.4821, "step": 29447 }, { "epoch": 0.538285776957245, "grad_norm": 7.0783371920980525, "learning_rate": 4.623515476407994e-06, "loss": 17.8513, "step": 29448 }, { "epoch": 0.5383040561536915, "grad_norm": 6.989071061146871, "learning_rate": 4.6232203058597676e-06, "loss": 17.5442, "step": 29449 }, { "epoch": 0.538322335350138, "grad_norm": 6.740616738139947, "learning_rate": 4.6229251366321195e-06, "loss": 17.6039, "step": 29450 }, { "epoch": 0.5383406145465846, "grad_norm": 6.055411461890058, "learning_rate": 4.622629968726087e-06, "loss": 17.2912, "step": 29451 }, { "epoch": 0.538358893743031, "grad_norm": 5.006914991409046, "learning_rate": 4.622334802142705e-06, "loss": 16.853, "step": 29452 }, { "epoch": 0.5383771729394776, "grad_norm": 5.945411939113464, "learning_rate": 4.622039636883004e-06, "loss": 17.2922, "step": 29453 }, { "epoch": 0.5383954521359241, "grad_norm": 6.997866624696268, "learning_rate": 4.6217444729480235e-06, "loss": 17.3213, "step": 29454 }, { "epoch": 0.5384137313323706, "grad_norm": 4.842755316183787, "learning_rate": 4.621449310338795e-06, "loss": 16.904, "step": 29455 }, { "epoch": 0.5384320105288172, "grad_norm": 6.44555934151634, "learning_rate": 4.621154149056353e-06, "loss": 17.318, "step": 29456 }, { "epoch": 0.5384502897252637, "grad_norm": 5.63938653182184, "learning_rate": 4.620858989101735e-06, "loss": 17.2163, "step": 29457 }, { "epoch": 0.5384685689217102, "grad_norm": 6.847603108257481, "learning_rate": 4.62056383047597e-06, "loss": 17.8226, "step": 29458 }, { "epoch": 0.5384868481181567, "grad_norm": 6.495474202642668, "learning_rate": 4.620268673180099e-06, "loss": 17.6462, "step": 29459 }, { "epoch": 0.5385051273146032, "grad_norm": 6.36912065411912, "learning_rate": 4.6199735172151525e-06, "loss": 17.4788, "step": 29460 }, { "epoch": 0.5385234065110498, "grad_norm": 7.569506157647184, "learning_rate": 4.619678362582164e-06, "loss": 17.3841, "step": 29461 }, { "epoch": 0.5385416857074963, "grad_norm": 6.13074048656206, "learning_rate": 4.619383209282173e-06, "loss": 17.304, "step": 29462 }, { "epoch": 0.5385599649039429, "grad_norm": 5.6353209803185536, "learning_rate": 4.61908805731621e-06, "loss": 17.044, "step": 29463 }, { "epoch": 0.5385782441003893, "grad_norm": 6.529993248223105, "learning_rate": 4.618792906685308e-06, "loss": 17.5979, "step": 29464 }, { "epoch": 0.5385965232968358, "grad_norm": 5.907159939389777, "learning_rate": 4.618497757390505e-06, "loss": 17.3092, "step": 29465 }, { "epoch": 0.5386148024932824, "grad_norm": 9.227590186808916, "learning_rate": 4.618202609432836e-06, "loss": 18.4866, "step": 29466 }, { "epoch": 0.5386330816897289, "grad_norm": 5.495784479179967, "learning_rate": 4.617907462813332e-06, "loss": 17.33, "step": 29467 }, { "epoch": 0.5386513608861755, "grad_norm": 6.004887540725572, "learning_rate": 4.6176123175330295e-06, "loss": 17.0819, "step": 29468 }, { "epoch": 0.538669640082622, "grad_norm": 6.377693620330323, "learning_rate": 4.617317173592963e-06, "loss": 17.5609, "step": 29469 }, { "epoch": 0.5386879192790685, "grad_norm": 7.296511044930412, "learning_rate": 4.617022030994165e-06, "loss": 17.7308, "step": 29470 }, { "epoch": 0.538706198475515, "grad_norm": 6.2670869074828355, "learning_rate": 4.616726889737673e-06, "loss": 17.5962, "step": 29471 }, { "epoch": 0.5387244776719615, "grad_norm": 6.273506710374839, "learning_rate": 4.616431749824519e-06, "loss": 17.3744, "step": 29472 }, { "epoch": 0.5387427568684081, "grad_norm": 8.194156052425928, "learning_rate": 4.61613661125574e-06, "loss": 18.4392, "step": 29473 }, { "epoch": 0.5387610360648546, "grad_norm": 5.372760051340595, "learning_rate": 4.615841474032368e-06, "loss": 17.0274, "step": 29474 }, { "epoch": 0.5387793152613011, "grad_norm": 7.082151687429085, "learning_rate": 4.615546338155438e-06, "loss": 17.681, "step": 29475 }, { "epoch": 0.5387975944577477, "grad_norm": 6.496208767191889, "learning_rate": 4.615251203625986e-06, "loss": 17.3553, "step": 29476 }, { "epoch": 0.5388158736541941, "grad_norm": 6.762368717166169, "learning_rate": 4.614956070445045e-06, "loss": 17.6863, "step": 29477 }, { "epoch": 0.5388341528506407, "grad_norm": 5.503882816305648, "learning_rate": 4.614660938613648e-06, "loss": 17.2541, "step": 29478 }, { "epoch": 0.5388524320470872, "grad_norm": 6.676343953056528, "learning_rate": 4.614365808132832e-06, "loss": 17.4198, "step": 29479 }, { "epoch": 0.5388707112435337, "grad_norm": 7.722256220464426, "learning_rate": 4.61407067900363e-06, "loss": 18.0793, "step": 29480 }, { "epoch": 0.5388889904399803, "grad_norm": 5.1380679544098715, "learning_rate": 4.613775551227076e-06, "loss": 16.9727, "step": 29481 }, { "epoch": 0.5389072696364268, "grad_norm": 6.9937525316139135, "learning_rate": 4.613480424804207e-06, "loss": 17.459, "step": 29482 }, { "epoch": 0.5389255488328734, "grad_norm": 6.575303936009218, "learning_rate": 4.613185299736055e-06, "loss": 17.5852, "step": 29483 }, { "epoch": 0.5389438280293198, "grad_norm": 6.0547981093976695, "learning_rate": 4.612890176023654e-06, "loss": 17.2239, "step": 29484 }, { "epoch": 0.5389621072257663, "grad_norm": 6.979537515178285, "learning_rate": 4.61259505366804e-06, "loss": 17.7887, "step": 29485 }, { "epoch": 0.5389803864222129, "grad_norm": 5.574405040408401, "learning_rate": 4.612299932670248e-06, "loss": 17.0227, "step": 29486 }, { "epoch": 0.5389986656186594, "grad_norm": 5.957891024767679, "learning_rate": 4.6120048130313085e-06, "loss": 17.4767, "step": 29487 }, { "epoch": 0.539016944815106, "grad_norm": 6.508454385681888, "learning_rate": 4.611709694752259e-06, "loss": 17.4061, "step": 29488 }, { "epoch": 0.5390352240115525, "grad_norm": 7.952240112241982, "learning_rate": 4.611414577834134e-06, "loss": 18.545, "step": 29489 }, { "epoch": 0.5390535032079989, "grad_norm": 6.074578112519142, "learning_rate": 4.611119462277968e-06, "loss": 17.3847, "step": 29490 }, { "epoch": 0.5390717824044455, "grad_norm": 7.042071628695794, "learning_rate": 4.6108243480847945e-06, "loss": 17.6878, "step": 29491 }, { "epoch": 0.539090061600892, "grad_norm": 7.742850166745321, "learning_rate": 4.610529235255646e-06, "loss": 17.825, "step": 29492 }, { "epoch": 0.5391083407973386, "grad_norm": 7.16135930848796, "learning_rate": 4.61023412379156e-06, "loss": 17.7906, "step": 29493 }, { "epoch": 0.5391266199937851, "grad_norm": 6.872807704777655, "learning_rate": 4.60993901369357e-06, "loss": 17.2553, "step": 29494 }, { "epoch": 0.5391448991902316, "grad_norm": 7.036621368448833, "learning_rate": 4.609643904962709e-06, "loss": 17.7991, "step": 29495 }, { "epoch": 0.5391631783866782, "grad_norm": 6.140199390986187, "learning_rate": 4.609348797600013e-06, "loss": 17.288, "step": 29496 }, { "epoch": 0.5391814575831246, "grad_norm": 5.393724906218478, "learning_rate": 4.609053691606516e-06, "loss": 17.3026, "step": 29497 }, { "epoch": 0.5391997367795711, "grad_norm": 6.618458585201584, "learning_rate": 4.608758586983249e-06, "loss": 17.4113, "step": 29498 }, { "epoch": 0.5392180159760177, "grad_norm": 6.38221382609543, "learning_rate": 4.608463483731253e-06, "loss": 17.3821, "step": 29499 }, { "epoch": 0.5392362951724642, "grad_norm": 6.160402299489039, "learning_rate": 4.6081683818515575e-06, "loss": 17.4905, "step": 29500 }, { "epoch": 0.5392545743689108, "grad_norm": 6.342634907180759, "learning_rate": 4.607873281345195e-06, "loss": 17.7497, "step": 29501 }, { "epoch": 0.5392728535653573, "grad_norm": 6.460548588368639, "learning_rate": 4.607578182213205e-06, "loss": 17.7083, "step": 29502 }, { "epoch": 0.5392911327618037, "grad_norm": 5.734833076468752, "learning_rate": 4.6072830844566185e-06, "loss": 17.1418, "step": 29503 }, { "epoch": 0.5393094119582503, "grad_norm": 7.359787517858874, "learning_rate": 4.606987988076472e-06, "loss": 17.7528, "step": 29504 }, { "epoch": 0.5393276911546968, "grad_norm": 5.458341472156394, "learning_rate": 4.606692893073799e-06, "loss": 17.1362, "step": 29505 }, { "epoch": 0.5393459703511434, "grad_norm": 6.920707979608791, "learning_rate": 4.606397799449631e-06, "loss": 17.5392, "step": 29506 }, { "epoch": 0.5393642495475899, "grad_norm": 8.00182754483115, "learning_rate": 4.606102707205006e-06, "loss": 18.1148, "step": 29507 }, { "epoch": 0.5393825287440364, "grad_norm": 6.679142286864438, "learning_rate": 4.605807616340957e-06, "loss": 17.8182, "step": 29508 }, { "epoch": 0.539400807940483, "grad_norm": 5.970806665365426, "learning_rate": 4.605512526858516e-06, "loss": 17.5121, "step": 29509 }, { "epoch": 0.5394190871369294, "grad_norm": 6.117168140213418, "learning_rate": 4.6052174387587225e-06, "loss": 17.2834, "step": 29510 }, { "epoch": 0.539437366333376, "grad_norm": 6.16289121384958, "learning_rate": 4.604922352042606e-06, "loss": 17.5368, "step": 29511 }, { "epoch": 0.5394556455298225, "grad_norm": 6.110081842343696, "learning_rate": 4.604627266711201e-06, "loss": 17.358, "step": 29512 }, { "epoch": 0.539473924726269, "grad_norm": 7.842125660684712, "learning_rate": 4.604332182765546e-06, "loss": 18.1485, "step": 29513 }, { "epoch": 0.5394922039227156, "grad_norm": 6.022275396307779, "learning_rate": 4.604037100206671e-06, "loss": 17.2176, "step": 29514 }, { "epoch": 0.539510483119162, "grad_norm": 6.711720674628798, "learning_rate": 4.6037420190356115e-06, "loss": 17.3717, "step": 29515 }, { "epoch": 0.5395287623156086, "grad_norm": 6.865020039902298, "learning_rate": 4.603446939253402e-06, "loss": 17.7045, "step": 29516 }, { "epoch": 0.5395470415120551, "grad_norm": 7.02628139760289, "learning_rate": 4.603151860861077e-06, "loss": 17.5875, "step": 29517 }, { "epoch": 0.5395653207085016, "grad_norm": 5.147306927893737, "learning_rate": 4.602856783859669e-06, "loss": 16.9374, "step": 29518 }, { "epoch": 0.5395835999049482, "grad_norm": 6.712076000484498, "learning_rate": 4.602561708250214e-06, "loss": 17.7712, "step": 29519 }, { "epoch": 0.5396018791013947, "grad_norm": 5.839047283790438, "learning_rate": 4.602266634033746e-06, "loss": 17.3353, "step": 29520 }, { "epoch": 0.5396201582978413, "grad_norm": 6.028358730601311, "learning_rate": 4.601971561211299e-06, "loss": 17.3756, "step": 29521 }, { "epoch": 0.5396384374942877, "grad_norm": 5.9702602537056535, "learning_rate": 4.601676489783908e-06, "loss": 17.4412, "step": 29522 }, { "epoch": 0.5396567166907342, "grad_norm": 6.274475804567916, "learning_rate": 4.601381419752604e-06, "loss": 17.3835, "step": 29523 }, { "epoch": 0.5396749958871808, "grad_norm": 5.56602335108596, "learning_rate": 4.601086351118425e-06, "loss": 17.0837, "step": 29524 }, { "epoch": 0.5396932750836273, "grad_norm": 7.041899583424272, "learning_rate": 4.600791283882405e-06, "loss": 17.4936, "step": 29525 }, { "epoch": 0.5397115542800739, "grad_norm": 8.125748311455663, "learning_rate": 4.6004962180455745e-06, "loss": 17.9327, "step": 29526 }, { "epoch": 0.5397298334765204, "grad_norm": 6.271850957278777, "learning_rate": 4.600201153608972e-06, "loss": 17.8437, "step": 29527 }, { "epoch": 0.5397481126729669, "grad_norm": 4.627354531910062, "learning_rate": 4.599906090573629e-06, "loss": 16.8228, "step": 29528 }, { "epoch": 0.5397663918694134, "grad_norm": 5.669684829349366, "learning_rate": 4.599611028940579e-06, "loss": 17.0739, "step": 29529 }, { "epoch": 0.5397846710658599, "grad_norm": 6.015815793001434, "learning_rate": 4.599315968710859e-06, "loss": 17.0334, "step": 29530 }, { "epoch": 0.5398029502623065, "grad_norm": 8.522527278046015, "learning_rate": 4.599020909885503e-06, "loss": 18.7189, "step": 29531 }, { "epoch": 0.539821229458753, "grad_norm": 6.110859752001126, "learning_rate": 4.598725852465542e-06, "loss": 17.3934, "step": 29532 }, { "epoch": 0.5398395086551995, "grad_norm": 5.547563674843797, "learning_rate": 4.5984307964520126e-06, "loss": 17.1769, "step": 29533 }, { "epoch": 0.5398577878516461, "grad_norm": 6.391420727342237, "learning_rate": 4.598135741845947e-06, "loss": 17.5392, "step": 29534 }, { "epoch": 0.5398760670480925, "grad_norm": 6.945385862846721, "learning_rate": 4.597840688648384e-06, "loss": 17.8386, "step": 29535 }, { "epoch": 0.5398943462445391, "grad_norm": 6.869928061516658, "learning_rate": 4.597545636860353e-06, "loss": 17.7396, "step": 29536 }, { "epoch": 0.5399126254409856, "grad_norm": 5.434576383237398, "learning_rate": 4.5972505864828885e-06, "loss": 17.191, "step": 29537 }, { "epoch": 0.5399309046374321, "grad_norm": 7.233550043779768, "learning_rate": 4.5969555375170265e-06, "loss": 17.5468, "step": 29538 }, { "epoch": 0.5399491838338787, "grad_norm": 5.927400998913646, "learning_rate": 4.596660489963802e-06, "loss": 17.2392, "step": 29539 }, { "epoch": 0.5399674630303252, "grad_norm": 5.336706399146603, "learning_rate": 4.596365443824244e-06, "loss": 17.0921, "step": 29540 }, { "epoch": 0.5399857422267718, "grad_norm": 6.997711522809032, "learning_rate": 4.596070399099394e-06, "loss": 17.6849, "step": 29541 }, { "epoch": 0.5400040214232182, "grad_norm": 6.9343088287611385, "learning_rate": 4.59577535579028e-06, "loss": 17.597, "step": 29542 }, { "epoch": 0.5400223006196647, "grad_norm": 5.499312390109808, "learning_rate": 4.5954803138979374e-06, "loss": 17.1526, "step": 29543 }, { "epoch": 0.5400405798161113, "grad_norm": 6.534126222472637, "learning_rate": 4.595185273423403e-06, "loss": 17.5575, "step": 29544 }, { "epoch": 0.5400588590125578, "grad_norm": 6.451302751067807, "learning_rate": 4.5948902343677095e-06, "loss": 17.812, "step": 29545 }, { "epoch": 0.5400771382090044, "grad_norm": 6.441432885491875, "learning_rate": 4.594595196731889e-06, "loss": 17.4155, "step": 29546 }, { "epoch": 0.5400954174054509, "grad_norm": 7.714083364880972, "learning_rate": 4.594300160516977e-06, "loss": 17.7388, "step": 29547 }, { "epoch": 0.5401136966018973, "grad_norm": 5.0077450325797015, "learning_rate": 4.594005125724009e-06, "loss": 17.1602, "step": 29548 }, { "epoch": 0.5401319757983439, "grad_norm": 6.250300872568342, "learning_rate": 4.593710092354017e-06, "loss": 17.3882, "step": 29549 }, { "epoch": 0.5401502549947904, "grad_norm": 6.109001953841035, "learning_rate": 4.593415060408037e-06, "loss": 17.2753, "step": 29550 }, { "epoch": 0.540168534191237, "grad_norm": 8.20635031160667, "learning_rate": 4.593120029887099e-06, "loss": 18.2797, "step": 29551 }, { "epoch": 0.5401868133876835, "grad_norm": 5.136627694104386, "learning_rate": 4.592825000792243e-06, "loss": 17.0054, "step": 29552 }, { "epoch": 0.54020509258413, "grad_norm": 6.9035171454661315, "learning_rate": 4.592529973124499e-06, "loss": 18.0783, "step": 29553 }, { "epoch": 0.5402233717805766, "grad_norm": 6.070774784382692, "learning_rate": 4.5922349468849014e-06, "loss": 17.3955, "step": 29554 }, { "epoch": 0.540241650977023, "grad_norm": 5.764280822624456, "learning_rate": 4.591939922074486e-06, "loss": 17.1105, "step": 29555 }, { "epoch": 0.5402599301734696, "grad_norm": 5.826092072206386, "learning_rate": 4.591644898694286e-06, "loss": 17.263, "step": 29556 }, { "epoch": 0.5402782093699161, "grad_norm": 7.087756919193326, "learning_rate": 4.591349876745334e-06, "loss": 17.7256, "step": 29557 }, { "epoch": 0.5402964885663626, "grad_norm": 7.1360926009157115, "learning_rate": 4.591054856228666e-06, "loss": 17.9364, "step": 29558 }, { "epoch": 0.5403147677628092, "grad_norm": 6.323440964848422, "learning_rate": 4.590759837145316e-06, "loss": 17.5094, "step": 29559 }, { "epoch": 0.5403330469592557, "grad_norm": 6.411672834709725, "learning_rate": 4.5904648194963155e-06, "loss": 17.5399, "step": 29560 }, { "epoch": 0.5403513261557022, "grad_norm": 6.33249450682044, "learning_rate": 4.590169803282701e-06, "loss": 17.3937, "step": 29561 }, { "epoch": 0.5403696053521487, "grad_norm": 6.744071758605902, "learning_rate": 4.589874788505507e-06, "loss": 17.7377, "step": 29562 }, { "epoch": 0.5403878845485952, "grad_norm": 5.947256453709719, "learning_rate": 4.589579775165764e-06, "loss": 17.2556, "step": 29563 }, { "epoch": 0.5404061637450418, "grad_norm": 5.562032210968999, "learning_rate": 4.589284763264511e-06, "loss": 17.0514, "step": 29564 }, { "epoch": 0.5404244429414883, "grad_norm": 7.9472774633742755, "learning_rate": 4.588989752802776e-06, "loss": 18.639, "step": 29565 }, { "epoch": 0.5404427221379348, "grad_norm": 7.17686691594272, "learning_rate": 4.588694743781598e-06, "loss": 17.8866, "step": 29566 }, { "epoch": 0.5404610013343814, "grad_norm": 8.040124402631278, "learning_rate": 4.58839973620201e-06, "loss": 18.0178, "step": 29567 }, { "epoch": 0.5404792805308278, "grad_norm": 7.58091503723088, "learning_rate": 4.588104730065043e-06, "loss": 17.788, "step": 29568 }, { "epoch": 0.5404975597272744, "grad_norm": 5.396101792360515, "learning_rate": 4.587809725371736e-06, "loss": 17.2398, "step": 29569 }, { "epoch": 0.5405158389237209, "grad_norm": 5.288060533384774, "learning_rate": 4.587514722123118e-06, "loss": 16.9973, "step": 29570 }, { "epoch": 0.5405341181201674, "grad_norm": 7.491336673450522, "learning_rate": 4.587219720320225e-06, "loss": 18.4791, "step": 29571 }, { "epoch": 0.540552397316614, "grad_norm": 6.166980825097523, "learning_rate": 4.586924719964093e-06, "loss": 17.4704, "step": 29572 }, { "epoch": 0.5405706765130605, "grad_norm": 6.26724829030317, "learning_rate": 4.586629721055753e-06, "loss": 17.5774, "step": 29573 }, { "epoch": 0.540588955709507, "grad_norm": 5.9488813663641515, "learning_rate": 4.5863347235962395e-06, "loss": 17.3036, "step": 29574 }, { "epoch": 0.5406072349059535, "grad_norm": 11.248008778979802, "learning_rate": 4.586039727586588e-06, "loss": 18.5698, "step": 29575 }, { "epoch": 0.5406255141024, "grad_norm": 6.112652381446217, "learning_rate": 4.585744733027831e-06, "loss": 17.6378, "step": 29576 }, { "epoch": 0.5406437932988466, "grad_norm": 5.553440615939625, "learning_rate": 4.585449739921001e-06, "loss": 17.2027, "step": 29577 }, { "epoch": 0.5406620724952931, "grad_norm": 5.929681792129342, "learning_rate": 4.585154748267135e-06, "loss": 17.3732, "step": 29578 }, { "epoch": 0.5406803516917397, "grad_norm": 7.994111039780171, "learning_rate": 4.584859758067267e-06, "loss": 18.2096, "step": 29579 }, { "epoch": 0.5406986308881861, "grad_norm": 5.843295529097822, "learning_rate": 4.584564769322426e-06, "loss": 17.3366, "step": 29580 }, { "epoch": 0.5407169100846326, "grad_norm": 6.435143178394292, "learning_rate": 4.584269782033654e-06, "loss": 17.7412, "step": 29581 }, { "epoch": 0.5407351892810792, "grad_norm": 5.3340548375901955, "learning_rate": 4.583974796201976e-06, "loss": 17.1549, "step": 29582 }, { "epoch": 0.5407534684775257, "grad_norm": 5.808047047662473, "learning_rate": 4.583679811828432e-06, "loss": 17.5286, "step": 29583 }, { "epoch": 0.5407717476739723, "grad_norm": 7.396367092759821, "learning_rate": 4.583384828914056e-06, "loss": 17.3303, "step": 29584 }, { "epoch": 0.5407900268704188, "grad_norm": 6.824111540527763, "learning_rate": 4.5830898474598765e-06, "loss": 17.6233, "step": 29585 }, { "epoch": 0.5408083060668653, "grad_norm": 7.361454099315705, "learning_rate": 4.582794867466933e-06, "loss": 17.6935, "step": 29586 }, { "epoch": 0.5408265852633118, "grad_norm": 5.073500489118596, "learning_rate": 4.582499888936258e-06, "loss": 16.9941, "step": 29587 }, { "epoch": 0.5408448644597583, "grad_norm": 6.456710084913706, "learning_rate": 4.582204911868882e-06, "loss": 17.5711, "step": 29588 }, { "epoch": 0.5408631436562049, "grad_norm": 6.391964391265243, "learning_rate": 4.5819099362658444e-06, "loss": 17.6913, "step": 29589 }, { "epoch": 0.5408814228526514, "grad_norm": 6.4255955808156155, "learning_rate": 4.581614962128177e-06, "loss": 17.6093, "step": 29590 }, { "epoch": 0.5408997020490979, "grad_norm": 5.709049731155068, "learning_rate": 4.581319989456909e-06, "loss": 17.1791, "step": 29591 }, { "epoch": 0.5409179812455445, "grad_norm": 5.241522152156904, "learning_rate": 4.58102501825308e-06, "loss": 17.0085, "step": 29592 }, { "epoch": 0.540936260441991, "grad_norm": 7.38524211310844, "learning_rate": 4.580730048517722e-06, "loss": 18.9426, "step": 29593 }, { "epoch": 0.5409545396384375, "grad_norm": 5.41977539133444, "learning_rate": 4.580435080251868e-06, "loss": 17.2418, "step": 29594 }, { "epoch": 0.540972818834884, "grad_norm": 8.394582266396927, "learning_rate": 4.580140113456555e-06, "loss": 17.9029, "step": 29595 }, { "epoch": 0.5409910980313305, "grad_norm": 7.180813621283885, "learning_rate": 4.579845148132811e-06, "loss": 18.3101, "step": 29596 }, { "epoch": 0.5410093772277771, "grad_norm": 7.302615648042981, "learning_rate": 4.579550184281676e-06, "loss": 17.5187, "step": 29597 }, { "epoch": 0.5410276564242236, "grad_norm": 5.653256780254672, "learning_rate": 4.57925522190418e-06, "loss": 17.3435, "step": 29598 }, { "epoch": 0.5410459356206702, "grad_norm": 5.184510598244916, "learning_rate": 4.578960261001357e-06, "loss": 17.0471, "step": 29599 }, { "epoch": 0.5410642148171166, "grad_norm": 5.526176162467872, "learning_rate": 4.578665301574244e-06, "loss": 17.2414, "step": 29600 }, { "epoch": 0.5410824940135631, "grad_norm": 8.039703135656477, "learning_rate": 4.578370343623872e-06, "loss": 18.6114, "step": 29601 }, { "epoch": 0.5411007732100097, "grad_norm": 5.378889197527293, "learning_rate": 4.578075387151273e-06, "loss": 17.0587, "step": 29602 }, { "epoch": 0.5411190524064562, "grad_norm": 7.238396941952168, "learning_rate": 4.577780432157486e-06, "loss": 17.8847, "step": 29603 }, { "epoch": 0.5411373316029028, "grad_norm": 6.009710933524192, "learning_rate": 4.5774854786435424e-06, "loss": 17.2457, "step": 29604 }, { "epoch": 0.5411556107993493, "grad_norm": 6.311236044204959, "learning_rate": 4.5771905266104724e-06, "loss": 17.2449, "step": 29605 }, { "epoch": 0.5411738899957957, "grad_norm": 6.541790251161015, "learning_rate": 4.5768955760593145e-06, "loss": 17.2919, "step": 29606 }, { "epoch": 0.5411921691922423, "grad_norm": 5.728177810415312, "learning_rate": 4.576600626991102e-06, "loss": 17.1003, "step": 29607 }, { "epoch": 0.5412104483886888, "grad_norm": 6.1367762599334, "learning_rate": 4.576305679406866e-06, "loss": 17.178, "step": 29608 }, { "epoch": 0.5412287275851354, "grad_norm": 8.141131574610736, "learning_rate": 4.576010733307643e-06, "loss": 18.2822, "step": 29609 }, { "epoch": 0.5412470067815819, "grad_norm": 6.189202883977985, "learning_rate": 4.5757157886944656e-06, "loss": 17.57, "step": 29610 }, { "epoch": 0.5412652859780284, "grad_norm": 4.961156748861543, "learning_rate": 4.5754208455683654e-06, "loss": 16.8882, "step": 29611 }, { "epoch": 0.541283565174475, "grad_norm": 6.201245438813038, "learning_rate": 4.575125903930381e-06, "loss": 17.3062, "step": 29612 }, { "epoch": 0.5413018443709214, "grad_norm": 6.7621821627944705, "learning_rate": 4.574830963781541e-06, "loss": 17.5588, "step": 29613 }, { "epoch": 0.541320123567368, "grad_norm": 6.641660262178539, "learning_rate": 4.574536025122883e-06, "loss": 17.7576, "step": 29614 }, { "epoch": 0.5413384027638145, "grad_norm": 6.117000832706429, "learning_rate": 4.5742410879554394e-06, "loss": 17.4642, "step": 29615 }, { "epoch": 0.541356681960261, "grad_norm": 6.418354361601318, "learning_rate": 4.573946152280243e-06, "loss": 17.7535, "step": 29616 }, { "epoch": 0.5413749611567076, "grad_norm": 8.537856998233808, "learning_rate": 4.573651218098329e-06, "loss": 18.1927, "step": 29617 }, { "epoch": 0.5413932403531541, "grad_norm": 6.820547664460975, "learning_rate": 4.573356285410732e-06, "loss": 17.5252, "step": 29618 }, { "epoch": 0.5414115195496007, "grad_norm": 5.694230061557964, "learning_rate": 4.573061354218481e-06, "loss": 17.1256, "step": 29619 }, { "epoch": 0.5414297987460471, "grad_norm": 6.4974202471643245, "learning_rate": 4.572766424522614e-06, "loss": 17.438, "step": 29620 }, { "epoch": 0.5414480779424936, "grad_norm": 6.126276147189317, "learning_rate": 4.572471496324166e-06, "loss": 17.4765, "step": 29621 }, { "epoch": 0.5414663571389402, "grad_norm": 6.386612248283866, "learning_rate": 4.572176569624165e-06, "loss": 17.4464, "step": 29622 }, { "epoch": 0.5414846363353867, "grad_norm": 5.556163113989534, "learning_rate": 4.5718816444236504e-06, "loss": 17.147, "step": 29623 }, { "epoch": 0.5415029155318333, "grad_norm": 4.649621013264692, "learning_rate": 4.571586720723652e-06, "loss": 16.7444, "step": 29624 }, { "epoch": 0.5415211947282798, "grad_norm": 4.174854077343063, "learning_rate": 4.571291798525205e-06, "loss": 16.7033, "step": 29625 }, { "epoch": 0.5415394739247262, "grad_norm": 6.492713530810022, "learning_rate": 4.570996877829345e-06, "loss": 17.4221, "step": 29626 }, { "epoch": 0.5415577531211728, "grad_norm": 6.505369375649434, "learning_rate": 4.570701958637101e-06, "loss": 17.7729, "step": 29627 }, { "epoch": 0.5415760323176193, "grad_norm": 5.705956036459067, "learning_rate": 4.570407040949511e-06, "loss": 17.4168, "step": 29628 }, { "epoch": 0.5415943115140659, "grad_norm": 5.923482131143591, "learning_rate": 4.570112124767607e-06, "loss": 17.2764, "step": 29629 }, { "epoch": 0.5416125907105124, "grad_norm": 7.36158538602033, "learning_rate": 4.569817210092421e-06, "loss": 18.0458, "step": 29630 }, { "epoch": 0.5416308699069589, "grad_norm": 5.716861240297323, "learning_rate": 4.569522296924991e-06, "loss": 17.3219, "step": 29631 }, { "epoch": 0.5416491491034054, "grad_norm": 5.9882796157639735, "learning_rate": 4.569227385266347e-06, "loss": 17.3857, "step": 29632 }, { "epoch": 0.5416674282998519, "grad_norm": 5.289952764953291, "learning_rate": 4.568932475117522e-06, "loss": 17.1143, "step": 29633 }, { "epoch": 0.5416857074962984, "grad_norm": 4.618175951607197, "learning_rate": 4.568637566479553e-06, "loss": 16.8169, "step": 29634 }, { "epoch": 0.541703986692745, "grad_norm": 6.691585229276231, "learning_rate": 4.5683426593534726e-06, "loss": 17.6094, "step": 29635 }, { "epoch": 0.5417222658891915, "grad_norm": 7.19933157188703, "learning_rate": 4.568047753740311e-06, "loss": 17.1854, "step": 29636 }, { "epoch": 0.5417405450856381, "grad_norm": 6.418557076094551, "learning_rate": 4.5677528496411075e-06, "loss": 17.4957, "step": 29637 }, { "epoch": 0.5417588242820846, "grad_norm": 5.740394779073838, "learning_rate": 4.567457947056892e-06, "loss": 17.2411, "step": 29638 }, { "epoch": 0.541777103478531, "grad_norm": 7.8082516972894975, "learning_rate": 4.567163045988696e-06, "loss": 17.9983, "step": 29639 }, { "epoch": 0.5417953826749776, "grad_norm": 6.868368245695889, "learning_rate": 4.56686814643756e-06, "loss": 17.6248, "step": 29640 }, { "epoch": 0.5418136618714241, "grad_norm": 5.710909750789169, "learning_rate": 4.56657324840451e-06, "loss": 16.9952, "step": 29641 }, { "epoch": 0.5418319410678707, "grad_norm": 5.860529055602299, "learning_rate": 4.566278351890586e-06, "loss": 17.2877, "step": 29642 }, { "epoch": 0.5418502202643172, "grad_norm": 6.040461071846872, "learning_rate": 4.565983456896818e-06, "loss": 17.5136, "step": 29643 }, { "epoch": 0.5418684994607637, "grad_norm": 6.252320615760492, "learning_rate": 4.5656885634242384e-06, "loss": 17.3288, "step": 29644 }, { "epoch": 0.5418867786572102, "grad_norm": 6.898618929717745, "learning_rate": 4.565393671473885e-06, "loss": 17.4735, "step": 29645 }, { "epoch": 0.5419050578536567, "grad_norm": 6.51677299660067, "learning_rate": 4.565098781046789e-06, "loss": 17.3316, "step": 29646 }, { "epoch": 0.5419233370501033, "grad_norm": 5.734625964188466, "learning_rate": 4.564803892143983e-06, "loss": 17.1953, "step": 29647 }, { "epoch": 0.5419416162465498, "grad_norm": 6.13569454927733, "learning_rate": 4.564509004766502e-06, "loss": 17.1181, "step": 29648 }, { "epoch": 0.5419598954429963, "grad_norm": 7.470060073143754, "learning_rate": 4.56421411891538e-06, "loss": 18.0188, "step": 29649 }, { "epoch": 0.5419781746394429, "grad_norm": 8.220575278129363, "learning_rate": 4.563919234591648e-06, "loss": 17.7823, "step": 29650 }, { "epoch": 0.5419964538358893, "grad_norm": 7.211301947036379, "learning_rate": 4.563624351796342e-06, "loss": 17.3246, "step": 29651 }, { "epoch": 0.5420147330323359, "grad_norm": 6.714271685316648, "learning_rate": 4.563329470530496e-06, "loss": 17.5986, "step": 29652 }, { "epoch": 0.5420330122287824, "grad_norm": 6.849701657478895, "learning_rate": 4.56303459079514e-06, "loss": 17.3423, "step": 29653 }, { "epoch": 0.5420512914252289, "grad_norm": 5.716490875272969, "learning_rate": 4.562739712591312e-06, "loss": 17.4247, "step": 29654 }, { "epoch": 0.5420695706216755, "grad_norm": 5.144277181911669, "learning_rate": 4.562444835920043e-06, "loss": 17.1177, "step": 29655 }, { "epoch": 0.542087849818122, "grad_norm": 9.49489993659656, "learning_rate": 4.562149960782364e-06, "loss": 18.5251, "step": 29656 }, { "epoch": 0.5421061290145686, "grad_norm": 6.635326246513486, "learning_rate": 4.561855087179315e-06, "loss": 17.4825, "step": 29657 }, { "epoch": 0.542124408211015, "grad_norm": 7.308762417651992, "learning_rate": 4.561560215111923e-06, "loss": 17.7194, "step": 29658 }, { "epoch": 0.5421426874074615, "grad_norm": 7.508366887085561, "learning_rate": 4.5612653445812276e-06, "loss": 18.2243, "step": 29659 }, { "epoch": 0.5421609666039081, "grad_norm": 6.593115705002529, "learning_rate": 4.560970475588257e-06, "loss": 17.4839, "step": 29660 }, { "epoch": 0.5421792458003546, "grad_norm": 5.609410387454165, "learning_rate": 4.560675608134046e-06, "loss": 17.2127, "step": 29661 }, { "epoch": 0.5421975249968012, "grad_norm": 7.128644493469324, "learning_rate": 4.560380742219632e-06, "loss": 17.7569, "step": 29662 }, { "epoch": 0.5422158041932477, "grad_norm": 5.599985212979389, "learning_rate": 4.560085877846044e-06, "loss": 17.2181, "step": 29663 }, { "epoch": 0.5422340833896941, "grad_norm": 6.903808158906843, "learning_rate": 4.559791015014315e-06, "loss": 17.8935, "step": 29664 }, { "epoch": 0.5422523625861407, "grad_norm": 7.128702155523251, "learning_rate": 4.559496153725482e-06, "loss": 17.8018, "step": 29665 }, { "epoch": 0.5422706417825872, "grad_norm": 6.015423562514056, "learning_rate": 4.559201293980577e-06, "loss": 17.2447, "step": 29666 }, { "epoch": 0.5422889209790338, "grad_norm": 7.629992786007098, "learning_rate": 4.558906435780631e-06, "loss": 17.7983, "step": 29667 }, { "epoch": 0.5423072001754803, "grad_norm": 7.123557674696467, "learning_rate": 4.558611579126683e-06, "loss": 17.6898, "step": 29668 }, { "epoch": 0.5423254793719268, "grad_norm": 5.604675782370979, "learning_rate": 4.558316724019761e-06, "loss": 17.2348, "step": 29669 }, { "epoch": 0.5423437585683734, "grad_norm": 7.162854762771484, "learning_rate": 4.5580218704609e-06, "loss": 17.9124, "step": 29670 }, { "epoch": 0.5423620377648198, "grad_norm": 6.33074254928047, "learning_rate": 4.557727018451135e-06, "loss": 17.6032, "step": 29671 }, { "epoch": 0.5423803169612664, "grad_norm": 5.667264685505669, "learning_rate": 4.557432167991497e-06, "loss": 17.2997, "step": 29672 }, { "epoch": 0.5423985961577129, "grad_norm": 6.795377093569764, "learning_rate": 4.557137319083024e-06, "loss": 17.5604, "step": 29673 }, { "epoch": 0.5424168753541594, "grad_norm": 5.004371341115228, "learning_rate": 4.556842471726744e-06, "loss": 16.9478, "step": 29674 }, { "epoch": 0.542435154550606, "grad_norm": 6.168793772185022, "learning_rate": 4.556547625923692e-06, "loss": 17.6359, "step": 29675 }, { "epoch": 0.5424534337470525, "grad_norm": 5.825743283592522, "learning_rate": 4.5562527816749044e-06, "loss": 17.3339, "step": 29676 }, { "epoch": 0.542471712943499, "grad_norm": 8.021226238177269, "learning_rate": 4.555957938981412e-06, "loss": 17.8246, "step": 29677 }, { "epoch": 0.5424899921399455, "grad_norm": 7.021284886957429, "learning_rate": 4.555663097844246e-06, "loss": 17.4717, "step": 29678 }, { "epoch": 0.542508271336392, "grad_norm": 6.3246825549705985, "learning_rate": 4.555368258264444e-06, "loss": 17.4124, "step": 29679 }, { "epoch": 0.5425265505328386, "grad_norm": 9.308083944086373, "learning_rate": 4.555073420243039e-06, "loss": 17.7765, "step": 29680 }, { "epoch": 0.5425448297292851, "grad_norm": 7.330648419752536, "learning_rate": 4.55477858378106e-06, "loss": 17.8064, "step": 29681 }, { "epoch": 0.5425631089257317, "grad_norm": 5.6040503674811735, "learning_rate": 4.5544837488795456e-06, "loss": 17.1433, "step": 29682 }, { "epoch": 0.5425813881221782, "grad_norm": 7.2501149432169845, "learning_rate": 4.554188915539527e-06, "loss": 17.5451, "step": 29683 }, { "epoch": 0.5425996673186246, "grad_norm": 6.870756464319314, "learning_rate": 4.553894083762035e-06, "loss": 17.904, "step": 29684 }, { "epoch": 0.5426179465150712, "grad_norm": 6.5390521701977145, "learning_rate": 4.553599253548109e-06, "loss": 17.4077, "step": 29685 }, { "epoch": 0.5426362257115177, "grad_norm": 7.055649846717575, "learning_rate": 4.553304424898778e-06, "loss": 17.4823, "step": 29686 }, { "epoch": 0.5426545049079643, "grad_norm": 7.117765750461991, "learning_rate": 4.5530095978150755e-06, "loss": 17.62, "step": 29687 }, { "epoch": 0.5426727841044108, "grad_norm": 6.0392099766165614, "learning_rate": 4.552714772298035e-06, "loss": 17.3103, "step": 29688 }, { "epoch": 0.5426910633008573, "grad_norm": 6.065624807615632, "learning_rate": 4.552419948348689e-06, "loss": 17.4615, "step": 29689 }, { "epoch": 0.5427093424973038, "grad_norm": 5.830405225788817, "learning_rate": 4.552125125968077e-06, "loss": 17.1944, "step": 29690 }, { "epoch": 0.5427276216937503, "grad_norm": 5.984972560620384, "learning_rate": 4.551830305157226e-06, "loss": 17.0087, "step": 29691 }, { "epoch": 0.5427459008901969, "grad_norm": 6.782424136793444, "learning_rate": 4.5515354859171675e-06, "loss": 17.6585, "step": 29692 }, { "epoch": 0.5427641800866434, "grad_norm": 6.8757354729421, "learning_rate": 4.551240668248941e-06, "loss": 17.6013, "step": 29693 }, { "epoch": 0.5427824592830899, "grad_norm": 6.707365179077693, "learning_rate": 4.550945852153577e-06, "loss": 17.8396, "step": 29694 }, { "epoch": 0.5428007384795365, "grad_norm": 6.618137099118713, "learning_rate": 4.550651037632107e-06, "loss": 17.5337, "step": 29695 }, { "epoch": 0.542819017675983, "grad_norm": 5.618798544611922, "learning_rate": 4.550356224685569e-06, "loss": 17.3293, "step": 29696 }, { "epoch": 0.5428372968724295, "grad_norm": 5.186682633252918, "learning_rate": 4.550061413314992e-06, "loss": 16.9746, "step": 29697 }, { "epoch": 0.542855576068876, "grad_norm": 5.92798895242843, "learning_rate": 4.549766603521409e-06, "loss": 17.177, "step": 29698 }, { "epoch": 0.5428738552653225, "grad_norm": 6.747404395892682, "learning_rate": 4.549471795305857e-06, "loss": 17.4971, "step": 29699 }, { "epoch": 0.5428921344617691, "grad_norm": 5.519406174632494, "learning_rate": 4.549176988669367e-06, "loss": 17.1426, "step": 29700 }, { "epoch": 0.5429104136582156, "grad_norm": 5.726444208905331, "learning_rate": 4.548882183612972e-06, "loss": 17.3675, "step": 29701 }, { "epoch": 0.542928692854662, "grad_norm": 5.489821496681419, "learning_rate": 4.548587380137707e-06, "loss": 17.3541, "step": 29702 }, { "epoch": 0.5429469720511086, "grad_norm": 6.406279454695321, "learning_rate": 4.548292578244602e-06, "loss": 17.4913, "step": 29703 }, { "epoch": 0.5429652512475551, "grad_norm": 6.18368739436124, "learning_rate": 4.547997777934694e-06, "loss": 17.3633, "step": 29704 }, { "epoch": 0.5429835304440017, "grad_norm": 5.075462707883375, "learning_rate": 4.547702979209014e-06, "loss": 16.916, "step": 29705 }, { "epoch": 0.5430018096404482, "grad_norm": 6.519913266950627, "learning_rate": 4.547408182068596e-06, "loss": 17.4422, "step": 29706 }, { "epoch": 0.5430200888368947, "grad_norm": 6.490346269879971, "learning_rate": 4.547113386514473e-06, "loss": 17.3936, "step": 29707 }, { "epoch": 0.5430383680333413, "grad_norm": 6.59902817896294, "learning_rate": 4.5468185925476796e-06, "loss": 17.5076, "step": 29708 }, { "epoch": 0.5430566472297877, "grad_norm": 7.128341072337267, "learning_rate": 4.546523800169244e-06, "loss": 17.685, "step": 29709 }, { "epoch": 0.5430749264262343, "grad_norm": 7.655527155254718, "learning_rate": 4.546229009380206e-06, "loss": 17.8178, "step": 29710 }, { "epoch": 0.5430932056226808, "grad_norm": 5.287994195809187, "learning_rate": 4.545934220181596e-06, "loss": 17.1862, "step": 29711 }, { "epoch": 0.5431114848191273, "grad_norm": 5.068931246867957, "learning_rate": 4.545639432574446e-06, "loss": 17.0162, "step": 29712 }, { "epoch": 0.5431297640155739, "grad_norm": 5.880490269889207, "learning_rate": 4.545344646559792e-06, "loss": 17.4517, "step": 29713 }, { "epoch": 0.5431480432120204, "grad_norm": 6.879958574956194, "learning_rate": 4.545049862138664e-06, "loss": 17.8441, "step": 29714 }, { "epoch": 0.543166322408467, "grad_norm": 5.787419784632917, "learning_rate": 4.544755079312096e-06, "loss": 17.2289, "step": 29715 }, { "epoch": 0.5431846016049134, "grad_norm": 5.548658639575891, "learning_rate": 4.544460298081125e-06, "loss": 17.3173, "step": 29716 }, { "epoch": 0.5432028808013599, "grad_norm": 6.934500731768947, "learning_rate": 4.5441655184467795e-06, "loss": 17.9761, "step": 29717 }, { "epoch": 0.5432211599978065, "grad_norm": 6.761250168360362, "learning_rate": 4.5438707404100935e-06, "loss": 17.3439, "step": 29718 }, { "epoch": 0.543239439194253, "grad_norm": 6.415289051473613, "learning_rate": 4.543575963972102e-06, "loss": 17.5158, "step": 29719 }, { "epoch": 0.5432577183906996, "grad_norm": 8.783907284853905, "learning_rate": 4.543281189133835e-06, "loss": 17.8434, "step": 29720 }, { "epoch": 0.5432759975871461, "grad_norm": 6.79347641103674, "learning_rate": 4.54298641589633e-06, "loss": 17.5747, "step": 29721 }, { "epoch": 0.5432942767835925, "grad_norm": 6.207689075500921, "learning_rate": 4.542691644260619e-06, "loss": 17.8038, "step": 29722 }, { "epoch": 0.5433125559800391, "grad_norm": 6.046593543618439, "learning_rate": 4.542396874227731e-06, "loss": 17.3915, "step": 29723 }, { "epoch": 0.5433308351764856, "grad_norm": 6.481133088575492, "learning_rate": 4.542102105798704e-06, "loss": 17.7624, "step": 29724 }, { "epoch": 0.5433491143729322, "grad_norm": 5.26915704585981, "learning_rate": 4.54180733897457e-06, "loss": 17.0649, "step": 29725 }, { "epoch": 0.5433673935693787, "grad_norm": 7.661998671229234, "learning_rate": 4.54151257375636e-06, "loss": 17.4998, "step": 29726 }, { "epoch": 0.5433856727658252, "grad_norm": 5.079280991521506, "learning_rate": 4.54121781014511e-06, "loss": 17.1403, "step": 29727 }, { "epoch": 0.5434039519622718, "grad_norm": 6.591976516077883, "learning_rate": 4.5409230481418515e-06, "loss": 17.5592, "step": 29728 }, { "epoch": 0.5434222311587182, "grad_norm": 8.05511868510039, "learning_rate": 4.540628287747616e-06, "loss": 17.457, "step": 29729 }, { "epoch": 0.5434405103551648, "grad_norm": 6.220016839508645, "learning_rate": 4.5403335289634416e-06, "loss": 17.297, "step": 29730 }, { "epoch": 0.5434587895516113, "grad_norm": 6.919700802780564, "learning_rate": 4.540038771790358e-06, "loss": 17.6536, "step": 29731 }, { "epoch": 0.5434770687480578, "grad_norm": 7.286166680008689, "learning_rate": 4.5397440162293955e-06, "loss": 17.7389, "step": 29732 }, { "epoch": 0.5434953479445044, "grad_norm": 6.731926325676894, "learning_rate": 4.539449262281592e-06, "loss": 17.5524, "step": 29733 }, { "epoch": 0.5435136271409509, "grad_norm": 5.287271873186343, "learning_rate": 4.539154509947978e-06, "loss": 16.9027, "step": 29734 }, { "epoch": 0.5435319063373975, "grad_norm": 5.977010676408968, "learning_rate": 4.538859759229589e-06, "loss": 17.2191, "step": 29735 }, { "epoch": 0.5435501855338439, "grad_norm": 8.044221948461693, "learning_rate": 4.538565010127457e-06, "loss": 18.3168, "step": 29736 }, { "epoch": 0.5435684647302904, "grad_norm": 6.591066568386356, "learning_rate": 4.538270262642612e-06, "loss": 17.4135, "step": 29737 }, { "epoch": 0.543586743926737, "grad_norm": 6.470070075664459, "learning_rate": 4.5379755167760915e-06, "loss": 17.7193, "step": 29738 }, { "epoch": 0.5436050231231835, "grad_norm": 8.168956895022317, "learning_rate": 4.537680772528927e-06, "loss": 18.1096, "step": 29739 }, { "epoch": 0.5436233023196301, "grad_norm": 6.6268058040210125, "learning_rate": 4.53738602990215e-06, "loss": 17.6433, "step": 29740 }, { "epoch": 0.5436415815160766, "grad_norm": 7.334626401798522, "learning_rate": 4.537091288896797e-06, "loss": 17.4032, "step": 29741 }, { "epoch": 0.543659860712523, "grad_norm": 6.354564354032523, "learning_rate": 4.536796549513897e-06, "loss": 17.3485, "step": 29742 }, { "epoch": 0.5436781399089696, "grad_norm": 6.542988684457901, "learning_rate": 4.536501811754484e-06, "loss": 17.8009, "step": 29743 }, { "epoch": 0.5436964191054161, "grad_norm": 6.71974531938016, "learning_rate": 4.536207075619594e-06, "loss": 17.672, "step": 29744 }, { "epoch": 0.5437146983018627, "grad_norm": 6.084831783242429, "learning_rate": 4.535912341110258e-06, "loss": 17.5863, "step": 29745 }, { "epoch": 0.5437329774983092, "grad_norm": 9.051869920885643, "learning_rate": 4.535617608227507e-06, "loss": 18.1845, "step": 29746 }, { "epoch": 0.5437512566947557, "grad_norm": 5.998411233650284, "learning_rate": 4.535322876972377e-06, "loss": 17.371, "step": 29747 }, { "epoch": 0.5437695358912022, "grad_norm": 6.437192958666372, "learning_rate": 4.5350281473459015e-06, "loss": 17.6276, "step": 29748 }, { "epoch": 0.5437878150876487, "grad_norm": 6.4144856918106905, "learning_rate": 4.534733419349109e-06, "loss": 17.6265, "step": 29749 }, { "epoch": 0.5438060942840953, "grad_norm": 5.353560264351247, "learning_rate": 4.534438692983039e-06, "loss": 17.1263, "step": 29750 }, { "epoch": 0.5438243734805418, "grad_norm": 4.904430860243971, "learning_rate": 4.534143968248717e-06, "loss": 16.9371, "step": 29751 }, { "epoch": 0.5438426526769883, "grad_norm": 5.075854334746022, "learning_rate": 4.5338492451471825e-06, "loss": 16.8521, "step": 29752 }, { "epoch": 0.5438609318734349, "grad_norm": 6.360054128696429, "learning_rate": 4.5335545236794665e-06, "loss": 17.1759, "step": 29753 }, { "epoch": 0.5438792110698814, "grad_norm": 6.233971848612843, "learning_rate": 4.5332598038465995e-06, "loss": 17.2346, "step": 29754 }, { "epoch": 0.5438974902663279, "grad_norm": 7.426176839830116, "learning_rate": 4.5329650856496174e-06, "loss": 17.7628, "step": 29755 }, { "epoch": 0.5439157694627744, "grad_norm": 5.534029862958593, "learning_rate": 4.532670369089553e-06, "loss": 17.0533, "step": 29756 }, { "epoch": 0.5439340486592209, "grad_norm": 5.8724933429645905, "learning_rate": 4.5323756541674355e-06, "loss": 17.4794, "step": 29757 }, { "epoch": 0.5439523278556675, "grad_norm": 8.440113168317428, "learning_rate": 4.532080940884304e-06, "loss": 18.0309, "step": 29758 }, { "epoch": 0.543970607052114, "grad_norm": 6.795401646588165, "learning_rate": 4.531786229241188e-06, "loss": 17.6853, "step": 29759 }, { "epoch": 0.5439888862485606, "grad_norm": 7.0348309968385285, "learning_rate": 4.531491519239118e-06, "loss": 17.5845, "step": 29760 }, { "epoch": 0.544007165445007, "grad_norm": 5.6011253301184, "learning_rate": 4.531196810879131e-06, "loss": 17.289, "step": 29761 }, { "epoch": 0.5440254446414535, "grad_norm": 5.775024687068892, "learning_rate": 4.5309021041622594e-06, "loss": 17.2664, "step": 29762 }, { "epoch": 0.5440437238379001, "grad_norm": 6.762532883136613, "learning_rate": 4.530607399089534e-06, "loss": 17.2159, "step": 29763 }, { "epoch": 0.5440620030343466, "grad_norm": 7.533710066538556, "learning_rate": 4.530312695661991e-06, "loss": 17.8064, "step": 29764 }, { "epoch": 0.5440802822307932, "grad_norm": 6.4242535402314855, "learning_rate": 4.530017993880658e-06, "loss": 17.3662, "step": 29765 }, { "epoch": 0.5440985614272397, "grad_norm": 5.834356991207791, "learning_rate": 4.529723293746572e-06, "loss": 17.2633, "step": 29766 }, { "epoch": 0.5441168406236861, "grad_norm": 4.752902626215665, "learning_rate": 4.5294285952607666e-06, "loss": 16.8622, "step": 29767 }, { "epoch": 0.5441351198201327, "grad_norm": 9.179993867558066, "learning_rate": 4.5291338984242715e-06, "loss": 18.783, "step": 29768 }, { "epoch": 0.5441533990165792, "grad_norm": 6.984373149630205, "learning_rate": 4.528839203238123e-06, "loss": 17.7424, "step": 29769 }, { "epoch": 0.5441716782130257, "grad_norm": 5.3032689774042, "learning_rate": 4.528544509703351e-06, "loss": 16.9639, "step": 29770 }, { "epoch": 0.5441899574094723, "grad_norm": 5.2805143882599515, "learning_rate": 4.528249817820989e-06, "loss": 17.0632, "step": 29771 }, { "epoch": 0.5442082366059188, "grad_norm": 7.488229883309365, "learning_rate": 4.527955127592073e-06, "loss": 18.1898, "step": 29772 }, { "epoch": 0.5442265158023654, "grad_norm": 6.564024162871789, "learning_rate": 4.527660439017631e-06, "loss": 17.6983, "step": 29773 }, { "epoch": 0.5442447949988118, "grad_norm": 9.217775181673133, "learning_rate": 4.527365752098698e-06, "loss": 18.6347, "step": 29774 }, { "epoch": 0.5442630741952583, "grad_norm": 6.958625842202245, "learning_rate": 4.527071066836308e-06, "loss": 17.9068, "step": 29775 }, { "epoch": 0.5442813533917049, "grad_norm": 5.219888960144033, "learning_rate": 4.526776383231494e-06, "loss": 17.0764, "step": 29776 }, { "epoch": 0.5442996325881514, "grad_norm": 4.9816643400626095, "learning_rate": 4.526481701285285e-06, "loss": 16.9681, "step": 29777 }, { "epoch": 0.544317911784598, "grad_norm": 7.283708828247044, "learning_rate": 4.526187020998718e-06, "loss": 17.9841, "step": 29778 }, { "epoch": 0.5443361909810445, "grad_norm": 6.930961043464601, "learning_rate": 4.5258923423728255e-06, "loss": 17.6424, "step": 29779 }, { "epoch": 0.544354470177491, "grad_norm": 5.2457728074291925, "learning_rate": 4.5255976654086365e-06, "loss": 17.1416, "step": 29780 }, { "epoch": 0.5443727493739375, "grad_norm": 9.544446535827493, "learning_rate": 4.525302990107189e-06, "loss": 19.0776, "step": 29781 }, { "epoch": 0.544391028570384, "grad_norm": 5.828811376532233, "learning_rate": 4.525008316469511e-06, "loss": 17.3938, "step": 29782 }, { "epoch": 0.5444093077668306, "grad_norm": 5.281308899006892, "learning_rate": 4.524713644496639e-06, "loss": 17.0127, "step": 29783 }, { "epoch": 0.5444275869632771, "grad_norm": 6.261564416963043, "learning_rate": 4.524418974189606e-06, "loss": 17.3045, "step": 29784 }, { "epoch": 0.5444458661597236, "grad_norm": 7.52540360382981, "learning_rate": 4.52412430554944e-06, "loss": 17.6195, "step": 29785 }, { "epoch": 0.5444641453561702, "grad_norm": 5.532811093516602, "learning_rate": 4.523829638577179e-06, "loss": 17.163, "step": 29786 }, { "epoch": 0.5444824245526166, "grad_norm": 8.076028082992227, "learning_rate": 4.523534973273854e-06, "loss": 17.7471, "step": 29787 }, { "epoch": 0.5445007037490632, "grad_norm": 7.291901736010515, "learning_rate": 4.523240309640496e-06, "loss": 17.5132, "step": 29788 }, { "epoch": 0.5445189829455097, "grad_norm": 7.355850219446334, "learning_rate": 4.522945647678141e-06, "loss": 17.5001, "step": 29789 }, { "epoch": 0.5445372621419562, "grad_norm": 6.337907991411011, "learning_rate": 4.522650987387821e-06, "loss": 17.2475, "step": 29790 }, { "epoch": 0.5445555413384028, "grad_norm": 5.2733262898580575, "learning_rate": 4.5223563287705654e-06, "loss": 17.0288, "step": 29791 }, { "epoch": 0.5445738205348493, "grad_norm": 8.245234281026514, "learning_rate": 4.5220616718274104e-06, "loss": 18.3879, "step": 29792 }, { "epoch": 0.5445920997312959, "grad_norm": 6.631865375008177, "learning_rate": 4.5217670165593884e-06, "loss": 17.6043, "step": 29793 }, { "epoch": 0.5446103789277423, "grad_norm": 6.371310180818902, "learning_rate": 4.5214723629675295e-06, "loss": 17.8006, "step": 29794 }, { "epoch": 0.5446286581241888, "grad_norm": 5.898571953079797, "learning_rate": 4.521177711052871e-06, "loss": 17.2774, "step": 29795 }, { "epoch": 0.5446469373206354, "grad_norm": 6.703374935223071, "learning_rate": 4.520883060816441e-06, "loss": 17.6809, "step": 29796 }, { "epoch": 0.5446652165170819, "grad_norm": 5.7732646783065915, "learning_rate": 4.5205884122592745e-06, "loss": 17.1796, "step": 29797 }, { "epoch": 0.5446834957135285, "grad_norm": 5.5366150255952, "learning_rate": 4.520293765382406e-06, "loss": 17.3408, "step": 29798 }, { "epoch": 0.544701774909975, "grad_norm": 5.928262975987572, "learning_rate": 4.519999120186864e-06, "loss": 17.401, "step": 29799 }, { "epoch": 0.5447200541064214, "grad_norm": 6.562893917569163, "learning_rate": 4.519704476673685e-06, "loss": 17.5111, "step": 29800 }, { "epoch": 0.544738333302868, "grad_norm": 6.423373814949974, "learning_rate": 4.5194098348439e-06, "loss": 17.2714, "step": 29801 }, { "epoch": 0.5447566124993145, "grad_norm": 6.527698236088468, "learning_rate": 4.51911519469854e-06, "loss": 17.4675, "step": 29802 }, { "epoch": 0.5447748916957611, "grad_norm": 6.090413165261496, "learning_rate": 4.518820556238641e-06, "loss": 17.3672, "step": 29803 }, { "epoch": 0.5447931708922076, "grad_norm": 5.32202477703688, "learning_rate": 4.518525919465235e-06, "loss": 16.9823, "step": 29804 }, { "epoch": 0.5448114500886541, "grad_norm": 7.212169384724447, "learning_rate": 4.518231284379352e-06, "loss": 17.6878, "step": 29805 }, { "epoch": 0.5448297292851007, "grad_norm": 6.406651509320488, "learning_rate": 4.517936650982027e-06, "loss": 17.6046, "step": 29806 }, { "epoch": 0.5448480084815471, "grad_norm": 6.957012439387348, "learning_rate": 4.5176420192742945e-06, "loss": 17.6864, "step": 29807 }, { "epoch": 0.5448662876779937, "grad_norm": 5.925938789418662, "learning_rate": 4.517347389257181e-06, "loss": 17.3233, "step": 29808 }, { "epoch": 0.5448845668744402, "grad_norm": 6.047347141299578, "learning_rate": 4.517052760931726e-06, "loss": 17.6261, "step": 29809 }, { "epoch": 0.5449028460708867, "grad_norm": 5.298378107625687, "learning_rate": 4.516758134298957e-06, "loss": 17.2762, "step": 29810 }, { "epoch": 0.5449211252673333, "grad_norm": 6.007188635773468, "learning_rate": 4.51646350935991e-06, "loss": 17.2235, "step": 29811 }, { "epoch": 0.5449394044637798, "grad_norm": 5.773299862634692, "learning_rate": 4.5161688861156175e-06, "loss": 17.3671, "step": 29812 }, { "epoch": 0.5449576836602263, "grad_norm": 6.230030289232958, "learning_rate": 4.515874264567109e-06, "loss": 17.2909, "step": 29813 }, { "epoch": 0.5449759628566728, "grad_norm": 6.331167220246017, "learning_rate": 4.515579644715421e-06, "loss": 17.4052, "step": 29814 }, { "epoch": 0.5449942420531193, "grad_norm": 6.390069762059562, "learning_rate": 4.515285026561583e-06, "loss": 17.4455, "step": 29815 }, { "epoch": 0.5450125212495659, "grad_norm": 6.221852677393686, "learning_rate": 4.5149904101066284e-06, "loss": 17.5324, "step": 29816 }, { "epoch": 0.5450308004460124, "grad_norm": 7.831668280317319, "learning_rate": 4.514695795351593e-06, "loss": 18.5615, "step": 29817 }, { "epoch": 0.545049079642459, "grad_norm": 6.341311371962764, "learning_rate": 4.514401182297506e-06, "loss": 17.4584, "step": 29818 }, { "epoch": 0.5450673588389054, "grad_norm": 5.8308249104525105, "learning_rate": 4.5141065709453985e-06, "loss": 17.2229, "step": 29819 }, { "epoch": 0.5450856380353519, "grad_norm": 6.819306464359632, "learning_rate": 4.513811961296306e-06, "loss": 17.585, "step": 29820 }, { "epoch": 0.5451039172317985, "grad_norm": 7.054456857587738, "learning_rate": 4.513517353351262e-06, "loss": 17.9742, "step": 29821 }, { "epoch": 0.545122196428245, "grad_norm": 5.795733117737769, "learning_rate": 4.513222747111296e-06, "loss": 17.2528, "step": 29822 }, { "epoch": 0.5451404756246916, "grad_norm": 6.907163963753899, "learning_rate": 4.512928142577443e-06, "loss": 17.7732, "step": 29823 }, { "epoch": 0.5451587548211381, "grad_norm": 6.724741947632082, "learning_rate": 4.512633539750733e-06, "loss": 17.527, "step": 29824 }, { "epoch": 0.5451770340175845, "grad_norm": 7.411436416095668, "learning_rate": 4.512338938632201e-06, "loss": 17.7891, "step": 29825 }, { "epoch": 0.5451953132140311, "grad_norm": 5.591339280408839, "learning_rate": 4.51204433922288e-06, "loss": 17.1434, "step": 29826 }, { "epoch": 0.5452135924104776, "grad_norm": 5.4240035856229465, "learning_rate": 4.511749741523799e-06, "loss": 17.2114, "step": 29827 }, { "epoch": 0.5452318716069242, "grad_norm": 6.7116663570885, "learning_rate": 4.511455145535994e-06, "loss": 17.4094, "step": 29828 }, { "epoch": 0.5452501508033707, "grad_norm": 6.996102584263931, "learning_rate": 4.511160551260497e-06, "loss": 17.3232, "step": 29829 }, { "epoch": 0.5452684299998172, "grad_norm": 5.84991075286938, "learning_rate": 4.510865958698338e-06, "loss": 17.332, "step": 29830 }, { "epoch": 0.5452867091962638, "grad_norm": 6.617473309042606, "learning_rate": 4.510571367850554e-06, "loss": 17.5034, "step": 29831 }, { "epoch": 0.5453049883927102, "grad_norm": 7.495257962888313, "learning_rate": 4.510276778718173e-06, "loss": 17.7343, "step": 29832 }, { "epoch": 0.5453232675891568, "grad_norm": 6.208804031998445, "learning_rate": 4.509982191302229e-06, "loss": 17.4547, "step": 29833 }, { "epoch": 0.5453415467856033, "grad_norm": 6.346798371697569, "learning_rate": 4.509687605603756e-06, "loss": 17.3119, "step": 29834 }, { "epoch": 0.5453598259820498, "grad_norm": 7.174423274794517, "learning_rate": 4.509393021623786e-06, "loss": 17.8427, "step": 29835 }, { "epoch": 0.5453781051784964, "grad_norm": 6.117551649926435, "learning_rate": 4.509098439363349e-06, "loss": 17.4154, "step": 29836 }, { "epoch": 0.5453963843749429, "grad_norm": 6.093356874652026, "learning_rate": 4.50880385882348e-06, "loss": 17.6254, "step": 29837 }, { "epoch": 0.5454146635713893, "grad_norm": 6.326845297827514, "learning_rate": 4.5085092800052125e-06, "loss": 17.29, "step": 29838 }, { "epoch": 0.5454329427678359, "grad_norm": 5.41611387908689, "learning_rate": 4.508214702909575e-06, "loss": 17.2061, "step": 29839 }, { "epoch": 0.5454512219642824, "grad_norm": 5.959247820681941, "learning_rate": 4.507920127537604e-06, "loss": 17.2459, "step": 29840 }, { "epoch": 0.545469501160729, "grad_norm": 6.249684689796049, "learning_rate": 4.507625553890328e-06, "loss": 17.5384, "step": 29841 }, { "epoch": 0.5454877803571755, "grad_norm": 5.623945557152371, "learning_rate": 4.507330981968783e-06, "loss": 17.2486, "step": 29842 }, { "epoch": 0.545506059553622, "grad_norm": 5.38558899978421, "learning_rate": 4.507036411774002e-06, "loss": 17.1745, "step": 29843 }, { "epoch": 0.5455243387500686, "grad_norm": 6.232364436236493, "learning_rate": 4.506741843307013e-06, "loss": 17.4518, "step": 29844 }, { "epoch": 0.545542617946515, "grad_norm": 6.649288779595549, "learning_rate": 4.506447276568853e-06, "loss": 17.4485, "step": 29845 }, { "epoch": 0.5455608971429616, "grad_norm": 5.815028806992932, "learning_rate": 4.5061527115605506e-06, "loss": 17.1158, "step": 29846 }, { "epoch": 0.5455791763394081, "grad_norm": 6.611034735514783, "learning_rate": 4.50585814828314e-06, "loss": 17.5293, "step": 29847 }, { "epoch": 0.5455974555358546, "grad_norm": 7.785933304030335, "learning_rate": 4.505563586737656e-06, "loss": 18.2479, "step": 29848 }, { "epoch": 0.5456157347323012, "grad_norm": 6.461295489664825, "learning_rate": 4.5052690269251285e-06, "loss": 17.7074, "step": 29849 }, { "epoch": 0.5456340139287477, "grad_norm": 5.837095256346962, "learning_rate": 4.504974468846588e-06, "loss": 17.4469, "step": 29850 }, { "epoch": 0.5456522931251943, "grad_norm": 8.660296098329551, "learning_rate": 4.504679912503069e-06, "loss": 17.6508, "step": 29851 }, { "epoch": 0.5456705723216407, "grad_norm": 5.43680738370704, "learning_rate": 4.504385357895607e-06, "loss": 16.8972, "step": 29852 }, { "epoch": 0.5456888515180872, "grad_norm": 6.299372588714826, "learning_rate": 4.504090805025227e-06, "loss": 17.5305, "step": 29853 }, { "epoch": 0.5457071307145338, "grad_norm": 5.823742560151857, "learning_rate": 4.50379625389297e-06, "loss": 17.1453, "step": 29854 }, { "epoch": 0.5457254099109803, "grad_norm": 8.366415418508273, "learning_rate": 4.503501704499862e-06, "loss": 18.1735, "step": 29855 }, { "epoch": 0.5457436891074269, "grad_norm": 6.409736355699162, "learning_rate": 4.503207156846936e-06, "loss": 17.2588, "step": 29856 }, { "epoch": 0.5457619683038734, "grad_norm": 5.500175703712791, "learning_rate": 4.502912610935228e-06, "loss": 17.1702, "step": 29857 }, { "epoch": 0.5457802475003198, "grad_norm": 7.551609729563949, "learning_rate": 4.502618066765767e-06, "loss": 17.8314, "step": 29858 }, { "epoch": 0.5457985266967664, "grad_norm": 6.249658920742486, "learning_rate": 4.502323524339588e-06, "loss": 17.4371, "step": 29859 }, { "epoch": 0.5458168058932129, "grad_norm": 5.960419613179486, "learning_rate": 4.50202898365772e-06, "loss": 17.4953, "step": 29860 }, { "epoch": 0.5458350850896595, "grad_norm": 8.444299891146914, "learning_rate": 4.501734444721197e-06, "loss": 18.1816, "step": 29861 }, { "epoch": 0.545853364286106, "grad_norm": 5.545560477818979, "learning_rate": 4.501439907531054e-06, "loss": 17.0107, "step": 29862 }, { "epoch": 0.5458716434825525, "grad_norm": 5.651396438144135, "learning_rate": 4.50114537208832e-06, "loss": 17.0345, "step": 29863 }, { "epoch": 0.545889922678999, "grad_norm": 7.147786432863684, "learning_rate": 4.500850838394027e-06, "loss": 17.9985, "step": 29864 }, { "epoch": 0.5459082018754455, "grad_norm": 6.165104825371763, "learning_rate": 4.500556306449209e-06, "loss": 17.3229, "step": 29865 }, { "epoch": 0.5459264810718921, "grad_norm": 5.519909434473997, "learning_rate": 4.500261776254899e-06, "loss": 17.3631, "step": 29866 }, { "epoch": 0.5459447602683386, "grad_norm": 5.497153780658095, "learning_rate": 4.499967247812126e-06, "loss": 17.4672, "step": 29867 }, { "epoch": 0.5459630394647851, "grad_norm": 6.355106246936305, "learning_rate": 4.499672721121927e-06, "loss": 17.5106, "step": 29868 }, { "epoch": 0.5459813186612317, "grad_norm": 7.153578665596846, "learning_rate": 4.49937819618533e-06, "loss": 17.8917, "step": 29869 }, { "epoch": 0.5459995978576782, "grad_norm": 5.6171626100526675, "learning_rate": 4.499083673003367e-06, "loss": 17.1828, "step": 29870 }, { "epoch": 0.5460178770541247, "grad_norm": 5.484770797635483, "learning_rate": 4.498789151577076e-06, "loss": 17.2484, "step": 29871 }, { "epoch": 0.5460361562505712, "grad_norm": 5.109302235371514, "learning_rate": 4.498494631907482e-06, "loss": 17.0379, "step": 29872 }, { "epoch": 0.5460544354470177, "grad_norm": 6.251625381378134, "learning_rate": 4.498200113995624e-06, "loss": 17.295, "step": 29873 }, { "epoch": 0.5460727146434643, "grad_norm": 5.32061620467433, "learning_rate": 4.49790559784253e-06, "loss": 17.0004, "step": 29874 }, { "epoch": 0.5460909938399108, "grad_norm": 6.137381289615527, "learning_rate": 4.497611083449232e-06, "loss": 17.2192, "step": 29875 }, { "epoch": 0.5461092730363574, "grad_norm": 6.7371793620703615, "learning_rate": 4.497316570816766e-06, "loss": 17.7424, "step": 29876 }, { "epoch": 0.5461275522328038, "grad_norm": 6.654437140177508, "learning_rate": 4.497022059946161e-06, "loss": 17.4136, "step": 29877 }, { "epoch": 0.5461458314292503, "grad_norm": 5.907522311118308, "learning_rate": 4.496727550838448e-06, "loss": 17.1606, "step": 29878 }, { "epoch": 0.5461641106256969, "grad_norm": 7.8653568506344165, "learning_rate": 4.4964330434946625e-06, "loss": 18.5069, "step": 29879 }, { "epoch": 0.5461823898221434, "grad_norm": 6.986754247564929, "learning_rate": 4.496138537915837e-06, "loss": 17.5956, "step": 29880 }, { "epoch": 0.54620066901859, "grad_norm": 5.041542222846744, "learning_rate": 4.4958440341030006e-06, "loss": 16.8883, "step": 29881 }, { "epoch": 0.5462189482150365, "grad_norm": 6.316943765028018, "learning_rate": 4.4955495320571875e-06, "loss": 17.5841, "step": 29882 }, { "epoch": 0.546237227411483, "grad_norm": 6.063011643474048, "learning_rate": 4.4952550317794296e-06, "loss": 17.5541, "step": 29883 }, { "epoch": 0.5462555066079295, "grad_norm": 7.074734572930089, "learning_rate": 4.494960533270758e-06, "loss": 17.6636, "step": 29884 }, { "epoch": 0.546273785804376, "grad_norm": 7.599202523619464, "learning_rate": 4.494666036532207e-06, "loss": 17.7178, "step": 29885 }, { "epoch": 0.5462920650008226, "grad_norm": 6.406539075787203, "learning_rate": 4.494371541564809e-06, "loss": 17.6392, "step": 29886 }, { "epoch": 0.5463103441972691, "grad_norm": 5.036360475680159, "learning_rate": 4.494077048369592e-06, "loss": 16.9795, "step": 29887 }, { "epoch": 0.5463286233937156, "grad_norm": 7.244425918504672, "learning_rate": 4.493782556947593e-06, "loss": 17.6858, "step": 29888 }, { "epoch": 0.5463469025901622, "grad_norm": 4.513269880443145, "learning_rate": 4.49348806729984e-06, "loss": 16.8492, "step": 29889 }, { "epoch": 0.5463651817866086, "grad_norm": 7.0835793226397445, "learning_rate": 4.49319357942737e-06, "loss": 18.0469, "step": 29890 }, { "epoch": 0.5463834609830552, "grad_norm": 6.8285757499177135, "learning_rate": 4.4928990933312125e-06, "loss": 17.7484, "step": 29891 }, { "epoch": 0.5464017401795017, "grad_norm": 6.207541448370579, "learning_rate": 4.492604609012397e-06, "loss": 17.3827, "step": 29892 }, { "epoch": 0.5464200193759482, "grad_norm": 6.459528779841446, "learning_rate": 4.492310126471961e-06, "loss": 17.4392, "step": 29893 }, { "epoch": 0.5464382985723948, "grad_norm": 5.393696194603834, "learning_rate": 4.492015645710933e-06, "loss": 17.0643, "step": 29894 }, { "epoch": 0.5464565777688413, "grad_norm": 7.3738768294080375, "learning_rate": 4.491721166730345e-06, "loss": 18.183, "step": 29895 }, { "epoch": 0.5464748569652879, "grad_norm": 6.544940685260809, "learning_rate": 4.4914266895312316e-06, "loss": 17.2237, "step": 29896 }, { "epoch": 0.5464931361617343, "grad_norm": 6.924888167802018, "learning_rate": 4.491132214114623e-06, "loss": 17.8239, "step": 29897 }, { "epoch": 0.5465114153581808, "grad_norm": 7.6494571584778415, "learning_rate": 4.490837740481552e-06, "loss": 17.9941, "step": 29898 }, { "epoch": 0.5465296945546274, "grad_norm": 4.785723316633475, "learning_rate": 4.490543268633051e-06, "loss": 16.8696, "step": 29899 }, { "epoch": 0.5465479737510739, "grad_norm": 6.405056035879659, "learning_rate": 4.490248798570151e-06, "loss": 17.3514, "step": 29900 }, { "epoch": 0.5465662529475205, "grad_norm": 4.8048881692407175, "learning_rate": 4.489954330293884e-06, "loss": 16.9552, "step": 29901 }, { "epoch": 0.546584532143967, "grad_norm": 5.844986447206242, "learning_rate": 4.4896598638052846e-06, "loss": 17.4383, "step": 29902 }, { "epoch": 0.5466028113404134, "grad_norm": 6.577780147734054, "learning_rate": 4.489365399105381e-06, "loss": 17.7178, "step": 29903 }, { "epoch": 0.54662109053686, "grad_norm": 6.704186491518743, "learning_rate": 4.489070936195209e-06, "loss": 17.6263, "step": 29904 }, { "epoch": 0.5466393697333065, "grad_norm": 6.140710033807339, "learning_rate": 4.488776475075799e-06, "loss": 17.2171, "step": 29905 }, { "epoch": 0.546657648929753, "grad_norm": 6.207628076557828, "learning_rate": 4.488482015748181e-06, "loss": 17.3463, "step": 29906 }, { "epoch": 0.5466759281261996, "grad_norm": 6.748547986923466, "learning_rate": 4.488187558213392e-06, "loss": 17.445, "step": 29907 }, { "epoch": 0.5466942073226461, "grad_norm": 5.935607454155856, "learning_rate": 4.487893102472462e-06, "loss": 17.4129, "step": 29908 }, { "epoch": 0.5467124865190927, "grad_norm": 7.244160677015997, "learning_rate": 4.4875986485264185e-06, "loss": 17.6466, "step": 29909 }, { "epoch": 0.5467307657155391, "grad_norm": 6.910746944828571, "learning_rate": 4.487304196376298e-06, "loss": 17.3333, "step": 29910 }, { "epoch": 0.5467490449119856, "grad_norm": 7.065118953054043, "learning_rate": 4.487009746023134e-06, "loss": 17.4686, "step": 29911 }, { "epoch": 0.5467673241084322, "grad_norm": 6.3600651650901145, "learning_rate": 4.486715297467955e-06, "loss": 17.416, "step": 29912 }, { "epoch": 0.5467856033048787, "grad_norm": 6.358763317043901, "learning_rate": 4.486420850711795e-06, "loss": 17.4703, "step": 29913 }, { "epoch": 0.5468038825013253, "grad_norm": 6.234123911362589, "learning_rate": 4.486126405755685e-06, "loss": 17.6905, "step": 29914 }, { "epoch": 0.5468221616977718, "grad_norm": 6.263090498037006, "learning_rate": 4.485831962600656e-06, "loss": 17.4243, "step": 29915 }, { "epoch": 0.5468404408942182, "grad_norm": 6.441255582142518, "learning_rate": 4.485537521247743e-06, "loss": 17.5102, "step": 29916 }, { "epoch": 0.5468587200906648, "grad_norm": 5.807255207845825, "learning_rate": 4.485243081697977e-06, "loss": 17.3718, "step": 29917 }, { "epoch": 0.5468769992871113, "grad_norm": 5.557675309708604, "learning_rate": 4.484948643952387e-06, "loss": 17.2215, "step": 29918 }, { "epoch": 0.5468952784835579, "grad_norm": 5.4643407581846715, "learning_rate": 4.484654208012009e-06, "loss": 17.0952, "step": 29919 }, { "epoch": 0.5469135576800044, "grad_norm": 6.565434586565145, "learning_rate": 4.484359773877871e-06, "loss": 17.6024, "step": 29920 }, { "epoch": 0.5469318368764509, "grad_norm": 5.418188885948109, "learning_rate": 4.4840653415510105e-06, "loss": 17.2842, "step": 29921 }, { "epoch": 0.5469501160728975, "grad_norm": 4.8766363029411295, "learning_rate": 4.483770911032456e-06, "loss": 16.8786, "step": 29922 }, { "epoch": 0.5469683952693439, "grad_norm": 7.773697169204935, "learning_rate": 4.483476482323237e-06, "loss": 17.9567, "step": 29923 }, { "epoch": 0.5469866744657905, "grad_norm": 7.084478516680454, "learning_rate": 4.483182055424389e-06, "loss": 17.6186, "step": 29924 }, { "epoch": 0.547004953662237, "grad_norm": 5.521613148408107, "learning_rate": 4.482887630336945e-06, "loss": 17.1707, "step": 29925 }, { "epoch": 0.5470232328586835, "grad_norm": 6.287974742250135, "learning_rate": 4.482593207061932e-06, "loss": 17.3772, "step": 29926 }, { "epoch": 0.5470415120551301, "grad_norm": 5.716995861215337, "learning_rate": 4.482298785600388e-06, "loss": 17.4699, "step": 29927 }, { "epoch": 0.5470597912515766, "grad_norm": 8.391052063673003, "learning_rate": 4.48200436595334e-06, "loss": 17.8316, "step": 29928 }, { "epoch": 0.5470780704480231, "grad_norm": 9.417842154407687, "learning_rate": 4.481709948121821e-06, "loss": 17.9807, "step": 29929 }, { "epoch": 0.5470963496444696, "grad_norm": 5.751166858401681, "learning_rate": 4.481415532106865e-06, "loss": 17.1238, "step": 29930 }, { "epoch": 0.5471146288409161, "grad_norm": 6.178574943591446, "learning_rate": 4.481121117909504e-06, "loss": 17.4767, "step": 29931 }, { "epoch": 0.5471329080373627, "grad_norm": 6.689219800313792, "learning_rate": 4.480826705530765e-06, "loss": 17.732, "step": 29932 }, { "epoch": 0.5471511872338092, "grad_norm": 6.730132410078064, "learning_rate": 4.480532294971686e-06, "loss": 17.7706, "step": 29933 }, { "epoch": 0.5471694664302558, "grad_norm": 5.861581253007977, "learning_rate": 4.4802378862332935e-06, "loss": 17.2256, "step": 29934 }, { "epoch": 0.5471877456267022, "grad_norm": 6.307789849128085, "learning_rate": 4.479943479316625e-06, "loss": 17.4996, "step": 29935 }, { "epoch": 0.5472060248231487, "grad_norm": 6.151723703647532, "learning_rate": 4.47964907422271e-06, "loss": 17.487, "step": 29936 }, { "epoch": 0.5472243040195953, "grad_norm": 5.551917689989683, "learning_rate": 4.479354670952577e-06, "loss": 17.2774, "step": 29937 }, { "epoch": 0.5472425832160418, "grad_norm": 5.787757218133462, "learning_rate": 4.479060269507261e-06, "loss": 17.1996, "step": 29938 }, { "epoch": 0.5472608624124884, "grad_norm": 5.601362525356568, "learning_rate": 4.4787658698877955e-06, "loss": 17.321, "step": 29939 }, { "epoch": 0.5472791416089349, "grad_norm": 6.875320977600868, "learning_rate": 4.4784714720952085e-06, "loss": 17.8362, "step": 29940 }, { "epoch": 0.5472974208053814, "grad_norm": 8.0104342615916, "learning_rate": 4.478177076130535e-06, "loss": 18.3389, "step": 29941 }, { "epoch": 0.5473157000018279, "grad_norm": 5.562123091090518, "learning_rate": 4.477882681994806e-06, "loss": 17.2691, "step": 29942 }, { "epoch": 0.5473339791982744, "grad_norm": 6.519372776513574, "learning_rate": 4.477588289689051e-06, "loss": 17.6525, "step": 29943 }, { "epoch": 0.547352258394721, "grad_norm": 6.951966067783197, "learning_rate": 4.477293899214306e-06, "loss": 17.7864, "step": 29944 }, { "epoch": 0.5473705375911675, "grad_norm": 5.989651002258745, "learning_rate": 4.4769995105716e-06, "loss": 17.3813, "step": 29945 }, { "epoch": 0.547388816787614, "grad_norm": 6.2019339546484025, "learning_rate": 4.476705123761963e-06, "loss": 17.4525, "step": 29946 }, { "epoch": 0.5474070959840606, "grad_norm": 5.582581969317681, "learning_rate": 4.476410738786431e-06, "loss": 17.029, "step": 29947 }, { "epoch": 0.547425375180507, "grad_norm": 7.010349935392367, "learning_rate": 4.476116355646035e-06, "loss": 17.5314, "step": 29948 }, { "epoch": 0.5474436543769536, "grad_norm": 6.244018643934657, "learning_rate": 4.475821974341802e-06, "loss": 17.3772, "step": 29949 }, { "epoch": 0.5474619335734001, "grad_norm": 5.238346461774196, "learning_rate": 4.475527594874772e-06, "loss": 16.9993, "step": 29950 }, { "epoch": 0.5474802127698466, "grad_norm": 6.107015335162, "learning_rate": 4.475233217245969e-06, "loss": 17.1762, "step": 29951 }, { "epoch": 0.5474984919662932, "grad_norm": 7.476392497954179, "learning_rate": 4.474938841456429e-06, "loss": 17.7419, "step": 29952 }, { "epoch": 0.5475167711627397, "grad_norm": 6.390410346915936, "learning_rate": 4.474644467507184e-06, "loss": 17.3222, "step": 29953 }, { "epoch": 0.5475350503591863, "grad_norm": 7.0082484532262335, "learning_rate": 4.4743500953992615e-06, "loss": 17.4108, "step": 29954 }, { "epoch": 0.5475533295556327, "grad_norm": 5.904456891351366, "learning_rate": 4.474055725133699e-06, "loss": 17.3527, "step": 29955 }, { "epoch": 0.5475716087520792, "grad_norm": 6.247048350447342, "learning_rate": 4.473761356711526e-06, "loss": 17.2525, "step": 29956 }, { "epoch": 0.5475898879485258, "grad_norm": 5.680479951721527, "learning_rate": 4.473466990133771e-06, "loss": 17.106, "step": 29957 }, { "epoch": 0.5476081671449723, "grad_norm": 6.686189926657158, "learning_rate": 4.4731726254014715e-06, "loss": 17.2459, "step": 29958 }, { "epoch": 0.5476264463414189, "grad_norm": 7.167566793784869, "learning_rate": 4.472878262515654e-06, "loss": 17.8219, "step": 29959 }, { "epoch": 0.5476447255378654, "grad_norm": 4.666711318976103, "learning_rate": 4.472583901477354e-06, "loss": 16.8821, "step": 29960 }, { "epoch": 0.5476630047343118, "grad_norm": 6.6864426179505125, "learning_rate": 4.4722895422876015e-06, "loss": 17.7286, "step": 29961 }, { "epoch": 0.5476812839307584, "grad_norm": 5.288250113493308, "learning_rate": 4.471995184947428e-06, "loss": 16.9606, "step": 29962 }, { "epoch": 0.5476995631272049, "grad_norm": 7.895219090468151, "learning_rate": 4.471700829457865e-06, "loss": 17.732, "step": 29963 }, { "epoch": 0.5477178423236515, "grad_norm": 6.965260009994084, "learning_rate": 4.471406475819945e-06, "loss": 18.0287, "step": 29964 }, { "epoch": 0.547736121520098, "grad_norm": 6.988976500763091, "learning_rate": 4.471112124034699e-06, "loss": 17.6816, "step": 29965 }, { "epoch": 0.5477544007165445, "grad_norm": 5.925297690429422, "learning_rate": 4.47081777410316e-06, "loss": 17.2804, "step": 29966 }, { "epoch": 0.5477726799129911, "grad_norm": 7.376464805098315, "learning_rate": 4.4705234260263595e-06, "loss": 17.8528, "step": 29967 }, { "epoch": 0.5477909591094375, "grad_norm": 6.894372384625795, "learning_rate": 4.470229079805326e-06, "loss": 17.5793, "step": 29968 }, { "epoch": 0.5478092383058841, "grad_norm": 6.22915140806728, "learning_rate": 4.469934735441095e-06, "loss": 17.5881, "step": 29969 }, { "epoch": 0.5478275175023306, "grad_norm": 4.266056111251357, "learning_rate": 4.469640392934698e-06, "loss": 16.6043, "step": 29970 }, { "epoch": 0.5478457966987771, "grad_norm": 7.467336348393364, "learning_rate": 4.469346052287163e-06, "loss": 18.0513, "step": 29971 }, { "epoch": 0.5478640758952237, "grad_norm": 6.718057044436721, "learning_rate": 4.4690517134995264e-06, "loss": 17.3271, "step": 29972 }, { "epoch": 0.5478823550916702, "grad_norm": 6.7693545527490615, "learning_rate": 4.468757376572816e-06, "loss": 17.8766, "step": 29973 }, { "epoch": 0.5479006342881166, "grad_norm": 7.689775459930075, "learning_rate": 4.4684630415080645e-06, "loss": 17.4854, "step": 29974 }, { "epoch": 0.5479189134845632, "grad_norm": 8.145275970422656, "learning_rate": 4.468168708306305e-06, "loss": 17.9723, "step": 29975 }, { "epoch": 0.5479371926810097, "grad_norm": 7.267033447995452, "learning_rate": 4.467874376968569e-06, "loss": 17.5731, "step": 29976 }, { "epoch": 0.5479554718774563, "grad_norm": 9.1491570059217, "learning_rate": 4.467580047495885e-06, "loss": 18.2818, "step": 29977 }, { "epoch": 0.5479737510739028, "grad_norm": 6.074206309602419, "learning_rate": 4.467285719889287e-06, "loss": 17.1681, "step": 29978 }, { "epoch": 0.5479920302703493, "grad_norm": 5.50053312523098, "learning_rate": 4.466991394149806e-06, "loss": 17.0841, "step": 29979 }, { "epoch": 0.5480103094667959, "grad_norm": 6.007815274154165, "learning_rate": 4.466697070278475e-06, "loss": 17.2823, "step": 29980 }, { "epoch": 0.5480285886632423, "grad_norm": 5.478061548206217, "learning_rate": 4.466402748276326e-06, "loss": 17.0333, "step": 29981 }, { "epoch": 0.5480468678596889, "grad_norm": 6.833185976218334, "learning_rate": 4.466108428144386e-06, "loss": 17.4097, "step": 29982 }, { "epoch": 0.5480651470561354, "grad_norm": 6.388728626183493, "learning_rate": 4.465814109883691e-06, "loss": 17.3269, "step": 29983 }, { "epoch": 0.5480834262525819, "grad_norm": 6.274355790578766, "learning_rate": 4.465519793495272e-06, "loss": 17.7212, "step": 29984 }, { "epoch": 0.5481017054490285, "grad_norm": 8.0495402661432, "learning_rate": 4.465225478980157e-06, "loss": 18.2964, "step": 29985 }, { "epoch": 0.548119984645475, "grad_norm": 6.089359975626386, "learning_rate": 4.464931166339384e-06, "loss": 17.2935, "step": 29986 }, { "epoch": 0.5481382638419215, "grad_norm": 6.4774135308287075, "learning_rate": 4.464636855573979e-06, "loss": 17.7752, "step": 29987 }, { "epoch": 0.548156543038368, "grad_norm": 5.106578232302358, "learning_rate": 4.464342546684974e-06, "loss": 16.9542, "step": 29988 }, { "epoch": 0.5481748222348145, "grad_norm": 6.618566793374132, "learning_rate": 4.464048239673404e-06, "loss": 17.3029, "step": 29989 }, { "epoch": 0.5481931014312611, "grad_norm": 6.766067940977479, "learning_rate": 4.463753934540299e-06, "loss": 17.8327, "step": 29990 }, { "epoch": 0.5482113806277076, "grad_norm": 6.119638208226795, "learning_rate": 4.463459631286687e-06, "loss": 17.403, "step": 29991 }, { "epoch": 0.5482296598241542, "grad_norm": 7.1159792993383295, "learning_rate": 4.4631653299136045e-06, "loss": 17.8087, "step": 29992 }, { "epoch": 0.5482479390206006, "grad_norm": 6.676170516836438, "learning_rate": 4.462871030422081e-06, "loss": 17.7216, "step": 29993 }, { "epoch": 0.5482662182170471, "grad_norm": 5.591207476247649, "learning_rate": 4.462576732813147e-06, "loss": 17.3842, "step": 29994 }, { "epoch": 0.5482844974134937, "grad_norm": 8.261642097872205, "learning_rate": 4.462282437087836e-06, "loss": 18.1528, "step": 29995 }, { "epoch": 0.5483027766099402, "grad_norm": 8.436517305905586, "learning_rate": 4.461988143247177e-06, "loss": 17.9186, "step": 29996 }, { "epoch": 0.5483210558063868, "grad_norm": 6.362551058784096, "learning_rate": 4.461693851292203e-06, "loss": 17.7831, "step": 29997 }, { "epoch": 0.5483393350028333, "grad_norm": 5.391059584870672, "learning_rate": 4.461399561223947e-06, "loss": 17.1725, "step": 29998 }, { "epoch": 0.5483576141992798, "grad_norm": 8.866131825960062, "learning_rate": 4.461105273043436e-06, "loss": 18.3238, "step": 29999 }, { "epoch": 0.5483758933957263, "grad_norm": 6.12238121911726, "learning_rate": 4.460810986751707e-06, "loss": 17.6901, "step": 30000 }, { "epoch": 0.5483941725921728, "grad_norm": 7.041084183841055, "learning_rate": 4.460516702349788e-06, "loss": 17.7879, "step": 30001 }, { "epoch": 0.5484124517886194, "grad_norm": 6.260382845539813, "learning_rate": 4.460222419838709e-06, "loss": 17.3415, "step": 30002 }, { "epoch": 0.5484307309850659, "grad_norm": 7.5440139275690274, "learning_rate": 4.459928139219507e-06, "loss": 18.4351, "step": 30003 }, { "epoch": 0.5484490101815124, "grad_norm": 7.923116374808737, "learning_rate": 4.4596338604932095e-06, "loss": 18.3252, "step": 30004 }, { "epoch": 0.548467289377959, "grad_norm": 7.42695098724457, "learning_rate": 4.459339583660846e-06, "loss": 18.4082, "step": 30005 }, { "epoch": 0.5484855685744054, "grad_norm": 6.871676214599112, "learning_rate": 4.459045308723452e-06, "loss": 17.9584, "step": 30006 }, { "epoch": 0.548503847770852, "grad_norm": 4.912398867968541, "learning_rate": 4.458751035682058e-06, "loss": 17.0354, "step": 30007 }, { "epoch": 0.5485221269672985, "grad_norm": 6.00925260492537, "learning_rate": 4.458456764537692e-06, "loss": 17.3476, "step": 30008 }, { "epoch": 0.548540406163745, "grad_norm": 7.441901245884984, "learning_rate": 4.458162495291391e-06, "loss": 17.9354, "step": 30009 }, { "epoch": 0.5485586853601916, "grad_norm": 5.865508839279307, "learning_rate": 4.457868227944181e-06, "loss": 17.3786, "step": 30010 }, { "epoch": 0.5485769645566381, "grad_norm": 5.281108427533841, "learning_rate": 4.457573962497096e-06, "loss": 17.0944, "step": 30011 }, { "epoch": 0.5485952437530847, "grad_norm": 6.709081683015869, "learning_rate": 4.457279698951169e-06, "loss": 17.7829, "step": 30012 }, { "epoch": 0.5486135229495311, "grad_norm": 4.939857191218929, "learning_rate": 4.4569854373074275e-06, "loss": 17.0502, "step": 30013 }, { "epoch": 0.5486318021459776, "grad_norm": 5.771543900995329, "learning_rate": 4.456691177566908e-06, "loss": 17.3419, "step": 30014 }, { "epoch": 0.5486500813424242, "grad_norm": 5.7580274581012345, "learning_rate": 4.456396919730636e-06, "loss": 17.3636, "step": 30015 }, { "epoch": 0.5486683605388707, "grad_norm": 6.570380678508645, "learning_rate": 4.456102663799646e-06, "loss": 17.7161, "step": 30016 }, { "epoch": 0.5486866397353173, "grad_norm": 8.715043169192343, "learning_rate": 4.45580840977497e-06, "loss": 17.7619, "step": 30017 }, { "epoch": 0.5487049189317638, "grad_norm": 6.075514121629997, "learning_rate": 4.4555141576576384e-06, "loss": 17.3589, "step": 30018 }, { "epoch": 0.5487231981282102, "grad_norm": 6.278777143825685, "learning_rate": 4.4552199074486814e-06, "loss": 17.5513, "step": 30019 }, { "epoch": 0.5487414773246568, "grad_norm": 6.2361422365459305, "learning_rate": 4.4549256591491315e-06, "loss": 17.6894, "step": 30020 }, { "epoch": 0.5487597565211033, "grad_norm": 5.910979204989236, "learning_rate": 4.454631412760021e-06, "loss": 17.4392, "step": 30021 }, { "epoch": 0.5487780357175499, "grad_norm": 6.88514308069649, "learning_rate": 4.454337168282378e-06, "loss": 17.8553, "step": 30022 }, { "epoch": 0.5487963149139964, "grad_norm": 5.8870582332644394, "learning_rate": 4.454042925717238e-06, "loss": 17.2131, "step": 30023 }, { "epoch": 0.5488145941104429, "grad_norm": 5.479730277338284, "learning_rate": 4.453748685065629e-06, "loss": 17.2547, "step": 30024 }, { "epoch": 0.5488328733068895, "grad_norm": 7.355513214633129, "learning_rate": 4.453454446328583e-06, "loss": 17.5338, "step": 30025 }, { "epoch": 0.5488511525033359, "grad_norm": 8.121595834451627, "learning_rate": 4.453160209507133e-06, "loss": 17.8696, "step": 30026 }, { "epoch": 0.5488694316997825, "grad_norm": 6.812594875926578, "learning_rate": 4.452865974602307e-06, "loss": 17.4453, "step": 30027 }, { "epoch": 0.548887710896229, "grad_norm": 6.540145310093475, "learning_rate": 4.452571741615139e-06, "loss": 17.48, "step": 30028 }, { "epoch": 0.5489059900926755, "grad_norm": 5.630384191677534, "learning_rate": 4.452277510546662e-06, "loss": 17.1112, "step": 30029 }, { "epoch": 0.5489242692891221, "grad_norm": 7.987867278810087, "learning_rate": 4.451983281397901e-06, "loss": 17.7469, "step": 30030 }, { "epoch": 0.5489425484855686, "grad_norm": 5.86625442318381, "learning_rate": 4.451689054169895e-06, "loss": 17.3055, "step": 30031 }, { "epoch": 0.5489608276820152, "grad_norm": 6.294543778323252, "learning_rate": 4.451394828863668e-06, "loss": 17.5372, "step": 30032 }, { "epoch": 0.5489791068784616, "grad_norm": 6.354159081101994, "learning_rate": 4.451100605480256e-06, "loss": 17.8528, "step": 30033 }, { "epoch": 0.5489973860749081, "grad_norm": 5.809564193664952, "learning_rate": 4.45080638402069e-06, "loss": 17.531, "step": 30034 }, { "epoch": 0.5490156652713547, "grad_norm": 5.280773195490768, "learning_rate": 4.450512164485999e-06, "loss": 17.1262, "step": 30035 }, { "epoch": 0.5490339444678012, "grad_norm": 6.23026191458919, "learning_rate": 4.4502179468772134e-06, "loss": 17.709, "step": 30036 }, { "epoch": 0.5490522236642478, "grad_norm": 10.320013487943324, "learning_rate": 4.4499237311953675e-06, "loss": 17.4595, "step": 30037 }, { "epoch": 0.5490705028606943, "grad_norm": 6.220799922220343, "learning_rate": 4.449629517441492e-06, "loss": 17.3898, "step": 30038 }, { "epoch": 0.5490887820571407, "grad_norm": 7.281918066112626, "learning_rate": 4.449335305616616e-06, "loss": 17.7072, "step": 30039 }, { "epoch": 0.5491070612535873, "grad_norm": 5.602179369946772, "learning_rate": 4.4490410957217735e-06, "loss": 17.244, "step": 30040 }, { "epoch": 0.5491253404500338, "grad_norm": 7.184792133354781, "learning_rate": 4.448746887757992e-06, "loss": 17.5119, "step": 30041 }, { "epoch": 0.5491436196464803, "grad_norm": 5.973725805395779, "learning_rate": 4.448452681726305e-06, "loss": 17.1449, "step": 30042 }, { "epoch": 0.5491618988429269, "grad_norm": 4.909465948983754, "learning_rate": 4.4481584776277455e-06, "loss": 16.827, "step": 30043 }, { "epoch": 0.5491801780393734, "grad_norm": 6.702561374876034, "learning_rate": 4.447864275463341e-06, "loss": 17.5531, "step": 30044 }, { "epoch": 0.54919845723582, "grad_norm": 7.23076288037877, "learning_rate": 4.447570075234125e-06, "loss": 17.7147, "step": 30045 }, { "epoch": 0.5492167364322664, "grad_norm": 7.453734277905452, "learning_rate": 4.447275876941128e-06, "loss": 17.6868, "step": 30046 }, { "epoch": 0.5492350156287129, "grad_norm": 4.892551102010612, "learning_rate": 4.446981680585381e-06, "loss": 16.8286, "step": 30047 }, { "epoch": 0.5492532948251595, "grad_norm": 7.439706715866182, "learning_rate": 4.446687486167916e-06, "loss": 18.08, "step": 30048 }, { "epoch": 0.549271574021606, "grad_norm": 7.328486625108682, "learning_rate": 4.446393293689763e-06, "loss": 17.5704, "step": 30049 }, { "epoch": 0.5492898532180526, "grad_norm": 5.2015184738610225, "learning_rate": 4.446099103151952e-06, "loss": 17.0694, "step": 30050 }, { "epoch": 0.549308132414499, "grad_norm": 8.109999306308653, "learning_rate": 4.445804914555516e-06, "loss": 18.1133, "step": 30051 }, { "epoch": 0.5493264116109455, "grad_norm": 7.844600690358717, "learning_rate": 4.4455107279014875e-06, "loss": 17.5223, "step": 30052 }, { "epoch": 0.5493446908073921, "grad_norm": 8.297474750292066, "learning_rate": 4.445216543190893e-06, "loss": 18.2614, "step": 30053 }, { "epoch": 0.5493629700038386, "grad_norm": 9.219467900855433, "learning_rate": 4.4449223604247695e-06, "loss": 18.7127, "step": 30054 }, { "epoch": 0.5493812492002852, "grad_norm": 5.919370136240793, "learning_rate": 4.444628179604143e-06, "loss": 17.5285, "step": 30055 }, { "epoch": 0.5493995283967317, "grad_norm": 5.5207185253591256, "learning_rate": 4.4443340007300455e-06, "loss": 17.1255, "step": 30056 }, { "epoch": 0.5494178075931782, "grad_norm": 7.07466306586394, "learning_rate": 4.444039823803512e-06, "loss": 17.5243, "step": 30057 }, { "epoch": 0.5494360867896247, "grad_norm": 7.11849832948702, "learning_rate": 4.443745648825567e-06, "loss": 17.7048, "step": 30058 }, { "epoch": 0.5494543659860712, "grad_norm": 4.9873486121891, "learning_rate": 4.443451475797249e-06, "loss": 16.9014, "step": 30059 }, { "epoch": 0.5494726451825178, "grad_norm": 6.694497552438027, "learning_rate": 4.4431573047195835e-06, "loss": 17.8681, "step": 30060 }, { "epoch": 0.5494909243789643, "grad_norm": 6.330104374862356, "learning_rate": 4.4428631355936024e-06, "loss": 17.4925, "step": 30061 }, { "epoch": 0.5495092035754108, "grad_norm": 6.879520004461866, "learning_rate": 4.44256896842034e-06, "loss": 17.5174, "step": 30062 }, { "epoch": 0.5495274827718574, "grad_norm": 7.970195974292491, "learning_rate": 4.442274803200825e-06, "loss": 17.6555, "step": 30063 }, { "epoch": 0.5495457619683038, "grad_norm": 5.902935871367927, "learning_rate": 4.441980639936086e-06, "loss": 17.3751, "step": 30064 }, { "epoch": 0.5495640411647504, "grad_norm": 5.760487613557208, "learning_rate": 4.441686478627157e-06, "loss": 17.2579, "step": 30065 }, { "epoch": 0.5495823203611969, "grad_norm": 5.76115156608336, "learning_rate": 4.44139231927507e-06, "loss": 17.2049, "step": 30066 }, { "epoch": 0.5496005995576434, "grad_norm": 6.8803902816721525, "learning_rate": 4.4410981618808526e-06, "loss": 17.6651, "step": 30067 }, { "epoch": 0.54961887875409, "grad_norm": 5.097870764927917, "learning_rate": 4.4408040064455396e-06, "loss": 16.7743, "step": 30068 }, { "epoch": 0.5496371579505365, "grad_norm": 6.398470439051112, "learning_rate": 4.440509852970159e-06, "loss": 17.5102, "step": 30069 }, { "epoch": 0.5496554371469831, "grad_norm": 6.354966518781314, "learning_rate": 4.440215701455741e-06, "loss": 17.3118, "step": 30070 }, { "epoch": 0.5496737163434295, "grad_norm": 5.510823637684201, "learning_rate": 4.439921551903322e-06, "loss": 17.3302, "step": 30071 }, { "epoch": 0.549691995539876, "grad_norm": 5.570880567862861, "learning_rate": 4.439627404313926e-06, "loss": 17.0881, "step": 30072 }, { "epoch": 0.5497102747363226, "grad_norm": 7.190396782650435, "learning_rate": 4.4393332586885905e-06, "loss": 17.6907, "step": 30073 }, { "epoch": 0.5497285539327691, "grad_norm": 5.544839836334417, "learning_rate": 4.439039115028342e-06, "loss": 17.1032, "step": 30074 }, { "epoch": 0.5497468331292157, "grad_norm": 5.952794981427437, "learning_rate": 4.43874497333421e-06, "loss": 17.3158, "step": 30075 }, { "epoch": 0.5497651123256622, "grad_norm": 7.023334255754825, "learning_rate": 4.438450833607232e-06, "loss": 18.0733, "step": 30076 }, { "epoch": 0.5497833915221086, "grad_norm": 6.961188311784665, "learning_rate": 4.438156695848434e-06, "loss": 17.8102, "step": 30077 }, { "epoch": 0.5498016707185552, "grad_norm": 6.504575491134482, "learning_rate": 4.437862560058847e-06, "loss": 17.5339, "step": 30078 }, { "epoch": 0.5498199499150017, "grad_norm": 7.707167425312774, "learning_rate": 4.437568426239504e-06, "loss": 17.493, "step": 30079 }, { "epoch": 0.5498382291114483, "grad_norm": 5.3780813477201175, "learning_rate": 4.437274294391435e-06, "loss": 17.2028, "step": 30080 }, { "epoch": 0.5498565083078948, "grad_norm": 6.74989834649112, "learning_rate": 4.436980164515669e-06, "loss": 17.9462, "step": 30081 }, { "epoch": 0.5498747875043413, "grad_norm": 8.632713814725113, "learning_rate": 4.436686036613241e-06, "loss": 18.1658, "step": 30082 }, { "epoch": 0.5498930667007879, "grad_norm": 5.792372615866103, "learning_rate": 4.436391910685179e-06, "loss": 17.1583, "step": 30083 }, { "epoch": 0.5499113458972343, "grad_norm": 6.672212691783152, "learning_rate": 4.436097786732512e-06, "loss": 17.4401, "step": 30084 }, { "epoch": 0.5499296250936809, "grad_norm": 6.100105603222309, "learning_rate": 4.4358036647562764e-06, "loss": 17.373, "step": 30085 }, { "epoch": 0.5499479042901274, "grad_norm": 5.696764929691293, "learning_rate": 4.435509544757498e-06, "loss": 17.0011, "step": 30086 }, { "epoch": 0.5499661834865739, "grad_norm": 5.167279766535078, "learning_rate": 4.435215426737211e-06, "loss": 17.1422, "step": 30087 }, { "epoch": 0.5499844626830205, "grad_norm": 6.943192344259093, "learning_rate": 4.434921310696444e-06, "loss": 17.3907, "step": 30088 }, { "epoch": 0.550002741879467, "grad_norm": 6.393559573148072, "learning_rate": 4.434627196636228e-06, "loss": 17.5005, "step": 30089 }, { "epoch": 0.5500210210759136, "grad_norm": 6.925818208451736, "learning_rate": 4.434333084557597e-06, "loss": 17.3341, "step": 30090 }, { "epoch": 0.55003930027236, "grad_norm": 6.800288302209815, "learning_rate": 4.434038974461578e-06, "loss": 17.491, "step": 30091 }, { "epoch": 0.5500575794688065, "grad_norm": 6.603762413383734, "learning_rate": 4.433744866349204e-06, "loss": 17.4876, "step": 30092 }, { "epoch": 0.5500758586652531, "grad_norm": 4.781891132156435, "learning_rate": 4.433450760221504e-06, "loss": 17.0773, "step": 30093 }, { "epoch": 0.5500941378616996, "grad_norm": 7.360680508754002, "learning_rate": 4.433156656079512e-06, "loss": 17.6945, "step": 30094 }, { "epoch": 0.5501124170581462, "grad_norm": 5.8102613603219995, "learning_rate": 4.432862553924254e-06, "loss": 17.1878, "step": 30095 }, { "epoch": 0.5501306962545927, "grad_norm": 5.473274112565679, "learning_rate": 4.432568453756765e-06, "loss": 17.0103, "step": 30096 }, { "epoch": 0.5501489754510391, "grad_norm": 6.1505990358473355, "learning_rate": 4.432274355578075e-06, "loss": 17.0957, "step": 30097 }, { "epoch": 0.5501672546474857, "grad_norm": 7.275899275273455, "learning_rate": 4.431980259389212e-06, "loss": 18.1979, "step": 30098 }, { "epoch": 0.5501855338439322, "grad_norm": 7.5334975030339875, "learning_rate": 4.431686165191212e-06, "loss": 17.638, "step": 30099 }, { "epoch": 0.5502038130403788, "grad_norm": 6.507631276780941, "learning_rate": 4.4313920729851e-06, "loss": 17.6094, "step": 30100 }, { "epoch": 0.5502220922368253, "grad_norm": 8.517191612709452, "learning_rate": 4.431097982771909e-06, "loss": 18.5016, "step": 30101 }, { "epoch": 0.5502403714332718, "grad_norm": 8.380059322660932, "learning_rate": 4.430803894552673e-06, "loss": 17.9571, "step": 30102 }, { "epoch": 0.5502586506297183, "grad_norm": 4.992579067904614, "learning_rate": 4.430509808328417e-06, "loss": 17.014, "step": 30103 }, { "epoch": 0.5502769298261648, "grad_norm": 7.436591878631617, "learning_rate": 4.430215724100177e-06, "loss": 17.911, "step": 30104 }, { "epoch": 0.5502952090226114, "grad_norm": 6.445802706612601, "learning_rate": 4.42992164186898e-06, "loss": 17.6193, "step": 30105 }, { "epoch": 0.5503134882190579, "grad_norm": 5.204358655279812, "learning_rate": 4.4296275616358586e-06, "loss": 17.2688, "step": 30106 }, { "epoch": 0.5503317674155044, "grad_norm": 6.150376811200465, "learning_rate": 4.429333483401844e-06, "loss": 17.3935, "step": 30107 }, { "epoch": 0.550350046611951, "grad_norm": 7.663929037799493, "learning_rate": 4.429039407167966e-06, "loss": 18.0979, "step": 30108 }, { "epoch": 0.5503683258083975, "grad_norm": 7.339447585957471, "learning_rate": 4.428745332935253e-06, "loss": 18.1889, "step": 30109 }, { "epoch": 0.5503866050048439, "grad_norm": 4.432713261157599, "learning_rate": 4.4284512607047406e-06, "loss": 16.6285, "step": 30110 }, { "epoch": 0.5504048842012905, "grad_norm": 7.182126491550373, "learning_rate": 4.4281571904774565e-06, "loss": 17.6895, "step": 30111 }, { "epoch": 0.550423163397737, "grad_norm": 6.147319984971938, "learning_rate": 4.42786312225443e-06, "loss": 17.4386, "step": 30112 }, { "epoch": 0.5504414425941836, "grad_norm": 5.7042479689883985, "learning_rate": 4.427569056036696e-06, "loss": 16.9866, "step": 30113 }, { "epoch": 0.5504597217906301, "grad_norm": 6.660785753951066, "learning_rate": 4.427274991825282e-06, "loss": 17.8478, "step": 30114 }, { "epoch": 0.5504780009870766, "grad_norm": 5.375214049413267, "learning_rate": 4.426980929621218e-06, "loss": 16.9939, "step": 30115 }, { "epoch": 0.5504962801835231, "grad_norm": 6.138046654478134, "learning_rate": 4.426686869425538e-06, "loss": 17.5067, "step": 30116 }, { "epoch": 0.5505145593799696, "grad_norm": 6.564527356448167, "learning_rate": 4.4263928112392715e-06, "loss": 17.4227, "step": 30117 }, { "epoch": 0.5505328385764162, "grad_norm": 6.188220081891025, "learning_rate": 4.426098755063446e-06, "loss": 17.4902, "step": 30118 }, { "epoch": 0.5505511177728627, "grad_norm": 7.0042106668331945, "learning_rate": 4.425804700899095e-06, "loss": 18.0548, "step": 30119 }, { "epoch": 0.5505693969693092, "grad_norm": 5.2702306964089125, "learning_rate": 4.425510648747248e-06, "loss": 17.3659, "step": 30120 }, { "epoch": 0.5505876761657558, "grad_norm": 6.127995598678103, "learning_rate": 4.425216598608939e-06, "loss": 17.4695, "step": 30121 }, { "epoch": 0.5506059553622022, "grad_norm": 7.355245643253364, "learning_rate": 4.424922550485195e-06, "loss": 18.0353, "step": 30122 }, { "epoch": 0.5506242345586488, "grad_norm": 5.885694903219933, "learning_rate": 4.4246285043770466e-06, "loss": 17.5176, "step": 30123 }, { "epoch": 0.5506425137550953, "grad_norm": 6.939436496647176, "learning_rate": 4.424334460285526e-06, "loss": 17.7709, "step": 30124 }, { "epoch": 0.5506607929515418, "grad_norm": 6.28978412841182, "learning_rate": 4.4240404182116636e-06, "loss": 17.5956, "step": 30125 }, { "epoch": 0.5506790721479884, "grad_norm": 8.319874107063598, "learning_rate": 4.423746378156488e-06, "loss": 18.3335, "step": 30126 }, { "epoch": 0.5506973513444349, "grad_norm": 6.084329402680735, "learning_rate": 4.423452340121033e-06, "loss": 17.4851, "step": 30127 }, { "epoch": 0.5507156305408815, "grad_norm": 5.518099458191808, "learning_rate": 4.423158304106326e-06, "loss": 17.1785, "step": 30128 }, { "epoch": 0.5507339097373279, "grad_norm": 8.021383396148854, "learning_rate": 4.4228642701134e-06, "loss": 17.9974, "step": 30129 }, { "epoch": 0.5507521889337744, "grad_norm": 5.867686479487178, "learning_rate": 4.422570238143285e-06, "loss": 17.3011, "step": 30130 }, { "epoch": 0.550770468130221, "grad_norm": 7.527360713157865, "learning_rate": 4.4222762081970115e-06, "loss": 17.283, "step": 30131 }, { "epoch": 0.5507887473266675, "grad_norm": 7.210463627428066, "learning_rate": 4.421982180275608e-06, "loss": 17.6962, "step": 30132 }, { "epoch": 0.5508070265231141, "grad_norm": 7.117537311845487, "learning_rate": 4.421688154380107e-06, "loss": 17.7992, "step": 30133 }, { "epoch": 0.5508253057195606, "grad_norm": 7.266332261010944, "learning_rate": 4.4213941305115395e-06, "loss": 17.7835, "step": 30134 }, { "epoch": 0.550843584916007, "grad_norm": 8.34012851755377, "learning_rate": 4.421100108670936e-06, "loss": 18.2049, "step": 30135 }, { "epoch": 0.5508618641124536, "grad_norm": 7.636861746597017, "learning_rate": 4.420806088859328e-06, "loss": 17.7986, "step": 30136 }, { "epoch": 0.5508801433089001, "grad_norm": 5.286071520105252, "learning_rate": 4.42051207107774e-06, "loss": 17.1054, "step": 30137 }, { "epoch": 0.5508984225053467, "grad_norm": 5.419393806452107, "learning_rate": 4.420218055327209e-06, "loss": 16.9718, "step": 30138 }, { "epoch": 0.5509167017017932, "grad_norm": 4.78351067100413, "learning_rate": 4.419924041608765e-06, "loss": 16.8289, "step": 30139 }, { "epoch": 0.5509349808982397, "grad_norm": 4.732479688684023, "learning_rate": 4.419630029923435e-06, "loss": 16.9182, "step": 30140 }, { "epoch": 0.5509532600946863, "grad_norm": 8.282357832162468, "learning_rate": 4.4193360202722525e-06, "loss": 18.7895, "step": 30141 }, { "epoch": 0.5509715392911327, "grad_norm": 5.659146277013938, "learning_rate": 4.419042012656246e-06, "loss": 17.0761, "step": 30142 }, { "epoch": 0.5509898184875793, "grad_norm": 6.192306189014837, "learning_rate": 4.418748007076446e-06, "loss": 17.3479, "step": 30143 }, { "epoch": 0.5510080976840258, "grad_norm": 6.718496371482483, "learning_rate": 4.418454003533886e-06, "loss": 17.4435, "step": 30144 }, { "epoch": 0.5510263768804723, "grad_norm": 5.756801204543571, "learning_rate": 4.418160002029594e-06, "loss": 17.4155, "step": 30145 }, { "epoch": 0.5510446560769189, "grad_norm": 7.449093064536033, "learning_rate": 4.417866002564599e-06, "loss": 17.9352, "step": 30146 }, { "epoch": 0.5510629352733654, "grad_norm": 6.134459682484548, "learning_rate": 4.417572005139933e-06, "loss": 17.3372, "step": 30147 }, { "epoch": 0.551081214469812, "grad_norm": 5.747740041791798, "learning_rate": 4.417278009756627e-06, "loss": 16.9981, "step": 30148 }, { "epoch": 0.5510994936662584, "grad_norm": 5.628417875338373, "learning_rate": 4.416984016415713e-06, "loss": 17.3454, "step": 30149 }, { "epoch": 0.5511177728627049, "grad_norm": 5.675031451416584, "learning_rate": 4.416690025118218e-06, "loss": 17.0975, "step": 30150 }, { "epoch": 0.5511360520591515, "grad_norm": 6.498990236701142, "learning_rate": 4.416396035865173e-06, "loss": 17.4499, "step": 30151 }, { "epoch": 0.551154331255598, "grad_norm": 7.657877908403108, "learning_rate": 4.41610204865761e-06, "loss": 17.663, "step": 30152 }, { "epoch": 0.5511726104520446, "grad_norm": 5.957615394935194, "learning_rate": 4.41580806349656e-06, "loss": 17.3599, "step": 30153 }, { "epoch": 0.5511908896484911, "grad_norm": 7.294539776459871, "learning_rate": 4.41551408038305e-06, "loss": 17.8323, "step": 30154 }, { "epoch": 0.5512091688449375, "grad_norm": 6.4251807616922, "learning_rate": 4.415220099318112e-06, "loss": 17.551, "step": 30155 }, { "epoch": 0.5512274480413841, "grad_norm": 5.459328489114766, "learning_rate": 4.41492612030278e-06, "loss": 17.0093, "step": 30156 }, { "epoch": 0.5512457272378306, "grad_norm": 5.600325944184191, "learning_rate": 4.414632143338078e-06, "loss": 17.226, "step": 30157 }, { "epoch": 0.5512640064342772, "grad_norm": 6.581119285060729, "learning_rate": 4.414338168425042e-06, "loss": 17.5038, "step": 30158 }, { "epoch": 0.5512822856307237, "grad_norm": 6.075156441205385, "learning_rate": 4.414044195564698e-06, "loss": 16.9063, "step": 30159 }, { "epoch": 0.5513005648271702, "grad_norm": 6.125651863527007, "learning_rate": 4.413750224758077e-06, "loss": 17.7006, "step": 30160 }, { "epoch": 0.5513188440236167, "grad_norm": 7.420628650552498, "learning_rate": 4.4134562560062146e-06, "loss": 18.1768, "step": 30161 }, { "epoch": 0.5513371232200632, "grad_norm": 5.665290163825649, "learning_rate": 4.4131622893101354e-06, "loss": 17.2341, "step": 30162 }, { "epoch": 0.5513554024165098, "grad_norm": 5.470779187265795, "learning_rate": 4.41286832467087e-06, "loss": 17.433, "step": 30163 }, { "epoch": 0.5513736816129563, "grad_norm": 6.030992413713156, "learning_rate": 4.412574362089452e-06, "loss": 17.283, "step": 30164 }, { "epoch": 0.5513919608094028, "grad_norm": 5.518921629234116, "learning_rate": 4.412280401566908e-06, "loss": 17.1875, "step": 30165 }, { "epoch": 0.5514102400058494, "grad_norm": 7.52056276976164, "learning_rate": 4.411986443104272e-06, "loss": 17.4791, "step": 30166 }, { "epoch": 0.5514285192022959, "grad_norm": 6.464022474978419, "learning_rate": 4.411692486702572e-06, "loss": 17.2576, "step": 30167 }, { "epoch": 0.5514467983987424, "grad_norm": 5.4720098536607304, "learning_rate": 4.4113985323628376e-06, "loss": 17.1809, "step": 30168 }, { "epoch": 0.5514650775951889, "grad_norm": 16.282498921307358, "learning_rate": 4.411104580086101e-06, "loss": 18.6388, "step": 30169 }, { "epoch": 0.5514833567916354, "grad_norm": 6.062034282700581, "learning_rate": 4.410810629873393e-06, "loss": 17.3368, "step": 30170 }, { "epoch": 0.551501635988082, "grad_norm": 5.973932078359239, "learning_rate": 4.4105166817257414e-06, "loss": 17.3173, "step": 30171 }, { "epoch": 0.5515199151845285, "grad_norm": 6.655094354611399, "learning_rate": 4.4102227356441784e-06, "loss": 17.7443, "step": 30172 }, { "epoch": 0.5515381943809751, "grad_norm": 5.687161172251759, "learning_rate": 4.409928791629733e-06, "loss": 17.312, "step": 30173 }, { "epoch": 0.5515564735774215, "grad_norm": 6.674728039257616, "learning_rate": 4.409634849683435e-06, "loss": 17.5615, "step": 30174 }, { "epoch": 0.551574752773868, "grad_norm": 6.3463502433616625, "learning_rate": 4.409340909806318e-06, "loss": 17.3196, "step": 30175 }, { "epoch": 0.5515930319703146, "grad_norm": 5.612881125137073, "learning_rate": 4.409046971999409e-06, "loss": 17.081, "step": 30176 }, { "epoch": 0.5516113111667611, "grad_norm": 7.891742012591863, "learning_rate": 4.408753036263738e-06, "loss": 17.7839, "step": 30177 }, { "epoch": 0.5516295903632076, "grad_norm": 5.735277508547044, "learning_rate": 4.408459102600337e-06, "loss": 17.1423, "step": 30178 }, { "epoch": 0.5516478695596542, "grad_norm": 5.550132875653037, "learning_rate": 4.408165171010233e-06, "loss": 17.3862, "step": 30179 }, { "epoch": 0.5516661487561006, "grad_norm": 5.51712040167122, "learning_rate": 4.407871241494462e-06, "loss": 17.1867, "step": 30180 }, { "epoch": 0.5516844279525472, "grad_norm": 4.79231910038781, "learning_rate": 4.407577314054052e-06, "loss": 16.9295, "step": 30181 }, { "epoch": 0.5517027071489937, "grad_norm": 7.578676633796247, "learning_rate": 4.407283388690028e-06, "loss": 17.7791, "step": 30182 }, { "epoch": 0.5517209863454402, "grad_norm": 5.529157774847453, "learning_rate": 4.406989465403427e-06, "loss": 17.2524, "step": 30183 }, { "epoch": 0.5517392655418868, "grad_norm": 6.582552623549398, "learning_rate": 4.406695544195277e-06, "loss": 17.6916, "step": 30184 }, { "epoch": 0.5517575447383333, "grad_norm": 5.349411807715741, "learning_rate": 4.406401625066605e-06, "loss": 16.9618, "step": 30185 }, { "epoch": 0.5517758239347799, "grad_norm": 5.509576835181399, "learning_rate": 4.406107708018446e-06, "loss": 17.0843, "step": 30186 }, { "epoch": 0.5517941031312263, "grad_norm": 6.974712126684686, "learning_rate": 4.405813793051827e-06, "loss": 17.7234, "step": 30187 }, { "epoch": 0.5518123823276728, "grad_norm": 6.779168923659532, "learning_rate": 4.405519880167779e-06, "loss": 17.678, "step": 30188 }, { "epoch": 0.5518306615241194, "grad_norm": 5.762673653345211, "learning_rate": 4.405225969367333e-06, "loss": 17.0943, "step": 30189 }, { "epoch": 0.5518489407205659, "grad_norm": 7.100009686682578, "learning_rate": 4.40493206065152e-06, "loss": 17.8366, "step": 30190 }, { "epoch": 0.5518672199170125, "grad_norm": 6.265523965745149, "learning_rate": 4.404638154021364e-06, "loss": 17.4496, "step": 30191 }, { "epoch": 0.551885499113459, "grad_norm": 8.385237128357838, "learning_rate": 4.404344249477903e-06, "loss": 17.9356, "step": 30192 }, { "epoch": 0.5519037783099054, "grad_norm": 6.745909402395149, "learning_rate": 4.404050347022164e-06, "loss": 17.8305, "step": 30193 }, { "epoch": 0.551922057506352, "grad_norm": 5.947061888329876, "learning_rate": 4.403756446655174e-06, "loss": 17.0674, "step": 30194 }, { "epoch": 0.5519403367027985, "grad_norm": 7.455995980240113, "learning_rate": 4.403462548377968e-06, "loss": 17.6844, "step": 30195 }, { "epoch": 0.5519586158992451, "grad_norm": 7.6308839625690466, "learning_rate": 4.4031686521915724e-06, "loss": 18.0679, "step": 30196 }, { "epoch": 0.5519768950956916, "grad_norm": 5.4133699149898495, "learning_rate": 4.40287475809702e-06, "loss": 16.9803, "step": 30197 }, { "epoch": 0.5519951742921381, "grad_norm": 5.258260003565105, "learning_rate": 4.402580866095341e-06, "loss": 17.0773, "step": 30198 }, { "epoch": 0.5520134534885847, "grad_norm": 6.434222380501876, "learning_rate": 4.402286976187561e-06, "loss": 17.5685, "step": 30199 }, { "epoch": 0.5520317326850311, "grad_norm": 6.508550649676666, "learning_rate": 4.401993088374717e-06, "loss": 17.3027, "step": 30200 }, { "epoch": 0.5520500118814777, "grad_norm": 5.830867467180034, "learning_rate": 4.401699202657833e-06, "loss": 17.3244, "step": 30201 }, { "epoch": 0.5520682910779242, "grad_norm": 5.714404405060131, "learning_rate": 4.401405319037941e-06, "loss": 17.2118, "step": 30202 }, { "epoch": 0.5520865702743707, "grad_norm": 5.7606406916810995, "learning_rate": 4.4011114375160734e-06, "loss": 17.2273, "step": 30203 }, { "epoch": 0.5521048494708173, "grad_norm": 7.978179532859275, "learning_rate": 4.400817558093258e-06, "loss": 17.5675, "step": 30204 }, { "epoch": 0.5521231286672638, "grad_norm": 5.291497613306611, "learning_rate": 4.400523680770523e-06, "loss": 16.9066, "step": 30205 }, { "epoch": 0.5521414078637104, "grad_norm": 6.178258131720722, "learning_rate": 4.400229805548902e-06, "loss": 17.3986, "step": 30206 }, { "epoch": 0.5521596870601568, "grad_norm": 6.489090624712598, "learning_rate": 4.399935932429424e-06, "loss": 17.6892, "step": 30207 }, { "epoch": 0.5521779662566033, "grad_norm": 6.096661769607695, "learning_rate": 4.399642061413117e-06, "loss": 17.3259, "step": 30208 }, { "epoch": 0.5521962454530499, "grad_norm": 5.508193013313602, "learning_rate": 4.399348192501014e-06, "loss": 17.0868, "step": 30209 }, { "epoch": 0.5522145246494964, "grad_norm": 5.5425123925261515, "learning_rate": 4.399054325694141e-06, "loss": 17.1979, "step": 30210 }, { "epoch": 0.552232803845943, "grad_norm": 7.5516890245346495, "learning_rate": 4.398760460993533e-06, "loss": 18.3056, "step": 30211 }, { "epoch": 0.5522510830423895, "grad_norm": 4.998986698323605, "learning_rate": 4.398466598400218e-06, "loss": 16.998, "step": 30212 }, { "epoch": 0.5522693622388359, "grad_norm": 5.633212845378458, "learning_rate": 4.398172737915222e-06, "loss": 17.2503, "step": 30213 }, { "epoch": 0.5522876414352825, "grad_norm": 6.848624260221072, "learning_rate": 4.397878879539581e-06, "loss": 17.6044, "step": 30214 }, { "epoch": 0.552305920631729, "grad_norm": 8.359666414624277, "learning_rate": 4.397585023274321e-06, "loss": 18.6034, "step": 30215 }, { "epoch": 0.5523241998281756, "grad_norm": 6.531479112926587, "learning_rate": 4.3972911691204725e-06, "loss": 17.55, "step": 30216 }, { "epoch": 0.5523424790246221, "grad_norm": 5.122586717052425, "learning_rate": 4.39699731707907e-06, "loss": 16.9055, "step": 30217 }, { "epoch": 0.5523607582210686, "grad_norm": 6.146156526478349, "learning_rate": 4.396703467151136e-06, "loss": 17.4581, "step": 30218 }, { "epoch": 0.5523790374175152, "grad_norm": 6.356989487723886, "learning_rate": 4.396409619337705e-06, "loss": 17.3354, "step": 30219 }, { "epoch": 0.5523973166139616, "grad_norm": 6.74876374643555, "learning_rate": 4.396115773639805e-06, "loss": 18.0694, "step": 30220 }, { "epoch": 0.5524155958104082, "grad_norm": 6.314103156325294, "learning_rate": 4.395821930058469e-06, "loss": 17.4759, "step": 30221 }, { "epoch": 0.5524338750068547, "grad_norm": 5.656818576821866, "learning_rate": 4.395528088594722e-06, "loss": 17.1753, "step": 30222 }, { "epoch": 0.5524521542033012, "grad_norm": 6.882569570825177, "learning_rate": 4.3952342492495985e-06, "loss": 17.6632, "step": 30223 }, { "epoch": 0.5524704333997478, "grad_norm": 9.543015555831975, "learning_rate": 4.3949404120241266e-06, "loss": 18.4047, "step": 30224 }, { "epoch": 0.5524887125961943, "grad_norm": 6.258254024819142, "learning_rate": 4.394646576919334e-06, "loss": 17.4675, "step": 30225 }, { "epoch": 0.5525069917926408, "grad_norm": 7.859021091605838, "learning_rate": 4.394352743936255e-06, "loss": 18.1285, "step": 30226 }, { "epoch": 0.5525252709890873, "grad_norm": 5.956100926443144, "learning_rate": 4.394058913075915e-06, "loss": 17.1101, "step": 30227 }, { "epoch": 0.5525435501855338, "grad_norm": 8.108199872698917, "learning_rate": 4.393765084339347e-06, "loss": 18.1672, "step": 30228 }, { "epoch": 0.5525618293819804, "grad_norm": 6.838274958976816, "learning_rate": 4.393471257727581e-06, "loss": 17.3814, "step": 30229 }, { "epoch": 0.5525801085784269, "grad_norm": 7.469452286583123, "learning_rate": 4.393177433241644e-06, "loss": 18.0835, "step": 30230 }, { "epoch": 0.5525983877748735, "grad_norm": 6.134921237918217, "learning_rate": 4.392883610882568e-06, "loss": 17.661, "step": 30231 }, { "epoch": 0.55261666697132, "grad_norm": 6.250531664023144, "learning_rate": 4.3925897906513824e-06, "loss": 17.4529, "step": 30232 }, { "epoch": 0.5526349461677664, "grad_norm": 5.620125058983457, "learning_rate": 4.392295972549115e-06, "loss": 17.2834, "step": 30233 }, { "epoch": 0.552653225364213, "grad_norm": 7.765763751058777, "learning_rate": 4.3920021565768e-06, "loss": 17.8893, "step": 30234 }, { "epoch": 0.5526715045606595, "grad_norm": 7.442983162897202, "learning_rate": 4.391708342735465e-06, "loss": 18.0008, "step": 30235 }, { "epoch": 0.5526897837571061, "grad_norm": 7.861646178880158, "learning_rate": 4.391414531026137e-06, "loss": 17.9025, "step": 30236 }, { "epoch": 0.5527080629535526, "grad_norm": 5.57876852294131, "learning_rate": 4.391120721449849e-06, "loss": 17.011, "step": 30237 }, { "epoch": 0.552726342149999, "grad_norm": 6.336983426324883, "learning_rate": 4.390826914007631e-06, "loss": 17.4852, "step": 30238 }, { "epoch": 0.5527446213464456, "grad_norm": 6.312342973115463, "learning_rate": 4.390533108700511e-06, "loss": 17.449, "step": 30239 }, { "epoch": 0.5527629005428921, "grad_norm": 5.796303835086722, "learning_rate": 4.390239305529521e-06, "loss": 17.1823, "step": 30240 }, { "epoch": 0.5527811797393387, "grad_norm": 5.681771643584559, "learning_rate": 4.389945504495687e-06, "loss": 17.2692, "step": 30241 }, { "epoch": 0.5527994589357852, "grad_norm": 6.270779389936353, "learning_rate": 4.389651705600042e-06, "loss": 17.1669, "step": 30242 }, { "epoch": 0.5528177381322317, "grad_norm": 6.670528871897711, "learning_rate": 4.389357908843616e-06, "loss": 17.8519, "step": 30243 }, { "epoch": 0.5528360173286783, "grad_norm": 5.923667260454152, "learning_rate": 4.389064114227435e-06, "loss": 17.3574, "step": 30244 }, { "epoch": 0.5528542965251247, "grad_norm": 8.071349516509562, "learning_rate": 4.388770321752534e-06, "loss": 18.0075, "step": 30245 }, { "epoch": 0.5528725757215712, "grad_norm": 5.31513725374893, "learning_rate": 4.388476531419939e-06, "loss": 16.9235, "step": 30246 }, { "epoch": 0.5528908549180178, "grad_norm": 8.320806545795927, "learning_rate": 4.38818274323068e-06, "loss": 17.8873, "step": 30247 }, { "epoch": 0.5529091341144643, "grad_norm": 7.104302416639451, "learning_rate": 4.387888957185789e-06, "loss": 17.6857, "step": 30248 }, { "epoch": 0.5529274133109109, "grad_norm": 6.447085140218499, "learning_rate": 4.387595173286293e-06, "loss": 17.6231, "step": 30249 }, { "epoch": 0.5529456925073574, "grad_norm": 5.898164768668812, "learning_rate": 4.387301391533222e-06, "loss": 17.2551, "step": 30250 }, { "epoch": 0.5529639717038038, "grad_norm": 6.365802089110502, "learning_rate": 4.387007611927607e-06, "loss": 17.4282, "step": 30251 }, { "epoch": 0.5529822509002504, "grad_norm": 6.006388697691529, "learning_rate": 4.386713834470478e-06, "loss": 17.192, "step": 30252 }, { "epoch": 0.5530005300966969, "grad_norm": 6.85338304150964, "learning_rate": 4.3864200591628625e-06, "loss": 17.4481, "step": 30253 }, { "epoch": 0.5530188092931435, "grad_norm": 6.870322906759602, "learning_rate": 4.386126286005794e-06, "loss": 17.5518, "step": 30254 }, { "epoch": 0.55303708848959, "grad_norm": 5.981111430703467, "learning_rate": 4.385832515000296e-06, "loss": 17.3003, "step": 30255 }, { "epoch": 0.5530553676860365, "grad_norm": 5.665930348236462, "learning_rate": 4.385538746147403e-06, "loss": 16.9893, "step": 30256 }, { "epoch": 0.5530736468824831, "grad_norm": 7.295227666285047, "learning_rate": 4.385244979448145e-06, "loss": 17.2911, "step": 30257 }, { "epoch": 0.5530919260789295, "grad_norm": 6.046858023029833, "learning_rate": 4.384951214903548e-06, "loss": 17.0849, "step": 30258 }, { "epoch": 0.5531102052753761, "grad_norm": 5.917410224271163, "learning_rate": 4.384657452514645e-06, "loss": 17.3002, "step": 30259 }, { "epoch": 0.5531284844718226, "grad_norm": 5.9626603324754175, "learning_rate": 4.384363692282464e-06, "loss": 17.0717, "step": 30260 }, { "epoch": 0.5531467636682691, "grad_norm": 5.72418860858743, "learning_rate": 4.384069934208033e-06, "loss": 17.4669, "step": 30261 }, { "epoch": 0.5531650428647157, "grad_norm": 6.30467172117599, "learning_rate": 4.383776178292386e-06, "loss": 17.5421, "step": 30262 }, { "epoch": 0.5531833220611622, "grad_norm": 7.966081117944276, "learning_rate": 4.383482424536551e-06, "loss": 17.84, "step": 30263 }, { "epoch": 0.5532016012576088, "grad_norm": 6.949565813260994, "learning_rate": 4.383188672941553e-06, "loss": 17.1557, "step": 30264 }, { "epoch": 0.5532198804540552, "grad_norm": 6.433904224450848, "learning_rate": 4.382894923508426e-06, "loss": 17.4429, "step": 30265 }, { "epoch": 0.5532381596505017, "grad_norm": 5.321374126724819, "learning_rate": 4.382601176238201e-06, "loss": 17.1028, "step": 30266 }, { "epoch": 0.5532564388469483, "grad_norm": 7.336172873837642, "learning_rate": 4.382307431131902e-06, "loss": 17.7732, "step": 30267 }, { "epoch": 0.5532747180433948, "grad_norm": 6.055899856268193, "learning_rate": 4.382013688190565e-06, "loss": 17.4818, "step": 30268 }, { "epoch": 0.5532929972398414, "grad_norm": 6.1566353964938525, "learning_rate": 4.381719947415215e-06, "loss": 17.6951, "step": 30269 }, { "epoch": 0.5533112764362879, "grad_norm": 5.4633017195580615, "learning_rate": 4.381426208806882e-06, "loss": 17.2238, "step": 30270 }, { "epoch": 0.5533295556327343, "grad_norm": 6.134461733146662, "learning_rate": 4.381132472366598e-06, "loss": 17.2313, "step": 30271 }, { "epoch": 0.5533478348291809, "grad_norm": 6.769990426813929, "learning_rate": 4.38083873809539e-06, "loss": 17.4319, "step": 30272 }, { "epoch": 0.5533661140256274, "grad_norm": 7.664489691662575, "learning_rate": 4.380545005994289e-06, "loss": 17.7405, "step": 30273 }, { "epoch": 0.553384393222074, "grad_norm": 5.702560233341192, "learning_rate": 4.380251276064325e-06, "loss": 17.2491, "step": 30274 }, { "epoch": 0.5534026724185205, "grad_norm": 6.770899595331074, "learning_rate": 4.379957548306524e-06, "loss": 17.7744, "step": 30275 }, { "epoch": 0.553420951614967, "grad_norm": 7.1376424988965885, "learning_rate": 4.37966382272192e-06, "loss": 17.8623, "step": 30276 }, { "epoch": 0.5534392308114136, "grad_norm": 6.102778081184057, "learning_rate": 4.3793700993115396e-06, "loss": 17.3926, "step": 30277 }, { "epoch": 0.55345751000786, "grad_norm": 6.109637758771485, "learning_rate": 4.379076378076413e-06, "loss": 17.4199, "step": 30278 }, { "epoch": 0.5534757892043066, "grad_norm": 7.030661802553034, "learning_rate": 4.378782659017571e-06, "loss": 17.8899, "step": 30279 }, { "epoch": 0.5534940684007531, "grad_norm": 6.16235198297284, "learning_rate": 4.378488942136042e-06, "loss": 17.3848, "step": 30280 }, { "epoch": 0.5535123475971996, "grad_norm": 8.198914262615212, "learning_rate": 4.378195227432853e-06, "loss": 18.0128, "step": 30281 }, { "epoch": 0.5535306267936462, "grad_norm": 6.813783414193281, "learning_rate": 4.377901514909038e-06, "loss": 17.7419, "step": 30282 }, { "epoch": 0.5535489059900927, "grad_norm": 5.828140998240404, "learning_rate": 4.377607804565624e-06, "loss": 17.2969, "step": 30283 }, { "epoch": 0.5535671851865392, "grad_norm": 6.315315844769342, "learning_rate": 4.377314096403639e-06, "loss": 17.5496, "step": 30284 }, { "epoch": 0.5535854643829857, "grad_norm": 4.987911294645249, "learning_rate": 4.377020390424116e-06, "loss": 16.8573, "step": 30285 }, { "epoch": 0.5536037435794322, "grad_norm": 16.577468706848013, "learning_rate": 4.376726686628081e-06, "loss": 17.7807, "step": 30286 }, { "epoch": 0.5536220227758788, "grad_norm": 7.41044786764492, "learning_rate": 4.376432985016564e-06, "loss": 17.987, "step": 30287 }, { "epoch": 0.5536403019723253, "grad_norm": 5.375695334643534, "learning_rate": 4.376139285590598e-06, "loss": 16.985, "step": 30288 }, { "epoch": 0.5536585811687719, "grad_norm": 6.313366040196504, "learning_rate": 4.375845588351207e-06, "loss": 17.4675, "step": 30289 }, { "epoch": 0.5536768603652183, "grad_norm": 6.85715340793278, "learning_rate": 4.375551893299426e-06, "loss": 17.7346, "step": 30290 }, { "epoch": 0.5536951395616648, "grad_norm": 7.087756254984986, "learning_rate": 4.37525820043628e-06, "loss": 17.83, "step": 30291 }, { "epoch": 0.5537134187581114, "grad_norm": 5.422093143895342, "learning_rate": 4.3749645097627984e-06, "loss": 17.198, "step": 30292 }, { "epoch": 0.5537316979545579, "grad_norm": 6.806823687792971, "learning_rate": 4.3746708212800144e-06, "loss": 17.4154, "step": 30293 }, { "epoch": 0.5537499771510045, "grad_norm": 5.5027362080481925, "learning_rate": 4.3743771349889544e-06, "loss": 17.2116, "step": 30294 }, { "epoch": 0.553768256347451, "grad_norm": 5.704336839235598, "learning_rate": 4.374083450890647e-06, "loss": 17.0469, "step": 30295 }, { "epoch": 0.5537865355438975, "grad_norm": 6.61823883381611, "learning_rate": 4.373789768986123e-06, "loss": 17.4971, "step": 30296 }, { "epoch": 0.553804814740344, "grad_norm": 4.521445091106775, "learning_rate": 4.3734960892764125e-06, "loss": 16.7694, "step": 30297 }, { "epoch": 0.5538230939367905, "grad_norm": 5.991338274401712, "learning_rate": 4.3732024117625425e-06, "loss": 17.2053, "step": 30298 }, { "epoch": 0.5538413731332371, "grad_norm": 7.069635500078585, "learning_rate": 4.372908736445545e-06, "loss": 17.5617, "step": 30299 }, { "epoch": 0.5538596523296836, "grad_norm": 6.360220743939881, "learning_rate": 4.372615063326448e-06, "loss": 17.5486, "step": 30300 }, { "epoch": 0.5538779315261301, "grad_norm": 5.405791526397622, "learning_rate": 4.372321392406279e-06, "loss": 16.9709, "step": 30301 }, { "epoch": 0.5538962107225767, "grad_norm": 5.667728751783411, "learning_rate": 4.372027723686071e-06, "loss": 17.1731, "step": 30302 }, { "epoch": 0.5539144899190231, "grad_norm": 6.757191130227014, "learning_rate": 4.371734057166849e-06, "loss": 17.6579, "step": 30303 }, { "epoch": 0.5539327691154697, "grad_norm": 6.301407621089819, "learning_rate": 4.371440392849647e-06, "loss": 17.3382, "step": 30304 }, { "epoch": 0.5539510483119162, "grad_norm": 6.953617092456889, "learning_rate": 4.371146730735491e-06, "loss": 17.6847, "step": 30305 }, { "epoch": 0.5539693275083627, "grad_norm": 7.287142917768623, "learning_rate": 4.37085307082541e-06, "loss": 17.7371, "step": 30306 }, { "epoch": 0.5539876067048093, "grad_norm": 5.8503870665840125, "learning_rate": 4.370559413120436e-06, "loss": 17.3082, "step": 30307 }, { "epoch": 0.5540058859012558, "grad_norm": 5.550949929908652, "learning_rate": 4.370265757621598e-06, "loss": 17.0426, "step": 30308 }, { "epoch": 0.5540241650977024, "grad_norm": 5.828492032295973, "learning_rate": 4.369972104329921e-06, "loss": 17.3801, "step": 30309 }, { "epoch": 0.5540424442941488, "grad_norm": 6.228563586584846, "learning_rate": 4.369678453246438e-06, "loss": 17.4802, "step": 30310 }, { "epoch": 0.5540607234905953, "grad_norm": 6.200689603149399, "learning_rate": 4.369384804372178e-06, "loss": 17.1955, "step": 30311 }, { "epoch": 0.5540790026870419, "grad_norm": 5.905322592379656, "learning_rate": 4.369091157708168e-06, "loss": 17.2843, "step": 30312 }, { "epoch": 0.5540972818834884, "grad_norm": 7.590702224105861, "learning_rate": 4.368797513255441e-06, "loss": 17.9146, "step": 30313 }, { "epoch": 0.5541155610799349, "grad_norm": 8.308285445367972, "learning_rate": 4.368503871015022e-06, "loss": 18.3459, "step": 30314 }, { "epoch": 0.5541338402763815, "grad_norm": 7.29600765137834, "learning_rate": 4.368210230987942e-06, "loss": 18.069, "step": 30315 }, { "epoch": 0.5541521194728279, "grad_norm": 5.773948172888511, "learning_rate": 4.367916593175232e-06, "loss": 17.1965, "step": 30316 }, { "epoch": 0.5541703986692745, "grad_norm": 7.367777914015997, "learning_rate": 4.3676229575779175e-06, "loss": 17.8529, "step": 30317 }, { "epoch": 0.554188677865721, "grad_norm": 5.106540302907535, "learning_rate": 4.367329324197032e-06, "loss": 17.0493, "step": 30318 }, { "epoch": 0.5542069570621675, "grad_norm": 6.641690426031864, "learning_rate": 4.3670356930336014e-06, "loss": 17.5556, "step": 30319 }, { "epoch": 0.5542252362586141, "grad_norm": 6.044207158840531, "learning_rate": 4.366742064088654e-06, "loss": 17.409, "step": 30320 }, { "epoch": 0.5542435154550606, "grad_norm": 6.159529872064835, "learning_rate": 4.3664484373632235e-06, "loss": 17.1658, "step": 30321 }, { "epoch": 0.5542617946515072, "grad_norm": 8.261730484933404, "learning_rate": 4.3661548128583355e-06, "loss": 17.7314, "step": 30322 }, { "epoch": 0.5542800738479536, "grad_norm": 6.608101776928486, "learning_rate": 4.3658611905750185e-06, "loss": 17.3932, "step": 30323 }, { "epoch": 0.5542983530444001, "grad_norm": 6.38645509996841, "learning_rate": 4.365567570514303e-06, "loss": 17.1476, "step": 30324 }, { "epoch": 0.5543166322408467, "grad_norm": 7.281998597193281, "learning_rate": 4.3652739526772205e-06, "loss": 17.6203, "step": 30325 }, { "epoch": 0.5543349114372932, "grad_norm": 5.840415658040937, "learning_rate": 4.364980337064795e-06, "loss": 17.5128, "step": 30326 }, { "epoch": 0.5543531906337398, "grad_norm": 5.298643323976937, "learning_rate": 4.36468672367806e-06, "loss": 17.1351, "step": 30327 }, { "epoch": 0.5543714698301863, "grad_norm": 5.052684642508309, "learning_rate": 4.3643931125180425e-06, "loss": 17.0609, "step": 30328 }, { "epoch": 0.5543897490266327, "grad_norm": 6.561321726630394, "learning_rate": 4.364099503585771e-06, "loss": 17.3506, "step": 30329 }, { "epoch": 0.5544080282230793, "grad_norm": 6.123437658798484, "learning_rate": 4.363805896882278e-06, "loss": 17.2921, "step": 30330 }, { "epoch": 0.5544263074195258, "grad_norm": 8.339512494710691, "learning_rate": 4.363512292408589e-06, "loss": 18.415, "step": 30331 }, { "epoch": 0.5544445866159724, "grad_norm": 6.606970244336006, "learning_rate": 4.363218690165733e-06, "loss": 17.4348, "step": 30332 }, { "epoch": 0.5544628658124189, "grad_norm": 6.329948945585482, "learning_rate": 4.3629250901547406e-06, "loss": 17.626, "step": 30333 }, { "epoch": 0.5544811450088654, "grad_norm": 5.530115648521457, "learning_rate": 4.36263149237664e-06, "loss": 16.981, "step": 30334 }, { "epoch": 0.554499424205312, "grad_norm": 7.644986382304356, "learning_rate": 4.362337896832462e-06, "loss": 17.9082, "step": 30335 }, { "epoch": 0.5545177034017584, "grad_norm": 6.101723694127275, "learning_rate": 4.362044303523235e-06, "loss": 17.2786, "step": 30336 }, { "epoch": 0.554535982598205, "grad_norm": 7.591131643693746, "learning_rate": 4.361750712449986e-06, "loss": 18.0084, "step": 30337 }, { "epoch": 0.5545542617946515, "grad_norm": 5.607980575279896, "learning_rate": 4.361457123613745e-06, "loss": 17.3101, "step": 30338 }, { "epoch": 0.554572540991098, "grad_norm": 8.300865561812167, "learning_rate": 4.361163537015542e-06, "loss": 18.4454, "step": 30339 }, { "epoch": 0.5545908201875446, "grad_norm": 6.225652251713139, "learning_rate": 4.360869952656404e-06, "loss": 17.7608, "step": 30340 }, { "epoch": 0.554609099383991, "grad_norm": 5.648584092924264, "learning_rate": 4.360576370537364e-06, "loss": 17.0988, "step": 30341 }, { "epoch": 0.5546273785804376, "grad_norm": 5.94729778539228, "learning_rate": 4.360282790659447e-06, "loss": 17.2462, "step": 30342 }, { "epoch": 0.5546456577768841, "grad_norm": 5.766770304503278, "learning_rate": 4.3599892130236825e-06, "loss": 17.3529, "step": 30343 }, { "epoch": 0.5546639369733306, "grad_norm": 6.1864229405636255, "learning_rate": 4.359695637631102e-06, "loss": 17.314, "step": 30344 }, { "epoch": 0.5546822161697772, "grad_norm": 6.667065221710332, "learning_rate": 4.3594020644827325e-06, "loss": 17.5803, "step": 30345 }, { "epoch": 0.5547004953662237, "grad_norm": 6.973554292934087, "learning_rate": 4.3591084935796015e-06, "loss": 17.6976, "step": 30346 }, { "epoch": 0.5547187745626703, "grad_norm": 5.110075058257313, "learning_rate": 4.358814924922742e-06, "loss": 16.8784, "step": 30347 }, { "epoch": 0.5547370537591167, "grad_norm": 6.506216539079061, "learning_rate": 4.358521358513177e-06, "loss": 17.5179, "step": 30348 }, { "epoch": 0.5547553329555632, "grad_norm": 6.222224056219645, "learning_rate": 4.358227794351942e-06, "loss": 17.3263, "step": 30349 }, { "epoch": 0.5547736121520098, "grad_norm": 6.8306169805787365, "learning_rate": 4.357934232440062e-06, "loss": 17.3949, "step": 30350 }, { "epoch": 0.5547918913484563, "grad_norm": 6.439195934577927, "learning_rate": 4.357640672778567e-06, "loss": 17.2626, "step": 30351 }, { "epoch": 0.5548101705449029, "grad_norm": 8.271904233799688, "learning_rate": 4.357347115368485e-06, "loss": 17.8624, "step": 30352 }, { "epoch": 0.5548284497413494, "grad_norm": 6.6369110602168, "learning_rate": 4.357053560210848e-06, "loss": 17.7727, "step": 30353 }, { "epoch": 0.5548467289377959, "grad_norm": 5.199433077804251, "learning_rate": 4.356760007306679e-06, "loss": 16.9715, "step": 30354 }, { "epoch": 0.5548650081342424, "grad_norm": 6.375087032103942, "learning_rate": 4.356466456657012e-06, "loss": 17.3064, "step": 30355 }, { "epoch": 0.5548832873306889, "grad_norm": 6.658227783544365, "learning_rate": 4.356172908262875e-06, "loss": 17.6765, "step": 30356 }, { "epoch": 0.5549015665271355, "grad_norm": 7.194560066016349, "learning_rate": 4.355879362125294e-06, "loss": 17.3674, "step": 30357 }, { "epoch": 0.554919845723582, "grad_norm": 5.691292600185158, "learning_rate": 4.355585818245303e-06, "loss": 16.9299, "step": 30358 }, { "epoch": 0.5549381249200285, "grad_norm": 6.641633994517181, "learning_rate": 4.355292276623926e-06, "loss": 17.6198, "step": 30359 }, { "epoch": 0.5549564041164751, "grad_norm": 6.493268331591499, "learning_rate": 4.354998737262192e-06, "loss": 17.3888, "step": 30360 }, { "epoch": 0.5549746833129215, "grad_norm": 6.685690377222298, "learning_rate": 4.354705200161134e-06, "loss": 17.1993, "step": 30361 }, { "epoch": 0.5549929625093681, "grad_norm": 7.008107717220254, "learning_rate": 4.354411665321778e-06, "loss": 18.0965, "step": 30362 }, { "epoch": 0.5550112417058146, "grad_norm": 6.721315390869566, "learning_rate": 4.354118132745152e-06, "loss": 17.601, "step": 30363 }, { "epoch": 0.5550295209022611, "grad_norm": 6.678764887609802, "learning_rate": 4.3538246024322865e-06, "loss": 17.5931, "step": 30364 }, { "epoch": 0.5550478000987077, "grad_norm": 5.793003180777682, "learning_rate": 4.353531074384208e-06, "loss": 17.1015, "step": 30365 }, { "epoch": 0.5550660792951542, "grad_norm": 4.9525215940738585, "learning_rate": 4.353237548601949e-06, "loss": 16.9716, "step": 30366 }, { "epoch": 0.5550843584916008, "grad_norm": 7.411039946434502, "learning_rate": 4.352944025086537e-06, "loss": 17.9773, "step": 30367 }, { "epoch": 0.5551026376880472, "grad_norm": 6.017241744885814, "learning_rate": 4.352650503838998e-06, "loss": 17.4964, "step": 30368 }, { "epoch": 0.5551209168844937, "grad_norm": 6.469002140528921, "learning_rate": 4.352356984860363e-06, "loss": 17.4043, "step": 30369 }, { "epoch": 0.5551391960809403, "grad_norm": 7.41769403890821, "learning_rate": 4.352063468151662e-06, "loss": 17.8154, "step": 30370 }, { "epoch": 0.5551574752773868, "grad_norm": 6.877648358798637, "learning_rate": 4.351769953713921e-06, "loss": 17.8969, "step": 30371 }, { "epoch": 0.5551757544738334, "grad_norm": 6.998627690582557, "learning_rate": 4.351476441548172e-06, "loss": 17.7696, "step": 30372 }, { "epoch": 0.5551940336702799, "grad_norm": 7.090493132136382, "learning_rate": 4.35118293165544e-06, "loss": 17.5699, "step": 30373 }, { "epoch": 0.5552123128667263, "grad_norm": 5.812297805741085, "learning_rate": 4.350889424036755e-06, "loss": 17.0902, "step": 30374 }, { "epoch": 0.5552305920631729, "grad_norm": 6.3790401079495584, "learning_rate": 4.3505959186931475e-06, "loss": 17.5394, "step": 30375 }, { "epoch": 0.5552488712596194, "grad_norm": 5.063170833445275, "learning_rate": 4.350302415625646e-06, "loss": 17.06, "step": 30376 }, { "epoch": 0.555267150456066, "grad_norm": 6.298257367528082, "learning_rate": 4.350008914835276e-06, "loss": 17.4815, "step": 30377 }, { "epoch": 0.5552854296525125, "grad_norm": 7.825339613834134, "learning_rate": 4.3497154163230694e-06, "loss": 18.0031, "step": 30378 }, { "epoch": 0.555303708848959, "grad_norm": 6.997685148404749, "learning_rate": 4.3494219200900524e-06, "loss": 17.806, "step": 30379 }, { "epoch": 0.5553219880454056, "grad_norm": 6.282657348364097, "learning_rate": 4.349128426137257e-06, "loss": 17.1973, "step": 30380 }, { "epoch": 0.555340267241852, "grad_norm": 6.106846184281818, "learning_rate": 4.34883493446571e-06, "loss": 17.4181, "step": 30381 }, { "epoch": 0.5553585464382985, "grad_norm": 6.964246439915261, "learning_rate": 4.348541445076438e-06, "loss": 17.5849, "step": 30382 }, { "epoch": 0.5553768256347451, "grad_norm": 7.283050590665498, "learning_rate": 4.348247957970473e-06, "loss": 18.1505, "step": 30383 }, { "epoch": 0.5553951048311916, "grad_norm": 5.892949177316248, "learning_rate": 4.347954473148844e-06, "loss": 17.0135, "step": 30384 }, { "epoch": 0.5554133840276382, "grad_norm": 6.129741921511846, "learning_rate": 4.347660990612575e-06, "loss": 17.5324, "step": 30385 }, { "epoch": 0.5554316632240847, "grad_norm": 8.390329790070275, "learning_rate": 4.3473675103627e-06, "loss": 18.1553, "step": 30386 }, { "epoch": 0.5554499424205311, "grad_norm": 6.061017148581379, "learning_rate": 4.347074032400244e-06, "loss": 17.4678, "step": 30387 }, { "epoch": 0.5554682216169777, "grad_norm": 6.65152864252743, "learning_rate": 4.346780556726236e-06, "loss": 17.69, "step": 30388 }, { "epoch": 0.5554865008134242, "grad_norm": 5.788701319658049, "learning_rate": 4.346487083341708e-06, "loss": 17.2928, "step": 30389 }, { "epoch": 0.5555047800098708, "grad_norm": 5.86063270319099, "learning_rate": 4.346193612247685e-06, "loss": 17.258, "step": 30390 }, { "epoch": 0.5555230592063173, "grad_norm": 6.151518159440682, "learning_rate": 4.345900143445195e-06, "loss": 17.4509, "step": 30391 }, { "epoch": 0.5555413384027638, "grad_norm": 6.145568809117302, "learning_rate": 4.34560667693527e-06, "loss": 17.6411, "step": 30392 }, { "epoch": 0.5555596175992104, "grad_norm": 6.833072302760803, "learning_rate": 4.345313212718937e-06, "loss": 17.7711, "step": 30393 }, { "epoch": 0.5555778967956568, "grad_norm": 5.8774261768338985, "learning_rate": 4.345019750797222e-06, "loss": 17.2889, "step": 30394 }, { "epoch": 0.5555961759921034, "grad_norm": 8.42129123816926, "learning_rate": 4.34472629117116e-06, "loss": 17.5274, "step": 30395 }, { "epoch": 0.5556144551885499, "grad_norm": 9.248557785605156, "learning_rate": 4.3444328338417714e-06, "loss": 18.4174, "step": 30396 }, { "epoch": 0.5556327343849964, "grad_norm": 6.20179996984403, "learning_rate": 4.34413937881009e-06, "loss": 17.4461, "step": 30397 }, { "epoch": 0.555651013581443, "grad_norm": 6.500946897099194, "learning_rate": 4.343845926077145e-06, "loss": 17.2593, "step": 30398 }, { "epoch": 0.5556692927778895, "grad_norm": 7.170810700216358, "learning_rate": 4.343552475643961e-06, "loss": 17.8651, "step": 30399 }, { "epoch": 0.555687571974336, "grad_norm": 6.203500191291366, "learning_rate": 4.34325902751157e-06, "loss": 17.3082, "step": 30400 }, { "epoch": 0.5557058511707825, "grad_norm": 6.7718468691423395, "learning_rate": 4.3429655816809985e-06, "loss": 17.7414, "step": 30401 }, { "epoch": 0.555724130367229, "grad_norm": 5.051788945037206, "learning_rate": 4.342672138153274e-06, "loss": 16.9813, "step": 30402 }, { "epoch": 0.5557424095636756, "grad_norm": 5.42132094671289, "learning_rate": 4.3423786969294306e-06, "loss": 17.2227, "step": 30403 }, { "epoch": 0.5557606887601221, "grad_norm": 7.914395924913029, "learning_rate": 4.34208525801049e-06, "loss": 17.7246, "step": 30404 }, { "epoch": 0.5557789679565687, "grad_norm": 5.5429919082959564, "learning_rate": 4.341791821397483e-06, "loss": 17.0709, "step": 30405 }, { "epoch": 0.5557972471530152, "grad_norm": 7.616172061944812, "learning_rate": 4.34149838709144e-06, "loss": 17.8061, "step": 30406 }, { "epoch": 0.5558155263494616, "grad_norm": 6.303538568073775, "learning_rate": 4.341204955093389e-06, "loss": 17.4164, "step": 30407 }, { "epoch": 0.5558338055459082, "grad_norm": 5.554605208275994, "learning_rate": 4.3409115254043545e-06, "loss": 16.9885, "step": 30408 }, { "epoch": 0.5558520847423547, "grad_norm": 6.8367808744871335, "learning_rate": 4.340618098025369e-06, "loss": 17.4525, "step": 30409 }, { "epoch": 0.5558703639388013, "grad_norm": 5.840189313233061, "learning_rate": 4.34032467295746e-06, "loss": 17.2027, "step": 30410 }, { "epoch": 0.5558886431352478, "grad_norm": 7.0493043407781135, "learning_rate": 4.340031250201656e-06, "loss": 17.6386, "step": 30411 }, { "epoch": 0.5559069223316943, "grad_norm": 7.041261721046854, "learning_rate": 4.3397378297589865e-06, "loss": 17.559, "step": 30412 }, { "epoch": 0.5559252015281408, "grad_norm": 7.530976874809889, "learning_rate": 4.339444411630476e-06, "loss": 18.0247, "step": 30413 }, { "epoch": 0.5559434807245873, "grad_norm": 8.262316504863977, "learning_rate": 4.3391509958171565e-06, "loss": 18.1432, "step": 30414 }, { "epoch": 0.5559617599210339, "grad_norm": 6.708545038654154, "learning_rate": 4.338857582320057e-06, "loss": 17.2047, "step": 30415 }, { "epoch": 0.5559800391174804, "grad_norm": 7.288689820357164, "learning_rate": 4.338564171140202e-06, "loss": 18.0448, "step": 30416 }, { "epoch": 0.5559983183139269, "grad_norm": 5.918358423172568, "learning_rate": 4.338270762278624e-06, "loss": 17.3723, "step": 30417 }, { "epoch": 0.5560165975103735, "grad_norm": 5.6841498698497, "learning_rate": 4.337977355736349e-06, "loss": 17.0706, "step": 30418 }, { "epoch": 0.55603487670682, "grad_norm": 6.729714669196704, "learning_rate": 4.337683951514404e-06, "loss": 17.4918, "step": 30419 }, { "epoch": 0.5560531559032665, "grad_norm": 5.793990052595531, "learning_rate": 4.3373905496138224e-06, "loss": 17.2761, "step": 30420 }, { "epoch": 0.556071435099713, "grad_norm": 8.21710851231549, "learning_rate": 4.337097150035629e-06, "loss": 17.8107, "step": 30421 }, { "epoch": 0.5560897142961595, "grad_norm": 7.452042060126598, "learning_rate": 4.336803752780851e-06, "loss": 17.6964, "step": 30422 }, { "epoch": 0.5561079934926061, "grad_norm": 6.82883603295809, "learning_rate": 4.336510357850519e-06, "loss": 17.4177, "step": 30423 }, { "epoch": 0.5561262726890526, "grad_norm": 6.460148034649363, "learning_rate": 4.336216965245661e-06, "loss": 17.3461, "step": 30424 }, { "epoch": 0.5561445518854992, "grad_norm": 5.934930699225166, "learning_rate": 4.335923574967304e-06, "loss": 17.3244, "step": 30425 }, { "epoch": 0.5561628310819456, "grad_norm": 6.220845026375947, "learning_rate": 4.335630187016478e-06, "loss": 17.5716, "step": 30426 }, { "epoch": 0.5561811102783921, "grad_norm": 5.087148970252278, "learning_rate": 4.335336801394209e-06, "loss": 17.0099, "step": 30427 }, { "epoch": 0.5561993894748387, "grad_norm": 6.1591289818507535, "learning_rate": 4.335043418101528e-06, "loss": 17.1414, "step": 30428 }, { "epoch": 0.5562176686712852, "grad_norm": 6.528134952646109, "learning_rate": 4.334750037139463e-06, "loss": 17.2288, "step": 30429 }, { "epoch": 0.5562359478677318, "grad_norm": 6.021892213965534, "learning_rate": 4.3344566585090396e-06, "loss": 17.33, "step": 30430 }, { "epoch": 0.5562542270641783, "grad_norm": 5.951091545581845, "learning_rate": 4.33416328221129e-06, "loss": 17.6398, "step": 30431 }, { "epoch": 0.5562725062606247, "grad_norm": 6.206771847494303, "learning_rate": 4.333869908247239e-06, "loss": 17.46, "step": 30432 }, { "epoch": 0.5562907854570713, "grad_norm": 7.383710327797552, "learning_rate": 4.333576536617915e-06, "loss": 17.9707, "step": 30433 }, { "epoch": 0.5563090646535178, "grad_norm": 7.09174286890949, "learning_rate": 4.333283167324349e-06, "loss": 17.4465, "step": 30434 }, { "epoch": 0.5563273438499644, "grad_norm": 5.803126542745713, "learning_rate": 4.332989800367569e-06, "loss": 17.182, "step": 30435 }, { "epoch": 0.5563456230464109, "grad_norm": 5.983659165633793, "learning_rate": 4.332696435748599e-06, "loss": 17.2472, "step": 30436 }, { "epoch": 0.5563639022428574, "grad_norm": 5.3088628672849145, "learning_rate": 4.332403073468472e-06, "loss": 17.0136, "step": 30437 }, { "epoch": 0.556382181439304, "grad_norm": 6.381575394229691, "learning_rate": 4.332109713528214e-06, "loss": 17.3745, "step": 30438 }, { "epoch": 0.5564004606357504, "grad_norm": 5.810134741091684, "learning_rate": 4.331816355928852e-06, "loss": 17.4513, "step": 30439 }, { "epoch": 0.556418739832197, "grad_norm": 6.285215406310569, "learning_rate": 4.331523000671418e-06, "loss": 17.4766, "step": 30440 }, { "epoch": 0.5564370190286435, "grad_norm": 6.428135712639924, "learning_rate": 4.3312296477569355e-06, "loss": 17.4456, "step": 30441 }, { "epoch": 0.55645529822509, "grad_norm": 6.765451418087163, "learning_rate": 4.330936297186436e-06, "loss": 17.4471, "step": 30442 }, { "epoch": 0.5564735774215366, "grad_norm": 6.457964766754927, "learning_rate": 4.330642948960948e-06, "loss": 17.1484, "step": 30443 }, { "epoch": 0.5564918566179831, "grad_norm": 8.4854233268427, "learning_rate": 4.330349603081496e-06, "loss": 17.8472, "step": 30444 }, { "epoch": 0.5565101358144297, "grad_norm": 6.083741673415645, "learning_rate": 4.330056259549113e-06, "loss": 17.3949, "step": 30445 }, { "epoch": 0.5565284150108761, "grad_norm": 5.562488305265378, "learning_rate": 4.329762918364824e-06, "loss": 17.0113, "step": 30446 }, { "epoch": 0.5565466942073226, "grad_norm": 6.290429605414269, "learning_rate": 4.329469579529656e-06, "loss": 17.459, "step": 30447 }, { "epoch": 0.5565649734037692, "grad_norm": 6.010613599110941, "learning_rate": 4.329176243044641e-06, "loss": 17.1913, "step": 30448 }, { "epoch": 0.5565832526002157, "grad_norm": 6.170446971981488, "learning_rate": 4.328882908910806e-06, "loss": 17.2604, "step": 30449 }, { "epoch": 0.5566015317966622, "grad_norm": 7.330630287856471, "learning_rate": 4.328589577129174e-06, "loss": 17.8939, "step": 30450 }, { "epoch": 0.5566198109931088, "grad_norm": 8.82309940573492, "learning_rate": 4.328296247700781e-06, "loss": 18.5188, "step": 30451 }, { "epoch": 0.5566380901895552, "grad_norm": 6.36782458752519, "learning_rate": 4.3280029206266505e-06, "loss": 17.709, "step": 30452 }, { "epoch": 0.5566563693860018, "grad_norm": 7.649302179097327, "learning_rate": 4.3277095959078095e-06, "loss": 18.1088, "step": 30453 }, { "epoch": 0.5566746485824483, "grad_norm": 6.505882226297809, "learning_rate": 4.327416273545291e-06, "loss": 17.7007, "step": 30454 }, { "epoch": 0.5566929277788948, "grad_norm": 6.3447793549733, "learning_rate": 4.327122953540119e-06, "loss": 17.745, "step": 30455 }, { "epoch": 0.5567112069753414, "grad_norm": 5.374332297333821, "learning_rate": 4.326829635893321e-06, "loss": 17.1169, "step": 30456 }, { "epoch": 0.5567294861717879, "grad_norm": 6.112564675461189, "learning_rate": 4.326536320605929e-06, "loss": 17.4123, "step": 30457 }, { "epoch": 0.5567477653682344, "grad_norm": 6.933923191275106, "learning_rate": 4.326243007678967e-06, "loss": 17.4363, "step": 30458 }, { "epoch": 0.5567660445646809, "grad_norm": 5.748282197141193, "learning_rate": 4.3259496971134665e-06, "loss": 17.2313, "step": 30459 }, { "epoch": 0.5567843237611274, "grad_norm": 5.6929904814018615, "learning_rate": 4.325656388910453e-06, "loss": 17.3818, "step": 30460 }, { "epoch": 0.556802602957574, "grad_norm": 6.839079155119191, "learning_rate": 4.3253630830709536e-06, "loss": 17.7482, "step": 30461 }, { "epoch": 0.5568208821540205, "grad_norm": 6.4524928659566045, "learning_rate": 4.325069779596001e-06, "loss": 17.4841, "step": 30462 }, { "epoch": 0.5568391613504671, "grad_norm": 6.999807447739517, "learning_rate": 4.32477647848662e-06, "loss": 17.7641, "step": 30463 }, { "epoch": 0.5568574405469136, "grad_norm": 6.58985841678248, "learning_rate": 4.324483179743837e-06, "loss": 17.6112, "step": 30464 }, { "epoch": 0.55687571974336, "grad_norm": 5.79416024440866, "learning_rate": 4.324189883368683e-06, "loss": 17.4712, "step": 30465 }, { "epoch": 0.5568939989398066, "grad_norm": 5.401814240582548, "learning_rate": 4.323896589362185e-06, "loss": 17.1213, "step": 30466 }, { "epoch": 0.5569122781362531, "grad_norm": 7.221088939089592, "learning_rate": 4.3236032977253685e-06, "loss": 17.7333, "step": 30467 }, { "epoch": 0.5569305573326997, "grad_norm": 5.425569742545539, "learning_rate": 4.323310008459267e-06, "loss": 17.1122, "step": 30468 }, { "epoch": 0.5569488365291462, "grad_norm": 7.46283208007708, "learning_rate": 4.323016721564904e-06, "loss": 17.9097, "step": 30469 }, { "epoch": 0.5569671157255927, "grad_norm": 7.268001674054693, "learning_rate": 4.322723437043307e-06, "loss": 18.1289, "step": 30470 }, { "epoch": 0.5569853949220392, "grad_norm": 7.3268589312366, "learning_rate": 4.322430154895508e-06, "loss": 17.9121, "step": 30471 }, { "epoch": 0.5570036741184857, "grad_norm": 6.52818922559969, "learning_rate": 4.32213687512253e-06, "loss": 17.4729, "step": 30472 }, { "epoch": 0.5570219533149323, "grad_norm": 6.363529779368284, "learning_rate": 4.321843597725406e-06, "loss": 17.3301, "step": 30473 }, { "epoch": 0.5570402325113788, "grad_norm": 9.558005541213715, "learning_rate": 4.32155032270516e-06, "loss": 17.6815, "step": 30474 }, { "epoch": 0.5570585117078253, "grad_norm": 5.507218736966377, "learning_rate": 4.3212570500628205e-06, "loss": 17.2874, "step": 30475 }, { "epoch": 0.5570767909042719, "grad_norm": 6.3357164273700075, "learning_rate": 4.3209637797994185e-06, "loss": 17.167, "step": 30476 }, { "epoch": 0.5570950701007183, "grad_norm": 6.609225336853062, "learning_rate": 4.3206705119159775e-06, "loss": 17.5503, "step": 30477 }, { "epoch": 0.5571133492971649, "grad_norm": 5.3027631714432735, "learning_rate": 4.320377246413528e-06, "loss": 17.0642, "step": 30478 }, { "epoch": 0.5571316284936114, "grad_norm": 5.768566976563361, "learning_rate": 4.3200839832930975e-06, "loss": 17.1471, "step": 30479 }, { "epoch": 0.5571499076900579, "grad_norm": 6.089440461554675, "learning_rate": 4.319790722555714e-06, "loss": 17.7009, "step": 30480 }, { "epoch": 0.5571681868865045, "grad_norm": 5.376399751145817, "learning_rate": 4.319497464202404e-06, "loss": 17.1796, "step": 30481 }, { "epoch": 0.557186466082951, "grad_norm": 5.040708031245319, "learning_rate": 4.319204208234197e-06, "loss": 17.0289, "step": 30482 }, { "epoch": 0.5572047452793976, "grad_norm": 6.450822923765933, "learning_rate": 4.3189109546521205e-06, "loss": 17.2443, "step": 30483 }, { "epoch": 0.557223024475844, "grad_norm": 6.294083200378454, "learning_rate": 4.3186177034572e-06, "loss": 17.5673, "step": 30484 }, { "epoch": 0.5572413036722905, "grad_norm": 6.844827141897676, "learning_rate": 4.3183244546504686e-06, "loss": 17.6895, "step": 30485 }, { "epoch": 0.5572595828687371, "grad_norm": 7.196905469086906, "learning_rate": 4.318031208232948e-06, "loss": 17.4975, "step": 30486 }, { "epoch": 0.5572778620651836, "grad_norm": 6.001542344770934, "learning_rate": 4.317737964205669e-06, "loss": 17.4349, "step": 30487 }, { "epoch": 0.5572961412616302, "grad_norm": 6.737259899369198, "learning_rate": 4.317444722569662e-06, "loss": 17.5539, "step": 30488 }, { "epoch": 0.5573144204580767, "grad_norm": 6.095173095650095, "learning_rate": 4.317151483325949e-06, "loss": 17.3335, "step": 30489 }, { "epoch": 0.5573326996545231, "grad_norm": 5.950432020815349, "learning_rate": 4.316858246475563e-06, "loss": 17.5926, "step": 30490 }, { "epoch": 0.5573509788509697, "grad_norm": 5.374414522191878, "learning_rate": 4.3165650120195275e-06, "loss": 17.1506, "step": 30491 }, { "epoch": 0.5573692580474162, "grad_norm": 5.597963813066823, "learning_rate": 4.3162717799588725e-06, "loss": 17.2211, "step": 30492 }, { "epoch": 0.5573875372438628, "grad_norm": 6.64160238814545, "learning_rate": 4.315978550294627e-06, "loss": 17.5854, "step": 30493 }, { "epoch": 0.5574058164403093, "grad_norm": 6.3309568630323705, "learning_rate": 4.3156853230278185e-06, "loss": 17.5123, "step": 30494 }, { "epoch": 0.5574240956367558, "grad_norm": 6.49216006777784, "learning_rate": 4.31539209815947e-06, "loss": 17.6975, "step": 30495 }, { "epoch": 0.5574423748332024, "grad_norm": 6.81650690016272, "learning_rate": 4.315098875690615e-06, "loss": 17.5142, "step": 30496 }, { "epoch": 0.5574606540296488, "grad_norm": 5.372309198292457, "learning_rate": 4.314805655622279e-06, "loss": 17.0638, "step": 30497 }, { "epoch": 0.5574789332260954, "grad_norm": 6.643024012989598, "learning_rate": 4.314512437955488e-06, "loss": 17.4579, "step": 30498 }, { "epoch": 0.5574972124225419, "grad_norm": 6.018181195780188, "learning_rate": 4.3142192226912735e-06, "loss": 17.1981, "step": 30499 }, { "epoch": 0.5575154916189884, "grad_norm": 6.146475037020606, "learning_rate": 4.313926009830661e-06, "loss": 17.3385, "step": 30500 }, { "epoch": 0.557533770815435, "grad_norm": 5.957416343716959, "learning_rate": 4.313632799374676e-06, "loss": 17.6676, "step": 30501 }, { "epoch": 0.5575520500118815, "grad_norm": 6.740620229585802, "learning_rate": 4.313339591324352e-06, "loss": 17.6999, "step": 30502 }, { "epoch": 0.557570329208328, "grad_norm": 5.52835237209412, "learning_rate": 4.31304638568071e-06, "loss": 16.9846, "step": 30503 }, { "epoch": 0.5575886084047745, "grad_norm": 6.997753205580319, "learning_rate": 4.312753182444782e-06, "loss": 17.8266, "step": 30504 }, { "epoch": 0.557606887601221, "grad_norm": 6.155297987399454, "learning_rate": 4.312459981617595e-06, "loss": 17.2637, "step": 30505 }, { "epoch": 0.5576251667976676, "grad_norm": 7.183433563422187, "learning_rate": 4.3121667832001745e-06, "loss": 17.5522, "step": 30506 }, { "epoch": 0.5576434459941141, "grad_norm": 5.445849464333626, "learning_rate": 4.311873587193552e-06, "loss": 17.0326, "step": 30507 }, { "epoch": 0.5576617251905607, "grad_norm": 7.629748305209861, "learning_rate": 4.311580393598753e-06, "loss": 17.9976, "step": 30508 }, { "epoch": 0.5576800043870072, "grad_norm": 8.115644494368482, "learning_rate": 4.3112872024168025e-06, "loss": 17.9756, "step": 30509 }, { "epoch": 0.5576982835834536, "grad_norm": 6.011341361379338, "learning_rate": 4.310994013648732e-06, "loss": 17.5504, "step": 30510 }, { "epoch": 0.5577165627799002, "grad_norm": 5.758654560711288, "learning_rate": 4.310700827295568e-06, "loss": 17.1234, "step": 30511 }, { "epoch": 0.5577348419763467, "grad_norm": 6.49719380903847, "learning_rate": 4.310407643358336e-06, "loss": 17.6644, "step": 30512 }, { "epoch": 0.5577531211727933, "grad_norm": 6.2608441311807495, "learning_rate": 4.310114461838067e-06, "loss": 17.4377, "step": 30513 }, { "epoch": 0.5577714003692398, "grad_norm": 6.601611089858817, "learning_rate": 4.309821282735787e-06, "loss": 17.4858, "step": 30514 }, { "epoch": 0.5577896795656863, "grad_norm": 6.378631470841042, "learning_rate": 4.309528106052522e-06, "loss": 17.6276, "step": 30515 }, { "epoch": 0.5578079587621328, "grad_norm": 5.244913971274306, "learning_rate": 4.309234931789303e-06, "loss": 17.0545, "step": 30516 }, { "epoch": 0.5578262379585793, "grad_norm": 5.923544563414477, "learning_rate": 4.308941759947154e-06, "loss": 17.191, "step": 30517 }, { "epoch": 0.5578445171550258, "grad_norm": 5.899250874851183, "learning_rate": 4.308648590527106e-06, "loss": 17.3904, "step": 30518 }, { "epoch": 0.5578627963514724, "grad_norm": 5.075639858724824, "learning_rate": 4.308355423530183e-06, "loss": 16.8944, "step": 30519 }, { "epoch": 0.5578810755479189, "grad_norm": 4.672712675562612, "learning_rate": 4.308062258957414e-06, "loss": 16.8131, "step": 30520 }, { "epoch": 0.5578993547443655, "grad_norm": 6.954179219962362, "learning_rate": 4.307769096809829e-06, "loss": 17.0966, "step": 30521 }, { "epoch": 0.557917633940812, "grad_norm": 7.255697774859989, "learning_rate": 4.307475937088452e-06, "loss": 17.6743, "step": 30522 }, { "epoch": 0.5579359131372584, "grad_norm": 5.47905844598502, "learning_rate": 4.307182779794311e-06, "loss": 17.1347, "step": 30523 }, { "epoch": 0.557954192333705, "grad_norm": 5.202368372659445, "learning_rate": 4.306889624928435e-06, "loss": 17.0066, "step": 30524 }, { "epoch": 0.5579724715301515, "grad_norm": 6.2287559979929075, "learning_rate": 4.306596472491852e-06, "loss": 17.3737, "step": 30525 }, { "epoch": 0.5579907507265981, "grad_norm": 6.0554948085724005, "learning_rate": 4.306303322485585e-06, "loss": 17.3078, "step": 30526 }, { "epoch": 0.5580090299230446, "grad_norm": 6.100143841087003, "learning_rate": 4.306010174910668e-06, "loss": 17.433, "step": 30527 }, { "epoch": 0.558027309119491, "grad_norm": 6.85694105545012, "learning_rate": 4.3057170297681225e-06, "loss": 17.6443, "step": 30528 }, { "epoch": 0.5580455883159376, "grad_norm": 5.920178973039488, "learning_rate": 4.305423887058979e-06, "loss": 17.325, "step": 30529 }, { "epoch": 0.5580638675123841, "grad_norm": 6.718897324755794, "learning_rate": 4.305130746784266e-06, "loss": 17.3836, "step": 30530 }, { "epoch": 0.5580821467088307, "grad_norm": 6.896615802985753, "learning_rate": 4.304837608945008e-06, "loss": 17.7411, "step": 30531 }, { "epoch": 0.5581004259052772, "grad_norm": 6.457984507342063, "learning_rate": 4.304544473542235e-06, "loss": 17.5626, "step": 30532 }, { "epoch": 0.5581187051017237, "grad_norm": 5.108326418235577, "learning_rate": 4.304251340576972e-06, "loss": 17.1178, "step": 30533 }, { "epoch": 0.5581369842981703, "grad_norm": 7.404844220836865, "learning_rate": 4.303958210050247e-06, "loss": 18.0502, "step": 30534 }, { "epoch": 0.5581552634946167, "grad_norm": 6.50943514086858, "learning_rate": 4.303665081963091e-06, "loss": 17.5748, "step": 30535 }, { "epoch": 0.5581735426910633, "grad_norm": 5.926690057418534, "learning_rate": 4.3033719563165266e-06, "loss": 17.1652, "step": 30536 }, { "epoch": 0.5581918218875098, "grad_norm": 6.755311409686412, "learning_rate": 4.303078833111584e-06, "loss": 17.6263, "step": 30537 }, { "epoch": 0.5582101010839563, "grad_norm": 5.652406210385939, "learning_rate": 4.302785712349288e-06, "loss": 17.2435, "step": 30538 }, { "epoch": 0.5582283802804029, "grad_norm": 7.144795235758998, "learning_rate": 4.30249259403067e-06, "loss": 17.6384, "step": 30539 }, { "epoch": 0.5582466594768494, "grad_norm": 5.302457435764489, "learning_rate": 4.302199478156752e-06, "loss": 16.9049, "step": 30540 }, { "epoch": 0.558264938673296, "grad_norm": 5.796597749259299, "learning_rate": 4.301906364728566e-06, "loss": 17.0901, "step": 30541 }, { "epoch": 0.5582832178697424, "grad_norm": 5.755158835394284, "learning_rate": 4.301613253747138e-06, "loss": 17.3498, "step": 30542 }, { "epoch": 0.5583014970661889, "grad_norm": 6.645538422007744, "learning_rate": 4.301320145213493e-06, "loss": 17.1182, "step": 30543 }, { "epoch": 0.5583197762626355, "grad_norm": 6.349395029299458, "learning_rate": 4.301027039128663e-06, "loss": 17.4298, "step": 30544 }, { "epoch": 0.558338055459082, "grad_norm": 5.570166643894828, "learning_rate": 4.300733935493671e-06, "loss": 17.1953, "step": 30545 }, { "epoch": 0.5583563346555286, "grad_norm": 6.044007477486445, "learning_rate": 4.300440834309545e-06, "loss": 17.2102, "step": 30546 }, { "epoch": 0.5583746138519751, "grad_norm": 4.759666748144722, "learning_rate": 4.3001477355773145e-06, "loss": 16.8014, "step": 30547 }, { "epoch": 0.5583928930484215, "grad_norm": 5.75291307550413, "learning_rate": 4.299854639298004e-06, "loss": 17.2464, "step": 30548 }, { "epoch": 0.5584111722448681, "grad_norm": 7.140520265326182, "learning_rate": 4.2995615454726444e-06, "loss": 17.5912, "step": 30549 }, { "epoch": 0.5584294514413146, "grad_norm": 6.826529301224331, "learning_rate": 4.29926845410226e-06, "loss": 17.9583, "step": 30550 }, { "epoch": 0.5584477306377612, "grad_norm": 5.836636197580167, "learning_rate": 4.298975365187877e-06, "loss": 17.1421, "step": 30551 }, { "epoch": 0.5584660098342077, "grad_norm": 6.758795919885863, "learning_rate": 4.298682278730527e-06, "loss": 17.5423, "step": 30552 }, { "epoch": 0.5584842890306542, "grad_norm": 6.153976491148094, "learning_rate": 4.298389194731236e-06, "loss": 17.3378, "step": 30553 }, { "epoch": 0.5585025682271008, "grad_norm": 7.366167210859421, "learning_rate": 4.298096113191026e-06, "loss": 17.9177, "step": 30554 }, { "epoch": 0.5585208474235472, "grad_norm": 6.093471225481582, "learning_rate": 4.29780303411093e-06, "loss": 17.4587, "step": 30555 }, { "epoch": 0.5585391266199938, "grad_norm": 6.837894474172632, "learning_rate": 4.297509957491975e-06, "loss": 17.5079, "step": 30556 }, { "epoch": 0.5585574058164403, "grad_norm": 6.6599834054065505, "learning_rate": 4.297216883335183e-06, "loss": 17.5574, "step": 30557 }, { "epoch": 0.5585756850128868, "grad_norm": 7.272831350895308, "learning_rate": 4.296923811641589e-06, "loss": 17.4182, "step": 30558 }, { "epoch": 0.5585939642093334, "grad_norm": 6.754802959864719, "learning_rate": 4.296630742412214e-06, "loss": 17.7395, "step": 30559 }, { "epoch": 0.5586122434057799, "grad_norm": 8.11766435214501, "learning_rate": 4.2963376756480855e-06, "loss": 17.7454, "step": 30560 }, { "epoch": 0.5586305226022265, "grad_norm": 7.477878168330257, "learning_rate": 4.296044611350236e-06, "loss": 17.6156, "step": 30561 }, { "epoch": 0.5586488017986729, "grad_norm": 6.396374642752157, "learning_rate": 4.295751549519688e-06, "loss": 17.7203, "step": 30562 }, { "epoch": 0.5586670809951194, "grad_norm": 5.6243873593989875, "learning_rate": 4.2954584901574674e-06, "loss": 16.9913, "step": 30563 }, { "epoch": 0.558685360191566, "grad_norm": 5.007340694108732, "learning_rate": 4.295165433264606e-06, "loss": 16.8941, "step": 30564 }, { "epoch": 0.5587036393880125, "grad_norm": 6.371382717207009, "learning_rate": 4.294872378842126e-06, "loss": 17.4847, "step": 30565 }, { "epoch": 0.5587219185844591, "grad_norm": 5.521096458719048, "learning_rate": 4.294579326891059e-06, "loss": 16.9894, "step": 30566 }, { "epoch": 0.5587401977809056, "grad_norm": 6.182338192483161, "learning_rate": 4.294286277412432e-06, "loss": 17.5693, "step": 30567 }, { "epoch": 0.558758476977352, "grad_norm": 5.12838097373094, "learning_rate": 4.293993230407267e-06, "loss": 16.9506, "step": 30568 }, { "epoch": 0.5587767561737986, "grad_norm": 7.362227204483668, "learning_rate": 4.2937001858765955e-06, "loss": 17.8476, "step": 30569 }, { "epoch": 0.5587950353702451, "grad_norm": 7.570251807715038, "learning_rate": 4.293407143821445e-06, "loss": 17.8662, "step": 30570 }, { "epoch": 0.5588133145666917, "grad_norm": 6.4967017013755255, "learning_rate": 4.293114104242839e-06, "loss": 17.1252, "step": 30571 }, { "epoch": 0.5588315937631382, "grad_norm": 6.8774511758233405, "learning_rate": 4.2928210671418095e-06, "loss": 17.6155, "step": 30572 }, { "epoch": 0.5588498729595847, "grad_norm": 8.496656322597575, "learning_rate": 4.292528032519379e-06, "loss": 18.217, "step": 30573 }, { "epoch": 0.5588681521560313, "grad_norm": 6.357456536425094, "learning_rate": 4.292235000376575e-06, "loss": 17.7525, "step": 30574 }, { "epoch": 0.5588864313524777, "grad_norm": 6.707046654579271, "learning_rate": 4.291941970714428e-06, "loss": 17.4581, "step": 30575 }, { "epoch": 0.5589047105489243, "grad_norm": 8.005990267776186, "learning_rate": 4.2916489435339635e-06, "loss": 18.0898, "step": 30576 }, { "epoch": 0.5589229897453708, "grad_norm": 5.216324953297119, "learning_rate": 4.291355918836206e-06, "loss": 17.0645, "step": 30577 }, { "epoch": 0.5589412689418173, "grad_norm": 7.176599579334091, "learning_rate": 4.291062896622185e-06, "loss": 17.567, "step": 30578 }, { "epoch": 0.5589595481382639, "grad_norm": 6.32981799824838, "learning_rate": 4.290769876892926e-06, "loss": 17.4537, "step": 30579 }, { "epoch": 0.5589778273347104, "grad_norm": 9.240204924148394, "learning_rate": 4.29047685964946e-06, "loss": 18.3979, "step": 30580 }, { "epoch": 0.5589961065311569, "grad_norm": 8.222114248340016, "learning_rate": 4.29018384489281e-06, "loss": 18.0538, "step": 30581 }, { "epoch": 0.5590143857276034, "grad_norm": 6.570943856998662, "learning_rate": 4.289890832624003e-06, "loss": 17.4307, "step": 30582 }, { "epoch": 0.5590326649240499, "grad_norm": 6.314098827292905, "learning_rate": 4.289597822844067e-06, "loss": 17.055, "step": 30583 }, { "epoch": 0.5590509441204965, "grad_norm": 6.316582726892834, "learning_rate": 4.289304815554031e-06, "loss": 17.3952, "step": 30584 }, { "epoch": 0.559069223316943, "grad_norm": 5.571820395259093, "learning_rate": 4.2890118107549175e-06, "loss": 17.1718, "step": 30585 }, { "epoch": 0.5590875025133895, "grad_norm": 6.90325833167843, "learning_rate": 4.2887188084477574e-06, "loss": 17.6, "step": 30586 }, { "epoch": 0.559105781709836, "grad_norm": 7.467847707167094, "learning_rate": 4.2884258086335755e-06, "loss": 17.7715, "step": 30587 }, { "epoch": 0.5591240609062825, "grad_norm": 7.692882676568562, "learning_rate": 4.288132811313398e-06, "loss": 18.0684, "step": 30588 }, { "epoch": 0.5591423401027291, "grad_norm": 5.446520375387809, "learning_rate": 4.287839816488256e-06, "loss": 17.1002, "step": 30589 }, { "epoch": 0.5591606192991756, "grad_norm": 6.456233508709413, "learning_rate": 4.287546824159173e-06, "loss": 17.6876, "step": 30590 }, { "epoch": 0.5591788984956221, "grad_norm": 6.307042643912798, "learning_rate": 4.287253834327175e-06, "loss": 17.1006, "step": 30591 }, { "epoch": 0.5591971776920687, "grad_norm": 6.0785978409900645, "learning_rate": 4.286960846993291e-06, "loss": 17.6368, "step": 30592 }, { "epoch": 0.5592154568885151, "grad_norm": 5.9665424656856825, "learning_rate": 4.286667862158549e-06, "loss": 17.5405, "step": 30593 }, { "epoch": 0.5592337360849617, "grad_norm": 7.750848656878478, "learning_rate": 4.286374879823972e-06, "loss": 17.9346, "step": 30594 }, { "epoch": 0.5592520152814082, "grad_norm": 4.835999221727466, "learning_rate": 4.286081899990591e-06, "loss": 16.7986, "step": 30595 }, { "epoch": 0.5592702944778547, "grad_norm": 9.895772479952008, "learning_rate": 4.285788922659429e-06, "loss": 18.5219, "step": 30596 }, { "epoch": 0.5592885736743013, "grad_norm": 7.240721219204889, "learning_rate": 4.285495947831516e-06, "loss": 17.6355, "step": 30597 }, { "epoch": 0.5593068528707478, "grad_norm": 5.892031088166204, "learning_rate": 4.285202975507878e-06, "loss": 17.3085, "step": 30598 }, { "epoch": 0.5593251320671944, "grad_norm": 6.5582171676503425, "learning_rate": 4.284910005689541e-06, "loss": 17.5627, "step": 30599 }, { "epoch": 0.5593434112636408, "grad_norm": 4.79276549578017, "learning_rate": 4.284617038377533e-06, "loss": 16.879, "step": 30600 }, { "epoch": 0.5593616904600873, "grad_norm": 6.633440748943724, "learning_rate": 4.28432407357288e-06, "loss": 18.0099, "step": 30601 }, { "epoch": 0.5593799696565339, "grad_norm": 6.954566516961716, "learning_rate": 4.284031111276608e-06, "loss": 17.775, "step": 30602 }, { "epoch": 0.5593982488529804, "grad_norm": 6.781104086355167, "learning_rate": 4.283738151489746e-06, "loss": 17.2658, "step": 30603 }, { "epoch": 0.559416528049427, "grad_norm": 6.972129873703062, "learning_rate": 4.283445194213319e-06, "loss": 17.609, "step": 30604 }, { "epoch": 0.5594348072458735, "grad_norm": 5.869562152105778, "learning_rate": 4.283152239448355e-06, "loss": 17.0732, "step": 30605 }, { "epoch": 0.55945308644232, "grad_norm": 5.802360226871458, "learning_rate": 4.282859287195879e-06, "loss": 17.2413, "step": 30606 }, { "epoch": 0.5594713656387665, "grad_norm": 9.045612519142802, "learning_rate": 4.282566337456921e-06, "loss": 18.6408, "step": 30607 }, { "epoch": 0.559489644835213, "grad_norm": 6.297534908437905, "learning_rate": 4.282273390232504e-06, "loss": 17.234, "step": 30608 }, { "epoch": 0.5595079240316596, "grad_norm": 7.107902224063115, "learning_rate": 4.281980445523656e-06, "loss": 17.4466, "step": 30609 }, { "epoch": 0.5595262032281061, "grad_norm": 8.273617810200701, "learning_rate": 4.281687503331405e-06, "loss": 18.6345, "step": 30610 }, { "epoch": 0.5595444824245526, "grad_norm": 6.213424018349795, "learning_rate": 4.2813945636567776e-06, "loss": 17.4478, "step": 30611 }, { "epoch": 0.5595627616209992, "grad_norm": 6.046333312683122, "learning_rate": 4.281101626500801e-06, "loss": 17.2243, "step": 30612 }, { "epoch": 0.5595810408174456, "grad_norm": 6.652037422840099, "learning_rate": 4.280808691864498e-06, "loss": 17.4577, "step": 30613 }, { "epoch": 0.5595993200138922, "grad_norm": 6.167483912439691, "learning_rate": 4.280515759748899e-06, "loss": 17.4302, "step": 30614 }, { "epoch": 0.5596175992103387, "grad_norm": 6.198901781163418, "learning_rate": 4.280222830155032e-06, "loss": 17.5638, "step": 30615 }, { "epoch": 0.5596358784067852, "grad_norm": 5.365317751853052, "learning_rate": 4.279929903083918e-06, "loss": 17.0782, "step": 30616 }, { "epoch": 0.5596541576032318, "grad_norm": 5.9425722209628535, "learning_rate": 4.27963697853659e-06, "loss": 17.5057, "step": 30617 }, { "epoch": 0.5596724367996783, "grad_norm": 9.092241502697272, "learning_rate": 4.279344056514071e-06, "loss": 18.5232, "step": 30618 }, { "epoch": 0.5596907159961249, "grad_norm": 5.441288344933552, "learning_rate": 4.279051137017387e-06, "loss": 17.0457, "step": 30619 }, { "epoch": 0.5597089951925713, "grad_norm": 6.632032440514615, "learning_rate": 4.2787582200475694e-06, "loss": 17.5856, "step": 30620 }, { "epoch": 0.5597272743890178, "grad_norm": 7.076192110772478, "learning_rate": 4.278465305605641e-06, "loss": 17.468, "step": 30621 }, { "epoch": 0.5597455535854644, "grad_norm": 7.0568722350129285, "learning_rate": 4.2781723936926266e-06, "loss": 17.8084, "step": 30622 }, { "epoch": 0.5597638327819109, "grad_norm": 6.199473743076331, "learning_rate": 4.2778794843095565e-06, "loss": 17.3058, "step": 30623 }, { "epoch": 0.5597821119783575, "grad_norm": 5.260005197410325, "learning_rate": 4.277586577457457e-06, "loss": 16.9436, "step": 30624 }, { "epoch": 0.559800391174804, "grad_norm": 6.725974661535709, "learning_rate": 4.277293673137352e-06, "loss": 17.3018, "step": 30625 }, { "epoch": 0.5598186703712504, "grad_norm": 6.669839590295551, "learning_rate": 4.277000771350273e-06, "loss": 17.3819, "step": 30626 }, { "epoch": 0.559836949567697, "grad_norm": 5.966489786927318, "learning_rate": 4.27670787209724e-06, "loss": 17.4344, "step": 30627 }, { "epoch": 0.5598552287641435, "grad_norm": 6.490259627060596, "learning_rate": 4.276414975379285e-06, "loss": 17.4129, "step": 30628 }, { "epoch": 0.5598735079605901, "grad_norm": 6.470650699942923, "learning_rate": 4.276122081197434e-06, "loss": 17.5064, "step": 30629 }, { "epoch": 0.5598917871570366, "grad_norm": 6.640213611105006, "learning_rate": 4.275829189552709e-06, "loss": 17.4758, "step": 30630 }, { "epoch": 0.5599100663534831, "grad_norm": 7.285558460173667, "learning_rate": 4.275536300446143e-06, "loss": 17.8583, "step": 30631 }, { "epoch": 0.5599283455499297, "grad_norm": 6.473047995712774, "learning_rate": 4.275243413878758e-06, "loss": 17.7486, "step": 30632 }, { "epoch": 0.5599466247463761, "grad_norm": 6.269841032199797, "learning_rate": 4.274950529851581e-06, "loss": 17.3955, "step": 30633 }, { "epoch": 0.5599649039428227, "grad_norm": 6.63260908760452, "learning_rate": 4.274657648365642e-06, "loss": 17.2922, "step": 30634 }, { "epoch": 0.5599831831392692, "grad_norm": 6.150943005064286, "learning_rate": 4.274364769421964e-06, "loss": 17.5912, "step": 30635 }, { "epoch": 0.5600014623357157, "grad_norm": 5.473928878163525, "learning_rate": 4.274071893021573e-06, "loss": 17.1134, "step": 30636 }, { "epoch": 0.5600197415321623, "grad_norm": 5.3856971739704775, "learning_rate": 4.273779019165498e-06, "loss": 17.0615, "step": 30637 }, { "epoch": 0.5600380207286088, "grad_norm": 7.1749607533540285, "learning_rate": 4.273486147854766e-06, "loss": 17.5263, "step": 30638 }, { "epoch": 0.5600562999250553, "grad_norm": 6.7271533044957135, "learning_rate": 4.2731932790903995e-06, "loss": 17.7516, "step": 30639 }, { "epoch": 0.5600745791215018, "grad_norm": 5.817895053876919, "learning_rate": 4.2729004128734295e-06, "loss": 17.2455, "step": 30640 }, { "epoch": 0.5600928583179483, "grad_norm": 6.0982925685670875, "learning_rate": 4.272607549204878e-06, "loss": 17.456, "step": 30641 }, { "epoch": 0.5601111375143949, "grad_norm": 7.458106996255198, "learning_rate": 4.272314688085776e-06, "loss": 17.8289, "step": 30642 }, { "epoch": 0.5601294167108414, "grad_norm": 7.019127732844126, "learning_rate": 4.272021829517148e-06, "loss": 17.4724, "step": 30643 }, { "epoch": 0.560147695907288, "grad_norm": 5.807679727789401, "learning_rate": 4.271728973500018e-06, "loss": 17.1069, "step": 30644 }, { "epoch": 0.5601659751037344, "grad_norm": 6.983086834491454, "learning_rate": 4.271436120035418e-06, "loss": 18.0826, "step": 30645 }, { "epoch": 0.5601842543001809, "grad_norm": 5.143871409357171, "learning_rate": 4.27114326912437e-06, "loss": 16.983, "step": 30646 }, { "epoch": 0.5602025334966275, "grad_norm": 7.511820984601978, "learning_rate": 4.2708504207679e-06, "loss": 17.7619, "step": 30647 }, { "epoch": 0.560220812693074, "grad_norm": 6.3825768225201305, "learning_rate": 4.270557574967038e-06, "loss": 17.1756, "step": 30648 }, { "epoch": 0.5602390918895206, "grad_norm": 6.507896600593981, "learning_rate": 4.2702647317228084e-06, "loss": 17.5457, "step": 30649 }, { "epoch": 0.5602573710859671, "grad_norm": 6.8812313569911785, "learning_rate": 4.269971891036236e-06, "loss": 17.5505, "step": 30650 }, { "epoch": 0.5602756502824136, "grad_norm": 6.065537958033438, "learning_rate": 4.269679052908349e-06, "loss": 17.6521, "step": 30651 }, { "epoch": 0.5602939294788601, "grad_norm": 7.453421360083235, "learning_rate": 4.269386217340175e-06, "loss": 17.3849, "step": 30652 }, { "epoch": 0.5603122086753066, "grad_norm": 5.866815345803801, "learning_rate": 4.269093384332737e-06, "loss": 17.3067, "step": 30653 }, { "epoch": 0.5603304878717531, "grad_norm": 7.384473822549667, "learning_rate": 4.268800553887065e-06, "loss": 18.2049, "step": 30654 }, { "epoch": 0.5603487670681997, "grad_norm": 5.473604778088917, "learning_rate": 4.268507726004182e-06, "loss": 17.0555, "step": 30655 }, { "epoch": 0.5603670462646462, "grad_norm": 6.333889100172625, "learning_rate": 4.268214900685117e-06, "loss": 17.5159, "step": 30656 }, { "epoch": 0.5603853254610928, "grad_norm": 5.848757083672497, "learning_rate": 4.267922077930896e-06, "loss": 17.2496, "step": 30657 }, { "epoch": 0.5604036046575392, "grad_norm": 5.750730090445108, "learning_rate": 4.267629257742542e-06, "loss": 17.1464, "step": 30658 }, { "epoch": 0.5604218838539857, "grad_norm": 5.955669152664503, "learning_rate": 4.2673364401210874e-06, "loss": 17.1056, "step": 30659 }, { "epoch": 0.5604401630504323, "grad_norm": 6.377185215918042, "learning_rate": 4.267043625067553e-06, "loss": 17.8234, "step": 30660 }, { "epoch": 0.5604584422468788, "grad_norm": 5.001377605634453, "learning_rate": 4.266750812582966e-06, "loss": 16.8805, "step": 30661 }, { "epoch": 0.5604767214433254, "grad_norm": 5.853003136584932, "learning_rate": 4.266458002668356e-06, "loss": 17.3671, "step": 30662 }, { "epoch": 0.5604950006397719, "grad_norm": 6.706953341210821, "learning_rate": 4.2661651953247465e-06, "loss": 17.8733, "step": 30663 }, { "epoch": 0.5605132798362183, "grad_norm": 7.1075110909991155, "learning_rate": 4.2658723905531634e-06, "loss": 17.9927, "step": 30664 }, { "epoch": 0.5605315590326649, "grad_norm": 6.57019187611103, "learning_rate": 4.265579588354634e-06, "loss": 17.1152, "step": 30665 }, { "epoch": 0.5605498382291114, "grad_norm": 6.460957852855719, "learning_rate": 4.265286788730186e-06, "loss": 17.392, "step": 30666 }, { "epoch": 0.560568117425558, "grad_norm": 8.476727364230465, "learning_rate": 4.264993991680842e-06, "loss": 17.8893, "step": 30667 }, { "epoch": 0.5605863966220045, "grad_norm": 5.579511763973221, "learning_rate": 4.264701197207631e-06, "loss": 17.2189, "step": 30668 }, { "epoch": 0.560604675818451, "grad_norm": 7.262210157413281, "learning_rate": 4.2644084053115795e-06, "loss": 17.7668, "step": 30669 }, { "epoch": 0.5606229550148976, "grad_norm": 6.73973982587366, "learning_rate": 4.264115615993711e-06, "loss": 17.4905, "step": 30670 }, { "epoch": 0.560641234211344, "grad_norm": 6.566583323521271, "learning_rate": 4.263822829255055e-06, "loss": 17.6538, "step": 30671 }, { "epoch": 0.5606595134077906, "grad_norm": 7.32410974496368, "learning_rate": 4.263530045096634e-06, "loss": 18.0012, "step": 30672 }, { "epoch": 0.5606777926042371, "grad_norm": 4.19095652271554, "learning_rate": 4.263237263519477e-06, "loss": 16.6822, "step": 30673 }, { "epoch": 0.5606960718006836, "grad_norm": 7.4201681896287655, "learning_rate": 4.262944484524612e-06, "loss": 18.0377, "step": 30674 }, { "epoch": 0.5607143509971302, "grad_norm": 7.960657930923976, "learning_rate": 4.262651708113059e-06, "loss": 17.5795, "step": 30675 }, { "epoch": 0.5607326301935767, "grad_norm": 5.431679113587651, "learning_rate": 4.26235893428585e-06, "loss": 17.1749, "step": 30676 }, { "epoch": 0.5607509093900233, "grad_norm": 6.991182331760715, "learning_rate": 4.262066163044009e-06, "loss": 17.5735, "step": 30677 }, { "epoch": 0.5607691885864697, "grad_norm": 5.834489530238793, "learning_rate": 4.2617733943885605e-06, "loss": 17.3326, "step": 30678 }, { "epoch": 0.5607874677829162, "grad_norm": 5.950919236415345, "learning_rate": 4.261480628320534e-06, "loss": 17.4172, "step": 30679 }, { "epoch": 0.5608057469793628, "grad_norm": 5.538390040881581, "learning_rate": 4.261187864840955e-06, "loss": 17.1928, "step": 30680 }, { "epoch": 0.5608240261758093, "grad_norm": 5.056040412603273, "learning_rate": 4.260895103950844e-06, "loss": 16.8524, "step": 30681 }, { "epoch": 0.5608423053722559, "grad_norm": 5.551471255391645, "learning_rate": 4.260602345651234e-06, "loss": 17.1843, "step": 30682 }, { "epoch": 0.5608605845687024, "grad_norm": 5.753280225068788, "learning_rate": 4.2603095899431505e-06, "loss": 17.2431, "step": 30683 }, { "epoch": 0.5608788637651488, "grad_norm": 6.0924202058435375, "learning_rate": 4.260016836827614e-06, "loss": 17.4873, "step": 30684 }, { "epoch": 0.5608971429615954, "grad_norm": 5.7366484701329, "learning_rate": 4.259724086305657e-06, "loss": 17.3077, "step": 30685 }, { "epoch": 0.5609154221580419, "grad_norm": 6.730385435303099, "learning_rate": 4.2594313383783e-06, "loss": 17.5175, "step": 30686 }, { "epoch": 0.5609337013544885, "grad_norm": 5.972553642483855, "learning_rate": 4.259138593046574e-06, "loss": 17.323, "step": 30687 }, { "epoch": 0.560951980550935, "grad_norm": 5.852992137662361, "learning_rate": 4.258845850311504e-06, "loss": 17.1262, "step": 30688 }, { "epoch": 0.5609702597473815, "grad_norm": 6.235260574701667, "learning_rate": 4.258553110174113e-06, "loss": 17.5756, "step": 30689 }, { "epoch": 0.560988538943828, "grad_norm": 5.915564239270342, "learning_rate": 4.25826037263543e-06, "loss": 17.344, "step": 30690 }, { "epoch": 0.5610068181402745, "grad_norm": 5.839635912788676, "learning_rate": 4.2579676376964796e-06, "loss": 17.1802, "step": 30691 }, { "epoch": 0.5610250973367211, "grad_norm": 5.816043972155075, "learning_rate": 4.257674905358288e-06, "loss": 17.3563, "step": 30692 }, { "epoch": 0.5610433765331676, "grad_norm": 6.651949559676582, "learning_rate": 4.257382175621883e-06, "loss": 17.7267, "step": 30693 }, { "epoch": 0.5610616557296141, "grad_norm": 5.535875611086384, "learning_rate": 4.257089448488288e-06, "loss": 17.1538, "step": 30694 }, { "epoch": 0.5610799349260607, "grad_norm": 7.952855127629984, "learning_rate": 4.256796723958529e-06, "loss": 18.353, "step": 30695 }, { "epoch": 0.5610982141225072, "grad_norm": 6.259876064476091, "learning_rate": 4.256504002033634e-06, "loss": 17.2628, "step": 30696 }, { "epoch": 0.5611164933189537, "grad_norm": 6.0703939609752355, "learning_rate": 4.256211282714628e-06, "loss": 17.2401, "step": 30697 }, { "epoch": 0.5611347725154002, "grad_norm": 7.135144106827404, "learning_rate": 4.255918566002536e-06, "loss": 17.8006, "step": 30698 }, { "epoch": 0.5611530517118467, "grad_norm": 8.03039671380653, "learning_rate": 4.255625851898386e-06, "loss": 18.1062, "step": 30699 }, { "epoch": 0.5611713309082933, "grad_norm": 6.330937233892204, "learning_rate": 4.255333140403203e-06, "loss": 17.5815, "step": 30700 }, { "epoch": 0.5611896101047398, "grad_norm": 6.191928985515472, "learning_rate": 4.25504043151801e-06, "loss": 17.3209, "step": 30701 }, { "epoch": 0.5612078893011864, "grad_norm": 6.0771075763334625, "learning_rate": 4.25474772524384e-06, "loss": 17.3461, "step": 30702 }, { "epoch": 0.5612261684976328, "grad_norm": 5.831054529499173, "learning_rate": 4.254455021581711e-06, "loss": 17.2604, "step": 30703 }, { "epoch": 0.5612444476940793, "grad_norm": 5.340813812410247, "learning_rate": 4.2541623205326545e-06, "loss": 17.2396, "step": 30704 }, { "epoch": 0.5612627268905259, "grad_norm": 6.942917844316992, "learning_rate": 4.2538696220976936e-06, "loss": 17.7169, "step": 30705 }, { "epoch": 0.5612810060869724, "grad_norm": 6.434133642157284, "learning_rate": 4.253576926277854e-06, "loss": 17.4998, "step": 30706 }, { "epoch": 0.561299285283419, "grad_norm": 6.750139155097896, "learning_rate": 4.253284233074164e-06, "loss": 17.7577, "step": 30707 }, { "epoch": 0.5613175644798655, "grad_norm": 8.2129351031172, "learning_rate": 4.2529915424876485e-06, "loss": 18.5348, "step": 30708 }, { "epoch": 0.561335843676312, "grad_norm": 5.54193366265858, "learning_rate": 4.2526988545193305e-06, "loss": 17.2161, "step": 30709 }, { "epoch": 0.5613541228727585, "grad_norm": 7.208406623880239, "learning_rate": 4.252406169170239e-06, "loss": 17.8701, "step": 30710 }, { "epoch": 0.561372402069205, "grad_norm": 5.540903370300955, "learning_rate": 4.2521134864414e-06, "loss": 16.9738, "step": 30711 }, { "epoch": 0.5613906812656516, "grad_norm": 6.201250415860696, "learning_rate": 4.251820806333837e-06, "loss": 17.4036, "step": 30712 }, { "epoch": 0.5614089604620981, "grad_norm": 7.676490773845126, "learning_rate": 4.251528128848579e-06, "loss": 18.0377, "step": 30713 }, { "epoch": 0.5614272396585446, "grad_norm": 6.558864879542333, "learning_rate": 4.251235453986648e-06, "loss": 17.8562, "step": 30714 }, { "epoch": 0.5614455188549912, "grad_norm": 7.024257127300257, "learning_rate": 4.250942781749071e-06, "loss": 17.9654, "step": 30715 }, { "epoch": 0.5614637980514376, "grad_norm": 6.220945186390179, "learning_rate": 4.250650112136878e-06, "loss": 17.0089, "step": 30716 }, { "epoch": 0.5614820772478842, "grad_norm": 5.67394621784418, "learning_rate": 4.250357445151088e-06, "loss": 16.9439, "step": 30717 }, { "epoch": 0.5615003564443307, "grad_norm": 5.133319554397015, "learning_rate": 4.250064780792733e-06, "loss": 16.9774, "step": 30718 }, { "epoch": 0.5615186356407772, "grad_norm": 6.48950365685597, "learning_rate": 4.2497721190628335e-06, "loss": 17.3652, "step": 30719 }, { "epoch": 0.5615369148372238, "grad_norm": 5.829098522538662, "learning_rate": 4.249479459962418e-06, "loss": 17.3266, "step": 30720 }, { "epoch": 0.5615551940336703, "grad_norm": 7.631036848611796, "learning_rate": 4.249186803492513e-06, "loss": 17.4351, "step": 30721 }, { "epoch": 0.5615734732301167, "grad_norm": 7.160714088615333, "learning_rate": 4.248894149654144e-06, "loss": 18.0624, "step": 30722 }, { "epoch": 0.5615917524265633, "grad_norm": 5.482237258763715, "learning_rate": 4.248601498448333e-06, "loss": 17.1022, "step": 30723 }, { "epoch": 0.5616100316230098, "grad_norm": 5.722910844330274, "learning_rate": 4.24830884987611e-06, "loss": 17.1919, "step": 30724 }, { "epoch": 0.5616283108194564, "grad_norm": 5.044094405104967, "learning_rate": 4.2480162039385e-06, "loss": 17.0486, "step": 30725 }, { "epoch": 0.5616465900159029, "grad_norm": 6.8347506142739745, "learning_rate": 4.247723560636526e-06, "loss": 17.9839, "step": 30726 }, { "epoch": 0.5616648692123494, "grad_norm": 6.764886802759526, "learning_rate": 4.2474309199712175e-06, "loss": 17.3787, "step": 30727 }, { "epoch": 0.561683148408796, "grad_norm": 5.172930647300561, "learning_rate": 4.247138281943597e-06, "loss": 17.0299, "step": 30728 }, { "epoch": 0.5617014276052424, "grad_norm": 6.148190682178059, "learning_rate": 4.246845646554692e-06, "loss": 17.2803, "step": 30729 }, { "epoch": 0.561719706801689, "grad_norm": 6.122676560331744, "learning_rate": 4.246553013805528e-06, "loss": 17.5815, "step": 30730 }, { "epoch": 0.5617379859981355, "grad_norm": 7.98672447139724, "learning_rate": 4.2462603836971304e-06, "loss": 18.4289, "step": 30731 }, { "epoch": 0.561756265194582, "grad_norm": 5.505768512270076, "learning_rate": 4.245967756230524e-06, "loss": 17.2569, "step": 30732 }, { "epoch": 0.5617745443910286, "grad_norm": 7.68141911130188, "learning_rate": 4.2456751314067354e-06, "loss": 17.6099, "step": 30733 }, { "epoch": 0.5617928235874751, "grad_norm": 5.343907459870599, "learning_rate": 4.245382509226789e-06, "loss": 17.0698, "step": 30734 }, { "epoch": 0.5618111027839217, "grad_norm": 8.018932447957917, "learning_rate": 4.245089889691714e-06, "loss": 18.1537, "step": 30735 }, { "epoch": 0.5618293819803681, "grad_norm": 6.877026034949104, "learning_rate": 4.244797272802531e-06, "loss": 17.6667, "step": 30736 }, { "epoch": 0.5618476611768146, "grad_norm": 7.297594083269932, "learning_rate": 4.244504658560269e-06, "loss": 17.7187, "step": 30737 }, { "epoch": 0.5618659403732612, "grad_norm": 5.97324352291157, "learning_rate": 4.244212046965953e-06, "loss": 17.2193, "step": 30738 }, { "epoch": 0.5618842195697077, "grad_norm": 6.700550244389717, "learning_rate": 4.243919438020609e-06, "loss": 17.3798, "step": 30739 }, { "epoch": 0.5619024987661543, "grad_norm": 5.0456162427398255, "learning_rate": 4.243626831725259e-06, "loss": 16.9613, "step": 30740 }, { "epoch": 0.5619207779626008, "grad_norm": 6.462529114790786, "learning_rate": 4.243334228080933e-06, "loss": 17.7619, "step": 30741 }, { "epoch": 0.5619390571590472, "grad_norm": 6.9713632389369495, "learning_rate": 4.243041627088656e-06, "loss": 17.7594, "step": 30742 }, { "epoch": 0.5619573363554938, "grad_norm": 5.187939626441518, "learning_rate": 4.24274902874945e-06, "loss": 16.9252, "step": 30743 }, { "epoch": 0.5619756155519403, "grad_norm": 7.51187794363933, "learning_rate": 4.242456433064345e-06, "loss": 18.04, "step": 30744 }, { "epoch": 0.5619938947483869, "grad_norm": 8.188367108913758, "learning_rate": 4.242163840034363e-06, "loss": 17.9611, "step": 30745 }, { "epoch": 0.5620121739448334, "grad_norm": 6.774608955600064, "learning_rate": 4.241871249660531e-06, "loss": 17.6076, "step": 30746 }, { "epoch": 0.5620304531412799, "grad_norm": 6.632007953324351, "learning_rate": 4.241578661943876e-06, "loss": 17.6591, "step": 30747 }, { "epoch": 0.5620487323377265, "grad_norm": 5.337263913855719, "learning_rate": 4.24128607688542e-06, "loss": 17.094, "step": 30748 }, { "epoch": 0.5620670115341729, "grad_norm": 10.825834171192096, "learning_rate": 4.240993494486192e-06, "loss": 18.2791, "step": 30749 }, { "epoch": 0.5620852907306195, "grad_norm": 6.5210168674639855, "learning_rate": 4.240700914747216e-06, "loss": 17.5579, "step": 30750 }, { "epoch": 0.562103569927066, "grad_norm": 5.855964522453207, "learning_rate": 4.2404083376695145e-06, "loss": 17.3464, "step": 30751 }, { "epoch": 0.5621218491235125, "grad_norm": 7.244060494124049, "learning_rate": 4.24011576325412e-06, "loss": 17.2597, "step": 30752 }, { "epoch": 0.5621401283199591, "grad_norm": 6.821588927754696, "learning_rate": 4.239823191502053e-06, "loss": 17.7292, "step": 30753 }, { "epoch": 0.5621584075164056, "grad_norm": 6.913970259456344, "learning_rate": 4.239530622414337e-06, "loss": 17.4378, "step": 30754 }, { "epoch": 0.5621766867128521, "grad_norm": 6.452831584918704, "learning_rate": 4.239238055992002e-06, "loss": 17.8133, "step": 30755 }, { "epoch": 0.5621949659092986, "grad_norm": 5.841500119145188, "learning_rate": 4.238945492236073e-06, "loss": 17.0527, "step": 30756 }, { "epoch": 0.5622132451057451, "grad_norm": 5.555965300477881, "learning_rate": 4.238652931147571e-06, "loss": 17.2982, "step": 30757 }, { "epoch": 0.5622315243021917, "grad_norm": 4.7523275668546106, "learning_rate": 4.2383603727275265e-06, "loss": 16.8294, "step": 30758 }, { "epoch": 0.5622498034986382, "grad_norm": 6.209475545923187, "learning_rate": 4.238067816976963e-06, "loss": 17.3829, "step": 30759 }, { "epoch": 0.5622680826950848, "grad_norm": 6.284367034931512, "learning_rate": 4.237775263896903e-06, "loss": 17.4595, "step": 30760 }, { "epoch": 0.5622863618915313, "grad_norm": 6.375287126959338, "learning_rate": 4.237482713488378e-06, "loss": 17.1904, "step": 30761 }, { "epoch": 0.5623046410879777, "grad_norm": 5.6760434166889855, "learning_rate": 4.23719016575241e-06, "loss": 17.1457, "step": 30762 }, { "epoch": 0.5623229202844243, "grad_norm": 6.243765478690133, "learning_rate": 4.2368976206900204e-06, "loss": 17.4673, "step": 30763 }, { "epoch": 0.5623411994808708, "grad_norm": 7.068221264331426, "learning_rate": 4.236605078302241e-06, "loss": 17.9197, "step": 30764 }, { "epoch": 0.5623594786773174, "grad_norm": 6.025882971327559, "learning_rate": 4.236312538590093e-06, "loss": 17.0577, "step": 30765 }, { "epoch": 0.5623777578737639, "grad_norm": 8.238117601930025, "learning_rate": 4.236020001554606e-06, "loss": 18.3754, "step": 30766 }, { "epoch": 0.5623960370702104, "grad_norm": 5.95141261685843, "learning_rate": 4.235727467196802e-06, "loss": 17.2973, "step": 30767 }, { "epoch": 0.5624143162666569, "grad_norm": 5.078630489129105, "learning_rate": 4.235434935517705e-06, "loss": 17.0988, "step": 30768 }, { "epoch": 0.5624325954631034, "grad_norm": 6.918747911417355, "learning_rate": 4.235142406518343e-06, "loss": 17.6663, "step": 30769 }, { "epoch": 0.56245087465955, "grad_norm": 7.4786253101741815, "learning_rate": 4.2348498801997416e-06, "loss": 18.0361, "step": 30770 }, { "epoch": 0.5624691538559965, "grad_norm": 7.947845233312968, "learning_rate": 4.234557356562923e-06, "loss": 17.8562, "step": 30771 }, { "epoch": 0.562487433052443, "grad_norm": 6.056678027342697, "learning_rate": 4.234264835608917e-06, "loss": 17.4149, "step": 30772 }, { "epoch": 0.5625057122488896, "grad_norm": 8.036967290114301, "learning_rate": 4.2339723173387445e-06, "loss": 17.9598, "step": 30773 }, { "epoch": 0.562523991445336, "grad_norm": 5.9948340227684405, "learning_rate": 4.233679801753431e-06, "loss": 16.935, "step": 30774 }, { "epoch": 0.5625422706417826, "grad_norm": 7.272101974309155, "learning_rate": 4.233387288854007e-06, "loss": 17.5786, "step": 30775 }, { "epoch": 0.5625605498382291, "grad_norm": 5.648565789918377, "learning_rate": 4.233094778641493e-06, "loss": 17.2716, "step": 30776 }, { "epoch": 0.5625788290346756, "grad_norm": 8.439302450914386, "learning_rate": 4.232802271116914e-06, "loss": 17.6621, "step": 30777 }, { "epoch": 0.5625971082311222, "grad_norm": 6.241543072909847, "learning_rate": 4.232509766281296e-06, "loss": 17.2041, "step": 30778 }, { "epoch": 0.5626153874275687, "grad_norm": 6.561994909917126, "learning_rate": 4.232217264135665e-06, "loss": 17.4324, "step": 30779 }, { "epoch": 0.5626336666240153, "grad_norm": 6.365368847532895, "learning_rate": 4.231924764681047e-06, "loss": 17.5217, "step": 30780 }, { "epoch": 0.5626519458204617, "grad_norm": 6.7506905296662865, "learning_rate": 4.231632267918467e-06, "loss": 17.384, "step": 30781 }, { "epoch": 0.5626702250169082, "grad_norm": 7.638855796334053, "learning_rate": 4.231339773848946e-06, "loss": 17.9392, "step": 30782 }, { "epoch": 0.5626885042133548, "grad_norm": 5.913760212465004, "learning_rate": 4.231047282473514e-06, "loss": 17.4368, "step": 30783 }, { "epoch": 0.5627067834098013, "grad_norm": 5.865274290818203, "learning_rate": 4.2307547937931956e-06, "loss": 17.4496, "step": 30784 }, { "epoch": 0.5627250626062479, "grad_norm": 5.729855031715466, "learning_rate": 4.230462307809013e-06, "loss": 17.2689, "step": 30785 }, { "epoch": 0.5627433418026944, "grad_norm": 7.045982321641113, "learning_rate": 4.2301698245219955e-06, "loss": 18.0207, "step": 30786 }, { "epoch": 0.5627616209991408, "grad_norm": 5.803570268050635, "learning_rate": 4.229877343933165e-06, "loss": 17.3668, "step": 30787 }, { "epoch": 0.5627799001955874, "grad_norm": 5.238263057638898, "learning_rate": 4.229584866043546e-06, "loss": 17.1857, "step": 30788 }, { "epoch": 0.5627981793920339, "grad_norm": 6.208715148398314, "learning_rate": 4.2292923908541675e-06, "loss": 17.4621, "step": 30789 }, { "epoch": 0.5628164585884804, "grad_norm": 5.419713293141553, "learning_rate": 4.228999918366052e-06, "loss": 17.0616, "step": 30790 }, { "epoch": 0.562834737784927, "grad_norm": 7.913072486997195, "learning_rate": 4.228707448580223e-06, "loss": 18.0179, "step": 30791 }, { "epoch": 0.5628530169813735, "grad_norm": 5.8977501094094364, "learning_rate": 4.228414981497709e-06, "loss": 17.3958, "step": 30792 }, { "epoch": 0.5628712961778201, "grad_norm": 6.516956484717333, "learning_rate": 4.228122517119532e-06, "loss": 17.5154, "step": 30793 }, { "epoch": 0.5628895753742665, "grad_norm": 6.842224714289712, "learning_rate": 4.227830055446721e-06, "loss": 17.354, "step": 30794 }, { "epoch": 0.562907854570713, "grad_norm": 5.838073200132985, "learning_rate": 4.227537596480297e-06, "loss": 17.2228, "step": 30795 }, { "epoch": 0.5629261337671596, "grad_norm": 6.757369650440264, "learning_rate": 4.227245140221286e-06, "loss": 17.3274, "step": 30796 }, { "epoch": 0.5629444129636061, "grad_norm": 7.422113275843619, "learning_rate": 4.2269526866707155e-06, "loss": 17.8135, "step": 30797 }, { "epoch": 0.5629626921600527, "grad_norm": 7.149785552226969, "learning_rate": 4.226660235829609e-06, "loss": 17.9977, "step": 30798 }, { "epoch": 0.5629809713564992, "grad_norm": 6.735613282558273, "learning_rate": 4.2263677876989886e-06, "loss": 17.5219, "step": 30799 }, { "epoch": 0.5629992505529456, "grad_norm": 6.097842724474554, "learning_rate": 4.226075342279884e-06, "loss": 17.5284, "step": 30800 }, { "epoch": 0.5630175297493922, "grad_norm": 6.398783101288377, "learning_rate": 4.2257828995733195e-06, "loss": 17.6688, "step": 30801 }, { "epoch": 0.5630358089458387, "grad_norm": 5.056835860478527, "learning_rate": 4.225490459580316e-06, "loss": 17.1245, "step": 30802 }, { "epoch": 0.5630540881422853, "grad_norm": 6.37601093619911, "learning_rate": 4.2251980223019025e-06, "loss": 17.4172, "step": 30803 }, { "epoch": 0.5630723673387318, "grad_norm": 6.980314793408576, "learning_rate": 4.224905587739102e-06, "loss": 17.8723, "step": 30804 }, { "epoch": 0.5630906465351783, "grad_norm": 7.595568141200255, "learning_rate": 4.22461315589294e-06, "loss": 18.025, "step": 30805 }, { "epoch": 0.5631089257316249, "grad_norm": 6.886980801942908, "learning_rate": 4.224320726764443e-06, "loss": 17.4217, "step": 30806 }, { "epoch": 0.5631272049280713, "grad_norm": 5.344928322796557, "learning_rate": 4.224028300354635e-06, "loss": 16.9523, "step": 30807 }, { "epoch": 0.5631454841245179, "grad_norm": 7.153475555424431, "learning_rate": 4.223735876664539e-06, "loss": 18.0407, "step": 30808 }, { "epoch": 0.5631637633209644, "grad_norm": 5.856037643917399, "learning_rate": 4.223443455695182e-06, "loss": 17.2434, "step": 30809 }, { "epoch": 0.5631820425174109, "grad_norm": 5.940881798532824, "learning_rate": 4.2231510374475856e-06, "loss": 17.4435, "step": 30810 }, { "epoch": 0.5632003217138575, "grad_norm": 5.103090259757765, "learning_rate": 4.222858621922781e-06, "loss": 16.9211, "step": 30811 }, { "epoch": 0.563218600910304, "grad_norm": 5.571129016942117, "learning_rate": 4.2225662091217895e-06, "loss": 17.2237, "step": 30812 }, { "epoch": 0.5632368801067505, "grad_norm": 6.949117196690638, "learning_rate": 4.222273799045634e-06, "loss": 17.6905, "step": 30813 }, { "epoch": 0.563255159303197, "grad_norm": 5.948592343156393, "learning_rate": 4.2219813916953415e-06, "loss": 17.418, "step": 30814 }, { "epoch": 0.5632734384996435, "grad_norm": 5.735987186934883, "learning_rate": 4.221688987071938e-06, "loss": 17.46, "step": 30815 }, { "epoch": 0.5632917176960901, "grad_norm": 4.629791176916226, "learning_rate": 4.221396585176446e-06, "loss": 16.8419, "step": 30816 }, { "epoch": 0.5633099968925366, "grad_norm": 5.689096619608073, "learning_rate": 4.221104186009893e-06, "loss": 17.2538, "step": 30817 }, { "epoch": 0.5633282760889832, "grad_norm": 7.881068106125198, "learning_rate": 4.2208117895733005e-06, "loss": 17.5661, "step": 30818 }, { "epoch": 0.5633465552854297, "grad_norm": 5.880933390290706, "learning_rate": 4.220519395867695e-06, "loss": 17.3896, "step": 30819 }, { "epoch": 0.5633648344818761, "grad_norm": 6.659174091611379, "learning_rate": 4.2202270048941025e-06, "loss": 17.8817, "step": 30820 }, { "epoch": 0.5633831136783227, "grad_norm": 6.1728456391076865, "learning_rate": 4.219934616653547e-06, "loss": 17.4935, "step": 30821 }, { "epoch": 0.5634013928747692, "grad_norm": 7.613767004541185, "learning_rate": 4.219642231147051e-06, "loss": 18.1194, "step": 30822 }, { "epoch": 0.5634196720712158, "grad_norm": 6.391360480207904, "learning_rate": 4.219349848375642e-06, "loss": 17.6341, "step": 30823 }, { "epoch": 0.5634379512676623, "grad_norm": 6.237942785683206, "learning_rate": 4.219057468340344e-06, "loss": 17.3429, "step": 30824 }, { "epoch": 0.5634562304641088, "grad_norm": 8.748234117775157, "learning_rate": 4.218765091042183e-06, "loss": 18.1697, "step": 30825 }, { "epoch": 0.5634745096605553, "grad_norm": 8.926347374478498, "learning_rate": 4.218472716482183e-06, "loss": 18.0614, "step": 30826 }, { "epoch": 0.5634927888570018, "grad_norm": 6.885909915098423, "learning_rate": 4.218180344661365e-06, "loss": 17.591, "step": 30827 }, { "epoch": 0.5635110680534484, "grad_norm": 6.132767445669035, "learning_rate": 4.21788797558076e-06, "loss": 17.3591, "step": 30828 }, { "epoch": 0.5635293472498949, "grad_norm": 5.883197673121673, "learning_rate": 4.21759560924139e-06, "loss": 17.0561, "step": 30829 }, { "epoch": 0.5635476264463414, "grad_norm": 6.949917517843899, "learning_rate": 4.217303245644277e-06, "loss": 17.8457, "step": 30830 }, { "epoch": 0.563565905642788, "grad_norm": 5.267295795080912, "learning_rate": 4.217010884790451e-06, "loss": 17.0376, "step": 30831 }, { "epoch": 0.5635841848392344, "grad_norm": 5.737768108774339, "learning_rate": 4.216718526680933e-06, "loss": 17.4008, "step": 30832 }, { "epoch": 0.563602464035681, "grad_norm": 6.452883816274633, "learning_rate": 4.2164261713167464e-06, "loss": 17.4763, "step": 30833 }, { "epoch": 0.5636207432321275, "grad_norm": 9.259901877683417, "learning_rate": 4.216133818698921e-06, "loss": 17.7742, "step": 30834 }, { "epoch": 0.563639022428574, "grad_norm": 6.792677175877593, "learning_rate": 4.215841468828478e-06, "loss": 17.6927, "step": 30835 }, { "epoch": 0.5636573016250206, "grad_norm": 6.259594701710972, "learning_rate": 4.215549121706441e-06, "loss": 17.628, "step": 30836 }, { "epoch": 0.5636755808214671, "grad_norm": 7.5288789070564395, "learning_rate": 4.215256777333837e-06, "loss": 18.0666, "step": 30837 }, { "epoch": 0.5636938600179137, "grad_norm": 7.294331739788232, "learning_rate": 4.214964435711692e-06, "loss": 18.05, "step": 30838 }, { "epoch": 0.5637121392143601, "grad_norm": 5.875930430329771, "learning_rate": 4.214672096841025e-06, "loss": 17.3899, "step": 30839 }, { "epoch": 0.5637304184108066, "grad_norm": 6.231974040488718, "learning_rate": 4.2143797607228665e-06, "loss": 17.6403, "step": 30840 }, { "epoch": 0.5637486976072532, "grad_norm": 6.372596276988972, "learning_rate": 4.214087427358237e-06, "loss": 17.4613, "step": 30841 }, { "epoch": 0.5637669768036997, "grad_norm": 5.065790810606548, "learning_rate": 4.213795096748164e-06, "loss": 17.1392, "step": 30842 }, { "epoch": 0.5637852560001463, "grad_norm": 5.2844035293537175, "learning_rate": 4.213502768893672e-06, "loss": 17.2252, "step": 30843 }, { "epoch": 0.5638035351965928, "grad_norm": 6.546501713760461, "learning_rate": 4.213210443795782e-06, "loss": 17.4192, "step": 30844 }, { "epoch": 0.5638218143930392, "grad_norm": 6.547684945297086, "learning_rate": 4.212918121455524e-06, "loss": 17.5505, "step": 30845 }, { "epoch": 0.5638400935894858, "grad_norm": 5.893344415160216, "learning_rate": 4.212625801873918e-06, "loss": 17.2882, "step": 30846 }, { "epoch": 0.5638583727859323, "grad_norm": 7.458778689296547, "learning_rate": 4.212333485051989e-06, "loss": 17.9027, "step": 30847 }, { "epoch": 0.5638766519823789, "grad_norm": 6.511548842475632, "learning_rate": 4.212041170990766e-06, "loss": 17.1811, "step": 30848 }, { "epoch": 0.5638949311788254, "grad_norm": 6.74352801165179, "learning_rate": 4.211748859691269e-06, "loss": 17.8941, "step": 30849 }, { "epoch": 0.5639132103752719, "grad_norm": 5.644078515209366, "learning_rate": 4.211456551154522e-06, "loss": 17.1944, "step": 30850 }, { "epoch": 0.5639314895717185, "grad_norm": 7.3336203485311655, "learning_rate": 4.211164245381553e-06, "loss": 17.5939, "step": 30851 }, { "epoch": 0.5639497687681649, "grad_norm": 8.258313382783902, "learning_rate": 4.2108719423733855e-06, "loss": 18.2448, "step": 30852 }, { "epoch": 0.5639680479646115, "grad_norm": 6.6883350951765275, "learning_rate": 4.2105796421310415e-06, "loss": 17.6657, "step": 30853 }, { "epoch": 0.563986327161058, "grad_norm": 5.830440677282985, "learning_rate": 4.210287344655549e-06, "loss": 17.4458, "step": 30854 }, { "epoch": 0.5640046063575045, "grad_norm": 6.967084128966803, "learning_rate": 4.2099950499479295e-06, "loss": 17.4714, "step": 30855 }, { "epoch": 0.5640228855539511, "grad_norm": 6.072619834863188, "learning_rate": 4.2097027580092095e-06, "loss": 17.445, "step": 30856 }, { "epoch": 0.5640411647503976, "grad_norm": 6.027104953269051, "learning_rate": 4.2094104688404135e-06, "loss": 17.3038, "step": 30857 }, { "epoch": 0.564059443946844, "grad_norm": 5.46397773567712, "learning_rate": 4.209118182442564e-06, "loss": 17.1675, "step": 30858 }, { "epoch": 0.5640777231432906, "grad_norm": 4.991009565735435, "learning_rate": 4.208825898816686e-06, "loss": 17.0404, "step": 30859 }, { "epoch": 0.5640960023397371, "grad_norm": 4.822797849121115, "learning_rate": 4.208533617963807e-06, "loss": 16.7806, "step": 30860 }, { "epoch": 0.5641142815361837, "grad_norm": 7.4690703241972605, "learning_rate": 4.208241339884946e-06, "loss": 17.8498, "step": 30861 }, { "epoch": 0.5641325607326302, "grad_norm": 6.0954706967606676, "learning_rate": 4.207949064581134e-06, "loss": 17.2784, "step": 30862 }, { "epoch": 0.5641508399290767, "grad_norm": 7.405388478338493, "learning_rate": 4.20765679205339e-06, "loss": 18.4013, "step": 30863 }, { "epoch": 0.5641691191255233, "grad_norm": 7.121175553834033, "learning_rate": 4.2073645223027385e-06, "loss": 18.0998, "step": 30864 }, { "epoch": 0.5641873983219697, "grad_norm": 5.535920467791342, "learning_rate": 4.207072255330208e-06, "loss": 17.143, "step": 30865 }, { "epoch": 0.5642056775184163, "grad_norm": 6.807880378644455, "learning_rate": 4.2067799911368204e-06, "loss": 17.93, "step": 30866 }, { "epoch": 0.5642239567148628, "grad_norm": 6.673068444483618, "learning_rate": 4.206487729723598e-06, "loss": 17.7086, "step": 30867 }, { "epoch": 0.5642422359113093, "grad_norm": 5.759079249015247, "learning_rate": 4.206195471091568e-06, "loss": 17.3232, "step": 30868 }, { "epoch": 0.5642605151077559, "grad_norm": 6.880981896001818, "learning_rate": 4.205903215241756e-06, "loss": 17.7165, "step": 30869 }, { "epoch": 0.5642787943042024, "grad_norm": 4.909881605846965, "learning_rate": 4.205610962175181e-06, "loss": 16.8648, "step": 30870 }, { "epoch": 0.564297073500649, "grad_norm": 5.917558014734807, "learning_rate": 4.205318711892874e-06, "loss": 17.3819, "step": 30871 }, { "epoch": 0.5643153526970954, "grad_norm": 9.650905968730935, "learning_rate": 4.205026464395854e-06, "loss": 18.7541, "step": 30872 }, { "epoch": 0.5643336318935419, "grad_norm": 7.90501276704622, "learning_rate": 4.2047342196851475e-06, "loss": 18.0384, "step": 30873 }, { "epoch": 0.5643519110899885, "grad_norm": 7.418963299374328, "learning_rate": 4.204441977761779e-06, "loss": 18.0776, "step": 30874 }, { "epoch": 0.564370190286435, "grad_norm": 6.362904087451145, "learning_rate": 4.204149738626771e-06, "loss": 17.4567, "step": 30875 }, { "epoch": 0.5643884694828816, "grad_norm": 7.171011567439563, "learning_rate": 4.203857502281152e-06, "loss": 17.8002, "step": 30876 }, { "epoch": 0.564406748679328, "grad_norm": 6.553445749739051, "learning_rate": 4.203565268725941e-06, "loss": 17.3984, "step": 30877 }, { "epoch": 0.5644250278757745, "grad_norm": 6.626521346292536, "learning_rate": 4.203273037962164e-06, "loss": 17.5653, "step": 30878 }, { "epoch": 0.5644433070722211, "grad_norm": 6.9309097380867914, "learning_rate": 4.2029808099908485e-06, "loss": 17.5335, "step": 30879 }, { "epoch": 0.5644615862686676, "grad_norm": 7.111757202149042, "learning_rate": 4.202688584813015e-06, "loss": 17.5338, "step": 30880 }, { "epoch": 0.5644798654651142, "grad_norm": 6.630828625362821, "learning_rate": 4.202396362429688e-06, "loss": 17.5004, "step": 30881 }, { "epoch": 0.5644981446615607, "grad_norm": 4.955769698050874, "learning_rate": 4.202104142841892e-06, "loss": 16.8914, "step": 30882 }, { "epoch": 0.5645164238580072, "grad_norm": 5.254625816816333, "learning_rate": 4.201811926050654e-06, "loss": 17.041, "step": 30883 }, { "epoch": 0.5645347030544537, "grad_norm": 7.36279719825492, "learning_rate": 4.201519712056994e-06, "loss": 17.6953, "step": 30884 }, { "epoch": 0.5645529822509002, "grad_norm": 8.072310346102292, "learning_rate": 4.20122750086194e-06, "loss": 17.957, "step": 30885 }, { "epoch": 0.5645712614473468, "grad_norm": 7.0987951720163425, "learning_rate": 4.2009352924665114e-06, "loss": 17.7453, "step": 30886 }, { "epoch": 0.5645895406437933, "grad_norm": 6.169651991467815, "learning_rate": 4.2006430868717365e-06, "loss": 17.2855, "step": 30887 }, { "epoch": 0.5646078198402398, "grad_norm": 8.058223800478652, "learning_rate": 4.20035088407864e-06, "loss": 18.429, "step": 30888 }, { "epoch": 0.5646260990366864, "grad_norm": 7.131058487473326, "learning_rate": 4.200058684088242e-06, "loss": 17.7976, "step": 30889 }, { "epoch": 0.5646443782331328, "grad_norm": 6.81415071446923, "learning_rate": 4.199766486901571e-06, "loss": 17.5606, "step": 30890 }, { "epoch": 0.5646626574295794, "grad_norm": 7.857047931610414, "learning_rate": 4.199474292519648e-06, "loss": 18.1553, "step": 30891 }, { "epoch": 0.5646809366260259, "grad_norm": 5.753366156454175, "learning_rate": 4.199182100943497e-06, "loss": 17.0872, "step": 30892 }, { "epoch": 0.5646992158224724, "grad_norm": 5.891993602056949, "learning_rate": 4.1988899121741444e-06, "loss": 16.9939, "step": 30893 }, { "epoch": 0.564717495018919, "grad_norm": 6.6899747346627, "learning_rate": 4.198597726212614e-06, "loss": 17.5942, "step": 30894 }, { "epoch": 0.5647357742153655, "grad_norm": 5.965722870049553, "learning_rate": 4.198305543059927e-06, "loss": 17.2948, "step": 30895 }, { "epoch": 0.5647540534118121, "grad_norm": 6.787750672875219, "learning_rate": 4.19801336271711e-06, "loss": 17.9422, "step": 30896 }, { "epoch": 0.5647723326082585, "grad_norm": 6.931163169096163, "learning_rate": 4.197721185185188e-06, "loss": 17.3685, "step": 30897 }, { "epoch": 0.564790611804705, "grad_norm": 10.17272307203107, "learning_rate": 4.197429010465182e-06, "loss": 17.9594, "step": 30898 }, { "epoch": 0.5648088910011516, "grad_norm": 5.901262815541974, "learning_rate": 4.197136838558119e-06, "loss": 17.2452, "step": 30899 }, { "epoch": 0.5648271701975981, "grad_norm": 5.944446814742256, "learning_rate": 4.1968446694650214e-06, "loss": 17.297, "step": 30900 }, { "epoch": 0.5648454493940447, "grad_norm": 6.510367039738354, "learning_rate": 4.196552503186911e-06, "loss": 17.4811, "step": 30901 }, { "epoch": 0.5648637285904912, "grad_norm": 9.027826505813822, "learning_rate": 4.196260339724818e-06, "loss": 18.0448, "step": 30902 }, { "epoch": 0.5648820077869376, "grad_norm": 5.528594135710404, "learning_rate": 4.19596817907976e-06, "loss": 17.187, "step": 30903 }, { "epoch": 0.5649002869833842, "grad_norm": 6.862554686826502, "learning_rate": 4.195676021252766e-06, "loss": 17.8898, "step": 30904 }, { "epoch": 0.5649185661798307, "grad_norm": 7.030596559221329, "learning_rate": 4.195383866244857e-06, "loss": 17.5959, "step": 30905 }, { "epoch": 0.5649368453762773, "grad_norm": 5.892377368911421, "learning_rate": 4.195091714057057e-06, "loss": 17.449, "step": 30906 }, { "epoch": 0.5649551245727238, "grad_norm": 5.889515235561286, "learning_rate": 4.194799564690392e-06, "loss": 17.4563, "step": 30907 }, { "epoch": 0.5649734037691703, "grad_norm": 5.369524030549957, "learning_rate": 4.194507418145885e-06, "loss": 17.0017, "step": 30908 }, { "epoch": 0.5649916829656169, "grad_norm": 6.721199272911676, "learning_rate": 4.194215274424557e-06, "loss": 17.6228, "step": 30909 }, { "epoch": 0.5650099621620633, "grad_norm": 5.464351926678016, "learning_rate": 4.193923133527436e-06, "loss": 17.2892, "step": 30910 }, { "epoch": 0.5650282413585099, "grad_norm": 6.874650214166879, "learning_rate": 4.193630995455545e-06, "loss": 17.8366, "step": 30911 }, { "epoch": 0.5650465205549564, "grad_norm": 5.724891250781603, "learning_rate": 4.193338860209907e-06, "loss": 17.3398, "step": 30912 }, { "epoch": 0.5650647997514029, "grad_norm": 5.945458692014579, "learning_rate": 4.1930467277915465e-06, "loss": 17.307, "step": 30913 }, { "epoch": 0.5650830789478495, "grad_norm": 6.266545906706712, "learning_rate": 4.192754598201487e-06, "loss": 17.4377, "step": 30914 }, { "epoch": 0.565101358144296, "grad_norm": 6.536269631582382, "learning_rate": 4.192462471440751e-06, "loss": 17.3594, "step": 30915 }, { "epoch": 0.5651196373407426, "grad_norm": 6.2061725673005625, "learning_rate": 4.192170347510367e-06, "loss": 17.2505, "step": 30916 }, { "epoch": 0.565137916537189, "grad_norm": 6.284252634003124, "learning_rate": 4.191878226411353e-06, "loss": 17.5403, "step": 30917 }, { "epoch": 0.5651561957336355, "grad_norm": 5.6396851623042705, "learning_rate": 4.191586108144738e-06, "loss": 17.0961, "step": 30918 }, { "epoch": 0.5651744749300821, "grad_norm": 5.839289716209725, "learning_rate": 4.191293992711543e-06, "loss": 17.2649, "step": 30919 }, { "epoch": 0.5651927541265286, "grad_norm": 7.119329589114862, "learning_rate": 4.19100188011279e-06, "loss": 17.6934, "step": 30920 }, { "epoch": 0.5652110333229752, "grad_norm": 7.236084852055016, "learning_rate": 4.190709770349508e-06, "loss": 17.8306, "step": 30921 }, { "epoch": 0.5652293125194217, "grad_norm": 7.240715521992406, "learning_rate": 4.190417663422718e-06, "loss": 17.6424, "step": 30922 }, { "epoch": 0.5652475917158681, "grad_norm": 7.365921401043813, "learning_rate": 4.190125559333442e-06, "loss": 17.6909, "step": 30923 }, { "epoch": 0.5652658709123147, "grad_norm": 5.8617577573904605, "learning_rate": 4.189833458082707e-06, "loss": 17.1487, "step": 30924 }, { "epoch": 0.5652841501087612, "grad_norm": 5.9607595271089915, "learning_rate": 4.189541359671535e-06, "loss": 17.301, "step": 30925 }, { "epoch": 0.5653024293052077, "grad_norm": 5.289154614933739, "learning_rate": 4.18924926410095e-06, "loss": 17.1137, "step": 30926 }, { "epoch": 0.5653207085016543, "grad_norm": 5.565542067413639, "learning_rate": 4.188957171371976e-06, "loss": 17.1465, "step": 30927 }, { "epoch": 0.5653389876981008, "grad_norm": 5.800910965229874, "learning_rate": 4.188665081485638e-06, "loss": 17.2765, "step": 30928 }, { "epoch": 0.5653572668945474, "grad_norm": 5.035833960602543, "learning_rate": 4.188372994442956e-06, "loss": 16.9306, "step": 30929 }, { "epoch": 0.5653755460909938, "grad_norm": 6.474896709392681, "learning_rate": 4.188080910244959e-06, "loss": 17.6641, "step": 30930 }, { "epoch": 0.5653938252874403, "grad_norm": 7.046231141360335, "learning_rate": 4.187788828892666e-06, "loss": 17.7813, "step": 30931 }, { "epoch": 0.5654121044838869, "grad_norm": 7.596468619841819, "learning_rate": 4.187496750387102e-06, "loss": 17.6866, "step": 30932 }, { "epoch": 0.5654303836803334, "grad_norm": 6.4030841056784125, "learning_rate": 4.187204674729294e-06, "loss": 17.3976, "step": 30933 }, { "epoch": 0.56544866287678, "grad_norm": 7.219461561549237, "learning_rate": 4.186912601920261e-06, "loss": 18.5793, "step": 30934 }, { "epoch": 0.5654669420732265, "grad_norm": 7.407045087953841, "learning_rate": 4.18662053196103e-06, "loss": 17.842, "step": 30935 }, { "epoch": 0.5654852212696729, "grad_norm": 8.06576047940109, "learning_rate": 4.186328464852623e-06, "loss": 18.1221, "step": 30936 }, { "epoch": 0.5655035004661195, "grad_norm": 4.949831091288924, "learning_rate": 4.186036400596063e-06, "loss": 17.0826, "step": 30937 }, { "epoch": 0.565521779662566, "grad_norm": 7.22369434987708, "learning_rate": 4.185744339192377e-06, "loss": 17.7565, "step": 30938 }, { "epoch": 0.5655400588590126, "grad_norm": 6.003419972404139, "learning_rate": 4.185452280642586e-06, "loss": 17.1872, "step": 30939 }, { "epoch": 0.5655583380554591, "grad_norm": 5.438827429839033, "learning_rate": 4.185160224947713e-06, "loss": 17.0875, "step": 30940 }, { "epoch": 0.5655766172519056, "grad_norm": 6.0390814242050075, "learning_rate": 4.1848681721087824e-06, "loss": 17.395, "step": 30941 }, { "epoch": 0.5655948964483521, "grad_norm": 6.0567805843754625, "learning_rate": 4.18457612212682e-06, "loss": 17.3612, "step": 30942 }, { "epoch": 0.5656131756447986, "grad_norm": 7.033495038738799, "learning_rate": 4.184284075002845e-06, "loss": 18.0941, "step": 30943 }, { "epoch": 0.5656314548412452, "grad_norm": 6.073745477142125, "learning_rate": 4.183992030737886e-06, "loss": 17.4213, "step": 30944 }, { "epoch": 0.5656497340376917, "grad_norm": 8.500213194477453, "learning_rate": 4.183699989332963e-06, "loss": 18.2076, "step": 30945 }, { "epoch": 0.5656680132341382, "grad_norm": 6.0903966329298695, "learning_rate": 4.183407950789099e-06, "loss": 17.7607, "step": 30946 }, { "epoch": 0.5656862924305848, "grad_norm": 5.516845552048422, "learning_rate": 4.1831159151073225e-06, "loss": 16.9316, "step": 30947 }, { "epoch": 0.5657045716270312, "grad_norm": 6.529948519131915, "learning_rate": 4.182823882288651e-06, "loss": 17.67, "step": 30948 }, { "epoch": 0.5657228508234778, "grad_norm": 8.816862114967474, "learning_rate": 4.182531852334113e-06, "loss": 17.6056, "step": 30949 }, { "epoch": 0.5657411300199243, "grad_norm": 5.579108412938425, "learning_rate": 4.182239825244729e-06, "loss": 17.2567, "step": 30950 }, { "epoch": 0.5657594092163708, "grad_norm": 6.478500441925687, "learning_rate": 4.181947801021523e-06, "loss": 17.2613, "step": 30951 }, { "epoch": 0.5657776884128174, "grad_norm": 5.930889188267906, "learning_rate": 4.1816557796655205e-06, "loss": 17.2372, "step": 30952 }, { "epoch": 0.5657959676092639, "grad_norm": 6.473471257167637, "learning_rate": 4.1813637611777436e-06, "loss": 17.5949, "step": 30953 }, { "epoch": 0.5658142468057105, "grad_norm": 5.283159241969701, "learning_rate": 4.181071745559213e-06, "loss": 17.1148, "step": 30954 }, { "epoch": 0.5658325260021569, "grad_norm": 5.381851488439245, "learning_rate": 4.1807797328109566e-06, "loss": 17.2255, "step": 30955 }, { "epoch": 0.5658508051986034, "grad_norm": 5.751218519425152, "learning_rate": 4.180487722933997e-06, "loss": 17.1566, "step": 30956 }, { "epoch": 0.56586908439505, "grad_norm": 8.299598038338486, "learning_rate": 4.180195715929355e-06, "loss": 18.1035, "step": 30957 }, { "epoch": 0.5658873635914965, "grad_norm": 7.293374634175002, "learning_rate": 4.1799037117980575e-06, "loss": 17.7543, "step": 30958 }, { "epoch": 0.5659056427879431, "grad_norm": 5.915074932117422, "learning_rate": 4.179611710541125e-06, "loss": 17.2272, "step": 30959 }, { "epoch": 0.5659239219843896, "grad_norm": 5.866288718103683, "learning_rate": 4.179319712159582e-06, "loss": 17.1072, "step": 30960 }, { "epoch": 0.565942201180836, "grad_norm": 7.0013317575819265, "learning_rate": 4.179027716654454e-06, "loss": 17.6091, "step": 30961 }, { "epoch": 0.5659604803772826, "grad_norm": 5.838174273571455, "learning_rate": 4.17873572402676e-06, "loss": 17.2704, "step": 30962 }, { "epoch": 0.5659787595737291, "grad_norm": 6.346464334713841, "learning_rate": 4.178443734277529e-06, "loss": 17.3482, "step": 30963 }, { "epoch": 0.5659970387701757, "grad_norm": 6.141268421729551, "learning_rate": 4.17815174740778e-06, "loss": 17.2994, "step": 30964 }, { "epoch": 0.5660153179666222, "grad_norm": 4.544884719794078, "learning_rate": 4.177859763418537e-06, "loss": 16.7179, "step": 30965 }, { "epoch": 0.5660335971630687, "grad_norm": 6.83440264893387, "learning_rate": 4.177567782310825e-06, "loss": 17.5791, "step": 30966 }, { "epoch": 0.5660518763595153, "grad_norm": 6.117158641958698, "learning_rate": 4.177275804085668e-06, "loss": 17.4431, "step": 30967 }, { "epoch": 0.5660701555559617, "grad_norm": 6.735280744475995, "learning_rate": 4.176983828744086e-06, "loss": 17.6035, "step": 30968 }, { "epoch": 0.5660884347524083, "grad_norm": 5.357586224933753, "learning_rate": 4.176691856287105e-06, "loss": 17.1364, "step": 30969 }, { "epoch": 0.5661067139488548, "grad_norm": 6.586151504119733, "learning_rate": 4.176399886715747e-06, "loss": 17.6543, "step": 30970 }, { "epoch": 0.5661249931453013, "grad_norm": 6.968478048228679, "learning_rate": 4.176107920031036e-06, "loss": 17.6062, "step": 30971 }, { "epoch": 0.5661432723417479, "grad_norm": 6.634937610213008, "learning_rate": 4.175815956233996e-06, "loss": 17.6197, "step": 30972 }, { "epoch": 0.5661615515381944, "grad_norm": 6.473248760430254, "learning_rate": 4.1755239953256494e-06, "loss": 17.5213, "step": 30973 }, { "epoch": 0.566179830734641, "grad_norm": 6.685315603089902, "learning_rate": 4.1752320373070184e-06, "loss": 17.458, "step": 30974 }, { "epoch": 0.5661981099310874, "grad_norm": 5.658186683556263, "learning_rate": 4.174940082179129e-06, "loss": 17.2365, "step": 30975 }, { "epoch": 0.5662163891275339, "grad_norm": 6.450658832080238, "learning_rate": 4.174648129943004e-06, "loss": 17.7045, "step": 30976 }, { "epoch": 0.5662346683239805, "grad_norm": 6.943941028377243, "learning_rate": 4.174356180599663e-06, "loss": 17.8575, "step": 30977 }, { "epoch": 0.566252947520427, "grad_norm": 26.98019530182142, "learning_rate": 4.174064234150132e-06, "loss": 17.4134, "step": 30978 }, { "epoch": 0.5662712267168736, "grad_norm": 7.034836741891997, "learning_rate": 4.173772290595435e-06, "loss": 17.9679, "step": 30979 }, { "epoch": 0.5662895059133201, "grad_norm": 8.191097710742012, "learning_rate": 4.173480349936595e-06, "loss": 18.2136, "step": 30980 }, { "epoch": 0.5663077851097665, "grad_norm": 5.8689583374340275, "learning_rate": 4.173188412174635e-06, "loss": 17.2546, "step": 30981 }, { "epoch": 0.5663260643062131, "grad_norm": 6.813210587842774, "learning_rate": 4.1728964773105775e-06, "loss": 17.4832, "step": 30982 }, { "epoch": 0.5663443435026596, "grad_norm": 7.736093205207369, "learning_rate": 4.172604545345446e-06, "loss": 18.3412, "step": 30983 }, { "epoch": 0.5663626226991062, "grad_norm": 6.39140306267954, "learning_rate": 4.1723126162802644e-06, "loss": 17.0873, "step": 30984 }, { "epoch": 0.5663809018955527, "grad_norm": 6.145268820554754, "learning_rate": 4.172020690116053e-06, "loss": 17.552, "step": 30985 }, { "epoch": 0.5663991810919992, "grad_norm": 6.560018695694608, "learning_rate": 4.171728766853839e-06, "loss": 17.4355, "step": 30986 }, { "epoch": 0.5664174602884458, "grad_norm": 7.904811103508003, "learning_rate": 4.171436846494644e-06, "loss": 18.326, "step": 30987 }, { "epoch": 0.5664357394848922, "grad_norm": 7.175244036888275, "learning_rate": 4.17114492903949e-06, "loss": 17.8705, "step": 30988 }, { "epoch": 0.5664540186813388, "grad_norm": 7.649752468822515, "learning_rate": 4.170853014489402e-06, "loss": 18.0962, "step": 30989 }, { "epoch": 0.5664722978777853, "grad_norm": 5.779714087828378, "learning_rate": 4.170561102845402e-06, "loss": 17.2352, "step": 30990 }, { "epoch": 0.5664905770742318, "grad_norm": 7.305676836286447, "learning_rate": 4.170269194108511e-06, "loss": 18.211, "step": 30991 }, { "epoch": 0.5665088562706784, "grad_norm": 5.302761799059295, "learning_rate": 4.1699772882797585e-06, "loss": 17.3319, "step": 30992 }, { "epoch": 0.5665271354671249, "grad_norm": 5.75413262257861, "learning_rate": 4.169685385360161e-06, "loss": 17.1807, "step": 30993 }, { "epoch": 0.5665454146635713, "grad_norm": 6.589721147112197, "learning_rate": 4.169393485350745e-06, "loss": 17.4418, "step": 30994 }, { "epoch": 0.5665636938600179, "grad_norm": 6.770062485759266, "learning_rate": 4.1691015882525326e-06, "loss": 17.8299, "step": 30995 }, { "epoch": 0.5665819730564644, "grad_norm": 6.224235608640548, "learning_rate": 4.168809694066546e-06, "loss": 17.5631, "step": 30996 }, { "epoch": 0.566600252252911, "grad_norm": 6.001738430336017, "learning_rate": 4.168517802793811e-06, "loss": 17.3385, "step": 30997 }, { "epoch": 0.5666185314493575, "grad_norm": 6.469654854582654, "learning_rate": 4.1682259144353495e-06, "loss": 17.7323, "step": 30998 }, { "epoch": 0.566636810645804, "grad_norm": 6.9530677075206695, "learning_rate": 4.167934028992182e-06, "loss": 17.7333, "step": 30999 }, { "epoch": 0.5666550898422505, "grad_norm": 5.598127246570995, "learning_rate": 4.1676421464653336e-06, "loss": 17.2009, "step": 31000 }, { "epoch": 0.566673369038697, "grad_norm": 6.6494314214003385, "learning_rate": 4.167350266855829e-06, "loss": 17.411, "step": 31001 }, { "epoch": 0.5666916482351436, "grad_norm": 7.258209771295803, "learning_rate": 4.167058390164688e-06, "loss": 17.8092, "step": 31002 }, { "epoch": 0.5667099274315901, "grad_norm": 6.22992280670329, "learning_rate": 4.166766516392936e-06, "loss": 17.4154, "step": 31003 }, { "epoch": 0.5667282066280366, "grad_norm": 7.2627190890212585, "learning_rate": 4.166474645541595e-06, "loss": 18.0938, "step": 31004 }, { "epoch": 0.5667464858244832, "grad_norm": 7.512224752544295, "learning_rate": 4.166182777611686e-06, "loss": 18.1722, "step": 31005 }, { "epoch": 0.5667647650209297, "grad_norm": 5.467276019673771, "learning_rate": 4.165890912604237e-06, "loss": 17.2218, "step": 31006 }, { "epoch": 0.5667830442173762, "grad_norm": 7.114693705969587, "learning_rate": 4.165599050520267e-06, "loss": 17.7728, "step": 31007 }, { "epoch": 0.5668013234138227, "grad_norm": 7.638111320045676, "learning_rate": 4.165307191360798e-06, "loss": 17.8168, "step": 31008 }, { "epoch": 0.5668196026102692, "grad_norm": 4.977719834276822, "learning_rate": 4.165015335126857e-06, "loss": 17.0969, "step": 31009 }, { "epoch": 0.5668378818067158, "grad_norm": 6.501115574787575, "learning_rate": 4.164723481819463e-06, "loss": 17.9729, "step": 31010 }, { "epoch": 0.5668561610031623, "grad_norm": 6.513186263313889, "learning_rate": 4.164431631439643e-06, "loss": 17.6492, "step": 31011 }, { "epoch": 0.5668744401996089, "grad_norm": 7.092709059088964, "learning_rate": 4.164139783988417e-06, "loss": 18.1263, "step": 31012 }, { "epoch": 0.5668927193960553, "grad_norm": 6.146192018139865, "learning_rate": 4.163847939466807e-06, "loss": 17.4577, "step": 31013 }, { "epoch": 0.5669109985925018, "grad_norm": 8.40216458484058, "learning_rate": 4.1635560978758386e-06, "loss": 18.188, "step": 31014 }, { "epoch": 0.5669292777889484, "grad_norm": 7.796945695363193, "learning_rate": 4.163264259216534e-06, "loss": 18.3444, "step": 31015 }, { "epoch": 0.5669475569853949, "grad_norm": 5.958861010319674, "learning_rate": 4.162972423489914e-06, "loss": 17.5599, "step": 31016 }, { "epoch": 0.5669658361818415, "grad_norm": 4.962019899302498, "learning_rate": 4.162680590697005e-06, "loss": 16.98, "step": 31017 }, { "epoch": 0.566984115378288, "grad_norm": 6.9431946965912505, "learning_rate": 4.162388760838826e-06, "loss": 17.8177, "step": 31018 }, { "epoch": 0.5670023945747344, "grad_norm": 6.762199744809678, "learning_rate": 4.162096933916402e-06, "loss": 17.5941, "step": 31019 }, { "epoch": 0.567020673771181, "grad_norm": 5.633138371895112, "learning_rate": 4.1618051099307566e-06, "loss": 17.1865, "step": 31020 }, { "epoch": 0.5670389529676275, "grad_norm": 8.086870534321589, "learning_rate": 4.161513288882912e-06, "loss": 18.3226, "step": 31021 }, { "epoch": 0.5670572321640741, "grad_norm": 5.497853250110096, "learning_rate": 4.1612214707738885e-06, "loss": 17.0957, "step": 31022 }, { "epoch": 0.5670755113605206, "grad_norm": 6.416294738008674, "learning_rate": 4.160929655604712e-06, "loss": 17.3727, "step": 31023 }, { "epoch": 0.5670937905569671, "grad_norm": 6.1726573578634865, "learning_rate": 4.1606378433764026e-06, "loss": 17.1318, "step": 31024 }, { "epoch": 0.5671120697534137, "grad_norm": 5.980112303536969, "learning_rate": 4.160346034089988e-06, "loss": 17.2951, "step": 31025 }, { "epoch": 0.5671303489498601, "grad_norm": 6.712476279871208, "learning_rate": 4.160054227746487e-06, "loss": 17.5695, "step": 31026 }, { "epoch": 0.5671486281463067, "grad_norm": 5.885898489616497, "learning_rate": 4.159762424346921e-06, "loss": 17.4223, "step": 31027 }, { "epoch": 0.5671669073427532, "grad_norm": 7.653965602363902, "learning_rate": 4.159470623892316e-06, "loss": 17.65, "step": 31028 }, { "epoch": 0.5671851865391997, "grad_norm": 6.078723221000788, "learning_rate": 4.159178826383693e-06, "loss": 17.2886, "step": 31029 }, { "epoch": 0.5672034657356463, "grad_norm": 7.041728200203385, "learning_rate": 4.158887031822075e-06, "loss": 17.8962, "step": 31030 }, { "epoch": 0.5672217449320928, "grad_norm": 8.264447166269317, "learning_rate": 4.158595240208487e-06, "loss": 18.3293, "step": 31031 }, { "epoch": 0.5672400241285394, "grad_norm": 6.374753030958476, "learning_rate": 4.158303451543948e-06, "loss": 17.182, "step": 31032 }, { "epoch": 0.5672583033249858, "grad_norm": 6.921157803749596, "learning_rate": 4.158011665829482e-06, "loss": 17.9587, "step": 31033 }, { "epoch": 0.5672765825214323, "grad_norm": 6.471335238340085, "learning_rate": 4.157719883066113e-06, "loss": 17.3098, "step": 31034 }, { "epoch": 0.5672948617178789, "grad_norm": 6.006132865508768, "learning_rate": 4.157428103254864e-06, "loss": 17.1466, "step": 31035 }, { "epoch": 0.5673131409143254, "grad_norm": 5.634419381752228, "learning_rate": 4.157136326396754e-06, "loss": 17.3832, "step": 31036 }, { "epoch": 0.567331420110772, "grad_norm": 6.866519884043903, "learning_rate": 4.156844552492808e-06, "loss": 17.8314, "step": 31037 }, { "epoch": 0.5673496993072185, "grad_norm": 5.766324189808867, "learning_rate": 4.156552781544051e-06, "loss": 17.3592, "step": 31038 }, { "epoch": 0.5673679785036649, "grad_norm": 7.349955596765868, "learning_rate": 4.1562610135515e-06, "loss": 18.0803, "step": 31039 }, { "epoch": 0.5673862577001115, "grad_norm": 6.357242174162678, "learning_rate": 4.155969248516184e-06, "loss": 17.7246, "step": 31040 }, { "epoch": 0.567404536896558, "grad_norm": 6.044169801389039, "learning_rate": 4.15567748643912e-06, "loss": 17.2984, "step": 31041 }, { "epoch": 0.5674228160930046, "grad_norm": 6.632351510002524, "learning_rate": 4.155385727321334e-06, "loss": 17.5255, "step": 31042 }, { "epoch": 0.5674410952894511, "grad_norm": 6.722678406471699, "learning_rate": 4.15509397116385e-06, "loss": 17.7081, "step": 31043 }, { "epoch": 0.5674593744858976, "grad_norm": 5.3473660501825115, "learning_rate": 4.154802217967685e-06, "loss": 17.2014, "step": 31044 }, { "epoch": 0.5674776536823442, "grad_norm": 6.586851769045463, "learning_rate": 4.154510467733867e-06, "loss": 17.3896, "step": 31045 }, { "epoch": 0.5674959328787906, "grad_norm": 5.843093521672273, "learning_rate": 4.154218720463416e-06, "loss": 17.3729, "step": 31046 }, { "epoch": 0.5675142120752372, "grad_norm": 5.63117675461405, "learning_rate": 4.153926976157354e-06, "loss": 17.2908, "step": 31047 }, { "epoch": 0.5675324912716837, "grad_norm": 6.477447575168344, "learning_rate": 4.153635234816706e-06, "loss": 17.4633, "step": 31048 }, { "epoch": 0.5675507704681302, "grad_norm": 6.368070934206493, "learning_rate": 4.1533434964424925e-06, "loss": 17.3831, "step": 31049 }, { "epoch": 0.5675690496645768, "grad_norm": 7.009983266837627, "learning_rate": 4.153051761035736e-06, "loss": 17.8138, "step": 31050 }, { "epoch": 0.5675873288610233, "grad_norm": 5.299192783266902, "learning_rate": 4.15276002859746e-06, "loss": 17.1112, "step": 31051 }, { "epoch": 0.5676056080574698, "grad_norm": 6.672482078795982, "learning_rate": 4.152468299128688e-06, "loss": 17.3996, "step": 31052 }, { "epoch": 0.5676238872539163, "grad_norm": 7.457845282387869, "learning_rate": 4.152176572630439e-06, "loss": 17.8251, "step": 31053 }, { "epoch": 0.5676421664503628, "grad_norm": 5.921257691549259, "learning_rate": 4.151884849103737e-06, "loss": 17.4713, "step": 31054 }, { "epoch": 0.5676604456468094, "grad_norm": 6.037264348368355, "learning_rate": 4.151593128549607e-06, "loss": 17.2623, "step": 31055 }, { "epoch": 0.5676787248432559, "grad_norm": 7.328387088711182, "learning_rate": 4.151301410969069e-06, "loss": 17.6728, "step": 31056 }, { "epoch": 0.5676970040397025, "grad_norm": 6.883224081962299, "learning_rate": 4.151009696363146e-06, "loss": 17.6315, "step": 31057 }, { "epoch": 0.567715283236149, "grad_norm": 6.842710620870904, "learning_rate": 4.150717984732859e-06, "loss": 17.6498, "step": 31058 }, { "epoch": 0.5677335624325954, "grad_norm": 6.507743946355492, "learning_rate": 4.1504262760792324e-06, "loss": 17.5252, "step": 31059 }, { "epoch": 0.567751841629042, "grad_norm": 6.668692419888233, "learning_rate": 4.1501345704032895e-06, "loss": 17.7037, "step": 31060 }, { "epoch": 0.5677701208254885, "grad_norm": 6.478317296857688, "learning_rate": 4.149842867706049e-06, "loss": 17.5892, "step": 31061 }, { "epoch": 0.567788400021935, "grad_norm": 6.524384759753895, "learning_rate": 4.149551167988537e-06, "loss": 17.6525, "step": 31062 }, { "epoch": 0.5678066792183816, "grad_norm": 6.914906592089648, "learning_rate": 4.149259471251774e-06, "loss": 17.8316, "step": 31063 }, { "epoch": 0.567824958414828, "grad_norm": 8.462276741937927, "learning_rate": 4.148967777496781e-06, "loss": 18.3364, "step": 31064 }, { "epoch": 0.5678432376112746, "grad_norm": 6.6282853462241995, "learning_rate": 4.148676086724585e-06, "loss": 17.7325, "step": 31065 }, { "epoch": 0.5678615168077211, "grad_norm": 6.305535427785866, "learning_rate": 4.148384398936205e-06, "loss": 17.4081, "step": 31066 }, { "epoch": 0.5678797960041676, "grad_norm": 5.926195847248775, "learning_rate": 4.148092714132661e-06, "loss": 17.3189, "step": 31067 }, { "epoch": 0.5678980752006142, "grad_norm": 6.541985930126915, "learning_rate": 4.14780103231498e-06, "loss": 17.8521, "step": 31068 }, { "epoch": 0.5679163543970607, "grad_norm": 5.808892709375653, "learning_rate": 4.1475093534841836e-06, "loss": 17.2897, "step": 31069 }, { "epoch": 0.5679346335935073, "grad_norm": 6.585326843621079, "learning_rate": 4.147217677641291e-06, "loss": 17.5529, "step": 31070 }, { "epoch": 0.5679529127899537, "grad_norm": 6.582765534924491, "learning_rate": 4.146926004787328e-06, "loss": 17.8171, "step": 31071 }, { "epoch": 0.5679711919864002, "grad_norm": 5.413870478649089, "learning_rate": 4.146634334923314e-06, "loss": 17.1101, "step": 31072 }, { "epoch": 0.5679894711828468, "grad_norm": 6.18198789069606, "learning_rate": 4.146342668050273e-06, "loss": 17.6475, "step": 31073 }, { "epoch": 0.5680077503792933, "grad_norm": 5.250932386822856, "learning_rate": 4.146051004169228e-06, "loss": 17.2727, "step": 31074 }, { "epoch": 0.5680260295757399, "grad_norm": 7.338547981433729, "learning_rate": 4.145759343281197e-06, "loss": 17.9598, "step": 31075 }, { "epoch": 0.5680443087721864, "grad_norm": 5.544515220010932, "learning_rate": 4.145467685387209e-06, "loss": 17.1805, "step": 31076 }, { "epoch": 0.5680625879686328, "grad_norm": 6.769598983455344, "learning_rate": 4.145176030488281e-06, "loss": 17.5709, "step": 31077 }, { "epoch": 0.5680808671650794, "grad_norm": 7.56273026576182, "learning_rate": 4.144884378585436e-06, "loss": 17.7828, "step": 31078 }, { "epoch": 0.5680991463615259, "grad_norm": 5.365268629228964, "learning_rate": 4.1445927296796985e-06, "loss": 17.0011, "step": 31079 }, { "epoch": 0.5681174255579725, "grad_norm": 6.274893791047241, "learning_rate": 4.14430108377209e-06, "loss": 17.6999, "step": 31080 }, { "epoch": 0.568135704754419, "grad_norm": 6.172506680628866, "learning_rate": 4.14400944086363e-06, "loss": 17.7048, "step": 31081 }, { "epoch": 0.5681539839508655, "grad_norm": 5.882905205890266, "learning_rate": 4.143717800955342e-06, "loss": 17.4247, "step": 31082 }, { "epoch": 0.5681722631473121, "grad_norm": 6.793554297886452, "learning_rate": 4.143426164048252e-06, "loss": 17.7128, "step": 31083 }, { "epoch": 0.5681905423437585, "grad_norm": 6.714136571844086, "learning_rate": 4.1431345301433755e-06, "loss": 18.0131, "step": 31084 }, { "epoch": 0.5682088215402051, "grad_norm": 8.204831097881371, "learning_rate": 4.142842899241741e-06, "loss": 18.1217, "step": 31085 }, { "epoch": 0.5682271007366516, "grad_norm": 5.581853770196106, "learning_rate": 4.142551271344365e-06, "loss": 17.286, "step": 31086 }, { "epoch": 0.5682453799330981, "grad_norm": 6.035563208133207, "learning_rate": 4.142259646452274e-06, "loss": 17.1884, "step": 31087 }, { "epoch": 0.5682636591295447, "grad_norm": 6.886647430464467, "learning_rate": 4.141968024566489e-06, "loss": 17.6322, "step": 31088 }, { "epoch": 0.5682819383259912, "grad_norm": 5.251320632689065, "learning_rate": 4.14167640568803e-06, "loss": 17.2345, "step": 31089 }, { "epoch": 0.5683002175224378, "grad_norm": 5.943275703005514, "learning_rate": 4.141384789817923e-06, "loss": 17.3122, "step": 31090 }, { "epoch": 0.5683184967188842, "grad_norm": 7.014129119244489, "learning_rate": 4.141093176957186e-06, "loss": 17.9565, "step": 31091 }, { "epoch": 0.5683367759153307, "grad_norm": 6.902427317290365, "learning_rate": 4.140801567106843e-06, "loss": 17.7719, "step": 31092 }, { "epoch": 0.5683550551117773, "grad_norm": 7.064412063605277, "learning_rate": 4.140509960267917e-06, "loss": 17.6337, "step": 31093 }, { "epoch": 0.5683733343082238, "grad_norm": 6.146824009766949, "learning_rate": 4.1402183564414285e-06, "loss": 17.207, "step": 31094 }, { "epoch": 0.5683916135046704, "grad_norm": 6.329112498398146, "learning_rate": 4.1399267556284e-06, "loss": 17.3449, "step": 31095 }, { "epoch": 0.5684098927011169, "grad_norm": 7.936767789031738, "learning_rate": 4.139635157829853e-06, "loss": 17.9039, "step": 31096 }, { "epoch": 0.5684281718975633, "grad_norm": 6.524648490670252, "learning_rate": 4.139343563046811e-06, "loss": 17.5245, "step": 31097 }, { "epoch": 0.5684464510940099, "grad_norm": 5.783621715031711, "learning_rate": 4.139051971280294e-06, "loss": 17.2845, "step": 31098 }, { "epoch": 0.5684647302904564, "grad_norm": 5.247614323066725, "learning_rate": 4.1387603825313266e-06, "loss": 17.174, "step": 31099 }, { "epoch": 0.568483009486903, "grad_norm": 5.235127573866958, "learning_rate": 4.138468796800928e-06, "loss": 16.8987, "step": 31100 }, { "epoch": 0.5685012886833495, "grad_norm": 5.412878664670369, "learning_rate": 4.138177214090122e-06, "loss": 17.2356, "step": 31101 }, { "epoch": 0.568519567879796, "grad_norm": 5.880106242535451, "learning_rate": 4.137885634399931e-06, "loss": 17.4565, "step": 31102 }, { "epoch": 0.5685378470762426, "grad_norm": 5.854252513071787, "learning_rate": 4.137594057731374e-06, "loss": 17.3766, "step": 31103 }, { "epoch": 0.568556126272689, "grad_norm": 7.037370746890396, "learning_rate": 4.137302484085477e-06, "loss": 17.8853, "step": 31104 }, { "epoch": 0.5685744054691356, "grad_norm": 6.475919144491232, "learning_rate": 4.137010913463259e-06, "loss": 17.7647, "step": 31105 }, { "epoch": 0.5685926846655821, "grad_norm": 7.147512189877462, "learning_rate": 4.136719345865742e-06, "loss": 17.6071, "step": 31106 }, { "epoch": 0.5686109638620286, "grad_norm": 6.168232463661791, "learning_rate": 4.136427781293951e-06, "loss": 17.4443, "step": 31107 }, { "epoch": 0.5686292430584752, "grad_norm": 6.601614566400777, "learning_rate": 4.1361362197489045e-06, "loss": 17.5943, "step": 31108 }, { "epoch": 0.5686475222549217, "grad_norm": 6.042937702478615, "learning_rate": 4.135844661231625e-06, "loss": 17.4783, "step": 31109 }, { "epoch": 0.5686658014513682, "grad_norm": 6.924483079665303, "learning_rate": 4.135553105743135e-06, "loss": 17.4033, "step": 31110 }, { "epoch": 0.5686840806478147, "grad_norm": 6.230338208909279, "learning_rate": 4.135261553284458e-06, "loss": 17.5345, "step": 31111 }, { "epoch": 0.5687023598442612, "grad_norm": 6.6149224780107065, "learning_rate": 4.134970003856612e-06, "loss": 17.4853, "step": 31112 }, { "epoch": 0.5687206390407078, "grad_norm": 6.769813074867622, "learning_rate": 4.134678457460622e-06, "loss": 17.6865, "step": 31113 }, { "epoch": 0.5687389182371543, "grad_norm": 5.935889726819909, "learning_rate": 4.13438691409751e-06, "loss": 17.4584, "step": 31114 }, { "epoch": 0.5687571974336009, "grad_norm": 5.814866757993557, "learning_rate": 4.1340953737682945e-06, "loss": 17.2166, "step": 31115 }, { "epoch": 0.5687754766300473, "grad_norm": 5.510993880142969, "learning_rate": 4.133803836474002e-06, "loss": 17.1409, "step": 31116 }, { "epoch": 0.5687937558264938, "grad_norm": 5.466841593862347, "learning_rate": 4.13351230221565e-06, "loss": 17.2295, "step": 31117 }, { "epoch": 0.5688120350229404, "grad_norm": 5.342703068589424, "learning_rate": 4.133220770994263e-06, "loss": 17.2367, "step": 31118 }, { "epoch": 0.5688303142193869, "grad_norm": 5.637107711888374, "learning_rate": 4.132929242810863e-06, "loss": 17.3331, "step": 31119 }, { "epoch": 0.5688485934158335, "grad_norm": 6.157936379540331, "learning_rate": 4.132637717666469e-06, "loss": 17.725, "step": 31120 }, { "epoch": 0.56886687261228, "grad_norm": 6.818823925134919, "learning_rate": 4.132346195562106e-06, "loss": 17.6178, "step": 31121 }, { "epoch": 0.5688851518087265, "grad_norm": 5.084966154540148, "learning_rate": 4.132054676498793e-06, "loss": 16.9985, "step": 31122 }, { "epoch": 0.568903431005173, "grad_norm": 6.031555361668368, "learning_rate": 4.131763160477552e-06, "loss": 17.1996, "step": 31123 }, { "epoch": 0.5689217102016195, "grad_norm": 6.146003747483424, "learning_rate": 4.1314716474994084e-06, "loss": 17.4535, "step": 31124 }, { "epoch": 0.5689399893980661, "grad_norm": 7.240125234544757, "learning_rate": 4.1311801375653816e-06, "loss": 17.8387, "step": 31125 }, { "epoch": 0.5689582685945126, "grad_norm": 8.527587659435056, "learning_rate": 4.130888630676491e-06, "loss": 18.2871, "step": 31126 }, { "epoch": 0.5689765477909591, "grad_norm": 5.785702810029422, "learning_rate": 4.130597126833761e-06, "loss": 17.1467, "step": 31127 }, { "epoch": 0.5689948269874057, "grad_norm": 6.627057239430447, "learning_rate": 4.130305626038214e-06, "loss": 17.5366, "step": 31128 }, { "epoch": 0.5690131061838521, "grad_norm": 5.655300411348619, "learning_rate": 4.130014128290867e-06, "loss": 17.1872, "step": 31129 }, { "epoch": 0.5690313853802986, "grad_norm": 5.861549434549706, "learning_rate": 4.1297226335927484e-06, "loss": 17.4096, "step": 31130 }, { "epoch": 0.5690496645767452, "grad_norm": 5.966642017815176, "learning_rate": 4.129431141944874e-06, "loss": 17.5213, "step": 31131 }, { "epoch": 0.5690679437731917, "grad_norm": 5.821163029998505, "learning_rate": 4.129139653348268e-06, "loss": 17.2518, "step": 31132 }, { "epoch": 0.5690862229696383, "grad_norm": 4.431195357633336, "learning_rate": 4.128848167803954e-06, "loss": 16.7799, "step": 31133 }, { "epoch": 0.5691045021660848, "grad_norm": 6.1499565193626635, "learning_rate": 4.128556685312949e-06, "loss": 17.2386, "step": 31134 }, { "epoch": 0.5691227813625312, "grad_norm": 6.52052663233909, "learning_rate": 4.12826520587628e-06, "loss": 17.8788, "step": 31135 }, { "epoch": 0.5691410605589778, "grad_norm": 6.010170248074092, "learning_rate": 4.1279737294949646e-06, "loss": 17.3277, "step": 31136 }, { "epoch": 0.5691593397554243, "grad_norm": 7.370148617530168, "learning_rate": 4.127682256170024e-06, "loss": 17.9872, "step": 31137 }, { "epoch": 0.5691776189518709, "grad_norm": 6.746658595840564, "learning_rate": 4.127390785902484e-06, "loss": 18.0198, "step": 31138 }, { "epoch": 0.5691958981483174, "grad_norm": 8.941370462103588, "learning_rate": 4.1270993186933635e-06, "loss": 18.7762, "step": 31139 }, { "epoch": 0.5692141773447639, "grad_norm": 5.469891992340403, "learning_rate": 4.126807854543682e-06, "loss": 17.1452, "step": 31140 }, { "epoch": 0.5692324565412105, "grad_norm": 7.4199466885277765, "learning_rate": 4.1265163934544635e-06, "loss": 17.8889, "step": 31141 }, { "epoch": 0.5692507357376569, "grad_norm": 5.672033128254248, "learning_rate": 4.126224935426732e-06, "loss": 17.2443, "step": 31142 }, { "epoch": 0.5692690149341035, "grad_norm": 5.0051666961490495, "learning_rate": 4.125933480461503e-06, "loss": 17.1046, "step": 31143 }, { "epoch": 0.56928729413055, "grad_norm": 8.563494481104962, "learning_rate": 4.125642028559803e-06, "loss": 18.7132, "step": 31144 }, { "epoch": 0.5693055733269965, "grad_norm": 6.6001938894994625, "learning_rate": 4.125350579722651e-06, "loss": 17.2548, "step": 31145 }, { "epoch": 0.5693238525234431, "grad_norm": 5.866282697520959, "learning_rate": 4.1250591339510685e-06, "loss": 17.18, "step": 31146 }, { "epoch": 0.5693421317198896, "grad_norm": 6.554749819730452, "learning_rate": 4.1247676912460805e-06, "loss": 17.5224, "step": 31147 }, { "epoch": 0.5693604109163362, "grad_norm": 6.9073478869402, "learning_rate": 4.124476251608704e-06, "loss": 17.7144, "step": 31148 }, { "epoch": 0.5693786901127826, "grad_norm": 8.572554604155036, "learning_rate": 4.124184815039963e-06, "loss": 17.8829, "step": 31149 }, { "epoch": 0.5693969693092291, "grad_norm": 5.659112892433862, "learning_rate": 4.123893381540878e-06, "loss": 17.106, "step": 31150 }, { "epoch": 0.5694152485056757, "grad_norm": 6.478019320204307, "learning_rate": 4.123601951112469e-06, "loss": 17.4026, "step": 31151 }, { "epoch": 0.5694335277021222, "grad_norm": 6.8242040782511015, "learning_rate": 4.123310523755761e-06, "loss": 17.5049, "step": 31152 }, { "epoch": 0.5694518068985688, "grad_norm": 7.248042153486105, "learning_rate": 4.123019099471775e-06, "loss": 17.8118, "step": 31153 }, { "epoch": 0.5694700860950153, "grad_norm": 5.706654138368308, "learning_rate": 4.1227276782615275e-06, "loss": 17.2219, "step": 31154 }, { "epoch": 0.5694883652914617, "grad_norm": 5.769512410693272, "learning_rate": 4.122436260126045e-06, "loss": 17.2012, "step": 31155 }, { "epoch": 0.5695066444879083, "grad_norm": 7.345772226508855, "learning_rate": 4.122144845066348e-06, "loss": 17.6391, "step": 31156 }, { "epoch": 0.5695249236843548, "grad_norm": 6.2622495810172225, "learning_rate": 4.121853433083455e-06, "loss": 17.4472, "step": 31157 }, { "epoch": 0.5695432028808014, "grad_norm": 7.4590230721165085, "learning_rate": 4.121562024178392e-06, "loss": 17.9268, "step": 31158 }, { "epoch": 0.5695614820772479, "grad_norm": 6.204572540621785, "learning_rate": 4.1212706183521766e-06, "loss": 17.3355, "step": 31159 }, { "epoch": 0.5695797612736944, "grad_norm": 5.925506737058046, "learning_rate": 4.120979215605831e-06, "loss": 17.4636, "step": 31160 }, { "epoch": 0.569598040470141, "grad_norm": 6.0272243338178955, "learning_rate": 4.120687815940378e-06, "loss": 17.2102, "step": 31161 }, { "epoch": 0.5696163196665874, "grad_norm": 6.441212577933786, "learning_rate": 4.1203964193568366e-06, "loss": 17.5916, "step": 31162 }, { "epoch": 0.569634598863034, "grad_norm": 4.648183793276607, "learning_rate": 4.1201050258562305e-06, "loss": 16.7727, "step": 31163 }, { "epoch": 0.5696528780594805, "grad_norm": 6.326813560043248, "learning_rate": 4.11981363543958e-06, "loss": 17.1283, "step": 31164 }, { "epoch": 0.569671157255927, "grad_norm": 7.405865282293076, "learning_rate": 4.119522248107905e-06, "loss": 17.3502, "step": 31165 }, { "epoch": 0.5696894364523736, "grad_norm": 6.390641745415424, "learning_rate": 4.1192308638622305e-06, "loss": 17.2404, "step": 31166 }, { "epoch": 0.5697077156488201, "grad_norm": 4.81180560168982, "learning_rate": 4.118939482703575e-06, "loss": 16.8975, "step": 31167 }, { "epoch": 0.5697259948452666, "grad_norm": 5.5870913343092665, "learning_rate": 4.118648104632957e-06, "loss": 17.1759, "step": 31168 }, { "epoch": 0.5697442740417131, "grad_norm": 5.714089401245616, "learning_rate": 4.118356729651403e-06, "loss": 17.0428, "step": 31169 }, { "epoch": 0.5697625532381596, "grad_norm": 5.761397006112798, "learning_rate": 4.118065357759934e-06, "loss": 17.0437, "step": 31170 }, { "epoch": 0.5697808324346062, "grad_norm": 6.7489111903582, "learning_rate": 4.117773988959566e-06, "loss": 17.3524, "step": 31171 }, { "epoch": 0.5697991116310527, "grad_norm": 7.317265381758839, "learning_rate": 4.117482623251327e-06, "loss": 17.819, "step": 31172 }, { "epoch": 0.5698173908274993, "grad_norm": 4.788975069715365, "learning_rate": 4.117191260636232e-06, "loss": 16.8286, "step": 31173 }, { "epoch": 0.5698356700239458, "grad_norm": 6.977248075811477, "learning_rate": 4.116899901115306e-06, "loss": 18.1031, "step": 31174 }, { "epoch": 0.5698539492203922, "grad_norm": 7.598797226388627, "learning_rate": 4.11660854468957e-06, "loss": 17.9999, "step": 31175 }, { "epoch": 0.5698722284168388, "grad_norm": 7.117759130538788, "learning_rate": 4.116317191360043e-06, "loss": 17.7337, "step": 31176 }, { "epoch": 0.5698905076132853, "grad_norm": 7.140228450224506, "learning_rate": 4.116025841127749e-06, "loss": 17.9414, "step": 31177 }, { "epoch": 0.5699087868097319, "grad_norm": 6.4918651905558935, "learning_rate": 4.115734493993707e-06, "loss": 17.6226, "step": 31178 }, { "epoch": 0.5699270660061784, "grad_norm": 6.19724977578245, "learning_rate": 4.115443149958937e-06, "loss": 17.4914, "step": 31179 }, { "epoch": 0.5699453452026249, "grad_norm": 6.42357237840289, "learning_rate": 4.115151809024465e-06, "loss": 17.4102, "step": 31180 }, { "epoch": 0.5699636243990714, "grad_norm": 6.833524823734772, "learning_rate": 4.114860471191308e-06, "loss": 17.5496, "step": 31181 }, { "epoch": 0.5699819035955179, "grad_norm": 4.5505145191007585, "learning_rate": 4.114569136460488e-06, "loss": 16.7555, "step": 31182 }, { "epoch": 0.5700001827919645, "grad_norm": 6.548041243503217, "learning_rate": 4.1142778048330265e-06, "loss": 17.7182, "step": 31183 }, { "epoch": 0.570018461988411, "grad_norm": 6.326099414730844, "learning_rate": 4.113986476309946e-06, "loss": 17.0115, "step": 31184 }, { "epoch": 0.5700367411848575, "grad_norm": 6.166825053724418, "learning_rate": 4.113695150892263e-06, "loss": 17.2888, "step": 31185 }, { "epoch": 0.5700550203813041, "grad_norm": 6.947215420173629, "learning_rate": 4.113403828581003e-06, "loss": 17.6833, "step": 31186 }, { "epoch": 0.5700732995777505, "grad_norm": 7.178474654264418, "learning_rate": 4.113112509377187e-06, "loss": 17.6476, "step": 31187 }, { "epoch": 0.5700915787741971, "grad_norm": 6.833490768067574, "learning_rate": 4.112821193281832e-06, "loss": 17.571, "step": 31188 }, { "epoch": 0.5701098579706436, "grad_norm": 7.456831932428657, "learning_rate": 4.112529880295964e-06, "loss": 17.8363, "step": 31189 }, { "epoch": 0.5701281371670901, "grad_norm": 5.901401689728571, "learning_rate": 4.112238570420601e-06, "loss": 17.26, "step": 31190 }, { "epoch": 0.5701464163635367, "grad_norm": 5.876025018217446, "learning_rate": 4.1119472636567635e-06, "loss": 17.4074, "step": 31191 }, { "epoch": 0.5701646955599832, "grad_norm": 7.130220395674119, "learning_rate": 4.111655960005476e-06, "loss": 17.3714, "step": 31192 }, { "epoch": 0.5701829747564298, "grad_norm": 6.83155025155654, "learning_rate": 4.111364659467755e-06, "loss": 17.66, "step": 31193 }, { "epoch": 0.5702012539528762, "grad_norm": 7.657741393349927, "learning_rate": 4.111073362044626e-06, "loss": 17.988, "step": 31194 }, { "epoch": 0.5702195331493227, "grad_norm": 5.059714180368502, "learning_rate": 4.110782067737107e-06, "loss": 16.7458, "step": 31195 }, { "epoch": 0.5702378123457693, "grad_norm": 5.505975934670869, "learning_rate": 4.110490776546218e-06, "loss": 17.119, "step": 31196 }, { "epoch": 0.5702560915422158, "grad_norm": 6.026963211203521, "learning_rate": 4.110199488472985e-06, "loss": 17.7132, "step": 31197 }, { "epoch": 0.5702743707386623, "grad_norm": 8.369600755681637, "learning_rate": 4.1099082035184244e-06, "loss": 18.1352, "step": 31198 }, { "epoch": 0.5702926499351089, "grad_norm": 6.526827089999135, "learning_rate": 4.109616921683556e-06, "loss": 17.6135, "step": 31199 }, { "epoch": 0.5703109291315553, "grad_norm": 6.570548239964706, "learning_rate": 4.109325642969405e-06, "loss": 17.5347, "step": 31200 }, { "epoch": 0.5703292083280019, "grad_norm": 7.602508584244967, "learning_rate": 4.1090343673769904e-06, "loss": 17.7137, "step": 31201 }, { "epoch": 0.5703474875244484, "grad_norm": 5.885445209482176, "learning_rate": 4.1087430949073315e-06, "loss": 17.4718, "step": 31202 }, { "epoch": 0.5703657667208949, "grad_norm": 6.25066641934551, "learning_rate": 4.108451825561452e-06, "loss": 17.1524, "step": 31203 }, { "epoch": 0.5703840459173415, "grad_norm": 6.468613781641003, "learning_rate": 4.108160559340371e-06, "loss": 17.4255, "step": 31204 }, { "epoch": 0.570402325113788, "grad_norm": 6.408178666725085, "learning_rate": 4.10786929624511e-06, "loss": 17.3557, "step": 31205 }, { "epoch": 0.5704206043102346, "grad_norm": 6.383988642631391, "learning_rate": 4.107578036276691e-06, "loss": 17.3933, "step": 31206 }, { "epoch": 0.570438883506681, "grad_norm": 5.545622117907267, "learning_rate": 4.107286779436133e-06, "loss": 17.2036, "step": 31207 }, { "epoch": 0.5704571627031275, "grad_norm": 7.782415691770749, "learning_rate": 4.106995525724455e-06, "loss": 17.9413, "step": 31208 }, { "epoch": 0.5704754418995741, "grad_norm": 7.0764762372297065, "learning_rate": 4.106704275142682e-06, "loss": 17.7223, "step": 31209 }, { "epoch": 0.5704937210960206, "grad_norm": 7.571370145313793, "learning_rate": 4.106413027691832e-06, "loss": 17.9947, "step": 31210 }, { "epoch": 0.5705120002924672, "grad_norm": 5.775135727328549, "learning_rate": 4.106121783372929e-06, "loss": 17.2045, "step": 31211 }, { "epoch": 0.5705302794889137, "grad_norm": 6.646296101287503, "learning_rate": 4.105830542186992e-06, "loss": 17.4952, "step": 31212 }, { "epoch": 0.5705485586853601, "grad_norm": 4.8603365563639676, "learning_rate": 4.1055393041350376e-06, "loss": 16.8389, "step": 31213 }, { "epoch": 0.5705668378818067, "grad_norm": 5.5627617135419465, "learning_rate": 4.105248069218092e-06, "loss": 17.2987, "step": 31214 }, { "epoch": 0.5705851170782532, "grad_norm": 6.611188219967055, "learning_rate": 4.104956837437176e-06, "loss": 17.6643, "step": 31215 }, { "epoch": 0.5706033962746998, "grad_norm": 5.304567137408914, "learning_rate": 4.104665608793306e-06, "loss": 17.2111, "step": 31216 }, { "epoch": 0.5706216754711463, "grad_norm": 5.563060687270274, "learning_rate": 4.1043743832875075e-06, "loss": 17.1396, "step": 31217 }, { "epoch": 0.5706399546675928, "grad_norm": 6.904662926280453, "learning_rate": 4.104083160920798e-06, "loss": 17.7058, "step": 31218 }, { "epoch": 0.5706582338640394, "grad_norm": 9.003344897587919, "learning_rate": 4.1037919416941984e-06, "loss": 18.3061, "step": 31219 }, { "epoch": 0.5706765130604858, "grad_norm": 7.863679988107461, "learning_rate": 4.103500725608732e-06, "loss": 18.3106, "step": 31220 }, { "epoch": 0.5706947922569324, "grad_norm": 6.051173213690177, "learning_rate": 4.103209512665419e-06, "loss": 17.1446, "step": 31221 }, { "epoch": 0.5707130714533789, "grad_norm": 5.702552452166603, "learning_rate": 4.102918302865275e-06, "loss": 17.2778, "step": 31222 }, { "epoch": 0.5707313506498254, "grad_norm": 6.936770447522673, "learning_rate": 4.102627096209327e-06, "loss": 17.6221, "step": 31223 }, { "epoch": 0.570749629846272, "grad_norm": 6.308746171365487, "learning_rate": 4.102335892698591e-06, "loss": 17.0157, "step": 31224 }, { "epoch": 0.5707679090427185, "grad_norm": 6.787688309949032, "learning_rate": 4.1020446923340924e-06, "loss": 17.6288, "step": 31225 }, { "epoch": 0.570786188239165, "grad_norm": 6.024315307204039, "learning_rate": 4.101753495116849e-06, "loss": 17.2474, "step": 31226 }, { "epoch": 0.5708044674356115, "grad_norm": 6.369158034678301, "learning_rate": 4.10146230104788e-06, "loss": 17.4992, "step": 31227 }, { "epoch": 0.570822746632058, "grad_norm": 7.358720795519951, "learning_rate": 4.101171110128209e-06, "loss": 17.4827, "step": 31228 }, { "epoch": 0.5708410258285046, "grad_norm": 6.630430206928927, "learning_rate": 4.1008799223588555e-06, "loss": 17.4021, "step": 31229 }, { "epoch": 0.5708593050249511, "grad_norm": 8.751005559702582, "learning_rate": 4.100588737740837e-06, "loss": 18.1465, "step": 31230 }, { "epoch": 0.5708775842213977, "grad_norm": 6.268404092538329, "learning_rate": 4.100297556275181e-06, "loss": 17.3418, "step": 31231 }, { "epoch": 0.5708958634178442, "grad_norm": 6.778286178621426, "learning_rate": 4.100006377962901e-06, "loss": 17.6041, "step": 31232 }, { "epoch": 0.5709141426142906, "grad_norm": 6.0113604365155116, "learning_rate": 4.099715202805022e-06, "loss": 17.339, "step": 31233 }, { "epoch": 0.5709324218107372, "grad_norm": 6.345730526338917, "learning_rate": 4.099424030802563e-06, "loss": 17.5482, "step": 31234 }, { "epoch": 0.5709507010071837, "grad_norm": 5.852205783113436, "learning_rate": 4.099132861956544e-06, "loss": 17.2684, "step": 31235 }, { "epoch": 0.5709689802036303, "grad_norm": 7.996902643088792, "learning_rate": 4.098841696267987e-06, "loss": 18.2152, "step": 31236 }, { "epoch": 0.5709872594000768, "grad_norm": 7.094996798090669, "learning_rate": 4.098550533737911e-06, "loss": 17.6603, "step": 31237 }, { "epoch": 0.5710055385965233, "grad_norm": 6.520031452884179, "learning_rate": 4.098259374367338e-06, "loss": 17.4567, "step": 31238 }, { "epoch": 0.5710238177929698, "grad_norm": 7.17213410165638, "learning_rate": 4.097968218157286e-06, "loss": 17.7672, "step": 31239 }, { "epoch": 0.5710420969894163, "grad_norm": 5.84852186264101, "learning_rate": 4.0976770651087775e-06, "loss": 17.1586, "step": 31240 }, { "epoch": 0.5710603761858629, "grad_norm": 6.784132385597727, "learning_rate": 4.097385915222834e-06, "loss": 17.6389, "step": 31241 }, { "epoch": 0.5710786553823094, "grad_norm": 5.236320490406392, "learning_rate": 4.097094768500473e-06, "loss": 17.0813, "step": 31242 }, { "epoch": 0.5710969345787559, "grad_norm": 4.818899872463214, "learning_rate": 4.096803624942718e-06, "loss": 16.9683, "step": 31243 }, { "epoch": 0.5711152137752025, "grad_norm": 5.250587545968555, "learning_rate": 4.096512484550586e-06, "loss": 16.8576, "step": 31244 }, { "epoch": 0.571133492971649, "grad_norm": 5.371577744862262, "learning_rate": 4.0962213473251e-06, "loss": 16.9669, "step": 31245 }, { "epoch": 0.5711517721680955, "grad_norm": 5.4536813016021455, "learning_rate": 4.0959302132672815e-06, "loss": 17.2322, "step": 31246 }, { "epoch": 0.571170051364542, "grad_norm": 6.028132317804362, "learning_rate": 4.095639082378146e-06, "loss": 17.2289, "step": 31247 }, { "epoch": 0.5711883305609885, "grad_norm": 5.81203785997988, "learning_rate": 4.09534795465872e-06, "loss": 17.3445, "step": 31248 }, { "epoch": 0.5712066097574351, "grad_norm": 6.446550201528471, "learning_rate": 4.0950568301100206e-06, "loss": 17.7139, "step": 31249 }, { "epoch": 0.5712248889538816, "grad_norm": 6.219735488810287, "learning_rate": 4.094765708733066e-06, "loss": 17.3476, "step": 31250 }, { "epoch": 0.5712431681503282, "grad_norm": 7.591699282229272, "learning_rate": 4.094474590528882e-06, "loss": 17.7503, "step": 31251 }, { "epoch": 0.5712614473467746, "grad_norm": 7.533436939375638, "learning_rate": 4.094183475498486e-06, "loss": 18.1913, "step": 31252 }, { "epoch": 0.5712797265432211, "grad_norm": 7.318315254324318, "learning_rate": 4.093892363642895e-06, "loss": 17.5627, "step": 31253 }, { "epoch": 0.5712980057396677, "grad_norm": 6.503411777208555, "learning_rate": 4.093601254963135e-06, "loss": 17.4361, "step": 31254 }, { "epoch": 0.5713162849361142, "grad_norm": 9.038344413573755, "learning_rate": 4.093310149460222e-06, "loss": 18.471, "step": 31255 }, { "epoch": 0.5713345641325608, "grad_norm": 5.791474347583022, "learning_rate": 4.093019047135181e-06, "loss": 17.1641, "step": 31256 }, { "epoch": 0.5713528433290073, "grad_norm": 7.178076383736087, "learning_rate": 4.092727947989029e-06, "loss": 17.8125, "step": 31257 }, { "epoch": 0.5713711225254537, "grad_norm": 5.638745527139376, "learning_rate": 4.0924368520227854e-06, "loss": 17.3216, "step": 31258 }, { "epoch": 0.5713894017219003, "grad_norm": 7.7774783785805885, "learning_rate": 4.092145759237472e-06, "loss": 18.013, "step": 31259 }, { "epoch": 0.5714076809183468, "grad_norm": 4.911600992798569, "learning_rate": 4.091854669634111e-06, "loss": 16.9834, "step": 31260 }, { "epoch": 0.5714259601147934, "grad_norm": 6.639968500034018, "learning_rate": 4.091563583213718e-06, "loss": 17.3869, "step": 31261 }, { "epoch": 0.5714442393112399, "grad_norm": 6.397739595751843, "learning_rate": 4.091272499977318e-06, "loss": 17.4377, "step": 31262 }, { "epoch": 0.5714625185076864, "grad_norm": 6.118598019487283, "learning_rate": 4.090981419925928e-06, "loss": 17.5397, "step": 31263 }, { "epoch": 0.571480797704133, "grad_norm": 6.077132493588821, "learning_rate": 4.090690343060568e-06, "loss": 17.3322, "step": 31264 }, { "epoch": 0.5714990769005794, "grad_norm": 6.453388307273616, "learning_rate": 4.090399269382262e-06, "loss": 17.4645, "step": 31265 }, { "epoch": 0.571517356097026, "grad_norm": 5.87863306237508, "learning_rate": 4.090108198892027e-06, "loss": 17.2048, "step": 31266 }, { "epoch": 0.5715356352934725, "grad_norm": 5.241236019562412, "learning_rate": 4.0898171315908815e-06, "loss": 17.0245, "step": 31267 }, { "epoch": 0.571553914489919, "grad_norm": 7.721167710273114, "learning_rate": 4.08952606747985e-06, "loss": 18.3058, "step": 31268 }, { "epoch": 0.5715721936863656, "grad_norm": 7.844318835292387, "learning_rate": 4.089235006559951e-06, "loss": 18.0013, "step": 31269 }, { "epoch": 0.5715904728828121, "grad_norm": 6.3986113091225105, "learning_rate": 4.088943948832203e-06, "loss": 17.6239, "step": 31270 }, { "epoch": 0.5716087520792585, "grad_norm": 6.588392072299416, "learning_rate": 4.088652894297629e-06, "loss": 17.3476, "step": 31271 }, { "epoch": 0.5716270312757051, "grad_norm": 6.788577612685278, "learning_rate": 4.088361842957245e-06, "loss": 17.7992, "step": 31272 }, { "epoch": 0.5716453104721516, "grad_norm": 5.835021102097434, "learning_rate": 4.0880707948120755e-06, "loss": 17.4633, "step": 31273 }, { "epoch": 0.5716635896685982, "grad_norm": 6.142447652760215, "learning_rate": 4.087779749863139e-06, "loss": 17.5476, "step": 31274 }, { "epoch": 0.5716818688650447, "grad_norm": 7.199128002034627, "learning_rate": 4.087488708111453e-06, "loss": 17.7939, "step": 31275 }, { "epoch": 0.5717001480614912, "grad_norm": 7.067116770568462, "learning_rate": 4.087197669558043e-06, "loss": 17.7249, "step": 31276 }, { "epoch": 0.5717184272579378, "grad_norm": 6.682798177186595, "learning_rate": 4.086906634203924e-06, "loss": 17.7577, "step": 31277 }, { "epoch": 0.5717367064543842, "grad_norm": 6.597864276765598, "learning_rate": 4.086615602050118e-06, "loss": 17.7702, "step": 31278 }, { "epoch": 0.5717549856508308, "grad_norm": 8.649541435657815, "learning_rate": 4.086324573097646e-06, "loss": 18.6818, "step": 31279 }, { "epoch": 0.5717732648472773, "grad_norm": 6.034181512880285, "learning_rate": 4.086033547347528e-06, "loss": 17.376, "step": 31280 }, { "epoch": 0.5717915440437238, "grad_norm": 5.931466884122396, "learning_rate": 4.08574252480078e-06, "loss": 17.4247, "step": 31281 }, { "epoch": 0.5718098232401704, "grad_norm": 6.556484032854327, "learning_rate": 4.085451505458427e-06, "loss": 17.5949, "step": 31282 }, { "epoch": 0.5718281024366169, "grad_norm": 8.027605307508225, "learning_rate": 4.085160489321488e-06, "loss": 17.9029, "step": 31283 }, { "epoch": 0.5718463816330634, "grad_norm": 4.830097188385035, "learning_rate": 4.08486947639098e-06, "loss": 16.6719, "step": 31284 }, { "epoch": 0.5718646608295099, "grad_norm": 5.986941940475199, "learning_rate": 4.084578466667926e-06, "loss": 17.1961, "step": 31285 }, { "epoch": 0.5718829400259564, "grad_norm": 7.362873598254324, "learning_rate": 4.084287460153344e-06, "loss": 18.1964, "step": 31286 }, { "epoch": 0.571901219222403, "grad_norm": 6.918242859577988, "learning_rate": 4.083996456848257e-06, "loss": 17.748, "step": 31287 }, { "epoch": 0.5719194984188495, "grad_norm": 5.227772957339558, "learning_rate": 4.083705456753683e-06, "loss": 17.2652, "step": 31288 }, { "epoch": 0.5719377776152961, "grad_norm": 6.012138306427282, "learning_rate": 4.0834144598706395e-06, "loss": 17.2296, "step": 31289 }, { "epoch": 0.5719560568117426, "grad_norm": 6.864961045117648, "learning_rate": 4.0831234662001515e-06, "loss": 17.6084, "step": 31290 }, { "epoch": 0.571974336008189, "grad_norm": 5.773698595524258, "learning_rate": 4.082832475743234e-06, "loss": 17.4353, "step": 31291 }, { "epoch": 0.5719926152046356, "grad_norm": 8.945329571600872, "learning_rate": 4.08254148850091e-06, "loss": 17.8684, "step": 31292 }, { "epoch": 0.5720108944010821, "grad_norm": 5.375357257796848, "learning_rate": 4.0822505044741986e-06, "loss": 17.0394, "step": 31293 }, { "epoch": 0.5720291735975287, "grad_norm": 5.02462603576589, "learning_rate": 4.081959523664121e-06, "loss": 16.9193, "step": 31294 }, { "epoch": 0.5720474527939752, "grad_norm": 7.169259357714521, "learning_rate": 4.081668546071692e-06, "loss": 17.9029, "step": 31295 }, { "epoch": 0.5720657319904217, "grad_norm": 6.252173188886503, "learning_rate": 4.081377571697938e-06, "loss": 17.724, "step": 31296 }, { "epoch": 0.5720840111868682, "grad_norm": 5.506344641813584, "learning_rate": 4.081086600543875e-06, "loss": 17.2331, "step": 31297 }, { "epoch": 0.5721022903833147, "grad_norm": 7.6487946216064655, "learning_rate": 4.080795632610523e-06, "loss": 17.7368, "step": 31298 }, { "epoch": 0.5721205695797613, "grad_norm": 6.441565805985271, "learning_rate": 4.080504667898904e-06, "loss": 17.4863, "step": 31299 }, { "epoch": 0.5721388487762078, "grad_norm": 5.803174627353685, "learning_rate": 4.080213706410034e-06, "loss": 16.9164, "step": 31300 }, { "epoch": 0.5721571279726543, "grad_norm": 5.248449515900732, "learning_rate": 4.079922748144937e-06, "loss": 17.0418, "step": 31301 }, { "epoch": 0.5721754071691009, "grad_norm": 7.221191698523874, "learning_rate": 4.079631793104632e-06, "loss": 17.7541, "step": 31302 }, { "epoch": 0.5721936863655473, "grad_norm": 6.969654595155065, "learning_rate": 4.079340841290135e-06, "loss": 17.6298, "step": 31303 }, { "epoch": 0.5722119655619939, "grad_norm": 5.463726286946269, "learning_rate": 4.079049892702471e-06, "loss": 17.1388, "step": 31304 }, { "epoch": 0.5722302447584404, "grad_norm": 7.005932894651601, "learning_rate": 4.078758947342656e-06, "loss": 17.8292, "step": 31305 }, { "epoch": 0.5722485239548869, "grad_norm": 5.337287413217089, "learning_rate": 4.07846800521171e-06, "loss": 17.1333, "step": 31306 }, { "epoch": 0.5722668031513335, "grad_norm": 6.053830019378914, "learning_rate": 4.078177066310656e-06, "loss": 17.2211, "step": 31307 }, { "epoch": 0.57228508234778, "grad_norm": 5.758102975171525, "learning_rate": 4.077886130640509e-06, "loss": 17.0849, "step": 31308 }, { "epoch": 0.5723033615442266, "grad_norm": 6.589957505318808, "learning_rate": 4.077595198202292e-06, "loss": 17.2672, "step": 31309 }, { "epoch": 0.572321640740673, "grad_norm": 6.272097948649556, "learning_rate": 4.077304268997024e-06, "loss": 17.7316, "step": 31310 }, { "epoch": 0.5723399199371195, "grad_norm": 6.605844353058532, "learning_rate": 4.077013343025725e-06, "loss": 17.0278, "step": 31311 }, { "epoch": 0.5723581991335661, "grad_norm": 6.478634347827677, "learning_rate": 4.076722420289413e-06, "loss": 17.4216, "step": 31312 }, { "epoch": 0.5723764783300126, "grad_norm": 6.146043450082062, "learning_rate": 4.076431500789109e-06, "loss": 17.4861, "step": 31313 }, { "epoch": 0.5723947575264592, "grad_norm": 6.390418841325007, "learning_rate": 4.076140584525834e-06, "loss": 17.3967, "step": 31314 }, { "epoch": 0.5724130367229057, "grad_norm": 5.73291125621673, "learning_rate": 4.0758496715006035e-06, "loss": 17.4433, "step": 31315 }, { "epoch": 0.5724313159193521, "grad_norm": 7.371588030702962, "learning_rate": 4.075558761714442e-06, "loss": 17.5645, "step": 31316 }, { "epoch": 0.5724495951157987, "grad_norm": 6.7674708573283935, "learning_rate": 4.075267855168365e-06, "loss": 17.4962, "step": 31317 }, { "epoch": 0.5724678743122452, "grad_norm": 7.545564557463011, "learning_rate": 4.074976951863395e-06, "loss": 18.1784, "step": 31318 }, { "epoch": 0.5724861535086918, "grad_norm": 5.613892574314319, "learning_rate": 4.074686051800551e-06, "loss": 17.2089, "step": 31319 }, { "epoch": 0.5725044327051383, "grad_norm": 7.28676742508984, "learning_rate": 4.07439515498085e-06, "loss": 18.4327, "step": 31320 }, { "epoch": 0.5725227119015848, "grad_norm": 5.482449459352475, "learning_rate": 4.074104261405316e-06, "loss": 17.2907, "step": 31321 }, { "epoch": 0.5725409910980314, "grad_norm": 7.753081709733184, "learning_rate": 4.073813371074965e-06, "loss": 17.9313, "step": 31322 }, { "epoch": 0.5725592702944778, "grad_norm": 6.417199744943433, "learning_rate": 4.073522483990817e-06, "loss": 17.7837, "step": 31323 }, { "epoch": 0.5725775494909244, "grad_norm": 7.397594728839078, "learning_rate": 4.0732316001538945e-06, "loss": 17.5404, "step": 31324 }, { "epoch": 0.5725958286873709, "grad_norm": 7.379105329098974, "learning_rate": 4.0729407195652145e-06, "loss": 17.6167, "step": 31325 }, { "epoch": 0.5726141078838174, "grad_norm": 7.134458878511162, "learning_rate": 4.072649842225795e-06, "loss": 17.9933, "step": 31326 }, { "epoch": 0.572632387080264, "grad_norm": 5.328859733975696, "learning_rate": 4.072358968136658e-06, "loss": 17.0418, "step": 31327 }, { "epoch": 0.5726506662767105, "grad_norm": 6.741958904625365, "learning_rate": 4.072068097298824e-06, "loss": 17.6268, "step": 31328 }, { "epoch": 0.572668945473157, "grad_norm": 6.406371112266246, "learning_rate": 4.0717772297133085e-06, "loss": 17.2606, "step": 31329 }, { "epoch": 0.5726872246696035, "grad_norm": 6.273656564097533, "learning_rate": 4.0714863653811355e-06, "loss": 17.7222, "step": 31330 }, { "epoch": 0.57270550386605, "grad_norm": 7.515234479438668, "learning_rate": 4.071195504303319e-06, "loss": 17.5504, "step": 31331 }, { "epoch": 0.5727237830624966, "grad_norm": 9.690461945147964, "learning_rate": 4.070904646480884e-06, "loss": 17.7066, "step": 31332 }, { "epoch": 0.5727420622589431, "grad_norm": 7.560119238685697, "learning_rate": 4.070613791914848e-06, "loss": 18.0388, "step": 31333 }, { "epoch": 0.5727603414553897, "grad_norm": 5.794327332535043, "learning_rate": 4.070322940606229e-06, "loss": 17.0322, "step": 31334 }, { "epoch": 0.5727786206518362, "grad_norm": 5.29058281732412, "learning_rate": 4.0700320925560474e-06, "loss": 17.1128, "step": 31335 }, { "epoch": 0.5727968998482826, "grad_norm": 5.525926239740748, "learning_rate": 4.0697412477653235e-06, "loss": 17.077, "step": 31336 }, { "epoch": 0.5728151790447292, "grad_norm": 5.229380788195674, "learning_rate": 4.069450406235073e-06, "loss": 16.9534, "step": 31337 }, { "epoch": 0.5728334582411757, "grad_norm": 5.3698140859644194, "learning_rate": 4.069159567966322e-06, "loss": 17.0849, "step": 31338 }, { "epoch": 0.5728517374376222, "grad_norm": 6.213470651479189, "learning_rate": 4.068868732960085e-06, "loss": 17.3787, "step": 31339 }, { "epoch": 0.5728700166340688, "grad_norm": 5.5843251668280205, "learning_rate": 4.06857790121738e-06, "loss": 17.2438, "step": 31340 }, { "epoch": 0.5728882958305153, "grad_norm": 6.524187927706885, "learning_rate": 4.0682870727392296e-06, "loss": 17.6886, "step": 31341 }, { "epoch": 0.5729065750269619, "grad_norm": 8.546566855435296, "learning_rate": 4.067996247526654e-06, "loss": 18.26, "step": 31342 }, { "epoch": 0.5729248542234083, "grad_norm": 5.366141038188826, "learning_rate": 4.067705425580668e-06, "loss": 17.0756, "step": 31343 }, { "epoch": 0.5729431334198548, "grad_norm": 6.27029430108377, "learning_rate": 4.067414606902296e-06, "loss": 17.1849, "step": 31344 }, { "epoch": 0.5729614126163014, "grad_norm": 9.00445709918508, "learning_rate": 4.067123791492554e-06, "loss": 18.3771, "step": 31345 }, { "epoch": 0.5729796918127479, "grad_norm": 7.124081348442071, "learning_rate": 4.066832979352461e-06, "loss": 17.851, "step": 31346 }, { "epoch": 0.5729979710091945, "grad_norm": 7.024983423205266, "learning_rate": 4.0665421704830386e-06, "loss": 17.7788, "step": 31347 }, { "epoch": 0.573016250205641, "grad_norm": 8.725633489711159, "learning_rate": 4.066251364885304e-06, "loss": 18.2037, "step": 31348 }, { "epoch": 0.5730345294020874, "grad_norm": 7.1488178893085195, "learning_rate": 4.065960562560279e-06, "loss": 17.5462, "step": 31349 }, { "epoch": 0.573052808598534, "grad_norm": 6.429202520048033, "learning_rate": 4.06566976350898e-06, "loss": 17.7318, "step": 31350 }, { "epoch": 0.5730710877949805, "grad_norm": 5.411285084021298, "learning_rate": 4.0653789677324265e-06, "loss": 17.1744, "step": 31351 }, { "epoch": 0.5730893669914271, "grad_norm": 5.133685825480473, "learning_rate": 4.065088175231641e-06, "loss": 17.173, "step": 31352 }, { "epoch": 0.5731076461878736, "grad_norm": 7.11995989945361, "learning_rate": 4.0647973860076395e-06, "loss": 18.0656, "step": 31353 }, { "epoch": 0.5731259253843201, "grad_norm": 6.352758650312836, "learning_rate": 4.06450660006144e-06, "loss": 17.2466, "step": 31354 }, { "epoch": 0.5731442045807666, "grad_norm": 7.470044034862393, "learning_rate": 4.064215817394065e-06, "loss": 17.981, "step": 31355 }, { "epoch": 0.5731624837772131, "grad_norm": 6.259766278333456, "learning_rate": 4.063925038006535e-06, "loss": 17.3825, "step": 31356 }, { "epoch": 0.5731807629736597, "grad_norm": 5.131347584869691, "learning_rate": 4.0636342618998616e-06, "loss": 16.9792, "step": 31357 }, { "epoch": 0.5731990421701062, "grad_norm": 6.41801793441116, "learning_rate": 4.063343489075072e-06, "loss": 17.4668, "step": 31358 }, { "epoch": 0.5732173213665527, "grad_norm": 5.702468194377073, "learning_rate": 4.063052719533182e-06, "loss": 17.2095, "step": 31359 }, { "epoch": 0.5732356005629993, "grad_norm": 5.402513260023841, "learning_rate": 4.062761953275209e-06, "loss": 17.052, "step": 31360 }, { "epoch": 0.5732538797594458, "grad_norm": 5.249872349819929, "learning_rate": 4.062471190302177e-06, "loss": 16.9974, "step": 31361 }, { "epoch": 0.5732721589558923, "grad_norm": 7.881007781048285, "learning_rate": 4.0621804306150994e-06, "loss": 17.5755, "step": 31362 }, { "epoch": 0.5732904381523388, "grad_norm": 6.468827884977059, "learning_rate": 4.061889674215e-06, "loss": 17.5144, "step": 31363 }, { "epoch": 0.5733087173487853, "grad_norm": 7.253900981672671, "learning_rate": 4.061598921102895e-06, "loss": 17.6939, "step": 31364 }, { "epoch": 0.5733269965452319, "grad_norm": 5.571922768324259, "learning_rate": 4.0613081712798034e-06, "loss": 17.2965, "step": 31365 }, { "epoch": 0.5733452757416784, "grad_norm": 6.334395875731814, "learning_rate": 4.061017424746747e-06, "loss": 17.4236, "step": 31366 }, { "epoch": 0.573363554938125, "grad_norm": 4.875174591225551, "learning_rate": 4.060726681504742e-06, "loss": 16.9564, "step": 31367 }, { "epoch": 0.5733818341345714, "grad_norm": 7.226734052137219, "learning_rate": 4.060435941554809e-06, "loss": 17.9931, "step": 31368 }, { "epoch": 0.5734001133310179, "grad_norm": 5.993815033284674, "learning_rate": 4.060145204897966e-06, "loss": 17.2257, "step": 31369 }, { "epoch": 0.5734183925274645, "grad_norm": 6.083102895324697, "learning_rate": 4.059854471535234e-06, "loss": 17.402, "step": 31370 }, { "epoch": 0.573436671723911, "grad_norm": 6.22733566326861, "learning_rate": 4.059563741467628e-06, "loss": 17.5768, "step": 31371 }, { "epoch": 0.5734549509203576, "grad_norm": 6.995822762201358, "learning_rate": 4.0592730146961704e-06, "loss": 17.882, "step": 31372 }, { "epoch": 0.5734732301168041, "grad_norm": 6.288685590316142, "learning_rate": 4.058982291221881e-06, "loss": 17.4425, "step": 31373 }, { "epoch": 0.5734915093132505, "grad_norm": 5.761107160139433, "learning_rate": 4.058691571045774e-06, "loss": 17.1127, "step": 31374 }, { "epoch": 0.5735097885096971, "grad_norm": 5.874701788735947, "learning_rate": 4.058400854168874e-06, "loss": 17.1732, "step": 31375 }, { "epoch": 0.5735280677061436, "grad_norm": 8.40058042216727, "learning_rate": 4.058110140592196e-06, "loss": 18.0138, "step": 31376 }, { "epoch": 0.5735463469025902, "grad_norm": 6.076478647452989, "learning_rate": 4.05781943031676e-06, "loss": 17.2884, "step": 31377 }, { "epoch": 0.5735646260990367, "grad_norm": 5.650090360933766, "learning_rate": 4.057528723343586e-06, "loss": 17.1209, "step": 31378 }, { "epoch": 0.5735829052954832, "grad_norm": 5.098193128821009, "learning_rate": 4.057238019673691e-06, "loss": 17.1043, "step": 31379 }, { "epoch": 0.5736011844919298, "grad_norm": 6.151229264894107, "learning_rate": 4.056947319308096e-06, "loss": 17.297, "step": 31380 }, { "epoch": 0.5736194636883762, "grad_norm": 5.843272588553793, "learning_rate": 4.056656622247818e-06, "loss": 17.3656, "step": 31381 }, { "epoch": 0.5736377428848228, "grad_norm": 6.918797377991247, "learning_rate": 4.056365928493876e-06, "loss": 17.813, "step": 31382 }, { "epoch": 0.5736560220812693, "grad_norm": 7.856575662073421, "learning_rate": 4.056075238047292e-06, "loss": 18.1116, "step": 31383 }, { "epoch": 0.5736743012777158, "grad_norm": 8.170294133333991, "learning_rate": 4.055784550909082e-06, "loss": 18.3447, "step": 31384 }, { "epoch": 0.5736925804741624, "grad_norm": 4.95648820751796, "learning_rate": 4.0554938670802626e-06, "loss": 16.8021, "step": 31385 }, { "epoch": 0.5737108596706089, "grad_norm": 5.380680176472046, "learning_rate": 4.0552031865618565e-06, "loss": 17.0581, "step": 31386 }, { "epoch": 0.5737291388670555, "grad_norm": 6.692820778962716, "learning_rate": 4.054912509354883e-06, "loss": 17.4521, "step": 31387 }, { "epoch": 0.5737474180635019, "grad_norm": 6.989389380804562, "learning_rate": 4.054621835460356e-06, "loss": 17.8399, "step": 31388 }, { "epoch": 0.5737656972599484, "grad_norm": 5.66693371630673, "learning_rate": 4.0543311648793e-06, "loss": 17.1747, "step": 31389 }, { "epoch": 0.573783976456395, "grad_norm": 6.0997982237152995, "learning_rate": 4.054040497612731e-06, "loss": 17.332, "step": 31390 }, { "epoch": 0.5738022556528415, "grad_norm": 7.168114347782095, "learning_rate": 4.053749833661666e-06, "loss": 17.5874, "step": 31391 }, { "epoch": 0.5738205348492881, "grad_norm": 6.0023202570798695, "learning_rate": 4.053459173027128e-06, "loss": 17.3926, "step": 31392 }, { "epoch": 0.5738388140457346, "grad_norm": 6.739426375628391, "learning_rate": 4.053168515710131e-06, "loss": 17.9118, "step": 31393 }, { "epoch": 0.573857093242181, "grad_norm": 7.183463840396108, "learning_rate": 4.052877861711699e-06, "loss": 18.0496, "step": 31394 }, { "epoch": 0.5738753724386276, "grad_norm": 10.1952328770755, "learning_rate": 4.0525872110328465e-06, "loss": 18.2591, "step": 31395 }, { "epoch": 0.5738936516350741, "grad_norm": 8.948991732354228, "learning_rate": 4.052296563674593e-06, "loss": 18.1785, "step": 31396 }, { "epoch": 0.5739119308315207, "grad_norm": 6.450067097703036, "learning_rate": 4.052005919637959e-06, "loss": 17.5346, "step": 31397 }, { "epoch": 0.5739302100279672, "grad_norm": 6.007609968766062, "learning_rate": 4.0517152789239624e-06, "loss": 17.6318, "step": 31398 }, { "epoch": 0.5739484892244137, "grad_norm": 6.879940142005587, "learning_rate": 4.05142464153362e-06, "loss": 17.6962, "step": 31399 }, { "epoch": 0.5739667684208603, "grad_norm": 6.128616412171072, "learning_rate": 4.051134007467952e-06, "loss": 17.6036, "step": 31400 }, { "epoch": 0.5739850476173067, "grad_norm": 6.476857291423353, "learning_rate": 4.050843376727978e-06, "loss": 17.5824, "step": 31401 }, { "epoch": 0.5740033268137533, "grad_norm": 6.929062870625645, "learning_rate": 4.0505527493147145e-06, "loss": 17.8871, "step": 31402 }, { "epoch": 0.5740216060101998, "grad_norm": 5.202159578488877, "learning_rate": 4.050262125229183e-06, "loss": 17.0081, "step": 31403 }, { "epoch": 0.5740398852066463, "grad_norm": 5.443469556357806, "learning_rate": 4.049971504472399e-06, "loss": 17.1496, "step": 31404 }, { "epoch": 0.5740581644030929, "grad_norm": 6.22318277304315, "learning_rate": 4.049680887045381e-06, "loss": 17.2172, "step": 31405 }, { "epoch": 0.5740764435995394, "grad_norm": 5.238598295713149, "learning_rate": 4.049390272949152e-06, "loss": 17.1125, "step": 31406 }, { "epoch": 0.5740947227959858, "grad_norm": 6.308707329188058, "learning_rate": 4.049099662184727e-06, "loss": 17.5123, "step": 31407 }, { "epoch": 0.5741130019924324, "grad_norm": 5.390907984690496, "learning_rate": 4.048809054753123e-06, "loss": 17.157, "step": 31408 }, { "epoch": 0.5741312811888789, "grad_norm": 4.944086836116856, "learning_rate": 4.048518450655362e-06, "loss": 16.9399, "step": 31409 }, { "epoch": 0.5741495603853255, "grad_norm": 7.367192559567693, "learning_rate": 4.0482278498924595e-06, "loss": 18.3124, "step": 31410 }, { "epoch": 0.574167839581772, "grad_norm": 7.048082464315503, "learning_rate": 4.047937252465439e-06, "loss": 17.503, "step": 31411 }, { "epoch": 0.5741861187782185, "grad_norm": 6.723546059127619, "learning_rate": 4.047646658375316e-06, "loss": 17.6156, "step": 31412 }, { "epoch": 0.574204397974665, "grad_norm": 7.478892520221781, "learning_rate": 4.047356067623106e-06, "loss": 18.0163, "step": 31413 }, { "epoch": 0.5742226771711115, "grad_norm": 7.314037759921835, "learning_rate": 4.047065480209831e-06, "loss": 17.8913, "step": 31414 }, { "epoch": 0.5742409563675581, "grad_norm": 7.5505517969914075, "learning_rate": 4.046774896136511e-06, "loss": 17.9139, "step": 31415 }, { "epoch": 0.5742592355640046, "grad_norm": 8.076276070324868, "learning_rate": 4.0464843154041585e-06, "loss": 17.5214, "step": 31416 }, { "epoch": 0.5742775147604511, "grad_norm": 5.211489792053675, "learning_rate": 4.046193738013799e-06, "loss": 17.0883, "step": 31417 }, { "epoch": 0.5742957939568977, "grad_norm": 6.139339628237269, "learning_rate": 4.045903163966447e-06, "loss": 17.1475, "step": 31418 }, { "epoch": 0.5743140731533442, "grad_norm": 5.988661900211863, "learning_rate": 4.04561259326312e-06, "loss": 17.2725, "step": 31419 }, { "epoch": 0.5743323523497907, "grad_norm": 5.122959775189268, "learning_rate": 4.04532202590484e-06, "loss": 16.8701, "step": 31420 }, { "epoch": 0.5743506315462372, "grad_norm": 7.335186953806738, "learning_rate": 4.0450314618926236e-06, "loss": 17.4919, "step": 31421 }, { "epoch": 0.5743689107426837, "grad_norm": 5.490615378294824, "learning_rate": 4.044740901227487e-06, "loss": 17.1332, "step": 31422 }, { "epoch": 0.5743871899391303, "grad_norm": 6.099017812911173, "learning_rate": 4.044450343910452e-06, "loss": 17.2141, "step": 31423 }, { "epoch": 0.5744054691355768, "grad_norm": 17.643093625803278, "learning_rate": 4.044159789942534e-06, "loss": 17.446, "step": 31424 }, { "epoch": 0.5744237483320234, "grad_norm": 5.813277077072966, "learning_rate": 4.043869239324756e-06, "loss": 17.2943, "step": 31425 }, { "epoch": 0.5744420275284698, "grad_norm": 7.6193992457491815, "learning_rate": 4.043578692058132e-06, "loss": 17.7634, "step": 31426 }, { "epoch": 0.5744603067249163, "grad_norm": 6.6047949780890525, "learning_rate": 4.043288148143681e-06, "loss": 17.3357, "step": 31427 }, { "epoch": 0.5744785859213629, "grad_norm": 7.150557531830683, "learning_rate": 4.042997607582422e-06, "loss": 17.5813, "step": 31428 }, { "epoch": 0.5744968651178094, "grad_norm": 6.659524833253199, "learning_rate": 4.042707070375374e-06, "loss": 17.332, "step": 31429 }, { "epoch": 0.574515144314256, "grad_norm": 6.9182199958138835, "learning_rate": 4.042416536523554e-06, "loss": 17.546, "step": 31430 }, { "epoch": 0.5745334235107025, "grad_norm": 6.434952033860996, "learning_rate": 4.042126006027983e-06, "loss": 17.5208, "step": 31431 }, { "epoch": 0.574551702707149, "grad_norm": 5.593098687650707, "learning_rate": 4.041835478889675e-06, "loss": 17.0621, "step": 31432 }, { "epoch": 0.5745699819035955, "grad_norm": 6.6392687487472495, "learning_rate": 4.04154495510965e-06, "loss": 17.7285, "step": 31433 }, { "epoch": 0.574588261100042, "grad_norm": 15.962864442969273, "learning_rate": 4.041254434688929e-06, "loss": 18.0214, "step": 31434 }, { "epoch": 0.5746065402964886, "grad_norm": 6.755772036098351, "learning_rate": 4.040963917628527e-06, "loss": 17.7992, "step": 31435 }, { "epoch": 0.5746248194929351, "grad_norm": 6.591142472128282, "learning_rate": 4.040673403929462e-06, "loss": 17.1513, "step": 31436 }, { "epoch": 0.5746430986893816, "grad_norm": 8.193714016135418, "learning_rate": 4.040382893592755e-06, "loss": 17.9856, "step": 31437 }, { "epoch": 0.5746613778858282, "grad_norm": 5.888863262641006, "learning_rate": 4.040092386619424e-06, "loss": 17.1353, "step": 31438 }, { "epoch": 0.5746796570822746, "grad_norm": 7.581640473313872, "learning_rate": 4.039801883010482e-06, "loss": 17.9759, "step": 31439 }, { "epoch": 0.5746979362787212, "grad_norm": 7.32218415964991, "learning_rate": 4.0395113827669545e-06, "loss": 17.9325, "step": 31440 }, { "epoch": 0.5747162154751677, "grad_norm": 5.958308600852473, "learning_rate": 4.039220885889854e-06, "loss": 17.1854, "step": 31441 }, { "epoch": 0.5747344946716142, "grad_norm": 9.272056032394175, "learning_rate": 4.038930392380202e-06, "loss": 18.0894, "step": 31442 }, { "epoch": 0.5747527738680608, "grad_norm": 5.295612119186972, "learning_rate": 4.0386399022390165e-06, "loss": 16.949, "step": 31443 }, { "epoch": 0.5747710530645073, "grad_norm": 7.491611029748309, "learning_rate": 4.0383494154673125e-06, "loss": 17.7819, "step": 31444 }, { "epoch": 0.5747893322609539, "grad_norm": 6.2498795848976485, "learning_rate": 4.038058932066112e-06, "loss": 17.5028, "step": 31445 }, { "epoch": 0.5748076114574003, "grad_norm": 6.29521432459797, "learning_rate": 4.037768452036432e-06, "loss": 17.5603, "step": 31446 }, { "epoch": 0.5748258906538468, "grad_norm": 5.813075180095101, "learning_rate": 4.037477975379288e-06, "loss": 17.259, "step": 31447 }, { "epoch": 0.5748441698502934, "grad_norm": 5.61062940452156, "learning_rate": 4.037187502095702e-06, "loss": 17.1021, "step": 31448 }, { "epoch": 0.5748624490467399, "grad_norm": 6.850082681651458, "learning_rate": 4.03689703218669e-06, "loss": 17.798, "step": 31449 }, { "epoch": 0.5748807282431865, "grad_norm": 5.474205873367412, "learning_rate": 4.036606565653268e-06, "loss": 17.1521, "step": 31450 }, { "epoch": 0.574899007439633, "grad_norm": 6.496337740985756, "learning_rate": 4.0363161024964595e-06, "loss": 17.6913, "step": 31451 }, { "epoch": 0.5749172866360794, "grad_norm": 6.6008525266144105, "learning_rate": 4.03602564271728e-06, "loss": 17.3195, "step": 31452 }, { "epoch": 0.574935565832526, "grad_norm": 7.70506799679938, "learning_rate": 4.0357351863167434e-06, "loss": 17.8505, "step": 31453 }, { "epoch": 0.5749538450289725, "grad_norm": 5.521415072832091, "learning_rate": 4.035444733295873e-06, "loss": 17.2485, "step": 31454 }, { "epoch": 0.5749721242254191, "grad_norm": 7.29921644310251, "learning_rate": 4.035154283655684e-06, "loss": 17.8139, "step": 31455 }, { "epoch": 0.5749904034218656, "grad_norm": 6.096526253269777, "learning_rate": 4.034863837397198e-06, "loss": 17.4662, "step": 31456 }, { "epoch": 0.5750086826183121, "grad_norm": 8.103612317111917, "learning_rate": 4.03457339452143e-06, "loss": 17.4113, "step": 31457 }, { "epoch": 0.5750269618147587, "grad_norm": 6.626135496066169, "learning_rate": 4.034282955029396e-06, "loss": 17.7601, "step": 31458 }, { "epoch": 0.5750452410112051, "grad_norm": 6.250127216143219, "learning_rate": 4.033992518922118e-06, "loss": 17.2434, "step": 31459 }, { "epoch": 0.5750635202076517, "grad_norm": 6.239986291251721, "learning_rate": 4.033702086200614e-06, "loss": 17.1761, "step": 31460 }, { "epoch": 0.5750817994040982, "grad_norm": 7.972603956469025, "learning_rate": 4.033411656865897e-06, "loss": 18.1211, "step": 31461 }, { "epoch": 0.5751000786005447, "grad_norm": 5.2166069915493, "learning_rate": 4.0331212309189915e-06, "loss": 16.8892, "step": 31462 }, { "epoch": 0.5751183577969913, "grad_norm": 5.554761424461516, "learning_rate": 4.032830808360911e-06, "loss": 17.2962, "step": 31463 }, { "epoch": 0.5751366369934378, "grad_norm": 7.659878183372033, "learning_rate": 4.032540389192673e-06, "loss": 17.7924, "step": 31464 }, { "epoch": 0.5751549161898843, "grad_norm": 7.545431247483289, "learning_rate": 4.0322499734152994e-06, "loss": 18.3786, "step": 31465 }, { "epoch": 0.5751731953863308, "grad_norm": 6.9369211815862, "learning_rate": 4.031959561029806e-06, "loss": 17.8436, "step": 31466 }, { "epoch": 0.5751914745827773, "grad_norm": 5.7379621707692605, "learning_rate": 4.031669152037207e-06, "loss": 17.1586, "step": 31467 }, { "epoch": 0.5752097537792239, "grad_norm": 6.526870001962753, "learning_rate": 4.0313787464385266e-06, "loss": 17.4797, "step": 31468 }, { "epoch": 0.5752280329756704, "grad_norm": 6.061317569795212, "learning_rate": 4.031088344234777e-06, "loss": 17.447, "step": 31469 }, { "epoch": 0.575246312172117, "grad_norm": 5.477784384628263, "learning_rate": 4.030797945426981e-06, "loss": 17.1452, "step": 31470 }, { "epoch": 0.5752645913685634, "grad_norm": 5.967295372403444, "learning_rate": 4.030507550016154e-06, "loss": 17.3824, "step": 31471 }, { "epoch": 0.5752828705650099, "grad_norm": 6.0333806030467345, "learning_rate": 4.0302171580033125e-06, "loss": 17.2759, "step": 31472 }, { "epoch": 0.5753011497614565, "grad_norm": 5.647061741906049, "learning_rate": 4.029926769389477e-06, "loss": 17.3114, "step": 31473 }, { "epoch": 0.575319428957903, "grad_norm": 4.727965854797919, "learning_rate": 4.029636384175664e-06, "loss": 16.9385, "step": 31474 }, { "epoch": 0.5753377081543495, "grad_norm": 5.511030543521963, "learning_rate": 4.0293460023628896e-06, "loss": 16.6982, "step": 31475 }, { "epoch": 0.5753559873507961, "grad_norm": 5.730218352249514, "learning_rate": 4.029055623952176e-06, "loss": 17.1867, "step": 31476 }, { "epoch": 0.5753742665472426, "grad_norm": 5.463657081950294, "learning_rate": 4.028765248944536e-06, "loss": 17.0406, "step": 31477 }, { "epoch": 0.5753925457436891, "grad_norm": 4.35107396074129, "learning_rate": 4.028474877340989e-06, "loss": 16.7866, "step": 31478 }, { "epoch": 0.5754108249401356, "grad_norm": 6.600145886343277, "learning_rate": 4.028184509142555e-06, "loss": 17.4491, "step": 31479 }, { "epoch": 0.5754291041365821, "grad_norm": 6.814779259745567, "learning_rate": 4.027894144350251e-06, "loss": 17.5104, "step": 31480 }, { "epoch": 0.5754473833330287, "grad_norm": 5.9136383809238025, "learning_rate": 4.0276037829650904e-06, "loss": 17.1799, "step": 31481 }, { "epoch": 0.5754656625294752, "grad_norm": 6.13319154389157, "learning_rate": 4.027313424988096e-06, "loss": 17.6444, "step": 31482 }, { "epoch": 0.5754839417259218, "grad_norm": 9.70042447920684, "learning_rate": 4.027023070420284e-06, "loss": 18.2556, "step": 31483 }, { "epoch": 0.5755022209223682, "grad_norm": 5.79404489847975, "learning_rate": 4.0267327192626706e-06, "loss": 17.1682, "step": 31484 }, { "epoch": 0.5755205001188147, "grad_norm": 5.278038659000935, "learning_rate": 4.026442371516276e-06, "loss": 17.1028, "step": 31485 }, { "epoch": 0.5755387793152613, "grad_norm": 6.662896906724064, "learning_rate": 4.026152027182114e-06, "loss": 17.3224, "step": 31486 }, { "epoch": 0.5755570585117078, "grad_norm": 9.536394009206504, "learning_rate": 4.025861686261206e-06, "loss": 18.6173, "step": 31487 }, { "epoch": 0.5755753377081544, "grad_norm": 5.710314111853667, "learning_rate": 4.02557134875457e-06, "loss": 17.1495, "step": 31488 }, { "epoch": 0.5755936169046009, "grad_norm": 7.044511203203904, "learning_rate": 4.0252810146632185e-06, "loss": 17.9472, "step": 31489 }, { "epoch": 0.5756118961010473, "grad_norm": 7.771085992430332, "learning_rate": 4.024990683988175e-06, "loss": 17.3891, "step": 31490 }, { "epoch": 0.5756301752974939, "grad_norm": 6.263646721515344, "learning_rate": 4.024700356730454e-06, "loss": 17.343, "step": 31491 }, { "epoch": 0.5756484544939404, "grad_norm": 7.947404802483696, "learning_rate": 4.024410032891072e-06, "loss": 17.9241, "step": 31492 }, { "epoch": 0.575666733690387, "grad_norm": 5.802857696085724, "learning_rate": 4.02411971247105e-06, "loss": 17.3215, "step": 31493 }, { "epoch": 0.5756850128868335, "grad_norm": 5.058600321503498, "learning_rate": 4.023829395471404e-06, "loss": 17.1435, "step": 31494 }, { "epoch": 0.57570329208328, "grad_norm": 7.007833829030539, "learning_rate": 4.02353908189315e-06, "loss": 17.4405, "step": 31495 }, { "epoch": 0.5757215712797266, "grad_norm": 5.918484571049595, "learning_rate": 4.023248771737307e-06, "loss": 17.2449, "step": 31496 }, { "epoch": 0.575739850476173, "grad_norm": 6.531073386372308, "learning_rate": 4.022958465004894e-06, "loss": 17.6669, "step": 31497 }, { "epoch": 0.5757581296726196, "grad_norm": 5.809088805452044, "learning_rate": 4.022668161696924e-06, "loss": 17.4984, "step": 31498 }, { "epoch": 0.5757764088690661, "grad_norm": 6.706874845573994, "learning_rate": 4.022377861814419e-06, "loss": 17.6425, "step": 31499 }, { "epoch": 0.5757946880655126, "grad_norm": 7.101204317728347, "learning_rate": 4.022087565358394e-06, "loss": 17.7322, "step": 31500 }, { "epoch": 0.5758129672619592, "grad_norm": 6.21229655019321, "learning_rate": 4.021797272329867e-06, "loss": 17.3801, "step": 31501 }, { "epoch": 0.5758312464584057, "grad_norm": 6.316245464908212, "learning_rate": 4.021506982729858e-06, "loss": 17.5373, "step": 31502 }, { "epoch": 0.5758495256548523, "grad_norm": 6.966004110922218, "learning_rate": 4.021216696559379e-06, "loss": 17.7674, "step": 31503 }, { "epoch": 0.5758678048512987, "grad_norm": 4.765620983833983, "learning_rate": 4.020926413819452e-06, "loss": 16.8741, "step": 31504 }, { "epoch": 0.5758860840477452, "grad_norm": 6.031530753640715, "learning_rate": 4.020636134511094e-06, "loss": 17.2079, "step": 31505 }, { "epoch": 0.5759043632441918, "grad_norm": 8.722854632133224, "learning_rate": 4.020345858635319e-06, "loss": 18.5067, "step": 31506 }, { "epoch": 0.5759226424406383, "grad_norm": 6.487334939353379, "learning_rate": 4.020055586193149e-06, "loss": 17.2602, "step": 31507 }, { "epoch": 0.5759409216370849, "grad_norm": 7.310013870609126, "learning_rate": 4.019765317185598e-06, "loss": 18.0573, "step": 31508 }, { "epoch": 0.5759592008335314, "grad_norm": 5.753057709524688, "learning_rate": 4.0194750516136835e-06, "loss": 17.1933, "step": 31509 }, { "epoch": 0.5759774800299778, "grad_norm": 8.274461803972216, "learning_rate": 4.019184789478426e-06, "loss": 18.2397, "step": 31510 }, { "epoch": 0.5759957592264244, "grad_norm": 6.274391919200609, "learning_rate": 4.018894530780842e-06, "loss": 16.9297, "step": 31511 }, { "epoch": 0.5760140384228709, "grad_norm": 6.334169604936451, "learning_rate": 4.018604275521944e-06, "loss": 17.4014, "step": 31512 }, { "epoch": 0.5760323176193175, "grad_norm": 5.848129222800858, "learning_rate": 4.0183140237027535e-06, "loss": 17.0788, "step": 31513 }, { "epoch": 0.576050596815764, "grad_norm": 7.208895946404845, "learning_rate": 4.018023775324289e-06, "loss": 17.4253, "step": 31514 }, { "epoch": 0.5760688760122105, "grad_norm": 6.621914095078201, "learning_rate": 4.017733530387563e-06, "loss": 17.5637, "step": 31515 }, { "epoch": 0.576087155208657, "grad_norm": 5.68496790214495, "learning_rate": 4.017443288893599e-06, "loss": 17.2598, "step": 31516 }, { "epoch": 0.5761054344051035, "grad_norm": 6.563072451960008, "learning_rate": 4.0171530508434085e-06, "loss": 17.6246, "step": 31517 }, { "epoch": 0.5761237136015501, "grad_norm": 5.940515124848596, "learning_rate": 4.016862816238012e-06, "loss": 17.0462, "step": 31518 }, { "epoch": 0.5761419927979966, "grad_norm": 7.208575563404179, "learning_rate": 4.016572585078427e-06, "loss": 17.4294, "step": 31519 }, { "epoch": 0.5761602719944431, "grad_norm": 6.948845455127992, "learning_rate": 4.016282357365669e-06, "loss": 17.6523, "step": 31520 }, { "epoch": 0.5761785511908897, "grad_norm": 6.142908882647007, "learning_rate": 4.015992133100757e-06, "loss": 17.4213, "step": 31521 }, { "epoch": 0.5761968303873362, "grad_norm": 5.473158064774473, "learning_rate": 4.015701912284705e-06, "loss": 17.1908, "step": 31522 }, { "epoch": 0.5762151095837827, "grad_norm": 7.088572869842244, "learning_rate": 4.015411694918533e-06, "loss": 17.6457, "step": 31523 }, { "epoch": 0.5762333887802292, "grad_norm": 6.365906059456942, "learning_rate": 4.015121481003259e-06, "loss": 17.4014, "step": 31524 }, { "epoch": 0.5762516679766757, "grad_norm": 4.460536780352353, "learning_rate": 4.014831270539899e-06, "loss": 16.6317, "step": 31525 }, { "epoch": 0.5762699471731223, "grad_norm": 7.207403489616821, "learning_rate": 4.014541063529467e-06, "loss": 17.5561, "step": 31526 }, { "epoch": 0.5762882263695688, "grad_norm": 5.981502176295994, "learning_rate": 4.0142508599729855e-06, "loss": 17.3096, "step": 31527 }, { "epoch": 0.5763065055660154, "grad_norm": 8.248637449688319, "learning_rate": 4.0139606598714696e-06, "loss": 18.1268, "step": 31528 }, { "epoch": 0.5763247847624619, "grad_norm": 7.414569216105201, "learning_rate": 4.013670463225934e-06, "loss": 17.799, "step": 31529 }, { "epoch": 0.5763430639589083, "grad_norm": 6.611706677880505, "learning_rate": 4.013380270037399e-06, "loss": 17.461, "step": 31530 }, { "epoch": 0.5763613431553549, "grad_norm": 6.480462797120571, "learning_rate": 4.013090080306879e-06, "loss": 17.6153, "step": 31531 }, { "epoch": 0.5763796223518014, "grad_norm": 7.261900613449026, "learning_rate": 4.012799894035395e-06, "loss": 18.1249, "step": 31532 }, { "epoch": 0.576397901548248, "grad_norm": 8.22519473273205, "learning_rate": 4.012509711223962e-06, "loss": 18.287, "step": 31533 }, { "epoch": 0.5764161807446945, "grad_norm": 7.202975359540551, "learning_rate": 4.0122195318735935e-06, "loss": 17.8663, "step": 31534 }, { "epoch": 0.576434459941141, "grad_norm": 7.312393067453894, "learning_rate": 4.011929355985313e-06, "loss": 17.2805, "step": 31535 }, { "epoch": 0.5764527391375875, "grad_norm": 7.074334081661277, "learning_rate": 4.011639183560133e-06, "loss": 17.3357, "step": 31536 }, { "epoch": 0.576471018334034, "grad_norm": 5.4836033975145915, "learning_rate": 4.011349014599071e-06, "loss": 17.11, "step": 31537 }, { "epoch": 0.5764892975304806, "grad_norm": 6.13125236212519, "learning_rate": 4.011058849103146e-06, "loss": 17.3717, "step": 31538 }, { "epoch": 0.5765075767269271, "grad_norm": 7.447585983066444, "learning_rate": 4.010768687073376e-06, "loss": 18.0571, "step": 31539 }, { "epoch": 0.5765258559233736, "grad_norm": 7.3540101023318, "learning_rate": 4.010478528510772e-06, "loss": 17.7853, "step": 31540 }, { "epoch": 0.5765441351198202, "grad_norm": 5.87944935956948, "learning_rate": 4.010188373416357e-06, "loss": 17.3248, "step": 31541 }, { "epoch": 0.5765624143162666, "grad_norm": 5.560954108433084, "learning_rate": 4.009898221791146e-06, "loss": 17.4121, "step": 31542 }, { "epoch": 0.5765806935127131, "grad_norm": 7.60375932746507, "learning_rate": 4.0096080736361544e-06, "loss": 17.7227, "step": 31543 }, { "epoch": 0.5765989727091597, "grad_norm": 5.33225434656662, "learning_rate": 4.0093179289524035e-06, "loss": 16.8285, "step": 31544 }, { "epoch": 0.5766172519056062, "grad_norm": 7.600204478701094, "learning_rate": 4.009027787740904e-06, "loss": 17.6261, "step": 31545 }, { "epoch": 0.5766355311020528, "grad_norm": 5.994156013508625, "learning_rate": 4.008737650002677e-06, "loss": 17.5302, "step": 31546 }, { "epoch": 0.5766538102984993, "grad_norm": 7.785096774171206, "learning_rate": 4.00844751573874e-06, "loss": 16.8903, "step": 31547 }, { "epoch": 0.5766720894949457, "grad_norm": 5.2990984566928985, "learning_rate": 4.008157384950106e-06, "loss": 17.1099, "step": 31548 }, { "epoch": 0.5766903686913923, "grad_norm": 5.641025739348432, "learning_rate": 4.007867257637797e-06, "loss": 17.1833, "step": 31549 }, { "epoch": 0.5767086478878388, "grad_norm": 7.602144005325064, "learning_rate": 4.007577133802826e-06, "loss": 18.0377, "step": 31550 }, { "epoch": 0.5767269270842854, "grad_norm": 6.544505959612686, "learning_rate": 4.007287013446209e-06, "loss": 17.5771, "step": 31551 }, { "epoch": 0.5767452062807319, "grad_norm": 6.879970433285083, "learning_rate": 4.006996896568968e-06, "loss": 17.6322, "step": 31552 }, { "epoch": 0.5767634854771784, "grad_norm": 7.2160852999619225, "learning_rate": 4.006706783172116e-06, "loss": 17.7937, "step": 31553 }, { "epoch": 0.576781764673625, "grad_norm": 6.308697047326859, "learning_rate": 4.006416673256669e-06, "loss": 17.466, "step": 31554 }, { "epoch": 0.5768000438700714, "grad_norm": 8.398029588048683, "learning_rate": 4.006126566823647e-06, "loss": 18.5654, "step": 31555 }, { "epoch": 0.576818323066518, "grad_norm": 6.424132025421793, "learning_rate": 4.005836463874065e-06, "loss": 17.1598, "step": 31556 }, { "epoch": 0.5768366022629645, "grad_norm": 6.020347900410496, "learning_rate": 4.005546364408938e-06, "loss": 17.276, "step": 31557 }, { "epoch": 0.576854881459411, "grad_norm": 6.997150308363163, "learning_rate": 4.005256268429287e-06, "loss": 18.0329, "step": 31558 }, { "epoch": 0.5768731606558576, "grad_norm": 7.058051625902674, "learning_rate": 4.004966175936126e-06, "loss": 17.7644, "step": 31559 }, { "epoch": 0.5768914398523041, "grad_norm": 6.133498379870381, "learning_rate": 4.00467608693047e-06, "loss": 17.6603, "step": 31560 }, { "epoch": 0.5769097190487507, "grad_norm": 5.50441284797382, "learning_rate": 4.004386001413341e-06, "loss": 17.0762, "step": 31561 }, { "epoch": 0.5769279982451971, "grad_norm": 7.314513066550052, "learning_rate": 4.0040959193857494e-06, "loss": 17.6612, "step": 31562 }, { "epoch": 0.5769462774416436, "grad_norm": 7.391382030440523, "learning_rate": 4.003805840848719e-06, "loss": 17.8511, "step": 31563 }, { "epoch": 0.5769645566380902, "grad_norm": 5.977650343203576, "learning_rate": 4.003515765803261e-06, "loss": 17.3495, "step": 31564 }, { "epoch": 0.5769828358345367, "grad_norm": 5.105054002970765, "learning_rate": 4.0032256942503926e-06, "loss": 17.146, "step": 31565 }, { "epoch": 0.5770011150309833, "grad_norm": 5.991927070370735, "learning_rate": 4.002935626191133e-06, "loss": 17.3195, "step": 31566 }, { "epoch": 0.5770193942274298, "grad_norm": 5.932704265081909, "learning_rate": 4.002645561626497e-06, "loss": 17.1678, "step": 31567 }, { "epoch": 0.5770376734238762, "grad_norm": 7.060184619312336, "learning_rate": 4.002355500557501e-06, "loss": 18.0147, "step": 31568 }, { "epoch": 0.5770559526203228, "grad_norm": 6.336486702166971, "learning_rate": 4.002065442985164e-06, "loss": 17.5949, "step": 31569 }, { "epoch": 0.5770742318167693, "grad_norm": 7.048870219514805, "learning_rate": 4.001775388910502e-06, "loss": 17.4281, "step": 31570 }, { "epoch": 0.5770925110132159, "grad_norm": 6.551421355376609, "learning_rate": 4.001485338334528e-06, "loss": 17.9067, "step": 31571 }, { "epoch": 0.5771107902096624, "grad_norm": 6.813641044533266, "learning_rate": 4.001195291258261e-06, "loss": 17.6513, "step": 31572 }, { "epoch": 0.5771290694061089, "grad_norm": 6.313673501784737, "learning_rate": 4.000905247682721e-06, "loss": 17.4474, "step": 31573 }, { "epoch": 0.5771473486025555, "grad_norm": 7.799024186405808, "learning_rate": 4.000615207608918e-06, "loss": 17.7602, "step": 31574 }, { "epoch": 0.5771656277990019, "grad_norm": 5.982020954349105, "learning_rate": 4.000325171037875e-06, "loss": 17.3468, "step": 31575 }, { "epoch": 0.5771839069954485, "grad_norm": 6.672893844036974, "learning_rate": 4.000035137970603e-06, "loss": 17.975, "step": 31576 }, { "epoch": 0.577202186191895, "grad_norm": 7.157181860560803, "learning_rate": 3.9997451084081205e-06, "loss": 17.2823, "step": 31577 }, { "epoch": 0.5772204653883415, "grad_norm": 6.539877815390028, "learning_rate": 3.999455082351447e-06, "loss": 17.5639, "step": 31578 }, { "epoch": 0.5772387445847881, "grad_norm": 7.9243409783210215, "learning_rate": 3.999165059801595e-06, "loss": 18.0727, "step": 31579 }, { "epoch": 0.5772570237812346, "grad_norm": 5.9107950530963045, "learning_rate": 3.9988750407595845e-06, "loss": 17.2729, "step": 31580 }, { "epoch": 0.5772753029776811, "grad_norm": 6.809185535950697, "learning_rate": 3.998585025226429e-06, "loss": 17.5392, "step": 31581 }, { "epoch": 0.5772935821741276, "grad_norm": 6.5738331068048685, "learning_rate": 3.998295013203145e-06, "loss": 17.6243, "step": 31582 }, { "epoch": 0.5773118613705741, "grad_norm": 4.931670839247374, "learning_rate": 3.9980050046907524e-06, "loss": 16.6876, "step": 31583 }, { "epoch": 0.5773301405670207, "grad_norm": 8.200286594726593, "learning_rate": 3.997714999690264e-06, "loss": 18.0685, "step": 31584 }, { "epoch": 0.5773484197634672, "grad_norm": 5.854088622893288, "learning_rate": 3.997424998202697e-06, "loss": 17.226, "step": 31585 }, { "epoch": 0.5773666989599138, "grad_norm": 5.258039253913011, "learning_rate": 3.997135000229068e-06, "loss": 16.8534, "step": 31586 }, { "epoch": 0.5773849781563603, "grad_norm": 6.354472296406268, "learning_rate": 3.996845005770397e-06, "loss": 17.3642, "step": 31587 }, { "epoch": 0.5774032573528067, "grad_norm": 6.542622407656747, "learning_rate": 3.996555014827693e-06, "loss": 17.2532, "step": 31588 }, { "epoch": 0.5774215365492533, "grad_norm": 6.975501276596725, "learning_rate": 3.9962650274019794e-06, "loss": 17.9447, "step": 31589 }, { "epoch": 0.5774398157456998, "grad_norm": 6.208137296182243, "learning_rate": 3.995975043494269e-06, "loss": 17.2626, "step": 31590 }, { "epoch": 0.5774580949421464, "grad_norm": 7.147026608992458, "learning_rate": 3.995685063105578e-06, "loss": 17.7966, "step": 31591 }, { "epoch": 0.5774763741385929, "grad_norm": 6.539646565520523, "learning_rate": 3.995395086236925e-06, "loss": 17.3024, "step": 31592 }, { "epoch": 0.5774946533350394, "grad_norm": 6.832381535826047, "learning_rate": 3.9951051128893245e-06, "loss": 17.831, "step": 31593 }, { "epoch": 0.577512932531486, "grad_norm": 6.558867618480252, "learning_rate": 3.994815143063794e-06, "loss": 17.5361, "step": 31594 }, { "epoch": 0.5775312117279324, "grad_norm": 6.157776969531781, "learning_rate": 3.994525176761348e-06, "loss": 17.2501, "step": 31595 }, { "epoch": 0.577549490924379, "grad_norm": 6.789554540487751, "learning_rate": 3.994235213983004e-06, "loss": 17.6634, "step": 31596 }, { "epoch": 0.5775677701208255, "grad_norm": 6.897984633527421, "learning_rate": 3.99394525472978e-06, "loss": 18.0524, "step": 31597 }, { "epoch": 0.577586049317272, "grad_norm": 6.3482166459602265, "learning_rate": 3.993655299002691e-06, "loss": 17.4266, "step": 31598 }, { "epoch": 0.5776043285137186, "grad_norm": 6.418735322187008, "learning_rate": 3.99336534680275e-06, "loss": 17.3866, "step": 31599 }, { "epoch": 0.577622607710165, "grad_norm": 6.218642927863522, "learning_rate": 3.993075398130977e-06, "loss": 17.056, "step": 31600 }, { "epoch": 0.5776408869066116, "grad_norm": 6.51831795585798, "learning_rate": 3.9927854529883895e-06, "loss": 17.7097, "step": 31601 }, { "epoch": 0.5776591661030581, "grad_norm": 6.921715645904044, "learning_rate": 3.992495511375999e-06, "loss": 17.6127, "step": 31602 }, { "epoch": 0.5776774452995046, "grad_norm": 5.152376927708347, "learning_rate": 3.992205573294826e-06, "loss": 17.0539, "step": 31603 }, { "epoch": 0.5776957244959512, "grad_norm": 6.3145385997365295, "learning_rate": 3.9919156387458845e-06, "loss": 17.3323, "step": 31604 }, { "epoch": 0.5777140036923977, "grad_norm": 6.67592093215207, "learning_rate": 3.991625707730189e-06, "loss": 17.5584, "step": 31605 }, { "epoch": 0.5777322828888443, "grad_norm": 5.6004226189398585, "learning_rate": 3.991335780248762e-06, "loss": 17.0183, "step": 31606 }, { "epoch": 0.5777505620852907, "grad_norm": 5.929978588819794, "learning_rate": 3.991045856302614e-06, "loss": 17.2339, "step": 31607 }, { "epoch": 0.5777688412817372, "grad_norm": 6.684666079967435, "learning_rate": 3.990755935892761e-06, "loss": 17.6518, "step": 31608 }, { "epoch": 0.5777871204781838, "grad_norm": 6.6319354404680535, "learning_rate": 3.990466019020222e-06, "loss": 17.7372, "step": 31609 }, { "epoch": 0.5778053996746303, "grad_norm": 5.720913254065027, "learning_rate": 3.9901761056860115e-06, "loss": 17.15, "step": 31610 }, { "epoch": 0.5778236788710768, "grad_norm": 5.392732879589927, "learning_rate": 3.989886195891147e-06, "loss": 17.0048, "step": 31611 }, { "epoch": 0.5778419580675234, "grad_norm": 5.88231682705289, "learning_rate": 3.989596289636645e-06, "loss": 17.2964, "step": 31612 }, { "epoch": 0.5778602372639698, "grad_norm": 6.116999087870809, "learning_rate": 3.989306386923517e-06, "loss": 17.5186, "step": 31613 }, { "epoch": 0.5778785164604164, "grad_norm": 5.537438069455885, "learning_rate": 3.989016487752784e-06, "loss": 17.3086, "step": 31614 }, { "epoch": 0.5778967956568629, "grad_norm": 6.049233105928933, "learning_rate": 3.988726592125462e-06, "loss": 17.2469, "step": 31615 }, { "epoch": 0.5779150748533094, "grad_norm": 6.752749111642918, "learning_rate": 3.988436700042563e-06, "loss": 17.863, "step": 31616 }, { "epoch": 0.577933354049756, "grad_norm": 6.8622232330206625, "learning_rate": 3.988146811505107e-06, "loss": 17.5676, "step": 31617 }, { "epoch": 0.5779516332462025, "grad_norm": 5.5420014300187, "learning_rate": 3.9878569265141085e-06, "loss": 17.0205, "step": 31618 }, { "epoch": 0.5779699124426491, "grad_norm": 6.213415640788461, "learning_rate": 3.987567045070583e-06, "loss": 17.4197, "step": 31619 }, { "epoch": 0.5779881916390955, "grad_norm": 7.705777316490605, "learning_rate": 3.987277167175548e-06, "loss": 17.988, "step": 31620 }, { "epoch": 0.578006470835542, "grad_norm": 7.229057696039526, "learning_rate": 3.986987292830019e-06, "loss": 17.8043, "step": 31621 }, { "epoch": 0.5780247500319886, "grad_norm": 6.7090006973823115, "learning_rate": 3.986697422035011e-06, "loss": 17.6339, "step": 31622 }, { "epoch": 0.5780430292284351, "grad_norm": 6.255359305053918, "learning_rate": 3.98640755479154e-06, "loss": 17.0173, "step": 31623 }, { "epoch": 0.5780613084248817, "grad_norm": 6.452725152411381, "learning_rate": 3.986117691100621e-06, "loss": 17.3364, "step": 31624 }, { "epoch": 0.5780795876213282, "grad_norm": 9.204966470897217, "learning_rate": 3.985827830963275e-06, "loss": 17.9694, "step": 31625 }, { "epoch": 0.5780978668177746, "grad_norm": 6.5365141977427506, "learning_rate": 3.9855379743805136e-06, "loss": 17.4185, "step": 31626 }, { "epoch": 0.5781161460142212, "grad_norm": 4.797291316677511, "learning_rate": 3.9852481213533524e-06, "loss": 16.8513, "step": 31627 }, { "epoch": 0.5781344252106677, "grad_norm": 6.168045771983807, "learning_rate": 3.98495827188281e-06, "loss": 17.1362, "step": 31628 }, { "epoch": 0.5781527044071143, "grad_norm": 6.648474982944894, "learning_rate": 3.9846684259699004e-06, "loss": 17.5731, "step": 31629 }, { "epoch": 0.5781709836035608, "grad_norm": 5.962212220866496, "learning_rate": 3.984378583615638e-06, "loss": 17.2842, "step": 31630 }, { "epoch": 0.5781892628000073, "grad_norm": 5.623150240598047, "learning_rate": 3.984088744821042e-06, "loss": 16.9937, "step": 31631 }, { "epoch": 0.5782075419964539, "grad_norm": 6.12272675990943, "learning_rate": 3.983798909587128e-06, "loss": 17.4648, "step": 31632 }, { "epoch": 0.5782258211929003, "grad_norm": 6.261353667590694, "learning_rate": 3.983509077914908e-06, "loss": 17.7463, "step": 31633 }, { "epoch": 0.5782441003893469, "grad_norm": 5.411021224886755, "learning_rate": 3.983219249805402e-06, "loss": 17.0249, "step": 31634 }, { "epoch": 0.5782623795857934, "grad_norm": 6.148058712456618, "learning_rate": 3.982929425259625e-06, "loss": 17.3073, "step": 31635 }, { "epoch": 0.5782806587822399, "grad_norm": 6.696944524970256, "learning_rate": 3.98263960427859e-06, "loss": 17.3958, "step": 31636 }, { "epoch": 0.5782989379786865, "grad_norm": 6.444862227966796, "learning_rate": 3.982349786863316e-06, "loss": 17.4214, "step": 31637 }, { "epoch": 0.578317217175133, "grad_norm": 5.809888159761141, "learning_rate": 3.982059973014817e-06, "loss": 17.2455, "step": 31638 }, { "epoch": 0.5783354963715795, "grad_norm": 5.341761030530907, "learning_rate": 3.981770162734111e-06, "loss": 17.0866, "step": 31639 }, { "epoch": 0.578353775568026, "grad_norm": 6.325848767358294, "learning_rate": 3.981480356022212e-06, "loss": 17.1349, "step": 31640 }, { "epoch": 0.5783720547644725, "grad_norm": 6.472733040878205, "learning_rate": 3.981190552880134e-06, "loss": 17.1128, "step": 31641 }, { "epoch": 0.5783903339609191, "grad_norm": 4.960212799972247, "learning_rate": 3.980900753308898e-06, "loss": 17.1652, "step": 31642 }, { "epoch": 0.5784086131573656, "grad_norm": 6.402461973615031, "learning_rate": 3.9806109573095155e-06, "loss": 17.5233, "step": 31643 }, { "epoch": 0.5784268923538122, "grad_norm": 5.635122132928665, "learning_rate": 3.980321164883001e-06, "loss": 17.2907, "step": 31644 }, { "epoch": 0.5784451715502587, "grad_norm": 6.863479756081074, "learning_rate": 3.980031376030373e-06, "loss": 18.0299, "step": 31645 }, { "epoch": 0.5784634507467051, "grad_norm": 6.360128788993468, "learning_rate": 3.9797415907526486e-06, "loss": 17.2628, "step": 31646 }, { "epoch": 0.5784817299431517, "grad_norm": 6.14107681251495, "learning_rate": 3.979451809050839e-06, "loss": 17.3822, "step": 31647 }, { "epoch": 0.5785000091395982, "grad_norm": 5.60909323061829, "learning_rate": 3.979162030925963e-06, "loss": 17.1172, "step": 31648 }, { "epoch": 0.5785182883360448, "grad_norm": 7.30519781815178, "learning_rate": 3.978872256379036e-06, "loss": 18.0288, "step": 31649 }, { "epoch": 0.5785365675324913, "grad_norm": 6.156942049012278, "learning_rate": 3.978582485411071e-06, "loss": 17.5139, "step": 31650 }, { "epoch": 0.5785548467289378, "grad_norm": 6.533637219855894, "learning_rate": 3.978292718023089e-06, "loss": 17.6823, "step": 31651 }, { "epoch": 0.5785731259253843, "grad_norm": 6.295918130127953, "learning_rate": 3.978002954216102e-06, "loss": 17.7498, "step": 31652 }, { "epoch": 0.5785914051218308, "grad_norm": 6.78274943383058, "learning_rate": 3.977713193991123e-06, "loss": 17.4743, "step": 31653 }, { "epoch": 0.5786096843182774, "grad_norm": 6.639251102283705, "learning_rate": 3.977423437349173e-06, "loss": 17.9002, "step": 31654 }, { "epoch": 0.5786279635147239, "grad_norm": 6.433663554552203, "learning_rate": 3.9771336842912635e-06, "loss": 17.1218, "step": 31655 }, { "epoch": 0.5786462427111704, "grad_norm": 5.587656770289556, "learning_rate": 3.9768439348184135e-06, "loss": 17.2296, "step": 31656 }, { "epoch": 0.578664521907617, "grad_norm": 7.58164622733567, "learning_rate": 3.976554188931637e-06, "loss": 17.8388, "step": 31657 }, { "epoch": 0.5786828011040634, "grad_norm": 6.871217018847976, "learning_rate": 3.976264446631947e-06, "loss": 17.775, "step": 31658 }, { "epoch": 0.57870108030051, "grad_norm": 7.508900457183313, "learning_rate": 3.975974707920362e-06, "loss": 18.2258, "step": 31659 }, { "epoch": 0.5787193594969565, "grad_norm": 7.25917881166303, "learning_rate": 3.975684972797898e-06, "loss": 17.4463, "step": 31660 }, { "epoch": 0.578737638693403, "grad_norm": 6.599237521846212, "learning_rate": 3.975395241265567e-06, "loss": 17.5084, "step": 31661 }, { "epoch": 0.5787559178898496, "grad_norm": 5.70289424932471, "learning_rate": 3.97510551332439e-06, "loss": 17.3698, "step": 31662 }, { "epoch": 0.5787741970862961, "grad_norm": 6.3451908200332365, "learning_rate": 3.974815788975377e-06, "loss": 16.8446, "step": 31663 }, { "epoch": 0.5787924762827427, "grad_norm": 6.716140760182414, "learning_rate": 3.974526068219545e-06, "loss": 17.4525, "step": 31664 }, { "epoch": 0.5788107554791891, "grad_norm": 5.701753670551879, "learning_rate": 3.974236351057913e-06, "loss": 17.123, "step": 31665 }, { "epoch": 0.5788290346756356, "grad_norm": 6.181018188830453, "learning_rate": 3.973946637491492e-06, "loss": 17.2488, "step": 31666 }, { "epoch": 0.5788473138720822, "grad_norm": 8.117269629847344, "learning_rate": 3.973656927521299e-06, "loss": 18.1117, "step": 31667 }, { "epoch": 0.5788655930685287, "grad_norm": 5.583197448258006, "learning_rate": 3.973367221148349e-06, "loss": 17.1269, "step": 31668 }, { "epoch": 0.5788838722649753, "grad_norm": 6.739422573813542, "learning_rate": 3.973077518373657e-06, "loss": 17.6249, "step": 31669 }, { "epoch": 0.5789021514614218, "grad_norm": 6.170951507423607, "learning_rate": 3.972787819198241e-06, "loss": 17.5397, "step": 31670 }, { "epoch": 0.5789204306578682, "grad_norm": 5.110003595578107, "learning_rate": 3.9724981236231144e-06, "loss": 16.8721, "step": 31671 }, { "epoch": 0.5789387098543148, "grad_norm": 7.669619640477714, "learning_rate": 3.972208431649292e-06, "loss": 18.0697, "step": 31672 }, { "epoch": 0.5789569890507613, "grad_norm": 8.047175427504019, "learning_rate": 3.9719187432777895e-06, "loss": 17.8454, "step": 31673 }, { "epoch": 0.5789752682472079, "grad_norm": 4.970610525758241, "learning_rate": 3.971629058509624e-06, "loss": 16.8705, "step": 31674 }, { "epoch": 0.5789935474436544, "grad_norm": 6.115584156454426, "learning_rate": 3.971339377345807e-06, "loss": 17.4032, "step": 31675 }, { "epoch": 0.5790118266401009, "grad_norm": 5.743858687359788, "learning_rate": 3.971049699787358e-06, "loss": 17.196, "step": 31676 }, { "epoch": 0.5790301058365475, "grad_norm": 5.467169186486842, "learning_rate": 3.970760025835292e-06, "loss": 16.9757, "step": 31677 }, { "epoch": 0.5790483850329939, "grad_norm": 6.747162905994886, "learning_rate": 3.970470355490619e-06, "loss": 17.5197, "step": 31678 }, { "epoch": 0.5790666642294404, "grad_norm": 5.781585393611601, "learning_rate": 3.9701806887543616e-06, "loss": 17.4481, "step": 31679 }, { "epoch": 0.579084943425887, "grad_norm": 5.610554935888499, "learning_rate": 3.969891025627531e-06, "loss": 17.3471, "step": 31680 }, { "epoch": 0.5791032226223335, "grad_norm": 5.8619000849355976, "learning_rate": 3.96960136611114e-06, "loss": 17.2366, "step": 31681 }, { "epoch": 0.5791215018187801, "grad_norm": 7.948496273141636, "learning_rate": 3.96931171020621e-06, "loss": 18.4472, "step": 31682 }, { "epoch": 0.5791397810152266, "grad_norm": 7.000505878608855, "learning_rate": 3.969022057913752e-06, "loss": 17.6631, "step": 31683 }, { "epoch": 0.579158060211673, "grad_norm": 6.16984677331726, "learning_rate": 3.968732409234781e-06, "loss": 17.4187, "step": 31684 }, { "epoch": 0.5791763394081196, "grad_norm": 6.604819750998503, "learning_rate": 3.968442764170315e-06, "loss": 17.711, "step": 31685 }, { "epoch": 0.5791946186045661, "grad_norm": 7.553958712710885, "learning_rate": 3.968153122721367e-06, "loss": 18.0095, "step": 31686 }, { "epoch": 0.5792128978010127, "grad_norm": 6.948500460143217, "learning_rate": 3.967863484888952e-06, "loss": 17.8759, "step": 31687 }, { "epoch": 0.5792311769974592, "grad_norm": 6.769060167942289, "learning_rate": 3.967573850674088e-06, "loss": 17.7635, "step": 31688 }, { "epoch": 0.5792494561939057, "grad_norm": 6.324939093245341, "learning_rate": 3.967284220077786e-06, "loss": 17.428, "step": 31689 }, { "epoch": 0.5792677353903523, "grad_norm": 5.493859544493164, "learning_rate": 3.966994593101063e-06, "loss": 17.0751, "step": 31690 }, { "epoch": 0.5792860145867987, "grad_norm": 6.870815242429796, "learning_rate": 3.966704969744937e-06, "loss": 17.5552, "step": 31691 }, { "epoch": 0.5793042937832453, "grad_norm": 7.696330094571315, "learning_rate": 3.9664153500104175e-06, "loss": 17.8872, "step": 31692 }, { "epoch": 0.5793225729796918, "grad_norm": 5.25454629415663, "learning_rate": 3.966125733898525e-06, "loss": 17.063, "step": 31693 }, { "epoch": 0.5793408521761383, "grad_norm": 9.30745904960601, "learning_rate": 3.96583612141027e-06, "loss": 18.4035, "step": 31694 }, { "epoch": 0.5793591313725849, "grad_norm": 6.283846710011667, "learning_rate": 3.96554651254667e-06, "loss": 17.3211, "step": 31695 }, { "epoch": 0.5793774105690314, "grad_norm": 6.010393590942668, "learning_rate": 3.965256907308741e-06, "loss": 17.5225, "step": 31696 }, { "epoch": 0.579395689765478, "grad_norm": 6.219229970353257, "learning_rate": 3.9649673056974965e-06, "loss": 17.5172, "step": 31697 }, { "epoch": 0.5794139689619244, "grad_norm": 5.576365679448866, "learning_rate": 3.9646777077139506e-06, "loss": 17.0995, "step": 31698 }, { "epoch": 0.5794322481583709, "grad_norm": 6.641717537650537, "learning_rate": 3.96438811335912e-06, "loss": 17.4209, "step": 31699 }, { "epoch": 0.5794505273548175, "grad_norm": 7.994778938956298, "learning_rate": 3.964098522634018e-06, "loss": 18.6891, "step": 31700 }, { "epoch": 0.579468806551264, "grad_norm": 6.7707179473576335, "learning_rate": 3.963808935539663e-06, "loss": 17.5175, "step": 31701 }, { "epoch": 0.5794870857477106, "grad_norm": 6.734107215771581, "learning_rate": 3.963519352077068e-06, "loss": 17.6317, "step": 31702 }, { "epoch": 0.579505364944157, "grad_norm": 7.173268362962265, "learning_rate": 3.963229772247246e-06, "loss": 17.7917, "step": 31703 }, { "epoch": 0.5795236441406035, "grad_norm": 6.9975900044629205, "learning_rate": 3.962940196051214e-06, "loss": 17.7474, "step": 31704 }, { "epoch": 0.5795419233370501, "grad_norm": 5.845608243474927, "learning_rate": 3.962650623489988e-06, "loss": 17.2377, "step": 31705 }, { "epoch": 0.5795602025334966, "grad_norm": 5.809584098039447, "learning_rate": 3.962361054564579e-06, "loss": 17.1298, "step": 31706 }, { "epoch": 0.5795784817299432, "grad_norm": 6.311527190291705, "learning_rate": 3.962071489276007e-06, "loss": 17.2944, "step": 31707 }, { "epoch": 0.5795967609263897, "grad_norm": 5.796811695779711, "learning_rate": 3.961781927625283e-06, "loss": 17.146, "step": 31708 }, { "epoch": 0.5796150401228362, "grad_norm": 6.9345078578256985, "learning_rate": 3.961492369613422e-06, "loss": 17.738, "step": 31709 }, { "epoch": 0.5796333193192827, "grad_norm": 5.313734116005517, "learning_rate": 3.9612028152414425e-06, "loss": 17.3726, "step": 31710 }, { "epoch": 0.5796515985157292, "grad_norm": 6.005639982757882, "learning_rate": 3.960913264510358e-06, "loss": 17.5306, "step": 31711 }, { "epoch": 0.5796698777121758, "grad_norm": 4.49413555393747, "learning_rate": 3.960623717421179e-06, "loss": 16.6937, "step": 31712 }, { "epoch": 0.5796881569086223, "grad_norm": 8.133378306702905, "learning_rate": 3.960334173974925e-06, "loss": 18.0006, "step": 31713 }, { "epoch": 0.5797064361050688, "grad_norm": 8.587269412376608, "learning_rate": 3.960044634172611e-06, "loss": 18.86, "step": 31714 }, { "epoch": 0.5797247153015154, "grad_norm": 6.82131668451568, "learning_rate": 3.959755098015248e-06, "loss": 17.4025, "step": 31715 }, { "epoch": 0.5797429944979618, "grad_norm": 4.150296881065222, "learning_rate": 3.959465565503855e-06, "loss": 16.7009, "step": 31716 }, { "epoch": 0.5797612736944084, "grad_norm": 5.257893648041154, "learning_rate": 3.959176036639443e-06, "loss": 16.9015, "step": 31717 }, { "epoch": 0.5797795528908549, "grad_norm": 5.8979826358834515, "learning_rate": 3.958886511423029e-06, "loss": 17.1506, "step": 31718 }, { "epoch": 0.5797978320873014, "grad_norm": 8.009150875005206, "learning_rate": 3.958596989855629e-06, "loss": 16.7287, "step": 31719 }, { "epoch": 0.579816111283748, "grad_norm": 6.9536718683751815, "learning_rate": 3.9583074719382545e-06, "loss": 17.4037, "step": 31720 }, { "epoch": 0.5798343904801945, "grad_norm": 8.471378846493407, "learning_rate": 3.958017957671923e-06, "loss": 17.8589, "step": 31721 }, { "epoch": 0.5798526696766411, "grad_norm": 6.028263420572028, "learning_rate": 3.957728447057648e-06, "loss": 17.4181, "step": 31722 }, { "epoch": 0.5798709488730875, "grad_norm": 5.321166172368559, "learning_rate": 3.957438940096443e-06, "loss": 16.8892, "step": 31723 }, { "epoch": 0.579889228069534, "grad_norm": 6.707891979084086, "learning_rate": 3.957149436789326e-06, "loss": 17.4145, "step": 31724 }, { "epoch": 0.5799075072659806, "grad_norm": 5.007690781063575, "learning_rate": 3.956859937137311e-06, "loss": 17.0441, "step": 31725 }, { "epoch": 0.5799257864624271, "grad_norm": 6.147370542186745, "learning_rate": 3.956570441141407e-06, "loss": 17.6436, "step": 31726 }, { "epoch": 0.5799440656588737, "grad_norm": 7.216217109148377, "learning_rate": 3.956280948802636e-06, "loss": 17.5772, "step": 31727 }, { "epoch": 0.5799623448553202, "grad_norm": 4.74318401816773, "learning_rate": 3.95599146012201e-06, "loss": 16.85, "step": 31728 }, { "epoch": 0.5799806240517666, "grad_norm": 6.235507649529821, "learning_rate": 3.955701975100541e-06, "loss": 17.4924, "step": 31729 }, { "epoch": 0.5799989032482132, "grad_norm": 6.233663934206047, "learning_rate": 3.9554124937392495e-06, "loss": 17.3652, "step": 31730 }, { "epoch": 0.5800171824446597, "grad_norm": 6.519566650045359, "learning_rate": 3.955123016039143e-06, "loss": 17.7167, "step": 31731 }, { "epoch": 0.5800354616411063, "grad_norm": 6.613872448391958, "learning_rate": 3.954833542001241e-06, "loss": 17.58, "step": 31732 }, { "epoch": 0.5800537408375528, "grad_norm": 6.5183582486556615, "learning_rate": 3.954544071626557e-06, "loss": 17.2221, "step": 31733 }, { "epoch": 0.5800720200339993, "grad_norm": 6.508605221933741, "learning_rate": 3.954254604916105e-06, "loss": 17.6048, "step": 31734 }, { "epoch": 0.5800902992304459, "grad_norm": 5.549405539605529, "learning_rate": 3.953965141870901e-06, "loss": 17.1684, "step": 31735 }, { "epoch": 0.5801085784268923, "grad_norm": 7.412297058073213, "learning_rate": 3.953675682491957e-06, "loss": 17.5434, "step": 31736 }, { "epoch": 0.5801268576233389, "grad_norm": 8.199412248522636, "learning_rate": 3.953386226780288e-06, "loss": 18.6158, "step": 31737 }, { "epoch": 0.5801451368197854, "grad_norm": 6.4331651740560005, "learning_rate": 3.953096774736912e-06, "loss": 17.1834, "step": 31738 }, { "epoch": 0.5801634160162319, "grad_norm": 7.144277502834422, "learning_rate": 3.952807326362841e-06, "loss": 17.9236, "step": 31739 }, { "epoch": 0.5801816952126785, "grad_norm": 6.379886007118644, "learning_rate": 3.952517881659087e-06, "loss": 17.2644, "step": 31740 }, { "epoch": 0.580199974409125, "grad_norm": 6.688851127480499, "learning_rate": 3.952228440626668e-06, "loss": 17.6805, "step": 31741 }, { "epoch": 0.5802182536055716, "grad_norm": 6.478395954116754, "learning_rate": 3.951939003266599e-06, "loss": 17.3328, "step": 31742 }, { "epoch": 0.580236532802018, "grad_norm": 5.402743935084067, "learning_rate": 3.95164956957989e-06, "loss": 16.9437, "step": 31743 }, { "epoch": 0.5802548119984645, "grad_norm": 7.293275818227497, "learning_rate": 3.951360139567561e-06, "loss": 17.7702, "step": 31744 }, { "epoch": 0.5802730911949111, "grad_norm": 5.325256902265057, "learning_rate": 3.951070713230622e-06, "loss": 17.0375, "step": 31745 }, { "epoch": 0.5802913703913576, "grad_norm": 5.832067663408694, "learning_rate": 3.950781290570088e-06, "loss": 17.3765, "step": 31746 }, { "epoch": 0.5803096495878041, "grad_norm": 6.264413550447617, "learning_rate": 3.950491871586978e-06, "loss": 17.4014, "step": 31747 }, { "epoch": 0.5803279287842507, "grad_norm": 6.40633335988377, "learning_rate": 3.9502024562822995e-06, "loss": 17.4502, "step": 31748 }, { "epoch": 0.5803462079806971, "grad_norm": 6.17962173021901, "learning_rate": 3.949913044657073e-06, "loss": 17.5883, "step": 31749 }, { "epoch": 0.5803644871771437, "grad_norm": 6.1608959105852055, "learning_rate": 3.949623636712309e-06, "loss": 17.4816, "step": 31750 }, { "epoch": 0.5803827663735902, "grad_norm": 6.825082582373906, "learning_rate": 3.949334232449022e-06, "loss": 17.0644, "step": 31751 }, { "epoch": 0.5804010455700367, "grad_norm": 8.359674928030516, "learning_rate": 3.9490448318682286e-06, "loss": 18.0787, "step": 31752 }, { "epoch": 0.5804193247664833, "grad_norm": 6.03048929086176, "learning_rate": 3.948755434970942e-06, "loss": 17.2211, "step": 31753 }, { "epoch": 0.5804376039629298, "grad_norm": 8.545548163881232, "learning_rate": 3.948466041758175e-06, "loss": 18.4085, "step": 31754 }, { "epoch": 0.5804558831593764, "grad_norm": 6.46479992833266, "learning_rate": 3.9481766522309446e-06, "loss": 17.1792, "step": 31755 }, { "epoch": 0.5804741623558228, "grad_norm": 6.2550243511130885, "learning_rate": 3.947887266390265e-06, "loss": 17.5061, "step": 31756 }, { "epoch": 0.5804924415522693, "grad_norm": 6.405330253361258, "learning_rate": 3.947597884237146e-06, "loss": 17.241, "step": 31757 }, { "epoch": 0.5805107207487159, "grad_norm": 6.993388974629709, "learning_rate": 3.947308505772607e-06, "loss": 17.44, "step": 31758 }, { "epoch": 0.5805289999451624, "grad_norm": 6.822981260812312, "learning_rate": 3.94701913099766e-06, "loss": 17.7348, "step": 31759 }, { "epoch": 0.580547279141609, "grad_norm": 7.945539755752843, "learning_rate": 3.946729759913319e-06, "loss": 17.7372, "step": 31760 }, { "epoch": 0.5805655583380555, "grad_norm": 4.938975166325886, "learning_rate": 3.946440392520601e-06, "loss": 16.8648, "step": 31761 }, { "epoch": 0.5805838375345019, "grad_norm": 6.064657055388915, "learning_rate": 3.946151028820514e-06, "loss": 17.4877, "step": 31762 }, { "epoch": 0.5806021167309485, "grad_norm": 4.997074907367173, "learning_rate": 3.945861668814079e-06, "loss": 16.8479, "step": 31763 }, { "epoch": 0.580620395927395, "grad_norm": 6.915273723064949, "learning_rate": 3.945572312502308e-06, "loss": 17.5251, "step": 31764 }, { "epoch": 0.5806386751238416, "grad_norm": 5.720782426097597, "learning_rate": 3.945282959886212e-06, "loss": 17.3404, "step": 31765 }, { "epoch": 0.5806569543202881, "grad_norm": 5.036950700786371, "learning_rate": 3.944993610966811e-06, "loss": 16.8644, "step": 31766 }, { "epoch": 0.5806752335167346, "grad_norm": 6.184046552387806, "learning_rate": 3.944704265745114e-06, "loss": 17.5746, "step": 31767 }, { "epoch": 0.5806935127131811, "grad_norm": 7.120183066554393, "learning_rate": 3.9444149242221355e-06, "loss": 17.6847, "step": 31768 }, { "epoch": 0.5807117919096276, "grad_norm": 6.0318300863238665, "learning_rate": 3.9441255863988936e-06, "loss": 17.1836, "step": 31769 }, { "epoch": 0.5807300711060742, "grad_norm": 5.96348246154688, "learning_rate": 3.9438362522764004e-06, "loss": 17.2541, "step": 31770 }, { "epoch": 0.5807483503025207, "grad_norm": 7.180431135024348, "learning_rate": 3.943546921855667e-06, "loss": 17.8029, "step": 31771 }, { "epoch": 0.5807666294989672, "grad_norm": 5.700774523957405, "learning_rate": 3.94325759513771e-06, "loss": 17.1167, "step": 31772 }, { "epoch": 0.5807849086954138, "grad_norm": 6.907337548835898, "learning_rate": 3.942968272123546e-06, "loss": 17.4757, "step": 31773 }, { "epoch": 0.5808031878918603, "grad_norm": 6.975961614662908, "learning_rate": 3.942678952814184e-06, "loss": 17.4827, "step": 31774 }, { "epoch": 0.5808214670883068, "grad_norm": 7.6642485810412, "learning_rate": 3.942389637210642e-06, "loss": 18.189, "step": 31775 }, { "epoch": 0.5808397462847533, "grad_norm": 6.082948665023564, "learning_rate": 3.9421003253139326e-06, "loss": 17.1439, "step": 31776 }, { "epoch": 0.5808580254811998, "grad_norm": 6.638287058355307, "learning_rate": 3.9418110171250675e-06, "loss": 17.585, "step": 31777 }, { "epoch": 0.5808763046776464, "grad_norm": 6.135866085285024, "learning_rate": 3.941521712645066e-06, "loss": 17.2753, "step": 31778 }, { "epoch": 0.5808945838740929, "grad_norm": 6.706628943806771, "learning_rate": 3.9412324118749355e-06, "loss": 17.6721, "step": 31779 }, { "epoch": 0.5809128630705395, "grad_norm": 7.193412439581141, "learning_rate": 3.940943114815697e-06, "loss": 17.6364, "step": 31780 }, { "epoch": 0.580931142266986, "grad_norm": 5.199337236964431, "learning_rate": 3.9406538214683596e-06, "loss": 16.8961, "step": 31781 }, { "epoch": 0.5809494214634324, "grad_norm": 6.779123113269031, "learning_rate": 3.940364531833938e-06, "loss": 17.3347, "step": 31782 }, { "epoch": 0.580967700659879, "grad_norm": 5.031814765132674, "learning_rate": 3.940075245913448e-06, "loss": 16.9592, "step": 31783 }, { "epoch": 0.5809859798563255, "grad_norm": 5.527044789643518, "learning_rate": 3.939785963707903e-06, "loss": 17.0917, "step": 31784 }, { "epoch": 0.5810042590527721, "grad_norm": 6.304561302901436, "learning_rate": 3.939496685218313e-06, "loss": 17.3609, "step": 31785 }, { "epoch": 0.5810225382492186, "grad_norm": 7.351708539542756, "learning_rate": 3.939207410445698e-06, "loss": 17.5383, "step": 31786 }, { "epoch": 0.581040817445665, "grad_norm": 5.556323698090615, "learning_rate": 3.938918139391068e-06, "loss": 17.1165, "step": 31787 }, { "epoch": 0.5810590966421116, "grad_norm": 5.887007571657293, "learning_rate": 3.938628872055437e-06, "loss": 17.0938, "step": 31788 }, { "epoch": 0.5810773758385581, "grad_norm": 6.757026524683898, "learning_rate": 3.938339608439822e-06, "loss": 17.4817, "step": 31789 }, { "epoch": 0.5810956550350047, "grad_norm": 6.197729112499964, "learning_rate": 3.938050348545233e-06, "loss": 17.3905, "step": 31790 }, { "epoch": 0.5811139342314512, "grad_norm": 7.720721868435249, "learning_rate": 3.937761092372684e-06, "loss": 17.9293, "step": 31791 }, { "epoch": 0.5811322134278977, "grad_norm": 6.895683034101999, "learning_rate": 3.937471839923192e-06, "loss": 17.7548, "step": 31792 }, { "epoch": 0.5811504926243443, "grad_norm": 6.692338160157974, "learning_rate": 3.937182591197768e-06, "loss": 17.5093, "step": 31793 }, { "epoch": 0.5811687718207907, "grad_norm": 11.678917809871756, "learning_rate": 3.936893346197429e-06, "loss": 17.6257, "step": 31794 }, { "epoch": 0.5811870510172373, "grad_norm": 6.199508455682219, "learning_rate": 3.936604104923184e-06, "loss": 17.1472, "step": 31795 }, { "epoch": 0.5812053302136838, "grad_norm": 5.4029045411314, "learning_rate": 3.936314867376049e-06, "loss": 17.0407, "step": 31796 }, { "epoch": 0.5812236094101303, "grad_norm": 5.404555084932533, "learning_rate": 3.9360256335570404e-06, "loss": 16.9607, "step": 31797 }, { "epoch": 0.5812418886065769, "grad_norm": 5.860580346602233, "learning_rate": 3.93573640346717e-06, "loss": 17.365, "step": 31798 }, { "epoch": 0.5812601678030234, "grad_norm": 9.135068728347326, "learning_rate": 3.935447177107449e-06, "loss": 18.3539, "step": 31799 }, { "epoch": 0.58127844699947, "grad_norm": 7.584862261992962, "learning_rate": 3.9351579544788936e-06, "loss": 18.0024, "step": 31800 }, { "epoch": 0.5812967261959164, "grad_norm": 5.437686255587911, "learning_rate": 3.934868735582519e-06, "loss": 17.0507, "step": 31801 }, { "epoch": 0.5813150053923629, "grad_norm": 7.457024443628506, "learning_rate": 3.9345795204193345e-06, "loss": 17.5184, "step": 31802 }, { "epoch": 0.5813332845888095, "grad_norm": 5.535802659528759, "learning_rate": 3.934290308990358e-06, "loss": 17.2174, "step": 31803 }, { "epoch": 0.581351563785256, "grad_norm": 5.787606251876502, "learning_rate": 3.934001101296601e-06, "loss": 17.2612, "step": 31804 }, { "epoch": 0.5813698429817026, "grad_norm": 6.3846796385067695, "learning_rate": 3.933711897339077e-06, "loss": 17.4606, "step": 31805 }, { "epoch": 0.5813881221781491, "grad_norm": 7.5104026197284925, "learning_rate": 3.933422697118801e-06, "loss": 17.4848, "step": 31806 }, { "epoch": 0.5814064013745955, "grad_norm": 5.380314688096574, "learning_rate": 3.9331335006367855e-06, "loss": 17.1678, "step": 31807 }, { "epoch": 0.5814246805710421, "grad_norm": 6.685782980526882, "learning_rate": 3.9328443078940456e-06, "loss": 17.5691, "step": 31808 }, { "epoch": 0.5814429597674886, "grad_norm": 6.365573902424248, "learning_rate": 3.932555118891593e-06, "loss": 17.4322, "step": 31809 }, { "epoch": 0.5814612389639352, "grad_norm": 6.999883088079117, "learning_rate": 3.932265933630441e-06, "loss": 18.2598, "step": 31810 }, { "epoch": 0.5814795181603817, "grad_norm": 7.989819457588124, "learning_rate": 3.931976752111606e-06, "loss": 17.9436, "step": 31811 }, { "epoch": 0.5814977973568282, "grad_norm": 4.971022890013863, "learning_rate": 3.931687574336099e-06, "loss": 16.8932, "step": 31812 }, { "epoch": 0.5815160765532748, "grad_norm": 7.100474430732092, "learning_rate": 3.931398400304935e-06, "loss": 18.2559, "step": 31813 }, { "epoch": 0.5815343557497212, "grad_norm": 6.595645325463331, "learning_rate": 3.931109230019126e-06, "loss": 17.7627, "step": 31814 }, { "epoch": 0.5815526349461677, "grad_norm": 5.865663873788561, "learning_rate": 3.930820063479688e-06, "loss": 17.0943, "step": 31815 }, { "epoch": 0.5815709141426143, "grad_norm": 5.575394677636512, "learning_rate": 3.93053090068763e-06, "loss": 17.2181, "step": 31816 }, { "epoch": 0.5815891933390608, "grad_norm": 6.998173899491816, "learning_rate": 3.93024174164397e-06, "loss": 17.4199, "step": 31817 }, { "epoch": 0.5816074725355074, "grad_norm": 5.923956752080834, "learning_rate": 3.9299525863497215e-06, "loss": 17.4807, "step": 31818 }, { "epoch": 0.5816257517319539, "grad_norm": 5.953184155619152, "learning_rate": 3.929663434805893e-06, "loss": 17.1007, "step": 31819 }, { "epoch": 0.5816440309284003, "grad_norm": 6.222180424193549, "learning_rate": 3.929374287013505e-06, "loss": 17.1633, "step": 31820 }, { "epoch": 0.5816623101248469, "grad_norm": 5.805733191305759, "learning_rate": 3.929085142973564e-06, "loss": 17.4099, "step": 31821 }, { "epoch": 0.5816805893212934, "grad_norm": 7.505205876811254, "learning_rate": 3.928796002687088e-06, "loss": 17.7656, "step": 31822 }, { "epoch": 0.58169886851774, "grad_norm": 7.312693634408364, "learning_rate": 3.928506866155089e-06, "loss": 17.9058, "step": 31823 }, { "epoch": 0.5817171477141865, "grad_norm": 5.98664313752073, "learning_rate": 3.9282177333785795e-06, "loss": 17.3764, "step": 31824 }, { "epoch": 0.581735426910633, "grad_norm": 6.287700230083683, "learning_rate": 3.9279286043585754e-06, "loss": 17.2372, "step": 31825 }, { "epoch": 0.5817537061070795, "grad_norm": 7.3977768081362285, "learning_rate": 3.927639479096088e-06, "loss": 17.226, "step": 31826 }, { "epoch": 0.581771985303526, "grad_norm": 6.035872625221642, "learning_rate": 3.92735035759213e-06, "loss": 17.1065, "step": 31827 }, { "epoch": 0.5817902644999726, "grad_norm": 5.840934773911729, "learning_rate": 3.927061239847718e-06, "loss": 17.3194, "step": 31828 }, { "epoch": 0.5818085436964191, "grad_norm": 7.677464812418289, "learning_rate": 3.926772125863863e-06, "loss": 17.568, "step": 31829 }, { "epoch": 0.5818268228928656, "grad_norm": 5.809331928225405, "learning_rate": 3.926483015641577e-06, "loss": 17.3273, "step": 31830 }, { "epoch": 0.5818451020893122, "grad_norm": 6.277336761136782, "learning_rate": 3.926193909181875e-06, "loss": 17.2591, "step": 31831 }, { "epoch": 0.5818633812857587, "grad_norm": 6.7572513864122845, "learning_rate": 3.9259048064857715e-06, "loss": 17.4277, "step": 31832 }, { "epoch": 0.5818816604822052, "grad_norm": 5.7505717612166585, "learning_rate": 3.925615707554277e-06, "loss": 17.0929, "step": 31833 }, { "epoch": 0.5818999396786517, "grad_norm": 6.355848594078621, "learning_rate": 3.9253266123884085e-06, "loss": 17.5376, "step": 31834 }, { "epoch": 0.5819182188750982, "grad_norm": 6.709293849041352, "learning_rate": 3.925037520989175e-06, "loss": 17.4504, "step": 31835 }, { "epoch": 0.5819364980715448, "grad_norm": 5.786128729236517, "learning_rate": 3.924748433357591e-06, "loss": 17.0186, "step": 31836 }, { "epoch": 0.5819547772679913, "grad_norm": 6.050341475203307, "learning_rate": 3.9244593494946724e-06, "loss": 17.1752, "step": 31837 }, { "epoch": 0.5819730564644379, "grad_norm": 6.444552287415581, "learning_rate": 3.9241702694014285e-06, "loss": 17.2514, "step": 31838 }, { "epoch": 0.5819913356608843, "grad_norm": 6.371699579899863, "learning_rate": 3.923881193078877e-06, "loss": 17.4053, "step": 31839 }, { "epoch": 0.5820096148573308, "grad_norm": 7.542706169733623, "learning_rate": 3.923592120528027e-06, "loss": 17.6427, "step": 31840 }, { "epoch": 0.5820278940537774, "grad_norm": 5.620436360991288, "learning_rate": 3.923303051749891e-06, "loss": 17.0476, "step": 31841 }, { "epoch": 0.5820461732502239, "grad_norm": 5.370774937332055, "learning_rate": 3.923013986745489e-06, "loss": 17.2105, "step": 31842 }, { "epoch": 0.5820644524466705, "grad_norm": 6.62460764040236, "learning_rate": 3.922724925515828e-06, "loss": 17.2544, "step": 31843 }, { "epoch": 0.582082731643117, "grad_norm": 6.021094057010248, "learning_rate": 3.92243586806192e-06, "loss": 17.3733, "step": 31844 }, { "epoch": 0.5821010108395634, "grad_norm": 8.742904360072096, "learning_rate": 3.922146814384783e-06, "loss": 18.1378, "step": 31845 }, { "epoch": 0.58211929003601, "grad_norm": 6.947599044991676, "learning_rate": 3.921857764485428e-06, "loss": 17.8459, "step": 31846 }, { "epoch": 0.5821375692324565, "grad_norm": 6.167864888099273, "learning_rate": 3.921568718364866e-06, "loss": 17.4892, "step": 31847 }, { "epoch": 0.5821558484289031, "grad_norm": 6.423937793236162, "learning_rate": 3.9212796760241155e-06, "loss": 17.4496, "step": 31848 }, { "epoch": 0.5821741276253496, "grad_norm": 6.123968924508376, "learning_rate": 3.920990637464183e-06, "loss": 16.9594, "step": 31849 }, { "epoch": 0.5821924068217961, "grad_norm": 5.776302088210786, "learning_rate": 3.920701602686086e-06, "loss": 17.1403, "step": 31850 }, { "epoch": 0.5822106860182427, "grad_norm": 6.560203535410062, "learning_rate": 3.920412571690837e-06, "loss": 17.4094, "step": 31851 }, { "epoch": 0.5822289652146891, "grad_norm": 5.927184662268967, "learning_rate": 3.920123544479448e-06, "loss": 17.1402, "step": 31852 }, { "epoch": 0.5822472444111357, "grad_norm": 6.435730513705744, "learning_rate": 3.9198345210529306e-06, "loss": 17.3692, "step": 31853 }, { "epoch": 0.5822655236075822, "grad_norm": 7.155812442854809, "learning_rate": 3.919545501412301e-06, "loss": 17.7206, "step": 31854 }, { "epoch": 0.5822838028040287, "grad_norm": 5.813384385910134, "learning_rate": 3.919256485558569e-06, "loss": 17.4002, "step": 31855 }, { "epoch": 0.5823020820004753, "grad_norm": 7.141013296151906, "learning_rate": 3.9189674734927506e-06, "loss": 17.8338, "step": 31856 }, { "epoch": 0.5823203611969218, "grad_norm": 4.804216810930371, "learning_rate": 3.918678465215858e-06, "loss": 16.8049, "step": 31857 }, { "epoch": 0.5823386403933684, "grad_norm": 5.971921499247589, "learning_rate": 3.918389460728902e-06, "loss": 17.2393, "step": 31858 }, { "epoch": 0.5823569195898148, "grad_norm": 7.137291662025654, "learning_rate": 3.918100460032897e-06, "loss": 17.5842, "step": 31859 }, { "epoch": 0.5823751987862613, "grad_norm": 5.9297239301232745, "learning_rate": 3.917811463128859e-06, "loss": 17.1508, "step": 31860 }, { "epoch": 0.5823934779827079, "grad_norm": 6.001122639190586, "learning_rate": 3.917522470017794e-06, "loss": 17.1054, "step": 31861 }, { "epoch": 0.5824117571791544, "grad_norm": 6.416348958276474, "learning_rate": 3.917233480700721e-06, "loss": 17.6358, "step": 31862 }, { "epoch": 0.582430036375601, "grad_norm": 6.406734566641075, "learning_rate": 3.916944495178651e-06, "loss": 17.2215, "step": 31863 }, { "epoch": 0.5824483155720475, "grad_norm": 6.4318169842493305, "learning_rate": 3.916655513452594e-06, "loss": 17.4739, "step": 31864 }, { "epoch": 0.5824665947684939, "grad_norm": 6.760758950695435, "learning_rate": 3.916366535523569e-06, "loss": 17.4369, "step": 31865 }, { "epoch": 0.5824848739649405, "grad_norm": 9.567410133098441, "learning_rate": 3.9160775613925836e-06, "loss": 17.8386, "step": 31866 }, { "epoch": 0.582503153161387, "grad_norm": 6.835187883188999, "learning_rate": 3.915788591060652e-06, "loss": 17.8434, "step": 31867 }, { "epoch": 0.5825214323578336, "grad_norm": 5.971919150936089, "learning_rate": 3.915499624528787e-06, "loss": 17.3441, "step": 31868 }, { "epoch": 0.5825397115542801, "grad_norm": 5.641037414943666, "learning_rate": 3.9152106617980014e-06, "loss": 16.9751, "step": 31869 }, { "epoch": 0.5825579907507266, "grad_norm": 6.642115004577191, "learning_rate": 3.91492170286931e-06, "loss": 17.4375, "step": 31870 }, { "epoch": 0.5825762699471732, "grad_norm": 5.174654554803288, "learning_rate": 3.914632747743724e-06, "loss": 16.972, "step": 31871 }, { "epoch": 0.5825945491436196, "grad_norm": 7.15001343010463, "learning_rate": 3.9143437964222535e-06, "loss": 17.8747, "step": 31872 }, { "epoch": 0.5826128283400662, "grad_norm": 6.906564279055644, "learning_rate": 3.914054848905915e-06, "loss": 17.4346, "step": 31873 }, { "epoch": 0.5826311075365127, "grad_norm": 6.003611750974434, "learning_rate": 3.913765905195721e-06, "loss": 17.5003, "step": 31874 }, { "epoch": 0.5826493867329592, "grad_norm": 5.84446423942493, "learning_rate": 3.913476965292682e-06, "loss": 17.3163, "step": 31875 }, { "epoch": 0.5826676659294058, "grad_norm": 6.181584666119825, "learning_rate": 3.913188029197813e-06, "loss": 17.2873, "step": 31876 }, { "epoch": 0.5826859451258523, "grad_norm": 6.8139135298714715, "learning_rate": 3.912899096912125e-06, "loss": 17.733, "step": 31877 }, { "epoch": 0.5827042243222988, "grad_norm": 6.898379009498769, "learning_rate": 3.912610168436631e-06, "loss": 17.4052, "step": 31878 }, { "epoch": 0.5827225035187453, "grad_norm": 8.156620362288841, "learning_rate": 3.912321243772345e-06, "loss": 18.5384, "step": 31879 }, { "epoch": 0.5827407827151918, "grad_norm": 6.37167873473521, "learning_rate": 3.9120323229202786e-06, "loss": 17.1785, "step": 31880 }, { "epoch": 0.5827590619116384, "grad_norm": 5.700129288658293, "learning_rate": 3.911743405881444e-06, "loss": 17.1487, "step": 31881 }, { "epoch": 0.5827773411080849, "grad_norm": 6.366844243891693, "learning_rate": 3.911454492656854e-06, "loss": 17.4476, "step": 31882 }, { "epoch": 0.5827956203045314, "grad_norm": 8.540771880739035, "learning_rate": 3.911165583247523e-06, "loss": 17.7487, "step": 31883 }, { "epoch": 0.582813899500978, "grad_norm": 6.171573781688739, "learning_rate": 3.910876677654459e-06, "loss": 17.1936, "step": 31884 }, { "epoch": 0.5828321786974244, "grad_norm": 6.754280406770549, "learning_rate": 3.91058777587868e-06, "loss": 17.4804, "step": 31885 }, { "epoch": 0.582850457893871, "grad_norm": 6.008761705904559, "learning_rate": 3.910298877921196e-06, "loss": 17.2663, "step": 31886 }, { "epoch": 0.5828687370903175, "grad_norm": 7.148054778964438, "learning_rate": 3.91000998378302e-06, "loss": 17.7554, "step": 31887 }, { "epoch": 0.582887016286764, "grad_norm": 6.114411436214558, "learning_rate": 3.909721093465165e-06, "loss": 17.5362, "step": 31888 }, { "epoch": 0.5829052954832106, "grad_norm": 6.632774726594008, "learning_rate": 3.909432206968642e-06, "loss": 17.57, "step": 31889 }, { "epoch": 0.582923574679657, "grad_norm": 8.560993587543791, "learning_rate": 3.909143324294465e-06, "loss": 17.876, "step": 31890 }, { "epoch": 0.5829418538761036, "grad_norm": 5.3041700417243725, "learning_rate": 3.908854445443646e-06, "loss": 17.1019, "step": 31891 }, { "epoch": 0.5829601330725501, "grad_norm": 5.708177951931137, "learning_rate": 3.908565570417196e-06, "loss": 17.0725, "step": 31892 }, { "epoch": 0.5829784122689966, "grad_norm": 6.449768444701923, "learning_rate": 3.908276699216131e-06, "loss": 17.6637, "step": 31893 }, { "epoch": 0.5829966914654432, "grad_norm": 6.283680229080904, "learning_rate": 3.907987831841461e-06, "loss": 17.2306, "step": 31894 }, { "epoch": 0.5830149706618897, "grad_norm": 6.515095770098246, "learning_rate": 3.907698968294198e-06, "loss": 17.8697, "step": 31895 }, { "epoch": 0.5830332498583363, "grad_norm": 6.618285991194156, "learning_rate": 3.907410108575356e-06, "loss": 17.4893, "step": 31896 }, { "epoch": 0.5830515290547827, "grad_norm": 7.3972426688335675, "learning_rate": 3.907121252685948e-06, "loss": 17.8701, "step": 31897 }, { "epoch": 0.5830698082512292, "grad_norm": 8.295387937478242, "learning_rate": 3.906832400626983e-06, "loss": 18.1859, "step": 31898 }, { "epoch": 0.5830880874476758, "grad_norm": 5.005978596437973, "learning_rate": 3.906543552399476e-06, "loss": 16.7909, "step": 31899 }, { "epoch": 0.5831063666441223, "grad_norm": 6.92757824977543, "learning_rate": 3.906254708004438e-06, "loss": 17.7381, "step": 31900 }, { "epoch": 0.5831246458405689, "grad_norm": 6.756472264204812, "learning_rate": 3.905965867442885e-06, "loss": 17.5924, "step": 31901 }, { "epoch": 0.5831429250370154, "grad_norm": 8.453767496753137, "learning_rate": 3.905677030715826e-06, "loss": 18.2632, "step": 31902 }, { "epoch": 0.5831612042334618, "grad_norm": 6.242545448389636, "learning_rate": 3.9053881978242725e-06, "loss": 17.3902, "step": 31903 }, { "epoch": 0.5831794834299084, "grad_norm": 6.863803060928874, "learning_rate": 3.905099368769239e-06, "loss": 18.063, "step": 31904 }, { "epoch": 0.5831977626263549, "grad_norm": 6.729125067426327, "learning_rate": 3.9048105435517396e-06, "loss": 17.6579, "step": 31905 }, { "epoch": 0.5832160418228015, "grad_norm": 5.781424553642698, "learning_rate": 3.904521722172781e-06, "loss": 17.1687, "step": 31906 }, { "epoch": 0.583234321019248, "grad_norm": 4.946343476480079, "learning_rate": 3.9042329046333805e-06, "loss": 17.0971, "step": 31907 }, { "epoch": 0.5832526002156945, "grad_norm": 7.829367494198708, "learning_rate": 3.903944090934548e-06, "loss": 18.0785, "step": 31908 }, { "epoch": 0.5832708794121411, "grad_norm": 5.601991439233245, "learning_rate": 3.903655281077295e-06, "loss": 17.1482, "step": 31909 }, { "epoch": 0.5832891586085875, "grad_norm": 7.354792240755097, "learning_rate": 3.9033664750626375e-06, "loss": 18.0797, "step": 31910 }, { "epoch": 0.5833074378050341, "grad_norm": 8.669203846883866, "learning_rate": 3.903077672891585e-06, "loss": 17.9708, "step": 31911 }, { "epoch": 0.5833257170014806, "grad_norm": 6.232789206307293, "learning_rate": 3.902788874565148e-06, "loss": 17.5063, "step": 31912 }, { "epoch": 0.5833439961979271, "grad_norm": 6.388856131140002, "learning_rate": 3.902500080084342e-06, "loss": 17.7879, "step": 31913 }, { "epoch": 0.5833622753943737, "grad_norm": 6.914774995802759, "learning_rate": 3.902211289450179e-06, "loss": 17.3903, "step": 31914 }, { "epoch": 0.5833805545908202, "grad_norm": 5.826752306352678, "learning_rate": 3.9019225026636685e-06, "loss": 17.3844, "step": 31915 }, { "epoch": 0.5833988337872668, "grad_norm": 7.84168126534693, "learning_rate": 3.901633719725826e-06, "loss": 17.7191, "step": 31916 }, { "epoch": 0.5834171129837132, "grad_norm": 7.827224811818855, "learning_rate": 3.901344940637659e-06, "loss": 17.7048, "step": 31917 }, { "epoch": 0.5834353921801597, "grad_norm": 7.782262630941862, "learning_rate": 3.901056165400186e-06, "loss": 18.182, "step": 31918 }, { "epoch": 0.5834536713766063, "grad_norm": 4.683469385350057, "learning_rate": 3.900767394014415e-06, "loss": 16.8712, "step": 31919 }, { "epoch": 0.5834719505730528, "grad_norm": 6.245852450965167, "learning_rate": 3.900478626481357e-06, "loss": 17.3787, "step": 31920 }, { "epoch": 0.5834902297694994, "grad_norm": 6.566299997637966, "learning_rate": 3.900189862802029e-06, "loss": 17.4909, "step": 31921 }, { "epoch": 0.5835085089659459, "grad_norm": 7.2372061183103975, "learning_rate": 3.8999011029774384e-06, "loss": 17.6831, "step": 31922 }, { "epoch": 0.5835267881623923, "grad_norm": 7.36747858182787, "learning_rate": 3.899612347008597e-06, "loss": 17.8442, "step": 31923 }, { "epoch": 0.5835450673588389, "grad_norm": 8.47801061509117, "learning_rate": 3.8993235948965225e-06, "loss": 18.0859, "step": 31924 }, { "epoch": 0.5835633465552854, "grad_norm": 5.509242793289513, "learning_rate": 3.899034846642222e-06, "loss": 17.0437, "step": 31925 }, { "epoch": 0.583581625751732, "grad_norm": 5.943762235499635, "learning_rate": 3.898746102246708e-06, "loss": 17.395, "step": 31926 }, { "epoch": 0.5835999049481785, "grad_norm": 6.015308803795167, "learning_rate": 3.898457361710993e-06, "loss": 17.4007, "step": 31927 }, { "epoch": 0.583618184144625, "grad_norm": 5.752045326278618, "learning_rate": 3.8981686250360915e-06, "loss": 17.4123, "step": 31928 }, { "epoch": 0.5836364633410716, "grad_norm": 7.256757448094897, "learning_rate": 3.897879892223011e-06, "loss": 17.7711, "step": 31929 }, { "epoch": 0.583654742537518, "grad_norm": 5.6089053869908785, "learning_rate": 3.897591163272768e-06, "loss": 17.1129, "step": 31930 }, { "epoch": 0.5836730217339646, "grad_norm": 6.600457645189618, "learning_rate": 3.89730243818637e-06, "loss": 17.3062, "step": 31931 }, { "epoch": 0.5836913009304111, "grad_norm": 8.525013531909144, "learning_rate": 3.897013716964833e-06, "loss": 17.6931, "step": 31932 }, { "epoch": 0.5837095801268576, "grad_norm": 5.6100990144807135, "learning_rate": 3.8967249996091675e-06, "loss": 17.3334, "step": 31933 }, { "epoch": 0.5837278593233042, "grad_norm": 5.0500806476648465, "learning_rate": 3.896436286120383e-06, "loss": 16.8416, "step": 31934 }, { "epoch": 0.5837461385197507, "grad_norm": 6.390517518070096, "learning_rate": 3.896147576499496e-06, "loss": 17.5308, "step": 31935 }, { "epoch": 0.5837644177161972, "grad_norm": 6.182690317451296, "learning_rate": 3.8958588707475145e-06, "loss": 17.278, "step": 31936 }, { "epoch": 0.5837826969126437, "grad_norm": 5.8876167555294785, "learning_rate": 3.8955701688654515e-06, "loss": 17.145, "step": 31937 }, { "epoch": 0.5838009761090902, "grad_norm": 6.79656334912701, "learning_rate": 3.8952814708543205e-06, "loss": 17.7506, "step": 31938 }, { "epoch": 0.5838192553055368, "grad_norm": 7.797599187531006, "learning_rate": 3.894992776715132e-06, "loss": 17.9358, "step": 31939 }, { "epoch": 0.5838375345019833, "grad_norm": 6.949473419613126, "learning_rate": 3.894704086448897e-06, "loss": 17.6335, "step": 31940 }, { "epoch": 0.5838558136984299, "grad_norm": 4.777759238694196, "learning_rate": 3.894415400056629e-06, "loss": 16.8126, "step": 31941 }, { "epoch": 0.5838740928948764, "grad_norm": 7.06482612728809, "learning_rate": 3.89412671753934e-06, "loss": 17.8936, "step": 31942 }, { "epoch": 0.5838923720913228, "grad_norm": 5.9388206423936625, "learning_rate": 3.893838038898038e-06, "loss": 17.1992, "step": 31943 }, { "epoch": 0.5839106512877694, "grad_norm": 6.030141561959805, "learning_rate": 3.893549364133739e-06, "loss": 17.6194, "step": 31944 }, { "epoch": 0.5839289304842159, "grad_norm": 5.836637042651911, "learning_rate": 3.893260693247455e-06, "loss": 17.3101, "step": 31945 }, { "epoch": 0.5839472096806625, "grad_norm": 7.08981676401543, "learning_rate": 3.892972026240194e-06, "loss": 17.8268, "step": 31946 }, { "epoch": 0.583965488877109, "grad_norm": 5.419889155614884, "learning_rate": 3.892683363112972e-06, "loss": 16.9389, "step": 31947 }, { "epoch": 0.5839837680735555, "grad_norm": 7.989762957743619, "learning_rate": 3.892394703866796e-06, "loss": 18.1422, "step": 31948 }, { "epoch": 0.584002047270002, "grad_norm": 6.849050733124317, "learning_rate": 3.892106048502683e-06, "loss": 17.6443, "step": 31949 }, { "epoch": 0.5840203264664485, "grad_norm": 7.026220944554633, "learning_rate": 3.891817397021643e-06, "loss": 17.7766, "step": 31950 }, { "epoch": 0.584038605662895, "grad_norm": 5.931012928292172, "learning_rate": 3.891528749424684e-06, "loss": 17.1752, "step": 31951 }, { "epoch": 0.5840568848593416, "grad_norm": 6.042894635152102, "learning_rate": 3.891240105712822e-06, "loss": 17.4996, "step": 31952 }, { "epoch": 0.5840751640557881, "grad_norm": 6.661315636206057, "learning_rate": 3.890951465887066e-06, "loss": 17.9241, "step": 31953 }, { "epoch": 0.5840934432522347, "grad_norm": 6.043340960256849, "learning_rate": 3.890662829948429e-06, "loss": 17.4651, "step": 31954 }, { "epoch": 0.5841117224486811, "grad_norm": 7.5806110551616515, "learning_rate": 3.890374197897925e-06, "loss": 18.3408, "step": 31955 }, { "epoch": 0.5841300016451276, "grad_norm": 6.38327605740815, "learning_rate": 3.890085569736563e-06, "loss": 17.7901, "step": 31956 }, { "epoch": 0.5841482808415742, "grad_norm": 4.902549931471664, "learning_rate": 3.889796945465352e-06, "loss": 16.7152, "step": 31957 }, { "epoch": 0.5841665600380207, "grad_norm": 6.4071021572571905, "learning_rate": 3.8895083250853075e-06, "loss": 17.1364, "step": 31958 }, { "epoch": 0.5841848392344673, "grad_norm": 6.011953516661125, "learning_rate": 3.88921970859744e-06, "loss": 17.404, "step": 31959 }, { "epoch": 0.5842031184309138, "grad_norm": 4.683241264305121, "learning_rate": 3.888931096002761e-06, "loss": 16.804, "step": 31960 }, { "epoch": 0.5842213976273603, "grad_norm": 6.859389652395183, "learning_rate": 3.888642487302283e-06, "loss": 17.3036, "step": 31961 }, { "epoch": 0.5842396768238068, "grad_norm": 6.0802988742707536, "learning_rate": 3.8883538824970145e-06, "loss": 17.4756, "step": 31962 }, { "epoch": 0.5842579560202533, "grad_norm": 7.817530645231598, "learning_rate": 3.88806528158797e-06, "loss": 18.3739, "step": 31963 }, { "epoch": 0.5842762352166999, "grad_norm": 9.119182222312302, "learning_rate": 3.887776684576162e-06, "loss": 18.8536, "step": 31964 }, { "epoch": 0.5842945144131464, "grad_norm": 6.946471727699373, "learning_rate": 3.887488091462598e-06, "loss": 17.5185, "step": 31965 }, { "epoch": 0.5843127936095929, "grad_norm": 6.0382085460206945, "learning_rate": 3.887199502248294e-06, "loss": 17.0668, "step": 31966 }, { "epoch": 0.5843310728060395, "grad_norm": 5.56099633934854, "learning_rate": 3.886910916934258e-06, "loss": 17.0867, "step": 31967 }, { "epoch": 0.5843493520024859, "grad_norm": 9.568994511279021, "learning_rate": 3.886622335521501e-06, "loss": 18.686, "step": 31968 }, { "epoch": 0.5843676311989325, "grad_norm": 5.755456972737238, "learning_rate": 3.886333758011038e-06, "loss": 17.166, "step": 31969 }, { "epoch": 0.584385910395379, "grad_norm": 7.772363040844001, "learning_rate": 3.88604518440388e-06, "loss": 17.8396, "step": 31970 }, { "epoch": 0.5844041895918255, "grad_norm": 6.93451677386369, "learning_rate": 3.885756614701034e-06, "loss": 17.9725, "step": 31971 }, { "epoch": 0.5844224687882721, "grad_norm": 5.67987088475987, "learning_rate": 3.885468048903515e-06, "loss": 17.3484, "step": 31972 }, { "epoch": 0.5844407479847186, "grad_norm": 6.302418379537619, "learning_rate": 3.885179487012336e-06, "loss": 17.1028, "step": 31973 }, { "epoch": 0.5844590271811652, "grad_norm": 6.582448986165934, "learning_rate": 3.884890929028502e-06, "loss": 17.5073, "step": 31974 }, { "epoch": 0.5844773063776116, "grad_norm": 6.1146025391841885, "learning_rate": 3.884602374953033e-06, "loss": 17.3365, "step": 31975 }, { "epoch": 0.5844955855740581, "grad_norm": 6.578592805173062, "learning_rate": 3.884313824786932e-06, "loss": 17.679, "step": 31976 }, { "epoch": 0.5845138647705047, "grad_norm": 6.518520782065172, "learning_rate": 3.884025278531216e-06, "loss": 17.4502, "step": 31977 }, { "epoch": 0.5845321439669512, "grad_norm": 5.486003411138159, "learning_rate": 3.883736736186896e-06, "loss": 17.1051, "step": 31978 }, { "epoch": 0.5845504231633978, "grad_norm": 7.712178051953678, "learning_rate": 3.883448197754979e-06, "loss": 18.0228, "step": 31979 }, { "epoch": 0.5845687023598443, "grad_norm": 6.77774231407659, "learning_rate": 3.883159663236482e-06, "loss": 17.4799, "step": 31980 }, { "epoch": 0.5845869815562907, "grad_norm": 7.355590771206892, "learning_rate": 3.882871132632412e-06, "loss": 17.8408, "step": 31981 }, { "epoch": 0.5846052607527373, "grad_norm": 6.922735143399839, "learning_rate": 3.88258260594378e-06, "loss": 17.6125, "step": 31982 }, { "epoch": 0.5846235399491838, "grad_norm": 5.327924010444573, "learning_rate": 3.882294083171602e-06, "loss": 17.0008, "step": 31983 }, { "epoch": 0.5846418191456304, "grad_norm": 5.347384565802102, "learning_rate": 3.882005564316887e-06, "loss": 17.2086, "step": 31984 }, { "epoch": 0.5846600983420769, "grad_norm": 6.219703452237302, "learning_rate": 3.881717049380642e-06, "loss": 17.3451, "step": 31985 }, { "epoch": 0.5846783775385234, "grad_norm": 5.901018633398325, "learning_rate": 3.8814285383638834e-06, "loss": 17.3379, "step": 31986 }, { "epoch": 0.58469665673497, "grad_norm": 7.72100620781001, "learning_rate": 3.881140031267621e-06, "loss": 17.7539, "step": 31987 }, { "epoch": 0.5847149359314164, "grad_norm": 6.325071208178365, "learning_rate": 3.880851528092864e-06, "loss": 17.3497, "step": 31988 }, { "epoch": 0.584733215127863, "grad_norm": 6.170587003120424, "learning_rate": 3.880563028840628e-06, "loss": 17.3669, "step": 31989 }, { "epoch": 0.5847514943243095, "grad_norm": 6.104315382406103, "learning_rate": 3.88027453351192e-06, "loss": 17.1202, "step": 31990 }, { "epoch": 0.584769773520756, "grad_norm": 5.430643530059144, "learning_rate": 3.879986042107752e-06, "loss": 17.1391, "step": 31991 }, { "epoch": 0.5847880527172026, "grad_norm": 5.7990065073954185, "learning_rate": 3.879697554629137e-06, "loss": 17.2715, "step": 31992 }, { "epoch": 0.5848063319136491, "grad_norm": 7.170326479717112, "learning_rate": 3.879409071077083e-06, "loss": 17.7494, "step": 31993 }, { "epoch": 0.5848246111100956, "grad_norm": 5.366708862744839, "learning_rate": 3.879120591452605e-06, "loss": 17.252, "step": 31994 }, { "epoch": 0.5848428903065421, "grad_norm": 6.340848267617345, "learning_rate": 3.878832115756711e-06, "loss": 17.5206, "step": 31995 }, { "epoch": 0.5848611695029886, "grad_norm": 5.5531559025922235, "learning_rate": 3.878543643990412e-06, "loss": 17.4587, "step": 31996 }, { "epoch": 0.5848794486994352, "grad_norm": 6.781255958135305, "learning_rate": 3.878255176154723e-06, "loss": 17.3314, "step": 31997 }, { "epoch": 0.5848977278958817, "grad_norm": 5.48852607995555, "learning_rate": 3.877966712250652e-06, "loss": 17.1308, "step": 31998 }, { "epoch": 0.5849160070923283, "grad_norm": 7.1269224694877105, "learning_rate": 3.877678252279208e-06, "loss": 17.5446, "step": 31999 }, { "epoch": 0.5849342862887748, "grad_norm": 6.016520700064778, "learning_rate": 3.877389796241405e-06, "loss": 17.3386, "step": 32000 }, { "epoch": 0.5849525654852212, "grad_norm": 6.509902973260176, "learning_rate": 3.877101344138256e-06, "loss": 17.3974, "step": 32001 }, { "epoch": 0.5849708446816678, "grad_norm": 8.200377273931506, "learning_rate": 3.876812895970766e-06, "loss": 18.2241, "step": 32002 }, { "epoch": 0.5849891238781143, "grad_norm": 5.791138439477456, "learning_rate": 3.876524451739951e-06, "loss": 17.3409, "step": 32003 }, { "epoch": 0.5850074030745609, "grad_norm": 6.187352198666184, "learning_rate": 3.87623601144682e-06, "loss": 17.4225, "step": 32004 }, { "epoch": 0.5850256822710074, "grad_norm": 5.869261146910077, "learning_rate": 3.875947575092382e-06, "loss": 17.1525, "step": 32005 }, { "epoch": 0.5850439614674539, "grad_norm": 6.978424455559203, "learning_rate": 3.875659142677654e-06, "loss": 17.6136, "step": 32006 }, { "epoch": 0.5850622406639004, "grad_norm": 7.575867874157032, "learning_rate": 3.87537071420364e-06, "loss": 17.6166, "step": 32007 }, { "epoch": 0.5850805198603469, "grad_norm": 6.872550633996198, "learning_rate": 3.875082289671357e-06, "loss": 17.6261, "step": 32008 }, { "epoch": 0.5850987990567935, "grad_norm": 7.499858165385113, "learning_rate": 3.874793869081811e-06, "loss": 17.6154, "step": 32009 }, { "epoch": 0.58511707825324, "grad_norm": 5.982202387569637, "learning_rate": 3.874505452436014e-06, "loss": 17.3925, "step": 32010 }, { "epoch": 0.5851353574496865, "grad_norm": 6.075543771678866, "learning_rate": 3.87421703973498e-06, "loss": 17.5303, "step": 32011 }, { "epoch": 0.5851536366461331, "grad_norm": 5.6998229247468455, "learning_rate": 3.873928630979717e-06, "loss": 17.1892, "step": 32012 }, { "epoch": 0.5851719158425795, "grad_norm": 6.156106592695931, "learning_rate": 3.873640226171236e-06, "loss": 17.1099, "step": 32013 }, { "epoch": 0.5851901950390261, "grad_norm": 6.851619974766164, "learning_rate": 3.873351825310548e-06, "loss": 17.7728, "step": 32014 }, { "epoch": 0.5852084742354726, "grad_norm": 5.472530587331331, "learning_rate": 3.8730634283986655e-06, "loss": 17.0658, "step": 32015 }, { "epoch": 0.5852267534319191, "grad_norm": 6.086341664921685, "learning_rate": 3.872775035436595e-06, "loss": 17.5338, "step": 32016 }, { "epoch": 0.5852450326283657, "grad_norm": 5.671120501606942, "learning_rate": 3.872486646425353e-06, "loss": 17.2177, "step": 32017 }, { "epoch": 0.5852633118248122, "grad_norm": 6.15228302998483, "learning_rate": 3.872198261365947e-06, "loss": 17.5094, "step": 32018 }, { "epoch": 0.5852815910212587, "grad_norm": 5.9975627223851, "learning_rate": 3.8719098802593865e-06, "loss": 17.3651, "step": 32019 }, { "epoch": 0.5852998702177052, "grad_norm": 6.867147430125402, "learning_rate": 3.8716215031066864e-06, "loss": 17.7369, "step": 32020 }, { "epoch": 0.5853181494141517, "grad_norm": 5.54549498336596, "learning_rate": 3.8713331299088535e-06, "loss": 17.2644, "step": 32021 }, { "epoch": 0.5853364286105983, "grad_norm": 5.020282181562154, "learning_rate": 3.871044760666899e-06, "loss": 17.2163, "step": 32022 }, { "epoch": 0.5853547078070448, "grad_norm": 6.842346610403446, "learning_rate": 3.870756395381837e-06, "loss": 17.7347, "step": 32023 }, { "epoch": 0.5853729870034913, "grad_norm": 5.924014325922557, "learning_rate": 3.870468034054674e-06, "loss": 17.2689, "step": 32024 }, { "epoch": 0.5853912661999379, "grad_norm": 5.96702776150434, "learning_rate": 3.870179676686424e-06, "loss": 17.1125, "step": 32025 }, { "epoch": 0.5854095453963843, "grad_norm": 6.418767405463292, "learning_rate": 3.869891323278095e-06, "loss": 17.5917, "step": 32026 }, { "epoch": 0.5854278245928309, "grad_norm": 5.341776960518983, "learning_rate": 3.869602973830699e-06, "loss": 17.0598, "step": 32027 }, { "epoch": 0.5854461037892774, "grad_norm": 5.299681930803937, "learning_rate": 3.869314628345248e-06, "loss": 17.1572, "step": 32028 }, { "epoch": 0.5854643829857239, "grad_norm": 6.176639097516337, "learning_rate": 3.86902628682275e-06, "loss": 17.3922, "step": 32029 }, { "epoch": 0.5854826621821705, "grad_norm": 8.294796497109598, "learning_rate": 3.868737949264216e-06, "loss": 18.5638, "step": 32030 }, { "epoch": 0.585500941378617, "grad_norm": 5.739260401077994, "learning_rate": 3.868449615670658e-06, "loss": 17.2608, "step": 32031 }, { "epoch": 0.5855192205750636, "grad_norm": 5.103928910126232, "learning_rate": 3.868161286043087e-06, "loss": 17.0032, "step": 32032 }, { "epoch": 0.58553749977151, "grad_norm": 6.380828854176015, "learning_rate": 3.86787296038251e-06, "loss": 17.7005, "step": 32033 }, { "epoch": 0.5855557789679565, "grad_norm": 5.810925976755929, "learning_rate": 3.867584638689943e-06, "loss": 16.9644, "step": 32034 }, { "epoch": 0.5855740581644031, "grad_norm": 7.877269928923206, "learning_rate": 3.867296320966391e-06, "loss": 17.7347, "step": 32035 }, { "epoch": 0.5855923373608496, "grad_norm": 5.8016123741592684, "learning_rate": 3.867008007212867e-06, "loss": 17.2333, "step": 32036 }, { "epoch": 0.5856106165572962, "grad_norm": 6.1183092381958515, "learning_rate": 3.866719697430384e-06, "loss": 17.5202, "step": 32037 }, { "epoch": 0.5856288957537427, "grad_norm": 6.238477157343728, "learning_rate": 3.8664313916199485e-06, "loss": 17.3808, "step": 32038 }, { "epoch": 0.5856471749501891, "grad_norm": 6.399335526793803, "learning_rate": 3.866143089782573e-06, "loss": 17.4302, "step": 32039 }, { "epoch": 0.5856654541466357, "grad_norm": 6.887536809133286, "learning_rate": 3.865854791919268e-06, "loss": 17.4513, "step": 32040 }, { "epoch": 0.5856837333430822, "grad_norm": 8.54887822139525, "learning_rate": 3.865566498031041e-06, "loss": 18.1424, "step": 32041 }, { "epoch": 0.5857020125395288, "grad_norm": 6.326939194567271, "learning_rate": 3.865278208118909e-06, "loss": 17.3627, "step": 32042 }, { "epoch": 0.5857202917359753, "grad_norm": 5.649302251170858, "learning_rate": 3.864989922183877e-06, "loss": 17.1671, "step": 32043 }, { "epoch": 0.5857385709324218, "grad_norm": 6.031566959053827, "learning_rate": 3.864701640226955e-06, "loss": 17.4837, "step": 32044 }, { "epoch": 0.5857568501288684, "grad_norm": 6.829109197724924, "learning_rate": 3.864413362249156e-06, "loss": 17.7327, "step": 32045 }, { "epoch": 0.5857751293253148, "grad_norm": 5.866082232048188, "learning_rate": 3.864125088251492e-06, "loss": 17.2631, "step": 32046 }, { "epoch": 0.5857934085217614, "grad_norm": 5.437600457984493, "learning_rate": 3.8638368182349675e-06, "loss": 17.0176, "step": 32047 }, { "epoch": 0.5858116877182079, "grad_norm": 5.947862864924397, "learning_rate": 3.863548552200599e-06, "loss": 17.3527, "step": 32048 }, { "epoch": 0.5858299669146544, "grad_norm": 8.471362757508421, "learning_rate": 3.863260290149393e-06, "loss": 18.2775, "step": 32049 }, { "epoch": 0.585848246111101, "grad_norm": 6.309767618521469, "learning_rate": 3.862972032082361e-06, "loss": 17.5451, "step": 32050 }, { "epoch": 0.5858665253075475, "grad_norm": 6.694731851607589, "learning_rate": 3.862683778000514e-06, "loss": 17.6672, "step": 32051 }, { "epoch": 0.585884804503994, "grad_norm": 8.317100150354584, "learning_rate": 3.8623955279048625e-06, "loss": 18.4154, "step": 32052 }, { "epoch": 0.5859030837004405, "grad_norm": 5.444423340141067, "learning_rate": 3.862107281796414e-06, "loss": 17.0536, "step": 32053 }, { "epoch": 0.585921362896887, "grad_norm": 5.373406160018824, "learning_rate": 3.861819039676181e-06, "loss": 17.2261, "step": 32054 }, { "epoch": 0.5859396420933336, "grad_norm": 5.764006211860998, "learning_rate": 3.861530801545173e-06, "loss": 17.2747, "step": 32055 }, { "epoch": 0.5859579212897801, "grad_norm": 6.058059563640926, "learning_rate": 3.861242567404403e-06, "loss": 17.3046, "step": 32056 }, { "epoch": 0.5859762004862267, "grad_norm": 6.553885097400887, "learning_rate": 3.860954337254878e-06, "loss": 17.6143, "step": 32057 }, { "epoch": 0.5859944796826732, "grad_norm": 4.720580910288211, "learning_rate": 3.8606661110976076e-06, "loss": 16.816, "step": 32058 }, { "epoch": 0.5860127588791196, "grad_norm": 5.504895550819705, "learning_rate": 3.860377888933605e-06, "loss": 17.3456, "step": 32059 }, { "epoch": 0.5860310380755662, "grad_norm": 5.856390135470978, "learning_rate": 3.86008967076388e-06, "loss": 17.2765, "step": 32060 }, { "epoch": 0.5860493172720127, "grad_norm": 4.752552583592792, "learning_rate": 3.859801456589439e-06, "loss": 16.8809, "step": 32061 }, { "epoch": 0.5860675964684593, "grad_norm": 6.769930415588571, "learning_rate": 3.859513246411298e-06, "loss": 17.6214, "step": 32062 }, { "epoch": 0.5860858756649058, "grad_norm": 8.444731951976172, "learning_rate": 3.859225040230463e-06, "loss": 17.516, "step": 32063 }, { "epoch": 0.5861041548613523, "grad_norm": 7.461292003437933, "learning_rate": 3.858936838047943e-06, "loss": 18.3182, "step": 32064 }, { "epoch": 0.5861224340577988, "grad_norm": 6.057656418054274, "learning_rate": 3.858648639864754e-06, "loss": 17.3448, "step": 32065 }, { "epoch": 0.5861407132542453, "grad_norm": 8.504623355230642, "learning_rate": 3.858360445681901e-06, "loss": 18.047, "step": 32066 }, { "epoch": 0.5861589924506919, "grad_norm": 7.125805066065005, "learning_rate": 3.858072255500395e-06, "loss": 17.6797, "step": 32067 }, { "epoch": 0.5861772716471384, "grad_norm": 7.224426725504567, "learning_rate": 3.857784069321248e-06, "loss": 17.425, "step": 32068 }, { "epoch": 0.5861955508435849, "grad_norm": 6.878796112420407, "learning_rate": 3.857495887145466e-06, "loss": 17.4534, "step": 32069 }, { "epoch": 0.5862138300400315, "grad_norm": 6.290589668706572, "learning_rate": 3.857207708974065e-06, "loss": 17.1223, "step": 32070 }, { "epoch": 0.586232109236478, "grad_norm": 6.155917749264555, "learning_rate": 3.85691953480805e-06, "loss": 17.4788, "step": 32071 }, { "epoch": 0.5862503884329245, "grad_norm": 7.492206721033603, "learning_rate": 3.856631364648433e-06, "loss": 17.2614, "step": 32072 }, { "epoch": 0.586268667629371, "grad_norm": 7.139253668271472, "learning_rate": 3.856343198496224e-06, "loss": 17.8717, "step": 32073 }, { "epoch": 0.5862869468258175, "grad_norm": 6.713411028010392, "learning_rate": 3.856055036352434e-06, "loss": 17.6853, "step": 32074 }, { "epoch": 0.5863052260222641, "grad_norm": 5.751234821833394, "learning_rate": 3.855766878218069e-06, "loss": 17.373, "step": 32075 }, { "epoch": 0.5863235052187106, "grad_norm": 7.531985912035775, "learning_rate": 3.8554787240941436e-06, "loss": 17.9104, "step": 32076 }, { "epoch": 0.5863417844151572, "grad_norm": 7.341975246951944, "learning_rate": 3.8551905739816675e-06, "loss": 17.9706, "step": 32077 }, { "epoch": 0.5863600636116036, "grad_norm": 6.552540033915268, "learning_rate": 3.854902427881646e-06, "loss": 17.9005, "step": 32078 }, { "epoch": 0.5863783428080501, "grad_norm": 6.308889719829927, "learning_rate": 3.854614285795095e-06, "loss": 17.258, "step": 32079 }, { "epoch": 0.5863966220044967, "grad_norm": 7.684377182028256, "learning_rate": 3.8543261477230195e-06, "loss": 18.043, "step": 32080 }, { "epoch": 0.5864149012009432, "grad_norm": 5.805855346576242, "learning_rate": 3.8540380136664316e-06, "loss": 17.3441, "step": 32081 }, { "epoch": 0.5864331803973898, "grad_norm": 6.8770864007003105, "learning_rate": 3.853749883626342e-06, "loss": 17.7791, "step": 32082 }, { "epoch": 0.5864514595938363, "grad_norm": 7.820883808336316, "learning_rate": 3.8534617576037605e-06, "loss": 17.7729, "step": 32083 }, { "epoch": 0.5864697387902827, "grad_norm": 4.699253962251377, "learning_rate": 3.853173635599693e-06, "loss": 16.751, "step": 32084 }, { "epoch": 0.5864880179867293, "grad_norm": 7.079860878083606, "learning_rate": 3.852885517615154e-06, "loss": 17.8225, "step": 32085 }, { "epoch": 0.5865062971831758, "grad_norm": 6.319382800687454, "learning_rate": 3.852597403651151e-06, "loss": 17.269, "step": 32086 }, { "epoch": 0.5865245763796223, "grad_norm": 5.530915146677737, "learning_rate": 3.852309293708696e-06, "loss": 17.1009, "step": 32087 }, { "epoch": 0.5865428555760689, "grad_norm": 6.308513585813827, "learning_rate": 3.852021187788798e-06, "loss": 17.3694, "step": 32088 }, { "epoch": 0.5865611347725154, "grad_norm": 6.170896053290772, "learning_rate": 3.851733085892463e-06, "loss": 17.4234, "step": 32089 }, { "epoch": 0.586579413968962, "grad_norm": 5.895713693747503, "learning_rate": 3.851444988020706e-06, "loss": 17.5128, "step": 32090 }, { "epoch": 0.5865976931654084, "grad_norm": 7.926604438267156, "learning_rate": 3.8511568941745356e-06, "loss": 17.6845, "step": 32091 }, { "epoch": 0.5866159723618549, "grad_norm": 5.894387280595747, "learning_rate": 3.850868804354958e-06, "loss": 17.0274, "step": 32092 }, { "epoch": 0.5866342515583015, "grad_norm": 5.674667579941655, "learning_rate": 3.850580718562987e-06, "loss": 17.299, "step": 32093 }, { "epoch": 0.586652530754748, "grad_norm": 6.561232056295939, "learning_rate": 3.8502926367996315e-06, "loss": 17.4352, "step": 32094 }, { "epoch": 0.5866708099511946, "grad_norm": 7.309077294795407, "learning_rate": 3.850004559065899e-06, "loss": 17.6867, "step": 32095 }, { "epoch": 0.5866890891476411, "grad_norm": 6.530541869539552, "learning_rate": 3.849716485362801e-06, "loss": 17.9096, "step": 32096 }, { "epoch": 0.5867073683440875, "grad_norm": 8.399831386295597, "learning_rate": 3.849428415691349e-06, "loss": 18.2124, "step": 32097 }, { "epoch": 0.5867256475405341, "grad_norm": 7.622121690011825, "learning_rate": 3.8491403500525476e-06, "loss": 18.0071, "step": 32098 }, { "epoch": 0.5867439267369806, "grad_norm": 5.046062970216395, "learning_rate": 3.848852288447411e-06, "loss": 17.1449, "step": 32099 }, { "epoch": 0.5867622059334272, "grad_norm": 6.660134345034835, "learning_rate": 3.848564230876946e-06, "loss": 17.4825, "step": 32100 }, { "epoch": 0.5867804851298737, "grad_norm": 5.99798920204829, "learning_rate": 3.848276177342165e-06, "loss": 17.4202, "step": 32101 }, { "epoch": 0.5867987643263202, "grad_norm": 5.7135395962102296, "learning_rate": 3.847988127844076e-06, "loss": 17.0179, "step": 32102 }, { "epoch": 0.5868170435227668, "grad_norm": 6.640978730071701, "learning_rate": 3.847700082383688e-06, "loss": 17.5184, "step": 32103 }, { "epoch": 0.5868353227192132, "grad_norm": 6.063179347563697, "learning_rate": 3.847412040962011e-06, "loss": 17.3366, "step": 32104 }, { "epoch": 0.5868536019156598, "grad_norm": 5.553883596234328, "learning_rate": 3.847124003580056e-06, "loss": 17.0963, "step": 32105 }, { "epoch": 0.5868718811121063, "grad_norm": 6.355719300567727, "learning_rate": 3.846835970238829e-06, "loss": 17.338, "step": 32106 }, { "epoch": 0.5868901603085528, "grad_norm": 5.944149968715513, "learning_rate": 3.8465479409393445e-06, "loss": 17.2539, "step": 32107 }, { "epoch": 0.5869084395049994, "grad_norm": 6.252374976629411, "learning_rate": 3.846259915682607e-06, "loss": 17.6403, "step": 32108 }, { "epoch": 0.5869267187014459, "grad_norm": 7.076023009939326, "learning_rate": 3.845971894469629e-06, "loss": 18.0155, "step": 32109 }, { "epoch": 0.5869449978978925, "grad_norm": 4.967767581055443, "learning_rate": 3.845683877301421e-06, "loss": 16.9333, "step": 32110 }, { "epoch": 0.5869632770943389, "grad_norm": 5.783620572947211, "learning_rate": 3.845395864178991e-06, "loss": 17.4594, "step": 32111 }, { "epoch": 0.5869815562907854, "grad_norm": 6.247021390907686, "learning_rate": 3.845107855103346e-06, "loss": 17.5796, "step": 32112 }, { "epoch": 0.586999835487232, "grad_norm": 5.979091759254599, "learning_rate": 3.844819850075498e-06, "loss": 17.216, "step": 32113 }, { "epoch": 0.5870181146836785, "grad_norm": 6.268688309416012, "learning_rate": 3.844531849096456e-06, "loss": 17.5476, "step": 32114 }, { "epoch": 0.5870363938801251, "grad_norm": 6.651681441214951, "learning_rate": 3.844243852167231e-06, "loss": 17.6522, "step": 32115 }, { "epoch": 0.5870546730765716, "grad_norm": 6.301498517519735, "learning_rate": 3.843955859288832e-06, "loss": 17.5488, "step": 32116 }, { "epoch": 0.587072952273018, "grad_norm": 5.098909919084947, "learning_rate": 3.843667870462265e-06, "loss": 17.1148, "step": 32117 }, { "epoch": 0.5870912314694646, "grad_norm": 5.260779712000656, "learning_rate": 3.843379885688543e-06, "loss": 17.2094, "step": 32118 }, { "epoch": 0.5871095106659111, "grad_norm": 6.361087132529976, "learning_rate": 3.8430919049686745e-06, "loss": 17.3107, "step": 32119 }, { "epoch": 0.5871277898623577, "grad_norm": 6.9629270503091085, "learning_rate": 3.842803928303667e-06, "loss": 17.7456, "step": 32120 }, { "epoch": 0.5871460690588042, "grad_norm": 8.04626548670363, "learning_rate": 3.842515955694533e-06, "loss": 18.3027, "step": 32121 }, { "epoch": 0.5871643482552507, "grad_norm": 5.682439592997291, "learning_rate": 3.842227987142279e-06, "loss": 17.0661, "step": 32122 }, { "epoch": 0.5871826274516972, "grad_norm": 6.438870429700994, "learning_rate": 3.841940022647915e-06, "loss": 17.4678, "step": 32123 }, { "epoch": 0.5872009066481437, "grad_norm": 6.392093532381406, "learning_rate": 3.841652062212452e-06, "loss": 17.346, "step": 32124 }, { "epoch": 0.5872191858445903, "grad_norm": 11.532001947888624, "learning_rate": 3.841364105836899e-06, "loss": 17.7881, "step": 32125 }, { "epoch": 0.5872374650410368, "grad_norm": 6.169374251039125, "learning_rate": 3.841076153522262e-06, "loss": 17.5796, "step": 32126 }, { "epoch": 0.5872557442374833, "grad_norm": 6.53607163756185, "learning_rate": 3.840788205269552e-06, "loss": 17.5753, "step": 32127 }, { "epoch": 0.5872740234339299, "grad_norm": 5.202450616452218, "learning_rate": 3.840500261079781e-06, "loss": 16.9029, "step": 32128 }, { "epoch": 0.5872923026303764, "grad_norm": 5.994468004886116, "learning_rate": 3.840212320953955e-06, "loss": 17.42, "step": 32129 }, { "epoch": 0.5873105818268229, "grad_norm": 5.505786279163361, "learning_rate": 3.839924384893084e-06, "loss": 17.2288, "step": 32130 }, { "epoch": 0.5873288610232694, "grad_norm": 5.947632134592926, "learning_rate": 3.839636452898177e-06, "loss": 17.5266, "step": 32131 }, { "epoch": 0.5873471402197159, "grad_norm": 6.804643937907856, "learning_rate": 3.839348524970244e-06, "loss": 17.888, "step": 32132 }, { "epoch": 0.5873654194161625, "grad_norm": 8.743656694240855, "learning_rate": 3.839060601110295e-06, "loss": 18.4198, "step": 32133 }, { "epoch": 0.587383698612609, "grad_norm": 7.0327622175133335, "learning_rate": 3.838772681319335e-06, "loss": 17.7128, "step": 32134 }, { "epoch": 0.5874019778090556, "grad_norm": 6.728456852966715, "learning_rate": 3.838484765598379e-06, "loss": 17.4529, "step": 32135 }, { "epoch": 0.587420257005502, "grad_norm": 6.404933054322642, "learning_rate": 3.8381968539484315e-06, "loss": 17.2746, "step": 32136 }, { "epoch": 0.5874385362019485, "grad_norm": 6.382347964253445, "learning_rate": 3.8379089463705026e-06, "loss": 17.7107, "step": 32137 }, { "epoch": 0.5874568153983951, "grad_norm": 5.81453373865814, "learning_rate": 3.8376210428656035e-06, "loss": 17.195, "step": 32138 }, { "epoch": 0.5874750945948416, "grad_norm": 7.363723690660982, "learning_rate": 3.837333143434741e-06, "loss": 17.9011, "step": 32139 }, { "epoch": 0.5874933737912882, "grad_norm": 6.283016197349546, "learning_rate": 3.8370452480789245e-06, "loss": 17.5536, "step": 32140 }, { "epoch": 0.5875116529877347, "grad_norm": 4.572559291495511, "learning_rate": 3.836757356799164e-06, "loss": 16.9071, "step": 32141 }, { "epoch": 0.5875299321841811, "grad_norm": 5.756676410897519, "learning_rate": 3.836469469596469e-06, "loss": 17.4088, "step": 32142 }, { "epoch": 0.5875482113806277, "grad_norm": 5.950765536258727, "learning_rate": 3.836181586471845e-06, "loss": 17.1882, "step": 32143 }, { "epoch": 0.5875664905770742, "grad_norm": 10.23933976692735, "learning_rate": 3.835893707426306e-06, "loss": 18.2642, "step": 32144 }, { "epoch": 0.5875847697735208, "grad_norm": 5.875726576232935, "learning_rate": 3.835605832460857e-06, "loss": 17.4877, "step": 32145 }, { "epoch": 0.5876030489699673, "grad_norm": 6.795377627822078, "learning_rate": 3.835317961576509e-06, "loss": 17.5022, "step": 32146 }, { "epoch": 0.5876213281664138, "grad_norm": 5.215021053957126, "learning_rate": 3.835030094774272e-06, "loss": 17.0462, "step": 32147 }, { "epoch": 0.5876396073628604, "grad_norm": 7.496929703713585, "learning_rate": 3.834742232055151e-06, "loss": 17.7966, "step": 32148 }, { "epoch": 0.5876578865593068, "grad_norm": 7.183131721332365, "learning_rate": 3.834454373420159e-06, "loss": 17.8109, "step": 32149 }, { "epoch": 0.5876761657557534, "grad_norm": 6.598857291539118, "learning_rate": 3.834166518870303e-06, "loss": 17.6218, "step": 32150 }, { "epoch": 0.5876944449521999, "grad_norm": 4.90164568749296, "learning_rate": 3.833878668406591e-06, "loss": 16.8339, "step": 32151 }, { "epoch": 0.5877127241486464, "grad_norm": 6.641279486526182, "learning_rate": 3.833590822030035e-06, "loss": 17.424, "step": 32152 }, { "epoch": 0.587731003345093, "grad_norm": 6.754066940824882, "learning_rate": 3.833302979741641e-06, "loss": 17.9008, "step": 32153 }, { "epoch": 0.5877492825415395, "grad_norm": 6.260254210326057, "learning_rate": 3.833015141542417e-06, "loss": 17.3453, "step": 32154 }, { "epoch": 0.5877675617379859, "grad_norm": 7.130110428058969, "learning_rate": 3.8327273074333774e-06, "loss": 17.8584, "step": 32155 }, { "epoch": 0.5877858409344325, "grad_norm": 6.227726165545509, "learning_rate": 3.832439477415526e-06, "loss": 17.4355, "step": 32156 }, { "epoch": 0.587804120130879, "grad_norm": 6.5523502381987715, "learning_rate": 3.83215165148987e-06, "loss": 17.6331, "step": 32157 }, { "epoch": 0.5878223993273256, "grad_norm": 7.31748700385883, "learning_rate": 3.831863829657424e-06, "loss": 17.8774, "step": 32158 }, { "epoch": 0.5878406785237721, "grad_norm": 5.845922504782156, "learning_rate": 3.8315760119191944e-06, "loss": 17.0526, "step": 32159 }, { "epoch": 0.5878589577202186, "grad_norm": 4.827866892422562, "learning_rate": 3.831288198276188e-06, "loss": 16.9667, "step": 32160 }, { "epoch": 0.5878772369166652, "grad_norm": 6.712849901913202, "learning_rate": 3.831000388729415e-06, "loss": 17.6317, "step": 32161 }, { "epoch": 0.5878955161131116, "grad_norm": 5.901622107682514, "learning_rate": 3.830712583279884e-06, "loss": 17.2729, "step": 32162 }, { "epoch": 0.5879137953095582, "grad_norm": 5.681298595044998, "learning_rate": 3.8304247819286045e-06, "loss": 17.0537, "step": 32163 }, { "epoch": 0.5879320745060047, "grad_norm": 6.4531088868832995, "learning_rate": 3.830136984676586e-06, "loss": 17.5453, "step": 32164 }, { "epoch": 0.5879503537024512, "grad_norm": 5.987835179791893, "learning_rate": 3.829849191524833e-06, "loss": 17.3163, "step": 32165 }, { "epoch": 0.5879686328988978, "grad_norm": 5.066652219656769, "learning_rate": 3.82956140247436e-06, "loss": 16.8955, "step": 32166 }, { "epoch": 0.5879869120953443, "grad_norm": 6.6660421107848356, "learning_rate": 3.829273617526171e-06, "loss": 17.5193, "step": 32167 }, { "epoch": 0.5880051912917909, "grad_norm": 6.824278530206514, "learning_rate": 3.828985836681275e-06, "loss": 17.6013, "step": 32168 }, { "epoch": 0.5880234704882373, "grad_norm": 6.025415015161479, "learning_rate": 3.8286980599406844e-06, "loss": 17.2691, "step": 32169 }, { "epoch": 0.5880417496846838, "grad_norm": 6.063898529366976, "learning_rate": 3.828410287305406e-06, "loss": 17.287, "step": 32170 }, { "epoch": 0.5880600288811304, "grad_norm": 5.5825233713677465, "learning_rate": 3.828122518776446e-06, "loss": 17.0776, "step": 32171 }, { "epoch": 0.5880783080775769, "grad_norm": 7.1366375148525165, "learning_rate": 3.827834754354816e-06, "loss": 17.648, "step": 32172 }, { "epoch": 0.5880965872740235, "grad_norm": 6.563555907445698, "learning_rate": 3.8275469940415235e-06, "loss": 17.1862, "step": 32173 }, { "epoch": 0.58811486647047, "grad_norm": 5.6172948206024715, "learning_rate": 3.827259237837575e-06, "loss": 17.23, "step": 32174 }, { "epoch": 0.5881331456669164, "grad_norm": 5.470520669777216, "learning_rate": 3.8269714857439835e-06, "loss": 17.2956, "step": 32175 }, { "epoch": 0.588151424863363, "grad_norm": 6.977926546565486, "learning_rate": 3.826683737761754e-06, "loss": 17.6153, "step": 32176 }, { "epoch": 0.5881697040598095, "grad_norm": 5.431805948682146, "learning_rate": 3.826395993891896e-06, "loss": 17.2156, "step": 32177 }, { "epoch": 0.5881879832562561, "grad_norm": 5.486942175849457, "learning_rate": 3.8261082541354196e-06, "loss": 17.2192, "step": 32178 }, { "epoch": 0.5882062624527026, "grad_norm": 6.5780925557590955, "learning_rate": 3.82582051849333e-06, "loss": 17.6171, "step": 32179 }, { "epoch": 0.5882245416491491, "grad_norm": 11.233451203898966, "learning_rate": 3.825532786966639e-06, "loss": 18.077, "step": 32180 }, { "epoch": 0.5882428208455956, "grad_norm": 6.696828063042589, "learning_rate": 3.825245059556354e-06, "loss": 17.2856, "step": 32181 }, { "epoch": 0.5882611000420421, "grad_norm": 6.428942862060959, "learning_rate": 3.824957336263481e-06, "loss": 17.654, "step": 32182 }, { "epoch": 0.5882793792384887, "grad_norm": 5.963451397307689, "learning_rate": 3.824669617089033e-06, "loss": 17.4832, "step": 32183 }, { "epoch": 0.5882976584349352, "grad_norm": 6.936130235723794, "learning_rate": 3.824381902034016e-06, "loss": 17.7885, "step": 32184 }, { "epoch": 0.5883159376313817, "grad_norm": 7.3661362655728295, "learning_rate": 3.824094191099436e-06, "loss": 17.5182, "step": 32185 }, { "epoch": 0.5883342168278283, "grad_norm": 6.114673823530316, "learning_rate": 3.823806484286305e-06, "loss": 17.6988, "step": 32186 }, { "epoch": 0.5883524960242748, "grad_norm": 5.619463028732551, "learning_rate": 3.823518781595631e-06, "loss": 17.1543, "step": 32187 }, { "epoch": 0.5883707752207213, "grad_norm": 6.924261736828492, "learning_rate": 3.82323108302842e-06, "loss": 17.4441, "step": 32188 }, { "epoch": 0.5883890544171678, "grad_norm": 6.505350821919351, "learning_rate": 3.822943388585684e-06, "loss": 17.562, "step": 32189 }, { "epoch": 0.5884073336136143, "grad_norm": 5.647722117512877, "learning_rate": 3.822655698268428e-06, "loss": 17.0544, "step": 32190 }, { "epoch": 0.5884256128100609, "grad_norm": 5.622304218155687, "learning_rate": 3.822368012077661e-06, "loss": 17.2011, "step": 32191 }, { "epoch": 0.5884438920065074, "grad_norm": 5.50718085908441, "learning_rate": 3.8220803300143934e-06, "loss": 17.1383, "step": 32192 }, { "epoch": 0.588462171202954, "grad_norm": 5.2531366508918405, "learning_rate": 3.82179265207963e-06, "loss": 17.2349, "step": 32193 }, { "epoch": 0.5884804503994004, "grad_norm": 7.272455692878633, "learning_rate": 3.8215049782743834e-06, "loss": 17.8625, "step": 32194 }, { "epoch": 0.5884987295958469, "grad_norm": 6.980597127180744, "learning_rate": 3.821217308599658e-06, "loss": 17.7315, "step": 32195 }, { "epoch": 0.5885170087922935, "grad_norm": 5.77874815584954, "learning_rate": 3.820929643056463e-06, "loss": 17.0708, "step": 32196 }, { "epoch": 0.58853528798874, "grad_norm": 4.683287889634802, "learning_rate": 3.820641981645811e-06, "loss": 16.8033, "step": 32197 }, { "epoch": 0.5885535671851866, "grad_norm": 4.979474932107534, "learning_rate": 3.820354324368704e-06, "loss": 17.0406, "step": 32198 }, { "epoch": 0.5885718463816331, "grad_norm": 7.066128487520513, "learning_rate": 3.820066671226152e-06, "loss": 17.6518, "step": 32199 }, { "epoch": 0.5885901255780795, "grad_norm": 5.349855090135341, "learning_rate": 3.819779022219165e-06, "loss": 17.1489, "step": 32200 }, { "epoch": 0.5886084047745261, "grad_norm": 6.397691185149646, "learning_rate": 3.8194913773487505e-06, "loss": 17.1772, "step": 32201 }, { "epoch": 0.5886266839709726, "grad_norm": 6.419781488671215, "learning_rate": 3.819203736615915e-06, "loss": 17.2045, "step": 32202 }, { "epoch": 0.5886449631674192, "grad_norm": 5.757090556474921, "learning_rate": 3.818916100021668e-06, "loss": 17.1272, "step": 32203 }, { "epoch": 0.5886632423638657, "grad_norm": 5.432502888435403, "learning_rate": 3.818628467567019e-06, "loss": 17.2028, "step": 32204 }, { "epoch": 0.5886815215603122, "grad_norm": 6.530514507444193, "learning_rate": 3.818340839252973e-06, "loss": 17.7494, "step": 32205 }, { "epoch": 0.5886998007567588, "grad_norm": 6.3446962130455615, "learning_rate": 3.818053215080542e-06, "loss": 17.5058, "step": 32206 }, { "epoch": 0.5887180799532052, "grad_norm": 6.413350081349085, "learning_rate": 3.8177655950507296e-06, "loss": 17.6861, "step": 32207 }, { "epoch": 0.5887363591496518, "grad_norm": 6.492070968135517, "learning_rate": 3.8174779791645465e-06, "loss": 17.1974, "step": 32208 }, { "epoch": 0.5887546383460983, "grad_norm": 6.2138021247715445, "learning_rate": 3.817190367423002e-06, "loss": 17.3349, "step": 32209 }, { "epoch": 0.5887729175425448, "grad_norm": 5.431048217800632, "learning_rate": 3.816902759827101e-06, "loss": 17.3091, "step": 32210 }, { "epoch": 0.5887911967389914, "grad_norm": 5.724285179227241, "learning_rate": 3.816615156377855e-06, "loss": 17.149, "step": 32211 }, { "epoch": 0.5888094759354379, "grad_norm": 5.2436797812652705, "learning_rate": 3.8163275570762695e-06, "loss": 17.1401, "step": 32212 }, { "epoch": 0.5888277551318845, "grad_norm": 5.943342200391463, "learning_rate": 3.8160399619233514e-06, "loss": 17.2134, "step": 32213 }, { "epoch": 0.5888460343283309, "grad_norm": 6.577769301273673, "learning_rate": 3.815752370920113e-06, "loss": 17.5533, "step": 32214 }, { "epoch": 0.5888643135247774, "grad_norm": 6.739199748611585, "learning_rate": 3.81546478406756e-06, "loss": 17.372, "step": 32215 }, { "epoch": 0.588882592721224, "grad_norm": 5.213226569270535, "learning_rate": 3.815177201366697e-06, "loss": 16.9411, "step": 32216 }, { "epoch": 0.5889008719176705, "grad_norm": 6.515728533786363, "learning_rate": 3.8148896228185374e-06, "loss": 17.7599, "step": 32217 }, { "epoch": 0.5889191511141171, "grad_norm": 7.644067412784324, "learning_rate": 3.814602048424087e-06, "loss": 17.7805, "step": 32218 }, { "epoch": 0.5889374303105636, "grad_norm": 6.120731149125879, "learning_rate": 3.8143144781843515e-06, "loss": 17.2749, "step": 32219 }, { "epoch": 0.58895570950701, "grad_norm": 6.058242756653818, "learning_rate": 3.814026912100344e-06, "loss": 17.4435, "step": 32220 }, { "epoch": 0.5889739887034566, "grad_norm": 6.873146681021138, "learning_rate": 3.813739350173067e-06, "loss": 17.3298, "step": 32221 }, { "epoch": 0.5889922678999031, "grad_norm": 6.102607472690803, "learning_rate": 3.8134517924035295e-06, "loss": 17.4797, "step": 32222 }, { "epoch": 0.5890105470963496, "grad_norm": 6.763605112295928, "learning_rate": 3.8131642387927435e-06, "loss": 17.435, "step": 32223 }, { "epoch": 0.5890288262927962, "grad_norm": 8.091011437363866, "learning_rate": 3.8128766893417113e-06, "loss": 17.7211, "step": 32224 }, { "epoch": 0.5890471054892427, "grad_norm": 6.776591998172154, "learning_rate": 3.8125891440514464e-06, "loss": 17.7034, "step": 32225 }, { "epoch": 0.5890653846856893, "grad_norm": 5.791102902714849, "learning_rate": 3.812301602922951e-06, "loss": 17.3805, "step": 32226 }, { "epoch": 0.5890836638821357, "grad_norm": 5.290817706925737, "learning_rate": 3.8120140659572347e-06, "loss": 16.9161, "step": 32227 }, { "epoch": 0.5891019430785822, "grad_norm": 6.8434670231174, "learning_rate": 3.811726533155309e-06, "loss": 17.6922, "step": 32228 }, { "epoch": 0.5891202222750288, "grad_norm": 5.531229533167525, "learning_rate": 3.8114390045181776e-06, "loss": 17.1956, "step": 32229 }, { "epoch": 0.5891385014714753, "grad_norm": 6.054458888693313, "learning_rate": 3.811151480046848e-06, "loss": 17.2152, "step": 32230 }, { "epoch": 0.5891567806679219, "grad_norm": 5.3422564628087805, "learning_rate": 3.8108639597423295e-06, "loss": 17.0061, "step": 32231 }, { "epoch": 0.5891750598643684, "grad_norm": 5.095905311187162, "learning_rate": 3.8105764436056313e-06, "loss": 16.9341, "step": 32232 }, { "epoch": 0.5891933390608148, "grad_norm": 6.434815500497325, "learning_rate": 3.8102889316377577e-06, "loss": 17.5177, "step": 32233 }, { "epoch": 0.5892116182572614, "grad_norm": 6.287080531778832, "learning_rate": 3.81000142383972e-06, "loss": 17.4013, "step": 32234 }, { "epoch": 0.5892298974537079, "grad_norm": 7.10902997312636, "learning_rate": 3.809713920212523e-06, "loss": 17.5206, "step": 32235 }, { "epoch": 0.5892481766501545, "grad_norm": 5.96431074574206, "learning_rate": 3.809426420757174e-06, "loss": 16.9485, "step": 32236 }, { "epoch": 0.589266455846601, "grad_norm": 6.085920559650417, "learning_rate": 3.809138925474684e-06, "loss": 17.6754, "step": 32237 }, { "epoch": 0.5892847350430475, "grad_norm": 6.728155062965607, "learning_rate": 3.808851434366057e-06, "loss": 17.607, "step": 32238 }, { "epoch": 0.589303014239494, "grad_norm": 6.285066663259373, "learning_rate": 3.8085639474323045e-06, "loss": 17.6034, "step": 32239 }, { "epoch": 0.5893212934359405, "grad_norm": 5.229400100253714, "learning_rate": 3.8082764646744307e-06, "loss": 17.1101, "step": 32240 }, { "epoch": 0.5893395726323871, "grad_norm": 6.1717459492985745, "learning_rate": 3.8079889860934437e-06, "loss": 17.3207, "step": 32241 }, { "epoch": 0.5893578518288336, "grad_norm": 6.7369281811637345, "learning_rate": 3.8077015116903544e-06, "loss": 17.4696, "step": 32242 }, { "epoch": 0.5893761310252801, "grad_norm": 6.838677637711815, "learning_rate": 3.807414041466167e-06, "loss": 18.0728, "step": 32243 }, { "epoch": 0.5893944102217267, "grad_norm": 7.176940351488116, "learning_rate": 3.8071265754218884e-06, "loss": 17.638, "step": 32244 }, { "epoch": 0.5894126894181732, "grad_norm": 5.883767793036516, "learning_rate": 3.806839113558528e-06, "loss": 17.1314, "step": 32245 }, { "epoch": 0.5894309686146197, "grad_norm": 5.8311653212262975, "learning_rate": 3.8065516558770948e-06, "loss": 17.0188, "step": 32246 }, { "epoch": 0.5894492478110662, "grad_norm": 7.511797335340639, "learning_rate": 3.806264202378592e-06, "loss": 18.2829, "step": 32247 }, { "epoch": 0.5894675270075127, "grad_norm": 5.2532721676369185, "learning_rate": 3.805976753064032e-06, "loss": 17.0414, "step": 32248 }, { "epoch": 0.5894858062039593, "grad_norm": 8.041224578885194, "learning_rate": 3.805689307934418e-06, "loss": 17.9149, "step": 32249 }, { "epoch": 0.5895040854004058, "grad_norm": 7.3884586938134635, "learning_rate": 3.805401866990759e-06, "loss": 17.9932, "step": 32250 }, { "epoch": 0.5895223645968524, "grad_norm": 8.800879056895564, "learning_rate": 3.8051144302340647e-06, "loss": 18.5502, "step": 32251 }, { "epoch": 0.5895406437932988, "grad_norm": 8.525326132290807, "learning_rate": 3.8048269976653405e-06, "loss": 18.266, "step": 32252 }, { "epoch": 0.5895589229897453, "grad_norm": 5.626132896664549, "learning_rate": 3.804539569285592e-06, "loss": 17.061, "step": 32253 }, { "epoch": 0.5895772021861919, "grad_norm": 7.0169121519798034, "learning_rate": 3.8042521450958296e-06, "loss": 17.6839, "step": 32254 }, { "epoch": 0.5895954813826384, "grad_norm": 6.723467748425072, "learning_rate": 3.803964725097059e-06, "loss": 17.5765, "step": 32255 }, { "epoch": 0.589613760579085, "grad_norm": 5.500662736403794, "learning_rate": 3.8036773092902895e-06, "loss": 17.2242, "step": 32256 }, { "epoch": 0.5896320397755315, "grad_norm": 6.191505872956058, "learning_rate": 3.803389897676528e-06, "loss": 17.5434, "step": 32257 }, { "epoch": 0.589650318971978, "grad_norm": 6.821326433278477, "learning_rate": 3.803102490256779e-06, "loss": 17.8773, "step": 32258 }, { "epoch": 0.5896685981684245, "grad_norm": 5.7415749110545535, "learning_rate": 3.802815087032052e-06, "loss": 16.9959, "step": 32259 }, { "epoch": 0.589686877364871, "grad_norm": 6.214143543105148, "learning_rate": 3.8025276880033564e-06, "loss": 17.2542, "step": 32260 }, { "epoch": 0.5897051565613176, "grad_norm": 5.96302782769722, "learning_rate": 3.8022402931716946e-06, "loss": 17.0411, "step": 32261 }, { "epoch": 0.5897234357577641, "grad_norm": 5.650881597461157, "learning_rate": 3.8019529025380788e-06, "loss": 17.1614, "step": 32262 }, { "epoch": 0.5897417149542106, "grad_norm": 7.266049240633758, "learning_rate": 3.801665516103513e-06, "loss": 17.4751, "step": 32263 }, { "epoch": 0.5897599941506572, "grad_norm": 6.382306524278404, "learning_rate": 3.801378133869005e-06, "loss": 17.4987, "step": 32264 }, { "epoch": 0.5897782733471036, "grad_norm": 6.120676395602967, "learning_rate": 3.801090755835564e-06, "loss": 17.1164, "step": 32265 }, { "epoch": 0.5897965525435502, "grad_norm": 6.790470083796269, "learning_rate": 3.8008033820041957e-06, "loss": 17.5907, "step": 32266 }, { "epoch": 0.5898148317399967, "grad_norm": 5.628556969682443, "learning_rate": 3.800516012375907e-06, "loss": 17.2407, "step": 32267 }, { "epoch": 0.5898331109364432, "grad_norm": 6.393849840416629, "learning_rate": 3.800228646951706e-06, "loss": 17.6485, "step": 32268 }, { "epoch": 0.5898513901328898, "grad_norm": 6.229340640043556, "learning_rate": 3.799941285732598e-06, "loss": 17.3259, "step": 32269 }, { "epoch": 0.5898696693293363, "grad_norm": 5.924709750982091, "learning_rate": 3.7996539287195934e-06, "loss": 17.0382, "step": 32270 }, { "epoch": 0.5898879485257829, "grad_norm": 6.795339606572615, "learning_rate": 3.7993665759136974e-06, "loss": 17.7726, "step": 32271 }, { "epoch": 0.5899062277222293, "grad_norm": 5.261432765568835, "learning_rate": 3.7990792273159165e-06, "loss": 17.0021, "step": 32272 }, { "epoch": 0.5899245069186758, "grad_norm": 7.287934699645097, "learning_rate": 3.798791882927259e-06, "loss": 18.2171, "step": 32273 }, { "epoch": 0.5899427861151224, "grad_norm": 5.486707177591574, "learning_rate": 3.7985045427487334e-06, "loss": 17.1614, "step": 32274 }, { "epoch": 0.5899610653115689, "grad_norm": 6.3776404745044495, "learning_rate": 3.798217206781342e-06, "loss": 17.4955, "step": 32275 }, { "epoch": 0.5899793445080155, "grad_norm": 6.71201691785145, "learning_rate": 3.797929875026097e-06, "loss": 17.3552, "step": 32276 }, { "epoch": 0.589997623704462, "grad_norm": 5.42054282717951, "learning_rate": 3.797642547484004e-06, "loss": 17.0588, "step": 32277 }, { "epoch": 0.5900159029009084, "grad_norm": 5.077017010400883, "learning_rate": 3.7973552241560676e-06, "loss": 17.167, "step": 32278 }, { "epoch": 0.590034182097355, "grad_norm": 5.652311373302435, "learning_rate": 3.7970679050432986e-06, "loss": 17.22, "step": 32279 }, { "epoch": 0.5900524612938015, "grad_norm": 7.048154910684579, "learning_rate": 3.7967805901467014e-06, "loss": 17.7734, "step": 32280 }, { "epoch": 0.5900707404902481, "grad_norm": 5.623984454385494, "learning_rate": 3.7964932794672825e-06, "loss": 17.1976, "step": 32281 }, { "epoch": 0.5900890196866946, "grad_norm": 6.578425248696037, "learning_rate": 3.796205973006053e-06, "loss": 17.5693, "step": 32282 }, { "epoch": 0.5901072988831411, "grad_norm": 5.988812887084346, "learning_rate": 3.7959186707640145e-06, "loss": 17.646, "step": 32283 }, { "epoch": 0.5901255780795877, "grad_norm": 5.101988563682886, "learning_rate": 3.7956313727421783e-06, "loss": 17.1905, "step": 32284 }, { "epoch": 0.5901438572760341, "grad_norm": 6.888270600207537, "learning_rate": 3.795344078941549e-06, "loss": 17.9214, "step": 32285 }, { "epoch": 0.5901621364724807, "grad_norm": 7.874534311575397, "learning_rate": 3.7950567893631333e-06, "loss": 17.7441, "step": 32286 }, { "epoch": 0.5901804156689272, "grad_norm": 4.995672541421042, "learning_rate": 3.7947695040079413e-06, "loss": 16.9209, "step": 32287 }, { "epoch": 0.5901986948653737, "grad_norm": 6.97325577079035, "learning_rate": 3.794482222876977e-06, "loss": 17.9442, "step": 32288 }, { "epoch": 0.5902169740618203, "grad_norm": 5.343684461720772, "learning_rate": 3.794194945971246e-06, "loss": 17.0113, "step": 32289 }, { "epoch": 0.5902352532582668, "grad_norm": 7.365329639667184, "learning_rate": 3.793907673291758e-06, "loss": 17.775, "step": 32290 }, { "epoch": 0.5902535324547132, "grad_norm": 5.973018883689246, "learning_rate": 3.7936204048395205e-06, "loss": 17.5331, "step": 32291 }, { "epoch": 0.5902718116511598, "grad_norm": 5.486654519599238, "learning_rate": 3.793333140615536e-06, "loss": 17.1188, "step": 32292 }, { "epoch": 0.5902900908476063, "grad_norm": 6.7177174247448255, "learning_rate": 3.793045880620817e-06, "loss": 17.6389, "step": 32293 }, { "epoch": 0.5903083700440529, "grad_norm": 6.108857070791032, "learning_rate": 3.7927586248563657e-06, "loss": 17.2787, "step": 32294 }, { "epoch": 0.5903266492404994, "grad_norm": 6.22820871114113, "learning_rate": 3.79247137332319e-06, "loss": 17.186, "step": 32295 }, { "epoch": 0.5903449284369459, "grad_norm": 5.721433048347884, "learning_rate": 3.792184126022299e-06, "loss": 17.2408, "step": 32296 }, { "epoch": 0.5903632076333925, "grad_norm": 6.335684909379379, "learning_rate": 3.791896882954699e-06, "loss": 17.2978, "step": 32297 }, { "epoch": 0.5903814868298389, "grad_norm": 6.217763086930059, "learning_rate": 3.791609644121392e-06, "loss": 17.4739, "step": 32298 }, { "epoch": 0.5903997660262855, "grad_norm": 7.556278851340127, "learning_rate": 3.7913224095233904e-06, "loss": 17.4489, "step": 32299 }, { "epoch": 0.590418045222732, "grad_norm": 8.646008652174746, "learning_rate": 3.7910351791616973e-06, "loss": 18.9576, "step": 32300 }, { "epoch": 0.5904363244191785, "grad_norm": 7.816736312545511, "learning_rate": 3.790747953037323e-06, "loss": 17.7604, "step": 32301 }, { "epoch": 0.5904546036156251, "grad_norm": 5.488739232190908, "learning_rate": 3.7904607311512724e-06, "loss": 17.2852, "step": 32302 }, { "epoch": 0.5904728828120716, "grad_norm": 5.582662832024497, "learning_rate": 3.7901735135045505e-06, "loss": 17.1034, "step": 32303 }, { "epoch": 0.5904911620085181, "grad_norm": 6.315238471695915, "learning_rate": 3.789886300098166e-06, "loss": 17.4892, "step": 32304 }, { "epoch": 0.5905094412049646, "grad_norm": 7.904775591346758, "learning_rate": 3.7895990909331258e-06, "loss": 18.0479, "step": 32305 }, { "epoch": 0.5905277204014111, "grad_norm": 5.671391677246383, "learning_rate": 3.7893118860104338e-06, "loss": 17.2301, "step": 32306 }, { "epoch": 0.5905459995978577, "grad_norm": 5.51645158722178, "learning_rate": 3.7890246853311007e-06, "loss": 17.2028, "step": 32307 }, { "epoch": 0.5905642787943042, "grad_norm": 6.1027320693511244, "learning_rate": 3.78873748889613e-06, "loss": 17.4346, "step": 32308 }, { "epoch": 0.5905825579907508, "grad_norm": 4.8665382094883745, "learning_rate": 3.7884502967065283e-06, "loss": 17.0443, "step": 32309 }, { "epoch": 0.5906008371871972, "grad_norm": 6.059038192049933, "learning_rate": 3.7881631087633054e-06, "loss": 17.3228, "step": 32310 }, { "epoch": 0.5906191163836437, "grad_norm": 5.149769924071177, "learning_rate": 3.7878759250674653e-06, "loss": 17.0607, "step": 32311 }, { "epoch": 0.5906373955800903, "grad_norm": 6.03087659135782, "learning_rate": 3.787588745620013e-06, "loss": 17.4887, "step": 32312 }, { "epoch": 0.5906556747765368, "grad_norm": 6.632832029891663, "learning_rate": 3.787301570421957e-06, "loss": 17.5447, "step": 32313 }, { "epoch": 0.5906739539729834, "grad_norm": 7.795505600095599, "learning_rate": 3.7870143994743043e-06, "loss": 18.1404, "step": 32314 }, { "epoch": 0.5906922331694299, "grad_norm": 7.4138480712803965, "learning_rate": 3.7867272327780622e-06, "loss": 17.9287, "step": 32315 }, { "epoch": 0.5907105123658764, "grad_norm": 8.167937024746337, "learning_rate": 3.7864400703342364e-06, "loss": 18.015, "step": 32316 }, { "epoch": 0.5907287915623229, "grad_norm": 6.5668168859969045, "learning_rate": 3.78615291214383e-06, "loss": 17.6308, "step": 32317 }, { "epoch": 0.5907470707587694, "grad_norm": 7.160225887184637, "learning_rate": 3.7858657582078535e-06, "loss": 17.5945, "step": 32318 }, { "epoch": 0.590765349955216, "grad_norm": 5.989562918998709, "learning_rate": 3.7855786085273137e-06, "loss": 17.3309, "step": 32319 }, { "epoch": 0.5907836291516625, "grad_norm": 9.756992884760024, "learning_rate": 3.7852914631032127e-06, "loss": 18.1069, "step": 32320 }, { "epoch": 0.590801908348109, "grad_norm": 5.351725239314266, "learning_rate": 3.7850043219365623e-06, "loss": 17.2203, "step": 32321 }, { "epoch": 0.5908201875445556, "grad_norm": 7.010261192109583, "learning_rate": 3.7847171850283654e-06, "loss": 18.1692, "step": 32322 }, { "epoch": 0.590838466741002, "grad_norm": 6.8988393803042225, "learning_rate": 3.7844300523796278e-06, "loss": 18.1206, "step": 32323 }, { "epoch": 0.5908567459374486, "grad_norm": 5.112729624372619, "learning_rate": 3.7841429239913596e-06, "loss": 16.9588, "step": 32324 }, { "epoch": 0.5908750251338951, "grad_norm": 6.321779003173765, "learning_rate": 3.783855799864564e-06, "loss": 17.4116, "step": 32325 }, { "epoch": 0.5908933043303416, "grad_norm": 6.495171269691186, "learning_rate": 3.7835686800002486e-06, "loss": 17.2456, "step": 32326 }, { "epoch": 0.5909115835267882, "grad_norm": 6.62913607536537, "learning_rate": 3.783281564399419e-06, "loss": 17.5391, "step": 32327 }, { "epoch": 0.5909298627232347, "grad_norm": 6.6984877714901545, "learning_rate": 3.782994453063083e-06, "loss": 17.4633, "step": 32328 }, { "epoch": 0.5909481419196813, "grad_norm": 6.41613596201492, "learning_rate": 3.7827073459922437e-06, "loss": 17.3086, "step": 32329 }, { "epoch": 0.5909664211161277, "grad_norm": 7.180667608512838, "learning_rate": 3.7824202431879107e-06, "loss": 17.9749, "step": 32330 }, { "epoch": 0.5909847003125742, "grad_norm": 6.335137898691476, "learning_rate": 3.7821331446510894e-06, "loss": 17.6017, "step": 32331 }, { "epoch": 0.5910029795090208, "grad_norm": 6.867991275638247, "learning_rate": 3.781846050382785e-06, "loss": 17.5711, "step": 32332 }, { "epoch": 0.5910212587054673, "grad_norm": 5.357425644961767, "learning_rate": 3.7815589603840063e-06, "loss": 17.0656, "step": 32333 }, { "epoch": 0.5910395379019139, "grad_norm": 5.695218833037036, "learning_rate": 3.7812718746557555e-06, "loss": 17.3691, "step": 32334 }, { "epoch": 0.5910578170983604, "grad_norm": 7.636423545940678, "learning_rate": 3.780984793199042e-06, "loss": 17.7793, "step": 32335 }, { "epoch": 0.5910760962948068, "grad_norm": 10.729003877583215, "learning_rate": 3.7806977160148716e-06, "loss": 17.0574, "step": 32336 }, { "epoch": 0.5910943754912534, "grad_norm": 6.408593911346188, "learning_rate": 3.780410643104248e-06, "loss": 17.5642, "step": 32337 }, { "epoch": 0.5911126546876999, "grad_norm": 5.799698743943653, "learning_rate": 3.780123574468182e-06, "loss": 17.2292, "step": 32338 }, { "epoch": 0.5911309338841465, "grad_norm": 6.8337570491705675, "learning_rate": 3.779836510107675e-06, "loss": 17.4251, "step": 32339 }, { "epoch": 0.591149213080593, "grad_norm": 5.790574706297314, "learning_rate": 3.7795494500237347e-06, "loss": 17.1139, "step": 32340 }, { "epoch": 0.5911674922770395, "grad_norm": 6.387250563128051, "learning_rate": 3.7792623942173695e-06, "loss": 17.3207, "step": 32341 }, { "epoch": 0.591185771473486, "grad_norm": 6.271798366490967, "learning_rate": 3.7789753426895838e-06, "loss": 17.2982, "step": 32342 }, { "epoch": 0.5912040506699325, "grad_norm": 7.261202377159157, "learning_rate": 3.7786882954413816e-06, "loss": 17.8848, "step": 32343 }, { "epoch": 0.5912223298663791, "grad_norm": 10.141903713074006, "learning_rate": 3.7784012524737718e-06, "loss": 18.0965, "step": 32344 }, { "epoch": 0.5912406090628256, "grad_norm": 6.52102032218912, "learning_rate": 3.7781142137877585e-06, "loss": 17.4206, "step": 32345 }, { "epoch": 0.5912588882592721, "grad_norm": 6.550622345709233, "learning_rate": 3.7778271793843514e-06, "loss": 17.3604, "step": 32346 }, { "epoch": 0.5912771674557187, "grad_norm": 6.105964624297633, "learning_rate": 3.7775401492645535e-06, "loss": 17.3872, "step": 32347 }, { "epoch": 0.5912954466521652, "grad_norm": 8.054080525562728, "learning_rate": 3.7772531234293696e-06, "loss": 18.1811, "step": 32348 }, { "epoch": 0.5913137258486117, "grad_norm": 9.11422642793395, "learning_rate": 3.7769661018798082e-06, "loss": 18.1753, "step": 32349 }, { "epoch": 0.5913320050450582, "grad_norm": 6.069558655846016, "learning_rate": 3.7766790846168756e-06, "loss": 17.2149, "step": 32350 }, { "epoch": 0.5913502842415047, "grad_norm": 5.513164934335705, "learning_rate": 3.776392071641574e-06, "loss": 17.1437, "step": 32351 }, { "epoch": 0.5913685634379513, "grad_norm": 5.452045019334056, "learning_rate": 3.776105062954915e-06, "loss": 17.323, "step": 32352 }, { "epoch": 0.5913868426343978, "grad_norm": 7.9864939983926675, "learning_rate": 3.7758180585579004e-06, "loss": 18.3698, "step": 32353 }, { "epoch": 0.5914051218308444, "grad_norm": 7.390816160270946, "learning_rate": 3.7755310584515354e-06, "loss": 17.8451, "step": 32354 }, { "epoch": 0.5914234010272909, "grad_norm": 6.302231174625811, "learning_rate": 3.77524406263683e-06, "loss": 17.5554, "step": 32355 }, { "epoch": 0.5914416802237373, "grad_norm": 5.39656162882966, "learning_rate": 3.7749570711147887e-06, "loss": 17.0965, "step": 32356 }, { "epoch": 0.5914599594201839, "grad_norm": 5.311649027479756, "learning_rate": 3.774670083886414e-06, "loss": 17.0377, "step": 32357 }, { "epoch": 0.5914782386166304, "grad_norm": 5.615513770425768, "learning_rate": 3.7743831009527155e-06, "loss": 17.1751, "step": 32358 }, { "epoch": 0.5914965178130769, "grad_norm": 6.0580510273348755, "learning_rate": 3.7740961223146987e-06, "loss": 17.4298, "step": 32359 }, { "epoch": 0.5915147970095235, "grad_norm": 5.007208070065542, "learning_rate": 3.7738091479733667e-06, "loss": 17.0784, "step": 32360 }, { "epoch": 0.59153307620597, "grad_norm": 5.836114534342866, "learning_rate": 3.7735221779297294e-06, "loss": 17.2347, "step": 32361 }, { "epoch": 0.5915513554024165, "grad_norm": 5.62023568501812, "learning_rate": 3.7732352121847883e-06, "loss": 16.9707, "step": 32362 }, { "epoch": 0.591569634598863, "grad_norm": 6.039986292053051, "learning_rate": 3.772948250739552e-06, "loss": 17.6188, "step": 32363 }, { "epoch": 0.5915879137953095, "grad_norm": 5.429830850525959, "learning_rate": 3.7726612935950275e-06, "loss": 17.0521, "step": 32364 }, { "epoch": 0.5916061929917561, "grad_norm": 6.392799623184473, "learning_rate": 3.7723743407522163e-06, "loss": 17.389, "step": 32365 }, { "epoch": 0.5916244721882026, "grad_norm": 6.422887957708528, "learning_rate": 3.7720873922121282e-06, "loss": 17.6263, "step": 32366 }, { "epoch": 0.5916427513846492, "grad_norm": 6.605285805965904, "learning_rate": 3.7718004479757664e-06, "loss": 17.3858, "step": 32367 }, { "epoch": 0.5916610305810956, "grad_norm": 7.155126803883741, "learning_rate": 3.7715135080441356e-06, "loss": 17.659, "step": 32368 }, { "epoch": 0.5916793097775421, "grad_norm": 5.899271970107788, "learning_rate": 3.771226572418247e-06, "loss": 17.1571, "step": 32369 }, { "epoch": 0.5916975889739887, "grad_norm": 4.872316431243929, "learning_rate": 3.770939641099102e-06, "loss": 16.9993, "step": 32370 }, { "epoch": 0.5917158681704352, "grad_norm": 7.55914493568086, "learning_rate": 3.7706527140877043e-06, "loss": 18.1417, "step": 32371 }, { "epoch": 0.5917341473668818, "grad_norm": 6.402012853096437, "learning_rate": 3.7703657913850633e-06, "loss": 17.4951, "step": 32372 }, { "epoch": 0.5917524265633283, "grad_norm": 5.8296814339213965, "learning_rate": 3.7700788729921845e-06, "loss": 17.2612, "step": 32373 }, { "epoch": 0.5917707057597748, "grad_norm": 6.546103556733175, "learning_rate": 3.7697919589100706e-06, "loss": 17.7205, "step": 32374 }, { "epoch": 0.5917889849562213, "grad_norm": 5.24944997362425, "learning_rate": 3.7695050491397317e-06, "loss": 17.126, "step": 32375 }, { "epoch": 0.5918072641526678, "grad_norm": 6.1458948862196126, "learning_rate": 3.7692181436821683e-06, "loss": 17.5188, "step": 32376 }, { "epoch": 0.5918255433491144, "grad_norm": 6.053079033148928, "learning_rate": 3.7689312425383895e-06, "loss": 17.6565, "step": 32377 }, { "epoch": 0.5918438225455609, "grad_norm": 6.731006511010226, "learning_rate": 3.768644345709401e-06, "loss": 17.5139, "step": 32378 }, { "epoch": 0.5918621017420074, "grad_norm": 5.438342352424414, "learning_rate": 3.7683574531962054e-06, "loss": 16.9655, "step": 32379 }, { "epoch": 0.591880380938454, "grad_norm": 5.581877659082044, "learning_rate": 3.7680705649998118e-06, "loss": 17.1372, "step": 32380 }, { "epoch": 0.5918986601349004, "grad_norm": 6.5591136494226605, "learning_rate": 3.7677836811212233e-06, "loss": 17.478, "step": 32381 }, { "epoch": 0.591916939331347, "grad_norm": 5.722688608343528, "learning_rate": 3.7674968015614444e-06, "loss": 17.1686, "step": 32382 }, { "epoch": 0.5919352185277935, "grad_norm": 5.5704496410688105, "learning_rate": 3.7672099263214845e-06, "loss": 17.0581, "step": 32383 }, { "epoch": 0.59195349772424, "grad_norm": 5.104724728918582, "learning_rate": 3.766923055402347e-06, "loss": 16.9108, "step": 32384 }, { "epoch": 0.5919717769206866, "grad_norm": 5.5817864563915265, "learning_rate": 3.766636188805035e-06, "loss": 17.112, "step": 32385 }, { "epoch": 0.5919900561171331, "grad_norm": 7.455077451634077, "learning_rate": 3.766349326530557e-06, "loss": 17.5003, "step": 32386 }, { "epoch": 0.5920083353135797, "grad_norm": 5.72203458986661, "learning_rate": 3.7660624685799195e-06, "loss": 17.3562, "step": 32387 }, { "epoch": 0.5920266145100261, "grad_norm": 5.283256667878949, "learning_rate": 3.7657756149541236e-06, "loss": 17.0133, "step": 32388 }, { "epoch": 0.5920448937064726, "grad_norm": 5.5308331407293085, "learning_rate": 3.7654887656541784e-06, "loss": 17.2165, "step": 32389 }, { "epoch": 0.5920631729029192, "grad_norm": 7.063337608903977, "learning_rate": 3.7652019206810875e-06, "loss": 17.7405, "step": 32390 }, { "epoch": 0.5920814520993657, "grad_norm": 5.654015056607945, "learning_rate": 3.764915080035855e-06, "loss": 17.2789, "step": 32391 }, { "epoch": 0.5920997312958123, "grad_norm": 6.675066406371906, "learning_rate": 3.764628243719491e-06, "loss": 17.7115, "step": 32392 }, { "epoch": 0.5921180104922588, "grad_norm": 5.6099314962067774, "learning_rate": 3.7643414117329953e-06, "loss": 17.2619, "step": 32393 }, { "epoch": 0.5921362896887052, "grad_norm": 5.892557387198957, "learning_rate": 3.764054584077378e-06, "loss": 17.2896, "step": 32394 }, { "epoch": 0.5921545688851518, "grad_norm": 6.964889711859851, "learning_rate": 3.7637677607536406e-06, "loss": 17.6795, "step": 32395 }, { "epoch": 0.5921728480815983, "grad_norm": 6.314293185451368, "learning_rate": 3.7634809417627894e-06, "loss": 17.5847, "step": 32396 }, { "epoch": 0.5921911272780449, "grad_norm": 6.494833038414386, "learning_rate": 3.763194127105832e-06, "loss": 17.4797, "step": 32397 }, { "epoch": 0.5922094064744914, "grad_norm": 7.045365322018765, "learning_rate": 3.762907316783771e-06, "loss": 17.696, "step": 32398 }, { "epoch": 0.5922276856709379, "grad_norm": 6.526477054826589, "learning_rate": 3.762620510797611e-06, "loss": 17.5331, "step": 32399 }, { "epoch": 0.5922459648673845, "grad_norm": 6.263779302868567, "learning_rate": 3.7623337091483614e-06, "loss": 17.3947, "step": 32400 }, { "epoch": 0.5922642440638309, "grad_norm": 6.063336773015104, "learning_rate": 3.7620469118370246e-06, "loss": 17.4611, "step": 32401 }, { "epoch": 0.5922825232602775, "grad_norm": 7.5315264593652325, "learning_rate": 3.7617601188646035e-06, "loss": 18.1997, "step": 32402 }, { "epoch": 0.592300802456724, "grad_norm": 5.526875419485432, "learning_rate": 3.7614733302321073e-06, "loss": 17.0389, "step": 32403 }, { "epoch": 0.5923190816531705, "grad_norm": 6.1375370583507065, "learning_rate": 3.7611865459405406e-06, "loss": 17.4425, "step": 32404 }, { "epoch": 0.5923373608496171, "grad_norm": 6.9311383101426465, "learning_rate": 3.760899765990905e-06, "loss": 17.7269, "step": 32405 }, { "epoch": 0.5923556400460636, "grad_norm": 5.564554017102363, "learning_rate": 3.7606129903842103e-06, "loss": 17.1448, "step": 32406 }, { "epoch": 0.5923739192425101, "grad_norm": 7.430614207151592, "learning_rate": 3.760326219121458e-06, "loss": 18.3413, "step": 32407 }, { "epoch": 0.5923921984389566, "grad_norm": 6.416276094771015, "learning_rate": 3.7600394522036555e-06, "loss": 17.437, "step": 32408 }, { "epoch": 0.5924104776354031, "grad_norm": 6.047448416326489, "learning_rate": 3.759752689631808e-06, "loss": 17.1342, "step": 32409 }, { "epoch": 0.5924287568318497, "grad_norm": 6.156953859007582, "learning_rate": 3.7594659314069175e-06, "loss": 17.0683, "step": 32410 }, { "epoch": 0.5924470360282962, "grad_norm": 5.688645401289042, "learning_rate": 3.7591791775299937e-06, "loss": 17.2958, "step": 32411 }, { "epoch": 0.5924653152247428, "grad_norm": 6.734763603589528, "learning_rate": 3.7588924280020377e-06, "loss": 17.4939, "step": 32412 }, { "epoch": 0.5924835944211893, "grad_norm": 6.401480515106015, "learning_rate": 3.758605682824056e-06, "loss": 17.1107, "step": 32413 }, { "epoch": 0.5925018736176357, "grad_norm": 6.620272975613238, "learning_rate": 3.7583189419970544e-06, "loss": 17.5621, "step": 32414 }, { "epoch": 0.5925201528140823, "grad_norm": 8.0031903432504, "learning_rate": 3.7580322055220388e-06, "loss": 18.1506, "step": 32415 }, { "epoch": 0.5925384320105288, "grad_norm": 6.309873701215058, "learning_rate": 3.757745473400009e-06, "loss": 17.8052, "step": 32416 }, { "epoch": 0.5925567112069754, "grad_norm": 6.264335010852854, "learning_rate": 3.757458745631975e-06, "loss": 17.5306, "step": 32417 }, { "epoch": 0.5925749904034219, "grad_norm": 5.325385424136971, "learning_rate": 3.7571720222189412e-06, "loss": 17.2083, "step": 32418 }, { "epoch": 0.5925932695998684, "grad_norm": 8.545722439672645, "learning_rate": 3.75688530316191e-06, "loss": 18.3224, "step": 32419 }, { "epoch": 0.592611548796315, "grad_norm": 7.527254909145011, "learning_rate": 3.7565985884618893e-06, "loss": 17.7607, "step": 32420 }, { "epoch": 0.5926298279927614, "grad_norm": 7.0652673880508505, "learning_rate": 3.7563118781198827e-06, "loss": 17.6863, "step": 32421 }, { "epoch": 0.592648107189208, "grad_norm": 6.604472154695033, "learning_rate": 3.7560251721368928e-06, "loss": 17.7188, "step": 32422 }, { "epoch": 0.5926663863856545, "grad_norm": 7.582594727783215, "learning_rate": 3.75573847051393e-06, "loss": 17.5783, "step": 32423 }, { "epoch": 0.592684665582101, "grad_norm": 8.654170449337723, "learning_rate": 3.755451773251993e-06, "loss": 17.1561, "step": 32424 }, { "epoch": 0.5927029447785476, "grad_norm": 5.568745241454988, "learning_rate": 3.7551650803520917e-06, "loss": 17.3008, "step": 32425 }, { "epoch": 0.592721223974994, "grad_norm": 8.773059117690531, "learning_rate": 3.7548783918152276e-06, "loss": 17.3103, "step": 32426 }, { "epoch": 0.5927395031714405, "grad_norm": 6.586234377819612, "learning_rate": 3.754591707642406e-06, "loss": 17.2645, "step": 32427 }, { "epoch": 0.5927577823678871, "grad_norm": 6.016371903607782, "learning_rate": 3.7543050278346344e-06, "loss": 17.2845, "step": 32428 }, { "epoch": 0.5927760615643336, "grad_norm": 8.262623959560772, "learning_rate": 3.7540183523929153e-06, "loss": 17.4315, "step": 32429 }, { "epoch": 0.5927943407607802, "grad_norm": 6.724986810820051, "learning_rate": 3.753731681318252e-06, "loss": 17.3697, "step": 32430 }, { "epoch": 0.5928126199572267, "grad_norm": 5.730497531505808, "learning_rate": 3.7534450146116518e-06, "loss": 17.1611, "step": 32431 }, { "epoch": 0.5928308991536732, "grad_norm": 6.098371937960867, "learning_rate": 3.75315835227412e-06, "loss": 17.0599, "step": 32432 }, { "epoch": 0.5928491783501197, "grad_norm": 7.949909202360231, "learning_rate": 3.752871694306658e-06, "loss": 18.0375, "step": 32433 }, { "epoch": 0.5928674575465662, "grad_norm": 6.145770677154111, "learning_rate": 3.7525850407102738e-06, "loss": 17.2439, "step": 32434 }, { "epoch": 0.5928857367430128, "grad_norm": 6.523532437145641, "learning_rate": 3.7522983914859706e-06, "loss": 17.3518, "step": 32435 }, { "epoch": 0.5929040159394593, "grad_norm": 6.951809584363566, "learning_rate": 3.7520117466347515e-06, "loss": 17.6483, "step": 32436 }, { "epoch": 0.5929222951359058, "grad_norm": 8.504015256920715, "learning_rate": 3.7517251061576254e-06, "loss": 17.6976, "step": 32437 }, { "epoch": 0.5929405743323524, "grad_norm": 5.293749199855276, "learning_rate": 3.7514384700555927e-06, "loss": 16.9589, "step": 32438 }, { "epoch": 0.5929588535287988, "grad_norm": 5.312659545800066, "learning_rate": 3.751151838329661e-06, "loss": 17.294, "step": 32439 }, { "epoch": 0.5929771327252454, "grad_norm": 7.012834986737339, "learning_rate": 3.7508652109808343e-06, "loss": 17.582, "step": 32440 }, { "epoch": 0.5929954119216919, "grad_norm": 4.955950972861397, "learning_rate": 3.7505785880101144e-06, "loss": 16.9436, "step": 32441 }, { "epoch": 0.5930136911181384, "grad_norm": 7.75156588583439, "learning_rate": 3.7502919694185104e-06, "loss": 17.4031, "step": 32442 }, { "epoch": 0.593031970314585, "grad_norm": 5.914178215017999, "learning_rate": 3.7500053552070243e-06, "loss": 17.1646, "step": 32443 }, { "epoch": 0.5930502495110315, "grad_norm": 4.892953462392987, "learning_rate": 3.7497187453766594e-06, "loss": 17.0064, "step": 32444 }, { "epoch": 0.5930685287074781, "grad_norm": 4.978218069157915, "learning_rate": 3.749432139928422e-06, "loss": 16.9915, "step": 32445 }, { "epoch": 0.5930868079039245, "grad_norm": 6.544984895112027, "learning_rate": 3.7491455388633185e-06, "loss": 17.6377, "step": 32446 }, { "epoch": 0.593105087100371, "grad_norm": 8.46758614005539, "learning_rate": 3.7488589421823486e-06, "loss": 17.7751, "step": 32447 }, { "epoch": 0.5931233662968176, "grad_norm": 8.891013930212045, "learning_rate": 3.7485723498865224e-06, "loss": 18.1293, "step": 32448 }, { "epoch": 0.5931416454932641, "grad_norm": 6.82480305601611, "learning_rate": 3.74828576197684e-06, "loss": 17.7055, "step": 32449 }, { "epoch": 0.5931599246897107, "grad_norm": 6.464917388930505, "learning_rate": 3.747999178454306e-06, "loss": 17.6248, "step": 32450 }, { "epoch": 0.5931782038861572, "grad_norm": 6.6146036184414845, "learning_rate": 3.7477125993199288e-06, "loss": 17.5363, "step": 32451 }, { "epoch": 0.5931964830826036, "grad_norm": 6.647648328603642, "learning_rate": 3.7474260245747075e-06, "loss": 17.5513, "step": 32452 }, { "epoch": 0.5932147622790502, "grad_norm": 5.913298676218633, "learning_rate": 3.747139454219652e-06, "loss": 17.3899, "step": 32453 }, { "epoch": 0.5932330414754967, "grad_norm": 6.076640830997209, "learning_rate": 3.746852888255763e-06, "loss": 17.5111, "step": 32454 }, { "epoch": 0.5932513206719433, "grad_norm": 6.045078787622481, "learning_rate": 3.7465663266840446e-06, "loss": 17.3289, "step": 32455 }, { "epoch": 0.5932695998683898, "grad_norm": 6.734346830413742, "learning_rate": 3.746279769505505e-06, "loss": 18.2236, "step": 32456 }, { "epoch": 0.5932878790648363, "grad_norm": 5.552861479260002, "learning_rate": 3.7459932167211445e-06, "loss": 17.1303, "step": 32457 }, { "epoch": 0.5933061582612829, "grad_norm": 5.299829155462852, "learning_rate": 3.7457066683319688e-06, "loss": 17.1002, "step": 32458 }, { "epoch": 0.5933244374577293, "grad_norm": 5.549189170272353, "learning_rate": 3.7454201243389825e-06, "loss": 17.3882, "step": 32459 }, { "epoch": 0.5933427166541759, "grad_norm": 4.377068701797463, "learning_rate": 3.745133584743191e-06, "loss": 16.8012, "step": 32460 }, { "epoch": 0.5933609958506224, "grad_norm": 6.941047773586793, "learning_rate": 3.744847049545596e-06, "loss": 17.6623, "step": 32461 }, { "epoch": 0.5933792750470689, "grad_norm": 7.04491917938657, "learning_rate": 3.7445605187472027e-06, "loss": 17.7507, "step": 32462 }, { "epoch": 0.5933975542435155, "grad_norm": 4.967708658937214, "learning_rate": 3.7442739923490178e-06, "loss": 17.0938, "step": 32463 }, { "epoch": 0.593415833439962, "grad_norm": 6.922366249678441, "learning_rate": 3.743987470352042e-06, "loss": 17.5901, "step": 32464 }, { "epoch": 0.5934341126364086, "grad_norm": 6.839818769402595, "learning_rate": 3.743700952757282e-06, "loss": 17.4512, "step": 32465 }, { "epoch": 0.593452391832855, "grad_norm": 7.27705126297888, "learning_rate": 3.7434144395657405e-06, "loss": 17.8103, "step": 32466 }, { "epoch": 0.5934706710293015, "grad_norm": 5.36889276294763, "learning_rate": 3.7431279307784217e-06, "loss": 17.2703, "step": 32467 }, { "epoch": 0.5934889502257481, "grad_norm": 4.839029772255333, "learning_rate": 3.742841426396332e-06, "loss": 16.9367, "step": 32468 }, { "epoch": 0.5935072294221946, "grad_norm": 5.339771189934897, "learning_rate": 3.7425549264204724e-06, "loss": 17.1725, "step": 32469 }, { "epoch": 0.5935255086186412, "grad_norm": 5.763663159230749, "learning_rate": 3.742268430851851e-06, "loss": 17.2654, "step": 32470 }, { "epoch": 0.5935437878150877, "grad_norm": 10.395722349924206, "learning_rate": 3.741981939691468e-06, "loss": 18.8835, "step": 32471 }, { "epoch": 0.5935620670115341, "grad_norm": 6.631841723267675, "learning_rate": 3.7416954529403282e-06, "loss": 17.7086, "step": 32472 }, { "epoch": 0.5935803462079807, "grad_norm": 6.786762547627843, "learning_rate": 3.741408970599439e-06, "loss": 17.5923, "step": 32473 }, { "epoch": 0.5935986254044272, "grad_norm": 5.808108031509452, "learning_rate": 3.741122492669802e-06, "loss": 17.2897, "step": 32474 }, { "epoch": 0.5936169046008738, "grad_norm": 4.234294459946237, "learning_rate": 3.740836019152419e-06, "loss": 16.6757, "step": 32475 }, { "epoch": 0.5936351837973203, "grad_norm": 6.708112582984076, "learning_rate": 3.740549550048298e-06, "loss": 17.4695, "step": 32476 }, { "epoch": 0.5936534629937668, "grad_norm": 6.291162219697142, "learning_rate": 3.740263085358442e-06, "loss": 17.4337, "step": 32477 }, { "epoch": 0.5936717421902133, "grad_norm": 5.18464626007422, "learning_rate": 3.7399766250838533e-06, "loss": 17.0328, "step": 32478 }, { "epoch": 0.5936900213866598, "grad_norm": 5.350100525274008, "learning_rate": 3.7396901692255384e-06, "loss": 17.1756, "step": 32479 }, { "epoch": 0.5937083005831064, "grad_norm": 7.66202530572682, "learning_rate": 3.7394037177845e-06, "loss": 17.7703, "step": 32480 }, { "epoch": 0.5937265797795529, "grad_norm": 6.884129529533606, "learning_rate": 3.73911727076174e-06, "loss": 17.4704, "step": 32481 }, { "epoch": 0.5937448589759994, "grad_norm": 6.067698273099964, "learning_rate": 3.7388308281582674e-06, "loss": 17.5134, "step": 32482 }, { "epoch": 0.593763138172446, "grad_norm": 6.272354752416102, "learning_rate": 3.7385443899750813e-06, "loss": 17.5502, "step": 32483 }, { "epoch": 0.5937814173688925, "grad_norm": 5.611977227240083, "learning_rate": 3.7382579562131895e-06, "loss": 17.3257, "step": 32484 }, { "epoch": 0.593799696565339, "grad_norm": 5.960774749549693, "learning_rate": 3.737971526873593e-06, "loss": 17.1762, "step": 32485 }, { "epoch": 0.5938179757617855, "grad_norm": 6.578759488758533, "learning_rate": 3.7376851019572957e-06, "loss": 17.5539, "step": 32486 }, { "epoch": 0.593836254958232, "grad_norm": 5.260691222310432, "learning_rate": 3.737398681465305e-06, "loss": 17.0872, "step": 32487 }, { "epoch": 0.5938545341546786, "grad_norm": 6.400088087100167, "learning_rate": 3.737112265398622e-06, "loss": 17.6915, "step": 32488 }, { "epoch": 0.5938728133511251, "grad_norm": 7.901580416509176, "learning_rate": 3.736825853758249e-06, "loss": 18.0534, "step": 32489 }, { "epoch": 0.5938910925475717, "grad_norm": 7.864269041487524, "learning_rate": 3.7365394465451926e-06, "loss": 18.3992, "step": 32490 }, { "epoch": 0.5939093717440181, "grad_norm": 5.81287640950201, "learning_rate": 3.736253043760457e-06, "loss": 17.058, "step": 32491 }, { "epoch": 0.5939276509404646, "grad_norm": 7.071727236206133, "learning_rate": 3.7359666454050426e-06, "loss": 17.7758, "step": 32492 }, { "epoch": 0.5939459301369112, "grad_norm": 5.659089718723531, "learning_rate": 3.7356802514799578e-06, "loss": 17.1145, "step": 32493 }, { "epoch": 0.5939642093333577, "grad_norm": 7.55706238296018, "learning_rate": 3.735393861986203e-06, "loss": 18.0259, "step": 32494 }, { "epoch": 0.5939824885298042, "grad_norm": 6.849038804017977, "learning_rate": 3.7351074769247817e-06, "loss": 17.3692, "step": 32495 }, { "epoch": 0.5940007677262508, "grad_norm": 6.40422495498138, "learning_rate": 3.7348210962967013e-06, "loss": 17.4659, "step": 32496 }, { "epoch": 0.5940190469226972, "grad_norm": 11.123061039185297, "learning_rate": 3.7345347201029625e-06, "loss": 19.2438, "step": 32497 }, { "epoch": 0.5940373261191438, "grad_norm": 6.843072124431628, "learning_rate": 3.734248348344568e-06, "loss": 17.4052, "step": 32498 }, { "epoch": 0.5940556053155903, "grad_norm": 8.651086588010369, "learning_rate": 3.7339619810225245e-06, "loss": 17.8254, "step": 32499 }, { "epoch": 0.5940738845120368, "grad_norm": 7.3610170681914395, "learning_rate": 3.7336756181378332e-06, "loss": 17.9702, "step": 32500 }, { "epoch": 0.5940921637084834, "grad_norm": 6.474274672483953, "learning_rate": 3.733389259691501e-06, "loss": 17.5544, "step": 32501 }, { "epoch": 0.5941104429049299, "grad_norm": 6.075785790883177, "learning_rate": 3.733102905684529e-06, "loss": 17.2639, "step": 32502 }, { "epoch": 0.5941287221013765, "grad_norm": 8.319038305543927, "learning_rate": 3.7328165561179197e-06, "loss": 18.7963, "step": 32503 }, { "epoch": 0.5941470012978229, "grad_norm": 8.48671750144556, "learning_rate": 3.7325302109926798e-06, "loss": 17.5182, "step": 32504 }, { "epoch": 0.5941652804942694, "grad_norm": 6.418142629531091, "learning_rate": 3.732243870309813e-06, "loss": 17.4764, "step": 32505 }, { "epoch": 0.594183559690716, "grad_norm": 7.317452158748202, "learning_rate": 3.7319575340703185e-06, "loss": 17.937, "step": 32506 }, { "epoch": 0.5942018388871625, "grad_norm": 5.142137827990184, "learning_rate": 3.7316712022752045e-06, "loss": 16.8953, "step": 32507 }, { "epoch": 0.5942201180836091, "grad_norm": 7.1478903197995205, "learning_rate": 3.731384874925473e-06, "loss": 17.7481, "step": 32508 }, { "epoch": 0.5942383972800556, "grad_norm": 5.659243779597599, "learning_rate": 3.7310985520221253e-06, "loss": 17.395, "step": 32509 }, { "epoch": 0.594256676476502, "grad_norm": 8.15821614016749, "learning_rate": 3.73081223356617e-06, "loss": 18.1866, "step": 32510 }, { "epoch": 0.5942749556729486, "grad_norm": 6.470109199424329, "learning_rate": 3.7305259195586074e-06, "loss": 17.8179, "step": 32511 }, { "epoch": 0.5942932348693951, "grad_norm": 6.939735408763934, "learning_rate": 3.730239610000439e-06, "loss": 17.2439, "step": 32512 }, { "epoch": 0.5943115140658417, "grad_norm": 7.3921866411994825, "learning_rate": 3.729953304892672e-06, "loss": 17.8486, "step": 32513 }, { "epoch": 0.5943297932622882, "grad_norm": 5.981964257378293, "learning_rate": 3.729667004236307e-06, "loss": 17.3543, "step": 32514 }, { "epoch": 0.5943480724587347, "grad_norm": 4.8420731573366576, "learning_rate": 3.729380708032351e-06, "loss": 17.0899, "step": 32515 }, { "epoch": 0.5943663516551813, "grad_norm": 6.571500643551739, "learning_rate": 3.7290944162818054e-06, "loss": 17.3615, "step": 32516 }, { "epoch": 0.5943846308516277, "grad_norm": 7.03489863092381, "learning_rate": 3.7288081289856715e-06, "loss": 17.5176, "step": 32517 }, { "epoch": 0.5944029100480743, "grad_norm": 6.143904759235542, "learning_rate": 3.7285218461449564e-06, "loss": 17.6486, "step": 32518 }, { "epoch": 0.5944211892445208, "grad_norm": 5.554603539578429, "learning_rate": 3.7282355677606624e-06, "loss": 17.144, "step": 32519 }, { "epoch": 0.5944394684409673, "grad_norm": 5.904770579362904, "learning_rate": 3.7279492938337903e-06, "loss": 17.038, "step": 32520 }, { "epoch": 0.5944577476374139, "grad_norm": 8.512806604118687, "learning_rate": 3.7276630243653465e-06, "loss": 18.0821, "step": 32521 }, { "epoch": 0.5944760268338604, "grad_norm": 4.673270038498655, "learning_rate": 3.7273767593563346e-06, "loss": 16.795, "step": 32522 }, { "epoch": 0.594494306030307, "grad_norm": 5.642085244113478, "learning_rate": 3.727090498807755e-06, "loss": 17.4326, "step": 32523 }, { "epoch": 0.5945125852267534, "grad_norm": 6.1385638360726125, "learning_rate": 3.726804242720614e-06, "loss": 17.4129, "step": 32524 }, { "epoch": 0.5945308644231999, "grad_norm": 5.8160378181483186, "learning_rate": 3.7265179910959137e-06, "loss": 17.2418, "step": 32525 }, { "epoch": 0.5945491436196465, "grad_norm": 6.629514964987007, "learning_rate": 3.726231743934655e-06, "loss": 17.5019, "step": 32526 }, { "epoch": 0.594567422816093, "grad_norm": 5.917631903323639, "learning_rate": 3.7259455012378464e-06, "loss": 17.3801, "step": 32527 }, { "epoch": 0.5945857020125396, "grad_norm": 9.334503826367873, "learning_rate": 3.725659263006488e-06, "loss": 18.4428, "step": 32528 }, { "epoch": 0.594603981208986, "grad_norm": 7.866735553106864, "learning_rate": 3.7253730292415817e-06, "loss": 17.8265, "step": 32529 }, { "epoch": 0.5946222604054325, "grad_norm": 5.774056676589034, "learning_rate": 3.725086799944133e-06, "loss": 17.2098, "step": 32530 }, { "epoch": 0.5946405396018791, "grad_norm": 6.844088310673302, "learning_rate": 3.7248005751151435e-06, "loss": 17.6422, "step": 32531 }, { "epoch": 0.5946588187983256, "grad_norm": 5.706437266443102, "learning_rate": 3.7245143547556193e-06, "loss": 17.4033, "step": 32532 }, { "epoch": 0.5946770979947722, "grad_norm": 5.426462753763927, "learning_rate": 3.7242281388665614e-06, "loss": 17.1595, "step": 32533 }, { "epoch": 0.5946953771912187, "grad_norm": 5.770307902411537, "learning_rate": 3.7239419274489717e-06, "loss": 17.4194, "step": 32534 }, { "epoch": 0.5947136563876652, "grad_norm": 8.230544595593031, "learning_rate": 3.7236557205038553e-06, "loss": 17.8678, "step": 32535 }, { "epoch": 0.5947319355841117, "grad_norm": 8.081443819098416, "learning_rate": 3.723369518032216e-06, "loss": 17.8854, "step": 32536 }, { "epoch": 0.5947502147805582, "grad_norm": 6.622526937921328, "learning_rate": 3.7230833200350536e-06, "loss": 17.3837, "step": 32537 }, { "epoch": 0.5947684939770048, "grad_norm": 7.1151330995953215, "learning_rate": 3.7227971265133754e-06, "loss": 17.5576, "step": 32538 }, { "epoch": 0.5947867731734513, "grad_norm": 8.63538476444428, "learning_rate": 3.7225109374681823e-06, "loss": 17.4924, "step": 32539 }, { "epoch": 0.5948050523698978, "grad_norm": 5.619298690778726, "learning_rate": 3.7222247529004753e-06, "loss": 17.3647, "step": 32540 }, { "epoch": 0.5948233315663444, "grad_norm": 6.245920580243971, "learning_rate": 3.7219385728112624e-06, "loss": 17.049, "step": 32541 }, { "epoch": 0.5948416107627909, "grad_norm": 6.382815611893387, "learning_rate": 3.7216523972015438e-06, "loss": 17.3811, "step": 32542 }, { "epoch": 0.5948598899592374, "grad_norm": 6.099620565403188, "learning_rate": 3.7213662260723203e-06, "loss": 17.2242, "step": 32543 }, { "epoch": 0.5948781691556839, "grad_norm": 7.958926484512095, "learning_rate": 3.721080059424598e-06, "loss": 18.1524, "step": 32544 }, { "epoch": 0.5948964483521304, "grad_norm": 6.745476402495817, "learning_rate": 3.7207938972593782e-06, "loss": 17.3764, "step": 32545 }, { "epoch": 0.594914727548577, "grad_norm": 6.0785044744795, "learning_rate": 3.720507739577668e-06, "loss": 17.5411, "step": 32546 }, { "epoch": 0.5949330067450235, "grad_norm": 5.757677556082071, "learning_rate": 3.7202215863804657e-06, "loss": 17.3357, "step": 32547 }, { "epoch": 0.5949512859414701, "grad_norm": 6.312092862632237, "learning_rate": 3.7199354376687747e-06, "loss": 17.5545, "step": 32548 }, { "epoch": 0.5949695651379165, "grad_norm": 7.239988980332017, "learning_rate": 3.719649293443599e-06, "loss": 17.627, "step": 32549 }, { "epoch": 0.594987844334363, "grad_norm": 7.348519699586301, "learning_rate": 3.7193631537059426e-06, "loss": 17.8762, "step": 32550 }, { "epoch": 0.5950061235308096, "grad_norm": 5.228298715929458, "learning_rate": 3.719077018456806e-06, "loss": 17.2403, "step": 32551 }, { "epoch": 0.5950244027272561, "grad_norm": 5.063871248042468, "learning_rate": 3.7187908876971945e-06, "loss": 17.2109, "step": 32552 }, { "epoch": 0.5950426819237027, "grad_norm": 6.407734723410603, "learning_rate": 3.718504761428109e-06, "loss": 17.5912, "step": 32553 }, { "epoch": 0.5950609611201492, "grad_norm": 5.410722810867663, "learning_rate": 3.718218639650552e-06, "loss": 16.9475, "step": 32554 }, { "epoch": 0.5950792403165956, "grad_norm": 5.303256366422511, "learning_rate": 3.7179325223655295e-06, "loss": 17.0285, "step": 32555 }, { "epoch": 0.5950975195130422, "grad_norm": 6.351868230012693, "learning_rate": 3.7176464095740426e-06, "loss": 17.5599, "step": 32556 }, { "epoch": 0.5951157987094887, "grad_norm": 5.700615603403183, "learning_rate": 3.717360301277091e-06, "loss": 17.1316, "step": 32557 }, { "epoch": 0.5951340779059353, "grad_norm": 6.547993348468196, "learning_rate": 3.7170741974756817e-06, "loss": 17.371, "step": 32558 }, { "epoch": 0.5951523571023818, "grad_norm": 5.498868780548981, "learning_rate": 3.7167880981708173e-06, "loss": 17.3007, "step": 32559 }, { "epoch": 0.5951706362988283, "grad_norm": 7.062698656183253, "learning_rate": 3.716502003363497e-06, "loss": 17.7677, "step": 32560 }, { "epoch": 0.5951889154952749, "grad_norm": 6.911653621450703, "learning_rate": 3.716215913054728e-06, "loss": 17.5509, "step": 32561 }, { "epoch": 0.5952071946917213, "grad_norm": 5.574969333936034, "learning_rate": 3.7159298272455084e-06, "loss": 17.0721, "step": 32562 }, { "epoch": 0.5952254738881678, "grad_norm": 6.621740715506872, "learning_rate": 3.7156437459368445e-06, "loss": 17.5607, "step": 32563 }, { "epoch": 0.5952437530846144, "grad_norm": 6.722979600875208, "learning_rate": 3.715357669129739e-06, "loss": 17.8805, "step": 32564 }, { "epoch": 0.5952620322810609, "grad_norm": 5.474648325956034, "learning_rate": 3.715071596825191e-06, "loss": 17.1368, "step": 32565 }, { "epoch": 0.5952803114775075, "grad_norm": 5.617606442830229, "learning_rate": 3.714785529024208e-06, "loss": 17.2117, "step": 32566 }, { "epoch": 0.595298590673954, "grad_norm": 6.875904006772385, "learning_rate": 3.7144994657277887e-06, "loss": 17.456, "step": 32567 }, { "epoch": 0.5953168698704004, "grad_norm": 5.8937227803798455, "learning_rate": 3.7142134069369367e-06, "loss": 17.2339, "step": 32568 }, { "epoch": 0.595335149066847, "grad_norm": 7.888256276710944, "learning_rate": 3.713927352652657e-06, "loss": 17.7036, "step": 32569 }, { "epoch": 0.5953534282632935, "grad_norm": 6.6704630097825035, "learning_rate": 3.7136413028759507e-06, "loss": 17.6536, "step": 32570 }, { "epoch": 0.5953717074597401, "grad_norm": 6.08917156561106, "learning_rate": 3.713355257607817e-06, "loss": 17.4118, "step": 32571 }, { "epoch": 0.5953899866561866, "grad_norm": 6.8644583893898945, "learning_rate": 3.7130692168492634e-06, "loss": 17.634, "step": 32572 }, { "epoch": 0.5954082658526331, "grad_norm": 5.1056270676711755, "learning_rate": 3.7127831806012916e-06, "loss": 16.9934, "step": 32573 }, { "epoch": 0.5954265450490797, "grad_norm": 9.21746310683452, "learning_rate": 3.7124971488648997e-06, "loss": 17.8201, "step": 32574 }, { "epoch": 0.5954448242455261, "grad_norm": 5.303482533810919, "learning_rate": 3.712211121641097e-06, "loss": 17.3046, "step": 32575 }, { "epoch": 0.5954631034419727, "grad_norm": 5.942131826108529, "learning_rate": 3.7119250989308797e-06, "loss": 17.1722, "step": 32576 }, { "epoch": 0.5954813826384192, "grad_norm": 6.399580315353358, "learning_rate": 3.7116390807352547e-06, "loss": 17.4672, "step": 32577 }, { "epoch": 0.5954996618348657, "grad_norm": 7.625485189020951, "learning_rate": 3.711353067055224e-06, "loss": 17.9271, "step": 32578 }, { "epoch": 0.5955179410313123, "grad_norm": 7.288314216658441, "learning_rate": 3.711067057891787e-06, "loss": 17.8559, "step": 32579 }, { "epoch": 0.5955362202277588, "grad_norm": 6.475386951458456, "learning_rate": 3.71078105324595e-06, "loss": 17.5047, "step": 32580 }, { "epoch": 0.5955544994242054, "grad_norm": 5.3989982640581955, "learning_rate": 3.710495053118712e-06, "loss": 17.2763, "step": 32581 }, { "epoch": 0.5955727786206518, "grad_norm": 6.186208082512089, "learning_rate": 3.710209057511076e-06, "loss": 17.4756, "step": 32582 }, { "epoch": 0.5955910578170983, "grad_norm": 5.624389274364163, "learning_rate": 3.7099230664240483e-06, "loss": 17.1803, "step": 32583 }, { "epoch": 0.5956093370135449, "grad_norm": 6.08404405839317, "learning_rate": 3.7096370798586262e-06, "loss": 17.3619, "step": 32584 }, { "epoch": 0.5956276162099914, "grad_norm": 5.4941250674989375, "learning_rate": 3.7093510978158142e-06, "loss": 17.18, "step": 32585 }, { "epoch": 0.595645895406438, "grad_norm": 7.95837454547507, "learning_rate": 3.7090651202966156e-06, "loss": 17.4411, "step": 32586 }, { "epoch": 0.5956641746028845, "grad_norm": 5.557038050477434, "learning_rate": 3.708779147302032e-06, "loss": 17.1454, "step": 32587 }, { "epoch": 0.5956824537993309, "grad_norm": 5.108993316768832, "learning_rate": 3.7084931788330634e-06, "loss": 17.0727, "step": 32588 }, { "epoch": 0.5957007329957775, "grad_norm": 6.147396364201553, "learning_rate": 3.7082072148907157e-06, "loss": 17.333, "step": 32589 }, { "epoch": 0.595719012192224, "grad_norm": 7.167986356603837, "learning_rate": 3.7079212554759904e-06, "loss": 17.6562, "step": 32590 }, { "epoch": 0.5957372913886706, "grad_norm": 5.266438677577597, "learning_rate": 3.707635300589887e-06, "loss": 17.2115, "step": 32591 }, { "epoch": 0.5957555705851171, "grad_norm": 7.101415774587655, "learning_rate": 3.707349350233411e-06, "loss": 17.7911, "step": 32592 }, { "epoch": 0.5957738497815636, "grad_norm": 5.177872609413307, "learning_rate": 3.707063404407562e-06, "loss": 16.9835, "step": 32593 }, { "epoch": 0.5957921289780101, "grad_norm": 6.352633782464779, "learning_rate": 3.7067774631133448e-06, "loss": 17.7683, "step": 32594 }, { "epoch": 0.5958104081744566, "grad_norm": 6.82709435498909, "learning_rate": 3.706491526351761e-06, "loss": 17.7841, "step": 32595 }, { "epoch": 0.5958286873709032, "grad_norm": 6.781833422537034, "learning_rate": 3.7062055941238095e-06, "loss": 17.3905, "step": 32596 }, { "epoch": 0.5958469665673497, "grad_norm": 8.187187759541796, "learning_rate": 3.7059196664304985e-06, "loss": 18.1102, "step": 32597 }, { "epoch": 0.5958652457637962, "grad_norm": 4.66286944656225, "learning_rate": 3.705633743272825e-06, "loss": 16.8399, "step": 32598 }, { "epoch": 0.5958835249602428, "grad_norm": 9.637526652850138, "learning_rate": 3.7053478246517916e-06, "loss": 18.4243, "step": 32599 }, { "epoch": 0.5959018041566893, "grad_norm": 7.040063437956536, "learning_rate": 3.7050619105684044e-06, "loss": 17.8062, "step": 32600 }, { "epoch": 0.5959200833531358, "grad_norm": 5.7928070382668, "learning_rate": 3.704776001023663e-06, "loss": 17.0123, "step": 32601 }, { "epoch": 0.5959383625495823, "grad_norm": 6.315123035890349, "learning_rate": 3.7044900960185665e-06, "loss": 17.5654, "step": 32602 }, { "epoch": 0.5959566417460288, "grad_norm": 6.384085868337912, "learning_rate": 3.7042041955541206e-06, "loss": 17.333, "step": 32603 }, { "epoch": 0.5959749209424754, "grad_norm": 5.839472089121521, "learning_rate": 3.703918299631329e-06, "loss": 17.2211, "step": 32604 }, { "epoch": 0.5959932001389219, "grad_norm": 5.748629704946028, "learning_rate": 3.7036324082511883e-06, "loss": 17.1564, "step": 32605 }, { "epoch": 0.5960114793353685, "grad_norm": 5.667136542072995, "learning_rate": 3.703346521414706e-06, "loss": 17.0258, "step": 32606 }, { "epoch": 0.596029758531815, "grad_norm": 4.489269927398531, "learning_rate": 3.7030606391228784e-06, "loss": 16.8655, "step": 32607 }, { "epoch": 0.5960480377282614, "grad_norm": 5.099499308352161, "learning_rate": 3.702774761376713e-06, "loss": 16.957, "step": 32608 }, { "epoch": 0.596066316924708, "grad_norm": 5.2893696927172105, "learning_rate": 3.7024888881772104e-06, "loss": 17.0695, "step": 32609 }, { "epoch": 0.5960845961211545, "grad_norm": 7.037391762024077, "learning_rate": 3.7022030195253687e-06, "loss": 17.7287, "step": 32610 }, { "epoch": 0.5961028753176011, "grad_norm": 6.789520695163414, "learning_rate": 3.7019171554221957e-06, "loss": 17.5252, "step": 32611 }, { "epoch": 0.5961211545140476, "grad_norm": 5.205555891600464, "learning_rate": 3.7016312958686894e-06, "loss": 17.063, "step": 32612 }, { "epoch": 0.596139433710494, "grad_norm": 5.753680829290348, "learning_rate": 3.701345440865851e-06, "loss": 17.1142, "step": 32613 }, { "epoch": 0.5961577129069406, "grad_norm": 5.6108970148046575, "learning_rate": 3.701059590414687e-06, "loss": 17.0117, "step": 32614 }, { "epoch": 0.5961759921033871, "grad_norm": 6.445812083535519, "learning_rate": 3.700773744516196e-06, "loss": 17.3422, "step": 32615 }, { "epoch": 0.5961942712998337, "grad_norm": 5.897567522098008, "learning_rate": 3.700487903171378e-06, "loss": 17.3126, "step": 32616 }, { "epoch": 0.5962125504962802, "grad_norm": 6.874707694869609, "learning_rate": 3.7002020663812388e-06, "loss": 17.7704, "step": 32617 }, { "epoch": 0.5962308296927267, "grad_norm": 5.147390465084678, "learning_rate": 3.699916234146779e-06, "loss": 16.898, "step": 32618 }, { "epoch": 0.5962491088891733, "grad_norm": 6.240866724669606, "learning_rate": 3.6996304064689984e-06, "loss": 17.3885, "step": 32619 }, { "epoch": 0.5962673880856197, "grad_norm": 6.065347383871791, "learning_rate": 3.699344583348903e-06, "loss": 17.7344, "step": 32620 }, { "epoch": 0.5962856672820663, "grad_norm": 6.13044617933526, "learning_rate": 3.699058764787489e-06, "loss": 17.5313, "step": 32621 }, { "epoch": 0.5963039464785128, "grad_norm": 5.3557493083147785, "learning_rate": 3.6987729507857615e-06, "loss": 17.2426, "step": 32622 }, { "epoch": 0.5963222256749593, "grad_norm": 6.094607194397272, "learning_rate": 3.6984871413447243e-06, "loss": 17.4023, "step": 32623 }, { "epoch": 0.5963405048714059, "grad_norm": 8.751892493219179, "learning_rate": 3.6982013364653737e-06, "loss": 18.6353, "step": 32624 }, { "epoch": 0.5963587840678524, "grad_norm": 6.320872364919992, "learning_rate": 3.697915536148717e-06, "loss": 17.3206, "step": 32625 }, { "epoch": 0.596377063264299, "grad_norm": 6.364906063500269, "learning_rate": 3.6976297403957523e-06, "loss": 17.388, "step": 32626 }, { "epoch": 0.5963953424607454, "grad_norm": 6.829236665472349, "learning_rate": 3.6973439492074814e-06, "loss": 17.4524, "step": 32627 }, { "epoch": 0.5964136216571919, "grad_norm": 7.19126917906719, "learning_rate": 3.697058162584909e-06, "loss": 17.6532, "step": 32628 }, { "epoch": 0.5964319008536385, "grad_norm": 7.246147549681459, "learning_rate": 3.6967723805290344e-06, "loss": 18.0198, "step": 32629 }, { "epoch": 0.596450180050085, "grad_norm": 7.693552019989544, "learning_rate": 3.6964866030408574e-06, "loss": 17.9333, "step": 32630 }, { "epoch": 0.5964684592465315, "grad_norm": 7.6959175696793185, "learning_rate": 3.696200830121383e-06, "loss": 18.1442, "step": 32631 }, { "epoch": 0.5964867384429781, "grad_norm": 6.038197566491418, "learning_rate": 3.6959150617716122e-06, "loss": 17.4021, "step": 32632 }, { "epoch": 0.5965050176394245, "grad_norm": 5.658811296522364, "learning_rate": 3.6956292979925444e-06, "loss": 17.1381, "step": 32633 }, { "epoch": 0.5965232968358711, "grad_norm": 5.986450227237827, "learning_rate": 3.6953435387851845e-06, "loss": 17.2452, "step": 32634 }, { "epoch": 0.5965415760323176, "grad_norm": 6.931343613750109, "learning_rate": 3.695057784150532e-06, "loss": 17.7457, "step": 32635 }, { "epoch": 0.5965598552287641, "grad_norm": 7.081452832600232, "learning_rate": 3.6947720340895866e-06, "loss": 17.2882, "step": 32636 }, { "epoch": 0.5965781344252107, "grad_norm": 6.586572335190395, "learning_rate": 3.6944862886033545e-06, "loss": 17.3105, "step": 32637 }, { "epoch": 0.5965964136216572, "grad_norm": 5.976625914553354, "learning_rate": 3.6942005476928324e-06, "loss": 17.0554, "step": 32638 }, { "epoch": 0.5966146928181038, "grad_norm": 4.880875430055479, "learning_rate": 3.6939148113590263e-06, "loss": 16.9963, "step": 32639 }, { "epoch": 0.5966329720145502, "grad_norm": 7.3870265781084905, "learning_rate": 3.6936290796029344e-06, "loss": 17.4312, "step": 32640 }, { "epoch": 0.5966512512109967, "grad_norm": 7.559265204963089, "learning_rate": 3.693343352425558e-06, "loss": 17.8081, "step": 32641 }, { "epoch": 0.5966695304074433, "grad_norm": 7.611829799814677, "learning_rate": 3.6930576298279025e-06, "loss": 17.7528, "step": 32642 }, { "epoch": 0.5966878096038898, "grad_norm": 6.070146515884971, "learning_rate": 3.692771911810965e-06, "loss": 17.2742, "step": 32643 }, { "epoch": 0.5967060888003364, "grad_norm": 6.03101661419848, "learning_rate": 3.692486198375749e-06, "loss": 17.4133, "step": 32644 }, { "epoch": 0.5967243679967829, "grad_norm": 6.603037893336806, "learning_rate": 3.6922004895232546e-06, "loss": 17.5895, "step": 32645 }, { "epoch": 0.5967426471932293, "grad_norm": 4.812722928750236, "learning_rate": 3.6919147852544855e-06, "loss": 16.9726, "step": 32646 }, { "epoch": 0.5967609263896759, "grad_norm": 6.080285889090687, "learning_rate": 3.6916290855704397e-06, "loss": 17.2601, "step": 32647 }, { "epoch": 0.5967792055861224, "grad_norm": 8.18733701240759, "learning_rate": 3.6913433904721207e-06, "loss": 18.2591, "step": 32648 }, { "epoch": 0.596797484782569, "grad_norm": 6.220069719891606, "learning_rate": 3.6910576999605312e-06, "loss": 17.6385, "step": 32649 }, { "epoch": 0.5968157639790155, "grad_norm": 7.280291635174756, "learning_rate": 3.6907720140366687e-06, "loss": 18.0203, "step": 32650 }, { "epoch": 0.596834043175462, "grad_norm": 6.022305154311299, "learning_rate": 3.690486332701539e-06, "loss": 17.3159, "step": 32651 }, { "epoch": 0.5968523223719086, "grad_norm": 6.702739026187412, "learning_rate": 3.6902006559561386e-06, "loss": 17.6735, "step": 32652 }, { "epoch": 0.596870601568355, "grad_norm": 6.381036466809067, "learning_rate": 3.6899149838014726e-06, "loss": 17.5696, "step": 32653 }, { "epoch": 0.5968888807648016, "grad_norm": 7.251013589116251, "learning_rate": 3.689629316238542e-06, "loss": 17.6215, "step": 32654 }, { "epoch": 0.5969071599612481, "grad_norm": 8.627225844292488, "learning_rate": 3.6893436532683445e-06, "loss": 18.0409, "step": 32655 }, { "epoch": 0.5969254391576946, "grad_norm": 4.721886370278781, "learning_rate": 3.6890579948918857e-06, "loss": 16.8262, "step": 32656 }, { "epoch": 0.5969437183541412, "grad_norm": 7.127404121863241, "learning_rate": 3.688772341110164e-06, "loss": 17.5857, "step": 32657 }, { "epoch": 0.5969619975505877, "grad_norm": 5.525009127463051, "learning_rate": 3.6884866919241807e-06, "loss": 17.1432, "step": 32658 }, { "epoch": 0.5969802767470342, "grad_norm": 7.74790501363315, "learning_rate": 3.6882010473349395e-06, "loss": 17.604, "step": 32659 }, { "epoch": 0.5969985559434807, "grad_norm": 7.176867852480987, "learning_rate": 3.6879154073434396e-06, "loss": 17.3402, "step": 32660 }, { "epoch": 0.5970168351399272, "grad_norm": 5.542818267638107, "learning_rate": 3.6876297719506816e-06, "loss": 17.3659, "step": 32661 }, { "epoch": 0.5970351143363738, "grad_norm": 6.913395180643617, "learning_rate": 3.6873441411576673e-06, "loss": 17.8258, "step": 32662 }, { "epoch": 0.5970533935328203, "grad_norm": 6.00959606558785, "learning_rate": 3.6870585149653985e-06, "loss": 17.1654, "step": 32663 }, { "epoch": 0.5970716727292669, "grad_norm": 5.29633889388096, "learning_rate": 3.686772893374874e-06, "loss": 17.0889, "step": 32664 }, { "epoch": 0.5970899519257133, "grad_norm": 6.358973275613236, "learning_rate": 3.686487276387099e-06, "loss": 17.2488, "step": 32665 }, { "epoch": 0.5971082311221598, "grad_norm": 6.260736316502748, "learning_rate": 3.6862016640030706e-06, "loss": 17.2669, "step": 32666 }, { "epoch": 0.5971265103186064, "grad_norm": 4.628310656875265, "learning_rate": 3.6859160562237907e-06, "loss": 16.6784, "step": 32667 }, { "epoch": 0.5971447895150529, "grad_norm": 12.013951460776113, "learning_rate": 3.685630453050263e-06, "loss": 18.1273, "step": 32668 }, { "epoch": 0.5971630687114995, "grad_norm": 5.096204085124563, "learning_rate": 3.6853448544834847e-06, "loss": 17.0907, "step": 32669 }, { "epoch": 0.597181347907946, "grad_norm": 7.836923995935984, "learning_rate": 3.68505926052446e-06, "loss": 17.9753, "step": 32670 }, { "epoch": 0.5971996271043924, "grad_norm": 5.128448603072631, "learning_rate": 3.684773671174188e-06, "loss": 17.0465, "step": 32671 }, { "epoch": 0.597217906300839, "grad_norm": 6.998870163192069, "learning_rate": 3.6844880864336684e-06, "loss": 17.8981, "step": 32672 }, { "epoch": 0.5972361854972855, "grad_norm": 6.183265338381604, "learning_rate": 3.6842025063039067e-06, "loss": 17.2757, "step": 32673 }, { "epoch": 0.5972544646937321, "grad_norm": 7.540762131161706, "learning_rate": 3.683916930785901e-06, "loss": 17.6965, "step": 32674 }, { "epoch": 0.5972727438901786, "grad_norm": 7.288667549602545, "learning_rate": 3.6836313598806493e-06, "loss": 17.6246, "step": 32675 }, { "epoch": 0.5972910230866251, "grad_norm": 6.051126175415889, "learning_rate": 3.683345793589158e-06, "loss": 17.3373, "step": 32676 }, { "epoch": 0.5973093022830717, "grad_norm": 7.049650114909883, "learning_rate": 3.6830602319124254e-06, "loss": 17.7887, "step": 32677 }, { "epoch": 0.5973275814795181, "grad_norm": 6.454563582127659, "learning_rate": 3.68277467485145e-06, "loss": 17.2508, "step": 32678 }, { "epoch": 0.5973458606759647, "grad_norm": 7.5938151861959335, "learning_rate": 3.682489122407238e-06, "loss": 17.4235, "step": 32679 }, { "epoch": 0.5973641398724112, "grad_norm": 7.764462007778552, "learning_rate": 3.682203574580786e-06, "loss": 18.1172, "step": 32680 }, { "epoch": 0.5973824190688577, "grad_norm": 5.990806754347294, "learning_rate": 3.6819180313730953e-06, "loss": 17.5485, "step": 32681 }, { "epoch": 0.5974006982653043, "grad_norm": 7.082062254743441, "learning_rate": 3.6816324927851692e-06, "loss": 17.5642, "step": 32682 }, { "epoch": 0.5974189774617508, "grad_norm": 6.277803325093644, "learning_rate": 3.6813469588180052e-06, "loss": 17.3353, "step": 32683 }, { "epoch": 0.5974372566581974, "grad_norm": 7.025941163833105, "learning_rate": 3.6810614294726075e-06, "loss": 17.9366, "step": 32684 }, { "epoch": 0.5974555358546438, "grad_norm": 5.151303578558078, "learning_rate": 3.680775904749974e-06, "loss": 16.9799, "step": 32685 }, { "epoch": 0.5974738150510903, "grad_norm": 7.169977200660774, "learning_rate": 3.6804903846511053e-06, "loss": 17.841, "step": 32686 }, { "epoch": 0.5974920942475369, "grad_norm": 7.477236056198833, "learning_rate": 3.6802048691770054e-06, "loss": 17.7288, "step": 32687 }, { "epoch": 0.5975103734439834, "grad_norm": 5.866479349962711, "learning_rate": 3.679919358328673e-06, "loss": 17.2822, "step": 32688 }, { "epoch": 0.59752865264043, "grad_norm": 6.3654649450792355, "learning_rate": 3.6796338521071063e-06, "loss": 17.7856, "step": 32689 }, { "epoch": 0.5975469318368765, "grad_norm": 6.930892692140906, "learning_rate": 3.67934835051331e-06, "loss": 17.8645, "step": 32690 }, { "epoch": 0.5975652110333229, "grad_norm": 7.692752180962556, "learning_rate": 3.6790628535482835e-06, "loss": 17.3378, "step": 32691 }, { "epoch": 0.5975834902297695, "grad_norm": 5.3053446762387395, "learning_rate": 3.6787773612130256e-06, "loss": 16.98, "step": 32692 }, { "epoch": 0.597601769426216, "grad_norm": 6.96146990876266, "learning_rate": 3.6784918735085404e-06, "loss": 17.5892, "step": 32693 }, { "epoch": 0.5976200486226626, "grad_norm": 5.873314436742445, "learning_rate": 3.678206390435825e-06, "loss": 17.3328, "step": 32694 }, { "epoch": 0.5976383278191091, "grad_norm": 5.778865898328712, "learning_rate": 3.677920911995881e-06, "loss": 17.1034, "step": 32695 }, { "epoch": 0.5976566070155556, "grad_norm": 5.255955773419316, "learning_rate": 3.677635438189712e-06, "loss": 16.9673, "step": 32696 }, { "epoch": 0.5976748862120022, "grad_norm": 7.247870376255421, "learning_rate": 3.6773499690183144e-06, "loss": 17.3612, "step": 32697 }, { "epoch": 0.5976931654084486, "grad_norm": 7.71159850109314, "learning_rate": 3.6770645044826892e-06, "loss": 17.8162, "step": 32698 }, { "epoch": 0.5977114446048951, "grad_norm": 6.402870393830735, "learning_rate": 3.6767790445838394e-06, "loss": 17.5083, "step": 32699 }, { "epoch": 0.5977297238013417, "grad_norm": 7.273625613599109, "learning_rate": 3.6764935893227634e-06, "loss": 17.6972, "step": 32700 }, { "epoch": 0.5977480029977882, "grad_norm": 6.351487654134016, "learning_rate": 3.6762081387004638e-06, "loss": 17.3372, "step": 32701 }, { "epoch": 0.5977662821942348, "grad_norm": 5.495039862827122, "learning_rate": 3.6759226927179404e-06, "loss": 16.8259, "step": 32702 }, { "epoch": 0.5977845613906813, "grad_norm": 5.585927708713518, "learning_rate": 3.6756372513761908e-06, "loss": 17.0332, "step": 32703 }, { "epoch": 0.5978028405871277, "grad_norm": 5.896893649150948, "learning_rate": 3.675351814676219e-06, "loss": 17.1326, "step": 32704 }, { "epoch": 0.5978211197835743, "grad_norm": 5.8649997932366755, "learning_rate": 3.675066382619025e-06, "loss": 17.2933, "step": 32705 }, { "epoch": 0.5978393989800208, "grad_norm": 6.650239242483716, "learning_rate": 3.674780955205606e-06, "loss": 17.7319, "step": 32706 }, { "epoch": 0.5978576781764674, "grad_norm": 7.204829547770559, "learning_rate": 3.674495532436967e-06, "loss": 18.0929, "step": 32707 }, { "epoch": 0.5978759573729139, "grad_norm": 5.865324936570111, "learning_rate": 3.674210114314105e-06, "loss": 17.1469, "step": 32708 }, { "epoch": 0.5978942365693604, "grad_norm": 5.761145898330626, "learning_rate": 3.673924700838021e-06, "loss": 17.2798, "step": 32709 }, { "epoch": 0.597912515765807, "grad_norm": 7.818473250782102, "learning_rate": 3.6736392920097177e-06, "loss": 17.7487, "step": 32710 }, { "epoch": 0.5979307949622534, "grad_norm": 6.3550281476314545, "learning_rate": 3.673353887830192e-06, "loss": 17.6276, "step": 32711 }, { "epoch": 0.5979490741587, "grad_norm": 6.982989321975781, "learning_rate": 3.6730684883004465e-06, "loss": 17.4616, "step": 32712 }, { "epoch": 0.5979673533551465, "grad_norm": 6.232212736382949, "learning_rate": 3.6727830934214804e-06, "loss": 17.4058, "step": 32713 }, { "epoch": 0.597985632551593, "grad_norm": 5.689327167192317, "learning_rate": 3.672497703194293e-06, "loss": 17.1392, "step": 32714 }, { "epoch": 0.5980039117480396, "grad_norm": 5.558921795809185, "learning_rate": 3.6722123176198886e-06, "loss": 17.1867, "step": 32715 }, { "epoch": 0.598022190944486, "grad_norm": 5.839737665696314, "learning_rate": 3.6719269366992636e-06, "loss": 17.426, "step": 32716 }, { "epoch": 0.5980404701409326, "grad_norm": 5.73993987139927, "learning_rate": 3.6716415604334193e-06, "loss": 17.1105, "step": 32717 }, { "epoch": 0.5980587493373791, "grad_norm": 6.4949975350240114, "learning_rate": 3.6713561888233563e-06, "loss": 17.4343, "step": 32718 }, { "epoch": 0.5980770285338256, "grad_norm": 5.556069920227929, "learning_rate": 3.671070821870076e-06, "loss": 17.2135, "step": 32719 }, { "epoch": 0.5980953077302722, "grad_norm": 6.9917787743872095, "learning_rate": 3.670785459574574e-06, "loss": 17.6973, "step": 32720 }, { "epoch": 0.5981135869267187, "grad_norm": 6.251726440168958, "learning_rate": 3.6705001019378557e-06, "loss": 17.4197, "step": 32721 }, { "epoch": 0.5981318661231653, "grad_norm": 7.189370428058242, "learning_rate": 3.6702147489609197e-06, "loss": 17.6173, "step": 32722 }, { "epoch": 0.5981501453196117, "grad_norm": 5.394645359003324, "learning_rate": 3.6699294006447637e-06, "loss": 17.0562, "step": 32723 }, { "epoch": 0.5981684245160582, "grad_norm": 6.942716753236228, "learning_rate": 3.6696440569903913e-06, "loss": 17.6795, "step": 32724 }, { "epoch": 0.5981867037125048, "grad_norm": 5.754010926466271, "learning_rate": 3.669358717998801e-06, "loss": 17.289, "step": 32725 }, { "epoch": 0.5982049829089513, "grad_norm": 6.9159276335665725, "learning_rate": 3.6690733836709914e-06, "loss": 17.4941, "step": 32726 }, { "epoch": 0.5982232621053979, "grad_norm": 4.927400246682654, "learning_rate": 3.6687880540079655e-06, "loss": 16.7127, "step": 32727 }, { "epoch": 0.5982415413018444, "grad_norm": 6.98083145172095, "learning_rate": 3.6685027290107228e-06, "loss": 17.8093, "step": 32728 }, { "epoch": 0.5982598204982909, "grad_norm": 7.166131240008138, "learning_rate": 3.66821740868026e-06, "loss": 17.6415, "step": 32729 }, { "epoch": 0.5982780996947374, "grad_norm": 7.664817139884473, "learning_rate": 3.667932093017581e-06, "loss": 17.7035, "step": 32730 }, { "epoch": 0.5982963788911839, "grad_norm": 6.595114570828433, "learning_rate": 3.6676467820236825e-06, "loss": 17.4905, "step": 32731 }, { "epoch": 0.5983146580876305, "grad_norm": 5.938504252062683, "learning_rate": 3.66736147569957e-06, "loss": 17.2482, "step": 32732 }, { "epoch": 0.598332937284077, "grad_norm": 5.898479822207511, "learning_rate": 3.667076174046239e-06, "loss": 17.2581, "step": 32733 }, { "epoch": 0.5983512164805235, "grad_norm": 6.018969517823513, "learning_rate": 3.6667908770646884e-06, "loss": 17.2536, "step": 32734 }, { "epoch": 0.5983694956769701, "grad_norm": 4.881770432636454, "learning_rate": 3.6665055847559205e-06, "loss": 16.73, "step": 32735 }, { "epoch": 0.5983877748734165, "grad_norm": 5.517571485221127, "learning_rate": 3.6662202971209372e-06, "loss": 17.0394, "step": 32736 }, { "epoch": 0.5984060540698631, "grad_norm": 6.364552174660361, "learning_rate": 3.665935014160733e-06, "loss": 17.6303, "step": 32737 }, { "epoch": 0.5984243332663096, "grad_norm": 6.141724225612142, "learning_rate": 3.6656497358763125e-06, "loss": 17.3192, "step": 32738 }, { "epoch": 0.5984426124627561, "grad_norm": 6.058252086084493, "learning_rate": 3.6653644622686735e-06, "loss": 17.3959, "step": 32739 }, { "epoch": 0.5984608916592027, "grad_norm": 6.759874260088358, "learning_rate": 3.6650791933388154e-06, "loss": 17.5709, "step": 32740 }, { "epoch": 0.5984791708556492, "grad_norm": 5.540097978525488, "learning_rate": 3.6647939290877403e-06, "loss": 17.1343, "step": 32741 }, { "epoch": 0.5984974500520958, "grad_norm": 5.491008665459881, "learning_rate": 3.6645086695164472e-06, "loss": 17.0965, "step": 32742 }, { "epoch": 0.5985157292485422, "grad_norm": 6.198441281529219, "learning_rate": 3.6642234146259332e-06, "loss": 17.4798, "step": 32743 }, { "epoch": 0.5985340084449887, "grad_norm": 7.6974788457671, "learning_rate": 3.663938164417201e-06, "loss": 17.7282, "step": 32744 }, { "epoch": 0.5985522876414353, "grad_norm": 5.8854294659977375, "learning_rate": 3.6636529188912483e-06, "loss": 17.3228, "step": 32745 }, { "epoch": 0.5985705668378818, "grad_norm": 6.720039504724529, "learning_rate": 3.663367678049079e-06, "loss": 17.5411, "step": 32746 }, { "epoch": 0.5985888460343284, "grad_norm": 5.88828802094894, "learning_rate": 3.6630824418916892e-06, "loss": 17.2879, "step": 32747 }, { "epoch": 0.5986071252307749, "grad_norm": 5.803828395285062, "learning_rate": 3.662797210420078e-06, "loss": 17.4385, "step": 32748 }, { "epoch": 0.5986254044272213, "grad_norm": 5.084488141433528, "learning_rate": 3.6625119836352472e-06, "loss": 17.1228, "step": 32749 }, { "epoch": 0.5986436836236679, "grad_norm": 6.654502576614161, "learning_rate": 3.662226761538197e-06, "loss": 17.6337, "step": 32750 }, { "epoch": 0.5986619628201144, "grad_norm": 6.711350152897789, "learning_rate": 3.661941544129924e-06, "loss": 17.7702, "step": 32751 }, { "epoch": 0.598680242016561, "grad_norm": 7.7520141553227235, "learning_rate": 3.6616563314114315e-06, "loss": 17.6263, "step": 32752 }, { "epoch": 0.5986985212130075, "grad_norm": 5.238162637795171, "learning_rate": 3.6613711233837167e-06, "loss": 16.9429, "step": 32753 }, { "epoch": 0.598716800409454, "grad_norm": 4.9834170435623895, "learning_rate": 3.6610859200477787e-06, "loss": 16.7771, "step": 32754 }, { "epoch": 0.5987350796059006, "grad_norm": 6.8988557635573375, "learning_rate": 3.6608007214046206e-06, "loss": 17.6521, "step": 32755 }, { "epoch": 0.598753358802347, "grad_norm": 6.055020919637492, "learning_rate": 3.6605155274552396e-06, "loss": 17.4531, "step": 32756 }, { "epoch": 0.5987716379987936, "grad_norm": 4.7137887504984395, "learning_rate": 3.660230338200633e-06, "loss": 16.8762, "step": 32757 }, { "epoch": 0.5987899171952401, "grad_norm": 5.937457235393439, "learning_rate": 3.6599451536418046e-06, "loss": 17.2555, "step": 32758 }, { "epoch": 0.5988081963916866, "grad_norm": 6.6424771098652196, "learning_rate": 3.659659973779753e-06, "loss": 17.3081, "step": 32759 }, { "epoch": 0.5988264755881332, "grad_norm": 5.277430782260944, "learning_rate": 3.659374798615475e-06, "loss": 17.0561, "step": 32760 }, { "epoch": 0.5988447547845797, "grad_norm": 5.986517046728838, "learning_rate": 3.6590896281499734e-06, "loss": 17.4555, "step": 32761 }, { "epoch": 0.5988630339810262, "grad_norm": 6.788906501755192, "learning_rate": 3.6588044623842445e-06, "loss": 17.2969, "step": 32762 }, { "epoch": 0.5988813131774727, "grad_norm": 5.831311632580014, "learning_rate": 3.6585193013192906e-06, "loss": 17.0186, "step": 32763 }, { "epoch": 0.5988995923739192, "grad_norm": 6.587969712148032, "learning_rate": 3.658234144956111e-06, "loss": 17.4134, "step": 32764 }, { "epoch": 0.5989178715703658, "grad_norm": 5.013936368442204, "learning_rate": 3.6579489932957025e-06, "loss": 17.0521, "step": 32765 }, { "epoch": 0.5989361507668123, "grad_norm": 7.910816146341234, "learning_rate": 3.6576638463390686e-06, "loss": 17.9511, "step": 32766 }, { "epoch": 0.5989544299632588, "grad_norm": 6.004016597832085, "learning_rate": 3.657378704087205e-06, "loss": 17.089, "step": 32767 }, { "epoch": 0.5989727091597054, "grad_norm": 6.822090924132607, "learning_rate": 3.6570935665411116e-06, "loss": 17.7638, "step": 32768 }, { "epoch": 0.5989909883561518, "grad_norm": 7.2493910344084975, "learning_rate": 3.656808433701791e-06, "loss": 17.8187, "step": 32769 }, { "epoch": 0.5990092675525984, "grad_norm": 5.949138478161799, "learning_rate": 3.656523305570239e-06, "loss": 17.4013, "step": 32770 }, { "epoch": 0.5990275467490449, "grad_norm": 6.731869623079819, "learning_rate": 3.656238182147456e-06, "loss": 17.5824, "step": 32771 }, { "epoch": 0.5990458259454914, "grad_norm": 5.684370867216145, "learning_rate": 3.655953063434442e-06, "loss": 17.2346, "step": 32772 }, { "epoch": 0.599064105141938, "grad_norm": 5.387695560871848, "learning_rate": 3.6556679494321965e-06, "loss": 17.0158, "step": 32773 }, { "epoch": 0.5990823843383845, "grad_norm": 5.600104522975324, "learning_rate": 3.655382840141717e-06, "loss": 17.0456, "step": 32774 }, { "epoch": 0.599100663534831, "grad_norm": 7.248963461794543, "learning_rate": 3.6550977355640044e-06, "loss": 18.038, "step": 32775 }, { "epoch": 0.5991189427312775, "grad_norm": 7.048028407720542, "learning_rate": 3.654812635700058e-06, "loss": 17.698, "step": 32776 }, { "epoch": 0.599137221927724, "grad_norm": 4.8841216314895615, "learning_rate": 3.6545275405508758e-06, "loss": 17.0075, "step": 32777 }, { "epoch": 0.5991555011241706, "grad_norm": 5.211614727116336, "learning_rate": 3.654242450117459e-06, "loss": 17.0485, "step": 32778 }, { "epoch": 0.5991737803206171, "grad_norm": 9.609764303042047, "learning_rate": 3.653957364400804e-06, "loss": 18.3485, "step": 32779 }, { "epoch": 0.5991920595170637, "grad_norm": 5.703242970660442, "learning_rate": 3.6536722834019123e-06, "loss": 17.1756, "step": 32780 }, { "epoch": 0.5992103387135101, "grad_norm": 7.346589133162242, "learning_rate": 3.6533872071217833e-06, "loss": 17.9097, "step": 32781 }, { "epoch": 0.5992286179099566, "grad_norm": 5.444802440578195, "learning_rate": 3.653102135561414e-06, "loss": 17.0491, "step": 32782 }, { "epoch": 0.5992468971064032, "grad_norm": 6.195124811024125, "learning_rate": 3.6528170687218064e-06, "loss": 17.2235, "step": 32783 }, { "epoch": 0.5992651763028497, "grad_norm": 5.580927713651552, "learning_rate": 3.652532006603957e-06, "loss": 17.2208, "step": 32784 }, { "epoch": 0.5992834554992963, "grad_norm": 5.345430874956784, "learning_rate": 3.6522469492088647e-06, "loss": 16.9589, "step": 32785 }, { "epoch": 0.5993017346957428, "grad_norm": 7.306292344269812, "learning_rate": 3.6519618965375324e-06, "loss": 18.1685, "step": 32786 }, { "epoch": 0.5993200138921893, "grad_norm": 6.627069275075451, "learning_rate": 3.6516768485909567e-06, "loss": 17.1904, "step": 32787 }, { "epoch": 0.5993382930886358, "grad_norm": 6.205651270320009, "learning_rate": 3.651391805370134e-06, "loss": 17.4298, "step": 32788 }, { "epoch": 0.5993565722850823, "grad_norm": 6.872466763087369, "learning_rate": 3.6511067668760674e-06, "loss": 17.5449, "step": 32789 }, { "epoch": 0.5993748514815289, "grad_norm": 10.068947529578478, "learning_rate": 3.650821733109754e-06, "loss": 17.9301, "step": 32790 }, { "epoch": 0.5993931306779754, "grad_norm": 6.22802419853491, "learning_rate": 3.6505367040721945e-06, "loss": 17.3531, "step": 32791 }, { "epoch": 0.5994114098744219, "grad_norm": 5.545191989140035, "learning_rate": 3.6502516797643873e-06, "loss": 17.0967, "step": 32792 }, { "epoch": 0.5994296890708685, "grad_norm": 7.312017004533577, "learning_rate": 3.6499666601873285e-06, "loss": 17.3762, "step": 32793 }, { "epoch": 0.599447968267315, "grad_norm": 19.09000143286088, "learning_rate": 3.6496816453420202e-06, "loss": 19.1369, "step": 32794 }, { "epoch": 0.5994662474637615, "grad_norm": 8.037382554101915, "learning_rate": 3.6493966352294624e-06, "loss": 18.2248, "step": 32795 }, { "epoch": 0.599484526660208, "grad_norm": 7.176352080904677, "learning_rate": 3.6491116298506503e-06, "loss": 18.1302, "step": 32796 }, { "epoch": 0.5995028058566545, "grad_norm": 6.3531926820359566, "learning_rate": 3.6488266292065855e-06, "loss": 17.3978, "step": 32797 }, { "epoch": 0.5995210850531011, "grad_norm": 6.966539219309357, "learning_rate": 3.6485416332982657e-06, "loss": 17.6954, "step": 32798 }, { "epoch": 0.5995393642495476, "grad_norm": 6.38975132447244, "learning_rate": 3.64825664212669e-06, "loss": 17.1872, "step": 32799 }, { "epoch": 0.5995576434459942, "grad_norm": 5.348424730213396, "learning_rate": 3.647971655692858e-06, "loss": 17.1212, "step": 32800 }, { "epoch": 0.5995759226424406, "grad_norm": 5.939299757602489, "learning_rate": 3.647686673997769e-06, "loss": 17.5152, "step": 32801 }, { "epoch": 0.5995942018388871, "grad_norm": 5.9437777333970905, "learning_rate": 3.6474016970424187e-06, "loss": 17.2326, "step": 32802 }, { "epoch": 0.5996124810353337, "grad_norm": 7.613180059701422, "learning_rate": 3.647116724827809e-06, "loss": 17.822, "step": 32803 }, { "epoch": 0.5996307602317802, "grad_norm": 7.129735766776231, "learning_rate": 3.646831757354939e-06, "loss": 17.5078, "step": 32804 }, { "epoch": 0.5996490394282268, "grad_norm": 7.321094428241729, "learning_rate": 3.6465467946248035e-06, "loss": 17.7572, "step": 32805 }, { "epoch": 0.5996673186246733, "grad_norm": 8.379063683717266, "learning_rate": 3.6462618366384072e-06, "loss": 18.221, "step": 32806 }, { "epoch": 0.5996855978211197, "grad_norm": 9.048294255901807, "learning_rate": 3.645976883396742e-06, "loss": 18.8999, "step": 32807 }, { "epoch": 0.5997038770175663, "grad_norm": 8.097143122552993, "learning_rate": 3.6456919349008135e-06, "loss": 18.1567, "step": 32808 }, { "epoch": 0.5997221562140128, "grad_norm": 6.280552546729263, "learning_rate": 3.6454069911516167e-06, "loss": 17.3272, "step": 32809 }, { "epoch": 0.5997404354104594, "grad_norm": 6.663797524533891, "learning_rate": 3.6451220521501497e-06, "loss": 17.6957, "step": 32810 }, { "epoch": 0.5997587146069059, "grad_norm": 6.725932367290534, "learning_rate": 3.644837117897414e-06, "loss": 17.7852, "step": 32811 }, { "epoch": 0.5997769938033524, "grad_norm": 5.8663356593548315, "learning_rate": 3.6445521883944052e-06, "loss": 17.0313, "step": 32812 }, { "epoch": 0.599795272999799, "grad_norm": 6.308218983897973, "learning_rate": 3.644267263642123e-06, "loss": 17.1524, "step": 32813 }, { "epoch": 0.5998135521962454, "grad_norm": 8.72743028050356, "learning_rate": 3.643982343641568e-06, "loss": 18.1168, "step": 32814 }, { "epoch": 0.599831831392692, "grad_norm": 5.635749525453945, "learning_rate": 3.6436974283937377e-06, "loss": 17.2672, "step": 32815 }, { "epoch": 0.5998501105891385, "grad_norm": 7.573173769413205, "learning_rate": 3.643412517899627e-06, "loss": 18.0322, "step": 32816 }, { "epoch": 0.599868389785585, "grad_norm": 7.140263522515998, "learning_rate": 3.64312761216024e-06, "loss": 17.5374, "step": 32817 }, { "epoch": 0.5998866689820316, "grad_norm": 6.5056441988644425, "learning_rate": 3.6428427111765735e-06, "loss": 17.5148, "step": 32818 }, { "epoch": 0.5999049481784781, "grad_norm": 6.7065493614857425, "learning_rate": 3.642557814949623e-06, "loss": 17.5376, "step": 32819 }, { "epoch": 0.5999232273749247, "grad_norm": 6.555848273076776, "learning_rate": 3.6422729234803926e-06, "loss": 17.4327, "step": 32820 }, { "epoch": 0.5999415065713711, "grad_norm": 6.298670333156446, "learning_rate": 3.6419880367698757e-06, "loss": 17.6646, "step": 32821 }, { "epoch": 0.5999597857678176, "grad_norm": 5.560179874822506, "learning_rate": 3.641703154819073e-06, "loss": 17.3118, "step": 32822 }, { "epoch": 0.5999780649642642, "grad_norm": 7.944176030897292, "learning_rate": 3.6414182776289843e-06, "loss": 17.4399, "step": 32823 }, { "epoch": 0.5999963441607107, "grad_norm": 6.624000639111268, "learning_rate": 3.6411334052006046e-06, "loss": 17.2317, "step": 32824 }, { "epoch": 0.6000146233571573, "grad_norm": 6.04711638820917, "learning_rate": 3.6408485375349366e-06, "loss": 17.3393, "step": 32825 }, { "epoch": 0.6000329025536038, "grad_norm": 5.078182906417461, "learning_rate": 3.6405636746329755e-06, "loss": 16.9707, "step": 32826 }, { "epoch": 0.6000511817500502, "grad_norm": 7.134745064592775, "learning_rate": 3.6402788164957196e-06, "loss": 17.4681, "step": 32827 }, { "epoch": 0.6000694609464968, "grad_norm": 7.077995029858507, "learning_rate": 3.639993963124171e-06, "loss": 17.61, "step": 32828 }, { "epoch": 0.6000877401429433, "grad_norm": 6.983019002618678, "learning_rate": 3.639709114519325e-06, "loss": 17.6325, "step": 32829 }, { "epoch": 0.6001060193393899, "grad_norm": 6.55891821816552, "learning_rate": 3.639424270682178e-06, "loss": 17.2107, "step": 32830 }, { "epoch": 0.6001242985358364, "grad_norm": 5.811452227987757, "learning_rate": 3.639139431613733e-06, "loss": 17.2906, "step": 32831 }, { "epoch": 0.6001425777322829, "grad_norm": 5.828241135499563, "learning_rate": 3.638854597314987e-06, "loss": 17.2045, "step": 32832 }, { "epoch": 0.6001608569287294, "grad_norm": 6.390563791900616, "learning_rate": 3.6385697677869346e-06, "loss": 17.3705, "step": 32833 }, { "epoch": 0.6001791361251759, "grad_norm": 5.832797684454667, "learning_rate": 3.63828494303058e-06, "loss": 16.9542, "step": 32834 }, { "epoch": 0.6001974153216224, "grad_norm": 6.668377480607232, "learning_rate": 3.638000123046917e-06, "loss": 17.7334, "step": 32835 }, { "epoch": 0.600215694518069, "grad_norm": 4.832515185268468, "learning_rate": 3.6377153078369444e-06, "loss": 16.8049, "step": 32836 }, { "epoch": 0.6002339737145155, "grad_norm": 5.741984211658471, "learning_rate": 3.637430497401664e-06, "loss": 17.0943, "step": 32837 }, { "epoch": 0.6002522529109621, "grad_norm": 6.530214791862558, "learning_rate": 3.6371456917420688e-06, "loss": 17.3181, "step": 32838 }, { "epoch": 0.6002705321074085, "grad_norm": 7.156089248504742, "learning_rate": 3.6368608908591617e-06, "loss": 17.6273, "step": 32839 }, { "epoch": 0.600288811303855, "grad_norm": 5.90464538883255, "learning_rate": 3.636576094753938e-06, "loss": 17.2586, "step": 32840 }, { "epoch": 0.6003070905003016, "grad_norm": 6.625088817685598, "learning_rate": 3.636291303427396e-06, "loss": 17.5533, "step": 32841 }, { "epoch": 0.6003253696967481, "grad_norm": 6.717768284513364, "learning_rate": 3.6360065168805364e-06, "loss": 17.6344, "step": 32842 }, { "epoch": 0.6003436488931947, "grad_norm": 9.64210113699081, "learning_rate": 3.635721735114355e-06, "loss": 18.5115, "step": 32843 }, { "epoch": 0.6003619280896412, "grad_norm": 6.023846467591416, "learning_rate": 3.6354369581298497e-06, "loss": 17.3021, "step": 32844 }, { "epoch": 0.6003802072860877, "grad_norm": 5.227714750261226, "learning_rate": 3.6351521859280203e-06, "loss": 17.1606, "step": 32845 }, { "epoch": 0.6003984864825342, "grad_norm": 6.446849756303317, "learning_rate": 3.6348674185098646e-06, "loss": 17.6487, "step": 32846 }, { "epoch": 0.6004167656789807, "grad_norm": 6.920203679905389, "learning_rate": 3.6345826558763786e-06, "loss": 17.622, "step": 32847 }, { "epoch": 0.6004350448754273, "grad_norm": 5.174314026841604, "learning_rate": 3.6342978980285627e-06, "loss": 16.9834, "step": 32848 }, { "epoch": 0.6004533240718738, "grad_norm": 6.400147194778716, "learning_rate": 3.634013144967415e-06, "loss": 17.4759, "step": 32849 }, { "epoch": 0.6004716032683203, "grad_norm": 6.287767708011128, "learning_rate": 3.6337283966939306e-06, "loss": 17.5141, "step": 32850 }, { "epoch": 0.6004898824647669, "grad_norm": 6.834875942862956, "learning_rate": 3.6334436532091118e-06, "loss": 17.7916, "step": 32851 }, { "epoch": 0.6005081616612133, "grad_norm": 5.897343184016148, "learning_rate": 3.633158914513952e-06, "loss": 17.2716, "step": 32852 }, { "epoch": 0.6005264408576599, "grad_norm": 5.259148481045079, "learning_rate": 3.6328741806094532e-06, "loss": 17.0333, "step": 32853 }, { "epoch": 0.6005447200541064, "grad_norm": 6.066078324897582, "learning_rate": 3.6325894514966126e-06, "loss": 17.4907, "step": 32854 }, { "epoch": 0.6005629992505529, "grad_norm": 7.222170475953212, "learning_rate": 3.632304727176425e-06, "loss": 18.0528, "step": 32855 }, { "epoch": 0.6005812784469995, "grad_norm": 7.674219385655178, "learning_rate": 3.6320200076498928e-06, "loss": 17.9886, "step": 32856 }, { "epoch": 0.600599557643446, "grad_norm": 6.22960347442017, "learning_rate": 3.631735292918011e-06, "loss": 17.1701, "step": 32857 }, { "epoch": 0.6006178368398926, "grad_norm": 6.42535954257399, "learning_rate": 3.631450582981777e-06, "loss": 17.376, "step": 32858 }, { "epoch": 0.600636116036339, "grad_norm": 6.43132573332249, "learning_rate": 3.631165877842192e-06, "loss": 17.4724, "step": 32859 }, { "epoch": 0.6006543952327855, "grad_norm": 5.581584820527455, "learning_rate": 3.630881177500252e-06, "loss": 17.0915, "step": 32860 }, { "epoch": 0.6006726744292321, "grad_norm": 6.930385661478232, "learning_rate": 3.6305964819569527e-06, "loss": 17.9592, "step": 32861 }, { "epoch": 0.6006909536256786, "grad_norm": 5.3143330915518305, "learning_rate": 3.6303117912132946e-06, "loss": 17.1155, "step": 32862 }, { "epoch": 0.6007092328221252, "grad_norm": 6.120728680501021, "learning_rate": 3.6300271052702763e-06, "loss": 17.3506, "step": 32863 }, { "epoch": 0.6007275120185717, "grad_norm": 8.661218998156972, "learning_rate": 3.6297424241288916e-06, "loss": 18.8981, "step": 32864 }, { "epoch": 0.6007457912150181, "grad_norm": 7.4345029024079, "learning_rate": 3.6294577477901427e-06, "loss": 17.9334, "step": 32865 }, { "epoch": 0.6007640704114647, "grad_norm": 9.465828995944872, "learning_rate": 3.6291730762550248e-06, "loss": 18.7232, "step": 32866 }, { "epoch": 0.6007823496079112, "grad_norm": 6.335050512114791, "learning_rate": 3.628888409524535e-06, "loss": 17.7041, "step": 32867 }, { "epoch": 0.6008006288043578, "grad_norm": 6.743197321251984, "learning_rate": 3.6286037475996748e-06, "loss": 17.6177, "step": 32868 }, { "epoch": 0.6008189080008043, "grad_norm": 6.31189252821404, "learning_rate": 3.628319090481437e-06, "loss": 17.6932, "step": 32869 }, { "epoch": 0.6008371871972508, "grad_norm": 5.219083427574214, "learning_rate": 3.6280344381708245e-06, "loss": 17.0417, "step": 32870 }, { "epoch": 0.6008554663936974, "grad_norm": 7.865943565072259, "learning_rate": 3.6277497906688308e-06, "loss": 18.0304, "step": 32871 }, { "epoch": 0.6008737455901438, "grad_norm": 5.773951544374042, "learning_rate": 3.6274651479764535e-06, "loss": 17.174, "step": 32872 }, { "epoch": 0.6008920247865904, "grad_norm": 6.305411378166451, "learning_rate": 3.6271805100946945e-06, "loss": 17.4134, "step": 32873 }, { "epoch": 0.6009103039830369, "grad_norm": 5.426524382831086, "learning_rate": 3.626895877024549e-06, "loss": 17.1436, "step": 32874 }, { "epoch": 0.6009285831794834, "grad_norm": 5.993487995816288, "learning_rate": 3.6266112487670115e-06, "loss": 17.1791, "step": 32875 }, { "epoch": 0.60094686237593, "grad_norm": 4.645158236015493, "learning_rate": 3.626326625323083e-06, "loss": 16.8969, "step": 32876 }, { "epoch": 0.6009651415723765, "grad_norm": 6.495158120108559, "learning_rate": 3.6260420066937617e-06, "loss": 17.6662, "step": 32877 }, { "epoch": 0.600983420768823, "grad_norm": 6.067002108469106, "learning_rate": 3.6257573928800428e-06, "loss": 17.2522, "step": 32878 }, { "epoch": 0.6010016999652695, "grad_norm": 6.369222467394156, "learning_rate": 3.6254727838829263e-06, "loss": 17.6431, "step": 32879 }, { "epoch": 0.601019979161716, "grad_norm": 7.077083771587269, "learning_rate": 3.625188179703408e-06, "loss": 17.6715, "step": 32880 }, { "epoch": 0.6010382583581626, "grad_norm": 6.549138422899655, "learning_rate": 3.6249035803424843e-06, "loss": 17.5193, "step": 32881 }, { "epoch": 0.6010565375546091, "grad_norm": 5.909868125597975, "learning_rate": 3.624618985801156e-06, "loss": 17.3521, "step": 32882 }, { "epoch": 0.6010748167510557, "grad_norm": 8.186910026817042, "learning_rate": 3.6243343960804177e-06, "loss": 18.4808, "step": 32883 }, { "epoch": 0.6010930959475022, "grad_norm": 5.0244647809805265, "learning_rate": 3.6240498111812696e-06, "loss": 17.1007, "step": 32884 }, { "epoch": 0.6011113751439486, "grad_norm": 6.929152675980812, "learning_rate": 3.6237652311047057e-06, "loss": 17.4045, "step": 32885 }, { "epoch": 0.6011296543403952, "grad_norm": 6.432881146412631, "learning_rate": 3.623480655851725e-06, "loss": 17.4687, "step": 32886 }, { "epoch": 0.6011479335368417, "grad_norm": 5.270982912184218, "learning_rate": 3.623196085423327e-06, "loss": 17.2096, "step": 32887 }, { "epoch": 0.6011662127332883, "grad_norm": 5.2183449532355635, "learning_rate": 3.6229115198205077e-06, "loss": 17.0692, "step": 32888 }, { "epoch": 0.6011844919297348, "grad_norm": 5.611063290447465, "learning_rate": 3.6226269590442616e-06, "loss": 17.1386, "step": 32889 }, { "epoch": 0.6012027711261813, "grad_norm": 6.549078524718534, "learning_rate": 3.6223424030955887e-06, "loss": 17.7067, "step": 32890 }, { "epoch": 0.6012210503226278, "grad_norm": 6.090942045795058, "learning_rate": 3.6220578519754886e-06, "loss": 17.3499, "step": 32891 }, { "epoch": 0.6012393295190743, "grad_norm": 5.355116021304386, "learning_rate": 3.6217733056849534e-06, "loss": 17.1402, "step": 32892 }, { "epoch": 0.6012576087155209, "grad_norm": 8.323617059322725, "learning_rate": 3.6214887642249854e-06, "loss": 18.1823, "step": 32893 }, { "epoch": 0.6012758879119674, "grad_norm": 5.89596156036758, "learning_rate": 3.6212042275965788e-06, "loss": 17.353, "step": 32894 }, { "epoch": 0.6012941671084139, "grad_norm": 7.0166505425884464, "learning_rate": 3.62091969580073e-06, "loss": 17.6607, "step": 32895 }, { "epoch": 0.6013124463048605, "grad_norm": 5.664421160816294, "learning_rate": 3.62063516883844e-06, "loss": 17.4095, "step": 32896 }, { "epoch": 0.601330725501307, "grad_norm": 6.233252649551751, "learning_rate": 3.6203506467107043e-06, "loss": 17.5308, "step": 32897 }, { "epoch": 0.6013490046977535, "grad_norm": 8.725756260488646, "learning_rate": 3.6200661294185187e-06, "loss": 17.7801, "step": 32898 }, { "epoch": 0.6013672838942, "grad_norm": 6.506968788691464, "learning_rate": 3.619781616962882e-06, "loss": 17.4376, "step": 32899 }, { "epoch": 0.6013855630906465, "grad_norm": 7.025382107877835, "learning_rate": 3.6194971093447897e-06, "loss": 18.084, "step": 32900 }, { "epoch": 0.6014038422870931, "grad_norm": 7.1051756620816064, "learning_rate": 3.6192126065652422e-06, "loss": 17.6954, "step": 32901 }, { "epoch": 0.6014221214835396, "grad_norm": 7.283926324653602, "learning_rate": 3.6189281086252337e-06, "loss": 17.9172, "step": 32902 }, { "epoch": 0.601440400679986, "grad_norm": 6.614634073836125, "learning_rate": 3.618643615525763e-06, "loss": 17.7679, "step": 32903 }, { "epoch": 0.6014586798764326, "grad_norm": 8.143886528734889, "learning_rate": 3.618359127267826e-06, "loss": 17.8857, "step": 32904 }, { "epoch": 0.6014769590728791, "grad_norm": 8.099680266196494, "learning_rate": 3.6180746438524215e-06, "loss": 18.2759, "step": 32905 }, { "epoch": 0.6014952382693257, "grad_norm": 5.839743988719306, "learning_rate": 3.6177901652805438e-06, "loss": 17.2354, "step": 32906 }, { "epoch": 0.6015135174657722, "grad_norm": 6.540871132833064, "learning_rate": 3.6175056915531914e-06, "loss": 17.5683, "step": 32907 }, { "epoch": 0.6015317966622187, "grad_norm": 6.111309718514196, "learning_rate": 3.6172212226713642e-06, "loss": 17.4872, "step": 32908 }, { "epoch": 0.6015500758586653, "grad_norm": 5.8251168936528845, "learning_rate": 3.6169367586360537e-06, "loss": 17.3427, "step": 32909 }, { "epoch": 0.6015683550551117, "grad_norm": 6.154201371550382, "learning_rate": 3.6166522994482624e-06, "loss": 17.1874, "step": 32910 }, { "epoch": 0.6015866342515583, "grad_norm": 7.579309502623134, "learning_rate": 3.616367845108983e-06, "loss": 18.3362, "step": 32911 }, { "epoch": 0.6016049134480048, "grad_norm": 5.973753987542889, "learning_rate": 3.6160833956192133e-06, "loss": 17.1779, "step": 32912 }, { "epoch": 0.6016231926444513, "grad_norm": 6.341533487006646, "learning_rate": 3.6157989509799536e-06, "loss": 17.7878, "step": 32913 }, { "epoch": 0.6016414718408979, "grad_norm": 6.249038656797951, "learning_rate": 3.615514511192196e-06, "loss": 17.1829, "step": 32914 }, { "epoch": 0.6016597510373444, "grad_norm": 5.8787687950557865, "learning_rate": 3.6152300762569428e-06, "loss": 17.3978, "step": 32915 }, { "epoch": 0.601678030233791, "grad_norm": 5.803329891470859, "learning_rate": 3.6149456461751864e-06, "loss": 17.3583, "step": 32916 }, { "epoch": 0.6016963094302374, "grad_norm": 6.387963147376131, "learning_rate": 3.614661220947924e-06, "loss": 17.3027, "step": 32917 }, { "epoch": 0.6017145886266839, "grad_norm": 5.696200058673395, "learning_rate": 3.6143768005761567e-06, "loss": 17.1346, "step": 32918 }, { "epoch": 0.6017328678231305, "grad_norm": 6.250752302936886, "learning_rate": 3.6140923850608776e-06, "loss": 17.3085, "step": 32919 }, { "epoch": 0.601751147019577, "grad_norm": 6.512781412448033, "learning_rate": 3.613807974403083e-06, "loss": 17.3483, "step": 32920 }, { "epoch": 0.6017694262160236, "grad_norm": 5.738404109958453, "learning_rate": 3.6135235686037716e-06, "loss": 17.1028, "step": 32921 }, { "epoch": 0.6017877054124701, "grad_norm": 6.291417227024831, "learning_rate": 3.613239167663941e-06, "loss": 17.4412, "step": 32922 }, { "epoch": 0.6018059846089165, "grad_norm": 6.380200224488896, "learning_rate": 3.612954771584585e-06, "loss": 17.2989, "step": 32923 }, { "epoch": 0.6018242638053631, "grad_norm": 6.002848460579648, "learning_rate": 3.612670380366704e-06, "loss": 17.287, "step": 32924 }, { "epoch": 0.6018425430018096, "grad_norm": 5.678785261927642, "learning_rate": 3.612385994011293e-06, "loss": 17.2234, "step": 32925 }, { "epoch": 0.6018608221982562, "grad_norm": 5.581861944513263, "learning_rate": 3.6121016125193464e-06, "loss": 17.0777, "step": 32926 }, { "epoch": 0.6018791013947027, "grad_norm": 6.208198431590997, "learning_rate": 3.6118172358918654e-06, "loss": 17.5877, "step": 32927 }, { "epoch": 0.6018973805911492, "grad_norm": 6.526985420151998, "learning_rate": 3.611532864129844e-06, "loss": 17.2137, "step": 32928 }, { "epoch": 0.6019156597875958, "grad_norm": 6.421833702131489, "learning_rate": 3.6112484972342786e-06, "loss": 17.4248, "step": 32929 }, { "epoch": 0.6019339389840422, "grad_norm": 5.069476704948242, "learning_rate": 3.610964135206167e-06, "loss": 16.8195, "step": 32930 }, { "epoch": 0.6019522181804888, "grad_norm": 5.666333188586172, "learning_rate": 3.6106797780465048e-06, "loss": 17.1154, "step": 32931 }, { "epoch": 0.6019704973769353, "grad_norm": 8.611160500611879, "learning_rate": 3.610395425756291e-06, "loss": 17.6625, "step": 32932 }, { "epoch": 0.6019887765733818, "grad_norm": 7.043237631318614, "learning_rate": 3.6101110783365212e-06, "loss": 17.5763, "step": 32933 }, { "epoch": 0.6020070557698284, "grad_norm": 7.304639794336724, "learning_rate": 3.6098267357881894e-06, "loss": 17.2957, "step": 32934 }, { "epoch": 0.6020253349662749, "grad_norm": 6.717068996813867, "learning_rate": 3.609542398112295e-06, "loss": 17.5735, "step": 32935 }, { "epoch": 0.6020436141627215, "grad_norm": 7.681752575893184, "learning_rate": 3.609258065309835e-06, "loss": 17.7344, "step": 32936 }, { "epoch": 0.6020618933591679, "grad_norm": 7.136827285960024, "learning_rate": 3.608973737381803e-06, "loss": 17.358, "step": 32937 }, { "epoch": 0.6020801725556144, "grad_norm": 6.657388704941222, "learning_rate": 3.608689414329199e-06, "loss": 17.6155, "step": 32938 }, { "epoch": 0.602098451752061, "grad_norm": 4.569194318178899, "learning_rate": 3.6084050961530172e-06, "loss": 16.8208, "step": 32939 }, { "epoch": 0.6021167309485075, "grad_norm": 7.004910792930403, "learning_rate": 3.608120782854253e-06, "loss": 17.4608, "step": 32940 }, { "epoch": 0.6021350101449541, "grad_norm": 6.201591276791761, "learning_rate": 3.607836474433908e-06, "loss": 17.4231, "step": 32941 }, { "epoch": 0.6021532893414006, "grad_norm": 6.698815712144558, "learning_rate": 3.607552170892974e-06, "loss": 17.5276, "step": 32942 }, { "epoch": 0.602171568537847, "grad_norm": 8.075635765935749, "learning_rate": 3.607267872232447e-06, "loss": 17.8468, "step": 32943 }, { "epoch": 0.6021898477342936, "grad_norm": 6.313660191414244, "learning_rate": 3.606983578453327e-06, "loss": 17.1565, "step": 32944 }, { "epoch": 0.6022081269307401, "grad_norm": 5.066157801188135, "learning_rate": 3.6066992895566074e-06, "loss": 16.8407, "step": 32945 }, { "epoch": 0.6022264061271867, "grad_norm": 6.515243026741462, "learning_rate": 3.606415005543288e-06, "loss": 17.5347, "step": 32946 }, { "epoch": 0.6022446853236332, "grad_norm": 5.786758252727165, "learning_rate": 3.606130726414363e-06, "loss": 17.2651, "step": 32947 }, { "epoch": 0.6022629645200797, "grad_norm": 6.47011111540966, "learning_rate": 3.6058464521708264e-06, "loss": 17.4616, "step": 32948 }, { "epoch": 0.6022812437165262, "grad_norm": 8.548094915209782, "learning_rate": 3.605562182813678e-06, "loss": 17.5756, "step": 32949 }, { "epoch": 0.6022995229129727, "grad_norm": 6.661163573678889, "learning_rate": 3.6052779183439146e-06, "loss": 17.4747, "step": 32950 }, { "epoch": 0.6023178021094193, "grad_norm": 6.225043963738377, "learning_rate": 3.604993658762529e-06, "loss": 17.3896, "step": 32951 }, { "epoch": 0.6023360813058658, "grad_norm": 8.174391147635976, "learning_rate": 3.6047094040705216e-06, "loss": 17.5859, "step": 32952 }, { "epoch": 0.6023543605023123, "grad_norm": 7.405791977496252, "learning_rate": 3.6044251542688857e-06, "loss": 17.6909, "step": 32953 }, { "epoch": 0.6023726396987589, "grad_norm": 5.962314649776127, "learning_rate": 3.6041409093586172e-06, "loss": 16.9275, "step": 32954 }, { "epoch": 0.6023909188952054, "grad_norm": 5.578480711573574, "learning_rate": 3.6038566693407162e-06, "loss": 17.043, "step": 32955 }, { "epoch": 0.6024091980916519, "grad_norm": 5.658194422213345, "learning_rate": 3.6035724342161758e-06, "loss": 17.1492, "step": 32956 }, { "epoch": 0.6024274772880984, "grad_norm": 6.4720026150493055, "learning_rate": 3.603288203985991e-06, "loss": 17.6806, "step": 32957 }, { "epoch": 0.6024457564845449, "grad_norm": 5.714583504088967, "learning_rate": 3.603003978651161e-06, "loss": 17.3261, "step": 32958 }, { "epoch": 0.6024640356809915, "grad_norm": 6.354416916064365, "learning_rate": 3.60271975821268e-06, "loss": 17.3892, "step": 32959 }, { "epoch": 0.602482314877438, "grad_norm": 6.723566429214893, "learning_rate": 3.6024355426715468e-06, "loss": 17.8314, "step": 32960 }, { "epoch": 0.6025005940738846, "grad_norm": 7.3701122748979095, "learning_rate": 3.602151332028756e-06, "loss": 17.7093, "step": 32961 }, { "epoch": 0.602518873270331, "grad_norm": 6.279120395969271, "learning_rate": 3.601867126285301e-06, "loss": 17.4853, "step": 32962 }, { "epoch": 0.6025371524667775, "grad_norm": 6.35012432200991, "learning_rate": 3.601582925442182e-06, "loss": 17.272, "step": 32963 }, { "epoch": 0.6025554316632241, "grad_norm": 6.095758813410393, "learning_rate": 3.601298729500394e-06, "loss": 17.2311, "step": 32964 }, { "epoch": 0.6025737108596706, "grad_norm": 5.859332270400636, "learning_rate": 3.6010145384609307e-06, "loss": 17.2914, "step": 32965 }, { "epoch": 0.6025919900561172, "grad_norm": 6.22012844372997, "learning_rate": 3.6007303523247923e-06, "loss": 17.1658, "step": 32966 }, { "epoch": 0.6026102692525637, "grad_norm": 7.186222877502611, "learning_rate": 3.600446171092971e-06, "loss": 17.705, "step": 32967 }, { "epoch": 0.6026285484490101, "grad_norm": 6.290648845812573, "learning_rate": 3.6001619947664634e-06, "loss": 17.8445, "step": 32968 }, { "epoch": 0.6026468276454567, "grad_norm": 5.569565373507982, "learning_rate": 3.5998778233462694e-06, "loss": 17.0006, "step": 32969 }, { "epoch": 0.6026651068419032, "grad_norm": 5.677392638720858, "learning_rate": 3.5995936568333806e-06, "loss": 17.0929, "step": 32970 }, { "epoch": 0.6026833860383497, "grad_norm": 7.106092327265382, "learning_rate": 3.5993094952287944e-06, "loss": 17.9944, "step": 32971 }, { "epoch": 0.6027016652347963, "grad_norm": 5.757147496735426, "learning_rate": 3.599025338533507e-06, "loss": 16.998, "step": 32972 }, { "epoch": 0.6027199444312428, "grad_norm": 5.879038490493766, "learning_rate": 3.5987411867485155e-06, "loss": 17.2709, "step": 32973 }, { "epoch": 0.6027382236276894, "grad_norm": 5.990326318208498, "learning_rate": 3.598457039874812e-06, "loss": 17.3381, "step": 32974 }, { "epoch": 0.6027565028241358, "grad_norm": 7.241556215314411, "learning_rate": 3.5981728979133967e-06, "loss": 17.8835, "step": 32975 }, { "epoch": 0.6027747820205823, "grad_norm": 5.96752887063776, "learning_rate": 3.5978887608652634e-06, "loss": 17.4679, "step": 32976 }, { "epoch": 0.6027930612170289, "grad_norm": 6.894064298408354, "learning_rate": 3.5976046287314082e-06, "loss": 17.4681, "step": 32977 }, { "epoch": 0.6028113404134754, "grad_norm": 5.290013508093399, "learning_rate": 3.5973205015128276e-06, "loss": 17.1155, "step": 32978 }, { "epoch": 0.602829619609922, "grad_norm": 7.179434494326087, "learning_rate": 3.5970363792105156e-06, "loss": 17.6056, "step": 32979 }, { "epoch": 0.6028478988063685, "grad_norm": 6.090219910903687, "learning_rate": 3.5967522618254704e-06, "loss": 17.263, "step": 32980 }, { "epoch": 0.602866178002815, "grad_norm": 7.712787439787644, "learning_rate": 3.5964681493586873e-06, "loss": 17.3563, "step": 32981 }, { "epoch": 0.6028844571992615, "grad_norm": 4.938304171906754, "learning_rate": 3.59618404181116e-06, "loss": 17.038, "step": 32982 }, { "epoch": 0.602902736395708, "grad_norm": 6.365843493741785, "learning_rate": 3.595899939183888e-06, "loss": 17.4732, "step": 32983 }, { "epoch": 0.6029210155921546, "grad_norm": 5.684005184711374, "learning_rate": 3.595615841477863e-06, "loss": 17.1244, "step": 32984 }, { "epoch": 0.6029392947886011, "grad_norm": 5.542617469093631, "learning_rate": 3.595331748694082e-06, "loss": 17.0314, "step": 32985 }, { "epoch": 0.6029575739850476, "grad_norm": 10.13147658979154, "learning_rate": 3.595047660833544e-06, "loss": 18.4792, "step": 32986 }, { "epoch": 0.6029758531814942, "grad_norm": 5.600159982386374, "learning_rate": 3.594763577897241e-06, "loss": 16.9748, "step": 32987 }, { "epoch": 0.6029941323779406, "grad_norm": 6.1342555505230685, "learning_rate": 3.5944794998861683e-06, "loss": 17.1902, "step": 32988 }, { "epoch": 0.6030124115743872, "grad_norm": 6.43723382546147, "learning_rate": 3.5941954268013236e-06, "loss": 17.1027, "step": 32989 }, { "epoch": 0.6030306907708337, "grad_norm": 5.52390979521668, "learning_rate": 3.5939113586437013e-06, "loss": 17.0287, "step": 32990 }, { "epoch": 0.6030489699672802, "grad_norm": 7.048868780514833, "learning_rate": 3.5936272954142993e-06, "loss": 17.8022, "step": 32991 }, { "epoch": 0.6030672491637268, "grad_norm": 7.928346597099636, "learning_rate": 3.593343237114112e-06, "loss": 18.2517, "step": 32992 }, { "epoch": 0.6030855283601733, "grad_norm": 7.254356891118346, "learning_rate": 3.5930591837441326e-06, "loss": 18.1025, "step": 32993 }, { "epoch": 0.6031038075566199, "grad_norm": 8.324885716867685, "learning_rate": 3.59277513530536e-06, "loss": 18.1675, "step": 32994 }, { "epoch": 0.6031220867530663, "grad_norm": 7.903405370927547, "learning_rate": 3.592491091798789e-06, "loss": 18.3013, "step": 32995 }, { "epoch": 0.6031403659495128, "grad_norm": 5.892289781691864, "learning_rate": 3.5922070532254117e-06, "loss": 17.1862, "step": 32996 }, { "epoch": 0.6031586451459594, "grad_norm": 6.069546582117895, "learning_rate": 3.59192301958623e-06, "loss": 17.1847, "step": 32997 }, { "epoch": 0.6031769243424059, "grad_norm": 8.19245904302049, "learning_rate": 3.5916389908822338e-06, "loss": 17.7766, "step": 32998 }, { "epoch": 0.6031952035388525, "grad_norm": 5.2388954332810656, "learning_rate": 3.59135496711442e-06, "loss": 16.9467, "step": 32999 }, { "epoch": 0.603213482735299, "grad_norm": 4.938779694514896, "learning_rate": 3.591070948283787e-06, "loss": 16.7324, "step": 33000 }, { "epoch": 0.6032317619317454, "grad_norm": 5.863208128795423, "learning_rate": 3.590786934391328e-06, "loss": 17.3011, "step": 33001 }, { "epoch": 0.603250041128192, "grad_norm": 5.74898212243698, "learning_rate": 3.590502925438037e-06, "loss": 17.0751, "step": 33002 }, { "epoch": 0.6032683203246385, "grad_norm": 6.194982436015145, "learning_rate": 3.590218921424912e-06, "loss": 17.3209, "step": 33003 }, { "epoch": 0.6032865995210851, "grad_norm": 4.758694914723114, "learning_rate": 3.589934922352947e-06, "loss": 16.8842, "step": 33004 }, { "epoch": 0.6033048787175316, "grad_norm": 6.971411274017073, "learning_rate": 3.589650928223137e-06, "loss": 17.5251, "step": 33005 }, { "epoch": 0.6033231579139781, "grad_norm": 6.319287761612449, "learning_rate": 3.5893669390364795e-06, "loss": 17.5914, "step": 33006 }, { "epoch": 0.6033414371104246, "grad_norm": 7.119891642488658, "learning_rate": 3.5890829547939666e-06, "loss": 17.6911, "step": 33007 }, { "epoch": 0.6033597163068711, "grad_norm": 6.02593722519298, "learning_rate": 3.588798975496597e-06, "loss": 17.3538, "step": 33008 }, { "epoch": 0.6033779955033177, "grad_norm": 5.8895187559455415, "learning_rate": 3.588515001145365e-06, "loss": 17.2206, "step": 33009 }, { "epoch": 0.6033962746997642, "grad_norm": 8.024671903547764, "learning_rate": 3.5882310317412633e-06, "loss": 18.0909, "step": 33010 }, { "epoch": 0.6034145538962107, "grad_norm": 7.12403001891818, "learning_rate": 3.587947067285292e-06, "loss": 17.9159, "step": 33011 }, { "epoch": 0.6034328330926573, "grad_norm": 6.303649435664245, "learning_rate": 3.587663107778442e-06, "loss": 17.5182, "step": 33012 }, { "epoch": 0.6034511122891038, "grad_norm": 5.957274278007, "learning_rate": 3.5873791532217094e-06, "loss": 17.2856, "step": 33013 }, { "epoch": 0.6034693914855503, "grad_norm": 5.472982543424433, "learning_rate": 3.5870952036160924e-06, "loss": 17.1436, "step": 33014 }, { "epoch": 0.6034876706819968, "grad_norm": 6.720235348454228, "learning_rate": 3.586811258962584e-06, "loss": 17.6647, "step": 33015 }, { "epoch": 0.6035059498784433, "grad_norm": 7.576566449810003, "learning_rate": 3.5865273192621774e-06, "loss": 17.5014, "step": 33016 }, { "epoch": 0.6035242290748899, "grad_norm": 7.266338833995647, "learning_rate": 3.5862433845158718e-06, "loss": 18.2622, "step": 33017 }, { "epoch": 0.6035425082713364, "grad_norm": 7.219072900706809, "learning_rate": 3.5859594547246606e-06, "loss": 17.6796, "step": 33018 }, { "epoch": 0.603560787467783, "grad_norm": 5.3161421310132635, "learning_rate": 3.585675529889537e-06, "loss": 16.9993, "step": 33019 }, { "epoch": 0.6035790666642294, "grad_norm": 6.552315370399087, "learning_rate": 3.5853916100115e-06, "loss": 17.5827, "step": 33020 }, { "epoch": 0.6035973458606759, "grad_norm": 6.49735568233114, "learning_rate": 3.5851076950915404e-06, "loss": 17.706, "step": 33021 }, { "epoch": 0.6036156250571225, "grad_norm": 6.7247632142268365, "learning_rate": 3.5848237851306567e-06, "loss": 17.4588, "step": 33022 }, { "epoch": 0.603633904253569, "grad_norm": 6.629773989994709, "learning_rate": 3.5845398801298438e-06, "loss": 17.719, "step": 33023 }, { "epoch": 0.6036521834500156, "grad_norm": 5.202533323203471, "learning_rate": 3.584255980090094e-06, "loss": 17.0041, "step": 33024 }, { "epoch": 0.6036704626464621, "grad_norm": 6.003246037088517, "learning_rate": 3.583972085012406e-06, "loss": 17.3627, "step": 33025 }, { "epoch": 0.6036887418429085, "grad_norm": 6.109161639575275, "learning_rate": 3.583688194897772e-06, "loss": 17.448, "step": 33026 }, { "epoch": 0.6037070210393551, "grad_norm": 6.21950928719951, "learning_rate": 3.583404309747187e-06, "loss": 17.6623, "step": 33027 }, { "epoch": 0.6037253002358016, "grad_norm": 5.640377842657661, "learning_rate": 3.5831204295616484e-06, "loss": 17.1747, "step": 33028 }, { "epoch": 0.6037435794322482, "grad_norm": 8.233010135258946, "learning_rate": 3.5828365543421497e-06, "loss": 18.1039, "step": 33029 }, { "epoch": 0.6037618586286947, "grad_norm": 7.770621809609157, "learning_rate": 3.5825526840896852e-06, "loss": 17.6708, "step": 33030 }, { "epoch": 0.6037801378251412, "grad_norm": 6.435488099246759, "learning_rate": 3.5822688188052507e-06, "loss": 17.6532, "step": 33031 }, { "epoch": 0.6037984170215878, "grad_norm": 6.752858986541687, "learning_rate": 3.5819849584898426e-06, "loss": 17.5882, "step": 33032 }, { "epoch": 0.6038166962180342, "grad_norm": 6.8979312758571885, "learning_rate": 3.5817011031444515e-06, "loss": 17.4594, "step": 33033 }, { "epoch": 0.6038349754144808, "grad_norm": 5.796560466368222, "learning_rate": 3.581417252770076e-06, "loss": 17.2123, "step": 33034 }, { "epoch": 0.6038532546109273, "grad_norm": 5.833705436429722, "learning_rate": 3.5811334073677106e-06, "loss": 17.4107, "step": 33035 }, { "epoch": 0.6038715338073738, "grad_norm": 5.41545944228597, "learning_rate": 3.580849566938348e-06, "loss": 16.9876, "step": 33036 }, { "epoch": 0.6038898130038204, "grad_norm": 5.205289899771492, "learning_rate": 3.5805657314829866e-06, "loss": 16.9876, "step": 33037 }, { "epoch": 0.6039080922002669, "grad_norm": 6.291795071991938, "learning_rate": 3.580281901002617e-06, "loss": 17.5563, "step": 33038 }, { "epoch": 0.6039263713967133, "grad_norm": 5.604817927997312, "learning_rate": 3.5799980754982366e-06, "loss": 17.3516, "step": 33039 }, { "epoch": 0.6039446505931599, "grad_norm": 6.215734609973325, "learning_rate": 3.579714254970842e-06, "loss": 17.1822, "step": 33040 }, { "epoch": 0.6039629297896064, "grad_norm": 6.508430823715772, "learning_rate": 3.5794304394214226e-06, "loss": 17.5795, "step": 33041 }, { "epoch": 0.603981208986053, "grad_norm": 4.525591228700957, "learning_rate": 3.579146628850978e-06, "loss": 16.767, "step": 33042 }, { "epoch": 0.6039994881824995, "grad_norm": 5.899205315391795, "learning_rate": 3.5788628232605013e-06, "loss": 17.1592, "step": 33043 }, { "epoch": 0.604017767378946, "grad_norm": 7.385104598574306, "learning_rate": 3.5785790226509853e-06, "loss": 17.8888, "step": 33044 }, { "epoch": 0.6040360465753926, "grad_norm": 8.255281561661304, "learning_rate": 3.5782952270234284e-06, "loss": 17.6317, "step": 33045 }, { "epoch": 0.604054325771839, "grad_norm": 8.522544493007377, "learning_rate": 3.578011436378824e-06, "loss": 18.0668, "step": 33046 }, { "epoch": 0.6040726049682856, "grad_norm": 6.390952928226538, "learning_rate": 3.5777276507181634e-06, "loss": 17.5273, "step": 33047 }, { "epoch": 0.6040908841647321, "grad_norm": 5.940898916917263, "learning_rate": 3.577443870042445e-06, "loss": 17.2301, "step": 33048 }, { "epoch": 0.6041091633611786, "grad_norm": 6.126319995370314, "learning_rate": 3.577160094352664e-06, "loss": 17.4645, "step": 33049 }, { "epoch": 0.6041274425576252, "grad_norm": 5.746278010425811, "learning_rate": 3.5768763236498117e-06, "loss": 17.0564, "step": 33050 }, { "epoch": 0.6041457217540717, "grad_norm": 5.22505543103134, "learning_rate": 3.5765925579348853e-06, "loss": 16.8077, "step": 33051 }, { "epoch": 0.6041640009505183, "grad_norm": 5.852022053783308, "learning_rate": 3.576308797208877e-06, "loss": 16.9024, "step": 33052 }, { "epoch": 0.6041822801469647, "grad_norm": 6.527878335614802, "learning_rate": 3.5760250414727835e-06, "loss": 17.618, "step": 33053 }, { "epoch": 0.6042005593434112, "grad_norm": 7.7738196217923, "learning_rate": 3.5757412907276e-06, "loss": 18.366, "step": 33054 }, { "epoch": 0.6042188385398578, "grad_norm": 5.786800482988848, "learning_rate": 3.575457544974318e-06, "loss": 17.1183, "step": 33055 }, { "epoch": 0.6042371177363043, "grad_norm": 7.003656549555367, "learning_rate": 3.575173804213935e-06, "loss": 17.7717, "step": 33056 }, { "epoch": 0.6042553969327509, "grad_norm": 6.190005845602253, "learning_rate": 3.574890068447444e-06, "loss": 17.4305, "step": 33057 }, { "epoch": 0.6042736761291974, "grad_norm": 5.892524765212246, "learning_rate": 3.574606337675838e-06, "loss": 17.3111, "step": 33058 }, { "epoch": 0.6042919553256438, "grad_norm": 6.119932187686222, "learning_rate": 3.574322611900115e-06, "loss": 17.3255, "step": 33059 }, { "epoch": 0.6043102345220904, "grad_norm": 6.792050442169503, "learning_rate": 3.5740388911212677e-06, "loss": 17.5249, "step": 33060 }, { "epoch": 0.6043285137185369, "grad_norm": 4.718168836498955, "learning_rate": 3.573755175340289e-06, "loss": 16.7894, "step": 33061 }, { "epoch": 0.6043467929149835, "grad_norm": 6.639953455809377, "learning_rate": 3.573471464558175e-06, "loss": 17.274, "step": 33062 }, { "epoch": 0.60436507211143, "grad_norm": 7.481056371170024, "learning_rate": 3.5731877587759205e-06, "loss": 17.6424, "step": 33063 }, { "epoch": 0.6043833513078765, "grad_norm": 6.376139753238013, "learning_rate": 3.5729040579945174e-06, "loss": 17.2413, "step": 33064 }, { "epoch": 0.604401630504323, "grad_norm": 5.944898657300646, "learning_rate": 3.5726203622149637e-06, "loss": 17.3276, "step": 33065 }, { "epoch": 0.6044199097007695, "grad_norm": 5.335381328628311, "learning_rate": 3.5723366714382513e-06, "loss": 17.1919, "step": 33066 }, { "epoch": 0.6044381888972161, "grad_norm": 5.887545623296836, "learning_rate": 3.572052985665373e-06, "loss": 17.1987, "step": 33067 }, { "epoch": 0.6044564680936626, "grad_norm": 6.8644045649850005, "learning_rate": 3.571769304897328e-06, "loss": 17.4962, "step": 33068 }, { "epoch": 0.6044747472901091, "grad_norm": 6.246622405586515, "learning_rate": 3.5714856291351046e-06, "loss": 17.2688, "step": 33069 }, { "epoch": 0.6044930264865557, "grad_norm": 5.986845839443706, "learning_rate": 3.571201958379703e-06, "loss": 17.2597, "step": 33070 }, { "epoch": 0.6045113056830022, "grad_norm": 6.508207576862655, "learning_rate": 3.570918292632113e-06, "loss": 17.5226, "step": 33071 }, { "epoch": 0.6045295848794487, "grad_norm": 6.910338906856182, "learning_rate": 3.5706346318933293e-06, "loss": 17.6071, "step": 33072 }, { "epoch": 0.6045478640758952, "grad_norm": 6.589597507699896, "learning_rate": 3.5703509761643496e-06, "loss": 17.208, "step": 33073 }, { "epoch": 0.6045661432723417, "grad_norm": 5.883425218026645, "learning_rate": 3.570067325446166e-06, "loss": 17.0817, "step": 33074 }, { "epoch": 0.6045844224687883, "grad_norm": 6.483141969909838, "learning_rate": 3.56978367973977e-06, "loss": 17.3174, "step": 33075 }, { "epoch": 0.6046027016652348, "grad_norm": 9.89751433880206, "learning_rate": 3.5695000390461588e-06, "loss": 18.3658, "step": 33076 }, { "epoch": 0.6046209808616814, "grad_norm": 5.757155144553383, "learning_rate": 3.569216403366327e-06, "loss": 17.0554, "step": 33077 }, { "epoch": 0.6046392600581278, "grad_norm": 5.805598508965178, "learning_rate": 3.568932772701266e-06, "loss": 17.0974, "step": 33078 }, { "epoch": 0.6046575392545743, "grad_norm": 6.543980133408413, "learning_rate": 3.5686491470519724e-06, "loss": 17.3734, "step": 33079 }, { "epoch": 0.6046758184510209, "grad_norm": 6.718587351300907, "learning_rate": 3.5683655264194394e-06, "loss": 17.4526, "step": 33080 }, { "epoch": 0.6046940976474674, "grad_norm": 8.986123736323352, "learning_rate": 3.5680819108046593e-06, "loss": 18.2286, "step": 33081 }, { "epoch": 0.604712376843914, "grad_norm": 6.454665159957936, "learning_rate": 3.5677983002086303e-06, "loss": 17.4879, "step": 33082 }, { "epoch": 0.6047306560403605, "grad_norm": 5.48534498757675, "learning_rate": 3.5675146946323417e-06, "loss": 16.9606, "step": 33083 }, { "epoch": 0.604748935236807, "grad_norm": 5.698959159654056, "learning_rate": 3.567231094076792e-06, "loss": 17.1509, "step": 33084 }, { "epoch": 0.6047672144332535, "grad_norm": 6.568119792917199, "learning_rate": 3.566947498542972e-06, "loss": 17.3659, "step": 33085 }, { "epoch": 0.6047854936297, "grad_norm": 7.114754588868641, "learning_rate": 3.5666639080318756e-06, "loss": 17.5092, "step": 33086 }, { "epoch": 0.6048037728261466, "grad_norm": 5.471549068571534, "learning_rate": 3.5663803225444993e-06, "loss": 17.1153, "step": 33087 }, { "epoch": 0.6048220520225931, "grad_norm": 6.562598483464331, "learning_rate": 3.566096742081836e-06, "loss": 17.5046, "step": 33088 }, { "epoch": 0.6048403312190396, "grad_norm": 6.640655261347747, "learning_rate": 3.5658131666448774e-06, "loss": 17.3626, "step": 33089 }, { "epoch": 0.6048586104154862, "grad_norm": 5.28909167238077, "learning_rate": 3.5655295962346203e-06, "loss": 16.9722, "step": 33090 }, { "epoch": 0.6048768896119326, "grad_norm": 7.39919259832745, "learning_rate": 3.565246030852058e-06, "loss": 17.5087, "step": 33091 }, { "epoch": 0.6048951688083792, "grad_norm": 5.910884243201573, "learning_rate": 3.564962470498182e-06, "loss": 17.2808, "step": 33092 }, { "epoch": 0.6049134480048257, "grad_norm": 6.073217688399557, "learning_rate": 3.56467891517399e-06, "loss": 17.4872, "step": 33093 }, { "epoch": 0.6049317272012722, "grad_norm": 9.007386029735201, "learning_rate": 3.5643953648804728e-06, "loss": 18.3857, "step": 33094 }, { "epoch": 0.6049500063977188, "grad_norm": 6.288783065385915, "learning_rate": 3.5641118196186247e-06, "loss": 17.5271, "step": 33095 }, { "epoch": 0.6049682855941653, "grad_norm": 5.997136368542232, "learning_rate": 3.563828279389442e-06, "loss": 17.384, "step": 33096 }, { "epoch": 0.6049865647906119, "grad_norm": 6.063210317851364, "learning_rate": 3.5635447441939154e-06, "loss": 17.6878, "step": 33097 }, { "epoch": 0.6050048439870583, "grad_norm": 6.585931394292596, "learning_rate": 3.5632612140330393e-06, "loss": 17.4899, "step": 33098 }, { "epoch": 0.6050231231835048, "grad_norm": 5.596312908117137, "learning_rate": 3.5629776889078084e-06, "loss": 16.9724, "step": 33099 }, { "epoch": 0.6050414023799514, "grad_norm": 5.950806373734356, "learning_rate": 3.5626941688192153e-06, "loss": 17.3878, "step": 33100 }, { "epoch": 0.6050596815763979, "grad_norm": 6.700431613905985, "learning_rate": 3.5624106537682555e-06, "loss": 17.4987, "step": 33101 }, { "epoch": 0.6050779607728445, "grad_norm": 7.598491470305341, "learning_rate": 3.562127143755922e-06, "loss": 17.9665, "step": 33102 }, { "epoch": 0.605096239969291, "grad_norm": 6.041384229686083, "learning_rate": 3.561843638783206e-06, "loss": 17.2106, "step": 33103 }, { "epoch": 0.6051145191657374, "grad_norm": 6.805258359921933, "learning_rate": 3.5615601388511054e-06, "loss": 17.6481, "step": 33104 }, { "epoch": 0.605132798362184, "grad_norm": 8.041945199432915, "learning_rate": 3.561276643960612e-06, "loss": 18.0715, "step": 33105 }, { "epoch": 0.6051510775586305, "grad_norm": 5.7349121042231594, "learning_rate": 3.5609931541127175e-06, "loss": 17.3391, "step": 33106 }, { "epoch": 0.605169356755077, "grad_norm": 6.333647445842324, "learning_rate": 3.5607096693084177e-06, "loss": 17.5953, "step": 33107 }, { "epoch": 0.6051876359515236, "grad_norm": 7.329898925151455, "learning_rate": 3.5604261895487057e-06, "loss": 17.7159, "step": 33108 }, { "epoch": 0.6052059151479701, "grad_norm": 6.2015974630822726, "learning_rate": 3.560142714834574e-06, "loss": 17.5975, "step": 33109 }, { "epoch": 0.6052241943444167, "grad_norm": 6.2189533434454445, "learning_rate": 3.5598592451670187e-06, "loss": 17.4507, "step": 33110 }, { "epoch": 0.6052424735408631, "grad_norm": 14.363030343571149, "learning_rate": 3.5595757805470317e-06, "loss": 17.1739, "step": 33111 }, { "epoch": 0.6052607527373096, "grad_norm": 6.104242843310178, "learning_rate": 3.5592923209756046e-06, "loss": 17.5429, "step": 33112 }, { "epoch": 0.6052790319337562, "grad_norm": 6.211276858514107, "learning_rate": 3.559008866453735e-06, "loss": 17.4567, "step": 33113 }, { "epoch": 0.6052973111302027, "grad_norm": 7.112713789168579, "learning_rate": 3.5587254169824127e-06, "loss": 17.741, "step": 33114 }, { "epoch": 0.6053155903266493, "grad_norm": 6.245865515041567, "learning_rate": 3.5584419725626347e-06, "loss": 17.5431, "step": 33115 }, { "epoch": 0.6053338695230958, "grad_norm": 5.910653019596408, "learning_rate": 3.5581585331953918e-06, "loss": 17.222, "step": 33116 }, { "epoch": 0.6053521487195422, "grad_norm": 7.0455406183787135, "learning_rate": 3.5578750988816757e-06, "loss": 17.7313, "step": 33117 }, { "epoch": 0.6053704279159888, "grad_norm": 6.977257842369775, "learning_rate": 3.5575916696224856e-06, "loss": 17.3682, "step": 33118 }, { "epoch": 0.6053887071124353, "grad_norm": 6.693368316758092, "learning_rate": 3.557308245418811e-06, "loss": 17.4471, "step": 33119 }, { "epoch": 0.6054069863088819, "grad_norm": 5.7610644131911215, "learning_rate": 3.5570248262716444e-06, "loss": 17.1369, "step": 33120 }, { "epoch": 0.6054252655053284, "grad_norm": 8.385289871495107, "learning_rate": 3.556741412181981e-06, "loss": 18.224, "step": 33121 }, { "epoch": 0.6054435447017749, "grad_norm": 5.7858620383142645, "learning_rate": 3.5564580031508146e-06, "loss": 16.9365, "step": 33122 }, { "epoch": 0.6054618238982215, "grad_norm": 7.441955166607329, "learning_rate": 3.556174599179136e-06, "loss": 17.6277, "step": 33123 }, { "epoch": 0.6054801030946679, "grad_norm": 8.920112472363783, "learning_rate": 3.555891200267942e-06, "loss": 17.6703, "step": 33124 }, { "epoch": 0.6054983822911145, "grad_norm": 5.340291582569287, "learning_rate": 3.555607806418223e-06, "loss": 17.1457, "step": 33125 }, { "epoch": 0.605516661487561, "grad_norm": 5.409219749087977, "learning_rate": 3.555324417630972e-06, "loss": 17.2291, "step": 33126 }, { "epoch": 0.6055349406840075, "grad_norm": 6.594275416274435, "learning_rate": 3.555041033907186e-06, "loss": 17.2465, "step": 33127 }, { "epoch": 0.6055532198804541, "grad_norm": 6.58247512323975, "learning_rate": 3.5547576552478533e-06, "loss": 17.4144, "step": 33128 }, { "epoch": 0.6055714990769006, "grad_norm": 7.2480803211044, "learning_rate": 3.5544742816539723e-06, "loss": 17.8151, "step": 33129 }, { "epoch": 0.6055897782733471, "grad_norm": 5.619971299682602, "learning_rate": 3.5541909131265325e-06, "loss": 17.1627, "step": 33130 }, { "epoch": 0.6056080574697936, "grad_norm": 5.179542611188283, "learning_rate": 3.5539075496665266e-06, "loss": 16.9289, "step": 33131 }, { "epoch": 0.6056263366662401, "grad_norm": 6.822893881357869, "learning_rate": 3.5536241912749515e-06, "loss": 17.7473, "step": 33132 }, { "epoch": 0.6056446158626867, "grad_norm": 5.572723948674215, "learning_rate": 3.553340837952798e-06, "loss": 17.2557, "step": 33133 }, { "epoch": 0.6056628950591332, "grad_norm": 7.09837323383666, "learning_rate": 3.553057489701057e-06, "loss": 17.4669, "step": 33134 }, { "epoch": 0.6056811742555798, "grad_norm": 5.945918513065621, "learning_rate": 3.5527741465207255e-06, "loss": 17.4044, "step": 33135 }, { "epoch": 0.6056994534520262, "grad_norm": 4.241953900195768, "learning_rate": 3.5524908084127956e-06, "loss": 16.603, "step": 33136 }, { "epoch": 0.6057177326484727, "grad_norm": 6.363469129170825, "learning_rate": 3.552207475378258e-06, "loss": 17.4472, "step": 33137 }, { "epoch": 0.6057360118449193, "grad_norm": 6.312310590824995, "learning_rate": 3.5519241474181097e-06, "loss": 17.0762, "step": 33138 }, { "epoch": 0.6057542910413658, "grad_norm": 5.7027622011893, "learning_rate": 3.5516408245333405e-06, "loss": 17.2165, "step": 33139 }, { "epoch": 0.6057725702378124, "grad_norm": 5.575829937513563, "learning_rate": 3.5513575067249435e-06, "loss": 17.0891, "step": 33140 }, { "epoch": 0.6057908494342589, "grad_norm": 6.161202983890385, "learning_rate": 3.551074193993915e-06, "loss": 17.2603, "step": 33141 }, { "epoch": 0.6058091286307054, "grad_norm": 6.905207676569673, "learning_rate": 3.5507908863412456e-06, "loss": 17.784, "step": 33142 }, { "epoch": 0.6058274078271519, "grad_norm": 6.049651632153991, "learning_rate": 3.550507583767926e-06, "loss": 17.4289, "step": 33143 }, { "epoch": 0.6058456870235984, "grad_norm": 6.502950063816999, "learning_rate": 3.5502242862749525e-06, "loss": 17.7209, "step": 33144 }, { "epoch": 0.605863966220045, "grad_norm": 7.208600412158953, "learning_rate": 3.5499409938633168e-06, "loss": 17.9104, "step": 33145 }, { "epoch": 0.6058822454164915, "grad_norm": 5.451908379439528, "learning_rate": 3.5496577065340134e-06, "loss": 17.0531, "step": 33146 }, { "epoch": 0.605900524612938, "grad_norm": 5.575228285097219, "learning_rate": 3.5493744242880342e-06, "loss": 17.0846, "step": 33147 }, { "epoch": 0.6059188038093846, "grad_norm": 6.640170693565726, "learning_rate": 3.5490911471263695e-06, "loss": 17.3871, "step": 33148 }, { "epoch": 0.605937083005831, "grad_norm": 6.17270708639084, "learning_rate": 3.5488078750500153e-06, "loss": 17.3657, "step": 33149 }, { "epoch": 0.6059553622022776, "grad_norm": 7.473350339212035, "learning_rate": 3.5485246080599656e-06, "loss": 17.4841, "step": 33150 }, { "epoch": 0.6059736413987241, "grad_norm": 5.775613713750823, "learning_rate": 3.5482413461572085e-06, "loss": 17.2169, "step": 33151 }, { "epoch": 0.6059919205951706, "grad_norm": 6.5325006728198955, "learning_rate": 3.5479580893427414e-06, "loss": 17.5503, "step": 33152 }, { "epoch": 0.6060101997916172, "grad_norm": 6.886895779358805, "learning_rate": 3.547674837617554e-06, "loss": 17.6427, "step": 33153 }, { "epoch": 0.6060284789880637, "grad_norm": 6.433103796970675, "learning_rate": 3.5473915909826394e-06, "loss": 17.5922, "step": 33154 }, { "epoch": 0.6060467581845103, "grad_norm": 7.365693981476474, "learning_rate": 3.5471083494389935e-06, "loss": 17.6794, "step": 33155 }, { "epoch": 0.6060650373809567, "grad_norm": 6.5620082410217755, "learning_rate": 3.546825112987606e-06, "loss": 17.4139, "step": 33156 }, { "epoch": 0.6060833165774032, "grad_norm": 6.944629130607612, "learning_rate": 3.5465418816294696e-06, "loss": 17.6606, "step": 33157 }, { "epoch": 0.6061015957738498, "grad_norm": 6.448443163680839, "learning_rate": 3.546258655365578e-06, "loss": 17.1559, "step": 33158 }, { "epoch": 0.6061198749702963, "grad_norm": 5.295248052503369, "learning_rate": 3.5459754341969234e-06, "loss": 17.1449, "step": 33159 }, { "epoch": 0.6061381541667429, "grad_norm": 6.0346611181838945, "learning_rate": 3.5456922181245e-06, "loss": 17.4329, "step": 33160 }, { "epoch": 0.6061564333631894, "grad_norm": 6.85855388695592, "learning_rate": 3.5454090071492986e-06, "loss": 17.5309, "step": 33161 }, { "epoch": 0.6061747125596358, "grad_norm": 5.568919141330256, "learning_rate": 3.545125801272312e-06, "loss": 17.0956, "step": 33162 }, { "epoch": 0.6061929917560824, "grad_norm": 7.207076262187338, "learning_rate": 3.5448426004945337e-06, "loss": 18.1943, "step": 33163 }, { "epoch": 0.6062112709525289, "grad_norm": 5.234225917847535, "learning_rate": 3.544559404816956e-06, "loss": 16.6812, "step": 33164 }, { "epoch": 0.6062295501489755, "grad_norm": 6.0990983993881995, "learning_rate": 3.5442762142405696e-06, "loss": 17.0867, "step": 33165 }, { "epoch": 0.606247829345422, "grad_norm": 6.938237265991977, "learning_rate": 3.5439930287663704e-06, "loss": 17.8204, "step": 33166 }, { "epoch": 0.6062661085418685, "grad_norm": 6.873752508603623, "learning_rate": 3.543709848395349e-06, "loss": 17.5969, "step": 33167 }, { "epoch": 0.6062843877383151, "grad_norm": 5.967969520090436, "learning_rate": 3.5434266731284973e-06, "loss": 17.1599, "step": 33168 }, { "epoch": 0.6063026669347615, "grad_norm": 7.0204881500158995, "learning_rate": 3.5431435029668102e-06, "loss": 17.8033, "step": 33169 }, { "epoch": 0.6063209461312081, "grad_norm": 6.08251375047809, "learning_rate": 3.542860337911278e-06, "loss": 17.5666, "step": 33170 }, { "epoch": 0.6063392253276546, "grad_norm": 6.642887127156407, "learning_rate": 3.542577177962893e-06, "loss": 17.8248, "step": 33171 }, { "epoch": 0.6063575045241011, "grad_norm": 5.883605230947418, "learning_rate": 3.5422940231226503e-06, "loss": 17.2693, "step": 33172 }, { "epoch": 0.6063757837205477, "grad_norm": 5.858181633445932, "learning_rate": 3.542010873391541e-06, "loss": 16.998, "step": 33173 }, { "epoch": 0.6063940629169942, "grad_norm": 5.907826378525452, "learning_rate": 3.5417277287705545e-06, "loss": 17.2519, "step": 33174 }, { "epoch": 0.6064123421134406, "grad_norm": 6.783692589711631, "learning_rate": 3.541444589260687e-06, "loss": 17.8461, "step": 33175 }, { "epoch": 0.6064306213098872, "grad_norm": 6.000147829140427, "learning_rate": 3.541161454862928e-06, "loss": 17.1964, "step": 33176 }, { "epoch": 0.6064489005063337, "grad_norm": 7.218213772636069, "learning_rate": 3.5408783255782744e-06, "loss": 17.5724, "step": 33177 }, { "epoch": 0.6064671797027803, "grad_norm": 5.786505992915807, "learning_rate": 3.5405952014077154e-06, "loss": 17.0399, "step": 33178 }, { "epoch": 0.6064854588992268, "grad_norm": 6.079505291100081, "learning_rate": 3.540312082352242e-06, "loss": 17.1333, "step": 33179 }, { "epoch": 0.6065037380956733, "grad_norm": 7.334314564327134, "learning_rate": 3.540028968412848e-06, "loss": 18.0445, "step": 33180 }, { "epoch": 0.6065220172921199, "grad_norm": 5.547027787273339, "learning_rate": 3.5397458595905274e-06, "loss": 17.0726, "step": 33181 }, { "epoch": 0.6065402964885663, "grad_norm": 6.150374045615766, "learning_rate": 3.5394627558862695e-06, "loss": 17.3094, "step": 33182 }, { "epoch": 0.6065585756850129, "grad_norm": 5.790773652160141, "learning_rate": 3.5391796573010683e-06, "loss": 17.2753, "step": 33183 }, { "epoch": 0.6065768548814594, "grad_norm": 4.944898548601789, "learning_rate": 3.538896563835916e-06, "loss": 16.8753, "step": 33184 }, { "epoch": 0.6065951340779059, "grad_norm": 5.785746097282152, "learning_rate": 3.538613475491803e-06, "loss": 17.2471, "step": 33185 }, { "epoch": 0.6066134132743525, "grad_norm": 5.334873340102687, "learning_rate": 3.5383303922697247e-06, "loss": 16.9405, "step": 33186 }, { "epoch": 0.606631692470799, "grad_norm": 5.607110507343436, "learning_rate": 3.538047314170672e-06, "loss": 17.0962, "step": 33187 }, { "epoch": 0.6066499716672455, "grad_norm": 6.921619225765933, "learning_rate": 3.537764241195635e-06, "loss": 17.8301, "step": 33188 }, { "epoch": 0.606668250863692, "grad_norm": 7.508047053086749, "learning_rate": 3.537481173345607e-06, "loss": 17.6618, "step": 33189 }, { "epoch": 0.6066865300601385, "grad_norm": 5.149727707104781, "learning_rate": 3.5371981106215806e-06, "loss": 16.9422, "step": 33190 }, { "epoch": 0.6067048092565851, "grad_norm": 6.195042973023261, "learning_rate": 3.53691505302455e-06, "loss": 17.1998, "step": 33191 }, { "epoch": 0.6067230884530316, "grad_norm": 6.763008987056749, "learning_rate": 3.5366320005555047e-06, "loss": 17.691, "step": 33192 }, { "epoch": 0.6067413676494782, "grad_norm": 6.310869469308496, "learning_rate": 3.5363489532154356e-06, "loss": 17.5711, "step": 33193 }, { "epoch": 0.6067596468459246, "grad_norm": 6.971535458385756, "learning_rate": 3.5360659110053376e-06, "loss": 17.7957, "step": 33194 }, { "epoch": 0.6067779260423711, "grad_norm": 5.11325091998725, "learning_rate": 3.535782873926202e-06, "loss": 17.0053, "step": 33195 }, { "epoch": 0.6067962052388177, "grad_norm": 5.899297115216957, "learning_rate": 3.535499841979018e-06, "loss": 17.0474, "step": 33196 }, { "epoch": 0.6068144844352642, "grad_norm": 7.835497550692722, "learning_rate": 3.5352168151647824e-06, "loss": 17.7347, "step": 33197 }, { "epoch": 0.6068327636317108, "grad_norm": 7.591660983199527, "learning_rate": 3.534933793484484e-06, "loss": 17.3426, "step": 33198 }, { "epoch": 0.6068510428281573, "grad_norm": 6.907774154619103, "learning_rate": 3.5346507769391147e-06, "loss": 17.7052, "step": 33199 }, { "epoch": 0.6068693220246038, "grad_norm": 7.2005698338497455, "learning_rate": 3.5343677655296695e-06, "loss": 17.9876, "step": 33200 }, { "epoch": 0.6068876012210503, "grad_norm": 6.193318002579171, "learning_rate": 3.534084759257137e-06, "loss": 17.4579, "step": 33201 }, { "epoch": 0.6069058804174968, "grad_norm": 5.822998537460665, "learning_rate": 3.533801758122509e-06, "loss": 17.3801, "step": 33202 }, { "epoch": 0.6069241596139434, "grad_norm": 5.867154359319703, "learning_rate": 3.53351876212678e-06, "loss": 17.3469, "step": 33203 }, { "epoch": 0.6069424388103899, "grad_norm": 6.0093216248215136, "learning_rate": 3.53323577127094e-06, "loss": 17.085, "step": 33204 }, { "epoch": 0.6069607180068364, "grad_norm": 7.648529997629419, "learning_rate": 3.532952785555981e-06, "loss": 18.0233, "step": 33205 }, { "epoch": 0.606978997203283, "grad_norm": 5.843306424725218, "learning_rate": 3.5326698049828956e-06, "loss": 17.1223, "step": 33206 }, { "epoch": 0.6069972763997294, "grad_norm": 6.68743839359229, "learning_rate": 3.5323868295526744e-06, "loss": 17.5827, "step": 33207 }, { "epoch": 0.607015555596176, "grad_norm": 6.522525189175376, "learning_rate": 3.5321038592663103e-06, "loss": 17.5672, "step": 33208 }, { "epoch": 0.6070338347926225, "grad_norm": 6.525560340156604, "learning_rate": 3.5318208941247967e-06, "loss": 17.3734, "step": 33209 }, { "epoch": 0.607052113989069, "grad_norm": 6.102792459615026, "learning_rate": 3.5315379341291206e-06, "loss": 17.4288, "step": 33210 }, { "epoch": 0.6070703931855156, "grad_norm": 9.475829533505681, "learning_rate": 3.5312549792802788e-06, "loss": 18.1675, "step": 33211 }, { "epoch": 0.6070886723819621, "grad_norm": 5.432577821678315, "learning_rate": 3.5309720295792602e-06, "loss": 17.2905, "step": 33212 }, { "epoch": 0.6071069515784087, "grad_norm": 6.138805879465926, "learning_rate": 3.530689085027056e-06, "loss": 17.3595, "step": 33213 }, { "epoch": 0.6071252307748551, "grad_norm": 7.82194675854529, "learning_rate": 3.530406145624661e-06, "loss": 17.8059, "step": 33214 }, { "epoch": 0.6071435099713016, "grad_norm": 8.147094460390491, "learning_rate": 3.5301232113730646e-06, "loss": 18.4393, "step": 33215 }, { "epoch": 0.6071617891677482, "grad_norm": 8.405732215748957, "learning_rate": 3.529840282273257e-06, "loss": 18.2527, "step": 33216 }, { "epoch": 0.6071800683641947, "grad_norm": 7.294645399010285, "learning_rate": 3.5295573583262328e-06, "loss": 17.6971, "step": 33217 }, { "epoch": 0.6071983475606413, "grad_norm": 6.815575236757629, "learning_rate": 3.5292744395329836e-06, "loss": 17.4106, "step": 33218 }, { "epoch": 0.6072166267570878, "grad_norm": 5.775996366460811, "learning_rate": 3.5289915258944974e-06, "loss": 17.2552, "step": 33219 }, { "epoch": 0.6072349059535342, "grad_norm": 6.24407886277751, "learning_rate": 3.5287086174117707e-06, "loss": 17.3425, "step": 33220 }, { "epoch": 0.6072531851499808, "grad_norm": 6.972065462805095, "learning_rate": 3.5284257140857903e-06, "loss": 17.6122, "step": 33221 }, { "epoch": 0.6072714643464273, "grad_norm": 6.514942781250804, "learning_rate": 3.528142815917551e-06, "loss": 17.5339, "step": 33222 }, { "epoch": 0.6072897435428739, "grad_norm": 5.188212891061043, "learning_rate": 3.5278599229080445e-06, "loss": 16.8854, "step": 33223 }, { "epoch": 0.6073080227393204, "grad_norm": 7.0361981341731346, "learning_rate": 3.5275770350582595e-06, "loss": 17.8635, "step": 33224 }, { "epoch": 0.6073263019357669, "grad_norm": 5.183914482341657, "learning_rate": 3.5272941523691912e-06, "loss": 17.0904, "step": 33225 }, { "epoch": 0.6073445811322135, "grad_norm": 6.450586712722079, "learning_rate": 3.5270112748418282e-06, "loss": 17.5978, "step": 33226 }, { "epoch": 0.6073628603286599, "grad_norm": 6.189951470804508, "learning_rate": 3.5267284024771616e-06, "loss": 17.2085, "step": 33227 }, { "epoch": 0.6073811395251065, "grad_norm": 5.79691306476459, "learning_rate": 3.526445535276186e-06, "loss": 17.2976, "step": 33228 }, { "epoch": 0.607399418721553, "grad_norm": 6.587223686038796, "learning_rate": 3.52616267323989e-06, "loss": 17.495, "step": 33229 }, { "epoch": 0.6074176979179995, "grad_norm": 5.4248168112298645, "learning_rate": 3.525879816369265e-06, "loss": 17.0747, "step": 33230 }, { "epoch": 0.6074359771144461, "grad_norm": 5.586684872022026, "learning_rate": 3.5255969646653056e-06, "loss": 17.3101, "step": 33231 }, { "epoch": 0.6074542563108926, "grad_norm": 6.765062500608914, "learning_rate": 3.5253141181290007e-06, "loss": 18.0512, "step": 33232 }, { "epoch": 0.6074725355073392, "grad_norm": 6.980833136609513, "learning_rate": 3.525031276761339e-06, "loss": 17.5693, "step": 33233 }, { "epoch": 0.6074908147037856, "grad_norm": 6.9007053443690225, "learning_rate": 3.524748440563317e-06, "loss": 17.4763, "step": 33234 }, { "epoch": 0.6075090939002321, "grad_norm": 6.973590919942035, "learning_rate": 3.524465609535924e-06, "loss": 18.0304, "step": 33235 }, { "epoch": 0.6075273730966787, "grad_norm": 5.7274454606791565, "learning_rate": 3.524182783680149e-06, "loss": 17.2901, "step": 33236 }, { "epoch": 0.6075456522931252, "grad_norm": 6.314377370421193, "learning_rate": 3.5238999629969873e-06, "loss": 17.7219, "step": 33237 }, { "epoch": 0.6075639314895718, "grad_norm": 7.072095735697411, "learning_rate": 3.5236171474874265e-06, "loss": 17.5768, "step": 33238 }, { "epoch": 0.6075822106860183, "grad_norm": 6.095232534770108, "learning_rate": 3.5233343371524597e-06, "loss": 17.4945, "step": 33239 }, { "epoch": 0.6076004898824647, "grad_norm": 6.165754580757595, "learning_rate": 3.5230515319930803e-06, "loss": 17.5213, "step": 33240 }, { "epoch": 0.6076187690789113, "grad_norm": 8.170698519143556, "learning_rate": 3.5227687320102737e-06, "loss": 17.3234, "step": 33241 }, { "epoch": 0.6076370482753578, "grad_norm": 6.018880967796215, "learning_rate": 3.5224859372050373e-06, "loss": 17.676, "step": 33242 }, { "epoch": 0.6076553274718043, "grad_norm": 6.33336032624406, "learning_rate": 3.522203147578358e-06, "loss": 17.4543, "step": 33243 }, { "epoch": 0.6076736066682509, "grad_norm": 5.036980280935133, "learning_rate": 3.5219203631312282e-06, "loss": 16.9267, "step": 33244 }, { "epoch": 0.6076918858646974, "grad_norm": 8.046834315576762, "learning_rate": 3.5216375838646406e-06, "loss": 17.8083, "step": 33245 }, { "epoch": 0.607710165061144, "grad_norm": 6.26602636022556, "learning_rate": 3.5213548097795847e-06, "loss": 17.4669, "step": 33246 }, { "epoch": 0.6077284442575904, "grad_norm": 8.134766495117958, "learning_rate": 3.5210720408770504e-06, "loss": 17.9521, "step": 33247 }, { "epoch": 0.6077467234540369, "grad_norm": 5.210370047360091, "learning_rate": 3.520789277158031e-06, "loss": 17.0389, "step": 33248 }, { "epoch": 0.6077650026504835, "grad_norm": 5.712242486388063, "learning_rate": 3.520506518623519e-06, "loss": 17.0028, "step": 33249 }, { "epoch": 0.60778328184693, "grad_norm": 5.9943791453493604, "learning_rate": 3.5202237652744996e-06, "loss": 17.4064, "step": 33250 }, { "epoch": 0.6078015610433766, "grad_norm": 7.077752074304651, "learning_rate": 3.51994101711197e-06, "loss": 17.8875, "step": 33251 }, { "epoch": 0.607819840239823, "grad_norm": 7.093798017946755, "learning_rate": 3.519658274136917e-06, "loss": 17.5961, "step": 33252 }, { "epoch": 0.6078381194362695, "grad_norm": 7.212526080650859, "learning_rate": 3.519375536350334e-06, "loss": 17.7667, "step": 33253 }, { "epoch": 0.6078563986327161, "grad_norm": 8.416993104661275, "learning_rate": 3.5190928037532123e-06, "loss": 18.0015, "step": 33254 }, { "epoch": 0.6078746778291626, "grad_norm": 6.231238720887091, "learning_rate": 3.5188100763465405e-06, "loss": 17.8181, "step": 33255 }, { "epoch": 0.6078929570256092, "grad_norm": 7.2494970156349146, "learning_rate": 3.518527354131312e-06, "loss": 17.7519, "step": 33256 }, { "epoch": 0.6079112362220557, "grad_norm": 5.237962318063896, "learning_rate": 3.5182446371085156e-06, "loss": 16.8514, "step": 33257 }, { "epoch": 0.6079295154185022, "grad_norm": 5.351800950465089, "learning_rate": 3.5179619252791425e-06, "loss": 17.0868, "step": 33258 }, { "epoch": 0.6079477946149487, "grad_norm": 7.130543744836969, "learning_rate": 3.5176792186441868e-06, "loss": 17.786, "step": 33259 }, { "epoch": 0.6079660738113952, "grad_norm": 6.771461613276505, "learning_rate": 3.5173965172046355e-06, "loss": 17.5469, "step": 33260 }, { "epoch": 0.6079843530078418, "grad_norm": 6.386416830578383, "learning_rate": 3.5171138209614797e-06, "loss": 17.2817, "step": 33261 }, { "epoch": 0.6080026322042883, "grad_norm": 7.574869769602362, "learning_rate": 3.516831129915712e-06, "loss": 18.1577, "step": 33262 }, { "epoch": 0.6080209114007348, "grad_norm": 5.904807766178069, "learning_rate": 3.5165484440683235e-06, "loss": 17.0961, "step": 33263 }, { "epoch": 0.6080391905971814, "grad_norm": 5.755393663389339, "learning_rate": 3.5162657634203017e-06, "loss": 17.0328, "step": 33264 }, { "epoch": 0.6080574697936278, "grad_norm": 4.971347850353096, "learning_rate": 3.515983087972642e-06, "loss": 17.0374, "step": 33265 }, { "epoch": 0.6080757489900744, "grad_norm": 4.803882411881691, "learning_rate": 3.5157004177263322e-06, "loss": 16.8319, "step": 33266 }, { "epoch": 0.6080940281865209, "grad_norm": 7.4900307124825005, "learning_rate": 3.515417752682362e-06, "loss": 17.7897, "step": 33267 }, { "epoch": 0.6081123073829674, "grad_norm": 6.168006760201969, "learning_rate": 3.5151350928417257e-06, "loss": 17.423, "step": 33268 }, { "epoch": 0.608130586579414, "grad_norm": 6.389349022627041, "learning_rate": 3.51485243820541e-06, "loss": 17.5418, "step": 33269 }, { "epoch": 0.6081488657758605, "grad_norm": 10.785990678772313, "learning_rate": 3.5145697887744103e-06, "loss": 17.4729, "step": 33270 }, { "epoch": 0.6081671449723071, "grad_norm": 6.8669714805692985, "learning_rate": 3.514287144549713e-06, "loss": 17.3867, "step": 33271 }, { "epoch": 0.6081854241687535, "grad_norm": 5.441043304967435, "learning_rate": 3.5140045055323093e-06, "loss": 16.9156, "step": 33272 }, { "epoch": 0.6082037033652, "grad_norm": 6.40949107573235, "learning_rate": 3.513721871723193e-06, "loss": 17.7438, "step": 33273 }, { "epoch": 0.6082219825616466, "grad_norm": 6.186396486837644, "learning_rate": 3.513439243123353e-06, "loss": 17.1708, "step": 33274 }, { "epoch": 0.6082402617580931, "grad_norm": 6.971461735061615, "learning_rate": 3.513156619733776e-06, "loss": 17.3754, "step": 33275 }, { "epoch": 0.6082585409545397, "grad_norm": 6.40785986232837, "learning_rate": 3.5128740015554587e-06, "loss": 17.2837, "step": 33276 }, { "epoch": 0.6082768201509862, "grad_norm": 7.4704130942188245, "learning_rate": 3.5125913885893894e-06, "loss": 17.8391, "step": 33277 }, { "epoch": 0.6082950993474326, "grad_norm": 5.537107496529591, "learning_rate": 3.5123087808365563e-06, "loss": 17.3222, "step": 33278 }, { "epoch": 0.6083133785438792, "grad_norm": 7.711276452498146, "learning_rate": 3.5120261782979535e-06, "loss": 17.933, "step": 33279 }, { "epoch": 0.6083316577403257, "grad_norm": 6.03981171062782, "learning_rate": 3.5117435809745693e-06, "loss": 17.2051, "step": 33280 }, { "epoch": 0.6083499369367723, "grad_norm": 6.640916801294556, "learning_rate": 3.511460988867393e-06, "loss": 17.3413, "step": 33281 }, { "epoch": 0.6083682161332188, "grad_norm": 8.608288639491942, "learning_rate": 3.51117840197742e-06, "loss": 17.7404, "step": 33282 }, { "epoch": 0.6083864953296653, "grad_norm": 6.333254722367802, "learning_rate": 3.5108958203056343e-06, "loss": 17.2809, "step": 33283 }, { "epoch": 0.6084047745261119, "grad_norm": 6.081921118604687, "learning_rate": 3.5106132438530314e-06, "loss": 17.3009, "step": 33284 }, { "epoch": 0.6084230537225583, "grad_norm": 7.357136262026578, "learning_rate": 3.5103306726206e-06, "loss": 17.9215, "step": 33285 }, { "epoch": 0.6084413329190049, "grad_norm": 6.3580742241692265, "learning_rate": 3.5100481066093285e-06, "loss": 17.2614, "step": 33286 }, { "epoch": 0.6084596121154514, "grad_norm": 5.818176942274753, "learning_rate": 3.5097655458202118e-06, "loss": 17.2982, "step": 33287 }, { "epoch": 0.6084778913118979, "grad_norm": 5.57891476051993, "learning_rate": 3.509482990254236e-06, "loss": 16.9517, "step": 33288 }, { "epoch": 0.6084961705083445, "grad_norm": 5.935913176588273, "learning_rate": 3.5092004399123923e-06, "loss": 17.4475, "step": 33289 }, { "epoch": 0.608514449704791, "grad_norm": 7.68246713047213, "learning_rate": 3.5089178947956726e-06, "loss": 18.1469, "step": 33290 }, { "epoch": 0.6085327289012376, "grad_norm": 8.615798037533558, "learning_rate": 3.5086353549050673e-06, "loss": 19.0433, "step": 33291 }, { "epoch": 0.608551008097684, "grad_norm": 4.86979660570748, "learning_rate": 3.5083528202415633e-06, "loss": 17.0509, "step": 33292 }, { "epoch": 0.6085692872941305, "grad_norm": 7.548425935334228, "learning_rate": 3.5080702908061546e-06, "loss": 18.0083, "step": 33293 }, { "epoch": 0.6085875664905771, "grad_norm": 7.007580252294947, "learning_rate": 3.5077877665998306e-06, "loss": 17.6652, "step": 33294 }, { "epoch": 0.6086058456870236, "grad_norm": 5.005882013741339, "learning_rate": 3.5075052476235793e-06, "loss": 17.0243, "step": 33295 }, { "epoch": 0.6086241248834702, "grad_norm": 5.973659905055878, "learning_rate": 3.507222733878395e-06, "loss": 17.1997, "step": 33296 }, { "epoch": 0.6086424040799167, "grad_norm": 8.142902606744377, "learning_rate": 3.506940225365263e-06, "loss": 17.9231, "step": 33297 }, { "epoch": 0.6086606832763631, "grad_norm": 7.028762418900318, "learning_rate": 3.5066577220851768e-06, "loss": 17.7047, "step": 33298 }, { "epoch": 0.6086789624728097, "grad_norm": 5.907407731499932, "learning_rate": 3.506375224039127e-06, "loss": 17.3293, "step": 33299 }, { "epoch": 0.6086972416692562, "grad_norm": 6.9683905538055635, "learning_rate": 3.5060927312281002e-06, "loss": 17.8627, "step": 33300 }, { "epoch": 0.6087155208657028, "grad_norm": 7.26919100101472, "learning_rate": 3.5058102436530907e-06, "loss": 17.9003, "step": 33301 }, { "epoch": 0.6087338000621493, "grad_norm": 7.884902454773542, "learning_rate": 3.505527761315086e-06, "loss": 18.1115, "step": 33302 }, { "epoch": 0.6087520792585958, "grad_norm": 5.750312142080398, "learning_rate": 3.505245284215075e-06, "loss": 17.3327, "step": 33303 }, { "epoch": 0.6087703584550423, "grad_norm": 5.061706833445027, "learning_rate": 3.5049628123540523e-06, "loss": 17.1428, "step": 33304 }, { "epoch": 0.6087886376514888, "grad_norm": 8.295288541855934, "learning_rate": 3.504680345733005e-06, "loss": 18.2646, "step": 33305 }, { "epoch": 0.6088069168479354, "grad_norm": 6.529138948737983, "learning_rate": 3.5043978843529216e-06, "loss": 17.3731, "step": 33306 }, { "epoch": 0.6088251960443819, "grad_norm": 5.301170250222167, "learning_rate": 3.504115428214795e-06, "loss": 17.0225, "step": 33307 }, { "epoch": 0.6088434752408284, "grad_norm": 6.3820147616734335, "learning_rate": 3.5038329773196146e-06, "loss": 17.3211, "step": 33308 }, { "epoch": 0.608861754437275, "grad_norm": 5.411271519341588, "learning_rate": 3.5035505316683675e-06, "loss": 17.097, "step": 33309 }, { "epoch": 0.6088800336337215, "grad_norm": 9.052717857413748, "learning_rate": 3.5032680912620487e-06, "loss": 17.7705, "step": 33310 }, { "epoch": 0.6088983128301679, "grad_norm": 6.080339738855158, "learning_rate": 3.502985656101644e-06, "loss": 17.1827, "step": 33311 }, { "epoch": 0.6089165920266145, "grad_norm": 7.326933572589613, "learning_rate": 3.5027032261881434e-06, "loss": 18.1019, "step": 33312 }, { "epoch": 0.608934871223061, "grad_norm": 5.667134107245461, "learning_rate": 3.5024208015225402e-06, "loss": 17.3172, "step": 33313 }, { "epoch": 0.6089531504195076, "grad_norm": 5.943167683648383, "learning_rate": 3.5021383821058202e-06, "loss": 17.2757, "step": 33314 }, { "epoch": 0.6089714296159541, "grad_norm": 7.093227848320166, "learning_rate": 3.5018559679389774e-06, "loss": 17.9673, "step": 33315 }, { "epoch": 0.6089897088124006, "grad_norm": 5.136852126490453, "learning_rate": 3.501573559022998e-06, "loss": 16.9321, "step": 33316 }, { "epoch": 0.6090079880088471, "grad_norm": 6.438555868873693, "learning_rate": 3.5012911553588722e-06, "loss": 17.4235, "step": 33317 }, { "epoch": 0.6090262672052936, "grad_norm": 5.830289887061442, "learning_rate": 3.5010087569475936e-06, "loss": 17.3224, "step": 33318 }, { "epoch": 0.6090445464017402, "grad_norm": 5.770610820246089, "learning_rate": 3.500726363790149e-06, "loss": 17.4026, "step": 33319 }, { "epoch": 0.6090628255981867, "grad_norm": 6.162799265705503, "learning_rate": 3.5004439758875253e-06, "loss": 17.3875, "step": 33320 }, { "epoch": 0.6090811047946332, "grad_norm": 8.027370608844967, "learning_rate": 3.5001615932407173e-06, "loss": 17.5421, "step": 33321 }, { "epoch": 0.6090993839910798, "grad_norm": 5.521317838936144, "learning_rate": 3.499879215850714e-06, "loss": 17.2558, "step": 33322 }, { "epoch": 0.6091176631875262, "grad_norm": 6.3184510233632345, "learning_rate": 3.499596843718501e-06, "loss": 17.1444, "step": 33323 }, { "epoch": 0.6091359423839728, "grad_norm": 6.261463615094108, "learning_rate": 3.499314476845074e-06, "loss": 17.3817, "step": 33324 }, { "epoch": 0.6091542215804193, "grad_norm": 6.0960573452025795, "learning_rate": 3.499032115231418e-06, "loss": 17.1401, "step": 33325 }, { "epoch": 0.6091725007768658, "grad_norm": 5.659617184975988, "learning_rate": 3.4987497588785226e-06, "loss": 17.1947, "step": 33326 }, { "epoch": 0.6091907799733124, "grad_norm": 5.642008981217826, "learning_rate": 3.4984674077873816e-06, "loss": 17.1104, "step": 33327 }, { "epoch": 0.6092090591697589, "grad_norm": 7.236702432849672, "learning_rate": 3.4981850619589795e-06, "loss": 18.0287, "step": 33328 }, { "epoch": 0.6092273383662055, "grad_norm": 7.500880547778233, "learning_rate": 3.4979027213943116e-06, "loss": 17.7098, "step": 33329 }, { "epoch": 0.6092456175626519, "grad_norm": 7.171482384158403, "learning_rate": 3.4976203860943627e-06, "loss": 17.8803, "step": 33330 }, { "epoch": 0.6092638967590984, "grad_norm": 6.043927833001686, "learning_rate": 3.4973380560601228e-06, "loss": 17.3487, "step": 33331 }, { "epoch": 0.609282175955545, "grad_norm": 7.461273240896432, "learning_rate": 3.4970557312925846e-06, "loss": 17.9076, "step": 33332 }, { "epoch": 0.6093004551519915, "grad_norm": 6.708562382147988, "learning_rate": 3.496773411792736e-06, "loss": 17.3336, "step": 33333 }, { "epoch": 0.6093187343484381, "grad_norm": 7.443153843293851, "learning_rate": 3.496491097561564e-06, "loss": 17.9841, "step": 33334 }, { "epoch": 0.6093370135448846, "grad_norm": 8.395596743545982, "learning_rate": 3.4962087886000608e-06, "loss": 18.3089, "step": 33335 }, { "epoch": 0.609355292741331, "grad_norm": 6.775902173738962, "learning_rate": 3.495926484909217e-06, "loss": 17.2371, "step": 33336 }, { "epoch": 0.6093735719377776, "grad_norm": 6.637231600960436, "learning_rate": 3.4956441864900175e-06, "loss": 17.4514, "step": 33337 }, { "epoch": 0.6093918511342241, "grad_norm": 6.645186661104092, "learning_rate": 3.4953618933434576e-06, "loss": 17.4486, "step": 33338 }, { "epoch": 0.6094101303306707, "grad_norm": 8.322558015041121, "learning_rate": 3.4950796054705215e-06, "loss": 17.6759, "step": 33339 }, { "epoch": 0.6094284095271172, "grad_norm": 7.20670879641971, "learning_rate": 3.494797322872201e-06, "loss": 17.8951, "step": 33340 }, { "epoch": 0.6094466887235637, "grad_norm": 6.418594000239719, "learning_rate": 3.494515045549487e-06, "loss": 17.2801, "step": 33341 }, { "epoch": 0.6094649679200103, "grad_norm": 5.412701864682939, "learning_rate": 3.4942327735033664e-06, "loss": 17.0078, "step": 33342 }, { "epoch": 0.6094832471164567, "grad_norm": 5.978454267421786, "learning_rate": 3.493950506734828e-06, "loss": 17.324, "step": 33343 }, { "epoch": 0.6095015263129033, "grad_norm": 6.872235837295807, "learning_rate": 3.493668245244863e-06, "loss": 17.3012, "step": 33344 }, { "epoch": 0.6095198055093498, "grad_norm": 5.543693342726004, "learning_rate": 3.493385989034459e-06, "loss": 17.0704, "step": 33345 }, { "epoch": 0.6095380847057963, "grad_norm": 7.452668540810214, "learning_rate": 3.4931037381046084e-06, "loss": 17.4723, "step": 33346 }, { "epoch": 0.6095563639022429, "grad_norm": 6.315811589583132, "learning_rate": 3.4928214924562986e-06, "loss": 17.5386, "step": 33347 }, { "epoch": 0.6095746430986894, "grad_norm": 6.087340033635555, "learning_rate": 3.4925392520905166e-06, "loss": 17.5642, "step": 33348 }, { "epoch": 0.609592922295136, "grad_norm": 6.710964285767885, "learning_rate": 3.492257017008255e-06, "loss": 17.8761, "step": 33349 }, { "epoch": 0.6096112014915824, "grad_norm": 5.952225328591664, "learning_rate": 3.4919747872105024e-06, "loss": 17.0793, "step": 33350 }, { "epoch": 0.6096294806880289, "grad_norm": 5.414227801774522, "learning_rate": 3.4916925626982456e-06, "loss": 17.0986, "step": 33351 }, { "epoch": 0.6096477598844755, "grad_norm": 6.905026894274481, "learning_rate": 3.4914103434724768e-06, "loss": 17.7203, "step": 33352 }, { "epoch": 0.609666039080922, "grad_norm": 5.473756476672161, "learning_rate": 3.4911281295341836e-06, "loss": 17.0339, "step": 33353 }, { "epoch": 0.6096843182773686, "grad_norm": 6.932266240664413, "learning_rate": 3.4908459208843543e-06, "loss": 17.8013, "step": 33354 }, { "epoch": 0.6097025974738151, "grad_norm": 6.139502023128502, "learning_rate": 3.490563717523981e-06, "loss": 17.1621, "step": 33355 }, { "epoch": 0.6097208766702615, "grad_norm": 5.514670084632606, "learning_rate": 3.4902815194540497e-06, "loss": 17.1763, "step": 33356 }, { "epoch": 0.6097391558667081, "grad_norm": 6.350822588216575, "learning_rate": 3.4899993266755494e-06, "loss": 17.6151, "step": 33357 }, { "epoch": 0.6097574350631546, "grad_norm": 6.853724667176336, "learning_rate": 3.4897171391894735e-06, "loss": 17.6359, "step": 33358 }, { "epoch": 0.6097757142596012, "grad_norm": 5.997461183830543, "learning_rate": 3.489434956996805e-06, "loss": 17.3308, "step": 33359 }, { "epoch": 0.6097939934560477, "grad_norm": 6.650517535484538, "learning_rate": 3.489152780098538e-06, "loss": 17.5382, "step": 33360 }, { "epoch": 0.6098122726524942, "grad_norm": 6.029713316278558, "learning_rate": 3.4888706084956585e-06, "loss": 17.3636, "step": 33361 }, { "epoch": 0.6098305518489407, "grad_norm": 6.229912217217704, "learning_rate": 3.4885884421891557e-06, "loss": 17.2726, "step": 33362 }, { "epoch": 0.6098488310453872, "grad_norm": 5.839020507831683, "learning_rate": 3.4883062811800207e-06, "loss": 17.2761, "step": 33363 }, { "epoch": 0.6098671102418338, "grad_norm": 7.786356450861076, "learning_rate": 3.4880241254692415e-06, "loss": 17.6953, "step": 33364 }, { "epoch": 0.6098853894382803, "grad_norm": 6.2218843992069655, "learning_rate": 3.4877419750578045e-06, "loss": 17.3171, "step": 33365 }, { "epoch": 0.6099036686347268, "grad_norm": 6.52828060114797, "learning_rate": 3.487459829946701e-06, "loss": 17.4449, "step": 33366 }, { "epoch": 0.6099219478311734, "grad_norm": 5.39585682191743, "learning_rate": 3.4871776901369216e-06, "loss": 17.3094, "step": 33367 }, { "epoch": 0.6099402270276199, "grad_norm": 5.47230926097559, "learning_rate": 3.48689555562945e-06, "loss": 17.1252, "step": 33368 }, { "epoch": 0.6099585062240664, "grad_norm": 6.775389845462406, "learning_rate": 3.486613426425281e-06, "loss": 17.4908, "step": 33369 }, { "epoch": 0.6099767854205129, "grad_norm": 8.679184157856536, "learning_rate": 3.4863313025254e-06, "loss": 17.8174, "step": 33370 }, { "epoch": 0.6099950646169594, "grad_norm": 7.108934500714045, "learning_rate": 3.4860491839307943e-06, "loss": 17.7168, "step": 33371 }, { "epoch": 0.610013343813406, "grad_norm": 6.626467446328768, "learning_rate": 3.4857670706424575e-06, "loss": 17.5444, "step": 33372 }, { "epoch": 0.6100316230098525, "grad_norm": 5.689496962507862, "learning_rate": 3.485484962661375e-06, "loss": 17.2156, "step": 33373 }, { "epoch": 0.6100499022062991, "grad_norm": 5.148903025059337, "learning_rate": 3.4852028599885353e-06, "loss": 16.9761, "step": 33374 }, { "epoch": 0.6100681814027455, "grad_norm": 5.850156611271, "learning_rate": 3.4849207626249282e-06, "loss": 17.2946, "step": 33375 }, { "epoch": 0.610086460599192, "grad_norm": 5.758800202815338, "learning_rate": 3.4846386705715418e-06, "loss": 17.2537, "step": 33376 }, { "epoch": 0.6101047397956386, "grad_norm": 6.650242977843952, "learning_rate": 3.4843565838293674e-06, "loss": 17.6301, "step": 33377 }, { "epoch": 0.6101230189920851, "grad_norm": 5.439072900022731, "learning_rate": 3.4840745023993916e-06, "loss": 17.3163, "step": 33378 }, { "epoch": 0.6101412981885316, "grad_norm": 7.903345185110099, "learning_rate": 3.483792426282602e-06, "loss": 17.7366, "step": 33379 }, { "epoch": 0.6101595773849782, "grad_norm": 5.596596070186502, "learning_rate": 3.4835103554799875e-06, "loss": 17.2038, "step": 33380 }, { "epoch": 0.6101778565814246, "grad_norm": 5.769537141601336, "learning_rate": 3.48322828999254e-06, "loss": 17.1112, "step": 33381 }, { "epoch": 0.6101961357778712, "grad_norm": 6.197201585314598, "learning_rate": 3.4829462298212434e-06, "loss": 17.4508, "step": 33382 }, { "epoch": 0.6102144149743177, "grad_norm": 5.091336960034745, "learning_rate": 3.482664174967091e-06, "loss": 17.1257, "step": 33383 }, { "epoch": 0.6102326941707642, "grad_norm": 7.898093794892243, "learning_rate": 3.4823821254310675e-06, "loss": 17.8941, "step": 33384 }, { "epoch": 0.6102509733672108, "grad_norm": 5.544437307381225, "learning_rate": 3.4821000812141615e-06, "loss": 17.1293, "step": 33385 }, { "epoch": 0.6102692525636573, "grad_norm": 6.534446746113784, "learning_rate": 3.4818180423173653e-06, "loss": 17.454, "step": 33386 }, { "epoch": 0.6102875317601039, "grad_norm": 9.848455988140744, "learning_rate": 3.481536008741666e-06, "loss": 17.8742, "step": 33387 }, { "epoch": 0.6103058109565503, "grad_norm": 6.07333621540332, "learning_rate": 3.4812539804880485e-06, "loss": 17.4712, "step": 33388 }, { "epoch": 0.6103240901529968, "grad_norm": 6.496287584508188, "learning_rate": 3.4809719575575053e-06, "loss": 17.4688, "step": 33389 }, { "epoch": 0.6103423693494434, "grad_norm": 5.6355340467443416, "learning_rate": 3.480689939951022e-06, "loss": 17.1515, "step": 33390 }, { "epoch": 0.6103606485458899, "grad_norm": 11.271209680478185, "learning_rate": 3.4804079276695908e-06, "loss": 18.1456, "step": 33391 }, { "epoch": 0.6103789277423365, "grad_norm": 4.790907956124254, "learning_rate": 3.480125920714198e-06, "loss": 16.7278, "step": 33392 }, { "epoch": 0.610397206938783, "grad_norm": 5.548021024521475, "learning_rate": 3.47984391908583e-06, "loss": 17.1137, "step": 33393 }, { "epoch": 0.6104154861352294, "grad_norm": 5.386759908898715, "learning_rate": 3.479561922785478e-06, "loss": 16.9759, "step": 33394 }, { "epoch": 0.610433765331676, "grad_norm": 6.324577286796288, "learning_rate": 3.4792799318141312e-06, "loss": 17.9578, "step": 33395 }, { "epoch": 0.6104520445281225, "grad_norm": 6.508571339392606, "learning_rate": 3.478997946172774e-06, "loss": 17.3723, "step": 33396 }, { "epoch": 0.6104703237245691, "grad_norm": 5.94912433460236, "learning_rate": 3.4787159658623986e-06, "loss": 17.1145, "step": 33397 }, { "epoch": 0.6104886029210156, "grad_norm": 7.249495881221342, "learning_rate": 3.478433990883991e-06, "loss": 17.9033, "step": 33398 }, { "epoch": 0.6105068821174621, "grad_norm": 5.856390965711518, "learning_rate": 3.478152021238539e-06, "loss": 17.3611, "step": 33399 }, { "epoch": 0.6105251613139087, "grad_norm": 5.636984070918061, "learning_rate": 3.477870056927034e-06, "loss": 17.039, "step": 33400 }, { "epoch": 0.6105434405103551, "grad_norm": 7.409795243727117, "learning_rate": 3.4775880979504627e-06, "loss": 17.5622, "step": 33401 }, { "epoch": 0.6105617197068017, "grad_norm": 5.581491108461602, "learning_rate": 3.477306144309811e-06, "loss": 17.039, "step": 33402 }, { "epoch": 0.6105799989032482, "grad_norm": 6.611112966907833, "learning_rate": 3.4770241960060703e-06, "loss": 17.2353, "step": 33403 }, { "epoch": 0.6105982780996947, "grad_norm": 5.5555156238619015, "learning_rate": 3.4767422530402285e-06, "loss": 17.1627, "step": 33404 }, { "epoch": 0.6106165572961413, "grad_norm": 7.347914025197715, "learning_rate": 3.476460315413271e-06, "loss": 17.7486, "step": 33405 }, { "epoch": 0.6106348364925878, "grad_norm": 8.392209843549763, "learning_rate": 3.4761783831261896e-06, "loss": 18.231, "step": 33406 }, { "epoch": 0.6106531156890344, "grad_norm": 7.3195961354678625, "learning_rate": 3.475896456179969e-06, "loss": 17.9543, "step": 33407 }, { "epoch": 0.6106713948854808, "grad_norm": 7.379403276557895, "learning_rate": 3.4756145345755998e-06, "loss": 17.7973, "step": 33408 }, { "epoch": 0.6106896740819273, "grad_norm": 6.785793089190591, "learning_rate": 3.4753326183140704e-06, "loss": 17.3322, "step": 33409 }, { "epoch": 0.6107079532783739, "grad_norm": 6.328669629357621, "learning_rate": 3.475050707396366e-06, "loss": 17.9138, "step": 33410 }, { "epoch": 0.6107262324748204, "grad_norm": 6.031586246294935, "learning_rate": 3.4747688018234786e-06, "loss": 17.4562, "step": 33411 }, { "epoch": 0.610744511671267, "grad_norm": 7.512633520001332, "learning_rate": 3.4744869015963935e-06, "loss": 17.816, "step": 33412 }, { "epoch": 0.6107627908677135, "grad_norm": 6.411106758528941, "learning_rate": 3.474205006716098e-06, "loss": 17.7143, "step": 33413 }, { "epoch": 0.6107810700641599, "grad_norm": 6.9821285304100575, "learning_rate": 3.4739231171835836e-06, "loss": 17.6169, "step": 33414 }, { "epoch": 0.6107993492606065, "grad_norm": 6.007398533014723, "learning_rate": 3.473641232999835e-06, "loss": 17.2051, "step": 33415 }, { "epoch": 0.610817628457053, "grad_norm": 7.031207402672312, "learning_rate": 3.4733593541658416e-06, "loss": 17.6978, "step": 33416 }, { "epoch": 0.6108359076534996, "grad_norm": 5.654814197151491, "learning_rate": 3.473077480682592e-06, "loss": 17.2885, "step": 33417 }, { "epoch": 0.6108541868499461, "grad_norm": 6.240359180467368, "learning_rate": 3.472795612551073e-06, "loss": 17.4877, "step": 33418 }, { "epoch": 0.6108724660463926, "grad_norm": 4.8792565605906555, "learning_rate": 3.4725137497722717e-06, "loss": 16.7626, "step": 33419 }, { "epoch": 0.6108907452428392, "grad_norm": 6.8433337874381035, "learning_rate": 3.4722318923471777e-06, "loss": 17.8161, "step": 33420 }, { "epoch": 0.6109090244392856, "grad_norm": 5.749869694709823, "learning_rate": 3.4719500402767786e-06, "loss": 17.1668, "step": 33421 }, { "epoch": 0.6109273036357322, "grad_norm": 6.321234305978792, "learning_rate": 3.471668193562062e-06, "loss": 17.5646, "step": 33422 }, { "epoch": 0.6109455828321787, "grad_norm": 6.636653075456533, "learning_rate": 3.471386352204017e-06, "loss": 17.5292, "step": 33423 }, { "epoch": 0.6109638620286252, "grad_norm": 5.639403032958973, "learning_rate": 3.471104516203627e-06, "loss": 17.063, "step": 33424 }, { "epoch": 0.6109821412250718, "grad_norm": 5.9063777954663195, "learning_rate": 3.4708226855618856e-06, "loss": 17.3206, "step": 33425 }, { "epoch": 0.6110004204215183, "grad_norm": 8.232310705416374, "learning_rate": 3.4705408602797784e-06, "loss": 17.7797, "step": 33426 }, { "epoch": 0.6110186996179648, "grad_norm": 8.732003218116336, "learning_rate": 3.4702590403582904e-06, "loss": 18.2944, "step": 33427 }, { "epoch": 0.6110369788144113, "grad_norm": 7.726860679979719, "learning_rate": 3.469977225798414e-06, "loss": 18.1075, "step": 33428 }, { "epoch": 0.6110552580108578, "grad_norm": 5.908701915009781, "learning_rate": 3.4696954166011337e-06, "loss": 17.2276, "step": 33429 }, { "epoch": 0.6110735372073044, "grad_norm": 5.448524146780063, "learning_rate": 3.4694136127674373e-06, "loss": 17.1563, "step": 33430 }, { "epoch": 0.6110918164037509, "grad_norm": 4.78244253820194, "learning_rate": 3.469131814298315e-06, "loss": 16.8074, "step": 33431 }, { "epoch": 0.6111100956001975, "grad_norm": 6.3452484717553554, "learning_rate": 3.468850021194753e-06, "loss": 17.4045, "step": 33432 }, { "epoch": 0.611128374796644, "grad_norm": 6.642911615520307, "learning_rate": 3.468568233457737e-06, "loss": 17.814, "step": 33433 }, { "epoch": 0.6111466539930904, "grad_norm": 8.019317790771744, "learning_rate": 3.4682864510882574e-06, "loss": 18.3122, "step": 33434 }, { "epoch": 0.611164933189537, "grad_norm": 8.671877598259702, "learning_rate": 3.468004674087302e-06, "loss": 18.2243, "step": 33435 }, { "epoch": 0.6111832123859835, "grad_norm": 6.169824955627968, "learning_rate": 3.4677229024558546e-06, "loss": 17.2544, "step": 33436 }, { "epoch": 0.6112014915824301, "grad_norm": 6.439526723198034, "learning_rate": 3.467441136194908e-06, "loss": 17.2558, "step": 33437 }, { "epoch": 0.6112197707788766, "grad_norm": 7.29611387808339, "learning_rate": 3.467159375305445e-06, "loss": 17.9691, "step": 33438 }, { "epoch": 0.611238049975323, "grad_norm": 5.763823641769959, "learning_rate": 3.466877619788457e-06, "loss": 17.2485, "step": 33439 }, { "epoch": 0.6112563291717696, "grad_norm": 5.238678822758768, "learning_rate": 3.4665958696449298e-06, "loss": 17.0506, "step": 33440 }, { "epoch": 0.6112746083682161, "grad_norm": 6.759121810487374, "learning_rate": 3.4663141248758496e-06, "loss": 17.8163, "step": 33441 }, { "epoch": 0.6112928875646627, "grad_norm": 10.847139803586488, "learning_rate": 3.4660323854822074e-06, "loss": 17.6679, "step": 33442 }, { "epoch": 0.6113111667611092, "grad_norm": 6.126288125860983, "learning_rate": 3.4657506514649874e-06, "loss": 17.2309, "step": 33443 }, { "epoch": 0.6113294459575557, "grad_norm": 6.5323381577813215, "learning_rate": 3.4654689228251776e-06, "loss": 17.4622, "step": 33444 }, { "epoch": 0.6113477251540023, "grad_norm": 6.436366870323557, "learning_rate": 3.4651871995637675e-06, "loss": 17.5354, "step": 33445 }, { "epoch": 0.6113660043504487, "grad_norm": 6.999180162420679, "learning_rate": 3.464905481681744e-06, "loss": 17.443, "step": 33446 }, { "epoch": 0.6113842835468952, "grad_norm": 6.973255259405325, "learning_rate": 3.4646237691800906e-06, "loss": 17.6959, "step": 33447 }, { "epoch": 0.6114025627433418, "grad_norm": 5.208021702914529, "learning_rate": 3.4643420620597988e-06, "loss": 17.0505, "step": 33448 }, { "epoch": 0.6114208419397883, "grad_norm": 6.236866040462618, "learning_rate": 3.4640603603218558e-06, "loss": 17.2016, "step": 33449 }, { "epoch": 0.6114391211362349, "grad_norm": 7.1784352951819965, "learning_rate": 3.463778663967247e-06, "loss": 17.4985, "step": 33450 }, { "epoch": 0.6114574003326814, "grad_norm": 5.128641237483665, "learning_rate": 3.4634969729969616e-06, "loss": 17.0334, "step": 33451 }, { "epoch": 0.6114756795291278, "grad_norm": 6.1577522362355674, "learning_rate": 3.463215287411984e-06, "loss": 17.1222, "step": 33452 }, { "epoch": 0.6114939587255744, "grad_norm": 6.388208179223064, "learning_rate": 3.4629336072133045e-06, "loss": 17.1443, "step": 33453 }, { "epoch": 0.6115122379220209, "grad_norm": 6.925716786553695, "learning_rate": 3.462651932401911e-06, "loss": 17.7177, "step": 33454 }, { "epoch": 0.6115305171184675, "grad_norm": 5.08980535799238, "learning_rate": 3.4623702629787864e-06, "loss": 17.0246, "step": 33455 }, { "epoch": 0.611548796314914, "grad_norm": 6.4951039551125085, "learning_rate": 3.4620885989449226e-06, "loss": 17.4534, "step": 33456 }, { "epoch": 0.6115670755113605, "grad_norm": 6.145326063595197, "learning_rate": 3.4618069403013042e-06, "loss": 17.0599, "step": 33457 }, { "epoch": 0.6115853547078071, "grad_norm": 5.7537861202552625, "learning_rate": 3.461525287048918e-06, "loss": 17.1616, "step": 33458 }, { "epoch": 0.6116036339042535, "grad_norm": 5.850426238594309, "learning_rate": 3.4612436391887537e-06, "loss": 17.078, "step": 33459 }, { "epoch": 0.6116219131007001, "grad_norm": 4.770716790694017, "learning_rate": 3.460961996721797e-06, "loss": 16.8144, "step": 33460 }, { "epoch": 0.6116401922971466, "grad_norm": 6.258509209004803, "learning_rate": 3.4606803596490337e-06, "loss": 17.5476, "step": 33461 }, { "epoch": 0.6116584714935931, "grad_norm": 5.741162908969932, "learning_rate": 3.4603987279714524e-06, "loss": 17.1928, "step": 33462 }, { "epoch": 0.6116767506900397, "grad_norm": 6.616432406007188, "learning_rate": 3.4601171016900415e-06, "loss": 17.7054, "step": 33463 }, { "epoch": 0.6116950298864862, "grad_norm": 5.530301790146592, "learning_rate": 3.459835480805784e-06, "loss": 17.0298, "step": 33464 }, { "epoch": 0.6117133090829328, "grad_norm": 5.832668993650484, "learning_rate": 3.459553865319671e-06, "loss": 17.2603, "step": 33465 }, { "epoch": 0.6117315882793792, "grad_norm": 8.143137381613384, "learning_rate": 3.4592722552326875e-06, "loss": 18.0093, "step": 33466 }, { "epoch": 0.6117498674758257, "grad_norm": 6.582758455278785, "learning_rate": 3.458990650545821e-06, "loss": 17.4995, "step": 33467 }, { "epoch": 0.6117681466722723, "grad_norm": 5.599864247478197, "learning_rate": 3.45870905126006e-06, "loss": 17.2764, "step": 33468 }, { "epoch": 0.6117864258687188, "grad_norm": 6.2982925064330155, "learning_rate": 3.4584274573763877e-06, "loss": 17.5037, "step": 33469 }, { "epoch": 0.6118047050651654, "grad_norm": 7.632672670305647, "learning_rate": 3.4581458688957953e-06, "loss": 17.7522, "step": 33470 }, { "epoch": 0.6118229842616119, "grad_norm": 6.415254319011238, "learning_rate": 3.4578642858192667e-06, "loss": 17.1702, "step": 33471 }, { "epoch": 0.6118412634580583, "grad_norm": 5.710687136665087, "learning_rate": 3.4575827081477896e-06, "loss": 17.2644, "step": 33472 }, { "epoch": 0.6118595426545049, "grad_norm": 6.317890353299827, "learning_rate": 3.457301135882353e-06, "loss": 17.3786, "step": 33473 }, { "epoch": 0.6118778218509514, "grad_norm": 6.861679678643908, "learning_rate": 3.4570195690239412e-06, "loss": 17.8711, "step": 33474 }, { "epoch": 0.611896101047398, "grad_norm": 6.054032983105726, "learning_rate": 3.4567380075735415e-06, "loss": 17.1931, "step": 33475 }, { "epoch": 0.6119143802438445, "grad_norm": 4.289828389930858, "learning_rate": 3.456456451532142e-06, "loss": 16.7406, "step": 33476 }, { "epoch": 0.611932659440291, "grad_norm": 5.3959545153657995, "learning_rate": 3.4561749009007285e-06, "loss": 17.2055, "step": 33477 }, { "epoch": 0.6119509386367376, "grad_norm": 6.110549846744946, "learning_rate": 3.4558933556802864e-06, "loss": 17.2446, "step": 33478 }, { "epoch": 0.611969217833184, "grad_norm": 6.2642373662161415, "learning_rate": 3.4556118158718055e-06, "loss": 17.2289, "step": 33479 }, { "epoch": 0.6119874970296306, "grad_norm": 7.894892497248476, "learning_rate": 3.4553302814762725e-06, "loss": 17.7894, "step": 33480 }, { "epoch": 0.6120057762260771, "grad_norm": 5.321319791531456, "learning_rate": 3.4550487524946696e-06, "loss": 16.7757, "step": 33481 }, { "epoch": 0.6120240554225236, "grad_norm": 5.531123552046876, "learning_rate": 3.45476722892799e-06, "loss": 17.247, "step": 33482 }, { "epoch": 0.6120423346189702, "grad_norm": 7.542265358351747, "learning_rate": 3.4544857107772143e-06, "loss": 17.4809, "step": 33483 }, { "epoch": 0.6120606138154167, "grad_norm": 6.947951205337137, "learning_rate": 3.454204198043334e-06, "loss": 17.5728, "step": 33484 }, { "epoch": 0.6120788930118632, "grad_norm": 5.653912609982626, "learning_rate": 3.4539226907273337e-06, "loss": 17.2366, "step": 33485 }, { "epoch": 0.6120971722083097, "grad_norm": 5.561782128067204, "learning_rate": 3.453641188830199e-06, "loss": 17.3959, "step": 33486 }, { "epoch": 0.6121154514047562, "grad_norm": 7.585819312422513, "learning_rate": 3.453359692352919e-06, "loss": 17.6198, "step": 33487 }, { "epoch": 0.6121337306012028, "grad_norm": 4.393686146532234, "learning_rate": 3.4530782012964794e-06, "loss": 16.825, "step": 33488 }, { "epoch": 0.6121520097976493, "grad_norm": 7.146918914970141, "learning_rate": 3.452796715661864e-06, "loss": 17.8182, "step": 33489 }, { "epoch": 0.6121702889940959, "grad_norm": 8.113899780103475, "learning_rate": 3.4525152354500645e-06, "loss": 18.0539, "step": 33490 }, { "epoch": 0.6121885681905423, "grad_norm": 6.400472504796722, "learning_rate": 3.452233760662065e-06, "loss": 17.814, "step": 33491 }, { "epoch": 0.6122068473869888, "grad_norm": 5.434466636580202, "learning_rate": 3.4519522912988497e-06, "loss": 17.1107, "step": 33492 }, { "epoch": 0.6122251265834354, "grad_norm": 6.532835565850863, "learning_rate": 3.451670827361408e-06, "loss": 17.4292, "step": 33493 }, { "epoch": 0.6122434057798819, "grad_norm": 8.01267873633243, "learning_rate": 3.4513893688507274e-06, "loss": 17.9304, "step": 33494 }, { "epoch": 0.6122616849763285, "grad_norm": 6.115727159007468, "learning_rate": 3.4511079157677895e-06, "loss": 17.1814, "step": 33495 }, { "epoch": 0.612279964172775, "grad_norm": 5.186963254730601, "learning_rate": 3.450826468113587e-06, "loss": 16.981, "step": 33496 }, { "epoch": 0.6122982433692215, "grad_norm": 6.139666797397002, "learning_rate": 3.450545025889101e-06, "loss": 17.2475, "step": 33497 }, { "epoch": 0.612316522565668, "grad_norm": 5.16454587278201, "learning_rate": 3.450263589095321e-06, "loss": 16.8679, "step": 33498 }, { "epoch": 0.6123348017621145, "grad_norm": 6.177268578089185, "learning_rate": 3.4499821577332336e-06, "loss": 17.3711, "step": 33499 }, { "epoch": 0.6123530809585611, "grad_norm": 5.062526369905888, "learning_rate": 3.449700731803823e-06, "loss": 17.1105, "step": 33500 }, { "epoch": 0.6123713601550076, "grad_norm": 5.9988035984409525, "learning_rate": 3.449419311308078e-06, "loss": 17.4951, "step": 33501 }, { "epoch": 0.6123896393514541, "grad_norm": 6.165585255751717, "learning_rate": 3.4491378962469833e-06, "loss": 17.2245, "step": 33502 }, { "epoch": 0.6124079185479007, "grad_norm": 7.133222760296182, "learning_rate": 3.4488564866215244e-06, "loss": 17.6645, "step": 33503 }, { "epoch": 0.6124261977443471, "grad_norm": 4.498920113243284, "learning_rate": 3.4485750824326906e-06, "loss": 16.9697, "step": 33504 }, { "epoch": 0.6124444769407937, "grad_norm": 7.799277288573851, "learning_rate": 3.4482936836814674e-06, "loss": 18.1799, "step": 33505 }, { "epoch": 0.6124627561372402, "grad_norm": 6.98696022235712, "learning_rate": 3.4480122903688373e-06, "loss": 17.7251, "step": 33506 }, { "epoch": 0.6124810353336867, "grad_norm": 5.094949051256138, "learning_rate": 3.4477309024957905e-06, "loss": 16.9392, "step": 33507 }, { "epoch": 0.6124993145301333, "grad_norm": 6.355564779675615, "learning_rate": 3.447449520063314e-06, "loss": 17.7738, "step": 33508 }, { "epoch": 0.6125175937265798, "grad_norm": 6.526048256289671, "learning_rate": 3.447168143072389e-06, "loss": 17.5833, "step": 33509 }, { "epoch": 0.6125358729230264, "grad_norm": 7.171868198252855, "learning_rate": 3.4468867715240084e-06, "loss": 17.6432, "step": 33510 }, { "epoch": 0.6125541521194728, "grad_norm": 5.255328056502128, "learning_rate": 3.4466054054191527e-06, "loss": 17.0356, "step": 33511 }, { "epoch": 0.6125724313159193, "grad_norm": 8.89858522556075, "learning_rate": 3.4463240447588097e-06, "loss": 18.0616, "step": 33512 }, { "epoch": 0.6125907105123659, "grad_norm": 5.47991997921583, "learning_rate": 3.4460426895439682e-06, "loss": 17.3045, "step": 33513 }, { "epoch": 0.6126089897088124, "grad_norm": 7.511600711567079, "learning_rate": 3.4457613397756096e-06, "loss": 18.1106, "step": 33514 }, { "epoch": 0.6126272689052589, "grad_norm": 8.327848222325912, "learning_rate": 3.4454799954547257e-06, "loss": 18.3226, "step": 33515 }, { "epoch": 0.6126455481017055, "grad_norm": 5.880665094977046, "learning_rate": 3.4451986565822977e-06, "loss": 17.4351, "step": 33516 }, { "epoch": 0.6126638272981519, "grad_norm": 6.128057021353619, "learning_rate": 3.4449173231593126e-06, "loss": 17.2243, "step": 33517 }, { "epoch": 0.6126821064945985, "grad_norm": 6.040658914161018, "learning_rate": 3.444635995186759e-06, "loss": 17.4323, "step": 33518 }, { "epoch": 0.612700385691045, "grad_norm": 6.106721356433863, "learning_rate": 3.4443546726656217e-06, "loss": 17.2813, "step": 33519 }, { "epoch": 0.6127186648874915, "grad_norm": 7.940683736497601, "learning_rate": 3.444073355596885e-06, "loss": 17.1814, "step": 33520 }, { "epoch": 0.6127369440839381, "grad_norm": 7.161321306196565, "learning_rate": 3.4437920439815363e-06, "loss": 17.5255, "step": 33521 }, { "epoch": 0.6127552232803846, "grad_norm": 6.818590911520417, "learning_rate": 3.4435107378205623e-06, "loss": 17.4996, "step": 33522 }, { "epoch": 0.6127735024768312, "grad_norm": 10.231007042117573, "learning_rate": 3.4432294371149465e-06, "loss": 17.8104, "step": 33523 }, { "epoch": 0.6127917816732776, "grad_norm": 8.792208937233635, "learning_rate": 3.4429481418656784e-06, "loss": 18.6582, "step": 33524 }, { "epoch": 0.6128100608697241, "grad_norm": 6.235885742103952, "learning_rate": 3.442666852073741e-06, "loss": 17.6067, "step": 33525 }, { "epoch": 0.6128283400661707, "grad_norm": 6.001095720245961, "learning_rate": 3.4423855677401197e-06, "loss": 17.3868, "step": 33526 }, { "epoch": 0.6128466192626172, "grad_norm": 5.346088436664141, "learning_rate": 3.442104288865805e-06, "loss": 17.0297, "step": 33527 }, { "epoch": 0.6128648984590638, "grad_norm": 5.487059234505055, "learning_rate": 3.4418230154517764e-06, "loss": 17.1768, "step": 33528 }, { "epoch": 0.6128831776555103, "grad_norm": 6.315944698033855, "learning_rate": 3.4415417474990256e-06, "loss": 17.3883, "step": 33529 }, { "epoch": 0.6129014568519567, "grad_norm": 5.7557746999187245, "learning_rate": 3.441260485008534e-06, "loss": 17.0535, "step": 33530 }, { "epoch": 0.6129197360484033, "grad_norm": 7.0190875878094054, "learning_rate": 3.440979227981288e-06, "loss": 17.6225, "step": 33531 }, { "epoch": 0.6129380152448498, "grad_norm": 5.978547445369314, "learning_rate": 3.4406979764182774e-06, "loss": 17.1911, "step": 33532 }, { "epoch": 0.6129562944412964, "grad_norm": 7.060999556459104, "learning_rate": 3.440416730320485e-06, "loss": 17.8819, "step": 33533 }, { "epoch": 0.6129745736377429, "grad_norm": 5.331804032259672, "learning_rate": 3.440135489688894e-06, "loss": 16.9289, "step": 33534 }, { "epoch": 0.6129928528341894, "grad_norm": 5.060695745136556, "learning_rate": 3.439854254524494e-06, "loss": 16.8253, "step": 33535 }, { "epoch": 0.613011132030636, "grad_norm": 6.132963352126794, "learning_rate": 3.4395730248282712e-06, "loss": 17.4043, "step": 33536 }, { "epoch": 0.6130294112270824, "grad_norm": 6.096670678014168, "learning_rate": 3.4392918006012068e-06, "loss": 17.4007, "step": 33537 }, { "epoch": 0.613047690423529, "grad_norm": 4.384829459761461, "learning_rate": 3.439010581844291e-06, "loss": 16.7712, "step": 33538 }, { "epoch": 0.6130659696199755, "grad_norm": 6.833511494569925, "learning_rate": 3.4387293685585073e-06, "loss": 17.814, "step": 33539 }, { "epoch": 0.613084248816422, "grad_norm": 5.490931270086343, "learning_rate": 3.43844816074484e-06, "loss": 17.0178, "step": 33540 }, { "epoch": 0.6131025280128686, "grad_norm": 8.71443057997227, "learning_rate": 3.438166958404279e-06, "loss": 18.5427, "step": 33541 }, { "epoch": 0.613120807209315, "grad_norm": 5.589700121464159, "learning_rate": 3.437885761537806e-06, "loss": 17.0839, "step": 33542 }, { "epoch": 0.6131390864057616, "grad_norm": 5.0285706856958345, "learning_rate": 3.4376045701464077e-06, "loss": 17.0692, "step": 33543 }, { "epoch": 0.6131573656022081, "grad_norm": 5.464526447973292, "learning_rate": 3.43732338423107e-06, "loss": 17.2267, "step": 33544 }, { "epoch": 0.6131756447986546, "grad_norm": 5.907943404866737, "learning_rate": 3.4370422037927777e-06, "loss": 17.3227, "step": 33545 }, { "epoch": 0.6131939239951012, "grad_norm": 6.717324312818704, "learning_rate": 3.436761028832518e-06, "loss": 17.6762, "step": 33546 }, { "epoch": 0.6132122031915477, "grad_norm": 5.981986932625501, "learning_rate": 3.436479859351275e-06, "loss": 17.4057, "step": 33547 }, { "epoch": 0.6132304823879943, "grad_norm": 5.886803760830247, "learning_rate": 3.4361986953500337e-06, "loss": 17.0325, "step": 33548 }, { "epoch": 0.6132487615844407, "grad_norm": 5.631551623053096, "learning_rate": 3.4359175368297802e-06, "loss": 17.3109, "step": 33549 }, { "epoch": 0.6132670407808872, "grad_norm": 7.636830653019367, "learning_rate": 3.435636383791502e-06, "loss": 17.9211, "step": 33550 }, { "epoch": 0.6132853199773338, "grad_norm": 6.425996021869818, "learning_rate": 3.43535523623618e-06, "loss": 17.6177, "step": 33551 }, { "epoch": 0.6133035991737803, "grad_norm": 7.689850800343031, "learning_rate": 3.4350740941648023e-06, "loss": 17.3774, "step": 33552 }, { "epoch": 0.6133218783702269, "grad_norm": 6.782078237571126, "learning_rate": 3.434792957578356e-06, "loss": 17.9185, "step": 33553 }, { "epoch": 0.6133401575666734, "grad_norm": 7.511800048133147, "learning_rate": 3.4345118264778225e-06, "loss": 17.1919, "step": 33554 }, { "epoch": 0.6133584367631199, "grad_norm": 5.667703466331562, "learning_rate": 3.4342307008641914e-06, "loss": 17.1599, "step": 33555 }, { "epoch": 0.6133767159595664, "grad_norm": 6.586622791774437, "learning_rate": 3.4339495807384443e-06, "loss": 17.3081, "step": 33556 }, { "epoch": 0.6133949951560129, "grad_norm": 7.454994939635533, "learning_rate": 3.4336684661015673e-06, "loss": 18.0999, "step": 33557 }, { "epoch": 0.6134132743524595, "grad_norm": 7.654229904288211, "learning_rate": 3.4333873569545484e-06, "loss": 17.77, "step": 33558 }, { "epoch": 0.613431553548906, "grad_norm": 6.39404562042181, "learning_rate": 3.433106253298369e-06, "loss": 17.5089, "step": 33559 }, { "epoch": 0.6134498327453525, "grad_norm": 5.673162921965931, "learning_rate": 3.4328251551340174e-06, "loss": 17.0403, "step": 33560 }, { "epoch": 0.6134681119417991, "grad_norm": 6.331482131494769, "learning_rate": 3.432544062462478e-06, "loss": 17.4439, "step": 33561 }, { "epoch": 0.6134863911382455, "grad_norm": 6.316679567287464, "learning_rate": 3.4322629752847337e-06, "loss": 17.3864, "step": 33562 }, { "epoch": 0.6135046703346921, "grad_norm": 6.936712921132725, "learning_rate": 3.4319818936017735e-06, "loss": 17.5743, "step": 33563 }, { "epoch": 0.6135229495311386, "grad_norm": 7.430454961327037, "learning_rate": 3.4317008174145815e-06, "loss": 17.977, "step": 33564 }, { "epoch": 0.6135412287275851, "grad_norm": 5.646401981217706, "learning_rate": 3.4314197467241396e-06, "loss": 17.2466, "step": 33565 }, { "epoch": 0.6135595079240317, "grad_norm": 8.245245707656927, "learning_rate": 3.431138681531436e-06, "loss": 17.4725, "step": 33566 }, { "epoch": 0.6135777871204782, "grad_norm": 7.483168507190047, "learning_rate": 3.4308576218374568e-06, "loss": 17.7313, "step": 33567 }, { "epoch": 0.6135960663169248, "grad_norm": 5.623083518757397, "learning_rate": 3.4305765676431835e-06, "loss": 17.1001, "step": 33568 }, { "epoch": 0.6136143455133712, "grad_norm": 5.789355932179123, "learning_rate": 3.4302955189496045e-06, "loss": 17.4564, "step": 33569 }, { "epoch": 0.6136326247098177, "grad_norm": 4.781702907749447, "learning_rate": 3.430014475757703e-06, "loss": 16.876, "step": 33570 }, { "epoch": 0.6136509039062643, "grad_norm": 5.965068794859001, "learning_rate": 3.429733438068463e-06, "loss": 16.8149, "step": 33571 }, { "epoch": 0.6136691831027108, "grad_norm": 5.656074715219629, "learning_rate": 3.429452405882874e-06, "loss": 17.2434, "step": 33572 }, { "epoch": 0.6136874622991574, "grad_norm": 6.416552032004863, "learning_rate": 3.4291713792019176e-06, "loss": 17.6646, "step": 33573 }, { "epoch": 0.6137057414956039, "grad_norm": 7.214103359318109, "learning_rate": 3.4288903580265764e-06, "loss": 17.8339, "step": 33574 }, { "epoch": 0.6137240206920503, "grad_norm": 6.679391610476708, "learning_rate": 3.4286093423578404e-06, "loss": 17.687, "step": 33575 }, { "epoch": 0.6137422998884969, "grad_norm": 7.61910119634523, "learning_rate": 3.4283283321966905e-06, "loss": 17.787, "step": 33576 }, { "epoch": 0.6137605790849434, "grad_norm": 5.095798430162822, "learning_rate": 3.4280473275441156e-06, "loss": 16.909, "step": 33577 }, { "epoch": 0.61377885828139, "grad_norm": 5.453887922821891, "learning_rate": 3.4277663284010988e-06, "loss": 17.1299, "step": 33578 }, { "epoch": 0.6137971374778365, "grad_norm": 6.828486444514746, "learning_rate": 3.427485334768622e-06, "loss": 17.5706, "step": 33579 }, { "epoch": 0.613815416674283, "grad_norm": 7.349835815801021, "learning_rate": 3.4272043466476734e-06, "loss": 17.5407, "step": 33580 }, { "epoch": 0.6138336958707296, "grad_norm": 9.99232512580679, "learning_rate": 3.426923364039239e-06, "loss": 18.6651, "step": 33581 }, { "epoch": 0.613851975067176, "grad_norm": 6.313275549533149, "learning_rate": 3.4266423869442988e-06, "loss": 17.5078, "step": 33582 }, { "epoch": 0.6138702542636225, "grad_norm": 10.507189709222576, "learning_rate": 3.4263614153638437e-06, "loss": 18.7375, "step": 33583 }, { "epoch": 0.6138885334600691, "grad_norm": 6.527820434074457, "learning_rate": 3.426080449298853e-06, "loss": 17.5812, "step": 33584 }, { "epoch": 0.6139068126565156, "grad_norm": 7.578266961357038, "learning_rate": 3.4257994887503134e-06, "loss": 17.9199, "step": 33585 }, { "epoch": 0.6139250918529622, "grad_norm": 7.931999344552257, "learning_rate": 3.425518533719212e-06, "loss": 17.893, "step": 33586 }, { "epoch": 0.6139433710494087, "grad_norm": 5.968161554932968, "learning_rate": 3.425237584206531e-06, "loss": 17.2465, "step": 33587 }, { "epoch": 0.6139616502458551, "grad_norm": 5.653013519275823, "learning_rate": 3.4249566402132546e-06, "loss": 17.0427, "step": 33588 }, { "epoch": 0.6139799294423017, "grad_norm": 5.636155970947004, "learning_rate": 3.4246757017403685e-06, "loss": 17.2924, "step": 33589 }, { "epoch": 0.6139982086387482, "grad_norm": 5.917722682539943, "learning_rate": 3.424394768788857e-06, "loss": 17.1953, "step": 33590 }, { "epoch": 0.6140164878351948, "grad_norm": 6.61550616517019, "learning_rate": 3.424113841359707e-06, "loss": 17.6491, "step": 33591 }, { "epoch": 0.6140347670316413, "grad_norm": 6.481309390128452, "learning_rate": 3.423832919453901e-06, "loss": 17.9994, "step": 33592 }, { "epoch": 0.6140530462280878, "grad_norm": 5.726540735868971, "learning_rate": 3.4235520030724224e-06, "loss": 17.0648, "step": 33593 }, { "epoch": 0.6140713254245344, "grad_norm": 6.736986911546465, "learning_rate": 3.423271092216257e-06, "loss": 17.6365, "step": 33594 }, { "epoch": 0.6140896046209808, "grad_norm": 7.520993108623757, "learning_rate": 3.422990186886392e-06, "loss": 17.7699, "step": 33595 }, { "epoch": 0.6141078838174274, "grad_norm": 6.600249519271665, "learning_rate": 3.4227092870838065e-06, "loss": 17.5758, "step": 33596 }, { "epoch": 0.6141261630138739, "grad_norm": 7.175723956403929, "learning_rate": 3.422428392809491e-06, "loss": 17.6863, "step": 33597 }, { "epoch": 0.6141444422103204, "grad_norm": 5.57622447749873, "learning_rate": 3.4221475040644255e-06, "loss": 17.2072, "step": 33598 }, { "epoch": 0.614162721406767, "grad_norm": 5.3213592224566675, "learning_rate": 3.4218666208495955e-06, "loss": 17.1455, "step": 33599 }, { "epoch": 0.6141810006032135, "grad_norm": 7.538222824014304, "learning_rate": 3.421585743165987e-06, "loss": 17.9702, "step": 33600 }, { "epoch": 0.61419927979966, "grad_norm": 5.640495050469788, "learning_rate": 3.4213048710145835e-06, "loss": 17.3511, "step": 33601 }, { "epoch": 0.6142175589961065, "grad_norm": 6.314179891912364, "learning_rate": 3.4210240043963693e-06, "loss": 17.4131, "step": 33602 }, { "epoch": 0.614235838192553, "grad_norm": 4.742497291452268, "learning_rate": 3.420743143312329e-06, "loss": 16.9192, "step": 33603 }, { "epoch": 0.6142541173889996, "grad_norm": 6.856413971518887, "learning_rate": 3.4204622877634454e-06, "loss": 17.752, "step": 33604 }, { "epoch": 0.6142723965854461, "grad_norm": 6.161820607850231, "learning_rate": 3.420181437750707e-06, "loss": 17.3846, "step": 33605 }, { "epoch": 0.6142906757818927, "grad_norm": 6.535576279029979, "learning_rate": 3.419900593275094e-06, "loss": 17.399, "step": 33606 }, { "epoch": 0.6143089549783392, "grad_norm": 7.01017534390237, "learning_rate": 3.419619754337593e-06, "loss": 17.4901, "step": 33607 }, { "epoch": 0.6143272341747856, "grad_norm": 10.115577570560284, "learning_rate": 3.419338920939187e-06, "loss": 17.6945, "step": 33608 }, { "epoch": 0.6143455133712322, "grad_norm": 5.319567178610547, "learning_rate": 3.4190580930808624e-06, "loss": 17.0579, "step": 33609 }, { "epoch": 0.6143637925676787, "grad_norm": 5.891277742212165, "learning_rate": 3.4187772707636003e-06, "loss": 17.5574, "step": 33610 }, { "epoch": 0.6143820717641253, "grad_norm": 6.710260046223448, "learning_rate": 3.4184964539883868e-06, "loss": 17.9136, "step": 33611 }, { "epoch": 0.6144003509605718, "grad_norm": 6.478554342631326, "learning_rate": 3.4182156427562072e-06, "loss": 17.7232, "step": 33612 }, { "epoch": 0.6144186301570183, "grad_norm": 7.037431993374773, "learning_rate": 3.4179348370680426e-06, "loss": 17.8416, "step": 33613 }, { "epoch": 0.6144369093534648, "grad_norm": 7.204169700266841, "learning_rate": 3.4176540369248812e-06, "loss": 18.0632, "step": 33614 }, { "epoch": 0.6144551885499113, "grad_norm": 8.107680793464407, "learning_rate": 3.417373242327704e-06, "loss": 18.1117, "step": 33615 }, { "epoch": 0.6144734677463579, "grad_norm": 4.490347139896884, "learning_rate": 3.417092453277495e-06, "loss": 16.7234, "step": 33616 }, { "epoch": 0.6144917469428044, "grad_norm": 6.308181009001396, "learning_rate": 3.416811669775242e-06, "loss": 17.2926, "step": 33617 }, { "epoch": 0.6145100261392509, "grad_norm": 6.9322156477621375, "learning_rate": 3.416530891821927e-06, "loss": 17.5664, "step": 33618 }, { "epoch": 0.6145283053356975, "grad_norm": 6.479457733134518, "learning_rate": 3.4162501194185315e-06, "loss": 17.7101, "step": 33619 }, { "epoch": 0.614546584532144, "grad_norm": 8.987074302922334, "learning_rate": 3.415969352566043e-06, "loss": 18.0003, "step": 33620 }, { "epoch": 0.6145648637285905, "grad_norm": 5.663532620002127, "learning_rate": 3.415688591265443e-06, "loss": 17.118, "step": 33621 }, { "epoch": 0.614583142925037, "grad_norm": 7.397669208146583, "learning_rate": 3.415407835517719e-06, "loss": 18.3065, "step": 33622 }, { "epoch": 0.6146014221214835, "grad_norm": 5.807564667036166, "learning_rate": 3.4151270853238538e-06, "loss": 17.1384, "step": 33623 }, { "epoch": 0.6146197013179301, "grad_norm": 7.671072110234201, "learning_rate": 3.414846340684828e-06, "loss": 18.3373, "step": 33624 }, { "epoch": 0.6146379805143766, "grad_norm": 6.286283848771324, "learning_rate": 3.4145656016016292e-06, "loss": 17.5154, "step": 33625 }, { "epoch": 0.6146562597108232, "grad_norm": 6.860419112621343, "learning_rate": 3.4142848680752418e-06, "loss": 17.9014, "step": 33626 }, { "epoch": 0.6146745389072696, "grad_norm": 5.6484226225709415, "learning_rate": 3.414004140106646e-06, "loss": 17.1453, "step": 33627 }, { "epoch": 0.6146928181037161, "grad_norm": 6.023610713404971, "learning_rate": 3.4137234176968305e-06, "loss": 17.0967, "step": 33628 }, { "epoch": 0.6147110973001627, "grad_norm": 6.270171914183166, "learning_rate": 3.413442700846775e-06, "loss": 17.4958, "step": 33629 }, { "epoch": 0.6147293764966092, "grad_norm": 6.214798130921459, "learning_rate": 3.413161989557465e-06, "loss": 17.2152, "step": 33630 }, { "epoch": 0.6147476556930558, "grad_norm": 6.146455975631378, "learning_rate": 3.4128812838298864e-06, "loss": 17.4514, "step": 33631 }, { "epoch": 0.6147659348895023, "grad_norm": 10.624190061349552, "learning_rate": 3.4126005836650207e-06, "loss": 17.4291, "step": 33632 }, { "epoch": 0.6147842140859487, "grad_norm": 6.819113491893822, "learning_rate": 3.41231988906385e-06, "loss": 17.6207, "step": 33633 }, { "epoch": 0.6148024932823953, "grad_norm": 4.8811187640653095, "learning_rate": 3.4120392000273617e-06, "loss": 16.9752, "step": 33634 }, { "epoch": 0.6148207724788418, "grad_norm": 8.05988514345823, "learning_rate": 3.4117585165565375e-06, "loss": 18.3848, "step": 33635 }, { "epoch": 0.6148390516752884, "grad_norm": 7.267540963292214, "learning_rate": 3.4114778386523638e-06, "loss": 17.3684, "step": 33636 }, { "epoch": 0.6148573308717349, "grad_norm": 5.80311437866112, "learning_rate": 3.4111971663158223e-06, "loss": 17.2109, "step": 33637 }, { "epoch": 0.6148756100681814, "grad_norm": 8.367575451633726, "learning_rate": 3.410916499547895e-06, "loss": 18.0875, "step": 33638 }, { "epoch": 0.614893889264628, "grad_norm": 6.7139166520748725, "learning_rate": 3.4106358383495673e-06, "loss": 17.4985, "step": 33639 }, { "epoch": 0.6149121684610744, "grad_norm": 5.525320692071929, "learning_rate": 3.4103551827218256e-06, "loss": 17.1552, "step": 33640 }, { "epoch": 0.614930447657521, "grad_norm": 5.56339692353567, "learning_rate": 3.410074532665648e-06, "loss": 17.0912, "step": 33641 }, { "epoch": 0.6149487268539675, "grad_norm": 6.841406228889831, "learning_rate": 3.4097938881820242e-06, "loss": 17.6549, "step": 33642 }, { "epoch": 0.614967006050414, "grad_norm": 6.403111623017766, "learning_rate": 3.4095132492719334e-06, "loss": 17.5869, "step": 33643 }, { "epoch": 0.6149852852468606, "grad_norm": 6.213474414827, "learning_rate": 3.40923261593636e-06, "loss": 17.5311, "step": 33644 }, { "epoch": 0.6150035644433071, "grad_norm": 6.763496731722575, "learning_rate": 3.4089519881762894e-06, "loss": 17.6386, "step": 33645 }, { "epoch": 0.6150218436397537, "grad_norm": 6.931819658892937, "learning_rate": 3.4086713659927047e-06, "loss": 17.8505, "step": 33646 }, { "epoch": 0.6150401228362001, "grad_norm": 5.537051969411984, "learning_rate": 3.4083907493865863e-06, "loss": 17.0909, "step": 33647 }, { "epoch": 0.6150584020326466, "grad_norm": 6.302657761951602, "learning_rate": 3.408110138358921e-06, "loss": 17.3969, "step": 33648 }, { "epoch": 0.6150766812290932, "grad_norm": 5.582878645051753, "learning_rate": 3.4078295329106936e-06, "loss": 16.907, "step": 33649 }, { "epoch": 0.6150949604255397, "grad_norm": 5.881194889767607, "learning_rate": 3.4075489330428822e-06, "loss": 17.3537, "step": 33650 }, { "epoch": 0.6151132396219862, "grad_norm": 5.559919218964131, "learning_rate": 3.4072683387564763e-06, "loss": 17.065, "step": 33651 }, { "epoch": 0.6151315188184328, "grad_norm": 8.989461757788883, "learning_rate": 3.406987750052455e-06, "loss": 18.2282, "step": 33652 }, { "epoch": 0.6151497980148792, "grad_norm": 6.399089904683625, "learning_rate": 3.4067071669318043e-06, "loss": 17.4253, "step": 33653 }, { "epoch": 0.6151680772113258, "grad_norm": 6.625174106586653, "learning_rate": 3.406426589395507e-06, "loss": 17.6791, "step": 33654 }, { "epoch": 0.6151863564077723, "grad_norm": 6.05371711127474, "learning_rate": 3.4061460174445447e-06, "loss": 17.3873, "step": 33655 }, { "epoch": 0.6152046356042188, "grad_norm": 7.775040550308248, "learning_rate": 3.4058654510799044e-06, "loss": 18.1276, "step": 33656 }, { "epoch": 0.6152229148006654, "grad_norm": 6.081644661228197, "learning_rate": 3.405584890302566e-06, "loss": 17.1227, "step": 33657 }, { "epoch": 0.6152411939971119, "grad_norm": 5.988494282969245, "learning_rate": 3.4053043351135134e-06, "loss": 17.5129, "step": 33658 }, { "epoch": 0.6152594731935584, "grad_norm": 6.818839473919781, "learning_rate": 3.4050237855137325e-06, "loss": 17.8664, "step": 33659 }, { "epoch": 0.6152777523900049, "grad_norm": 6.746168747516521, "learning_rate": 3.404743241504206e-06, "loss": 17.2637, "step": 33660 }, { "epoch": 0.6152960315864514, "grad_norm": 7.271784727567482, "learning_rate": 3.404462703085912e-06, "loss": 17.6072, "step": 33661 }, { "epoch": 0.615314310782898, "grad_norm": 8.03709874565724, "learning_rate": 3.4041821702598403e-06, "loss": 18.078, "step": 33662 }, { "epoch": 0.6153325899793445, "grad_norm": 5.831412705102963, "learning_rate": 3.4039016430269723e-06, "loss": 17.2782, "step": 33663 }, { "epoch": 0.6153508691757911, "grad_norm": 7.967555424626837, "learning_rate": 3.4036211213882886e-06, "loss": 18.1298, "step": 33664 }, { "epoch": 0.6153691483722376, "grad_norm": 6.684562057347378, "learning_rate": 3.4033406053447758e-06, "loss": 17.538, "step": 33665 }, { "epoch": 0.615387427568684, "grad_norm": 7.076081311263104, "learning_rate": 3.403060094897414e-06, "loss": 17.252, "step": 33666 }, { "epoch": 0.6154057067651306, "grad_norm": 5.915058605927716, "learning_rate": 3.402779590047189e-06, "loss": 17.191, "step": 33667 }, { "epoch": 0.6154239859615771, "grad_norm": 8.337927487437755, "learning_rate": 3.402499090795084e-06, "loss": 18.1392, "step": 33668 }, { "epoch": 0.6154422651580237, "grad_norm": 6.840108897481351, "learning_rate": 3.402218597142079e-06, "loss": 17.3562, "step": 33669 }, { "epoch": 0.6154605443544702, "grad_norm": 5.853756139813298, "learning_rate": 3.4019381090891613e-06, "loss": 17.2374, "step": 33670 }, { "epoch": 0.6154788235509167, "grad_norm": 6.409237517437243, "learning_rate": 3.4016576266373103e-06, "loss": 18.1149, "step": 33671 }, { "epoch": 0.6154971027473632, "grad_norm": 6.7608654186703765, "learning_rate": 3.4013771497875104e-06, "loss": 17.3873, "step": 33672 }, { "epoch": 0.6155153819438097, "grad_norm": 6.500736312965409, "learning_rate": 3.401096678540746e-06, "loss": 17.5152, "step": 33673 }, { "epoch": 0.6155336611402563, "grad_norm": 8.410961290971807, "learning_rate": 3.400816212897999e-06, "loss": 17.8202, "step": 33674 }, { "epoch": 0.6155519403367028, "grad_norm": 6.72612854646635, "learning_rate": 3.4005357528602517e-06, "loss": 17.6052, "step": 33675 }, { "epoch": 0.6155702195331493, "grad_norm": 7.266783550602568, "learning_rate": 3.4002552984284874e-06, "loss": 17.5764, "step": 33676 }, { "epoch": 0.6155884987295959, "grad_norm": 5.919952349908665, "learning_rate": 3.3999748496036916e-06, "loss": 17.4397, "step": 33677 }, { "epoch": 0.6156067779260423, "grad_norm": 7.045567029053421, "learning_rate": 3.3996944063868426e-06, "loss": 17.6429, "step": 33678 }, { "epoch": 0.6156250571224889, "grad_norm": 6.050976163970293, "learning_rate": 3.3994139687789267e-06, "loss": 17.3355, "step": 33679 }, { "epoch": 0.6156433363189354, "grad_norm": 6.457876797629926, "learning_rate": 3.399133536780927e-06, "loss": 17.3371, "step": 33680 }, { "epoch": 0.6156616155153819, "grad_norm": 5.717709197079271, "learning_rate": 3.3988531103938234e-06, "loss": 17.0125, "step": 33681 }, { "epoch": 0.6156798947118285, "grad_norm": 9.324884622782793, "learning_rate": 3.398572689618603e-06, "loss": 18.3129, "step": 33682 }, { "epoch": 0.615698173908275, "grad_norm": 6.240739145229977, "learning_rate": 3.3982922744562436e-06, "loss": 17.5379, "step": 33683 }, { "epoch": 0.6157164531047216, "grad_norm": 6.411747627422969, "learning_rate": 3.398011864907732e-06, "loss": 17.6546, "step": 33684 }, { "epoch": 0.615734732301168, "grad_norm": 6.38490370094364, "learning_rate": 3.3977314609740513e-06, "loss": 17.1938, "step": 33685 }, { "epoch": 0.6157530114976145, "grad_norm": 7.759009575865244, "learning_rate": 3.3974510626561806e-06, "loss": 18.2199, "step": 33686 }, { "epoch": 0.6157712906940611, "grad_norm": 5.76142252765922, "learning_rate": 3.3971706699551064e-06, "loss": 17.5591, "step": 33687 }, { "epoch": 0.6157895698905076, "grad_norm": 8.851223187900137, "learning_rate": 3.3968902828718097e-06, "loss": 17.6325, "step": 33688 }, { "epoch": 0.6158078490869542, "grad_norm": 6.728887423906832, "learning_rate": 3.3966099014072717e-06, "loss": 17.5466, "step": 33689 }, { "epoch": 0.6158261282834007, "grad_norm": 5.594235223633864, "learning_rate": 3.3963295255624797e-06, "loss": 17.2054, "step": 33690 }, { "epoch": 0.6158444074798471, "grad_norm": 6.704129741115412, "learning_rate": 3.396049155338413e-06, "loss": 17.5922, "step": 33691 }, { "epoch": 0.6158626866762937, "grad_norm": 7.0582571767710895, "learning_rate": 3.3957687907360524e-06, "loss": 18.0568, "step": 33692 }, { "epoch": 0.6158809658727402, "grad_norm": 6.021629562479973, "learning_rate": 3.3954884317563848e-06, "loss": 17.2586, "step": 33693 }, { "epoch": 0.6158992450691868, "grad_norm": 8.487326783942171, "learning_rate": 3.3952080784003916e-06, "loss": 17.9148, "step": 33694 }, { "epoch": 0.6159175242656333, "grad_norm": 7.365960973096604, "learning_rate": 3.394927730669053e-06, "loss": 17.6419, "step": 33695 }, { "epoch": 0.6159358034620798, "grad_norm": 8.271054263457042, "learning_rate": 3.3946473885633553e-06, "loss": 18.1178, "step": 33696 }, { "epoch": 0.6159540826585264, "grad_norm": 6.305356914445224, "learning_rate": 3.3943670520842768e-06, "loss": 17.4871, "step": 33697 }, { "epoch": 0.6159723618549728, "grad_norm": 7.241083475566109, "learning_rate": 3.3940867212328043e-06, "loss": 17.652, "step": 33698 }, { "epoch": 0.6159906410514194, "grad_norm": 5.717203429442915, "learning_rate": 3.3938063960099187e-06, "loss": 17.2612, "step": 33699 }, { "epoch": 0.6160089202478659, "grad_norm": 7.92800861455978, "learning_rate": 3.393526076416601e-06, "loss": 17.5702, "step": 33700 }, { "epoch": 0.6160271994443124, "grad_norm": 5.840500481286785, "learning_rate": 3.393245762453836e-06, "loss": 17.394, "step": 33701 }, { "epoch": 0.616045478640759, "grad_norm": 6.81061681349336, "learning_rate": 3.3929654541226053e-06, "loss": 17.5795, "step": 33702 }, { "epoch": 0.6160637578372055, "grad_norm": 6.102307554124693, "learning_rate": 3.3926851514238893e-06, "loss": 17.4309, "step": 33703 }, { "epoch": 0.616082037033652, "grad_norm": 6.89711147801807, "learning_rate": 3.392404854358675e-06, "loss": 17.8972, "step": 33704 }, { "epoch": 0.6161003162300985, "grad_norm": 5.626161247278588, "learning_rate": 3.392124562927942e-06, "loss": 17.1095, "step": 33705 }, { "epoch": 0.616118595426545, "grad_norm": 6.397343150474195, "learning_rate": 3.391844277132671e-06, "loss": 17.8565, "step": 33706 }, { "epoch": 0.6161368746229916, "grad_norm": 7.018078734237715, "learning_rate": 3.3915639969738474e-06, "loss": 17.6924, "step": 33707 }, { "epoch": 0.6161551538194381, "grad_norm": 6.385274462726961, "learning_rate": 3.391283722452453e-06, "loss": 17.2737, "step": 33708 }, { "epoch": 0.6161734330158847, "grad_norm": 6.1168606244953505, "learning_rate": 3.3910034535694682e-06, "loss": 17.1682, "step": 33709 }, { "epoch": 0.6161917122123312, "grad_norm": 8.082006870348408, "learning_rate": 3.3907231903258773e-06, "loss": 17.6361, "step": 33710 }, { "epoch": 0.6162099914087776, "grad_norm": 5.533401158286352, "learning_rate": 3.3904429327226623e-06, "loss": 17.0964, "step": 33711 }, { "epoch": 0.6162282706052242, "grad_norm": 5.679836282722316, "learning_rate": 3.3901626807608036e-06, "loss": 17.2692, "step": 33712 }, { "epoch": 0.6162465498016707, "grad_norm": 5.7187669981637885, "learning_rate": 3.389882434441287e-06, "loss": 17.27, "step": 33713 }, { "epoch": 0.6162648289981173, "grad_norm": 7.001424601838368, "learning_rate": 3.3896021937650913e-06, "loss": 17.5372, "step": 33714 }, { "epoch": 0.6162831081945638, "grad_norm": 5.867022174411802, "learning_rate": 3.3893219587332016e-06, "loss": 17.3431, "step": 33715 }, { "epoch": 0.6163013873910103, "grad_norm": 5.792913244529532, "learning_rate": 3.3890417293465976e-06, "loss": 17.1296, "step": 33716 }, { "epoch": 0.6163196665874568, "grad_norm": 9.165083818227414, "learning_rate": 3.388761505606262e-06, "loss": 18.3176, "step": 33717 }, { "epoch": 0.6163379457839033, "grad_norm": 6.24474718180163, "learning_rate": 3.388481287513179e-06, "loss": 17.3481, "step": 33718 }, { "epoch": 0.6163562249803498, "grad_norm": 6.552259365860015, "learning_rate": 3.38820107506833e-06, "loss": 17.3023, "step": 33719 }, { "epoch": 0.6163745041767964, "grad_norm": 6.972196239656714, "learning_rate": 3.387920868272694e-06, "loss": 17.4774, "step": 33720 }, { "epoch": 0.6163927833732429, "grad_norm": 6.615473916335535, "learning_rate": 3.3876406671272555e-06, "loss": 17.4495, "step": 33721 }, { "epoch": 0.6164110625696895, "grad_norm": 5.158164114003307, "learning_rate": 3.387360471632999e-06, "loss": 16.9933, "step": 33722 }, { "epoch": 0.616429341766136, "grad_norm": 5.286670737102448, "learning_rate": 3.387080281790901e-06, "loss": 17.1, "step": 33723 }, { "epoch": 0.6164476209625824, "grad_norm": 5.03631923086363, "learning_rate": 3.386800097601949e-06, "loss": 16.966, "step": 33724 }, { "epoch": 0.616465900159029, "grad_norm": 5.9240158714088444, "learning_rate": 3.3865199190671223e-06, "loss": 17.38, "step": 33725 }, { "epoch": 0.6164841793554755, "grad_norm": 6.912112889599772, "learning_rate": 3.386239746187402e-06, "loss": 17.809, "step": 33726 }, { "epoch": 0.6165024585519221, "grad_norm": 6.633876046530853, "learning_rate": 3.3859595789637727e-06, "loss": 17.6056, "step": 33727 }, { "epoch": 0.6165207377483686, "grad_norm": 7.733904723966469, "learning_rate": 3.3856794173972134e-06, "loss": 18.2211, "step": 33728 }, { "epoch": 0.616539016944815, "grad_norm": 6.625261483480516, "learning_rate": 3.38539926148871e-06, "loss": 17.4432, "step": 33729 }, { "epoch": 0.6165572961412616, "grad_norm": 8.410991962019164, "learning_rate": 3.3851191112392406e-06, "loss": 18.2393, "step": 33730 }, { "epoch": 0.6165755753377081, "grad_norm": 6.217486298993511, "learning_rate": 3.3848389666497876e-06, "loss": 17.1333, "step": 33731 }, { "epoch": 0.6165938545341547, "grad_norm": 5.345456957668284, "learning_rate": 3.384558827721336e-06, "loss": 17.2437, "step": 33732 }, { "epoch": 0.6166121337306012, "grad_norm": 6.263415600019224, "learning_rate": 3.3842786944548643e-06, "loss": 17.3637, "step": 33733 }, { "epoch": 0.6166304129270477, "grad_norm": 5.8873325100818725, "learning_rate": 3.3839985668513564e-06, "loss": 17.1947, "step": 33734 }, { "epoch": 0.6166486921234943, "grad_norm": 6.2823201144984555, "learning_rate": 3.3837184449117927e-06, "loss": 17.4363, "step": 33735 }, { "epoch": 0.6166669713199407, "grad_norm": 7.160943862467161, "learning_rate": 3.383438328637156e-06, "loss": 17.9832, "step": 33736 }, { "epoch": 0.6166852505163873, "grad_norm": 6.244858842243383, "learning_rate": 3.383158218028427e-06, "loss": 17.3702, "step": 33737 }, { "epoch": 0.6167035297128338, "grad_norm": 6.5771371736064355, "learning_rate": 3.3828781130865883e-06, "loss": 17.5226, "step": 33738 }, { "epoch": 0.6167218089092803, "grad_norm": 9.035899336242947, "learning_rate": 3.3825980138126226e-06, "loss": 18.3179, "step": 33739 }, { "epoch": 0.6167400881057269, "grad_norm": 6.2355557911653685, "learning_rate": 3.3823179202075087e-06, "loss": 17.4786, "step": 33740 }, { "epoch": 0.6167583673021734, "grad_norm": 6.201730488597781, "learning_rate": 3.3820378322722317e-06, "loss": 17.019, "step": 33741 }, { "epoch": 0.61677664649862, "grad_norm": 6.858798208095397, "learning_rate": 3.3817577500077713e-06, "loss": 17.8089, "step": 33742 }, { "epoch": 0.6167949256950664, "grad_norm": 7.0959901850183975, "learning_rate": 3.381477673415108e-06, "loss": 17.8536, "step": 33743 }, { "epoch": 0.6168132048915129, "grad_norm": 5.48238919586113, "learning_rate": 3.3811976024952275e-06, "loss": 17.212, "step": 33744 }, { "epoch": 0.6168314840879595, "grad_norm": 8.734789648551645, "learning_rate": 3.3809175372491065e-06, "loss": 17.8762, "step": 33745 }, { "epoch": 0.616849763284406, "grad_norm": 6.058719771057981, "learning_rate": 3.380637477677731e-06, "loss": 17.3152, "step": 33746 }, { "epoch": 0.6168680424808526, "grad_norm": 5.822860466482788, "learning_rate": 3.38035742378208e-06, "loss": 17.2709, "step": 33747 }, { "epoch": 0.6168863216772991, "grad_norm": 5.80897695351794, "learning_rate": 3.3800773755631344e-06, "loss": 17.058, "step": 33748 }, { "epoch": 0.6169046008737455, "grad_norm": 6.583969103856138, "learning_rate": 3.379797333021879e-06, "loss": 17.6277, "step": 33749 }, { "epoch": 0.6169228800701921, "grad_norm": 7.561586319299126, "learning_rate": 3.3795172961592935e-06, "loss": 17.453, "step": 33750 }, { "epoch": 0.6169411592666386, "grad_norm": 4.851984585799251, "learning_rate": 3.3792372649763574e-06, "loss": 16.7055, "step": 33751 }, { "epoch": 0.6169594384630852, "grad_norm": 5.862577273284898, "learning_rate": 3.3789572394740544e-06, "loss": 17.1603, "step": 33752 }, { "epoch": 0.6169777176595317, "grad_norm": 5.259769351601194, "learning_rate": 3.3786772196533678e-06, "loss": 16.9425, "step": 33753 }, { "epoch": 0.6169959968559782, "grad_norm": 8.650335702464744, "learning_rate": 3.3783972055152735e-06, "loss": 18.7767, "step": 33754 }, { "epoch": 0.6170142760524248, "grad_norm": 6.078482052172343, "learning_rate": 3.3781171970607597e-06, "loss": 17.5161, "step": 33755 }, { "epoch": 0.6170325552488712, "grad_norm": 5.784942901009961, "learning_rate": 3.377837194290802e-06, "loss": 16.9686, "step": 33756 }, { "epoch": 0.6170508344453178, "grad_norm": 6.948214055205548, "learning_rate": 3.3775571972063835e-06, "loss": 17.7852, "step": 33757 }, { "epoch": 0.6170691136417643, "grad_norm": 6.663209940027495, "learning_rate": 3.377277205808489e-06, "loss": 18.063, "step": 33758 }, { "epoch": 0.6170873928382108, "grad_norm": 6.710956319870159, "learning_rate": 3.3769972200980946e-06, "loss": 17.739, "step": 33759 }, { "epoch": 0.6171056720346574, "grad_norm": 6.421522736106094, "learning_rate": 3.376717240076186e-06, "loss": 17.6227, "step": 33760 }, { "epoch": 0.6171239512311039, "grad_norm": 5.933011729643572, "learning_rate": 3.3764372657437415e-06, "loss": 17.6487, "step": 33761 }, { "epoch": 0.6171422304275505, "grad_norm": 5.7489314056962595, "learning_rate": 3.376157297101743e-06, "loss": 17.0438, "step": 33762 }, { "epoch": 0.6171605096239969, "grad_norm": 5.920525511696153, "learning_rate": 3.375877334151174e-06, "loss": 17.3965, "step": 33763 }, { "epoch": 0.6171787888204434, "grad_norm": 5.847059304238006, "learning_rate": 3.375597376893014e-06, "loss": 17.2793, "step": 33764 }, { "epoch": 0.61719706801689, "grad_norm": 5.716115125714951, "learning_rate": 3.375317425328242e-06, "loss": 17.3258, "step": 33765 }, { "epoch": 0.6172153472133365, "grad_norm": 5.8332528926520695, "learning_rate": 3.375037479457842e-06, "loss": 17.1235, "step": 33766 }, { "epoch": 0.6172336264097831, "grad_norm": 7.190306806496085, "learning_rate": 3.374757539282797e-06, "loss": 17.7354, "step": 33767 }, { "epoch": 0.6172519056062296, "grad_norm": 5.531460984234732, "learning_rate": 3.374477604804083e-06, "loss": 17.1706, "step": 33768 }, { "epoch": 0.617270184802676, "grad_norm": 6.61954517233647, "learning_rate": 3.374197676022686e-06, "loss": 17.5748, "step": 33769 }, { "epoch": 0.6172884639991226, "grad_norm": 5.929138410552096, "learning_rate": 3.373917752939584e-06, "loss": 17.1403, "step": 33770 }, { "epoch": 0.6173067431955691, "grad_norm": 6.623692555489119, "learning_rate": 3.373637835555758e-06, "loss": 17.3504, "step": 33771 }, { "epoch": 0.6173250223920157, "grad_norm": 7.328711312520536, "learning_rate": 3.3733579238721925e-06, "loss": 18.0836, "step": 33772 }, { "epoch": 0.6173433015884622, "grad_norm": 7.090106559461721, "learning_rate": 3.3730780178898646e-06, "loss": 17.6887, "step": 33773 }, { "epoch": 0.6173615807849087, "grad_norm": 6.020558209711504, "learning_rate": 3.372798117609759e-06, "loss": 17.1833, "step": 33774 }, { "epoch": 0.6173798599813553, "grad_norm": 6.244632239144283, "learning_rate": 3.3725182230328535e-06, "loss": 17.3182, "step": 33775 }, { "epoch": 0.6173981391778017, "grad_norm": 6.697265059615209, "learning_rate": 3.3722383341601295e-06, "loss": 17.5295, "step": 33776 }, { "epoch": 0.6174164183742483, "grad_norm": 7.418045704973743, "learning_rate": 3.3719584509925705e-06, "loss": 17.7884, "step": 33777 }, { "epoch": 0.6174346975706948, "grad_norm": 6.292706287824222, "learning_rate": 3.3716785735311565e-06, "loss": 17.3184, "step": 33778 }, { "epoch": 0.6174529767671413, "grad_norm": 6.235461707765109, "learning_rate": 3.371398701776866e-06, "loss": 17.3405, "step": 33779 }, { "epoch": 0.6174712559635879, "grad_norm": 6.211566146798512, "learning_rate": 3.3711188357306816e-06, "loss": 17.4374, "step": 33780 }, { "epoch": 0.6174895351600344, "grad_norm": 5.127032182893314, "learning_rate": 3.370838975393586e-06, "loss": 17.2686, "step": 33781 }, { "epoch": 0.6175078143564809, "grad_norm": 6.002246896177078, "learning_rate": 3.3705591207665567e-06, "loss": 17.1014, "step": 33782 }, { "epoch": 0.6175260935529274, "grad_norm": 6.3005269826004495, "learning_rate": 3.370279271850578e-06, "loss": 17.1673, "step": 33783 }, { "epoch": 0.6175443727493739, "grad_norm": 5.95254559032349, "learning_rate": 3.3699994286466275e-06, "loss": 17.4523, "step": 33784 }, { "epoch": 0.6175626519458205, "grad_norm": 6.42033498075399, "learning_rate": 3.3697195911556873e-06, "loss": 17.3215, "step": 33785 }, { "epoch": 0.617580931142267, "grad_norm": 4.947147709699027, "learning_rate": 3.3694397593787404e-06, "loss": 16.9891, "step": 33786 }, { "epoch": 0.6175992103387135, "grad_norm": 6.274355928758234, "learning_rate": 3.369159933316766e-06, "loss": 17.2747, "step": 33787 }, { "epoch": 0.61761748953516, "grad_norm": 6.317851729817672, "learning_rate": 3.3688801129707416e-06, "loss": 17.4259, "step": 33788 }, { "epoch": 0.6176357687316065, "grad_norm": 7.833400508828423, "learning_rate": 3.3686002983416523e-06, "loss": 17.7928, "step": 33789 }, { "epoch": 0.6176540479280531, "grad_norm": 5.719578773115884, "learning_rate": 3.3683204894304762e-06, "loss": 17.1618, "step": 33790 }, { "epoch": 0.6176723271244996, "grad_norm": 7.491070180256352, "learning_rate": 3.3680406862381977e-06, "loss": 17.5129, "step": 33791 }, { "epoch": 0.6176906063209461, "grad_norm": 6.111409056669226, "learning_rate": 3.367760888765794e-06, "loss": 17.2106, "step": 33792 }, { "epoch": 0.6177088855173927, "grad_norm": 6.1254382015863005, "learning_rate": 3.3674810970142457e-06, "loss": 17.5129, "step": 33793 }, { "epoch": 0.6177271647138391, "grad_norm": 6.312425272555373, "learning_rate": 3.3672013109845348e-06, "loss": 17.415, "step": 33794 }, { "epoch": 0.6177454439102857, "grad_norm": 5.839472335634778, "learning_rate": 3.366921530677643e-06, "loss": 17.1958, "step": 33795 }, { "epoch": 0.6177637231067322, "grad_norm": 6.434416475155521, "learning_rate": 3.3666417560945475e-06, "loss": 17.6849, "step": 33796 }, { "epoch": 0.6177820023031787, "grad_norm": 6.257714409029044, "learning_rate": 3.366361987236233e-06, "loss": 17.3543, "step": 33797 }, { "epoch": 0.6178002814996253, "grad_norm": 6.9275555962108575, "learning_rate": 3.3660822241036774e-06, "loss": 17.2629, "step": 33798 }, { "epoch": 0.6178185606960718, "grad_norm": 6.992592435663883, "learning_rate": 3.36580246669786e-06, "loss": 17.944, "step": 33799 }, { "epoch": 0.6178368398925184, "grad_norm": 5.55419411409346, "learning_rate": 3.3655227150197657e-06, "loss": 17.0323, "step": 33800 }, { "epoch": 0.6178551190889648, "grad_norm": 5.799826715750306, "learning_rate": 3.365242969070371e-06, "loss": 17.2492, "step": 33801 }, { "epoch": 0.6178733982854113, "grad_norm": 4.740085695890961, "learning_rate": 3.364963228850658e-06, "loss": 16.9438, "step": 33802 }, { "epoch": 0.6178916774818579, "grad_norm": 6.192944572190779, "learning_rate": 3.364683494361607e-06, "loss": 17.4176, "step": 33803 }, { "epoch": 0.6179099566783044, "grad_norm": 7.645579220585548, "learning_rate": 3.3644037656041974e-06, "loss": 17.8585, "step": 33804 }, { "epoch": 0.617928235874751, "grad_norm": 5.3485314879342845, "learning_rate": 3.364124042579413e-06, "loss": 17.0618, "step": 33805 }, { "epoch": 0.6179465150711975, "grad_norm": 6.921779027516839, "learning_rate": 3.36384432528823e-06, "loss": 17.5835, "step": 33806 }, { "epoch": 0.617964794267644, "grad_norm": 5.9550635682056745, "learning_rate": 3.363564613731632e-06, "loss": 17.3792, "step": 33807 }, { "epoch": 0.6179830734640905, "grad_norm": 6.968371338277051, "learning_rate": 3.363284907910597e-06, "loss": 17.6911, "step": 33808 }, { "epoch": 0.618001352660537, "grad_norm": 5.625851533903195, "learning_rate": 3.3630052078261076e-06, "loss": 17.0575, "step": 33809 }, { "epoch": 0.6180196318569836, "grad_norm": 5.295652857237953, "learning_rate": 3.3627255134791413e-06, "loss": 16.901, "step": 33810 }, { "epoch": 0.6180379110534301, "grad_norm": 7.19578160550978, "learning_rate": 3.3624458248706804e-06, "loss": 17.7049, "step": 33811 }, { "epoch": 0.6180561902498766, "grad_norm": 7.612063928743437, "learning_rate": 3.362166142001706e-06, "loss": 17.6845, "step": 33812 }, { "epoch": 0.6180744694463232, "grad_norm": 6.6431660363192435, "learning_rate": 3.3618864648731952e-06, "loss": 17.3356, "step": 33813 }, { "epoch": 0.6180927486427696, "grad_norm": 6.337212371320222, "learning_rate": 3.3616067934861322e-06, "loss": 17.5697, "step": 33814 }, { "epoch": 0.6181110278392162, "grad_norm": 6.948327632570087, "learning_rate": 3.3613271278414944e-06, "loss": 17.9505, "step": 33815 }, { "epoch": 0.6181293070356627, "grad_norm": 8.882432540201636, "learning_rate": 3.3610474679402616e-06, "loss": 18.2683, "step": 33816 }, { "epoch": 0.6181475862321092, "grad_norm": 5.717499045865817, "learning_rate": 3.3607678137834175e-06, "loss": 17.0693, "step": 33817 }, { "epoch": 0.6181658654285558, "grad_norm": 6.972615237205359, "learning_rate": 3.3604881653719394e-06, "loss": 17.5731, "step": 33818 }, { "epoch": 0.6181841446250023, "grad_norm": 8.0323108985144, "learning_rate": 3.3602085227068065e-06, "loss": 18.3981, "step": 33819 }, { "epoch": 0.6182024238214489, "grad_norm": 7.164107811201655, "learning_rate": 3.3599288857890013e-06, "loss": 17.5592, "step": 33820 }, { "epoch": 0.6182207030178953, "grad_norm": 6.711908800361873, "learning_rate": 3.359649254619502e-06, "loss": 17.1815, "step": 33821 }, { "epoch": 0.6182389822143418, "grad_norm": 5.558300004653827, "learning_rate": 3.359369629199292e-06, "loss": 17.1163, "step": 33822 }, { "epoch": 0.6182572614107884, "grad_norm": 6.139816172696749, "learning_rate": 3.359090009529349e-06, "loss": 17.6007, "step": 33823 }, { "epoch": 0.6182755406072349, "grad_norm": 5.836607263342178, "learning_rate": 3.3588103956106507e-06, "loss": 17.3598, "step": 33824 }, { "epoch": 0.6182938198036815, "grad_norm": 5.9911398243401, "learning_rate": 3.3585307874441808e-06, "loss": 17.4155, "step": 33825 }, { "epoch": 0.618312099000128, "grad_norm": 7.741521497927152, "learning_rate": 3.3582511850309183e-06, "loss": 17.9924, "step": 33826 }, { "epoch": 0.6183303781965744, "grad_norm": 6.191254997411443, "learning_rate": 3.357971588371842e-06, "loss": 17.5801, "step": 33827 }, { "epoch": 0.618348657393021, "grad_norm": 6.359541225442397, "learning_rate": 3.3576919974679346e-06, "loss": 17.6663, "step": 33828 }, { "epoch": 0.6183669365894675, "grad_norm": 5.719597567180929, "learning_rate": 3.357412412320173e-06, "loss": 17.3189, "step": 33829 }, { "epoch": 0.6183852157859141, "grad_norm": 6.6435764955470615, "learning_rate": 3.357132832929537e-06, "loss": 17.8008, "step": 33830 }, { "epoch": 0.6184034949823606, "grad_norm": 5.746643265768886, "learning_rate": 3.3568532592970104e-06, "loss": 17.0975, "step": 33831 }, { "epoch": 0.6184217741788071, "grad_norm": 5.197740318409598, "learning_rate": 3.356573691423571e-06, "loss": 17.0154, "step": 33832 }, { "epoch": 0.6184400533752537, "grad_norm": 6.1377639034658475, "learning_rate": 3.3562941293101955e-06, "loss": 17.5267, "step": 33833 }, { "epoch": 0.6184583325717001, "grad_norm": 6.699452833677378, "learning_rate": 3.3560145729578673e-06, "loss": 17.508, "step": 33834 }, { "epoch": 0.6184766117681467, "grad_norm": 6.752010088588784, "learning_rate": 3.3557350223675645e-06, "loss": 17.3722, "step": 33835 }, { "epoch": 0.6184948909645932, "grad_norm": 6.381074522183191, "learning_rate": 3.3554554775402703e-06, "loss": 17.5079, "step": 33836 }, { "epoch": 0.6185131701610397, "grad_norm": 6.374841907608044, "learning_rate": 3.355175938476961e-06, "loss": 17.6312, "step": 33837 }, { "epoch": 0.6185314493574863, "grad_norm": 6.038635794481833, "learning_rate": 3.3548964051786158e-06, "loss": 17.0826, "step": 33838 }, { "epoch": 0.6185497285539328, "grad_norm": 6.532259763216951, "learning_rate": 3.3546168776462173e-06, "loss": 17.8034, "step": 33839 }, { "epoch": 0.6185680077503793, "grad_norm": 6.368545130752321, "learning_rate": 3.3543373558807447e-06, "loss": 17.6381, "step": 33840 }, { "epoch": 0.6185862869468258, "grad_norm": 5.639302732944798, "learning_rate": 3.354057839883175e-06, "loss": 17.1001, "step": 33841 }, { "epoch": 0.6186045661432723, "grad_norm": 6.957680130990894, "learning_rate": 3.3537783296544914e-06, "loss": 17.7974, "step": 33842 }, { "epoch": 0.6186228453397189, "grad_norm": 6.890557848975634, "learning_rate": 3.3534988251956714e-06, "loss": 17.782, "step": 33843 }, { "epoch": 0.6186411245361654, "grad_norm": 6.980580640975334, "learning_rate": 3.353219326507693e-06, "loss": 17.5889, "step": 33844 }, { "epoch": 0.618659403732612, "grad_norm": 6.6029573100350705, "learning_rate": 3.352939833591541e-06, "loss": 17.5727, "step": 33845 }, { "epoch": 0.6186776829290584, "grad_norm": 8.171024098485656, "learning_rate": 3.3526603464481914e-06, "loss": 18.1242, "step": 33846 }, { "epoch": 0.6186959621255049, "grad_norm": 6.586255530034787, "learning_rate": 3.3523808650786227e-06, "loss": 17.5312, "step": 33847 }, { "epoch": 0.6187142413219515, "grad_norm": 5.959369083735386, "learning_rate": 3.352101389483817e-06, "loss": 17.0477, "step": 33848 }, { "epoch": 0.618732520518398, "grad_norm": 6.344699487327344, "learning_rate": 3.351821919664754e-06, "loss": 17.3808, "step": 33849 }, { "epoch": 0.6187507997148446, "grad_norm": 7.120746291566081, "learning_rate": 3.3515424556224095e-06, "loss": 18.2118, "step": 33850 }, { "epoch": 0.6187690789112911, "grad_norm": 6.294104628476008, "learning_rate": 3.351262997357769e-06, "loss": 17.3548, "step": 33851 }, { "epoch": 0.6187873581077376, "grad_norm": 5.843048927806441, "learning_rate": 3.3509835448718055e-06, "loss": 17.2912, "step": 33852 }, { "epoch": 0.6188056373041841, "grad_norm": 5.943760871207645, "learning_rate": 3.3507040981655024e-06, "loss": 17.2874, "step": 33853 }, { "epoch": 0.6188239165006306, "grad_norm": 5.144739254411926, "learning_rate": 3.35042465723984e-06, "loss": 16.8917, "step": 33854 }, { "epoch": 0.6188421956970771, "grad_norm": 5.050947831460004, "learning_rate": 3.350145222095794e-06, "loss": 16.9493, "step": 33855 }, { "epoch": 0.6188604748935237, "grad_norm": 8.109901013761496, "learning_rate": 3.3498657927343482e-06, "loss": 18.0866, "step": 33856 }, { "epoch": 0.6188787540899702, "grad_norm": 5.631082775419748, "learning_rate": 3.349586369156478e-06, "loss": 17.1462, "step": 33857 }, { "epoch": 0.6188970332864168, "grad_norm": 5.380047003860209, "learning_rate": 3.3493069513631626e-06, "loss": 16.9872, "step": 33858 }, { "epoch": 0.6189153124828632, "grad_norm": 6.8050164181885595, "learning_rate": 3.3490275393553863e-06, "loss": 17.4156, "step": 33859 }, { "epoch": 0.6189335916793097, "grad_norm": 7.031790585192562, "learning_rate": 3.348748133134124e-06, "loss": 17.7523, "step": 33860 }, { "epoch": 0.6189518708757563, "grad_norm": 6.009039173680672, "learning_rate": 3.3484687327003564e-06, "loss": 17.3186, "step": 33861 }, { "epoch": 0.6189701500722028, "grad_norm": 6.75111039004573, "learning_rate": 3.348189338055062e-06, "loss": 17.4478, "step": 33862 }, { "epoch": 0.6189884292686494, "grad_norm": 5.3906571406810535, "learning_rate": 3.347909949199222e-06, "loss": 17.1422, "step": 33863 }, { "epoch": 0.6190067084650959, "grad_norm": 7.832258246989175, "learning_rate": 3.3476305661338114e-06, "loss": 18.1173, "step": 33864 }, { "epoch": 0.6190249876615423, "grad_norm": 6.678221224967801, "learning_rate": 3.347351188859814e-06, "loss": 17.6978, "step": 33865 }, { "epoch": 0.6190432668579889, "grad_norm": 5.043499187459672, "learning_rate": 3.347071817378208e-06, "loss": 17.0491, "step": 33866 }, { "epoch": 0.6190615460544354, "grad_norm": 7.382757379457708, "learning_rate": 3.3467924516899707e-06, "loss": 18.0313, "step": 33867 }, { "epoch": 0.619079825250882, "grad_norm": 7.9656139563638755, "learning_rate": 3.346513091796084e-06, "loss": 18.0209, "step": 33868 }, { "epoch": 0.6190981044473285, "grad_norm": 6.304669105465651, "learning_rate": 3.346233737697523e-06, "loss": 17.4054, "step": 33869 }, { "epoch": 0.619116383643775, "grad_norm": 6.0905986044745655, "learning_rate": 3.3459543893952707e-06, "loss": 17.7173, "step": 33870 }, { "epoch": 0.6191346628402216, "grad_norm": 7.553491485992329, "learning_rate": 3.3456750468903052e-06, "loss": 17.3478, "step": 33871 }, { "epoch": 0.619152942036668, "grad_norm": 6.346942251732454, "learning_rate": 3.3453957101836034e-06, "loss": 17.4276, "step": 33872 }, { "epoch": 0.6191712212331146, "grad_norm": 5.9245403257694305, "learning_rate": 3.345116379276148e-06, "loss": 17.4082, "step": 33873 }, { "epoch": 0.6191895004295611, "grad_norm": 7.6006675286408525, "learning_rate": 3.344837054168916e-06, "loss": 18.124, "step": 33874 }, { "epoch": 0.6192077796260076, "grad_norm": 5.55456500733875, "learning_rate": 3.3445577348628842e-06, "loss": 17.2931, "step": 33875 }, { "epoch": 0.6192260588224542, "grad_norm": 4.993823962167816, "learning_rate": 3.3442784213590364e-06, "loss": 16.7283, "step": 33876 }, { "epoch": 0.6192443380189007, "grad_norm": 6.623279452648762, "learning_rate": 3.343999113658349e-06, "loss": 18.0545, "step": 33877 }, { "epoch": 0.6192626172153473, "grad_norm": 5.594575636233233, "learning_rate": 3.343719811761799e-06, "loss": 17.0611, "step": 33878 }, { "epoch": 0.6192808964117937, "grad_norm": 7.18389647008932, "learning_rate": 3.3434405156703687e-06, "loss": 17.4875, "step": 33879 }, { "epoch": 0.6192991756082402, "grad_norm": 6.489287716984938, "learning_rate": 3.343161225385036e-06, "loss": 17.3859, "step": 33880 }, { "epoch": 0.6193174548046868, "grad_norm": 6.341468242114379, "learning_rate": 3.3428819409067775e-06, "loss": 17.2368, "step": 33881 }, { "epoch": 0.6193357340011333, "grad_norm": 6.714371910672505, "learning_rate": 3.3426026622365764e-06, "loss": 17.789, "step": 33882 }, { "epoch": 0.6193540131975799, "grad_norm": 7.080039982543369, "learning_rate": 3.342323389375407e-06, "loss": 17.8869, "step": 33883 }, { "epoch": 0.6193722923940264, "grad_norm": 7.255223285170784, "learning_rate": 3.342044122324251e-06, "loss": 17.9173, "step": 33884 }, { "epoch": 0.6193905715904728, "grad_norm": 5.8661128244707585, "learning_rate": 3.341764861084088e-06, "loss": 17.3264, "step": 33885 }, { "epoch": 0.6194088507869194, "grad_norm": 5.578631042011989, "learning_rate": 3.341485605655893e-06, "loss": 17.4485, "step": 33886 }, { "epoch": 0.6194271299833659, "grad_norm": 6.532756551302818, "learning_rate": 3.3412063560406487e-06, "loss": 17.1394, "step": 33887 }, { "epoch": 0.6194454091798125, "grad_norm": 5.346495080231919, "learning_rate": 3.3409271122393314e-06, "loss": 17.0939, "step": 33888 }, { "epoch": 0.619463688376259, "grad_norm": 5.554401675461272, "learning_rate": 3.3406478742529203e-06, "loss": 17.1478, "step": 33889 }, { "epoch": 0.6194819675727055, "grad_norm": 6.633274285371833, "learning_rate": 3.3403686420823953e-06, "loss": 17.461, "step": 33890 }, { "epoch": 0.619500246769152, "grad_norm": 6.4403613462458065, "learning_rate": 3.340089415728735e-06, "loss": 17.3092, "step": 33891 }, { "epoch": 0.6195185259655985, "grad_norm": 5.31141756586956, "learning_rate": 3.339810195192915e-06, "loss": 16.9345, "step": 33892 }, { "epoch": 0.6195368051620451, "grad_norm": 5.30451702791888, "learning_rate": 3.339530980475917e-06, "loss": 17.0436, "step": 33893 }, { "epoch": 0.6195550843584916, "grad_norm": 5.36177985483867, "learning_rate": 3.33925177157872e-06, "loss": 17.3752, "step": 33894 }, { "epoch": 0.6195733635549381, "grad_norm": 9.590313383892905, "learning_rate": 3.3389725685022996e-06, "loss": 18.148, "step": 33895 }, { "epoch": 0.6195916427513847, "grad_norm": 6.324454712499073, "learning_rate": 3.338693371247638e-06, "loss": 17.4893, "step": 33896 }, { "epoch": 0.6196099219478312, "grad_norm": 6.365704343754421, "learning_rate": 3.33841417981571e-06, "loss": 17.6013, "step": 33897 }, { "epoch": 0.6196282011442777, "grad_norm": 6.012452099700986, "learning_rate": 3.338134994207497e-06, "loss": 17.6076, "step": 33898 }, { "epoch": 0.6196464803407242, "grad_norm": 5.818509596682194, "learning_rate": 3.337855814423978e-06, "loss": 17.4152, "step": 33899 }, { "epoch": 0.6196647595371707, "grad_norm": 6.3096498742436244, "learning_rate": 3.3375766404661273e-06, "loss": 17.2942, "step": 33900 }, { "epoch": 0.6196830387336173, "grad_norm": 5.315890756480483, "learning_rate": 3.3372974723349295e-06, "loss": 17.1378, "step": 33901 }, { "epoch": 0.6197013179300638, "grad_norm": 6.377037130974659, "learning_rate": 3.337018310031358e-06, "loss": 17.4044, "step": 33902 }, { "epoch": 0.6197195971265104, "grad_norm": 5.219371717477421, "learning_rate": 3.336739153556392e-06, "loss": 16.7879, "step": 33903 }, { "epoch": 0.6197378763229568, "grad_norm": 6.154621934614231, "learning_rate": 3.336460002911013e-06, "loss": 17.1274, "step": 33904 }, { "epoch": 0.6197561555194033, "grad_norm": 4.902798116174497, "learning_rate": 3.3361808580961973e-06, "loss": 16.6114, "step": 33905 }, { "epoch": 0.6197744347158499, "grad_norm": 6.6787885713560255, "learning_rate": 3.335901719112922e-06, "loss": 17.0506, "step": 33906 }, { "epoch": 0.6197927139122964, "grad_norm": 5.8704194137144095, "learning_rate": 3.3356225859621667e-06, "loss": 17.2884, "step": 33907 }, { "epoch": 0.619810993108743, "grad_norm": 5.202281774521146, "learning_rate": 3.3353434586449105e-06, "loss": 16.9634, "step": 33908 }, { "epoch": 0.6198292723051895, "grad_norm": 5.608157063192628, "learning_rate": 3.3350643371621303e-06, "loss": 17.066, "step": 33909 }, { "epoch": 0.619847551501636, "grad_norm": 7.445362724304963, "learning_rate": 3.334785221514807e-06, "loss": 17.8039, "step": 33910 }, { "epoch": 0.6198658306980825, "grad_norm": 7.206192093533857, "learning_rate": 3.334506111703915e-06, "loss": 17.5394, "step": 33911 }, { "epoch": 0.619884109894529, "grad_norm": 7.012066622412886, "learning_rate": 3.334227007730434e-06, "loss": 17.5293, "step": 33912 }, { "epoch": 0.6199023890909756, "grad_norm": 5.561903645815939, "learning_rate": 3.333947909595345e-06, "loss": 17.1777, "step": 33913 }, { "epoch": 0.6199206682874221, "grad_norm": 5.920835108232874, "learning_rate": 3.3336688172996225e-06, "loss": 17.0493, "step": 33914 }, { "epoch": 0.6199389474838686, "grad_norm": 6.717973857498078, "learning_rate": 3.3333897308442474e-06, "loss": 17.4693, "step": 33915 }, { "epoch": 0.6199572266803152, "grad_norm": 5.530270943725569, "learning_rate": 3.333110650230196e-06, "loss": 17.2689, "step": 33916 }, { "epoch": 0.6199755058767616, "grad_norm": 4.867230178948299, "learning_rate": 3.332831575458446e-06, "loss": 16.8491, "step": 33917 }, { "epoch": 0.6199937850732082, "grad_norm": 6.3487023302870895, "learning_rate": 3.332552506529979e-06, "loss": 16.9637, "step": 33918 }, { "epoch": 0.6200120642696547, "grad_norm": 8.497114933630925, "learning_rate": 3.3322734434457703e-06, "loss": 18.489, "step": 33919 }, { "epoch": 0.6200303434661012, "grad_norm": 7.708705016388389, "learning_rate": 3.3319943862067965e-06, "loss": 18.4573, "step": 33920 }, { "epoch": 0.6200486226625478, "grad_norm": 8.296639048594809, "learning_rate": 3.3317153348140386e-06, "loss": 17.9833, "step": 33921 }, { "epoch": 0.6200669018589943, "grad_norm": 5.160456337334504, "learning_rate": 3.331436289268475e-06, "loss": 16.8995, "step": 33922 }, { "epoch": 0.6200851810554407, "grad_norm": 6.809830252425916, "learning_rate": 3.3311572495710804e-06, "loss": 17.5978, "step": 33923 }, { "epoch": 0.6201034602518873, "grad_norm": 6.112760618037146, "learning_rate": 3.330878215722837e-06, "loss": 17.495, "step": 33924 }, { "epoch": 0.6201217394483338, "grad_norm": 6.19333463707942, "learning_rate": 3.330599187724719e-06, "loss": 17.466, "step": 33925 }, { "epoch": 0.6201400186447804, "grad_norm": 5.936221282798404, "learning_rate": 3.330320165577705e-06, "loss": 17.3668, "step": 33926 }, { "epoch": 0.6201582978412269, "grad_norm": 5.554351145367577, "learning_rate": 3.330041149282777e-06, "loss": 17.1801, "step": 33927 }, { "epoch": 0.6201765770376734, "grad_norm": 5.562748733604866, "learning_rate": 3.3297621388409063e-06, "loss": 17.001, "step": 33928 }, { "epoch": 0.62019485623412, "grad_norm": 6.954658837735389, "learning_rate": 3.3294831342530774e-06, "loss": 17.637, "step": 33929 }, { "epoch": 0.6202131354305664, "grad_norm": 6.983029788914796, "learning_rate": 3.3292041355202643e-06, "loss": 17.5954, "step": 33930 }, { "epoch": 0.620231414627013, "grad_norm": 6.619579427617934, "learning_rate": 3.328925142643444e-06, "loss": 17.8011, "step": 33931 }, { "epoch": 0.6202496938234595, "grad_norm": 7.182623464556467, "learning_rate": 3.3286461556235984e-06, "loss": 17.5246, "step": 33932 }, { "epoch": 0.620267973019906, "grad_norm": 5.594185669684121, "learning_rate": 3.328367174461702e-06, "loss": 17.1103, "step": 33933 }, { "epoch": 0.6202862522163526, "grad_norm": 5.123516472511835, "learning_rate": 3.3280881991587323e-06, "loss": 17.0078, "step": 33934 }, { "epoch": 0.6203045314127991, "grad_norm": 13.994774732953369, "learning_rate": 3.3278092297156705e-06, "loss": 16.9327, "step": 33935 }, { "epoch": 0.6203228106092457, "grad_norm": 5.0811368776787145, "learning_rate": 3.327530266133493e-06, "loss": 17.0386, "step": 33936 }, { "epoch": 0.6203410898056921, "grad_norm": 7.088617716863212, "learning_rate": 3.327251308413174e-06, "loss": 17.7357, "step": 33937 }, { "epoch": 0.6203593690021386, "grad_norm": 9.090848519093743, "learning_rate": 3.326972356555695e-06, "loss": 18.0261, "step": 33938 }, { "epoch": 0.6203776481985852, "grad_norm": 5.2659968921831455, "learning_rate": 3.326693410562034e-06, "loss": 17.0961, "step": 33939 }, { "epoch": 0.6203959273950317, "grad_norm": 6.716650996982713, "learning_rate": 3.3264144704331652e-06, "loss": 17.3804, "step": 33940 }, { "epoch": 0.6204142065914783, "grad_norm": 5.689557279952579, "learning_rate": 3.326135536170071e-06, "loss": 17.38, "step": 33941 }, { "epoch": 0.6204324857879248, "grad_norm": 7.056553928075349, "learning_rate": 3.3258566077737235e-06, "loss": 17.6741, "step": 33942 }, { "epoch": 0.6204507649843712, "grad_norm": 10.14723992647386, "learning_rate": 3.325577685245105e-06, "loss": 17.8398, "step": 33943 }, { "epoch": 0.6204690441808178, "grad_norm": 5.300748547464933, "learning_rate": 3.325298768585192e-06, "loss": 17.0936, "step": 33944 }, { "epoch": 0.6204873233772643, "grad_norm": 5.728660410333599, "learning_rate": 3.3250198577949598e-06, "loss": 17.075, "step": 33945 }, { "epoch": 0.6205056025737109, "grad_norm": 6.327733235343683, "learning_rate": 3.32474095287539e-06, "loss": 17.6242, "step": 33946 }, { "epoch": 0.6205238817701574, "grad_norm": 7.02817882123073, "learning_rate": 3.3244620538274563e-06, "loss": 17.6087, "step": 33947 }, { "epoch": 0.6205421609666039, "grad_norm": 6.175127970697441, "learning_rate": 3.3241831606521368e-06, "loss": 17.6712, "step": 33948 }, { "epoch": 0.6205604401630505, "grad_norm": 7.282004372202244, "learning_rate": 3.323904273350412e-06, "loss": 17.9608, "step": 33949 }, { "epoch": 0.6205787193594969, "grad_norm": 5.25600951861977, "learning_rate": 3.323625391923257e-06, "loss": 17.0544, "step": 33950 }, { "epoch": 0.6205969985559435, "grad_norm": 8.87181906132584, "learning_rate": 3.3233465163716478e-06, "loss": 17.7592, "step": 33951 }, { "epoch": 0.62061527775239, "grad_norm": 7.580383760583821, "learning_rate": 3.3230676466965645e-06, "loss": 17.9785, "step": 33952 }, { "epoch": 0.6206335569488365, "grad_norm": 6.6034691394534, "learning_rate": 3.322788782898985e-06, "loss": 17.91, "step": 33953 }, { "epoch": 0.6206518361452831, "grad_norm": 6.496260388715279, "learning_rate": 3.322509924979882e-06, "loss": 17.5997, "step": 33954 }, { "epoch": 0.6206701153417296, "grad_norm": 5.614254390925603, "learning_rate": 3.32223107294024e-06, "loss": 17.2759, "step": 33955 }, { "epoch": 0.6206883945381761, "grad_norm": 6.7013252059250465, "learning_rate": 3.32195222678103e-06, "loss": 17.5529, "step": 33956 }, { "epoch": 0.6207066737346226, "grad_norm": 6.693385923370817, "learning_rate": 3.3216733865032313e-06, "loss": 17.8608, "step": 33957 }, { "epoch": 0.6207249529310691, "grad_norm": 6.62396399217513, "learning_rate": 3.3213945521078237e-06, "loss": 17.89, "step": 33958 }, { "epoch": 0.6207432321275157, "grad_norm": 5.684413779816052, "learning_rate": 3.3211157235957803e-06, "loss": 17.4954, "step": 33959 }, { "epoch": 0.6207615113239622, "grad_norm": 6.966198784190787, "learning_rate": 3.320836900968083e-06, "loss": 17.8719, "step": 33960 }, { "epoch": 0.6207797905204088, "grad_norm": 5.487204285710015, "learning_rate": 3.3205580842257055e-06, "loss": 17.2554, "step": 33961 }, { "epoch": 0.6207980697168552, "grad_norm": 6.615816024397094, "learning_rate": 3.3202792733696256e-06, "loss": 17.4081, "step": 33962 }, { "epoch": 0.6208163489133017, "grad_norm": 4.965130805774734, "learning_rate": 3.3200004684008223e-06, "loss": 16.9115, "step": 33963 }, { "epoch": 0.6208346281097483, "grad_norm": 5.778073507991112, "learning_rate": 3.3197216693202724e-06, "loss": 17.2801, "step": 33964 }, { "epoch": 0.6208529073061948, "grad_norm": 7.227628896747252, "learning_rate": 3.31944287612895e-06, "loss": 17.6784, "step": 33965 }, { "epoch": 0.6208711865026414, "grad_norm": 5.177038815123329, "learning_rate": 3.3191640888278346e-06, "loss": 16.9936, "step": 33966 }, { "epoch": 0.6208894656990879, "grad_norm": 6.217988609868359, "learning_rate": 3.318885307417905e-06, "loss": 17.4615, "step": 33967 }, { "epoch": 0.6209077448955344, "grad_norm": 5.785372322838824, "learning_rate": 3.3186065319001347e-06, "loss": 17.158, "step": 33968 }, { "epoch": 0.6209260240919809, "grad_norm": 6.23397664720543, "learning_rate": 3.3183277622755044e-06, "loss": 17.5229, "step": 33969 }, { "epoch": 0.6209443032884274, "grad_norm": 7.562069797087515, "learning_rate": 3.318048998544988e-06, "loss": 18.1885, "step": 33970 }, { "epoch": 0.620962582484874, "grad_norm": 5.866008941395772, "learning_rate": 3.3177702407095636e-06, "loss": 17.4369, "step": 33971 }, { "epoch": 0.6209808616813205, "grad_norm": 6.612112800284484, "learning_rate": 3.3174914887702105e-06, "loss": 17.8138, "step": 33972 }, { "epoch": 0.620999140877767, "grad_norm": 7.018371407587807, "learning_rate": 3.317212742727901e-06, "loss": 17.7756, "step": 33973 }, { "epoch": 0.6210174200742136, "grad_norm": 6.570387003263785, "learning_rate": 3.3169340025836174e-06, "loss": 17.346, "step": 33974 }, { "epoch": 0.62103569927066, "grad_norm": 6.347690595584688, "learning_rate": 3.3166552683383324e-06, "loss": 17.4821, "step": 33975 }, { "epoch": 0.6210539784671066, "grad_norm": 4.826156784557937, "learning_rate": 3.3163765399930244e-06, "loss": 16.8322, "step": 33976 }, { "epoch": 0.6210722576635531, "grad_norm": 6.2121441911951125, "learning_rate": 3.316097817548672e-06, "loss": 17.5049, "step": 33977 }, { "epoch": 0.6210905368599996, "grad_norm": 5.6849202834211185, "learning_rate": 3.3158191010062513e-06, "loss": 17.1494, "step": 33978 }, { "epoch": 0.6211088160564462, "grad_norm": 5.888859733783157, "learning_rate": 3.3155403903667357e-06, "loss": 17.2091, "step": 33979 }, { "epoch": 0.6211270952528927, "grad_norm": 6.131692231988226, "learning_rate": 3.315261685631106e-06, "loss": 17.4908, "step": 33980 }, { "epoch": 0.6211453744493393, "grad_norm": 5.615741700301898, "learning_rate": 3.3149829868003393e-06, "loss": 17.1811, "step": 33981 }, { "epoch": 0.6211636536457857, "grad_norm": 6.549608219380415, "learning_rate": 3.3147042938754086e-06, "loss": 17.5812, "step": 33982 }, { "epoch": 0.6211819328422322, "grad_norm": 6.550734244882916, "learning_rate": 3.3144256068572955e-06, "loss": 17.4609, "step": 33983 }, { "epoch": 0.6212002120386788, "grad_norm": 5.296141738751046, "learning_rate": 3.3141469257469726e-06, "loss": 16.9408, "step": 33984 }, { "epoch": 0.6212184912351253, "grad_norm": 7.637597611156311, "learning_rate": 3.3138682505454185e-06, "loss": 18.0198, "step": 33985 }, { "epoch": 0.6212367704315719, "grad_norm": 6.313476968298392, "learning_rate": 3.313589581253611e-06, "loss": 17.2764, "step": 33986 }, { "epoch": 0.6212550496280184, "grad_norm": 6.270904678997132, "learning_rate": 3.3133109178725255e-06, "loss": 17.2264, "step": 33987 }, { "epoch": 0.6212733288244648, "grad_norm": 5.94971430572456, "learning_rate": 3.3130322604031386e-06, "loss": 17.2827, "step": 33988 }, { "epoch": 0.6212916080209114, "grad_norm": 6.297240273567897, "learning_rate": 3.312753608846427e-06, "loss": 17.3157, "step": 33989 }, { "epoch": 0.6213098872173579, "grad_norm": 5.3183095606528425, "learning_rate": 3.3124749632033666e-06, "loss": 17.2515, "step": 33990 }, { "epoch": 0.6213281664138044, "grad_norm": 8.478468273771643, "learning_rate": 3.3121963234749366e-06, "loss": 17.7126, "step": 33991 }, { "epoch": 0.621346445610251, "grad_norm": 5.428706263336174, "learning_rate": 3.311917689662112e-06, "loss": 17.1882, "step": 33992 }, { "epoch": 0.6213647248066975, "grad_norm": 4.717697325726503, "learning_rate": 3.311639061765869e-06, "loss": 16.7372, "step": 33993 }, { "epoch": 0.6213830040031441, "grad_norm": 4.836747021428246, "learning_rate": 3.3113604397871845e-06, "loss": 16.9745, "step": 33994 }, { "epoch": 0.6214012831995905, "grad_norm": 6.019974309982582, "learning_rate": 3.311081823727036e-06, "loss": 17.2935, "step": 33995 }, { "epoch": 0.621419562396037, "grad_norm": 4.893264038849264, "learning_rate": 3.310803213586397e-06, "loss": 16.8452, "step": 33996 }, { "epoch": 0.6214378415924836, "grad_norm": 6.2022961378727, "learning_rate": 3.310524609366248e-06, "loss": 17.5796, "step": 33997 }, { "epoch": 0.6214561207889301, "grad_norm": 7.530891925625168, "learning_rate": 3.310246011067564e-06, "loss": 17.5569, "step": 33998 }, { "epoch": 0.6214743999853767, "grad_norm": 6.315114735539727, "learning_rate": 3.309967418691319e-06, "loss": 17.2535, "step": 33999 }, { "epoch": 0.6214926791818232, "grad_norm": 6.864709786454459, "learning_rate": 3.3096888322384935e-06, "loss": 17.594, "step": 34000 }, { "epoch": 0.6215109583782696, "grad_norm": 5.6095960887872, "learning_rate": 3.3094102517100614e-06, "loss": 17.1525, "step": 34001 }, { "epoch": 0.6215292375747162, "grad_norm": 7.750396111482438, "learning_rate": 3.3091316771069983e-06, "loss": 17.3683, "step": 34002 }, { "epoch": 0.6215475167711627, "grad_norm": 5.166323099210132, "learning_rate": 3.308853108430284e-06, "loss": 17.0809, "step": 34003 }, { "epoch": 0.6215657959676093, "grad_norm": 5.8233207282682535, "learning_rate": 3.308574545680891e-06, "loss": 17.2413, "step": 34004 }, { "epoch": 0.6215840751640558, "grad_norm": 4.977779825784737, "learning_rate": 3.3082959888597994e-06, "loss": 16.9031, "step": 34005 }, { "epoch": 0.6216023543605023, "grad_norm": 7.589211475260339, "learning_rate": 3.308017437967982e-06, "loss": 17.8717, "step": 34006 }, { "epoch": 0.6216206335569489, "grad_norm": 8.384773448486298, "learning_rate": 3.3077388930064163e-06, "loss": 18.4291, "step": 34007 }, { "epoch": 0.6216389127533953, "grad_norm": 4.535348493407129, "learning_rate": 3.3074603539760807e-06, "loss": 16.9457, "step": 34008 }, { "epoch": 0.6216571919498419, "grad_norm": 6.567629275187349, "learning_rate": 3.30718182087795e-06, "loss": 17.7799, "step": 34009 }, { "epoch": 0.6216754711462884, "grad_norm": 5.769265056443997, "learning_rate": 3.306903293712998e-06, "loss": 17.3526, "step": 34010 }, { "epoch": 0.6216937503427349, "grad_norm": 7.1101758866509845, "learning_rate": 3.306624772482204e-06, "loss": 18.018, "step": 34011 }, { "epoch": 0.6217120295391815, "grad_norm": 6.806035498769529, "learning_rate": 3.306346257186544e-06, "loss": 17.5727, "step": 34012 }, { "epoch": 0.621730308735628, "grad_norm": 4.975135827851945, "learning_rate": 3.3060677478269913e-06, "loss": 16.9542, "step": 34013 }, { "epoch": 0.6217485879320745, "grad_norm": 6.246477412423118, "learning_rate": 3.305789244404527e-06, "loss": 17.3307, "step": 34014 }, { "epoch": 0.621766867128521, "grad_norm": 6.605828907179958, "learning_rate": 3.3055107469201223e-06, "loss": 17.6208, "step": 34015 }, { "epoch": 0.6217851463249675, "grad_norm": 6.341860880771301, "learning_rate": 3.3052322553747553e-06, "loss": 17.5501, "step": 34016 }, { "epoch": 0.6218034255214141, "grad_norm": 6.011596940959712, "learning_rate": 3.3049537697694032e-06, "loss": 17.1665, "step": 34017 }, { "epoch": 0.6218217047178606, "grad_norm": 6.229323513550394, "learning_rate": 3.3046752901050417e-06, "loss": 17.1734, "step": 34018 }, { "epoch": 0.6218399839143072, "grad_norm": 6.260121744391754, "learning_rate": 3.3043968163826446e-06, "loss": 17.3105, "step": 34019 }, { "epoch": 0.6218582631107537, "grad_norm": 5.667838965807191, "learning_rate": 3.30411834860319e-06, "loss": 16.9604, "step": 34020 }, { "epoch": 0.6218765423072001, "grad_norm": 6.791448965413265, "learning_rate": 3.3038398867676525e-06, "loss": 17.5714, "step": 34021 }, { "epoch": 0.6218948215036467, "grad_norm": 6.04782755623996, "learning_rate": 3.3035614308770104e-06, "loss": 17.2804, "step": 34022 }, { "epoch": 0.6219131007000932, "grad_norm": 6.634796789319207, "learning_rate": 3.303282980932239e-06, "loss": 17.1891, "step": 34023 }, { "epoch": 0.6219313798965398, "grad_norm": 9.940083235731331, "learning_rate": 3.303004536934311e-06, "loss": 17.7338, "step": 34024 }, { "epoch": 0.6219496590929863, "grad_norm": 5.143823920752113, "learning_rate": 3.302726098884207e-06, "loss": 17.0219, "step": 34025 }, { "epoch": 0.6219679382894328, "grad_norm": 6.420430659668352, "learning_rate": 3.302447666782901e-06, "loss": 17.4504, "step": 34026 }, { "epoch": 0.6219862174858793, "grad_norm": 6.779655959481122, "learning_rate": 3.3021692406313662e-06, "loss": 17.5392, "step": 34027 }, { "epoch": 0.6220044966823258, "grad_norm": 8.58866162132311, "learning_rate": 3.301890820430583e-06, "loss": 17.9827, "step": 34028 }, { "epoch": 0.6220227758787724, "grad_norm": 5.53317585242398, "learning_rate": 3.301612406181525e-06, "loss": 17.1473, "step": 34029 }, { "epoch": 0.6220410550752189, "grad_norm": 4.921906179485333, "learning_rate": 3.3013339978851656e-06, "loss": 16.8663, "step": 34030 }, { "epoch": 0.6220593342716654, "grad_norm": 6.98992700638181, "learning_rate": 3.301055595542486e-06, "loss": 17.7934, "step": 34031 }, { "epoch": 0.622077613468112, "grad_norm": 6.718422328909542, "learning_rate": 3.3007771991544596e-06, "loss": 17.7375, "step": 34032 }, { "epoch": 0.6220958926645584, "grad_norm": 6.150769825508556, "learning_rate": 3.300498808722059e-06, "loss": 17.4604, "step": 34033 }, { "epoch": 0.622114171861005, "grad_norm": 7.554247885432396, "learning_rate": 3.300220424246264e-06, "loss": 17.9695, "step": 34034 }, { "epoch": 0.6221324510574515, "grad_norm": 6.890050304265831, "learning_rate": 3.299942045728047e-06, "loss": 17.4897, "step": 34035 }, { "epoch": 0.622150730253898, "grad_norm": 6.4791018801330305, "learning_rate": 3.2996636731683882e-06, "loss": 17.7037, "step": 34036 }, { "epoch": 0.6221690094503446, "grad_norm": 8.767262161814077, "learning_rate": 3.2993853065682612e-06, "loss": 18.3122, "step": 34037 }, { "epoch": 0.6221872886467911, "grad_norm": 5.9082279168713825, "learning_rate": 3.299106945928639e-06, "loss": 17.1133, "step": 34038 }, { "epoch": 0.6222055678432377, "grad_norm": 5.953750926470349, "learning_rate": 3.2988285912505004e-06, "loss": 17.3901, "step": 34039 }, { "epoch": 0.6222238470396841, "grad_norm": 5.703216428597273, "learning_rate": 3.29855024253482e-06, "loss": 17.5203, "step": 34040 }, { "epoch": 0.6222421262361306, "grad_norm": 4.745597989211548, "learning_rate": 3.2982718997825725e-06, "loss": 16.7601, "step": 34041 }, { "epoch": 0.6222604054325772, "grad_norm": 6.3878936753920135, "learning_rate": 3.2979935629947358e-06, "loss": 17.6763, "step": 34042 }, { "epoch": 0.6222786846290237, "grad_norm": 5.552918885328772, "learning_rate": 3.2977152321722827e-06, "loss": 17.0442, "step": 34043 }, { "epoch": 0.6222969638254703, "grad_norm": 5.749191702066202, "learning_rate": 3.2974369073161895e-06, "loss": 17.2163, "step": 34044 }, { "epoch": 0.6223152430219168, "grad_norm": 8.10612375485493, "learning_rate": 3.2971585884274337e-06, "loss": 18.4121, "step": 34045 }, { "epoch": 0.6223335222183632, "grad_norm": 5.298796452766617, "learning_rate": 3.2968802755069897e-06, "loss": 17.0452, "step": 34046 }, { "epoch": 0.6223518014148098, "grad_norm": 5.034053495545189, "learning_rate": 3.2966019685558305e-06, "loss": 16.968, "step": 34047 }, { "epoch": 0.6223700806112563, "grad_norm": 5.606172864635162, "learning_rate": 3.2963236675749343e-06, "loss": 17.2135, "step": 34048 }, { "epoch": 0.6223883598077029, "grad_norm": 6.552127265436594, "learning_rate": 3.2960453725652773e-06, "loss": 17.2715, "step": 34049 }, { "epoch": 0.6224066390041494, "grad_norm": 7.35479616515973, "learning_rate": 3.295767083527831e-06, "loss": 17.7744, "step": 34050 }, { "epoch": 0.6224249182005959, "grad_norm": 9.365823276566092, "learning_rate": 3.2954888004635753e-06, "loss": 18.2929, "step": 34051 }, { "epoch": 0.6224431973970425, "grad_norm": 6.361983213237052, "learning_rate": 3.295210523373481e-06, "loss": 17.4357, "step": 34052 }, { "epoch": 0.6224614765934889, "grad_norm": 5.234668618336464, "learning_rate": 3.2949322522585277e-06, "loss": 17.0936, "step": 34053 }, { "epoch": 0.6224797557899355, "grad_norm": 6.280268041382096, "learning_rate": 3.294653987119689e-06, "loss": 17.5696, "step": 34054 }, { "epoch": 0.622498034986382, "grad_norm": 6.354238470130663, "learning_rate": 3.294375727957939e-06, "loss": 17.1268, "step": 34055 }, { "epoch": 0.6225163141828285, "grad_norm": 6.359733446441989, "learning_rate": 3.294097474774255e-06, "loss": 17.3699, "step": 34056 }, { "epoch": 0.6225345933792751, "grad_norm": 7.476078191136726, "learning_rate": 3.293819227569612e-06, "loss": 18.0982, "step": 34057 }, { "epoch": 0.6225528725757216, "grad_norm": 5.838586394531664, "learning_rate": 3.293540986344982e-06, "loss": 17.0821, "step": 34058 }, { "epoch": 0.622571151772168, "grad_norm": 4.895222904345161, "learning_rate": 3.2932627511013453e-06, "loss": 16.9263, "step": 34059 }, { "epoch": 0.6225894309686146, "grad_norm": 6.279882870289682, "learning_rate": 3.292984521839674e-06, "loss": 17.547, "step": 34060 }, { "epoch": 0.6226077101650611, "grad_norm": 6.116477667359963, "learning_rate": 3.292706298560942e-06, "loss": 17.2827, "step": 34061 }, { "epoch": 0.6226259893615077, "grad_norm": 6.457268209862064, "learning_rate": 3.292428081266129e-06, "loss": 17.4422, "step": 34062 }, { "epoch": 0.6226442685579542, "grad_norm": 6.728865312313842, "learning_rate": 3.292149869956207e-06, "loss": 17.8589, "step": 34063 }, { "epoch": 0.6226625477544007, "grad_norm": 6.524235982210663, "learning_rate": 3.291871664632149e-06, "loss": 17.2385, "step": 34064 }, { "epoch": 0.6226808269508473, "grad_norm": 6.358195721807367, "learning_rate": 3.291593465294934e-06, "loss": 17.2439, "step": 34065 }, { "epoch": 0.6226991061472937, "grad_norm": 7.582570243387085, "learning_rate": 3.2913152719455354e-06, "loss": 18.2257, "step": 34066 }, { "epoch": 0.6227173853437403, "grad_norm": 5.9847477199964905, "learning_rate": 3.29103708458493e-06, "loss": 17.5231, "step": 34067 }, { "epoch": 0.6227356645401868, "grad_norm": 6.331566216285138, "learning_rate": 3.2907589032140905e-06, "loss": 17.2376, "step": 34068 }, { "epoch": 0.6227539437366333, "grad_norm": 5.697425211513413, "learning_rate": 3.290480727833992e-06, "loss": 17.1798, "step": 34069 }, { "epoch": 0.6227722229330799, "grad_norm": 5.485296091687721, "learning_rate": 3.290202558445611e-06, "loss": 17.1487, "step": 34070 }, { "epoch": 0.6227905021295264, "grad_norm": 6.494147839999035, "learning_rate": 3.289924395049922e-06, "loss": 17.1703, "step": 34071 }, { "epoch": 0.622808781325973, "grad_norm": 6.7843629196040816, "learning_rate": 3.2896462376478978e-06, "loss": 17.6958, "step": 34072 }, { "epoch": 0.6228270605224194, "grad_norm": 5.861130272523569, "learning_rate": 3.2893680862405174e-06, "loss": 17.8482, "step": 34073 }, { "epoch": 0.6228453397188659, "grad_norm": 7.982526010952181, "learning_rate": 3.2890899408287526e-06, "loss": 18.228, "step": 34074 }, { "epoch": 0.6228636189153125, "grad_norm": 6.502406643677955, "learning_rate": 3.288811801413577e-06, "loss": 17.5006, "step": 34075 }, { "epoch": 0.622881898111759, "grad_norm": 6.114098099371914, "learning_rate": 3.2885336679959708e-06, "loss": 17.5811, "step": 34076 }, { "epoch": 0.6229001773082056, "grad_norm": 5.75886043765039, "learning_rate": 3.2882555405769053e-06, "loss": 17.4431, "step": 34077 }, { "epoch": 0.622918456504652, "grad_norm": 6.50475970544273, "learning_rate": 3.2879774191573534e-06, "loss": 17.555, "step": 34078 }, { "epoch": 0.6229367357010985, "grad_norm": 7.24262028042872, "learning_rate": 3.2876993037382925e-06, "loss": 17.1354, "step": 34079 }, { "epoch": 0.6229550148975451, "grad_norm": 7.813598904237008, "learning_rate": 3.287421194320699e-06, "loss": 17.8778, "step": 34080 }, { "epoch": 0.6229732940939916, "grad_norm": 6.51050578408565, "learning_rate": 3.2871430909055422e-06, "loss": 17.6486, "step": 34081 }, { "epoch": 0.6229915732904382, "grad_norm": 6.1934479274804595, "learning_rate": 3.2868649934938035e-06, "loss": 17.4406, "step": 34082 }, { "epoch": 0.6230098524868847, "grad_norm": 5.651685980732037, "learning_rate": 3.286586902086452e-06, "loss": 17.0664, "step": 34083 }, { "epoch": 0.6230281316833312, "grad_norm": 6.44067316562179, "learning_rate": 3.2863088166844657e-06, "loss": 17.3936, "step": 34084 }, { "epoch": 0.6230464108797777, "grad_norm": 5.814364351472811, "learning_rate": 3.2860307372888196e-06, "loss": 17.2959, "step": 34085 }, { "epoch": 0.6230646900762242, "grad_norm": 6.351921562977362, "learning_rate": 3.2857526639004844e-06, "loss": 17.5029, "step": 34086 }, { "epoch": 0.6230829692726708, "grad_norm": 6.881232859216051, "learning_rate": 3.28547459652044e-06, "loss": 17.7093, "step": 34087 }, { "epoch": 0.6231012484691173, "grad_norm": 5.595063156570202, "learning_rate": 3.285196535149657e-06, "loss": 17.4464, "step": 34088 }, { "epoch": 0.6231195276655638, "grad_norm": 6.549339902695731, "learning_rate": 3.2849184797891098e-06, "loss": 17.6149, "step": 34089 }, { "epoch": 0.6231378068620104, "grad_norm": 5.767827046428734, "learning_rate": 3.284640430439777e-06, "loss": 17.2737, "step": 34090 }, { "epoch": 0.6231560860584568, "grad_norm": 6.00812426345743, "learning_rate": 3.284362387102631e-06, "loss": 17.429, "step": 34091 }, { "epoch": 0.6231743652549034, "grad_norm": 6.7710620513629936, "learning_rate": 3.284084349778644e-06, "loss": 18.0454, "step": 34092 }, { "epoch": 0.6231926444513499, "grad_norm": 8.089234717988576, "learning_rate": 3.283806318468793e-06, "loss": 17.653, "step": 34093 }, { "epoch": 0.6232109236477964, "grad_norm": 5.0771832401263515, "learning_rate": 3.283528293174053e-06, "loss": 16.9467, "step": 34094 }, { "epoch": 0.623229202844243, "grad_norm": 6.069835454626514, "learning_rate": 3.2832502738953955e-06, "loss": 17.5535, "step": 34095 }, { "epoch": 0.6232474820406895, "grad_norm": 5.404943250962247, "learning_rate": 3.2829722606337987e-06, "loss": 16.9658, "step": 34096 }, { "epoch": 0.6232657612371361, "grad_norm": 6.024620044038344, "learning_rate": 3.282694253390233e-06, "loss": 17.2303, "step": 34097 }, { "epoch": 0.6232840404335825, "grad_norm": 5.801287212793153, "learning_rate": 3.2824162521656765e-06, "loss": 17.282, "step": 34098 }, { "epoch": 0.623302319630029, "grad_norm": 6.107504813662039, "learning_rate": 3.2821382569611026e-06, "loss": 17.5838, "step": 34099 }, { "epoch": 0.6233205988264756, "grad_norm": 6.443337922065629, "learning_rate": 3.281860267777483e-06, "loss": 17.443, "step": 34100 }, { "epoch": 0.6233388780229221, "grad_norm": 6.046862455707169, "learning_rate": 3.2815822846157965e-06, "loss": 17.4239, "step": 34101 }, { "epoch": 0.6233571572193687, "grad_norm": 5.074829131607065, "learning_rate": 3.2813043074770133e-06, "loss": 16.7898, "step": 34102 }, { "epoch": 0.6233754364158152, "grad_norm": 8.106956705020325, "learning_rate": 3.2810263363621085e-06, "loss": 17.8552, "step": 34103 }, { "epoch": 0.6233937156122616, "grad_norm": 6.690324324768691, "learning_rate": 3.280748371272059e-06, "loss": 17.7192, "step": 34104 }, { "epoch": 0.6234119948087082, "grad_norm": 6.44465451822654, "learning_rate": 3.2804704122078377e-06, "loss": 17.3818, "step": 34105 }, { "epoch": 0.6234302740051547, "grad_norm": 5.830778473724139, "learning_rate": 3.2801924591704165e-06, "loss": 17.2257, "step": 34106 }, { "epoch": 0.6234485532016013, "grad_norm": 7.156264521544799, "learning_rate": 3.279914512160772e-06, "loss": 17.7311, "step": 34107 }, { "epoch": 0.6234668323980478, "grad_norm": 6.242838939627973, "learning_rate": 3.2796365711798794e-06, "loss": 17.3084, "step": 34108 }, { "epoch": 0.6234851115944943, "grad_norm": 8.13355782933893, "learning_rate": 3.2793586362287093e-06, "loss": 18.287, "step": 34109 }, { "epoch": 0.6235033907909409, "grad_norm": 4.7099852435464316, "learning_rate": 3.2790807073082396e-06, "loss": 16.806, "step": 34110 }, { "epoch": 0.6235216699873873, "grad_norm": 6.592851680036207, "learning_rate": 3.2788027844194408e-06, "loss": 17.6177, "step": 34111 }, { "epoch": 0.6235399491838339, "grad_norm": 5.706016441179076, "learning_rate": 3.2785248675632898e-06, "loss": 17.3245, "step": 34112 }, { "epoch": 0.6235582283802804, "grad_norm": 5.702349288901408, "learning_rate": 3.278246956740761e-06, "loss": 17.2339, "step": 34113 }, { "epoch": 0.6235765075767269, "grad_norm": 6.191206706138015, "learning_rate": 3.277969051952825e-06, "loss": 17.4659, "step": 34114 }, { "epoch": 0.6235947867731735, "grad_norm": 4.823948892336293, "learning_rate": 3.27769115320046e-06, "loss": 16.9261, "step": 34115 }, { "epoch": 0.62361306596962, "grad_norm": 7.624473408913838, "learning_rate": 3.2774132604846375e-06, "loss": 17.8292, "step": 34116 }, { "epoch": 0.6236313451660666, "grad_norm": 6.780346181981522, "learning_rate": 3.2771353738063307e-06, "loss": 17.608, "step": 34117 }, { "epoch": 0.623649624362513, "grad_norm": 5.82658307717418, "learning_rate": 3.2768574931665163e-06, "loss": 17.7515, "step": 34118 }, { "epoch": 0.6236679035589595, "grad_norm": 5.046052881458229, "learning_rate": 3.276579618566167e-06, "loss": 16.9127, "step": 34119 }, { "epoch": 0.6236861827554061, "grad_norm": 6.369899358211235, "learning_rate": 3.2763017500062554e-06, "loss": 17.4101, "step": 34120 }, { "epoch": 0.6237044619518526, "grad_norm": 6.060202316359273, "learning_rate": 3.2760238874877576e-06, "loss": 17.4023, "step": 34121 }, { "epoch": 0.6237227411482992, "grad_norm": 6.976659312262324, "learning_rate": 3.275746031011647e-06, "loss": 17.8163, "step": 34122 }, { "epoch": 0.6237410203447457, "grad_norm": 5.7351038286821545, "learning_rate": 3.2754681805788954e-06, "loss": 17.1892, "step": 34123 }, { "epoch": 0.6237592995411921, "grad_norm": 5.565453034004487, "learning_rate": 3.2751903361904787e-06, "loss": 17.0879, "step": 34124 }, { "epoch": 0.6237775787376387, "grad_norm": 6.5467385116039605, "learning_rate": 3.274912497847371e-06, "loss": 17.8793, "step": 34125 }, { "epoch": 0.6237958579340852, "grad_norm": 6.124358787472452, "learning_rate": 3.2746346655505433e-06, "loss": 17.6104, "step": 34126 }, { "epoch": 0.6238141371305317, "grad_norm": 4.502735156848285, "learning_rate": 3.274356839300974e-06, "loss": 16.7011, "step": 34127 }, { "epoch": 0.6238324163269783, "grad_norm": 5.878254334778769, "learning_rate": 3.2740790190996318e-06, "loss": 17.4093, "step": 34128 }, { "epoch": 0.6238506955234248, "grad_norm": 5.798827611999494, "learning_rate": 3.2738012049474945e-06, "loss": 17.1889, "step": 34129 }, { "epoch": 0.6238689747198713, "grad_norm": 6.655932594453009, "learning_rate": 3.2735233968455344e-06, "loss": 17.6142, "step": 34130 }, { "epoch": 0.6238872539163178, "grad_norm": 6.439909564054818, "learning_rate": 3.273245594794723e-06, "loss": 17.4667, "step": 34131 }, { "epoch": 0.6239055331127643, "grad_norm": 9.644580764308555, "learning_rate": 3.2729677987960383e-06, "loss": 17.1602, "step": 34132 }, { "epoch": 0.6239238123092109, "grad_norm": 6.770679010724278, "learning_rate": 3.272690008850451e-06, "loss": 17.8636, "step": 34133 }, { "epoch": 0.6239420915056574, "grad_norm": 6.749250232466914, "learning_rate": 3.272412224958933e-06, "loss": 17.5796, "step": 34134 }, { "epoch": 0.623960370702104, "grad_norm": 5.535688649003531, "learning_rate": 3.2721344471224637e-06, "loss": 17.0786, "step": 34135 }, { "epoch": 0.6239786498985505, "grad_norm": 5.4024991215793525, "learning_rate": 3.271856675342012e-06, "loss": 17.1661, "step": 34136 }, { "epoch": 0.6239969290949969, "grad_norm": 7.293669566279255, "learning_rate": 3.271578909618551e-06, "loss": 17.6671, "step": 34137 }, { "epoch": 0.6240152082914435, "grad_norm": 7.928753247815168, "learning_rate": 3.271301149953058e-06, "loss": 18.0151, "step": 34138 }, { "epoch": 0.62403348748789, "grad_norm": 5.881592325539662, "learning_rate": 3.2710233963465045e-06, "loss": 17.1959, "step": 34139 }, { "epoch": 0.6240517666843366, "grad_norm": 5.816066302262124, "learning_rate": 3.2707456487998624e-06, "loss": 17.2034, "step": 34140 }, { "epoch": 0.6240700458807831, "grad_norm": 5.43474744861665, "learning_rate": 3.270467907314108e-06, "loss": 17.1377, "step": 34141 }, { "epoch": 0.6240883250772296, "grad_norm": 6.201394101249695, "learning_rate": 3.270190171890212e-06, "loss": 17.5163, "step": 34142 }, { "epoch": 0.6241066042736761, "grad_norm": 6.107234890050991, "learning_rate": 3.2699124425291506e-06, "loss": 17.2562, "step": 34143 }, { "epoch": 0.6241248834701226, "grad_norm": 7.38242293708736, "learning_rate": 3.2696347192318967e-06, "loss": 17.9098, "step": 34144 }, { "epoch": 0.6241431626665692, "grad_norm": 4.8973234857625325, "learning_rate": 3.2693570019994207e-06, "loss": 16.9437, "step": 34145 }, { "epoch": 0.6241614418630157, "grad_norm": 5.613655118276298, "learning_rate": 3.2690792908327005e-06, "loss": 17.5071, "step": 34146 }, { "epoch": 0.6241797210594622, "grad_norm": 5.990797239434125, "learning_rate": 3.268801585732706e-06, "loss": 17.1468, "step": 34147 }, { "epoch": 0.6241980002559088, "grad_norm": 6.416839008006602, "learning_rate": 3.268523886700411e-06, "loss": 17.3688, "step": 34148 }, { "epoch": 0.6242162794523552, "grad_norm": 6.557937447231616, "learning_rate": 3.2682461937367917e-06, "loss": 17.5845, "step": 34149 }, { "epoch": 0.6242345586488018, "grad_norm": 5.4617234735755575, "learning_rate": 3.267968506842819e-06, "loss": 17.2141, "step": 34150 }, { "epoch": 0.6242528378452483, "grad_norm": 7.21587998912236, "learning_rate": 3.267690826019464e-06, "loss": 17.7022, "step": 34151 }, { "epoch": 0.6242711170416948, "grad_norm": 6.97086284135558, "learning_rate": 3.2674131512677037e-06, "loss": 17.6338, "step": 34152 }, { "epoch": 0.6242893962381414, "grad_norm": 4.831270831003682, "learning_rate": 3.267135482588511e-06, "loss": 16.9764, "step": 34153 }, { "epoch": 0.6243076754345879, "grad_norm": 5.597458984393001, "learning_rate": 3.266857819982856e-06, "loss": 17.1622, "step": 34154 }, { "epoch": 0.6243259546310345, "grad_norm": 5.0133242174911965, "learning_rate": 3.266580163451716e-06, "loss": 16.7408, "step": 34155 }, { "epoch": 0.6243442338274809, "grad_norm": 5.6450309107173515, "learning_rate": 3.2663025129960605e-06, "loss": 17.3291, "step": 34156 }, { "epoch": 0.6243625130239274, "grad_norm": 5.723477692492851, "learning_rate": 3.2660248686168638e-06, "loss": 17.1198, "step": 34157 }, { "epoch": 0.624380792220374, "grad_norm": 5.673557617676206, "learning_rate": 3.2657472303151017e-06, "loss": 17.1941, "step": 34158 }, { "epoch": 0.6243990714168205, "grad_norm": 5.707477130504569, "learning_rate": 3.265469598091743e-06, "loss": 17.0652, "step": 34159 }, { "epoch": 0.6244173506132671, "grad_norm": 5.178809328929178, "learning_rate": 3.265191971947764e-06, "loss": 17.0005, "step": 34160 }, { "epoch": 0.6244356298097136, "grad_norm": 6.947014799575767, "learning_rate": 3.2649143518841363e-06, "loss": 17.7841, "step": 34161 }, { "epoch": 0.62445390900616, "grad_norm": 5.399238504468079, "learning_rate": 3.2646367379018324e-06, "loss": 17.2281, "step": 34162 }, { "epoch": 0.6244721882026066, "grad_norm": 6.330484331942771, "learning_rate": 3.264359130001828e-06, "loss": 17.3945, "step": 34163 }, { "epoch": 0.6244904673990531, "grad_norm": 7.070048647100234, "learning_rate": 3.264081528185094e-06, "loss": 17.6407, "step": 34164 }, { "epoch": 0.6245087465954997, "grad_norm": 5.627750517382311, "learning_rate": 3.2638039324526017e-06, "loss": 17.205, "step": 34165 }, { "epoch": 0.6245270257919462, "grad_norm": 8.291495810449637, "learning_rate": 3.263526342805327e-06, "loss": 17.891, "step": 34166 }, { "epoch": 0.6245453049883927, "grad_norm": 5.097839398064565, "learning_rate": 3.263248759244243e-06, "loss": 17.0762, "step": 34167 }, { "epoch": 0.6245635841848393, "grad_norm": 6.976287827117648, "learning_rate": 3.262971181770319e-06, "loss": 17.7122, "step": 34168 }, { "epoch": 0.6245818633812857, "grad_norm": 5.679710166570921, "learning_rate": 3.2626936103845327e-06, "loss": 17.1195, "step": 34169 }, { "epoch": 0.6246001425777323, "grad_norm": 6.559760136574378, "learning_rate": 3.262416045087853e-06, "loss": 17.3979, "step": 34170 }, { "epoch": 0.6246184217741788, "grad_norm": 6.676425102803483, "learning_rate": 3.2621384858812533e-06, "loss": 17.4934, "step": 34171 }, { "epoch": 0.6246367009706253, "grad_norm": 5.061755834499155, "learning_rate": 3.2618609327657104e-06, "loss": 17.0422, "step": 34172 }, { "epoch": 0.6246549801670719, "grad_norm": 7.589437829604892, "learning_rate": 3.2615833857421906e-06, "loss": 17.7804, "step": 34173 }, { "epoch": 0.6246732593635184, "grad_norm": 6.736515590293594, "learning_rate": 3.2613058448116736e-06, "loss": 17.9559, "step": 34174 }, { "epoch": 0.624691538559965, "grad_norm": 5.039828572088119, "learning_rate": 3.261028309975127e-06, "loss": 17.1152, "step": 34175 }, { "epoch": 0.6247098177564114, "grad_norm": 9.042204326605084, "learning_rate": 3.2607507812335243e-06, "loss": 17.9844, "step": 34176 }, { "epoch": 0.6247280969528579, "grad_norm": 7.286268305363077, "learning_rate": 3.2604732585878414e-06, "loss": 17.8397, "step": 34177 }, { "epoch": 0.6247463761493045, "grad_norm": 6.130724636724018, "learning_rate": 3.2601957420390484e-06, "loss": 17.417, "step": 34178 }, { "epoch": 0.624764655345751, "grad_norm": 6.475544016228392, "learning_rate": 3.2599182315881167e-06, "loss": 17.6761, "step": 34179 }, { "epoch": 0.6247829345421976, "grad_norm": 6.648526187906029, "learning_rate": 3.2596407272360216e-06, "loss": 17.6335, "step": 34180 }, { "epoch": 0.6248012137386441, "grad_norm": 6.42484244250097, "learning_rate": 3.2593632289837353e-06, "loss": 17.378, "step": 34181 }, { "epoch": 0.6248194929350905, "grad_norm": 6.690379205091496, "learning_rate": 3.2590857368322283e-06, "loss": 17.5215, "step": 34182 }, { "epoch": 0.6248377721315371, "grad_norm": 7.235771486591239, "learning_rate": 3.258808250782476e-06, "loss": 17.608, "step": 34183 }, { "epoch": 0.6248560513279836, "grad_norm": 8.387010760160427, "learning_rate": 3.2585307708354496e-06, "loss": 17.9836, "step": 34184 }, { "epoch": 0.6248743305244302, "grad_norm": 5.784542270366943, "learning_rate": 3.258253296992121e-06, "loss": 17.2658, "step": 34185 }, { "epoch": 0.6248926097208767, "grad_norm": 7.861559773280175, "learning_rate": 3.2579758292534647e-06, "loss": 17.8316, "step": 34186 }, { "epoch": 0.6249108889173232, "grad_norm": 5.404201407016526, "learning_rate": 3.257698367620451e-06, "loss": 17.2295, "step": 34187 }, { "epoch": 0.6249291681137698, "grad_norm": 6.508677554156701, "learning_rate": 3.2574209120940524e-06, "loss": 17.3994, "step": 34188 }, { "epoch": 0.6249474473102162, "grad_norm": 5.313267457741655, "learning_rate": 3.257143462675244e-06, "loss": 16.9374, "step": 34189 }, { "epoch": 0.6249657265066628, "grad_norm": 7.029996034079211, "learning_rate": 3.2568660193649947e-06, "loss": 17.7414, "step": 34190 }, { "epoch": 0.6249840057031093, "grad_norm": 8.292261822235039, "learning_rate": 3.2565885821642814e-06, "loss": 18.4237, "step": 34191 }, { "epoch": 0.6250022848995558, "grad_norm": 5.970654569543478, "learning_rate": 3.2563111510740717e-06, "loss": 17.3399, "step": 34192 }, { "epoch": 0.6250205640960024, "grad_norm": 10.313289678611127, "learning_rate": 3.25603372609534e-06, "loss": 17.819, "step": 34193 }, { "epoch": 0.6250388432924489, "grad_norm": 7.004803324837785, "learning_rate": 3.255756307229061e-06, "loss": 17.7049, "step": 34194 }, { "epoch": 0.6250571224888953, "grad_norm": 6.312900779537956, "learning_rate": 3.255478894476204e-06, "loss": 17.4099, "step": 34195 }, { "epoch": 0.6250754016853419, "grad_norm": 4.4490468742300235, "learning_rate": 3.2552014878377403e-06, "loss": 16.6674, "step": 34196 }, { "epoch": 0.6250936808817884, "grad_norm": 5.583121692028698, "learning_rate": 3.254924087314645e-06, "loss": 17.0297, "step": 34197 }, { "epoch": 0.625111960078235, "grad_norm": 7.992121841556056, "learning_rate": 3.2546466929078903e-06, "loss": 17.7147, "step": 34198 }, { "epoch": 0.6251302392746815, "grad_norm": 5.089163774017933, "learning_rate": 3.2543693046184456e-06, "loss": 16.9447, "step": 34199 }, { "epoch": 0.625148518471128, "grad_norm": 7.4796600871950565, "learning_rate": 3.254091922447287e-06, "loss": 17.7029, "step": 34200 }, { "epoch": 0.6251667976675745, "grad_norm": 6.4875387666474476, "learning_rate": 3.253814546395384e-06, "loss": 17.3221, "step": 34201 }, { "epoch": 0.625185076864021, "grad_norm": 10.310206809543654, "learning_rate": 3.2535371764637082e-06, "loss": 19.097, "step": 34202 }, { "epoch": 0.6252033560604676, "grad_norm": 5.402086037546671, "learning_rate": 3.2532598126532345e-06, "loss": 16.9155, "step": 34203 }, { "epoch": 0.6252216352569141, "grad_norm": 8.303911431468897, "learning_rate": 3.2529824549649324e-06, "loss": 17.9099, "step": 34204 }, { "epoch": 0.6252399144533606, "grad_norm": 8.459874330280721, "learning_rate": 3.252705103399777e-06, "loss": 18.2159, "step": 34205 }, { "epoch": 0.6252581936498072, "grad_norm": 6.311111606486774, "learning_rate": 3.2524277579587373e-06, "loss": 17.3031, "step": 34206 }, { "epoch": 0.6252764728462537, "grad_norm": 6.81715567206256, "learning_rate": 3.2521504186427857e-06, "loss": 17.3656, "step": 34207 }, { "epoch": 0.6252947520427002, "grad_norm": 6.380791453171522, "learning_rate": 3.2518730854528978e-06, "loss": 17.5967, "step": 34208 }, { "epoch": 0.6253130312391467, "grad_norm": 6.10187254629033, "learning_rate": 3.2515957583900426e-06, "loss": 17.2997, "step": 34209 }, { "epoch": 0.6253313104355932, "grad_norm": 8.317497891722644, "learning_rate": 3.2513184374551904e-06, "loss": 17.6948, "step": 34210 }, { "epoch": 0.6253495896320398, "grad_norm": 7.533716922430719, "learning_rate": 3.2510411226493166e-06, "loss": 17.5412, "step": 34211 }, { "epoch": 0.6253678688284863, "grad_norm": 8.857229098067757, "learning_rate": 3.2507638139733927e-06, "loss": 18.0707, "step": 34212 }, { "epoch": 0.6253861480249329, "grad_norm": 6.946943801662645, "learning_rate": 3.2504865114283877e-06, "loss": 17.4831, "step": 34213 }, { "epoch": 0.6254044272213793, "grad_norm": 5.154867829103256, "learning_rate": 3.2502092150152774e-06, "loss": 17.1881, "step": 34214 }, { "epoch": 0.6254227064178258, "grad_norm": 6.641058574923566, "learning_rate": 3.249931924735032e-06, "loss": 17.4974, "step": 34215 }, { "epoch": 0.6254409856142724, "grad_norm": 6.245794370491314, "learning_rate": 3.249654640588622e-06, "loss": 17.5111, "step": 34216 }, { "epoch": 0.6254592648107189, "grad_norm": 7.367636930821984, "learning_rate": 3.249377362577022e-06, "loss": 18.0377, "step": 34217 }, { "epoch": 0.6254775440071655, "grad_norm": 7.535604430137384, "learning_rate": 3.2491000907012027e-06, "loss": 17.9271, "step": 34218 }, { "epoch": 0.625495823203612, "grad_norm": 6.374457814726946, "learning_rate": 3.248822824962134e-06, "loss": 17.573, "step": 34219 }, { "epoch": 0.6255141024000584, "grad_norm": 5.864639610801997, "learning_rate": 3.24854556536079e-06, "loss": 17.0732, "step": 34220 }, { "epoch": 0.625532381596505, "grad_norm": 8.404777860640047, "learning_rate": 3.2482683118981406e-06, "loss": 18.2473, "step": 34221 }, { "epoch": 0.6255506607929515, "grad_norm": 6.036702185903536, "learning_rate": 3.247991064575161e-06, "loss": 17.3074, "step": 34222 }, { "epoch": 0.6255689399893981, "grad_norm": 6.087552311105994, "learning_rate": 3.2477138233928214e-06, "loss": 17.3519, "step": 34223 }, { "epoch": 0.6255872191858446, "grad_norm": 6.287910018266087, "learning_rate": 3.2474365883520898e-06, "loss": 17.2477, "step": 34224 }, { "epoch": 0.6256054983822911, "grad_norm": 5.91335084428719, "learning_rate": 3.247159359453942e-06, "loss": 17.2408, "step": 34225 }, { "epoch": 0.6256237775787377, "grad_norm": 7.217304545894405, "learning_rate": 3.2468821366993496e-06, "loss": 17.8748, "step": 34226 }, { "epoch": 0.6256420567751841, "grad_norm": 6.239035455837637, "learning_rate": 3.246604920089282e-06, "loss": 17.3714, "step": 34227 }, { "epoch": 0.6256603359716307, "grad_norm": 7.733807722155354, "learning_rate": 3.2463277096247132e-06, "loss": 17.9041, "step": 34228 }, { "epoch": 0.6256786151680772, "grad_norm": 6.314803800946207, "learning_rate": 3.246050505306613e-06, "loss": 17.5449, "step": 34229 }, { "epoch": 0.6256968943645237, "grad_norm": 7.479357876340739, "learning_rate": 3.245773307135952e-06, "loss": 17.8904, "step": 34230 }, { "epoch": 0.6257151735609703, "grad_norm": 6.863800869496435, "learning_rate": 3.2454961151137054e-06, "loss": 17.676, "step": 34231 }, { "epoch": 0.6257334527574168, "grad_norm": 5.586001998864001, "learning_rate": 3.245218929240843e-06, "loss": 17.2125, "step": 34232 }, { "epoch": 0.6257517319538634, "grad_norm": 7.254204294553945, "learning_rate": 3.2449417495183344e-06, "loss": 17.5468, "step": 34233 }, { "epoch": 0.6257700111503098, "grad_norm": 5.477139131879904, "learning_rate": 3.244664575947153e-06, "loss": 17.1455, "step": 34234 }, { "epoch": 0.6257882903467563, "grad_norm": 6.239033818138475, "learning_rate": 3.244387408528269e-06, "loss": 17.4756, "step": 34235 }, { "epoch": 0.6258065695432029, "grad_norm": 6.7365748168673205, "learning_rate": 3.244110247262657e-06, "loss": 17.3354, "step": 34236 }, { "epoch": 0.6258248487396494, "grad_norm": 5.067142655000609, "learning_rate": 3.243833092151286e-06, "loss": 16.8402, "step": 34237 }, { "epoch": 0.625843127936096, "grad_norm": 5.5918951751780925, "learning_rate": 3.2435559431951256e-06, "loss": 17.3416, "step": 34238 }, { "epoch": 0.6258614071325425, "grad_norm": 5.495552830316138, "learning_rate": 3.2432788003951503e-06, "loss": 17.2107, "step": 34239 }, { "epoch": 0.6258796863289889, "grad_norm": 6.8896207209960805, "learning_rate": 3.2430016637523316e-06, "loss": 17.5988, "step": 34240 }, { "epoch": 0.6258979655254355, "grad_norm": 7.399211030136929, "learning_rate": 3.2427245332676373e-06, "loss": 17.578, "step": 34241 }, { "epoch": 0.625916244721882, "grad_norm": 5.925135372228785, "learning_rate": 3.242447408942043e-06, "loss": 17.3248, "step": 34242 }, { "epoch": 0.6259345239183286, "grad_norm": 6.467408493377141, "learning_rate": 3.2421702907765175e-06, "loss": 17.5557, "step": 34243 }, { "epoch": 0.6259528031147751, "grad_norm": 6.967347469545597, "learning_rate": 3.241893178772031e-06, "loss": 17.5606, "step": 34244 }, { "epoch": 0.6259710823112216, "grad_norm": 5.8690705386734985, "learning_rate": 3.241616072929559e-06, "loss": 17.2763, "step": 34245 }, { "epoch": 0.6259893615076682, "grad_norm": 6.323910255139216, "learning_rate": 3.2413389732500683e-06, "loss": 17.5642, "step": 34246 }, { "epoch": 0.6260076407041146, "grad_norm": 6.6082759635659825, "learning_rate": 3.2410618797345322e-06, "loss": 17.7913, "step": 34247 }, { "epoch": 0.6260259199005612, "grad_norm": 6.73001476453901, "learning_rate": 3.2407847923839218e-06, "loss": 17.6107, "step": 34248 }, { "epoch": 0.6260441990970077, "grad_norm": 8.677443098495008, "learning_rate": 3.240507711199209e-06, "loss": 17.9699, "step": 34249 }, { "epoch": 0.6260624782934542, "grad_norm": 5.94074220355639, "learning_rate": 3.2402306361813617e-06, "loss": 17.2999, "step": 34250 }, { "epoch": 0.6260807574899008, "grad_norm": 6.3916800780198955, "learning_rate": 3.2399535673313543e-06, "loss": 17.442, "step": 34251 }, { "epoch": 0.6260990366863473, "grad_norm": 6.245212873959326, "learning_rate": 3.239676504650157e-06, "loss": 17.3924, "step": 34252 }, { "epoch": 0.6261173158827938, "grad_norm": 5.957209248984848, "learning_rate": 3.2393994481387414e-06, "loss": 17.3703, "step": 34253 }, { "epoch": 0.6261355950792403, "grad_norm": 5.625152186408997, "learning_rate": 3.239122397798078e-06, "loss": 17.1512, "step": 34254 }, { "epoch": 0.6261538742756868, "grad_norm": 5.846186680532889, "learning_rate": 3.2388453536291366e-06, "loss": 17.33, "step": 34255 }, { "epoch": 0.6261721534721334, "grad_norm": 6.110504382498164, "learning_rate": 3.2385683156328896e-06, "loss": 17.1602, "step": 34256 }, { "epoch": 0.6261904326685799, "grad_norm": 5.213190349635479, "learning_rate": 3.2382912838103096e-06, "loss": 16.8442, "step": 34257 }, { "epoch": 0.6262087118650265, "grad_norm": 5.8432422555367705, "learning_rate": 3.238014258162363e-06, "loss": 17.2528, "step": 34258 }, { "epoch": 0.626226991061473, "grad_norm": 6.548370165773771, "learning_rate": 3.237737238690026e-06, "loss": 17.805, "step": 34259 }, { "epoch": 0.6262452702579194, "grad_norm": 6.961790398983891, "learning_rate": 3.2374602253942667e-06, "loss": 17.6265, "step": 34260 }, { "epoch": 0.626263549454366, "grad_norm": 8.298540066823154, "learning_rate": 3.2371832182760544e-06, "loss": 17.743, "step": 34261 }, { "epoch": 0.6262818286508125, "grad_norm": 6.071720442408745, "learning_rate": 3.2369062173363643e-06, "loss": 17.1368, "step": 34262 }, { "epoch": 0.626300107847259, "grad_norm": 6.140262162471929, "learning_rate": 3.236629222576164e-06, "loss": 17.5688, "step": 34263 }, { "epoch": 0.6263183870437056, "grad_norm": 6.2338509060454586, "learning_rate": 3.236352233996425e-06, "loss": 17.3189, "step": 34264 }, { "epoch": 0.626336666240152, "grad_norm": 6.959272450379554, "learning_rate": 3.2360752515981183e-06, "loss": 17.7662, "step": 34265 }, { "epoch": 0.6263549454365986, "grad_norm": 6.088193222851709, "learning_rate": 3.2357982753822137e-06, "loss": 17.2595, "step": 34266 }, { "epoch": 0.6263732246330451, "grad_norm": 6.819498804581441, "learning_rate": 3.235521305349685e-06, "loss": 17.4836, "step": 34267 }, { "epoch": 0.6263915038294916, "grad_norm": 5.091222139698377, "learning_rate": 3.2352443415015017e-06, "loss": 16.8648, "step": 34268 }, { "epoch": 0.6264097830259382, "grad_norm": 6.3902335700729544, "learning_rate": 3.2349673838386315e-06, "loss": 17.7066, "step": 34269 }, { "epoch": 0.6264280622223847, "grad_norm": 6.992011492779001, "learning_rate": 3.2346904323620487e-06, "loss": 17.8976, "step": 34270 }, { "epoch": 0.6264463414188313, "grad_norm": 6.427340839551413, "learning_rate": 3.2344134870727227e-06, "loss": 17.31, "step": 34271 }, { "epoch": 0.6264646206152777, "grad_norm": 5.208768466855628, "learning_rate": 3.2341365479716237e-06, "loss": 17.0943, "step": 34272 }, { "epoch": 0.6264828998117242, "grad_norm": 5.848713400476829, "learning_rate": 3.233859615059724e-06, "loss": 17.0471, "step": 34273 }, { "epoch": 0.6265011790081708, "grad_norm": 7.77120556735884, "learning_rate": 3.233582688337992e-06, "loss": 17.9287, "step": 34274 }, { "epoch": 0.6265194582046173, "grad_norm": 6.950098432064934, "learning_rate": 3.233305767807399e-06, "loss": 18.079, "step": 34275 }, { "epoch": 0.6265377374010639, "grad_norm": 5.682708147238552, "learning_rate": 3.2330288534689176e-06, "loss": 16.9982, "step": 34276 }, { "epoch": 0.6265560165975104, "grad_norm": 7.310635310505698, "learning_rate": 3.232751945323517e-06, "loss": 17.9004, "step": 34277 }, { "epoch": 0.6265742957939568, "grad_norm": 6.351278249297851, "learning_rate": 3.232475043372165e-06, "loss": 17.3938, "step": 34278 }, { "epoch": 0.6265925749904034, "grad_norm": 5.531273992418713, "learning_rate": 3.232198147615836e-06, "loss": 17.1813, "step": 34279 }, { "epoch": 0.6266108541868499, "grad_norm": 6.017984629397788, "learning_rate": 3.231921258055497e-06, "loss": 17.2114, "step": 34280 }, { "epoch": 0.6266291333832965, "grad_norm": 6.7609947157293595, "learning_rate": 3.2316443746921237e-06, "loss": 18.0565, "step": 34281 }, { "epoch": 0.626647412579743, "grad_norm": 6.894122413583468, "learning_rate": 3.2313674975266836e-06, "loss": 17.4673, "step": 34282 }, { "epoch": 0.6266656917761895, "grad_norm": 6.592992195574721, "learning_rate": 3.2310906265601445e-06, "loss": 17.263, "step": 34283 }, { "epoch": 0.6266839709726361, "grad_norm": 6.199942045677581, "learning_rate": 3.23081376179348e-06, "loss": 17.3779, "step": 34284 }, { "epoch": 0.6267022501690825, "grad_norm": 4.822641308316075, "learning_rate": 3.2305369032276612e-06, "loss": 16.9317, "step": 34285 }, { "epoch": 0.6267205293655291, "grad_norm": 6.224720789020915, "learning_rate": 3.230260050863655e-06, "loss": 17.0714, "step": 34286 }, { "epoch": 0.6267388085619756, "grad_norm": 5.709729524854074, "learning_rate": 3.2299832047024356e-06, "loss": 17.2612, "step": 34287 }, { "epoch": 0.6267570877584221, "grad_norm": 5.106607152417234, "learning_rate": 3.22970636474497e-06, "loss": 17.1017, "step": 34288 }, { "epoch": 0.6267753669548687, "grad_norm": 5.914475586552125, "learning_rate": 3.2294295309922298e-06, "loss": 17.3506, "step": 34289 }, { "epoch": 0.6267936461513152, "grad_norm": 5.381656660122948, "learning_rate": 3.229152703445187e-06, "loss": 17.1731, "step": 34290 }, { "epoch": 0.6268119253477618, "grad_norm": 6.0502272716573495, "learning_rate": 3.2288758821048107e-06, "loss": 17.3347, "step": 34291 }, { "epoch": 0.6268302045442082, "grad_norm": 7.50689860010794, "learning_rate": 3.228599066972068e-06, "loss": 17.5736, "step": 34292 }, { "epoch": 0.6268484837406547, "grad_norm": 8.283390302340392, "learning_rate": 3.2283222580479333e-06, "loss": 18.1894, "step": 34293 }, { "epoch": 0.6268667629371013, "grad_norm": 6.075243359634494, "learning_rate": 3.228045455333376e-06, "loss": 17.0288, "step": 34294 }, { "epoch": 0.6268850421335478, "grad_norm": 6.085271301553158, "learning_rate": 3.2277686588293645e-06, "loss": 17.4035, "step": 34295 }, { "epoch": 0.6269033213299944, "grad_norm": 5.7356870679421394, "learning_rate": 3.2274918685368717e-06, "loss": 17.2814, "step": 34296 }, { "epoch": 0.6269216005264409, "grad_norm": 6.827273791020478, "learning_rate": 3.227215084456864e-06, "loss": 18.0789, "step": 34297 }, { "epoch": 0.6269398797228873, "grad_norm": 7.307004018138722, "learning_rate": 3.2269383065903136e-06, "loss": 17.7377, "step": 34298 }, { "epoch": 0.6269581589193339, "grad_norm": 6.005997267702531, "learning_rate": 3.226661534938193e-06, "loss": 17.5377, "step": 34299 }, { "epoch": 0.6269764381157804, "grad_norm": 8.37161848149897, "learning_rate": 3.226384769501467e-06, "loss": 18.0797, "step": 34300 }, { "epoch": 0.626994717312227, "grad_norm": 8.59515554378264, "learning_rate": 3.2261080102811116e-06, "loss": 18.2023, "step": 34301 }, { "epoch": 0.6270129965086735, "grad_norm": 5.243636915639437, "learning_rate": 3.2258312572780914e-06, "loss": 17.1839, "step": 34302 }, { "epoch": 0.62703127570512, "grad_norm": 6.507785942294587, "learning_rate": 3.2255545104933785e-06, "loss": 17.7378, "step": 34303 }, { "epoch": 0.6270495549015666, "grad_norm": 5.639844862928083, "learning_rate": 3.225277769927945e-06, "loss": 17.1285, "step": 34304 }, { "epoch": 0.627067834098013, "grad_norm": 5.040948195356384, "learning_rate": 3.225001035582757e-06, "loss": 17.0317, "step": 34305 }, { "epoch": 0.6270861132944596, "grad_norm": 6.883419128141068, "learning_rate": 3.224724307458787e-06, "loss": 17.7991, "step": 34306 }, { "epoch": 0.6271043924909061, "grad_norm": 9.01660376551266, "learning_rate": 3.2244475855570043e-06, "loss": 18.4293, "step": 34307 }, { "epoch": 0.6271226716873526, "grad_norm": 5.667428927158903, "learning_rate": 3.2241708698783796e-06, "loss": 17.1895, "step": 34308 }, { "epoch": 0.6271409508837992, "grad_norm": 5.18976115625038, "learning_rate": 3.22389416042388e-06, "loss": 17.1218, "step": 34309 }, { "epoch": 0.6271592300802457, "grad_norm": 5.871390434421918, "learning_rate": 3.223617457194478e-06, "loss": 17.3404, "step": 34310 }, { "epoch": 0.6271775092766922, "grad_norm": 4.6803672385944, "learning_rate": 3.2233407601911433e-06, "loss": 16.9355, "step": 34311 }, { "epoch": 0.6271957884731387, "grad_norm": 6.944651927686361, "learning_rate": 3.2230640694148442e-06, "loss": 17.4312, "step": 34312 }, { "epoch": 0.6272140676695852, "grad_norm": 6.802934167208632, "learning_rate": 3.222787384866553e-06, "loss": 17.645, "step": 34313 }, { "epoch": 0.6272323468660318, "grad_norm": 5.241614084831662, "learning_rate": 3.2225107065472355e-06, "loss": 16.9334, "step": 34314 }, { "epoch": 0.6272506260624783, "grad_norm": 6.334568344694197, "learning_rate": 3.222234034457865e-06, "loss": 17.2618, "step": 34315 }, { "epoch": 0.6272689052589249, "grad_norm": 7.032830967167936, "learning_rate": 3.221957368599411e-06, "loss": 17.685, "step": 34316 }, { "epoch": 0.6272871844553713, "grad_norm": 6.182516067134629, "learning_rate": 3.2216807089728397e-06, "loss": 17.3578, "step": 34317 }, { "epoch": 0.6273054636518178, "grad_norm": 7.661721387235642, "learning_rate": 3.2214040555791255e-06, "loss": 17.7741, "step": 34318 }, { "epoch": 0.6273237428482644, "grad_norm": 4.801267886252723, "learning_rate": 3.2211274084192346e-06, "loss": 17.0037, "step": 34319 }, { "epoch": 0.6273420220447109, "grad_norm": 6.044482735137889, "learning_rate": 3.2208507674941368e-06, "loss": 17.3544, "step": 34320 }, { "epoch": 0.6273603012411575, "grad_norm": 7.024170030386052, "learning_rate": 3.2205741328048044e-06, "loss": 18.0317, "step": 34321 }, { "epoch": 0.627378580437604, "grad_norm": 4.933111080894226, "learning_rate": 3.2202975043522054e-06, "loss": 16.8134, "step": 34322 }, { "epoch": 0.6273968596340505, "grad_norm": 5.314225285437781, "learning_rate": 3.220020882137308e-06, "loss": 17.25, "step": 34323 }, { "epoch": 0.627415138830497, "grad_norm": 7.250262722974237, "learning_rate": 3.219744266161083e-06, "loss": 17.7727, "step": 34324 }, { "epoch": 0.6274334180269435, "grad_norm": 6.555345139718669, "learning_rate": 3.219467656424501e-06, "loss": 17.5628, "step": 34325 }, { "epoch": 0.6274516972233901, "grad_norm": 6.502154800623501, "learning_rate": 3.2191910529285287e-06, "loss": 17.4064, "step": 34326 }, { "epoch": 0.6274699764198366, "grad_norm": 6.8126033193908615, "learning_rate": 3.2189144556741394e-06, "loss": 17.4396, "step": 34327 }, { "epoch": 0.6274882556162831, "grad_norm": 7.280156643289391, "learning_rate": 3.2186378646622973e-06, "loss": 17.9359, "step": 34328 }, { "epoch": 0.6275065348127297, "grad_norm": 6.449082050560373, "learning_rate": 3.2183612798939766e-06, "loss": 17.0167, "step": 34329 }, { "epoch": 0.6275248140091761, "grad_norm": 7.234551681252511, "learning_rate": 3.2180847013701467e-06, "loss": 17.7873, "step": 34330 }, { "epoch": 0.6275430932056226, "grad_norm": 5.394646680956161, "learning_rate": 3.2178081290917723e-06, "loss": 16.8785, "step": 34331 }, { "epoch": 0.6275613724020692, "grad_norm": 6.9218665762519365, "learning_rate": 3.217531563059828e-06, "loss": 17.5021, "step": 34332 }, { "epoch": 0.6275796515985157, "grad_norm": 5.409169701865802, "learning_rate": 3.2172550032752802e-06, "loss": 17.0571, "step": 34333 }, { "epoch": 0.6275979307949623, "grad_norm": 6.320592737578137, "learning_rate": 3.2169784497390976e-06, "loss": 17.3869, "step": 34334 }, { "epoch": 0.6276162099914088, "grad_norm": 6.4519251582225, "learning_rate": 3.216701902452253e-06, "loss": 17.5802, "step": 34335 }, { "epoch": 0.6276344891878552, "grad_norm": 7.293119716519963, "learning_rate": 3.216425361415713e-06, "loss": 17.897, "step": 34336 }, { "epoch": 0.6276527683843018, "grad_norm": 7.4162705130089694, "learning_rate": 3.2161488266304457e-06, "loss": 18.0002, "step": 34337 }, { "epoch": 0.6276710475807483, "grad_norm": 7.166568682021939, "learning_rate": 3.2158722980974233e-06, "loss": 17.3955, "step": 34338 }, { "epoch": 0.6276893267771949, "grad_norm": 4.467985917011466, "learning_rate": 3.2155957758176144e-06, "loss": 16.7334, "step": 34339 }, { "epoch": 0.6277076059736414, "grad_norm": 5.889623356919209, "learning_rate": 3.215319259791985e-06, "loss": 17.5131, "step": 34340 }, { "epoch": 0.6277258851700879, "grad_norm": 6.810034870081956, "learning_rate": 3.21504275002151e-06, "loss": 17.704, "step": 34341 }, { "epoch": 0.6277441643665345, "grad_norm": 6.027676854406301, "learning_rate": 3.2147662465071515e-06, "loss": 17.6526, "step": 34342 }, { "epoch": 0.6277624435629809, "grad_norm": 7.501111525730156, "learning_rate": 3.214489749249885e-06, "loss": 17.6021, "step": 34343 }, { "epoch": 0.6277807227594275, "grad_norm": 6.364213424309915, "learning_rate": 3.214213258250677e-06, "loss": 17.4535, "step": 34344 }, { "epoch": 0.627799001955874, "grad_norm": 6.522128552064134, "learning_rate": 3.2139367735104946e-06, "loss": 17.7666, "step": 34345 }, { "epoch": 0.6278172811523205, "grad_norm": 5.58273914505607, "learning_rate": 3.2136602950303114e-06, "loss": 16.9824, "step": 34346 }, { "epoch": 0.6278355603487671, "grad_norm": 7.3437562074239775, "learning_rate": 3.2133838228110925e-06, "loss": 17.6203, "step": 34347 }, { "epoch": 0.6278538395452136, "grad_norm": 5.774095706785924, "learning_rate": 3.2131073568538073e-06, "loss": 17.1192, "step": 34348 }, { "epoch": 0.6278721187416602, "grad_norm": 6.039845192556594, "learning_rate": 3.212830897159428e-06, "loss": 17.2838, "step": 34349 }, { "epoch": 0.6278903979381066, "grad_norm": 6.076492927154372, "learning_rate": 3.2125544437289214e-06, "loss": 17.4169, "step": 34350 }, { "epoch": 0.6279086771345531, "grad_norm": 5.662354385797208, "learning_rate": 3.212277996563253e-06, "loss": 17.2805, "step": 34351 }, { "epoch": 0.6279269563309997, "grad_norm": 5.55157256628336, "learning_rate": 3.2120015556633976e-06, "loss": 17.3516, "step": 34352 }, { "epoch": 0.6279452355274462, "grad_norm": 4.826511995972474, "learning_rate": 3.2117251210303226e-06, "loss": 16.9392, "step": 34353 }, { "epoch": 0.6279635147238928, "grad_norm": 5.058850105376662, "learning_rate": 3.2114486926649927e-06, "loss": 17.024, "step": 34354 }, { "epoch": 0.6279817939203393, "grad_norm": 5.919888371109195, "learning_rate": 3.2111722705683835e-06, "loss": 17.2745, "step": 34355 }, { "epoch": 0.6280000731167857, "grad_norm": 7.17048634109056, "learning_rate": 3.210895854741458e-06, "loss": 17.4089, "step": 34356 }, { "epoch": 0.6280183523132323, "grad_norm": 6.226948610100214, "learning_rate": 3.2106194451851867e-06, "loss": 17.1873, "step": 34357 }, { "epoch": 0.6280366315096788, "grad_norm": 6.547060384908078, "learning_rate": 3.210343041900541e-06, "loss": 17.5794, "step": 34358 }, { "epoch": 0.6280549107061254, "grad_norm": 5.3265538610707175, "learning_rate": 3.2100666448884856e-06, "loss": 17.116, "step": 34359 }, { "epoch": 0.6280731899025719, "grad_norm": 6.720392225505673, "learning_rate": 3.209790254149993e-06, "loss": 17.3401, "step": 34360 }, { "epoch": 0.6280914690990184, "grad_norm": 7.182089331041762, "learning_rate": 3.2095138696860295e-06, "loss": 17.6298, "step": 34361 }, { "epoch": 0.628109748295465, "grad_norm": 5.793237375247929, "learning_rate": 3.209237491497563e-06, "loss": 17.5114, "step": 34362 }, { "epoch": 0.6281280274919114, "grad_norm": 5.411894677603194, "learning_rate": 3.2089611195855668e-06, "loss": 17.1321, "step": 34363 }, { "epoch": 0.628146306688358, "grad_norm": 8.029487810022534, "learning_rate": 3.2086847539510058e-06, "loss": 17.8732, "step": 34364 }, { "epoch": 0.6281645858848045, "grad_norm": 8.052937981217859, "learning_rate": 3.2084083945948473e-06, "loss": 18.0983, "step": 34365 }, { "epoch": 0.628182865081251, "grad_norm": 8.123679116058481, "learning_rate": 3.2081320415180626e-06, "loss": 18.0636, "step": 34366 }, { "epoch": 0.6282011442776976, "grad_norm": 7.676013654294283, "learning_rate": 3.2078556947216215e-06, "loss": 18.2414, "step": 34367 }, { "epoch": 0.6282194234741441, "grad_norm": 6.235278957743992, "learning_rate": 3.2075793542064883e-06, "loss": 17.3907, "step": 34368 }, { "epoch": 0.6282377026705906, "grad_norm": 8.519630617738093, "learning_rate": 3.2073030199736354e-06, "loss": 18.7955, "step": 34369 }, { "epoch": 0.6282559818670371, "grad_norm": 5.908416959463741, "learning_rate": 3.2070266920240297e-06, "loss": 17.3809, "step": 34370 }, { "epoch": 0.6282742610634836, "grad_norm": 6.715107331353068, "learning_rate": 3.2067503703586377e-06, "loss": 17.6446, "step": 34371 }, { "epoch": 0.6282925402599302, "grad_norm": 5.614116505053403, "learning_rate": 3.2064740549784334e-06, "loss": 17.05, "step": 34372 }, { "epoch": 0.6283108194563767, "grad_norm": 5.333153168516293, "learning_rate": 3.2061977458843783e-06, "loss": 17.0393, "step": 34373 }, { "epoch": 0.6283290986528233, "grad_norm": 5.918179625065236, "learning_rate": 3.2059214430774477e-06, "loss": 17.4987, "step": 34374 }, { "epoch": 0.6283473778492698, "grad_norm": 7.254490107475653, "learning_rate": 3.205645146558606e-06, "loss": 17.3386, "step": 34375 }, { "epoch": 0.6283656570457162, "grad_norm": 4.711592958444471, "learning_rate": 3.2053688563288202e-06, "loss": 16.9863, "step": 34376 }, { "epoch": 0.6283839362421628, "grad_norm": 6.887639269986012, "learning_rate": 3.2050925723890636e-06, "loss": 17.583, "step": 34377 }, { "epoch": 0.6284022154386093, "grad_norm": 5.244137316343591, "learning_rate": 3.2048162947403007e-06, "loss": 17.2933, "step": 34378 }, { "epoch": 0.6284204946350559, "grad_norm": 6.990573918413783, "learning_rate": 3.204540023383501e-06, "loss": 17.7091, "step": 34379 }, { "epoch": 0.6284387738315024, "grad_norm": 5.698043129424735, "learning_rate": 3.204263758319632e-06, "loss": 17.3675, "step": 34380 }, { "epoch": 0.6284570530279489, "grad_norm": 6.69876498686514, "learning_rate": 3.2039874995496645e-06, "loss": 17.3375, "step": 34381 }, { "epoch": 0.6284753322243954, "grad_norm": 5.382487892808117, "learning_rate": 3.2037112470745624e-06, "loss": 17.0806, "step": 34382 }, { "epoch": 0.6284936114208419, "grad_norm": 7.075169908452484, "learning_rate": 3.203435000895298e-06, "loss": 17.6284, "step": 34383 }, { "epoch": 0.6285118906172885, "grad_norm": 6.61896842767464, "learning_rate": 3.203158761012839e-06, "loss": 17.6296, "step": 34384 }, { "epoch": 0.628530169813735, "grad_norm": 6.327268035650839, "learning_rate": 3.2028825274281507e-06, "loss": 17.37, "step": 34385 }, { "epoch": 0.6285484490101815, "grad_norm": 6.750028922216347, "learning_rate": 3.2026063001422046e-06, "loss": 17.3911, "step": 34386 }, { "epoch": 0.6285667282066281, "grad_norm": 5.92649376465501, "learning_rate": 3.2023300791559665e-06, "loss": 17.363, "step": 34387 }, { "epoch": 0.6285850074030745, "grad_norm": 7.025902920674043, "learning_rate": 3.2020538644704046e-06, "loss": 17.9273, "step": 34388 }, { "epoch": 0.6286032865995211, "grad_norm": 9.11396111266521, "learning_rate": 3.20177765608649e-06, "loss": 18.1735, "step": 34389 }, { "epoch": 0.6286215657959676, "grad_norm": 6.029508912852304, "learning_rate": 3.201501454005187e-06, "loss": 17.3718, "step": 34390 }, { "epoch": 0.6286398449924141, "grad_norm": 6.446819552582544, "learning_rate": 3.201225258227467e-06, "loss": 17.4796, "step": 34391 }, { "epoch": 0.6286581241888607, "grad_norm": 6.985434821661764, "learning_rate": 3.2009490687542954e-06, "loss": 17.3367, "step": 34392 }, { "epoch": 0.6286764033853072, "grad_norm": 5.540423861160845, "learning_rate": 3.2006728855866397e-06, "loss": 17.1983, "step": 34393 }, { "epoch": 0.6286946825817538, "grad_norm": 6.928116365061109, "learning_rate": 3.200396708725472e-06, "loss": 17.7231, "step": 34394 }, { "epoch": 0.6287129617782002, "grad_norm": 6.994489084927267, "learning_rate": 3.200120538171758e-06, "loss": 18.2681, "step": 34395 }, { "epoch": 0.6287312409746467, "grad_norm": 6.935110778329568, "learning_rate": 3.199844373926463e-06, "loss": 17.2643, "step": 34396 }, { "epoch": 0.6287495201710933, "grad_norm": 6.126394349251746, "learning_rate": 3.1995682159905583e-06, "loss": 17.5861, "step": 34397 }, { "epoch": 0.6287677993675398, "grad_norm": 7.430954697412621, "learning_rate": 3.1992920643650115e-06, "loss": 17.5477, "step": 34398 }, { "epoch": 0.6287860785639863, "grad_norm": 6.379435227977683, "learning_rate": 3.1990159190507873e-06, "loss": 17.5425, "step": 34399 }, { "epoch": 0.6288043577604329, "grad_norm": 6.417169347638765, "learning_rate": 3.198739780048859e-06, "loss": 17.5584, "step": 34400 }, { "epoch": 0.6288226369568793, "grad_norm": 5.909347725331315, "learning_rate": 3.1984636473601905e-06, "loss": 17.4869, "step": 34401 }, { "epoch": 0.6288409161533259, "grad_norm": 7.302938734355003, "learning_rate": 3.1981875209857488e-06, "loss": 17.9646, "step": 34402 }, { "epoch": 0.6288591953497724, "grad_norm": 7.5863361586950635, "learning_rate": 3.1979114009265056e-06, "loss": 18.164, "step": 34403 }, { "epoch": 0.6288774745462189, "grad_norm": 5.296625372015669, "learning_rate": 3.197635287183425e-06, "loss": 17.1391, "step": 34404 }, { "epoch": 0.6288957537426655, "grad_norm": 8.020037659718385, "learning_rate": 3.197359179757478e-06, "loss": 18.3536, "step": 34405 }, { "epoch": 0.628914032939112, "grad_norm": 6.924630323150736, "learning_rate": 3.19708307864963e-06, "loss": 17.5339, "step": 34406 }, { "epoch": 0.6289323121355586, "grad_norm": 4.540963041623063, "learning_rate": 3.1968069838608485e-06, "loss": 16.8321, "step": 34407 }, { "epoch": 0.628950591332005, "grad_norm": 5.644083094887433, "learning_rate": 3.196530895392103e-06, "loss": 17.042, "step": 34408 }, { "epoch": 0.6289688705284515, "grad_norm": 5.909599830190726, "learning_rate": 3.1962548132443615e-06, "loss": 17.3746, "step": 34409 }, { "epoch": 0.6289871497248981, "grad_norm": 5.854052418974503, "learning_rate": 3.1959787374185874e-06, "loss": 17.153, "step": 34410 }, { "epoch": 0.6290054289213446, "grad_norm": 5.000955651303775, "learning_rate": 3.1957026679157525e-06, "loss": 16.9907, "step": 34411 }, { "epoch": 0.6290237081177912, "grad_norm": 7.337166208475092, "learning_rate": 3.1954266047368245e-06, "loss": 17.5427, "step": 34412 }, { "epoch": 0.6290419873142377, "grad_norm": 5.909282810386788, "learning_rate": 3.195150547882767e-06, "loss": 17.1163, "step": 34413 }, { "epoch": 0.6290602665106841, "grad_norm": 8.45022484882724, "learning_rate": 3.194874497354553e-06, "loss": 17.9614, "step": 34414 }, { "epoch": 0.6290785457071307, "grad_norm": 7.4497813156907124, "learning_rate": 3.194598453153146e-06, "loss": 17.6591, "step": 34415 }, { "epoch": 0.6290968249035772, "grad_norm": 5.596889058102549, "learning_rate": 3.194322415279514e-06, "loss": 17.3816, "step": 34416 }, { "epoch": 0.6291151041000238, "grad_norm": 7.568998753706717, "learning_rate": 3.194046383734627e-06, "loss": 17.3436, "step": 34417 }, { "epoch": 0.6291333832964703, "grad_norm": 5.698405468525404, "learning_rate": 3.1937703585194502e-06, "loss": 17.1527, "step": 34418 }, { "epoch": 0.6291516624929168, "grad_norm": 4.486396592240987, "learning_rate": 3.1934943396349506e-06, "loss": 16.6206, "step": 34419 }, { "epoch": 0.6291699416893634, "grad_norm": 6.53910375373419, "learning_rate": 3.1932183270820964e-06, "loss": 17.414, "step": 34420 }, { "epoch": 0.6291882208858098, "grad_norm": 7.5185959848548025, "learning_rate": 3.192942320861855e-06, "loss": 17.9856, "step": 34421 }, { "epoch": 0.6292065000822564, "grad_norm": 7.272811551193702, "learning_rate": 3.192666320975195e-06, "loss": 17.2703, "step": 34422 }, { "epoch": 0.6292247792787029, "grad_norm": 7.037749518876731, "learning_rate": 3.192390327423084e-06, "loss": 17.4793, "step": 34423 }, { "epoch": 0.6292430584751494, "grad_norm": 6.7211021467982945, "learning_rate": 3.1921143402064857e-06, "loss": 17.6526, "step": 34424 }, { "epoch": 0.629261337671596, "grad_norm": 7.411887118413635, "learning_rate": 3.1918383593263703e-06, "loss": 17.917, "step": 34425 }, { "epoch": 0.6292796168680425, "grad_norm": 5.441319114636952, "learning_rate": 3.191562384783706e-06, "loss": 16.9127, "step": 34426 }, { "epoch": 0.629297896064489, "grad_norm": 5.867439427696657, "learning_rate": 3.191286416579456e-06, "loss": 17.3954, "step": 34427 }, { "epoch": 0.6293161752609355, "grad_norm": 7.430768800245757, "learning_rate": 3.1910104547145926e-06, "loss": 17.7947, "step": 34428 }, { "epoch": 0.629334454457382, "grad_norm": 7.5330932320756006, "learning_rate": 3.19073449919008e-06, "loss": 17.8603, "step": 34429 }, { "epoch": 0.6293527336538286, "grad_norm": 6.559382730451666, "learning_rate": 3.1904585500068842e-06, "loss": 17.3616, "step": 34430 }, { "epoch": 0.6293710128502751, "grad_norm": 5.392408548721132, "learning_rate": 3.190182607165976e-06, "loss": 17.2331, "step": 34431 }, { "epoch": 0.6293892920467217, "grad_norm": 6.391688575073387, "learning_rate": 3.18990667066832e-06, "loss": 16.8672, "step": 34432 }, { "epoch": 0.6294075712431682, "grad_norm": 4.44691035520119, "learning_rate": 3.1896307405148847e-06, "loss": 16.7706, "step": 34433 }, { "epoch": 0.6294258504396146, "grad_norm": 7.05076040651293, "learning_rate": 3.189354816706636e-06, "loss": 17.8295, "step": 34434 }, { "epoch": 0.6294441296360612, "grad_norm": 5.209175412256833, "learning_rate": 3.189078899244541e-06, "loss": 16.7888, "step": 34435 }, { "epoch": 0.6294624088325077, "grad_norm": 6.697288278836498, "learning_rate": 3.1888029881295686e-06, "loss": 17.4847, "step": 34436 }, { "epoch": 0.6294806880289543, "grad_norm": 7.380992127796712, "learning_rate": 3.1885270833626836e-06, "loss": 17.7919, "step": 34437 }, { "epoch": 0.6294989672254008, "grad_norm": 5.251169032165884, "learning_rate": 3.1882511849448542e-06, "loss": 16.8304, "step": 34438 }, { "epoch": 0.6295172464218473, "grad_norm": 5.692079637585365, "learning_rate": 3.187975292877048e-06, "loss": 17.2438, "step": 34439 }, { "epoch": 0.6295355256182938, "grad_norm": 6.643939853610305, "learning_rate": 3.1876994071602307e-06, "loss": 17.189, "step": 34440 }, { "epoch": 0.6295538048147403, "grad_norm": 6.8415973625718145, "learning_rate": 3.187423527795369e-06, "loss": 17.9318, "step": 34441 }, { "epoch": 0.6295720840111869, "grad_norm": 5.890039238883687, "learning_rate": 3.1871476547834307e-06, "loss": 17.0992, "step": 34442 }, { "epoch": 0.6295903632076334, "grad_norm": 6.5313977239114696, "learning_rate": 3.1868717881253843e-06, "loss": 17.4091, "step": 34443 }, { "epoch": 0.6296086424040799, "grad_norm": 6.595846145084854, "learning_rate": 3.186595927822192e-06, "loss": 17.4668, "step": 34444 }, { "epoch": 0.6296269216005265, "grad_norm": 5.759206442615922, "learning_rate": 3.1863200738748264e-06, "loss": 17.2908, "step": 34445 }, { "epoch": 0.629645200796973, "grad_norm": 4.996397599909187, "learning_rate": 3.18604422628425e-06, "loss": 16.9415, "step": 34446 }, { "epoch": 0.6296634799934195, "grad_norm": 6.312674609648473, "learning_rate": 3.18576838505143e-06, "loss": 17.537, "step": 34447 }, { "epoch": 0.629681759189866, "grad_norm": 6.267649818585577, "learning_rate": 3.185492550177337e-06, "loss": 17.5675, "step": 34448 }, { "epoch": 0.6297000383863125, "grad_norm": 7.235875123000335, "learning_rate": 3.1852167216629333e-06, "loss": 17.5915, "step": 34449 }, { "epoch": 0.6297183175827591, "grad_norm": 6.432553890587445, "learning_rate": 3.1849408995091892e-06, "loss": 17.2985, "step": 34450 }, { "epoch": 0.6297365967792056, "grad_norm": 7.131285555770638, "learning_rate": 3.1846650837170683e-06, "loss": 17.4653, "step": 34451 }, { "epoch": 0.6297548759756522, "grad_norm": 6.087487470123556, "learning_rate": 3.1843892742875383e-06, "loss": 17.1525, "step": 34452 }, { "epoch": 0.6297731551720986, "grad_norm": 6.611779854231611, "learning_rate": 3.184113471221568e-06, "loss": 17.4813, "step": 34453 }, { "epoch": 0.6297914343685451, "grad_norm": 5.411536108857623, "learning_rate": 3.1838376745201227e-06, "loss": 16.9421, "step": 34454 }, { "epoch": 0.6298097135649917, "grad_norm": 6.8816714110712764, "learning_rate": 3.1835618841841663e-06, "loss": 17.5892, "step": 34455 }, { "epoch": 0.6298279927614382, "grad_norm": 6.991564053334748, "learning_rate": 3.18328610021467e-06, "loss": 17.8798, "step": 34456 }, { "epoch": 0.6298462719578848, "grad_norm": 7.169104525562707, "learning_rate": 3.1830103226125986e-06, "loss": 17.5402, "step": 34457 }, { "epoch": 0.6298645511543313, "grad_norm": 6.854330505069863, "learning_rate": 3.182734551378916e-06, "loss": 17.6534, "step": 34458 }, { "epoch": 0.6298828303507777, "grad_norm": 5.547589038608377, "learning_rate": 3.1824587865145935e-06, "loss": 17.1285, "step": 34459 }, { "epoch": 0.6299011095472243, "grad_norm": 7.235399604988772, "learning_rate": 3.182183028020594e-06, "loss": 17.5825, "step": 34460 }, { "epoch": 0.6299193887436708, "grad_norm": 5.424657330296343, "learning_rate": 3.1819072758978844e-06, "loss": 17.0832, "step": 34461 }, { "epoch": 0.6299376679401174, "grad_norm": 5.827706410475376, "learning_rate": 3.1816315301474337e-06, "loss": 17.4326, "step": 34462 }, { "epoch": 0.6299559471365639, "grad_norm": 6.636011287677374, "learning_rate": 3.1813557907702076e-06, "loss": 18.0271, "step": 34463 }, { "epoch": 0.6299742263330104, "grad_norm": 8.903432495632911, "learning_rate": 3.1810800577671686e-06, "loss": 17.6324, "step": 34464 }, { "epoch": 0.629992505529457, "grad_norm": 9.131364542466365, "learning_rate": 3.1808043311392876e-06, "loss": 18.0363, "step": 34465 }, { "epoch": 0.6300107847259034, "grad_norm": 5.452237687116193, "learning_rate": 3.1805286108875284e-06, "loss": 17.1404, "step": 34466 }, { "epoch": 0.6300290639223499, "grad_norm": 5.108523480236505, "learning_rate": 3.180252897012861e-06, "loss": 17.1033, "step": 34467 }, { "epoch": 0.6300473431187965, "grad_norm": 6.022298297977403, "learning_rate": 3.179977189516249e-06, "loss": 17.2752, "step": 34468 }, { "epoch": 0.630065622315243, "grad_norm": 6.712470325250603, "learning_rate": 3.1797014883986566e-06, "loss": 17.6911, "step": 34469 }, { "epoch": 0.6300839015116896, "grad_norm": 5.66500664468893, "learning_rate": 3.179425793661054e-06, "loss": 17.2173, "step": 34470 }, { "epoch": 0.6301021807081361, "grad_norm": 6.528925117918506, "learning_rate": 3.1791501053044064e-06, "loss": 17.4946, "step": 34471 }, { "epoch": 0.6301204599045825, "grad_norm": 5.198118387764073, "learning_rate": 3.1788744233296786e-06, "loss": 17.0172, "step": 34472 }, { "epoch": 0.6301387391010291, "grad_norm": 6.334980385425631, "learning_rate": 3.1785987477378395e-06, "loss": 17.2303, "step": 34473 }, { "epoch": 0.6301570182974756, "grad_norm": 6.154434289810162, "learning_rate": 3.1783230785298523e-06, "loss": 17.6025, "step": 34474 }, { "epoch": 0.6301752974939222, "grad_norm": 6.674590291657829, "learning_rate": 3.1780474157066843e-06, "loss": 17.2991, "step": 34475 }, { "epoch": 0.6301935766903687, "grad_norm": 5.400006599076002, "learning_rate": 3.1777717592693036e-06, "loss": 17.1834, "step": 34476 }, { "epoch": 0.6302118558868152, "grad_norm": 7.78502349164899, "learning_rate": 3.177496109218675e-06, "loss": 17.8749, "step": 34477 }, { "epoch": 0.6302301350832618, "grad_norm": 6.946372950053341, "learning_rate": 3.1772204655557627e-06, "loss": 17.7545, "step": 34478 }, { "epoch": 0.6302484142797082, "grad_norm": 5.458702315346831, "learning_rate": 3.176944828281535e-06, "loss": 17.1658, "step": 34479 }, { "epoch": 0.6302666934761548, "grad_norm": 6.926735035485282, "learning_rate": 3.1766691973969565e-06, "loss": 17.7855, "step": 34480 }, { "epoch": 0.6302849726726013, "grad_norm": 7.831362487284439, "learning_rate": 3.176393572902996e-06, "loss": 17.3876, "step": 34481 }, { "epoch": 0.6303032518690478, "grad_norm": 5.862673895997693, "learning_rate": 3.176117954800618e-06, "loss": 17.2314, "step": 34482 }, { "epoch": 0.6303215310654944, "grad_norm": 10.062895615262216, "learning_rate": 3.175842343090787e-06, "loss": 18.5676, "step": 34483 }, { "epoch": 0.6303398102619409, "grad_norm": 5.473806634475562, "learning_rate": 3.1755667377744703e-06, "loss": 17.1176, "step": 34484 }, { "epoch": 0.6303580894583874, "grad_norm": 6.720171740896404, "learning_rate": 3.1752911388526354e-06, "loss": 17.5088, "step": 34485 }, { "epoch": 0.6303763686548339, "grad_norm": 6.475572599894414, "learning_rate": 3.1750155463262444e-06, "loss": 17.4995, "step": 34486 }, { "epoch": 0.6303946478512804, "grad_norm": 8.148418884377204, "learning_rate": 3.174739960196268e-06, "loss": 17.9227, "step": 34487 }, { "epoch": 0.630412927047727, "grad_norm": 6.561431571224353, "learning_rate": 3.174464380463668e-06, "loss": 17.1143, "step": 34488 }, { "epoch": 0.6304312062441735, "grad_norm": 5.759160903099564, "learning_rate": 3.174188807129411e-06, "loss": 17.2525, "step": 34489 }, { "epoch": 0.6304494854406201, "grad_norm": 6.272103260262206, "learning_rate": 3.173913240194467e-06, "loss": 17.5078, "step": 34490 }, { "epoch": 0.6304677646370666, "grad_norm": 5.726102869529018, "learning_rate": 3.173637679659797e-06, "loss": 17.3488, "step": 34491 }, { "epoch": 0.630486043833513, "grad_norm": 6.641443576216405, "learning_rate": 3.1733621255263676e-06, "loss": 17.539, "step": 34492 }, { "epoch": 0.6305043230299596, "grad_norm": 8.053704544883757, "learning_rate": 3.173086577795146e-06, "loss": 17.8418, "step": 34493 }, { "epoch": 0.6305226022264061, "grad_norm": 6.04374396135032, "learning_rate": 3.172811036467098e-06, "loss": 17.1363, "step": 34494 }, { "epoch": 0.6305408814228527, "grad_norm": 7.062780350203417, "learning_rate": 3.1725355015431874e-06, "loss": 17.6231, "step": 34495 }, { "epoch": 0.6305591606192992, "grad_norm": 4.9832495107867105, "learning_rate": 3.172259973024383e-06, "loss": 17.032, "step": 34496 }, { "epoch": 0.6305774398157457, "grad_norm": 7.376007085730682, "learning_rate": 3.171984450911647e-06, "loss": 17.4133, "step": 34497 }, { "epoch": 0.6305957190121922, "grad_norm": 5.522716935231425, "learning_rate": 3.1717089352059484e-06, "loss": 17.146, "step": 34498 }, { "epoch": 0.6306139982086387, "grad_norm": 7.0632265672776136, "learning_rate": 3.1714334259082512e-06, "loss": 17.4014, "step": 34499 }, { "epoch": 0.6306322774050853, "grad_norm": 8.085619543672404, "learning_rate": 3.17115792301952e-06, "loss": 18.1184, "step": 34500 }, { "epoch": 0.6306505566015318, "grad_norm": 7.337790227474114, "learning_rate": 3.170882426540723e-06, "loss": 17.9752, "step": 34501 }, { "epoch": 0.6306688357979783, "grad_norm": 7.2141858300517985, "learning_rate": 3.170606936472823e-06, "loss": 18.0138, "step": 34502 }, { "epoch": 0.6306871149944249, "grad_norm": 6.7826764771199, "learning_rate": 3.1703314528167872e-06, "loss": 17.5369, "step": 34503 }, { "epoch": 0.6307053941908713, "grad_norm": 5.579595706423272, "learning_rate": 3.170055975573582e-06, "loss": 17.1077, "step": 34504 }, { "epoch": 0.6307236733873179, "grad_norm": 4.992365430844491, "learning_rate": 3.169780504744171e-06, "loss": 17.0272, "step": 34505 }, { "epoch": 0.6307419525837644, "grad_norm": 6.55992108063721, "learning_rate": 3.1695050403295203e-06, "loss": 17.4968, "step": 34506 }, { "epoch": 0.6307602317802109, "grad_norm": 6.1910424589118795, "learning_rate": 3.169229582330596e-06, "loss": 17.4224, "step": 34507 }, { "epoch": 0.6307785109766575, "grad_norm": 8.444283233473602, "learning_rate": 3.168954130748364e-06, "loss": 18.1304, "step": 34508 }, { "epoch": 0.630796790173104, "grad_norm": 6.487998997320468, "learning_rate": 3.168678685583787e-06, "loss": 17.573, "step": 34509 }, { "epoch": 0.6308150693695506, "grad_norm": 7.6240615757901615, "learning_rate": 3.1684032468378335e-06, "loss": 17.909, "step": 34510 }, { "epoch": 0.630833348565997, "grad_norm": 7.58026530933963, "learning_rate": 3.168127814511467e-06, "loss": 17.5733, "step": 34511 }, { "epoch": 0.6308516277624435, "grad_norm": 5.889943605161983, "learning_rate": 3.167852388605654e-06, "loss": 17.4167, "step": 34512 }, { "epoch": 0.6308699069588901, "grad_norm": 5.050926637441075, "learning_rate": 3.1675769691213597e-06, "loss": 17.0308, "step": 34513 }, { "epoch": 0.6308881861553366, "grad_norm": 5.641560824089095, "learning_rate": 3.1673015560595476e-06, "loss": 17.1539, "step": 34514 }, { "epoch": 0.6309064653517832, "grad_norm": 6.664428849955531, "learning_rate": 3.167026149421186e-06, "loss": 17.7568, "step": 34515 }, { "epoch": 0.6309247445482297, "grad_norm": 5.877065072791831, "learning_rate": 3.166750749207239e-06, "loss": 17.1653, "step": 34516 }, { "epoch": 0.6309430237446761, "grad_norm": 4.841753549852309, "learning_rate": 3.16647535541867e-06, "loss": 16.8011, "step": 34517 }, { "epoch": 0.6309613029411227, "grad_norm": 6.79136492860179, "learning_rate": 3.166199968056448e-06, "loss": 17.4918, "step": 34518 }, { "epoch": 0.6309795821375692, "grad_norm": 7.38046098846128, "learning_rate": 3.1659245871215344e-06, "loss": 18.1699, "step": 34519 }, { "epoch": 0.6309978613340158, "grad_norm": 6.2820938619966045, "learning_rate": 3.165649212614895e-06, "loss": 17.5912, "step": 34520 }, { "epoch": 0.6310161405304623, "grad_norm": 7.607752440359447, "learning_rate": 3.165373844537498e-06, "loss": 17.7432, "step": 34521 }, { "epoch": 0.6310344197269088, "grad_norm": 5.812637541744098, "learning_rate": 3.1650984828903068e-06, "loss": 17.5245, "step": 34522 }, { "epoch": 0.6310526989233554, "grad_norm": 7.6897699176645995, "learning_rate": 3.1648231276742834e-06, "loss": 17.4811, "step": 34523 }, { "epoch": 0.6310709781198018, "grad_norm": 7.663240143773157, "learning_rate": 3.164547778890398e-06, "loss": 17.7587, "step": 34524 }, { "epoch": 0.6310892573162484, "grad_norm": 4.760182845972778, "learning_rate": 3.1642724365396137e-06, "loss": 16.6205, "step": 34525 }, { "epoch": 0.6311075365126949, "grad_norm": 5.966657252894049, "learning_rate": 3.163997100622893e-06, "loss": 17.4659, "step": 34526 }, { "epoch": 0.6311258157091414, "grad_norm": 6.051302039982791, "learning_rate": 3.163721771141205e-06, "loss": 17.2097, "step": 34527 }, { "epoch": 0.631144094905588, "grad_norm": 7.01569194019243, "learning_rate": 3.1634464480955114e-06, "loss": 17.9494, "step": 34528 }, { "epoch": 0.6311623741020345, "grad_norm": 6.448417845344958, "learning_rate": 3.1631711314867793e-06, "loss": 17.9567, "step": 34529 }, { "epoch": 0.631180653298481, "grad_norm": 6.057711210820891, "learning_rate": 3.162895821315974e-06, "loss": 17.3018, "step": 34530 }, { "epoch": 0.6311989324949275, "grad_norm": 7.371822040004756, "learning_rate": 3.1626205175840574e-06, "loss": 17.8998, "step": 34531 }, { "epoch": 0.631217211691374, "grad_norm": 6.848928995738337, "learning_rate": 3.162345220291998e-06, "loss": 17.5333, "step": 34532 }, { "epoch": 0.6312354908878206, "grad_norm": 7.273010351608065, "learning_rate": 3.1620699294407585e-06, "loss": 17.736, "step": 34533 }, { "epoch": 0.6312537700842671, "grad_norm": 8.90515824333653, "learning_rate": 3.161794645031304e-06, "loss": 18.2575, "step": 34534 }, { "epoch": 0.6312720492807136, "grad_norm": 5.300250813057371, "learning_rate": 3.1615193670646017e-06, "loss": 16.9204, "step": 34535 }, { "epoch": 0.6312903284771602, "grad_norm": 6.018820536440446, "learning_rate": 3.1612440955416135e-06, "loss": 17.2375, "step": 34536 }, { "epoch": 0.6313086076736066, "grad_norm": 5.788126685745068, "learning_rate": 3.160968830463304e-06, "loss": 17.0314, "step": 34537 }, { "epoch": 0.6313268868700532, "grad_norm": 6.139325460505891, "learning_rate": 3.1606935718306396e-06, "loss": 17.3277, "step": 34538 }, { "epoch": 0.6313451660664997, "grad_norm": 6.314084739575204, "learning_rate": 3.1604183196445865e-06, "loss": 17.4973, "step": 34539 }, { "epoch": 0.6313634452629462, "grad_norm": 8.164147842295595, "learning_rate": 3.1601430739061045e-06, "loss": 17.9555, "step": 34540 }, { "epoch": 0.6313817244593928, "grad_norm": 6.039143818787467, "learning_rate": 3.1598678346161644e-06, "loss": 17.3478, "step": 34541 }, { "epoch": 0.6314000036558393, "grad_norm": 6.654020498746132, "learning_rate": 3.159592601775725e-06, "loss": 17.6775, "step": 34542 }, { "epoch": 0.6314182828522859, "grad_norm": 5.077660442426693, "learning_rate": 3.1593173753857553e-06, "loss": 17.0326, "step": 34543 }, { "epoch": 0.6314365620487323, "grad_norm": 5.713254283271029, "learning_rate": 3.1590421554472193e-06, "loss": 17.2137, "step": 34544 }, { "epoch": 0.6314548412451788, "grad_norm": 6.670016594829639, "learning_rate": 3.1587669419610787e-06, "loss": 17.4821, "step": 34545 }, { "epoch": 0.6314731204416254, "grad_norm": 5.476742829383323, "learning_rate": 3.158491734928303e-06, "loss": 17.1711, "step": 34546 }, { "epoch": 0.6314913996380719, "grad_norm": 6.386947435629511, "learning_rate": 3.1582165343498514e-06, "loss": 17.4169, "step": 34547 }, { "epoch": 0.6315096788345185, "grad_norm": 5.630759360239184, "learning_rate": 3.1579413402266913e-06, "loss": 17.3841, "step": 34548 }, { "epoch": 0.631527958030965, "grad_norm": 5.508141529337658, "learning_rate": 3.1576661525597883e-06, "loss": 17.1005, "step": 34549 }, { "epoch": 0.6315462372274114, "grad_norm": 5.478896365593293, "learning_rate": 3.1573909713501063e-06, "loss": 17.1312, "step": 34550 }, { "epoch": 0.631564516423858, "grad_norm": 5.921649120228827, "learning_rate": 3.1571157965986066e-06, "loss": 17.3982, "step": 34551 }, { "epoch": 0.6315827956203045, "grad_norm": 7.821127803796098, "learning_rate": 3.156840628306257e-06, "loss": 18.4378, "step": 34552 }, { "epoch": 0.6316010748167511, "grad_norm": 7.076999907758115, "learning_rate": 3.156565466474022e-06, "loss": 17.7013, "step": 34553 }, { "epoch": 0.6316193540131976, "grad_norm": 6.199656964328896, "learning_rate": 3.156290311102863e-06, "loss": 17.5447, "step": 34554 }, { "epoch": 0.6316376332096441, "grad_norm": 5.511842009035654, "learning_rate": 3.1560151621937496e-06, "loss": 17.0521, "step": 34555 }, { "epoch": 0.6316559124060906, "grad_norm": 7.540261726312701, "learning_rate": 3.1557400197476405e-06, "loss": 17.5019, "step": 34556 }, { "epoch": 0.6316741916025371, "grad_norm": 5.34314171654692, "learning_rate": 3.155464883765502e-06, "loss": 17.1992, "step": 34557 }, { "epoch": 0.6316924707989837, "grad_norm": 5.705547352035867, "learning_rate": 3.155189754248302e-06, "loss": 17.1891, "step": 34558 }, { "epoch": 0.6317107499954302, "grad_norm": 6.310209240102754, "learning_rate": 3.154914631196999e-06, "loss": 17.4772, "step": 34559 }, { "epoch": 0.6317290291918767, "grad_norm": 6.06244219895236, "learning_rate": 3.1546395146125618e-06, "loss": 17.3595, "step": 34560 }, { "epoch": 0.6317473083883233, "grad_norm": 6.116412948635944, "learning_rate": 3.1543644044959525e-06, "loss": 17.2399, "step": 34561 }, { "epoch": 0.6317655875847697, "grad_norm": 6.889943861439034, "learning_rate": 3.154089300848135e-06, "loss": 17.9043, "step": 34562 }, { "epoch": 0.6317838667812163, "grad_norm": 6.109362017170156, "learning_rate": 3.1538142036700756e-06, "loss": 17.2535, "step": 34563 }, { "epoch": 0.6318021459776628, "grad_norm": 6.21148406793288, "learning_rate": 3.1535391129627368e-06, "loss": 17.7026, "step": 34564 }, { "epoch": 0.6318204251741093, "grad_norm": 6.439251901615316, "learning_rate": 3.1532640287270832e-06, "loss": 17.4253, "step": 34565 }, { "epoch": 0.6318387043705559, "grad_norm": 5.277797258746995, "learning_rate": 3.1529889509640786e-06, "loss": 17.2032, "step": 34566 }, { "epoch": 0.6318569835670024, "grad_norm": 5.42465972851741, "learning_rate": 3.152713879674689e-06, "loss": 17.1333, "step": 34567 }, { "epoch": 0.631875262763449, "grad_norm": 7.454805119843063, "learning_rate": 3.152438814859874e-06, "loss": 17.9783, "step": 34568 }, { "epoch": 0.6318935419598954, "grad_norm": 6.007018685268087, "learning_rate": 3.152163756520602e-06, "loss": 17.5705, "step": 34569 }, { "epoch": 0.6319118211563419, "grad_norm": 5.41154462149028, "learning_rate": 3.151888704657837e-06, "loss": 17.0251, "step": 34570 }, { "epoch": 0.6319301003527885, "grad_norm": 6.193750669845547, "learning_rate": 3.1516136592725398e-06, "loss": 17.4049, "step": 34571 }, { "epoch": 0.631948379549235, "grad_norm": 6.260974074998678, "learning_rate": 3.1513386203656782e-06, "loss": 17.3362, "step": 34572 }, { "epoch": 0.6319666587456816, "grad_norm": 5.010457703419808, "learning_rate": 3.151063587938212e-06, "loss": 17.121, "step": 34573 }, { "epoch": 0.6319849379421281, "grad_norm": 6.28901799134725, "learning_rate": 3.1507885619911085e-06, "loss": 17.1104, "step": 34574 }, { "epoch": 0.6320032171385745, "grad_norm": 6.094009109760269, "learning_rate": 3.150513542525332e-06, "loss": 17.5724, "step": 34575 }, { "epoch": 0.6320214963350211, "grad_norm": 5.749466700450055, "learning_rate": 3.1502385295418426e-06, "loss": 17.3045, "step": 34576 }, { "epoch": 0.6320397755314676, "grad_norm": 7.470699778042288, "learning_rate": 3.149963523041609e-06, "loss": 17.8333, "step": 34577 }, { "epoch": 0.6320580547279142, "grad_norm": 7.373732022696128, "learning_rate": 3.1496885230255915e-06, "loss": 17.542, "step": 34578 }, { "epoch": 0.6320763339243607, "grad_norm": 6.507937773257512, "learning_rate": 3.1494135294947546e-06, "loss": 17.326, "step": 34579 }, { "epoch": 0.6320946131208072, "grad_norm": 6.521028247201972, "learning_rate": 3.149138542450064e-06, "loss": 17.5258, "step": 34580 }, { "epoch": 0.6321128923172538, "grad_norm": 5.86640343287167, "learning_rate": 3.148863561892483e-06, "loss": 17.5461, "step": 34581 }, { "epoch": 0.6321311715137002, "grad_norm": 6.141294863244781, "learning_rate": 3.1485885878229717e-06, "loss": 17.5129, "step": 34582 }, { "epoch": 0.6321494507101468, "grad_norm": 6.904232721965437, "learning_rate": 3.148313620242498e-06, "loss": 17.928, "step": 34583 }, { "epoch": 0.6321677299065933, "grad_norm": 5.380876867657859, "learning_rate": 3.1480386591520262e-06, "loss": 17.1567, "step": 34584 }, { "epoch": 0.6321860091030398, "grad_norm": 5.495753267136059, "learning_rate": 3.1477637045525156e-06, "loss": 17.1538, "step": 34585 }, { "epoch": 0.6322042882994864, "grad_norm": 5.533364779537753, "learning_rate": 3.147488756444934e-06, "loss": 17.1603, "step": 34586 }, { "epoch": 0.6322225674959329, "grad_norm": 5.985714938654341, "learning_rate": 3.1472138148302435e-06, "loss": 17.15, "step": 34587 }, { "epoch": 0.6322408466923795, "grad_norm": 6.413113348076904, "learning_rate": 3.1469388797094058e-06, "loss": 17.814, "step": 34588 }, { "epoch": 0.6322591258888259, "grad_norm": 6.102454393013312, "learning_rate": 3.146663951083389e-06, "loss": 17.2854, "step": 34589 }, { "epoch": 0.6322774050852724, "grad_norm": 6.313194523430326, "learning_rate": 3.1463890289531523e-06, "loss": 17.6769, "step": 34590 }, { "epoch": 0.632295684281719, "grad_norm": 6.941782180160428, "learning_rate": 3.146114113319663e-06, "loss": 17.826, "step": 34591 }, { "epoch": 0.6323139634781655, "grad_norm": 6.391043214981942, "learning_rate": 3.1458392041838827e-06, "loss": 17.453, "step": 34592 }, { "epoch": 0.6323322426746121, "grad_norm": 6.408439345547509, "learning_rate": 3.145564301546773e-06, "loss": 17.4021, "step": 34593 }, { "epoch": 0.6323505218710586, "grad_norm": 5.700318456935949, "learning_rate": 3.1452894054093018e-06, "loss": 17.2678, "step": 34594 }, { "epoch": 0.632368801067505, "grad_norm": 6.987259293705293, "learning_rate": 3.1450145157724303e-06, "loss": 18.0121, "step": 34595 }, { "epoch": 0.6323870802639516, "grad_norm": 5.254423843688775, "learning_rate": 3.1447396326371203e-06, "loss": 16.9617, "step": 34596 }, { "epoch": 0.6324053594603981, "grad_norm": 4.950387756122656, "learning_rate": 3.1444647560043374e-06, "loss": 17.0649, "step": 34597 }, { "epoch": 0.6324236386568447, "grad_norm": 7.62328733480866, "learning_rate": 3.144189885875046e-06, "loss": 17.8077, "step": 34598 }, { "epoch": 0.6324419178532912, "grad_norm": 6.080524723893163, "learning_rate": 3.1439150222502056e-06, "loss": 17.568, "step": 34599 }, { "epoch": 0.6324601970497377, "grad_norm": 6.8603853209269525, "learning_rate": 3.1436401651307846e-06, "loss": 17.3053, "step": 34600 }, { "epoch": 0.6324784762461843, "grad_norm": 6.597241653060989, "learning_rate": 3.143365314517742e-06, "loss": 18.0774, "step": 34601 }, { "epoch": 0.6324967554426307, "grad_norm": 7.21272101468529, "learning_rate": 3.1430904704120425e-06, "loss": 17.6138, "step": 34602 }, { "epoch": 0.6325150346390772, "grad_norm": 6.208492702704404, "learning_rate": 3.142815632814651e-06, "loss": 17.381, "step": 34603 }, { "epoch": 0.6325333138355238, "grad_norm": 6.651394965576881, "learning_rate": 3.142540801726528e-06, "loss": 17.4921, "step": 34604 }, { "epoch": 0.6325515930319703, "grad_norm": 5.725646870316962, "learning_rate": 3.1422659771486404e-06, "loss": 17.1354, "step": 34605 }, { "epoch": 0.6325698722284169, "grad_norm": 7.32973800422905, "learning_rate": 3.141991159081948e-06, "loss": 17.8888, "step": 34606 }, { "epoch": 0.6325881514248634, "grad_norm": 4.954250892161241, "learning_rate": 3.141716347527415e-06, "loss": 16.9015, "step": 34607 }, { "epoch": 0.6326064306213098, "grad_norm": 5.65039569993535, "learning_rate": 3.141441542486006e-06, "loss": 17.4649, "step": 34608 }, { "epoch": 0.6326247098177564, "grad_norm": 8.433578688819107, "learning_rate": 3.1411667439586836e-06, "loss": 17.7373, "step": 34609 }, { "epoch": 0.6326429890142029, "grad_norm": 5.78462760769103, "learning_rate": 3.1408919519464085e-06, "loss": 17.3338, "step": 34610 }, { "epoch": 0.6326612682106495, "grad_norm": 6.157067340167726, "learning_rate": 3.140617166450147e-06, "loss": 17.2998, "step": 34611 }, { "epoch": 0.632679547407096, "grad_norm": 7.493021613068012, "learning_rate": 3.1403423874708617e-06, "loss": 18.035, "step": 34612 }, { "epoch": 0.6326978266035425, "grad_norm": 5.755495452404367, "learning_rate": 3.140067615009513e-06, "loss": 17.1248, "step": 34613 }, { "epoch": 0.632716105799989, "grad_norm": 5.058696915533854, "learning_rate": 3.1397928490670683e-06, "loss": 16.9709, "step": 34614 }, { "epoch": 0.6327343849964355, "grad_norm": 6.083323000923132, "learning_rate": 3.1395180896444867e-06, "loss": 17.2602, "step": 34615 }, { "epoch": 0.6327526641928821, "grad_norm": 6.365923280402653, "learning_rate": 3.1392433367427315e-06, "loss": 17.4364, "step": 34616 }, { "epoch": 0.6327709433893286, "grad_norm": 5.802516237593888, "learning_rate": 3.13896859036277e-06, "loss": 17.2868, "step": 34617 }, { "epoch": 0.6327892225857751, "grad_norm": 6.524769781046129, "learning_rate": 3.1386938505055584e-06, "loss": 17.4717, "step": 34618 }, { "epoch": 0.6328075017822217, "grad_norm": 5.946283266035934, "learning_rate": 3.138419117172067e-06, "loss": 17.2901, "step": 34619 }, { "epoch": 0.6328257809786682, "grad_norm": 5.701524765883014, "learning_rate": 3.1381443903632534e-06, "loss": 17.1688, "step": 34620 }, { "epoch": 0.6328440601751147, "grad_norm": 6.069627029493351, "learning_rate": 3.1378696700800814e-06, "loss": 17.3939, "step": 34621 }, { "epoch": 0.6328623393715612, "grad_norm": 6.147408610139952, "learning_rate": 3.1375949563235164e-06, "loss": 17.5989, "step": 34622 }, { "epoch": 0.6328806185680077, "grad_norm": 4.881173481065821, "learning_rate": 3.1373202490945192e-06, "loss": 16.7115, "step": 34623 }, { "epoch": 0.6328988977644543, "grad_norm": 6.299590594632667, "learning_rate": 3.137045548394051e-06, "loss": 17.4536, "step": 34624 }, { "epoch": 0.6329171769609008, "grad_norm": 5.161343333301881, "learning_rate": 3.136770854223078e-06, "loss": 16.8495, "step": 34625 }, { "epoch": 0.6329354561573474, "grad_norm": 5.630608735144526, "learning_rate": 3.136496166582562e-06, "loss": 17.0739, "step": 34626 }, { "epoch": 0.6329537353537938, "grad_norm": 6.376475460313799, "learning_rate": 3.1362214854734635e-06, "loss": 17.5214, "step": 34627 }, { "epoch": 0.6329720145502403, "grad_norm": 6.797566119016067, "learning_rate": 3.135946810896749e-06, "loss": 17.7194, "step": 34628 }, { "epoch": 0.6329902937466869, "grad_norm": 6.77917987003584, "learning_rate": 3.1356721428533775e-06, "loss": 17.4227, "step": 34629 }, { "epoch": 0.6330085729431334, "grad_norm": 7.3967256782202355, "learning_rate": 3.135397481344313e-06, "loss": 17.5857, "step": 34630 }, { "epoch": 0.63302685213958, "grad_norm": 7.373726178595467, "learning_rate": 3.1351228263705204e-06, "loss": 17.8143, "step": 34631 }, { "epoch": 0.6330451313360265, "grad_norm": 6.005606575370103, "learning_rate": 3.13484817793296e-06, "loss": 17.3946, "step": 34632 }, { "epoch": 0.633063410532473, "grad_norm": 8.538687689940335, "learning_rate": 3.1345735360325936e-06, "loss": 18.4726, "step": 34633 }, { "epoch": 0.6330816897289195, "grad_norm": 6.2022870885831605, "learning_rate": 3.134298900670386e-06, "loss": 17.4883, "step": 34634 }, { "epoch": 0.633099968925366, "grad_norm": 6.598625368241477, "learning_rate": 3.134024271847298e-06, "loss": 17.1462, "step": 34635 }, { "epoch": 0.6331182481218126, "grad_norm": 6.872771012624079, "learning_rate": 3.1337496495642944e-06, "loss": 17.8391, "step": 34636 }, { "epoch": 0.6331365273182591, "grad_norm": 6.018453715095861, "learning_rate": 3.1334750338223357e-06, "loss": 17.3096, "step": 34637 }, { "epoch": 0.6331548065147056, "grad_norm": 7.110498176290491, "learning_rate": 3.1332004246223847e-06, "loss": 17.5746, "step": 34638 }, { "epoch": 0.6331730857111522, "grad_norm": 6.1603143184084, "learning_rate": 3.1329258219654046e-06, "loss": 17.2959, "step": 34639 }, { "epoch": 0.6331913649075986, "grad_norm": 6.723140612406345, "learning_rate": 3.1326512258523583e-06, "loss": 17.4832, "step": 34640 }, { "epoch": 0.6332096441040452, "grad_norm": 6.664344973168176, "learning_rate": 3.1323766362842056e-06, "loss": 17.8185, "step": 34641 }, { "epoch": 0.6332279233004917, "grad_norm": 6.811996120179416, "learning_rate": 3.1321020532619117e-06, "loss": 17.8647, "step": 34642 }, { "epoch": 0.6332462024969382, "grad_norm": 7.597368388199197, "learning_rate": 3.1318274767864387e-06, "loss": 17.6342, "step": 34643 }, { "epoch": 0.6332644816933848, "grad_norm": 5.765443179925592, "learning_rate": 3.131552906858747e-06, "loss": 17.1089, "step": 34644 }, { "epoch": 0.6332827608898313, "grad_norm": 5.725232341519297, "learning_rate": 3.1312783434798013e-06, "loss": 17.1895, "step": 34645 }, { "epoch": 0.6333010400862779, "grad_norm": 6.288169074820464, "learning_rate": 3.131003786650562e-06, "loss": 17.5097, "step": 34646 }, { "epoch": 0.6333193192827243, "grad_norm": 7.494000060179946, "learning_rate": 3.1307292363719916e-06, "loss": 17.9611, "step": 34647 }, { "epoch": 0.6333375984791708, "grad_norm": 7.400757456451404, "learning_rate": 3.1304546926450553e-06, "loss": 17.8281, "step": 34648 }, { "epoch": 0.6333558776756174, "grad_norm": 7.104350265962434, "learning_rate": 3.13018015547071e-06, "loss": 17.6468, "step": 34649 }, { "epoch": 0.6333741568720639, "grad_norm": 5.63324322336555, "learning_rate": 3.129905624849924e-06, "loss": 17.4843, "step": 34650 }, { "epoch": 0.6333924360685105, "grad_norm": 5.673499184650238, "learning_rate": 3.1296311007836556e-06, "loss": 17.1923, "step": 34651 }, { "epoch": 0.633410715264957, "grad_norm": 5.274933587321008, "learning_rate": 3.1293565832728655e-06, "loss": 17.0514, "step": 34652 }, { "epoch": 0.6334289944614034, "grad_norm": 6.579228448833791, "learning_rate": 3.1290820723185217e-06, "loss": 17.7948, "step": 34653 }, { "epoch": 0.63344727365785, "grad_norm": 6.445875053155152, "learning_rate": 3.1288075679215825e-06, "loss": 17.3891, "step": 34654 }, { "epoch": 0.6334655528542965, "grad_norm": 7.069378240761531, "learning_rate": 3.1285330700830085e-06, "loss": 17.2815, "step": 34655 }, { "epoch": 0.6334838320507431, "grad_norm": 6.12055620334684, "learning_rate": 3.1282585788037645e-06, "loss": 17.4868, "step": 34656 }, { "epoch": 0.6335021112471896, "grad_norm": 6.974505512328942, "learning_rate": 3.1279840940848126e-06, "loss": 17.6852, "step": 34657 }, { "epoch": 0.6335203904436361, "grad_norm": 4.827913085078673, "learning_rate": 3.127709615927112e-06, "loss": 16.806, "step": 34658 }, { "epoch": 0.6335386696400827, "grad_norm": 7.834627911910548, "learning_rate": 3.1274351443316286e-06, "loss": 18.1862, "step": 34659 }, { "epoch": 0.6335569488365291, "grad_norm": 5.336236674014954, "learning_rate": 3.1271606792993225e-06, "loss": 17.1873, "step": 34660 }, { "epoch": 0.6335752280329757, "grad_norm": 6.974249715343941, "learning_rate": 3.1268862208311533e-06, "loss": 17.7555, "step": 34661 }, { "epoch": 0.6335935072294222, "grad_norm": 6.8558561003504, "learning_rate": 3.1266117689280883e-06, "loss": 17.5476, "step": 34662 }, { "epoch": 0.6336117864258687, "grad_norm": 6.561699038250068, "learning_rate": 3.1263373235910867e-06, "loss": 17.4166, "step": 34663 }, { "epoch": 0.6336300656223153, "grad_norm": 7.192803504298533, "learning_rate": 3.126062884821107e-06, "loss": 18.0037, "step": 34664 }, { "epoch": 0.6336483448187618, "grad_norm": 6.545478811388248, "learning_rate": 3.125788452619116e-06, "loss": 17.7235, "step": 34665 }, { "epoch": 0.6336666240152083, "grad_norm": 4.367020130612249, "learning_rate": 3.1255140269860732e-06, "loss": 16.5496, "step": 34666 }, { "epoch": 0.6336849032116548, "grad_norm": 7.19422231293985, "learning_rate": 3.125239607922943e-06, "loss": 17.8525, "step": 34667 }, { "epoch": 0.6337031824081013, "grad_norm": 5.686649143183966, "learning_rate": 3.1249651954306847e-06, "loss": 16.9891, "step": 34668 }, { "epoch": 0.6337214616045479, "grad_norm": 5.815270567260811, "learning_rate": 3.1246907895102597e-06, "loss": 17.2826, "step": 34669 }, { "epoch": 0.6337397408009944, "grad_norm": 6.4349952072598295, "learning_rate": 3.124416390162631e-06, "loss": 17.6668, "step": 34670 }, { "epoch": 0.6337580199974409, "grad_norm": 5.807716783578208, "learning_rate": 3.124141997388761e-06, "loss": 17.1974, "step": 34671 }, { "epoch": 0.6337762991938874, "grad_norm": 6.687334874628838, "learning_rate": 3.123867611189608e-06, "loss": 17.3475, "step": 34672 }, { "epoch": 0.6337945783903339, "grad_norm": 8.372655966876149, "learning_rate": 3.123593231566139e-06, "loss": 18.0929, "step": 34673 }, { "epoch": 0.6338128575867805, "grad_norm": 7.590607814625287, "learning_rate": 3.123318858519312e-06, "loss": 17.8746, "step": 34674 }, { "epoch": 0.633831136783227, "grad_norm": 8.739075807673114, "learning_rate": 3.1230444920500885e-06, "loss": 18.208, "step": 34675 }, { "epoch": 0.6338494159796735, "grad_norm": 4.667549058885208, "learning_rate": 3.122770132159433e-06, "loss": 16.6415, "step": 34676 }, { "epoch": 0.6338676951761201, "grad_norm": 5.480847939036081, "learning_rate": 3.1224957788483055e-06, "loss": 16.9776, "step": 34677 }, { "epoch": 0.6338859743725666, "grad_norm": 5.440172386721566, "learning_rate": 3.122221432117665e-06, "loss": 17.1208, "step": 34678 }, { "epoch": 0.6339042535690131, "grad_norm": 5.720889319308768, "learning_rate": 3.121947091968477e-06, "loss": 17.3859, "step": 34679 }, { "epoch": 0.6339225327654596, "grad_norm": 6.748658153622029, "learning_rate": 3.121672758401699e-06, "loss": 17.8191, "step": 34680 }, { "epoch": 0.6339408119619061, "grad_norm": 4.7286480893345315, "learning_rate": 3.121398431418299e-06, "loss": 16.8027, "step": 34681 }, { "epoch": 0.6339590911583527, "grad_norm": 5.4959406643390745, "learning_rate": 3.1211241110192335e-06, "loss": 16.9852, "step": 34682 }, { "epoch": 0.6339773703547992, "grad_norm": 6.048306018618058, "learning_rate": 3.120849797205463e-06, "loss": 17.1916, "step": 34683 }, { "epoch": 0.6339956495512458, "grad_norm": 6.432932169765108, "learning_rate": 3.120575489977952e-06, "loss": 17.6071, "step": 34684 }, { "epoch": 0.6340139287476922, "grad_norm": 6.338424120707963, "learning_rate": 3.120301189337661e-06, "loss": 17.3044, "step": 34685 }, { "epoch": 0.6340322079441387, "grad_norm": 6.655221442623788, "learning_rate": 3.1200268952855505e-06, "loss": 17.7859, "step": 34686 }, { "epoch": 0.6340504871405853, "grad_norm": 5.208386105355772, "learning_rate": 3.1197526078225837e-06, "loss": 16.8018, "step": 34687 }, { "epoch": 0.6340687663370318, "grad_norm": 5.023862222809742, "learning_rate": 3.1194783269497203e-06, "loss": 17.1432, "step": 34688 }, { "epoch": 0.6340870455334784, "grad_norm": 7.036469889196774, "learning_rate": 3.119204052667921e-06, "loss": 17.6834, "step": 34689 }, { "epoch": 0.6341053247299249, "grad_norm": 7.225698244594402, "learning_rate": 3.1189297849781506e-06, "loss": 18.1312, "step": 34690 }, { "epoch": 0.6341236039263713, "grad_norm": 5.930644374574207, "learning_rate": 3.1186555238813666e-06, "loss": 17.2783, "step": 34691 }, { "epoch": 0.6341418831228179, "grad_norm": 4.680526258110477, "learning_rate": 3.118381269378532e-06, "loss": 16.658, "step": 34692 }, { "epoch": 0.6341601623192644, "grad_norm": 5.55989366401622, "learning_rate": 3.118107021470608e-06, "loss": 17.1112, "step": 34693 }, { "epoch": 0.634178441515711, "grad_norm": 5.348088619946655, "learning_rate": 3.117832780158556e-06, "loss": 16.9728, "step": 34694 }, { "epoch": 0.6341967207121575, "grad_norm": 6.081024293336923, "learning_rate": 3.1175585454433357e-06, "loss": 17.3563, "step": 34695 }, { "epoch": 0.634214999908604, "grad_norm": 6.095181801439317, "learning_rate": 3.1172843173259094e-06, "loss": 17.3761, "step": 34696 }, { "epoch": 0.6342332791050506, "grad_norm": 5.595737787724827, "learning_rate": 3.117010095807238e-06, "loss": 17.3211, "step": 34697 }, { "epoch": 0.634251558301497, "grad_norm": 5.4966577588889916, "learning_rate": 3.1167358808882837e-06, "loss": 17.0979, "step": 34698 }, { "epoch": 0.6342698374979436, "grad_norm": 7.195803820718895, "learning_rate": 3.116461672570007e-06, "loss": 17.7369, "step": 34699 }, { "epoch": 0.6342881166943901, "grad_norm": 8.879534736263285, "learning_rate": 3.1161874708533675e-06, "loss": 18.1534, "step": 34700 }, { "epoch": 0.6343063958908366, "grad_norm": 5.856782477549523, "learning_rate": 3.1159132757393277e-06, "loss": 17.4653, "step": 34701 }, { "epoch": 0.6343246750872832, "grad_norm": 8.883791569804577, "learning_rate": 3.11563908722885e-06, "loss": 17.4343, "step": 34702 }, { "epoch": 0.6343429542837297, "grad_norm": 6.914000453842681, "learning_rate": 3.1153649053228914e-06, "loss": 17.7438, "step": 34703 }, { "epoch": 0.6343612334801763, "grad_norm": 6.875160151205205, "learning_rate": 3.1150907300224177e-06, "loss": 17.6091, "step": 34704 }, { "epoch": 0.6343795126766227, "grad_norm": 7.211921157157817, "learning_rate": 3.1148165613283854e-06, "loss": 17.7127, "step": 34705 }, { "epoch": 0.6343977918730692, "grad_norm": 4.811551042038129, "learning_rate": 3.1145423992417573e-06, "loss": 16.8314, "step": 34706 }, { "epoch": 0.6344160710695158, "grad_norm": 5.440939505667485, "learning_rate": 3.1142682437634965e-06, "loss": 17.0305, "step": 34707 }, { "epoch": 0.6344343502659623, "grad_norm": 6.779823326687071, "learning_rate": 3.1139940948945614e-06, "loss": 17.6796, "step": 34708 }, { "epoch": 0.6344526294624089, "grad_norm": 5.808431388297607, "learning_rate": 3.113719952635912e-06, "loss": 17.0465, "step": 34709 }, { "epoch": 0.6344709086588554, "grad_norm": 5.8553036376024385, "learning_rate": 3.1134458169885106e-06, "loss": 17.0904, "step": 34710 }, { "epoch": 0.6344891878553018, "grad_norm": 7.007612867731194, "learning_rate": 3.1131716879533174e-06, "loss": 17.739, "step": 34711 }, { "epoch": 0.6345074670517484, "grad_norm": 6.569158699552516, "learning_rate": 3.1128975655312953e-06, "loss": 17.7243, "step": 34712 }, { "epoch": 0.6345257462481949, "grad_norm": 6.910746982556639, "learning_rate": 3.112623449723404e-06, "loss": 17.9692, "step": 34713 }, { "epoch": 0.6345440254446415, "grad_norm": 6.176008722374058, "learning_rate": 3.1123493405306014e-06, "loss": 17.3362, "step": 34714 }, { "epoch": 0.634562304641088, "grad_norm": 7.081841829470405, "learning_rate": 3.112075237953852e-06, "loss": 17.7923, "step": 34715 }, { "epoch": 0.6345805838375345, "grad_norm": 5.826017678031928, "learning_rate": 3.111801141994116e-06, "loss": 17.3879, "step": 34716 }, { "epoch": 0.634598863033981, "grad_norm": 7.241722468333335, "learning_rate": 3.111527052652351e-06, "loss": 17.8408, "step": 34717 }, { "epoch": 0.6346171422304275, "grad_norm": 5.490118060764702, "learning_rate": 3.111252969929522e-06, "loss": 17.255, "step": 34718 }, { "epoch": 0.6346354214268741, "grad_norm": 5.794120149338187, "learning_rate": 3.110978893826586e-06, "loss": 17.3553, "step": 34719 }, { "epoch": 0.6346537006233206, "grad_norm": 4.8703305065369245, "learning_rate": 3.1107048243445042e-06, "loss": 16.8368, "step": 34720 }, { "epoch": 0.6346719798197671, "grad_norm": 6.350566803906843, "learning_rate": 3.11043076148424e-06, "loss": 17.3463, "step": 34721 }, { "epoch": 0.6346902590162137, "grad_norm": 6.904024221502481, "learning_rate": 3.1101567052467524e-06, "loss": 17.6264, "step": 34722 }, { "epoch": 0.6347085382126602, "grad_norm": 7.239112387295453, "learning_rate": 3.109882655632999e-06, "loss": 17.3037, "step": 34723 }, { "epoch": 0.6347268174091067, "grad_norm": 6.1986496032905745, "learning_rate": 3.109608612643944e-06, "loss": 17.3679, "step": 34724 }, { "epoch": 0.6347450966055532, "grad_norm": 6.668527089176564, "learning_rate": 3.1093345762805484e-06, "loss": 17.4517, "step": 34725 }, { "epoch": 0.6347633758019997, "grad_norm": 5.076059186131579, "learning_rate": 3.1090605465437683e-06, "loss": 16.9524, "step": 34726 }, { "epoch": 0.6347816549984463, "grad_norm": 7.255808209560102, "learning_rate": 3.1087865234345695e-06, "loss": 18.0206, "step": 34727 }, { "epoch": 0.6347999341948928, "grad_norm": 6.363864003909651, "learning_rate": 3.1085125069539076e-06, "loss": 17.2017, "step": 34728 }, { "epoch": 0.6348182133913394, "grad_norm": 5.987809401210551, "learning_rate": 3.108238497102746e-06, "loss": 17.0981, "step": 34729 }, { "epoch": 0.6348364925877859, "grad_norm": 6.026751615338986, "learning_rate": 3.107964493882045e-06, "loss": 17.3863, "step": 34730 }, { "epoch": 0.6348547717842323, "grad_norm": 6.853788986378907, "learning_rate": 3.107690497292763e-06, "loss": 17.7356, "step": 34731 }, { "epoch": 0.6348730509806789, "grad_norm": 6.124584260483115, "learning_rate": 3.107416507335863e-06, "loss": 17.4642, "step": 34732 }, { "epoch": 0.6348913301771254, "grad_norm": 5.992271090640211, "learning_rate": 3.107142524012303e-06, "loss": 17.4032, "step": 34733 }, { "epoch": 0.634909609373572, "grad_norm": 5.0035620492192425, "learning_rate": 3.106868547323044e-06, "loss": 16.7924, "step": 34734 }, { "epoch": 0.6349278885700185, "grad_norm": 4.523840264839679, "learning_rate": 3.1065945772690475e-06, "loss": 16.7027, "step": 34735 }, { "epoch": 0.634946167766465, "grad_norm": 5.272295688816584, "learning_rate": 3.1063206138512736e-06, "loss": 17.0654, "step": 34736 }, { "epoch": 0.6349644469629115, "grad_norm": 7.718684080770372, "learning_rate": 3.1060466570706787e-06, "loss": 17.8486, "step": 34737 }, { "epoch": 0.634982726159358, "grad_norm": 5.608423470628357, "learning_rate": 3.1057727069282273e-06, "loss": 17.1106, "step": 34738 }, { "epoch": 0.6350010053558045, "grad_norm": 6.769224423199421, "learning_rate": 3.105498763424879e-06, "loss": 17.5834, "step": 34739 }, { "epoch": 0.6350192845522511, "grad_norm": 5.595841989452351, "learning_rate": 3.1052248265615913e-06, "loss": 17.2388, "step": 34740 }, { "epoch": 0.6350375637486976, "grad_norm": 6.099205725488985, "learning_rate": 3.1049508963393283e-06, "loss": 17.3987, "step": 34741 }, { "epoch": 0.6350558429451442, "grad_norm": 6.355140380679315, "learning_rate": 3.1046769727590466e-06, "loss": 17.3813, "step": 34742 }, { "epoch": 0.6350741221415906, "grad_norm": 5.947480910313214, "learning_rate": 3.1044030558217076e-06, "loss": 17.2881, "step": 34743 }, { "epoch": 0.6350924013380371, "grad_norm": 6.1374778679071875, "learning_rate": 3.1041291455282722e-06, "loss": 17.3275, "step": 34744 }, { "epoch": 0.6351106805344837, "grad_norm": 7.5875784847154275, "learning_rate": 3.1038552418796986e-06, "loss": 17.849, "step": 34745 }, { "epoch": 0.6351289597309302, "grad_norm": 5.825258721685639, "learning_rate": 3.103581344876949e-06, "loss": 16.9068, "step": 34746 }, { "epoch": 0.6351472389273768, "grad_norm": 6.7259392317543565, "learning_rate": 3.1033074545209817e-06, "loss": 17.6534, "step": 34747 }, { "epoch": 0.6351655181238233, "grad_norm": 6.823334468701852, "learning_rate": 3.1030335708127563e-06, "loss": 17.8599, "step": 34748 }, { "epoch": 0.6351837973202697, "grad_norm": 6.408739690105408, "learning_rate": 3.1027596937532355e-06, "loss": 17.6161, "step": 34749 }, { "epoch": 0.6352020765167163, "grad_norm": 7.7949775068182765, "learning_rate": 3.102485823343377e-06, "loss": 17.9948, "step": 34750 }, { "epoch": 0.6352203557131628, "grad_norm": 7.327410464532741, "learning_rate": 3.1022119595841393e-06, "loss": 18.0705, "step": 34751 }, { "epoch": 0.6352386349096094, "grad_norm": 6.252253762499037, "learning_rate": 3.101938102476485e-06, "loss": 17.5584, "step": 34752 }, { "epoch": 0.6352569141060559, "grad_norm": 5.582039367431618, "learning_rate": 3.1016642520213734e-06, "loss": 17.2224, "step": 34753 }, { "epoch": 0.6352751933025024, "grad_norm": 7.424715532612936, "learning_rate": 3.1013904082197627e-06, "loss": 17.7042, "step": 34754 }, { "epoch": 0.635293472498949, "grad_norm": 6.158690586890829, "learning_rate": 3.1011165710726156e-06, "loss": 17.6181, "step": 34755 }, { "epoch": 0.6353117516953954, "grad_norm": 4.945404672104348, "learning_rate": 3.1008427405808893e-06, "loss": 16.7342, "step": 34756 }, { "epoch": 0.635330030891842, "grad_norm": 7.321207441951642, "learning_rate": 3.1005689167455435e-06, "loss": 17.7128, "step": 34757 }, { "epoch": 0.6353483100882885, "grad_norm": 6.805543356479097, "learning_rate": 3.1002950995675395e-06, "loss": 17.5925, "step": 34758 }, { "epoch": 0.635366589284735, "grad_norm": 6.457249966250531, "learning_rate": 3.1000212890478353e-06, "loss": 17.7738, "step": 34759 }, { "epoch": 0.6353848684811816, "grad_norm": 4.81852683625884, "learning_rate": 3.0997474851873945e-06, "loss": 17.0, "step": 34760 }, { "epoch": 0.6354031476776281, "grad_norm": 6.877544536167743, "learning_rate": 3.0994736879871717e-06, "loss": 17.6913, "step": 34761 }, { "epoch": 0.6354214268740747, "grad_norm": 6.9957242003095095, "learning_rate": 3.099199897448128e-06, "loss": 17.6939, "step": 34762 }, { "epoch": 0.6354397060705211, "grad_norm": 4.828405587833626, "learning_rate": 3.0989261135712257e-06, "loss": 16.9937, "step": 34763 }, { "epoch": 0.6354579852669676, "grad_norm": 7.111379249813278, "learning_rate": 3.0986523363574216e-06, "loss": 17.5006, "step": 34764 }, { "epoch": 0.6354762644634142, "grad_norm": 7.862386281637828, "learning_rate": 3.0983785658076746e-06, "loss": 17.9618, "step": 34765 }, { "epoch": 0.6354945436598607, "grad_norm": 6.474188044082464, "learning_rate": 3.0981048019229483e-06, "loss": 17.5443, "step": 34766 }, { "epoch": 0.6355128228563073, "grad_norm": 6.1394791277870455, "learning_rate": 3.0978310447041992e-06, "loss": 17.4574, "step": 34767 }, { "epoch": 0.6355311020527538, "grad_norm": 5.768004432023149, "learning_rate": 3.097557294152385e-06, "loss": 16.9215, "step": 34768 }, { "epoch": 0.6355493812492002, "grad_norm": 6.725759291188686, "learning_rate": 3.097283550268469e-06, "loss": 17.8264, "step": 34769 }, { "epoch": 0.6355676604456468, "grad_norm": 5.464643046253087, "learning_rate": 3.0970098130534097e-06, "loss": 17.1321, "step": 34770 }, { "epoch": 0.6355859396420933, "grad_norm": 6.3599452534261856, "learning_rate": 3.096736082508164e-06, "loss": 17.5214, "step": 34771 }, { "epoch": 0.6356042188385399, "grad_norm": 7.188050268023017, "learning_rate": 3.096462358633695e-06, "loss": 17.6067, "step": 34772 }, { "epoch": 0.6356224980349864, "grad_norm": 6.062820618007371, "learning_rate": 3.0961886414309584e-06, "loss": 17.2206, "step": 34773 }, { "epoch": 0.6356407772314329, "grad_norm": 8.65250005900546, "learning_rate": 3.095914930900917e-06, "loss": 18.1817, "step": 34774 }, { "epoch": 0.6356590564278795, "grad_norm": 7.351035096924397, "learning_rate": 3.0956412270445283e-06, "loss": 17.5163, "step": 34775 }, { "epoch": 0.6356773356243259, "grad_norm": 6.877070900591682, "learning_rate": 3.0953675298627504e-06, "loss": 17.35, "step": 34776 }, { "epoch": 0.6356956148207725, "grad_norm": 5.611138433185784, "learning_rate": 3.0950938393565467e-06, "loss": 17.1596, "step": 34777 }, { "epoch": 0.635713894017219, "grad_norm": 6.614097615364052, "learning_rate": 3.094820155526872e-06, "loss": 17.389, "step": 34778 }, { "epoch": 0.6357321732136655, "grad_norm": 6.53043496075683, "learning_rate": 3.0945464783746863e-06, "loss": 17.2664, "step": 34779 }, { "epoch": 0.6357504524101121, "grad_norm": 7.4493081338922424, "learning_rate": 3.094272807900952e-06, "loss": 17.202, "step": 34780 }, { "epoch": 0.6357687316065586, "grad_norm": 7.457747833948153, "learning_rate": 3.0939991441066264e-06, "loss": 17.8031, "step": 34781 }, { "epoch": 0.6357870108030051, "grad_norm": 6.2773492911309035, "learning_rate": 3.0937254869926663e-06, "loss": 17.4498, "step": 34782 }, { "epoch": 0.6358052899994516, "grad_norm": 8.091285043448764, "learning_rate": 3.093451836560034e-06, "loss": 18.1688, "step": 34783 }, { "epoch": 0.6358235691958981, "grad_norm": 7.353118488911387, "learning_rate": 3.093178192809688e-06, "loss": 17.7655, "step": 34784 }, { "epoch": 0.6358418483923447, "grad_norm": 5.389277455838852, "learning_rate": 3.0929045557425863e-06, "loss": 16.9483, "step": 34785 }, { "epoch": 0.6358601275887912, "grad_norm": 6.896817354937758, "learning_rate": 3.0926309253596897e-06, "loss": 17.5846, "step": 34786 }, { "epoch": 0.6358784067852378, "grad_norm": 6.991558193963065, "learning_rate": 3.092357301661954e-06, "loss": 18.0073, "step": 34787 }, { "epoch": 0.6358966859816843, "grad_norm": 5.25206772042427, "learning_rate": 3.0920836846503414e-06, "loss": 17.0867, "step": 34788 }, { "epoch": 0.6359149651781307, "grad_norm": 6.716911399285549, "learning_rate": 3.0918100743258118e-06, "loss": 17.8147, "step": 34789 }, { "epoch": 0.6359332443745773, "grad_norm": 6.445285347313233, "learning_rate": 3.091536470689319e-06, "loss": 17.3697, "step": 34790 }, { "epoch": 0.6359515235710238, "grad_norm": 7.298030450538168, "learning_rate": 3.0912628737418283e-06, "loss": 17.9858, "step": 34791 }, { "epoch": 0.6359698027674704, "grad_norm": 8.548274737969546, "learning_rate": 3.090989283484294e-06, "loss": 17.7243, "step": 34792 }, { "epoch": 0.6359880819639169, "grad_norm": 6.253980368700839, "learning_rate": 3.0907156999176757e-06, "loss": 17.4892, "step": 34793 }, { "epoch": 0.6360063611603634, "grad_norm": 6.335033018513296, "learning_rate": 3.090442123042936e-06, "loss": 17.4064, "step": 34794 }, { "epoch": 0.63602464035681, "grad_norm": 5.799590034658149, "learning_rate": 3.0901685528610305e-06, "loss": 17.3789, "step": 34795 }, { "epoch": 0.6360429195532564, "grad_norm": 4.9734544684989865, "learning_rate": 3.0898949893729158e-06, "loss": 16.9118, "step": 34796 }, { "epoch": 0.636061198749703, "grad_norm": 5.2918964733450595, "learning_rate": 3.0896214325795546e-06, "loss": 16.8781, "step": 34797 }, { "epoch": 0.6360794779461495, "grad_norm": 6.934442016505186, "learning_rate": 3.0893478824819063e-06, "loss": 17.4065, "step": 34798 }, { "epoch": 0.636097757142596, "grad_norm": 5.8859304897273725, "learning_rate": 3.089074339080925e-06, "loss": 17.1616, "step": 34799 }, { "epoch": 0.6361160363390426, "grad_norm": 5.548381537465907, "learning_rate": 3.0888008023775752e-06, "loss": 17.0429, "step": 34800 }, { "epoch": 0.636134315535489, "grad_norm": 5.296392638742394, "learning_rate": 3.088527272372811e-06, "loss": 17.0697, "step": 34801 }, { "epoch": 0.6361525947319356, "grad_norm": 6.140372854959644, "learning_rate": 3.0882537490675924e-06, "loss": 17.2273, "step": 34802 }, { "epoch": 0.6361708739283821, "grad_norm": 5.496373602010488, "learning_rate": 3.08798023246288e-06, "loss": 17.2841, "step": 34803 }, { "epoch": 0.6361891531248286, "grad_norm": 6.071962137483284, "learning_rate": 3.0877067225596293e-06, "loss": 17.3734, "step": 34804 }, { "epoch": 0.6362074323212752, "grad_norm": 5.776887048177654, "learning_rate": 3.0874332193588025e-06, "loss": 17.1528, "step": 34805 }, { "epoch": 0.6362257115177217, "grad_norm": 6.811923400121027, "learning_rate": 3.0871597228613546e-06, "loss": 17.6872, "step": 34806 }, { "epoch": 0.6362439907141682, "grad_norm": 6.76976237251415, "learning_rate": 3.086886233068246e-06, "loss": 17.5972, "step": 34807 }, { "epoch": 0.6362622699106147, "grad_norm": 5.589617023180547, "learning_rate": 3.0866127499804366e-06, "loss": 17.1303, "step": 34808 }, { "epoch": 0.6362805491070612, "grad_norm": 5.89456137512301, "learning_rate": 3.086339273598884e-06, "loss": 17.5667, "step": 34809 }, { "epoch": 0.6362988283035078, "grad_norm": 7.97374874233283, "learning_rate": 3.086065803924544e-06, "loss": 18.2177, "step": 34810 }, { "epoch": 0.6363171074999543, "grad_norm": 6.22546461679198, "learning_rate": 3.085792340958378e-06, "loss": 17.4385, "step": 34811 }, { "epoch": 0.6363353866964008, "grad_norm": 6.513074508930219, "learning_rate": 3.085518884701344e-06, "loss": 17.6022, "step": 34812 }, { "epoch": 0.6363536658928474, "grad_norm": 6.58519751419074, "learning_rate": 3.085245435154399e-06, "loss": 17.3916, "step": 34813 }, { "epoch": 0.6363719450892938, "grad_norm": 5.351648304867349, "learning_rate": 3.084971992318505e-06, "loss": 17.0966, "step": 34814 }, { "epoch": 0.6363902242857404, "grad_norm": 5.042621689626088, "learning_rate": 3.084698556194616e-06, "loss": 16.9564, "step": 34815 }, { "epoch": 0.6364085034821869, "grad_norm": 7.6958583061841805, "learning_rate": 3.084425126783692e-06, "loss": 18.1373, "step": 34816 }, { "epoch": 0.6364267826786334, "grad_norm": 7.0767262842884096, "learning_rate": 3.084151704086694e-06, "loss": 17.6945, "step": 34817 }, { "epoch": 0.63644506187508, "grad_norm": 7.213971109508286, "learning_rate": 3.0838782881045754e-06, "loss": 18.1207, "step": 34818 }, { "epoch": 0.6364633410715265, "grad_norm": 6.275734990961017, "learning_rate": 3.0836048788382993e-06, "loss": 17.1961, "step": 34819 }, { "epoch": 0.6364816202679731, "grad_norm": 8.18885837598275, "learning_rate": 3.0833314762888213e-06, "loss": 18.1668, "step": 34820 }, { "epoch": 0.6364998994644195, "grad_norm": 6.462447135777517, "learning_rate": 3.0830580804570985e-06, "loss": 17.6081, "step": 34821 }, { "epoch": 0.636518178660866, "grad_norm": 7.11863209825204, "learning_rate": 3.0827846913440927e-06, "loss": 17.0728, "step": 34822 }, { "epoch": 0.6365364578573126, "grad_norm": 5.796812680405607, "learning_rate": 3.08251130895076e-06, "loss": 17.0672, "step": 34823 }, { "epoch": 0.6365547370537591, "grad_norm": 7.0932728734521335, "learning_rate": 3.0822379332780582e-06, "loss": 17.944, "step": 34824 }, { "epoch": 0.6365730162502057, "grad_norm": 6.079077731616086, "learning_rate": 3.081964564326946e-06, "loss": 17.2751, "step": 34825 }, { "epoch": 0.6365912954466522, "grad_norm": 5.765378323695019, "learning_rate": 3.0816912020983826e-06, "loss": 16.9435, "step": 34826 }, { "epoch": 0.6366095746430986, "grad_norm": 5.784131503814583, "learning_rate": 3.0814178465933233e-06, "loss": 17.1368, "step": 34827 }, { "epoch": 0.6366278538395452, "grad_norm": 4.53248808399679, "learning_rate": 3.081144497812728e-06, "loss": 16.941, "step": 34828 }, { "epoch": 0.6366461330359917, "grad_norm": 5.491072141175607, "learning_rate": 3.0808711557575567e-06, "loss": 17.1695, "step": 34829 }, { "epoch": 0.6366644122324383, "grad_norm": 7.1654343093951836, "learning_rate": 3.0805978204287633e-06, "loss": 17.4328, "step": 34830 }, { "epoch": 0.6366826914288848, "grad_norm": 6.612493279088143, "learning_rate": 3.08032449182731e-06, "loss": 17.4148, "step": 34831 }, { "epoch": 0.6367009706253313, "grad_norm": 6.602196284970008, "learning_rate": 3.080051169954152e-06, "loss": 17.7289, "step": 34832 }, { "epoch": 0.6367192498217779, "grad_norm": 6.925441659773178, "learning_rate": 3.0797778548102464e-06, "loss": 17.5137, "step": 34833 }, { "epoch": 0.6367375290182243, "grad_norm": 6.040068643409806, "learning_rate": 3.0795045463965555e-06, "loss": 17.1026, "step": 34834 }, { "epoch": 0.6367558082146709, "grad_norm": 6.80173239145258, "learning_rate": 3.079231244714032e-06, "loss": 17.6072, "step": 34835 }, { "epoch": 0.6367740874111174, "grad_norm": 6.705028097590705, "learning_rate": 3.0789579497636386e-06, "loss": 17.66, "step": 34836 }, { "epoch": 0.6367923666075639, "grad_norm": 6.534842613146401, "learning_rate": 3.0786846615463295e-06, "loss": 17.5538, "step": 34837 }, { "epoch": 0.6368106458040105, "grad_norm": 5.071434558678317, "learning_rate": 3.078411380063063e-06, "loss": 16.7841, "step": 34838 }, { "epoch": 0.636828925000457, "grad_norm": 7.038803472880353, "learning_rate": 3.078138105314801e-06, "loss": 17.656, "step": 34839 }, { "epoch": 0.6368472041969035, "grad_norm": 7.756117876649476, "learning_rate": 3.0778648373024967e-06, "loss": 17.7607, "step": 34840 }, { "epoch": 0.63686548339335, "grad_norm": 7.772177235602317, "learning_rate": 3.077591576027108e-06, "loss": 17.5597, "step": 34841 }, { "epoch": 0.6368837625897965, "grad_norm": 6.807898660097416, "learning_rate": 3.077318321489595e-06, "loss": 17.4215, "step": 34842 }, { "epoch": 0.6369020417862431, "grad_norm": 7.330791278341642, "learning_rate": 3.0770450736909153e-06, "loss": 17.6542, "step": 34843 }, { "epoch": 0.6369203209826896, "grad_norm": 8.59655555135432, "learning_rate": 3.076771832632024e-06, "loss": 18.142, "step": 34844 }, { "epoch": 0.6369386001791362, "grad_norm": 4.7632398763636425, "learning_rate": 3.076498598313882e-06, "loss": 16.8834, "step": 34845 }, { "epoch": 0.6369568793755827, "grad_norm": 6.824508539353281, "learning_rate": 3.0762253707374445e-06, "loss": 17.6296, "step": 34846 }, { "epoch": 0.6369751585720291, "grad_norm": 5.0283336675259696, "learning_rate": 3.075952149903669e-06, "loss": 16.9637, "step": 34847 }, { "epoch": 0.6369934377684757, "grad_norm": 7.5432916597231285, "learning_rate": 3.0756789358135175e-06, "loss": 17.797, "step": 34848 }, { "epoch": 0.6370117169649222, "grad_norm": 7.06587905791138, "learning_rate": 3.075405728467942e-06, "loss": 17.7373, "step": 34849 }, { "epoch": 0.6370299961613688, "grad_norm": 7.301220070366846, "learning_rate": 3.0751325278679034e-06, "loss": 17.8789, "step": 34850 }, { "epoch": 0.6370482753578153, "grad_norm": 5.826206798100932, "learning_rate": 3.0748593340143577e-06, "loss": 16.8977, "step": 34851 }, { "epoch": 0.6370665545542618, "grad_norm": 7.175480883756562, "learning_rate": 3.074586146908262e-06, "loss": 17.6373, "step": 34852 }, { "epoch": 0.6370848337507083, "grad_norm": 5.853710885609439, "learning_rate": 3.074312966550577e-06, "loss": 17.1247, "step": 34853 }, { "epoch": 0.6371031129471548, "grad_norm": 6.369403991176052, "learning_rate": 3.074039792942258e-06, "loss": 17.5682, "step": 34854 }, { "epoch": 0.6371213921436014, "grad_norm": 6.23085246771846, "learning_rate": 3.0737666260842603e-06, "loss": 17.2656, "step": 34855 }, { "epoch": 0.6371396713400479, "grad_norm": 5.739815322727173, "learning_rate": 3.073493465977544e-06, "loss": 17.1861, "step": 34856 }, { "epoch": 0.6371579505364944, "grad_norm": 7.33999018491489, "learning_rate": 3.0732203126230673e-06, "loss": 17.8303, "step": 34857 }, { "epoch": 0.637176229732941, "grad_norm": 5.913175491165907, "learning_rate": 3.0729471660217845e-06, "loss": 17.247, "step": 34858 }, { "epoch": 0.6371945089293874, "grad_norm": 5.3275190353533075, "learning_rate": 3.0726740261746563e-06, "loss": 17.0227, "step": 34859 }, { "epoch": 0.637212788125834, "grad_norm": 5.6438266824614605, "learning_rate": 3.0724008930826376e-06, "loss": 16.8795, "step": 34860 }, { "epoch": 0.6372310673222805, "grad_norm": 5.97511520074411, "learning_rate": 3.0721277667466853e-06, "loss": 17.408, "step": 34861 }, { "epoch": 0.637249346518727, "grad_norm": 5.709340322566072, "learning_rate": 3.07185464716776e-06, "loss": 17.2807, "step": 34862 }, { "epoch": 0.6372676257151736, "grad_norm": 5.728219876949896, "learning_rate": 3.071581534346817e-06, "loss": 16.8932, "step": 34863 }, { "epoch": 0.6372859049116201, "grad_norm": 7.203880460162501, "learning_rate": 3.0713084282848104e-06, "loss": 17.7258, "step": 34864 }, { "epoch": 0.6373041841080667, "grad_norm": 5.64269207113071, "learning_rate": 3.0710353289827024e-06, "loss": 17.0255, "step": 34865 }, { "epoch": 0.6373224633045131, "grad_norm": 7.094409015895752, "learning_rate": 3.070762236441447e-06, "loss": 17.8847, "step": 34866 }, { "epoch": 0.6373407425009596, "grad_norm": 11.50388100790617, "learning_rate": 3.0704891506620044e-06, "loss": 18.4999, "step": 34867 }, { "epoch": 0.6373590216974062, "grad_norm": 5.260661747551147, "learning_rate": 3.07021607164533e-06, "loss": 16.9922, "step": 34868 }, { "epoch": 0.6373773008938527, "grad_norm": 6.439813131014567, "learning_rate": 3.069942999392379e-06, "loss": 17.5068, "step": 34869 }, { "epoch": 0.6373955800902993, "grad_norm": 8.131310287549663, "learning_rate": 3.069669933904111e-06, "loss": 18.2866, "step": 34870 }, { "epoch": 0.6374138592867458, "grad_norm": 6.259045807573577, "learning_rate": 3.0693968751814836e-06, "loss": 17.705, "step": 34871 }, { "epoch": 0.6374321384831922, "grad_norm": 5.867738846210522, "learning_rate": 3.0691238232254506e-06, "loss": 17.3397, "step": 34872 }, { "epoch": 0.6374504176796388, "grad_norm": 5.093748609907934, "learning_rate": 3.0688507780369725e-06, "loss": 16.9442, "step": 34873 }, { "epoch": 0.6374686968760853, "grad_norm": 6.8231366771519175, "learning_rate": 3.0685777396170046e-06, "loss": 17.7345, "step": 34874 }, { "epoch": 0.6374869760725318, "grad_norm": 7.593532031378126, "learning_rate": 3.0683047079665024e-06, "loss": 17.9879, "step": 34875 }, { "epoch": 0.6375052552689784, "grad_norm": 6.101061158517342, "learning_rate": 3.068031683086427e-06, "loss": 17.4332, "step": 34876 }, { "epoch": 0.6375235344654249, "grad_norm": 4.786625941273057, "learning_rate": 3.067758664977733e-06, "loss": 16.8493, "step": 34877 }, { "epoch": 0.6375418136618715, "grad_norm": 6.215964835775378, "learning_rate": 3.067485653641375e-06, "loss": 17.389, "step": 34878 }, { "epoch": 0.6375600928583179, "grad_norm": 7.429682256084305, "learning_rate": 3.0672126490783134e-06, "loss": 17.4791, "step": 34879 }, { "epoch": 0.6375783720547644, "grad_norm": 6.96606740190315, "learning_rate": 3.066939651289502e-06, "loss": 17.5553, "step": 34880 }, { "epoch": 0.637596651251211, "grad_norm": 6.271039386688423, "learning_rate": 3.066666660275902e-06, "loss": 17.5332, "step": 34881 }, { "epoch": 0.6376149304476575, "grad_norm": 6.277795064060198, "learning_rate": 3.066393676038468e-06, "loss": 17.4718, "step": 34882 }, { "epoch": 0.6376332096441041, "grad_norm": 6.046530312257446, "learning_rate": 3.0661206985781534e-06, "loss": 17.3051, "step": 34883 }, { "epoch": 0.6376514888405506, "grad_norm": 5.963228890165408, "learning_rate": 3.06584772789592e-06, "loss": 17.472, "step": 34884 }, { "epoch": 0.637669768036997, "grad_norm": 5.58828540396298, "learning_rate": 3.0655747639927226e-06, "loss": 17.2401, "step": 34885 }, { "epoch": 0.6376880472334436, "grad_norm": 5.283861386059748, "learning_rate": 3.0653018068695163e-06, "loss": 17.1666, "step": 34886 }, { "epoch": 0.6377063264298901, "grad_norm": 8.144582956412101, "learning_rate": 3.0650288565272597e-06, "loss": 17.9459, "step": 34887 }, { "epoch": 0.6377246056263367, "grad_norm": 5.997570150695665, "learning_rate": 3.06475591296691e-06, "loss": 17.2081, "step": 34888 }, { "epoch": 0.6377428848227832, "grad_norm": 6.678829190511488, "learning_rate": 3.0644829761894213e-06, "loss": 17.7444, "step": 34889 }, { "epoch": 0.6377611640192297, "grad_norm": 5.93841999003617, "learning_rate": 3.064210046195754e-06, "loss": 17.3138, "step": 34890 }, { "epoch": 0.6377794432156763, "grad_norm": 5.74737118063297, "learning_rate": 3.0639371229868613e-06, "loss": 17.3665, "step": 34891 }, { "epoch": 0.6377977224121227, "grad_norm": 6.218411863888483, "learning_rate": 3.0636642065637e-06, "loss": 17.4994, "step": 34892 }, { "epoch": 0.6378160016085693, "grad_norm": 5.867182049883798, "learning_rate": 3.0633912969272296e-06, "loss": 17.3389, "step": 34893 }, { "epoch": 0.6378342808050158, "grad_norm": 6.149846529193025, "learning_rate": 3.0631183940784047e-06, "loss": 17.3661, "step": 34894 }, { "epoch": 0.6378525600014623, "grad_norm": 5.838385146905431, "learning_rate": 3.0628454980181797e-06, "loss": 17.3878, "step": 34895 }, { "epoch": 0.6378708391979089, "grad_norm": 7.122595381912626, "learning_rate": 3.0625726087475136e-06, "loss": 17.8405, "step": 34896 }, { "epoch": 0.6378891183943554, "grad_norm": 6.288994112904732, "learning_rate": 3.0622997262673625e-06, "loss": 17.6885, "step": 34897 }, { "epoch": 0.637907397590802, "grad_norm": 5.5548401111729975, "learning_rate": 3.0620268505786843e-06, "loss": 17.133, "step": 34898 }, { "epoch": 0.6379256767872484, "grad_norm": 9.492486555908732, "learning_rate": 3.0617539816824336e-06, "loss": 18.6434, "step": 34899 }, { "epoch": 0.6379439559836949, "grad_norm": 5.8545766882948005, "learning_rate": 3.0614811195795646e-06, "loss": 17.1641, "step": 34900 }, { "epoch": 0.6379622351801415, "grad_norm": 5.908029826004316, "learning_rate": 3.0612082642710378e-06, "loss": 17.4235, "step": 34901 }, { "epoch": 0.637980514376588, "grad_norm": 6.992510155383491, "learning_rate": 3.0609354157578086e-06, "loss": 17.9333, "step": 34902 }, { "epoch": 0.6379987935730346, "grad_norm": 7.321879109934151, "learning_rate": 3.06066257404083e-06, "loss": 17.5477, "step": 34903 }, { "epoch": 0.638017072769481, "grad_norm": 6.800632639823933, "learning_rate": 3.0603897391210636e-06, "loss": 17.3079, "step": 34904 }, { "epoch": 0.6380353519659275, "grad_norm": 5.565836669536423, "learning_rate": 3.060116910999461e-06, "loss": 17.1628, "step": 34905 }, { "epoch": 0.6380536311623741, "grad_norm": 5.6755710825241845, "learning_rate": 3.0598440896769797e-06, "loss": 17.1356, "step": 34906 }, { "epoch": 0.6380719103588206, "grad_norm": 5.932177677813053, "learning_rate": 3.0595712751545793e-06, "loss": 17.196, "step": 34907 }, { "epoch": 0.6380901895552672, "grad_norm": 8.614682239907612, "learning_rate": 3.059298467433212e-06, "loss": 18.1657, "step": 34908 }, { "epoch": 0.6381084687517137, "grad_norm": 6.039919776548639, "learning_rate": 3.059025666513834e-06, "loss": 17.4064, "step": 34909 }, { "epoch": 0.6381267479481602, "grad_norm": 5.993973371522561, "learning_rate": 3.058752872397403e-06, "loss": 17.1327, "step": 34910 }, { "epoch": 0.6381450271446067, "grad_norm": 7.013771180945944, "learning_rate": 3.058480085084874e-06, "loss": 17.4005, "step": 34911 }, { "epoch": 0.6381633063410532, "grad_norm": 6.7355575975901925, "learning_rate": 3.058207304577206e-06, "loss": 17.6257, "step": 34912 }, { "epoch": 0.6381815855374998, "grad_norm": 5.481514828714843, "learning_rate": 3.0579345308753527e-06, "loss": 17.137, "step": 34913 }, { "epoch": 0.6381998647339463, "grad_norm": 5.541652027766865, "learning_rate": 3.057661763980268e-06, "loss": 17.1342, "step": 34914 }, { "epoch": 0.6382181439303928, "grad_norm": 6.387455745937221, "learning_rate": 3.0573890038929114e-06, "loss": 17.3824, "step": 34915 }, { "epoch": 0.6382364231268394, "grad_norm": 7.643807009964113, "learning_rate": 3.0571162506142393e-06, "loss": 18.0322, "step": 34916 }, { "epoch": 0.6382547023232858, "grad_norm": 6.726802839695026, "learning_rate": 3.056843504145204e-06, "loss": 17.7894, "step": 34917 }, { "epoch": 0.6382729815197324, "grad_norm": 6.5842476432425725, "learning_rate": 3.0565707644867648e-06, "loss": 17.4383, "step": 34918 }, { "epoch": 0.6382912607161789, "grad_norm": 7.249112129673814, "learning_rate": 3.0562980316398767e-06, "loss": 17.9101, "step": 34919 }, { "epoch": 0.6383095399126254, "grad_norm": 6.7747299719253, "learning_rate": 3.0560253056054933e-06, "loss": 17.648, "step": 34920 }, { "epoch": 0.638327819109072, "grad_norm": 7.419099160770154, "learning_rate": 3.0557525863845748e-06, "loss": 18.0572, "step": 34921 }, { "epoch": 0.6383460983055185, "grad_norm": 6.385185143777851, "learning_rate": 3.055479873978075e-06, "loss": 17.2937, "step": 34922 }, { "epoch": 0.6383643775019651, "grad_norm": 6.417555703448203, "learning_rate": 3.0552071683869477e-06, "loss": 17.6189, "step": 34923 }, { "epoch": 0.6383826566984115, "grad_norm": 5.474574025334104, "learning_rate": 3.054934469612151e-06, "loss": 17.1313, "step": 34924 }, { "epoch": 0.638400935894858, "grad_norm": 5.22419746898806, "learning_rate": 3.0546617776546395e-06, "loss": 17.0579, "step": 34925 }, { "epoch": 0.6384192150913046, "grad_norm": 5.307452644787069, "learning_rate": 3.0543890925153714e-06, "loss": 17.2609, "step": 34926 }, { "epoch": 0.6384374942877511, "grad_norm": 6.907246469962298, "learning_rate": 3.0541164141953005e-06, "loss": 17.517, "step": 34927 }, { "epoch": 0.6384557734841977, "grad_norm": 5.721410900912726, "learning_rate": 3.0538437426953814e-06, "loss": 17.3927, "step": 34928 }, { "epoch": 0.6384740526806442, "grad_norm": 6.5051794237217875, "learning_rate": 3.0535710780165715e-06, "loss": 17.6788, "step": 34929 }, { "epoch": 0.6384923318770906, "grad_norm": 6.047077473636453, "learning_rate": 3.053298420159827e-06, "loss": 17.3122, "step": 34930 }, { "epoch": 0.6385106110735372, "grad_norm": 5.6939828720332395, "learning_rate": 3.053025769126101e-06, "loss": 17.304, "step": 34931 }, { "epoch": 0.6385288902699837, "grad_norm": 7.09101548092994, "learning_rate": 3.0527531249163533e-06, "loss": 17.8145, "step": 34932 }, { "epoch": 0.6385471694664303, "grad_norm": 5.408368433574039, "learning_rate": 3.052480487531535e-06, "loss": 17.0629, "step": 34933 }, { "epoch": 0.6385654486628768, "grad_norm": 5.5987746458850625, "learning_rate": 3.0522078569726025e-06, "loss": 17.4574, "step": 34934 }, { "epoch": 0.6385837278593233, "grad_norm": 6.105829431830458, "learning_rate": 3.051935233240515e-06, "loss": 17.6541, "step": 34935 }, { "epoch": 0.6386020070557699, "grad_norm": 5.837619951235159, "learning_rate": 3.051662616336225e-06, "loss": 17.3087, "step": 34936 }, { "epoch": 0.6386202862522163, "grad_norm": 5.798418682056576, "learning_rate": 3.051390006260687e-06, "loss": 17.1614, "step": 34937 }, { "epoch": 0.6386385654486629, "grad_norm": 7.423627063759884, "learning_rate": 3.0511174030148587e-06, "loss": 17.6953, "step": 34938 }, { "epoch": 0.6386568446451094, "grad_norm": 5.265992537533664, "learning_rate": 3.050844806599696e-06, "loss": 17.0067, "step": 34939 }, { "epoch": 0.6386751238415559, "grad_norm": 6.950594669544079, "learning_rate": 3.0505722170161508e-06, "loss": 17.6861, "step": 34940 }, { "epoch": 0.6386934030380025, "grad_norm": 5.404959361447495, "learning_rate": 3.0502996342651823e-06, "loss": 17.3045, "step": 34941 }, { "epoch": 0.638711682234449, "grad_norm": 6.021392883118773, "learning_rate": 3.050027058347743e-06, "loss": 17.2642, "step": 34942 }, { "epoch": 0.6387299614308954, "grad_norm": 6.412194792099351, "learning_rate": 3.0497544892647914e-06, "loss": 17.5151, "step": 34943 }, { "epoch": 0.638748240627342, "grad_norm": 4.7575237011326745, "learning_rate": 3.049481927017281e-06, "loss": 17.0422, "step": 34944 }, { "epoch": 0.6387665198237885, "grad_norm": 7.702946303002372, "learning_rate": 3.0492093716061653e-06, "loss": 18.0696, "step": 34945 }, { "epoch": 0.6387847990202351, "grad_norm": 5.1384485977513314, "learning_rate": 3.0489368230324037e-06, "loss": 17.2313, "step": 34946 }, { "epoch": 0.6388030782166816, "grad_norm": 4.839991499616515, "learning_rate": 3.048664281296948e-06, "loss": 16.8199, "step": 34947 }, { "epoch": 0.6388213574131281, "grad_norm": 5.85777477040769, "learning_rate": 3.048391746400754e-06, "loss": 17.4126, "step": 34948 }, { "epoch": 0.6388396366095747, "grad_norm": 7.500897086007257, "learning_rate": 3.0481192183447794e-06, "loss": 17.8901, "step": 34949 }, { "epoch": 0.6388579158060211, "grad_norm": 6.659007571101739, "learning_rate": 3.047846697129977e-06, "loss": 17.7703, "step": 34950 }, { "epoch": 0.6388761950024677, "grad_norm": 7.108740584266512, "learning_rate": 3.047574182757302e-06, "loss": 17.5509, "step": 34951 }, { "epoch": 0.6388944741989142, "grad_norm": 6.877676087623477, "learning_rate": 3.0473016752277103e-06, "loss": 17.5088, "step": 34952 }, { "epoch": 0.6389127533953607, "grad_norm": 6.188471341123416, "learning_rate": 3.047029174542158e-06, "loss": 17.1591, "step": 34953 }, { "epoch": 0.6389310325918073, "grad_norm": 5.819741891811897, "learning_rate": 3.046756680701596e-06, "loss": 17.0827, "step": 34954 }, { "epoch": 0.6389493117882538, "grad_norm": 6.250002710575138, "learning_rate": 3.0464841937069846e-06, "loss": 17.4604, "step": 34955 }, { "epoch": 0.6389675909847004, "grad_norm": 5.652467978505069, "learning_rate": 3.0462117135592757e-06, "loss": 17.1321, "step": 34956 }, { "epoch": 0.6389858701811468, "grad_norm": 8.25274047102, "learning_rate": 3.045939240259426e-06, "loss": 17.7842, "step": 34957 }, { "epoch": 0.6390041493775933, "grad_norm": 5.878572373162929, "learning_rate": 3.0456667738083896e-06, "loss": 17.2498, "step": 34958 }, { "epoch": 0.6390224285740399, "grad_norm": 5.68292423428307, "learning_rate": 3.0453943142071195e-06, "loss": 17.3043, "step": 34959 }, { "epoch": 0.6390407077704864, "grad_norm": 7.142826729387129, "learning_rate": 3.0451218614565748e-06, "loss": 17.5161, "step": 34960 }, { "epoch": 0.639058986966933, "grad_norm": 6.558644126095099, "learning_rate": 3.044849415557709e-06, "loss": 17.5343, "step": 34961 }, { "epoch": 0.6390772661633795, "grad_norm": 7.025999664341136, "learning_rate": 3.044576976511474e-06, "loss": 18.137, "step": 34962 }, { "epoch": 0.6390955453598259, "grad_norm": 6.737838695058427, "learning_rate": 3.044304544318829e-06, "loss": 17.6055, "step": 34963 }, { "epoch": 0.6391138245562725, "grad_norm": 7.272608062211263, "learning_rate": 3.0440321189807263e-06, "loss": 17.7992, "step": 34964 }, { "epoch": 0.639132103752719, "grad_norm": 5.256091056761009, "learning_rate": 3.04375970049812e-06, "loss": 17.0219, "step": 34965 }, { "epoch": 0.6391503829491656, "grad_norm": 5.8656961742959375, "learning_rate": 3.0434872888719685e-06, "loss": 17.2168, "step": 34966 }, { "epoch": 0.6391686621456121, "grad_norm": 5.157870107647903, "learning_rate": 3.043214884103224e-06, "loss": 17.1278, "step": 34967 }, { "epoch": 0.6391869413420586, "grad_norm": 4.102246274890307, "learning_rate": 3.0429424861928396e-06, "loss": 16.7183, "step": 34968 }, { "epoch": 0.6392052205385051, "grad_norm": 6.861517819364468, "learning_rate": 3.042670095141773e-06, "loss": 17.3682, "step": 34969 }, { "epoch": 0.6392234997349516, "grad_norm": 5.36544822376356, "learning_rate": 3.042397710950979e-06, "loss": 17.1552, "step": 34970 }, { "epoch": 0.6392417789313982, "grad_norm": 6.800788557240606, "learning_rate": 3.0421253336214083e-06, "loss": 17.7125, "step": 34971 }, { "epoch": 0.6392600581278447, "grad_norm": 6.210749707002841, "learning_rate": 3.0418529631540217e-06, "loss": 16.8507, "step": 34972 }, { "epoch": 0.6392783373242912, "grad_norm": 7.618633793737791, "learning_rate": 3.0415805995497683e-06, "loss": 17.9935, "step": 34973 }, { "epoch": 0.6392966165207378, "grad_norm": 7.191517509665983, "learning_rate": 3.041308242809605e-06, "loss": 17.3333, "step": 34974 }, { "epoch": 0.6393148957171843, "grad_norm": 6.080227639920689, "learning_rate": 3.0410358929344878e-06, "loss": 17.425, "step": 34975 }, { "epoch": 0.6393331749136308, "grad_norm": 6.774876249842135, "learning_rate": 3.0407635499253687e-06, "loss": 17.9915, "step": 34976 }, { "epoch": 0.6393514541100773, "grad_norm": 6.389340191647488, "learning_rate": 3.0404912137832045e-06, "loss": 17.5342, "step": 34977 }, { "epoch": 0.6393697333065238, "grad_norm": 6.696778659925357, "learning_rate": 3.0402188845089474e-06, "loss": 17.5004, "step": 34978 }, { "epoch": 0.6393880125029704, "grad_norm": 6.712440399265837, "learning_rate": 3.039946562103552e-06, "loss": 17.4109, "step": 34979 }, { "epoch": 0.6394062916994169, "grad_norm": 7.625032034432502, "learning_rate": 3.039674246567976e-06, "loss": 18.1553, "step": 34980 }, { "epoch": 0.6394245708958635, "grad_norm": 6.514244475227629, "learning_rate": 3.039401937903172e-06, "loss": 17.6949, "step": 34981 }, { "epoch": 0.63944285009231, "grad_norm": 6.649328515956816, "learning_rate": 3.039129636110092e-06, "loss": 17.6856, "step": 34982 }, { "epoch": 0.6394611292887564, "grad_norm": 6.741012000420136, "learning_rate": 3.0388573411896926e-06, "loss": 17.4476, "step": 34983 }, { "epoch": 0.639479408485203, "grad_norm": 5.924508993075227, "learning_rate": 3.03858505314293e-06, "loss": 17.3563, "step": 34984 }, { "epoch": 0.6394976876816495, "grad_norm": 5.942469854411462, "learning_rate": 3.0383127719707544e-06, "loss": 17.21, "step": 34985 }, { "epoch": 0.6395159668780961, "grad_norm": 6.636710668752684, "learning_rate": 3.038040497674124e-06, "loss": 17.7242, "step": 34986 }, { "epoch": 0.6395342460745426, "grad_norm": 5.737520820186018, "learning_rate": 3.03776823025399e-06, "loss": 17.241, "step": 34987 }, { "epoch": 0.639552525270989, "grad_norm": 6.342534206361193, "learning_rate": 3.0374959697113084e-06, "loss": 17.3188, "step": 34988 }, { "epoch": 0.6395708044674356, "grad_norm": 6.187919176958281, "learning_rate": 3.037223716047035e-06, "loss": 17.7349, "step": 34989 }, { "epoch": 0.6395890836638821, "grad_norm": 7.433158730975999, "learning_rate": 3.0369514692621193e-06, "loss": 18.2103, "step": 34990 }, { "epoch": 0.6396073628603287, "grad_norm": 5.390290788778063, "learning_rate": 3.0366792293575208e-06, "loss": 17.1698, "step": 34991 }, { "epoch": 0.6396256420567752, "grad_norm": 7.123639461863632, "learning_rate": 3.0364069963341895e-06, "loss": 17.2822, "step": 34992 }, { "epoch": 0.6396439212532217, "grad_norm": 7.081705807366958, "learning_rate": 3.036134770193081e-06, "loss": 17.6836, "step": 34993 }, { "epoch": 0.6396622004496683, "grad_norm": 6.702474968165281, "learning_rate": 3.0358625509351514e-06, "loss": 17.5929, "step": 34994 }, { "epoch": 0.6396804796461147, "grad_norm": 6.043906997779072, "learning_rate": 3.035590338561354e-06, "loss": 17.2053, "step": 34995 }, { "epoch": 0.6396987588425613, "grad_norm": 6.715194133874338, "learning_rate": 3.035318133072639e-06, "loss": 17.6744, "step": 34996 }, { "epoch": 0.6397170380390078, "grad_norm": 6.686768363534654, "learning_rate": 3.0350459344699645e-06, "loss": 17.4751, "step": 34997 }, { "epoch": 0.6397353172354543, "grad_norm": 6.675672189083443, "learning_rate": 3.0347737427542845e-06, "loss": 17.4719, "step": 34998 }, { "epoch": 0.6397535964319009, "grad_norm": 6.749935003676847, "learning_rate": 3.0345015579265502e-06, "loss": 17.4363, "step": 34999 }, { "epoch": 0.6397718756283474, "grad_norm": 7.650942396644036, "learning_rate": 3.0342293799877197e-06, "loss": 18.1402, "step": 35000 }, { "epoch": 0.639790154824794, "grad_norm": 7.965462307336422, "learning_rate": 3.0339572089387436e-06, "loss": 17.9619, "step": 35001 }, { "epoch": 0.6398084340212404, "grad_norm": 5.017928824101248, "learning_rate": 3.0336850447805754e-06, "loss": 17.0256, "step": 35002 }, { "epoch": 0.6398267132176869, "grad_norm": 7.483382764463916, "learning_rate": 3.0334128875141726e-06, "loss": 17.7692, "step": 35003 }, { "epoch": 0.6398449924141335, "grad_norm": 8.120423260943419, "learning_rate": 3.033140737140485e-06, "loss": 18.0065, "step": 35004 }, { "epoch": 0.63986327161058, "grad_norm": 6.494310089042574, "learning_rate": 3.0328685936604707e-06, "loss": 17.6195, "step": 35005 }, { "epoch": 0.6398815508070266, "grad_norm": 5.863909309776176, "learning_rate": 3.03259645707508e-06, "loss": 17.2128, "step": 35006 }, { "epoch": 0.6398998300034731, "grad_norm": 4.769173635088098, "learning_rate": 3.0323243273852674e-06, "loss": 16.9842, "step": 35007 }, { "epoch": 0.6399181091999195, "grad_norm": 6.765166890723639, "learning_rate": 3.0320522045919888e-06, "loss": 17.2927, "step": 35008 }, { "epoch": 0.6399363883963661, "grad_norm": 5.864816266609753, "learning_rate": 3.031780088696197e-06, "loss": 17.2457, "step": 35009 }, { "epoch": 0.6399546675928126, "grad_norm": 4.779830567260561, "learning_rate": 3.0315079796988424e-06, "loss": 16.8085, "step": 35010 }, { "epoch": 0.6399729467892591, "grad_norm": 4.962287971348869, "learning_rate": 3.0312358776008834e-06, "loss": 16.839, "step": 35011 }, { "epoch": 0.6399912259857057, "grad_norm": 7.667668389670389, "learning_rate": 3.0309637824032724e-06, "loss": 18.0786, "step": 35012 }, { "epoch": 0.6400095051821522, "grad_norm": 6.214125493360294, "learning_rate": 3.03069169410696e-06, "loss": 17.4474, "step": 35013 }, { "epoch": 0.6400277843785988, "grad_norm": 6.216568003847736, "learning_rate": 3.030419612712904e-06, "loss": 17.2686, "step": 35014 }, { "epoch": 0.6400460635750452, "grad_norm": 5.487464599077277, "learning_rate": 3.0301475382220568e-06, "loss": 17.061, "step": 35015 }, { "epoch": 0.6400643427714917, "grad_norm": 5.3657516315611735, "learning_rate": 3.02987547063537e-06, "loss": 17.1757, "step": 35016 }, { "epoch": 0.6400826219679383, "grad_norm": 7.585607578466037, "learning_rate": 3.0296034099538008e-06, "loss": 17.4348, "step": 35017 }, { "epoch": 0.6401009011643848, "grad_norm": 7.380792647985433, "learning_rate": 3.0293313561782985e-06, "loss": 17.5741, "step": 35018 }, { "epoch": 0.6401191803608314, "grad_norm": 5.626309394705076, "learning_rate": 3.0290593093098197e-06, "loss": 17.3357, "step": 35019 }, { "epoch": 0.6401374595572779, "grad_norm": 6.0164794223463725, "learning_rate": 3.028787269349318e-06, "loss": 17.2622, "step": 35020 }, { "epoch": 0.6401557387537243, "grad_norm": 4.983952718547103, "learning_rate": 3.0285152362977436e-06, "loss": 16.9265, "step": 35021 }, { "epoch": 0.6401740179501709, "grad_norm": 5.685103752594073, "learning_rate": 3.0282432101560543e-06, "loss": 17.3282, "step": 35022 }, { "epoch": 0.6401922971466174, "grad_norm": 6.821422713514076, "learning_rate": 3.027971190925201e-06, "loss": 17.6874, "step": 35023 }, { "epoch": 0.640210576343064, "grad_norm": 5.668374700602283, "learning_rate": 3.027699178606136e-06, "loss": 17.1477, "step": 35024 }, { "epoch": 0.6402288555395105, "grad_norm": 6.9065711923757265, "learning_rate": 3.027427173199816e-06, "loss": 17.5097, "step": 35025 }, { "epoch": 0.640247134735957, "grad_norm": 5.050617388248776, "learning_rate": 3.0271551747071936e-06, "loss": 16.7999, "step": 35026 }, { "epoch": 0.6402654139324035, "grad_norm": 7.483842796094209, "learning_rate": 3.026883183129219e-06, "loss": 17.7375, "step": 35027 }, { "epoch": 0.64028369312885, "grad_norm": 5.348198403787153, "learning_rate": 3.026611198466848e-06, "loss": 16.8875, "step": 35028 }, { "epoch": 0.6403019723252966, "grad_norm": 7.106500313983542, "learning_rate": 3.026339220721034e-06, "loss": 17.942, "step": 35029 }, { "epoch": 0.6403202515217431, "grad_norm": 7.82717684689927, "learning_rate": 3.026067249892729e-06, "loss": 17.6569, "step": 35030 }, { "epoch": 0.6403385307181896, "grad_norm": 5.6762792293583315, "learning_rate": 3.025795285982888e-06, "loss": 17.3188, "step": 35031 }, { "epoch": 0.6403568099146362, "grad_norm": 5.786661583936771, "learning_rate": 3.025523328992463e-06, "loss": 17.2223, "step": 35032 }, { "epoch": 0.6403750891110827, "grad_norm": 6.0292205635266525, "learning_rate": 3.025251378922406e-06, "loss": 17.3408, "step": 35033 }, { "epoch": 0.6403933683075292, "grad_norm": 6.818705669579261, "learning_rate": 3.0249794357736734e-06, "loss": 17.5263, "step": 35034 }, { "epoch": 0.6404116475039757, "grad_norm": 6.259407186764631, "learning_rate": 3.024707499547215e-06, "loss": 17.4216, "step": 35035 }, { "epoch": 0.6404299267004222, "grad_norm": 6.310617382953326, "learning_rate": 3.0244355702439864e-06, "loss": 17.617, "step": 35036 }, { "epoch": 0.6404482058968688, "grad_norm": 6.161379086345737, "learning_rate": 3.0241636478649394e-06, "loss": 17.617, "step": 35037 }, { "epoch": 0.6404664850933153, "grad_norm": 4.897180049599483, "learning_rate": 3.0238917324110266e-06, "loss": 16.8734, "step": 35038 }, { "epoch": 0.6404847642897619, "grad_norm": 6.441489026297191, "learning_rate": 3.0236198238832035e-06, "loss": 17.7715, "step": 35039 }, { "epoch": 0.6405030434862083, "grad_norm": 5.913426268478533, "learning_rate": 3.0233479222824214e-06, "loss": 17.3927, "step": 35040 }, { "epoch": 0.6405213226826548, "grad_norm": 5.897060712480678, "learning_rate": 3.0230760276096315e-06, "loss": 17.4432, "step": 35041 }, { "epoch": 0.6405396018791014, "grad_norm": 6.477080938611953, "learning_rate": 3.022804139865789e-06, "loss": 17.6072, "step": 35042 }, { "epoch": 0.6405578810755479, "grad_norm": 5.192592412959924, "learning_rate": 3.022532259051848e-06, "loss": 16.9112, "step": 35043 }, { "epoch": 0.6405761602719945, "grad_norm": 6.771775826180061, "learning_rate": 3.0222603851687582e-06, "loss": 17.4261, "step": 35044 }, { "epoch": 0.640594439468441, "grad_norm": 6.401470596979334, "learning_rate": 3.021988518217476e-06, "loss": 17.6712, "step": 35045 }, { "epoch": 0.6406127186648874, "grad_norm": 5.794962732516042, "learning_rate": 3.0217166581989506e-06, "loss": 17.1366, "step": 35046 }, { "epoch": 0.640630997861334, "grad_norm": 5.999495423122488, "learning_rate": 3.0214448051141358e-06, "loss": 17.0242, "step": 35047 }, { "epoch": 0.6406492770577805, "grad_norm": 6.86158024537314, "learning_rate": 3.021172958963988e-06, "loss": 17.4818, "step": 35048 }, { "epoch": 0.6406675562542271, "grad_norm": 7.02098231187025, "learning_rate": 3.0209011197494544e-06, "loss": 17.8596, "step": 35049 }, { "epoch": 0.6406858354506736, "grad_norm": 6.138802603469844, "learning_rate": 3.020629287471493e-06, "loss": 17.3359, "step": 35050 }, { "epoch": 0.6407041146471201, "grad_norm": 6.401375027226146, "learning_rate": 3.0203574621310533e-06, "loss": 17.5139, "step": 35051 }, { "epoch": 0.6407223938435667, "grad_norm": 5.571767037558536, "learning_rate": 3.0200856437290876e-06, "loss": 17.2239, "step": 35052 }, { "epoch": 0.6407406730400131, "grad_norm": 6.6596850678864215, "learning_rate": 3.019813832266553e-06, "loss": 17.7858, "step": 35053 }, { "epoch": 0.6407589522364597, "grad_norm": 7.83145141899022, "learning_rate": 3.0195420277443977e-06, "loss": 18.0364, "step": 35054 }, { "epoch": 0.6407772314329062, "grad_norm": 4.919834618731274, "learning_rate": 3.019270230163574e-06, "loss": 16.7974, "step": 35055 }, { "epoch": 0.6407955106293527, "grad_norm": 5.577814804313765, "learning_rate": 3.018998439525038e-06, "loss": 17.102, "step": 35056 }, { "epoch": 0.6408137898257993, "grad_norm": 7.275979626997634, "learning_rate": 3.0187266558297406e-06, "loss": 18.0446, "step": 35057 }, { "epoch": 0.6408320690222458, "grad_norm": 7.325703715676992, "learning_rate": 3.018454879078633e-06, "loss": 17.7612, "step": 35058 }, { "epoch": 0.6408503482186924, "grad_norm": 6.7162626682947115, "learning_rate": 3.018183109272671e-06, "loss": 17.7041, "step": 35059 }, { "epoch": 0.6408686274151388, "grad_norm": 6.411287357626588, "learning_rate": 3.017911346412804e-06, "loss": 17.4465, "step": 35060 }, { "epoch": 0.6408869066115853, "grad_norm": 5.973484880749222, "learning_rate": 3.0176395904999844e-06, "loss": 17.1805, "step": 35061 }, { "epoch": 0.6409051858080319, "grad_norm": 5.043643677877691, "learning_rate": 3.017367841535169e-06, "loss": 16.9325, "step": 35062 }, { "epoch": 0.6409234650044784, "grad_norm": 5.706277153268066, "learning_rate": 3.0170960995193064e-06, "loss": 17.2518, "step": 35063 }, { "epoch": 0.640941744200925, "grad_norm": 4.89949248741395, "learning_rate": 3.0168243644533485e-06, "loss": 16.887, "step": 35064 }, { "epoch": 0.6409600233973715, "grad_norm": 6.201281919101275, "learning_rate": 3.01655263633825e-06, "loss": 17.5903, "step": 35065 }, { "epoch": 0.6409783025938179, "grad_norm": 6.679795561134589, "learning_rate": 3.016280915174961e-06, "loss": 17.481, "step": 35066 }, { "epoch": 0.6409965817902645, "grad_norm": 6.395983696113611, "learning_rate": 3.016009200964438e-06, "loss": 17.521, "step": 35067 }, { "epoch": 0.641014860986711, "grad_norm": 7.464122950308207, "learning_rate": 3.01573749370763e-06, "loss": 17.9174, "step": 35068 }, { "epoch": 0.6410331401831576, "grad_norm": 6.237470156176921, "learning_rate": 3.0154657934054875e-06, "loss": 17.1843, "step": 35069 }, { "epoch": 0.6410514193796041, "grad_norm": 7.3703900862783565, "learning_rate": 3.0151941000589668e-06, "loss": 17.6763, "step": 35070 }, { "epoch": 0.6410696985760506, "grad_norm": 5.531227874442082, "learning_rate": 3.0149224136690197e-06, "loss": 17.2562, "step": 35071 }, { "epoch": 0.6410879777724972, "grad_norm": 5.804478775970429, "learning_rate": 3.014650734236595e-06, "loss": 17.1721, "step": 35072 }, { "epoch": 0.6411062569689436, "grad_norm": 6.00369163691983, "learning_rate": 3.014379061762649e-06, "loss": 17.24, "step": 35073 }, { "epoch": 0.6411245361653902, "grad_norm": 5.055630017438661, "learning_rate": 3.0141073962481313e-06, "loss": 17.0011, "step": 35074 }, { "epoch": 0.6411428153618367, "grad_norm": 5.328819071980298, "learning_rate": 3.013835737693994e-06, "loss": 16.9341, "step": 35075 }, { "epoch": 0.6411610945582832, "grad_norm": 5.066506073634949, "learning_rate": 3.0135640861011923e-06, "loss": 17.0062, "step": 35076 }, { "epoch": 0.6411793737547298, "grad_norm": 7.926231454661716, "learning_rate": 3.013292441470674e-06, "loss": 17.9116, "step": 35077 }, { "epoch": 0.6411976529511763, "grad_norm": 4.794030670813784, "learning_rate": 3.0130208038033946e-06, "loss": 16.8202, "step": 35078 }, { "epoch": 0.6412159321476227, "grad_norm": 6.53580980598016, "learning_rate": 3.0127491731003043e-06, "loss": 17.2946, "step": 35079 }, { "epoch": 0.6412342113440693, "grad_norm": 6.31162937358768, "learning_rate": 3.012477549362355e-06, "loss": 17.5294, "step": 35080 }, { "epoch": 0.6412524905405158, "grad_norm": 4.724191112820037, "learning_rate": 3.0122059325905006e-06, "loss": 16.8373, "step": 35081 }, { "epoch": 0.6412707697369624, "grad_norm": 7.282665524674206, "learning_rate": 3.0119343227856918e-06, "loss": 17.9263, "step": 35082 }, { "epoch": 0.6412890489334089, "grad_norm": 6.400426878682721, "learning_rate": 3.01166271994888e-06, "loss": 17.616, "step": 35083 }, { "epoch": 0.6413073281298554, "grad_norm": 6.203547161529796, "learning_rate": 3.0113911240810184e-06, "loss": 17.358, "step": 35084 }, { "epoch": 0.641325607326302, "grad_norm": 6.051220588470022, "learning_rate": 3.011119535183059e-06, "loss": 17.4358, "step": 35085 }, { "epoch": 0.6413438865227484, "grad_norm": 7.108221447651075, "learning_rate": 3.0108479532559507e-06, "loss": 17.6501, "step": 35086 }, { "epoch": 0.641362165719195, "grad_norm": 6.902297803102377, "learning_rate": 3.0105763783006498e-06, "loss": 18.2238, "step": 35087 }, { "epoch": 0.6413804449156415, "grad_norm": 5.475748181067824, "learning_rate": 3.0103048103181067e-06, "loss": 16.9433, "step": 35088 }, { "epoch": 0.641398724112088, "grad_norm": 6.713452610878993, "learning_rate": 3.0100332493092697e-06, "loss": 17.7615, "step": 35089 }, { "epoch": 0.6414170033085346, "grad_norm": 6.506019373043577, "learning_rate": 3.0097616952750964e-06, "loss": 17.4079, "step": 35090 }, { "epoch": 0.641435282504981, "grad_norm": 6.291484019473829, "learning_rate": 3.009490148216534e-06, "loss": 17.5239, "step": 35091 }, { "epoch": 0.6414535617014276, "grad_norm": 6.750727180149662, "learning_rate": 3.009218608134535e-06, "loss": 17.7281, "step": 35092 }, { "epoch": 0.6414718408978741, "grad_norm": 7.5822144181036935, "learning_rate": 3.0089470750300543e-06, "loss": 17.5809, "step": 35093 }, { "epoch": 0.6414901200943206, "grad_norm": 6.2559114844641455, "learning_rate": 3.00867554890404e-06, "loss": 17.0054, "step": 35094 }, { "epoch": 0.6415083992907672, "grad_norm": 6.288163601490233, "learning_rate": 3.0084040297574456e-06, "loss": 17.4625, "step": 35095 }, { "epoch": 0.6415266784872137, "grad_norm": 6.159216545573027, "learning_rate": 3.0081325175912224e-06, "loss": 17.4606, "step": 35096 }, { "epoch": 0.6415449576836603, "grad_norm": 7.331702848303067, "learning_rate": 3.00786101240632e-06, "loss": 18.1506, "step": 35097 }, { "epoch": 0.6415632368801067, "grad_norm": 7.419556574898403, "learning_rate": 3.007589514203695e-06, "loss": 17.9604, "step": 35098 }, { "epoch": 0.6415815160765532, "grad_norm": 5.9503973228377545, "learning_rate": 3.0073180229842954e-06, "loss": 17.1347, "step": 35099 }, { "epoch": 0.6415997952729998, "grad_norm": 5.928788102717097, "learning_rate": 3.007046538749071e-06, "loss": 17.3329, "step": 35100 }, { "epoch": 0.6416180744694463, "grad_norm": 5.94649863767594, "learning_rate": 3.0067750614989767e-06, "loss": 17.3017, "step": 35101 }, { "epoch": 0.6416363536658929, "grad_norm": 7.191943700650584, "learning_rate": 3.006503591234964e-06, "loss": 17.9675, "step": 35102 }, { "epoch": 0.6416546328623394, "grad_norm": 6.343206504284471, "learning_rate": 3.00623212795798e-06, "loss": 17.4965, "step": 35103 }, { "epoch": 0.6416729120587858, "grad_norm": 6.43084960437976, "learning_rate": 3.0059606716689827e-06, "loss": 17.0234, "step": 35104 }, { "epoch": 0.6416911912552324, "grad_norm": 6.579867640253702, "learning_rate": 3.0056892223689192e-06, "loss": 17.529, "step": 35105 }, { "epoch": 0.6417094704516789, "grad_norm": 6.086732061899755, "learning_rate": 3.0054177800587402e-06, "loss": 17.049, "step": 35106 }, { "epoch": 0.6417277496481255, "grad_norm": 6.218506147400159, "learning_rate": 3.0051463447394015e-06, "loss": 17.4472, "step": 35107 }, { "epoch": 0.641746028844572, "grad_norm": 5.49117092864269, "learning_rate": 3.0048749164118514e-06, "loss": 17.2229, "step": 35108 }, { "epoch": 0.6417643080410185, "grad_norm": 5.518331803810532, "learning_rate": 3.00460349507704e-06, "loss": 16.9707, "step": 35109 }, { "epoch": 0.6417825872374651, "grad_norm": 7.251288485504199, "learning_rate": 3.00433208073592e-06, "loss": 17.508, "step": 35110 }, { "epoch": 0.6418008664339115, "grad_norm": 6.150885026757219, "learning_rate": 3.004060673389443e-06, "loss": 17.321, "step": 35111 }, { "epoch": 0.6418191456303581, "grad_norm": 7.155540783655615, "learning_rate": 3.003789273038562e-06, "loss": 17.8188, "step": 35112 }, { "epoch": 0.6418374248268046, "grad_norm": 5.771368757012368, "learning_rate": 3.0035178796842263e-06, "loss": 17.6256, "step": 35113 }, { "epoch": 0.6418557040232511, "grad_norm": 6.447689396721246, "learning_rate": 3.0032464933273853e-06, "loss": 17.2047, "step": 35114 }, { "epoch": 0.6418739832196977, "grad_norm": 6.6538129971393865, "learning_rate": 3.0029751139689923e-06, "loss": 17.5946, "step": 35115 }, { "epoch": 0.6418922624161442, "grad_norm": 4.948776379282674, "learning_rate": 3.00270374161e-06, "loss": 16.9153, "step": 35116 }, { "epoch": 0.6419105416125908, "grad_norm": 6.747758055939593, "learning_rate": 3.0024323762513557e-06, "loss": 17.8782, "step": 35117 }, { "epoch": 0.6419288208090372, "grad_norm": 7.521048134035131, "learning_rate": 3.0021610178940142e-06, "loss": 17.4567, "step": 35118 }, { "epoch": 0.6419471000054837, "grad_norm": 6.189988551347465, "learning_rate": 3.0018896665389243e-06, "loss": 17.4964, "step": 35119 }, { "epoch": 0.6419653792019303, "grad_norm": 8.784264741237758, "learning_rate": 3.0016183221870367e-06, "loss": 18.1447, "step": 35120 }, { "epoch": 0.6419836583983768, "grad_norm": 7.266386155586661, "learning_rate": 3.0013469848393056e-06, "loss": 17.7265, "step": 35121 }, { "epoch": 0.6420019375948234, "grad_norm": 6.40058501542663, "learning_rate": 3.0010756544966797e-06, "loss": 17.2613, "step": 35122 }, { "epoch": 0.6420202167912699, "grad_norm": 7.813437113822527, "learning_rate": 3.000804331160108e-06, "loss": 17.4059, "step": 35123 }, { "epoch": 0.6420384959877163, "grad_norm": 5.483851935444153, "learning_rate": 3.000533014830545e-06, "loss": 17.0843, "step": 35124 }, { "epoch": 0.6420567751841629, "grad_norm": 5.042576155184109, "learning_rate": 3.000261705508939e-06, "loss": 16.9631, "step": 35125 }, { "epoch": 0.6420750543806094, "grad_norm": 6.75705543832634, "learning_rate": 2.9999904031962445e-06, "loss": 17.8895, "step": 35126 }, { "epoch": 0.642093333577056, "grad_norm": 6.146182952858906, "learning_rate": 2.9997191078934095e-06, "loss": 17.3691, "step": 35127 }, { "epoch": 0.6421116127735025, "grad_norm": 7.92534839546518, "learning_rate": 2.9994478196013845e-06, "loss": 18.1566, "step": 35128 }, { "epoch": 0.642129891969949, "grad_norm": 5.517479451360169, "learning_rate": 2.9991765383211214e-06, "loss": 16.9502, "step": 35129 }, { "epoch": 0.6421481711663956, "grad_norm": 7.711521721288365, "learning_rate": 2.998905264053572e-06, "loss": 17.8491, "step": 35130 }, { "epoch": 0.642166450362842, "grad_norm": 5.863111328101389, "learning_rate": 2.9986339967996834e-06, "loss": 17.3919, "step": 35131 }, { "epoch": 0.6421847295592886, "grad_norm": 6.195632704279807, "learning_rate": 2.998362736560412e-06, "loss": 17.1781, "step": 35132 }, { "epoch": 0.6422030087557351, "grad_norm": 6.389514635212641, "learning_rate": 2.9980914833367048e-06, "loss": 17.5482, "step": 35133 }, { "epoch": 0.6422212879521816, "grad_norm": 7.910042511229052, "learning_rate": 2.9978202371295113e-06, "loss": 18.1475, "step": 35134 }, { "epoch": 0.6422395671486282, "grad_norm": 5.646723482140053, "learning_rate": 2.9975489979397863e-06, "loss": 17.421, "step": 35135 }, { "epoch": 0.6422578463450747, "grad_norm": 6.227228512055668, "learning_rate": 2.9972777657684776e-06, "loss": 17.3925, "step": 35136 }, { "epoch": 0.6422761255415212, "grad_norm": 5.317597091725895, "learning_rate": 2.9970065406165365e-06, "loss": 17.038, "step": 35137 }, { "epoch": 0.6422944047379677, "grad_norm": 6.4047607615004285, "learning_rate": 2.9967353224849137e-06, "loss": 17.3846, "step": 35138 }, { "epoch": 0.6423126839344142, "grad_norm": 7.772254758962995, "learning_rate": 2.996464111374561e-06, "loss": 17.8154, "step": 35139 }, { "epoch": 0.6423309631308608, "grad_norm": 6.948152928596945, "learning_rate": 2.996192907286425e-06, "loss": 17.6993, "step": 35140 }, { "epoch": 0.6423492423273073, "grad_norm": 7.394388030849673, "learning_rate": 2.99592171022146e-06, "loss": 17.7403, "step": 35141 }, { "epoch": 0.6423675215237539, "grad_norm": 6.166875050336154, "learning_rate": 2.9956505201806163e-06, "loss": 17.4952, "step": 35142 }, { "epoch": 0.6423858007202004, "grad_norm": 6.038086714002541, "learning_rate": 2.9953793371648433e-06, "loss": 17.3823, "step": 35143 }, { "epoch": 0.6424040799166468, "grad_norm": 6.861490292657565, "learning_rate": 2.995108161175092e-06, "loss": 17.3257, "step": 35144 }, { "epoch": 0.6424223591130934, "grad_norm": 6.127236298026211, "learning_rate": 2.994836992212311e-06, "loss": 17.4782, "step": 35145 }, { "epoch": 0.6424406383095399, "grad_norm": 7.429518286559677, "learning_rate": 2.9945658302774537e-06, "loss": 17.391, "step": 35146 }, { "epoch": 0.6424589175059864, "grad_norm": 5.13907444185478, "learning_rate": 2.9942946753714707e-06, "loss": 17.0626, "step": 35147 }, { "epoch": 0.642477196702433, "grad_norm": 6.131816197521755, "learning_rate": 2.994023527495308e-06, "loss": 17.1998, "step": 35148 }, { "epoch": 0.6424954758988795, "grad_norm": 6.532283269215748, "learning_rate": 2.9937523866499207e-06, "loss": 17.5686, "step": 35149 }, { "epoch": 0.642513755095326, "grad_norm": 5.88225935340193, "learning_rate": 2.9934812528362565e-06, "loss": 16.8072, "step": 35150 }, { "epoch": 0.6425320342917725, "grad_norm": 7.38404068178124, "learning_rate": 2.9932101260552648e-06, "loss": 17.9843, "step": 35151 }, { "epoch": 0.642550313488219, "grad_norm": 7.525639497911595, "learning_rate": 2.9929390063079e-06, "loss": 17.935, "step": 35152 }, { "epoch": 0.6425685926846656, "grad_norm": 5.645026860667808, "learning_rate": 2.99266789359511e-06, "loss": 17.0402, "step": 35153 }, { "epoch": 0.6425868718811121, "grad_norm": 6.45440550543997, "learning_rate": 2.992396787917843e-06, "loss": 17.7485, "step": 35154 }, { "epoch": 0.6426051510775587, "grad_norm": 6.685771674929482, "learning_rate": 2.992125689277051e-06, "loss": 17.3628, "step": 35155 }, { "epoch": 0.6426234302740051, "grad_norm": 6.123488923485324, "learning_rate": 2.9918545976736847e-06, "loss": 17.0589, "step": 35156 }, { "epoch": 0.6426417094704516, "grad_norm": 6.973545989468391, "learning_rate": 2.9915835131086948e-06, "loss": 17.7061, "step": 35157 }, { "epoch": 0.6426599886668982, "grad_norm": 6.327575621975122, "learning_rate": 2.99131243558303e-06, "loss": 17.5864, "step": 35158 }, { "epoch": 0.6426782678633447, "grad_norm": 7.290509283651967, "learning_rate": 2.99104136509764e-06, "loss": 17.7739, "step": 35159 }, { "epoch": 0.6426965470597913, "grad_norm": 7.0461737093645205, "learning_rate": 2.9907703016534757e-06, "loss": 17.7407, "step": 35160 }, { "epoch": 0.6427148262562378, "grad_norm": 5.756968353691334, "learning_rate": 2.990499245251488e-06, "loss": 17.3447, "step": 35161 }, { "epoch": 0.6427331054526843, "grad_norm": 6.267060731914205, "learning_rate": 2.990228195892625e-06, "loss": 17.6205, "step": 35162 }, { "epoch": 0.6427513846491308, "grad_norm": 6.6497462438286075, "learning_rate": 2.989957153577839e-06, "loss": 17.4372, "step": 35163 }, { "epoch": 0.6427696638455773, "grad_norm": 7.3616761207129615, "learning_rate": 2.9896861183080784e-06, "loss": 17.7671, "step": 35164 }, { "epoch": 0.6427879430420239, "grad_norm": 6.6472944840476815, "learning_rate": 2.989415090084292e-06, "loss": 17.3717, "step": 35165 }, { "epoch": 0.6428062222384704, "grad_norm": 7.719424906489019, "learning_rate": 2.9891440689074337e-06, "loss": 18.0026, "step": 35166 }, { "epoch": 0.6428245014349169, "grad_norm": 6.126721568651058, "learning_rate": 2.9888730547784506e-06, "loss": 17.6545, "step": 35167 }, { "epoch": 0.6428427806313635, "grad_norm": 6.666203906334508, "learning_rate": 2.9886020476982914e-06, "loss": 17.5509, "step": 35168 }, { "epoch": 0.6428610598278099, "grad_norm": 6.265512291107219, "learning_rate": 2.988331047667908e-06, "loss": 17.5323, "step": 35169 }, { "epoch": 0.6428793390242565, "grad_norm": 7.339153455772866, "learning_rate": 2.9880600546882502e-06, "loss": 18.3239, "step": 35170 }, { "epoch": 0.642897618220703, "grad_norm": 8.03940143430317, "learning_rate": 2.987789068760266e-06, "loss": 17.9647, "step": 35171 }, { "epoch": 0.6429158974171495, "grad_norm": 5.795562032509426, "learning_rate": 2.987518089884909e-06, "loss": 17.3271, "step": 35172 }, { "epoch": 0.6429341766135961, "grad_norm": 6.19068108617424, "learning_rate": 2.9872471180631234e-06, "loss": 17.538, "step": 35173 }, { "epoch": 0.6429524558100426, "grad_norm": 5.790796018013775, "learning_rate": 2.9869761532958637e-06, "loss": 17.0868, "step": 35174 }, { "epoch": 0.6429707350064892, "grad_norm": 6.429240001179408, "learning_rate": 2.986705195584079e-06, "loss": 17.807, "step": 35175 }, { "epoch": 0.6429890142029356, "grad_norm": 6.840750627521639, "learning_rate": 2.9864342449287153e-06, "loss": 17.8098, "step": 35176 }, { "epoch": 0.6430072933993821, "grad_norm": 5.945938374855326, "learning_rate": 2.9861633013307266e-06, "loss": 17.354, "step": 35177 }, { "epoch": 0.6430255725958287, "grad_norm": 6.2282950368421695, "learning_rate": 2.985892364791061e-06, "loss": 17.1733, "step": 35178 }, { "epoch": 0.6430438517922752, "grad_norm": 5.636361026532716, "learning_rate": 2.985621435310666e-06, "loss": 17.451, "step": 35179 }, { "epoch": 0.6430621309887218, "grad_norm": 8.119338700151893, "learning_rate": 2.9853505128904945e-06, "loss": 18.1809, "step": 35180 }, { "epoch": 0.6430804101851683, "grad_norm": 7.457676010282139, "learning_rate": 2.9850795975314962e-06, "loss": 17.955, "step": 35181 }, { "epoch": 0.6430986893816147, "grad_norm": 7.632416368077536, "learning_rate": 2.984808689234616e-06, "loss": 17.6318, "step": 35182 }, { "epoch": 0.6431169685780613, "grad_norm": 6.725066117072863, "learning_rate": 2.9845377880008075e-06, "loss": 17.9474, "step": 35183 }, { "epoch": 0.6431352477745078, "grad_norm": 6.993556775127604, "learning_rate": 2.9842668938310202e-06, "loss": 17.6594, "step": 35184 }, { "epoch": 0.6431535269709544, "grad_norm": 6.7705578721937005, "learning_rate": 2.983996006726201e-06, "loss": 17.6362, "step": 35185 }, { "epoch": 0.6431718061674009, "grad_norm": 6.707318000213458, "learning_rate": 2.9837251266873022e-06, "loss": 17.4416, "step": 35186 }, { "epoch": 0.6431900853638474, "grad_norm": 6.412284673827537, "learning_rate": 2.9834542537152705e-06, "loss": 17.1854, "step": 35187 }, { "epoch": 0.643208364560294, "grad_norm": 6.143176747239371, "learning_rate": 2.9831833878110574e-06, "loss": 17.0482, "step": 35188 }, { "epoch": 0.6432266437567404, "grad_norm": 7.158474751451237, "learning_rate": 2.982912528975612e-06, "loss": 17.6655, "step": 35189 }, { "epoch": 0.643244922953187, "grad_norm": 7.344986191001799, "learning_rate": 2.982641677209882e-06, "loss": 17.7474, "step": 35190 }, { "epoch": 0.6432632021496335, "grad_norm": 6.130390655751102, "learning_rate": 2.982370832514819e-06, "loss": 17.0766, "step": 35191 }, { "epoch": 0.64328148134608, "grad_norm": 6.29937413587178, "learning_rate": 2.982099994891371e-06, "loss": 17.4332, "step": 35192 }, { "epoch": 0.6432997605425266, "grad_norm": 6.404677452450483, "learning_rate": 2.9818291643404864e-06, "loss": 17.2759, "step": 35193 }, { "epoch": 0.6433180397389731, "grad_norm": 5.870697556860884, "learning_rate": 2.9815583408631164e-06, "loss": 17.2271, "step": 35194 }, { "epoch": 0.6433363189354196, "grad_norm": 7.98227477362779, "learning_rate": 2.98128752446021e-06, "loss": 18.5996, "step": 35195 }, { "epoch": 0.6433545981318661, "grad_norm": 5.728041776495218, "learning_rate": 2.981016715132714e-06, "loss": 17.0903, "step": 35196 }, { "epoch": 0.6433728773283126, "grad_norm": 9.57212214783097, "learning_rate": 2.98074591288158e-06, "loss": 17.853, "step": 35197 }, { "epoch": 0.6433911565247592, "grad_norm": 7.562405016852879, "learning_rate": 2.9804751177077574e-06, "loss": 18.061, "step": 35198 }, { "epoch": 0.6434094357212057, "grad_norm": 6.8137386185581805, "learning_rate": 2.980204329612192e-06, "loss": 17.3659, "step": 35199 }, { "epoch": 0.6434277149176523, "grad_norm": 6.460096700527755, "learning_rate": 2.979933548595837e-06, "loss": 17.3776, "step": 35200 }, { "epoch": 0.6434459941140988, "grad_norm": 5.22644416332497, "learning_rate": 2.9796627746596387e-06, "loss": 16.9705, "step": 35201 }, { "epoch": 0.6434642733105452, "grad_norm": 5.692367197461771, "learning_rate": 2.979392007804546e-06, "loss": 17.2823, "step": 35202 }, { "epoch": 0.6434825525069918, "grad_norm": 6.1007790270413915, "learning_rate": 2.9791212480315113e-06, "loss": 17.2802, "step": 35203 }, { "epoch": 0.6435008317034383, "grad_norm": 6.1261614707804215, "learning_rate": 2.978850495341479e-06, "loss": 17.3268, "step": 35204 }, { "epoch": 0.6435191108998849, "grad_norm": 6.167498586759649, "learning_rate": 2.9785797497354023e-06, "loss": 17.1797, "step": 35205 }, { "epoch": 0.6435373900963314, "grad_norm": 5.934594965842809, "learning_rate": 2.978309011214228e-06, "loss": 17.2224, "step": 35206 }, { "epoch": 0.6435556692927779, "grad_norm": 5.8690272605663925, "learning_rate": 2.9780382797789033e-06, "loss": 17.3302, "step": 35207 }, { "epoch": 0.6435739484892244, "grad_norm": 5.215125219451224, "learning_rate": 2.977767555430381e-06, "loss": 16.9169, "step": 35208 }, { "epoch": 0.6435922276856709, "grad_norm": 5.624194546652624, "learning_rate": 2.977496838169607e-06, "loss": 16.9195, "step": 35209 }, { "epoch": 0.6436105068821175, "grad_norm": 6.647841722695466, "learning_rate": 2.9772261279975313e-06, "loss": 17.5196, "step": 35210 }, { "epoch": 0.643628786078564, "grad_norm": 4.950089438833019, "learning_rate": 2.9769554249151023e-06, "loss": 17.1395, "step": 35211 }, { "epoch": 0.6436470652750105, "grad_norm": 8.260018912779412, "learning_rate": 2.9766847289232705e-06, "loss": 17.7812, "step": 35212 }, { "epoch": 0.6436653444714571, "grad_norm": 6.262805494430734, "learning_rate": 2.97641404002298e-06, "loss": 17.3938, "step": 35213 }, { "epoch": 0.6436836236679035, "grad_norm": 6.185521668514567, "learning_rate": 2.9761433582151846e-06, "loss": 17.2107, "step": 35214 }, { "epoch": 0.64370190286435, "grad_norm": 5.660167760040412, "learning_rate": 2.975872683500832e-06, "loss": 16.9803, "step": 35215 }, { "epoch": 0.6437201820607966, "grad_norm": 6.152577648288215, "learning_rate": 2.9756020158808674e-06, "loss": 17.6137, "step": 35216 }, { "epoch": 0.6437384612572431, "grad_norm": 7.596502751849284, "learning_rate": 2.9753313553562447e-06, "loss": 17.8825, "step": 35217 }, { "epoch": 0.6437567404536897, "grad_norm": 6.628957799163321, "learning_rate": 2.975060701927908e-06, "loss": 17.6634, "step": 35218 }, { "epoch": 0.6437750196501362, "grad_norm": 7.0010978520196065, "learning_rate": 2.9747900555968083e-06, "loss": 17.6362, "step": 35219 }, { "epoch": 0.6437932988465827, "grad_norm": 8.020094841019423, "learning_rate": 2.9745194163638953e-06, "loss": 17.753, "step": 35220 }, { "epoch": 0.6438115780430292, "grad_norm": 5.32367232638624, "learning_rate": 2.9742487842301136e-06, "loss": 17.1654, "step": 35221 }, { "epoch": 0.6438298572394757, "grad_norm": 6.7483237720149285, "learning_rate": 2.9739781591964162e-06, "loss": 17.7193, "step": 35222 }, { "epoch": 0.6438481364359223, "grad_norm": 6.880548321234416, "learning_rate": 2.9737075412637484e-06, "loss": 17.7255, "step": 35223 }, { "epoch": 0.6438664156323688, "grad_norm": 7.76146936159528, "learning_rate": 2.973436930433059e-06, "loss": 17.8052, "step": 35224 }, { "epoch": 0.6438846948288153, "grad_norm": 5.819556058770356, "learning_rate": 2.973166326705299e-06, "loss": 17.38, "step": 35225 }, { "epoch": 0.6439029740252619, "grad_norm": 5.491548739064336, "learning_rate": 2.972895730081416e-06, "loss": 17.2543, "step": 35226 }, { "epoch": 0.6439212532217083, "grad_norm": 7.39170358352173, "learning_rate": 2.972625140562355e-06, "loss": 17.7675, "step": 35227 }, { "epoch": 0.6439395324181549, "grad_norm": 6.163234322745703, "learning_rate": 2.9723545581490674e-06, "loss": 17.4231, "step": 35228 }, { "epoch": 0.6439578116146014, "grad_norm": 6.444644384532202, "learning_rate": 2.972083982842503e-06, "loss": 17.6199, "step": 35229 }, { "epoch": 0.6439760908110479, "grad_norm": 5.704095753452675, "learning_rate": 2.9718134146436064e-06, "loss": 17.2527, "step": 35230 }, { "epoch": 0.6439943700074945, "grad_norm": 5.511795985235041, "learning_rate": 2.9715428535533287e-06, "loss": 17.2699, "step": 35231 }, { "epoch": 0.644012649203941, "grad_norm": 6.72086078089823, "learning_rate": 2.9712722995726174e-06, "loss": 17.227, "step": 35232 }, { "epoch": 0.6440309284003876, "grad_norm": 4.5288095399816095, "learning_rate": 2.9710017527024186e-06, "loss": 16.7389, "step": 35233 }, { "epoch": 0.644049207596834, "grad_norm": 6.396279251463286, "learning_rate": 2.9707312129436857e-06, "loss": 17.4794, "step": 35234 }, { "epoch": 0.6440674867932805, "grad_norm": 5.796651804317154, "learning_rate": 2.9704606802973614e-06, "loss": 17.4898, "step": 35235 }, { "epoch": 0.6440857659897271, "grad_norm": 6.198011994489148, "learning_rate": 2.9701901547643973e-06, "loss": 17.3122, "step": 35236 }, { "epoch": 0.6441040451861736, "grad_norm": 6.491580441887837, "learning_rate": 2.9699196363457404e-06, "loss": 17.5133, "step": 35237 }, { "epoch": 0.6441223243826202, "grad_norm": 5.5372050822841405, "learning_rate": 2.9696491250423386e-06, "loss": 16.9737, "step": 35238 }, { "epoch": 0.6441406035790667, "grad_norm": 5.936729102238187, "learning_rate": 2.969378620855142e-06, "loss": 17.2648, "step": 35239 }, { "epoch": 0.6441588827755131, "grad_norm": 4.90843213686631, "learning_rate": 2.9691081237850966e-06, "loss": 16.8325, "step": 35240 }, { "epoch": 0.6441771619719597, "grad_norm": 6.639278893715042, "learning_rate": 2.968837633833149e-06, "loss": 17.39, "step": 35241 }, { "epoch": 0.6441954411684062, "grad_norm": 5.395909324666016, "learning_rate": 2.9685671510002514e-06, "loss": 17.0882, "step": 35242 }, { "epoch": 0.6442137203648528, "grad_norm": 5.756554966463989, "learning_rate": 2.96829667528735e-06, "loss": 17.2007, "step": 35243 }, { "epoch": 0.6442319995612993, "grad_norm": 6.811862762108879, "learning_rate": 2.9680262066953897e-06, "loss": 17.7462, "step": 35244 }, { "epoch": 0.6442502787577458, "grad_norm": 6.174358537354301, "learning_rate": 2.9677557452253235e-06, "loss": 17.1931, "step": 35245 }, { "epoch": 0.6442685579541924, "grad_norm": 6.681002310597236, "learning_rate": 2.967485290878097e-06, "loss": 17.3458, "step": 35246 }, { "epoch": 0.6442868371506388, "grad_norm": 7.742691149935125, "learning_rate": 2.9672148436546564e-06, "loss": 17.8321, "step": 35247 }, { "epoch": 0.6443051163470854, "grad_norm": 4.555189269637876, "learning_rate": 2.9669444035559536e-06, "loss": 16.6786, "step": 35248 }, { "epoch": 0.6443233955435319, "grad_norm": 5.828497126234661, "learning_rate": 2.9666739705829316e-06, "loss": 17.1455, "step": 35249 }, { "epoch": 0.6443416747399784, "grad_norm": 5.272978538988546, "learning_rate": 2.966403544736544e-06, "loss": 17.0089, "step": 35250 }, { "epoch": 0.644359953936425, "grad_norm": 6.7026384437613125, "learning_rate": 2.966133126017734e-06, "loss": 17.6099, "step": 35251 }, { "epoch": 0.6443782331328715, "grad_norm": 6.037032295357954, "learning_rate": 2.96586271442745e-06, "loss": 17.0902, "step": 35252 }, { "epoch": 0.644396512329318, "grad_norm": 6.775224417117338, "learning_rate": 2.965592309966643e-06, "loss": 17.559, "step": 35253 }, { "epoch": 0.6444147915257645, "grad_norm": 8.872935028110293, "learning_rate": 2.9653219126362578e-06, "loss": 18.607, "step": 35254 }, { "epoch": 0.644433070722211, "grad_norm": 8.856844148685907, "learning_rate": 2.9650515224372404e-06, "loss": 18.2402, "step": 35255 }, { "epoch": 0.6444513499186576, "grad_norm": 5.771000399014974, "learning_rate": 2.964781139370543e-06, "loss": 17.0227, "step": 35256 }, { "epoch": 0.6444696291151041, "grad_norm": 5.444487626520871, "learning_rate": 2.9645107634371104e-06, "loss": 17.156, "step": 35257 }, { "epoch": 0.6444879083115507, "grad_norm": 5.408945622468322, "learning_rate": 2.9642403946378905e-06, "loss": 16.9344, "step": 35258 }, { "epoch": 0.6445061875079972, "grad_norm": 5.214597822199496, "learning_rate": 2.963970032973833e-06, "loss": 17.0763, "step": 35259 }, { "epoch": 0.6445244667044436, "grad_norm": 6.674620934627946, "learning_rate": 2.963699678445883e-06, "loss": 17.5705, "step": 35260 }, { "epoch": 0.6445427459008902, "grad_norm": 5.081340501061187, "learning_rate": 2.9634293310549873e-06, "loss": 16.8821, "step": 35261 }, { "epoch": 0.6445610250973367, "grad_norm": 6.569780816377062, "learning_rate": 2.9631589908020974e-06, "loss": 17.6114, "step": 35262 }, { "epoch": 0.6445793042937833, "grad_norm": 7.401319247469755, "learning_rate": 2.9628886576881565e-06, "loss": 17.6857, "step": 35263 }, { "epoch": 0.6445975834902298, "grad_norm": 6.150630392281859, "learning_rate": 2.962618331714116e-06, "loss": 17.2357, "step": 35264 }, { "epoch": 0.6446158626866763, "grad_norm": 4.821902802591994, "learning_rate": 2.9623480128809207e-06, "loss": 17.0109, "step": 35265 }, { "epoch": 0.6446341418831228, "grad_norm": 6.047329000203647, "learning_rate": 2.9620777011895176e-06, "loss": 17.4412, "step": 35266 }, { "epoch": 0.6446524210795693, "grad_norm": 5.069656373078538, "learning_rate": 2.9618073966408574e-06, "loss": 16.8439, "step": 35267 }, { "epoch": 0.6446707002760159, "grad_norm": 9.668241663048068, "learning_rate": 2.961537099235884e-06, "loss": 18.169, "step": 35268 }, { "epoch": 0.6446889794724624, "grad_norm": 6.378936535261019, "learning_rate": 2.961266808975547e-06, "loss": 17.5974, "step": 35269 }, { "epoch": 0.6447072586689089, "grad_norm": 8.402165593836772, "learning_rate": 2.960996525860792e-06, "loss": 17.856, "step": 35270 }, { "epoch": 0.6447255378653555, "grad_norm": 5.916918474624335, "learning_rate": 2.9607262498925687e-06, "loss": 17.2384, "step": 35271 }, { "epoch": 0.644743817061802, "grad_norm": 6.498089424048543, "learning_rate": 2.960455981071821e-06, "loss": 17.8138, "step": 35272 }, { "epoch": 0.6447620962582485, "grad_norm": 7.276289639871461, "learning_rate": 2.960185719399499e-06, "loss": 17.6358, "step": 35273 }, { "epoch": 0.644780375454695, "grad_norm": 5.88108709839017, "learning_rate": 2.9599154648765504e-06, "loss": 17.4563, "step": 35274 }, { "epoch": 0.6447986546511415, "grad_norm": 6.62335120322768, "learning_rate": 2.959645217503918e-06, "loss": 17.4728, "step": 35275 }, { "epoch": 0.6448169338475881, "grad_norm": 6.629261954234043, "learning_rate": 2.959374977282555e-06, "loss": 17.6529, "step": 35276 }, { "epoch": 0.6448352130440346, "grad_norm": 6.05829073174541, "learning_rate": 2.9591047442134045e-06, "loss": 17.2278, "step": 35277 }, { "epoch": 0.6448534922404812, "grad_norm": 7.226707601630351, "learning_rate": 2.9588345182974136e-06, "loss": 17.5668, "step": 35278 }, { "epoch": 0.6448717714369276, "grad_norm": 6.043331433065606, "learning_rate": 2.958564299535533e-06, "loss": 17.4857, "step": 35279 }, { "epoch": 0.6448900506333741, "grad_norm": 6.3948713834579785, "learning_rate": 2.9582940879287047e-06, "loss": 17.5472, "step": 35280 }, { "epoch": 0.6449083298298207, "grad_norm": 6.604391768068357, "learning_rate": 2.9580238834778805e-06, "loss": 17.7951, "step": 35281 }, { "epoch": 0.6449266090262672, "grad_norm": 6.037418114088963, "learning_rate": 2.957753686184005e-06, "loss": 17.3113, "step": 35282 }, { "epoch": 0.6449448882227137, "grad_norm": 5.644378916722642, "learning_rate": 2.9574834960480246e-06, "loss": 17.0606, "step": 35283 }, { "epoch": 0.6449631674191603, "grad_norm": 12.464759055539863, "learning_rate": 2.957213313070889e-06, "loss": 17.4897, "step": 35284 }, { "epoch": 0.6449814466156067, "grad_norm": 6.047724090663151, "learning_rate": 2.9569431372535433e-06, "loss": 17.3908, "step": 35285 }, { "epoch": 0.6449997258120533, "grad_norm": 5.505692905706478, "learning_rate": 2.956672968596933e-06, "loss": 17.1787, "step": 35286 }, { "epoch": 0.6450180050084998, "grad_norm": 5.458766802461036, "learning_rate": 2.9564028071020072e-06, "loss": 17.3132, "step": 35287 }, { "epoch": 0.6450362842049463, "grad_norm": 6.934973635175382, "learning_rate": 2.956132652769714e-06, "loss": 17.5581, "step": 35288 }, { "epoch": 0.6450545634013929, "grad_norm": 6.9486649523337, "learning_rate": 2.955862505600996e-06, "loss": 17.7858, "step": 35289 }, { "epoch": 0.6450728425978394, "grad_norm": 6.504017827512486, "learning_rate": 2.9555923655968044e-06, "loss": 17.5751, "step": 35290 }, { "epoch": 0.645091121794286, "grad_norm": 6.0104904274548305, "learning_rate": 2.955322232758083e-06, "loss": 17.1881, "step": 35291 }, { "epoch": 0.6451094009907324, "grad_norm": 6.751813834260643, "learning_rate": 2.955052107085779e-06, "loss": 17.7183, "step": 35292 }, { "epoch": 0.6451276801871789, "grad_norm": 5.6344015078692085, "learning_rate": 2.9547819885808414e-06, "loss": 17.2346, "step": 35293 }, { "epoch": 0.6451459593836255, "grad_norm": 7.197147577641071, "learning_rate": 2.9545118772442145e-06, "loss": 17.6849, "step": 35294 }, { "epoch": 0.645164238580072, "grad_norm": 6.3324693652201995, "learning_rate": 2.9542417730768473e-06, "loss": 17.3605, "step": 35295 }, { "epoch": 0.6451825177765186, "grad_norm": 6.626228451339801, "learning_rate": 2.9539716760796844e-06, "loss": 17.3062, "step": 35296 }, { "epoch": 0.6452007969729651, "grad_norm": 6.780216856814973, "learning_rate": 2.953701586253672e-06, "loss": 17.3437, "step": 35297 }, { "epoch": 0.6452190761694115, "grad_norm": 5.687865267793353, "learning_rate": 2.9534315035997603e-06, "loss": 17.0152, "step": 35298 }, { "epoch": 0.6452373553658581, "grad_norm": 6.775077485100025, "learning_rate": 2.9531614281188936e-06, "loss": 17.4368, "step": 35299 }, { "epoch": 0.6452556345623046, "grad_norm": 6.907425113615163, "learning_rate": 2.9528913598120156e-06, "loss": 17.7045, "step": 35300 }, { "epoch": 0.6452739137587512, "grad_norm": 5.78809580475447, "learning_rate": 2.952621298680077e-06, "loss": 16.9762, "step": 35301 }, { "epoch": 0.6452921929551977, "grad_norm": 5.186073059448269, "learning_rate": 2.9523512447240256e-06, "loss": 16.939, "step": 35302 }, { "epoch": 0.6453104721516442, "grad_norm": 5.892072220090105, "learning_rate": 2.9520811979448017e-06, "loss": 17.1886, "step": 35303 }, { "epoch": 0.6453287513480908, "grad_norm": 7.47919705054185, "learning_rate": 2.9518111583433582e-06, "loss": 17.6497, "step": 35304 }, { "epoch": 0.6453470305445372, "grad_norm": 7.228791267249031, "learning_rate": 2.9515411259206376e-06, "loss": 17.4263, "step": 35305 }, { "epoch": 0.6453653097409838, "grad_norm": 5.908956017910196, "learning_rate": 2.9512711006775868e-06, "loss": 17.3599, "step": 35306 }, { "epoch": 0.6453835889374303, "grad_norm": 6.735964220974865, "learning_rate": 2.951001082615155e-06, "loss": 17.7784, "step": 35307 }, { "epoch": 0.6454018681338768, "grad_norm": 5.541701754597606, "learning_rate": 2.9507310717342862e-06, "loss": 17.3783, "step": 35308 }, { "epoch": 0.6454201473303234, "grad_norm": 6.3082356218172775, "learning_rate": 2.950461068035926e-06, "loss": 17.3818, "step": 35309 }, { "epoch": 0.6454384265267699, "grad_norm": 6.58706390221681, "learning_rate": 2.9501910715210223e-06, "loss": 17.7303, "step": 35310 }, { "epoch": 0.6454567057232165, "grad_norm": 6.253667519893648, "learning_rate": 2.94992108219052e-06, "loss": 17.1707, "step": 35311 }, { "epoch": 0.6454749849196629, "grad_norm": 7.207560843419294, "learning_rate": 2.9496511000453687e-06, "loss": 17.6148, "step": 35312 }, { "epoch": 0.6454932641161094, "grad_norm": 6.80265965101469, "learning_rate": 2.9493811250865123e-06, "loss": 17.5007, "step": 35313 }, { "epoch": 0.645511543312556, "grad_norm": 5.758083115405514, "learning_rate": 2.949111157314895e-06, "loss": 17.3514, "step": 35314 }, { "epoch": 0.6455298225090025, "grad_norm": 6.129593521506606, "learning_rate": 2.9488411967314663e-06, "loss": 17.2271, "step": 35315 }, { "epoch": 0.6455481017054491, "grad_norm": 6.174775887246133, "learning_rate": 2.9485712433371716e-06, "loss": 17.511, "step": 35316 }, { "epoch": 0.6455663809018956, "grad_norm": 6.562330425729383, "learning_rate": 2.9483012971329555e-06, "loss": 17.4304, "step": 35317 }, { "epoch": 0.645584660098342, "grad_norm": 6.210158718847316, "learning_rate": 2.948031358119767e-06, "loss": 17.2415, "step": 35318 }, { "epoch": 0.6456029392947886, "grad_norm": 4.6034577739597395, "learning_rate": 2.9477614262985495e-06, "loss": 16.812, "step": 35319 }, { "epoch": 0.6456212184912351, "grad_norm": 6.6876152114992555, "learning_rate": 2.947491501670249e-06, "loss": 17.8131, "step": 35320 }, { "epoch": 0.6456394976876817, "grad_norm": 6.380424341831396, "learning_rate": 2.9472215842358147e-06, "loss": 17.3972, "step": 35321 }, { "epoch": 0.6456577768841282, "grad_norm": 6.891221422493123, "learning_rate": 2.946951673996191e-06, "loss": 17.6241, "step": 35322 }, { "epoch": 0.6456760560805747, "grad_norm": 6.644763977008318, "learning_rate": 2.946681770952321e-06, "loss": 17.4662, "step": 35323 }, { "epoch": 0.6456943352770212, "grad_norm": 5.983974799596059, "learning_rate": 2.9464118751051547e-06, "loss": 17.3457, "step": 35324 }, { "epoch": 0.6457126144734677, "grad_norm": 5.078218282049509, "learning_rate": 2.9461419864556352e-06, "loss": 16.9556, "step": 35325 }, { "epoch": 0.6457308936699143, "grad_norm": 6.217933930148196, "learning_rate": 2.9458721050047116e-06, "loss": 17.1476, "step": 35326 }, { "epoch": 0.6457491728663608, "grad_norm": 6.561885491422681, "learning_rate": 2.9456022307533283e-06, "loss": 17.8631, "step": 35327 }, { "epoch": 0.6457674520628073, "grad_norm": 5.070078963252133, "learning_rate": 2.945332363702429e-06, "loss": 16.9549, "step": 35328 }, { "epoch": 0.6457857312592539, "grad_norm": 6.657397446652674, "learning_rate": 2.945062503852962e-06, "loss": 17.3498, "step": 35329 }, { "epoch": 0.6458040104557004, "grad_norm": 4.9817545440590125, "learning_rate": 2.9447926512058746e-06, "loss": 16.7633, "step": 35330 }, { "epoch": 0.6458222896521469, "grad_norm": 6.55665417414913, "learning_rate": 2.9445228057621077e-06, "loss": 17.5733, "step": 35331 }, { "epoch": 0.6458405688485934, "grad_norm": 7.54219981723772, "learning_rate": 2.9442529675226126e-06, "loss": 17.7213, "step": 35332 }, { "epoch": 0.6458588480450399, "grad_norm": 5.789695778544594, "learning_rate": 2.9439831364883314e-06, "loss": 17.2742, "step": 35333 }, { "epoch": 0.6458771272414865, "grad_norm": 7.950303418341837, "learning_rate": 2.9437133126602095e-06, "loss": 17.8365, "step": 35334 }, { "epoch": 0.645895406437933, "grad_norm": 5.926085915777107, "learning_rate": 2.9434434960391967e-06, "loss": 17.171, "step": 35335 }, { "epoch": 0.6459136856343796, "grad_norm": 6.768344506558185, "learning_rate": 2.943173686626234e-06, "loss": 17.1007, "step": 35336 }, { "epoch": 0.645931964830826, "grad_norm": 7.704069586789958, "learning_rate": 2.94290388442227e-06, "loss": 17.7201, "step": 35337 }, { "epoch": 0.6459502440272725, "grad_norm": 7.510719382199167, "learning_rate": 2.942634089428249e-06, "loss": 17.8326, "step": 35338 }, { "epoch": 0.6459685232237191, "grad_norm": 6.2348744029383925, "learning_rate": 2.9423643016451176e-06, "loss": 17.3081, "step": 35339 }, { "epoch": 0.6459868024201656, "grad_norm": 6.799343805533621, "learning_rate": 2.9420945210738194e-06, "loss": 17.775, "step": 35340 }, { "epoch": 0.6460050816166122, "grad_norm": 7.717134310504905, "learning_rate": 2.9418247477153016e-06, "loss": 18.0183, "step": 35341 }, { "epoch": 0.6460233608130587, "grad_norm": 7.399566779265024, "learning_rate": 2.94155498157051e-06, "loss": 17.92, "step": 35342 }, { "epoch": 0.6460416400095051, "grad_norm": 7.127888053425567, "learning_rate": 2.941285222640389e-06, "loss": 17.8587, "step": 35343 }, { "epoch": 0.6460599192059517, "grad_norm": 6.428438498256358, "learning_rate": 2.9410154709258865e-06, "loss": 17.4956, "step": 35344 }, { "epoch": 0.6460781984023982, "grad_norm": 6.221461780982843, "learning_rate": 2.9407457264279436e-06, "loss": 17.6299, "step": 35345 }, { "epoch": 0.6460964775988448, "grad_norm": 7.385641437171625, "learning_rate": 2.940475989147509e-06, "loss": 17.7041, "step": 35346 }, { "epoch": 0.6461147567952913, "grad_norm": 7.431061308057598, "learning_rate": 2.940206259085529e-06, "loss": 17.7999, "step": 35347 }, { "epoch": 0.6461330359917378, "grad_norm": 5.820582356672723, "learning_rate": 2.939936536242945e-06, "loss": 17.2386, "step": 35348 }, { "epoch": 0.6461513151881844, "grad_norm": 5.91386245495239, "learning_rate": 2.939666820620707e-06, "loss": 17.1156, "step": 35349 }, { "epoch": 0.6461695943846308, "grad_norm": 6.132722519230976, "learning_rate": 2.9393971122197562e-06, "loss": 17.1075, "step": 35350 }, { "epoch": 0.6461878735810773, "grad_norm": 6.53585788938403, "learning_rate": 2.9391274110410396e-06, "loss": 17.5911, "step": 35351 }, { "epoch": 0.6462061527775239, "grad_norm": 6.022297848501657, "learning_rate": 2.938857717085504e-06, "loss": 17.2914, "step": 35352 }, { "epoch": 0.6462244319739704, "grad_norm": 5.31516878143613, "learning_rate": 2.938588030354093e-06, "loss": 17.0617, "step": 35353 }, { "epoch": 0.646242711170417, "grad_norm": 8.389149432965553, "learning_rate": 2.938318350847751e-06, "loss": 18.2936, "step": 35354 }, { "epoch": 0.6462609903668635, "grad_norm": 5.218861962365615, "learning_rate": 2.9380486785674247e-06, "loss": 17.1078, "step": 35355 }, { "epoch": 0.6462792695633099, "grad_norm": 8.302565831224662, "learning_rate": 2.937779013514057e-06, "loss": 18.3859, "step": 35356 }, { "epoch": 0.6462975487597565, "grad_norm": 6.025666879812582, "learning_rate": 2.937509355688598e-06, "loss": 17.1355, "step": 35357 }, { "epoch": 0.646315827956203, "grad_norm": 5.642497786073097, "learning_rate": 2.93723970509199e-06, "loss": 17.1046, "step": 35358 }, { "epoch": 0.6463341071526496, "grad_norm": 8.047516408813507, "learning_rate": 2.936970061725175e-06, "loss": 18.2271, "step": 35359 }, { "epoch": 0.6463523863490961, "grad_norm": 4.913985097825902, "learning_rate": 2.936700425589103e-06, "loss": 16.8084, "step": 35360 }, { "epoch": 0.6463706655455426, "grad_norm": 6.370223924879235, "learning_rate": 2.936430796684717e-06, "loss": 17.4491, "step": 35361 }, { "epoch": 0.6463889447419892, "grad_norm": 5.261596887214942, "learning_rate": 2.9361611750129603e-06, "loss": 17.0449, "step": 35362 }, { "epoch": 0.6464072239384356, "grad_norm": 5.595066532779791, "learning_rate": 2.9358915605747813e-06, "loss": 17.3412, "step": 35363 }, { "epoch": 0.6464255031348822, "grad_norm": 7.177148518813061, "learning_rate": 2.9356219533711226e-06, "loss": 17.7128, "step": 35364 }, { "epoch": 0.6464437823313287, "grad_norm": 5.423723922900731, "learning_rate": 2.9353523534029282e-06, "loss": 17.0998, "step": 35365 }, { "epoch": 0.6464620615277752, "grad_norm": 7.296156718650987, "learning_rate": 2.935082760671147e-06, "loss": 17.5847, "step": 35366 }, { "epoch": 0.6464803407242218, "grad_norm": 6.312155058632379, "learning_rate": 2.9348131751767223e-06, "loss": 17.5925, "step": 35367 }, { "epoch": 0.6464986199206683, "grad_norm": 5.6211764760253855, "learning_rate": 2.9345435969205954e-06, "loss": 17.2474, "step": 35368 }, { "epoch": 0.6465168991171149, "grad_norm": 7.064399860298156, "learning_rate": 2.934274025903715e-06, "loss": 18.226, "step": 35369 }, { "epoch": 0.6465351783135613, "grad_norm": 6.15707973499282, "learning_rate": 2.9340044621270262e-06, "loss": 17.4212, "step": 35370 }, { "epoch": 0.6465534575100078, "grad_norm": 6.071357562446818, "learning_rate": 2.9337349055914702e-06, "loss": 17.2532, "step": 35371 }, { "epoch": 0.6465717367064544, "grad_norm": 5.908505198226479, "learning_rate": 2.9334653562979964e-06, "loss": 17.2184, "step": 35372 }, { "epoch": 0.6465900159029009, "grad_norm": 5.463185573935408, "learning_rate": 2.933195814247545e-06, "loss": 17.0411, "step": 35373 }, { "epoch": 0.6466082950993475, "grad_norm": 5.218145041643601, "learning_rate": 2.932926279441064e-06, "loss": 16.962, "step": 35374 }, { "epoch": 0.646626574295794, "grad_norm": 5.480087002366199, "learning_rate": 2.932656751879498e-06, "loss": 17.022, "step": 35375 }, { "epoch": 0.6466448534922404, "grad_norm": 6.455445827144824, "learning_rate": 2.9323872315637885e-06, "loss": 17.4927, "step": 35376 }, { "epoch": 0.646663132688687, "grad_norm": 7.398110940901796, "learning_rate": 2.9321177184948845e-06, "loss": 17.7956, "step": 35377 }, { "epoch": 0.6466814118851335, "grad_norm": 6.660862383298945, "learning_rate": 2.9318482126737275e-06, "loss": 17.5164, "step": 35378 }, { "epoch": 0.6466996910815801, "grad_norm": 6.4360388988707005, "learning_rate": 2.931578714101262e-06, "loss": 17.8522, "step": 35379 }, { "epoch": 0.6467179702780266, "grad_norm": 5.173861006731964, "learning_rate": 2.9313092227784356e-06, "loss": 17.0358, "step": 35380 }, { "epoch": 0.6467362494744731, "grad_norm": 7.0015609459103745, "learning_rate": 2.9310397387061908e-06, "loss": 17.6242, "step": 35381 }, { "epoch": 0.6467545286709196, "grad_norm": 5.868022185144883, "learning_rate": 2.93077026188547e-06, "loss": 17.0291, "step": 35382 }, { "epoch": 0.6467728078673661, "grad_norm": 6.159158810598736, "learning_rate": 2.930500792317221e-06, "loss": 17.4738, "step": 35383 }, { "epoch": 0.6467910870638127, "grad_norm": 6.757147280970844, "learning_rate": 2.930231330002388e-06, "loss": 17.6915, "step": 35384 }, { "epoch": 0.6468093662602592, "grad_norm": 7.695158605062081, "learning_rate": 2.9299618749419132e-06, "loss": 18.1332, "step": 35385 }, { "epoch": 0.6468276454567057, "grad_norm": 6.128309293954982, "learning_rate": 2.929692427136744e-06, "loss": 17.1055, "step": 35386 }, { "epoch": 0.6468459246531523, "grad_norm": 5.943870369668898, "learning_rate": 2.929422986587821e-06, "loss": 17.1771, "step": 35387 }, { "epoch": 0.6468642038495988, "grad_norm": 9.801395755922332, "learning_rate": 2.929153553296092e-06, "loss": 17.3353, "step": 35388 }, { "epoch": 0.6468824830460453, "grad_norm": 6.309816833296501, "learning_rate": 2.928884127262501e-06, "loss": 17.3727, "step": 35389 }, { "epoch": 0.6469007622424918, "grad_norm": 6.3902781510697455, "learning_rate": 2.9286147084879894e-06, "loss": 17.3111, "step": 35390 }, { "epoch": 0.6469190414389383, "grad_norm": 8.349411995221004, "learning_rate": 2.928345296973506e-06, "loss": 17.7931, "step": 35391 }, { "epoch": 0.6469373206353849, "grad_norm": 7.561240973654844, "learning_rate": 2.9280758927199905e-06, "loss": 17.3184, "step": 35392 }, { "epoch": 0.6469555998318314, "grad_norm": 5.023832263926158, "learning_rate": 2.927806495728389e-06, "loss": 16.9188, "step": 35393 }, { "epoch": 0.646973879028278, "grad_norm": 6.186281364726438, "learning_rate": 2.9275371059996477e-06, "loss": 17.5676, "step": 35394 }, { "epoch": 0.6469921582247244, "grad_norm": 5.737382131017685, "learning_rate": 2.927267723534708e-06, "loss": 17.2448, "step": 35395 }, { "epoch": 0.6470104374211709, "grad_norm": 5.261747141540277, "learning_rate": 2.9269983483345154e-06, "loss": 17.0035, "step": 35396 }, { "epoch": 0.6470287166176175, "grad_norm": 6.449402622790641, "learning_rate": 2.9267289804000134e-06, "loss": 17.3502, "step": 35397 }, { "epoch": 0.647046995814064, "grad_norm": 7.524400911673407, "learning_rate": 2.926459619732147e-06, "loss": 17.7759, "step": 35398 }, { "epoch": 0.6470652750105106, "grad_norm": 6.74430792968481, "learning_rate": 2.9261902663318587e-06, "loss": 17.6055, "step": 35399 }, { "epoch": 0.6470835542069571, "grad_norm": 5.738633376065995, "learning_rate": 2.9259209202000936e-06, "loss": 17.2659, "step": 35400 }, { "epoch": 0.6471018334034035, "grad_norm": 6.986640409030746, "learning_rate": 2.9256515813377974e-06, "loss": 17.6063, "step": 35401 }, { "epoch": 0.6471201125998501, "grad_norm": 7.527524451640346, "learning_rate": 2.9253822497459093e-06, "loss": 17.6448, "step": 35402 }, { "epoch": 0.6471383917962966, "grad_norm": 6.16683676083587, "learning_rate": 2.9251129254253795e-06, "loss": 17.0655, "step": 35403 }, { "epoch": 0.6471566709927432, "grad_norm": 6.989495374163701, "learning_rate": 2.924843608377147e-06, "loss": 17.5762, "step": 35404 }, { "epoch": 0.6471749501891897, "grad_norm": 5.371279568786172, "learning_rate": 2.9245742986021575e-06, "loss": 17.1246, "step": 35405 }, { "epoch": 0.6471932293856362, "grad_norm": 6.870187689087504, "learning_rate": 2.924304996101357e-06, "loss": 17.2741, "step": 35406 }, { "epoch": 0.6472115085820828, "grad_norm": 7.293536416001514, "learning_rate": 2.924035700875685e-06, "loss": 17.419, "step": 35407 }, { "epoch": 0.6472297877785292, "grad_norm": 5.698389251946523, "learning_rate": 2.9237664129260895e-06, "loss": 17.154, "step": 35408 }, { "epoch": 0.6472480669749758, "grad_norm": 6.5391460078878, "learning_rate": 2.9234971322535117e-06, "loss": 17.2717, "step": 35409 }, { "epoch": 0.6472663461714223, "grad_norm": 7.513125950073849, "learning_rate": 2.923227858858895e-06, "loss": 17.5459, "step": 35410 }, { "epoch": 0.6472846253678688, "grad_norm": 6.9209072575184365, "learning_rate": 2.922958592743187e-06, "loss": 17.6304, "step": 35411 }, { "epoch": 0.6473029045643154, "grad_norm": 8.22597937370989, "learning_rate": 2.9226893339073287e-06, "loss": 18.0403, "step": 35412 }, { "epoch": 0.6473211837607619, "grad_norm": 5.473391996770817, "learning_rate": 2.922420082352262e-06, "loss": 17.0271, "step": 35413 }, { "epoch": 0.6473394629572085, "grad_norm": 7.031994901280367, "learning_rate": 2.9221508380789332e-06, "loss": 18.015, "step": 35414 }, { "epoch": 0.6473577421536549, "grad_norm": 7.504038126836175, "learning_rate": 2.9218816010882867e-06, "loss": 17.5101, "step": 35415 }, { "epoch": 0.6473760213501014, "grad_norm": 4.951357062975143, "learning_rate": 2.9216123713812633e-06, "loss": 16.7341, "step": 35416 }, { "epoch": 0.647394300546548, "grad_norm": 5.747122930684423, "learning_rate": 2.9213431489588097e-06, "loss": 17.0271, "step": 35417 }, { "epoch": 0.6474125797429945, "grad_norm": 5.305954917385424, "learning_rate": 2.9210739338218664e-06, "loss": 17.1268, "step": 35418 }, { "epoch": 0.647430858939441, "grad_norm": 6.754424208229969, "learning_rate": 2.920804725971379e-06, "loss": 17.5523, "step": 35419 }, { "epoch": 0.6474491381358876, "grad_norm": 4.915624777825416, "learning_rate": 2.9205355254082917e-06, "loss": 16.8129, "step": 35420 }, { "epoch": 0.647467417332334, "grad_norm": 7.928420237694938, "learning_rate": 2.9202663321335454e-06, "loss": 18.2204, "step": 35421 }, { "epoch": 0.6474856965287806, "grad_norm": 5.87494365859713, "learning_rate": 2.919997146148087e-06, "loss": 17.1747, "step": 35422 }, { "epoch": 0.6475039757252271, "grad_norm": 5.572993371534455, "learning_rate": 2.9197279674528565e-06, "loss": 17.0662, "step": 35423 }, { "epoch": 0.6475222549216736, "grad_norm": 6.953645373558284, "learning_rate": 2.9194587960487986e-06, "loss": 17.1551, "step": 35424 }, { "epoch": 0.6475405341181202, "grad_norm": 6.968948517837646, "learning_rate": 2.9191896319368596e-06, "loss": 17.9372, "step": 35425 }, { "epoch": 0.6475588133145667, "grad_norm": 5.438662131541587, "learning_rate": 2.9189204751179777e-06, "loss": 16.886, "step": 35426 }, { "epoch": 0.6475770925110133, "grad_norm": 6.532061710071742, "learning_rate": 2.9186513255930993e-06, "loss": 17.3531, "step": 35427 }, { "epoch": 0.6475953717074597, "grad_norm": 7.518532780606936, "learning_rate": 2.9183821833631685e-06, "loss": 17.7887, "step": 35428 }, { "epoch": 0.6476136509039062, "grad_norm": 9.710676608917861, "learning_rate": 2.918113048429129e-06, "loss": 17.8816, "step": 35429 }, { "epoch": 0.6476319301003528, "grad_norm": 5.213320750875011, "learning_rate": 2.917843920791919e-06, "loss": 17.031, "step": 35430 }, { "epoch": 0.6476502092967993, "grad_norm": 6.875163326747664, "learning_rate": 2.9175748004524883e-06, "loss": 17.3765, "step": 35431 }, { "epoch": 0.6476684884932459, "grad_norm": 6.446011958995466, "learning_rate": 2.9173056874117745e-06, "loss": 17.518, "step": 35432 }, { "epoch": 0.6476867676896924, "grad_norm": 6.846577283070364, "learning_rate": 2.917036581670726e-06, "loss": 17.5509, "step": 35433 }, { "epoch": 0.6477050468861388, "grad_norm": 6.38489134202527, "learning_rate": 2.9167674832302816e-06, "loss": 17.5968, "step": 35434 }, { "epoch": 0.6477233260825854, "grad_norm": 6.179890144032776, "learning_rate": 2.916498392091388e-06, "loss": 17.4418, "step": 35435 }, { "epoch": 0.6477416052790319, "grad_norm": 7.85248515218523, "learning_rate": 2.9162293082549854e-06, "loss": 17.9272, "step": 35436 }, { "epoch": 0.6477598844754785, "grad_norm": 7.681367483434305, "learning_rate": 2.9159602317220197e-06, "loss": 17.7249, "step": 35437 }, { "epoch": 0.647778163671925, "grad_norm": 6.604285852496364, "learning_rate": 2.915691162493431e-06, "loss": 17.4344, "step": 35438 }, { "epoch": 0.6477964428683715, "grad_norm": 7.182132244618048, "learning_rate": 2.9154221005701656e-06, "loss": 17.7704, "step": 35439 }, { "epoch": 0.647814722064818, "grad_norm": 7.327781923431533, "learning_rate": 2.9151530459531648e-06, "loss": 17.9216, "step": 35440 }, { "epoch": 0.6478330012612645, "grad_norm": 6.726360924216336, "learning_rate": 2.91488399864337e-06, "loss": 17.3204, "step": 35441 }, { "epoch": 0.6478512804577111, "grad_norm": 6.770326392626079, "learning_rate": 2.9146149586417273e-06, "loss": 17.4546, "step": 35442 }, { "epoch": 0.6478695596541576, "grad_norm": 6.241469628441006, "learning_rate": 2.9143459259491765e-06, "loss": 17.4631, "step": 35443 }, { "epoch": 0.6478878388506041, "grad_norm": 6.112657226332845, "learning_rate": 2.9140769005666623e-06, "loss": 17.315, "step": 35444 }, { "epoch": 0.6479061180470507, "grad_norm": 5.586675958708298, "learning_rate": 2.9138078824951293e-06, "loss": 17.2029, "step": 35445 }, { "epoch": 0.6479243972434972, "grad_norm": 6.713303962117302, "learning_rate": 2.913538871735519e-06, "loss": 17.6336, "step": 35446 }, { "epoch": 0.6479426764399437, "grad_norm": 5.819092165542204, "learning_rate": 2.913269868288772e-06, "loss": 17.1832, "step": 35447 }, { "epoch": 0.6479609556363902, "grad_norm": 5.710250780814965, "learning_rate": 2.9130008721558347e-06, "loss": 17.0795, "step": 35448 }, { "epoch": 0.6479792348328367, "grad_norm": 5.103262693653258, "learning_rate": 2.912731883337646e-06, "loss": 16.8561, "step": 35449 }, { "epoch": 0.6479975140292833, "grad_norm": 4.98727862746149, "learning_rate": 2.9124629018351534e-06, "loss": 16.9341, "step": 35450 }, { "epoch": 0.6480157932257298, "grad_norm": 5.965786388396482, "learning_rate": 2.9121939276492973e-06, "loss": 17.4299, "step": 35451 }, { "epoch": 0.6480340724221764, "grad_norm": 7.280319651981325, "learning_rate": 2.9119249607810172e-06, "loss": 17.8777, "step": 35452 }, { "epoch": 0.6480523516186228, "grad_norm": 6.9741300630017555, "learning_rate": 2.91165600123126e-06, "loss": 17.7761, "step": 35453 }, { "epoch": 0.6480706308150693, "grad_norm": 7.397326055641157, "learning_rate": 2.9113870490009687e-06, "loss": 17.9628, "step": 35454 }, { "epoch": 0.6480889100115159, "grad_norm": 5.995324811053167, "learning_rate": 2.911118104091083e-06, "loss": 17.4456, "step": 35455 }, { "epoch": 0.6481071892079624, "grad_norm": 5.320341500239527, "learning_rate": 2.9108491665025497e-06, "loss": 17.1897, "step": 35456 }, { "epoch": 0.648125468404409, "grad_norm": 6.878863219238092, "learning_rate": 2.910580236236308e-06, "loss": 17.665, "step": 35457 }, { "epoch": 0.6481437476008555, "grad_norm": 6.345459750873713, "learning_rate": 2.9103113132932993e-06, "loss": 17.5238, "step": 35458 }, { "epoch": 0.648162026797302, "grad_norm": 7.421130045516618, "learning_rate": 2.9100423976744695e-06, "loss": 17.8732, "step": 35459 }, { "epoch": 0.6481803059937485, "grad_norm": 5.593576349961242, "learning_rate": 2.9097734893807607e-06, "loss": 17.0204, "step": 35460 }, { "epoch": 0.648198585190195, "grad_norm": 6.069167727371063, "learning_rate": 2.9095045884131123e-06, "loss": 17.2694, "step": 35461 }, { "epoch": 0.6482168643866416, "grad_norm": 6.205298230267616, "learning_rate": 2.9092356947724688e-06, "loss": 17.4502, "step": 35462 }, { "epoch": 0.6482351435830881, "grad_norm": 5.731025956415138, "learning_rate": 2.9089668084597723e-06, "loss": 17.2681, "step": 35463 }, { "epoch": 0.6482534227795346, "grad_norm": 6.471397172836508, "learning_rate": 2.9086979294759675e-06, "loss": 17.6069, "step": 35464 }, { "epoch": 0.6482717019759812, "grad_norm": 5.566583544718535, "learning_rate": 2.9084290578219966e-06, "loss": 17.1854, "step": 35465 }, { "epoch": 0.6482899811724276, "grad_norm": 6.745466002168102, "learning_rate": 2.908160193498797e-06, "loss": 17.6317, "step": 35466 }, { "epoch": 0.6483082603688742, "grad_norm": 6.777880839050425, "learning_rate": 2.9078913365073165e-06, "loss": 17.5591, "step": 35467 }, { "epoch": 0.6483265395653207, "grad_norm": 6.802625858994343, "learning_rate": 2.9076224868484953e-06, "loss": 17.881, "step": 35468 }, { "epoch": 0.6483448187617672, "grad_norm": 6.184481996607486, "learning_rate": 2.9073536445232744e-06, "loss": 17.2287, "step": 35469 }, { "epoch": 0.6483630979582138, "grad_norm": 6.296074164016475, "learning_rate": 2.9070848095325986e-06, "loss": 17.3482, "step": 35470 }, { "epoch": 0.6483813771546603, "grad_norm": 6.751120321829126, "learning_rate": 2.9068159818774072e-06, "loss": 17.2302, "step": 35471 }, { "epoch": 0.6483996563511069, "grad_norm": 5.583175270786183, "learning_rate": 2.9065471615586444e-06, "loss": 17.2711, "step": 35472 }, { "epoch": 0.6484179355475533, "grad_norm": 8.459318254336274, "learning_rate": 2.906278348577254e-06, "loss": 18.4364, "step": 35473 }, { "epoch": 0.6484362147439998, "grad_norm": 6.064338322163282, "learning_rate": 2.906009542934176e-06, "loss": 17.4162, "step": 35474 }, { "epoch": 0.6484544939404464, "grad_norm": 6.301985838338826, "learning_rate": 2.905740744630351e-06, "loss": 17.5067, "step": 35475 }, { "epoch": 0.6484727731368929, "grad_norm": 4.997325433358374, "learning_rate": 2.9054719536667252e-06, "loss": 16.9106, "step": 35476 }, { "epoch": 0.6484910523333395, "grad_norm": 5.840072113984224, "learning_rate": 2.9052031700442384e-06, "loss": 16.9749, "step": 35477 }, { "epoch": 0.648509331529786, "grad_norm": 7.640558478341128, "learning_rate": 2.9049343937638302e-06, "loss": 17.8156, "step": 35478 }, { "epoch": 0.6485276107262324, "grad_norm": 6.9361194634133705, "learning_rate": 2.9046656248264476e-06, "loss": 17.4061, "step": 35479 }, { "epoch": 0.648545889922679, "grad_norm": 6.242033939122857, "learning_rate": 2.9043968632330283e-06, "loss": 17.3526, "step": 35480 }, { "epoch": 0.6485641691191255, "grad_norm": 9.647081341749923, "learning_rate": 2.904128108984516e-06, "loss": 18.8969, "step": 35481 }, { "epoch": 0.6485824483155721, "grad_norm": 7.100748528500648, "learning_rate": 2.9038593620818543e-06, "loss": 17.5736, "step": 35482 }, { "epoch": 0.6486007275120186, "grad_norm": 5.871480527536444, "learning_rate": 2.9035906225259826e-06, "loss": 17.2952, "step": 35483 }, { "epoch": 0.6486190067084651, "grad_norm": 5.135364683559539, "learning_rate": 2.903321890317845e-06, "loss": 17.0112, "step": 35484 }, { "epoch": 0.6486372859049117, "grad_norm": 6.093812665260869, "learning_rate": 2.9030531654583825e-06, "loss": 17.1356, "step": 35485 }, { "epoch": 0.6486555651013581, "grad_norm": 7.685273355610959, "learning_rate": 2.902784447948535e-06, "loss": 17.8735, "step": 35486 }, { "epoch": 0.6486738442978046, "grad_norm": 5.237494827569752, "learning_rate": 2.9025157377892475e-06, "loss": 16.8461, "step": 35487 }, { "epoch": 0.6486921234942512, "grad_norm": 4.799029235201812, "learning_rate": 2.9022470349814593e-06, "loss": 16.824, "step": 35488 }, { "epoch": 0.6487104026906977, "grad_norm": 4.958258931552563, "learning_rate": 2.901978339526114e-06, "loss": 17.0315, "step": 35489 }, { "epoch": 0.6487286818871443, "grad_norm": 6.099316263534052, "learning_rate": 2.901709651424151e-06, "loss": 17.1149, "step": 35490 }, { "epoch": 0.6487469610835908, "grad_norm": 7.904139862993792, "learning_rate": 2.9014409706765155e-06, "loss": 17.4673, "step": 35491 }, { "epoch": 0.6487652402800372, "grad_norm": 5.777010264581875, "learning_rate": 2.901172297284146e-06, "loss": 17.256, "step": 35492 }, { "epoch": 0.6487835194764838, "grad_norm": 10.304327655445398, "learning_rate": 2.9009036312479865e-06, "loss": 18.8598, "step": 35493 }, { "epoch": 0.6488017986729303, "grad_norm": 5.047544849030969, "learning_rate": 2.900634972568976e-06, "loss": 16.8132, "step": 35494 }, { "epoch": 0.6488200778693769, "grad_norm": 8.4145183250547, "learning_rate": 2.9003663212480603e-06, "loss": 18.2658, "step": 35495 }, { "epoch": 0.6488383570658234, "grad_norm": 7.550294190654494, "learning_rate": 2.9000976772861776e-06, "loss": 17.7575, "step": 35496 }, { "epoch": 0.6488566362622699, "grad_norm": 5.891228435750571, "learning_rate": 2.899829040684269e-06, "loss": 17.1772, "step": 35497 }, { "epoch": 0.6488749154587165, "grad_norm": 7.396091195398098, "learning_rate": 2.8995604114432767e-06, "loss": 17.6726, "step": 35498 }, { "epoch": 0.6488931946551629, "grad_norm": 6.587947363632412, "learning_rate": 2.8992917895641452e-06, "loss": 17.6611, "step": 35499 }, { "epoch": 0.6489114738516095, "grad_norm": 6.65788587780425, "learning_rate": 2.899023175047812e-06, "loss": 17.3785, "step": 35500 }, { "epoch": 0.648929753048056, "grad_norm": 8.02929623245269, "learning_rate": 2.898754567895222e-06, "loss": 18.0776, "step": 35501 }, { "epoch": 0.6489480322445025, "grad_norm": 5.566734885525092, "learning_rate": 2.898485968107315e-06, "loss": 17.146, "step": 35502 }, { "epoch": 0.6489663114409491, "grad_norm": 8.713808318982414, "learning_rate": 2.8982173756850304e-06, "loss": 17.2882, "step": 35503 }, { "epoch": 0.6489845906373956, "grad_norm": 6.539510859700215, "learning_rate": 2.897948790629313e-06, "loss": 17.4408, "step": 35504 }, { "epoch": 0.6490028698338421, "grad_norm": 6.783049424816389, "learning_rate": 2.897680212941103e-06, "loss": 17.5676, "step": 35505 }, { "epoch": 0.6490211490302886, "grad_norm": 6.263018102002706, "learning_rate": 2.8974116426213394e-06, "loss": 17.3123, "step": 35506 }, { "epoch": 0.6490394282267351, "grad_norm": 6.0569881607106, "learning_rate": 2.8971430796709656e-06, "loss": 17.1894, "step": 35507 }, { "epoch": 0.6490577074231817, "grad_norm": 6.222971815014908, "learning_rate": 2.896874524090925e-06, "loss": 17.5116, "step": 35508 }, { "epoch": 0.6490759866196282, "grad_norm": 8.060369239690583, "learning_rate": 2.896605975882154e-06, "loss": 18.3254, "step": 35509 }, { "epoch": 0.6490942658160748, "grad_norm": 7.550960011923296, "learning_rate": 2.8963374350455992e-06, "loss": 17.7502, "step": 35510 }, { "epoch": 0.6491125450125212, "grad_norm": 5.925555729239218, "learning_rate": 2.896068901582197e-06, "loss": 17.3404, "step": 35511 }, { "epoch": 0.6491308242089677, "grad_norm": 9.00632564305065, "learning_rate": 2.8958003754928928e-06, "loss": 18.5031, "step": 35512 }, { "epoch": 0.6491491034054143, "grad_norm": 6.765104286134261, "learning_rate": 2.895531856778625e-06, "loss": 17.395, "step": 35513 }, { "epoch": 0.6491673826018608, "grad_norm": 6.876472125242521, "learning_rate": 2.8952633454403347e-06, "loss": 18.0613, "step": 35514 }, { "epoch": 0.6491856617983074, "grad_norm": 5.777394582589037, "learning_rate": 2.8949948414789646e-06, "loss": 17.2722, "step": 35515 }, { "epoch": 0.6492039409947539, "grad_norm": 5.495534618445704, "learning_rate": 2.8947263448954534e-06, "loss": 17.2569, "step": 35516 }, { "epoch": 0.6492222201912003, "grad_norm": 7.269427424577125, "learning_rate": 2.894457855690743e-06, "loss": 17.4599, "step": 35517 }, { "epoch": 0.6492404993876469, "grad_norm": 5.438077674389012, "learning_rate": 2.8941893738657783e-06, "loss": 17.1983, "step": 35518 }, { "epoch": 0.6492587785840934, "grad_norm": 5.02440503455309, "learning_rate": 2.8939208994214963e-06, "loss": 16.906, "step": 35519 }, { "epoch": 0.64927705778054, "grad_norm": 7.251680645209689, "learning_rate": 2.893652432358837e-06, "loss": 18.0818, "step": 35520 }, { "epoch": 0.6492953369769865, "grad_norm": 7.9710341116111945, "learning_rate": 2.8933839726787453e-06, "loss": 17.9804, "step": 35521 }, { "epoch": 0.649313616173433, "grad_norm": 4.992612053647303, "learning_rate": 2.8931155203821604e-06, "loss": 16.8609, "step": 35522 }, { "epoch": 0.6493318953698796, "grad_norm": 5.548835223841461, "learning_rate": 2.89284707547002e-06, "loss": 17.0664, "step": 35523 }, { "epoch": 0.649350174566326, "grad_norm": 5.74034358676206, "learning_rate": 2.89257863794327e-06, "loss": 17.362, "step": 35524 }, { "epoch": 0.6493684537627726, "grad_norm": 5.418971867184205, "learning_rate": 2.8923102078028463e-06, "loss": 17.2298, "step": 35525 }, { "epoch": 0.6493867329592191, "grad_norm": 5.201649941858452, "learning_rate": 2.8920417850496936e-06, "loss": 17.0357, "step": 35526 }, { "epoch": 0.6494050121556656, "grad_norm": 6.40996634261075, "learning_rate": 2.8917733696847527e-06, "loss": 17.5204, "step": 35527 }, { "epoch": 0.6494232913521122, "grad_norm": 7.003827965699373, "learning_rate": 2.8915049617089618e-06, "loss": 17.9372, "step": 35528 }, { "epoch": 0.6494415705485587, "grad_norm": 5.816573519995736, "learning_rate": 2.8912365611232652e-06, "loss": 16.9863, "step": 35529 }, { "epoch": 0.6494598497450053, "grad_norm": 5.26416042338613, "learning_rate": 2.8909681679286007e-06, "loss": 16.8422, "step": 35530 }, { "epoch": 0.6494781289414517, "grad_norm": 6.263806615639306, "learning_rate": 2.8906997821259076e-06, "loss": 17.3687, "step": 35531 }, { "epoch": 0.6494964081378982, "grad_norm": 6.427809751009618, "learning_rate": 2.890431403716132e-06, "loss": 17.584, "step": 35532 }, { "epoch": 0.6495146873343448, "grad_norm": 5.8464556450900975, "learning_rate": 2.89016303270021e-06, "loss": 17.4042, "step": 35533 }, { "epoch": 0.6495329665307913, "grad_norm": 6.430849436778093, "learning_rate": 2.889894669079082e-06, "loss": 17.8259, "step": 35534 }, { "epoch": 0.6495512457272379, "grad_norm": 5.118513575923059, "learning_rate": 2.88962631285369e-06, "loss": 16.8912, "step": 35535 }, { "epoch": 0.6495695249236844, "grad_norm": 5.987105685367409, "learning_rate": 2.889357964024977e-06, "loss": 17.2287, "step": 35536 }, { "epoch": 0.6495878041201308, "grad_norm": 4.614533212667931, "learning_rate": 2.8890896225938788e-06, "loss": 16.6945, "step": 35537 }, { "epoch": 0.6496060833165774, "grad_norm": 5.940119208790511, "learning_rate": 2.8888212885613397e-06, "loss": 17.4266, "step": 35538 }, { "epoch": 0.6496243625130239, "grad_norm": 5.377093467942546, "learning_rate": 2.8885529619283e-06, "loss": 17.1023, "step": 35539 }, { "epoch": 0.6496426417094705, "grad_norm": 7.074222486596506, "learning_rate": 2.888284642695696e-06, "loss": 17.5371, "step": 35540 }, { "epoch": 0.649660920905917, "grad_norm": 6.0325599790859386, "learning_rate": 2.8880163308644736e-06, "loss": 17.6485, "step": 35541 }, { "epoch": 0.6496792001023635, "grad_norm": 7.78201422987223, "learning_rate": 2.8877480264355683e-06, "loss": 18.1281, "step": 35542 }, { "epoch": 0.64969747929881, "grad_norm": 5.752051193764012, "learning_rate": 2.8874797294099253e-06, "loss": 17.5761, "step": 35543 }, { "epoch": 0.6497157584952565, "grad_norm": 5.469798641791528, "learning_rate": 2.8872114397884803e-06, "loss": 17.1217, "step": 35544 }, { "epoch": 0.6497340376917031, "grad_norm": 6.776898230207098, "learning_rate": 2.8869431575721763e-06, "loss": 17.3295, "step": 35545 }, { "epoch": 0.6497523168881496, "grad_norm": 7.416994354738682, "learning_rate": 2.8866748827619544e-06, "loss": 17.8998, "step": 35546 }, { "epoch": 0.6497705960845961, "grad_norm": 5.839075837262602, "learning_rate": 2.8864066153587544e-06, "loss": 17.0368, "step": 35547 }, { "epoch": 0.6497888752810427, "grad_norm": 6.30432006671727, "learning_rate": 2.8861383553635136e-06, "loss": 17.156, "step": 35548 }, { "epoch": 0.6498071544774892, "grad_norm": 6.5152481220985985, "learning_rate": 2.8858701027771764e-06, "loss": 17.5539, "step": 35549 }, { "epoch": 0.6498254336739357, "grad_norm": 6.245523621173136, "learning_rate": 2.8856018576006804e-06, "loss": 17.4083, "step": 35550 }, { "epoch": 0.6498437128703822, "grad_norm": 4.441337941025941, "learning_rate": 2.8853336198349646e-06, "loss": 16.7402, "step": 35551 }, { "epoch": 0.6498619920668287, "grad_norm": 8.437887409177428, "learning_rate": 2.885065389480974e-06, "loss": 17.7121, "step": 35552 }, { "epoch": 0.6498802712632753, "grad_norm": 6.1936143282887794, "learning_rate": 2.8847971665396426e-06, "loss": 17.2325, "step": 35553 }, { "epoch": 0.6498985504597218, "grad_norm": 5.694374978132746, "learning_rate": 2.884528951011914e-06, "loss": 17.3242, "step": 35554 }, { "epoch": 0.6499168296561683, "grad_norm": 5.000423106439059, "learning_rate": 2.88426074289873e-06, "loss": 16.929, "step": 35555 }, { "epoch": 0.6499351088526149, "grad_norm": 6.052213690981256, "learning_rate": 2.883992542201026e-06, "loss": 17.2918, "step": 35556 }, { "epoch": 0.6499533880490613, "grad_norm": 6.480966481255095, "learning_rate": 2.8837243489197467e-06, "loss": 17.2781, "step": 35557 }, { "epoch": 0.6499716672455079, "grad_norm": 5.761752202158687, "learning_rate": 2.88345616305583e-06, "loss": 17.3895, "step": 35558 }, { "epoch": 0.6499899464419544, "grad_norm": 5.280789971777983, "learning_rate": 2.8831879846102138e-06, "loss": 17.0665, "step": 35559 }, { "epoch": 0.6500082256384009, "grad_norm": 7.40235483643179, "learning_rate": 2.882919813583842e-06, "loss": 17.8292, "step": 35560 }, { "epoch": 0.6500265048348475, "grad_norm": 7.176876466937413, "learning_rate": 2.882651649977651e-06, "loss": 17.8912, "step": 35561 }, { "epoch": 0.650044784031294, "grad_norm": 7.3263580982316725, "learning_rate": 2.882383493792584e-06, "loss": 17.8616, "step": 35562 }, { "epoch": 0.6500630632277405, "grad_norm": 6.841501215820634, "learning_rate": 2.882115345029577e-06, "loss": 17.6435, "step": 35563 }, { "epoch": 0.650081342424187, "grad_norm": 5.828220135088407, "learning_rate": 2.881847203689574e-06, "loss": 17.1263, "step": 35564 }, { "epoch": 0.6500996216206335, "grad_norm": 6.182863482141548, "learning_rate": 2.8815790697735103e-06, "loss": 17.4657, "step": 35565 }, { "epoch": 0.6501179008170801, "grad_norm": 6.518297886295182, "learning_rate": 2.8813109432823303e-06, "loss": 17.6295, "step": 35566 }, { "epoch": 0.6501361800135266, "grad_norm": 5.076186192648803, "learning_rate": 2.881042824216972e-06, "loss": 17.0114, "step": 35567 }, { "epoch": 0.6501544592099732, "grad_norm": 6.897556810245408, "learning_rate": 2.8807747125783725e-06, "loss": 17.6165, "step": 35568 }, { "epoch": 0.6501727384064196, "grad_norm": 5.967289156565216, "learning_rate": 2.8805066083674755e-06, "loss": 17.2351, "step": 35569 }, { "epoch": 0.6501910176028661, "grad_norm": 5.333914897456584, "learning_rate": 2.880238511585217e-06, "loss": 17.0091, "step": 35570 }, { "epoch": 0.6502092967993127, "grad_norm": 6.1696318198042555, "learning_rate": 2.8799704222325385e-06, "loss": 17.469, "step": 35571 }, { "epoch": 0.6502275759957592, "grad_norm": 6.356212926700128, "learning_rate": 2.879702340310382e-06, "loss": 17.3864, "step": 35572 }, { "epoch": 0.6502458551922058, "grad_norm": 6.364793986959347, "learning_rate": 2.8794342658196827e-06, "loss": 17.4924, "step": 35573 }, { "epoch": 0.6502641343886523, "grad_norm": 5.9901333274248785, "learning_rate": 2.8791661987613847e-06, "loss": 17.4187, "step": 35574 }, { "epoch": 0.6502824135850988, "grad_norm": 5.282735649733364, "learning_rate": 2.8788981391364246e-06, "loss": 17.015, "step": 35575 }, { "epoch": 0.6503006927815453, "grad_norm": 6.983775286062973, "learning_rate": 2.8786300869457406e-06, "loss": 17.5892, "step": 35576 }, { "epoch": 0.6503189719779918, "grad_norm": 6.903192659216017, "learning_rate": 2.8783620421902758e-06, "loss": 18.0002, "step": 35577 }, { "epoch": 0.6503372511744384, "grad_norm": 7.267101453320588, "learning_rate": 2.8780940048709683e-06, "loss": 17.9994, "step": 35578 }, { "epoch": 0.6503555303708849, "grad_norm": 6.771026929606014, "learning_rate": 2.877825974988755e-06, "loss": 17.8684, "step": 35579 }, { "epoch": 0.6503738095673314, "grad_norm": 5.580390699850638, "learning_rate": 2.8775579525445775e-06, "loss": 17.0735, "step": 35580 }, { "epoch": 0.650392088763778, "grad_norm": 7.320529891038735, "learning_rate": 2.8772899375393772e-06, "loss": 17.7263, "step": 35581 }, { "epoch": 0.6504103679602244, "grad_norm": 5.785546132406308, "learning_rate": 2.87702192997409e-06, "loss": 17.0985, "step": 35582 }, { "epoch": 0.650428647156671, "grad_norm": 6.972543784244299, "learning_rate": 2.8767539298496577e-06, "loss": 17.7765, "step": 35583 }, { "epoch": 0.6504469263531175, "grad_norm": 4.6123090171915075, "learning_rate": 2.8764859371670197e-06, "loss": 16.6634, "step": 35584 }, { "epoch": 0.650465205549564, "grad_norm": 5.583052491669674, "learning_rate": 2.876217951927111e-06, "loss": 16.9599, "step": 35585 }, { "epoch": 0.6504834847460106, "grad_norm": 6.716407086750987, "learning_rate": 2.8759499741308773e-06, "loss": 17.7861, "step": 35586 }, { "epoch": 0.6505017639424571, "grad_norm": 5.466573870984956, "learning_rate": 2.875682003779252e-06, "loss": 17.328, "step": 35587 }, { "epoch": 0.6505200431389037, "grad_norm": 5.785707558317498, "learning_rate": 2.875414040873179e-06, "loss": 17.2147, "step": 35588 }, { "epoch": 0.6505383223353501, "grad_norm": 4.983573602611158, "learning_rate": 2.875146085413593e-06, "loss": 16.9544, "step": 35589 }, { "epoch": 0.6505566015317966, "grad_norm": 7.321116506438608, "learning_rate": 2.874878137401436e-06, "loss": 17.9123, "step": 35590 }, { "epoch": 0.6505748807282432, "grad_norm": 5.157222046779293, "learning_rate": 2.8746101968376488e-06, "loss": 16.9682, "step": 35591 }, { "epoch": 0.6505931599246897, "grad_norm": 6.818224786030641, "learning_rate": 2.8743422637231686e-06, "loss": 17.1949, "step": 35592 }, { "epoch": 0.6506114391211363, "grad_norm": 7.527301688321582, "learning_rate": 2.874074338058932e-06, "loss": 17.8961, "step": 35593 }, { "epoch": 0.6506297183175828, "grad_norm": 6.790456801702847, "learning_rate": 2.8738064198458827e-06, "loss": 17.6378, "step": 35594 }, { "epoch": 0.6506479975140292, "grad_norm": 6.007502012719954, "learning_rate": 2.8735385090849565e-06, "loss": 17.2441, "step": 35595 }, { "epoch": 0.6506662767104758, "grad_norm": 7.594459563823168, "learning_rate": 2.8732706057770922e-06, "loss": 17.7229, "step": 35596 }, { "epoch": 0.6506845559069223, "grad_norm": 6.4813521361323, "learning_rate": 2.8730027099232314e-06, "loss": 17.5011, "step": 35597 }, { "epoch": 0.6507028351033689, "grad_norm": 6.875757919745454, "learning_rate": 2.87273482152431e-06, "loss": 17.358, "step": 35598 }, { "epoch": 0.6507211142998154, "grad_norm": 6.516561207004453, "learning_rate": 2.8724669405812683e-06, "loss": 17.5178, "step": 35599 }, { "epoch": 0.6507393934962619, "grad_norm": 6.004464309414508, "learning_rate": 2.872199067095047e-06, "loss": 17.3192, "step": 35600 }, { "epoch": 0.6507576726927085, "grad_norm": 7.294102720667156, "learning_rate": 2.8719312010665823e-06, "loss": 17.9333, "step": 35601 }, { "epoch": 0.6507759518891549, "grad_norm": 6.179449260625224, "learning_rate": 2.8716633424968155e-06, "loss": 17.3195, "step": 35602 }, { "epoch": 0.6507942310856015, "grad_norm": 6.073492209814013, "learning_rate": 2.8713954913866837e-06, "loss": 17.2532, "step": 35603 }, { "epoch": 0.650812510282048, "grad_norm": 5.1324981557733516, "learning_rate": 2.871127647737124e-06, "loss": 17.1795, "step": 35604 }, { "epoch": 0.6508307894784945, "grad_norm": 6.052803121686026, "learning_rate": 2.8708598115490793e-06, "loss": 17.3026, "step": 35605 }, { "epoch": 0.6508490686749411, "grad_norm": 5.560236650368909, "learning_rate": 2.8705919828234863e-06, "loss": 17.3621, "step": 35606 }, { "epoch": 0.6508673478713876, "grad_norm": 7.03390566106924, "learning_rate": 2.8703241615612815e-06, "loss": 17.7512, "step": 35607 }, { "epoch": 0.6508856270678341, "grad_norm": 5.853306312583265, "learning_rate": 2.870056347763406e-06, "loss": 17.1941, "step": 35608 }, { "epoch": 0.6509039062642806, "grad_norm": 6.712637638052952, "learning_rate": 2.8697885414307996e-06, "loss": 17.5319, "step": 35609 }, { "epoch": 0.6509221854607271, "grad_norm": 6.646257642324859, "learning_rate": 2.869520742564398e-06, "loss": 17.6316, "step": 35610 }, { "epoch": 0.6509404646571737, "grad_norm": 6.052153168216554, "learning_rate": 2.869252951165143e-06, "loss": 17.2283, "step": 35611 }, { "epoch": 0.6509587438536202, "grad_norm": 6.5527740687170875, "learning_rate": 2.8689851672339717e-06, "loss": 17.1294, "step": 35612 }, { "epoch": 0.6509770230500668, "grad_norm": 7.194958746207188, "learning_rate": 2.86871739077182e-06, "loss": 17.7369, "step": 35613 }, { "epoch": 0.6509953022465133, "grad_norm": 8.24480411280183, "learning_rate": 2.8684496217796322e-06, "loss": 17.9834, "step": 35614 }, { "epoch": 0.6510135814429597, "grad_norm": 6.5733997179958035, "learning_rate": 2.86818186025834e-06, "loss": 16.964, "step": 35615 }, { "epoch": 0.6510318606394063, "grad_norm": 6.649316983890445, "learning_rate": 2.867914106208888e-06, "loss": 17.7, "step": 35616 }, { "epoch": 0.6510501398358528, "grad_norm": 6.251632099191899, "learning_rate": 2.86764635963221e-06, "loss": 17.1038, "step": 35617 }, { "epoch": 0.6510684190322994, "grad_norm": 6.920918317281554, "learning_rate": 2.8673786205292463e-06, "loss": 17.6063, "step": 35618 }, { "epoch": 0.6510866982287459, "grad_norm": 6.251275721792743, "learning_rate": 2.867110888900938e-06, "loss": 17.7858, "step": 35619 }, { "epoch": 0.6511049774251924, "grad_norm": 7.03955816561744, "learning_rate": 2.8668431647482207e-06, "loss": 17.3038, "step": 35620 }, { "epoch": 0.651123256621639, "grad_norm": 5.079949343389466, "learning_rate": 2.866575448072031e-06, "loss": 17.0773, "step": 35621 }, { "epoch": 0.6511415358180854, "grad_norm": 6.211424163672594, "learning_rate": 2.866307738873311e-06, "loss": 17.3623, "step": 35622 }, { "epoch": 0.6511598150145319, "grad_norm": 5.434278879253516, "learning_rate": 2.8660400371529975e-06, "loss": 17.2629, "step": 35623 }, { "epoch": 0.6511780942109785, "grad_norm": 7.322929535385251, "learning_rate": 2.8657723429120266e-06, "loss": 17.4054, "step": 35624 }, { "epoch": 0.651196373407425, "grad_norm": 6.6318661508038375, "learning_rate": 2.8655046561513384e-06, "loss": 17.5439, "step": 35625 }, { "epoch": 0.6512146526038716, "grad_norm": 5.373838490914966, "learning_rate": 2.865236976871873e-06, "loss": 17.063, "step": 35626 }, { "epoch": 0.651232931800318, "grad_norm": 5.655428752442359, "learning_rate": 2.8649693050745648e-06, "loss": 17.3063, "step": 35627 }, { "epoch": 0.6512512109967645, "grad_norm": 5.41485197261276, "learning_rate": 2.8647016407603555e-06, "loss": 16.9958, "step": 35628 }, { "epoch": 0.6512694901932111, "grad_norm": 6.812727704678054, "learning_rate": 2.864433983930182e-06, "loss": 17.3388, "step": 35629 }, { "epoch": 0.6512877693896576, "grad_norm": 5.770955868208342, "learning_rate": 2.86416633458498e-06, "loss": 17.2213, "step": 35630 }, { "epoch": 0.6513060485861042, "grad_norm": 5.979308057404281, "learning_rate": 2.863898692725692e-06, "loss": 17.3381, "step": 35631 }, { "epoch": 0.6513243277825507, "grad_norm": 5.607074282280036, "learning_rate": 2.863631058353251e-06, "loss": 17.2939, "step": 35632 }, { "epoch": 0.6513426069789972, "grad_norm": 6.258122292711505, "learning_rate": 2.863363431468601e-06, "loss": 17.3077, "step": 35633 }, { "epoch": 0.6513608861754437, "grad_norm": 7.046001270773851, "learning_rate": 2.8630958120726735e-06, "loss": 17.3261, "step": 35634 }, { "epoch": 0.6513791653718902, "grad_norm": 5.620816225238555, "learning_rate": 2.8628282001664104e-06, "loss": 17.0199, "step": 35635 }, { "epoch": 0.6513974445683368, "grad_norm": 7.0041384548294285, "learning_rate": 2.862560595750751e-06, "loss": 17.6207, "step": 35636 }, { "epoch": 0.6514157237647833, "grad_norm": 5.037226814489619, "learning_rate": 2.8622929988266312e-06, "loss": 16.883, "step": 35637 }, { "epoch": 0.6514340029612298, "grad_norm": 6.963748982027816, "learning_rate": 2.8620254093949867e-06, "loss": 17.2563, "step": 35638 }, { "epoch": 0.6514522821576764, "grad_norm": 6.314861059960804, "learning_rate": 2.86175782745676e-06, "loss": 17.3786, "step": 35639 }, { "epoch": 0.6514705613541228, "grad_norm": 6.186880476466043, "learning_rate": 2.8614902530128865e-06, "loss": 17.2205, "step": 35640 }, { "epoch": 0.6514888405505694, "grad_norm": 7.795244237548279, "learning_rate": 2.861222686064301e-06, "loss": 17.72, "step": 35641 }, { "epoch": 0.6515071197470159, "grad_norm": 5.575214556247639, "learning_rate": 2.8609551266119473e-06, "loss": 17.2609, "step": 35642 }, { "epoch": 0.6515253989434624, "grad_norm": 6.460675146619814, "learning_rate": 2.8606875746567584e-06, "loss": 17.6463, "step": 35643 }, { "epoch": 0.651543678139909, "grad_norm": 6.2620489959331, "learning_rate": 2.8604200301996742e-06, "loss": 17.5592, "step": 35644 }, { "epoch": 0.6515619573363555, "grad_norm": 5.622126879278445, "learning_rate": 2.8601524932416336e-06, "loss": 17.2757, "step": 35645 }, { "epoch": 0.6515802365328021, "grad_norm": 6.397291048221658, "learning_rate": 2.8598849637835725e-06, "loss": 17.3684, "step": 35646 }, { "epoch": 0.6515985157292485, "grad_norm": 4.797851519201081, "learning_rate": 2.8596174418264278e-06, "loss": 16.7664, "step": 35647 }, { "epoch": 0.651616794925695, "grad_norm": 6.142227721390503, "learning_rate": 2.8593499273711396e-06, "loss": 17.354, "step": 35648 }, { "epoch": 0.6516350741221416, "grad_norm": 5.877649838595219, "learning_rate": 2.859082420418642e-06, "loss": 17.2989, "step": 35649 }, { "epoch": 0.6516533533185881, "grad_norm": 5.9437205581115, "learning_rate": 2.858814920969877e-06, "loss": 17.4245, "step": 35650 }, { "epoch": 0.6516716325150347, "grad_norm": 6.280118061151085, "learning_rate": 2.8585474290257797e-06, "loss": 17.2249, "step": 35651 }, { "epoch": 0.6516899117114812, "grad_norm": 7.794560634823274, "learning_rate": 2.858279944587287e-06, "loss": 18.0643, "step": 35652 }, { "epoch": 0.6517081909079276, "grad_norm": 5.899203751207091, "learning_rate": 2.8580124676553354e-06, "loss": 17.1637, "step": 35653 }, { "epoch": 0.6517264701043742, "grad_norm": 6.011810040489114, "learning_rate": 2.8577449982308674e-06, "loss": 17.3994, "step": 35654 }, { "epoch": 0.6517447493008207, "grad_norm": 5.986902772631025, "learning_rate": 2.857477536314815e-06, "loss": 17.0923, "step": 35655 }, { "epoch": 0.6517630284972673, "grad_norm": 6.123732841247094, "learning_rate": 2.85721008190812e-06, "loss": 17.5899, "step": 35656 }, { "epoch": 0.6517813076937138, "grad_norm": 6.148860918977849, "learning_rate": 2.856942635011718e-06, "loss": 17.3983, "step": 35657 }, { "epoch": 0.6517995868901603, "grad_norm": 6.169649498338626, "learning_rate": 2.8566751956265435e-06, "loss": 17.2667, "step": 35658 }, { "epoch": 0.6518178660866069, "grad_norm": 4.7297759028770825, "learning_rate": 2.856407763753538e-06, "loss": 16.8337, "step": 35659 }, { "epoch": 0.6518361452830533, "grad_norm": 6.502461878096968, "learning_rate": 2.856140339393638e-06, "loss": 17.3987, "step": 35660 }, { "epoch": 0.6518544244794999, "grad_norm": 5.958870835335421, "learning_rate": 2.8558729225477786e-06, "loss": 17.2345, "step": 35661 }, { "epoch": 0.6518727036759464, "grad_norm": 5.85692578242624, "learning_rate": 2.8556055132168983e-06, "loss": 17.1976, "step": 35662 }, { "epoch": 0.6518909828723929, "grad_norm": 5.046839561980821, "learning_rate": 2.855338111401934e-06, "loss": 17.0182, "step": 35663 }, { "epoch": 0.6519092620688395, "grad_norm": 5.979268586239157, "learning_rate": 2.8550707171038263e-06, "loss": 17.1384, "step": 35664 }, { "epoch": 0.651927541265286, "grad_norm": 6.944333155961487, "learning_rate": 2.8548033303235083e-06, "loss": 17.5916, "step": 35665 }, { "epoch": 0.6519458204617326, "grad_norm": 5.607314333272513, "learning_rate": 2.8545359510619177e-06, "loss": 17.0193, "step": 35666 }, { "epoch": 0.651964099658179, "grad_norm": 5.763749624567581, "learning_rate": 2.8542685793199938e-06, "loss": 17.1613, "step": 35667 }, { "epoch": 0.6519823788546255, "grad_norm": 5.0615893921561925, "learning_rate": 2.854001215098673e-06, "loss": 17.0392, "step": 35668 }, { "epoch": 0.6520006580510721, "grad_norm": 6.825769417819823, "learning_rate": 2.8537338583988887e-06, "loss": 17.6383, "step": 35669 }, { "epoch": 0.6520189372475186, "grad_norm": 5.747904336724666, "learning_rate": 2.8534665092215836e-06, "loss": 17.3293, "step": 35670 }, { "epoch": 0.6520372164439652, "grad_norm": 7.559248112499924, "learning_rate": 2.85319916756769e-06, "loss": 18.05, "step": 35671 }, { "epoch": 0.6520554956404117, "grad_norm": 5.817768223284728, "learning_rate": 2.8529318334381473e-06, "loss": 17.3137, "step": 35672 }, { "epoch": 0.6520737748368581, "grad_norm": 6.605122315480496, "learning_rate": 2.8526645068338938e-06, "loss": 16.871, "step": 35673 }, { "epoch": 0.6520920540333047, "grad_norm": 5.924274492895396, "learning_rate": 2.8523971877558656e-06, "loss": 17.4192, "step": 35674 }, { "epoch": 0.6521103332297512, "grad_norm": 5.035119696475155, "learning_rate": 2.8521298762049952e-06, "loss": 16.9827, "step": 35675 }, { "epoch": 0.6521286124261978, "grad_norm": 5.101073423754833, "learning_rate": 2.8518625721822268e-06, "loss": 16.7905, "step": 35676 }, { "epoch": 0.6521468916226443, "grad_norm": 8.639674080824138, "learning_rate": 2.8515952756884936e-06, "loss": 18.089, "step": 35677 }, { "epoch": 0.6521651708190908, "grad_norm": 5.737479497320877, "learning_rate": 2.85132798672473e-06, "loss": 17.1991, "step": 35678 }, { "epoch": 0.6521834500155373, "grad_norm": 6.717266168734433, "learning_rate": 2.851060705291877e-06, "loss": 17.4367, "step": 35679 }, { "epoch": 0.6522017292119838, "grad_norm": 5.735243641936819, "learning_rate": 2.8507934313908687e-06, "loss": 17.1156, "step": 35680 }, { "epoch": 0.6522200084084304, "grad_norm": 6.31337574779125, "learning_rate": 2.8505261650226425e-06, "loss": 17.4149, "step": 35681 }, { "epoch": 0.6522382876048769, "grad_norm": 4.562948967144765, "learning_rate": 2.8502589061881373e-06, "loss": 16.8458, "step": 35682 }, { "epoch": 0.6522565668013234, "grad_norm": 5.103212790565645, "learning_rate": 2.8499916548882867e-06, "loss": 16.9084, "step": 35683 }, { "epoch": 0.65227484599777, "grad_norm": 5.797579105428766, "learning_rate": 2.8497244111240303e-06, "loss": 17.4178, "step": 35684 }, { "epoch": 0.6522931251942164, "grad_norm": 5.477785370100282, "learning_rate": 2.849457174896303e-06, "loss": 17.2356, "step": 35685 }, { "epoch": 0.652311404390663, "grad_norm": 6.013090213767736, "learning_rate": 2.8491899462060403e-06, "loss": 17.2419, "step": 35686 }, { "epoch": 0.6523296835871095, "grad_norm": 7.00607706503664, "learning_rate": 2.848922725054182e-06, "loss": 17.5901, "step": 35687 }, { "epoch": 0.652347962783556, "grad_norm": 5.670278904368342, "learning_rate": 2.8486555114416608e-06, "loss": 17.1601, "step": 35688 }, { "epoch": 0.6523662419800026, "grad_norm": 6.2806913195680725, "learning_rate": 2.8483883053694173e-06, "loss": 17.5819, "step": 35689 }, { "epoch": 0.6523845211764491, "grad_norm": 5.182418116538877, "learning_rate": 2.848121106838384e-06, "loss": 17.1471, "step": 35690 }, { "epoch": 0.6524028003728956, "grad_norm": 7.311900651488318, "learning_rate": 2.847853915849502e-06, "loss": 17.4846, "step": 35691 }, { "epoch": 0.6524210795693421, "grad_norm": 5.247648399997073, "learning_rate": 2.8475867324037033e-06, "loss": 16.9855, "step": 35692 }, { "epoch": 0.6524393587657886, "grad_norm": 7.560355692438678, "learning_rate": 2.847319556501927e-06, "loss": 17.791, "step": 35693 }, { "epoch": 0.6524576379622352, "grad_norm": 6.6179118276627165, "learning_rate": 2.8470523881451085e-06, "loss": 17.5488, "step": 35694 }, { "epoch": 0.6524759171586817, "grad_norm": 6.510539806090674, "learning_rate": 2.8467852273341857e-06, "loss": 17.457, "step": 35695 }, { "epoch": 0.6524941963551282, "grad_norm": 6.3506794824244075, "learning_rate": 2.8465180740700936e-06, "loss": 17.5707, "step": 35696 }, { "epoch": 0.6525124755515748, "grad_norm": 13.425356501120095, "learning_rate": 2.8462509283537676e-06, "loss": 17.7807, "step": 35697 }, { "epoch": 0.6525307547480212, "grad_norm": 6.801910047419236, "learning_rate": 2.8459837901861446e-06, "loss": 17.5242, "step": 35698 }, { "epoch": 0.6525490339444678, "grad_norm": 7.677516866967135, "learning_rate": 2.845716659568164e-06, "loss": 17.7629, "step": 35699 }, { "epoch": 0.6525673131409143, "grad_norm": 4.8427750827230955, "learning_rate": 2.8454495365007574e-06, "loss": 16.8687, "step": 35700 }, { "epoch": 0.6525855923373608, "grad_norm": 6.315779192012794, "learning_rate": 2.8451824209848643e-06, "loss": 17.4356, "step": 35701 }, { "epoch": 0.6526038715338074, "grad_norm": 6.167111921076972, "learning_rate": 2.8449153130214215e-06, "loss": 17.5217, "step": 35702 }, { "epoch": 0.6526221507302539, "grad_norm": 6.529885332867021, "learning_rate": 2.8446482126113602e-06, "loss": 17.5676, "step": 35703 }, { "epoch": 0.6526404299267005, "grad_norm": 8.150872976053476, "learning_rate": 2.844381119755622e-06, "loss": 17.8968, "step": 35704 }, { "epoch": 0.6526587091231469, "grad_norm": 8.176456032307566, "learning_rate": 2.844114034455141e-06, "loss": 18.2495, "step": 35705 }, { "epoch": 0.6526769883195934, "grad_norm": 6.181813643664282, "learning_rate": 2.843846956710852e-06, "loss": 17.1707, "step": 35706 }, { "epoch": 0.65269526751604, "grad_norm": 6.652123217085314, "learning_rate": 2.843579886523692e-06, "loss": 17.7444, "step": 35707 }, { "epoch": 0.6527135467124865, "grad_norm": 5.642835237599926, "learning_rate": 2.8433128238945985e-06, "loss": 17.2121, "step": 35708 }, { "epoch": 0.6527318259089331, "grad_norm": 5.804878583986933, "learning_rate": 2.843045768824505e-06, "loss": 17.5421, "step": 35709 }, { "epoch": 0.6527501051053796, "grad_norm": 5.918560345328222, "learning_rate": 2.8427787213143508e-06, "loss": 17.5727, "step": 35710 }, { "epoch": 0.652768384301826, "grad_norm": 5.965089860641048, "learning_rate": 2.8425116813650685e-06, "loss": 17.6243, "step": 35711 }, { "epoch": 0.6527866634982726, "grad_norm": 5.598973594020371, "learning_rate": 2.842244648977597e-06, "loss": 17.0806, "step": 35712 }, { "epoch": 0.6528049426947191, "grad_norm": 7.940186668854066, "learning_rate": 2.8419776241528706e-06, "loss": 17.9019, "step": 35713 }, { "epoch": 0.6528232218911657, "grad_norm": 7.645371133761382, "learning_rate": 2.8417106068918244e-06, "loss": 18.3093, "step": 35714 }, { "epoch": 0.6528415010876122, "grad_norm": 5.883024817430613, "learning_rate": 2.8414435971953958e-06, "loss": 17.0118, "step": 35715 }, { "epoch": 0.6528597802840587, "grad_norm": 5.912038963722215, "learning_rate": 2.8411765950645193e-06, "loss": 17.4865, "step": 35716 }, { "epoch": 0.6528780594805053, "grad_norm": 5.067175477708454, "learning_rate": 2.840909600500131e-06, "loss": 16.9015, "step": 35717 }, { "epoch": 0.6528963386769517, "grad_norm": 6.803417674217101, "learning_rate": 2.8406426135031688e-06, "loss": 17.5467, "step": 35718 }, { "epoch": 0.6529146178733983, "grad_norm": 6.879548588797638, "learning_rate": 2.8403756340745676e-06, "loss": 17.527, "step": 35719 }, { "epoch": 0.6529328970698448, "grad_norm": 5.67316276626387, "learning_rate": 2.8401086622152594e-06, "loss": 17.3044, "step": 35720 }, { "epoch": 0.6529511762662913, "grad_norm": 6.552392532846173, "learning_rate": 2.8398416979261863e-06, "loss": 17.5723, "step": 35721 }, { "epoch": 0.6529694554627379, "grad_norm": 6.309915386608318, "learning_rate": 2.8395747412082795e-06, "loss": 17.2295, "step": 35722 }, { "epoch": 0.6529877346591844, "grad_norm": 7.17122445374191, "learning_rate": 2.8393077920624746e-06, "loss": 17.4019, "step": 35723 }, { "epoch": 0.653006013855631, "grad_norm": 5.581092985047357, "learning_rate": 2.839040850489709e-06, "loss": 17.0985, "step": 35724 }, { "epoch": 0.6530242930520774, "grad_norm": 8.495445584954359, "learning_rate": 2.838773916490917e-06, "loss": 18.0942, "step": 35725 }, { "epoch": 0.6530425722485239, "grad_norm": 6.31349822821661, "learning_rate": 2.8385069900670348e-06, "loss": 17.8315, "step": 35726 }, { "epoch": 0.6530608514449705, "grad_norm": 5.526518004269034, "learning_rate": 2.8382400712189995e-06, "loss": 17.1884, "step": 35727 }, { "epoch": 0.653079130641417, "grad_norm": 6.64042780571576, "learning_rate": 2.8379731599477434e-06, "loss": 17.7073, "step": 35728 }, { "epoch": 0.6530974098378636, "grad_norm": 5.503589632319524, "learning_rate": 2.837706256254206e-06, "loss": 17.1087, "step": 35729 }, { "epoch": 0.65311568903431, "grad_norm": 7.958970443854851, "learning_rate": 2.83743936013932e-06, "loss": 17.6632, "step": 35730 }, { "epoch": 0.6531339682307565, "grad_norm": 7.205964048799229, "learning_rate": 2.8371724716040195e-06, "loss": 17.7026, "step": 35731 }, { "epoch": 0.6531522474272031, "grad_norm": 6.367430074124201, "learning_rate": 2.8369055906492445e-06, "loss": 17.5311, "step": 35732 }, { "epoch": 0.6531705266236496, "grad_norm": 8.370171673116065, "learning_rate": 2.8366387172759267e-06, "loss": 17.4138, "step": 35733 }, { "epoch": 0.6531888058200962, "grad_norm": 5.855248955554937, "learning_rate": 2.8363718514850015e-06, "loss": 17.4294, "step": 35734 }, { "epoch": 0.6532070850165427, "grad_norm": 6.430493836381483, "learning_rate": 2.836104993277404e-06, "loss": 17.295, "step": 35735 }, { "epoch": 0.6532253642129892, "grad_norm": 6.378247190144407, "learning_rate": 2.835838142654074e-06, "loss": 17.5193, "step": 35736 }, { "epoch": 0.6532436434094357, "grad_norm": 5.6165011776365485, "learning_rate": 2.8355712996159405e-06, "loss": 17.1773, "step": 35737 }, { "epoch": 0.6532619226058822, "grad_norm": 7.057243933041843, "learning_rate": 2.8353044641639444e-06, "loss": 17.635, "step": 35738 }, { "epoch": 0.6532802018023288, "grad_norm": 7.1360344781103, "learning_rate": 2.835037636299018e-06, "loss": 17.461, "step": 35739 }, { "epoch": 0.6532984809987753, "grad_norm": 6.012356415491082, "learning_rate": 2.834770816022095e-06, "loss": 17.4584, "step": 35740 }, { "epoch": 0.6533167601952218, "grad_norm": 7.193798971144644, "learning_rate": 2.834504003334114e-06, "loss": 17.1168, "step": 35741 }, { "epoch": 0.6533350393916684, "grad_norm": 14.826326435123436, "learning_rate": 2.8342371982360075e-06, "loss": 18.8805, "step": 35742 }, { "epoch": 0.6533533185881149, "grad_norm": 7.03272628843503, "learning_rate": 2.8339704007287127e-06, "loss": 17.615, "step": 35743 }, { "epoch": 0.6533715977845614, "grad_norm": 6.809215818145924, "learning_rate": 2.833703610813162e-06, "loss": 17.8285, "step": 35744 }, { "epoch": 0.6533898769810079, "grad_norm": 6.217238366379957, "learning_rate": 2.833436828490292e-06, "loss": 17.6876, "step": 35745 }, { "epoch": 0.6534081561774544, "grad_norm": 5.775580284895121, "learning_rate": 2.83317005376104e-06, "loss": 17.2831, "step": 35746 }, { "epoch": 0.653426435373901, "grad_norm": 5.912819034270674, "learning_rate": 2.832903286626339e-06, "loss": 17.2424, "step": 35747 }, { "epoch": 0.6534447145703475, "grad_norm": 6.153017269635631, "learning_rate": 2.832636527087122e-06, "loss": 17.148, "step": 35748 }, { "epoch": 0.6534629937667941, "grad_norm": 6.076921577422057, "learning_rate": 2.832369775144328e-06, "loss": 17.3454, "step": 35749 }, { "epoch": 0.6534812729632405, "grad_norm": 6.2608451759964625, "learning_rate": 2.8321030307988894e-06, "loss": 17.4984, "step": 35750 }, { "epoch": 0.653499552159687, "grad_norm": 6.793845836546713, "learning_rate": 2.8318362940517396e-06, "loss": 18.0541, "step": 35751 }, { "epoch": 0.6535178313561336, "grad_norm": 6.506077427644591, "learning_rate": 2.831569564903816e-06, "loss": 17.2914, "step": 35752 }, { "epoch": 0.6535361105525801, "grad_norm": 8.47621288054029, "learning_rate": 2.8313028433560547e-06, "loss": 18.6961, "step": 35753 }, { "epoch": 0.6535543897490267, "grad_norm": 6.92746700129293, "learning_rate": 2.831036129409387e-06, "loss": 17.6983, "step": 35754 }, { "epoch": 0.6535726689454732, "grad_norm": 6.753626554358864, "learning_rate": 2.830769423064751e-06, "loss": 17.5154, "step": 35755 }, { "epoch": 0.6535909481419196, "grad_norm": 5.749059457441421, "learning_rate": 2.8305027243230774e-06, "loss": 17.3331, "step": 35756 }, { "epoch": 0.6536092273383662, "grad_norm": 5.110686998342135, "learning_rate": 2.8302360331853064e-06, "loss": 16.9201, "step": 35757 }, { "epoch": 0.6536275065348127, "grad_norm": 6.125522385074352, "learning_rate": 2.8299693496523696e-06, "loss": 17.4797, "step": 35758 }, { "epoch": 0.6536457857312592, "grad_norm": 5.880968242944741, "learning_rate": 2.8297026737252e-06, "loss": 17.2978, "step": 35759 }, { "epoch": 0.6536640649277058, "grad_norm": 6.471674591524078, "learning_rate": 2.829436005404736e-06, "loss": 17.3348, "step": 35760 }, { "epoch": 0.6536823441241523, "grad_norm": 6.615073158739187, "learning_rate": 2.8291693446919083e-06, "loss": 17.2927, "step": 35761 }, { "epoch": 0.6537006233205989, "grad_norm": 5.76309230305098, "learning_rate": 2.828902691587654e-06, "loss": 17.3077, "step": 35762 }, { "epoch": 0.6537189025170453, "grad_norm": 6.577106612787547, "learning_rate": 2.828636046092909e-06, "loss": 17.5552, "step": 35763 }, { "epoch": 0.6537371817134918, "grad_norm": 5.447478784929461, "learning_rate": 2.8283694082086067e-06, "loss": 17.1144, "step": 35764 }, { "epoch": 0.6537554609099384, "grad_norm": 8.588134150854478, "learning_rate": 2.828102777935679e-06, "loss": 17.6525, "step": 35765 }, { "epoch": 0.6537737401063849, "grad_norm": 6.629575535137619, "learning_rate": 2.8278361552750644e-06, "loss": 17.4975, "step": 35766 }, { "epoch": 0.6537920193028315, "grad_norm": 6.341697898377758, "learning_rate": 2.827569540227696e-06, "loss": 17.6869, "step": 35767 }, { "epoch": 0.653810298499278, "grad_norm": 7.232795737289826, "learning_rate": 2.8273029327945055e-06, "loss": 17.8828, "step": 35768 }, { "epoch": 0.6538285776957244, "grad_norm": 5.568478475529864, "learning_rate": 2.8270363329764315e-06, "loss": 16.992, "step": 35769 }, { "epoch": 0.653846856892171, "grad_norm": 5.585917194994327, "learning_rate": 2.826769740774405e-06, "loss": 16.925, "step": 35770 }, { "epoch": 0.6538651360886175, "grad_norm": 5.450591503455694, "learning_rate": 2.826503156189362e-06, "loss": 16.9618, "step": 35771 }, { "epoch": 0.6538834152850641, "grad_norm": 6.339790088850704, "learning_rate": 2.8262365792222385e-06, "loss": 17.469, "step": 35772 }, { "epoch": 0.6539016944815106, "grad_norm": 6.393182264941749, "learning_rate": 2.8259700098739653e-06, "loss": 17.5715, "step": 35773 }, { "epoch": 0.6539199736779571, "grad_norm": 5.190186391448125, "learning_rate": 2.8257034481454806e-06, "loss": 16.9031, "step": 35774 }, { "epoch": 0.6539382528744037, "grad_norm": 6.685572357407204, "learning_rate": 2.8254368940377166e-06, "loss": 17.6257, "step": 35775 }, { "epoch": 0.6539565320708501, "grad_norm": 5.1211360036740645, "learning_rate": 2.825170347551606e-06, "loss": 16.8313, "step": 35776 }, { "epoch": 0.6539748112672967, "grad_norm": 7.400449242447994, "learning_rate": 2.824903808688086e-06, "loss": 17.6972, "step": 35777 }, { "epoch": 0.6539930904637432, "grad_norm": 5.385361477887229, "learning_rate": 2.824637277448089e-06, "loss": 17.2942, "step": 35778 }, { "epoch": 0.6540113696601897, "grad_norm": 6.8567937181811995, "learning_rate": 2.8243707538325484e-06, "loss": 17.3016, "step": 35779 }, { "epoch": 0.6540296488566363, "grad_norm": 6.418682700243363, "learning_rate": 2.8241042378423995e-06, "loss": 17.4382, "step": 35780 }, { "epoch": 0.6540479280530828, "grad_norm": 6.479956095209629, "learning_rate": 2.8238377294785778e-06, "loss": 17.4397, "step": 35781 }, { "epoch": 0.6540662072495294, "grad_norm": 6.412969265735841, "learning_rate": 2.8235712287420146e-06, "loss": 17.3963, "step": 35782 }, { "epoch": 0.6540844864459758, "grad_norm": 5.773259073203131, "learning_rate": 2.823304735633647e-06, "loss": 17.3223, "step": 35783 }, { "epoch": 0.6541027656424223, "grad_norm": 6.682398705172331, "learning_rate": 2.8230382501544086e-06, "loss": 17.9847, "step": 35784 }, { "epoch": 0.6541210448388689, "grad_norm": 4.7200782264097345, "learning_rate": 2.8227717723052287e-06, "loss": 16.7812, "step": 35785 }, { "epoch": 0.6541393240353154, "grad_norm": 4.915093365266237, "learning_rate": 2.8225053020870475e-06, "loss": 16.8728, "step": 35786 }, { "epoch": 0.654157603231762, "grad_norm": 6.0062073497286015, "learning_rate": 2.8222388395007945e-06, "loss": 17.1092, "step": 35787 }, { "epoch": 0.6541758824282085, "grad_norm": 4.992622837997921, "learning_rate": 2.821972384547407e-06, "loss": 16.9486, "step": 35788 }, { "epoch": 0.6541941616246549, "grad_norm": 6.257046573392778, "learning_rate": 2.8217059372278153e-06, "loss": 17.3082, "step": 35789 }, { "epoch": 0.6542124408211015, "grad_norm": 6.14758241923847, "learning_rate": 2.8214394975429547e-06, "loss": 17.282, "step": 35790 }, { "epoch": 0.654230720017548, "grad_norm": 5.665355007045573, "learning_rate": 2.821173065493763e-06, "loss": 17.4715, "step": 35791 }, { "epoch": 0.6542489992139946, "grad_norm": 5.097747818340905, "learning_rate": 2.8209066410811693e-06, "loss": 16.7914, "step": 35792 }, { "epoch": 0.6542672784104411, "grad_norm": 7.550368134583082, "learning_rate": 2.820640224306107e-06, "loss": 18.0649, "step": 35793 }, { "epoch": 0.6542855576068876, "grad_norm": 8.14890605738366, "learning_rate": 2.8203738151695143e-06, "loss": 17.9918, "step": 35794 }, { "epoch": 0.6543038368033341, "grad_norm": 5.32357530167671, "learning_rate": 2.820107413672322e-06, "loss": 16.9719, "step": 35795 }, { "epoch": 0.6543221159997806, "grad_norm": 6.36321353885771, "learning_rate": 2.8198410198154614e-06, "loss": 17.3829, "step": 35796 }, { "epoch": 0.6543403951962272, "grad_norm": 15.030885645613813, "learning_rate": 2.819574633599872e-06, "loss": 18.1504, "step": 35797 }, { "epoch": 0.6543586743926737, "grad_norm": 5.942625327112799, "learning_rate": 2.819308255026481e-06, "loss": 17.2315, "step": 35798 }, { "epoch": 0.6543769535891202, "grad_norm": 6.488071018748537, "learning_rate": 2.8190418840962263e-06, "loss": 17.7124, "step": 35799 }, { "epoch": 0.6543952327855668, "grad_norm": 5.440625946921691, "learning_rate": 2.818775520810042e-06, "loss": 17.2043, "step": 35800 }, { "epoch": 0.6544135119820133, "grad_norm": 6.574415768915049, "learning_rate": 2.8185091651688584e-06, "loss": 17.7404, "step": 35801 }, { "epoch": 0.6544317911784598, "grad_norm": 6.798963485384031, "learning_rate": 2.818242817173613e-06, "loss": 17.833, "step": 35802 }, { "epoch": 0.6544500703749063, "grad_norm": 5.239729676684185, "learning_rate": 2.8179764768252373e-06, "loss": 17.3519, "step": 35803 }, { "epoch": 0.6544683495713528, "grad_norm": 6.128414658420597, "learning_rate": 2.817710144124662e-06, "loss": 17.4547, "step": 35804 }, { "epoch": 0.6544866287677994, "grad_norm": 5.736081280408011, "learning_rate": 2.817443819072826e-06, "loss": 17.0894, "step": 35805 }, { "epoch": 0.6545049079642459, "grad_norm": 6.39226254723692, "learning_rate": 2.81717750167066e-06, "loss": 17.4352, "step": 35806 }, { "epoch": 0.6545231871606925, "grad_norm": 6.687269517505398, "learning_rate": 2.816911191919095e-06, "loss": 17.7899, "step": 35807 }, { "epoch": 0.654541466357139, "grad_norm": 6.807727716123936, "learning_rate": 2.816644889819067e-06, "loss": 17.4067, "step": 35808 }, { "epoch": 0.6545597455535854, "grad_norm": 6.423551587481499, "learning_rate": 2.816378595371511e-06, "loss": 17.407, "step": 35809 }, { "epoch": 0.654578024750032, "grad_norm": 6.141710745021538, "learning_rate": 2.816112308577357e-06, "loss": 17.5461, "step": 35810 }, { "epoch": 0.6545963039464785, "grad_norm": 7.850573969809156, "learning_rate": 2.815846029437542e-06, "loss": 18.1759, "step": 35811 }, { "epoch": 0.6546145831429251, "grad_norm": 6.302704799211227, "learning_rate": 2.8155797579529963e-06, "loss": 17.5109, "step": 35812 }, { "epoch": 0.6546328623393716, "grad_norm": 6.8869714231737404, "learning_rate": 2.8153134941246528e-06, "loss": 17.3953, "step": 35813 }, { "epoch": 0.654651141535818, "grad_norm": 6.701180029997326, "learning_rate": 2.8150472379534477e-06, "loss": 17.5981, "step": 35814 }, { "epoch": 0.6546694207322646, "grad_norm": 5.9734924432196665, "learning_rate": 2.81478098944031e-06, "loss": 17.3526, "step": 35815 }, { "epoch": 0.6546876999287111, "grad_norm": 6.3574261455176915, "learning_rate": 2.8145147485861778e-06, "loss": 17.3025, "step": 35816 }, { "epoch": 0.6547059791251577, "grad_norm": 6.256231763393757, "learning_rate": 2.8142485153919797e-06, "loss": 17.2038, "step": 35817 }, { "epoch": 0.6547242583216042, "grad_norm": 6.77009940626437, "learning_rate": 2.8139822898586505e-06, "loss": 17.4794, "step": 35818 }, { "epoch": 0.6547425375180507, "grad_norm": 6.260248566635964, "learning_rate": 2.813716071987126e-06, "loss": 17.4073, "step": 35819 }, { "epoch": 0.6547608167144973, "grad_norm": 6.645436220504314, "learning_rate": 2.813449861778337e-06, "loss": 17.7656, "step": 35820 }, { "epoch": 0.6547790959109437, "grad_norm": 5.933666283437928, "learning_rate": 2.813183659233214e-06, "loss": 16.936, "step": 35821 }, { "epoch": 0.6547973751073903, "grad_norm": 6.984982379813612, "learning_rate": 2.8129174643526952e-06, "loss": 17.8133, "step": 35822 }, { "epoch": 0.6548156543038368, "grad_norm": 5.713591995056171, "learning_rate": 2.81265127713771e-06, "loss": 17.4297, "step": 35823 }, { "epoch": 0.6548339335002833, "grad_norm": 6.855889198419501, "learning_rate": 2.8123850975891915e-06, "loss": 17.6109, "step": 35824 }, { "epoch": 0.6548522126967299, "grad_norm": 6.304324763673256, "learning_rate": 2.8121189257080726e-06, "loss": 17.3331, "step": 35825 }, { "epoch": 0.6548704918931764, "grad_norm": 6.016854219473903, "learning_rate": 2.8118527614952896e-06, "loss": 17.3614, "step": 35826 }, { "epoch": 0.6548887710896228, "grad_norm": 7.346618954390871, "learning_rate": 2.8115866049517703e-06, "loss": 17.7075, "step": 35827 }, { "epoch": 0.6549070502860694, "grad_norm": 6.947110384055678, "learning_rate": 2.811320456078452e-06, "loss": 17.7454, "step": 35828 }, { "epoch": 0.6549253294825159, "grad_norm": 5.356933865125665, "learning_rate": 2.811054314876266e-06, "loss": 17.081, "step": 35829 }, { "epoch": 0.6549436086789625, "grad_norm": 6.654753033752287, "learning_rate": 2.8107881813461418e-06, "loss": 17.592, "step": 35830 }, { "epoch": 0.654961887875409, "grad_norm": 6.711897009943764, "learning_rate": 2.8105220554890176e-06, "loss": 17.7253, "step": 35831 }, { "epoch": 0.6549801670718555, "grad_norm": 5.436692471855501, "learning_rate": 2.8102559373058214e-06, "loss": 16.8964, "step": 35832 }, { "epoch": 0.6549984462683021, "grad_norm": 6.46241095134672, "learning_rate": 2.8099898267974908e-06, "loss": 17.3885, "step": 35833 }, { "epoch": 0.6550167254647485, "grad_norm": 5.923757310556113, "learning_rate": 2.8097237239649532e-06, "loss": 17.4106, "step": 35834 }, { "epoch": 0.6550350046611951, "grad_norm": 4.255623634576181, "learning_rate": 2.809457628809144e-06, "loss": 16.5219, "step": 35835 }, { "epoch": 0.6550532838576416, "grad_norm": 5.633140523233887, "learning_rate": 2.809191541330998e-06, "loss": 17.1811, "step": 35836 }, { "epoch": 0.6550715630540881, "grad_norm": 5.375759140125352, "learning_rate": 2.8089254615314452e-06, "loss": 17.1504, "step": 35837 }, { "epoch": 0.6550898422505347, "grad_norm": 8.23499989156259, "learning_rate": 2.808659389411417e-06, "loss": 18.542, "step": 35838 }, { "epoch": 0.6551081214469812, "grad_norm": 6.424860527546234, "learning_rate": 2.8083933249718488e-06, "loss": 17.3735, "step": 35839 }, { "epoch": 0.6551264006434278, "grad_norm": 6.563920309277854, "learning_rate": 2.8081272682136718e-06, "loss": 17.5184, "step": 35840 }, { "epoch": 0.6551446798398742, "grad_norm": 6.738051159517074, "learning_rate": 2.807861219137817e-06, "loss": 17.3459, "step": 35841 }, { "epoch": 0.6551629590363207, "grad_norm": 5.643382961637838, "learning_rate": 2.8075951777452202e-06, "loss": 17.4637, "step": 35842 }, { "epoch": 0.6551812382327673, "grad_norm": 6.871377463527829, "learning_rate": 2.80732914403681e-06, "loss": 17.6364, "step": 35843 }, { "epoch": 0.6551995174292138, "grad_norm": 7.603397110237417, "learning_rate": 2.8070631180135212e-06, "loss": 17.859, "step": 35844 }, { "epoch": 0.6552177966256604, "grad_norm": 7.124724518093148, "learning_rate": 2.8067970996762878e-06, "loss": 17.8677, "step": 35845 }, { "epoch": 0.6552360758221069, "grad_norm": 6.005056162565665, "learning_rate": 2.80653108902604e-06, "loss": 17.1974, "step": 35846 }, { "epoch": 0.6552543550185533, "grad_norm": 5.294106416725253, "learning_rate": 2.8062650860637086e-06, "loss": 17.0459, "step": 35847 }, { "epoch": 0.6552726342149999, "grad_norm": 5.973711880996103, "learning_rate": 2.8059990907902294e-06, "loss": 17.1193, "step": 35848 }, { "epoch": 0.6552909134114464, "grad_norm": 6.982287553022939, "learning_rate": 2.8057331032065316e-06, "loss": 17.8455, "step": 35849 }, { "epoch": 0.655309192607893, "grad_norm": 5.439400262578606, "learning_rate": 2.80546712331355e-06, "loss": 17.0344, "step": 35850 }, { "epoch": 0.6553274718043395, "grad_norm": 6.285701454580437, "learning_rate": 2.805201151112217e-06, "loss": 17.3435, "step": 35851 }, { "epoch": 0.655345751000786, "grad_norm": 5.0126261482084375, "learning_rate": 2.80493518660346e-06, "loss": 16.8673, "step": 35852 }, { "epoch": 0.6553640301972325, "grad_norm": 6.3453121271534165, "learning_rate": 2.8046692297882155e-06, "loss": 17.4095, "step": 35853 }, { "epoch": 0.655382309393679, "grad_norm": 6.926874983409055, "learning_rate": 2.804403280667417e-06, "loss": 17.7521, "step": 35854 }, { "epoch": 0.6554005885901256, "grad_norm": 6.839770705562599, "learning_rate": 2.8041373392419922e-06, "loss": 17.5573, "step": 35855 }, { "epoch": 0.6554188677865721, "grad_norm": 5.886356240914559, "learning_rate": 2.8038714055128775e-06, "loss": 17.3017, "step": 35856 }, { "epoch": 0.6554371469830186, "grad_norm": 6.351545067009849, "learning_rate": 2.803605479481003e-06, "loss": 17.4534, "step": 35857 }, { "epoch": 0.6554554261794652, "grad_norm": 9.799886239011082, "learning_rate": 2.8033395611472983e-06, "loss": 19.2174, "step": 35858 }, { "epoch": 0.6554737053759117, "grad_norm": 6.759678042117217, "learning_rate": 2.8030736505127004e-06, "loss": 17.5965, "step": 35859 }, { "epoch": 0.6554919845723582, "grad_norm": 6.694933883916939, "learning_rate": 2.8028077475781384e-06, "loss": 17.7068, "step": 35860 }, { "epoch": 0.6555102637688047, "grad_norm": 5.852557781549387, "learning_rate": 2.8025418523445426e-06, "loss": 17.6185, "step": 35861 }, { "epoch": 0.6555285429652512, "grad_norm": 5.34968351095503, "learning_rate": 2.8022759648128474e-06, "loss": 17.1707, "step": 35862 }, { "epoch": 0.6555468221616978, "grad_norm": 6.262834525413472, "learning_rate": 2.802010084983984e-06, "loss": 17.4596, "step": 35863 }, { "epoch": 0.6555651013581443, "grad_norm": 5.950453457026102, "learning_rate": 2.8017442128588856e-06, "loss": 17.1303, "step": 35864 }, { "epoch": 0.6555833805545909, "grad_norm": 6.038675987515997, "learning_rate": 2.801478348438484e-06, "loss": 17.0892, "step": 35865 }, { "epoch": 0.6556016597510373, "grad_norm": 6.724808594079587, "learning_rate": 2.8012124917237076e-06, "loss": 17.3194, "step": 35866 }, { "epoch": 0.6556199389474838, "grad_norm": 5.547895063681672, "learning_rate": 2.800946642715493e-06, "loss": 16.9671, "step": 35867 }, { "epoch": 0.6556382181439304, "grad_norm": 6.899449870091689, "learning_rate": 2.800680801414769e-06, "loss": 17.4448, "step": 35868 }, { "epoch": 0.6556564973403769, "grad_norm": 7.74175249450525, "learning_rate": 2.800414967822466e-06, "loss": 17.9573, "step": 35869 }, { "epoch": 0.6556747765368235, "grad_norm": 5.821384654273305, "learning_rate": 2.80014914193952e-06, "loss": 17.242, "step": 35870 }, { "epoch": 0.65569305573327, "grad_norm": 5.44779215337686, "learning_rate": 2.799883323766859e-06, "loss": 17.1478, "step": 35871 }, { "epoch": 0.6557113349297164, "grad_norm": 6.546328920220303, "learning_rate": 2.799617513305415e-06, "loss": 17.9274, "step": 35872 }, { "epoch": 0.655729614126163, "grad_norm": 5.918417022497298, "learning_rate": 2.799351710556123e-06, "loss": 17.1495, "step": 35873 }, { "epoch": 0.6557478933226095, "grad_norm": 6.599963993367481, "learning_rate": 2.7990859155199125e-06, "loss": 17.5383, "step": 35874 }, { "epoch": 0.6557661725190561, "grad_norm": 4.558370336099016, "learning_rate": 2.7988201281977123e-06, "loss": 16.8197, "step": 35875 }, { "epoch": 0.6557844517155026, "grad_norm": 6.968675600343225, "learning_rate": 2.798554348590459e-06, "loss": 17.6496, "step": 35876 }, { "epoch": 0.6558027309119491, "grad_norm": 6.592651162928384, "learning_rate": 2.798288576699082e-06, "loss": 17.3416, "step": 35877 }, { "epoch": 0.6558210101083957, "grad_norm": 7.114637248660008, "learning_rate": 2.7980228125245094e-06, "loss": 17.9275, "step": 35878 }, { "epoch": 0.6558392893048421, "grad_norm": 6.161393228695163, "learning_rate": 2.797757056067677e-06, "loss": 17.5202, "step": 35879 }, { "epoch": 0.6558575685012887, "grad_norm": 6.017555365798072, "learning_rate": 2.7974913073295164e-06, "loss": 17.3286, "step": 35880 }, { "epoch": 0.6558758476977352, "grad_norm": 7.499391643583269, "learning_rate": 2.7972255663109554e-06, "loss": 18.0005, "step": 35881 }, { "epoch": 0.6558941268941817, "grad_norm": 8.339253919185298, "learning_rate": 2.79695983301293e-06, "loss": 17.9138, "step": 35882 }, { "epoch": 0.6559124060906283, "grad_norm": 6.598353374381715, "learning_rate": 2.7966941074363674e-06, "loss": 17.625, "step": 35883 }, { "epoch": 0.6559306852870748, "grad_norm": 6.601678483757785, "learning_rate": 2.796428389582202e-06, "loss": 17.2343, "step": 35884 }, { "epoch": 0.6559489644835214, "grad_norm": 6.155058198149708, "learning_rate": 2.7961626794513646e-06, "loss": 17.2964, "step": 35885 }, { "epoch": 0.6559672436799678, "grad_norm": 5.059333495311421, "learning_rate": 2.7958969770447836e-06, "loss": 16.8602, "step": 35886 }, { "epoch": 0.6559855228764143, "grad_norm": 8.030022390047115, "learning_rate": 2.7956312823633946e-06, "loss": 17.849, "step": 35887 }, { "epoch": 0.6560038020728609, "grad_norm": 5.853082839057638, "learning_rate": 2.7953655954081244e-06, "loss": 17.336, "step": 35888 }, { "epoch": 0.6560220812693074, "grad_norm": 6.9248973327181815, "learning_rate": 2.7950999161799062e-06, "loss": 17.4476, "step": 35889 }, { "epoch": 0.656040360465754, "grad_norm": 5.4135377059490395, "learning_rate": 2.794834244679674e-06, "loss": 17.1199, "step": 35890 }, { "epoch": 0.6560586396622005, "grad_norm": 6.3153103830097805, "learning_rate": 2.7945685809083567e-06, "loss": 17.1559, "step": 35891 }, { "epoch": 0.6560769188586469, "grad_norm": 6.561594716949458, "learning_rate": 2.7943029248668824e-06, "loss": 17.5404, "step": 35892 }, { "epoch": 0.6560951980550935, "grad_norm": 6.042226236147416, "learning_rate": 2.794037276556187e-06, "loss": 17.4514, "step": 35893 }, { "epoch": 0.65611347725154, "grad_norm": 6.408207240104968, "learning_rate": 2.7937716359771977e-06, "loss": 17.4964, "step": 35894 }, { "epoch": 0.6561317564479865, "grad_norm": 5.569138365643743, "learning_rate": 2.793506003130849e-06, "loss": 17.0458, "step": 35895 }, { "epoch": 0.6561500356444331, "grad_norm": 5.274763795404839, "learning_rate": 2.793240378018071e-06, "loss": 16.7837, "step": 35896 }, { "epoch": 0.6561683148408796, "grad_norm": 7.321012304369385, "learning_rate": 2.7929747606397916e-06, "loss": 17.8816, "step": 35897 }, { "epoch": 0.6561865940373262, "grad_norm": 7.097220000654588, "learning_rate": 2.7927091509969446e-06, "loss": 17.7055, "step": 35898 }, { "epoch": 0.6562048732337726, "grad_norm": 5.283794437413132, "learning_rate": 2.7924435490904623e-06, "loss": 17.2315, "step": 35899 }, { "epoch": 0.6562231524302191, "grad_norm": 6.86037148452212, "learning_rate": 2.792177954921271e-06, "loss": 17.4127, "step": 35900 }, { "epoch": 0.6562414316266657, "grad_norm": 5.786140740494354, "learning_rate": 2.791912368490307e-06, "loss": 17.2737, "step": 35901 }, { "epoch": 0.6562597108231122, "grad_norm": 6.352882745617725, "learning_rate": 2.791646789798499e-06, "loss": 17.3315, "step": 35902 }, { "epoch": 0.6562779900195588, "grad_norm": 6.142425792310015, "learning_rate": 2.791381218846775e-06, "loss": 17.3227, "step": 35903 }, { "epoch": 0.6562962692160053, "grad_norm": 5.46250111744113, "learning_rate": 2.7911156556360696e-06, "loss": 17.0269, "step": 35904 }, { "epoch": 0.6563145484124517, "grad_norm": 6.894870478602776, "learning_rate": 2.790850100167313e-06, "loss": 17.7981, "step": 35905 }, { "epoch": 0.6563328276088983, "grad_norm": 5.673873231799286, "learning_rate": 2.790584552441432e-06, "loss": 17.654, "step": 35906 }, { "epoch": 0.6563511068053448, "grad_norm": 6.6987028184403865, "learning_rate": 2.7903190124593613e-06, "loss": 17.3344, "step": 35907 }, { "epoch": 0.6563693860017914, "grad_norm": 6.525170870751684, "learning_rate": 2.7900534802220323e-06, "loss": 17.7585, "step": 35908 }, { "epoch": 0.6563876651982379, "grad_norm": 5.208885636741393, "learning_rate": 2.789787955730373e-06, "loss": 17.0171, "step": 35909 }, { "epoch": 0.6564059443946844, "grad_norm": 6.371998792464659, "learning_rate": 2.7895224389853157e-06, "loss": 17.5452, "step": 35910 }, { "epoch": 0.656424223591131, "grad_norm": 6.905505563473419, "learning_rate": 2.7892569299877896e-06, "loss": 17.4776, "step": 35911 }, { "epoch": 0.6564425027875774, "grad_norm": 5.611073865123441, "learning_rate": 2.7889914287387276e-06, "loss": 17.1752, "step": 35912 }, { "epoch": 0.656460781984024, "grad_norm": 6.980314955024024, "learning_rate": 2.7887259352390596e-06, "loss": 17.9506, "step": 35913 }, { "epoch": 0.6564790611804705, "grad_norm": 5.475452230341044, "learning_rate": 2.788460449489712e-06, "loss": 17.119, "step": 35914 }, { "epoch": 0.656497340376917, "grad_norm": 7.90583455257546, "learning_rate": 2.7881949714916212e-06, "loss": 18.2017, "step": 35915 }, { "epoch": 0.6565156195733636, "grad_norm": 6.238432329378136, "learning_rate": 2.787929501245713e-06, "loss": 17.4926, "step": 35916 }, { "epoch": 0.65653389876981, "grad_norm": 7.9557304745910695, "learning_rate": 2.7876640387529206e-06, "loss": 18.1739, "step": 35917 }, { "epoch": 0.6565521779662566, "grad_norm": 5.203972723833805, "learning_rate": 2.787398584014175e-06, "loss": 17.0013, "step": 35918 }, { "epoch": 0.6565704571627031, "grad_norm": 5.512484403162345, "learning_rate": 2.7871331370304057e-06, "loss": 17.0415, "step": 35919 }, { "epoch": 0.6565887363591496, "grad_norm": 5.218588604195907, "learning_rate": 2.7868676978025404e-06, "loss": 17.003, "step": 35920 }, { "epoch": 0.6566070155555962, "grad_norm": 5.835435213992634, "learning_rate": 2.786602266331514e-06, "loss": 17.3869, "step": 35921 }, { "epoch": 0.6566252947520427, "grad_norm": 6.9313214836273485, "learning_rate": 2.7863368426182548e-06, "loss": 18.0206, "step": 35922 }, { "epoch": 0.6566435739484893, "grad_norm": 6.41662842954407, "learning_rate": 2.7860714266636903e-06, "loss": 17.73, "step": 35923 }, { "epoch": 0.6566618531449357, "grad_norm": 7.807577177344121, "learning_rate": 2.7858060184687558e-06, "loss": 18.4079, "step": 35924 }, { "epoch": 0.6566801323413822, "grad_norm": 7.9554524330098415, "learning_rate": 2.785540618034377e-06, "loss": 18.2345, "step": 35925 }, { "epoch": 0.6566984115378288, "grad_norm": 6.586984070236236, "learning_rate": 2.785275225361486e-06, "loss": 17.6809, "step": 35926 }, { "epoch": 0.6567166907342753, "grad_norm": 6.879571081139098, "learning_rate": 2.785009840451015e-06, "loss": 17.6829, "step": 35927 }, { "epoch": 0.6567349699307219, "grad_norm": 5.645728255444835, "learning_rate": 2.784744463303891e-06, "loss": 17.4141, "step": 35928 }, { "epoch": 0.6567532491271684, "grad_norm": 5.593289313756728, "learning_rate": 2.784479093921047e-06, "loss": 17.3728, "step": 35929 }, { "epoch": 0.6567715283236149, "grad_norm": 6.5144915908374506, "learning_rate": 2.7842137323034112e-06, "loss": 17.2274, "step": 35930 }, { "epoch": 0.6567898075200614, "grad_norm": 6.185245767658041, "learning_rate": 2.783948378451913e-06, "loss": 17.2266, "step": 35931 }, { "epoch": 0.6568080867165079, "grad_norm": 6.574867779750073, "learning_rate": 2.783683032367485e-06, "loss": 17.3721, "step": 35932 }, { "epoch": 0.6568263659129545, "grad_norm": 6.209974830198078, "learning_rate": 2.783417694051055e-06, "loss": 17.1267, "step": 35933 }, { "epoch": 0.656844645109401, "grad_norm": 7.138648933785241, "learning_rate": 2.7831523635035517e-06, "loss": 17.4723, "step": 35934 }, { "epoch": 0.6568629243058475, "grad_norm": 6.350430673998909, "learning_rate": 2.7828870407259073e-06, "loss": 17.132, "step": 35935 }, { "epoch": 0.6568812035022941, "grad_norm": 5.819124557417651, "learning_rate": 2.7826217257190537e-06, "loss": 17.1752, "step": 35936 }, { "epoch": 0.6568994826987405, "grad_norm": 6.27822689176906, "learning_rate": 2.7823564184839158e-06, "loss": 17.1857, "step": 35937 }, { "epoch": 0.6569177618951871, "grad_norm": 6.1926026163538195, "learning_rate": 2.7820911190214288e-06, "loss": 17.3272, "step": 35938 }, { "epoch": 0.6569360410916336, "grad_norm": 5.0608609406994525, "learning_rate": 2.7818258273325184e-06, "loss": 16.7528, "step": 35939 }, { "epoch": 0.6569543202880801, "grad_norm": 7.045365339780329, "learning_rate": 2.781560543418117e-06, "loss": 17.5638, "step": 35940 }, { "epoch": 0.6569725994845267, "grad_norm": 5.878032651257977, "learning_rate": 2.781295267279154e-06, "loss": 17.2135, "step": 35941 }, { "epoch": 0.6569908786809732, "grad_norm": 5.339573672330535, "learning_rate": 2.781029998916556e-06, "loss": 17.1782, "step": 35942 }, { "epoch": 0.6570091578774198, "grad_norm": 6.501214566540977, "learning_rate": 2.780764738331258e-06, "loss": 17.1385, "step": 35943 }, { "epoch": 0.6570274370738662, "grad_norm": 7.357000651805667, "learning_rate": 2.7804994855241847e-06, "loss": 17.7621, "step": 35944 }, { "epoch": 0.6570457162703127, "grad_norm": 5.253386830336961, "learning_rate": 2.780234240496268e-06, "loss": 17.0787, "step": 35945 }, { "epoch": 0.6570639954667593, "grad_norm": 7.829779665657069, "learning_rate": 2.77996900324844e-06, "loss": 17.7645, "step": 35946 }, { "epoch": 0.6570822746632058, "grad_norm": 5.994114812527437, "learning_rate": 2.779703773781628e-06, "loss": 17.553, "step": 35947 }, { "epoch": 0.6571005538596524, "grad_norm": 7.057070855999343, "learning_rate": 2.7794385520967592e-06, "loss": 17.8766, "step": 35948 }, { "epoch": 0.6571188330560989, "grad_norm": 6.373389174845576, "learning_rate": 2.7791733381947677e-06, "loss": 17.1637, "step": 35949 }, { "epoch": 0.6571371122525453, "grad_norm": 6.341857836074193, "learning_rate": 2.778908132076581e-06, "loss": 17.3974, "step": 35950 }, { "epoch": 0.6571553914489919, "grad_norm": 6.12191931109504, "learning_rate": 2.7786429337431263e-06, "loss": 17.347, "step": 35951 }, { "epoch": 0.6571736706454384, "grad_norm": 5.84102986362801, "learning_rate": 2.778377743195336e-06, "loss": 17.4346, "step": 35952 }, { "epoch": 0.657191949841885, "grad_norm": 6.898076076134302, "learning_rate": 2.7781125604341408e-06, "loss": 17.5012, "step": 35953 }, { "epoch": 0.6572102290383315, "grad_norm": 5.661589774746464, "learning_rate": 2.777847385460466e-06, "loss": 17.2277, "step": 35954 }, { "epoch": 0.657228508234778, "grad_norm": 5.111678503241513, "learning_rate": 2.777582218275245e-06, "loss": 16.9316, "step": 35955 }, { "epoch": 0.6572467874312246, "grad_norm": 6.312558257896681, "learning_rate": 2.777317058879404e-06, "loss": 17.5635, "step": 35956 }, { "epoch": 0.657265066627671, "grad_norm": 6.2425151572673405, "learning_rate": 2.7770519072738756e-06, "loss": 17.316, "step": 35957 }, { "epoch": 0.6572833458241176, "grad_norm": 6.112597270575876, "learning_rate": 2.7767867634595867e-06, "loss": 17.3526, "step": 35958 }, { "epoch": 0.6573016250205641, "grad_norm": 6.791335940504503, "learning_rate": 2.776521627437466e-06, "loss": 17.5196, "step": 35959 }, { "epoch": 0.6573199042170106, "grad_norm": 7.387804109371392, "learning_rate": 2.776256499208446e-06, "loss": 18.0212, "step": 35960 }, { "epoch": 0.6573381834134572, "grad_norm": 6.859120188754178, "learning_rate": 2.7759913787734525e-06, "loss": 17.7413, "step": 35961 }, { "epoch": 0.6573564626099037, "grad_norm": 5.993612134947817, "learning_rate": 2.7757262661334155e-06, "loss": 17.4309, "step": 35962 }, { "epoch": 0.6573747418063501, "grad_norm": 6.8384659438768445, "learning_rate": 2.7754611612892672e-06, "loss": 17.4218, "step": 35963 }, { "epoch": 0.6573930210027967, "grad_norm": 6.272917232633212, "learning_rate": 2.7751960642419345e-06, "loss": 17.3647, "step": 35964 }, { "epoch": 0.6574113001992432, "grad_norm": 8.659787553561628, "learning_rate": 2.7749309749923446e-06, "loss": 18.4458, "step": 35965 }, { "epoch": 0.6574295793956898, "grad_norm": 5.358234025365968, "learning_rate": 2.7746658935414304e-06, "loss": 16.9847, "step": 35966 }, { "epoch": 0.6574478585921363, "grad_norm": 6.210402018879358, "learning_rate": 2.774400819890119e-06, "loss": 17.2485, "step": 35967 }, { "epoch": 0.6574661377885828, "grad_norm": 7.0577911323713565, "learning_rate": 2.7741357540393376e-06, "loss": 18.0531, "step": 35968 }, { "epoch": 0.6574844169850294, "grad_norm": 6.9404597496046065, "learning_rate": 2.7738706959900197e-06, "loss": 17.5852, "step": 35969 }, { "epoch": 0.6575026961814758, "grad_norm": 6.0796269843896535, "learning_rate": 2.7736056457430893e-06, "loss": 17.1701, "step": 35970 }, { "epoch": 0.6575209753779224, "grad_norm": 6.576460860471188, "learning_rate": 2.773340603299478e-06, "loss": 17.5643, "step": 35971 }, { "epoch": 0.6575392545743689, "grad_norm": 5.894397705457389, "learning_rate": 2.7730755686601174e-06, "loss": 17.3116, "step": 35972 }, { "epoch": 0.6575575337708154, "grad_norm": 5.511649214055565, "learning_rate": 2.772810541825931e-06, "loss": 17.2815, "step": 35973 }, { "epoch": 0.657575812967262, "grad_norm": 6.8502063987662085, "learning_rate": 2.7725455227978524e-06, "loss": 17.8409, "step": 35974 }, { "epoch": 0.6575940921637085, "grad_norm": 5.641035882567722, "learning_rate": 2.772280511576808e-06, "loss": 17.0495, "step": 35975 }, { "epoch": 0.657612371360155, "grad_norm": 6.493276045623494, "learning_rate": 2.7720155081637258e-06, "loss": 17.3714, "step": 35976 }, { "epoch": 0.6576306505566015, "grad_norm": 6.531027556830283, "learning_rate": 2.771750512559538e-06, "loss": 17.2577, "step": 35977 }, { "epoch": 0.657648929753048, "grad_norm": 7.241056723850311, "learning_rate": 2.7714855247651707e-06, "loss": 17.6859, "step": 35978 }, { "epoch": 0.6576672089494946, "grad_norm": 5.4445793619190415, "learning_rate": 2.771220544781552e-06, "loss": 17.0367, "step": 35979 }, { "epoch": 0.6576854881459411, "grad_norm": 6.387347236494824, "learning_rate": 2.7709555726096117e-06, "loss": 17.79, "step": 35980 }, { "epoch": 0.6577037673423877, "grad_norm": 7.406002681653767, "learning_rate": 2.7706906082502803e-06, "loss": 17.9446, "step": 35981 }, { "epoch": 0.6577220465388341, "grad_norm": 6.20669442908191, "learning_rate": 2.7704256517044826e-06, "loss": 17.3722, "step": 35982 }, { "epoch": 0.6577403257352806, "grad_norm": 5.5658952947154265, "learning_rate": 2.7701607029731525e-06, "loss": 17.2691, "step": 35983 }, { "epoch": 0.6577586049317272, "grad_norm": 5.993631467492315, "learning_rate": 2.769895762057215e-06, "loss": 17.2462, "step": 35984 }, { "epoch": 0.6577768841281737, "grad_norm": 6.185170877096645, "learning_rate": 2.7696308289575976e-06, "loss": 17.3943, "step": 35985 }, { "epoch": 0.6577951633246203, "grad_norm": 7.468707518752735, "learning_rate": 2.769365903675232e-06, "loss": 17.9439, "step": 35986 }, { "epoch": 0.6578134425210668, "grad_norm": 5.934420898063888, "learning_rate": 2.769100986211044e-06, "loss": 17.4239, "step": 35987 }, { "epoch": 0.6578317217175133, "grad_norm": 5.519559522692709, "learning_rate": 2.768836076565965e-06, "loss": 17.3408, "step": 35988 }, { "epoch": 0.6578500009139598, "grad_norm": 5.548287869644951, "learning_rate": 2.7685711747409204e-06, "loss": 17.1109, "step": 35989 }, { "epoch": 0.6578682801104063, "grad_norm": 7.6733606994759525, "learning_rate": 2.76830628073684e-06, "loss": 18.0291, "step": 35990 }, { "epoch": 0.6578865593068529, "grad_norm": 6.785730039651029, "learning_rate": 2.768041394554654e-06, "loss": 17.8159, "step": 35991 }, { "epoch": 0.6579048385032994, "grad_norm": 4.715812527054188, "learning_rate": 2.7677765161952897e-06, "loss": 16.7348, "step": 35992 }, { "epoch": 0.6579231176997459, "grad_norm": 6.301859234298522, "learning_rate": 2.767511645659673e-06, "loss": 17.4807, "step": 35993 }, { "epoch": 0.6579413968961925, "grad_norm": 6.601846427828277, "learning_rate": 2.767246782948736e-06, "loss": 17.2793, "step": 35994 }, { "epoch": 0.657959676092639, "grad_norm": 9.44107128606128, "learning_rate": 2.766981928063406e-06, "loss": 17.705, "step": 35995 }, { "epoch": 0.6579779552890855, "grad_norm": 9.371932014755064, "learning_rate": 2.766717081004607e-06, "loss": 17.677, "step": 35996 }, { "epoch": 0.657996234485532, "grad_norm": 5.582244791320146, "learning_rate": 2.766452241773274e-06, "loss": 17.2043, "step": 35997 }, { "epoch": 0.6580145136819785, "grad_norm": 6.4565146716661985, "learning_rate": 2.76618741037033e-06, "loss": 17.6186, "step": 35998 }, { "epoch": 0.6580327928784251, "grad_norm": 5.738465053827026, "learning_rate": 2.7659225867967053e-06, "loss": 17.2366, "step": 35999 }, { "epoch": 0.6580510720748716, "grad_norm": 6.179599112451247, "learning_rate": 2.7656577710533307e-06, "loss": 17.645, "step": 36000 }, { "epoch": 0.6580693512713182, "grad_norm": 6.159081763861927, "learning_rate": 2.765392963141129e-06, "loss": 17.4741, "step": 36001 }, { "epoch": 0.6580876304677646, "grad_norm": 6.224068007413522, "learning_rate": 2.7651281630610327e-06, "loss": 17.3367, "step": 36002 }, { "epoch": 0.6581059096642111, "grad_norm": 5.239115635447926, "learning_rate": 2.764863370813969e-06, "loss": 17.1139, "step": 36003 }, { "epoch": 0.6581241888606577, "grad_norm": 5.246677409178157, "learning_rate": 2.764598586400863e-06, "loss": 16.9826, "step": 36004 }, { "epoch": 0.6581424680571042, "grad_norm": 6.303097017721754, "learning_rate": 2.7643338098226465e-06, "loss": 17.5586, "step": 36005 }, { "epoch": 0.6581607472535508, "grad_norm": 7.622403469536251, "learning_rate": 2.764069041080244e-06, "loss": 17.8034, "step": 36006 }, { "epoch": 0.6581790264499973, "grad_norm": 6.789522165385635, "learning_rate": 2.763804280174588e-06, "loss": 17.5251, "step": 36007 }, { "epoch": 0.6581973056464437, "grad_norm": 6.866716738003483, "learning_rate": 2.763539527106602e-06, "loss": 17.7595, "step": 36008 }, { "epoch": 0.6582155848428903, "grad_norm": 8.442526871523775, "learning_rate": 2.7632747818772176e-06, "loss": 18.0011, "step": 36009 }, { "epoch": 0.6582338640393368, "grad_norm": 7.818396984346509, "learning_rate": 2.7630100444873587e-06, "loss": 17.9176, "step": 36010 }, { "epoch": 0.6582521432357834, "grad_norm": 5.888954380621548, "learning_rate": 2.7627453149379578e-06, "loss": 17.3927, "step": 36011 }, { "epoch": 0.6582704224322299, "grad_norm": 6.427937230757845, "learning_rate": 2.7624805932299403e-06, "loss": 17.5183, "step": 36012 }, { "epoch": 0.6582887016286764, "grad_norm": 7.001400319978414, "learning_rate": 2.7622158793642327e-06, "loss": 17.8935, "step": 36013 }, { "epoch": 0.658306980825123, "grad_norm": 6.633813978120167, "learning_rate": 2.7619511733417657e-06, "loss": 17.486, "step": 36014 }, { "epoch": 0.6583252600215694, "grad_norm": 9.68199402630305, "learning_rate": 2.761686475163464e-06, "loss": 18.8464, "step": 36015 }, { "epoch": 0.658343539218016, "grad_norm": 8.57955628712022, "learning_rate": 2.7614217848302572e-06, "loss": 18.0817, "step": 36016 }, { "epoch": 0.6583618184144625, "grad_norm": 5.503742185925937, "learning_rate": 2.7611571023430737e-06, "loss": 17.1631, "step": 36017 }, { "epoch": 0.658380097610909, "grad_norm": 5.321676171514128, "learning_rate": 2.7608924277028394e-06, "loss": 17.1432, "step": 36018 }, { "epoch": 0.6583983768073556, "grad_norm": 5.995438858777632, "learning_rate": 2.7606277609104844e-06, "loss": 17.1892, "step": 36019 }, { "epoch": 0.6584166560038021, "grad_norm": 6.42364334898074, "learning_rate": 2.760363101966934e-06, "loss": 17.4625, "step": 36020 }, { "epoch": 0.6584349352002486, "grad_norm": 5.836052486893554, "learning_rate": 2.7600984508731154e-06, "loss": 17.0957, "step": 36021 }, { "epoch": 0.6584532143966951, "grad_norm": 5.846883794560701, "learning_rate": 2.7598338076299597e-06, "loss": 17.3413, "step": 36022 }, { "epoch": 0.6584714935931416, "grad_norm": 5.539362526769212, "learning_rate": 2.7595691722383917e-06, "loss": 17.1653, "step": 36023 }, { "epoch": 0.6584897727895882, "grad_norm": 5.643516655794361, "learning_rate": 2.759304544699337e-06, "loss": 17.1062, "step": 36024 }, { "epoch": 0.6585080519860347, "grad_norm": 6.235543650345715, "learning_rate": 2.7590399250137262e-06, "loss": 17.459, "step": 36025 }, { "epoch": 0.6585263311824813, "grad_norm": 6.292649717606006, "learning_rate": 2.758775313182487e-06, "loss": 17.5664, "step": 36026 }, { "epoch": 0.6585446103789278, "grad_norm": 6.054343899323262, "learning_rate": 2.758510709206545e-06, "loss": 17.4205, "step": 36027 }, { "epoch": 0.6585628895753742, "grad_norm": 6.31870191773234, "learning_rate": 2.7582461130868298e-06, "loss": 17.2177, "step": 36028 }, { "epoch": 0.6585811687718208, "grad_norm": 6.138527672803726, "learning_rate": 2.7579815248242677e-06, "loss": 17.3763, "step": 36029 }, { "epoch": 0.6585994479682673, "grad_norm": 7.157496205582953, "learning_rate": 2.757716944419784e-06, "loss": 17.9065, "step": 36030 }, { "epoch": 0.6586177271647138, "grad_norm": 6.582396011715612, "learning_rate": 2.7574523718743087e-06, "loss": 17.3965, "step": 36031 }, { "epoch": 0.6586360063611604, "grad_norm": 5.694581337365278, "learning_rate": 2.757187807188767e-06, "loss": 17.2653, "step": 36032 }, { "epoch": 0.6586542855576069, "grad_norm": 5.656277425959762, "learning_rate": 2.7569232503640896e-06, "loss": 17.394, "step": 36033 }, { "epoch": 0.6586725647540534, "grad_norm": 6.983328114571086, "learning_rate": 2.756658701401199e-06, "loss": 17.8671, "step": 36034 }, { "epoch": 0.6586908439504999, "grad_norm": 5.040268300591851, "learning_rate": 2.7563941603010257e-06, "loss": 17.0019, "step": 36035 }, { "epoch": 0.6587091231469464, "grad_norm": 7.09587045192818, "learning_rate": 2.7561296270644975e-06, "loss": 17.8287, "step": 36036 }, { "epoch": 0.658727402343393, "grad_norm": 5.092850903583449, "learning_rate": 2.755865101692541e-06, "loss": 16.9299, "step": 36037 }, { "epoch": 0.6587456815398395, "grad_norm": 6.187186873077127, "learning_rate": 2.75560058418608e-06, "loss": 17.4913, "step": 36038 }, { "epoch": 0.6587639607362861, "grad_norm": 7.227281024008961, "learning_rate": 2.755336074546047e-06, "loss": 17.8655, "step": 36039 }, { "epoch": 0.6587822399327325, "grad_norm": 7.378144538827788, "learning_rate": 2.7550715727733647e-06, "loss": 17.7277, "step": 36040 }, { "epoch": 0.658800519129179, "grad_norm": 5.926668283301754, "learning_rate": 2.754807078868961e-06, "loss": 17.2685, "step": 36041 }, { "epoch": 0.6588187983256256, "grad_norm": 5.430976854188656, "learning_rate": 2.7545425928337654e-06, "loss": 17.0158, "step": 36042 }, { "epoch": 0.6588370775220721, "grad_norm": 5.954084974124972, "learning_rate": 2.754278114668701e-06, "loss": 17.1838, "step": 36043 }, { "epoch": 0.6588553567185187, "grad_norm": 6.409805242196165, "learning_rate": 2.7540136443746966e-06, "loss": 17.5489, "step": 36044 }, { "epoch": 0.6588736359149652, "grad_norm": 8.131061617766608, "learning_rate": 2.7537491819526817e-06, "loss": 17.9645, "step": 36045 }, { "epoch": 0.6588919151114117, "grad_norm": 5.807405894707223, "learning_rate": 2.7534847274035813e-06, "loss": 17.4258, "step": 36046 }, { "epoch": 0.6589101943078582, "grad_norm": 7.04992104930754, "learning_rate": 2.7532202807283205e-06, "loss": 17.6255, "step": 36047 }, { "epoch": 0.6589284735043047, "grad_norm": 6.828284962322551, "learning_rate": 2.7529558419278293e-06, "loss": 17.7088, "step": 36048 }, { "epoch": 0.6589467527007513, "grad_norm": 7.15080895579455, "learning_rate": 2.75269141100303e-06, "loss": 17.9607, "step": 36049 }, { "epoch": 0.6589650318971978, "grad_norm": 7.483722607361569, "learning_rate": 2.752426987954855e-06, "loss": 18.1061, "step": 36050 }, { "epoch": 0.6589833110936443, "grad_norm": 5.887108578501877, "learning_rate": 2.7521625727842283e-06, "loss": 17.3292, "step": 36051 }, { "epoch": 0.6590015902900909, "grad_norm": 7.009557031567864, "learning_rate": 2.7518981654920743e-06, "loss": 17.5948, "step": 36052 }, { "epoch": 0.6590198694865373, "grad_norm": 5.49953951888839, "learning_rate": 2.7516337660793224e-06, "loss": 17.2143, "step": 36053 }, { "epoch": 0.6590381486829839, "grad_norm": 5.612419700964222, "learning_rate": 2.7513693745469018e-06, "loss": 17.3941, "step": 36054 }, { "epoch": 0.6590564278794304, "grad_norm": 6.240776725488398, "learning_rate": 2.7511049908957334e-06, "loss": 17.229, "step": 36055 }, { "epoch": 0.6590747070758769, "grad_norm": 7.101365709568865, "learning_rate": 2.7508406151267496e-06, "loss": 17.771, "step": 36056 }, { "epoch": 0.6590929862723235, "grad_norm": 6.214381996745168, "learning_rate": 2.7505762472408736e-06, "loss": 17.3409, "step": 36057 }, { "epoch": 0.65911126546877, "grad_norm": 6.602611549468893, "learning_rate": 2.750311887239031e-06, "loss": 17.6051, "step": 36058 }, { "epoch": 0.6591295446652166, "grad_norm": 5.024143449133883, "learning_rate": 2.7500475351221524e-06, "loss": 16.9573, "step": 36059 }, { "epoch": 0.659147823861663, "grad_norm": 6.822927165531445, "learning_rate": 2.749783190891161e-06, "loss": 17.7173, "step": 36060 }, { "epoch": 0.6591661030581095, "grad_norm": 8.09679324291858, "learning_rate": 2.749518854546983e-06, "loss": 17.9701, "step": 36061 }, { "epoch": 0.6591843822545561, "grad_norm": 7.120460068275151, "learning_rate": 2.749254526090546e-06, "loss": 17.8976, "step": 36062 }, { "epoch": 0.6592026614510026, "grad_norm": 6.104122075762366, "learning_rate": 2.7489902055227762e-06, "loss": 17.2645, "step": 36063 }, { "epoch": 0.6592209406474492, "grad_norm": 5.626593110584072, "learning_rate": 2.7487258928446028e-06, "loss": 17.0559, "step": 36064 }, { "epoch": 0.6592392198438957, "grad_norm": 6.473354569162618, "learning_rate": 2.74846158805695e-06, "loss": 17.5094, "step": 36065 }, { "epoch": 0.6592574990403421, "grad_norm": 6.517710760314867, "learning_rate": 2.7481972911607413e-06, "loss": 17.471, "step": 36066 }, { "epoch": 0.6592757782367887, "grad_norm": 5.898824203322185, "learning_rate": 2.7479330021569082e-06, "loss": 17.1775, "step": 36067 }, { "epoch": 0.6592940574332352, "grad_norm": 5.642733956295278, "learning_rate": 2.747668721046374e-06, "loss": 17.1302, "step": 36068 }, { "epoch": 0.6593123366296818, "grad_norm": 6.375211656460417, "learning_rate": 2.7474044478300642e-06, "loss": 17.3774, "step": 36069 }, { "epoch": 0.6593306158261283, "grad_norm": 6.62489282741297, "learning_rate": 2.7471401825089078e-06, "loss": 17.1292, "step": 36070 }, { "epoch": 0.6593488950225748, "grad_norm": 6.15401466373531, "learning_rate": 2.746875925083828e-06, "loss": 17.3589, "step": 36071 }, { "epoch": 0.6593671742190214, "grad_norm": 7.522348167268968, "learning_rate": 2.746611675555752e-06, "loss": 17.3162, "step": 36072 }, { "epoch": 0.6593854534154678, "grad_norm": 5.951931731244978, "learning_rate": 2.746347433925609e-06, "loss": 17.3998, "step": 36073 }, { "epoch": 0.6594037326119144, "grad_norm": 5.738398153585566, "learning_rate": 2.746083200194323e-06, "loss": 17.3628, "step": 36074 }, { "epoch": 0.6594220118083609, "grad_norm": 5.937309043647466, "learning_rate": 2.7458189743628164e-06, "loss": 17.1992, "step": 36075 }, { "epoch": 0.6594402910048074, "grad_norm": 5.165306145554969, "learning_rate": 2.7455547564320218e-06, "loss": 17.0345, "step": 36076 }, { "epoch": 0.659458570201254, "grad_norm": 6.888779320988753, "learning_rate": 2.7452905464028624e-06, "loss": 17.8567, "step": 36077 }, { "epoch": 0.6594768493977005, "grad_norm": 7.261107104519069, "learning_rate": 2.7450263442762613e-06, "loss": 17.7839, "step": 36078 }, { "epoch": 0.659495128594147, "grad_norm": 6.235072303919005, "learning_rate": 2.7447621500531476e-06, "loss": 17.6056, "step": 36079 }, { "epoch": 0.6595134077905935, "grad_norm": 6.651249544317567, "learning_rate": 2.744497963734446e-06, "loss": 17.3851, "step": 36080 }, { "epoch": 0.65953168698704, "grad_norm": 9.604954039136505, "learning_rate": 2.7442337853210864e-06, "loss": 17.5241, "step": 36081 }, { "epoch": 0.6595499661834866, "grad_norm": 6.511263250201923, "learning_rate": 2.743969614813991e-06, "loss": 17.6003, "step": 36082 }, { "epoch": 0.6595682453799331, "grad_norm": 7.096880909444128, "learning_rate": 2.743705452214085e-06, "loss": 17.9257, "step": 36083 }, { "epoch": 0.6595865245763797, "grad_norm": 6.493593885535301, "learning_rate": 2.7434412975222967e-06, "loss": 17.5223, "step": 36084 }, { "epoch": 0.6596048037728262, "grad_norm": 7.653588437945635, "learning_rate": 2.743177150739551e-06, "loss": 17.6467, "step": 36085 }, { "epoch": 0.6596230829692726, "grad_norm": 5.4699505253698675, "learning_rate": 2.7429130118667725e-06, "loss": 16.9942, "step": 36086 }, { "epoch": 0.6596413621657192, "grad_norm": 6.451592900654088, "learning_rate": 2.742648880904889e-06, "loss": 17.7551, "step": 36087 }, { "epoch": 0.6596596413621657, "grad_norm": 5.168545402830145, "learning_rate": 2.7423847578548234e-06, "loss": 16.9231, "step": 36088 }, { "epoch": 0.6596779205586123, "grad_norm": 6.940135353348518, "learning_rate": 2.742120642717504e-06, "loss": 17.5323, "step": 36089 }, { "epoch": 0.6596961997550588, "grad_norm": 6.196574115342829, "learning_rate": 2.7418565354938574e-06, "loss": 17.6283, "step": 36090 }, { "epoch": 0.6597144789515053, "grad_norm": 8.64498177504642, "learning_rate": 2.741592436184807e-06, "loss": 17.3535, "step": 36091 }, { "epoch": 0.6597327581479518, "grad_norm": 8.622987390533618, "learning_rate": 2.7413283447912785e-06, "loss": 18.2868, "step": 36092 }, { "epoch": 0.6597510373443983, "grad_norm": 7.371413990761431, "learning_rate": 2.741064261314199e-06, "loss": 17.9374, "step": 36093 }, { "epoch": 0.6597693165408449, "grad_norm": 6.221085411147714, "learning_rate": 2.740800185754492e-06, "loss": 17.5384, "step": 36094 }, { "epoch": 0.6597875957372914, "grad_norm": 5.662143440571798, "learning_rate": 2.7405361181130863e-06, "loss": 17.3231, "step": 36095 }, { "epoch": 0.6598058749337379, "grad_norm": 6.126427445159387, "learning_rate": 2.7402720583909048e-06, "loss": 17.3663, "step": 36096 }, { "epoch": 0.6598241541301845, "grad_norm": 5.365158000505773, "learning_rate": 2.740008006588872e-06, "loss": 17.0023, "step": 36097 }, { "epoch": 0.659842433326631, "grad_norm": 4.775822537174924, "learning_rate": 2.739743962707915e-06, "loss": 16.7647, "step": 36098 }, { "epoch": 0.6598607125230774, "grad_norm": 7.067986487018007, "learning_rate": 2.7394799267489614e-06, "loss": 17.5709, "step": 36099 }, { "epoch": 0.659878991719524, "grad_norm": 6.274130123637771, "learning_rate": 2.7392158987129326e-06, "loss": 17.3372, "step": 36100 }, { "epoch": 0.6598972709159705, "grad_norm": 5.521555680453527, "learning_rate": 2.7389518786007574e-06, "loss": 17.2656, "step": 36101 }, { "epoch": 0.6599155501124171, "grad_norm": 5.889911619974348, "learning_rate": 2.7386878664133597e-06, "loss": 17.4239, "step": 36102 }, { "epoch": 0.6599338293088636, "grad_norm": 5.824925201753205, "learning_rate": 2.7384238621516636e-06, "loss": 17.2756, "step": 36103 }, { "epoch": 0.65995210850531, "grad_norm": 5.306713939579018, "learning_rate": 2.7381598658165964e-06, "loss": 16.9251, "step": 36104 }, { "epoch": 0.6599703877017566, "grad_norm": 7.478369010090673, "learning_rate": 2.7378958774090837e-06, "loss": 17.6887, "step": 36105 }, { "epoch": 0.6599886668982031, "grad_norm": 7.054903922175702, "learning_rate": 2.7376318969300468e-06, "loss": 17.6544, "step": 36106 }, { "epoch": 0.6600069460946497, "grad_norm": 4.640863816038763, "learning_rate": 2.7373679243804143e-06, "loss": 16.8753, "step": 36107 }, { "epoch": 0.6600252252910962, "grad_norm": 7.8485809496932575, "learning_rate": 2.737103959761111e-06, "loss": 18.0886, "step": 36108 }, { "epoch": 0.6600435044875427, "grad_norm": 6.244884807226446, "learning_rate": 2.7368400030730635e-06, "loss": 17.3974, "step": 36109 }, { "epoch": 0.6600617836839893, "grad_norm": 5.521549426359424, "learning_rate": 2.736576054317196e-06, "loss": 17.193, "step": 36110 }, { "epoch": 0.6600800628804357, "grad_norm": 6.491769952392931, "learning_rate": 2.7363121134944302e-06, "loss": 17.4406, "step": 36111 }, { "epoch": 0.6600983420768823, "grad_norm": 8.28101875887025, "learning_rate": 2.7360481806056968e-06, "loss": 17.9873, "step": 36112 }, { "epoch": 0.6601166212733288, "grad_norm": 5.907452827579115, "learning_rate": 2.7357842556519175e-06, "loss": 17.3823, "step": 36113 }, { "epoch": 0.6601349004697753, "grad_norm": 7.988028246068701, "learning_rate": 2.7355203386340162e-06, "loss": 18.3055, "step": 36114 }, { "epoch": 0.6601531796662219, "grad_norm": 8.439573220945016, "learning_rate": 2.7352564295529216e-06, "loss": 18.5913, "step": 36115 }, { "epoch": 0.6601714588626684, "grad_norm": 6.793396436274933, "learning_rate": 2.7349925284095545e-06, "loss": 17.6174, "step": 36116 }, { "epoch": 0.660189738059115, "grad_norm": 6.695570021450926, "learning_rate": 2.734728635204842e-06, "loss": 17.4267, "step": 36117 }, { "epoch": 0.6602080172555614, "grad_norm": 5.742236920619035, "learning_rate": 2.7344647499397117e-06, "loss": 17.2862, "step": 36118 }, { "epoch": 0.6602262964520079, "grad_norm": 7.467974112204297, "learning_rate": 2.7342008726150846e-06, "loss": 17.8341, "step": 36119 }, { "epoch": 0.6602445756484545, "grad_norm": 6.114881506032483, "learning_rate": 2.733937003231886e-06, "loss": 17.3694, "step": 36120 }, { "epoch": 0.660262854844901, "grad_norm": 4.800643384503244, "learning_rate": 2.7336731417910433e-06, "loss": 17.0263, "step": 36121 }, { "epoch": 0.6602811340413476, "grad_norm": 4.995782974375978, "learning_rate": 2.7334092882934793e-06, "loss": 17.0146, "step": 36122 }, { "epoch": 0.6602994132377941, "grad_norm": 5.980037589315146, "learning_rate": 2.733145442740117e-06, "loss": 17.2427, "step": 36123 }, { "epoch": 0.6603176924342405, "grad_norm": 4.787467301124577, "learning_rate": 2.7328816051318858e-06, "loss": 16.935, "step": 36124 }, { "epoch": 0.6603359716306871, "grad_norm": 7.338341590023678, "learning_rate": 2.7326177754697057e-06, "loss": 17.6113, "step": 36125 }, { "epoch": 0.6603542508271336, "grad_norm": 5.5411962219183, "learning_rate": 2.732353953754503e-06, "loss": 17.0269, "step": 36126 }, { "epoch": 0.6603725300235802, "grad_norm": 6.233856877934919, "learning_rate": 2.732090139987205e-06, "loss": 17.4071, "step": 36127 }, { "epoch": 0.6603908092200267, "grad_norm": 5.193666816062576, "learning_rate": 2.731826334168733e-06, "loss": 16.8967, "step": 36128 }, { "epoch": 0.6604090884164732, "grad_norm": 8.021724025733104, "learning_rate": 2.7315625363000143e-06, "loss": 18.0444, "step": 36129 }, { "epoch": 0.6604273676129198, "grad_norm": 5.913631372560514, "learning_rate": 2.731298746381972e-06, "loss": 17.1548, "step": 36130 }, { "epoch": 0.6604456468093662, "grad_norm": 4.845038314267458, "learning_rate": 2.731034964415529e-06, "loss": 16.8983, "step": 36131 }, { "epoch": 0.6604639260058128, "grad_norm": 6.174635216130794, "learning_rate": 2.730771190401613e-06, "loss": 17.252, "step": 36132 }, { "epoch": 0.6604822052022593, "grad_norm": 6.237773602753806, "learning_rate": 2.7305074243411457e-06, "loss": 17.2635, "step": 36133 }, { "epoch": 0.6605004843987058, "grad_norm": 6.501328689493936, "learning_rate": 2.7302436662350545e-06, "loss": 17.2853, "step": 36134 }, { "epoch": 0.6605187635951524, "grad_norm": 5.202892554860985, "learning_rate": 2.7299799160842613e-06, "loss": 16.8184, "step": 36135 }, { "epoch": 0.6605370427915989, "grad_norm": 5.186564815760942, "learning_rate": 2.729716173889692e-06, "loss": 16.9375, "step": 36136 }, { "epoch": 0.6605553219880455, "grad_norm": 6.139031326982394, "learning_rate": 2.7294524396522694e-06, "loss": 17.3527, "step": 36137 }, { "epoch": 0.6605736011844919, "grad_norm": 5.991379498010104, "learning_rate": 2.7291887133729207e-06, "loss": 17.5021, "step": 36138 }, { "epoch": 0.6605918803809384, "grad_norm": 6.152329178097594, "learning_rate": 2.7289249950525664e-06, "loss": 17.5165, "step": 36139 }, { "epoch": 0.660610159577385, "grad_norm": 5.630401741657838, "learning_rate": 2.7286612846921356e-06, "loss": 17.0582, "step": 36140 }, { "epoch": 0.6606284387738315, "grad_norm": 6.192421865438791, "learning_rate": 2.728397582292549e-06, "loss": 17.4508, "step": 36141 }, { "epoch": 0.6606467179702781, "grad_norm": 5.418739513378772, "learning_rate": 2.7281338878547303e-06, "loss": 17.0968, "step": 36142 }, { "epoch": 0.6606649971667246, "grad_norm": 7.345189484512439, "learning_rate": 2.727870201379605e-06, "loss": 18.08, "step": 36143 }, { "epoch": 0.660683276363171, "grad_norm": 5.8392234666206955, "learning_rate": 2.727606522868099e-06, "loss": 17.198, "step": 36144 }, { "epoch": 0.6607015555596176, "grad_norm": 6.933825153126433, "learning_rate": 2.7273428523211345e-06, "loss": 17.4898, "step": 36145 }, { "epoch": 0.6607198347560641, "grad_norm": 8.323344472263063, "learning_rate": 2.7270791897396363e-06, "loss": 17.587, "step": 36146 }, { "epoch": 0.6607381139525107, "grad_norm": 6.342430534953842, "learning_rate": 2.7268155351245295e-06, "loss": 17.5132, "step": 36147 }, { "epoch": 0.6607563931489572, "grad_norm": 5.941803054063052, "learning_rate": 2.7265518884767345e-06, "loss": 17.2567, "step": 36148 }, { "epoch": 0.6607746723454037, "grad_norm": 6.254631600366222, "learning_rate": 2.72628824979718e-06, "loss": 17.3861, "step": 36149 }, { "epoch": 0.6607929515418502, "grad_norm": 5.607435664101476, "learning_rate": 2.7260246190867877e-06, "loss": 17.0604, "step": 36150 }, { "epoch": 0.6608112307382967, "grad_norm": 7.944026740141569, "learning_rate": 2.72576099634648e-06, "loss": 17.8527, "step": 36151 }, { "epoch": 0.6608295099347433, "grad_norm": 5.894233021621141, "learning_rate": 2.7254973815771824e-06, "loss": 17.1673, "step": 36152 }, { "epoch": 0.6608477891311898, "grad_norm": 6.897191946790827, "learning_rate": 2.725233774779821e-06, "loss": 17.4134, "step": 36153 }, { "epoch": 0.6608660683276363, "grad_norm": 5.984710603123897, "learning_rate": 2.724970175955316e-06, "loss": 17.1199, "step": 36154 }, { "epoch": 0.6608843475240829, "grad_norm": 5.549669730491064, "learning_rate": 2.724706585104595e-06, "loss": 17.1741, "step": 36155 }, { "epoch": 0.6609026267205294, "grad_norm": 5.751046488222435, "learning_rate": 2.7244430022285782e-06, "loss": 17.0943, "step": 36156 }, { "epoch": 0.6609209059169759, "grad_norm": 6.052356678276168, "learning_rate": 2.7241794273281936e-06, "loss": 17.3519, "step": 36157 }, { "epoch": 0.6609391851134224, "grad_norm": 5.909114262262815, "learning_rate": 2.723915860404362e-06, "loss": 17.2432, "step": 36158 }, { "epoch": 0.6609574643098689, "grad_norm": 5.279470824025264, "learning_rate": 2.7236523014580063e-06, "loss": 17.2176, "step": 36159 }, { "epoch": 0.6609757435063155, "grad_norm": 5.714172115476262, "learning_rate": 2.723388750490053e-06, "loss": 16.9694, "step": 36160 }, { "epoch": 0.660994022702762, "grad_norm": 5.8183971539026995, "learning_rate": 2.723125207501423e-06, "loss": 17.293, "step": 36161 }, { "epoch": 0.6610123018992086, "grad_norm": 7.227950741037821, "learning_rate": 2.722861672493042e-06, "loss": 17.3814, "step": 36162 }, { "epoch": 0.661030581095655, "grad_norm": 7.515804981520265, "learning_rate": 2.7225981454658346e-06, "loss": 17.5223, "step": 36163 }, { "epoch": 0.6610488602921015, "grad_norm": 5.575450256458076, "learning_rate": 2.7223346264207227e-06, "loss": 17.014, "step": 36164 }, { "epoch": 0.6610671394885481, "grad_norm": 7.698566574245299, "learning_rate": 2.722071115358629e-06, "loss": 18.0875, "step": 36165 }, { "epoch": 0.6610854186849946, "grad_norm": 6.0611078078824026, "learning_rate": 2.7218076122804806e-06, "loss": 17.3095, "step": 36166 }, { "epoch": 0.6611036978814411, "grad_norm": 5.2233524323395, "learning_rate": 2.721544117187198e-06, "loss": 17.1668, "step": 36167 }, { "epoch": 0.6611219770778877, "grad_norm": 7.681791002166191, "learning_rate": 2.7212806300797045e-06, "loss": 18.0643, "step": 36168 }, { "epoch": 0.6611402562743341, "grad_norm": 5.288173026797432, "learning_rate": 2.721017150958926e-06, "loss": 17.1104, "step": 36169 }, { "epoch": 0.6611585354707807, "grad_norm": 7.279180113109348, "learning_rate": 2.7207536798257826e-06, "loss": 17.7616, "step": 36170 }, { "epoch": 0.6611768146672272, "grad_norm": 6.5597479980495885, "learning_rate": 2.720490216681201e-06, "loss": 17.3531, "step": 36171 }, { "epoch": 0.6611950938636737, "grad_norm": 9.213022771778274, "learning_rate": 2.7202267615261034e-06, "loss": 17.2947, "step": 36172 }, { "epoch": 0.6612133730601203, "grad_norm": 6.403636380274495, "learning_rate": 2.7199633143614124e-06, "loss": 17.4851, "step": 36173 }, { "epoch": 0.6612316522565668, "grad_norm": 5.647634895669714, "learning_rate": 2.7196998751880545e-06, "loss": 17.117, "step": 36174 }, { "epoch": 0.6612499314530134, "grad_norm": 4.306224662862267, "learning_rate": 2.7194364440069497e-06, "loss": 16.5217, "step": 36175 }, { "epoch": 0.6612682106494598, "grad_norm": 5.7871387793525635, "learning_rate": 2.7191730208190204e-06, "loss": 17.0668, "step": 36176 }, { "epoch": 0.6612864898459063, "grad_norm": 5.774763785734998, "learning_rate": 2.718909605625194e-06, "loss": 17.3128, "step": 36177 }, { "epoch": 0.6613047690423529, "grad_norm": 6.170532046286767, "learning_rate": 2.718646198426391e-06, "loss": 17.5472, "step": 36178 }, { "epoch": 0.6613230482387994, "grad_norm": 7.399761967100039, "learning_rate": 2.718382799223534e-06, "loss": 18.0035, "step": 36179 }, { "epoch": 0.661341327435246, "grad_norm": 6.030382049953112, "learning_rate": 2.7181194080175467e-06, "loss": 17.1668, "step": 36180 }, { "epoch": 0.6613596066316925, "grad_norm": 5.724101548194946, "learning_rate": 2.717856024809355e-06, "loss": 17.3325, "step": 36181 }, { "epoch": 0.661377885828139, "grad_norm": 6.141810252421374, "learning_rate": 2.717592649599878e-06, "loss": 17.2616, "step": 36182 }, { "epoch": 0.6613961650245855, "grad_norm": 5.4443699981845555, "learning_rate": 2.717329282390042e-06, "loss": 17.1855, "step": 36183 }, { "epoch": 0.661414444221032, "grad_norm": 5.647743697815433, "learning_rate": 2.7170659231807695e-06, "loss": 17.0584, "step": 36184 }, { "epoch": 0.6614327234174786, "grad_norm": 6.4549405759988945, "learning_rate": 2.716802571972981e-06, "loss": 17.602, "step": 36185 }, { "epoch": 0.6614510026139251, "grad_norm": 5.304581567482911, "learning_rate": 2.716539228767603e-06, "loss": 16.8966, "step": 36186 }, { "epoch": 0.6614692818103716, "grad_norm": 6.872764453883427, "learning_rate": 2.7162758935655553e-06, "loss": 17.5495, "step": 36187 }, { "epoch": 0.6614875610068182, "grad_norm": 7.518949635917526, "learning_rate": 2.716012566367764e-06, "loss": 17.7375, "step": 36188 }, { "epoch": 0.6615058402032646, "grad_norm": 6.831883486953042, "learning_rate": 2.7157492471751483e-06, "loss": 17.5285, "step": 36189 }, { "epoch": 0.6615241193997112, "grad_norm": 6.225222499081382, "learning_rate": 2.715485935988633e-06, "loss": 17.3514, "step": 36190 }, { "epoch": 0.6615423985961577, "grad_norm": 7.300435848175231, "learning_rate": 2.715222632809144e-06, "loss": 17.7748, "step": 36191 }, { "epoch": 0.6615606777926042, "grad_norm": 6.729629661801126, "learning_rate": 2.7149593376376015e-06, "loss": 17.3319, "step": 36192 }, { "epoch": 0.6615789569890508, "grad_norm": 5.951138036564616, "learning_rate": 2.7146960504749253e-06, "loss": 17.0755, "step": 36193 }, { "epoch": 0.6615972361854973, "grad_norm": 5.475802755439888, "learning_rate": 2.7144327713220442e-06, "loss": 17.071, "step": 36194 }, { "epoch": 0.6616155153819439, "grad_norm": 8.188570460608478, "learning_rate": 2.7141695001798773e-06, "loss": 18.2325, "step": 36195 }, { "epoch": 0.6616337945783903, "grad_norm": 5.40915701523966, "learning_rate": 2.7139062370493465e-06, "loss": 17.1186, "step": 36196 }, { "epoch": 0.6616520737748368, "grad_norm": 4.970256451273029, "learning_rate": 2.7136429819313747e-06, "loss": 16.9552, "step": 36197 }, { "epoch": 0.6616703529712834, "grad_norm": 5.465854787843744, "learning_rate": 2.7133797348268893e-06, "loss": 17.1767, "step": 36198 }, { "epoch": 0.6616886321677299, "grad_norm": 5.637758026308032, "learning_rate": 2.7131164957368066e-06, "loss": 16.7925, "step": 36199 }, { "epoch": 0.6617069113641765, "grad_norm": 6.86923142577998, "learning_rate": 2.7128532646620543e-06, "loss": 17.8347, "step": 36200 }, { "epoch": 0.661725190560623, "grad_norm": 7.981197608521955, "learning_rate": 2.712590041603551e-06, "loss": 18.0213, "step": 36201 }, { "epoch": 0.6617434697570694, "grad_norm": 8.208906048818942, "learning_rate": 2.712326826562223e-06, "loss": 17.893, "step": 36202 }, { "epoch": 0.661761748953516, "grad_norm": 5.504797071923233, "learning_rate": 2.7120636195389917e-06, "loss": 16.9332, "step": 36203 }, { "epoch": 0.6617800281499625, "grad_norm": 6.490494555334618, "learning_rate": 2.711800420534776e-06, "loss": 17.6373, "step": 36204 }, { "epoch": 0.6617983073464091, "grad_norm": 5.655737853917908, "learning_rate": 2.711537229550503e-06, "loss": 16.97, "step": 36205 }, { "epoch": 0.6618165865428556, "grad_norm": 6.575467858949959, "learning_rate": 2.7112740465870924e-06, "loss": 17.512, "step": 36206 }, { "epoch": 0.6618348657393021, "grad_norm": 7.964875311241718, "learning_rate": 2.7110108716454663e-06, "loss": 17.6575, "step": 36207 }, { "epoch": 0.6618531449357486, "grad_norm": 5.913364733876295, "learning_rate": 2.710747704726552e-06, "loss": 17.1629, "step": 36208 }, { "epoch": 0.6618714241321951, "grad_norm": 6.160604315234678, "learning_rate": 2.7104845458312674e-06, "loss": 17.3972, "step": 36209 }, { "epoch": 0.6618897033286417, "grad_norm": 6.7596149762212026, "learning_rate": 2.710221394960534e-06, "loss": 17.7612, "step": 36210 }, { "epoch": 0.6619079825250882, "grad_norm": 4.446664983570773, "learning_rate": 2.709958252115278e-06, "loss": 16.66, "step": 36211 }, { "epoch": 0.6619262617215347, "grad_norm": 5.133848240066889, "learning_rate": 2.7096951172964192e-06, "loss": 17.0475, "step": 36212 }, { "epoch": 0.6619445409179813, "grad_norm": 6.3147922490135935, "learning_rate": 2.7094319905048787e-06, "loss": 17.1761, "step": 36213 }, { "epoch": 0.6619628201144278, "grad_norm": 6.180049519891084, "learning_rate": 2.7091688717415825e-06, "loss": 17.3313, "step": 36214 }, { "epoch": 0.6619810993108743, "grad_norm": 6.5207581699933055, "learning_rate": 2.708905761007449e-06, "loss": 17.4726, "step": 36215 }, { "epoch": 0.6619993785073208, "grad_norm": 6.507948339592554, "learning_rate": 2.708642658303401e-06, "loss": 17.6157, "step": 36216 }, { "epoch": 0.6620176577037673, "grad_norm": 5.717397054155293, "learning_rate": 2.7083795636303635e-06, "loss": 17.1996, "step": 36217 }, { "epoch": 0.6620359369002139, "grad_norm": 6.028194523923729, "learning_rate": 2.7081164769892553e-06, "loss": 17.4242, "step": 36218 }, { "epoch": 0.6620542160966604, "grad_norm": 7.221076113798918, "learning_rate": 2.707853398381002e-06, "loss": 17.8497, "step": 36219 }, { "epoch": 0.662072495293107, "grad_norm": 5.999496591873118, "learning_rate": 2.7075903278065235e-06, "loss": 17.3301, "step": 36220 }, { "epoch": 0.6620907744895534, "grad_norm": 5.985785134523708, "learning_rate": 2.7073272652667404e-06, "loss": 17.4343, "step": 36221 }, { "epoch": 0.6621090536859999, "grad_norm": 6.117685855146058, "learning_rate": 2.7070642107625777e-06, "loss": 17.4509, "step": 36222 }, { "epoch": 0.6621273328824465, "grad_norm": 5.477728059059009, "learning_rate": 2.7068011642949564e-06, "loss": 17.091, "step": 36223 }, { "epoch": 0.662145612078893, "grad_norm": 8.715592998349225, "learning_rate": 2.706538125864796e-06, "loss": 18.1424, "step": 36224 }, { "epoch": 0.6621638912753396, "grad_norm": 8.10511721513986, "learning_rate": 2.70627509547302e-06, "loss": 18.0934, "step": 36225 }, { "epoch": 0.6621821704717861, "grad_norm": 7.124052444899084, "learning_rate": 2.706012073120553e-06, "loss": 17.951, "step": 36226 }, { "epoch": 0.6622004496682325, "grad_norm": 5.994682949675803, "learning_rate": 2.7057490588083126e-06, "loss": 17.3839, "step": 36227 }, { "epoch": 0.6622187288646791, "grad_norm": 6.308336367347307, "learning_rate": 2.705486052537225e-06, "loss": 17.6087, "step": 36228 }, { "epoch": 0.6622370080611256, "grad_norm": 6.397316498516374, "learning_rate": 2.7052230543082094e-06, "loss": 17.3905, "step": 36229 }, { "epoch": 0.6622552872575722, "grad_norm": 5.429651044735388, "learning_rate": 2.7049600641221853e-06, "loss": 16.9353, "step": 36230 }, { "epoch": 0.6622735664540187, "grad_norm": 5.200025172432818, "learning_rate": 2.70469708198008e-06, "loss": 16.9159, "step": 36231 }, { "epoch": 0.6622918456504652, "grad_norm": 6.4900552220141225, "learning_rate": 2.7044341078828096e-06, "loss": 17.5432, "step": 36232 }, { "epoch": 0.6623101248469118, "grad_norm": 7.227640311964082, "learning_rate": 2.704171141831301e-06, "loss": 17.7161, "step": 36233 }, { "epoch": 0.6623284040433582, "grad_norm": 6.4918808927494185, "learning_rate": 2.703908183826471e-06, "loss": 17.5982, "step": 36234 }, { "epoch": 0.6623466832398047, "grad_norm": 5.886333872944625, "learning_rate": 2.7036452338692433e-06, "loss": 17.1405, "step": 36235 }, { "epoch": 0.6623649624362513, "grad_norm": 5.509159240579649, "learning_rate": 2.703382291960542e-06, "loss": 17.2415, "step": 36236 }, { "epoch": 0.6623832416326978, "grad_norm": 5.446655086138654, "learning_rate": 2.7031193581012866e-06, "loss": 17.1865, "step": 36237 }, { "epoch": 0.6624015208291444, "grad_norm": 7.116132128192216, "learning_rate": 2.7028564322923965e-06, "loss": 17.6057, "step": 36238 }, { "epoch": 0.6624198000255909, "grad_norm": 7.186874382729339, "learning_rate": 2.702593514534797e-06, "loss": 17.6107, "step": 36239 }, { "epoch": 0.6624380792220373, "grad_norm": 6.425592024647753, "learning_rate": 2.7023306048294083e-06, "loss": 17.2553, "step": 36240 }, { "epoch": 0.6624563584184839, "grad_norm": 4.901059503470191, "learning_rate": 2.702067703177149e-06, "loss": 17.0254, "step": 36241 }, { "epoch": 0.6624746376149304, "grad_norm": 7.31510453606689, "learning_rate": 2.701804809578945e-06, "loss": 17.7422, "step": 36242 }, { "epoch": 0.662492916811377, "grad_norm": 6.129989742111921, "learning_rate": 2.7015419240357142e-06, "loss": 17.0823, "step": 36243 }, { "epoch": 0.6625111960078235, "grad_norm": 6.688438372736457, "learning_rate": 2.7012790465483796e-06, "loss": 17.5672, "step": 36244 }, { "epoch": 0.66252947520427, "grad_norm": 5.937895354129945, "learning_rate": 2.701016177117864e-06, "loss": 17.2263, "step": 36245 }, { "epoch": 0.6625477544007166, "grad_norm": 7.000917225528942, "learning_rate": 2.7007533157450872e-06, "loss": 17.6448, "step": 36246 }, { "epoch": 0.662566033597163, "grad_norm": 7.003943765332747, "learning_rate": 2.7004904624309692e-06, "loss": 17.7934, "step": 36247 }, { "epoch": 0.6625843127936096, "grad_norm": 7.148309081773409, "learning_rate": 2.7002276171764337e-06, "loss": 17.7352, "step": 36248 }, { "epoch": 0.6626025919900561, "grad_norm": 7.310135709923361, "learning_rate": 2.699964779982399e-06, "loss": 17.7021, "step": 36249 }, { "epoch": 0.6626208711865026, "grad_norm": 6.0366063318065235, "learning_rate": 2.6997019508497903e-06, "loss": 17.2359, "step": 36250 }, { "epoch": 0.6626391503829492, "grad_norm": 6.297045754895677, "learning_rate": 2.699439129779527e-06, "loss": 17.3487, "step": 36251 }, { "epoch": 0.6626574295793957, "grad_norm": 6.73049294860792, "learning_rate": 2.699176316772527e-06, "loss": 17.8025, "step": 36252 }, { "epoch": 0.6626757087758423, "grad_norm": 6.725735189256676, "learning_rate": 2.6989135118297155e-06, "loss": 17.5351, "step": 36253 }, { "epoch": 0.6626939879722887, "grad_norm": 5.9772511497978185, "learning_rate": 2.6986507149520136e-06, "loss": 17.2133, "step": 36254 }, { "epoch": 0.6627122671687352, "grad_norm": 5.143093996712777, "learning_rate": 2.69838792614034e-06, "loss": 17.082, "step": 36255 }, { "epoch": 0.6627305463651818, "grad_norm": 9.0546438317405, "learning_rate": 2.6981251453956183e-06, "loss": 18.2796, "step": 36256 }, { "epoch": 0.6627488255616283, "grad_norm": 6.808465570948087, "learning_rate": 2.697862372718768e-06, "loss": 17.4683, "step": 36257 }, { "epoch": 0.6627671047580749, "grad_norm": 4.982779372273801, "learning_rate": 2.6975996081107094e-06, "loss": 17.0697, "step": 36258 }, { "epoch": 0.6627853839545214, "grad_norm": 6.141225298956436, "learning_rate": 2.697336851572365e-06, "loss": 17.6683, "step": 36259 }, { "epoch": 0.6628036631509678, "grad_norm": 6.565623975309563, "learning_rate": 2.6970741031046545e-06, "loss": 17.6381, "step": 36260 }, { "epoch": 0.6628219423474144, "grad_norm": 6.319256345789547, "learning_rate": 2.696811362708501e-06, "loss": 17.3119, "step": 36261 }, { "epoch": 0.6628402215438609, "grad_norm": 6.092001178310529, "learning_rate": 2.6965486303848214e-06, "loss": 17.2658, "step": 36262 }, { "epoch": 0.6628585007403075, "grad_norm": 4.968876581751641, "learning_rate": 2.696285906134539e-06, "loss": 16.6951, "step": 36263 }, { "epoch": 0.662876779936754, "grad_norm": 5.60322592598743, "learning_rate": 2.696023189958577e-06, "loss": 17.1477, "step": 36264 }, { "epoch": 0.6628950591332005, "grad_norm": 5.50860012172259, "learning_rate": 2.6957604818578536e-06, "loss": 17.1012, "step": 36265 }, { "epoch": 0.662913338329647, "grad_norm": 7.237493086309574, "learning_rate": 2.695497781833287e-06, "loss": 17.6091, "step": 36266 }, { "epoch": 0.6629316175260935, "grad_norm": 5.823801202491071, "learning_rate": 2.695235089885804e-06, "loss": 17.1133, "step": 36267 }, { "epoch": 0.6629498967225401, "grad_norm": 7.164016660980564, "learning_rate": 2.694972406016322e-06, "loss": 17.6092, "step": 36268 }, { "epoch": 0.6629681759189866, "grad_norm": 5.011434661745264, "learning_rate": 2.6947097302257586e-06, "loss": 16.8727, "step": 36269 }, { "epoch": 0.6629864551154331, "grad_norm": 5.245519902006697, "learning_rate": 2.694447062515038e-06, "loss": 16.9847, "step": 36270 }, { "epoch": 0.6630047343118797, "grad_norm": 8.367774251630946, "learning_rate": 2.694184402885083e-06, "loss": 18.0134, "step": 36271 }, { "epoch": 0.6630230135083262, "grad_norm": 6.124830933416807, "learning_rate": 2.6939217513368086e-06, "loss": 16.9458, "step": 36272 }, { "epoch": 0.6630412927047727, "grad_norm": 6.686620830852303, "learning_rate": 2.6936591078711405e-06, "loss": 17.3579, "step": 36273 }, { "epoch": 0.6630595719012192, "grad_norm": 7.042738738555284, "learning_rate": 2.693396472488998e-06, "loss": 17.544, "step": 36274 }, { "epoch": 0.6630778510976657, "grad_norm": 5.986821543129123, "learning_rate": 2.693133845191298e-06, "loss": 17.162, "step": 36275 }, { "epoch": 0.6630961302941123, "grad_norm": 6.422258705448121, "learning_rate": 2.6928712259789657e-06, "loss": 17.5511, "step": 36276 }, { "epoch": 0.6631144094905588, "grad_norm": 6.886152254083987, "learning_rate": 2.6926086148529173e-06, "loss": 17.6913, "step": 36277 }, { "epoch": 0.6631326886870054, "grad_norm": 6.1309470733824, "learning_rate": 2.6923460118140776e-06, "loss": 17.7733, "step": 36278 }, { "epoch": 0.6631509678834518, "grad_norm": 6.769117412417167, "learning_rate": 2.6920834168633637e-06, "loss": 17.8532, "step": 36279 }, { "epoch": 0.6631692470798983, "grad_norm": 6.512882076817214, "learning_rate": 2.6918208300016968e-06, "loss": 17.5439, "step": 36280 }, { "epoch": 0.6631875262763449, "grad_norm": 7.228757561268776, "learning_rate": 2.6915582512299987e-06, "loss": 17.7423, "step": 36281 }, { "epoch": 0.6632058054727914, "grad_norm": 6.202052886720385, "learning_rate": 2.69129568054919e-06, "loss": 17.3545, "step": 36282 }, { "epoch": 0.663224084669238, "grad_norm": 5.276292410502595, "learning_rate": 2.6910331179601863e-06, "loss": 16.9775, "step": 36283 }, { "epoch": 0.6632423638656845, "grad_norm": 6.508047583583948, "learning_rate": 2.6907705634639138e-06, "loss": 17.3029, "step": 36284 }, { "epoch": 0.663260643062131, "grad_norm": 6.6539004962344785, "learning_rate": 2.6905080170612897e-06, "loss": 17.8325, "step": 36285 }, { "epoch": 0.6632789222585775, "grad_norm": 5.2029282765062055, "learning_rate": 2.6902454787532326e-06, "loss": 17.1296, "step": 36286 }, { "epoch": 0.663297201455024, "grad_norm": 7.173119486945097, "learning_rate": 2.6899829485406675e-06, "loss": 17.7405, "step": 36287 }, { "epoch": 0.6633154806514706, "grad_norm": 5.619286820947096, "learning_rate": 2.6897204264245092e-06, "loss": 17.1139, "step": 36288 }, { "epoch": 0.6633337598479171, "grad_norm": 7.594009696618477, "learning_rate": 2.68945791240568e-06, "loss": 18.0271, "step": 36289 }, { "epoch": 0.6633520390443636, "grad_norm": 8.47311100757151, "learning_rate": 2.6891954064851026e-06, "loss": 18.5611, "step": 36290 }, { "epoch": 0.6633703182408102, "grad_norm": 7.004484628036813, "learning_rate": 2.688932908663695e-06, "loss": 17.356, "step": 36291 }, { "epoch": 0.6633885974372566, "grad_norm": 6.614280367850914, "learning_rate": 2.6886704189423752e-06, "loss": 17.4143, "step": 36292 }, { "epoch": 0.6634068766337032, "grad_norm": 6.597537787485634, "learning_rate": 2.6884079373220663e-06, "loss": 17.8234, "step": 36293 }, { "epoch": 0.6634251558301497, "grad_norm": 5.412839600364825, "learning_rate": 2.688145463803685e-06, "loss": 17.0636, "step": 36294 }, { "epoch": 0.6634434350265962, "grad_norm": 4.96758760638603, "learning_rate": 2.687882998388156e-06, "loss": 16.8768, "step": 36295 }, { "epoch": 0.6634617142230428, "grad_norm": 5.834160428820746, "learning_rate": 2.6876205410763958e-06, "loss": 17.3741, "step": 36296 }, { "epoch": 0.6634799934194893, "grad_norm": 5.80296554980814, "learning_rate": 2.687358091869323e-06, "loss": 17.3691, "step": 36297 }, { "epoch": 0.6634982726159359, "grad_norm": 7.695654224447547, "learning_rate": 2.6870956507678597e-06, "loss": 17.9047, "step": 36298 }, { "epoch": 0.6635165518123823, "grad_norm": 5.115102498229821, "learning_rate": 2.6868332177729273e-06, "loss": 16.9923, "step": 36299 }, { "epoch": 0.6635348310088288, "grad_norm": 6.1189596052599144, "learning_rate": 2.686570792885442e-06, "loss": 17.3443, "step": 36300 }, { "epoch": 0.6635531102052754, "grad_norm": 5.612891645000726, "learning_rate": 2.6863083761063267e-06, "loss": 17.1854, "step": 36301 }, { "epoch": 0.6635713894017219, "grad_norm": 6.0980005614275665, "learning_rate": 2.6860459674365004e-06, "loss": 17.4112, "step": 36302 }, { "epoch": 0.6635896685981684, "grad_norm": 5.398714665908667, "learning_rate": 2.6857835668768795e-06, "loss": 17.089, "step": 36303 }, { "epoch": 0.663607947794615, "grad_norm": 5.7794911404437315, "learning_rate": 2.685521174428389e-06, "loss": 17.2191, "step": 36304 }, { "epoch": 0.6636262269910614, "grad_norm": 5.434730022703295, "learning_rate": 2.6852587900919456e-06, "loss": 17.2562, "step": 36305 }, { "epoch": 0.663644506187508, "grad_norm": 5.797554848371211, "learning_rate": 2.6849964138684677e-06, "loss": 17.2874, "step": 36306 }, { "epoch": 0.6636627853839545, "grad_norm": 6.27522369099996, "learning_rate": 2.6847340457588755e-06, "loss": 17.4838, "step": 36307 }, { "epoch": 0.663681064580401, "grad_norm": 6.580839248031534, "learning_rate": 2.6844716857640906e-06, "loss": 17.4147, "step": 36308 }, { "epoch": 0.6636993437768476, "grad_norm": 4.8260851283737, "learning_rate": 2.6842093338850333e-06, "loss": 17.0113, "step": 36309 }, { "epoch": 0.6637176229732941, "grad_norm": 6.459536449286677, "learning_rate": 2.683946990122621e-06, "loss": 17.4188, "step": 36310 }, { "epoch": 0.6637359021697407, "grad_norm": 6.005220455336127, "learning_rate": 2.6836846544777717e-06, "loss": 17.2448, "step": 36311 }, { "epoch": 0.6637541813661871, "grad_norm": 5.880906409328045, "learning_rate": 2.683422326951409e-06, "loss": 17.2279, "step": 36312 }, { "epoch": 0.6637724605626336, "grad_norm": 6.06446352824471, "learning_rate": 2.6831600075444498e-06, "loss": 17.2374, "step": 36313 }, { "epoch": 0.6637907397590802, "grad_norm": 6.831381199693103, "learning_rate": 2.6828976962578113e-06, "loss": 17.5786, "step": 36314 }, { "epoch": 0.6638090189555267, "grad_norm": 7.408549869570971, "learning_rate": 2.6826353930924176e-06, "loss": 17.6162, "step": 36315 }, { "epoch": 0.6638272981519733, "grad_norm": 5.8383445195241865, "learning_rate": 2.6823730980491834e-06, "loss": 17.4401, "step": 36316 }, { "epoch": 0.6638455773484198, "grad_norm": 6.944429338905585, "learning_rate": 2.6821108111290316e-06, "loss": 17.3563, "step": 36317 }, { "epoch": 0.6638638565448662, "grad_norm": 5.278340389748778, "learning_rate": 2.681848532332881e-06, "loss": 16.9984, "step": 36318 }, { "epoch": 0.6638821357413128, "grad_norm": 4.823053877095193, "learning_rate": 2.681586261661651e-06, "loss": 16.8237, "step": 36319 }, { "epoch": 0.6639004149377593, "grad_norm": 6.306808300771131, "learning_rate": 2.681323999116258e-06, "loss": 17.5398, "step": 36320 }, { "epoch": 0.6639186941342059, "grad_norm": 6.363614085234467, "learning_rate": 2.6810617446976237e-06, "loss": 17.5057, "step": 36321 }, { "epoch": 0.6639369733306524, "grad_norm": 7.086184023255866, "learning_rate": 2.680799498406668e-06, "loss": 17.6874, "step": 36322 }, { "epoch": 0.6639552525270989, "grad_norm": 7.579762254925135, "learning_rate": 2.680537260244307e-06, "loss": 17.2367, "step": 36323 }, { "epoch": 0.6639735317235455, "grad_norm": 6.445567415565445, "learning_rate": 2.680275030211461e-06, "loss": 17.4246, "step": 36324 }, { "epoch": 0.6639918109199919, "grad_norm": 6.75752077630599, "learning_rate": 2.6800128083090515e-06, "loss": 17.6955, "step": 36325 }, { "epoch": 0.6640100901164385, "grad_norm": 5.624621591259183, "learning_rate": 2.679750594537994e-06, "loss": 16.991, "step": 36326 }, { "epoch": 0.664028369312885, "grad_norm": 6.331922885290486, "learning_rate": 2.6794883888992116e-06, "loss": 17.4318, "step": 36327 }, { "epoch": 0.6640466485093315, "grad_norm": 6.249500604694071, "learning_rate": 2.6792261913936195e-06, "loss": 17.187, "step": 36328 }, { "epoch": 0.6640649277057781, "grad_norm": 6.181773068276608, "learning_rate": 2.6789640020221395e-06, "loss": 17.1765, "step": 36329 }, { "epoch": 0.6640832069022246, "grad_norm": 6.507108977159888, "learning_rate": 2.6787018207856897e-06, "loss": 17.5633, "step": 36330 }, { "epoch": 0.6641014860986711, "grad_norm": 7.060579745039269, "learning_rate": 2.678439647685186e-06, "loss": 17.7123, "step": 36331 }, { "epoch": 0.6641197652951176, "grad_norm": 5.917574143727384, "learning_rate": 2.6781774827215525e-06, "loss": 17.2313, "step": 36332 }, { "epoch": 0.6641380444915641, "grad_norm": 5.563824784700884, "learning_rate": 2.6779153258957037e-06, "loss": 17.088, "step": 36333 }, { "epoch": 0.6641563236880107, "grad_norm": 5.815511017628762, "learning_rate": 2.677653177208559e-06, "loss": 17.2726, "step": 36334 }, { "epoch": 0.6641746028844572, "grad_norm": 5.906354247043445, "learning_rate": 2.677391036661041e-06, "loss": 17.3086, "step": 36335 }, { "epoch": 0.6641928820809038, "grad_norm": 6.3417007616628505, "learning_rate": 2.677128904254066e-06, "loss": 17.3822, "step": 36336 }, { "epoch": 0.6642111612773502, "grad_norm": 6.363417410712129, "learning_rate": 2.6768667799885506e-06, "loss": 17.2473, "step": 36337 }, { "epoch": 0.6642294404737967, "grad_norm": 4.975223490136955, "learning_rate": 2.6766046638654174e-06, "loss": 16.9978, "step": 36338 }, { "epoch": 0.6642477196702433, "grad_norm": 5.844099465667599, "learning_rate": 2.6763425558855814e-06, "loss": 17.2045, "step": 36339 }, { "epoch": 0.6642659988666898, "grad_norm": 5.054968601499035, "learning_rate": 2.6760804560499656e-06, "loss": 16.9588, "step": 36340 }, { "epoch": 0.6642842780631364, "grad_norm": 5.246312081031797, "learning_rate": 2.675818364359485e-06, "loss": 17.1053, "step": 36341 }, { "epoch": 0.6643025572595829, "grad_norm": 5.771713133974409, "learning_rate": 2.675556280815058e-06, "loss": 17.235, "step": 36342 }, { "epoch": 0.6643208364560294, "grad_norm": 6.7121100987280915, "learning_rate": 2.6752942054176047e-06, "loss": 17.3426, "step": 36343 }, { "epoch": 0.6643391156524759, "grad_norm": 5.670037601083774, "learning_rate": 2.675032138168045e-06, "loss": 17.1721, "step": 36344 }, { "epoch": 0.6643573948489224, "grad_norm": 6.604448360302737, "learning_rate": 2.6747700790672936e-06, "loss": 17.433, "step": 36345 }, { "epoch": 0.664375674045369, "grad_norm": 7.388642525530414, "learning_rate": 2.674508028116274e-06, "loss": 18.0595, "step": 36346 }, { "epoch": 0.6643939532418155, "grad_norm": 6.250932769755389, "learning_rate": 2.6742459853159018e-06, "loss": 17.0371, "step": 36347 }, { "epoch": 0.664412232438262, "grad_norm": 10.234631906153947, "learning_rate": 2.673983950667094e-06, "loss": 17.4579, "step": 36348 }, { "epoch": 0.6644305116347086, "grad_norm": 6.612585742340042, "learning_rate": 2.6737219241707723e-06, "loss": 17.7027, "step": 36349 }, { "epoch": 0.664448790831155, "grad_norm": 5.908445168587951, "learning_rate": 2.673459905827853e-06, "loss": 17.079, "step": 36350 }, { "epoch": 0.6644670700276016, "grad_norm": 7.0008935490633935, "learning_rate": 2.6731978956392534e-06, "loss": 17.4619, "step": 36351 }, { "epoch": 0.6644853492240481, "grad_norm": 5.971361577383819, "learning_rate": 2.6729358936058923e-06, "loss": 17.4342, "step": 36352 }, { "epoch": 0.6645036284204946, "grad_norm": 6.011189172198146, "learning_rate": 2.672673899728692e-06, "loss": 17.3356, "step": 36353 }, { "epoch": 0.6645219076169412, "grad_norm": 8.446126089546915, "learning_rate": 2.672411914008566e-06, "loss": 18.18, "step": 36354 }, { "epoch": 0.6645401868133877, "grad_norm": 6.570055140647341, "learning_rate": 2.6721499364464356e-06, "loss": 17.5787, "step": 36355 }, { "epoch": 0.6645584660098343, "grad_norm": 6.653807495652013, "learning_rate": 2.671887967043215e-06, "loss": 17.625, "step": 36356 }, { "epoch": 0.6645767452062807, "grad_norm": 5.801413017972209, "learning_rate": 2.671626005799828e-06, "loss": 17.2143, "step": 36357 }, { "epoch": 0.6645950244027272, "grad_norm": 6.2454452975528865, "learning_rate": 2.6713640527171896e-06, "loss": 17.4174, "step": 36358 }, { "epoch": 0.6646133035991738, "grad_norm": 5.283859543039318, "learning_rate": 2.6711021077962163e-06, "loss": 17.2296, "step": 36359 }, { "epoch": 0.6646315827956203, "grad_norm": 6.319634999539061, "learning_rate": 2.6708401710378294e-06, "loss": 17.5548, "step": 36360 }, { "epoch": 0.6646498619920669, "grad_norm": 7.059545360436841, "learning_rate": 2.6705782424429444e-06, "loss": 17.6191, "step": 36361 }, { "epoch": 0.6646681411885134, "grad_norm": 5.880160411972528, "learning_rate": 2.67031632201248e-06, "loss": 17.1957, "step": 36362 }, { "epoch": 0.6646864203849598, "grad_norm": 6.747284778422752, "learning_rate": 2.670054409747357e-06, "loss": 17.5216, "step": 36363 }, { "epoch": 0.6647046995814064, "grad_norm": 5.443722796479833, "learning_rate": 2.6697925056484906e-06, "loss": 17.18, "step": 36364 }, { "epoch": 0.6647229787778529, "grad_norm": 5.995963307960802, "learning_rate": 2.669530609716798e-06, "loss": 17.3548, "step": 36365 }, { "epoch": 0.6647412579742995, "grad_norm": 5.579795268921982, "learning_rate": 2.6692687219532e-06, "loss": 17.1081, "step": 36366 }, { "epoch": 0.664759537170746, "grad_norm": 5.252244077820294, "learning_rate": 2.6690068423586124e-06, "loss": 17.0707, "step": 36367 }, { "epoch": 0.6647778163671925, "grad_norm": 6.458632420558249, "learning_rate": 2.6687449709339523e-06, "loss": 17.4868, "step": 36368 }, { "epoch": 0.6647960955636391, "grad_norm": 4.504485987307294, "learning_rate": 2.668483107680141e-06, "loss": 16.6707, "step": 36369 }, { "epoch": 0.6648143747600855, "grad_norm": 6.573863510773518, "learning_rate": 2.668221252598091e-06, "loss": 17.7839, "step": 36370 }, { "epoch": 0.664832653956532, "grad_norm": 6.6873877659686425, "learning_rate": 2.667959405688724e-06, "loss": 17.7031, "step": 36371 }, { "epoch": 0.6648509331529786, "grad_norm": 7.167919304133671, "learning_rate": 2.667697566952959e-06, "loss": 17.4029, "step": 36372 }, { "epoch": 0.6648692123494251, "grad_norm": 6.736415688092358, "learning_rate": 2.6674357363917092e-06, "loss": 17.4261, "step": 36373 }, { "epoch": 0.6648874915458717, "grad_norm": 4.624278561149827, "learning_rate": 2.6671739140058967e-06, "loss": 16.8544, "step": 36374 }, { "epoch": 0.6649057707423182, "grad_norm": 6.730796144110259, "learning_rate": 2.666912099796437e-06, "loss": 17.8998, "step": 36375 }, { "epoch": 0.6649240499387646, "grad_norm": 7.126882771270058, "learning_rate": 2.6666502937642467e-06, "loss": 17.59, "step": 36376 }, { "epoch": 0.6649423291352112, "grad_norm": 5.508500360194318, "learning_rate": 2.666388495910246e-06, "loss": 17.1899, "step": 36377 }, { "epoch": 0.6649606083316577, "grad_norm": 6.826080639088614, "learning_rate": 2.6661267062353514e-06, "loss": 17.3271, "step": 36378 }, { "epoch": 0.6649788875281043, "grad_norm": 5.98511020396529, "learning_rate": 2.6658649247404783e-06, "loss": 17.1743, "step": 36379 }, { "epoch": 0.6649971667245508, "grad_norm": 6.279792397123619, "learning_rate": 2.6656031514265455e-06, "loss": 17.3975, "step": 36380 }, { "epoch": 0.6650154459209973, "grad_norm": 6.2983898599555905, "learning_rate": 2.6653413862944737e-06, "loss": 17.3044, "step": 36381 }, { "epoch": 0.6650337251174439, "grad_norm": 5.211573607415754, "learning_rate": 2.665079629345176e-06, "loss": 17.2288, "step": 36382 }, { "epoch": 0.6650520043138903, "grad_norm": 7.690635910886445, "learning_rate": 2.6648178805795733e-06, "loss": 18.5189, "step": 36383 }, { "epoch": 0.6650702835103369, "grad_norm": 4.396408572077515, "learning_rate": 2.664556139998581e-06, "loss": 16.7784, "step": 36384 }, { "epoch": 0.6650885627067834, "grad_norm": 6.854037198422489, "learning_rate": 2.6642944076031148e-06, "loss": 17.407, "step": 36385 }, { "epoch": 0.6651068419032299, "grad_norm": 5.585320948268275, "learning_rate": 2.6640326833940955e-06, "loss": 17.3857, "step": 36386 }, { "epoch": 0.6651251210996765, "grad_norm": 6.447966607175493, "learning_rate": 2.6637709673724375e-06, "loss": 17.5022, "step": 36387 }, { "epoch": 0.665143400296123, "grad_norm": 5.390563135096066, "learning_rate": 2.663509259539062e-06, "loss": 17.1618, "step": 36388 }, { "epoch": 0.6651616794925695, "grad_norm": 6.013906258320041, "learning_rate": 2.6632475598948814e-06, "loss": 17.1199, "step": 36389 }, { "epoch": 0.665179958689016, "grad_norm": 6.844368584871323, "learning_rate": 2.662985868440815e-06, "loss": 17.6623, "step": 36390 }, { "epoch": 0.6651982378854625, "grad_norm": 8.932727269487895, "learning_rate": 2.662724185177783e-06, "loss": 18.3197, "step": 36391 }, { "epoch": 0.6652165170819091, "grad_norm": 5.890008554588377, "learning_rate": 2.662462510106699e-06, "loss": 17.1941, "step": 36392 }, { "epoch": 0.6652347962783556, "grad_norm": 5.499189770538247, "learning_rate": 2.6622008432284802e-06, "loss": 17.1621, "step": 36393 }, { "epoch": 0.6652530754748022, "grad_norm": 6.489291141216141, "learning_rate": 2.6619391845440456e-06, "loss": 17.3102, "step": 36394 }, { "epoch": 0.6652713546712486, "grad_norm": 5.905182353296549, "learning_rate": 2.661677534054312e-06, "loss": 17.159, "step": 36395 }, { "epoch": 0.6652896338676951, "grad_norm": 6.347526145860702, "learning_rate": 2.6614158917601936e-06, "loss": 17.4009, "step": 36396 }, { "epoch": 0.6653079130641417, "grad_norm": 7.31738670662573, "learning_rate": 2.6611542576626093e-06, "loss": 17.8825, "step": 36397 }, { "epoch": 0.6653261922605882, "grad_norm": 5.621773369886271, "learning_rate": 2.660892631762478e-06, "loss": 16.9578, "step": 36398 }, { "epoch": 0.6653444714570348, "grad_norm": 7.121226196607939, "learning_rate": 2.660631014060714e-06, "loss": 17.6215, "step": 36399 }, { "epoch": 0.6653627506534813, "grad_norm": 7.475942044005322, "learning_rate": 2.660369404558236e-06, "loss": 17.6478, "step": 36400 }, { "epoch": 0.6653810298499278, "grad_norm": 8.872700875085437, "learning_rate": 2.660107803255959e-06, "loss": 18.4037, "step": 36401 }, { "epoch": 0.6653993090463743, "grad_norm": 5.616320241894759, "learning_rate": 2.659846210154804e-06, "loss": 17.2104, "step": 36402 }, { "epoch": 0.6654175882428208, "grad_norm": 4.517196628434074, "learning_rate": 2.659584625255683e-06, "loss": 16.7201, "step": 36403 }, { "epoch": 0.6654358674392674, "grad_norm": 5.228904875004092, "learning_rate": 2.6593230485595147e-06, "loss": 16.6966, "step": 36404 }, { "epoch": 0.6654541466357139, "grad_norm": 7.054078801487252, "learning_rate": 2.6590614800672164e-06, "loss": 17.7743, "step": 36405 }, { "epoch": 0.6654724258321604, "grad_norm": 5.173418813862348, "learning_rate": 2.6587999197797037e-06, "loss": 16.8781, "step": 36406 }, { "epoch": 0.665490705028607, "grad_norm": 6.270230348373279, "learning_rate": 2.6585383676978937e-06, "loss": 17.1429, "step": 36407 }, { "epoch": 0.6655089842250534, "grad_norm": 5.323807348866313, "learning_rate": 2.6582768238227052e-06, "loss": 16.8241, "step": 36408 }, { "epoch": 0.6655272634215, "grad_norm": 6.242796526571726, "learning_rate": 2.658015288155054e-06, "loss": 17.3655, "step": 36409 }, { "epoch": 0.6655455426179465, "grad_norm": 6.183768242765707, "learning_rate": 2.657753760695854e-06, "loss": 17.3952, "step": 36410 }, { "epoch": 0.665563821814393, "grad_norm": 5.902668199305103, "learning_rate": 2.6574922414460248e-06, "loss": 17.2598, "step": 36411 }, { "epoch": 0.6655821010108396, "grad_norm": 7.043910548771029, "learning_rate": 2.6572307304064827e-06, "loss": 17.5724, "step": 36412 }, { "epoch": 0.6656003802072861, "grad_norm": 5.68358563856661, "learning_rate": 2.656969227578141e-06, "loss": 17.0295, "step": 36413 }, { "epoch": 0.6656186594037327, "grad_norm": 4.937079065064065, "learning_rate": 2.656707732961921e-06, "loss": 16.7258, "step": 36414 }, { "epoch": 0.6656369386001791, "grad_norm": 6.327042116896964, "learning_rate": 2.656446246558736e-06, "loss": 17.4864, "step": 36415 }, { "epoch": 0.6656552177966256, "grad_norm": 5.6089047229269715, "learning_rate": 2.6561847683695023e-06, "loss": 17.3119, "step": 36416 }, { "epoch": 0.6656734969930722, "grad_norm": 6.859563525399977, "learning_rate": 2.65592329839514e-06, "loss": 17.7325, "step": 36417 }, { "epoch": 0.6656917761895187, "grad_norm": 6.308308177399782, "learning_rate": 2.6556618366365617e-06, "loss": 17.3528, "step": 36418 }, { "epoch": 0.6657100553859653, "grad_norm": 5.18438439203305, "learning_rate": 2.6554003830946867e-06, "loss": 16.7641, "step": 36419 }, { "epoch": 0.6657283345824118, "grad_norm": 5.79417624840374, "learning_rate": 2.65513893777043e-06, "loss": 17.256, "step": 36420 }, { "epoch": 0.6657466137788582, "grad_norm": 6.849819740538789, "learning_rate": 2.654877500664706e-06, "loss": 17.632, "step": 36421 }, { "epoch": 0.6657648929753048, "grad_norm": 6.374582605525274, "learning_rate": 2.6546160717784346e-06, "loss": 17.5622, "step": 36422 }, { "epoch": 0.6657831721717513, "grad_norm": 6.209754719766124, "learning_rate": 2.6543546511125307e-06, "loss": 17.4551, "step": 36423 }, { "epoch": 0.6658014513681979, "grad_norm": 5.695265721036111, "learning_rate": 2.6540932386679075e-06, "loss": 16.93, "step": 36424 }, { "epoch": 0.6658197305646444, "grad_norm": 6.0878804713884, "learning_rate": 2.653831834445485e-06, "loss": 17.0843, "step": 36425 }, { "epoch": 0.6658380097610909, "grad_norm": 5.72717352167793, "learning_rate": 2.6535704384461793e-06, "loss": 17.2458, "step": 36426 }, { "epoch": 0.6658562889575375, "grad_norm": 6.4313876531869125, "learning_rate": 2.653309050670904e-06, "loss": 17.8017, "step": 36427 }, { "epoch": 0.6658745681539839, "grad_norm": 5.554465558070644, "learning_rate": 2.6530476711205786e-06, "loss": 16.9799, "step": 36428 }, { "epoch": 0.6658928473504305, "grad_norm": 7.273135927390141, "learning_rate": 2.6527862997961184e-06, "loss": 17.8678, "step": 36429 }, { "epoch": 0.665911126546877, "grad_norm": 5.723344026991991, "learning_rate": 2.6525249366984357e-06, "loss": 17.3247, "step": 36430 }, { "epoch": 0.6659294057433235, "grad_norm": 5.78684506029505, "learning_rate": 2.6522635818284515e-06, "loss": 17.2568, "step": 36431 }, { "epoch": 0.6659476849397701, "grad_norm": 8.011702729454594, "learning_rate": 2.6520022351870777e-06, "loss": 17.5546, "step": 36432 }, { "epoch": 0.6659659641362166, "grad_norm": 5.02087550684266, "learning_rate": 2.6517408967752347e-06, "loss": 16.878, "step": 36433 }, { "epoch": 0.6659842433326632, "grad_norm": 6.429394618660753, "learning_rate": 2.6514795665938338e-06, "loss": 17.7273, "step": 36434 }, { "epoch": 0.6660025225291096, "grad_norm": 4.418305141919172, "learning_rate": 2.6512182446437927e-06, "loss": 16.8666, "step": 36435 }, { "epoch": 0.6660208017255561, "grad_norm": 5.972311011206798, "learning_rate": 2.650956930926031e-06, "loss": 17.2982, "step": 36436 }, { "epoch": 0.6660390809220027, "grad_norm": 6.251168198946166, "learning_rate": 2.6506956254414604e-06, "loss": 17.1975, "step": 36437 }, { "epoch": 0.6660573601184492, "grad_norm": 7.1297931721670915, "learning_rate": 2.6504343281909963e-06, "loss": 17.8253, "step": 36438 }, { "epoch": 0.6660756393148957, "grad_norm": 5.902569806085674, "learning_rate": 2.6501730391755575e-06, "loss": 17.3532, "step": 36439 }, { "epoch": 0.6660939185113423, "grad_norm": 6.947960775526767, "learning_rate": 2.6499117583960585e-06, "loss": 17.8391, "step": 36440 }, { "epoch": 0.6661121977077887, "grad_norm": 6.376352280028127, "learning_rate": 2.6496504858534133e-06, "loss": 17.4074, "step": 36441 }, { "epoch": 0.6661304769042353, "grad_norm": 6.168143089841536, "learning_rate": 2.6493892215485404e-06, "loss": 17.4093, "step": 36442 }, { "epoch": 0.6661487561006818, "grad_norm": 6.072158614724578, "learning_rate": 2.6491279654823534e-06, "loss": 17.0268, "step": 36443 }, { "epoch": 0.6661670352971283, "grad_norm": 5.322738796468451, "learning_rate": 2.6488667176557683e-06, "loss": 17.1338, "step": 36444 }, { "epoch": 0.6661853144935749, "grad_norm": 6.849148591034728, "learning_rate": 2.648605478069703e-06, "loss": 17.1858, "step": 36445 }, { "epoch": 0.6662035936900214, "grad_norm": 6.423846816825811, "learning_rate": 2.64834424672507e-06, "loss": 17.5568, "step": 36446 }, { "epoch": 0.666221872886468, "grad_norm": 8.012826936475385, "learning_rate": 2.648083023622788e-06, "loss": 17.9955, "step": 36447 }, { "epoch": 0.6662401520829144, "grad_norm": 6.804224760788742, "learning_rate": 2.6478218087637707e-06, "loss": 17.9434, "step": 36448 }, { "epoch": 0.6662584312793609, "grad_norm": 7.430733672232845, "learning_rate": 2.647560602148932e-06, "loss": 18.0751, "step": 36449 }, { "epoch": 0.6662767104758075, "grad_norm": 6.311868397377322, "learning_rate": 2.6472994037791916e-06, "loss": 17.5476, "step": 36450 }, { "epoch": 0.666294989672254, "grad_norm": 6.146838190374111, "learning_rate": 2.6470382136554606e-06, "loss": 17.1097, "step": 36451 }, { "epoch": 0.6663132688687006, "grad_norm": 6.184912829851242, "learning_rate": 2.646777031778658e-06, "loss": 17.4315, "step": 36452 }, { "epoch": 0.666331548065147, "grad_norm": 6.25573304229102, "learning_rate": 2.6465158581496963e-06, "loss": 17.5561, "step": 36453 }, { "epoch": 0.6663498272615935, "grad_norm": 7.198645084531404, "learning_rate": 2.646254692769494e-06, "loss": 18.0783, "step": 36454 }, { "epoch": 0.6663681064580401, "grad_norm": 7.260307002202078, "learning_rate": 2.645993535638963e-06, "loss": 17.6157, "step": 36455 }, { "epoch": 0.6663863856544866, "grad_norm": 6.071489961644259, "learning_rate": 2.645732386759022e-06, "loss": 17.1826, "step": 36456 }, { "epoch": 0.6664046648509332, "grad_norm": 5.841381322614861, "learning_rate": 2.645471246130584e-06, "loss": 17.2052, "step": 36457 }, { "epoch": 0.6664229440473797, "grad_norm": 7.32339630462647, "learning_rate": 2.6452101137545643e-06, "loss": 17.4851, "step": 36458 }, { "epoch": 0.6664412232438262, "grad_norm": 6.542114688972445, "learning_rate": 2.6449489896318802e-06, "loss": 17.4097, "step": 36459 }, { "epoch": 0.6664595024402727, "grad_norm": 5.375522018841798, "learning_rate": 2.644687873763444e-06, "loss": 16.9863, "step": 36460 }, { "epoch": 0.6664777816367192, "grad_norm": 6.365862780330403, "learning_rate": 2.644426766150172e-06, "loss": 17.4114, "step": 36461 }, { "epoch": 0.6664960608331658, "grad_norm": 7.219829457016593, "learning_rate": 2.6441656667929814e-06, "loss": 17.8015, "step": 36462 }, { "epoch": 0.6665143400296123, "grad_norm": 6.501091802401192, "learning_rate": 2.6439045756927844e-06, "loss": 17.656, "step": 36463 }, { "epoch": 0.6665326192260588, "grad_norm": 6.241115219033712, "learning_rate": 2.6436434928504988e-06, "loss": 17.4622, "step": 36464 }, { "epoch": 0.6665508984225054, "grad_norm": 7.4215831899386755, "learning_rate": 2.6433824182670385e-06, "loss": 17.3807, "step": 36465 }, { "epoch": 0.6665691776189518, "grad_norm": 6.773742794270312, "learning_rate": 2.6431213519433167e-06, "loss": 17.3969, "step": 36466 }, { "epoch": 0.6665874568153984, "grad_norm": 6.369880100461895, "learning_rate": 2.642860293880252e-06, "loss": 17.1664, "step": 36467 }, { "epoch": 0.6666057360118449, "grad_norm": 8.333722763519201, "learning_rate": 2.6425992440787573e-06, "loss": 18.1851, "step": 36468 }, { "epoch": 0.6666240152082914, "grad_norm": 6.349478457224252, "learning_rate": 2.6423382025397455e-06, "loss": 17.3289, "step": 36469 }, { "epoch": 0.666642294404738, "grad_norm": 5.922188019412, "learning_rate": 2.6420771692641334e-06, "loss": 17.3625, "step": 36470 }, { "epoch": 0.6666605736011845, "grad_norm": 5.873120120357756, "learning_rate": 2.641816144252839e-06, "loss": 17.3362, "step": 36471 }, { "epoch": 0.6666788527976311, "grad_norm": 6.4864551503959245, "learning_rate": 2.641555127506772e-06, "loss": 17.0429, "step": 36472 }, { "epoch": 0.6666971319940775, "grad_norm": 7.100661462055192, "learning_rate": 2.6412941190268513e-06, "loss": 17.8617, "step": 36473 }, { "epoch": 0.666715411190524, "grad_norm": 6.181277738431422, "learning_rate": 2.6410331188139905e-06, "loss": 17.1691, "step": 36474 }, { "epoch": 0.6667336903869706, "grad_norm": 5.958949854308777, "learning_rate": 2.640772126869102e-06, "loss": 17.2532, "step": 36475 }, { "epoch": 0.6667519695834171, "grad_norm": 6.98043817776425, "learning_rate": 2.6405111431931043e-06, "loss": 17.488, "step": 36476 }, { "epoch": 0.6667702487798637, "grad_norm": 6.736017476954569, "learning_rate": 2.640250167786908e-06, "loss": 17.8048, "step": 36477 }, { "epoch": 0.6667885279763102, "grad_norm": 8.412835514824591, "learning_rate": 2.6399892006514326e-06, "loss": 18.1982, "step": 36478 }, { "epoch": 0.6668068071727566, "grad_norm": 7.061922879011437, "learning_rate": 2.639728241787588e-06, "loss": 17.4297, "step": 36479 }, { "epoch": 0.6668250863692032, "grad_norm": 6.2416357207835995, "learning_rate": 2.639467291196291e-06, "loss": 17.3594, "step": 36480 }, { "epoch": 0.6668433655656497, "grad_norm": 6.029086744916132, "learning_rate": 2.639206348878458e-06, "loss": 17.4052, "step": 36481 }, { "epoch": 0.6668616447620963, "grad_norm": 7.614339025478764, "learning_rate": 2.6389454148350026e-06, "loss": 18.053, "step": 36482 }, { "epoch": 0.6668799239585428, "grad_norm": 5.686091473200404, "learning_rate": 2.638684489066836e-06, "loss": 17.2901, "step": 36483 }, { "epoch": 0.6668982031549893, "grad_norm": 5.693814254220574, "learning_rate": 2.638423571574878e-06, "loss": 17.21, "step": 36484 }, { "epoch": 0.6669164823514359, "grad_norm": 5.053131839537022, "learning_rate": 2.6381626623600397e-06, "loss": 16.9203, "step": 36485 }, { "epoch": 0.6669347615478823, "grad_norm": 6.804249240952364, "learning_rate": 2.637901761423235e-06, "loss": 17.6544, "step": 36486 }, { "epoch": 0.6669530407443289, "grad_norm": 6.046124110979358, "learning_rate": 2.637640868765382e-06, "loss": 17.2195, "step": 36487 }, { "epoch": 0.6669713199407754, "grad_norm": 5.413611682553127, "learning_rate": 2.63737998438739e-06, "loss": 17.0533, "step": 36488 }, { "epoch": 0.6669895991372219, "grad_norm": 5.783890233724893, "learning_rate": 2.637119108290176e-06, "loss": 17.1829, "step": 36489 }, { "epoch": 0.6670078783336685, "grad_norm": 7.106429550134862, "learning_rate": 2.6368582404746568e-06, "loss": 17.4592, "step": 36490 }, { "epoch": 0.667026157530115, "grad_norm": 6.792474714915737, "learning_rate": 2.6365973809417445e-06, "loss": 17.4311, "step": 36491 }, { "epoch": 0.6670444367265616, "grad_norm": 6.1895269495025795, "learning_rate": 2.6363365296923515e-06, "loss": 17.3566, "step": 36492 }, { "epoch": 0.667062715923008, "grad_norm": 8.338957116811539, "learning_rate": 2.636075686727395e-06, "loss": 18.0552, "step": 36493 }, { "epoch": 0.6670809951194545, "grad_norm": 6.445491141421959, "learning_rate": 2.6358148520477865e-06, "loss": 17.6822, "step": 36494 }, { "epoch": 0.6670992743159011, "grad_norm": 6.322616891307195, "learning_rate": 2.635554025654444e-06, "loss": 17.2059, "step": 36495 }, { "epoch": 0.6671175535123476, "grad_norm": 6.506411129462493, "learning_rate": 2.6352932075482794e-06, "loss": 17.5438, "step": 36496 }, { "epoch": 0.6671358327087942, "grad_norm": 6.157197344264516, "learning_rate": 2.635032397730205e-06, "loss": 17.14, "step": 36497 }, { "epoch": 0.6671541119052407, "grad_norm": 6.060609950565744, "learning_rate": 2.6347715962011365e-06, "loss": 17.4156, "step": 36498 }, { "epoch": 0.6671723911016871, "grad_norm": 5.795754409406312, "learning_rate": 2.634510802961991e-06, "loss": 17.0684, "step": 36499 }, { "epoch": 0.6671906702981337, "grad_norm": 7.370213299591482, "learning_rate": 2.6342500180136776e-06, "loss": 17.7656, "step": 36500 }, { "epoch": 0.6672089494945802, "grad_norm": 7.308543752833031, "learning_rate": 2.6339892413571145e-06, "loss": 17.688, "step": 36501 }, { "epoch": 0.6672272286910268, "grad_norm": 6.134406932435028, "learning_rate": 2.6337284729932133e-06, "loss": 17.5329, "step": 36502 }, { "epoch": 0.6672455078874733, "grad_norm": 5.1881132567890145, "learning_rate": 2.633467712922887e-06, "loss": 16.8308, "step": 36503 }, { "epoch": 0.6672637870839198, "grad_norm": 5.653317995005907, "learning_rate": 2.6332069611470523e-06, "loss": 17.0231, "step": 36504 }, { "epoch": 0.6672820662803663, "grad_norm": 6.337414678698595, "learning_rate": 2.6329462176666226e-06, "loss": 17.3391, "step": 36505 }, { "epoch": 0.6673003454768128, "grad_norm": 5.563734941165915, "learning_rate": 2.632685482482509e-06, "loss": 17.238, "step": 36506 }, { "epoch": 0.6673186246732593, "grad_norm": 6.162527016626108, "learning_rate": 2.6324247555956273e-06, "loss": 17.4066, "step": 36507 }, { "epoch": 0.6673369038697059, "grad_norm": 5.994710971471193, "learning_rate": 2.6321640370068913e-06, "loss": 17.1113, "step": 36508 }, { "epoch": 0.6673551830661524, "grad_norm": 5.706189276762595, "learning_rate": 2.631903326717217e-06, "loss": 17.3312, "step": 36509 }, { "epoch": 0.667373462262599, "grad_norm": 6.812387851505149, "learning_rate": 2.6316426247275153e-06, "loss": 17.7199, "step": 36510 }, { "epoch": 0.6673917414590455, "grad_norm": 4.899655914482657, "learning_rate": 2.631381931038699e-06, "loss": 16.8859, "step": 36511 }, { "epoch": 0.6674100206554919, "grad_norm": 8.391756242859456, "learning_rate": 2.6311212456516855e-06, "loss": 17.6193, "step": 36512 }, { "epoch": 0.6674282998519385, "grad_norm": 7.147833358530591, "learning_rate": 2.6308605685673873e-06, "loss": 17.4768, "step": 36513 }, { "epoch": 0.667446579048385, "grad_norm": 5.443046083974256, "learning_rate": 2.6305998997867145e-06, "loss": 17.2167, "step": 36514 }, { "epoch": 0.6674648582448316, "grad_norm": 5.98308342473768, "learning_rate": 2.630339239310585e-06, "loss": 17.2589, "step": 36515 }, { "epoch": 0.6674831374412781, "grad_norm": 6.586140062769545, "learning_rate": 2.63007858713991e-06, "loss": 17.5298, "step": 36516 }, { "epoch": 0.6675014166377246, "grad_norm": 5.07556440741061, "learning_rate": 2.629817943275603e-06, "loss": 17.0702, "step": 36517 }, { "epoch": 0.6675196958341711, "grad_norm": 5.103208543889257, "learning_rate": 2.629557307718581e-06, "loss": 16.9345, "step": 36518 }, { "epoch": 0.6675379750306176, "grad_norm": 5.33582273408996, "learning_rate": 2.6292966804697545e-06, "loss": 17.1526, "step": 36519 }, { "epoch": 0.6675562542270642, "grad_norm": 5.41985725083219, "learning_rate": 2.629036061530036e-06, "loss": 16.948, "step": 36520 }, { "epoch": 0.6675745334235107, "grad_norm": 7.120932566553874, "learning_rate": 2.6287754509003416e-06, "loss": 17.8274, "step": 36521 }, { "epoch": 0.6675928126199572, "grad_norm": 8.867787475750765, "learning_rate": 2.628514848581584e-06, "loss": 18.1377, "step": 36522 }, { "epoch": 0.6676110918164038, "grad_norm": 6.178463328589576, "learning_rate": 2.6282542545746735e-06, "loss": 17.2428, "step": 36523 }, { "epoch": 0.6676293710128502, "grad_norm": 6.533843942822419, "learning_rate": 2.6279936688805263e-06, "loss": 17.6144, "step": 36524 }, { "epoch": 0.6676476502092968, "grad_norm": 6.376759001289345, "learning_rate": 2.6277330915000575e-06, "loss": 17.233, "step": 36525 }, { "epoch": 0.6676659294057433, "grad_norm": 5.350665448272505, "learning_rate": 2.6274725224341767e-06, "loss": 16.8259, "step": 36526 }, { "epoch": 0.6676842086021898, "grad_norm": 7.159804616110121, "learning_rate": 2.627211961683801e-06, "loss": 17.5091, "step": 36527 }, { "epoch": 0.6677024877986364, "grad_norm": 6.125812239875147, "learning_rate": 2.626951409249839e-06, "loss": 17.0521, "step": 36528 }, { "epoch": 0.6677207669950829, "grad_norm": 6.405424165148572, "learning_rate": 2.6266908651332078e-06, "loss": 17.142, "step": 36529 }, { "epoch": 0.6677390461915295, "grad_norm": 7.505402116847613, "learning_rate": 2.6264303293348204e-06, "loss": 17.7242, "step": 36530 }, { "epoch": 0.6677573253879759, "grad_norm": 7.1185709064574905, "learning_rate": 2.626169801855586e-06, "loss": 17.862, "step": 36531 }, { "epoch": 0.6677756045844224, "grad_norm": 5.554981375384917, "learning_rate": 2.6259092826964226e-06, "loss": 17.3633, "step": 36532 }, { "epoch": 0.667793883780869, "grad_norm": 5.593006378829768, "learning_rate": 2.625648771858239e-06, "loss": 17.178, "step": 36533 }, { "epoch": 0.6678121629773155, "grad_norm": 7.2807038626997445, "learning_rate": 2.6253882693419506e-06, "loss": 17.979, "step": 36534 }, { "epoch": 0.6678304421737621, "grad_norm": 6.150082392727808, "learning_rate": 2.6251277751484723e-06, "loss": 17.4003, "step": 36535 }, { "epoch": 0.6678487213702086, "grad_norm": 5.742288726473103, "learning_rate": 2.624867289278715e-06, "loss": 17.173, "step": 36536 }, { "epoch": 0.667867000566655, "grad_norm": 5.534818174042081, "learning_rate": 2.6246068117335895e-06, "loss": 17.1097, "step": 36537 }, { "epoch": 0.6678852797631016, "grad_norm": 7.354615659022473, "learning_rate": 2.6243463425140132e-06, "loss": 17.5387, "step": 36538 }, { "epoch": 0.6679035589595481, "grad_norm": 7.3669008969185255, "learning_rate": 2.6240858816208946e-06, "loss": 17.6761, "step": 36539 }, { "epoch": 0.6679218381559947, "grad_norm": 5.972747580194544, "learning_rate": 2.623825429055151e-06, "loss": 17.0738, "step": 36540 }, { "epoch": 0.6679401173524412, "grad_norm": 6.550341686724209, "learning_rate": 2.6235649848176936e-06, "loss": 17.4544, "step": 36541 }, { "epoch": 0.6679583965488877, "grad_norm": 5.79191982899807, "learning_rate": 2.623304548909432e-06, "loss": 17.0202, "step": 36542 }, { "epoch": 0.6679766757453343, "grad_norm": 10.628563392353357, "learning_rate": 2.6230441213312818e-06, "loss": 18.3787, "step": 36543 }, { "epoch": 0.6679949549417807, "grad_norm": 6.71682721621549, "learning_rate": 2.622783702084158e-06, "loss": 17.4038, "step": 36544 }, { "epoch": 0.6680132341382273, "grad_norm": 6.402579839605049, "learning_rate": 2.6225232911689693e-06, "loss": 17.5461, "step": 36545 }, { "epoch": 0.6680315133346738, "grad_norm": 7.420969661065066, "learning_rate": 2.6222628885866312e-06, "loss": 18.2073, "step": 36546 }, { "epoch": 0.6680497925311203, "grad_norm": 7.044204921900554, "learning_rate": 2.6220024943380563e-06, "loss": 17.6671, "step": 36547 }, { "epoch": 0.6680680717275669, "grad_norm": 6.360557968838532, "learning_rate": 2.6217421084241535e-06, "loss": 17.6944, "step": 36548 }, { "epoch": 0.6680863509240134, "grad_norm": 6.122096878686651, "learning_rate": 2.6214817308458406e-06, "loss": 17.3879, "step": 36549 }, { "epoch": 0.66810463012046, "grad_norm": 7.353115428221047, "learning_rate": 2.6212213616040275e-06, "loss": 17.5111, "step": 36550 }, { "epoch": 0.6681229093169064, "grad_norm": 5.585289965730651, "learning_rate": 2.6209610006996255e-06, "loss": 17.0122, "step": 36551 }, { "epoch": 0.6681411885133529, "grad_norm": 6.936626465407549, "learning_rate": 2.620700648133549e-06, "loss": 17.4829, "step": 36552 }, { "epoch": 0.6681594677097995, "grad_norm": 6.157137725570795, "learning_rate": 2.6204403039067116e-06, "loss": 17.502, "step": 36553 }, { "epoch": 0.668177746906246, "grad_norm": 5.107984131256477, "learning_rate": 2.6201799680200235e-06, "loss": 16.8579, "step": 36554 }, { "epoch": 0.6681960261026926, "grad_norm": 5.472942470078283, "learning_rate": 2.619919640474399e-06, "loss": 17.0765, "step": 36555 }, { "epoch": 0.668214305299139, "grad_norm": 6.500883092964609, "learning_rate": 2.6196593212707484e-06, "loss": 17.8414, "step": 36556 }, { "epoch": 0.6682325844955855, "grad_norm": 5.233407824093922, "learning_rate": 2.6193990104099863e-06, "loss": 16.9044, "step": 36557 }, { "epoch": 0.6682508636920321, "grad_norm": 6.9063189342440054, "learning_rate": 2.619138707893025e-06, "loss": 17.4491, "step": 36558 }, { "epoch": 0.6682691428884786, "grad_norm": 5.79446640758293, "learning_rate": 2.618878413720774e-06, "loss": 17.3132, "step": 36559 }, { "epoch": 0.6682874220849252, "grad_norm": 5.931853984821854, "learning_rate": 2.618618127894149e-06, "loss": 17.2331, "step": 36560 }, { "epoch": 0.6683057012813717, "grad_norm": 5.764811664291251, "learning_rate": 2.6183578504140593e-06, "loss": 17.4976, "step": 36561 }, { "epoch": 0.6683239804778182, "grad_norm": 6.089184110169848, "learning_rate": 2.6180975812814178e-06, "loss": 17.5368, "step": 36562 }, { "epoch": 0.6683422596742647, "grad_norm": 6.363584515364844, "learning_rate": 2.6178373204971406e-06, "loss": 17.7876, "step": 36563 }, { "epoch": 0.6683605388707112, "grad_norm": 5.118586570242106, "learning_rate": 2.6175770680621366e-06, "loss": 17.0097, "step": 36564 }, { "epoch": 0.6683788180671578, "grad_norm": 6.871429852821446, "learning_rate": 2.617316823977316e-06, "loss": 17.5196, "step": 36565 }, { "epoch": 0.6683970972636043, "grad_norm": 5.866753176103241, "learning_rate": 2.6170565882435948e-06, "loss": 17.4244, "step": 36566 }, { "epoch": 0.6684153764600508, "grad_norm": 5.878171507832172, "learning_rate": 2.616796360861884e-06, "loss": 17.37, "step": 36567 }, { "epoch": 0.6684336556564974, "grad_norm": 5.044365789569727, "learning_rate": 2.6165361418330937e-06, "loss": 16.824, "step": 36568 }, { "epoch": 0.6684519348529439, "grad_norm": 4.946987511748944, "learning_rate": 2.616275931158139e-06, "loss": 16.754, "step": 36569 }, { "epoch": 0.6684702140493904, "grad_norm": 6.543422555758624, "learning_rate": 2.6160157288379272e-06, "loss": 17.4795, "step": 36570 }, { "epoch": 0.6684884932458369, "grad_norm": 6.024610538821052, "learning_rate": 2.6157555348733747e-06, "loss": 17.4391, "step": 36571 }, { "epoch": 0.6685067724422834, "grad_norm": 5.843366691881378, "learning_rate": 2.6154953492653934e-06, "loss": 17.3026, "step": 36572 }, { "epoch": 0.66852505163873, "grad_norm": 6.643956269549394, "learning_rate": 2.6152351720148926e-06, "loss": 17.4415, "step": 36573 }, { "epoch": 0.6685433308351765, "grad_norm": 6.8164023028796965, "learning_rate": 2.614975003122787e-06, "loss": 17.7947, "step": 36574 }, { "epoch": 0.668561610031623, "grad_norm": 5.150994294702356, "learning_rate": 2.614714842589988e-06, "loss": 17.0371, "step": 36575 }, { "epoch": 0.6685798892280695, "grad_norm": 5.703421216504182, "learning_rate": 2.614454690417404e-06, "loss": 17.3001, "step": 36576 }, { "epoch": 0.668598168424516, "grad_norm": 6.856130769713023, "learning_rate": 2.614194546605951e-06, "loss": 17.1926, "step": 36577 }, { "epoch": 0.6686164476209626, "grad_norm": 6.673339461616148, "learning_rate": 2.613934411156537e-06, "loss": 17.452, "step": 36578 }, { "epoch": 0.6686347268174091, "grad_norm": 6.696240774476725, "learning_rate": 2.613674284070078e-06, "loss": 17.4179, "step": 36579 }, { "epoch": 0.6686530060138556, "grad_norm": 7.17625733654002, "learning_rate": 2.613414165347482e-06, "loss": 17.3895, "step": 36580 }, { "epoch": 0.6686712852103022, "grad_norm": 7.952225648437429, "learning_rate": 2.6131540549896638e-06, "loss": 18.3256, "step": 36581 }, { "epoch": 0.6686895644067486, "grad_norm": 5.50299420734388, "learning_rate": 2.6128939529975317e-06, "loss": 17.0138, "step": 36582 }, { "epoch": 0.6687078436031952, "grad_norm": 6.3885596004794944, "learning_rate": 2.6126338593720007e-06, "loss": 17.0383, "step": 36583 }, { "epoch": 0.6687261227996417, "grad_norm": 7.439265552321622, "learning_rate": 2.6123737741139794e-06, "loss": 17.7525, "step": 36584 }, { "epoch": 0.6687444019960882, "grad_norm": 5.825692012931775, "learning_rate": 2.612113697224383e-06, "loss": 17.1408, "step": 36585 }, { "epoch": 0.6687626811925348, "grad_norm": 6.153879264996437, "learning_rate": 2.61185362870412e-06, "loss": 17.4838, "step": 36586 }, { "epoch": 0.6687809603889813, "grad_norm": 5.9291833177161255, "learning_rate": 2.6115935685541015e-06, "loss": 17.5352, "step": 36587 }, { "epoch": 0.6687992395854279, "grad_norm": 6.88150119454312, "learning_rate": 2.61133351677524e-06, "loss": 17.6456, "step": 36588 }, { "epoch": 0.6688175187818743, "grad_norm": 6.6850971409569135, "learning_rate": 2.6110734733684497e-06, "loss": 17.7323, "step": 36589 }, { "epoch": 0.6688357979783208, "grad_norm": 6.3551662755570035, "learning_rate": 2.610813438334637e-06, "loss": 17.2658, "step": 36590 }, { "epoch": 0.6688540771747674, "grad_norm": 7.154129458687373, "learning_rate": 2.6105534116747178e-06, "loss": 17.7272, "step": 36591 }, { "epoch": 0.6688723563712139, "grad_norm": 5.784231969349783, "learning_rate": 2.6102933933896024e-06, "loss": 17.1962, "step": 36592 }, { "epoch": 0.6688906355676605, "grad_norm": 6.604967712842383, "learning_rate": 2.6100333834801983e-06, "loss": 17.9342, "step": 36593 }, { "epoch": 0.668908914764107, "grad_norm": 6.166936804898159, "learning_rate": 2.6097733819474223e-06, "loss": 17.3597, "step": 36594 }, { "epoch": 0.6689271939605534, "grad_norm": 7.1549406937154725, "learning_rate": 2.609513388792183e-06, "loss": 17.6329, "step": 36595 }, { "epoch": 0.668945473157, "grad_norm": 8.186365640235831, "learning_rate": 2.6092534040153907e-06, "loss": 18.0058, "step": 36596 }, { "epoch": 0.6689637523534465, "grad_norm": 14.321091646301303, "learning_rate": 2.608993427617956e-06, "loss": 18.8159, "step": 36597 }, { "epoch": 0.6689820315498931, "grad_norm": 7.102752346000842, "learning_rate": 2.608733459600795e-06, "loss": 17.8991, "step": 36598 }, { "epoch": 0.6690003107463396, "grad_norm": 4.887867623621251, "learning_rate": 2.6084734999648144e-06, "loss": 16.8273, "step": 36599 }, { "epoch": 0.6690185899427861, "grad_norm": 8.162145830523746, "learning_rate": 2.6082135487109273e-06, "loss": 18.5068, "step": 36600 }, { "epoch": 0.6690368691392327, "grad_norm": 6.866810653952636, "learning_rate": 2.607953605840043e-06, "loss": 17.5841, "step": 36601 }, { "epoch": 0.6690551483356791, "grad_norm": 7.084346080929641, "learning_rate": 2.6076936713530756e-06, "loss": 17.8492, "step": 36602 }, { "epoch": 0.6690734275321257, "grad_norm": 6.428251810038622, "learning_rate": 2.6074337452509336e-06, "loss": 17.2468, "step": 36603 }, { "epoch": 0.6690917067285722, "grad_norm": 6.060192466062309, "learning_rate": 2.6071738275345276e-06, "loss": 17.3029, "step": 36604 }, { "epoch": 0.6691099859250187, "grad_norm": 7.207283893570233, "learning_rate": 2.606913918204771e-06, "loss": 17.7313, "step": 36605 }, { "epoch": 0.6691282651214653, "grad_norm": 6.380272413817161, "learning_rate": 2.606654017262572e-06, "loss": 17.279, "step": 36606 }, { "epoch": 0.6691465443179118, "grad_norm": 6.2618682253438385, "learning_rate": 2.606394124708843e-06, "loss": 17.2567, "step": 36607 }, { "epoch": 0.6691648235143584, "grad_norm": 8.360074376303302, "learning_rate": 2.6061342405444965e-06, "loss": 18.165, "step": 36608 }, { "epoch": 0.6691831027108048, "grad_norm": 8.2774480800072, "learning_rate": 2.605874364770442e-06, "loss": 17.913, "step": 36609 }, { "epoch": 0.6692013819072513, "grad_norm": 4.983569388692532, "learning_rate": 2.605614497387588e-06, "loss": 16.788, "step": 36610 }, { "epoch": 0.6692196611036979, "grad_norm": 6.6048593704200185, "learning_rate": 2.6053546383968494e-06, "loss": 17.513, "step": 36611 }, { "epoch": 0.6692379403001444, "grad_norm": 6.581662323844022, "learning_rate": 2.605094787799135e-06, "loss": 17.4723, "step": 36612 }, { "epoch": 0.669256219496591, "grad_norm": 6.190650663516262, "learning_rate": 2.6048349455953537e-06, "loss": 17.4377, "step": 36613 }, { "epoch": 0.6692744986930375, "grad_norm": 5.141732360505559, "learning_rate": 2.6045751117864193e-06, "loss": 16.8736, "step": 36614 }, { "epoch": 0.6692927778894839, "grad_norm": 5.512967603383603, "learning_rate": 2.60431528637324e-06, "loss": 17.0966, "step": 36615 }, { "epoch": 0.6693110570859305, "grad_norm": 6.854663621748793, "learning_rate": 2.604055469356728e-06, "loss": 17.7484, "step": 36616 }, { "epoch": 0.669329336282377, "grad_norm": 6.196938715342343, "learning_rate": 2.6037956607377956e-06, "loss": 17.0711, "step": 36617 }, { "epoch": 0.6693476154788236, "grad_norm": 4.713245209634727, "learning_rate": 2.603535860517349e-06, "loss": 16.9856, "step": 36618 }, { "epoch": 0.6693658946752701, "grad_norm": 8.043661142611553, "learning_rate": 2.603276068696303e-06, "loss": 17.8973, "step": 36619 }, { "epoch": 0.6693841738717166, "grad_norm": 6.754127403495267, "learning_rate": 2.603016285275567e-06, "loss": 17.6759, "step": 36620 }, { "epoch": 0.6694024530681632, "grad_norm": 6.549360225385036, "learning_rate": 2.602756510256049e-06, "loss": 17.7148, "step": 36621 }, { "epoch": 0.6694207322646096, "grad_norm": 5.751268833300429, "learning_rate": 2.6024967436386626e-06, "loss": 16.9864, "step": 36622 }, { "epoch": 0.6694390114610562, "grad_norm": 5.720619711821593, "learning_rate": 2.602236985424318e-06, "loss": 16.9921, "step": 36623 }, { "epoch": 0.6694572906575027, "grad_norm": 5.396693071539707, "learning_rate": 2.6019772356139226e-06, "loss": 17.152, "step": 36624 }, { "epoch": 0.6694755698539492, "grad_norm": 6.4137086391931515, "learning_rate": 2.601717494208389e-06, "loss": 17.5471, "step": 36625 }, { "epoch": 0.6694938490503958, "grad_norm": 5.987316602655052, "learning_rate": 2.601457761208629e-06, "loss": 17.3823, "step": 36626 }, { "epoch": 0.6695121282468423, "grad_norm": 6.667569486352715, "learning_rate": 2.6011980366155493e-06, "loss": 17.6729, "step": 36627 }, { "epoch": 0.6695304074432888, "grad_norm": 5.7808092604391215, "learning_rate": 2.600938320430064e-06, "loss": 17.5237, "step": 36628 }, { "epoch": 0.6695486866397353, "grad_norm": 6.6099765152728756, "learning_rate": 2.6006786126530826e-06, "loss": 17.9319, "step": 36629 }, { "epoch": 0.6695669658361818, "grad_norm": 5.933714194560611, "learning_rate": 2.600418913285512e-06, "loss": 17.1348, "step": 36630 }, { "epoch": 0.6695852450326284, "grad_norm": 7.206824702692007, "learning_rate": 2.600159222328267e-06, "loss": 17.662, "step": 36631 }, { "epoch": 0.6696035242290749, "grad_norm": 6.552778301916481, "learning_rate": 2.5998995397822536e-06, "loss": 17.5397, "step": 36632 }, { "epoch": 0.6696218034255215, "grad_norm": 5.363186781203856, "learning_rate": 2.599639865648386e-06, "loss": 17.0838, "step": 36633 }, { "epoch": 0.669640082621968, "grad_norm": 12.602659174583568, "learning_rate": 2.599380199927571e-06, "loss": 17.6166, "step": 36634 }, { "epoch": 0.6696583618184144, "grad_norm": 6.520379147736286, "learning_rate": 2.5991205426207193e-06, "loss": 17.6692, "step": 36635 }, { "epoch": 0.669676641014861, "grad_norm": 6.22695538777536, "learning_rate": 2.598860893728744e-06, "loss": 17.5839, "step": 36636 }, { "epoch": 0.6696949202113075, "grad_norm": 6.933888476222875, "learning_rate": 2.598601253252553e-06, "loss": 17.558, "step": 36637 }, { "epoch": 0.6697131994077541, "grad_norm": 5.864818326636437, "learning_rate": 2.598341621193054e-06, "loss": 17.1627, "step": 36638 }, { "epoch": 0.6697314786042006, "grad_norm": 4.998130977192725, "learning_rate": 2.5980819975511618e-06, "loss": 17.0595, "step": 36639 }, { "epoch": 0.669749757800647, "grad_norm": 5.480584102636354, "learning_rate": 2.5978223823277827e-06, "loss": 17.1356, "step": 36640 }, { "epoch": 0.6697680369970936, "grad_norm": 5.731828552205191, "learning_rate": 2.5975627755238264e-06, "loss": 17.2967, "step": 36641 }, { "epoch": 0.6697863161935401, "grad_norm": 6.5353708773435555, "learning_rate": 2.5973031771402065e-06, "loss": 17.1461, "step": 36642 }, { "epoch": 0.6698045953899866, "grad_norm": 6.160216376954919, "learning_rate": 2.5970435871778275e-06, "loss": 17.6222, "step": 36643 }, { "epoch": 0.6698228745864332, "grad_norm": 5.420717178812959, "learning_rate": 2.596784005637603e-06, "loss": 17.38, "step": 36644 }, { "epoch": 0.6698411537828797, "grad_norm": 5.518347078425771, "learning_rate": 2.5965244325204435e-06, "loss": 16.9279, "step": 36645 }, { "epoch": 0.6698594329793263, "grad_norm": 9.250588843330833, "learning_rate": 2.5962648678272557e-06, "loss": 18.0519, "step": 36646 }, { "epoch": 0.6698777121757727, "grad_norm": 7.972475798614765, "learning_rate": 2.596005311558953e-06, "loss": 18.4197, "step": 36647 }, { "epoch": 0.6698959913722192, "grad_norm": 5.019130730144295, "learning_rate": 2.595745763716443e-06, "loss": 16.8942, "step": 36648 }, { "epoch": 0.6699142705686658, "grad_norm": 11.400899014358957, "learning_rate": 2.5954862243006327e-06, "loss": 17.9206, "step": 36649 }, { "epoch": 0.6699325497651123, "grad_norm": 7.439851382550326, "learning_rate": 2.595226693312437e-06, "loss": 17.8808, "step": 36650 }, { "epoch": 0.6699508289615589, "grad_norm": 7.292939701720343, "learning_rate": 2.5949671707527614e-06, "loss": 18.0916, "step": 36651 }, { "epoch": 0.6699691081580054, "grad_norm": 7.154031157751783, "learning_rate": 2.5947076566225194e-06, "loss": 17.92, "step": 36652 }, { "epoch": 0.6699873873544518, "grad_norm": 7.0548090508205465, "learning_rate": 2.594448150922616e-06, "loss": 17.6777, "step": 36653 }, { "epoch": 0.6700056665508984, "grad_norm": 6.14645781573305, "learning_rate": 2.5941886536539652e-06, "loss": 17.568, "step": 36654 }, { "epoch": 0.6700239457473449, "grad_norm": 7.324822688700705, "learning_rate": 2.5939291648174713e-06, "loss": 17.955, "step": 36655 }, { "epoch": 0.6700422249437915, "grad_norm": 7.764518925058317, "learning_rate": 2.59366968441405e-06, "loss": 17.6229, "step": 36656 }, { "epoch": 0.670060504140238, "grad_norm": 6.4794961842612215, "learning_rate": 2.5934102124446074e-06, "loss": 17.5092, "step": 36657 }, { "epoch": 0.6700787833366845, "grad_norm": 7.466791649855707, "learning_rate": 2.5931507489100514e-06, "loss": 17.8582, "step": 36658 }, { "epoch": 0.6700970625331311, "grad_norm": 5.723299021524427, "learning_rate": 2.5928912938112948e-06, "loss": 16.9185, "step": 36659 }, { "epoch": 0.6701153417295775, "grad_norm": 5.051201504742469, "learning_rate": 2.592631847149244e-06, "loss": 16.9434, "step": 36660 }, { "epoch": 0.6701336209260241, "grad_norm": 5.873993299194282, "learning_rate": 2.5923724089248094e-06, "loss": 17.2777, "step": 36661 }, { "epoch": 0.6701519001224706, "grad_norm": 5.631073349077409, "learning_rate": 2.5921129791389023e-06, "loss": 17.147, "step": 36662 }, { "epoch": 0.6701701793189171, "grad_norm": 6.209384550195252, "learning_rate": 2.5918535577924282e-06, "loss": 17.1925, "step": 36663 }, { "epoch": 0.6701884585153637, "grad_norm": 6.275490952363238, "learning_rate": 2.5915941448863004e-06, "loss": 17.1416, "step": 36664 }, { "epoch": 0.6702067377118102, "grad_norm": 7.164179282803419, "learning_rate": 2.5913347404214263e-06, "loss": 17.6576, "step": 36665 }, { "epoch": 0.6702250169082568, "grad_norm": 6.196662080246762, "learning_rate": 2.5910753443987124e-06, "loss": 17.3749, "step": 36666 }, { "epoch": 0.6702432961047032, "grad_norm": 5.1841958252270155, "learning_rate": 2.5908159568190733e-06, "loss": 16.9716, "step": 36667 }, { "epoch": 0.6702615753011497, "grad_norm": 7.1137561549272945, "learning_rate": 2.5905565776834147e-06, "loss": 17.6041, "step": 36668 }, { "epoch": 0.6702798544975963, "grad_norm": 5.6497697877584825, "learning_rate": 2.5902972069926445e-06, "loss": 17.1216, "step": 36669 }, { "epoch": 0.6702981336940428, "grad_norm": 5.9533488116952995, "learning_rate": 2.5900378447476733e-06, "loss": 17.1153, "step": 36670 }, { "epoch": 0.6703164128904894, "grad_norm": 5.965601037905227, "learning_rate": 2.5897784909494124e-06, "loss": 17.4156, "step": 36671 }, { "epoch": 0.6703346920869359, "grad_norm": 8.219085631705669, "learning_rate": 2.5895191455987666e-06, "loss": 17.8404, "step": 36672 }, { "epoch": 0.6703529712833823, "grad_norm": 6.384733192227994, "learning_rate": 2.5892598086966493e-06, "loss": 17.565, "step": 36673 }, { "epoch": 0.6703712504798289, "grad_norm": 5.173743217072475, "learning_rate": 2.589000480243966e-06, "loss": 17.0545, "step": 36674 }, { "epoch": 0.6703895296762754, "grad_norm": 6.158049574363376, "learning_rate": 2.5887411602416264e-06, "loss": 17.1257, "step": 36675 }, { "epoch": 0.670407808872722, "grad_norm": 6.385136587514823, "learning_rate": 2.588481848690541e-06, "loss": 17.2583, "step": 36676 }, { "epoch": 0.6704260880691685, "grad_norm": 5.589761595636303, "learning_rate": 2.5882225455916157e-06, "loss": 17.1194, "step": 36677 }, { "epoch": 0.670444367265615, "grad_norm": 5.239790060576132, "learning_rate": 2.587963250945763e-06, "loss": 16.9768, "step": 36678 }, { "epoch": 0.6704626464620616, "grad_norm": 5.549270501044057, "learning_rate": 2.5877039647538875e-06, "loss": 17.252, "step": 36679 }, { "epoch": 0.670480925658508, "grad_norm": 5.803692340296212, "learning_rate": 2.5874446870169013e-06, "loss": 17.0969, "step": 36680 }, { "epoch": 0.6704992048549546, "grad_norm": 5.869748633254332, "learning_rate": 2.5871854177357124e-06, "loss": 17.2702, "step": 36681 }, { "epoch": 0.6705174840514011, "grad_norm": 8.014717591592337, "learning_rate": 2.586926156911231e-06, "loss": 17.9482, "step": 36682 }, { "epoch": 0.6705357632478476, "grad_norm": 7.752120470093901, "learning_rate": 2.586666904544361e-06, "loss": 17.1757, "step": 36683 }, { "epoch": 0.6705540424442942, "grad_norm": 7.52695201830491, "learning_rate": 2.5864076606360165e-06, "loss": 17.3495, "step": 36684 }, { "epoch": 0.6705723216407407, "grad_norm": 6.209578460266137, "learning_rate": 2.586148425187103e-06, "loss": 17.2523, "step": 36685 }, { "epoch": 0.6705906008371872, "grad_norm": 8.172862619650246, "learning_rate": 2.585889198198529e-06, "loss": 18.1715, "step": 36686 }, { "epoch": 0.6706088800336337, "grad_norm": 6.423831058210615, "learning_rate": 2.585629979671205e-06, "loss": 17.5867, "step": 36687 }, { "epoch": 0.6706271592300802, "grad_norm": 7.205543183216714, "learning_rate": 2.5853707696060364e-06, "loss": 17.3603, "step": 36688 }, { "epoch": 0.6706454384265268, "grad_norm": 6.08323686615381, "learning_rate": 2.585111568003934e-06, "loss": 17.4702, "step": 36689 }, { "epoch": 0.6706637176229733, "grad_norm": 7.313670327038206, "learning_rate": 2.584852374865808e-06, "loss": 17.4367, "step": 36690 }, { "epoch": 0.6706819968194199, "grad_norm": 5.517566842278805, "learning_rate": 2.5845931901925648e-06, "loss": 17.3082, "step": 36691 }, { "epoch": 0.6707002760158663, "grad_norm": 6.561205330915677, "learning_rate": 2.5843340139851108e-06, "loss": 17.4132, "step": 36692 }, { "epoch": 0.6707185552123128, "grad_norm": 6.499035105663697, "learning_rate": 2.584074846244359e-06, "loss": 17.6789, "step": 36693 }, { "epoch": 0.6707368344087594, "grad_norm": 8.647329943155551, "learning_rate": 2.5838156869712127e-06, "loss": 18.5379, "step": 36694 }, { "epoch": 0.6707551136052059, "grad_norm": 5.718989332660879, "learning_rate": 2.5835565361665847e-06, "loss": 17.1633, "step": 36695 }, { "epoch": 0.6707733928016525, "grad_norm": 6.478380088566007, "learning_rate": 2.583297393831381e-06, "loss": 17.6014, "step": 36696 }, { "epoch": 0.670791671998099, "grad_norm": 6.671135512768392, "learning_rate": 2.5830382599665087e-06, "loss": 17.5998, "step": 36697 }, { "epoch": 0.6708099511945455, "grad_norm": 6.267493255591823, "learning_rate": 2.582779134572878e-06, "loss": 17.2919, "step": 36698 }, { "epoch": 0.670828230390992, "grad_norm": 5.534226512461209, "learning_rate": 2.5825200176513975e-06, "loss": 17.4057, "step": 36699 }, { "epoch": 0.6708465095874385, "grad_norm": 6.012315229216652, "learning_rate": 2.582260909202974e-06, "loss": 17.1789, "step": 36700 }, { "epoch": 0.6708647887838851, "grad_norm": 5.7429520001277865, "learning_rate": 2.582001809228517e-06, "loss": 17.2259, "step": 36701 }, { "epoch": 0.6708830679803316, "grad_norm": 6.666009554297294, "learning_rate": 2.5817427177289346e-06, "loss": 17.1788, "step": 36702 }, { "epoch": 0.6709013471767781, "grad_norm": 7.70683251679176, "learning_rate": 2.581483634705132e-06, "loss": 17.819, "step": 36703 }, { "epoch": 0.6709196263732247, "grad_norm": 5.288620323171493, "learning_rate": 2.581224560158021e-06, "loss": 17.2673, "step": 36704 }, { "epoch": 0.6709379055696711, "grad_norm": 6.92874004028413, "learning_rate": 2.5809654940885064e-06, "loss": 17.7269, "step": 36705 }, { "epoch": 0.6709561847661177, "grad_norm": 6.225295073777119, "learning_rate": 2.5807064364975e-06, "loss": 17.1179, "step": 36706 }, { "epoch": 0.6709744639625642, "grad_norm": 7.338696255728691, "learning_rate": 2.5804473873859048e-06, "loss": 17.8616, "step": 36707 }, { "epoch": 0.6709927431590107, "grad_norm": 5.428605864733609, "learning_rate": 2.580188346754632e-06, "loss": 17.1869, "step": 36708 }, { "epoch": 0.6710110223554573, "grad_norm": 5.015188797724014, "learning_rate": 2.5799293146045913e-06, "loss": 16.8278, "step": 36709 }, { "epoch": 0.6710293015519038, "grad_norm": 5.754798027570977, "learning_rate": 2.5796702909366876e-06, "loss": 17.0021, "step": 36710 }, { "epoch": 0.6710475807483502, "grad_norm": 6.814084844318462, "learning_rate": 2.5794112757518287e-06, "loss": 17.6282, "step": 36711 }, { "epoch": 0.6710658599447968, "grad_norm": 6.916807206098972, "learning_rate": 2.5791522690509242e-06, "loss": 17.5166, "step": 36712 }, { "epoch": 0.6710841391412433, "grad_norm": 6.002813217879121, "learning_rate": 2.5788932708348813e-06, "loss": 17.4488, "step": 36713 }, { "epoch": 0.6711024183376899, "grad_norm": 7.497408417808758, "learning_rate": 2.5786342811046055e-06, "loss": 17.6321, "step": 36714 }, { "epoch": 0.6711206975341364, "grad_norm": 6.255876877039967, "learning_rate": 2.578375299861007e-06, "loss": 17.7751, "step": 36715 }, { "epoch": 0.6711389767305829, "grad_norm": 6.65989895598717, "learning_rate": 2.5781163271049935e-06, "loss": 17.7485, "step": 36716 }, { "epoch": 0.6711572559270295, "grad_norm": 4.476340128574464, "learning_rate": 2.5778573628374713e-06, "loss": 16.8721, "step": 36717 }, { "epoch": 0.6711755351234759, "grad_norm": 5.649194697808994, "learning_rate": 2.5775984070593503e-06, "loss": 17.0267, "step": 36718 }, { "epoch": 0.6711938143199225, "grad_norm": 6.922802541758868, "learning_rate": 2.577339459771537e-06, "loss": 17.8057, "step": 36719 }, { "epoch": 0.671212093516369, "grad_norm": 7.453468791847144, "learning_rate": 2.577080520974936e-06, "loss": 17.6422, "step": 36720 }, { "epoch": 0.6712303727128155, "grad_norm": 6.9941171354086435, "learning_rate": 2.5768215906704603e-06, "loss": 17.763, "step": 36721 }, { "epoch": 0.6712486519092621, "grad_norm": 5.9565540004036075, "learning_rate": 2.576562668859014e-06, "loss": 17.3037, "step": 36722 }, { "epoch": 0.6712669311057086, "grad_norm": 7.030036943894148, "learning_rate": 2.576303755541504e-06, "loss": 17.2732, "step": 36723 }, { "epoch": 0.6712852103021552, "grad_norm": 5.511843781781716, "learning_rate": 2.5760448507188386e-06, "loss": 17.1663, "step": 36724 }, { "epoch": 0.6713034894986016, "grad_norm": 8.80609284584758, "learning_rate": 2.5757859543919256e-06, "loss": 18.5883, "step": 36725 }, { "epoch": 0.6713217686950481, "grad_norm": 6.010705017770821, "learning_rate": 2.575527066561674e-06, "loss": 17.1476, "step": 36726 }, { "epoch": 0.6713400478914947, "grad_norm": 5.605998988714203, "learning_rate": 2.5752681872289907e-06, "loss": 17.0968, "step": 36727 }, { "epoch": 0.6713583270879412, "grad_norm": 6.974086552982958, "learning_rate": 2.57500931639478e-06, "loss": 18.0648, "step": 36728 }, { "epoch": 0.6713766062843878, "grad_norm": 6.2519089520797575, "learning_rate": 2.574750454059952e-06, "loss": 17.3628, "step": 36729 }, { "epoch": 0.6713948854808343, "grad_norm": 4.836960409619142, "learning_rate": 2.5744916002254138e-06, "loss": 16.7763, "step": 36730 }, { "epoch": 0.6714131646772807, "grad_norm": 5.073368866060099, "learning_rate": 2.5742327548920704e-06, "loss": 17.0482, "step": 36731 }, { "epoch": 0.6714314438737273, "grad_norm": 6.045120914320428, "learning_rate": 2.5739739180608325e-06, "loss": 17.2787, "step": 36732 }, { "epoch": 0.6714497230701738, "grad_norm": 5.591779348380349, "learning_rate": 2.5737150897326035e-06, "loss": 17.0385, "step": 36733 }, { "epoch": 0.6714680022666204, "grad_norm": 5.627590611647383, "learning_rate": 2.573456269908292e-06, "loss": 17.2644, "step": 36734 }, { "epoch": 0.6714862814630669, "grad_norm": 6.1524480496690686, "learning_rate": 2.5731974585888087e-06, "loss": 17.3303, "step": 36735 }, { "epoch": 0.6715045606595134, "grad_norm": 6.896981290786612, "learning_rate": 2.572938655775057e-06, "loss": 17.5243, "step": 36736 }, { "epoch": 0.67152283985596, "grad_norm": 7.123735425731771, "learning_rate": 2.5726798614679434e-06, "loss": 17.5738, "step": 36737 }, { "epoch": 0.6715411190524064, "grad_norm": 7.5012531388499415, "learning_rate": 2.5724210756683776e-06, "loss": 17.6913, "step": 36738 }, { "epoch": 0.671559398248853, "grad_norm": 6.958720692405543, "learning_rate": 2.5721622983772633e-06, "loss": 17.556, "step": 36739 }, { "epoch": 0.6715776774452995, "grad_norm": 6.098310864366372, "learning_rate": 2.571903529595512e-06, "loss": 17.5152, "step": 36740 }, { "epoch": 0.671595956641746, "grad_norm": 5.28956781900038, "learning_rate": 2.5716447693240275e-06, "loss": 17.0984, "step": 36741 }, { "epoch": 0.6716142358381926, "grad_norm": 7.052017716713513, "learning_rate": 2.5713860175637157e-06, "loss": 17.7278, "step": 36742 }, { "epoch": 0.671632515034639, "grad_norm": 8.890587063934046, "learning_rate": 2.5711272743154853e-06, "loss": 18.2875, "step": 36743 }, { "epoch": 0.6716507942310856, "grad_norm": 6.236962213609414, "learning_rate": 2.570868539580245e-06, "loss": 17.3166, "step": 36744 }, { "epoch": 0.6716690734275321, "grad_norm": 6.123654265684495, "learning_rate": 2.5706098133588975e-06, "loss": 17.8011, "step": 36745 }, { "epoch": 0.6716873526239786, "grad_norm": 6.223355740769742, "learning_rate": 2.5703510956523536e-06, "loss": 17.3456, "step": 36746 }, { "epoch": 0.6717056318204252, "grad_norm": 5.637898124832896, "learning_rate": 2.5700923864615185e-06, "loss": 17.362, "step": 36747 }, { "epoch": 0.6717239110168717, "grad_norm": 7.0755686797425446, "learning_rate": 2.5698336857872964e-06, "loss": 17.7318, "step": 36748 }, { "epoch": 0.6717421902133183, "grad_norm": 6.750364250246587, "learning_rate": 2.5695749936305993e-06, "loss": 17.6165, "step": 36749 }, { "epoch": 0.6717604694097647, "grad_norm": 6.833095548554574, "learning_rate": 2.5693163099923303e-06, "loss": 17.7207, "step": 36750 }, { "epoch": 0.6717787486062112, "grad_norm": 6.207955746788913, "learning_rate": 2.5690576348733943e-06, "loss": 17.4296, "step": 36751 }, { "epoch": 0.6717970278026578, "grad_norm": 6.284097129110869, "learning_rate": 2.5687989682747003e-06, "loss": 17.5009, "step": 36752 }, { "epoch": 0.6718153069991043, "grad_norm": 5.909645935713016, "learning_rate": 2.568540310197156e-06, "loss": 17.3833, "step": 36753 }, { "epoch": 0.6718335861955509, "grad_norm": 6.77631545776497, "learning_rate": 2.568281660641668e-06, "loss": 17.5835, "step": 36754 }, { "epoch": 0.6718518653919974, "grad_norm": 5.798033177779246, "learning_rate": 2.568023019609142e-06, "loss": 17.2899, "step": 36755 }, { "epoch": 0.6718701445884439, "grad_norm": 11.147837978378636, "learning_rate": 2.5677643871004826e-06, "loss": 19.4172, "step": 36756 }, { "epoch": 0.6718884237848904, "grad_norm": 6.411754772559427, "learning_rate": 2.5675057631166003e-06, "loss": 17.4221, "step": 36757 }, { "epoch": 0.6719067029813369, "grad_norm": 4.638020937593502, "learning_rate": 2.567247147658398e-06, "loss": 16.9709, "step": 36758 }, { "epoch": 0.6719249821777835, "grad_norm": 6.265949792672445, "learning_rate": 2.566988540726782e-06, "loss": 17.5271, "step": 36759 }, { "epoch": 0.67194326137423, "grad_norm": 5.604681961719002, "learning_rate": 2.5667299423226623e-06, "loss": 17.1286, "step": 36760 }, { "epoch": 0.6719615405706765, "grad_norm": 6.3958709134959735, "learning_rate": 2.566471352446941e-06, "loss": 17.6337, "step": 36761 }, { "epoch": 0.6719798197671231, "grad_norm": 5.404380818644699, "learning_rate": 2.566212771100526e-06, "loss": 16.904, "step": 36762 }, { "epoch": 0.6719980989635695, "grad_norm": 7.30236404106507, "learning_rate": 2.5659541982843263e-06, "loss": 17.9318, "step": 36763 }, { "epoch": 0.6720163781600161, "grad_norm": 7.810069148951523, "learning_rate": 2.5656956339992456e-06, "loss": 17.5308, "step": 36764 }, { "epoch": 0.6720346573564626, "grad_norm": 5.459329237343659, "learning_rate": 2.5654370782461885e-06, "loss": 17.2933, "step": 36765 }, { "epoch": 0.6720529365529091, "grad_norm": 6.8926517332793615, "learning_rate": 2.565178531026066e-06, "loss": 17.4374, "step": 36766 }, { "epoch": 0.6720712157493557, "grad_norm": 5.395447466244793, "learning_rate": 2.5649199923397804e-06, "loss": 16.943, "step": 36767 }, { "epoch": 0.6720894949458022, "grad_norm": 5.717224804589894, "learning_rate": 2.564661462188237e-06, "loss": 16.8688, "step": 36768 }, { "epoch": 0.6721077741422488, "grad_norm": 5.508114645999943, "learning_rate": 2.5644029405723466e-06, "loss": 17.1179, "step": 36769 }, { "epoch": 0.6721260533386952, "grad_norm": 7.487585626878919, "learning_rate": 2.56414442749301e-06, "loss": 17.3319, "step": 36770 }, { "epoch": 0.6721443325351417, "grad_norm": 5.457591936723361, "learning_rate": 2.563885922951136e-06, "loss": 17.1502, "step": 36771 }, { "epoch": 0.6721626117315883, "grad_norm": 6.392186792316984, "learning_rate": 2.5636274269476326e-06, "loss": 17.6303, "step": 36772 }, { "epoch": 0.6721808909280348, "grad_norm": 7.039264491668809, "learning_rate": 2.5633689394834013e-06, "loss": 17.8102, "step": 36773 }, { "epoch": 0.6721991701244814, "grad_norm": 5.865691798272356, "learning_rate": 2.5631104605593525e-06, "loss": 17.3686, "step": 36774 }, { "epoch": 0.6722174493209279, "grad_norm": 5.400874923857863, "learning_rate": 2.5628519901763906e-06, "loss": 16.9614, "step": 36775 }, { "epoch": 0.6722357285173743, "grad_norm": 6.626622633867875, "learning_rate": 2.562593528335418e-06, "loss": 17.5943, "step": 36776 }, { "epoch": 0.6722540077138209, "grad_norm": 8.20898588197229, "learning_rate": 2.562335075037346e-06, "loss": 17.4831, "step": 36777 }, { "epoch": 0.6722722869102674, "grad_norm": 5.5082354920081755, "learning_rate": 2.5620766302830764e-06, "loss": 17.2021, "step": 36778 }, { "epoch": 0.6722905661067139, "grad_norm": 6.391715796988589, "learning_rate": 2.5618181940735164e-06, "loss": 17.3599, "step": 36779 }, { "epoch": 0.6723088453031605, "grad_norm": 7.250769588537359, "learning_rate": 2.5615597664095736e-06, "loss": 17.7071, "step": 36780 }, { "epoch": 0.672327124499607, "grad_norm": 6.489797961619284, "learning_rate": 2.5613013472921534e-06, "loss": 17.584, "step": 36781 }, { "epoch": 0.6723454036960536, "grad_norm": 4.912615770860211, "learning_rate": 2.5610429367221574e-06, "loss": 17.0095, "step": 36782 }, { "epoch": 0.6723636828925, "grad_norm": 7.116658799691163, "learning_rate": 2.560784534700496e-06, "loss": 17.8767, "step": 36783 }, { "epoch": 0.6723819620889465, "grad_norm": 6.583121443400043, "learning_rate": 2.560526141228072e-06, "loss": 17.8588, "step": 36784 }, { "epoch": 0.6724002412853931, "grad_norm": 4.753745153111234, "learning_rate": 2.5602677563057935e-06, "loss": 16.8825, "step": 36785 }, { "epoch": 0.6724185204818396, "grad_norm": 6.9538763408177555, "learning_rate": 2.5600093799345648e-06, "loss": 17.8894, "step": 36786 }, { "epoch": 0.6724367996782862, "grad_norm": 6.292608160010988, "learning_rate": 2.5597510121152903e-06, "loss": 17.6813, "step": 36787 }, { "epoch": 0.6724550788747327, "grad_norm": 6.1286490304471055, "learning_rate": 2.5594926528488763e-06, "loss": 17.5288, "step": 36788 }, { "epoch": 0.6724733580711791, "grad_norm": 5.630762706357102, "learning_rate": 2.559234302136231e-06, "loss": 17.0164, "step": 36789 }, { "epoch": 0.6724916372676257, "grad_norm": 6.719538254822908, "learning_rate": 2.558975959978255e-06, "loss": 17.76, "step": 36790 }, { "epoch": 0.6725099164640722, "grad_norm": 7.198101038068982, "learning_rate": 2.558717626375859e-06, "loss": 17.5133, "step": 36791 }, { "epoch": 0.6725281956605188, "grad_norm": 6.092337432991891, "learning_rate": 2.558459301329946e-06, "loss": 17.7537, "step": 36792 }, { "epoch": 0.6725464748569653, "grad_norm": 6.3748195733305115, "learning_rate": 2.558200984841419e-06, "loss": 17.4069, "step": 36793 }, { "epoch": 0.6725647540534118, "grad_norm": 6.254786927205425, "learning_rate": 2.557942676911187e-06, "loss": 17.4959, "step": 36794 }, { "epoch": 0.6725830332498584, "grad_norm": 5.300997699381162, "learning_rate": 2.557684377540155e-06, "loss": 17.1127, "step": 36795 }, { "epoch": 0.6726013124463048, "grad_norm": 7.12535179319831, "learning_rate": 2.557426086729225e-06, "loss": 17.8554, "step": 36796 }, { "epoch": 0.6726195916427514, "grad_norm": 5.371979902328699, "learning_rate": 2.5571678044793045e-06, "loss": 17.0161, "step": 36797 }, { "epoch": 0.6726378708391979, "grad_norm": 6.008972587729337, "learning_rate": 2.5569095307913012e-06, "loss": 17.1195, "step": 36798 }, { "epoch": 0.6726561500356444, "grad_norm": 6.61655147345642, "learning_rate": 2.5566512656661158e-06, "loss": 17.484, "step": 36799 }, { "epoch": 0.672674429232091, "grad_norm": 7.25350596510805, "learning_rate": 2.5563930091046574e-06, "loss": 16.7754, "step": 36800 }, { "epoch": 0.6726927084285375, "grad_norm": 4.738269643682905, "learning_rate": 2.556134761107828e-06, "loss": 16.7544, "step": 36801 }, { "epoch": 0.672710987624984, "grad_norm": 7.286488178243032, "learning_rate": 2.5558765216765356e-06, "loss": 17.5255, "step": 36802 }, { "epoch": 0.6727292668214305, "grad_norm": 6.744796663143347, "learning_rate": 2.5556182908116844e-06, "loss": 17.4806, "step": 36803 }, { "epoch": 0.672747546017877, "grad_norm": 5.461818682499335, "learning_rate": 2.5553600685141766e-06, "loss": 16.9793, "step": 36804 }, { "epoch": 0.6727658252143236, "grad_norm": 5.94202401421077, "learning_rate": 2.5551018547849218e-06, "loss": 17.3428, "step": 36805 }, { "epoch": 0.6727841044107701, "grad_norm": 5.835962876098419, "learning_rate": 2.554843649624821e-06, "loss": 17.2273, "step": 36806 }, { "epoch": 0.6728023836072167, "grad_norm": 6.616452751729782, "learning_rate": 2.5545854530347806e-06, "loss": 17.3113, "step": 36807 }, { "epoch": 0.6728206628036631, "grad_norm": 7.175218867399821, "learning_rate": 2.554327265015708e-06, "loss": 17.6665, "step": 36808 }, { "epoch": 0.6728389420001096, "grad_norm": 7.243619902010087, "learning_rate": 2.5540690855685067e-06, "loss": 18.0961, "step": 36809 }, { "epoch": 0.6728572211965562, "grad_norm": 7.304289091829536, "learning_rate": 2.5538109146940787e-06, "loss": 17.6297, "step": 36810 }, { "epoch": 0.6728755003930027, "grad_norm": 7.513980272097604, "learning_rate": 2.5535527523933334e-06, "loss": 17.546, "step": 36811 }, { "epoch": 0.6728937795894493, "grad_norm": 8.844592008273708, "learning_rate": 2.553294598667173e-06, "loss": 18.5148, "step": 36812 }, { "epoch": 0.6729120587858958, "grad_norm": 6.4950407585908065, "learning_rate": 2.5530364535165007e-06, "loss": 17.1565, "step": 36813 }, { "epoch": 0.6729303379823423, "grad_norm": 6.627177219409077, "learning_rate": 2.5527783169422255e-06, "loss": 17.2575, "step": 36814 }, { "epoch": 0.6729486171787888, "grad_norm": 6.328090199461282, "learning_rate": 2.5525201889452478e-06, "loss": 17.7202, "step": 36815 }, { "epoch": 0.6729668963752353, "grad_norm": 5.587091809122538, "learning_rate": 2.5522620695264742e-06, "loss": 16.8598, "step": 36816 }, { "epoch": 0.6729851755716819, "grad_norm": 6.029542668370447, "learning_rate": 2.552003958686813e-06, "loss": 17.5195, "step": 36817 }, { "epoch": 0.6730034547681284, "grad_norm": 6.829292398052803, "learning_rate": 2.551745856427162e-06, "loss": 17.4861, "step": 36818 }, { "epoch": 0.6730217339645749, "grad_norm": 6.668153778138426, "learning_rate": 2.551487762748432e-06, "loss": 17.4671, "step": 36819 }, { "epoch": 0.6730400131610215, "grad_norm": 6.684665949424571, "learning_rate": 2.5512296776515244e-06, "loss": 17.3819, "step": 36820 }, { "epoch": 0.673058292357468, "grad_norm": 7.135728025047189, "learning_rate": 2.550971601137343e-06, "loss": 17.8264, "step": 36821 }, { "epoch": 0.6730765715539145, "grad_norm": 6.400096684936857, "learning_rate": 2.550713533206795e-06, "loss": 17.4583, "step": 36822 }, { "epoch": 0.673094850750361, "grad_norm": 5.183049346302901, "learning_rate": 2.5504554738607834e-06, "loss": 16.9221, "step": 36823 }, { "epoch": 0.6731131299468075, "grad_norm": 6.837041848118584, "learning_rate": 2.5501974231002114e-06, "loss": 17.636, "step": 36824 }, { "epoch": 0.6731314091432541, "grad_norm": 8.467713765205993, "learning_rate": 2.549939380925985e-06, "loss": 17.3297, "step": 36825 }, { "epoch": 0.6731496883397006, "grad_norm": 5.77482724589513, "learning_rate": 2.54968134733901e-06, "loss": 17.3168, "step": 36826 }, { "epoch": 0.6731679675361472, "grad_norm": 6.703622013736188, "learning_rate": 2.5494233223401873e-06, "loss": 17.5911, "step": 36827 }, { "epoch": 0.6731862467325936, "grad_norm": 5.311369517286433, "learning_rate": 2.549165305930425e-06, "loss": 16.9365, "step": 36828 }, { "epoch": 0.6732045259290401, "grad_norm": 5.419417854375535, "learning_rate": 2.5489072981106257e-06, "loss": 17.2136, "step": 36829 }, { "epoch": 0.6732228051254867, "grad_norm": 5.281556890025347, "learning_rate": 2.5486492988816926e-06, "loss": 16.8122, "step": 36830 }, { "epoch": 0.6732410843219332, "grad_norm": 5.2613505531026235, "learning_rate": 2.5483913082445315e-06, "loss": 17.0381, "step": 36831 }, { "epoch": 0.6732593635183798, "grad_norm": 6.71046054305781, "learning_rate": 2.5481333262000452e-06, "loss": 17.5272, "step": 36832 }, { "epoch": 0.6732776427148263, "grad_norm": 5.086143576008314, "learning_rate": 2.5478753527491402e-06, "loss": 16.8692, "step": 36833 }, { "epoch": 0.6732959219112727, "grad_norm": 5.9344022828032, "learning_rate": 2.5476173878927173e-06, "loss": 17.6172, "step": 36834 }, { "epoch": 0.6733142011077193, "grad_norm": 4.9758843716864956, "learning_rate": 2.547359431631683e-06, "loss": 17.0131, "step": 36835 }, { "epoch": 0.6733324803041658, "grad_norm": 6.7854709192676275, "learning_rate": 2.547101483966943e-06, "loss": 17.6373, "step": 36836 }, { "epoch": 0.6733507595006124, "grad_norm": 6.112582289441476, "learning_rate": 2.5468435448993994e-06, "loss": 17.1597, "step": 36837 }, { "epoch": 0.6733690386970589, "grad_norm": 5.968142703804941, "learning_rate": 2.546585614429954e-06, "loss": 17.307, "step": 36838 }, { "epoch": 0.6733873178935054, "grad_norm": 7.199807388870573, "learning_rate": 2.546327692559516e-06, "loss": 17.7424, "step": 36839 }, { "epoch": 0.673405597089952, "grad_norm": 5.928311980631858, "learning_rate": 2.5460697792889854e-06, "loss": 17.1308, "step": 36840 }, { "epoch": 0.6734238762863984, "grad_norm": 6.252346739023567, "learning_rate": 2.545811874619266e-06, "loss": 17.5353, "step": 36841 }, { "epoch": 0.673442155482845, "grad_norm": 6.549214580993503, "learning_rate": 2.545553978551263e-06, "loss": 17.421, "step": 36842 }, { "epoch": 0.6734604346792915, "grad_norm": 6.133644286807443, "learning_rate": 2.545296091085882e-06, "loss": 17.3143, "step": 36843 }, { "epoch": 0.673478713875738, "grad_norm": 5.04496115214341, "learning_rate": 2.5450382122240236e-06, "loss": 16.9492, "step": 36844 }, { "epoch": 0.6734969930721846, "grad_norm": 4.840334415584219, "learning_rate": 2.5447803419665945e-06, "loss": 16.9103, "step": 36845 }, { "epoch": 0.6735152722686311, "grad_norm": 6.203534956399712, "learning_rate": 2.544522480314496e-06, "loss": 17.5429, "step": 36846 }, { "epoch": 0.6735335514650775, "grad_norm": 5.970766331526337, "learning_rate": 2.5442646272686344e-06, "loss": 17.2762, "step": 36847 }, { "epoch": 0.6735518306615241, "grad_norm": 6.080249931472712, "learning_rate": 2.544006782829913e-06, "loss": 17.5737, "step": 36848 }, { "epoch": 0.6735701098579706, "grad_norm": 7.075423932198839, "learning_rate": 2.5437489469992327e-06, "loss": 17.617, "step": 36849 }, { "epoch": 0.6735883890544172, "grad_norm": 6.774524500521785, "learning_rate": 2.5434911197775004e-06, "loss": 17.6255, "step": 36850 }, { "epoch": 0.6736066682508637, "grad_norm": 6.163634826404718, "learning_rate": 2.5432333011656174e-06, "loss": 17.3699, "step": 36851 }, { "epoch": 0.6736249474473102, "grad_norm": 6.315235105426331, "learning_rate": 2.5429754911644876e-06, "loss": 17.5048, "step": 36852 }, { "epoch": 0.6736432266437568, "grad_norm": 6.1791271529421925, "learning_rate": 2.542717689775018e-06, "loss": 17.2094, "step": 36853 }, { "epoch": 0.6736615058402032, "grad_norm": 11.033026768657303, "learning_rate": 2.5424598969981098e-06, "loss": 17.9425, "step": 36854 }, { "epoch": 0.6736797850366498, "grad_norm": 6.505655839831611, "learning_rate": 2.542202112834664e-06, "loss": 17.6007, "step": 36855 }, { "epoch": 0.6736980642330963, "grad_norm": 6.418739008457251, "learning_rate": 2.541944337285589e-06, "loss": 17.5346, "step": 36856 }, { "epoch": 0.6737163434295428, "grad_norm": 6.49949153793642, "learning_rate": 2.541686570351786e-06, "loss": 17.5071, "step": 36857 }, { "epoch": 0.6737346226259894, "grad_norm": 4.925410967425936, "learning_rate": 2.541428812034155e-06, "loss": 16.9736, "step": 36858 }, { "epoch": 0.6737529018224359, "grad_norm": 5.951258745650909, "learning_rate": 2.5411710623336054e-06, "loss": 17.2686, "step": 36859 }, { "epoch": 0.6737711810188824, "grad_norm": 5.75613595172683, "learning_rate": 2.540913321251036e-06, "loss": 17.0719, "step": 36860 }, { "epoch": 0.6737894602153289, "grad_norm": 5.100284515783923, "learning_rate": 2.5406555887873518e-06, "loss": 16.8006, "step": 36861 }, { "epoch": 0.6738077394117754, "grad_norm": 6.657508919321322, "learning_rate": 2.540397864943459e-06, "loss": 17.675, "step": 36862 }, { "epoch": 0.673826018608222, "grad_norm": 7.139122029725639, "learning_rate": 2.540140149720256e-06, "loss": 17.5075, "step": 36863 }, { "epoch": 0.6738442978046685, "grad_norm": 6.195441345487071, "learning_rate": 2.53988244311865e-06, "loss": 17.304, "step": 36864 }, { "epoch": 0.6738625770011151, "grad_norm": 5.986271874158885, "learning_rate": 2.5396247451395424e-06, "loss": 17.315, "step": 36865 }, { "epoch": 0.6738808561975616, "grad_norm": 6.0900346874454705, "learning_rate": 2.5393670557838348e-06, "loss": 17.0818, "step": 36866 }, { "epoch": 0.673899135394008, "grad_norm": 5.020860619599979, "learning_rate": 2.539109375052434e-06, "loss": 16.9734, "step": 36867 }, { "epoch": 0.6739174145904546, "grad_norm": 5.684996926533359, "learning_rate": 2.538851702946241e-06, "loss": 17.3124, "step": 36868 }, { "epoch": 0.6739356937869011, "grad_norm": 5.893572404035921, "learning_rate": 2.538594039466158e-06, "loss": 17.2602, "step": 36869 }, { "epoch": 0.6739539729833477, "grad_norm": 5.173908744182702, "learning_rate": 2.5383363846130894e-06, "loss": 16.9123, "step": 36870 }, { "epoch": 0.6739722521797942, "grad_norm": 4.727513775351568, "learning_rate": 2.538078738387939e-06, "loss": 16.7352, "step": 36871 }, { "epoch": 0.6739905313762407, "grad_norm": 6.4859339511219485, "learning_rate": 2.537821100791608e-06, "loss": 17.7833, "step": 36872 }, { "epoch": 0.6740088105726872, "grad_norm": 6.801996977697779, "learning_rate": 2.537563471825002e-06, "loss": 17.5314, "step": 36873 }, { "epoch": 0.6740270897691337, "grad_norm": 5.485856336125959, "learning_rate": 2.5373058514890226e-06, "loss": 17.1773, "step": 36874 }, { "epoch": 0.6740453689655803, "grad_norm": 5.904080670555492, "learning_rate": 2.5370482397845704e-06, "loss": 16.9988, "step": 36875 }, { "epoch": 0.6740636481620268, "grad_norm": 6.256382249306257, "learning_rate": 2.5367906367125524e-06, "loss": 17.546, "step": 36876 }, { "epoch": 0.6740819273584733, "grad_norm": 9.397427864039933, "learning_rate": 2.536533042273867e-06, "loss": 17.3744, "step": 36877 }, { "epoch": 0.6741002065549199, "grad_norm": 9.89298715236295, "learning_rate": 2.536275456469422e-06, "loss": 18.2416, "step": 36878 }, { "epoch": 0.6741184857513663, "grad_norm": 6.453446333580657, "learning_rate": 2.5360178793001157e-06, "loss": 17.3722, "step": 36879 }, { "epoch": 0.6741367649478129, "grad_norm": 5.7339379021639765, "learning_rate": 2.5357603107668525e-06, "loss": 17.0971, "step": 36880 }, { "epoch": 0.6741550441442594, "grad_norm": 5.816470987647504, "learning_rate": 2.5355027508705383e-06, "loss": 17.3474, "step": 36881 }, { "epoch": 0.6741733233407059, "grad_norm": 5.3309448425250405, "learning_rate": 2.5352451996120733e-06, "loss": 17.2356, "step": 36882 }, { "epoch": 0.6741916025371525, "grad_norm": 7.365707423838416, "learning_rate": 2.534987656992357e-06, "loss": 17.5595, "step": 36883 }, { "epoch": 0.674209881733599, "grad_norm": 6.895512797525457, "learning_rate": 2.5347301230122974e-06, "loss": 17.491, "step": 36884 }, { "epoch": 0.6742281609300456, "grad_norm": 6.960637882261122, "learning_rate": 2.5344725976727945e-06, "loss": 17.9175, "step": 36885 }, { "epoch": 0.674246440126492, "grad_norm": 9.798746056627476, "learning_rate": 2.53421508097475e-06, "loss": 18.3372, "step": 36886 }, { "epoch": 0.6742647193229385, "grad_norm": 5.317936448019001, "learning_rate": 2.533957572919069e-06, "loss": 17.2362, "step": 36887 }, { "epoch": 0.6742829985193851, "grad_norm": 5.782668614378546, "learning_rate": 2.5337000735066508e-06, "loss": 17.4218, "step": 36888 }, { "epoch": 0.6743012777158316, "grad_norm": 4.726742165933915, "learning_rate": 2.5334425827384e-06, "loss": 16.8828, "step": 36889 }, { "epoch": 0.6743195569122782, "grad_norm": 6.100485854482937, "learning_rate": 2.5331851006152207e-06, "loss": 17.1622, "step": 36890 }, { "epoch": 0.6743378361087247, "grad_norm": 6.553087497546268, "learning_rate": 2.532927627138014e-06, "loss": 17.3821, "step": 36891 }, { "epoch": 0.6743561153051711, "grad_norm": 6.019656339763372, "learning_rate": 2.53267016230768e-06, "loss": 17.6902, "step": 36892 }, { "epoch": 0.6743743945016177, "grad_norm": 6.897698363119377, "learning_rate": 2.532412706125124e-06, "loss": 17.7538, "step": 36893 }, { "epoch": 0.6743926736980642, "grad_norm": 5.85348738717179, "learning_rate": 2.532155258591246e-06, "loss": 17.1941, "step": 36894 }, { "epoch": 0.6744109528945108, "grad_norm": 6.308450708582062, "learning_rate": 2.531897819706952e-06, "loss": 17.6031, "step": 36895 }, { "epoch": 0.6744292320909573, "grad_norm": 6.133283088349549, "learning_rate": 2.531640389473141e-06, "loss": 17.5, "step": 36896 }, { "epoch": 0.6744475112874038, "grad_norm": 6.008190410592454, "learning_rate": 2.5313829678907155e-06, "loss": 17.4123, "step": 36897 }, { "epoch": 0.6744657904838504, "grad_norm": 6.680177106587713, "learning_rate": 2.5311255549605772e-06, "loss": 17.4584, "step": 36898 }, { "epoch": 0.6744840696802968, "grad_norm": 7.21646706349992, "learning_rate": 2.5308681506836323e-06, "loss": 17.9158, "step": 36899 }, { "epoch": 0.6745023488767434, "grad_norm": 5.655358712710966, "learning_rate": 2.5306107550607785e-06, "loss": 17.3059, "step": 36900 }, { "epoch": 0.6745206280731899, "grad_norm": 6.133192655574431, "learning_rate": 2.5303533680929214e-06, "loss": 17.4592, "step": 36901 }, { "epoch": 0.6745389072696364, "grad_norm": 5.543706219046697, "learning_rate": 2.5300959897809613e-06, "loss": 17.0618, "step": 36902 }, { "epoch": 0.674557186466083, "grad_norm": 5.146576965458228, "learning_rate": 2.5298386201257986e-06, "loss": 17.1356, "step": 36903 }, { "epoch": 0.6745754656625295, "grad_norm": 5.490496111213454, "learning_rate": 2.529581259128339e-06, "loss": 17.0872, "step": 36904 }, { "epoch": 0.674593744858976, "grad_norm": 4.954499249346568, "learning_rate": 2.529323906789481e-06, "loss": 17.027, "step": 36905 }, { "epoch": 0.6746120240554225, "grad_norm": 6.894420617119195, "learning_rate": 2.529066563110128e-06, "loss": 17.7167, "step": 36906 }, { "epoch": 0.674630303251869, "grad_norm": 5.9259007236161825, "learning_rate": 2.5288092280911846e-06, "loss": 17.3647, "step": 36907 }, { "epoch": 0.6746485824483156, "grad_norm": 6.427088873900386, "learning_rate": 2.528551901733548e-06, "loss": 17.4036, "step": 36908 }, { "epoch": 0.6746668616447621, "grad_norm": 6.454886128869648, "learning_rate": 2.5282945840381244e-06, "loss": 17.5651, "step": 36909 }, { "epoch": 0.6746851408412087, "grad_norm": 6.8013560303761365, "learning_rate": 2.5280372750058144e-06, "loss": 17.8701, "step": 36910 }, { "epoch": 0.6747034200376552, "grad_norm": 6.5298332207436856, "learning_rate": 2.5277799746375174e-06, "loss": 17.4522, "step": 36911 }, { "epoch": 0.6747216992341016, "grad_norm": 7.638713611924665, "learning_rate": 2.5275226829341383e-06, "loss": 17.7677, "step": 36912 }, { "epoch": 0.6747399784305482, "grad_norm": 5.805035144282274, "learning_rate": 2.527265399896578e-06, "loss": 17.4831, "step": 36913 }, { "epoch": 0.6747582576269947, "grad_norm": 5.773100744825104, "learning_rate": 2.5270081255257362e-06, "loss": 17.1545, "step": 36914 }, { "epoch": 0.6747765368234412, "grad_norm": 6.087995669050185, "learning_rate": 2.526750859822516e-06, "loss": 17.5851, "step": 36915 }, { "epoch": 0.6747948160198878, "grad_norm": 7.4640828591634145, "learning_rate": 2.5264936027878217e-06, "loss": 18.0085, "step": 36916 }, { "epoch": 0.6748130952163343, "grad_norm": 6.49915253856983, "learning_rate": 2.52623635442255e-06, "loss": 17.4154, "step": 36917 }, { "epoch": 0.6748313744127808, "grad_norm": 6.6666003524276825, "learning_rate": 2.525979114727608e-06, "loss": 17.4179, "step": 36918 }, { "epoch": 0.6748496536092273, "grad_norm": 5.940174842873415, "learning_rate": 2.5257218837038942e-06, "loss": 17.3308, "step": 36919 }, { "epoch": 0.6748679328056738, "grad_norm": 6.855113223784816, "learning_rate": 2.5254646613523083e-06, "loss": 17.3474, "step": 36920 }, { "epoch": 0.6748862120021204, "grad_norm": 6.477635115081762, "learning_rate": 2.5252074476737567e-06, "loss": 17.8781, "step": 36921 }, { "epoch": 0.6749044911985669, "grad_norm": 4.962468430466332, "learning_rate": 2.5249502426691354e-06, "loss": 16.8272, "step": 36922 }, { "epoch": 0.6749227703950135, "grad_norm": 6.800566592549459, "learning_rate": 2.524693046339351e-06, "loss": 17.8638, "step": 36923 }, { "epoch": 0.67494104959146, "grad_norm": 5.866316143349624, "learning_rate": 2.5244358586853007e-06, "loss": 17.2884, "step": 36924 }, { "epoch": 0.6749593287879064, "grad_norm": 5.228408662269457, "learning_rate": 2.524178679707888e-06, "loss": 16.9264, "step": 36925 }, { "epoch": 0.674977607984353, "grad_norm": 7.055987354125819, "learning_rate": 2.523921509408016e-06, "loss": 17.4325, "step": 36926 }, { "epoch": 0.6749958871807995, "grad_norm": 9.079000294037947, "learning_rate": 2.523664347786584e-06, "loss": 18.1714, "step": 36927 }, { "epoch": 0.6750141663772461, "grad_norm": 4.766809036376407, "learning_rate": 2.5234071948444926e-06, "loss": 16.8449, "step": 36928 }, { "epoch": 0.6750324455736926, "grad_norm": 7.916488811077637, "learning_rate": 2.523150050582645e-06, "loss": 17.9521, "step": 36929 }, { "epoch": 0.675050724770139, "grad_norm": 6.2495643073826175, "learning_rate": 2.522892915001942e-06, "loss": 17.1641, "step": 36930 }, { "epoch": 0.6750690039665856, "grad_norm": 7.218964051747192, "learning_rate": 2.5226357881032825e-06, "loss": 17.5975, "step": 36931 }, { "epoch": 0.6750872831630321, "grad_norm": 4.751381556279179, "learning_rate": 2.5223786698875713e-06, "loss": 16.8343, "step": 36932 }, { "epoch": 0.6751055623594787, "grad_norm": 6.430074465669415, "learning_rate": 2.522121560355706e-06, "loss": 17.2868, "step": 36933 }, { "epoch": 0.6751238415559252, "grad_norm": 7.5261331725964995, "learning_rate": 2.5218644595085894e-06, "loss": 17.72, "step": 36934 }, { "epoch": 0.6751421207523717, "grad_norm": 5.756375372304122, "learning_rate": 2.5216073673471254e-06, "loss": 17.2543, "step": 36935 }, { "epoch": 0.6751603999488183, "grad_norm": 5.750288279677658, "learning_rate": 2.521350283872212e-06, "loss": 17.3106, "step": 36936 }, { "epoch": 0.6751786791452647, "grad_norm": 8.488400072158882, "learning_rate": 2.521093209084749e-06, "loss": 18.4437, "step": 36937 }, { "epoch": 0.6751969583417113, "grad_norm": 5.213725973582232, "learning_rate": 2.520836142985641e-06, "loss": 17.2209, "step": 36938 }, { "epoch": 0.6752152375381578, "grad_norm": 8.203894500335284, "learning_rate": 2.520579085575785e-06, "loss": 18.0239, "step": 36939 }, { "epoch": 0.6752335167346043, "grad_norm": 6.307789471407584, "learning_rate": 2.5203220368560866e-06, "loss": 17.4452, "step": 36940 }, { "epoch": 0.6752517959310509, "grad_norm": 8.622162011856766, "learning_rate": 2.520064996827444e-06, "loss": 18.5033, "step": 36941 }, { "epoch": 0.6752700751274974, "grad_norm": 7.726589851487249, "learning_rate": 2.519807965490757e-06, "loss": 17.8563, "step": 36942 }, { "epoch": 0.675288354323944, "grad_norm": 6.548060415395145, "learning_rate": 2.5195509428469268e-06, "loss": 17.6303, "step": 36943 }, { "epoch": 0.6753066335203904, "grad_norm": 5.976109238780171, "learning_rate": 2.519293928896858e-06, "loss": 17.3251, "step": 36944 }, { "epoch": 0.6753249127168369, "grad_norm": 6.593093588009555, "learning_rate": 2.5190369236414468e-06, "loss": 17.2561, "step": 36945 }, { "epoch": 0.6753431919132835, "grad_norm": 6.1781571757141585, "learning_rate": 2.5187799270815977e-06, "loss": 17.2199, "step": 36946 }, { "epoch": 0.67536147110973, "grad_norm": 8.418937873001083, "learning_rate": 2.5185229392182097e-06, "loss": 17.9582, "step": 36947 }, { "epoch": 0.6753797503061766, "grad_norm": 6.642195859458853, "learning_rate": 2.518265960052181e-06, "loss": 17.7159, "step": 36948 }, { "epoch": 0.6753980295026231, "grad_norm": 7.616656830170899, "learning_rate": 2.5180089895844173e-06, "loss": 17.7261, "step": 36949 }, { "epoch": 0.6754163086990695, "grad_norm": 6.273139342562723, "learning_rate": 2.5177520278158164e-06, "loss": 17.4328, "step": 36950 }, { "epoch": 0.6754345878955161, "grad_norm": 5.245583163189319, "learning_rate": 2.5174950747472776e-06, "loss": 17.1707, "step": 36951 }, { "epoch": 0.6754528670919626, "grad_norm": 6.213864637070172, "learning_rate": 2.5172381303797027e-06, "loss": 17.3947, "step": 36952 }, { "epoch": 0.6754711462884092, "grad_norm": 5.4497695434496105, "learning_rate": 2.5169811947139933e-06, "loss": 17.0234, "step": 36953 }, { "epoch": 0.6754894254848557, "grad_norm": 7.367245287208084, "learning_rate": 2.5167242677510506e-06, "loss": 17.6056, "step": 36954 }, { "epoch": 0.6755077046813022, "grad_norm": 5.673125497253244, "learning_rate": 2.5164673494917746e-06, "loss": 17.0668, "step": 36955 }, { "epoch": 0.6755259838777488, "grad_norm": 6.626846754202844, "learning_rate": 2.5162104399370624e-06, "loss": 17.548, "step": 36956 }, { "epoch": 0.6755442630741952, "grad_norm": 5.680864795986176, "learning_rate": 2.51595353908782e-06, "loss": 16.9362, "step": 36957 }, { "epoch": 0.6755625422706418, "grad_norm": 6.206587313646295, "learning_rate": 2.515696646944944e-06, "loss": 17.413, "step": 36958 }, { "epoch": 0.6755808214670883, "grad_norm": 6.379992362613541, "learning_rate": 2.5154397635093336e-06, "loss": 17.6875, "step": 36959 }, { "epoch": 0.6755991006635348, "grad_norm": 5.203842273717025, "learning_rate": 2.5151828887818934e-06, "loss": 16.8328, "step": 36960 }, { "epoch": 0.6756173798599814, "grad_norm": 6.522723923472533, "learning_rate": 2.5149260227635197e-06, "loss": 17.2445, "step": 36961 }, { "epoch": 0.6756356590564279, "grad_norm": 6.95260328994503, "learning_rate": 2.5146691654551147e-06, "loss": 17.543, "step": 36962 }, { "epoch": 0.6756539382528745, "grad_norm": 4.982287052854615, "learning_rate": 2.514412316857581e-06, "loss": 16.9062, "step": 36963 }, { "epoch": 0.6756722174493209, "grad_norm": 6.57025877435293, "learning_rate": 2.5141554769718156e-06, "loss": 17.3557, "step": 36964 }, { "epoch": 0.6756904966457674, "grad_norm": 5.847622077894189, "learning_rate": 2.513898645798718e-06, "loss": 16.9907, "step": 36965 }, { "epoch": 0.675708775842214, "grad_norm": 6.480814008076313, "learning_rate": 2.513641823339192e-06, "loss": 17.0955, "step": 36966 }, { "epoch": 0.6757270550386605, "grad_norm": 6.254050652669174, "learning_rate": 2.5133850095941353e-06, "loss": 17.489, "step": 36967 }, { "epoch": 0.6757453342351071, "grad_norm": 4.96442877074905, "learning_rate": 2.5131282045644466e-06, "loss": 16.7467, "step": 36968 }, { "epoch": 0.6757636134315536, "grad_norm": 5.469396029064799, "learning_rate": 2.5128714082510277e-06, "loss": 17.0172, "step": 36969 }, { "epoch": 0.675781892628, "grad_norm": 5.696257341933933, "learning_rate": 2.5126146206547806e-06, "loss": 17.2587, "step": 36970 }, { "epoch": 0.6758001718244466, "grad_norm": 5.000723231979014, "learning_rate": 2.5123578417766015e-06, "loss": 16.8958, "step": 36971 }, { "epoch": 0.6758184510208931, "grad_norm": 7.054313367624607, "learning_rate": 2.5121010716173945e-06, "loss": 17.7923, "step": 36972 }, { "epoch": 0.6758367302173397, "grad_norm": 5.704479765045169, "learning_rate": 2.511844310178055e-06, "loss": 17.3614, "step": 36973 }, { "epoch": 0.6758550094137862, "grad_norm": 6.888217422257524, "learning_rate": 2.5115875574594873e-06, "loss": 17.615, "step": 36974 }, { "epoch": 0.6758732886102327, "grad_norm": 5.6165815955926846, "learning_rate": 2.511330813462589e-06, "loss": 16.8173, "step": 36975 }, { "epoch": 0.6758915678066792, "grad_norm": 6.531813476850076, "learning_rate": 2.511074078188259e-06, "loss": 17.6433, "step": 36976 }, { "epoch": 0.6759098470031257, "grad_norm": 6.3405711971067, "learning_rate": 2.5108173516374e-06, "loss": 17.2698, "step": 36977 }, { "epoch": 0.6759281261995723, "grad_norm": 7.024476431282566, "learning_rate": 2.5105606338109082e-06, "loss": 17.7244, "step": 36978 }, { "epoch": 0.6759464053960188, "grad_norm": 5.775543895609501, "learning_rate": 2.5103039247096855e-06, "loss": 17.2368, "step": 36979 }, { "epoch": 0.6759646845924653, "grad_norm": 6.226027760995114, "learning_rate": 2.5100472243346334e-06, "loss": 17.3565, "step": 36980 }, { "epoch": 0.6759829637889119, "grad_norm": 6.7216035467888755, "learning_rate": 2.5097905326866494e-06, "loss": 16.9326, "step": 36981 }, { "epoch": 0.6760012429853584, "grad_norm": 7.166845794209617, "learning_rate": 2.5095338497666323e-06, "loss": 17.6661, "step": 36982 }, { "epoch": 0.6760195221818048, "grad_norm": 5.804508191373135, "learning_rate": 2.509277175575484e-06, "loss": 17.3923, "step": 36983 }, { "epoch": 0.6760378013782514, "grad_norm": 6.518559137635363, "learning_rate": 2.509020510114102e-06, "loss": 17.7003, "step": 36984 }, { "epoch": 0.6760560805746979, "grad_norm": 4.678905509408193, "learning_rate": 2.508763853383388e-06, "loss": 16.8763, "step": 36985 }, { "epoch": 0.6760743597711445, "grad_norm": 6.281153337729005, "learning_rate": 2.508507205384241e-06, "loss": 17.2022, "step": 36986 }, { "epoch": 0.676092638967591, "grad_norm": 5.8141526215378, "learning_rate": 2.5082505661175582e-06, "loss": 17.2379, "step": 36987 }, { "epoch": 0.6761109181640375, "grad_norm": 6.53210782216442, "learning_rate": 2.507993935584241e-06, "loss": 17.5767, "step": 36988 }, { "epoch": 0.676129197360484, "grad_norm": 5.765280981573084, "learning_rate": 2.50773731378519e-06, "loss": 16.9768, "step": 36989 }, { "epoch": 0.6761474765569305, "grad_norm": 5.987924530919301, "learning_rate": 2.507480700721302e-06, "loss": 17.1561, "step": 36990 }, { "epoch": 0.6761657557533771, "grad_norm": 6.929362179702755, "learning_rate": 2.5072240963934803e-06, "loss": 17.555, "step": 36991 }, { "epoch": 0.6761840349498236, "grad_norm": 5.439945387558884, "learning_rate": 2.5069675008026206e-06, "loss": 17.2509, "step": 36992 }, { "epoch": 0.6762023141462701, "grad_norm": 5.420165451312166, "learning_rate": 2.5067109139496228e-06, "loss": 16.9172, "step": 36993 }, { "epoch": 0.6762205933427167, "grad_norm": 5.276891781576745, "learning_rate": 2.5064543358353876e-06, "loss": 17.0952, "step": 36994 }, { "epoch": 0.6762388725391631, "grad_norm": 5.306720223597308, "learning_rate": 2.506197766460815e-06, "loss": 16.779, "step": 36995 }, { "epoch": 0.6762571517356097, "grad_norm": 5.5047307580794556, "learning_rate": 2.5059412058267997e-06, "loss": 17.1713, "step": 36996 }, { "epoch": 0.6762754309320562, "grad_norm": 6.51181732672047, "learning_rate": 2.505684653934245e-06, "loss": 17.4679, "step": 36997 }, { "epoch": 0.6762937101285027, "grad_norm": 6.260799988199018, "learning_rate": 2.5054281107840504e-06, "loss": 17.3272, "step": 36998 }, { "epoch": 0.6763119893249493, "grad_norm": 5.945136956334629, "learning_rate": 2.505171576377112e-06, "loss": 17.4872, "step": 36999 }, { "epoch": 0.6763302685213958, "grad_norm": 5.566400231776884, "learning_rate": 2.5049150507143326e-06, "loss": 17.2125, "step": 37000 }, { "epoch": 0.6763485477178424, "grad_norm": 6.042289996069726, "learning_rate": 2.504658533796608e-06, "loss": 17.2471, "step": 37001 }, { "epoch": 0.6763668269142888, "grad_norm": 6.782275440648341, "learning_rate": 2.5044020256248403e-06, "loss": 17.8059, "step": 37002 }, { "epoch": 0.6763851061107353, "grad_norm": 5.430189162012063, "learning_rate": 2.5041455261999265e-06, "loss": 17.0597, "step": 37003 }, { "epoch": 0.6764033853071819, "grad_norm": 6.969102984478228, "learning_rate": 2.5038890355227646e-06, "loss": 17.6826, "step": 37004 }, { "epoch": 0.6764216645036284, "grad_norm": 7.1768536879524625, "learning_rate": 2.503632553594257e-06, "loss": 17.6257, "step": 37005 }, { "epoch": 0.676439943700075, "grad_norm": 5.326681172223977, "learning_rate": 2.5033760804152986e-06, "loss": 17.1742, "step": 37006 }, { "epoch": 0.6764582228965215, "grad_norm": 7.601498382734814, "learning_rate": 2.50311961598679e-06, "loss": 17.5673, "step": 37007 }, { "epoch": 0.676476502092968, "grad_norm": 6.377809199333335, "learning_rate": 2.5028631603096333e-06, "loss": 17.519, "step": 37008 }, { "epoch": 0.6764947812894145, "grad_norm": 6.354532760947676, "learning_rate": 2.5026067133847237e-06, "loss": 17.4621, "step": 37009 }, { "epoch": 0.676513060485861, "grad_norm": 6.227279354377499, "learning_rate": 2.502350275212959e-06, "loss": 17.4275, "step": 37010 }, { "epoch": 0.6765313396823076, "grad_norm": 6.868474789493266, "learning_rate": 2.5020938457952426e-06, "loss": 17.3752, "step": 37011 }, { "epoch": 0.6765496188787541, "grad_norm": 5.901659875754243, "learning_rate": 2.5018374251324696e-06, "loss": 17.2252, "step": 37012 }, { "epoch": 0.6765678980752006, "grad_norm": 7.420852703664449, "learning_rate": 2.501581013225538e-06, "loss": 18.1631, "step": 37013 }, { "epoch": 0.6765861772716472, "grad_norm": 4.965194264484196, "learning_rate": 2.5013246100753506e-06, "loss": 17.0451, "step": 37014 }, { "epoch": 0.6766044564680936, "grad_norm": 5.734228972949637, "learning_rate": 2.5010682156828014e-06, "loss": 17.4213, "step": 37015 }, { "epoch": 0.6766227356645402, "grad_norm": 6.39408786520918, "learning_rate": 2.5008118300487906e-06, "loss": 17.478, "step": 37016 }, { "epoch": 0.6766410148609867, "grad_norm": 5.40892871258429, "learning_rate": 2.5005554531742206e-06, "loss": 16.905, "step": 37017 }, { "epoch": 0.6766592940574332, "grad_norm": 6.98808420821352, "learning_rate": 2.500299085059984e-06, "loss": 17.6398, "step": 37018 }, { "epoch": 0.6766775732538798, "grad_norm": 6.147356250276058, "learning_rate": 2.5000427257069837e-06, "loss": 17.342, "step": 37019 }, { "epoch": 0.6766958524503263, "grad_norm": 6.1606473115812665, "learning_rate": 2.4997863751161176e-06, "loss": 17.3158, "step": 37020 }, { "epoch": 0.6767141316467729, "grad_norm": 5.335735687814873, "learning_rate": 2.499530033288281e-06, "loss": 17.0585, "step": 37021 }, { "epoch": 0.6767324108432193, "grad_norm": 5.347195855541759, "learning_rate": 2.4992737002243767e-06, "loss": 16.9025, "step": 37022 }, { "epoch": 0.6767506900396658, "grad_norm": 6.217813071850798, "learning_rate": 2.499017375925301e-06, "loss": 17.2287, "step": 37023 }, { "epoch": 0.6767689692361124, "grad_norm": 6.24968436698523, "learning_rate": 2.498761060391951e-06, "loss": 17.421, "step": 37024 }, { "epoch": 0.6767872484325589, "grad_norm": 5.861720105848498, "learning_rate": 2.4985047536252256e-06, "loss": 17.0424, "step": 37025 }, { "epoch": 0.6768055276290055, "grad_norm": 6.410539696055421, "learning_rate": 2.498248455626026e-06, "loss": 17.3488, "step": 37026 }, { "epoch": 0.676823806825452, "grad_norm": 6.815884764887663, "learning_rate": 2.4979921663952474e-06, "loss": 17.8015, "step": 37027 }, { "epoch": 0.6768420860218984, "grad_norm": 6.912125569680586, "learning_rate": 2.49773588593379e-06, "loss": 17.9453, "step": 37028 }, { "epoch": 0.676860365218345, "grad_norm": 6.928076554702521, "learning_rate": 2.497479614242552e-06, "loss": 17.6856, "step": 37029 }, { "epoch": 0.6768786444147915, "grad_norm": 6.826588818424225, "learning_rate": 2.4972233513224283e-06, "loss": 17.7981, "step": 37030 }, { "epoch": 0.6768969236112381, "grad_norm": 6.404711126568368, "learning_rate": 2.4969670971743214e-06, "loss": 17.2082, "step": 37031 }, { "epoch": 0.6769152028076846, "grad_norm": 5.601497163833981, "learning_rate": 2.4967108517991263e-06, "loss": 17.2513, "step": 37032 }, { "epoch": 0.6769334820041311, "grad_norm": 5.4537050159739655, "learning_rate": 2.496454615197742e-06, "loss": 16.9539, "step": 37033 }, { "epoch": 0.6769517612005777, "grad_norm": 6.154772729274445, "learning_rate": 2.496198387371069e-06, "loss": 17.1798, "step": 37034 }, { "epoch": 0.6769700403970241, "grad_norm": 5.41642464318987, "learning_rate": 2.495942168320002e-06, "loss": 16.9177, "step": 37035 }, { "epoch": 0.6769883195934707, "grad_norm": 7.230449871088516, "learning_rate": 2.495685958045442e-06, "loss": 17.5212, "step": 37036 }, { "epoch": 0.6770065987899172, "grad_norm": 5.51180317644944, "learning_rate": 2.495429756548285e-06, "loss": 17.2362, "step": 37037 }, { "epoch": 0.6770248779863637, "grad_norm": 6.6595027953722745, "learning_rate": 2.495173563829428e-06, "loss": 17.536, "step": 37038 }, { "epoch": 0.6770431571828103, "grad_norm": 6.210512351893251, "learning_rate": 2.4949173798897718e-06, "loss": 17.4309, "step": 37039 }, { "epoch": 0.6770614363792568, "grad_norm": 6.946627732371843, "learning_rate": 2.4946612047302126e-06, "loss": 17.8878, "step": 37040 }, { "epoch": 0.6770797155757033, "grad_norm": 6.075732834575531, "learning_rate": 2.494405038351647e-06, "loss": 17.3632, "step": 37041 }, { "epoch": 0.6770979947721498, "grad_norm": 6.369727734332732, "learning_rate": 2.4941488807549747e-06, "loss": 17.5909, "step": 37042 }, { "epoch": 0.6771162739685963, "grad_norm": 6.920999542070879, "learning_rate": 2.4938927319410942e-06, "loss": 17.7182, "step": 37043 }, { "epoch": 0.6771345531650429, "grad_norm": 7.385704205886248, "learning_rate": 2.4936365919109005e-06, "loss": 17.6752, "step": 37044 }, { "epoch": 0.6771528323614894, "grad_norm": 8.209705206428987, "learning_rate": 2.4933804606652957e-06, "loss": 18.5355, "step": 37045 }, { "epoch": 0.677171111557936, "grad_norm": 7.089835101183058, "learning_rate": 2.4931243382051724e-06, "loss": 17.6832, "step": 37046 }, { "epoch": 0.6771893907543824, "grad_norm": 5.928883195521532, "learning_rate": 2.492868224531432e-06, "loss": 17.3628, "step": 37047 }, { "epoch": 0.6772076699508289, "grad_norm": 6.753716668421661, "learning_rate": 2.492612119644972e-06, "loss": 17.5531, "step": 37048 }, { "epoch": 0.6772259491472755, "grad_norm": 5.568875836396526, "learning_rate": 2.4923560235466864e-06, "loss": 17.143, "step": 37049 }, { "epoch": 0.677244228343722, "grad_norm": 5.886205944861283, "learning_rate": 2.4920999362374776e-06, "loss": 17.1472, "step": 37050 }, { "epoch": 0.6772625075401685, "grad_norm": 6.374232462351, "learning_rate": 2.4918438577182384e-06, "loss": 17.4021, "step": 37051 }, { "epoch": 0.6772807867366151, "grad_norm": 6.966393674064897, "learning_rate": 2.4915877879898697e-06, "loss": 17.7539, "step": 37052 }, { "epoch": 0.6772990659330616, "grad_norm": 6.343982404470298, "learning_rate": 2.4913317270532692e-06, "loss": 17.4808, "step": 37053 }, { "epoch": 0.6773173451295081, "grad_norm": 5.501579206756037, "learning_rate": 2.491075674909334e-06, "loss": 17.2201, "step": 37054 }, { "epoch": 0.6773356243259546, "grad_norm": 5.51470634552518, "learning_rate": 2.490819631558959e-06, "loss": 16.9227, "step": 37055 }, { "epoch": 0.6773539035224011, "grad_norm": 7.15061579736555, "learning_rate": 2.4905635970030446e-06, "loss": 17.4786, "step": 37056 }, { "epoch": 0.6773721827188477, "grad_norm": 6.0002459234446865, "learning_rate": 2.4903075712424875e-06, "loss": 17.2188, "step": 37057 }, { "epoch": 0.6773904619152942, "grad_norm": 6.448897858452198, "learning_rate": 2.490051554278183e-06, "loss": 17.5438, "step": 37058 }, { "epoch": 0.6774087411117408, "grad_norm": 6.650495508235899, "learning_rate": 2.489795546111031e-06, "loss": 17.4944, "step": 37059 }, { "epoch": 0.6774270203081872, "grad_norm": 5.767945795029145, "learning_rate": 2.4895395467419265e-06, "loss": 17.0843, "step": 37060 }, { "epoch": 0.6774452995046337, "grad_norm": 6.496669640962219, "learning_rate": 2.489283556171768e-06, "loss": 17.5919, "step": 37061 }, { "epoch": 0.6774635787010803, "grad_norm": 6.122620883095489, "learning_rate": 2.4890275744014547e-06, "loss": 17.4642, "step": 37062 }, { "epoch": 0.6774818578975268, "grad_norm": 7.916009403154166, "learning_rate": 2.4887716014318797e-06, "loss": 17.8908, "step": 37063 }, { "epoch": 0.6775001370939734, "grad_norm": 6.818828894907492, "learning_rate": 2.4885156372639446e-06, "loss": 17.635, "step": 37064 }, { "epoch": 0.6775184162904199, "grad_norm": 6.938524853846566, "learning_rate": 2.4882596818985437e-06, "loss": 18.0406, "step": 37065 }, { "epoch": 0.6775366954868663, "grad_norm": 7.009227572082565, "learning_rate": 2.4880037353365735e-06, "loss": 17.4906, "step": 37066 }, { "epoch": 0.6775549746833129, "grad_norm": 5.438803892817472, "learning_rate": 2.4877477975789334e-06, "loss": 17.0978, "step": 37067 }, { "epoch": 0.6775732538797594, "grad_norm": 11.372405457520658, "learning_rate": 2.48749186862652e-06, "loss": 19.2486, "step": 37068 }, { "epoch": 0.677591533076206, "grad_norm": 7.445734638287041, "learning_rate": 2.4872359484802266e-06, "loss": 17.5148, "step": 37069 }, { "epoch": 0.6776098122726525, "grad_norm": 5.716714741632272, "learning_rate": 2.4869800371409543e-06, "loss": 16.8842, "step": 37070 }, { "epoch": 0.677628091469099, "grad_norm": 5.800169345849812, "learning_rate": 2.4867241346096e-06, "loss": 17.1635, "step": 37071 }, { "epoch": 0.6776463706655456, "grad_norm": 6.410308593817554, "learning_rate": 2.4864682408870585e-06, "loss": 17.6783, "step": 37072 }, { "epoch": 0.677664649861992, "grad_norm": 6.2621267205292295, "learning_rate": 2.4862123559742285e-06, "loss": 17.307, "step": 37073 }, { "epoch": 0.6776829290584386, "grad_norm": 6.974126131106705, "learning_rate": 2.4859564798720066e-06, "loss": 17.6501, "step": 37074 }, { "epoch": 0.6777012082548851, "grad_norm": 6.248083444505958, "learning_rate": 2.4857006125812872e-06, "loss": 17.3914, "step": 37075 }, { "epoch": 0.6777194874513316, "grad_norm": 5.8745468487756485, "learning_rate": 2.4854447541029707e-06, "loss": 17.2865, "step": 37076 }, { "epoch": 0.6777377666477782, "grad_norm": 6.494202768379381, "learning_rate": 2.4851889044379507e-06, "loss": 17.4257, "step": 37077 }, { "epoch": 0.6777560458442247, "grad_norm": 5.215459164679868, "learning_rate": 2.4849330635871267e-06, "loss": 16.9622, "step": 37078 }, { "epoch": 0.6777743250406713, "grad_norm": 6.541272678104408, "learning_rate": 2.484677231551393e-06, "loss": 17.527, "step": 37079 }, { "epoch": 0.6777926042371177, "grad_norm": 6.183326058097124, "learning_rate": 2.4844214083316465e-06, "loss": 17.4566, "step": 37080 }, { "epoch": 0.6778108834335642, "grad_norm": 4.939054265519828, "learning_rate": 2.4841655939287868e-06, "loss": 17.0421, "step": 37081 }, { "epoch": 0.6778291626300108, "grad_norm": 6.18621331459307, "learning_rate": 2.483909788343708e-06, "loss": 17.3355, "step": 37082 }, { "epoch": 0.6778474418264573, "grad_norm": 7.12301343972986, "learning_rate": 2.4836539915773056e-06, "loss": 17.6337, "step": 37083 }, { "epoch": 0.6778657210229039, "grad_norm": 6.10490827703323, "learning_rate": 2.4833982036304794e-06, "loss": 17.2078, "step": 37084 }, { "epoch": 0.6778840002193504, "grad_norm": 5.012399134867166, "learning_rate": 2.483142424504124e-06, "loss": 16.8862, "step": 37085 }, { "epoch": 0.6779022794157968, "grad_norm": 7.490013255055936, "learning_rate": 2.482886654199135e-06, "loss": 17.7415, "step": 37086 }, { "epoch": 0.6779205586122434, "grad_norm": 5.401640312079532, "learning_rate": 2.4826308927164107e-06, "loss": 17.0408, "step": 37087 }, { "epoch": 0.6779388378086899, "grad_norm": 6.248538863107058, "learning_rate": 2.4823751400568454e-06, "loss": 17.3456, "step": 37088 }, { "epoch": 0.6779571170051365, "grad_norm": 7.312198747390068, "learning_rate": 2.4821193962213365e-06, "loss": 17.5242, "step": 37089 }, { "epoch": 0.677975396201583, "grad_norm": 6.009123734904509, "learning_rate": 2.481863661210782e-06, "loss": 17.1328, "step": 37090 }, { "epoch": 0.6779936753980295, "grad_norm": 8.083291563982511, "learning_rate": 2.4816079350260757e-06, "loss": 17.7098, "step": 37091 }, { "epoch": 0.678011954594476, "grad_norm": 6.38973585892633, "learning_rate": 2.4813522176681166e-06, "loss": 17.7994, "step": 37092 }, { "epoch": 0.6780302337909225, "grad_norm": 6.597493157422973, "learning_rate": 2.4810965091377993e-06, "loss": 17.3655, "step": 37093 }, { "epoch": 0.6780485129873691, "grad_norm": 5.854648742343004, "learning_rate": 2.480840809436019e-06, "loss": 17.3449, "step": 37094 }, { "epoch": 0.6780667921838156, "grad_norm": 6.847428042563206, "learning_rate": 2.480585118563674e-06, "loss": 17.8421, "step": 37095 }, { "epoch": 0.6780850713802621, "grad_norm": 6.956359870442443, "learning_rate": 2.4803294365216578e-06, "loss": 17.3105, "step": 37096 }, { "epoch": 0.6781033505767087, "grad_norm": 6.857358226393509, "learning_rate": 2.480073763310871e-06, "loss": 17.906, "step": 37097 }, { "epoch": 0.6781216297731552, "grad_norm": 5.71184839257685, "learning_rate": 2.479818098932204e-06, "loss": 17.2992, "step": 37098 }, { "epoch": 0.6781399089696017, "grad_norm": 6.694492736628793, "learning_rate": 2.4795624433865585e-06, "loss": 17.7226, "step": 37099 }, { "epoch": 0.6781581881660482, "grad_norm": 5.905830328863559, "learning_rate": 2.479306796674826e-06, "loss": 17.2068, "step": 37100 }, { "epoch": 0.6781764673624947, "grad_norm": 7.592238495863981, "learning_rate": 2.479051158797906e-06, "loss": 17.9002, "step": 37101 }, { "epoch": 0.6781947465589413, "grad_norm": 6.241565872390188, "learning_rate": 2.4787955297566927e-06, "loss": 17.3213, "step": 37102 }, { "epoch": 0.6782130257553878, "grad_norm": 5.9097268873714, "learning_rate": 2.478539909552081e-06, "loss": 17.2389, "step": 37103 }, { "epoch": 0.6782313049518344, "grad_norm": 7.362159526829282, "learning_rate": 2.4782842981849696e-06, "loss": 17.9543, "step": 37104 }, { "epoch": 0.6782495841482808, "grad_norm": 7.068120329114468, "learning_rate": 2.4780286956562517e-06, "loss": 17.7063, "step": 37105 }, { "epoch": 0.6782678633447273, "grad_norm": 5.838676908153238, "learning_rate": 2.4777731019668237e-06, "loss": 17.3358, "step": 37106 }, { "epoch": 0.6782861425411739, "grad_norm": 5.786310693466275, "learning_rate": 2.477517517117584e-06, "loss": 17.0896, "step": 37107 }, { "epoch": 0.6783044217376204, "grad_norm": 7.724086252703147, "learning_rate": 2.477261941109425e-06, "loss": 18.2573, "step": 37108 }, { "epoch": 0.678322700934067, "grad_norm": 6.1786568422879276, "learning_rate": 2.477006373943245e-06, "loss": 17.3692, "step": 37109 }, { "epoch": 0.6783409801305135, "grad_norm": 6.672742923692836, "learning_rate": 2.4767508156199397e-06, "loss": 17.5738, "step": 37110 }, { "epoch": 0.67835925932696, "grad_norm": 5.912284171360043, "learning_rate": 2.476495266140402e-06, "loss": 16.9662, "step": 37111 }, { "epoch": 0.6783775385234065, "grad_norm": 5.57821149517868, "learning_rate": 2.476239725505531e-06, "loss": 17.1346, "step": 37112 }, { "epoch": 0.678395817719853, "grad_norm": 7.6503170392447535, "learning_rate": 2.4759841937162204e-06, "loss": 17.3026, "step": 37113 }, { "epoch": 0.6784140969162996, "grad_norm": 6.988442318577212, "learning_rate": 2.4757286707733647e-06, "loss": 17.2642, "step": 37114 }, { "epoch": 0.6784323761127461, "grad_norm": 5.81126655286114, "learning_rate": 2.4754731566778613e-06, "loss": 17.2227, "step": 37115 }, { "epoch": 0.6784506553091926, "grad_norm": 5.8495756079198875, "learning_rate": 2.4752176514306064e-06, "loss": 17.0497, "step": 37116 }, { "epoch": 0.6784689345056392, "grad_norm": 7.1068099802881015, "learning_rate": 2.474962155032493e-06, "loss": 17.9511, "step": 37117 }, { "epoch": 0.6784872137020856, "grad_norm": 6.438853570306964, "learning_rate": 2.4747066674844206e-06, "loss": 17.4478, "step": 37118 }, { "epoch": 0.6785054928985321, "grad_norm": 6.618262237375264, "learning_rate": 2.4744511887872813e-06, "loss": 17.6229, "step": 37119 }, { "epoch": 0.6785237720949787, "grad_norm": 6.222438506238319, "learning_rate": 2.4741957189419704e-06, "loss": 16.8812, "step": 37120 }, { "epoch": 0.6785420512914252, "grad_norm": 5.786611978095157, "learning_rate": 2.4739402579493854e-06, "loss": 17.1312, "step": 37121 }, { "epoch": 0.6785603304878718, "grad_norm": 5.433827631138182, "learning_rate": 2.473684805810419e-06, "loss": 17.1744, "step": 37122 }, { "epoch": 0.6785786096843183, "grad_norm": 6.259306840615119, "learning_rate": 2.4734293625259705e-06, "loss": 17.0473, "step": 37123 }, { "epoch": 0.6785968888807647, "grad_norm": 6.884711910814783, "learning_rate": 2.4731739280969304e-06, "loss": 17.6784, "step": 37124 }, { "epoch": 0.6786151680772113, "grad_norm": 5.309570392308596, "learning_rate": 2.4729185025241966e-06, "loss": 16.9855, "step": 37125 }, { "epoch": 0.6786334472736578, "grad_norm": 6.041966093366931, "learning_rate": 2.472663085808666e-06, "loss": 17.2967, "step": 37126 }, { "epoch": 0.6786517264701044, "grad_norm": 4.577118308235879, "learning_rate": 2.472407677951232e-06, "loss": 16.7876, "step": 37127 }, { "epoch": 0.6786700056665509, "grad_norm": 6.009701147283619, "learning_rate": 2.472152278952788e-06, "loss": 17.4153, "step": 37128 }, { "epoch": 0.6786882848629974, "grad_norm": 7.721026152527422, "learning_rate": 2.4718968888142326e-06, "loss": 18.1464, "step": 37129 }, { "epoch": 0.678706564059444, "grad_norm": 5.960821132376315, "learning_rate": 2.471641507536459e-06, "loss": 17.5641, "step": 37130 }, { "epoch": 0.6787248432558904, "grad_norm": 6.3444688084312935, "learning_rate": 2.4713861351203612e-06, "loss": 17.2917, "step": 37131 }, { "epoch": 0.678743122452337, "grad_norm": 6.359384718455625, "learning_rate": 2.471130771566837e-06, "loss": 17.6482, "step": 37132 }, { "epoch": 0.6787614016487835, "grad_norm": 7.524157823122922, "learning_rate": 2.4708754168767783e-06, "loss": 17.6593, "step": 37133 }, { "epoch": 0.67877968084523, "grad_norm": 5.996783178432732, "learning_rate": 2.4706200710510818e-06, "loss": 17.1803, "step": 37134 }, { "epoch": 0.6787979600416766, "grad_norm": 7.081064332597083, "learning_rate": 2.4703647340906447e-06, "loss": 17.3527, "step": 37135 }, { "epoch": 0.6788162392381231, "grad_norm": 7.886268150823873, "learning_rate": 2.4701094059963593e-06, "loss": 18.0638, "step": 37136 }, { "epoch": 0.6788345184345697, "grad_norm": 5.054621070148273, "learning_rate": 2.469854086769119e-06, "loss": 17.0251, "step": 37137 }, { "epoch": 0.6788527976310161, "grad_norm": 6.346172501034278, "learning_rate": 2.4695987764098223e-06, "loss": 17.2571, "step": 37138 }, { "epoch": 0.6788710768274626, "grad_norm": 9.8615740198686, "learning_rate": 2.469343474919361e-06, "loss": 18.7006, "step": 37139 }, { "epoch": 0.6788893560239092, "grad_norm": 6.2466967736176855, "learning_rate": 2.4690881822986324e-06, "loss": 17.2761, "step": 37140 }, { "epoch": 0.6789076352203557, "grad_norm": 5.795399006044011, "learning_rate": 2.4688328985485306e-06, "loss": 17.1834, "step": 37141 }, { "epoch": 0.6789259144168023, "grad_norm": 6.408618965218218, "learning_rate": 2.468577623669948e-06, "loss": 17.4018, "step": 37142 }, { "epoch": 0.6789441936132488, "grad_norm": 5.791900547409181, "learning_rate": 2.4683223576637806e-06, "loss": 17.0456, "step": 37143 }, { "epoch": 0.6789624728096952, "grad_norm": 5.419695051140409, "learning_rate": 2.468067100530926e-06, "loss": 17.2694, "step": 37144 }, { "epoch": 0.6789807520061418, "grad_norm": 6.299394423767502, "learning_rate": 2.467811852272274e-06, "loss": 17.3892, "step": 37145 }, { "epoch": 0.6789990312025883, "grad_norm": 5.8295934837342065, "learning_rate": 2.467556612888725e-06, "loss": 17.0541, "step": 37146 }, { "epoch": 0.6790173103990349, "grad_norm": 6.35110453045869, "learning_rate": 2.467301382381169e-06, "loss": 17.2042, "step": 37147 }, { "epoch": 0.6790355895954814, "grad_norm": 5.943768780205392, "learning_rate": 2.4670461607505004e-06, "loss": 17.1196, "step": 37148 }, { "epoch": 0.6790538687919279, "grad_norm": 7.021919872931838, "learning_rate": 2.466790947997617e-06, "loss": 17.5411, "step": 37149 }, { "epoch": 0.6790721479883745, "grad_norm": 6.751485548139822, "learning_rate": 2.4665357441234094e-06, "loss": 17.618, "step": 37150 }, { "epoch": 0.6790904271848209, "grad_norm": 4.7062785914488945, "learning_rate": 2.4662805491287766e-06, "loss": 16.8585, "step": 37151 }, { "epoch": 0.6791087063812675, "grad_norm": 6.963075825722116, "learning_rate": 2.4660253630146088e-06, "loss": 17.6202, "step": 37152 }, { "epoch": 0.679126985577714, "grad_norm": 6.812894334892663, "learning_rate": 2.4657701857818016e-06, "loss": 17.4342, "step": 37153 }, { "epoch": 0.6791452647741605, "grad_norm": 5.039737361523128, "learning_rate": 2.465515017431252e-06, "loss": 16.951, "step": 37154 }, { "epoch": 0.6791635439706071, "grad_norm": 7.627602999596206, "learning_rate": 2.4652598579638527e-06, "loss": 17.7553, "step": 37155 }, { "epoch": 0.6791818231670536, "grad_norm": 6.932742174352338, "learning_rate": 2.4650047073804956e-06, "loss": 17.3835, "step": 37156 }, { "epoch": 0.6792001023635001, "grad_norm": 7.231043419945306, "learning_rate": 2.464749565682079e-06, "loss": 17.7583, "step": 37157 }, { "epoch": 0.6792183815599466, "grad_norm": 5.851361612611567, "learning_rate": 2.4644944328694946e-06, "loss": 17.3983, "step": 37158 }, { "epoch": 0.6792366607563931, "grad_norm": 8.204331160657837, "learning_rate": 2.464239308943636e-06, "loss": 17.7559, "step": 37159 }, { "epoch": 0.6792549399528397, "grad_norm": 6.946073991890032, "learning_rate": 2.4639841939053984e-06, "loss": 17.5604, "step": 37160 }, { "epoch": 0.6792732191492862, "grad_norm": 7.0242010938082755, "learning_rate": 2.463729087755678e-06, "loss": 17.623, "step": 37161 }, { "epoch": 0.6792914983457328, "grad_norm": 8.242195995974244, "learning_rate": 2.4634739904953653e-06, "loss": 17.8059, "step": 37162 }, { "epoch": 0.6793097775421792, "grad_norm": 10.164063385525676, "learning_rate": 2.4632189021253576e-06, "loss": 17.8776, "step": 37163 }, { "epoch": 0.6793280567386257, "grad_norm": 7.335352800307535, "learning_rate": 2.462963822646548e-06, "loss": 17.716, "step": 37164 }, { "epoch": 0.6793463359350723, "grad_norm": 6.292148834951341, "learning_rate": 2.462708752059828e-06, "loss": 17.29, "step": 37165 }, { "epoch": 0.6793646151315188, "grad_norm": 7.779505648386265, "learning_rate": 2.4624536903660958e-06, "loss": 18.0944, "step": 37166 }, { "epoch": 0.6793828943279654, "grad_norm": 6.947502821324799, "learning_rate": 2.4621986375662427e-06, "loss": 17.6275, "step": 37167 }, { "epoch": 0.6794011735244119, "grad_norm": 6.042528388548982, "learning_rate": 2.4619435936611615e-06, "loss": 17.159, "step": 37168 }, { "epoch": 0.6794194527208584, "grad_norm": 5.251239746118094, "learning_rate": 2.4616885586517477e-06, "loss": 16.9645, "step": 37169 }, { "epoch": 0.6794377319173049, "grad_norm": 7.54962541433963, "learning_rate": 2.461433532538895e-06, "loss": 17.8226, "step": 37170 }, { "epoch": 0.6794560111137514, "grad_norm": 6.688911920309443, "learning_rate": 2.4611785153234996e-06, "loss": 17.7011, "step": 37171 }, { "epoch": 0.679474290310198, "grad_norm": 7.434612764130552, "learning_rate": 2.4609235070064534e-06, "loss": 17.9242, "step": 37172 }, { "epoch": 0.6794925695066445, "grad_norm": 8.39781710884095, "learning_rate": 2.460668507588648e-06, "loss": 17.8328, "step": 37173 }, { "epoch": 0.679510848703091, "grad_norm": 8.293866979998608, "learning_rate": 2.460413517070981e-06, "loss": 18.3387, "step": 37174 }, { "epoch": 0.6795291278995376, "grad_norm": 6.215584963131794, "learning_rate": 2.460158535454344e-06, "loss": 16.9808, "step": 37175 }, { "epoch": 0.679547407095984, "grad_norm": 7.378651811681166, "learning_rate": 2.4599035627396295e-06, "loss": 17.7967, "step": 37176 }, { "epoch": 0.6795656862924306, "grad_norm": 6.884029371597037, "learning_rate": 2.459648598927734e-06, "loss": 17.7191, "step": 37177 }, { "epoch": 0.6795839654888771, "grad_norm": 9.586191786120894, "learning_rate": 2.459393644019548e-06, "loss": 18.5902, "step": 37178 }, { "epoch": 0.6796022446853236, "grad_norm": 7.7993577074318345, "learning_rate": 2.459138698015967e-06, "loss": 17.7077, "step": 37179 }, { "epoch": 0.6796205238817702, "grad_norm": 6.97006258528101, "learning_rate": 2.4588837609178862e-06, "loss": 17.6766, "step": 37180 }, { "epoch": 0.6796388030782167, "grad_norm": 5.36071284590024, "learning_rate": 2.458628832726197e-06, "loss": 17.0746, "step": 37181 }, { "epoch": 0.6796570822746633, "grad_norm": 5.155183473691347, "learning_rate": 2.458373913441792e-06, "loss": 16.9145, "step": 37182 }, { "epoch": 0.6796753614711097, "grad_norm": 6.2025550846168525, "learning_rate": 2.4581190030655674e-06, "loss": 17.4357, "step": 37183 }, { "epoch": 0.6796936406675562, "grad_norm": 5.734595048181856, "learning_rate": 2.457864101598413e-06, "loss": 17.2303, "step": 37184 }, { "epoch": 0.6797119198640028, "grad_norm": 6.643408017287074, "learning_rate": 2.457609209041226e-06, "loss": 17.6209, "step": 37185 }, { "epoch": 0.6797301990604493, "grad_norm": 7.499669655071696, "learning_rate": 2.457354325394899e-06, "loss": 17.6288, "step": 37186 }, { "epoch": 0.6797484782568958, "grad_norm": 6.749446147754503, "learning_rate": 2.457099450660322e-06, "loss": 17.3848, "step": 37187 }, { "epoch": 0.6797667574533424, "grad_norm": 6.124707930580407, "learning_rate": 2.4568445848383905e-06, "loss": 17.3591, "step": 37188 }, { "epoch": 0.6797850366497888, "grad_norm": 7.2628972000714525, "learning_rate": 2.4565897279300007e-06, "loss": 17.1292, "step": 37189 }, { "epoch": 0.6798033158462354, "grad_norm": 8.42819862082491, "learning_rate": 2.4563348799360405e-06, "loss": 18.032, "step": 37190 }, { "epoch": 0.6798215950426819, "grad_norm": 7.433020662210759, "learning_rate": 2.4560800408574084e-06, "loss": 17.9835, "step": 37191 }, { "epoch": 0.6798398742391284, "grad_norm": 7.375580210179875, "learning_rate": 2.4558252106949944e-06, "loss": 17.7894, "step": 37192 }, { "epoch": 0.679858153435575, "grad_norm": 5.317532958634942, "learning_rate": 2.4555703894496905e-06, "loss": 16.9086, "step": 37193 }, { "epoch": 0.6798764326320215, "grad_norm": 6.591625295064352, "learning_rate": 2.4553155771223934e-06, "loss": 17.7715, "step": 37194 }, { "epoch": 0.6798947118284681, "grad_norm": 6.233787766783194, "learning_rate": 2.455060773713995e-06, "loss": 17.6159, "step": 37195 }, { "epoch": 0.6799129910249145, "grad_norm": 5.860056196604263, "learning_rate": 2.4548059792253853e-06, "loss": 17.3008, "step": 37196 }, { "epoch": 0.679931270221361, "grad_norm": 6.224798589827885, "learning_rate": 2.4545511936574603e-06, "loss": 17.2705, "step": 37197 }, { "epoch": 0.6799495494178076, "grad_norm": 6.76623972786854, "learning_rate": 2.454296417011114e-06, "loss": 17.8199, "step": 37198 }, { "epoch": 0.6799678286142541, "grad_norm": 6.163411882535788, "learning_rate": 2.454041649287236e-06, "loss": 17.1795, "step": 37199 }, { "epoch": 0.6799861078107007, "grad_norm": 5.335686756218568, "learning_rate": 2.4537868904867227e-06, "loss": 17.0193, "step": 37200 }, { "epoch": 0.6800043870071472, "grad_norm": 6.832465775608395, "learning_rate": 2.453532140610464e-06, "loss": 17.44, "step": 37201 }, { "epoch": 0.6800226662035936, "grad_norm": 5.767328383410211, "learning_rate": 2.4532773996593556e-06, "loss": 16.9366, "step": 37202 }, { "epoch": 0.6800409454000402, "grad_norm": 6.465776917815608, "learning_rate": 2.4530226676342894e-06, "loss": 17.3427, "step": 37203 }, { "epoch": 0.6800592245964867, "grad_norm": 5.90095243536524, "learning_rate": 2.452767944536155e-06, "loss": 17.0459, "step": 37204 }, { "epoch": 0.6800775037929333, "grad_norm": 6.095283747180073, "learning_rate": 2.4525132303658506e-06, "loss": 17.2571, "step": 37205 }, { "epoch": 0.6800957829893798, "grad_norm": 6.209146746509721, "learning_rate": 2.4522585251242644e-06, "loss": 17.281, "step": 37206 }, { "epoch": 0.6801140621858263, "grad_norm": 5.456014173479956, "learning_rate": 2.4520038288122905e-06, "loss": 17.1742, "step": 37207 }, { "epoch": 0.6801323413822729, "grad_norm": 6.056906430546485, "learning_rate": 2.4517491414308243e-06, "loss": 17.415, "step": 37208 }, { "epoch": 0.6801506205787193, "grad_norm": 6.48683086488257, "learning_rate": 2.4514944629807563e-06, "loss": 17.4338, "step": 37209 }, { "epoch": 0.6801688997751659, "grad_norm": 5.9939078505509364, "learning_rate": 2.4512397934629774e-06, "loss": 17.5352, "step": 37210 }, { "epoch": 0.6801871789716124, "grad_norm": 7.141996777711672, "learning_rate": 2.450985132878383e-06, "loss": 17.5627, "step": 37211 }, { "epoch": 0.6802054581680589, "grad_norm": 6.036654680168366, "learning_rate": 2.450730481227865e-06, "loss": 17.1887, "step": 37212 }, { "epoch": 0.6802237373645055, "grad_norm": 6.678710981817144, "learning_rate": 2.4504758385123134e-06, "loss": 17.6366, "step": 37213 }, { "epoch": 0.680242016560952, "grad_norm": 7.783422947691715, "learning_rate": 2.4502212047326247e-06, "loss": 17.6252, "step": 37214 }, { "epoch": 0.6802602957573985, "grad_norm": 6.53192252470094, "learning_rate": 2.4499665798896877e-06, "loss": 17.5246, "step": 37215 }, { "epoch": 0.680278574953845, "grad_norm": 5.008654533279239, "learning_rate": 2.449711963984396e-06, "loss": 17.0274, "step": 37216 }, { "epoch": 0.6802968541502915, "grad_norm": 7.1742994714720325, "learning_rate": 2.4494573570176448e-06, "loss": 17.6618, "step": 37217 }, { "epoch": 0.6803151333467381, "grad_norm": 6.9876340229427765, "learning_rate": 2.449202758990323e-06, "loss": 18.0674, "step": 37218 }, { "epoch": 0.6803334125431846, "grad_norm": 6.057163514288468, "learning_rate": 2.448948169903325e-06, "loss": 17.3231, "step": 37219 }, { "epoch": 0.6803516917396312, "grad_norm": 9.248715576781152, "learning_rate": 2.4486935897575425e-06, "loss": 17.8496, "step": 37220 }, { "epoch": 0.6803699709360777, "grad_norm": 8.354890852091408, "learning_rate": 2.448439018553866e-06, "loss": 17.7343, "step": 37221 }, { "epoch": 0.6803882501325241, "grad_norm": 6.794993815612146, "learning_rate": 2.448184456293191e-06, "loss": 17.5496, "step": 37222 }, { "epoch": 0.6804065293289707, "grad_norm": 6.5549010992052095, "learning_rate": 2.4479299029764064e-06, "loss": 17.5856, "step": 37223 }, { "epoch": 0.6804248085254172, "grad_norm": 5.5388595846382636, "learning_rate": 2.4476753586044076e-06, "loss": 17.1903, "step": 37224 }, { "epoch": 0.6804430877218638, "grad_norm": 6.4987979773891436, "learning_rate": 2.447420823178084e-06, "loss": 17.6813, "step": 37225 }, { "epoch": 0.6804613669183103, "grad_norm": 6.286325830876818, "learning_rate": 2.44716629669833e-06, "loss": 17.4326, "step": 37226 }, { "epoch": 0.6804796461147568, "grad_norm": 4.622882771806684, "learning_rate": 2.4469117791660353e-06, "loss": 16.9674, "step": 37227 }, { "epoch": 0.6804979253112033, "grad_norm": 6.264671618437858, "learning_rate": 2.4466572705820946e-06, "loss": 17.356, "step": 37228 }, { "epoch": 0.6805162045076498, "grad_norm": 6.521518166454957, "learning_rate": 2.4464027709473992e-06, "loss": 17.5854, "step": 37229 }, { "epoch": 0.6805344837040964, "grad_norm": 7.937483256639659, "learning_rate": 2.4461482802628385e-06, "loss": 18.5097, "step": 37230 }, { "epoch": 0.6805527629005429, "grad_norm": 5.085285254183204, "learning_rate": 2.445893798529308e-06, "loss": 17.0312, "step": 37231 }, { "epoch": 0.6805710420969894, "grad_norm": 6.868403215678725, "learning_rate": 2.445639325747696e-06, "loss": 17.6665, "step": 37232 }, { "epoch": 0.680589321293436, "grad_norm": 7.013897176750242, "learning_rate": 2.445384861918897e-06, "loss": 17.6119, "step": 37233 }, { "epoch": 0.6806076004898824, "grad_norm": 6.261909903889196, "learning_rate": 2.4451304070438043e-06, "loss": 17.2702, "step": 37234 }, { "epoch": 0.680625879686329, "grad_norm": 6.227355862094457, "learning_rate": 2.444875961123306e-06, "loss": 17.3572, "step": 37235 }, { "epoch": 0.6806441588827755, "grad_norm": 5.796616631260566, "learning_rate": 2.444621524158297e-06, "loss": 16.9006, "step": 37236 }, { "epoch": 0.680662438079222, "grad_norm": 6.348450684408057, "learning_rate": 2.4443670961496685e-06, "loss": 17.5545, "step": 37237 }, { "epoch": 0.6806807172756686, "grad_norm": 6.593326916870231, "learning_rate": 2.4441126770983094e-06, "loss": 17.5273, "step": 37238 }, { "epoch": 0.6806989964721151, "grad_norm": 12.483495675185054, "learning_rate": 2.443858267005115e-06, "loss": 17.5876, "step": 37239 }, { "epoch": 0.6807172756685617, "grad_norm": 6.423203981235186, "learning_rate": 2.443603865870976e-06, "loss": 17.3716, "step": 37240 }, { "epoch": 0.6807355548650081, "grad_norm": 6.461452157011084, "learning_rate": 2.4433494736967818e-06, "loss": 17.5584, "step": 37241 }, { "epoch": 0.6807538340614546, "grad_norm": 9.945845467981226, "learning_rate": 2.443095090483426e-06, "loss": 17.9244, "step": 37242 }, { "epoch": 0.6807721132579012, "grad_norm": 7.862102353659848, "learning_rate": 2.442840716231801e-06, "loss": 17.7408, "step": 37243 }, { "epoch": 0.6807903924543477, "grad_norm": 8.059188018953302, "learning_rate": 2.4425863509427967e-06, "loss": 17.9671, "step": 37244 }, { "epoch": 0.6808086716507943, "grad_norm": 6.085547034168526, "learning_rate": 2.4423319946173067e-06, "loss": 17.4069, "step": 37245 }, { "epoch": 0.6808269508472408, "grad_norm": 7.94704538493128, "learning_rate": 2.442077647256219e-06, "loss": 17.9569, "step": 37246 }, { "epoch": 0.6808452300436872, "grad_norm": 6.74418676216758, "learning_rate": 2.441823308860429e-06, "loss": 17.5648, "step": 37247 }, { "epoch": 0.6808635092401338, "grad_norm": 5.8931158585646735, "learning_rate": 2.441568979430827e-06, "loss": 17.0493, "step": 37248 }, { "epoch": 0.6808817884365803, "grad_norm": 6.811742027549502, "learning_rate": 2.4413146589683016e-06, "loss": 17.5933, "step": 37249 }, { "epoch": 0.6809000676330269, "grad_norm": 6.447104521263184, "learning_rate": 2.4410603474737477e-06, "loss": 17.6984, "step": 37250 }, { "epoch": 0.6809183468294734, "grad_norm": 6.167289451367886, "learning_rate": 2.440806044948054e-06, "loss": 18.2032, "step": 37251 }, { "epoch": 0.6809366260259199, "grad_norm": 6.1189021543323205, "learning_rate": 2.4405517513921123e-06, "loss": 17.351, "step": 37252 }, { "epoch": 0.6809549052223665, "grad_norm": 7.194032053580975, "learning_rate": 2.4402974668068167e-06, "loss": 17.6581, "step": 37253 }, { "epoch": 0.6809731844188129, "grad_norm": 7.178101110160788, "learning_rate": 2.440043191193057e-06, "loss": 18.0142, "step": 37254 }, { "epoch": 0.6809914636152594, "grad_norm": 6.41605224472365, "learning_rate": 2.4397889245517217e-06, "loss": 17.3465, "step": 37255 }, { "epoch": 0.681009742811706, "grad_norm": 8.052187708624011, "learning_rate": 2.4395346668837055e-06, "loss": 17.2899, "step": 37256 }, { "epoch": 0.6810280220081525, "grad_norm": 7.233669797887269, "learning_rate": 2.4392804181898984e-06, "loss": 17.4607, "step": 37257 }, { "epoch": 0.6810463012045991, "grad_norm": 5.2133791463323105, "learning_rate": 2.439026178471189e-06, "loss": 17.0498, "step": 37258 }, { "epoch": 0.6810645804010456, "grad_norm": 6.503258344269135, "learning_rate": 2.4387719477284727e-06, "loss": 17.2641, "step": 37259 }, { "epoch": 0.681082859597492, "grad_norm": 6.581121337509986, "learning_rate": 2.4385177259626363e-06, "loss": 17.6249, "step": 37260 }, { "epoch": 0.6811011387939386, "grad_norm": 6.304603574673248, "learning_rate": 2.4382635131745736e-06, "loss": 17.3073, "step": 37261 }, { "epoch": 0.6811194179903851, "grad_norm": 6.002090972534428, "learning_rate": 2.4380093093651762e-06, "loss": 17.3453, "step": 37262 }, { "epoch": 0.6811376971868317, "grad_norm": 5.842574769105619, "learning_rate": 2.4377551145353322e-06, "loss": 17.1801, "step": 37263 }, { "epoch": 0.6811559763832782, "grad_norm": 5.9268488637414904, "learning_rate": 2.4375009286859365e-06, "loss": 17.2625, "step": 37264 }, { "epoch": 0.6811742555797247, "grad_norm": 7.064605896275336, "learning_rate": 2.4372467518178777e-06, "loss": 17.5432, "step": 37265 }, { "epoch": 0.6811925347761713, "grad_norm": 6.866260298248523, "learning_rate": 2.436992583932044e-06, "loss": 17.6283, "step": 37266 }, { "epoch": 0.6812108139726177, "grad_norm": 4.797090487438866, "learning_rate": 2.4367384250293313e-06, "loss": 16.7402, "step": 37267 }, { "epoch": 0.6812290931690643, "grad_norm": 6.323241872611763, "learning_rate": 2.436484275110628e-06, "loss": 17.6662, "step": 37268 }, { "epoch": 0.6812473723655108, "grad_norm": 6.071413354087148, "learning_rate": 2.436230134176823e-06, "loss": 17.1641, "step": 37269 }, { "epoch": 0.6812656515619573, "grad_norm": 5.248315936382715, "learning_rate": 2.4359760022288086e-06, "loss": 17.0368, "step": 37270 }, { "epoch": 0.6812839307584039, "grad_norm": 6.980148724032825, "learning_rate": 2.435721879267478e-06, "loss": 17.6958, "step": 37271 }, { "epoch": 0.6813022099548504, "grad_norm": 4.835526651971943, "learning_rate": 2.4354677652937176e-06, "loss": 16.8788, "step": 37272 }, { "epoch": 0.681320489151297, "grad_norm": 5.7971298212479825, "learning_rate": 2.435213660308422e-06, "loss": 17.0918, "step": 37273 }, { "epoch": 0.6813387683477434, "grad_norm": 6.537265295539224, "learning_rate": 2.43495956431248e-06, "loss": 17.5616, "step": 37274 }, { "epoch": 0.6813570475441899, "grad_norm": 7.3591019527871815, "learning_rate": 2.434705477306781e-06, "loss": 17.7149, "step": 37275 }, { "epoch": 0.6813753267406365, "grad_norm": 5.330900119148368, "learning_rate": 2.4344513992922176e-06, "loss": 17.0301, "step": 37276 }, { "epoch": 0.681393605937083, "grad_norm": 6.597913042321888, "learning_rate": 2.4341973302696777e-06, "loss": 17.4781, "step": 37277 }, { "epoch": 0.6814118851335296, "grad_norm": 5.959573172586589, "learning_rate": 2.4339432702400556e-06, "loss": 17.4397, "step": 37278 }, { "epoch": 0.681430164329976, "grad_norm": 5.608232912680562, "learning_rate": 2.433689219204238e-06, "loss": 17.2178, "step": 37279 }, { "epoch": 0.6814484435264225, "grad_norm": 6.345712051505199, "learning_rate": 2.4334351771631164e-06, "loss": 17.3809, "step": 37280 }, { "epoch": 0.6814667227228691, "grad_norm": 6.408419141043268, "learning_rate": 2.433181144117584e-06, "loss": 17.3828, "step": 37281 }, { "epoch": 0.6814850019193156, "grad_norm": 6.873976477794992, "learning_rate": 2.432927120068529e-06, "loss": 17.5878, "step": 37282 }, { "epoch": 0.6815032811157622, "grad_norm": 5.4811625385601, "learning_rate": 2.4326731050168396e-06, "loss": 17.0938, "step": 37283 }, { "epoch": 0.6815215603122087, "grad_norm": 7.014478023799278, "learning_rate": 2.4324190989634104e-06, "loss": 17.6875, "step": 37284 }, { "epoch": 0.6815398395086552, "grad_norm": 7.545405352370645, "learning_rate": 2.432165101909129e-06, "loss": 17.7528, "step": 37285 }, { "epoch": 0.6815581187051017, "grad_norm": 7.4844112085897025, "learning_rate": 2.4319111138548845e-06, "loss": 17.6109, "step": 37286 }, { "epoch": 0.6815763979015482, "grad_norm": 6.588001703120294, "learning_rate": 2.431657134801569e-06, "loss": 17.3148, "step": 37287 }, { "epoch": 0.6815946770979948, "grad_norm": 5.713868695558394, "learning_rate": 2.431403164750074e-06, "loss": 17.1446, "step": 37288 }, { "epoch": 0.6816129562944413, "grad_norm": 7.110108062094965, "learning_rate": 2.431149203701286e-06, "loss": 17.4913, "step": 37289 }, { "epoch": 0.6816312354908878, "grad_norm": 7.591509564409422, "learning_rate": 2.430895251656099e-06, "loss": 17.8391, "step": 37290 }, { "epoch": 0.6816495146873344, "grad_norm": 6.672874403269731, "learning_rate": 2.4306413086153995e-06, "loss": 17.7411, "step": 37291 }, { "epoch": 0.6816677938837808, "grad_norm": 7.21705755867256, "learning_rate": 2.430387374580081e-06, "loss": 17.7244, "step": 37292 }, { "epoch": 0.6816860730802274, "grad_norm": 6.302916270264854, "learning_rate": 2.4301334495510313e-06, "loss": 17.2661, "step": 37293 }, { "epoch": 0.6817043522766739, "grad_norm": 6.575236689129992, "learning_rate": 2.4298795335291398e-06, "loss": 17.5922, "step": 37294 }, { "epoch": 0.6817226314731204, "grad_norm": 6.45775988882318, "learning_rate": 2.429625626515299e-06, "loss": 17.4743, "step": 37295 }, { "epoch": 0.681740910669567, "grad_norm": 7.297566910042512, "learning_rate": 2.4293717285103957e-06, "loss": 17.6514, "step": 37296 }, { "epoch": 0.6817591898660135, "grad_norm": 6.888668116303668, "learning_rate": 2.4291178395153215e-06, "loss": 17.5196, "step": 37297 }, { "epoch": 0.6817774690624601, "grad_norm": 5.510200386218113, "learning_rate": 2.4288639595309677e-06, "loss": 17.1052, "step": 37298 }, { "epoch": 0.6817957482589065, "grad_norm": 6.44603227704904, "learning_rate": 2.4286100885582228e-06, "loss": 17.3152, "step": 37299 }, { "epoch": 0.681814027455353, "grad_norm": 6.6339267268052, "learning_rate": 2.4283562265979745e-06, "loss": 17.4628, "step": 37300 }, { "epoch": 0.6818323066517996, "grad_norm": 6.691897471025367, "learning_rate": 2.428102373651116e-06, "loss": 17.4436, "step": 37301 }, { "epoch": 0.6818505858482461, "grad_norm": 5.382973959933418, "learning_rate": 2.4278485297185355e-06, "loss": 17.0895, "step": 37302 }, { "epoch": 0.6818688650446927, "grad_norm": 5.739241530411222, "learning_rate": 2.4275946948011215e-06, "loss": 17.115, "step": 37303 }, { "epoch": 0.6818871442411392, "grad_norm": 7.772159693126003, "learning_rate": 2.4273408688997663e-06, "loss": 17.9409, "step": 37304 }, { "epoch": 0.6819054234375856, "grad_norm": 5.613137343007332, "learning_rate": 2.427087052015356e-06, "loss": 17.038, "step": 37305 }, { "epoch": 0.6819237026340322, "grad_norm": 6.49413321579991, "learning_rate": 2.426833244148783e-06, "loss": 17.5332, "step": 37306 }, { "epoch": 0.6819419818304787, "grad_norm": 5.197680712381599, "learning_rate": 2.4265794453009373e-06, "loss": 17.1044, "step": 37307 }, { "epoch": 0.6819602610269253, "grad_norm": 7.5205014505992445, "learning_rate": 2.426325655472706e-06, "loss": 18.1585, "step": 37308 }, { "epoch": 0.6819785402233718, "grad_norm": 6.462420627901066, "learning_rate": 2.426071874664981e-06, "loss": 17.5698, "step": 37309 }, { "epoch": 0.6819968194198183, "grad_norm": 6.84625491103216, "learning_rate": 2.425818102878652e-06, "loss": 17.986, "step": 37310 }, { "epoch": 0.6820150986162649, "grad_norm": 6.583821846574371, "learning_rate": 2.4255643401146043e-06, "loss": 17.6019, "step": 37311 }, { "epoch": 0.6820333778127113, "grad_norm": 5.14302703486007, "learning_rate": 2.425310586373733e-06, "loss": 17.2127, "step": 37312 }, { "epoch": 0.6820516570091579, "grad_norm": 5.2424383330252144, "learning_rate": 2.4250568416569235e-06, "loss": 17.0073, "step": 37313 }, { "epoch": 0.6820699362056044, "grad_norm": 5.903590901681067, "learning_rate": 2.4248031059650655e-06, "loss": 17.394, "step": 37314 }, { "epoch": 0.6820882154020509, "grad_norm": 5.499614930715963, "learning_rate": 2.4245493792990487e-06, "loss": 16.9293, "step": 37315 }, { "epoch": 0.6821064945984975, "grad_norm": 7.7678445200343536, "learning_rate": 2.4242956616597646e-06, "loss": 17.9446, "step": 37316 }, { "epoch": 0.682124773794944, "grad_norm": 6.338080988258395, "learning_rate": 2.4240419530480992e-06, "loss": 17.198, "step": 37317 }, { "epoch": 0.6821430529913906, "grad_norm": 6.0098421225512135, "learning_rate": 2.423788253464945e-06, "loss": 17.3284, "step": 37318 }, { "epoch": 0.682161332187837, "grad_norm": 7.175130652485752, "learning_rate": 2.423534562911189e-06, "loss": 17.856, "step": 37319 }, { "epoch": 0.6821796113842835, "grad_norm": 6.646107780590023, "learning_rate": 2.423280881387719e-06, "loss": 17.5013, "step": 37320 }, { "epoch": 0.6821978905807301, "grad_norm": 5.834507906526188, "learning_rate": 2.423027208895428e-06, "loss": 17.1349, "step": 37321 }, { "epoch": 0.6822161697771766, "grad_norm": 7.118318141595354, "learning_rate": 2.4227735454352013e-06, "loss": 17.8269, "step": 37322 }, { "epoch": 0.6822344489736231, "grad_norm": 6.6105367407037745, "learning_rate": 2.422519891007931e-06, "loss": 17.152, "step": 37323 }, { "epoch": 0.6822527281700697, "grad_norm": 5.708961410815814, "learning_rate": 2.4222662456145037e-06, "loss": 17.1282, "step": 37324 }, { "epoch": 0.6822710073665161, "grad_norm": 4.520215624605179, "learning_rate": 2.4220126092558088e-06, "loss": 16.7766, "step": 37325 }, { "epoch": 0.6822892865629627, "grad_norm": 7.6607138504530585, "learning_rate": 2.421758981932738e-06, "loss": 17.6245, "step": 37326 }, { "epoch": 0.6823075657594092, "grad_norm": 6.723246836941104, "learning_rate": 2.4215053636461787e-06, "loss": 17.474, "step": 37327 }, { "epoch": 0.6823258449558557, "grad_norm": 7.759823381068426, "learning_rate": 2.421251754397017e-06, "loss": 18.1506, "step": 37328 }, { "epoch": 0.6823441241523023, "grad_norm": 7.799519534319236, "learning_rate": 2.4209981541861466e-06, "loss": 18.1708, "step": 37329 }, { "epoch": 0.6823624033487488, "grad_norm": 6.295608586490239, "learning_rate": 2.4207445630144533e-06, "loss": 17.5991, "step": 37330 }, { "epoch": 0.6823806825451953, "grad_norm": 6.366809064710823, "learning_rate": 2.4204909808828246e-06, "loss": 17.4994, "step": 37331 }, { "epoch": 0.6823989617416418, "grad_norm": 6.011334084040935, "learning_rate": 2.4202374077921537e-06, "loss": 17.3177, "step": 37332 }, { "epoch": 0.6824172409380883, "grad_norm": 7.446640157285946, "learning_rate": 2.4199838437433244e-06, "loss": 17.6643, "step": 37333 }, { "epoch": 0.6824355201345349, "grad_norm": 6.551049211473901, "learning_rate": 2.4197302887372275e-06, "loss": 17.3755, "step": 37334 }, { "epoch": 0.6824537993309814, "grad_norm": 7.851507202500074, "learning_rate": 2.4194767427747544e-06, "loss": 18.0515, "step": 37335 }, { "epoch": 0.682472078527428, "grad_norm": 7.7329861329868965, "learning_rate": 2.4192232058567915e-06, "loss": 17.9471, "step": 37336 }, { "epoch": 0.6824903577238745, "grad_norm": 5.00297008107001, "learning_rate": 2.4189696779842252e-06, "loss": 16.8711, "step": 37337 }, { "epoch": 0.6825086369203209, "grad_norm": 7.2878887317349745, "learning_rate": 2.4187161591579484e-06, "loss": 17.7344, "step": 37338 }, { "epoch": 0.6825269161167675, "grad_norm": 6.848155297382673, "learning_rate": 2.4184626493788454e-06, "loss": 17.4562, "step": 37339 }, { "epoch": 0.682545195313214, "grad_norm": 5.682489133961935, "learning_rate": 2.4182091486478084e-06, "loss": 17.1684, "step": 37340 }, { "epoch": 0.6825634745096606, "grad_norm": 6.258886384481663, "learning_rate": 2.4179556569657248e-06, "loss": 17.2892, "step": 37341 }, { "epoch": 0.6825817537061071, "grad_norm": 6.727555589378174, "learning_rate": 2.41770217433348e-06, "loss": 17.8077, "step": 37342 }, { "epoch": 0.6826000329025536, "grad_norm": 6.891375856185921, "learning_rate": 2.4174487007519653e-06, "loss": 17.6571, "step": 37343 }, { "epoch": 0.6826183120990001, "grad_norm": 6.917703058126066, "learning_rate": 2.4171952362220713e-06, "loss": 18.0681, "step": 37344 }, { "epoch": 0.6826365912954466, "grad_norm": 5.441309747479626, "learning_rate": 2.416941780744681e-06, "loss": 16.9284, "step": 37345 }, { "epoch": 0.6826548704918932, "grad_norm": 6.9112015200365695, "learning_rate": 2.4166883343206883e-06, "loss": 17.4993, "step": 37346 }, { "epoch": 0.6826731496883397, "grad_norm": 5.170755228127677, "learning_rate": 2.416434896950978e-06, "loss": 16.8977, "step": 37347 }, { "epoch": 0.6826914288847862, "grad_norm": 5.054400127672662, "learning_rate": 2.416181468636438e-06, "loss": 16.9208, "step": 37348 }, { "epoch": 0.6827097080812328, "grad_norm": 5.435943822659535, "learning_rate": 2.4159280493779597e-06, "loss": 17.0115, "step": 37349 }, { "epoch": 0.6827279872776792, "grad_norm": 5.3985565784152305, "learning_rate": 2.415674639176427e-06, "loss": 17.2642, "step": 37350 }, { "epoch": 0.6827462664741258, "grad_norm": 8.162293194138524, "learning_rate": 2.4154212380327323e-06, "loss": 17.9327, "step": 37351 }, { "epoch": 0.6827645456705723, "grad_norm": 7.449610329314597, "learning_rate": 2.4151678459477607e-06, "loss": 17.5634, "step": 37352 }, { "epoch": 0.6827828248670188, "grad_norm": 6.775634502144629, "learning_rate": 2.4149144629224008e-06, "loss": 17.6729, "step": 37353 }, { "epoch": 0.6828011040634654, "grad_norm": 7.020769642837786, "learning_rate": 2.4146610889575435e-06, "loss": 17.6684, "step": 37354 }, { "epoch": 0.6828193832599119, "grad_norm": 5.425609021839514, "learning_rate": 2.4144077240540748e-06, "loss": 16.8476, "step": 37355 }, { "epoch": 0.6828376624563585, "grad_norm": 5.197058384115266, "learning_rate": 2.4141543682128803e-06, "loss": 16.8921, "step": 37356 }, { "epoch": 0.6828559416528049, "grad_norm": 6.735761503389765, "learning_rate": 2.413901021434853e-06, "loss": 17.3947, "step": 37357 }, { "epoch": 0.6828742208492514, "grad_norm": 7.093303859513478, "learning_rate": 2.413647683720878e-06, "loss": 17.5225, "step": 37358 }, { "epoch": 0.682892500045698, "grad_norm": 5.431047024946878, "learning_rate": 2.4133943550718407e-06, "loss": 16.911, "step": 37359 }, { "epoch": 0.6829107792421445, "grad_norm": 7.863454920107668, "learning_rate": 2.4131410354886327e-06, "loss": 18.3416, "step": 37360 }, { "epoch": 0.6829290584385911, "grad_norm": 8.213181145065807, "learning_rate": 2.412887724972142e-06, "loss": 17.657, "step": 37361 }, { "epoch": 0.6829473376350376, "grad_norm": 7.036839195750072, "learning_rate": 2.4126344235232546e-06, "loss": 17.5953, "step": 37362 }, { "epoch": 0.682965616831484, "grad_norm": 5.999437847213577, "learning_rate": 2.4123811311428604e-06, "loss": 17.1057, "step": 37363 }, { "epoch": 0.6829838960279306, "grad_norm": 5.618382218256802, "learning_rate": 2.4121278478318457e-06, "loss": 17.0471, "step": 37364 }, { "epoch": 0.6830021752243771, "grad_norm": 6.478644799704441, "learning_rate": 2.4118745735910957e-06, "loss": 17.382, "step": 37365 }, { "epoch": 0.6830204544208237, "grad_norm": 7.112549123627579, "learning_rate": 2.4116213084215036e-06, "loss": 17.8094, "step": 37366 }, { "epoch": 0.6830387336172702, "grad_norm": 5.640411074297628, "learning_rate": 2.4113680523239537e-06, "loss": 17.3011, "step": 37367 }, { "epoch": 0.6830570128137167, "grad_norm": 7.399774704682526, "learning_rate": 2.411114805299333e-06, "loss": 17.5779, "step": 37368 }, { "epoch": 0.6830752920101633, "grad_norm": 5.509706096582524, "learning_rate": 2.410861567348529e-06, "loss": 17.1628, "step": 37369 }, { "epoch": 0.6830935712066097, "grad_norm": 8.190911312956233, "learning_rate": 2.4106083384724316e-06, "loss": 17.7856, "step": 37370 }, { "epoch": 0.6831118504030563, "grad_norm": 6.574318791973502, "learning_rate": 2.4103551186719286e-06, "loss": 17.6098, "step": 37371 }, { "epoch": 0.6831301295995028, "grad_norm": 7.884472456308601, "learning_rate": 2.4101019079479064e-06, "loss": 18.2463, "step": 37372 }, { "epoch": 0.6831484087959493, "grad_norm": 5.639276241261316, "learning_rate": 2.4098487063012503e-06, "loss": 17.2423, "step": 37373 }, { "epoch": 0.6831666879923959, "grad_norm": 6.057496529980325, "learning_rate": 2.409595513732852e-06, "loss": 17.1099, "step": 37374 }, { "epoch": 0.6831849671888424, "grad_norm": 6.963048203021063, "learning_rate": 2.409342330243596e-06, "loss": 17.6187, "step": 37375 }, { "epoch": 0.683203246385289, "grad_norm": 7.839982408459319, "learning_rate": 2.4090891558343686e-06, "loss": 17.4764, "step": 37376 }, { "epoch": 0.6832215255817354, "grad_norm": 7.780155452478244, "learning_rate": 2.408835990506061e-06, "loss": 18.0787, "step": 37377 }, { "epoch": 0.6832398047781819, "grad_norm": 6.629924075596245, "learning_rate": 2.408582834259556e-06, "loss": 17.6448, "step": 37378 }, { "epoch": 0.6832580839746285, "grad_norm": 4.217303886387784, "learning_rate": 2.4083296870957445e-06, "loss": 16.7128, "step": 37379 }, { "epoch": 0.683276363171075, "grad_norm": 6.083330879029641, "learning_rate": 2.4080765490155132e-06, "loss": 17.3648, "step": 37380 }, { "epoch": 0.6832946423675216, "grad_norm": 5.8782056210843034, "learning_rate": 2.4078234200197493e-06, "loss": 17.1656, "step": 37381 }, { "epoch": 0.6833129215639681, "grad_norm": 5.121404332278461, "learning_rate": 2.4075703001093374e-06, "loss": 16.8562, "step": 37382 }, { "epoch": 0.6833312007604145, "grad_norm": 6.160254843738751, "learning_rate": 2.4073171892851687e-06, "loss": 17.3688, "step": 37383 }, { "epoch": 0.6833494799568611, "grad_norm": 8.086423918949365, "learning_rate": 2.407064087548126e-06, "loss": 17.9981, "step": 37384 }, { "epoch": 0.6833677591533076, "grad_norm": 6.602261693524784, "learning_rate": 2.4068109948991007e-06, "loss": 17.8054, "step": 37385 }, { "epoch": 0.6833860383497542, "grad_norm": 5.727086444016057, "learning_rate": 2.4065579113389774e-06, "loss": 16.8507, "step": 37386 }, { "epoch": 0.6834043175462007, "grad_norm": 5.788402854612823, "learning_rate": 2.4063048368686428e-06, "loss": 17.4523, "step": 37387 }, { "epoch": 0.6834225967426472, "grad_norm": 5.852123244182828, "learning_rate": 2.4060517714889834e-06, "loss": 17.2783, "step": 37388 }, { "epoch": 0.6834408759390938, "grad_norm": 5.653666969195335, "learning_rate": 2.4057987152008904e-06, "loss": 17.2246, "step": 37389 }, { "epoch": 0.6834591551355402, "grad_norm": 5.337305587519449, "learning_rate": 2.4055456680052454e-06, "loss": 17.2927, "step": 37390 }, { "epoch": 0.6834774343319867, "grad_norm": 6.344593318685056, "learning_rate": 2.4052926299029396e-06, "loss": 17.5205, "step": 37391 }, { "epoch": 0.6834957135284333, "grad_norm": 6.66232808455521, "learning_rate": 2.405039600894858e-06, "loss": 17.3617, "step": 37392 }, { "epoch": 0.6835139927248798, "grad_norm": 6.808780157831398, "learning_rate": 2.4047865809818855e-06, "loss": 17.7012, "step": 37393 }, { "epoch": 0.6835322719213264, "grad_norm": 5.872604498177238, "learning_rate": 2.4045335701649127e-06, "loss": 17.107, "step": 37394 }, { "epoch": 0.6835505511177729, "grad_norm": 6.30548174513047, "learning_rate": 2.4042805684448247e-06, "loss": 17.3997, "step": 37395 }, { "epoch": 0.6835688303142193, "grad_norm": 7.200325719840536, "learning_rate": 2.404027575822506e-06, "loss": 17.6066, "step": 37396 }, { "epoch": 0.6835871095106659, "grad_norm": 7.036649430088533, "learning_rate": 2.4037745922988454e-06, "loss": 17.7407, "step": 37397 }, { "epoch": 0.6836053887071124, "grad_norm": 6.017751527402606, "learning_rate": 2.4035216178747312e-06, "loss": 17.3643, "step": 37398 }, { "epoch": 0.683623667903559, "grad_norm": 5.401919848569793, "learning_rate": 2.4032686525510467e-06, "loss": 17.011, "step": 37399 }, { "epoch": 0.6836419471000055, "grad_norm": 6.627025065084146, "learning_rate": 2.403015696328682e-06, "loss": 17.7144, "step": 37400 }, { "epoch": 0.683660226296452, "grad_norm": 5.79280321095791, "learning_rate": 2.4027627492085204e-06, "loss": 17.1745, "step": 37401 }, { "epoch": 0.6836785054928985, "grad_norm": 7.060099063492109, "learning_rate": 2.402509811191452e-06, "loss": 17.7052, "step": 37402 }, { "epoch": 0.683696784689345, "grad_norm": 5.040248719284434, "learning_rate": 2.402256882278361e-06, "loss": 16.8541, "step": 37403 }, { "epoch": 0.6837150638857916, "grad_norm": 7.359469345674226, "learning_rate": 2.4020039624701323e-06, "loss": 17.8338, "step": 37404 }, { "epoch": 0.6837333430822381, "grad_norm": 7.267696714256576, "learning_rate": 2.4017510517676562e-06, "loss": 17.465, "step": 37405 }, { "epoch": 0.6837516222786846, "grad_norm": 6.632563320585817, "learning_rate": 2.4014981501718156e-06, "loss": 17.543, "step": 37406 }, { "epoch": 0.6837699014751312, "grad_norm": 6.408126337374798, "learning_rate": 2.4012452576834977e-06, "loss": 17.4042, "step": 37407 }, { "epoch": 0.6837881806715777, "grad_norm": 5.529467574979445, "learning_rate": 2.4009923743035923e-06, "loss": 17.1854, "step": 37408 }, { "epoch": 0.6838064598680242, "grad_norm": 7.068962603675004, "learning_rate": 2.400739500032983e-06, "loss": 17.3771, "step": 37409 }, { "epoch": 0.6838247390644707, "grad_norm": 6.063224356763782, "learning_rate": 2.400486634872554e-06, "loss": 17.2093, "step": 37410 }, { "epoch": 0.6838430182609172, "grad_norm": 6.524445100042966, "learning_rate": 2.400233778823196e-06, "loss": 17.4515, "step": 37411 }, { "epoch": 0.6838612974573638, "grad_norm": 6.964867788703548, "learning_rate": 2.3999809318857926e-06, "loss": 17.9619, "step": 37412 }, { "epoch": 0.6838795766538103, "grad_norm": 7.286564432690076, "learning_rate": 2.3997280940612288e-06, "loss": 17.4724, "step": 37413 }, { "epoch": 0.6838978558502569, "grad_norm": 7.38746874498065, "learning_rate": 2.399475265350392e-06, "loss": 18.0537, "step": 37414 }, { "epoch": 0.6839161350467033, "grad_norm": 4.665421073675697, "learning_rate": 2.3992224457541713e-06, "loss": 16.656, "step": 37415 }, { "epoch": 0.6839344142431498, "grad_norm": 5.5979382379267495, "learning_rate": 2.3989696352734484e-06, "loss": 17.0993, "step": 37416 }, { "epoch": 0.6839526934395964, "grad_norm": 6.608405793013448, "learning_rate": 2.398716833909113e-06, "loss": 17.568, "step": 37417 }, { "epoch": 0.6839709726360429, "grad_norm": 6.244674473284617, "learning_rate": 2.3984640416620468e-06, "loss": 17.306, "step": 37418 }, { "epoch": 0.6839892518324895, "grad_norm": 5.676443959479837, "learning_rate": 2.398211258533141e-06, "loss": 17.1472, "step": 37419 }, { "epoch": 0.684007531028936, "grad_norm": 5.7846834061024826, "learning_rate": 2.3979584845232782e-06, "loss": 17.1018, "step": 37420 }, { "epoch": 0.6840258102253824, "grad_norm": 6.603892842057692, "learning_rate": 2.3977057196333435e-06, "loss": 17.6212, "step": 37421 }, { "epoch": 0.684044089421829, "grad_norm": 5.812879610487894, "learning_rate": 2.397452963864227e-06, "loss": 17.11, "step": 37422 }, { "epoch": 0.6840623686182755, "grad_norm": 5.694679659931365, "learning_rate": 2.3972002172168095e-06, "loss": 17.0803, "step": 37423 }, { "epoch": 0.6840806478147221, "grad_norm": 5.057076721795927, "learning_rate": 2.3969474796919796e-06, "loss": 17.0489, "step": 37424 }, { "epoch": 0.6840989270111686, "grad_norm": 5.737937518305001, "learning_rate": 2.3966947512906246e-06, "loss": 17.2543, "step": 37425 }, { "epoch": 0.6841172062076151, "grad_norm": 7.1721736444733475, "learning_rate": 2.396442032013629e-06, "loss": 17.9465, "step": 37426 }, { "epoch": 0.6841354854040617, "grad_norm": 7.624416764773284, "learning_rate": 2.3961893218618752e-06, "loss": 17.4015, "step": 37427 }, { "epoch": 0.6841537646005081, "grad_norm": 5.786636028179799, "learning_rate": 2.395936620836255e-06, "loss": 17.4574, "step": 37428 }, { "epoch": 0.6841720437969547, "grad_norm": 6.507522229415027, "learning_rate": 2.395683928937649e-06, "loss": 17.6793, "step": 37429 }, { "epoch": 0.6841903229934012, "grad_norm": 6.382063815196273, "learning_rate": 2.395431246166946e-06, "loss": 17.4444, "step": 37430 }, { "epoch": 0.6842086021898477, "grad_norm": 6.074075328950764, "learning_rate": 2.395178572525031e-06, "loss": 17.3049, "step": 37431 }, { "epoch": 0.6842268813862943, "grad_norm": 6.946274912709625, "learning_rate": 2.394925908012787e-06, "loss": 17.7267, "step": 37432 }, { "epoch": 0.6842451605827408, "grad_norm": 6.511714784700692, "learning_rate": 2.394673252631101e-06, "loss": 17.5692, "step": 37433 }, { "epoch": 0.6842634397791874, "grad_norm": 5.929464253739464, "learning_rate": 2.3944206063808623e-06, "loss": 17.4192, "step": 37434 }, { "epoch": 0.6842817189756338, "grad_norm": 6.95194560506849, "learning_rate": 2.3941679692629504e-06, "loss": 17.7163, "step": 37435 }, { "epoch": 0.6842999981720803, "grad_norm": 6.098031494984529, "learning_rate": 2.3939153412782564e-06, "loss": 17.3953, "step": 37436 }, { "epoch": 0.6843182773685269, "grad_norm": 5.884315916560081, "learning_rate": 2.393662722427662e-06, "loss": 16.9653, "step": 37437 }, { "epoch": 0.6843365565649734, "grad_norm": 7.134618914758375, "learning_rate": 2.3934101127120524e-06, "loss": 17.5289, "step": 37438 }, { "epoch": 0.68435483576142, "grad_norm": 6.444074285425717, "learning_rate": 2.3931575121323157e-06, "loss": 17.3367, "step": 37439 }, { "epoch": 0.6843731149578665, "grad_norm": 5.195107438444289, "learning_rate": 2.392904920689336e-06, "loss": 16.9148, "step": 37440 }, { "epoch": 0.6843913941543129, "grad_norm": 5.878609253315641, "learning_rate": 2.392652338383996e-06, "loss": 17.2443, "step": 37441 }, { "epoch": 0.6844096733507595, "grad_norm": 6.378385365974901, "learning_rate": 2.392399765217184e-06, "loss": 17.525, "step": 37442 }, { "epoch": 0.684427952547206, "grad_norm": 7.43818854019296, "learning_rate": 2.3921472011897857e-06, "loss": 17.7884, "step": 37443 }, { "epoch": 0.6844462317436526, "grad_norm": 7.080916720077214, "learning_rate": 2.3918946463026836e-06, "loss": 17.8182, "step": 37444 }, { "epoch": 0.6844645109400991, "grad_norm": 6.082152988644619, "learning_rate": 2.3916421005567666e-06, "loss": 16.9076, "step": 37445 }, { "epoch": 0.6844827901365456, "grad_norm": 5.8472713120446596, "learning_rate": 2.391389563952915e-06, "loss": 17.2265, "step": 37446 }, { "epoch": 0.6845010693329922, "grad_norm": 6.581394134308637, "learning_rate": 2.3911370364920194e-06, "loss": 17.8267, "step": 37447 }, { "epoch": 0.6845193485294386, "grad_norm": 5.819034928736577, "learning_rate": 2.3908845181749617e-06, "loss": 17.274, "step": 37448 }, { "epoch": 0.6845376277258852, "grad_norm": 6.324557256405087, "learning_rate": 2.3906320090026254e-06, "loss": 17.3572, "step": 37449 }, { "epoch": 0.6845559069223317, "grad_norm": 6.211058902167662, "learning_rate": 2.3903795089758995e-06, "loss": 17.1076, "step": 37450 }, { "epoch": 0.6845741861187782, "grad_norm": 5.26321080373125, "learning_rate": 2.390127018095665e-06, "loss": 16.9033, "step": 37451 }, { "epoch": 0.6845924653152248, "grad_norm": 5.757282281796365, "learning_rate": 2.389874536362809e-06, "loss": 17.2452, "step": 37452 }, { "epoch": 0.6846107445116713, "grad_norm": 7.822264097249142, "learning_rate": 2.389622063778218e-06, "loss": 17.9069, "step": 37453 }, { "epoch": 0.6846290237081178, "grad_norm": 7.912423499536292, "learning_rate": 2.3893696003427754e-06, "loss": 17.9481, "step": 37454 }, { "epoch": 0.6846473029045643, "grad_norm": 6.313085202190614, "learning_rate": 2.389117146057363e-06, "loss": 17.377, "step": 37455 }, { "epoch": 0.6846655821010108, "grad_norm": 7.8210289317671435, "learning_rate": 2.388864700922871e-06, "loss": 17.2867, "step": 37456 }, { "epoch": 0.6846838612974574, "grad_norm": 6.775520410955707, "learning_rate": 2.3886122649401823e-06, "loss": 17.7017, "step": 37457 }, { "epoch": 0.6847021404939039, "grad_norm": 5.168117889602879, "learning_rate": 2.388359838110178e-06, "loss": 16.9656, "step": 37458 }, { "epoch": 0.6847204196903504, "grad_norm": 7.111934835413638, "learning_rate": 2.3881074204337485e-06, "loss": 17.5302, "step": 37459 }, { "epoch": 0.684738698886797, "grad_norm": 5.063618268624485, "learning_rate": 2.3878550119117734e-06, "loss": 16.9252, "step": 37460 }, { "epoch": 0.6847569780832434, "grad_norm": 6.20130735253331, "learning_rate": 2.38760261254514e-06, "loss": 17.192, "step": 37461 }, { "epoch": 0.68477525727969, "grad_norm": 7.708251109582328, "learning_rate": 2.387350222334735e-06, "loss": 18.0498, "step": 37462 }, { "epoch": 0.6847935364761365, "grad_norm": 6.223914079923938, "learning_rate": 2.3870978412814382e-06, "loss": 17.6427, "step": 37463 }, { "epoch": 0.684811815672583, "grad_norm": 7.2511496068562975, "learning_rate": 2.3868454693861393e-06, "loss": 18.2423, "step": 37464 }, { "epoch": 0.6848300948690296, "grad_norm": 6.611076952726419, "learning_rate": 2.38659310664972e-06, "loss": 17.6319, "step": 37465 }, { "epoch": 0.684848374065476, "grad_norm": 6.595375433493326, "learning_rate": 2.386340753073063e-06, "loss": 17.5088, "step": 37466 }, { "epoch": 0.6848666532619226, "grad_norm": 6.353798587233547, "learning_rate": 2.386088408657057e-06, "loss": 17.2892, "step": 37467 }, { "epoch": 0.6848849324583691, "grad_norm": 6.449070236383722, "learning_rate": 2.385836073402584e-06, "loss": 17.3516, "step": 37468 }, { "epoch": 0.6849032116548156, "grad_norm": 5.591111563989631, "learning_rate": 2.385583747310527e-06, "loss": 17.4692, "step": 37469 }, { "epoch": 0.6849214908512622, "grad_norm": 5.925947343877119, "learning_rate": 2.385331430381772e-06, "loss": 17.2087, "step": 37470 }, { "epoch": 0.6849397700477087, "grad_norm": 5.334109889767069, "learning_rate": 2.3850791226172055e-06, "loss": 17.0651, "step": 37471 }, { "epoch": 0.6849580492441553, "grad_norm": 6.517405692991659, "learning_rate": 2.384826824017708e-06, "loss": 17.4415, "step": 37472 }, { "epoch": 0.6849763284406017, "grad_norm": 6.027040548980383, "learning_rate": 2.384574534584167e-06, "loss": 17.6644, "step": 37473 }, { "epoch": 0.6849946076370482, "grad_norm": 7.806262951453869, "learning_rate": 2.384322254317466e-06, "loss": 18.2964, "step": 37474 }, { "epoch": 0.6850128868334948, "grad_norm": 6.200365696426968, "learning_rate": 2.384069983218486e-06, "loss": 17.4684, "step": 37475 }, { "epoch": 0.6850311660299413, "grad_norm": 5.853825359801953, "learning_rate": 2.383817721288116e-06, "loss": 17.1662, "step": 37476 }, { "epoch": 0.6850494452263879, "grad_norm": 5.643532696507572, "learning_rate": 2.3835654685272353e-06, "loss": 16.9388, "step": 37477 }, { "epoch": 0.6850677244228344, "grad_norm": 7.465002594318408, "learning_rate": 2.383313224936733e-06, "loss": 17.7648, "step": 37478 }, { "epoch": 0.6850860036192808, "grad_norm": 6.596062189813733, "learning_rate": 2.383060990517489e-06, "loss": 17.6034, "step": 37479 }, { "epoch": 0.6851042828157274, "grad_norm": 5.7853219807428475, "learning_rate": 2.3828087652703892e-06, "loss": 17.2737, "step": 37480 }, { "epoch": 0.6851225620121739, "grad_norm": 6.853066208881182, "learning_rate": 2.3825565491963186e-06, "loss": 17.6215, "step": 37481 }, { "epoch": 0.6851408412086205, "grad_norm": 6.373433747132029, "learning_rate": 2.382304342296161e-06, "loss": 17.5808, "step": 37482 }, { "epoch": 0.685159120405067, "grad_norm": 6.136555293529593, "learning_rate": 2.382052144570798e-06, "loss": 17.2779, "step": 37483 }, { "epoch": 0.6851773996015135, "grad_norm": 6.644298634598272, "learning_rate": 2.3817999560211157e-06, "loss": 17.8251, "step": 37484 }, { "epoch": 0.6851956787979601, "grad_norm": 6.3185261815852725, "learning_rate": 2.3815477766479985e-06, "loss": 17.5404, "step": 37485 }, { "epoch": 0.6852139579944065, "grad_norm": 7.152908402005945, "learning_rate": 2.381295606452327e-06, "loss": 17.6185, "step": 37486 }, { "epoch": 0.6852322371908531, "grad_norm": 6.604833199108938, "learning_rate": 2.3810434454349867e-06, "loss": 17.4273, "step": 37487 }, { "epoch": 0.6852505163872996, "grad_norm": 5.673745296444344, "learning_rate": 2.380791293596864e-06, "loss": 17.1103, "step": 37488 }, { "epoch": 0.6852687955837461, "grad_norm": 7.519850476684127, "learning_rate": 2.380539150938839e-06, "loss": 17.9667, "step": 37489 }, { "epoch": 0.6852870747801927, "grad_norm": 5.8585323419045645, "learning_rate": 2.3802870174617983e-06, "loss": 17.1568, "step": 37490 }, { "epoch": 0.6853053539766392, "grad_norm": 8.006552545531148, "learning_rate": 2.3800348931666223e-06, "loss": 17.853, "step": 37491 }, { "epoch": 0.6853236331730858, "grad_norm": 6.383716204619591, "learning_rate": 2.3797827780541987e-06, "loss": 17.5292, "step": 37492 }, { "epoch": 0.6853419123695322, "grad_norm": 6.64002369728705, "learning_rate": 2.3795306721254093e-06, "loss": 17.5056, "step": 37493 }, { "epoch": 0.6853601915659787, "grad_norm": 6.033170776290688, "learning_rate": 2.3792785753811353e-06, "loss": 17.5142, "step": 37494 }, { "epoch": 0.6853784707624253, "grad_norm": 5.948670011126817, "learning_rate": 2.3790264878222645e-06, "loss": 17.3403, "step": 37495 }, { "epoch": 0.6853967499588718, "grad_norm": 6.763956056400418, "learning_rate": 2.378774409449677e-06, "loss": 17.5694, "step": 37496 }, { "epoch": 0.6854150291553184, "grad_norm": 5.382014203511399, "learning_rate": 2.3785223402642572e-06, "loss": 17.135, "step": 37497 }, { "epoch": 0.6854333083517649, "grad_norm": 7.266367209482807, "learning_rate": 2.378270280266891e-06, "loss": 17.7386, "step": 37498 }, { "epoch": 0.6854515875482113, "grad_norm": 7.105835875614029, "learning_rate": 2.3780182294584603e-06, "loss": 17.7226, "step": 37499 }, { "epoch": 0.6854698667446579, "grad_norm": 5.801111391053806, "learning_rate": 2.377766187839846e-06, "loss": 17.0221, "step": 37500 }, { "epoch": 0.6854881459411044, "grad_norm": 7.154905403042062, "learning_rate": 2.3775141554119357e-06, "loss": 17.7477, "step": 37501 }, { "epoch": 0.685506425137551, "grad_norm": 5.671961106003856, "learning_rate": 2.3772621321756106e-06, "loss": 17.3523, "step": 37502 }, { "epoch": 0.6855247043339975, "grad_norm": 5.939541538292367, "learning_rate": 2.3770101181317525e-06, "loss": 17.2627, "step": 37503 }, { "epoch": 0.685542983530444, "grad_norm": 6.574953297936419, "learning_rate": 2.376758113281248e-06, "loss": 17.4369, "step": 37504 }, { "epoch": 0.6855612627268906, "grad_norm": 5.379654466934815, "learning_rate": 2.376506117624977e-06, "loss": 17.0645, "step": 37505 }, { "epoch": 0.685579541923337, "grad_norm": 6.406071049719178, "learning_rate": 2.3762541311638244e-06, "loss": 17.5804, "step": 37506 }, { "epoch": 0.6855978211197836, "grad_norm": 6.428848250330657, "learning_rate": 2.376002153898675e-06, "loss": 17.1465, "step": 37507 }, { "epoch": 0.6856161003162301, "grad_norm": 6.425858767619187, "learning_rate": 2.3757501858304085e-06, "loss": 17.6067, "step": 37508 }, { "epoch": 0.6856343795126766, "grad_norm": 7.227817558836295, "learning_rate": 2.375498226959912e-06, "loss": 17.9115, "step": 37509 }, { "epoch": 0.6856526587091232, "grad_norm": 7.107534916948758, "learning_rate": 2.375246277288066e-06, "loss": 17.0347, "step": 37510 }, { "epoch": 0.6856709379055697, "grad_norm": 8.176319486021175, "learning_rate": 2.3749943368157524e-06, "loss": 17.9814, "step": 37511 }, { "epoch": 0.6856892171020162, "grad_norm": 7.709503465375395, "learning_rate": 2.3747424055438577e-06, "loss": 17.8948, "step": 37512 }, { "epoch": 0.6857074962984627, "grad_norm": 6.361133000951237, "learning_rate": 2.3744904834732633e-06, "loss": 17.6379, "step": 37513 }, { "epoch": 0.6857257754949092, "grad_norm": 5.930446776369877, "learning_rate": 2.3742385706048497e-06, "loss": 17.233, "step": 37514 }, { "epoch": 0.6857440546913558, "grad_norm": 7.410832873862261, "learning_rate": 2.3739866669395024e-06, "loss": 17.8986, "step": 37515 }, { "epoch": 0.6857623338878023, "grad_norm": 5.201496475081934, "learning_rate": 2.373734772478106e-06, "loss": 16.9735, "step": 37516 }, { "epoch": 0.6857806130842489, "grad_norm": 5.513282284859099, "learning_rate": 2.3734828872215386e-06, "loss": 16.9791, "step": 37517 }, { "epoch": 0.6857988922806953, "grad_norm": 5.955809428409976, "learning_rate": 2.3732310111706885e-06, "loss": 16.8889, "step": 37518 }, { "epoch": 0.6858171714771418, "grad_norm": 4.887751695513918, "learning_rate": 2.3729791443264356e-06, "loss": 16.7268, "step": 37519 }, { "epoch": 0.6858354506735884, "grad_norm": 5.54821500413739, "learning_rate": 2.3727272866896604e-06, "loss": 17.2578, "step": 37520 }, { "epoch": 0.6858537298700349, "grad_norm": 5.139855173939308, "learning_rate": 2.372475438261251e-06, "loss": 16.8168, "step": 37521 }, { "epoch": 0.6858720090664815, "grad_norm": 5.85422826439272, "learning_rate": 2.3722235990420845e-06, "loss": 17.166, "step": 37522 }, { "epoch": 0.685890288262928, "grad_norm": 5.352752113295239, "learning_rate": 2.371971769033048e-06, "loss": 17.0791, "step": 37523 }, { "epoch": 0.6859085674593745, "grad_norm": 5.107645599514315, "learning_rate": 2.371719948235021e-06, "loss": 17.0044, "step": 37524 }, { "epoch": 0.685926846655821, "grad_norm": 5.092452149746818, "learning_rate": 2.3714681366488875e-06, "loss": 16.7714, "step": 37525 }, { "epoch": 0.6859451258522675, "grad_norm": 5.947807088354588, "learning_rate": 2.3712163342755318e-06, "loss": 17.1384, "step": 37526 }, { "epoch": 0.685963405048714, "grad_norm": 6.0264968794711855, "learning_rate": 2.3709645411158346e-06, "loss": 17.5116, "step": 37527 }, { "epoch": 0.6859816842451606, "grad_norm": 6.2430953017399435, "learning_rate": 2.3707127571706767e-06, "loss": 17.4257, "step": 37528 }, { "epoch": 0.6859999634416071, "grad_norm": 6.537287102203137, "learning_rate": 2.3704609824409443e-06, "loss": 17.5127, "step": 37529 }, { "epoch": 0.6860182426380537, "grad_norm": 6.174574295857909, "learning_rate": 2.370209216927518e-06, "loss": 17.463, "step": 37530 }, { "epoch": 0.6860365218345001, "grad_norm": 26.213393968968717, "learning_rate": 2.3699574606312784e-06, "loss": 18.4673, "step": 37531 }, { "epoch": 0.6860548010309466, "grad_norm": 6.7578087703405485, "learning_rate": 2.3697057135531115e-06, "loss": 17.3167, "step": 37532 }, { "epoch": 0.6860730802273932, "grad_norm": 5.381411232291561, "learning_rate": 2.369453975693896e-06, "loss": 17.006, "step": 37533 }, { "epoch": 0.6860913594238397, "grad_norm": 7.537609933571889, "learning_rate": 2.3692022470545162e-06, "loss": 17.7997, "step": 37534 }, { "epoch": 0.6861096386202863, "grad_norm": 8.02691811720448, "learning_rate": 2.368950527635856e-06, "loss": 18.5531, "step": 37535 }, { "epoch": 0.6861279178167328, "grad_norm": 6.132607861569588, "learning_rate": 2.368698817438796e-06, "loss": 17.0707, "step": 37536 }, { "epoch": 0.6861461970131792, "grad_norm": 8.912551188709, "learning_rate": 2.368447116464216e-06, "loss": 17.9661, "step": 37537 }, { "epoch": 0.6861644762096258, "grad_norm": 5.822718971588671, "learning_rate": 2.368195424713003e-06, "loss": 17.2935, "step": 37538 }, { "epoch": 0.6861827554060723, "grad_norm": 8.044002921618993, "learning_rate": 2.367943742186034e-06, "loss": 17.8269, "step": 37539 }, { "epoch": 0.6862010346025189, "grad_norm": 6.407971663044628, "learning_rate": 2.3676920688841963e-06, "loss": 17.4116, "step": 37540 }, { "epoch": 0.6862193137989654, "grad_norm": 7.19254482757744, "learning_rate": 2.367440404808367e-06, "loss": 17.4443, "step": 37541 }, { "epoch": 0.6862375929954119, "grad_norm": 5.8596967494674885, "learning_rate": 2.367188749959433e-06, "loss": 17.2833, "step": 37542 }, { "epoch": 0.6862558721918585, "grad_norm": 6.754569286692114, "learning_rate": 2.366937104338272e-06, "loss": 17.5783, "step": 37543 }, { "epoch": 0.6862741513883049, "grad_norm": 6.126179125785238, "learning_rate": 2.3666854679457704e-06, "loss": 17.3637, "step": 37544 }, { "epoch": 0.6862924305847515, "grad_norm": 6.6775941471588265, "learning_rate": 2.3664338407828052e-06, "loss": 17.5871, "step": 37545 }, { "epoch": 0.686310709781198, "grad_norm": 6.320733323052983, "learning_rate": 2.366182222850263e-06, "loss": 17.627, "step": 37546 }, { "epoch": 0.6863289889776445, "grad_norm": 5.915581486529548, "learning_rate": 2.365930614149023e-06, "loss": 17.3832, "step": 37547 }, { "epoch": 0.6863472681740911, "grad_norm": 6.397227731371567, "learning_rate": 2.3656790146799662e-06, "loss": 17.4622, "step": 37548 }, { "epoch": 0.6863655473705376, "grad_norm": 4.7008687889391, "learning_rate": 2.365427424443978e-06, "loss": 16.8675, "step": 37549 }, { "epoch": 0.6863838265669842, "grad_norm": 6.392063506824559, "learning_rate": 2.365175843441935e-06, "loss": 17.3802, "step": 37550 }, { "epoch": 0.6864021057634306, "grad_norm": 6.558238901576026, "learning_rate": 2.3649242716747232e-06, "loss": 17.6616, "step": 37551 }, { "epoch": 0.6864203849598771, "grad_norm": 6.237348034982286, "learning_rate": 2.364672709143225e-06, "loss": 17.6535, "step": 37552 }, { "epoch": 0.6864386641563237, "grad_norm": 6.508537736801904, "learning_rate": 2.3644211558483183e-06, "loss": 17.2498, "step": 37553 }, { "epoch": 0.6864569433527702, "grad_norm": 5.803731391813441, "learning_rate": 2.364169611790888e-06, "loss": 17.1408, "step": 37554 }, { "epoch": 0.6864752225492168, "grad_norm": 6.908235069078391, "learning_rate": 2.363918076971815e-06, "loss": 17.4945, "step": 37555 }, { "epoch": 0.6864935017456633, "grad_norm": 9.100088635302122, "learning_rate": 2.3636665513919787e-06, "loss": 18.3732, "step": 37556 }, { "epoch": 0.6865117809421097, "grad_norm": 6.476636535264194, "learning_rate": 2.3634150350522637e-06, "loss": 17.5305, "step": 37557 }, { "epoch": 0.6865300601385563, "grad_norm": 7.954643802693819, "learning_rate": 2.3631635279535508e-06, "loss": 18.0978, "step": 37558 }, { "epoch": 0.6865483393350028, "grad_norm": 5.705461043859583, "learning_rate": 2.3629120300967185e-06, "loss": 16.9561, "step": 37559 }, { "epoch": 0.6865666185314494, "grad_norm": 8.664981281779673, "learning_rate": 2.362660541482651e-06, "loss": 17.8934, "step": 37560 }, { "epoch": 0.6865848977278959, "grad_norm": 5.164557553620289, "learning_rate": 2.3624090621122315e-06, "loss": 17.2385, "step": 37561 }, { "epoch": 0.6866031769243424, "grad_norm": 6.076969805262541, "learning_rate": 2.3621575919863373e-06, "loss": 17.3548, "step": 37562 }, { "epoch": 0.686621456120789, "grad_norm": 5.907538882777921, "learning_rate": 2.3619061311058534e-06, "loss": 17.2207, "step": 37563 }, { "epoch": 0.6866397353172354, "grad_norm": 7.165782210853167, "learning_rate": 2.36165467947166e-06, "loss": 17.8937, "step": 37564 }, { "epoch": 0.686658014513682, "grad_norm": 5.6542689642834425, "learning_rate": 2.3614032370846357e-06, "loss": 16.9461, "step": 37565 }, { "epoch": 0.6866762937101285, "grad_norm": 6.651367988933812, "learning_rate": 2.361151803945666e-06, "loss": 17.368, "step": 37566 }, { "epoch": 0.686694572906575, "grad_norm": 7.543810676693417, "learning_rate": 2.360900380055631e-06, "loss": 18.0172, "step": 37567 }, { "epoch": 0.6867128521030216, "grad_norm": 6.960212891776729, "learning_rate": 2.3606489654154086e-06, "loss": 17.8912, "step": 37568 }, { "epoch": 0.6867311312994681, "grad_norm": 8.5197410316685, "learning_rate": 2.3603975600258827e-06, "loss": 18.4015, "step": 37569 }, { "epoch": 0.6867494104959146, "grad_norm": 6.4271337551021395, "learning_rate": 2.360146163887934e-06, "loss": 17.5914, "step": 37570 }, { "epoch": 0.6867676896923611, "grad_norm": 7.40129189629271, "learning_rate": 2.359894777002446e-06, "loss": 17.3785, "step": 37571 }, { "epoch": 0.6867859688888076, "grad_norm": 5.12351161168048, "learning_rate": 2.3596433993702976e-06, "loss": 17.0268, "step": 37572 }, { "epoch": 0.6868042480852542, "grad_norm": 5.116863853068879, "learning_rate": 2.3593920309923685e-06, "loss": 17.046, "step": 37573 }, { "epoch": 0.6868225272817007, "grad_norm": 6.700966383144303, "learning_rate": 2.359140671869543e-06, "loss": 17.3913, "step": 37574 }, { "epoch": 0.6868408064781473, "grad_norm": 7.18249720851221, "learning_rate": 2.3588893220027003e-06, "loss": 17.6361, "step": 37575 }, { "epoch": 0.6868590856745938, "grad_norm": 5.4032958591007825, "learning_rate": 2.358637981392719e-06, "loss": 17.1512, "step": 37576 }, { "epoch": 0.6868773648710402, "grad_norm": 9.695725503724708, "learning_rate": 2.3583866500404846e-06, "loss": 17.3419, "step": 37577 }, { "epoch": 0.6868956440674868, "grad_norm": 5.866693734262698, "learning_rate": 2.358135327946874e-06, "loss": 17.5409, "step": 37578 }, { "epoch": 0.6869139232639333, "grad_norm": 5.8887436689137065, "learning_rate": 2.3578840151127698e-06, "loss": 17.5298, "step": 37579 }, { "epoch": 0.6869322024603799, "grad_norm": 5.975247229652367, "learning_rate": 2.357632711539054e-06, "loss": 17.3486, "step": 37580 }, { "epoch": 0.6869504816568264, "grad_norm": 7.0737590864991216, "learning_rate": 2.357381417226607e-06, "loss": 17.7944, "step": 37581 }, { "epoch": 0.6869687608532729, "grad_norm": 7.893499659449013, "learning_rate": 2.357130132176307e-06, "loss": 18.0787, "step": 37582 }, { "epoch": 0.6869870400497194, "grad_norm": 6.770539603064153, "learning_rate": 2.356878856389038e-06, "loss": 17.35, "step": 37583 }, { "epoch": 0.6870053192461659, "grad_norm": 6.466283497623209, "learning_rate": 2.3566275898656775e-06, "loss": 17.3779, "step": 37584 }, { "epoch": 0.6870235984426125, "grad_norm": 6.7300230657270355, "learning_rate": 2.3563763326071097e-06, "loss": 17.1713, "step": 37585 }, { "epoch": 0.687041877639059, "grad_norm": 6.9299621134903955, "learning_rate": 2.3561250846142142e-06, "loss": 17.7614, "step": 37586 }, { "epoch": 0.6870601568355055, "grad_norm": 5.837553726010773, "learning_rate": 2.3558738458878685e-06, "loss": 17.1084, "step": 37587 }, { "epoch": 0.6870784360319521, "grad_norm": 6.2432851495967325, "learning_rate": 2.355622616428955e-06, "loss": 17.4425, "step": 37588 }, { "epoch": 0.6870967152283985, "grad_norm": 5.593262357525599, "learning_rate": 2.355371396238357e-06, "loss": 17.0147, "step": 37589 }, { "epoch": 0.6871149944248451, "grad_norm": 6.612729031562221, "learning_rate": 2.355120185316951e-06, "loss": 17.3187, "step": 37590 }, { "epoch": 0.6871332736212916, "grad_norm": 9.688162612432887, "learning_rate": 2.3548689836656213e-06, "loss": 17.54, "step": 37591 }, { "epoch": 0.6871515528177381, "grad_norm": 6.404635998681277, "learning_rate": 2.3546177912852457e-06, "loss": 17.7106, "step": 37592 }, { "epoch": 0.6871698320141847, "grad_norm": 5.633399771515448, "learning_rate": 2.3543666081767036e-06, "loss": 17.1927, "step": 37593 }, { "epoch": 0.6871881112106312, "grad_norm": 8.663547090540607, "learning_rate": 2.354115434340879e-06, "loss": 18.3127, "step": 37594 }, { "epoch": 0.6872063904070776, "grad_norm": 6.378633353963583, "learning_rate": 2.35386426977865e-06, "loss": 17.3563, "step": 37595 }, { "epoch": 0.6872246696035242, "grad_norm": 5.741621458982717, "learning_rate": 2.353613114490895e-06, "loss": 16.9748, "step": 37596 }, { "epoch": 0.6872429487999707, "grad_norm": 6.28047184199898, "learning_rate": 2.353361968478497e-06, "loss": 17.0973, "step": 37597 }, { "epoch": 0.6872612279964173, "grad_norm": 6.845886996049697, "learning_rate": 2.353110831742336e-06, "loss": 17.6284, "step": 37598 }, { "epoch": 0.6872795071928638, "grad_norm": 7.381411577864183, "learning_rate": 2.352859704283292e-06, "loss": 17.8386, "step": 37599 }, { "epoch": 0.6872977863893103, "grad_norm": 5.99221503117008, "learning_rate": 2.3526085861022468e-06, "loss": 17.3388, "step": 37600 }, { "epoch": 0.6873160655857569, "grad_norm": 8.567102380717685, "learning_rate": 2.3523574772000763e-06, "loss": 17.9831, "step": 37601 }, { "epoch": 0.6873343447822033, "grad_norm": 4.998670982393673, "learning_rate": 2.3521063775776647e-06, "loss": 16.982, "step": 37602 }, { "epoch": 0.6873526239786499, "grad_norm": 7.133230285228481, "learning_rate": 2.3518552872358906e-06, "loss": 17.7567, "step": 37603 }, { "epoch": 0.6873709031750964, "grad_norm": 5.676064181105455, "learning_rate": 2.351604206175632e-06, "loss": 17.1355, "step": 37604 }, { "epoch": 0.6873891823715429, "grad_norm": 5.038596007772206, "learning_rate": 2.351353134397773e-06, "loss": 17.0448, "step": 37605 }, { "epoch": 0.6874074615679895, "grad_norm": 8.686055883483887, "learning_rate": 2.351102071903189e-06, "loss": 17.9511, "step": 37606 }, { "epoch": 0.687425740764436, "grad_norm": 6.704773468103451, "learning_rate": 2.350851018692763e-06, "loss": 17.5718, "step": 37607 }, { "epoch": 0.6874440199608826, "grad_norm": 6.70788989367047, "learning_rate": 2.350599974767376e-06, "loss": 17.6079, "step": 37608 }, { "epoch": 0.687462299157329, "grad_norm": 6.048451000051333, "learning_rate": 2.350348940127906e-06, "loss": 17.0944, "step": 37609 }, { "epoch": 0.6874805783537755, "grad_norm": 5.268389890842812, "learning_rate": 2.3500979147752312e-06, "loss": 17.041, "step": 37610 }, { "epoch": 0.6874988575502221, "grad_norm": 6.441547331963987, "learning_rate": 2.349846898710235e-06, "loss": 17.6045, "step": 37611 }, { "epoch": 0.6875171367466686, "grad_norm": 5.32322338019863, "learning_rate": 2.3495958919337957e-06, "loss": 17.0363, "step": 37612 }, { "epoch": 0.6875354159431152, "grad_norm": 6.885754963159483, "learning_rate": 2.349344894446791e-06, "loss": 17.4844, "step": 37613 }, { "epoch": 0.6875536951395617, "grad_norm": 6.400287080408149, "learning_rate": 2.3490939062501024e-06, "loss": 17.2239, "step": 37614 }, { "epoch": 0.6875719743360081, "grad_norm": 5.844724277125517, "learning_rate": 2.3488429273446097e-06, "loss": 17.3215, "step": 37615 }, { "epoch": 0.6875902535324547, "grad_norm": 7.470372148865899, "learning_rate": 2.3485919577311938e-06, "loss": 16.9706, "step": 37616 }, { "epoch": 0.6876085327289012, "grad_norm": 6.774932791323081, "learning_rate": 2.3483409974107335e-06, "loss": 17.4644, "step": 37617 }, { "epoch": 0.6876268119253478, "grad_norm": 6.220810731528927, "learning_rate": 2.3480900463841063e-06, "loss": 17.3354, "step": 37618 }, { "epoch": 0.6876450911217943, "grad_norm": 5.641051272680165, "learning_rate": 2.3478391046521944e-06, "loss": 16.992, "step": 37619 }, { "epoch": 0.6876633703182408, "grad_norm": 5.204202290346681, "learning_rate": 2.3475881722158766e-06, "loss": 17.0068, "step": 37620 }, { "epoch": 0.6876816495146874, "grad_norm": 5.5840439373177695, "learning_rate": 2.3473372490760304e-06, "loss": 17.2939, "step": 37621 }, { "epoch": 0.6876999287111338, "grad_norm": 6.085970579664341, "learning_rate": 2.347086335233539e-06, "loss": 16.9593, "step": 37622 }, { "epoch": 0.6877182079075804, "grad_norm": 5.199583097746518, "learning_rate": 2.346835430689278e-06, "loss": 17.099, "step": 37623 }, { "epoch": 0.6877364871040269, "grad_norm": 4.696863301469171, "learning_rate": 2.346584535444128e-06, "loss": 16.8295, "step": 37624 }, { "epoch": 0.6877547663004734, "grad_norm": 5.731212134356717, "learning_rate": 2.3463336494989713e-06, "loss": 17.0937, "step": 37625 }, { "epoch": 0.68777304549692, "grad_norm": 5.180172246523978, "learning_rate": 2.3460827728546847e-06, "loss": 16.9241, "step": 37626 }, { "epoch": 0.6877913246933665, "grad_norm": 4.9520556209890065, "learning_rate": 2.345831905512146e-06, "loss": 16.8876, "step": 37627 }, { "epoch": 0.687809603889813, "grad_norm": 9.173105605626747, "learning_rate": 2.3455810474722374e-06, "loss": 17.9171, "step": 37628 }, { "epoch": 0.6878278830862595, "grad_norm": 7.0980517387705575, "learning_rate": 2.3453301987358356e-06, "loss": 17.6866, "step": 37629 }, { "epoch": 0.687846162282706, "grad_norm": 6.1964025396681865, "learning_rate": 2.345079359303823e-06, "loss": 17.4196, "step": 37630 }, { "epoch": 0.6878644414791526, "grad_norm": 8.21801550329978, "learning_rate": 2.3448285291770765e-06, "loss": 18.2408, "step": 37631 }, { "epoch": 0.6878827206755991, "grad_norm": 7.033064983091339, "learning_rate": 2.3445777083564734e-06, "loss": 17.3871, "step": 37632 }, { "epoch": 0.6879009998720457, "grad_norm": 6.4241860401898485, "learning_rate": 2.3443268968428963e-06, "loss": 17.3634, "step": 37633 }, { "epoch": 0.6879192790684922, "grad_norm": 6.77382266797752, "learning_rate": 2.3440760946372238e-06, "loss": 17.5581, "step": 37634 }, { "epoch": 0.6879375582649386, "grad_norm": 6.5549148741879035, "learning_rate": 2.3438253017403323e-06, "loss": 17.9025, "step": 37635 }, { "epoch": 0.6879558374613852, "grad_norm": 7.4998748173854, "learning_rate": 2.343574518153105e-06, "loss": 17.7658, "step": 37636 }, { "epoch": 0.6879741166578317, "grad_norm": 7.06240838329765, "learning_rate": 2.3433237438764177e-06, "loss": 17.4561, "step": 37637 }, { "epoch": 0.6879923958542783, "grad_norm": 6.272646287091623, "learning_rate": 2.3430729789111488e-06, "loss": 17.0055, "step": 37638 }, { "epoch": 0.6880106750507248, "grad_norm": 6.324754283662337, "learning_rate": 2.342822223258181e-06, "loss": 17.5277, "step": 37639 }, { "epoch": 0.6880289542471713, "grad_norm": 5.799749564853522, "learning_rate": 2.34257147691839e-06, "loss": 17.1349, "step": 37640 }, { "epoch": 0.6880472334436178, "grad_norm": 11.866088320074203, "learning_rate": 2.3423207398926532e-06, "loss": 18.8877, "step": 37641 }, { "epoch": 0.6880655126400643, "grad_norm": 6.369066122624521, "learning_rate": 2.3420700121818525e-06, "loss": 17.2832, "step": 37642 }, { "epoch": 0.6880837918365109, "grad_norm": 7.238132849142952, "learning_rate": 2.341819293786867e-06, "loss": 17.782, "step": 37643 }, { "epoch": 0.6881020710329574, "grad_norm": 8.104820548310936, "learning_rate": 2.3415685847085724e-06, "loss": 17.9555, "step": 37644 }, { "epoch": 0.6881203502294039, "grad_norm": 6.553948658752259, "learning_rate": 2.3413178849478515e-06, "loss": 17.6297, "step": 37645 }, { "epoch": 0.6881386294258505, "grad_norm": 6.241457673137385, "learning_rate": 2.3410671945055784e-06, "loss": 17.254, "step": 37646 }, { "epoch": 0.688156908622297, "grad_norm": 5.783274002242109, "learning_rate": 2.340816513382636e-06, "loss": 17.1773, "step": 37647 }, { "epoch": 0.6881751878187435, "grad_norm": 7.023250730040678, "learning_rate": 2.340565841579901e-06, "loss": 16.948, "step": 37648 }, { "epoch": 0.68819346701519, "grad_norm": 7.258064885494772, "learning_rate": 2.34031517909825e-06, "loss": 17.4316, "step": 37649 }, { "epoch": 0.6882117462116365, "grad_norm": 6.553854354322299, "learning_rate": 2.3400645259385652e-06, "loss": 17.8287, "step": 37650 }, { "epoch": 0.6882300254080831, "grad_norm": 5.918068355079874, "learning_rate": 2.339813882101722e-06, "loss": 16.9485, "step": 37651 }, { "epoch": 0.6882483046045296, "grad_norm": 4.933240000248516, "learning_rate": 2.3395632475886e-06, "loss": 16.951, "step": 37652 }, { "epoch": 0.6882665838009762, "grad_norm": 6.185843821166238, "learning_rate": 2.33931262240008e-06, "loss": 17.2623, "step": 37653 }, { "epoch": 0.6882848629974226, "grad_norm": 5.489413199789348, "learning_rate": 2.3390620065370386e-06, "loss": 17.2806, "step": 37654 }, { "epoch": 0.6883031421938691, "grad_norm": 6.333411652295854, "learning_rate": 2.3388114000003513e-06, "loss": 17.148, "step": 37655 }, { "epoch": 0.6883214213903157, "grad_norm": 7.107005807004858, "learning_rate": 2.3385608027909016e-06, "loss": 17.5738, "step": 37656 }, { "epoch": 0.6883397005867622, "grad_norm": 5.8704462983675505, "learning_rate": 2.3383102149095653e-06, "loss": 17.1892, "step": 37657 }, { "epoch": 0.6883579797832088, "grad_norm": 6.649577778086374, "learning_rate": 2.338059636357219e-06, "loss": 17.4341, "step": 37658 }, { "epoch": 0.6883762589796553, "grad_norm": 5.107169483642508, "learning_rate": 2.337809067134744e-06, "loss": 16.8892, "step": 37659 }, { "epoch": 0.6883945381761017, "grad_norm": 6.932521121106033, "learning_rate": 2.3375585072430157e-06, "loss": 17.4745, "step": 37660 }, { "epoch": 0.6884128173725483, "grad_norm": 6.323042816891555, "learning_rate": 2.3373079566829136e-06, "loss": 17.5139, "step": 37661 }, { "epoch": 0.6884310965689948, "grad_norm": 6.159611210362977, "learning_rate": 2.337057415455318e-06, "loss": 17.2493, "step": 37662 }, { "epoch": 0.6884493757654413, "grad_norm": 5.700232334734231, "learning_rate": 2.3368068835611033e-06, "loss": 17.0568, "step": 37663 }, { "epoch": 0.6884676549618879, "grad_norm": 5.1565102031878, "learning_rate": 2.3365563610011517e-06, "loss": 17.0554, "step": 37664 }, { "epoch": 0.6884859341583344, "grad_norm": 6.186106533384554, "learning_rate": 2.3363058477763384e-06, "loss": 17.4017, "step": 37665 }, { "epoch": 0.688504213354781, "grad_norm": 6.620836118002073, "learning_rate": 2.33605534388754e-06, "loss": 17.1783, "step": 37666 }, { "epoch": 0.6885224925512274, "grad_norm": 6.255740676218998, "learning_rate": 2.3358048493356383e-06, "loss": 17.4016, "step": 37667 }, { "epoch": 0.6885407717476739, "grad_norm": 8.868441424192573, "learning_rate": 2.335554364121508e-06, "loss": 18.5777, "step": 37668 }, { "epoch": 0.6885590509441205, "grad_norm": 5.8470835227702675, "learning_rate": 2.33530388824603e-06, "loss": 17.1746, "step": 37669 }, { "epoch": 0.688577330140567, "grad_norm": 7.313386612580987, "learning_rate": 2.335053421710079e-06, "loss": 17.5444, "step": 37670 }, { "epoch": 0.6885956093370136, "grad_norm": 5.95129046756668, "learning_rate": 2.334802964514536e-06, "loss": 17.0882, "step": 37671 }, { "epoch": 0.6886138885334601, "grad_norm": 6.373789435677664, "learning_rate": 2.3345525166602755e-06, "loss": 17.6095, "step": 37672 }, { "epoch": 0.6886321677299065, "grad_norm": 6.439045439944368, "learning_rate": 2.334302078148179e-06, "loss": 17.3744, "step": 37673 }, { "epoch": 0.6886504469263531, "grad_norm": 5.4405999375327205, "learning_rate": 2.334051648979123e-06, "loss": 17.0417, "step": 37674 }, { "epoch": 0.6886687261227996, "grad_norm": 7.0408449271381315, "learning_rate": 2.333801229153982e-06, "loss": 17.6185, "step": 37675 }, { "epoch": 0.6886870053192462, "grad_norm": 8.436017180672176, "learning_rate": 2.333550818673639e-06, "loss": 18.1633, "step": 37676 }, { "epoch": 0.6887052845156927, "grad_norm": 5.651940376454262, "learning_rate": 2.333300417538966e-06, "loss": 17.0949, "step": 37677 }, { "epoch": 0.6887235637121392, "grad_norm": 6.1607808971159725, "learning_rate": 2.3330500257508443e-06, "loss": 17.157, "step": 37678 }, { "epoch": 0.6887418429085858, "grad_norm": 4.905128833548839, "learning_rate": 2.332799643310153e-06, "loss": 16.9874, "step": 37679 }, { "epoch": 0.6887601221050322, "grad_norm": 5.747649688391892, "learning_rate": 2.332549270217765e-06, "loss": 17.1007, "step": 37680 }, { "epoch": 0.6887784013014788, "grad_norm": 9.249639065142038, "learning_rate": 2.3322989064745626e-06, "loss": 17.9908, "step": 37681 }, { "epoch": 0.6887966804979253, "grad_norm": 5.563715165660128, "learning_rate": 2.3320485520814213e-06, "loss": 17.1447, "step": 37682 }, { "epoch": 0.6888149596943718, "grad_norm": 7.721801630566798, "learning_rate": 2.3317982070392157e-06, "loss": 17.8539, "step": 37683 }, { "epoch": 0.6888332388908184, "grad_norm": 6.4289033278456, "learning_rate": 2.3315478713488282e-06, "loss": 17.3443, "step": 37684 }, { "epoch": 0.6888515180872649, "grad_norm": 6.878332873520475, "learning_rate": 2.3312975450111337e-06, "loss": 17.5702, "step": 37685 }, { "epoch": 0.6888697972837114, "grad_norm": 5.053174921858533, "learning_rate": 2.331047228027008e-06, "loss": 16.9136, "step": 37686 }, { "epoch": 0.6888880764801579, "grad_norm": 6.601064786065748, "learning_rate": 2.33079692039733e-06, "loss": 17.5294, "step": 37687 }, { "epoch": 0.6889063556766044, "grad_norm": 6.420890320347009, "learning_rate": 2.330546622122979e-06, "loss": 17.2534, "step": 37688 }, { "epoch": 0.688924634873051, "grad_norm": 7.101638548420139, "learning_rate": 2.3302963332048288e-06, "loss": 17.8177, "step": 37689 }, { "epoch": 0.6889429140694975, "grad_norm": 6.720924958312313, "learning_rate": 2.33004605364376e-06, "loss": 17.2159, "step": 37690 }, { "epoch": 0.6889611932659441, "grad_norm": 5.762234355851464, "learning_rate": 2.3297957834406466e-06, "loss": 17.1776, "step": 37691 }, { "epoch": 0.6889794724623906, "grad_norm": 7.626126099854367, "learning_rate": 2.329545522596369e-06, "loss": 17.1516, "step": 37692 }, { "epoch": 0.688997751658837, "grad_norm": 8.014467167266869, "learning_rate": 2.329295271111802e-06, "loss": 17.9333, "step": 37693 }, { "epoch": 0.6890160308552836, "grad_norm": 5.729452674772957, "learning_rate": 2.329045028987822e-06, "loss": 17.0648, "step": 37694 }, { "epoch": 0.6890343100517301, "grad_norm": 7.206654978115458, "learning_rate": 2.328794796225309e-06, "loss": 17.559, "step": 37695 }, { "epoch": 0.6890525892481767, "grad_norm": 4.825700799992084, "learning_rate": 2.3285445728251364e-06, "loss": 16.8198, "step": 37696 }, { "epoch": 0.6890708684446232, "grad_norm": 6.208193941492372, "learning_rate": 2.328294358788184e-06, "loss": 17.5784, "step": 37697 }, { "epoch": 0.6890891476410697, "grad_norm": 5.267102718163153, "learning_rate": 2.3280441541153293e-06, "loss": 17.0248, "step": 37698 }, { "epoch": 0.6891074268375162, "grad_norm": 6.421802962429674, "learning_rate": 2.3277939588074484e-06, "loss": 17.3171, "step": 37699 }, { "epoch": 0.6891257060339627, "grad_norm": 5.781019222566382, "learning_rate": 2.3275437728654154e-06, "loss": 17.3269, "step": 37700 }, { "epoch": 0.6891439852304093, "grad_norm": 6.484231247213659, "learning_rate": 2.3272935962901115e-06, "loss": 17.4105, "step": 37701 }, { "epoch": 0.6891622644268558, "grad_norm": 7.892131455423539, "learning_rate": 2.327043429082412e-06, "loss": 17.7512, "step": 37702 }, { "epoch": 0.6891805436233023, "grad_norm": 6.317760816084395, "learning_rate": 2.3267932712431914e-06, "loss": 17.5835, "step": 37703 }, { "epoch": 0.6891988228197489, "grad_norm": 5.618456387993537, "learning_rate": 2.3265431227733297e-06, "loss": 16.9099, "step": 37704 }, { "epoch": 0.6892171020161953, "grad_norm": 6.05660134765035, "learning_rate": 2.3262929836737013e-06, "loss": 17.1961, "step": 37705 }, { "epoch": 0.6892353812126419, "grad_norm": 9.837795228483325, "learning_rate": 2.3260428539451835e-06, "loss": 18.6843, "step": 37706 }, { "epoch": 0.6892536604090884, "grad_norm": 5.910424459918115, "learning_rate": 2.3257927335886553e-06, "loss": 17.226, "step": 37707 }, { "epoch": 0.6892719396055349, "grad_norm": 5.342599332894044, "learning_rate": 2.3255426226049906e-06, "loss": 17.0731, "step": 37708 }, { "epoch": 0.6892902188019815, "grad_norm": 5.677527436542327, "learning_rate": 2.3252925209950676e-06, "loss": 17.0967, "step": 37709 }, { "epoch": 0.689308497998428, "grad_norm": 5.9329367240204345, "learning_rate": 2.3250424287597627e-06, "loss": 17.2465, "step": 37710 }, { "epoch": 0.6893267771948746, "grad_norm": 6.123639729552575, "learning_rate": 2.32479234589995e-06, "loss": 17.0088, "step": 37711 }, { "epoch": 0.689345056391321, "grad_norm": 7.8231234284649425, "learning_rate": 2.32454227241651e-06, "loss": 18.3096, "step": 37712 }, { "epoch": 0.6893633355877675, "grad_norm": 5.56980294122907, "learning_rate": 2.324292208310317e-06, "loss": 17.2497, "step": 37713 }, { "epoch": 0.6893816147842141, "grad_norm": 6.344107740554613, "learning_rate": 2.3240421535822454e-06, "loss": 17.4812, "step": 37714 }, { "epoch": 0.6893998939806606, "grad_norm": 5.712308077512127, "learning_rate": 2.3237921082331745e-06, "loss": 17.0222, "step": 37715 }, { "epoch": 0.6894181731771072, "grad_norm": 5.669172613776361, "learning_rate": 2.3235420722639816e-06, "loss": 17.2021, "step": 37716 }, { "epoch": 0.6894364523735537, "grad_norm": 7.724801102947028, "learning_rate": 2.32329204567554e-06, "loss": 18.0565, "step": 37717 }, { "epoch": 0.6894547315700001, "grad_norm": 5.3622835928041646, "learning_rate": 2.323042028468729e-06, "loss": 16.9807, "step": 37718 }, { "epoch": 0.6894730107664467, "grad_norm": 5.782005618218726, "learning_rate": 2.3227920206444234e-06, "loss": 16.9166, "step": 37719 }, { "epoch": 0.6894912899628932, "grad_norm": 7.856974814859954, "learning_rate": 2.322542022203497e-06, "loss": 17.7009, "step": 37720 }, { "epoch": 0.6895095691593398, "grad_norm": 6.899031092315616, "learning_rate": 2.3222920331468315e-06, "loss": 17.9122, "step": 37721 }, { "epoch": 0.6895278483557863, "grad_norm": 9.28201650880762, "learning_rate": 2.3220420534752972e-06, "loss": 18.1129, "step": 37722 }, { "epoch": 0.6895461275522328, "grad_norm": 5.203718593097285, "learning_rate": 2.3217920831897757e-06, "loss": 17.1772, "step": 37723 }, { "epoch": 0.6895644067486794, "grad_norm": 4.72648444483743, "learning_rate": 2.321542122291138e-06, "loss": 16.9175, "step": 37724 }, { "epoch": 0.6895826859451258, "grad_norm": 6.991627742091572, "learning_rate": 2.3212921707802638e-06, "loss": 18.3543, "step": 37725 }, { "epoch": 0.6896009651415724, "grad_norm": 7.922698214785699, "learning_rate": 2.321042228658029e-06, "loss": 18.111, "step": 37726 }, { "epoch": 0.6896192443380189, "grad_norm": 6.7210083122372275, "learning_rate": 2.3207922959253094e-06, "loss": 17.6777, "step": 37727 }, { "epoch": 0.6896375235344654, "grad_norm": 6.1978591021975085, "learning_rate": 2.320542372582978e-06, "loss": 17.2617, "step": 37728 }, { "epoch": 0.689655802730912, "grad_norm": 6.878777698553315, "learning_rate": 2.320292458631915e-06, "loss": 17.7965, "step": 37729 }, { "epoch": 0.6896740819273585, "grad_norm": 5.45990118229641, "learning_rate": 2.320042554072994e-06, "loss": 16.7469, "step": 37730 }, { "epoch": 0.6896923611238049, "grad_norm": 6.98005499369845, "learning_rate": 2.31979265890709e-06, "loss": 17.4441, "step": 37731 }, { "epoch": 0.6897106403202515, "grad_norm": 7.578158097501099, "learning_rate": 2.31954277313508e-06, "loss": 17.9245, "step": 37732 }, { "epoch": 0.689728919516698, "grad_norm": 5.6452855732051885, "learning_rate": 2.319292896757842e-06, "loss": 16.9878, "step": 37733 }, { "epoch": 0.6897471987131446, "grad_norm": 4.71573919629832, "learning_rate": 2.319043029776248e-06, "loss": 16.9123, "step": 37734 }, { "epoch": 0.6897654779095911, "grad_norm": 6.394895754849569, "learning_rate": 2.3187931721911765e-06, "loss": 17.1194, "step": 37735 }, { "epoch": 0.6897837571060376, "grad_norm": 5.590644433893598, "learning_rate": 2.3185433240035016e-06, "loss": 17.1999, "step": 37736 }, { "epoch": 0.6898020363024842, "grad_norm": 5.38814408468069, "learning_rate": 2.318293485214101e-06, "loss": 16.9226, "step": 37737 }, { "epoch": 0.6898203154989306, "grad_norm": 7.825106687327968, "learning_rate": 2.318043655823849e-06, "loss": 17.6411, "step": 37738 }, { "epoch": 0.6898385946953772, "grad_norm": 5.359079698459973, "learning_rate": 2.3177938358336196e-06, "loss": 16.905, "step": 37739 }, { "epoch": 0.6898568738918237, "grad_norm": 6.637600175079701, "learning_rate": 2.317544025244291e-06, "loss": 17.489, "step": 37740 }, { "epoch": 0.6898751530882702, "grad_norm": 7.8868393415973514, "learning_rate": 2.317294224056737e-06, "loss": 18.195, "step": 37741 }, { "epoch": 0.6898934322847168, "grad_norm": 4.553840383338941, "learning_rate": 2.3170444322718335e-06, "loss": 16.7559, "step": 37742 }, { "epoch": 0.6899117114811633, "grad_norm": 5.332374361981861, "learning_rate": 2.3167946498904586e-06, "loss": 16.8514, "step": 37743 }, { "epoch": 0.6899299906776099, "grad_norm": 7.4826792149352555, "learning_rate": 2.316544876913485e-06, "loss": 17.6518, "step": 37744 }, { "epoch": 0.6899482698740563, "grad_norm": 6.935196901947951, "learning_rate": 2.3162951133417877e-06, "loss": 17.3504, "step": 37745 }, { "epoch": 0.6899665490705028, "grad_norm": 7.492940135697539, "learning_rate": 2.3160453591762445e-06, "loss": 17.5189, "step": 37746 }, { "epoch": 0.6899848282669494, "grad_norm": 6.231853726890648, "learning_rate": 2.3157956144177294e-06, "loss": 17.456, "step": 37747 }, { "epoch": 0.6900031074633959, "grad_norm": 7.639038267939856, "learning_rate": 2.3155458790671165e-06, "loss": 17.9181, "step": 37748 }, { "epoch": 0.6900213866598425, "grad_norm": 6.512059034550306, "learning_rate": 2.315296153125284e-06, "loss": 17.8237, "step": 37749 }, { "epoch": 0.690039665856289, "grad_norm": 6.395416836281836, "learning_rate": 2.3150464365931032e-06, "loss": 17.5726, "step": 37750 }, { "epoch": 0.6900579450527354, "grad_norm": 6.211879036083939, "learning_rate": 2.3147967294714515e-06, "loss": 17.1366, "step": 37751 }, { "epoch": 0.690076224249182, "grad_norm": 7.265670759205754, "learning_rate": 2.3145470317612066e-06, "loss": 17.938, "step": 37752 }, { "epoch": 0.6900945034456285, "grad_norm": 7.328300481583021, "learning_rate": 2.314297343463239e-06, "loss": 17.2801, "step": 37753 }, { "epoch": 0.6901127826420751, "grad_norm": 6.83364994173807, "learning_rate": 2.314047664578429e-06, "loss": 17.7026, "step": 37754 }, { "epoch": 0.6901310618385216, "grad_norm": 6.394953128607231, "learning_rate": 2.313797995107648e-06, "loss": 17.4612, "step": 37755 }, { "epoch": 0.6901493410349681, "grad_norm": 6.243088350275333, "learning_rate": 2.3135483350517703e-06, "loss": 17.4782, "step": 37756 }, { "epoch": 0.6901676202314146, "grad_norm": 5.753688173338009, "learning_rate": 2.313298684411674e-06, "loss": 17.2085, "step": 37757 }, { "epoch": 0.6901858994278611, "grad_norm": 6.019113320485677, "learning_rate": 2.313049043188233e-06, "loss": 17.37, "step": 37758 }, { "epoch": 0.6902041786243077, "grad_norm": 5.9870731970324025, "learning_rate": 2.3127994113823204e-06, "loss": 17.2276, "step": 37759 }, { "epoch": 0.6902224578207542, "grad_norm": 6.464310941975805, "learning_rate": 2.3125497889948123e-06, "loss": 17.5162, "step": 37760 }, { "epoch": 0.6902407370172007, "grad_norm": 6.800548169681349, "learning_rate": 2.3123001760265863e-06, "loss": 17.7286, "step": 37761 }, { "epoch": 0.6902590162136473, "grad_norm": 6.855460500652609, "learning_rate": 2.3120505724785123e-06, "loss": 17.6077, "step": 37762 }, { "epoch": 0.6902772954100937, "grad_norm": 4.831312042936234, "learning_rate": 2.3118009783514704e-06, "loss": 16.8329, "step": 37763 }, { "epoch": 0.6902955746065403, "grad_norm": 6.6383919459062195, "learning_rate": 2.3115513936463325e-06, "loss": 17.4037, "step": 37764 }, { "epoch": 0.6903138538029868, "grad_norm": 5.313699917852988, "learning_rate": 2.311301818363972e-06, "loss": 17.0814, "step": 37765 }, { "epoch": 0.6903321329994333, "grad_norm": 6.160211995213888, "learning_rate": 2.311052252505267e-06, "loss": 17.1818, "step": 37766 }, { "epoch": 0.6903504121958799, "grad_norm": 6.740620177450901, "learning_rate": 2.3108026960710885e-06, "loss": 17.552, "step": 37767 }, { "epoch": 0.6903686913923264, "grad_norm": 5.6897530655874275, "learning_rate": 2.310553149062315e-06, "loss": 17.0295, "step": 37768 }, { "epoch": 0.690386970588773, "grad_norm": 7.059478823427849, "learning_rate": 2.3103036114798177e-06, "loss": 17.5933, "step": 37769 }, { "epoch": 0.6904052497852194, "grad_norm": 6.338083041525465, "learning_rate": 2.3100540833244726e-06, "loss": 17.2045, "step": 37770 }, { "epoch": 0.6904235289816659, "grad_norm": 6.444259502462654, "learning_rate": 2.309804564597157e-06, "loss": 17.4136, "step": 37771 }, { "epoch": 0.6904418081781125, "grad_norm": 6.324947296805466, "learning_rate": 2.309555055298742e-06, "loss": 17.147, "step": 37772 }, { "epoch": 0.690460087374559, "grad_norm": 7.214169222012442, "learning_rate": 2.3093055554301026e-06, "loss": 17.6475, "step": 37773 }, { "epoch": 0.6904783665710056, "grad_norm": 6.309178301557439, "learning_rate": 2.3090560649921144e-06, "loss": 17.5023, "step": 37774 }, { "epoch": 0.6904966457674521, "grad_norm": 5.3347018579062055, "learning_rate": 2.3088065839856517e-06, "loss": 16.8646, "step": 37775 }, { "epoch": 0.6905149249638985, "grad_norm": 6.11794079089236, "learning_rate": 2.3085571124115865e-06, "loss": 17.0367, "step": 37776 }, { "epoch": 0.6905332041603451, "grad_norm": 7.0524834728834485, "learning_rate": 2.308307650270797e-06, "loss": 17.7677, "step": 37777 }, { "epoch": 0.6905514833567916, "grad_norm": 8.442747496582657, "learning_rate": 2.308058197564153e-06, "loss": 18.5903, "step": 37778 }, { "epoch": 0.6905697625532382, "grad_norm": 5.706467776093683, "learning_rate": 2.307808754292532e-06, "loss": 17.0398, "step": 37779 }, { "epoch": 0.6905880417496847, "grad_norm": 5.801037630261309, "learning_rate": 2.30755932045681e-06, "loss": 17.0194, "step": 37780 }, { "epoch": 0.6906063209461312, "grad_norm": 6.196634396356133, "learning_rate": 2.307309896057858e-06, "loss": 17.5448, "step": 37781 }, { "epoch": 0.6906246001425778, "grad_norm": 5.456747813481997, "learning_rate": 2.3070604810965495e-06, "loss": 17.1327, "step": 37782 }, { "epoch": 0.6906428793390242, "grad_norm": 5.627347502255983, "learning_rate": 2.306811075573763e-06, "loss": 17.0317, "step": 37783 }, { "epoch": 0.6906611585354708, "grad_norm": 5.777362736948545, "learning_rate": 2.3065616794903674e-06, "loss": 17.3048, "step": 37784 }, { "epoch": 0.6906794377319173, "grad_norm": 6.463201309272211, "learning_rate": 2.3063122928472413e-06, "loss": 17.3923, "step": 37785 }, { "epoch": 0.6906977169283638, "grad_norm": 6.4275646599706215, "learning_rate": 2.3060629156452575e-06, "loss": 17.3012, "step": 37786 }, { "epoch": 0.6907159961248104, "grad_norm": 7.5199404712585, "learning_rate": 2.3058135478852868e-06, "loss": 17.5196, "step": 37787 }, { "epoch": 0.6907342753212569, "grad_norm": 6.792566804609882, "learning_rate": 2.305564189568206e-06, "loss": 17.7399, "step": 37788 }, { "epoch": 0.6907525545177035, "grad_norm": 7.07899235750463, "learning_rate": 2.3053148406948904e-06, "loss": 17.6578, "step": 37789 }, { "epoch": 0.6907708337141499, "grad_norm": 5.521445033617713, "learning_rate": 2.3050655012662117e-06, "loss": 17.0539, "step": 37790 }, { "epoch": 0.6907891129105964, "grad_norm": 6.775795647708618, "learning_rate": 2.3048161712830452e-06, "loss": 17.5171, "step": 37791 }, { "epoch": 0.690807392107043, "grad_norm": 6.262110335130674, "learning_rate": 2.304566850746265e-06, "loss": 17.4248, "step": 37792 }, { "epoch": 0.6908256713034895, "grad_norm": 5.350425240423549, "learning_rate": 2.304317539656742e-06, "loss": 17.1637, "step": 37793 }, { "epoch": 0.6908439504999361, "grad_norm": 5.8108603572348825, "learning_rate": 2.304068238015354e-06, "loss": 17.2897, "step": 37794 }, { "epoch": 0.6908622296963826, "grad_norm": 5.9985671195788655, "learning_rate": 2.3038189458229703e-06, "loss": 17.1053, "step": 37795 }, { "epoch": 0.690880508892829, "grad_norm": 6.116768573205632, "learning_rate": 2.30356966308047e-06, "loss": 17.061, "step": 37796 }, { "epoch": 0.6908987880892756, "grad_norm": 6.21517295913152, "learning_rate": 2.3033203897887217e-06, "loss": 17.1009, "step": 37797 }, { "epoch": 0.6909170672857221, "grad_norm": 6.684378840791358, "learning_rate": 2.3030711259486017e-06, "loss": 17.5413, "step": 37798 }, { "epoch": 0.6909353464821686, "grad_norm": 6.11830491176878, "learning_rate": 2.3028218715609853e-06, "loss": 17.5057, "step": 37799 }, { "epoch": 0.6909536256786152, "grad_norm": 4.937959760882999, "learning_rate": 2.302572626626744e-06, "loss": 17.0341, "step": 37800 }, { "epoch": 0.6909719048750617, "grad_norm": 8.135341697800063, "learning_rate": 2.3023233911467495e-06, "loss": 18.3206, "step": 37801 }, { "epoch": 0.6909901840715083, "grad_norm": 6.025538809891417, "learning_rate": 2.3020741651218797e-06, "loss": 17.3369, "step": 37802 }, { "epoch": 0.6910084632679547, "grad_norm": 5.88016134412483, "learning_rate": 2.301824948553006e-06, "loss": 17.1243, "step": 37803 }, { "epoch": 0.6910267424644012, "grad_norm": 6.806477835299344, "learning_rate": 2.3015757414409993e-06, "loss": 17.4058, "step": 37804 }, { "epoch": 0.6910450216608478, "grad_norm": 5.637598638015004, "learning_rate": 2.3013265437867354e-06, "loss": 17.1587, "step": 37805 }, { "epoch": 0.6910633008572943, "grad_norm": 5.92193060403548, "learning_rate": 2.3010773555910905e-06, "loss": 17.4418, "step": 37806 }, { "epoch": 0.6910815800537409, "grad_norm": 6.689376624625767, "learning_rate": 2.300828176854932e-06, "loss": 17.4948, "step": 37807 }, { "epoch": 0.6910998592501874, "grad_norm": 7.072556660991159, "learning_rate": 2.3005790075791395e-06, "loss": 17.7309, "step": 37808 }, { "epoch": 0.6911181384466338, "grad_norm": 4.755224202261776, "learning_rate": 2.300329847764583e-06, "loss": 16.8219, "step": 37809 }, { "epoch": 0.6911364176430804, "grad_norm": 5.521025744887764, "learning_rate": 2.300080697412135e-06, "loss": 17.119, "step": 37810 }, { "epoch": 0.6911546968395269, "grad_norm": 5.745677079856637, "learning_rate": 2.2998315565226703e-06, "loss": 17.1398, "step": 37811 }, { "epoch": 0.6911729760359735, "grad_norm": 5.959862829557933, "learning_rate": 2.299582425097063e-06, "loss": 17.2298, "step": 37812 }, { "epoch": 0.69119125523242, "grad_norm": 6.4160821334061096, "learning_rate": 2.299333303136182e-06, "loss": 17.5072, "step": 37813 }, { "epoch": 0.6912095344288665, "grad_norm": 6.623485835104075, "learning_rate": 2.299084190640904e-06, "loss": 17.26, "step": 37814 }, { "epoch": 0.691227813625313, "grad_norm": 5.727344696407538, "learning_rate": 2.298835087612102e-06, "loss": 17.2019, "step": 37815 }, { "epoch": 0.6912460928217595, "grad_norm": 5.452489129028546, "learning_rate": 2.2985859940506493e-06, "loss": 17.0269, "step": 37816 }, { "epoch": 0.6912643720182061, "grad_norm": 6.39831780146981, "learning_rate": 2.298336909957419e-06, "loss": 17.4503, "step": 37817 }, { "epoch": 0.6912826512146526, "grad_norm": 5.8490831511801495, "learning_rate": 2.298087835333282e-06, "loss": 17.1637, "step": 37818 }, { "epoch": 0.6913009304110991, "grad_norm": 6.304965598543922, "learning_rate": 2.2978387701791137e-06, "loss": 17.4161, "step": 37819 }, { "epoch": 0.6913192096075457, "grad_norm": 8.291015914997637, "learning_rate": 2.2975897144957866e-06, "loss": 18.518, "step": 37820 }, { "epoch": 0.6913374888039922, "grad_norm": 7.385646187675766, "learning_rate": 2.297340668284171e-06, "loss": 17.725, "step": 37821 }, { "epoch": 0.6913557680004387, "grad_norm": 6.929510772279193, "learning_rate": 2.297091631545143e-06, "loss": 17.5158, "step": 37822 }, { "epoch": 0.6913740471968852, "grad_norm": 6.529990232925115, "learning_rate": 2.296842604279573e-06, "loss": 17.379, "step": 37823 }, { "epoch": 0.6913923263933317, "grad_norm": 6.533762666586337, "learning_rate": 2.2965935864883352e-06, "loss": 18.0517, "step": 37824 }, { "epoch": 0.6914106055897783, "grad_norm": 5.083958190147025, "learning_rate": 2.2963445781723044e-06, "loss": 16.8949, "step": 37825 }, { "epoch": 0.6914288847862248, "grad_norm": 6.821702636799764, "learning_rate": 2.296095579332351e-06, "loss": 17.8478, "step": 37826 }, { "epoch": 0.6914471639826714, "grad_norm": 6.58519099424022, "learning_rate": 2.2958465899693462e-06, "loss": 17.8112, "step": 37827 }, { "epoch": 0.6914654431791178, "grad_norm": 7.005657055059083, "learning_rate": 2.295597610084166e-06, "loss": 17.8148, "step": 37828 }, { "epoch": 0.6914837223755643, "grad_norm": 6.168217887399459, "learning_rate": 2.29534863967768e-06, "loss": 17.2428, "step": 37829 }, { "epoch": 0.6915020015720109, "grad_norm": 5.150351098620203, "learning_rate": 2.2950996787507636e-06, "loss": 17.0243, "step": 37830 }, { "epoch": 0.6915202807684574, "grad_norm": 6.944424458311596, "learning_rate": 2.2948507273042885e-06, "loss": 17.5525, "step": 37831 }, { "epoch": 0.691538559964904, "grad_norm": 5.355384504494216, "learning_rate": 2.294601785339125e-06, "loss": 16.9894, "step": 37832 }, { "epoch": 0.6915568391613505, "grad_norm": 6.536933040498643, "learning_rate": 2.294352852856147e-06, "loss": 17.63, "step": 37833 }, { "epoch": 0.691575118357797, "grad_norm": 6.737618045884411, "learning_rate": 2.29410392985623e-06, "loss": 17.5607, "step": 37834 }, { "epoch": 0.6915933975542435, "grad_norm": 6.532828373018842, "learning_rate": 2.2938550163402418e-06, "loss": 17.4194, "step": 37835 }, { "epoch": 0.69161167675069, "grad_norm": 6.976018588207716, "learning_rate": 2.2936061123090587e-06, "loss": 17.8649, "step": 37836 }, { "epoch": 0.6916299559471366, "grad_norm": 6.19212161233359, "learning_rate": 2.2933572177635516e-06, "loss": 17.6073, "step": 37837 }, { "epoch": 0.6916482351435831, "grad_norm": 6.068047922964283, "learning_rate": 2.2931083327045907e-06, "loss": 17.0583, "step": 37838 }, { "epoch": 0.6916665143400296, "grad_norm": 6.539212969394945, "learning_rate": 2.292859457133052e-06, "loss": 17.0847, "step": 37839 }, { "epoch": 0.6916847935364762, "grad_norm": 4.824491988100169, "learning_rate": 2.292610591049806e-06, "loss": 17.0014, "step": 37840 }, { "epoch": 0.6917030727329226, "grad_norm": 6.81588206728239, "learning_rate": 2.2923617344557225e-06, "loss": 17.8117, "step": 37841 }, { "epoch": 0.6917213519293692, "grad_norm": 5.85631845650153, "learning_rate": 2.292112887351677e-06, "loss": 17.2546, "step": 37842 }, { "epoch": 0.6917396311258157, "grad_norm": 5.39173950185191, "learning_rate": 2.291864049738542e-06, "loss": 17.0469, "step": 37843 }, { "epoch": 0.6917579103222622, "grad_norm": 7.547314524067319, "learning_rate": 2.291615221617187e-06, "loss": 17.5124, "step": 37844 }, { "epoch": 0.6917761895187088, "grad_norm": 7.143917173430652, "learning_rate": 2.2913664029884875e-06, "loss": 17.3076, "step": 37845 }, { "epoch": 0.6917944687151553, "grad_norm": 4.358449858941557, "learning_rate": 2.291117593853312e-06, "loss": 16.7426, "step": 37846 }, { "epoch": 0.6918127479116019, "grad_norm": 7.276337688699453, "learning_rate": 2.2908687942125363e-06, "loss": 17.6843, "step": 37847 }, { "epoch": 0.6918310271080483, "grad_norm": 6.282133845509537, "learning_rate": 2.2906200040670294e-06, "loss": 17.2817, "step": 37848 }, { "epoch": 0.6918493063044948, "grad_norm": 6.0760175162491965, "learning_rate": 2.2903712234176635e-06, "loss": 17.2135, "step": 37849 }, { "epoch": 0.6918675855009414, "grad_norm": 7.152424376834933, "learning_rate": 2.2901224522653128e-06, "loss": 17.5233, "step": 37850 }, { "epoch": 0.6918858646973879, "grad_norm": 7.227101380587254, "learning_rate": 2.289873690610846e-06, "loss": 17.3396, "step": 37851 }, { "epoch": 0.6919041438938345, "grad_norm": 6.402768894678023, "learning_rate": 2.2896249384551363e-06, "loss": 17.3553, "step": 37852 }, { "epoch": 0.691922423090281, "grad_norm": 6.33271672233956, "learning_rate": 2.2893761957990585e-06, "loss": 17.4493, "step": 37853 }, { "epoch": 0.6919407022867274, "grad_norm": 6.180513272997517, "learning_rate": 2.2891274626434813e-06, "loss": 17.2835, "step": 37854 }, { "epoch": 0.691958981483174, "grad_norm": 5.797616179052381, "learning_rate": 2.288878738989276e-06, "loss": 17.0592, "step": 37855 }, { "epoch": 0.6919772606796205, "grad_norm": 6.248441034830124, "learning_rate": 2.2886300248373163e-06, "loss": 17.455, "step": 37856 }, { "epoch": 0.6919955398760671, "grad_norm": 7.143079746299209, "learning_rate": 2.2883813201884737e-06, "loss": 17.6888, "step": 37857 }, { "epoch": 0.6920138190725136, "grad_norm": 5.71301143362287, "learning_rate": 2.2881326250436174e-06, "loss": 17.2986, "step": 37858 }, { "epoch": 0.6920320982689601, "grad_norm": 6.346555138689925, "learning_rate": 2.2878839394036205e-06, "loss": 17.3957, "step": 37859 }, { "epoch": 0.6920503774654067, "grad_norm": 5.819880764653375, "learning_rate": 2.2876352632693578e-06, "loss": 17.0249, "step": 37860 }, { "epoch": 0.6920686566618531, "grad_norm": 5.6740028144835835, "learning_rate": 2.287386596641695e-06, "loss": 17.1679, "step": 37861 }, { "epoch": 0.6920869358582997, "grad_norm": 8.151629900326375, "learning_rate": 2.28713793952151e-06, "loss": 18.331, "step": 37862 }, { "epoch": 0.6921052150547462, "grad_norm": 6.343292948839774, "learning_rate": 2.2868892919096684e-06, "loss": 17.2798, "step": 37863 }, { "epoch": 0.6921234942511927, "grad_norm": 6.9385336370363575, "learning_rate": 2.286640653807047e-06, "loss": 17.7691, "step": 37864 }, { "epoch": 0.6921417734476393, "grad_norm": 5.275411513178086, "learning_rate": 2.286392025214514e-06, "loss": 17.0468, "step": 37865 }, { "epoch": 0.6921600526440858, "grad_norm": 6.74586286896899, "learning_rate": 2.2861434061329394e-06, "loss": 17.4892, "step": 37866 }, { "epoch": 0.6921783318405322, "grad_norm": 6.739684971954352, "learning_rate": 2.2858947965631987e-06, "loss": 17.6296, "step": 37867 }, { "epoch": 0.6921966110369788, "grad_norm": 5.6813748484889235, "learning_rate": 2.28564619650616e-06, "loss": 17.2052, "step": 37868 }, { "epoch": 0.6922148902334253, "grad_norm": 6.045608127417281, "learning_rate": 2.285397605962695e-06, "loss": 17.1436, "step": 37869 }, { "epoch": 0.6922331694298719, "grad_norm": 6.2879956691415195, "learning_rate": 2.2851490249336784e-06, "loss": 17.0894, "step": 37870 }, { "epoch": 0.6922514486263184, "grad_norm": 5.958636620900184, "learning_rate": 2.2849004534199786e-06, "loss": 17.0745, "step": 37871 }, { "epoch": 0.6922697278227649, "grad_norm": 6.881453459429959, "learning_rate": 2.2846518914224653e-06, "loss": 17.6562, "step": 37872 }, { "epoch": 0.6922880070192114, "grad_norm": 6.6457565194694554, "learning_rate": 2.284403338942013e-06, "loss": 17.6292, "step": 37873 }, { "epoch": 0.6923062862156579, "grad_norm": 6.639366192814015, "learning_rate": 2.2841547959794916e-06, "loss": 17.3376, "step": 37874 }, { "epoch": 0.6923245654121045, "grad_norm": 8.917157647047452, "learning_rate": 2.28390626253577e-06, "loss": 18.3435, "step": 37875 }, { "epoch": 0.692342844608551, "grad_norm": 8.179790836915213, "learning_rate": 2.283657738611723e-06, "loss": 17.3002, "step": 37876 }, { "epoch": 0.6923611238049975, "grad_norm": 5.172239867831847, "learning_rate": 2.2834092242082185e-06, "loss": 16.9839, "step": 37877 }, { "epoch": 0.6923794030014441, "grad_norm": 5.379025129716105, "learning_rate": 2.283160719326129e-06, "loss": 17.0491, "step": 37878 }, { "epoch": 0.6923976821978906, "grad_norm": 5.154720135343698, "learning_rate": 2.282912223966327e-06, "loss": 17.006, "step": 37879 }, { "epoch": 0.6924159613943371, "grad_norm": 5.345666322542809, "learning_rate": 2.28266373812968e-06, "loss": 17.0985, "step": 37880 }, { "epoch": 0.6924342405907836, "grad_norm": 6.522341095641772, "learning_rate": 2.2824152618170625e-06, "loss": 17.6261, "step": 37881 }, { "epoch": 0.6924525197872301, "grad_norm": 6.145650363778325, "learning_rate": 2.282166795029344e-06, "loss": 17.2434, "step": 37882 }, { "epoch": 0.6924707989836767, "grad_norm": 6.495699548067677, "learning_rate": 2.2819183377673933e-06, "loss": 17.7241, "step": 37883 }, { "epoch": 0.6924890781801232, "grad_norm": 5.887986112024337, "learning_rate": 2.2816698900320843e-06, "loss": 17.137, "step": 37884 }, { "epoch": 0.6925073573765698, "grad_norm": 5.579599566973974, "learning_rate": 2.2814214518242866e-06, "loss": 17.0962, "step": 37885 }, { "epoch": 0.6925256365730162, "grad_norm": 6.813359067461839, "learning_rate": 2.2811730231448693e-06, "loss": 17.3123, "step": 37886 }, { "epoch": 0.6925439157694627, "grad_norm": 6.148412569519995, "learning_rate": 2.2809246039947043e-06, "loss": 17.1745, "step": 37887 }, { "epoch": 0.6925621949659093, "grad_norm": 5.106417014751793, "learning_rate": 2.2806761943746646e-06, "loss": 16.9915, "step": 37888 }, { "epoch": 0.6925804741623558, "grad_norm": 6.415290841145753, "learning_rate": 2.2804277942856166e-06, "loss": 17.4549, "step": 37889 }, { "epoch": 0.6925987533588024, "grad_norm": 6.620362534576805, "learning_rate": 2.2801794037284358e-06, "loss": 17.6582, "step": 37890 }, { "epoch": 0.6926170325552489, "grad_norm": 6.814406619870334, "learning_rate": 2.2799310227039878e-06, "loss": 17.5562, "step": 37891 }, { "epoch": 0.6926353117516953, "grad_norm": 6.955901633661436, "learning_rate": 2.2796826512131476e-06, "loss": 17.5469, "step": 37892 }, { "epoch": 0.6926535909481419, "grad_norm": 7.588372362733268, "learning_rate": 2.2794342892567835e-06, "loss": 18.0125, "step": 37893 }, { "epoch": 0.6926718701445884, "grad_norm": 6.417223684937642, "learning_rate": 2.2791859368357644e-06, "loss": 17.4031, "step": 37894 }, { "epoch": 0.692690149341035, "grad_norm": 6.242412279135221, "learning_rate": 2.278937593950964e-06, "loss": 17.5579, "step": 37895 }, { "epoch": 0.6927084285374815, "grad_norm": 5.713595473169723, "learning_rate": 2.2786892606032496e-06, "loss": 16.8969, "step": 37896 }, { "epoch": 0.692726707733928, "grad_norm": 5.397457830911019, "learning_rate": 2.2784409367934936e-06, "loss": 17.1134, "step": 37897 }, { "epoch": 0.6927449869303746, "grad_norm": 6.095838287554058, "learning_rate": 2.2781926225225674e-06, "loss": 17.4374, "step": 37898 }, { "epoch": 0.692763266126821, "grad_norm": 6.000993503410069, "learning_rate": 2.2779443177913396e-06, "loss": 17.4296, "step": 37899 }, { "epoch": 0.6927815453232676, "grad_norm": 6.404112274010289, "learning_rate": 2.2776960226006793e-06, "loss": 17.6463, "step": 37900 }, { "epoch": 0.6927998245197141, "grad_norm": 7.287422437857492, "learning_rate": 2.27744773695146e-06, "loss": 17.376, "step": 37901 }, { "epoch": 0.6928181037161606, "grad_norm": 6.2638046673564345, "learning_rate": 2.2771994608445504e-06, "loss": 17.4474, "step": 37902 }, { "epoch": 0.6928363829126072, "grad_norm": 5.800120818342855, "learning_rate": 2.2769511942808183e-06, "loss": 17.0683, "step": 37903 }, { "epoch": 0.6928546621090537, "grad_norm": 7.697256443671453, "learning_rate": 2.276702937261138e-06, "loss": 17.7863, "step": 37904 }, { "epoch": 0.6928729413055003, "grad_norm": 5.655549650042985, "learning_rate": 2.2764546897863752e-06, "loss": 17.2393, "step": 37905 }, { "epoch": 0.6928912205019467, "grad_norm": 5.839489678712735, "learning_rate": 2.276206451857403e-06, "loss": 17.2809, "step": 37906 }, { "epoch": 0.6929094996983932, "grad_norm": 6.605078309520032, "learning_rate": 2.275958223475092e-06, "loss": 17.6114, "step": 37907 }, { "epoch": 0.6929277788948398, "grad_norm": 6.291921275343918, "learning_rate": 2.27571000464031e-06, "loss": 17.3599, "step": 37908 }, { "epoch": 0.6929460580912863, "grad_norm": 6.606884714584186, "learning_rate": 2.2754617953539294e-06, "loss": 17.9644, "step": 37909 }, { "epoch": 0.6929643372877329, "grad_norm": 6.017299416451125, "learning_rate": 2.275213595616819e-06, "loss": 17.2383, "step": 37910 }, { "epoch": 0.6929826164841794, "grad_norm": 6.75992595618242, "learning_rate": 2.274965405429846e-06, "loss": 17.6338, "step": 37911 }, { "epoch": 0.6930008956806258, "grad_norm": 7.380402002709727, "learning_rate": 2.2747172247938852e-06, "loss": 18.3275, "step": 37912 }, { "epoch": 0.6930191748770724, "grad_norm": 5.172504480450805, "learning_rate": 2.274469053709804e-06, "loss": 17.3008, "step": 37913 }, { "epoch": 0.6930374540735189, "grad_norm": 6.362815107758038, "learning_rate": 2.2742208921784704e-06, "loss": 17.7585, "step": 37914 }, { "epoch": 0.6930557332699655, "grad_norm": 5.774952995317143, "learning_rate": 2.273972740200756e-06, "loss": 17.0996, "step": 37915 }, { "epoch": 0.693074012466412, "grad_norm": 7.598496057084263, "learning_rate": 2.2737245977775323e-06, "loss": 17.6473, "step": 37916 }, { "epoch": 0.6930922916628585, "grad_norm": 7.024598889155696, "learning_rate": 2.2734764649096656e-06, "loss": 17.7531, "step": 37917 }, { "epoch": 0.693110570859305, "grad_norm": 5.945023462227464, "learning_rate": 2.2732283415980293e-06, "loss": 17.3923, "step": 37918 }, { "epoch": 0.6931288500557515, "grad_norm": 6.962648462554255, "learning_rate": 2.272980227843491e-06, "loss": 17.7933, "step": 37919 }, { "epoch": 0.6931471292521981, "grad_norm": 5.7059464165006375, "learning_rate": 2.2727321236469185e-06, "loss": 17.3612, "step": 37920 }, { "epoch": 0.6931654084486446, "grad_norm": 5.79044625017852, "learning_rate": 2.272484029009185e-06, "loss": 17.2298, "step": 37921 }, { "epoch": 0.6931836876450911, "grad_norm": 7.758606907184175, "learning_rate": 2.2722359439311565e-06, "loss": 18.0344, "step": 37922 }, { "epoch": 0.6932019668415377, "grad_norm": 6.499578437942859, "learning_rate": 2.2719878684137058e-06, "loss": 17.7816, "step": 37923 }, { "epoch": 0.6932202460379842, "grad_norm": 6.170613689371448, "learning_rate": 2.2717398024576993e-06, "loss": 17.3718, "step": 37924 }, { "epoch": 0.6932385252344307, "grad_norm": 5.410063559903193, "learning_rate": 2.2714917460640083e-06, "loss": 16.9363, "step": 37925 }, { "epoch": 0.6932568044308772, "grad_norm": 6.46772580326342, "learning_rate": 2.2712436992335035e-06, "loss": 17.5345, "step": 37926 }, { "epoch": 0.6932750836273237, "grad_norm": 7.081651271805704, "learning_rate": 2.270995661967053e-06, "loss": 17.8158, "step": 37927 }, { "epoch": 0.6932933628237703, "grad_norm": 6.86734625656646, "learning_rate": 2.270747634265524e-06, "loss": 17.1676, "step": 37928 }, { "epoch": 0.6933116420202168, "grad_norm": 5.347485571292618, "learning_rate": 2.270499616129789e-06, "loss": 17.0575, "step": 37929 }, { "epoch": 0.6933299212166634, "grad_norm": 6.7955736456687355, "learning_rate": 2.2702516075607167e-06, "loss": 17.4129, "step": 37930 }, { "epoch": 0.6933482004131098, "grad_norm": 5.0401600029879186, "learning_rate": 2.2700036085591737e-06, "loss": 17.0461, "step": 37931 }, { "epoch": 0.6933664796095563, "grad_norm": 6.341642219160543, "learning_rate": 2.2697556191260308e-06, "loss": 17.3089, "step": 37932 }, { "epoch": 0.6933847588060029, "grad_norm": 6.405859088712746, "learning_rate": 2.2695076392621596e-06, "loss": 17.4461, "step": 37933 }, { "epoch": 0.6934030380024494, "grad_norm": 7.467331047563332, "learning_rate": 2.2692596689684253e-06, "loss": 17.7554, "step": 37934 }, { "epoch": 0.6934213171988959, "grad_norm": 5.220683197744136, "learning_rate": 2.269011708245701e-06, "loss": 17.0492, "step": 37935 }, { "epoch": 0.6934395963953425, "grad_norm": 5.567709446778887, "learning_rate": 2.268763757094851e-06, "loss": 17.1991, "step": 37936 }, { "epoch": 0.693457875591789, "grad_norm": 5.3865550829207445, "learning_rate": 2.2685158155167497e-06, "loss": 17.1609, "step": 37937 }, { "epoch": 0.6934761547882355, "grad_norm": 5.804950505561708, "learning_rate": 2.2682678835122635e-06, "loss": 17.3752, "step": 37938 }, { "epoch": 0.693494433984682, "grad_norm": 6.259407440943707, "learning_rate": 2.2680199610822588e-06, "loss": 17.551, "step": 37939 }, { "epoch": 0.6935127131811285, "grad_norm": 6.706856691891948, "learning_rate": 2.2677720482276096e-06, "loss": 17.5363, "step": 37940 }, { "epoch": 0.6935309923775751, "grad_norm": 6.976864902118271, "learning_rate": 2.2675241449491803e-06, "loss": 17.3378, "step": 37941 }, { "epoch": 0.6935492715740216, "grad_norm": 6.858697188236394, "learning_rate": 2.2672762512478412e-06, "loss": 17.6037, "step": 37942 }, { "epoch": 0.6935675507704682, "grad_norm": 6.737365338610003, "learning_rate": 2.267028367124464e-06, "loss": 17.727, "step": 37943 }, { "epoch": 0.6935858299669146, "grad_norm": 5.770729262931714, "learning_rate": 2.266780492579915e-06, "loss": 17.3757, "step": 37944 }, { "epoch": 0.6936041091633611, "grad_norm": 6.231053269581472, "learning_rate": 2.2665326276150617e-06, "loss": 17.3329, "step": 37945 }, { "epoch": 0.6936223883598077, "grad_norm": 6.190521141507853, "learning_rate": 2.2662847722307763e-06, "loss": 17.6458, "step": 37946 }, { "epoch": 0.6936406675562542, "grad_norm": 5.061044577833369, "learning_rate": 2.2660369264279248e-06, "loss": 17.0054, "step": 37947 }, { "epoch": 0.6936589467527008, "grad_norm": 7.723874081044563, "learning_rate": 2.2657890902073753e-06, "loss": 17.7394, "step": 37948 }, { "epoch": 0.6936772259491473, "grad_norm": 13.739658905148854, "learning_rate": 2.26554126357e-06, "loss": 18.7778, "step": 37949 }, { "epoch": 0.6936955051455937, "grad_norm": 6.009019641221826, "learning_rate": 2.2652934465166624e-06, "loss": 17.2679, "step": 37950 }, { "epoch": 0.6937137843420403, "grad_norm": 6.270223814968416, "learning_rate": 2.2650456390482347e-06, "loss": 17.3925, "step": 37951 }, { "epoch": 0.6937320635384868, "grad_norm": 8.716418357687758, "learning_rate": 2.2647978411655865e-06, "loss": 17.9946, "step": 37952 }, { "epoch": 0.6937503427349334, "grad_norm": 6.575865627477889, "learning_rate": 2.264550052869582e-06, "loss": 17.6782, "step": 37953 }, { "epoch": 0.6937686219313799, "grad_norm": 7.764410781173251, "learning_rate": 2.2643022741610956e-06, "loss": 17.9831, "step": 37954 }, { "epoch": 0.6937869011278264, "grad_norm": 5.672821559687707, "learning_rate": 2.2640545050409906e-06, "loss": 17.3572, "step": 37955 }, { "epoch": 0.693805180324273, "grad_norm": 7.171151757269442, "learning_rate": 2.2638067455101366e-06, "loss": 18.0943, "step": 37956 }, { "epoch": 0.6938234595207194, "grad_norm": 4.961337286862204, "learning_rate": 2.2635589955694037e-06, "loss": 16.8813, "step": 37957 }, { "epoch": 0.693841738717166, "grad_norm": 5.639986976255889, "learning_rate": 2.263311255219659e-06, "loss": 17.32, "step": 37958 }, { "epoch": 0.6938600179136125, "grad_norm": 6.017035718086617, "learning_rate": 2.2630635244617696e-06, "loss": 17.3076, "step": 37959 }, { "epoch": 0.693878297110059, "grad_norm": 5.939883907158771, "learning_rate": 2.2628158032966043e-06, "loss": 17.2544, "step": 37960 }, { "epoch": 0.6938965763065056, "grad_norm": 5.278334247732104, "learning_rate": 2.262568091725034e-06, "loss": 16.9967, "step": 37961 }, { "epoch": 0.6939148555029521, "grad_norm": 4.84512758293094, "learning_rate": 2.2623203897479234e-06, "loss": 16.9671, "step": 37962 }, { "epoch": 0.6939331346993987, "grad_norm": 5.7441028507185035, "learning_rate": 2.2620726973661443e-06, "loss": 17.2532, "step": 37963 }, { "epoch": 0.6939514138958451, "grad_norm": 5.476231402658182, "learning_rate": 2.2618250145805627e-06, "loss": 17.645, "step": 37964 }, { "epoch": 0.6939696930922916, "grad_norm": 6.373817674466043, "learning_rate": 2.261577341392045e-06, "loss": 17.4982, "step": 37965 }, { "epoch": 0.6939879722887382, "grad_norm": 6.474709892085419, "learning_rate": 2.261329677801463e-06, "loss": 17.5734, "step": 37966 }, { "epoch": 0.6940062514851847, "grad_norm": 6.129250069921485, "learning_rate": 2.261082023809681e-06, "loss": 17.374, "step": 37967 }, { "epoch": 0.6940245306816313, "grad_norm": 7.380783691916976, "learning_rate": 2.26083437941757e-06, "loss": 17.5442, "step": 37968 }, { "epoch": 0.6940428098780778, "grad_norm": 7.229660825336017, "learning_rate": 2.2605867446259957e-06, "loss": 17.5029, "step": 37969 }, { "epoch": 0.6940610890745242, "grad_norm": 5.697583026645042, "learning_rate": 2.260339119435827e-06, "loss": 17.2892, "step": 37970 }, { "epoch": 0.6940793682709708, "grad_norm": 6.207273624345562, "learning_rate": 2.2600915038479337e-06, "loss": 17.3764, "step": 37971 }, { "epoch": 0.6940976474674173, "grad_norm": 6.665939575215377, "learning_rate": 2.259843897863182e-06, "loss": 17.42, "step": 37972 }, { "epoch": 0.6941159266638639, "grad_norm": 5.393032953900514, "learning_rate": 2.2595963014824373e-06, "loss": 17.0956, "step": 37973 }, { "epoch": 0.6941342058603104, "grad_norm": 6.4687974751874835, "learning_rate": 2.2593487147065725e-06, "loss": 17.2069, "step": 37974 }, { "epoch": 0.6941524850567569, "grad_norm": 7.12056227558896, "learning_rate": 2.2591011375364517e-06, "loss": 17.4329, "step": 37975 }, { "epoch": 0.6941707642532035, "grad_norm": 6.0833994027761005, "learning_rate": 2.2588535699729424e-06, "loss": 17.0278, "step": 37976 }, { "epoch": 0.6941890434496499, "grad_norm": 5.602274779341388, "learning_rate": 2.258606012016915e-06, "loss": 17.0066, "step": 37977 }, { "epoch": 0.6942073226460965, "grad_norm": 6.920853597848278, "learning_rate": 2.258358463669234e-06, "loss": 17.6015, "step": 37978 }, { "epoch": 0.694225601842543, "grad_norm": 6.100425853971762, "learning_rate": 2.2581109249307688e-06, "loss": 17.1736, "step": 37979 }, { "epoch": 0.6942438810389895, "grad_norm": 6.544669085337947, "learning_rate": 2.2578633958023887e-06, "loss": 17.7576, "step": 37980 }, { "epoch": 0.6942621602354361, "grad_norm": 6.371931394080148, "learning_rate": 2.2576158762849594e-06, "loss": 17.5154, "step": 37981 }, { "epoch": 0.6942804394318826, "grad_norm": 4.894563736587179, "learning_rate": 2.2573683663793462e-06, "loss": 16.7417, "step": 37982 }, { "epoch": 0.6942987186283291, "grad_norm": 6.356105806846267, "learning_rate": 2.257120866086421e-06, "loss": 17.2742, "step": 37983 }, { "epoch": 0.6943169978247756, "grad_norm": 5.609857292489707, "learning_rate": 2.2568733754070476e-06, "loss": 16.9083, "step": 37984 }, { "epoch": 0.6943352770212221, "grad_norm": 5.5490603123262785, "learning_rate": 2.2566258943420965e-06, "loss": 17.1795, "step": 37985 }, { "epoch": 0.6943535562176687, "grad_norm": 5.708726262870765, "learning_rate": 2.2563784228924318e-06, "loss": 17.2535, "step": 37986 }, { "epoch": 0.6943718354141152, "grad_norm": 6.994287974253431, "learning_rate": 2.2561309610589245e-06, "loss": 17.7235, "step": 37987 }, { "epoch": 0.6943901146105618, "grad_norm": 4.961398745333366, "learning_rate": 2.255883508842438e-06, "loss": 16.873, "step": 37988 }, { "epoch": 0.6944083938070083, "grad_norm": 6.979419837907777, "learning_rate": 2.2556360662438436e-06, "loss": 17.7335, "step": 37989 }, { "epoch": 0.6944266730034547, "grad_norm": 5.609155780564801, "learning_rate": 2.255388633264005e-06, "loss": 16.7777, "step": 37990 }, { "epoch": 0.6944449521999013, "grad_norm": 5.9550266587999685, "learning_rate": 2.2551412099037927e-06, "loss": 17.3552, "step": 37991 }, { "epoch": 0.6944632313963478, "grad_norm": 5.663257883785764, "learning_rate": 2.2548937961640726e-06, "loss": 16.9008, "step": 37992 }, { "epoch": 0.6944815105927944, "grad_norm": 5.745547308834891, "learning_rate": 2.2546463920457094e-06, "loss": 17.3301, "step": 37993 }, { "epoch": 0.6944997897892409, "grad_norm": 7.576044193124041, "learning_rate": 2.2543989975495745e-06, "loss": 17.6881, "step": 37994 }, { "epoch": 0.6945180689856874, "grad_norm": 5.081890090035778, "learning_rate": 2.2541516126765313e-06, "loss": 16.9066, "step": 37995 }, { "epoch": 0.694536348182134, "grad_norm": 5.484921121099614, "learning_rate": 2.2539042374274477e-06, "loss": 16.9118, "step": 37996 }, { "epoch": 0.6945546273785804, "grad_norm": 8.006215311428694, "learning_rate": 2.2536568718031935e-06, "loss": 17.5496, "step": 37997 }, { "epoch": 0.694572906575027, "grad_norm": 5.69975015578413, "learning_rate": 2.253409515804632e-06, "loss": 17.0978, "step": 37998 }, { "epoch": 0.6945911857714735, "grad_norm": 6.291162930729855, "learning_rate": 2.2531621694326333e-06, "loss": 17.4594, "step": 37999 }, { "epoch": 0.69460946496792, "grad_norm": 4.62972592892872, "learning_rate": 2.2529148326880636e-06, "loss": 16.6544, "step": 38000 }, { "epoch": 0.6946277441643666, "grad_norm": 7.496930609343013, "learning_rate": 2.252667505571786e-06, "loss": 17.9037, "step": 38001 }, { "epoch": 0.694646023360813, "grad_norm": 6.979999570840007, "learning_rate": 2.252420188084673e-06, "loss": 17.7309, "step": 38002 }, { "epoch": 0.6946643025572595, "grad_norm": 6.4093202386451615, "learning_rate": 2.252172880227589e-06, "loss": 17.3825, "step": 38003 }, { "epoch": 0.6946825817537061, "grad_norm": 5.44989700015233, "learning_rate": 2.2519255820013987e-06, "loss": 16.9227, "step": 38004 }, { "epoch": 0.6947008609501526, "grad_norm": 5.278642545803796, "learning_rate": 2.2516782934069704e-06, "loss": 16.9056, "step": 38005 }, { "epoch": 0.6947191401465992, "grad_norm": 7.814358487225157, "learning_rate": 2.2514310144451728e-06, "loss": 18.4371, "step": 38006 }, { "epoch": 0.6947374193430457, "grad_norm": 6.9942644656022726, "learning_rate": 2.25118374511687e-06, "loss": 17.6337, "step": 38007 }, { "epoch": 0.6947556985394922, "grad_norm": 8.963555077907852, "learning_rate": 2.2509364854229308e-06, "loss": 18.0067, "step": 38008 }, { "epoch": 0.6947739777359387, "grad_norm": 6.436004731472627, "learning_rate": 2.2506892353642207e-06, "loss": 17.4389, "step": 38009 }, { "epoch": 0.6947922569323852, "grad_norm": 6.7425091080148984, "learning_rate": 2.2504419949416043e-06, "loss": 17.6838, "step": 38010 }, { "epoch": 0.6948105361288318, "grad_norm": 6.57868575919606, "learning_rate": 2.2501947641559525e-06, "loss": 17.356, "step": 38011 }, { "epoch": 0.6948288153252783, "grad_norm": 5.809036882476088, "learning_rate": 2.249947543008129e-06, "loss": 17.0353, "step": 38012 }, { "epoch": 0.6948470945217248, "grad_norm": 6.477612652367327, "learning_rate": 2.2497003314989997e-06, "loss": 17.4741, "step": 38013 }, { "epoch": 0.6948653737181714, "grad_norm": 8.345255336531487, "learning_rate": 2.249453129629431e-06, "loss": 17.941, "step": 38014 }, { "epoch": 0.6948836529146178, "grad_norm": 7.165877779651617, "learning_rate": 2.249205937400291e-06, "loss": 17.6382, "step": 38015 }, { "epoch": 0.6949019321110644, "grad_norm": 6.27140002101458, "learning_rate": 2.248958754812447e-06, "loss": 17.3188, "step": 38016 }, { "epoch": 0.6949202113075109, "grad_norm": 5.574288350367032, "learning_rate": 2.2487115818667643e-06, "loss": 17.2853, "step": 38017 }, { "epoch": 0.6949384905039574, "grad_norm": 5.7524854311343665, "learning_rate": 2.248464418564106e-06, "loss": 16.8629, "step": 38018 }, { "epoch": 0.694956769700404, "grad_norm": 5.462258258238832, "learning_rate": 2.2482172649053443e-06, "loss": 17.1504, "step": 38019 }, { "epoch": 0.6949750488968505, "grad_norm": 5.706012012212169, "learning_rate": 2.247970120891341e-06, "loss": 17.2555, "step": 38020 }, { "epoch": 0.6949933280932971, "grad_norm": 6.07136817625636, "learning_rate": 2.2477229865229626e-06, "loss": 17.3304, "step": 38021 }, { "epoch": 0.6950116072897435, "grad_norm": 8.681897137978362, "learning_rate": 2.2474758618010777e-06, "loss": 18.7613, "step": 38022 }, { "epoch": 0.69502988648619, "grad_norm": 5.842836085900381, "learning_rate": 2.24722874672655e-06, "loss": 17.2431, "step": 38023 }, { "epoch": 0.6950481656826366, "grad_norm": 6.051178139795736, "learning_rate": 2.246981641300246e-06, "loss": 17.5499, "step": 38024 }, { "epoch": 0.6950664448790831, "grad_norm": 7.018837388267646, "learning_rate": 2.2467345455230342e-06, "loss": 17.362, "step": 38025 }, { "epoch": 0.6950847240755297, "grad_norm": 6.175683724869985, "learning_rate": 2.246487459395779e-06, "loss": 17.5807, "step": 38026 }, { "epoch": 0.6951030032719762, "grad_norm": 6.155728792574831, "learning_rate": 2.2462403829193445e-06, "loss": 17.2347, "step": 38027 }, { "epoch": 0.6951212824684226, "grad_norm": 4.699203975880803, "learning_rate": 2.2459933160946005e-06, "loss": 16.6388, "step": 38028 }, { "epoch": 0.6951395616648692, "grad_norm": 5.517910742428582, "learning_rate": 2.245746258922409e-06, "loss": 17.0582, "step": 38029 }, { "epoch": 0.6951578408613157, "grad_norm": 6.077553889684415, "learning_rate": 2.24549921140364e-06, "loss": 17.0539, "step": 38030 }, { "epoch": 0.6951761200577623, "grad_norm": 6.1615498048996225, "learning_rate": 2.2452521735391568e-06, "loss": 17.231, "step": 38031 }, { "epoch": 0.6951943992542088, "grad_norm": 6.80532483029246, "learning_rate": 2.2450051453298236e-06, "loss": 17.4464, "step": 38032 }, { "epoch": 0.6952126784506553, "grad_norm": 10.048109185561042, "learning_rate": 2.244758126776509e-06, "loss": 17.4307, "step": 38033 }, { "epoch": 0.6952309576471019, "grad_norm": 5.731229092400584, "learning_rate": 2.2445111178800794e-06, "loss": 17.4086, "step": 38034 }, { "epoch": 0.6952492368435483, "grad_norm": 6.812991579936542, "learning_rate": 2.244264118641398e-06, "loss": 17.7924, "step": 38035 }, { "epoch": 0.6952675160399949, "grad_norm": 7.282704687709104, "learning_rate": 2.2440171290613326e-06, "loss": 17.631, "step": 38036 }, { "epoch": 0.6952857952364414, "grad_norm": 5.93520899933475, "learning_rate": 2.2437701491407485e-06, "loss": 17.2714, "step": 38037 }, { "epoch": 0.6953040744328879, "grad_norm": 5.350309812966314, "learning_rate": 2.2435231788805096e-06, "loss": 17.0632, "step": 38038 }, { "epoch": 0.6953223536293345, "grad_norm": 7.900399098344681, "learning_rate": 2.243276218281484e-06, "loss": 17.8702, "step": 38039 }, { "epoch": 0.695340632825781, "grad_norm": 6.222655587688862, "learning_rate": 2.2430292673445365e-06, "loss": 17.4449, "step": 38040 }, { "epoch": 0.6953589120222275, "grad_norm": 7.015774651237437, "learning_rate": 2.2427823260705295e-06, "loss": 17.5294, "step": 38041 }, { "epoch": 0.695377191218674, "grad_norm": 6.180686201172638, "learning_rate": 2.242535394460332e-06, "loss": 17.3593, "step": 38042 }, { "epoch": 0.6953954704151205, "grad_norm": 7.202031281601083, "learning_rate": 2.24228847251481e-06, "loss": 17.69, "step": 38043 }, { "epoch": 0.6954137496115671, "grad_norm": 6.045295100864574, "learning_rate": 2.2420415602348254e-06, "loss": 17.0932, "step": 38044 }, { "epoch": 0.6954320288080136, "grad_norm": 6.075182751104066, "learning_rate": 2.2417946576212486e-06, "loss": 17.2817, "step": 38045 }, { "epoch": 0.6954503080044602, "grad_norm": 7.726749313952893, "learning_rate": 2.24154776467494e-06, "loss": 18.0052, "step": 38046 }, { "epoch": 0.6954685872009067, "grad_norm": 5.686508543202131, "learning_rate": 2.2413008813967686e-06, "loss": 16.8707, "step": 38047 }, { "epoch": 0.6954868663973531, "grad_norm": 5.475382005678733, "learning_rate": 2.2410540077875985e-06, "loss": 17.3344, "step": 38048 }, { "epoch": 0.6955051455937997, "grad_norm": 5.789885203723473, "learning_rate": 2.2408071438482933e-06, "loss": 17.2705, "step": 38049 }, { "epoch": 0.6955234247902462, "grad_norm": 6.516045360951008, "learning_rate": 2.240560289579721e-06, "loss": 17.3975, "step": 38050 }, { "epoch": 0.6955417039866928, "grad_norm": 5.910864107982904, "learning_rate": 2.240313444982743e-06, "loss": 17.1348, "step": 38051 }, { "epoch": 0.6955599831831393, "grad_norm": 5.106866691075993, "learning_rate": 2.2400666100582273e-06, "loss": 17.0265, "step": 38052 }, { "epoch": 0.6955782623795858, "grad_norm": 5.992785141805602, "learning_rate": 2.2398197848070406e-06, "loss": 17.1966, "step": 38053 }, { "epoch": 0.6955965415760323, "grad_norm": 6.33949622606377, "learning_rate": 2.2395729692300467e-06, "loss": 17.2391, "step": 38054 }, { "epoch": 0.6956148207724788, "grad_norm": 5.249377463717623, "learning_rate": 2.239326163328107e-06, "loss": 17.0263, "step": 38055 }, { "epoch": 0.6956330999689254, "grad_norm": 6.188287307464206, "learning_rate": 2.239079367102092e-06, "loss": 17.4522, "step": 38056 }, { "epoch": 0.6956513791653719, "grad_norm": 8.223662304526584, "learning_rate": 2.2388325805528644e-06, "loss": 17.5299, "step": 38057 }, { "epoch": 0.6956696583618184, "grad_norm": 7.104333013967637, "learning_rate": 2.2385858036812867e-06, "loss": 17.5422, "step": 38058 }, { "epoch": 0.695687937558265, "grad_norm": 7.1383750135937, "learning_rate": 2.2383390364882268e-06, "loss": 17.7878, "step": 38059 }, { "epoch": 0.6957062167547114, "grad_norm": 8.545238052295304, "learning_rate": 2.2380922789745503e-06, "loss": 17.7395, "step": 38060 }, { "epoch": 0.695724495951158, "grad_norm": 7.919550021184052, "learning_rate": 2.237845531141119e-06, "loss": 17.8595, "step": 38061 }, { "epoch": 0.6957427751476045, "grad_norm": 5.713946936958025, "learning_rate": 2.237598792988801e-06, "loss": 17.1989, "step": 38062 }, { "epoch": 0.695761054344051, "grad_norm": 5.933253989883948, "learning_rate": 2.2373520645184576e-06, "loss": 16.9716, "step": 38063 }, { "epoch": 0.6957793335404976, "grad_norm": 7.973021628478984, "learning_rate": 2.2371053457309576e-06, "loss": 18.4461, "step": 38064 }, { "epoch": 0.6957976127369441, "grad_norm": 6.571346496158423, "learning_rate": 2.236858636627164e-06, "loss": 17.1577, "step": 38065 }, { "epoch": 0.6958158919333907, "grad_norm": 8.2716925784614, "learning_rate": 2.236611937207939e-06, "loss": 17.7102, "step": 38066 }, { "epoch": 0.6958341711298371, "grad_norm": 6.236810780624218, "learning_rate": 2.236365247474151e-06, "loss": 17.3658, "step": 38067 }, { "epoch": 0.6958524503262836, "grad_norm": 5.35790371008807, "learning_rate": 2.2361185674266613e-06, "loss": 17.0441, "step": 38068 }, { "epoch": 0.6958707295227302, "grad_norm": 5.459981993695579, "learning_rate": 2.235871897066336e-06, "loss": 17.1836, "step": 38069 }, { "epoch": 0.6958890087191767, "grad_norm": 6.9008242452690025, "learning_rate": 2.2356252363940417e-06, "loss": 17.1691, "step": 38070 }, { "epoch": 0.6959072879156232, "grad_norm": 6.3625449081735095, "learning_rate": 2.2353785854106407e-06, "loss": 17.3927, "step": 38071 }, { "epoch": 0.6959255671120698, "grad_norm": 6.2837765385431705, "learning_rate": 2.2351319441169967e-06, "loss": 17.5429, "step": 38072 }, { "epoch": 0.6959438463085162, "grad_norm": 7.518096346057953, "learning_rate": 2.2348853125139767e-06, "loss": 17.7161, "step": 38073 }, { "epoch": 0.6959621255049628, "grad_norm": 7.186305303158035, "learning_rate": 2.234638690602442e-06, "loss": 17.6108, "step": 38074 }, { "epoch": 0.6959804047014093, "grad_norm": 6.320234594806942, "learning_rate": 2.234392078383261e-06, "loss": 17.4559, "step": 38075 }, { "epoch": 0.6959986838978558, "grad_norm": 6.001890585821499, "learning_rate": 2.234145475857295e-06, "loss": 17.2209, "step": 38076 }, { "epoch": 0.6960169630943024, "grad_norm": 5.7873023247068955, "learning_rate": 2.233898883025408e-06, "loss": 17.1871, "step": 38077 }, { "epoch": 0.6960352422907489, "grad_norm": 5.745083933697109, "learning_rate": 2.2336522998884653e-06, "loss": 17.095, "step": 38078 }, { "epoch": 0.6960535214871955, "grad_norm": 6.710409609784765, "learning_rate": 2.2334057264473324e-06, "loss": 17.5401, "step": 38079 }, { "epoch": 0.6960718006836419, "grad_norm": 7.361105869428507, "learning_rate": 2.233159162702871e-06, "loss": 17.728, "step": 38080 }, { "epoch": 0.6960900798800884, "grad_norm": 5.290646088684291, "learning_rate": 2.232912608655949e-06, "loss": 16.9667, "step": 38081 }, { "epoch": 0.696108359076535, "grad_norm": 6.703403618939381, "learning_rate": 2.232666064307427e-06, "loss": 17.3514, "step": 38082 }, { "epoch": 0.6961266382729815, "grad_norm": 6.519796790277472, "learning_rate": 2.232419529658169e-06, "loss": 17.3135, "step": 38083 }, { "epoch": 0.6961449174694281, "grad_norm": 6.373320989832013, "learning_rate": 2.2321730047090427e-06, "loss": 17.3366, "step": 38084 }, { "epoch": 0.6961631966658746, "grad_norm": 6.608627571211429, "learning_rate": 2.231926489460909e-06, "loss": 17.5397, "step": 38085 }, { "epoch": 0.696181475862321, "grad_norm": 5.538194569243046, "learning_rate": 2.2316799839146318e-06, "loss": 17.2277, "step": 38086 }, { "epoch": 0.6961997550587676, "grad_norm": 7.13495207138713, "learning_rate": 2.2314334880710752e-06, "loss": 17.8351, "step": 38087 }, { "epoch": 0.6962180342552141, "grad_norm": 10.975011334511258, "learning_rate": 2.2311870019311066e-06, "loss": 18.5017, "step": 38088 }, { "epoch": 0.6962363134516607, "grad_norm": 5.378614140157947, "learning_rate": 2.230940525495585e-06, "loss": 17.0653, "step": 38089 }, { "epoch": 0.6962545926481072, "grad_norm": 7.859606660199873, "learning_rate": 2.2306940587653785e-06, "loss": 17.1927, "step": 38090 }, { "epoch": 0.6962728718445537, "grad_norm": 6.70516673563863, "learning_rate": 2.2304476017413474e-06, "loss": 17.6382, "step": 38091 }, { "epoch": 0.6962911510410003, "grad_norm": 7.092384110944724, "learning_rate": 2.2302011544243584e-06, "loss": 17.3053, "step": 38092 }, { "epoch": 0.6963094302374467, "grad_norm": 6.6679309861786695, "learning_rate": 2.2299547168152746e-06, "loss": 17.2452, "step": 38093 }, { "epoch": 0.6963277094338933, "grad_norm": 6.674884960904555, "learning_rate": 2.229708288914957e-06, "loss": 17.5195, "step": 38094 }, { "epoch": 0.6963459886303398, "grad_norm": 5.996935823789136, "learning_rate": 2.2294618707242733e-06, "loss": 17.1461, "step": 38095 }, { "epoch": 0.6963642678267863, "grad_norm": 6.082045272067239, "learning_rate": 2.2292154622440833e-06, "loss": 17.2224, "step": 38096 }, { "epoch": 0.6963825470232329, "grad_norm": 6.554996972474197, "learning_rate": 2.228969063475253e-06, "loss": 17.7634, "step": 38097 }, { "epoch": 0.6964008262196794, "grad_norm": 4.88557099937653, "learning_rate": 2.2287226744186475e-06, "loss": 16.906, "step": 38098 }, { "epoch": 0.696419105416126, "grad_norm": 5.161362724833583, "learning_rate": 2.228476295075128e-06, "loss": 16.9377, "step": 38099 }, { "epoch": 0.6964373846125724, "grad_norm": 5.63544471143035, "learning_rate": 2.2282299254455577e-06, "loss": 17.2243, "step": 38100 }, { "epoch": 0.6964556638090189, "grad_norm": 8.201776634471248, "learning_rate": 2.2279835655308018e-06, "loss": 18.1299, "step": 38101 }, { "epoch": 0.6964739430054655, "grad_norm": 7.376932765922294, "learning_rate": 2.227737215331724e-06, "loss": 17.7039, "step": 38102 }, { "epoch": 0.696492222201912, "grad_norm": 5.5466587526392495, "learning_rate": 2.227490874849184e-06, "loss": 17.0789, "step": 38103 }, { "epoch": 0.6965105013983586, "grad_norm": 5.608752911464146, "learning_rate": 2.22724454408405e-06, "loss": 17.0477, "step": 38104 }, { "epoch": 0.696528780594805, "grad_norm": 4.530147515476254, "learning_rate": 2.226998223037182e-06, "loss": 16.8754, "step": 38105 }, { "epoch": 0.6965470597912515, "grad_norm": 6.408333161241365, "learning_rate": 2.2267519117094436e-06, "loss": 17.6034, "step": 38106 }, { "epoch": 0.6965653389876981, "grad_norm": 5.9714556645131704, "learning_rate": 2.226505610101701e-06, "loss": 17.373, "step": 38107 }, { "epoch": 0.6965836181841446, "grad_norm": 6.9999771733069345, "learning_rate": 2.2262593182148145e-06, "loss": 17.5832, "step": 38108 }, { "epoch": 0.6966018973805912, "grad_norm": 5.668284009407327, "learning_rate": 2.2260130360496503e-06, "loss": 17.2845, "step": 38109 }, { "epoch": 0.6966201765770377, "grad_norm": 7.233287366893161, "learning_rate": 2.225766763607069e-06, "loss": 17.9516, "step": 38110 }, { "epoch": 0.6966384557734842, "grad_norm": 6.4693191323885255, "learning_rate": 2.2255205008879327e-06, "loss": 17.3409, "step": 38111 }, { "epoch": 0.6966567349699307, "grad_norm": 6.21279374619879, "learning_rate": 2.225274247893108e-06, "loss": 17.1642, "step": 38112 }, { "epoch": 0.6966750141663772, "grad_norm": 6.17748478363987, "learning_rate": 2.225028004623455e-06, "loss": 17.2138, "step": 38113 }, { "epoch": 0.6966932933628238, "grad_norm": 4.931868184948052, "learning_rate": 2.2247817710798395e-06, "loss": 17.1184, "step": 38114 }, { "epoch": 0.6967115725592703, "grad_norm": 7.982010896634175, "learning_rate": 2.224535547263122e-06, "loss": 17.9758, "step": 38115 }, { "epoch": 0.6967298517557168, "grad_norm": 7.037224000194383, "learning_rate": 2.2242893331741675e-06, "loss": 17.6834, "step": 38116 }, { "epoch": 0.6967481309521634, "grad_norm": 6.177118534577082, "learning_rate": 2.2240431288138364e-06, "loss": 17.3534, "step": 38117 }, { "epoch": 0.6967664101486098, "grad_norm": 7.334641091595931, "learning_rate": 2.223796934182995e-06, "loss": 17.6228, "step": 38118 }, { "epoch": 0.6967846893450564, "grad_norm": 6.138470516966552, "learning_rate": 2.2235507492825047e-06, "loss": 17.0791, "step": 38119 }, { "epoch": 0.6968029685415029, "grad_norm": 6.191051757377408, "learning_rate": 2.2233045741132265e-06, "loss": 17.3157, "step": 38120 }, { "epoch": 0.6968212477379494, "grad_norm": 6.85581451571036, "learning_rate": 2.223058408676026e-06, "loss": 17.7843, "step": 38121 }, { "epoch": 0.696839526934396, "grad_norm": 6.321482446183835, "learning_rate": 2.2228122529717633e-06, "loss": 17.2913, "step": 38122 }, { "epoch": 0.6968578061308425, "grad_norm": 5.695207065838895, "learning_rate": 2.222566107001303e-06, "loss": 17.09, "step": 38123 }, { "epoch": 0.6968760853272891, "grad_norm": 6.523087883189765, "learning_rate": 2.2223199707655087e-06, "loss": 17.445, "step": 38124 }, { "epoch": 0.6968943645237355, "grad_norm": 6.433585962936718, "learning_rate": 2.22207384426524e-06, "loss": 17.2578, "step": 38125 }, { "epoch": 0.696912643720182, "grad_norm": 6.99414748344536, "learning_rate": 2.221827727501364e-06, "loss": 17.542, "step": 38126 }, { "epoch": 0.6969309229166286, "grad_norm": 7.001687684866369, "learning_rate": 2.22158162047474e-06, "loss": 17.4958, "step": 38127 }, { "epoch": 0.6969492021130751, "grad_norm": 5.204418433769111, "learning_rate": 2.2213355231862295e-06, "loss": 17.0629, "step": 38128 }, { "epoch": 0.6969674813095217, "grad_norm": 5.826744119862982, "learning_rate": 2.221089435636699e-06, "loss": 17.3455, "step": 38129 }, { "epoch": 0.6969857605059682, "grad_norm": 7.014730016583647, "learning_rate": 2.2208433578270087e-06, "loss": 17.4215, "step": 38130 }, { "epoch": 0.6970040397024146, "grad_norm": 6.19341027977243, "learning_rate": 2.2205972897580197e-06, "loss": 17.5437, "step": 38131 }, { "epoch": 0.6970223188988612, "grad_norm": 6.875510408888986, "learning_rate": 2.2203512314305957e-06, "loss": 17.2603, "step": 38132 }, { "epoch": 0.6970405980953077, "grad_norm": 6.857026331127169, "learning_rate": 2.2201051828456016e-06, "loss": 17.9059, "step": 38133 }, { "epoch": 0.6970588772917543, "grad_norm": 5.459369261689002, "learning_rate": 2.2198591440038957e-06, "loss": 16.9374, "step": 38134 }, { "epoch": 0.6970771564882008, "grad_norm": 5.572353372135374, "learning_rate": 2.2196131149063437e-06, "loss": 17.2223, "step": 38135 }, { "epoch": 0.6970954356846473, "grad_norm": 6.6196116171316906, "learning_rate": 2.2193670955538054e-06, "loss": 17.3145, "step": 38136 }, { "epoch": 0.6971137148810939, "grad_norm": 6.879194353046711, "learning_rate": 2.2191210859471453e-06, "loss": 17.7924, "step": 38137 }, { "epoch": 0.6971319940775403, "grad_norm": 7.415552723948976, "learning_rate": 2.218875086087225e-06, "loss": 18.0582, "step": 38138 }, { "epoch": 0.6971502732739868, "grad_norm": 5.669951141782607, "learning_rate": 2.2186290959749036e-06, "loss": 16.9583, "step": 38139 }, { "epoch": 0.6971685524704334, "grad_norm": 6.522907333009487, "learning_rate": 2.218383115611048e-06, "loss": 17.5157, "step": 38140 }, { "epoch": 0.6971868316668799, "grad_norm": 6.581546526445597, "learning_rate": 2.2181371449965165e-06, "loss": 17.4909, "step": 38141 }, { "epoch": 0.6972051108633265, "grad_norm": 6.021636360292265, "learning_rate": 2.2178911841321733e-06, "loss": 17.3803, "step": 38142 }, { "epoch": 0.697223390059773, "grad_norm": 7.26831033931831, "learning_rate": 2.2176452330188815e-06, "loss": 17.623, "step": 38143 }, { "epoch": 0.6972416692562194, "grad_norm": 7.05820829933917, "learning_rate": 2.2173992916575015e-06, "loss": 17.6879, "step": 38144 }, { "epoch": 0.697259948452666, "grad_norm": 6.151190468324801, "learning_rate": 2.2171533600488936e-06, "loss": 17.316, "step": 38145 }, { "epoch": 0.6972782276491125, "grad_norm": 6.420961631220303, "learning_rate": 2.2169074381939244e-06, "loss": 17.2578, "step": 38146 }, { "epoch": 0.6972965068455591, "grad_norm": 5.345852766685427, "learning_rate": 2.2166615260934526e-06, "loss": 17.0919, "step": 38147 }, { "epoch": 0.6973147860420056, "grad_norm": 6.0186397105822955, "learning_rate": 2.2164156237483385e-06, "loss": 17.3927, "step": 38148 }, { "epoch": 0.6973330652384521, "grad_norm": 5.615116604928378, "learning_rate": 2.216169731159448e-06, "loss": 17.0648, "step": 38149 }, { "epoch": 0.6973513444348987, "grad_norm": 5.35598400565419, "learning_rate": 2.2159238483276395e-06, "loss": 17.0494, "step": 38150 }, { "epoch": 0.6973696236313451, "grad_norm": 5.96620000859655, "learning_rate": 2.2156779752537762e-06, "loss": 17.084, "step": 38151 }, { "epoch": 0.6973879028277917, "grad_norm": 6.0038215553161685, "learning_rate": 2.215432111938722e-06, "loss": 17.3786, "step": 38152 }, { "epoch": 0.6974061820242382, "grad_norm": 5.265112960267774, "learning_rate": 2.2151862583833344e-06, "loss": 16.9741, "step": 38153 }, { "epoch": 0.6974244612206847, "grad_norm": 6.069883567128838, "learning_rate": 2.2149404145884796e-06, "loss": 17.5086, "step": 38154 }, { "epoch": 0.6974427404171313, "grad_norm": 7.0417790510318286, "learning_rate": 2.214694580555016e-06, "loss": 17.7134, "step": 38155 }, { "epoch": 0.6974610196135778, "grad_norm": 6.463206588293404, "learning_rate": 2.2144487562838058e-06, "loss": 17.4213, "step": 38156 }, { "epoch": 0.6974792988100244, "grad_norm": 7.11693374730452, "learning_rate": 2.214202941775712e-06, "loss": 17.7441, "step": 38157 }, { "epoch": 0.6974975780064708, "grad_norm": 6.598174331555413, "learning_rate": 2.213957137031595e-06, "loss": 17.5556, "step": 38158 }, { "epoch": 0.6975158572029173, "grad_norm": 16.448997935914896, "learning_rate": 2.213711342052315e-06, "loss": 17.777, "step": 38159 }, { "epoch": 0.6975341363993639, "grad_norm": 7.103955243150268, "learning_rate": 2.2134655568387346e-06, "loss": 17.8413, "step": 38160 }, { "epoch": 0.6975524155958104, "grad_norm": 6.0160129103261974, "learning_rate": 2.213219781391718e-06, "loss": 17.2609, "step": 38161 }, { "epoch": 0.697570694792257, "grad_norm": 5.717548922354652, "learning_rate": 2.2129740157121223e-06, "loss": 17.0202, "step": 38162 }, { "epoch": 0.6975889739887035, "grad_norm": 5.510152245131951, "learning_rate": 2.212728259800812e-06, "loss": 17.1993, "step": 38163 }, { "epoch": 0.6976072531851499, "grad_norm": 6.320234548845445, "learning_rate": 2.212482513658648e-06, "loss": 17.5413, "step": 38164 }, { "epoch": 0.6976255323815965, "grad_norm": 6.083612017283902, "learning_rate": 2.212236777286489e-06, "loss": 17.0971, "step": 38165 }, { "epoch": 0.697643811578043, "grad_norm": 7.751862088481447, "learning_rate": 2.2119910506851994e-06, "loss": 17.8026, "step": 38166 }, { "epoch": 0.6976620907744896, "grad_norm": 5.463369200975475, "learning_rate": 2.211745333855638e-06, "loss": 17.1264, "step": 38167 }, { "epoch": 0.6976803699709361, "grad_norm": 6.2038329882440495, "learning_rate": 2.2114996267986687e-06, "loss": 17.3128, "step": 38168 }, { "epoch": 0.6976986491673826, "grad_norm": 6.284093051368003, "learning_rate": 2.2112539295151497e-06, "loss": 17.0153, "step": 38169 }, { "epoch": 0.6977169283638291, "grad_norm": 6.9671792180879155, "learning_rate": 2.2110082420059438e-06, "loss": 17.5577, "step": 38170 }, { "epoch": 0.6977352075602756, "grad_norm": 6.646828913687292, "learning_rate": 2.2107625642719135e-06, "loss": 17.4943, "step": 38171 }, { "epoch": 0.6977534867567222, "grad_norm": 9.081771626232234, "learning_rate": 2.2105168963139186e-06, "loss": 18.0991, "step": 38172 }, { "epoch": 0.6977717659531687, "grad_norm": 6.4484723546800895, "learning_rate": 2.210271238132818e-06, "loss": 17.5428, "step": 38173 }, { "epoch": 0.6977900451496152, "grad_norm": 6.98593070456705, "learning_rate": 2.2100255897294766e-06, "loss": 17.4179, "step": 38174 }, { "epoch": 0.6978083243460618, "grad_norm": 8.055599577322461, "learning_rate": 2.209779951104753e-06, "loss": 17.8086, "step": 38175 }, { "epoch": 0.6978266035425083, "grad_norm": 6.611349863068788, "learning_rate": 2.2095343222595067e-06, "loss": 17.2726, "step": 38176 }, { "epoch": 0.6978448827389548, "grad_norm": 9.3079662024674, "learning_rate": 2.2092887031946024e-06, "loss": 18.751, "step": 38177 }, { "epoch": 0.6978631619354013, "grad_norm": 8.054447682879415, "learning_rate": 2.2090430939108974e-06, "loss": 18.1487, "step": 38178 }, { "epoch": 0.6978814411318478, "grad_norm": 4.902426087294967, "learning_rate": 2.2087974944092545e-06, "loss": 16.8827, "step": 38179 }, { "epoch": 0.6978997203282944, "grad_norm": 11.047145240321562, "learning_rate": 2.208551904690535e-06, "loss": 18.7158, "step": 38180 }, { "epoch": 0.6979179995247409, "grad_norm": 5.216174493618522, "learning_rate": 2.2083063247555997e-06, "loss": 17.1264, "step": 38181 }, { "epoch": 0.6979362787211875, "grad_norm": 5.728923762810907, "learning_rate": 2.208060754605307e-06, "loss": 17.3028, "step": 38182 }, { "epoch": 0.6979545579176339, "grad_norm": 6.549144541469588, "learning_rate": 2.20781519424052e-06, "loss": 17.5854, "step": 38183 }, { "epoch": 0.6979728371140804, "grad_norm": 7.134192476781358, "learning_rate": 2.207569643662097e-06, "loss": 17.9063, "step": 38184 }, { "epoch": 0.697991116310527, "grad_norm": 5.763577227516417, "learning_rate": 2.207324102870902e-06, "loss": 17.1776, "step": 38185 }, { "epoch": 0.6980093955069735, "grad_norm": 6.252926111864721, "learning_rate": 2.207078571867791e-06, "loss": 17.5044, "step": 38186 }, { "epoch": 0.6980276747034201, "grad_norm": 5.221353335538181, "learning_rate": 2.20683305065363e-06, "loss": 17.1672, "step": 38187 }, { "epoch": 0.6980459538998666, "grad_norm": 6.0146190557127435, "learning_rate": 2.2065875392292744e-06, "loss": 17.3761, "step": 38188 }, { "epoch": 0.698064233096313, "grad_norm": 7.579196812841037, "learning_rate": 2.20634203759559e-06, "loss": 17.9883, "step": 38189 }, { "epoch": 0.6980825122927596, "grad_norm": 7.0051284358726535, "learning_rate": 2.2060965457534316e-06, "loss": 17.4954, "step": 38190 }, { "epoch": 0.6981007914892061, "grad_norm": 5.828290831231029, "learning_rate": 2.205851063703664e-06, "loss": 17.2144, "step": 38191 }, { "epoch": 0.6981190706856527, "grad_norm": 7.179180396770473, "learning_rate": 2.2056055914471463e-06, "loss": 17.8746, "step": 38192 }, { "epoch": 0.6981373498820992, "grad_norm": 5.735323104655032, "learning_rate": 2.205360128984737e-06, "loss": 17.4493, "step": 38193 }, { "epoch": 0.6981556290785457, "grad_norm": 6.567362047221822, "learning_rate": 2.205114676317299e-06, "loss": 17.0967, "step": 38194 }, { "epoch": 0.6981739082749923, "grad_norm": 5.617846777937988, "learning_rate": 2.2048692334456905e-06, "loss": 17.4016, "step": 38195 }, { "epoch": 0.6981921874714387, "grad_norm": 6.025402355513398, "learning_rate": 2.204623800370772e-06, "loss": 17.3442, "step": 38196 }, { "epoch": 0.6982104666678853, "grad_norm": 5.086389336002116, "learning_rate": 2.204378377093407e-06, "loss": 17.0563, "step": 38197 }, { "epoch": 0.6982287458643318, "grad_norm": 6.744047110923577, "learning_rate": 2.204132963614451e-06, "loss": 17.3321, "step": 38198 }, { "epoch": 0.6982470250607783, "grad_norm": 6.037970610093363, "learning_rate": 2.2038875599347687e-06, "loss": 17.454, "step": 38199 }, { "epoch": 0.6982653042572249, "grad_norm": 5.611830030799309, "learning_rate": 2.203642166055217e-06, "loss": 17.1535, "step": 38200 }, { "epoch": 0.6982835834536714, "grad_norm": 5.149089364304057, "learning_rate": 2.203396781976656e-06, "loss": 17.1143, "step": 38201 }, { "epoch": 0.698301862650118, "grad_norm": 6.347213785149604, "learning_rate": 2.2031514076999477e-06, "loss": 17.6188, "step": 38202 }, { "epoch": 0.6983201418465644, "grad_norm": 6.514818161941118, "learning_rate": 2.2029060432259512e-06, "loss": 17.5483, "step": 38203 }, { "epoch": 0.6983384210430109, "grad_norm": 5.617431547063992, "learning_rate": 2.2026606885555247e-06, "loss": 17.1382, "step": 38204 }, { "epoch": 0.6983567002394575, "grad_norm": 5.280861481953758, "learning_rate": 2.2024153436895294e-06, "loss": 17.0619, "step": 38205 }, { "epoch": 0.698374979435904, "grad_norm": 6.33172069899981, "learning_rate": 2.202170008628828e-06, "loss": 17.3408, "step": 38206 }, { "epoch": 0.6983932586323505, "grad_norm": 6.278623279792009, "learning_rate": 2.2019246833742754e-06, "loss": 17.5359, "step": 38207 }, { "epoch": 0.6984115378287971, "grad_norm": 6.666625188308324, "learning_rate": 2.2016793679267356e-06, "loss": 17.6234, "step": 38208 }, { "epoch": 0.6984298170252435, "grad_norm": 6.934584734928003, "learning_rate": 2.201434062287068e-06, "loss": 17.4466, "step": 38209 }, { "epoch": 0.6984480962216901, "grad_norm": 6.777261939880844, "learning_rate": 2.2011887664561284e-06, "loss": 17.6425, "step": 38210 }, { "epoch": 0.6984663754181366, "grad_norm": 6.609220442178052, "learning_rate": 2.2009434804347813e-06, "loss": 17.7133, "step": 38211 }, { "epoch": 0.6984846546145831, "grad_norm": 5.654055218756762, "learning_rate": 2.2006982042238837e-06, "loss": 17.1388, "step": 38212 }, { "epoch": 0.6985029338110297, "grad_norm": 6.037483275715292, "learning_rate": 2.2004529378242954e-06, "loss": 17.3141, "step": 38213 }, { "epoch": 0.6985212130074762, "grad_norm": 7.483917218035124, "learning_rate": 2.200207681236876e-06, "loss": 18.057, "step": 38214 }, { "epoch": 0.6985394922039228, "grad_norm": 6.891963597364052, "learning_rate": 2.199962434462486e-06, "loss": 17.7146, "step": 38215 }, { "epoch": 0.6985577714003692, "grad_norm": 5.722157687697769, "learning_rate": 2.199717197501986e-06, "loss": 17.0253, "step": 38216 }, { "epoch": 0.6985760505968157, "grad_norm": 6.484455131592145, "learning_rate": 2.199471970356235e-06, "loss": 17.3771, "step": 38217 }, { "epoch": 0.6985943297932623, "grad_norm": 7.15200375644363, "learning_rate": 2.199226753026089e-06, "loss": 18.038, "step": 38218 }, { "epoch": 0.6986126089897088, "grad_norm": 7.861422665432673, "learning_rate": 2.1989815455124125e-06, "loss": 17.7721, "step": 38219 }, { "epoch": 0.6986308881861554, "grad_norm": 7.262836285226942, "learning_rate": 2.198736347816063e-06, "loss": 17.4734, "step": 38220 }, { "epoch": 0.6986491673826019, "grad_norm": 6.434898877420011, "learning_rate": 2.198491159937897e-06, "loss": 17.6421, "step": 38221 }, { "epoch": 0.6986674465790483, "grad_norm": 6.334205464402284, "learning_rate": 2.1982459818787787e-06, "loss": 17.2133, "step": 38222 }, { "epoch": 0.6986857257754949, "grad_norm": 6.03234301169846, "learning_rate": 2.198000813639563e-06, "loss": 17.6023, "step": 38223 }, { "epoch": 0.6987040049719414, "grad_norm": 6.2804992456049495, "learning_rate": 2.1977556552211117e-06, "loss": 17.5412, "step": 38224 }, { "epoch": 0.698722284168388, "grad_norm": 6.0386653504181425, "learning_rate": 2.197510506624285e-06, "loss": 17.2506, "step": 38225 }, { "epoch": 0.6987405633648345, "grad_norm": 6.295001037891138, "learning_rate": 2.1972653678499408e-06, "loss": 17.435, "step": 38226 }, { "epoch": 0.698758842561281, "grad_norm": 7.555013228985665, "learning_rate": 2.1970202388989364e-06, "loss": 18.2381, "step": 38227 }, { "epoch": 0.6987771217577275, "grad_norm": 6.385599611866018, "learning_rate": 2.196775119772135e-06, "loss": 17.7409, "step": 38228 }, { "epoch": 0.698795400954174, "grad_norm": 5.6871599627820775, "learning_rate": 2.196530010470392e-06, "loss": 17.2682, "step": 38229 }, { "epoch": 0.6988136801506206, "grad_norm": 5.706873811587035, "learning_rate": 2.196284910994569e-06, "loss": 17.2213, "step": 38230 }, { "epoch": 0.6988319593470671, "grad_norm": 5.830940967109914, "learning_rate": 2.196039821345525e-06, "loss": 17.0717, "step": 38231 }, { "epoch": 0.6988502385435136, "grad_norm": 5.329952120281645, "learning_rate": 2.1957947415241153e-06, "loss": 17.2604, "step": 38232 }, { "epoch": 0.6988685177399602, "grad_norm": 7.462071911885408, "learning_rate": 2.1955496715312026e-06, "loss": 17.6227, "step": 38233 }, { "epoch": 0.6988867969364067, "grad_norm": 7.1203761000921535, "learning_rate": 2.1953046113676465e-06, "loss": 17.6398, "step": 38234 }, { "epoch": 0.6989050761328532, "grad_norm": 5.668242084590523, "learning_rate": 2.1950595610343024e-06, "loss": 17.2376, "step": 38235 }, { "epoch": 0.6989233553292997, "grad_norm": 5.936327508195207, "learning_rate": 2.1948145205320327e-06, "loss": 17.3377, "step": 38236 }, { "epoch": 0.6989416345257462, "grad_norm": 4.890677844041308, "learning_rate": 2.194569489861695e-06, "loss": 16.8148, "step": 38237 }, { "epoch": 0.6989599137221928, "grad_norm": 5.103800508046784, "learning_rate": 2.194324469024146e-06, "loss": 16.9864, "step": 38238 }, { "epoch": 0.6989781929186393, "grad_norm": 6.519852096729493, "learning_rate": 2.194079458020248e-06, "loss": 17.6841, "step": 38239 }, { "epoch": 0.6989964721150859, "grad_norm": 7.745640267328883, "learning_rate": 2.193834456850856e-06, "loss": 17.4819, "step": 38240 }, { "epoch": 0.6990147513115323, "grad_norm": 6.2171584725239555, "learning_rate": 2.1935894655168327e-06, "loss": 17.4497, "step": 38241 }, { "epoch": 0.6990330305079788, "grad_norm": 6.5257870390156105, "learning_rate": 2.193344484019033e-06, "loss": 17.3768, "step": 38242 }, { "epoch": 0.6990513097044254, "grad_norm": 6.616503498122293, "learning_rate": 2.1930995123583175e-06, "loss": 17.3262, "step": 38243 }, { "epoch": 0.6990695889008719, "grad_norm": 6.08625868227863, "learning_rate": 2.192854550535546e-06, "loss": 17.2484, "step": 38244 }, { "epoch": 0.6990878680973185, "grad_norm": 6.134850377860669, "learning_rate": 2.1926095985515767e-06, "loss": 17.439, "step": 38245 }, { "epoch": 0.699106147293765, "grad_norm": 7.961994068168886, "learning_rate": 2.192364656407264e-06, "loss": 18.3408, "step": 38246 }, { "epoch": 0.6991244264902114, "grad_norm": 5.298357147990222, "learning_rate": 2.1921197241034724e-06, "loss": 17.1464, "step": 38247 }, { "epoch": 0.699142705686658, "grad_norm": 5.756441973717702, "learning_rate": 2.1918748016410575e-06, "loss": 17.2784, "step": 38248 }, { "epoch": 0.6991609848831045, "grad_norm": 5.346857314202215, "learning_rate": 2.1916298890208754e-06, "loss": 17.1148, "step": 38249 }, { "epoch": 0.6991792640795511, "grad_norm": 6.8796872613915605, "learning_rate": 2.191384986243787e-06, "loss": 17.7966, "step": 38250 }, { "epoch": 0.6991975432759976, "grad_norm": 5.873792196737131, "learning_rate": 2.1911400933106524e-06, "loss": 17.3716, "step": 38251 }, { "epoch": 0.6992158224724441, "grad_norm": 6.122563137005977, "learning_rate": 2.1908952102223257e-06, "loss": 17.4029, "step": 38252 }, { "epoch": 0.6992341016688907, "grad_norm": 6.399269353460306, "learning_rate": 2.19065033697967e-06, "loss": 17.2599, "step": 38253 }, { "epoch": 0.6992523808653371, "grad_norm": 5.639316083060597, "learning_rate": 2.190405473583541e-06, "loss": 17.3134, "step": 38254 }, { "epoch": 0.6992706600617837, "grad_norm": 5.262876573225846, "learning_rate": 2.1901606200347945e-06, "loss": 16.9739, "step": 38255 }, { "epoch": 0.6992889392582302, "grad_norm": 5.994079152278737, "learning_rate": 2.189915776334294e-06, "loss": 17.2571, "step": 38256 }, { "epoch": 0.6993072184546767, "grad_norm": 7.698640758355332, "learning_rate": 2.1896709424828946e-06, "loss": 18.1135, "step": 38257 }, { "epoch": 0.6993254976511233, "grad_norm": 5.597568747265318, "learning_rate": 2.1894261184814525e-06, "loss": 17.3634, "step": 38258 }, { "epoch": 0.6993437768475698, "grad_norm": 6.314694790326451, "learning_rate": 2.189181304330828e-06, "loss": 17.5091, "step": 38259 }, { "epoch": 0.6993620560440164, "grad_norm": 5.090514152377255, "learning_rate": 2.1889365000318795e-06, "loss": 16.9098, "step": 38260 }, { "epoch": 0.6993803352404628, "grad_norm": 5.917127260777205, "learning_rate": 2.188691705585466e-06, "loss": 17.4043, "step": 38261 }, { "epoch": 0.6993986144369093, "grad_norm": 7.1745798180953875, "learning_rate": 2.188446920992445e-06, "loss": 17.4898, "step": 38262 }, { "epoch": 0.6994168936333559, "grad_norm": 6.21596382751203, "learning_rate": 2.1882021462536707e-06, "loss": 17.3866, "step": 38263 }, { "epoch": 0.6994351728298024, "grad_norm": 5.862046160122743, "learning_rate": 2.187957381370006e-06, "loss": 17.1059, "step": 38264 }, { "epoch": 0.699453452026249, "grad_norm": 5.983084830479595, "learning_rate": 2.1877126263423064e-06, "loss": 17.1148, "step": 38265 }, { "epoch": 0.6994717312226955, "grad_norm": 5.196054305262143, "learning_rate": 2.187467881171429e-06, "loss": 17.156, "step": 38266 }, { "epoch": 0.6994900104191419, "grad_norm": 6.262226377282904, "learning_rate": 2.1872231458582337e-06, "loss": 17.1199, "step": 38267 }, { "epoch": 0.6995082896155885, "grad_norm": 4.977633417898752, "learning_rate": 2.186978420403576e-06, "loss": 16.7676, "step": 38268 }, { "epoch": 0.699526568812035, "grad_norm": 5.882831992229316, "learning_rate": 2.1867337048083143e-06, "loss": 17.0668, "step": 38269 }, { "epoch": 0.6995448480084816, "grad_norm": 5.265893042042695, "learning_rate": 2.1864889990733083e-06, "loss": 17.2826, "step": 38270 }, { "epoch": 0.6995631272049281, "grad_norm": 7.001330837304079, "learning_rate": 2.186244303199415e-06, "loss": 17.556, "step": 38271 }, { "epoch": 0.6995814064013746, "grad_norm": 5.535500773536348, "learning_rate": 2.185999617187489e-06, "loss": 17.1245, "step": 38272 }, { "epoch": 0.6995996855978212, "grad_norm": 8.559920546295727, "learning_rate": 2.185754941038393e-06, "loss": 18.283, "step": 38273 }, { "epoch": 0.6996179647942676, "grad_norm": 6.339743996082191, "learning_rate": 2.1855102747529787e-06, "loss": 17.2606, "step": 38274 }, { "epoch": 0.6996362439907141, "grad_norm": 5.012290791247594, "learning_rate": 2.1852656183321086e-06, "loss": 16.7948, "step": 38275 }, { "epoch": 0.6996545231871607, "grad_norm": 4.954311642663914, "learning_rate": 2.1850209717766384e-06, "loss": 16.7965, "step": 38276 }, { "epoch": 0.6996728023836072, "grad_norm": 5.583415591285332, "learning_rate": 2.184776335087424e-06, "loss": 17.208, "step": 38277 }, { "epoch": 0.6996910815800538, "grad_norm": 6.01563287737129, "learning_rate": 2.184531708265324e-06, "loss": 17.5458, "step": 38278 }, { "epoch": 0.6997093607765003, "grad_norm": 8.063768678951456, "learning_rate": 2.1842870913111972e-06, "loss": 18.003, "step": 38279 }, { "epoch": 0.6997276399729467, "grad_norm": 6.561831638455678, "learning_rate": 2.1840424842258987e-06, "loss": 17.5627, "step": 38280 }, { "epoch": 0.6997459191693933, "grad_norm": 7.17038930268246, "learning_rate": 2.1837978870102887e-06, "loss": 17.2883, "step": 38281 }, { "epoch": 0.6997641983658398, "grad_norm": 5.24901246676679, "learning_rate": 2.183553299665223e-06, "loss": 17.0047, "step": 38282 }, { "epoch": 0.6997824775622864, "grad_norm": 5.860227390377085, "learning_rate": 2.1833087221915565e-06, "loss": 17.2893, "step": 38283 }, { "epoch": 0.6998007567587329, "grad_norm": 5.38487561208003, "learning_rate": 2.1830641545901504e-06, "loss": 16.9478, "step": 38284 }, { "epoch": 0.6998190359551794, "grad_norm": 6.509015283094326, "learning_rate": 2.18281959686186e-06, "loss": 17.4289, "step": 38285 }, { "epoch": 0.699837315151626, "grad_norm": 7.378246764231507, "learning_rate": 2.1825750490075404e-06, "loss": 18.0842, "step": 38286 }, { "epoch": 0.6998555943480724, "grad_norm": 6.162122016072417, "learning_rate": 2.182330511028051e-06, "loss": 17.3705, "step": 38287 }, { "epoch": 0.699873873544519, "grad_norm": 5.772217693276773, "learning_rate": 2.1820859829242498e-06, "loss": 17.377, "step": 38288 }, { "epoch": 0.6998921527409655, "grad_norm": 8.06021157325433, "learning_rate": 2.1818414646969917e-06, "loss": 18.171, "step": 38289 }, { "epoch": 0.699910431937412, "grad_norm": 7.729712302717872, "learning_rate": 2.181596956347136e-06, "loss": 18.0884, "step": 38290 }, { "epoch": 0.6999287111338586, "grad_norm": 5.3535377212525255, "learning_rate": 2.1813524578755367e-06, "loss": 17.0502, "step": 38291 }, { "epoch": 0.699946990330305, "grad_norm": 6.208087278228965, "learning_rate": 2.1811079692830544e-06, "loss": 17.2411, "step": 38292 }, { "epoch": 0.6999652695267516, "grad_norm": 5.969672303870689, "learning_rate": 2.1808634905705445e-06, "loss": 17.1406, "step": 38293 }, { "epoch": 0.6999835487231981, "grad_norm": 7.307564544157432, "learning_rate": 2.180619021738861e-06, "loss": 17.8585, "step": 38294 }, { "epoch": 0.7000018279196446, "grad_norm": 6.03268687213779, "learning_rate": 2.1803745627888652e-06, "loss": 17.1553, "step": 38295 }, { "epoch": 0.7000201071160912, "grad_norm": 6.252134431987444, "learning_rate": 2.18013011372141e-06, "loss": 17.6135, "step": 38296 }, { "epoch": 0.7000383863125377, "grad_norm": 6.289983132758922, "learning_rate": 2.1798856745373543e-06, "loss": 17.739, "step": 38297 }, { "epoch": 0.7000566655089843, "grad_norm": 7.201494868566135, "learning_rate": 2.1796412452375558e-06, "loss": 17.9572, "step": 38298 }, { "epoch": 0.7000749447054307, "grad_norm": 5.682151818389174, "learning_rate": 2.1793968258228706e-06, "loss": 16.9477, "step": 38299 }, { "epoch": 0.7000932239018772, "grad_norm": 7.131372186119822, "learning_rate": 2.1791524162941525e-06, "loss": 17.3688, "step": 38300 }, { "epoch": 0.7001115030983238, "grad_norm": 6.457104703014873, "learning_rate": 2.1789080166522627e-06, "loss": 17.0988, "step": 38301 }, { "epoch": 0.7001297822947703, "grad_norm": 5.912676319561994, "learning_rate": 2.178663626898055e-06, "loss": 17.3284, "step": 38302 }, { "epoch": 0.7001480614912169, "grad_norm": 5.368442690401307, "learning_rate": 2.1784192470323853e-06, "loss": 16.9839, "step": 38303 }, { "epoch": 0.7001663406876634, "grad_norm": 8.563597711711814, "learning_rate": 2.178174877056112e-06, "loss": 18.4892, "step": 38304 }, { "epoch": 0.7001846198841098, "grad_norm": 6.210907337260076, "learning_rate": 2.1779305169700904e-06, "loss": 17.4674, "step": 38305 }, { "epoch": 0.7002028990805564, "grad_norm": 6.837979188585866, "learning_rate": 2.1776861667751765e-06, "loss": 17.6156, "step": 38306 }, { "epoch": 0.7002211782770029, "grad_norm": 6.7034587337707245, "learning_rate": 2.1774418264722298e-06, "loss": 17.6525, "step": 38307 }, { "epoch": 0.7002394574734495, "grad_norm": 6.594193168573128, "learning_rate": 2.1771974960621027e-06, "loss": 17.5176, "step": 38308 }, { "epoch": 0.700257736669896, "grad_norm": 5.93510057990568, "learning_rate": 2.176953175545656e-06, "loss": 17.0518, "step": 38309 }, { "epoch": 0.7002760158663425, "grad_norm": 5.001553748092199, "learning_rate": 2.176708864923742e-06, "loss": 16.8993, "step": 38310 }, { "epoch": 0.7002942950627891, "grad_norm": 5.812586748449035, "learning_rate": 2.1764645641972177e-06, "loss": 17.1792, "step": 38311 }, { "epoch": 0.7003125742592355, "grad_norm": 6.54725805486823, "learning_rate": 2.176220273366941e-06, "loss": 17.4534, "step": 38312 }, { "epoch": 0.7003308534556821, "grad_norm": 5.86207691007408, "learning_rate": 2.1759759924337666e-06, "loss": 17.3701, "step": 38313 }, { "epoch": 0.7003491326521286, "grad_norm": 5.398873105958353, "learning_rate": 2.1757317213985523e-06, "loss": 17.2267, "step": 38314 }, { "epoch": 0.7003674118485751, "grad_norm": 4.88495868682012, "learning_rate": 2.1754874602621516e-06, "loss": 16.7801, "step": 38315 }, { "epoch": 0.7003856910450217, "grad_norm": 5.380077231898612, "learning_rate": 2.1752432090254244e-06, "loss": 16.9344, "step": 38316 }, { "epoch": 0.7004039702414682, "grad_norm": 5.572245292304623, "learning_rate": 2.1749989676892223e-06, "loss": 17.0627, "step": 38317 }, { "epoch": 0.7004222494379148, "grad_norm": 6.442559441155481, "learning_rate": 2.174754736254406e-06, "loss": 17.6141, "step": 38318 }, { "epoch": 0.7004405286343612, "grad_norm": 7.949173821094572, "learning_rate": 2.1745105147218283e-06, "loss": 17.8321, "step": 38319 }, { "epoch": 0.7004588078308077, "grad_norm": 6.405857522374392, "learning_rate": 2.174266303092345e-06, "loss": 17.589, "step": 38320 }, { "epoch": 0.7004770870272543, "grad_norm": 5.79233745126443, "learning_rate": 2.1740221013668143e-06, "loss": 17.2274, "step": 38321 }, { "epoch": 0.7004953662237008, "grad_norm": 6.228817489196845, "learning_rate": 2.17377790954609e-06, "loss": 17.8078, "step": 38322 }, { "epoch": 0.7005136454201474, "grad_norm": 5.730488477646002, "learning_rate": 2.173533727631028e-06, "loss": 17.2139, "step": 38323 }, { "epoch": 0.7005319246165939, "grad_norm": 5.910963389872592, "learning_rate": 2.1732895556224874e-06, "loss": 17.2869, "step": 38324 }, { "epoch": 0.7005502038130403, "grad_norm": 5.105251674900836, "learning_rate": 2.1730453935213196e-06, "loss": 16.9195, "step": 38325 }, { "epoch": 0.7005684830094869, "grad_norm": 5.868508529500904, "learning_rate": 2.1728012413283844e-06, "loss": 17.3447, "step": 38326 }, { "epoch": 0.7005867622059334, "grad_norm": 4.572228195732857, "learning_rate": 2.1725570990445354e-06, "loss": 16.862, "step": 38327 }, { "epoch": 0.70060504140238, "grad_norm": 6.520947430347433, "learning_rate": 2.1723129666706266e-06, "loss": 17.6961, "step": 38328 }, { "epoch": 0.7006233205988265, "grad_norm": 5.605023079696093, "learning_rate": 2.172068844207517e-06, "loss": 16.9989, "step": 38329 }, { "epoch": 0.700641599795273, "grad_norm": 5.841559106922494, "learning_rate": 2.171824731656061e-06, "loss": 17.3965, "step": 38330 }, { "epoch": 0.7006598789917196, "grad_norm": 7.373442085472843, "learning_rate": 2.171580629017112e-06, "loss": 17.7022, "step": 38331 }, { "epoch": 0.700678158188166, "grad_norm": 7.284107461142081, "learning_rate": 2.1713365362915274e-06, "loss": 17.6341, "step": 38332 }, { "epoch": 0.7006964373846126, "grad_norm": 5.846091913409876, "learning_rate": 2.1710924534801647e-06, "loss": 17.4393, "step": 38333 }, { "epoch": 0.7007147165810591, "grad_norm": 6.365482269397363, "learning_rate": 2.1708483805838753e-06, "loss": 17.3623, "step": 38334 }, { "epoch": 0.7007329957775056, "grad_norm": 6.103069863858767, "learning_rate": 2.170604317603519e-06, "loss": 17.5283, "step": 38335 }, { "epoch": 0.7007512749739522, "grad_norm": 6.003739542932848, "learning_rate": 2.1703602645399474e-06, "loss": 17.1938, "step": 38336 }, { "epoch": 0.7007695541703987, "grad_norm": 6.361768769071256, "learning_rate": 2.170116221394019e-06, "loss": 17.5818, "step": 38337 }, { "epoch": 0.7007878333668452, "grad_norm": 5.664671295722118, "learning_rate": 2.1698721881665878e-06, "loss": 17.1121, "step": 38338 }, { "epoch": 0.7008061125632917, "grad_norm": 6.6647329831442494, "learning_rate": 2.1696281648585073e-06, "loss": 17.6837, "step": 38339 }, { "epoch": 0.7008243917597382, "grad_norm": 5.987291580749757, "learning_rate": 2.1693841514706366e-06, "loss": 17.4935, "step": 38340 }, { "epoch": 0.7008426709561848, "grad_norm": 8.56472830793594, "learning_rate": 2.169140148003826e-06, "loss": 18.1362, "step": 38341 }, { "epoch": 0.7008609501526313, "grad_norm": 6.050454860586055, "learning_rate": 2.1688961544589342e-06, "loss": 17.2473, "step": 38342 }, { "epoch": 0.7008792293490778, "grad_norm": 6.774207670435346, "learning_rate": 2.1686521708368173e-06, "loss": 17.474, "step": 38343 }, { "epoch": 0.7008975085455244, "grad_norm": 7.135368588968183, "learning_rate": 2.1684081971383292e-06, "loss": 17.9477, "step": 38344 }, { "epoch": 0.7009157877419708, "grad_norm": 5.6316863600067135, "learning_rate": 2.168164233364323e-06, "loss": 17.1403, "step": 38345 }, { "epoch": 0.7009340669384174, "grad_norm": 5.684522958988353, "learning_rate": 2.1679202795156567e-06, "loss": 17.2092, "step": 38346 }, { "epoch": 0.7009523461348639, "grad_norm": 6.313231155429049, "learning_rate": 2.167676335593184e-06, "loss": 17.5097, "step": 38347 }, { "epoch": 0.7009706253313104, "grad_norm": 5.705967215795771, "learning_rate": 2.1674324015977582e-06, "loss": 17.1211, "step": 38348 }, { "epoch": 0.700988904527757, "grad_norm": 8.408484090333271, "learning_rate": 2.167188477530238e-06, "loss": 17.6953, "step": 38349 }, { "epoch": 0.7010071837242035, "grad_norm": 5.851944792759821, "learning_rate": 2.166944563391475e-06, "loss": 17.3717, "step": 38350 }, { "epoch": 0.70102546292065, "grad_norm": 6.838149462022979, "learning_rate": 2.1667006591823246e-06, "loss": 17.9277, "step": 38351 }, { "epoch": 0.7010437421170965, "grad_norm": 7.954114514689801, "learning_rate": 2.166456764903645e-06, "loss": 17.7957, "step": 38352 }, { "epoch": 0.701062021313543, "grad_norm": 5.262167245414931, "learning_rate": 2.1662128805562866e-06, "loss": 17.0325, "step": 38353 }, { "epoch": 0.7010803005099896, "grad_norm": 6.18591108872872, "learning_rate": 2.1659690061411078e-06, "loss": 17.2356, "step": 38354 }, { "epoch": 0.7010985797064361, "grad_norm": 6.052491693823118, "learning_rate": 2.165725141658962e-06, "loss": 17.5762, "step": 38355 }, { "epoch": 0.7011168589028827, "grad_norm": 5.802358343770505, "learning_rate": 2.1654812871107015e-06, "loss": 16.9684, "step": 38356 }, { "epoch": 0.7011351380993291, "grad_norm": 6.111362758923562, "learning_rate": 2.165237442497185e-06, "loss": 17.2469, "step": 38357 }, { "epoch": 0.7011534172957756, "grad_norm": 6.6792635807592555, "learning_rate": 2.1649936078192653e-06, "loss": 17.6949, "step": 38358 }, { "epoch": 0.7011716964922222, "grad_norm": 8.509274007732296, "learning_rate": 2.1647497830777954e-06, "loss": 18.2174, "step": 38359 }, { "epoch": 0.7011899756886687, "grad_norm": 6.617209036197307, "learning_rate": 2.1645059682736313e-06, "loss": 17.7243, "step": 38360 }, { "epoch": 0.7012082548851153, "grad_norm": 6.167300010211024, "learning_rate": 2.1642621634076294e-06, "loss": 17.1988, "step": 38361 }, { "epoch": 0.7012265340815618, "grad_norm": 5.872440551295684, "learning_rate": 2.164018368480641e-06, "loss": 17.216, "step": 38362 }, { "epoch": 0.7012448132780082, "grad_norm": 7.303681737671594, "learning_rate": 2.1637745834935235e-06, "loss": 17.5796, "step": 38363 }, { "epoch": 0.7012630924744548, "grad_norm": 6.471858051087266, "learning_rate": 2.16353080844713e-06, "loss": 17.5492, "step": 38364 }, { "epoch": 0.7012813716709013, "grad_norm": 5.523657933854704, "learning_rate": 2.1632870433423138e-06, "loss": 16.9638, "step": 38365 }, { "epoch": 0.7012996508673479, "grad_norm": 6.344252952015172, "learning_rate": 2.163043288179932e-06, "loss": 17.6215, "step": 38366 }, { "epoch": 0.7013179300637944, "grad_norm": 6.422948471159559, "learning_rate": 2.1627995429608345e-06, "loss": 17.5869, "step": 38367 }, { "epoch": 0.7013362092602409, "grad_norm": 7.008518852628427, "learning_rate": 2.162555807685881e-06, "loss": 17.7211, "step": 38368 }, { "epoch": 0.7013544884566875, "grad_norm": 6.837165857000721, "learning_rate": 2.162312082355921e-06, "loss": 17.9093, "step": 38369 }, { "epoch": 0.7013727676531339, "grad_norm": 7.078847655068892, "learning_rate": 2.1620683669718117e-06, "loss": 17.79, "step": 38370 }, { "epoch": 0.7013910468495805, "grad_norm": 8.124803272239413, "learning_rate": 2.1618246615344073e-06, "loss": 18.1699, "step": 38371 }, { "epoch": 0.701409326046027, "grad_norm": 5.567405182550412, "learning_rate": 2.161580966044562e-06, "loss": 16.9005, "step": 38372 }, { "epoch": 0.7014276052424735, "grad_norm": 6.012562617069827, "learning_rate": 2.1613372805031266e-06, "loss": 17.2066, "step": 38373 }, { "epoch": 0.7014458844389201, "grad_norm": 5.083247726540382, "learning_rate": 2.16109360491096e-06, "loss": 16.828, "step": 38374 }, { "epoch": 0.7014641636353666, "grad_norm": 6.556211564308554, "learning_rate": 2.160849939268914e-06, "loss": 17.2963, "step": 38375 }, { "epoch": 0.7014824428318132, "grad_norm": 6.543035600444369, "learning_rate": 2.160606283577841e-06, "loss": 17.5914, "step": 38376 }, { "epoch": 0.7015007220282596, "grad_norm": 6.092691933452901, "learning_rate": 2.1603626378385956e-06, "loss": 17.3914, "step": 38377 }, { "epoch": 0.7015190012247061, "grad_norm": 4.857334519654821, "learning_rate": 2.1601190020520353e-06, "loss": 16.9789, "step": 38378 }, { "epoch": 0.7015372804211527, "grad_norm": 5.4499923616422095, "learning_rate": 2.1598753762190096e-06, "loss": 17.2307, "step": 38379 }, { "epoch": 0.7015555596175992, "grad_norm": 5.420604717032738, "learning_rate": 2.159631760340376e-06, "loss": 17.0923, "step": 38380 }, { "epoch": 0.7015738388140458, "grad_norm": 7.4504028247638505, "learning_rate": 2.1593881544169863e-06, "loss": 17.9285, "step": 38381 }, { "epoch": 0.7015921180104923, "grad_norm": 7.483701061831763, "learning_rate": 2.1591445584496935e-06, "loss": 18.27, "step": 38382 }, { "epoch": 0.7016103972069387, "grad_norm": 6.639937710882387, "learning_rate": 2.1589009724393534e-06, "loss": 17.4613, "step": 38383 }, { "epoch": 0.7016286764033853, "grad_norm": 7.541228448439533, "learning_rate": 2.1586573963868177e-06, "loss": 18.1587, "step": 38384 }, { "epoch": 0.7016469555998318, "grad_norm": 6.346488998485573, "learning_rate": 2.158413830292943e-06, "loss": 17.3218, "step": 38385 }, { "epoch": 0.7016652347962784, "grad_norm": 6.034628726495596, "learning_rate": 2.1581702741585793e-06, "loss": 17.2156, "step": 38386 }, { "epoch": 0.7016835139927249, "grad_norm": 5.767577379375106, "learning_rate": 2.1579267279845824e-06, "loss": 17.2439, "step": 38387 }, { "epoch": 0.7017017931891714, "grad_norm": 6.805821980336378, "learning_rate": 2.1576831917718073e-06, "loss": 17.2714, "step": 38388 }, { "epoch": 0.701720072385618, "grad_norm": 6.394684401954763, "learning_rate": 2.157439665521106e-06, "loss": 17.1152, "step": 38389 }, { "epoch": 0.7017383515820644, "grad_norm": 5.3502806137398835, "learning_rate": 2.15719614923333e-06, "loss": 17.0712, "step": 38390 }, { "epoch": 0.701756630778511, "grad_norm": 5.79471866102194, "learning_rate": 2.1569526429093367e-06, "loss": 17.3306, "step": 38391 }, { "epoch": 0.7017749099749575, "grad_norm": 10.50543269586155, "learning_rate": 2.156709146549978e-06, "loss": 17.8811, "step": 38392 }, { "epoch": 0.701793189171404, "grad_norm": 6.018328160660025, "learning_rate": 2.156465660156105e-06, "loss": 17.308, "step": 38393 }, { "epoch": 0.7018114683678506, "grad_norm": 5.692649429822453, "learning_rate": 2.1562221837285744e-06, "loss": 17.0552, "step": 38394 }, { "epoch": 0.7018297475642971, "grad_norm": 5.417314643915846, "learning_rate": 2.155978717268237e-06, "loss": 17.1481, "step": 38395 }, { "epoch": 0.7018480267607436, "grad_norm": 6.804830031018734, "learning_rate": 2.155735260775948e-06, "loss": 17.6791, "step": 38396 }, { "epoch": 0.7018663059571901, "grad_norm": 5.180031896988018, "learning_rate": 2.155491814252561e-06, "loss": 16.9954, "step": 38397 }, { "epoch": 0.7018845851536366, "grad_norm": 5.849774680584759, "learning_rate": 2.155248377698927e-06, "loss": 17.142, "step": 38398 }, { "epoch": 0.7019028643500832, "grad_norm": 6.59303421529391, "learning_rate": 2.155004951115902e-06, "loss": 17.3069, "step": 38399 }, { "epoch": 0.7019211435465297, "grad_norm": 6.15949574565884, "learning_rate": 2.1547615345043384e-06, "loss": 17.2916, "step": 38400 }, { "epoch": 0.7019394227429763, "grad_norm": 7.3392012957998025, "learning_rate": 2.1545181278650863e-06, "loss": 18.5591, "step": 38401 }, { "epoch": 0.7019577019394228, "grad_norm": 6.633938074253019, "learning_rate": 2.154274731199003e-06, "loss": 17.4902, "step": 38402 }, { "epoch": 0.7019759811358692, "grad_norm": 6.679997887095663, "learning_rate": 2.1540313445069406e-06, "loss": 17.2312, "step": 38403 }, { "epoch": 0.7019942603323158, "grad_norm": 5.6141991020954425, "learning_rate": 2.153787967789749e-06, "loss": 17.0489, "step": 38404 }, { "epoch": 0.7020125395287623, "grad_norm": 6.516121948394202, "learning_rate": 2.1535446010482835e-06, "loss": 17.3961, "step": 38405 }, { "epoch": 0.7020308187252089, "grad_norm": 6.0677344012759304, "learning_rate": 2.153301244283399e-06, "loss": 17.1064, "step": 38406 }, { "epoch": 0.7020490979216554, "grad_norm": 6.553918971544748, "learning_rate": 2.153057897495945e-06, "loss": 17.3923, "step": 38407 }, { "epoch": 0.7020673771181019, "grad_norm": 5.329205866886636, "learning_rate": 2.152814560686778e-06, "loss": 17.0262, "step": 38408 }, { "epoch": 0.7020856563145484, "grad_norm": 7.306963966058569, "learning_rate": 2.1525712338567485e-06, "loss": 17.7017, "step": 38409 }, { "epoch": 0.7021039355109949, "grad_norm": 6.221714293336087, "learning_rate": 2.152327917006708e-06, "loss": 17.2221, "step": 38410 }, { "epoch": 0.7021222147074414, "grad_norm": 6.210026185819161, "learning_rate": 2.1520846101375132e-06, "loss": 17.632, "step": 38411 }, { "epoch": 0.702140493903888, "grad_norm": 6.211888282089839, "learning_rate": 2.1518413132500125e-06, "loss": 17.3222, "step": 38412 }, { "epoch": 0.7021587731003345, "grad_norm": 7.063018943135217, "learning_rate": 2.1515980263450627e-06, "loss": 18.0177, "step": 38413 }, { "epoch": 0.7021770522967811, "grad_norm": 6.5326209534145026, "learning_rate": 2.151354749423513e-06, "loss": 17.0561, "step": 38414 }, { "epoch": 0.7021953314932275, "grad_norm": 7.660154786647413, "learning_rate": 2.1511114824862176e-06, "loss": 17.8022, "step": 38415 }, { "epoch": 0.702213610689674, "grad_norm": 5.755580103740047, "learning_rate": 2.1508682255340313e-06, "loss": 17.2279, "step": 38416 }, { "epoch": 0.7022318898861206, "grad_norm": 5.366167689417243, "learning_rate": 2.150624978567804e-06, "loss": 17.2926, "step": 38417 }, { "epoch": 0.7022501690825671, "grad_norm": 5.564369549738126, "learning_rate": 2.1503817415883876e-06, "loss": 17.0792, "step": 38418 }, { "epoch": 0.7022684482790137, "grad_norm": 7.388042538842667, "learning_rate": 2.1501385145966374e-06, "loss": 17.4763, "step": 38419 }, { "epoch": 0.7022867274754602, "grad_norm": 5.4196294352719345, "learning_rate": 2.149895297593405e-06, "loss": 16.8233, "step": 38420 }, { "epoch": 0.7023050066719067, "grad_norm": 5.014779544075923, "learning_rate": 2.14965209057954e-06, "loss": 16.7921, "step": 38421 }, { "epoch": 0.7023232858683532, "grad_norm": 5.3014626735672605, "learning_rate": 2.149408893555899e-06, "loss": 16.9215, "step": 38422 }, { "epoch": 0.7023415650647997, "grad_norm": 8.073367121412995, "learning_rate": 2.149165706523331e-06, "loss": 18.427, "step": 38423 }, { "epoch": 0.7023598442612463, "grad_norm": 9.143277524223206, "learning_rate": 2.1489225294826895e-06, "loss": 18.4394, "step": 38424 }, { "epoch": 0.7023781234576928, "grad_norm": 7.117635756814997, "learning_rate": 2.1486793624348283e-06, "loss": 17.155, "step": 38425 }, { "epoch": 0.7023964026541393, "grad_norm": 6.6659650959691, "learning_rate": 2.148436205380599e-06, "loss": 17.4221, "step": 38426 }, { "epoch": 0.7024146818505859, "grad_norm": 6.379274927323162, "learning_rate": 2.1481930583208523e-06, "loss": 17.3817, "step": 38427 }, { "epoch": 0.7024329610470323, "grad_norm": 6.88369240997332, "learning_rate": 2.147949921256442e-06, "loss": 17.4952, "step": 38428 }, { "epoch": 0.7024512402434789, "grad_norm": 5.603527145109809, "learning_rate": 2.1477067941882184e-06, "loss": 16.941, "step": 38429 }, { "epoch": 0.7024695194399254, "grad_norm": 7.909173787550899, "learning_rate": 2.147463677117037e-06, "loss": 18.1718, "step": 38430 }, { "epoch": 0.7024877986363719, "grad_norm": 6.265654074730699, "learning_rate": 2.1472205700437482e-06, "loss": 17.5318, "step": 38431 }, { "epoch": 0.7025060778328185, "grad_norm": 4.8307193342724455, "learning_rate": 2.1469774729692017e-06, "loss": 16.9152, "step": 38432 }, { "epoch": 0.702524357029265, "grad_norm": 5.900163180761662, "learning_rate": 2.1467343858942515e-06, "loss": 17.334, "step": 38433 }, { "epoch": 0.7025426362257116, "grad_norm": 6.174097011920587, "learning_rate": 2.146491308819751e-06, "loss": 17.313, "step": 38434 }, { "epoch": 0.702560915422158, "grad_norm": 5.711819896869519, "learning_rate": 2.1462482417465497e-06, "loss": 17.3368, "step": 38435 }, { "epoch": 0.7025791946186045, "grad_norm": 5.772106580957545, "learning_rate": 2.146005184675502e-06, "loss": 17.1313, "step": 38436 }, { "epoch": 0.7025974738150511, "grad_norm": 7.4509714837849605, "learning_rate": 2.145762137607459e-06, "loss": 18.0888, "step": 38437 }, { "epoch": 0.7026157530114976, "grad_norm": 6.523524342776516, "learning_rate": 2.1455191005432694e-06, "loss": 17.7134, "step": 38438 }, { "epoch": 0.7026340322079442, "grad_norm": 6.095366405669158, "learning_rate": 2.14527607348379e-06, "loss": 17.4935, "step": 38439 }, { "epoch": 0.7026523114043907, "grad_norm": 8.783072626938958, "learning_rate": 2.1450330564298686e-06, "loss": 17.3616, "step": 38440 }, { "epoch": 0.7026705906008371, "grad_norm": 6.8274483914950235, "learning_rate": 2.1447900493823587e-06, "loss": 17.7828, "step": 38441 }, { "epoch": 0.7026888697972837, "grad_norm": 6.464387065114436, "learning_rate": 2.144547052342113e-06, "loss": 17.5151, "step": 38442 }, { "epoch": 0.7027071489937302, "grad_norm": 5.5323829137703715, "learning_rate": 2.1443040653099812e-06, "loss": 17.1573, "step": 38443 }, { "epoch": 0.7027254281901768, "grad_norm": 5.646128142037681, "learning_rate": 2.1440610882868166e-06, "loss": 17.2232, "step": 38444 }, { "epoch": 0.7027437073866233, "grad_norm": 6.283149176445889, "learning_rate": 2.143818121273471e-06, "loss": 17.42, "step": 38445 }, { "epoch": 0.7027619865830698, "grad_norm": 7.016637016646528, "learning_rate": 2.143575164270793e-06, "loss": 17.7173, "step": 38446 }, { "epoch": 0.7027802657795164, "grad_norm": 5.952587187662416, "learning_rate": 2.143332217279638e-06, "loss": 17.4316, "step": 38447 }, { "epoch": 0.7027985449759628, "grad_norm": 6.135597353248054, "learning_rate": 2.143089280300855e-06, "loss": 17.4689, "step": 38448 }, { "epoch": 0.7028168241724094, "grad_norm": 6.015698776958769, "learning_rate": 2.142846353335295e-06, "loss": 16.9172, "step": 38449 }, { "epoch": 0.7028351033688559, "grad_norm": 5.962939700439671, "learning_rate": 2.1426034363838103e-06, "loss": 17.3115, "step": 38450 }, { "epoch": 0.7028533825653024, "grad_norm": 6.821359894284879, "learning_rate": 2.142360529447255e-06, "loss": 17.5689, "step": 38451 }, { "epoch": 0.702871661761749, "grad_norm": 6.527621527302404, "learning_rate": 2.1421176325264757e-06, "loss": 17.2175, "step": 38452 }, { "epoch": 0.7028899409581955, "grad_norm": 5.079709481000741, "learning_rate": 2.141874745622328e-06, "loss": 17.0194, "step": 38453 }, { "epoch": 0.702908220154642, "grad_norm": 4.2979164508992636, "learning_rate": 2.141631868735661e-06, "loss": 16.6101, "step": 38454 }, { "epoch": 0.7029264993510885, "grad_norm": 6.320212298675773, "learning_rate": 2.1413890018673255e-06, "loss": 17.3799, "step": 38455 }, { "epoch": 0.702944778547535, "grad_norm": 6.526585445318639, "learning_rate": 2.141146145018175e-06, "loss": 17.417, "step": 38456 }, { "epoch": 0.7029630577439816, "grad_norm": 5.818828977519204, "learning_rate": 2.140903298189058e-06, "loss": 17.3392, "step": 38457 }, { "epoch": 0.7029813369404281, "grad_norm": 5.2227067856059515, "learning_rate": 2.140660461380826e-06, "loss": 17.1012, "step": 38458 }, { "epoch": 0.7029996161368747, "grad_norm": 6.2517552752491685, "learning_rate": 2.1404176345943314e-06, "loss": 17.6059, "step": 38459 }, { "epoch": 0.7030178953333212, "grad_norm": 5.421015735555332, "learning_rate": 2.1401748178304238e-06, "loss": 17.0504, "step": 38460 }, { "epoch": 0.7030361745297676, "grad_norm": 6.095823605990328, "learning_rate": 2.1399320110899572e-06, "loss": 17.2668, "step": 38461 }, { "epoch": 0.7030544537262142, "grad_norm": 6.632798455784985, "learning_rate": 2.1396892143737813e-06, "loss": 17.6416, "step": 38462 }, { "epoch": 0.7030727329226607, "grad_norm": 7.053545640223843, "learning_rate": 2.1394464276827447e-06, "loss": 17.6375, "step": 38463 }, { "epoch": 0.7030910121191073, "grad_norm": 5.754231784633598, "learning_rate": 2.1392036510177017e-06, "loss": 17.233, "step": 38464 }, { "epoch": 0.7031092913155538, "grad_norm": 6.2868667493352755, "learning_rate": 2.138960884379502e-06, "loss": 17.4262, "step": 38465 }, { "epoch": 0.7031275705120003, "grad_norm": 5.22037504734206, "learning_rate": 2.1387181277689934e-06, "loss": 16.9265, "step": 38466 }, { "epoch": 0.7031458497084468, "grad_norm": 6.795015282828803, "learning_rate": 2.138475381187032e-06, "loss": 17.2417, "step": 38467 }, { "epoch": 0.7031641289048933, "grad_norm": 6.894269957368391, "learning_rate": 2.138232644634464e-06, "loss": 17.5181, "step": 38468 }, { "epoch": 0.7031824081013399, "grad_norm": 6.54781384579938, "learning_rate": 2.1379899181121427e-06, "loss": 17.4261, "step": 38469 }, { "epoch": 0.7032006872977864, "grad_norm": 7.359657608736084, "learning_rate": 2.13774720162092e-06, "loss": 17.7474, "step": 38470 }, { "epoch": 0.7032189664942329, "grad_norm": 5.8310182681519045, "learning_rate": 2.1375044951616446e-06, "loss": 17.2428, "step": 38471 }, { "epoch": 0.7032372456906795, "grad_norm": 7.187061390426539, "learning_rate": 2.1372617987351657e-06, "loss": 17.8687, "step": 38472 }, { "epoch": 0.703255524887126, "grad_norm": 7.2638044599184495, "learning_rate": 2.1370191123423384e-06, "loss": 17.6764, "step": 38473 }, { "epoch": 0.7032738040835725, "grad_norm": 4.907894859431696, "learning_rate": 2.136776435984008e-06, "loss": 16.8924, "step": 38474 }, { "epoch": 0.703292083280019, "grad_norm": 5.310186054806291, "learning_rate": 2.13653376966103e-06, "loss": 17.1583, "step": 38475 }, { "epoch": 0.7033103624764655, "grad_norm": 6.257564901769106, "learning_rate": 2.1362911133742527e-06, "loss": 17.3697, "step": 38476 }, { "epoch": 0.7033286416729121, "grad_norm": 7.7979640649076085, "learning_rate": 2.1360484671245246e-06, "loss": 18.2201, "step": 38477 }, { "epoch": 0.7033469208693586, "grad_norm": 6.4442422225032, "learning_rate": 2.1358058309126976e-06, "loss": 17.122, "step": 38478 }, { "epoch": 0.703365200065805, "grad_norm": 7.39524104661597, "learning_rate": 2.1355632047396252e-06, "loss": 18.0497, "step": 38479 }, { "epoch": 0.7033834792622516, "grad_norm": 7.131639378948052, "learning_rate": 2.1353205886061527e-06, "loss": 17.7674, "step": 38480 }, { "epoch": 0.7034017584586981, "grad_norm": 4.706948934999953, "learning_rate": 2.1350779825131356e-06, "loss": 16.929, "step": 38481 }, { "epoch": 0.7034200376551447, "grad_norm": 5.830528122314941, "learning_rate": 2.1348353864614213e-06, "loss": 17.0981, "step": 38482 }, { "epoch": 0.7034383168515912, "grad_norm": 5.767867869646402, "learning_rate": 2.1345928004518586e-06, "loss": 17.1911, "step": 38483 }, { "epoch": 0.7034565960480377, "grad_norm": 6.120793669985572, "learning_rate": 2.134350224485301e-06, "loss": 17.2702, "step": 38484 }, { "epoch": 0.7034748752444843, "grad_norm": 5.769036431666329, "learning_rate": 2.134107658562597e-06, "loss": 17.4841, "step": 38485 }, { "epoch": 0.7034931544409307, "grad_norm": 6.290239861087343, "learning_rate": 2.133865102684595e-06, "loss": 17.5223, "step": 38486 }, { "epoch": 0.7035114336373773, "grad_norm": 5.954214803886302, "learning_rate": 2.133622556852148e-06, "loss": 16.8887, "step": 38487 }, { "epoch": 0.7035297128338238, "grad_norm": 6.155273633034634, "learning_rate": 2.1333800210661067e-06, "loss": 17.4682, "step": 38488 }, { "epoch": 0.7035479920302703, "grad_norm": 6.664638107904918, "learning_rate": 2.133137495327318e-06, "loss": 17.3657, "step": 38489 }, { "epoch": 0.7035662712267169, "grad_norm": 8.608733004201818, "learning_rate": 2.1328949796366342e-06, "loss": 18.1499, "step": 38490 }, { "epoch": 0.7035845504231634, "grad_norm": 5.6530799313248155, "learning_rate": 2.132652473994904e-06, "loss": 17.046, "step": 38491 }, { "epoch": 0.70360282961961, "grad_norm": 6.601311577432868, "learning_rate": 2.13240997840298e-06, "loss": 17.6025, "step": 38492 }, { "epoch": 0.7036211088160564, "grad_norm": 5.766922928783275, "learning_rate": 2.13216749286171e-06, "loss": 17.0954, "step": 38493 }, { "epoch": 0.7036393880125029, "grad_norm": 5.586279115579972, "learning_rate": 2.1319250173719418e-06, "loss": 17.108, "step": 38494 }, { "epoch": 0.7036576672089495, "grad_norm": 6.372323215324408, "learning_rate": 2.1316825519345295e-06, "loss": 17.2679, "step": 38495 }, { "epoch": 0.703675946405396, "grad_norm": 7.729965001783592, "learning_rate": 2.1314400965503196e-06, "loss": 17.8666, "step": 38496 }, { "epoch": 0.7036942256018426, "grad_norm": 6.355063111394824, "learning_rate": 2.1311976512201626e-06, "loss": 17.2542, "step": 38497 }, { "epoch": 0.7037125047982891, "grad_norm": 6.611566697265357, "learning_rate": 2.1309552159449117e-06, "loss": 17.6207, "step": 38498 }, { "epoch": 0.7037307839947355, "grad_norm": 7.116638012596236, "learning_rate": 2.130712790725413e-06, "loss": 17.7253, "step": 38499 }, { "epoch": 0.7037490631911821, "grad_norm": 5.625375522127037, "learning_rate": 2.130470375562515e-06, "loss": 17.1377, "step": 38500 }, { "epoch": 0.7037673423876286, "grad_norm": 5.359094132987871, "learning_rate": 2.1302279704570717e-06, "loss": 17.0201, "step": 38501 }, { "epoch": 0.7037856215840752, "grad_norm": 6.311708652416258, "learning_rate": 2.1299855754099303e-06, "loss": 17.3554, "step": 38502 }, { "epoch": 0.7038039007805217, "grad_norm": 7.1294389882755125, "learning_rate": 2.129743190421938e-06, "loss": 17.8535, "step": 38503 }, { "epoch": 0.7038221799769682, "grad_norm": 7.14699066557616, "learning_rate": 2.1295008154939475e-06, "loss": 17.7117, "step": 38504 }, { "epoch": 0.7038404591734148, "grad_norm": 5.806959141281969, "learning_rate": 2.129258450626809e-06, "loss": 17.3737, "step": 38505 }, { "epoch": 0.7038587383698612, "grad_norm": 6.29217180540094, "learning_rate": 2.1290160958213686e-06, "loss": 17.653, "step": 38506 }, { "epoch": 0.7038770175663078, "grad_norm": 7.783345221320957, "learning_rate": 2.1287737510784793e-06, "loss": 18.2552, "step": 38507 }, { "epoch": 0.7038952967627543, "grad_norm": 9.82848348728333, "learning_rate": 2.1285314163989873e-06, "loss": 18.2432, "step": 38508 }, { "epoch": 0.7039135759592008, "grad_norm": 8.050311903938386, "learning_rate": 2.1282890917837456e-06, "loss": 18.2542, "step": 38509 }, { "epoch": 0.7039318551556474, "grad_norm": 5.672668467460944, "learning_rate": 2.128046777233601e-06, "loss": 17.2129, "step": 38510 }, { "epoch": 0.7039501343520939, "grad_norm": 5.470031889354467, "learning_rate": 2.1278044727494017e-06, "loss": 17.2517, "step": 38511 }, { "epoch": 0.7039684135485405, "grad_norm": 9.283957776551766, "learning_rate": 2.127562178332e-06, "loss": 17.7314, "step": 38512 }, { "epoch": 0.7039866927449869, "grad_norm": 7.285303523833373, "learning_rate": 2.127319893982242e-06, "loss": 17.836, "step": 38513 }, { "epoch": 0.7040049719414334, "grad_norm": 7.621441025093957, "learning_rate": 2.1270776197009784e-06, "loss": 17.4654, "step": 38514 }, { "epoch": 0.70402325113788, "grad_norm": 6.1788603016136, "learning_rate": 2.126835355489061e-06, "loss": 17.1952, "step": 38515 }, { "epoch": 0.7040415303343265, "grad_norm": 5.85692127263548, "learning_rate": 2.126593101347335e-06, "loss": 17.2552, "step": 38516 }, { "epoch": 0.7040598095307731, "grad_norm": 5.687893043627939, "learning_rate": 2.12635085727665e-06, "loss": 17.0591, "step": 38517 }, { "epoch": 0.7040780887272196, "grad_norm": 7.291573983464942, "learning_rate": 2.1261086232778576e-06, "loss": 17.6579, "step": 38518 }, { "epoch": 0.704096367923666, "grad_norm": 6.259920588140655, "learning_rate": 2.1258663993518046e-06, "loss": 17.3695, "step": 38519 }, { "epoch": 0.7041146471201126, "grad_norm": 7.199995801328296, "learning_rate": 2.125624185499339e-06, "loss": 17.7614, "step": 38520 }, { "epoch": 0.7041329263165591, "grad_norm": 6.215909545788398, "learning_rate": 2.1253819817213127e-06, "loss": 17.1735, "step": 38521 }, { "epoch": 0.7041512055130057, "grad_norm": 6.280779010421226, "learning_rate": 2.125139788018572e-06, "loss": 17.4605, "step": 38522 }, { "epoch": 0.7041694847094522, "grad_norm": 5.638578464457455, "learning_rate": 2.124897604391966e-06, "loss": 17.1779, "step": 38523 }, { "epoch": 0.7041877639058987, "grad_norm": 6.3965062045877135, "learning_rate": 2.1246554308423472e-06, "loss": 17.2459, "step": 38524 }, { "epoch": 0.7042060431023452, "grad_norm": 5.211059648236621, "learning_rate": 2.124413267370559e-06, "loss": 17.0936, "step": 38525 }, { "epoch": 0.7042243222987917, "grad_norm": 7.73111600400166, "learning_rate": 2.1241711139774553e-06, "loss": 17.5276, "step": 38526 }, { "epoch": 0.7042426014952383, "grad_norm": 5.916929415599262, "learning_rate": 2.1239289706638815e-06, "loss": 17.2376, "step": 38527 }, { "epoch": 0.7042608806916848, "grad_norm": 6.887602834689518, "learning_rate": 2.123686837430686e-06, "loss": 17.6614, "step": 38528 }, { "epoch": 0.7042791598881313, "grad_norm": 6.421389814941427, "learning_rate": 2.1234447142787196e-06, "loss": 17.5966, "step": 38529 }, { "epoch": 0.7042974390845779, "grad_norm": 5.915889074889411, "learning_rate": 2.1232026012088303e-06, "loss": 17.0754, "step": 38530 }, { "epoch": 0.7043157182810243, "grad_norm": 6.086275543771452, "learning_rate": 2.1229604982218645e-06, "loss": 16.8608, "step": 38531 }, { "epoch": 0.7043339974774709, "grad_norm": 6.235999114779584, "learning_rate": 2.122718405318672e-06, "loss": 17.3872, "step": 38532 }, { "epoch": 0.7043522766739174, "grad_norm": 9.265730398022168, "learning_rate": 2.122476322500104e-06, "loss": 18.2744, "step": 38533 }, { "epoch": 0.7043705558703639, "grad_norm": 7.2332139163384275, "learning_rate": 2.122234249767005e-06, "loss": 17.3661, "step": 38534 }, { "epoch": 0.7043888350668105, "grad_norm": 6.42155409176179, "learning_rate": 2.121992187120227e-06, "loss": 17.4165, "step": 38535 }, { "epoch": 0.704407114263257, "grad_norm": 5.402329849556015, "learning_rate": 2.1217501345606146e-06, "loss": 17.146, "step": 38536 }, { "epoch": 0.7044253934597036, "grad_norm": 6.5053972432517115, "learning_rate": 2.1215080920890203e-06, "loss": 17.2387, "step": 38537 }, { "epoch": 0.70444367265615, "grad_norm": 6.914099822395525, "learning_rate": 2.12126605970629e-06, "loss": 17.2717, "step": 38538 }, { "epoch": 0.7044619518525965, "grad_norm": 6.346689036075231, "learning_rate": 2.121024037413271e-06, "loss": 17.4921, "step": 38539 }, { "epoch": 0.7044802310490431, "grad_norm": 6.937243321643276, "learning_rate": 2.1207820252108146e-06, "loss": 17.8908, "step": 38540 }, { "epoch": 0.7044985102454896, "grad_norm": 6.3545778747888795, "learning_rate": 2.120540023099766e-06, "loss": 17.1971, "step": 38541 }, { "epoch": 0.7045167894419362, "grad_norm": 5.9605480322464635, "learning_rate": 2.120298031080974e-06, "loss": 17.402, "step": 38542 }, { "epoch": 0.7045350686383827, "grad_norm": 9.431181503500527, "learning_rate": 2.1200560491552895e-06, "loss": 17.8602, "step": 38543 }, { "epoch": 0.7045533478348291, "grad_norm": 6.740034762079538, "learning_rate": 2.1198140773235587e-06, "loss": 17.4376, "step": 38544 }, { "epoch": 0.7045716270312757, "grad_norm": 6.11017983084003, "learning_rate": 2.1195721155866277e-06, "loss": 17.3161, "step": 38545 }, { "epoch": 0.7045899062277222, "grad_norm": 6.137683942055454, "learning_rate": 2.1193301639453485e-06, "loss": 17.5257, "step": 38546 }, { "epoch": 0.7046081854241687, "grad_norm": 6.051807800911945, "learning_rate": 2.119088222400567e-06, "loss": 17.4285, "step": 38547 }, { "epoch": 0.7046264646206153, "grad_norm": 6.830482117476997, "learning_rate": 2.1188462909531297e-06, "loss": 17.6307, "step": 38548 }, { "epoch": 0.7046447438170618, "grad_norm": 6.545779724311988, "learning_rate": 2.118604369603888e-06, "loss": 17.7581, "step": 38549 }, { "epoch": 0.7046630230135084, "grad_norm": 6.691146379255836, "learning_rate": 2.1183624583536853e-06, "loss": 17.7735, "step": 38550 }, { "epoch": 0.7046813022099548, "grad_norm": 5.719241379943807, "learning_rate": 2.118120557203373e-06, "loss": 17.3605, "step": 38551 }, { "epoch": 0.7046995814064013, "grad_norm": 8.661207100602143, "learning_rate": 2.1178786661537996e-06, "loss": 17.9483, "step": 38552 }, { "epoch": 0.7047178606028479, "grad_norm": 5.581768461003534, "learning_rate": 2.1176367852058093e-06, "loss": 17.3693, "step": 38553 }, { "epoch": 0.7047361397992944, "grad_norm": 5.504930678309781, "learning_rate": 2.1173949143602548e-06, "loss": 17.1624, "step": 38554 }, { "epoch": 0.704754418995741, "grad_norm": 6.226162341702815, "learning_rate": 2.1171530536179797e-06, "loss": 17.5748, "step": 38555 }, { "epoch": 0.7047726981921875, "grad_norm": 5.517996228887683, "learning_rate": 2.116911202979832e-06, "loss": 17.0972, "step": 38556 }, { "epoch": 0.7047909773886339, "grad_norm": 7.445916090074789, "learning_rate": 2.1166693624466617e-06, "loss": 18.1167, "step": 38557 }, { "epoch": 0.7048092565850805, "grad_norm": 6.094787896970023, "learning_rate": 2.116427532019315e-06, "loss": 17.208, "step": 38558 }, { "epoch": 0.704827535781527, "grad_norm": 5.670210983366209, "learning_rate": 2.116185711698638e-06, "loss": 17.1953, "step": 38559 }, { "epoch": 0.7048458149779736, "grad_norm": 6.236745419992208, "learning_rate": 2.1159439014854797e-06, "loss": 17.0984, "step": 38560 }, { "epoch": 0.7048640941744201, "grad_norm": 5.797135728485726, "learning_rate": 2.11570210138069e-06, "loss": 17.4328, "step": 38561 }, { "epoch": 0.7048823733708666, "grad_norm": 6.765511307601932, "learning_rate": 2.115460311385112e-06, "loss": 17.7474, "step": 38562 }, { "epoch": 0.7049006525673132, "grad_norm": 5.476323093354946, "learning_rate": 2.115218531499597e-06, "loss": 17.0324, "step": 38563 }, { "epoch": 0.7049189317637596, "grad_norm": 6.355950637771239, "learning_rate": 2.114976761724991e-06, "loss": 17.2616, "step": 38564 }, { "epoch": 0.7049372109602062, "grad_norm": 6.945147849438392, "learning_rate": 2.114735002062139e-06, "loss": 17.7232, "step": 38565 }, { "epoch": 0.7049554901566527, "grad_norm": 6.762676223079896, "learning_rate": 2.114493252511893e-06, "loss": 17.8715, "step": 38566 }, { "epoch": 0.7049737693530992, "grad_norm": 6.116621124134141, "learning_rate": 2.1142515130750946e-06, "loss": 17.3863, "step": 38567 }, { "epoch": 0.7049920485495458, "grad_norm": 7.111445705118661, "learning_rate": 2.114009783752595e-06, "loss": 17.5946, "step": 38568 }, { "epoch": 0.7050103277459923, "grad_norm": 8.413839285979613, "learning_rate": 2.1137680645452425e-06, "loss": 18.1682, "step": 38569 }, { "epoch": 0.7050286069424389, "grad_norm": 8.373698496614525, "learning_rate": 2.113526355453881e-06, "loss": 17.5119, "step": 38570 }, { "epoch": 0.7050468861388853, "grad_norm": 5.381237706351013, "learning_rate": 2.11328465647936e-06, "loss": 16.9122, "step": 38571 }, { "epoch": 0.7050651653353318, "grad_norm": 5.00634280620689, "learning_rate": 2.113042967622527e-06, "loss": 16.8879, "step": 38572 }, { "epoch": 0.7050834445317784, "grad_norm": 5.491310310416177, "learning_rate": 2.1128012888842252e-06, "loss": 17.1556, "step": 38573 }, { "epoch": 0.7051017237282249, "grad_norm": 6.515417675185131, "learning_rate": 2.1125596202653063e-06, "loss": 17.3765, "step": 38574 }, { "epoch": 0.7051200029246715, "grad_norm": 6.898497577876452, "learning_rate": 2.1123179617666156e-06, "loss": 17.6692, "step": 38575 }, { "epoch": 0.705138282121118, "grad_norm": 6.153598257309319, "learning_rate": 2.1120763133889978e-06, "loss": 17.2934, "step": 38576 }, { "epoch": 0.7051565613175644, "grad_norm": 6.258857500126036, "learning_rate": 2.1118346751333026e-06, "loss": 17.4763, "step": 38577 }, { "epoch": 0.705174840514011, "grad_norm": 5.6201147751575915, "learning_rate": 2.111593047000377e-06, "loss": 17.3844, "step": 38578 }, { "epoch": 0.7051931197104575, "grad_norm": 6.685972449250595, "learning_rate": 2.1113514289910656e-06, "loss": 17.6825, "step": 38579 }, { "epoch": 0.7052113989069041, "grad_norm": 7.6757718331658165, "learning_rate": 2.111109821106219e-06, "loss": 17.769, "step": 38580 }, { "epoch": 0.7052296781033506, "grad_norm": 6.066782847071308, "learning_rate": 2.11086822334668e-06, "loss": 17.2687, "step": 38581 }, { "epoch": 0.7052479572997971, "grad_norm": 5.314618330443891, "learning_rate": 2.1106266357132986e-06, "loss": 17.1592, "step": 38582 }, { "epoch": 0.7052662364962436, "grad_norm": 5.593780251711407, "learning_rate": 2.1103850582069204e-06, "loss": 17.0707, "step": 38583 }, { "epoch": 0.7052845156926901, "grad_norm": 5.702706871102839, "learning_rate": 2.1101434908283897e-06, "loss": 17.1207, "step": 38584 }, { "epoch": 0.7053027948891367, "grad_norm": 6.004871185250252, "learning_rate": 2.1099019335785574e-06, "loss": 17.5282, "step": 38585 }, { "epoch": 0.7053210740855832, "grad_norm": 6.0914183926185235, "learning_rate": 2.109660386458266e-06, "loss": 17.2642, "step": 38586 }, { "epoch": 0.7053393532820297, "grad_norm": 7.985651389606217, "learning_rate": 2.1094188494683645e-06, "loss": 17.8172, "step": 38587 }, { "epoch": 0.7053576324784763, "grad_norm": 6.464793456878991, "learning_rate": 2.109177322609701e-06, "loss": 17.2289, "step": 38588 }, { "epoch": 0.7053759116749228, "grad_norm": 6.738255533608739, "learning_rate": 2.1089358058831195e-06, "loss": 17.5653, "step": 38589 }, { "epoch": 0.7053941908713693, "grad_norm": 5.219904346030941, "learning_rate": 2.1086942992894655e-06, "loss": 16.9911, "step": 38590 }, { "epoch": 0.7054124700678158, "grad_norm": 4.244238091552582, "learning_rate": 2.1084528028295886e-06, "loss": 16.6952, "step": 38591 }, { "epoch": 0.7054307492642623, "grad_norm": 6.76057904300541, "learning_rate": 2.1082113165043344e-06, "loss": 17.655, "step": 38592 }, { "epoch": 0.7054490284607089, "grad_norm": 6.092780363881036, "learning_rate": 2.1079698403145464e-06, "loss": 17.3265, "step": 38593 }, { "epoch": 0.7054673076571554, "grad_norm": 5.728922579381046, "learning_rate": 2.107728374261075e-06, "loss": 17.406, "step": 38594 }, { "epoch": 0.705485586853602, "grad_norm": 6.193071985539488, "learning_rate": 2.1074869183447627e-06, "loss": 17.4353, "step": 38595 }, { "epoch": 0.7055038660500484, "grad_norm": 8.117249584995223, "learning_rate": 2.107245472566458e-06, "loss": 17.9932, "step": 38596 }, { "epoch": 0.7055221452464949, "grad_norm": 6.640067498245222, "learning_rate": 2.107004036927009e-06, "loss": 17.719, "step": 38597 }, { "epoch": 0.7055404244429415, "grad_norm": 6.013350208456409, "learning_rate": 2.1067626114272577e-06, "loss": 17.5076, "step": 38598 }, { "epoch": 0.705558703639388, "grad_norm": 6.263392329251805, "learning_rate": 2.106521196068054e-06, "loss": 17.2047, "step": 38599 }, { "epoch": 0.7055769828358346, "grad_norm": 7.7445485321988805, "learning_rate": 2.1062797908502426e-06, "loss": 18.0771, "step": 38600 }, { "epoch": 0.7055952620322811, "grad_norm": 5.637356321421194, "learning_rate": 2.106038395774667e-06, "loss": 17.1962, "step": 38601 }, { "epoch": 0.7056135412287275, "grad_norm": 6.274587636847238, "learning_rate": 2.105797010842179e-06, "loss": 17.2235, "step": 38602 }, { "epoch": 0.7056318204251741, "grad_norm": 6.84576323487949, "learning_rate": 2.10555563605362e-06, "loss": 17.8227, "step": 38603 }, { "epoch": 0.7056500996216206, "grad_norm": 7.383905596171514, "learning_rate": 2.105314271409836e-06, "loss": 17.9255, "step": 38604 }, { "epoch": 0.7056683788180672, "grad_norm": 5.435299457635544, "learning_rate": 2.1050729169116742e-06, "loss": 17.1896, "step": 38605 }, { "epoch": 0.7056866580145137, "grad_norm": 7.086346623311973, "learning_rate": 2.1048315725599823e-06, "loss": 17.6616, "step": 38606 }, { "epoch": 0.7057049372109602, "grad_norm": 6.315691817939448, "learning_rate": 2.1045902383556027e-06, "loss": 17.5737, "step": 38607 }, { "epoch": 0.7057232164074068, "grad_norm": 7.967715586337306, "learning_rate": 2.104348914299385e-06, "loss": 17.5076, "step": 38608 }, { "epoch": 0.7057414956038532, "grad_norm": 5.775467456262631, "learning_rate": 2.1041076003921732e-06, "loss": 17.22, "step": 38609 }, { "epoch": 0.7057597748002998, "grad_norm": 6.259068752780453, "learning_rate": 2.103866296634811e-06, "loss": 17.4481, "step": 38610 }, { "epoch": 0.7057780539967463, "grad_norm": 5.863756168145162, "learning_rate": 2.1036250030281484e-06, "loss": 17.1948, "step": 38611 }, { "epoch": 0.7057963331931928, "grad_norm": 6.96682321861762, "learning_rate": 2.1033837195730267e-06, "loss": 17.722, "step": 38612 }, { "epoch": 0.7058146123896394, "grad_norm": 5.144272576792186, "learning_rate": 2.1031424462702953e-06, "loss": 17.0224, "step": 38613 }, { "epoch": 0.7058328915860859, "grad_norm": 6.6555228932386425, "learning_rate": 2.102901183120796e-06, "loss": 17.3446, "step": 38614 }, { "epoch": 0.7058511707825323, "grad_norm": 6.3345991862583, "learning_rate": 2.1026599301253773e-06, "loss": 17.4616, "step": 38615 }, { "epoch": 0.7058694499789789, "grad_norm": 4.84455887947248, "learning_rate": 2.102418687284886e-06, "loss": 16.9396, "step": 38616 }, { "epoch": 0.7058877291754254, "grad_norm": 5.0153981755622254, "learning_rate": 2.102177454600166e-06, "loss": 16.9684, "step": 38617 }, { "epoch": 0.705906008371872, "grad_norm": 11.450931446218254, "learning_rate": 2.1019362320720604e-06, "loss": 19.4456, "step": 38618 }, { "epoch": 0.7059242875683185, "grad_norm": 6.7442982649557335, "learning_rate": 2.1016950197014186e-06, "loss": 17.4934, "step": 38619 }, { "epoch": 0.705942566764765, "grad_norm": 7.829617620266564, "learning_rate": 2.1014538174890842e-06, "loss": 18.3292, "step": 38620 }, { "epoch": 0.7059608459612116, "grad_norm": 6.84351888093682, "learning_rate": 2.1012126254359004e-06, "loss": 17.5807, "step": 38621 }, { "epoch": 0.705979125157658, "grad_norm": 5.038440670126898, "learning_rate": 2.1009714435427175e-06, "loss": 16.8358, "step": 38622 }, { "epoch": 0.7059974043541046, "grad_norm": 5.486181821037201, "learning_rate": 2.100730271810376e-06, "loss": 17.0659, "step": 38623 }, { "epoch": 0.7060156835505511, "grad_norm": 5.8973922145881446, "learning_rate": 2.1004891102397227e-06, "loss": 17.5088, "step": 38624 }, { "epoch": 0.7060339627469976, "grad_norm": 5.260023577413997, "learning_rate": 2.100247958831606e-06, "loss": 17.1569, "step": 38625 }, { "epoch": 0.7060522419434442, "grad_norm": 7.503935437631385, "learning_rate": 2.1000068175868688e-06, "loss": 17.9869, "step": 38626 }, { "epoch": 0.7060705211398907, "grad_norm": 6.899306859094646, "learning_rate": 2.099765686506354e-06, "loss": 17.7117, "step": 38627 }, { "epoch": 0.7060888003363373, "grad_norm": 5.48505556493942, "learning_rate": 2.09952456559091e-06, "loss": 17.2465, "step": 38628 }, { "epoch": 0.7061070795327837, "grad_norm": 6.998584964004675, "learning_rate": 2.0992834548413793e-06, "loss": 17.4755, "step": 38629 }, { "epoch": 0.7061253587292302, "grad_norm": 6.026641759922055, "learning_rate": 2.0990423542586104e-06, "loss": 17.3944, "step": 38630 }, { "epoch": 0.7061436379256768, "grad_norm": 5.859341584872629, "learning_rate": 2.0988012638434442e-06, "loss": 17.1868, "step": 38631 }, { "epoch": 0.7061619171221233, "grad_norm": 4.927303172085948, "learning_rate": 2.0985601835967294e-06, "loss": 16.7566, "step": 38632 }, { "epoch": 0.7061801963185699, "grad_norm": 5.496670942048872, "learning_rate": 2.0983191135193084e-06, "loss": 17.2786, "step": 38633 }, { "epoch": 0.7061984755150164, "grad_norm": 6.3631135445416565, "learning_rate": 2.098078053612028e-06, "loss": 17.4119, "step": 38634 }, { "epoch": 0.7062167547114628, "grad_norm": 6.886727629827751, "learning_rate": 2.097837003875731e-06, "loss": 17.7033, "step": 38635 }, { "epoch": 0.7062350339079094, "grad_norm": 6.191607602379811, "learning_rate": 2.0975959643112654e-06, "loss": 17.4824, "step": 38636 }, { "epoch": 0.7062533131043559, "grad_norm": 5.413607112378037, "learning_rate": 2.097354934919474e-06, "loss": 16.9742, "step": 38637 }, { "epoch": 0.7062715923008025, "grad_norm": 6.515215599388703, "learning_rate": 2.0971139157011993e-06, "loss": 17.7052, "step": 38638 }, { "epoch": 0.706289871497249, "grad_norm": 6.111627949279324, "learning_rate": 2.0968729066572908e-06, "loss": 17.1034, "step": 38639 }, { "epoch": 0.7063081506936955, "grad_norm": 5.722000238921173, "learning_rate": 2.0966319077885887e-06, "loss": 17.2745, "step": 38640 }, { "epoch": 0.706326429890142, "grad_norm": 6.279882620045752, "learning_rate": 2.0963909190959398e-06, "loss": 17.5641, "step": 38641 }, { "epoch": 0.7063447090865885, "grad_norm": 10.018837214254198, "learning_rate": 2.0961499405801905e-06, "loss": 17.2062, "step": 38642 }, { "epoch": 0.7063629882830351, "grad_norm": 5.2172479919183825, "learning_rate": 2.095908972242182e-06, "loss": 16.9772, "step": 38643 }, { "epoch": 0.7063812674794816, "grad_norm": 6.182394614331504, "learning_rate": 2.095668014082763e-06, "loss": 17.3922, "step": 38644 }, { "epoch": 0.7063995466759281, "grad_norm": 7.035355257585746, "learning_rate": 2.0954270661027748e-06, "loss": 17.7496, "step": 38645 }, { "epoch": 0.7064178258723747, "grad_norm": 7.129666439032697, "learning_rate": 2.095186128303061e-06, "loss": 17.2833, "step": 38646 }, { "epoch": 0.7064361050688212, "grad_norm": 5.919784208873416, "learning_rate": 2.0949452006844697e-06, "loss": 16.9388, "step": 38647 }, { "epoch": 0.7064543842652677, "grad_norm": 5.816014337670192, "learning_rate": 2.094704283247843e-06, "loss": 17.0357, "step": 38648 }, { "epoch": 0.7064726634617142, "grad_norm": 9.167096434126442, "learning_rate": 2.0944633759940248e-06, "loss": 18.4262, "step": 38649 }, { "epoch": 0.7064909426581607, "grad_norm": 6.601146067484398, "learning_rate": 2.0942224789238595e-06, "loss": 17.6784, "step": 38650 }, { "epoch": 0.7065092218546073, "grad_norm": 5.348717387270646, "learning_rate": 2.0939815920381943e-06, "loss": 16.8947, "step": 38651 }, { "epoch": 0.7065275010510538, "grad_norm": 7.084851404133879, "learning_rate": 2.0937407153378695e-06, "loss": 17.4996, "step": 38652 }, { "epoch": 0.7065457802475004, "grad_norm": 7.8472537837249465, "learning_rate": 2.0934998488237334e-06, "loss": 18.4293, "step": 38653 }, { "epoch": 0.7065640594439468, "grad_norm": 7.502735994090757, "learning_rate": 2.093258992496628e-06, "loss": 17.6071, "step": 38654 }, { "epoch": 0.7065823386403933, "grad_norm": 6.6706994659695775, "learning_rate": 2.0930181463573955e-06, "loss": 17.5121, "step": 38655 }, { "epoch": 0.7066006178368399, "grad_norm": 6.613436633441981, "learning_rate": 2.0927773104068843e-06, "loss": 17.5364, "step": 38656 }, { "epoch": 0.7066188970332864, "grad_norm": 6.335354645435642, "learning_rate": 2.092536484645936e-06, "loss": 17.3237, "step": 38657 }, { "epoch": 0.706637176229733, "grad_norm": 6.643937147431686, "learning_rate": 2.0922956690753937e-06, "loss": 17.5485, "step": 38658 }, { "epoch": 0.7066554554261795, "grad_norm": 6.5926886693415465, "learning_rate": 2.0920548636961024e-06, "loss": 17.5427, "step": 38659 }, { "epoch": 0.706673734622626, "grad_norm": 7.448115477188497, "learning_rate": 2.0918140685089065e-06, "loss": 17.8998, "step": 38660 }, { "epoch": 0.7066920138190725, "grad_norm": 6.6574644680729556, "learning_rate": 2.0915732835146523e-06, "loss": 17.3761, "step": 38661 }, { "epoch": 0.706710293015519, "grad_norm": 6.791578674888686, "learning_rate": 2.0913325087141805e-06, "loss": 17.8253, "step": 38662 }, { "epoch": 0.7067285722119656, "grad_norm": 5.687849908262371, "learning_rate": 2.0910917441083346e-06, "loss": 17.0619, "step": 38663 }, { "epoch": 0.7067468514084121, "grad_norm": 5.429952503386073, "learning_rate": 2.0908509896979617e-06, "loss": 17.0889, "step": 38664 }, { "epoch": 0.7067651306048586, "grad_norm": 5.188858026079787, "learning_rate": 2.090610245483903e-06, "loss": 16.899, "step": 38665 }, { "epoch": 0.7067834098013052, "grad_norm": 6.9985662680202605, "learning_rate": 2.0903695114670014e-06, "loss": 18.2104, "step": 38666 }, { "epoch": 0.7068016889977516, "grad_norm": 6.341129024720158, "learning_rate": 2.090128787648104e-06, "loss": 17.3897, "step": 38667 }, { "epoch": 0.7068199681941982, "grad_norm": 5.53558551243747, "learning_rate": 2.089888074028051e-06, "loss": 17.0465, "step": 38668 }, { "epoch": 0.7068382473906447, "grad_norm": 5.3305491947930905, "learning_rate": 2.0896473706076877e-06, "loss": 16.9722, "step": 38669 }, { "epoch": 0.7068565265870912, "grad_norm": 6.706157351086141, "learning_rate": 2.0894066773878596e-06, "loss": 17.3853, "step": 38670 }, { "epoch": 0.7068748057835378, "grad_norm": 5.943928452781302, "learning_rate": 2.089165994369408e-06, "loss": 17.1613, "step": 38671 }, { "epoch": 0.7068930849799843, "grad_norm": 6.7111479792398665, "learning_rate": 2.0889253215531763e-06, "loss": 17.6613, "step": 38672 }, { "epoch": 0.7069113641764309, "grad_norm": 6.050454959696402, "learning_rate": 2.0886846589400097e-06, "loss": 17.2179, "step": 38673 }, { "epoch": 0.7069296433728773, "grad_norm": 6.984940498692039, "learning_rate": 2.088444006530749e-06, "loss": 17.2511, "step": 38674 }, { "epoch": 0.7069479225693238, "grad_norm": 5.99442649504914, "learning_rate": 2.0882033643262416e-06, "loss": 17.1531, "step": 38675 }, { "epoch": 0.7069662017657704, "grad_norm": 6.279712319460076, "learning_rate": 2.0879627323273283e-06, "loss": 17.1998, "step": 38676 }, { "epoch": 0.7069844809622169, "grad_norm": 5.785089439135392, "learning_rate": 2.0877221105348516e-06, "loss": 16.9542, "step": 38677 }, { "epoch": 0.7070027601586635, "grad_norm": 6.3302727842598605, "learning_rate": 2.087481498949656e-06, "loss": 17.3822, "step": 38678 }, { "epoch": 0.70702103935511, "grad_norm": 6.110543282473088, "learning_rate": 2.0872408975725868e-06, "loss": 17.3697, "step": 38679 }, { "epoch": 0.7070393185515564, "grad_norm": 6.544868578089504, "learning_rate": 2.0870003064044838e-06, "loss": 17.173, "step": 38680 }, { "epoch": 0.707057597748003, "grad_norm": 6.99680981367712, "learning_rate": 2.086759725446193e-06, "loss": 17.7316, "step": 38681 }, { "epoch": 0.7070758769444495, "grad_norm": 6.7509202114539235, "learning_rate": 2.0865191546985575e-06, "loss": 17.4881, "step": 38682 }, { "epoch": 0.707094156140896, "grad_norm": 6.80935451620577, "learning_rate": 2.0862785941624176e-06, "loss": 17.5826, "step": 38683 }, { "epoch": 0.7071124353373426, "grad_norm": 5.975368806089559, "learning_rate": 2.0860380438386194e-06, "loss": 17.2542, "step": 38684 }, { "epoch": 0.7071307145337891, "grad_norm": 5.969874920630283, "learning_rate": 2.085797503728006e-06, "loss": 17.0785, "step": 38685 }, { "epoch": 0.7071489937302357, "grad_norm": 6.47325579190179, "learning_rate": 2.0855569738314167e-06, "loss": 17.6088, "step": 38686 }, { "epoch": 0.7071672729266821, "grad_norm": 8.127070661290132, "learning_rate": 2.0853164541496986e-06, "loss": 18.1706, "step": 38687 }, { "epoch": 0.7071855521231286, "grad_norm": 5.969601957883826, "learning_rate": 2.085075944683694e-06, "loss": 17.4565, "step": 38688 }, { "epoch": 0.7072038313195752, "grad_norm": 8.76088941168175, "learning_rate": 2.084835445434244e-06, "loss": 17.8454, "step": 38689 }, { "epoch": 0.7072221105160217, "grad_norm": 5.905075580930704, "learning_rate": 2.0845949564021944e-06, "loss": 17.1979, "step": 38690 }, { "epoch": 0.7072403897124683, "grad_norm": 6.786603839062556, "learning_rate": 2.084354477588385e-06, "loss": 17.5937, "step": 38691 }, { "epoch": 0.7072586689089148, "grad_norm": 6.5611046182144515, "learning_rate": 2.0841140089936616e-06, "loss": 17.5258, "step": 38692 }, { "epoch": 0.7072769481053612, "grad_norm": 6.4729733245913765, "learning_rate": 2.0838735506188656e-06, "loss": 17.4077, "step": 38693 }, { "epoch": 0.7072952273018078, "grad_norm": 5.83082071568126, "learning_rate": 2.083633102464838e-06, "loss": 17.0285, "step": 38694 }, { "epoch": 0.7073135064982543, "grad_norm": 7.356059874567766, "learning_rate": 2.0833926645324236e-06, "loss": 18.1113, "step": 38695 }, { "epoch": 0.7073317856947009, "grad_norm": 7.168871414457402, "learning_rate": 2.0831522368224662e-06, "loss": 17.5294, "step": 38696 }, { "epoch": 0.7073500648911474, "grad_norm": 6.447260367057043, "learning_rate": 2.0829118193358056e-06, "loss": 17.5152, "step": 38697 }, { "epoch": 0.7073683440875939, "grad_norm": 5.367009164334911, "learning_rate": 2.082671412073288e-06, "loss": 17.0077, "step": 38698 }, { "epoch": 0.7073866232840404, "grad_norm": 6.150023626738038, "learning_rate": 2.082431015035753e-06, "loss": 17.5447, "step": 38699 }, { "epoch": 0.7074049024804869, "grad_norm": 5.647908995632645, "learning_rate": 2.082190628224043e-06, "loss": 17.0072, "step": 38700 }, { "epoch": 0.7074231816769335, "grad_norm": 5.383225173194889, "learning_rate": 2.081950251639003e-06, "loss": 17.1974, "step": 38701 }, { "epoch": 0.70744146087338, "grad_norm": 5.557254181381731, "learning_rate": 2.081709885281475e-06, "loss": 17.0861, "step": 38702 }, { "epoch": 0.7074597400698265, "grad_norm": 6.2130973084435315, "learning_rate": 2.0814695291522984e-06, "loss": 17.1171, "step": 38703 }, { "epoch": 0.7074780192662731, "grad_norm": 5.893680855613288, "learning_rate": 2.0812291832523173e-06, "loss": 17.3486, "step": 38704 }, { "epoch": 0.7074962984627196, "grad_norm": 7.236818109095703, "learning_rate": 2.0809888475823753e-06, "loss": 17.9262, "step": 38705 }, { "epoch": 0.7075145776591661, "grad_norm": 6.470499793637507, "learning_rate": 2.080748522143316e-06, "loss": 17.5086, "step": 38706 }, { "epoch": 0.7075328568556126, "grad_norm": 8.254599535148174, "learning_rate": 2.0805082069359796e-06, "loss": 17.7255, "step": 38707 }, { "epoch": 0.7075511360520591, "grad_norm": 5.988712486551744, "learning_rate": 2.0802679019612064e-06, "loss": 17.0725, "step": 38708 }, { "epoch": 0.7075694152485057, "grad_norm": 6.507828377826275, "learning_rate": 2.0800276072198426e-06, "loss": 17.5259, "step": 38709 }, { "epoch": 0.7075876944449522, "grad_norm": 4.813841848246007, "learning_rate": 2.079787322712729e-06, "loss": 16.8912, "step": 38710 }, { "epoch": 0.7076059736413988, "grad_norm": 6.300216958218494, "learning_rate": 2.079547048440706e-06, "loss": 17.5732, "step": 38711 }, { "epoch": 0.7076242528378452, "grad_norm": 7.126334484792214, "learning_rate": 2.0793067844046185e-06, "loss": 17.5159, "step": 38712 }, { "epoch": 0.7076425320342917, "grad_norm": 7.113919062080348, "learning_rate": 2.0790665306053055e-06, "loss": 17.5945, "step": 38713 }, { "epoch": 0.7076608112307383, "grad_norm": 6.721067263988554, "learning_rate": 2.078826287043611e-06, "loss": 17.454, "step": 38714 }, { "epoch": 0.7076790904271848, "grad_norm": 5.042073247273171, "learning_rate": 2.0785860537203782e-06, "loss": 17.1122, "step": 38715 }, { "epoch": 0.7076973696236314, "grad_norm": 6.233554685094699, "learning_rate": 2.0783458306364484e-06, "loss": 17.5083, "step": 38716 }, { "epoch": 0.7077156488200779, "grad_norm": 8.661305195960441, "learning_rate": 2.0781056177926606e-06, "loss": 18.371, "step": 38717 }, { "epoch": 0.7077339280165243, "grad_norm": 6.372463603324955, "learning_rate": 2.0778654151898612e-06, "loss": 17.2504, "step": 38718 }, { "epoch": 0.7077522072129709, "grad_norm": 7.358571433848287, "learning_rate": 2.07762522282889e-06, "loss": 17.6815, "step": 38719 }, { "epoch": 0.7077704864094174, "grad_norm": 6.866989153915572, "learning_rate": 2.0773850407105867e-06, "loss": 17.3817, "step": 38720 }, { "epoch": 0.707788765605864, "grad_norm": 7.477102952045076, "learning_rate": 2.0771448688357976e-06, "loss": 17.7011, "step": 38721 }, { "epoch": 0.7078070448023105, "grad_norm": 6.007331680201189, "learning_rate": 2.07690470720536e-06, "loss": 17.2417, "step": 38722 }, { "epoch": 0.707825323998757, "grad_norm": 6.916670838194399, "learning_rate": 2.0766645558201174e-06, "loss": 17.5053, "step": 38723 }, { "epoch": 0.7078436031952036, "grad_norm": 6.2230717755086555, "learning_rate": 2.0764244146809137e-06, "loss": 17.2697, "step": 38724 }, { "epoch": 0.70786188239165, "grad_norm": 6.715548544463246, "learning_rate": 2.0761842837885872e-06, "loss": 17.9224, "step": 38725 }, { "epoch": 0.7078801615880966, "grad_norm": 6.10853485450392, "learning_rate": 2.075944163143983e-06, "loss": 17.4345, "step": 38726 }, { "epoch": 0.7078984407845431, "grad_norm": 5.292328587173526, "learning_rate": 2.0757040527479404e-06, "loss": 16.9403, "step": 38727 }, { "epoch": 0.7079167199809896, "grad_norm": 5.486880918383909, "learning_rate": 2.0754639526012998e-06, "loss": 17.4456, "step": 38728 }, { "epoch": 0.7079349991774362, "grad_norm": 6.335760130232979, "learning_rate": 2.075223862704906e-06, "loss": 17.3018, "step": 38729 }, { "epoch": 0.7079532783738827, "grad_norm": 5.287420396975936, "learning_rate": 2.0749837830595987e-06, "loss": 17.1271, "step": 38730 }, { "epoch": 0.7079715575703293, "grad_norm": 6.646535350098257, "learning_rate": 2.074743713666218e-06, "loss": 17.3358, "step": 38731 }, { "epoch": 0.7079898367667757, "grad_norm": 8.168034376413127, "learning_rate": 2.0745036545256057e-06, "loss": 18.4523, "step": 38732 }, { "epoch": 0.7080081159632222, "grad_norm": 5.558617145233762, "learning_rate": 2.074263605638607e-06, "loss": 17.021, "step": 38733 }, { "epoch": 0.7080263951596688, "grad_norm": 6.297744663169276, "learning_rate": 2.074023567006059e-06, "loss": 17.6112, "step": 38734 }, { "epoch": 0.7080446743561153, "grad_norm": 5.2702843119215075, "learning_rate": 2.0737835386288052e-06, "loss": 16.8467, "step": 38735 }, { "epoch": 0.7080629535525619, "grad_norm": 5.159928783448195, "learning_rate": 2.073543520507685e-06, "loss": 17.0583, "step": 38736 }, { "epoch": 0.7080812327490084, "grad_norm": 6.158899178329932, "learning_rate": 2.0733035126435423e-06, "loss": 17.3627, "step": 38737 }, { "epoch": 0.7080995119454548, "grad_norm": 6.276826444713201, "learning_rate": 2.073063515037217e-06, "loss": 17.4564, "step": 38738 }, { "epoch": 0.7081177911419014, "grad_norm": 6.323261456282502, "learning_rate": 2.0728235276895485e-06, "loss": 17.6052, "step": 38739 }, { "epoch": 0.7081360703383479, "grad_norm": 5.084057288988844, "learning_rate": 2.072583550601381e-06, "loss": 16.968, "step": 38740 }, { "epoch": 0.7081543495347945, "grad_norm": 5.707614300556209, "learning_rate": 2.0723435837735523e-06, "loss": 17.0703, "step": 38741 }, { "epoch": 0.708172628731241, "grad_norm": 6.730932919015305, "learning_rate": 2.0721036272069058e-06, "loss": 17.2048, "step": 38742 }, { "epoch": 0.7081909079276875, "grad_norm": 5.809994810673065, "learning_rate": 2.0718636809022836e-06, "loss": 17.2927, "step": 38743 }, { "epoch": 0.708209187124134, "grad_norm": 6.373161600284213, "learning_rate": 2.071623744860525e-06, "loss": 17.1078, "step": 38744 }, { "epoch": 0.7082274663205805, "grad_norm": 4.787419993610538, "learning_rate": 2.071383819082469e-06, "loss": 16.7221, "step": 38745 }, { "epoch": 0.7082457455170271, "grad_norm": 5.0132341910168075, "learning_rate": 2.0711439035689606e-06, "loss": 16.8458, "step": 38746 }, { "epoch": 0.7082640247134736, "grad_norm": 5.433636946025369, "learning_rate": 2.0709039983208385e-06, "loss": 17.2633, "step": 38747 }, { "epoch": 0.7082823039099201, "grad_norm": 5.749069917605483, "learning_rate": 2.0706641033389418e-06, "loss": 17.1973, "step": 38748 }, { "epoch": 0.7083005831063667, "grad_norm": 5.677917614349031, "learning_rate": 2.070424218624116e-06, "loss": 17.3489, "step": 38749 }, { "epoch": 0.7083188623028132, "grad_norm": 4.647844450124323, "learning_rate": 2.0701843441771964e-06, "loss": 16.7917, "step": 38750 }, { "epoch": 0.7083371414992596, "grad_norm": 7.362200811028718, "learning_rate": 2.0699444799990266e-06, "loss": 17.9932, "step": 38751 }, { "epoch": 0.7083554206957062, "grad_norm": 5.6898019093696535, "learning_rate": 2.0697046260904486e-06, "loss": 17.1326, "step": 38752 }, { "epoch": 0.7083736998921527, "grad_norm": 6.66814612785758, "learning_rate": 2.0694647824523005e-06, "loss": 17.611, "step": 38753 }, { "epoch": 0.7083919790885993, "grad_norm": 7.568678957490066, "learning_rate": 2.0692249490854256e-06, "loss": 17.8173, "step": 38754 }, { "epoch": 0.7084102582850458, "grad_norm": 6.044260850741777, "learning_rate": 2.0689851259906624e-06, "loss": 17.1975, "step": 38755 }, { "epoch": 0.7084285374814923, "grad_norm": 5.931342125037955, "learning_rate": 2.0687453131688507e-06, "loss": 17.4747, "step": 38756 }, { "epoch": 0.7084468166779389, "grad_norm": 5.092895272836637, "learning_rate": 2.068505510620834e-06, "loss": 16.8334, "step": 38757 }, { "epoch": 0.7084650958743853, "grad_norm": 7.710304185503706, "learning_rate": 2.0682657183474496e-06, "loss": 18.0654, "step": 38758 }, { "epoch": 0.7084833750708319, "grad_norm": 7.416997646494879, "learning_rate": 2.068025936349541e-06, "loss": 18.1839, "step": 38759 }, { "epoch": 0.7085016542672784, "grad_norm": 6.409963504242467, "learning_rate": 2.067786164627945e-06, "loss": 17.2142, "step": 38760 }, { "epoch": 0.7085199334637249, "grad_norm": 5.369256164879643, "learning_rate": 2.0675464031835063e-06, "loss": 16.8768, "step": 38761 }, { "epoch": 0.7085382126601715, "grad_norm": 6.383093934712349, "learning_rate": 2.067306652017061e-06, "loss": 17.3532, "step": 38762 }, { "epoch": 0.708556491856618, "grad_norm": 6.537068942580779, "learning_rate": 2.0670669111294533e-06, "loss": 17.4017, "step": 38763 }, { "epoch": 0.7085747710530645, "grad_norm": 6.068711855894709, "learning_rate": 2.0668271805215218e-06, "loss": 17.2475, "step": 38764 }, { "epoch": 0.708593050249511, "grad_norm": 5.2468609367738805, "learning_rate": 2.066587460194104e-06, "loss": 17.105, "step": 38765 }, { "epoch": 0.7086113294459575, "grad_norm": 5.971924426095749, "learning_rate": 2.066347750148045e-06, "loss": 17.393, "step": 38766 }, { "epoch": 0.7086296086424041, "grad_norm": 5.883485382783845, "learning_rate": 2.06610805038418e-06, "loss": 17.2327, "step": 38767 }, { "epoch": 0.7086478878388506, "grad_norm": 5.8527025868274976, "learning_rate": 2.0658683609033526e-06, "loss": 17.3147, "step": 38768 }, { "epoch": 0.7086661670352972, "grad_norm": 6.472527021160998, "learning_rate": 2.065628681706404e-06, "loss": 17.5726, "step": 38769 }, { "epoch": 0.7086844462317436, "grad_norm": 6.786685451079195, "learning_rate": 2.0653890127941693e-06, "loss": 17.6633, "step": 38770 }, { "epoch": 0.7087027254281901, "grad_norm": 6.740928887824027, "learning_rate": 2.065149354167494e-06, "loss": 17.808, "step": 38771 }, { "epoch": 0.7087210046246367, "grad_norm": 8.018153181373608, "learning_rate": 2.0649097058272148e-06, "loss": 17.6567, "step": 38772 }, { "epoch": 0.7087392838210832, "grad_norm": 5.459023020871336, "learning_rate": 2.0646700677741714e-06, "loss": 16.9366, "step": 38773 }, { "epoch": 0.7087575630175298, "grad_norm": 6.291833750157866, "learning_rate": 2.0644304400092064e-06, "loss": 17.2015, "step": 38774 }, { "epoch": 0.7087758422139763, "grad_norm": 6.4178798815145885, "learning_rate": 2.064190822533158e-06, "loss": 17.6061, "step": 38775 }, { "epoch": 0.7087941214104228, "grad_norm": 6.525636077618259, "learning_rate": 2.063951215346864e-06, "loss": 17.3295, "step": 38776 }, { "epoch": 0.7088124006068693, "grad_norm": 7.211283832272183, "learning_rate": 2.0637116184511664e-06, "loss": 17.5379, "step": 38777 }, { "epoch": 0.7088306798033158, "grad_norm": 8.303359165978359, "learning_rate": 2.063472031846906e-06, "loss": 17.9526, "step": 38778 }, { "epoch": 0.7088489589997624, "grad_norm": 6.702812936723311, "learning_rate": 2.0632324555349196e-06, "loss": 17.3885, "step": 38779 }, { "epoch": 0.7088672381962089, "grad_norm": 6.22202979809178, "learning_rate": 2.0629928895160514e-06, "loss": 17.27, "step": 38780 }, { "epoch": 0.7088855173926554, "grad_norm": 8.915390723283231, "learning_rate": 2.0627533337911355e-06, "loss": 18.2049, "step": 38781 }, { "epoch": 0.708903796589102, "grad_norm": 5.667072283455876, "learning_rate": 2.0625137883610162e-06, "loss": 16.9238, "step": 38782 }, { "epoch": 0.7089220757855484, "grad_norm": 6.443330946770979, "learning_rate": 2.0622742532265316e-06, "loss": 17.477, "step": 38783 }, { "epoch": 0.708940354981995, "grad_norm": 6.878046301499312, "learning_rate": 2.0620347283885184e-06, "loss": 17.5831, "step": 38784 }, { "epoch": 0.7089586341784415, "grad_norm": 7.086789902479556, "learning_rate": 2.061795213847821e-06, "loss": 17.93, "step": 38785 }, { "epoch": 0.708976913374888, "grad_norm": 6.147303105246317, "learning_rate": 2.061555709605274e-06, "loss": 17.6629, "step": 38786 }, { "epoch": 0.7089951925713346, "grad_norm": 7.283681615780965, "learning_rate": 2.06131621566172e-06, "loss": 17.7003, "step": 38787 }, { "epoch": 0.7090134717677811, "grad_norm": 7.332083450452433, "learning_rate": 2.0610767320179985e-06, "loss": 17.7645, "step": 38788 }, { "epoch": 0.7090317509642277, "grad_norm": 5.523596336812172, "learning_rate": 2.0608372586749485e-06, "loss": 17.2896, "step": 38789 }, { "epoch": 0.7090500301606741, "grad_norm": 8.881105754231143, "learning_rate": 2.0605977956334074e-06, "loss": 18.6572, "step": 38790 }, { "epoch": 0.7090683093571206, "grad_norm": 6.6440111969346844, "learning_rate": 2.0603583428942174e-06, "loss": 17.7486, "step": 38791 }, { "epoch": 0.7090865885535672, "grad_norm": 5.375558994556046, "learning_rate": 2.060118900458216e-06, "loss": 17.0409, "step": 38792 }, { "epoch": 0.7091048677500137, "grad_norm": 6.617677205815287, "learning_rate": 2.059879468326242e-06, "loss": 17.495, "step": 38793 }, { "epoch": 0.7091231469464603, "grad_norm": 7.7867977573827245, "learning_rate": 2.059640046499136e-06, "loss": 17.6689, "step": 38794 }, { "epoch": 0.7091414261429068, "grad_norm": 7.612040962435869, "learning_rate": 2.0594006349777353e-06, "loss": 17.4783, "step": 38795 }, { "epoch": 0.7091597053393532, "grad_norm": 5.713500873875574, "learning_rate": 2.05916123376288e-06, "loss": 17.0465, "step": 38796 }, { "epoch": 0.7091779845357998, "grad_norm": 6.848343628746849, "learning_rate": 2.0589218428554107e-06, "loss": 17.4895, "step": 38797 }, { "epoch": 0.7091962637322463, "grad_norm": 5.889247695071682, "learning_rate": 2.058682462256164e-06, "loss": 17.3227, "step": 38798 }, { "epoch": 0.7092145429286929, "grad_norm": 5.3667761313489235, "learning_rate": 2.058443091965982e-06, "loss": 16.8643, "step": 38799 }, { "epoch": 0.7092328221251394, "grad_norm": 4.8298209649319706, "learning_rate": 2.0582037319857013e-06, "loss": 17.0562, "step": 38800 }, { "epoch": 0.7092511013215859, "grad_norm": 7.358015392121897, "learning_rate": 2.0579643823161595e-06, "loss": 17.8836, "step": 38801 }, { "epoch": 0.7092693805180325, "grad_norm": 5.8008963621137255, "learning_rate": 2.057725042958199e-06, "loss": 17.3004, "step": 38802 }, { "epoch": 0.7092876597144789, "grad_norm": 6.596342153495868, "learning_rate": 2.0574857139126568e-06, "loss": 17.4302, "step": 38803 }, { "epoch": 0.7093059389109255, "grad_norm": 6.778442041497625, "learning_rate": 2.057246395180371e-06, "loss": 16.9925, "step": 38804 }, { "epoch": 0.709324218107372, "grad_norm": 5.289735898073668, "learning_rate": 2.0570070867621798e-06, "loss": 17.0641, "step": 38805 }, { "epoch": 0.7093424973038185, "grad_norm": 5.993572804147036, "learning_rate": 2.056767788658926e-06, "loss": 17.3143, "step": 38806 }, { "epoch": 0.7093607765002651, "grad_norm": 5.9929834055843765, "learning_rate": 2.0565285008714438e-06, "loss": 17.553, "step": 38807 }, { "epoch": 0.7093790556967116, "grad_norm": 4.7688348718882265, "learning_rate": 2.056289223400575e-06, "loss": 16.7361, "step": 38808 }, { "epoch": 0.7093973348931581, "grad_norm": 6.654388403125327, "learning_rate": 2.0560499562471577e-06, "loss": 17.6649, "step": 38809 }, { "epoch": 0.7094156140896046, "grad_norm": 5.856281070546405, "learning_rate": 2.055810699412028e-06, "loss": 17.4185, "step": 38810 }, { "epoch": 0.7094338932860511, "grad_norm": 6.278987796939483, "learning_rate": 2.055571452896028e-06, "loss": 17.4789, "step": 38811 }, { "epoch": 0.7094521724824977, "grad_norm": 6.6215462564354555, "learning_rate": 2.0553322166999923e-06, "loss": 17.5654, "step": 38812 }, { "epoch": 0.7094704516789442, "grad_norm": 5.646970763028556, "learning_rate": 2.0550929908247642e-06, "loss": 17.2881, "step": 38813 }, { "epoch": 0.7094887308753908, "grad_norm": 6.170985680938178, "learning_rate": 2.0548537752711773e-06, "loss": 17.2835, "step": 38814 }, { "epoch": 0.7095070100718373, "grad_norm": 7.062323514103843, "learning_rate": 2.0546145700400726e-06, "loss": 17.8358, "step": 38815 }, { "epoch": 0.7095252892682837, "grad_norm": 5.814197165765048, "learning_rate": 2.05437537513229e-06, "loss": 17.2255, "step": 38816 }, { "epoch": 0.7095435684647303, "grad_norm": 5.872976789088187, "learning_rate": 2.054136190548666e-06, "loss": 17.1463, "step": 38817 }, { "epoch": 0.7095618476611768, "grad_norm": 5.7987565365785185, "learning_rate": 2.0538970162900373e-06, "loss": 17.3077, "step": 38818 }, { "epoch": 0.7095801268576233, "grad_norm": 5.252461951040851, "learning_rate": 2.053657852357246e-06, "loss": 16.8819, "step": 38819 }, { "epoch": 0.7095984060540699, "grad_norm": 4.935906654309043, "learning_rate": 2.053418698751128e-06, "loss": 16.925, "step": 38820 }, { "epoch": 0.7096166852505164, "grad_norm": 6.666138813125509, "learning_rate": 2.0531795554725196e-06, "loss": 17.6006, "step": 38821 }, { "epoch": 0.709634964446963, "grad_norm": 7.020573324096887, "learning_rate": 2.0529404225222616e-06, "loss": 17.5251, "step": 38822 }, { "epoch": 0.7096532436434094, "grad_norm": 6.440916248791657, "learning_rate": 2.052701299901193e-06, "loss": 17.3136, "step": 38823 }, { "epoch": 0.7096715228398559, "grad_norm": 6.4359623879495516, "learning_rate": 2.052462187610149e-06, "loss": 17.4922, "step": 38824 }, { "epoch": 0.7096898020363025, "grad_norm": 5.548040462875022, "learning_rate": 2.0522230856499714e-06, "loss": 17.0994, "step": 38825 }, { "epoch": 0.709708081232749, "grad_norm": 5.8652578386276, "learning_rate": 2.0519839940214958e-06, "loss": 17.2357, "step": 38826 }, { "epoch": 0.7097263604291956, "grad_norm": 6.5418927452271705, "learning_rate": 2.051744912725559e-06, "loss": 17.3573, "step": 38827 }, { "epoch": 0.709744639625642, "grad_norm": 6.303428210665927, "learning_rate": 2.051505841763002e-06, "loss": 17.4144, "step": 38828 }, { "epoch": 0.7097629188220885, "grad_norm": 5.431390207226144, "learning_rate": 2.0512667811346594e-06, "loss": 17.0535, "step": 38829 }, { "epoch": 0.7097811980185351, "grad_norm": 6.173184080337548, "learning_rate": 2.051027730841373e-06, "loss": 17.2547, "step": 38830 }, { "epoch": 0.7097994772149816, "grad_norm": 6.265773413348314, "learning_rate": 2.0507886908839756e-06, "loss": 17.2558, "step": 38831 }, { "epoch": 0.7098177564114282, "grad_norm": 6.842971997795209, "learning_rate": 2.0505496612633092e-06, "loss": 17.656, "step": 38832 }, { "epoch": 0.7098360356078747, "grad_norm": 6.069955386344714, "learning_rate": 2.0503106419802116e-06, "loss": 17.3459, "step": 38833 }, { "epoch": 0.7098543148043212, "grad_norm": 7.391918340710138, "learning_rate": 2.050071633035519e-06, "loss": 17.9283, "step": 38834 }, { "epoch": 0.7098725940007677, "grad_norm": 6.149426285172932, "learning_rate": 2.0498326344300677e-06, "loss": 17.5084, "step": 38835 }, { "epoch": 0.7098908731972142, "grad_norm": 19.746012503323126, "learning_rate": 2.0495936461646986e-06, "loss": 18.142, "step": 38836 }, { "epoch": 0.7099091523936608, "grad_norm": 8.05921424264698, "learning_rate": 2.0493546682402483e-06, "loss": 18.1284, "step": 38837 }, { "epoch": 0.7099274315901073, "grad_norm": 7.598074366907096, "learning_rate": 2.049115700657552e-06, "loss": 18.0875, "step": 38838 }, { "epoch": 0.7099457107865538, "grad_norm": 5.620309439499277, "learning_rate": 2.0488767434174505e-06, "loss": 17.1918, "step": 38839 }, { "epoch": 0.7099639899830004, "grad_norm": 5.6410590882639005, "learning_rate": 2.0486377965207784e-06, "loss": 17.1105, "step": 38840 }, { "epoch": 0.7099822691794468, "grad_norm": 5.074584907531304, "learning_rate": 2.048398859968374e-06, "loss": 16.9732, "step": 38841 }, { "epoch": 0.7100005483758934, "grad_norm": 6.338517558322831, "learning_rate": 2.048159933761078e-06, "loss": 17.3938, "step": 38842 }, { "epoch": 0.7100188275723399, "grad_norm": 6.655484758030725, "learning_rate": 2.0479210178997233e-06, "loss": 17.5536, "step": 38843 }, { "epoch": 0.7100371067687864, "grad_norm": 6.208227109394483, "learning_rate": 2.0476821123851516e-06, "loss": 17.5385, "step": 38844 }, { "epoch": 0.710055385965233, "grad_norm": 5.908782942385943, "learning_rate": 2.047443217218197e-06, "loss": 17.2873, "step": 38845 }, { "epoch": 0.7100736651616795, "grad_norm": 5.279021598777277, "learning_rate": 2.0472043323996963e-06, "loss": 17.0095, "step": 38846 }, { "epoch": 0.7100919443581261, "grad_norm": 5.973241643427466, "learning_rate": 2.04696545793049e-06, "loss": 17.3389, "step": 38847 }, { "epoch": 0.7101102235545725, "grad_norm": 6.572374691988657, "learning_rate": 2.0467265938114136e-06, "loss": 17.3714, "step": 38848 }, { "epoch": 0.710128502751019, "grad_norm": 5.554813569043081, "learning_rate": 2.0464877400433027e-06, "loss": 17.213, "step": 38849 }, { "epoch": 0.7101467819474656, "grad_norm": 6.341026660353584, "learning_rate": 2.046248896626995e-06, "loss": 17.4458, "step": 38850 }, { "epoch": 0.7101650611439121, "grad_norm": 6.246878972103476, "learning_rate": 2.0460100635633313e-06, "loss": 17.4436, "step": 38851 }, { "epoch": 0.7101833403403587, "grad_norm": 7.390135091952889, "learning_rate": 2.0457712408531437e-06, "loss": 18.1459, "step": 38852 }, { "epoch": 0.7102016195368052, "grad_norm": 5.9208750339653085, "learning_rate": 2.045532428497274e-06, "loss": 16.9627, "step": 38853 }, { "epoch": 0.7102198987332516, "grad_norm": 7.51821668768606, "learning_rate": 2.0452936264965562e-06, "loss": 17.8775, "step": 38854 }, { "epoch": 0.7102381779296982, "grad_norm": 6.663006727931746, "learning_rate": 2.045054834851826e-06, "loss": 17.8182, "step": 38855 }, { "epoch": 0.7102564571261447, "grad_norm": 5.660698158101064, "learning_rate": 2.044816053563924e-06, "loss": 17.1583, "step": 38856 }, { "epoch": 0.7102747363225913, "grad_norm": 7.205931790903028, "learning_rate": 2.0445772826336855e-06, "loss": 17.6062, "step": 38857 }, { "epoch": 0.7102930155190378, "grad_norm": 7.203566904775699, "learning_rate": 2.0443385220619455e-06, "loss": 17.1481, "step": 38858 }, { "epoch": 0.7103112947154843, "grad_norm": 5.810452295066209, "learning_rate": 2.044099771849542e-06, "loss": 17.1612, "step": 38859 }, { "epoch": 0.7103295739119309, "grad_norm": 7.501292179601604, "learning_rate": 2.043861031997313e-06, "loss": 17.7523, "step": 38860 }, { "epoch": 0.7103478531083773, "grad_norm": 5.173785796811131, "learning_rate": 2.043622302506096e-06, "loss": 16.9232, "step": 38861 }, { "epoch": 0.7103661323048239, "grad_norm": 6.146708699685197, "learning_rate": 2.0433835833767256e-06, "loss": 17.4566, "step": 38862 }, { "epoch": 0.7103844115012704, "grad_norm": 6.065972270498952, "learning_rate": 2.043144874610038e-06, "loss": 17.5066, "step": 38863 }, { "epoch": 0.7104026906977169, "grad_norm": 6.208527460502668, "learning_rate": 2.042906176206873e-06, "loss": 17.5258, "step": 38864 }, { "epoch": 0.7104209698941635, "grad_norm": 7.666225211777849, "learning_rate": 2.042667488168065e-06, "loss": 17.8963, "step": 38865 }, { "epoch": 0.71043924909061, "grad_norm": 7.068196221589982, "learning_rate": 2.0424288104944494e-06, "loss": 17.2935, "step": 38866 }, { "epoch": 0.7104575282870566, "grad_norm": 6.494604670084879, "learning_rate": 2.0421901431868657e-06, "loss": 17.5162, "step": 38867 }, { "epoch": 0.710475807483503, "grad_norm": 8.144894474812034, "learning_rate": 2.041951486246147e-06, "loss": 17.7644, "step": 38868 }, { "epoch": 0.7104940866799495, "grad_norm": 9.54264686391786, "learning_rate": 2.0417128396731315e-06, "loss": 18.8055, "step": 38869 }, { "epoch": 0.7105123658763961, "grad_norm": 5.91564684798808, "learning_rate": 2.041474203468658e-06, "loss": 17.1287, "step": 38870 }, { "epoch": 0.7105306450728426, "grad_norm": 6.626201292764309, "learning_rate": 2.0412355776335606e-06, "loss": 17.2438, "step": 38871 }, { "epoch": 0.7105489242692892, "grad_norm": 7.300662350856322, "learning_rate": 2.0409969621686737e-06, "loss": 17.9208, "step": 38872 }, { "epoch": 0.7105672034657357, "grad_norm": 6.653169789442545, "learning_rate": 2.040758357074838e-06, "loss": 17.741, "step": 38873 }, { "epoch": 0.7105854826621821, "grad_norm": 5.743896843589451, "learning_rate": 2.040519762352885e-06, "loss": 17.2781, "step": 38874 }, { "epoch": 0.7106037618586287, "grad_norm": 6.269856517561297, "learning_rate": 2.0402811780036553e-06, "loss": 17.1247, "step": 38875 }, { "epoch": 0.7106220410550752, "grad_norm": 6.817898188398217, "learning_rate": 2.0400426040279838e-06, "loss": 17.6687, "step": 38876 }, { "epoch": 0.7106403202515218, "grad_norm": 7.032938456527227, "learning_rate": 2.039804040426704e-06, "loss": 17.434, "step": 38877 }, { "epoch": 0.7106585994479683, "grad_norm": 7.1469454676378685, "learning_rate": 2.039565487200654e-06, "loss": 17.822, "step": 38878 }, { "epoch": 0.7106768786444148, "grad_norm": 5.642095840024857, "learning_rate": 2.039326944350672e-06, "loss": 17.1585, "step": 38879 }, { "epoch": 0.7106951578408613, "grad_norm": 7.896704834963144, "learning_rate": 2.0390884118775906e-06, "loss": 17.9898, "step": 38880 }, { "epoch": 0.7107134370373078, "grad_norm": 6.439616156926713, "learning_rate": 2.0388498897822488e-06, "loss": 17.6763, "step": 38881 }, { "epoch": 0.7107317162337544, "grad_norm": 6.654863587931901, "learning_rate": 2.038611378065481e-06, "loss": 17.6795, "step": 38882 }, { "epoch": 0.7107499954302009, "grad_norm": 7.0674676483203065, "learning_rate": 2.038372876728122e-06, "loss": 17.4843, "step": 38883 }, { "epoch": 0.7107682746266474, "grad_norm": 5.938760909644982, "learning_rate": 2.0381343857710106e-06, "loss": 17.5417, "step": 38884 }, { "epoch": 0.710786553823094, "grad_norm": 6.213622163292429, "learning_rate": 2.0378959051949797e-06, "loss": 17.6722, "step": 38885 }, { "epoch": 0.7108048330195404, "grad_norm": 5.6900718331698465, "learning_rate": 2.037657435000868e-06, "loss": 17.1723, "step": 38886 }, { "epoch": 0.7108231122159869, "grad_norm": 6.1523188538446, "learning_rate": 2.0374189751895085e-06, "loss": 17.389, "step": 38887 }, { "epoch": 0.7108413914124335, "grad_norm": 6.99125092626992, "learning_rate": 2.03718052576174e-06, "loss": 17.8061, "step": 38888 }, { "epoch": 0.71085967060888, "grad_norm": 6.611130245084446, "learning_rate": 2.036942086718395e-06, "loss": 17.2609, "step": 38889 }, { "epoch": 0.7108779498053266, "grad_norm": 5.378742145905414, "learning_rate": 2.036703658060313e-06, "loss": 17.0665, "step": 38890 }, { "epoch": 0.7108962290017731, "grad_norm": 7.366969043179523, "learning_rate": 2.0364652397883248e-06, "loss": 17.7722, "step": 38891 }, { "epoch": 0.7109145081982196, "grad_norm": 4.981580065022658, "learning_rate": 2.0362268319032714e-06, "loss": 16.9229, "step": 38892 }, { "epoch": 0.7109327873946661, "grad_norm": 6.255961239283733, "learning_rate": 2.035988434405985e-06, "loss": 17.4492, "step": 38893 }, { "epoch": 0.7109510665911126, "grad_norm": 6.143026637247391, "learning_rate": 2.0357500472973007e-06, "loss": 17.3542, "step": 38894 }, { "epoch": 0.7109693457875592, "grad_norm": 6.415045013179552, "learning_rate": 2.035511670578055e-06, "loss": 17.4249, "step": 38895 }, { "epoch": 0.7109876249840057, "grad_norm": 6.5270601949252836, "learning_rate": 2.0352733042490857e-06, "loss": 17.6685, "step": 38896 }, { "epoch": 0.7110059041804522, "grad_norm": 5.769980498977673, "learning_rate": 2.035034948311224e-06, "loss": 17.3693, "step": 38897 }, { "epoch": 0.7110241833768988, "grad_norm": 6.22314858552763, "learning_rate": 2.03479660276531e-06, "loss": 17.5142, "step": 38898 }, { "epoch": 0.7110424625733452, "grad_norm": 6.968150219578567, "learning_rate": 2.034558267612176e-06, "loss": 17.9102, "step": 38899 }, { "epoch": 0.7110607417697918, "grad_norm": 6.042564495744105, "learning_rate": 2.0343199428526567e-06, "loss": 17.4106, "step": 38900 }, { "epoch": 0.7110790209662383, "grad_norm": 6.1133843951392, "learning_rate": 2.03408162848759e-06, "loss": 17.3025, "step": 38901 }, { "epoch": 0.7110973001626848, "grad_norm": 5.750128137543269, "learning_rate": 2.03384332451781e-06, "loss": 17.2391, "step": 38902 }, { "epoch": 0.7111155793591314, "grad_norm": 5.521509557283472, "learning_rate": 2.0336050309441496e-06, "loss": 17.1054, "step": 38903 }, { "epoch": 0.7111338585555779, "grad_norm": 7.178616485088329, "learning_rate": 2.0333667477674462e-06, "loss": 17.6247, "step": 38904 }, { "epoch": 0.7111521377520245, "grad_norm": 6.372971575891425, "learning_rate": 2.0331284749885355e-06, "loss": 17.5318, "step": 38905 }, { "epoch": 0.7111704169484709, "grad_norm": 6.217737667607758, "learning_rate": 2.032890212608253e-06, "loss": 17.931, "step": 38906 }, { "epoch": 0.7111886961449174, "grad_norm": 7.475468873581356, "learning_rate": 2.0326519606274336e-06, "loss": 18.0199, "step": 38907 }, { "epoch": 0.711206975341364, "grad_norm": 9.532031354270066, "learning_rate": 2.0324137190469097e-06, "loss": 16.9656, "step": 38908 }, { "epoch": 0.7112252545378105, "grad_norm": 5.565019514669731, "learning_rate": 2.03217548786752e-06, "loss": 17.1156, "step": 38909 }, { "epoch": 0.7112435337342571, "grad_norm": 6.671114184409949, "learning_rate": 2.0319372670900974e-06, "loss": 17.8283, "step": 38910 }, { "epoch": 0.7112618129307036, "grad_norm": 5.205460866760729, "learning_rate": 2.0316990567154747e-06, "loss": 17.0407, "step": 38911 }, { "epoch": 0.71128009212715, "grad_norm": 7.214010018854676, "learning_rate": 2.0314608567444922e-06, "loss": 17.9193, "step": 38912 }, { "epoch": 0.7112983713235966, "grad_norm": 7.236815179202532, "learning_rate": 2.0312226671779793e-06, "loss": 17.413, "step": 38913 }, { "epoch": 0.7113166505200431, "grad_norm": 4.852486125839581, "learning_rate": 2.030984488016774e-06, "loss": 16.8493, "step": 38914 }, { "epoch": 0.7113349297164897, "grad_norm": 6.280742184236179, "learning_rate": 2.0307463192617115e-06, "loss": 17.5108, "step": 38915 }, { "epoch": 0.7113532089129362, "grad_norm": 6.2197519789293745, "learning_rate": 2.0305081609136257e-06, "loss": 17.2339, "step": 38916 }, { "epoch": 0.7113714881093827, "grad_norm": 7.034757121941534, "learning_rate": 2.030270012973349e-06, "loss": 17.8101, "step": 38917 }, { "epoch": 0.7113897673058293, "grad_norm": 7.0751196306581505, "learning_rate": 2.030031875441721e-06, "loss": 17.9618, "step": 38918 }, { "epoch": 0.7114080465022757, "grad_norm": 9.858942143167335, "learning_rate": 2.0297937483195714e-06, "loss": 17.7781, "step": 38919 }, { "epoch": 0.7114263256987223, "grad_norm": 6.52005482673969, "learning_rate": 2.029555631607738e-06, "loss": 17.7595, "step": 38920 }, { "epoch": 0.7114446048951688, "grad_norm": 6.066849915385935, "learning_rate": 2.029317525307055e-06, "loss": 17.3621, "step": 38921 }, { "epoch": 0.7114628840916153, "grad_norm": 5.4684236196798715, "learning_rate": 2.0290794294183542e-06, "loss": 17.2774, "step": 38922 }, { "epoch": 0.7114811632880619, "grad_norm": 6.472866001770841, "learning_rate": 2.0288413439424725e-06, "loss": 17.5934, "step": 38923 }, { "epoch": 0.7114994424845084, "grad_norm": 5.537346330697844, "learning_rate": 2.028603268880246e-06, "loss": 17.1288, "step": 38924 }, { "epoch": 0.711517721680955, "grad_norm": 7.860622917560074, "learning_rate": 2.0283652042325045e-06, "loss": 17.851, "step": 38925 }, { "epoch": 0.7115360008774014, "grad_norm": 5.127292708583344, "learning_rate": 2.028127150000088e-06, "loss": 16.9471, "step": 38926 }, { "epoch": 0.7115542800738479, "grad_norm": 6.324143431545568, "learning_rate": 2.0278891061838275e-06, "loss": 17.3946, "step": 38927 }, { "epoch": 0.7115725592702945, "grad_norm": 5.275520972350994, "learning_rate": 2.0276510727845554e-06, "loss": 17.1558, "step": 38928 }, { "epoch": 0.711590838466741, "grad_norm": 5.858936958340164, "learning_rate": 2.027413049803111e-06, "loss": 17.3419, "step": 38929 }, { "epoch": 0.7116091176631876, "grad_norm": 4.31650407741106, "learning_rate": 2.0271750372403255e-06, "loss": 16.8195, "step": 38930 }, { "epoch": 0.711627396859634, "grad_norm": 7.0273110666478535, "learning_rate": 2.026937035097032e-06, "loss": 18.1486, "step": 38931 }, { "epoch": 0.7116456760560805, "grad_norm": 7.136661710201585, "learning_rate": 2.026699043374066e-06, "loss": 17.8423, "step": 38932 }, { "epoch": 0.7116639552525271, "grad_norm": 6.5034451469868335, "learning_rate": 2.0264610620722638e-06, "loss": 17.4758, "step": 38933 }, { "epoch": 0.7116822344489736, "grad_norm": 5.512761572838803, "learning_rate": 2.0262230911924554e-06, "loss": 17.2844, "step": 38934 }, { "epoch": 0.7117005136454202, "grad_norm": 5.426478682386215, "learning_rate": 2.025985130735479e-06, "loss": 16.9387, "step": 38935 }, { "epoch": 0.7117187928418667, "grad_norm": 5.826160704590368, "learning_rate": 2.025747180702165e-06, "loss": 17.2233, "step": 38936 }, { "epoch": 0.7117370720383132, "grad_norm": 6.430428164261022, "learning_rate": 2.025509241093351e-06, "loss": 17.4562, "step": 38937 }, { "epoch": 0.7117553512347597, "grad_norm": 6.088289409603096, "learning_rate": 2.025271311909868e-06, "loss": 17.5033, "step": 38938 }, { "epoch": 0.7117736304312062, "grad_norm": 6.45232116827826, "learning_rate": 2.02503339315255e-06, "loss": 17.4477, "step": 38939 }, { "epoch": 0.7117919096276528, "grad_norm": 6.120893379457521, "learning_rate": 2.0247954848222333e-06, "loss": 17.4656, "step": 38940 }, { "epoch": 0.7118101888240993, "grad_norm": 6.429535084797549, "learning_rate": 2.0245575869197488e-06, "loss": 17.6383, "step": 38941 }, { "epoch": 0.7118284680205458, "grad_norm": 6.921347840650128, "learning_rate": 2.0243196994459317e-06, "loss": 17.8563, "step": 38942 }, { "epoch": 0.7118467472169924, "grad_norm": 7.106816184441033, "learning_rate": 2.0240818224016174e-06, "loss": 17.9703, "step": 38943 }, { "epoch": 0.7118650264134389, "grad_norm": 7.331654841724721, "learning_rate": 2.0238439557876385e-06, "loss": 17.7863, "step": 38944 }, { "epoch": 0.7118833056098854, "grad_norm": 6.3621725554582955, "learning_rate": 2.0236060996048258e-06, "loss": 17.4808, "step": 38945 }, { "epoch": 0.7119015848063319, "grad_norm": 7.514085885510101, "learning_rate": 2.023368253854018e-06, "loss": 18.2213, "step": 38946 }, { "epoch": 0.7119198640027784, "grad_norm": 6.428257515603975, "learning_rate": 2.023130418536045e-06, "loss": 17.6652, "step": 38947 }, { "epoch": 0.711938143199225, "grad_norm": 6.211319920662758, "learning_rate": 2.022892593651741e-06, "loss": 17.1419, "step": 38948 }, { "epoch": 0.7119564223956715, "grad_norm": 6.303140667227269, "learning_rate": 2.0226547792019395e-06, "loss": 17.6273, "step": 38949 }, { "epoch": 0.7119747015921181, "grad_norm": 7.552685561257524, "learning_rate": 2.022416975187476e-06, "loss": 17.8848, "step": 38950 }, { "epoch": 0.7119929807885645, "grad_norm": 6.043587859252441, "learning_rate": 2.022179181609182e-06, "loss": 17.499, "step": 38951 }, { "epoch": 0.712011259985011, "grad_norm": 7.335038969197061, "learning_rate": 2.0219413984678926e-06, "loss": 18.1367, "step": 38952 }, { "epoch": 0.7120295391814576, "grad_norm": 6.4097199921521275, "learning_rate": 2.0217036257644383e-06, "loss": 17.6665, "step": 38953 }, { "epoch": 0.7120478183779041, "grad_norm": 8.66830040200588, "learning_rate": 2.0214658634996562e-06, "loss": 17.6919, "step": 38954 }, { "epoch": 0.7120660975743506, "grad_norm": 6.55378562971151, "learning_rate": 2.0212281116743782e-06, "loss": 17.3658, "step": 38955 }, { "epoch": 0.7120843767707972, "grad_norm": 5.193010844163926, "learning_rate": 2.020990370289435e-06, "loss": 16.9083, "step": 38956 }, { "epoch": 0.7121026559672436, "grad_norm": 6.279200467339047, "learning_rate": 2.020752639345664e-06, "loss": 17.4758, "step": 38957 }, { "epoch": 0.7121209351636902, "grad_norm": 5.80097424285612, "learning_rate": 2.0205149188438948e-06, "loss": 17.0587, "step": 38958 }, { "epoch": 0.7121392143601367, "grad_norm": 7.138170707206017, "learning_rate": 2.0202772087849622e-06, "loss": 17.667, "step": 38959 }, { "epoch": 0.7121574935565832, "grad_norm": 5.801906253621968, "learning_rate": 2.020039509169701e-06, "loss": 17.3907, "step": 38960 }, { "epoch": 0.7121757727530298, "grad_norm": 8.519414922075216, "learning_rate": 2.019801819998943e-06, "loss": 18.3894, "step": 38961 }, { "epoch": 0.7121940519494763, "grad_norm": 6.5303567264269855, "learning_rate": 2.019564141273519e-06, "loss": 17.2543, "step": 38962 }, { "epoch": 0.7122123311459229, "grad_norm": 5.765705352588088, "learning_rate": 2.0193264729942657e-06, "loss": 17.2935, "step": 38963 }, { "epoch": 0.7122306103423693, "grad_norm": 5.688635935680294, "learning_rate": 2.0190888151620146e-06, "loss": 17.2461, "step": 38964 }, { "epoch": 0.7122488895388158, "grad_norm": 5.347697504306899, "learning_rate": 2.018851167777597e-06, "loss": 17.0226, "step": 38965 }, { "epoch": 0.7122671687352624, "grad_norm": 6.022327092406433, "learning_rate": 2.0186135308418493e-06, "loss": 17.151, "step": 38966 }, { "epoch": 0.7122854479317089, "grad_norm": 5.856555801837647, "learning_rate": 2.0183759043556e-06, "loss": 17.3503, "step": 38967 }, { "epoch": 0.7123037271281555, "grad_norm": 6.446695044567393, "learning_rate": 2.018138288319685e-06, "loss": 17.3713, "step": 38968 }, { "epoch": 0.712322006324602, "grad_norm": 5.456696202278527, "learning_rate": 2.017900682734938e-06, "loss": 17.0553, "step": 38969 }, { "epoch": 0.7123402855210484, "grad_norm": 6.34668200845189, "learning_rate": 2.0176630876021885e-06, "loss": 17.5378, "step": 38970 }, { "epoch": 0.712358564717495, "grad_norm": 5.31080421651281, "learning_rate": 2.0174255029222727e-06, "loss": 17.0054, "step": 38971 }, { "epoch": 0.7123768439139415, "grad_norm": 5.306926462242524, "learning_rate": 2.0171879286960224e-06, "loss": 17.1827, "step": 38972 }, { "epoch": 0.7123951231103881, "grad_norm": 6.55458013730229, "learning_rate": 2.0169503649242674e-06, "loss": 17.452, "step": 38973 }, { "epoch": 0.7124134023068346, "grad_norm": 6.365032363226787, "learning_rate": 2.0167128116078445e-06, "loss": 17.1668, "step": 38974 }, { "epoch": 0.7124316815032811, "grad_norm": 6.653673738550858, "learning_rate": 2.0164752687475834e-06, "loss": 17.4807, "step": 38975 }, { "epoch": 0.7124499606997277, "grad_norm": 5.859206956263648, "learning_rate": 2.016237736344317e-06, "loss": 17.0432, "step": 38976 }, { "epoch": 0.7124682398961741, "grad_norm": 8.21064050885369, "learning_rate": 2.0160002143988775e-06, "loss": 18.3687, "step": 38977 }, { "epoch": 0.7124865190926207, "grad_norm": 5.182110105819805, "learning_rate": 2.0157627029121006e-06, "loss": 17.0536, "step": 38978 }, { "epoch": 0.7125047982890672, "grad_norm": 5.9424498093278855, "learning_rate": 2.0155252018848147e-06, "loss": 17.2231, "step": 38979 }, { "epoch": 0.7125230774855137, "grad_norm": 7.987377868259049, "learning_rate": 2.015287711317856e-06, "loss": 18.0333, "step": 38980 }, { "epoch": 0.7125413566819603, "grad_norm": 6.525330000030544, "learning_rate": 2.0150502312120526e-06, "loss": 17.7352, "step": 38981 }, { "epoch": 0.7125596358784068, "grad_norm": 5.769769600378053, "learning_rate": 2.014812761568241e-06, "loss": 17.1688, "step": 38982 }, { "epoch": 0.7125779150748534, "grad_norm": 6.740034260652508, "learning_rate": 2.0145753023872517e-06, "loss": 17.3769, "step": 38983 }, { "epoch": 0.7125961942712998, "grad_norm": 5.178915437411206, "learning_rate": 2.014337853669915e-06, "loss": 16.9011, "step": 38984 }, { "epoch": 0.7126144734677463, "grad_norm": 7.0074615951770864, "learning_rate": 2.014100415417067e-06, "loss": 17.8566, "step": 38985 }, { "epoch": 0.7126327526641929, "grad_norm": 6.091473261060584, "learning_rate": 2.013862987629536e-06, "loss": 17.5804, "step": 38986 }, { "epoch": 0.7126510318606394, "grad_norm": 4.567020508602632, "learning_rate": 2.0136255703081564e-06, "loss": 16.8936, "step": 38987 }, { "epoch": 0.712669311057086, "grad_norm": 6.672092961306719, "learning_rate": 2.0133881634537615e-06, "loss": 17.1579, "step": 38988 }, { "epoch": 0.7126875902535325, "grad_norm": 5.685224448408413, "learning_rate": 2.013150767067182e-06, "loss": 17.2327, "step": 38989 }, { "epoch": 0.7127058694499789, "grad_norm": 7.1485018303638155, "learning_rate": 2.012913381149248e-06, "loss": 18.1499, "step": 38990 }, { "epoch": 0.7127241486464255, "grad_norm": 7.506117471915245, "learning_rate": 2.0126760057007956e-06, "loss": 17.7233, "step": 38991 }, { "epoch": 0.712742427842872, "grad_norm": 6.098701733376918, "learning_rate": 2.012438640722654e-06, "loss": 17.2162, "step": 38992 }, { "epoch": 0.7127607070393186, "grad_norm": 7.4935283694909485, "learning_rate": 2.0122012862156544e-06, "loss": 18.0573, "step": 38993 }, { "epoch": 0.7127789862357651, "grad_norm": 7.0124567743056, "learning_rate": 2.0119639421806313e-06, "loss": 17.4805, "step": 38994 }, { "epoch": 0.7127972654322116, "grad_norm": 6.556534256633947, "learning_rate": 2.0117266086184138e-06, "loss": 17.5694, "step": 38995 }, { "epoch": 0.7128155446286581, "grad_norm": 6.967579485362007, "learning_rate": 2.0114892855298353e-06, "loss": 17.4273, "step": 38996 }, { "epoch": 0.7128338238251046, "grad_norm": 6.179704605692416, "learning_rate": 2.0112519729157294e-06, "loss": 17.3692, "step": 38997 }, { "epoch": 0.7128521030215512, "grad_norm": 8.75510562020673, "learning_rate": 2.011014670776924e-06, "loss": 18.4389, "step": 38998 }, { "epoch": 0.7128703822179977, "grad_norm": 6.931347786609402, "learning_rate": 2.0107773791142542e-06, "loss": 17.5644, "step": 38999 }, { "epoch": 0.7128886614144442, "grad_norm": 7.570170819796158, "learning_rate": 2.010540097928551e-06, "loss": 17.4711, "step": 39000 }, { "epoch": 0.7129069406108908, "grad_norm": 5.554126897433506, "learning_rate": 2.010302827220643e-06, "loss": 16.8798, "step": 39001 }, { "epoch": 0.7129252198073373, "grad_norm": 6.859334042896658, "learning_rate": 2.0100655669913664e-06, "loss": 17.5769, "step": 39002 }, { "epoch": 0.7129434990037838, "grad_norm": 5.5169166572990695, "learning_rate": 2.0098283172415504e-06, "loss": 17.1616, "step": 39003 }, { "epoch": 0.7129617782002303, "grad_norm": 6.3548885597839595, "learning_rate": 2.0095910779720253e-06, "loss": 17.5262, "step": 39004 }, { "epoch": 0.7129800573966768, "grad_norm": 6.611974764692269, "learning_rate": 2.0093538491836236e-06, "loss": 17.4967, "step": 39005 }, { "epoch": 0.7129983365931234, "grad_norm": 6.90031010197339, "learning_rate": 2.009116630877178e-06, "loss": 17.319, "step": 39006 }, { "epoch": 0.7130166157895699, "grad_norm": 6.619070510000486, "learning_rate": 2.0088794230535185e-06, "loss": 17.3717, "step": 39007 }, { "epoch": 0.7130348949860165, "grad_norm": 7.790847376135182, "learning_rate": 2.0086422257134786e-06, "loss": 18.029, "step": 39008 }, { "epoch": 0.713053174182463, "grad_norm": 5.622224320588349, "learning_rate": 2.008405038857888e-06, "loss": 16.9657, "step": 39009 }, { "epoch": 0.7130714533789094, "grad_norm": 6.916724230829745, "learning_rate": 2.008167862487576e-06, "loss": 17.4918, "step": 39010 }, { "epoch": 0.713089732575356, "grad_norm": 5.4005085754958, "learning_rate": 2.0079306966033787e-06, "loss": 17.0829, "step": 39011 }, { "epoch": 0.7131080117718025, "grad_norm": 5.890702793911702, "learning_rate": 2.007693541206122e-06, "loss": 17.0079, "step": 39012 }, { "epoch": 0.7131262909682491, "grad_norm": 6.010749036031018, "learning_rate": 2.007456396296642e-06, "loss": 17.3244, "step": 39013 }, { "epoch": 0.7131445701646956, "grad_norm": 4.140279485424972, "learning_rate": 2.007219261875766e-06, "loss": 16.4789, "step": 39014 }, { "epoch": 0.713162849361142, "grad_norm": 5.755036886587699, "learning_rate": 2.0069821379443264e-06, "loss": 17.1411, "step": 39015 }, { "epoch": 0.7131811285575886, "grad_norm": 6.620030640883555, "learning_rate": 2.0067450245031567e-06, "loss": 17.1549, "step": 39016 }, { "epoch": 0.7131994077540351, "grad_norm": 7.312124437665048, "learning_rate": 2.006507921553086e-06, "loss": 17.9928, "step": 39017 }, { "epoch": 0.7132176869504817, "grad_norm": 4.986590085463719, "learning_rate": 2.0062708290949428e-06, "loss": 16.98, "step": 39018 }, { "epoch": 0.7132359661469282, "grad_norm": 5.800378139833663, "learning_rate": 2.006033747129563e-06, "loss": 17.2772, "step": 39019 }, { "epoch": 0.7132542453433747, "grad_norm": 5.547834334789864, "learning_rate": 2.005796675657775e-06, "loss": 17.1307, "step": 39020 }, { "epoch": 0.7132725245398213, "grad_norm": 4.840388529106176, "learning_rate": 2.0055596146804086e-06, "loss": 16.8853, "step": 39021 }, { "epoch": 0.7132908037362677, "grad_norm": 6.096258230463942, "learning_rate": 2.005322564198295e-06, "loss": 17.1088, "step": 39022 }, { "epoch": 0.7133090829327142, "grad_norm": 6.852292431302474, "learning_rate": 2.005085524212268e-06, "loss": 17.4585, "step": 39023 }, { "epoch": 0.7133273621291608, "grad_norm": 7.82755888319831, "learning_rate": 2.004848494723155e-06, "loss": 17.7841, "step": 39024 }, { "epoch": 0.7133456413256073, "grad_norm": 7.026279866880228, "learning_rate": 2.004611475731789e-06, "loss": 17.4979, "step": 39025 }, { "epoch": 0.7133639205220539, "grad_norm": 6.966586464505561, "learning_rate": 2.004374467239001e-06, "loss": 17.7613, "step": 39026 }, { "epoch": 0.7133821997185004, "grad_norm": 6.339072393894201, "learning_rate": 2.004137469245618e-06, "loss": 17.3465, "step": 39027 }, { "epoch": 0.7134004789149468, "grad_norm": 7.0010373660693475, "learning_rate": 2.0039004817524756e-06, "loss": 17.6697, "step": 39028 }, { "epoch": 0.7134187581113934, "grad_norm": 4.148509966206919, "learning_rate": 2.003663504760399e-06, "loss": 16.6002, "step": 39029 }, { "epoch": 0.7134370373078399, "grad_norm": 7.111122630809953, "learning_rate": 2.0034265382702243e-06, "loss": 17.1378, "step": 39030 }, { "epoch": 0.7134553165042865, "grad_norm": 7.29986478221131, "learning_rate": 2.0031895822827784e-06, "loss": 17.7285, "step": 39031 }, { "epoch": 0.713473595700733, "grad_norm": 7.168741085296983, "learning_rate": 2.002952636798892e-06, "loss": 17.5995, "step": 39032 }, { "epoch": 0.7134918748971795, "grad_norm": 5.343396250798673, "learning_rate": 2.0027157018193993e-06, "loss": 16.9627, "step": 39033 }, { "epoch": 0.7135101540936261, "grad_norm": 5.8071075052086165, "learning_rate": 2.0024787773451273e-06, "loss": 17.1623, "step": 39034 }, { "epoch": 0.7135284332900725, "grad_norm": 6.423857260337598, "learning_rate": 2.002241863376905e-06, "loss": 17.4232, "step": 39035 }, { "epoch": 0.7135467124865191, "grad_norm": 5.787056948562308, "learning_rate": 2.002004959915567e-06, "loss": 17.207, "step": 39036 }, { "epoch": 0.7135649916829656, "grad_norm": 10.348319650251181, "learning_rate": 2.0017680669619415e-06, "loss": 17.7287, "step": 39037 }, { "epoch": 0.7135832708794121, "grad_norm": 5.828131529032543, "learning_rate": 2.0015311845168572e-06, "loss": 17.2267, "step": 39038 }, { "epoch": 0.7136015500758587, "grad_norm": 6.227531402201902, "learning_rate": 2.0012943125811475e-06, "loss": 17.5189, "step": 39039 }, { "epoch": 0.7136198292723052, "grad_norm": 6.320741199659578, "learning_rate": 2.001057451155639e-06, "loss": 17.3361, "step": 39040 }, { "epoch": 0.7136381084687518, "grad_norm": 6.563834677740035, "learning_rate": 2.000820600241164e-06, "loss": 17.5339, "step": 39041 }, { "epoch": 0.7136563876651982, "grad_norm": 5.822455008158125, "learning_rate": 2.0005837598385546e-06, "loss": 17.2493, "step": 39042 }, { "epoch": 0.7136746668616447, "grad_norm": 6.069774168988628, "learning_rate": 2.0003469299486367e-06, "loss": 17.5256, "step": 39043 }, { "epoch": 0.7136929460580913, "grad_norm": 5.9609965338416195, "learning_rate": 2.0001101105722446e-06, "loss": 17.3745, "step": 39044 }, { "epoch": 0.7137112252545378, "grad_norm": 5.544346747557441, "learning_rate": 1.9998733017102064e-06, "loss": 17.1375, "step": 39045 }, { "epoch": 0.7137295044509844, "grad_norm": 7.692049349761179, "learning_rate": 1.99963650336335e-06, "loss": 18.3055, "step": 39046 }, { "epoch": 0.7137477836474309, "grad_norm": 7.4034554260549115, "learning_rate": 1.9993997155325086e-06, "loss": 17.7961, "step": 39047 }, { "epoch": 0.7137660628438773, "grad_norm": 6.204253897952981, "learning_rate": 1.999162938218511e-06, "loss": 17.5519, "step": 39048 }, { "epoch": 0.7137843420403239, "grad_norm": 6.591258431601573, "learning_rate": 1.998926171422185e-06, "loss": 17.5293, "step": 39049 }, { "epoch": 0.7138026212367704, "grad_norm": 7.544121191502649, "learning_rate": 1.998689415144363e-06, "loss": 18.3284, "step": 39050 }, { "epoch": 0.713820900433217, "grad_norm": 6.750160339752274, "learning_rate": 1.9984526693858753e-06, "loss": 17.402, "step": 39051 }, { "epoch": 0.7138391796296635, "grad_norm": 7.593617142447135, "learning_rate": 1.9982159341475495e-06, "loss": 18.0514, "step": 39052 }, { "epoch": 0.71385745882611, "grad_norm": 7.917940300035848, "learning_rate": 1.9979792094302174e-06, "loss": 17.6993, "step": 39053 }, { "epoch": 0.7138757380225565, "grad_norm": 7.285953096755428, "learning_rate": 1.997742495234708e-06, "loss": 17.7866, "step": 39054 }, { "epoch": 0.713894017219003, "grad_norm": 7.078270769909858, "learning_rate": 1.9975057915618483e-06, "loss": 17.6679, "step": 39055 }, { "epoch": 0.7139122964154496, "grad_norm": 7.471378954310095, "learning_rate": 1.9972690984124727e-06, "loss": 17.9391, "step": 39056 }, { "epoch": 0.7139305756118961, "grad_norm": 9.453939364439579, "learning_rate": 1.9970324157874082e-06, "loss": 18.7594, "step": 39057 }, { "epoch": 0.7139488548083426, "grad_norm": 6.893990846752516, "learning_rate": 1.996795743687483e-06, "loss": 17.7179, "step": 39058 }, { "epoch": 0.7139671340047892, "grad_norm": 9.030595816698819, "learning_rate": 1.996559082113528e-06, "loss": 18.1004, "step": 39059 }, { "epoch": 0.7139854132012357, "grad_norm": 5.618775162143982, "learning_rate": 1.9963224310663722e-06, "loss": 17.2603, "step": 39060 }, { "epoch": 0.7140036923976822, "grad_norm": 6.046570457154293, "learning_rate": 1.996085790546848e-06, "loss": 17.4922, "step": 39061 }, { "epoch": 0.7140219715941287, "grad_norm": 6.788594396756464, "learning_rate": 1.9958491605557816e-06, "loss": 17.2987, "step": 39062 }, { "epoch": 0.7140402507905752, "grad_norm": 5.649721508712231, "learning_rate": 1.9956125410940025e-06, "loss": 17.1751, "step": 39063 }, { "epoch": 0.7140585299870218, "grad_norm": 8.074224198007565, "learning_rate": 1.995375932162342e-06, "loss": 18.2567, "step": 39064 }, { "epoch": 0.7140768091834683, "grad_norm": 7.28962687827173, "learning_rate": 1.9951393337616283e-06, "loss": 17.7336, "step": 39065 }, { "epoch": 0.7140950883799149, "grad_norm": 4.789935689100821, "learning_rate": 1.9949027458926883e-06, "loss": 16.778, "step": 39066 }, { "epoch": 0.7141133675763613, "grad_norm": 5.71052234213877, "learning_rate": 1.9946661685563556e-06, "loss": 17.044, "step": 39067 }, { "epoch": 0.7141316467728078, "grad_norm": 5.994618081102296, "learning_rate": 1.9944296017534553e-06, "loss": 17.3431, "step": 39068 }, { "epoch": 0.7141499259692544, "grad_norm": 6.474636008899067, "learning_rate": 1.9941930454848183e-06, "loss": 17.4081, "step": 39069 }, { "epoch": 0.7141682051657009, "grad_norm": 5.60493092329033, "learning_rate": 1.9939564997512756e-06, "loss": 17.255, "step": 39070 }, { "epoch": 0.7141864843621475, "grad_norm": 5.97995706789635, "learning_rate": 1.9937199645536547e-06, "loss": 17.2828, "step": 39071 }, { "epoch": 0.714204763558594, "grad_norm": 9.058870868434763, "learning_rate": 1.9934834398927827e-06, "loss": 18.072, "step": 39072 }, { "epoch": 0.7142230427550404, "grad_norm": 6.190086354512154, "learning_rate": 1.993246925769492e-06, "loss": 17.5211, "step": 39073 }, { "epoch": 0.714241321951487, "grad_norm": 4.363975516861106, "learning_rate": 1.993010422184608e-06, "loss": 16.6023, "step": 39074 }, { "epoch": 0.7142596011479335, "grad_norm": 6.903451347007349, "learning_rate": 1.9927739291389638e-06, "loss": 17.7686, "step": 39075 }, { "epoch": 0.7142778803443801, "grad_norm": 5.029163308412507, "learning_rate": 1.9925374466333842e-06, "loss": 16.7927, "step": 39076 }, { "epoch": 0.7142961595408266, "grad_norm": 6.619205612179004, "learning_rate": 1.992300974668701e-06, "loss": 18.1813, "step": 39077 }, { "epoch": 0.7143144387372731, "grad_norm": 6.969051224231106, "learning_rate": 1.9920645132457406e-06, "loss": 18.0305, "step": 39078 }, { "epoch": 0.7143327179337197, "grad_norm": 6.9486137870155815, "learning_rate": 1.9918280623653347e-06, "loss": 17.4986, "step": 39079 }, { "epoch": 0.7143509971301661, "grad_norm": 6.49976329710797, "learning_rate": 1.9915916220283087e-06, "loss": 17.4093, "step": 39080 }, { "epoch": 0.7143692763266127, "grad_norm": 6.365817704564444, "learning_rate": 1.9913551922354953e-06, "loss": 17.1363, "step": 39081 }, { "epoch": 0.7143875555230592, "grad_norm": 6.361922900576181, "learning_rate": 1.9911187729877202e-06, "loss": 17.5234, "step": 39082 }, { "epoch": 0.7144058347195057, "grad_norm": 5.869713919628833, "learning_rate": 1.990882364285811e-06, "loss": 17.2953, "step": 39083 }, { "epoch": 0.7144241139159523, "grad_norm": 6.059403486319761, "learning_rate": 1.9906459661306e-06, "loss": 17.4843, "step": 39084 }, { "epoch": 0.7144423931123988, "grad_norm": 8.245923379102674, "learning_rate": 1.990409578522912e-06, "loss": 17.9771, "step": 39085 }, { "epoch": 0.7144606723088454, "grad_norm": 5.391180970867169, "learning_rate": 1.9901732014635766e-06, "loss": 16.9535, "step": 39086 }, { "epoch": 0.7144789515052918, "grad_norm": 6.4597408681300195, "learning_rate": 1.9899368349534256e-06, "loss": 17.4861, "step": 39087 }, { "epoch": 0.7144972307017383, "grad_norm": 5.993770299346392, "learning_rate": 1.989700478993282e-06, "loss": 17.2668, "step": 39088 }, { "epoch": 0.7145155098981849, "grad_norm": 5.913212451408233, "learning_rate": 1.9894641335839792e-06, "loss": 17.3204, "step": 39089 }, { "epoch": 0.7145337890946314, "grad_norm": 5.4791494442775255, "learning_rate": 1.9892277987263437e-06, "loss": 17.1748, "step": 39090 }, { "epoch": 0.714552068291078, "grad_norm": 7.067389490111107, "learning_rate": 1.988991474421202e-06, "loss": 17.6584, "step": 39091 }, { "epoch": 0.7145703474875245, "grad_norm": 6.9636213753306695, "learning_rate": 1.988755160669385e-06, "loss": 17.8301, "step": 39092 }, { "epoch": 0.7145886266839709, "grad_norm": 7.344411275685268, "learning_rate": 1.98851885747172e-06, "loss": 17.8593, "step": 39093 }, { "epoch": 0.7146069058804175, "grad_norm": 5.7091543510306275, "learning_rate": 1.988282564829034e-06, "loss": 17.2678, "step": 39094 }, { "epoch": 0.714625185076864, "grad_norm": 9.358758200200354, "learning_rate": 1.9880462827421553e-06, "loss": 17.1441, "step": 39095 }, { "epoch": 0.7146434642733105, "grad_norm": 5.749613974126071, "learning_rate": 1.987810011211915e-06, "loss": 17.1056, "step": 39096 }, { "epoch": 0.7146617434697571, "grad_norm": 5.45426455340889, "learning_rate": 1.987573750239138e-06, "loss": 16.9744, "step": 39097 }, { "epoch": 0.7146800226662036, "grad_norm": 5.966371527866238, "learning_rate": 1.9873374998246545e-06, "loss": 17.3115, "step": 39098 }, { "epoch": 0.7146983018626502, "grad_norm": 6.839304646897663, "learning_rate": 1.9871012599692924e-06, "loss": 17.8012, "step": 39099 }, { "epoch": 0.7147165810590966, "grad_norm": 5.480630645243811, "learning_rate": 1.986865030673877e-06, "loss": 17.5122, "step": 39100 }, { "epoch": 0.7147348602555431, "grad_norm": 5.452877277480403, "learning_rate": 1.9866288119392394e-06, "loss": 17.0192, "step": 39101 }, { "epoch": 0.7147531394519897, "grad_norm": 5.988336987710158, "learning_rate": 1.9863926037662075e-06, "loss": 17.2057, "step": 39102 }, { "epoch": 0.7147714186484362, "grad_norm": 8.741027741586487, "learning_rate": 1.9861564061556056e-06, "loss": 17.6037, "step": 39103 }, { "epoch": 0.7147896978448828, "grad_norm": 8.14385035760194, "learning_rate": 1.985920219108263e-06, "loss": 18.0443, "step": 39104 }, { "epoch": 0.7148079770413293, "grad_norm": 6.477318059289504, "learning_rate": 1.98568404262501e-06, "loss": 17.3221, "step": 39105 }, { "epoch": 0.7148262562377757, "grad_norm": 7.3382994915798205, "learning_rate": 1.9854478767066738e-06, "loss": 17.7961, "step": 39106 }, { "epoch": 0.7148445354342223, "grad_norm": 5.0794496524858515, "learning_rate": 1.9852117213540815e-06, "loss": 16.8996, "step": 39107 }, { "epoch": 0.7148628146306688, "grad_norm": 6.458067387309253, "learning_rate": 1.984975576568059e-06, "loss": 17.5857, "step": 39108 }, { "epoch": 0.7148810938271154, "grad_norm": 5.435086338482813, "learning_rate": 1.9847394423494366e-06, "loss": 17.2167, "step": 39109 }, { "epoch": 0.7148993730235619, "grad_norm": 8.963965003973009, "learning_rate": 1.984503318699041e-06, "loss": 18.9087, "step": 39110 }, { "epoch": 0.7149176522200084, "grad_norm": 7.2324604264469805, "learning_rate": 1.984267205617698e-06, "loss": 17.6387, "step": 39111 }, { "epoch": 0.714935931416455, "grad_norm": 7.7700192108282895, "learning_rate": 1.9840311031062385e-06, "loss": 17.7967, "step": 39112 }, { "epoch": 0.7149542106129014, "grad_norm": 6.556557445183953, "learning_rate": 1.9837950111654863e-06, "loss": 17.4045, "step": 39113 }, { "epoch": 0.714972489809348, "grad_norm": 6.204141897500874, "learning_rate": 1.983558929796271e-06, "loss": 17.4557, "step": 39114 }, { "epoch": 0.7149907690057945, "grad_norm": 7.571749181677104, "learning_rate": 1.983322858999422e-06, "loss": 17.4437, "step": 39115 }, { "epoch": 0.715009048202241, "grad_norm": 6.796209207629396, "learning_rate": 1.983086798775764e-06, "loss": 17.5624, "step": 39116 }, { "epoch": 0.7150273273986876, "grad_norm": 4.873683520075565, "learning_rate": 1.9828507491261233e-06, "loss": 16.8465, "step": 39117 }, { "epoch": 0.715045606595134, "grad_norm": 6.4142111317798625, "learning_rate": 1.982614710051331e-06, "loss": 17.6515, "step": 39118 }, { "epoch": 0.7150638857915806, "grad_norm": 6.760673166450286, "learning_rate": 1.9823786815522108e-06, "loss": 17.9557, "step": 39119 }, { "epoch": 0.7150821649880271, "grad_norm": 8.800549043837405, "learning_rate": 1.9821426636295927e-06, "loss": 18.065, "step": 39120 }, { "epoch": 0.7151004441844736, "grad_norm": 6.64743179292511, "learning_rate": 1.9819066562843024e-06, "loss": 17.502, "step": 39121 }, { "epoch": 0.7151187233809202, "grad_norm": 6.680301447046492, "learning_rate": 1.9816706595171665e-06, "loss": 17.4338, "step": 39122 }, { "epoch": 0.7151370025773667, "grad_norm": 5.767485036447061, "learning_rate": 1.9814346733290123e-06, "loss": 17.1781, "step": 39123 }, { "epoch": 0.7151552817738133, "grad_norm": 6.813652408095326, "learning_rate": 1.981198697720669e-06, "loss": 17.3789, "step": 39124 }, { "epoch": 0.7151735609702597, "grad_norm": 6.004361784140952, "learning_rate": 1.9809627326929616e-06, "loss": 17.2483, "step": 39125 }, { "epoch": 0.7151918401667062, "grad_norm": 5.597681938293297, "learning_rate": 1.9807267782467194e-06, "loss": 17.0681, "step": 39126 }, { "epoch": 0.7152101193631528, "grad_norm": 5.730552832209104, "learning_rate": 1.9804908343827678e-06, "loss": 17.3223, "step": 39127 }, { "epoch": 0.7152283985595993, "grad_norm": 6.251400833437002, "learning_rate": 1.9802549011019316e-06, "loss": 17.619, "step": 39128 }, { "epoch": 0.7152466777560459, "grad_norm": 5.958216551922698, "learning_rate": 1.9800189784050424e-06, "loss": 17.19, "step": 39129 }, { "epoch": 0.7152649569524924, "grad_norm": 6.3526467425823485, "learning_rate": 1.9797830662929233e-06, "loss": 17.4674, "step": 39130 }, { "epoch": 0.7152832361489389, "grad_norm": 4.854967600485758, "learning_rate": 1.979547164766402e-06, "loss": 16.8637, "step": 39131 }, { "epoch": 0.7153015153453854, "grad_norm": 6.410208239305555, "learning_rate": 1.9793112738263056e-06, "loss": 17.387, "step": 39132 }, { "epoch": 0.7153197945418319, "grad_norm": 6.411740895465604, "learning_rate": 1.9790753934734624e-06, "loss": 17.2651, "step": 39133 }, { "epoch": 0.7153380737382785, "grad_norm": 6.045263501412243, "learning_rate": 1.978839523708696e-06, "loss": 17.1511, "step": 39134 }, { "epoch": 0.715356352934725, "grad_norm": 5.939485981251198, "learning_rate": 1.9786036645328372e-06, "loss": 17.1423, "step": 39135 }, { "epoch": 0.7153746321311715, "grad_norm": 6.211048570530724, "learning_rate": 1.9783678159467078e-06, "loss": 17.4644, "step": 39136 }, { "epoch": 0.7153929113276181, "grad_norm": 5.426325701824839, "learning_rate": 1.9781319779511397e-06, "loss": 16.9913, "step": 39137 }, { "epoch": 0.7154111905240645, "grad_norm": 7.2167233039105785, "learning_rate": 1.9778961505469562e-06, "loss": 17.915, "step": 39138 }, { "epoch": 0.7154294697205111, "grad_norm": 5.369986864271973, "learning_rate": 1.977660333734983e-06, "loss": 16.9666, "step": 39139 }, { "epoch": 0.7154477489169576, "grad_norm": 6.14625595803345, "learning_rate": 1.9774245275160498e-06, "loss": 17.4828, "step": 39140 }, { "epoch": 0.7154660281134041, "grad_norm": 7.913835593193473, "learning_rate": 1.9771887318909793e-06, "loss": 18.1563, "step": 39141 }, { "epoch": 0.7154843073098507, "grad_norm": 6.334789350399321, "learning_rate": 1.9769529468606004e-06, "loss": 17.6419, "step": 39142 }, { "epoch": 0.7155025865062972, "grad_norm": 6.794769359790724, "learning_rate": 1.97671717242574e-06, "loss": 17.4868, "step": 39143 }, { "epoch": 0.7155208657027438, "grad_norm": 6.434079330018278, "learning_rate": 1.976481408587224e-06, "loss": 17.2085, "step": 39144 }, { "epoch": 0.7155391448991902, "grad_norm": 5.120894221935594, "learning_rate": 1.976245655345877e-06, "loss": 16.8397, "step": 39145 }, { "epoch": 0.7155574240956367, "grad_norm": 6.8450694234071925, "learning_rate": 1.9760099127025285e-06, "loss": 17.6141, "step": 39146 }, { "epoch": 0.7155757032920833, "grad_norm": 6.088426272303722, "learning_rate": 1.975774180658002e-06, "loss": 17.2599, "step": 39147 }, { "epoch": 0.7155939824885298, "grad_norm": 7.3677255952821294, "learning_rate": 1.975538459213123e-06, "loss": 17.4319, "step": 39148 }, { "epoch": 0.7156122616849764, "grad_norm": 7.28752218092494, "learning_rate": 1.97530274836872e-06, "loss": 17.6449, "step": 39149 }, { "epoch": 0.7156305408814229, "grad_norm": 5.9841100461988335, "learning_rate": 1.9750670481256174e-06, "loss": 17.356, "step": 39150 }, { "epoch": 0.7156488200778693, "grad_norm": 6.103282891721692, "learning_rate": 1.9748313584846445e-06, "loss": 17.4931, "step": 39151 }, { "epoch": 0.7156670992743159, "grad_norm": 6.2447127835903204, "learning_rate": 1.9745956794466254e-06, "loss": 17.2888, "step": 39152 }, { "epoch": 0.7156853784707624, "grad_norm": 5.801523314699449, "learning_rate": 1.974360011012384e-06, "loss": 17.2009, "step": 39153 }, { "epoch": 0.715703657667209, "grad_norm": 6.568284425389046, "learning_rate": 1.97412435318275e-06, "loss": 17.5391, "step": 39154 }, { "epoch": 0.7157219368636555, "grad_norm": 5.692635883709638, "learning_rate": 1.9738887059585472e-06, "loss": 17.1226, "step": 39155 }, { "epoch": 0.715740216060102, "grad_norm": 6.974509154630059, "learning_rate": 1.9736530693406e-06, "loss": 17.44, "step": 39156 }, { "epoch": 0.7157584952565486, "grad_norm": 17.668348343693673, "learning_rate": 1.973417443329739e-06, "loss": 17.9256, "step": 39157 }, { "epoch": 0.715776774452995, "grad_norm": 9.698166697212946, "learning_rate": 1.9731818279267843e-06, "loss": 18.4755, "step": 39158 }, { "epoch": 0.7157950536494416, "grad_norm": 6.894344667644268, "learning_rate": 1.972946223132565e-06, "loss": 17.6744, "step": 39159 }, { "epoch": 0.7158133328458881, "grad_norm": 7.480576045563848, "learning_rate": 1.972710628947909e-06, "loss": 18.0814, "step": 39160 }, { "epoch": 0.7158316120423346, "grad_norm": 5.017834281386182, "learning_rate": 1.9724750453736387e-06, "loss": 16.9548, "step": 39161 }, { "epoch": 0.7158498912387812, "grad_norm": 5.853393257492259, "learning_rate": 1.9722394724105793e-06, "loss": 17.2293, "step": 39162 }, { "epoch": 0.7158681704352277, "grad_norm": 5.9197565098643, "learning_rate": 1.972003910059559e-06, "loss": 17.4065, "step": 39163 }, { "epoch": 0.7158864496316741, "grad_norm": 5.387955131636843, "learning_rate": 1.971768358321403e-06, "loss": 17.125, "step": 39164 }, { "epoch": 0.7159047288281207, "grad_norm": 6.490992464561458, "learning_rate": 1.971532817196934e-06, "loss": 17.5262, "step": 39165 }, { "epoch": 0.7159230080245672, "grad_norm": 7.239001668242428, "learning_rate": 1.971297286686981e-06, "loss": 17.653, "step": 39166 }, { "epoch": 0.7159412872210138, "grad_norm": 8.74801290930161, "learning_rate": 1.971061766792367e-06, "loss": 18.1816, "step": 39167 }, { "epoch": 0.7159595664174603, "grad_norm": 6.894905442895292, "learning_rate": 1.9708262575139187e-06, "loss": 17.5494, "step": 39168 }, { "epoch": 0.7159778456139068, "grad_norm": 6.986028471548376, "learning_rate": 1.9705907588524632e-06, "loss": 17.757, "step": 39169 }, { "epoch": 0.7159961248103534, "grad_norm": 6.003505639487606, "learning_rate": 1.9703552708088224e-06, "loss": 17.3924, "step": 39170 }, { "epoch": 0.7160144040067998, "grad_norm": 6.137635223905296, "learning_rate": 1.970119793383825e-06, "loss": 17.2944, "step": 39171 }, { "epoch": 0.7160326832032464, "grad_norm": 6.8055659908563895, "learning_rate": 1.9698843265782953e-06, "loss": 17.5725, "step": 39172 }, { "epoch": 0.7160509623996929, "grad_norm": 5.2763038240359155, "learning_rate": 1.969648870393056e-06, "loss": 17.2753, "step": 39173 }, { "epoch": 0.7160692415961394, "grad_norm": 5.383254603892881, "learning_rate": 1.9694134248289367e-06, "loss": 17.0739, "step": 39174 }, { "epoch": 0.716087520792586, "grad_norm": 5.782253275331273, "learning_rate": 1.96917798988676e-06, "loss": 17.3808, "step": 39175 }, { "epoch": 0.7161057999890325, "grad_norm": 8.584504252607617, "learning_rate": 1.968942565567349e-06, "loss": 17.4969, "step": 39176 }, { "epoch": 0.716124079185479, "grad_norm": 5.695859244997523, "learning_rate": 1.9687071518715318e-06, "loss": 17.3321, "step": 39177 }, { "epoch": 0.7161423583819255, "grad_norm": 6.8344515904820575, "learning_rate": 1.9684717488001347e-06, "loss": 17.5178, "step": 39178 }, { "epoch": 0.716160637578372, "grad_norm": 5.93758998231086, "learning_rate": 1.9682363563539795e-06, "loss": 17.352, "step": 39179 }, { "epoch": 0.7161789167748186, "grad_norm": 5.656933967974621, "learning_rate": 1.9680009745338946e-06, "loss": 17.2204, "step": 39180 }, { "epoch": 0.7161971959712651, "grad_norm": 5.8393146879020925, "learning_rate": 1.9677656033407015e-06, "loss": 17.5151, "step": 39181 }, { "epoch": 0.7162154751677117, "grad_norm": 8.056864595875382, "learning_rate": 1.9675302427752287e-06, "loss": 17.5744, "step": 39182 }, { "epoch": 0.7162337543641581, "grad_norm": 5.523560349854018, "learning_rate": 1.9672948928382982e-06, "loss": 17.1462, "step": 39183 }, { "epoch": 0.7162520335606046, "grad_norm": 6.217832786164489, "learning_rate": 1.967059553530735e-06, "loss": 17.3463, "step": 39184 }, { "epoch": 0.7162703127570512, "grad_norm": 5.156642038465527, "learning_rate": 1.966824224853367e-06, "loss": 17.0274, "step": 39185 }, { "epoch": 0.7162885919534977, "grad_norm": 6.682462177518995, "learning_rate": 1.966588906807014e-06, "loss": 17.3111, "step": 39186 }, { "epoch": 0.7163068711499443, "grad_norm": 7.427642797706944, "learning_rate": 1.9663535993925048e-06, "loss": 17.9646, "step": 39187 }, { "epoch": 0.7163251503463908, "grad_norm": 6.460168936922803, "learning_rate": 1.9661183026106644e-06, "loss": 17.2751, "step": 39188 }, { "epoch": 0.7163434295428373, "grad_norm": 8.35388507476553, "learning_rate": 1.965883016462316e-06, "loss": 18.0824, "step": 39189 }, { "epoch": 0.7163617087392838, "grad_norm": 9.617364416528615, "learning_rate": 1.965647740948283e-06, "loss": 17.5235, "step": 39190 }, { "epoch": 0.7163799879357303, "grad_norm": 9.927862415493603, "learning_rate": 1.965412476069392e-06, "loss": 17.5461, "step": 39191 }, { "epoch": 0.7163982671321769, "grad_norm": 6.436996534522768, "learning_rate": 1.9651772218264675e-06, "loss": 17.1859, "step": 39192 }, { "epoch": 0.7164165463286234, "grad_norm": 6.423349130022663, "learning_rate": 1.9649419782203324e-06, "loss": 17.3564, "step": 39193 }, { "epoch": 0.7164348255250699, "grad_norm": 5.792247955035021, "learning_rate": 1.964706745251813e-06, "loss": 17.0998, "step": 39194 }, { "epoch": 0.7164531047215165, "grad_norm": 4.987986120016888, "learning_rate": 1.964471522921732e-06, "loss": 17.1301, "step": 39195 }, { "epoch": 0.716471383917963, "grad_norm": 7.161302431067538, "learning_rate": 1.964236311230915e-06, "loss": 17.8251, "step": 39196 }, { "epoch": 0.7164896631144095, "grad_norm": 4.832136788048623, "learning_rate": 1.9640011101801874e-06, "loss": 16.8926, "step": 39197 }, { "epoch": 0.716507942310856, "grad_norm": 6.908237401643526, "learning_rate": 1.9637659197703713e-06, "loss": 17.6641, "step": 39198 }, { "epoch": 0.7165262215073025, "grad_norm": 5.944085774810534, "learning_rate": 1.9635307400022934e-06, "loss": 17.1098, "step": 39199 }, { "epoch": 0.7165445007037491, "grad_norm": 5.464248195180865, "learning_rate": 1.963295570876777e-06, "loss": 17.0681, "step": 39200 }, { "epoch": 0.7165627799001956, "grad_norm": 5.435219507876889, "learning_rate": 1.9630604123946444e-06, "loss": 17.1254, "step": 39201 }, { "epoch": 0.7165810590966422, "grad_norm": 6.276051456278294, "learning_rate": 1.9628252645567237e-06, "loss": 17.2106, "step": 39202 }, { "epoch": 0.7165993382930886, "grad_norm": 7.084983013189498, "learning_rate": 1.9625901273638345e-06, "loss": 17.6481, "step": 39203 }, { "epoch": 0.7166176174895351, "grad_norm": 5.890352783975352, "learning_rate": 1.962355000816805e-06, "loss": 17.0549, "step": 39204 }, { "epoch": 0.7166358966859817, "grad_norm": 7.257014631602465, "learning_rate": 1.962119884916456e-06, "loss": 18.1682, "step": 39205 }, { "epoch": 0.7166541758824282, "grad_norm": 5.810959172118826, "learning_rate": 1.9618847796636153e-06, "loss": 17.0495, "step": 39206 }, { "epoch": 0.7166724550788748, "grad_norm": 6.624505340808344, "learning_rate": 1.961649685059103e-06, "loss": 17.4194, "step": 39207 }, { "epoch": 0.7166907342753213, "grad_norm": 7.166515854192257, "learning_rate": 1.9614146011037466e-06, "loss": 18.1214, "step": 39208 }, { "epoch": 0.7167090134717677, "grad_norm": 6.521027071827131, "learning_rate": 1.9611795277983687e-06, "loss": 17.5965, "step": 39209 }, { "epoch": 0.7167272926682143, "grad_norm": 6.716235507162784, "learning_rate": 1.96094446514379e-06, "loss": 17.6771, "step": 39210 }, { "epoch": 0.7167455718646608, "grad_norm": 7.174223233295803, "learning_rate": 1.96070941314084e-06, "loss": 17.8621, "step": 39211 }, { "epoch": 0.7167638510611074, "grad_norm": 5.757212228185044, "learning_rate": 1.960474371790338e-06, "loss": 17.366, "step": 39212 }, { "epoch": 0.7167821302575539, "grad_norm": 6.20559313496134, "learning_rate": 1.960239341093109e-06, "loss": 17.3053, "step": 39213 }, { "epoch": 0.7168004094540004, "grad_norm": 6.845090892967886, "learning_rate": 1.9600043210499794e-06, "loss": 17.5493, "step": 39214 }, { "epoch": 0.716818688650447, "grad_norm": 7.473146082684853, "learning_rate": 1.959769311661769e-06, "loss": 17.4974, "step": 39215 }, { "epoch": 0.7168369678468934, "grad_norm": 5.845909241007447, "learning_rate": 1.959534312929305e-06, "loss": 17.2234, "step": 39216 }, { "epoch": 0.71685524704334, "grad_norm": 5.433575057911557, "learning_rate": 1.95929932485341e-06, "loss": 17.1324, "step": 39217 }, { "epoch": 0.7168735262397865, "grad_norm": 5.748655736336965, "learning_rate": 1.959064347434904e-06, "loss": 17.1342, "step": 39218 }, { "epoch": 0.716891805436233, "grad_norm": 6.286546436986636, "learning_rate": 1.958829380674617e-06, "loss": 17.2124, "step": 39219 }, { "epoch": 0.7169100846326796, "grad_norm": 7.161879717141847, "learning_rate": 1.958594424573368e-06, "loss": 17.7309, "step": 39220 }, { "epoch": 0.7169283638291261, "grad_norm": 6.22768557001936, "learning_rate": 1.95835947913198e-06, "loss": 17.4491, "step": 39221 }, { "epoch": 0.7169466430255726, "grad_norm": 7.131039141852286, "learning_rate": 1.958124544351278e-06, "loss": 17.6333, "step": 39222 }, { "epoch": 0.7169649222220191, "grad_norm": 6.25332165952202, "learning_rate": 1.9578896202320874e-06, "loss": 17.2044, "step": 39223 }, { "epoch": 0.7169832014184656, "grad_norm": 6.907262536915764, "learning_rate": 1.9576547067752284e-06, "loss": 17.5798, "step": 39224 }, { "epoch": 0.7170014806149122, "grad_norm": 6.061444452317176, "learning_rate": 1.9574198039815266e-06, "loss": 17.1951, "step": 39225 }, { "epoch": 0.7170197598113587, "grad_norm": 7.166547104074781, "learning_rate": 1.957184911851803e-06, "loss": 17.4751, "step": 39226 }, { "epoch": 0.7170380390078053, "grad_norm": 5.682592242338484, "learning_rate": 1.9569500303868843e-06, "loss": 17.1791, "step": 39227 }, { "epoch": 0.7170563182042518, "grad_norm": 6.415415347591846, "learning_rate": 1.956715159587591e-06, "loss": 17.4502, "step": 39228 }, { "epoch": 0.7170745974006982, "grad_norm": 7.569899114229563, "learning_rate": 1.956480299454746e-06, "loss": 17.8568, "step": 39229 }, { "epoch": 0.7170928765971448, "grad_norm": 6.150089035470022, "learning_rate": 1.956245449989175e-06, "loss": 17.4895, "step": 39230 }, { "epoch": 0.7171111557935913, "grad_norm": 6.076365199535161, "learning_rate": 1.956010611191697e-06, "loss": 17.5285, "step": 39231 }, { "epoch": 0.7171294349900378, "grad_norm": 6.296856575198952, "learning_rate": 1.9557757830631386e-06, "loss": 17.2441, "step": 39232 }, { "epoch": 0.7171477141864844, "grad_norm": 6.115590125185958, "learning_rate": 1.9555409656043233e-06, "loss": 17.2776, "step": 39233 }, { "epoch": 0.7171659933829309, "grad_norm": 6.123788811167337, "learning_rate": 1.9553061588160725e-06, "loss": 17.1334, "step": 39234 }, { "epoch": 0.7171842725793774, "grad_norm": 5.133277606814394, "learning_rate": 1.9550713626992086e-06, "loss": 16.862, "step": 39235 }, { "epoch": 0.7172025517758239, "grad_norm": 6.839796401451268, "learning_rate": 1.954836577254556e-06, "loss": 17.8506, "step": 39236 }, { "epoch": 0.7172208309722704, "grad_norm": 6.209287818572686, "learning_rate": 1.954601802482938e-06, "loss": 17.4363, "step": 39237 }, { "epoch": 0.717239110168717, "grad_norm": 5.272416810351985, "learning_rate": 1.9543670383851738e-06, "loss": 17.0816, "step": 39238 }, { "epoch": 0.7172573893651635, "grad_norm": 5.2322946298298785, "learning_rate": 1.954132284962091e-06, "loss": 17.1258, "step": 39239 }, { "epoch": 0.7172756685616101, "grad_norm": 9.596324132864991, "learning_rate": 1.953897542214508e-06, "loss": 17.6901, "step": 39240 }, { "epoch": 0.7172939477580565, "grad_norm": 7.025096780950844, "learning_rate": 1.9536628101432502e-06, "loss": 17.7958, "step": 39241 }, { "epoch": 0.717312226954503, "grad_norm": 7.0421900796395995, "learning_rate": 1.9534280887491414e-06, "loss": 17.5161, "step": 39242 }, { "epoch": 0.7173305061509496, "grad_norm": 6.370394547919961, "learning_rate": 1.9531933780330014e-06, "loss": 17.2513, "step": 39243 }, { "epoch": 0.7173487853473961, "grad_norm": 4.64065749265614, "learning_rate": 1.952958677995656e-06, "loss": 16.6857, "step": 39244 }, { "epoch": 0.7173670645438427, "grad_norm": 6.023288744035348, "learning_rate": 1.9527239886379257e-06, "loss": 17.3298, "step": 39245 }, { "epoch": 0.7173853437402892, "grad_norm": 6.3030812637244, "learning_rate": 1.952489309960632e-06, "loss": 17.6209, "step": 39246 }, { "epoch": 0.7174036229367357, "grad_norm": 6.02655099711575, "learning_rate": 1.9522546419646e-06, "loss": 17.2881, "step": 39247 }, { "epoch": 0.7174219021331822, "grad_norm": 6.620859277913272, "learning_rate": 1.9520199846506515e-06, "loss": 17.556, "step": 39248 }, { "epoch": 0.7174401813296287, "grad_norm": 5.793896825744386, "learning_rate": 1.951785338019606e-06, "loss": 17.0219, "step": 39249 }, { "epoch": 0.7174584605260753, "grad_norm": 6.2586711186029484, "learning_rate": 1.951550702072289e-06, "loss": 17.5213, "step": 39250 }, { "epoch": 0.7174767397225218, "grad_norm": 5.62566921831178, "learning_rate": 1.951316076809524e-06, "loss": 17.2308, "step": 39251 }, { "epoch": 0.7174950189189683, "grad_norm": 6.004346813297242, "learning_rate": 1.95108146223213e-06, "loss": 17.5136, "step": 39252 }, { "epoch": 0.7175132981154149, "grad_norm": 7.826059268402137, "learning_rate": 1.9508468583409324e-06, "loss": 18.0127, "step": 39253 }, { "epoch": 0.7175315773118613, "grad_norm": 6.116428100748439, "learning_rate": 1.950612265136752e-06, "loss": 17.2544, "step": 39254 }, { "epoch": 0.7175498565083079, "grad_norm": 7.571213459178402, "learning_rate": 1.950377682620409e-06, "loss": 17.8926, "step": 39255 }, { "epoch": 0.7175681357047544, "grad_norm": 6.192027985119857, "learning_rate": 1.9501431107927297e-06, "loss": 17.4572, "step": 39256 }, { "epoch": 0.7175864149012009, "grad_norm": 7.66448857380099, "learning_rate": 1.949908549654532e-06, "loss": 17.8076, "step": 39257 }, { "epoch": 0.7176046940976475, "grad_norm": 7.286769342268417, "learning_rate": 1.9496739992066425e-06, "loss": 17.6241, "step": 39258 }, { "epoch": 0.717622973294094, "grad_norm": 6.909398248296079, "learning_rate": 1.9494394594498787e-06, "loss": 17.6078, "step": 39259 }, { "epoch": 0.7176412524905406, "grad_norm": 6.29754448658617, "learning_rate": 1.949204930385065e-06, "loss": 17.27, "step": 39260 }, { "epoch": 0.717659531686987, "grad_norm": 6.206032101003143, "learning_rate": 1.9489704120130254e-06, "loss": 17.4579, "step": 39261 }, { "epoch": 0.7176778108834335, "grad_norm": 6.4287415216159065, "learning_rate": 1.9487359043345796e-06, "loss": 17.4591, "step": 39262 }, { "epoch": 0.7176960900798801, "grad_norm": 5.859741958855865, "learning_rate": 1.9485014073505477e-06, "loss": 17.1053, "step": 39263 }, { "epoch": 0.7177143692763266, "grad_norm": 5.19307844409025, "learning_rate": 1.948266921061755e-06, "loss": 16.9188, "step": 39264 }, { "epoch": 0.7177326484727732, "grad_norm": 7.365990706242848, "learning_rate": 1.9480324454690226e-06, "loss": 17.9473, "step": 39265 }, { "epoch": 0.7177509276692197, "grad_norm": 6.426574751587902, "learning_rate": 1.9477979805731695e-06, "loss": 17.3267, "step": 39266 }, { "epoch": 0.7177692068656661, "grad_norm": 5.742805974163269, "learning_rate": 1.9475635263750215e-06, "loss": 17.1548, "step": 39267 }, { "epoch": 0.7177874860621127, "grad_norm": 6.4847941319432545, "learning_rate": 1.9473290828753965e-06, "loss": 17.1922, "step": 39268 }, { "epoch": 0.7178057652585592, "grad_norm": 7.3551012487419545, "learning_rate": 1.947094650075118e-06, "loss": 17.606, "step": 39269 }, { "epoch": 0.7178240444550058, "grad_norm": 6.717863884546829, "learning_rate": 1.9468602279750105e-06, "loss": 17.5621, "step": 39270 }, { "epoch": 0.7178423236514523, "grad_norm": 6.4726179433888555, "learning_rate": 1.9466258165758924e-06, "loss": 17.3566, "step": 39271 }, { "epoch": 0.7178606028478988, "grad_norm": 6.4364166946527614, "learning_rate": 1.9463914158785837e-06, "loss": 17.5312, "step": 39272 }, { "epoch": 0.7178788820443454, "grad_norm": 7.876904043365558, "learning_rate": 1.94615702588391e-06, "loss": 18.3249, "step": 39273 }, { "epoch": 0.7178971612407918, "grad_norm": 6.317814742907593, "learning_rate": 1.9459226465926893e-06, "loss": 17.424, "step": 39274 }, { "epoch": 0.7179154404372384, "grad_norm": 6.494194175093521, "learning_rate": 1.9456882780057466e-06, "loss": 17.5196, "step": 39275 }, { "epoch": 0.7179337196336849, "grad_norm": 5.6893749420778645, "learning_rate": 1.9454539201238997e-06, "loss": 17.2914, "step": 39276 }, { "epoch": 0.7179519988301314, "grad_norm": 6.086319598494527, "learning_rate": 1.9452195729479725e-06, "loss": 17.1932, "step": 39277 }, { "epoch": 0.717970278026578, "grad_norm": 6.0497283108756434, "learning_rate": 1.9449852364787865e-06, "loss": 17.2801, "step": 39278 }, { "epoch": 0.7179885572230245, "grad_norm": 5.934680910913319, "learning_rate": 1.944750910717162e-06, "loss": 17.2541, "step": 39279 }, { "epoch": 0.718006836419471, "grad_norm": 7.222591625112228, "learning_rate": 1.9445165956639188e-06, "loss": 17.659, "step": 39280 }, { "epoch": 0.7180251156159175, "grad_norm": 6.621084914609206, "learning_rate": 1.9442822913198816e-06, "loss": 17.5386, "step": 39281 }, { "epoch": 0.718043394812364, "grad_norm": 5.345424438883728, "learning_rate": 1.94404799768587e-06, "loss": 16.9112, "step": 39282 }, { "epoch": 0.7180616740088106, "grad_norm": 5.778487204190319, "learning_rate": 1.9438137147627027e-06, "loss": 17.143, "step": 39283 }, { "epoch": 0.7180799532052571, "grad_norm": 6.066945457558513, "learning_rate": 1.9435794425512055e-06, "loss": 17.3343, "step": 39284 }, { "epoch": 0.7180982324017037, "grad_norm": 7.902068882596495, "learning_rate": 1.9433451810521954e-06, "loss": 17.9279, "step": 39285 }, { "epoch": 0.7181165115981502, "grad_norm": 6.377746692632964, "learning_rate": 1.9431109302664945e-06, "loss": 17.2915, "step": 39286 }, { "epoch": 0.7181347907945966, "grad_norm": 5.91111982674322, "learning_rate": 1.942876690194927e-06, "loss": 17.1223, "step": 39287 }, { "epoch": 0.7181530699910432, "grad_norm": 6.464189949707836, "learning_rate": 1.9426424608383095e-06, "loss": 17.6916, "step": 39288 }, { "epoch": 0.7181713491874897, "grad_norm": 6.253727898817353, "learning_rate": 1.942408242197466e-06, "loss": 17.29, "step": 39289 }, { "epoch": 0.7181896283839363, "grad_norm": 5.64134901165618, "learning_rate": 1.942174034273217e-06, "loss": 17.1219, "step": 39290 }, { "epoch": 0.7182079075803828, "grad_norm": 7.045897309639793, "learning_rate": 1.9419398370663807e-06, "loss": 17.6145, "step": 39291 }, { "epoch": 0.7182261867768293, "grad_norm": 6.599328615042788, "learning_rate": 1.9417056505777814e-06, "loss": 17.4186, "step": 39292 }, { "epoch": 0.7182444659732758, "grad_norm": 5.798116339202818, "learning_rate": 1.9414714748082384e-06, "loss": 17.5279, "step": 39293 }, { "epoch": 0.7182627451697223, "grad_norm": 6.561890660098446, "learning_rate": 1.9412373097585712e-06, "loss": 17.305, "step": 39294 }, { "epoch": 0.7182810243661689, "grad_norm": 5.075477431641121, "learning_rate": 1.941003155429602e-06, "loss": 17.0402, "step": 39295 }, { "epoch": 0.7182993035626154, "grad_norm": 6.368407021733802, "learning_rate": 1.9407690118221522e-06, "loss": 17.5938, "step": 39296 }, { "epoch": 0.7183175827590619, "grad_norm": 5.6122429200589306, "learning_rate": 1.9405348789370403e-06, "loss": 17.1869, "step": 39297 }, { "epoch": 0.7183358619555085, "grad_norm": 6.60267988665345, "learning_rate": 1.94030075677509e-06, "loss": 17.8557, "step": 39298 }, { "epoch": 0.718354141151955, "grad_norm": 5.974556827270905, "learning_rate": 1.94006664533712e-06, "loss": 17.3664, "step": 39299 }, { "epoch": 0.7183724203484014, "grad_norm": 7.049861071945703, "learning_rate": 1.939832544623949e-06, "loss": 17.7633, "step": 39300 }, { "epoch": 0.718390699544848, "grad_norm": 6.251488284205999, "learning_rate": 1.939598454636401e-06, "loss": 17.518, "step": 39301 }, { "epoch": 0.7184089787412945, "grad_norm": 8.473164098461718, "learning_rate": 1.939364375375295e-06, "loss": 17.9463, "step": 39302 }, { "epoch": 0.7184272579377411, "grad_norm": 6.283131523539654, "learning_rate": 1.9391303068414495e-06, "loss": 17.3054, "step": 39303 }, { "epoch": 0.7184455371341876, "grad_norm": 6.004411861124729, "learning_rate": 1.938896249035687e-06, "loss": 17.5136, "step": 39304 }, { "epoch": 0.718463816330634, "grad_norm": 7.222691755270159, "learning_rate": 1.938662201958827e-06, "loss": 18.4113, "step": 39305 }, { "epoch": 0.7184820955270806, "grad_norm": 5.707728204766985, "learning_rate": 1.938428165611692e-06, "loss": 16.9662, "step": 39306 }, { "epoch": 0.7185003747235271, "grad_norm": 6.120831617948992, "learning_rate": 1.938194139995101e-06, "loss": 17.4883, "step": 39307 }, { "epoch": 0.7185186539199737, "grad_norm": 6.872005540486926, "learning_rate": 1.9379601251098716e-06, "loss": 17.7146, "step": 39308 }, { "epoch": 0.7185369331164202, "grad_norm": 5.763254742300119, "learning_rate": 1.9377261209568287e-06, "loss": 17.1607, "step": 39309 }, { "epoch": 0.7185552123128667, "grad_norm": 7.076702712716059, "learning_rate": 1.9374921275367898e-06, "loss": 17.7638, "step": 39310 }, { "epoch": 0.7185734915093133, "grad_norm": 6.130172464874344, "learning_rate": 1.9372581448505734e-06, "loss": 17.5456, "step": 39311 }, { "epoch": 0.7185917707057597, "grad_norm": 5.74530356945098, "learning_rate": 1.937024172899003e-06, "loss": 17.3458, "step": 39312 }, { "epoch": 0.7186100499022063, "grad_norm": 6.316034801705493, "learning_rate": 1.9367902116828957e-06, "loss": 17.5549, "step": 39313 }, { "epoch": 0.7186283290986528, "grad_norm": 4.806528119761489, "learning_rate": 1.9365562612030726e-06, "loss": 16.7913, "step": 39314 }, { "epoch": 0.7186466082950993, "grad_norm": 5.449955468448705, "learning_rate": 1.936322321460356e-06, "loss": 17.1281, "step": 39315 }, { "epoch": 0.7186648874915459, "grad_norm": 4.691724638233973, "learning_rate": 1.9360883924555633e-06, "loss": 16.9647, "step": 39316 }, { "epoch": 0.7186831666879924, "grad_norm": 7.629724339622163, "learning_rate": 1.9358544741895137e-06, "loss": 17.4523, "step": 39317 }, { "epoch": 0.718701445884439, "grad_norm": 6.223680130868028, "learning_rate": 1.9356205666630294e-06, "loss": 17.244, "step": 39318 }, { "epoch": 0.7187197250808854, "grad_norm": 7.081202904680963, "learning_rate": 1.935386669876928e-06, "loss": 17.8367, "step": 39319 }, { "epoch": 0.7187380042773319, "grad_norm": 5.066612357228023, "learning_rate": 1.9351527838320316e-06, "loss": 17.0471, "step": 39320 }, { "epoch": 0.7187562834737785, "grad_norm": 7.963248867510057, "learning_rate": 1.934918908529159e-06, "loss": 17.9921, "step": 39321 }, { "epoch": 0.718774562670225, "grad_norm": 6.539327529713846, "learning_rate": 1.9346850439691284e-06, "loss": 17.4746, "step": 39322 }, { "epoch": 0.7187928418666716, "grad_norm": 5.274675587946325, "learning_rate": 1.9344511901527603e-06, "loss": 16.9159, "step": 39323 }, { "epoch": 0.7188111210631181, "grad_norm": 6.858003208641633, "learning_rate": 1.934217347080876e-06, "loss": 17.553, "step": 39324 }, { "epoch": 0.7188294002595645, "grad_norm": 7.090606240010812, "learning_rate": 1.9339835147542927e-06, "loss": 17.6759, "step": 39325 }, { "epoch": 0.7188476794560111, "grad_norm": 4.644422226198222, "learning_rate": 1.933749693173833e-06, "loss": 16.6867, "step": 39326 }, { "epoch": 0.7188659586524576, "grad_norm": 5.6765943288991405, "learning_rate": 1.9335158823403146e-06, "loss": 17.2742, "step": 39327 }, { "epoch": 0.7188842378489042, "grad_norm": 5.551778595451323, "learning_rate": 1.9332820822545544e-06, "loss": 17.1522, "step": 39328 }, { "epoch": 0.7189025170453507, "grad_norm": 7.501211915539057, "learning_rate": 1.9330482929173767e-06, "loss": 17.4069, "step": 39329 }, { "epoch": 0.7189207962417972, "grad_norm": 6.616603802113869, "learning_rate": 1.932814514329597e-06, "loss": 17.5792, "step": 39330 }, { "epoch": 0.7189390754382438, "grad_norm": 6.34977799136098, "learning_rate": 1.932580746492038e-06, "loss": 17.2342, "step": 39331 }, { "epoch": 0.7189573546346902, "grad_norm": 6.60200592390713, "learning_rate": 1.9323469894055147e-06, "loss": 17.3058, "step": 39332 }, { "epoch": 0.7189756338311368, "grad_norm": 6.873831286289884, "learning_rate": 1.9321132430708517e-06, "loss": 17.6659, "step": 39333 }, { "epoch": 0.7189939130275833, "grad_norm": 5.790872163613136, "learning_rate": 1.9318795074888633e-06, "loss": 17.3984, "step": 39334 }, { "epoch": 0.7190121922240298, "grad_norm": 6.381833974996713, "learning_rate": 1.931645782660373e-06, "loss": 17.5669, "step": 39335 }, { "epoch": 0.7190304714204764, "grad_norm": 5.1652913631574195, "learning_rate": 1.931412068586196e-06, "loss": 17.0367, "step": 39336 }, { "epoch": 0.7190487506169229, "grad_norm": 6.5239897199887915, "learning_rate": 1.9311783652671555e-06, "loss": 17.7233, "step": 39337 }, { "epoch": 0.7190670298133695, "grad_norm": 6.652590780213178, "learning_rate": 1.930944672704068e-06, "loss": 17.7814, "step": 39338 }, { "epoch": 0.7190853090098159, "grad_norm": 8.700023335460655, "learning_rate": 1.9307109908977523e-06, "loss": 18.3011, "step": 39339 }, { "epoch": 0.7191035882062624, "grad_norm": 5.646691250729527, "learning_rate": 1.9304773198490273e-06, "loss": 17.0748, "step": 39340 }, { "epoch": 0.719121867402709, "grad_norm": 5.6134313857541445, "learning_rate": 1.930243659558715e-06, "loss": 17.2228, "step": 39341 }, { "epoch": 0.7191401465991555, "grad_norm": 6.019192948439349, "learning_rate": 1.930010010027631e-06, "loss": 17.4214, "step": 39342 }, { "epoch": 0.7191584257956021, "grad_norm": 8.005399538280857, "learning_rate": 1.9297763712565964e-06, "loss": 18.1542, "step": 39343 }, { "epoch": 0.7191767049920486, "grad_norm": 5.7355022533832285, "learning_rate": 1.92954274324643e-06, "loss": 17.3975, "step": 39344 }, { "epoch": 0.719194984188495, "grad_norm": 7.6428213164247785, "learning_rate": 1.9293091259979474e-06, "loss": 17.3997, "step": 39345 }, { "epoch": 0.7192132633849416, "grad_norm": 8.635105334667475, "learning_rate": 1.929075519511972e-06, "loss": 18.5931, "step": 39346 }, { "epoch": 0.7192315425813881, "grad_norm": 5.918652956351081, "learning_rate": 1.92884192378932e-06, "loss": 17.1433, "step": 39347 }, { "epoch": 0.7192498217778347, "grad_norm": 7.572583933060386, "learning_rate": 1.9286083388308097e-06, "loss": 17.7527, "step": 39348 }, { "epoch": 0.7192681009742812, "grad_norm": 6.4616994731079815, "learning_rate": 1.9283747646372597e-06, "loss": 17.5964, "step": 39349 }, { "epoch": 0.7192863801707277, "grad_norm": 6.7993360305386465, "learning_rate": 1.9281412012094904e-06, "loss": 17.6777, "step": 39350 }, { "epoch": 0.7193046593671742, "grad_norm": 9.072086387085568, "learning_rate": 1.9279076485483204e-06, "loss": 18.0048, "step": 39351 }, { "epoch": 0.7193229385636207, "grad_norm": 4.941401185240069, "learning_rate": 1.9276741066545685e-06, "loss": 17.0054, "step": 39352 }, { "epoch": 0.7193412177600673, "grad_norm": 5.41174545902238, "learning_rate": 1.92744057552905e-06, "loss": 16.9141, "step": 39353 }, { "epoch": 0.7193594969565138, "grad_norm": 6.76291249620265, "learning_rate": 1.9272070551725874e-06, "loss": 17.6326, "step": 39354 }, { "epoch": 0.7193777761529603, "grad_norm": 6.019905707947332, "learning_rate": 1.9269735455859977e-06, "loss": 17.3587, "step": 39355 }, { "epoch": 0.7193960553494069, "grad_norm": 5.69406100205934, "learning_rate": 1.926740046770097e-06, "loss": 17.3658, "step": 39356 }, { "epoch": 0.7194143345458534, "grad_norm": 7.118697006871791, "learning_rate": 1.9265065587257077e-06, "loss": 17.8554, "step": 39357 }, { "epoch": 0.7194326137422999, "grad_norm": 4.970320032696135, "learning_rate": 1.9262730814536448e-06, "loss": 17.0134, "step": 39358 }, { "epoch": 0.7194508929387464, "grad_norm": 6.092076069069587, "learning_rate": 1.9260396149547275e-06, "loss": 17.2018, "step": 39359 }, { "epoch": 0.7194691721351929, "grad_norm": 6.5114723986960215, "learning_rate": 1.925806159229776e-06, "loss": 17.5983, "step": 39360 }, { "epoch": 0.7194874513316395, "grad_norm": 7.577543727911575, "learning_rate": 1.925572714279608e-06, "loss": 17.6515, "step": 39361 }, { "epoch": 0.719505730528086, "grad_norm": 6.643217457037883, "learning_rate": 1.9253392801050386e-06, "loss": 17.5144, "step": 39362 }, { "epoch": 0.7195240097245326, "grad_norm": 6.373509655256605, "learning_rate": 1.92510585670689e-06, "loss": 17.4731, "step": 39363 }, { "epoch": 0.719542288920979, "grad_norm": 5.5843413620139835, "learning_rate": 1.924872444085979e-06, "loss": 17.2447, "step": 39364 }, { "epoch": 0.7195605681174255, "grad_norm": 7.312093634035649, "learning_rate": 1.924639042243121e-06, "loss": 17.5942, "step": 39365 }, { "epoch": 0.7195788473138721, "grad_norm": 5.2028935157779355, "learning_rate": 1.924405651179138e-06, "loss": 17.0154, "step": 39366 }, { "epoch": 0.7195971265103186, "grad_norm": 5.7386985659954055, "learning_rate": 1.9241722708948447e-06, "loss": 17.2927, "step": 39367 }, { "epoch": 0.7196154057067651, "grad_norm": 5.482845614692377, "learning_rate": 1.923938901391061e-06, "loss": 16.9046, "step": 39368 }, { "epoch": 0.7196336849032117, "grad_norm": 5.940735246447231, "learning_rate": 1.9237055426686058e-06, "loss": 17.2806, "step": 39369 }, { "epoch": 0.7196519640996581, "grad_norm": 5.273817539088839, "learning_rate": 1.923472194728294e-06, "loss": 17.1361, "step": 39370 }, { "epoch": 0.7196702432961047, "grad_norm": 6.314970838481352, "learning_rate": 1.923238857570947e-06, "loss": 17.4025, "step": 39371 }, { "epoch": 0.7196885224925512, "grad_norm": 5.793977928238878, "learning_rate": 1.923005531197381e-06, "loss": 17.3523, "step": 39372 }, { "epoch": 0.7197068016889977, "grad_norm": 5.4548448542738885, "learning_rate": 1.9227722156084116e-06, "loss": 17.3412, "step": 39373 }, { "epoch": 0.7197250808854443, "grad_norm": 5.368587255076508, "learning_rate": 1.92253891080486e-06, "loss": 17.0098, "step": 39374 }, { "epoch": 0.7197433600818908, "grad_norm": 7.648448603732145, "learning_rate": 1.9223056167875435e-06, "loss": 17.7862, "step": 39375 }, { "epoch": 0.7197616392783374, "grad_norm": 6.552843655203199, "learning_rate": 1.9220723335572766e-06, "loss": 17.482, "step": 39376 }, { "epoch": 0.7197799184747838, "grad_norm": 6.5506506788952485, "learning_rate": 1.9218390611148785e-06, "loss": 17.497, "step": 39377 }, { "epoch": 0.7197981976712303, "grad_norm": 6.361839445091338, "learning_rate": 1.92160579946117e-06, "loss": 17.2431, "step": 39378 }, { "epoch": 0.7198164768676769, "grad_norm": 5.886917811077173, "learning_rate": 1.9213725485969637e-06, "loss": 17.0916, "step": 39379 }, { "epoch": 0.7198347560641234, "grad_norm": 6.134665373861967, "learning_rate": 1.9211393085230814e-06, "loss": 17.1795, "step": 39380 }, { "epoch": 0.71985303526057, "grad_norm": 5.69948220979201, "learning_rate": 1.9209060792403365e-06, "loss": 17.4407, "step": 39381 }, { "epoch": 0.7198713144570165, "grad_norm": 7.014657703758205, "learning_rate": 1.920672860749551e-06, "loss": 17.9752, "step": 39382 }, { "epoch": 0.719889593653463, "grad_norm": 7.388537986340032, "learning_rate": 1.9204396530515394e-06, "loss": 17.4328, "step": 39383 }, { "epoch": 0.7199078728499095, "grad_norm": 5.746348744020305, "learning_rate": 1.9202064561471177e-06, "loss": 17.1286, "step": 39384 }, { "epoch": 0.719926152046356, "grad_norm": 5.874686800580906, "learning_rate": 1.9199732700371077e-06, "loss": 17.2717, "step": 39385 }, { "epoch": 0.7199444312428026, "grad_norm": 6.398596722933621, "learning_rate": 1.9197400947223217e-06, "loss": 17.4628, "step": 39386 }, { "epoch": 0.7199627104392491, "grad_norm": 7.441298226940803, "learning_rate": 1.9195069302035797e-06, "loss": 17.9859, "step": 39387 }, { "epoch": 0.7199809896356956, "grad_norm": 7.30062933886272, "learning_rate": 1.9192737764816997e-06, "loss": 18.0305, "step": 39388 }, { "epoch": 0.7199992688321422, "grad_norm": 4.865514495443596, "learning_rate": 1.9190406335574985e-06, "loss": 16.9012, "step": 39389 }, { "epoch": 0.7200175480285886, "grad_norm": 7.386905303061712, "learning_rate": 1.918807501431791e-06, "loss": 18.0204, "step": 39390 }, { "epoch": 0.7200358272250352, "grad_norm": 6.710761200178324, "learning_rate": 1.9185743801053967e-06, "loss": 17.1775, "step": 39391 }, { "epoch": 0.7200541064214817, "grad_norm": 5.619989783802573, "learning_rate": 1.918341269579132e-06, "loss": 17.1556, "step": 39392 }, { "epoch": 0.7200723856179282, "grad_norm": 5.251478624440169, "learning_rate": 1.9181081698538124e-06, "loss": 16.9001, "step": 39393 }, { "epoch": 0.7200906648143748, "grad_norm": 6.721758596862594, "learning_rate": 1.917875080930256e-06, "loss": 17.3701, "step": 39394 }, { "epoch": 0.7201089440108213, "grad_norm": 5.392822075224543, "learning_rate": 1.9176420028092817e-06, "loss": 16.8331, "step": 39395 }, { "epoch": 0.7201272232072679, "grad_norm": 5.621820418756486, "learning_rate": 1.917408935491703e-06, "loss": 16.9054, "step": 39396 }, { "epoch": 0.7201455024037143, "grad_norm": 4.866352874243959, "learning_rate": 1.917175878978341e-06, "loss": 16.9672, "step": 39397 }, { "epoch": 0.7201637816001608, "grad_norm": 7.492007509730173, "learning_rate": 1.9169428332700066e-06, "loss": 17.8611, "step": 39398 }, { "epoch": 0.7201820607966074, "grad_norm": 7.383302480548044, "learning_rate": 1.9167097983675233e-06, "loss": 17.7002, "step": 39399 }, { "epoch": 0.7202003399930539, "grad_norm": 6.516752891182845, "learning_rate": 1.916476774271704e-06, "loss": 17.8478, "step": 39400 }, { "epoch": 0.7202186191895005, "grad_norm": 7.661231436019741, "learning_rate": 1.9162437609833645e-06, "loss": 17.6358, "step": 39401 }, { "epoch": 0.720236898385947, "grad_norm": 7.012833821613501, "learning_rate": 1.9160107585033246e-06, "loss": 17.8895, "step": 39402 }, { "epoch": 0.7202551775823934, "grad_norm": 5.665860549280952, "learning_rate": 1.9157777668323975e-06, "loss": 17.1034, "step": 39403 }, { "epoch": 0.72027345677884, "grad_norm": 7.075618544517489, "learning_rate": 1.915544785971402e-06, "loss": 17.6402, "step": 39404 }, { "epoch": 0.7202917359752865, "grad_norm": 5.9792142863940665, "learning_rate": 1.915311815921156e-06, "loss": 17.3218, "step": 39405 }, { "epoch": 0.7203100151717331, "grad_norm": 8.18617830296226, "learning_rate": 1.915078856682474e-06, "loss": 18.4439, "step": 39406 }, { "epoch": 0.7203282943681796, "grad_norm": 5.681751269563135, "learning_rate": 1.9148459082561715e-06, "loss": 17.1647, "step": 39407 }, { "epoch": 0.7203465735646261, "grad_norm": 6.536984247076757, "learning_rate": 1.9146129706430684e-06, "loss": 17.2158, "step": 39408 }, { "epoch": 0.7203648527610726, "grad_norm": 5.998273003174219, "learning_rate": 1.9143800438439785e-06, "loss": 17.3491, "step": 39409 }, { "epoch": 0.7203831319575191, "grad_norm": 6.28759568822665, "learning_rate": 1.9141471278597172e-06, "loss": 17.3062, "step": 39410 }, { "epoch": 0.7204014111539657, "grad_norm": 6.358935937169589, "learning_rate": 1.9139142226911045e-06, "loss": 17.314, "step": 39411 }, { "epoch": 0.7204196903504122, "grad_norm": 7.098728541810903, "learning_rate": 1.9136813283389525e-06, "loss": 17.7141, "step": 39412 }, { "epoch": 0.7204379695468587, "grad_norm": 8.16435221673069, "learning_rate": 1.91344844480408e-06, "loss": 18.0396, "step": 39413 }, { "epoch": 0.7204562487433053, "grad_norm": 5.6889243584275455, "learning_rate": 1.913215572087305e-06, "loss": 17.1055, "step": 39414 }, { "epoch": 0.7204745279397518, "grad_norm": 7.0719948468624425, "learning_rate": 1.912982710189439e-06, "loss": 17.6812, "step": 39415 }, { "epoch": 0.7204928071361983, "grad_norm": 6.760657042983027, "learning_rate": 1.9127498591113027e-06, "loss": 17.891, "step": 39416 }, { "epoch": 0.7205110863326448, "grad_norm": 8.659622101770326, "learning_rate": 1.91251701885371e-06, "loss": 18.6892, "step": 39417 }, { "epoch": 0.7205293655290913, "grad_norm": 6.689360199675609, "learning_rate": 1.912284189417476e-06, "loss": 17.794, "step": 39418 }, { "epoch": 0.7205476447255379, "grad_norm": 6.024674243466785, "learning_rate": 1.9120513708034195e-06, "loss": 17.3078, "step": 39419 }, { "epoch": 0.7205659239219844, "grad_norm": 6.638102156480148, "learning_rate": 1.9118185630123554e-06, "loss": 17.3461, "step": 39420 }, { "epoch": 0.720584203118431, "grad_norm": 5.686133938217912, "learning_rate": 1.911585766045097e-06, "loss": 17.2325, "step": 39421 }, { "epoch": 0.7206024823148774, "grad_norm": 5.529387398034301, "learning_rate": 1.911352979902462e-06, "loss": 17.0414, "step": 39422 }, { "epoch": 0.7206207615113239, "grad_norm": 4.915187505948289, "learning_rate": 1.9111202045852695e-06, "loss": 16.8259, "step": 39423 }, { "epoch": 0.7206390407077705, "grad_norm": 5.655260659953581, "learning_rate": 1.9108874400943306e-06, "loss": 17.0742, "step": 39424 }, { "epoch": 0.720657319904217, "grad_norm": 6.736554575169247, "learning_rate": 1.9106546864304646e-06, "loss": 17.6113, "step": 39425 }, { "epoch": 0.7206755991006636, "grad_norm": 6.0862226945215925, "learning_rate": 1.910421943594484e-06, "loss": 16.7688, "step": 39426 }, { "epoch": 0.7206938782971101, "grad_norm": 6.618297038817229, "learning_rate": 1.910189211587209e-06, "loss": 17.8098, "step": 39427 }, { "epoch": 0.7207121574935565, "grad_norm": 6.228419505107042, "learning_rate": 1.9099564904094518e-06, "loss": 17.4267, "step": 39428 }, { "epoch": 0.7207304366900031, "grad_norm": 6.729461504265279, "learning_rate": 1.9097237800620278e-06, "loss": 17.5561, "step": 39429 }, { "epoch": 0.7207487158864496, "grad_norm": 5.994791998162833, "learning_rate": 1.9094910805457555e-06, "loss": 17.7179, "step": 39430 }, { "epoch": 0.7207669950828962, "grad_norm": 5.439126899117188, "learning_rate": 1.9092583918614465e-06, "loss": 17.2669, "step": 39431 }, { "epoch": 0.7207852742793427, "grad_norm": 7.00253609823629, "learning_rate": 1.9090257140099193e-06, "loss": 17.8905, "step": 39432 }, { "epoch": 0.7208035534757892, "grad_norm": 5.5975752598657476, "learning_rate": 1.90879304699199e-06, "loss": 17.2595, "step": 39433 }, { "epoch": 0.7208218326722358, "grad_norm": 5.514053940381154, "learning_rate": 1.9085603908084734e-06, "loss": 17.0473, "step": 39434 }, { "epoch": 0.7208401118686822, "grad_norm": 6.280397417994038, "learning_rate": 1.9083277454601817e-06, "loss": 17.5613, "step": 39435 }, { "epoch": 0.7208583910651287, "grad_norm": 5.239687290908809, "learning_rate": 1.9080951109479356e-06, "loss": 16.98, "step": 39436 }, { "epoch": 0.7208766702615753, "grad_norm": 5.454598967874161, "learning_rate": 1.907862487272547e-06, "loss": 16.9574, "step": 39437 }, { "epoch": 0.7208949494580218, "grad_norm": 5.831275020880215, "learning_rate": 1.907629874434831e-06, "loss": 17.3035, "step": 39438 }, { "epoch": 0.7209132286544684, "grad_norm": 5.085266640928264, "learning_rate": 1.9073972724356053e-06, "loss": 16.8124, "step": 39439 }, { "epoch": 0.7209315078509149, "grad_norm": 6.127804795589586, "learning_rate": 1.9071646812756828e-06, "loss": 17.5376, "step": 39440 }, { "epoch": 0.7209497870473613, "grad_norm": 5.049345020047209, "learning_rate": 1.9069321009558789e-06, "loss": 17.0282, "step": 39441 }, { "epoch": 0.7209680662438079, "grad_norm": 4.9574367186688235, "learning_rate": 1.9066995314770115e-06, "loss": 16.9308, "step": 39442 }, { "epoch": 0.7209863454402544, "grad_norm": 6.187028167387683, "learning_rate": 1.9064669728398922e-06, "loss": 17.2436, "step": 39443 }, { "epoch": 0.721004624636701, "grad_norm": 8.154504094592818, "learning_rate": 1.9062344250453397e-06, "loss": 17.8627, "step": 39444 }, { "epoch": 0.7210229038331475, "grad_norm": 7.2556498278473835, "learning_rate": 1.9060018880941667e-06, "loss": 17.729, "step": 39445 }, { "epoch": 0.721041183029594, "grad_norm": 6.212620604401109, "learning_rate": 1.9057693619871864e-06, "loss": 17.4044, "step": 39446 }, { "epoch": 0.7210594622260406, "grad_norm": 5.124855786208774, "learning_rate": 1.9055368467252182e-06, "loss": 16.9804, "step": 39447 }, { "epoch": 0.721077741422487, "grad_norm": 8.604856339714434, "learning_rate": 1.905304342309075e-06, "loss": 17.7805, "step": 39448 }, { "epoch": 0.7210960206189336, "grad_norm": 6.081548186417526, "learning_rate": 1.9050718487395698e-06, "loss": 17.2663, "step": 39449 }, { "epoch": 0.7211142998153801, "grad_norm": 6.177648453615935, "learning_rate": 1.904839366017519e-06, "loss": 17.1547, "step": 39450 }, { "epoch": 0.7211325790118266, "grad_norm": 6.385872568886373, "learning_rate": 1.9046068941437395e-06, "loss": 17.2328, "step": 39451 }, { "epoch": 0.7211508582082732, "grad_norm": 5.913886713092199, "learning_rate": 1.9043744331190423e-06, "loss": 17.2769, "step": 39452 }, { "epoch": 0.7211691374047197, "grad_norm": 6.737602636407927, "learning_rate": 1.9041419829442458e-06, "loss": 17.5964, "step": 39453 }, { "epoch": 0.7211874166011663, "grad_norm": 6.250141067021935, "learning_rate": 1.9039095436201637e-06, "loss": 17.1907, "step": 39454 }, { "epoch": 0.7212056957976127, "grad_norm": 4.787748649265594, "learning_rate": 1.9036771151476075e-06, "loss": 16.9272, "step": 39455 }, { "epoch": 0.7212239749940592, "grad_norm": 7.066362175400825, "learning_rate": 1.9034446975273963e-06, "loss": 17.6617, "step": 39456 }, { "epoch": 0.7212422541905058, "grad_norm": 7.314636155017323, "learning_rate": 1.9032122907603412e-06, "loss": 17.4547, "step": 39457 }, { "epoch": 0.7212605333869523, "grad_norm": 8.098362049737556, "learning_rate": 1.9029798948472595e-06, "loss": 17.9425, "step": 39458 }, { "epoch": 0.7212788125833989, "grad_norm": 7.660824109174309, "learning_rate": 1.902747509788963e-06, "loss": 17.7677, "step": 39459 }, { "epoch": 0.7212970917798454, "grad_norm": 7.755428633673503, "learning_rate": 1.902515135586268e-06, "loss": 17.9299, "step": 39460 }, { "epoch": 0.7213153709762918, "grad_norm": 4.39512744812679, "learning_rate": 1.90228277223999e-06, "loss": 16.6095, "step": 39461 }, { "epoch": 0.7213336501727384, "grad_norm": 5.972346656494952, "learning_rate": 1.9020504197509426e-06, "loss": 16.9489, "step": 39462 }, { "epoch": 0.7213519293691849, "grad_norm": 5.9254664602389635, "learning_rate": 1.9018180781199374e-06, "loss": 17.3655, "step": 39463 }, { "epoch": 0.7213702085656315, "grad_norm": 5.829029245826002, "learning_rate": 1.9015857473477928e-06, "loss": 17.0734, "step": 39464 }, { "epoch": 0.721388487762078, "grad_norm": 6.177207926164213, "learning_rate": 1.9013534274353212e-06, "loss": 17.4501, "step": 39465 }, { "epoch": 0.7214067669585245, "grad_norm": 6.132784753691851, "learning_rate": 1.9011211183833356e-06, "loss": 17.267, "step": 39466 }, { "epoch": 0.721425046154971, "grad_norm": 7.420052192037459, "learning_rate": 1.9008888201926517e-06, "loss": 17.753, "step": 39467 }, { "epoch": 0.7214433253514175, "grad_norm": 5.619990467407185, "learning_rate": 1.9006565328640847e-06, "loss": 17.3623, "step": 39468 }, { "epoch": 0.7214616045478641, "grad_norm": 6.470891738736214, "learning_rate": 1.900424256398446e-06, "loss": 17.4342, "step": 39469 }, { "epoch": 0.7214798837443106, "grad_norm": 5.876460686793126, "learning_rate": 1.9001919907965532e-06, "loss": 17.1153, "step": 39470 }, { "epoch": 0.7214981629407571, "grad_norm": 6.78439294957707, "learning_rate": 1.8999597360592187e-06, "loss": 17.867, "step": 39471 }, { "epoch": 0.7215164421372037, "grad_norm": 7.015919208295625, "learning_rate": 1.8997274921872544e-06, "loss": 17.9843, "step": 39472 }, { "epoch": 0.7215347213336502, "grad_norm": 5.4749122275097655, "learning_rate": 1.8994952591814781e-06, "loss": 17.246, "step": 39473 }, { "epoch": 0.7215530005300967, "grad_norm": 6.038641160498236, "learning_rate": 1.8992630370426996e-06, "loss": 17.3844, "step": 39474 }, { "epoch": 0.7215712797265432, "grad_norm": 7.085200394914836, "learning_rate": 1.899030825771737e-06, "loss": 17.6687, "step": 39475 }, { "epoch": 0.7215895589229897, "grad_norm": 6.332635933597803, "learning_rate": 1.8987986253694007e-06, "loss": 17.5544, "step": 39476 }, { "epoch": 0.7216078381194363, "grad_norm": 5.886238532842066, "learning_rate": 1.8985664358365063e-06, "loss": 17.1582, "step": 39477 }, { "epoch": 0.7216261173158828, "grad_norm": 6.0617695284750255, "learning_rate": 1.898334257173869e-06, "loss": 17.459, "step": 39478 }, { "epoch": 0.7216443965123294, "grad_norm": 5.372115089333069, "learning_rate": 1.898102089382301e-06, "loss": 17.2415, "step": 39479 }, { "epoch": 0.7216626757087758, "grad_norm": 6.222480678333051, "learning_rate": 1.8978699324626137e-06, "loss": 17.1205, "step": 39480 }, { "epoch": 0.7216809549052223, "grad_norm": 6.333532388503747, "learning_rate": 1.8976377864156258e-06, "loss": 17.3583, "step": 39481 }, { "epoch": 0.7216992341016689, "grad_norm": 5.879456313897671, "learning_rate": 1.8974056512421473e-06, "loss": 17.3245, "step": 39482 }, { "epoch": 0.7217175132981154, "grad_norm": 4.720032417235298, "learning_rate": 1.897173526942992e-06, "loss": 16.8481, "step": 39483 }, { "epoch": 0.721735792494562, "grad_norm": 6.5274412240437325, "learning_rate": 1.8969414135189752e-06, "loss": 17.6537, "step": 39484 }, { "epoch": 0.7217540716910085, "grad_norm": 5.65627416102659, "learning_rate": 1.8967093109709078e-06, "loss": 17.1603, "step": 39485 }, { "epoch": 0.721772350887455, "grad_norm": 6.013040786960017, "learning_rate": 1.8964772192996055e-06, "loss": 17.3188, "step": 39486 }, { "epoch": 0.7217906300839015, "grad_norm": 4.777635227823906, "learning_rate": 1.8962451385058828e-06, "loss": 16.9209, "step": 39487 }, { "epoch": 0.721808909280348, "grad_norm": 5.913965477401988, "learning_rate": 1.8960130685905492e-06, "loss": 17.4957, "step": 39488 }, { "epoch": 0.7218271884767946, "grad_norm": 7.095966066467325, "learning_rate": 1.8957810095544226e-06, "loss": 17.5559, "step": 39489 }, { "epoch": 0.7218454676732411, "grad_norm": 5.438556350090278, "learning_rate": 1.8955489613983146e-06, "loss": 17.1664, "step": 39490 }, { "epoch": 0.7218637468696876, "grad_norm": 4.732990623258617, "learning_rate": 1.8953169241230356e-06, "loss": 16.8394, "step": 39491 }, { "epoch": 0.7218820260661342, "grad_norm": 5.493840345499968, "learning_rate": 1.895084897729404e-06, "loss": 17.2646, "step": 39492 }, { "epoch": 0.7219003052625806, "grad_norm": 7.139335201182932, "learning_rate": 1.89485288221823e-06, "loss": 17.5498, "step": 39493 }, { "epoch": 0.7219185844590272, "grad_norm": 6.49319980821652, "learning_rate": 1.8946208775903253e-06, "loss": 17.5024, "step": 39494 }, { "epoch": 0.7219368636554737, "grad_norm": 5.537429223308804, "learning_rate": 1.8943888838465046e-06, "loss": 17.0922, "step": 39495 }, { "epoch": 0.7219551428519202, "grad_norm": 5.562317781325998, "learning_rate": 1.8941569009875838e-06, "loss": 17.1975, "step": 39496 }, { "epoch": 0.7219734220483668, "grad_norm": 5.982301859554959, "learning_rate": 1.8939249290143713e-06, "loss": 17.3249, "step": 39497 }, { "epoch": 0.7219917012448133, "grad_norm": 6.646761232652843, "learning_rate": 1.8936929679276844e-06, "loss": 17.3165, "step": 39498 }, { "epoch": 0.7220099804412599, "grad_norm": 6.248338350370349, "learning_rate": 1.893461017728334e-06, "loss": 17.3505, "step": 39499 }, { "epoch": 0.7220282596377063, "grad_norm": 5.078192574460159, "learning_rate": 1.893229078417132e-06, "loss": 16.9185, "step": 39500 }, { "epoch": 0.7220465388341528, "grad_norm": 5.543273349306306, "learning_rate": 1.8929971499948934e-06, "loss": 17.1383, "step": 39501 }, { "epoch": 0.7220648180305994, "grad_norm": 6.64163365240175, "learning_rate": 1.8927652324624306e-06, "loss": 17.3856, "step": 39502 }, { "epoch": 0.7220830972270459, "grad_norm": 7.219721483061762, "learning_rate": 1.8925333258205542e-06, "loss": 17.4217, "step": 39503 }, { "epoch": 0.7221013764234924, "grad_norm": 8.122764344966077, "learning_rate": 1.8923014300700788e-06, "loss": 17.9622, "step": 39504 }, { "epoch": 0.722119655619939, "grad_norm": 5.3554265908749645, "learning_rate": 1.8920695452118171e-06, "loss": 16.9608, "step": 39505 }, { "epoch": 0.7221379348163854, "grad_norm": 7.156536622927122, "learning_rate": 1.8918376712465836e-06, "loss": 17.8367, "step": 39506 }, { "epoch": 0.722156214012832, "grad_norm": 5.416860347538488, "learning_rate": 1.8916058081751898e-06, "loss": 17.0648, "step": 39507 }, { "epoch": 0.7221744932092785, "grad_norm": 7.2351210510761526, "learning_rate": 1.891373955998446e-06, "loss": 17.1349, "step": 39508 }, { "epoch": 0.722192772405725, "grad_norm": 7.763978581692977, "learning_rate": 1.8911421147171683e-06, "loss": 18.0806, "step": 39509 }, { "epoch": 0.7222110516021716, "grad_norm": 6.142527573925376, "learning_rate": 1.890910284332168e-06, "loss": 17.2971, "step": 39510 }, { "epoch": 0.7222293307986181, "grad_norm": 6.534710871835622, "learning_rate": 1.8906784648442555e-06, "loss": 17.6031, "step": 39511 }, { "epoch": 0.7222476099950647, "grad_norm": 6.016800412401101, "learning_rate": 1.8904466562542467e-06, "loss": 17.2054, "step": 39512 }, { "epoch": 0.7222658891915111, "grad_norm": 7.056535271350689, "learning_rate": 1.890214858562951e-06, "loss": 18.1312, "step": 39513 }, { "epoch": 0.7222841683879576, "grad_norm": 6.661367586489668, "learning_rate": 1.889983071771182e-06, "loss": 17.5227, "step": 39514 }, { "epoch": 0.7223024475844042, "grad_norm": 6.725621725538553, "learning_rate": 1.8897512958797548e-06, "loss": 17.4513, "step": 39515 }, { "epoch": 0.7223207267808507, "grad_norm": 5.5190944261041075, "learning_rate": 1.8895195308894786e-06, "loss": 17.1684, "step": 39516 }, { "epoch": 0.7223390059772973, "grad_norm": 7.391419736439911, "learning_rate": 1.8892877768011659e-06, "loss": 17.3692, "step": 39517 }, { "epoch": 0.7223572851737438, "grad_norm": 6.788940394692898, "learning_rate": 1.8890560336156304e-06, "loss": 17.5073, "step": 39518 }, { "epoch": 0.7223755643701902, "grad_norm": 7.5940873126892585, "learning_rate": 1.888824301333682e-06, "loss": 17.9935, "step": 39519 }, { "epoch": 0.7223938435666368, "grad_norm": 6.153498569807919, "learning_rate": 1.8885925799561366e-06, "loss": 17.4041, "step": 39520 }, { "epoch": 0.7224121227630833, "grad_norm": 6.605354902129577, "learning_rate": 1.8883608694838024e-06, "loss": 17.3596, "step": 39521 }, { "epoch": 0.7224304019595299, "grad_norm": 7.006593219763704, "learning_rate": 1.8881291699174947e-06, "loss": 17.7363, "step": 39522 }, { "epoch": 0.7224486811559764, "grad_norm": 6.058378360712673, "learning_rate": 1.8878974812580226e-06, "loss": 17.5462, "step": 39523 }, { "epoch": 0.7224669603524229, "grad_norm": 6.511787806387248, "learning_rate": 1.8876658035062018e-06, "loss": 17.5318, "step": 39524 }, { "epoch": 0.7224852395488695, "grad_norm": 9.54368891052374, "learning_rate": 1.8874341366628402e-06, "loss": 18.1226, "step": 39525 }, { "epoch": 0.7225035187453159, "grad_norm": 6.389899954895077, "learning_rate": 1.8872024807287537e-06, "loss": 17.3147, "step": 39526 }, { "epoch": 0.7225217979417625, "grad_norm": 4.941295571365248, "learning_rate": 1.8869708357047522e-06, "loss": 16.9383, "step": 39527 }, { "epoch": 0.722540077138209, "grad_norm": 7.574142012167196, "learning_rate": 1.8867392015916465e-06, "loss": 18.1948, "step": 39528 }, { "epoch": 0.7225583563346555, "grad_norm": 6.294328309020103, "learning_rate": 1.8865075783902514e-06, "loss": 17.3255, "step": 39529 }, { "epoch": 0.7225766355311021, "grad_norm": 6.070283454029799, "learning_rate": 1.8862759661013751e-06, "loss": 17.0561, "step": 39530 }, { "epoch": 0.7225949147275486, "grad_norm": 6.004430835124229, "learning_rate": 1.8860443647258314e-06, "loss": 17.3785, "step": 39531 }, { "epoch": 0.7226131939239951, "grad_norm": 5.904040956477871, "learning_rate": 1.885812774264434e-06, "loss": 17.072, "step": 39532 }, { "epoch": 0.7226314731204416, "grad_norm": 6.703951961186167, "learning_rate": 1.8855811947179919e-06, "loss": 17.784, "step": 39533 }, { "epoch": 0.7226497523168881, "grad_norm": 6.302364650292222, "learning_rate": 1.8853496260873162e-06, "loss": 17.2722, "step": 39534 }, { "epoch": 0.7226680315133347, "grad_norm": 7.291729169811596, "learning_rate": 1.8851180683732218e-06, "loss": 18.0901, "step": 39535 }, { "epoch": 0.7226863107097812, "grad_norm": 6.618889159634016, "learning_rate": 1.8848865215765161e-06, "loss": 17.8166, "step": 39536 }, { "epoch": 0.7227045899062278, "grad_norm": 4.9920613507643745, "learning_rate": 1.8846549856980144e-06, "loss": 16.8793, "step": 39537 }, { "epoch": 0.7227228691026742, "grad_norm": 7.011323105913278, "learning_rate": 1.884423460738527e-06, "loss": 17.6711, "step": 39538 }, { "epoch": 0.7227411482991207, "grad_norm": 5.579955285553166, "learning_rate": 1.8841919466988629e-06, "loss": 17.3208, "step": 39539 }, { "epoch": 0.7227594274955673, "grad_norm": 5.846352194526607, "learning_rate": 1.8839604435798358e-06, "loss": 17.4106, "step": 39540 }, { "epoch": 0.7227777066920138, "grad_norm": 6.334864439564309, "learning_rate": 1.8837289513822594e-06, "loss": 17.3028, "step": 39541 }, { "epoch": 0.7227959858884604, "grad_norm": 5.370188590916477, "learning_rate": 1.8834974701069398e-06, "loss": 17.0306, "step": 39542 }, { "epoch": 0.7228142650849069, "grad_norm": 6.628192152114881, "learning_rate": 1.8832659997546937e-06, "loss": 17.6298, "step": 39543 }, { "epoch": 0.7228325442813534, "grad_norm": 5.48559712832591, "learning_rate": 1.8830345403263296e-06, "loss": 17.2474, "step": 39544 }, { "epoch": 0.7228508234777999, "grad_norm": 6.948682488012024, "learning_rate": 1.882803091822657e-06, "loss": 17.6264, "step": 39545 }, { "epoch": 0.7228691026742464, "grad_norm": 5.787332028015406, "learning_rate": 1.8825716542444904e-06, "loss": 17.1376, "step": 39546 }, { "epoch": 0.722887381870693, "grad_norm": 6.760168716973096, "learning_rate": 1.8823402275926407e-06, "loss": 17.9231, "step": 39547 }, { "epoch": 0.7229056610671395, "grad_norm": 5.708064862855694, "learning_rate": 1.8821088118679153e-06, "loss": 17.3849, "step": 39548 }, { "epoch": 0.722923940263586, "grad_norm": 6.029596565406367, "learning_rate": 1.8818774070711281e-06, "loss": 17.4518, "step": 39549 }, { "epoch": 0.7229422194600326, "grad_norm": 8.739378261136123, "learning_rate": 1.8816460132030906e-06, "loss": 17.9478, "step": 39550 }, { "epoch": 0.722960498656479, "grad_norm": 7.29651642517017, "learning_rate": 1.8814146302646142e-06, "loss": 17.5631, "step": 39551 }, { "epoch": 0.7229787778529256, "grad_norm": 6.694414564485923, "learning_rate": 1.8811832582565092e-06, "loss": 17.7233, "step": 39552 }, { "epoch": 0.7229970570493721, "grad_norm": 5.882280243056118, "learning_rate": 1.8809518971795847e-06, "loss": 17.1411, "step": 39553 }, { "epoch": 0.7230153362458186, "grad_norm": 5.683593932224842, "learning_rate": 1.880720547034655e-06, "loss": 17.1171, "step": 39554 }, { "epoch": 0.7230336154422652, "grad_norm": 6.987977666376229, "learning_rate": 1.8804892078225289e-06, "loss": 17.9421, "step": 39555 }, { "epoch": 0.7230518946387117, "grad_norm": 7.043678060211811, "learning_rate": 1.8802578795440158e-06, "loss": 17.7054, "step": 39556 }, { "epoch": 0.7230701738351583, "grad_norm": 6.753743757069783, "learning_rate": 1.8800265621999292e-06, "loss": 17.5881, "step": 39557 }, { "epoch": 0.7230884530316047, "grad_norm": 5.540797725847781, "learning_rate": 1.8797952557910776e-06, "loss": 17.2318, "step": 39558 }, { "epoch": 0.7231067322280512, "grad_norm": 5.658748453930914, "learning_rate": 1.879563960318273e-06, "loss": 17.1606, "step": 39559 }, { "epoch": 0.7231250114244978, "grad_norm": 5.8882403197857025, "learning_rate": 1.879332675782327e-06, "loss": 17.2288, "step": 39560 }, { "epoch": 0.7231432906209443, "grad_norm": 5.244809727331439, "learning_rate": 1.8791014021840498e-06, "loss": 17.1605, "step": 39561 }, { "epoch": 0.7231615698173909, "grad_norm": 4.23986992203819, "learning_rate": 1.8788701395242498e-06, "loss": 16.6651, "step": 39562 }, { "epoch": 0.7231798490138374, "grad_norm": 7.131110974165806, "learning_rate": 1.8786388878037403e-06, "loss": 17.578, "step": 39563 }, { "epoch": 0.7231981282102838, "grad_norm": 5.867468499313671, "learning_rate": 1.8784076470233292e-06, "loss": 17.2872, "step": 39564 }, { "epoch": 0.7232164074067304, "grad_norm": 5.3172505468639795, "learning_rate": 1.87817641718383e-06, "loss": 17.1199, "step": 39565 }, { "epoch": 0.7232346866031769, "grad_norm": 6.378908547146976, "learning_rate": 1.877945198286052e-06, "loss": 17.6861, "step": 39566 }, { "epoch": 0.7232529657996235, "grad_norm": 5.05093624569941, "learning_rate": 1.8777139903308028e-06, "loss": 17.2396, "step": 39567 }, { "epoch": 0.72327124499607, "grad_norm": 5.560913243160953, "learning_rate": 1.8774827933188956e-06, "loss": 17.2402, "step": 39568 }, { "epoch": 0.7232895241925165, "grad_norm": 5.397110652833583, "learning_rate": 1.8772516072511416e-06, "loss": 16.9226, "step": 39569 }, { "epoch": 0.723307803388963, "grad_norm": 5.35833244071776, "learning_rate": 1.8770204321283481e-06, "loss": 17.1358, "step": 39570 }, { "epoch": 0.7233260825854095, "grad_norm": 6.418818604655567, "learning_rate": 1.8767892679513289e-06, "loss": 17.6533, "step": 39571 }, { "epoch": 0.723344361781856, "grad_norm": 6.328777639164181, "learning_rate": 1.8765581147208922e-06, "loss": 17.6353, "step": 39572 }, { "epoch": 0.7233626409783026, "grad_norm": 6.450546737699009, "learning_rate": 1.8763269724378463e-06, "loss": 17.7151, "step": 39573 }, { "epoch": 0.7233809201747491, "grad_norm": 6.8487092991380605, "learning_rate": 1.8760958411030055e-06, "loss": 17.3685, "step": 39574 }, { "epoch": 0.7233991993711957, "grad_norm": 6.650694901001396, "learning_rate": 1.8758647207171775e-06, "loss": 17.5704, "step": 39575 }, { "epoch": 0.7234174785676422, "grad_norm": 6.824598043428606, "learning_rate": 1.8756336112811706e-06, "loss": 17.5548, "step": 39576 }, { "epoch": 0.7234357577640886, "grad_norm": 6.0397745297413605, "learning_rate": 1.875402512795797e-06, "loss": 17.3884, "step": 39577 }, { "epoch": 0.7234540369605352, "grad_norm": 6.380773766903733, "learning_rate": 1.875171425261868e-06, "loss": 17.4061, "step": 39578 }, { "epoch": 0.7234723161569817, "grad_norm": 5.391054401727346, "learning_rate": 1.8749403486801904e-06, "loss": 17.1618, "step": 39579 }, { "epoch": 0.7234905953534283, "grad_norm": 5.519647328847261, "learning_rate": 1.8747092830515779e-06, "loss": 17.1264, "step": 39580 }, { "epoch": 0.7235088745498748, "grad_norm": 6.654149253048029, "learning_rate": 1.8744782283768359e-06, "loss": 17.4376, "step": 39581 }, { "epoch": 0.7235271537463213, "grad_norm": 5.655350805812501, "learning_rate": 1.8742471846567784e-06, "loss": 17.3438, "step": 39582 }, { "epoch": 0.7235454329427679, "grad_norm": 6.13778006148919, "learning_rate": 1.8740161518922128e-06, "loss": 17.2908, "step": 39583 }, { "epoch": 0.7235637121392143, "grad_norm": 6.322117286129509, "learning_rate": 1.8737851300839482e-06, "loss": 17.3784, "step": 39584 }, { "epoch": 0.7235819913356609, "grad_norm": 5.425322444620258, "learning_rate": 1.8735541192327967e-06, "loss": 17.2569, "step": 39585 }, { "epoch": 0.7236002705321074, "grad_norm": 6.1688653405917595, "learning_rate": 1.8733231193395652e-06, "loss": 17.362, "step": 39586 }, { "epoch": 0.7236185497285539, "grad_norm": 5.714128318221477, "learning_rate": 1.8730921304050649e-06, "loss": 17.0227, "step": 39587 }, { "epoch": 0.7236368289250005, "grad_norm": 6.7392020767651, "learning_rate": 1.8728611524301072e-06, "loss": 17.7462, "step": 39588 }, { "epoch": 0.723655108121447, "grad_norm": 4.638512277458766, "learning_rate": 1.8726301854154993e-06, "loss": 16.7976, "step": 39589 }, { "epoch": 0.7236733873178935, "grad_norm": 5.457936590633315, "learning_rate": 1.8723992293620502e-06, "loss": 17.1198, "step": 39590 }, { "epoch": 0.72369166651434, "grad_norm": 6.326635811505847, "learning_rate": 1.8721682842705713e-06, "loss": 17.6202, "step": 39591 }, { "epoch": 0.7237099457107865, "grad_norm": 5.37614581466165, "learning_rate": 1.8719373501418719e-06, "loss": 17.1688, "step": 39592 }, { "epoch": 0.7237282249072331, "grad_norm": 7.433396073673296, "learning_rate": 1.8717064269767588e-06, "loss": 18.0103, "step": 39593 }, { "epoch": 0.7237465041036796, "grad_norm": 5.935832214009883, "learning_rate": 1.8714755147760428e-06, "loss": 17.2573, "step": 39594 }, { "epoch": 0.7237647833001262, "grad_norm": 5.9069053063999695, "learning_rate": 1.8712446135405355e-06, "loss": 17.1576, "step": 39595 }, { "epoch": 0.7237830624965726, "grad_norm": 8.09783454928108, "learning_rate": 1.871013723271043e-06, "loss": 18.0376, "step": 39596 }, { "epoch": 0.7238013416930191, "grad_norm": 5.767849609392849, "learning_rate": 1.8707828439683766e-06, "loss": 17.2719, "step": 39597 }, { "epoch": 0.7238196208894657, "grad_norm": 6.760885776518806, "learning_rate": 1.870551975633344e-06, "loss": 17.4954, "step": 39598 }, { "epoch": 0.7238379000859122, "grad_norm": 6.349822830349053, "learning_rate": 1.8703211182667568e-06, "loss": 17.1839, "step": 39599 }, { "epoch": 0.7238561792823588, "grad_norm": 6.6670161824093634, "learning_rate": 1.8700902718694224e-06, "loss": 17.1653, "step": 39600 }, { "epoch": 0.7238744584788053, "grad_norm": 5.969225413967093, "learning_rate": 1.869859436442148e-06, "loss": 17.2538, "step": 39601 }, { "epoch": 0.7238927376752518, "grad_norm": 5.676436339407951, "learning_rate": 1.8696286119857464e-06, "loss": 17.3343, "step": 39602 }, { "epoch": 0.7239110168716983, "grad_norm": 6.5631167915804465, "learning_rate": 1.8693977985010237e-06, "loss": 17.2373, "step": 39603 }, { "epoch": 0.7239292960681448, "grad_norm": 6.738329609772392, "learning_rate": 1.86916699598879e-06, "loss": 17.2215, "step": 39604 }, { "epoch": 0.7239475752645914, "grad_norm": 10.247886447303394, "learning_rate": 1.8689362044498554e-06, "loss": 18.7677, "step": 39605 }, { "epoch": 0.7239658544610379, "grad_norm": 6.415020046399777, "learning_rate": 1.8687054238850284e-06, "loss": 17.7202, "step": 39606 }, { "epoch": 0.7239841336574844, "grad_norm": 5.389981045772407, "learning_rate": 1.868474654295115e-06, "loss": 17.1232, "step": 39607 }, { "epoch": 0.724002412853931, "grad_norm": 8.248606984963624, "learning_rate": 1.868243895680928e-06, "loss": 18.2884, "step": 39608 }, { "epoch": 0.7240206920503774, "grad_norm": 7.072069638420174, "learning_rate": 1.8680131480432738e-06, "loss": 17.8494, "step": 39609 }, { "epoch": 0.724038971246824, "grad_norm": 7.609213807147946, "learning_rate": 1.8677824113829607e-06, "loss": 18.2059, "step": 39610 }, { "epoch": 0.7240572504432705, "grad_norm": 6.845971540006928, "learning_rate": 1.8675516857008002e-06, "loss": 17.6701, "step": 39611 }, { "epoch": 0.724075529639717, "grad_norm": 6.710297494303263, "learning_rate": 1.8673209709975977e-06, "loss": 17.4236, "step": 39612 }, { "epoch": 0.7240938088361636, "grad_norm": 7.124101222766626, "learning_rate": 1.8670902672741626e-06, "loss": 17.7646, "step": 39613 }, { "epoch": 0.7241120880326101, "grad_norm": 6.22219533531145, "learning_rate": 1.8668595745313067e-06, "loss": 17.5631, "step": 39614 }, { "epoch": 0.7241303672290567, "grad_norm": 6.807473605079982, "learning_rate": 1.866628892769834e-06, "loss": 17.7006, "step": 39615 }, { "epoch": 0.7241486464255031, "grad_norm": 5.657014898952063, "learning_rate": 1.8663982219905568e-06, "loss": 17.2084, "step": 39616 }, { "epoch": 0.7241669256219496, "grad_norm": 4.739050418314457, "learning_rate": 1.8661675621942821e-06, "loss": 16.8424, "step": 39617 }, { "epoch": 0.7241852048183962, "grad_norm": 5.54876709906352, "learning_rate": 1.8659369133818162e-06, "loss": 17.3005, "step": 39618 }, { "epoch": 0.7242034840148427, "grad_norm": 6.1002004835489325, "learning_rate": 1.8657062755539712e-06, "loss": 17.1865, "step": 39619 }, { "epoch": 0.7242217632112893, "grad_norm": 5.722864573974753, "learning_rate": 1.8654756487115538e-06, "loss": 17.1463, "step": 39620 }, { "epoch": 0.7242400424077358, "grad_norm": 5.805663914949987, "learning_rate": 1.8652450328553705e-06, "loss": 17.1488, "step": 39621 }, { "epoch": 0.7242583216041822, "grad_norm": 6.9398681286296355, "learning_rate": 1.865014427986232e-06, "loss": 17.9292, "step": 39622 }, { "epoch": 0.7242766008006288, "grad_norm": 6.876593017765146, "learning_rate": 1.8647838341049469e-06, "loss": 17.6846, "step": 39623 }, { "epoch": 0.7242948799970753, "grad_norm": 4.497567241285371, "learning_rate": 1.864553251212321e-06, "loss": 16.7838, "step": 39624 }, { "epoch": 0.7243131591935219, "grad_norm": 5.355915859568731, "learning_rate": 1.8643226793091652e-06, "loss": 16.9237, "step": 39625 }, { "epoch": 0.7243314383899684, "grad_norm": 5.74231530507345, "learning_rate": 1.8640921183962846e-06, "loss": 17.0096, "step": 39626 }, { "epoch": 0.7243497175864149, "grad_norm": 8.333506044901373, "learning_rate": 1.863861568474491e-06, "loss": 17.6447, "step": 39627 }, { "epoch": 0.7243679967828615, "grad_norm": 5.527600025006712, "learning_rate": 1.8636310295445904e-06, "loss": 17.2273, "step": 39628 }, { "epoch": 0.7243862759793079, "grad_norm": 7.715467569505632, "learning_rate": 1.8634005016073892e-06, "loss": 17.8667, "step": 39629 }, { "epoch": 0.7244045551757545, "grad_norm": 5.521156653956157, "learning_rate": 1.8631699846636986e-06, "loss": 17.0322, "step": 39630 }, { "epoch": 0.724422834372201, "grad_norm": 6.690973876883639, "learning_rate": 1.8629394787143235e-06, "loss": 17.7126, "step": 39631 }, { "epoch": 0.7244411135686475, "grad_norm": 5.472207194958317, "learning_rate": 1.8627089837600732e-06, "loss": 17.1717, "step": 39632 }, { "epoch": 0.7244593927650941, "grad_norm": 6.122013340111493, "learning_rate": 1.8624784998017575e-06, "loss": 17.3304, "step": 39633 }, { "epoch": 0.7244776719615406, "grad_norm": 5.476991955107968, "learning_rate": 1.8622480268401822e-06, "loss": 16.9731, "step": 39634 }, { "epoch": 0.7244959511579872, "grad_norm": 6.341487184944553, "learning_rate": 1.862017564876154e-06, "loss": 17.3114, "step": 39635 }, { "epoch": 0.7245142303544336, "grad_norm": 7.935597531272758, "learning_rate": 1.8617871139104832e-06, "loss": 17.8036, "step": 39636 }, { "epoch": 0.7245325095508801, "grad_norm": 5.597423045122426, "learning_rate": 1.861556673943976e-06, "loss": 17.1482, "step": 39637 }, { "epoch": 0.7245507887473267, "grad_norm": 6.051326772953629, "learning_rate": 1.8613262449774394e-06, "loss": 17.2955, "step": 39638 }, { "epoch": 0.7245690679437732, "grad_norm": 4.741446053245462, "learning_rate": 1.861095827011683e-06, "loss": 16.9407, "step": 39639 }, { "epoch": 0.7245873471402197, "grad_norm": 6.51311987503716, "learning_rate": 1.8608654200475117e-06, "loss": 17.4596, "step": 39640 }, { "epoch": 0.7246056263366663, "grad_norm": 5.590403304245009, "learning_rate": 1.8606350240857352e-06, "loss": 17.1163, "step": 39641 }, { "epoch": 0.7246239055331127, "grad_norm": 6.212832871822635, "learning_rate": 1.8604046391271619e-06, "loss": 17.1689, "step": 39642 }, { "epoch": 0.7246421847295593, "grad_norm": 5.563080001499031, "learning_rate": 1.8601742651725962e-06, "loss": 16.915, "step": 39643 }, { "epoch": 0.7246604639260058, "grad_norm": 6.825308701461343, "learning_rate": 1.8599439022228488e-06, "loss": 17.4051, "step": 39644 }, { "epoch": 0.7246787431224523, "grad_norm": 6.893101610055451, "learning_rate": 1.8597135502787255e-06, "loss": 17.5691, "step": 39645 }, { "epoch": 0.7246970223188989, "grad_norm": 4.848310114576604, "learning_rate": 1.8594832093410325e-06, "loss": 16.8887, "step": 39646 }, { "epoch": 0.7247153015153454, "grad_norm": 5.328515663963269, "learning_rate": 1.8592528794105797e-06, "loss": 17.2144, "step": 39647 }, { "epoch": 0.724733580711792, "grad_norm": 6.56859214721191, "learning_rate": 1.8590225604881711e-06, "loss": 17.5764, "step": 39648 }, { "epoch": 0.7247518599082384, "grad_norm": 6.67152367395517, "learning_rate": 1.8587922525746178e-06, "loss": 17.5642, "step": 39649 }, { "epoch": 0.7247701391046849, "grad_norm": 5.1577633677288, "learning_rate": 1.8585619556707235e-06, "loss": 16.9448, "step": 39650 }, { "epoch": 0.7247884183011315, "grad_norm": 6.254795103144392, "learning_rate": 1.8583316697772985e-06, "loss": 17.5265, "step": 39651 }, { "epoch": 0.724806697497578, "grad_norm": 6.087488940253044, "learning_rate": 1.8581013948951466e-06, "loss": 17.2909, "step": 39652 }, { "epoch": 0.7248249766940246, "grad_norm": 7.597142610085303, "learning_rate": 1.8578711310250786e-06, "loss": 17.6559, "step": 39653 }, { "epoch": 0.724843255890471, "grad_norm": 7.11542231955612, "learning_rate": 1.857640878167899e-06, "loss": 17.6406, "step": 39654 }, { "epoch": 0.7248615350869175, "grad_norm": 7.284996632212558, "learning_rate": 1.8574106363244143e-06, "loss": 17.731, "step": 39655 }, { "epoch": 0.7248798142833641, "grad_norm": 7.367543636493168, "learning_rate": 1.8571804054954346e-06, "loss": 17.5801, "step": 39656 }, { "epoch": 0.7248980934798106, "grad_norm": 7.767958567436666, "learning_rate": 1.8569501856817624e-06, "loss": 17.4637, "step": 39657 }, { "epoch": 0.7249163726762572, "grad_norm": 6.852014180743183, "learning_rate": 1.8567199768842075e-06, "loss": 17.7312, "step": 39658 }, { "epoch": 0.7249346518727037, "grad_norm": 5.9839660430549735, "learning_rate": 1.856489779103578e-06, "loss": 17.0468, "step": 39659 }, { "epoch": 0.7249529310691502, "grad_norm": 9.60817061619662, "learning_rate": 1.8562595923406774e-06, "loss": 18.0978, "step": 39660 }, { "epoch": 0.7249712102655967, "grad_norm": 6.966616150934312, "learning_rate": 1.8560294165963155e-06, "loss": 17.4536, "step": 39661 }, { "epoch": 0.7249894894620432, "grad_norm": 4.372445349058777, "learning_rate": 1.8557992518712981e-06, "loss": 16.7325, "step": 39662 }, { "epoch": 0.7250077686584898, "grad_norm": 5.777539793686087, "learning_rate": 1.8555690981664293e-06, "loss": 17.278, "step": 39663 }, { "epoch": 0.7250260478549363, "grad_norm": 6.0490735022495565, "learning_rate": 1.8553389554825202e-06, "loss": 17.1402, "step": 39664 }, { "epoch": 0.7250443270513828, "grad_norm": 6.304182693663717, "learning_rate": 1.8551088238203747e-06, "loss": 17.3542, "step": 39665 }, { "epoch": 0.7250626062478294, "grad_norm": 7.527179560761359, "learning_rate": 1.8548787031807985e-06, "loss": 17.7478, "step": 39666 }, { "epoch": 0.7250808854442758, "grad_norm": 6.11430168618735, "learning_rate": 1.8546485935645997e-06, "loss": 17.3948, "step": 39667 }, { "epoch": 0.7250991646407224, "grad_norm": 7.248943743809316, "learning_rate": 1.8544184949725858e-06, "loss": 17.6724, "step": 39668 }, { "epoch": 0.7251174438371689, "grad_norm": 5.929437925860706, "learning_rate": 1.8541884074055606e-06, "loss": 17.1463, "step": 39669 }, { "epoch": 0.7251357230336154, "grad_norm": 5.495854732141029, "learning_rate": 1.8539583308643338e-06, "loss": 17.2188, "step": 39670 }, { "epoch": 0.725154002230062, "grad_norm": 5.741141949759134, "learning_rate": 1.8537282653497101e-06, "loss": 17.2825, "step": 39671 }, { "epoch": 0.7251722814265085, "grad_norm": 6.824091575090623, "learning_rate": 1.8534982108624938e-06, "loss": 17.5401, "step": 39672 }, { "epoch": 0.7251905606229551, "grad_norm": 6.174949613538995, "learning_rate": 1.8532681674034953e-06, "loss": 17.3542, "step": 39673 }, { "epoch": 0.7252088398194015, "grad_norm": 5.99762788614137, "learning_rate": 1.8530381349735171e-06, "loss": 17.456, "step": 39674 }, { "epoch": 0.725227119015848, "grad_norm": 5.6387556806665176, "learning_rate": 1.8528081135733688e-06, "loss": 17.1483, "step": 39675 }, { "epoch": 0.7252453982122946, "grad_norm": 7.4485055868138454, "learning_rate": 1.852578103203853e-06, "loss": 17.8714, "step": 39676 }, { "epoch": 0.7252636774087411, "grad_norm": 7.125733665411846, "learning_rate": 1.8523481038657781e-06, "loss": 18.0543, "step": 39677 }, { "epoch": 0.7252819566051877, "grad_norm": 5.645733811744552, "learning_rate": 1.852118115559952e-06, "loss": 17.2033, "step": 39678 }, { "epoch": 0.7253002358016342, "grad_norm": 5.827966047091857, "learning_rate": 1.8518881382871784e-06, "loss": 17.2775, "step": 39679 }, { "epoch": 0.7253185149980806, "grad_norm": 7.100129863641132, "learning_rate": 1.851658172048262e-06, "loss": 17.6292, "step": 39680 }, { "epoch": 0.7253367941945272, "grad_norm": 7.808224771487221, "learning_rate": 1.8514282168440118e-06, "loss": 18.0438, "step": 39681 }, { "epoch": 0.7253550733909737, "grad_norm": 5.327235571583074, "learning_rate": 1.851198272675233e-06, "loss": 17.203, "step": 39682 }, { "epoch": 0.7253733525874203, "grad_norm": 5.819306872348184, "learning_rate": 1.8509683395427291e-06, "loss": 17.2851, "step": 39683 }, { "epoch": 0.7253916317838668, "grad_norm": 4.589754030910057, "learning_rate": 1.8507384174473097e-06, "loss": 16.7589, "step": 39684 }, { "epoch": 0.7254099109803133, "grad_norm": 9.083059419221762, "learning_rate": 1.850508506389777e-06, "loss": 18.127, "step": 39685 }, { "epoch": 0.7254281901767599, "grad_norm": 5.8176888518204315, "learning_rate": 1.8502786063709388e-06, "loss": 17.0839, "step": 39686 }, { "epoch": 0.7254464693732063, "grad_norm": 5.099739312386909, "learning_rate": 1.8500487173916027e-06, "loss": 17.0792, "step": 39687 }, { "epoch": 0.7254647485696529, "grad_norm": 8.042503304725486, "learning_rate": 1.8498188394525707e-06, "loss": 17.9831, "step": 39688 }, { "epoch": 0.7254830277660994, "grad_norm": 5.969848464761874, "learning_rate": 1.8495889725546518e-06, "loss": 17.1897, "step": 39689 }, { "epoch": 0.7255013069625459, "grad_norm": 7.004262675264898, "learning_rate": 1.8493591166986503e-06, "loss": 17.6058, "step": 39690 }, { "epoch": 0.7255195861589925, "grad_norm": 5.821085999884783, "learning_rate": 1.8491292718853699e-06, "loss": 17.2669, "step": 39691 }, { "epoch": 0.725537865355439, "grad_norm": 6.228558448651645, "learning_rate": 1.8488994381156194e-06, "loss": 17.1845, "step": 39692 }, { "epoch": 0.7255561445518856, "grad_norm": 5.498208185750256, "learning_rate": 1.8486696153902035e-06, "loss": 17.2857, "step": 39693 }, { "epoch": 0.725574423748332, "grad_norm": 5.7799772137182845, "learning_rate": 1.848439803709925e-06, "loss": 17.3759, "step": 39694 }, { "epoch": 0.7255927029447785, "grad_norm": 6.757482201642782, "learning_rate": 1.8482100030755917e-06, "loss": 17.5066, "step": 39695 }, { "epoch": 0.7256109821412251, "grad_norm": 6.044936631703818, "learning_rate": 1.8479802134880109e-06, "loss": 17.384, "step": 39696 }, { "epoch": 0.7256292613376716, "grad_norm": 6.295473587203455, "learning_rate": 1.8477504349479835e-06, "loss": 17.4815, "step": 39697 }, { "epoch": 0.7256475405341182, "grad_norm": 6.237587460326378, "learning_rate": 1.8475206674563195e-06, "loss": 17.4999, "step": 39698 }, { "epoch": 0.7256658197305647, "grad_norm": 4.845244368161493, "learning_rate": 1.8472909110138221e-06, "loss": 16.7929, "step": 39699 }, { "epoch": 0.7256840989270111, "grad_norm": 6.369471143425643, "learning_rate": 1.8470611656212946e-06, "loss": 17.5319, "step": 39700 }, { "epoch": 0.7257023781234577, "grad_norm": 5.6681368296631645, "learning_rate": 1.8468314312795455e-06, "loss": 17.0585, "step": 39701 }, { "epoch": 0.7257206573199042, "grad_norm": 6.2867982195499446, "learning_rate": 1.846601707989379e-06, "loss": 17.497, "step": 39702 }, { "epoch": 0.7257389365163508, "grad_norm": 6.251437612695157, "learning_rate": 1.846371995751598e-06, "loss": 17.1901, "step": 39703 }, { "epoch": 0.7257572157127973, "grad_norm": 7.490631606603296, "learning_rate": 1.8461422945670093e-06, "loss": 17.6482, "step": 39704 }, { "epoch": 0.7257754949092438, "grad_norm": 7.516807564105554, "learning_rate": 1.8459126044364189e-06, "loss": 17.6353, "step": 39705 }, { "epoch": 0.7257937741056903, "grad_norm": 9.125655708655183, "learning_rate": 1.8456829253606323e-06, "loss": 18.435, "step": 39706 }, { "epoch": 0.7258120533021368, "grad_norm": 5.639543447269793, "learning_rate": 1.8454532573404531e-06, "loss": 17.0461, "step": 39707 }, { "epoch": 0.7258303324985833, "grad_norm": 5.1853785393142156, "learning_rate": 1.8452236003766855e-06, "loss": 16.9671, "step": 39708 }, { "epoch": 0.7258486116950299, "grad_norm": 5.929108567579639, "learning_rate": 1.8449939544701361e-06, "loss": 17.1357, "step": 39709 }, { "epoch": 0.7258668908914764, "grad_norm": 6.21348174760361, "learning_rate": 1.8447643196216103e-06, "loss": 17.3618, "step": 39710 }, { "epoch": 0.725885170087923, "grad_norm": 6.322267539540727, "learning_rate": 1.8445346958319094e-06, "loss": 17.4868, "step": 39711 }, { "epoch": 0.7259034492843695, "grad_norm": 5.6986646586766065, "learning_rate": 1.8443050831018421e-06, "loss": 17.1215, "step": 39712 }, { "epoch": 0.7259217284808159, "grad_norm": 5.992080694073136, "learning_rate": 1.8440754814322104e-06, "loss": 17.3081, "step": 39713 }, { "epoch": 0.7259400076772625, "grad_norm": 5.79592612392642, "learning_rate": 1.8438458908238199e-06, "loss": 17.1428, "step": 39714 }, { "epoch": 0.725958286873709, "grad_norm": 5.88313517750489, "learning_rate": 1.8436163112774774e-06, "loss": 17.1776, "step": 39715 }, { "epoch": 0.7259765660701556, "grad_norm": 6.1806199930794765, "learning_rate": 1.843386742793986e-06, "loss": 17.3238, "step": 39716 }, { "epoch": 0.7259948452666021, "grad_norm": 5.471128794237767, "learning_rate": 1.8431571853741486e-06, "loss": 17.0768, "step": 39717 }, { "epoch": 0.7260131244630486, "grad_norm": 5.912854832969127, "learning_rate": 1.8429276390187727e-06, "loss": 17.6024, "step": 39718 }, { "epoch": 0.7260314036594951, "grad_norm": 6.153336786269909, "learning_rate": 1.84269810372866e-06, "loss": 17.3328, "step": 39719 }, { "epoch": 0.7260496828559416, "grad_norm": 6.281698311984228, "learning_rate": 1.8424685795046182e-06, "loss": 17.2507, "step": 39720 }, { "epoch": 0.7260679620523882, "grad_norm": 6.6561111153746335, "learning_rate": 1.8422390663474482e-06, "loss": 17.6082, "step": 39721 }, { "epoch": 0.7260862412488347, "grad_norm": 7.313693219935855, "learning_rate": 1.8420095642579578e-06, "loss": 17.9269, "step": 39722 }, { "epoch": 0.7261045204452812, "grad_norm": 5.334866203945794, "learning_rate": 1.8417800732369485e-06, "loss": 16.9367, "step": 39723 }, { "epoch": 0.7261227996417278, "grad_norm": 6.2914476030484305, "learning_rate": 1.8415505932852274e-06, "loss": 17.5808, "step": 39724 }, { "epoch": 0.7261410788381742, "grad_norm": 6.563784074078908, "learning_rate": 1.8413211244035956e-06, "loss": 17.4348, "step": 39725 }, { "epoch": 0.7261593580346208, "grad_norm": 5.9797765091495725, "learning_rate": 1.8410916665928613e-06, "loss": 17.1397, "step": 39726 }, { "epoch": 0.7261776372310673, "grad_norm": 7.115621152399903, "learning_rate": 1.8408622198538262e-06, "loss": 17.7953, "step": 39727 }, { "epoch": 0.7261959164275138, "grad_norm": 5.037411255648738, "learning_rate": 1.8406327841872933e-06, "loss": 17.032, "step": 39728 }, { "epoch": 0.7262141956239604, "grad_norm": 6.469262636402345, "learning_rate": 1.8404033595940702e-06, "loss": 17.6972, "step": 39729 }, { "epoch": 0.7262324748204069, "grad_norm": 4.967956604582689, "learning_rate": 1.8401739460749568e-06, "loss": 16.9568, "step": 39730 }, { "epoch": 0.7262507540168535, "grad_norm": 5.037076427684444, "learning_rate": 1.83994454363076e-06, "loss": 17.0203, "step": 39731 }, { "epoch": 0.7262690332132999, "grad_norm": 5.139169491155311, "learning_rate": 1.8397151522622853e-06, "loss": 17.1324, "step": 39732 }, { "epoch": 0.7262873124097464, "grad_norm": 5.395420723490637, "learning_rate": 1.8394857719703329e-06, "loss": 17.1739, "step": 39733 }, { "epoch": 0.726305591606193, "grad_norm": 7.2026006223916035, "learning_rate": 1.8392564027557102e-06, "loss": 17.7441, "step": 39734 }, { "epoch": 0.7263238708026395, "grad_norm": 6.330864647782247, "learning_rate": 1.8390270446192198e-06, "loss": 17.344, "step": 39735 }, { "epoch": 0.7263421499990861, "grad_norm": 6.474109297310999, "learning_rate": 1.8387976975616634e-06, "loss": 17.5562, "step": 39736 }, { "epoch": 0.7263604291955326, "grad_norm": 6.143936141893556, "learning_rate": 1.8385683615838483e-06, "loss": 17.688, "step": 39737 }, { "epoch": 0.726378708391979, "grad_norm": 5.616420714181727, "learning_rate": 1.8383390366865767e-06, "loss": 17.1553, "step": 39738 }, { "epoch": 0.7263969875884256, "grad_norm": 7.20730404586212, "learning_rate": 1.8381097228706513e-06, "loss": 17.8856, "step": 39739 }, { "epoch": 0.7264152667848721, "grad_norm": 7.366002511045489, "learning_rate": 1.837880420136876e-06, "loss": 17.7308, "step": 39740 }, { "epoch": 0.7264335459813187, "grad_norm": 5.609503569376791, "learning_rate": 1.8376511284860582e-06, "loss": 17.215, "step": 39741 }, { "epoch": 0.7264518251777652, "grad_norm": 7.277336224504656, "learning_rate": 1.8374218479189966e-06, "loss": 17.908, "step": 39742 }, { "epoch": 0.7264701043742117, "grad_norm": 4.304136622941514, "learning_rate": 1.8371925784364987e-06, "loss": 16.6653, "step": 39743 }, { "epoch": 0.7264883835706583, "grad_norm": 6.434224728550886, "learning_rate": 1.8369633200393662e-06, "loss": 17.3614, "step": 39744 }, { "epoch": 0.7265066627671047, "grad_norm": 5.547206940664624, "learning_rate": 1.8367340727284012e-06, "loss": 16.9734, "step": 39745 }, { "epoch": 0.7265249419635513, "grad_norm": 6.844137590404416, "learning_rate": 1.8365048365044103e-06, "loss": 17.3676, "step": 39746 }, { "epoch": 0.7265432211599978, "grad_norm": 5.104445155585534, "learning_rate": 1.8362756113681957e-06, "loss": 16.9886, "step": 39747 }, { "epoch": 0.7265615003564443, "grad_norm": 6.052430240122779, "learning_rate": 1.836046397320559e-06, "loss": 17.1635, "step": 39748 }, { "epoch": 0.7265797795528909, "grad_norm": 6.5838118972751545, "learning_rate": 1.8358171943623043e-06, "loss": 17.6353, "step": 39749 }, { "epoch": 0.7265980587493374, "grad_norm": 6.08128127833763, "learning_rate": 1.8355880024942369e-06, "loss": 17.6428, "step": 39750 }, { "epoch": 0.726616337945784, "grad_norm": 7.885135469628413, "learning_rate": 1.8353588217171598e-06, "loss": 17.474, "step": 39751 }, { "epoch": 0.7266346171422304, "grad_norm": 5.015025354550999, "learning_rate": 1.8351296520318755e-06, "loss": 16.9561, "step": 39752 }, { "epoch": 0.7266528963386769, "grad_norm": 5.749170881780758, "learning_rate": 1.8349004934391857e-06, "loss": 17.1391, "step": 39753 }, { "epoch": 0.7266711755351235, "grad_norm": 5.802278397639255, "learning_rate": 1.8346713459398969e-06, "loss": 17.0442, "step": 39754 }, { "epoch": 0.72668945473157, "grad_norm": 6.165018275352745, "learning_rate": 1.8344422095348103e-06, "loss": 17.525, "step": 39755 }, { "epoch": 0.7267077339280166, "grad_norm": 6.66060557287624, "learning_rate": 1.834213084224727e-06, "loss": 17.721, "step": 39756 }, { "epoch": 0.726726013124463, "grad_norm": 5.600078467053025, "learning_rate": 1.8339839700104539e-06, "loss": 17.1405, "step": 39757 }, { "epoch": 0.7267442923209095, "grad_norm": 9.345443750640749, "learning_rate": 1.8337548668927906e-06, "loss": 17.9787, "step": 39758 }, { "epoch": 0.7267625715173561, "grad_norm": 7.074997372371137, "learning_rate": 1.8335257748725422e-06, "loss": 17.7747, "step": 39759 }, { "epoch": 0.7267808507138026, "grad_norm": 6.33149703974343, "learning_rate": 1.833296693950512e-06, "loss": 17.4438, "step": 39760 }, { "epoch": 0.7267991299102492, "grad_norm": 6.549574564797623, "learning_rate": 1.8330676241275025e-06, "loss": 17.398, "step": 39761 }, { "epoch": 0.7268174091066957, "grad_norm": 5.730495944976415, "learning_rate": 1.8328385654043145e-06, "loss": 17.325, "step": 39762 }, { "epoch": 0.7268356883031422, "grad_norm": 5.117438988146044, "learning_rate": 1.8326095177817544e-06, "loss": 17.0705, "step": 39763 }, { "epoch": 0.7268539674995887, "grad_norm": 6.075202489583796, "learning_rate": 1.832380481260621e-06, "loss": 17.2771, "step": 39764 }, { "epoch": 0.7268722466960352, "grad_norm": 5.832175356144566, "learning_rate": 1.8321514558417214e-06, "loss": 17.31, "step": 39765 }, { "epoch": 0.7268905258924818, "grad_norm": 7.618948488226039, "learning_rate": 1.8319224415258558e-06, "loss": 17.9975, "step": 39766 }, { "epoch": 0.7269088050889283, "grad_norm": 6.106282666640234, "learning_rate": 1.8316934383138252e-06, "loss": 17.4313, "step": 39767 }, { "epoch": 0.7269270842853748, "grad_norm": 7.752474102226471, "learning_rate": 1.8314644462064346e-06, "loss": 17.7931, "step": 39768 }, { "epoch": 0.7269453634818214, "grad_norm": 5.550830195683574, "learning_rate": 1.8312354652044872e-06, "loss": 17.2705, "step": 39769 }, { "epoch": 0.7269636426782679, "grad_norm": 5.681990499785123, "learning_rate": 1.8310064953087836e-06, "loss": 17.3115, "step": 39770 }, { "epoch": 0.7269819218747144, "grad_norm": 5.925750981519832, "learning_rate": 1.830777536520128e-06, "loss": 16.9122, "step": 39771 }, { "epoch": 0.7270002010711609, "grad_norm": 5.610456222169934, "learning_rate": 1.8305485888393232e-06, "loss": 17.1029, "step": 39772 }, { "epoch": 0.7270184802676074, "grad_norm": 5.476137721723922, "learning_rate": 1.830319652267168e-06, "loss": 17.0081, "step": 39773 }, { "epoch": 0.727036759464054, "grad_norm": 5.754270769166709, "learning_rate": 1.8300907268044692e-06, "loss": 17.2483, "step": 39774 }, { "epoch": 0.7270550386605005, "grad_norm": 7.070010220012935, "learning_rate": 1.8298618124520261e-06, "loss": 17.6754, "step": 39775 }, { "epoch": 0.727073317856947, "grad_norm": 6.595325766907021, "learning_rate": 1.8296329092106436e-06, "loss": 17.2236, "step": 39776 }, { "epoch": 0.7270915970533935, "grad_norm": 4.7672353853598475, "learning_rate": 1.829404017081121e-06, "loss": 16.8134, "step": 39777 }, { "epoch": 0.72710987624984, "grad_norm": 6.831562387568794, "learning_rate": 1.829175136064264e-06, "loss": 17.4446, "step": 39778 }, { "epoch": 0.7271281554462866, "grad_norm": 6.095715361619975, "learning_rate": 1.828946266160871e-06, "loss": 17.1835, "step": 39779 }, { "epoch": 0.7271464346427331, "grad_norm": 4.618217219990934, "learning_rate": 1.8287174073717485e-06, "loss": 16.7222, "step": 39780 }, { "epoch": 0.7271647138391796, "grad_norm": 6.642138490040878, "learning_rate": 1.8284885596976937e-06, "loss": 17.4391, "step": 39781 }, { "epoch": 0.7271829930356262, "grad_norm": 5.940312052430844, "learning_rate": 1.8282597231395138e-06, "loss": 17.2229, "step": 39782 }, { "epoch": 0.7272012722320726, "grad_norm": 5.892190565948039, "learning_rate": 1.8280308976980077e-06, "loss": 16.9557, "step": 39783 }, { "epoch": 0.7272195514285192, "grad_norm": 7.269995153052306, "learning_rate": 1.8278020833739768e-06, "loss": 17.8693, "step": 39784 }, { "epoch": 0.7272378306249657, "grad_norm": 5.722775057514067, "learning_rate": 1.8275732801682239e-06, "loss": 17.2396, "step": 39785 }, { "epoch": 0.7272561098214122, "grad_norm": 6.556618741663044, "learning_rate": 1.8273444880815533e-06, "loss": 17.6055, "step": 39786 }, { "epoch": 0.7272743890178588, "grad_norm": 6.550896051769164, "learning_rate": 1.8271157071147632e-06, "loss": 17.4526, "step": 39787 }, { "epoch": 0.7272926682143053, "grad_norm": 6.435307167814349, "learning_rate": 1.8268869372686593e-06, "loss": 17.2817, "step": 39788 }, { "epoch": 0.7273109474107519, "grad_norm": 5.395171149475497, "learning_rate": 1.8266581785440414e-06, "loss": 17.1649, "step": 39789 }, { "epoch": 0.7273292266071983, "grad_norm": 5.9613693492900195, "learning_rate": 1.8264294309417086e-06, "loss": 17.2627, "step": 39790 }, { "epoch": 0.7273475058036448, "grad_norm": 6.626759670000789, "learning_rate": 1.8262006944624677e-06, "loss": 17.5831, "step": 39791 }, { "epoch": 0.7273657850000914, "grad_norm": 7.2079663038873685, "learning_rate": 1.8259719691071175e-06, "loss": 17.7097, "step": 39792 }, { "epoch": 0.7273840641965379, "grad_norm": 5.769976893014424, "learning_rate": 1.8257432548764592e-06, "loss": 17.1131, "step": 39793 }, { "epoch": 0.7274023433929845, "grad_norm": 5.46472055099391, "learning_rate": 1.8255145517712947e-06, "loss": 16.9988, "step": 39794 }, { "epoch": 0.727420622589431, "grad_norm": 5.095267531626996, "learning_rate": 1.8252858597924262e-06, "loss": 16.8957, "step": 39795 }, { "epoch": 0.7274389017858774, "grad_norm": 5.908404023073192, "learning_rate": 1.8250571789406574e-06, "loss": 17.2047, "step": 39796 }, { "epoch": 0.727457180982324, "grad_norm": 5.836748865847022, "learning_rate": 1.8248285092167871e-06, "loss": 17.5065, "step": 39797 }, { "epoch": 0.7274754601787705, "grad_norm": 5.7543245692222085, "learning_rate": 1.8245998506216163e-06, "loss": 17.5368, "step": 39798 }, { "epoch": 0.7274937393752171, "grad_norm": 7.388501367709484, "learning_rate": 1.8243712031559486e-06, "loss": 17.6552, "step": 39799 }, { "epoch": 0.7275120185716636, "grad_norm": 5.876808931935773, "learning_rate": 1.8241425668205842e-06, "loss": 17.3365, "step": 39800 }, { "epoch": 0.7275302977681101, "grad_norm": 5.928712444741589, "learning_rate": 1.8239139416163233e-06, "loss": 17.1883, "step": 39801 }, { "epoch": 0.7275485769645567, "grad_norm": 6.124160579719618, "learning_rate": 1.8236853275439697e-06, "loss": 17.3592, "step": 39802 }, { "epoch": 0.7275668561610031, "grad_norm": 6.952173187055757, "learning_rate": 1.8234567246043221e-06, "loss": 17.7714, "step": 39803 }, { "epoch": 0.7275851353574497, "grad_norm": 5.10463342163402, "learning_rate": 1.8232281327981832e-06, "loss": 17.013, "step": 39804 }, { "epoch": 0.7276034145538962, "grad_norm": 13.782989297530582, "learning_rate": 1.8229995521263549e-06, "loss": 19.3196, "step": 39805 }, { "epoch": 0.7276216937503427, "grad_norm": 6.465636974624367, "learning_rate": 1.8227709825896384e-06, "loss": 17.3296, "step": 39806 }, { "epoch": 0.7276399729467893, "grad_norm": 5.757864813971713, "learning_rate": 1.8225424241888317e-06, "loss": 17.3608, "step": 39807 }, { "epoch": 0.7276582521432358, "grad_norm": 5.677674766937952, "learning_rate": 1.82231387692474e-06, "loss": 16.9548, "step": 39808 }, { "epoch": 0.7276765313396824, "grad_norm": 5.883668785993528, "learning_rate": 1.822085340798162e-06, "loss": 17.2082, "step": 39809 }, { "epoch": 0.7276948105361288, "grad_norm": 6.597436201563627, "learning_rate": 1.8218568158098976e-06, "loss": 17.5994, "step": 39810 }, { "epoch": 0.7277130897325753, "grad_norm": 7.304091843898949, "learning_rate": 1.821628301960751e-06, "loss": 17.7201, "step": 39811 }, { "epoch": 0.7277313689290219, "grad_norm": 6.697429642228471, "learning_rate": 1.8213997992515197e-06, "loss": 17.6203, "step": 39812 }, { "epoch": 0.7277496481254684, "grad_norm": 5.164971690687683, "learning_rate": 1.8211713076830067e-06, "loss": 17.1245, "step": 39813 }, { "epoch": 0.727767927321915, "grad_norm": 6.4304340540440466, "learning_rate": 1.8209428272560136e-06, "loss": 17.1992, "step": 39814 }, { "epoch": 0.7277862065183615, "grad_norm": 6.844854072775689, "learning_rate": 1.8207143579713383e-06, "loss": 17.5397, "step": 39815 }, { "epoch": 0.7278044857148079, "grad_norm": 5.560260957841475, "learning_rate": 1.8204858998297852e-06, "loss": 17.1065, "step": 39816 }, { "epoch": 0.7278227649112545, "grad_norm": 7.191881127892522, "learning_rate": 1.8202574528321531e-06, "loss": 17.739, "step": 39817 }, { "epoch": 0.727841044107701, "grad_norm": 7.762997843221037, "learning_rate": 1.8200290169792412e-06, "loss": 18.131, "step": 39818 }, { "epoch": 0.7278593233041476, "grad_norm": 6.148976096759463, "learning_rate": 1.819800592271853e-06, "loss": 17.2533, "step": 39819 }, { "epoch": 0.7278776025005941, "grad_norm": 7.353060035089866, "learning_rate": 1.819572178710788e-06, "loss": 17.8047, "step": 39820 }, { "epoch": 0.7278958816970406, "grad_norm": 5.4044602842137985, "learning_rate": 1.8193437762968447e-06, "loss": 16.9677, "step": 39821 }, { "epoch": 0.7279141608934871, "grad_norm": 5.900734338538358, "learning_rate": 1.819115385030825e-06, "loss": 17.3446, "step": 39822 }, { "epoch": 0.7279324400899336, "grad_norm": 7.955556829235697, "learning_rate": 1.8188870049135322e-06, "loss": 17.6476, "step": 39823 }, { "epoch": 0.7279507192863802, "grad_norm": 6.582505807587843, "learning_rate": 1.8186586359457625e-06, "loss": 17.5723, "step": 39824 }, { "epoch": 0.7279689984828267, "grad_norm": 8.529993452883218, "learning_rate": 1.8184302781283204e-06, "loss": 17.9732, "step": 39825 }, { "epoch": 0.7279872776792732, "grad_norm": 6.415679984379488, "learning_rate": 1.8182019314620013e-06, "loss": 17.4087, "step": 39826 }, { "epoch": 0.7280055568757198, "grad_norm": 5.525529635134047, "learning_rate": 1.8179735959476107e-06, "loss": 17.2445, "step": 39827 }, { "epoch": 0.7280238360721663, "grad_norm": 6.542194786170704, "learning_rate": 1.8177452715859467e-06, "loss": 17.1271, "step": 39828 }, { "epoch": 0.7280421152686128, "grad_norm": 5.275229041784649, "learning_rate": 1.8175169583778075e-06, "loss": 17.1023, "step": 39829 }, { "epoch": 0.7280603944650593, "grad_norm": 6.730744291643797, "learning_rate": 1.8172886563239972e-06, "loss": 17.6729, "step": 39830 }, { "epoch": 0.7280786736615058, "grad_norm": 5.564950236248111, "learning_rate": 1.8170603654253116e-06, "loss": 17.0706, "step": 39831 }, { "epoch": 0.7280969528579524, "grad_norm": 8.684140529386356, "learning_rate": 1.816832085682554e-06, "loss": 18.0796, "step": 39832 }, { "epoch": 0.7281152320543989, "grad_norm": 5.035677592849355, "learning_rate": 1.8166038170965244e-06, "loss": 17.0589, "step": 39833 }, { "epoch": 0.7281335112508455, "grad_norm": 5.481953067214633, "learning_rate": 1.8163755596680228e-06, "loss": 17.1553, "step": 39834 }, { "epoch": 0.728151790447292, "grad_norm": 5.16561849440799, "learning_rate": 1.816147313397847e-06, "loss": 17.0176, "step": 39835 }, { "epoch": 0.7281700696437384, "grad_norm": 5.6110950946241935, "learning_rate": 1.8159190782868003e-06, "loss": 17.2256, "step": 39836 }, { "epoch": 0.728188348840185, "grad_norm": 6.223454750456414, "learning_rate": 1.8156908543356805e-06, "loss": 17.5398, "step": 39837 }, { "epoch": 0.7282066280366315, "grad_norm": 6.881584989725981, "learning_rate": 1.8154626415452865e-06, "loss": 17.5053, "step": 39838 }, { "epoch": 0.7282249072330781, "grad_norm": 6.281286548590217, "learning_rate": 1.8152344399164211e-06, "loss": 17.2889, "step": 39839 }, { "epoch": 0.7282431864295246, "grad_norm": 7.532378677268311, "learning_rate": 1.8150062494498815e-06, "loss": 17.9084, "step": 39840 }, { "epoch": 0.728261465625971, "grad_norm": 5.9233052605518415, "learning_rate": 1.814778070146468e-06, "loss": 17.2524, "step": 39841 }, { "epoch": 0.7282797448224176, "grad_norm": 7.8834367624421615, "learning_rate": 1.8145499020069829e-06, "loss": 18.1091, "step": 39842 }, { "epoch": 0.7282980240188641, "grad_norm": 7.129088128022721, "learning_rate": 1.8143217450322225e-06, "loss": 17.7129, "step": 39843 }, { "epoch": 0.7283163032153106, "grad_norm": 6.149892251333213, "learning_rate": 1.8140935992229891e-06, "loss": 17.1731, "step": 39844 }, { "epoch": 0.7283345824117572, "grad_norm": 5.797144911220253, "learning_rate": 1.8138654645800818e-06, "loss": 17.3897, "step": 39845 }, { "epoch": 0.7283528616082037, "grad_norm": 5.953956493654629, "learning_rate": 1.8136373411042974e-06, "loss": 17.3016, "step": 39846 }, { "epoch": 0.7283711408046503, "grad_norm": 5.436612516686993, "learning_rate": 1.8134092287964393e-06, "loss": 17.0849, "step": 39847 }, { "epoch": 0.7283894200010967, "grad_norm": 5.170580608220032, "learning_rate": 1.8131811276573042e-06, "loss": 16.9835, "step": 39848 }, { "epoch": 0.7284076991975432, "grad_norm": 6.633409195921335, "learning_rate": 1.8129530376876942e-06, "loss": 17.3688, "step": 39849 }, { "epoch": 0.7284259783939898, "grad_norm": 6.967299294294006, "learning_rate": 1.812724958888405e-06, "loss": 17.7609, "step": 39850 }, { "epoch": 0.7284442575904363, "grad_norm": 5.930689455461046, "learning_rate": 1.8124968912602403e-06, "loss": 17.221, "step": 39851 }, { "epoch": 0.7284625367868829, "grad_norm": 8.989093600913346, "learning_rate": 1.8122688348039956e-06, "loss": 18.4161, "step": 39852 }, { "epoch": 0.7284808159833294, "grad_norm": 6.579174162764462, "learning_rate": 1.8120407895204738e-06, "loss": 17.2003, "step": 39853 }, { "epoch": 0.7284990951797758, "grad_norm": 5.982556332098288, "learning_rate": 1.8118127554104725e-06, "loss": 17.0456, "step": 39854 }, { "epoch": 0.7285173743762224, "grad_norm": 6.684895120358082, "learning_rate": 1.8115847324747893e-06, "loss": 17.8068, "step": 39855 }, { "epoch": 0.7285356535726689, "grad_norm": 5.455616249591168, "learning_rate": 1.8113567207142263e-06, "loss": 17.2511, "step": 39856 }, { "epoch": 0.7285539327691155, "grad_norm": 5.575744198337515, "learning_rate": 1.8111287201295796e-06, "loss": 17.1558, "step": 39857 }, { "epoch": 0.728572211965562, "grad_norm": 5.54413865540767, "learning_rate": 1.8109007307216503e-06, "loss": 17.2742, "step": 39858 }, { "epoch": 0.7285904911620085, "grad_norm": 7.878419188296154, "learning_rate": 1.8106727524912388e-06, "loss": 17.7539, "step": 39859 }, { "epoch": 0.7286087703584551, "grad_norm": 6.154455286465587, "learning_rate": 1.8104447854391404e-06, "loss": 17.3462, "step": 39860 }, { "epoch": 0.7286270495549015, "grad_norm": 7.5166636848021255, "learning_rate": 1.810216829566158e-06, "loss": 17.6677, "step": 39861 }, { "epoch": 0.7286453287513481, "grad_norm": 7.299920766190211, "learning_rate": 1.8099888848730895e-06, "loss": 17.8213, "step": 39862 }, { "epoch": 0.7286636079477946, "grad_norm": 6.082261129093598, "learning_rate": 1.8097609513607311e-06, "loss": 17.0996, "step": 39863 }, { "epoch": 0.7286818871442411, "grad_norm": 6.51583597600095, "learning_rate": 1.809533029029885e-06, "loss": 17.5064, "step": 39864 }, { "epoch": 0.7287001663406877, "grad_norm": 6.970003108323996, "learning_rate": 1.8093051178813492e-06, "loss": 17.6619, "step": 39865 }, { "epoch": 0.7287184455371342, "grad_norm": 7.535386227184448, "learning_rate": 1.8090772179159205e-06, "loss": 17.3304, "step": 39866 }, { "epoch": 0.7287367247335808, "grad_norm": 7.773826989070569, "learning_rate": 1.808849329134399e-06, "loss": 17.3492, "step": 39867 }, { "epoch": 0.7287550039300272, "grad_norm": 7.3395754459359885, "learning_rate": 1.8086214515375855e-06, "loss": 17.873, "step": 39868 }, { "epoch": 0.7287732831264737, "grad_norm": 5.974655493293355, "learning_rate": 1.808393585126275e-06, "loss": 17.1753, "step": 39869 }, { "epoch": 0.7287915623229203, "grad_norm": 6.148651053353248, "learning_rate": 1.80816572990127e-06, "loss": 17.4135, "step": 39870 }, { "epoch": 0.7288098415193668, "grad_norm": 6.495939735520259, "learning_rate": 1.8079378858633673e-06, "loss": 17.2049, "step": 39871 }, { "epoch": 0.7288281207158134, "grad_norm": 5.144852323353935, "learning_rate": 1.8077100530133634e-06, "loss": 17.157, "step": 39872 }, { "epoch": 0.7288463999122599, "grad_norm": 6.1293216424856825, "learning_rate": 1.8074822313520602e-06, "loss": 17.5221, "step": 39873 }, { "epoch": 0.7288646791087063, "grad_norm": 7.21564642989512, "learning_rate": 1.8072544208802535e-06, "loss": 17.4845, "step": 39874 }, { "epoch": 0.7288829583051529, "grad_norm": 6.87153005427619, "learning_rate": 1.8070266215987442e-06, "loss": 17.4846, "step": 39875 }, { "epoch": 0.7289012375015994, "grad_norm": 5.467757576323351, "learning_rate": 1.806798833508328e-06, "loss": 16.9432, "step": 39876 }, { "epoch": 0.728919516698046, "grad_norm": 5.178496679401197, "learning_rate": 1.8065710566098049e-06, "loss": 17.0705, "step": 39877 }, { "epoch": 0.7289377958944925, "grad_norm": 6.230085154604043, "learning_rate": 1.8063432909039746e-06, "loss": 16.9499, "step": 39878 }, { "epoch": 0.728956075090939, "grad_norm": 7.633950672931814, "learning_rate": 1.8061155363916343e-06, "loss": 17.7083, "step": 39879 }, { "epoch": 0.7289743542873856, "grad_norm": 5.684549324709554, "learning_rate": 1.8058877930735803e-06, "loss": 17.1972, "step": 39880 }, { "epoch": 0.728992633483832, "grad_norm": 6.2413096461408175, "learning_rate": 1.8056600609506137e-06, "loss": 17.4943, "step": 39881 }, { "epoch": 0.7290109126802786, "grad_norm": 6.591045269472039, "learning_rate": 1.8054323400235313e-06, "loss": 17.5082, "step": 39882 }, { "epoch": 0.7290291918767251, "grad_norm": 5.639515985800955, "learning_rate": 1.8052046302931303e-06, "loss": 17.1614, "step": 39883 }, { "epoch": 0.7290474710731716, "grad_norm": 6.707103137043871, "learning_rate": 1.8049769317602105e-06, "loss": 17.7874, "step": 39884 }, { "epoch": 0.7290657502696182, "grad_norm": 5.882843048294754, "learning_rate": 1.8047492444255688e-06, "loss": 17.2226, "step": 39885 }, { "epoch": 0.7290840294660647, "grad_norm": 6.364641089615364, "learning_rate": 1.804521568290003e-06, "loss": 17.5046, "step": 39886 }, { "epoch": 0.7291023086625112, "grad_norm": 6.875399147614697, "learning_rate": 1.8042939033543133e-06, "loss": 18.005, "step": 39887 }, { "epoch": 0.7291205878589577, "grad_norm": 4.34878315847172, "learning_rate": 1.804066249619295e-06, "loss": 16.7039, "step": 39888 }, { "epoch": 0.7291388670554042, "grad_norm": 5.369928439568421, "learning_rate": 1.803838607085749e-06, "loss": 17.1586, "step": 39889 }, { "epoch": 0.7291571462518508, "grad_norm": 5.970606833707025, "learning_rate": 1.8036109757544705e-06, "loss": 17.1173, "step": 39890 }, { "epoch": 0.7291754254482973, "grad_norm": 6.197657721067677, "learning_rate": 1.803383355626257e-06, "loss": 17.2901, "step": 39891 }, { "epoch": 0.7291937046447439, "grad_norm": 6.318122398223163, "learning_rate": 1.8031557467019095e-06, "loss": 17.11, "step": 39892 }, { "epoch": 0.7292119838411903, "grad_norm": 9.07949396477635, "learning_rate": 1.8029281489822232e-06, "loss": 18.5155, "step": 39893 }, { "epoch": 0.7292302630376368, "grad_norm": 6.887956449526163, "learning_rate": 1.8027005624679944e-06, "loss": 17.8549, "step": 39894 }, { "epoch": 0.7292485422340834, "grad_norm": 7.179462553700165, "learning_rate": 1.8024729871600232e-06, "loss": 17.6609, "step": 39895 }, { "epoch": 0.7292668214305299, "grad_norm": 8.73281226375291, "learning_rate": 1.802245423059108e-06, "loss": 18.286, "step": 39896 }, { "epoch": 0.7292851006269765, "grad_norm": 8.049296291565149, "learning_rate": 1.802017870166044e-06, "loss": 17.9938, "step": 39897 }, { "epoch": 0.729303379823423, "grad_norm": 8.076499558586072, "learning_rate": 1.8017903284816307e-06, "loss": 17.949, "step": 39898 }, { "epoch": 0.7293216590198695, "grad_norm": 5.887751666395419, "learning_rate": 1.8015627980066653e-06, "loss": 17.167, "step": 39899 }, { "epoch": 0.729339938216316, "grad_norm": 7.4699264783135675, "learning_rate": 1.801335278741943e-06, "loss": 17.8366, "step": 39900 }, { "epoch": 0.7293582174127625, "grad_norm": 5.392902175828637, "learning_rate": 1.8011077706882647e-06, "loss": 17.0982, "step": 39901 }, { "epoch": 0.7293764966092091, "grad_norm": 5.45328198855115, "learning_rate": 1.8008802738464242e-06, "loss": 17.2288, "step": 39902 }, { "epoch": 0.7293947758056556, "grad_norm": 7.036009824323026, "learning_rate": 1.8006527882172225e-06, "loss": 17.6879, "step": 39903 }, { "epoch": 0.7294130550021021, "grad_norm": 6.68482083218628, "learning_rate": 1.8004253138014539e-06, "loss": 17.5571, "step": 39904 }, { "epoch": 0.7294313341985487, "grad_norm": 5.8271122547516345, "learning_rate": 1.8001978505999163e-06, "loss": 17.3784, "step": 39905 }, { "epoch": 0.7294496133949951, "grad_norm": 6.031823877073431, "learning_rate": 1.79997039861341e-06, "loss": 17.3353, "step": 39906 }, { "epoch": 0.7294678925914417, "grad_norm": 6.293125541821624, "learning_rate": 1.799742957842729e-06, "loss": 17.2077, "step": 39907 }, { "epoch": 0.7294861717878882, "grad_norm": 5.178504824077104, "learning_rate": 1.79951552828867e-06, "loss": 17.0436, "step": 39908 }, { "epoch": 0.7295044509843347, "grad_norm": 6.339636167417472, "learning_rate": 1.7992881099520327e-06, "loss": 17.9581, "step": 39909 }, { "epoch": 0.7295227301807813, "grad_norm": 6.208537582495832, "learning_rate": 1.7990607028336126e-06, "loss": 17.0339, "step": 39910 }, { "epoch": 0.7295410093772278, "grad_norm": 5.444388687040091, "learning_rate": 1.7988333069342061e-06, "loss": 17.0922, "step": 39911 }, { "epoch": 0.7295592885736742, "grad_norm": 7.204400062072756, "learning_rate": 1.7986059222546104e-06, "loss": 18.2875, "step": 39912 }, { "epoch": 0.7295775677701208, "grad_norm": 6.157601634980343, "learning_rate": 1.7983785487956252e-06, "loss": 17.3736, "step": 39913 }, { "epoch": 0.7295958469665673, "grad_norm": 9.267901022622778, "learning_rate": 1.7981511865580437e-06, "loss": 18.0627, "step": 39914 }, { "epoch": 0.7296141261630139, "grad_norm": 6.10619120854284, "learning_rate": 1.797923835542666e-06, "loss": 17.3211, "step": 39915 }, { "epoch": 0.7296324053594604, "grad_norm": 5.961105465357812, "learning_rate": 1.7976964957502874e-06, "loss": 17.0493, "step": 39916 }, { "epoch": 0.7296506845559069, "grad_norm": 6.748211112547908, "learning_rate": 1.797469167181703e-06, "loss": 17.6826, "step": 39917 }, { "epoch": 0.7296689637523535, "grad_norm": 7.628941321052852, "learning_rate": 1.7972418498377126e-06, "loss": 18.0504, "step": 39918 }, { "epoch": 0.7296872429487999, "grad_norm": 7.698623694760362, "learning_rate": 1.7970145437191105e-06, "loss": 17.7645, "step": 39919 }, { "epoch": 0.7297055221452465, "grad_norm": 4.858313267353034, "learning_rate": 1.7967872488266957e-06, "loss": 16.6853, "step": 39920 }, { "epoch": 0.729723801341693, "grad_norm": 6.219571015684751, "learning_rate": 1.796559965161262e-06, "loss": 16.9131, "step": 39921 }, { "epoch": 0.7297420805381395, "grad_norm": 7.134827993140765, "learning_rate": 1.796332692723608e-06, "loss": 18.2969, "step": 39922 }, { "epoch": 0.7297603597345861, "grad_norm": 6.2976513970438015, "learning_rate": 1.7961054315145316e-06, "loss": 17.39, "step": 39923 }, { "epoch": 0.7297786389310326, "grad_norm": 6.981811150309985, "learning_rate": 1.7958781815348276e-06, "loss": 17.7452, "step": 39924 }, { "epoch": 0.7297969181274792, "grad_norm": 7.44582892654435, "learning_rate": 1.7956509427852903e-06, "loss": 17.6336, "step": 39925 }, { "epoch": 0.7298151973239256, "grad_norm": 6.1982192245134105, "learning_rate": 1.7954237152667202e-06, "loss": 17.3585, "step": 39926 }, { "epoch": 0.7298334765203721, "grad_norm": 6.266952690640476, "learning_rate": 1.7951964989799125e-06, "loss": 17.4565, "step": 39927 }, { "epoch": 0.7298517557168187, "grad_norm": 6.031588324170457, "learning_rate": 1.7949692939256608e-06, "loss": 17.3884, "step": 39928 }, { "epoch": 0.7298700349132652, "grad_norm": 6.0479301171071995, "learning_rate": 1.7947421001047655e-06, "loss": 17.2682, "step": 39929 }, { "epoch": 0.7298883141097118, "grad_norm": 6.451796386980329, "learning_rate": 1.7945149175180192e-06, "loss": 17.2217, "step": 39930 }, { "epoch": 0.7299065933061583, "grad_norm": 8.055278825940446, "learning_rate": 1.79428774616622e-06, "loss": 17.7569, "step": 39931 }, { "epoch": 0.7299248725026047, "grad_norm": 7.46622085952384, "learning_rate": 1.7940605860501658e-06, "loss": 17.9831, "step": 39932 }, { "epoch": 0.7299431516990513, "grad_norm": 6.440517037883992, "learning_rate": 1.7938334371706495e-06, "loss": 17.5848, "step": 39933 }, { "epoch": 0.7299614308954978, "grad_norm": 6.088920548961212, "learning_rate": 1.7936062995284704e-06, "loss": 17.397, "step": 39934 }, { "epoch": 0.7299797100919444, "grad_norm": 6.341696063220522, "learning_rate": 1.7933791731244226e-06, "loss": 17.3895, "step": 39935 }, { "epoch": 0.7299979892883909, "grad_norm": 5.765321077574353, "learning_rate": 1.793152057959301e-06, "loss": 17.2677, "step": 39936 }, { "epoch": 0.7300162684848374, "grad_norm": 7.360679984735247, "learning_rate": 1.792924954033905e-06, "loss": 17.645, "step": 39937 }, { "epoch": 0.730034547681284, "grad_norm": 6.272761626422184, "learning_rate": 1.7926978613490286e-06, "loss": 17.5627, "step": 39938 }, { "epoch": 0.7300528268777304, "grad_norm": 5.955262533321469, "learning_rate": 1.7924707799054663e-06, "loss": 17.4192, "step": 39939 }, { "epoch": 0.730071106074177, "grad_norm": 6.774564933609834, "learning_rate": 1.7922437097040157e-06, "loss": 17.5281, "step": 39940 }, { "epoch": 0.7300893852706235, "grad_norm": 7.47594586317504, "learning_rate": 1.792016650745474e-06, "loss": 17.7153, "step": 39941 }, { "epoch": 0.73010766446707, "grad_norm": 6.194143741704454, "learning_rate": 1.7917896030306342e-06, "loss": 17.1574, "step": 39942 }, { "epoch": 0.7301259436635166, "grad_norm": 5.972509047727488, "learning_rate": 1.7915625665602948e-06, "loss": 17.1946, "step": 39943 }, { "epoch": 0.730144222859963, "grad_norm": 6.219428388642045, "learning_rate": 1.79133554133525e-06, "loss": 17.1941, "step": 39944 }, { "epoch": 0.7301625020564096, "grad_norm": 7.660976738384432, "learning_rate": 1.7911085273562945e-06, "loss": 17.2687, "step": 39945 }, { "epoch": 0.7301807812528561, "grad_norm": 7.046213760638944, "learning_rate": 1.7908815246242266e-06, "loss": 17.6022, "step": 39946 }, { "epoch": 0.7301990604493026, "grad_norm": 9.003576877288651, "learning_rate": 1.7906545331398407e-06, "loss": 18.5435, "step": 39947 }, { "epoch": 0.7302173396457492, "grad_norm": 6.101445281061266, "learning_rate": 1.7904275529039305e-06, "loss": 17.7098, "step": 39948 }, { "epoch": 0.7302356188421957, "grad_norm": 7.824131462000369, "learning_rate": 1.7902005839172932e-06, "loss": 17.6183, "step": 39949 }, { "epoch": 0.7302538980386423, "grad_norm": 6.420307016333938, "learning_rate": 1.7899736261807239e-06, "loss": 17.4902, "step": 39950 }, { "epoch": 0.7302721772350887, "grad_norm": 5.8496323408676645, "learning_rate": 1.7897466796950202e-06, "loss": 17.2078, "step": 39951 }, { "epoch": 0.7302904564315352, "grad_norm": 7.2195950575625245, "learning_rate": 1.7895197444609763e-06, "loss": 17.6531, "step": 39952 }, { "epoch": 0.7303087356279818, "grad_norm": 6.077008765532146, "learning_rate": 1.7892928204793852e-06, "loss": 17.2577, "step": 39953 }, { "epoch": 0.7303270148244283, "grad_norm": 5.633032254724179, "learning_rate": 1.7890659077510457e-06, "loss": 17.21, "step": 39954 }, { "epoch": 0.7303452940208749, "grad_norm": 5.337453224411342, "learning_rate": 1.7888390062767513e-06, "loss": 17.1774, "step": 39955 }, { "epoch": 0.7303635732173214, "grad_norm": 7.312341669509301, "learning_rate": 1.7886121160572967e-06, "loss": 18.1967, "step": 39956 }, { "epoch": 0.7303818524137679, "grad_norm": 6.3187383733054565, "learning_rate": 1.788385237093479e-06, "loss": 17.5038, "step": 39957 }, { "epoch": 0.7304001316102144, "grad_norm": 5.866483266194028, "learning_rate": 1.7881583693860905e-06, "loss": 17.2906, "step": 39958 }, { "epoch": 0.7304184108066609, "grad_norm": 7.175576281304644, "learning_rate": 1.7879315129359287e-06, "loss": 17.7626, "step": 39959 }, { "epoch": 0.7304366900031075, "grad_norm": 5.503104781162683, "learning_rate": 1.7877046677437897e-06, "loss": 17.1613, "step": 39960 }, { "epoch": 0.730454969199554, "grad_norm": 5.612738136025206, "learning_rate": 1.787477833810467e-06, "loss": 17.1075, "step": 39961 }, { "epoch": 0.7304732483960005, "grad_norm": 5.4429903042147565, "learning_rate": 1.7872510111367542e-06, "loss": 17.0628, "step": 39962 }, { "epoch": 0.7304915275924471, "grad_norm": 4.382095362618393, "learning_rate": 1.7870241997234494e-06, "loss": 16.9146, "step": 39963 }, { "epoch": 0.7305098067888935, "grad_norm": 6.254720208564437, "learning_rate": 1.7867973995713444e-06, "loss": 17.2371, "step": 39964 }, { "epoch": 0.7305280859853401, "grad_norm": 6.500109308347487, "learning_rate": 1.7865706106812376e-06, "loss": 17.4347, "step": 39965 }, { "epoch": 0.7305463651817866, "grad_norm": 10.120652295880522, "learning_rate": 1.7863438330539213e-06, "loss": 17.426, "step": 39966 }, { "epoch": 0.7305646443782331, "grad_norm": 5.236802197221631, "learning_rate": 1.78611706669019e-06, "loss": 17.1741, "step": 39967 }, { "epoch": 0.7305829235746797, "grad_norm": 6.887471977494316, "learning_rate": 1.7858903115908388e-06, "loss": 17.5186, "step": 39968 }, { "epoch": 0.7306012027711262, "grad_norm": 6.450006964977922, "learning_rate": 1.7856635677566653e-06, "loss": 17.2517, "step": 39969 }, { "epoch": 0.7306194819675728, "grad_norm": 6.668908617438415, "learning_rate": 1.7854368351884604e-06, "loss": 17.3114, "step": 39970 }, { "epoch": 0.7306377611640192, "grad_norm": 5.484842432906003, "learning_rate": 1.785210113887022e-06, "loss": 17.0867, "step": 39971 }, { "epoch": 0.7306560403604657, "grad_norm": 5.981559224118097, "learning_rate": 1.7849834038531433e-06, "loss": 17.2601, "step": 39972 }, { "epoch": 0.7306743195569123, "grad_norm": 6.135587549917768, "learning_rate": 1.7847567050876169e-06, "loss": 17.2948, "step": 39973 }, { "epoch": 0.7306925987533588, "grad_norm": 8.737735977917547, "learning_rate": 1.784530017591241e-06, "loss": 18.2869, "step": 39974 }, { "epoch": 0.7307108779498054, "grad_norm": 5.94068766454087, "learning_rate": 1.7843033413648065e-06, "loss": 17.3595, "step": 39975 }, { "epoch": 0.7307291571462519, "grad_norm": 10.102287019863102, "learning_rate": 1.7840766764091117e-06, "loss": 18.5043, "step": 39976 }, { "epoch": 0.7307474363426983, "grad_norm": 5.222050591556784, "learning_rate": 1.7838500227249473e-06, "loss": 17.083, "step": 39977 }, { "epoch": 0.7307657155391449, "grad_norm": 8.63202623697125, "learning_rate": 1.7836233803131115e-06, "loss": 17.935, "step": 39978 }, { "epoch": 0.7307839947355914, "grad_norm": 6.138295882707473, "learning_rate": 1.7833967491743947e-06, "loss": 17.6268, "step": 39979 }, { "epoch": 0.7308022739320379, "grad_norm": 6.77033212350809, "learning_rate": 1.7831701293095949e-06, "loss": 17.8606, "step": 39980 }, { "epoch": 0.7308205531284845, "grad_norm": 7.9461834077673865, "learning_rate": 1.7829435207195028e-06, "loss": 18.4328, "step": 39981 }, { "epoch": 0.730838832324931, "grad_norm": 6.185565260558872, "learning_rate": 1.7827169234049162e-06, "loss": 17.543, "step": 39982 }, { "epoch": 0.7308571115213776, "grad_norm": 6.398434862161427, "learning_rate": 1.7824903373666275e-06, "loss": 17.4219, "step": 39983 }, { "epoch": 0.730875390717824, "grad_norm": 6.483413358837864, "learning_rate": 1.7822637626054296e-06, "loss": 17.6762, "step": 39984 }, { "epoch": 0.7308936699142705, "grad_norm": 6.518486430646497, "learning_rate": 1.782037199122118e-06, "loss": 17.594, "step": 39985 }, { "epoch": 0.7309119491107171, "grad_norm": 7.184076757276528, "learning_rate": 1.7818106469174878e-06, "loss": 17.8414, "step": 39986 }, { "epoch": 0.7309302283071636, "grad_norm": 7.78877128924227, "learning_rate": 1.7815841059923311e-06, "loss": 18.1594, "step": 39987 }, { "epoch": 0.7309485075036102, "grad_norm": 6.371813464932915, "learning_rate": 1.7813575763474445e-06, "loss": 17.5328, "step": 39988 }, { "epoch": 0.7309667867000567, "grad_norm": 6.20251976096519, "learning_rate": 1.78113105798362e-06, "loss": 17.4726, "step": 39989 }, { "epoch": 0.7309850658965031, "grad_norm": 5.199996930263788, "learning_rate": 1.7809045509016498e-06, "loss": 16.9479, "step": 39990 }, { "epoch": 0.7310033450929497, "grad_norm": 5.91490491386204, "learning_rate": 1.7806780551023322e-06, "loss": 17.1235, "step": 39991 }, { "epoch": 0.7310216242893962, "grad_norm": 6.370626786698488, "learning_rate": 1.7804515705864583e-06, "loss": 17.3708, "step": 39992 }, { "epoch": 0.7310399034858428, "grad_norm": 7.446112924832343, "learning_rate": 1.7802250973548208e-06, "loss": 17.8859, "step": 39993 }, { "epoch": 0.7310581826822893, "grad_norm": 5.713524890681987, "learning_rate": 1.7799986354082143e-06, "loss": 17.1472, "step": 39994 }, { "epoch": 0.7310764618787358, "grad_norm": 6.47780915828319, "learning_rate": 1.7797721847474337e-06, "loss": 17.5907, "step": 39995 }, { "epoch": 0.7310947410751824, "grad_norm": 6.672683531754353, "learning_rate": 1.779545745373274e-06, "loss": 17.619, "step": 39996 }, { "epoch": 0.7311130202716288, "grad_norm": 8.390817034955312, "learning_rate": 1.7793193172865264e-06, "loss": 17.6306, "step": 39997 }, { "epoch": 0.7311312994680754, "grad_norm": 4.907551233001372, "learning_rate": 1.7790929004879836e-06, "loss": 17.0404, "step": 39998 }, { "epoch": 0.7311495786645219, "grad_norm": 5.166612442304159, "learning_rate": 1.778866494978443e-06, "loss": 17.179, "step": 39999 }, { "epoch": 0.7311678578609684, "grad_norm": 5.7595415991002366, "learning_rate": 1.778640100758695e-06, "loss": 17.1532, "step": 40000 }, { "epoch": 0.731186137057415, "grad_norm": 4.827943740154998, "learning_rate": 1.778413717829533e-06, "loss": 16.782, "step": 40001 }, { "epoch": 0.7312044162538615, "grad_norm": 7.058157461369179, "learning_rate": 1.7781873461917526e-06, "loss": 18.0099, "step": 40002 }, { "epoch": 0.731222695450308, "grad_norm": 6.945533301028222, "learning_rate": 1.7779609858461444e-06, "loss": 17.5592, "step": 40003 }, { "epoch": 0.7312409746467545, "grad_norm": 8.062935659411059, "learning_rate": 1.7777346367935034e-06, "loss": 18.0783, "step": 40004 }, { "epoch": 0.731259253843201, "grad_norm": 6.916211692609043, "learning_rate": 1.7775082990346242e-06, "loss": 17.4875, "step": 40005 }, { "epoch": 0.7312775330396476, "grad_norm": 5.2351397381713065, "learning_rate": 1.777281972570299e-06, "loss": 17.1671, "step": 40006 }, { "epoch": 0.7312958122360941, "grad_norm": 6.496563065933042, "learning_rate": 1.7770556574013192e-06, "loss": 17.3356, "step": 40007 }, { "epoch": 0.7313140914325407, "grad_norm": 5.465016159531812, "learning_rate": 1.7768293535284814e-06, "loss": 17.1606, "step": 40008 }, { "epoch": 0.7313323706289871, "grad_norm": 6.831198474077216, "learning_rate": 1.776603060952577e-06, "loss": 17.6325, "step": 40009 }, { "epoch": 0.7313506498254336, "grad_norm": 6.499121572313348, "learning_rate": 1.7763767796743968e-06, "loss": 17.7528, "step": 40010 }, { "epoch": 0.7313689290218802, "grad_norm": 6.732001127260504, "learning_rate": 1.776150509694739e-06, "loss": 17.6237, "step": 40011 }, { "epoch": 0.7313872082183267, "grad_norm": 6.573614369846451, "learning_rate": 1.7759242510143915e-06, "loss": 17.5426, "step": 40012 }, { "epoch": 0.7314054874147733, "grad_norm": 5.810800635467816, "learning_rate": 1.7756980036341503e-06, "loss": 17.2111, "step": 40013 }, { "epoch": 0.7314237666112198, "grad_norm": 6.5031383625389845, "learning_rate": 1.7754717675548089e-06, "loss": 17.327, "step": 40014 }, { "epoch": 0.7314420458076663, "grad_norm": 6.156715256630049, "learning_rate": 1.775245542777158e-06, "loss": 17.2666, "step": 40015 }, { "epoch": 0.7314603250041128, "grad_norm": 7.304396226873272, "learning_rate": 1.775019329301993e-06, "loss": 17.9009, "step": 40016 }, { "epoch": 0.7314786042005593, "grad_norm": 5.179751638747976, "learning_rate": 1.7747931271301056e-06, "loss": 17.1351, "step": 40017 }, { "epoch": 0.7314968833970059, "grad_norm": 6.5520220298343395, "learning_rate": 1.7745669362622863e-06, "loss": 17.173, "step": 40018 }, { "epoch": 0.7315151625934524, "grad_norm": 7.209823083807149, "learning_rate": 1.774340756699332e-06, "loss": 17.8258, "step": 40019 }, { "epoch": 0.7315334417898989, "grad_norm": 20.24351980716399, "learning_rate": 1.7741145884420335e-06, "loss": 18.6085, "step": 40020 }, { "epoch": 0.7315517209863455, "grad_norm": 6.175010833044889, "learning_rate": 1.773888431491182e-06, "loss": 17.5791, "step": 40021 }, { "epoch": 0.731570000182792, "grad_norm": 6.474191226631989, "learning_rate": 1.773662285847571e-06, "loss": 17.9741, "step": 40022 }, { "epoch": 0.7315882793792385, "grad_norm": 7.663610914179552, "learning_rate": 1.7734361515119958e-06, "loss": 18.0016, "step": 40023 }, { "epoch": 0.731606558575685, "grad_norm": 5.600864354988545, "learning_rate": 1.7732100284852448e-06, "loss": 17.091, "step": 40024 }, { "epoch": 0.7316248377721315, "grad_norm": 6.498303366403298, "learning_rate": 1.7729839167681146e-06, "loss": 17.6632, "step": 40025 }, { "epoch": 0.7316431169685781, "grad_norm": 6.062477325910831, "learning_rate": 1.772757816361394e-06, "loss": 17.0949, "step": 40026 }, { "epoch": 0.7316613961650246, "grad_norm": 6.668157658894381, "learning_rate": 1.7725317272658794e-06, "loss": 17.6592, "step": 40027 }, { "epoch": 0.7316796753614712, "grad_norm": 5.433120390241393, "learning_rate": 1.7723056494823603e-06, "loss": 17.3775, "step": 40028 }, { "epoch": 0.7316979545579176, "grad_norm": 6.750544052630718, "learning_rate": 1.7720795830116284e-06, "loss": 17.1541, "step": 40029 }, { "epoch": 0.7317162337543641, "grad_norm": 6.226758040241041, "learning_rate": 1.771853527854479e-06, "loss": 17.4536, "step": 40030 }, { "epoch": 0.7317345129508107, "grad_norm": 5.230436941476668, "learning_rate": 1.7716274840117014e-06, "loss": 16.9121, "step": 40031 }, { "epoch": 0.7317527921472572, "grad_norm": 5.800908838375269, "learning_rate": 1.7714014514840895e-06, "loss": 17.1449, "step": 40032 }, { "epoch": 0.7317710713437038, "grad_norm": 7.057482256741849, "learning_rate": 1.771175430272437e-06, "loss": 17.963, "step": 40033 }, { "epoch": 0.7317893505401503, "grad_norm": 11.13994503578899, "learning_rate": 1.770949420377534e-06, "loss": 17.414, "step": 40034 }, { "epoch": 0.7318076297365967, "grad_norm": 7.0478294214167825, "learning_rate": 1.7707234218001717e-06, "loss": 17.7887, "step": 40035 }, { "epoch": 0.7318259089330433, "grad_norm": 5.536476567416602, "learning_rate": 1.7704974345411453e-06, "loss": 16.9201, "step": 40036 }, { "epoch": 0.7318441881294898, "grad_norm": 9.460541975495678, "learning_rate": 1.7702714586012448e-06, "loss": 17.98, "step": 40037 }, { "epoch": 0.7318624673259364, "grad_norm": 6.3805659714144145, "learning_rate": 1.7700454939812612e-06, "loss": 17.1776, "step": 40038 }, { "epoch": 0.7318807465223829, "grad_norm": 5.566600280831477, "learning_rate": 1.7698195406819879e-06, "loss": 17.2585, "step": 40039 }, { "epoch": 0.7318990257188294, "grad_norm": 5.080416620799836, "learning_rate": 1.7695935987042184e-06, "loss": 16.9904, "step": 40040 }, { "epoch": 0.731917304915276, "grad_norm": 5.505007983625556, "learning_rate": 1.7693676680487414e-06, "loss": 17.0032, "step": 40041 }, { "epoch": 0.7319355841117224, "grad_norm": 6.821866618952068, "learning_rate": 1.769141748716352e-06, "loss": 17.5916, "step": 40042 }, { "epoch": 0.731953863308169, "grad_norm": 5.764342271035188, "learning_rate": 1.7689158407078383e-06, "loss": 17.6371, "step": 40043 }, { "epoch": 0.7319721425046155, "grad_norm": 6.725839489749529, "learning_rate": 1.7686899440239963e-06, "loss": 17.6391, "step": 40044 }, { "epoch": 0.731990421701062, "grad_norm": 5.70819387375487, "learning_rate": 1.768464058665616e-06, "loss": 17.1956, "step": 40045 }, { "epoch": 0.7320087008975086, "grad_norm": 7.140594926369084, "learning_rate": 1.7682381846334867e-06, "loss": 17.5659, "step": 40046 }, { "epoch": 0.7320269800939551, "grad_norm": 5.470629714337782, "learning_rate": 1.7680123219284033e-06, "loss": 16.9021, "step": 40047 }, { "epoch": 0.7320452592904015, "grad_norm": 5.104633335645216, "learning_rate": 1.7677864705511555e-06, "loss": 16.931, "step": 40048 }, { "epoch": 0.7320635384868481, "grad_norm": 6.666922955166978, "learning_rate": 1.767560630502535e-06, "loss": 17.2817, "step": 40049 }, { "epoch": 0.7320818176832946, "grad_norm": 6.165581243387573, "learning_rate": 1.7673348017833358e-06, "loss": 17.1777, "step": 40050 }, { "epoch": 0.7321000968797412, "grad_norm": 5.966932066087164, "learning_rate": 1.7671089843943478e-06, "loss": 17.1177, "step": 40051 }, { "epoch": 0.7321183760761877, "grad_norm": 7.2887024987775115, "learning_rate": 1.7668831783363605e-06, "loss": 17.7126, "step": 40052 }, { "epoch": 0.7321366552726342, "grad_norm": 5.851859696017336, "learning_rate": 1.7666573836101691e-06, "loss": 17.0651, "step": 40053 }, { "epoch": 0.7321549344690808, "grad_norm": 5.987123744769429, "learning_rate": 1.7664316002165626e-06, "loss": 17.4619, "step": 40054 }, { "epoch": 0.7321732136655272, "grad_norm": 8.299873157507678, "learning_rate": 1.766205828156331e-06, "loss": 18.0948, "step": 40055 }, { "epoch": 0.7321914928619738, "grad_norm": 7.3016841962932615, "learning_rate": 1.7659800674302691e-06, "loss": 17.7177, "step": 40056 }, { "epoch": 0.7322097720584203, "grad_norm": 5.0353539884758955, "learning_rate": 1.765754318039165e-06, "loss": 16.8633, "step": 40057 }, { "epoch": 0.7322280512548668, "grad_norm": 5.953786750360093, "learning_rate": 1.7655285799838113e-06, "loss": 17.5374, "step": 40058 }, { "epoch": 0.7322463304513134, "grad_norm": 5.6966446116198135, "learning_rate": 1.7653028532650008e-06, "loss": 17.0373, "step": 40059 }, { "epoch": 0.7322646096477599, "grad_norm": 4.7607298936281355, "learning_rate": 1.7650771378835212e-06, "loss": 16.9011, "step": 40060 }, { "epoch": 0.7322828888442064, "grad_norm": 6.377818768274113, "learning_rate": 1.7648514338401673e-06, "loss": 17.3753, "step": 40061 }, { "epoch": 0.7323011680406529, "grad_norm": 7.45319645791785, "learning_rate": 1.7646257411357287e-06, "loss": 18.2253, "step": 40062 }, { "epoch": 0.7323194472370994, "grad_norm": 6.395194087429203, "learning_rate": 1.7644000597709943e-06, "loss": 17.2186, "step": 40063 }, { "epoch": 0.732337726433546, "grad_norm": 7.840946375864969, "learning_rate": 1.7641743897467584e-06, "loss": 18.1058, "step": 40064 }, { "epoch": 0.7323560056299925, "grad_norm": 7.412423321570765, "learning_rate": 1.7639487310638108e-06, "loss": 18.1423, "step": 40065 }, { "epoch": 0.7323742848264391, "grad_norm": 6.373953870703393, "learning_rate": 1.7637230837229402e-06, "loss": 17.5125, "step": 40066 }, { "epoch": 0.7323925640228856, "grad_norm": 5.073513301303581, "learning_rate": 1.7634974477249394e-06, "loss": 17.0674, "step": 40067 }, { "epoch": 0.732410843219332, "grad_norm": 5.202511410090087, "learning_rate": 1.763271823070601e-06, "loss": 16.8862, "step": 40068 }, { "epoch": 0.7324291224157786, "grad_norm": 5.801078812998043, "learning_rate": 1.7630462097607122e-06, "loss": 17.171, "step": 40069 }, { "epoch": 0.7324474016122251, "grad_norm": 5.69013483613214, "learning_rate": 1.762820607796067e-06, "loss": 17.4589, "step": 40070 }, { "epoch": 0.7324656808086717, "grad_norm": 10.056908699676338, "learning_rate": 1.762595017177453e-06, "loss": 18.0435, "step": 40071 }, { "epoch": 0.7324839600051182, "grad_norm": 6.593359571313371, "learning_rate": 1.7623694379056644e-06, "loss": 17.6363, "step": 40072 }, { "epoch": 0.7325022392015647, "grad_norm": 5.794422832533871, "learning_rate": 1.7621438699814901e-06, "loss": 17.311, "step": 40073 }, { "epoch": 0.7325205183980112, "grad_norm": 6.472317188307807, "learning_rate": 1.7619183134057189e-06, "loss": 17.3731, "step": 40074 }, { "epoch": 0.7325387975944577, "grad_norm": 6.512095429911133, "learning_rate": 1.7616927681791445e-06, "loss": 17.437, "step": 40075 }, { "epoch": 0.7325570767909043, "grad_norm": 6.797054958127774, "learning_rate": 1.7614672343025546e-06, "loss": 17.7874, "step": 40076 }, { "epoch": 0.7325753559873508, "grad_norm": 6.401041962516825, "learning_rate": 1.7612417117767406e-06, "loss": 17.4976, "step": 40077 }, { "epoch": 0.7325936351837973, "grad_norm": 7.759141863785969, "learning_rate": 1.761016200602495e-06, "loss": 18.1067, "step": 40078 }, { "epoch": 0.7326119143802439, "grad_norm": 6.3705656566655895, "learning_rate": 1.7607907007806068e-06, "loss": 17.5135, "step": 40079 }, { "epoch": 0.7326301935766903, "grad_norm": 8.815510510010302, "learning_rate": 1.7605652123118643e-06, "loss": 18.0894, "step": 40080 }, { "epoch": 0.7326484727731369, "grad_norm": 6.898109553453921, "learning_rate": 1.7603397351970613e-06, "loss": 17.4052, "step": 40081 }, { "epoch": 0.7326667519695834, "grad_norm": 10.567106683887975, "learning_rate": 1.7601142694369865e-06, "loss": 18.1296, "step": 40082 }, { "epoch": 0.7326850311660299, "grad_norm": 6.353363394913418, "learning_rate": 1.7598888150324283e-06, "loss": 17.2524, "step": 40083 }, { "epoch": 0.7327033103624765, "grad_norm": 7.422652017683622, "learning_rate": 1.7596633719841806e-06, "loss": 17.6439, "step": 40084 }, { "epoch": 0.732721589558923, "grad_norm": 6.284210208681831, "learning_rate": 1.7594379402930296e-06, "loss": 17.461, "step": 40085 }, { "epoch": 0.7327398687553696, "grad_norm": 5.523794509773913, "learning_rate": 1.7592125199597675e-06, "loss": 17.1606, "step": 40086 }, { "epoch": 0.732758147951816, "grad_norm": 7.248699791763819, "learning_rate": 1.7589871109851858e-06, "loss": 17.9704, "step": 40087 }, { "epoch": 0.7327764271482625, "grad_norm": 6.955879151283235, "learning_rate": 1.7587617133700718e-06, "loss": 17.6461, "step": 40088 }, { "epoch": 0.7327947063447091, "grad_norm": 6.892377965805742, "learning_rate": 1.7585363271152178e-06, "loss": 17.6481, "step": 40089 }, { "epoch": 0.7328129855411556, "grad_norm": 6.318864535441415, "learning_rate": 1.758310952221413e-06, "loss": 17.6524, "step": 40090 }, { "epoch": 0.7328312647376022, "grad_norm": 6.057114899051877, "learning_rate": 1.7580855886894454e-06, "loss": 17.5572, "step": 40091 }, { "epoch": 0.7328495439340487, "grad_norm": 6.198029804551043, "learning_rate": 1.757860236520108e-06, "loss": 17.2998, "step": 40092 }, { "epoch": 0.7328678231304951, "grad_norm": 5.8686652264660975, "learning_rate": 1.7576348957141898e-06, "loss": 17.0655, "step": 40093 }, { "epoch": 0.7328861023269417, "grad_norm": 6.362947410713803, "learning_rate": 1.7574095662724777e-06, "loss": 17.3616, "step": 40094 }, { "epoch": 0.7329043815233882, "grad_norm": 6.477357243306707, "learning_rate": 1.7571842481957634e-06, "loss": 17.2526, "step": 40095 }, { "epoch": 0.7329226607198348, "grad_norm": 5.98863792720464, "learning_rate": 1.7569589414848388e-06, "loss": 17.2128, "step": 40096 }, { "epoch": 0.7329409399162813, "grad_norm": 6.649513645064077, "learning_rate": 1.7567336461404905e-06, "loss": 17.268, "step": 40097 }, { "epoch": 0.7329592191127278, "grad_norm": 6.659208937864625, "learning_rate": 1.7565083621635105e-06, "loss": 17.4237, "step": 40098 }, { "epoch": 0.7329774983091744, "grad_norm": 8.293715586171746, "learning_rate": 1.7562830895546873e-06, "loss": 18.4473, "step": 40099 }, { "epoch": 0.7329957775056208, "grad_norm": 7.1591513662992785, "learning_rate": 1.7560578283148088e-06, "loss": 17.6988, "step": 40100 }, { "epoch": 0.7330140567020674, "grad_norm": 9.7070373694978, "learning_rate": 1.7558325784446677e-06, "loss": 18.84, "step": 40101 }, { "epoch": 0.7330323358985139, "grad_norm": 6.930047951051364, "learning_rate": 1.7556073399450501e-06, "loss": 17.813, "step": 40102 }, { "epoch": 0.7330506150949604, "grad_norm": 6.116045544033158, "learning_rate": 1.7553821128167476e-06, "loss": 17.2027, "step": 40103 }, { "epoch": 0.733068894291407, "grad_norm": 5.9079422700342255, "learning_rate": 1.7551568970605503e-06, "loss": 17.11, "step": 40104 }, { "epoch": 0.7330871734878535, "grad_norm": 5.778676286512635, "learning_rate": 1.7549316926772453e-06, "loss": 17.3296, "step": 40105 }, { "epoch": 0.7331054526843, "grad_norm": 5.487832457743216, "learning_rate": 1.7547064996676249e-06, "loss": 17.0306, "step": 40106 }, { "epoch": 0.7331237318807465, "grad_norm": 5.512668582082829, "learning_rate": 1.7544813180324766e-06, "loss": 17.1946, "step": 40107 }, { "epoch": 0.733142011077193, "grad_norm": 6.268357203096707, "learning_rate": 1.7542561477725878e-06, "loss": 17.1819, "step": 40108 }, { "epoch": 0.7331602902736396, "grad_norm": 5.209711860519398, "learning_rate": 1.7540309888887513e-06, "loss": 17.2729, "step": 40109 }, { "epoch": 0.7331785694700861, "grad_norm": 6.663928723728273, "learning_rate": 1.7538058413817543e-06, "loss": 17.2052, "step": 40110 }, { "epoch": 0.7331968486665327, "grad_norm": 4.975901257189617, "learning_rate": 1.7535807052523845e-06, "loss": 16.8922, "step": 40111 }, { "epoch": 0.7332151278629792, "grad_norm": 5.980782648730183, "learning_rate": 1.753355580501433e-06, "loss": 17.638, "step": 40112 }, { "epoch": 0.7332334070594256, "grad_norm": 5.919956123958795, "learning_rate": 1.7531304671296901e-06, "loss": 17.6592, "step": 40113 }, { "epoch": 0.7332516862558722, "grad_norm": 6.170409634425412, "learning_rate": 1.7529053651379407e-06, "loss": 17.0031, "step": 40114 }, { "epoch": 0.7332699654523187, "grad_norm": 5.524630737281941, "learning_rate": 1.7526802745269788e-06, "loss": 17.0602, "step": 40115 }, { "epoch": 0.7332882446487652, "grad_norm": 6.252434934209826, "learning_rate": 1.7524551952975904e-06, "loss": 17.4921, "step": 40116 }, { "epoch": 0.7333065238452118, "grad_norm": 6.3758663487863725, "learning_rate": 1.7522301274505631e-06, "loss": 17.2413, "step": 40117 }, { "epoch": 0.7333248030416583, "grad_norm": 5.323149534116769, "learning_rate": 1.7520050709866887e-06, "loss": 17.1791, "step": 40118 }, { "epoch": 0.7333430822381048, "grad_norm": 6.25359283763567, "learning_rate": 1.7517800259067536e-06, "loss": 17.4893, "step": 40119 }, { "epoch": 0.7333613614345513, "grad_norm": 6.574135379532781, "learning_rate": 1.7515549922115487e-06, "loss": 17.8587, "step": 40120 }, { "epoch": 0.7333796406309978, "grad_norm": 5.304954433358347, "learning_rate": 1.7513299699018604e-06, "loss": 16.9049, "step": 40121 }, { "epoch": 0.7333979198274444, "grad_norm": 5.283312477157595, "learning_rate": 1.751104958978479e-06, "loss": 17.3529, "step": 40122 }, { "epoch": 0.7334161990238909, "grad_norm": 7.037958487024185, "learning_rate": 1.7508799594421938e-06, "loss": 17.7011, "step": 40123 }, { "epoch": 0.7334344782203375, "grad_norm": 5.516563292069407, "learning_rate": 1.7506549712937926e-06, "loss": 17.1531, "step": 40124 }, { "epoch": 0.733452757416784, "grad_norm": 5.942006754335745, "learning_rate": 1.7504299945340625e-06, "loss": 17.3547, "step": 40125 }, { "epoch": 0.7334710366132304, "grad_norm": 5.96554182489341, "learning_rate": 1.7502050291637945e-06, "loss": 17.5734, "step": 40126 }, { "epoch": 0.733489315809677, "grad_norm": 5.095743715710485, "learning_rate": 1.7499800751837759e-06, "loss": 17.1173, "step": 40127 }, { "epoch": 0.7335075950061235, "grad_norm": 6.851613629984982, "learning_rate": 1.7497551325947931e-06, "loss": 17.7128, "step": 40128 }, { "epoch": 0.7335258742025701, "grad_norm": 5.505929516106891, "learning_rate": 1.7495302013976389e-06, "loss": 17.7069, "step": 40129 }, { "epoch": 0.7335441533990166, "grad_norm": 6.038820419586007, "learning_rate": 1.7493052815930973e-06, "loss": 17.3999, "step": 40130 }, { "epoch": 0.733562432595463, "grad_norm": 5.8049524271797885, "learning_rate": 1.7490803731819583e-06, "loss": 17.2248, "step": 40131 }, { "epoch": 0.7335807117919096, "grad_norm": 6.609071663902686, "learning_rate": 1.7488554761650128e-06, "loss": 17.7456, "step": 40132 }, { "epoch": 0.7335989909883561, "grad_norm": 5.459564951848143, "learning_rate": 1.7486305905430446e-06, "loss": 17.1258, "step": 40133 }, { "epoch": 0.7336172701848027, "grad_norm": 9.211197192535074, "learning_rate": 1.748405716316846e-06, "loss": 18.3982, "step": 40134 }, { "epoch": 0.7336355493812492, "grad_norm": 6.58053880939736, "learning_rate": 1.748180853487203e-06, "loss": 17.4667, "step": 40135 }, { "epoch": 0.7336538285776957, "grad_norm": 5.66820967140605, "learning_rate": 1.7479560020549018e-06, "loss": 17.9994, "step": 40136 }, { "epoch": 0.7336721077741423, "grad_norm": 6.40073792821486, "learning_rate": 1.747731162020735e-06, "loss": 17.3782, "step": 40137 }, { "epoch": 0.7336903869705887, "grad_norm": 7.449240627223101, "learning_rate": 1.747506333385488e-06, "loss": 17.5297, "step": 40138 }, { "epoch": 0.7337086661670353, "grad_norm": 7.866965838749837, "learning_rate": 1.7472815161499473e-06, "loss": 18.1575, "step": 40139 }, { "epoch": 0.7337269453634818, "grad_norm": 6.421160259577664, "learning_rate": 1.7470567103149028e-06, "loss": 17.2486, "step": 40140 }, { "epoch": 0.7337452245599283, "grad_norm": 6.8642639517234265, "learning_rate": 1.7468319158811442e-06, "loss": 17.2085, "step": 40141 }, { "epoch": 0.7337635037563749, "grad_norm": 6.884385836210496, "learning_rate": 1.7466071328494554e-06, "loss": 17.1767, "step": 40142 }, { "epoch": 0.7337817829528214, "grad_norm": 4.638782558210554, "learning_rate": 1.7463823612206281e-06, "loss": 16.8316, "step": 40143 }, { "epoch": 0.733800062149268, "grad_norm": 6.314713980108017, "learning_rate": 1.7461576009954478e-06, "loss": 17.3761, "step": 40144 }, { "epoch": 0.7338183413457144, "grad_norm": 6.841411607621984, "learning_rate": 1.7459328521747016e-06, "loss": 17.8774, "step": 40145 }, { "epoch": 0.7338366205421609, "grad_norm": 6.812182236905962, "learning_rate": 1.7457081147591798e-06, "loss": 17.5949, "step": 40146 }, { "epoch": 0.7338548997386075, "grad_norm": 6.048591966592954, "learning_rate": 1.7454833887496692e-06, "loss": 16.9852, "step": 40147 }, { "epoch": 0.733873178935054, "grad_norm": 7.386886403167459, "learning_rate": 1.7452586741469546e-06, "loss": 17.6694, "step": 40148 }, { "epoch": 0.7338914581315006, "grad_norm": 7.003278142709092, "learning_rate": 1.745033970951826e-06, "loss": 17.8778, "step": 40149 }, { "epoch": 0.7339097373279471, "grad_norm": 6.375074329075433, "learning_rate": 1.7448092791650712e-06, "loss": 17.341, "step": 40150 }, { "epoch": 0.7339280165243935, "grad_norm": 5.679157785499856, "learning_rate": 1.7445845987874787e-06, "loss": 17.413, "step": 40151 }, { "epoch": 0.7339462957208401, "grad_norm": 6.521500602623803, "learning_rate": 1.744359929819835e-06, "loss": 17.3975, "step": 40152 }, { "epoch": 0.7339645749172866, "grad_norm": 5.90168399235684, "learning_rate": 1.7441352722629256e-06, "loss": 17.2862, "step": 40153 }, { "epoch": 0.7339828541137332, "grad_norm": 7.207761456954545, "learning_rate": 1.743910626117541e-06, "loss": 17.4479, "step": 40154 }, { "epoch": 0.7340011333101797, "grad_norm": 7.020401335903217, "learning_rate": 1.7436859913844672e-06, "loss": 17.458, "step": 40155 }, { "epoch": 0.7340194125066262, "grad_norm": 6.1777758230639215, "learning_rate": 1.7434613680644897e-06, "loss": 17.2435, "step": 40156 }, { "epoch": 0.7340376917030728, "grad_norm": 5.141813575080604, "learning_rate": 1.743236756158399e-06, "loss": 16.8786, "step": 40157 }, { "epoch": 0.7340559708995192, "grad_norm": 5.369690247943649, "learning_rate": 1.7430121556669794e-06, "loss": 16.8834, "step": 40158 }, { "epoch": 0.7340742500959658, "grad_norm": 5.915241624422117, "learning_rate": 1.7427875665910198e-06, "loss": 17.3453, "step": 40159 }, { "epoch": 0.7340925292924123, "grad_norm": 5.320006731372834, "learning_rate": 1.7425629889313083e-06, "loss": 17.1743, "step": 40160 }, { "epoch": 0.7341108084888588, "grad_norm": 5.328678577037648, "learning_rate": 1.7423384226886315e-06, "loss": 17.2882, "step": 40161 }, { "epoch": 0.7341290876853054, "grad_norm": 6.0894830778973, "learning_rate": 1.7421138678637733e-06, "loss": 17.5711, "step": 40162 }, { "epoch": 0.7341473668817519, "grad_norm": 5.686608620331851, "learning_rate": 1.7418893244575257e-06, "loss": 17.2098, "step": 40163 }, { "epoch": 0.7341656460781985, "grad_norm": 5.836717882455965, "learning_rate": 1.7416647924706714e-06, "loss": 17.2526, "step": 40164 }, { "epoch": 0.7341839252746449, "grad_norm": 6.042763354627989, "learning_rate": 1.7414402719040008e-06, "loss": 17.3299, "step": 40165 }, { "epoch": 0.7342022044710914, "grad_norm": 7.110617065013169, "learning_rate": 1.7412157627582977e-06, "loss": 17.9485, "step": 40166 }, { "epoch": 0.734220483667538, "grad_norm": 7.537829856161488, "learning_rate": 1.7409912650343524e-06, "loss": 17.7309, "step": 40167 }, { "epoch": 0.7342387628639845, "grad_norm": 6.71698428631793, "learning_rate": 1.7407667787329479e-06, "loss": 17.7234, "step": 40168 }, { "epoch": 0.7342570420604311, "grad_norm": 7.329102911983511, "learning_rate": 1.7405423038548747e-06, "loss": 17.7624, "step": 40169 }, { "epoch": 0.7342753212568776, "grad_norm": 7.93994690059617, "learning_rate": 1.7403178404009164e-06, "loss": 18.1907, "step": 40170 }, { "epoch": 0.734293600453324, "grad_norm": 5.74762569090187, "learning_rate": 1.740093388371863e-06, "loss": 17.4846, "step": 40171 }, { "epoch": 0.7343118796497706, "grad_norm": 5.817453737291571, "learning_rate": 1.7398689477684994e-06, "loss": 17.2364, "step": 40172 }, { "epoch": 0.7343301588462171, "grad_norm": 5.1557591189324325, "learning_rate": 1.7396445185916105e-06, "loss": 17.0847, "step": 40173 }, { "epoch": 0.7343484380426637, "grad_norm": 6.08283687079633, "learning_rate": 1.7394201008419858e-06, "loss": 17.3501, "step": 40174 }, { "epoch": 0.7343667172391102, "grad_norm": 6.448560760793484, "learning_rate": 1.7391956945204097e-06, "loss": 17.8506, "step": 40175 }, { "epoch": 0.7343849964355567, "grad_norm": 6.76193448460408, "learning_rate": 1.7389712996276698e-06, "loss": 17.4571, "step": 40176 }, { "epoch": 0.7344032756320032, "grad_norm": 6.771870690473926, "learning_rate": 1.7387469161645536e-06, "loss": 17.5736, "step": 40177 }, { "epoch": 0.7344215548284497, "grad_norm": 6.65150559098333, "learning_rate": 1.7385225441318466e-06, "loss": 17.7064, "step": 40178 }, { "epoch": 0.7344398340248963, "grad_norm": 7.377345796933745, "learning_rate": 1.7382981835303337e-06, "loss": 17.7908, "step": 40179 }, { "epoch": 0.7344581132213428, "grad_norm": 7.3405790139851055, "learning_rate": 1.738073834360804e-06, "loss": 17.5147, "step": 40180 }, { "epoch": 0.7344763924177893, "grad_norm": 6.363194658754211, "learning_rate": 1.737849496624041e-06, "loss": 17.7661, "step": 40181 }, { "epoch": 0.7344946716142359, "grad_norm": 5.620599890589894, "learning_rate": 1.7376251703208342e-06, "loss": 17.2448, "step": 40182 }, { "epoch": 0.7345129508106824, "grad_norm": 5.741620342525888, "learning_rate": 1.7374008554519678e-06, "loss": 17.3437, "step": 40183 }, { "epoch": 0.7345312300071288, "grad_norm": 6.042911331837375, "learning_rate": 1.7371765520182266e-06, "loss": 17.1986, "step": 40184 }, { "epoch": 0.7345495092035754, "grad_norm": 6.094562282816864, "learning_rate": 1.7369522600203986e-06, "loss": 17.3427, "step": 40185 }, { "epoch": 0.7345677884000219, "grad_norm": 7.558679493140278, "learning_rate": 1.7367279794592718e-06, "loss": 17.9731, "step": 40186 }, { "epoch": 0.7345860675964685, "grad_norm": 5.216131867199876, "learning_rate": 1.736503710335628e-06, "loss": 17.0245, "step": 40187 }, { "epoch": 0.734604346792915, "grad_norm": 6.124632396441021, "learning_rate": 1.7362794526502575e-06, "loss": 17.4576, "step": 40188 }, { "epoch": 0.7346226259893615, "grad_norm": 5.837995566638777, "learning_rate": 1.7360552064039442e-06, "loss": 17.3826, "step": 40189 }, { "epoch": 0.734640905185808, "grad_norm": 6.038098182546085, "learning_rate": 1.7358309715974725e-06, "loss": 17.1361, "step": 40190 }, { "epoch": 0.7346591843822545, "grad_norm": 6.8285115795952445, "learning_rate": 1.7356067482316314e-06, "loss": 17.7383, "step": 40191 }, { "epoch": 0.7346774635787011, "grad_norm": 7.296759445446664, "learning_rate": 1.7353825363072058e-06, "loss": 18.0023, "step": 40192 }, { "epoch": 0.7346957427751476, "grad_norm": 6.340155532232268, "learning_rate": 1.735158335824979e-06, "loss": 17.5006, "step": 40193 }, { "epoch": 0.7347140219715941, "grad_norm": 5.5631935386742395, "learning_rate": 1.7349341467857394e-06, "loss": 17.1341, "step": 40194 }, { "epoch": 0.7347323011680407, "grad_norm": 6.990423592927073, "learning_rate": 1.734709969190272e-06, "loss": 17.7202, "step": 40195 }, { "epoch": 0.7347505803644871, "grad_norm": 4.811255396159706, "learning_rate": 1.7344858030393647e-06, "loss": 16.8885, "step": 40196 }, { "epoch": 0.7347688595609337, "grad_norm": 6.065900555530661, "learning_rate": 1.734261648333801e-06, "loss": 17.4631, "step": 40197 }, { "epoch": 0.7347871387573802, "grad_norm": 5.947940810878115, "learning_rate": 1.7340375050743653e-06, "loss": 17.1931, "step": 40198 }, { "epoch": 0.7348054179538267, "grad_norm": 7.396647166974184, "learning_rate": 1.7338133732618468e-06, "loss": 17.8003, "step": 40199 }, { "epoch": 0.7348236971502733, "grad_norm": 5.790568908148222, "learning_rate": 1.7335892528970282e-06, "loss": 17.014, "step": 40200 }, { "epoch": 0.7348419763467198, "grad_norm": 5.8187563065794246, "learning_rate": 1.7333651439806948e-06, "loss": 17.1557, "step": 40201 }, { "epoch": 0.7348602555431664, "grad_norm": 6.271025385179732, "learning_rate": 1.7331410465136344e-06, "loss": 17.5541, "step": 40202 }, { "epoch": 0.7348785347396128, "grad_norm": 6.529741226382725, "learning_rate": 1.7329169604966295e-06, "loss": 17.4596, "step": 40203 }, { "epoch": 0.7348968139360593, "grad_norm": 5.6905421965553735, "learning_rate": 1.7326928859304677e-06, "loss": 16.9862, "step": 40204 }, { "epoch": 0.7349150931325059, "grad_norm": 6.665457319061705, "learning_rate": 1.7324688228159347e-06, "loss": 17.9771, "step": 40205 }, { "epoch": 0.7349333723289524, "grad_norm": 6.478355438581015, "learning_rate": 1.7322447711538155e-06, "loss": 17.6628, "step": 40206 }, { "epoch": 0.734951651525399, "grad_norm": 5.405147708614058, "learning_rate": 1.7320207309448927e-06, "loss": 17.2899, "step": 40207 }, { "epoch": 0.7349699307218455, "grad_norm": 6.35835353454378, "learning_rate": 1.7317967021899556e-06, "loss": 17.3988, "step": 40208 }, { "epoch": 0.734988209918292, "grad_norm": 7.446962248434067, "learning_rate": 1.7315726848897879e-06, "loss": 17.894, "step": 40209 }, { "epoch": 0.7350064891147385, "grad_norm": 5.513168941157056, "learning_rate": 1.7313486790451718e-06, "loss": 17.2069, "step": 40210 }, { "epoch": 0.735024768311185, "grad_norm": 5.3384603633063685, "learning_rate": 1.731124684656897e-06, "loss": 16.9981, "step": 40211 }, { "epoch": 0.7350430475076316, "grad_norm": 8.288022914348877, "learning_rate": 1.7309007017257452e-06, "loss": 18.1838, "step": 40212 }, { "epoch": 0.7350613267040781, "grad_norm": 5.751004321250714, "learning_rate": 1.7306767302525023e-06, "loss": 17.2044, "step": 40213 }, { "epoch": 0.7350796059005246, "grad_norm": 5.1989491481051315, "learning_rate": 1.7304527702379552e-06, "loss": 17.0406, "step": 40214 }, { "epoch": 0.7350978850969712, "grad_norm": 6.838481515368296, "learning_rate": 1.730228821682886e-06, "loss": 17.6573, "step": 40215 }, { "epoch": 0.7351161642934176, "grad_norm": 6.571934868084534, "learning_rate": 1.730004884588083e-06, "loss": 17.2906, "step": 40216 }, { "epoch": 0.7351344434898642, "grad_norm": 5.562115523813797, "learning_rate": 1.729780958954329e-06, "loss": 17.2293, "step": 40217 }, { "epoch": 0.7351527226863107, "grad_norm": 5.3043597438054775, "learning_rate": 1.7295570447824073e-06, "loss": 17.1363, "step": 40218 }, { "epoch": 0.7351710018827572, "grad_norm": 8.787102822466174, "learning_rate": 1.7293331420731058e-06, "loss": 18.6367, "step": 40219 }, { "epoch": 0.7351892810792038, "grad_norm": 7.673038226004741, "learning_rate": 1.729109250827208e-06, "loss": 17.6963, "step": 40220 }, { "epoch": 0.7352075602756503, "grad_norm": 5.297962499590986, "learning_rate": 1.7288853710454967e-06, "loss": 17.2083, "step": 40221 }, { "epoch": 0.7352258394720969, "grad_norm": 6.3021722514335305, "learning_rate": 1.7286615027287585e-06, "loss": 17.4353, "step": 40222 }, { "epoch": 0.7352441186685433, "grad_norm": 5.712950836540563, "learning_rate": 1.728437645877779e-06, "loss": 17.3135, "step": 40223 }, { "epoch": 0.7352623978649898, "grad_norm": 4.567704914194408, "learning_rate": 1.7282138004933407e-06, "loss": 16.6629, "step": 40224 }, { "epoch": 0.7352806770614364, "grad_norm": 7.10611212166072, "learning_rate": 1.72798996657623e-06, "loss": 17.8234, "step": 40225 }, { "epoch": 0.7352989562578829, "grad_norm": 6.869692747908795, "learning_rate": 1.7277661441272299e-06, "loss": 17.1807, "step": 40226 }, { "epoch": 0.7353172354543295, "grad_norm": 6.75120682283136, "learning_rate": 1.7275423331471264e-06, "loss": 17.4754, "step": 40227 }, { "epoch": 0.735335514650776, "grad_norm": 9.440714524093142, "learning_rate": 1.7273185336367032e-06, "loss": 17.5221, "step": 40228 }, { "epoch": 0.7353537938472224, "grad_norm": 6.723309040837293, "learning_rate": 1.727094745596743e-06, "loss": 17.3286, "step": 40229 }, { "epoch": 0.735372073043669, "grad_norm": 7.181173465513979, "learning_rate": 1.7268709690280317e-06, "loss": 17.648, "step": 40230 }, { "epoch": 0.7353903522401155, "grad_norm": 6.188055186838678, "learning_rate": 1.7266472039313552e-06, "loss": 17.2508, "step": 40231 }, { "epoch": 0.7354086314365621, "grad_norm": 6.5227633056236645, "learning_rate": 1.7264234503074944e-06, "loss": 17.1497, "step": 40232 }, { "epoch": 0.7354269106330086, "grad_norm": 6.25212943748027, "learning_rate": 1.7261997081572374e-06, "loss": 17.0186, "step": 40233 }, { "epoch": 0.7354451898294551, "grad_norm": 6.282024520065195, "learning_rate": 1.7259759774813661e-06, "loss": 17.2997, "step": 40234 }, { "epoch": 0.7354634690259017, "grad_norm": 5.66371026770243, "learning_rate": 1.725752258280663e-06, "loss": 17.068, "step": 40235 }, { "epoch": 0.7354817482223481, "grad_norm": 7.609766452444529, "learning_rate": 1.725528550555916e-06, "loss": 17.8004, "step": 40236 }, { "epoch": 0.7355000274187947, "grad_norm": 7.594504251817194, "learning_rate": 1.7253048543079075e-06, "loss": 17.9744, "step": 40237 }, { "epoch": 0.7355183066152412, "grad_norm": 5.9978671450467616, "learning_rate": 1.7250811695374193e-06, "loss": 17.4601, "step": 40238 }, { "epoch": 0.7355365858116877, "grad_norm": 7.122148549784416, "learning_rate": 1.7248574962452374e-06, "loss": 17.7579, "step": 40239 }, { "epoch": 0.7355548650081343, "grad_norm": 6.576195164876943, "learning_rate": 1.7246338344321461e-06, "loss": 17.5218, "step": 40240 }, { "epoch": 0.7355731442045808, "grad_norm": 6.016997819655754, "learning_rate": 1.7244101840989307e-06, "loss": 17.3721, "step": 40241 }, { "epoch": 0.7355914234010273, "grad_norm": 13.996421287253213, "learning_rate": 1.724186545246373e-06, "loss": 19.9482, "step": 40242 }, { "epoch": 0.7356097025974738, "grad_norm": 5.37359710533072, "learning_rate": 1.7239629178752559e-06, "loss": 16.9712, "step": 40243 }, { "epoch": 0.7356279817939203, "grad_norm": 6.08939889446343, "learning_rate": 1.7237393019863657e-06, "loss": 17.4136, "step": 40244 }, { "epoch": 0.7356462609903669, "grad_norm": 5.717939618960664, "learning_rate": 1.723515697580485e-06, "loss": 17.1352, "step": 40245 }, { "epoch": 0.7356645401868134, "grad_norm": 5.291232616953178, "learning_rate": 1.7232921046583962e-06, "loss": 17.0859, "step": 40246 }, { "epoch": 0.73568281938326, "grad_norm": 6.945724217972945, "learning_rate": 1.7230685232208856e-06, "loss": 17.8409, "step": 40247 }, { "epoch": 0.7357010985797064, "grad_norm": 5.158090880705705, "learning_rate": 1.722844953268734e-06, "loss": 17.0562, "step": 40248 }, { "epoch": 0.7357193777761529, "grad_norm": 6.23981861952659, "learning_rate": 1.7226213948027265e-06, "loss": 17.5832, "step": 40249 }, { "epoch": 0.7357376569725995, "grad_norm": 5.863695290156407, "learning_rate": 1.722397847823648e-06, "loss": 17.2454, "step": 40250 }, { "epoch": 0.735755936169046, "grad_norm": 5.99937534117544, "learning_rate": 1.7221743123322804e-06, "loss": 17.3733, "step": 40251 }, { "epoch": 0.7357742153654925, "grad_norm": 5.704877638535444, "learning_rate": 1.7219507883294062e-06, "loss": 16.8764, "step": 40252 }, { "epoch": 0.7357924945619391, "grad_norm": 5.76060494574798, "learning_rate": 1.7217272758158116e-06, "loss": 17.3157, "step": 40253 }, { "epoch": 0.7358107737583856, "grad_norm": 5.530873682996205, "learning_rate": 1.7215037747922785e-06, "loss": 17.2236, "step": 40254 }, { "epoch": 0.7358290529548321, "grad_norm": 5.6923291457857745, "learning_rate": 1.7212802852595884e-06, "loss": 17.3585, "step": 40255 }, { "epoch": 0.7358473321512786, "grad_norm": 6.346609159077155, "learning_rate": 1.721056807218528e-06, "loss": 17.3602, "step": 40256 }, { "epoch": 0.7358656113477251, "grad_norm": 6.05670858529079, "learning_rate": 1.7208333406698774e-06, "loss": 17.4739, "step": 40257 }, { "epoch": 0.7358838905441717, "grad_norm": 7.128128291692101, "learning_rate": 1.7206098856144215e-06, "loss": 17.5095, "step": 40258 }, { "epoch": 0.7359021697406182, "grad_norm": 5.93967924421746, "learning_rate": 1.7203864420529449e-06, "loss": 17.2791, "step": 40259 }, { "epoch": 0.7359204489370648, "grad_norm": 6.226099171882974, "learning_rate": 1.7201630099862276e-06, "loss": 17.2691, "step": 40260 }, { "epoch": 0.7359387281335112, "grad_norm": 5.650849679939766, "learning_rate": 1.7199395894150556e-06, "loss": 17.3438, "step": 40261 }, { "epoch": 0.7359570073299577, "grad_norm": 6.089688076587943, "learning_rate": 1.7197161803402113e-06, "loss": 17.271, "step": 40262 }, { "epoch": 0.7359752865264043, "grad_norm": 7.160142455501508, "learning_rate": 1.7194927827624752e-06, "loss": 17.5275, "step": 40263 }, { "epoch": 0.7359935657228508, "grad_norm": 5.917236436274187, "learning_rate": 1.719269396682634e-06, "loss": 17.3778, "step": 40264 }, { "epoch": 0.7360118449192974, "grad_norm": 7.781509893993929, "learning_rate": 1.7190460221014687e-06, "loss": 18.1135, "step": 40265 }, { "epoch": 0.7360301241157439, "grad_norm": 6.546384225585039, "learning_rate": 1.718822659019761e-06, "loss": 17.1883, "step": 40266 }, { "epoch": 0.7360484033121903, "grad_norm": 4.870726684941684, "learning_rate": 1.7185993074382945e-06, "loss": 16.8371, "step": 40267 }, { "epoch": 0.7360666825086369, "grad_norm": 5.9916565193119515, "learning_rate": 1.7183759673578548e-06, "loss": 17.6017, "step": 40268 }, { "epoch": 0.7360849617050834, "grad_norm": 5.9661555458400946, "learning_rate": 1.7181526387792212e-06, "loss": 17.2107, "step": 40269 }, { "epoch": 0.73610324090153, "grad_norm": 4.9400524776761126, "learning_rate": 1.7179293217031789e-06, "loss": 16.9647, "step": 40270 }, { "epoch": 0.7361215200979765, "grad_norm": 10.26003427197588, "learning_rate": 1.7177060161305086e-06, "loss": 19.1044, "step": 40271 }, { "epoch": 0.736139799294423, "grad_norm": 6.780121841947114, "learning_rate": 1.717482722061995e-06, "loss": 17.4874, "step": 40272 }, { "epoch": 0.7361580784908696, "grad_norm": 5.313961456217444, "learning_rate": 1.7172594394984198e-06, "loss": 16.8722, "step": 40273 }, { "epoch": 0.736176357687316, "grad_norm": 6.685685455298688, "learning_rate": 1.7170361684405634e-06, "loss": 17.4504, "step": 40274 }, { "epoch": 0.7361946368837626, "grad_norm": 5.87998380661328, "learning_rate": 1.716812908889212e-06, "loss": 17.1812, "step": 40275 }, { "epoch": 0.7362129160802091, "grad_norm": 7.444723338422317, "learning_rate": 1.7165896608451455e-06, "loss": 17.6929, "step": 40276 }, { "epoch": 0.7362311952766556, "grad_norm": 5.884196679587678, "learning_rate": 1.716366424309147e-06, "loss": 17.4148, "step": 40277 }, { "epoch": 0.7362494744731022, "grad_norm": 7.023386543063951, "learning_rate": 1.7161431992820004e-06, "loss": 17.3511, "step": 40278 }, { "epoch": 0.7362677536695487, "grad_norm": 6.480603115196323, "learning_rate": 1.7159199857644876e-06, "loss": 17.5104, "step": 40279 }, { "epoch": 0.7362860328659953, "grad_norm": 7.430936532250577, "learning_rate": 1.7156967837573885e-06, "loss": 17.4314, "step": 40280 }, { "epoch": 0.7363043120624417, "grad_norm": 4.779048290067555, "learning_rate": 1.7154735932614886e-06, "loss": 16.8178, "step": 40281 }, { "epoch": 0.7363225912588882, "grad_norm": 6.146585043292208, "learning_rate": 1.715250414277569e-06, "loss": 17.1429, "step": 40282 }, { "epoch": 0.7363408704553348, "grad_norm": 7.65375434247266, "learning_rate": 1.7150272468064095e-06, "loss": 17.8229, "step": 40283 }, { "epoch": 0.7363591496517813, "grad_norm": 6.164610119854898, "learning_rate": 1.7148040908487961e-06, "loss": 17.3614, "step": 40284 }, { "epoch": 0.7363774288482279, "grad_norm": 5.6398507427876, "learning_rate": 1.714580946405508e-06, "loss": 17.0529, "step": 40285 }, { "epoch": 0.7363957080446744, "grad_norm": 6.487363122000377, "learning_rate": 1.7143578134773287e-06, "loss": 17.3726, "step": 40286 }, { "epoch": 0.7364139872411208, "grad_norm": 6.837470170046876, "learning_rate": 1.7141346920650414e-06, "loss": 17.5828, "step": 40287 }, { "epoch": 0.7364322664375674, "grad_norm": 7.210127363381549, "learning_rate": 1.7139115821694257e-06, "loss": 17.6893, "step": 40288 }, { "epoch": 0.7364505456340139, "grad_norm": 5.376017878824348, "learning_rate": 1.713688483791266e-06, "loss": 17.0827, "step": 40289 }, { "epoch": 0.7364688248304605, "grad_norm": 6.453291260704406, "learning_rate": 1.713465396931343e-06, "loss": 17.1804, "step": 40290 }, { "epoch": 0.736487104026907, "grad_norm": 7.308282427136245, "learning_rate": 1.713242321590437e-06, "loss": 17.7649, "step": 40291 }, { "epoch": 0.7365053832233535, "grad_norm": 5.517280324752858, "learning_rate": 1.7130192577693334e-06, "loss": 17.0693, "step": 40292 }, { "epoch": 0.7365236624198, "grad_norm": 7.089643184012938, "learning_rate": 1.7127962054688101e-06, "loss": 17.9101, "step": 40293 }, { "epoch": 0.7365419416162465, "grad_norm": 6.241282185146661, "learning_rate": 1.7125731646896527e-06, "loss": 17.4339, "step": 40294 }, { "epoch": 0.7365602208126931, "grad_norm": 6.275552835303163, "learning_rate": 1.7123501354326394e-06, "loss": 17.5344, "step": 40295 }, { "epoch": 0.7365785000091396, "grad_norm": 7.082184021259208, "learning_rate": 1.712127117698555e-06, "loss": 17.3831, "step": 40296 }, { "epoch": 0.7365967792055861, "grad_norm": 6.1887048540415, "learning_rate": 1.7119041114881784e-06, "loss": 17.5209, "step": 40297 }, { "epoch": 0.7366150584020327, "grad_norm": 7.03483947331878, "learning_rate": 1.711681116802294e-06, "loss": 17.7959, "step": 40298 }, { "epoch": 0.7366333375984792, "grad_norm": 6.748831766949041, "learning_rate": 1.711458133641682e-06, "loss": 17.4828, "step": 40299 }, { "epoch": 0.7366516167949257, "grad_norm": 5.335610490169805, "learning_rate": 1.7112351620071222e-06, "loss": 17.1173, "step": 40300 }, { "epoch": 0.7366698959913722, "grad_norm": 7.47846470365349, "learning_rate": 1.7110122018993996e-06, "loss": 17.8911, "step": 40301 }, { "epoch": 0.7366881751878187, "grad_norm": 7.560943677535369, "learning_rate": 1.7107892533192921e-06, "loss": 18.1934, "step": 40302 }, { "epoch": 0.7367064543842653, "grad_norm": 4.5655569872591775, "learning_rate": 1.7105663162675828e-06, "loss": 16.7309, "step": 40303 }, { "epoch": 0.7367247335807118, "grad_norm": 5.1227670005887695, "learning_rate": 1.7103433907450544e-06, "loss": 17.0055, "step": 40304 }, { "epoch": 0.7367430127771584, "grad_norm": 5.778956831084347, "learning_rate": 1.710120476752486e-06, "loss": 17.1766, "step": 40305 }, { "epoch": 0.7367612919736048, "grad_norm": 7.3352194664495025, "learning_rate": 1.7098975742906609e-06, "loss": 17.6453, "step": 40306 }, { "epoch": 0.7367795711700513, "grad_norm": 6.905822316980981, "learning_rate": 1.7096746833603595e-06, "loss": 17.3256, "step": 40307 }, { "epoch": 0.7367978503664979, "grad_norm": 6.1260230789564645, "learning_rate": 1.7094518039623614e-06, "loss": 17.3732, "step": 40308 }, { "epoch": 0.7368161295629444, "grad_norm": 5.5559510058209165, "learning_rate": 1.7092289360974507e-06, "loss": 17.1747, "step": 40309 }, { "epoch": 0.736834408759391, "grad_norm": 6.880215020787752, "learning_rate": 1.7090060797664066e-06, "loss": 17.7175, "step": 40310 }, { "epoch": 0.7368526879558375, "grad_norm": 6.342563850373657, "learning_rate": 1.7087832349700094e-06, "loss": 17.4412, "step": 40311 }, { "epoch": 0.736870967152284, "grad_norm": 5.394870258050256, "learning_rate": 1.7085604017090412e-06, "loss": 17.0892, "step": 40312 }, { "epoch": 0.7368892463487305, "grad_norm": 7.842353330683957, "learning_rate": 1.708337579984285e-06, "loss": 17.712, "step": 40313 }, { "epoch": 0.736907525545177, "grad_norm": 6.344861582765027, "learning_rate": 1.7081147697965183e-06, "loss": 17.0458, "step": 40314 }, { "epoch": 0.7369258047416236, "grad_norm": 7.018409219502196, "learning_rate": 1.7078919711465252e-06, "loss": 17.5285, "step": 40315 }, { "epoch": 0.7369440839380701, "grad_norm": 5.893936113812214, "learning_rate": 1.7076691840350852e-06, "loss": 17.1619, "step": 40316 }, { "epoch": 0.7369623631345166, "grad_norm": 5.443204814288571, "learning_rate": 1.7074464084629771e-06, "loss": 16.9271, "step": 40317 }, { "epoch": 0.7369806423309632, "grad_norm": 5.812641300573565, "learning_rate": 1.7072236444309853e-06, "loss": 17.1117, "step": 40318 }, { "epoch": 0.7369989215274096, "grad_norm": 6.327309214994092, "learning_rate": 1.7070008919398877e-06, "loss": 17.1808, "step": 40319 }, { "epoch": 0.7370172007238561, "grad_norm": 6.847341867000735, "learning_rate": 1.7067781509904674e-06, "loss": 17.7712, "step": 40320 }, { "epoch": 0.7370354799203027, "grad_norm": 7.2738796027898625, "learning_rate": 1.7065554215835022e-06, "loss": 17.606, "step": 40321 }, { "epoch": 0.7370537591167492, "grad_norm": 5.565620603371711, "learning_rate": 1.7063327037197747e-06, "loss": 17.0931, "step": 40322 }, { "epoch": 0.7370720383131958, "grad_norm": 6.3670244627821, "learning_rate": 1.7061099974000672e-06, "loss": 17.1062, "step": 40323 }, { "epoch": 0.7370903175096423, "grad_norm": 5.200384240699289, "learning_rate": 1.7058873026251577e-06, "loss": 17.0523, "step": 40324 }, { "epoch": 0.7371085967060887, "grad_norm": 5.649294781899133, "learning_rate": 1.705664619395826e-06, "loss": 17.1327, "step": 40325 }, { "epoch": 0.7371268759025353, "grad_norm": 6.953550388941502, "learning_rate": 1.705441947712856e-06, "loss": 17.4356, "step": 40326 }, { "epoch": 0.7371451550989818, "grad_norm": 8.09638993882424, "learning_rate": 1.705219287577025e-06, "loss": 17.7982, "step": 40327 }, { "epoch": 0.7371634342954284, "grad_norm": 6.618864066996758, "learning_rate": 1.7049966389891137e-06, "loss": 17.6143, "step": 40328 }, { "epoch": 0.7371817134918749, "grad_norm": 5.505243306401006, "learning_rate": 1.7047740019499049e-06, "loss": 17.0757, "step": 40329 }, { "epoch": 0.7371999926883214, "grad_norm": 7.500511807770931, "learning_rate": 1.7045513764601756e-06, "loss": 17.9797, "step": 40330 }, { "epoch": 0.737218271884768, "grad_norm": 6.76875068214782, "learning_rate": 1.7043287625207072e-06, "loss": 17.8239, "step": 40331 }, { "epoch": 0.7372365510812144, "grad_norm": 5.80579476755515, "learning_rate": 1.7041061601322829e-06, "loss": 17.2022, "step": 40332 }, { "epoch": 0.737254830277661, "grad_norm": 5.0355570493537805, "learning_rate": 1.7038835692956784e-06, "loss": 16.9128, "step": 40333 }, { "epoch": 0.7372731094741075, "grad_norm": 6.477688557870016, "learning_rate": 1.7036609900116779e-06, "loss": 17.597, "step": 40334 }, { "epoch": 0.737291388670554, "grad_norm": 6.146991457591519, "learning_rate": 1.703438422281059e-06, "loss": 17.3021, "step": 40335 }, { "epoch": 0.7373096678670006, "grad_norm": 6.9615765036469375, "learning_rate": 1.7032158661046016e-06, "loss": 17.6359, "step": 40336 }, { "epoch": 0.7373279470634471, "grad_norm": 6.4964297492815, "learning_rate": 1.7029933214830873e-06, "loss": 17.6405, "step": 40337 }, { "epoch": 0.7373462262598937, "grad_norm": 6.053042205779334, "learning_rate": 1.7027707884172962e-06, "loss": 17.4068, "step": 40338 }, { "epoch": 0.7373645054563401, "grad_norm": 6.1043935419789905, "learning_rate": 1.702548266908005e-06, "loss": 17.3136, "step": 40339 }, { "epoch": 0.7373827846527866, "grad_norm": 8.249843400073038, "learning_rate": 1.702325756955996e-06, "loss": 18.2666, "step": 40340 }, { "epoch": 0.7374010638492332, "grad_norm": 6.604982910064236, "learning_rate": 1.7021032585620512e-06, "loss": 17.0111, "step": 40341 }, { "epoch": 0.7374193430456797, "grad_norm": 6.8273047942790575, "learning_rate": 1.7018807717269465e-06, "loss": 17.632, "step": 40342 }, { "epoch": 0.7374376222421263, "grad_norm": 5.932100240522792, "learning_rate": 1.7016582964514644e-06, "loss": 17.5241, "step": 40343 }, { "epoch": 0.7374559014385728, "grad_norm": 6.531091506881564, "learning_rate": 1.7014358327363845e-06, "loss": 17.9203, "step": 40344 }, { "epoch": 0.7374741806350192, "grad_norm": 7.40087861611719, "learning_rate": 1.7012133805824844e-06, "loss": 17.3504, "step": 40345 }, { "epoch": 0.7374924598314658, "grad_norm": 6.7916289928612565, "learning_rate": 1.7009909399905461e-06, "loss": 17.4975, "step": 40346 }, { "epoch": 0.7375107390279123, "grad_norm": 7.416373596851758, "learning_rate": 1.700768510961347e-06, "loss": 18.0307, "step": 40347 }, { "epoch": 0.7375290182243589, "grad_norm": 5.737865646911113, "learning_rate": 1.7005460934956696e-06, "loss": 17.2627, "step": 40348 }, { "epoch": 0.7375472974208054, "grad_norm": 6.110530953644331, "learning_rate": 1.7003236875942896e-06, "loss": 17.3096, "step": 40349 }, { "epoch": 0.7375655766172519, "grad_norm": 7.239607777754914, "learning_rate": 1.7001012932579897e-06, "loss": 17.4873, "step": 40350 }, { "epoch": 0.7375838558136985, "grad_norm": 8.131330682955198, "learning_rate": 1.6998789104875496e-06, "loss": 17.9788, "step": 40351 }, { "epoch": 0.7376021350101449, "grad_norm": 6.964373268286146, "learning_rate": 1.6996565392837478e-06, "loss": 17.9718, "step": 40352 }, { "epoch": 0.7376204142065915, "grad_norm": 5.800874046652595, "learning_rate": 1.6994341796473612e-06, "loss": 17.2205, "step": 40353 }, { "epoch": 0.737638693403038, "grad_norm": 5.974062109532082, "learning_rate": 1.6992118315791734e-06, "loss": 17.3212, "step": 40354 }, { "epoch": 0.7376569725994845, "grad_norm": 6.840132899653732, "learning_rate": 1.6989894950799619e-06, "loss": 17.5138, "step": 40355 }, { "epoch": 0.7376752517959311, "grad_norm": 7.599998520489636, "learning_rate": 1.6987671701505038e-06, "loss": 17.8779, "step": 40356 }, { "epoch": 0.7376935309923776, "grad_norm": 7.064418076599641, "learning_rate": 1.6985448567915802e-06, "loss": 17.5743, "step": 40357 }, { "epoch": 0.7377118101888241, "grad_norm": 5.2706691971914745, "learning_rate": 1.6983225550039722e-06, "loss": 17.0127, "step": 40358 }, { "epoch": 0.7377300893852706, "grad_norm": 5.967677880704914, "learning_rate": 1.698100264788456e-06, "loss": 16.9368, "step": 40359 }, { "epoch": 0.7377483685817171, "grad_norm": 6.124781688018745, "learning_rate": 1.6978779861458128e-06, "loss": 17.4297, "step": 40360 }, { "epoch": 0.7377666477781637, "grad_norm": 6.096515238536443, "learning_rate": 1.697655719076821e-06, "loss": 17.3743, "step": 40361 }, { "epoch": 0.7377849269746102, "grad_norm": 5.07187393463127, "learning_rate": 1.6974334635822576e-06, "loss": 17.112, "step": 40362 }, { "epoch": 0.7378032061710568, "grad_norm": 7.083681554430013, "learning_rate": 1.6972112196629049e-06, "loss": 17.5145, "step": 40363 }, { "epoch": 0.7378214853675032, "grad_norm": 4.899664153210716, "learning_rate": 1.696988987319539e-06, "loss": 16.937, "step": 40364 }, { "epoch": 0.7378397645639497, "grad_norm": 6.445136872473563, "learning_rate": 1.6967667665529413e-06, "loss": 17.6777, "step": 40365 }, { "epoch": 0.7378580437603963, "grad_norm": 7.516996240628682, "learning_rate": 1.6965445573638878e-06, "loss": 17.8314, "step": 40366 }, { "epoch": 0.7378763229568428, "grad_norm": 7.289091202111755, "learning_rate": 1.6963223597531591e-06, "loss": 17.8152, "step": 40367 }, { "epoch": 0.7378946021532894, "grad_norm": 6.917293741481595, "learning_rate": 1.696100173721536e-06, "loss": 17.2758, "step": 40368 }, { "epoch": 0.7379128813497359, "grad_norm": 7.772104637075387, "learning_rate": 1.6958779992697945e-06, "loss": 17.8247, "step": 40369 }, { "epoch": 0.7379311605461824, "grad_norm": 7.617073010453543, "learning_rate": 1.6956558363987124e-06, "loss": 17.839, "step": 40370 }, { "epoch": 0.7379494397426289, "grad_norm": 6.61421506162034, "learning_rate": 1.6954336851090714e-06, "loss": 17.4386, "step": 40371 }, { "epoch": 0.7379677189390754, "grad_norm": 6.2665160205581225, "learning_rate": 1.6952115454016488e-06, "loss": 17.5887, "step": 40372 }, { "epoch": 0.737985998135522, "grad_norm": 5.155089443354068, "learning_rate": 1.6949894172772209e-06, "loss": 16.9578, "step": 40373 }, { "epoch": 0.7380042773319685, "grad_norm": 6.33033208587697, "learning_rate": 1.69476730073657e-06, "loss": 17.1203, "step": 40374 }, { "epoch": 0.738022556528415, "grad_norm": 6.955765375984283, "learning_rate": 1.6945451957804716e-06, "loss": 17.6677, "step": 40375 }, { "epoch": 0.7380408357248616, "grad_norm": 5.4844509997075255, "learning_rate": 1.6943231024097051e-06, "loss": 17.088, "step": 40376 }, { "epoch": 0.738059114921308, "grad_norm": 6.236071695649573, "learning_rate": 1.6941010206250513e-06, "loss": 17.4101, "step": 40377 }, { "epoch": 0.7380773941177546, "grad_norm": 6.9555014166873965, "learning_rate": 1.6938789504272863e-06, "loss": 17.9313, "step": 40378 }, { "epoch": 0.7380956733142011, "grad_norm": 5.519288335015631, "learning_rate": 1.6936568918171875e-06, "loss": 17.1217, "step": 40379 }, { "epoch": 0.7381139525106476, "grad_norm": 5.461066149882538, "learning_rate": 1.6934348447955356e-06, "loss": 17.0951, "step": 40380 }, { "epoch": 0.7381322317070942, "grad_norm": 6.833449000867347, "learning_rate": 1.6932128093631056e-06, "loss": 17.7065, "step": 40381 }, { "epoch": 0.7381505109035407, "grad_norm": 6.560390362264792, "learning_rate": 1.6929907855206801e-06, "loss": 17.3516, "step": 40382 }, { "epoch": 0.7381687900999873, "grad_norm": 7.308721918078079, "learning_rate": 1.6927687732690346e-06, "loss": 17.447, "step": 40383 }, { "epoch": 0.7381870692964337, "grad_norm": 6.244847692961371, "learning_rate": 1.6925467726089461e-06, "loss": 17.4961, "step": 40384 }, { "epoch": 0.7382053484928802, "grad_norm": 5.3248816105705945, "learning_rate": 1.6923247835411938e-06, "loss": 17.1032, "step": 40385 }, { "epoch": 0.7382236276893268, "grad_norm": 6.715012906568172, "learning_rate": 1.6921028060665578e-06, "loss": 17.3961, "step": 40386 }, { "epoch": 0.7382419068857733, "grad_norm": 6.759321241920423, "learning_rate": 1.6918808401858134e-06, "loss": 17.3297, "step": 40387 }, { "epoch": 0.7382601860822198, "grad_norm": 5.531143553472225, "learning_rate": 1.6916588858997407e-06, "loss": 17.1698, "step": 40388 }, { "epoch": 0.7382784652786664, "grad_norm": 5.90785298015975, "learning_rate": 1.6914369432091166e-06, "loss": 17.1392, "step": 40389 }, { "epoch": 0.7382967444751128, "grad_norm": 6.444055081026731, "learning_rate": 1.6912150121147175e-06, "loss": 17.4602, "step": 40390 }, { "epoch": 0.7383150236715594, "grad_norm": 6.26776819713409, "learning_rate": 1.690993092617324e-06, "loss": 17.266, "step": 40391 }, { "epoch": 0.7383333028680059, "grad_norm": 6.7303862777732615, "learning_rate": 1.6907711847177127e-06, "loss": 17.9444, "step": 40392 }, { "epoch": 0.7383515820644524, "grad_norm": 7.94341870186824, "learning_rate": 1.6905492884166591e-06, "loss": 17.8602, "step": 40393 }, { "epoch": 0.738369861260899, "grad_norm": 5.275072060905954, "learning_rate": 1.6903274037149436e-06, "loss": 17.0798, "step": 40394 }, { "epoch": 0.7383881404573455, "grad_norm": 7.181700233406392, "learning_rate": 1.6901055306133434e-06, "loss": 17.6762, "step": 40395 }, { "epoch": 0.7384064196537921, "grad_norm": 6.685560970098697, "learning_rate": 1.6898836691126374e-06, "loss": 17.5624, "step": 40396 }, { "epoch": 0.7384246988502385, "grad_norm": 6.603422874584935, "learning_rate": 1.6896618192136022e-06, "loss": 17.4263, "step": 40397 }, { "epoch": 0.738442978046685, "grad_norm": 8.6444288183137, "learning_rate": 1.6894399809170132e-06, "loss": 18.4106, "step": 40398 }, { "epoch": 0.7384612572431316, "grad_norm": 6.902854531590509, "learning_rate": 1.689218154223652e-06, "loss": 17.9442, "step": 40399 }, { "epoch": 0.7384795364395781, "grad_norm": 5.125623711443894, "learning_rate": 1.6889963391342935e-06, "loss": 16.8982, "step": 40400 }, { "epoch": 0.7384978156360247, "grad_norm": 5.112083732012161, "learning_rate": 1.6887745356497138e-06, "loss": 16.9934, "step": 40401 }, { "epoch": 0.7385160948324712, "grad_norm": 6.493898297798346, "learning_rate": 1.6885527437706933e-06, "loss": 17.5954, "step": 40402 }, { "epoch": 0.7385343740289176, "grad_norm": 6.131281028550239, "learning_rate": 1.6883309634980066e-06, "loss": 17.0446, "step": 40403 }, { "epoch": 0.7385526532253642, "grad_norm": 6.470049176173752, "learning_rate": 1.688109194832433e-06, "loss": 17.381, "step": 40404 }, { "epoch": 0.7385709324218107, "grad_norm": 5.388451903474944, "learning_rate": 1.6878874377747506e-06, "loss": 17.1026, "step": 40405 }, { "epoch": 0.7385892116182573, "grad_norm": 7.052332627484103, "learning_rate": 1.687665692325735e-06, "loss": 17.78, "step": 40406 }, { "epoch": 0.7386074908147038, "grad_norm": 6.639400088933322, "learning_rate": 1.6874439584861623e-06, "loss": 17.5778, "step": 40407 }, { "epoch": 0.7386257700111503, "grad_norm": 4.689427659981967, "learning_rate": 1.687222236256813e-06, "loss": 16.6965, "step": 40408 }, { "epoch": 0.7386440492075969, "grad_norm": 6.482496471292009, "learning_rate": 1.6870005256384608e-06, "loss": 17.6682, "step": 40409 }, { "epoch": 0.7386623284040433, "grad_norm": 5.5258105920776925, "learning_rate": 1.6867788266318853e-06, "loss": 17.0026, "step": 40410 }, { "epoch": 0.7386806076004899, "grad_norm": 4.705614883144295, "learning_rate": 1.6865571392378627e-06, "loss": 16.9654, "step": 40411 }, { "epoch": 0.7386988867969364, "grad_norm": 7.480879724749664, "learning_rate": 1.6863354634571683e-06, "loss": 17.6751, "step": 40412 }, { "epoch": 0.7387171659933829, "grad_norm": 5.486409928780913, "learning_rate": 1.6861137992905808e-06, "loss": 17.0393, "step": 40413 }, { "epoch": 0.7387354451898295, "grad_norm": 7.192533251166371, "learning_rate": 1.6858921467388784e-06, "loss": 17.9118, "step": 40414 }, { "epoch": 0.738753724386276, "grad_norm": 5.204065362773426, "learning_rate": 1.6856705058028344e-06, "loss": 17.4147, "step": 40415 }, { "epoch": 0.7387720035827225, "grad_norm": 13.582224872925345, "learning_rate": 1.6854488764832294e-06, "loss": 17.6464, "step": 40416 }, { "epoch": 0.738790282779169, "grad_norm": 6.4067016341790355, "learning_rate": 1.6852272587808388e-06, "loss": 17.6743, "step": 40417 }, { "epoch": 0.7388085619756155, "grad_norm": 6.3946677055908125, "learning_rate": 1.685005652696437e-06, "loss": 17.6, "step": 40418 }, { "epoch": 0.7388268411720621, "grad_norm": 7.336754692359802, "learning_rate": 1.6847840582308045e-06, "loss": 17.6867, "step": 40419 }, { "epoch": 0.7388451203685086, "grad_norm": 5.331177788109652, "learning_rate": 1.6845624753847146e-06, "loss": 17.024, "step": 40420 }, { "epoch": 0.7388633995649552, "grad_norm": 5.846297276499965, "learning_rate": 1.684340904158947e-06, "loss": 16.9966, "step": 40421 }, { "epoch": 0.7388816787614017, "grad_norm": 6.553764618091495, "learning_rate": 1.6841193445542753e-06, "loss": 17.4354, "step": 40422 }, { "epoch": 0.7388999579578481, "grad_norm": 6.756304286194213, "learning_rate": 1.6838977965714793e-06, "loss": 17.5202, "step": 40423 }, { "epoch": 0.7389182371542947, "grad_norm": 7.3343973571682834, "learning_rate": 1.6836762602113321e-06, "loss": 18.0477, "step": 40424 }, { "epoch": 0.7389365163507412, "grad_norm": 4.648315039124703, "learning_rate": 1.6834547354746134e-06, "loss": 16.8088, "step": 40425 }, { "epoch": 0.7389547955471878, "grad_norm": 7.374123123792475, "learning_rate": 1.6832332223620963e-06, "loss": 17.4429, "step": 40426 }, { "epoch": 0.7389730747436343, "grad_norm": 5.8369798781715065, "learning_rate": 1.683011720874561e-06, "loss": 16.9676, "step": 40427 }, { "epoch": 0.7389913539400808, "grad_norm": 5.785462406156326, "learning_rate": 1.6827902310127813e-06, "loss": 17.1475, "step": 40428 }, { "epoch": 0.7390096331365273, "grad_norm": 6.577057762745837, "learning_rate": 1.6825687527775326e-06, "loss": 17.5616, "step": 40429 }, { "epoch": 0.7390279123329738, "grad_norm": 5.000256912423383, "learning_rate": 1.6823472861695923e-06, "loss": 17.1883, "step": 40430 }, { "epoch": 0.7390461915294204, "grad_norm": 6.468080600979272, "learning_rate": 1.6821258311897387e-06, "loss": 17.3364, "step": 40431 }, { "epoch": 0.7390644707258669, "grad_norm": 5.777416855802853, "learning_rate": 1.6819043878387448e-06, "loss": 17.2785, "step": 40432 }, { "epoch": 0.7390827499223134, "grad_norm": 6.434147060823695, "learning_rate": 1.6816829561173892e-06, "loss": 17.6273, "step": 40433 }, { "epoch": 0.73910102911876, "grad_norm": 5.5001575804373175, "learning_rate": 1.6814615360264475e-06, "loss": 17.203, "step": 40434 }, { "epoch": 0.7391193083152064, "grad_norm": 5.88866052774835, "learning_rate": 1.6812401275666929e-06, "loss": 17.0061, "step": 40435 }, { "epoch": 0.739137587511653, "grad_norm": 6.12315851835246, "learning_rate": 1.6810187307389052e-06, "loss": 17.5371, "step": 40436 }, { "epoch": 0.7391558667080995, "grad_norm": 6.33660342487685, "learning_rate": 1.6807973455438592e-06, "loss": 17.373, "step": 40437 }, { "epoch": 0.739174145904546, "grad_norm": 5.82889587322666, "learning_rate": 1.680575971982329e-06, "loss": 17.2274, "step": 40438 }, { "epoch": 0.7391924251009926, "grad_norm": 5.111593944105299, "learning_rate": 1.6803546100550916e-06, "loss": 17.1413, "step": 40439 }, { "epoch": 0.7392107042974391, "grad_norm": 7.460497210488791, "learning_rate": 1.680133259762924e-06, "loss": 17.7278, "step": 40440 }, { "epoch": 0.7392289834938857, "grad_norm": 7.83157603731984, "learning_rate": 1.6799119211066018e-06, "loss": 17.9179, "step": 40441 }, { "epoch": 0.7392472626903321, "grad_norm": 6.389531692203936, "learning_rate": 1.6796905940869007e-06, "loss": 17.2598, "step": 40442 }, { "epoch": 0.7392655418867786, "grad_norm": 8.296440728333383, "learning_rate": 1.6794692787045947e-06, "loss": 18.2244, "step": 40443 }, { "epoch": 0.7392838210832252, "grad_norm": 7.456350329302853, "learning_rate": 1.6792479749604618e-06, "loss": 17.4114, "step": 40444 }, { "epoch": 0.7393021002796717, "grad_norm": 7.164454169593468, "learning_rate": 1.6790266828552764e-06, "loss": 17.5265, "step": 40445 }, { "epoch": 0.7393203794761183, "grad_norm": 5.8404549145039235, "learning_rate": 1.678805402389813e-06, "loss": 17.2114, "step": 40446 }, { "epoch": 0.7393386586725648, "grad_norm": 6.7413431128489485, "learning_rate": 1.6785841335648505e-06, "loss": 17.6466, "step": 40447 }, { "epoch": 0.7393569378690112, "grad_norm": 5.647144632302724, "learning_rate": 1.6783628763811605e-06, "loss": 17.3938, "step": 40448 }, { "epoch": 0.7393752170654578, "grad_norm": 6.415793614737091, "learning_rate": 1.6781416308395199e-06, "loss": 17.406, "step": 40449 }, { "epoch": 0.7393934962619043, "grad_norm": 4.996263080322013, "learning_rate": 1.6779203969407065e-06, "loss": 16.6906, "step": 40450 }, { "epoch": 0.7394117754583509, "grad_norm": 6.908455907873091, "learning_rate": 1.6776991746854943e-06, "loss": 17.2753, "step": 40451 }, { "epoch": 0.7394300546547974, "grad_norm": 5.444561057945415, "learning_rate": 1.6774779640746564e-06, "loss": 17.0341, "step": 40452 }, { "epoch": 0.7394483338512439, "grad_norm": 6.485128960700143, "learning_rate": 1.677256765108971e-06, "loss": 17.4423, "step": 40453 }, { "epoch": 0.7394666130476905, "grad_norm": 6.788477185465733, "learning_rate": 1.6770355777892128e-06, "loss": 17.6575, "step": 40454 }, { "epoch": 0.7394848922441369, "grad_norm": 6.452934926584773, "learning_rate": 1.6768144021161548e-06, "loss": 17.3065, "step": 40455 }, { "epoch": 0.7395031714405834, "grad_norm": 5.928262631649043, "learning_rate": 1.6765932380905758e-06, "loss": 17.3642, "step": 40456 }, { "epoch": 0.73952145063703, "grad_norm": 6.88995834629758, "learning_rate": 1.676372085713247e-06, "loss": 17.7188, "step": 40457 }, { "epoch": 0.7395397298334765, "grad_norm": 6.013820747438353, "learning_rate": 1.676150944984946e-06, "loss": 17.523, "step": 40458 }, { "epoch": 0.7395580090299231, "grad_norm": 5.044572122597513, "learning_rate": 1.675929815906449e-06, "loss": 16.9006, "step": 40459 }, { "epoch": 0.7395762882263696, "grad_norm": 8.88367551232451, "learning_rate": 1.6757086984785276e-06, "loss": 18.6326, "step": 40460 }, { "epoch": 0.739594567422816, "grad_norm": 6.259009395302826, "learning_rate": 1.675487592701961e-06, "loss": 17.436, "step": 40461 }, { "epoch": 0.7396128466192626, "grad_norm": 7.147618725147311, "learning_rate": 1.6752664985775214e-06, "loss": 17.5796, "step": 40462 }, { "epoch": 0.7396311258157091, "grad_norm": 5.703424224723377, "learning_rate": 1.6750454161059831e-06, "loss": 17.3078, "step": 40463 }, { "epoch": 0.7396494050121557, "grad_norm": 7.032416991999989, "learning_rate": 1.674824345288123e-06, "loss": 17.6561, "step": 40464 }, { "epoch": 0.7396676842086022, "grad_norm": 5.82455475880267, "learning_rate": 1.6746032861247157e-06, "loss": 17.0456, "step": 40465 }, { "epoch": 0.7396859634050487, "grad_norm": 6.515307306355042, "learning_rate": 1.6743822386165332e-06, "loss": 17.5415, "step": 40466 }, { "epoch": 0.7397042426014953, "grad_norm": 6.575631182271311, "learning_rate": 1.6741612027643529e-06, "loss": 17.8486, "step": 40467 }, { "epoch": 0.7397225217979417, "grad_norm": 6.928821979437836, "learning_rate": 1.6739401785689502e-06, "loss": 17.4843, "step": 40468 }, { "epoch": 0.7397408009943883, "grad_norm": 6.517116842834455, "learning_rate": 1.673719166031097e-06, "loss": 17.2988, "step": 40469 }, { "epoch": 0.7397590801908348, "grad_norm": 6.943932552368089, "learning_rate": 1.6734981651515708e-06, "loss": 17.5281, "step": 40470 }, { "epoch": 0.7397773593872813, "grad_norm": 7.252230033833828, "learning_rate": 1.673277175931144e-06, "loss": 17.2611, "step": 40471 }, { "epoch": 0.7397956385837279, "grad_norm": 6.918826447093558, "learning_rate": 1.6730561983705928e-06, "loss": 17.8446, "step": 40472 }, { "epoch": 0.7398139177801744, "grad_norm": 7.046868684500807, "learning_rate": 1.6728352324706916e-06, "loss": 17.5692, "step": 40473 }, { "epoch": 0.739832196976621, "grad_norm": 6.41640496702323, "learning_rate": 1.6726142782322124e-06, "loss": 17.707, "step": 40474 }, { "epoch": 0.7398504761730674, "grad_norm": 6.448368851583952, "learning_rate": 1.672393335655933e-06, "loss": 17.904, "step": 40475 }, { "epoch": 0.7398687553695139, "grad_norm": 6.044696983075673, "learning_rate": 1.6721724047426246e-06, "loss": 17.0803, "step": 40476 }, { "epoch": 0.7398870345659605, "grad_norm": 4.796494065627171, "learning_rate": 1.6719514854930629e-06, "loss": 17.0036, "step": 40477 }, { "epoch": 0.739905313762407, "grad_norm": 7.064063458859551, "learning_rate": 1.6717305779080245e-06, "loss": 17.5841, "step": 40478 }, { "epoch": 0.7399235929588536, "grad_norm": 6.442707198470001, "learning_rate": 1.6715096819882809e-06, "loss": 17.4793, "step": 40479 }, { "epoch": 0.7399418721553, "grad_norm": 5.53802636853241, "learning_rate": 1.671288797734606e-06, "loss": 17.1975, "step": 40480 }, { "epoch": 0.7399601513517465, "grad_norm": 7.149095077567437, "learning_rate": 1.6710679251477767e-06, "loss": 17.9141, "step": 40481 }, { "epoch": 0.7399784305481931, "grad_norm": 6.0106093593312195, "learning_rate": 1.670847064228565e-06, "loss": 17.3153, "step": 40482 }, { "epoch": 0.7399967097446396, "grad_norm": 8.154256882574467, "learning_rate": 1.670626214977744e-06, "loss": 18.2142, "step": 40483 }, { "epoch": 0.7400149889410862, "grad_norm": 6.058833797035263, "learning_rate": 1.6704053773960894e-06, "loss": 17.7783, "step": 40484 }, { "epoch": 0.7400332681375327, "grad_norm": 6.622857333869618, "learning_rate": 1.6701845514843767e-06, "loss": 17.2009, "step": 40485 }, { "epoch": 0.7400515473339792, "grad_norm": 6.146866151049714, "learning_rate": 1.6699637372433765e-06, "loss": 17.4007, "step": 40486 }, { "epoch": 0.7400698265304257, "grad_norm": 5.45639791397369, "learning_rate": 1.669742934673866e-06, "loss": 17.1561, "step": 40487 }, { "epoch": 0.7400881057268722, "grad_norm": 7.101751123667253, "learning_rate": 1.6695221437766157e-06, "loss": 17.7251, "step": 40488 }, { "epoch": 0.7401063849233188, "grad_norm": 5.599094444624527, "learning_rate": 1.669301364552403e-06, "loss": 17.1357, "step": 40489 }, { "epoch": 0.7401246641197653, "grad_norm": 6.89772171256317, "learning_rate": 1.6690805970020001e-06, "loss": 17.8894, "step": 40490 }, { "epoch": 0.7401429433162118, "grad_norm": 6.220277390165321, "learning_rate": 1.6688598411261793e-06, "loss": 17.1669, "step": 40491 }, { "epoch": 0.7401612225126584, "grad_norm": 5.477496348514073, "learning_rate": 1.6686390969257165e-06, "loss": 17.1128, "step": 40492 }, { "epoch": 0.7401795017091048, "grad_norm": 8.126755780705034, "learning_rate": 1.6684183644013834e-06, "loss": 17.8646, "step": 40493 }, { "epoch": 0.7401977809055514, "grad_norm": 4.835579478165692, "learning_rate": 1.6681976435539555e-06, "loss": 16.8213, "step": 40494 }, { "epoch": 0.7402160601019979, "grad_norm": 7.373896792887852, "learning_rate": 1.6679769343842067e-06, "loss": 17.9957, "step": 40495 }, { "epoch": 0.7402343392984444, "grad_norm": 5.99509293880669, "learning_rate": 1.6677562368929095e-06, "loss": 17.4974, "step": 40496 }, { "epoch": 0.740252618494891, "grad_norm": 5.513213025231991, "learning_rate": 1.6675355510808362e-06, "loss": 17.2466, "step": 40497 }, { "epoch": 0.7402708976913375, "grad_norm": 6.526937193923613, "learning_rate": 1.667314876948763e-06, "loss": 17.753, "step": 40498 }, { "epoch": 0.7402891768877841, "grad_norm": 6.179907624092397, "learning_rate": 1.6670942144974623e-06, "loss": 17.5463, "step": 40499 }, { "epoch": 0.7403074560842305, "grad_norm": 5.870931111129396, "learning_rate": 1.666873563727705e-06, "loss": 17.4968, "step": 40500 }, { "epoch": 0.740325735280677, "grad_norm": 4.87044760048939, "learning_rate": 1.6666529246402686e-06, "loss": 16.8791, "step": 40501 }, { "epoch": 0.7403440144771236, "grad_norm": 8.231245316097155, "learning_rate": 1.6664322972359232e-06, "loss": 18.2542, "step": 40502 }, { "epoch": 0.7403622936735701, "grad_norm": 6.475308329588433, "learning_rate": 1.6662116815154428e-06, "loss": 17.634, "step": 40503 }, { "epoch": 0.7403805728700167, "grad_norm": 7.279036338392636, "learning_rate": 1.665991077479603e-06, "loss": 17.9237, "step": 40504 }, { "epoch": 0.7403988520664632, "grad_norm": 7.9525385333288945, "learning_rate": 1.6657704851291734e-06, "loss": 18.2801, "step": 40505 }, { "epoch": 0.7404171312629096, "grad_norm": 8.594532364885263, "learning_rate": 1.6655499044649304e-06, "loss": 17.9394, "step": 40506 }, { "epoch": 0.7404354104593562, "grad_norm": 7.380142143104152, "learning_rate": 1.665329335487646e-06, "loss": 18.1205, "step": 40507 }, { "epoch": 0.7404536896558027, "grad_norm": 7.008902761790903, "learning_rate": 1.6651087781980908e-06, "loss": 17.7314, "step": 40508 }, { "epoch": 0.7404719688522493, "grad_norm": 6.491660391598426, "learning_rate": 1.6648882325970422e-06, "loss": 17.3558, "step": 40509 }, { "epoch": 0.7404902480486958, "grad_norm": 7.481042962952837, "learning_rate": 1.6646676986852705e-06, "loss": 17.3468, "step": 40510 }, { "epoch": 0.7405085272451423, "grad_norm": 5.87973941449793, "learning_rate": 1.6644471764635472e-06, "loss": 16.9821, "step": 40511 }, { "epoch": 0.7405268064415889, "grad_norm": 5.90588755148673, "learning_rate": 1.6642266659326478e-06, "loss": 17.4898, "step": 40512 }, { "epoch": 0.7405450856380353, "grad_norm": 6.193626110252771, "learning_rate": 1.6640061670933455e-06, "loss": 17.1422, "step": 40513 }, { "epoch": 0.7405633648344819, "grad_norm": 7.28707673181931, "learning_rate": 1.6637856799464108e-06, "loss": 17.8167, "step": 40514 }, { "epoch": 0.7405816440309284, "grad_norm": 10.297339825356831, "learning_rate": 1.6635652044926194e-06, "loss": 18.3056, "step": 40515 }, { "epoch": 0.7405999232273749, "grad_norm": 6.897096809196513, "learning_rate": 1.6633447407327425e-06, "loss": 17.8462, "step": 40516 }, { "epoch": 0.7406182024238215, "grad_norm": 5.4896108631001725, "learning_rate": 1.663124288667551e-06, "loss": 17.0837, "step": 40517 }, { "epoch": 0.740636481620268, "grad_norm": 7.618408151379167, "learning_rate": 1.662903848297821e-06, "loss": 17.8925, "step": 40518 }, { "epoch": 0.7406547608167146, "grad_norm": 5.978064664662144, "learning_rate": 1.6626834196243218e-06, "loss": 17.5297, "step": 40519 }, { "epoch": 0.740673040013161, "grad_norm": 10.990884056408259, "learning_rate": 1.6624630026478288e-06, "loss": 17.6088, "step": 40520 }, { "epoch": 0.7406913192096075, "grad_norm": 6.167021917242442, "learning_rate": 1.6622425973691126e-06, "loss": 17.57, "step": 40521 }, { "epoch": 0.7407095984060541, "grad_norm": 7.49381062229476, "learning_rate": 1.662022203788946e-06, "loss": 17.5433, "step": 40522 }, { "epoch": 0.7407278776025006, "grad_norm": 4.937374559716444, "learning_rate": 1.6618018219081034e-06, "loss": 16.9938, "step": 40523 }, { "epoch": 0.7407461567989471, "grad_norm": 5.9787629873605415, "learning_rate": 1.661581451727356e-06, "loss": 17.4956, "step": 40524 }, { "epoch": 0.7407644359953937, "grad_norm": 6.676195963850708, "learning_rate": 1.661361093247474e-06, "loss": 17.5384, "step": 40525 }, { "epoch": 0.7407827151918401, "grad_norm": 5.659570147746219, "learning_rate": 1.6611407464692337e-06, "loss": 17.0126, "step": 40526 }, { "epoch": 0.7408009943882867, "grad_norm": 5.396760196860016, "learning_rate": 1.6609204113934048e-06, "loss": 17.2005, "step": 40527 }, { "epoch": 0.7408192735847332, "grad_norm": 5.334148274183961, "learning_rate": 1.6607000880207586e-06, "loss": 16.9545, "step": 40528 }, { "epoch": 0.7408375527811797, "grad_norm": 5.996410211757592, "learning_rate": 1.6604797763520709e-06, "loss": 17.6683, "step": 40529 }, { "epoch": 0.7408558319776263, "grad_norm": 5.909153063391521, "learning_rate": 1.6602594763881096e-06, "loss": 17.338, "step": 40530 }, { "epoch": 0.7408741111740728, "grad_norm": 7.010689730741529, "learning_rate": 1.6600391881296497e-06, "loss": 17.9605, "step": 40531 }, { "epoch": 0.7408923903705193, "grad_norm": 5.201923560949645, "learning_rate": 1.6598189115774637e-06, "loss": 17.1037, "step": 40532 }, { "epoch": 0.7409106695669658, "grad_norm": 5.2187325670968985, "learning_rate": 1.659598646732321e-06, "loss": 17.1121, "step": 40533 }, { "epoch": 0.7409289487634123, "grad_norm": 4.961170628336418, "learning_rate": 1.659378393594997e-06, "loss": 17.1336, "step": 40534 }, { "epoch": 0.7409472279598589, "grad_norm": 7.117718795525935, "learning_rate": 1.659158152166262e-06, "loss": 17.8804, "step": 40535 }, { "epoch": 0.7409655071563054, "grad_norm": 7.759434237601188, "learning_rate": 1.6589379224468855e-06, "loss": 17.8174, "step": 40536 }, { "epoch": 0.740983786352752, "grad_norm": 6.371586262412304, "learning_rate": 1.658717704437644e-06, "loss": 17.3929, "step": 40537 }, { "epoch": 0.7410020655491985, "grad_norm": 7.170420275513812, "learning_rate": 1.6584974981393065e-06, "loss": 17.5892, "step": 40538 }, { "epoch": 0.7410203447456449, "grad_norm": 6.193105354380949, "learning_rate": 1.658277303552644e-06, "loss": 17.4342, "step": 40539 }, { "epoch": 0.7410386239420915, "grad_norm": 7.436648550022727, "learning_rate": 1.658057120678429e-06, "loss": 17.3028, "step": 40540 }, { "epoch": 0.741056903138538, "grad_norm": 4.691895554795172, "learning_rate": 1.6578369495174363e-06, "loss": 16.9041, "step": 40541 }, { "epoch": 0.7410751823349846, "grad_norm": 5.1053172018814355, "learning_rate": 1.6576167900704326e-06, "loss": 17.1459, "step": 40542 }, { "epoch": 0.7410934615314311, "grad_norm": 8.371188311443499, "learning_rate": 1.6573966423381943e-06, "loss": 18.3979, "step": 40543 }, { "epoch": 0.7411117407278776, "grad_norm": 7.400798207085693, "learning_rate": 1.65717650632149e-06, "loss": 17.3746, "step": 40544 }, { "epoch": 0.7411300199243241, "grad_norm": 5.33913872629482, "learning_rate": 1.656956382021091e-06, "loss": 17.0322, "step": 40545 }, { "epoch": 0.7411482991207706, "grad_norm": 6.107430230112621, "learning_rate": 1.6567362694377715e-06, "loss": 17.5249, "step": 40546 }, { "epoch": 0.7411665783172172, "grad_norm": 6.463227253583548, "learning_rate": 1.6565161685722992e-06, "loss": 17.6066, "step": 40547 }, { "epoch": 0.7411848575136637, "grad_norm": 6.0243723270336265, "learning_rate": 1.656296079425449e-06, "loss": 17.4941, "step": 40548 }, { "epoch": 0.7412031367101102, "grad_norm": 6.408575513696968, "learning_rate": 1.6560760019979899e-06, "loss": 17.4279, "step": 40549 }, { "epoch": 0.7412214159065568, "grad_norm": 6.6606875779945005, "learning_rate": 1.655855936290694e-06, "loss": 17.4841, "step": 40550 }, { "epoch": 0.7412396951030032, "grad_norm": 5.916469393641226, "learning_rate": 1.6556358823043345e-06, "loss": 17.3329, "step": 40551 }, { "epoch": 0.7412579742994498, "grad_norm": 6.525150119006835, "learning_rate": 1.6554158400396809e-06, "loss": 17.4375, "step": 40552 }, { "epoch": 0.7412762534958963, "grad_norm": 5.734591340482328, "learning_rate": 1.6551958094975029e-06, "loss": 17.0861, "step": 40553 }, { "epoch": 0.7412945326923428, "grad_norm": 5.862887624907331, "learning_rate": 1.6549757906785751e-06, "loss": 17.3158, "step": 40554 }, { "epoch": 0.7413128118887894, "grad_norm": 6.714712991396639, "learning_rate": 1.654755783583667e-06, "loss": 17.5433, "step": 40555 }, { "epoch": 0.7413310910852359, "grad_norm": 5.496467223277018, "learning_rate": 1.6545357882135476e-06, "loss": 17.23, "step": 40556 }, { "epoch": 0.7413493702816825, "grad_norm": 6.950104872775118, "learning_rate": 1.65431580456899e-06, "loss": 17.6573, "step": 40557 }, { "epoch": 0.7413676494781289, "grad_norm": 6.714386174181153, "learning_rate": 1.6540958326507673e-06, "loss": 17.5758, "step": 40558 }, { "epoch": 0.7413859286745754, "grad_norm": 6.709904551320272, "learning_rate": 1.653875872459646e-06, "loss": 17.5238, "step": 40559 }, { "epoch": 0.741404207871022, "grad_norm": 5.690062686323397, "learning_rate": 1.6536559239964017e-06, "loss": 17.2494, "step": 40560 }, { "epoch": 0.7414224870674685, "grad_norm": 6.57790041157177, "learning_rate": 1.6534359872618028e-06, "loss": 17.1891, "step": 40561 }, { "epoch": 0.7414407662639151, "grad_norm": 5.388539426735823, "learning_rate": 1.6532160622566185e-06, "loss": 17.1206, "step": 40562 }, { "epoch": 0.7414590454603616, "grad_norm": 6.980986744301755, "learning_rate": 1.6529961489816232e-06, "loss": 17.8198, "step": 40563 }, { "epoch": 0.741477324656808, "grad_norm": 5.560348223806419, "learning_rate": 1.6527762474375842e-06, "loss": 17.1587, "step": 40564 }, { "epoch": 0.7414956038532546, "grad_norm": 4.930007211984985, "learning_rate": 1.6525563576252762e-06, "loss": 16.9413, "step": 40565 }, { "epoch": 0.7415138830497011, "grad_norm": 5.817685366513802, "learning_rate": 1.6523364795454654e-06, "loss": 17.1895, "step": 40566 }, { "epoch": 0.7415321622461477, "grad_norm": 5.11645674566091, "learning_rate": 1.6521166131989253e-06, "loss": 17.0259, "step": 40567 }, { "epoch": 0.7415504414425942, "grad_norm": 5.81043799254603, "learning_rate": 1.651896758586427e-06, "loss": 17.2186, "step": 40568 }, { "epoch": 0.7415687206390407, "grad_norm": 5.477858817811624, "learning_rate": 1.6516769157087404e-06, "loss": 16.9325, "step": 40569 }, { "epoch": 0.7415869998354873, "grad_norm": 6.218388805397645, "learning_rate": 1.6514570845666339e-06, "loss": 17.1263, "step": 40570 }, { "epoch": 0.7416052790319337, "grad_norm": 13.959874240515246, "learning_rate": 1.6512372651608816e-06, "loss": 18.7339, "step": 40571 }, { "epoch": 0.7416235582283803, "grad_norm": 8.336313055946407, "learning_rate": 1.6510174574922516e-06, "loss": 18.0539, "step": 40572 }, { "epoch": 0.7416418374248268, "grad_norm": 5.625280616819865, "learning_rate": 1.650797661561514e-06, "loss": 17.1619, "step": 40573 }, { "epoch": 0.7416601166212733, "grad_norm": 8.196532473252937, "learning_rate": 1.6505778773694409e-06, "loss": 17.7073, "step": 40574 }, { "epoch": 0.7416783958177199, "grad_norm": 5.133297140538748, "learning_rate": 1.6503581049168005e-06, "loss": 17.1237, "step": 40575 }, { "epoch": 0.7416966750141664, "grad_norm": 5.528641410213792, "learning_rate": 1.650138344204364e-06, "loss": 17.0483, "step": 40576 }, { "epoch": 0.741714954210613, "grad_norm": 6.7208412205677845, "learning_rate": 1.6499185952329032e-06, "loss": 17.222, "step": 40577 }, { "epoch": 0.7417332334070594, "grad_norm": 6.413834873732037, "learning_rate": 1.6496988580031858e-06, "loss": 17.4476, "step": 40578 }, { "epoch": 0.7417515126035059, "grad_norm": 7.985512638057706, "learning_rate": 1.6494791325159849e-06, "loss": 18.0341, "step": 40579 }, { "epoch": 0.7417697917999525, "grad_norm": 5.51961304475814, "learning_rate": 1.649259418772069e-06, "loss": 16.9697, "step": 40580 }, { "epoch": 0.741788070996399, "grad_norm": 5.894240364574496, "learning_rate": 1.6490397167722061e-06, "loss": 17.323, "step": 40581 }, { "epoch": 0.7418063501928456, "grad_norm": 6.819351883484612, "learning_rate": 1.64882002651717e-06, "loss": 17.831, "step": 40582 }, { "epoch": 0.7418246293892921, "grad_norm": 6.141550568693748, "learning_rate": 1.6486003480077283e-06, "loss": 17.1874, "step": 40583 }, { "epoch": 0.7418429085857385, "grad_norm": 5.944330503626276, "learning_rate": 1.6483806812446507e-06, "loss": 17.5002, "step": 40584 }, { "epoch": 0.7418611877821851, "grad_norm": 5.874828702922031, "learning_rate": 1.6481610262287078e-06, "loss": 17.2782, "step": 40585 }, { "epoch": 0.7418794669786316, "grad_norm": 6.082403061913244, "learning_rate": 1.647941382960671e-06, "loss": 17.4723, "step": 40586 }, { "epoch": 0.7418977461750782, "grad_norm": 6.8841885718200855, "learning_rate": 1.6477217514413075e-06, "loss": 17.4821, "step": 40587 }, { "epoch": 0.7419160253715247, "grad_norm": 5.863455713991968, "learning_rate": 1.6475021316713902e-06, "loss": 17.2348, "step": 40588 }, { "epoch": 0.7419343045679712, "grad_norm": 4.898220151570516, "learning_rate": 1.6472825236516865e-06, "loss": 16.9768, "step": 40589 }, { "epoch": 0.7419525837644178, "grad_norm": 3.9572455217374323, "learning_rate": 1.6470629273829652e-06, "loss": 16.5636, "step": 40590 }, { "epoch": 0.7419708629608642, "grad_norm": 7.464976434409583, "learning_rate": 1.646843342865999e-06, "loss": 17.826, "step": 40591 }, { "epoch": 0.7419891421573107, "grad_norm": 5.878075592101429, "learning_rate": 1.6466237701015557e-06, "loss": 17.3383, "step": 40592 }, { "epoch": 0.7420074213537573, "grad_norm": 6.462693139895169, "learning_rate": 1.6464042090904036e-06, "loss": 17.7282, "step": 40593 }, { "epoch": 0.7420257005502038, "grad_norm": 6.096631249547403, "learning_rate": 1.6461846598333141e-06, "loss": 17.7064, "step": 40594 }, { "epoch": 0.7420439797466504, "grad_norm": 6.596245291378956, "learning_rate": 1.6459651223310557e-06, "loss": 17.381, "step": 40595 }, { "epoch": 0.7420622589430969, "grad_norm": 6.596259747879995, "learning_rate": 1.6457455965844004e-06, "loss": 17.5523, "step": 40596 }, { "epoch": 0.7420805381395433, "grad_norm": 6.8497588225183055, "learning_rate": 1.645526082594116e-06, "loss": 17.9543, "step": 40597 }, { "epoch": 0.7420988173359899, "grad_norm": 5.326485946053178, "learning_rate": 1.6453065803609697e-06, "loss": 17.1018, "step": 40598 }, { "epoch": 0.7421170965324364, "grad_norm": 5.908615532925776, "learning_rate": 1.6450870898857347e-06, "loss": 17.2166, "step": 40599 }, { "epoch": 0.742135375728883, "grad_norm": 5.656925656566722, "learning_rate": 1.644867611169178e-06, "loss": 17.5191, "step": 40600 }, { "epoch": 0.7421536549253295, "grad_norm": 4.733476337344154, "learning_rate": 1.644648144212067e-06, "loss": 16.6955, "step": 40601 }, { "epoch": 0.742171934121776, "grad_norm": 6.783852022068633, "learning_rate": 1.6444286890151755e-06, "loss": 17.1538, "step": 40602 }, { "epoch": 0.7421902133182225, "grad_norm": 6.100588016374671, "learning_rate": 1.6442092455792685e-06, "loss": 17.5976, "step": 40603 }, { "epoch": 0.742208492514669, "grad_norm": 6.740508996868515, "learning_rate": 1.643989813905117e-06, "loss": 17.506, "step": 40604 }, { "epoch": 0.7422267717111156, "grad_norm": 5.175174668579147, "learning_rate": 1.6437703939934912e-06, "loss": 17.0624, "step": 40605 }, { "epoch": 0.7422450509075621, "grad_norm": 6.033192586100366, "learning_rate": 1.6435509858451593e-06, "loss": 17.524, "step": 40606 }, { "epoch": 0.7422633301040086, "grad_norm": 6.999060732072947, "learning_rate": 1.6433315894608881e-06, "loss": 17.785, "step": 40607 }, { "epoch": 0.7422816093004552, "grad_norm": 8.005894050486766, "learning_rate": 1.64311220484145e-06, "loss": 18.0998, "step": 40608 }, { "epoch": 0.7422998884969016, "grad_norm": 6.3595675024628875, "learning_rate": 1.6428928319876109e-06, "loss": 17.0241, "step": 40609 }, { "epoch": 0.7423181676933482, "grad_norm": 6.462749086010772, "learning_rate": 1.6426734709001424e-06, "loss": 17.5056, "step": 40610 }, { "epoch": 0.7423364468897947, "grad_norm": 6.088484800522451, "learning_rate": 1.6424541215798106e-06, "loss": 17.4828, "step": 40611 }, { "epoch": 0.7423547260862412, "grad_norm": 7.8405546645174775, "learning_rate": 1.6422347840273879e-06, "loss": 17.9689, "step": 40612 }, { "epoch": 0.7423730052826878, "grad_norm": 6.213967661593427, "learning_rate": 1.642015458243638e-06, "loss": 17.6004, "step": 40613 }, { "epoch": 0.7423912844791343, "grad_norm": 7.010553755837141, "learning_rate": 1.6417961442293357e-06, "loss": 17.8561, "step": 40614 }, { "epoch": 0.7424095636755809, "grad_norm": 4.8945347577077065, "learning_rate": 1.641576841985244e-06, "loss": 16.9718, "step": 40615 }, { "epoch": 0.7424278428720273, "grad_norm": 3.6802373281202545, "learning_rate": 1.6413575515121355e-06, "loss": 16.477, "step": 40616 }, { "epoch": 0.7424461220684738, "grad_norm": 6.649357477643113, "learning_rate": 1.6411382728107778e-06, "loss": 17.5061, "step": 40617 }, { "epoch": 0.7424644012649204, "grad_norm": 5.8003764339313255, "learning_rate": 1.640919005881937e-06, "loss": 16.8112, "step": 40618 }, { "epoch": 0.7424826804613669, "grad_norm": 7.169421978570883, "learning_rate": 1.6406997507263856e-06, "loss": 17.4104, "step": 40619 }, { "epoch": 0.7425009596578135, "grad_norm": 6.026934141511231, "learning_rate": 1.6404805073448882e-06, "loss": 17.1507, "step": 40620 }, { "epoch": 0.74251923885426, "grad_norm": 5.343472334392256, "learning_rate": 1.6402612757382153e-06, "loss": 17.2442, "step": 40621 }, { "epoch": 0.7425375180507064, "grad_norm": 6.526641792840446, "learning_rate": 1.6400420559071362e-06, "loss": 17.7235, "step": 40622 }, { "epoch": 0.742555797247153, "grad_norm": 6.3808107719325164, "learning_rate": 1.6398228478524187e-06, "loss": 17.4274, "step": 40623 }, { "epoch": 0.7425740764435995, "grad_norm": 5.789137828143716, "learning_rate": 1.6396036515748287e-06, "loss": 17.2755, "step": 40624 }, { "epoch": 0.7425923556400461, "grad_norm": 6.689140655145517, "learning_rate": 1.639384467075138e-06, "loss": 17.3191, "step": 40625 }, { "epoch": 0.7426106348364926, "grad_norm": 7.073368959017093, "learning_rate": 1.6391652943541114e-06, "loss": 18.129, "step": 40626 }, { "epoch": 0.7426289140329391, "grad_norm": 7.082216891592325, "learning_rate": 1.6389461334125201e-06, "loss": 18.0917, "step": 40627 }, { "epoch": 0.7426471932293857, "grad_norm": 4.857643196973743, "learning_rate": 1.6387269842511316e-06, "loss": 16.7885, "step": 40628 }, { "epoch": 0.7426654724258321, "grad_norm": 7.106976454302328, "learning_rate": 1.638507846870711e-06, "loss": 17.7492, "step": 40629 }, { "epoch": 0.7426837516222787, "grad_norm": 6.014273161457864, "learning_rate": 1.6382887212720294e-06, "loss": 17.3176, "step": 40630 }, { "epoch": 0.7427020308187252, "grad_norm": 5.652220563337874, "learning_rate": 1.6380696074558556e-06, "loss": 17.2902, "step": 40631 }, { "epoch": 0.7427203100151717, "grad_norm": 6.046657237368866, "learning_rate": 1.6378505054229544e-06, "loss": 17.1718, "step": 40632 }, { "epoch": 0.7427385892116183, "grad_norm": 8.607993523927512, "learning_rate": 1.6376314151740967e-06, "loss": 17.9362, "step": 40633 }, { "epoch": 0.7427568684080648, "grad_norm": 7.449252494276212, "learning_rate": 1.6374123367100491e-06, "loss": 17.5776, "step": 40634 }, { "epoch": 0.7427751476045114, "grad_norm": 6.550550946468448, "learning_rate": 1.6371932700315786e-06, "loss": 17.3633, "step": 40635 }, { "epoch": 0.7427934268009578, "grad_norm": 6.234337350604898, "learning_rate": 1.6369742151394546e-06, "loss": 17.3427, "step": 40636 }, { "epoch": 0.7428117059974043, "grad_norm": 6.084475527707446, "learning_rate": 1.636755172034445e-06, "loss": 17.369, "step": 40637 }, { "epoch": 0.7428299851938509, "grad_norm": 5.81361951194096, "learning_rate": 1.6365361407173142e-06, "loss": 17.0898, "step": 40638 }, { "epoch": 0.7428482643902974, "grad_norm": 7.045843858852944, "learning_rate": 1.636317121188833e-06, "loss": 17.2741, "step": 40639 }, { "epoch": 0.742866543586744, "grad_norm": 6.73270688515009, "learning_rate": 1.6360981134497679e-06, "loss": 17.4917, "step": 40640 }, { "epoch": 0.7428848227831905, "grad_norm": 7.589686803066077, "learning_rate": 1.6358791175008892e-06, "loss": 18.064, "step": 40641 }, { "epoch": 0.7429031019796369, "grad_norm": 6.534665812739631, "learning_rate": 1.6356601333429617e-06, "loss": 17.5428, "step": 40642 }, { "epoch": 0.7429213811760835, "grad_norm": 5.888016504773569, "learning_rate": 1.6354411609767523e-06, "loss": 17.3, "step": 40643 }, { "epoch": 0.74293966037253, "grad_norm": 5.914890325418048, "learning_rate": 1.635222200403031e-06, "loss": 17.2765, "step": 40644 }, { "epoch": 0.7429579395689766, "grad_norm": 6.9172597638279285, "learning_rate": 1.635003251622564e-06, "loss": 17.3075, "step": 40645 }, { "epoch": 0.7429762187654231, "grad_norm": 7.846200630675133, "learning_rate": 1.634784314636117e-06, "loss": 17.2502, "step": 40646 }, { "epoch": 0.7429944979618696, "grad_norm": 5.248518996167902, "learning_rate": 1.6345653894444607e-06, "loss": 16.9863, "step": 40647 }, { "epoch": 0.7430127771583162, "grad_norm": 6.588723405143847, "learning_rate": 1.634346476048359e-06, "loss": 17.898, "step": 40648 }, { "epoch": 0.7430310563547626, "grad_norm": 5.570855844324175, "learning_rate": 1.6341275744485806e-06, "loss": 17.2187, "step": 40649 }, { "epoch": 0.7430493355512092, "grad_norm": 7.05622982267202, "learning_rate": 1.6339086846458951e-06, "loss": 17.8245, "step": 40650 }, { "epoch": 0.7430676147476557, "grad_norm": 6.630057892982037, "learning_rate": 1.6336898066410671e-06, "loss": 17.6321, "step": 40651 }, { "epoch": 0.7430858939441022, "grad_norm": 6.695591374927539, "learning_rate": 1.6334709404348626e-06, "loss": 17.5128, "step": 40652 }, { "epoch": 0.7431041731405488, "grad_norm": 7.547251000228805, "learning_rate": 1.633252086028052e-06, "loss": 17.8551, "step": 40653 }, { "epoch": 0.7431224523369953, "grad_norm": 6.796235147351877, "learning_rate": 1.6330332434214009e-06, "loss": 17.7521, "step": 40654 }, { "epoch": 0.7431407315334418, "grad_norm": 7.729948395568082, "learning_rate": 1.6328144126156742e-06, "loss": 17.9809, "step": 40655 }, { "epoch": 0.7431590107298883, "grad_norm": 6.467560736675408, "learning_rate": 1.6325955936116428e-06, "loss": 17.3371, "step": 40656 }, { "epoch": 0.7431772899263348, "grad_norm": 5.61095838068579, "learning_rate": 1.6323767864100697e-06, "loss": 17.1259, "step": 40657 }, { "epoch": 0.7431955691227814, "grad_norm": 5.81219572444395, "learning_rate": 1.6321579910117242e-06, "loss": 17.4778, "step": 40658 }, { "epoch": 0.7432138483192279, "grad_norm": 6.8385989283905735, "learning_rate": 1.6319392074173734e-06, "loss": 17.8276, "step": 40659 }, { "epoch": 0.7432321275156744, "grad_norm": 4.722423767551266, "learning_rate": 1.6317204356277828e-06, "loss": 16.8676, "step": 40660 }, { "epoch": 0.743250406712121, "grad_norm": 6.6223446336809735, "learning_rate": 1.6315016756437207e-06, "loss": 17.617, "step": 40661 }, { "epoch": 0.7432686859085674, "grad_norm": 6.0747864044885524, "learning_rate": 1.631282927465953e-06, "loss": 17.377, "step": 40662 }, { "epoch": 0.743286965105014, "grad_norm": 6.0824837529297735, "learning_rate": 1.6310641910952452e-06, "loss": 17.489, "step": 40663 }, { "epoch": 0.7433052443014605, "grad_norm": 6.405665169301437, "learning_rate": 1.6308454665323658e-06, "loss": 17.5399, "step": 40664 }, { "epoch": 0.743323523497907, "grad_norm": 7.426073855036308, "learning_rate": 1.6306267537780818e-06, "loss": 18.0737, "step": 40665 }, { "epoch": 0.7433418026943536, "grad_norm": 6.368571786955615, "learning_rate": 1.630408052833156e-06, "loss": 17.3745, "step": 40666 }, { "epoch": 0.7433600818908, "grad_norm": 5.269392715296683, "learning_rate": 1.6301893636983578e-06, "loss": 17.1031, "step": 40667 }, { "epoch": 0.7433783610872466, "grad_norm": 5.03391513691681, "learning_rate": 1.6299706863744557e-06, "loss": 16.957, "step": 40668 }, { "epoch": 0.7433966402836931, "grad_norm": 5.781289874326953, "learning_rate": 1.629752020862212e-06, "loss": 17.1851, "step": 40669 }, { "epoch": 0.7434149194801396, "grad_norm": 6.916947429962249, "learning_rate": 1.6295333671623964e-06, "loss": 17.5352, "step": 40670 }, { "epoch": 0.7434331986765862, "grad_norm": 6.382687801698046, "learning_rate": 1.6293147252757723e-06, "loss": 17.4559, "step": 40671 }, { "epoch": 0.7434514778730327, "grad_norm": 6.807824488210002, "learning_rate": 1.6290960952031093e-06, "loss": 17.3064, "step": 40672 }, { "epoch": 0.7434697570694793, "grad_norm": 6.110738569312457, "learning_rate": 1.628877476945172e-06, "loss": 17.6136, "step": 40673 }, { "epoch": 0.7434880362659257, "grad_norm": 6.09930278830653, "learning_rate": 1.6286588705027251e-06, "loss": 17.1863, "step": 40674 }, { "epoch": 0.7435063154623722, "grad_norm": 6.559199196887617, "learning_rate": 1.6284402758765377e-06, "loss": 17.552, "step": 40675 }, { "epoch": 0.7435245946588188, "grad_norm": 6.498815004675382, "learning_rate": 1.6282216930673727e-06, "loss": 17.4883, "step": 40676 }, { "epoch": 0.7435428738552653, "grad_norm": 5.32106977384776, "learning_rate": 1.6280031220759984e-06, "loss": 17.0507, "step": 40677 }, { "epoch": 0.7435611530517119, "grad_norm": 6.244399797967837, "learning_rate": 1.6277845629031824e-06, "loss": 17.3326, "step": 40678 }, { "epoch": 0.7435794322481584, "grad_norm": 5.66209086600581, "learning_rate": 1.6275660155496885e-06, "loss": 17.0922, "step": 40679 }, { "epoch": 0.7435977114446048, "grad_norm": 7.334444435125223, "learning_rate": 1.6273474800162814e-06, "loss": 17.9914, "step": 40680 }, { "epoch": 0.7436159906410514, "grad_norm": 6.966039421857335, "learning_rate": 1.6271289563037306e-06, "loss": 17.722, "step": 40681 }, { "epoch": 0.7436342698374979, "grad_norm": 7.626373446111509, "learning_rate": 1.6269104444128003e-06, "loss": 17.7695, "step": 40682 }, { "epoch": 0.7436525490339445, "grad_norm": 9.916460302060072, "learning_rate": 1.6266919443442541e-06, "loss": 18.0481, "step": 40683 }, { "epoch": 0.743670828230391, "grad_norm": 5.249900819328747, "learning_rate": 1.6264734560988605e-06, "loss": 17.1536, "step": 40684 }, { "epoch": 0.7436891074268375, "grad_norm": 5.987167964033889, "learning_rate": 1.6262549796773859e-06, "loss": 17.3636, "step": 40685 }, { "epoch": 0.7437073866232841, "grad_norm": 5.6965009150578325, "learning_rate": 1.6260365150805928e-06, "loss": 17.1617, "step": 40686 }, { "epoch": 0.7437256658197305, "grad_norm": 5.233900364619436, "learning_rate": 1.6258180623092511e-06, "loss": 17.0606, "step": 40687 }, { "epoch": 0.7437439450161771, "grad_norm": 6.312705334762825, "learning_rate": 1.6255996213641229e-06, "loss": 17.5406, "step": 40688 }, { "epoch": 0.7437622242126236, "grad_norm": 7.389904305872877, "learning_rate": 1.6253811922459761e-06, "loss": 18.0241, "step": 40689 }, { "epoch": 0.7437805034090701, "grad_norm": 7.564754535499642, "learning_rate": 1.625162774955576e-06, "loss": 17.6163, "step": 40690 }, { "epoch": 0.7437987826055167, "grad_norm": 6.682476329748923, "learning_rate": 1.6249443694936855e-06, "loss": 17.3593, "step": 40691 }, { "epoch": 0.7438170618019632, "grad_norm": 8.399031410993349, "learning_rate": 1.624725975861074e-06, "loss": 17.8542, "step": 40692 }, { "epoch": 0.7438353409984098, "grad_norm": 5.948012789419888, "learning_rate": 1.6245075940585036e-06, "loss": 17.3736, "step": 40693 }, { "epoch": 0.7438536201948562, "grad_norm": 5.872699534197153, "learning_rate": 1.6242892240867408e-06, "loss": 17.3197, "step": 40694 }, { "epoch": 0.7438718993913027, "grad_norm": 7.182266981842001, "learning_rate": 1.624070865946553e-06, "loss": 17.4905, "step": 40695 }, { "epoch": 0.7438901785877493, "grad_norm": 6.552214378202655, "learning_rate": 1.623852519638704e-06, "loss": 17.4941, "step": 40696 }, { "epoch": 0.7439084577841958, "grad_norm": 5.489991090387051, "learning_rate": 1.6236341851639574e-06, "loss": 17.0391, "step": 40697 }, { "epoch": 0.7439267369806424, "grad_norm": 6.181112403278995, "learning_rate": 1.6234158625230817e-06, "loss": 17.0014, "step": 40698 }, { "epoch": 0.7439450161770889, "grad_norm": 7.0714547027986425, "learning_rate": 1.6231975517168403e-06, "loss": 17.8247, "step": 40699 }, { "epoch": 0.7439632953735353, "grad_norm": 7.374725266380375, "learning_rate": 1.6229792527459964e-06, "loss": 17.2715, "step": 40700 }, { "epoch": 0.7439815745699819, "grad_norm": 6.1020818569038715, "learning_rate": 1.622760965611319e-06, "loss": 17.3229, "step": 40701 }, { "epoch": 0.7439998537664284, "grad_norm": 7.24632693709808, "learning_rate": 1.6225426903135699e-06, "loss": 17.6848, "step": 40702 }, { "epoch": 0.744018132962875, "grad_norm": 5.695990499898945, "learning_rate": 1.622324426853516e-06, "loss": 17.1588, "step": 40703 }, { "epoch": 0.7440364121593215, "grad_norm": 6.074945346996711, "learning_rate": 1.6221061752319229e-06, "loss": 16.9647, "step": 40704 }, { "epoch": 0.744054691355768, "grad_norm": 6.9084720384764, "learning_rate": 1.621887935449553e-06, "loss": 17.2849, "step": 40705 }, { "epoch": 0.7440729705522146, "grad_norm": 5.884934619905146, "learning_rate": 1.6216697075071747e-06, "loss": 17.3079, "step": 40706 }, { "epoch": 0.744091249748661, "grad_norm": 4.546375314941535, "learning_rate": 1.6214514914055507e-06, "loss": 16.8232, "step": 40707 }, { "epoch": 0.7441095289451076, "grad_norm": 6.767936817525966, "learning_rate": 1.621233287145444e-06, "loss": 17.6486, "step": 40708 }, { "epoch": 0.7441278081415541, "grad_norm": 7.4676067187963975, "learning_rate": 1.6210150947276237e-06, "loss": 17.6108, "step": 40709 }, { "epoch": 0.7441460873380006, "grad_norm": 5.015989754135448, "learning_rate": 1.6207969141528518e-06, "loss": 16.8243, "step": 40710 }, { "epoch": 0.7441643665344472, "grad_norm": 6.037447411349342, "learning_rate": 1.620578745421892e-06, "loss": 17.1358, "step": 40711 }, { "epoch": 0.7441826457308937, "grad_norm": 5.791847710920968, "learning_rate": 1.6203605885355107e-06, "loss": 17.3079, "step": 40712 }, { "epoch": 0.7442009249273402, "grad_norm": 5.534876213937013, "learning_rate": 1.6201424434944736e-06, "loss": 17.2681, "step": 40713 }, { "epoch": 0.7442192041237867, "grad_norm": 5.765680814428486, "learning_rate": 1.6199243102995426e-06, "loss": 17.3175, "step": 40714 }, { "epoch": 0.7442374833202332, "grad_norm": 6.041829016648875, "learning_rate": 1.6197061889514847e-06, "loss": 17.3675, "step": 40715 }, { "epoch": 0.7442557625166798, "grad_norm": 6.039895511111707, "learning_rate": 1.619488079451062e-06, "loss": 17.3194, "step": 40716 }, { "epoch": 0.7442740417131263, "grad_norm": 6.99286580534992, "learning_rate": 1.6192699817990421e-06, "loss": 17.6209, "step": 40717 }, { "epoch": 0.7442923209095729, "grad_norm": 5.9308852978457445, "learning_rate": 1.6190518959961872e-06, "loss": 17.2385, "step": 40718 }, { "epoch": 0.7443106001060193, "grad_norm": 7.033872563587528, "learning_rate": 1.6188338220432602e-06, "loss": 17.6498, "step": 40719 }, { "epoch": 0.7443288793024658, "grad_norm": 6.389843231432092, "learning_rate": 1.6186157599410296e-06, "loss": 17.5506, "step": 40720 }, { "epoch": 0.7443471584989124, "grad_norm": 6.392822881145717, "learning_rate": 1.6183977096902553e-06, "loss": 17.5692, "step": 40721 }, { "epoch": 0.7443654376953589, "grad_norm": 5.648198571308965, "learning_rate": 1.6181796712917036e-06, "loss": 17.2236, "step": 40722 }, { "epoch": 0.7443837168918055, "grad_norm": 6.829160649506653, "learning_rate": 1.6179616447461399e-06, "loss": 17.896, "step": 40723 }, { "epoch": 0.744401996088252, "grad_norm": 6.597404835450835, "learning_rate": 1.6177436300543281e-06, "loss": 17.2008, "step": 40724 }, { "epoch": 0.7444202752846985, "grad_norm": 6.039563502019175, "learning_rate": 1.6175256272170288e-06, "loss": 17.3505, "step": 40725 }, { "epoch": 0.744438554481145, "grad_norm": 6.4149687661873, "learning_rate": 1.617307636235011e-06, "loss": 17.5596, "step": 40726 }, { "epoch": 0.7444568336775915, "grad_norm": 5.67612222171703, "learning_rate": 1.617089657109036e-06, "loss": 17.2888, "step": 40727 }, { "epoch": 0.744475112874038, "grad_norm": 6.563470551723587, "learning_rate": 1.6168716898398667e-06, "loss": 17.4642, "step": 40728 }, { "epoch": 0.7444933920704846, "grad_norm": 5.917986416452369, "learning_rate": 1.61665373442827e-06, "loss": 17.3628, "step": 40729 }, { "epoch": 0.7445116712669311, "grad_norm": 6.6991929475517695, "learning_rate": 1.6164357908750068e-06, "loss": 17.5243, "step": 40730 }, { "epoch": 0.7445299504633777, "grad_norm": 6.9352383811449805, "learning_rate": 1.6162178591808426e-06, "loss": 17.834, "step": 40731 }, { "epoch": 0.7445482296598241, "grad_norm": 4.552874023615812, "learning_rate": 1.6159999393465432e-06, "loss": 16.7668, "step": 40732 }, { "epoch": 0.7445665088562706, "grad_norm": 7.691277089228143, "learning_rate": 1.615782031372868e-06, "loss": 17.9056, "step": 40733 }, { "epoch": 0.7445847880527172, "grad_norm": 6.27330555642196, "learning_rate": 1.6155641352605849e-06, "loss": 17.5641, "step": 40734 }, { "epoch": 0.7446030672491637, "grad_norm": 7.247440183170055, "learning_rate": 1.6153462510104562e-06, "loss": 17.6604, "step": 40735 }, { "epoch": 0.7446213464456103, "grad_norm": 6.081034436430446, "learning_rate": 1.6151283786232435e-06, "loss": 17.2458, "step": 40736 }, { "epoch": 0.7446396256420568, "grad_norm": 6.369488952807707, "learning_rate": 1.6149105180997132e-06, "loss": 17.1332, "step": 40737 }, { "epoch": 0.7446579048385032, "grad_norm": 7.111732545762408, "learning_rate": 1.6146926694406267e-06, "loss": 17.7309, "step": 40738 }, { "epoch": 0.7446761840349498, "grad_norm": 6.670331912121953, "learning_rate": 1.6144748326467502e-06, "loss": 17.5698, "step": 40739 }, { "epoch": 0.7446944632313963, "grad_norm": 5.612992455782484, "learning_rate": 1.614257007718844e-06, "loss": 17.4588, "step": 40740 }, { "epoch": 0.7447127424278429, "grad_norm": 7.146653723065375, "learning_rate": 1.6140391946576745e-06, "loss": 17.4685, "step": 40741 }, { "epoch": 0.7447310216242894, "grad_norm": 6.142815070560547, "learning_rate": 1.6138213934640024e-06, "loss": 17.2, "step": 40742 }, { "epoch": 0.7447493008207359, "grad_norm": 7.405252664797461, "learning_rate": 1.6136036041385943e-06, "loss": 17.5843, "step": 40743 }, { "epoch": 0.7447675800171825, "grad_norm": 6.566600269071424, "learning_rate": 1.6133858266822117e-06, "loss": 17.3327, "step": 40744 }, { "epoch": 0.7447858592136289, "grad_norm": 5.547795784718346, "learning_rate": 1.6131680610956158e-06, "loss": 17.0148, "step": 40745 }, { "epoch": 0.7448041384100755, "grad_norm": 7.470116061917547, "learning_rate": 1.6129503073795738e-06, "loss": 17.8618, "step": 40746 }, { "epoch": 0.744822417606522, "grad_norm": 7.3447863072924715, "learning_rate": 1.6127325655348457e-06, "loss": 17.7224, "step": 40747 }, { "epoch": 0.7448406968029685, "grad_norm": 5.64035002272444, "learning_rate": 1.6125148355621956e-06, "loss": 17.2587, "step": 40748 }, { "epoch": 0.7448589759994151, "grad_norm": 6.99569400754683, "learning_rate": 1.6122971174623892e-06, "loss": 17.3336, "step": 40749 }, { "epoch": 0.7448772551958616, "grad_norm": 5.399658213351022, "learning_rate": 1.6120794112361853e-06, "loss": 17.0859, "step": 40750 }, { "epoch": 0.7448955343923082, "grad_norm": 5.6929971534406585, "learning_rate": 1.611861716884351e-06, "loss": 17.214, "step": 40751 }, { "epoch": 0.7449138135887546, "grad_norm": 6.087395707777481, "learning_rate": 1.6116440344076473e-06, "loss": 17.5724, "step": 40752 }, { "epoch": 0.7449320927852011, "grad_norm": 6.2617692789111326, "learning_rate": 1.6114263638068357e-06, "loss": 17.4203, "step": 40753 }, { "epoch": 0.7449503719816477, "grad_norm": 7.414201649643346, "learning_rate": 1.611208705082682e-06, "loss": 17.5244, "step": 40754 }, { "epoch": 0.7449686511780942, "grad_norm": 7.0682893168425895, "learning_rate": 1.6109910582359479e-06, "loss": 17.4441, "step": 40755 }, { "epoch": 0.7449869303745408, "grad_norm": 4.916605453845035, "learning_rate": 1.6107734232673945e-06, "loss": 16.6676, "step": 40756 }, { "epoch": 0.7450052095709873, "grad_norm": 5.356892045751166, "learning_rate": 1.610555800177786e-06, "loss": 16.9488, "step": 40757 }, { "epoch": 0.7450234887674337, "grad_norm": 6.515195416509152, "learning_rate": 1.6103381889678864e-06, "loss": 17.3422, "step": 40758 }, { "epoch": 0.7450417679638803, "grad_norm": 7.617457805242862, "learning_rate": 1.6101205896384565e-06, "loss": 18.1413, "step": 40759 }, { "epoch": 0.7450600471603268, "grad_norm": 6.058448581213734, "learning_rate": 1.609903002190261e-06, "loss": 17.1865, "step": 40760 }, { "epoch": 0.7450783263567734, "grad_norm": 7.456598672787096, "learning_rate": 1.609685426624062e-06, "loss": 17.901, "step": 40761 }, { "epoch": 0.7450966055532199, "grad_norm": 6.73182280618892, "learning_rate": 1.609467862940619e-06, "loss": 17.7113, "step": 40762 }, { "epoch": 0.7451148847496664, "grad_norm": 5.614180617936292, "learning_rate": 1.6092503111406988e-06, "loss": 17.2091, "step": 40763 }, { "epoch": 0.745133163946113, "grad_norm": 5.890222104160019, "learning_rate": 1.6090327712250602e-06, "loss": 17.1672, "step": 40764 }, { "epoch": 0.7451514431425594, "grad_norm": 5.654887685596102, "learning_rate": 1.6088152431944693e-06, "loss": 17.2182, "step": 40765 }, { "epoch": 0.745169722339006, "grad_norm": 6.3524241311050655, "learning_rate": 1.6085977270496849e-06, "loss": 17.6007, "step": 40766 }, { "epoch": 0.7451880015354525, "grad_norm": 5.420065553057041, "learning_rate": 1.6083802227914714e-06, "loss": 17.329, "step": 40767 }, { "epoch": 0.745206280731899, "grad_norm": 5.440246595986641, "learning_rate": 1.6081627304205928e-06, "loss": 17.1847, "step": 40768 }, { "epoch": 0.7452245599283456, "grad_norm": 8.685233558869033, "learning_rate": 1.607945249937809e-06, "loss": 18.1691, "step": 40769 }, { "epoch": 0.7452428391247921, "grad_norm": 6.138874253437976, "learning_rate": 1.6077277813438814e-06, "loss": 17.2441, "step": 40770 }, { "epoch": 0.7452611183212386, "grad_norm": 7.098515787023489, "learning_rate": 1.6075103246395745e-06, "loss": 17.8643, "step": 40771 }, { "epoch": 0.7452793975176851, "grad_norm": 5.436076844939651, "learning_rate": 1.6072928798256504e-06, "loss": 17.0185, "step": 40772 }, { "epoch": 0.7452976767141316, "grad_norm": 7.365168993328379, "learning_rate": 1.6070754469028682e-06, "loss": 17.7846, "step": 40773 }, { "epoch": 0.7453159559105782, "grad_norm": 6.502123996854508, "learning_rate": 1.606858025871994e-06, "loss": 17.5651, "step": 40774 }, { "epoch": 0.7453342351070247, "grad_norm": 7.442572860252675, "learning_rate": 1.6066406167337867e-06, "loss": 17.8599, "step": 40775 }, { "epoch": 0.7453525143034713, "grad_norm": 5.573254146229997, "learning_rate": 1.6064232194890089e-06, "loss": 17.0513, "step": 40776 }, { "epoch": 0.7453707934999177, "grad_norm": 5.1537848527320635, "learning_rate": 1.606205834138425e-06, "loss": 17.262, "step": 40777 }, { "epoch": 0.7453890726963642, "grad_norm": 5.3126247935225175, "learning_rate": 1.6059884606827941e-06, "loss": 17.1782, "step": 40778 }, { "epoch": 0.7454073518928108, "grad_norm": 5.553359965847435, "learning_rate": 1.6057710991228804e-06, "loss": 17.1972, "step": 40779 }, { "epoch": 0.7454256310892573, "grad_norm": 5.964497883985525, "learning_rate": 1.6055537494594442e-06, "loss": 17.2913, "step": 40780 }, { "epoch": 0.7454439102857039, "grad_norm": 4.895370749337423, "learning_rate": 1.6053364116932462e-06, "loss": 16.9628, "step": 40781 }, { "epoch": 0.7454621894821504, "grad_norm": 5.7755420646807085, "learning_rate": 1.605119085825051e-06, "loss": 17.137, "step": 40782 }, { "epoch": 0.7454804686785969, "grad_norm": 5.977903823597416, "learning_rate": 1.6049017718556187e-06, "loss": 17.1049, "step": 40783 }, { "epoch": 0.7454987478750434, "grad_norm": 5.421087139668849, "learning_rate": 1.6046844697857094e-06, "loss": 17.0382, "step": 40784 }, { "epoch": 0.7455170270714899, "grad_norm": 6.148309352857505, "learning_rate": 1.6044671796160872e-06, "loss": 17.1626, "step": 40785 }, { "epoch": 0.7455353062679365, "grad_norm": 5.806282392544209, "learning_rate": 1.6042499013475138e-06, "loss": 17.1524, "step": 40786 }, { "epoch": 0.745553585464383, "grad_norm": 5.879737008472409, "learning_rate": 1.6040326349807483e-06, "loss": 17.2629, "step": 40787 }, { "epoch": 0.7455718646608295, "grad_norm": 5.899560311348728, "learning_rate": 1.603815380516555e-06, "loss": 17.2008, "step": 40788 }, { "epoch": 0.7455901438572761, "grad_norm": 5.605456113932318, "learning_rate": 1.6035981379556947e-06, "loss": 17.0919, "step": 40789 }, { "epoch": 0.7456084230537225, "grad_norm": 6.782123199723816, "learning_rate": 1.6033809072989265e-06, "loss": 17.5599, "step": 40790 }, { "epoch": 0.7456267022501691, "grad_norm": 6.212353470766527, "learning_rate": 1.6031636885470147e-06, "loss": 17.1956, "step": 40791 }, { "epoch": 0.7456449814466156, "grad_norm": 6.483214559514918, "learning_rate": 1.6029464817007195e-06, "loss": 17.5161, "step": 40792 }, { "epoch": 0.7456632606430621, "grad_norm": 6.727487316621635, "learning_rate": 1.6027292867608008e-06, "loss": 17.372, "step": 40793 }, { "epoch": 0.7456815398395087, "grad_norm": 6.014039397331097, "learning_rate": 1.602512103728021e-06, "loss": 17.2389, "step": 40794 }, { "epoch": 0.7456998190359552, "grad_norm": 5.945090466209599, "learning_rate": 1.6022949326031412e-06, "loss": 17.2732, "step": 40795 }, { "epoch": 0.7457180982324016, "grad_norm": 5.721791528676625, "learning_rate": 1.6020777733869247e-06, "loss": 17.2373, "step": 40796 }, { "epoch": 0.7457363774288482, "grad_norm": 6.074630565527054, "learning_rate": 1.6018606260801306e-06, "loss": 17.1589, "step": 40797 }, { "epoch": 0.7457546566252947, "grad_norm": 6.587431198849259, "learning_rate": 1.6016434906835188e-06, "loss": 17.4048, "step": 40798 }, { "epoch": 0.7457729358217413, "grad_norm": 6.281773642397893, "learning_rate": 1.601426367197853e-06, "loss": 17.3152, "step": 40799 }, { "epoch": 0.7457912150181878, "grad_norm": 6.205895453294535, "learning_rate": 1.601209255623893e-06, "loss": 17.3479, "step": 40800 }, { "epoch": 0.7458094942146343, "grad_norm": 6.428620765695619, "learning_rate": 1.600992155962398e-06, "loss": 17.3423, "step": 40801 }, { "epoch": 0.7458277734110809, "grad_norm": 5.017103101295895, "learning_rate": 1.600775068214132e-06, "loss": 16.7279, "step": 40802 }, { "epoch": 0.7458460526075273, "grad_norm": 8.887777760211645, "learning_rate": 1.600557992379853e-06, "loss": 17.4961, "step": 40803 }, { "epoch": 0.7458643318039739, "grad_norm": 5.669665059460304, "learning_rate": 1.6003409284603232e-06, "loss": 17.0575, "step": 40804 }, { "epoch": 0.7458826110004204, "grad_norm": 6.491936731387985, "learning_rate": 1.6001238764563048e-06, "loss": 17.4096, "step": 40805 }, { "epoch": 0.7459008901968669, "grad_norm": 7.566433772266083, "learning_rate": 1.5999068363685577e-06, "loss": 17.4532, "step": 40806 }, { "epoch": 0.7459191693933135, "grad_norm": 5.569875945247374, "learning_rate": 1.5996898081978402e-06, "loss": 17.0984, "step": 40807 }, { "epoch": 0.74593744858976, "grad_norm": 6.868847035712419, "learning_rate": 1.5994727919449165e-06, "loss": 17.6761, "step": 40808 }, { "epoch": 0.7459557277862066, "grad_norm": 8.781528460973961, "learning_rate": 1.5992557876105442e-06, "loss": 17.9521, "step": 40809 }, { "epoch": 0.745974006982653, "grad_norm": 5.307530317848968, "learning_rate": 1.5990387951954862e-06, "loss": 17.3394, "step": 40810 }, { "epoch": 0.7459922861790995, "grad_norm": 5.836902276175979, "learning_rate": 1.5988218147005009e-06, "loss": 17.3042, "step": 40811 }, { "epoch": 0.7460105653755461, "grad_norm": 5.015645203712413, "learning_rate": 1.5986048461263498e-06, "loss": 17.0378, "step": 40812 }, { "epoch": 0.7460288445719926, "grad_norm": 7.528663479790858, "learning_rate": 1.598387889473796e-06, "loss": 17.9831, "step": 40813 }, { "epoch": 0.7460471237684392, "grad_norm": 6.715288584597252, "learning_rate": 1.5981709447435967e-06, "loss": 17.7692, "step": 40814 }, { "epoch": 0.7460654029648857, "grad_norm": 7.463927854340476, "learning_rate": 1.597954011936511e-06, "loss": 17.7381, "step": 40815 }, { "epoch": 0.7460836821613321, "grad_norm": 5.8482398956757145, "learning_rate": 1.5977370910533035e-06, "loss": 17.3997, "step": 40816 }, { "epoch": 0.7461019613577787, "grad_norm": 6.139650729883076, "learning_rate": 1.5975201820947323e-06, "loss": 17.3809, "step": 40817 }, { "epoch": 0.7461202405542252, "grad_norm": 6.831489860132617, "learning_rate": 1.597303285061556e-06, "loss": 17.7715, "step": 40818 }, { "epoch": 0.7461385197506718, "grad_norm": 6.924007954809402, "learning_rate": 1.5970863999545378e-06, "loss": 17.9978, "step": 40819 }, { "epoch": 0.7461567989471183, "grad_norm": 6.754180383005795, "learning_rate": 1.596869526774435e-06, "loss": 17.3717, "step": 40820 }, { "epoch": 0.7461750781435648, "grad_norm": 5.1467942844030485, "learning_rate": 1.5966526655220093e-06, "loss": 17.0049, "step": 40821 }, { "epoch": 0.7461933573400114, "grad_norm": 6.1785349866969135, "learning_rate": 1.596435816198022e-06, "loss": 17.491, "step": 40822 }, { "epoch": 0.7462116365364578, "grad_norm": 6.6329400085919605, "learning_rate": 1.5962189788032323e-06, "loss": 17.9836, "step": 40823 }, { "epoch": 0.7462299157329044, "grad_norm": 6.447907265159542, "learning_rate": 1.5960021533383978e-06, "loss": 17.3128, "step": 40824 }, { "epoch": 0.7462481949293509, "grad_norm": 5.028911429793541, "learning_rate": 1.5957853398042822e-06, "loss": 16.896, "step": 40825 }, { "epoch": 0.7462664741257974, "grad_norm": 6.4611316700871795, "learning_rate": 1.5955685382016417e-06, "loss": 17.7364, "step": 40826 }, { "epoch": 0.746284753322244, "grad_norm": 5.915931070946287, "learning_rate": 1.5953517485312397e-06, "loss": 16.9972, "step": 40827 }, { "epoch": 0.7463030325186905, "grad_norm": 5.693988256730969, "learning_rate": 1.595134970793834e-06, "loss": 17.255, "step": 40828 }, { "epoch": 0.746321311715137, "grad_norm": 5.70659038501032, "learning_rate": 1.5949182049901834e-06, "loss": 16.8248, "step": 40829 }, { "epoch": 0.7463395909115835, "grad_norm": 5.431348422234644, "learning_rate": 1.5947014511210495e-06, "loss": 17.0291, "step": 40830 }, { "epoch": 0.74635787010803, "grad_norm": 9.138703120558224, "learning_rate": 1.5944847091871929e-06, "loss": 18.3882, "step": 40831 }, { "epoch": 0.7463761493044766, "grad_norm": 6.681232833840022, "learning_rate": 1.59426797918937e-06, "loss": 17.3678, "step": 40832 }, { "epoch": 0.7463944285009231, "grad_norm": 5.832376631577645, "learning_rate": 1.5940512611283444e-06, "loss": 17.1736, "step": 40833 }, { "epoch": 0.7464127076973697, "grad_norm": 7.639871690075366, "learning_rate": 1.5938345550048727e-06, "loss": 18.0704, "step": 40834 }, { "epoch": 0.7464309868938162, "grad_norm": 5.698348125761847, "learning_rate": 1.5936178608197145e-06, "loss": 17.127, "step": 40835 }, { "epoch": 0.7464492660902626, "grad_norm": 7.532860069578281, "learning_rate": 1.5934011785736314e-06, "loss": 17.4563, "step": 40836 }, { "epoch": 0.7464675452867092, "grad_norm": 4.637926938576204, "learning_rate": 1.5931845082673814e-06, "loss": 16.9394, "step": 40837 }, { "epoch": 0.7464858244831557, "grad_norm": 5.359920206011305, "learning_rate": 1.5929678499017227e-06, "loss": 16.9589, "step": 40838 }, { "epoch": 0.7465041036796023, "grad_norm": 7.339511474590012, "learning_rate": 1.5927512034774163e-06, "loss": 17.6737, "step": 40839 }, { "epoch": 0.7465223828760488, "grad_norm": 8.00704974228599, "learning_rate": 1.592534568995221e-06, "loss": 17.8817, "step": 40840 }, { "epoch": 0.7465406620724953, "grad_norm": 5.577933116563449, "learning_rate": 1.5923179464558973e-06, "loss": 17.0806, "step": 40841 }, { "epoch": 0.7465589412689418, "grad_norm": 4.810590847646696, "learning_rate": 1.5921013358602044e-06, "loss": 16.9316, "step": 40842 }, { "epoch": 0.7465772204653883, "grad_norm": 6.913365218011943, "learning_rate": 1.5918847372088986e-06, "loss": 17.7712, "step": 40843 }, { "epoch": 0.7465954996618349, "grad_norm": 5.3689883937235825, "learning_rate": 1.591668150502743e-06, "loss": 16.9877, "step": 40844 }, { "epoch": 0.7466137788582814, "grad_norm": 5.919414676616237, "learning_rate": 1.5914515757424948e-06, "loss": 17.3721, "step": 40845 }, { "epoch": 0.7466320580547279, "grad_norm": 4.852601796762976, "learning_rate": 1.5912350129289112e-06, "loss": 16.9777, "step": 40846 }, { "epoch": 0.7466503372511745, "grad_norm": 6.469032815216064, "learning_rate": 1.5910184620627545e-06, "loss": 17.5476, "step": 40847 }, { "epoch": 0.746668616447621, "grad_norm": 11.044238334453896, "learning_rate": 1.5908019231447808e-06, "loss": 17.6393, "step": 40848 }, { "epoch": 0.7466868956440675, "grad_norm": 6.72240697943844, "learning_rate": 1.5905853961757507e-06, "loss": 17.4269, "step": 40849 }, { "epoch": 0.746705174840514, "grad_norm": 5.850846177128571, "learning_rate": 1.5903688811564238e-06, "loss": 17.4604, "step": 40850 }, { "epoch": 0.7467234540369605, "grad_norm": 4.985270316965858, "learning_rate": 1.590152378087559e-06, "loss": 17.0501, "step": 40851 }, { "epoch": 0.7467417332334071, "grad_norm": 4.807279504885327, "learning_rate": 1.5899358869699122e-06, "loss": 16.8265, "step": 40852 }, { "epoch": 0.7467600124298536, "grad_norm": 5.991847597724368, "learning_rate": 1.5897194078042461e-06, "loss": 17.209, "step": 40853 }, { "epoch": 0.7467782916263002, "grad_norm": 5.088895704621126, "learning_rate": 1.589502940591317e-06, "loss": 16.9633, "step": 40854 }, { "epoch": 0.7467965708227466, "grad_norm": 6.2338303571793245, "learning_rate": 1.589286485331883e-06, "loss": 17.4331, "step": 40855 }, { "epoch": 0.7468148500191931, "grad_norm": 6.412600687641643, "learning_rate": 1.5890700420267058e-06, "loss": 17.5701, "step": 40856 }, { "epoch": 0.7468331292156397, "grad_norm": 5.996206489427761, "learning_rate": 1.58885361067654e-06, "loss": 17.2613, "step": 40857 }, { "epoch": 0.7468514084120862, "grad_norm": 7.340295213554807, "learning_rate": 1.5886371912821463e-06, "loss": 17.5987, "step": 40858 }, { "epoch": 0.7468696876085328, "grad_norm": 7.285196169731332, "learning_rate": 1.588420783844285e-06, "loss": 17.4593, "step": 40859 }, { "epoch": 0.7468879668049793, "grad_norm": 7.692571363287325, "learning_rate": 1.588204388363711e-06, "loss": 18.1205, "step": 40860 }, { "epoch": 0.7469062460014257, "grad_norm": 7.0831576994306795, "learning_rate": 1.587988004841186e-06, "loss": 17.5467, "step": 40861 }, { "epoch": 0.7469245251978723, "grad_norm": 6.388973763915833, "learning_rate": 1.5877716332774672e-06, "loss": 17.6613, "step": 40862 }, { "epoch": 0.7469428043943188, "grad_norm": 6.216705219054755, "learning_rate": 1.5875552736733113e-06, "loss": 17.4359, "step": 40863 }, { "epoch": 0.7469610835907653, "grad_norm": 7.999396828124152, "learning_rate": 1.5873389260294797e-06, "loss": 18.5138, "step": 40864 }, { "epoch": 0.7469793627872119, "grad_norm": 5.139733280392678, "learning_rate": 1.5871225903467274e-06, "loss": 16.937, "step": 40865 }, { "epoch": 0.7469976419836584, "grad_norm": 7.658172264467788, "learning_rate": 1.5869062666258155e-06, "loss": 17.7687, "step": 40866 }, { "epoch": 0.747015921180105, "grad_norm": 6.1700398808815216, "learning_rate": 1.5866899548674996e-06, "loss": 17.2866, "step": 40867 }, { "epoch": 0.7470342003765514, "grad_norm": 4.7854512712427315, "learning_rate": 1.5864736550725408e-06, "loss": 16.9471, "step": 40868 }, { "epoch": 0.7470524795729979, "grad_norm": 6.7493654092021655, "learning_rate": 1.5862573672416943e-06, "loss": 17.7395, "step": 40869 }, { "epoch": 0.7470707587694445, "grad_norm": 6.141664706019671, "learning_rate": 1.5860410913757208e-06, "loss": 17.6236, "step": 40870 }, { "epoch": 0.747089037965891, "grad_norm": 4.867917024872719, "learning_rate": 1.585824827475375e-06, "loss": 16.8694, "step": 40871 }, { "epoch": 0.7471073171623376, "grad_norm": 9.993299110171902, "learning_rate": 1.5856085755414192e-06, "loss": 17.4959, "step": 40872 }, { "epoch": 0.7471255963587841, "grad_norm": 10.863008029475973, "learning_rate": 1.5853923355746087e-06, "loss": 17.9804, "step": 40873 }, { "epoch": 0.7471438755552305, "grad_norm": 5.740903466861008, "learning_rate": 1.5851761075757005e-06, "loss": 17.2341, "step": 40874 }, { "epoch": 0.7471621547516771, "grad_norm": 4.784621500620418, "learning_rate": 1.5849598915454534e-06, "loss": 16.936, "step": 40875 }, { "epoch": 0.7471804339481236, "grad_norm": 7.799305924142861, "learning_rate": 1.5847436874846272e-06, "loss": 17.8214, "step": 40876 }, { "epoch": 0.7471987131445702, "grad_norm": 7.668039829202365, "learning_rate": 1.5845274953939766e-06, "loss": 17.6433, "step": 40877 }, { "epoch": 0.7472169923410167, "grad_norm": 6.119384146740417, "learning_rate": 1.5843113152742617e-06, "loss": 17.2142, "step": 40878 }, { "epoch": 0.7472352715374632, "grad_norm": 6.89446758989248, "learning_rate": 1.58409514712624e-06, "loss": 17.6519, "step": 40879 }, { "epoch": 0.7472535507339098, "grad_norm": 5.9208232323867405, "learning_rate": 1.5838789909506664e-06, "loss": 17.1399, "step": 40880 }, { "epoch": 0.7472718299303562, "grad_norm": 8.494510791479826, "learning_rate": 1.5836628467483022e-06, "loss": 18.1198, "step": 40881 }, { "epoch": 0.7472901091268028, "grad_norm": 5.671853308334644, "learning_rate": 1.5834467145199033e-06, "loss": 17.2333, "step": 40882 }, { "epoch": 0.7473083883232493, "grad_norm": 4.956130179525377, "learning_rate": 1.5832305942662258e-06, "loss": 16.8495, "step": 40883 }, { "epoch": 0.7473266675196958, "grad_norm": 6.8994975805265435, "learning_rate": 1.583014485988028e-06, "loss": 18.0918, "step": 40884 }, { "epoch": 0.7473449467161424, "grad_norm": 5.890163108697766, "learning_rate": 1.5827983896860682e-06, "loss": 17.3751, "step": 40885 }, { "epoch": 0.7473632259125889, "grad_norm": 6.678386353171593, "learning_rate": 1.5825823053611056e-06, "loss": 17.5269, "step": 40886 }, { "epoch": 0.7473815051090354, "grad_norm": 6.376985915500716, "learning_rate": 1.582366233013895e-06, "loss": 17.8094, "step": 40887 }, { "epoch": 0.7473997843054819, "grad_norm": 5.501612640670327, "learning_rate": 1.5821501726451926e-06, "loss": 17.1731, "step": 40888 }, { "epoch": 0.7474180635019284, "grad_norm": 7.211042901739185, "learning_rate": 1.581934124255759e-06, "loss": 17.6711, "step": 40889 }, { "epoch": 0.747436342698375, "grad_norm": 5.035239722695692, "learning_rate": 1.5817180878463495e-06, "loss": 17.0438, "step": 40890 }, { "epoch": 0.7474546218948215, "grad_norm": 5.751625491163963, "learning_rate": 1.5815020634177198e-06, "loss": 17.2038, "step": 40891 }, { "epoch": 0.7474729010912681, "grad_norm": 7.221218492340318, "learning_rate": 1.5812860509706307e-06, "loss": 17.9663, "step": 40892 }, { "epoch": 0.7474911802877146, "grad_norm": 5.141938134175285, "learning_rate": 1.5810700505058351e-06, "loss": 17.0174, "step": 40893 }, { "epoch": 0.747509459484161, "grad_norm": 7.2255038922817, "learning_rate": 1.580854062024093e-06, "loss": 17.5389, "step": 40894 }, { "epoch": 0.7475277386806076, "grad_norm": 5.953246112875933, "learning_rate": 1.5806380855261616e-06, "loss": 17.1022, "step": 40895 }, { "epoch": 0.7475460178770541, "grad_norm": 5.563071764333898, "learning_rate": 1.5804221210127979e-06, "loss": 17.1737, "step": 40896 }, { "epoch": 0.7475642970735007, "grad_norm": 5.933622755541929, "learning_rate": 1.5802061684847557e-06, "loss": 17.4362, "step": 40897 }, { "epoch": 0.7475825762699472, "grad_norm": 5.4523018173182045, "learning_rate": 1.5799902279427959e-06, "loss": 17.087, "step": 40898 }, { "epoch": 0.7476008554663937, "grad_norm": 8.269935979451157, "learning_rate": 1.5797742993876736e-06, "loss": 17.9374, "step": 40899 }, { "epoch": 0.7476191346628402, "grad_norm": 5.332205379145427, "learning_rate": 1.5795583828201438e-06, "loss": 17.103, "step": 40900 }, { "epoch": 0.7476374138592867, "grad_norm": 6.943068602541111, "learning_rate": 1.5793424782409667e-06, "loss": 17.4643, "step": 40901 }, { "epoch": 0.7476556930557333, "grad_norm": 6.0071467577076305, "learning_rate": 1.5791265856508957e-06, "loss": 17.1794, "step": 40902 }, { "epoch": 0.7476739722521798, "grad_norm": 6.066872850246815, "learning_rate": 1.5789107050506896e-06, "loss": 17.4649, "step": 40903 }, { "epoch": 0.7476922514486263, "grad_norm": 7.345362038959762, "learning_rate": 1.5786948364411059e-06, "loss": 17.5342, "step": 40904 }, { "epoch": 0.7477105306450729, "grad_norm": 5.129989358312383, "learning_rate": 1.5784789798228983e-06, "loss": 17.0675, "step": 40905 }, { "epoch": 0.7477288098415193, "grad_norm": 7.673594016142328, "learning_rate": 1.5782631351968263e-06, "loss": 17.4843, "step": 40906 }, { "epoch": 0.7477470890379659, "grad_norm": 6.921707668702416, "learning_rate": 1.5780473025636456e-06, "loss": 17.7029, "step": 40907 }, { "epoch": 0.7477653682344124, "grad_norm": 5.740321292378413, "learning_rate": 1.5778314819241103e-06, "loss": 17.1026, "step": 40908 }, { "epoch": 0.7477836474308589, "grad_norm": 6.083081675604773, "learning_rate": 1.5776156732789804e-06, "loss": 17.4811, "step": 40909 }, { "epoch": 0.7478019266273055, "grad_norm": 6.243189769333948, "learning_rate": 1.5773998766290106e-06, "loss": 17.2358, "step": 40910 }, { "epoch": 0.747820205823752, "grad_norm": 6.832017794919168, "learning_rate": 1.5771840919749554e-06, "loss": 17.5704, "step": 40911 }, { "epoch": 0.7478384850201986, "grad_norm": 5.169236180977044, "learning_rate": 1.5769683193175733e-06, "loss": 16.9216, "step": 40912 }, { "epoch": 0.747856764216645, "grad_norm": 6.06806994307597, "learning_rate": 1.5767525586576215e-06, "loss": 17.3033, "step": 40913 }, { "epoch": 0.7478750434130915, "grad_norm": 6.22589910903787, "learning_rate": 1.5765368099958533e-06, "loss": 17.5163, "step": 40914 }, { "epoch": 0.7478933226095381, "grad_norm": 6.233746879077097, "learning_rate": 1.576321073333028e-06, "loss": 17.5624, "step": 40915 }, { "epoch": 0.7479116018059846, "grad_norm": 5.589786733059947, "learning_rate": 1.576105348669899e-06, "loss": 16.9487, "step": 40916 }, { "epoch": 0.7479298810024312, "grad_norm": 6.893635769487999, "learning_rate": 1.575889636007225e-06, "loss": 17.3881, "step": 40917 }, { "epoch": 0.7479481601988777, "grad_norm": 6.645604615620414, "learning_rate": 1.5756739353457612e-06, "loss": 17.339, "step": 40918 }, { "epoch": 0.7479664393953241, "grad_norm": 6.067785189977901, "learning_rate": 1.5754582466862612e-06, "loss": 17.2592, "step": 40919 }, { "epoch": 0.7479847185917707, "grad_norm": 5.439034244583025, "learning_rate": 1.5752425700294844e-06, "loss": 17.167, "step": 40920 }, { "epoch": 0.7480029977882172, "grad_norm": 5.818816542788876, "learning_rate": 1.5750269053761835e-06, "loss": 17.2173, "step": 40921 }, { "epoch": 0.7480212769846638, "grad_norm": 8.331494261527713, "learning_rate": 1.5748112527271165e-06, "loss": 17.6268, "step": 40922 }, { "epoch": 0.7480395561811103, "grad_norm": 7.229256641053604, "learning_rate": 1.5745956120830396e-06, "loss": 17.8676, "step": 40923 }, { "epoch": 0.7480578353775568, "grad_norm": 5.5675305800953385, "learning_rate": 1.5743799834447089e-06, "loss": 17.2629, "step": 40924 }, { "epoch": 0.7480761145740034, "grad_norm": 4.9319230801097635, "learning_rate": 1.574164366812877e-06, "loss": 16.968, "step": 40925 }, { "epoch": 0.7480943937704498, "grad_norm": 6.098712492261519, "learning_rate": 1.5739487621883032e-06, "loss": 17.1885, "step": 40926 }, { "epoch": 0.7481126729668964, "grad_norm": 7.54775047228779, "learning_rate": 1.5737331695717417e-06, "loss": 17.608, "step": 40927 }, { "epoch": 0.7481309521633429, "grad_norm": 6.578783487469382, "learning_rate": 1.5735175889639465e-06, "loss": 17.3835, "step": 40928 }, { "epoch": 0.7481492313597894, "grad_norm": 7.4577419038458155, "learning_rate": 1.5733020203656761e-06, "loss": 17.9621, "step": 40929 }, { "epoch": 0.748167510556236, "grad_norm": 5.589897771132155, "learning_rate": 1.5730864637776838e-06, "loss": 17.5345, "step": 40930 }, { "epoch": 0.7481857897526825, "grad_norm": 6.007704064807278, "learning_rate": 1.5728709192007257e-06, "loss": 17.3284, "step": 40931 }, { "epoch": 0.7482040689491289, "grad_norm": 6.940038879166105, "learning_rate": 1.5726553866355599e-06, "loss": 17.5764, "step": 40932 }, { "epoch": 0.7482223481455755, "grad_norm": 4.835233597274392, "learning_rate": 1.5724398660829372e-06, "loss": 16.8551, "step": 40933 }, { "epoch": 0.748240627342022, "grad_norm": 4.713748715812055, "learning_rate": 1.5722243575436174e-06, "loss": 16.7637, "step": 40934 }, { "epoch": 0.7482589065384686, "grad_norm": 5.745690833224867, "learning_rate": 1.572008861018353e-06, "loss": 17.1705, "step": 40935 }, { "epoch": 0.7482771857349151, "grad_norm": 5.810719652364446, "learning_rate": 1.5717933765078996e-06, "loss": 17.1037, "step": 40936 }, { "epoch": 0.7482954649313616, "grad_norm": 7.210161059357115, "learning_rate": 1.5715779040130136e-06, "loss": 17.8493, "step": 40937 }, { "epoch": 0.7483137441278082, "grad_norm": 9.466778045047894, "learning_rate": 1.5713624435344483e-06, "loss": 18.2647, "step": 40938 }, { "epoch": 0.7483320233242546, "grad_norm": 6.411607135425822, "learning_rate": 1.5711469950729602e-06, "loss": 17.2437, "step": 40939 }, { "epoch": 0.7483503025207012, "grad_norm": 5.804436699030954, "learning_rate": 1.570931558629306e-06, "loss": 17.4289, "step": 40940 }, { "epoch": 0.7483685817171477, "grad_norm": 5.0658798518282815, "learning_rate": 1.5707161342042388e-06, "loss": 16.8245, "step": 40941 }, { "epoch": 0.7483868609135942, "grad_norm": 4.827503291479856, "learning_rate": 1.5705007217985124e-06, "loss": 16.8236, "step": 40942 }, { "epoch": 0.7484051401100408, "grad_norm": 6.870531989537262, "learning_rate": 1.5702853214128854e-06, "loss": 17.7572, "step": 40943 }, { "epoch": 0.7484234193064873, "grad_norm": 6.642656976772388, "learning_rate": 1.5700699330481105e-06, "loss": 17.6446, "step": 40944 }, { "epoch": 0.7484416985029338, "grad_norm": 4.7779595888336175, "learning_rate": 1.5698545567049412e-06, "loss": 17.1025, "step": 40945 }, { "epoch": 0.7484599776993803, "grad_norm": 6.120419942556234, "learning_rate": 1.5696391923841352e-06, "loss": 17.3123, "step": 40946 }, { "epoch": 0.7484782568958268, "grad_norm": 6.415461067106763, "learning_rate": 1.5694238400864453e-06, "loss": 17.2346, "step": 40947 }, { "epoch": 0.7484965360922734, "grad_norm": 5.784168401427398, "learning_rate": 1.5692084998126266e-06, "loss": 17.1633, "step": 40948 }, { "epoch": 0.7485148152887199, "grad_norm": 20.817699101894682, "learning_rate": 1.5689931715634366e-06, "loss": 17.864, "step": 40949 }, { "epoch": 0.7485330944851665, "grad_norm": 5.803462786403294, "learning_rate": 1.568777855339626e-06, "loss": 17.1958, "step": 40950 }, { "epoch": 0.748551373681613, "grad_norm": 6.148475499573833, "learning_rate": 1.5685625511419522e-06, "loss": 16.9816, "step": 40951 }, { "epoch": 0.7485696528780594, "grad_norm": 6.766880256475565, "learning_rate": 1.5683472589711696e-06, "loss": 17.6213, "step": 40952 }, { "epoch": 0.748587932074506, "grad_norm": 5.2521308408112395, "learning_rate": 1.5681319788280302e-06, "loss": 17.2647, "step": 40953 }, { "epoch": 0.7486062112709525, "grad_norm": 7.248817459117247, "learning_rate": 1.567916710713292e-06, "loss": 17.5305, "step": 40954 }, { "epoch": 0.7486244904673991, "grad_norm": 6.016380298658016, "learning_rate": 1.567701454627708e-06, "loss": 17.2872, "step": 40955 }, { "epoch": 0.7486427696638456, "grad_norm": 4.9920817514447275, "learning_rate": 1.5674862105720312e-06, "loss": 17.1219, "step": 40956 }, { "epoch": 0.748661048860292, "grad_norm": 5.714686417693165, "learning_rate": 1.567270978547017e-06, "loss": 17.2221, "step": 40957 }, { "epoch": 0.7486793280567386, "grad_norm": 6.217261825050668, "learning_rate": 1.567055758553422e-06, "loss": 17.3162, "step": 40958 }, { "epoch": 0.7486976072531851, "grad_norm": 5.293741168627559, "learning_rate": 1.566840550591997e-06, "loss": 17.1241, "step": 40959 }, { "epoch": 0.7487158864496317, "grad_norm": 6.087527323967476, "learning_rate": 1.5666253546634991e-06, "loss": 17.6214, "step": 40960 }, { "epoch": 0.7487341656460782, "grad_norm": 5.662866469859695, "learning_rate": 1.5664101707686818e-06, "loss": 17.2423, "step": 40961 }, { "epoch": 0.7487524448425247, "grad_norm": 6.320573541765228, "learning_rate": 1.5661949989082976e-06, "loss": 17.4592, "step": 40962 }, { "epoch": 0.7487707240389713, "grad_norm": 7.149849507701189, "learning_rate": 1.5659798390831032e-06, "loss": 17.4613, "step": 40963 }, { "epoch": 0.7487890032354177, "grad_norm": 6.467817846563046, "learning_rate": 1.5657646912938495e-06, "loss": 17.6623, "step": 40964 }, { "epoch": 0.7488072824318643, "grad_norm": 6.200388883487151, "learning_rate": 1.5655495555412947e-06, "loss": 17.6484, "step": 40965 }, { "epoch": 0.7488255616283108, "grad_norm": 6.7393583231826675, "learning_rate": 1.5653344318261893e-06, "loss": 17.5766, "step": 40966 }, { "epoch": 0.7488438408247573, "grad_norm": 5.13414414331925, "learning_rate": 1.5651193201492881e-06, "loss": 16.9737, "step": 40967 }, { "epoch": 0.7488621200212039, "grad_norm": 6.542934209266736, "learning_rate": 1.5649042205113473e-06, "loss": 17.6325, "step": 40968 }, { "epoch": 0.7488803992176504, "grad_norm": 6.882244517103911, "learning_rate": 1.5646891329131193e-06, "loss": 17.5758, "step": 40969 }, { "epoch": 0.748898678414097, "grad_norm": 5.855298865861787, "learning_rate": 1.5644740573553562e-06, "loss": 17.2368, "step": 40970 }, { "epoch": 0.7489169576105434, "grad_norm": 5.986553997961551, "learning_rate": 1.5642589938388148e-06, "loss": 17.1733, "step": 40971 }, { "epoch": 0.7489352368069899, "grad_norm": 6.743260574872756, "learning_rate": 1.5640439423642478e-06, "loss": 17.6578, "step": 40972 }, { "epoch": 0.7489535160034365, "grad_norm": 6.381249028479447, "learning_rate": 1.5638289029324067e-06, "loss": 17.5231, "step": 40973 }, { "epoch": 0.748971795199883, "grad_norm": 6.327487256477693, "learning_rate": 1.5636138755440488e-06, "loss": 17.5371, "step": 40974 }, { "epoch": 0.7489900743963296, "grad_norm": 8.89006301744268, "learning_rate": 1.563398860199925e-06, "loss": 18.191, "step": 40975 }, { "epoch": 0.7490083535927761, "grad_norm": 6.063754275541747, "learning_rate": 1.5631838569007895e-06, "loss": 17.6078, "step": 40976 }, { "epoch": 0.7490266327892225, "grad_norm": 5.285516244137486, "learning_rate": 1.562968865647398e-06, "loss": 17.195, "step": 40977 }, { "epoch": 0.7490449119856691, "grad_norm": 6.002907012748358, "learning_rate": 1.5627538864405007e-06, "loss": 17.1855, "step": 40978 }, { "epoch": 0.7490631911821156, "grad_norm": 4.671106100044694, "learning_rate": 1.5625389192808544e-06, "loss": 16.9632, "step": 40979 }, { "epoch": 0.7490814703785622, "grad_norm": 6.0903138835106905, "learning_rate": 1.5623239641692107e-06, "loss": 17.3933, "step": 40980 }, { "epoch": 0.7490997495750087, "grad_norm": 5.631687651688076, "learning_rate": 1.5621090211063217e-06, "loss": 17.3618, "step": 40981 }, { "epoch": 0.7491180287714552, "grad_norm": 6.784383830452639, "learning_rate": 1.561894090092944e-06, "loss": 17.2898, "step": 40982 }, { "epoch": 0.7491363079679018, "grad_norm": 6.200465391721002, "learning_rate": 1.561679171129829e-06, "loss": 17.3858, "step": 40983 }, { "epoch": 0.7491545871643482, "grad_norm": 5.695767924473415, "learning_rate": 1.5614642642177291e-06, "loss": 17.2356, "step": 40984 }, { "epoch": 0.7491728663607948, "grad_norm": 5.7335143183223325, "learning_rate": 1.5612493693573977e-06, "loss": 17.2334, "step": 40985 }, { "epoch": 0.7491911455572413, "grad_norm": 6.479554774120351, "learning_rate": 1.5610344865495913e-06, "loss": 17.1194, "step": 40986 }, { "epoch": 0.7492094247536878, "grad_norm": 6.31564583807405, "learning_rate": 1.5608196157950584e-06, "loss": 17.7565, "step": 40987 }, { "epoch": 0.7492277039501344, "grad_norm": 5.8411012513912555, "learning_rate": 1.560604757094556e-06, "loss": 17.1421, "step": 40988 }, { "epoch": 0.7492459831465809, "grad_norm": 6.849822631205751, "learning_rate": 1.5603899104488357e-06, "loss": 17.3517, "step": 40989 }, { "epoch": 0.7492642623430275, "grad_norm": 4.782563762765938, "learning_rate": 1.5601750758586487e-06, "loss": 16.7523, "step": 40990 }, { "epoch": 0.7492825415394739, "grad_norm": 6.677884141094864, "learning_rate": 1.559960253324751e-06, "loss": 17.361, "step": 40991 }, { "epoch": 0.7493008207359204, "grad_norm": 6.54357348381695, "learning_rate": 1.5597454428478924e-06, "loss": 17.5529, "step": 40992 }, { "epoch": 0.749319099932367, "grad_norm": 9.173050300276449, "learning_rate": 1.5595306444288293e-06, "loss": 17.4247, "step": 40993 }, { "epoch": 0.7493373791288135, "grad_norm": 7.277385003610119, "learning_rate": 1.559315858068311e-06, "loss": 17.73, "step": 40994 }, { "epoch": 0.7493556583252601, "grad_norm": 5.253476609222579, "learning_rate": 1.5591010837670921e-06, "loss": 17.2361, "step": 40995 }, { "epoch": 0.7493739375217066, "grad_norm": 4.659871666845555, "learning_rate": 1.558886321525927e-06, "loss": 16.7714, "step": 40996 }, { "epoch": 0.749392216718153, "grad_norm": 4.875707208732731, "learning_rate": 1.5586715713455663e-06, "loss": 16.9785, "step": 40997 }, { "epoch": 0.7494104959145996, "grad_norm": 7.175066624977356, "learning_rate": 1.558456833226762e-06, "loss": 17.6922, "step": 40998 }, { "epoch": 0.7494287751110461, "grad_norm": 7.414997254035676, "learning_rate": 1.5582421071702696e-06, "loss": 17.654, "step": 40999 }, { "epoch": 0.7494470543074926, "grad_norm": 5.353934553251223, "learning_rate": 1.5580273931768396e-06, "loss": 17.0758, "step": 41000 }, { "epoch": 0.7494653335039392, "grad_norm": 7.163728800314505, "learning_rate": 1.5578126912472236e-06, "loss": 17.5821, "step": 41001 }, { "epoch": 0.7494836127003857, "grad_norm": 5.777560172551722, "learning_rate": 1.557598001382175e-06, "loss": 17.1639, "step": 41002 }, { "epoch": 0.7495018918968323, "grad_norm": 11.111648040390053, "learning_rate": 1.5573833235824487e-06, "loss": 17.9229, "step": 41003 }, { "epoch": 0.7495201710932787, "grad_norm": 6.780318246800481, "learning_rate": 1.5571686578487933e-06, "loss": 17.2285, "step": 41004 }, { "epoch": 0.7495384502897252, "grad_norm": 6.195827952285246, "learning_rate": 1.5569540041819642e-06, "loss": 17.5536, "step": 41005 }, { "epoch": 0.7495567294861718, "grad_norm": 6.335039847873755, "learning_rate": 1.5567393625827133e-06, "loss": 17.6093, "step": 41006 }, { "epoch": 0.7495750086826183, "grad_norm": 6.026898265006499, "learning_rate": 1.5565247330517902e-06, "loss": 17.2177, "step": 41007 }, { "epoch": 0.7495932878790649, "grad_norm": 6.680225090268685, "learning_rate": 1.5563101155899508e-06, "loss": 17.6846, "step": 41008 }, { "epoch": 0.7496115670755114, "grad_norm": 4.920681587768043, "learning_rate": 1.5560955101979436e-06, "loss": 16.9292, "step": 41009 }, { "epoch": 0.7496298462719578, "grad_norm": 8.31185141691932, "learning_rate": 1.555880916876525e-06, "loss": 18.2071, "step": 41010 }, { "epoch": 0.7496481254684044, "grad_norm": 7.421188417589603, "learning_rate": 1.5556663356264429e-06, "loss": 17.9346, "step": 41011 }, { "epoch": 0.7496664046648509, "grad_norm": 5.560377535834733, "learning_rate": 1.5554517664484513e-06, "loss": 17.171, "step": 41012 }, { "epoch": 0.7496846838612975, "grad_norm": 8.425932823488969, "learning_rate": 1.5552372093433043e-06, "loss": 18.2224, "step": 41013 }, { "epoch": 0.749702963057744, "grad_norm": 7.2839615069321235, "learning_rate": 1.5550226643117517e-06, "loss": 17.5311, "step": 41014 }, { "epoch": 0.7497212422541905, "grad_norm": 6.391603675882262, "learning_rate": 1.5548081313545444e-06, "loss": 17.2211, "step": 41015 }, { "epoch": 0.749739521450637, "grad_norm": 6.75793903344983, "learning_rate": 1.5545936104724369e-06, "loss": 17.4745, "step": 41016 }, { "epoch": 0.7497578006470835, "grad_norm": 6.0907844067396395, "learning_rate": 1.5543791016661802e-06, "loss": 17.6145, "step": 41017 }, { "epoch": 0.7497760798435301, "grad_norm": 7.304006186588784, "learning_rate": 1.554164604936524e-06, "loss": 17.5268, "step": 41018 }, { "epoch": 0.7497943590399766, "grad_norm": 6.227282335631432, "learning_rate": 1.5539501202842234e-06, "loss": 17.2027, "step": 41019 }, { "epoch": 0.7498126382364231, "grad_norm": 6.529063611626983, "learning_rate": 1.553735647710027e-06, "loss": 17.0527, "step": 41020 }, { "epoch": 0.7498309174328697, "grad_norm": 5.823746616754846, "learning_rate": 1.5535211872146883e-06, "loss": 17.4053, "step": 41021 }, { "epoch": 0.7498491966293162, "grad_norm": 6.189520853821847, "learning_rate": 1.5533067387989597e-06, "loss": 17.5015, "step": 41022 }, { "epoch": 0.7498674758257627, "grad_norm": 7.75019104888262, "learning_rate": 1.5530923024635924e-06, "loss": 17.9184, "step": 41023 }, { "epoch": 0.7498857550222092, "grad_norm": 5.891614456468599, "learning_rate": 1.5528778782093361e-06, "loss": 17.1274, "step": 41024 }, { "epoch": 0.7499040342186557, "grad_norm": 8.803365854455691, "learning_rate": 1.552663466036945e-06, "loss": 18.1986, "step": 41025 }, { "epoch": 0.7499223134151023, "grad_norm": 7.430135015752508, "learning_rate": 1.5524490659471674e-06, "loss": 17.786, "step": 41026 }, { "epoch": 0.7499405926115488, "grad_norm": 7.093614537771097, "learning_rate": 1.5522346779407587e-06, "loss": 17.561, "step": 41027 }, { "epoch": 0.7499588718079954, "grad_norm": 6.779296179895143, "learning_rate": 1.5520203020184682e-06, "loss": 17.8123, "step": 41028 }, { "epoch": 0.7499771510044418, "grad_norm": 6.2892874691087925, "learning_rate": 1.5518059381810457e-06, "loss": 17.2984, "step": 41029 }, { "epoch": 0.7499954302008883, "grad_norm": 8.156174857441354, "learning_rate": 1.5515915864292442e-06, "loss": 17.4839, "step": 41030 }, { "epoch": 0.7500137093973349, "grad_norm": 6.639435684880543, "learning_rate": 1.5513772467638165e-06, "loss": 17.5763, "step": 41031 }, { "epoch": 0.7500319885937814, "grad_norm": 6.6521797140249745, "learning_rate": 1.551162919185511e-06, "loss": 17.5888, "step": 41032 }, { "epoch": 0.750050267790228, "grad_norm": 4.966119288238345, "learning_rate": 1.5509486036950817e-06, "loss": 16.9345, "step": 41033 }, { "epoch": 0.7500685469866745, "grad_norm": 6.703437702483862, "learning_rate": 1.550734300293278e-06, "loss": 17.5983, "step": 41034 }, { "epoch": 0.750086826183121, "grad_norm": 6.617659673572469, "learning_rate": 1.5505200089808497e-06, "loss": 17.6989, "step": 41035 }, { "epoch": 0.7501051053795675, "grad_norm": 5.191398088856678, "learning_rate": 1.5503057297585512e-06, "loss": 16.9631, "step": 41036 }, { "epoch": 0.750123384576014, "grad_norm": 6.666434693444504, "learning_rate": 1.5500914626271313e-06, "loss": 17.7025, "step": 41037 }, { "epoch": 0.7501416637724606, "grad_norm": 6.892229801979209, "learning_rate": 1.5498772075873403e-06, "loss": 17.628, "step": 41038 }, { "epoch": 0.7501599429689071, "grad_norm": 7.1163850152714385, "learning_rate": 1.5496629646399303e-06, "loss": 17.9431, "step": 41039 }, { "epoch": 0.7501782221653536, "grad_norm": 7.203159669420228, "learning_rate": 1.5494487337856517e-06, "loss": 17.8292, "step": 41040 }, { "epoch": 0.7501965013618002, "grad_norm": 5.7650276556113065, "learning_rate": 1.549234515025258e-06, "loss": 17.2315, "step": 41041 }, { "epoch": 0.7502147805582466, "grad_norm": 5.327358352629726, "learning_rate": 1.5490203083594978e-06, "loss": 17.0708, "step": 41042 }, { "epoch": 0.7502330597546932, "grad_norm": 6.4305770341791, "learning_rate": 1.5488061137891202e-06, "loss": 17.4654, "step": 41043 }, { "epoch": 0.7502513389511397, "grad_norm": 5.884467911598847, "learning_rate": 1.5485919313148796e-06, "loss": 17.4168, "step": 41044 }, { "epoch": 0.7502696181475862, "grad_norm": 5.707641338251753, "learning_rate": 1.5483777609375245e-06, "loss": 17.3229, "step": 41045 }, { "epoch": 0.7502878973440328, "grad_norm": 5.541813077320603, "learning_rate": 1.548163602657804e-06, "loss": 17.0626, "step": 41046 }, { "epoch": 0.7503061765404793, "grad_norm": 5.100054019269005, "learning_rate": 1.5479494564764724e-06, "loss": 16.9427, "step": 41047 }, { "epoch": 0.7503244557369259, "grad_norm": 6.521129070231783, "learning_rate": 1.547735322394277e-06, "loss": 17.4238, "step": 41048 }, { "epoch": 0.7503427349333723, "grad_norm": 6.506286094690759, "learning_rate": 1.5475212004119693e-06, "loss": 17.3143, "step": 41049 }, { "epoch": 0.7503610141298188, "grad_norm": 5.8801103909809225, "learning_rate": 1.5473070905303022e-06, "loss": 17.2186, "step": 41050 }, { "epoch": 0.7503792933262654, "grad_norm": 6.1131075506600725, "learning_rate": 1.547092992750024e-06, "loss": 17.0856, "step": 41051 }, { "epoch": 0.7503975725227119, "grad_norm": 6.774641453189911, "learning_rate": 1.5468789070718837e-06, "loss": 17.5103, "step": 41052 }, { "epoch": 0.7504158517191585, "grad_norm": 6.291887414923761, "learning_rate": 1.546664833496635e-06, "loss": 17.5054, "step": 41053 }, { "epoch": 0.750434130915605, "grad_norm": 6.024016461316672, "learning_rate": 1.5464507720250244e-06, "loss": 17.2553, "step": 41054 }, { "epoch": 0.7504524101120514, "grad_norm": 7.737707730540507, "learning_rate": 1.5462367226578057e-06, "loss": 17.941, "step": 41055 }, { "epoch": 0.750470689308498, "grad_norm": 7.9464096236361295, "learning_rate": 1.546022685395726e-06, "loss": 17.9072, "step": 41056 }, { "epoch": 0.7504889685049445, "grad_norm": 5.930940324102634, "learning_rate": 1.545808660239539e-06, "loss": 17.1609, "step": 41057 }, { "epoch": 0.7505072477013911, "grad_norm": 5.794491273631435, "learning_rate": 1.5455946471899912e-06, "loss": 17.2875, "step": 41058 }, { "epoch": 0.7505255268978376, "grad_norm": 5.962191781441762, "learning_rate": 1.545380646247836e-06, "loss": 17.0799, "step": 41059 }, { "epoch": 0.7505438060942841, "grad_norm": 8.09068687092675, "learning_rate": 1.5451666574138198e-06, "loss": 17.5775, "step": 41060 }, { "epoch": 0.7505620852907307, "grad_norm": 5.907756380633736, "learning_rate": 1.5449526806886966e-06, "loss": 17.2276, "step": 41061 }, { "epoch": 0.7505803644871771, "grad_norm": 5.321678420440363, "learning_rate": 1.5447387160732146e-06, "loss": 16.9653, "step": 41062 }, { "epoch": 0.7505986436836237, "grad_norm": 5.96051360593954, "learning_rate": 1.544524763568122e-06, "loss": 17.2728, "step": 41063 }, { "epoch": 0.7506169228800702, "grad_norm": 6.00263313423325, "learning_rate": 1.5443108231741715e-06, "loss": 17.224, "step": 41064 }, { "epoch": 0.7506352020765167, "grad_norm": 6.443239293294422, "learning_rate": 1.5440968948921103e-06, "loss": 17.5902, "step": 41065 }, { "epoch": 0.7506534812729633, "grad_norm": 6.410154371299142, "learning_rate": 1.5438829787226894e-06, "loss": 17.6275, "step": 41066 }, { "epoch": 0.7506717604694098, "grad_norm": 5.353648495410433, "learning_rate": 1.5436690746666604e-06, "loss": 17.3201, "step": 41067 }, { "epoch": 0.7506900396658562, "grad_norm": 6.700445248441012, "learning_rate": 1.5434551827247718e-06, "loss": 17.452, "step": 41068 }, { "epoch": 0.7507083188623028, "grad_norm": 8.722704027831163, "learning_rate": 1.543241302897771e-06, "loss": 18.5103, "step": 41069 }, { "epoch": 0.7507265980587493, "grad_norm": 6.4752609966511185, "learning_rate": 1.543027435186411e-06, "loss": 17.6267, "step": 41070 }, { "epoch": 0.7507448772551959, "grad_norm": 6.655454483909287, "learning_rate": 1.5428135795914383e-06, "loss": 17.6704, "step": 41071 }, { "epoch": 0.7507631564516424, "grad_norm": 5.28377532188177, "learning_rate": 1.5425997361136053e-06, "loss": 16.9087, "step": 41072 }, { "epoch": 0.7507814356480889, "grad_norm": 6.6101713001193945, "learning_rate": 1.5423859047536604e-06, "loss": 17.5617, "step": 41073 }, { "epoch": 0.7507997148445354, "grad_norm": 6.593609463758745, "learning_rate": 1.5421720855123517e-06, "loss": 17.092, "step": 41074 }, { "epoch": 0.7508179940409819, "grad_norm": 6.832944646480954, "learning_rate": 1.5419582783904291e-06, "loss": 17.6229, "step": 41075 }, { "epoch": 0.7508362732374285, "grad_norm": 7.179892684580424, "learning_rate": 1.5417444833886447e-06, "loss": 17.592, "step": 41076 }, { "epoch": 0.750854552433875, "grad_norm": 7.181863284976146, "learning_rate": 1.541530700507744e-06, "loss": 17.4349, "step": 41077 }, { "epoch": 0.7508728316303215, "grad_norm": 7.179076572156505, "learning_rate": 1.5413169297484799e-06, "loss": 17.5226, "step": 41078 }, { "epoch": 0.7508911108267681, "grad_norm": 4.999954235172758, "learning_rate": 1.5411031711115999e-06, "loss": 16.9755, "step": 41079 }, { "epoch": 0.7509093900232146, "grad_norm": 6.033526389701383, "learning_rate": 1.5408894245978512e-06, "loss": 17.161, "step": 41080 }, { "epoch": 0.7509276692196611, "grad_norm": 5.982271010400869, "learning_rate": 1.5406756902079866e-06, "loss": 17.2451, "step": 41081 }, { "epoch": 0.7509459484161076, "grad_norm": 6.6359430881539225, "learning_rate": 1.5404619679427535e-06, "loss": 17.9226, "step": 41082 }, { "epoch": 0.7509642276125541, "grad_norm": 8.157004497569364, "learning_rate": 1.5402482578028993e-06, "loss": 18.2292, "step": 41083 }, { "epoch": 0.7509825068090007, "grad_norm": 5.422886614294633, "learning_rate": 1.5400345597891753e-06, "loss": 16.9771, "step": 41084 }, { "epoch": 0.7510007860054472, "grad_norm": 5.371627380631836, "learning_rate": 1.539820873902329e-06, "loss": 17.3456, "step": 41085 }, { "epoch": 0.7510190652018938, "grad_norm": 6.901625604428491, "learning_rate": 1.539607200143113e-06, "loss": 17.9162, "step": 41086 }, { "epoch": 0.7510373443983402, "grad_norm": 6.921284715989307, "learning_rate": 1.5393935385122727e-06, "loss": 17.7425, "step": 41087 }, { "epoch": 0.7510556235947867, "grad_norm": 5.008823006582692, "learning_rate": 1.5391798890105557e-06, "loss": 17.0449, "step": 41088 }, { "epoch": 0.7510739027912333, "grad_norm": 4.921449051825758, "learning_rate": 1.5389662516387154e-06, "loss": 16.8203, "step": 41089 }, { "epoch": 0.7510921819876798, "grad_norm": 6.326172243315264, "learning_rate": 1.5387526263974978e-06, "loss": 17.5916, "step": 41090 }, { "epoch": 0.7511104611841264, "grad_norm": 6.770755970917344, "learning_rate": 1.5385390132876504e-06, "loss": 17.4204, "step": 41091 }, { "epoch": 0.7511287403805729, "grad_norm": 7.511777915926927, "learning_rate": 1.5383254123099245e-06, "loss": 17.7701, "step": 41092 }, { "epoch": 0.7511470195770193, "grad_norm": 6.215057939325763, "learning_rate": 1.5381118234650666e-06, "loss": 17.3142, "step": 41093 }, { "epoch": 0.7511652987734659, "grad_norm": 6.061498657711189, "learning_rate": 1.5378982467538262e-06, "loss": 17.4551, "step": 41094 }, { "epoch": 0.7511835779699124, "grad_norm": 6.773371635518726, "learning_rate": 1.537684682176953e-06, "loss": 17.5984, "step": 41095 }, { "epoch": 0.751201857166359, "grad_norm": 6.987450183619426, "learning_rate": 1.5374711297351953e-06, "loss": 17.6865, "step": 41096 }, { "epoch": 0.7512201363628055, "grad_norm": 6.9414031479863425, "learning_rate": 1.537257589429299e-06, "loss": 17.4281, "step": 41097 }, { "epoch": 0.751238415559252, "grad_norm": 5.954835210959624, "learning_rate": 1.5370440612600157e-06, "loss": 17.0227, "step": 41098 }, { "epoch": 0.7512566947556986, "grad_norm": 6.197856627297123, "learning_rate": 1.5368305452280928e-06, "loss": 17.2189, "step": 41099 }, { "epoch": 0.751274973952145, "grad_norm": 6.01918768512775, "learning_rate": 1.5366170413342768e-06, "loss": 17.23, "step": 41100 }, { "epoch": 0.7512932531485916, "grad_norm": 5.712552931909369, "learning_rate": 1.536403549579319e-06, "loss": 17.0317, "step": 41101 }, { "epoch": 0.7513115323450381, "grad_norm": 4.828623966544974, "learning_rate": 1.5361900699639642e-06, "loss": 16.9232, "step": 41102 }, { "epoch": 0.7513298115414846, "grad_norm": 5.985036641820863, "learning_rate": 1.5359766024889634e-06, "loss": 17.1977, "step": 41103 }, { "epoch": 0.7513480907379312, "grad_norm": 7.4871436441881345, "learning_rate": 1.5357631471550649e-06, "loss": 17.6178, "step": 41104 }, { "epoch": 0.7513663699343777, "grad_norm": 8.887005387110664, "learning_rate": 1.5355497039630146e-06, "loss": 17.7462, "step": 41105 }, { "epoch": 0.7513846491308243, "grad_norm": 4.686304139012301, "learning_rate": 1.535336272913564e-06, "loss": 16.8417, "step": 41106 }, { "epoch": 0.7514029283272707, "grad_norm": 11.131767844275783, "learning_rate": 1.5351228540074586e-06, "loss": 18.4367, "step": 41107 }, { "epoch": 0.7514212075237172, "grad_norm": 6.037951344233981, "learning_rate": 1.5349094472454456e-06, "loss": 17.2663, "step": 41108 }, { "epoch": 0.7514394867201638, "grad_norm": 5.216151783893577, "learning_rate": 1.5346960526282756e-06, "loss": 16.8939, "step": 41109 }, { "epoch": 0.7514577659166103, "grad_norm": 6.82307325040693, "learning_rate": 1.5344826701566957e-06, "loss": 17.415, "step": 41110 }, { "epoch": 0.7514760451130569, "grad_norm": 7.074653881525121, "learning_rate": 1.5342692998314511e-06, "loss": 17.6756, "step": 41111 }, { "epoch": 0.7514943243095034, "grad_norm": 5.216093182871901, "learning_rate": 1.5340559416532924e-06, "loss": 17.0234, "step": 41112 }, { "epoch": 0.7515126035059498, "grad_norm": 5.749969870073401, "learning_rate": 1.533842595622968e-06, "loss": 17.3135, "step": 41113 }, { "epoch": 0.7515308827023964, "grad_norm": 6.595714766374446, "learning_rate": 1.5336292617412235e-06, "loss": 17.5955, "step": 41114 }, { "epoch": 0.7515491618988429, "grad_norm": 7.211282165828798, "learning_rate": 1.533415940008809e-06, "loss": 17.5654, "step": 41115 }, { "epoch": 0.7515674410952895, "grad_norm": 8.308482777310719, "learning_rate": 1.5332026304264686e-06, "loss": 17.921, "step": 41116 }, { "epoch": 0.751585720291736, "grad_norm": 5.2207936517784885, "learning_rate": 1.5329893329949546e-06, "loss": 17.0523, "step": 41117 }, { "epoch": 0.7516039994881825, "grad_norm": 6.060849703560399, "learning_rate": 1.5327760477150116e-06, "loss": 17.232, "step": 41118 }, { "epoch": 0.751622278684629, "grad_norm": 6.446041356137718, "learning_rate": 1.532562774587386e-06, "loss": 17.6197, "step": 41119 }, { "epoch": 0.7516405578810755, "grad_norm": 5.524854198875579, "learning_rate": 1.532349513612829e-06, "loss": 17.1414, "step": 41120 }, { "epoch": 0.7516588370775221, "grad_norm": 6.2450216137178405, "learning_rate": 1.5321362647920846e-06, "loss": 17.3117, "step": 41121 }, { "epoch": 0.7516771162739686, "grad_norm": 5.559742545064422, "learning_rate": 1.5319230281259013e-06, "loss": 17.2136, "step": 41122 }, { "epoch": 0.7516953954704151, "grad_norm": 5.409302363588411, "learning_rate": 1.5317098036150285e-06, "loss": 16.7279, "step": 41123 }, { "epoch": 0.7517136746668617, "grad_norm": 6.288469145948976, "learning_rate": 1.531496591260212e-06, "loss": 17.3817, "step": 41124 }, { "epoch": 0.7517319538633082, "grad_norm": 6.1322357573137305, "learning_rate": 1.531283391062197e-06, "loss": 17.4216, "step": 41125 }, { "epoch": 0.7517502330597547, "grad_norm": 7.028135156555811, "learning_rate": 1.5310702030217345e-06, "loss": 17.7772, "step": 41126 }, { "epoch": 0.7517685122562012, "grad_norm": 6.6519307272451496, "learning_rate": 1.5308570271395702e-06, "loss": 17.6077, "step": 41127 }, { "epoch": 0.7517867914526477, "grad_norm": 7.3733660202204305, "learning_rate": 1.5306438634164488e-06, "loss": 17.7911, "step": 41128 }, { "epoch": 0.7518050706490943, "grad_norm": 6.275150737637258, "learning_rate": 1.5304307118531203e-06, "loss": 17.2672, "step": 41129 }, { "epoch": 0.7518233498455408, "grad_norm": 6.417031060280934, "learning_rate": 1.530217572450332e-06, "loss": 17.0919, "step": 41130 }, { "epoch": 0.7518416290419874, "grad_norm": 6.654065249293044, "learning_rate": 1.530004445208829e-06, "loss": 17.7913, "step": 41131 }, { "epoch": 0.7518599082384338, "grad_norm": 5.041421099693464, "learning_rate": 1.529791330129361e-06, "loss": 16.989, "step": 41132 }, { "epoch": 0.7518781874348803, "grad_norm": 5.8038232506024, "learning_rate": 1.5295782272126707e-06, "loss": 17.1957, "step": 41133 }, { "epoch": 0.7518964666313269, "grad_norm": 6.676931595425232, "learning_rate": 1.5293651364595102e-06, "loss": 17.5007, "step": 41134 }, { "epoch": 0.7519147458277734, "grad_norm": 5.787756677367625, "learning_rate": 1.5291520578706232e-06, "loss": 16.9398, "step": 41135 }, { "epoch": 0.7519330250242199, "grad_norm": 5.917751872811609, "learning_rate": 1.5289389914467556e-06, "loss": 17.1185, "step": 41136 }, { "epoch": 0.7519513042206665, "grad_norm": 5.646235670967495, "learning_rate": 1.528725937188657e-06, "loss": 16.9055, "step": 41137 }, { "epoch": 0.751969583417113, "grad_norm": 7.3241135884614, "learning_rate": 1.5285128950970718e-06, "loss": 17.7388, "step": 41138 }, { "epoch": 0.7519878626135595, "grad_norm": 6.088613158508726, "learning_rate": 1.5282998651727472e-06, "loss": 17.4962, "step": 41139 }, { "epoch": 0.752006141810006, "grad_norm": 6.523023487391856, "learning_rate": 1.5280868474164317e-06, "loss": 17.6241, "step": 41140 }, { "epoch": 0.7520244210064525, "grad_norm": 14.320852600361968, "learning_rate": 1.5278738418288708e-06, "loss": 18.1069, "step": 41141 }, { "epoch": 0.7520427002028991, "grad_norm": 5.436399501467882, "learning_rate": 1.5276608484108091e-06, "loss": 17.0826, "step": 41142 }, { "epoch": 0.7520609793993456, "grad_norm": 5.610340459837423, "learning_rate": 1.5274478671629967e-06, "loss": 17.2115, "step": 41143 }, { "epoch": 0.7520792585957922, "grad_norm": 6.291162275248139, "learning_rate": 1.527234898086178e-06, "loss": 17.1199, "step": 41144 }, { "epoch": 0.7520975377922386, "grad_norm": 5.526532863864938, "learning_rate": 1.5270219411810978e-06, "loss": 16.9906, "step": 41145 }, { "epoch": 0.7521158169886851, "grad_norm": 5.152975718811221, "learning_rate": 1.526808996448506e-06, "loss": 16.9069, "step": 41146 }, { "epoch": 0.7521340961851317, "grad_norm": 6.142709675811007, "learning_rate": 1.5265960638891459e-06, "loss": 17.2996, "step": 41147 }, { "epoch": 0.7521523753815782, "grad_norm": 6.447465315235456, "learning_rate": 1.526383143503765e-06, "loss": 17.4228, "step": 41148 }, { "epoch": 0.7521706545780248, "grad_norm": 7.680656329249032, "learning_rate": 1.5261702352931113e-06, "loss": 17.6225, "step": 41149 }, { "epoch": 0.7521889337744713, "grad_norm": 6.9571865687154295, "learning_rate": 1.525957339257928e-06, "loss": 17.4195, "step": 41150 }, { "epoch": 0.7522072129709177, "grad_norm": 5.841983762570868, "learning_rate": 1.5257444553989647e-06, "loss": 17.2632, "step": 41151 }, { "epoch": 0.7522254921673643, "grad_norm": 5.828916741857342, "learning_rate": 1.5255315837169648e-06, "loss": 17.1656, "step": 41152 }, { "epoch": 0.7522437713638108, "grad_norm": 6.543338880753201, "learning_rate": 1.5253187242126743e-06, "loss": 17.4747, "step": 41153 }, { "epoch": 0.7522620505602574, "grad_norm": 6.477644689247904, "learning_rate": 1.5251058768868415e-06, "loss": 17.6272, "step": 41154 }, { "epoch": 0.7522803297567039, "grad_norm": 7.1079388039865705, "learning_rate": 1.5248930417402114e-06, "loss": 17.7265, "step": 41155 }, { "epoch": 0.7522986089531504, "grad_norm": 5.764062552646001, "learning_rate": 1.5246802187735277e-06, "loss": 17.5036, "step": 41156 }, { "epoch": 0.752316888149597, "grad_norm": 6.166639080440372, "learning_rate": 1.5244674079875381e-06, "loss": 17.0246, "step": 41157 }, { "epoch": 0.7523351673460434, "grad_norm": 6.094354124853628, "learning_rate": 1.5242546093829901e-06, "loss": 17.0788, "step": 41158 }, { "epoch": 0.75235344654249, "grad_norm": 5.8114272272519925, "learning_rate": 1.5240418229606269e-06, "loss": 17.1371, "step": 41159 }, { "epoch": 0.7523717257389365, "grad_norm": 5.031302054067947, "learning_rate": 1.5238290487211971e-06, "loss": 16.8827, "step": 41160 }, { "epoch": 0.752390004935383, "grad_norm": 4.882147167517226, "learning_rate": 1.5236162866654447e-06, "loss": 16.9306, "step": 41161 }, { "epoch": 0.7524082841318296, "grad_norm": 5.156589231532808, "learning_rate": 1.5234035367941142e-06, "loss": 16.978, "step": 41162 }, { "epoch": 0.7524265633282761, "grad_norm": 6.117588100055014, "learning_rate": 1.5231907991079536e-06, "loss": 17.5168, "step": 41163 }, { "epoch": 0.7524448425247227, "grad_norm": 8.886994565909625, "learning_rate": 1.5229780736077066e-06, "loss": 18.1723, "step": 41164 }, { "epoch": 0.7524631217211691, "grad_norm": 5.519921622493274, "learning_rate": 1.5227653602941212e-06, "loss": 17.0262, "step": 41165 }, { "epoch": 0.7524814009176156, "grad_norm": 7.6977191542837575, "learning_rate": 1.5225526591679397e-06, "loss": 18.3319, "step": 41166 }, { "epoch": 0.7524996801140622, "grad_norm": 6.520825310254104, "learning_rate": 1.522339970229909e-06, "loss": 18.2221, "step": 41167 }, { "epoch": 0.7525179593105087, "grad_norm": 5.4347885037009185, "learning_rate": 1.5221272934807774e-06, "loss": 17.098, "step": 41168 }, { "epoch": 0.7525362385069553, "grad_norm": 5.602829176840589, "learning_rate": 1.5219146289212871e-06, "loss": 17.1792, "step": 41169 }, { "epoch": 0.7525545177034018, "grad_norm": 8.073914525939465, "learning_rate": 1.5217019765521828e-06, "loss": 17.7768, "step": 41170 }, { "epoch": 0.7525727968998482, "grad_norm": 5.384690467914021, "learning_rate": 1.5214893363742128e-06, "loss": 16.9137, "step": 41171 }, { "epoch": 0.7525910760962948, "grad_norm": 5.7167497891215655, "learning_rate": 1.5212767083881215e-06, "loss": 17.373, "step": 41172 }, { "epoch": 0.7526093552927413, "grad_norm": 5.900466051061728, "learning_rate": 1.5210640925946507e-06, "loss": 16.9291, "step": 41173 }, { "epoch": 0.7526276344891879, "grad_norm": 6.397409737788522, "learning_rate": 1.5208514889945508e-06, "loss": 17.2523, "step": 41174 }, { "epoch": 0.7526459136856344, "grad_norm": 5.341578381878657, "learning_rate": 1.5206388975885628e-06, "loss": 17.1112, "step": 41175 }, { "epoch": 0.7526641928820809, "grad_norm": 6.69965106470725, "learning_rate": 1.520426318377433e-06, "loss": 17.5233, "step": 41176 }, { "epoch": 0.7526824720785275, "grad_norm": 5.431381429402599, "learning_rate": 1.520213751361909e-06, "loss": 17.2065, "step": 41177 }, { "epoch": 0.7527007512749739, "grad_norm": 6.312597495231416, "learning_rate": 1.5200011965427325e-06, "loss": 17.2762, "step": 41178 }, { "epoch": 0.7527190304714205, "grad_norm": 7.4885347474389965, "learning_rate": 1.5197886539206514e-06, "loss": 17.9254, "step": 41179 }, { "epoch": 0.752737309667867, "grad_norm": 8.124445724819944, "learning_rate": 1.519576123496409e-06, "loss": 17.9021, "step": 41180 }, { "epoch": 0.7527555888643135, "grad_norm": 6.684061853656602, "learning_rate": 1.5193636052707484e-06, "loss": 17.6406, "step": 41181 }, { "epoch": 0.7527738680607601, "grad_norm": 6.4482083477251155, "learning_rate": 1.519151099244418e-06, "loss": 17.4161, "step": 41182 }, { "epoch": 0.7527921472572066, "grad_norm": 6.215881169455927, "learning_rate": 1.5189386054181598e-06, "loss": 17.4705, "step": 41183 }, { "epoch": 0.7528104264536531, "grad_norm": 6.566928803439464, "learning_rate": 1.518726123792721e-06, "loss": 17.5424, "step": 41184 }, { "epoch": 0.7528287056500996, "grad_norm": 7.0682290320379995, "learning_rate": 1.5185136543688434e-06, "loss": 17.6477, "step": 41185 }, { "epoch": 0.7528469848465461, "grad_norm": 6.397289256991969, "learning_rate": 1.5183011971472744e-06, "loss": 17.4913, "step": 41186 }, { "epoch": 0.7528652640429927, "grad_norm": 5.723639456261607, "learning_rate": 1.5180887521287568e-06, "loss": 17.2875, "step": 41187 }, { "epoch": 0.7528835432394392, "grad_norm": 6.292784354497996, "learning_rate": 1.5178763193140373e-06, "loss": 17.2265, "step": 41188 }, { "epoch": 0.7529018224358858, "grad_norm": 6.698449954594381, "learning_rate": 1.517663898703859e-06, "loss": 17.6805, "step": 41189 }, { "epoch": 0.7529201016323323, "grad_norm": 5.995581435833416, "learning_rate": 1.517451490298965e-06, "loss": 17.4863, "step": 41190 }, { "epoch": 0.7529383808287787, "grad_norm": 7.306954174070144, "learning_rate": 1.517239094100103e-06, "loss": 17.6748, "step": 41191 }, { "epoch": 0.7529566600252253, "grad_norm": 6.1498737418782135, "learning_rate": 1.5170267101080144e-06, "loss": 17.3366, "step": 41192 }, { "epoch": 0.7529749392216718, "grad_norm": 6.7423620801249315, "learning_rate": 1.5168143383234446e-06, "loss": 17.6196, "step": 41193 }, { "epoch": 0.7529932184181184, "grad_norm": 6.750461485845144, "learning_rate": 1.5166019787471398e-06, "loss": 17.8086, "step": 41194 }, { "epoch": 0.7530114976145649, "grad_norm": 6.627717857965208, "learning_rate": 1.5163896313798415e-06, "loss": 17.6315, "step": 41195 }, { "epoch": 0.7530297768110114, "grad_norm": 6.7258617429491725, "learning_rate": 1.5161772962222964e-06, "loss": 17.4373, "step": 41196 }, { "epoch": 0.7530480560074579, "grad_norm": 7.034950452250524, "learning_rate": 1.5159649732752479e-06, "loss": 17.8832, "step": 41197 }, { "epoch": 0.7530663352039044, "grad_norm": 7.044577270424997, "learning_rate": 1.5157526625394386e-06, "loss": 17.7936, "step": 41198 }, { "epoch": 0.753084614400351, "grad_norm": 6.556075358514991, "learning_rate": 1.5155403640156147e-06, "loss": 17.5585, "step": 41199 }, { "epoch": 0.7531028935967975, "grad_norm": 5.344366185329292, "learning_rate": 1.5153280777045199e-06, "loss": 17.0432, "step": 41200 }, { "epoch": 0.753121172793244, "grad_norm": 6.660611096638573, "learning_rate": 1.5151158036068963e-06, "loss": 17.8088, "step": 41201 }, { "epoch": 0.7531394519896906, "grad_norm": 7.416512843545907, "learning_rate": 1.514903541723489e-06, "loss": 17.9003, "step": 41202 }, { "epoch": 0.753157731186137, "grad_norm": 5.834645492361996, "learning_rate": 1.5146912920550445e-06, "loss": 17.104, "step": 41203 }, { "epoch": 0.7531760103825835, "grad_norm": 7.634715603920438, "learning_rate": 1.5144790546023026e-06, "loss": 17.8293, "step": 41204 }, { "epoch": 0.7531942895790301, "grad_norm": 7.731566269318762, "learning_rate": 1.5142668293660107e-06, "loss": 17.3064, "step": 41205 }, { "epoch": 0.7532125687754766, "grad_norm": 6.111912943672134, "learning_rate": 1.5140546163469112e-06, "loss": 17.3718, "step": 41206 }, { "epoch": 0.7532308479719232, "grad_norm": 6.843218463776901, "learning_rate": 1.5138424155457465e-06, "loss": 17.538, "step": 41207 }, { "epoch": 0.7532491271683697, "grad_norm": 5.357258515514301, "learning_rate": 1.513630226963263e-06, "loss": 17.2172, "step": 41208 }, { "epoch": 0.7532674063648161, "grad_norm": 5.452479549461236, "learning_rate": 1.513418050600201e-06, "loss": 17.0384, "step": 41209 }, { "epoch": 0.7532856855612627, "grad_norm": 6.011355877976757, "learning_rate": 1.5132058864573086e-06, "loss": 17.4147, "step": 41210 }, { "epoch": 0.7533039647577092, "grad_norm": 7.944645137505278, "learning_rate": 1.5129937345353247e-06, "loss": 18.1849, "step": 41211 }, { "epoch": 0.7533222439541558, "grad_norm": 5.838002667934671, "learning_rate": 1.512781594834995e-06, "loss": 17.1513, "step": 41212 }, { "epoch": 0.7533405231506023, "grad_norm": 7.90412972522825, "learning_rate": 1.5125694673570657e-06, "loss": 18.2096, "step": 41213 }, { "epoch": 0.7533588023470488, "grad_norm": 5.6961173942607815, "learning_rate": 1.5123573521022771e-06, "loss": 17.3024, "step": 41214 }, { "epoch": 0.7533770815434954, "grad_norm": 6.245241132231145, "learning_rate": 1.5121452490713717e-06, "loss": 17.3814, "step": 41215 }, { "epoch": 0.7533953607399418, "grad_norm": 6.496777907332859, "learning_rate": 1.5119331582650964e-06, "loss": 17.6136, "step": 41216 }, { "epoch": 0.7534136399363884, "grad_norm": 5.744520119290989, "learning_rate": 1.5117210796841925e-06, "loss": 17.3809, "step": 41217 }, { "epoch": 0.7534319191328349, "grad_norm": 7.08462201610992, "learning_rate": 1.5115090133294025e-06, "loss": 17.5792, "step": 41218 }, { "epoch": 0.7534501983292814, "grad_norm": 4.852949032099081, "learning_rate": 1.5112969592014714e-06, "loss": 16.6928, "step": 41219 }, { "epoch": 0.753468477525728, "grad_norm": 5.000145618026132, "learning_rate": 1.511084917301141e-06, "loss": 16.8727, "step": 41220 }, { "epoch": 0.7534867567221745, "grad_norm": 6.3613406300926885, "learning_rate": 1.5108728876291545e-06, "loss": 17.3443, "step": 41221 }, { "epoch": 0.7535050359186211, "grad_norm": 6.069942766260492, "learning_rate": 1.510660870186258e-06, "loss": 17.4099, "step": 41222 }, { "epoch": 0.7535233151150675, "grad_norm": 6.119917642193498, "learning_rate": 1.5104488649731902e-06, "loss": 17.4071, "step": 41223 }, { "epoch": 0.753541594311514, "grad_norm": 6.144450053082261, "learning_rate": 1.510236871990698e-06, "loss": 17.5691, "step": 41224 }, { "epoch": 0.7535598735079606, "grad_norm": 5.934343757548519, "learning_rate": 1.5100248912395233e-06, "loss": 17.3924, "step": 41225 }, { "epoch": 0.7535781527044071, "grad_norm": 7.4851318189124365, "learning_rate": 1.5098129227204062e-06, "loss": 17.937, "step": 41226 }, { "epoch": 0.7535964319008537, "grad_norm": 7.791147716083776, "learning_rate": 1.5096009664340938e-06, "loss": 18.3317, "step": 41227 }, { "epoch": 0.7536147110973002, "grad_norm": 4.877666852198554, "learning_rate": 1.5093890223813273e-06, "loss": 16.6939, "step": 41228 }, { "epoch": 0.7536329902937466, "grad_norm": 6.020670814104849, "learning_rate": 1.5091770905628479e-06, "loss": 17.1958, "step": 41229 }, { "epoch": 0.7536512694901932, "grad_norm": 6.438240132209059, "learning_rate": 1.508965170979399e-06, "loss": 16.9578, "step": 41230 }, { "epoch": 0.7536695486866397, "grad_norm": 5.766169578260943, "learning_rate": 1.5087532636317265e-06, "loss": 17.2503, "step": 41231 }, { "epoch": 0.7536878278830863, "grad_norm": 5.4204352896122785, "learning_rate": 1.5085413685205697e-06, "loss": 17.103, "step": 41232 }, { "epoch": 0.7537061070795328, "grad_norm": 5.525392097910439, "learning_rate": 1.508329485646673e-06, "loss": 17.2619, "step": 41233 }, { "epoch": 0.7537243862759793, "grad_norm": 4.855845017425574, "learning_rate": 1.508117615010779e-06, "loss": 16.8313, "step": 41234 }, { "epoch": 0.7537426654724259, "grad_norm": 7.021109899408136, "learning_rate": 1.5079057566136285e-06, "loss": 17.6869, "step": 41235 }, { "epoch": 0.7537609446688723, "grad_norm": 5.625414016206704, "learning_rate": 1.5076939104559662e-06, "loss": 17.1629, "step": 41236 }, { "epoch": 0.7537792238653189, "grad_norm": 5.993566950827655, "learning_rate": 1.507482076538534e-06, "loss": 17.5575, "step": 41237 }, { "epoch": 0.7537975030617654, "grad_norm": 6.959263906559463, "learning_rate": 1.5072702548620727e-06, "loss": 17.3728, "step": 41238 }, { "epoch": 0.7538157822582119, "grad_norm": 7.350024595629285, "learning_rate": 1.5070584454273257e-06, "loss": 17.8134, "step": 41239 }, { "epoch": 0.7538340614546585, "grad_norm": 6.729714305049665, "learning_rate": 1.5068466482350357e-06, "loss": 17.4397, "step": 41240 }, { "epoch": 0.753852340651105, "grad_norm": 9.247610247286381, "learning_rate": 1.5066348632859474e-06, "loss": 18.2841, "step": 41241 }, { "epoch": 0.7538706198475515, "grad_norm": 5.729919490269679, "learning_rate": 1.5064230905807997e-06, "loss": 17.2999, "step": 41242 }, { "epoch": 0.753888899043998, "grad_norm": 6.326177698751819, "learning_rate": 1.506211330120335e-06, "loss": 17.2764, "step": 41243 }, { "epoch": 0.7539071782404445, "grad_norm": 6.398487097859005, "learning_rate": 1.5059995819052981e-06, "loss": 17.6167, "step": 41244 }, { "epoch": 0.7539254574368911, "grad_norm": 7.244645202060686, "learning_rate": 1.505787845936429e-06, "loss": 17.9474, "step": 41245 }, { "epoch": 0.7539437366333376, "grad_norm": 7.253244065825374, "learning_rate": 1.5055761222144688e-06, "loss": 17.6825, "step": 41246 }, { "epoch": 0.7539620158297842, "grad_norm": 6.251221311321231, "learning_rate": 1.5053644107401627e-06, "loss": 17.2384, "step": 41247 }, { "epoch": 0.7539802950262307, "grad_norm": 5.881675018017944, "learning_rate": 1.50515271151425e-06, "loss": 17.4704, "step": 41248 }, { "epoch": 0.7539985742226771, "grad_norm": 6.335321155274126, "learning_rate": 1.5049410245374728e-06, "loss": 17.2854, "step": 41249 }, { "epoch": 0.7540168534191237, "grad_norm": 6.295081571384569, "learning_rate": 1.5047293498105765e-06, "loss": 17.4391, "step": 41250 }, { "epoch": 0.7540351326155702, "grad_norm": 6.35077458096857, "learning_rate": 1.5045176873343005e-06, "loss": 17.3092, "step": 41251 }, { "epoch": 0.7540534118120168, "grad_norm": 5.889925852678507, "learning_rate": 1.5043060371093843e-06, "loss": 17.4515, "step": 41252 }, { "epoch": 0.7540716910084633, "grad_norm": 6.707115013561756, "learning_rate": 1.5040943991365741e-06, "loss": 17.6895, "step": 41253 }, { "epoch": 0.7540899702049098, "grad_norm": 5.27965611896706, "learning_rate": 1.5038827734166083e-06, "loss": 17.0556, "step": 41254 }, { "epoch": 0.7541082494013563, "grad_norm": 7.3972782941867745, "learning_rate": 1.5036711599502312e-06, "loss": 17.7864, "step": 41255 }, { "epoch": 0.7541265285978028, "grad_norm": 5.661116166997626, "learning_rate": 1.5034595587381824e-06, "loss": 17.2772, "step": 41256 }, { "epoch": 0.7541448077942494, "grad_norm": 5.945766360700811, "learning_rate": 1.5032479697812053e-06, "loss": 17.5046, "step": 41257 }, { "epoch": 0.7541630869906959, "grad_norm": 4.787242708341953, "learning_rate": 1.5030363930800396e-06, "loss": 16.9091, "step": 41258 }, { "epoch": 0.7541813661871424, "grad_norm": 6.04105115982868, "learning_rate": 1.5028248286354292e-06, "loss": 17.34, "step": 41259 }, { "epoch": 0.754199645383589, "grad_norm": 6.992102941280726, "learning_rate": 1.502613276448112e-06, "loss": 17.6054, "step": 41260 }, { "epoch": 0.7542179245800354, "grad_norm": 5.545957276020598, "learning_rate": 1.5024017365188343e-06, "loss": 17.0927, "step": 41261 }, { "epoch": 0.754236203776482, "grad_norm": 6.359526885925568, "learning_rate": 1.5021902088483347e-06, "loss": 17.5301, "step": 41262 }, { "epoch": 0.7542544829729285, "grad_norm": 6.866367825850577, "learning_rate": 1.5019786934373537e-06, "loss": 17.5038, "step": 41263 }, { "epoch": 0.754272762169375, "grad_norm": 4.973545948125289, "learning_rate": 1.5017671902866348e-06, "loss": 16.9251, "step": 41264 }, { "epoch": 0.7542910413658216, "grad_norm": 7.957073645628462, "learning_rate": 1.5015556993969172e-06, "loss": 17.8927, "step": 41265 }, { "epoch": 0.7543093205622681, "grad_norm": 4.91402066926131, "learning_rate": 1.5013442207689433e-06, "loss": 16.9944, "step": 41266 }, { "epoch": 0.7543275997587147, "grad_norm": 6.002816936735603, "learning_rate": 1.5011327544034555e-06, "loss": 17.4526, "step": 41267 }, { "epoch": 0.7543458789551611, "grad_norm": 5.731933732330776, "learning_rate": 1.5009213003011942e-06, "loss": 16.8892, "step": 41268 }, { "epoch": 0.7543641581516076, "grad_norm": 6.791619565393836, "learning_rate": 1.5007098584628983e-06, "loss": 17.3143, "step": 41269 }, { "epoch": 0.7543824373480542, "grad_norm": 7.707396503473408, "learning_rate": 1.500498428889312e-06, "loss": 17.8368, "step": 41270 }, { "epoch": 0.7544007165445007, "grad_norm": 7.389427977050841, "learning_rate": 1.5002870115811735e-06, "loss": 17.3905, "step": 41271 }, { "epoch": 0.7544189957409472, "grad_norm": 8.101334273396322, "learning_rate": 1.5000756065392274e-06, "loss": 17.8474, "step": 41272 }, { "epoch": 0.7544372749373938, "grad_norm": 7.368590390383016, "learning_rate": 1.4998642137642123e-06, "loss": 17.9844, "step": 41273 }, { "epoch": 0.7544555541338402, "grad_norm": 6.4865472928555095, "learning_rate": 1.4996528332568677e-06, "loss": 17.2899, "step": 41274 }, { "epoch": 0.7544738333302868, "grad_norm": 5.477301447766436, "learning_rate": 1.4994414650179367e-06, "loss": 17.0563, "step": 41275 }, { "epoch": 0.7544921125267333, "grad_norm": 7.009225040880352, "learning_rate": 1.4992301090481608e-06, "loss": 17.7092, "step": 41276 }, { "epoch": 0.7545103917231798, "grad_norm": 7.632246228733642, "learning_rate": 1.4990187653482779e-06, "loss": 17.9517, "step": 41277 }, { "epoch": 0.7545286709196264, "grad_norm": 7.127968623879636, "learning_rate": 1.4988074339190324e-06, "loss": 17.5454, "step": 41278 }, { "epoch": 0.7545469501160729, "grad_norm": 7.194147450454214, "learning_rate": 1.4985961147611627e-06, "loss": 17.2702, "step": 41279 }, { "epoch": 0.7545652293125195, "grad_norm": 6.183069664774602, "learning_rate": 1.498384807875408e-06, "loss": 17.4805, "step": 41280 }, { "epoch": 0.7545835085089659, "grad_norm": 5.6086171981764235, "learning_rate": 1.4981735132625125e-06, "loss": 16.9509, "step": 41281 }, { "epoch": 0.7546017877054124, "grad_norm": 8.698262246787449, "learning_rate": 1.497962230923215e-06, "loss": 18.5596, "step": 41282 }, { "epoch": 0.754620066901859, "grad_norm": 6.078216307312392, "learning_rate": 1.4977509608582547e-06, "loss": 17.2245, "step": 41283 }, { "epoch": 0.7546383460983055, "grad_norm": 6.224759662771259, "learning_rate": 1.4975397030683731e-06, "loss": 17.3699, "step": 41284 }, { "epoch": 0.7546566252947521, "grad_norm": 6.813887027674982, "learning_rate": 1.4973284575543107e-06, "loss": 17.6942, "step": 41285 }, { "epoch": 0.7546749044911986, "grad_norm": 8.049283557901557, "learning_rate": 1.4971172243168096e-06, "loss": 17.659, "step": 41286 }, { "epoch": 0.754693183687645, "grad_norm": 8.084359214110876, "learning_rate": 1.496906003356609e-06, "loss": 17.8954, "step": 41287 }, { "epoch": 0.7547114628840916, "grad_norm": 6.451564727922354, "learning_rate": 1.4966947946744476e-06, "loss": 17.3856, "step": 41288 }, { "epoch": 0.7547297420805381, "grad_norm": 6.988764278736471, "learning_rate": 1.496483598271068e-06, "loss": 17.5536, "step": 41289 }, { "epoch": 0.7547480212769847, "grad_norm": 5.604698565963717, "learning_rate": 1.4962724141472095e-06, "loss": 17.4713, "step": 41290 }, { "epoch": 0.7547663004734312, "grad_norm": 5.656105593070844, "learning_rate": 1.4960612423036102e-06, "loss": 17.1904, "step": 41291 }, { "epoch": 0.7547845796698777, "grad_norm": 6.097432474426226, "learning_rate": 1.4958500827410144e-06, "loss": 17.4196, "step": 41292 }, { "epoch": 0.7548028588663243, "grad_norm": 5.091179518456964, "learning_rate": 1.495638935460158e-06, "loss": 16.8817, "step": 41293 }, { "epoch": 0.7548211380627707, "grad_norm": 5.501814166448632, "learning_rate": 1.4954278004617835e-06, "loss": 17.1736, "step": 41294 }, { "epoch": 0.7548394172592173, "grad_norm": 7.853370804573435, "learning_rate": 1.4952166777466315e-06, "loss": 17.6895, "step": 41295 }, { "epoch": 0.7548576964556638, "grad_norm": 5.2412162485854505, "learning_rate": 1.4950055673154413e-06, "loss": 17.0317, "step": 41296 }, { "epoch": 0.7548759756521103, "grad_norm": 5.376862205817458, "learning_rate": 1.4947944691689503e-06, "loss": 17.0302, "step": 41297 }, { "epoch": 0.7548942548485569, "grad_norm": 6.389911700237626, "learning_rate": 1.4945833833079026e-06, "loss": 17.5934, "step": 41298 }, { "epoch": 0.7549125340450034, "grad_norm": 5.6789806333899495, "learning_rate": 1.494372309733036e-06, "loss": 17.2637, "step": 41299 }, { "epoch": 0.75493081324145, "grad_norm": 5.283950925248229, "learning_rate": 1.4941612484450886e-06, "loss": 17.0697, "step": 41300 }, { "epoch": 0.7549490924378964, "grad_norm": 6.474332011876386, "learning_rate": 1.493950199444803e-06, "loss": 17.3715, "step": 41301 }, { "epoch": 0.7549673716343429, "grad_norm": 5.29084428442572, "learning_rate": 1.4937391627329168e-06, "loss": 17.0916, "step": 41302 }, { "epoch": 0.7549856508307895, "grad_norm": 7.205206976787634, "learning_rate": 1.49352813831017e-06, "loss": 17.6101, "step": 41303 }, { "epoch": 0.755003930027236, "grad_norm": 5.69394591062862, "learning_rate": 1.4933171261773044e-06, "loss": 17.0884, "step": 41304 }, { "epoch": 0.7550222092236826, "grad_norm": 5.971270716923544, "learning_rate": 1.4931061263350565e-06, "loss": 17.4043, "step": 41305 }, { "epoch": 0.755040488420129, "grad_norm": 5.609485225708782, "learning_rate": 1.4928951387841694e-06, "loss": 17.2949, "step": 41306 }, { "epoch": 0.7550587676165755, "grad_norm": 5.565027114287124, "learning_rate": 1.4926841635253798e-06, "loss": 17.1846, "step": 41307 }, { "epoch": 0.7550770468130221, "grad_norm": 4.809198792022072, "learning_rate": 1.4924732005594266e-06, "loss": 16.7694, "step": 41308 }, { "epoch": 0.7550953260094686, "grad_norm": 6.063370414106436, "learning_rate": 1.4922622498870521e-06, "loss": 17.187, "step": 41309 }, { "epoch": 0.7551136052059152, "grad_norm": 6.313876876230048, "learning_rate": 1.4920513115089924e-06, "loss": 17.4046, "step": 41310 }, { "epoch": 0.7551318844023617, "grad_norm": 4.688007332883085, "learning_rate": 1.49184038542599e-06, "loss": 16.7809, "step": 41311 }, { "epoch": 0.7551501635988082, "grad_norm": 5.496759034227836, "learning_rate": 1.4916294716387807e-06, "loss": 17.1168, "step": 41312 }, { "epoch": 0.7551684427952547, "grad_norm": 7.361893416395045, "learning_rate": 1.4914185701481076e-06, "loss": 17.8327, "step": 41313 }, { "epoch": 0.7551867219917012, "grad_norm": 5.663990496925104, "learning_rate": 1.4912076809547066e-06, "loss": 17.2928, "step": 41314 }, { "epoch": 0.7552050011881478, "grad_norm": 7.350553771925494, "learning_rate": 1.4909968040593192e-06, "loss": 17.6345, "step": 41315 }, { "epoch": 0.7552232803845943, "grad_norm": 8.278231973643237, "learning_rate": 1.4907859394626823e-06, "loss": 18.1819, "step": 41316 }, { "epoch": 0.7552415595810408, "grad_norm": 6.520238553469688, "learning_rate": 1.4905750871655377e-06, "loss": 17.3059, "step": 41317 }, { "epoch": 0.7552598387774874, "grad_norm": 6.189539892114213, "learning_rate": 1.490364247168623e-06, "loss": 17.1935, "step": 41318 }, { "epoch": 0.7552781179739338, "grad_norm": 7.444163366935287, "learning_rate": 1.4901534194726751e-06, "loss": 17.8431, "step": 41319 }, { "epoch": 0.7552963971703804, "grad_norm": 6.21020390485532, "learning_rate": 1.4899426040784353e-06, "loss": 17.4986, "step": 41320 }, { "epoch": 0.7553146763668269, "grad_norm": 7.160227078309686, "learning_rate": 1.4897318009866435e-06, "loss": 17.7977, "step": 41321 }, { "epoch": 0.7553329555632734, "grad_norm": 5.67979018479283, "learning_rate": 1.4895210101980355e-06, "loss": 17.0445, "step": 41322 }, { "epoch": 0.75535123475972, "grad_norm": 7.996122646567123, "learning_rate": 1.4893102317133534e-06, "loss": 17.9123, "step": 41323 }, { "epoch": 0.7553695139561665, "grad_norm": 5.684636738057581, "learning_rate": 1.489099465533334e-06, "loss": 17.3154, "step": 41324 }, { "epoch": 0.7553877931526131, "grad_norm": 4.58075123297004, "learning_rate": 1.4888887116587154e-06, "loss": 16.8132, "step": 41325 }, { "epoch": 0.7554060723490595, "grad_norm": 7.325028690144691, "learning_rate": 1.4886779700902382e-06, "loss": 17.6877, "step": 41326 }, { "epoch": 0.755424351545506, "grad_norm": 6.438169130060794, "learning_rate": 1.4884672408286398e-06, "loss": 17.507, "step": 41327 }, { "epoch": 0.7554426307419526, "grad_norm": 6.724671959003951, "learning_rate": 1.4882565238746582e-06, "loss": 17.8881, "step": 41328 }, { "epoch": 0.7554609099383991, "grad_norm": 5.556644175672447, "learning_rate": 1.488045819229032e-06, "loss": 17.0738, "step": 41329 }, { "epoch": 0.7554791891348457, "grad_norm": 6.656853820658147, "learning_rate": 1.4878351268925022e-06, "loss": 17.6075, "step": 41330 }, { "epoch": 0.7554974683312922, "grad_norm": 5.467262863936745, "learning_rate": 1.4876244468658046e-06, "loss": 17.1984, "step": 41331 }, { "epoch": 0.7555157475277386, "grad_norm": 6.2089304694671155, "learning_rate": 1.4874137791496795e-06, "loss": 17.2776, "step": 41332 }, { "epoch": 0.7555340267241852, "grad_norm": 5.160884746016714, "learning_rate": 1.4872031237448625e-06, "loss": 17.089, "step": 41333 }, { "epoch": 0.7555523059206317, "grad_norm": 4.9962993026009554, "learning_rate": 1.4869924806520959e-06, "loss": 16.9322, "step": 41334 }, { "epoch": 0.7555705851170783, "grad_norm": 6.904746862993395, "learning_rate": 1.4867818498721153e-06, "loss": 17.458, "step": 41335 }, { "epoch": 0.7555888643135248, "grad_norm": 6.019544305896878, "learning_rate": 1.4865712314056585e-06, "loss": 17.1317, "step": 41336 }, { "epoch": 0.7556071435099713, "grad_norm": 5.997434094575366, "learning_rate": 1.4863606252534658e-06, "loss": 17.6176, "step": 41337 }, { "epoch": 0.7556254227064179, "grad_norm": 6.179145513922429, "learning_rate": 1.4861500314162724e-06, "loss": 17.3195, "step": 41338 }, { "epoch": 0.7556437019028643, "grad_norm": 8.547770390349616, "learning_rate": 1.4859394498948188e-06, "loss": 17.733, "step": 41339 }, { "epoch": 0.7556619810993108, "grad_norm": 5.703622086631469, "learning_rate": 1.4857288806898435e-06, "loss": 16.9187, "step": 41340 }, { "epoch": 0.7556802602957574, "grad_norm": 7.383267920322245, "learning_rate": 1.4855183238020837e-06, "loss": 17.8499, "step": 41341 }, { "epoch": 0.7556985394922039, "grad_norm": 6.694410868196965, "learning_rate": 1.4853077792322756e-06, "loss": 17.2977, "step": 41342 }, { "epoch": 0.7557168186886505, "grad_norm": 6.333222308374392, "learning_rate": 1.4850972469811603e-06, "loss": 17.201, "step": 41343 }, { "epoch": 0.755735097885097, "grad_norm": 8.123087985550654, "learning_rate": 1.4848867270494743e-06, "loss": 18.0831, "step": 41344 }, { "epoch": 0.7557533770815434, "grad_norm": 6.469475119607781, "learning_rate": 1.484676219437954e-06, "loss": 17.2524, "step": 41345 }, { "epoch": 0.75577165627799, "grad_norm": 6.0911148448170005, "learning_rate": 1.4844657241473398e-06, "loss": 17.3743, "step": 41346 }, { "epoch": 0.7557899354744365, "grad_norm": 5.640515043669871, "learning_rate": 1.4842552411783662e-06, "loss": 17.3173, "step": 41347 }, { "epoch": 0.7558082146708831, "grad_norm": 6.594285210682082, "learning_rate": 1.4840447705317735e-06, "loss": 17.3747, "step": 41348 }, { "epoch": 0.7558264938673296, "grad_norm": 5.993414478461464, "learning_rate": 1.4838343122082998e-06, "loss": 17.2473, "step": 41349 }, { "epoch": 0.7558447730637761, "grad_norm": 6.004940086585591, "learning_rate": 1.4836238662086806e-06, "loss": 17.3305, "step": 41350 }, { "epoch": 0.7558630522602227, "grad_norm": 4.0112761411727105, "learning_rate": 1.4834134325336562e-06, "loss": 16.5966, "step": 41351 }, { "epoch": 0.7558813314566691, "grad_norm": 5.58431236869398, "learning_rate": 1.4832030111839623e-06, "loss": 16.9926, "step": 41352 }, { "epoch": 0.7558996106531157, "grad_norm": 7.608745866078084, "learning_rate": 1.4829926021603352e-06, "loss": 17.5165, "step": 41353 }, { "epoch": 0.7559178898495622, "grad_norm": 5.15723701602103, "learning_rate": 1.4827822054635154e-06, "loss": 17.1185, "step": 41354 }, { "epoch": 0.7559361690460087, "grad_norm": 5.375687250990679, "learning_rate": 1.4825718210942391e-06, "loss": 17.263, "step": 41355 }, { "epoch": 0.7559544482424553, "grad_norm": 8.866165201360593, "learning_rate": 1.4823614490532411e-06, "loss": 18.6595, "step": 41356 }, { "epoch": 0.7559727274389018, "grad_norm": 5.7784754410798405, "learning_rate": 1.4821510893412616e-06, "loss": 17.0841, "step": 41357 }, { "epoch": 0.7559910066353484, "grad_norm": 6.285095881043664, "learning_rate": 1.481940741959038e-06, "loss": 17.4657, "step": 41358 }, { "epoch": 0.7560092858317948, "grad_norm": 6.917750471278475, "learning_rate": 1.4817304069073062e-06, "loss": 17.4462, "step": 41359 }, { "epoch": 0.7560275650282413, "grad_norm": 5.65814607355426, "learning_rate": 1.481520084186805e-06, "loss": 17.375, "step": 41360 }, { "epoch": 0.7560458442246879, "grad_norm": 5.404535944187633, "learning_rate": 1.4813097737982706e-06, "loss": 17.1519, "step": 41361 }, { "epoch": 0.7560641234211344, "grad_norm": 6.812894124486069, "learning_rate": 1.481099475742439e-06, "loss": 17.2972, "step": 41362 }, { "epoch": 0.756082402617581, "grad_norm": 5.787596939002848, "learning_rate": 1.4808891900200495e-06, "loss": 17.2791, "step": 41363 }, { "epoch": 0.7561006818140275, "grad_norm": 6.001497104876239, "learning_rate": 1.4806789166318368e-06, "loss": 17.5966, "step": 41364 }, { "epoch": 0.7561189610104739, "grad_norm": 7.020130655411914, "learning_rate": 1.4804686555785402e-06, "loss": 17.4856, "step": 41365 }, { "epoch": 0.7561372402069205, "grad_norm": 5.629269261586598, "learning_rate": 1.4802584068608944e-06, "loss": 17.0739, "step": 41366 }, { "epoch": 0.756155519403367, "grad_norm": 5.168279685576857, "learning_rate": 1.4800481704796378e-06, "loss": 16.9264, "step": 41367 }, { "epoch": 0.7561737985998136, "grad_norm": 6.978614518607441, "learning_rate": 1.4798379464355077e-06, "loss": 17.6386, "step": 41368 }, { "epoch": 0.7561920777962601, "grad_norm": 5.461076802808485, "learning_rate": 1.4796277347292403e-06, "loss": 16.847, "step": 41369 }, { "epoch": 0.7562103569927066, "grad_norm": 8.303762349608677, "learning_rate": 1.4794175353615708e-06, "loss": 17.6794, "step": 41370 }, { "epoch": 0.7562286361891531, "grad_norm": 7.135077090100459, "learning_rate": 1.4792073483332386e-06, "loss": 17.6536, "step": 41371 }, { "epoch": 0.7562469153855996, "grad_norm": 5.32902677894833, "learning_rate": 1.4789971736449798e-06, "loss": 16.98, "step": 41372 }, { "epoch": 0.7562651945820462, "grad_norm": 6.465293733969912, "learning_rate": 1.4787870112975283e-06, "loss": 17.1441, "step": 41373 }, { "epoch": 0.7562834737784927, "grad_norm": 4.915443946986477, "learning_rate": 1.4785768612916245e-06, "loss": 16.8408, "step": 41374 }, { "epoch": 0.7563017529749392, "grad_norm": 5.954452862308425, "learning_rate": 1.4783667236280014e-06, "loss": 17.1822, "step": 41375 }, { "epoch": 0.7563200321713858, "grad_norm": 7.601754036211396, "learning_rate": 1.4781565983073976e-06, "loss": 17.5202, "step": 41376 }, { "epoch": 0.7563383113678323, "grad_norm": 5.805651780070469, "learning_rate": 1.4779464853305508e-06, "loss": 17.1609, "step": 41377 }, { "epoch": 0.7563565905642788, "grad_norm": 8.053714921062639, "learning_rate": 1.4777363846981945e-06, "loss": 18.4522, "step": 41378 }, { "epoch": 0.7563748697607253, "grad_norm": 6.810042127952357, "learning_rate": 1.4775262964110682e-06, "loss": 17.3759, "step": 41379 }, { "epoch": 0.7563931489571718, "grad_norm": 6.041446255175436, "learning_rate": 1.477316220469906e-06, "loss": 17.3277, "step": 41380 }, { "epoch": 0.7564114281536184, "grad_norm": 6.847673975908039, "learning_rate": 1.4771061568754436e-06, "loss": 17.4775, "step": 41381 }, { "epoch": 0.7564297073500649, "grad_norm": 6.219290324651375, "learning_rate": 1.4768961056284198e-06, "loss": 17.2078, "step": 41382 }, { "epoch": 0.7564479865465115, "grad_norm": 5.854801914394689, "learning_rate": 1.476686066729568e-06, "loss": 17.3372, "step": 41383 }, { "epoch": 0.7564662657429579, "grad_norm": 6.737194073804584, "learning_rate": 1.4764760401796274e-06, "loss": 17.3018, "step": 41384 }, { "epoch": 0.7564845449394044, "grad_norm": 6.252004441880369, "learning_rate": 1.476266025979331e-06, "loss": 17.6096, "step": 41385 }, { "epoch": 0.756502824135851, "grad_norm": 6.513272348232455, "learning_rate": 1.4760560241294176e-06, "loss": 17.3367, "step": 41386 }, { "epoch": 0.7565211033322975, "grad_norm": 5.572463425643747, "learning_rate": 1.4758460346306207e-06, "loss": 17.2561, "step": 41387 }, { "epoch": 0.7565393825287441, "grad_norm": 6.745382279741323, "learning_rate": 1.4756360574836793e-06, "loss": 17.7591, "step": 41388 }, { "epoch": 0.7565576617251906, "grad_norm": 7.97163882992123, "learning_rate": 1.4754260926893278e-06, "loss": 17.9625, "step": 41389 }, { "epoch": 0.756575940921637, "grad_norm": 5.188129142642803, "learning_rate": 1.4752161402483e-06, "loss": 17.007, "step": 41390 }, { "epoch": 0.7565942201180836, "grad_norm": 6.4431009066360465, "learning_rate": 1.4750062001613357e-06, "loss": 17.6051, "step": 41391 }, { "epoch": 0.7566124993145301, "grad_norm": 6.550722958329075, "learning_rate": 1.4747962724291671e-06, "loss": 17.2854, "step": 41392 }, { "epoch": 0.7566307785109767, "grad_norm": 7.180240779426213, "learning_rate": 1.4745863570525321e-06, "loss": 17.2401, "step": 41393 }, { "epoch": 0.7566490577074232, "grad_norm": 5.667065775523166, "learning_rate": 1.474376454032167e-06, "loss": 17.1333, "step": 41394 }, { "epoch": 0.7566673369038697, "grad_norm": 7.257751017225938, "learning_rate": 1.474166563368805e-06, "loss": 17.4457, "step": 41395 }, { "epoch": 0.7566856161003163, "grad_norm": 5.8956122597276055, "learning_rate": 1.4739566850631847e-06, "loss": 17.192, "step": 41396 }, { "epoch": 0.7567038952967627, "grad_norm": 5.460688410246907, "learning_rate": 1.4737468191160404e-06, "loss": 17.166, "step": 41397 }, { "epoch": 0.7567221744932093, "grad_norm": 6.54882715157627, "learning_rate": 1.4735369655281063e-06, "loss": 17.1842, "step": 41398 }, { "epoch": 0.7567404536896558, "grad_norm": 5.2750805704964465, "learning_rate": 1.4733271243001202e-06, "loss": 16.8906, "step": 41399 }, { "epoch": 0.7567587328861023, "grad_norm": 6.405108932157981, "learning_rate": 1.4731172954328166e-06, "loss": 17.3122, "step": 41400 }, { "epoch": 0.7567770120825489, "grad_norm": 5.895846361618557, "learning_rate": 1.4729074789269293e-06, "loss": 17.1165, "step": 41401 }, { "epoch": 0.7567952912789954, "grad_norm": 6.071952838937007, "learning_rate": 1.472697674783195e-06, "loss": 17.3169, "step": 41402 }, { "epoch": 0.756813570475442, "grad_norm": 34.271881215665445, "learning_rate": 1.4724878830023508e-06, "loss": 17.2543, "step": 41403 }, { "epoch": 0.7568318496718884, "grad_norm": 6.219854805777094, "learning_rate": 1.4722781035851297e-06, "loss": 17.2066, "step": 41404 }, { "epoch": 0.7568501288683349, "grad_norm": 8.018782578702218, "learning_rate": 1.4720683365322685e-06, "loss": 18.014, "step": 41405 }, { "epoch": 0.7568684080647815, "grad_norm": 7.822831291443613, "learning_rate": 1.4718585818445018e-06, "loss": 17.951, "step": 41406 }, { "epoch": 0.756886687261228, "grad_norm": 5.638134144098482, "learning_rate": 1.471648839522563e-06, "loss": 17.2333, "step": 41407 }, { "epoch": 0.7569049664576745, "grad_norm": 6.293792143024513, "learning_rate": 1.471439109567191e-06, "loss": 17.4278, "step": 41408 }, { "epoch": 0.7569232456541211, "grad_norm": 5.667631500099389, "learning_rate": 1.4712293919791165e-06, "loss": 17.0784, "step": 41409 }, { "epoch": 0.7569415248505675, "grad_norm": 8.997014426927421, "learning_rate": 1.4710196867590788e-06, "loss": 17.8607, "step": 41410 }, { "epoch": 0.7569598040470141, "grad_norm": 5.213887196388373, "learning_rate": 1.4708099939078085e-06, "loss": 17.0744, "step": 41411 }, { "epoch": 0.7569780832434606, "grad_norm": 7.5486309903135504, "learning_rate": 1.4706003134260437e-06, "loss": 17.4191, "step": 41412 }, { "epoch": 0.7569963624399071, "grad_norm": 7.2117929644305985, "learning_rate": 1.47039064531452e-06, "loss": 17.6392, "step": 41413 }, { "epoch": 0.7570146416363537, "grad_norm": 13.698121631119953, "learning_rate": 1.4701809895739706e-06, "loss": 17.7673, "step": 41414 }, { "epoch": 0.7570329208328002, "grad_norm": 6.500043017508027, "learning_rate": 1.469971346205129e-06, "loss": 17.6109, "step": 41415 }, { "epoch": 0.7570512000292468, "grad_norm": 5.440297434596824, "learning_rate": 1.4697617152087329e-06, "loss": 16.979, "step": 41416 }, { "epoch": 0.7570694792256932, "grad_norm": 7.5676419002720055, "learning_rate": 1.469552096585516e-06, "loss": 17.6509, "step": 41417 }, { "epoch": 0.7570877584221397, "grad_norm": 7.199743016653669, "learning_rate": 1.4693424903362107e-06, "loss": 17.4445, "step": 41418 }, { "epoch": 0.7571060376185863, "grad_norm": 7.97448787762369, "learning_rate": 1.4691328964615554e-06, "loss": 17.4304, "step": 41419 }, { "epoch": 0.7571243168150328, "grad_norm": 5.951241763807548, "learning_rate": 1.4689233149622816e-06, "loss": 17.2257, "step": 41420 }, { "epoch": 0.7571425960114794, "grad_norm": 8.010912580986853, "learning_rate": 1.4687137458391248e-06, "loss": 17.4891, "step": 41421 }, { "epoch": 0.7571608752079259, "grad_norm": 6.70304684937982, "learning_rate": 1.4685041890928215e-06, "loss": 17.0943, "step": 41422 }, { "epoch": 0.7571791544043723, "grad_norm": 6.132497569585355, "learning_rate": 1.468294644724103e-06, "loss": 17.3814, "step": 41423 }, { "epoch": 0.7571974336008189, "grad_norm": 7.342388510707205, "learning_rate": 1.468085112733707e-06, "loss": 17.6699, "step": 41424 }, { "epoch": 0.7572157127972654, "grad_norm": 7.260790649879444, "learning_rate": 1.467875593122366e-06, "loss": 17.6096, "step": 41425 }, { "epoch": 0.757233991993712, "grad_norm": 6.731987101159915, "learning_rate": 1.4676660858908132e-06, "loss": 17.3541, "step": 41426 }, { "epoch": 0.7572522711901585, "grad_norm": 6.208956811657179, "learning_rate": 1.4674565910397859e-06, "loss": 17.3425, "step": 41427 }, { "epoch": 0.757270550386605, "grad_norm": 6.568538067203455, "learning_rate": 1.4672471085700162e-06, "loss": 17.3274, "step": 41428 }, { "epoch": 0.7572888295830515, "grad_norm": 8.951496027306394, "learning_rate": 1.4670376384822377e-06, "loss": 17.4431, "step": 41429 }, { "epoch": 0.757307108779498, "grad_norm": 6.946003873628678, "learning_rate": 1.4668281807771856e-06, "loss": 18.0082, "step": 41430 }, { "epoch": 0.7573253879759446, "grad_norm": 6.651449339341528, "learning_rate": 1.4666187354555955e-06, "loss": 17.4152, "step": 41431 }, { "epoch": 0.7573436671723911, "grad_norm": 6.9542416493681625, "learning_rate": 1.4664093025181986e-06, "loss": 17.4855, "step": 41432 }, { "epoch": 0.7573619463688376, "grad_norm": 6.016512792639653, "learning_rate": 1.4661998819657325e-06, "loss": 17.4575, "step": 41433 }, { "epoch": 0.7573802255652842, "grad_norm": 5.336438840951441, "learning_rate": 1.4659904737989283e-06, "loss": 17.0695, "step": 41434 }, { "epoch": 0.7573985047617307, "grad_norm": 6.515586419800753, "learning_rate": 1.46578107801852e-06, "loss": 17.5282, "step": 41435 }, { "epoch": 0.7574167839581772, "grad_norm": 7.420055137578114, "learning_rate": 1.4655716946252435e-06, "loss": 18.3392, "step": 41436 }, { "epoch": 0.7574350631546237, "grad_norm": 7.629287892715303, "learning_rate": 1.4653623236198305e-06, "loss": 18.1998, "step": 41437 }, { "epoch": 0.7574533423510702, "grad_norm": 6.69787703029075, "learning_rate": 1.4651529650030166e-06, "loss": 17.6749, "step": 41438 }, { "epoch": 0.7574716215475168, "grad_norm": 6.409111233217936, "learning_rate": 1.464943618775534e-06, "loss": 17.1328, "step": 41439 }, { "epoch": 0.7574899007439633, "grad_norm": 5.356530253673204, "learning_rate": 1.464734284938117e-06, "loss": 16.9725, "step": 41440 }, { "epoch": 0.7575081799404099, "grad_norm": 7.233060749589454, "learning_rate": 1.464524963491501e-06, "loss": 18.0537, "step": 41441 }, { "epoch": 0.7575264591368563, "grad_norm": 6.315277544797144, "learning_rate": 1.4643156544364178e-06, "loss": 17.262, "step": 41442 }, { "epoch": 0.7575447383333028, "grad_norm": 6.115796901953774, "learning_rate": 1.4641063577736002e-06, "loss": 17.2034, "step": 41443 }, { "epoch": 0.7575630175297494, "grad_norm": 6.039364272983912, "learning_rate": 1.4638970735037849e-06, "loss": 17.211, "step": 41444 }, { "epoch": 0.7575812967261959, "grad_norm": 6.620428102480228, "learning_rate": 1.4636878016277035e-06, "loss": 17.3417, "step": 41445 }, { "epoch": 0.7575995759226425, "grad_norm": 5.793435570597437, "learning_rate": 1.463478542146088e-06, "loss": 17.1235, "step": 41446 }, { "epoch": 0.757617855119089, "grad_norm": 5.568284493493787, "learning_rate": 1.4632692950596728e-06, "loss": 17.0716, "step": 41447 }, { "epoch": 0.7576361343155354, "grad_norm": 4.7080527377942865, "learning_rate": 1.4630600603691935e-06, "loss": 16.7689, "step": 41448 }, { "epoch": 0.757654413511982, "grad_norm": 6.022815751883835, "learning_rate": 1.4628508380753804e-06, "loss": 17.272, "step": 41449 }, { "epoch": 0.7576726927084285, "grad_norm": 6.263748436573374, "learning_rate": 1.4626416281789696e-06, "loss": 17.5847, "step": 41450 }, { "epoch": 0.7576909719048751, "grad_norm": 7.141319150610245, "learning_rate": 1.4624324306806925e-06, "loss": 17.8745, "step": 41451 }, { "epoch": 0.7577092511013216, "grad_norm": 11.368611418189943, "learning_rate": 1.4622232455812818e-06, "loss": 18.5366, "step": 41452 }, { "epoch": 0.7577275302977681, "grad_norm": 6.04042905299522, "learning_rate": 1.462014072881473e-06, "loss": 17.5222, "step": 41453 }, { "epoch": 0.7577458094942147, "grad_norm": 5.8353572845634565, "learning_rate": 1.4618049125819967e-06, "loss": 17.3555, "step": 41454 }, { "epoch": 0.7577640886906611, "grad_norm": 6.234482354790235, "learning_rate": 1.4615957646835882e-06, "loss": 17.5304, "step": 41455 }, { "epoch": 0.7577823678871077, "grad_norm": 5.7748098994661525, "learning_rate": 1.4613866291869783e-06, "loss": 17.0555, "step": 41456 }, { "epoch": 0.7578006470835542, "grad_norm": 6.043280228034436, "learning_rate": 1.4611775060929007e-06, "loss": 17.5891, "step": 41457 }, { "epoch": 0.7578189262800007, "grad_norm": 6.228972985355228, "learning_rate": 1.4609683954020909e-06, "loss": 17.5237, "step": 41458 }, { "epoch": 0.7578372054764473, "grad_norm": 7.2827309514301515, "learning_rate": 1.4607592971152796e-06, "loss": 17.6576, "step": 41459 }, { "epoch": 0.7578554846728938, "grad_norm": 8.233475081160206, "learning_rate": 1.460550211233198e-06, "loss": 17.6212, "step": 41460 }, { "epoch": 0.7578737638693404, "grad_norm": 4.760050755140258, "learning_rate": 1.460341137756583e-06, "loss": 16.695, "step": 41461 }, { "epoch": 0.7578920430657868, "grad_norm": 6.226073066281348, "learning_rate": 1.4601320766861643e-06, "loss": 17.4426, "step": 41462 }, { "epoch": 0.7579103222622333, "grad_norm": 5.26498234987133, "learning_rate": 1.4599230280226745e-06, "loss": 17.0383, "step": 41463 }, { "epoch": 0.7579286014586799, "grad_norm": 5.792015505762992, "learning_rate": 1.459713991766849e-06, "loss": 17.0353, "step": 41464 }, { "epoch": 0.7579468806551264, "grad_norm": 5.179949168936286, "learning_rate": 1.4595049679194167e-06, "loss": 16.934, "step": 41465 }, { "epoch": 0.757965159851573, "grad_norm": 6.111765241486933, "learning_rate": 1.4592959564811127e-06, "loss": 17.4871, "step": 41466 }, { "epoch": 0.7579834390480195, "grad_norm": 7.492056108244736, "learning_rate": 1.45908695745267e-06, "loss": 17.8024, "step": 41467 }, { "epoch": 0.7580017182444659, "grad_norm": 5.656677247927817, "learning_rate": 1.4588779708348211e-06, "loss": 17.0953, "step": 41468 }, { "epoch": 0.7580199974409125, "grad_norm": 7.212516422589108, "learning_rate": 1.4586689966282957e-06, "loss": 17.6423, "step": 41469 }, { "epoch": 0.758038276637359, "grad_norm": 7.292524555152191, "learning_rate": 1.4584600348338296e-06, "loss": 17.631, "step": 41470 }, { "epoch": 0.7580565558338056, "grad_norm": 6.1842973844102564, "learning_rate": 1.4582510854521525e-06, "loss": 17.3626, "step": 41471 }, { "epoch": 0.7580748350302521, "grad_norm": 6.160508175952901, "learning_rate": 1.4580421484839991e-06, "loss": 17.29, "step": 41472 }, { "epoch": 0.7580931142266986, "grad_norm": 6.556095363218124, "learning_rate": 1.4578332239301008e-06, "loss": 17.5694, "step": 41473 }, { "epoch": 0.7581113934231452, "grad_norm": 6.683139446163655, "learning_rate": 1.457624311791188e-06, "loss": 17.4694, "step": 41474 }, { "epoch": 0.7581296726195916, "grad_norm": 6.575075764715941, "learning_rate": 1.4574154120679952e-06, "loss": 17.6146, "step": 41475 }, { "epoch": 0.7581479518160381, "grad_norm": 6.953160427215115, "learning_rate": 1.457206524761255e-06, "loss": 17.7363, "step": 41476 }, { "epoch": 0.7581662310124847, "grad_norm": 5.70949480090262, "learning_rate": 1.456997649871697e-06, "loss": 17.2806, "step": 41477 }, { "epoch": 0.7581845102089312, "grad_norm": 7.076085117355266, "learning_rate": 1.4567887874000563e-06, "loss": 17.3733, "step": 41478 }, { "epoch": 0.7582027894053778, "grad_norm": 6.345933860726366, "learning_rate": 1.456579937347064e-06, "loss": 17.2626, "step": 41479 }, { "epoch": 0.7582210686018243, "grad_norm": 8.105051307942363, "learning_rate": 1.4563710997134495e-06, "loss": 17.564, "step": 41480 }, { "epoch": 0.7582393477982707, "grad_norm": 5.89010905100692, "learning_rate": 1.4561622744999481e-06, "loss": 17.2471, "step": 41481 }, { "epoch": 0.7582576269947173, "grad_norm": 7.426482572749883, "learning_rate": 1.4559534617072907e-06, "loss": 17.6752, "step": 41482 }, { "epoch": 0.7582759061911638, "grad_norm": 5.836445412192962, "learning_rate": 1.4557446613362075e-06, "loss": 17.3722, "step": 41483 }, { "epoch": 0.7582941853876104, "grad_norm": 6.458689744693884, "learning_rate": 1.4555358733874315e-06, "loss": 17.4561, "step": 41484 }, { "epoch": 0.7583124645840569, "grad_norm": 5.855155231051575, "learning_rate": 1.4553270978616947e-06, "loss": 17.1956, "step": 41485 }, { "epoch": 0.7583307437805034, "grad_norm": 6.600557191185698, "learning_rate": 1.4551183347597304e-06, "loss": 17.5412, "step": 41486 }, { "epoch": 0.75834902297695, "grad_norm": 7.577474522702075, "learning_rate": 1.4549095840822686e-06, "loss": 17.9852, "step": 41487 }, { "epoch": 0.7583673021733964, "grad_norm": 5.4323495408073965, "learning_rate": 1.4547008458300394e-06, "loss": 16.9284, "step": 41488 }, { "epoch": 0.758385581369843, "grad_norm": 6.823541221459082, "learning_rate": 1.4544921200037782e-06, "loss": 17.5181, "step": 41489 }, { "epoch": 0.7584038605662895, "grad_norm": 5.9207571952128895, "learning_rate": 1.454283406604214e-06, "loss": 17.5248, "step": 41490 }, { "epoch": 0.758422139762736, "grad_norm": 6.726808195938459, "learning_rate": 1.4540747056320776e-06, "loss": 17.2484, "step": 41491 }, { "epoch": 0.7584404189591826, "grad_norm": 5.306005538065271, "learning_rate": 1.4538660170881025e-06, "loss": 17.1143, "step": 41492 }, { "epoch": 0.758458698155629, "grad_norm": 6.0043560469785895, "learning_rate": 1.4536573409730187e-06, "loss": 17.1103, "step": 41493 }, { "epoch": 0.7584769773520756, "grad_norm": 6.846526728464872, "learning_rate": 1.4534486772875573e-06, "loss": 17.691, "step": 41494 }, { "epoch": 0.7584952565485221, "grad_norm": 7.459725873542345, "learning_rate": 1.4532400260324524e-06, "loss": 17.9748, "step": 41495 }, { "epoch": 0.7585135357449686, "grad_norm": 5.883305552218634, "learning_rate": 1.453031387208434e-06, "loss": 17.0863, "step": 41496 }, { "epoch": 0.7585318149414152, "grad_norm": 6.988967280702198, "learning_rate": 1.4528227608162305e-06, "loss": 17.5361, "step": 41497 }, { "epoch": 0.7585500941378617, "grad_norm": 6.19615293083087, "learning_rate": 1.4526141468565768e-06, "loss": 17.3941, "step": 41498 }, { "epoch": 0.7585683733343083, "grad_norm": 5.095422835569408, "learning_rate": 1.4524055453302033e-06, "loss": 16.9319, "step": 41499 }, { "epoch": 0.7585866525307547, "grad_norm": 7.405875086630483, "learning_rate": 1.452196956237839e-06, "loss": 17.5865, "step": 41500 }, { "epoch": 0.7586049317272012, "grad_norm": 6.250290932209923, "learning_rate": 1.4519883795802175e-06, "loss": 17.2253, "step": 41501 }, { "epoch": 0.7586232109236478, "grad_norm": 8.219547792135126, "learning_rate": 1.4517798153580675e-06, "loss": 17.8136, "step": 41502 }, { "epoch": 0.7586414901200943, "grad_norm": 6.321342450440754, "learning_rate": 1.4515712635721219e-06, "loss": 17.2474, "step": 41503 }, { "epoch": 0.7586597693165409, "grad_norm": 5.863703167580107, "learning_rate": 1.4513627242231122e-06, "loss": 17.3447, "step": 41504 }, { "epoch": 0.7586780485129874, "grad_norm": 8.215085673918104, "learning_rate": 1.4511541973117665e-06, "loss": 17.8311, "step": 41505 }, { "epoch": 0.7586963277094338, "grad_norm": 5.85626070664109, "learning_rate": 1.4509456828388191e-06, "loss": 17.2367, "step": 41506 }, { "epoch": 0.7587146069058804, "grad_norm": 7.273009550813256, "learning_rate": 1.450737180804999e-06, "loss": 17.8969, "step": 41507 }, { "epoch": 0.7587328861023269, "grad_norm": 4.775325076102912, "learning_rate": 1.450528691211036e-06, "loss": 16.898, "step": 41508 }, { "epoch": 0.7587511652987735, "grad_norm": 7.208533681359926, "learning_rate": 1.4503202140576628e-06, "loss": 17.7218, "step": 41509 }, { "epoch": 0.75876944449522, "grad_norm": 6.689851707041225, "learning_rate": 1.4501117493456086e-06, "loss": 17.6465, "step": 41510 }, { "epoch": 0.7587877236916665, "grad_norm": 6.2821286312196705, "learning_rate": 1.4499032970756056e-06, "loss": 17.427, "step": 41511 }, { "epoch": 0.7588060028881131, "grad_norm": 7.934192446938335, "learning_rate": 1.4496948572483816e-06, "loss": 18.0874, "step": 41512 }, { "epoch": 0.7588242820845595, "grad_norm": 5.309268603645609, "learning_rate": 1.449486429864671e-06, "loss": 17.1963, "step": 41513 }, { "epoch": 0.7588425612810061, "grad_norm": 6.497970437557693, "learning_rate": 1.4492780149252007e-06, "loss": 17.7026, "step": 41514 }, { "epoch": 0.7588608404774526, "grad_norm": 4.866484240545676, "learning_rate": 1.4490696124307047e-06, "loss": 16.9316, "step": 41515 }, { "epoch": 0.7588791196738991, "grad_norm": 5.699786921525889, "learning_rate": 1.44886122238191e-06, "loss": 16.9977, "step": 41516 }, { "epoch": 0.7588973988703457, "grad_norm": 7.218124269841247, "learning_rate": 1.4486528447795494e-06, "loss": 17.2824, "step": 41517 }, { "epoch": 0.7589156780667922, "grad_norm": 7.845294086216352, "learning_rate": 1.448444479624353e-06, "loss": 18.0333, "step": 41518 }, { "epoch": 0.7589339572632388, "grad_norm": 6.745444690786465, "learning_rate": 1.4482361269170493e-06, "loss": 17.3169, "step": 41519 }, { "epoch": 0.7589522364596852, "grad_norm": 6.330200335305825, "learning_rate": 1.4480277866583692e-06, "loss": 17.3755, "step": 41520 }, { "epoch": 0.7589705156561317, "grad_norm": 5.549518296477507, "learning_rate": 1.4478194588490453e-06, "loss": 17.0362, "step": 41521 }, { "epoch": 0.7589887948525783, "grad_norm": 6.0601497208421335, "learning_rate": 1.4476111434898038e-06, "loss": 17.3872, "step": 41522 }, { "epoch": 0.7590070740490248, "grad_norm": 5.7955079089191806, "learning_rate": 1.4474028405813789e-06, "loss": 17.2858, "step": 41523 }, { "epoch": 0.7590253532454714, "grad_norm": 6.239375381871798, "learning_rate": 1.4471945501244988e-06, "loss": 17.0163, "step": 41524 }, { "epoch": 0.7590436324419179, "grad_norm": 5.865652754972199, "learning_rate": 1.4469862721198925e-06, "loss": 17.1728, "step": 41525 }, { "epoch": 0.7590619116383643, "grad_norm": 5.82280970680578, "learning_rate": 1.4467780065682919e-06, "loss": 17.1472, "step": 41526 }, { "epoch": 0.7590801908348109, "grad_norm": 7.382610836193879, "learning_rate": 1.446569753470426e-06, "loss": 17.527, "step": 41527 }, { "epoch": 0.7590984700312574, "grad_norm": 6.23637873557312, "learning_rate": 1.4463615128270237e-06, "loss": 17.2207, "step": 41528 }, { "epoch": 0.759116749227704, "grad_norm": 6.1727670907810674, "learning_rate": 1.4461532846388155e-06, "loss": 17.3367, "step": 41529 }, { "epoch": 0.7591350284241505, "grad_norm": 6.453436516849378, "learning_rate": 1.4459450689065336e-06, "loss": 17.255, "step": 41530 }, { "epoch": 0.759153307620597, "grad_norm": 5.242224301752464, "learning_rate": 1.445736865630904e-06, "loss": 17.0573, "step": 41531 }, { "epoch": 0.7591715868170436, "grad_norm": 7.580138673598303, "learning_rate": 1.4455286748126595e-06, "loss": 17.5925, "step": 41532 }, { "epoch": 0.75918986601349, "grad_norm": 6.383430996688651, "learning_rate": 1.4453204964525275e-06, "loss": 17.1056, "step": 41533 }, { "epoch": 0.7592081452099366, "grad_norm": 5.868161050364622, "learning_rate": 1.44511233055124e-06, "loss": 17.3418, "step": 41534 }, { "epoch": 0.7592264244063831, "grad_norm": 5.9325131432333595, "learning_rate": 1.4449041771095257e-06, "loss": 16.8852, "step": 41535 }, { "epoch": 0.7592447036028296, "grad_norm": 5.751834436826201, "learning_rate": 1.4446960361281115e-06, "loss": 17.0273, "step": 41536 }, { "epoch": 0.7592629827992762, "grad_norm": 5.604921368678164, "learning_rate": 1.444487907607731e-06, "loss": 17.2864, "step": 41537 }, { "epoch": 0.7592812619957227, "grad_norm": 7.16679148200385, "learning_rate": 1.4442797915491109e-06, "loss": 17.7321, "step": 41538 }, { "epoch": 0.7592995411921692, "grad_norm": 7.556222769557147, "learning_rate": 1.4440716879529809e-06, "loss": 17.8866, "step": 41539 }, { "epoch": 0.7593178203886157, "grad_norm": 7.201912781348384, "learning_rate": 1.443863596820073e-06, "loss": 17.8843, "step": 41540 }, { "epoch": 0.7593360995850622, "grad_norm": 7.244602836018447, "learning_rate": 1.4436555181511141e-06, "loss": 17.3307, "step": 41541 }, { "epoch": 0.7593543787815088, "grad_norm": 6.279048675138532, "learning_rate": 1.4434474519468334e-06, "loss": 17.3417, "step": 41542 }, { "epoch": 0.7593726579779553, "grad_norm": 5.489659971200329, "learning_rate": 1.4432393982079612e-06, "loss": 17.1414, "step": 41543 }, { "epoch": 0.7593909371744018, "grad_norm": 8.091542939171548, "learning_rate": 1.4430313569352267e-06, "loss": 17.6221, "step": 41544 }, { "epoch": 0.7594092163708484, "grad_norm": 5.557243058599273, "learning_rate": 1.4428233281293575e-06, "loss": 17.0341, "step": 41545 }, { "epoch": 0.7594274955672948, "grad_norm": 6.710197508952361, "learning_rate": 1.442615311791085e-06, "loss": 17.486, "step": 41546 }, { "epoch": 0.7594457747637414, "grad_norm": 6.382156748833406, "learning_rate": 1.4424073079211353e-06, "loss": 17.6269, "step": 41547 }, { "epoch": 0.7594640539601879, "grad_norm": 5.419610864421362, "learning_rate": 1.4421993165202398e-06, "loss": 16.9856, "step": 41548 }, { "epoch": 0.7594823331566344, "grad_norm": 6.0525621750696095, "learning_rate": 1.4419913375891287e-06, "loss": 17.5445, "step": 41549 }, { "epoch": 0.759500612353081, "grad_norm": 5.329612908652047, "learning_rate": 1.4417833711285273e-06, "loss": 16.8281, "step": 41550 }, { "epoch": 0.7595188915495275, "grad_norm": 5.873201730657478, "learning_rate": 1.4415754171391677e-06, "loss": 17.2608, "step": 41551 }, { "epoch": 0.759537170745974, "grad_norm": 6.248305411540187, "learning_rate": 1.4413674756217776e-06, "loss": 17.3562, "step": 41552 }, { "epoch": 0.7595554499424205, "grad_norm": 4.692645934891195, "learning_rate": 1.4411595465770845e-06, "loss": 16.7088, "step": 41553 }, { "epoch": 0.759573729138867, "grad_norm": 7.00925367474054, "learning_rate": 1.4409516300058197e-06, "loss": 17.6607, "step": 41554 }, { "epoch": 0.7595920083353136, "grad_norm": 7.906046661849474, "learning_rate": 1.4407437259087104e-06, "loss": 18.067, "step": 41555 }, { "epoch": 0.7596102875317601, "grad_norm": 6.430655562334663, "learning_rate": 1.4405358342864845e-06, "loss": 17.4412, "step": 41556 }, { "epoch": 0.7596285667282067, "grad_norm": 6.275534857931217, "learning_rate": 1.4403279551398708e-06, "loss": 17.2541, "step": 41557 }, { "epoch": 0.7596468459246531, "grad_norm": 7.288691673665928, "learning_rate": 1.440120088469601e-06, "loss": 18.0455, "step": 41558 }, { "epoch": 0.7596651251210996, "grad_norm": 4.959486259556768, "learning_rate": 1.4399122342763994e-06, "loss": 17.012, "step": 41559 }, { "epoch": 0.7596834043175462, "grad_norm": 6.492369960584045, "learning_rate": 1.4397043925609982e-06, "loss": 17.0026, "step": 41560 }, { "epoch": 0.7597016835139927, "grad_norm": 5.78014455602383, "learning_rate": 1.4394965633241225e-06, "loss": 17.2615, "step": 41561 }, { "epoch": 0.7597199627104393, "grad_norm": 6.774287720575665, "learning_rate": 1.4392887465665039e-06, "loss": 17.7957, "step": 41562 }, { "epoch": 0.7597382419068858, "grad_norm": 6.327709908428966, "learning_rate": 1.4390809422888696e-06, "loss": 17.612, "step": 41563 }, { "epoch": 0.7597565211033322, "grad_norm": 7.280434879438948, "learning_rate": 1.4388731504919456e-06, "loss": 17.6188, "step": 41564 }, { "epoch": 0.7597748002997788, "grad_norm": 6.646231697109661, "learning_rate": 1.4386653711764642e-06, "loss": 17.6339, "step": 41565 }, { "epoch": 0.7597930794962253, "grad_norm": 7.6582211340476976, "learning_rate": 1.4384576043431498e-06, "loss": 18.1283, "step": 41566 }, { "epoch": 0.7598113586926719, "grad_norm": 5.211356579040663, "learning_rate": 1.4382498499927327e-06, "loss": 17.1874, "step": 41567 }, { "epoch": 0.7598296378891184, "grad_norm": 8.611833141716128, "learning_rate": 1.438042108125942e-06, "loss": 17.5728, "step": 41568 }, { "epoch": 0.7598479170855649, "grad_norm": 6.220119433282819, "learning_rate": 1.4378343787435046e-06, "loss": 17.5691, "step": 41569 }, { "epoch": 0.7598661962820115, "grad_norm": 5.596502974224589, "learning_rate": 1.4376266618461476e-06, "loss": 17.322, "step": 41570 }, { "epoch": 0.7598844754784579, "grad_norm": 5.052948121505143, "learning_rate": 1.437418957434601e-06, "loss": 16.96, "step": 41571 }, { "epoch": 0.7599027546749045, "grad_norm": 7.154433177822202, "learning_rate": 1.437211265509592e-06, "loss": 17.559, "step": 41572 }, { "epoch": 0.759921033871351, "grad_norm": 7.189668679377111, "learning_rate": 1.4370035860718467e-06, "loss": 18.0096, "step": 41573 }, { "epoch": 0.7599393130677975, "grad_norm": 5.3359528271649, "learning_rate": 1.4367959191220948e-06, "loss": 16.8894, "step": 41574 }, { "epoch": 0.7599575922642441, "grad_norm": 7.014721531984044, "learning_rate": 1.4365882646610657e-06, "loss": 17.9856, "step": 41575 }, { "epoch": 0.7599758714606906, "grad_norm": 7.637347019259332, "learning_rate": 1.4363806226894833e-06, "loss": 18.0437, "step": 41576 }, { "epoch": 0.7599941506571372, "grad_norm": 5.759433414302041, "learning_rate": 1.4361729932080799e-06, "loss": 17.2086, "step": 41577 }, { "epoch": 0.7600124298535836, "grad_norm": 5.911767541752408, "learning_rate": 1.4359653762175791e-06, "loss": 17.3016, "step": 41578 }, { "epoch": 0.7600307090500301, "grad_norm": 8.727446510726171, "learning_rate": 1.4357577717187116e-06, "loss": 18.5556, "step": 41579 }, { "epoch": 0.7600489882464767, "grad_norm": 5.723381619577464, "learning_rate": 1.435550179712204e-06, "loss": 17.1117, "step": 41580 }, { "epoch": 0.7600672674429232, "grad_norm": 6.13211028544909, "learning_rate": 1.4353426001987825e-06, "loss": 17.1125, "step": 41581 }, { "epoch": 0.7600855466393698, "grad_norm": 5.651693116253471, "learning_rate": 1.4351350331791769e-06, "loss": 17.1197, "step": 41582 }, { "epoch": 0.7601038258358163, "grad_norm": 5.578902266828381, "learning_rate": 1.4349274786541123e-06, "loss": 17.1008, "step": 41583 }, { "epoch": 0.7601221050322627, "grad_norm": 5.403069473204531, "learning_rate": 1.434719936624318e-06, "loss": 17.0302, "step": 41584 }, { "epoch": 0.7601403842287093, "grad_norm": 8.759132018623633, "learning_rate": 1.434512407090522e-06, "loss": 18.0471, "step": 41585 }, { "epoch": 0.7601586634251558, "grad_norm": 7.8625154584094785, "learning_rate": 1.4343048900534507e-06, "loss": 17.916, "step": 41586 }, { "epoch": 0.7601769426216024, "grad_norm": 6.7468599275548735, "learning_rate": 1.4340973855138296e-06, "loss": 17.6861, "step": 41587 }, { "epoch": 0.7601952218180489, "grad_norm": 5.559930229684893, "learning_rate": 1.4338898934723894e-06, "loss": 16.8371, "step": 41588 }, { "epoch": 0.7602135010144954, "grad_norm": 6.088707998338222, "learning_rate": 1.433682413929856e-06, "loss": 17.2088, "step": 41589 }, { "epoch": 0.760231780210942, "grad_norm": 4.9407754813790925, "learning_rate": 1.4334749468869548e-06, "loss": 16.8855, "step": 41590 }, { "epoch": 0.7602500594073884, "grad_norm": 6.584480739230059, "learning_rate": 1.4332674923444152e-06, "loss": 17.3993, "step": 41591 }, { "epoch": 0.760268338603835, "grad_norm": 7.57398723350325, "learning_rate": 1.4330600503029623e-06, "loss": 18.0427, "step": 41592 }, { "epoch": 0.7602866178002815, "grad_norm": 6.506747694579266, "learning_rate": 1.4328526207633249e-06, "loss": 17.8063, "step": 41593 }, { "epoch": 0.760304896996728, "grad_norm": 6.992009543106562, "learning_rate": 1.4326452037262306e-06, "loss": 17.3907, "step": 41594 }, { "epoch": 0.7603231761931746, "grad_norm": 6.850200157998685, "learning_rate": 1.4324377991924038e-06, "loss": 17.9698, "step": 41595 }, { "epoch": 0.7603414553896211, "grad_norm": 6.066182635869703, "learning_rate": 1.4322304071625743e-06, "loss": 17.4712, "step": 41596 }, { "epoch": 0.7603597345860676, "grad_norm": 8.395159472248086, "learning_rate": 1.432023027637468e-06, "loss": 17.2021, "step": 41597 }, { "epoch": 0.7603780137825141, "grad_norm": 6.434356171825748, "learning_rate": 1.4318156606178096e-06, "loss": 17.6752, "step": 41598 }, { "epoch": 0.7603962929789606, "grad_norm": 6.435816295017718, "learning_rate": 1.4316083061043295e-06, "loss": 17.2479, "step": 41599 }, { "epoch": 0.7604145721754072, "grad_norm": 5.8208917298256715, "learning_rate": 1.4314009640977521e-06, "loss": 17.2779, "step": 41600 }, { "epoch": 0.7604328513718537, "grad_norm": 6.87895010307849, "learning_rate": 1.4311936345988037e-06, "loss": 17.3664, "step": 41601 }, { "epoch": 0.7604511305683003, "grad_norm": 6.6616458015848075, "learning_rate": 1.4309863176082118e-06, "loss": 17.4698, "step": 41602 }, { "epoch": 0.7604694097647468, "grad_norm": 6.588487046095402, "learning_rate": 1.4307790131267042e-06, "loss": 17.3892, "step": 41603 }, { "epoch": 0.7604876889611932, "grad_norm": 6.498477169518816, "learning_rate": 1.4305717211550053e-06, "loss": 17.2206, "step": 41604 }, { "epoch": 0.7605059681576398, "grad_norm": 5.898216074410629, "learning_rate": 1.4303644416938445e-06, "loss": 17.1855, "step": 41605 }, { "epoch": 0.7605242473540863, "grad_norm": 6.117466028026266, "learning_rate": 1.4301571747439462e-06, "loss": 17.1985, "step": 41606 }, { "epoch": 0.7605425265505329, "grad_norm": 4.759100629679807, "learning_rate": 1.4299499203060356e-06, "loss": 16.799, "step": 41607 }, { "epoch": 0.7605608057469794, "grad_norm": 6.131506263120235, "learning_rate": 1.4297426783808426e-06, "loss": 17.4323, "step": 41608 }, { "epoch": 0.7605790849434259, "grad_norm": 6.253143855157027, "learning_rate": 1.4295354489690905e-06, "loss": 17.465, "step": 41609 }, { "epoch": 0.7605973641398724, "grad_norm": 5.766924736309677, "learning_rate": 1.4293282320715073e-06, "loss": 17.0482, "step": 41610 }, { "epoch": 0.7606156433363189, "grad_norm": 6.34625186437796, "learning_rate": 1.4291210276888184e-06, "loss": 17.1175, "step": 41611 }, { "epoch": 0.7606339225327654, "grad_norm": 6.149128000835713, "learning_rate": 1.4289138358217497e-06, "loss": 17.2653, "step": 41612 }, { "epoch": 0.760652201729212, "grad_norm": 6.573602798997105, "learning_rate": 1.42870665647103e-06, "loss": 17.5535, "step": 41613 }, { "epoch": 0.7606704809256585, "grad_norm": 5.471592380875276, "learning_rate": 1.4284994896373833e-06, "loss": 16.9442, "step": 41614 }, { "epoch": 0.7606887601221051, "grad_norm": 6.0146026063920734, "learning_rate": 1.4282923353215345e-06, "loss": 17.193, "step": 41615 }, { "epoch": 0.7607070393185515, "grad_norm": 5.665489068644456, "learning_rate": 1.428085193524213e-06, "loss": 16.997, "step": 41616 }, { "epoch": 0.760725318514998, "grad_norm": 6.248776979099818, "learning_rate": 1.4278780642461427e-06, "loss": 17.3892, "step": 41617 }, { "epoch": 0.7607435977114446, "grad_norm": 6.171412966485502, "learning_rate": 1.427670947488048e-06, "loss": 17.3189, "step": 41618 }, { "epoch": 0.7607618769078911, "grad_norm": 6.482851265123971, "learning_rate": 1.4274638432506581e-06, "loss": 17.527, "step": 41619 }, { "epoch": 0.7607801561043377, "grad_norm": 6.115606774843775, "learning_rate": 1.4272567515346963e-06, "loss": 17.2493, "step": 41620 }, { "epoch": 0.7607984353007842, "grad_norm": 5.5610473756527545, "learning_rate": 1.42704967234089e-06, "loss": 17.0641, "step": 41621 }, { "epoch": 0.7608167144972307, "grad_norm": 5.782704474742524, "learning_rate": 1.4268426056699653e-06, "loss": 17.3127, "step": 41622 }, { "epoch": 0.7608349936936772, "grad_norm": 6.281673662919195, "learning_rate": 1.426635551522646e-06, "loss": 17.1568, "step": 41623 }, { "epoch": 0.7608532728901237, "grad_norm": 16.221771699855125, "learning_rate": 1.4264285098996606e-06, "loss": 17.1465, "step": 41624 }, { "epoch": 0.7608715520865703, "grad_norm": 6.063466244507768, "learning_rate": 1.4262214808017328e-06, "loss": 17.2708, "step": 41625 }, { "epoch": 0.7608898312830168, "grad_norm": 7.103877070386632, "learning_rate": 1.4260144642295876e-06, "loss": 17.6341, "step": 41626 }, { "epoch": 0.7609081104794633, "grad_norm": 6.300528373478826, "learning_rate": 1.4258074601839528e-06, "loss": 17.597, "step": 41627 }, { "epoch": 0.7609263896759099, "grad_norm": 6.975976707707055, "learning_rate": 1.4256004686655523e-06, "loss": 17.7036, "step": 41628 }, { "epoch": 0.7609446688723563, "grad_norm": 7.278795148556131, "learning_rate": 1.425393489675111e-06, "loss": 17.6767, "step": 41629 }, { "epoch": 0.7609629480688029, "grad_norm": 7.072803054982635, "learning_rate": 1.425186523213355e-06, "loss": 17.1165, "step": 41630 }, { "epoch": 0.7609812272652494, "grad_norm": 5.788785511137947, "learning_rate": 1.424979569281012e-06, "loss": 17.1689, "step": 41631 }, { "epoch": 0.7609995064616959, "grad_norm": 5.03827090527676, "learning_rate": 1.4247726278788033e-06, "loss": 16.8525, "step": 41632 }, { "epoch": 0.7610177856581425, "grad_norm": 6.11591192695755, "learning_rate": 1.424565699007458e-06, "loss": 17.0115, "step": 41633 }, { "epoch": 0.761036064854589, "grad_norm": 6.39392361274578, "learning_rate": 1.4243587826677002e-06, "loss": 17.3208, "step": 41634 }, { "epoch": 0.7610543440510356, "grad_norm": 5.875416847005884, "learning_rate": 1.4241518788602526e-06, "loss": 17.2321, "step": 41635 }, { "epoch": 0.761072623247482, "grad_norm": 7.258797123852449, "learning_rate": 1.4239449875858436e-06, "loss": 17.4075, "step": 41636 }, { "epoch": 0.7610909024439285, "grad_norm": 7.614315175465723, "learning_rate": 1.4237381088451963e-06, "loss": 17.4452, "step": 41637 }, { "epoch": 0.7611091816403751, "grad_norm": 6.6916680475842965, "learning_rate": 1.4235312426390375e-06, "loss": 17.6553, "step": 41638 }, { "epoch": 0.7611274608368216, "grad_norm": 5.381660401718164, "learning_rate": 1.4233243889680898e-06, "loss": 16.8855, "step": 41639 }, { "epoch": 0.7611457400332682, "grad_norm": 7.102214765412233, "learning_rate": 1.42311754783308e-06, "loss": 17.8513, "step": 41640 }, { "epoch": 0.7611640192297147, "grad_norm": 6.416841488108334, "learning_rate": 1.4229107192347346e-06, "loss": 17.4102, "step": 41641 }, { "epoch": 0.7611822984261611, "grad_norm": 5.905506262342959, "learning_rate": 1.4227039031737767e-06, "loss": 17.2739, "step": 41642 }, { "epoch": 0.7612005776226077, "grad_norm": 5.832897218627998, "learning_rate": 1.422497099650929e-06, "loss": 17.2143, "step": 41643 }, { "epoch": 0.7612188568190542, "grad_norm": 8.350142694571382, "learning_rate": 1.4222903086669204e-06, "loss": 17.5377, "step": 41644 }, { "epoch": 0.7612371360155008, "grad_norm": 5.933291850464374, "learning_rate": 1.422083530222474e-06, "loss": 17.2975, "step": 41645 }, { "epoch": 0.7612554152119473, "grad_norm": 7.435511894086961, "learning_rate": 1.421876764318313e-06, "loss": 17.9849, "step": 41646 }, { "epoch": 0.7612736944083938, "grad_norm": 7.049815250340143, "learning_rate": 1.421670010955163e-06, "loss": 17.4017, "step": 41647 }, { "epoch": 0.7612919736048404, "grad_norm": 6.902632863017402, "learning_rate": 1.4214632701337506e-06, "loss": 17.6959, "step": 41648 }, { "epoch": 0.7613102528012868, "grad_norm": 5.803623468259663, "learning_rate": 1.4212565418547975e-06, "loss": 17.1567, "step": 41649 }, { "epoch": 0.7613285319977334, "grad_norm": 5.683727110670982, "learning_rate": 1.4210498261190314e-06, "loss": 17.2221, "step": 41650 }, { "epoch": 0.7613468111941799, "grad_norm": 5.267667616912056, "learning_rate": 1.4208431229271747e-06, "loss": 16.9599, "step": 41651 }, { "epoch": 0.7613650903906264, "grad_norm": 6.816636611241001, "learning_rate": 1.420636432279951e-06, "loss": 17.6913, "step": 41652 }, { "epoch": 0.761383369587073, "grad_norm": 6.61207513274894, "learning_rate": 1.4204297541780875e-06, "loss": 17.5551, "step": 41653 }, { "epoch": 0.7614016487835195, "grad_norm": 6.856243063876629, "learning_rate": 1.4202230886223057e-06, "loss": 17.4725, "step": 41654 }, { "epoch": 0.761419927979966, "grad_norm": 6.529092422852067, "learning_rate": 1.4200164356133327e-06, "loss": 17.3108, "step": 41655 }, { "epoch": 0.7614382071764125, "grad_norm": 6.414149911932031, "learning_rate": 1.4198097951518902e-06, "loss": 17.4432, "step": 41656 }, { "epoch": 0.761456486372859, "grad_norm": 6.346111498029345, "learning_rate": 1.4196031672387035e-06, "loss": 17.4207, "step": 41657 }, { "epoch": 0.7614747655693056, "grad_norm": 6.1291536508432, "learning_rate": 1.4193965518744978e-06, "loss": 17.3134, "step": 41658 }, { "epoch": 0.7614930447657521, "grad_norm": 7.486831868413289, "learning_rate": 1.4191899490599974e-06, "loss": 17.3523, "step": 41659 }, { "epoch": 0.7615113239621987, "grad_norm": 5.396934507811556, "learning_rate": 1.418983358795924e-06, "loss": 16.991, "step": 41660 }, { "epoch": 0.7615296031586452, "grad_norm": 6.187566423668357, "learning_rate": 1.4187767810830044e-06, "loss": 17.4643, "step": 41661 }, { "epoch": 0.7615478823550916, "grad_norm": 6.297830101812542, "learning_rate": 1.4185702159219612e-06, "loss": 17.2977, "step": 41662 }, { "epoch": 0.7615661615515382, "grad_norm": 5.403881069904272, "learning_rate": 1.4183636633135172e-06, "loss": 16.8509, "step": 41663 }, { "epoch": 0.7615844407479847, "grad_norm": 7.154340552200929, "learning_rate": 1.4181571232583995e-06, "loss": 17.6913, "step": 41664 }, { "epoch": 0.7616027199444313, "grad_norm": 6.77706210555246, "learning_rate": 1.4179505957573286e-06, "loss": 17.7412, "step": 41665 }, { "epoch": 0.7616209991408778, "grad_norm": 5.995053927823283, "learning_rate": 1.4177440808110305e-06, "loss": 17.2559, "step": 41666 }, { "epoch": 0.7616392783373243, "grad_norm": 7.183158125888268, "learning_rate": 1.4175375784202295e-06, "loss": 17.637, "step": 41667 }, { "epoch": 0.7616575575337708, "grad_norm": 9.25791320860125, "learning_rate": 1.4173310885856484e-06, "loss": 18.3208, "step": 41668 }, { "epoch": 0.7616758367302173, "grad_norm": 6.957638667192773, "learning_rate": 1.4171246113080095e-06, "loss": 17.5777, "step": 41669 }, { "epoch": 0.7616941159266639, "grad_norm": 5.5570524392924225, "learning_rate": 1.41691814658804e-06, "loss": 17.2069, "step": 41670 }, { "epoch": 0.7617123951231104, "grad_norm": 5.085484897878768, "learning_rate": 1.4167116944264593e-06, "loss": 16.9907, "step": 41671 }, { "epoch": 0.7617306743195569, "grad_norm": 5.897370498722766, "learning_rate": 1.4165052548239948e-06, "loss": 17.0826, "step": 41672 }, { "epoch": 0.7617489535160035, "grad_norm": 5.55934163176003, "learning_rate": 1.4162988277813684e-06, "loss": 16.9773, "step": 41673 }, { "epoch": 0.76176723271245, "grad_norm": 7.319456840417554, "learning_rate": 1.416092413299302e-06, "loss": 17.1191, "step": 41674 }, { "epoch": 0.7617855119088965, "grad_norm": 4.563941947222461, "learning_rate": 1.415886011378521e-06, "loss": 16.668, "step": 41675 }, { "epoch": 0.761803791105343, "grad_norm": 5.797634673545144, "learning_rate": 1.4156796220197499e-06, "loss": 17.1924, "step": 41676 }, { "epoch": 0.7618220703017895, "grad_norm": 7.214439416925403, "learning_rate": 1.415473245223709e-06, "loss": 17.7243, "step": 41677 }, { "epoch": 0.7618403494982361, "grad_norm": 6.363902017612991, "learning_rate": 1.4152668809911253e-06, "loss": 16.919, "step": 41678 }, { "epoch": 0.7618586286946826, "grad_norm": 6.269119046565651, "learning_rate": 1.4150605293227199e-06, "loss": 17.0873, "step": 41679 }, { "epoch": 0.761876907891129, "grad_norm": 7.674387481182421, "learning_rate": 1.4148541902192147e-06, "loss": 17.6965, "step": 41680 }, { "epoch": 0.7618951870875756, "grad_norm": 5.493304053925745, "learning_rate": 1.414647863681336e-06, "loss": 17.0377, "step": 41681 }, { "epoch": 0.7619134662840221, "grad_norm": 6.242959273439039, "learning_rate": 1.4144415497098058e-06, "loss": 17.3073, "step": 41682 }, { "epoch": 0.7619317454804687, "grad_norm": 8.380165636183508, "learning_rate": 1.4142352483053445e-06, "loss": 17.5083, "step": 41683 }, { "epoch": 0.7619500246769152, "grad_norm": 4.931552752286349, "learning_rate": 1.414028959468678e-06, "loss": 16.9885, "step": 41684 }, { "epoch": 0.7619683038733617, "grad_norm": 6.899283683842903, "learning_rate": 1.4138226832005286e-06, "loss": 17.754, "step": 41685 }, { "epoch": 0.7619865830698083, "grad_norm": 5.277901735126748, "learning_rate": 1.413616419501621e-06, "loss": 16.8691, "step": 41686 }, { "epoch": 0.7620048622662547, "grad_norm": 6.165702096582567, "learning_rate": 1.413410168372677e-06, "loss": 17.0923, "step": 41687 }, { "epoch": 0.7620231414627013, "grad_norm": 6.126343717560773, "learning_rate": 1.4132039298144173e-06, "loss": 17.4323, "step": 41688 }, { "epoch": 0.7620414206591478, "grad_norm": 6.852128418638527, "learning_rate": 1.4129977038275682e-06, "loss": 17.8512, "step": 41689 }, { "epoch": 0.7620596998555943, "grad_norm": 5.184024195031386, "learning_rate": 1.412791490412851e-06, "loss": 16.8495, "step": 41690 }, { "epoch": 0.7620779790520409, "grad_norm": 6.154618371117608, "learning_rate": 1.4125852895709867e-06, "loss": 17.6315, "step": 41691 }, { "epoch": 0.7620962582484874, "grad_norm": 5.689908234587789, "learning_rate": 1.4123791013027011e-06, "loss": 17.1309, "step": 41692 }, { "epoch": 0.762114537444934, "grad_norm": 6.338676010373153, "learning_rate": 1.412172925608714e-06, "loss": 17.2488, "step": 41693 }, { "epoch": 0.7621328166413804, "grad_norm": 6.7544620107705615, "learning_rate": 1.4119667624897493e-06, "loss": 17.2274, "step": 41694 }, { "epoch": 0.7621510958378269, "grad_norm": 11.6547426329898, "learning_rate": 1.4117606119465315e-06, "loss": 18.0255, "step": 41695 }, { "epoch": 0.7621693750342735, "grad_norm": 4.975175811149941, "learning_rate": 1.4115544739797815e-06, "loss": 16.8892, "step": 41696 }, { "epoch": 0.76218765423072, "grad_norm": 4.8093866812314765, "learning_rate": 1.4113483485902195e-06, "loss": 16.8548, "step": 41697 }, { "epoch": 0.7622059334271666, "grad_norm": 6.9652835926465295, "learning_rate": 1.4111422357785725e-06, "loss": 17.811, "step": 41698 }, { "epoch": 0.7622242126236131, "grad_norm": 5.976492094746568, "learning_rate": 1.4109361355455597e-06, "loss": 17.2183, "step": 41699 }, { "epoch": 0.7622424918200595, "grad_norm": 7.855645927225885, "learning_rate": 1.4107300478919034e-06, "loss": 16.9351, "step": 41700 }, { "epoch": 0.7622607710165061, "grad_norm": 5.367132214129259, "learning_rate": 1.4105239728183268e-06, "loss": 17.1111, "step": 41701 }, { "epoch": 0.7622790502129526, "grad_norm": 5.763260871303276, "learning_rate": 1.4103179103255531e-06, "loss": 17.3546, "step": 41702 }, { "epoch": 0.7622973294093992, "grad_norm": 6.220465727102749, "learning_rate": 1.4101118604143028e-06, "loss": 17.2605, "step": 41703 }, { "epoch": 0.7623156086058457, "grad_norm": 6.031543305948414, "learning_rate": 1.4099058230853001e-06, "loss": 17.5197, "step": 41704 }, { "epoch": 0.7623338878022922, "grad_norm": 6.414985580233491, "learning_rate": 1.4096997983392646e-06, "loss": 17.3047, "step": 41705 }, { "epoch": 0.7623521669987388, "grad_norm": 7.449438882601654, "learning_rate": 1.4094937861769214e-06, "loss": 17.9538, "step": 41706 }, { "epoch": 0.7623704461951852, "grad_norm": 8.612528278410837, "learning_rate": 1.4092877865989907e-06, "loss": 17.5496, "step": 41707 }, { "epoch": 0.7623887253916318, "grad_norm": 6.073478307279159, "learning_rate": 1.4090817996061928e-06, "loss": 17.2846, "step": 41708 }, { "epoch": 0.7624070045880783, "grad_norm": 8.652204840424812, "learning_rate": 1.408875825199254e-06, "loss": 17.1269, "step": 41709 }, { "epoch": 0.7624252837845248, "grad_norm": 5.583818929607268, "learning_rate": 1.4086698633788914e-06, "loss": 16.8433, "step": 41710 }, { "epoch": 0.7624435629809714, "grad_norm": 5.606556188792289, "learning_rate": 1.40846391414583e-06, "loss": 16.9783, "step": 41711 }, { "epoch": 0.7624618421774179, "grad_norm": 6.612399634938421, "learning_rate": 1.408257977500792e-06, "loss": 17.4413, "step": 41712 }, { "epoch": 0.7624801213738645, "grad_norm": 5.58847128316083, "learning_rate": 1.408052053444498e-06, "loss": 17.1037, "step": 41713 }, { "epoch": 0.7624984005703109, "grad_norm": 8.417407516309016, "learning_rate": 1.407846141977669e-06, "loss": 18.3157, "step": 41714 }, { "epoch": 0.7625166797667574, "grad_norm": 6.204136130660591, "learning_rate": 1.4076402431010282e-06, "loss": 17.3502, "step": 41715 }, { "epoch": 0.762534958963204, "grad_norm": 6.894670885049531, "learning_rate": 1.4074343568152959e-06, "loss": 17.7075, "step": 41716 }, { "epoch": 0.7625532381596505, "grad_norm": 6.6138412445381425, "learning_rate": 1.4072284831211957e-06, "loss": 17.1231, "step": 41717 }, { "epoch": 0.7625715173560971, "grad_norm": 5.906590522662524, "learning_rate": 1.4070226220194478e-06, "loss": 17.2563, "step": 41718 }, { "epoch": 0.7625897965525436, "grad_norm": 5.934987119616877, "learning_rate": 1.4068167735107719e-06, "loss": 17.4103, "step": 41719 }, { "epoch": 0.76260807574899, "grad_norm": 6.644282263399449, "learning_rate": 1.4066109375958919e-06, "loss": 17.6409, "step": 41720 }, { "epoch": 0.7626263549454366, "grad_norm": 7.3323214619196015, "learning_rate": 1.4064051142755302e-06, "loss": 17.7519, "step": 41721 }, { "epoch": 0.7626446341418831, "grad_norm": 5.376601240807651, "learning_rate": 1.406199303550405e-06, "loss": 17.1529, "step": 41722 }, { "epoch": 0.7626629133383297, "grad_norm": 6.010735980820908, "learning_rate": 1.4059935054212416e-06, "loss": 17.3978, "step": 41723 }, { "epoch": 0.7626811925347762, "grad_norm": 8.556252119461536, "learning_rate": 1.4057877198887582e-06, "loss": 18.4965, "step": 41724 }, { "epoch": 0.7626994717312227, "grad_norm": 6.353382293924019, "learning_rate": 1.4055819469536758e-06, "loss": 17.5778, "step": 41725 }, { "epoch": 0.7627177509276692, "grad_norm": 6.794145925397877, "learning_rate": 1.4053761866167187e-06, "loss": 17.8307, "step": 41726 }, { "epoch": 0.7627360301241157, "grad_norm": 4.949370315484836, "learning_rate": 1.405170438878606e-06, "loss": 16.8193, "step": 41727 }, { "epoch": 0.7627543093205623, "grad_norm": 8.932301688744326, "learning_rate": 1.4049647037400566e-06, "loss": 18.1828, "step": 41728 }, { "epoch": 0.7627725885170088, "grad_norm": 7.780045647831737, "learning_rate": 1.4047589812017948e-06, "loss": 18.3589, "step": 41729 }, { "epoch": 0.7627908677134553, "grad_norm": 7.862382459232211, "learning_rate": 1.4045532712645405e-06, "loss": 17.5872, "step": 41730 }, { "epoch": 0.7628091469099019, "grad_norm": 5.237367337183233, "learning_rate": 1.4043475739290163e-06, "loss": 16.8644, "step": 41731 }, { "epoch": 0.7628274261063483, "grad_norm": 6.223417579445658, "learning_rate": 1.4041418891959425e-06, "loss": 17.2554, "step": 41732 }, { "epoch": 0.7628457053027949, "grad_norm": 5.805906485912206, "learning_rate": 1.403936217066037e-06, "loss": 17.4707, "step": 41733 }, { "epoch": 0.7628639844992414, "grad_norm": 6.196069602702204, "learning_rate": 1.403730557540025e-06, "loss": 17.0935, "step": 41734 }, { "epoch": 0.7628822636956879, "grad_norm": 5.5739870079570855, "learning_rate": 1.4035249106186256e-06, "loss": 17.2677, "step": 41735 }, { "epoch": 0.7629005428921345, "grad_norm": 6.822782758639999, "learning_rate": 1.4033192763025572e-06, "loss": 17.7776, "step": 41736 }, { "epoch": 0.762918822088581, "grad_norm": 4.969666774805931, "learning_rate": 1.4031136545925444e-06, "loss": 16.8689, "step": 41737 }, { "epoch": 0.7629371012850276, "grad_norm": 5.742692470215541, "learning_rate": 1.4029080454893047e-06, "loss": 16.8675, "step": 41738 }, { "epoch": 0.762955380481474, "grad_norm": 5.660573315837618, "learning_rate": 1.4027024489935598e-06, "loss": 17.364, "step": 41739 }, { "epoch": 0.7629736596779205, "grad_norm": 6.89137686916735, "learning_rate": 1.4024968651060323e-06, "loss": 17.4439, "step": 41740 }, { "epoch": 0.7629919388743671, "grad_norm": 6.438698728996518, "learning_rate": 1.4022912938274413e-06, "loss": 17.5868, "step": 41741 }, { "epoch": 0.7630102180708136, "grad_norm": 7.443245761802537, "learning_rate": 1.4020857351585054e-06, "loss": 17.8053, "step": 41742 }, { "epoch": 0.7630284972672602, "grad_norm": 4.992490534631181, "learning_rate": 1.401880189099949e-06, "loss": 16.9594, "step": 41743 }, { "epoch": 0.7630467764637067, "grad_norm": 6.925207969570683, "learning_rate": 1.4016746556524895e-06, "loss": 17.4211, "step": 41744 }, { "epoch": 0.7630650556601531, "grad_norm": 5.686126863800216, "learning_rate": 1.401469134816847e-06, "loss": 17.1183, "step": 41745 }, { "epoch": 0.7630833348565997, "grad_norm": 6.669236966353313, "learning_rate": 1.401263626593744e-06, "loss": 17.0926, "step": 41746 }, { "epoch": 0.7631016140530462, "grad_norm": 6.452937285200859, "learning_rate": 1.4010581309838984e-06, "loss": 17.3072, "step": 41747 }, { "epoch": 0.7631198932494927, "grad_norm": 7.863004752015728, "learning_rate": 1.4008526479880318e-06, "loss": 17.924, "step": 41748 }, { "epoch": 0.7631381724459393, "grad_norm": 4.9753377813564, "learning_rate": 1.4006471776068658e-06, "loss": 16.9311, "step": 41749 }, { "epoch": 0.7631564516423858, "grad_norm": 5.205412657962385, "learning_rate": 1.4004417198411175e-06, "loss": 16.932, "step": 41750 }, { "epoch": 0.7631747308388324, "grad_norm": 4.638621531777064, "learning_rate": 1.4002362746915105e-06, "loss": 16.7971, "step": 41751 }, { "epoch": 0.7631930100352788, "grad_norm": 6.188701968933416, "learning_rate": 1.4000308421587628e-06, "loss": 17.7134, "step": 41752 }, { "epoch": 0.7632112892317253, "grad_norm": 6.477493090003081, "learning_rate": 1.3998254222435931e-06, "loss": 17.3731, "step": 41753 }, { "epoch": 0.7632295684281719, "grad_norm": 5.59042608533953, "learning_rate": 1.3996200149467244e-06, "loss": 17.1996, "step": 41754 }, { "epoch": 0.7632478476246184, "grad_norm": 5.5971213555051875, "learning_rate": 1.3994146202688747e-06, "loss": 17.0899, "step": 41755 }, { "epoch": 0.763266126821065, "grad_norm": 6.640212922432265, "learning_rate": 1.3992092382107631e-06, "loss": 17.1527, "step": 41756 }, { "epoch": 0.7632844060175115, "grad_norm": 7.101577937188284, "learning_rate": 1.399003868773111e-06, "loss": 17.6861, "step": 41757 }, { "epoch": 0.7633026852139579, "grad_norm": 6.266801175203062, "learning_rate": 1.3987985119566389e-06, "loss": 17.337, "step": 41758 }, { "epoch": 0.7633209644104045, "grad_norm": 6.711432486406279, "learning_rate": 1.3985931677620645e-06, "loss": 17.3268, "step": 41759 }, { "epoch": 0.763339243606851, "grad_norm": 6.827692945341622, "learning_rate": 1.3983878361901093e-06, "loss": 17.5187, "step": 41760 }, { "epoch": 0.7633575228032976, "grad_norm": 8.962616316307935, "learning_rate": 1.3981825172414914e-06, "loss": 18.6584, "step": 41761 }, { "epoch": 0.7633758019997441, "grad_norm": 6.752479333865699, "learning_rate": 1.3979772109169326e-06, "loss": 17.2949, "step": 41762 }, { "epoch": 0.7633940811961906, "grad_norm": 6.689253842170711, "learning_rate": 1.3977719172171505e-06, "loss": 17.5082, "step": 41763 }, { "epoch": 0.7634123603926372, "grad_norm": 7.85673790733498, "learning_rate": 1.3975666361428647e-06, "loss": 17.626, "step": 41764 }, { "epoch": 0.7634306395890836, "grad_norm": 7.56831662007924, "learning_rate": 1.3973613676947945e-06, "loss": 17.6979, "step": 41765 }, { "epoch": 0.7634489187855302, "grad_norm": 6.494382611532064, "learning_rate": 1.397156111873662e-06, "loss": 17.284, "step": 41766 }, { "epoch": 0.7634671979819767, "grad_norm": 7.263837932318537, "learning_rate": 1.3969508686801831e-06, "loss": 17.8156, "step": 41767 }, { "epoch": 0.7634854771784232, "grad_norm": 5.678553567352026, "learning_rate": 1.39674563811508e-06, "loss": 17.0317, "step": 41768 }, { "epoch": 0.7635037563748698, "grad_norm": 6.390814341575451, "learning_rate": 1.396540420179071e-06, "loss": 17.4912, "step": 41769 }, { "epoch": 0.7635220355713163, "grad_norm": 5.479049761630388, "learning_rate": 1.3963352148728737e-06, "loss": 17.0603, "step": 41770 }, { "epoch": 0.7635403147677629, "grad_norm": 6.606461941246743, "learning_rate": 1.3961300221972102e-06, "loss": 17.2588, "step": 41771 }, { "epoch": 0.7635585939642093, "grad_norm": 7.069587545560554, "learning_rate": 1.3959248421527977e-06, "loss": 17.3943, "step": 41772 }, { "epoch": 0.7635768731606558, "grad_norm": 7.105479547726761, "learning_rate": 1.3957196747403551e-06, "loss": 17.5353, "step": 41773 }, { "epoch": 0.7635951523571024, "grad_norm": 8.587185027967116, "learning_rate": 1.395514519960602e-06, "loss": 18.2868, "step": 41774 }, { "epoch": 0.7636134315535489, "grad_norm": 6.156865563792201, "learning_rate": 1.395309377814259e-06, "loss": 17.3953, "step": 41775 }, { "epoch": 0.7636317107499955, "grad_norm": 7.698252749855613, "learning_rate": 1.3951042483020428e-06, "loss": 17.4692, "step": 41776 }, { "epoch": 0.763649989946442, "grad_norm": 6.139407290271163, "learning_rate": 1.3948991314246747e-06, "loss": 17.3134, "step": 41777 }, { "epoch": 0.7636682691428884, "grad_norm": 6.359001983962007, "learning_rate": 1.3946940271828708e-06, "loss": 17.4756, "step": 41778 }, { "epoch": 0.763686548339335, "grad_norm": 7.171811767486174, "learning_rate": 1.3944889355773528e-06, "loss": 17.4833, "step": 41779 }, { "epoch": 0.7637048275357815, "grad_norm": 6.427995117427511, "learning_rate": 1.3942838566088384e-06, "loss": 17.3013, "step": 41780 }, { "epoch": 0.7637231067322281, "grad_norm": 8.085836542932528, "learning_rate": 1.3940787902780446e-06, "loss": 18.0421, "step": 41781 }, { "epoch": 0.7637413859286746, "grad_norm": 5.951000362247378, "learning_rate": 1.3938737365856936e-06, "loss": 17.4283, "step": 41782 }, { "epoch": 0.7637596651251211, "grad_norm": 7.115468479217678, "learning_rate": 1.3936686955325001e-06, "loss": 17.6569, "step": 41783 }, { "epoch": 0.7637779443215676, "grad_norm": 5.857555988134055, "learning_rate": 1.3934636671191853e-06, "loss": 17.307, "step": 41784 }, { "epoch": 0.7637962235180141, "grad_norm": 6.00513303868985, "learning_rate": 1.3932586513464686e-06, "loss": 17.6273, "step": 41785 }, { "epoch": 0.7638145027144607, "grad_norm": 6.271343611987616, "learning_rate": 1.3930536482150674e-06, "loss": 17.3607, "step": 41786 }, { "epoch": 0.7638327819109072, "grad_norm": 6.346868526667409, "learning_rate": 1.392848657725699e-06, "loss": 17.3023, "step": 41787 }, { "epoch": 0.7638510611073537, "grad_norm": 5.396206731040678, "learning_rate": 1.3926436798790844e-06, "loss": 16.9467, "step": 41788 }, { "epoch": 0.7638693403038003, "grad_norm": 6.485181782711377, "learning_rate": 1.3924387146759406e-06, "loss": 17.6187, "step": 41789 }, { "epoch": 0.7638876195002468, "grad_norm": 7.97774308577819, "learning_rate": 1.3922337621169846e-06, "loss": 17.4068, "step": 41790 }, { "epoch": 0.7639058986966933, "grad_norm": 7.159042083014935, "learning_rate": 1.3920288222029376e-06, "loss": 17.3654, "step": 41791 }, { "epoch": 0.7639241778931398, "grad_norm": 5.663044537710844, "learning_rate": 1.3918238949345154e-06, "loss": 17.0653, "step": 41792 }, { "epoch": 0.7639424570895863, "grad_norm": 5.884624023706784, "learning_rate": 1.3916189803124363e-06, "loss": 16.9164, "step": 41793 }, { "epoch": 0.7639607362860329, "grad_norm": 6.860849058902231, "learning_rate": 1.3914140783374219e-06, "loss": 17.3066, "step": 41794 }, { "epoch": 0.7639790154824794, "grad_norm": 11.785812745317195, "learning_rate": 1.3912091890101865e-06, "loss": 18.546, "step": 41795 }, { "epoch": 0.763997294678926, "grad_norm": 5.77936545173068, "learning_rate": 1.3910043123314508e-06, "loss": 17.4759, "step": 41796 }, { "epoch": 0.7640155738753724, "grad_norm": 6.3202280871577425, "learning_rate": 1.3907994483019327e-06, "loss": 17.4904, "step": 41797 }, { "epoch": 0.7640338530718189, "grad_norm": 8.109222456448263, "learning_rate": 1.3905945969223473e-06, "loss": 18.0708, "step": 41798 }, { "epoch": 0.7640521322682655, "grad_norm": 5.87177447847915, "learning_rate": 1.3903897581934155e-06, "loss": 17.275, "step": 41799 }, { "epoch": 0.764070411464712, "grad_norm": 6.020105477010033, "learning_rate": 1.390184932115855e-06, "loss": 17.3804, "step": 41800 }, { "epoch": 0.7640886906611586, "grad_norm": 6.055583195377228, "learning_rate": 1.3899801186903817e-06, "loss": 16.8907, "step": 41801 }, { "epoch": 0.7641069698576051, "grad_norm": 8.373184470233369, "learning_rate": 1.3897753179177148e-06, "loss": 17.8099, "step": 41802 }, { "epoch": 0.7641252490540515, "grad_norm": 6.352624390535125, "learning_rate": 1.3895705297985735e-06, "loss": 17.321, "step": 41803 }, { "epoch": 0.7641435282504981, "grad_norm": 6.28910956062541, "learning_rate": 1.3893657543336724e-06, "loss": 17.4181, "step": 41804 }, { "epoch": 0.7641618074469446, "grad_norm": 5.893115758995605, "learning_rate": 1.389160991523733e-06, "loss": 17.2956, "step": 41805 }, { "epoch": 0.7641800866433912, "grad_norm": 6.078433928636506, "learning_rate": 1.3889562413694706e-06, "loss": 17.4101, "step": 41806 }, { "epoch": 0.7641983658398377, "grad_norm": 7.005239643866799, "learning_rate": 1.388751503871602e-06, "loss": 17.6261, "step": 41807 }, { "epoch": 0.7642166450362842, "grad_norm": 6.166453538459703, "learning_rate": 1.3885467790308478e-06, "loss": 17.3285, "step": 41808 }, { "epoch": 0.7642349242327308, "grad_norm": 6.532591200581601, "learning_rate": 1.3883420668479213e-06, "loss": 17.4588, "step": 41809 }, { "epoch": 0.7642532034291772, "grad_norm": 8.325153238568882, "learning_rate": 1.3881373673235448e-06, "loss": 17.8874, "step": 41810 }, { "epoch": 0.7642714826256238, "grad_norm": 5.936010212968221, "learning_rate": 1.3879326804584314e-06, "loss": 17.278, "step": 41811 }, { "epoch": 0.7642897618220703, "grad_norm": 6.578258355343499, "learning_rate": 1.3877280062533006e-06, "loss": 17.2421, "step": 41812 }, { "epoch": 0.7643080410185168, "grad_norm": 6.6146110325600915, "learning_rate": 1.3875233447088715e-06, "loss": 17.3745, "step": 41813 }, { "epoch": 0.7643263202149634, "grad_norm": 5.389763727740426, "learning_rate": 1.3873186958258594e-06, "loss": 16.9136, "step": 41814 }, { "epoch": 0.7643445994114099, "grad_norm": 6.634378731582815, "learning_rate": 1.3871140596049803e-06, "loss": 17.1914, "step": 41815 }, { "epoch": 0.7643628786078563, "grad_norm": 5.183420575431177, "learning_rate": 1.3869094360469544e-06, "loss": 16.9255, "step": 41816 }, { "epoch": 0.7643811578043029, "grad_norm": 5.709296241698353, "learning_rate": 1.3867048251524973e-06, "loss": 17.2807, "step": 41817 }, { "epoch": 0.7643994370007494, "grad_norm": 5.255948717849436, "learning_rate": 1.3865002269223248e-06, "loss": 16.907, "step": 41818 }, { "epoch": 0.764417716197196, "grad_norm": 5.925134567330198, "learning_rate": 1.386295641357157e-06, "loss": 17.2604, "step": 41819 }, { "epoch": 0.7644359953936425, "grad_norm": 6.195227639594102, "learning_rate": 1.3860910684577078e-06, "loss": 17.0993, "step": 41820 }, { "epoch": 0.764454274590089, "grad_norm": 5.317029956151144, "learning_rate": 1.385886508224696e-06, "loss": 17.0008, "step": 41821 }, { "epoch": 0.7644725537865356, "grad_norm": 7.143124042989755, "learning_rate": 1.3856819606588395e-06, "loss": 17.0882, "step": 41822 }, { "epoch": 0.764490832982982, "grad_norm": 10.20818392881844, "learning_rate": 1.3854774257608527e-06, "loss": 17.4029, "step": 41823 }, { "epoch": 0.7645091121794286, "grad_norm": 6.309734045193716, "learning_rate": 1.385272903531456e-06, "loss": 17.2516, "step": 41824 }, { "epoch": 0.7645273913758751, "grad_norm": 7.220291177278868, "learning_rate": 1.3850683939713633e-06, "loss": 17.6512, "step": 41825 }, { "epoch": 0.7645456705723216, "grad_norm": 6.651149286614988, "learning_rate": 1.384863897081291e-06, "loss": 16.9874, "step": 41826 }, { "epoch": 0.7645639497687682, "grad_norm": 7.560452044757871, "learning_rate": 1.3846594128619583e-06, "loss": 18.0263, "step": 41827 }, { "epoch": 0.7645822289652147, "grad_norm": 6.078247527906512, "learning_rate": 1.3844549413140796e-06, "loss": 17.3937, "step": 41828 }, { "epoch": 0.7646005081616613, "grad_norm": 6.736544940856568, "learning_rate": 1.384250482438374e-06, "loss": 17.7874, "step": 41829 }, { "epoch": 0.7646187873581077, "grad_norm": 5.913999233611531, "learning_rate": 1.3840460362355551e-06, "loss": 17.5061, "step": 41830 }, { "epoch": 0.7646370665545542, "grad_norm": 5.475733835369408, "learning_rate": 1.3838416027063422e-06, "loss": 16.946, "step": 41831 }, { "epoch": 0.7646553457510008, "grad_norm": 6.6994140848987715, "learning_rate": 1.3836371818514499e-06, "loss": 17.5105, "step": 41832 }, { "epoch": 0.7646736249474473, "grad_norm": 6.005547541565373, "learning_rate": 1.3834327736715963e-06, "loss": 17.1976, "step": 41833 }, { "epoch": 0.7646919041438939, "grad_norm": 6.3543532796982936, "learning_rate": 1.3832283781674971e-06, "loss": 17.4647, "step": 41834 }, { "epoch": 0.7647101833403404, "grad_norm": 5.095369489011691, "learning_rate": 1.3830239953398671e-06, "loss": 16.7392, "step": 41835 }, { "epoch": 0.7647284625367868, "grad_norm": 7.117217191440115, "learning_rate": 1.382819625189426e-06, "loss": 17.3831, "step": 41836 }, { "epoch": 0.7647467417332334, "grad_norm": 6.949997440167756, "learning_rate": 1.3826152677168865e-06, "loss": 17.4627, "step": 41837 }, { "epoch": 0.7647650209296799, "grad_norm": 5.4287087057182815, "learning_rate": 1.3824109229229666e-06, "loss": 17.1417, "step": 41838 }, { "epoch": 0.7647833001261265, "grad_norm": 6.033003438041226, "learning_rate": 1.3822065908083842e-06, "loss": 17.171, "step": 41839 }, { "epoch": 0.764801579322573, "grad_norm": 6.344903217681759, "learning_rate": 1.3820022713738517e-06, "loss": 17.2617, "step": 41840 }, { "epoch": 0.7648198585190195, "grad_norm": 7.295756927336002, "learning_rate": 1.3817979646200892e-06, "loss": 17.737, "step": 41841 }, { "epoch": 0.764838137715466, "grad_norm": 5.520509522529624, "learning_rate": 1.381593670547811e-06, "loss": 17.1768, "step": 41842 }, { "epoch": 0.7648564169119125, "grad_norm": 6.7263653303011735, "learning_rate": 1.3813893891577313e-06, "loss": 17.1926, "step": 41843 }, { "epoch": 0.7648746961083591, "grad_norm": 8.52585974557152, "learning_rate": 1.3811851204505688e-06, "loss": 18.0987, "step": 41844 }, { "epoch": 0.7648929753048056, "grad_norm": 6.327993948175645, "learning_rate": 1.3809808644270395e-06, "loss": 17.4923, "step": 41845 }, { "epoch": 0.7649112545012521, "grad_norm": 6.560821700848105, "learning_rate": 1.380776621087856e-06, "loss": 17.094, "step": 41846 }, { "epoch": 0.7649295336976987, "grad_norm": 5.271477728007369, "learning_rate": 1.3805723904337365e-06, "loss": 16.9593, "step": 41847 }, { "epoch": 0.7649478128941452, "grad_norm": 6.123983256942975, "learning_rate": 1.3803681724653977e-06, "loss": 17.0541, "step": 41848 }, { "epoch": 0.7649660920905917, "grad_norm": 6.0135257528069745, "learning_rate": 1.3801639671835537e-06, "loss": 17.4488, "step": 41849 }, { "epoch": 0.7649843712870382, "grad_norm": 5.351824947560898, "learning_rate": 1.379959774588922e-06, "loss": 17.0869, "step": 41850 }, { "epoch": 0.7650026504834847, "grad_norm": 5.5405704532393845, "learning_rate": 1.3797555946822172e-06, "loss": 16.9879, "step": 41851 }, { "epoch": 0.7650209296799313, "grad_norm": 7.286613013883919, "learning_rate": 1.379551427464153e-06, "loss": 17.7828, "step": 41852 }, { "epoch": 0.7650392088763778, "grad_norm": 7.665799850174939, "learning_rate": 1.379347272935449e-06, "loss": 17.5816, "step": 41853 }, { "epoch": 0.7650574880728244, "grad_norm": 5.4870098848376685, "learning_rate": 1.3791431310968168e-06, "loss": 17.1586, "step": 41854 }, { "epoch": 0.7650757672692708, "grad_norm": 5.820368770938071, "learning_rate": 1.3789390019489757e-06, "loss": 17.2393, "step": 41855 }, { "epoch": 0.7650940464657173, "grad_norm": 9.11363117426196, "learning_rate": 1.378734885492637e-06, "loss": 18.1523, "step": 41856 }, { "epoch": 0.7651123256621639, "grad_norm": 5.321245828629849, "learning_rate": 1.3785307817285188e-06, "loss": 17.1382, "step": 41857 }, { "epoch": 0.7651306048586104, "grad_norm": 6.845552510476548, "learning_rate": 1.3783266906573368e-06, "loss": 17.8333, "step": 41858 }, { "epoch": 0.765148884055057, "grad_norm": 5.868728467395947, "learning_rate": 1.3781226122798057e-06, "loss": 17.1356, "step": 41859 }, { "epoch": 0.7651671632515035, "grad_norm": 5.857044062242052, "learning_rate": 1.3779185465966393e-06, "loss": 16.8974, "step": 41860 }, { "epoch": 0.76518544244795, "grad_norm": 6.55335954039844, "learning_rate": 1.3777144936085556e-06, "loss": 17.3619, "step": 41861 }, { "epoch": 0.7652037216443965, "grad_norm": 6.122230295833639, "learning_rate": 1.3775104533162686e-06, "loss": 17.3196, "step": 41862 }, { "epoch": 0.765222000840843, "grad_norm": 5.937894030156134, "learning_rate": 1.377306425720491e-06, "loss": 17.1294, "step": 41863 }, { "epoch": 0.7652402800372896, "grad_norm": 6.436366472496192, "learning_rate": 1.3771024108219417e-06, "loss": 17.4925, "step": 41864 }, { "epoch": 0.7652585592337361, "grad_norm": 6.84736672571146, "learning_rate": 1.3768984086213328e-06, "loss": 17.4735, "step": 41865 }, { "epoch": 0.7652768384301826, "grad_norm": 6.112313945360601, "learning_rate": 1.3766944191193805e-06, "loss": 17.1845, "step": 41866 }, { "epoch": 0.7652951176266292, "grad_norm": 7.305608285787388, "learning_rate": 1.3764904423168008e-06, "loss": 17.3619, "step": 41867 }, { "epoch": 0.7653133968230756, "grad_norm": 5.715912461377334, "learning_rate": 1.3762864782143082e-06, "loss": 17.1643, "step": 41868 }, { "epoch": 0.7653316760195222, "grad_norm": 6.52961655958731, "learning_rate": 1.3760825268126154e-06, "loss": 17.5786, "step": 41869 }, { "epoch": 0.7653499552159687, "grad_norm": 7.198411180994159, "learning_rate": 1.3758785881124403e-06, "loss": 17.9038, "step": 41870 }, { "epoch": 0.7653682344124152, "grad_norm": 6.841115860309808, "learning_rate": 1.3756746621144951e-06, "loss": 17.4018, "step": 41871 }, { "epoch": 0.7653865136088618, "grad_norm": 5.586999543682789, "learning_rate": 1.3754707488194967e-06, "loss": 17.0055, "step": 41872 }, { "epoch": 0.7654047928053083, "grad_norm": 6.331791105894496, "learning_rate": 1.3752668482281596e-06, "loss": 17.2279, "step": 41873 }, { "epoch": 0.7654230720017549, "grad_norm": 6.595295703580698, "learning_rate": 1.3750629603411953e-06, "loss": 17.4128, "step": 41874 }, { "epoch": 0.7654413511982013, "grad_norm": 7.845336001004158, "learning_rate": 1.3748590851593213e-06, "loss": 17.7243, "step": 41875 }, { "epoch": 0.7654596303946478, "grad_norm": 6.078974419449888, "learning_rate": 1.3746552226832527e-06, "loss": 17.4347, "step": 41876 }, { "epoch": 0.7654779095910944, "grad_norm": 7.4856681190992065, "learning_rate": 1.3744513729137016e-06, "loss": 17.4947, "step": 41877 }, { "epoch": 0.7654961887875409, "grad_norm": 7.208956265755491, "learning_rate": 1.3742475358513857e-06, "loss": 17.887, "step": 41878 }, { "epoch": 0.7655144679839875, "grad_norm": 7.1521183457018305, "learning_rate": 1.3740437114970173e-06, "loss": 17.5389, "step": 41879 }, { "epoch": 0.765532747180434, "grad_norm": 6.8465778045060395, "learning_rate": 1.373839899851309e-06, "loss": 17.6399, "step": 41880 }, { "epoch": 0.7655510263768804, "grad_norm": 5.66130643454497, "learning_rate": 1.3736361009149795e-06, "loss": 16.9862, "step": 41881 }, { "epoch": 0.765569305573327, "grad_norm": 6.274067372263925, "learning_rate": 1.3734323146887407e-06, "loss": 17.2198, "step": 41882 }, { "epoch": 0.7655875847697735, "grad_norm": 6.042807106095455, "learning_rate": 1.3732285411733048e-06, "loss": 17.238, "step": 41883 }, { "epoch": 0.76560586396622, "grad_norm": 6.6859678202465975, "learning_rate": 1.3730247803693887e-06, "loss": 17.5306, "step": 41884 }, { "epoch": 0.7656241431626666, "grad_norm": 5.539132915284842, "learning_rate": 1.3728210322777058e-06, "loss": 17.2176, "step": 41885 }, { "epoch": 0.7656424223591131, "grad_norm": 5.135533926786113, "learning_rate": 1.372617296898972e-06, "loss": 16.8604, "step": 41886 }, { "epoch": 0.7656607015555597, "grad_norm": 5.736464732882174, "learning_rate": 1.3724135742339002e-06, "loss": 17.1936, "step": 41887 }, { "epoch": 0.7656789807520061, "grad_norm": 5.61237269208288, "learning_rate": 1.372209864283202e-06, "loss": 17.1337, "step": 41888 }, { "epoch": 0.7656972599484526, "grad_norm": 5.349260947267062, "learning_rate": 1.3720061670475948e-06, "loss": 16.808, "step": 41889 }, { "epoch": 0.7657155391448992, "grad_norm": 7.727671518563607, "learning_rate": 1.3718024825277916e-06, "loss": 17.8174, "step": 41890 }, { "epoch": 0.7657338183413457, "grad_norm": 6.876437168134676, "learning_rate": 1.3715988107245044e-06, "loss": 18.0712, "step": 41891 }, { "epoch": 0.7657520975377923, "grad_norm": 6.216041737181981, "learning_rate": 1.3713951516384483e-06, "loss": 17.4932, "step": 41892 }, { "epoch": 0.7657703767342388, "grad_norm": 6.0791614214786005, "learning_rate": 1.3711915052703385e-06, "loss": 17.3043, "step": 41893 }, { "epoch": 0.7657886559306852, "grad_norm": 7.309281231591713, "learning_rate": 1.3709878716208864e-06, "loss": 17.8896, "step": 41894 }, { "epoch": 0.7658069351271318, "grad_norm": 6.832723005839898, "learning_rate": 1.3707842506908086e-06, "loss": 16.8502, "step": 41895 }, { "epoch": 0.7658252143235783, "grad_norm": 6.191045792573291, "learning_rate": 1.3705806424808165e-06, "loss": 17.558, "step": 41896 }, { "epoch": 0.7658434935200249, "grad_norm": 7.369221246941687, "learning_rate": 1.3703770469916234e-06, "loss": 17.8636, "step": 41897 }, { "epoch": 0.7658617727164714, "grad_norm": 6.109368188304827, "learning_rate": 1.370173464223945e-06, "loss": 17.1507, "step": 41898 }, { "epoch": 0.7658800519129179, "grad_norm": 7.284820930005996, "learning_rate": 1.3699698941784921e-06, "loss": 17.9122, "step": 41899 }, { "epoch": 0.7658983311093644, "grad_norm": 7.047339129175666, "learning_rate": 1.3697663368559815e-06, "loss": 17.5687, "step": 41900 }, { "epoch": 0.7659166103058109, "grad_norm": 6.447209158560328, "learning_rate": 1.369562792257123e-06, "loss": 17.1731, "step": 41901 }, { "epoch": 0.7659348895022575, "grad_norm": 6.582180170706818, "learning_rate": 1.3693592603826323e-06, "loss": 17.403, "step": 41902 }, { "epoch": 0.765953168698704, "grad_norm": 5.912641456680098, "learning_rate": 1.3691557412332236e-06, "loss": 17.4455, "step": 41903 }, { "epoch": 0.7659714478951505, "grad_norm": 7.036126793286075, "learning_rate": 1.368952234809609e-06, "loss": 17.2159, "step": 41904 }, { "epoch": 0.7659897270915971, "grad_norm": 4.9308924647007855, "learning_rate": 1.3687487411125e-06, "loss": 16.9055, "step": 41905 }, { "epoch": 0.7660080062880436, "grad_norm": 6.612062461668433, "learning_rate": 1.368545260142613e-06, "loss": 17.6277, "step": 41906 }, { "epoch": 0.7660262854844901, "grad_norm": 5.287860471799518, "learning_rate": 1.3683417919006603e-06, "loss": 16.9418, "step": 41907 }, { "epoch": 0.7660445646809366, "grad_norm": 6.187268784207686, "learning_rate": 1.3681383363873524e-06, "loss": 17.315, "step": 41908 }, { "epoch": 0.7660628438773831, "grad_norm": 5.99591518282836, "learning_rate": 1.3679348936034065e-06, "loss": 17.357, "step": 41909 }, { "epoch": 0.7660811230738297, "grad_norm": 6.221623731318778, "learning_rate": 1.3677314635495314e-06, "loss": 17.2633, "step": 41910 }, { "epoch": 0.7660994022702762, "grad_norm": 7.122589009504963, "learning_rate": 1.367528046226443e-06, "loss": 17.8746, "step": 41911 }, { "epoch": 0.7661176814667228, "grad_norm": 6.379488007167384, "learning_rate": 1.3673246416348552e-06, "loss": 17.4299, "step": 41912 }, { "epoch": 0.7661359606631692, "grad_norm": 6.989784569564426, "learning_rate": 1.3671212497754787e-06, "loss": 17.3876, "step": 41913 }, { "epoch": 0.7661542398596157, "grad_norm": 6.594874223188751, "learning_rate": 1.3669178706490255e-06, "loss": 17.4433, "step": 41914 }, { "epoch": 0.7661725190560623, "grad_norm": 5.284396230943548, "learning_rate": 1.3667145042562119e-06, "loss": 17.2409, "step": 41915 }, { "epoch": 0.7661907982525088, "grad_norm": 7.019534005753145, "learning_rate": 1.3665111505977469e-06, "loss": 17.3386, "step": 41916 }, { "epoch": 0.7662090774489554, "grad_norm": 6.076196461685097, "learning_rate": 1.3663078096743464e-06, "loss": 17.4937, "step": 41917 }, { "epoch": 0.7662273566454019, "grad_norm": 6.074604445877435, "learning_rate": 1.3661044814867218e-06, "loss": 17.1704, "step": 41918 }, { "epoch": 0.7662456358418483, "grad_norm": 6.607281524066622, "learning_rate": 1.3659011660355842e-06, "loss": 17.6945, "step": 41919 }, { "epoch": 0.7662639150382949, "grad_norm": 4.8620435905555075, "learning_rate": 1.3656978633216477e-06, "loss": 16.7763, "step": 41920 }, { "epoch": 0.7662821942347414, "grad_norm": 6.541939314344003, "learning_rate": 1.3654945733456265e-06, "loss": 17.1928, "step": 41921 }, { "epoch": 0.766300473431188, "grad_norm": 5.806338893307686, "learning_rate": 1.3652912961082293e-06, "loss": 17.2141, "step": 41922 }, { "epoch": 0.7663187526276345, "grad_norm": 6.343040636278838, "learning_rate": 1.3650880316101728e-06, "loss": 17.5418, "step": 41923 }, { "epoch": 0.766337031824081, "grad_norm": 5.3542617539264725, "learning_rate": 1.3648847798521675e-06, "loss": 17.0852, "step": 41924 }, { "epoch": 0.7663553110205276, "grad_norm": 6.828895379944249, "learning_rate": 1.3646815408349233e-06, "loss": 17.3773, "step": 41925 }, { "epoch": 0.766373590216974, "grad_norm": 7.5001433345326385, "learning_rate": 1.3644783145591573e-06, "loss": 17.6449, "step": 41926 }, { "epoch": 0.7663918694134206, "grad_norm": 5.97906685464305, "learning_rate": 1.3642751010255783e-06, "loss": 17.6251, "step": 41927 }, { "epoch": 0.7664101486098671, "grad_norm": 5.725672659475479, "learning_rate": 1.364071900234899e-06, "loss": 17.2748, "step": 41928 }, { "epoch": 0.7664284278063136, "grad_norm": 5.725712603123083, "learning_rate": 1.3638687121878314e-06, "loss": 17.3531, "step": 41929 }, { "epoch": 0.7664467070027602, "grad_norm": 4.613288469335297, "learning_rate": 1.3636655368850887e-06, "loss": 16.5777, "step": 41930 }, { "epoch": 0.7664649861992067, "grad_norm": 5.620669920154019, "learning_rate": 1.3634623743273844e-06, "loss": 17.1562, "step": 41931 }, { "epoch": 0.7664832653956533, "grad_norm": 6.346227754838747, "learning_rate": 1.3632592245154291e-06, "loss": 17.4952, "step": 41932 }, { "epoch": 0.7665015445920997, "grad_norm": 9.16271602880598, "learning_rate": 1.3630560874499326e-06, "loss": 17.6296, "step": 41933 }, { "epoch": 0.7665198237885462, "grad_norm": 7.238697921867598, "learning_rate": 1.3628529631316112e-06, "loss": 17.3137, "step": 41934 }, { "epoch": 0.7665381029849928, "grad_norm": 5.9548623398195, "learning_rate": 1.3626498515611741e-06, "loss": 16.9835, "step": 41935 }, { "epoch": 0.7665563821814393, "grad_norm": 6.240288534248973, "learning_rate": 1.3624467527393321e-06, "loss": 17.3067, "step": 41936 }, { "epoch": 0.7665746613778859, "grad_norm": 5.367413159527438, "learning_rate": 1.3622436666667999e-06, "loss": 17.076, "step": 41937 }, { "epoch": 0.7665929405743324, "grad_norm": 6.572660132282782, "learning_rate": 1.3620405933442865e-06, "loss": 17.4321, "step": 41938 }, { "epoch": 0.7666112197707788, "grad_norm": 7.101835801228671, "learning_rate": 1.3618375327725053e-06, "loss": 17.4023, "step": 41939 }, { "epoch": 0.7666294989672254, "grad_norm": 9.361965522630145, "learning_rate": 1.3616344849521696e-06, "loss": 18.5808, "step": 41940 }, { "epoch": 0.7666477781636719, "grad_norm": 6.72729960919817, "learning_rate": 1.3614314498839886e-06, "loss": 17.0089, "step": 41941 }, { "epoch": 0.7666660573601185, "grad_norm": 8.609299540295012, "learning_rate": 1.3612284275686739e-06, "loss": 18.0015, "step": 41942 }, { "epoch": 0.766684336556565, "grad_norm": 4.962653203824809, "learning_rate": 1.3610254180069387e-06, "loss": 16.6864, "step": 41943 }, { "epoch": 0.7667026157530115, "grad_norm": 5.015323725895538, "learning_rate": 1.3608224211994941e-06, "loss": 16.9576, "step": 41944 }, { "epoch": 0.766720894949458, "grad_norm": 6.426261747553577, "learning_rate": 1.3606194371470494e-06, "loss": 17.7075, "step": 41945 }, { "epoch": 0.7667391741459045, "grad_norm": 6.445117658737441, "learning_rate": 1.3604164658503195e-06, "loss": 17.3744, "step": 41946 }, { "epoch": 0.7667574533423511, "grad_norm": 7.384489054651236, "learning_rate": 1.360213507310012e-06, "loss": 17.9136, "step": 41947 }, { "epoch": 0.7667757325387976, "grad_norm": 6.4771963655020315, "learning_rate": 1.3600105615268411e-06, "loss": 17.3007, "step": 41948 }, { "epoch": 0.7667940117352441, "grad_norm": 5.2224433766453044, "learning_rate": 1.3598076285015183e-06, "loss": 16.9336, "step": 41949 }, { "epoch": 0.7668122909316907, "grad_norm": 4.595992739133034, "learning_rate": 1.3596047082347524e-06, "loss": 16.8856, "step": 41950 }, { "epoch": 0.7668305701281372, "grad_norm": 7.0978730278340665, "learning_rate": 1.3594018007272574e-06, "loss": 17.4758, "step": 41951 }, { "epoch": 0.7668488493245836, "grad_norm": 6.402643591306888, "learning_rate": 1.3591989059797433e-06, "loss": 17.2276, "step": 41952 }, { "epoch": 0.7668671285210302, "grad_norm": 4.187952902783609, "learning_rate": 1.35899602399292e-06, "loss": 16.669, "step": 41953 }, { "epoch": 0.7668854077174767, "grad_norm": 6.885717787541765, "learning_rate": 1.358793154767501e-06, "loss": 17.6583, "step": 41954 }, { "epoch": 0.7669036869139233, "grad_norm": 7.074162554459868, "learning_rate": 1.3585902983041943e-06, "loss": 17.7324, "step": 41955 }, { "epoch": 0.7669219661103698, "grad_norm": 6.400662988653581, "learning_rate": 1.358387454603714e-06, "loss": 17.2283, "step": 41956 }, { "epoch": 0.7669402453068163, "grad_norm": 5.158148327393345, "learning_rate": 1.3581846236667684e-06, "loss": 17.0413, "step": 41957 }, { "epoch": 0.7669585245032629, "grad_norm": 5.529765342206763, "learning_rate": 1.3579818054940713e-06, "loss": 17.2457, "step": 41958 }, { "epoch": 0.7669768036997093, "grad_norm": 6.351049490090601, "learning_rate": 1.3577790000863307e-06, "loss": 17.4601, "step": 41959 }, { "epoch": 0.7669950828961559, "grad_norm": 5.366425941858123, "learning_rate": 1.3575762074442594e-06, "loss": 17.0382, "step": 41960 }, { "epoch": 0.7670133620926024, "grad_norm": 7.236380937486903, "learning_rate": 1.3573734275685662e-06, "loss": 17.9564, "step": 41961 }, { "epoch": 0.7670316412890489, "grad_norm": 7.960370849051501, "learning_rate": 1.357170660459965e-06, "loss": 17.7455, "step": 41962 }, { "epoch": 0.7670499204854955, "grad_norm": 5.753962623543147, "learning_rate": 1.3569679061191643e-06, "loss": 17.3638, "step": 41963 }, { "epoch": 0.767068199681942, "grad_norm": 5.442404358499509, "learning_rate": 1.3567651645468732e-06, "loss": 16.9602, "step": 41964 }, { "epoch": 0.7670864788783885, "grad_norm": 5.893085450286495, "learning_rate": 1.356562435743804e-06, "loss": 17.2478, "step": 41965 }, { "epoch": 0.767104758074835, "grad_norm": 6.492898510858146, "learning_rate": 1.3563597197106686e-06, "loss": 17.3016, "step": 41966 }, { "epoch": 0.7671230372712815, "grad_norm": 5.09766835194113, "learning_rate": 1.3561570164481753e-06, "loss": 16.8325, "step": 41967 }, { "epoch": 0.7671413164677281, "grad_norm": 6.5818195282481495, "learning_rate": 1.3559543259570367e-06, "loss": 17.5767, "step": 41968 }, { "epoch": 0.7671595956641746, "grad_norm": 8.13391802456928, "learning_rate": 1.3557516482379624e-06, "loss": 18.0536, "step": 41969 }, { "epoch": 0.7671778748606212, "grad_norm": 6.311310432234311, "learning_rate": 1.3555489832916602e-06, "loss": 17.2476, "step": 41970 }, { "epoch": 0.7671961540570676, "grad_norm": 7.450266207115289, "learning_rate": 1.3553463311188442e-06, "loss": 17.618, "step": 41971 }, { "epoch": 0.7672144332535141, "grad_norm": 5.977152699151296, "learning_rate": 1.3551436917202232e-06, "loss": 17.2381, "step": 41972 }, { "epoch": 0.7672327124499607, "grad_norm": 5.8373752394238165, "learning_rate": 1.3549410650965056e-06, "loss": 17.2164, "step": 41973 }, { "epoch": 0.7672509916464072, "grad_norm": 7.013213907109025, "learning_rate": 1.3547384512484036e-06, "loss": 17.452, "step": 41974 }, { "epoch": 0.7672692708428538, "grad_norm": 5.382190678577232, "learning_rate": 1.3545358501766281e-06, "loss": 17.0699, "step": 41975 }, { "epoch": 0.7672875500393003, "grad_norm": 6.798096043826949, "learning_rate": 1.3543332618818872e-06, "loss": 17.6202, "step": 41976 }, { "epoch": 0.7673058292357468, "grad_norm": 5.448009847242177, "learning_rate": 1.354130686364893e-06, "loss": 17.0733, "step": 41977 }, { "epoch": 0.7673241084321933, "grad_norm": 46.555432878674104, "learning_rate": 1.3539281236263524e-06, "loss": 19.4045, "step": 41978 }, { "epoch": 0.7673423876286398, "grad_norm": 6.730491633356346, "learning_rate": 1.3537255736669798e-06, "loss": 17.3693, "step": 41979 }, { "epoch": 0.7673606668250864, "grad_norm": 7.630006221138227, "learning_rate": 1.3535230364874818e-06, "loss": 17.7044, "step": 41980 }, { "epoch": 0.7673789460215329, "grad_norm": 6.679595319482609, "learning_rate": 1.3533205120885678e-06, "loss": 17.556, "step": 41981 }, { "epoch": 0.7673972252179794, "grad_norm": 6.452033120143647, "learning_rate": 1.3531180004709504e-06, "loss": 17.239, "step": 41982 }, { "epoch": 0.767415504414426, "grad_norm": 7.52062068498035, "learning_rate": 1.3529155016353368e-06, "loss": 17.7481, "step": 41983 }, { "epoch": 0.7674337836108724, "grad_norm": 6.529227510749656, "learning_rate": 1.3527130155824381e-06, "loss": 17.7039, "step": 41984 }, { "epoch": 0.767452062807319, "grad_norm": 6.45238906461204, "learning_rate": 1.3525105423129648e-06, "loss": 17.6, "step": 41985 }, { "epoch": 0.7674703420037655, "grad_norm": 4.457771936899407, "learning_rate": 1.3523080818276257e-06, "loss": 16.7542, "step": 41986 }, { "epoch": 0.767488621200212, "grad_norm": 6.318564251510718, "learning_rate": 1.3521056341271289e-06, "loss": 17.3221, "step": 41987 }, { "epoch": 0.7675069003966586, "grad_norm": 5.832914077981863, "learning_rate": 1.3519031992121862e-06, "loss": 17.3173, "step": 41988 }, { "epoch": 0.7675251795931051, "grad_norm": 8.323557145887085, "learning_rate": 1.3517007770835067e-06, "loss": 18.1897, "step": 41989 }, { "epoch": 0.7675434587895517, "grad_norm": 6.664484953325077, "learning_rate": 1.3514983677417976e-06, "loss": 17.5128, "step": 41990 }, { "epoch": 0.7675617379859981, "grad_norm": 7.380160378055517, "learning_rate": 1.3512959711877721e-06, "loss": 17.4453, "step": 41991 }, { "epoch": 0.7675800171824446, "grad_norm": 5.470205685802795, "learning_rate": 1.351093587422136e-06, "loss": 16.9314, "step": 41992 }, { "epoch": 0.7675982963788912, "grad_norm": 7.15808048361001, "learning_rate": 1.3508912164456e-06, "loss": 17.4386, "step": 41993 }, { "epoch": 0.7676165755753377, "grad_norm": 6.228431748301056, "learning_rate": 1.3506888582588752e-06, "loss": 17.5189, "step": 41994 }, { "epoch": 0.7676348547717843, "grad_norm": 5.218108818125611, "learning_rate": 1.3504865128626677e-06, "loss": 16.971, "step": 41995 }, { "epoch": 0.7676531339682308, "grad_norm": 6.9400242822838845, "learning_rate": 1.3502841802576899e-06, "loss": 17.5331, "step": 41996 }, { "epoch": 0.7676714131646772, "grad_norm": 6.440333424565872, "learning_rate": 1.3500818604446492e-06, "loss": 17.563, "step": 41997 }, { "epoch": 0.7676896923611238, "grad_norm": 5.753394104283268, "learning_rate": 1.3498795534242536e-06, "loss": 17.0518, "step": 41998 }, { "epoch": 0.7677079715575703, "grad_norm": 6.179277978000289, "learning_rate": 1.3496772591972146e-06, "loss": 17.4593, "step": 41999 }, { "epoch": 0.7677262507540169, "grad_norm": 6.117314352661982, "learning_rate": 1.3494749777642403e-06, "loss": 17.4104, "step": 42000 }, { "epoch": 0.7677445299504634, "grad_norm": 5.661635271536251, "learning_rate": 1.3492727091260376e-06, "loss": 17.0255, "step": 42001 }, { "epoch": 0.7677628091469099, "grad_norm": 5.117555150576933, "learning_rate": 1.3490704532833177e-06, "loss": 16.7392, "step": 42002 }, { "epoch": 0.7677810883433565, "grad_norm": 4.9063432605869215, "learning_rate": 1.3488682102367906e-06, "loss": 16.9232, "step": 42003 }, { "epoch": 0.7677993675398029, "grad_norm": 6.88732867636986, "learning_rate": 1.3486659799871615e-06, "loss": 17.5346, "step": 42004 }, { "epoch": 0.7678176467362495, "grad_norm": 6.051908811244634, "learning_rate": 1.3484637625351433e-06, "loss": 17.7433, "step": 42005 }, { "epoch": 0.767835925932696, "grad_norm": 5.902812869527967, "learning_rate": 1.3482615578814429e-06, "loss": 17.3396, "step": 42006 }, { "epoch": 0.7678542051291425, "grad_norm": 7.301238822827281, "learning_rate": 1.3480593660267665e-06, "loss": 17.7823, "step": 42007 }, { "epoch": 0.7678724843255891, "grad_norm": 6.277778127250844, "learning_rate": 1.3478571869718278e-06, "loss": 17.647, "step": 42008 }, { "epoch": 0.7678907635220356, "grad_norm": 5.71950889145966, "learning_rate": 1.34765502071733e-06, "loss": 17.4476, "step": 42009 }, { "epoch": 0.7679090427184821, "grad_norm": 6.926771124160293, "learning_rate": 1.347452867263987e-06, "loss": 17.4107, "step": 42010 }, { "epoch": 0.7679273219149286, "grad_norm": 6.039639164060146, "learning_rate": 1.3472507266125024e-06, "loss": 17.1369, "step": 42011 }, { "epoch": 0.7679456011113751, "grad_norm": 7.7850375802661915, "learning_rate": 1.3470485987635874e-06, "loss": 17.2573, "step": 42012 }, { "epoch": 0.7679638803078217, "grad_norm": 5.962021823649142, "learning_rate": 1.3468464837179518e-06, "loss": 17.2443, "step": 42013 }, { "epoch": 0.7679821595042682, "grad_norm": 5.1193306320926615, "learning_rate": 1.346644381476302e-06, "loss": 16.8571, "step": 42014 }, { "epoch": 0.7680004387007148, "grad_norm": 7.176355509431181, "learning_rate": 1.346442292039345e-06, "loss": 17.3948, "step": 42015 }, { "epoch": 0.7680187178971613, "grad_norm": 6.417686809196933, "learning_rate": 1.3462402154077926e-06, "loss": 17.3539, "step": 42016 }, { "epoch": 0.7680369970936077, "grad_norm": 7.052235999812153, "learning_rate": 1.3460381515823507e-06, "loss": 17.6632, "step": 42017 }, { "epoch": 0.7680552762900543, "grad_norm": 8.27142639289995, "learning_rate": 1.345836100563727e-06, "loss": 18.7385, "step": 42018 }, { "epoch": 0.7680735554865008, "grad_norm": 5.592900759676516, "learning_rate": 1.3456340623526304e-06, "loss": 17.1915, "step": 42019 }, { "epoch": 0.7680918346829473, "grad_norm": 7.266632443125503, "learning_rate": 1.3454320369497709e-06, "loss": 17.618, "step": 42020 }, { "epoch": 0.7681101138793939, "grad_norm": 5.205658230286655, "learning_rate": 1.3452300243558535e-06, "loss": 16.9873, "step": 42021 }, { "epoch": 0.7681283930758404, "grad_norm": 5.8436834334967935, "learning_rate": 1.3450280245715891e-06, "loss": 17.0931, "step": 42022 }, { "epoch": 0.768146672272287, "grad_norm": 7.69935919729784, "learning_rate": 1.3448260375976829e-06, "loss": 17.5489, "step": 42023 }, { "epoch": 0.7681649514687334, "grad_norm": 6.889051093691721, "learning_rate": 1.3446240634348457e-06, "loss": 17.5293, "step": 42024 }, { "epoch": 0.7681832306651799, "grad_norm": 6.197426133614982, "learning_rate": 1.344422102083784e-06, "loss": 17.3769, "step": 42025 }, { "epoch": 0.7682015098616265, "grad_norm": 6.019695436139051, "learning_rate": 1.3442201535452037e-06, "loss": 17.177, "step": 42026 }, { "epoch": 0.768219789058073, "grad_norm": 6.192639137561909, "learning_rate": 1.3440182178198168e-06, "loss": 17.5748, "step": 42027 }, { "epoch": 0.7682380682545196, "grad_norm": 6.18738068700555, "learning_rate": 1.3438162949083266e-06, "loss": 17.3853, "step": 42028 }, { "epoch": 0.768256347450966, "grad_norm": 5.7122011919342635, "learning_rate": 1.3436143848114436e-06, "loss": 17.0301, "step": 42029 }, { "epoch": 0.7682746266474125, "grad_norm": 6.658440640406709, "learning_rate": 1.3434124875298759e-06, "loss": 17.2814, "step": 42030 }, { "epoch": 0.7682929058438591, "grad_norm": 6.1209108312651255, "learning_rate": 1.3432106030643304e-06, "loss": 17.1959, "step": 42031 }, { "epoch": 0.7683111850403056, "grad_norm": 5.749247838076864, "learning_rate": 1.3430087314155132e-06, "loss": 16.8823, "step": 42032 }, { "epoch": 0.7683294642367522, "grad_norm": 7.496345217305841, "learning_rate": 1.3428068725841343e-06, "loss": 17.5582, "step": 42033 }, { "epoch": 0.7683477434331987, "grad_norm": 4.997865107999226, "learning_rate": 1.3426050265708995e-06, "loss": 16.737, "step": 42034 }, { "epoch": 0.7683660226296452, "grad_norm": 5.70485139385102, "learning_rate": 1.3424031933765158e-06, "loss": 17.1378, "step": 42035 }, { "epoch": 0.7683843018260917, "grad_norm": 4.560410130982407, "learning_rate": 1.3422013730016924e-06, "loss": 16.5676, "step": 42036 }, { "epoch": 0.7684025810225382, "grad_norm": 5.5534989838932205, "learning_rate": 1.3419995654471352e-06, "loss": 17.0044, "step": 42037 }, { "epoch": 0.7684208602189848, "grad_norm": 5.230176864624802, "learning_rate": 1.3417977707135515e-06, "loss": 16.9057, "step": 42038 }, { "epoch": 0.7684391394154313, "grad_norm": 5.493785316649188, "learning_rate": 1.3415959888016505e-06, "loss": 17.1806, "step": 42039 }, { "epoch": 0.7684574186118778, "grad_norm": 5.084723755527207, "learning_rate": 1.341394219712137e-06, "loss": 16.9879, "step": 42040 }, { "epoch": 0.7684756978083244, "grad_norm": 6.561959508980792, "learning_rate": 1.3411924634457206e-06, "loss": 17.4378, "step": 42041 }, { "epoch": 0.7684939770047708, "grad_norm": 5.7539612031620795, "learning_rate": 1.3409907200031069e-06, "loss": 17.1139, "step": 42042 }, { "epoch": 0.7685122562012174, "grad_norm": 7.6852212834527815, "learning_rate": 1.3407889893850024e-06, "loss": 17.4754, "step": 42043 }, { "epoch": 0.7685305353976639, "grad_norm": 7.304774890651386, "learning_rate": 1.3405872715921159e-06, "loss": 17.6572, "step": 42044 }, { "epoch": 0.7685488145941104, "grad_norm": 5.257554495489144, "learning_rate": 1.3403855666251536e-06, "loss": 16.795, "step": 42045 }, { "epoch": 0.768567093790557, "grad_norm": 6.014422313125128, "learning_rate": 1.3401838744848205e-06, "loss": 17.4353, "step": 42046 }, { "epoch": 0.7685853729870035, "grad_norm": 7.229584160710278, "learning_rate": 1.3399821951718261e-06, "loss": 17.8851, "step": 42047 }, { "epoch": 0.7686036521834501, "grad_norm": 6.127878126690113, "learning_rate": 1.3397805286868776e-06, "loss": 17.0557, "step": 42048 }, { "epoch": 0.7686219313798965, "grad_norm": 6.866793100878818, "learning_rate": 1.3395788750306787e-06, "loss": 17.3636, "step": 42049 }, { "epoch": 0.768640210576343, "grad_norm": 5.399218051999486, "learning_rate": 1.3393772342039402e-06, "loss": 17.0554, "step": 42050 }, { "epoch": 0.7686584897727896, "grad_norm": 5.0338272183682005, "learning_rate": 1.3391756062073668e-06, "loss": 17.0079, "step": 42051 }, { "epoch": 0.7686767689692361, "grad_norm": 5.852238432230543, "learning_rate": 1.3389739910416639e-06, "loss": 17.2681, "step": 42052 }, { "epoch": 0.7686950481656827, "grad_norm": 7.1343162949241865, "learning_rate": 1.338772388707541e-06, "loss": 17.7579, "step": 42053 }, { "epoch": 0.7687133273621292, "grad_norm": 6.068385097036497, "learning_rate": 1.3385707992057011e-06, "loss": 17.2271, "step": 42054 }, { "epoch": 0.7687316065585756, "grad_norm": 6.161737318682896, "learning_rate": 1.338369222536855e-06, "loss": 16.9939, "step": 42055 }, { "epoch": 0.7687498857550222, "grad_norm": 5.390147011681706, "learning_rate": 1.3381676587017051e-06, "loss": 16.9722, "step": 42056 }, { "epoch": 0.7687681649514687, "grad_norm": 7.452297302949128, "learning_rate": 1.3379661077009603e-06, "loss": 17.9234, "step": 42057 }, { "epoch": 0.7687864441479153, "grad_norm": 5.4427651921939875, "learning_rate": 1.3377645695353276e-06, "loss": 17.0048, "step": 42058 }, { "epoch": 0.7688047233443618, "grad_norm": 5.528710942034536, "learning_rate": 1.3375630442055121e-06, "loss": 16.9728, "step": 42059 }, { "epoch": 0.7688230025408083, "grad_norm": 6.410846902665138, "learning_rate": 1.337361531712219e-06, "loss": 17.3589, "step": 42060 }, { "epoch": 0.7688412817372549, "grad_norm": 6.103160608239035, "learning_rate": 1.3371600320561578e-06, "loss": 16.9608, "step": 42061 }, { "epoch": 0.7688595609337013, "grad_norm": 7.316350306748815, "learning_rate": 1.3369585452380323e-06, "loss": 17.3569, "step": 42062 }, { "epoch": 0.7688778401301479, "grad_norm": 6.894958675914393, "learning_rate": 1.3367570712585482e-06, "loss": 17.4928, "step": 42063 }, { "epoch": 0.7688961193265944, "grad_norm": 5.2208325454483155, "learning_rate": 1.3365556101184135e-06, "loss": 16.9936, "step": 42064 }, { "epoch": 0.7689143985230409, "grad_norm": 8.416257215633392, "learning_rate": 1.3363541618183328e-06, "loss": 17.6552, "step": 42065 }, { "epoch": 0.7689326777194875, "grad_norm": 6.004412171088542, "learning_rate": 1.3361527263590124e-06, "loss": 17.3042, "step": 42066 }, { "epoch": 0.768950956915934, "grad_norm": 5.50098668523392, "learning_rate": 1.3359513037411602e-06, "loss": 16.9761, "step": 42067 }, { "epoch": 0.7689692361123805, "grad_norm": 5.399044785174746, "learning_rate": 1.3357498939654795e-06, "loss": 17.0589, "step": 42068 }, { "epoch": 0.768987515308827, "grad_norm": 5.4251543469085295, "learning_rate": 1.3355484970326788e-06, "loss": 16.9518, "step": 42069 }, { "epoch": 0.7690057945052735, "grad_norm": 6.66096672454076, "learning_rate": 1.3353471129434625e-06, "loss": 17.4114, "step": 42070 }, { "epoch": 0.7690240737017201, "grad_norm": 5.279467149272865, "learning_rate": 1.3351457416985353e-06, "loss": 16.9623, "step": 42071 }, { "epoch": 0.7690423528981666, "grad_norm": 5.081806088831604, "learning_rate": 1.3349443832986059e-06, "loss": 16.9654, "step": 42072 }, { "epoch": 0.7690606320946132, "grad_norm": 5.6920965597430255, "learning_rate": 1.3347430377443777e-06, "loss": 17.1645, "step": 42073 }, { "epoch": 0.7690789112910597, "grad_norm": 6.172433693368219, "learning_rate": 1.3345417050365561e-06, "loss": 17.5844, "step": 42074 }, { "epoch": 0.7690971904875061, "grad_norm": 6.369855571091064, "learning_rate": 1.334340385175848e-06, "loss": 17.3822, "step": 42075 }, { "epoch": 0.7691154696839527, "grad_norm": 7.166441198200393, "learning_rate": 1.3341390781629598e-06, "loss": 17.5596, "step": 42076 }, { "epoch": 0.7691337488803992, "grad_norm": 6.955635863208286, "learning_rate": 1.3339377839985945e-06, "loss": 17.2672, "step": 42077 }, { "epoch": 0.7691520280768458, "grad_norm": 8.609853835911508, "learning_rate": 1.3337365026834608e-06, "loss": 18.0737, "step": 42078 }, { "epoch": 0.7691703072732923, "grad_norm": 7.430252592526133, "learning_rate": 1.3335352342182627e-06, "loss": 17.2765, "step": 42079 }, { "epoch": 0.7691885864697388, "grad_norm": 5.394005319240251, "learning_rate": 1.3333339786037037e-06, "loss": 17.2253, "step": 42080 }, { "epoch": 0.7692068656661853, "grad_norm": 6.384898765259683, "learning_rate": 1.3331327358404928e-06, "loss": 17.2541, "step": 42081 }, { "epoch": 0.7692251448626318, "grad_norm": 6.059502524745306, "learning_rate": 1.3329315059293313e-06, "loss": 17.289, "step": 42082 }, { "epoch": 0.7692434240590784, "grad_norm": 5.922610758042924, "learning_rate": 1.3327302888709287e-06, "loss": 16.9791, "step": 42083 }, { "epoch": 0.7692617032555249, "grad_norm": 6.690451413687568, "learning_rate": 1.3325290846659866e-06, "loss": 17.4622, "step": 42084 }, { "epoch": 0.7692799824519714, "grad_norm": 5.908386573515866, "learning_rate": 1.3323278933152117e-06, "loss": 17.0374, "step": 42085 }, { "epoch": 0.769298261648418, "grad_norm": 7.628508215962999, "learning_rate": 1.3321267148193107e-06, "loss": 17.3689, "step": 42086 }, { "epoch": 0.7693165408448644, "grad_norm": 6.316706136395846, "learning_rate": 1.3319255491789873e-06, "loss": 17.4678, "step": 42087 }, { "epoch": 0.7693348200413109, "grad_norm": 9.023795564196856, "learning_rate": 1.3317243963949455e-06, "loss": 18.009, "step": 42088 }, { "epoch": 0.7693530992377575, "grad_norm": 7.925760075220276, "learning_rate": 1.3315232564678927e-06, "loss": 18.2849, "step": 42089 }, { "epoch": 0.769371378434204, "grad_norm": 6.031885339864247, "learning_rate": 1.3313221293985324e-06, "loss": 17.2324, "step": 42090 }, { "epoch": 0.7693896576306506, "grad_norm": 6.356527718568035, "learning_rate": 1.3311210151875676e-06, "loss": 17.19, "step": 42091 }, { "epoch": 0.7694079368270971, "grad_norm": 6.0775087597022, "learning_rate": 1.330919913835706e-06, "loss": 17.3642, "step": 42092 }, { "epoch": 0.7694262160235436, "grad_norm": 6.038025246766434, "learning_rate": 1.3307188253436531e-06, "loss": 17.2044, "step": 42093 }, { "epoch": 0.7694444952199901, "grad_norm": 4.42216142891082, "learning_rate": 1.3305177497121109e-06, "loss": 16.6561, "step": 42094 }, { "epoch": 0.7694627744164366, "grad_norm": 6.655479262025585, "learning_rate": 1.3303166869417867e-06, "loss": 17.4823, "step": 42095 }, { "epoch": 0.7694810536128832, "grad_norm": 6.973021216251497, "learning_rate": 1.3301156370333846e-06, "loss": 17.6526, "step": 42096 }, { "epoch": 0.7694993328093297, "grad_norm": 6.0202601711958605, "learning_rate": 1.3299145999876068e-06, "loss": 16.938, "step": 42097 }, { "epoch": 0.7695176120057762, "grad_norm": 7.402221587879155, "learning_rate": 1.329713575805161e-06, "loss": 17.95, "step": 42098 }, { "epoch": 0.7695358912022228, "grad_norm": 8.017038759073019, "learning_rate": 1.3295125644867502e-06, "loss": 18.0549, "step": 42099 }, { "epoch": 0.7695541703986692, "grad_norm": 7.016370328369079, "learning_rate": 1.3293115660330802e-06, "loss": 17.7322, "step": 42100 }, { "epoch": 0.7695724495951158, "grad_norm": 5.917775745125843, "learning_rate": 1.3291105804448528e-06, "loss": 17.0573, "step": 42101 }, { "epoch": 0.7695907287915623, "grad_norm": 6.27032457619855, "learning_rate": 1.3289096077227748e-06, "loss": 17.4973, "step": 42102 }, { "epoch": 0.7696090079880088, "grad_norm": 6.073958354281358, "learning_rate": 1.3287086478675515e-06, "loss": 17.2635, "step": 42103 }, { "epoch": 0.7696272871844554, "grad_norm": 6.612833258891773, "learning_rate": 1.3285077008798858e-06, "loss": 17.5359, "step": 42104 }, { "epoch": 0.7696455663809019, "grad_norm": 6.512795018347801, "learning_rate": 1.3283067667604804e-06, "loss": 17.2941, "step": 42105 }, { "epoch": 0.7696638455773485, "grad_norm": 5.718299576684515, "learning_rate": 1.3281058455100425e-06, "loss": 17.2392, "step": 42106 }, { "epoch": 0.7696821247737949, "grad_norm": 8.524024429755833, "learning_rate": 1.327904937129275e-06, "loss": 17.3751, "step": 42107 }, { "epoch": 0.7697004039702414, "grad_norm": 7.414986466252485, "learning_rate": 1.3277040416188808e-06, "loss": 17.6164, "step": 42108 }, { "epoch": 0.769718683166688, "grad_norm": 9.417258732198011, "learning_rate": 1.3275031589795667e-06, "loss": 17.8156, "step": 42109 }, { "epoch": 0.7697369623631345, "grad_norm": 6.1817684189034185, "learning_rate": 1.327302289212034e-06, "loss": 17.3387, "step": 42110 }, { "epoch": 0.7697552415595811, "grad_norm": 5.500456829512806, "learning_rate": 1.3271014323169878e-06, "loss": 16.9486, "step": 42111 }, { "epoch": 0.7697735207560276, "grad_norm": 6.41578938479554, "learning_rate": 1.3269005882951342e-06, "loss": 17.4374, "step": 42112 }, { "epoch": 0.769791799952474, "grad_norm": 6.108747019709526, "learning_rate": 1.3266997571471752e-06, "loss": 17.3009, "step": 42113 }, { "epoch": 0.7698100791489206, "grad_norm": 6.796408066464205, "learning_rate": 1.326498938873813e-06, "loss": 17.2241, "step": 42114 }, { "epoch": 0.7698283583453671, "grad_norm": 7.404691763348593, "learning_rate": 1.3262981334757547e-06, "loss": 17.8147, "step": 42115 }, { "epoch": 0.7698466375418137, "grad_norm": 7.247270995716043, "learning_rate": 1.3260973409537015e-06, "loss": 17.3668, "step": 42116 }, { "epoch": 0.7698649167382602, "grad_norm": 7.812853400111452, "learning_rate": 1.3258965613083596e-06, "loss": 17.3051, "step": 42117 }, { "epoch": 0.7698831959347067, "grad_norm": 5.660043544266192, "learning_rate": 1.3256957945404314e-06, "loss": 16.9002, "step": 42118 }, { "epoch": 0.7699014751311533, "grad_norm": 6.420841921241735, "learning_rate": 1.3254950406506194e-06, "loss": 17.3593, "step": 42119 }, { "epoch": 0.7699197543275997, "grad_norm": 6.495648653696358, "learning_rate": 1.3252942996396285e-06, "loss": 17.6649, "step": 42120 }, { "epoch": 0.7699380335240463, "grad_norm": 6.313346171520587, "learning_rate": 1.3250935715081631e-06, "loss": 17.3403, "step": 42121 }, { "epoch": 0.7699563127204928, "grad_norm": 5.191503427798274, "learning_rate": 1.3248928562569247e-06, "loss": 16.8783, "step": 42122 }, { "epoch": 0.7699745919169393, "grad_norm": 6.1607204523010966, "learning_rate": 1.3246921538866193e-06, "loss": 17.2341, "step": 42123 }, { "epoch": 0.7699928711133859, "grad_norm": 5.835165042794911, "learning_rate": 1.324491464397949e-06, "loss": 17.0881, "step": 42124 }, { "epoch": 0.7700111503098324, "grad_norm": 5.30870988833344, "learning_rate": 1.3242907877916161e-06, "loss": 17.0527, "step": 42125 }, { "epoch": 0.770029429506279, "grad_norm": 7.459349921829104, "learning_rate": 1.3240901240683256e-06, "loss": 17.8281, "step": 42126 }, { "epoch": 0.7700477087027254, "grad_norm": 4.915277349485325, "learning_rate": 1.323889473228781e-06, "loss": 16.9259, "step": 42127 }, { "epoch": 0.7700659878991719, "grad_norm": 5.837457907092279, "learning_rate": 1.323688835273683e-06, "loss": 17.3083, "step": 42128 }, { "epoch": 0.7700842670956185, "grad_norm": 5.804481572835266, "learning_rate": 1.3234882102037372e-06, "loss": 17.4851, "step": 42129 }, { "epoch": 0.770102546292065, "grad_norm": 5.289656985810174, "learning_rate": 1.3232875980196452e-06, "loss": 16.9028, "step": 42130 }, { "epoch": 0.7701208254885116, "grad_norm": 7.747151852731662, "learning_rate": 1.3230869987221135e-06, "loss": 18.0832, "step": 42131 }, { "epoch": 0.770139104684958, "grad_norm": 7.177404403672559, "learning_rate": 1.3228864123118423e-06, "loss": 17.9634, "step": 42132 }, { "epoch": 0.7701573838814045, "grad_norm": 8.245315054765971, "learning_rate": 1.3226858387895335e-06, "loss": 17.9903, "step": 42133 }, { "epoch": 0.7701756630778511, "grad_norm": 5.776278269033703, "learning_rate": 1.3224852781558939e-06, "loss": 16.8948, "step": 42134 }, { "epoch": 0.7701939422742976, "grad_norm": 6.848361273358456, "learning_rate": 1.3222847304116238e-06, "loss": 17.4997, "step": 42135 }, { "epoch": 0.7702122214707442, "grad_norm": 6.348945631905145, "learning_rate": 1.322084195557425e-06, "loss": 17.6082, "step": 42136 }, { "epoch": 0.7702305006671907, "grad_norm": 6.4038923950246085, "learning_rate": 1.3218836735940038e-06, "loss": 17.1967, "step": 42137 }, { "epoch": 0.7702487798636372, "grad_norm": 6.551584037024685, "learning_rate": 1.3216831645220595e-06, "loss": 17.5706, "step": 42138 }, { "epoch": 0.7702670590600837, "grad_norm": 7.516139715516341, "learning_rate": 1.3214826683422966e-06, "loss": 17.5295, "step": 42139 }, { "epoch": 0.7702853382565302, "grad_norm": 6.693498579537447, "learning_rate": 1.3212821850554186e-06, "loss": 17.8363, "step": 42140 }, { "epoch": 0.7703036174529768, "grad_norm": 6.4302285771395065, "learning_rate": 1.321081714662128e-06, "loss": 17.2689, "step": 42141 }, { "epoch": 0.7703218966494233, "grad_norm": 5.647638405060015, "learning_rate": 1.3208812571631252e-06, "loss": 17.0204, "step": 42142 }, { "epoch": 0.7703401758458698, "grad_norm": 5.761402920155874, "learning_rate": 1.3206808125591158e-06, "loss": 17.1632, "step": 42143 }, { "epoch": 0.7703584550423164, "grad_norm": 4.950851863567363, "learning_rate": 1.3204803808508004e-06, "loss": 16.9206, "step": 42144 }, { "epoch": 0.7703767342387629, "grad_norm": 6.068024758857675, "learning_rate": 1.3202799620388812e-06, "loss": 17.4678, "step": 42145 }, { "epoch": 0.7703950134352094, "grad_norm": 5.5281673596469165, "learning_rate": 1.320079556124061e-06, "loss": 17.1789, "step": 42146 }, { "epoch": 0.7704132926316559, "grad_norm": 7.218266101127419, "learning_rate": 1.319879163107044e-06, "loss": 17.6723, "step": 42147 }, { "epoch": 0.7704315718281024, "grad_norm": 5.531577262314594, "learning_rate": 1.31967878298853e-06, "loss": 17.2445, "step": 42148 }, { "epoch": 0.770449851024549, "grad_norm": 7.24619391090137, "learning_rate": 1.3194784157692243e-06, "loss": 17.468, "step": 42149 }, { "epoch": 0.7704681302209955, "grad_norm": 7.324126971605066, "learning_rate": 1.3192780614498253e-06, "loss": 17.5508, "step": 42150 }, { "epoch": 0.7704864094174421, "grad_norm": 7.432096818355165, "learning_rate": 1.319077720031039e-06, "loss": 17.3357, "step": 42151 }, { "epoch": 0.7705046886138885, "grad_norm": 6.274192020613219, "learning_rate": 1.3188773915135656e-06, "loss": 17.4085, "step": 42152 }, { "epoch": 0.770522967810335, "grad_norm": 6.927624502563825, "learning_rate": 1.3186770758981066e-06, "loss": 17.8382, "step": 42153 }, { "epoch": 0.7705412470067816, "grad_norm": 5.733295020445452, "learning_rate": 1.3184767731853658e-06, "loss": 17.5043, "step": 42154 }, { "epoch": 0.7705595262032281, "grad_norm": 8.877201616259928, "learning_rate": 1.3182764833760431e-06, "loss": 18.3939, "step": 42155 }, { "epoch": 0.7705778053996746, "grad_norm": 5.007515758564368, "learning_rate": 1.3180762064708425e-06, "loss": 16.8962, "step": 42156 }, { "epoch": 0.7705960845961212, "grad_norm": 6.292400390580914, "learning_rate": 1.3178759424704663e-06, "loss": 17.1456, "step": 42157 }, { "epoch": 0.7706143637925676, "grad_norm": 6.011698954170917, "learning_rate": 1.317675691375615e-06, "loss": 17.4727, "step": 42158 }, { "epoch": 0.7706326429890142, "grad_norm": 8.049927179690217, "learning_rate": 1.3174754531869898e-06, "loss": 17.3228, "step": 42159 }, { "epoch": 0.7706509221854607, "grad_norm": 6.74544563000899, "learning_rate": 1.3172752279052954e-06, "loss": 17.8225, "step": 42160 }, { "epoch": 0.7706692013819072, "grad_norm": 5.28470010018236, "learning_rate": 1.31707501553123e-06, "loss": 17.0471, "step": 42161 }, { "epoch": 0.7706874805783538, "grad_norm": 6.063588156089664, "learning_rate": 1.3168748160654983e-06, "loss": 17.2638, "step": 42162 }, { "epoch": 0.7707057597748003, "grad_norm": 5.687335766343422, "learning_rate": 1.3166746295088013e-06, "loss": 17.1561, "step": 42163 }, { "epoch": 0.7707240389712469, "grad_norm": 5.85140651157775, "learning_rate": 1.316474455861838e-06, "loss": 17.228, "step": 42164 }, { "epoch": 0.7707423181676933, "grad_norm": 6.169542370935145, "learning_rate": 1.3162742951253122e-06, "loss": 17.4713, "step": 42165 }, { "epoch": 0.7707605973641398, "grad_norm": 6.6505444965684335, "learning_rate": 1.3160741472999273e-06, "loss": 17.339, "step": 42166 }, { "epoch": 0.7707788765605864, "grad_norm": 5.855289844000425, "learning_rate": 1.3158740123863806e-06, "loss": 17.4115, "step": 42167 }, { "epoch": 0.7707971557570329, "grad_norm": 5.77719561758757, "learning_rate": 1.3156738903853777e-06, "loss": 17.1235, "step": 42168 }, { "epoch": 0.7708154349534795, "grad_norm": 8.108384458435838, "learning_rate": 1.3154737812976182e-06, "loss": 18.2523, "step": 42169 }, { "epoch": 0.770833714149926, "grad_norm": 6.634333166341371, "learning_rate": 1.3152736851238018e-06, "loss": 17.5388, "step": 42170 }, { "epoch": 0.7708519933463724, "grad_norm": 6.020467167905093, "learning_rate": 1.3150736018646332e-06, "loss": 17.215, "step": 42171 }, { "epoch": 0.770870272542819, "grad_norm": 5.9423341038643684, "learning_rate": 1.3148735315208117e-06, "loss": 17.4368, "step": 42172 }, { "epoch": 0.7708885517392655, "grad_norm": 6.050644277644474, "learning_rate": 1.3146734740930366e-06, "loss": 17.3381, "step": 42173 }, { "epoch": 0.7709068309357121, "grad_norm": 6.155608664295818, "learning_rate": 1.3144734295820117e-06, "loss": 17.3098, "step": 42174 }, { "epoch": 0.7709251101321586, "grad_norm": 6.981773427229799, "learning_rate": 1.3142733979884392e-06, "loss": 17.7416, "step": 42175 }, { "epoch": 0.7709433893286051, "grad_norm": 6.405298545985809, "learning_rate": 1.3140733793130167e-06, "loss": 17.2854, "step": 42176 }, { "epoch": 0.7709616685250517, "grad_norm": 5.523628353493497, "learning_rate": 1.313873373556449e-06, "loss": 17.0211, "step": 42177 }, { "epoch": 0.7709799477214981, "grad_norm": 6.4410442692287315, "learning_rate": 1.3136733807194336e-06, "loss": 17.4919, "step": 42178 }, { "epoch": 0.7709982269179447, "grad_norm": 4.875455090475091, "learning_rate": 1.3134734008026745e-06, "loss": 16.9109, "step": 42179 }, { "epoch": 0.7710165061143912, "grad_norm": 6.778525283507945, "learning_rate": 1.3132734338068714e-06, "loss": 17.2446, "step": 42180 }, { "epoch": 0.7710347853108377, "grad_norm": 6.471229944946474, "learning_rate": 1.3130734797327237e-06, "loss": 17.6312, "step": 42181 }, { "epoch": 0.7710530645072843, "grad_norm": 5.875473795438252, "learning_rate": 1.312873538580935e-06, "loss": 17.4075, "step": 42182 }, { "epoch": 0.7710713437037308, "grad_norm": 5.474046866877959, "learning_rate": 1.3126736103522026e-06, "loss": 17.2479, "step": 42183 }, { "epoch": 0.7710896229001774, "grad_norm": 7.682891173160921, "learning_rate": 1.3124736950472294e-06, "loss": 17.9905, "step": 42184 }, { "epoch": 0.7711079020966238, "grad_norm": 6.716397186795238, "learning_rate": 1.3122737926667178e-06, "loss": 17.4264, "step": 42185 }, { "epoch": 0.7711261812930703, "grad_norm": 5.8498558201944935, "learning_rate": 1.312073903211366e-06, "loss": 17.2174, "step": 42186 }, { "epoch": 0.7711444604895169, "grad_norm": 6.341107332091218, "learning_rate": 1.3118740266818741e-06, "loss": 17.2474, "step": 42187 }, { "epoch": 0.7711627396859634, "grad_norm": 7.32587127059444, "learning_rate": 1.3116741630789454e-06, "loss": 17.849, "step": 42188 }, { "epoch": 0.77118101888241, "grad_norm": 6.048641343324894, "learning_rate": 1.311474312403278e-06, "loss": 17.292, "step": 42189 }, { "epoch": 0.7711992980788565, "grad_norm": 5.732819160770743, "learning_rate": 1.3112744746555717e-06, "loss": 17.2864, "step": 42190 }, { "epoch": 0.7712175772753029, "grad_norm": 6.27596827011618, "learning_rate": 1.31107464983653e-06, "loss": 17.331, "step": 42191 }, { "epoch": 0.7712358564717495, "grad_norm": 6.362375414522369, "learning_rate": 1.31087483794685e-06, "loss": 17.3538, "step": 42192 }, { "epoch": 0.771254135668196, "grad_norm": 6.331651686458366, "learning_rate": 1.3106750389872335e-06, "loss": 17.6408, "step": 42193 }, { "epoch": 0.7712724148646426, "grad_norm": 5.892176912481217, "learning_rate": 1.3104752529583824e-06, "loss": 17.1231, "step": 42194 }, { "epoch": 0.7712906940610891, "grad_norm": 7.280116700233315, "learning_rate": 1.310275479860994e-06, "loss": 17.5229, "step": 42195 }, { "epoch": 0.7713089732575356, "grad_norm": 6.6159191827522195, "learning_rate": 1.3100757196957714e-06, "loss": 17.3523, "step": 42196 }, { "epoch": 0.7713272524539821, "grad_norm": 6.414384812337072, "learning_rate": 1.309875972463413e-06, "loss": 17.4015, "step": 42197 }, { "epoch": 0.7713455316504286, "grad_norm": 6.423835366130201, "learning_rate": 1.3096762381646178e-06, "loss": 17.4503, "step": 42198 }, { "epoch": 0.7713638108468752, "grad_norm": 5.864509879937904, "learning_rate": 1.3094765168000883e-06, "loss": 17.2169, "step": 42199 }, { "epoch": 0.7713820900433217, "grad_norm": 6.3822010102650735, "learning_rate": 1.3092768083705238e-06, "loss": 17.3512, "step": 42200 }, { "epoch": 0.7714003692397682, "grad_norm": 5.830794395677749, "learning_rate": 1.3090771128766222e-06, "loss": 17.2491, "step": 42201 }, { "epoch": 0.7714186484362148, "grad_norm": 6.314712781066457, "learning_rate": 1.3088774303190849e-06, "loss": 17.258, "step": 42202 }, { "epoch": 0.7714369276326613, "grad_norm": 5.594447997239464, "learning_rate": 1.3086777606986134e-06, "loss": 17.0309, "step": 42203 }, { "epoch": 0.7714552068291078, "grad_norm": 5.730515694157936, "learning_rate": 1.3084781040159046e-06, "loss": 16.9225, "step": 42204 }, { "epoch": 0.7714734860255543, "grad_norm": 6.743232739583662, "learning_rate": 1.3082784602716609e-06, "loss": 17.6256, "step": 42205 }, { "epoch": 0.7714917652220008, "grad_norm": 7.607841076946748, "learning_rate": 1.3080788294665796e-06, "loss": 17.9976, "step": 42206 }, { "epoch": 0.7715100444184474, "grad_norm": 6.3133097521735335, "learning_rate": 1.3078792116013634e-06, "loss": 17.2068, "step": 42207 }, { "epoch": 0.7715283236148939, "grad_norm": 5.622683450747009, "learning_rate": 1.3076796066767094e-06, "loss": 17.2493, "step": 42208 }, { "epoch": 0.7715466028113405, "grad_norm": 6.006114117755552, "learning_rate": 1.3074800146933175e-06, "loss": 17.3419, "step": 42209 }, { "epoch": 0.771564882007787, "grad_norm": 7.165590183455222, "learning_rate": 1.3072804356518887e-06, "loss": 17.4685, "step": 42210 }, { "epoch": 0.7715831612042334, "grad_norm": 7.0951619479457335, "learning_rate": 1.30708086955312e-06, "loss": 17.4892, "step": 42211 }, { "epoch": 0.77160144040068, "grad_norm": 6.967063723961384, "learning_rate": 1.3068813163977124e-06, "loss": 17.3893, "step": 42212 }, { "epoch": 0.7716197195971265, "grad_norm": 6.803417426117848, "learning_rate": 1.306681776186367e-06, "loss": 17.677, "step": 42213 }, { "epoch": 0.7716379987935731, "grad_norm": 5.2267748979205, "learning_rate": 1.3064822489197809e-06, "loss": 17.0736, "step": 42214 }, { "epoch": 0.7716562779900196, "grad_norm": 5.680642399907411, "learning_rate": 1.3062827345986534e-06, "loss": 17.3496, "step": 42215 }, { "epoch": 0.771674557186466, "grad_norm": 4.947760909126579, "learning_rate": 1.306083233223685e-06, "loss": 16.8492, "step": 42216 }, { "epoch": 0.7716928363829126, "grad_norm": 7.014981154734477, "learning_rate": 1.3058837447955752e-06, "loss": 17.7482, "step": 42217 }, { "epoch": 0.7717111155793591, "grad_norm": 6.894790587991493, "learning_rate": 1.30568426931502e-06, "loss": 17.6204, "step": 42218 }, { "epoch": 0.7717293947758057, "grad_norm": 6.80034431346749, "learning_rate": 1.3054848067827213e-06, "loss": 17.4527, "step": 42219 }, { "epoch": 0.7717476739722522, "grad_norm": 7.278278508288717, "learning_rate": 1.3052853571993796e-06, "loss": 17.62, "step": 42220 }, { "epoch": 0.7717659531686987, "grad_norm": 6.775973863663512, "learning_rate": 1.30508592056569e-06, "loss": 17.4124, "step": 42221 }, { "epoch": 0.7717842323651453, "grad_norm": 6.077693130824012, "learning_rate": 1.3048864968823554e-06, "loss": 17.4105, "step": 42222 }, { "epoch": 0.7718025115615917, "grad_norm": 5.36038969098115, "learning_rate": 1.3046870861500715e-06, "loss": 17.0694, "step": 42223 }, { "epoch": 0.7718207907580382, "grad_norm": 6.632955333469252, "learning_rate": 1.3044876883695401e-06, "loss": 17.2777, "step": 42224 }, { "epoch": 0.7718390699544848, "grad_norm": 5.376559129930341, "learning_rate": 1.3042883035414584e-06, "loss": 16.9504, "step": 42225 }, { "epoch": 0.7718573491509313, "grad_norm": 5.40062812928677, "learning_rate": 1.3040889316665244e-06, "loss": 17.0166, "step": 42226 }, { "epoch": 0.7718756283473779, "grad_norm": 5.653730739776067, "learning_rate": 1.3038895727454392e-06, "loss": 16.9063, "step": 42227 }, { "epoch": 0.7718939075438244, "grad_norm": 5.5356008603485645, "learning_rate": 1.3036902267788991e-06, "loss": 16.9998, "step": 42228 }, { "epoch": 0.7719121867402708, "grad_norm": 7.113438609786504, "learning_rate": 1.303490893767604e-06, "loss": 17.4386, "step": 42229 }, { "epoch": 0.7719304659367174, "grad_norm": 6.036139037527547, "learning_rate": 1.3032915737122536e-06, "loss": 17.1484, "step": 42230 }, { "epoch": 0.7719487451331639, "grad_norm": 4.883953466739426, "learning_rate": 1.303092266613546e-06, "loss": 16.8723, "step": 42231 }, { "epoch": 0.7719670243296105, "grad_norm": 6.15969926584805, "learning_rate": 1.3028929724721772e-06, "loss": 17.3119, "step": 42232 }, { "epoch": 0.771985303526057, "grad_norm": 8.205718797680259, "learning_rate": 1.30269369128885e-06, "loss": 17.1825, "step": 42233 }, { "epoch": 0.7720035827225035, "grad_norm": 5.315250385242432, "learning_rate": 1.30249442306426e-06, "loss": 17.0849, "step": 42234 }, { "epoch": 0.7720218619189501, "grad_norm": 6.124973536215114, "learning_rate": 1.3022951677991046e-06, "loss": 17.5149, "step": 42235 }, { "epoch": 0.7720401411153965, "grad_norm": 5.847891092035004, "learning_rate": 1.302095925494085e-06, "loss": 17.0776, "step": 42236 }, { "epoch": 0.7720584203118431, "grad_norm": 5.834732251038025, "learning_rate": 1.3018966961498974e-06, "loss": 17.3541, "step": 42237 }, { "epoch": 0.7720766995082896, "grad_norm": 5.245798117763427, "learning_rate": 1.301697479767241e-06, "loss": 16.913, "step": 42238 }, { "epoch": 0.7720949787047361, "grad_norm": 5.3036071618662435, "learning_rate": 1.3014982763468154e-06, "loss": 16.8385, "step": 42239 }, { "epoch": 0.7721132579011827, "grad_norm": 5.976230947691092, "learning_rate": 1.3012990858893155e-06, "loss": 17.3161, "step": 42240 }, { "epoch": 0.7721315370976292, "grad_norm": 7.1477769131375695, "learning_rate": 1.3010999083954435e-06, "loss": 17.8854, "step": 42241 }, { "epoch": 0.7721498162940758, "grad_norm": 5.100368691298009, "learning_rate": 1.3009007438658948e-06, "loss": 16.9774, "step": 42242 }, { "epoch": 0.7721680954905222, "grad_norm": 6.7518010149494865, "learning_rate": 1.3007015923013666e-06, "loss": 17.7109, "step": 42243 }, { "epoch": 0.7721863746869687, "grad_norm": 5.749379772405525, "learning_rate": 1.3005024537025596e-06, "loss": 17.0484, "step": 42244 }, { "epoch": 0.7722046538834153, "grad_norm": 6.494297857875175, "learning_rate": 1.300303328070171e-06, "loss": 17.2699, "step": 42245 }, { "epoch": 0.7722229330798618, "grad_norm": 7.759283582063237, "learning_rate": 1.3001042154048966e-06, "loss": 17.8317, "step": 42246 }, { "epoch": 0.7722412122763084, "grad_norm": 5.25209928643942, "learning_rate": 1.2999051157074354e-06, "loss": 16.7854, "step": 42247 }, { "epoch": 0.7722594914727549, "grad_norm": 7.20230238646671, "learning_rate": 1.2997060289784875e-06, "loss": 17.9355, "step": 42248 }, { "epoch": 0.7722777706692013, "grad_norm": 6.959715687883033, "learning_rate": 1.2995069552187473e-06, "loss": 17.6792, "step": 42249 }, { "epoch": 0.7722960498656479, "grad_norm": 7.392459889087075, "learning_rate": 1.2993078944289161e-06, "loss": 17.3795, "step": 42250 }, { "epoch": 0.7723143290620944, "grad_norm": 5.227861362212127, "learning_rate": 1.2991088466096891e-06, "loss": 16.8689, "step": 42251 }, { "epoch": 0.772332608258541, "grad_norm": 5.363004243523884, "learning_rate": 1.2989098117617626e-06, "loss": 16.891, "step": 42252 }, { "epoch": 0.7723508874549875, "grad_norm": 6.268204960812499, "learning_rate": 1.298710789885838e-06, "loss": 17.327, "step": 42253 }, { "epoch": 0.772369166651434, "grad_norm": 5.42388231549644, "learning_rate": 1.2985117809826098e-06, "loss": 16.9986, "step": 42254 }, { "epoch": 0.7723874458478805, "grad_norm": 5.641770855414494, "learning_rate": 1.2983127850527776e-06, "loss": 17.0773, "step": 42255 }, { "epoch": 0.772405725044327, "grad_norm": 6.627170602286001, "learning_rate": 1.2981138020970363e-06, "loss": 17.3736, "step": 42256 }, { "epoch": 0.7724240042407736, "grad_norm": 6.2321401410195065, "learning_rate": 1.2979148321160851e-06, "loss": 17.1031, "step": 42257 }, { "epoch": 0.7724422834372201, "grad_norm": 6.190484042809075, "learning_rate": 1.297715875110623e-06, "loss": 17.6342, "step": 42258 }, { "epoch": 0.7724605626336666, "grad_norm": 6.869631410590928, "learning_rate": 1.2975169310813446e-06, "loss": 17.7432, "step": 42259 }, { "epoch": 0.7724788418301132, "grad_norm": 5.867524161500047, "learning_rate": 1.297318000028947e-06, "loss": 17.3491, "step": 42260 }, { "epoch": 0.7724971210265597, "grad_norm": 6.682113399458901, "learning_rate": 1.2971190819541302e-06, "loss": 17.7209, "step": 42261 }, { "epoch": 0.7725154002230062, "grad_norm": 7.552479826825988, "learning_rate": 1.2969201768575895e-06, "loss": 17.3327, "step": 42262 }, { "epoch": 0.7725336794194527, "grad_norm": 7.486226195439423, "learning_rate": 1.2967212847400206e-06, "loss": 17.8638, "step": 42263 }, { "epoch": 0.7725519586158992, "grad_norm": 9.507362215739885, "learning_rate": 1.2965224056021242e-06, "loss": 18.1244, "step": 42264 }, { "epoch": 0.7725702378123458, "grad_norm": 6.00844481322314, "learning_rate": 1.296323539444594e-06, "loss": 17.4474, "step": 42265 }, { "epoch": 0.7725885170087923, "grad_norm": 6.439899098865108, "learning_rate": 1.2961246862681276e-06, "loss": 17.3798, "step": 42266 }, { "epoch": 0.7726067962052389, "grad_norm": 6.759472827948278, "learning_rate": 1.2959258460734247e-06, "loss": 16.8828, "step": 42267 }, { "epoch": 0.7726250754016853, "grad_norm": 5.345656731913808, "learning_rate": 1.2957270188611787e-06, "loss": 16.9569, "step": 42268 }, { "epoch": 0.7726433545981318, "grad_norm": 6.494257329026555, "learning_rate": 1.29552820463209e-06, "loss": 17.3973, "step": 42269 }, { "epoch": 0.7726616337945784, "grad_norm": 6.481025295775946, "learning_rate": 1.295329403386853e-06, "loss": 17.2784, "step": 42270 }, { "epoch": 0.7726799129910249, "grad_norm": 6.128205600606939, "learning_rate": 1.2951306151261633e-06, "loss": 17.3344, "step": 42271 }, { "epoch": 0.7726981921874715, "grad_norm": 7.189734380047157, "learning_rate": 1.2949318398507216e-06, "loss": 17.4271, "step": 42272 }, { "epoch": 0.772716471383918, "grad_norm": 7.382891844958235, "learning_rate": 1.29473307756122e-06, "loss": 17.5327, "step": 42273 }, { "epoch": 0.7727347505803644, "grad_norm": 5.975954600994578, "learning_rate": 1.2945343282583595e-06, "loss": 17.0454, "step": 42274 }, { "epoch": 0.772753029776811, "grad_norm": 6.853688777516559, "learning_rate": 1.2943355919428325e-06, "loss": 17.6338, "step": 42275 }, { "epoch": 0.7727713089732575, "grad_norm": 6.849181144921121, "learning_rate": 1.2941368686153393e-06, "loss": 17.288, "step": 42276 }, { "epoch": 0.7727895881697041, "grad_norm": 6.698961132917843, "learning_rate": 1.2939381582765736e-06, "loss": 17.4811, "step": 42277 }, { "epoch": 0.7728078673661506, "grad_norm": 7.1642135137225065, "learning_rate": 1.2937394609272347e-06, "loss": 17.7341, "step": 42278 }, { "epoch": 0.7728261465625971, "grad_norm": 6.073064819895582, "learning_rate": 1.2935407765680174e-06, "loss": 17.2576, "step": 42279 }, { "epoch": 0.7728444257590437, "grad_norm": 6.147857024509723, "learning_rate": 1.293342105199616e-06, "loss": 17.2794, "step": 42280 }, { "epoch": 0.7728627049554901, "grad_norm": 6.143323904741681, "learning_rate": 1.2931434468227312e-06, "loss": 17.5231, "step": 42281 }, { "epoch": 0.7728809841519367, "grad_norm": 6.079720277763135, "learning_rate": 1.2929448014380547e-06, "loss": 17.216, "step": 42282 }, { "epoch": 0.7728992633483832, "grad_norm": 6.946050235654891, "learning_rate": 1.292746169046285e-06, "loss": 17.754, "step": 42283 }, { "epoch": 0.7729175425448297, "grad_norm": 7.889381410685017, "learning_rate": 1.2925475496481199e-06, "loss": 18.0185, "step": 42284 }, { "epoch": 0.7729358217412763, "grad_norm": 9.458051200360932, "learning_rate": 1.2923489432442526e-06, "loss": 17.6108, "step": 42285 }, { "epoch": 0.7729541009377228, "grad_norm": 5.376451695846469, "learning_rate": 1.292150349835382e-06, "loss": 17.1919, "step": 42286 }, { "epoch": 0.7729723801341694, "grad_norm": 5.906496833519445, "learning_rate": 1.2919517694222029e-06, "loss": 17.1994, "step": 42287 }, { "epoch": 0.7729906593306158, "grad_norm": 5.967697849692819, "learning_rate": 1.291753202005409e-06, "loss": 17.1887, "step": 42288 }, { "epoch": 0.7730089385270623, "grad_norm": 5.00557168632036, "learning_rate": 1.2915546475857e-06, "loss": 16.9452, "step": 42289 }, { "epoch": 0.7730272177235089, "grad_norm": 5.115013619031943, "learning_rate": 1.2913561061637703e-06, "loss": 16.9441, "step": 42290 }, { "epoch": 0.7730454969199554, "grad_norm": 7.347328058060368, "learning_rate": 1.291157577740314e-06, "loss": 17.6592, "step": 42291 }, { "epoch": 0.7730637761164019, "grad_norm": 6.018107748120012, "learning_rate": 1.290959062316029e-06, "loss": 17.276, "step": 42292 }, { "epoch": 0.7730820553128485, "grad_norm": 6.109071203500903, "learning_rate": 1.2907605598916118e-06, "loss": 17.2602, "step": 42293 }, { "epoch": 0.7731003345092949, "grad_norm": 5.324570191350679, "learning_rate": 1.2905620704677551e-06, "loss": 17.1532, "step": 42294 }, { "epoch": 0.7731186137057415, "grad_norm": 5.439904147345773, "learning_rate": 1.2903635940451588e-06, "loss": 17.1653, "step": 42295 }, { "epoch": 0.773136892902188, "grad_norm": 5.500759558156496, "learning_rate": 1.2901651306245156e-06, "loss": 17.0156, "step": 42296 }, { "epoch": 0.7731551720986345, "grad_norm": 6.69604261147211, "learning_rate": 1.2899666802065204e-06, "loss": 17.3321, "step": 42297 }, { "epoch": 0.7731734512950811, "grad_norm": 5.478837869546081, "learning_rate": 1.2897682427918711e-06, "loss": 17.3086, "step": 42298 }, { "epoch": 0.7731917304915276, "grad_norm": 7.086081272309058, "learning_rate": 1.289569818381261e-06, "loss": 17.2528, "step": 42299 }, { "epoch": 0.7732100096879742, "grad_norm": 6.814356938687307, "learning_rate": 1.2893714069753882e-06, "loss": 17.4285, "step": 42300 }, { "epoch": 0.7732282888844206, "grad_norm": 7.6009599403506165, "learning_rate": 1.2891730085749454e-06, "loss": 17.4614, "step": 42301 }, { "epoch": 0.7732465680808671, "grad_norm": 6.728153753236541, "learning_rate": 1.2889746231806287e-06, "loss": 17.7097, "step": 42302 }, { "epoch": 0.7732648472773137, "grad_norm": 5.117882478095296, "learning_rate": 1.2887762507931357e-06, "loss": 16.8746, "step": 42303 }, { "epoch": 0.7732831264737602, "grad_norm": 6.852653287468367, "learning_rate": 1.2885778914131598e-06, "loss": 17.8534, "step": 42304 }, { "epoch": 0.7733014056702068, "grad_norm": 6.053490579013801, "learning_rate": 1.288379545041395e-06, "loss": 17.2604, "step": 42305 }, { "epoch": 0.7733196848666533, "grad_norm": 7.366383037735668, "learning_rate": 1.2881812116785392e-06, "loss": 18.1669, "step": 42306 }, { "epoch": 0.7733379640630997, "grad_norm": 6.495674491343326, "learning_rate": 1.2879828913252861e-06, "loss": 17.5877, "step": 42307 }, { "epoch": 0.7733562432595463, "grad_norm": 5.45264151311166, "learning_rate": 1.2877845839823294e-06, "loss": 17.0933, "step": 42308 }, { "epoch": 0.7733745224559928, "grad_norm": 4.9784723844422025, "learning_rate": 1.2875862896503672e-06, "loss": 16.7893, "step": 42309 }, { "epoch": 0.7733928016524394, "grad_norm": 7.1227056203579, "learning_rate": 1.2873880083300911e-06, "loss": 17.9234, "step": 42310 }, { "epoch": 0.7734110808488859, "grad_norm": 6.357067771168622, "learning_rate": 1.2871897400221984e-06, "loss": 17.1064, "step": 42311 }, { "epoch": 0.7734293600453324, "grad_norm": 4.9706473829244295, "learning_rate": 1.2869914847273846e-06, "loss": 16.9384, "step": 42312 }, { "epoch": 0.773447639241779, "grad_norm": 5.238228044303506, "learning_rate": 1.2867932424463431e-06, "loss": 16.9794, "step": 42313 }, { "epoch": 0.7734659184382254, "grad_norm": 6.050293385337085, "learning_rate": 1.2865950131797682e-06, "loss": 17.2858, "step": 42314 }, { "epoch": 0.773484197634672, "grad_norm": 8.535300809638052, "learning_rate": 1.286396796928357e-06, "loss": 18.0254, "step": 42315 }, { "epoch": 0.7735024768311185, "grad_norm": 5.997138473822271, "learning_rate": 1.2861985936928007e-06, "loss": 17.2295, "step": 42316 }, { "epoch": 0.773520756027565, "grad_norm": 7.476364977799793, "learning_rate": 1.2860004034737982e-06, "loss": 17.4053, "step": 42317 }, { "epoch": 0.7735390352240116, "grad_norm": 6.2937447325774025, "learning_rate": 1.2858022262720416e-06, "loss": 17.4155, "step": 42318 }, { "epoch": 0.773557314420458, "grad_norm": 5.618063652757228, "learning_rate": 1.2856040620882243e-06, "loss": 17.1649, "step": 42319 }, { "epoch": 0.7735755936169046, "grad_norm": 6.072759010689397, "learning_rate": 1.2854059109230428e-06, "loss": 17.31, "step": 42320 }, { "epoch": 0.7735938728133511, "grad_norm": 5.816912682887911, "learning_rate": 1.2852077727771928e-06, "loss": 16.9784, "step": 42321 }, { "epoch": 0.7736121520097976, "grad_norm": 6.183227415415813, "learning_rate": 1.2850096476513657e-06, "loss": 17.287, "step": 42322 }, { "epoch": 0.7736304312062442, "grad_norm": 8.75696024284972, "learning_rate": 1.2848115355462587e-06, "loss": 18.464, "step": 42323 }, { "epoch": 0.7736487104026907, "grad_norm": 7.871812393955007, "learning_rate": 1.2846134364625645e-06, "loss": 18.4892, "step": 42324 }, { "epoch": 0.7736669895991373, "grad_norm": 7.937269495936827, "learning_rate": 1.284415350400977e-06, "loss": 17.8584, "step": 42325 }, { "epoch": 0.7736852687955837, "grad_norm": 6.085144940243663, "learning_rate": 1.2842172773621926e-06, "loss": 17.2688, "step": 42326 }, { "epoch": 0.7737035479920302, "grad_norm": 7.23742460787422, "learning_rate": 1.284019217346904e-06, "loss": 17.7595, "step": 42327 }, { "epoch": 0.7737218271884768, "grad_norm": 5.660770594858563, "learning_rate": 1.283821170355804e-06, "loss": 17.3516, "step": 42328 }, { "epoch": 0.7737401063849233, "grad_norm": 6.754065690442329, "learning_rate": 1.2836231363895885e-06, "loss": 17.5648, "step": 42329 }, { "epoch": 0.7737583855813699, "grad_norm": 6.532910243329316, "learning_rate": 1.2834251154489514e-06, "loss": 17.365, "step": 42330 }, { "epoch": 0.7737766647778164, "grad_norm": 7.144494666357634, "learning_rate": 1.2832271075345881e-06, "loss": 17.5181, "step": 42331 }, { "epoch": 0.7737949439742628, "grad_norm": 5.7233424123902825, "learning_rate": 1.2830291126471916e-06, "loss": 17.2775, "step": 42332 }, { "epoch": 0.7738132231707094, "grad_norm": 7.687770172646718, "learning_rate": 1.2828311307874542e-06, "loss": 17.6192, "step": 42333 }, { "epoch": 0.7738315023671559, "grad_norm": 5.853279363614117, "learning_rate": 1.2826331619560723e-06, "loss": 16.9298, "step": 42334 }, { "epoch": 0.7738497815636025, "grad_norm": 6.819297490667586, "learning_rate": 1.2824352061537382e-06, "loss": 17.5435, "step": 42335 }, { "epoch": 0.773868060760049, "grad_norm": 7.114119880118019, "learning_rate": 1.2822372633811447e-06, "loss": 17.6491, "step": 42336 }, { "epoch": 0.7738863399564955, "grad_norm": 5.891973482614149, "learning_rate": 1.2820393336389892e-06, "loss": 17.2562, "step": 42337 }, { "epoch": 0.7739046191529421, "grad_norm": 6.432515021806155, "learning_rate": 1.2818414169279613e-06, "loss": 17.283, "step": 42338 }, { "epoch": 0.7739228983493885, "grad_norm": 6.401454259789723, "learning_rate": 1.2816435132487565e-06, "loss": 16.888, "step": 42339 }, { "epoch": 0.7739411775458351, "grad_norm": 6.8437324452379364, "learning_rate": 1.28144562260207e-06, "loss": 17.4101, "step": 42340 }, { "epoch": 0.7739594567422816, "grad_norm": 5.8177328729481665, "learning_rate": 1.281247744988594e-06, "loss": 16.9629, "step": 42341 }, { "epoch": 0.7739777359387281, "grad_norm": 5.291687014802973, "learning_rate": 1.2810498804090206e-06, "loss": 17.0952, "step": 42342 }, { "epoch": 0.7739960151351747, "grad_norm": 7.423786354146037, "learning_rate": 1.2808520288640457e-06, "loss": 18.0088, "step": 42343 }, { "epoch": 0.7740142943316212, "grad_norm": 5.882274632260611, "learning_rate": 1.2806541903543619e-06, "loss": 17.2526, "step": 42344 }, { "epoch": 0.7740325735280678, "grad_norm": 6.230696189494706, "learning_rate": 1.2804563648806612e-06, "loss": 17.3437, "step": 42345 }, { "epoch": 0.7740508527245142, "grad_norm": 6.520235140300524, "learning_rate": 1.2802585524436378e-06, "loss": 17.5344, "step": 42346 }, { "epoch": 0.7740691319209607, "grad_norm": 5.689368390120329, "learning_rate": 1.2800607530439856e-06, "loss": 17.1573, "step": 42347 }, { "epoch": 0.7740874111174073, "grad_norm": 5.4427707280721505, "learning_rate": 1.2798629666823991e-06, "loss": 16.9415, "step": 42348 }, { "epoch": 0.7741056903138538, "grad_norm": 5.788946403473104, "learning_rate": 1.27966519335957e-06, "loss": 17.1309, "step": 42349 }, { "epoch": 0.7741239695103004, "grad_norm": 5.869679681046264, "learning_rate": 1.2794674330761903e-06, "loss": 17.246, "step": 42350 }, { "epoch": 0.7741422487067469, "grad_norm": 6.091222997983542, "learning_rate": 1.2792696858329557e-06, "loss": 17.3395, "step": 42351 }, { "epoch": 0.7741605279031933, "grad_norm": 5.398937785287264, "learning_rate": 1.2790719516305584e-06, "loss": 17.158, "step": 42352 }, { "epoch": 0.7741788070996399, "grad_norm": 6.391316778608961, "learning_rate": 1.278874230469689e-06, "loss": 17.3361, "step": 42353 }, { "epoch": 0.7741970862960864, "grad_norm": 6.236908995484994, "learning_rate": 1.2786765223510438e-06, "loss": 17.5606, "step": 42354 }, { "epoch": 0.774215365492533, "grad_norm": 6.1745755069837065, "learning_rate": 1.2784788272753135e-06, "loss": 17.038, "step": 42355 }, { "epoch": 0.7742336446889795, "grad_norm": 5.520357953860322, "learning_rate": 1.278281145243192e-06, "loss": 16.9379, "step": 42356 }, { "epoch": 0.774251923885426, "grad_norm": 7.171358076863071, "learning_rate": 1.2780834762553735e-06, "loss": 17.6051, "step": 42357 }, { "epoch": 0.7742702030818726, "grad_norm": 7.159526341506344, "learning_rate": 1.2778858203125493e-06, "loss": 17.5575, "step": 42358 }, { "epoch": 0.774288482278319, "grad_norm": 6.191049078581131, "learning_rate": 1.277688177415411e-06, "loss": 17.3277, "step": 42359 }, { "epoch": 0.7743067614747655, "grad_norm": 5.820489989500273, "learning_rate": 1.2774905475646537e-06, "loss": 16.9739, "step": 42360 }, { "epoch": 0.7743250406712121, "grad_norm": 8.084376611048201, "learning_rate": 1.2772929307609683e-06, "loss": 18.1544, "step": 42361 }, { "epoch": 0.7743433198676586, "grad_norm": 5.889563300314807, "learning_rate": 1.277095327005049e-06, "loss": 17.4337, "step": 42362 }, { "epoch": 0.7743615990641052, "grad_norm": 6.030432566881194, "learning_rate": 1.2768977362975877e-06, "loss": 17.0258, "step": 42363 }, { "epoch": 0.7743798782605517, "grad_norm": 6.511189174824081, "learning_rate": 1.2767001586392752e-06, "loss": 17.2646, "step": 42364 }, { "epoch": 0.7743981574569981, "grad_norm": 5.691202439347446, "learning_rate": 1.2765025940308056e-06, "loss": 16.8847, "step": 42365 }, { "epoch": 0.7744164366534447, "grad_norm": 6.60988059790448, "learning_rate": 1.276305042472873e-06, "loss": 17.387, "step": 42366 }, { "epoch": 0.7744347158498912, "grad_norm": 5.618847437100866, "learning_rate": 1.2761075039661664e-06, "loss": 17.344, "step": 42367 }, { "epoch": 0.7744529950463378, "grad_norm": 6.513916579411731, "learning_rate": 1.275909978511381e-06, "loss": 17.3972, "step": 42368 }, { "epoch": 0.7744712742427843, "grad_norm": 8.548432366730552, "learning_rate": 1.2757124661092084e-06, "loss": 17.6292, "step": 42369 }, { "epoch": 0.7744895534392308, "grad_norm": 5.642635268238989, "learning_rate": 1.2755149667603389e-06, "loss": 17.2932, "step": 42370 }, { "epoch": 0.7745078326356774, "grad_norm": 5.86277100716431, "learning_rate": 1.2753174804654679e-06, "loss": 17.215, "step": 42371 }, { "epoch": 0.7745261118321238, "grad_norm": 6.167297907251954, "learning_rate": 1.2751200072252857e-06, "loss": 17.2731, "step": 42372 }, { "epoch": 0.7745443910285704, "grad_norm": 5.018022920042843, "learning_rate": 1.274922547040483e-06, "loss": 16.82, "step": 42373 }, { "epoch": 0.7745626702250169, "grad_norm": 5.959364038768195, "learning_rate": 1.2747250999117538e-06, "loss": 17.0688, "step": 42374 }, { "epoch": 0.7745809494214634, "grad_norm": 6.738445234898278, "learning_rate": 1.2745276658397899e-06, "loss": 17.6061, "step": 42375 }, { "epoch": 0.77459922861791, "grad_norm": 6.828446827810929, "learning_rate": 1.2743302448252841e-06, "loss": 17.4362, "step": 42376 }, { "epoch": 0.7746175078143565, "grad_norm": 5.4136627195163065, "learning_rate": 1.2741328368689282e-06, "loss": 17.0002, "step": 42377 }, { "epoch": 0.774635787010803, "grad_norm": 5.781983226870544, "learning_rate": 1.2739354419714118e-06, "loss": 17.1118, "step": 42378 }, { "epoch": 0.7746540662072495, "grad_norm": 5.146220127448063, "learning_rate": 1.2737380601334293e-06, "loss": 16.9157, "step": 42379 }, { "epoch": 0.774672345403696, "grad_norm": 7.270321880548365, "learning_rate": 1.273540691355672e-06, "loss": 17.9502, "step": 42380 }, { "epoch": 0.7746906246001426, "grad_norm": 5.166382224210629, "learning_rate": 1.2733433356388296e-06, "loss": 16.9854, "step": 42381 }, { "epoch": 0.7747089037965891, "grad_norm": 6.050369396121505, "learning_rate": 1.273145992983597e-06, "loss": 17.0435, "step": 42382 }, { "epoch": 0.7747271829930357, "grad_norm": 7.0694594306380365, "learning_rate": 1.272948663390663e-06, "loss": 17.628, "step": 42383 }, { "epoch": 0.7747454621894821, "grad_norm": 7.065841736892199, "learning_rate": 1.2727513468607206e-06, "loss": 17.9448, "step": 42384 }, { "epoch": 0.7747637413859286, "grad_norm": 6.063667725367035, "learning_rate": 1.2725540433944621e-06, "loss": 17.4203, "step": 42385 }, { "epoch": 0.7747820205823752, "grad_norm": 5.306011909802666, "learning_rate": 1.272356752992579e-06, "loss": 17.0962, "step": 42386 }, { "epoch": 0.7748002997788217, "grad_norm": 7.612626365202465, "learning_rate": 1.27215947565576e-06, "loss": 17.704, "step": 42387 }, { "epoch": 0.7748185789752683, "grad_norm": 7.069222315873027, "learning_rate": 1.2719622113847002e-06, "loss": 17.7376, "step": 42388 }, { "epoch": 0.7748368581717148, "grad_norm": 7.076183876268881, "learning_rate": 1.2717649601800892e-06, "loss": 17.5986, "step": 42389 }, { "epoch": 0.7748551373681613, "grad_norm": 8.555850035275247, "learning_rate": 1.271567722042617e-06, "loss": 17.4656, "step": 42390 }, { "epoch": 0.7748734165646078, "grad_norm": 7.863504765697494, "learning_rate": 1.2713704969729784e-06, "loss": 18.0455, "step": 42391 }, { "epoch": 0.7748916957610543, "grad_norm": 6.257646334484841, "learning_rate": 1.2711732849718605e-06, "loss": 16.9879, "step": 42392 }, { "epoch": 0.7749099749575009, "grad_norm": 6.959676722434583, "learning_rate": 1.2709760860399574e-06, "loss": 17.6777, "step": 42393 }, { "epoch": 0.7749282541539474, "grad_norm": 6.7741230533697685, "learning_rate": 1.2707789001779602e-06, "loss": 17.7107, "step": 42394 }, { "epoch": 0.7749465333503939, "grad_norm": 6.690669220837602, "learning_rate": 1.2705817273865584e-06, "loss": 17.6847, "step": 42395 }, { "epoch": 0.7749648125468405, "grad_norm": 7.009750651000051, "learning_rate": 1.270384567666445e-06, "loss": 17.9652, "step": 42396 }, { "epoch": 0.774983091743287, "grad_norm": 5.264515211993722, "learning_rate": 1.2701874210183107e-06, "loss": 17.2226, "step": 42397 }, { "epoch": 0.7750013709397335, "grad_norm": 7.112427459696735, "learning_rate": 1.2699902874428438e-06, "loss": 17.9076, "step": 42398 }, { "epoch": 0.77501965013618, "grad_norm": 6.595882760045458, "learning_rate": 1.2697931669407388e-06, "loss": 17.4209, "step": 42399 }, { "epoch": 0.7750379293326265, "grad_norm": 5.956787089865678, "learning_rate": 1.269596059512684e-06, "loss": 17.4288, "step": 42400 }, { "epoch": 0.7750562085290731, "grad_norm": 4.453349241380772, "learning_rate": 1.2693989651593725e-06, "loss": 16.8198, "step": 42401 }, { "epoch": 0.7750744877255196, "grad_norm": 5.8772315901353025, "learning_rate": 1.2692018838814924e-06, "loss": 17.1223, "step": 42402 }, { "epoch": 0.7750927669219662, "grad_norm": 5.155444259321512, "learning_rate": 1.2690048156797374e-06, "loss": 17.0456, "step": 42403 }, { "epoch": 0.7751110461184126, "grad_norm": 6.012405382573133, "learning_rate": 1.2688077605547956e-06, "loss": 16.9936, "step": 42404 }, { "epoch": 0.7751293253148591, "grad_norm": 7.370202583038078, "learning_rate": 1.2686107185073598e-06, "loss": 17.6851, "step": 42405 }, { "epoch": 0.7751476045113057, "grad_norm": 6.045031059467729, "learning_rate": 1.2684136895381188e-06, "loss": 17.5526, "step": 42406 }, { "epoch": 0.7751658837077522, "grad_norm": 6.8431780784191885, "learning_rate": 1.2682166736477646e-06, "loss": 17.2799, "step": 42407 }, { "epoch": 0.7751841629041988, "grad_norm": 5.608458118677119, "learning_rate": 1.268019670836988e-06, "loss": 16.9582, "step": 42408 }, { "epoch": 0.7752024421006453, "grad_norm": 6.309898688273475, "learning_rate": 1.2678226811064765e-06, "loss": 17.3537, "step": 42409 }, { "epoch": 0.7752207212970917, "grad_norm": 5.367469545673835, "learning_rate": 1.2676257044569228e-06, "loss": 16.9267, "step": 42410 }, { "epoch": 0.7752390004935383, "grad_norm": 8.891481073696262, "learning_rate": 1.267428740889019e-06, "loss": 17.9613, "step": 42411 }, { "epoch": 0.7752572796899848, "grad_norm": 5.968575475089445, "learning_rate": 1.267231790403452e-06, "loss": 16.9807, "step": 42412 }, { "epoch": 0.7752755588864314, "grad_norm": 5.687112330903489, "learning_rate": 1.2670348530009146e-06, "loss": 16.8811, "step": 42413 }, { "epoch": 0.7752938380828779, "grad_norm": 6.942012597822775, "learning_rate": 1.2668379286820964e-06, "loss": 17.7975, "step": 42414 }, { "epoch": 0.7753121172793244, "grad_norm": 4.338685849469204, "learning_rate": 1.2666410174476862e-06, "loss": 16.5795, "step": 42415 }, { "epoch": 0.775330396475771, "grad_norm": 9.886860747450488, "learning_rate": 1.2664441192983762e-06, "loss": 18.4041, "step": 42416 }, { "epoch": 0.7753486756722174, "grad_norm": 6.542671529083129, "learning_rate": 1.2662472342348558e-06, "loss": 16.9996, "step": 42417 }, { "epoch": 0.775366954868664, "grad_norm": 8.179770509972187, "learning_rate": 1.2660503622578135e-06, "loss": 17.4617, "step": 42418 }, { "epoch": 0.7753852340651105, "grad_norm": 5.745560054783725, "learning_rate": 1.2658535033679403e-06, "loss": 17.2409, "step": 42419 }, { "epoch": 0.775403513261557, "grad_norm": 6.9905167566093525, "learning_rate": 1.2656566575659286e-06, "loss": 17.5816, "step": 42420 }, { "epoch": 0.7754217924580036, "grad_norm": 5.657286901449458, "learning_rate": 1.265459824852464e-06, "loss": 16.9899, "step": 42421 }, { "epoch": 0.7754400716544501, "grad_norm": 6.289522521525854, "learning_rate": 1.265263005228241e-06, "loss": 16.9964, "step": 42422 }, { "epoch": 0.7754583508508966, "grad_norm": 6.244557718231733, "learning_rate": 1.265066198693945e-06, "loss": 17.1697, "step": 42423 }, { "epoch": 0.7754766300473431, "grad_norm": 5.21358698830588, "learning_rate": 1.2648694052502697e-06, "loss": 16.8343, "step": 42424 }, { "epoch": 0.7754949092437896, "grad_norm": 5.586752114511401, "learning_rate": 1.2646726248979025e-06, "loss": 17.2121, "step": 42425 }, { "epoch": 0.7755131884402362, "grad_norm": 5.608403226345672, "learning_rate": 1.2644758576375326e-06, "loss": 17.0916, "step": 42426 }, { "epoch": 0.7755314676366827, "grad_norm": 4.9057315353481314, "learning_rate": 1.2642791034698522e-06, "loss": 16.8756, "step": 42427 }, { "epoch": 0.7755497468331292, "grad_norm": 5.246810330418628, "learning_rate": 1.2640823623955473e-06, "loss": 16.9553, "step": 42428 }, { "epoch": 0.7755680260295758, "grad_norm": 6.349952608805424, "learning_rate": 1.26388563441531e-06, "loss": 17.2643, "step": 42429 }, { "epoch": 0.7755863052260222, "grad_norm": 6.210670067523976, "learning_rate": 1.2636889195298308e-06, "loss": 17.3402, "step": 42430 }, { "epoch": 0.7756045844224688, "grad_norm": 6.601182288887125, "learning_rate": 1.2634922177397978e-06, "loss": 17.5069, "step": 42431 }, { "epoch": 0.7756228636189153, "grad_norm": 6.059401011734967, "learning_rate": 1.2632955290458986e-06, "loss": 17.0497, "step": 42432 }, { "epoch": 0.7756411428153618, "grad_norm": 6.440076834253714, "learning_rate": 1.2630988534488258e-06, "loss": 17.5881, "step": 42433 }, { "epoch": 0.7756594220118084, "grad_norm": 6.951067296861567, "learning_rate": 1.2629021909492672e-06, "loss": 17.5482, "step": 42434 }, { "epoch": 0.7756777012082549, "grad_norm": 7.707010356176069, "learning_rate": 1.2627055415479112e-06, "loss": 17.7803, "step": 42435 }, { "epoch": 0.7756959804047014, "grad_norm": 5.432490002839421, "learning_rate": 1.2625089052454492e-06, "loss": 17.2531, "step": 42436 }, { "epoch": 0.7757142596011479, "grad_norm": 5.84404594327326, "learning_rate": 1.2623122820425677e-06, "loss": 17.1373, "step": 42437 }, { "epoch": 0.7757325387975944, "grad_norm": 5.204273926418493, "learning_rate": 1.2621156719399569e-06, "loss": 17.188, "step": 42438 }, { "epoch": 0.775750817994041, "grad_norm": 6.440969888437788, "learning_rate": 1.261919074938308e-06, "loss": 17.6958, "step": 42439 }, { "epoch": 0.7757690971904875, "grad_norm": 5.59501083981049, "learning_rate": 1.261722491038307e-06, "loss": 17.265, "step": 42440 }, { "epoch": 0.7757873763869341, "grad_norm": 5.961416319074307, "learning_rate": 1.2615259202406454e-06, "loss": 17.2185, "step": 42441 }, { "epoch": 0.7758056555833805, "grad_norm": 6.156027927972107, "learning_rate": 1.2613293625460115e-06, "loss": 17.249, "step": 42442 }, { "epoch": 0.775823934779827, "grad_norm": 6.898638183899067, "learning_rate": 1.261132817955092e-06, "loss": 17.5669, "step": 42443 }, { "epoch": 0.7758422139762736, "grad_norm": 6.025760929572694, "learning_rate": 1.2609362864685792e-06, "loss": 17.3494, "step": 42444 }, { "epoch": 0.7758604931727201, "grad_norm": 7.915590825932287, "learning_rate": 1.2607397680871602e-06, "loss": 17.5802, "step": 42445 }, { "epoch": 0.7758787723691667, "grad_norm": 7.138401721816492, "learning_rate": 1.2605432628115222e-06, "loss": 17.8877, "step": 42446 }, { "epoch": 0.7758970515656132, "grad_norm": 4.555515654617665, "learning_rate": 1.2603467706423556e-06, "loss": 16.8205, "step": 42447 }, { "epoch": 0.7759153307620597, "grad_norm": 6.08683166010873, "learning_rate": 1.260150291580351e-06, "loss": 17.3006, "step": 42448 }, { "epoch": 0.7759336099585062, "grad_norm": 5.753409927511685, "learning_rate": 1.259953825626193e-06, "loss": 16.8962, "step": 42449 }, { "epoch": 0.7759518891549527, "grad_norm": 7.857318235500407, "learning_rate": 1.259757372780574e-06, "loss": 17.4861, "step": 42450 }, { "epoch": 0.7759701683513993, "grad_norm": 5.92834484326742, "learning_rate": 1.259560933044181e-06, "loss": 17.2453, "step": 42451 }, { "epoch": 0.7759884475478458, "grad_norm": 5.339355998028351, "learning_rate": 1.2593645064177007e-06, "loss": 16.7872, "step": 42452 }, { "epoch": 0.7760067267442923, "grad_norm": 6.076589706909878, "learning_rate": 1.2591680929018246e-06, "loss": 17.1755, "step": 42453 }, { "epoch": 0.7760250059407389, "grad_norm": 8.093319842100236, "learning_rate": 1.2589716924972384e-06, "loss": 17.94, "step": 42454 }, { "epoch": 0.7760432851371853, "grad_norm": 5.446495243090653, "learning_rate": 1.258775305204633e-06, "loss": 16.8976, "step": 42455 }, { "epoch": 0.7760615643336319, "grad_norm": 5.262999929760421, "learning_rate": 1.2585789310246942e-06, "loss": 16.9263, "step": 42456 }, { "epoch": 0.7760798435300784, "grad_norm": 6.640882721575189, "learning_rate": 1.258382569958112e-06, "loss": 17.2558, "step": 42457 }, { "epoch": 0.7760981227265249, "grad_norm": 10.247697819746033, "learning_rate": 1.2581862220055752e-06, "loss": 17.8695, "step": 42458 }, { "epoch": 0.7761164019229715, "grad_norm": 6.169991989468984, "learning_rate": 1.2579898871677715e-06, "loss": 17.1137, "step": 42459 }, { "epoch": 0.776134681119418, "grad_norm": 7.671112572465906, "learning_rate": 1.2577935654453866e-06, "loss": 17.5601, "step": 42460 }, { "epoch": 0.7761529603158646, "grad_norm": 6.691166931782967, "learning_rate": 1.257597256839112e-06, "loss": 17.1358, "step": 42461 }, { "epoch": 0.776171239512311, "grad_norm": 5.656385932281349, "learning_rate": 1.257400961349634e-06, "loss": 17.0333, "step": 42462 }, { "epoch": 0.7761895187087575, "grad_norm": 6.019519540803947, "learning_rate": 1.25720467897764e-06, "loss": 17.1705, "step": 42463 }, { "epoch": 0.7762077979052041, "grad_norm": 7.350573950894752, "learning_rate": 1.25700840972382e-06, "loss": 17.6267, "step": 42464 }, { "epoch": 0.7762260771016506, "grad_norm": 6.316099523448675, "learning_rate": 1.2568121535888595e-06, "loss": 17.4542, "step": 42465 }, { "epoch": 0.7762443562980972, "grad_norm": 6.061261691358287, "learning_rate": 1.256615910573447e-06, "loss": 16.966, "step": 42466 }, { "epoch": 0.7762626354945437, "grad_norm": 6.108975983108907, "learning_rate": 1.256419680678273e-06, "loss": 17.1253, "step": 42467 }, { "epoch": 0.7762809146909901, "grad_norm": 5.608043483362952, "learning_rate": 1.2562234639040211e-06, "loss": 17.1194, "step": 42468 }, { "epoch": 0.7762991938874367, "grad_norm": 4.491596073003152, "learning_rate": 1.256027260251383e-06, "loss": 16.6539, "step": 42469 }, { "epoch": 0.7763174730838832, "grad_norm": 6.589776511448043, "learning_rate": 1.255831069721044e-06, "loss": 17.2294, "step": 42470 }, { "epoch": 0.7763357522803298, "grad_norm": 8.012819378351049, "learning_rate": 1.2556348923136913e-06, "loss": 17.7049, "step": 42471 }, { "epoch": 0.7763540314767763, "grad_norm": 5.612768642541918, "learning_rate": 1.2554387280300146e-06, "loss": 17.3345, "step": 42472 }, { "epoch": 0.7763723106732228, "grad_norm": 7.701646235875373, "learning_rate": 1.2552425768706988e-06, "loss": 17.9811, "step": 42473 }, { "epoch": 0.7763905898696694, "grad_norm": 6.517162712467082, "learning_rate": 1.2550464388364325e-06, "loss": 17.2334, "step": 42474 }, { "epoch": 0.7764088690661158, "grad_norm": 6.058707401769223, "learning_rate": 1.2548503139279057e-06, "loss": 17.5217, "step": 42475 }, { "epoch": 0.7764271482625624, "grad_norm": 6.4036130072507005, "learning_rate": 1.254654202145803e-06, "loss": 17.6064, "step": 42476 }, { "epoch": 0.7764454274590089, "grad_norm": 6.12324306074186, "learning_rate": 1.2544581034908109e-06, "loss": 17.1949, "step": 42477 }, { "epoch": 0.7764637066554554, "grad_norm": 5.995814219848538, "learning_rate": 1.2542620179636194e-06, "loss": 17.5128, "step": 42478 }, { "epoch": 0.776481985851902, "grad_norm": 6.21753497151198, "learning_rate": 1.2540659455649147e-06, "loss": 17.1811, "step": 42479 }, { "epoch": 0.7765002650483485, "grad_norm": 6.650952215559712, "learning_rate": 1.2538698862953824e-06, "loss": 17.3987, "step": 42480 }, { "epoch": 0.776518544244795, "grad_norm": 5.279867506000928, "learning_rate": 1.2536738401557125e-06, "loss": 17.153, "step": 42481 }, { "epoch": 0.7765368234412415, "grad_norm": 5.4302578764553635, "learning_rate": 1.2534778071465892e-06, "loss": 16.8718, "step": 42482 }, { "epoch": 0.776555102637688, "grad_norm": 5.632712596762618, "learning_rate": 1.253281787268701e-06, "loss": 17.1537, "step": 42483 }, { "epoch": 0.7765733818341346, "grad_norm": 5.903832683059674, "learning_rate": 1.2530857805227364e-06, "loss": 17.1875, "step": 42484 }, { "epoch": 0.7765916610305811, "grad_norm": 5.771704446772742, "learning_rate": 1.2528897869093798e-06, "loss": 17.172, "step": 42485 }, { "epoch": 0.7766099402270277, "grad_norm": 5.485819508318011, "learning_rate": 1.2526938064293208e-06, "loss": 17.0951, "step": 42486 }, { "epoch": 0.7766282194234742, "grad_norm": 6.543640759191094, "learning_rate": 1.2524978390832449e-06, "loss": 17.2501, "step": 42487 }, { "epoch": 0.7766464986199206, "grad_norm": 7.323602529112482, "learning_rate": 1.2523018848718372e-06, "loss": 17.4903, "step": 42488 }, { "epoch": 0.7766647778163672, "grad_norm": 5.412294550226389, "learning_rate": 1.2521059437957871e-06, "loss": 16.947, "step": 42489 }, { "epoch": 0.7766830570128137, "grad_norm": 6.550406851132449, "learning_rate": 1.2519100158557811e-06, "loss": 17.4608, "step": 42490 }, { "epoch": 0.7767013362092603, "grad_norm": 5.8687599103206525, "learning_rate": 1.2517141010525042e-06, "loss": 16.8619, "step": 42491 }, { "epoch": 0.7767196154057068, "grad_norm": 4.366279128244637, "learning_rate": 1.2515181993866433e-06, "loss": 16.4504, "step": 42492 }, { "epoch": 0.7767378946021533, "grad_norm": 7.864519341895231, "learning_rate": 1.2513223108588878e-06, "loss": 18.0974, "step": 42493 }, { "epoch": 0.7767561737985998, "grad_norm": 8.168168578150693, "learning_rate": 1.2511264354699204e-06, "loss": 18.1548, "step": 42494 }, { "epoch": 0.7767744529950463, "grad_norm": 6.525944214876314, "learning_rate": 1.2509305732204312e-06, "loss": 17.3413, "step": 42495 }, { "epoch": 0.7767927321914928, "grad_norm": 7.845919834640323, "learning_rate": 1.250734724111105e-06, "loss": 17.8765, "step": 42496 }, { "epoch": 0.7768110113879394, "grad_norm": 5.2856332300906965, "learning_rate": 1.2505388881426272e-06, "loss": 17.0069, "step": 42497 }, { "epoch": 0.7768292905843859, "grad_norm": 4.937539836801025, "learning_rate": 1.2503430653156862e-06, "loss": 16.9586, "step": 42498 }, { "epoch": 0.7768475697808325, "grad_norm": 5.269101375330755, "learning_rate": 1.2501472556309664e-06, "loss": 17.0793, "step": 42499 }, { "epoch": 0.776865848977279, "grad_norm": 5.602525918927002, "learning_rate": 1.249951459089156e-06, "loss": 17.1696, "step": 42500 }, { "epoch": 0.7768841281737254, "grad_norm": 5.924641036035134, "learning_rate": 1.249755675690939e-06, "loss": 17.1174, "step": 42501 }, { "epoch": 0.776902407370172, "grad_norm": 6.69278437104188, "learning_rate": 1.2495599054370027e-06, "loss": 17.6719, "step": 42502 }, { "epoch": 0.7769206865666185, "grad_norm": 6.53490337134376, "learning_rate": 1.2493641483280356e-06, "loss": 17.3303, "step": 42503 }, { "epoch": 0.7769389657630651, "grad_norm": 5.719679367704384, "learning_rate": 1.2491684043647212e-06, "loss": 17.1687, "step": 42504 }, { "epoch": 0.7769572449595116, "grad_norm": 6.3005526693388685, "learning_rate": 1.2489726735477448e-06, "loss": 17.4123, "step": 42505 }, { "epoch": 0.776975524155958, "grad_norm": 5.279006630428786, "learning_rate": 1.2487769558777952e-06, "loss": 17.052, "step": 42506 }, { "epoch": 0.7769938033524046, "grad_norm": 6.410203715607354, "learning_rate": 1.2485812513555561e-06, "loss": 17.3084, "step": 42507 }, { "epoch": 0.7770120825488511, "grad_norm": 6.445939265381086, "learning_rate": 1.2483855599817135e-06, "loss": 17.4224, "step": 42508 }, { "epoch": 0.7770303617452977, "grad_norm": 7.374012669430612, "learning_rate": 1.2481898817569554e-06, "loss": 17.4164, "step": 42509 }, { "epoch": 0.7770486409417442, "grad_norm": 6.791650495957689, "learning_rate": 1.2479942166819648e-06, "loss": 17.7295, "step": 42510 }, { "epoch": 0.7770669201381907, "grad_norm": 8.233011206819558, "learning_rate": 1.2477985647574287e-06, "loss": 17.591, "step": 42511 }, { "epoch": 0.7770851993346373, "grad_norm": 6.243728641313357, "learning_rate": 1.2476029259840344e-06, "loss": 17.3033, "step": 42512 }, { "epoch": 0.7771034785310837, "grad_norm": 6.595097864816338, "learning_rate": 1.2474073003624659e-06, "loss": 17.6183, "step": 42513 }, { "epoch": 0.7771217577275303, "grad_norm": 6.240688363775106, "learning_rate": 1.2472116878934083e-06, "loss": 17.4447, "step": 42514 }, { "epoch": 0.7771400369239768, "grad_norm": 5.512709587738704, "learning_rate": 1.2470160885775494e-06, "loss": 17.1372, "step": 42515 }, { "epoch": 0.7771583161204233, "grad_norm": 5.467116673520175, "learning_rate": 1.2468205024155723e-06, "loss": 16.9143, "step": 42516 }, { "epoch": 0.7771765953168699, "grad_norm": 7.683003950015837, "learning_rate": 1.2466249294081644e-06, "loss": 18.0608, "step": 42517 }, { "epoch": 0.7771948745133164, "grad_norm": 6.8187144156217805, "learning_rate": 1.2464293695560108e-06, "loss": 17.2224, "step": 42518 }, { "epoch": 0.777213153709763, "grad_norm": 6.396697576668014, "learning_rate": 1.2462338228597949e-06, "loss": 17.4386, "step": 42519 }, { "epoch": 0.7772314329062094, "grad_norm": 5.394406078143611, "learning_rate": 1.246038289320204e-06, "loss": 17.1323, "step": 42520 }, { "epoch": 0.7772497121026559, "grad_norm": 6.156865352261543, "learning_rate": 1.2458427689379244e-06, "loss": 17.3087, "step": 42521 }, { "epoch": 0.7772679912991025, "grad_norm": 4.828436551134606, "learning_rate": 1.245647261713639e-06, "loss": 16.9083, "step": 42522 }, { "epoch": 0.777286270495549, "grad_norm": 6.344423212737758, "learning_rate": 1.2454517676480349e-06, "loss": 17.1874, "step": 42523 }, { "epoch": 0.7773045496919956, "grad_norm": 8.428277380191542, "learning_rate": 1.2452562867417971e-06, "loss": 18.2185, "step": 42524 }, { "epoch": 0.7773228288884421, "grad_norm": 8.074996297873763, "learning_rate": 1.2450608189956088e-06, "loss": 17.9781, "step": 42525 }, { "epoch": 0.7773411080848885, "grad_norm": 7.434890000601011, "learning_rate": 1.2448653644101578e-06, "loss": 17.7446, "step": 42526 }, { "epoch": 0.7773593872813351, "grad_norm": 8.463554657552802, "learning_rate": 1.2446699229861264e-06, "loss": 18.0565, "step": 42527 }, { "epoch": 0.7773776664777816, "grad_norm": 5.80645912870897, "learning_rate": 1.244474494724202e-06, "loss": 17.0861, "step": 42528 }, { "epoch": 0.7773959456742282, "grad_norm": 5.455116806810338, "learning_rate": 1.244279079625068e-06, "loss": 17.1328, "step": 42529 }, { "epoch": 0.7774142248706747, "grad_norm": 8.334412494105873, "learning_rate": 1.2440836776894095e-06, "loss": 18.3371, "step": 42530 }, { "epoch": 0.7774325040671212, "grad_norm": 5.443842447975473, "learning_rate": 1.2438882889179132e-06, "loss": 17.0899, "step": 42531 }, { "epoch": 0.7774507832635678, "grad_norm": 8.841990441893463, "learning_rate": 1.2436929133112624e-06, "loss": 17.967, "step": 42532 }, { "epoch": 0.7774690624600142, "grad_norm": 5.52713760027776, "learning_rate": 1.2434975508701407e-06, "loss": 17.066, "step": 42533 }, { "epoch": 0.7774873416564608, "grad_norm": 5.738017447216362, "learning_rate": 1.2433022015952355e-06, "loss": 17.1298, "step": 42534 }, { "epoch": 0.7775056208529073, "grad_norm": 5.77917555761013, "learning_rate": 1.2431068654872303e-06, "loss": 17.2978, "step": 42535 }, { "epoch": 0.7775239000493538, "grad_norm": 5.347599638605392, "learning_rate": 1.242911542546808e-06, "loss": 17.0586, "step": 42536 }, { "epoch": 0.7775421792458004, "grad_norm": 5.720784687003816, "learning_rate": 1.2427162327746544e-06, "loss": 17.3086, "step": 42537 }, { "epoch": 0.7775604584422469, "grad_norm": 6.886431646133975, "learning_rate": 1.2425209361714563e-06, "loss": 17.7182, "step": 42538 }, { "epoch": 0.7775787376386935, "grad_norm": 7.369062416516132, "learning_rate": 1.2423256527378947e-06, "loss": 18.0882, "step": 42539 }, { "epoch": 0.7775970168351399, "grad_norm": 7.301092180788097, "learning_rate": 1.2421303824746572e-06, "loss": 17.3718, "step": 42540 }, { "epoch": 0.7776152960315864, "grad_norm": 7.082887548450705, "learning_rate": 1.2419351253824262e-06, "loss": 17.5167, "step": 42541 }, { "epoch": 0.777633575228033, "grad_norm": 6.465086233888477, "learning_rate": 1.2417398814618852e-06, "loss": 17.5684, "step": 42542 }, { "epoch": 0.7776518544244795, "grad_norm": 6.817273577423581, "learning_rate": 1.2415446507137213e-06, "loss": 17.376, "step": 42543 }, { "epoch": 0.7776701336209261, "grad_norm": 5.267767013521403, "learning_rate": 1.2413494331386156e-06, "loss": 17.0473, "step": 42544 }, { "epoch": 0.7776884128173726, "grad_norm": 5.9141651308700265, "learning_rate": 1.2411542287372558e-06, "loss": 17.3479, "step": 42545 }, { "epoch": 0.777706692013819, "grad_norm": 6.331378118807835, "learning_rate": 1.2409590375103225e-06, "loss": 17.3011, "step": 42546 }, { "epoch": 0.7777249712102656, "grad_norm": 5.909006863022207, "learning_rate": 1.2407638594585014e-06, "loss": 17.1337, "step": 42547 }, { "epoch": 0.7777432504067121, "grad_norm": 4.845927425275562, "learning_rate": 1.2405686945824785e-06, "loss": 16.8207, "step": 42548 }, { "epoch": 0.7777615296031587, "grad_norm": 4.9779785166000545, "learning_rate": 1.240373542882936e-06, "loss": 17.0981, "step": 42549 }, { "epoch": 0.7777798087996052, "grad_norm": 7.049150478845223, "learning_rate": 1.2401784043605563e-06, "loss": 17.6507, "step": 42550 }, { "epoch": 0.7777980879960517, "grad_norm": 6.15618683888709, "learning_rate": 1.2399832790160271e-06, "loss": 17.283, "step": 42551 }, { "epoch": 0.7778163671924982, "grad_norm": 6.372571171588839, "learning_rate": 1.23978816685003e-06, "loss": 17.3273, "step": 42552 }, { "epoch": 0.7778346463889447, "grad_norm": 6.3110980166582396, "learning_rate": 1.2395930678632474e-06, "loss": 17.3862, "step": 42553 }, { "epoch": 0.7778529255853913, "grad_norm": 9.299661274834046, "learning_rate": 1.2393979820563663e-06, "loss": 18.207, "step": 42554 }, { "epoch": 0.7778712047818378, "grad_norm": 7.097346054365455, "learning_rate": 1.2392029094300672e-06, "loss": 17.4782, "step": 42555 }, { "epoch": 0.7778894839782843, "grad_norm": 6.006426523429762, "learning_rate": 1.2390078499850361e-06, "loss": 17.3777, "step": 42556 }, { "epoch": 0.7779077631747309, "grad_norm": 7.124503660714503, "learning_rate": 1.2388128037219576e-06, "loss": 17.3598, "step": 42557 }, { "epoch": 0.7779260423711774, "grad_norm": 4.64904141994587, "learning_rate": 1.2386177706415137e-06, "loss": 16.7334, "step": 42558 }, { "epoch": 0.7779443215676239, "grad_norm": 7.522176563416895, "learning_rate": 1.2384227507443863e-06, "loss": 17.9127, "step": 42559 }, { "epoch": 0.7779626007640704, "grad_norm": 6.484013192525735, "learning_rate": 1.2382277440312624e-06, "loss": 17.2789, "step": 42560 }, { "epoch": 0.7779808799605169, "grad_norm": 7.323243482296395, "learning_rate": 1.2380327505028227e-06, "loss": 17.3817, "step": 42561 }, { "epoch": 0.7779991591569635, "grad_norm": 5.093297385965502, "learning_rate": 1.2378377701597528e-06, "loss": 16.9056, "step": 42562 }, { "epoch": 0.77801743835341, "grad_norm": 7.822490317911251, "learning_rate": 1.2376428030027355e-06, "loss": 17.7204, "step": 42563 }, { "epoch": 0.7780357175498565, "grad_norm": 7.274445969977755, "learning_rate": 1.2374478490324516e-06, "loss": 17.7353, "step": 42564 }, { "epoch": 0.778053996746303, "grad_norm": 5.433236755188493, "learning_rate": 1.237252908249587e-06, "loss": 17.0277, "step": 42565 }, { "epoch": 0.7780722759427495, "grad_norm": 5.780344185261619, "learning_rate": 1.2370579806548255e-06, "loss": 17.1492, "step": 42566 }, { "epoch": 0.7780905551391961, "grad_norm": 8.004295502659044, "learning_rate": 1.2368630662488484e-06, "loss": 18.0448, "step": 42567 }, { "epoch": 0.7781088343356426, "grad_norm": 6.648215583572407, "learning_rate": 1.236668165032341e-06, "loss": 17.6023, "step": 42568 }, { "epoch": 0.7781271135320891, "grad_norm": 5.4790335771768826, "learning_rate": 1.2364732770059846e-06, "loss": 17.2124, "step": 42569 }, { "epoch": 0.7781453927285357, "grad_norm": 6.122223248919366, "learning_rate": 1.2362784021704615e-06, "loss": 17.3429, "step": 42570 }, { "epoch": 0.7781636719249821, "grad_norm": 5.658344544593629, "learning_rate": 1.2360835405264576e-06, "loss": 17.1155, "step": 42571 }, { "epoch": 0.7781819511214287, "grad_norm": 6.8068164115952055, "learning_rate": 1.2358886920746543e-06, "loss": 17.712, "step": 42572 }, { "epoch": 0.7782002303178752, "grad_norm": 5.385734593827415, "learning_rate": 1.2356938568157328e-06, "loss": 16.9222, "step": 42573 }, { "epoch": 0.7782185095143217, "grad_norm": 5.910002149640927, "learning_rate": 1.2354990347503782e-06, "loss": 17.3798, "step": 42574 }, { "epoch": 0.7782367887107683, "grad_norm": 6.215093316199922, "learning_rate": 1.2353042258792725e-06, "loss": 17.5501, "step": 42575 }, { "epoch": 0.7782550679072148, "grad_norm": 7.4566940804288, "learning_rate": 1.2351094302031002e-06, "loss": 17.2876, "step": 42576 }, { "epoch": 0.7782733471036614, "grad_norm": 5.533080017096323, "learning_rate": 1.2349146477225427e-06, "loss": 17.1528, "step": 42577 }, { "epoch": 0.7782916263001078, "grad_norm": 7.5726031817051584, "learning_rate": 1.2347198784382814e-06, "loss": 17.7423, "step": 42578 }, { "epoch": 0.7783099054965543, "grad_norm": 5.921199198841298, "learning_rate": 1.2345251223510012e-06, "loss": 16.9057, "step": 42579 }, { "epoch": 0.7783281846930009, "grad_norm": 5.906939121266132, "learning_rate": 1.2343303794613837e-06, "loss": 17.4442, "step": 42580 }, { "epoch": 0.7783464638894474, "grad_norm": 7.595756233859395, "learning_rate": 1.23413564977011e-06, "loss": 17.5213, "step": 42581 }, { "epoch": 0.778364743085894, "grad_norm": 5.349231220886206, "learning_rate": 1.2339409332778663e-06, "loss": 16.9489, "step": 42582 }, { "epoch": 0.7783830222823405, "grad_norm": 4.652842536580198, "learning_rate": 1.2337462299853304e-06, "loss": 16.8031, "step": 42583 }, { "epoch": 0.7784013014787869, "grad_norm": 6.206206110605206, "learning_rate": 1.233551539893188e-06, "loss": 17.5015, "step": 42584 }, { "epoch": 0.7784195806752335, "grad_norm": 6.280880602654746, "learning_rate": 1.2333568630021214e-06, "loss": 17.1929, "step": 42585 }, { "epoch": 0.77843785987168, "grad_norm": 5.654110232936818, "learning_rate": 1.2331621993128123e-06, "loss": 17.1427, "step": 42586 }, { "epoch": 0.7784561390681266, "grad_norm": 7.41712113685423, "learning_rate": 1.2329675488259413e-06, "loss": 18.0303, "step": 42587 }, { "epoch": 0.7784744182645731, "grad_norm": 5.849270571112814, "learning_rate": 1.2327729115421938e-06, "loss": 17.112, "step": 42588 }, { "epoch": 0.7784926974610196, "grad_norm": 4.668366876029651, "learning_rate": 1.2325782874622504e-06, "loss": 16.7547, "step": 42589 }, { "epoch": 0.7785109766574662, "grad_norm": 6.350369039534873, "learning_rate": 1.2323836765867913e-06, "loss": 17.3356, "step": 42590 }, { "epoch": 0.7785292558539126, "grad_norm": 6.017760026348701, "learning_rate": 1.2321890789165026e-06, "loss": 17.0787, "step": 42591 }, { "epoch": 0.7785475350503592, "grad_norm": 5.481260771541287, "learning_rate": 1.2319944944520623e-06, "loss": 17.3582, "step": 42592 }, { "epoch": 0.7785658142468057, "grad_norm": 5.735185061863286, "learning_rate": 1.2317999231941542e-06, "loss": 17.0849, "step": 42593 }, { "epoch": 0.7785840934432522, "grad_norm": 5.834542693141164, "learning_rate": 1.2316053651434623e-06, "loss": 17.3085, "step": 42594 }, { "epoch": 0.7786023726396988, "grad_norm": 5.708837469097304, "learning_rate": 1.2314108203006648e-06, "loss": 16.9688, "step": 42595 }, { "epoch": 0.7786206518361453, "grad_norm": 5.657608698064209, "learning_rate": 1.231216288666447e-06, "loss": 17.0949, "step": 42596 }, { "epoch": 0.7786389310325919, "grad_norm": 5.989282287627052, "learning_rate": 1.231021770241489e-06, "loss": 17.2619, "step": 42597 }, { "epoch": 0.7786572102290383, "grad_norm": 5.765046810653433, "learning_rate": 1.2308272650264707e-06, "loss": 17.2269, "step": 42598 }, { "epoch": 0.7786754894254848, "grad_norm": 5.817183074890277, "learning_rate": 1.230632773022078e-06, "loss": 17.3416, "step": 42599 }, { "epoch": 0.7786937686219314, "grad_norm": 6.80892063780763, "learning_rate": 1.2304382942289883e-06, "loss": 17.423, "step": 42600 }, { "epoch": 0.7787120478183779, "grad_norm": 6.094261773121337, "learning_rate": 1.2302438286478858e-06, "loss": 17.2301, "step": 42601 }, { "epoch": 0.7787303270148245, "grad_norm": 6.564696055150384, "learning_rate": 1.2300493762794525e-06, "loss": 17.3137, "step": 42602 }, { "epoch": 0.778748606211271, "grad_norm": 6.429227928144083, "learning_rate": 1.2298549371243696e-06, "loss": 17.3398, "step": 42603 }, { "epoch": 0.7787668854077174, "grad_norm": 7.852744343056561, "learning_rate": 1.229660511183316e-06, "loss": 18.2002, "step": 42604 }, { "epoch": 0.778785164604164, "grad_norm": 5.990460472123902, "learning_rate": 1.2294660984569767e-06, "loss": 17.3902, "step": 42605 }, { "epoch": 0.7788034438006105, "grad_norm": 8.562098533065742, "learning_rate": 1.2292716989460302e-06, "loss": 17.9431, "step": 42606 }, { "epoch": 0.7788217229970571, "grad_norm": 6.161936102024805, "learning_rate": 1.2290773126511607e-06, "loss": 17.2009, "step": 42607 }, { "epoch": 0.7788400021935036, "grad_norm": 5.166663925466254, "learning_rate": 1.228882939573048e-06, "loss": 16.9159, "step": 42608 }, { "epoch": 0.7788582813899501, "grad_norm": 5.9519203413253265, "learning_rate": 1.2286885797123716e-06, "loss": 17.1884, "step": 42609 }, { "epoch": 0.7788765605863966, "grad_norm": 5.310177473295256, "learning_rate": 1.2284942330698147e-06, "loss": 17.0205, "step": 42610 }, { "epoch": 0.7788948397828431, "grad_norm": 7.791910452688526, "learning_rate": 1.2282998996460593e-06, "loss": 17.7515, "step": 42611 }, { "epoch": 0.7789131189792897, "grad_norm": 6.425372949261642, "learning_rate": 1.2281055794417846e-06, "loss": 17.3739, "step": 42612 }, { "epoch": 0.7789313981757362, "grad_norm": 5.499849779450958, "learning_rate": 1.2279112724576737e-06, "loss": 17.2448, "step": 42613 }, { "epoch": 0.7789496773721827, "grad_norm": 6.5824519851183405, "learning_rate": 1.227716978694406e-06, "loss": 17.7764, "step": 42614 }, { "epoch": 0.7789679565686293, "grad_norm": 5.535662080308192, "learning_rate": 1.2275226981526623e-06, "loss": 17.3922, "step": 42615 }, { "epoch": 0.7789862357650758, "grad_norm": 6.775059243527108, "learning_rate": 1.2273284308331252e-06, "loss": 17.3826, "step": 42616 }, { "epoch": 0.7790045149615223, "grad_norm": 7.577484678129559, "learning_rate": 1.2271341767364748e-06, "loss": 17.8096, "step": 42617 }, { "epoch": 0.7790227941579688, "grad_norm": 5.641437720006411, "learning_rate": 1.22693993586339e-06, "loss": 17.1113, "step": 42618 }, { "epoch": 0.7790410733544153, "grad_norm": 5.586565863212798, "learning_rate": 1.2267457082145535e-06, "loss": 17.1604, "step": 42619 }, { "epoch": 0.7790593525508619, "grad_norm": 6.530401885265186, "learning_rate": 1.226551493790647e-06, "loss": 17.3241, "step": 42620 }, { "epoch": 0.7790776317473084, "grad_norm": 6.038975875531012, "learning_rate": 1.2263572925923489e-06, "loss": 17.3624, "step": 42621 }, { "epoch": 0.779095910943755, "grad_norm": 5.0544218214452075, "learning_rate": 1.2261631046203426e-06, "loss": 17.0379, "step": 42622 }, { "epoch": 0.7791141901402014, "grad_norm": 5.62390194088699, "learning_rate": 1.2259689298753059e-06, "loss": 17.3598, "step": 42623 }, { "epoch": 0.7791324693366479, "grad_norm": 5.401631026265555, "learning_rate": 1.2257747683579213e-06, "loss": 17.0461, "step": 42624 }, { "epoch": 0.7791507485330945, "grad_norm": 6.503698878497694, "learning_rate": 1.225580620068869e-06, "loss": 17.0539, "step": 42625 }, { "epoch": 0.779169027729541, "grad_norm": 6.675404715420047, "learning_rate": 1.2253864850088277e-06, "loss": 17.9755, "step": 42626 }, { "epoch": 0.7791873069259876, "grad_norm": 5.540260980263048, "learning_rate": 1.2251923631784807e-06, "loss": 16.8332, "step": 42627 }, { "epoch": 0.7792055861224341, "grad_norm": 6.5642948652636095, "learning_rate": 1.2249982545785054e-06, "loss": 17.0238, "step": 42628 }, { "epoch": 0.7792238653188805, "grad_norm": 7.1757720677106684, "learning_rate": 1.2248041592095838e-06, "loss": 17.8945, "step": 42629 }, { "epoch": 0.7792421445153271, "grad_norm": 5.705240510460814, "learning_rate": 1.2246100770723973e-06, "loss": 17.3734, "step": 42630 }, { "epoch": 0.7792604237117736, "grad_norm": 5.714647838023737, "learning_rate": 1.2244160081676248e-06, "loss": 17.0759, "step": 42631 }, { "epoch": 0.7792787029082201, "grad_norm": 5.107707495643457, "learning_rate": 1.2242219524959453e-06, "loss": 16.893, "step": 42632 }, { "epoch": 0.7792969821046667, "grad_norm": 6.577592040472893, "learning_rate": 1.2240279100580415e-06, "loss": 17.5423, "step": 42633 }, { "epoch": 0.7793152613011132, "grad_norm": 4.989189160984518, "learning_rate": 1.2238338808545924e-06, "loss": 16.819, "step": 42634 }, { "epoch": 0.7793335404975598, "grad_norm": 5.139870586721333, "learning_rate": 1.2236398648862762e-06, "loss": 17.0709, "step": 42635 }, { "epoch": 0.7793518196940062, "grad_norm": 6.026392250844257, "learning_rate": 1.223445862153776e-06, "loss": 16.9744, "step": 42636 }, { "epoch": 0.7793700988904527, "grad_norm": 6.2837403894848505, "learning_rate": 1.223251872657769e-06, "loss": 17.4897, "step": 42637 }, { "epoch": 0.7793883780868993, "grad_norm": 7.29241154941073, "learning_rate": 1.2230578963989364e-06, "loss": 17.7508, "step": 42638 }, { "epoch": 0.7794066572833458, "grad_norm": 7.538429755534698, "learning_rate": 1.2228639333779596e-06, "loss": 17.7814, "step": 42639 }, { "epoch": 0.7794249364797924, "grad_norm": 5.420471268892611, "learning_rate": 1.2226699835955159e-06, "loss": 17.2044, "step": 42640 }, { "epoch": 0.7794432156762389, "grad_norm": 6.932242708734869, "learning_rate": 1.2224760470522867e-06, "loss": 17.6849, "step": 42641 }, { "epoch": 0.7794614948726853, "grad_norm": 6.296007319270624, "learning_rate": 1.2222821237489518e-06, "loss": 17.1703, "step": 42642 }, { "epoch": 0.7794797740691319, "grad_norm": 5.887771171362022, "learning_rate": 1.2220882136861888e-06, "loss": 17.5407, "step": 42643 }, { "epoch": 0.7794980532655784, "grad_norm": 6.9136628760095515, "learning_rate": 1.2218943168646803e-06, "loss": 17.6227, "step": 42644 }, { "epoch": 0.779516332462025, "grad_norm": 7.9529196530756305, "learning_rate": 1.221700433285104e-06, "loss": 17.8121, "step": 42645 }, { "epoch": 0.7795346116584715, "grad_norm": 8.08622854338447, "learning_rate": 1.2215065629481381e-06, "loss": 17.9363, "step": 42646 }, { "epoch": 0.779552890854918, "grad_norm": 5.686346667862254, "learning_rate": 1.2213127058544643e-06, "loss": 16.912, "step": 42647 }, { "epoch": 0.7795711700513646, "grad_norm": 5.708464597917384, "learning_rate": 1.2211188620047632e-06, "loss": 17.231, "step": 42648 }, { "epoch": 0.779589449247811, "grad_norm": 5.677586731482004, "learning_rate": 1.2209250313997107e-06, "loss": 16.9811, "step": 42649 }, { "epoch": 0.7796077284442576, "grad_norm": 6.061818468638301, "learning_rate": 1.2207312140399897e-06, "loss": 17.5128, "step": 42650 }, { "epoch": 0.7796260076407041, "grad_norm": 5.4762823457945204, "learning_rate": 1.2205374099262778e-06, "loss": 17.0149, "step": 42651 }, { "epoch": 0.7796442868371506, "grad_norm": 5.870269069278091, "learning_rate": 1.2203436190592533e-06, "loss": 17.2671, "step": 42652 }, { "epoch": 0.7796625660335972, "grad_norm": 5.949700156175741, "learning_rate": 1.2201498414395974e-06, "loss": 17.0519, "step": 42653 }, { "epoch": 0.7796808452300437, "grad_norm": 5.797502110761601, "learning_rate": 1.2199560770679869e-06, "loss": 17.0914, "step": 42654 }, { "epoch": 0.7796991244264903, "grad_norm": 5.85086375102694, "learning_rate": 1.2197623259451037e-06, "loss": 16.8325, "step": 42655 }, { "epoch": 0.7797174036229367, "grad_norm": 5.675292578063941, "learning_rate": 1.219568588071624e-06, "loss": 17.1239, "step": 42656 }, { "epoch": 0.7797356828193832, "grad_norm": 5.633940521957693, "learning_rate": 1.2193748634482283e-06, "loss": 16.8871, "step": 42657 }, { "epoch": 0.7797539620158298, "grad_norm": 5.390093142079519, "learning_rate": 1.2191811520755974e-06, "loss": 17.2013, "step": 42658 }, { "epoch": 0.7797722412122763, "grad_norm": 6.683203688150628, "learning_rate": 1.2189874539544084e-06, "loss": 17.6669, "step": 42659 }, { "epoch": 0.7797905204087229, "grad_norm": 5.951525034574011, "learning_rate": 1.2187937690853386e-06, "loss": 17.2736, "step": 42660 }, { "epoch": 0.7798087996051694, "grad_norm": 5.8730139674229065, "learning_rate": 1.2186000974690704e-06, "loss": 17.3742, "step": 42661 }, { "epoch": 0.7798270788016158, "grad_norm": 6.976447393274143, "learning_rate": 1.2184064391062806e-06, "loss": 17.4134, "step": 42662 }, { "epoch": 0.7798453579980624, "grad_norm": 6.240228094727846, "learning_rate": 1.2182127939976462e-06, "loss": 17.2659, "step": 42663 }, { "epoch": 0.7798636371945089, "grad_norm": 4.451061361165445, "learning_rate": 1.218019162143848e-06, "loss": 16.7415, "step": 42664 }, { "epoch": 0.7798819163909555, "grad_norm": 6.974008199270617, "learning_rate": 1.2178255435455661e-06, "loss": 17.5944, "step": 42665 }, { "epoch": 0.779900195587402, "grad_norm": 6.87059408560816, "learning_rate": 1.2176319382034762e-06, "loss": 17.3606, "step": 42666 }, { "epoch": 0.7799184747838485, "grad_norm": 6.053280614783786, "learning_rate": 1.2174383461182593e-06, "loss": 17.443, "step": 42667 }, { "epoch": 0.779936753980295, "grad_norm": 7.396061103016152, "learning_rate": 1.2172447672905912e-06, "loss": 17.9776, "step": 42668 }, { "epoch": 0.7799550331767415, "grad_norm": 8.322013203899933, "learning_rate": 1.2170512017211533e-06, "loss": 17.8868, "step": 42669 }, { "epoch": 0.7799733123731881, "grad_norm": 5.541784489385306, "learning_rate": 1.2168576494106233e-06, "loss": 17.039, "step": 42670 }, { "epoch": 0.7799915915696346, "grad_norm": 6.3981539249772705, "learning_rate": 1.2166641103596766e-06, "loss": 17.3653, "step": 42671 }, { "epoch": 0.7800098707660811, "grad_norm": 6.415224492552726, "learning_rate": 1.2164705845689962e-06, "loss": 17.6011, "step": 42672 }, { "epoch": 0.7800281499625277, "grad_norm": 6.248752888160591, "learning_rate": 1.216277072039256e-06, "loss": 17.3256, "step": 42673 }, { "epoch": 0.7800464291589742, "grad_norm": 4.968571514733656, "learning_rate": 1.2160835727711367e-06, "loss": 17.0315, "step": 42674 }, { "epoch": 0.7800647083554207, "grad_norm": 8.155361494891114, "learning_rate": 1.2158900867653173e-06, "loss": 18.0791, "step": 42675 }, { "epoch": 0.7800829875518672, "grad_norm": 5.587914434192319, "learning_rate": 1.2156966140224752e-06, "loss": 17.0469, "step": 42676 }, { "epoch": 0.7801012667483137, "grad_norm": 7.17749081179289, "learning_rate": 1.2155031545432861e-06, "loss": 17.6058, "step": 42677 }, { "epoch": 0.7801195459447603, "grad_norm": 6.255464312720115, "learning_rate": 1.2153097083284316e-06, "loss": 17.1401, "step": 42678 }, { "epoch": 0.7801378251412068, "grad_norm": 7.081567195121454, "learning_rate": 1.2151162753785882e-06, "loss": 17.7504, "step": 42679 }, { "epoch": 0.7801561043376534, "grad_norm": 6.437435404715499, "learning_rate": 1.2149228556944326e-06, "loss": 17.2858, "step": 42680 }, { "epoch": 0.7801743835340998, "grad_norm": 5.275423066798229, "learning_rate": 1.2147294492766454e-06, "loss": 16.8759, "step": 42681 }, { "epoch": 0.7801926627305463, "grad_norm": 7.796158526988258, "learning_rate": 1.2145360561259012e-06, "loss": 17.742, "step": 42682 }, { "epoch": 0.7802109419269929, "grad_norm": 7.032089505181174, "learning_rate": 1.2143426762428801e-06, "loss": 17.4205, "step": 42683 }, { "epoch": 0.7802292211234394, "grad_norm": 7.242922341143784, "learning_rate": 1.2141493096282602e-06, "loss": 17.9816, "step": 42684 }, { "epoch": 0.780247500319886, "grad_norm": 4.607420461925749, "learning_rate": 1.2139559562827174e-06, "loss": 16.6572, "step": 42685 }, { "epoch": 0.7802657795163325, "grad_norm": 5.995942872073584, "learning_rate": 1.213762616206932e-06, "loss": 17.3661, "step": 42686 }, { "epoch": 0.780284058712779, "grad_norm": 6.640670522114129, "learning_rate": 1.21356928940158e-06, "loss": 18.0177, "step": 42687 }, { "epoch": 0.7803023379092255, "grad_norm": 6.216159747931447, "learning_rate": 1.2133759758673375e-06, "loss": 17.1662, "step": 42688 }, { "epoch": 0.780320617105672, "grad_norm": 7.3399648394348285, "learning_rate": 1.2131826756048853e-06, "loss": 17.7429, "step": 42689 }, { "epoch": 0.7803388963021186, "grad_norm": 7.134555974292454, "learning_rate": 1.212989388614899e-06, "loss": 17.7792, "step": 42690 }, { "epoch": 0.7803571754985651, "grad_norm": 5.865532615444877, "learning_rate": 1.212796114898055e-06, "loss": 17.3051, "step": 42691 }, { "epoch": 0.7803754546950116, "grad_norm": 7.0830840405226905, "learning_rate": 1.2126028544550316e-06, "loss": 17.5492, "step": 42692 }, { "epoch": 0.7803937338914582, "grad_norm": 6.660962143261817, "learning_rate": 1.2124096072865087e-06, "loss": 17.2646, "step": 42693 }, { "epoch": 0.7804120130879046, "grad_norm": 8.551723388596356, "learning_rate": 1.2122163733931597e-06, "loss": 18.219, "step": 42694 }, { "epoch": 0.7804302922843512, "grad_norm": 6.212923977007496, "learning_rate": 1.2120231527756648e-06, "loss": 17.2974, "step": 42695 }, { "epoch": 0.7804485714807977, "grad_norm": 7.4707529068785705, "learning_rate": 1.2118299454347e-06, "loss": 17.7433, "step": 42696 }, { "epoch": 0.7804668506772442, "grad_norm": 5.203426479525822, "learning_rate": 1.2116367513709415e-06, "loss": 16.864, "step": 42697 }, { "epoch": 0.7804851298736908, "grad_norm": 5.871759131880911, "learning_rate": 1.2114435705850686e-06, "loss": 17.0977, "step": 42698 }, { "epoch": 0.7805034090701373, "grad_norm": 5.6561609294500315, "learning_rate": 1.2112504030777556e-06, "loss": 17.0062, "step": 42699 }, { "epoch": 0.7805216882665837, "grad_norm": 4.752811138717374, "learning_rate": 1.211057248849683e-06, "loss": 16.8521, "step": 42700 }, { "epoch": 0.7805399674630303, "grad_norm": 6.971516796706719, "learning_rate": 1.2108641079015243e-06, "loss": 17.7352, "step": 42701 }, { "epoch": 0.7805582466594768, "grad_norm": 7.304447357134524, "learning_rate": 1.2106709802339584e-06, "loss": 18.0989, "step": 42702 }, { "epoch": 0.7805765258559234, "grad_norm": 7.179211865913673, "learning_rate": 1.2104778658476629e-06, "loss": 17.619, "step": 42703 }, { "epoch": 0.7805948050523699, "grad_norm": 6.307328706888291, "learning_rate": 1.2102847647433136e-06, "loss": 17.63, "step": 42704 }, { "epoch": 0.7806130842488164, "grad_norm": 6.1811937182140335, "learning_rate": 1.2100916769215864e-06, "loss": 17.2618, "step": 42705 }, { "epoch": 0.780631363445263, "grad_norm": 5.716905658641969, "learning_rate": 1.2098986023831599e-06, "loss": 17.3647, "step": 42706 }, { "epoch": 0.7806496426417094, "grad_norm": 7.629576495590106, "learning_rate": 1.2097055411287102e-06, "loss": 17.7628, "step": 42707 }, { "epoch": 0.780667921838156, "grad_norm": 6.83912016244722, "learning_rate": 1.2095124931589124e-06, "loss": 17.368, "step": 42708 }, { "epoch": 0.7806862010346025, "grad_norm": 5.88704006306788, "learning_rate": 1.2093194584744455e-06, "loss": 17.2912, "step": 42709 }, { "epoch": 0.780704480231049, "grad_norm": 6.260686851057531, "learning_rate": 1.2091264370759836e-06, "loss": 17.3615, "step": 42710 }, { "epoch": 0.7807227594274956, "grad_norm": 6.276606334925512, "learning_rate": 1.2089334289642046e-06, "loss": 17.4674, "step": 42711 }, { "epoch": 0.7807410386239421, "grad_norm": 6.8453801591191095, "learning_rate": 1.2087404341397862e-06, "loss": 17.8209, "step": 42712 }, { "epoch": 0.7807593178203887, "grad_norm": 5.622699884759124, "learning_rate": 1.2085474526034025e-06, "loss": 17.1295, "step": 42713 }, { "epoch": 0.7807775970168351, "grad_norm": 6.048525984023393, "learning_rate": 1.2083544843557327e-06, "loss": 17.2011, "step": 42714 }, { "epoch": 0.7807958762132816, "grad_norm": 6.330757759793127, "learning_rate": 1.2081615293974508e-06, "loss": 17.1681, "step": 42715 }, { "epoch": 0.7808141554097282, "grad_norm": 7.596923036507865, "learning_rate": 1.2079685877292325e-06, "loss": 17.7752, "step": 42716 }, { "epoch": 0.7808324346061747, "grad_norm": 5.999939155635302, "learning_rate": 1.2077756593517565e-06, "loss": 17.2324, "step": 42717 }, { "epoch": 0.7808507138026213, "grad_norm": 5.809652143919409, "learning_rate": 1.2075827442656962e-06, "loss": 16.863, "step": 42718 }, { "epoch": 0.7808689929990678, "grad_norm": 5.45825326312107, "learning_rate": 1.2073898424717311e-06, "loss": 16.944, "step": 42719 }, { "epoch": 0.7808872721955142, "grad_norm": 6.220327827240432, "learning_rate": 1.207196953970534e-06, "loss": 17.3492, "step": 42720 }, { "epoch": 0.7809055513919608, "grad_norm": 7.588860051118435, "learning_rate": 1.207004078762784e-06, "loss": 17.8507, "step": 42721 }, { "epoch": 0.7809238305884073, "grad_norm": 8.240639263240075, "learning_rate": 1.2068112168491536e-06, "loss": 17.8729, "step": 42722 }, { "epoch": 0.7809421097848539, "grad_norm": 6.032809445412664, "learning_rate": 1.2066183682303228e-06, "loss": 17.2064, "step": 42723 }, { "epoch": 0.7809603889813004, "grad_norm": 6.960153423671335, "learning_rate": 1.2064255329069653e-06, "loss": 17.5041, "step": 42724 }, { "epoch": 0.7809786681777469, "grad_norm": 5.585077933782431, "learning_rate": 1.206232710879755e-06, "loss": 17.188, "step": 42725 }, { "epoch": 0.7809969473741935, "grad_norm": 6.040739449498271, "learning_rate": 1.2060399021493718e-06, "loss": 17.4115, "step": 42726 }, { "epoch": 0.7810152265706399, "grad_norm": 6.614352131835614, "learning_rate": 1.2058471067164884e-06, "loss": 17.2916, "step": 42727 }, { "epoch": 0.7810335057670865, "grad_norm": 6.987491384010311, "learning_rate": 1.2056543245817815e-06, "loss": 17.4517, "step": 42728 }, { "epoch": 0.781051784963533, "grad_norm": 5.6559605578819, "learning_rate": 1.2054615557459283e-06, "loss": 17.2165, "step": 42729 }, { "epoch": 0.7810700641599795, "grad_norm": 6.736767991684119, "learning_rate": 1.2052688002096013e-06, "loss": 17.5025, "step": 42730 }, { "epoch": 0.7810883433564261, "grad_norm": 7.664905979017578, "learning_rate": 1.2050760579734793e-06, "loss": 17.9725, "step": 42731 }, { "epoch": 0.7811066225528726, "grad_norm": 4.771978690989922, "learning_rate": 1.2048833290382366e-06, "loss": 16.8884, "step": 42732 }, { "epoch": 0.7811249017493191, "grad_norm": 5.284156660035649, "learning_rate": 1.2046906134045472e-06, "loss": 16.8281, "step": 42733 }, { "epoch": 0.7811431809457656, "grad_norm": 5.180134114645993, "learning_rate": 1.2044979110730892e-06, "loss": 16.8689, "step": 42734 }, { "epoch": 0.7811614601422121, "grad_norm": 7.169965799261897, "learning_rate": 1.2043052220445367e-06, "loss": 17.8959, "step": 42735 }, { "epoch": 0.7811797393386587, "grad_norm": 6.039725644339459, "learning_rate": 1.2041125463195635e-06, "loss": 17.2581, "step": 42736 }, { "epoch": 0.7811980185351052, "grad_norm": 7.947351971071936, "learning_rate": 1.2039198838988464e-06, "loss": 17.4229, "step": 42737 }, { "epoch": 0.7812162977315518, "grad_norm": 6.260790453799529, "learning_rate": 1.2037272347830626e-06, "loss": 17.681, "step": 42738 }, { "epoch": 0.7812345769279982, "grad_norm": 9.179107827602143, "learning_rate": 1.2035345989728835e-06, "loss": 18.3403, "step": 42739 }, { "epoch": 0.7812528561244447, "grad_norm": 7.385281218552737, "learning_rate": 1.2033419764689875e-06, "loss": 17.9351, "step": 42740 }, { "epoch": 0.7812711353208913, "grad_norm": 5.204530084350965, "learning_rate": 1.203149367272049e-06, "loss": 16.9119, "step": 42741 }, { "epoch": 0.7812894145173378, "grad_norm": 6.88752158105439, "learning_rate": 1.2029567713827407e-06, "loss": 17.4086, "step": 42742 }, { "epoch": 0.7813076937137844, "grad_norm": 6.638279856554908, "learning_rate": 1.2027641888017405e-06, "loss": 17.4789, "step": 42743 }, { "epoch": 0.7813259729102309, "grad_norm": 6.060983543977107, "learning_rate": 1.2025716195297215e-06, "loss": 17.273, "step": 42744 }, { "epoch": 0.7813442521066774, "grad_norm": 5.990667965265759, "learning_rate": 1.2023790635673605e-06, "loss": 17.1429, "step": 42745 }, { "epoch": 0.7813625313031239, "grad_norm": 5.765568846218434, "learning_rate": 1.20218652091533e-06, "loss": 16.9502, "step": 42746 }, { "epoch": 0.7813808104995704, "grad_norm": 6.628799988658001, "learning_rate": 1.2019939915743061e-06, "loss": 17.1045, "step": 42747 }, { "epoch": 0.781399089696017, "grad_norm": 5.170774881354382, "learning_rate": 1.2018014755449654e-06, "loss": 16.9797, "step": 42748 }, { "epoch": 0.7814173688924635, "grad_norm": 12.164829605961268, "learning_rate": 1.2016089728279806e-06, "loss": 17.782, "step": 42749 }, { "epoch": 0.78143564808891, "grad_norm": 6.830770784269893, "learning_rate": 1.2014164834240256e-06, "loss": 17.2933, "step": 42750 }, { "epoch": 0.7814539272853566, "grad_norm": 6.472192000763893, "learning_rate": 1.2012240073337778e-06, "loss": 17.4965, "step": 42751 }, { "epoch": 0.781472206481803, "grad_norm": 9.397647105486069, "learning_rate": 1.2010315445579096e-06, "loss": 17.6438, "step": 42752 }, { "epoch": 0.7814904856782496, "grad_norm": 5.830053787647473, "learning_rate": 1.200839095097095e-06, "loss": 17.2372, "step": 42753 }, { "epoch": 0.7815087648746961, "grad_norm": 5.670686916864026, "learning_rate": 1.200646658952011e-06, "loss": 17.0913, "step": 42754 }, { "epoch": 0.7815270440711426, "grad_norm": 7.187934857578401, "learning_rate": 1.2004542361233295e-06, "loss": 18.0331, "step": 42755 }, { "epoch": 0.7815453232675892, "grad_norm": 6.6687336738111185, "learning_rate": 1.2002618266117265e-06, "loss": 17.7632, "step": 42756 }, { "epoch": 0.7815636024640357, "grad_norm": 7.854503628187549, "learning_rate": 1.2000694304178774e-06, "loss": 17.7287, "step": 42757 }, { "epoch": 0.7815818816604823, "grad_norm": 5.81047434736952, "learning_rate": 1.1998770475424547e-06, "loss": 17.096, "step": 42758 }, { "epoch": 0.7816001608569287, "grad_norm": 6.158079160550747, "learning_rate": 1.199684677986132e-06, "loss": 17.3984, "step": 42759 }, { "epoch": 0.7816184400533752, "grad_norm": 5.798634222458621, "learning_rate": 1.1994923217495864e-06, "loss": 17.0106, "step": 42760 }, { "epoch": 0.7816367192498218, "grad_norm": 5.820830715289999, "learning_rate": 1.1992999788334891e-06, "loss": 17.0582, "step": 42761 }, { "epoch": 0.7816549984462683, "grad_norm": 6.558758776817823, "learning_rate": 1.1991076492385168e-06, "loss": 17.3473, "step": 42762 }, { "epoch": 0.7816732776427149, "grad_norm": 7.603791498882458, "learning_rate": 1.1989153329653418e-06, "loss": 17.5907, "step": 42763 }, { "epoch": 0.7816915568391614, "grad_norm": 5.285748004200565, "learning_rate": 1.198723030014638e-06, "loss": 16.8023, "step": 42764 }, { "epoch": 0.7817098360356078, "grad_norm": 7.09107554105377, "learning_rate": 1.19853074038708e-06, "loss": 17.9552, "step": 42765 }, { "epoch": 0.7817281152320544, "grad_norm": 5.65881804679318, "learning_rate": 1.198338464083343e-06, "loss": 17.1397, "step": 42766 }, { "epoch": 0.7817463944285009, "grad_norm": 6.609555656123916, "learning_rate": 1.1981462011040984e-06, "loss": 17.2583, "step": 42767 }, { "epoch": 0.7817646736249474, "grad_norm": 6.424152179815889, "learning_rate": 1.1979539514500232e-06, "loss": 17.1094, "step": 42768 }, { "epoch": 0.781782952821394, "grad_norm": 6.784503262231009, "learning_rate": 1.1977617151217897e-06, "loss": 17.5876, "step": 42769 }, { "epoch": 0.7818012320178405, "grad_norm": 7.66073281022385, "learning_rate": 1.1975694921200693e-06, "loss": 18.0342, "step": 42770 }, { "epoch": 0.781819511214287, "grad_norm": 5.450881866843921, "learning_rate": 1.1973772824455399e-06, "loss": 17.0015, "step": 42771 }, { "epoch": 0.7818377904107335, "grad_norm": 7.237075733548884, "learning_rate": 1.1971850860988727e-06, "loss": 17.9727, "step": 42772 }, { "epoch": 0.78185606960718, "grad_norm": 6.607265688668965, "learning_rate": 1.1969929030807404e-06, "loss": 17.6082, "step": 42773 }, { "epoch": 0.7818743488036266, "grad_norm": 7.257090617364822, "learning_rate": 1.196800733391818e-06, "loss": 17.8321, "step": 42774 }, { "epoch": 0.7818926280000731, "grad_norm": 6.264351941579994, "learning_rate": 1.196608577032779e-06, "loss": 17.3856, "step": 42775 }, { "epoch": 0.7819109071965197, "grad_norm": 6.951559150302467, "learning_rate": 1.196416434004299e-06, "loss": 17.7795, "step": 42776 }, { "epoch": 0.7819291863929662, "grad_norm": 6.441728786208173, "learning_rate": 1.1962243043070488e-06, "loss": 17.4189, "step": 42777 }, { "epoch": 0.7819474655894126, "grad_norm": 6.046919660433981, "learning_rate": 1.1960321879417004e-06, "loss": 17.2862, "step": 42778 }, { "epoch": 0.7819657447858592, "grad_norm": 5.9175733923503575, "learning_rate": 1.1958400849089313e-06, "loss": 17.2833, "step": 42779 }, { "epoch": 0.7819840239823057, "grad_norm": 5.992453213290898, "learning_rate": 1.195647995209412e-06, "loss": 17.3336, "step": 42780 }, { "epoch": 0.7820023031787523, "grad_norm": 5.235587851925682, "learning_rate": 1.1954559188438153e-06, "loss": 17.0535, "step": 42781 }, { "epoch": 0.7820205823751988, "grad_norm": 5.5326800437285435, "learning_rate": 1.1952638558128165e-06, "loss": 17.4098, "step": 42782 }, { "epoch": 0.7820388615716453, "grad_norm": 6.225138470221255, "learning_rate": 1.1950718061170869e-06, "loss": 17.7754, "step": 42783 }, { "epoch": 0.7820571407680919, "grad_norm": 5.885960884346636, "learning_rate": 1.1948797697573e-06, "loss": 17.0761, "step": 42784 }, { "epoch": 0.7820754199645383, "grad_norm": 5.160209279708696, "learning_rate": 1.194687746734131e-06, "loss": 16.8732, "step": 42785 }, { "epoch": 0.7820936991609849, "grad_norm": 5.818986018998294, "learning_rate": 1.1944957370482513e-06, "loss": 17.2098, "step": 42786 }, { "epoch": 0.7821119783574314, "grad_norm": 6.484476170492542, "learning_rate": 1.194303740700332e-06, "loss": 17.6986, "step": 42787 }, { "epoch": 0.7821302575538779, "grad_norm": 5.5566087466462655, "learning_rate": 1.1941117576910494e-06, "loss": 17.1463, "step": 42788 }, { "epoch": 0.7821485367503245, "grad_norm": 6.491760935414973, "learning_rate": 1.1939197880210751e-06, "loss": 17.5794, "step": 42789 }, { "epoch": 0.782166815946771, "grad_norm": 5.552064756412269, "learning_rate": 1.19372783169108e-06, "loss": 16.8781, "step": 42790 }, { "epoch": 0.7821850951432175, "grad_norm": 6.341346713989295, "learning_rate": 1.1935358887017385e-06, "loss": 17.2009, "step": 42791 }, { "epoch": 0.782203374339664, "grad_norm": 6.080465924265575, "learning_rate": 1.193343959053725e-06, "loss": 17.2199, "step": 42792 }, { "epoch": 0.7822216535361105, "grad_norm": 7.0634260555543875, "learning_rate": 1.193152042747709e-06, "loss": 17.283, "step": 42793 }, { "epoch": 0.7822399327325571, "grad_norm": 6.12373537063498, "learning_rate": 1.1929601397843665e-06, "loss": 17.3806, "step": 42794 }, { "epoch": 0.7822582119290036, "grad_norm": 6.652708265711707, "learning_rate": 1.1927682501643668e-06, "loss": 17.4343, "step": 42795 }, { "epoch": 0.7822764911254502, "grad_norm": 6.6840721225947295, "learning_rate": 1.192576373888385e-06, "loss": 17.7676, "step": 42796 }, { "epoch": 0.7822947703218966, "grad_norm": 8.177212056498895, "learning_rate": 1.1923845109570932e-06, "loss": 18.3768, "step": 42797 }, { "epoch": 0.7823130495183431, "grad_norm": 6.470374214330064, "learning_rate": 1.1921926613711615e-06, "loss": 17.5675, "step": 42798 }, { "epoch": 0.7823313287147897, "grad_norm": 5.827618702010871, "learning_rate": 1.1920008251312654e-06, "loss": 17.0976, "step": 42799 }, { "epoch": 0.7823496079112362, "grad_norm": 5.313128095898094, "learning_rate": 1.1918090022380747e-06, "loss": 17.1386, "step": 42800 }, { "epoch": 0.7823678871076828, "grad_norm": 5.5440584631097805, "learning_rate": 1.1916171926922631e-06, "loss": 17.1956, "step": 42801 }, { "epoch": 0.7823861663041293, "grad_norm": 7.079621841307022, "learning_rate": 1.1914253964945043e-06, "loss": 17.5037, "step": 42802 }, { "epoch": 0.7824044455005758, "grad_norm": 5.103159926554725, "learning_rate": 1.191233613645469e-06, "loss": 17.0387, "step": 42803 }, { "epoch": 0.7824227246970223, "grad_norm": 5.27399449819022, "learning_rate": 1.1910418441458276e-06, "loss": 16.8655, "step": 42804 }, { "epoch": 0.7824410038934688, "grad_norm": 6.036606780097282, "learning_rate": 1.1908500879962554e-06, "loss": 17.1738, "step": 42805 }, { "epoch": 0.7824592830899154, "grad_norm": 4.905298125981883, "learning_rate": 1.1906583451974219e-06, "loss": 16.8105, "step": 42806 }, { "epoch": 0.7824775622863619, "grad_norm": 5.911508870734221, "learning_rate": 1.1904666157500017e-06, "loss": 17.1052, "step": 42807 }, { "epoch": 0.7824958414828084, "grad_norm": 7.2070624732886825, "learning_rate": 1.190274899654666e-06, "loss": 17.4902, "step": 42808 }, { "epoch": 0.782514120679255, "grad_norm": 5.562227641367937, "learning_rate": 1.190083196912084e-06, "loss": 17.0875, "step": 42809 }, { "epoch": 0.7825323998757014, "grad_norm": 8.720633897500646, "learning_rate": 1.18989150752293e-06, "loss": 18.2251, "step": 42810 }, { "epoch": 0.782550679072148, "grad_norm": 5.8392276210982645, "learning_rate": 1.1896998314878771e-06, "loss": 17.3316, "step": 42811 }, { "epoch": 0.7825689582685945, "grad_norm": 5.379578410784124, "learning_rate": 1.1895081688075944e-06, "loss": 16.7346, "step": 42812 }, { "epoch": 0.782587237465041, "grad_norm": 6.8795419206272745, "learning_rate": 1.1893165194827561e-06, "loss": 17.2528, "step": 42813 }, { "epoch": 0.7826055166614876, "grad_norm": 6.267110423741253, "learning_rate": 1.1891248835140322e-06, "loss": 17.2128, "step": 42814 }, { "epoch": 0.7826237958579341, "grad_norm": 5.3891960372830985, "learning_rate": 1.1889332609020942e-06, "loss": 17.1659, "step": 42815 }, { "epoch": 0.7826420750543807, "grad_norm": 6.416224128151918, "learning_rate": 1.1887416516476157e-06, "loss": 17.1092, "step": 42816 }, { "epoch": 0.7826603542508271, "grad_norm": 6.728650158706573, "learning_rate": 1.188550055751267e-06, "loss": 17.5907, "step": 42817 }, { "epoch": 0.7826786334472736, "grad_norm": 4.900506833939032, "learning_rate": 1.1883584732137182e-06, "loss": 16.9081, "step": 42818 }, { "epoch": 0.7826969126437202, "grad_norm": 5.838929600339497, "learning_rate": 1.1881669040356424e-06, "loss": 17.3244, "step": 42819 }, { "epoch": 0.7827151918401667, "grad_norm": 5.62961021358292, "learning_rate": 1.187975348217712e-06, "loss": 17.2066, "step": 42820 }, { "epoch": 0.7827334710366133, "grad_norm": 8.937164190314586, "learning_rate": 1.1877838057605957e-06, "loss": 17.8578, "step": 42821 }, { "epoch": 0.7827517502330598, "grad_norm": 6.274126813377534, "learning_rate": 1.1875922766649679e-06, "loss": 17.2751, "step": 42822 }, { "epoch": 0.7827700294295062, "grad_norm": 7.172490728795538, "learning_rate": 1.187400760931497e-06, "loss": 17.9054, "step": 42823 }, { "epoch": 0.7827883086259528, "grad_norm": 5.572694816371766, "learning_rate": 1.1872092585608569e-06, "loss": 17.0098, "step": 42824 }, { "epoch": 0.7828065878223993, "grad_norm": 6.303275288427898, "learning_rate": 1.187017769553717e-06, "loss": 17.3433, "step": 42825 }, { "epoch": 0.7828248670188459, "grad_norm": 6.871401406372942, "learning_rate": 1.1868262939107482e-06, "loss": 17.3363, "step": 42826 }, { "epoch": 0.7828431462152924, "grad_norm": 5.577566899873731, "learning_rate": 1.1866348316326237e-06, "loss": 17.0394, "step": 42827 }, { "epoch": 0.7828614254117389, "grad_norm": 6.6236627109392225, "learning_rate": 1.1864433827200117e-06, "loss": 17.4977, "step": 42828 }, { "epoch": 0.7828797046081855, "grad_norm": 5.263793218485106, "learning_rate": 1.1862519471735844e-06, "loss": 16.9531, "step": 42829 }, { "epoch": 0.7828979838046319, "grad_norm": 5.940442472956537, "learning_rate": 1.1860605249940149e-06, "loss": 17.2587, "step": 42830 }, { "epoch": 0.7829162630010785, "grad_norm": 7.283078214640864, "learning_rate": 1.1858691161819725e-06, "loss": 17.4405, "step": 42831 }, { "epoch": 0.782934542197525, "grad_norm": 5.416607885312943, "learning_rate": 1.185677720738126e-06, "loss": 17.0613, "step": 42832 }, { "epoch": 0.7829528213939715, "grad_norm": 6.630007709553986, "learning_rate": 1.1854863386631494e-06, "loss": 17.3468, "step": 42833 }, { "epoch": 0.7829711005904181, "grad_norm": 6.791369686773743, "learning_rate": 1.1852949699577126e-06, "loss": 17.5907, "step": 42834 }, { "epoch": 0.7829893797868646, "grad_norm": 5.721879955816004, "learning_rate": 1.1851036146224842e-06, "loss": 17.0367, "step": 42835 }, { "epoch": 0.783007658983311, "grad_norm": 8.836264973576831, "learning_rate": 1.184912272658138e-06, "loss": 17.4363, "step": 42836 }, { "epoch": 0.7830259381797576, "grad_norm": 5.499293064145359, "learning_rate": 1.1847209440653418e-06, "loss": 16.9051, "step": 42837 }, { "epoch": 0.7830442173762041, "grad_norm": 6.851863945447305, "learning_rate": 1.1845296288447677e-06, "loss": 17.9252, "step": 42838 }, { "epoch": 0.7830624965726507, "grad_norm": 20.62335436827337, "learning_rate": 1.1843383269970875e-06, "loss": 17.2944, "step": 42839 }, { "epoch": 0.7830807757690972, "grad_norm": 6.9466256429247215, "learning_rate": 1.1841470385229691e-06, "loss": 17.5774, "step": 42840 }, { "epoch": 0.7830990549655437, "grad_norm": 5.858550756052127, "learning_rate": 1.183955763423086e-06, "loss": 17.1238, "step": 42841 }, { "epoch": 0.7831173341619903, "grad_norm": 7.609961816493579, "learning_rate": 1.1837645016981064e-06, "loss": 17.7809, "step": 42842 }, { "epoch": 0.7831356133584367, "grad_norm": 6.0603057563293925, "learning_rate": 1.1835732533486994e-06, "loss": 17.2438, "step": 42843 }, { "epoch": 0.7831538925548833, "grad_norm": 5.4297826419871775, "learning_rate": 1.183382018375539e-06, "loss": 17.2164, "step": 42844 }, { "epoch": 0.7831721717513298, "grad_norm": 5.766694821649852, "learning_rate": 1.1831907967792915e-06, "loss": 16.9893, "step": 42845 }, { "epoch": 0.7831904509477763, "grad_norm": 6.307928129230847, "learning_rate": 1.1829995885606305e-06, "loss": 17.5163, "step": 42846 }, { "epoch": 0.7832087301442229, "grad_norm": 5.92796610719214, "learning_rate": 1.1828083937202234e-06, "loss": 17.4106, "step": 42847 }, { "epoch": 0.7832270093406694, "grad_norm": 7.024558461214832, "learning_rate": 1.1826172122587432e-06, "loss": 17.8236, "step": 42848 }, { "epoch": 0.783245288537116, "grad_norm": 6.382798698148589, "learning_rate": 1.182426044176857e-06, "loss": 17.5002, "step": 42849 }, { "epoch": 0.7832635677335624, "grad_norm": 6.342396466273868, "learning_rate": 1.1822348894752373e-06, "loss": 17.4226, "step": 42850 }, { "epoch": 0.7832818469300089, "grad_norm": 6.000192340787251, "learning_rate": 1.1820437481545532e-06, "loss": 17.0744, "step": 42851 }, { "epoch": 0.7833001261264555, "grad_norm": 8.00925942497589, "learning_rate": 1.1818526202154728e-06, "loss": 17.4958, "step": 42852 }, { "epoch": 0.783318405322902, "grad_norm": 5.987044935466709, "learning_rate": 1.1816615056586694e-06, "loss": 17.0393, "step": 42853 }, { "epoch": 0.7833366845193486, "grad_norm": 6.762174875726645, "learning_rate": 1.1814704044848097e-06, "loss": 17.8767, "step": 42854 }, { "epoch": 0.783354963715795, "grad_norm": 5.193471047576716, "learning_rate": 1.1812793166945653e-06, "loss": 16.988, "step": 42855 }, { "epoch": 0.7833732429122415, "grad_norm": 5.346373155294613, "learning_rate": 1.181088242288606e-06, "loss": 17.0599, "step": 42856 }, { "epoch": 0.7833915221086881, "grad_norm": 7.159164851292972, "learning_rate": 1.1808971812676002e-06, "loss": 17.3487, "step": 42857 }, { "epoch": 0.7834098013051346, "grad_norm": 6.432437797205815, "learning_rate": 1.1807061336322196e-06, "loss": 17.5226, "step": 42858 }, { "epoch": 0.7834280805015812, "grad_norm": 7.087836575964951, "learning_rate": 1.1805150993831327e-06, "loss": 17.3502, "step": 42859 }, { "epoch": 0.7834463596980277, "grad_norm": 7.614308002029718, "learning_rate": 1.1803240785210075e-06, "loss": 17.8736, "step": 42860 }, { "epoch": 0.7834646388944742, "grad_norm": 5.5012223250865375, "learning_rate": 1.1801330710465158e-06, "loss": 17.016, "step": 42861 }, { "epoch": 0.7834829180909207, "grad_norm": 5.3409828100269054, "learning_rate": 1.179942076960327e-06, "loss": 17.21, "step": 42862 }, { "epoch": 0.7835011972873672, "grad_norm": 6.003438245580432, "learning_rate": 1.179751096263108e-06, "loss": 17.3765, "step": 42863 }, { "epoch": 0.7835194764838138, "grad_norm": 5.848484021137771, "learning_rate": 1.1795601289555297e-06, "loss": 17.0266, "step": 42864 }, { "epoch": 0.7835377556802603, "grad_norm": 5.897351135793664, "learning_rate": 1.1793691750382636e-06, "loss": 16.8784, "step": 42865 }, { "epoch": 0.7835560348767068, "grad_norm": 7.217620945480876, "learning_rate": 1.1791782345119752e-06, "loss": 17.3203, "step": 42866 }, { "epoch": 0.7835743140731534, "grad_norm": 8.446493662901911, "learning_rate": 1.1789873073773367e-06, "loss": 17.3492, "step": 42867 }, { "epoch": 0.7835925932695998, "grad_norm": 5.892830575356725, "learning_rate": 1.1787963936350149e-06, "loss": 17.189, "step": 42868 }, { "epoch": 0.7836108724660464, "grad_norm": 6.420273096221956, "learning_rate": 1.1786054932856817e-06, "loss": 17.4854, "step": 42869 }, { "epoch": 0.7836291516624929, "grad_norm": 6.943986072829057, "learning_rate": 1.1784146063300045e-06, "loss": 17.5432, "step": 42870 }, { "epoch": 0.7836474308589394, "grad_norm": 6.867026891781695, "learning_rate": 1.1782237327686514e-06, "loss": 17.6425, "step": 42871 }, { "epoch": 0.783665710055386, "grad_norm": 5.764750268551351, "learning_rate": 1.1780328726022938e-06, "loss": 17.2785, "step": 42872 }, { "epoch": 0.7836839892518325, "grad_norm": 8.855921977332246, "learning_rate": 1.1778420258315975e-06, "loss": 18.0699, "step": 42873 }, { "epoch": 0.7837022684482791, "grad_norm": 7.481909079537944, "learning_rate": 1.1776511924572336e-06, "loss": 17.8934, "step": 42874 }, { "epoch": 0.7837205476447255, "grad_norm": 8.271935930828997, "learning_rate": 1.1774603724798717e-06, "loss": 17.9173, "step": 42875 }, { "epoch": 0.783738826841172, "grad_norm": 7.779902105454478, "learning_rate": 1.1772695659001798e-06, "loss": 18.3767, "step": 42876 }, { "epoch": 0.7837571060376186, "grad_norm": 7.774345101683941, "learning_rate": 1.1770787727188248e-06, "loss": 17.1001, "step": 42877 }, { "epoch": 0.7837753852340651, "grad_norm": 4.964776514398999, "learning_rate": 1.1768879929364785e-06, "loss": 16.9324, "step": 42878 }, { "epoch": 0.7837936644305117, "grad_norm": 6.928316975544064, "learning_rate": 1.176697226553808e-06, "loss": 17.641, "step": 42879 }, { "epoch": 0.7838119436269582, "grad_norm": 5.515048132827798, "learning_rate": 1.1765064735714804e-06, "loss": 17.3415, "step": 42880 }, { "epoch": 0.7838302228234046, "grad_norm": 7.877629598519014, "learning_rate": 1.176315733990167e-06, "loss": 18.1463, "step": 42881 }, { "epoch": 0.7838485020198512, "grad_norm": 6.148006580654276, "learning_rate": 1.1761250078105341e-06, "loss": 17.329, "step": 42882 }, { "epoch": 0.7838667812162977, "grad_norm": 7.213559548803365, "learning_rate": 1.1759342950332514e-06, "loss": 18.1086, "step": 42883 }, { "epoch": 0.7838850604127443, "grad_norm": 6.79567418762639, "learning_rate": 1.175743595658988e-06, "loss": 17.6879, "step": 42884 }, { "epoch": 0.7839033396091908, "grad_norm": 6.5037598548993705, "learning_rate": 1.1755529096884105e-06, "loss": 17.227, "step": 42885 }, { "epoch": 0.7839216188056373, "grad_norm": 7.551546437439698, "learning_rate": 1.1753622371221895e-06, "loss": 17.9019, "step": 42886 }, { "epoch": 0.7839398980020839, "grad_norm": 6.134652558061319, "learning_rate": 1.1751715779609918e-06, "loss": 17.154, "step": 42887 }, { "epoch": 0.7839581771985303, "grad_norm": 6.674142421758252, "learning_rate": 1.1749809322054845e-06, "loss": 17.128, "step": 42888 }, { "epoch": 0.7839764563949769, "grad_norm": 6.0132090258397675, "learning_rate": 1.1747902998563387e-06, "loss": 17.1045, "step": 42889 }, { "epoch": 0.7839947355914234, "grad_norm": 6.5972574525134995, "learning_rate": 1.1745996809142207e-06, "loss": 17.5238, "step": 42890 }, { "epoch": 0.7840130147878699, "grad_norm": 5.53912117451712, "learning_rate": 1.1744090753797976e-06, "loss": 17.1518, "step": 42891 }, { "epoch": 0.7840312939843165, "grad_norm": 6.668091656015032, "learning_rate": 1.174218483253739e-06, "loss": 17.3222, "step": 42892 }, { "epoch": 0.784049573180763, "grad_norm": 7.2188772733519, "learning_rate": 1.1740279045367136e-06, "loss": 17.8601, "step": 42893 }, { "epoch": 0.7840678523772096, "grad_norm": 7.473578287137676, "learning_rate": 1.1738373392293872e-06, "loss": 17.8146, "step": 42894 }, { "epoch": 0.784086131573656, "grad_norm": 6.697607524733662, "learning_rate": 1.1736467873324302e-06, "loss": 17.4378, "step": 42895 }, { "epoch": 0.7841044107701025, "grad_norm": 6.409788414391398, "learning_rate": 1.173456248846509e-06, "loss": 17.4661, "step": 42896 }, { "epoch": 0.7841226899665491, "grad_norm": 5.624358565442689, "learning_rate": 1.1732657237722905e-06, "loss": 17.2282, "step": 42897 }, { "epoch": 0.7841409691629956, "grad_norm": 6.031908138493249, "learning_rate": 1.173075212110445e-06, "loss": 17.2338, "step": 42898 }, { "epoch": 0.7841592483594422, "grad_norm": 5.340199669105275, "learning_rate": 1.1728847138616372e-06, "loss": 16.984, "step": 42899 }, { "epoch": 0.7841775275558887, "grad_norm": 6.417978284380252, "learning_rate": 1.1726942290265381e-06, "loss": 17.3174, "step": 42900 }, { "epoch": 0.7841958067523351, "grad_norm": 7.5816699239442515, "learning_rate": 1.172503757605812e-06, "loss": 17.4019, "step": 42901 }, { "epoch": 0.7842140859487817, "grad_norm": 5.640561790176727, "learning_rate": 1.1723132996001273e-06, "loss": 17.1291, "step": 42902 }, { "epoch": 0.7842323651452282, "grad_norm": 5.880536612636144, "learning_rate": 1.1721228550101548e-06, "loss": 17.4317, "step": 42903 }, { "epoch": 0.7842506443416747, "grad_norm": 7.276079557284683, "learning_rate": 1.1719324238365593e-06, "loss": 17.859, "step": 42904 }, { "epoch": 0.7842689235381213, "grad_norm": 6.521924040189684, "learning_rate": 1.1717420060800072e-06, "loss": 17.3199, "step": 42905 }, { "epoch": 0.7842872027345678, "grad_norm": 6.040442845102132, "learning_rate": 1.1715516017411682e-06, "loss": 17.2323, "step": 42906 }, { "epoch": 0.7843054819310143, "grad_norm": 5.823954633563693, "learning_rate": 1.1713612108207085e-06, "loss": 17.1041, "step": 42907 }, { "epoch": 0.7843237611274608, "grad_norm": 5.371187764929658, "learning_rate": 1.1711708333192944e-06, "loss": 17.0591, "step": 42908 }, { "epoch": 0.7843420403239073, "grad_norm": 6.448761324272681, "learning_rate": 1.170980469237596e-06, "loss": 17.5748, "step": 42909 }, { "epoch": 0.7843603195203539, "grad_norm": 6.941841455447979, "learning_rate": 1.1707901185762772e-06, "loss": 17.3813, "step": 42910 }, { "epoch": 0.7843785987168004, "grad_norm": 6.1523623022433735, "learning_rate": 1.1705997813360065e-06, "loss": 17.4595, "step": 42911 }, { "epoch": 0.784396877913247, "grad_norm": 5.765435687621581, "learning_rate": 1.1704094575174524e-06, "loss": 17.0979, "step": 42912 }, { "epoch": 0.7844151571096935, "grad_norm": 5.893394093620187, "learning_rate": 1.1702191471212798e-06, "loss": 17.1781, "step": 42913 }, { "epoch": 0.7844334363061399, "grad_norm": 6.123661043091025, "learning_rate": 1.1700288501481582e-06, "loss": 17.5015, "step": 42914 }, { "epoch": 0.7844517155025865, "grad_norm": 5.021357667118495, "learning_rate": 1.1698385665987533e-06, "loss": 17.1513, "step": 42915 }, { "epoch": 0.784469994699033, "grad_norm": 5.5684299478777834, "learning_rate": 1.16964829647373e-06, "loss": 17.2548, "step": 42916 }, { "epoch": 0.7844882738954796, "grad_norm": 5.662340848210398, "learning_rate": 1.1694580397737588e-06, "loss": 17.0318, "step": 42917 }, { "epoch": 0.7845065530919261, "grad_norm": 6.070487048502959, "learning_rate": 1.169267796499503e-06, "loss": 17.2756, "step": 42918 }, { "epoch": 0.7845248322883726, "grad_norm": 13.618295059488048, "learning_rate": 1.169077566651633e-06, "loss": 18.2412, "step": 42919 }, { "epoch": 0.7845431114848191, "grad_norm": 8.261160561347154, "learning_rate": 1.1688873502308122e-06, "loss": 17.7692, "step": 42920 }, { "epoch": 0.7845613906812656, "grad_norm": 4.679032344574224, "learning_rate": 1.1686971472377101e-06, "loss": 16.7795, "step": 42921 }, { "epoch": 0.7845796698777122, "grad_norm": 5.833774719452144, "learning_rate": 1.1685069576729908e-06, "loss": 17.4188, "step": 42922 }, { "epoch": 0.7845979490741587, "grad_norm": 6.754889563660707, "learning_rate": 1.168316781537323e-06, "loss": 17.3934, "step": 42923 }, { "epoch": 0.7846162282706052, "grad_norm": 5.833427785397539, "learning_rate": 1.1681266188313729e-06, "loss": 17.0701, "step": 42924 }, { "epoch": 0.7846345074670518, "grad_norm": 6.44169915266153, "learning_rate": 1.167936469555805e-06, "loss": 17.1606, "step": 42925 }, { "epoch": 0.7846527866634982, "grad_norm": 6.281591696257632, "learning_rate": 1.1677463337112882e-06, "loss": 17.3884, "step": 42926 }, { "epoch": 0.7846710658599448, "grad_norm": 5.87837805567763, "learning_rate": 1.1675562112984873e-06, "loss": 17.2316, "step": 42927 }, { "epoch": 0.7846893450563913, "grad_norm": 5.578767612454941, "learning_rate": 1.1673661023180688e-06, "loss": 17.2769, "step": 42928 }, { "epoch": 0.7847076242528378, "grad_norm": 6.62006058125122, "learning_rate": 1.1671760067707011e-06, "loss": 17.5035, "step": 42929 }, { "epoch": 0.7847259034492844, "grad_norm": 6.69582307855878, "learning_rate": 1.1669859246570475e-06, "loss": 17.0261, "step": 42930 }, { "epoch": 0.7847441826457309, "grad_norm": 5.25973645253755, "learning_rate": 1.166795855977777e-06, "loss": 17.1145, "step": 42931 }, { "epoch": 0.7847624618421775, "grad_norm": 6.901284380340854, "learning_rate": 1.1666058007335545e-06, "loss": 17.6119, "step": 42932 }, { "epoch": 0.7847807410386239, "grad_norm": 6.144934329941307, "learning_rate": 1.1664157589250442e-06, "loss": 17.2636, "step": 42933 }, { "epoch": 0.7847990202350704, "grad_norm": 6.520070842160074, "learning_rate": 1.1662257305529157e-06, "loss": 17.4681, "step": 42934 }, { "epoch": 0.784817299431517, "grad_norm": 4.962016380928543, "learning_rate": 1.1660357156178337e-06, "loss": 17.074, "step": 42935 }, { "epoch": 0.7848355786279635, "grad_norm": 6.0059362702656935, "learning_rate": 1.165845714120462e-06, "loss": 17.3451, "step": 42936 }, { "epoch": 0.7848538578244101, "grad_norm": 6.040885777207398, "learning_rate": 1.1656557260614682e-06, "loss": 17.0545, "step": 42937 }, { "epoch": 0.7848721370208566, "grad_norm": 6.332113221046521, "learning_rate": 1.1654657514415202e-06, "loss": 17.1, "step": 42938 }, { "epoch": 0.784890416217303, "grad_norm": 7.6960076682251675, "learning_rate": 1.1652757902612804e-06, "loss": 17.8211, "step": 42939 }, { "epoch": 0.7849086954137496, "grad_norm": 6.440731985242378, "learning_rate": 1.1650858425214174e-06, "loss": 17.693, "step": 42940 }, { "epoch": 0.7849269746101961, "grad_norm": 7.159278173751689, "learning_rate": 1.1648959082225959e-06, "loss": 17.8804, "step": 42941 }, { "epoch": 0.7849452538066427, "grad_norm": 8.15733852016395, "learning_rate": 1.1647059873654804e-06, "loss": 17.978, "step": 42942 }, { "epoch": 0.7849635330030892, "grad_norm": 5.819717940830605, "learning_rate": 1.1645160799507382e-06, "loss": 17.1197, "step": 42943 }, { "epoch": 0.7849818121995357, "grad_norm": 6.600111716213003, "learning_rate": 1.1643261859790333e-06, "loss": 17.6579, "step": 42944 }, { "epoch": 0.7850000913959823, "grad_norm": 7.015798069950846, "learning_rate": 1.1641363054510341e-06, "loss": 17.714, "step": 42945 }, { "epoch": 0.7850183705924287, "grad_norm": 5.6257218686567265, "learning_rate": 1.1639464383674025e-06, "loss": 17.0324, "step": 42946 }, { "epoch": 0.7850366497888753, "grad_norm": 5.9213028641768775, "learning_rate": 1.1637565847288056e-06, "loss": 17.1147, "step": 42947 }, { "epoch": 0.7850549289853218, "grad_norm": 5.710269270493332, "learning_rate": 1.1635667445359106e-06, "loss": 17.2376, "step": 42948 }, { "epoch": 0.7850732081817683, "grad_norm": 5.627735615518182, "learning_rate": 1.163376917789381e-06, "loss": 17.2191, "step": 42949 }, { "epoch": 0.7850914873782149, "grad_norm": 6.338071848322815, "learning_rate": 1.1631871044898812e-06, "loss": 17.242, "step": 42950 }, { "epoch": 0.7851097665746614, "grad_norm": 7.450820706497133, "learning_rate": 1.1629973046380793e-06, "loss": 18.0979, "step": 42951 }, { "epoch": 0.785128045771108, "grad_norm": 6.847946997428599, "learning_rate": 1.1628075182346388e-06, "loss": 17.7923, "step": 42952 }, { "epoch": 0.7851463249675544, "grad_norm": 5.529531606015044, "learning_rate": 1.1626177452802235e-06, "loss": 17.1346, "step": 42953 }, { "epoch": 0.7851646041640009, "grad_norm": 6.186801321746402, "learning_rate": 1.162427985775501e-06, "loss": 17.3635, "step": 42954 }, { "epoch": 0.7851828833604475, "grad_norm": 7.807385049499234, "learning_rate": 1.1622382397211346e-06, "loss": 17.7259, "step": 42955 }, { "epoch": 0.785201162556894, "grad_norm": 8.154218360646928, "learning_rate": 1.1620485071177894e-06, "loss": 18.103, "step": 42956 }, { "epoch": 0.7852194417533406, "grad_norm": 5.864547458840046, "learning_rate": 1.1618587879661331e-06, "loss": 17.1242, "step": 42957 }, { "epoch": 0.785237720949787, "grad_norm": 6.0917437400172165, "learning_rate": 1.1616690822668286e-06, "loss": 16.9649, "step": 42958 }, { "epoch": 0.7852560001462335, "grad_norm": 7.393054173746856, "learning_rate": 1.1614793900205395e-06, "loss": 17.754, "step": 42959 }, { "epoch": 0.7852742793426801, "grad_norm": 4.355051858414017, "learning_rate": 1.1612897112279337e-06, "loss": 16.7258, "step": 42960 }, { "epoch": 0.7852925585391266, "grad_norm": 7.064676657912687, "learning_rate": 1.1611000458896726e-06, "loss": 17.6563, "step": 42961 }, { "epoch": 0.7853108377355732, "grad_norm": 6.495276680367234, "learning_rate": 1.1609103940064243e-06, "loss": 17.5309, "step": 42962 }, { "epoch": 0.7853291169320197, "grad_norm": 5.442047289357563, "learning_rate": 1.160720755578852e-06, "loss": 17.0387, "step": 42963 }, { "epoch": 0.7853473961284662, "grad_norm": 6.0652108404075555, "learning_rate": 1.1605311306076184e-06, "loss": 17.3422, "step": 42964 }, { "epoch": 0.7853656753249127, "grad_norm": 6.687259544415262, "learning_rate": 1.1603415190933903e-06, "loss": 17.3333, "step": 42965 }, { "epoch": 0.7853839545213592, "grad_norm": 5.733417792358152, "learning_rate": 1.1601519210368333e-06, "loss": 17.2697, "step": 42966 }, { "epoch": 0.7854022337178058, "grad_norm": 6.255855121300795, "learning_rate": 1.1599623364386092e-06, "loss": 17.6649, "step": 42967 }, { "epoch": 0.7854205129142523, "grad_norm": 5.390784134363727, "learning_rate": 1.1597727652993856e-06, "loss": 16.8197, "step": 42968 }, { "epoch": 0.7854387921106988, "grad_norm": 5.992339263344806, "learning_rate": 1.1595832076198249e-06, "loss": 17.0185, "step": 42969 }, { "epoch": 0.7854570713071454, "grad_norm": 5.354318492379776, "learning_rate": 1.159393663400591e-06, "loss": 16.9662, "step": 42970 }, { "epoch": 0.7854753505035919, "grad_norm": 7.036787744104813, "learning_rate": 1.1592041326423498e-06, "loss": 18.048, "step": 42971 }, { "epoch": 0.7854936297000383, "grad_norm": 5.803939433808, "learning_rate": 1.1590146153457633e-06, "loss": 17.4175, "step": 42972 }, { "epoch": 0.7855119088964849, "grad_norm": 6.2199939599730385, "learning_rate": 1.158825111511499e-06, "loss": 17.3627, "step": 42973 }, { "epoch": 0.7855301880929314, "grad_norm": 6.522780940206207, "learning_rate": 1.1586356211402184e-06, "loss": 17.5701, "step": 42974 }, { "epoch": 0.785548467289378, "grad_norm": 5.129550690076474, "learning_rate": 1.1584461442325856e-06, "loss": 16.9113, "step": 42975 }, { "epoch": 0.7855667464858245, "grad_norm": 6.038367232564583, "learning_rate": 1.1582566807892676e-06, "loss": 17.135, "step": 42976 }, { "epoch": 0.785585025682271, "grad_norm": 5.159028267807909, "learning_rate": 1.158067230810927e-06, "loss": 16.9222, "step": 42977 }, { "epoch": 0.7856033048787175, "grad_norm": 6.215608725517066, "learning_rate": 1.1578777942982256e-06, "loss": 17.0869, "step": 42978 }, { "epoch": 0.785621584075164, "grad_norm": 7.238766880327436, "learning_rate": 1.1576883712518305e-06, "loss": 17.792, "step": 42979 }, { "epoch": 0.7856398632716106, "grad_norm": 5.3457704060648465, "learning_rate": 1.1574989616724043e-06, "loss": 17.3907, "step": 42980 }, { "epoch": 0.7856581424680571, "grad_norm": 5.953546470198454, "learning_rate": 1.1573095655606092e-06, "loss": 17.0161, "step": 42981 }, { "epoch": 0.7856764216645036, "grad_norm": 6.032163770535431, "learning_rate": 1.1571201829171108e-06, "loss": 17.1804, "step": 42982 }, { "epoch": 0.7856947008609502, "grad_norm": 5.897463479465074, "learning_rate": 1.1569308137425738e-06, "loss": 17.2152, "step": 42983 }, { "epoch": 0.7857129800573966, "grad_norm": 5.935639612623084, "learning_rate": 1.1567414580376596e-06, "loss": 17.2366, "step": 42984 }, { "epoch": 0.7857312592538432, "grad_norm": 6.864027835384487, "learning_rate": 1.1565521158030346e-06, "loss": 17.9693, "step": 42985 }, { "epoch": 0.7857495384502897, "grad_norm": 5.932199776537232, "learning_rate": 1.1563627870393613e-06, "loss": 17.2948, "step": 42986 }, { "epoch": 0.7857678176467362, "grad_norm": 7.035170596707109, "learning_rate": 1.1561734717473006e-06, "loss": 17.444, "step": 42987 }, { "epoch": 0.7857860968431828, "grad_norm": 6.36805617089263, "learning_rate": 1.1559841699275204e-06, "loss": 17.4517, "step": 42988 }, { "epoch": 0.7858043760396293, "grad_norm": 6.395416715072899, "learning_rate": 1.155794881580682e-06, "loss": 17.4097, "step": 42989 }, { "epoch": 0.7858226552360759, "grad_norm": 4.373423902640016, "learning_rate": 1.1556056067074472e-06, "loss": 16.6159, "step": 42990 }, { "epoch": 0.7858409344325223, "grad_norm": 4.814521096406163, "learning_rate": 1.1554163453084817e-06, "loss": 17.012, "step": 42991 }, { "epoch": 0.7858592136289688, "grad_norm": 5.840656781396422, "learning_rate": 1.1552270973844476e-06, "loss": 17.1117, "step": 42992 }, { "epoch": 0.7858774928254154, "grad_norm": 6.745877009502302, "learning_rate": 1.1550378629360109e-06, "loss": 17.4977, "step": 42993 }, { "epoch": 0.7858957720218619, "grad_norm": 5.741857243684384, "learning_rate": 1.1548486419638327e-06, "loss": 17.1747, "step": 42994 }, { "epoch": 0.7859140512183085, "grad_norm": 7.773714383464625, "learning_rate": 1.1546594344685746e-06, "loss": 18.1321, "step": 42995 }, { "epoch": 0.785932330414755, "grad_norm": 6.4100919564558065, "learning_rate": 1.1544702404509033e-06, "loss": 17.5147, "step": 42996 }, { "epoch": 0.7859506096112014, "grad_norm": 5.869978316642008, "learning_rate": 1.15428105991148e-06, "loss": 17.0324, "step": 42997 }, { "epoch": 0.785968888807648, "grad_norm": 6.2604483379804785, "learning_rate": 1.1540918928509665e-06, "loss": 17.3565, "step": 42998 }, { "epoch": 0.7859871680040945, "grad_norm": 5.754678692257368, "learning_rate": 1.1539027392700286e-06, "loss": 17.0168, "step": 42999 }, { "epoch": 0.7860054472005411, "grad_norm": 7.256380016610058, "learning_rate": 1.1537135991693265e-06, "loss": 17.8717, "step": 43000 }, { "epoch": 0.7860237263969876, "grad_norm": 6.034209774206624, "learning_rate": 1.1535244725495244e-06, "loss": 17.2338, "step": 43001 }, { "epoch": 0.7860420055934341, "grad_norm": 6.396128164967185, "learning_rate": 1.1533353594112867e-06, "loss": 17.6926, "step": 43002 }, { "epoch": 0.7860602847898807, "grad_norm": 5.685552412318963, "learning_rate": 1.1531462597552745e-06, "loss": 17.1073, "step": 43003 }, { "epoch": 0.7860785639863271, "grad_norm": 5.672500290492889, "learning_rate": 1.1529571735821492e-06, "loss": 17.1571, "step": 43004 }, { "epoch": 0.7860968431827737, "grad_norm": 6.2322512796302965, "learning_rate": 1.1527681008925774e-06, "loss": 17.3131, "step": 43005 }, { "epoch": 0.7861151223792202, "grad_norm": 5.597390698391444, "learning_rate": 1.1525790416872178e-06, "loss": 17.0396, "step": 43006 }, { "epoch": 0.7861334015756667, "grad_norm": 6.987686104781027, "learning_rate": 1.1523899959667363e-06, "loss": 17.5181, "step": 43007 }, { "epoch": 0.7861516807721133, "grad_norm": 6.49891050062853, "learning_rate": 1.1522009637317939e-06, "loss": 17.3422, "step": 43008 }, { "epoch": 0.7861699599685598, "grad_norm": 6.702191538095681, "learning_rate": 1.1520119449830513e-06, "loss": 17.2128, "step": 43009 }, { "epoch": 0.7861882391650064, "grad_norm": 6.902383989430501, "learning_rate": 1.1518229397211734e-06, "loss": 17.2929, "step": 43010 }, { "epoch": 0.7862065183614528, "grad_norm": 6.074931475252007, "learning_rate": 1.1516339479468235e-06, "loss": 17.3462, "step": 43011 }, { "epoch": 0.7862247975578993, "grad_norm": 5.362282940664848, "learning_rate": 1.1514449696606612e-06, "loss": 16.9875, "step": 43012 }, { "epoch": 0.7862430767543459, "grad_norm": 5.634139831160146, "learning_rate": 1.151256004863352e-06, "loss": 16.8566, "step": 43013 }, { "epoch": 0.7862613559507924, "grad_norm": 4.57476952629355, "learning_rate": 1.1510670535555563e-06, "loss": 16.8022, "step": 43014 }, { "epoch": 0.786279635147239, "grad_norm": 6.704699013816149, "learning_rate": 1.150878115737935e-06, "loss": 17.2667, "step": 43015 }, { "epoch": 0.7862979143436855, "grad_norm": 5.738115890022374, "learning_rate": 1.150689191411154e-06, "loss": 17.0875, "step": 43016 }, { "epoch": 0.7863161935401319, "grad_norm": 5.258434672025138, "learning_rate": 1.1505002805758725e-06, "loss": 17.1385, "step": 43017 }, { "epoch": 0.7863344727365785, "grad_norm": 6.076969150344429, "learning_rate": 1.150311383232753e-06, "loss": 17.241, "step": 43018 }, { "epoch": 0.786352751933025, "grad_norm": 6.006679253948823, "learning_rate": 1.1501224993824572e-06, "loss": 17.3348, "step": 43019 }, { "epoch": 0.7863710311294716, "grad_norm": 5.360364559455867, "learning_rate": 1.1499336290256495e-06, "loss": 17.1128, "step": 43020 }, { "epoch": 0.7863893103259181, "grad_norm": 5.920571338025359, "learning_rate": 1.149744772162989e-06, "loss": 17.1572, "step": 43021 }, { "epoch": 0.7864075895223646, "grad_norm": 6.964523517921411, "learning_rate": 1.1495559287951407e-06, "loss": 17.4589, "step": 43022 }, { "epoch": 0.7864258687188111, "grad_norm": 6.397847931593472, "learning_rate": 1.1493670989227628e-06, "loss": 17.3277, "step": 43023 }, { "epoch": 0.7864441479152576, "grad_norm": 7.050897033798298, "learning_rate": 1.1491782825465208e-06, "loss": 17.7624, "step": 43024 }, { "epoch": 0.7864624271117042, "grad_norm": 6.205177535699501, "learning_rate": 1.1489894796670749e-06, "loss": 17.2842, "step": 43025 }, { "epoch": 0.7864807063081507, "grad_norm": 8.290813968912563, "learning_rate": 1.1488006902850851e-06, "loss": 17.7812, "step": 43026 }, { "epoch": 0.7864989855045972, "grad_norm": 5.326537434986195, "learning_rate": 1.148611914401216e-06, "loss": 17.0183, "step": 43027 }, { "epoch": 0.7865172647010438, "grad_norm": 7.032921031392491, "learning_rate": 1.1484231520161265e-06, "loss": 17.4347, "step": 43028 }, { "epoch": 0.7865355438974903, "grad_norm": 4.357660896799989, "learning_rate": 1.1482344031304798e-06, "loss": 16.552, "step": 43029 }, { "epoch": 0.7865538230939368, "grad_norm": 6.550024977638346, "learning_rate": 1.1480456677449392e-06, "loss": 17.4296, "step": 43030 }, { "epoch": 0.7865721022903833, "grad_norm": 6.911473160425084, "learning_rate": 1.1478569458601635e-06, "loss": 17.509, "step": 43031 }, { "epoch": 0.7865903814868298, "grad_norm": 6.4961070813979, "learning_rate": 1.1476682374768138e-06, "loss": 17.5201, "step": 43032 }, { "epoch": 0.7866086606832764, "grad_norm": 7.564390495007873, "learning_rate": 1.1474795425955537e-06, "loss": 18.4041, "step": 43033 }, { "epoch": 0.7866269398797229, "grad_norm": 5.389595455542531, "learning_rate": 1.1472908612170441e-06, "loss": 17.1692, "step": 43034 }, { "epoch": 0.7866452190761695, "grad_norm": 5.47417541463403, "learning_rate": 1.147102193341944e-06, "loss": 17.0078, "step": 43035 }, { "epoch": 0.786663498272616, "grad_norm": 5.823895556460048, "learning_rate": 1.146913538970918e-06, "loss": 17.1561, "step": 43036 }, { "epoch": 0.7866817774690624, "grad_norm": 7.85800038759533, "learning_rate": 1.1467248981046237e-06, "loss": 18.2774, "step": 43037 }, { "epoch": 0.786700056665509, "grad_norm": 6.2096726472809385, "learning_rate": 1.146536270743725e-06, "loss": 17.3481, "step": 43038 }, { "epoch": 0.7867183358619555, "grad_norm": 8.254148903583987, "learning_rate": 1.1463476568888832e-06, "loss": 17.628, "step": 43039 }, { "epoch": 0.786736615058402, "grad_norm": 7.287989077511543, "learning_rate": 1.1461590565407571e-06, "loss": 17.7084, "step": 43040 }, { "epoch": 0.7867548942548486, "grad_norm": 5.112146621136499, "learning_rate": 1.1459704697000107e-06, "loss": 16.9454, "step": 43041 }, { "epoch": 0.786773173451295, "grad_norm": 7.0873161865401455, "learning_rate": 1.1457818963673028e-06, "loss": 16.9919, "step": 43042 }, { "epoch": 0.7867914526477416, "grad_norm": 7.292265007479272, "learning_rate": 1.1455933365432942e-06, "loss": 17.5999, "step": 43043 }, { "epoch": 0.7868097318441881, "grad_norm": 5.359815618330775, "learning_rate": 1.1454047902286475e-06, "loss": 16.8146, "step": 43044 }, { "epoch": 0.7868280110406346, "grad_norm": 6.300739403207879, "learning_rate": 1.1452162574240212e-06, "loss": 17.2704, "step": 43045 }, { "epoch": 0.7868462902370812, "grad_norm": 6.076181352806068, "learning_rate": 1.145027738130079e-06, "loss": 17.1473, "step": 43046 }, { "epoch": 0.7868645694335277, "grad_norm": 7.133005605107506, "learning_rate": 1.1448392323474784e-06, "loss": 17.5947, "step": 43047 }, { "epoch": 0.7868828486299743, "grad_norm": 7.464306423949789, "learning_rate": 1.1446507400768831e-06, "loss": 17.6207, "step": 43048 }, { "epoch": 0.7869011278264207, "grad_norm": 6.788100960692533, "learning_rate": 1.1444622613189515e-06, "loss": 17.1949, "step": 43049 }, { "epoch": 0.7869194070228672, "grad_norm": 5.496917198591807, "learning_rate": 1.144273796074346e-06, "loss": 16.6769, "step": 43050 }, { "epoch": 0.7869376862193138, "grad_norm": 6.638260714065062, "learning_rate": 1.1440853443437249e-06, "loss": 17.5924, "step": 43051 }, { "epoch": 0.7869559654157603, "grad_norm": 7.997980050808377, "learning_rate": 1.1438969061277516e-06, "loss": 17.7355, "step": 43052 }, { "epoch": 0.7869742446122069, "grad_norm": 5.930953072734631, "learning_rate": 1.143708481427085e-06, "loss": 17.354, "step": 43053 }, { "epoch": 0.7869925238086534, "grad_norm": 5.667457771135627, "learning_rate": 1.1435200702423843e-06, "loss": 17.069, "step": 43054 }, { "epoch": 0.7870108030050998, "grad_norm": 6.349107899242943, "learning_rate": 1.143331672574311e-06, "loss": 17.5689, "step": 43055 }, { "epoch": 0.7870290822015464, "grad_norm": 5.74592867894833, "learning_rate": 1.1431432884235267e-06, "loss": 17.1298, "step": 43056 }, { "epoch": 0.7870473613979929, "grad_norm": 6.976089772067334, "learning_rate": 1.1429549177906895e-06, "loss": 17.4519, "step": 43057 }, { "epoch": 0.7870656405944395, "grad_norm": 8.055671519454435, "learning_rate": 1.1427665606764616e-06, "loss": 18.3527, "step": 43058 }, { "epoch": 0.787083919790886, "grad_norm": 6.014605559163717, "learning_rate": 1.1425782170815025e-06, "loss": 17.1446, "step": 43059 }, { "epoch": 0.7871021989873325, "grad_norm": 5.039802537028447, "learning_rate": 1.1423898870064704e-06, "loss": 16.8561, "step": 43060 }, { "epoch": 0.7871204781837791, "grad_norm": 7.602902949397708, "learning_rate": 1.1422015704520285e-06, "loss": 17.8363, "step": 43061 }, { "epoch": 0.7871387573802255, "grad_norm": 6.630072780711955, "learning_rate": 1.1420132674188355e-06, "loss": 17.312, "step": 43062 }, { "epoch": 0.7871570365766721, "grad_norm": 6.464563752903156, "learning_rate": 1.1418249779075497e-06, "loss": 17.2409, "step": 43063 }, { "epoch": 0.7871753157731186, "grad_norm": 6.571157094227565, "learning_rate": 1.1416367019188324e-06, "loss": 17.2507, "step": 43064 }, { "epoch": 0.7871935949695651, "grad_norm": 8.777926155218148, "learning_rate": 1.1414484394533454e-06, "loss": 18.1907, "step": 43065 }, { "epoch": 0.7872118741660117, "grad_norm": 6.062837718953478, "learning_rate": 1.141260190511745e-06, "loss": 17.2166, "step": 43066 }, { "epoch": 0.7872301533624582, "grad_norm": 6.0560388659901045, "learning_rate": 1.1410719550946942e-06, "loss": 17.0757, "step": 43067 }, { "epoch": 0.7872484325589048, "grad_norm": 5.158972570945011, "learning_rate": 1.14088373320285e-06, "loss": 16.9624, "step": 43068 }, { "epoch": 0.7872667117553512, "grad_norm": 7.292739701440524, "learning_rate": 1.1406955248368751e-06, "loss": 18.2131, "step": 43069 }, { "epoch": 0.7872849909517977, "grad_norm": 7.023894993371395, "learning_rate": 1.1405073299974274e-06, "loss": 17.8129, "step": 43070 }, { "epoch": 0.7873032701482443, "grad_norm": 6.280963886829021, "learning_rate": 1.140319148685165e-06, "loss": 17.3976, "step": 43071 }, { "epoch": 0.7873215493446908, "grad_norm": 6.579478712994111, "learning_rate": 1.1401309809007505e-06, "loss": 17.191, "step": 43072 }, { "epoch": 0.7873398285411374, "grad_norm": 6.089186697772453, "learning_rate": 1.1399428266448403e-06, "loss": 17.2762, "step": 43073 }, { "epoch": 0.7873581077375839, "grad_norm": 6.201110467454369, "learning_rate": 1.1397546859180958e-06, "loss": 17.3322, "step": 43074 }, { "epoch": 0.7873763869340303, "grad_norm": 7.634163435316809, "learning_rate": 1.139566558721178e-06, "loss": 18.05, "step": 43075 }, { "epoch": 0.7873946661304769, "grad_norm": 7.227636417552934, "learning_rate": 1.139378445054744e-06, "loss": 17.225, "step": 43076 }, { "epoch": 0.7874129453269234, "grad_norm": 5.9173901336158305, "learning_rate": 1.1391903449194519e-06, "loss": 17.4543, "step": 43077 }, { "epoch": 0.78743122452337, "grad_norm": 5.3817255527369126, "learning_rate": 1.1390022583159644e-06, "loss": 17.0565, "step": 43078 }, { "epoch": 0.7874495037198165, "grad_norm": 5.853349434636654, "learning_rate": 1.1388141852449385e-06, "loss": 17.1277, "step": 43079 }, { "epoch": 0.787467782916263, "grad_norm": 6.659599614110373, "learning_rate": 1.138626125707033e-06, "loss": 17.3768, "step": 43080 }, { "epoch": 0.7874860621127096, "grad_norm": 6.209670136327641, "learning_rate": 1.1384380797029087e-06, "loss": 17.1171, "step": 43081 }, { "epoch": 0.787504341309156, "grad_norm": 7.34516050842967, "learning_rate": 1.138250047233223e-06, "loss": 17.3963, "step": 43082 }, { "epoch": 0.7875226205056026, "grad_norm": 5.757452708852677, "learning_rate": 1.1380620282986355e-06, "loss": 17.2253, "step": 43083 }, { "epoch": 0.7875408997020491, "grad_norm": 5.5628169445172455, "learning_rate": 1.137874022899807e-06, "loss": 17.0154, "step": 43084 }, { "epoch": 0.7875591788984956, "grad_norm": 7.238032801582006, "learning_rate": 1.1376860310373933e-06, "loss": 17.5634, "step": 43085 }, { "epoch": 0.7875774580949422, "grad_norm": 6.05797523806618, "learning_rate": 1.1374980527120566e-06, "loss": 17.1902, "step": 43086 }, { "epoch": 0.7875957372913887, "grad_norm": 7.316353728223308, "learning_rate": 1.1373100879244536e-06, "loss": 17.88, "step": 43087 }, { "epoch": 0.7876140164878352, "grad_norm": 9.448099496097468, "learning_rate": 1.137122136675242e-06, "loss": 17.5574, "step": 43088 }, { "epoch": 0.7876322956842817, "grad_norm": 5.02704076580612, "learning_rate": 1.1369341989650846e-06, "loss": 16.9958, "step": 43089 }, { "epoch": 0.7876505748807282, "grad_norm": 6.831931565657985, "learning_rate": 1.1367462747946368e-06, "loss": 17.6371, "step": 43090 }, { "epoch": 0.7876688540771748, "grad_norm": 6.019605524979685, "learning_rate": 1.1365583641645567e-06, "loss": 17.3267, "step": 43091 }, { "epoch": 0.7876871332736213, "grad_norm": 6.859710916460606, "learning_rate": 1.1363704670755043e-06, "loss": 17.9284, "step": 43092 }, { "epoch": 0.7877054124700679, "grad_norm": 6.836094929276527, "learning_rate": 1.1361825835281393e-06, "loss": 17.7144, "step": 43093 }, { "epoch": 0.7877236916665143, "grad_norm": 5.851209872022327, "learning_rate": 1.135994713523118e-06, "loss": 17.1899, "step": 43094 }, { "epoch": 0.7877419708629608, "grad_norm": 5.343981115477729, "learning_rate": 1.1358068570611013e-06, "loss": 17.0154, "step": 43095 }, { "epoch": 0.7877602500594074, "grad_norm": 5.122655602339348, "learning_rate": 1.1356190141427464e-06, "loss": 16.887, "step": 43096 }, { "epoch": 0.7877785292558539, "grad_norm": 4.764699931065781, "learning_rate": 1.13543118476871e-06, "loss": 16.7293, "step": 43097 }, { "epoch": 0.7877968084523005, "grad_norm": 5.815770349196088, "learning_rate": 1.135243368939653e-06, "loss": 16.9809, "step": 43098 }, { "epoch": 0.787815087648747, "grad_norm": 5.372746471338661, "learning_rate": 1.135055566656232e-06, "loss": 17.0126, "step": 43099 }, { "epoch": 0.7878333668451935, "grad_norm": 6.179206697544964, "learning_rate": 1.1348677779191064e-06, "loss": 17.6195, "step": 43100 }, { "epoch": 0.78785164604164, "grad_norm": 5.998714644839135, "learning_rate": 1.1346800027289328e-06, "loss": 16.9077, "step": 43101 }, { "epoch": 0.7878699252380865, "grad_norm": 5.5842772006897565, "learning_rate": 1.1344922410863707e-06, "loss": 17.0486, "step": 43102 }, { "epoch": 0.7878882044345331, "grad_norm": 6.671719733311931, "learning_rate": 1.1343044929920787e-06, "loss": 17.5598, "step": 43103 }, { "epoch": 0.7879064836309796, "grad_norm": 5.975137658049307, "learning_rate": 1.1341167584467145e-06, "loss": 17.0552, "step": 43104 }, { "epoch": 0.7879247628274261, "grad_norm": 7.8689715297762755, "learning_rate": 1.1339290374509337e-06, "loss": 17.6328, "step": 43105 }, { "epoch": 0.7879430420238727, "grad_norm": 4.433286787904793, "learning_rate": 1.133741330005398e-06, "loss": 16.7884, "step": 43106 }, { "epoch": 0.7879613212203191, "grad_norm": 7.437559649224955, "learning_rate": 1.1335536361107629e-06, "loss": 17.5174, "step": 43107 }, { "epoch": 0.7879796004167656, "grad_norm": 5.664438971474661, "learning_rate": 1.1333659557676857e-06, "loss": 16.8388, "step": 43108 }, { "epoch": 0.7879978796132122, "grad_norm": 5.22435671111826, "learning_rate": 1.1331782889768255e-06, "loss": 17.0183, "step": 43109 }, { "epoch": 0.7880161588096587, "grad_norm": 4.630093590624916, "learning_rate": 1.132990635738841e-06, "loss": 16.7623, "step": 43110 }, { "epoch": 0.7880344380061053, "grad_norm": 6.137929558970014, "learning_rate": 1.1328029960543874e-06, "loss": 17.3658, "step": 43111 }, { "epoch": 0.7880527172025518, "grad_norm": 6.341880062092317, "learning_rate": 1.132615369924125e-06, "loss": 17.2225, "step": 43112 }, { "epoch": 0.7880709963989982, "grad_norm": 5.735059554086242, "learning_rate": 1.1324277573487092e-06, "loss": 17.0062, "step": 43113 }, { "epoch": 0.7880892755954448, "grad_norm": 6.544663863296922, "learning_rate": 1.1322401583287995e-06, "loss": 17.8867, "step": 43114 }, { "epoch": 0.7881075547918913, "grad_norm": 6.401684095574284, "learning_rate": 1.1320525728650521e-06, "loss": 17.4701, "step": 43115 }, { "epoch": 0.7881258339883379, "grad_norm": 5.672245616071425, "learning_rate": 1.1318650009581239e-06, "loss": 17.0792, "step": 43116 }, { "epoch": 0.7881441131847844, "grad_norm": 5.905569357769954, "learning_rate": 1.1316774426086747e-06, "loss": 17.2144, "step": 43117 }, { "epoch": 0.7881623923812309, "grad_norm": 7.21723964011863, "learning_rate": 1.1314898978173588e-06, "loss": 18.0966, "step": 43118 }, { "epoch": 0.7881806715776775, "grad_norm": 6.299953505250634, "learning_rate": 1.131302366584835e-06, "loss": 17.3748, "step": 43119 }, { "epoch": 0.7881989507741239, "grad_norm": 6.1918832143189695, "learning_rate": 1.1311148489117623e-06, "loss": 17.5962, "step": 43120 }, { "epoch": 0.7882172299705705, "grad_norm": 6.11787485753403, "learning_rate": 1.1309273447987963e-06, "loss": 17.2312, "step": 43121 }, { "epoch": 0.788235509167017, "grad_norm": 6.1237849460765075, "learning_rate": 1.1307398542465924e-06, "loss": 17.0828, "step": 43122 }, { "epoch": 0.7882537883634635, "grad_norm": 5.667912305763091, "learning_rate": 1.1305523772558113e-06, "loss": 17.1656, "step": 43123 }, { "epoch": 0.7882720675599101, "grad_norm": 7.285298178170572, "learning_rate": 1.1303649138271083e-06, "loss": 17.6714, "step": 43124 }, { "epoch": 0.7882903467563566, "grad_norm": 6.071062791444387, "learning_rate": 1.130177463961139e-06, "loss": 17.2145, "step": 43125 }, { "epoch": 0.7883086259528032, "grad_norm": 8.641277676524108, "learning_rate": 1.1299900276585634e-06, "loss": 17.9371, "step": 43126 }, { "epoch": 0.7883269051492496, "grad_norm": 7.057997363359457, "learning_rate": 1.1298026049200356e-06, "loss": 17.9007, "step": 43127 }, { "epoch": 0.7883451843456961, "grad_norm": 6.61365288465869, "learning_rate": 1.1296151957462137e-06, "loss": 17.7837, "step": 43128 }, { "epoch": 0.7883634635421427, "grad_norm": 5.359010111500947, "learning_rate": 1.1294278001377561e-06, "loss": 17.223, "step": 43129 }, { "epoch": 0.7883817427385892, "grad_norm": 6.047588657014379, "learning_rate": 1.129240418095317e-06, "loss": 17.1711, "step": 43130 }, { "epoch": 0.7884000219350358, "grad_norm": 5.918919394942093, "learning_rate": 1.1290530496195556e-06, "loss": 16.9278, "step": 43131 }, { "epoch": 0.7884183011314823, "grad_norm": 6.079654058684232, "learning_rate": 1.1288656947111271e-06, "loss": 17.218, "step": 43132 }, { "epoch": 0.7884365803279287, "grad_norm": 6.724194411529522, "learning_rate": 1.1286783533706875e-06, "loss": 17.5448, "step": 43133 }, { "epoch": 0.7884548595243753, "grad_norm": 6.093328411807224, "learning_rate": 1.128491025598895e-06, "loss": 17.4516, "step": 43134 }, { "epoch": 0.7884731387208218, "grad_norm": 5.687400074122129, "learning_rate": 1.1283037113964062e-06, "loss": 17.2882, "step": 43135 }, { "epoch": 0.7884914179172684, "grad_norm": 7.318623013029637, "learning_rate": 1.1281164107638753e-06, "loss": 17.9093, "step": 43136 }, { "epoch": 0.7885096971137149, "grad_norm": 6.0379764603195, "learning_rate": 1.1279291237019602e-06, "loss": 17.4517, "step": 43137 }, { "epoch": 0.7885279763101614, "grad_norm": 6.333039827498451, "learning_rate": 1.127741850211319e-06, "loss": 17.3414, "step": 43138 }, { "epoch": 0.788546255506608, "grad_norm": 5.718344716554737, "learning_rate": 1.1275545902926054e-06, "loss": 17.3999, "step": 43139 }, { "epoch": 0.7885645347030544, "grad_norm": 6.244469090993589, "learning_rate": 1.1273673439464782e-06, "loss": 17.3051, "step": 43140 }, { "epoch": 0.788582813899501, "grad_norm": 6.71897609561286, "learning_rate": 1.1271801111735926e-06, "loss": 17.6971, "step": 43141 }, { "epoch": 0.7886010930959475, "grad_norm": 5.881718983153126, "learning_rate": 1.1269928919746032e-06, "loss": 17.1836, "step": 43142 }, { "epoch": 0.788619372292394, "grad_norm": 9.078387521092502, "learning_rate": 1.1268056863501685e-06, "loss": 17.3752, "step": 43143 }, { "epoch": 0.7886376514888406, "grad_norm": 8.674112855155263, "learning_rate": 1.1266184943009429e-06, "loss": 17.2511, "step": 43144 }, { "epoch": 0.788655930685287, "grad_norm": 6.458733111855941, "learning_rate": 1.1264313158275848e-06, "loss": 17.8083, "step": 43145 }, { "epoch": 0.7886742098817336, "grad_norm": 6.265353684969043, "learning_rate": 1.126244150930747e-06, "loss": 17.3826, "step": 43146 }, { "epoch": 0.7886924890781801, "grad_norm": 6.841057750124385, "learning_rate": 1.1260569996110876e-06, "loss": 17.7559, "step": 43147 }, { "epoch": 0.7887107682746266, "grad_norm": 6.31655114979257, "learning_rate": 1.1258698618692637e-06, "loss": 17.5473, "step": 43148 }, { "epoch": 0.7887290474710732, "grad_norm": 6.148551495027276, "learning_rate": 1.1256827377059298e-06, "loss": 17.4336, "step": 43149 }, { "epoch": 0.7887473266675197, "grad_norm": 7.217764546290234, "learning_rate": 1.1254956271217398e-06, "loss": 17.7203, "step": 43150 }, { "epoch": 0.7887656058639663, "grad_norm": 6.01709279664958, "learning_rate": 1.1253085301173533e-06, "loss": 17.5551, "step": 43151 }, { "epoch": 0.7887838850604127, "grad_norm": 7.392289754847322, "learning_rate": 1.125121446693424e-06, "loss": 17.6187, "step": 43152 }, { "epoch": 0.7888021642568592, "grad_norm": 6.231834767765806, "learning_rate": 1.1249343768506066e-06, "loss": 17.0737, "step": 43153 }, { "epoch": 0.7888204434533058, "grad_norm": 6.521844950074313, "learning_rate": 1.1247473205895588e-06, "loss": 17.1945, "step": 43154 }, { "epoch": 0.7888387226497523, "grad_norm": 8.300135248679796, "learning_rate": 1.1245602779109344e-06, "loss": 18.1531, "step": 43155 }, { "epoch": 0.7888570018461989, "grad_norm": 5.957727181692506, "learning_rate": 1.1243732488153896e-06, "loss": 17.1571, "step": 43156 }, { "epoch": 0.7888752810426454, "grad_norm": 7.855088678294483, "learning_rate": 1.1241862333035824e-06, "loss": 17.0805, "step": 43157 }, { "epoch": 0.7888935602390919, "grad_norm": 6.437122301204398, "learning_rate": 1.1239992313761654e-06, "loss": 17.1536, "step": 43158 }, { "epoch": 0.7889118394355384, "grad_norm": 6.147515297411896, "learning_rate": 1.1238122430337938e-06, "loss": 17.3282, "step": 43159 }, { "epoch": 0.7889301186319849, "grad_norm": 6.335259114526591, "learning_rate": 1.1236252682771247e-06, "loss": 17.5356, "step": 43160 }, { "epoch": 0.7889483978284315, "grad_norm": 4.804744335219695, "learning_rate": 1.1234383071068118e-06, "loss": 16.7976, "step": 43161 }, { "epoch": 0.788966677024878, "grad_norm": 9.17463756767849, "learning_rate": 1.123251359523513e-06, "loss": 18.3581, "step": 43162 }, { "epoch": 0.7889849562213245, "grad_norm": 7.405409337282177, "learning_rate": 1.123064425527881e-06, "loss": 17.9266, "step": 43163 }, { "epoch": 0.7890032354177711, "grad_norm": 7.4642430695342386, "learning_rate": 1.1228775051205704e-06, "loss": 17.5717, "step": 43164 }, { "epoch": 0.7890215146142175, "grad_norm": 6.44829544877793, "learning_rate": 1.122690598302238e-06, "loss": 17.5642, "step": 43165 }, { "epoch": 0.7890397938106641, "grad_norm": 5.14806931176464, "learning_rate": 1.1225037050735399e-06, "loss": 16.9373, "step": 43166 }, { "epoch": 0.7890580730071106, "grad_norm": 5.017274633393075, "learning_rate": 1.1223168254351286e-06, "loss": 16.9708, "step": 43167 }, { "epoch": 0.7890763522035571, "grad_norm": 8.162435155709288, "learning_rate": 1.1221299593876617e-06, "loss": 17.7802, "step": 43168 }, { "epoch": 0.7890946314000037, "grad_norm": 5.579688176795867, "learning_rate": 1.1219431069317927e-06, "loss": 17.1442, "step": 43169 }, { "epoch": 0.7891129105964502, "grad_norm": 5.599576631765463, "learning_rate": 1.121756268068175e-06, "loss": 17.1599, "step": 43170 }, { "epoch": 0.7891311897928968, "grad_norm": 6.864155826204168, "learning_rate": 1.1215694427974666e-06, "loss": 17.5018, "step": 43171 }, { "epoch": 0.7891494689893432, "grad_norm": 5.28075126088164, "learning_rate": 1.1213826311203197e-06, "loss": 16.9807, "step": 43172 }, { "epoch": 0.7891677481857897, "grad_norm": 7.757127441567285, "learning_rate": 1.1211958330373912e-06, "loss": 17.7107, "step": 43173 }, { "epoch": 0.7891860273822363, "grad_norm": 5.724879748219815, "learning_rate": 1.121009048549333e-06, "loss": 16.9206, "step": 43174 }, { "epoch": 0.7892043065786828, "grad_norm": 5.93523999171069, "learning_rate": 1.1208222776568018e-06, "loss": 17.2475, "step": 43175 }, { "epoch": 0.7892225857751293, "grad_norm": 7.069549896441063, "learning_rate": 1.1206355203604536e-06, "loss": 17.8092, "step": 43176 }, { "epoch": 0.7892408649715759, "grad_norm": 6.176909791098862, "learning_rate": 1.1204487766609407e-06, "loss": 17.4986, "step": 43177 }, { "epoch": 0.7892591441680223, "grad_norm": 7.470114123365261, "learning_rate": 1.120262046558917e-06, "loss": 17.8689, "step": 43178 }, { "epoch": 0.7892774233644689, "grad_norm": 5.7004393351989435, "learning_rate": 1.12007533005504e-06, "loss": 17.374, "step": 43179 }, { "epoch": 0.7892957025609154, "grad_norm": 5.7268941833858955, "learning_rate": 1.1198886271499615e-06, "loss": 17.2614, "step": 43180 }, { "epoch": 0.7893139817573619, "grad_norm": 5.02746901785059, "learning_rate": 1.1197019378443358e-06, "loss": 17.0462, "step": 43181 }, { "epoch": 0.7893322609538085, "grad_norm": 8.155355381665812, "learning_rate": 1.1195152621388178e-06, "loss": 18.0377, "step": 43182 }, { "epoch": 0.789350540150255, "grad_norm": 6.111838829201602, "learning_rate": 1.1193286000340637e-06, "loss": 17.3413, "step": 43183 }, { "epoch": 0.7893688193467016, "grad_norm": 5.137003045716253, "learning_rate": 1.1191419515307244e-06, "loss": 17.095, "step": 43184 }, { "epoch": 0.789387098543148, "grad_norm": 5.813400274595127, "learning_rate": 1.1189553166294576e-06, "loss": 17.4498, "step": 43185 }, { "epoch": 0.7894053777395945, "grad_norm": 6.729521518598655, "learning_rate": 1.118768695330915e-06, "loss": 17.6784, "step": 43186 }, { "epoch": 0.7894236569360411, "grad_norm": 4.83395459718101, "learning_rate": 1.1185820876357506e-06, "loss": 16.9732, "step": 43187 }, { "epoch": 0.7894419361324876, "grad_norm": 4.805897714712554, "learning_rate": 1.1183954935446206e-06, "loss": 16.9852, "step": 43188 }, { "epoch": 0.7894602153289342, "grad_norm": 7.189867822170069, "learning_rate": 1.1182089130581774e-06, "loss": 17.4023, "step": 43189 }, { "epoch": 0.7894784945253807, "grad_norm": 6.49153217410484, "learning_rate": 1.1180223461770734e-06, "loss": 17.454, "step": 43190 }, { "epoch": 0.7894967737218271, "grad_norm": 5.11265157266795, "learning_rate": 1.1178357929019645e-06, "loss": 17.1482, "step": 43191 }, { "epoch": 0.7895150529182737, "grad_norm": 6.251239692483146, "learning_rate": 1.1176492532335043e-06, "loss": 17.2178, "step": 43192 }, { "epoch": 0.7895333321147202, "grad_norm": 5.130882433705746, "learning_rate": 1.1174627271723476e-06, "loss": 17.0557, "step": 43193 }, { "epoch": 0.7895516113111668, "grad_norm": 6.770356212391329, "learning_rate": 1.1172762147191475e-06, "loss": 17.4837, "step": 43194 }, { "epoch": 0.7895698905076133, "grad_norm": 4.341217407637645, "learning_rate": 1.117089715874556e-06, "loss": 16.6464, "step": 43195 }, { "epoch": 0.7895881697040598, "grad_norm": 5.794366981616693, "learning_rate": 1.1169032306392292e-06, "loss": 17.1656, "step": 43196 }, { "epoch": 0.7896064489005064, "grad_norm": 5.898064155850638, "learning_rate": 1.1167167590138195e-06, "loss": 17.4096, "step": 43197 }, { "epoch": 0.7896247280969528, "grad_norm": 6.0590126836667695, "learning_rate": 1.1165303009989797e-06, "loss": 17.2369, "step": 43198 }, { "epoch": 0.7896430072933994, "grad_norm": 4.987193676811386, "learning_rate": 1.1163438565953655e-06, "loss": 16.8854, "step": 43199 }, { "epoch": 0.7896612864898459, "grad_norm": 6.811130238339078, "learning_rate": 1.1161574258036272e-06, "loss": 17.5531, "step": 43200 }, { "epoch": 0.7896795656862924, "grad_norm": 7.601913659995566, "learning_rate": 1.1159710086244207e-06, "loss": 17.9056, "step": 43201 }, { "epoch": 0.789697844882739, "grad_norm": 7.744008955837333, "learning_rate": 1.1157846050583999e-06, "loss": 17.7661, "step": 43202 }, { "epoch": 0.7897161240791855, "grad_norm": 8.156288827406694, "learning_rate": 1.1155982151062166e-06, "loss": 18.3724, "step": 43203 }, { "epoch": 0.789734403275632, "grad_norm": 9.435651266728582, "learning_rate": 1.1154118387685237e-06, "loss": 18.1767, "step": 43204 }, { "epoch": 0.7897526824720785, "grad_norm": 5.661530235997828, "learning_rate": 1.1152254760459763e-06, "loss": 17.0994, "step": 43205 }, { "epoch": 0.789770961668525, "grad_norm": 6.016606630533423, "learning_rate": 1.1150391269392252e-06, "loss": 17.2813, "step": 43206 }, { "epoch": 0.7897892408649716, "grad_norm": 6.015437817135232, "learning_rate": 1.1148527914489265e-06, "loss": 17.2011, "step": 43207 }, { "epoch": 0.7898075200614181, "grad_norm": 6.644050959634568, "learning_rate": 1.1146664695757314e-06, "loss": 17.6528, "step": 43208 }, { "epoch": 0.7898257992578647, "grad_norm": 5.747677837482982, "learning_rate": 1.1144801613202915e-06, "loss": 17.2529, "step": 43209 }, { "epoch": 0.7898440784543111, "grad_norm": 7.4962103052288525, "learning_rate": 1.1142938666832615e-06, "loss": 17.5606, "step": 43210 }, { "epoch": 0.7898623576507576, "grad_norm": 8.034624287769553, "learning_rate": 1.114107585665296e-06, "loss": 17.9885, "step": 43211 }, { "epoch": 0.7898806368472042, "grad_norm": 5.875033151763072, "learning_rate": 1.113921318267045e-06, "loss": 17.333, "step": 43212 }, { "epoch": 0.7898989160436507, "grad_norm": 5.093091028354461, "learning_rate": 1.1137350644891636e-06, "loss": 17.0633, "step": 43213 }, { "epoch": 0.7899171952400973, "grad_norm": 5.344648009834531, "learning_rate": 1.1135488243323034e-06, "loss": 17.0001, "step": 43214 }, { "epoch": 0.7899354744365438, "grad_norm": 7.890344754862406, "learning_rate": 1.1133625977971164e-06, "loss": 17.3466, "step": 43215 }, { "epoch": 0.7899537536329903, "grad_norm": 5.309409465732446, "learning_rate": 1.1131763848842576e-06, "loss": 16.7683, "step": 43216 }, { "epoch": 0.7899720328294368, "grad_norm": 8.626580424287884, "learning_rate": 1.1129901855943775e-06, "loss": 17.4016, "step": 43217 }, { "epoch": 0.7899903120258833, "grad_norm": 7.2925972882442975, "learning_rate": 1.1128039999281287e-06, "loss": 17.4104, "step": 43218 }, { "epoch": 0.7900085912223299, "grad_norm": 6.139349275728681, "learning_rate": 1.112617827886165e-06, "loss": 17.0972, "step": 43219 }, { "epoch": 0.7900268704187764, "grad_norm": 5.610997552341211, "learning_rate": 1.1124316694691378e-06, "loss": 17.1965, "step": 43220 }, { "epoch": 0.7900451496152229, "grad_norm": 5.201299827901622, "learning_rate": 1.112245524677702e-06, "loss": 16.8182, "step": 43221 }, { "epoch": 0.7900634288116695, "grad_norm": 6.705297374715919, "learning_rate": 1.112059393512508e-06, "loss": 17.3112, "step": 43222 }, { "epoch": 0.790081708008116, "grad_norm": 6.439771913735911, "learning_rate": 1.1118732759742074e-06, "loss": 17.4786, "step": 43223 }, { "epoch": 0.7900999872045625, "grad_norm": 5.947222903418565, "learning_rate": 1.1116871720634548e-06, "loss": 17.2736, "step": 43224 }, { "epoch": 0.790118266401009, "grad_norm": 6.306878026248945, "learning_rate": 1.1115010817809007e-06, "loss": 17.1408, "step": 43225 }, { "epoch": 0.7901365455974555, "grad_norm": 6.238356454396183, "learning_rate": 1.111315005127197e-06, "loss": 17.289, "step": 43226 }, { "epoch": 0.7901548247939021, "grad_norm": 5.103360134479162, "learning_rate": 1.1111289421029986e-06, "loss": 16.8056, "step": 43227 }, { "epoch": 0.7901731039903486, "grad_norm": 7.434111609382675, "learning_rate": 1.1109428927089538e-06, "loss": 17.6599, "step": 43228 }, { "epoch": 0.7901913831867952, "grad_norm": 6.962772457807226, "learning_rate": 1.1107568569457167e-06, "loss": 17.3265, "step": 43229 }, { "epoch": 0.7902096623832416, "grad_norm": 5.743341311985463, "learning_rate": 1.110570834813941e-06, "loss": 17.1811, "step": 43230 }, { "epoch": 0.7902279415796881, "grad_norm": 6.762595266981715, "learning_rate": 1.110384826314277e-06, "loss": 17.1616, "step": 43231 }, { "epoch": 0.7902462207761347, "grad_norm": 5.108411003582306, "learning_rate": 1.110198831447375e-06, "loss": 17.0332, "step": 43232 }, { "epoch": 0.7902644999725812, "grad_norm": 6.5344096854795195, "learning_rate": 1.1100128502138902e-06, "loss": 17.3701, "step": 43233 }, { "epoch": 0.7902827791690278, "grad_norm": 5.701052895943508, "learning_rate": 1.1098268826144725e-06, "loss": 17.1632, "step": 43234 }, { "epoch": 0.7903010583654743, "grad_norm": 6.728460420313379, "learning_rate": 1.1096409286497727e-06, "loss": 17.686, "step": 43235 }, { "epoch": 0.7903193375619207, "grad_norm": 9.00556753361998, "learning_rate": 1.1094549883204441e-06, "loss": 17.6203, "step": 43236 }, { "epoch": 0.7903376167583673, "grad_norm": 4.666506009669226, "learning_rate": 1.1092690616271396e-06, "loss": 17.0261, "step": 43237 }, { "epoch": 0.7903558959548138, "grad_norm": 7.350283815009201, "learning_rate": 1.1090831485705078e-06, "loss": 17.3541, "step": 43238 }, { "epoch": 0.7903741751512604, "grad_norm": 6.730280045351615, "learning_rate": 1.1088972491512034e-06, "loss": 17.5791, "step": 43239 }, { "epoch": 0.7903924543477069, "grad_norm": 5.630072362654247, "learning_rate": 1.1087113633698748e-06, "loss": 16.994, "step": 43240 }, { "epoch": 0.7904107335441534, "grad_norm": 6.359082825786687, "learning_rate": 1.108525491227177e-06, "loss": 17.5131, "step": 43241 }, { "epoch": 0.7904290127406, "grad_norm": 6.021987546903946, "learning_rate": 1.1083396327237594e-06, "loss": 17.548, "step": 43242 }, { "epoch": 0.7904472919370464, "grad_norm": 7.658048598495461, "learning_rate": 1.108153787860272e-06, "loss": 17.7618, "step": 43243 }, { "epoch": 0.7904655711334929, "grad_norm": 5.904596638408614, "learning_rate": 1.1079679566373703e-06, "loss": 17.4474, "step": 43244 }, { "epoch": 0.7904838503299395, "grad_norm": 8.549162113545862, "learning_rate": 1.1077821390557008e-06, "loss": 18.1149, "step": 43245 }, { "epoch": 0.790502129526386, "grad_norm": 5.703759721389568, "learning_rate": 1.1075963351159174e-06, "loss": 17.0945, "step": 43246 }, { "epoch": 0.7905204087228326, "grad_norm": 6.137127735309409, "learning_rate": 1.107410544818673e-06, "loss": 17.3, "step": 43247 }, { "epoch": 0.7905386879192791, "grad_norm": 5.562421553120005, "learning_rate": 1.1072247681646158e-06, "loss": 16.918, "step": 43248 }, { "epoch": 0.7905569671157255, "grad_norm": 6.423291385548988, "learning_rate": 1.1070390051543977e-06, "loss": 17.191, "step": 43249 }, { "epoch": 0.7905752463121721, "grad_norm": 7.042535083168638, "learning_rate": 1.1068532557886708e-06, "loss": 17.732, "step": 43250 }, { "epoch": 0.7905935255086186, "grad_norm": 5.823123337660007, "learning_rate": 1.1066675200680843e-06, "loss": 17.1034, "step": 43251 }, { "epoch": 0.7906118047050652, "grad_norm": 5.760401417560913, "learning_rate": 1.1064817979932912e-06, "loss": 17.1872, "step": 43252 }, { "epoch": 0.7906300839015117, "grad_norm": 7.209846282612204, "learning_rate": 1.1062960895649422e-06, "loss": 17.6932, "step": 43253 }, { "epoch": 0.7906483630979582, "grad_norm": 8.046078197139208, "learning_rate": 1.106110394783686e-06, "loss": 18.0898, "step": 43254 }, { "epoch": 0.7906666422944048, "grad_norm": 8.883319287173101, "learning_rate": 1.1059247136501745e-06, "loss": 18.4092, "step": 43255 }, { "epoch": 0.7906849214908512, "grad_norm": 5.756582526661166, "learning_rate": 1.1057390461650607e-06, "loss": 17.1066, "step": 43256 }, { "epoch": 0.7907032006872978, "grad_norm": 5.98160067644304, "learning_rate": 1.1055533923289924e-06, "loss": 17.4697, "step": 43257 }, { "epoch": 0.7907214798837443, "grad_norm": 5.205468749090678, "learning_rate": 1.1053677521426225e-06, "loss": 17.0156, "step": 43258 }, { "epoch": 0.7907397590801908, "grad_norm": 6.448334332726509, "learning_rate": 1.1051821256066008e-06, "loss": 17.3661, "step": 43259 }, { "epoch": 0.7907580382766374, "grad_norm": 6.6858979301092925, "learning_rate": 1.1049965127215766e-06, "loss": 17.4476, "step": 43260 }, { "epoch": 0.7907763174730839, "grad_norm": 5.196173964150028, "learning_rate": 1.104810913488203e-06, "loss": 16.8956, "step": 43261 }, { "epoch": 0.7907945966695304, "grad_norm": 5.494870232369505, "learning_rate": 1.104625327907129e-06, "loss": 17.2009, "step": 43262 }, { "epoch": 0.7908128758659769, "grad_norm": 7.0028525973092, "learning_rate": 1.1044397559790037e-06, "loss": 17.5993, "step": 43263 }, { "epoch": 0.7908311550624234, "grad_norm": 6.54210035304567, "learning_rate": 1.1042541977044791e-06, "loss": 17.4739, "step": 43264 }, { "epoch": 0.79084943425887, "grad_norm": 8.807428592349309, "learning_rate": 1.104068653084207e-06, "loss": 18.0824, "step": 43265 }, { "epoch": 0.7908677134553165, "grad_norm": 5.953714791058512, "learning_rate": 1.1038831221188346e-06, "loss": 17.3028, "step": 43266 }, { "epoch": 0.7908859926517631, "grad_norm": 8.168155414103135, "learning_rate": 1.1036976048090153e-06, "loss": 17.8324, "step": 43267 }, { "epoch": 0.7909042718482096, "grad_norm": 7.357522668317413, "learning_rate": 1.103512101155396e-06, "loss": 17.8668, "step": 43268 }, { "epoch": 0.790922551044656, "grad_norm": 11.068090796163087, "learning_rate": 1.1033266111586306e-06, "loss": 17.6987, "step": 43269 }, { "epoch": 0.7909408302411026, "grad_norm": 6.723794403383254, "learning_rate": 1.1031411348193672e-06, "loss": 17.735, "step": 43270 }, { "epoch": 0.7909591094375491, "grad_norm": 7.376509439064291, "learning_rate": 1.1029556721382544e-06, "loss": 17.6107, "step": 43271 }, { "epoch": 0.7909773886339957, "grad_norm": 6.459539159397829, "learning_rate": 1.1027702231159454e-06, "loss": 17.592, "step": 43272 }, { "epoch": 0.7909956678304422, "grad_norm": 6.112009296909218, "learning_rate": 1.102584787753087e-06, "loss": 17.2677, "step": 43273 }, { "epoch": 0.7910139470268887, "grad_norm": 6.875271129842209, "learning_rate": 1.1023993660503308e-06, "loss": 17.3834, "step": 43274 }, { "epoch": 0.7910322262233352, "grad_norm": 6.295726968497311, "learning_rate": 1.102213958008328e-06, "loss": 17.4068, "step": 43275 }, { "epoch": 0.7910505054197817, "grad_norm": 5.269450146145529, "learning_rate": 1.1020285636277268e-06, "loss": 17.0043, "step": 43276 }, { "epoch": 0.7910687846162283, "grad_norm": 7.307604515686793, "learning_rate": 1.1018431829091763e-06, "loss": 17.5581, "step": 43277 }, { "epoch": 0.7910870638126748, "grad_norm": 6.823505666876487, "learning_rate": 1.101657815853328e-06, "loss": 17.4919, "step": 43278 }, { "epoch": 0.7911053430091213, "grad_norm": 5.977969276014728, "learning_rate": 1.1014724624608315e-06, "loss": 17.3623, "step": 43279 }, { "epoch": 0.7911236222055679, "grad_norm": 7.314531253264532, "learning_rate": 1.1012871227323335e-06, "loss": 17.6509, "step": 43280 }, { "epoch": 0.7911419014020143, "grad_norm": 6.20799172304964, "learning_rate": 1.1011017966684878e-06, "loss": 17.278, "step": 43281 }, { "epoch": 0.7911601805984609, "grad_norm": 5.660053925077161, "learning_rate": 1.1009164842699405e-06, "loss": 16.9954, "step": 43282 }, { "epoch": 0.7911784597949074, "grad_norm": 7.652467165306108, "learning_rate": 1.1007311855373425e-06, "loss": 18.1249, "step": 43283 }, { "epoch": 0.7911967389913539, "grad_norm": 6.458375503769788, "learning_rate": 1.1005459004713448e-06, "loss": 17.0972, "step": 43284 }, { "epoch": 0.7912150181878005, "grad_norm": 7.760190946882975, "learning_rate": 1.1003606290725938e-06, "loss": 17.979, "step": 43285 }, { "epoch": 0.791233297384247, "grad_norm": 5.121070021912456, "learning_rate": 1.1001753713417418e-06, "loss": 17.0369, "step": 43286 }, { "epoch": 0.7912515765806936, "grad_norm": 6.934873222090018, "learning_rate": 1.0999901272794367e-06, "loss": 17.7656, "step": 43287 }, { "epoch": 0.79126985577714, "grad_norm": 6.131281619159718, "learning_rate": 1.0998048968863268e-06, "loss": 17.5178, "step": 43288 }, { "epoch": 0.7912881349735865, "grad_norm": 6.1528429573388195, "learning_rate": 1.0996196801630632e-06, "loss": 17.4881, "step": 43289 }, { "epoch": 0.7913064141700331, "grad_norm": 7.209718274733655, "learning_rate": 1.0994344771102943e-06, "loss": 17.5166, "step": 43290 }, { "epoch": 0.7913246933664796, "grad_norm": 6.847437274692617, "learning_rate": 1.0992492877286677e-06, "loss": 17.0908, "step": 43291 }, { "epoch": 0.7913429725629262, "grad_norm": 6.852510335395066, "learning_rate": 1.0990641120188338e-06, "loss": 17.4916, "step": 43292 }, { "epoch": 0.7913612517593727, "grad_norm": 8.814679988480902, "learning_rate": 1.0988789499814435e-06, "loss": 18.0672, "step": 43293 }, { "epoch": 0.7913795309558191, "grad_norm": 5.939986600128583, "learning_rate": 1.0986938016171417e-06, "loss": 17.1041, "step": 43294 }, { "epoch": 0.7913978101522657, "grad_norm": 5.848446003469064, "learning_rate": 1.0985086669265815e-06, "loss": 17.3584, "step": 43295 }, { "epoch": 0.7914160893487122, "grad_norm": 6.956736693730146, "learning_rate": 1.0983235459104098e-06, "loss": 17.7574, "step": 43296 }, { "epoch": 0.7914343685451588, "grad_norm": 4.7269405219766965, "learning_rate": 1.0981384385692745e-06, "loss": 16.78, "step": 43297 }, { "epoch": 0.7914526477416053, "grad_norm": 6.569531059929584, "learning_rate": 1.097953344903826e-06, "loss": 17.2263, "step": 43298 }, { "epoch": 0.7914709269380518, "grad_norm": 8.160868718744306, "learning_rate": 1.0977682649147115e-06, "loss": 17.9723, "step": 43299 }, { "epoch": 0.7914892061344984, "grad_norm": 7.048085261187909, "learning_rate": 1.0975831986025808e-06, "loss": 17.7552, "step": 43300 }, { "epoch": 0.7915074853309448, "grad_norm": 4.674466142567083, "learning_rate": 1.0973981459680833e-06, "loss": 16.8726, "step": 43301 }, { "epoch": 0.7915257645273914, "grad_norm": 6.946606535977379, "learning_rate": 1.0972131070118657e-06, "loss": 17.3313, "step": 43302 }, { "epoch": 0.7915440437238379, "grad_norm": 5.733583672488663, "learning_rate": 1.0970280817345785e-06, "loss": 17.141, "step": 43303 }, { "epoch": 0.7915623229202844, "grad_norm": 6.454157969940714, "learning_rate": 1.0968430701368693e-06, "loss": 17.3565, "step": 43304 }, { "epoch": 0.791580602116731, "grad_norm": 6.39248333095071, "learning_rate": 1.0966580722193853e-06, "loss": 17.6757, "step": 43305 }, { "epoch": 0.7915988813131775, "grad_norm": 4.659251319422147, "learning_rate": 1.0964730879827766e-06, "loss": 16.8463, "step": 43306 }, { "epoch": 0.791617160509624, "grad_norm": 7.78577580906963, "learning_rate": 1.096288117427692e-06, "loss": 17.7212, "step": 43307 }, { "epoch": 0.7916354397060705, "grad_norm": 5.4138779672905715, "learning_rate": 1.0961031605547774e-06, "loss": 16.8656, "step": 43308 }, { "epoch": 0.791653718902517, "grad_norm": 5.352001077526321, "learning_rate": 1.0959182173646814e-06, "loss": 16.9358, "step": 43309 }, { "epoch": 0.7916719980989636, "grad_norm": 5.3264156120336486, "learning_rate": 1.0957332878580557e-06, "loss": 16.8575, "step": 43310 }, { "epoch": 0.7916902772954101, "grad_norm": 6.980407632736939, "learning_rate": 1.0955483720355441e-06, "loss": 17.5839, "step": 43311 }, { "epoch": 0.7917085564918566, "grad_norm": 5.910042200033018, "learning_rate": 1.0953634698977982e-06, "loss": 17.0875, "step": 43312 }, { "epoch": 0.7917268356883032, "grad_norm": 6.929371029372163, "learning_rate": 1.095178581445463e-06, "loss": 17.5372, "step": 43313 }, { "epoch": 0.7917451148847496, "grad_norm": 7.228903307661713, "learning_rate": 1.09499370667919e-06, "loss": 17.7439, "step": 43314 }, { "epoch": 0.7917633940811962, "grad_norm": 7.614146514959524, "learning_rate": 1.0948088455996242e-06, "loss": 17.5366, "step": 43315 }, { "epoch": 0.7917816732776427, "grad_norm": 6.91063771972759, "learning_rate": 1.0946239982074141e-06, "loss": 17.6166, "step": 43316 }, { "epoch": 0.7917999524740892, "grad_norm": 6.461065893839471, "learning_rate": 1.0944391645032088e-06, "loss": 17.2781, "step": 43317 }, { "epoch": 0.7918182316705358, "grad_norm": 6.7231741777505265, "learning_rate": 1.094254344487654e-06, "loss": 17.568, "step": 43318 }, { "epoch": 0.7918365108669823, "grad_norm": 6.107398328286619, "learning_rate": 1.0940695381613992e-06, "loss": 17.3844, "step": 43319 }, { "epoch": 0.7918547900634288, "grad_norm": 14.5953987088337, "learning_rate": 1.0938847455250923e-06, "loss": 17.3832, "step": 43320 }, { "epoch": 0.7918730692598753, "grad_norm": 6.094135428773177, "learning_rate": 1.0936999665793812e-06, "loss": 17.0913, "step": 43321 }, { "epoch": 0.7918913484563218, "grad_norm": 5.0890573749041845, "learning_rate": 1.0935152013249112e-06, "loss": 16.9851, "step": 43322 }, { "epoch": 0.7919096276527684, "grad_norm": 6.5990900801070955, "learning_rate": 1.0933304497623326e-06, "loss": 17.5294, "step": 43323 }, { "epoch": 0.7919279068492149, "grad_norm": 5.766460164890393, "learning_rate": 1.0931457118922917e-06, "loss": 17.4127, "step": 43324 }, { "epoch": 0.7919461860456615, "grad_norm": 6.807053158241241, "learning_rate": 1.0929609877154345e-06, "loss": 17.9143, "step": 43325 }, { "epoch": 0.791964465242108, "grad_norm": 6.516762172829275, "learning_rate": 1.0927762772324118e-06, "loss": 17.4987, "step": 43326 }, { "epoch": 0.7919827444385544, "grad_norm": 6.643000726596912, "learning_rate": 1.092591580443868e-06, "loss": 17.2994, "step": 43327 }, { "epoch": 0.792001023635001, "grad_norm": 5.592715622869668, "learning_rate": 1.0924068973504515e-06, "loss": 17.0451, "step": 43328 }, { "epoch": 0.7920193028314475, "grad_norm": 6.569555145291934, "learning_rate": 1.0922222279528106e-06, "loss": 17.7587, "step": 43329 }, { "epoch": 0.7920375820278941, "grad_norm": 9.429517492452103, "learning_rate": 1.0920375722515908e-06, "loss": 17.7977, "step": 43330 }, { "epoch": 0.7920558612243406, "grad_norm": 5.358142440396465, "learning_rate": 1.0918529302474412e-06, "loss": 17.2264, "step": 43331 }, { "epoch": 0.792074140420787, "grad_norm": 6.7154510682895, "learning_rate": 1.0916683019410085e-06, "loss": 17.5638, "step": 43332 }, { "epoch": 0.7920924196172336, "grad_norm": 7.390256443686448, "learning_rate": 1.0914836873329376e-06, "loss": 18.0512, "step": 43333 }, { "epoch": 0.7921106988136801, "grad_norm": 5.171428829964296, "learning_rate": 1.0912990864238787e-06, "loss": 16.7811, "step": 43334 }, { "epoch": 0.7921289780101267, "grad_norm": 5.651934831630447, "learning_rate": 1.0911144992144768e-06, "loss": 16.7588, "step": 43335 }, { "epoch": 0.7921472572065732, "grad_norm": 6.556326085812731, "learning_rate": 1.0909299257053786e-06, "loss": 17.4837, "step": 43336 }, { "epoch": 0.7921655364030197, "grad_norm": 5.787928708599543, "learning_rate": 1.0907453658972316e-06, "loss": 17.2119, "step": 43337 }, { "epoch": 0.7921838155994663, "grad_norm": 19.835717190743434, "learning_rate": 1.0905608197906841e-06, "loss": 17.5035, "step": 43338 }, { "epoch": 0.7922020947959127, "grad_norm": 7.104515402330244, "learning_rate": 1.0903762873863805e-06, "loss": 17.8615, "step": 43339 }, { "epoch": 0.7922203739923593, "grad_norm": 5.954926806978555, "learning_rate": 1.09019176868497e-06, "loss": 17.0233, "step": 43340 }, { "epoch": 0.7922386531888058, "grad_norm": 7.31837780519766, "learning_rate": 1.0900072636870984e-06, "loss": 17.9458, "step": 43341 }, { "epoch": 0.7922569323852523, "grad_norm": 7.008996003770025, "learning_rate": 1.0898227723934101e-06, "loss": 17.5432, "step": 43342 }, { "epoch": 0.7922752115816989, "grad_norm": 5.302166371502666, "learning_rate": 1.0896382948045552e-06, "loss": 16.9214, "step": 43343 }, { "epoch": 0.7922934907781454, "grad_norm": 7.17472738394909, "learning_rate": 1.0894538309211777e-06, "loss": 17.5036, "step": 43344 }, { "epoch": 0.792311769974592, "grad_norm": 5.837035469185958, "learning_rate": 1.0892693807439264e-06, "loss": 17.1085, "step": 43345 }, { "epoch": 0.7923300491710384, "grad_norm": 5.494372503745298, "learning_rate": 1.0890849442734452e-06, "loss": 17.0461, "step": 43346 }, { "epoch": 0.7923483283674849, "grad_norm": 6.7106534981868275, "learning_rate": 1.088900521510382e-06, "loss": 17.6266, "step": 43347 }, { "epoch": 0.7923666075639315, "grad_norm": 6.953288349152279, "learning_rate": 1.088716112455384e-06, "loss": 17.4634, "step": 43348 }, { "epoch": 0.792384886760378, "grad_norm": 7.9619085526637114, "learning_rate": 1.088531717109097e-06, "loss": 17.7235, "step": 43349 }, { "epoch": 0.7924031659568246, "grad_norm": 6.072031811952299, "learning_rate": 1.0883473354721657e-06, "loss": 17.287, "step": 43350 }, { "epoch": 0.7924214451532711, "grad_norm": 6.037142516314785, "learning_rate": 1.0881629675452387e-06, "loss": 17.3113, "step": 43351 }, { "epoch": 0.7924397243497175, "grad_norm": 5.477341989088521, "learning_rate": 1.087978613328961e-06, "loss": 17.0657, "step": 43352 }, { "epoch": 0.7924580035461641, "grad_norm": 7.004402519593, "learning_rate": 1.0877942728239777e-06, "loss": 17.3557, "step": 43353 }, { "epoch": 0.7924762827426106, "grad_norm": 5.232977377134682, "learning_rate": 1.0876099460309369e-06, "loss": 17.142, "step": 43354 }, { "epoch": 0.7924945619390572, "grad_norm": 6.8991914072462475, "learning_rate": 1.0874256329504823e-06, "loss": 17.6214, "step": 43355 }, { "epoch": 0.7925128411355037, "grad_norm": 8.399407689008859, "learning_rate": 1.087241333583262e-06, "loss": 17.4631, "step": 43356 }, { "epoch": 0.7925311203319502, "grad_norm": 6.20387175692064, "learning_rate": 1.087057047929922e-06, "loss": 17.4407, "step": 43357 }, { "epoch": 0.7925493995283968, "grad_norm": 6.408240423762229, "learning_rate": 1.086872775991108e-06, "loss": 17.5573, "step": 43358 }, { "epoch": 0.7925676787248432, "grad_norm": 7.18558826071546, "learning_rate": 1.0866885177674636e-06, "loss": 17.6326, "step": 43359 }, { "epoch": 0.7925859579212898, "grad_norm": 6.089001936373365, "learning_rate": 1.086504273259638e-06, "loss": 17.3301, "step": 43360 }, { "epoch": 0.7926042371177363, "grad_norm": 6.701831409438909, "learning_rate": 1.086320042468274e-06, "loss": 17.6252, "step": 43361 }, { "epoch": 0.7926225163141828, "grad_norm": 5.908462787151468, "learning_rate": 1.0861358253940197e-06, "loss": 17.1545, "step": 43362 }, { "epoch": 0.7926407955106294, "grad_norm": 6.321145044606398, "learning_rate": 1.0859516220375188e-06, "loss": 17.5354, "step": 43363 }, { "epoch": 0.7926590747070759, "grad_norm": 8.350930900437396, "learning_rate": 1.0857674323994189e-06, "loss": 17.621, "step": 43364 }, { "epoch": 0.7926773539035225, "grad_norm": 5.629038690937979, "learning_rate": 1.085583256480363e-06, "loss": 17.3026, "step": 43365 }, { "epoch": 0.7926956330999689, "grad_norm": 5.2264320366454315, "learning_rate": 1.0853990942809995e-06, "loss": 17.2065, "step": 43366 }, { "epoch": 0.7927139122964154, "grad_norm": 5.016641173336171, "learning_rate": 1.0852149458019711e-06, "loss": 16.8441, "step": 43367 }, { "epoch": 0.792732191492862, "grad_norm": 7.119593844114406, "learning_rate": 1.0850308110439261e-06, "loss": 17.7616, "step": 43368 }, { "epoch": 0.7927504706893085, "grad_norm": 6.971451648864296, "learning_rate": 1.0848466900075082e-06, "loss": 17.2095, "step": 43369 }, { "epoch": 0.7927687498857551, "grad_norm": 7.083850475240695, "learning_rate": 1.084662582693362e-06, "loss": 17.8172, "step": 43370 }, { "epoch": 0.7927870290822016, "grad_norm": 6.582344853422464, "learning_rate": 1.0844784891021348e-06, "loss": 17.5508, "step": 43371 }, { "epoch": 0.792805308278648, "grad_norm": 5.523893594531339, "learning_rate": 1.0842944092344692e-06, "loss": 17.2706, "step": 43372 }, { "epoch": 0.7928235874750946, "grad_norm": 6.493212344303961, "learning_rate": 1.084110343091012e-06, "loss": 17.8363, "step": 43373 }, { "epoch": 0.7928418666715411, "grad_norm": 6.95207113602628, "learning_rate": 1.0839262906724097e-06, "loss": 17.3859, "step": 43374 }, { "epoch": 0.7928601458679877, "grad_norm": 5.933175599036522, "learning_rate": 1.0837422519793045e-06, "loss": 17.2051, "step": 43375 }, { "epoch": 0.7928784250644342, "grad_norm": 5.4936196483358355, "learning_rate": 1.083558227012344e-06, "loss": 17.1396, "step": 43376 }, { "epoch": 0.7928967042608807, "grad_norm": 4.907526158004615, "learning_rate": 1.0833742157721726e-06, "loss": 17.0651, "step": 43377 }, { "epoch": 0.7929149834573272, "grad_norm": 6.335147542518546, "learning_rate": 1.0831902182594328e-06, "loss": 17.5461, "step": 43378 }, { "epoch": 0.7929332626537737, "grad_norm": 6.228042853797178, "learning_rate": 1.0830062344747732e-06, "loss": 17.3186, "step": 43379 }, { "epoch": 0.7929515418502202, "grad_norm": 6.488743717063098, "learning_rate": 1.0828222644188363e-06, "loss": 17.007, "step": 43380 }, { "epoch": 0.7929698210466668, "grad_norm": 7.0105129710034655, "learning_rate": 1.0826383080922665e-06, "loss": 17.3922, "step": 43381 }, { "epoch": 0.7929881002431133, "grad_norm": 6.968564434229421, "learning_rate": 1.08245436549571e-06, "loss": 17.606, "step": 43382 }, { "epoch": 0.7930063794395599, "grad_norm": 6.368194296465518, "learning_rate": 1.0822704366298115e-06, "loss": 17.1184, "step": 43383 }, { "epoch": 0.7930246586360064, "grad_norm": 7.19375055996239, "learning_rate": 1.0820865214952141e-06, "loss": 17.2841, "step": 43384 }, { "epoch": 0.7930429378324528, "grad_norm": 7.305443016970895, "learning_rate": 1.0819026200925648e-06, "loss": 18.3673, "step": 43385 }, { "epoch": 0.7930612170288994, "grad_norm": 5.506072384056579, "learning_rate": 1.0817187324225065e-06, "loss": 17.0867, "step": 43386 }, { "epoch": 0.7930794962253459, "grad_norm": 6.743746178345695, "learning_rate": 1.0815348584856834e-06, "loss": 17.4947, "step": 43387 }, { "epoch": 0.7930977754217925, "grad_norm": 5.143854682681349, "learning_rate": 1.0813509982827413e-06, "loss": 17.1594, "step": 43388 }, { "epoch": 0.793116054618239, "grad_norm": 6.227590408379362, "learning_rate": 1.081167151814323e-06, "loss": 17.2511, "step": 43389 }, { "epoch": 0.7931343338146855, "grad_norm": 6.708559301661521, "learning_rate": 1.0809833190810749e-06, "loss": 17.3697, "step": 43390 }, { "epoch": 0.793152613011132, "grad_norm": 5.895075865473705, "learning_rate": 1.0807995000836386e-06, "loss": 17.0296, "step": 43391 }, { "epoch": 0.7931708922075785, "grad_norm": 5.3109881432114525, "learning_rate": 1.0806156948226605e-06, "loss": 16.9396, "step": 43392 }, { "epoch": 0.7931891714040251, "grad_norm": 7.119432431993164, "learning_rate": 1.0804319032987854e-06, "loss": 17.5547, "step": 43393 }, { "epoch": 0.7932074506004716, "grad_norm": 5.707723207303185, "learning_rate": 1.0802481255126561e-06, "loss": 17.0693, "step": 43394 }, { "epoch": 0.7932257297969181, "grad_norm": 5.742524667531737, "learning_rate": 1.0800643614649159e-06, "loss": 17.2729, "step": 43395 }, { "epoch": 0.7932440089933647, "grad_norm": 5.475053801830824, "learning_rate": 1.079880611156211e-06, "loss": 16.8945, "step": 43396 }, { "epoch": 0.7932622881898111, "grad_norm": 6.591356759872551, "learning_rate": 1.079696874587185e-06, "loss": 17.6654, "step": 43397 }, { "epoch": 0.7932805673862577, "grad_norm": 6.568878752130875, "learning_rate": 1.0795131517584794e-06, "loss": 17.6467, "step": 43398 }, { "epoch": 0.7932988465827042, "grad_norm": 5.735958339352913, "learning_rate": 1.0793294426707413e-06, "loss": 17.0562, "step": 43399 }, { "epoch": 0.7933171257791507, "grad_norm": 5.239736126557752, "learning_rate": 1.079145747324612e-06, "loss": 17.0471, "step": 43400 }, { "epoch": 0.7933354049755973, "grad_norm": 6.325946464983996, "learning_rate": 1.0789620657207366e-06, "loss": 17.2801, "step": 43401 }, { "epoch": 0.7933536841720438, "grad_norm": 4.855562687568274, "learning_rate": 1.0787783978597605e-06, "loss": 16.9348, "step": 43402 }, { "epoch": 0.7933719633684904, "grad_norm": 6.133524393231906, "learning_rate": 1.0785947437423261e-06, "loss": 17.2835, "step": 43403 }, { "epoch": 0.7933902425649368, "grad_norm": 6.364817359508005, "learning_rate": 1.0784111033690748e-06, "loss": 17.1557, "step": 43404 }, { "epoch": 0.7934085217613833, "grad_norm": 6.345915390080849, "learning_rate": 1.0782274767406541e-06, "loss": 17.7904, "step": 43405 }, { "epoch": 0.7934268009578299, "grad_norm": 5.866504874658882, "learning_rate": 1.0780438638577046e-06, "loss": 17.2887, "step": 43406 }, { "epoch": 0.7934450801542764, "grad_norm": 6.195476574316058, "learning_rate": 1.077860264720872e-06, "loss": 17.2798, "step": 43407 }, { "epoch": 0.793463359350723, "grad_norm": 5.5041140707814185, "learning_rate": 1.0776766793307986e-06, "loss": 17.2954, "step": 43408 }, { "epoch": 0.7934816385471695, "grad_norm": 6.326407170698243, "learning_rate": 1.0774931076881274e-06, "loss": 17.4101, "step": 43409 }, { "epoch": 0.793499917743616, "grad_norm": 5.754081754243125, "learning_rate": 1.0773095497935016e-06, "loss": 17.1511, "step": 43410 }, { "epoch": 0.7935181969400625, "grad_norm": 5.498948401770603, "learning_rate": 1.0771260056475675e-06, "loss": 16.9264, "step": 43411 }, { "epoch": 0.793536476136509, "grad_norm": 7.0491484215209494, "learning_rate": 1.0769424752509649e-06, "loss": 17.5965, "step": 43412 }, { "epoch": 0.7935547553329556, "grad_norm": 6.830191830698029, "learning_rate": 1.0767589586043398e-06, "loss": 17.4255, "step": 43413 }, { "epoch": 0.7935730345294021, "grad_norm": 5.783074760393052, "learning_rate": 1.0765754557083336e-06, "loss": 17.3782, "step": 43414 }, { "epoch": 0.7935913137258486, "grad_norm": 6.136946521269728, "learning_rate": 1.076391966563589e-06, "loss": 17.2175, "step": 43415 }, { "epoch": 0.7936095929222952, "grad_norm": 4.914039811443715, "learning_rate": 1.0762084911707516e-06, "loss": 16.7548, "step": 43416 }, { "epoch": 0.7936278721187416, "grad_norm": 7.07066406493944, "learning_rate": 1.076025029530463e-06, "loss": 17.7948, "step": 43417 }, { "epoch": 0.7936461513151882, "grad_norm": 5.8729250968031605, "learning_rate": 1.0758415816433648e-06, "loss": 17.3231, "step": 43418 }, { "epoch": 0.7936644305116347, "grad_norm": 6.4387424983220445, "learning_rate": 1.0756581475101009e-06, "loss": 17.5544, "step": 43419 }, { "epoch": 0.7936827097080812, "grad_norm": 6.914560761473948, "learning_rate": 1.075474727131315e-06, "loss": 17.6773, "step": 43420 }, { "epoch": 0.7937009889045278, "grad_norm": 4.391966332969039, "learning_rate": 1.075291320507651e-06, "loss": 16.7339, "step": 43421 }, { "epoch": 0.7937192681009743, "grad_norm": 6.620621958717742, "learning_rate": 1.0751079276397502e-06, "loss": 17.1897, "step": 43422 }, { "epoch": 0.7937375472974209, "grad_norm": 4.999090692126297, "learning_rate": 1.074924548528254e-06, "loss": 17.0373, "step": 43423 }, { "epoch": 0.7937558264938673, "grad_norm": 6.054473504186626, "learning_rate": 1.0747411831738087e-06, "loss": 17.456, "step": 43424 }, { "epoch": 0.7937741056903138, "grad_norm": 8.899274553826688, "learning_rate": 1.0745578315770544e-06, "loss": 18.0275, "step": 43425 }, { "epoch": 0.7937923848867604, "grad_norm": 6.056360711331136, "learning_rate": 1.0743744937386324e-06, "loss": 17.1648, "step": 43426 }, { "epoch": 0.7938106640832069, "grad_norm": 6.746846499793492, "learning_rate": 1.0741911696591878e-06, "loss": 17.2749, "step": 43427 }, { "epoch": 0.7938289432796535, "grad_norm": 5.923066058254632, "learning_rate": 1.0740078593393638e-06, "loss": 17.4485, "step": 43428 }, { "epoch": 0.7938472224761, "grad_norm": 7.071733205906999, "learning_rate": 1.0738245627798005e-06, "loss": 17.5124, "step": 43429 }, { "epoch": 0.7938655016725464, "grad_norm": 5.364239259272481, "learning_rate": 1.0736412799811425e-06, "loss": 17.0457, "step": 43430 }, { "epoch": 0.793883780868993, "grad_norm": 6.775666629631455, "learning_rate": 1.0734580109440306e-06, "loss": 17.5602, "step": 43431 }, { "epoch": 0.7939020600654395, "grad_norm": 6.436130540765, "learning_rate": 1.073274755669107e-06, "loss": 17.5642, "step": 43432 }, { "epoch": 0.7939203392618861, "grad_norm": 6.65575935842331, "learning_rate": 1.0730915141570158e-06, "loss": 17.0824, "step": 43433 }, { "epoch": 0.7939386184583326, "grad_norm": 6.056322518809798, "learning_rate": 1.072908286408398e-06, "loss": 17.3435, "step": 43434 }, { "epoch": 0.7939568976547791, "grad_norm": 6.516419426388489, "learning_rate": 1.0727250724238947e-06, "loss": 17.3545, "step": 43435 }, { "epoch": 0.7939751768512257, "grad_norm": 5.713357627301494, "learning_rate": 1.0725418722041487e-06, "loss": 17.1768, "step": 43436 }, { "epoch": 0.7939934560476721, "grad_norm": 5.470000758253357, "learning_rate": 1.0723586857498031e-06, "loss": 17.3924, "step": 43437 }, { "epoch": 0.7940117352441187, "grad_norm": 6.81894805917376, "learning_rate": 1.0721755130615008e-06, "loss": 17.4067, "step": 43438 }, { "epoch": 0.7940300144405652, "grad_norm": 5.833320685852123, "learning_rate": 1.0719923541398825e-06, "loss": 17.4814, "step": 43439 }, { "epoch": 0.7940482936370117, "grad_norm": 4.955025131882884, "learning_rate": 1.0718092089855887e-06, "loss": 16.9711, "step": 43440 }, { "epoch": 0.7940665728334583, "grad_norm": 5.098019571917756, "learning_rate": 1.0716260775992642e-06, "loss": 16.8423, "step": 43441 }, { "epoch": 0.7940848520299048, "grad_norm": 6.320640256966096, "learning_rate": 1.071442959981549e-06, "loss": 17.0226, "step": 43442 }, { "epoch": 0.7941031312263513, "grad_norm": 6.898700182092394, "learning_rate": 1.0712598561330844e-06, "loss": 17.7501, "step": 43443 }, { "epoch": 0.7941214104227978, "grad_norm": 5.572063497202099, "learning_rate": 1.0710767660545146e-06, "loss": 17.0267, "step": 43444 }, { "epoch": 0.7941396896192443, "grad_norm": 6.028876885298275, "learning_rate": 1.0708936897464777e-06, "loss": 17.2054, "step": 43445 }, { "epoch": 0.7941579688156909, "grad_norm": 6.085333650290835, "learning_rate": 1.0707106272096179e-06, "loss": 17.0579, "step": 43446 }, { "epoch": 0.7941762480121374, "grad_norm": 5.173378198086574, "learning_rate": 1.0705275784445778e-06, "loss": 16.8115, "step": 43447 }, { "epoch": 0.7941945272085839, "grad_norm": 6.292537632725427, "learning_rate": 1.0703445434519976e-06, "loss": 17.3829, "step": 43448 }, { "epoch": 0.7942128064050304, "grad_norm": 5.592114799248138, "learning_rate": 1.0701615222325174e-06, "loss": 17.3506, "step": 43449 }, { "epoch": 0.7942310856014769, "grad_norm": 6.173554301415412, "learning_rate": 1.0699785147867813e-06, "loss": 17.2354, "step": 43450 }, { "epoch": 0.7942493647979235, "grad_norm": 5.5523442775783565, "learning_rate": 1.0697955211154282e-06, "loss": 17.4369, "step": 43451 }, { "epoch": 0.79426764399437, "grad_norm": 6.608103821742027, "learning_rate": 1.0696125412191022e-06, "loss": 17.6218, "step": 43452 }, { "epoch": 0.7942859231908165, "grad_norm": 7.130068102939481, "learning_rate": 1.069429575098443e-06, "loss": 17.6163, "step": 43453 }, { "epoch": 0.7943042023872631, "grad_norm": 5.931635581759014, "learning_rate": 1.069246622754091e-06, "loss": 16.975, "step": 43454 }, { "epoch": 0.7943224815837095, "grad_norm": 6.393971873986066, "learning_rate": 1.069063684186688e-06, "loss": 17.157, "step": 43455 }, { "epoch": 0.7943407607801561, "grad_norm": 6.103849062759465, "learning_rate": 1.0688807593968769e-06, "loss": 16.9626, "step": 43456 }, { "epoch": 0.7943590399766026, "grad_norm": 7.183244110307049, "learning_rate": 1.0686978483852971e-06, "loss": 17.629, "step": 43457 }, { "epoch": 0.7943773191730491, "grad_norm": 5.921387701972833, "learning_rate": 1.068514951152591e-06, "loss": 17.3884, "step": 43458 }, { "epoch": 0.7943955983694957, "grad_norm": 5.744165146029434, "learning_rate": 1.068332067699399e-06, "loss": 17.4481, "step": 43459 }, { "epoch": 0.7944138775659422, "grad_norm": 6.169784562288571, "learning_rate": 1.0681491980263602e-06, "loss": 17.1416, "step": 43460 }, { "epoch": 0.7944321567623888, "grad_norm": 6.293790248713421, "learning_rate": 1.067966342134119e-06, "loss": 17.3357, "step": 43461 }, { "epoch": 0.7944504359588352, "grad_norm": 5.881597596402412, "learning_rate": 1.0677835000233144e-06, "loss": 17.2827, "step": 43462 }, { "epoch": 0.7944687151552817, "grad_norm": 6.965197218067335, "learning_rate": 1.0676006716945864e-06, "loss": 17.7814, "step": 43463 }, { "epoch": 0.7944869943517283, "grad_norm": 7.337371677028744, "learning_rate": 1.0674178571485766e-06, "loss": 18.0569, "step": 43464 }, { "epoch": 0.7945052735481748, "grad_norm": 6.413655094623755, "learning_rate": 1.0672350563859273e-06, "loss": 17.5164, "step": 43465 }, { "epoch": 0.7945235527446214, "grad_norm": 6.4917901130636455, "learning_rate": 1.0670522694072766e-06, "loss": 17.2357, "step": 43466 }, { "epoch": 0.7945418319410679, "grad_norm": 7.901403602860654, "learning_rate": 1.0668694962132674e-06, "loss": 18.5526, "step": 43467 }, { "epoch": 0.7945601111375143, "grad_norm": 5.31941932051631, "learning_rate": 1.066686736804538e-06, "loss": 17.0553, "step": 43468 }, { "epoch": 0.7945783903339609, "grad_norm": 6.470050954068798, "learning_rate": 1.066503991181732e-06, "loss": 17.5781, "step": 43469 }, { "epoch": 0.7945966695304074, "grad_norm": 5.966896281128391, "learning_rate": 1.066321259345488e-06, "loss": 17.3243, "step": 43470 }, { "epoch": 0.794614948726854, "grad_norm": 5.023223626063593, "learning_rate": 1.0661385412964454e-06, "loss": 16.8334, "step": 43471 }, { "epoch": 0.7946332279233005, "grad_norm": 6.6144917943893935, "learning_rate": 1.0659558370352468e-06, "loss": 17.2221, "step": 43472 }, { "epoch": 0.794651507119747, "grad_norm": 6.369082108349194, "learning_rate": 1.0657731465625305e-06, "loss": 17.5451, "step": 43473 }, { "epoch": 0.7946697863161936, "grad_norm": 8.944374593406753, "learning_rate": 1.0655904698789382e-06, "loss": 17.8945, "step": 43474 }, { "epoch": 0.79468806551264, "grad_norm": 7.118563255615286, "learning_rate": 1.065407806985111e-06, "loss": 17.7037, "step": 43475 }, { "epoch": 0.7947063447090866, "grad_norm": 8.105551224148272, "learning_rate": 1.0652251578816881e-06, "loss": 18.1862, "step": 43476 }, { "epoch": 0.7947246239055331, "grad_norm": 6.115697154312012, "learning_rate": 1.0650425225693078e-06, "loss": 17.2446, "step": 43477 }, { "epoch": 0.7947429031019796, "grad_norm": 6.691805012750236, "learning_rate": 1.0648599010486138e-06, "loss": 17.5483, "step": 43478 }, { "epoch": 0.7947611822984262, "grad_norm": 6.347668926228368, "learning_rate": 1.0646772933202444e-06, "loss": 17.4421, "step": 43479 }, { "epoch": 0.7947794614948727, "grad_norm": 5.2791329790786925, "learning_rate": 1.0644946993848383e-06, "loss": 17.0799, "step": 43480 }, { "epoch": 0.7947977406913193, "grad_norm": 6.241083572891558, "learning_rate": 1.0643121192430378e-06, "loss": 17.3917, "step": 43481 }, { "epoch": 0.7948160198877657, "grad_norm": 5.891728097427797, "learning_rate": 1.0641295528954804e-06, "loss": 17.2409, "step": 43482 }, { "epoch": 0.7948342990842122, "grad_norm": 7.92097729644953, "learning_rate": 1.0639470003428077e-06, "loss": 17.5759, "step": 43483 }, { "epoch": 0.7948525782806588, "grad_norm": 7.475579985824094, "learning_rate": 1.0637644615856602e-06, "loss": 17.8207, "step": 43484 }, { "epoch": 0.7948708574771053, "grad_norm": 6.419514708465712, "learning_rate": 1.0635819366246758e-06, "loss": 17.4261, "step": 43485 }, { "epoch": 0.7948891366735519, "grad_norm": 6.118330759831264, "learning_rate": 1.0633994254604962e-06, "loss": 17.2812, "step": 43486 }, { "epoch": 0.7949074158699984, "grad_norm": 7.166044397172513, "learning_rate": 1.0632169280937598e-06, "loss": 17.7672, "step": 43487 }, { "epoch": 0.7949256950664448, "grad_norm": 5.537986481315647, "learning_rate": 1.0630344445251056e-06, "loss": 17.2034, "step": 43488 }, { "epoch": 0.7949439742628914, "grad_norm": 5.346961849440402, "learning_rate": 1.0628519747551746e-06, "loss": 17.119, "step": 43489 }, { "epoch": 0.7949622534593379, "grad_norm": 5.953397472038311, "learning_rate": 1.0626695187846053e-06, "loss": 17.1834, "step": 43490 }, { "epoch": 0.7949805326557845, "grad_norm": 6.29769087356711, "learning_rate": 1.0624870766140389e-06, "loss": 17.2121, "step": 43491 }, { "epoch": 0.794998811852231, "grad_norm": 5.45179180834377, "learning_rate": 1.0623046482441118e-06, "loss": 17.0843, "step": 43492 }, { "epoch": 0.7950170910486775, "grad_norm": 5.415410178937179, "learning_rate": 1.0621222336754667e-06, "loss": 17.0088, "step": 43493 }, { "epoch": 0.795035370245124, "grad_norm": 6.838458544527325, "learning_rate": 1.0619398329087404e-06, "loss": 17.7582, "step": 43494 }, { "epoch": 0.7950536494415705, "grad_norm": 5.530905275631436, "learning_rate": 1.0617574459445746e-06, "loss": 16.9828, "step": 43495 }, { "epoch": 0.7950719286380171, "grad_norm": 6.511876513806991, "learning_rate": 1.0615750727836066e-06, "loss": 17.551, "step": 43496 }, { "epoch": 0.7950902078344636, "grad_norm": 5.825379444881647, "learning_rate": 1.061392713426475e-06, "loss": 17.3258, "step": 43497 }, { "epoch": 0.7951084870309101, "grad_norm": 5.6435247264381445, "learning_rate": 1.0612103678738222e-06, "loss": 16.9826, "step": 43498 }, { "epoch": 0.7951267662273567, "grad_norm": 7.362538034694465, "learning_rate": 1.0610280361262837e-06, "loss": 17.4286, "step": 43499 }, { "epoch": 0.7951450454238032, "grad_norm": 7.232307924306094, "learning_rate": 1.0608457181844993e-06, "loss": 17.7328, "step": 43500 }, { "epoch": 0.7951633246202497, "grad_norm": 8.371690370509056, "learning_rate": 1.0606634140491113e-06, "loss": 18.0013, "step": 43501 }, { "epoch": 0.7951816038166962, "grad_norm": 6.453079512067208, "learning_rate": 1.060481123720754e-06, "loss": 17.5869, "step": 43502 }, { "epoch": 0.7951998830131427, "grad_norm": 5.626705435832622, "learning_rate": 1.0602988472000702e-06, "loss": 16.9434, "step": 43503 }, { "epoch": 0.7952181622095893, "grad_norm": 6.641961563713981, "learning_rate": 1.0601165844876977e-06, "loss": 17.4077, "step": 43504 }, { "epoch": 0.7952364414060358, "grad_norm": 5.921373826220255, "learning_rate": 1.0599343355842723e-06, "loss": 17.137, "step": 43505 }, { "epoch": 0.7952547206024824, "grad_norm": 6.362126780722012, "learning_rate": 1.059752100490437e-06, "loss": 17.3813, "step": 43506 }, { "epoch": 0.7952729997989288, "grad_norm": 6.537378923347845, "learning_rate": 1.0595698792068288e-06, "loss": 17.5332, "step": 43507 }, { "epoch": 0.7952912789953753, "grad_norm": 7.778570517375712, "learning_rate": 1.0593876717340845e-06, "loss": 17.8449, "step": 43508 }, { "epoch": 0.7953095581918219, "grad_norm": 6.13513229284772, "learning_rate": 1.0592054780728445e-06, "loss": 17.3738, "step": 43509 }, { "epoch": 0.7953278373882684, "grad_norm": 7.1017064551944635, "learning_rate": 1.0590232982237492e-06, "loss": 17.6391, "step": 43510 }, { "epoch": 0.795346116584715, "grad_norm": 5.965857203149305, "learning_rate": 1.0588411321874332e-06, "loss": 17.1975, "step": 43511 }, { "epoch": 0.7953643957811615, "grad_norm": 5.371366784791508, "learning_rate": 1.0586589799645385e-06, "loss": 17.2071, "step": 43512 }, { "epoch": 0.795382674977608, "grad_norm": 7.406278644463835, "learning_rate": 1.0584768415557011e-06, "loss": 17.8033, "step": 43513 }, { "epoch": 0.7954009541740545, "grad_norm": 5.820256762470668, "learning_rate": 1.0582947169615615e-06, "loss": 16.9219, "step": 43514 }, { "epoch": 0.795419233370501, "grad_norm": 4.940440377523336, "learning_rate": 1.0581126061827567e-06, "loss": 17.0631, "step": 43515 }, { "epoch": 0.7954375125669475, "grad_norm": 6.558789514497426, "learning_rate": 1.0579305092199244e-06, "loss": 17.4167, "step": 43516 }, { "epoch": 0.7954557917633941, "grad_norm": 6.608086721772384, "learning_rate": 1.0577484260737041e-06, "loss": 17.3562, "step": 43517 }, { "epoch": 0.7954740709598406, "grad_norm": 5.432145722269596, "learning_rate": 1.0575663567447332e-06, "loss": 17.0147, "step": 43518 }, { "epoch": 0.7954923501562872, "grad_norm": 6.972148303654615, "learning_rate": 1.0573843012336493e-06, "loss": 17.7647, "step": 43519 }, { "epoch": 0.7955106293527336, "grad_norm": 4.997800046085555, "learning_rate": 1.0572022595410925e-06, "loss": 16.867, "step": 43520 }, { "epoch": 0.7955289085491801, "grad_norm": 7.939221955820735, "learning_rate": 1.0570202316677002e-06, "loss": 18.2518, "step": 43521 }, { "epoch": 0.7955471877456267, "grad_norm": 5.509502824671226, "learning_rate": 1.0568382176141078e-06, "loss": 17.0365, "step": 43522 }, { "epoch": 0.7955654669420732, "grad_norm": 7.385135013514164, "learning_rate": 1.0566562173809574e-06, "loss": 17.8364, "step": 43523 }, { "epoch": 0.7955837461385198, "grad_norm": 6.971545972209332, "learning_rate": 1.0564742309688846e-06, "loss": 17.6679, "step": 43524 }, { "epoch": 0.7956020253349663, "grad_norm": 6.244089050928947, "learning_rate": 1.0562922583785257e-06, "loss": 17.2578, "step": 43525 }, { "epoch": 0.7956203045314127, "grad_norm": 6.752462808891115, "learning_rate": 1.0561102996105216e-06, "loss": 17.6609, "step": 43526 }, { "epoch": 0.7956385837278593, "grad_norm": 5.655228918680042, "learning_rate": 1.0559283546655075e-06, "loss": 16.9808, "step": 43527 }, { "epoch": 0.7956568629243058, "grad_norm": 6.571067486925783, "learning_rate": 1.0557464235441222e-06, "loss": 17.4461, "step": 43528 }, { "epoch": 0.7956751421207524, "grad_norm": 6.644276184330121, "learning_rate": 1.0555645062470049e-06, "loss": 17.4515, "step": 43529 }, { "epoch": 0.7956934213171989, "grad_norm": 5.011304184184786, "learning_rate": 1.0553826027747904e-06, "loss": 16.8532, "step": 43530 }, { "epoch": 0.7957117005136454, "grad_norm": 4.881080412661637, "learning_rate": 1.0552007131281188e-06, "loss": 16.6513, "step": 43531 }, { "epoch": 0.795729979710092, "grad_norm": 7.732350378934611, "learning_rate": 1.0550188373076264e-06, "loss": 18.062, "step": 43532 }, { "epoch": 0.7957482589065384, "grad_norm": 6.104170205529251, "learning_rate": 1.0548369753139492e-06, "loss": 17.2576, "step": 43533 }, { "epoch": 0.795766538102985, "grad_norm": 5.254975809060827, "learning_rate": 1.0546551271477278e-06, "loss": 16.9546, "step": 43534 }, { "epoch": 0.7957848172994315, "grad_norm": 6.15843927686036, "learning_rate": 1.0544732928095974e-06, "loss": 17.3587, "step": 43535 }, { "epoch": 0.795803096495878, "grad_norm": 6.715315481229418, "learning_rate": 1.0542914723001946e-06, "loss": 17.1805, "step": 43536 }, { "epoch": 0.7958213756923246, "grad_norm": 5.2903592943482565, "learning_rate": 1.0541096656201577e-06, "loss": 16.9077, "step": 43537 }, { "epoch": 0.7958396548887711, "grad_norm": 5.488258821409448, "learning_rate": 1.0539278727701252e-06, "loss": 17.2181, "step": 43538 }, { "epoch": 0.7958579340852177, "grad_norm": 6.690570793422645, "learning_rate": 1.053746093750732e-06, "loss": 17.6197, "step": 43539 }, { "epoch": 0.7958762132816641, "grad_norm": 6.769731397649387, "learning_rate": 1.0535643285626174e-06, "loss": 17.4652, "step": 43540 }, { "epoch": 0.7958944924781106, "grad_norm": 7.247078531955624, "learning_rate": 1.0533825772064176e-06, "loss": 17.7882, "step": 43541 }, { "epoch": 0.7959127716745572, "grad_norm": 6.543107587351002, "learning_rate": 1.0532008396827682e-06, "loss": 17.3284, "step": 43542 }, { "epoch": 0.7959310508710037, "grad_norm": 6.282253386548127, "learning_rate": 1.053019115992308e-06, "loss": 17.3334, "step": 43543 }, { "epoch": 0.7959493300674503, "grad_norm": 6.425567133282973, "learning_rate": 1.0528374061356727e-06, "loss": 17.4851, "step": 43544 }, { "epoch": 0.7959676092638968, "grad_norm": 5.843137728828713, "learning_rate": 1.0526557101135009e-06, "loss": 17.3341, "step": 43545 }, { "epoch": 0.7959858884603432, "grad_norm": 7.302060678588048, "learning_rate": 1.0524740279264268e-06, "loss": 17.4381, "step": 43546 }, { "epoch": 0.7960041676567898, "grad_norm": 5.090698875765307, "learning_rate": 1.052292359575089e-06, "loss": 17.0844, "step": 43547 }, { "epoch": 0.7960224468532363, "grad_norm": 7.740729369854913, "learning_rate": 1.0521107050601248e-06, "loss": 17.7183, "step": 43548 }, { "epoch": 0.7960407260496829, "grad_norm": 7.289304093154349, "learning_rate": 1.05192906438217e-06, "loss": 17.4612, "step": 43549 }, { "epoch": 0.7960590052461294, "grad_norm": 5.194260868292036, "learning_rate": 1.0517474375418602e-06, "loss": 17.0325, "step": 43550 }, { "epoch": 0.7960772844425759, "grad_norm": 6.287508688441544, "learning_rate": 1.0515658245398348e-06, "loss": 17.5432, "step": 43551 }, { "epoch": 0.7960955636390225, "grad_norm": 6.967582079135607, "learning_rate": 1.0513842253767275e-06, "loss": 17.4863, "step": 43552 }, { "epoch": 0.7961138428354689, "grad_norm": 6.228396144951435, "learning_rate": 1.0512026400531755e-06, "loss": 17.2632, "step": 43553 }, { "epoch": 0.7961321220319155, "grad_norm": 5.964778367682067, "learning_rate": 1.0510210685698147e-06, "loss": 17.047, "step": 43554 }, { "epoch": 0.796150401228362, "grad_norm": 6.861567874329271, "learning_rate": 1.0508395109272846e-06, "loss": 17.5628, "step": 43555 }, { "epoch": 0.7961686804248085, "grad_norm": 5.228325595721982, "learning_rate": 1.0506579671262173e-06, "loss": 16.9692, "step": 43556 }, { "epoch": 0.7961869596212551, "grad_norm": 5.667727500936744, "learning_rate": 1.0504764371672527e-06, "loss": 16.9629, "step": 43557 }, { "epoch": 0.7962052388177016, "grad_norm": 5.899255212885068, "learning_rate": 1.0502949210510238e-06, "loss": 17.0137, "step": 43558 }, { "epoch": 0.7962235180141481, "grad_norm": 5.484259027098068, "learning_rate": 1.0501134187781703e-06, "loss": 16.958, "step": 43559 }, { "epoch": 0.7962417972105946, "grad_norm": 6.2604151611009495, "learning_rate": 1.0499319303493266e-06, "loss": 17.51, "step": 43560 }, { "epoch": 0.7962600764070411, "grad_norm": 6.8497664153428115, "learning_rate": 1.0497504557651266e-06, "loss": 17.2897, "step": 43561 }, { "epoch": 0.7962783556034877, "grad_norm": 5.400825106777444, "learning_rate": 1.0495689950262105e-06, "loss": 17.2011, "step": 43562 }, { "epoch": 0.7962966347999342, "grad_norm": 6.140221389745143, "learning_rate": 1.0493875481332112e-06, "loss": 17.2908, "step": 43563 }, { "epoch": 0.7963149139963808, "grad_norm": 6.385330110176382, "learning_rate": 1.0492061150867649e-06, "loss": 17.3966, "step": 43564 }, { "epoch": 0.7963331931928272, "grad_norm": 5.029903640187762, "learning_rate": 1.04902469588751e-06, "loss": 16.9872, "step": 43565 }, { "epoch": 0.7963514723892737, "grad_norm": 6.27712980562335, "learning_rate": 1.0488432905360807e-06, "loss": 17.3534, "step": 43566 }, { "epoch": 0.7963697515857203, "grad_norm": 6.535371703758259, "learning_rate": 1.0486618990331116e-06, "loss": 17.4255, "step": 43567 }, { "epoch": 0.7963880307821668, "grad_norm": 6.381718576421871, "learning_rate": 1.048480521379241e-06, "loss": 17.2972, "step": 43568 }, { "epoch": 0.7964063099786134, "grad_norm": 6.232578409117792, "learning_rate": 1.0482991575751028e-06, "loss": 17.4174, "step": 43569 }, { "epoch": 0.7964245891750599, "grad_norm": 5.353127401544909, "learning_rate": 1.0481178076213322e-06, "loss": 17.0677, "step": 43570 }, { "epoch": 0.7964428683715064, "grad_norm": 6.452603131903785, "learning_rate": 1.0479364715185676e-06, "loss": 17.4668, "step": 43571 }, { "epoch": 0.7964611475679529, "grad_norm": 7.06393157920779, "learning_rate": 1.0477551492674405e-06, "loss": 17.8259, "step": 43572 }, { "epoch": 0.7964794267643994, "grad_norm": 5.661649087136177, "learning_rate": 1.0475738408685888e-06, "loss": 17.2411, "step": 43573 }, { "epoch": 0.796497705960846, "grad_norm": 6.8192447501833025, "learning_rate": 1.0473925463226497e-06, "loss": 17.7094, "step": 43574 }, { "epoch": 0.7965159851572925, "grad_norm": 8.044114755943385, "learning_rate": 1.0472112656302546e-06, "loss": 17.843, "step": 43575 }, { "epoch": 0.796534264353739, "grad_norm": 6.097453908248195, "learning_rate": 1.047029998792043e-06, "loss": 17.3915, "step": 43576 }, { "epoch": 0.7965525435501856, "grad_norm": 5.5331949032968675, "learning_rate": 1.046848745808648e-06, "loss": 17.0356, "step": 43577 }, { "epoch": 0.796570822746632, "grad_norm": 6.685947264063111, "learning_rate": 1.0466675066807036e-06, "loss": 17.6899, "step": 43578 }, { "epoch": 0.7965891019430786, "grad_norm": 7.921279646554929, "learning_rate": 1.0464862814088484e-06, "loss": 18.0729, "step": 43579 }, { "epoch": 0.7966073811395251, "grad_norm": 5.718731875064656, "learning_rate": 1.046305069993715e-06, "loss": 17.1816, "step": 43580 }, { "epoch": 0.7966256603359716, "grad_norm": 6.435696290025953, "learning_rate": 1.0461238724359384e-06, "loss": 17.3445, "step": 43581 }, { "epoch": 0.7966439395324182, "grad_norm": 5.830749011649717, "learning_rate": 1.0459426887361545e-06, "loss": 17.1121, "step": 43582 }, { "epoch": 0.7966622187288647, "grad_norm": 6.933294591986953, "learning_rate": 1.0457615188949993e-06, "loss": 17.817, "step": 43583 }, { "epoch": 0.7966804979253111, "grad_norm": 5.243382048864295, "learning_rate": 1.045580362913106e-06, "loss": 17.0787, "step": 43584 }, { "epoch": 0.7966987771217577, "grad_norm": 5.917375358303449, "learning_rate": 1.0453992207911117e-06, "loss": 17.1768, "step": 43585 }, { "epoch": 0.7967170563182042, "grad_norm": 6.074679785014892, "learning_rate": 1.0452180925296496e-06, "loss": 17.4898, "step": 43586 }, { "epoch": 0.7967353355146508, "grad_norm": 5.094401102066379, "learning_rate": 1.045036978129354e-06, "loss": 16.8829, "step": 43587 }, { "epoch": 0.7967536147110973, "grad_norm": 6.1450756997101745, "learning_rate": 1.0448558775908613e-06, "loss": 17.1871, "step": 43588 }, { "epoch": 0.7967718939075438, "grad_norm": 6.566471412360749, "learning_rate": 1.0446747909148046e-06, "loss": 17.4486, "step": 43589 }, { "epoch": 0.7967901731039904, "grad_norm": 6.46937740720456, "learning_rate": 1.0444937181018212e-06, "loss": 17.4944, "step": 43590 }, { "epoch": 0.7968084523004368, "grad_norm": 5.129841535924055, "learning_rate": 1.0443126591525421e-06, "loss": 16.8834, "step": 43591 }, { "epoch": 0.7968267314968834, "grad_norm": 5.370429534313587, "learning_rate": 1.0441316140676045e-06, "loss": 17.1249, "step": 43592 }, { "epoch": 0.7968450106933299, "grad_norm": 5.55958013879356, "learning_rate": 1.0439505828476432e-06, "loss": 17.0441, "step": 43593 }, { "epoch": 0.7968632898897764, "grad_norm": 4.75935019113644, "learning_rate": 1.0437695654932918e-06, "loss": 16.8954, "step": 43594 }, { "epoch": 0.796881569086223, "grad_norm": 4.890980488763306, "learning_rate": 1.0435885620051833e-06, "loss": 16.9297, "step": 43595 }, { "epoch": 0.7968998482826695, "grad_norm": 9.400309260592135, "learning_rate": 1.0434075723839554e-06, "loss": 17.3864, "step": 43596 }, { "epoch": 0.7969181274791161, "grad_norm": 5.65946118535622, "learning_rate": 1.04322659663024e-06, "loss": 16.9794, "step": 43597 }, { "epoch": 0.7969364066755625, "grad_norm": 6.616756411261239, "learning_rate": 1.0430456347446715e-06, "loss": 17.4696, "step": 43598 }, { "epoch": 0.796954685872009, "grad_norm": 6.158081422170476, "learning_rate": 1.0428646867278857e-06, "loss": 17.6203, "step": 43599 }, { "epoch": 0.7969729650684556, "grad_norm": 6.336022085638362, "learning_rate": 1.042683752580514e-06, "loss": 17.2794, "step": 43600 }, { "epoch": 0.7969912442649021, "grad_norm": 7.4965095364364664, "learning_rate": 1.0425028323031928e-06, "loss": 17.8463, "step": 43601 }, { "epoch": 0.7970095234613487, "grad_norm": 6.128444385647272, "learning_rate": 1.0423219258965572e-06, "loss": 17.2367, "step": 43602 }, { "epoch": 0.7970278026577952, "grad_norm": 7.168387402616568, "learning_rate": 1.0421410333612397e-06, "loss": 17.8123, "step": 43603 }, { "epoch": 0.7970460818542416, "grad_norm": 6.570672167224674, "learning_rate": 1.0419601546978726e-06, "loss": 17.4018, "step": 43604 }, { "epoch": 0.7970643610506882, "grad_norm": 6.905450910091003, "learning_rate": 1.0417792899070934e-06, "loss": 17.453, "step": 43605 }, { "epoch": 0.7970826402471347, "grad_norm": 5.97715826505548, "learning_rate": 1.0415984389895335e-06, "loss": 17.2696, "step": 43606 }, { "epoch": 0.7971009194435813, "grad_norm": 5.778586882430297, "learning_rate": 1.0414176019458282e-06, "loss": 17.3441, "step": 43607 }, { "epoch": 0.7971191986400278, "grad_norm": 7.583689896323646, "learning_rate": 1.041236778776611e-06, "loss": 17.8682, "step": 43608 }, { "epoch": 0.7971374778364743, "grad_norm": 5.490558145771713, "learning_rate": 1.041055969482514e-06, "loss": 17.3371, "step": 43609 }, { "epoch": 0.7971557570329209, "grad_norm": 6.691328010732298, "learning_rate": 1.040875174064172e-06, "loss": 17.5642, "step": 43610 }, { "epoch": 0.7971740362293673, "grad_norm": 8.344960305373055, "learning_rate": 1.0406943925222206e-06, "loss": 17.8927, "step": 43611 }, { "epoch": 0.7971923154258139, "grad_norm": 6.505956978553746, "learning_rate": 1.04051362485729e-06, "loss": 17.3815, "step": 43612 }, { "epoch": 0.7972105946222604, "grad_norm": 5.702438603736159, "learning_rate": 1.0403328710700178e-06, "loss": 17.2216, "step": 43613 }, { "epoch": 0.7972288738187069, "grad_norm": 5.312505392771198, "learning_rate": 1.0401521311610346e-06, "loss": 16.9652, "step": 43614 }, { "epoch": 0.7972471530151535, "grad_norm": 6.400685663411359, "learning_rate": 1.0399714051309735e-06, "loss": 17.3533, "step": 43615 }, { "epoch": 0.7972654322116, "grad_norm": 8.723717808658938, "learning_rate": 1.0397906929804702e-06, "loss": 18.0451, "step": 43616 }, { "epoch": 0.7972837114080465, "grad_norm": 5.695345001291317, "learning_rate": 1.0396099947101552e-06, "loss": 16.9884, "step": 43617 }, { "epoch": 0.797301990604493, "grad_norm": 6.105258331404608, "learning_rate": 1.0394293103206655e-06, "loss": 17.5564, "step": 43618 }, { "epoch": 0.7973202698009395, "grad_norm": 5.88103915961222, "learning_rate": 1.0392486398126306e-06, "loss": 16.9158, "step": 43619 }, { "epoch": 0.7973385489973861, "grad_norm": 6.076194855286665, "learning_rate": 1.0390679831866856e-06, "loss": 17.1904, "step": 43620 }, { "epoch": 0.7973568281938326, "grad_norm": 6.6796360499917125, "learning_rate": 1.0388873404434647e-06, "loss": 17.686, "step": 43621 }, { "epoch": 0.7973751073902792, "grad_norm": 6.533726543218713, "learning_rate": 1.0387067115836002e-06, "loss": 17.2579, "step": 43622 }, { "epoch": 0.7973933865867256, "grad_norm": 6.251992326630658, "learning_rate": 1.0385260966077238e-06, "loss": 17.4871, "step": 43623 }, { "epoch": 0.7974116657831721, "grad_norm": 5.5803023594118, "learning_rate": 1.0383454955164708e-06, "loss": 17.1885, "step": 43624 }, { "epoch": 0.7974299449796187, "grad_norm": 6.578481757318618, "learning_rate": 1.0381649083104728e-06, "loss": 17.5386, "step": 43625 }, { "epoch": 0.7974482241760652, "grad_norm": 6.780758534479037, "learning_rate": 1.0379843349903623e-06, "loss": 17.6165, "step": 43626 }, { "epoch": 0.7974665033725118, "grad_norm": 7.0389503108962606, "learning_rate": 1.0378037755567722e-06, "loss": 17.6352, "step": 43627 }, { "epoch": 0.7974847825689583, "grad_norm": 6.658719334022305, "learning_rate": 1.0376232300103378e-06, "loss": 17.6645, "step": 43628 }, { "epoch": 0.7975030617654048, "grad_norm": 6.792347009794986, "learning_rate": 1.0374426983516888e-06, "loss": 17.8952, "step": 43629 }, { "epoch": 0.7975213409618513, "grad_norm": 7.124229411389003, "learning_rate": 1.0372621805814604e-06, "loss": 17.7484, "step": 43630 }, { "epoch": 0.7975396201582978, "grad_norm": 7.246847834280695, "learning_rate": 1.0370816767002845e-06, "loss": 17.646, "step": 43631 }, { "epoch": 0.7975578993547444, "grad_norm": 6.513589822403, "learning_rate": 1.036901186708792e-06, "loss": 17.6228, "step": 43632 }, { "epoch": 0.7975761785511909, "grad_norm": 6.541563297399802, "learning_rate": 1.0367207106076181e-06, "loss": 17.4395, "step": 43633 }, { "epoch": 0.7975944577476374, "grad_norm": 6.337420045879775, "learning_rate": 1.0365402483973947e-06, "loss": 17.4858, "step": 43634 }, { "epoch": 0.797612736944084, "grad_norm": 6.215369506057718, "learning_rate": 1.0363598000787516e-06, "loss": 17.6725, "step": 43635 }, { "epoch": 0.7976310161405304, "grad_norm": 6.0664116007546145, "learning_rate": 1.036179365652324e-06, "loss": 17.5015, "step": 43636 }, { "epoch": 0.797649295336977, "grad_norm": 6.990847611836308, "learning_rate": 1.0359989451187436e-06, "loss": 17.7905, "step": 43637 }, { "epoch": 0.7976675745334235, "grad_norm": 5.236038998243115, "learning_rate": 1.0358185384786446e-06, "loss": 17.1149, "step": 43638 }, { "epoch": 0.79768585372987, "grad_norm": 6.127619098283721, "learning_rate": 1.0356381457326569e-06, "loss": 17.3882, "step": 43639 }, { "epoch": 0.7977041329263166, "grad_norm": 6.381186563013294, "learning_rate": 1.0354577668814131e-06, "loss": 17.3036, "step": 43640 }, { "epoch": 0.7977224121227631, "grad_norm": 7.6015119227137165, "learning_rate": 1.0352774019255463e-06, "loss": 17.6653, "step": 43641 }, { "epoch": 0.7977406913192097, "grad_norm": 6.020681570675449, "learning_rate": 1.0350970508656888e-06, "loss": 17.3213, "step": 43642 }, { "epoch": 0.7977589705156561, "grad_norm": 8.75210730019607, "learning_rate": 1.0349167137024703e-06, "loss": 18.3236, "step": 43643 }, { "epoch": 0.7977772497121026, "grad_norm": 5.909201400923824, "learning_rate": 1.034736390436526e-06, "loss": 17.266, "step": 43644 }, { "epoch": 0.7977955289085492, "grad_norm": 5.884350375569324, "learning_rate": 1.0345560810684856e-06, "loss": 17.4926, "step": 43645 }, { "epoch": 0.7978138081049957, "grad_norm": 7.158604429551033, "learning_rate": 1.0343757855989816e-06, "loss": 17.2265, "step": 43646 }, { "epoch": 0.7978320873014423, "grad_norm": 6.764338180073353, "learning_rate": 1.0341955040286477e-06, "loss": 17.5088, "step": 43647 }, { "epoch": 0.7978503664978888, "grad_norm": 5.714950345886541, "learning_rate": 1.0340152363581147e-06, "loss": 17.1852, "step": 43648 }, { "epoch": 0.7978686456943352, "grad_norm": 5.7132679365659245, "learning_rate": 1.0338349825880128e-06, "loss": 17.2286, "step": 43649 }, { "epoch": 0.7978869248907818, "grad_norm": 5.931874595606341, "learning_rate": 1.0336547427189763e-06, "loss": 17.5211, "step": 43650 }, { "epoch": 0.7979052040872283, "grad_norm": 5.150677830174513, "learning_rate": 1.0334745167516346e-06, "loss": 16.9136, "step": 43651 }, { "epoch": 0.7979234832836748, "grad_norm": 5.565957922107202, "learning_rate": 1.0332943046866216e-06, "loss": 17.0671, "step": 43652 }, { "epoch": 0.7979417624801214, "grad_norm": 5.648857425742446, "learning_rate": 1.0331141065245682e-06, "loss": 17.0898, "step": 43653 }, { "epoch": 0.7979600416765679, "grad_norm": 6.363626466505349, "learning_rate": 1.032933922266104e-06, "loss": 17.5097, "step": 43654 }, { "epoch": 0.7979783208730145, "grad_norm": 6.667573568359071, "learning_rate": 1.0327537519118618e-06, "loss": 17.245, "step": 43655 }, { "epoch": 0.7979966000694609, "grad_norm": 7.611198323180637, "learning_rate": 1.0325735954624754e-06, "loss": 17.5615, "step": 43656 }, { "epoch": 0.7980148792659074, "grad_norm": 7.036487722299548, "learning_rate": 1.0323934529185725e-06, "loss": 17.5958, "step": 43657 }, { "epoch": 0.798033158462354, "grad_norm": 4.836503144623845, "learning_rate": 1.032213324280788e-06, "loss": 16.9083, "step": 43658 }, { "epoch": 0.7980514376588005, "grad_norm": 6.120358275793734, "learning_rate": 1.0320332095497516e-06, "loss": 17.3534, "step": 43659 }, { "epoch": 0.7980697168552471, "grad_norm": 8.526886624186124, "learning_rate": 1.0318531087260925e-06, "loss": 18.3012, "step": 43660 }, { "epoch": 0.7980879960516936, "grad_norm": 5.9991476858945845, "learning_rate": 1.0316730218104459e-06, "loss": 17.4376, "step": 43661 }, { "epoch": 0.79810627524814, "grad_norm": 6.510322406071814, "learning_rate": 1.0314929488034402e-06, "loss": 17.2258, "step": 43662 }, { "epoch": 0.7981245544445866, "grad_norm": 6.717295581131661, "learning_rate": 1.0313128897057068e-06, "loss": 17.261, "step": 43663 }, { "epoch": 0.7981428336410331, "grad_norm": 6.430196527585687, "learning_rate": 1.0311328445178769e-06, "loss": 17.4434, "step": 43664 }, { "epoch": 0.7981611128374797, "grad_norm": 7.058058418638312, "learning_rate": 1.0309528132405838e-06, "loss": 17.4054, "step": 43665 }, { "epoch": 0.7981793920339262, "grad_norm": 5.743356939740472, "learning_rate": 1.0307727958744546e-06, "loss": 16.9608, "step": 43666 }, { "epoch": 0.7981976712303727, "grad_norm": 6.11302137833586, "learning_rate": 1.0305927924201241e-06, "loss": 17.522, "step": 43667 }, { "epoch": 0.7982159504268193, "grad_norm": 5.736715768775574, "learning_rate": 1.03041280287822e-06, "loss": 17.4864, "step": 43668 }, { "epoch": 0.7982342296232657, "grad_norm": 4.746412822042589, "learning_rate": 1.030232827249376e-06, "loss": 17.1744, "step": 43669 }, { "epoch": 0.7982525088197123, "grad_norm": 7.020978728136215, "learning_rate": 1.0300528655342213e-06, "loss": 17.4288, "step": 43670 }, { "epoch": 0.7982707880161588, "grad_norm": 4.286223428004669, "learning_rate": 1.0298729177333855e-06, "loss": 16.729, "step": 43671 }, { "epoch": 0.7982890672126053, "grad_norm": 7.6115550307233635, "learning_rate": 1.0296929838475023e-06, "loss": 18.0614, "step": 43672 }, { "epoch": 0.7983073464090519, "grad_norm": 5.295340827622513, "learning_rate": 1.0295130638771989e-06, "loss": 17.0065, "step": 43673 }, { "epoch": 0.7983256256054984, "grad_norm": 4.843317972606948, "learning_rate": 1.0293331578231076e-06, "loss": 16.8318, "step": 43674 }, { "epoch": 0.798343904801945, "grad_norm": 6.057028754086238, "learning_rate": 1.0291532656858606e-06, "loss": 17.3792, "step": 43675 }, { "epoch": 0.7983621839983914, "grad_norm": 6.132759187568631, "learning_rate": 1.0289733874660868e-06, "loss": 17.5475, "step": 43676 }, { "epoch": 0.7983804631948379, "grad_norm": 6.54707733139629, "learning_rate": 1.0287935231644154e-06, "loss": 17.4192, "step": 43677 }, { "epoch": 0.7983987423912845, "grad_norm": 5.954495772984917, "learning_rate": 1.0286136727814794e-06, "loss": 17.1576, "step": 43678 }, { "epoch": 0.798417021587731, "grad_norm": 5.4614623926819235, "learning_rate": 1.0284338363179074e-06, "loss": 17.1042, "step": 43679 }, { "epoch": 0.7984353007841776, "grad_norm": 7.180026361967419, "learning_rate": 1.0282540137743297e-06, "loss": 17.4687, "step": 43680 }, { "epoch": 0.798453579980624, "grad_norm": 5.68392384463639, "learning_rate": 1.0280742051513765e-06, "loss": 17.1516, "step": 43681 }, { "epoch": 0.7984718591770705, "grad_norm": 5.6791782070296835, "learning_rate": 1.0278944104496796e-06, "loss": 17.3888, "step": 43682 }, { "epoch": 0.7984901383735171, "grad_norm": 5.955270942841166, "learning_rate": 1.0277146296698675e-06, "loss": 17.2991, "step": 43683 }, { "epoch": 0.7985084175699636, "grad_norm": 5.379827950226274, "learning_rate": 1.027534862812572e-06, "loss": 16.9341, "step": 43684 }, { "epoch": 0.7985266967664102, "grad_norm": 7.566590922573536, "learning_rate": 1.0273551098784206e-06, "loss": 18.0344, "step": 43685 }, { "epoch": 0.7985449759628567, "grad_norm": 5.864543671917071, "learning_rate": 1.027175370868046e-06, "loss": 17.1023, "step": 43686 }, { "epoch": 0.7985632551593032, "grad_norm": 6.13504048745101, "learning_rate": 1.0269956457820774e-06, "loss": 17.3212, "step": 43687 }, { "epoch": 0.7985815343557497, "grad_norm": 5.692916378346089, "learning_rate": 1.0268159346211427e-06, "loss": 17.2838, "step": 43688 }, { "epoch": 0.7985998135521962, "grad_norm": 5.214091377309713, "learning_rate": 1.0266362373858746e-06, "loss": 17.0874, "step": 43689 }, { "epoch": 0.7986180927486428, "grad_norm": 5.880637123035861, "learning_rate": 1.0264565540769006e-06, "loss": 17.3734, "step": 43690 }, { "epoch": 0.7986363719450893, "grad_norm": 4.980525738680686, "learning_rate": 1.0262768846948512e-06, "loss": 16.9158, "step": 43691 }, { "epoch": 0.7986546511415358, "grad_norm": 6.374858094613043, "learning_rate": 1.0260972292403581e-06, "loss": 17.4354, "step": 43692 }, { "epoch": 0.7986729303379824, "grad_norm": 5.774773488574874, "learning_rate": 1.0259175877140494e-06, "loss": 17.2554, "step": 43693 }, { "epoch": 0.7986912095344288, "grad_norm": 5.768036428416462, "learning_rate": 1.0257379601165535e-06, "loss": 17.1691, "step": 43694 }, { "epoch": 0.7987094887308754, "grad_norm": 5.34814706273861, "learning_rate": 1.0255583464485025e-06, "loss": 16.8921, "step": 43695 }, { "epoch": 0.7987277679273219, "grad_norm": 7.126137035892159, "learning_rate": 1.0253787467105232e-06, "loss": 17.9627, "step": 43696 }, { "epoch": 0.7987460471237684, "grad_norm": 5.926955491459572, "learning_rate": 1.0251991609032474e-06, "loss": 17.0504, "step": 43697 }, { "epoch": 0.798764326320215, "grad_norm": 5.53023877525286, "learning_rate": 1.025019589027304e-06, "loss": 17.0908, "step": 43698 }, { "epoch": 0.7987826055166615, "grad_norm": 5.277612107666569, "learning_rate": 1.0248400310833207e-06, "loss": 17.1803, "step": 43699 }, { "epoch": 0.7988008847131081, "grad_norm": 5.3859134903540395, "learning_rate": 1.024660487071928e-06, "loss": 16.8827, "step": 43700 }, { "epoch": 0.7988191639095545, "grad_norm": 8.199595463362634, "learning_rate": 1.0244809569937569e-06, "loss": 18.2248, "step": 43701 }, { "epoch": 0.798837443106001, "grad_norm": 6.870464017016303, "learning_rate": 1.0243014408494335e-06, "loss": 16.9546, "step": 43702 }, { "epoch": 0.7988557223024476, "grad_norm": 5.586544483563235, "learning_rate": 1.0241219386395902e-06, "loss": 17.1052, "step": 43703 }, { "epoch": 0.7988740014988941, "grad_norm": 7.48722776071471, "learning_rate": 1.0239424503648542e-06, "loss": 17.6426, "step": 43704 }, { "epoch": 0.7988922806953407, "grad_norm": 5.801604611653457, "learning_rate": 1.0237629760258534e-06, "loss": 17.2901, "step": 43705 }, { "epoch": 0.7989105598917872, "grad_norm": 6.5415313286855605, "learning_rate": 1.0235835156232198e-06, "loss": 17.3088, "step": 43706 }, { "epoch": 0.7989288390882336, "grad_norm": 5.943040052858648, "learning_rate": 1.0234040691575808e-06, "loss": 17.0997, "step": 43707 }, { "epoch": 0.7989471182846802, "grad_norm": 7.3036772390661895, "learning_rate": 1.0232246366295644e-06, "loss": 17.6531, "step": 43708 }, { "epoch": 0.7989653974811267, "grad_norm": 6.9556920288708, "learning_rate": 1.023045218039801e-06, "loss": 17.4687, "step": 43709 }, { "epoch": 0.7989836766775733, "grad_norm": 6.726737335825235, "learning_rate": 1.0228658133889196e-06, "loss": 17.5873, "step": 43710 }, { "epoch": 0.7990019558740198, "grad_norm": 5.341125886754479, "learning_rate": 1.0226864226775474e-06, "loss": 17.0374, "step": 43711 }, { "epoch": 0.7990202350704663, "grad_norm": 6.40956881208017, "learning_rate": 1.0225070459063153e-06, "loss": 17.3443, "step": 43712 }, { "epoch": 0.7990385142669129, "grad_norm": 6.917087769609048, "learning_rate": 1.0223276830758493e-06, "loss": 17.3134, "step": 43713 }, { "epoch": 0.7990567934633593, "grad_norm": 6.497752385274863, "learning_rate": 1.0221483341867817e-06, "loss": 17.2878, "step": 43714 }, { "epoch": 0.7990750726598059, "grad_norm": 5.866676249212873, "learning_rate": 1.0219689992397385e-06, "loss": 17.42, "step": 43715 }, { "epoch": 0.7990933518562524, "grad_norm": 8.382127845020301, "learning_rate": 1.021789678235347e-06, "loss": 18.2701, "step": 43716 }, { "epoch": 0.7991116310526989, "grad_norm": 5.8297940472085745, "learning_rate": 1.0216103711742397e-06, "loss": 17.1818, "step": 43717 }, { "epoch": 0.7991299102491455, "grad_norm": 7.0056178869028916, "learning_rate": 1.0214310780570408e-06, "loss": 17.56, "step": 43718 }, { "epoch": 0.799148189445592, "grad_norm": 7.436359857973205, "learning_rate": 1.021251798884381e-06, "loss": 17.4751, "step": 43719 }, { "epoch": 0.7991664686420384, "grad_norm": 7.640166727925022, "learning_rate": 1.02107253365689e-06, "loss": 17.8679, "step": 43720 }, { "epoch": 0.799184747838485, "grad_norm": 6.327964294672497, "learning_rate": 1.020893282375194e-06, "loss": 17.397, "step": 43721 }, { "epoch": 0.7992030270349315, "grad_norm": 7.049045348884781, "learning_rate": 1.0207140450399205e-06, "loss": 17.3476, "step": 43722 }, { "epoch": 0.7992213062313781, "grad_norm": 5.630051314495588, "learning_rate": 1.0205348216517008e-06, "loss": 17.2503, "step": 43723 }, { "epoch": 0.7992395854278246, "grad_norm": 8.751808757730165, "learning_rate": 1.0203556122111608e-06, "loss": 18.8197, "step": 43724 }, { "epoch": 0.7992578646242711, "grad_norm": 4.8371074699430805, "learning_rate": 1.0201764167189277e-06, "loss": 16.7536, "step": 43725 }, { "epoch": 0.7992761438207177, "grad_norm": 6.846873166086579, "learning_rate": 1.019997235175632e-06, "loss": 17.5882, "step": 43726 }, { "epoch": 0.7992944230171641, "grad_norm": 8.125123361816668, "learning_rate": 1.0198180675818997e-06, "loss": 18.0637, "step": 43727 }, { "epoch": 0.7993127022136107, "grad_norm": 6.341525056789344, "learning_rate": 1.0196389139383594e-06, "loss": 17.3362, "step": 43728 }, { "epoch": 0.7993309814100572, "grad_norm": 5.035846117827196, "learning_rate": 1.0194597742456407e-06, "loss": 16.9837, "step": 43729 }, { "epoch": 0.7993492606065037, "grad_norm": 5.164276581705978, "learning_rate": 1.0192806485043694e-06, "loss": 17.0718, "step": 43730 }, { "epoch": 0.7993675398029503, "grad_norm": 6.257240403049908, "learning_rate": 1.0191015367151747e-06, "loss": 17.372, "step": 43731 }, { "epoch": 0.7993858189993968, "grad_norm": 5.453179334554776, "learning_rate": 1.0189224388786834e-06, "loss": 17.1813, "step": 43732 }, { "epoch": 0.7994040981958433, "grad_norm": 5.186206117196793, "learning_rate": 1.0187433549955227e-06, "loss": 16.8704, "step": 43733 }, { "epoch": 0.7994223773922898, "grad_norm": 6.1868845063159, "learning_rate": 1.0185642850663225e-06, "loss": 17.2924, "step": 43734 }, { "epoch": 0.7994406565887363, "grad_norm": 7.644468336837893, "learning_rate": 1.0183852290917084e-06, "loss": 17.814, "step": 43735 }, { "epoch": 0.7994589357851829, "grad_norm": 5.834159297367508, "learning_rate": 1.0182061870723075e-06, "loss": 16.8879, "step": 43736 }, { "epoch": 0.7994772149816294, "grad_norm": 5.313674446077375, "learning_rate": 1.0180271590087488e-06, "loss": 16.9048, "step": 43737 }, { "epoch": 0.799495494178076, "grad_norm": 6.693890753418606, "learning_rate": 1.0178481449016598e-06, "loss": 17.4848, "step": 43738 }, { "epoch": 0.7995137733745225, "grad_norm": 6.065066171312267, "learning_rate": 1.0176691447516669e-06, "loss": 17.4293, "step": 43739 }, { "epoch": 0.7995320525709689, "grad_norm": 8.161505684048903, "learning_rate": 1.0174901585593993e-06, "loss": 18.2797, "step": 43740 }, { "epoch": 0.7995503317674155, "grad_norm": 6.047940947777919, "learning_rate": 1.0173111863254831e-06, "loss": 17.3264, "step": 43741 }, { "epoch": 0.799568610963862, "grad_norm": 6.428285825985837, "learning_rate": 1.0171322280505441e-06, "loss": 17.3371, "step": 43742 }, { "epoch": 0.7995868901603086, "grad_norm": 7.086424594847101, "learning_rate": 1.016953283735212e-06, "loss": 17.3256, "step": 43743 }, { "epoch": 0.7996051693567551, "grad_norm": 6.720784744929186, "learning_rate": 1.0167743533801122e-06, "loss": 17.5516, "step": 43744 }, { "epoch": 0.7996234485532016, "grad_norm": 4.9520095560867965, "learning_rate": 1.0165954369858738e-06, "loss": 16.8237, "step": 43745 }, { "epoch": 0.7996417277496481, "grad_norm": 6.369239446345069, "learning_rate": 1.0164165345531218e-06, "loss": 17.499, "step": 43746 }, { "epoch": 0.7996600069460946, "grad_norm": 6.847185842681981, "learning_rate": 1.0162376460824835e-06, "loss": 17.7542, "step": 43747 }, { "epoch": 0.7996782861425412, "grad_norm": 7.409745744291656, "learning_rate": 1.0160587715745883e-06, "loss": 17.3026, "step": 43748 }, { "epoch": 0.7996965653389877, "grad_norm": 6.526498484700863, "learning_rate": 1.0158799110300616e-06, "loss": 17.3951, "step": 43749 }, { "epoch": 0.7997148445354342, "grad_norm": 6.771302382907413, "learning_rate": 1.015701064449528e-06, "loss": 17.528, "step": 43750 }, { "epoch": 0.7997331237318808, "grad_norm": 5.300754045304155, "learning_rate": 1.0155222318336184e-06, "loss": 16.9856, "step": 43751 }, { "epoch": 0.7997514029283272, "grad_norm": 6.50145417369707, "learning_rate": 1.0153434131829575e-06, "loss": 17.3175, "step": 43752 }, { "epoch": 0.7997696821247738, "grad_norm": 6.202118761632826, "learning_rate": 1.015164608498171e-06, "loss": 17.4617, "step": 43753 }, { "epoch": 0.7997879613212203, "grad_norm": 7.589511677314576, "learning_rate": 1.014985817779886e-06, "loss": 17.4203, "step": 43754 }, { "epoch": 0.7998062405176668, "grad_norm": 6.516042021274941, "learning_rate": 1.014807041028732e-06, "loss": 17.4734, "step": 43755 }, { "epoch": 0.7998245197141134, "grad_norm": 7.006672675055769, "learning_rate": 1.014628278245332e-06, "loss": 17.6385, "step": 43756 }, { "epoch": 0.7998427989105599, "grad_norm": 6.460743760850251, "learning_rate": 1.0144495294303154e-06, "loss": 17.174, "step": 43757 }, { "epoch": 0.7998610781070065, "grad_norm": 7.189832730423302, "learning_rate": 1.014270794584306e-06, "loss": 17.7074, "step": 43758 }, { "epoch": 0.7998793573034529, "grad_norm": 6.22451615286532, "learning_rate": 1.0140920737079329e-06, "loss": 17.4515, "step": 43759 }, { "epoch": 0.7998976364998994, "grad_norm": 6.17115925816624, "learning_rate": 1.0139133668018208e-06, "loss": 17.4983, "step": 43760 }, { "epoch": 0.799915915696346, "grad_norm": 5.38085854652681, "learning_rate": 1.0137346738665954e-06, "loss": 16.9726, "step": 43761 }, { "epoch": 0.7999341948927925, "grad_norm": 6.7193628812630575, "learning_rate": 1.0135559949028856e-06, "loss": 17.8053, "step": 43762 }, { "epoch": 0.7999524740892391, "grad_norm": 7.451320477398077, "learning_rate": 1.0133773299113148e-06, "loss": 17.873, "step": 43763 }, { "epoch": 0.7999707532856856, "grad_norm": 6.064447475936556, "learning_rate": 1.0131986788925103e-06, "loss": 17.3728, "step": 43764 }, { "epoch": 0.799989032482132, "grad_norm": 9.763218369019834, "learning_rate": 1.0130200418470997e-06, "loss": 17.6324, "step": 43765 }, { "epoch": 0.8000073116785786, "grad_norm": 6.212109618857108, "learning_rate": 1.0128414187757085e-06, "loss": 17.2862, "step": 43766 }, { "epoch": 0.8000255908750251, "grad_norm": 6.2760703813994105, "learning_rate": 1.0126628096789604e-06, "loss": 17.1687, "step": 43767 }, { "epoch": 0.8000438700714717, "grad_norm": 4.466345039683702, "learning_rate": 1.0124842145574842e-06, "loss": 16.6591, "step": 43768 }, { "epoch": 0.8000621492679182, "grad_norm": 6.780422612279129, "learning_rate": 1.0123056334119052e-06, "loss": 17.6619, "step": 43769 }, { "epoch": 0.8000804284643647, "grad_norm": 5.2322010979616085, "learning_rate": 1.0121270662428473e-06, "loss": 17.252, "step": 43770 }, { "epoch": 0.8000987076608113, "grad_norm": 7.448622715199698, "learning_rate": 1.0119485130509394e-06, "loss": 18.0973, "step": 43771 }, { "epoch": 0.8001169868572577, "grad_norm": 6.507566605202383, "learning_rate": 1.0117699738368048e-06, "loss": 17.5522, "step": 43772 }, { "epoch": 0.8001352660537043, "grad_norm": 7.160051022659929, "learning_rate": 1.0115914486010697e-06, "loss": 17.7874, "step": 43773 }, { "epoch": 0.8001535452501508, "grad_norm": 6.2619770789728575, "learning_rate": 1.0114129373443627e-06, "loss": 17.3545, "step": 43774 }, { "epoch": 0.8001718244465973, "grad_norm": 5.874418515690712, "learning_rate": 1.0112344400673052e-06, "loss": 17.5906, "step": 43775 }, { "epoch": 0.8001901036430439, "grad_norm": 6.484184492736454, "learning_rate": 1.0110559567705264e-06, "loss": 17.2708, "step": 43776 }, { "epoch": 0.8002083828394904, "grad_norm": 7.028497937664179, "learning_rate": 1.0108774874546505e-06, "loss": 17.5481, "step": 43777 }, { "epoch": 0.800226662035937, "grad_norm": 6.521613520929639, "learning_rate": 1.0106990321203013e-06, "loss": 17.5056, "step": 43778 }, { "epoch": 0.8002449412323834, "grad_norm": 7.751314984999369, "learning_rate": 1.0105205907681075e-06, "loss": 17.9032, "step": 43779 }, { "epoch": 0.8002632204288299, "grad_norm": 7.238429682592718, "learning_rate": 1.0103421633986925e-06, "loss": 17.8217, "step": 43780 }, { "epoch": 0.8002814996252765, "grad_norm": 5.737874449738185, "learning_rate": 1.010163750012681e-06, "loss": 17.2171, "step": 43781 }, { "epoch": 0.800299778821723, "grad_norm": 5.468540190907995, "learning_rate": 1.0099853506106988e-06, "loss": 17.1612, "step": 43782 }, { "epoch": 0.8003180580181696, "grad_norm": 5.945663595180563, "learning_rate": 1.0098069651933734e-06, "loss": 17.5658, "step": 43783 }, { "epoch": 0.8003363372146161, "grad_norm": 5.447784710088978, "learning_rate": 1.009628593761327e-06, "loss": 17.0488, "step": 43784 }, { "epoch": 0.8003546164110625, "grad_norm": 6.809428975059103, "learning_rate": 1.0094502363151877e-06, "loss": 17.6266, "step": 43785 }, { "epoch": 0.8003728956075091, "grad_norm": 6.576645625571224, "learning_rate": 1.0092718928555784e-06, "loss": 17.3189, "step": 43786 }, { "epoch": 0.8003911748039556, "grad_norm": 6.007953464089614, "learning_rate": 1.0090935633831239e-06, "loss": 17.2786, "step": 43787 }, { "epoch": 0.8004094540004021, "grad_norm": 5.81206629067377, "learning_rate": 1.0089152478984516e-06, "loss": 17.1425, "step": 43788 }, { "epoch": 0.8004277331968487, "grad_norm": 5.374848293829785, "learning_rate": 1.0087369464021835e-06, "loss": 17.321, "step": 43789 }, { "epoch": 0.8004460123932952, "grad_norm": 5.467003849021251, "learning_rate": 1.008558658894947e-06, "loss": 16.919, "step": 43790 }, { "epoch": 0.8004642915897417, "grad_norm": 6.569708564913264, "learning_rate": 1.0083803853773655e-06, "loss": 17.6664, "step": 43791 }, { "epoch": 0.8004825707861882, "grad_norm": 7.18910618684208, "learning_rate": 1.0082021258500635e-06, "loss": 17.4508, "step": 43792 }, { "epoch": 0.8005008499826347, "grad_norm": 6.042621410342787, "learning_rate": 1.0080238803136687e-06, "loss": 17.3144, "step": 43793 }, { "epoch": 0.8005191291790813, "grad_norm": 5.033091537624904, "learning_rate": 1.0078456487688038e-06, "loss": 16.8618, "step": 43794 }, { "epoch": 0.8005374083755278, "grad_norm": 7.483478287077619, "learning_rate": 1.0076674312160917e-06, "loss": 17.4647, "step": 43795 }, { "epoch": 0.8005556875719744, "grad_norm": 6.828746988534529, "learning_rate": 1.0074892276561605e-06, "loss": 17.9107, "step": 43796 }, { "epoch": 0.8005739667684209, "grad_norm": 6.298679886264835, "learning_rate": 1.007311038089633e-06, "loss": 17.0197, "step": 43797 }, { "epoch": 0.8005922459648673, "grad_norm": 5.557822910185598, "learning_rate": 1.0071328625171323e-06, "loss": 16.9757, "step": 43798 }, { "epoch": 0.8006105251613139, "grad_norm": 7.351851596685847, "learning_rate": 1.0069547009392855e-06, "loss": 17.8118, "step": 43799 }, { "epoch": 0.8006288043577604, "grad_norm": 5.80417553065374, "learning_rate": 1.0067765533567152e-06, "loss": 17.1243, "step": 43800 }, { "epoch": 0.800647083554207, "grad_norm": 5.972787663700388, "learning_rate": 1.0065984197700469e-06, "loss": 17.0336, "step": 43801 }, { "epoch": 0.8006653627506535, "grad_norm": 6.681772657209969, "learning_rate": 1.006420300179905e-06, "loss": 17.552, "step": 43802 }, { "epoch": 0.8006836419471, "grad_norm": 7.052839180164388, "learning_rate": 1.006242194586914e-06, "loss": 17.7641, "step": 43803 }, { "epoch": 0.8007019211435465, "grad_norm": 6.813697836179422, "learning_rate": 1.0060641029916962e-06, "loss": 17.4405, "step": 43804 }, { "epoch": 0.800720200339993, "grad_norm": 6.941685868020219, "learning_rate": 1.0058860253948783e-06, "loss": 17.4609, "step": 43805 }, { "epoch": 0.8007384795364396, "grad_norm": 7.41772478246819, "learning_rate": 1.0057079617970817e-06, "loss": 18.0662, "step": 43806 }, { "epoch": 0.8007567587328861, "grad_norm": 6.650251185069258, "learning_rate": 1.0055299121989338e-06, "loss": 17.5316, "step": 43807 }, { "epoch": 0.8007750379293326, "grad_norm": 6.076947039678101, "learning_rate": 1.0053518766010555e-06, "loss": 17.3701, "step": 43808 }, { "epoch": 0.8007933171257792, "grad_norm": 6.039608603762167, "learning_rate": 1.0051738550040734e-06, "loss": 17.1286, "step": 43809 }, { "epoch": 0.8008115963222256, "grad_norm": 6.955459028461192, "learning_rate": 1.004995847408609e-06, "loss": 17.7729, "step": 43810 }, { "epoch": 0.8008298755186722, "grad_norm": 4.830455907692905, "learning_rate": 1.004817853815289e-06, "loss": 17.0309, "step": 43811 }, { "epoch": 0.8008481547151187, "grad_norm": 5.3901413555624, "learning_rate": 1.0046398742247349e-06, "loss": 16.9315, "step": 43812 }, { "epoch": 0.8008664339115652, "grad_norm": 7.213290056656428, "learning_rate": 1.0044619086375718e-06, "loss": 17.8685, "step": 43813 }, { "epoch": 0.8008847131080118, "grad_norm": 8.02827234238506, "learning_rate": 1.0042839570544233e-06, "loss": 17.5832, "step": 43814 }, { "epoch": 0.8009029923044583, "grad_norm": 6.833619810322121, "learning_rate": 1.0041060194759118e-06, "loss": 17.6173, "step": 43815 }, { "epoch": 0.8009212715009049, "grad_norm": 6.447591452506053, "learning_rate": 1.0039280959026627e-06, "loss": 17.177, "step": 43816 }, { "epoch": 0.8009395506973513, "grad_norm": 5.067950935242978, "learning_rate": 1.0037501863352977e-06, "loss": 16.9073, "step": 43817 }, { "epoch": 0.8009578298937978, "grad_norm": 6.570803190262346, "learning_rate": 1.0035722907744417e-06, "loss": 17.3248, "step": 43818 }, { "epoch": 0.8009761090902444, "grad_norm": 5.189701606181141, "learning_rate": 1.0033944092207194e-06, "loss": 17.0589, "step": 43819 }, { "epoch": 0.8009943882866909, "grad_norm": 6.793277886528245, "learning_rate": 1.0032165416747513e-06, "loss": 17.5394, "step": 43820 }, { "epoch": 0.8010126674831375, "grad_norm": 5.516782525521175, "learning_rate": 1.003038688137164e-06, "loss": 17.1483, "step": 43821 }, { "epoch": 0.801030946679584, "grad_norm": 5.954519867022387, "learning_rate": 1.002860848608579e-06, "loss": 17.128, "step": 43822 }, { "epoch": 0.8010492258760304, "grad_norm": 5.294317435958638, "learning_rate": 1.002683023089619e-06, "loss": 16.9757, "step": 43823 }, { "epoch": 0.801067505072477, "grad_norm": 5.56751182797377, "learning_rate": 1.0025052115809086e-06, "loss": 17.0626, "step": 43824 }, { "epoch": 0.8010857842689235, "grad_norm": 7.475993313537918, "learning_rate": 1.0023274140830713e-06, "loss": 17.954, "step": 43825 }, { "epoch": 0.8011040634653701, "grad_norm": 7.563703763331073, "learning_rate": 1.0021496305967283e-06, "loss": 18.0018, "step": 43826 }, { "epoch": 0.8011223426618166, "grad_norm": 6.666014828144664, "learning_rate": 1.0019718611225032e-06, "loss": 17.5269, "step": 43827 }, { "epoch": 0.8011406218582631, "grad_norm": 6.358601581672098, "learning_rate": 1.0017941056610213e-06, "loss": 17.5776, "step": 43828 }, { "epoch": 0.8011589010547097, "grad_norm": 6.268612908434563, "learning_rate": 1.0016163642129034e-06, "loss": 17.654, "step": 43829 }, { "epoch": 0.8011771802511561, "grad_norm": 7.631868670658687, "learning_rate": 1.0014386367787738e-06, "loss": 17.5554, "step": 43830 }, { "epoch": 0.8011954594476027, "grad_norm": 6.275542956336179, "learning_rate": 1.0012609233592551e-06, "loss": 17.4745, "step": 43831 }, { "epoch": 0.8012137386440492, "grad_norm": 6.226718315361537, "learning_rate": 1.0010832239549683e-06, "loss": 17.2866, "step": 43832 }, { "epoch": 0.8012320178404957, "grad_norm": 6.672008417165564, "learning_rate": 1.0009055385665395e-06, "loss": 17.6215, "step": 43833 }, { "epoch": 0.8012502970369423, "grad_norm": 5.078016367093074, "learning_rate": 1.0007278671945897e-06, "loss": 16.9524, "step": 43834 }, { "epoch": 0.8012685762333888, "grad_norm": 7.557201112056803, "learning_rate": 1.0005502098397401e-06, "loss": 17.6101, "step": 43835 }, { "epoch": 0.8012868554298354, "grad_norm": 7.26921718349192, "learning_rate": 1.0003725665026154e-06, "loss": 17.5651, "step": 43836 }, { "epoch": 0.8013051346262818, "grad_norm": 7.1988152794450215, "learning_rate": 1.0001949371838376e-06, "loss": 17.5218, "step": 43837 }, { "epoch": 0.8013234138227283, "grad_norm": 6.12864840204441, "learning_rate": 1.0000173218840303e-06, "loss": 17.4057, "step": 43838 }, { "epoch": 0.8013416930191749, "grad_norm": 5.963285800453542, "learning_rate": 9.998397206038157e-07, "loss": 17.0973, "step": 43839 }, { "epoch": 0.8013599722156214, "grad_norm": 5.932561329799355, "learning_rate": 9.996621333438144e-07, "loss": 16.9276, "step": 43840 }, { "epoch": 0.801378251412068, "grad_norm": 5.680549545519881, "learning_rate": 9.994845601046516e-07, "loss": 17.2271, "step": 43841 }, { "epoch": 0.8013965306085145, "grad_norm": 6.311757826109113, "learning_rate": 9.993070008869476e-07, "loss": 17.2482, "step": 43842 }, { "epoch": 0.8014148098049609, "grad_norm": 5.209852551947522, "learning_rate": 9.991294556913251e-07, "loss": 16.8244, "step": 43843 }, { "epoch": 0.8014330890014075, "grad_norm": 5.894277962438656, "learning_rate": 9.98951924518407e-07, "loss": 17.2295, "step": 43844 }, { "epoch": 0.801451368197854, "grad_norm": 5.664366950474225, "learning_rate": 9.987744073688143e-07, "loss": 17.2958, "step": 43845 }, { "epoch": 0.8014696473943006, "grad_norm": 5.474332877562862, "learning_rate": 9.985969042431703e-07, "loss": 17.1175, "step": 43846 }, { "epoch": 0.8014879265907471, "grad_norm": 8.384537990135776, "learning_rate": 9.984194151420983e-07, "loss": 18.4219, "step": 43847 }, { "epoch": 0.8015062057871936, "grad_norm": 6.074457635752288, "learning_rate": 9.982419400662186e-07, "loss": 17.2203, "step": 43848 }, { "epoch": 0.8015244849836402, "grad_norm": 5.027347122238254, "learning_rate": 9.980644790161525e-07, "loss": 16.7316, "step": 43849 }, { "epoch": 0.8015427641800866, "grad_norm": 5.236717403508433, "learning_rate": 9.97887031992525e-07, "loss": 17.0517, "step": 43850 }, { "epoch": 0.8015610433765332, "grad_norm": 6.915837848539384, "learning_rate": 9.97709598995954e-07, "loss": 17.5213, "step": 43851 }, { "epoch": 0.8015793225729797, "grad_norm": 7.137806777693152, "learning_rate": 9.975321800270655e-07, "loss": 17.7513, "step": 43852 }, { "epoch": 0.8015976017694262, "grad_norm": 5.106642006478907, "learning_rate": 9.973547750864788e-07, "loss": 16.7234, "step": 43853 }, { "epoch": 0.8016158809658728, "grad_norm": 5.606087173787201, "learning_rate": 9.97177384174815e-07, "loss": 17.1831, "step": 43854 }, { "epoch": 0.8016341601623193, "grad_norm": 6.621813512414418, "learning_rate": 9.970000072926978e-07, "loss": 17.3239, "step": 43855 }, { "epoch": 0.8016524393587657, "grad_norm": 7.907622917604247, "learning_rate": 9.968226444407486e-07, "loss": 18.1139, "step": 43856 }, { "epoch": 0.8016707185552123, "grad_norm": 6.250324233966483, "learning_rate": 9.966452956195883e-07, "loss": 17.2653, "step": 43857 }, { "epoch": 0.8016889977516588, "grad_norm": 6.074678717480343, "learning_rate": 9.964679608298394e-07, "loss": 17.4376, "step": 43858 }, { "epoch": 0.8017072769481054, "grad_norm": 9.129456447024655, "learning_rate": 9.962906400721234e-07, "loss": 17.6325, "step": 43859 }, { "epoch": 0.8017255561445519, "grad_norm": 6.655028593399852, "learning_rate": 9.961133333470595e-07, "loss": 17.8862, "step": 43860 }, { "epoch": 0.8017438353409984, "grad_norm": 6.029655974029754, "learning_rate": 9.959360406552727e-07, "loss": 17.3194, "step": 43861 }, { "epoch": 0.801762114537445, "grad_norm": 6.036280646776968, "learning_rate": 9.957587619973824e-07, "loss": 17.3134, "step": 43862 }, { "epoch": 0.8017803937338914, "grad_norm": 6.12022714595388, "learning_rate": 9.955814973740086e-07, "loss": 17.6745, "step": 43863 }, { "epoch": 0.801798672930338, "grad_norm": 5.565180330694607, "learning_rate": 9.954042467857744e-07, "loss": 17.2518, "step": 43864 }, { "epoch": 0.8018169521267845, "grad_norm": 8.032074969619527, "learning_rate": 9.952270102333012e-07, "loss": 18.1511, "step": 43865 }, { "epoch": 0.801835231323231, "grad_norm": 6.908852660518813, "learning_rate": 9.950497877172104e-07, "loss": 17.5261, "step": 43866 }, { "epoch": 0.8018535105196776, "grad_norm": 5.8497902013362575, "learning_rate": 9.948725792381231e-07, "loss": 17.4688, "step": 43867 }, { "epoch": 0.801871789716124, "grad_norm": 5.531928600162035, "learning_rate": 9.946953847966584e-07, "loss": 17.3208, "step": 43868 }, { "epoch": 0.8018900689125706, "grad_norm": 6.668635029093718, "learning_rate": 9.945182043934397e-07, "loss": 17.5617, "step": 43869 }, { "epoch": 0.8019083481090171, "grad_norm": 6.618989860737753, "learning_rate": 9.943410380290875e-07, "loss": 17.5788, "step": 43870 }, { "epoch": 0.8019266273054636, "grad_norm": 6.7125321772614965, "learning_rate": 9.941638857042212e-07, "loss": 17.6211, "step": 43871 }, { "epoch": 0.8019449065019102, "grad_norm": 6.988621364071142, "learning_rate": 9.939867474194643e-07, "loss": 17.8888, "step": 43872 }, { "epoch": 0.8019631856983567, "grad_norm": 5.38591282084931, "learning_rate": 9.938096231754347e-07, "loss": 17.0711, "step": 43873 }, { "epoch": 0.8019814648948033, "grad_norm": 6.030309145406148, "learning_rate": 9.936325129727541e-07, "loss": 17.3081, "step": 43874 }, { "epoch": 0.8019997440912497, "grad_norm": 6.618737793722778, "learning_rate": 9.934554168120458e-07, "loss": 17.3269, "step": 43875 }, { "epoch": 0.8020180232876962, "grad_norm": 7.025570871771947, "learning_rate": 9.93278334693929e-07, "loss": 17.3039, "step": 43876 }, { "epoch": 0.8020363024841428, "grad_norm": 6.459806457995816, "learning_rate": 9.93101266619022e-07, "loss": 17.4931, "step": 43877 }, { "epoch": 0.8020545816805893, "grad_norm": 7.299486020030274, "learning_rate": 9.92924212587949e-07, "loss": 17.88, "step": 43878 }, { "epoch": 0.8020728608770359, "grad_norm": 5.372212483121452, "learning_rate": 9.927471726013289e-07, "loss": 17.3221, "step": 43879 }, { "epoch": 0.8020911400734824, "grad_norm": 6.365725701369935, "learning_rate": 9.925701466597804e-07, "loss": 17.585, "step": 43880 }, { "epoch": 0.8021094192699288, "grad_norm": 5.719620261873268, "learning_rate": 9.923931347639264e-07, "loss": 17.1961, "step": 43881 }, { "epoch": 0.8021276984663754, "grad_norm": 6.148043464525454, "learning_rate": 9.922161369143878e-07, "loss": 17.2319, "step": 43882 }, { "epoch": 0.8021459776628219, "grad_norm": 5.286311666327453, "learning_rate": 9.920391531117823e-07, "loss": 17.1601, "step": 43883 }, { "epoch": 0.8021642568592685, "grad_norm": 7.959894325263251, "learning_rate": 9.918621833567338e-07, "loss": 18.2209, "step": 43884 }, { "epoch": 0.802182536055715, "grad_norm": 5.439421057177491, "learning_rate": 9.916852276498583e-07, "loss": 17.15, "step": 43885 }, { "epoch": 0.8022008152521615, "grad_norm": 6.143675539218404, "learning_rate": 9.915082859917801e-07, "loss": 17.6273, "step": 43886 }, { "epoch": 0.8022190944486081, "grad_norm": 5.392217213208532, "learning_rate": 9.913313583831174e-07, "loss": 17.0312, "step": 43887 }, { "epoch": 0.8022373736450545, "grad_norm": 5.340601845901724, "learning_rate": 9.911544448244892e-07, "loss": 17.1381, "step": 43888 }, { "epoch": 0.8022556528415011, "grad_norm": 4.705935827400525, "learning_rate": 9.909775453165183e-07, "loss": 16.8534, "step": 43889 }, { "epoch": 0.8022739320379476, "grad_norm": 6.414015041850166, "learning_rate": 9.90800659859822e-07, "loss": 17.2992, "step": 43890 }, { "epoch": 0.8022922112343941, "grad_norm": 6.634327187773933, "learning_rate": 9.906237884550208e-07, "loss": 17.4408, "step": 43891 }, { "epoch": 0.8023104904308407, "grad_norm": 6.851928407948582, "learning_rate": 9.90446931102737e-07, "loss": 17.6577, "step": 43892 }, { "epoch": 0.8023287696272872, "grad_norm": 5.564167959320792, "learning_rate": 9.902700878035887e-07, "loss": 16.7572, "step": 43893 }, { "epoch": 0.8023470488237338, "grad_norm": 6.267467935289678, "learning_rate": 9.900932585581947e-07, "loss": 17.5846, "step": 43894 }, { "epoch": 0.8023653280201802, "grad_norm": 6.130698261890216, "learning_rate": 9.89916443367177e-07, "loss": 17.3592, "step": 43895 }, { "epoch": 0.8023836072166267, "grad_norm": 5.305660283299622, "learning_rate": 9.897396422311528e-07, "loss": 17.0875, "step": 43896 }, { "epoch": 0.8024018864130733, "grad_norm": 7.065217993858596, "learning_rate": 9.895628551507446e-07, "loss": 17.8657, "step": 43897 }, { "epoch": 0.8024201656095198, "grad_norm": 7.075243656912415, "learning_rate": 9.893860821265706e-07, "loss": 18.0046, "step": 43898 }, { "epoch": 0.8024384448059664, "grad_norm": 6.989514562844967, "learning_rate": 9.892093231592486e-07, "loss": 17.5972, "step": 43899 }, { "epoch": 0.8024567240024129, "grad_norm": 5.067498624442654, "learning_rate": 9.890325782494004e-07, "loss": 16.8557, "step": 43900 }, { "epoch": 0.8024750031988593, "grad_norm": 5.864767167306625, "learning_rate": 9.888558473976462e-07, "loss": 17.3493, "step": 43901 }, { "epoch": 0.8024932823953059, "grad_norm": 6.7252591544455305, "learning_rate": 9.886791306046029e-07, "loss": 17.4685, "step": 43902 }, { "epoch": 0.8025115615917524, "grad_norm": 10.853346302070355, "learning_rate": 9.885024278708921e-07, "loss": 17.9675, "step": 43903 }, { "epoch": 0.802529840788199, "grad_norm": 6.881170635596119, "learning_rate": 9.88325739197133e-07, "loss": 17.5859, "step": 43904 }, { "epoch": 0.8025481199846455, "grad_norm": 7.817618843308452, "learning_rate": 9.88149064583942e-07, "loss": 17.5183, "step": 43905 }, { "epoch": 0.802566399181092, "grad_norm": 6.5604753787968555, "learning_rate": 9.879724040319416e-07, "loss": 17.6719, "step": 43906 }, { "epoch": 0.8025846783775386, "grad_norm": 13.55593040744057, "learning_rate": 9.877957575417503e-07, "loss": 17.6608, "step": 43907 }, { "epoch": 0.802602957573985, "grad_norm": 7.02297318645941, "learning_rate": 9.876191251139855e-07, "loss": 17.7921, "step": 43908 }, { "epoch": 0.8026212367704316, "grad_norm": 10.993074350251538, "learning_rate": 9.87442506749267e-07, "loss": 18.6341, "step": 43909 }, { "epoch": 0.8026395159668781, "grad_norm": 8.186297591335963, "learning_rate": 9.872659024482156e-07, "loss": 18.3363, "step": 43910 }, { "epoch": 0.8026577951633246, "grad_norm": 5.036406115180108, "learning_rate": 9.870893122114482e-07, "loss": 17.0617, "step": 43911 }, { "epoch": 0.8026760743597712, "grad_norm": 6.129386276813603, "learning_rate": 9.869127360395853e-07, "loss": 17.4126, "step": 43912 }, { "epoch": 0.8026943535562177, "grad_norm": 6.304163204408193, "learning_rate": 9.867361739332442e-07, "loss": 17.317, "step": 43913 }, { "epoch": 0.8027126327526642, "grad_norm": 6.007788229533479, "learning_rate": 9.865596258930454e-07, "loss": 17.553, "step": 43914 }, { "epoch": 0.8027309119491107, "grad_norm": 6.191544695072912, "learning_rate": 9.863830919196071e-07, "loss": 17.2068, "step": 43915 }, { "epoch": 0.8027491911455572, "grad_norm": 5.934848317206477, "learning_rate": 9.862065720135467e-07, "loss": 17.0218, "step": 43916 }, { "epoch": 0.8027674703420038, "grad_norm": 6.1450378210681045, "learning_rate": 9.86030066175485e-07, "loss": 17.5397, "step": 43917 }, { "epoch": 0.8027857495384503, "grad_norm": 6.434676661353877, "learning_rate": 9.858535744060383e-07, "loss": 17.7431, "step": 43918 }, { "epoch": 0.8028040287348969, "grad_norm": 5.815967137603755, "learning_rate": 9.856770967058265e-07, "loss": 17.286, "step": 43919 }, { "epoch": 0.8028223079313433, "grad_norm": 7.438633611585014, "learning_rate": 9.855006330754696e-07, "loss": 17.9232, "step": 43920 }, { "epoch": 0.8028405871277898, "grad_norm": 5.370126000234228, "learning_rate": 9.853241835155842e-07, "loss": 17.0741, "step": 43921 }, { "epoch": 0.8028588663242364, "grad_norm": 7.3018636527654195, "learning_rate": 9.85147748026788e-07, "loss": 17.7308, "step": 43922 }, { "epoch": 0.8028771455206829, "grad_norm": 7.05513115304553, "learning_rate": 9.849713266097022e-07, "loss": 17.43, "step": 43923 }, { "epoch": 0.8028954247171294, "grad_norm": 5.774115881802276, "learning_rate": 9.84794919264943e-07, "loss": 17.1971, "step": 43924 }, { "epoch": 0.802913703913576, "grad_norm": 6.249530972716593, "learning_rate": 9.846185259931284e-07, "loss": 17.4222, "step": 43925 }, { "epoch": 0.8029319831100225, "grad_norm": 6.998054740198373, "learning_rate": 9.844421467948784e-07, "loss": 17.8898, "step": 43926 }, { "epoch": 0.802950262306469, "grad_norm": 5.904302513057738, "learning_rate": 9.84265781670809e-07, "loss": 17.2116, "step": 43927 }, { "epoch": 0.8029685415029155, "grad_norm": 6.708196051801731, "learning_rate": 9.840894306215392e-07, "loss": 17.6978, "step": 43928 }, { "epoch": 0.802986820699362, "grad_norm": 5.087674897444216, "learning_rate": 9.839130936476892e-07, "loss": 17.1177, "step": 43929 }, { "epoch": 0.8030050998958086, "grad_norm": 4.969232226436822, "learning_rate": 9.83736770749874e-07, "loss": 16.9522, "step": 43930 }, { "epoch": 0.8030233790922551, "grad_norm": 5.9289516387974865, "learning_rate": 9.835604619287143e-07, "loss": 17.3574, "step": 43931 }, { "epoch": 0.8030416582887017, "grad_norm": 6.710182853371984, "learning_rate": 9.833841671848266e-07, "loss": 17.7098, "step": 43932 }, { "epoch": 0.8030599374851481, "grad_norm": 5.4407044745413575, "learning_rate": 9.832078865188271e-07, "loss": 17.1374, "step": 43933 }, { "epoch": 0.8030782166815946, "grad_norm": 5.530347837511452, "learning_rate": 9.830316199313373e-07, "loss": 17.3767, "step": 43934 }, { "epoch": 0.8030964958780412, "grad_norm": 8.414197980555867, "learning_rate": 9.828553674229713e-07, "loss": 18.6955, "step": 43935 }, { "epoch": 0.8031147750744877, "grad_norm": 5.67296138270758, "learning_rate": 9.826791289943504e-07, "loss": 17.1385, "step": 43936 }, { "epoch": 0.8031330542709343, "grad_norm": 6.976945421399322, "learning_rate": 9.825029046460888e-07, "loss": 17.7431, "step": 43937 }, { "epoch": 0.8031513334673808, "grad_norm": 5.7611497681124195, "learning_rate": 9.823266943788073e-07, "loss": 17.2913, "step": 43938 }, { "epoch": 0.8031696126638272, "grad_norm": 9.148244905315119, "learning_rate": 9.821504981931213e-07, "loss": 17.2328, "step": 43939 }, { "epoch": 0.8031878918602738, "grad_norm": 6.9499437758292775, "learning_rate": 9.8197431608965e-07, "loss": 17.4197, "step": 43940 }, { "epoch": 0.8032061710567203, "grad_norm": 5.379973508128771, "learning_rate": 9.817981480690098e-07, "loss": 17.1813, "step": 43941 }, { "epoch": 0.8032244502531669, "grad_norm": 5.245527817609659, "learning_rate": 9.816219941318178e-07, "loss": 17.0304, "step": 43942 }, { "epoch": 0.8032427294496134, "grad_norm": 6.899168012313868, "learning_rate": 9.814458542786926e-07, "loss": 17.4386, "step": 43943 }, { "epoch": 0.8032610086460599, "grad_norm": 7.6534118260962805, "learning_rate": 9.812697285102502e-07, "loss": 17.5514, "step": 43944 }, { "epoch": 0.8032792878425065, "grad_norm": 5.775641377953053, "learning_rate": 9.810936168271086e-07, "loss": 17.1944, "step": 43945 }, { "epoch": 0.8032975670389529, "grad_norm": 6.182623816425769, "learning_rate": 9.80917519229886e-07, "loss": 17.2137, "step": 43946 }, { "epoch": 0.8033158462353995, "grad_norm": 6.240523978620345, "learning_rate": 9.80741435719198e-07, "loss": 17.1385, "step": 43947 }, { "epoch": 0.803334125431846, "grad_norm": 5.107758357887759, "learning_rate": 9.805653662956633e-07, "loss": 17.003, "step": 43948 }, { "epoch": 0.8033524046282925, "grad_norm": 6.985327373649481, "learning_rate": 9.803893109598983e-07, "loss": 17.1966, "step": 43949 }, { "epoch": 0.8033706838247391, "grad_norm": 5.695367267187749, "learning_rate": 9.80213269712519e-07, "loss": 17.0761, "step": 43950 }, { "epoch": 0.8033889630211856, "grad_norm": 6.020898852603209, "learning_rate": 9.800372425541443e-07, "loss": 17.3152, "step": 43951 }, { "epoch": 0.8034072422176322, "grad_norm": 7.46481307631092, "learning_rate": 9.798612294853903e-07, "loss": 17.9351, "step": 43952 }, { "epoch": 0.8034255214140786, "grad_norm": 6.723221898075943, "learning_rate": 9.796852305068727e-07, "loss": 17.272, "step": 43953 }, { "epoch": 0.8034438006105251, "grad_norm": 6.913651857562805, "learning_rate": 9.795092456192095e-07, "loss": 18.0362, "step": 43954 }, { "epoch": 0.8034620798069717, "grad_norm": 8.909060589547153, "learning_rate": 9.793332748230188e-07, "loss": 18.3884, "step": 43955 }, { "epoch": 0.8034803590034182, "grad_norm": 6.972391973425957, "learning_rate": 9.791573181189146e-07, "loss": 17.5206, "step": 43956 }, { "epoch": 0.8034986381998648, "grad_norm": 5.99319904892473, "learning_rate": 9.789813755075167e-07, "loss": 17.4983, "step": 43957 }, { "epoch": 0.8035169173963113, "grad_norm": 6.647903021157211, "learning_rate": 9.788054469894388e-07, "loss": 17.4474, "step": 43958 }, { "epoch": 0.8035351965927577, "grad_norm": 7.135884348137363, "learning_rate": 9.786295325652994e-07, "loss": 17.4249, "step": 43959 }, { "epoch": 0.8035534757892043, "grad_norm": 8.789635516466317, "learning_rate": 9.784536322357153e-07, "loss": 18.6005, "step": 43960 }, { "epoch": 0.8035717549856508, "grad_norm": 5.529931826050675, "learning_rate": 9.78277746001301e-07, "loss": 17.2119, "step": 43961 }, { "epoch": 0.8035900341820974, "grad_norm": 7.267756794077778, "learning_rate": 9.781018738626752e-07, "loss": 17.3818, "step": 43962 }, { "epoch": 0.8036083133785439, "grad_norm": 6.670986703992469, "learning_rate": 9.779260158204522e-07, "loss": 17.0592, "step": 43963 }, { "epoch": 0.8036265925749904, "grad_norm": 5.7474764750379705, "learning_rate": 9.77750171875249e-07, "loss": 17.2275, "step": 43964 }, { "epoch": 0.803644871771437, "grad_norm": 4.935502429150306, "learning_rate": 9.775743420276845e-07, "loss": 17.2457, "step": 43965 }, { "epoch": 0.8036631509678834, "grad_norm": 5.799583489118141, "learning_rate": 9.773985262783724e-07, "loss": 17.4091, "step": 43966 }, { "epoch": 0.80368143016433, "grad_norm": 6.42088226152061, "learning_rate": 9.772227246279282e-07, "loss": 17.597, "step": 43967 }, { "epoch": 0.8036997093607765, "grad_norm": 7.553363500029191, "learning_rate": 9.770469370769708e-07, "loss": 17.8721, "step": 43968 }, { "epoch": 0.803717988557223, "grad_norm": 5.88289434562988, "learning_rate": 9.768711636261147e-07, "loss": 17.0075, "step": 43969 }, { "epoch": 0.8037362677536696, "grad_norm": 6.642256806111356, "learning_rate": 9.76695404275974e-07, "loss": 17.6099, "step": 43970 }, { "epoch": 0.8037545469501161, "grad_norm": 5.2389477782716645, "learning_rate": 9.765196590271692e-07, "loss": 17.1328, "step": 43971 }, { "epoch": 0.8037728261465626, "grad_norm": 6.844928151875909, "learning_rate": 9.76343927880312e-07, "loss": 17.4758, "step": 43972 }, { "epoch": 0.8037911053430091, "grad_norm": 5.924763977884043, "learning_rate": 9.761682108360204e-07, "loss": 17.3729, "step": 43973 }, { "epoch": 0.8038093845394556, "grad_norm": 7.681847704595155, "learning_rate": 9.759925078949111e-07, "loss": 18.1902, "step": 43974 }, { "epoch": 0.8038276637359022, "grad_norm": 5.164528161032752, "learning_rate": 9.758168190575974e-07, "loss": 17.084, "step": 43975 }, { "epoch": 0.8038459429323487, "grad_norm": 6.467285753328045, "learning_rate": 9.756411443246982e-07, "loss": 17.3522, "step": 43976 }, { "epoch": 0.8038642221287953, "grad_norm": 7.460450060692017, "learning_rate": 9.754654836968274e-07, "loss": 17.2934, "step": 43977 }, { "epoch": 0.8038825013252417, "grad_norm": 8.724594443299234, "learning_rate": 9.752898371745994e-07, "loss": 18.8312, "step": 43978 }, { "epoch": 0.8039007805216882, "grad_norm": 5.96498967575878, "learning_rate": 9.751142047586333e-07, "loss": 17.2386, "step": 43979 }, { "epoch": 0.8039190597181348, "grad_norm": 5.326756710983685, "learning_rate": 9.74938586449542e-07, "loss": 16.9751, "step": 43980 }, { "epoch": 0.8039373389145813, "grad_norm": 6.242026735490989, "learning_rate": 9.747629822479405e-07, "loss": 17.1604, "step": 43981 }, { "epoch": 0.8039556181110279, "grad_norm": 7.54472137406532, "learning_rate": 9.745873921544458e-07, "loss": 17.9099, "step": 43982 }, { "epoch": 0.8039738973074744, "grad_norm": 11.404908348341655, "learning_rate": 9.744118161696736e-07, "loss": 17.2509, "step": 43983 }, { "epoch": 0.8039921765039209, "grad_norm": 6.845451675794641, "learning_rate": 9.74236254294238e-07, "loss": 17.4435, "step": 43984 }, { "epoch": 0.8040104557003674, "grad_norm": 5.354163522244539, "learning_rate": 9.740607065287555e-07, "loss": 17.0387, "step": 43985 }, { "epoch": 0.8040287348968139, "grad_norm": 5.919167373096269, "learning_rate": 9.738851728738419e-07, "loss": 16.9557, "step": 43986 }, { "epoch": 0.8040470140932605, "grad_norm": 7.797948766905122, "learning_rate": 9.737096533301093e-07, "loss": 17.804, "step": 43987 }, { "epoch": 0.804065293289707, "grad_norm": 6.101852270243277, "learning_rate": 9.735341478981763e-07, "loss": 17.4198, "step": 43988 }, { "epoch": 0.8040835724861535, "grad_norm": 5.82607237884925, "learning_rate": 9.733586565786557e-07, "loss": 17.285, "step": 43989 }, { "epoch": 0.8041018516826001, "grad_norm": 6.904203865100147, "learning_rate": 9.731831793721652e-07, "loss": 17.5664, "step": 43990 }, { "epoch": 0.8041201308790465, "grad_norm": 7.29940463186674, "learning_rate": 9.730077162793166e-07, "loss": 17.4516, "step": 43991 }, { "epoch": 0.804138410075493, "grad_norm": 5.976201252153256, "learning_rate": 9.728322673007262e-07, "loss": 17.133, "step": 43992 }, { "epoch": 0.8041566892719396, "grad_norm": 5.576578108692254, "learning_rate": 9.726568324370111e-07, "loss": 17.2444, "step": 43993 }, { "epoch": 0.8041749684683861, "grad_norm": 5.48343102913107, "learning_rate": 9.724814116887837e-07, "loss": 17.0029, "step": 43994 }, { "epoch": 0.8041932476648327, "grad_norm": 5.138039645571571, "learning_rate": 9.723060050566592e-07, "loss": 17.1138, "step": 43995 }, { "epoch": 0.8042115268612792, "grad_norm": 6.405010640190571, "learning_rate": 9.721306125412528e-07, "loss": 17.4851, "step": 43996 }, { "epoch": 0.8042298060577256, "grad_norm": 5.113632737288648, "learning_rate": 9.7195523414318e-07, "loss": 16.8857, "step": 43997 }, { "epoch": 0.8042480852541722, "grad_norm": 6.780066022640014, "learning_rate": 9.717798698630525e-07, "loss": 17.9858, "step": 43998 }, { "epoch": 0.8042663644506187, "grad_norm": 5.125485085319073, "learning_rate": 9.716045197014885e-07, "loss": 17.0033, "step": 43999 }, { "epoch": 0.8042846436470653, "grad_norm": 5.641920508282216, "learning_rate": 9.714291836591e-07, "loss": 17.1928, "step": 44000 }, { "epoch": 0.8043029228435118, "grad_norm": 5.612156961726699, "learning_rate": 9.712538617365025e-07, "loss": 17.0645, "step": 44001 }, { "epoch": 0.8043212020399583, "grad_norm": 6.352729554012089, "learning_rate": 9.71078553934312e-07, "loss": 17.5541, "step": 44002 }, { "epoch": 0.8043394812364049, "grad_norm": 6.669524182555548, "learning_rate": 9.709032602531416e-07, "loss": 17.4507, "step": 44003 }, { "epoch": 0.8043577604328513, "grad_norm": 7.864969974384987, "learning_rate": 9.707279806936043e-07, "loss": 17.946, "step": 44004 }, { "epoch": 0.8043760396292979, "grad_norm": 7.4208518684831715, "learning_rate": 9.705527152563166e-07, "loss": 18.1462, "step": 44005 }, { "epoch": 0.8043943188257444, "grad_norm": 6.698434309706989, "learning_rate": 9.70377463941891e-07, "loss": 17.1054, "step": 44006 }, { "epoch": 0.8044125980221909, "grad_norm": 4.461459697889424, "learning_rate": 9.702022267509441e-07, "loss": 16.6601, "step": 44007 }, { "epoch": 0.8044308772186375, "grad_norm": 6.210315188223773, "learning_rate": 9.700270036840876e-07, "loss": 17.1495, "step": 44008 }, { "epoch": 0.804449156415084, "grad_norm": 6.678201337759166, "learning_rate": 9.698517947419368e-07, "loss": 17.3636, "step": 44009 }, { "epoch": 0.8044674356115306, "grad_norm": 7.39911104798655, "learning_rate": 9.696765999251069e-07, "loss": 17.8664, "step": 44010 }, { "epoch": 0.804485714807977, "grad_norm": 5.626496363193582, "learning_rate": 9.695014192342105e-07, "loss": 16.9903, "step": 44011 }, { "epoch": 0.8045039940044235, "grad_norm": 8.83387935982012, "learning_rate": 9.69326252669861e-07, "loss": 18.3624, "step": 44012 }, { "epoch": 0.8045222732008701, "grad_norm": 5.876440358100214, "learning_rate": 9.691511002326743e-07, "loss": 17.2583, "step": 44013 }, { "epoch": 0.8045405523973166, "grad_norm": 8.511077386680201, "learning_rate": 9.68975961923263e-07, "loss": 18.4396, "step": 44014 }, { "epoch": 0.8045588315937632, "grad_norm": 6.811542299871307, "learning_rate": 9.688008377422403e-07, "loss": 17.5987, "step": 44015 }, { "epoch": 0.8045771107902097, "grad_norm": 5.327677103492919, "learning_rate": 9.686257276902217e-07, "loss": 17.053, "step": 44016 }, { "epoch": 0.8045953899866561, "grad_norm": 6.295500698420299, "learning_rate": 9.68450631767819e-07, "loss": 17.1331, "step": 44017 }, { "epoch": 0.8046136691831027, "grad_norm": 7.328532294817102, "learning_rate": 9.682755499756469e-07, "loss": 17.4003, "step": 44018 }, { "epoch": 0.8046319483795492, "grad_norm": 5.59154246105397, "learning_rate": 9.68100482314321e-07, "loss": 16.8731, "step": 44019 }, { "epoch": 0.8046502275759958, "grad_norm": 5.856369389254863, "learning_rate": 9.679254287844508e-07, "loss": 17.1425, "step": 44020 }, { "epoch": 0.8046685067724423, "grad_norm": 7.7478244781031025, "learning_rate": 9.67750389386654e-07, "loss": 18.2023, "step": 44021 }, { "epoch": 0.8046867859688888, "grad_norm": 6.546343086524779, "learning_rate": 9.675753641215419e-07, "loss": 17.65, "step": 44022 }, { "epoch": 0.8047050651653354, "grad_norm": 6.0121613357547865, "learning_rate": 9.67400352989727e-07, "loss": 17.397, "step": 44023 }, { "epoch": 0.8047233443617818, "grad_norm": 7.154440478257929, "learning_rate": 9.672253559918254e-07, "loss": 17.6393, "step": 44024 }, { "epoch": 0.8047416235582284, "grad_norm": 6.075212658840745, "learning_rate": 9.670503731284493e-07, "loss": 17.3927, "step": 44025 }, { "epoch": 0.8047599027546749, "grad_norm": 7.770505811829407, "learning_rate": 9.668754044002099e-07, "loss": 17.3725, "step": 44026 }, { "epoch": 0.8047781819511214, "grad_norm": 6.531183709207873, "learning_rate": 9.66700449807722e-07, "loss": 17.2766, "step": 44027 }, { "epoch": 0.804796461147568, "grad_norm": 7.025690386486365, "learning_rate": 9.665255093516007e-07, "loss": 17.8128, "step": 44028 }, { "epoch": 0.8048147403440145, "grad_norm": 6.274492437859234, "learning_rate": 9.66350583032456e-07, "loss": 17.3433, "step": 44029 }, { "epoch": 0.804833019540461, "grad_norm": 7.461565499080702, "learning_rate": 9.66175670850904e-07, "loss": 17.7366, "step": 44030 }, { "epoch": 0.8048512987369075, "grad_norm": 6.699114123234889, "learning_rate": 9.66000772807556e-07, "loss": 17.7197, "step": 44031 }, { "epoch": 0.804869577933354, "grad_norm": 6.328368839128375, "learning_rate": 9.658258889030242e-07, "loss": 17.5872, "step": 44032 }, { "epoch": 0.8048878571298006, "grad_norm": 6.052344741006827, "learning_rate": 9.656510191379237e-07, "loss": 17.187, "step": 44033 }, { "epoch": 0.8049061363262471, "grad_norm": 4.969723646258776, "learning_rate": 9.65476163512865e-07, "loss": 16.7735, "step": 44034 }, { "epoch": 0.8049244155226937, "grad_norm": 6.076140234752101, "learning_rate": 9.653013220284635e-07, "loss": 17.1462, "step": 44035 }, { "epoch": 0.8049426947191402, "grad_norm": 5.975788563140169, "learning_rate": 9.651264946853295e-07, "loss": 17.4238, "step": 44036 }, { "epoch": 0.8049609739155866, "grad_norm": 4.95239797769928, "learning_rate": 9.64951681484077e-07, "loss": 16.8974, "step": 44037 }, { "epoch": 0.8049792531120332, "grad_norm": 6.247811398455815, "learning_rate": 9.6477688242532e-07, "loss": 17.1471, "step": 44038 }, { "epoch": 0.8049975323084797, "grad_norm": 7.097690218303863, "learning_rate": 9.646020975096704e-07, "loss": 17.485, "step": 44039 }, { "epoch": 0.8050158115049263, "grad_norm": 6.342473730896126, "learning_rate": 9.64427326737738e-07, "loss": 17.4982, "step": 44040 }, { "epoch": 0.8050340907013728, "grad_norm": 5.492168570450289, "learning_rate": 9.642525701101396e-07, "loss": 17.1661, "step": 44041 }, { "epoch": 0.8050523698978193, "grad_norm": 6.727010425505045, "learning_rate": 9.640778276274853e-07, "loss": 17.3996, "step": 44042 }, { "epoch": 0.8050706490942658, "grad_norm": 6.986809028464563, "learning_rate": 9.639030992903876e-07, "loss": 17.8745, "step": 44043 }, { "epoch": 0.8050889282907123, "grad_norm": 6.7477589844534185, "learning_rate": 9.637283850994594e-07, "loss": 17.4198, "step": 44044 }, { "epoch": 0.8051072074871589, "grad_norm": 5.892169042387466, "learning_rate": 9.635536850553123e-07, "loss": 17.0884, "step": 44045 }, { "epoch": 0.8051254866836054, "grad_norm": 7.010680684775248, "learning_rate": 9.633789991585596e-07, "loss": 17.6869, "step": 44046 }, { "epoch": 0.8051437658800519, "grad_norm": 6.537656384160165, "learning_rate": 9.63204327409814e-07, "loss": 17.4753, "step": 44047 }, { "epoch": 0.8051620450764985, "grad_norm": 5.55419788489768, "learning_rate": 9.63029669809687e-07, "loss": 17.0952, "step": 44048 }, { "epoch": 0.805180324272945, "grad_norm": 7.068984372654761, "learning_rate": 9.628550263587894e-07, "loss": 17.3665, "step": 44049 }, { "epoch": 0.8051986034693915, "grad_norm": 6.919089982885812, "learning_rate": 9.62680397057736e-07, "loss": 17.5832, "step": 44050 }, { "epoch": 0.805216882665838, "grad_norm": 6.047612425605046, "learning_rate": 9.625057819071359e-07, "loss": 17.2762, "step": 44051 }, { "epoch": 0.8052351618622845, "grad_norm": 6.68405103103099, "learning_rate": 9.623311809076042e-07, "loss": 17.9421, "step": 44052 }, { "epoch": 0.8052534410587311, "grad_norm": 6.822517514051907, "learning_rate": 9.621565940597516e-07, "loss": 17.7472, "step": 44053 }, { "epoch": 0.8052717202551776, "grad_norm": 6.419594041639958, "learning_rate": 9.61982021364188e-07, "loss": 17.5402, "step": 44054 }, { "epoch": 0.8052899994516242, "grad_norm": 6.3720436838339305, "learning_rate": 9.618074628215273e-07, "loss": 17.5097, "step": 44055 }, { "epoch": 0.8053082786480706, "grad_norm": 4.898167920864472, "learning_rate": 9.61632918432382e-07, "loss": 16.8418, "step": 44056 }, { "epoch": 0.8053265578445171, "grad_norm": 6.243091823262133, "learning_rate": 9.614583881973615e-07, "loss": 17.4305, "step": 44057 }, { "epoch": 0.8053448370409637, "grad_norm": 6.905197236372417, "learning_rate": 9.612838721170809e-07, "loss": 17.6808, "step": 44058 }, { "epoch": 0.8053631162374102, "grad_norm": 6.634878803023063, "learning_rate": 9.611093701921493e-07, "loss": 17.5526, "step": 44059 }, { "epoch": 0.8053813954338567, "grad_norm": 5.692001487811437, "learning_rate": 9.60934882423178e-07, "loss": 17.3964, "step": 44060 }, { "epoch": 0.8053996746303033, "grad_norm": 8.961927852412868, "learning_rate": 9.607604088107803e-07, "loss": 18.0837, "step": 44061 }, { "epoch": 0.8054179538267497, "grad_norm": 7.56113365471858, "learning_rate": 9.605859493555657e-07, "loss": 18.6093, "step": 44062 }, { "epoch": 0.8054362330231963, "grad_norm": 5.584300975312138, "learning_rate": 9.60411504058148e-07, "loss": 17.5774, "step": 44063 }, { "epoch": 0.8054545122196428, "grad_norm": 5.538590860241375, "learning_rate": 9.602370729191364e-07, "loss": 17.1662, "step": 44064 }, { "epoch": 0.8054727914160893, "grad_norm": 7.067368719653457, "learning_rate": 9.600626559391434e-07, "loss": 17.9977, "step": 44065 }, { "epoch": 0.8054910706125359, "grad_norm": 6.551121223883054, "learning_rate": 9.598882531187815e-07, "loss": 17.5728, "step": 44066 }, { "epoch": 0.8055093498089824, "grad_norm": 9.005555318670414, "learning_rate": 9.5971386445866e-07, "loss": 17.9253, "step": 44067 }, { "epoch": 0.805527629005429, "grad_norm": 5.691603009704383, "learning_rate": 9.595394899593903e-07, "loss": 17.127, "step": 44068 }, { "epoch": 0.8055459082018754, "grad_norm": 5.88892483549682, "learning_rate": 9.59365129621585e-07, "loss": 17.0242, "step": 44069 }, { "epoch": 0.8055641873983219, "grad_norm": 6.603778866033808, "learning_rate": 9.591907834458546e-07, "loss": 17.5756, "step": 44070 }, { "epoch": 0.8055824665947685, "grad_norm": 5.3400971326649875, "learning_rate": 9.590164514328081e-07, "loss": 17.0478, "step": 44071 }, { "epoch": 0.805600745791215, "grad_norm": 6.81520627609492, "learning_rate": 9.588421335830588e-07, "loss": 17.4039, "step": 44072 }, { "epoch": 0.8056190249876616, "grad_norm": 5.805981908439472, "learning_rate": 9.586678298972179e-07, "loss": 17.1777, "step": 44073 }, { "epoch": 0.8056373041841081, "grad_norm": 6.258505796842755, "learning_rate": 9.584935403758945e-07, "loss": 17.3898, "step": 44074 }, { "epoch": 0.8056555833805545, "grad_norm": 6.441069234668031, "learning_rate": 9.58319265019702e-07, "loss": 17.3867, "step": 44075 }, { "epoch": 0.8056738625770011, "grad_norm": 5.937618646413489, "learning_rate": 9.581450038292496e-07, "loss": 17.3627, "step": 44076 }, { "epoch": 0.8056921417734476, "grad_norm": 5.810878783212714, "learning_rate": 9.579707568051472e-07, "loss": 17.1308, "step": 44077 }, { "epoch": 0.8057104209698942, "grad_norm": 6.400751387924787, "learning_rate": 9.577965239480075e-07, "loss": 17.3685, "step": 44078 }, { "epoch": 0.8057287001663407, "grad_norm": 6.449872923210873, "learning_rate": 9.576223052584404e-07, "loss": 17.6384, "step": 44079 }, { "epoch": 0.8057469793627872, "grad_norm": 5.867746570600367, "learning_rate": 9.574481007370545e-07, "loss": 17.1894, "step": 44080 }, { "epoch": 0.8057652585592338, "grad_norm": 5.032875321818978, "learning_rate": 9.572739103844631e-07, "loss": 16.8319, "step": 44081 }, { "epoch": 0.8057835377556802, "grad_norm": 5.89108388742535, "learning_rate": 9.57099734201275e-07, "loss": 17.405, "step": 44082 }, { "epoch": 0.8058018169521268, "grad_norm": 6.037373280987891, "learning_rate": 9.569255721881033e-07, "loss": 17.4673, "step": 44083 }, { "epoch": 0.8058200961485733, "grad_norm": 6.0761896626708065, "learning_rate": 9.567514243455567e-07, "loss": 17.1767, "step": 44084 }, { "epoch": 0.8058383753450198, "grad_norm": 6.520410869645883, "learning_rate": 9.565772906742438e-07, "loss": 17.3957, "step": 44085 }, { "epoch": 0.8058566545414664, "grad_norm": 6.908609597810006, "learning_rate": 9.564031711747785e-07, "loss": 17.864, "step": 44086 }, { "epoch": 0.8058749337379129, "grad_norm": 7.033591763948449, "learning_rate": 9.562290658477685e-07, "loss": 17.5726, "step": 44087 }, { "epoch": 0.8058932129343594, "grad_norm": 6.27870110243477, "learning_rate": 9.560549746938241e-07, "loss": 17.174, "step": 44088 }, { "epoch": 0.8059114921308059, "grad_norm": 7.391708181904827, "learning_rate": 9.55880897713557e-07, "loss": 17.5895, "step": 44089 }, { "epoch": 0.8059297713272524, "grad_norm": 6.851464842158381, "learning_rate": 9.557068349075749e-07, "loss": 17.642, "step": 44090 }, { "epoch": 0.805948050523699, "grad_norm": 5.037939961226774, "learning_rate": 9.555327862764897e-07, "loss": 16.7471, "step": 44091 }, { "epoch": 0.8059663297201455, "grad_norm": 7.069730638075384, "learning_rate": 9.553587518209123e-07, "loss": 17.4627, "step": 44092 }, { "epoch": 0.8059846089165921, "grad_norm": 7.375625707482254, "learning_rate": 9.55184731541452e-07, "loss": 17.5974, "step": 44093 }, { "epoch": 0.8060028881130386, "grad_norm": 4.97497061518849, "learning_rate": 9.550107254387165e-07, "loss": 16.8538, "step": 44094 }, { "epoch": 0.806021167309485, "grad_norm": 8.775925454172667, "learning_rate": 9.548367335133186e-07, "loss": 18.9185, "step": 44095 }, { "epoch": 0.8060394465059316, "grad_norm": 7.229929154435505, "learning_rate": 9.546627557658656e-07, "loss": 17.5977, "step": 44096 }, { "epoch": 0.8060577257023781, "grad_norm": 6.254207372093199, "learning_rate": 9.544887921969702e-07, "loss": 17.3212, "step": 44097 }, { "epoch": 0.8060760048988247, "grad_norm": 4.882139570830741, "learning_rate": 9.543148428072401e-07, "loss": 17.1097, "step": 44098 }, { "epoch": 0.8060942840952712, "grad_norm": 6.149459629404229, "learning_rate": 9.541409075972846e-07, "loss": 17.1687, "step": 44099 }, { "epoch": 0.8061125632917177, "grad_norm": 6.2499632827206035, "learning_rate": 9.539669865677143e-07, "loss": 17.4928, "step": 44100 }, { "epoch": 0.8061308424881642, "grad_norm": 6.472930191116083, "learning_rate": 9.53793079719139e-07, "loss": 17.6938, "step": 44101 }, { "epoch": 0.8061491216846107, "grad_norm": 5.098090271822933, "learning_rate": 9.536191870521672e-07, "loss": 17.079, "step": 44102 }, { "epoch": 0.8061674008810573, "grad_norm": 6.971737886839197, "learning_rate": 9.534453085674106e-07, "loss": 17.9479, "step": 44103 }, { "epoch": 0.8061856800775038, "grad_norm": 6.214844635220554, "learning_rate": 9.532714442654761e-07, "loss": 17.4199, "step": 44104 }, { "epoch": 0.8062039592739503, "grad_norm": 5.972581263088305, "learning_rate": 9.530975941469733e-07, "loss": 17.3358, "step": 44105 }, { "epoch": 0.8062222384703969, "grad_norm": 6.443002947586044, "learning_rate": 9.529237582125139e-07, "loss": 17.3933, "step": 44106 }, { "epoch": 0.8062405176668433, "grad_norm": 6.8804039466465, "learning_rate": 9.527499364627052e-07, "loss": 17.4098, "step": 44107 }, { "epoch": 0.8062587968632899, "grad_norm": 6.398178526404398, "learning_rate": 9.525761288981555e-07, "loss": 17.6105, "step": 44108 }, { "epoch": 0.8062770760597364, "grad_norm": 6.849165156313241, "learning_rate": 9.52402335519475e-07, "loss": 17.8912, "step": 44109 }, { "epoch": 0.8062953552561829, "grad_norm": 7.144842346557703, "learning_rate": 9.522285563272749e-07, "loss": 17.8768, "step": 44110 }, { "epoch": 0.8063136344526295, "grad_norm": 5.380733380956112, "learning_rate": 9.520547913221606e-07, "loss": 17.0414, "step": 44111 }, { "epoch": 0.806331913649076, "grad_norm": 4.83961505695288, "learning_rate": 9.518810405047446e-07, "loss": 16.8544, "step": 44112 }, { "epoch": 0.8063501928455226, "grad_norm": 9.059038542795445, "learning_rate": 9.517073038756331e-07, "loss": 18.548, "step": 44113 }, { "epoch": 0.806368472041969, "grad_norm": 5.157299606049992, "learning_rate": 9.51533581435437e-07, "loss": 16.977, "step": 44114 }, { "epoch": 0.8063867512384155, "grad_norm": 6.061037082759903, "learning_rate": 9.513598731847645e-07, "loss": 17.07, "step": 44115 }, { "epoch": 0.8064050304348621, "grad_norm": 5.135461701471406, "learning_rate": 9.511861791242233e-07, "loss": 16.9513, "step": 44116 }, { "epoch": 0.8064233096313086, "grad_norm": 6.503545419933212, "learning_rate": 9.510124992544246e-07, "loss": 17.314, "step": 44117 }, { "epoch": 0.8064415888277552, "grad_norm": 6.106540123917064, "learning_rate": 9.508388335759744e-07, "loss": 17.5578, "step": 44118 }, { "epoch": 0.8064598680242017, "grad_norm": 5.590717669152553, "learning_rate": 9.506651820894824e-07, "loss": 17.0886, "step": 44119 }, { "epoch": 0.8064781472206481, "grad_norm": 5.546734438968169, "learning_rate": 9.504915447955587e-07, "loss": 17.1314, "step": 44120 }, { "epoch": 0.8064964264170947, "grad_norm": 6.378211707917194, "learning_rate": 9.503179216948111e-07, "loss": 17.5768, "step": 44121 }, { "epoch": 0.8065147056135412, "grad_norm": 5.502459645663471, "learning_rate": 9.501443127878468e-07, "loss": 17.0034, "step": 44122 }, { "epoch": 0.8065329848099878, "grad_norm": 5.4133029861582855, "learning_rate": 9.49970718075276e-07, "loss": 17.2186, "step": 44123 }, { "epoch": 0.8065512640064343, "grad_norm": 6.709493322987813, "learning_rate": 9.497971375577064e-07, "loss": 17.4829, "step": 44124 }, { "epoch": 0.8065695432028808, "grad_norm": 5.323326711566022, "learning_rate": 9.496235712357454e-07, "loss": 17.0625, "step": 44125 }, { "epoch": 0.8065878223993274, "grad_norm": 5.571138927663759, "learning_rate": 9.494500191100037e-07, "loss": 16.8865, "step": 44126 }, { "epoch": 0.8066061015957738, "grad_norm": 5.298675907438513, "learning_rate": 9.492764811810862e-07, "loss": 17.1872, "step": 44127 }, { "epoch": 0.8066243807922203, "grad_norm": 6.563384543321054, "learning_rate": 9.491029574496036e-07, "loss": 17.6593, "step": 44128 }, { "epoch": 0.8066426599886669, "grad_norm": 5.395891847594612, "learning_rate": 9.489294479161648e-07, "loss": 16.9518, "step": 44129 }, { "epoch": 0.8066609391851134, "grad_norm": 6.546061985204763, "learning_rate": 9.487559525813756e-07, "loss": 17.3667, "step": 44130 }, { "epoch": 0.80667921838156, "grad_norm": 7.376771258430499, "learning_rate": 9.485824714458463e-07, "loss": 17.5203, "step": 44131 }, { "epoch": 0.8066974975780065, "grad_norm": 7.189055358899582, "learning_rate": 9.484090045101846e-07, "loss": 18.1827, "step": 44132 }, { "epoch": 0.8067157767744529, "grad_norm": 8.363082532837378, "learning_rate": 9.482355517749958e-07, "loss": 17.6424, "step": 44133 }, { "epoch": 0.8067340559708995, "grad_norm": 5.910820247334077, "learning_rate": 9.480621132408913e-07, "loss": 17.0191, "step": 44134 }, { "epoch": 0.806752335167346, "grad_norm": 5.839635599753645, "learning_rate": 9.47888688908476e-07, "loss": 17.0999, "step": 44135 }, { "epoch": 0.8067706143637926, "grad_norm": 4.852721634128186, "learning_rate": 9.477152787783589e-07, "loss": 16.7246, "step": 44136 }, { "epoch": 0.8067888935602391, "grad_norm": 6.273552554187649, "learning_rate": 9.475418828511501e-07, "loss": 17.4488, "step": 44137 }, { "epoch": 0.8068071727566856, "grad_norm": 6.048402926947788, "learning_rate": 9.473685011274547e-07, "loss": 17.0257, "step": 44138 }, { "epoch": 0.8068254519531322, "grad_norm": 5.411294437089483, "learning_rate": 9.471951336078805e-07, "loss": 17.1624, "step": 44139 }, { "epoch": 0.8068437311495786, "grad_norm": 5.515995964183675, "learning_rate": 9.470217802930365e-07, "loss": 17.3709, "step": 44140 }, { "epoch": 0.8068620103460252, "grad_norm": 6.027875794251268, "learning_rate": 9.468484411835293e-07, "loss": 17.2838, "step": 44141 }, { "epoch": 0.8068802895424717, "grad_norm": 5.894340450593285, "learning_rate": 9.466751162799654e-07, "loss": 17.4381, "step": 44142 }, { "epoch": 0.8068985687389182, "grad_norm": 5.648365053299245, "learning_rate": 9.46501805582955e-07, "loss": 17.041, "step": 44143 }, { "epoch": 0.8069168479353648, "grad_norm": 7.53550894272099, "learning_rate": 9.463285090931029e-07, "loss": 17.9405, "step": 44144 }, { "epoch": 0.8069351271318113, "grad_norm": 6.597915194281865, "learning_rate": 9.461552268110169e-07, "loss": 17.2402, "step": 44145 }, { "epoch": 0.8069534063282578, "grad_norm": 6.388450565222742, "learning_rate": 9.45981958737307e-07, "loss": 17.3201, "step": 44146 }, { "epoch": 0.8069716855247043, "grad_norm": 6.307907356383488, "learning_rate": 9.458087048725773e-07, "loss": 17.449, "step": 44147 }, { "epoch": 0.8069899647211508, "grad_norm": 6.161156121150225, "learning_rate": 9.456354652174371e-07, "loss": 17.5027, "step": 44148 }, { "epoch": 0.8070082439175974, "grad_norm": 6.367041965880469, "learning_rate": 9.454622397724927e-07, "loss": 17.3019, "step": 44149 }, { "epoch": 0.8070265231140439, "grad_norm": 6.879410300786612, "learning_rate": 9.452890285383504e-07, "loss": 17.4649, "step": 44150 }, { "epoch": 0.8070448023104905, "grad_norm": 5.547954390175901, "learning_rate": 9.451158315156194e-07, "loss": 17.1424, "step": 44151 }, { "epoch": 0.807063081506937, "grad_norm": 6.56922313416153, "learning_rate": 9.449426487049058e-07, "loss": 17.3022, "step": 44152 }, { "epoch": 0.8070813607033834, "grad_norm": 5.406329070954839, "learning_rate": 9.447694801068146e-07, "loss": 17.1759, "step": 44153 }, { "epoch": 0.80709963989983, "grad_norm": 5.741170833399516, "learning_rate": 9.445963257219548e-07, "loss": 17.1359, "step": 44154 }, { "epoch": 0.8071179190962765, "grad_norm": 6.5146907618327745, "learning_rate": 9.444231855509339e-07, "loss": 17.4507, "step": 44155 }, { "epoch": 0.8071361982927231, "grad_norm": 6.928135540123766, "learning_rate": 9.442500595943566e-07, "loss": 17.6053, "step": 44156 }, { "epoch": 0.8071544774891696, "grad_norm": 6.228720807333185, "learning_rate": 9.440769478528322e-07, "loss": 17.1927, "step": 44157 }, { "epoch": 0.807172756685616, "grad_norm": 6.529227646679525, "learning_rate": 9.439038503269654e-07, "loss": 17.535, "step": 44158 }, { "epoch": 0.8071910358820626, "grad_norm": 5.2754991179258095, "learning_rate": 9.437307670173646e-07, "loss": 17.1093, "step": 44159 }, { "epoch": 0.8072093150785091, "grad_norm": 5.778574502930664, "learning_rate": 9.435576979246353e-07, "loss": 17.3945, "step": 44160 }, { "epoch": 0.8072275942749557, "grad_norm": 7.253860550740512, "learning_rate": 9.433846430493826e-07, "loss": 17.8103, "step": 44161 }, { "epoch": 0.8072458734714022, "grad_norm": 5.658757603561204, "learning_rate": 9.432116023922167e-07, "loss": 17.2404, "step": 44162 }, { "epoch": 0.8072641526678487, "grad_norm": 5.65810723048751, "learning_rate": 9.430385759537402e-07, "loss": 17.3309, "step": 44163 }, { "epoch": 0.8072824318642953, "grad_norm": 6.364643393199422, "learning_rate": 9.428655637345623e-07, "loss": 17.632, "step": 44164 }, { "epoch": 0.8073007110607417, "grad_norm": 6.733343277109904, "learning_rate": 9.426925657352892e-07, "loss": 17.2794, "step": 44165 }, { "epoch": 0.8073189902571883, "grad_norm": 5.2001727830997275, "learning_rate": 9.42519581956527e-07, "loss": 17.0039, "step": 44166 }, { "epoch": 0.8073372694536348, "grad_norm": 5.515728684042585, "learning_rate": 9.423466123988795e-07, "loss": 17.1296, "step": 44167 }, { "epoch": 0.8073555486500813, "grad_norm": 5.900855699739158, "learning_rate": 9.421736570629569e-07, "loss": 17.4696, "step": 44168 }, { "epoch": 0.8073738278465279, "grad_norm": 6.253840933557618, "learning_rate": 9.420007159493633e-07, "loss": 17.4639, "step": 44169 }, { "epoch": 0.8073921070429744, "grad_norm": 5.490511453916024, "learning_rate": 9.418277890587041e-07, "loss": 16.9619, "step": 44170 }, { "epoch": 0.807410386239421, "grad_norm": 5.182471317457988, "learning_rate": 9.416548763915873e-07, "loss": 17.133, "step": 44171 }, { "epoch": 0.8074286654358674, "grad_norm": 5.355878377783817, "learning_rate": 9.414819779486167e-07, "loss": 17.3647, "step": 44172 }, { "epoch": 0.8074469446323139, "grad_norm": 6.968180865349899, "learning_rate": 9.413090937303998e-07, "loss": 17.3503, "step": 44173 }, { "epoch": 0.8074652238287605, "grad_norm": 5.1927395782496, "learning_rate": 9.411362237375438e-07, "loss": 17.0509, "step": 44174 }, { "epoch": 0.807483503025207, "grad_norm": 5.386607831225617, "learning_rate": 9.409633679706514e-07, "loss": 16.9495, "step": 44175 }, { "epoch": 0.8075017822216536, "grad_norm": 8.758980442244368, "learning_rate": 9.407905264303313e-07, "loss": 18.1619, "step": 44176 }, { "epoch": 0.8075200614181001, "grad_norm": 5.958590933567905, "learning_rate": 9.406176991171884e-07, "loss": 16.8725, "step": 44177 }, { "epoch": 0.8075383406145465, "grad_norm": 6.184153440138005, "learning_rate": 9.404448860318272e-07, "loss": 17.3093, "step": 44178 }, { "epoch": 0.8075566198109931, "grad_norm": 6.562090480976087, "learning_rate": 9.402720871748555e-07, "loss": 17.1774, "step": 44179 }, { "epoch": 0.8075748990074396, "grad_norm": 5.482380658218511, "learning_rate": 9.400993025468779e-07, "loss": 17.2083, "step": 44180 }, { "epoch": 0.8075931782038862, "grad_norm": 6.300753080736051, "learning_rate": 9.399265321484985e-07, "loss": 17.6112, "step": 44181 }, { "epoch": 0.8076114574003327, "grad_norm": 6.6611483965614875, "learning_rate": 9.397537759803244e-07, "loss": 17.6919, "step": 44182 }, { "epoch": 0.8076297365967792, "grad_norm": 5.1631145108430365, "learning_rate": 9.395810340429624e-07, "loss": 16.985, "step": 44183 }, { "epoch": 0.8076480157932258, "grad_norm": 6.3955370913999525, "learning_rate": 9.394083063370146e-07, "loss": 17.1094, "step": 44184 }, { "epoch": 0.8076662949896722, "grad_norm": 4.968737937750534, "learning_rate": 9.392355928630903e-07, "loss": 16.8828, "step": 44185 }, { "epoch": 0.8076845741861188, "grad_norm": 6.627727814379081, "learning_rate": 9.390628936217927e-07, "loss": 17.3166, "step": 44186 }, { "epoch": 0.8077028533825653, "grad_norm": 5.477672969563247, "learning_rate": 9.388902086137258e-07, "loss": 17.2216, "step": 44187 }, { "epoch": 0.8077211325790118, "grad_norm": 6.816217255294166, "learning_rate": 9.38717537839498e-07, "loss": 17.0278, "step": 44188 }, { "epoch": 0.8077394117754584, "grad_norm": 8.489471116901253, "learning_rate": 9.385448812997111e-07, "loss": 18.4342, "step": 44189 }, { "epoch": 0.8077576909719049, "grad_norm": 6.7716293460288846, "learning_rate": 9.383722389949735e-07, "loss": 17.5498, "step": 44190 }, { "epoch": 0.8077759701683515, "grad_norm": 5.677714806170038, "learning_rate": 9.381996109258873e-07, "loss": 17.1535, "step": 44191 }, { "epoch": 0.8077942493647979, "grad_norm": 6.078931496119163, "learning_rate": 9.380269970930589e-07, "loss": 17.1239, "step": 44192 }, { "epoch": 0.8078125285612444, "grad_norm": 5.870061889361952, "learning_rate": 9.37854397497095e-07, "loss": 17.5272, "step": 44193 }, { "epoch": 0.807830807757691, "grad_norm": 7.036282539126981, "learning_rate": 9.376818121385989e-07, "loss": 17.6924, "step": 44194 }, { "epoch": 0.8078490869541375, "grad_norm": 6.792007096214483, "learning_rate": 9.375092410181741e-07, "loss": 17.5524, "step": 44195 }, { "epoch": 0.807867366150584, "grad_norm": 6.9785510479908215, "learning_rate": 9.373366841364279e-07, "loss": 17.4769, "step": 44196 }, { "epoch": 0.8078856453470306, "grad_norm": 5.857932100507708, "learning_rate": 9.371641414939648e-07, "loss": 17.232, "step": 44197 }, { "epoch": 0.807903924543477, "grad_norm": 4.847995249991021, "learning_rate": 9.36991613091387e-07, "loss": 16.8169, "step": 44198 }, { "epoch": 0.8079222037399236, "grad_norm": 6.504898662787896, "learning_rate": 9.368190989293013e-07, "loss": 17.4554, "step": 44199 }, { "epoch": 0.8079404829363701, "grad_norm": 7.343652171796915, "learning_rate": 9.366465990083129e-07, "loss": 17.5649, "step": 44200 }, { "epoch": 0.8079587621328166, "grad_norm": 6.762961415647817, "learning_rate": 9.364741133290245e-07, "loss": 17.246, "step": 44201 }, { "epoch": 0.8079770413292632, "grad_norm": 8.441891499418142, "learning_rate": 9.363016418920434e-07, "loss": 18.4605, "step": 44202 }, { "epoch": 0.8079953205257097, "grad_norm": 6.3374049144875535, "learning_rate": 9.361291846979708e-07, "loss": 17.1835, "step": 44203 }, { "epoch": 0.8080135997221563, "grad_norm": 6.704758735760042, "learning_rate": 9.359567417474142e-07, "loss": 17.5243, "step": 44204 }, { "epoch": 0.8080318789186027, "grad_norm": 5.804514640615928, "learning_rate": 9.35784313040976e-07, "loss": 17.1597, "step": 44205 }, { "epoch": 0.8080501581150492, "grad_norm": 5.117273677873032, "learning_rate": 9.356118985792606e-07, "loss": 17.2108, "step": 44206 }, { "epoch": 0.8080684373114958, "grad_norm": 7.198106207380016, "learning_rate": 9.354394983628734e-07, "loss": 17.4155, "step": 44207 }, { "epoch": 0.8080867165079423, "grad_norm": 6.390300960848414, "learning_rate": 9.352671123924173e-07, "loss": 17.267, "step": 44208 }, { "epoch": 0.8081049957043889, "grad_norm": 6.500321833435005, "learning_rate": 9.350947406684969e-07, "loss": 17.4185, "step": 44209 }, { "epoch": 0.8081232749008354, "grad_norm": 6.351471574724645, "learning_rate": 9.349223831917181e-07, "loss": 17.5388, "step": 44210 }, { "epoch": 0.8081415540972818, "grad_norm": 8.25221529744489, "learning_rate": 9.347500399626835e-07, "loss": 17.6919, "step": 44211 }, { "epoch": 0.8081598332937284, "grad_norm": 5.742267281886216, "learning_rate": 9.345777109819965e-07, "loss": 17.4086, "step": 44212 }, { "epoch": 0.8081781124901749, "grad_norm": 5.932114429045774, "learning_rate": 9.344053962502625e-07, "loss": 17.1804, "step": 44213 }, { "epoch": 0.8081963916866215, "grad_norm": 6.247575096927776, "learning_rate": 9.342330957680851e-07, "loss": 17.3988, "step": 44214 }, { "epoch": 0.808214670883068, "grad_norm": 5.400427346476599, "learning_rate": 9.340608095360665e-07, "loss": 16.9785, "step": 44215 }, { "epoch": 0.8082329500795145, "grad_norm": 5.5787436645029365, "learning_rate": 9.338885375548129e-07, "loss": 17.0866, "step": 44216 }, { "epoch": 0.808251229275961, "grad_norm": 6.8474487616340385, "learning_rate": 9.337162798249266e-07, "loss": 17.4107, "step": 44217 }, { "epoch": 0.8082695084724075, "grad_norm": 5.9148077012353335, "learning_rate": 9.335440363470111e-07, "loss": 17.1796, "step": 44218 }, { "epoch": 0.8082877876688541, "grad_norm": 6.021358268076541, "learning_rate": 9.333718071216729e-07, "loss": 17.3646, "step": 44219 }, { "epoch": 0.8083060668653006, "grad_norm": 5.723960035387458, "learning_rate": 9.331995921495118e-07, "loss": 17.1398, "step": 44220 }, { "epoch": 0.8083243460617471, "grad_norm": 7.609790922390687, "learning_rate": 9.33027391431135e-07, "loss": 17.6556, "step": 44221 }, { "epoch": 0.8083426252581937, "grad_norm": 5.625128597304047, "learning_rate": 9.328552049671435e-07, "loss": 17.3228, "step": 44222 }, { "epoch": 0.8083609044546402, "grad_norm": 6.450780220856981, "learning_rate": 9.32683032758141e-07, "loss": 17.5328, "step": 44223 }, { "epoch": 0.8083791836510867, "grad_norm": 6.871651237529927, "learning_rate": 9.325108748047324e-07, "loss": 17.6489, "step": 44224 }, { "epoch": 0.8083974628475332, "grad_norm": 5.978802633144143, "learning_rate": 9.323387311075205e-07, "loss": 17.1395, "step": 44225 }, { "epoch": 0.8084157420439797, "grad_norm": 5.402609576201319, "learning_rate": 9.321666016671072e-07, "loss": 16.9774, "step": 44226 }, { "epoch": 0.8084340212404263, "grad_norm": 8.572020206001076, "learning_rate": 9.319944864840963e-07, "loss": 18.0655, "step": 44227 }, { "epoch": 0.8084523004368728, "grad_norm": 4.942257016007521, "learning_rate": 9.318223855590935e-07, "loss": 16.7866, "step": 44228 }, { "epoch": 0.8084705796333194, "grad_norm": 5.56736627023177, "learning_rate": 9.31650298892699e-07, "loss": 16.8865, "step": 44229 }, { "epoch": 0.8084888588297658, "grad_norm": 5.868922990184714, "learning_rate": 9.314782264855182e-07, "loss": 17.404, "step": 44230 }, { "epoch": 0.8085071380262123, "grad_norm": 5.565759157888683, "learning_rate": 9.313061683381536e-07, "loss": 17.1249, "step": 44231 }, { "epoch": 0.8085254172226589, "grad_norm": 5.546305836835831, "learning_rate": 9.311341244512062e-07, "loss": 16.9432, "step": 44232 }, { "epoch": 0.8085436964191054, "grad_norm": 7.530173052151421, "learning_rate": 9.309620948252818e-07, "loss": 17.6928, "step": 44233 }, { "epoch": 0.808561975615552, "grad_norm": 5.146547455376883, "learning_rate": 9.307900794609809e-07, "loss": 16.8317, "step": 44234 }, { "epoch": 0.8085802548119985, "grad_norm": 6.396124221641863, "learning_rate": 9.306180783589091e-07, "loss": 17.415, "step": 44235 }, { "epoch": 0.808598534008445, "grad_norm": 5.397514445790608, "learning_rate": 9.304460915196667e-07, "loss": 17.0767, "step": 44236 }, { "epoch": 0.8086168132048915, "grad_norm": 6.604803257453563, "learning_rate": 9.302741189438574e-07, "loss": 17.6001, "step": 44237 }, { "epoch": 0.808635092401338, "grad_norm": 5.805711469893114, "learning_rate": 9.301021606320853e-07, "loss": 17.3172, "step": 44238 }, { "epoch": 0.8086533715977846, "grad_norm": 7.429692584092274, "learning_rate": 9.299302165849522e-07, "loss": 17.7947, "step": 44239 }, { "epoch": 0.8086716507942311, "grad_norm": 6.776816884241388, "learning_rate": 9.297582868030591e-07, "loss": 17.5252, "step": 44240 }, { "epoch": 0.8086899299906776, "grad_norm": 7.7390855264353355, "learning_rate": 9.295863712870113e-07, "loss": 17.5937, "step": 44241 }, { "epoch": 0.8087082091871242, "grad_norm": 6.185332018424762, "learning_rate": 9.294144700374097e-07, "loss": 17.2235, "step": 44242 }, { "epoch": 0.8087264883835706, "grad_norm": 4.612551106301399, "learning_rate": 9.292425830548562e-07, "loss": 16.7909, "step": 44243 }, { "epoch": 0.8087447675800172, "grad_norm": 6.887037211573239, "learning_rate": 9.29070710339956e-07, "loss": 17.5522, "step": 44244 }, { "epoch": 0.8087630467764637, "grad_norm": 6.875552081759137, "learning_rate": 9.288988518933078e-07, "loss": 17.5834, "step": 44245 }, { "epoch": 0.8087813259729102, "grad_norm": 6.355447069926466, "learning_rate": 9.287270077155159e-07, "loss": 17.3244, "step": 44246 }, { "epoch": 0.8087996051693568, "grad_norm": 5.607723743531057, "learning_rate": 9.285551778071839e-07, "loss": 17.2454, "step": 44247 }, { "epoch": 0.8088178843658033, "grad_norm": 6.37100498634577, "learning_rate": 9.283833621689125e-07, "loss": 17.7607, "step": 44248 }, { "epoch": 0.8088361635622499, "grad_norm": 8.471491118587956, "learning_rate": 9.282115608013032e-07, "loss": 17.8115, "step": 44249 }, { "epoch": 0.8088544427586963, "grad_norm": 6.390959276847864, "learning_rate": 9.280397737049601e-07, "loss": 17.4043, "step": 44250 }, { "epoch": 0.8088727219551428, "grad_norm": 7.499415369316967, "learning_rate": 9.27868000880483e-07, "loss": 17.4009, "step": 44251 }, { "epoch": 0.8088910011515894, "grad_norm": 7.720385281242731, "learning_rate": 9.276962423284764e-07, "loss": 17.6869, "step": 44252 }, { "epoch": 0.8089092803480359, "grad_norm": 6.661084666607688, "learning_rate": 9.275244980495401e-07, "loss": 17.8042, "step": 44253 }, { "epoch": 0.8089275595444825, "grad_norm": 5.611131356864857, "learning_rate": 9.273527680442779e-07, "loss": 17.2083, "step": 44254 }, { "epoch": 0.808945838740929, "grad_norm": 6.858953374652895, "learning_rate": 9.271810523132901e-07, "loss": 17.1783, "step": 44255 }, { "epoch": 0.8089641179373754, "grad_norm": 5.951305099140406, "learning_rate": 9.270093508571803e-07, "loss": 17.0491, "step": 44256 }, { "epoch": 0.808982397133822, "grad_norm": 6.838378954650663, "learning_rate": 9.268376636765481e-07, "loss": 17.5677, "step": 44257 }, { "epoch": 0.8090006763302685, "grad_norm": 8.557931130641622, "learning_rate": 9.266659907719972e-07, "loss": 17.867, "step": 44258 }, { "epoch": 0.8090189555267151, "grad_norm": 5.665655045230569, "learning_rate": 9.264943321441288e-07, "loss": 17.2062, "step": 44259 }, { "epoch": 0.8090372347231616, "grad_norm": 5.7753028225145435, "learning_rate": 9.263226877935433e-07, "loss": 17.1689, "step": 44260 }, { "epoch": 0.8090555139196081, "grad_norm": 4.930996675140523, "learning_rate": 9.261510577208443e-07, "loss": 17.0399, "step": 44261 }, { "epoch": 0.8090737931160547, "grad_norm": 7.4260847055394015, "learning_rate": 9.25979441926631e-07, "loss": 17.5752, "step": 44262 }, { "epoch": 0.8090920723125011, "grad_norm": 6.818621403826473, "learning_rate": 9.258078404115068e-07, "loss": 17.2175, "step": 44263 }, { "epoch": 0.8091103515089476, "grad_norm": 4.3535154281104465, "learning_rate": 9.256362531760731e-07, "loss": 16.8459, "step": 44264 }, { "epoch": 0.8091286307053942, "grad_norm": 6.531748493086684, "learning_rate": 9.2546468022093e-07, "loss": 17.426, "step": 44265 }, { "epoch": 0.8091469099018407, "grad_norm": 6.62984333339677, "learning_rate": 9.252931215466803e-07, "loss": 17.5342, "step": 44266 }, { "epoch": 0.8091651890982873, "grad_norm": 5.67083835128013, "learning_rate": 9.251215771539251e-07, "loss": 17.1101, "step": 44267 }, { "epoch": 0.8091834682947338, "grad_norm": 7.35882992634677, "learning_rate": 9.249500470432643e-07, "loss": 17.9513, "step": 44268 }, { "epoch": 0.8092017474911802, "grad_norm": 4.927620914919205, "learning_rate": 9.247785312153007e-07, "loss": 16.8169, "step": 44269 }, { "epoch": 0.8092200266876268, "grad_norm": 5.169142604766511, "learning_rate": 9.246070296706344e-07, "loss": 16.9537, "step": 44270 }, { "epoch": 0.8092383058840733, "grad_norm": 8.672168189352163, "learning_rate": 9.244355424098661e-07, "loss": 18.424, "step": 44271 }, { "epoch": 0.8092565850805199, "grad_norm": 6.106203687941156, "learning_rate": 9.242640694335975e-07, "loss": 17.2698, "step": 44272 }, { "epoch": 0.8092748642769664, "grad_norm": 6.352718513457032, "learning_rate": 9.240926107424309e-07, "loss": 17.5439, "step": 44273 }, { "epoch": 0.8092931434734129, "grad_norm": 8.410488033199357, "learning_rate": 9.239211663369646e-07, "loss": 18.4718, "step": 44274 }, { "epoch": 0.8093114226698594, "grad_norm": 5.171064843031255, "learning_rate": 9.237497362178022e-07, "loss": 16.9551, "step": 44275 }, { "epoch": 0.8093297018663059, "grad_norm": 6.1682250634437885, "learning_rate": 9.235783203855431e-07, "loss": 17.6256, "step": 44276 }, { "epoch": 0.8093479810627525, "grad_norm": 6.390026388651579, "learning_rate": 9.234069188407874e-07, "loss": 17.3615, "step": 44277 }, { "epoch": 0.809366260259199, "grad_norm": 6.4100113658085, "learning_rate": 9.232355315841379e-07, "loss": 17.5123, "step": 44278 }, { "epoch": 0.8093845394556455, "grad_norm": 5.737033737411813, "learning_rate": 9.230641586161937e-07, "loss": 17.2965, "step": 44279 }, { "epoch": 0.8094028186520921, "grad_norm": 6.777646200445011, "learning_rate": 9.228927999375547e-07, "loss": 17.6157, "step": 44280 }, { "epoch": 0.8094210978485386, "grad_norm": 5.959199128889864, "learning_rate": 9.227214555488229e-07, "loss": 17.2585, "step": 44281 }, { "epoch": 0.8094393770449851, "grad_norm": 6.613141895118459, "learning_rate": 9.225501254505986e-07, "loss": 17.7349, "step": 44282 }, { "epoch": 0.8094576562414316, "grad_norm": 6.243475552181353, "learning_rate": 9.223788096434833e-07, "loss": 17.0906, "step": 44283 }, { "epoch": 0.8094759354378781, "grad_norm": 6.090584606893905, "learning_rate": 9.222075081280762e-07, "loss": 17.4424, "step": 44284 }, { "epoch": 0.8094942146343247, "grad_norm": 7.485243329484788, "learning_rate": 9.220362209049771e-07, "loss": 17.9136, "step": 44285 }, { "epoch": 0.8095124938307712, "grad_norm": 5.032988373586404, "learning_rate": 9.218649479747882e-07, "loss": 16.9029, "step": 44286 }, { "epoch": 0.8095307730272178, "grad_norm": 5.128743432492607, "learning_rate": 9.21693689338109e-07, "loss": 16.8916, "step": 44287 }, { "epoch": 0.8095490522236642, "grad_norm": 5.746531147694005, "learning_rate": 9.215224449955385e-07, "loss": 17.5249, "step": 44288 }, { "epoch": 0.8095673314201107, "grad_norm": 5.306115037646359, "learning_rate": 9.213512149476789e-07, "loss": 17.0254, "step": 44289 }, { "epoch": 0.8095856106165573, "grad_norm": 9.785289790848534, "learning_rate": 9.211799991951275e-07, "loss": 17.9054, "step": 44290 }, { "epoch": 0.8096038898130038, "grad_norm": 6.032575281282718, "learning_rate": 9.210087977384868e-07, "loss": 17.5305, "step": 44291 }, { "epoch": 0.8096221690094504, "grad_norm": 5.506938002268831, "learning_rate": 9.208376105783579e-07, "loss": 17.0484, "step": 44292 }, { "epoch": 0.8096404482058969, "grad_norm": 6.190737874355116, "learning_rate": 9.206664377153391e-07, "loss": 17.5355, "step": 44293 }, { "epoch": 0.8096587274023433, "grad_norm": 7.013084086014899, "learning_rate": 9.204952791500287e-07, "loss": 17.632, "step": 44294 }, { "epoch": 0.8096770065987899, "grad_norm": 8.843870288140188, "learning_rate": 9.203241348830299e-07, "loss": 17.6614, "step": 44295 }, { "epoch": 0.8096952857952364, "grad_norm": 7.3358747338535055, "learning_rate": 9.201530049149399e-07, "loss": 17.4355, "step": 44296 }, { "epoch": 0.809713564991683, "grad_norm": 7.174528540407861, "learning_rate": 9.199818892463602e-07, "loss": 17.5302, "step": 44297 }, { "epoch": 0.8097318441881295, "grad_norm": 5.811689696832371, "learning_rate": 9.198107878778906e-07, "loss": 17.2694, "step": 44298 }, { "epoch": 0.809750123384576, "grad_norm": 7.61653402247298, "learning_rate": 9.196397008101287e-07, "loss": 17.134, "step": 44299 }, { "epoch": 0.8097684025810226, "grad_norm": 6.385742059609819, "learning_rate": 9.194686280436754e-07, "loss": 17.3377, "step": 44300 }, { "epoch": 0.809786681777469, "grad_norm": 7.216451173478297, "learning_rate": 9.192975695791318e-07, "loss": 17.691, "step": 44301 }, { "epoch": 0.8098049609739156, "grad_norm": 6.095461704660316, "learning_rate": 9.19126525417095e-07, "loss": 17.3226, "step": 44302 }, { "epoch": 0.8098232401703621, "grad_norm": 6.428520207487334, "learning_rate": 9.189554955581665e-07, "loss": 17.3002, "step": 44303 }, { "epoch": 0.8098415193668086, "grad_norm": 6.224320349653975, "learning_rate": 9.187844800029449e-07, "loss": 17.299, "step": 44304 }, { "epoch": 0.8098597985632552, "grad_norm": 4.940226222949368, "learning_rate": 9.186134787520289e-07, "loss": 16.8315, "step": 44305 }, { "epoch": 0.8098780777597017, "grad_norm": 6.502476475364062, "learning_rate": 9.184424918060192e-07, "loss": 17.3898, "step": 44306 }, { "epoch": 0.8098963569561483, "grad_norm": 5.016891215451654, "learning_rate": 9.182715191655145e-07, "loss": 16.5965, "step": 44307 }, { "epoch": 0.8099146361525947, "grad_norm": 5.172304521136889, "learning_rate": 9.181005608311127e-07, "loss": 16.9032, "step": 44308 }, { "epoch": 0.8099329153490412, "grad_norm": 8.362344271298218, "learning_rate": 9.17929616803414e-07, "loss": 17.7492, "step": 44309 }, { "epoch": 0.8099511945454878, "grad_norm": 5.6154216328335, "learning_rate": 9.177586870830196e-07, "loss": 17.0382, "step": 44310 }, { "epoch": 0.8099694737419343, "grad_norm": 7.103522646095982, "learning_rate": 9.175877716705251e-07, "loss": 17.4646, "step": 44311 }, { "epoch": 0.8099877529383809, "grad_norm": 5.726040542079987, "learning_rate": 9.174168705665326e-07, "loss": 17.2655, "step": 44312 }, { "epoch": 0.8100060321348274, "grad_norm": 6.20074411699857, "learning_rate": 9.172459837716379e-07, "loss": 17.1541, "step": 44313 }, { "epoch": 0.8100243113312738, "grad_norm": 6.763197854459059, "learning_rate": 9.170751112864435e-07, "loss": 17.7601, "step": 44314 }, { "epoch": 0.8100425905277204, "grad_norm": 6.028454531453467, "learning_rate": 9.169042531115469e-07, "loss": 17.4649, "step": 44315 }, { "epoch": 0.8100608697241669, "grad_norm": 6.9253929237943, "learning_rate": 9.167334092475444e-07, "loss": 17.487, "step": 44316 }, { "epoch": 0.8100791489206135, "grad_norm": 5.7739878942775045, "learning_rate": 9.16562579695039e-07, "loss": 17.1728, "step": 44317 }, { "epoch": 0.81009742811706, "grad_norm": 8.606869087598005, "learning_rate": 9.163917644546261e-07, "loss": 17.86, "step": 44318 }, { "epoch": 0.8101157073135065, "grad_norm": 6.828223740018566, "learning_rate": 9.162209635269053e-07, "loss": 17.5195, "step": 44319 }, { "epoch": 0.810133986509953, "grad_norm": 4.988983122064728, "learning_rate": 9.160501769124768e-07, "loss": 16.7779, "step": 44320 }, { "epoch": 0.8101522657063995, "grad_norm": 4.926652610032658, "learning_rate": 9.158794046119385e-07, "loss": 16.8975, "step": 44321 }, { "epoch": 0.8101705449028461, "grad_norm": 6.620075315521409, "learning_rate": 9.157086466258868e-07, "loss": 17.5853, "step": 44322 }, { "epoch": 0.8101888240992926, "grad_norm": 6.639694426462262, "learning_rate": 9.155379029549239e-07, "loss": 17.5877, "step": 44323 }, { "epoch": 0.8102071032957391, "grad_norm": 5.9329578463816, "learning_rate": 9.153671735996455e-07, "loss": 17.1147, "step": 44324 }, { "epoch": 0.8102253824921857, "grad_norm": 5.421263352267292, "learning_rate": 9.151964585606498e-07, "loss": 17.223, "step": 44325 }, { "epoch": 0.8102436616886322, "grad_norm": 6.163477017925531, "learning_rate": 9.150257578385358e-07, "loss": 17.5743, "step": 44326 }, { "epoch": 0.8102619408850787, "grad_norm": 5.980316539179547, "learning_rate": 9.148550714339033e-07, "loss": 16.9048, "step": 44327 }, { "epoch": 0.8102802200815252, "grad_norm": 5.263448261951908, "learning_rate": 9.14684399347348e-07, "loss": 17.1071, "step": 44328 }, { "epoch": 0.8102984992779717, "grad_norm": 6.055155945198473, "learning_rate": 9.145137415794708e-07, "loss": 17.14, "step": 44329 }, { "epoch": 0.8103167784744183, "grad_norm": 6.876783556999181, "learning_rate": 9.14343098130867e-07, "loss": 17.1569, "step": 44330 }, { "epoch": 0.8103350576708648, "grad_norm": 5.6295446503783335, "learning_rate": 9.141724690021381e-07, "loss": 17.0669, "step": 44331 }, { "epoch": 0.8103533368673113, "grad_norm": 5.890058036463227, "learning_rate": 9.140018541938794e-07, "loss": 17.34, "step": 44332 }, { "epoch": 0.8103716160637578, "grad_norm": 5.915840035535606, "learning_rate": 9.138312537066885e-07, "loss": 17.5496, "step": 44333 }, { "epoch": 0.8103898952602043, "grad_norm": 7.521610799851394, "learning_rate": 9.136606675411658e-07, "loss": 18.2306, "step": 44334 }, { "epoch": 0.8104081744566509, "grad_norm": 5.410637098543946, "learning_rate": 9.134900956979064e-07, "loss": 17.0238, "step": 44335 }, { "epoch": 0.8104264536530974, "grad_norm": 5.62321040986703, "learning_rate": 9.133195381775096e-07, "loss": 17.0047, "step": 44336 }, { "epoch": 0.8104447328495439, "grad_norm": 5.607170150059922, "learning_rate": 9.131489949805744e-07, "loss": 17.1831, "step": 44337 }, { "epoch": 0.8104630120459905, "grad_norm": 5.713342126248853, "learning_rate": 9.129784661076979e-07, "loss": 16.9926, "step": 44338 }, { "epoch": 0.810481291242437, "grad_norm": 5.799108869328308, "learning_rate": 9.128079515594756e-07, "loss": 17.1816, "step": 44339 }, { "epoch": 0.8104995704388835, "grad_norm": 5.787322125075612, "learning_rate": 9.126374513365077e-07, "loss": 17.127, "step": 44340 }, { "epoch": 0.81051784963533, "grad_norm": 5.846249566929248, "learning_rate": 9.124669654393914e-07, "loss": 17.4668, "step": 44341 }, { "epoch": 0.8105361288317765, "grad_norm": 5.837638956014712, "learning_rate": 9.122964938687223e-07, "loss": 17.2068, "step": 44342 }, { "epoch": 0.8105544080282231, "grad_norm": 6.688759571588281, "learning_rate": 9.121260366251001e-07, "loss": 17.7606, "step": 44343 }, { "epoch": 0.8105726872246696, "grad_norm": 5.670659140228912, "learning_rate": 9.119555937091207e-07, "loss": 17.3488, "step": 44344 }, { "epoch": 0.8105909664211162, "grad_norm": 5.678056790685703, "learning_rate": 9.117851651213816e-07, "loss": 17.1142, "step": 44345 }, { "epoch": 0.8106092456175626, "grad_norm": 6.570004689814667, "learning_rate": 9.116147508624828e-07, "loss": 17.484, "step": 44346 }, { "epoch": 0.8106275248140091, "grad_norm": 5.2411697487714015, "learning_rate": 9.114443509330178e-07, "loss": 16.9618, "step": 44347 }, { "epoch": 0.8106458040104557, "grad_norm": 6.197256563054659, "learning_rate": 9.112739653335867e-07, "loss": 17.2895, "step": 44348 }, { "epoch": 0.8106640832069022, "grad_norm": 7.255855536085549, "learning_rate": 9.111035940647856e-07, "loss": 17.3339, "step": 44349 }, { "epoch": 0.8106823624033488, "grad_norm": 6.673215860994091, "learning_rate": 9.109332371272106e-07, "loss": 17.1545, "step": 44350 }, { "epoch": 0.8107006415997953, "grad_norm": 9.068789774574865, "learning_rate": 9.107628945214608e-07, "loss": 18.3043, "step": 44351 }, { "epoch": 0.8107189207962417, "grad_norm": 7.5916374920595855, "learning_rate": 9.105925662481324e-07, "loss": 17.5537, "step": 44352 }, { "epoch": 0.8107371999926883, "grad_norm": 6.240655709200962, "learning_rate": 9.104222523078205e-07, "loss": 17.329, "step": 44353 }, { "epoch": 0.8107554791891348, "grad_norm": 6.948558108880766, "learning_rate": 9.102519527011244e-07, "loss": 17.8198, "step": 44354 }, { "epoch": 0.8107737583855814, "grad_norm": 7.408782824837291, "learning_rate": 9.100816674286411e-07, "loss": 17.3978, "step": 44355 }, { "epoch": 0.8107920375820279, "grad_norm": 6.858146241678503, "learning_rate": 9.099113964909656e-07, "loss": 17.6967, "step": 44356 }, { "epoch": 0.8108103167784744, "grad_norm": 6.902638751540577, "learning_rate": 9.097411398886968e-07, "loss": 17.4883, "step": 44357 }, { "epoch": 0.810828595974921, "grad_norm": 6.554167233561377, "learning_rate": 9.095708976224288e-07, "loss": 17.458, "step": 44358 }, { "epoch": 0.8108468751713674, "grad_norm": 5.478073286967146, "learning_rate": 9.094006696927615e-07, "loss": 17.3287, "step": 44359 }, { "epoch": 0.810865154367814, "grad_norm": 6.2568743453432685, "learning_rate": 9.092304561002901e-07, "loss": 17.3569, "step": 44360 }, { "epoch": 0.8108834335642605, "grad_norm": 6.726074971770398, "learning_rate": 9.090602568456098e-07, "loss": 17.7981, "step": 44361 }, { "epoch": 0.810901712760707, "grad_norm": 7.445656768293683, "learning_rate": 9.088900719293192e-07, "loss": 17.8944, "step": 44362 }, { "epoch": 0.8109199919571536, "grad_norm": 6.701928911650348, "learning_rate": 9.087199013520126e-07, "loss": 17.4631, "step": 44363 }, { "epoch": 0.8109382711536001, "grad_norm": 5.566993872760043, "learning_rate": 9.085497451142877e-07, "loss": 17.0022, "step": 44364 }, { "epoch": 0.8109565503500467, "grad_norm": 5.435826556396748, "learning_rate": 9.083796032167425e-07, "loss": 17.1158, "step": 44365 }, { "epoch": 0.8109748295464931, "grad_norm": 5.390895731705182, "learning_rate": 9.082094756599713e-07, "loss": 16.7968, "step": 44366 }, { "epoch": 0.8109931087429396, "grad_norm": 4.632093935757207, "learning_rate": 9.080393624445699e-07, "loss": 16.72, "step": 44367 }, { "epoch": 0.8110113879393862, "grad_norm": 7.521953012840901, "learning_rate": 9.078692635711367e-07, "loss": 18.0678, "step": 44368 }, { "epoch": 0.8110296671358327, "grad_norm": 5.373994002599347, "learning_rate": 9.076991790402661e-07, "loss": 17.0804, "step": 44369 }, { "epoch": 0.8110479463322793, "grad_norm": 5.536944673726144, "learning_rate": 9.075291088525539e-07, "loss": 17.4493, "step": 44370 }, { "epoch": 0.8110662255287258, "grad_norm": 6.031210303942521, "learning_rate": 9.073590530085985e-07, "loss": 17.1752, "step": 44371 }, { "epoch": 0.8110845047251722, "grad_norm": 6.197382426270258, "learning_rate": 9.071890115089932e-07, "loss": 17.5678, "step": 44372 }, { "epoch": 0.8111027839216188, "grad_norm": 7.587807608922071, "learning_rate": 9.070189843543347e-07, "loss": 17.774, "step": 44373 }, { "epoch": 0.8111210631180653, "grad_norm": 5.839037013575092, "learning_rate": 9.068489715452212e-07, "loss": 17.1541, "step": 44374 }, { "epoch": 0.8111393423145119, "grad_norm": 9.354018714575345, "learning_rate": 9.066789730822456e-07, "loss": 18.264, "step": 44375 }, { "epoch": 0.8111576215109584, "grad_norm": 5.845733016643202, "learning_rate": 9.065089889660062e-07, "loss": 17.1058, "step": 44376 }, { "epoch": 0.8111759007074049, "grad_norm": 10.827089936544475, "learning_rate": 9.063390191970978e-07, "loss": 18.0286, "step": 44377 }, { "epoch": 0.8111941799038515, "grad_norm": 5.462757815788075, "learning_rate": 9.061690637761145e-07, "loss": 17.2507, "step": 44378 }, { "epoch": 0.8112124591002979, "grad_norm": 5.847828009161489, "learning_rate": 9.059991227036546e-07, "loss": 17.1566, "step": 44379 }, { "epoch": 0.8112307382967445, "grad_norm": 5.8305012268872805, "learning_rate": 9.058291959803111e-07, "loss": 17.3902, "step": 44380 }, { "epoch": 0.811249017493191, "grad_norm": 6.59224235225304, "learning_rate": 9.056592836066824e-07, "loss": 17.2509, "step": 44381 }, { "epoch": 0.8112672966896375, "grad_norm": 6.532781961001042, "learning_rate": 9.054893855833613e-07, "loss": 17.0318, "step": 44382 }, { "epoch": 0.8112855758860841, "grad_norm": 6.330701573077871, "learning_rate": 9.053195019109457e-07, "loss": 17.3771, "step": 44383 }, { "epoch": 0.8113038550825306, "grad_norm": 6.378958611436333, "learning_rate": 9.051496325900288e-07, "loss": 17.5517, "step": 44384 }, { "epoch": 0.8113221342789771, "grad_norm": 7.173943288344037, "learning_rate": 9.049797776212088e-07, "loss": 17.8762, "step": 44385 }, { "epoch": 0.8113404134754236, "grad_norm": 6.696509541047248, "learning_rate": 9.048099370050789e-07, "loss": 17.3317, "step": 44386 }, { "epoch": 0.8113586926718701, "grad_norm": 5.698419013838153, "learning_rate": 9.046401107422337e-07, "loss": 17.2332, "step": 44387 }, { "epoch": 0.8113769718683167, "grad_norm": 5.543742181679276, "learning_rate": 9.044702988332704e-07, "loss": 17.2599, "step": 44388 }, { "epoch": 0.8113952510647632, "grad_norm": 5.250911923008681, "learning_rate": 9.043005012787826e-07, "loss": 16.9252, "step": 44389 }, { "epoch": 0.8114135302612098, "grad_norm": 6.108738429258312, "learning_rate": 9.041307180793657e-07, "loss": 17.1764, "step": 44390 }, { "epoch": 0.8114318094576563, "grad_norm": 6.294597798954408, "learning_rate": 9.039609492356166e-07, "loss": 17.3213, "step": 44391 }, { "epoch": 0.8114500886541027, "grad_norm": 6.248288081688346, "learning_rate": 9.037911947481276e-07, "loss": 17.4928, "step": 44392 }, { "epoch": 0.8114683678505493, "grad_norm": 6.147886607509345, "learning_rate": 9.036214546174965e-07, "loss": 17.4098, "step": 44393 }, { "epoch": 0.8114866470469958, "grad_norm": 12.298944849802385, "learning_rate": 9.034517288443168e-07, "loss": 18.0428, "step": 44394 }, { "epoch": 0.8115049262434424, "grad_norm": 6.6332656388597515, "learning_rate": 9.032820174291817e-07, "loss": 17.7079, "step": 44395 }, { "epoch": 0.8115232054398889, "grad_norm": 7.623873669073504, "learning_rate": 9.031123203726894e-07, "loss": 17.9851, "step": 44396 }, { "epoch": 0.8115414846363354, "grad_norm": 6.0052891577868746, "learning_rate": 9.029426376754325e-07, "loss": 17.1007, "step": 44397 }, { "epoch": 0.8115597638327819, "grad_norm": 5.513016097397105, "learning_rate": 9.02772969338005e-07, "loss": 17.1124, "step": 44398 }, { "epoch": 0.8115780430292284, "grad_norm": 5.988709939331854, "learning_rate": 9.026033153610026e-07, "loss": 17.2336, "step": 44399 }, { "epoch": 0.8115963222256749, "grad_norm": 6.647859061023221, "learning_rate": 9.024336757450214e-07, "loss": 17.4479, "step": 44400 }, { "epoch": 0.8116146014221215, "grad_norm": 5.5866792538944265, "learning_rate": 9.022640504906532e-07, "loss": 17.1885, "step": 44401 }, { "epoch": 0.811632880618568, "grad_norm": 6.211808821677622, "learning_rate": 9.020944395984954e-07, "loss": 17.2788, "step": 44402 }, { "epoch": 0.8116511598150146, "grad_norm": 6.981772215396191, "learning_rate": 9.019248430691391e-07, "loss": 17.641, "step": 44403 }, { "epoch": 0.811669439011461, "grad_norm": 6.154831918546282, "learning_rate": 9.017552609031827e-07, "loss": 17.483, "step": 44404 }, { "epoch": 0.8116877182079075, "grad_norm": 6.773690542033465, "learning_rate": 9.01585693101218e-07, "loss": 17.6524, "step": 44405 }, { "epoch": 0.8117059974043541, "grad_norm": 7.133097677547966, "learning_rate": 9.014161396638388e-07, "loss": 17.6137, "step": 44406 }, { "epoch": 0.8117242766008006, "grad_norm": 6.499443075309628, "learning_rate": 9.012466005916415e-07, "loss": 17.5305, "step": 44407 }, { "epoch": 0.8117425557972472, "grad_norm": 7.293940123807522, "learning_rate": 9.010770758852177e-07, "loss": 17.8838, "step": 44408 }, { "epoch": 0.8117608349936937, "grad_norm": 5.860935121523284, "learning_rate": 9.009075655451633e-07, "loss": 17.0172, "step": 44409 }, { "epoch": 0.8117791141901401, "grad_norm": 5.285789381010642, "learning_rate": 9.007380695720736e-07, "loss": 16.8707, "step": 44410 }, { "epoch": 0.8117973933865867, "grad_norm": 6.184998246544786, "learning_rate": 9.00568587966541e-07, "loss": 17.2603, "step": 44411 }, { "epoch": 0.8118156725830332, "grad_norm": 5.7295132963459325, "learning_rate": 9.003991207291584e-07, "loss": 17.3571, "step": 44412 }, { "epoch": 0.8118339517794798, "grad_norm": 5.184284234192183, "learning_rate": 9.002296678605227e-07, "loss": 17.0144, "step": 44413 }, { "epoch": 0.8118522309759263, "grad_norm": 5.509367738882544, "learning_rate": 9.000602293612259e-07, "loss": 17.286, "step": 44414 }, { "epoch": 0.8118705101723728, "grad_norm": 4.801219374383123, "learning_rate": 8.998908052318617e-07, "loss": 17.0514, "step": 44415 }, { "epoch": 0.8118887893688194, "grad_norm": 5.85560768693088, "learning_rate": 8.99721395473025e-07, "loss": 17.2612, "step": 44416 }, { "epoch": 0.8119070685652658, "grad_norm": 6.659276357228719, "learning_rate": 8.995520000853081e-07, "loss": 17.5592, "step": 44417 }, { "epoch": 0.8119253477617124, "grad_norm": 7.757165807051164, "learning_rate": 8.99382619069305e-07, "loss": 18.0611, "step": 44418 }, { "epoch": 0.8119436269581589, "grad_norm": 5.081194041807407, "learning_rate": 8.992132524256119e-07, "loss": 16.8633, "step": 44419 }, { "epoch": 0.8119619061546054, "grad_norm": 6.583438380491448, "learning_rate": 8.99043900154819e-07, "loss": 17.4935, "step": 44420 }, { "epoch": 0.811980185351052, "grad_norm": 6.718593007545253, "learning_rate": 8.988745622575229e-07, "loss": 17.0868, "step": 44421 }, { "epoch": 0.8119984645474985, "grad_norm": 4.874702378694377, "learning_rate": 8.987052387343153e-07, "loss": 16.9971, "step": 44422 }, { "epoch": 0.8120167437439451, "grad_norm": 8.000577039821849, "learning_rate": 8.985359295857888e-07, "loss": 18.0493, "step": 44423 }, { "epoch": 0.8120350229403915, "grad_norm": 9.518202956645782, "learning_rate": 8.983666348125392e-07, "loss": 18.14, "step": 44424 }, { "epoch": 0.812053302136838, "grad_norm": 6.858489706003748, "learning_rate": 8.981973544151584e-07, "loss": 17.5351, "step": 44425 }, { "epoch": 0.8120715813332846, "grad_norm": 5.464080613128717, "learning_rate": 8.980280883942388e-07, "loss": 17.0254, "step": 44426 }, { "epoch": 0.8120898605297311, "grad_norm": 5.797199694494211, "learning_rate": 8.978588367503749e-07, "loss": 16.8107, "step": 44427 }, { "epoch": 0.8121081397261777, "grad_norm": 6.720390546377524, "learning_rate": 8.976895994841611e-07, "loss": 17.5566, "step": 44428 }, { "epoch": 0.8121264189226242, "grad_norm": 6.922407887968566, "learning_rate": 8.975203765961876e-07, "loss": 17.3785, "step": 44429 }, { "epoch": 0.8121446981190706, "grad_norm": 6.702230931293651, "learning_rate": 8.973511680870511e-07, "loss": 17.4187, "step": 44430 }, { "epoch": 0.8121629773155172, "grad_norm": 6.647729706872318, "learning_rate": 8.971819739573423e-07, "loss": 17.4903, "step": 44431 }, { "epoch": 0.8121812565119637, "grad_norm": 5.3040650369234, "learning_rate": 8.970127942076534e-07, "loss": 16.8082, "step": 44432 }, { "epoch": 0.8121995357084103, "grad_norm": 7.04065365885921, "learning_rate": 8.968436288385801e-07, "loss": 17.3785, "step": 44433 }, { "epoch": 0.8122178149048568, "grad_norm": 5.361229558628328, "learning_rate": 8.966744778507124e-07, "loss": 17.4319, "step": 44434 }, { "epoch": 0.8122360941013033, "grad_norm": 6.297780894827894, "learning_rate": 8.965053412446456e-07, "loss": 17.2954, "step": 44435 }, { "epoch": 0.8122543732977499, "grad_norm": 5.762703754803507, "learning_rate": 8.963362190209707e-07, "loss": 17.0094, "step": 44436 }, { "epoch": 0.8122726524941963, "grad_norm": 6.3363829956981315, "learning_rate": 8.961671111802811e-07, "loss": 17.525, "step": 44437 }, { "epoch": 0.8122909316906429, "grad_norm": 6.235326384506088, "learning_rate": 8.959980177231709e-07, "loss": 17.3215, "step": 44438 }, { "epoch": 0.8123092108870894, "grad_norm": 6.0794290195709815, "learning_rate": 8.958289386502317e-07, "loss": 17.5219, "step": 44439 }, { "epoch": 0.8123274900835359, "grad_norm": 7.608962058899544, "learning_rate": 8.956598739620542e-07, "loss": 17.3349, "step": 44440 }, { "epoch": 0.8123457692799825, "grad_norm": 5.4074680196536775, "learning_rate": 8.954908236592341e-07, "loss": 17.3991, "step": 44441 }, { "epoch": 0.812364048476429, "grad_norm": 5.573114911392109, "learning_rate": 8.953217877423632e-07, "loss": 17.3733, "step": 44442 }, { "epoch": 0.8123823276728755, "grad_norm": 6.596743474717008, "learning_rate": 8.951527662120313e-07, "loss": 17.3664, "step": 44443 }, { "epoch": 0.812400606869322, "grad_norm": 6.118190479648063, "learning_rate": 8.949837590688338e-07, "loss": 17.2356, "step": 44444 }, { "epoch": 0.8124188860657685, "grad_norm": 6.986340409387896, "learning_rate": 8.948147663133611e-07, "loss": 17.461, "step": 44445 }, { "epoch": 0.8124371652622151, "grad_norm": 6.783460967527257, "learning_rate": 8.946457879462062e-07, "loss": 17.5022, "step": 44446 }, { "epoch": 0.8124554444586616, "grad_norm": 5.885060718510763, "learning_rate": 8.94476823967963e-07, "loss": 17.2019, "step": 44447 }, { "epoch": 0.8124737236551082, "grad_norm": 5.937937165453195, "learning_rate": 8.943078743792227e-07, "loss": 17.4614, "step": 44448 }, { "epoch": 0.8124920028515547, "grad_norm": 6.077659130609736, "learning_rate": 8.941389391805749e-07, "loss": 17.2153, "step": 44449 }, { "epoch": 0.8125102820480011, "grad_norm": 6.029542260556726, "learning_rate": 8.939700183726157e-07, "loss": 16.9744, "step": 44450 }, { "epoch": 0.8125285612444477, "grad_norm": 6.297843923869904, "learning_rate": 8.938011119559337e-07, "loss": 17.5571, "step": 44451 }, { "epoch": 0.8125468404408942, "grad_norm": 5.8136292251636785, "learning_rate": 8.936322199311243e-07, "loss": 17.2614, "step": 44452 }, { "epoch": 0.8125651196373408, "grad_norm": 7.76073573656401, "learning_rate": 8.93463342298776e-07, "loss": 18.0248, "step": 44453 }, { "epoch": 0.8125833988337873, "grad_norm": 6.726817209024908, "learning_rate": 8.932944790594833e-07, "loss": 17.5827, "step": 44454 }, { "epoch": 0.8126016780302338, "grad_norm": 8.381477222391899, "learning_rate": 8.931256302138363e-07, "loss": 17.4005, "step": 44455 }, { "epoch": 0.8126199572266803, "grad_norm": 5.41178318670634, "learning_rate": 8.929567957624286e-07, "loss": 17.1046, "step": 44456 }, { "epoch": 0.8126382364231268, "grad_norm": 9.887135780112756, "learning_rate": 8.927879757058494e-07, "loss": 17.3905, "step": 44457 }, { "epoch": 0.8126565156195734, "grad_norm": 7.790289015988687, "learning_rate": 8.926191700446935e-07, "loss": 17.9526, "step": 44458 }, { "epoch": 0.8126747948160199, "grad_norm": 8.780245238012721, "learning_rate": 8.924503787795513e-07, "loss": 18.4897, "step": 44459 }, { "epoch": 0.8126930740124664, "grad_norm": 5.83835898576668, "learning_rate": 8.922816019110125e-07, "loss": 17.1791, "step": 44460 }, { "epoch": 0.812711353208913, "grad_norm": 7.553978257595259, "learning_rate": 8.921128394396716e-07, "loss": 17.6698, "step": 44461 }, { "epoch": 0.8127296324053594, "grad_norm": 6.392174503196732, "learning_rate": 8.91944091366117e-07, "loss": 17.1872, "step": 44462 }, { "epoch": 0.812747911601806, "grad_norm": 7.671009423863458, "learning_rate": 8.917753576909427e-07, "loss": 18.0043, "step": 44463 }, { "epoch": 0.8127661907982525, "grad_norm": 6.106983455101979, "learning_rate": 8.916066384147398e-07, "loss": 17.3766, "step": 44464 }, { "epoch": 0.812784469994699, "grad_norm": 6.247980463327025, "learning_rate": 8.914379335380979e-07, "loss": 17.5087, "step": 44465 }, { "epoch": 0.8128027491911456, "grad_norm": 7.952430436385045, "learning_rate": 8.912692430616116e-07, "loss": 17.7211, "step": 44466 }, { "epoch": 0.8128210283875921, "grad_norm": 4.7513760222227175, "learning_rate": 8.911005669858691e-07, "loss": 16.9101, "step": 44467 }, { "epoch": 0.8128393075840386, "grad_norm": 5.728058596113474, "learning_rate": 8.909319053114618e-07, "loss": 17.2432, "step": 44468 }, { "epoch": 0.8128575867804851, "grad_norm": 6.169980071442493, "learning_rate": 8.90763258038983e-07, "loss": 17.2512, "step": 44469 }, { "epoch": 0.8128758659769316, "grad_norm": 6.560855414760062, "learning_rate": 8.905946251690217e-07, "loss": 17.4881, "step": 44470 }, { "epoch": 0.8128941451733782, "grad_norm": 6.229502621077844, "learning_rate": 8.904260067021691e-07, "loss": 17.3784, "step": 44471 }, { "epoch": 0.8129124243698247, "grad_norm": 6.200954267585089, "learning_rate": 8.902574026390165e-07, "loss": 17.7723, "step": 44472 }, { "epoch": 0.8129307035662712, "grad_norm": 6.305633407274969, "learning_rate": 8.900888129801566e-07, "loss": 17.2421, "step": 44473 }, { "epoch": 0.8129489827627178, "grad_norm": 5.61576117497862, "learning_rate": 8.899202377261768e-07, "loss": 17.5494, "step": 44474 }, { "epoch": 0.8129672619591642, "grad_norm": 6.273707124086276, "learning_rate": 8.897516768776715e-07, "loss": 17.2058, "step": 44475 }, { "epoch": 0.8129855411556108, "grad_norm": 5.730161417097697, "learning_rate": 8.895831304352303e-07, "loss": 17.1621, "step": 44476 }, { "epoch": 0.8130038203520573, "grad_norm": 6.852666440690234, "learning_rate": 8.894145983994418e-07, "loss": 17.5644, "step": 44477 }, { "epoch": 0.8130220995485038, "grad_norm": 5.986149626992828, "learning_rate": 8.892460807709002e-07, "loss": 17.3603, "step": 44478 }, { "epoch": 0.8130403787449504, "grad_norm": 5.6410660217055515, "learning_rate": 8.890775775501936e-07, "loss": 17.231, "step": 44479 }, { "epoch": 0.8130586579413969, "grad_norm": 6.52451904327104, "learning_rate": 8.889090887379126e-07, "loss": 17.624, "step": 44480 }, { "epoch": 0.8130769371378435, "grad_norm": 7.010850560522596, "learning_rate": 8.887406143346489e-07, "loss": 17.9553, "step": 44481 }, { "epoch": 0.8130952163342899, "grad_norm": 6.65843170609579, "learning_rate": 8.88572154340992e-07, "loss": 17.4084, "step": 44482 }, { "epoch": 0.8131134955307364, "grad_norm": 7.146568948481952, "learning_rate": 8.884037087575342e-07, "loss": 17.634, "step": 44483 }, { "epoch": 0.813131774727183, "grad_norm": 4.749912430743128, "learning_rate": 8.882352775848646e-07, "loss": 16.8491, "step": 44484 }, { "epoch": 0.8131500539236295, "grad_norm": 6.536950015437021, "learning_rate": 8.880668608235727e-07, "loss": 17.4292, "step": 44485 }, { "epoch": 0.8131683331200761, "grad_norm": 6.223541593655417, "learning_rate": 8.878984584742506e-07, "loss": 17.3845, "step": 44486 }, { "epoch": 0.8131866123165226, "grad_norm": 6.018096237942751, "learning_rate": 8.877300705374875e-07, "loss": 17.3751, "step": 44487 }, { "epoch": 0.813204891512969, "grad_norm": 7.582307603808284, "learning_rate": 8.875616970138723e-07, "loss": 17.777, "step": 44488 }, { "epoch": 0.8132231707094156, "grad_norm": 7.015204124961002, "learning_rate": 8.873933379039984e-07, "loss": 17.5978, "step": 44489 }, { "epoch": 0.8132414499058621, "grad_norm": 6.363240377336749, "learning_rate": 8.87224993208452e-07, "loss": 17.3781, "step": 44490 }, { "epoch": 0.8132597291023087, "grad_norm": 5.851296103851321, "learning_rate": 8.870566629278255e-07, "loss": 17.248, "step": 44491 }, { "epoch": 0.8132780082987552, "grad_norm": 6.239700274439928, "learning_rate": 8.868883470627094e-07, "loss": 17.4248, "step": 44492 }, { "epoch": 0.8132962874952017, "grad_norm": 9.280191472223796, "learning_rate": 8.867200456136932e-07, "loss": 18.3858, "step": 44493 }, { "epoch": 0.8133145666916483, "grad_norm": 8.432383864895991, "learning_rate": 8.865517585813644e-07, "loss": 17.8559, "step": 44494 }, { "epoch": 0.8133328458880947, "grad_norm": 7.231106424456102, "learning_rate": 8.863834859663167e-07, "loss": 17.938, "step": 44495 }, { "epoch": 0.8133511250845413, "grad_norm": 5.020321039556376, "learning_rate": 8.86215227769136e-07, "loss": 17.0379, "step": 44496 }, { "epoch": 0.8133694042809878, "grad_norm": 7.01969249098129, "learning_rate": 8.860469839904157e-07, "loss": 17.9343, "step": 44497 }, { "epoch": 0.8133876834774343, "grad_norm": 6.673905081400321, "learning_rate": 8.858787546307435e-07, "loss": 18.0516, "step": 44498 }, { "epoch": 0.8134059626738809, "grad_norm": 7.275921947955432, "learning_rate": 8.857105396907084e-07, "loss": 17.2885, "step": 44499 }, { "epoch": 0.8134242418703274, "grad_norm": 4.8970865286168825, "learning_rate": 8.855423391709e-07, "loss": 16.8589, "step": 44500 }, { "epoch": 0.813442521066774, "grad_norm": 7.189021051119803, "learning_rate": 8.853741530719106e-07, "loss": 17.3118, "step": 44501 }, { "epoch": 0.8134608002632204, "grad_norm": 5.549325596167271, "learning_rate": 8.852059813943265e-07, "loss": 17.1578, "step": 44502 }, { "epoch": 0.8134790794596669, "grad_norm": 4.85443133264268, "learning_rate": 8.850378241387392e-07, "loss": 16.7585, "step": 44503 }, { "epoch": 0.8134973586561135, "grad_norm": 7.025304065803333, "learning_rate": 8.848696813057378e-07, "loss": 17.3681, "step": 44504 }, { "epoch": 0.81351563785256, "grad_norm": 4.606040859758879, "learning_rate": 8.847015528959096e-07, "loss": 16.7429, "step": 44505 }, { "epoch": 0.8135339170490066, "grad_norm": 6.463576798745203, "learning_rate": 8.845334389098464e-07, "loss": 17.246, "step": 44506 }, { "epoch": 0.813552196245453, "grad_norm": 4.8708544113400265, "learning_rate": 8.84365339348135e-07, "loss": 16.9108, "step": 44507 }, { "epoch": 0.8135704754418995, "grad_norm": 6.80982357131272, "learning_rate": 8.841972542113675e-07, "loss": 17.1037, "step": 44508 }, { "epoch": 0.8135887546383461, "grad_norm": 6.98276902922695, "learning_rate": 8.840291835001302e-07, "loss": 17.4002, "step": 44509 }, { "epoch": 0.8136070338347926, "grad_norm": 6.243838156944179, "learning_rate": 8.838611272150149e-07, "loss": 17.387, "step": 44510 }, { "epoch": 0.8136253130312392, "grad_norm": 7.38500788115666, "learning_rate": 8.836930853566072e-07, "loss": 18.0019, "step": 44511 }, { "epoch": 0.8136435922276857, "grad_norm": 5.410021573198425, "learning_rate": 8.835250579255e-07, "loss": 17.0406, "step": 44512 }, { "epoch": 0.8136618714241322, "grad_norm": 8.123793091745796, "learning_rate": 8.833570449222789e-07, "loss": 17.7721, "step": 44513 }, { "epoch": 0.8136801506205787, "grad_norm": 5.817405043071178, "learning_rate": 8.831890463475351e-07, "loss": 17.1996, "step": 44514 }, { "epoch": 0.8136984298170252, "grad_norm": 7.072279099806526, "learning_rate": 8.830210622018564e-07, "loss": 17.8904, "step": 44515 }, { "epoch": 0.8137167090134718, "grad_norm": 5.8543563307570645, "learning_rate": 8.828530924858308e-07, "loss": 17.0422, "step": 44516 }, { "epoch": 0.8137349882099183, "grad_norm": 7.146082818276101, "learning_rate": 8.826851372000478e-07, "loss": 17.4721, "step": 44517 }, { "epoch": 0.8137532674063648, "grad_norm": 7.211792133698094, "learning_rate": 8.825171963450968e-07, "loss": 17.4516, "step": 44518 }, { "epoch": 0.8137715466028114, "grad_norm": 4.79272798442664, "learning_rate": 8.823492699215652e-07, "loss": 16.9678, "step": 44519 }, { "epoch": 0.8137898257992578, "grad_norm": 5.955910276232588, "learning_rate": 8.821813579300425e-07, "loss": 17.0663, "step": 44520 }, { "epoch": 0.8138081049957044, "grad_norm": 5.648315152072677, "learning_rate": 8.820134603711177e-07, "loss": 17.0321, "step": 44521 }, { "epoch": 0.8138263841921509, "grad_norm": 6.457275863007109, "learning_rate": 8.818455772453766e-07, "loss": 17.357, "step": 44522 }, { "epoch": 0.8138446633885974, "grad_norm": 6.140022591623765, "learning_rate": 8.816777085534107e-07, "loss": 17.426, "step": 44523 }, { "epoch": 0.813862942585044, "grad_norm": 6.583941649149147, "learning_rate": 8.815098542958073e-07, "loss": 17.3838, "step": 44524 }, { "epoch": 0.8138812217814905, "grad_norm": 10.973964095498674, "learning_rate": 8.813420144731532e-07, "loss": 17.4002, "step": 44525 }, { "epoch": 0.8138995009779371, "grad_norm": 6.819325540875736, "learning_rate": 8.811741890860376e-07, "loss": 17.6956, "step": 44526 }, { "epoch": 0.8139177801743835, "grad_norm": 7.27502241271047, "learning_rate": 8.810063781350492e-07, "loss": 17.9531, "step": 44527 }, { "epoch": 0.81393605937083, "grad_norm": 5.9136779604649785, "learning_rate": 8.808385816207776e-07, "loss": 16.9949, "step": 44528 }, { "epoch": 0.8139543385672766, "grad_norm": 5.767699117489335, "learning_rate": 8.806707995438091e-07, "loss": 17.1295, "step": 44529 }, { "epoch": 0.8139726177637231, "grad_norm": 6.35940599548316, "learning_rate": 8.805030319047303e-07, "loss": 17.494, "step": 44530 }, { "epoch": 0.8139908969601697, "grad_norm": 6.732124784678012, "learning_rate": 8.803352787041325e-07, "loss": 17.6781, "step": 44531 }, { "epoch": 0.8140091761566162, "grad_norm": 6.451350423569538, "learning_rate": 8.801675399426024e-07, "loss": 17.7421, "step": 44532 }, { "epoch": 0.8140274553530626, "grad_norm": 6.156743842541617, "learning_rate": 8.799998156207257e-07, "loss": 17.2361, "step": 44533 }, { "epoch": 0.8140457345495092, "grad_norm": 6.653499278637472, "learning_rate": 8.798321057390941e-07, "loss": 17.356, "step": 44534 }, { "epoch": 0.8140640137459557, "grad_norm": 6.143256162433998, "learning_rate": 8.796644102982915e-07, "loss": 17.5023, "step": 44535 }, { "epoch": 0.8140822929424022, "grad_norm": 5.889739012976287, "learning_rate": 8.794967292989082e-07, "loss": 16.991, "step": 44536 }, { "epoch": 0.8141005721388488, "grad_norm": 7.3830647129962275, "learning_rate": 8.793290627415319e-07, "loss": 17.4857, "step": 44537 }, { "epoch": 0.8141188513352953, "grad_norm": 5.82911308199061, "learning_rate": 8.791614106267498e-07, "loss": 17.278, "step": 44538 }, { "epoch": 0.8141371305317419, "grad_norm": 5.4260779990804116, "learning_rate": 8.789937729551485e-07, "loss": 17.0155, "step": 44539 }, { "epoch": 0.8141554097281883, "grad_norm": 6.655758899147362, "learning_rate": 8.788261497273176e-07, "loss": 17.4241, "step": 44540 }, { "epoch": 0.8141736889246348, "grad_norm": 6.203136044219972, "learning_rate": 8.786585409438419e-07, "loss": 17.3726, "step": 44541 }, { "epoch": 0.8141919681210814, "grad_norm": 6.563046566471118, "learning_rate": 8.784909466053121e-07, "loss": 17.3956, "step": 44542 }, { "epoch": 0.8142102473175279, "grad_norm": 6.834752608930561, "learning_rate": 8.783233667123131e-07, "loss": 17.8426, "step": 44543 }, { "epoch": 0.8142285265139745, "grad_norm": 5.481234038054847, "learning_rate": 8.781558012654323e-07, "loss": 17.1093, "step": 44544 }, { "epoch": 0.814246805710421, "grad_norm": 6.609088870909815, "learning_rate": 8.779882502652582e-07, "loss": 17.7538, "step": 44545 }, { "epoch": 0.8142650849068674, "grad_norm": 7.360451453590116, "learning_rate": 8.778207137123778e-07, "loss": 18.2112, "step": 44546 }, { "epoch": 0.814283364103314, "grad_norm": 6.085844689952823, "learning_rate": 8.776531916073777e-07, "loss": 17.2522, "step": 44547 }, { "epoch": 0.8143016432997605, "grad_norm": 6.068148489701485, "learning_rate": 8.774856839508461e-07, "loss": 17.5179, "step": 44548 }, { "epoch": 0.8143199224962071, "grad_norm": 6.289818461577278, "learning_rate": 8.773181907433697e-07, "loss": 17.2274, "step": 44549 }, { "epoch": 0.8143382016926536, "grad_norm": 7.123048753512074, "learning_rate": 8.771507119855343e-07, "loss": 17.466, "step": 44550 }, { "epoch": 0.8143564808891001, "grad_norm": 4.645017786803849, "learning_rate": 8.769832476779289e-07, "loss": 16.791, "step": 44551 }, { "epoch": 0.8143747600855467, "grad_norm": 7.459600413944468, "learning_rate": 8.768157978211395e-07, "loss": 17.8164, "step": 44552 }, { "epoch": 0.8143930392819931, "grad_norm": 5.746068773694344, "learning_rate": 8.766483624157513e-07, "loss": 17.2691, "step": 44553 }, { "epoch": 0.8144113184784397, "grad_norm": 7.122590781013942, "learning_rate": 8.764809414623531e-07, "loss": 17.7193, "step": 44554 }, { "epoch": 0.8144295976748862, "grad_norm": 6.624261543628005, "learning_rate": 8.763135349615326e-07, "loss": 17.5427, "step": 44555 }, { "epoch": 0.8144478768713327, "grad_norm": 5.447718827379704, "learning_rate": 8.761461429138746e-07, "loss": 17.3252, "step": 44556 }, { "epoch": 0.8144661560677793, "grad_norm": 6.023429516325424, "learning_rate": 8.759787653199669e-07, "loss": 17.1649, "step": 44557 }, { "epoch": 0.8144844352642258, "grad_norm": 5.722286298371863, "learning_rate": 8.758114021803948e-07, "loss": 17.1656, "step": 44558 }, { "epoch": 0.8145027144606724, "grad_norm": 7.7655380992423435, "learning_rate": 8.756440534957472e-07, "loss": 18.1123, "step": 44559 }, { "epoch": 0.8145209936571188, "grad_norm": 6.745988967852038, "learning_rate": 8.754767192666092e-07, "loss": 17.4323, "step": 44560 }, { "epoch": 0.8145392728535653, "grad_norm": 5.579718202255074, "learning_rate": 8.753093994935663e-07, "loss": 17.2288, "step": 44561 }, { "epoch": 0.8145575520500119, "grad_norm": 5.6190519518537085, "learning_rate": 8.75142094177207e-07, "loss": 17.1865, "step": 44562 }, { "epoch": 0.8145758312464584, "grad_norm": 6.126266729828481, "learning_rate": 8.74974803318116e-07, "loss": 17.1652, "step": 44563 }, { "epoch": 0.814594110442905, "grad_norm": 4.973625246144049, "learning_rate": 8.748075269168799e-07, "loss": 16.8431, "step": 44564 }, { "epoch": 0.8146123896393515, "grad_norm": 5.7713616104869345, "learning_rate": 8.74640264974087e-07, "loss": 17.2093, "step": 44565 }, { "epoch": 0.8146306688357979, "grad_norm": 5.601188091624525, "learning_rate": 8.744730174903215e-07, "loss": 17.2716, "step": 44566 }, { "epoch": 0.8146489480322445, "grad_norm": 5.732057902988233, "learning_rate": 8.743057844661695e-07, "loss": 17.1992, "step": 44567 }, { "epoch": 0.814667227228691, "grad_norm": 4.312060889337503, "learning_rate": 8.741385659022184e-07, "loss": 16.5484, "step": 44568 }, { "epoch": 0.8146855064251376, "grad_norm": 6.581381309513797, "learning_rate": 8.739713617990541e-07, "loss": 17.4398, "step": 44569 }, { "epoch": 0.8147037856215841, "grad_norm": 7.00710049301468, "learning_rate": 8.738041721572605e-07, "loss": 17.6405, "step": 44570 }, { "epoch": 0.8147220648180306, "grad_norm": 5.699047591990797, "learning_rate": 8.736369969774266e-07, "loss": 17.0832, "step": 44571 }, { "epoch": 0.8147403440144771, "grad_norm": 5.719288448995691, "learning_rate": 8.734698362601357e-07, "loss": 17.154, "step": 44572 }, { "epoch": 0.8147586232109236, "grad_norm": 5.368578360294004, "learning_rate": 8.733026900059748e-07, "loss": 16.999, "step": 44573 }, { "epoch": 0.8147769024073702, "grad_norm": 6.891906531211821, "learning_rate": 8.731355582155315e-07, "loss": 17.8084, "step": 44574 }, { "epoch": 0.8147951816038167, "grad_norm": 6.380746070505782, "learning_rate": 8.729684408893885e-07, "loss": 17.4693, "step": 44575 }, { "epoch": 0.8148134608002632, "grad_norm": 6.4761582797240855, "learning_rate": 8.728013380281341e-07, "loss": 17.3153, "step": 44576 }, { "epoch": 0.8148317399967098, "grad_norm": 6.7908475555717525, "learning_rate": 8.726342496323525e-07, "loss": 17.4433, "step": 44577 }, { "epoch": 0.8148500191931562, "grad_norm": 8.494544524992333, "learning_rate": 8.724671757026293e-07, "loss": 17.8675, "step": 44578 }, { "epoch": 0.8148682983896028, "grad_norm": 6.420218320904862, "learning_rate": 8.723001162395512e-07, "loss": 17.4176, "step": 44579 }, { "epoch": 0.8148865775860493, "grad_norm": 5.499780810378607, "learning_rate": 8.721330712437021e-07, "loss": 17.1957, "step": 44580 }, { "epoch": 0.8149048567824958, "grad_norm": 5.816840507288677, "learning_rate": 8.719660407156694e-07, "loss": 17.0383, "step": 44581 }, { "epoch": 0.8149231359789424, "grad_norm": 4.828318070034496, "learning_rate": 8.717990246560359e-07, "loss": 16.8085, "step": 44582 }, { "epoch": 0.8149414151753889, "grad_norm": 8.117744540330925, "learning_rate": 8.716320230653902e-07, "loss": 17.8959, "step": 44583 }, { "epoch": 0.8149596943718355, "grad_norm": 5.25793104622471, "learning_rate": 8.714650359443149e-07, "loss": 17.0623, "step": 44584 }, { "epoch": 0.8149779735682819, "grad_norm": 6.10229001656543, "learning_rate": 8.712980632933976e-07, "loss": 16.8803, "step": 44585 }, { "epoch": 0.8149962527647284, "grad_norm": 6.01572759236113, "learning_rate": 8.71131105113222e-07, "loss": 17.3491, "step": 44586 }, { "epoch": 0.815014531961175, "grad_norm": 6.841571799956574, "learning_rate": 8.709641614043729e-07, "loss": 17.346, "step": 44587 }, { "epoch": 0.8150328111576215, "grad_norm": 5.760450597166234, "learning_rate": 8.707972321674368e-07, "loss": 17.0268, "step": 44588 }, { "epoch": 0.8150510903540681, "grad_norm": 5.082963480927903, "learning_rate": 8.706303174029973e-07, "loss": 16.8989, "step": 44589 }, { "epoch": 0.8150693695505146, "grad_norm": 6.40293680286054, "learning_rate": 8.704634171116399e-07, "loss": 17.4846, "step": 44590 }, { "epoch": 0.815087648746961, "grad_norm": 6.90368017771202, "learning_rate": 8.702965312939515e-07, "loss": 17.7437, "step": 44591 }, { "epoch": 0.8151059279434076, "grad_norm": 5.814540486029076, "learning_rate": 8.701296599505137e-07, "loss": 17.0306, "step": 44592 }, { "epoch": 0.8151242071398541, "grad_norm": 7.502370065039484, "learning_rate": 8.699628030819146e-07, "loss": 17.0732, "step": 44593 }, { "epoch": 0.8151424863363007, "grad_norm": 6.766594120340286, "learning_rate": 8.697959606887374e-07, "loss": 17.3868, "step": 44594 }, { "epoch": 0.8151607655327472, "grad_norm": 5.873791153864922, "learning_rate": 8.696291327715656e-07, "loss": 17.5137, "step": 44595 }, { "epoch": 0.8151790447291937, "grad_norm": 6.380898580260444, "learning_rate": 8.694623193309865e-07, "loss": 17.607, "step": 44596 }, { "epoch": 0.8151973239256403, "grad_norm": 7.20704841554856, "learning_rate": 8.692955203675834e-07, "loss": 17.7315, "step": 44597 }, { "epoch": 0.8152156031220867, "grad_norm": 4.638097191376877, "learning_rate": 8.691287358819406e-07, "loss": 16.6159, "step": 44598 }, { "epoch": 0.8152338823185333, "grad_norm": 6.93343265983422, "learning_rate": 8.689619658746424e-07, "loss": 17.2392, "step": 44599 }, { "epoch": 0.8152521615149798, "grad_norm": 5.985103790391105, "learning_rate": 8.687952103462755e-07, "loss": 17.2053, "step": 44600 }, { "epoch": 0.8152704407114263, "grad_norm": 6.82066476783261, "learning_rate": 8.686284692974212e-07, "loss": 17.4637, "step": 44601 }, { "epoch": 0.8152887199078729, "grad_norm": 5.952100252683776, "learning_rate": 8.684617427286673e-07, "loss": 17.1996, "step": 44602 }, { "epoch": 0.8153069991043194, "grad_norm": 5.958347207649445, "learning_rate": 8.682950306405951e-07, "loss": 17.3126, "step": 44603 }, { "epoch": 0.8153252783007658, "grad_norm": 6.283724995896958, "learning_rate": 8.681283330337919e-07, "loss": 17.4991, "step": 44604 }, { "epoch": 0.8153435574972124, "grad_norm": 6.623803204797526, "learning_rate": 8.6796164990884e-07, "loss": 17.6371, "step": 44605 }, { "epoch": 0.8153618366936589, "grad_norm": 6.087973472697898, "learning_rate": 8.677949812663228e-07, "loss": 17.3638, "step": 44606 }, { "epoch": 0.8153801158901055, "grad_norm": 6.031895254414662, "learning_rate": 8.67628327106827e-07, "loss": 17.3722, "step": 44607 }, { "epoch": 0.815398395086552, "grad_norm": 6.329536150640077, "learning_rate": 8.674616874309338e-07, "loss": 17.4646, "step": 44608 }, { "epoch": 0.8154166742829985, "grad_norm": 5.105878193891244, "learning_rate": 8.67295062239229e-07, "loss": 17.0374, "step": 44609 }, { "epoch": 0.8154349534794451, "grad_norm": 5.646213383340673, "learning_rate": 8.671284515322975e-07, "loss": 17.2597, "step": 44610 }, { "epoch": 0.8154532326758915, "grad_norm": 8.415865290435555, "learning_rate": 8.669618553107223e-07, "loss": 17.9182, "step": 44611 }, { "epoch": 0.8154715118723381, "grad_norm": 6.855473843868134, "learning_rate": 8.667952735750856e-07, "loss": 17.3758, "step": 44612 }, { "epoch": 0.8154897910687846, "grad_norm": 4.927410876133074, "learning_rate": 8.666287063259743e-07, "loss": 16.8264, "step": 44613 }, { "epoch": 0.8155080702652311, "grad_norm": 7.7161208892434265, "learning_rate": 8.664621535639705e-07, "loss": 17.9535, "step": 44614 }, { "epoch": 0.8155263494616777, "grad_norm": 5.83556748458751, "learning_rate": 8.662956152896567e-07, "loss": 17.2565, "step": 44615 }, { "epoch": 0.8155446286581242, "grad_norm": 7.347440826029882, "learning_rate": 8.661290915036202e-07, "loss": 17.5205, "step": 44616 }, { "epoch": 0.8155629078545708, "grad_norm": 6.7057314190686705, "learning_rate": 8.659625822064405e-07, "loss": 17.5394, "step": 44617 }, { "epoch": 0.8155811870510172, "grad_norm": 5.760352091004655, "learning_rate": 8.657960873987031e-07, "loss": 17.2744, "step": 44618 }, { "epoch": 0.8155994662474637, "grad_norm": 5.7892380888350825, "learning_rate": 8.656296070809934e-07, "loss": 16.8499, "step": 44619 }, { "epoch": 0.8156177454439103, "grad_norm": 4.58275507265387, "learning_rate": 8.654631412538916e-07, "loss": 16.9991, "step": 44620 }, { "epoch": 0.8156360246403568, "grad_norm": 7.677301518074812, "learning_rate": 8.652966899179843e-07, "loss": 17.5389, "step": 44621 }, { "epoch": 0.8156543038368034, "grad_norm": 6.511463922758091, "learning_rate": 8.651302530738532e-07, "loss": 17.6835, "step": 44622 }, { "epoch": 0.8156725830332499, "grad_norm": 6.405992646809525, "learning_rate": 8.649638307220803e-07, "loss": 17.3001, "step": 44623 }, { "epoch": 0.8156908622296963, "grad_norm": 5.454300624307301, "learning_rate": 8.647974228632516e-07, "loss": 17.0167, "step": 44624 }, { "epoch": 0.8157091414261429, "grad_norm": 6.031283565872286, "learning_rate": 8.646310294979493e-07, "loss": 17.2421, "step": 44625 }, { "epoch": 0.8157274206225894, "grad_norm": 6.052934616677293, "learning_rate": 8.644646506267551e-07, "loss": 17.2806, "step": 44626 }, { "epoch": 0.815745699819036, "grad_norm": 5.681701981112946, "learning_rate": 8.642982862502536e-07, "loss": 17.4094, "step": 44627 }, { "epoch": 0.8157639790154825, "grad_norm": 6.528343095659476, "learning_rate": 8.641319363690287e-07, "loss": 17.3083, "step": 44628 }, { "epoch": 0.815782258211929, "grad_norm": 6.039278864181428, "learning_rate": 8.639656009836611e-07, "loss": 17.3615, "step": 44629 }, { "epoch": 0.8158005374083755, "grad_norm": 8.173316295882014, "learning_rate": 8.637992800947371e-07, "loss": 17.414, "step": 44630 }, { "epoch": 0.815818816604822, "grad_norm": 7.902139693719851, "learning_rate": 8.636329737028371e-07, "loss": 17.7562, "step": 44631 }, { "epoch": 0.8158370958012686, "grad_norm": 5.294430508619951, "learning_rate": 8.634666818085436e-07, "loss": 16.8984, "step": 44632 }, { "epoch": 0.8158553749977151, "grad_norm": 6.279857319406055, "learning_rate": 8.63300404412441e-07, "loss": 17.2624, "step": 44633 }, { "epoch": 0.8158736541941616, "grad_norm": 7.005181064539611, "learning_rate": 8.631341415151112e-07, "loss": 17.6232, "step": 44634 }, { "epoch": 0.8158919333906082, "grad_norm": 6.884993595648751, "learning_rate": 8.629678931171381e-07, "loss": 17.5574, "step": 44635 }, { "epoch": 0.8159102125870547, "grad_norm": 6.719876426948063, "learning_rate": 8.628016592191019e-07, "loss": 17.7956, "step": 44636 }, { "epoch": 0.8159284917835012, "grad_norm": 6.164782282237896, "learning_rate": 8.626354398215875e-07, "loss": 17.1476, "step": 44637 }, { "epoch": 0.8159467709799477, "grad_norm": 5.96712991974008, "learning_rate": 8.624692349251779e-07, "loss": 17.105, "step": 44638 }, { "epoch": 0.8159650501763942, "grad_norm": 4.515388732215886, "learning_rate": 8.623030445304547e-07, "loss": 16.8144, "step": 44639 }, { "epoch": 0.8159833293728408, "grad_norm": 5.888779181382697, "learning_rate": 8.621368686379989e-07, "loss": 17.1989, "step": 44640 }, { "epoch": 0.8160016085692873, "grad_norm": 6.200141042687027, "learning_rate": 8.619707072483952e-07, "loss": 17.346, "step": 44641 }, { "epoch": 0.8160198877657339, "grad_norm": 5.993334806732755, "learning_rate": 8.618045603622255e-07, "loss": 17.2373, "step": 44642 }, { "epoch": 0.8160381669621803, "grad_norm": 5.667125094395166, "learning_rate": 8.616384279800705e-07, "loss": 17.3257, "step": 44643 }, { "epoch": 0.8160564461586268, "grad_norm": 6.218545777203784, "learning_rate": 8.614723101025135e-07, "loss": 17.1524, "step": 44644 }, { "epoch": 0.8160747253550734, "grad_norm": 5.015907352996868, "learning_rate": 8.613062067301387e-07, "loss": 16.9526, "step": 44645 }, { "epoch": 0.8160930045515199, "grad_norm": 7.095454667324031, "learning_rate": 8.611401178635242e-07, "loss": 18.0346, "step": 44646 }, { "epoch": 0.8161112837479665, "grad_norm": 5.658833429688483, "learning_rate": 8.609740435032565e-07, "loss": 17.087, "step": 44647 }, { "epoch": 0.816129562944413, "grad_norm": 5.140634738033425, "learning_rate": 8.608079836499156e-07, "loss": 16.9995, "step": 44648 }, { "epoch": 0.8161478421408594, "grad_norm": 8.923714115874455, "learning_rate": 8.60641938304082e-07, "loss": 18.6832, "step": 44649 }, { "epoch": 0.816166121337306, "grad_norm": 7.268090253607769, "learning_rate": 8.604759074663405e-07, "loss": 17.5564, "step": 44650 }, { "epoch": 0.8161844005337525, "grad_norm": 7.319670142216354, "learning_rate": 8.603098911372709e-07, "loss": 17.8588, "step": 44651 }, { "epoch": 0.8162026797301991, "grad_norm": 6.134982695210906, "learning_rate": 8.601438893174568e-07, "loss": 17.3723, "step": 44652 }, { "epoch": 0.8162209589266456, "grad_norm": 6.651431554027728, "learning_rate": 8.599779020074777e-07, "loss": 17.0763, "step": 44653 }, { "epoch": 0.8162392381230921, "grad_norm": 7.041262936079658, "learning_rate": 8.598119292079171e-07, "loss": 17.5987, "step": 44654 }, { "epoch": 0.8162575173195387, "grad_norm": 7.153009875591122, "learning_rate": 8.596459709193578e-07, "loss": 17.4777, "step": 44655 }, { "epoch": 0.8162757965159851, "grad_norm": 4.908880482703414, "learning_rate": 8.594800271423797e-07, "loss": 16.9033, "step": 44656 }, { "epoch": 0.8162940757124317, "grad_norm": 5.494981232353067, "learning_rate": 8.593140978775633e-07, "loss": 17.0931, "step": 44657 }, { "epoch": 0.8163123549088782, "grad_norm": 6.596517670864552, "learning_rate": 8.591481831254933e-07, "loss": 17.6885, "step": 44658 }, { "epoch": 0.8163306341053247, "grad_norm": 8.463947308467326, "learning_rate": 8.589822828867501e-07, "loss": 18.2178, "step": 44659 }, { "epoch": 0.8163489133017713, "grad_norm": 6.476803321913116, "learning_rate": 8.588163971619129e-07, "loss": 17.5388, "step": 44660 }, { "epoch": 0.8163671924982178, "grad_norm": 6.723703962714048, "learning_rate": 8.586505259515659e-07, "loss": 17.7554, "step": 44661 }, { "epoch": 0.8163854716946644, "grad_norm": 5.428016990450453, "learning_rate": 8.584846692562887e-07, "loss": 17.0038, "step": 44662 }, { "epoch": 0.8164037508911108, "grad_norm": 5.438849031961976, "learning_rate": 8.583188270766629e-07, "loss": 16.9484, "step": 44663 }, { "epoch": 0.8164220300875573, "grad_norm": 7.632949300649438, "learning_rate": 8.581529994132715e-07, "loss": 17.7975, "step": 44664 }, { "epoch": 0.8164403092840039, "grad_norm": 7.35461053123974, "learning_rate": 8.579871862666933e-07, "loss": 17.5121, "step": 44665 }, { "epoch": 0.8164585884804504, "grad_norm": 8.92965190227236, "learning_rate": 8.578213876375119e-07, "loss": 18.5747, "step": 44666 }, { "epoch": 0.816476867676897, "grad_norm": 4.968402778878188, "learning_rate": 8.576556035263073e-07, "loss": 16.9857, "step": 44667 }, { "epoch": 0.8164951468733435, "grad_norm": 5.695469388050348, "learning_rate": 8.574898339336585e-07, "loss": 17.3029, "step": 44668 }, { "epoch": 0.8165134260697899, "grad_norm": 5.959084147903307, "learning_rate": 8.5732407886015e-07, "loss": 17.5923, "step": 44669 }, { "epoch": 0.8165317052662365, "grad_norm": 6.871444152124276, "learning_rate": 8.571583383063614e-07, "loss": 17.7996, "step": 44670 }, { "epoch": 0.816549984462683, "grad_norm": 4.926642125663891, "learning_rate": 8.569926122728717e-07, "loss": 16.9867, "step": 44671 }, { "epoch": 0.8165682636591295, "grad_norm": 6.574527334337575, "learning_rate": 8.568269007602637e-07, "loss": 17.3364, "step": 44672 }, { "epoch": 0.8165865428555761, "grad_norm": 6.478476951876682, "learning_rate": 8.566612037691186e-07, "loss": 17.6153, "step": 44673 }, { "epoch": 0.8166048220520226, "grad_norm": 6.096956907183877, "learning_rate": 8.564955213000153e-07, "loss": 17.4666, "step": 44674 }, { "epoch": 0.8166231012484692, "grad_norm": 6.50540573237507, "learning_rate": 8.563298533535375e-07, "loss": 17.3023, "step": 44675 }, { "epoch": 0.8166413804449156, "grad_norm": 6.842537832699797, "learning_rate": 8.561641999302634e-07, "loss": 17.5046, "step": 44676 }, { "epoch": 0.8166596596413621, "grad_norm": 5.91155391024338, "learning_rate": 8.55998561030773e-07, "loss": 17.0895, "step": 44677 }, { "epoch": 0.8166779388378087, "grad_norm": 7.360832784217087, "learning_rate": 8.558329366556495e-07, "loss": 18.4128, "step": 44678 }, { "epoch": 0.8166962180342552, "grad_norm": 5.876662325359045, "learning_rate": 8.556673268054716e-07, "loss": 17.3654, "step": 44679 }, { "epoch": 0.8167144972307018, "grad_norm": 5.947420608195309, "learning_rate": 8.555017314808189e-07, "loss": 17.3071, "step": 44680 }, { "epoch": 0.8167327764271483, "grad_norm": 7.195152691979575, "learning_rate": 8.553361506822732e-07, "loss": 17.7873, "step": 44681 }, { "epoch": 0.8167510556235947, "grad_norm": 7.608294845461373, "learning_rate": 8.551705844104147e-07, "loss": 17.6565, "step": 44682 }, { "epoch": 0.8167693348200413, "grad_norm": 5.222226209244133, "learning_rate": 8.550050326658249e-07, "loss": 17.0782, "step": 44683 }, { "epoch": 0.8167876140164878, "grad_norm": 6.216506842983744, "learning_rate": 8.548394954490824e-07, "loss": 17.469, "step": 44684 }, { "epoch": 0.8168058932129344, "grad_norm": 6.933949786586842, "learning_rate": 8.546739727607673e-07, "loss": 17.9269, "step": 44685 }, { "epoch": 0.8168241724093809, "grad_norm": 5.803774457296807, "learning_rate": 8.545084646014607e-07, "loss": 17.2037, "step": 44686 }, { "epoch": 0.8168424516058274, "grad_norm": 7.280402973811752, "learning_rate": 8.543429709717422e-07, "loss": 17.5669, "step": 44687 }, { "epoch": 0.816860730802274, "grad_norm": 5.624424432097985, "learning_rate": 8.541774918721912e-07, "loss": 17.1608, "step": 44688 }, { "epoch": 0.8168790099987204, "grad_norm": 6.432541133538944, "learning_rate": 8.540120273033892e-07, "loss": 17.393, "step": 44689 }, { "epoch": 0.816897289195167, "grad_norm": 5.571009732152215, "learning_rate": 8.538465772659138e-07, "loss": 17.1075, "step": 44690 }, { "epoch": 0.8169155683916135, "grad_norm": 6.218630104400313, "learning_rate": 8.536811417603463e-07, "loss": 17.0071, "step": 44691 }, { "epoch": 0.81693384758806, "grad_norm": 5.9400703516619435, "learning_rate": 8.535157207872685e-07, "loss": 17.2925, "step": 44692 }, { "epoch": 0.8169521267845066, "grad_norm": 6.217595080370381, "learning_rate": 8.533503143472577e-07, "loss": 17.1539, "step": 44693 }, { "epoch": 0.816970405980953, "grad_norm": 6.736704170303604, "learning_rate": 8.531849224408933e-07, "loss": 17.5635, "step": 44694 }, { "epoch": 0.8169886851773996, "grad_norm": 9.267748382067504, "learning_rate": 8.530195450687573e-07, "loss": 17.7255, "step": 44695 }, { "epoch": 0.8170069643738461, "grad_norm": 5.437117983633676, "learning_rate": 8.528541822314263e-07, "loss": 17.016, "step": 44696 }, { "epoch": 0.8170252435702926, "grad_norm": 7.974359418446147, "learning_rate": 8.526888339294825e-07, "loss": 17.8467, "step": 44697 }, { "epoch": 0.8170435227667392, "grad_norm": 6.53124702285602, "learning_rate": 8.525235001635052e-07, "loss": 17.6007, "step": 44698 }, { "epoch": 0.8170618019631857, "grad_norm": 6.708186857132719, "learning_rate": 8.523581809340714e-07, "loss": 17.52, "step": 44699 }, { "epoch": 0.8170800811596323, "grad_norm": 6.923622579888994, "learning_rate": 8.521928762417625e-07, "loss": 17.7986, "step": 44700 }, { "epoch": 0.8170983603560787, "grad_norm": 5.817882058413285, "learning_rate": 8.520275860871585e-07, "loss": 17.1313, "step": 44701 }, { "epoch": 0.8171166395525252, "grad_norm": 6.083008655756214, "learning_rate": 8.518623104708374e-07, "loss": 17.1548, "step": 44702 }, { "epoch": 0.8171349187489718, "grad_norm": 6.425202656607151, "learning_rate": 8.516970493933796e-07, "loss": 17.4351, "step": 44703 }, { "epoch": 0.8171531979454183, "grad_norm": 5.573962004823198, "learning_rate": 8.515318028553637e-07, "loss": 17.1075, "step": 44704 }, { "epoch": 0.8171714771418649, "grad_norm": 5.750767346245838, "learning_rate": 8.513665708573676e-07, "loss": 17.2841, "step": 44705 }, { "epoch": 0.8171897563383114, "grad_norm": 5.06547302456621, "learning_rate": 8.51201353399973e-07, "loss": 16.9113, "step": 44706 }, { "epoch": 0.8172080355347578, "grad_norm": 6.640966660614316, "learning_rate": 8.510361504837561e-07, "loss": 17.8584, "step": 44707 }, { "epoch": 0.8172263147312044, "grad_norm": 5.726611852322078, "learning_rate": 8.508709621092992e-07, "loss": 17.0896, "step": 44708 }, { "epoch": 0.8172445939276509, "grad_norm": 6.680792139712611, "learning_rate": 8.50705788277178e-07, "loss": 17.3853, "step": 44709 }, { "epoch": 0.8172628731240975, "grad_norm": 8.283007920242607, "learning_rate": 8.50540628987973e-07, "loss": 18.0778, "step": 44710 }, { "epoch": 0.817281152320544, "grad_norm": 6.889829639808452, "learning_rate": 8.50375484242264e-07, "loss": 17.7606, "step": 44711 }, { "epoch": 0.8172994315169905, "grad_norm": 7.372719987840036, "learning_rate": 8.502103540406292e-07, "loss": 17.7162, "step": 44712 }, { "epoch": 0.8173177107134371, "grad_norm": 7.160666088593777, "learning_rate": 8.500452383836455e-07, "loss": 17.6122, "step": 44713 }, { "epoch": 0.8173359899098835, "grad_norm": 7.544164442509273, "learning_rate": 8.498801372718946e-07, "loss": 17.6863, "step": 44714 }, { "epoch": 0.8173542691063301, "grad_norm": 5.219678288216836, "learning_rate": 8.497150507059537e-07, "loss": 16.9532, "step": 44715 }, { "epoch": 0.8173725483027766, "grad_norm": 5.4128028461914965, "learning_rate": 8.495499786864004e-07, "loss": 16.9628, "step": 44716 }, { "epoch": 0.8173908274992231, "grad_norm": 5.658267368709325, "learning_rate": 8.493849212138139e-07, "loss": 17.1433, "step": 44717 }, { "epoch": 0.8174091066956697, "grad_norm": 6.141747688830962, "learning_rate": 8.492198782887739e-07, "loss": 17.3395, "step": 44718 }, { "epoch": 0.8174273858921162, "grad_norm": 6.655568344574626, "learning_rate": 8.490548499118578e-07, "loss": 17.3762, "step": 44719 }, { "epoch": 0.8174456650885628, "grad_norm": 6.296453200401222, "learning_rate": 8.488898360836445e-07, "loss": 17.0474, "step": 44720 }, { "epoch": 0.8174639442850092, "grad_norm": 5.724746715345682, "learning_rate": 8.48724836804713e-07, "loss": 17.1353, "step": 44721 }, { "epoch": 0.8174822234814557, "grad_norm": 6.261911794111293, "learning_rate": 8.485598520756388e-07, "loss": 17.276, "step": 44722 }, { "epoch": 0.8175005026779023, "grad_norm": 7.363403986057204, "learning_rate": 8.483948818970034e-07, "loss": 17.6624, "step": 44723 }, { "epoch": 0.8175187818743488, "grad_norm": 6.040155859106142, "learning_rate": 8.482299262693838e-07, "loss": 17.5564, "step": 44724 }, { "epoch": 0.8175370610707954, "grad_norm": 5.84475552532486, "learning_rate": 8.48064985193357e-07, "loss": 17.4923, "step": 44725 }, { "epoch": 0.8175553402672419, "grad_norm": 4.500849514082517, "learning_rate": 8.479000586695019e-07, "loss": 16.7022, "step": 44726 }, { "epoch": 0.8175736194636883, "grad_norm": 6.437946660675341, "learning_rate": 8.477351466983968e-07, "loss": 17.345, "step": 44727 }, { "epoch": 0.8175918986601349, "grad_norm": 6.11029546013621, "learning_rate": 8.47570249280621e-07, "loss": 17.3338, "step": 44728 }, { "epoch": 0.8176101778565814, "grad_norm": 5.347057549394195, "learning_rate": 8.474053664167514e-07, "loss": 17.1752, "step": 44729 }, { "epoch": 0.817628457053028, "grad_norm": 6.742309337866228, "learning_rate": 8.472404981073639e-07, "loss": 17.6197, "step": 44730 }, { "epoch": 0.8176467362494745, "grad_norm": 5.509503748351037, "learning_rate": 8.470756443530398e-07, "loss": 17.049, "step": 44731 }, { "epoch": 0.817665015445921, "grad_norm": 6.20772744800139, "learning_rate": 8.46910805154355e-07, "loss": 17.6428, "step": 44732 }, { "epoch": 0.8176832946423676, "grad_norm": 7.552025287716897, "learning_rate": 8.467459805118861e-07, "loss": 17.9922, "step": 44733 }, { "epoch": 0.817701573838814, "grad_norm": 6.657263350485588, "learning_rate": 8.465811704262134e-07, "loss": 17.3197, "step": 44734 }, { "epoch": 0.8177198530352606, "grad_norm": 5.749007327612527, "learning_rate": 8.464163748979121e-07, "loss": 17.2577, "step": 44735 }, { "epoch": 0.8177381322317071, "grad_norm": 5.888762858953342, "learning_rate": 8.462515939275612e-07, "loss": 17.174, "step": 44736 }, { "epoch": 0.8177564114281536, "grad_norm": 5.58583279598117, "learning_rate": 8.460868275157386e-07, "loss": 17.2207, "step": 44737 }, { "epoch": 0.8177746906246002, "grad_norm": 7.163052134802129, "learning_rate": 8.459220756630216e-07, "loss": 17.6284, "step": 44738 }, { "epoch": 0.8177929698210467, "grad_norm": 5.334154439881026, "learning_rate": 8.457573383699863e-07, "loss": 16.7629, "step": 44739 }, { "epoch": 0.8178112490174931, "grad_norm": 4.8899349886971715, "learning_rate": 8.455926156372119e-07, "loss": 17.0029, "step": 44740 }, { "epoch": 0.8178295282139397, "grad_norm": 5.469560245812388, "learning_rate": 8.454279074652738e-07, "loss": 17.1737, "step": 44741 }, { "epoch": 0.8178478074103862, "grad_norm": 6.824152168613959, "learning_rate": 8.452632138547517e-07, "loss": 17.5407, "step": 44742 }, { "epoch": 0.8178660866068328, "grad_norm": 6.872689578782799, "learning_rate": 8.450985348062213e-07, "loss": 17.7971, "step": 44743 }, { "epoch": 0.8178843658032793, "grad_norm": 7.427385280563188, "learning_rate": 8.449338703202586e-07, "loss": 17.8957, "step": 44744 }, { "epoch": 0.8179026449997258, "grad_norm": 5.646764614759032, "learning_rate": 8.447692203974422e-07, "loss": 17.0872, "step": 44745 }, { "epoch": 0.8179209241961723, "grad_norm": 5.851136309292336, "learning_rate": 8.446045850383505e-07, "loss": 17.0779, "step": 44746 }, { "epoch": 0.8179392033926188, "grad_norm": 6.6307236829712295, "learning_rate": 8.44439964243558e-07, "loss": 17.6698, "step": 44747 }, { "epoch": 0.8179574825890654, "grad_norm": 7.736340960357718, "learning_rate": 8.442753580136442e-07, "loss": 17.828, "step": 44748 }, { "epoch": 0.8179757617855119, "grad_norm": 6.497278936173849, "learning_rate": 8.441107663491843e-07, "loss": 17.3706, "step": 44749 }, { "epoch": 0.8179940409819584, "grad_norm": 6.279614956570963, "learning_rate": 8.439461892507544e-07, "loss": 17.309, "step": 44750 }, { "epoch": 0.818012320178405, "grad_norm": 6.577821400011704, "learning_rate": 8.437816267189336e-07, "loss": 17.4294, "step": 44751 }, { "epoch": 0.8180305993748515, "grad_norm": 5.97541073796953, "learning_rate": 8.436170787542974e-07, "loss": 17.325, "step": 44752 }, { "epoch": 0.818048878571298, "grad_norm": 6.44837721706697, "learning_rate": 8.434525453574221e-07, "loss": 17.5218, "step": 44753 }, { "epoch": 0.8180671577677445, "grad_norm": 5.46531053511339, "learning_rate": 8.432880265288845e-07, "loss": 17.2186, "step": 44754 }, { "epoch": 0.818085436964191, "grad_norm": 4.812057436205822, "learning_rate": 8.43123522269263e-07, "loss": 16.7776, "step": 44755 }, { "epoch": 0.8181037161606376, "grad_norm": 6.108828647612833, "learning_rate": 8.429590325791315e-07, "loss": 17.4202, "step": 44756 }, { "epoch": 0.8181219953570841, "grad_norm": 7.293575210353854, "learning_rate": 8.42794557459069e-07, "loss": 17.7009, "step": 44757 }, { "epoch": 0.8181402745535307, "grad_norm": 7.12486275498549, "learning_rate": 8.426300969096495e-07, "loss": 17.3459, "step": 44758 }, { "epoch": 0.8181585537499771, "grad_norm": 6.457201250995357, "learning_rate": 8.424656509314522e-07, "loss": 17.8398, "step": 44759 }, { "epoch": 0.8181768329464236, "grad_norm": 5.501639121352718, "learning_rate": 8.423012195250518e-07, "loss": 17.028, "step": 44760 }, { "epoch": 0.8181951121428702, "grad_norm": 5.4255553199661986, "learning_rate": 8.421368026910242e-07, "loss": 17.0979, "step": 44761 }, { "epoch": 0.8182133913393167, "grad_norm": 5.219507398448195, "learning_rate": 8.419724004299468e-07, "loss": 16.8782, "step": 44762 }, { "epoch": 0.8182316705357633, "grad_norm": 8.10131575708977, "learning_rate": 8.418080127423944e-07, "loss": 17.9572, "step": 44763 }, { "epoch": 0.8182499497322098, "grad_norm": 6.633397844652534, "learning_rate": 8.41643639628944e-07, "loss": 17.3277, "step": 44764 }, { "epoch": 0.8182682289286562, "grad_norm": 6.140825846279118, "learning_rate": 8.414792810901734e-07, "loss": 17.4761, "step": 44765 }, { "epoch": 0.8182865081251028, "grad_norm": 5.580287270277967, "learning_rate": 8.413149371266566e-07, "loss": 17.1291, "step": 44766 }, { "epoch": 0.8183047873215493, "grad_norm": 6.81524458686034, "learning_rate": 8.411506077389692e-07, "loss": 17.4355, "step": 44767 }, { "epoch": 0.8183230665179959, "grad_norm": 6.581590690594148, "learning_rate": 8.40986292927689e-07, "loss": 17.5062, "step": 44768 }, { "epoch": 0.8183413457144424, "grad_norm": 7.100893163043857, "learning_rate": 8.408219926933913e-07, "loss": 17.5114, "step": 44769 }, { "epoch": 0.8183596249108889, "grad_norm": 8.169722201772908, "learning_rate": 8.4065770703665e-07, "loss": 18.0596, "step": 44770 }, { "epoch": 0.8183779041073355, "grad_norm": 6.100115468398571, "learning_rate": 8.404934359580425e-07, "loss": 17.2174, "step": 44771 }, { "epoch": 0.8183961833037819, "grad_norm": 5.066292655147748, "learning_rate": 8.403291794581458e-07, "loss": 16.9623, "step": 44772 }, { "epoch": 0.8184144625002285, "grad_norm": 5.259948128221321, "learning_rate": 8.401649375375337e-07, "loss": 17.0024, "step": 44773 }, { "epoch": 0.818432741696675, "grad_norm": 5.593666473075389, "learning_rate": 8.400007101967833e-07, "loss": 17.0444, "step": 44774 }, { "epoch": 0.8184510208931215, "grad_norm": 8.774345733165822, "learning_rate": 8.398364974364681e-07, "loss": 18.1446, "step": 44775 }, { "epoch": 0.8184693000895681, "grad_norm": 5.964911245405935, "learning_rate": 8.39672299257166e-07, "loss": 17.2445, "step": 44776 }, { "epoch": 0.8184875792860146, "grad_norm": 5.172807419913238, "learning_rate": 8.39508115659452e-07, "loss": 16.8912, "step": 44777 }, { "epoch": 0.8185058584824612, "grad_norm": 4.8577887325345905, "learning_rate": 8.393439466438996e-07, "loss": 17.1206, "step": 44778 }, { "epoch": 0.8185241376789076, "grad_norm": 5.528403889976603, "learning_rate": 8.391797922110867e-07, "loss": 17.0968, "step": 44779 }, { "epoch": 0.8185424168753541, "grad_norm": 6.34158431932134, "learning_rate": 8.390156523615867e-07, "loss": 16.8662, "step": 44780 }, { "epoch": 0.8185606960718007, "grad_norm": 7.540976641281455, "learning_rate": 8.388515270959751e-07, "loss": 17.7351, "step": 44781 }, { "epoch": 0.8185789752682472, "grad_norm": 7.731674232107941, "learning_rate": 8.386874164148295e-07, "loss": 17.6066, "step": 44782 }, { "epoch": 0.8185972544646938, "grad_norm": 6.419282837541669, "learning_rate": 8.385233203187231e-07, "loss": 17.3663, "step": 44783 }, { "epoch": 0.8186155336611403, "grad_norm": 6.474008788619458, "learning_rate": 8.383592388082306e-07, "loss": 17.4677, "step": 44784 }, { "epoch": 0.8186338128575867, "grad_norm": 6.206787460327313, "learning_rate": 8.381951718839288e-07, "loss": 17.3763, "step": 44785 }, { "epoch": 0.8186520920540333, "grad_norm": 8.138420599228203, "learning_rate": 8.380311195463919e-07, "loss": 18.427, "step": 44786 }, { "epoch": 0.8186703712504798, "grad_norm": 6.962110727076238, "learning_rate": 8.378670817961937e-07, "loss": 17.2521, "step": 44787 }, { "epoch": 0.8186886504469264, "grad_norm": 6.923362075446049, "learning_rate": 8.377030586339108e-07, "loss": 17.6449, "step": 44788 }, { "epoch": 0.8187069296433729, "grad_norm": 5.824669299776241, "learning_rate": 8.375390500601166e-07, "loss": 17.2392, "step": 44789 }, { "epoch": 0.8187252088398194, "grad_norm": 5.967902737591132, "learning_rate": 8.373750560753874e-07, "loss": 17.4631, "step": 44790 }, { "epoch": 0.818743488036266, "grad_norm": 5.395684262609913, "learning_rate": 8.372110766802976e-07, "loss": 17.166, "step": 44791 }, { "epoch": 0.8187617672327124, "grad_norm": 6.33657901535398, "learning_rate": 8.370471118754215e-07, "loss": 17.5878, "step": 44792 }, { "epoch": 0.818780046429159, "grad_norm": 5.192378590448171, "learning_rate": 8.368831616613348e-07, "loss": 16.8108, "step": 44793 }, { "epoch": 0.8187983256256055, "grad_norm": 5.18360617798131, "learning_rate": 8.367192260386109e-07, "loss": 16.9626, "step": 44794 }, { "epoch": 0.818816604822052, "grad_norm": 6.24019637352885, "learning_rate": 8.365553050078246e-07, "loss": 17.1566, "step": 44795 }, { "epoch": 0.8188348840184986, "grad_norm": 5.829010327662133, "learning_rate": 8.36391398569551e-07, "loss": 17.4535, "step": 44796 }, { "epoch": 0.8188531632149451, "grad_norm": 6.851250525549141, "learning_rate": 8.362275067243647e-07, "loss": 17.7028, "step": 44797 }, { "epoch": 0.8188714424113916, "grad_norm": 5.979529769568101, "learning_rate": 8.360636294728386e-07, "loss": 17.1985, "step": 44798 }, { "epoch": 0.8188897216078381, "grad_norm": 5.9789991283130455, "learning_rate": 8.358997668155478e-07, "loss": 17.31, "step": 44799 }, { "epoch": 0.8189080008042846, "grad_norm": 5.273606039764411, "learning_rate": 8.357359187530684e-07, "loss": 17.0902, "step": 44800 }, { "epoch": 0.8189262800007312, "grad_norm": 5.827508417252392, "learning_rate": 8.355720852859717e-07, "loss": 17.2136, "step": 44801 }, { "epoch": 0.8189445591971777, "grad_norm": 6.7924186641436, "learning_rate": 8.35408266414835e-07, "loss": 17.4192, "step": 44802 }, { "epoch": 0.8189628383936243, "grad_norm": 6.060384251462007, "learning_rate": 8.352444621402295e-07, "loss": 17.316, "step": 44803 }, { "epoch": 0.8189811175900708, "grad_norm": 6.305378802128407, "learning_rate": 8.350806724627319e-07, "loss": 17.4534, "step": 44804 }, { "epoch": 0.8189993967865172, "grad_norm": 6.385083912202196, "learning_rate": 8.349168973829158e-07, "loss": 17.5713, "step": 44805 }, { "epoch": 0.8190176759829638, "grad_norm": 4.7825051402175704, "learning_rate": 8.347531369013528e-07, "loss": 16.8116, "step": 44806 }, { "epoch": 0.8190359551794103, "grad_norm": 6.2382068474759045, "learning_rate": 8.345893910186198e-07, "loss": 17.2254, "step": 44807 }, { "epoch": 0.8190542343758568, "grad_norm": 6.464102456827454, "learning_rate": 8.344256597352885e-07, "loss": 17.6187, "step": 44808 }, { "epoch": 0.8190725135723034, "grad_norm": 6.025243408390929, "learning_rate": 8.342619430519333e-07, "loss": 17.3271, "step": 44809 }, { "epoch": 0.8190907927687499, "grad_norm": 7.836856770727268, "learning_rate": 8.340982409691306e-07, "loss": 17.5292, "step": 44810 }, { "epoch": 0.8191090719651964, "grad_norm": 6.08687628953697, "learning_rate": 8.339345534874515e-07, "loss": 17.2904, "step": 44811 }, { "epoch": 0.8191273511616429, "grad_norm": 5.506294694538231, "learning_rate": 8.33770880607469e-07, "loss": 17.2925, "step": 44812 }, { "epoch": 0.8191456303580894, "grad_norm": 6.165663670478086, "learning_rate": 8.336072223297592e-07, "loss": 17.1267, "step": 44813 }, { "epoch": 0.819163909554536, "grad_norm": 7.673451384002789, "learning_rate": 8.33443578654895e-07, "loss": 18.0802, "step": 44814 }, { "epoch": 0.8191821887509825, "grad_norm": 8.585843465533056, "learning_rate": 8.332799495834482e-07, "loss": 18.145, "step": 44815 }, { "epoch": 0.8192004679474291, "grad_norm": 5.680507093457062, "learning_rate": 8.331163351159944e-07, "loss": 17.0856, "step": 44816 }, { "epoch": 0.8192187471438755, "grad_norm": 5.4215041498055285, "learning_rate": 8.329527352531053e-07, "loss": 16.7638, "step": 44817 }, { "epoch": 0.819237026340322, "grad_norm": 5.813612389285269, "learning_rate": 8.32789149995355e-07, "loss": 17.0378, "step": 44818 }, { "epoch": 0.8192553055367686, "grad_norm": 6.248137079552365, "learning_rate": 8.326255793433191e-07, "loss": 17.2076, "step": 44819 }, { "epoch": 0.8192735847332151, "grad_norm": 7.43695402084213, "learning_rate": 8.324620232975667e-07, "loss": 17.7375, "step": 44820 }, { "epoch": 0.8192918639296617, "grad_norm": 7.027237382214258, "learning_rate": 8.322984818586755e-07, "loss": 17.6641, "step": 44821 }, { "epoch": 0.8193101431261082, "grad_norm": 7.635848645552882, "learning_rate": 8.321349550272156e-07, "loss": 18.0351, "step": 44822 }, { "epoch": 0.8193284223225547, "grad_norm": 5.552788201779087, "learning_rate": 8.3197144280376e-07, "loss": 17.0118, "step": 44823 }, { "epoch": 0.8193467015190012, "grad_norm": 11.458149336636442, "learning_rate": 8.318079451888844e-07, "loss": 18.2249, "step": 44824 }, { "epoch": 0.8193649807154477, "grad_norm": 5.897278346478407, "learning_rate": 8.316444621831599e-07, "loss": 17.2831, "step": 44825 }, { "epoch": 0.8193832599118943, "grad_norm": 7.586598911368488, "learning_rate": 8.314809937871587e-07, "loss": 17.8116, "step": 44826 }, { "epoch": 0.8194015391083408, "grad_norm": 6.616313019336424, "learning_rate": 8.313175400014551e-07, "loss": 17.4579, "step": 44827 }, { "epoch": 0.8194198183047873, "grad_norm": 6.5365211345616965, "learning_rate": 8.311541008266228e-07, "loss": 17.2369, "step": 44828 }, { "epoch": 0.8194380975012339, "grad_norm": 6.566017776171703, "learning_rate": 8.309906762632325e-07, "loss": 17.2453, "step": 44829 }, { "epoch": 0.8194563766976803, "grad_norm": 6.110645238910006, "learning_rate": 8.308272663118589e-07, "loss": 17.2828, "step": 44830 }, { "epoch": 0.8194746558941269, "grad_norm": 5.376815258415298, "learning_rate": 8.306638709730747e-07, "loss": 17.1457, "step": 44831 }, { "epoch": 0.8194929350905734, "grad_norm": 6.182128020312798, "learning_rate": 8.305004902474501e-07, "loss": 17.2247, "step": 44832 }, { "epoch": 0.8195112142870199, "grad_norm": 6.648874335594615, "learning_rate": 8.303371241355606e-07, "loss": 17.8222, "step": 44833 }, { "epoch": 0.8195294934834665, "grad_norm": 5.478766542473575, "learning_rate": 8.301737726379766e-07, "loss": 17.0131, "step": 44834 }, { "epoch": 0.819547772679913, "grad_norm": 6.020432055498111, "learning_rate": 8.30010435755273e-07, "loss": 17.2897, "step": 44835 }, { "epoch": 0.8195660518763596, "grad_norm": 5.310076680506089, "learning_rate": 8.298471134880198e-07, "loss": 17.0038, "step": 44836 }, { "epoch": 0.819584331072806, "grad_norm": 5.9883189331459725, "learning_rate": 8.296838058367906e-07, "loss": 17.3935, "step": 44837 }, { "epoch": 0.8196026102692525, "grad_norm": 6.339804927491454, "learning_rate": 8.295205128021589e-07, "loss": 17.2218, "step": 44838 }, { "epoch": 0.8196208894656991, "grad_norm": 4.858754098617503, "learning_rate": 8.293572343846956e-07, "loss": 16.8521, "step": 44839 }, { "epoch": 0.8196391686621456, "grad_norm": 6.260683210299351, "learning_rate": 8.291939705849722e-07, "loss": 17.3767, "step": 44840 }, { "epoch": 0.8196574478585922, "grad_norm": 6.8772395129303385, "learning_rate": 8.290307214035636e-07, "loss": 17.7288, "step": 44841 }, { "epoch": 0.8196757270550387, "grad_norm": 5.880417792638922, "learning_rate": 8.288674868410401e-07, "loss": 17.1412, "step": 44842 }, { "epoch": 0.8196940062514851, "grad_norm": 6.613909141624491, "learning_rate": 8.287042668979728e-07, "loss": 17.8978, "step": 44843 }, { "epoch": 0.8197122854479317, "grad_norm": 6.0779364118365, "learning_rate": 8.285410615749356e-07, "loss": 17.2869, "step": 44844 }, { "epoch": 0.8197305646443782, "grad_norm": 6.343624328582075, "learning_rate": 8.283778708725004e-07, "loss": 17.4168, "step": 44845 }, { "epoch": 0.8197488438408248, "grad_norm": 6.214318983256252, "learning_rate": 8.282146947912384e-07, "loss": 17.2892, "step": 44846 }, { "epoch": 0.8197671230372713, "grad_norm": 6.433305332500336, "learning_rate": 8.280515333317224e-07, "loss": 17.1041, "step": 44847 }, { "epoch": 0.8197854022337178, "grad_norm": 5.581404499986757, "learning_rate": 8.278883864945231e-07, "loss": 17.3052, "step": 44848 }, { "epoch": 0.8198036814301644, "grad_norm": 6.824387292070906, "learning_rate": 8.277252542802139e-07, "loss": 17.0764, "step": 44849 }, { "epoch": 0.8198219606266108, "grad_norm": 5.834545626809498, "learning_rate": 8.275621366893655e-07, "loss": 17.1434, "step": 44850 }, { "epoch": 0.8198402398230574, "grad_norm": 6.855040947670885, "learning_rate": 8.273990337225485e-07, "loss": 17.5344, "step": 44851 }, { "epoch": 0.8198585190195039, "grad_norm": 6.384986786834769, "learning_rate": 8.272359453803374e-07, "loss": 17.0722, "step": 44852 }, { "epoch": 0.8198767982159504, "grad_norm": 6.618012449778718, "learning_rate": 8.270728716633009e-07, "loss": 17.3205, "step": 44853 }, { "epoch": 0.819895077412397, "grad_norm": 6.033272038950027, "learning_rate": 8.269098125720115e-07, "loss": 17.1506, "step": 44854 }, { "epoch": 0.8199133566088435, "grad_norm": 5.484372487532448, "learning_rate": 8.267467681070424e-07, "loss": 17.1476, "step": 44855 }, { "epoch": 0.81993163580529, "grad_norm": 7.159950622086636, "learning_rate": 8.26583738268964e-07, "loss": 18.1147, "step": 44856 }, { "epoch": 0.8199499150017365, "grad_norm": 5.6160575314234125, "learning_rate": 8.26420723058346e-07, "loss": 16.9923, "step": 44857 }, { "epoch": 0.819968194198183, "grad_norm": 6.161858391177776, "learning_rate": 8.262577224757628e-07, "loss": 17.3523, "step": 44858 }, { "epoch": 0.8199864733946296, "grad_norm": 5.550186260875206, "learning_rate": 8.260947365217836e-07, "loss": 17.1072, "step": 44859 }, { "epoch": 0.8200047525910761, "grad_norm": 6.800431890681636, "learning_rate": 8.259317651969795e-07, "loss": 17.3944, "step": 44860 }, { "epoch": 0.8200230317875227, "grad_norm": 5.442862875228575, "learning_rate": 8.257688085019233e-07, "loss": 17.0769, "step": 44861 }, { "epoch": 0.8200413109839692, "grad_norm": 6.524268490452253, "learning_rate": 8.256058664371835e-07, "loss": 17.3858, "step": 44862 }, { "epoch": 0.8200595901804156, "grad_norm": 5.775298430815294, "learning_rate": 8.254429390033336e-07, "loss": 17.1027, "step": 44863 }, { "epoch": 0.8200778693768622, "grad_norm": 5.576157910325689, "learning_rate": 8.252800262009447e-07, "loss": 17.2092, "step": 44864 }, { "epoch": 0.8200961485733087, "grad_norm": 5.749692061407719, "learning_rate": 8.251171280305864e-07, "loss": 17.0739, "step": 44865 }, { "epoch": 0.8201144277697553, "grad_norm": 4.807908099655006, "learning_rate": 8.249542444928311e-07, "loss": 16.7776, "step": 44866 }, { "epoch": 0.8201327069662018, "grad_norm": 7.373542019127894, "learning_rate": 8.247913755882486e-07, "loss": 17.8508, "step": 44867 }, { "epoch": 0.8201509861626483, "grad_norm": 5.418431518815216, "learning_rate": 8.246285213174093e-07, "loss": 16.891, "step": 44868 }, { "epoch": 0.8201692653590948, "grad_norm": 7.108567231177925, "learning_rate": 8.244656816808854e-07, "loss": 17.4764, "step": 44869 }, { "epoch": 0.8201875445555413, "grad_norm": 6.2045697227764265, "learning_rate": 8.243028566792477e-07, "loss": 17.4634, "step": 44870 }, { "epoch": 0.8202058237519879, "grad_norm": 6.57699097021353, "learning_rate": 8.241400463130644e-07, "loss": 17.1858, "step": 44871 }, { "epoch": 0.8202241029484344, "grad_norm": 6.6301500660329316, "learning_rate": 8.23977250582908e-07, "loss": 17.6839, "step": 44872 }, { "epoch": 0.8202423821448809, "grad_norm": 6.518868095968165, "learning_rate": 8.238144694893501e-07, "loss": 17.3369, "step": 44873 }, { "epoch": 0.8202606613413275, "grad_norm": 6.831693555217121, "learning_rate": 8.236517030329589e-07, "loss": 17.5712, "step": 44874 }, { "epoch": 0.820278940537774, "grad_norm": 6.048488923238751, "learning_rate": 8.234889512143074e-07, "loss": 17.2629, "step": 44875 }, { "epoch": 0.8202972197342204, "grad_norm": 5.387926810905892, "learning_rate": 8.233262140339643e-07, "loss": 16.8757, "step": 44876 }, { "epoch": 0.820315498930667, "grad_norm": 5.492556352591673, "learning_rate": 8.231634914924996e-07, "loss": 16.9451, "step": 44877 }, { "epoch": 0.8203337781271135, "grad_norm": 8.215116763999017, "learning_rate": 8.230007835904857e-07, "loss": 17.6423, "step": 44878 }, { "epoch": 0.8203520573235601, "grad_norm": 6.171374539347238, "learning_rate": 8.228380903284899e-07, "loss": 17.4378, "step": 44879 }, { "epoch": 0.8203703365200066, "grad_norm": 5.458324552145748, "learning_rate": 8.226754117070857e-07, "loss": 17.1053, "step": 44880 }, { "epoch": 0.820388615716453, "grad_norm": 7.170622418898588, "learning_rate": 8.225127477268407e-07, "loss": 17.8859, "step": 44881 }, { "epoch": 0.8204068949128996, "grad_norm": 5.294593120007555, "learning_rate": 8.223500983883253e-07, "loss": 16.9197, "step": 44882 }, { "epoch": 0.8204251741093461, "grad_norm": 6.671584522167582, "learning_rate": 8.221874636921123e-07, "loss": 17.4186, "step": 44883 }, { "epoch": 0.8204434533057927, "grad_norm": 7.409687741653222, "learning_rate": 8.22024843638769e-07, "loss": 17.9004, "step": 44884 }, { "epoch": 0.8204617325022392, "grad_norm": 7.440835323968854, "learning_rate": 8.218622382288655e-07, "loss": 17.6262, "step": 44885 }, { "epoch": 0.8204800116986857, "grad_norm": 6.000699413741945, "learning_rate": 8.21699647462973e-07, "loss": 17.1844, "step": 44886 }, { "epoch": 0.8204982908951323, "grad_norm": 6.618393658029801, "learning_rate": 8.215370713416609e-07, "loss": 17.6894, "step": 44887 }, { "epoch": 0.8205165700915787, "grad_norm": 7.0549675953367945, "learning_rate": 8.213745098654974e-07, "loss": 17.3979, "step": 44888 }, { "epoch": 0.8205348492880253, "grad_norm": 7.825906883551853, "learning_rate": 8.212119630350552e-07, "loss": 18.3288, "step": 44889 }, { "epoch": 0.8205531284844718, "grad_norm": 8.918575964677247, "learning_rate": 8.210494308509009e-07, "loss": 17.6437, "step": 44890 }, { "epoch": 0.8205714076809183, "grad_norm": 6.507404006351925, "learning_rate": 8.20886913313606e-07, "loss": 17.2749, "step": 44891 }, { "epoch": 0.8205896868773649, "grad_norm": 7.1258446720893565, "learning_rate": 8.207244104237405e-07, "loss": 17.0245, "step": 44892 }, { "epoch": 0.8206079660738114, "grad_norm": 6.020023290066323, "learning_rate": 8.205619221818739e-07, "loss": 17.1132, "step": 44893 }, { "epoch": 0.820626245270258, "grad_norm": 6.410616841900291, "learning_rate": 8.203994485885736e-07, "loss": 17.4692, "step": 44894 }, { "epoch": 0.8206445244667044, "grad_norm": 6.123163202974791, "learning_rate": 8.202369896444118e-07, "loss": 17.208, "step": 44895 }, { "epoch": 0.8206628036631509, "grad_norm": 6.233022199496298, "learning_rate": 8.200745453499553e-07, "loss": 17.0796, "step": 44896 }, { "epoch": 0.8206810828595975, "grad_norm": 8.081180561706068, "learning_rate": 8.19912115705776e-07, "loss": 18.0323, "step": 44897 }, { "epoch": 0.820699362056044, "grad_norm": 6.602897410739309, "learning_rate": 8.19749700712441e-07, "loss": 17.4275, "step": 44898 }, { "epoch": 0.8207176412524906, "grad_norm": 7.614543421583626, "learning_rate": 8.195873003705218e-07, "loss": 17.5827, "step": 44899 }, { "epoch": 0.8207359204489371, "grad_norm": 7.9566179982279035, "learning_rate": 8.194249146805849e-07, "loss": 17.9899, "step": 44900 }, { "epoch": 0.8207541996453835, "grad_norm": 7.254279473369936, "learning_rate": 8.192625436432022e-07, "loss": 17.1018, "step": 44901 }, { "epoch": 0.8207724788418301, "grad_norm": 4.7544815704249315, "learning_rate": 8.191001872589405e-07, "loss": 16.9106, "step": 44902 }, { "epoch": 0.8207907580382766, "grad_norm": 6.206303606537664, "learning_rate": 8.189378455283709e-07, "loss": 17.3131, "step": 44903 }, { "epoch": 0.8208090372347232, "grad_norm": 6.922723755038632, "learning_rate": 8.187755184520613e-07, "loss": 17.6507, "step": 44904 }, { "epoch": 0.8208273164311697, "grad_norm": 6.355377486572348, "learning_rate": 8.18613206030579e-07, "loss": 17.4014, "step": 44905 }, { "epoch": 0.8208455956276162, "grad_norm": 5.128796969970015, "learning_rate": 8.184509082644965e-07, "loss": 16.9827, "step": 44906 }, { "epoch": 0.8208638748240628, "grad_norm": 5.910659587669988, "learning_rate": 8.182886251543793e-07, "loss": 17.3179, "step": 44907 }, { "epoch": 0.8208821540205092, "grad_norm": 6.686476027201349, "learning_rate": 8.181263567007974e-07, "loss": 17.4188, "step": 44908 }, { "epoch": 0.8209004332169558, "grad_norm": 5.551757208207929, "learning_rate": 8.179641029043211e-07, "loss": 17.0623, "step": 44909 }, { "epoch": 0.8209187124134023, "grad_norm": 7.968218159196372, "learning_rate": 8.178018637655166e-07, "loss": 17.5384, "step": 44910 }, { "epoch": 0.8209369916098488, "grad_norm": 8.533932892326852, "learning_rate": 8.176396392849545e-07, "loss": 18.1277, "step": 44911 }, { "epoch": 0.8209552708062954, "grad_norm": 5.960611515051311, "learning_rate": 8.174774294632026e-07, "loss": 17.3111, "step": 44912 }, { "epoch": 0.8209735500027419, "grad_norm": 6.83735483487989, "learning_rate": 8.173152343008284e-07, "loss": 17.3115, "step": 44913 }, { "epoch": 0.8209918291991884, "grad_norm": 5.68421884901027, "learning_rate": 8.171530537984029e-07, "loss": 17.089, "step": 44914 }, { "epoch": 0.8210101083956349, "grad_norm": 5.0378209049100855, "learning_rate": 8.169908879564925e-07, "loss": 16.9418, "step": 44915 }, { "epoch": 0.8210283875920814, "grad_norm": 5.348323150508101, "learning_rate": 8.16828736775665e-07, "loss": 17.0665, "step": 44916 }, { "epoch": 0.821046666788528, "grad_norm": 7.852005208392972, "learning_rate": 8.166666002564893e-07, "loss": 17.6372, "step": 44917 }, { "epoch": 0.8210649459849745, "grad_norm": 4.8258966116781625, "learning_rate": 8.165044783995357e-07, "loss": 16.9249, "step": 44918 }, { "epoch": 0.8210832251814211, "grad_norm": 6.421145099507138, "learning_rate": 8.163423712053698e-07, "loss": 17.3052, "step": 44919 }, { "epoch": 0.8211015043778676, "grad_norm": 7.082352232594944, "learning_rate": 8.161802786745621e-07, "loss": 17.5723, "step": 44920 }, { "epoch": 0.821119783574314, "grad_norm": 5.215420875469449, "learning_rate": 8.160182008076794e-07, "loss": 16.9501, "step": 44921 }, { "epoch": 0.8211380627707606, "grad_norm": 6.391032483476807, "learning_rate": 8.158561376052887e-07, "loss": 17.4582, "step": 44922 }, { "epoch": 0.8211563419672071, "grad_norm": 5.8101786432510485, "learning_rate": 8.156940890679599e-07, "loss": 17.1397, "step": 44923 }, { "epoch": 0.8211746211636537, "grad_norm": 4.720037709240638, "learning_rate": 8.155320551962609e-07, "loss": 16.692, "step": 44924 }, { "epoch": 0.8211929003601002, "grad_norm": 6.522284691300648, "learning_rate": 8.153700359907574e-07, "loss": 17.128, "step": 44925 }, { "epoch": 0.8212111795565467, "grad_norm": 6.0952445539189375, "learning_rate": 8.152080314520189e-07, "loss": 17.2904, "step": 44926 }, { "epoch": 0.8212294587529932, "grad_norm": 4.8658979294290665, "learning_rate": 8.150460415806128e-07, "loss": 16.914, "step": 44927 }, { "epoch": 0.8212477379494397, "grad_norm": 6.830314827186, "learning_rate": 8.148840663771085e-07, "loss": 17.2503, "step": 44928 }, { "epoch": 0.8212660171458863, "grad_norm": 5.617244730767783, "learning_rate": 8.147221058420723e-07, "loss": 17.0505, "step": 44929 }, { "epoch": 0.8212842963423328, "grad_norm": 5.659622139223697, "learning_rate": 8.145601599760711e-07, "loss": 16.9844, "step": 44930 }, { "epoch": 0.8213025755387793, "grad_norm": 6.6506026574264885, "learning_rate": 8.143982287796743e-07, "loss": 17.1359, "step": 44931 }, { "epoch": 0.8213208547352259, "grad_norm": 6.6257885181245, "learning_rate": 8.142363122534485e-07, "loss": 17.3621, "step": 44932 }, { "epoch": 0.8213391339316723, "grad_norm": 6.132612137519507, "learning_rate": 8.140744103979603e-07, "loss": 17.1534, "step": 44933 }, { "epoch": 0.8213574131281189, "grad_norm": 7.467594343207123, "learning_rate": 8.13912523213779e-07, "loss": 17.8001, "step": 44934 }, { "epoch": 0.8213756923245654, "grad_norm": 7.285817135016993, "learning_rate": 8.137506507014698e-07, "loss": 17.6626, "step": 44935 }, { "epoch": 0.8213939715210119, "grad_norm": 7.098594675297903, "learning_rate": 8.13588792861601e-07, "loss": 17.2778, "step": 44936 }, { "epoch": 0.8214122507174585, "grad_norm": 6.232364049036344, "learning_rate": 8.134269496947417e-07, "loss": 17.4861, "step": 44937 }, { "epoch": 0.821430529913905, "grad_norm": 6.872364463585362, "learning_rate": 8.13265121201458e-07, "loss": 17.5315, "step": 44938 }, { "epoch": 0.8214488091103516, "grad_norm": 5.123083710200809, "learning_rate": 8.131033073823152e-07, "loss": 16.8179, "step": 44939 }, { "epoch": 0.821467088306798, "grad_norm": 7.680726118652487, "learning_rate": 8.129415082378833e-07, "loss": 17.8501, "step": 44940 }, { "epoch": 0.8214853675032445, "grad_norm": 5.4213849268291385, "learning_rate": 8.127797237687269e-07, "loss": 17.0051, "step": 44941 }, { "epoch": 0.8215036466996911, "grad_norm": 5.618199350843534, "learning_rate": 8.126179539754147e-07, "loss": 17.0973, "step": 44942 }, { "epoch": 0.8215219258961376, "grad_norm": 4.952375950110018, "learning_rate": 8.124561988585139e-07, "loss": 16.816, "step": 44943 }, { "epoch": 0.8215402050925841, "grad_norm": 5.337429630804274, "learning_rate": 8.122944584185899e-07, "loss": 16.8583, "step": 44944 }, { "epoch": 0.8215584842890307, "grad_norm": 8.014668361397739, "learning_rate": 8.1213273265621e-07, "loss": 17.8802, "step": 44945 }, { "epoch": 0.8215767634854771, "grad_norm": 7.309705715907136, "learning_rate": 8.119710215719423e-07, "loss": 17.4251, "step": 44946 }, { "epoch": 0.8215950426819237, "grad_norm": 6.112079368674555, "learning_rate": 8.118093251663522e-07, "loss": 17.4025, "step": 44947 }, { "epoch": 0.8216133218783702, "grad_norm": 6.236787501988437, "learning_rate": 8.116476434400078e-07, "loss": 17.2477, "step": 44948 }, { "epoch": 0.8216316010748167, "grad_norm": 5.74358865878022, "learning_rate": 8.114859763934751e-07, "loss": 17.1377, "step": 44949 }, { "epoch": 0.8216498802712633, "grad_norm": 6.398115068189274, "learning_rate": 8.113243240273195e-07, "loss": 17.393, "step": 44950 }, { "epoch": 0.8216681594677098, "grad_norm": 7.687551016265441, "learning_rate": 8.111626863421096e-07, "loss": 17.8214, "step": 44951 }, { "epoch": 0.8216864386641564, "grad_norm": 6.624768656617861, "learning_rate": 8.110010633384108e-07, "loss": 17.4504, "step": 44952 }, { "epoch": 0.8217047178606028, "grad_norm": 6.842366446231546, "learning_rate": 8.108394550167892e-07, "loss": 17.4089, "step": 44953 }, { "epoch": 0.8217229970570493, "grad_norm": 6.1226508191276, "learning_rate": 8.106778613778116e-07, "loss": 17.4681, "step": 44954 }, { "epoch": 0.8217412762534959, "grad_norm": 5.595585609821751, "learning_rate": 8.105162824220453e-07, "loss": 17.3114, "step": 44955 }, { "epoch": 0.8217595554499424, "grad_norm": 5.951512413408995, "learning_rate": 8.103547181500548e-07, "loss": 17.3152, "step": 44956 }, { "epoch": 0.821777834646389, "grad_norm": 6.154024195228791, "learning_rate": 8.101931685624092e-07, "loss": 17.6698, "step": 44957 }, { "epoch": 0.8217961138428355, "grad_norm": 7.927419086573032, "learning_rate": 8.100316336596714e-07, "loss": 17.9643, "step": 44958 }, { "epoch": 0.8218143930392819, "grad_norm": 5.178863276619297, "learning_rate": 8.098701134424103e-07, "loss": 16.8225, "step": 44959 }, { "epoch": 0.8218326722357285, "grad_norm": 5.102688913633098, "learning_rate": 8.09708607911191e-07, "loss": 17.0246, "step": 44960 }, { "epoch": 0.821850951432175, "grad_norm": 7.129422466657576, "learning_rate": 8.09547117066578e-07, "loss": 17.347, "step": 44961 }, { "epoch": 0.8218692306286216, "grad_norm": 6.598737792356155, "learning_rate": 8.093856409091388e-07, "loss": 17.5253, "step": 44962 }, { "epoch": 0.8218875098250681, "grad_norm": 5.535215031931644, "learning_rate": 8.092241794394406e-07, "loss": 17.0387, "step": 44963 }, { "epoch": 0.8219057890215146, "grad_norm": 5.444782574492489, "learning_rate": 8.090627326580469e-07, "loss": 16.97, "step": 44964 }, { "epoch": 0.8219240682179612, "grad_norm": 7.797838716085904, "learning_rate": 8.089013005655255e-07, "loss": 17.5108, "step": 44965 }, { "epoch": 0.8219423474144076, "grad_norm": 8.073567548738183, "learning_rate": 8.087398831624416e-07, "loss": 18.0606, "step": 44966 }, { "epoch": 0.8219606266108542, "grad_norm": 5.983252326104019, "learning_rate": 8.085784804493596e-07, "loss": 17.4972, "step": 44967 }, { "epoch": 0.8219789058073007, "grad_norm": 9.003558554904192, "learning_rate": 8.084170924268469e-07, "loss": 17.8811, "step": 44968 }, { "epoch": 0.8219971850037472, "grad_norm": 7.1320646973666895, "learning_rate": 8.082557190954693e-07, "loss": 17.789, "step": 44969 }, { "epoch": 0.8220154642001938, "grad_norm": 6.733654823672565, "learning_rate": 8.0809436045579e-07, "loss": 17.3781, "step": 44970 }, { "epoch": 0.8220337433966403, "grad_norm": 6.038324517082672, "learning_rate": 8.079330165083765e-07, "loss": 16.8604, "step": 44971 }, { "epoch": 0.8220520225930869, "grad_norm": 4.442924563398658, "learning_rate": 8.077716872537939e-07, "loss": 16.8195, "step": 44972 }, { "epoch": 0.8220703017895333, "grad_norm": 6.4243024805860545, "learning_rate": 8.076103726926088e-07, "loss": 17.448, "step": 44973 }, { "epoch": 0.8220885809859798, "grad_norm": 5.569758613160435, "learning_rate": 8.074490728253859e-07, "loss": 17.2317, "step": 44974 }, { "epoch": 0.8221068601824264, "grad_norm": 6.179443099956244, "learning_rate": 8.072877876526886e-07, "loss": 17.2026, "step": 44975 }, { "epoch": 0.8221251393788729, "grad_norm": 5.555467956542747, "learning_rate": 8.071265171750853e-07, "loss": 16.9639, "step": 44976 }, { "epoch": 0.8221434185753195, "grad_norm": 6.146524742142395, "learning_rate": 8.069652613931395e-07, "loss": 16.9167, "step": 44977 }, { "epoch": 0.822161697771766, "grad_norm": 6.63643158560724, "learning_rate": 8.068040203074157e-07, "loss": 17.1023, "step": 44978 }, { "epoch": 0.8221799769682124, "grad_norm": 5.9231814380005465, "learning_rate": 8.066427939184806e-07, "loss": 17.3384, "step": 44979 }, { "epoch": 0.822198256164659, "grad_norm": 5.801168103739646, "learning_rate": 8.064815822268979e-07, "loss": 17.095, "step": 44980 }, { "epoch": 0.8222165353611055, "grad_norm": 6.0272592621502135, "learning_rate": 8.06320385233233e-07, "loss": 17.1202, "step": 44981 }, { "epoch": 0.8222348145575521, "grad_norm": 6.765091132582015, "learning_rate": 8.061592029380527e-07, "loss": 17.6435, "step": 44982 }, { "epoch": 0.8222530937539986, "grad_norm": 5.654409120121399, "learning_rate": 8.059980353419206e-07, "loss": 17.2022, "step": 44983 }, { "epoch": 0.8222713729504451, "grad_norm": 5.674528884824575, "learning_rate": 8.058368824453999e-07, "loss": 17.2206, "step": 44984 }, { "epoch": 0.8222896521468916, "grad_norm": 7.20543542197919, "learning_rate": 8.056757442490587e-07, "loss": 17.9931, "step": 44985 }, { "epoch": 0.8223079313433381, "grad_norm": 6.138279305240938, "learning_rate": 8.055146207534593e-07, "loss": 17.5324, "step": 44986 }, { "epoch": 0.8223262105397847, "grad_norm": 6.307776808517089, "learning_rate": 8.053535119591665e-07, "loss": 17.2838, "step": 44987 }, { "epoch": 0.8223444897362312, "grad_norm": 5.52471498887174, "learning_rate": 8.051924178667464e-07, "loss": 17.0406, "step": 44988 }, { "epoch": 0.8223627689326777, "grad_norm": 5.709016629965555, "learning_rate": 8.050313384767622e-07, "loss": 17.2095, "step": 44989 }, { "epoch": 0.8223810481291243, "grad_norm": 6.0926065333696355, "learning_rate": 8.04870273789779e-07, "loss": 17.3847, "step": 44990 }, { "epoch": 0.8223993273255708, "grad_norm": 6.862558903877556, "learning_rate": 8.047092238063626e-07, "loss": 17.6804, "step": 44991 }, { "epoch": 0.8224176065220173, "grad_norm": 6.368166316278181, "learning_rate": 8.04548188527075e-07, "loss": 17.6351, "step": 44992 }, { "epoch": 0.8224358857184638, "grad_norm": 6.648562899735989, "learning_rate": 8.043871679524834e-07, "loss": 17.2741, "step": 44993 }, { "epoch": 0.8224541649149103, "grad_norm": 4.894316705134166, "learning_rate": 8.042261620831504e-07, "loss": 17.0056, "step": 44994 }, { "epoch": 0.8224724441113569, "grad_norm": 6.198426388885835, "learning_rate": 8.040651709196396e-07, "loss": 17.6235, "step": 44995 }, { "epoch": 0.8224907233078034, "grad_norm": 4.888718127608725, "learning_rate": 8.03904194462517e-07, "loss": 17.0551, "step": 44996 }, { "epoch": 0.82250900250425, "grad_norm": 6.669036288306508, "learning_rate": 8.037432327123468e-07, "loss": 17.5406, "step": 44997 }, { "epoch": 0.8225272817006964, "grad_norm": 8.264664731952147, "learning_rate": 8.035822856696906e-07, "loss": 17.5928, "step": 44998 }, { "epoch": 0.8225455608971429, "grad_norm": 7.365580100356894, "learning_rate": 8.034213533351149e-07, "loss": 17.6686, "step": 44999 }, { "epoch": 0.8225638400935895, "grad_norm": 5.6300006175333905, "learning_rate": 8.032604357091839e-07, "loss": 17.1263, "step": 45000 }, { "epoch": 0.822582119290036, "grad_norm": 5.043644184394477, "learning_rate": 8.030995327924601e-07, "loss": 16.9377, "step": 45001 }, { "epoch": 0.8226003984864826, "grad_norm": 5.397006135642754, "learning_rate": 8.029386445855092e-07, "loss": 17.1076, "step": 45002 }, { "epoch": 0.8226186776829291, "grad_norm": 6.622177351901499, "learning_rate": 8.027777710888928e-07, "loss": 17.1526, "step": 45003 }, { "epoch": 0.8226369568793755, "grad_norm": 5.922596009252297, "learning_rate": 8.02616912303178e-07, "loss": 17.1723, "step": 45004 }, { "epoch": 0.8226552360758221, "grad_norm": 5.556636194514141, "learning_rate": 8.024560682289261e-07, "loss": 17.1224, "step": 45005 }, { "epoch": 0.8226735152722686, "grad_norm": 6.574223474166502, "learning_rate": 8.022952388666999e-07, "loss": 17.2371, "step": 45006 }, { "epoch": 0.8226917944687152, "grad_norm": 5.623894527445024, "learning_rate": 8.021344242170664e-07, "loss": 17.4022, "step": 45007 }, { "epoch": 0.8227100736651617, "grad_norm": 6.089173992450411, "learning_rate": 8.019736242805865e-07, "loss": 17.1651, "step": 45008 }, { "epoch": 0.8227283528616082, "grad_norm": 6.983191584631895, "learning_rate": 8.01812839057824e-07, "loss": 17.4894, "step": 45009 }, { "epoch": 0.8227466320580548, "grad_norm": 6.223117929122696, "learning_rate": 8.016520685493451e-07, "loss": 17.2439, "step": 45010 }, { "epoch": 0.8227649112545012, "grad_norm": 5.47189688830602, "learning_rate": 8.014913127557112e-07, "loss": 17.0268, "step": 45011 }, { "epoch": 0.8227831904509477, "grad_norm": 9.338785458716792, "learning_rate": 8.013305716774844e-07, "loss": 18.262, "step": 45012 }, { "epoch": 0.8228014696473943, "grad_norm": 7.175745381936204, "learning_rate": 8.011698453152311e-07, "loss": 18.0372, "step": 45013 }, { "epoch": 0.8228197488438408, "grad_norm": 5.896517895272767, "learning_rate": 8.010091336695131e-07, "loss": 17.1887, "step": 45014 }, { "epoch": 0.8228380280402874, "grad_norm": 5.632625943222051, "learning_rate": 8.00848436740892e-07, "loss": 17.0323, "step": 45015 }, { "epoch": 0.8228563072367339, "grad_norm": 6.013817784678725, "learning_rate": 8.006877545299347e-07, "loss": 17.1508, "step": 45016 }, { "epoch": 0.8228745864331803, "grad_norm": 17.965344339484545, "learning_rate": 8.005270870372006e-07, "loss": 18.0804, "step": 45017 }, { "epoch": 0.8228928656296269, "grad_norm": 21.776789431890535, "learning_rate": 8.00366434263255e-07, "loss": 17.2446, "step": 45018 }, { "epoch": 0.8229111448260734, "grad_norm": 6.525262860244115, "learning_rate": 8.002057962086618e-07, "loss": 17.3831, "step": 45019 }, { "epoch": 0.82292942402252, "grad_norm": 7.78399588236045, "learning_rate": 8.000451728739816e-07, "loss": 17.3121, "step": 45020 }, { "epoch": 0.8229477032189665, "grad_norm": 6.743666318679306, "learning_rate": 7.998845642597802e-07, "loss": 17.0796, "step": 45021 }, { "epoch": 0.822965982415413, "grad_norm": 4.714386348389924, "learning_rate": 7.997239703666182e-07, "loss": 16.7445, "step": 45022 }, { "epoch": 0.8229842616118596, "grad_norm": 5.691338935343142, "learning_rate": 7.995633911950585e-07, "loss": 17.2285, "step": 45023 }, { "epoch": 0.823002540808306, "grad_norm": 5.0299339434293895, "learning_rate": 7.994028267456654e-07, "loss": 17.028, "step": 45024 }, { "epoch": 0.8230208200047526, "grad_norm": 6.804779653600188, "learning_rate": 7.992422770190001e-07, "loss": 17.4872, "step": 45025 }, { "epoch": 0.8230390992011991, "grad_norm": 5.728770613642845, "learning_rate": 7.990817420156272e-07, "loss": 17.1735, "step": 45026 }, { "epoch": 0.8230573783976456, "grad_norm": 7.173699672420744, "learning_rate": 7.98921221736107e-07, "loss": 17.5294, "step": 45027 }, { "epoch": 0.8230756575940922, "grad_norm": 6.4816975068802805, "learning_rate": 7.987607161810046e-07, "loss": 17.5246, "step": 45028 }, { "epoch": 0.8230939367905387, "grad_norm": 6.877836804415985, "learning_rate": 7.986002253508802e-07, "loss": 17.3032, "step": 45029 }, { "epoch": 0.8231122159869853, "grad_norm": 8.314811345326001, "learning_rate": 7.984397492462986e-07, "loss": 18.2296, "step": 45030 }, { "epoch": 0.8231304951834317, "grad_norm": 5.801526153626401, "learning_rate": 7.982792878678208e-07, "loss": 17.0179, "step": 45031 }, { "epoch": 0.8231487743798782, "grad_norm": 6.67004146056022, "learning_rate": 7.981188412160084e-07, "loss": 17.7844, "step": 45032 }, { "epoch": 0.8231670535763248, "grad_norm": 6.184200515495449, "learning_rate": 7.979584092914267e-07, "loss": 17.3332, "step": 45033 }, { "epoch": 0.8231853327727713, "grad_norm": 4.704143415635953, "learning_rate": 7.977979920946344e-07, "loss": 16.7959, "step": 45034 }, { "epoch": 0.8232036119692179, "grad_norm": 6.27523772898011, "learning_rate": 7.976375896261951e-07, "loss": 17.4013, "step": 45035 }, { "epoch": 0.8232218911656644, "grad_norm": 6.52313143890936, "learning_rate": 7.974772018866728e-07, "loss": 17.4804, "step": 45036 }, { "epoch": 0.8232401703621108, "grad_norm": 6.690570550733389, "learning_rate": 7.973168288766276e-07, "loss": 17.1673, "step": 45037 }, { "epoch": 0.8232584495585574, "grad_norm": 5.363774835434555, "learning_rate": 7.971564705966228e-07, "loss": 17.0317, "step": 45038 }, { "epoch": 0.8232767287550039, "grad_norm": 6.926377091094718, "learning_rate": 7.969961270472198e-07, "loss": 17.4439, "step": 45039 }, { "epoch": 0.8232950079514505, "grad_norm": 5.9430115316138, "learning_rate": 7.968357982289798e-07, "loss": 17.0901, "step": 45040 }, { "epoch": 0.823313287147897, "grad_norm": 6.4628490889410415, "learning_rate": 7.966754841424667e-07, "loss": 17.4781, "step": 45041 }, { "epoch": 0.8233315663443435, "grad_norm": 6.744232757790407, "learning_rate": 7.965151847882413e-07, "loss": 17.4305, "step": 45042 }, { "epoch": 0.82334984554079, "grad_norm": 4.646619758533851, "learning_rate": 7.963549001668646e-07, "loss": 16.653, "step": 45043 }, { "epoch": 0.8233681247372365, "grad_norm": 6.666066156968739, "learning_rate": 7.961946302788986e-07, "loss": 17.2193, "step": 45044 }, { "epoch": 0.8233864039336831, "grad_norm": 5.057519057492287, "learning_rate": 7.960343751249072e-07, "loss": 17.0813, "step": 45045 }, { "epoch": 0.8234046831301296, "grad_norm": 5.523502507196543, "learning_rate": 7.958741347054488e-07, "loss": 17.1047, "step": 45046 }, { "epoch": 0.8234229623265761, "grad_norm": 5.653522357417508, "learning_rate": 7.957139090210886e-07, "loss": 17.1209, "step": 45047 }, { "epoch": 0.8234412415230227, "grad_norm": 5.429009062318906, "learning_rate": 7.955536980723849e-07, "loss": 17.1761, "step": 45048 }, { "epoch": 0.8234595207194692, "grad_norm": 6.53320601860647, "learning_rate": 7.953935018599018e-07, "loss": 17.5149, "step": 45049 }, { "epoch": 0.8234777999159157, "grad_norm": 6.462674047579035, "learning_rate": 7.952333203841994e-07, "loss": 17.3591, "step": 45050 }, { "epoch": 0.8234960791123622, "grad_norm": 7.5570595705877865, "learning_rate": 7.950731536458384e-07, "loss": 17.9041, "step": 45051 }, { "epoch": 0.8235143583088087, "grad_norm": 4.890347358112051, "learning_rate": 7.949130016453821e-07, "loss": 16.7904, "step": 45052 }, { "epoch": 0.8235326375052553, "grad_norm": 8.315647911036061, "learning_rate": 7.947528643833896e-07, "loss": 17.7942, "step": 45053 }, { "epoch": 0.8235509167017018, "grad_norm": 7.764204136661283, "learning_rate": 7.945927418604238e-07, "loss": 18.027, "step": 45054 }, { "epoch": 0.8235691958981484, "grad_norm": 6.001723009106917, "learning_rate": 7.944326340770464e-07, "loss": 17.1632, "step": 45055 }, { "epoch": 0.8235874750945948, "grad_norm": 5.762533596692325, "learning_rate": 7.942725410338176e-07, "loss": 17.0466, "step": 45056 }, { "epoch": 0.8236057542910413, "grad_norm": 7.067471394088452, "learning_rate": 7.941124627312979e-07, "loss": 17.7337, "step": 45057 }, { "epoch": 0.8236240334874879, "grad_norm": 8.967134207604186, "learning_rate": 7.939523991700499e-07, "loss": 17.7688, "step": 45058 }, { "epoch": 0.8236423126839344, "grad_norm": 7.605427015481866, "learning_rate": 7.937923503506334e-07, "loss": 17.8008, "step": 45059 }, { "epoch": 0.823660591880381, "grad_norm": 7.1305648176289305, "learning_rate": 7.936323162736087e-07, "loss": 17.9252, "step": 45060 }, { "epoch": 0.8236788710768275, "grad_norm": 7.0921187682403035, "learning_rate": 7.934722969395392e-07, "loss": 17.5317, "step": 45061 }, { "epoch": 0.823697150273274, "grad_norm": 6.841632434466359, "learning_rate": 7.933122923489828e-07, "loss": 17.6457, "step": 45062 }, { "epoch": 0.8237154294697205, "grad_norm": 6.056477290801717, "learning_rate": 7.931523025025023e-07, "loss": 17.4951, "step": 45063 }, { "epoch": 0.823733708666167, "grad_norm": 7.773107218904543, "learning_rate": 7.929923274006584e-07, "loss": 17.6967, "step": 45064 }, { "epoch": 0.8237519878626136, "grad_norm": 5.179818907344638, "learning_rate": 7.928323670440108e-07, "loss": 16.9359, "step": 45065 }, { "epoch": 0.8237702670590601, "grad_norm": 5.346499569200775, "learning_rate": 7.926724214331216e-07, "loss": 17.0076, "step": 45066 }, { "epoch": 0.8237885462555066, "grad_norm": 5.60372698082049, "learning_rate": 7.925124905685499e-07, "loss": 17.063, "step": 45067 }, { "epoch": 0.8238068254519532, "grad_norm": 5.454482940235122, "learning_rate": 7.923525744508564e-07, "loss": 17.2167, "step": 45068 }, { "epoch": 0.8238251046483996, "grad_norm": 7.577483156780915, "learning_rate": 7.921926730806029e-07, "loss": 18.6103, "step": 45069 }, { "epoch": 0.8238433838448462, "grad_norm": 5.226435183868067, "learning_rate": 7.920327864583488e-07, "loss": 16.909, "step": 45070 }, { "epoch": 0.8238616630412927, "grad_norm": 5.189400430634533, "learning_rate": 7.918729145846538e-07, "loss": 16.8262, "step": 45071 }, { "epoch": 0.8238799422377392, "grad_norm": 5.532323617353712, "learning_rate": 7.91713057460079e-07, "loss": 17.0645, "step": 45072 }, { "epoch": 0.8238982214341858, "grad_norm": 6.120980256893829, "learning_rate": 7.915532150851857e-07, "loss": 17.3704, "step": 45073 }, { "epoch": 0.8239165006306323, "grad_norm": 5.693278982413847, "learning_rate": 7.913933874605323e-07, "loss": 16.9468, "step": 45074 }, { "epoch": 0.8239347798270789, "grad_norm": 7.815820868537039, "learning_rate": 7.912335745866806e-07, "loss": 17.5001, "step": 45075 }, { "epoch": 0.8239530590235253, "grad_norm": 6.146115870022462, "learning_rate": 7.910737764641901e-07, "loss": 17.1594, "step": 45076 }, { "epoch": 0.8239713382199718, "grad_norm": 6.28171810124966, "learning_rate": 7.9091399309362e-07, "loss": 17.1918, "step": 45077 }, { "epoch": 0.8239896174164184, "grad_norm": 6.540115901295983, "learning_rate": 7.907542244755317e-07, "loss": 17.6884, "step": 45078 }, { "epoch": 0.8240078966128649, "grad_norm": 7.681523403369123, "learning_rate": 7.90594470610484e-07, "loss": 17.3786, "step": 45079 }, { "epoch": 0.8240261758093114, "grad_norm": 5.086117403929194, "learning_rate": 7.904347314990385e-07, "loss": 16.7421, "step": 45080 }, { "epoch": 0.824044455005758, "grad_norm": 5.986725521328797, "learning_rate": 7.902750071417525e-07, "loss": 17.4725, "step": 45081 }, { "epoch": 0.8240627342022044, "grad_norm": 4.450069583713625, "learning_rate": 7.901152975391874e-07, "loss": 16.7143, "step": 45082 }, { "epoch": 0.824081013398651, "grad_norm": 4.655978618367225, "learning_rate": 7.899556026919042e-07, "loss": 16.835, "step": 45083 }, { "epoch": 0.8240992925950975, "grad_norm": 5.586273980646729, "learning_rate": 7.897959226004609e-07, "loss": 17.4863, "step": 45084 }, { "epoch": 0.824117571791544, "grad_norm": 6.801084976441823, "learning_rate": 7.896362572654171e-07, "loss": 17.6496, "step": 45085 }, { "epoch": 0.8241358509879906, "grad_norm": 5.5333366469371095, "learning_rate": 7.894766066873333e-07, "loss": 16.9194, "step": 45086 }, { "epoch": 0.8241541301844371, "grad_norm": 6.422779694628144, "learning_rate": 7.893169708667692e-07, "loss": 17.2322, "step": 45087 }, { "epoch": 0.8241724093808837, "grad_norm": 6.929067507477722, "learning_rate": 7.89157349804282e-07, "loss": 17.4034, "step": 45088 }, { "epoch": 0.8241906885773301, "grad_norm": 7.08184957739454, "learning_rate": 7.889977435004332e-07, "loss": 16.9567, "step": 45089 }, { "epoch": 0.8242089677737766, "grad_norm": 5.889215908014465, "learning_rate": 7.888381519557831e-07, "loss": 17.0941, "step": 45090 }, { "epoch": 0.8242272469702232, "grad_norm": 7.111602248219839, "learning_rate": 7.886785751708881e-07, "loss": 17.7007, "step": 45091 }, { "epoch": 0.8242455261666697, "grad_norm": 6.792851767607086, "learning_rate": 7.885190131463111e-07, "loss": 17.5303, "step": 45092 }, { "epoch": 0.8242638053631163, "grad_norm": 5.427975074269943, "learning_rate": 7.883594658826094e-07, "loss": 16.9484, "step": 45093 }, { "epoch": 0.8242820845595628, "grad_norm": 5.133647963106307, "learning_rate": 7.881999333803409e-07, "loss": 17.0913, "step": 45094 }, { "epoch": 0.8243003637560092, "grad_norm": 6.747650918803856, "learning_rate": 7.880404156400678e-07, "loss": 17.6645, "step": 45095 }, { "epoch": 0.8243186429524558, "grad_norm": 6.884376475830169, "learning_rate": 7.87880912662346e-07, "loss": 17.6043, "step": 45096 }, { "epoch": 0.8243369221489023, "grad_norm": 5.692482938781096, "learning_rate": 7.877214244477372e-07, "loss": 17.1025, "step": 45097 }, { "epoch": 0.8243552013453489, "grad_norm": 6.7493132637484745, "learning_rate": 7.87561950996798e-07, "loss": 17.3977, "step": 45098 }, { "epoch": 0.8243734805417954, "grad_norm": 5.2303719779526405, "learning_rate": 7.874024923100887e-07, "loss": 16.8458, "step": 45099 }, { "epoch": 0.8243917597382419, "grad_norm": 5.8233288068924285, "learning_rate": 7.872430483881699e-07, "loss": 17.4779, "step": 45100 }, { "epoch": 0.8244100389346884, "grad_norm": 5.549943224239449, "learning_rate": 7.870836192315978e-07, "loss": 17.1483, "step": 45101 }, { "epoch": 0.8244283181311349, "grad_norm": 5.6326132019684065, "learning_rate": 7.869242048409314e-07, "loss": 17.0387, "step": 45102 }, { "epoch": 0.8244465973275815, "grad_norm": 4.554671584838614, "learning_rate": 7.867648052167309e-07, "loss": 16.7155, "step": 45103 }, { "epoch": 0.824464876524028, "grad_norm": 6.732692859515088, "learning_rate": 7.866054203595547e-07, "loss": 17.343, "step": 45104 }, { "epoch": 0.8244831557204745, "grad_norm": 7.26704928470609, "learning_rate": 7.864460502699589e-07, "loss": 17.9635, "step": 45105 }, { "epoch": 0.8245014349169211, "grad_norm": 5.322341234925871, "learning_rate": 7.862866949485059e-07, "loss": 16.8021, "step": 45106 }, { "epoch": 0.8245197141133676, "grad_norm": 5.612835370595917, "learning_rate": 7.861273543957509e-07, "loss": 17.0867, "step": 45107 }, { "epoch": 0.8245379933098141, "grad_norm": 6.843598436487459, "learning_rate": 7.85968028612254e-07, "loss": 17.6308, "step": 45108 }, { "epoch": 0.8245562725062606, "grad_norm": 6.8432890384624425, "learning_rate": 7.858087175985746e-07, "loss": 17.6989, "step": 45109 }, { "epoch": 0.8245745517027071, "grad_norm": 5.873926537241739, "learning_rate": 7.856494213552684e-07, "loss": 17.4697, "step": 45110 }, { "epoch": 0.8245928308991537, "grad_norm": 5.891106122108562, "learning_rate": 7.854901398828968e-07, "loss": 17.1529, "step": 45111 }, { "epoch": 0.8246111100956002, "grad_norm": 6.06364741661357, "learning_rate": 7.85330873182017e-07, "loss": 17.1374, "step": 45112 }, { "epoch": 0.8246293892920468, "grad_norm": 7.015316490843395, "learning_rate": 7.851716212531851e-07, "loss": 16.8839, "step": 45113 }, { "epoch": 0.8246476684884932, "grad_norm": 4.88041474160043, "learning_rate": 7.85012384096962e-07, "loss": 16.8009, "step": 45114 }, { "epoch": 0.8246659476849397, "grad_norm": 5.701291735894598, "learning_rate": 7.848531617139049e-07, "loss": 16.8419, "step": 45115 }, { "epoch": 0.8246842268813863, "grad_norm": 5.542936537443077, "learning_rate": 7.846939541045706e-07, "loss": 16.7903, "step": 45116 }, { "epoch": 0.8247025060778328, "grad_norm": 6.325737086738483, "learning_rate": 7.845347612695181e-07, "loss": 17.2342, "step": 45117 }, { "epoch": 0.8247207852742794, "grad_norm": 7.851835567022102, "learning_rate": 7.843755832093064e-07, "loss": 17.7176, "step": 45118 }, { "epoch": 0.8247390644707259, "grad_norm": 5.628372704578824, "learning_rate": 7.842164199244917e-07, "loss": 16.9962, "step": 45119 }, { "epoch": 0.8247573436671723, "grad_norm": 6.046762907988131, "learning_rate": 7.840572714156336e-07, "loss": 16.9417, "step": 45120 }, { "epoch": 0.8247756228636189, "grad_norm": 6.561221039792653, "learning_rate": 7.838981376832888e-07, "loss": 17.6159, "step": 45121 }, { "epoch": 0.8247939020600654, "grad_norm": 8.842764158226181, "learning_rate": 7.837390187280142e-07, "loss": 17.6695, "step": 45122 }, { "epoch": 0.824812181256512, "grad_norm": 6.716020111012721, "learning_rate": 7.835799145503697e-07, "loss": 17.525, "step": 45123 }, { "epoch": 0.8248304604529585, "grad_norm": 6.739706107006372, "learning_rate": 7.834208251509112e-07, "loss": 17.3763, "step": 45124 }, { "epoch": 0.824848739649405, "grad_norm": 5.559804894750232, "learning_rate": 7.832617505301965e-07, "loss": 17.0334, "step": 45125 }, { "epoch": 0.8248670188458516, "grad_norm": 6.520430653586746, "learning_rate": 7.831026906887828e-07, "loss": 17.4263, "step": 45126 }, { "epoch": 0.824885298042298, "grad_norm": 6.430686374288288, "learning_rate": 7.829436456272282e-07, "loss": 17.2914, "step": 45127 }, { "epoch": 0.8249035772387446, "grad_norm": 5.548914574487957, "learning_rate": 7.827846153460917e-07, "loss": 16.9667, "step": 45128 }, { "epoch": 0.8249218564351911, "grad_norm": 6.410263495912134, "learning_rate": 7.826255998459292e-07, "loss": 17.3768, "step": 45129 }, { "epoch": 0.8249401356316376, "grad_norm": 6.163536579657267, "learning_rate": 7.824665991272968e-07, "loss": 17.1121, "step": 45130 }, { "epoch": 0.8249584148280842, "grad_norm": 5.447090491535774, "learning_rate": 7.823076131907536e-07, "loss": 16.9635, "step": 45131 }, { "epoch": 0.8249766940245307, "grad_norm": 6.277875831557109, "learning_rate": 7.821486420368568e-07, "loss": 17.2605, "step": 45132 }, { "epoch": 0.8249949732209773, "grad_norm": 5.8153944692680115, "learning_rate": 7.819896856661618e-07, "loss": 17.2556, "step": 45133 }, { "epoch": 0.8250132524174237, "grad_norm": 5.190955636206653, "learning_rate": 7.818307440792278e-07, "loss": 16.7588, "step": 45134 }, { "epoch": 0.8250315316138702, "grad_norm": 5.027556772502902, "learning_rate": 7.816718172766103e-07, "loss": 16.9984, "step": 45135 }, { "epoch": 0.8250498108103168, "grad_norm": 7.531422638416829, "learning_rate": 7.815129052588666e-07, "loss": 17.5229, "step": 45136 }, { "epoch": 0.8250680900067633, "grad_norm": 6.721192542846253, "learning_rate": 7.813540080265553e-07, "loss": 17.4617, "step": 45137 }, { "epoch": 0.8250863692032099, "grad_norm": 4.952700295649746, "learning_rate": 7.811951255802324e-07, "loss": 17.0589, "step": 45138 }, { "epoch": 0.8251046483996564, "grad_norm": 6.316487364225168, "learning_rate": 7.810362579204534e-07, "loss": 17.1223, "step": 45139 }, { "epoch": 0.8251229275961028, "grad_norm": 6.547813006197947, "learning_rate": 7.808774050477769e-07, "loss": 17.352, "step": 45140 }, { "epoch": 0.8251412067925494, "grad_norm": 7.747415450498818, "learning_rate": 7.807185669627582e-07, "loss": 17.4306, "step": 45141 }, { "epoch": 0.8251594859889959, "grad_norm": 5.274979048802379, "learning_rate": 7.805597436659557e-07, "loss": 17.0673, "step": 45142 }, { "epoch": 0.8251777651854425, "grad_norm": 5.412646689015712, "learning_rate": 7.80400935157925e-07, "loss": 16.8072, "step": 45143 }, { "epoch": 0.825196044381889, "grad_norm": 6.569731611578401, "learning_rate": 7.80242141439222e-07, "loss": 17.2345, "step": 45144 }, { "epoch": 0.8252143235783355, "grad_norm": 4.6946910616622874, "learning_rate": 7.800833625104043e-07, "loss": 16.8045, "step": 45145 }, { "epoch": 0.825232602774782, "grad_norm": 6.270473318820052, "learning_rate": 7.799245983720294e-07, "loss": 17.4262, "step": 45146 }, { "epoch": 0.8252508819712285, "grad_norm": 5.547500149602489, "learning_rate": 7.797658490246512e-07, "loss": 17.1853, "step": 45147 }, { "epoch": 0.825269161167675, "grad_norm": 7.673193589864402, "learning_rate": 7.796071144688283e-07, "loss": 17.8165, "step": 45148 }, { "epoch": 0.8252874403641216, "grad_norm": 5.312169495525218, "learning_rate": 7.794483947051168e-07, "loss": 17.0212, "step": 45149 }, { "epoch": 0.8253057195605681, "grad_norm": 6.850880234676195, "learning_rate": 7.792896897340707e-07, "loss": 17.368, "step": 45150 }, { "epoch": 0.8253239987570147, "grad_norm": 5.52543890671679, "learning_rate": 7.791309995562496e-07, "loss": 17.1264, "step": 45151 }, { "epoch": 0.8253422779534612, "grad_norm": 5.722340227939933, "learning_rate": 7.789723241722069e-07, "loss": 17.383, "step": 45152 }, { "epoch": 0.8253605571499076, "grad_norm": 5.196468901626921, "learning_rate": 7.788136635825011e-07, "loss": 16.6836, "step": 45153 }, { "epoch": 0.8253788363463542, "grad_norm": 5.33085219762215, "learning_rate": 7.786550177876856e-07, "loss": 16.9756, "step": 45154 }, { "epoch": 0.8253971155428007, "grad_norm": 5.866863378184338, "learning_rate": 7.784963867883194e-07, "loss": 17.1872, "step": 45155 }, { "epoch": 0.8254153947392473, "grad_norm": 7.219778686517336, "learning_rate": 7.78337770584956e-07, "loss": 17.6549, "step": 45156 }, { "epoch": 0.8254336739356938, "grad_norm": 6.366649353241625, "learning_rate": 7.781791691781532e-07, "loss": 17.5475, "step": 45157 }, { "epoch": 0.8254519531321403, "grad_norm": 5.673851368012375, "learning_rate": 7.78020582568465e-07, "loss": 17.3862, "step": 45158 }, { "epoch": 0.8254702323285869, "grad_norm": 6.246157346483987, "learning_rate": 7.778620107564499e-07, "loss": 17.0535, "step": 45159 }, { "epoch": 0.8254885115250333, "grad_norm": 7.635023206487869, "learning_rate": 7.777034537426615e-07, "loss": 18.0351, "step": 45160 }, { "epoch": 0.8255067907214799, "grad_norm": 5.590590144992112, "learning_rate": 7.775449115276551e-07, "loss": 16.9688, "step": 45161 }, { "epoch": 0.8255250699179264, "grad_norm": 6.628132880124723, "learning_rate": 7.773863841119877e-07, "loss": 17.6286, "step": 45162 }, { "epoch": 0.8255433491143729, "grad_norm": 5.978095424356533, "learning_rate": 7.77227871496215e-07, "loss": 16.9487, "step": 45163 }, { "epoch": 0.8255616283108195, "grad_norm": 6.927860521073268, "learning_rate": 7.770693736808916e-07, "loss": 17.6746, "step": 45164 }, { "epoch": 0.825579907507266, "grad_norm": 5.996887159702962, "learning_rate": 7.76910890666575e-07, "loss": 17.27, "step": 45165 }, { "epoch": 0.8255981867037125, "grad_norm": 5.45282930905596, "learning_rate": 7.767524224538187e-07, "loss": 17.118, "step": 45166 }, { "epoch": 0.825616465900159, "grad_norm": 5.290437307603774, "learning_rate": 7.765939690431779e-07, "loss": 16.9717, "step": 45167 }, { "epoch": 0.8256347450966055, "grad_norm": 6.574466369203911, "learning_rate": 7.764355304352095e-07, "loss": 17.5336, "step": 45168 }, { "epoch": 0.8256530242930521, "grad_norm": 6.237482949499903, "learning_rate": 7.762771066304686e-07, "loss": 17.274, "step": 45169 }, { "epoch": 0.8256713034894986, "grad_norm": 7.443741541189728, "learning_rate": 7.761186976295082e-07, "loss": 17.7271, "step": 45170 }, { "epoch": 0.8256895826859452, "grad_norm": 6.131402901105602, "learning_rate": 7.759603034328856e-07, "loss": 17.2008, "step": 45171 }, { "epoch": 0.8257078618823916, "grad_norm": 6.988139736165739, "learning_rate": 7.758019240411557e-07, "loss": 17.4433, "step": 45172 }, { "epoch": 0.8257261410788381, "grad_norm": 6.9941440119565454, "learning_rate": 7.756435594548744e-07, "loss": 17.6533, "step": 45173 }, { "epoch": 0.8257444202752847, "grad_norm": 6.0482601590053084, "learning_rate": 7.754852096745957e-07, "loss": 17.4609, "step": 45174 }, { "epoch": 0.8257626994717312, "grad_norm": 5.488111718179924, "learning_rate": 7.753268747008736e-07, "loss": 16.9019, "step": 45175 }, { "epoch": 0.8257809786681778, "grad_norm": 7.387735374741142, "learning_rate": 7.751685545342658e-07, "loss": 17.8147, "step": 45176 }, { "epoch": 0.8257992578646243, "grad_norm": 5.210020046596184, "learning_rate": 7.750102491753253e-07, "loss": 17.0976, "step": 45177 }, { "epoch": 0.8258175370610707, "grad_norm": 6.783944058151926, "learning_rate": 7.748519586246061e-07, "loss": 17.5674, "step": 45178 }, { "epoch": 0.8258358162575173, "grad_norm": 7.222160296296763, "learning_rate": 7.746936828826656e-07, "loss": 17.7914, "step": 45179 }, { "epoch": 0.8258540954539638, "grad_norm": 6.176069644864213, "learning_rate": 7.745354219500556e-07, "loss": 16.9492, "step": 45180 }, { "epoch": 0.8258723746504104, "grad_norm": 11.62897708942681, "learning_rate": 7.743771758273322e-07, "loss": 18.3008, "step": 45181 }, { "epoch": 0.8258906538468569, "grad_norm": 5.2659336949950255, "learning_rate": 7.742189445150517e-07, "loss": 16.8947, "step": 45182 }, { "epoch": 0.8259089330433034, "grad_norm": 6.627147791976073, "learning_rate": 7.740607280137669e-07, "loss": 17.4832, "step": 45183 }, { "epoch": 0.82592721223975, "grad_norm": 6.680204774863978, "learning_rate": 7.739025263240313e-07, "loss": 17.6298, "step": 45184 }, { "epoch": 0.8259454914361964, "grad_norm": 6.06190736993303, "learning_rate": 7.737443394464017e-07, "loss": 17.298, "step": 45185 }, { "epoch": 0.825963770632643, "grad_norm": 6.199775998533533, "learning_rate": 7.735861673814305e-07, "loss": 17.3661, "step": 45186 }, { "epoch": 0.8259820498290895, "grad_norm": 7.5748705151345135, "learning_rate": 7.734280101296737e-07, "loss": 17.5806, "step": 45187 }, { "epoch": 0.826000329025536, "grad_norm": 6.166077759020057, "learning_rate": 7.732698676916856e-07, "loss": 17.2269, "step": 45188 }, { "epoch": 0.8260186082219826, "grad_norm": 8.068041114813145, "learning_rate": 7.731117400680182e-07, "loss": 18.4819, "step": 45189 }, { "epoch": 0.8260368874184291, "grad_norm": 7.363307173369931, "learning_rate": 7.729536272592275e-07, "loss": 17.7744, "step": 45190 }, { "epoch": 0.8260551666148757, "grad_norm": 5.315274823240889, "learning_rate": 7.727955292658689e-07, "loss": 17.1396, "step": 45191 }, { "epoch": 0.8260734458113221, "grad_norm": 7.065050883093407, "learning_rate": 7.726374460884939e-07, "loss": 17.2924, "step": 45192 }, { "epoch": 0.8260917250077686, "grad_norm": 4.976651510839763, "learning_rate": 7.724793777276591e-07, "loss": 16.9571, "step": 45193 }, { "epoch": 0.8261100042042152, "grad_norm": 7.719010551913481, "learning_rate": 7.723213241839167e-07, "loss": 17.8324, "step": 45194 }, { "epoch": 0.8261282834006617, "grad_norm": 5.807464589414449, "learning_rate": 7.721632854578204e-07, "loss": 17.2319, "step": 45195 }, { "epoch": 0.8261465625971083, "grad_norm": 7.50095978926302, "learning_rate": 7.720052615499263e-07, "loss": 17.3241, "step": 45196 }, { "epoch": 0.8261648417935548, "grad_norm": 5.067041491835426, "learning_rate": 7.718472524607867e-07, "loss": 17.0059, "step": 45197 }, { "epoch": 0.8261831209900012, "grad_norm": 8.186250605641996, "learning_rate": 7.716892581909541e-07, "loss": 18.2981, "step": 45198 }, { "epoch": 0.8262014001864478, "grad_norm": 6.071049305573531, "learning_rate": 7.71531278740984e-07, "loss": 17.2604, "step": 45199 }, { "epoch": 0.8262196793828943, "grad_norm": 6.125136296668863, "learning_rate": 7.71373314111431e-07, "loss": 17.4662, "step": 45200 }, { "epoch": 0.8262379585793409, "grad_norm": 6.14773456942154, "learning_rate": 7.712153643028464e-07, "loss": 17.4193, "step": 45201 }, { "epoch": 0.8262562377757874, "grad_norm": 5.074022159942285, "learning_rate": 7.710574293157863e-07, "loss": 17.0579, "step": 45202 }, { "epoch": 0.8262745169722339, "grad_norm": 5.942024854931124, "learning_rate": 7.708995091508014e-07, "loss": 17.3215, "step": 45203 }, { "epoch": 0.8262927961686805, "grad_norm": 5.680173046969475, "learning_rate": 7.707416038084486e-07, "loss": 17.1119, "step": 45204 }, { "epoch": 0.8263110753651269, "grad_norm": 6.569008512536519, "learning_rate": 7.705837132892791e-07, "loss": 17.6492, "step": 45205 }, { "epoch": 0.8263293545615735, "grad_norm": 6.768640384320478, "learning_rate": 7.70425837593845e-07, "loss": 17.8253, "step": 45206 }, { "epoch": 0.82634763375802, "grad_norm": 5.794064147800063, "learning_rate": 7.702679767227028e-07, "loss": 17.2265, "step": 45207 }, { "epoch": 0.8263659129544665, "grad_norm": 5.966286332271544, "learning_rate": 7.701101306764036e-07, "loss": 17.1572, "step": 45208 }, { "epoch": 0.8263841921509131, "grad_norm": 5.837982684949556, "learning_rate": 7.699522994555009e-07, "loss": 17.3029, "step": 45209 }, { "epoch": 0.8264024713473596, "grad_norm": 6.994250247454345, "learning_rate": 7.697944830605497e-07, "loss": 17.7638, "step": 45210 }, { "epoch": 0.8264207505438061, "grad_norm": 5.854660488340495, "learning_rate": 7.696366814921013e-07, "loss": 17.22, "step": 45211 }, { "epoch": 0.8264390297402526, "grad_norm": 6.617667505373211, "learning_rate": 7.694788947507087e-07, "loss": 17.4914, "step": 45212 }, { "epoch": 0.8264573089366991, "grad_norm": 6.181811312146503, "learning_rate": 7.693211228369263e-07, "loss": 17.1196, "step": 45213 }, { "epoch": 0.8264755881331457, "grad_norm": 5.343183564494031, "learning_rate": 7.691633657513064e-07, "loss": 16.836, "step": 45214 }, { "epoch": 0.8264938673295922, "grad_norm": 9.86200895079609, "learning_rate": 7.690056234944005e-07, "loss": 17.9272, "step": 45215 }, { "epoch": 0.8265121465260387, "grad_norm": 5.492319500990356, "learning_rate": 7.688478960667628e-07, "loss": 17.3759, "step": 45216 }, { "epoch": 0.8265304257224853, "grad_norm": 5.424267878182993, "learning_rate": 7.68690183468947e-07, "loss": 17.2083, "step": 45217 }, { "epoch": 0.8265487049189317, "grad_norm": 6.312259010079122, "learning_rate": 7.685324857015036e-07, "loss": 17.39, "step": 45218 }, { "epoch": 0.8265669841153783, "grad_norm": 6.580762229261582, "learning_rate": 7.683748027649885e-07, "loss": 17.3225, "step": 45219 }, { "epoch": 0.8265852633118248, "grad_norm": 7.267984423628073, "learning_rate": 7.682171346599509e-07, "loss": 17.794, "step": 45220 }, { "epoch": 0.8266035425082713, "grad_norm": 5.856778012943303, "learning_rate": 7.680594813869463e-07, "loss": 17.3343, "step": 45221 }, { "epoch": 0.8266218217047179, "grad_norm": 6.766324911209437, "learning_rate": 7.679018429465256e-07, "loss": 17.3641, "step": 45222 }, { "epoch": 0.8266401009011644, "grad_norm": 7.078956090270729, "learning_rate": 7.677442193392409e-07, "loss": 17.5227, "step": 45223 }, { "epoch": 0.826658380097611, "grad_norm": 7.489355682682853, "learning_rate": 7.675866105656471e-07, "loss": 17.7397, "step": 45224 }, { "epoch": 0.8266766592940574, "grad_norm": 6.720900021948186, "learning_rate": 7.674290166262932e-07, "loss": 17.2262, "step": 45225 }, { "epoch": 0.8266949384905039, "grad_norm": 7.304058294033957, "learning_rate": 7.672714375217333e-07, "loss": 17.7741, "step": 45226 }, { "epoch": 0.8267132176869505, "grad_norm": 4.743671562742903, "learning_rate": 7.671138732525207e-07, "loss": 16.7509, "step": 45227 }, { "epoch": 0.826731496883397, "grad_norm": 5.862878832812926, "learning_rate": 7.669563238192074e-07, "loss": 17.3048, "step": 45228 }, { "epoch": 0.8267497760798436, "grad_norm": 5.5878543489532975, "learning_rate": 7.667987892223432e-07, "loss": 17.3636, "step": 45229 }, { "epoch": 0.82676805527629, "grad_norm": 6.088979364087596, "learning_rate": 7.66641269462483e-07, "loss": 17.196, "step": 45230 }, { "epoch": 0.8267863344727365, "grad_norm": 7.02236621289365, "learning_rate": 7.66483764540178e-07, "loss": 17.477, "step": 45231 }, { "epoch": 0.8268046136691831, "grad_norm": 4.696629659167113, "learning_rate": 7.663262744559785e-07, "loss": 16.6656, "step": 45232 }, { "epoch": 0.8268228928656296, "grad_norm": 7.046257191255333, "learning_rate": 7.661687992104394e-07, "loss": 17.4953, "step": 45233 }, { "epoch": 0.8268411720620762, "grad_norm": 8.877276699682737, "learning_rate": 7.660113388041107e-07, "loss": 18.4519, "step": 45234 }, { "epoch": 0.8268594512585227, "grad_norm": 5.784990284606842, "learning_rate": 7.658538932375442e-07, "loss": 17.0825, "step": 45235 }, { "epoch": 0.8268777304549692, "grad_norm": 6.425009906351949, "learning_rate": 7.656964625112933e-07, "loss": 17.3881, "step": 45236 }, { "epoch": 0.8268960096514157, "grad_norm": 5.912588273024868, "learning_rate": 7.655390466259083e-07, "loss": 17.2812, "step": 45237 }, { "epoch": 0.8269142888478622, "grad_norm": 6.7703706308358145, "learning_rate": 7.653816455819424e-07, "loss": 17.8219, "step": 45238 }, { "epoch": 0.8269325680443088, "grad_norm": 5.759140643913773, "learning_rate": 7.652242593799464e-07, "loss": 17.0271, "step": 45239 }, { "epoch": 0.8269508472407553, "grad_norm": 5.46343646803537, "learning_rate": 7.650668880204709e-07, "loss": 17.0717, "step": 45240 }, { "epoch": 0.8269691264372018, "grad_norm": 5.95903971218255, "learning_rate": 7.649095315040694e-07, "loss": 17.2667, "step": 45241 }, { "epoch": 0.8269874056336484, "grad_norm": 6.355946170379668, "learning_rate": 7.647521898312926e-07, "loss": 17.1542, "step": 45242 }, { "epoch": 0.8270056848300948, "grad_norm": 5.182515369085798, "learning_rate": 7.645948630026906e-07, "loss": 17.059, "step": 45243 }, { "epoch": 0.8270239640265414, "grad_norm": 5.955998591361094, "learning_rate": 7.644375510188162e-07, "loss": 17.4309, "step": 45244 }, { "epoch": 0.8270422432229879, "grad_norm": 6.6040959637400665, "learning_rate": 7.642802538802213e-07, "loss": 17.3271, "step": 45245 }, { "epoch": 0.8270605224194344, "grad_norm": 5.829615126395823, "learning_rate": 7.64122971587456e-07, "loss": 17.1398, "step": 45246 }, { "epoch": 0.827078801615881, "grad_norm": 6.640267379056129, "learning_rate": 7.63965704141073e-07, "loss": 17.0389, "step": 45247 }, { "epoch": 0.8270970808123275, "grad_norm": 5.191292731542104, "learning_rate": 7.638084515416216e-07, "loss": 16.9223, "step": 45248 }, { "epoch": 0.8271153600087741, "grad_norm": 4.8506174936492, "learning_rate": 7.636512137896551e-07, "loss": 16.9393, "step": 45249 }, { "epoch": 0.8271336392052205, "grad_norm": 7.069581888090901, "learning_rate": 7.634939908857236e-07, "loss": 17.152, "step": 45250 }, { "epoch": 0.827151918401667, "grad_norm": 6.204498682821766, "learning_rate": 7.633367828303767e-07, "loss": 17.1653, "step": 45251 }, { "epoch": 0.8271701975981136, "grad_norm": 6.213593939232234, "learning_rate": 7.631795896241678e-07, "loss": 17.5232, "step": 45252 }, { "epoch": 0.8271884767945601, "grad_norm": 5.176477871840211, "learning_rate": 7.630224112676454e-07, "loss": 16.8538, "step": 45253 }, { "epoch": 0.8272067559910067, "grad_norm": 7.0393851085052, "learning_rate": 7.628652477613624e-07, "loss": 17.7831, "step": 45254 }, { "epoch": 0.8272250351874532, "grad_norm": 5.595576236480289, "learning_rate": 7.6270809910587e-07, "loss": 16.8769, "step": 45255 }, { "epoch": 0.8272433143838996, "grad_norm": 4.859817646289136, "learning_rate": 7.625509653017177e-07, "loss": 16.9601, "step": 45256 }, { "epoch": 0.8272615935803462, "grad_norm": 4.628420787414962, "learning_rate": 7.623938463494556e-07, "loss": 16.6876, "step": 45257 }, { "epoch": 0.8272798727767927, "grad_norm": 5.316723003553907, "learning_rate": 7.622367422496369e-07, "loss": 16.7642, "step": 45258 }, { "epoch": 0.8272981519732393, "grad_norm": 5.943696056954914, "learning_rate": 7.620796530028102e-07, "loss": 17.4511, "step": 45259 }, { "epoch": 0.8273164311696858, "grad_norm": 6.149754525309138, "learning_rate": 7.619225786095252e-07, "loss": 17.352, "step": 45260 }, { "epoch": 0.8273347103661323, "grad_norm": 5.358301432233522, "learning_rate": 7.617655190703355e-07, "loss": 17.1131, "step": 45261 }, { "epoch": 0.8273529895625789, "grad_norm": 5.232508752042774, "learning_rate": 7.616084743857882e-07, "loss": 17.028, "step": 45262 }, { "epoch": 0.8273712687590253, "grad_norm": 6.619862141935811, "learning_rate": 7.614514445564358e-07, "loss": 17.5085, "step": 45263 }, { "epoch": 0.8273895479554719, "grad_norm": 5.766872216324644, "learning_rate": 7.612944295828295e-07, "loss": 17.2209, "step": 45264 }, { "epoch": 0.8274078271519184, "grad_norm": 5.960736580064161, "learning_rate": 7.611374294655172e-07, "loss": 17.3847, "step": 45265 }, { "epoch": 0.8274261063483649, "grad_norm": 4.7629418254591815, "learning_rate": 7.609804442050511e-07, "loss": 16.84, "step": 45266 }, { "epoch": 0.8274443855448115, "grad_norm": 7.028675979532865, "learning_rate": 7.608234738019815e-07, "loss": 17.4084, "step": 45267 }, { "epoch": 0.827462664741258, "grad_norm": 6.898960969795347, "learning_rate": 7.606665182568557e-07, "loss": 17.5288, "step": 45268 }, { "epoch": 0.8274809439377045, "grad_norm": 7.518736816811347, "learning_rate": 7.605095775702276e-07, "loss": 17.5583, "step": 45269 }, { "epoch": 0.827499223134151, "grad_norm": 4.9843999480880345, "learning_rate": 7.603526517426457e-07, "loss": 16.8126, "step": 45270 }, { "epoch": 0.8275175023305975, "grad_norm": 5.83549405069628, "learning_rate": 7.60195740774658e-07, "loss": 17.3285, "step": 45271 }, { "epoch": 0.8275357815270441, "grad_norm": 5.880081171534115, "learning_rate": 7.600388446668172e-07, "loss": 17.4075, "step": 45272 }, { "epoch": 0.8275540607234906, "grad_norm": 5.890272796457483, "learning_rate": 7.59881963419673e-07, "loss": 17.2791, "step": 45273 }, { "epoch": 0.8275723399199372, "grad_norm": 8.596617221185637, "learning_rate": 7.597250970337733e-07, "loss": 17.6497, "step": 45274 }, { "epoch": 0.8275906191163837, "grad_norm": 6.714131821831043, "learning_rate": 7.595682455096703e-07, "loss": 17.5135, "step": 45275 }, { "epoch": 0.8276088983128301, "grad_norm": 6.0337354931748735, "learning_rate": 7.594114088479127e-07, "loss": 17.3337, "step": 45276 }, { "epoch": 0.8276271775092767, "grad_norm": 6.030555975276544, "learning_rate": 7.592545870490492e-07, "loss": 17.1661, "step": 45277 }, { "epoch": 0.8276454567057232, "grad_norm": 6.0789623529537815, "learning_rate": 7.590977801136312e-07, "loss": 17.4928, "step": 45278 }, { "epoch": 0.8276637359021698, "grad_norm": 4.658315048496322, "learning_rate": 7.589409880422066e-07, "loss": 16.6568, "step": 45279 }, { "epoch": 0.8276820150986163, "grad_norm": 6.814037518057788, "learning_rate": 7.587842108353272e-07, "loss": 17.5307, "step": 45280 }, { "epoch": 0.8277002942950628, "grad_norm": 5.816176424917887, "learning_rate": 7.586274484935396e-07, "loss": 17.2064, "step": 45281 }, { "epoch": 0.8277185734915093, "grad_norm": 6.759631817241584, "learning_rate": 7.584707010173947e-07, "loss": 17.4162, "step": 45282 }, { "epoch": 0.8277368526879558, "grad_norm": 5.127246495981753, "learning_rate": 7.583139684074431e-07, "loss": 17.1365, "step": 45283 }, { "epoch": 0.8277551318844023, "grad_norm": 6.845520187813572, "learning_rate": 7.581572506642332e-07, "loss": 17.6245, "step": 45284 }, { "epoch": 0.8277734110808489, "grad_norm": 6.0400919066826, "learning_rate": 7.580005477883129e-07, "loss": 17.1665, "step": 45285 }, { "epoch": 0.8277916902772954, "grad_norm": 5.999238299283896, "learning_rate": 7.578438597802334e-07, "loss": 17.5191, "step": 45286 }, { "epoch": 0.827809969473742, "grad_norm": 6.409498226664962, "learning_rate": 7.576871866405433e-07, "loss": 17.5451, "step": 45287 }, { "epoch": 0.8278282486701884, "grad_norm": 5.707570423100298, "learning_rate": 7.575305283697904e-07, "loss": 17.1244, "step": 45288 }, { "epoch": 0.8278465278666349, "grad_norm": 7.103171127840576, "learning_rate": 7.573738849685241e-07, "loss": 17.2999, "step": 45289 }, { "epoch": 0.8278648070630815, "grad_norm": 5.407827879844267, "learning_rate": 7.57217256437296e-07, "loss": 17.1302, "step": 45290 }, { "epoch": 0.827883086259528, "grad_norm": 7.249440092669952, "learning_rate": 7.570606427766519e-07, "loss": 17.6527, "step": 45291 }, { "epoch": 0.8279013654559746, "grad_norm": 6.366340466426548, "learning_rate": 7.569040439871439e-07, "loss": 17.3087, "step": 45292 }, { "epoch": 0.8279196446524211, "grad_norm": 7.050240697141724, "learning_rate": 7.567474600693181e-07, "loss": 17.7572, "step": 45293 }, { "epoch": 0.8279379238488676, "grad_norm": 6.797077804376542, "learning_rate": 7.565908910237236e-07, "loss": 17.7379, "step": 45294 }, { "epoch": 0.8279562030453141, "grad_norm": 6.267179392557795, "learning_rate": 7.564343368509108e-07, "loss": 17.5979, "step": 45295 }, { "epoch": 0.8279744822417606, "grad_norm": 5.377834519950673, "learning_rate": 7.562777975514263e-07, "loss": 17.0005, "step": 45296 }, { "epoch": 0.8279927614382072, "grad_norm": 5.931867461481558, "learning_rate": 7.561212731258211e-07, "loss": 17.1721, "step": 45297 }, { "epoch": 0.8280110406346537, "grad_norm": 4.80425982998343, "learning_rate": 7.559647635746408e-07, "loss": 16.852, "step": 45298 }, { "epoch": 0.8280293198311002, "grad_norm": 6.024582217950703, "learning_rate": 7.558082688984364e-07, "loss": 17.2525, "step": 45299 }, { "epoch": 0.8280475990275468, "grad_norm": 5.728489028438697, "learning_rate": 7.556517890977567e-07, "loss": 17.213, "step": 45300 }, { "epoch": 0.8280658782239932, "grad_norm": 6.328983204243684, "learning_rate": 7.554953241731494e-07, "loss": 17.0696, "step": 45301 }, { "epoch": 0.8280841574204398, "grad_norm": 4.379915623668025, "learning_rate": 7.553388741251611e-07, "loss": 16.617, "step": 45302 }, { "epoch": 0.8281024366168863, "grad_norm": 5.554621027153399, "learning_rate": 7.551824389543433e-07, "loss": 17.2589, "step": 45303 }, { "epoch": 0.8281207158133328, "grad_norm": 6.054175378896219, "learning_rate": 7.550260186612423e-07, "loss": 17.3905, "step": 45304 }, { "epoch": 0.8281389950097794, "grad_norm": 6.006798946045218, "learning_rate": 7.548696132464057e-07, "loss": 17.1704, "step": 45305 }, { "epoch": 0.8281572742062259, "grad_norm": 5.515455361343248, "learning_rate": 7.547132227103837e-07, "loss": 16.9756, "step": 45306 }, { "epoch": 0.8281755534026725, "grad_norm": 5.872425214514421, "learning_rate": 7.545568470537229e-07, "loss": 17.2663, "step": 45307 }, { "epoch": 0.8281938325991189, "grad_norm": 5.491645459951151, "learning_rate": 7.544004862769711e-07, "loss": 17.0005, "step": 45308 }, { "epoch": 0.8282121117955654, "grad_norm": 6.793976530082423, "learning_rate": 7.542441403806789e-07, "loss": 17.9568, "step": 45309 }, { "epoch": 0.828230390992012, "grad_norm": 5.318508610652179, "learning_rate": 7.540878093653914e-07, "loss": 17.1285, "step": 45310 }, { "epoch": 0.8282486701884585, "grad_norm": 6.405799349510311, "learning_rate": 7.539314932316588e-07, "loss": 17.2472, "step": 45311 }, { "epoch": 0.8282669493849051, "grad_norm": 5.772489042676024, "learning_rate": 7.537751919800279e-07, "loss": 17.0629, "step": 45312 }, { "epoch": 0.8282852285813516, "grad_norm": 5.699188717844438, "learning_rate": 7.536189056110448e-07, "loss": 17.1781, "step": 45313 }, { "epoch": 0.828303507777798, "grad_norm": 6.576060174388388, "learning_rate": 7.53462634125261e-07, "loss": 17.4205, "step": 45314 }, { "epoch": 0.8283217869742446, "grad_norm": 6.28632258494489, "learning_rate": 7.533063775232214e-07, "loss": 17.3583, "step": 45315 }, { "epoch": 0.8283400661706911, "grad_norm": 6.409816284863663, "learning_rate": 7.531501358054738e-07, "loss": 17.1994, "step": 45316 }, { "epoch": 0.8283583453671377, "grad_norm": 6.76693766676055, "learning_rate": 7.529939089725663e-07, "loss": 17.2841, "step": 45317 }, { "epoch": 0.8283766245635842, "grad_norm": 6.007728480344512, "learning_rate": 7.528376970250478e-07, "loss": 17.3056, "step": 45318 }, { "epoch": 0.8283949037600307, "grad_norm": 4.908046944463885, "learning_rate": 7.526814999634629e-07, "loss": 16.7949, "step": 45319 }, { "epoch": 0.8284131829564773, "grad_norm": 6.26575665812731, "learning_rate": 7.525253177883629e-07, "loss": 17.298, "step": 45320 }, { "epoch": 0.8284314621529237, "grad_norm": 5.903040750742032, "learning_rate": 7.523691505002923e-07, "loss": 16.8367, "step": 45321 }, { "epoch": 0.8284497413493703, "grad_norm": 5.858603012656531, "learning_rate": 7.522129980997983e-07, "loss": 17.1684, "step": 45322 }, { "epoch": 0.8284680205458168, "grad_norm": 6.042114358653987, "learning_rate": 7.520568605874307e-07, "loss": 17.3504, "step": 45323 }, { "epoch": 0.8284862997422633, "grad_norm": 4.100244130516097, "learning_rate": 7.51900737963735e-07, "loss": 16.6807, "step": 45324 }, { "epoch": 0.8285045789387099, "grad_norm": 6.829076145509798, "learning_rate": 7.517446302292569e-07, "loss": 17.336, "step": 45325 }, { "epoch": 0.8285228581351564, "grad_norm": 5.732197498171988, "learning_rate": 7.515885373845455e-07, "loss": 16.9606, "step": 45326 }, { "epoch": 0.828541137331603, "grad_norm": 6.297436302686675, "learning_rate": 7.514324594301481e-07, "loss": 17.226, "step": 45327 }, { "epoch": 0.8285594165280494, "grad_norm": 5.541703716762622, "learning_rate": 7.51276396366612e-07, "loss": 17.037, "step": 45328 }, { "epoch": 0.8285776957244959, "grad_norm": 4.921044703738005, "learning_rate": 7.511203481944834e-07, "loss": 16.6872, "step": 45329 }, { "epoch": 0.8285959749209425, "grad_norm": 5.420458954151251, "learning_rate": 7.509643149143081e-07, "loss": 17.2279, "step": 45330 }, { "epoch": 0.828614254117389, "grad_norm": 5.6278118850841885, "learning_rate": 7.508082965266355e-07, "loss": 17.0849, "step": 45331 }, { "epoch": 0.8286325333138356, "grad_norm": 6.926249045863524, "learning_rate": 7.506522930320109e-07, "loss": 17.7806, "step": 45332 }, { "epoch": 0.828650812510282, "grad_norm": 6.128231525400826, "learning_rate": 7.504963044309804e-07, "loss": 17.4384, "step": 45333 }, { "epoch": 0.8286690917067285, "grad_norm": 6.231827403020596, "learning_rate": 7.503403307240925e-07, "loss": 17.6621, "step": 45334 }, { "epoch": 0.8286873709031751, "grad_norm": 5.679362615583685, "learning_rate": 7.501843719118917e-07, "loss": 16.9573, "step": 45335 }, { "epoch": 0.8287056500996216, "grad_norm": 6.444068675946883, "learning_rate": 7.500284279949261e-07, "loss": 17.4502, "step": 45336 }, { "epoch": 0.8287239292960682, "grad_norm": 7.9665824325891235, "learning_rate": 7.498724989737432e-07, "loss": 18.4873, "step": 45337 }, { "epoch": 0.8287422084925147, "grad_norm": 4.8337898120263425, "learning_rate": 7.497165848488885e-07, "loss": 16.794, "step": 45338 }, { "epoch": 0.8287604876889612, "grad_norm": 5.144257459406226, "learning_rate": 7.49560685620907e-07, "loss": 17.1755, "step": 45339 }, { "epoch": 0.8287787668854077, "grad_norm": 7.14019518161494, "learning_rate": 7.494048012903477e-07, "loss": 17.5003, "step": 45340 }, { "epoch": 0.8287970460818542, "grad_norm": 5.607844644665927, "learning_rate": 7.492489318577545e-07, "loss": 17.1854, "step": 45341 }, { "epoch": 0.8288153252783008, "grad_norm": 6.123802884437232, "learning_rate": 7.49093077323676e-07, "loss": 17.4761, "step": 45342 }, { "epoch": 0.8288336044747473, "grad_norm": 6.435226949992977, "learning_rate": 7.489372376886567e-07, "loss": 17.3505, "step": 45343 }, { "epoch": 0.8288518836711938, "grad_norm": 7.267375851234728, "learning_rate": 7.487814129532439e-07, "loss": 17.2878, "step": 45344 }, { "epoch": 0.8288701628676404, "grad_norm": 6.003616599994457, "learning_rate": 7.486256031179828e-07, "loss": 16.96, "step": 45345 }, { "epoch": 0.8288884420640869, "grad_norm": 6.167111806763968, "learning_rate": 7.484698081834212e-07, "loss": 17.4356, "step": 45346 }, { "epoch": 0.8289067212605334, "grad_norm": 5.314631123278593, "learning_rate": 7.483140281501022e-07, "loss": 17.1031, "step": 45347 }, { "epoch": 0.8289250004569799, "grad_norm": 6.26136731592189, "learning_rate": 7.481582630185757e-07, "loss": 17.201, "step": 45348 }, { "epoch": 0.8289432796534264, "grad_norm": 5.142309368135656, "learning_rate": 7.480025127893847e-07, "loss": 16.8755, "step": 45349 }, { "epoch": 0.828961558849873, "grad_norm": 6.028276921756555, "learning_rate": 7.478467774630755e-07, "loss": 17.0699, "step": 45350 }, { "epoch": 0.8289798380463195, "grad_norm": 6.847040824339269, "learning_rate": 7.476910570401946e-07, "loss": 17.5586, "step": 45351 }, { "epoch": 0.828998117242766, "grad_norm": 5.192451793272662, "learning_rate": 7.475353515212869e-07, "loss": 17.1111, "step": 45352 }, { "epoch": 0.8290163964392125, "grad_norm": 4.665900182265639, "learning_rate": 7.47379660906899e-07, "loss": 16.7444, "step": 45353 }, { "epoch": 0.829034675635659, "grad_norm": 5.026751737071725, "learning_rate": 7.472239851975771e-07, "loss": 17.0783, "step": 45354 }, { "epoch": 0.8290529548321056, "grad_norm": 6.822586905480096, "learning_rate": 7.470683243938653e-07, "loss": 17.1417, "step": 45355 }, { "epoch": 0.8290712340285521, "grad_norm": 6.439570138748445, "learning_rate": 7.469126784963109e-07, "loss": 17.1887, "step": 45356 }, { "epoch": 0.8290895132249986, "grad_norm": 6.171518833615788, "learning_rate": 7.467570475054586e-07, "loss": 17.3268, "step": 45357 }, { "epoch": 0.8291077924214452, "grad_norm": 8.08735549630211, "learning_rate": 7.466014314218522e-07, "loss": 18.34, "step": 45358 }, { "epoch": 0.8291260716178916, "grad_norm": 4.938889758573651, "learning_rate": 7.464458302460403e-07, "loss": 16.918, "step": 45359 }, { "epoch": 0.8291443508143382, "grad_norm": 5.561246451012183, "learning_rate": 7.462902439785668e-07, "loss": 17.0965, "step": 45360 }, { "epoch": 0.8291626300107847, "grad_norm": 8.735191813031689, "learning_rate": 7.461346726199753e-07, "loss": 17.9939, "step": 45361 }, { "epoch": 0.8291809092072312, "grad_norm": 6.15135137046459, "learning_rate": 7.45979116170813e-07, "loss": 17.1337, "step": 45362 }, { "epoch": 0.8291991884036778, "grad_norm": 8.43530797768387, "learning_rate": 7.458235746316256e-07, "loss": 18.0729, "step": 45363 }, { "epoch": 0.8292174676001243, "grad_norm": 5.527279309275948, "learning_rate": 7.456680480029565e-07, "loss": 17.0084, "step": 45364 }, { "epoch": 0.8292357467965709, "grad_norm": 5.129624865050212, "learning_rate": 7.455125362853527e-07, "loss": 16.8169, "step": 45365 }, { "epoch": 0.8292540259930173, "grad_norm": 6.195566864160674, "learning_rate": 7.453570394793586e-07, "loss": 17.3651, "step": 45366 }, { "epoch": 0.8292723051894638, "grad_norm": 7.619098297797197, "learning_rate": 7.45201557585517e-07, "loss": 17.7788, "step": 45367 }, { "epoch": 0.8292905843859104, "grad_norm": 5.778323516440533, "learning_rate": 7.450460906043766e-07, "loss": 17.0856, "step": 45368 }, { "epoch": 0.8293088635823569, "grad_norm": 4.973010631002067, "learning_rate": 7.448906385364802e-07, "loss": 16.7043, "step": 45369 }, { "epoch": 0.8293271427788035, "grad_norm": 5.908074996905778, "learning_rate": 7.447352013823717e-07, "loss": 17.1843, "step": 45370 }, { "epoch": 0.82934542197525, "grad_norm": 5.4317904282724, "learning_rate": 7.445797791425968e-07, "loss": 16.8776, "step": 45371 }, { "epoch": 0.8293637011716964, "grad_norm": 6.564994816949224, "learning_rate": 7.444243718177007e-07, "loss": 17.4082, "step": 45372 }, { "epoch": 0.829381980368143, "grad_norm": 4.354269927751351, "learning_rate": 7.442689794082292e-07, "loss": 16.5587, "step": 45373 }, { "epoch": 0.8294002595645895, "grad_norm": 8.340994676096514, "learning_rate": 7.441136019147255e-07, "loss": 17.8486, "step": 45374 }, { "epoch": 0.8294185387610361, "grad_norm": 5.395459786865993, "learning_rate": 7.439582393377331e-07, "loss": 17.0224, "step": 45375 }, { "epoch": 0.8294368179574826, "grad_norm": 7.187519877002804, "learning_rate": 7.438028916777989e-07, "loss": 17.3513, "step": 45376 }, { "epoch": 0.8294550971539291, "grad_norm": 6.1625460539612105, "learning_rate": 7.436475589354669e-07, "loss": 17.2125, "step": 45377 }, { "epoch": 0.8294733763503757, "grad_norm": 6.9168570842982815, "learning_rate": 7.434922411112788e-07, "loss": 17.4428, "step": 45378 }, { "epoch": 0.8294916555468221, "grad_norm": 7.3056966932139815, "learning_rate": 7.433369382057825e-07, "loss": 17.6614, "step": 45379 }, { "epoch": 0.8295099347432687, "grad_norm": 5.424001947272237, "learning_rate": 7.431816502195199e-07, "loss": 17.2002, "step": 45380 }, { "epoch": 0.8295282139397152, "grad_norm": 6.959288318163738, "learning_rate": 7.430263771530361e-07, "loss": 17.5832, "step": 45381 }, { "epoch": 0.8295464931361617, "grad_norm": 5.202254527552789, "learning_rate": 7.428711190068766e-07, "loss": 17.0107, "step": 45382 }, { "epoch": 0.8295647723326083, "grad_norm": 5.975473345608253, "learning_rate": 7.427158757815839e-07, "loss": 17.221, "step": 45383 }, { "epoch": 0.8295830515290548, "grad_norm": 6.488461579648154, "learning_rate": 7.42560647477702e-07, "loss": 17.4719, "step": 45384 }, { "epoch": 0.8296013307255014, "grad_norm": 7.314460546404757, "learning_rate": 7.424054340957771e-07, "loss": 17.8531, "step": 45385 }, { "epoch": 0.8296196099219478, "grad_norm": 6.427580236626095, "learning_rate": 7.422502356363498e-07, "loss": 17.3432, "step": 45386 }, { "epoch": 0.8296378891183943, "grad_norm": 5.257758677114023, "learning_rate": 7.420950520999676e-07, "loss": 16.9402, "step": 45387 }, { "epoch": 0.8296561683148409, "grad_norm": 6.894050956648294, "learning_rate": 7.419398834871721e-07, "loss": 17.6175, "step": 45388 }, { "epoch": 0.8296744475112874, "grad_norm": 7.610853720001313, "learning_rate": 7.417847297985076e-07, "loss": 17.4681, "step": 45389 }, { "epoch": 0.829692726707734, "grad_norm": 4.949975215372151, "learning_rate": 7.416295910345172e-07, "loss": 16.7315, "step": 45390 }, { "epoch": 0.8297110059041805, "grad_norm": 6.432008586157271, "learning_rate": 7.414744671957474e-07, "loss": 17.3509, "step": 45391 }, { "epoch": 0.8297292851006269, "grad_norm": 5.391651878542376, "learning_rate": 7.413193582827388e-07, "loss": 17.0135, "step": 45392 }, { "epoch": 0.8297475642970735, "grad_norm": 5.774259169790506, "learning_rate": 7.41164264296037e-07, "loss": 17.2078, "step": 45393 }, { "epoch": 0.82976584349352, "grad_norm": 5.623834711645136, "learning_rate": 7.41009185236185e-07, "loss": 17.2409, "step": 45394 }, { "epoch": 0.8297841226899666, "grad_norm": 7.867511752821837, "learning_rate": 7.408541211037256e-07, "loss": 18.0035, "step": 45395 }, { "epoch": 0.8298024018864131, "grad_norm": 5.089340261031562, "learning_rate": 7.406990718992036e-07, "loss": 16.8467, "step": 45396 }, { "epoch": 0.8298206810828596, "grad_norm": 5.835189072722233, "learning_rate": 7.40544037623162e-07, "loss": 17.1498, "step": 45397 }, { "epoch": 0.8298389602793061, "grad_norm": 6.521775995373645, "learning_rate": 7.403890182761425e-07, "loss": 17.5275, "step": 45398 }, { "epoch": 0.8298572394757526, "grad_norm": 6.166772020212486, "learning_rate": 7.4023401385869e-07, "loss": 17.1256, "step": 45399 }, { "epoch": 0.8298755186721992, "grad_norm": 6.708425182138043, "learning_rate": 7.40079024371349e-07, "loss": 17.7174, "step": 45400 }, { "epoch": 0.8298937978686457, "grad_norm": 6.528825001302381, "learning_rate": 7.399240498146598e-07, "loss": 17.1471, "step": 45401 }, { "epoch": 0.8299120770650922, "grad_norm": 6.887481596687209, "learning_rate": 7.397690901891685e-07, "loss": 17.2849, "step": 45402 }, { "epoch": 0.8299303562615388, "grad_norm": 6.3094812133367215, "learning_rate": 7.396141454954159e-07, "loss": 17.2679, "step": 45403 }, { "epoch": 0.8299486354579853, "grad_norm": 7.24277090641084, "learning_rate": 7.39459215733947e-07, "loss": 17.76, "step": 45404 }, { "epoch": 0.8299669146544318, "grad_norm": 8.092601034640387, "learning_rate": 7.393043009053036e-07, "loss": 17.6027, "step": 45405 }, { "epoch": 0.8299851938508783, "grad_norm": 5.093912877481131, "learning_rate": 7.391494010100281e-07, "loss": 16.9341, "step": 45406 }, { "epoch": 0.8300034730473248, "grad_norm": 5.517939473137148, "learning_rate": 7.389945160486651e-07, "loss": 16.8844, "step": 45407 }, { "epoch": 0.8300217522437714, "grad_norm": 6.89431014326837, "learning_rate": 7.388396460217556e-07, "loss": 17.4887, "step": 45408 }, { "epoch": 0.8300400314402179, "grad_norm": 7.05449772651169, "learning_rate": 7.386847909298433e-07, "loss": 17.8201, "step": 45409 }, { "epoch": 0.8300583106366645, "grad_norm": 6.304483045153025, "learning_rate": 7.385299507734723e-07, "loss": 17.2773, "step": 45410 }, { "epoch": 0.830076589833111, "grad_norm": 5.827248595158714, "learning_rate": 7.383751255531841e-07, "loss": 17.3164, "step": 45411 }, { "epoch": 0.8300948690295574, "grad_norm": 6.741251720897773, "learning_rate": 7.382203152695194e-07, "loss": 17.4782, "step": 45412 }, { "epoch": 0.830113148226004, "grad_norm": 5.861695509021081, "learning_rate": 7.380655199230247e-07, "loss": 16.9554, "step": 45413 }, { "epoch": 0.8301314274224505, "grad_norm": 5.6395832823483305, "learning_rate": 7.379107395142399e-07, "loss": 17.3231, "step": 45414 }, { "epoch": 0.8301497066188971, "grad_norm": 7.1819987519126105, "learning_rate": 7.377559740437073e-07, "loss": 17.5426, "step": 45415 }, { "epoch": 0.8301679858153436, "grad_norm": 6.222573053370846, "learning_rate": 7.3760122351197e-07, "loss": 17.0859, "step": 45416 }, { "epoch": 0.83018626501179, "grad_norm": 6.524894587574276, "learning_rate": 7.37446487919572e-07, "loss": 17.7395, "step": 45417 }, { "epoch": 0.8302045442082366, "grad_norm": 7.790973881421147, "learning_rate": 7.372917672670527e-07, "loss": 17.9055, "step": 45418 }, { "epoch": 0.8302228234046831, "grad_norm": 7.144583271567893, "learning_rate": 7.371370615549572e-07, "loss": 17.5373, "step": 45419 }, { "epoch": 0.8302411026011296, "grad_norm": 5.810348317425459, "learning_rate": 7.369823707838253e-07, "loss": 17.0898, "step": 45420 }, { "epoch": 0.8302593817975762, "grad_norm": 6.227306473745606, "learning_rate": 7.368276949542013e-07, "loss": 17.2343, "step": 45421 }, { "epoch": 0.8302776609940227, "grad_norm": 6.687922026078275, "learning_rate": 7.366730340666261e-07, "loss": 17.6019, "step": 45422 }, { "epoch": 0.8302959401904693, "grad_norm": 6.803451619485679, "learning_rate": 7.365183881216409e-07, "loss": 17.4077, "step": 45423 }, { "epoch": 0.8303142193869157, "grad_norm": 6.476943258334355, "learning_rate": 7.363637571197901e-07, "loss": 17.7634, "step": 45424 }, { "epoch": 0.8303324985833622, "grad_norm": 7.064998689246404, "learning_rate": 7.362091410616129e-07, "loss": 17.3271, "step": 45425 }, { "epoch": 0.8303507777798088, "grad_norm": 8.261232305081524, "learning_rate": 7.360545399476532e-07, "loss": 18.4455, "step": 45426 }, { "epoch": 0.8303690569762553, "grad_norm": 7.726445574930621, "learning_rate": 7.358999537784528e-07, "loss": 17.9278, "step": 45427 }, { "epoch": 0.8303873361727019, "grad_norm": 6.807318807416855, "learning_rate": 7.357453825545535e-07, "loss": 18.0947, "step": 45428 }, { "epoch": 0.8304056153691484, "grad_norm": 7.442282704000673, "learning_rate": 7.355908262764954e-07, "loss": 17.368, "step": 45429 }, { "epoch": 0.8304238945655948, "grad_norm": 6.396967431241386, "learning_rate": 7.354362849448222e-07, "loss": 17.4463, "step": 45430 }, { "epoch": 0.8304421737620414, "grad_norm": 4.627422907223866, "learning_rate": 7.352817585600752e-07, "loss": 16.8812, "step": 45431 }, { "epoch": 0.8304604529584879, "grad_norm": 5.691531067995908, "learning_rate": 7.351272471227944e-07, "loss": 16.9506, "step": 45432 }, { "epoch": 0.8304787321549345, "grad_norm": 5.692494573975327, "learning_rate": 7.349727506335236e-07, "loss": 17.0146, "step": 45433 }, { "epoch": 0.830497011351381, "grad_norm": 6.832143300210709, "learning_rate": 7.348182690928018e-07, "loss": 17.4006, "step": 45434 }, { "epoch": 0.8305152905478275, "grad_norm": 5.214212531786544, "learning_rate": 7.34663802501172e-07, "loss": 16.9905, "step": 45435 }, { "epoch": 0.8305335697442741, "grad_norm": 6.332887615259809, "learning_rate": 7.345093508591766e-07, "loss": 17.5669, "step": 45436 }, { "epoch": 0.8305518489407205, "grad_norm": 5.362050028754383, "learning_rate": 7.343549141673545e-07, "loss": 17.1504, "step": 45437 }, { "epoch": 0.8305701281371671, "grad_norm": 4.522900712448276, "learning_rate": 7.342004924262497e-07, "loss": 16.6383, "step": 45438 }, { "epoch": 0.8305884073336136, "grad_norm": 5.87302102641579, "learning_rate": 7.340460856364018e-07, "loss": 17.4078, "step": 45439 }, { "epoch": 0.8306066865300601, "grad_norm": 6.778532113821825, "learning_rate": 7.338916937983509e-07, "loss": 17.4334, "step": 45440 }, { "epoch": 0.8306249657265067, "grad_norm": 6.000413999362623, "learning_rate": 7.337373169126411e-07, "loss": 17.1522, "step": 45441 }, { "epoch": 0.8306432449229532, "grad_norm": 6.391431385029493, "learning_rate": 7.335829549798112e-07, "loss": 17.3473, "step": 45442 }, { "epoch": 0.8306615241193998, "grad_norm": 5.202289978717083, "learning_rate": 7.334286080004022e-07, "loss": 16.9748, "step": 45443 }, { "epoch": 0.8306798033158462, "grad_norm": 5.97661953994099, "learning_rate": 7.332742759749556e-07, "loss": 17.1191, "step": 45444 }, { "epoch": 0.8306980825122927, "grad_norm": 5.164025512449529, "learning_rate": 7.331199589040134e-07, "loss": 16.9181, "step": 45445 }, { "epoch": 0.8307163617087393, "grad_norm": 6.010261027681978, "learning_rate": 7.329656567881138e-07, "loss": 17.4968, "step": 45446 }, { "epoch": 0.8307346409051858, "grad_norm": 6.271098736306842, "learning_rate": 7.328113696278011e-07, "loss": 17.73, "step": 45447 }, { "epoch": 0.8307529201016324, "grad_norm": 5.737933938816491, "learning_rate": 7.32657097423613e-07, "loss": 17.0013, "step": 45448 }, { "epoch": 0.8307711992980789, "grad_norm": 5.007927600720019, "learning_rate": 7.325028401760925e-07, "loss": 16.943, "step": 45449 }, { "epoch": 0.8307894784945253, "grad_norm": 5.924052948989343, "learning_rate": 7.323485978857797e-07, "loss": 17.2026, "step": 45450 }, { "epoch": 0.8308077576909719, "grad_norm": 5.790343192988319, "learning_rate": 7.321943705532131e-07, "loss": 17.3823, "step": 45451 }, { "epoch": 0.8308260368874184, "grad_norm": 7.188028525731529, "learning_rate": 7.320401581789361e-07, "loss": 17.6644, "step": 45452 }, { "epoch": 0.830844316083865, "grad_norm": 4.977905143521507, "learning_rate": 7.31885960763487e-07, "loss": 16.9245, "step": 45453 }, { "epoch": 0.8308625952803115, "grad_norm": 7.56141301064406, "learning_rate": 7.317317783074068e-07, "loss": 17.8543, "step": 45454 }, { "epoch": 0.830880874476758, "grad_norm": 5.550859672019103, "learning_rate": 7.315776108112382e-07, "loss": 16.9421, "step": 45455 }, { "epoch": 0.8308991536732045, "grad_norm": 6.137393830315113, "learning_rate": 7.314234582755192e-07, "loss": 17.4324, "step": 45456 }, { "epoch": 0.830917432869651, "grad_norm": 8.405472264937128, "learning_rate": 7.312693207007893e-07, "loss": 18.3281, "step": 45457 }, { "epoch": 0.8309357120660976, "grad_norm": 8.956507367619793, "learning_rate": 7.311151980875913e-07, "loss": 17.8175, "step": 45458 }, { "epoch": 0.8309539912625441, "grad_norm": 7.307911738393746, "learning_rate": 7.309610904364645e-07, "loss": 17.4942, "step": 45459 }, { "epoch": 0.8309722704589906, "grad_norm": 5.455745353131403, "learning_rate": 7.308069977479471e-07, "loss": 17.1227, "step": 45460 }, { "epoch": 0.8309905496554372, "grad_norm": 6.627795561701388, "learning_rate": 7.306529200225821e-07, "loss": 17.6345, "step": 45461 }, { "epoch": 0.8310088288518837, "grad_norm": 5.781394708975292, "learning_rate": 7.304988572609067e-07, "loss": 17.2003, "step": 45462 }, { "epoch": 0.8310271080483302, "grad_norm": 6.351487206613307, "learning_rate": 7.303448094634624e-07, "loss": 17.5085, "step": 45463 }, { "epoch": 0.8310453872447767, "grad_norm": 6.40530608419423, "learning_rate": 7.301907766307898e-07, "loss": 17.3787, "step": 45464 }, { "epoch": 0.8310636664412232, "grad_norm": 7.793945408090808, "learning_rate": 7.300367587634278e-07, "loss": 17.643, "step": 45465 }, { "epoch": 0.8310819456376698, "grad_norm": 5.375667637010157, "learning_rate": 7.298827558619165e-07, "loss": 17.0444, "step": 45466 }, { "epoch": 0.8311002248341163, "grad_norm": 5.837920438539721, "learning_rate": 7.297287679267962e-07, "loss": 17.2464, "step": 45467 }, { "epoch": 0.8311185040305629, "grad_norm": 6.8388082155968055, "learning_rate": 7.295747949586052e-07, "loss": 18.0651, "step": 45468 }, { "epoch": 0.8311367832270093, "grad_norm": 7.895142817987474, "learning_rate": 7.294208369578842e-07, "loss": 17.7232, "step": 45469 }, { "epoch": 0.8311550624234558, "grad_norm": 7.1646633237, "learning_rate": 7.292668939251724e-07, "loss": 17.3763, "step": 45470 }, { "epoch": 0.8311733416199024, "grad_norm": 7.171668261109036, "learning_rate": 7.291129658610102e-07, "loss": 17.1281, "step": 45471 }, { "epoch": 0.8311916208163489, "grad_norm": 6.23783249915997, "learning_rate": 7.289590527659352e-07, "loss": 17.3784, "step": 45472 }, { "epoch": 0.8312099000127955, "grad_norm": 4.844084679816408, "learning_rate": 7.288051546404895e-07, "loss": 16.8851, "step": 45473 }, { "epoch": 0.831228179209242, "grad_norm": 6.77595638489562, "learning_rate": 7.286512714852095e-07, "loss": 17.1216, "step": 45474 }, { "epoch": 0.8312464584056884, "grad_norm": 4.667803359426782, "learning_rate": 7.284974033006376e-07, "loss": 16.6825, "step": 45475 }, { "epoch": 0.831264737602135, "grad_norm": 6.827756440524422, "learning_rate": 7.28343550087312e-07, "loss": 17.4086, "step": 45476 }, { "epoch": 0.8312830167985815, "grad_norm": 5.3333956982525255, "learning_rate": 7.281897118457698e-07, "loss": 17.05, "step": 45477 }, { "epoch": 0.8313012959950281, "grad_norm": 7.3718082536436995, "learning_rate": 7.280358885765537e-07, "loss": 17.7926, "step": 45478 }, { "epoch": 0.8313195751914746, "grad_norm": 5.551412384638514, "learning_rate": 7.278820802801995e-07, "loss": 17.2286, "step": 45479 }, { "epoch": 0.8313378543879211, "grad_norm": 6.441652463386571, "learning_rate": 7.277282869572483e-07, "loss": 17.4531, "step": 45480 }, { "epoch": 0.8313561335843677, "grad_norm": 6.347343648254377, "learning_rate": 7.275745086082392e-07, "loss": 17.2777, "step": 45481 }, { "epoch": 0.8313744127808141, "grad_norm": 5.246186112080639, "learning_rate": 7.274207452337101e-07, "loss": 17.0389, "step": 45482 }, { "epoch": 0.8313926919772607, "grad_norm": 7.744885351450444, "learning_rate": 7.272669968342017e-07, "loss": 17.7181, "step": 45483 }, { "epoch": 0.8314109711737072, "grad_norm": 7.445312104243644, "learning_rate": 7.271132634102512e-07, "loss": 17.3942, "step": 45484 }, { "epoch": 0.8314292503701537, "grad_norm": 5.907260639672198, "learning_rate": 7.269595449623973e-07, "loss": 17.2551, "step": 45485 }, { "epoch": 0.8314475295666003, "grad_norm": 5.567669468871346, "learning_rate": 7.268058414911805e-07, "loss": 17.1419, "step": 45486 }, { "epoch": 0.8314658087630468, "grad_norm": 6.915385930138104, "learning_rate": 7.266521529971382e-07, "loss": 17.779, "step": 45487 }, { "epoch": 0.8314840879594932, "grad_norm": 5.224499426973394, "learning_rate": 7.264984794808083e-07, "loss": 16.9659, "step": 45488 }, { "epoch": 0.8315023671559398, "grad_norm": 9.860628065598402, "learning_rate": 7.263448209427304e-07, "loss": 18.114, "step": 45489 }, { "epoch": 0.8315206463523863, "grad_norm": 6.088524084633017, "learning_rate": 7.26191177383444e-07, "loss": 16.8714, "step": 45490 }, { "epoch": 0.8315389255488329, "grad_norm": 6.111523724395721, "learning_rate": 7.260375488034855e-07, "loss": 17.1999, "step": 45491 }, { "epoch": 0.8315572047452794, "grad_norm": 5.2606667171136055, "learning_rate": 7.258839352033959e-07, "loss": 17.2436, "step": 45492 }, { "epoch": 0.8315754839417259, "grad_norm": 5.8825676134798535, "learning_rate": 7.25730336583712e-07, "loss": 17.2542, "step": 45493 }, { "epoch": 0.8315937631381725, "grad_norm": 6.192572790848515, "learning_rate": 7.255767529449714e-07, "loss": 17.3923, "step": 45494 }, { "epoch": 0.8316120423346189, "grad_norm": 7.650287313489268, "learning_rate": 7.254231842877146e-07, "loss": 17.8919, "step": 45495 }, { "epoch": 0.8316303215310655, "grad_norm": 6.022348208208675, "learning_rate": 7.252696306124774e-07, "loss": 17.2177, "step": 45496 }, { "epoch": 0.831648600727512, "grad_norm": 7.847840300323714, "learning_rate": 7.251160919198003e-07, "loss": 17.3863, "step": 45497 }, { "epoch": 0.8316668799239585, "grad_norm": 5.2813517667350585, "learning_rate": 7.249625682102191e-07, "loss": 16.8117, "step": 45498 }, { "epoch": 0.8316851591204051, "grad_norm": 4.467747822449547, "learning_rate": 7.248090594842733e-07, "loss": 16.8571, "step": 45499 }, { "epoch": 0.8317034383168516, "grad_norm": 7.040959522483943, "learning_rate": 7.246555657425014e-07, "loss": 17.3193, "step": 45500 }, { "epoch": 0.8317217175132982, "grad_norm": 7.13024258982522, "learning_rate": 7.245020869854414e-07, "loss": 17.3558, "step": 45501 }, { "epoch": 0.8317399967097446, "grad_norm": 6.121919095500222, "learning_rate": 7.243486232136288e-07, "loss": 17.5514, "step": 45502 }, { "epoch": 0.8317582759061911, "grad_norm": 5.817330617176895, "learning_rate": 7.241951744276049e-07, "loss": 17.189, "step": 45503 }, { "epoch": 0.8317765551026377, "grad_norm": 6.4657503926544235, "learning_rate": 7.24041740627906e-07, "loss": 17.3539, "step": 45504 }, { "epoch": 0.8317948342990842, "grad_norm": 7.10869492853799, "learning_rate": 7.238883218150683e-07, "loss": 17.5905, "step": 45505 }, { "epoch": 0.8318131134955308, "grad_norm": 6.604524319776053, "learning_rate": 7.237349179896319e-07, "loss": 17.6825, "step": 45506 }, { "epoch": 0.8318313926919773, "grad_norm": 6.4279597601010625, "learning_rate": 7.235815291521326e-07, "loss": 17.5728, "step": 45507 }, { "epoch": 0.8318496718884237, "grad_norm": 5.530729202302723, "learning_rate": 7.234281553031091e-07, "loss": 17.0098, "step": 45508 }, { "epoch": 0.8318679510848703, "grad_norm": 7.5825916283304435, "learning_rate": 7.232747964430997e-07, "loss": 17.6287, "step": 45509 }, { "epoch": 0.8318862302813168, "grad_norm": 5.1080836311307225, "learning_rate": 7.231214525726404e-07, "loss": 16.9048, "step": 45510 }, { "epoch": 0.8319045094777634, "grad_norm": 6.347404639353405, "learning_rate": 7.229681236922697e-07, "loss": 17.3519, "step": 45511 }, { "epoch": 0.8319227886742099, "grad_norm": 6.210631921725469, "learning_rate": 7.228148098025251e-07, "loss": 17.3029, "step": 45512 }, { "epoch": 0.8319410678706564, "grad_norm": 6.02815244911686, "learning_rate": 7.22661510903942e-07, "loss": 17.2852, "step": 45513 }, { "epoch": 0.831959347067103, "grad_norm": 7.836447416756578, "learning_rate": 7.225082269970606e-07, "loss": 17.6427, "step": 45514 }, { "epoch": 0.8319776262635494, "grad_norm": 6.341071111217109, "learning_rate": 7.223549580824162e-07, "loss": 17.4467, "step": 45515 }, { "epoch": 0.831995905459996, "grad_norm": 5.988103989532808, "learning_rate": 7.222017041605455e-07, "loss": 17.1249, "step": 45516 }, { "epoch": 0.8320141846564425, "grad_norm": 6.125637478547202, "learning_rate": 7.22048465231987e-07, "loss": 17.304, "step": 45517 }, { "epoch": 0.832032463852889, "grad_norm": 7.995213626830996, "learning_rate": 7.21895241297278e-07, "loss": 18.028, "step": 45518 }, { "epoch": 0.8320507430493356, "grad_norm": 6.996818631829929, "learning_rate": 7.217420323569541e-07, "loss": 17.7301, "step": 45519 }, { "epoch": 0.832069022245782, "grad_norm": 6.539683143372742, "learning_rate": 7.215888384115543e-07, "loss": 17.2928, "step": 45520 }, { "epoch": 0.8320873014422286, "grad_norm": 5.59296596463931, "learning_rate": 7.214356594616145e-07, "loss": 17.1097, "step": 45521 }, { "epoch": 0.8321055806386751, "grad_norm": 5.887935527264031, "learning_rate": 7.212824955076703e-07, "loss": 17.0554, "step": 45522 }, { "epoch": 0.8321238598351216, "grad_norm": 5.661117423638873, "learning_rate": 7.211293465502606e-07, "loss": 17.1349, "step": 45523 }, { "epoch": 0.8321421390315682, "grad_norm": 6.073877181454219, "learning_rate": 7.2097621258992e-07, "loss": 17.5473, "step": 45524 }, { "epoch": 0.8321604182280147, "grad_norm": 7.522289350407778, "learning_rate": 7.208230936271876e-07, "loss": 17.6684, "step": 45525 }, { "epoch": 0.8321786974244613, "grad_norm": 5.221138744401737, "learning_rate": 7.206699896625979e-07, "loss": 16.8206, "step": 45526 }, { "epoch": 0.8321969766209077, "grad_norm": 5.994994724773642, "learning_rate": 7.205169006966883e-07, "loss": 16.9815, "step": 45527 }, { "epoch": 0.8322152558173542, "grad_norm": 5.600073260088679, "learning_rate": 7.203638267299972e-07, "loss": 17.1338, "step": 45528 }, { "epoch": 0.8322335350138008, "grad_norm": 7.072077211308906, "learning_rate": 7.202107677630594e-07, "loss": 17.1867, "step": 45529 }, { "epoch": 0.8322518142102473, "grad_norm": 11.654506679980107, "learning_rate": 7.200577237964102e-07, "loss": 18.5712, "step": 45530 }, { "epoch": 0.8322700934066939, "grad_norm": 5.684105982119498, "learning_rate": 7.199046948305882e-07, "loss": 17.3081, "step": 45531 }, { "epoch": 0.8322883726031404, "grad_norm": 6.5110248732010305, "learning_rate": 7.197516808661292e-07, "loss": 17.2567, "step": 45532 }, { "epoch": 0.8323066517995868, "grad_norm": 7.212594413280365, "learning_rate": 7.195986819035678e-07, "loss": 17.754, "step": 45533 }, { "epoch": 0.8323249309960334, "grad_norm": 5.968300152848891, "learning_rate": 7.194456979434428e-07, "loss": 17.2022, "step": 45534 }, { "epoch": 0.8323432101924799, "grad_norm": 6.113363651837043, "learning_rate": 7.192927289862877e-07, "loss": 17.419, "step": 45535 }, { "epoch": 0.8323614893889265, "grad_norm": 6.333249375197212, "learning_rate": 7.191397750326406e-07, "loss": 17.0147, "step": 45536 }, { "epoch": 0.832379768585373, "grad_norm": 6.993247703803884, "learning_rate": 7.189868360830383e-07, "loss": 17.3864, "step": 45537 }, { "epoch": 0.8323980477818195, "grad_norm": 5.657200190075208, "learning_rate": 7.188339121380155e-07, "loss": 16.8189, "step": 45538 }, { "epoch": 0.8324163269782661, "grad_norm": 6.74738454424209, "learning_rate": 7.186810031981073e-07, "loss": 17.5067, "step": 45539 }, { "epoch": 0.8324346061747125, "grad_norm": 5.334784753175086, "learning_rate": 7.185281092638513e-07, "loss": 17.1137, "step": 45540 }, { "epoch": 0.8324528853711591, "grad_norm": 5.1356268493415085, "learning_rate": 7.183752303357822e-07, "loss": 16.9824, "step": 45541 }, { "epoch": 0.8324711645676056, "grad_norm": 7.052243656274345, "learning_rate": 7.182223664144372e-07, "loss": 17.2791, "step": 45542 }, { "epoch": 0.8324894437640521, "grad_norm": 10.93166279668593, "learning_rate": 7.180695175003504e-07, "loss": 18.1969, "step": 45543 }, { "epoch": 0.8325077229604987, "grad_norm": 6.350029082703895, "learning_rate": 7.179166835940594e-07, "loss": 17.2429, "step": 45544 }, { "epoch": 0.8325260021569452, "grad_norm": 5.283563588036403, "learning_rate": 7.177638646960971e-07, "loss": 16.9207, "step": 45545 }, { "epoch": 0.8325442813533918, "grad_norm": 5.369652762597938, "learning_rate": 7.176110608070025e-07, "loss": 16.6761, "step": 45546 }, { "epoch": 0.8325625605498382, "grad_norm": 5.535368262367738, "learning_rate": 7.174582719273087e-07, "loss": 17.1421, "step": 45547 }, { "epoch": 0.8325808397462847, "grad_norm": 6.236846493033784, "learning_rate": 7.173054980575527e-07, "loss": 17.0186, "step": 45548 }, { "epoch": 0.8325991189427313, "grad_norm": 8.039693122853798, "learning_rate": 7.171527391982696e-07, "loss": 17.972, "step": 45549 }, { "epoch": 0.8326173981391778, "grad_norm": 6.064109790268947, "learning_rate": 7.169999953499934e-07, "loss": 17.1734, "step": 45550 }, { "epoch": 0.8326356773356244, "grad_norm": 6.177758687855521, "learning_rate": 7.168472665132614e-07, "loss": 17.2626, "step": 45551 }, { "epoch": 0.8326539565320709, "grad_norm": 5.682609359602228, "learning_rate": 7.166945526886071e-07, "loss": 17.4773, "step": 45552 }, { "epoch": 0.8326722357285173, "grad_norm": 6.579243846680351, "learning_rate": 7.165418538765667e-07, "loss": 17.5986, "step": 45553 }, { "epoch": 0.8326905149249639, "grad_norm": 6.280716052421026, "learning_rate": 7.163891700776764e-07, "loss": 17.5076, "step": 45554 }, { "epoch": 0.8327087941214104, "grad_norm": 7.072047577985629, "learning_rate": 7.162365012924693e-07, "loss": 17.3528, "step": 45555 }, { "epoch": 0.8327270733178569, "grad_norm": 6.585811022538219, "learning_rate": 7.160838475214827e-07, "loss": 17.3964, "step": 45556 }, { "epoch": 0.8327453525143035, "grad_norm": 6.367741815866651, "learning_rate": 7.159312087652503e-07, "loss": 17.3157, "step": 45557 }, { "epoch": 0.83276363171075, "grad_norm": 6.242159786591897, "learning_rate": 7.15778585024306e-07, "loss": 17.3807, "step": 45558 }, { "epoch": 0.8327819109071966, "grad_norm": 6.368547096057285, "learning_rate": 7.156259762991874e-07, "loss": 17.5371, "step": 45559 }, { "epoch": 0.832800190103643, "grad_norm": 5.976961062525573, "learning_rate": 7.154733825904281e-07, "loss": 17.2727, "step": 45560 }, { "epoch": 0.8328184693000895, "grad_norm": 6.284960468563377, "learning_rate": 7.153208038985615e-07, "loss": 17.4903, "step": 45561 }, { "epoch": 0.8328367484965361, "grad_norm": 6.075551486580584, "learning_rate": 7.151682402241239e-07, "loss": 17.2039, "step": 45562 }, { "epoch": 0.8328550276929826, "grad_norm": 5.035517128198998, "learning_rate": 7.150156915676503e-07, "loss": 16.9938, "step": 45563 }, { "epoch": 0.8328733068894292, "grad_norm": 5.1518951749336965, "learning_rate": 7.148631579296745e-07, "loss": 16.9985, "step": 45564 }, { "epoch": 0.8328915860858757, "grad_norm": 9.553257592632812, "learning_rate": 7.147106393107323e-07, "loss": 17.8143, "step": 45565 }, { "epoch": 0.8329098652823221, "grad_norm": 6.9642317559348275, "learning_rate": 7.145581357113573e-07, "loss": 17.5027, "step": 45566 }, { "epoch": 0.8329281444787687, "grad_norm": 6.643358816697038, "learning_rate": 7.144056471320832e-07, "loss": 17.304, "step": 45567 }, { "epoch": 0.8329464236752152, "grad_norm": 6.319628558063594, "learning_rate": 7.142531735734464e-07, "loss": 17.3413, "step": 45568 }, { "epoch": 0.8329647028716618, "grad_norm": 7.319896217082409, "learning_rate": 7.141007150359808e-07, "loss": 17.9449, "step": 45569 }, { "epoch": 0.8329829820681083, "grad_norm": 5.327832352377716, "learning_rate": 7.139482715202183e-07, "loss": 17.0121, "step": 45570 }, { "epoch": 0.8330012612645548, "grad_norm": 6.623697032578442, "learning_rate": 7.137958430266962e-07, "loss": 17.4375, "step": 45571 }, { "epoch": 0.8330195404610014, "grad_norm": 6.708692876360125, "learning_rate": 7.136434295559469e-07, "loss": 17.7724, "step": 45572 }, { "epoch": 0.8330378196574478, "grad_norm": 7.408710065352379, "learning_rate": 7.134910311085069e-07, "loss": 17.9979, "step": 45573 }, { "epoch": 0.8330560988538944, "grad_norm": 6.799008650140308, "learning_rate": 7.133386476849085e-07, "loss": 17.3831, "step": 45574 }, { "epoch": 0.8330743780503409, "grad_norm": 4.794087670023852, "learning_rate": 7.131862792856859e-07, "loss": 16.753, "step": 45575 }, { "epoch": 0.8330926572467874, "grad_norm": 8.2325766879653, "learning_rate": 7.130339259113739e-07, "loss": 18.1268, "step": 45576 }, { "epoch": 0.833110936443234, "grad_norm": 5.953775501335799, "learning_rate": 7.128815875625056e-07, "loss": 17.2435, "step": 45577 }, { "epoch": 0.8331292156396805, "grad_norm": 5.673530127108424, "learning_rate": 7.12729264239615e-07, "loss": 17.2986, "step": 45578 }, { "epoch": 0.833147494836127, "grad_norm": 7.769646001671401, "learning_rate": 7.125769559432372e-07, "loss": 17.7225, "step": 45579 }, { "epoch": 0.8331657740325735, "grad_norm": 5.905656958283396, "learning_rate": 7.124246626739034e-07, "loss": 17.1236, "step": 45580 }, { "epoch": 0.83318405322902, "grad_norm": 6.9784669886254775, "learning_rate": 7.1227238443215e-07, "loss": 17.5345, "step": 45581 }, { "epoch": 0.8332023324254666, "grad_norm": 5.437826778597, "learning_rate": 7.121201212185102e-07, "loss": 17.1026, "step": 45582 }, { "epoch": 0.8332206116219131, "grad_norm": 6.125725934727711, "learning_rate": 7.119678730335178e-07, "loss": 17.2852, "step": 45583 }, { "epoch": 0.8332388908183597, "grad_norm": 6.8645896179949295, "learning_rate": 7.118156398777043e-07, "loss": 17.3357, "step": 45584 }, { "epoch": 0.8332571700148061, "grad_norm": 5.838068882448229, "learning_rate": 7.116634217516066e-07, "loss": 17.3301, "step": 45585 }, { "epoch": 0.8332754492112526, "grad_norm": 6.735809057927243, "learning_rate": 7.115112186557549e-07, "loss": 17.7444, "step": 45586 }, { "epoch": 0.8332937284076992, "grad_norm": 5.623785895589329, "learning_rate": 7.113590305906853e-07, "loss": 17.1359, "step": 45587 }, { "epoch": 0.8333120076041457, "grad_norm": 5.409963607103862, "learning_rate": 7.112068575569309e-07, "loss": 16.9249, "step": 45588 }, { "epoch": 0.8333302868005923, "grad_norm": 5.608663438746261, "learning_rate": 7.110546995550222e-07, "loss": 17.1732, "step": 45589 }, { "epoch": 0.8333485659970388, "grad_norm": 4.559664631066478, "learning_rate": 7.109025565854954e-07, "loss": 16.7237, "step": 45590 }, { "epoch": 0.8333668451934853, "grad_norm": 5.647973831013014, "learning_rate": 7.107504286488837e-07, "loss": 16.9768, "step": 45591 }, { "epoch": 0.8333851243899318, "grad_norm": 6.92725833171431, "learning_rate": 7.10598315745718e-07, "loss": 18.0537, "step": 45592 }, { "epoch": 0.8334034035863783, "grad_norm": 6.97987575499356, "learning_rate": 7.104462178765347e-07, "loss": 17.8656, "step": 45593 }, { "epoch": 0.8334216827828249, "grad_norm": 5.539309694499023, "learning_rate": 7.102941350418651e-07, "loss": 17.0133, "step": 45594 }, { "epoch": 0.8334399619792714, "grad_norm": 5.818657358226893, "learning_rate": 7.10142067242241e-07, "loss": 17.1154, "step": 45595 }, { "epoch": 0.8334582411757179, "grad_norm": 6.335048471018421, "learning_rate": 7.099900144781979e-07, "loss": 17.0651, "step": 45596 }, { "epoch": 0.8334765203721645, "grad_norm": 6.269544730228919, "learning_rate": 7.098379767502667e-07, "loss": 17.125, "step": 45597 }, { "epoch": 0.833494799568611, "grad_norm": 6.583394327178688, "learning_rate": 7.096859540589818e-07, "loss": 17.3999, "step": 45598 }, { "epoch": 0.8335130787650575, "grad_norm": 6.767488578993638, "learning_rate": 7.095339464048745e-07, "loss": 17.3121, "step": 45599 }, { "epoch": 0.833531357961504, "grad_norm": 6.379494041332311, "learning_rate": 7.093819537884794e-07, "loss": 17.3042, "step": 45600 }, { "epoch": 0.8335496371579505, "grad_norm": 7.373869800585389, "learning_rate": 7.092299762103272e-07, "loss": 17.8064, "step": 45601 }, { "epoch": 0.8335679163543971, "grad_norm": 13.679448048865778, "learning_rate": 7.090780136709525e-07, "loss": 17.8608, "step": 45602 }, { "epoch": 0.8335861955508436, "grad_norm": 6.685614320390333, "learning_rate": 7.089260661708857e-07, "loss": 17.6232, "step": 45603 }, { "epoch": 0.8336044747472902, "grad_norm": 7.184790589741724, "learning_rate": 7.087741337106624e-07, "loss": 17.6589, "step": 45604 }, { "epoch": 0.8336227539437366, "grad_norm": 6.93497061444473, "learning_rate": 7.08622216290813e-07, "loss": 17.6648, "step": 45605 }, { "epoch": 0.8336410331401831, "grad_norm": 7.061192768981377, "learning_rate": 7.084703139118693e-07, "loss": 17.4938, "step": 45606 }, { "epoch": 0.8336593123366297, "grad_norm": 5.722197879627565, "learning_rate": 7.083184265743648e-07, "loss": 17.2999, "step": 45607 }, { "epoch": 0.8336775915330762, "grad_norm": 6.400788336784283, "learning_rate": 7.081665542788329e-07, "loss": 17.4793, "step": 45608 }, { "epoch": 0.8336958707295228, "grad_norm": 5.200576069646507, "learning_rate": 7.080146970258034e-07, "loss": 17.0459, "step": 45609 }, { "epoch": 0.8337141499259693, "grad_norm": 5.619667607389894, "learning_rate": 7.078628548158117e-07, "loss": 17.115, "step": 45610 }, { "epoch": 0.8337324291224157, "grad_norm": 6.20237009054135, "learning_rate": 7.077110276493875e-07, "loss": 17.3399, "step": 45611 }, { "epoch": 0.8337507083188623, "grad_norm": 13.920562630436388, "learning_rate": 7.075592155270633e-07, "loss": 17.4316, "step": 45612 }, { "epoch": 0.8337689875153088, "grad_norm": 4.992165282301652, "learning_rate": 7.074074184493724e-07, "loss": 16.7619, "step": 45613 }, { "epoch": 0.8337872667117554, "grad_norm": 7.681860921550695, "learning_rate": 7.072556364168459e-07, "loss": 17.5121, "step": 45614 }, { "epoch": 0.8338055459082019, "grad_norm": 6.230119949008982, "learning_rate": 7.071038694300147e-07, "loss": 17.3117, "step": 45615 }, { "epoch": 0.8338238251046484, "grad_norm": 7.223484122748254, "learning_rate": 7.069521174894117e-07, "loss": 17.9015, "step": 45616 }, { "epoch": 0.833842104301095, "grad_norm": 5.767610339622668, "learning_rate": 7.068003805955698e-07, "loss": 17.0841, "step": 45617 }, { "epoch": 0.8338603834975414, "grad_norm": 6.216833623783879, "learning_rate": 7.066486587490201e-07, "loss": 17.2256, "step": 45618 }, { "epoch": 0.833878662693988, "grad_norm": 6.101761248993479, "learning_rate": 7.064969519502951e-07, "loss": 17.3977, "step": 45619 }, { "epoch": 0.8338969418904345, "grad_norm": 5.2225348648723084, "learning_rate": 7.063452601999243e-07, "loss": 17.1503, "step": 45620 }, { "epoch": 0.833915221086881, "grad_norm": 7.50622558120445, "learning_rate": 7.061935834984418e-07, "loss": 17.6618, "step": 45621 }, { "epoch": 0.8339335002833276, "grad_norm": 5.418834652187418, "learning_rate": 7.060419218463782e-07, "loss": 17.0785, "step": 45622 }, { "epoch": 0.8339517794797741, "grad_norm": 5.425988196151331, "learning_rate": 7.058902752442643e-07, "loss": 16.993, "step": 45623 }, { "epoch": 0.8339700586762205, "grad_norm": 7.298366456914916, "learning_rate": 7.057386436926333e-07, "loss": 17.8667, "step": 45624 }, { "epoch": 0.8339883378726671, "grad_norm": 6.038906309705532, "learning_rate": 7.055870271920146e-07, "loss": 17.1393, "step": 45625 }, { "epoch": 0.8340066170691136, "grad_norm": 7.153849532603674, "learning_rate": 7.054354257429402e-07, "loss": 17.7073, "step": 45626 }, { "epoch": 0.8340248962655602, "grad_norm": 5.185805462832907, "learning_rate": 7.052838393459438e-07, "loss": 17.0104, "step": 45627 }, { "epoch": 0.8340431754620067, "grad_norm": 5.972205288182978, "learning_rate": 7.051322680015549e-07, "loss": 17.3593, "step": 45628 }, { "epoch": 0.8340614546584532, "grad_norm": 6.045255572500874, "learning_rate": 7.04980711710303e-07, "loss": 17.5104, "step": 45629 }, { "epoch": 0.8340797338548998, "grad_norm": 5.753686164111749, "learning_rate": 7.048291704727223e-07, "loss": 17.201, "step": 45630 }, { "epoch": 0.8340980130513462, "grad_norm": 6.392910346192721, "learning_rate": 7.046776442893428e-07, "loss": 17.32, "step": 45631 }, { "epoch": 0.8341162922477928, "grad_norm": 7.015641312500983, "learning_rate": 7.045261331606946e-07, "loss": 17.6214, "step": 45632 }, { "epoch": 0.8341345714442393, "grad_norm": 6.501931284847211, "learning_rate": 7.0437463708731e-07, "loss": 17.6281, "step": 45633 }, { "epoch": 0.8341528506406858, "grad_norm": 4.530296693331973, "learning_rate": 7.042231560697188e-07, "loss": 16.642, "step": 45634 }, { "epoch": 0.8341711298371324, "grad_norm": 5.670383081429403, "learning_rate": 7.040716901084527e-07, "loss": 17.1171, "step": 45635 }, { "epoch": 0.8341894090335789, "grad_norm": 7.248874002232413, "learning_rate": 7.039202392040434e-07, "loss": 17.9652, "step": 45636 }, { "epoch": 0.8342076882300254, "grad_norm": 6.658546111787602, "learning_rate": 7.0376880335702e-07, "loss": 17.5519, "step": 45637 }, { "epoch": 0.8342259674264719, "grad_norm": 6.146516629855665, "learning_rate": 7.036173825679149e-07, "loss": 17.5246, "step": 45638 }, { "epoch": 0.8342442466229184, "grad_norm": 5.614440731610347, "learning_rate": 7.034659768372587e-07, "loss": 17.2283, "step": 45639 }, { "epoch": 0.834262525819365, "grad_norm": 5.58463897726801, "learning_rate": 7.033145861655799e-07, "loss": 17.1423, "step": 45640 }, { "epoch": 0.8342808050158115, "grad_norm": 5.607422353411015, "learning_rate": 7.031632105534114e-07, "loss": 17.2159, "step": 45641 }, { "epoch": 0.8342990842122581, "grad_norm": 5.371169576323955, "learning_rate": 7.030118500012833e-07, "loss": 17.0534, "step": 45642 }, { "epoch": 0.8343173634087045, "grad_norm": 6.0007430236299175, "learning_rate": 7.028605045097248e-07, "loss": 17.1891, "step": 45643 }, { "epoch": 0.834335642605151, "grad_norm": 7.504439785102201, "learning_rate": 7.027091740792675e-07, "loss": 17.5879, "step": 45644 }, { "epoch": 0.8343539218015976, "grad_norm": 5.829190333537065, "learning_rate": 7.025578587104426e-07, "loss": 17.3402, "step": 45645 }, { "epoch": 0.8343722009980441, "grad_norm": 8.47148807513426, "learning_rate": 7.024065584037782e-07, "loss": 18.3313, "step": 45646 }, { "epoch": 0.8343904801944907, "grad_norm": 6.015826138404177, "learning_rate": 7.022552731598071e-07, "loss": 17.0633, "step": 45647 }, { "epoch": 0.8344087593909372, "grad_norm": 6.638387128160278, "learning_rate": 7.021040029790571e-07, "loss": 17.5104, "step": 45648 }, { "epoch": 0.8344270385873837, "grad_norm": 6.417471675219512, "learning_rate": 7.019527478620608e-07, "loss": 17.4991, "step": 45649 }, { "epoch": 0.8344453177838302, "grad_norm": 6.881595531518993, "learning_rate": 7.018015078093477e-07, "loss": 17.4862, "step": 45650 }, { "epoch": 0.8344635969802767, "grad_norm": 6.801203707280221, "learning_rate": 7.016502828214455e-07, "loss": 17.4048, "step": 45651 }, { "epoch": 0.8344818761767233, "grad_norm": 5.372930163764449, "learning_rate": 7.014990728988875e-07, "loss": 16.8998, "step": 45652 }, { "epoch": 0.8345001553731698, "grad_norm": 5.23119930331543, "learning_rate": 7.013478780422007e-07, "loss": 17.0572, "step": 45653 }, { "epoch": 0.8345184345696163, "grad_norm": 7.16999076558812, "learning_rate": 7.01196698251917e-07, "loss": 17.9666, "step": 45654 }, { "epoch": 0.8345367137660629, "grad_norm": 10.148992881018485, "learning_rate": 7.010455335285671e-07, "loss": 17.792, "step": 45655 }, { "epoch": 0.8345549929625093, "grad_norm": 9.43593165055766, "learning_rate": 7.00894383872679e-07, "loss": 17.9905, "step": 45656 }, { "epoch": 0.8345732721589559, "grad_norm": 6.400376258549135, "learning_rate": 7.007432492847821e-07, "loss": 17.6638, "step": 45657 }, { "epoch": 0.8345915513554024, "grad_norm": 8.477610948494643, "learning_rate": 7.005921297654083e-07, "loss": 18.2137, "step": 45658 }, { "epoch": 0.8346098305518489, "grad_norm": 6.634183468581603, "learning_rate": 7.004410253150861e-07, "loss": 17.1994, "step": 45659 }, { "epoch": 0.8346281097482955, "grad_norm": 6.328011524102482, "learning_rate": 7.002899359343434e-07, "loss": 17.6961, "step": 45660 }, { "epoch": 0.834646388944742, "grad_norm": 5.142112238883655, "learning_rate": 7.001388616237132e-07, "loss": 17.0152, "step": 45661 }, { "epoch": 0.8346646681411886, "grad_norm": 6.8039440621877185, "learning_rate": 6.999878023837215e-07, "loss": 17.6136, "step": 45662 }, { "epoch": 0.834682947337635, "grad_norm": 6.643698196412258, "learning_rate": 6.998367582148996e-07, "loss": 17.3143, "step": 45663 }, { "epoch": 0.8347012265340815, "grad_norm": 5.620429650505924, "learning_rate": 6.996857291177778e-07, "loss": 17.2828, "step": 45664 }, { "epoch": 0.8347195057305281, "grad_norm": 7.304138620843471, "learning_rate": 6.995347150928833e-07, "loss": 17.5908, "step": 45665 }, { "epoch": 0.8347377849269746, "grad_norm": 5.939446588328827, "learning_rate": 6.993837161407474e-07, "loss": 17.1073, "step": 45666 }, { "epoch": 0.8347560641234212, "grad_norm": 6.88255238748373, "learning_rate": 6.992327322618986e-07, "loss": 17.7193, "step": 45667 }, { "epoch": 0.8347743433198677, "grad_norm": 5.773955768543832, "learning_rate": 6.990817634568647e-07, "loss": 16.9685, "step": 45668 }, { "epoch": 0.8347926225163141, "grad_norm": 8.215150400286307, "learning_rate": 6.989308097261771e-07, "loss": 17.9128, "step": 45669 }, { "epoch": 0.8348109017127607, "grad_norm": 8.839427193389902, "learning_rate": 6.98779871070363e-07, "loss": 17.5614, "step": 45670 }, { "epoch": 0.8348291809092072, "grad_norm": 5.964116359331482, "learning_rate": 6.986289474899515e-07, "loss": 17.3649, "step": 45671 }, { "epoch": 0.8348474601056538, "grad_norm": 7.459882030064017, "learning_rate": 6.984780389854739e-07, "loss": 17.7427, "step": 45672 }, { "epoch": 0.8348657393021003, "grad_norm": 7.978751081876348, "learning_rate": 6.983271455574575e-07, "loss": 17.7172, "step": 45673 }, { "epoch": 0.8348840184985468, "grad_norm": 6.931085951132104, "learning_rate": 6.981762672064296e-07, "loss": 17.1569, "step": 45674 }, { "epoch": 0.8349022976949934, "grad_norm": 6.980289271586568, "learning_rate": 6.98025403932922e-07, "loss": 17.5878, "step": 45675 }, { "epoch": 0.8349205768914398, "grad_norm": 6.844063853269846, "learning_rate": 6.978745557374622e-07, "loss": 17.4747, "step": 45676 }, { "epoch": 0.8349388560878864, "grad_norm": 6.449590344530306, "learning_rate": 6.977237226205774e-07, "loss": 17.1635, "step": 45677 }, { "epoch": 0.8349571352843329, "grad_norm": 6.300972455810875, "learning_rate": 6.975729045827989e-07, "loss": 17.4, "step": 45678 }, { "epoch": 0.8349754144807794, "grad_norm": 5.90285912828385, "learning_rate": 6.974221016246529e-07, "loss": 17.0974, "step": 45679 }, { "epoch": 0.834993693677226, "grad_norm": 6.41939038704514, "learning_rate": 6.972713137466692e-07, "loss": 17.7497, "step": 45680 }, { "epoch": 0.8350119728736725, "grad_norm": 6.551032871650111, "learning_rate": 6.97120540949377e-07, "loss": 17.2236, "step": 45681 }, { "epoch": 0.835030252070119, "grad_norm": 6.266104656224091, "learning_rate": 6.969697832333028e-07, "loss": 17.3609, "step": 45682 }, { "epoch": 0.8350485312665655, "grad_norm": 5.963954816864734, "learning_rate": 6.968190405989777e-07, "loss": 17.3594, "step": 45683 }, { "epoch": 0.835066810463012, "grad_norm": 6.319143030668403, "learning_rate": 6.966683130469281e-07, "loss": 17.2589, "step": 45684 }, { "epoch": 0.8350850896594586, "grad_norm": 7.095620083003079, "learning_rate": 6.965176005776814e-07, "loss": 17.2541, "step": 45685 }, { "epoch": 0.8351033688559051, "grad_norm": 6.3241092210090635, "learning_rate": 6.963669031917686e-07, "loss": 17.5149, "step": 45686 }, { "epoch": 0.8351216480523517, "grad_norm": 5.801003729217886, "learning_rate": 6.962162208897155e-07, "loss": 17.2811, "step": 45687 }, { "epoch": 0.8351399272487982, "grad_norm": 7.074432646036631, "learning_rate": 6.960655536720506e-07, "loss": 17.4153, "step": 45688 }, { "epoch": 0.8351582064452446, "grad_norm": 6.019934408997623, "learning_rate": 6.959149015393024e-07, "loss": 17.1639, "step": 45689 }, { "epoch": 0.8351764856416912, "grad_norm": 6.997310891137934, "learning_rate": 6.957642644920004e-07, "loss": 17.5799, "step": 45690 }, { "epoch": 0.8351947648381377, "grad_norm": 5.261652751346225, "learning_rate": 6.956136425306697e-07, "loss": 17.1317, "step": 45691 }, { "epoch": 0.8352130440345842, "grad_norm": 5.996361036689306, "learning_rate": 6.954630356558406e-07, "loss": 17.4529, "step": 45692 }, { "epoch": 0.8352313232310308, "grad_norm": 7.110315339799047, "learning_rate": 6.953124438680392e-07, "loss": 17.5302, "step": 45693 }, { "epoch": 0.8352496024274773, "grad_norm": 6.12756137273617, "learning_rate": 6.951618671677951e-07, "loss": 17.377, "step": 45694 }, { "epoch": 0.8352678816239238, "grad_norm": 5.661411886027962, "learning_rate": 6.950113055556351e-07, "loss": 17.0534, "step": 45695 }, { "epoch": 0.8352861608203703, "grad_norm": 5.1555894235533035, "learning_rate": 6.948607590320861e-07, "loss": 16.9475, "step": 45696 }, { "epoch": 0.8353044400168168, "grad_norm": 6.861669194899279, "learning_rate": 6.947102275976775e-07, "loss": 17.6977, "step": 45697 }, { "epoch": 0.8353227192132634, "grad_norm": 7.832595282657624, "learning_rate": 6.945597112529346e-07, "loss": 17.4624, "step": 45698 }, { "epoch": 0.8353409984097099, "grad_norm": 5.99394124865252, "learning_rate": 6.944092099983862e-07, "loss": 17.0372, "step": 45699 }, { "epoch": 0.8353592776061565, "grad_norm": 6.570843946947207, "learning_rate": 6.942587238345616e-07, "loss": 17.0598, "step": 45700 }, { "epoch": 0.835377556802603, "grad_norm": 6.629300879122426, "learning_rate": 6.941082527619858e-07, "loss": 17.3591, "step": 45701 }, { "epoch": 0.8353958359990494, "grad_norm": 4.833026788305855, "learning_rate": 6.939577967811867e-07, "loss": 16.8285, "step": 45702 }, { "epoch": 0.835414115195496, "grad_norm": 6.24715865110369, "learning_rate": 6.938073558926922e-07, "loss": 17.3699, "step": 45703 }, { "epoch": 0.8354323943919425, "grad_norm": 7.001908885089433, "learning_rate": 6.936569300970297e-07, "loss": 17.4633, "step": 45704 }, { "epoch": 0.8354506735883891, "grad_norm": 6.492987806729031, "learning_rate": 6.935065193947244e-07, "loss": 17.438, "step": 45705 }, { "epoch": 0.8354689527848356, "grad_norm": 5.506337523893767, "learning_rate": 6.933561237863068e-07, "loss": 17.138, "step": 45706 }, { "epoch": 0.835487231981282, "grad_norm": 6.312939283079961, "learning_rate": 6.932057432723005e-07, "loss": 17.4256, "step": 45707 }, { "epoch": 0.8355055111777286, "grad_norm": 6.25148046865372, "learning_rate": 6.93055377853235e-07, "loss": 17.1461, "step": 45708 }, { "epoch": 0.8355237903741751, "grad_norm": 6.153093951702967, "learning_rate": 6.929050275296373e-07, "loss": 17.4095, "step": 45709 }, { "epoch": 0.8355420695706217, "grad_norm": 6.485365734385407, "learning_rate": 6.927546923020328e-07, "loss": 17.6696, "step": 45710 }, { "epoch": 0.8355603487670682, "grad_norm": 6.853534707776645, "learning_rate": 6.926043721709502e-07, "loss": 17.5998, "step": 45711 }, { "epoch": 0.8355786279635147, "grad_norm": 5.663654184614391, "learning_rate": 6.924540671369156e-07, "loss": 17.261, "step": 45712 }, { "epoch": 0.8355969071599613, "grad_norm": 6.429845226786258, "learning_rate": 6.923037772004549e-07, "loss": 17.8032, "step": 45713 }, { "epoch": 0.8356151863564077, "grad_norm": 6.548190226094217, "learning_rate": 6.921535023620968e-07, "loss": 17.4349, "step": 45714 }, { "epoch": 0.8356334655528543, "grad_norm": 5.415805475408284, "learning_rate": 6.920032426223666e-07, "loss": 17.1118, "step": 45715 }, { "epoch": 0.8356517447493008, "grad_norm": 7.596053717338615, "learning_rate": 6.9185299798179e-07, "loss": 17.8535, "step": 45716 }, { "epoch": 0.8356700239457473, "grad_norm": 4.683617472212006, "learning_rate": 6.91702768440895e-07, "loss": 16.6543, "step": 45717 }, { "epoch": 0.8356883031421939, "grad_norm": 8.762494331451588, "learning_rate": 6.915525540002088e-07, "loss": 17.8921, "step": 45718 }, { "epoch": 0.8357065823386404, "grad_norm": 7.242859146385938, "learning_rate": 6.914023546602561e-07, "loss": 17.5012, "step": 45719 }, { "epoch": 0.835724861535087, "grad_norm": 5.485082032848951, "learning_rate": 6.912521704215652e-07, "loss": 16.8607, "step": 45720 }, { "epoch": 0.8357431407315334, "grad_norm": 5.716386003472621, "learning_rate": 6.911020012846619e-07, "loss": 17.0676, "step": 45721 }, { "epoch": 0.8357614199279799, "grad_norm": 5.26607323297799, "learning_rate": 6.909518472500709e-07, "loss": 17.0865, "step": 45722 }, { "epoch": 0.8357796991244265, "grad_norm": 8.210570333960801, "learning_rate": 6.908017083183205e-07, "loss": 18.5767, "step": 45723 }, { "epoch": 0.835797978320873, "grad_norm": 5.807826698282121, "learning_rate": 6.906515844899359e-07, "loss": 17.4373, "step": 45724 }, { "epoch": 0.8358162575173196, "grad_norm": 7.313316533395074, "learning_rate": 6.905014757654438e-07, "loss": 17.48, "step": 45725 }, { "epoch": 0.8358345367137661, "grad_norm": 8.205310526227299, "learning_rate": 6.903513821453694e-07, "loss": 17.6689, "step": 45726 }, { "epoch": 0.8358528159102125, "grad_norm": 5.291284062569855, "learning_rate": 6.90201303630239e-07, "loss": 16.9537, "step": 45727 }, { "epoch": 0.8358710951066591, "grad_norm": 6.924261013281747, "learning_rate": 6.900512402205812e-07, "loss": 17.354, "step": 45728 }, { "epoch": 0.8358893743031056, "grad_norm": 7.41094917237701, "learning_rate": 6.899011919169185e-07, "loss": 17.9625, "step": 45729 }, { "epoch": 0.8359076534995522, "grad_norm": 5.62025050897266, "learning_rate": 6.89751158719778e-07, "loss": 17.0718, "step": 45730 }, { "epoch": 0.8359259326959987, "grad_norm": 6.2465493462086235, "learning_rate": 6.896011406296859e-07, "loss": 17.4576, "step": 45731 }, { "epoch": 0.8359442118924452, "grad_norm": 5.713737000800887, "learning_rate": 6.894511376471686e-07, "loss": 17.0854, "step": 45732 }, { "epoch": 0.8359624910888918, "grad_norm": 5.530553242695255, "learning_rate": 6.893011497727498e-07, "loss": 17.0148, "step": 45733 }, { "epoch": 0.8359807702853382, "grad_norm": 7.3651325632777445, "learning_rate": 6.891511770069559e-07, "loss": 17.73, "step": 45734 }, { "epoch": 0.8359990494817848, "grad_norm": 6.048160190299956, "learning_rate": 6.890012193503143e-07, "loss": 17.3802, "step": 45735 }, { "epoch": 0.8360173286782313, "grad_norm": 6.300865525484217, "learning_rate": 6.888512768033484e-07, "loss": 17.2576, "step": 45736 }, { "epoch": 0.8360356078746778, "grad_norm": 5.759805031064425, "learning_rate": 6.887013493665856e-07, "loss": 17.1332, "step": 45737 }, { "epoch": 0.8360538870711244, "grad_norm": 4.802937355177475, "learning_rate": 6.885514370405505e-07, "loss": 16.9845, "step": 45738 }, { "epoch": 0.8360721662675709, "grad_norm": 5.979581966516466, "learning_rate": 6.884015398257677e-07, "loss": 17.4726, "step": 45739 }, { "epoch": 0.8360904454640175, "grad_norm": 6.315884177648242, "learning_rate": 6.882516577227638e-07, "loss": 17.2506, "step": 45740 }, { "epoch": 0.8361087246604639, "grad_norm": 5.784508183769952, "learning_rate": 6.881017907320631e-07, "loss": 17.1083, "step": 45741 }, { "epoch": 0.8361270038569104, "grad_norm": 5.667581687367581, "learning_rate": 6.879519388541928e-07, "loss": 17.1693, "step": 45742 }, { "epoch": 0.836145283053357, "grad_norm": 6.684348785531014, "learning_rate": 6.878021020896753e-07, "loss": 17.4519, "step": 45743 }, { "epoch": 0.8361635622498035, "grad_norm": 6.578705251896865, "learning_rate": 6.876522804390368e-07, "loss": 17.479, "step": 45744 }, { "epoch": 0.8361818414462501, "grad_norm": 5.917475829845474, "learning_rate": 6.875024739028046e-07, "loss": 17.3679, "step": 45745 }, { "epoch": 0.8362001206426966, "grad_norm": 5.782219439226522, "learning_rate": 6.873526824815019e-07, "loss": 17.2497, "step": 45746 }, { "epoch": 0.836218399839143, "grad_norm": 6.271157570974911, "learning_rate": 6.872029061756524e-07, "loss": 17.1159, "step": 45747 }, { "epoch": 0.8362366790355896, "grad_norm": 6.0085854071373355, "learning_rate": 6.870531449857836e-07, "loss": 17.3914, "step": 45748 }, { "epoch": 0.8362549582320361, "grad_norm": 7.538871056788169, "learning_rate": 6.869033989124196e-07, "loss": 17.9146, "step": 45749 }, { "epoch": 0.8362732374284827, "grad_norm": 6.551479483202649, "learning_rate": 6.867536679560838e-07, "loss": 17.477, "step": 45750 }, { "epoch": 0.8362915166249292, "grad_norm": 6.066853556701164, "learning_rate": 6.866039521173029e-07, "loss": 16.9745, "step": 45751 }, { "epoch": 0.8363097958213757, "grad_norm": 5.303878256580375, "learning_rate": 6.864542513966e-07, "loss": 17.2383, "step": 45752 }, { "epoch": 0.8363280750178222, "grad_norm": 5.17625828257831, "learning_rate": 6.863045657945005e-07, "loss": 17.0011, "step": 45753 }, { "epoch": 0.8363463542142687, "grad_norm": 8.58285148596623, "learning_rate": 6.861548953115304e-07, "loss": 18.1871, "step": 45754 }, { "epoch": 0.8363646334107153, "grad_norm": 4.992354485385739, "learning_rate": 6.86005239948212e-07, "loss": 16.7859, "step": 45755 }, { "epoch": 0.8363829126071618, "grad_norm": 6.316312589047929, "learning_rate": 6.858555997050714e-07, "loss": 17.6593, "step": 45756 }, { "epoch": 0.8364011918036083, "grad_norm": 6.191872002389667, "learning_rate": 6.857059745826333e-07, "loss": 17.4544, "step": 45757 }, { "epoch": 0.8364194710000549, "grad_norm": 6.251941129067334, "learning_rate": 6.855563645814201e-07, "loss": 17.532, "step": 45758 }, { "epoch": 0.8364377501965014, "grad_norm": 6.49550451582055, "learning_rate": 6.854067697019584e-07, "loss": 17.529, "step": 45759 }, { "epoch": 0.8364560293929478, "grad_norm": 5.963230413572197, "learning_rate": 6.852571899447713e-07, "loss": 16.9991, "step": 45760 }, { "epoch": 0.8364743085893944, "grad_norm": 6.909377425981421, "learning_rate": 6.851076253103828e-07, "loss": 17.4842, "step": 45761 }, { "epoch": 0.8364925877858409, "grad_norm": 6.200506245988481, "learning_rate": 6.849580757993174e-07, "loss": 17.3734, "step": 45762 }, { "epoch": 0.8365108669822875, "grad_norm": 5.302442267236541, "learning_rate": 6.848085414121003e-07, "loss": 16.9963, "step": 45763 }, { "epoch": 0.836529146178734, "grad_norm": 7.344068126145321, "learning_rate": 6.846590221492544e-07, "loss": 17.2518, "step": 45764 }, { "epoch": 0.8365474253751805, "grad_norm": 8.431250440694722, "learning_rate": 6.845095180113048e-07, "loss": 18.4089, "step": 45765 }, { "epoch": 0.836565704571627, "grad_norm": 5.359682379729987, "learning_rate": 6.843600289987751e-07, "loss": 17.0108, "step": 45766 }, { "epoch": 0.8365839837680735, "grad_norm": 5.583531706025806, "learning_rate": 6.842105551121875e-07, "loss": 17.1518, "step": 45767 }, { "epoch": 0.8366022629645201, "grad_norm": 5.707393002898396, "learning_rate": 6.840610963520689e-07, "loss": 16.9776, "step": 45768 }, { "epoch": 0.8366205421609666, "grad_norm": 6.543264301936389, "learning_rate": 6.83911652718941e-07, "loss": 17.1988, "step": 45769 }, { "epoch": 0.8366388213574131, "grad_norm": 6.7373653577507735, "learning_rate": 6.837622242133279e-07, "loss": 17.3948, "step": 45770 }, { "epoch": 0.8366571005538597, "grad_norm": 5.273318603476795, "learning_rate": 6.836128108357531e-07, "loss": 17.1159, "step": 45771 }, { "epoch": 0.8366753797503061, "grad_norm": 5.677728504314844, "learning_rate": 6.834634125867413e-07, "loss": 17.0715, "step": 45772 }, { "epoch": 0.8366936589467527, "grad_norm": 6.252559619451681, "learning_rate": 6.833140294668166e-07, "loss": 17.1476, "step": 45773 }, { "epoch": 0.8367119381431992, "grad_norm": 5.193458283934865, "learning_rate": 6.831646614765014e-07, "loss": 17.0306, "step": 45774 }, { "epoch": 0.8367302173396457, "grad_norm": 5.309012353257979, "learning_rate": 6.830153086163189e-07, "loss": 16.8792, "step": 45775 }, { "epoch": 0.8367484965360923, "grad_norm": 7.335769563758462, "learning_rate": 6.828659708867935e-07, "loss": 17.9777, "step": 45776 }, { "epoch": 0.8367667757325388, "grad_norm": 7.835120782871009, "learning_rate": 6.82716648288449e-07, "loss": 18.0413, "step": 45777 }, { "epoch": 0.8367850549289854, "grad_norm": 6.590560176555999, "learning_rate": 6.825673408218064e-07, "loss": 17.5494, "step": 45778 }, { "epoch": 0.8368033341254318, "grad_norm": 6.2034820599137825, "learning_rate": 6.824180484873916e-07, "loss": 17.3797, "step": 45779 }, { "epoch": 0.8368216133218783, "grad_norm": 6.127376135549413, "learning_rate": 6.822687712857263e-07, "loss": 17.5565, "step": 45780 }, { "epoch": 0.8368398925183249, "grad_norm": 7.370817456296447, "learning_rate": 6.821195092173339e-07, "loss": 18.0156, "step": 45781 }, { "epoch": 0.8368581717147714, "grad_norm": 7.167800309207591, "learning_rate": 6.81970262282739e-07, "loss": 17.5179, "step": 45782 }, { "epoch": 0.836876450911218, "grad_norm": 7.0636116502575765, "learning_rate": 6.818210304824641e-07, "loss": 17.6354, "step": 45783 }, { "epoch": 0.8368947301076645, "grad_norm": 7.5018483405643055, "learning_rate": 6.816718138170303e-07, "loss": 17.3334, "step": 45784 }, { "epoch": 0.8369130093041109, "grad_norm": 7.911612839045766, "learning_rate": 6.815226122869634e-07, "loss": 17.346, "step": 45785 }, { "epoch": 0.8369312885005575, "grad_norm": 5.896556044808349, "learning_rate": 6.813734258927834e-07, "loss": 17.1791, "step": 45786 }, { "epoch": 0.836949567697004, "grad_norm": 6.039324644843218, "learning_rate": 6.812242546350162e-07, "loss": 17.2057, "step": 45787 }, { "epoch": 0.8369678468934506, "grad_norm": 7.225818202842661, "learning_rate": 6.810750985141834e-07, "loss": 17.8047, "step": 45788 }, { "epoch": 0.8369861260898971, "grad_norm": 5.672884915882331, "learning_rate": 6.809259575308064e-07, "loss": 16.9147, "step": 45789 }, { "epoch": 0.8370044052863436, "grad_norm": 5.462836814925517, "learning_rate": 6.807768316854091e-07, "loss": 17.0775, "step": 45790 }, { "epoch": 0.8370226844827902, "grad_norm": 6.04741876950947, "learning_rate": 6.806277209785151e-07, "loss": 17.1762, "step": 45791 }, { "epoch": 0.8370409636792366, "grad_norm": 6.665260937129, "learning_rate": 6.804786254106449e-07, "loss": 17.3031, "step": 45792 }, { "epoch": 0.8370592428756832, "grad_norm": 5.1775841270541685, "learning_rate": 6.803295449823238e-07, "loss": 16.9456, "step": 45793 }, { "epoch": 0.8370775220721297, "grad_norm": 5.182950239030862, "learning_rate": 6.801804796940731e-07, "loss": 16.8467, "step": 45794 }, { "epoch": 0.8370958012685762, "grad_norm": 5.3366984203384105, "learning_rate": 6.800314295464134e-07, "loss": 16.8305, "step": 45795 }, { "epoch": 0.8371140804650228, "grad_norm": 7.821289900046686, "learning_rate": 6.798823945398697e-07, "loss": 18.0535, "step": 45796 }, { "epoch": 0.8371323596614693, "grad_norm": 6.095730034160655, "learning_rate": 6.797333746749624e-07, "loss": 17.2214, "step": 45797 }, { "epoch": 0.8371506388579159, "grad_norm": 7.004390367237883, "learning_rate": 6.795843699522142e-07, "loss": 17.2334, "step": 45798 }, { "epoch": 0.8371689180543623, "grad_norm": 5.680188421396865, "learning_rate": 6.794353803721499e-07, "loss": 16.9683, "step": 45799 }, { "epoch": 0.8371871972508088, "grad_norm": 5.812821303581399, "learning_rate": 6.792864059352888e-07, "loss": 17.1387, "step": 45800 }, { "epoch": 0.8372054764472554, "grad_norm": 7.21704204138036, "learning_rate": 6.791374466421535e-07, "loss": 17.5868, "step": 45801 }, { "epoch": 0.8372237556437019, "grad_norm": 5.407502131720111, "learning_rate": 6.789885024932674e-07, "loss": 16.8958, "step": 45802 }, { "epoch": 0.8372420348401485, "grad_norm": 6.762060709493603, "learning_rate": 6.7883957348915e-07, "loss": 17.5384, "step": 45803 }, { "epoch": 0.837260314036595, "grad_norm": 5.056851693553892, "learning_rate": 6.786906596303266e-07, "loss": 16.8616, "step": 45804 }, { "epoch": 0.8372785932330414, "grad_norm": 5.313283566326912, "learning_rate": 6.785417609173173e-07, "loss": 17.0288, "step": 45805 }, { "epoch": 0.837296872429488, "grad_norm": 5.1109069749907725, "learning_rate": 6.783928773506432e-07, "loss": 16.9092, "step": 45806 }, { "epoch": 0.8373151516259345, "grad_norm": 5.810528534226819, "learning_rate": 6.782440089308267e-07, "loss": 16.8188, "step": 45807 }, { "epoch": 0.8373334308223811, "grad_norm": 9.971111289492494, "learning_rate": 6.780951556583914e-07, "loss": 17.4226, "step": 45808 }, { "epoch": 0.8373517100188276, "grad_norm": 5.8129709773525216, "learning_rate": 6.779463175338563e-07, "loss": 17.2308, "step": 45809 }, { "epoch": 0.8373699892152741, "grad_norm": 6.358270536717649, "learning_rate": 6.777974945577454e-07, "loss": 17.2925, "step": 45810 }, { "epoch": 0.8373882684117206, "grad_norm": 5.729966011784139, "learning_rate": 6.776486867305792e-07, "loss": 17.2819, "step": 45811 }, { "epoch": 0.8374065476081671, "grad_norm": 6.630654799664847, "learning_rate": 6.774998940528782e-07, "loss": 17.5622, "step": 45812 }, { "epoch": 0.8374248268046137, "grad_norm": 5.94304928696741, "learning_rate": 6.773511165251661e-07, "loss": 17.0499, "step": 45813 }, { "epoch": 0.8374431060010602, "grad_norm": 5.220497436766198, "learning_rate": 6.772023541479633e-07, "loss": 16.9237, "step": 45814 }, { "epoch": 0.8374613851975067, "grad_norm": 8.554051749409908, "learning_rate": 6.770536069217897e-07, "loss": 18.3659, "step": 45815 }, { "epoch": 0.8374796643939533, "grad_norm": 7.211835774519065, "learning_rate": 6.769048748471685e-07, "loss": 17.4799, "step": 45816 }, { "epoch": 0.8374979435903998, "grad_norm": 7.1521720934707735, "learning_rate": 6.767561579246201e-07, "loss": 17.3896, "step": 45817 }, { "epoch": 0.8375162227868463, "grad_norm": 4.902569691081423, "learning_rate": 6.766074561546676e-07, "loss": 17.1331, "step": 45818 }, { "epoch": 0.8375345019832928, "grad_norm": 8.064786688196035, "learning_rate": 6.76458769537831e-07, "loss": 17.4635, "step": 45819 }, { "epoch": 0.8375527811797393, "grad_norm": 7.680401224167789, "learning_rate": 6.763100980746302e-07, "loss": 17.6782, "step": 45820 }, { "epoch": 0.8375710603761859, "grad_norm": 6.843696692022805, "learning_rate": 6.761614417655876e-07, "loss": 17.5833, "step": 45821 }, { "epoch": 0.8375893395726324, "grad_norm": 5.676206245847104, "learning_rate": 6.760128006112249e-07, "loss": 17.2325, "step": 45822 }, { "epoch": 0.837607618769079, "grad_norm": 5.464737995331248, "learning_rate": 6.758641746120603e-07, "loss": 17.2183, "step": 45823 }, { "epoch": 0.8376258979655254, "grad_norm": 6.691458802741574, "learning_rate": 6.75715563768618e-07, "loss": 17.6496, "step": 45824 }, { "epoch": 0.8376441771619719, "grad_norm": 5.936806354942111, "learning_rate": 6.755669680814165e-07, "loss": 17.107, "step": 45825 }, { "epoch": 0.8376624563584185, "grad_norm": 7.21132438104003, "learning_rate": 6.754183875509767e-07, "loss": 17.634, "step": 45826 }, { "epoch": 0.837680735554865, "grad_norm": 5.73025605164434, "learning_rate": 6.75269822177822e-07, "loss": 17.4088, "step": 45827 }, { "epoch": 0.8376990147513115, "grad_norm": 11.072679881885529, "learning_rate": 6.751212719624711e-07, "loss": 18.5335, "step": 45828 }, { "epoch": 0.8377172939477581, "grad_norm": 5.037649157012751, "learning_rate": 6.749727369054437e-07, "loss": 16.8951, "step": 45829 }, { "epoch": 0.8377355731442045, "grad_norm": 5.2625695051268435, "learning_rate": 6.748242170072627e-07, "loss": 16.8982, "step": 45830 }, { "epoch": 0.8377538523406511, "grad_norm": 8.880412623804059, "learning_rate": 6.746757122684478e-07, "loss": 18.746, "step": 45831 }, { "epoch": 0.8377721315370976, "grad_norm": 6.431998309935045, "learning_rate": 6.745272226895178e-07, "loss": 17.4898, "step": 45832 }, { "epoch": 0.8377904107335441, "grad_norm": 6.248618064187127, "learning_rate": 6.743787482709957e-07, "loss": 17.5558, "step": 45833 }, { "epoch": 0.8378086899299907, "grad_norm": 7.064298943523699, "learning_rate": 6.742302890133994e-07, "loss": 17.6517, "step": 45834 }, { "epoch": 0.8378269691264372, "grad_norm": 4.635015064341012, "learning_rate": 6.74081844917251e-07, "loss": 16.7549, "step": 45835 }, { "epoch": 0.8378452483228838, "grad_norm": 4.803271282984503, "learning_rate": 6.739334159830713e-07, "loss": 16.8737, "step": 45836 }, { "epoch": 0.8378635275193302, "grad_norm": 7.2834777994903765, "learning_rate": 6.737850022113785e-07, "loss": 17.6497, "step": 45837 }, { "epoch": 0.8378818067157767, "grad_norm": 7.148646991388837, "learning_rate": 6.736366036026942e-07, "loss": 17.7008, "step": 45838 }, { "epoch": 0.8379000859122233, "grad_norm": 5.59844163924044, "learning_rate": 6.734882201575394e-07, "loss": 17.1864, "step": 45839 }, { "epoch": 0.8379183651086698, "grad_norm": 5.895575657813707, "learning_rate": 6.733398518764311e-07, "loss": 17.2117, "step": 45840 }, { "epoch": 0.8379366443051164, "grad_norm": 5.777635883576827, "learning_rate": 6.731914987598926e-07, "loss": 17.1605, "step": 45841 }, { "epoch": 0.8379549235015629, "grad_norm": 7.623287090960176, "learning_rate": 6.73043160808442e-07, "loss": 17.8466, "step": 45842 }, { "epoch": 0.8379732026980093, "grad_norm": 6.09860759075564, "learning_rate": 6.728948380225986e-07, "loss": 17.0627, "step": 45843 }, { "epoch": 0.8379914818944559, "grad_norm": 5.825580835923183, "learning_rate": 6.727465304028835e-07, "loss": 17.3213, "step": 45844 }, { "epoch": 0.8380097610909024, "grad_norm": 8.165940495517178, "learning_rate": 6.725982379498175e-07, "loss": 17.2467, "step": 45845 }, { "epoch": 0.838028040287349, "grad_norm": 7.023686518313273, "learning_rate": 6.724499606639178e-07, "loss": 17.2294, "step": 45846 }, { "epoch": 0.8380463194837955, "grad_norm": 5.785289379328282, "learning_rate": 6.72301698545707e-07, "loss": 17.1143, "step": 45847 }, { "epoch": 0.838064598680242, "grad_norm": 6.251671830492884, "learning_rate": 6.721534515957018e-07, "loss": 17.6395, "step": 45848 }, { "epoch": 0.8380828778766886, "grad_norm": 6.191272331564688, "learning_rate": 6.720052198144245e-07, "loss": 17.1546, "step": 45849 }, { "epoch": 0.838101157073135, "grad_norm": 6.7079929083844245, "learning_rate": 6.71857003202393e-07, "loss": 17.2119, "step": 45850 }, { "epoch": 0.8381194362695816, "grad_norm": 5.352199078808045, "learning_rate": 6.717088017601264e-07, "loss": 16.9968, "step": 45851 }, { "epoch": 0.8381377154660281, "grad_norm": 5.848965524633451, "learning_rate": 6.715606154881454e-07, "loss": 17.013, "step": 45852 }, { "epoch": 0.8381559946624746, "grad_norm": 5.999067276366277, "learning_rate": 6.714124443869685e-07, "loss": 17.1527, "step": 45853 }, { "epoch": 0.8381742738589212, "grad_norm": 5.939294755650433, "learning_rate": 6.712642884571147e-07, "loss": 17.1534, "step": 45854 }, { "epoch": 0.8381925530553677, "grad_norm": 6.443040899956322, "learning_rate": 6.711161476991051e-07, "loss": 17.5213, "step": 45855 }, { "epoch": 0.8382108322518143, "grad_norm": 5.710018173089066, "learning_rate": 6.709680221134584e-07, "loss": 17.0923, "step": 45856 }, { "epoch": 0.8382291114482607, "grad_norm": 7.763923103874087, "learning_rate": 6.708199117006914e-07, "loss": 17.8009, "step": 45857 }, { "epoch": 0.8382473906447072, "grad_norm": 5.179048242417909, "learning_rate": 6.706718164613263e-07, "loss": 16.9013, "step": 45858 }, { "epoch": 0.8382656698411538, "grad_norm": 7.690373853249864, "learning_rate": 6.705237363958805e-07, "loss": 17.6041, "step": 45859 }, { "epoch": 0.8382839490376003, "grad_norm": 6.992803697893476, "learning_rate": 6.703756715048726e-07, "loss": 16.9737, "step": 45860 }, { "epoch": 0.8383022282340469, "grad_norm": 6.740641711520098, "learning_rate": 6.702276217888221e-07, "loss": 17.5328, "step": 45861 }, { "epoch": 0.8383205074304934, "grad_norm": 9.212562283076132, "learning_rate": 6.700795872482491e-07, "loss": 18.5225, "step": 45862 }, { "epoch": 0.8383387866269398, "grad_norm": 5.634708749687779, "learning_rate": 6.699315678836704e-07, "loss": 17.3339, "step": 45863 }, { "epoch": 0.8383570658233864, "grad_norm": 6.027498501707613, "learning_rate": 6.697835636956069e-07, "loss": 17.0582, "step": 45864 }, { "epoch": 0.8383753450198329, "grad_norm": 5.421064890846027, "learning_rate": 6.696355746845751e-07, "loss": 16.9753, "step": 45865 }, { "epoch": 0.8383936242162795, "grad_norm": 8.848306342579138, "learning_rate": 6.694876008510964e-07, "loss": 17.9401, "step": 45866 }, { "epoch": 0.838411903412726, "grad_norm": 6.348422393498123, "learning_rate": 6.693396421956872e-07, "loss": 17.4155, "step": 45867 }, { "epoch": 0.8384301826091725, "grad_norm": 5.374781968492089, "learning_rate": 6.691916987188657e-07, "loss": 17.0814, "step": 45868 }, { "epoch": 0.838448461805619, "grad_norm": 7.337634543091633, "learning_rate": 6.690437704211528e-07, "loss": 17.0587, "step": 45869 }, { "epoch": 0.8384667410020655, "grad_norm": 7.1443157242915145, "learning_rate": 6.688958573030647e-07, "loss": 17.71, "step": 45870 }, { "epoch": 0.8384850201985121, "grad_norm": 7.1796260090197785, "learning_rate": 6.687479593651208e-07, "loss": 17.5992, "step": 45871 }, { "epoch": 0.8385032993949586, "grad_norm": 13.001846819391117, "learning_rate": 6.686000766078405e-07, "loss": 17.6246, "step": 45872 }, { "epoch": 0.8385215785914051, "grad_norm": 5.651466745057956, "learning_rate": 6.684522090317408e-07, "loss": 16.9573, "step": 45873 }, { "epoch": 0.8385398577878517, "grad_norm": 5.37985181024429, "learning_rate": 6.683043566373393e-07, "loss": 17.4352, "step": 45874 }, { "epoch": 0.8385581369842982, "grad_norm": 4.556177272736568, "learning_rate": 6.681565194251561e-07, "loss": 16.823, "step": 45875 }, { "epoch": 0.8385764161807447, "grad_norm": 7.295451255800628, "learning_rate": 6.680086973957089e-07, "loss": 17.3292, "step": 45876 }, { "epoch": 0.8385946953771912, "grad_norm": 7.445660578723754, "learning_rate": 6.678608905495138e-07, "loss": 17.883, "step": 45877 }, { "epoch": 0.8386129745736377, "grad_norm": 4.348467287257627, "learning_rate": 6.677130988870911e-07, "loss": 16.7136, "step": 45878 }, { "epoch": 0.8386312537700843, "grad_norm": 5.744542565357656, "learning_rate": 6.675653224089573e-07, "loss": 17.4472, "step": 45879 }, { "epoch": 0.8386495329665308, "grad_norm": 12.035648772501366, "learning_rate": 6.674175611156313e-07, "loss": 17.7278, "step": 45880 }, { "epoch": 0.8386678121629774, "grad_norm": 6.279587093151583, "learning_rate": 6.672698150076318e-07, "loss": 17.4951, "step": 45881 }, { "epoch": 0.8386860913594238, "grad_norm": 7.787607054278559, "learning_rate": 6.671220840854747e-07, "loss": 17.7062, "step": 45882 }, { "epoch": 0.8387043705558703, "grad_norm": 6.155067623465717, "learning_rate": 6.669743683496794e-07, "loss": 17.5676, "step": 45883 }, { "epoch": 0.8387226497523169, "grad_norm": 8.488870318100359, "learning_rate": 6.668266678007629e-07, "loss": 18.2621, "step": 45884 }, { "epoch": 0.8387409289487634, "grad_norm": 5.413791027106278, "learning_rate": 6.666789824392423e-07, "loss": 17.2016, "step": 45885 }, { "epoch": 0.83875920814521, "grad_norm": 6.678346980513973, "learning_rate": 6.665313122656364e-07, "loss": 17.3057, "step": 45886 }, { "epoch": 0.8387774873416565, "grad_norm": 6.604140989040505, "learning_rate": 6.663836572804628e-07, "loss": 17.5949, "step": 45887 }, { "epoch": 0.838795766538103, "grad_norm": 6.621534105961092, "learning_rate": 6.66236017484237e-07, "loss": 17.4989, "step": 45888 }, { "epoch": 0.8388140457345495, "grad_norm": 6.331708590240893, "learning_rate": 6.660883928774775e-07, "loss": 17.4496, "step": 45889 }, { "epoch": 0.838832324930996, "grad_norm": 6.742618413161166, "learning_rate": 6.659407834607034e-07, "loss": 17.5704, "step": 45890 }, { "epoch": 0.8388506041274426, "grad_norm": 4.207879156524284, "learning_rate": 6.657931892344299e-07, "loss": 16.6224, "step": 45891 }, { "epoch": 0.8388688833238891, "grad_norm": 8.105992360416817, "learning_rate": 6.656456101991765e-07, "loss": 17.8815, "step": 45892 }, { "epoch": 0.8388871625203356, "grad_norm": 5.1333180178220985, "learning_rate": 6.654980463554578e-07, "loss": 16.986, "step": 45893 }, { "epoch": 0.8389054417167822, "grad_norm": 6.078570885670681, "learning_rate": 6.653504977037933e-07, "loss": 17.1957, "step": 45894 }, { "epoch": 0.8389237209132286, "grad_norm": 5.54017787071461, "learning_rate": 6.652029642446989e-07, "loss": 17.0044, "step": 45895 }, { "epoch": 0.8389420001096751, "grad_norm": 9.107590529664941, "learning_rate": 6.650554459786912e-07, "loss": 17.082, "step": 45896 }, { "epoch": 0.8389602793061217, "grad_norm": 6.3485717527476435, "learning_rate": 6.649079429062893e-07, "loss": 17.3586, "step": 45897 }, { "epoch": 0.8389785585025682, "grad_norm": 4.887286587285564, "learning_rate": 6.647604550280073e-07, "loss": 16.9371, "step": 45898 }, { "epoch": 0.8389968376990148, "grad_norm": 5.4169321035238065, "learning_rate": 6.646129823443642e-07, "loss": 17.1304, "step": 45899 }, { "epoch": 0.8390151168954613, "grad_norm": 7.618054458661021, "learning_rate": 6.644655248558767e-07, "loss": 17.5384, "step": 45900 }, { "epoch": 0.8390333960919077, "grad_norm": 6.173059352026193, "learning_rate": 6.643180825630624e-07, "loss": 17.2954, "step": 45901 }, { "epoch": 0.8390516752883543, "grad_norm": 7.343873109791315, "learning_rate": 6.641706554664351e-07, "loss": 17.92, "step": 45902 }, { "epoch": 0.8390699544848008, "grad_norm": 6.890889815819189, "learning_rate": 6.640232435665145e-07, "loss": 17.21, "step": 45903 }, { "epoch": 0.8390882336812474, "grad_norm": 7.2345352941153065, "learning_rate": 6.638758468638168e-07, "loss": 17.7825, "step": 45904 }, { "epoch": 0.8391065128776939, "grad_norm": 6.9117092011965955, "learning_rate": 6.637284653588566e-07, "loss": 17.9836, "step": 45905 }, { "epoch": 0.8391247920741404, "grad_norm": 7.176183519316779, "learning_rate": 6.635810990521524e-07, "loss": 17.4793, "step": 45906 }, { "epoch": 0.839143071270587, "grad_norm": 5.671565514759011, "learning_rate": 6.634337479442199e-07, "loss": 17.2386, "step": 45907 }, { "epoch": 0.8391613504670334, "grad_norm": 8.819869311631331, "learning_rate": 6.632864120355753e-07, "loss": 18.3188, "step": 45908 }, { "epoch": 0.83917962966348, "grad_norm": 7.11353159503861, "learning_rate": 6.631390913267366e-07, "loss": 17.2865, "step": 45909 }, { "epoch": 0.8391979088599265, "grad_norm": 5.700237288366894, "learning_rate": 6.629917858182177e-07, "loss": 17.0583, "step": 45910 }, { "epoch": 0.839216188056373, "grad_norm": 5.675272492970938, "learning_rate": 6.628444955105379e-07, "loss": 16.9068, "step": 45911 }, { "epoch": 0.8392344672528196, "grad_norm": 6.498425825051021, "learning_rate": 6.626972204042114e-07, "loss": 17.5261, "step": 45912 }, { "epoch": 0.8392527464492661, "grad_norm": 5.488198276408299, "learning_rate": 6.625499604997543e-07, "loss": 16.9516, "step": 45913 }, { "epoch": 0.8392710256457127, "grad_norm": 5.4930451223909875, "learning_rate": 6.624027157976836e-07, "loss": 16.8619, "step": 45914 }, { "epoch": 0.8392893048421591, "grad_norm": 6.507901430606136, "learning_rate": 6.622554862985142e-07, "loss": 17.5063, "step": 45915 }, { "epoch": 0.8393075840386056, "grad_norm": 6.230666801469831, "learning_rate": 6.621082720027639e-07, "loss": 17.2786, "step": 45916 }, { "epoch": 0.8393258632350522, "grad_norm": 6.189243901679075, "learning_rate": 6.619610729109466e-07, "loss": 17.3444, "step": 45917 }, { "epoch": 0.8393441424314987, "grad_norm": 5.633617660309885, "learning_rate": 6.618138890235804e-07, "loss": 16.8237, "step": 45918 }, { "epoch": 0.8393624216279453, "grad_norm": 6.841239056936122, "learning_rate": 6.616667203411792e-07, "loss": 17.3405, "step": 45919 }, { "epoch": 0.8393807008243918, "grad_norm": 6.804250489653411, "learning_rate": 6.615195668642604e-07, "loss": 17.6903, "step": 45920 }, { "epoch": 0.8393989800208382, "grad_norm": 5.091502651474757, "learning_rate": 6.613724285933392e-07, "loss": 16.9561, "step": 45921 }, { "epoch": 0.8394172592172848, "grad_norm": 6.786912938244954, "learning_rate": 6.612253055289297e-07, "loss": 17.25, "step": 45922 }, { "epoch": 0.8394355384137313, "grad_norm": 6.895706813109, "learning_rate": 6.610781976715508e-07, "loss": 17.4856, "step": 45923 }, { "epoch": 0.8394538176101779, "grad_norm": 6.118946728634936, "learning_rate": 6.609311050217149e-07, "loss": 17.1766, "step": 45924 }, { "epoch": 0.8394720968066244, "grad_norm": 8.000693591123591, "learning_rate": 6.607840275799388e-07, "loss": 18.1022, "step": 45925 }, { "epoch": 0.8394903760030709, "grad_norm": 6.538592789212072, "learning_rate": 6.606369653467393e-07, "loss": 17.452, "step": 45926 }, { "epoch": 0.8395086551995175, "grad_norm": 6.338973533765623, "learning_rate": 6.604899183226299e-07, "loss": 17.3654, "step": 45927 }, { "epoch": 0.8395269343959639, "grad_norm": 5.106657505533443, "learning_rate": 6.603428865081274e-07, "loss": 16.9609, "step": 45928 }, { "epoch": 0.8395452135924105, "grad_norm": 6.9324370073699555, "learning_rate": 6.601958699037463e-07, "loss": 17.5998, "step": 45929 }, { "epoch": 0.839563492788857, "grad_norm": 5.169703482728586, "learning_rate": 6.600488685100015e-07, "loss": 16.8841, "step": 45930 }, { "epoch": 0.8395817719853035, "grad_norm": 7.925028683946927, "learning_rate": 6.599018823274101e-07, "loss": 17.545, "step": 45931 }, { "epoch": 0.8396000511817501, "grad_norm": 7.039255235200828, "learning_rate": 6.597549113564855e-07, "loss": 17.2827, "step": 45932 }, { "epoch": 0.8396183303781966, "grad_norm": 5.916809621463437, "learning_rate": 6.596079555977425e-07, "loss": 17.2547, "step": 45933 }, { "epoch": 0.8396366095746431, "grad_norm": 7.063939175914281, "learning_rate": 6.594610150516967e-07, "loss": 17.6216, "step": 45934 }, { "epoch": 0.8396548887710896, "grad_norm": 7.074623126674788, "learning_rate": 6.59314089718865e-07, "loss": 17.8286, "step": 45935 }, { "epoch": 0.8396731679675361, "grad_norm": 7.022271351194671, "learning_rate": 6.59167179599759e-07, "loss": 17.7675, "step": 45936 }, { "epoch": 0.8396914471639827, "grad_norm": 5.64478276912772, "learning_rate": 6.590202846948968e-07, "loss": 17.1234, "step": 45937 }, { "epoch": 0.8397097263604292, "grad_norm": 4.671447219055741, "learning_rate": 6.588734050047918e-07, "loss": 16.7582, "step": 45938 }, { "epoch": 0.8397280055568758, "grad_norm": 6.433078738023635, "learning_rate": 6.587265405299576e-07, "loss": 17.5148, "step": 45939 }, { "epoch": 0.8397462847533222, "grad_norm": 7.477159641895511, "learning_rate": 6.585796912709114e-07, "loss": 18.1221, "step": 45940 }, { "epoch": 0.8397645639497687, "grad_norm": 7.125436318188445, "learning_rate": 6.584328572281657e-07, "loss": 17.77, "step": 45941 }, { "epoch": 0.8397828431462153, "grad_norm": 8.051605092721177, "learning_rate": 6.582860384022372e-07, "loss": 17.9467, "step": 45942 }, { "epoch": 0.8398011223426618, "grad_norm": 5.2093621677828725, "learning_rate": 6.581392347936377e-07, "loss": 17.1225, "step": 45943 }, { "epoch": 0.8398194015391084, "grad_norm": 6.071258156495503, "learning_rate": 6.579924464028836e-07, "loss": 17.1, "step": 45944 }, { "epoch": 0.8398376807355549, "grad_norm": 6.179579599667156, "learning_rate": 6.578456732304905e-07, "loss": 17.3218, "step": 45945 }, { "epoch": 0.8398559599320014, "grad_norm": 6.514030152202923, "learning_rate": 6.576989152769719e-07, "loss": 17.2531, "step": 45946 }, { "epoch": 0.8398742391284479, "grad_norm": 5.955479861641738, "learning_rate": 6.575521725428402e-07, "loss": 17.1943, "step": 45947 }, { "epoch": 0.8398925183248944, "grad_norm": 5.4891895339478625, "learning_rate": 6.574054450286121e-07, "loss": 17.2261, "step": 45948 }, { "epoch": 0.839910797521341, "grad_norm": 6.613098748129645, "learning_rate": 6.572587327348018e-07, "loss": 17.2149, "step": 45949 }, { "epoch": 0.8399290767177875, "grad_norm": 7.490178749088429, "learning_rate": 6.571120356619215e-07, "loss": 17.402, "step": 45950 }, { "epoch": 0.839947355914234, "grad_norm": 5.944445328993444, "learning_rate": 6.569653538104875e-07, "loss": 17.1757, "step": 45951 }, { "epoch": 0.8399656351106806, "grad_norm": 8.119150153766704, "learning_rate": 6.568186871810122e-07, "loss": 18.177, "step": 45952 }, { "epoch": 0.839983914307127, "grad_norm": 6.109035123254798, "learning_rate": 6.566720357740103e-07, "loss": 17.31, "step": 45953 }, { "epoch": 0.8400021935035736, "grad_norm": 7.4084827572066425, "learning_rate": 6.565253995899973e-07, "loss": 17.8889, "step": 45954 }, { "epoch": 0.8400204727000201, "grad_norm": 5.560138393202465, "learning_rate": 6.563787786294845e-07, "loss": 16.9848, "step": 45955 }, { "epoch": 0.8400387518964666, "grad_norm": 5.527459031038974, "learning_rate": 6.562321728929888e-07, "loss": 16.8935, "step": 45956 }, { "epoch": 0.8400570310929132, "grad_norm": 6.93471824769782, "learning_rate": 6.56085582381022e-07, "loss": 17.6903, "step": 45957 }, { "epoch": 0.8400753102893597, "grad_norm": 7.574711532494878, "learning_rate": 6.559390070940969e-07, "loss": 17.6168, "step": 45958 }, { "epoch": 0.8400935894858063, "grad_norm": 14.107636189755395, "learning_rate": 6.557924470327304e-07, "loss": 18.83, "step": 45959 }, { "epoch": 0.8401118686822527, "grad_norm": 6.3645348504018235, "learning_rate": 6.556459021974337e-07, "loss": 17.5091, "step": 45960 }, { "epoch": 0.8401301478786992, "grad_norm": 4.814001712777441, "learning_rate": 6.554993725887205e-07, "loss": 16.7761, "step": 45961 }, { "epoch": 0.8401484270751458, "grad_norm": 5.678432936843382, "learning_rate": 6.553528582071051e-07, "loss": 16.795, "step": 45962 }, { "epoch": 0.8401667062715923, "grad_norm": 7.335275182527162, "learning_rate": 6.552063590531016e-07, "loss": 17.9574, "step": 45963 }, { "epoch": 0.8401849854680388, "grad_norm": 8.399426118536185, "learning_rate": 6.550598751272219e-07, "loss": 18.0801, "step": 45964 }, { "epoch": 0.8402032646644854, "grad_norm": 5.5645961696586115, "learning_rate": 6.549134064299817e-07, "loss": 17.0262, "step": 45965 }, { "epoch": 0.8402215438609318, "grad_norm": 4.919806946498531, "learning_rate": 6.547669529618921e-07, "loss": 16.9105, "step": 45966 }, { "epoch": 0.8402398230573784, "grad_norm": 6.357061011502586, "learning_rate": 6.546205147234669e-07, "loss": 17.5892, "step": 45967 }, { "epoch": 0.8402581022538249, "grad_norm": 6.20647749583773, "learning_rate": 6.544740917152209e-07, "loss": 17.3405, "step": 45968 }, { "epoch": 0.8402763814502714, "grad_norm": 6.28015110815981, "learning_rate": 6.543276839376655e-07, "loss": 17.3308, "step": 45969 }, { "epoch": 0.840294660646718, "grad_norm": 6.203329388313725, "learning_rate": 6.541812913913142e-07, "loss": 17.5105, "step": 45970 }, { "epoch": 0.8403129398431645, "grad_norm": 7.257668136578101, "learning_rate": 6.5403491407668e-07, "loss": 17.7737, "step": 45971 }, { "epoch": 0.840331219039611, "grad_norm": 5.508592511358306, "learning_rate": 6.538885519942762e-07, "loss": 17.0826, "step": 45972 }, { "epoch": 0.8403494982360575, "grad_norm": 5.434901115027419, "learning_rate": 6.537422051446169e-07, "loss": 16.8534, "step": 45973 }, { "epoch": 0.840367777432504, "grad_norm": 6.796262209241343, "learning_rate": 6.535958735282138e-07, "loss": 17.2964, "step": 45974 }, { "epoch": 0.8403860566289506, "grad_norm": 6.294183164674374, "learning_rate": 6.534495571455796e-07, "loss": 17.288, "step": 45975 }, { "epoch": 0.8404043358253971, "grad_norm": 6.18423400904535, "learning_rate": 6.53303255997228e-07, "loss": 17.3191, "step": 45976 }, { "epoch": 0.8404226150218437, "grad_norm": 6.653528552967247, "learning_rate": 6.531569700836715e-07, "loss": 17.2411, "step": 45977 }, { "epoch": 0.8404408942182902, "grad_norm": 6.226792823909636, "learning_rate": 6.530106994054214e-07, "loss": 17.2799, "step": 45978 }, { "epoch": 0.8404591734147366, "grad_norm": 6.5816723955423395, "learning_rate": 6.528644439629927e-07, "loss": 17.7287, "step": 45979 }, { "epoch": 0.8404774526111832, "grad_norm": 5.89411356613498, "learning_rate": 6.527182037568963e-07, "loss": 17.1092, "step": 45980 }, { "epoch": 0.8404957318076297, "grad_norm": 6.862157167128133, "learning_rate": 6.52571978787645e-07, "loss": 17.5864, "step": 45981 }, { "epoch": 0.8405140110040763, "grad_norm": 7.983814643935499, "learning_rate": 6.524257690557529e-07, "loss": 18.257, "step": 45982 }, { "epoch": 0.8405322902005228, "grad_norm": 6.070262389644991, "learning_rate": 6.522795745617306e-07, "loss": 17.2472, "step": 45983 }, { "epoch": 0.8405505693969693, "grad_norm": 5.327344426407135, "learning_rate": 6.521333953060905e-07, "loss": 17.0927, "step": 45984 }, { "epoch": 0.8405688485934159, "grad_norm": 6.044841329859329, "learning_rate": 6.519872312893466e-07, "loss": 17.2887, "step": 45985 }, { "epoch": 0.8405871277898623, "grad_norm": 6.164201629417903, "learning_rate": 6.518410825120092e-07, "loss": 17.296, "step": 45986 }, { "epoch": 0.8406054069863089, "grad_norm": 7.060537586462984, "learning_rate": 6.516949489745928e-07, "loss": 17.2681, "step": 45987 }, { "epoch": 0.8406236861827554, "grad_norm": 5.5430811749694335, "learning_rate": 6.515488306776064e-07, "loss": 17.0222, "step": 45988 }, { "epoch": 0.8406419653792019, "grad_norm": 5.491455023623495, "learning_rate": 6.514027276215657e-07, "loss": 17.181, "step": 45989 }, { "epoch": 0.8406602445756485, "grad_norm": 5.605809216044385, "learning_rate": 6.512566398069797e-07, "loss": 17.0408, "step": 45990 }, { "epoch": 0.840678523772095, "grad_norm": 5.601958556486469, "learning_rate": 6.511105672343631e-07, "loss": 17.0041, "step": 45991 }, { "epoch": 0.8406968029685415, "grad_norm": 5.611218795429521, "learning_rate": 6.509645099042256e-07, "loss": 17.1422, "step": 45992 }, { "epoch": 0.840715082164988, "grad_norm": 6.366920863550938, "learning_rate": 6.50818467817081e-07, "loss": 17.4396, "step": 45993 }, { "epoch": 0.8407333613614345, "grad_norm": 5.4691217812940565, "learning_rate": 6.506724409734405e-07, "loss": 16.9626, "step": 45994 }, { "epoch": 0.8407516405578811, "grad_norm": 6.94929378470732, "learning_rate": 6.50526429373814e-07, "loss": 17.2472, "step": 45995 }, { "epoch": 0.8407699197543276, "grad_norm": 8.127758855432162, "learning_rate": 6.503804330187164e-07, "loss": 17.677, "step": 45996 }, { "epoch": 0.8407881989507742, "grad_norm": 5.573603078037782, "learning_rate": 6.502344519086568e-07, "loss": 17.1319, "step": 45997 }, { "epoch": 0.8408064781472206, "grad_norm": 6.261329442705966, "learning_rate": 6.500884860441481e-07, "loss": 17.1905, "step": 45998 }, { "epoch": 0.8408247573436671, "grad_norm": 5.746583852685997, "learning_rate": 6.499425354257027e-07, "loss": 17.117, "step": 45999 }, { "epoch": 0.8408430365401137, "grad_norm": 5.959511352060649, "learning_rate": 6.497966000538313e-07, "loss": 17.3429, "step": 46000 }, { "epoch": 0.8408613157365602, "grad_norm": 6.504766101437747, "learning_rate": 6.496506799290447e-07, "loss": 17.4006, "step": 46001 }, { "epoch": 0.8408795949330068, "grad_norm": 4.671205025410941, "learning_rate": 6.495047750518557e-07, "loss": 16.7645, "step": 46002 }, { "epoch": 0.8408978741294533, "grad_norm": 7.225027932661744, "learning_rate": 6.49358885422774e-07, "loss": 17.6903, "step": 46003 }, { "epoch": 0.8409161533258998, "grad_norm": 6.755624826840082, "learning_rate": 6.492130110423123e-07, "loss": 17.5358, "step": 46004 }, { "epoch": 0.8409344325223463, "grad_norm": 6.4135867739631145, "learning_rate": 6.49067151910982e-07, "loss": 17.2087, "step": 46005 }, { "epoch": 0.8409527117187928, "grad_norm": 6.078278456478421, "learning_rate": 6.489213080292928e-07, "loss": 17.2294, "step": 46006 }, { "epoch": 0.8409709909152394, "grad_norm": 8.69715168232273, "learning_rate": 6.487754793977563e-07, "loss": 18.3975, "step": 46007 }, { "epoch": 0.8409892701116859, "grad_norm": 6.036866570146784, "learning_rate": 6.486296660168856e-07, "loss": 17.101, "step": 46008 }, { "epoch": 0.8410075493081324, "grad_norm": 6.43643861917837, "learning_rate": 6.484838678871897e-07, "loss": 17.654, "step": 46009 }, { "epoch": 0.841025828504579, "grad_norm": 5.437250539939162, "learning_rate": 6.483380850091809e-07, "loss": 17.1348, "step": 46010 }, { "epoch": 0.8410441077010254, "grad_norm": 6.047622843100339, "learning_rate": 6.481923173833693e-07, "loss": 17.2434, "step": 46011 }, { "epoch": 0.841062386897472, "grad_norm": 5.859822783789786, "learning_rate": 6.480465650102652e-07, "loss": 16.9972, "step": 46012 }, { "epoch": 0.8410806660939185, "grad_norm": 6.05129432838725, "learning_rate": 6.479008278903809e-07, "loss": 17.3589, "step": 46013 }, { "epoch": 0.841098945290365, "grad_norm": 5.152998761929196, "learning_rate": 6.477551060242271e-07, "loss": 16.7079, "step": 46014 }, { "epoch": 0.8411172244868116, "grad_norm": 6.495586228068481, "learning_rate": 6.476093994123123e-07, "loss": 17.3062, "step": 46015 }, { "epoch": 0.8411355036832581, "grad_norm": 6.775137941242456, "learning_rate": 6.474637080551494e-07, "loss": 17.4795, "step": 46016 }, { "epoch": 0.8411537828797047, "grad_norm": 5.623615229651762, "learning_rate": 6.473180319532479e-07, "loss": 17.1245, "step": 46017 }, { "epoch": 0.8411720620761511, "grad_norm": 6.020870195897604, "learning_rate": 6.471723711071209e-07, "loss": 17.2486, "step": 46018 }, { "epoch": 0.8411903412725976, "grad_norm": 7.0791950953328575, "learning_rate": 6.470267255172763e-07, "loss": 17.3536, "step": 46019 }, { "epoch": 0.8412086204690442, "grad_norm": 6.251474314748463, "learning_rate": 6.468810951842247e-07, "loss": 17.2509, "step": 46020 }, { "epoch": 0.8412268996654907, "grad_norm": 7.36334679910485, "learning_rate": 6.46735480108478e-07, "loss": 17.6854, "step": 46021 }, { "epoch": 0.8412451788619373, "grad_norm": 6.542522161213495, "learning_rate": 6.465898802905457e-07, "loss": 17.14, "step": 46022 }, { "epoch": 0.8412634580583838, "grad_norm": 6.2803800974532376, "learning_rate": 6.464442957309369e-07, "loss": 17.2423, "step": 46023 }, { "epoch": 0.8412817372548302, "grad_norm": 6.250795831061435, "learning_rate": 6.462987264301645e-07, "loss": 17.3092, "step": 46024 }, { "epoch": 0.8413000164512768, "grad_norm": 7.45365127203219, "learning_rate": 6.461531723887359e-07, "loss": 18.0707, "step": 46025 }, { "epoch": 0.8413182956477233, "grad_norm": 5.654755286978041, "learning_rate": 6.460076336071624e-07, "loss": 17.1898, "step": 46026 }, { "epoch": 0.8413365748441699, "grad_norm": 5.797680379558349, "learning_rate": 6.458621100859558e-07, "loss": 17.2854, "step": 46027 }, { "epoch": 0.8413548540406164, "grad_norm": 7.979603836941512, "learning_rate": 6.457166018256245e-07, "loss": 18.3275, "step": 46028 }, { "epoch": 0.8413731332370629, "grad_norm": 6.724435921240554, "learning_rate": 6.455711088266775e-07, "loss": 17.2614, "step": 46029 }, { "epoch": 0.8413914124335095, "grad_norm": 5.7795022874891995, "learning_rate": 6.454256310896273e-07, "loss": 17.2691, "step": 46030 }, { "epoch": 0.8414096916299559, "grad_norm": 5.5495982833056114, "learning_rate": 6.452801686149812e-07, "loss": 16.9901, "step": 46031 }, { "epoch": 0.8414279708264024, "grad_norm": 7.223539791353772, "learning_rate": 6.45134721403251e-07, "loss": 17.636, "step": 46032 }, { "epoch": 0.841446250022849, "grad_norm": 6.360579654531656, "learning_rate": 6.449892894549459e-07, "loss": 17.384, "step": 46033 }, { "epoch": 0.8414645292192955, "grad_norm": 6.445097387381629, "learning_rate": 6.448438727705747e-07, "loss": 17.1252, "step": 46034 }, { "epoch": 0.8414828084157421, "grad_norm": 6.908615547460806, "learning_rate": 6.446984713506471e-07, "loss": 17.5248, "step": 46035 }, { "epoch": 0.8415010876121886, "grad_norm": 4.685907658531576, "learning_rate": 6.44553085195675e-07, "loss": 16.6922, "step": 46036 }, { "epoch": 0.841519366808635, "grad_norm": 5.6173658752646976, "learning_rate": 6.44407714306165e-07, "loss": 17.2574, "step": 46037 }, { "epoch": 0.8415376460050816, "grad_norm": 6.792581095948525, "learning_rate": 6.442623586826296e-07, "loss": 17.6095, "step": 46038 }, { "epoch": 0.8415559252015281, "grad_norm": 6.18526068568259, "learning_rate": 6.441170183255763e-07, "loss": 17.0625, "step": 46039 }, { "epoch": 0.8415742043979747, "grad_norm": 6.987500572928363, "learning_rate": 6.439716932355139e-07, "loss": 17.6393, "step": 46040 }, { "epoch": 0.8415924835944212, "grad_norm": 7.908484325434119, "learning_rate": 6.438263834129532e-07, "loss": 18.1052, "step": 46041 }, { "epoch": 0.8416107627908677, "grad_norm": 6.973400245124961, "learning_rate": 6.436810888584028e-07, "loss": 17.5837, "step": 46042 }, { "epoch": 0.8416290419873143, "grad_norm": 6.016047816767892, "learning_rate": 6.435358095723726e-07, "loss": 17.2147, "step": 46043 }, { "epoch": 0.8416473211837607, "grad_norm": 6.044023999900806, "learning_rate": 6.433905455553708e-07, "loss": 17.1414, "step": 46044 }, { "epoch": 0.8416656003802073, "grad_norm": 5.924154897271701, "learning_rate": 6.432452968079078e-07, "loss": 17.0507, "step": 46045 }, { "epoch": 0.8416838795766538, "grad_norm": 5.671939831839104, "learning_rate": 6.431000633304912e-07, "loss": 17.2513, "step": 46046 }, { "epoch": 0.8417021587731003, "grad_norm": 5.6031414104996555, "learning_rate": 6.429548451236317e-07, "loss": 17.1232, "step": 46047 }, { "epoch": 0.8417204379695469, "grad_norm": 5.99858896105723, "learning_rate": 6.428096421878361e-07, "loss": 17.2972, "step": 46048 }, { "epoch": 0.8417387171659934, "grad_norm": 5.724864049554612, "learning_rate": 6.42664454523616e-07, "loss": 17.1911, "step": 46049 }, { "epoch": 0.84175699636244, "grad_norm": 5.75558666064834, "learning_rate": 6.425192821314785e-07, "loss": 17.118, "step": 46050 }, { "epoch": 0.8417752755588864, "grad_norm": 9.736483073451076, "learning_rate": 6.42374125011932e-07, "loss": 17.666, "step": 46051 }, { "epoch": 0.8417935547553329, "grad_norm": 6.07546345750289, "learning_rate": 6.42228983165486e-07, "loss": 17.2755, "step": 46052 }, { "epoch": 0.8418118339517795, "grad_norm": 6.0561258269587395, "learning_rate": 6.420838565926507e-07, "loss": 16.9124, "step": 46053 }, { "epoch": 0.841830113148226, "grad_norm": 5.36499730525132, "learning_rate": 6.419387452939319e-07, "loss": 17.0113, "step": 46054 }, { "epoch": 0.8418483923446726, "grad_norm": 4.56400711176909, "learning_rate": 6.417936492698406e-07, "loss": 16.7262, "step": 46055 }, { "epoch": 0.841866671541119, "grad_norm": 5.148171906717537, "learning_rate": 6.416485685208845e-07, "loss": 17.1676, "step": 46056 }, { "epoch": 0.8418849507375655, "grad_norm": 6.357585311279041, "learning_rate": 6.415035030475708e-07, "loss": 17.2625, "step": 46057 }, { "epoch": 0.8419032299340121, "grad_norm": 5.974761906795675, "learning_rate": 6.413584528504102e-07, "loss": 17.4283, "step": 46058 }, { "epoch": 0.8419215091304586, "grad_norm": 6.326919577810463, "learning_rate": 6.412134179299101e-07, "loss": 17.3256, "step": 46059 }, { "epoch": 0.8419397883269052, "grad_norm": 6.374960838453082, "learning_rate": 6.410683982865773e-07, "loss": 17.1592, "step": 46060 }, { "epoch": 0.8419580675233517, "grad_norm": 7.431167757002881, "learning_rate": 6.40923393920922e-07, "loss": 17.5344, "step": 46061 }, { "epoch": 0.8419763467197982, "grad_norm": 6.066645450814572, "learning_rate": 6.40778404833452e-07, "loss": 17.5208, "step": 46062 }, { "epoch": 0.8419946259162447, "grad_norm": 7.698769968755253, "learning_rate": 6.406334310246759e-07, "loss": 17.8139, "step": 46063 }, { "epoch": 0.8420129051126912, "grad_norm": 7.189780862995328, "learning_rate": 6.404884724951016e-07, "loss": 17.6677, "step": 46064 }, { "epoch": 0.8420311843091378, "grad_norm": 8.416295837305531, "learning_rate": 6.403435292452359e-07, "loss": 17.9413, "step": 46065 }, { "epoch": 0.8420494635055843, "grad_norm": 6.376175274776856, "learning_rate": 6.40198601275589e-07, "loss": 17.1678, "step": 46066 }, { "epoch": 0.8420677427020308, "grad_norm": 7.998640791008017, "learning_rate": 6.400536885866676e-07, "loss": 17.9024, "step": 46067 }, { "epoch": 0.8420860218984774, "grad_norm": 7.581633892198435, "learning_rate": 6.399087911789786e-07, "loss": 17.4424, "step": 46068 }, { "epoch": 0.8421043010949238, "grad_norm": 7.624278338561091, "learning_rate": 6.397639090530323e-07, "loss": 17.4417, "step": 46069 }, { "epoch": 0.8421225802913704, "grad_norm": 4.431148753546614, "learning_rate": 6.396190422093334e-07, "loss": 16.7623, "step": 46070 }, { "epoch": 0.8421408594878169, "grad_norm": 6.933007612130489, "learning_rate": 6.39474190648392e-07, "loss": 17.6772, "step": 46071 }, { "epoch": 0.8421591386842634, "grad_norm": 6.639727835313199, "learning_rate": 6.393293543707158e-07, "loss": 17.4353, "step": 46072 }, { "epoch": 0.84217741788071, "grad_norm": 7.825427366918067, "learning_rate": 6.391845333768121e-07, "loss": 17.3226, "step": 46073 }, { "epoch": 0.8421956970771565, "grad_norm": 5.768588835399484, "learning_rate": 6.390397276671867e-07, "loss": 17.018, "step": 46074 }, { "epoch": 0.8422139762736031, "grad_norm": 5.623510933616133, "learning_rate": 6.388949372423503e-07, "loss": 17.3169, "step": 46075 }, { "epoch": 0.8422322554700495, "grad_norm": 5.4659653126384224, "learning_rate": 6.387501621028086e-07, "loss": 16.9975, "step": 46076 }, { "epoch": 0.842250534666496, "grad_norm": 5.712027280574739, "learning_rate": 6.386054022490678e-07, "loss": 17.0654, "step": 46077 }, { "epoch": 0.8422688138629426, "grad_norm": 10.141913776972203, "learning_rate": 6.384606576816377e-07, "loss": 18.2188, "step": 46078 }, { "epoch": 0.8422870930593891, "grad_norm": 5.700546210288327, "learning_rate": 6.383159284010232e-07, "loss": 17.3201, "step": 46079 }, { "epoch": 0.8423053722558357, "grad_norm": 6.9836778108353945, "learning_rate": 6.381712144077335e-07, "loss": 17.4987, "step": 46080 }, { "epoch": 0.8423236514522822, "grad_norm": 9.069502808204602, "learning_rate": 6.380265157022758e-07, "loss": 17.9221, "step": 46081 }, { "epoch": 0.8423419306487286, "grad_norm": 6.1851603112477385, "learning_rate": 6.37881832285156e-07, "loss": 17.142, "step": 46082 }, { "epoch": 0.8423602098451752, "grad_norm": 5.450880202089164, "learning_rate": 6.377371641568824e-07, "loss": 17.0089, "step": 46083 }, { "epoch": 0.8423784890416217, "grad_norm": 7.405240285885902, "learning_rate": 6.375925113179615e-07, "loss": 17.7146, "step": 46084 }, { "epoch": 0.8423967682380683, "grad_norm": 5.717928936379854, "learning_rate": 6.374478737688999e-07, "loss": 17.1631, "step": 46085 }, { "epoch": 0.8424150474345148, "grad_norm": 6.18064080096749, "learning_rate": 6.373032515102057e-07, "loss": 17.2322, "step": 46086 }, { "epoch": 0.8424333266309613, "grad_norm": 5.761965671860631, "learning_rate": 6.371586445423844e-07, "loss": 17.2952, "step": 46087 }, { "epoch": 0.8424516058274079, "grad_norm": 5.867532652461305, "learning_rate": 6.37014052865943e-07, "loss": 17.391, "step": 46088 }, { "epoch": 0.8424698850238543, "grad_norm": 5.266607202147457, "learning_rate": 6.368694764813887e-07, "loss": 17.0294, "step": 46089 }, { "epoch": 0.8424881642203009, "grad_norm": 6.126828173291664, "learning_rate": 6.36724915389229e-07, "loss": 17.1871, "step": 46090 }, { "epoch": 0.8425064434167474, "grad_norm": 5.378592819509256, "learning_rate": 6.365803695899692e-07, "loss": 17.0291, "step": 46091 }, { "epoch": 0.8425247226131939, "grad_norm": 6.116380674735897, "learning_rate": 6.364358390841174e-07, "loss": 17.3521, "step": 46092 }, { "epoch": 0.8425430018096405, "grad_norm": 6.403823802280335, "learning_rate": 6.362913238721785e-07, "loss": 17.4793, "step": 46093 }, { "epoch": 0.842561281006087, "grad_norm": 5.404727817861879, "learning_rate": 6.361468239546608e-07, "loss": 17.0457, "step": 46094 }, { "epoch": 0.8425795602025336, "grad_norm": 6.587614993459766, "learning_rate": 6.360023393320697e-07, "loss": 17.4122, "step": 46095 }, { "epoch": 0.84259783939898, "grad_norm": 6.453502252105697, "learning_rate": 6.358578700049107e-07, "loss": 17.6329, "step": 46096 }, { "epoch": 0.8426161185954265, "grad_norm": 5.433082572919445, "learning_rate": 6.357134159736922e-07, "loss": 17.114, "step": 46097 }, { "epoch": 0.8426343977918731, "grad_norm": 7.161361959038653, "learning_rate": 6.355689772389185e-07, "loss": 17.5192, "step": 46098 }, { "epoch": 0.8426526769883196, "grad_norm": 7.033742785178503, "learning_rate": 6.354245538010967e-07, "loss": 17.3339, "step": 46099 }, { "epoch": 0.8426709561847661, "grad_norm": 5.708913436273312, "learning_rate": 6.352801456607344e-07, "loss": 16.8877, "step": 46100 }, { "epoch": 0.8426892353812127, "grad_norm": 5.12295436169752, "learning_rate": 6.351357528183361e-07, "loss": 17.2169, "step": 46101 }, { "epoch": 0.8427075145776591, "grad_norm": 4.8319557001494715, "learning_rate": 6.349913752744074e-07, "loss": 16.7233, "step": 46102 }, { "epoch": 0.8427257937741057, "grad_norm": 5.147464616136142, "learning_rate": 6.348470130294559e-07, "loss": 16.8426, "step": 46103 }, { "epoch": 0.8427440729705522, "grad_norm": 5.638684538772498, "learning_rate": 6.347026660839872e-07, "loss": 17.1765, "step": 46104 }, { "epoch": 0.8427623521669987, "grad_norm": 6.14290613932862, "learning_rate": 6.345583344385053e-07, "loss": 16.9208, "step": 46105 }, { "epoch": 0.8427806313634453, "grad_norm": 5.57131042239043, "learning_rate": 6.344140180935194e-07, "loss": 17.059, "step": 46106 }, { "epoch": 0.8427989105598918, "grad_norm": 7.232048220232974, "learning_rate": 6.342697170495321e-07, "loss": 18.0948, "step": 46107 }, { "epoch": 0.8428171897563383, "grad_norm": 5.397285257654072, "learning_rate": 6.341254313070505e-07, "loss": 17.2539, "step": 46108 }, { "epoch": 0.8428354689527848, "grad_norm": 7.156606806638524, "learning_rate": 6.339811608665813e-07, "loss": 17.3843, "step": 46109 }, { "epoch": 0.8428537481492313, "grad_norm": 5.984677908560996, "learning_rate": 6.338369057286287e-07, "loss": 17.2544, "step": 46110 }, { "epoch": 0.8428720273456779, "grad_norm": 6.302899628979128, "learning_rate": 6.336926658936998e-07, "loss": 17.9145, "step": 46111 }, { "epoch": 0.8428903065421244, "grad_norm": 5.434769941350568, "learning_rate": 6.335484413622989e-07, "loss": 17.0407, "step": 46112 }, { "epoch": 0.842908585738571, "grad_norm": 6.100003985553447, "learning_rate": 6.334042321349309e-07, "loss": 17.0759, "step": 46113 }, { "epoch": 0.8429268649350175, "grad_norm": 7.7921764455469935, "learning_rate": 6.332600382121034e-07, "loss": 17.5831, "step": 46114 }, { "epoch": 0.8429451441314639, "grad_norm": 7.627853392464421, "learning_rate": 6.331158595943194e-07, "loss": 17.5473, "step": 46115 }, { "epoch": 0.8429634233279105, "grad_norm": 6.798277786680915, "learning_rate": 6.329716962820865e-07, "loss": 17.4608, "step": 46116 }, { "epoch": 0.842981702524357, "grad_norm": 6.577569160228844, "learning_rate": 6.328275482759077e-07, "loss": 17.4209, "step": 46117 }, { "epoch": 0.8429999817208036, "grad_norm": 5.73046219267721, "learning_rate": 6.326834155762901e-07, "loss": 17.0959, "step": 46118 }, { "epoch": 0.8430182609172501, "grad_norm": 5.580752767353826, "learning_rate": 6.325392981837375e-07, "loss": 17.3096, "step": 46119 }, { "epoch": 0.8430365401136966, "grad_norm": 5.859040199356692, "learning_rate": 6.323951960987568e-07, "loss": 16.8726, "step": 46120 }, { "epoch": 0.8430548193101431, "grad_norm": 6.158867009828318, "learning_rate": 6.322511093218514e-07, "loss": 17.0147, "step": 46121 }, { "epoch": 0.8430730985065896, "grad_norm": 8.70438845278514, "learning_rate": 6.321070378535266e-07, "loss": 18.2677, "step": 46122 }, { "epoch": 0.8430913777030362, "grad_norm": 8.719000541081042, "learning_rate": 6.319629816942879e-07, "loss": 17.9546, "step": 46123 }, { "epoch": 0.8431096568994827, "grad_norm": 7.956544974203615, "learning_rate": 6.31818940844639e-07, "loss": 18.0868, "step": 46124 }, { "epoch": 0.8431279360959292, "grad_norm": 5.978625122305905, "learning_rate": 6.316749153050861e-07, "loss": 17.3073, "step": 46125 }, { "epoch": 0.8431462152923758, "grad_norm": 5.783784514535276, "learning_rate": 6.315309050761343e-07, "loss": 17.0584, "step": 46126 }, { "epoch": 0.8431644944888222, "grad_norm": 5.68974321410848, "learning_rate": 6.313869101582865e-07, "loss": 17.3295, "step": 46127 }, { "epoch": 0.8431827736852688, "grad_norm": 6.272036722380221, "learning_rate": 6.312429305520496e-07, "loss": 17.4984, "step": 46128 }, { "epoch": 0.8432010528817153, "grad_norm": 6.013589864254006, "learning_rate": 6.31098966257927e-07, "loss": 17.2374, "step": 46129 }, { "epoch": 0.8432193320781618, "grad_norm": 7.332062469055498, "learning_rate": 6.309550172764228e-07, "loss": 17.6711, "step": 46130 }, { "epoch": 0.8432376112746084, "grad_norm": 6.308897151288401, "learning_rate": 6.308110836080427e-07, "loss": 17.2526, "step": 46131 }, { "epoch": 0.8432558904710549, "grad_norm": 6.130194253064733, "learning_rate": 6.306671652532909e-07, "loss": 17.3274, "step": 46132 }, { "epoch": 0.8432741696675015, "grad_norm": 5.566883477629925, "learning_rate": 6.305232622126705e-07, "loss": 17.1539, "step": 46133 }, { "epoch": 0.8432924488639479, "grad_norm": 7.955904018643479, "learning_rate": 6.303793744866865e-07, "loss": 17.9717, "step": 46134 }, { "epoch": 0.8433107280603944, "grad_norm": 5.6267692829978815, "learning_rate": 6.30235502075845e-07, "loss": 17.1133, "step": 46135 }, { "epoch": 0.843329007256841, "grad_norm": 6.1383506891608555, "learning_rate": 6.300916449806477e-07, "loss": 17.3649, "step": 46136 }, { "epoch": 0.8433472864532875, "grad_norm": 6.575397564760067, "learning_rate": 6.29947803201601e-07, "loss": 17.819, "step": 46137 }, { "epoch": 0.8433655656497341, "grad_norm": 5.609258140001392, "learning_rate": 6.298039767392083e-07, "loss": 17.1374, "step": 46138 }, { "epoch": 0.8433838448461806, "grad_norm": 6.376159760958797, "learning_rate": 6.296601655939721e-07, "loss": 17.3735, "step": 46139 }, { "epoch": 0.843402124042627, "grad_norm": 6.6068521380910745, "learning_rate": 6.295163697663992e-07, "loss": 17.5186, "step": 46140 }, { "epoch": 0.8434204032390736, "grad_norm": 6.317898182087654, "learning_rate": 6.293725892569907e-07, "loss": 17.0394, "step": 46141 }, { "epoch": 0.8434386824355201, "grad_norm": 6.423273898095944, "learning_rate": 6.292288240662531e-07, "loss": 17.4334, "step": 46142 }, { "epoch": 0.8434569616319667, "grad_norm": 7.492676421260496, "learning_rate": 6.290850741946885e-07, "loss": 17.9699, "step": 46143 }, { "epoch": 0.8434752408284132, "grad_norm": 5.4748447537114275, "learning_rate": 6.289413396428013e-07, "loss": 17.0464, "step": 46144 }, { "epoch": 0.8434935200248597, "grad_norm": 6.519749126441515, "learning_rate": 6.287976204110963e-07, "loss": 17.5441, "step": 46145 }, { "epoch": 0.8435117992213063, "grad_norm": 6.264210347970919, "learning_rate": 6.286539165000765e-07, "loss": 17.4751, "step": 46146 }, { "epoch": 0.8435300784177527, "grad_norm": 5.566249722109238, "learning_rate": 6.28510227910244e-07, "loss": 17.0827, "step": 46147 }, { "epoch": 0.8435483576141993, "grad_norm": 5.795767818812697, "learning_rate": 6.283665546421053e-07, "loss": 17.1365, "step": 46148 }, { "epoch": 0.8435666368106458, "grad_norm": 5.640292376997583, "learning_rate": 6.282228966961623e-07, "loss": 17.1498, "step": 46149 }, { "epoch": 0.8435849160070923, "grad_norm": 8.02455656115299, "learning_rate": 6.280792540729175e-07, "loss": 17.5462, "step": 46150 }, { "epoch": 0.8436031952035389, "grad_norm": 6.94096578401807, "learning_rate": 6.279356267728765e-07, "loss": 17.4765, "step": 46151 }, { "epoch": 0.8436214743999854, "grad_norm": 5.762179513447485, "learning_rate": 6.277920147965411e-07, "loss": 17.2682, "step": 46152 }, { "epoch": 0.843639753596432, "grad_norm": 5.968405737393675, "learning_rate": 6.276484181444148e-07, "loss": 17.4097, "step": 46153 }, { "epoch": 0.8436580327928784, "grad_norm": 5.5920765560079095, "learning_rate": 6.275048368170028e-07, "loss": 16.9518, "step": 46154 }, { "epoch": 0.8436763119893249, "grad_norm": 6.672311035968386, "learning_rate": 6.273612708148058e-07, "loss": 17.1916, "step": 46155 }, { "epoch": 0.8436945911857715, "grad_norm": 6.8035975249203595, "learning_rate": 6.27217720138329e-07, "loss": 17.1264, "step": 46156 }, { "epoch": 0.843712870382218, "grad_norm": 6.471321356548542, "learning_rate": 6.270741847880745e-07, "loss": 17.5626, "step": 46157 }, { "epoch": 0.8437311495786646, "grad_norm": 5.9890033409520225, "learning_rate": 6.269306647645446e-07, "loss": 17.1138, "step": 46158 }, { "epoch": 0.843749428775111, "grad_norm": 5.9500063033615005, "learning_rate": 6.267871600682446e-07, "loss": 17.1706, "step": 46159 }, { "epoch": 0.8437677079715575, "grad_norm": 5.933629032158119, "learning_rate": 6.266436706996759e-07, "loss": 17.0484, "step": 46160 }, { "epoch": 0.8437859871680041, "grad_norm": 6.426480494694421, "learning_rate": 6.265001966593403e-07, "loss": 17.3585, "step": 46161 }, { "epoch": 0.8438042663644506, "grad_norm": 7.079066895467041, "learning_rate": 6.263567379477425e-07, "loss": 17.737, "step": 46162 }, { "epoch": 0.8438225455608972, "grad_norm": 5.975609164826784, "learning_rate": 6.262132945653859e-07, "loss": 17.3365, "step": 46163 }, { "epoch": 0.8438408247573437, "grad_norm": 6.633875298930035, "learning_rate": 6.260698665127706e-07, "loss": 17.2338, "step": 46164 }, { "epoch": 0.8438591039537902, "grad_norm": 6.036200141384278, "learning_rate": 6.25926453790402e-07, "loss": 17.2256, "step": 46165 }, { "epoch": 0.8438773831502367, "grad_norm": 7.2634553380545945, "learning_rate": 6.257830563987821e-07, "loss": 17.6238, "step": 46166 }, { "epoch": 0.8438956623466832, "grad_norm": 6.462989485302095, "learning_rate": 6.256396743384113e-07, "loss": 17.4799, "step": 46167 }, { "epoch": 0.8439139415431297, "grad_norm": 6.135360978076376, "learning_rate": 6.254963076097953e-07, "loss": 17.1446, "step": 46168 }, { "epoch": 0.8439322207395763, "grad_norm": 6.901681784000167, "learning_rate": 6.253529562134342e-07, "loss": 17.2767, "step": 46169 }, { "epoch": 0.8439504999360228, "grad_norm": 7.732560273294313, "learning_rate": 6.252096201498325e-07, "loss": 18.1581, "step": 46170 }, { "epoch": 0.8439687791324694, "grad_norm": 7.83080595615922, "learning_rate": 6.2506629941949e-07, "loss": 17.9924, "step": 46171 }, { "epoch": 0.8439870583289159, "grad_norm": 6.3612751501633085, "learning_rate": 6.249229940229107e-07, "loss": 17.5072, "step": 46172 }, { "epoch": 0.8440053375253623, "grad_norm": 6.431898925452813, "learning_rate": 6.247797039605974e-07, "loss": 17.4071, "step": 46173 }, { "epoch": 0.8440236167218089, "grad_norm": 6.964228381508161, "learning_rate": 6.24636429233052e-07, "loss": 17.5275, "step": 46174 }, { "epoch": 0.8440418959182554, "grad_norm": 6.440989814069687, "learning_rate": 6.244931698407747e-07, "loss": 17.6391, "step": 46175 }, { "epoch": 0.844060175114702, "grad_norm": 6.0647878813547385, "learning_rate": 6.243499257842706e-07, "loss": 17.0078, "step": 46176 }, { "epoch": 0.8440784543111485, "grad_norm": 6.8221501129911335, "learning_rate": 6.242066970640404e-07, "loss": 17.3912, "step": 46177 }, { "epoch": 0.844096733507595, "grad_norm": 6.085209588812232, "learning_rate": 6.240634836805848e-07, "loss": 17.4389, "step": 46178 }, { "epoch": 0.8441150127040415, "grad_norm": 4.7881466965668755, "learning_rate": 6.239202856344068e-07, "loss": 16.9125, "step": 46179 }, { "epoch": 0.844133291900488, "grad_norm": 5.780863515523245, "learning_rate": 6.237771029260098e-07, "loss": 17.0632, "step": 46180 }, { "epoch": 0.8441515710969346, "grad_norm": 6.45141816611886, "learning_rate": 6.236339355558929e-07, "loss": 17.4266, "step": 46181 }, { "epoch": 0.8441698502933811, "grad_norm": 5.612669134953132, "learning_rate": 6.234907835245607e-07, "loss": 16.9421, "step": 46182 }, { "epoch": 0.8441881294898276, "grad_norm": 6.3456967730873615, "learning_rate": 6.233476468325134e-07, "loss": 17.2283, "step": 46183 }, { "epoch": 0.8442064086862742, "grad_norm": 5.548497419009417, "learning_rate": 6.232045254802516e-07, "loss": 17.2136, "step": 46184 }, { "epoch": 0.8442246878827206, "grad_norm": 7.006738165878737, "learning_rate": 6.230614194682793e-07, "loss": 17.8195, "step": 46185 }, { "epoch": 0.8442429670791672, "grad_norm": 5.289097802073452, "learning_rate": 6.229183287970958e-07, "loss": 17.0025, "step": 46186 }, { "epoch": 0.8442612462756137, "grad_norm": 5.8143079117641365, "learning_rate": 6.22775253467205e-07, "loss": 17.3231, "step": 46187 }, { "epoch": 0.8442795254720602, "grad_norm": 6.250247798973741, "learning_rate": 6.226321934791063e-07, "loss": 16.9444, "step": 46188 }, { "epoch": 0.8442978046685068, "grad_norm": 6.918908611326479, "learning_rate": 6.224891488333013e-07, "loss": 17.5681, "step": 46189 }, { "epoch": 0.8443160838649533, "grad_norm": 7.526231648250083, "learning_rate": 6.223461195302932e-07, "loss": 17.9836, "step": 46190 }, { "epoch": 0.8443343630613999, "grad_norm": 6.370707239340985, "learning_rate": 6.222031055705823e-07, "loss": 17.2494, "step": 46191 }, { "epoch": 0.8443526422578463, "grad_norm": 5.939208034868343, "learning_rate": 6.220601069546683e-07, "loss": 17.5747, "step": 46192 }, { "epoch": 0.8443709214542928, "grad_norm": 5.419948410024577, "learning_rate": 6.219171236830546e-07, "loss": 17.1089, "step": 46193 }, { "epoch": 0.8443892006507394, "grad_norm": 7.080287426616819, "learning_rate": 6.217741557562418e-07, "loss": 17.6231, "step": 46194 }, { "epoch": 0.8444074798471859, "grad_norm": 6.498085921713254, "learning_rate": 6.216312031747296e-07, "loss": 17.7393, "step": 46195 }, { "epoch": 0.8444257590436325, "grad_norm": 5.448276075232004, "learning_rate": 6.214882659390215e-07, "loss": 16.9311, "step": 46196 }, { "epoch": 0.844444038240079, "grad_norm": 7.8570782132306745, "learning_rate": 6.213453440496153e-07, "loss": 17.7752, "step": 46197 }, { "epoch": 0.8444623174365254, "grad_norm": 6.242725760689146, "learning_rate": 6.212024375070142e-07, "loss": 17.4917, "step": 46198 }, { "epoch": 0.844480596632972, "grad_norm": 7.104403438079455, "learning_rate": 6.210595463117192e-07, "loss": 17.7319, "step": 46199 }, { "epoch": 0.8444988758294185, "grad_norm": 6.729283087195542, "learning_rate": 6.2091667046423e-07, "loss": 17.0483, "step": 46200 }, { "epoch": 0.8445171550258651, "grad_norm": 5.7901417326297455, "learning_rate": 6.207738099650485e-07, "loss": 17.098, "step": 46201 }, { "epoch": 0.8445354342223116, "grad_norm": 6.339228966700129, "learning_rate": 6.206309648146753e-07, "loss": 17.2285, "step": 46202 }, { "epoch": 0.8445537134187581, "grad_norm": 7.618526821885173, "learning_rate": 6.204881350136088e-07, "loss": 17.6954, "step": 46203 }, { "epoch": 0.8445719926152047, "grad_norm": 6.677115260064081, "learning_rate": 6.203453205623528e-07, "loss": 17.3652, "step": 46204 }, { "epoch": 0.8445902718116511, "grad_norm": 7.1633755014075415, "learning_rate": 6.20202521461406e-07, "loss": 17.8171, "step": 46205 }, { "epoch": 0.8446085510080977, "grad_norm": 6.447679294926219, "learning_rate": 6.200597377112688e-07, "loss": 17.5203, "step": 46206 }, { "epoch": 0.8446268302045442, "grad_norm": 7.808753237547805, "learning_rate": 6.199169693124418e-07, "loss": 18.4141, "step": 46207 }, { "epoch": 0.8446451094009907, "grad_norm": 5.98785412082984, "learning_rate": 6.197742162654269e-07, "loss": 17.5412, "step": 46208 }, { "epoch": 0.8446633885974373, "grad_norm": 5.457729876867781, "learning_rate": 6.196314785707224e-07, "loss": 16.8906, "step": 46209 }, { "epoch": 0.8446816677938838, "grad_norm": 6.136852507542158, "learning_rate": 6.194887562288304e-07, "loss": 17.4102, "step": 46210 }, { "epoch": 0.8446999469903304, "grad_norm": 10.161740738755771, "learning_rate": 6.193460492402498e-07, "loss": 18.1662, "step": 46211 }, { "epoch": 0.8447182261867768, "grad_norm": 5.408367012532549, "learning_rate": 6.192033576054807e-07, "loss": 17.127, "step": 46212 }, { "epoch": 0.8447365053832233, "grad_norm": 6.611681190367565, "learning_rate": 6.190606813250244e-07, "loss": 17.4428, "step": 46213 }, { "epoch": 0.8447547845796699, "grad_norm": 9.066095216134698, "learning_rate": 6.189180203993805e-07, "loss": 17.5613, "step": 46214 }, { "epoch": 0.8447730637761164, "grad_norm": 8.007367546495383, "learning_rate": 6.187753748290476e-07, "loss": 17.4897, "step": 46215 }, { "epoch": 0.844791342972563, "grad_norm": 6.252543966326447, "learning_rate": 6.186327446145263e-07, "loss": 17.3297, "step": 46216 }, { "epoch": 0.8448096221690095, "grad_norm": 6.635525742708184, "learning_rate": 6.184901297563178e-07, "loss": 17.5144, "step": 46217 }, { "epoch": 0.8448279013654559, "grad_norm": 6.273442756325001, "learning_rate": 6.183475302549219e-07, "loss": 17.1405, "step": 46218 }, { "epoch": 0.8448461805619025, "grad_norm": 6.625040556986819, "learning_rate": 6.182049461108374e-07, "loss": 17.6966, "step": 46219 }, { "epoch": 0.844864459758349, "grad_norm": 7.133953272686304, "learning_rate": 6.180623773245636e-07, "loss": 17.6655, "step": 46220 }, { "epoch": 0.8448827389547956, "grad_norm": 5.9857809995138584, "learning_rate": 6.179198238966022e-07, "loss": 17.2456, "step": 46221 }, { "epoch": 0.8449010181512421, "grad_norm": 7.786207693197191, "learning_rate": 6.177772858274511e-07, "loss": 18.0738, "step": 46222 }, { "epoch": 0.8449192973476886, "grad_norm": 5.5341851457833044, "learning_rate": 6.176347631176099e-07, "loss": 17.1476, "step": 46223 }, { "epoch": 0.8449375765441351, "grad_norm": 5.8750752256249745, "learning_rate": 6.174922557675789e-07, "loss": 17.208, "step": 46224 }, { "epoch": 0.8449558557405816, "grad_norm": 7.427423372912281, "learning_rate": 6.173497637778569e-07, "loss": 17.6047, "step": 46225 }, { "epoch": 0.8449741349370282, "grad_norm": 6.518337075882625, "learning_rate": 6.172072871489432e-07, "loss": 17.4232, "step": 46226 }, { "epoch": 0.8449924141334747, "grad_norm": 6.16976859213342, "learning_rate": 6.170648258813389e-07, "loss": 17.5659, "step": 46227 }, { "epoch": 0.8450106933299212, "grad_norm": 5.653792018866178, "learning_rate": 6.16922379975542e-07, "loss": 17.0137, "step": 46228 }, { "epoch": 0.8450289725263678, "grad_norm": 6.291391620277504, "learning_rate": 6.167799494320503e-07, "loss": 17.3864, "step": 46229 }, { "epoch": 0.8450472517228143, "grad_norm": 6.323553047041541, "learning_rate": 6.166375342513659e-07, "loss": 17.3276, "step": 46230 }, { "epoch": 0.8450655309192608, "grad_norm": 6.539109635491313, "learning_rate": 6.164951344339859e-07, "loss": 17.658, "step": 46231 }, { "epoch": 0.8450838101157073, "grad_norm": 7.75480727734558, "learning_rate": 6.163527499804106e-07, "loss": 18.0378, "step": 46232 }, { "epoch": 0.8451020893121538, "grad_norm": 7.3466409879311225, "learning_rate": 6.16210380891139e-07, "loss": 17.8401, "step": 46233 }, { "epoch": 0.8451203685086004, "grad_norm": 6.523308659668005, "learning_rate": 6.16068027166668e-07, "loss": 17.1904, "step": 46234 }, { "epoch": 0.8451386477050469, "grad_norm": 5.705317818738901, "learning_rate": 6.159256888074982e-07, "loss": 17.0811, "step": 46235 }, { "epoch": 0.8451569269014934, "grad_norm": 7.33269422572029, "learning_rate": 6.157833658141298e-07, "loss": 17.531, "step": 46236 }, { "epoch": 0.84517520609794, "grad_norm": 7.24149860215536, "learning_rate": 6.156410581870592e-07, "loss": 17.6227, "step": 46237 }, { "epoch": 0.8451934852943864, "grad_norm": 5.147836568615003, "learning_rate": 6.154987659267875e-07, "loss": 17.0994, "step": 46238 }, { "epoch": 0.845211764490833, "grad_norm": 6.7214484891710375, "learning_rate": 6.153564890338115e-07, "loss": 17.6704, "step": 46239 }, { "epoch": 0.8452300436872795, "grad_norm": 5.626715027138469, "learning_rate": 6.152142275086298e-07, "loss": 17.3689, "step": 46240 }, { "epoch": 0.845248322883726, "grad_norm": 5.876224134902151, "learning_rate": 6.150719813517431e-07, "loss": 17.2069, "step": 46241 }, { "epoch": 0.8452666020801726, "grad_norm": 6.870924160591784, "learning_rate": 6.14929750563647e-07, "loss": 17.5334, "step": 46242 }, { "epoch": 0.845284881276619, "grad_norm": 6.426125935214659, "learning_rate": 6.147875351448429e-07, "loss": 17.175, "step": 46243 }, { "epoch": 0.8453031604730656, "grad_norm": 6.482489535736359, "learning_rate": 6.14645335095827e-07, "loss": 17.3099, "step": 46244 }, { "epoch": 0.8453214396695121, "grad_norm": 7.563924025033729, "learning_rate": 6.145031504170996e-07, "loss": 17.4877, "step": 46245 }, { "epoch": 0.8453397188659586, "grad_norm": 7.846278538734912, "learning_rate": 6.143609811091573e-07, "loss": 18.0483, "step": 46246 }, { "epoch": 0.8453579980624052, "grad_norm": 6.068115964032393, "learning_rate": 6.142188271725003e-07, "loss": 17.3501, "step": 46247 }, { "epoch": 0.8453762772588517, "grad_norm": 5.033467066462684, "learning_rate": 6.140766886076239e-07, "loss": 16.9738, "step": 46248 }, { "epoch": 0.8453945564552983, "grad_norm": 6.638846818080287, "learning_rate": 6.139345654150297e-07, "loss": 17.487, "step": 46249 }, { "epoch": 0.8454128356517447, "grad_norm": 7.288952707450226, "learning_rate": 6.137924575952142e-07, "loss": 17.3984, "step": 46250 }, { "epoch": 0.8454311148481912, "grad_norm": 6.61305117015478, "learning_rate": 6.136503651486747e-07, "loss": 17.6668, "step": 46251 }, { "epoch": 0.8454493940446378, "grad_norm": 6.15947715941376, "learning_rate": 6.135082880759096e-07, "loss": 17.6046, "step": 46252 }, { "epoch": 0.8454676732410843, "grad_norm": 6.319874577811748, "learning_rate": 6.133662263774187e-07, "loss": 17.398, "step": 46253 }, { "epoch": 0.8454859524375309, "grad_norm": 6.073751534071657, "learning_rate": 6.132241800536975e-07, "loss": 17.0705, "step": 46254 }, { "epoch": 0.8455042316339774, "grad_norm": 5.341313849401389, "learning_rate": 6.130821491052458e-07, "loss": 17.0891, "step": 46255 }, { "epoch": 0.8455225108304238, "grad_norm": 6.748790675817591, "learning_rate": 6.129401335325602e-07, "loss": 17.6083, "step": 46256 }, { "epoch": 0.8455407900268704, "grad_norm": 7.958148276103493, "learning_rate": 6.127981333361382e-07, "loss": 17.7678, "step": 46257 }, { "epoch": 0.8455590692233169, "grad_norm": 5.820140449893818, "learning_rate": 6.126561485164789e-07, "loss": 17.2217, "step": 46258 }, { "epoch": 0.8455773484197635, "grad_norm": 6.564794376546401, "learning_rate": 6.125141790740791e-07, "loss": 17.33, "step": 46259 }, { "epoch": 0.84559562761621, "grad_norm": 6.53542153963473, "learning_rate": 6.12372225009435e-07, "loss": 17.4342, "step": 46260 }, { "epoch": 0.8456139068126565, "grad_norm": 5.970567369733798, "learning_rate": 6.122302863230456e-07, "loss": 17.1412, "step": 46261 }, { "epoch": 0.8456321860091031, "grad_norm": 8.869470179830484, "learning_rate": 6.12088363015409e-07, "loss": 18.4548, "step": 46262 }, { "epoch": 0.8456504652055495, "grad_norm": 5.979980429353093, "learning_rate": 6.119464550870224e-07, "loss": 17.2369, "step": 46263 }, { "epoch": 0.8456687444019961, "grad_norm": 6.466584734894736, "learning_rate": 6.118045625383822e-07, "loss": 17.3174, "step": 46264 }, { "epoch": 0.8456870235984426, "grad_norm": 5.1709441421812565, "learning_rate": 6.116626853699859e-07, "loss": 17.1594, "step": 46265 }, { "epoch": 0.8457053027948891, "grad_norm": 5.393498112906961, "learning_rate": 6.115208235823322e-07, "loss": 16.9844, "step": 46266 }, { "epoch": 0.8457235819913357, "grad_norm": 7.775199383313628, "learning_rate": 6.113789771759165e-07, "loss": 17.9413, "step": 46267 }, { "epoch": 0.8457418611877822, "grad_norm": 4.742422529552055, "learning_rate": 6.112371461512362e-07, "loss": 16.894, "step": 46268 }, { "epoch": 0.8457601403842288, "grad_norm": 6.349688688155014, "learning_rate": 6.110953305087897e-07, "loss": 17.0634, "step": 46269 }, { "epoch": 0.8457784195806752, "grad_norm": 6.489555941247276, "learning_rate": 6.109535302490726e-07, "loss": 17.5034, "step": 46270 }, { "epoch": 0.8457966987771217, "grad_norm": 5.647171991253879, "learning_rate": 6.10811745372582e-07, "loss": 17.3899, "step": 46271 }, { "epoch": 0.8458149779735683, "grad_norm": 5.1719726912907085, "learning_rate": 6.106699758798163e-07, "loss": 17.0134, "step": 46272 }, { "epoch": 0.8458332571700148, "grad_norm": 6.1514826925894015, "learning_rate": 6.105282217712721e-07, "loss": 17.4027, "step": 46273 }, { "epoch": 0.8458515363664614, "grad_norm": 6.4343348469147745, "learning_rate": 6.103864830474437e-07, "loss": 17.5546, "step": 46274 }, { "epoch": 0.8458698155629079, "grad_norm": 6.624923680295102, "learning_rate": 6.102447597088312e-07, "loss": 17.755, "step": 46275 }, { "epoch": 0.8458880947593543, "grad_norm": 6.903906293201054, "learning_rate": 6.101030517559304e-07, "loss": 17.7481, "step": 46276 }, { "epoch": 0.8459063739558009, "grad_norm": 5.611143966228213, "learning_rate": 6.099613591892356e-07, "loss": 17.0727, "step": 46277 }, { "epoch": 0.8459246531522474, "grad_norm": 5.3482797109716556, "learning_rate": 6.098196820092466e-07, "loss": 17.0032, "step": 46278 }, { "epoch": 0.845942932348694, "grad_norm": 4.5761837730439865, "learning_rate": 6.096780202164576e-07, "loss": 16.6603, "step": 46279 }, { "epoch": 0.8459612115451405, "grad_norm": 7.183339011075379, "learning_rate": 6.095363738113664e-07, "loss": 17.5151, "step": 46280 }, { "epoch": 0.845979490741587, "grad_norm": 7.015697501555848, "learning_rate": 6.093947427944702e-07, "loss": 17.7362, "step": 46281 }, { "epoch": 0.8459977699380336, "grad_norm": 6.631575299210763, "learning_rate": 6.092531271662633e-07, "loss": 17.5329, "step": 46282 }, { "epoch": 0.84601604913448, "grad_norm": 6.346025729201199, "learning_rate": 6.091115269272441e-07, "loss": 17.5469, "step": 46283 }, { "epoch": 0.8460343283309266, "grad_norm": 4.835736837934786, "learning_rate": 6.089699420779082e-07, "loss": 16.8358, "step": 46284 }, { "epoch": 0.8460526075273731, "grad_norm": 6.9568380623702994, "learning_rate": 6.088283726187505e-07, "loss": 17.6311, "step": 46285 }, { "epoch": 0.8460708867238196, "grad_norm": 6.4744306089301595, "learning_rate": 6.086868185502693e-07, "loss": 17.6109, "step": 46286 }, { "epoch": 0.8460891659202662, "grad_norm": 5.971155013580041, "learning_rate": 6.085452798729597e-07, "loss": 17.221, "step": 46287 }, { "epoch": 0.8461074451167127, "grad_norm": 7.152649804611073, "learning_rate": 6.084037565873169e-07, "loss": 17.7243, "step": 46288 }, { "epoch": 0.8461257243131592, "grad_norm": 5.624663080917239, "learning_rate": 6.082622486938377e-07, "loss": 17.3206, "step": 46289 }, { "epoch": 0.8461440035096057, "grad_norm": 5.422769001696061, "learning_rate": 6.081207561930197e-07, "loss": 17.0452, "step": 46290 }, { "epoch": 0.8461622827060522, "grad_norm": 5.230441642391372, "learning_rate": 6.079792790853556e-07, "loss": 16.9092, "step": 46291 }, { "epoch": 0.8461805619024988, "grad_norm": 7.159132549536139, "learning_rate": 6.078378173713445e-07, "loss": 17.4463, "step": 46292 }, { "epoch": 0.8461988410989453, "grad_norm": 6.0059585444605075, "learning_rate": 6.076963710514794e-07, "loss": 17.3186, "step": 46293 }, { "epoch": 0.8462171202953919, "grad_norm": 8.17293847480371, "learning_rate": 6.075549401262587e-07, "loss": 17.0932, "step": 46294 }, { "epoch": 0.8462353994918383, "grad_norm": 6.314717064912688, "learning_rate": 6.074135245961766e-07, "loss": 17.4367, "step": 46295 }, { "epoch": 0.8462536786882848, "grad_norm": 6.384293230653199, "learning_rate": 6.07272124461728e-07, "loss": 17.2651, "step": 46296 }, { "epoch": 0.8462719578847314, "grad_norm": 5.520039622116213, "learning_rate": 6.071307397234105e-07, "loss": 17.0201, "step": 46297 }, { "epoch": 0.8462902370811779, "grad_norm": 7.358994410827307, "learning_rate": 6.069893703817176e-07, "loss": 17.7918, "step": 46298 }, { "epoch": 0.8463085162776245, "grad_norm": 5.282289471768526, "learning_rate": 6.068480164371454e-07, "loss": 17.0397, "step": 46299 }, { "epoch": 0.846326795474071, "grad_norm": 6.965501349359728, "learning_rate": 6.067066778901909e-07, "loss": 17.3915, "step": 46300 }, { "epoch": 0.8463450746705174, "grad_norm": 5.848951376668912, "learning_rate": 6.065653547413481e-07, "loss": 17.0564, "step": 46301 }, { "epoch": 0.846363353866964, "grad_norm": 7.741437018224837, "learning_rate": 6.064240469911115e-07, "loss": 17.9294, "step": 46302 }, { "epoch": 0.8463816330634105, "grad_norm": 7.3193091308583105, "learning_rate": 6.062827546399785e-07, "loss": 17.7172, "step": 46303 }, { "epoch": 0.846399912259857, "grad_norm": 7.592453227399713, "learning_rate": 6.061414776884433e-07, "loss": 17.7893, "step": 46304 }, { "epoch": 0.8464181914563036, "grad_norm": 7.9581706895268605, "learning_rate": 6.060002161369999e-07, "loss": 17.476, "step": 46305 }, { "epoch": 0.8464364706527501, "grad_norm": 5.435700874146918, "learning_rate": 6.058589699861439e-07, "loss": 17.1618, "step": 46306 }, { "epoch": 0.8464547498491967, "grad_norm": 6.295916674452893, "learning_rate": 6.057177392363728e-07, "loss": 17.4732, "step": 46307 }, { "epoch": 0.8464730290456431, "grad_norm": 6.043602661085739, "learning_rate": 6.055765238881784e-07, "loss": 16.8115, "step": 46308 }, { "epoch": 0.8464913082420896, "grad_norm": 6.4479745976550475, "learning_rate": 6.054353239420574e-07, "loss": 17.3846, "step": 46309 }, { "epoch": 0.8465095874385362, "grad_norm": 6.8937543613436265, "learning_rate": 6.052941393985035e-07, "loss": 17.4989, "step": 46310 }, { "epoch": 0.8465278666349827, "grad_norm": 6.122266320002467, "learning_rate": 6.051529702580139e-07, "loss": 17.1129, "step": 46311 }, { "epoch": 0.8465461458314293, "grad_norm": 5.728269941825321, "learning_rate": 6.050118165210811e-07, "loss": 17.2489, "step": 46312 }, { "epoch": 0.8465644250278758, "grad_norm": 5.319628945184635, "learning_rate": 6.048706781881997e-07, "loss": 16.8642, "step": 46313 }, { "epoch": 0.8465827042243222, "grad_norm": 5.825648561523089, "learning_rate": 6.047295552598664e-07, "loss": 17.3898, "step": 46314 }, { "epoch": 0.8466009834207688, "grad_norm": 5.487442886869097, "learning_rate": 6.045884477365732e-07, "loss": 16.9657, "step": 46315 }, { "epoch": 0.8466192626172153, "grad_norm": 5.932492134336607, "learning_rate": 6.044473556188163e-07, "loss": 17.1704, "step": 46316 }, { "epoch": 0.8466375418136619, "grad_norm": 6.559125643317827, "learning_rate": 6.043062789070909e-07, "loss": 17.244, "step": 46317 }, { "epoch": 0.8466558210101084, "grad_norm": 6.703497353694614, "learning_rate": 6.041652176018908e-07, "loss": 17.5656, "step": 46318 }, { "epoch": 0.8466741002065549, "grad_norm": 6.215486614652694, "learning_rate": 6.040241717037088e-07, "loss": 17.1439, "step": 46319 }, { "epoch": 0.8466923794030015, "grad_norm": 7.588956348910639, "learning_rate": 6.038831412130419e-07, "loss": 17.6481, "step": 46320 }, { "epoch": 0.8467106585994479, "grad_norm": 6.931671161659342, "learning_rate": 6.037421261303828e-07, "loss": 17.5502, "step": 46321 }, { "epoch": 0.8467289377958945, "grad_norm": 6.1048084426278235, "learning_rate": 6.036011264562253e-07, "loss": 17.2462, "step": 46322 }, { "epoch": 0.846747216992341, "grad_norm": 6.354549082959889, "learning_rate": 6.034601421910652e-07, "loss": 17.1349, "step": 46323 }, { "epoch": 0.8467654961887875, "grad_norm": 6.128991421948812, "learning_rate": 6.033191733353949e-07, "loss": 17.196, "step": 46324 }, { "epoch": 0.8467837753852341, "grad_norm": 6.885434455002509, "learning_rate": 6.031782198897096e-07, "loss": 17.6174, "step": 46325 }, { "epoch": 0.8468020545816806, "grad_norm": 6.671063315117276, "learning_rate": 6.030372818545038e-07, "loss": 17.3889, "step": 46326 }, { "epoch": 0.8468203337781272, "grad_norm": 5.636329121488959, "learning_rate": 6.0289635923027e-07, "loss": 17.2786, "step": 46327 }, { "epoch": 0.8468386129745736, "grad_norm": 5.8329279652088575, "learning_rate": 6.027554520175039e-07, "loss": 17.1025, "step": 46328 }, { "epoch": 0.8468568921710201, "grad_norm": 6.015754390968082, "learning_rate": 6.026145602166978e-07, "loss": 16.866, "step": 46329 }, { "epoch": 0.8468751713674667, "grad_norm": 7.122473060133786, "learning_rate": 6.024736838283457e-07, "loss": 17.7212, "step": 46330 }, { "epoch": 0.8468934505639132, "grad_norm": 5.275886004835771, "learning_rate": 6.023328228529424e-07, "loss": 16.9551, "step": 46331 }, { "epoch": 0.8469117297603598, "grad_norm": 6.959663556534134, "learning_rate": 6.021919772909813e-07, "loss": 17.4387, "step": 46332 }, { "epoch": 0.8469300089568063, "grad_norm": 6.170637455361342, "learning_rate": 6.020511471429541e-07, "loss": 17.3529, "step": 46333 }, { "epoch": 0.8469482881532527, "grad_norm": 6.260858700505903, "learning_rate": 6.019103324093567e-07, "loss": 17.4799, "step": 46334 }, { "epoch": 0.8469665673496993, "grad_norm": 5.418211422308829, "learning_rate": 6.017695330906825e-07, "loss": 17.3481, "step": 46335 }, { "epoch": 0.8469848465461458, "grad_norm": 6.1353001866038275, "learning_rate": 6.016287491874234e-07, "loss": 17.3465, "step": 46336 }, { "epoch": 0.8470031257425924, "grad_norm": 8.266343451280841, "learning_rate": 6.014879807000751e-07, "loss": 18.2085, "step": 46337 }, { "epoch": 0.8470214049390389, "grad_norm": 6.31842594733319, "learning_rate": 6.013472276291288e-07, "loss": 17.3233, "step": 46338 }, { "epoch": 0.8470396841354854, "grad_norm": 6.14039457334629, "learning_rate": 6.012064899750791e-07, "loss": 17.4554, "step": 46339 }, { "epoch": 0.847057963331932, "grad_norm": 4.8191443264014024, "learning_rate": 6.010657677384197e-07, "loss": 16.8793, "step": 46340 }, { "epoch": 0.8470762425283784, "grad_norm": 5.884884497608376, "learning_rate": 6.009250609196416e-07, "loss": 17.134, "step": 46341 }, { "epoch": 0.847094521724825, "grad_norm": 6.353520664153623, "learning_rate": 6.007843695192406e-07, "loss": 17.406, "step": 46342 }, { "epoch": 0.8471128009212715, "grad_norm": 6.8201568759699995, "learning_rate": 6.006436935377081e-07, "loss": 17.2351, "step": 46343 }, { "epoch": 0.847131080117718, "grad_norm": 6.649127998209189, "learning_rate": 6.005030329755368e-07, "loss": 17.3289, "step": 46344 }, { "epoch": 0.8471493593141646, "grad_norm": 5.51371558698571, "learning_rate": 6.003623878332221e-07, "loss": 17.2071, "step": 46345 }, { "epoch": 0.847167638510611, "grad_norm": 6.891396718385393, "learning_rate": 6.002217581112557e-07, "loss": 17.5876, "step": 46346 }, { "epoch": 0.8471859177070576, "grad_norm": 7.528489574794413, "learning_rate": 6.00081143810129e-07, "loss": 17.6795, "step": 46347 }, { "epoch": 0.8472041969035041, "grad_norm": 6.818146256306783, "learning_rate": 5.999405449303369e-07, "loss": 17.6725, "step": 46348 }, { "epoch": 0.8472224760999506, "grad_norm": 6.056693358739807, "learning_rate": 5.997999614723716e-07, "loss": 17.2334, "step": 46349 }, { "epoch": 0.8472407552963972, "grad_norm": 5.494852596643855, "learning_rate": 5.996593934367245e-07, "loss": 16.8116, "step": 46350 }, { "epoch": 0.8472590344928437, "grad_norm": 7.370290039789291, "learning_rate": 5.9951884082389e-07, "loss": 17.926, "step": 46351 }, { "epoch": 0.8472773136892903, "grad_norm": 6.041705430744679, "learning_rate": 5.993783036343598e-07, "loss": 17.0278, "step": 46352 }, { "epoch": 0.8472955928857367, "grad_norm": 7.04089502439454, "learning_rate": 5.992377818686262e-07, "loss": 17.3441, "step": 46353 }, { "epoch": 0.8473138720821832, "grad_norm": 5.342483827638675, "learning_rate": 5.99097275527184e-07, "loss": 16.8806, "step": 46354 }, { "epoch": 0.8473321512786298, "grad_norm": 4.701292711219627, "learning_rate": 5.989567846105221e-07, "loss": 16.7601, "step": 46355 }, { "epoch": 0.8473504304750763, "grad_norm": 6.669324290199927, "learning_rate": 5.988163091191362e-07, "loss": 17.7224, "step": 46356 }, { "epoch": 0.8473687096715229, "grad_norm": 5.323124877111008, "learning_rate": 5.986758490535172e-07, "loss": 17.0224, "step": 46357 }, { "epoch": 0.8473869888679694, "grad_norm": 6.683806245845221, "learning_rate": 5.985354044141567e-07, "loss": 17.9062, "step": 46358 }, { "epoch": 0.8474052680644159, "grad_norm": 4.991596911934842, "learning_rate": 5.983949752015484e-07, "loss": 16.9576, "step": 46359 }, { "epoch": 0.8474235472608624, "grad_norm": 5.925359972476347, "learning_rate": 5.982545614161839e-07, "loss": 17.2326, "step": 46360 }, { "epoch": 0.8474418264573089, "grad_norm": 9.083035061602647, "learning_rate": 5.981141630585535e-07, "loss": 17.709, "step": 46361 }, { "epoch": 0.8474601056537555, "grad_norm": 6.592611681081894, "learning_rate": 5.979737801291519e-07, "loss": 17.6629, "step": 46362 }, { "epoch": 0.847478384850202, "grad_norm": 7.266461234461848, "learning_rate": 5.978334126284707e-07, "loss": 17.8371, "step": 46363 }, { "epoch": 0.8474966640466485, "grad_norm": 7.0092297590585995, "learning_rate": 5.976930605570002e-07, "loss": 17.6161, "step": 46364 }, { "epoch": 0.8475149432430951, "grad_norm": 6.221814104305453, "learning_rate": 5.975527239152351e-07, "loss": 17.3616, "step": 46365 }, { "epoch": 0.8475332224395415, "grad_norm": 6.51469973393413, "learning_rate": 5.974124027036654e-07, "loss": 17.1302, "step": 46366 }, { "epoch": 0.8475515016359881, "grad_norm": 6.606199803505639, "learning_rate": 5.972720969227824e-07, "loss": 17.5557, "step": 46367 }, { "epoch": 0.8475697808324346, "grad_norm": 6.534108506833766, "learning_rate": 5.971318065730791e-07, "loss": 17.2393, "step": 46368 }, { "epoch": 0.8475880600288811, "grad_norm": 6.443250086731848, "learning_rate": 5.969915316550462e-07, "loss": 17.1514, "step": 46369 }, { "epoch": 0.8476063392253277, "grad_norm": 5.845178663200265, "learning_rate": 5.968512721691772e-07, "loss": 17.2875, "step": 46370 }, { "epoch": 0.8476246184217742, "grad_norm": 7.128717023043976, "learning_rate": 5.967110281159605e-07, "loss": 17.9161, "step": 46371 }, { "epoch": 0.8476428976182206, "grad_norm": 5.3125780843799975, "learning_rate": 5.965707994958902e-07, "loss": 17.0661, "step": 46372 }, { "epoch": 0.8476611768146672, "grad_norm": 5.496946658159806, "learning_rate": 5.964305863094583e-07, "loss": 17.1842, "step": 46373 }, { "epoch": 0.8476794560111137, "grad_norm": 5.215556309802917, "learning_rate": 5.962903885571547e-07, "loss": 17.0017, "step": 46374 }, { "epoch": 0.8476977352075603, "grad_norm": 6.91721421237061, "learning_rate": 5.961502062394703e-07, "loss": 17.2371, "step": 46375 }, { "epoch": 0.8477160144040068, "grad_norm": 6.877422771518247, "learning_rate": 5.96010039356898e-07, "loss": 17.4329, "step": 46376 }, { "epoch": 0.8477342936004533, "grad_norm": 6.582642657635609, "learning_rate": 5.958698879099284e-07, "loss": 17.2638, "step": 46377 }, { "epoch": 0.8477525727968999, "grad_norm": 5.690654220979101, "learning_rate": 5.957297518990518e-07, "loss": 16.985, "step": 46378 }, { "epoch": 0.8477708519933463, "grad_norm": 6.104681069863317, "learning_rate": 5.955896313247605e-07, "loss": 17.1434, "step": 46379 }, { "epoch": 0.8477891311897929, "grad_norm": 5.967024678855995, "learning_rate": 5.954495261875459e-07, "loss": 17.1302, "step": 46380 }, { "epoch": 0.8478074103862394, "grad_norm": 6.0232169206051145, "learning_rate": 5.953094364878975e-07, "loss": 16.9818, "step": 46381 }, { "epoch": 0.8478256895826859, "grad_norm": 7.866345899022223, "learning_rate": 5.951693622263089e-07, "loss": 17.4545, "step": 46382 }, { "epoch": 0.8478439687791325, "grad_norm": 9.219815180381136, "learning_rate": 5.950293034032689e-07, "loss": 17.5675, "step": 46383 }, { "epoch": 0.847862247975579, "grad_norm": 6.031383924226654, "learning_rate": 5.948892600192679e-07, "loss": 17.1003, "step": 46384 }, { "epoch": 0.8478805271720256, "grad_norm": 7.335732621820483, "learning_rate": 5.947492320747994e-07, "loss": 17.4783, "step": 46385 }, { "epoch": 0.847898806368472, "grad_norm": 5.493970699663126, "learning_rate": 5.946092195703507e-07, "loss": 17.0748, "step": 46386 }, { "epoch": 0.8479170855649185, "grad_norm": 6.015121472488012, "learning_rate": 5.944692225064164e-07, "loss": 17.1248, "step": 46387 }, { "epoch": 0.8479353647613651, "grad_norm": 5.5202912323589235, "learning_rate": 5.943292408834834e-07, "loss": 17.0063, "step": 46388 }, { "epoch": 0.8479536439578116, "grad_norm": 4.926916112038797, "learning_rate": 5.941892747020444e-07, "loss": 16.7657, "step": 46389 }, { "epoch": 0.8479719231542582, "grad_norm": 6.650095331584238, "learning_rate": 5.940493239625905e-07, "loss": 17.1949, "step": 46390 }, { "epoch": 0.8479902023507047, "grad_norm": 5.904362418600147, "learning_rate": 5.939093886656116e-07, "loss": 16.9229, "step": 46391 }, { "epoch": 0.8480084815471511, "grad_norm": 6.8994148546597, "learning_rate": 5.937694688115969e-07, "loss": 17.3427, "step": 46392 }, { "epoch": 0.8480267607435977, "grad_norm": 6.9182561687967405, "learning_rate": 5.936295644010387e-07, "loss": 17.4952, "step": 46393 }, { "epoch": 0.8480450399400442, "grad_norm": 6.3515388142464495, "learning_rate": 5.934896754344266e-07, "loss": 17.2165, "step": 46394 }, { "epoch": 0.8480633191364908, "grad_norm": 6.016300857286726, "learning_rate": 5.933498019122496e-07, "loss": 17.1179, "step": 46395 }, { "epoch": 0.8480815983329373, "grad_norm": 6.272925777865556, "learning_rate": 5.932099438350009e-07, "loss": 17.3529, "step": 46396 }, { "epoch": 0.8480998775293838, "grad_norm": 9.011275077221558, "learning_rate": 5.93070101203167e-07, "loss": 18.7222, "step": 46397 }, { "epoch": 0.8481181567258304, "grad_norm": 7.474403303009199, "learning_rate": 5.929302740172405e-07, "loss": 18.3627, "step": 46398 }, { "epoch": 0.8481364359222768, "grad_norm": 5.672623548192236, "learning_rate": 5.92790462277712e-07, "loss": 16.9733, "step": 46399 }, { "epoch": 0.8481547151187234, "grad_norm": 4.573950753541042, "learning_rate": 5.92650665985069e-07, "loss": 16.6901, "step": 46400 }, { "epoch": 0.8481729943151699, "grad_norm": 5.958232313585511, "learning_rate": 5.925108851398043e-07, "loss": 17.1402, "step": 46401 }, { "epoch": 0.8481912735116164, "grad_norm": 5.794460191533141, "learning_rate": 5.923711197424065e-07, "loss": 17.3187, "step": 46402 }, { "epoch": 0.848209552708063, "grad_norm": 5.592470966382563, "learning_rate": 5.922313697933646e-07, "loss": 17.2305, "step": 46403 }, { "epoch": 0.8482278319045095, "grad_norm": 5.441196204589625, "learning_rate": 5.920916352931699e-07, "loss": 17.155, "step": 46404 }, { "epoch": 0.848246111100956, "grad_norm": 5.756048122551537, "learning_rate": 5.919519162423121e-07, "loss": 17.3585, "step": 46405 }, { "epoch": 0.8482643902974025, "grad_norm": 6.301863160176076, "learning_rate": 5.918122126412789e-07, "loss": 17.3977, "step": 46406 }, { "epoch": 0.848282669493849, "grad_norm": 7.170749208100669, "learning_rate": 5.916725244905613e-07, "loss": 17.4318, "step": 46407 }, { "epoch": 0.8483009486902956, "grad_norm": 6.055882144233034, "learning_rate": 5.915328517906499e-07, "loss": 17.2906, "step": 46408 }, { "epoch": 0.8483192278867421, "grad_norm": 6.361851777132472, "learning_rate": 5.913931945420326e-07, "loss": 17.2318, "step": 46409 }, { "epoch": 0.8483375070831887, "grad_norm": 6.21174035163363, "learning_rate": 5.912535527452007e-07, "loss": 17.1434, "step": 46410 }, { "epoch": 0.8483557862796351, "grad_norm": 6.282627634700654, "learning_rate": 5.911139264006421e-07, "loss": 17.3252, "step": 46411 }, { "epoch": 0.8483740654760816, "grad_norm": 5.8179850193099885, "learning_rate": 5.90974315508846e-07, "loss": 17.2925, "step": 46412 }, { "epoch": 0.8483923446725282, "grad_norm": 6.056808520403459, "learning_rate": 5.90834720070303e-07, "loss": 17.385, "step": 46413 }, { "epoch": 0.8484106238689747, "grad_norm": 5.5429955013484475, "learning_rate": 5.906951400855021e-07, "loss": 17.0467, "step": 46414 }, { "epoch": 0.8484289030654213, "grad_norm": 7.008238608895542, "learning_rate": 5.905555755549308e-07, "loss": 17.4442, "step": 46415 }, { "epoch": 0.8484471822618678, "grad_norm": 6.388953059123369, "learning_rate": 5.904160264790793e-07, "loss": 17.3638, "step": 46416 }, { "epoch": 0.8484654614583143, "grad_norm": 8.052525111006387, "learning_rate": 5.902764928584376e-07, "loss": 18.2081, "step": 46417 }, { "epoch": 0.8484837406547608, "grad_norm": 8.168642189125636, "learning_rate": 5.901369746934948e-07, "loss": 17.3548, "step": 46418 }, { "epoch": 0.8485020198512073, "grad_norm": 6.09567291100862, "learning_rate": 5.89997471984739e-07, "loss": 17.1704, "step": 46419 }, { "epoch": 0.8485202990476539, "grad_norm": 7.161010364112528, "learning_rate": 5.898579847326585e-07, "loss": 17.9739, "step": 46420 }, { "epoch": 0.8485385782441004, "grad_norm": 6.098179822114521, "learning_rate": 5.897185129377447e-07, "loss": 17.1516, "step": 46421 }, { "epoch": 0.8485568574405469, "grad_norm": 6.493088278926376, "learning_rate": 5.895790566004839e-07, "loss": 17.4283, "step": 46422 }, { "epoch": 0.8485751366369935, "grad_norm": 5.547396925961946, "learning_rate": 5.894396157213655e-07, "loss": 17.092, "step": 46423 }, { "epoch": 0.84859341583344, "grad_norm": 6.320411769304832, "learning_rate": 5.893001903008794e-07, "loss": 17.6273, "step": 46424 }, { "epoch": 0.8486116950298865, "grad_norm": 7.6403013997247, "learning_rate": 5.891607803395122e-07, "loss": 17.2382, "step": 46425 }, { "epoch": 0.848629974226333, "grad_norm": 6.295960497043923, "learning_rate": 5.890213858377536e-07, "loss": 17.2225, "step": 46426 }, { "epoch": 0.8486482534227795, "grad_norm": 6.704795016119947, "learning_rate": 5.888820067960932e-07, "loss": 17.6507, "step": 46427 }, { "epoch": 0.8486665326192261, "grad_norm": 5.86477753024616, "learning_rate": 5.88742643215019e-07, "loss": 17.0535, "step": 46428 }, { "epoch": 0.8486848118156726, "grad_norm": 7.254018766084089, "learning_rate": 5.886032950950177e-07, "loss": 17.4657, "step": 46429 }, { "epoch": 0.8487030910121192, "grad_norm": 5.472630738574778, "learning_rate": 5.884639624365801e-07, "loss": 17.0612, "step": 46430 }, { "epoch": 0.8487213702085656, "grad_norm": 5.3234957006140515, "learning_rate": 5.883246452401925e-07, "loss": 17.2887, "step": 46431 }, { "epoch": 0.8487396494050121, "grad_norm": 6.775635466752095, "learning_rate": 5.881853435063456e-07, "loss": 17.1699, "step": 46432 }, { "epoch": 0.8487579286014587, "grad_norm": 6.305228253555073, "learning_rate": 5.880460572355245e-07, "loss": 17.4189, "step": 46433 }, { "epoch": 0.8487762077979052, "grad_norm": 6.81610458887254, "learning_rate": 5.879067864282206e-07, "loss": 17.4253, "step": 46434 }, { "epoch": 0.8487944869943518, "grad_norm": 6.577186352294932, "learning_rate": 5.877675310849196e-07, "loss": 17.1803, "step": 46435 }, { "epoch": 0.8488127661907983, "grad_norm": 5.481331295438781, "learning_rate": 5.876282912061115e-07, "loss": 16.9372, "step": 46436 }, { "epoch": 0.8488310453872447, "grad_norm": 7.724979672638006, "learning_rate": 5.874890667922822e-07, "loss": 17.4656, "step": 46437 }, { "epoch": 0.8488493245836913, "grad_norm": 7.60979513978912, "learning_rate": 5.873498578439219e-07, "loss": 17.5256, "step": 46438 }, { "epoch": 0.8488676037801378, "grad_norm": 4.896938990260753, "learning_rate": 5.872106643615172e-07, "loss": 16.9566, "step": 46439 }, { "epoch": 0.8488858829765843, "grad_norm": 5.155052239069299, "learning_rate": 5.870714863455551e-07, "loss": 16.7086, "step": 46440 }, { "epoch": 0.8489041621730309, "grad_norm": 6.5632283787422825, "learning_rate": 5.869323237965263e-07, "loss": 17.3985, "step": 46441 }, { "epoch": 0.8489224413694774, "grad_norm": 6.429399015781883, "learning_rate": 5.867931767149148e-07, "loss": 16.9391, "step": 46442 }, { "epoch": 0.848940720565924, "grad_norm": 5.718053456789868, "learning_rate": 5.866540451012109e-07, "loss": 17.0912, "step": 46443 }, { "epoch": 0.8489589997623704, "grad_norm": 5.835669276639728, "learning_rate": 5.865149289559025e-07, "loss": 17.3399, "step": 46444 }, { "epoch": 0.8489772789588169, "grad_norm": 6.233010088097597, "learning_rate": 5.863758282794763e-07, "loss": 17.368, "step": 46445 }, { "epoch": 0.8489955581552635, "grad_norm": 7.138305601383869, "learning_rate": 5.862367430724186e-07, "loss": 17.5605, "step": 46446 }, { "epoch": 0.84901383735171, "grad_norm": 7.118257749346135, "learning_rate": 5.860976733352191e-07, "loss": 17.9199, "step": 46447 }, { "epoch": 0.8490321165481566, "grad_norm": 5.852428254453989, "learning_rate": 5.859586190683636e-07, "loss": 17.2623, "step": 46448 }, { "epoch": 0.8490503957446031, "grad_norm": 5.2437072528089725, "learning_rate": 5.858195802723409e-07, "loss": 16.8304, "step": 46449 }, { "epoch": 0.8490686749410495, "grad_norm": 5.146959123538132, "learning_rate": 5.856805569476376e-07, "loss": 17.0491, "step": 46450 }, { "epoch": 0.8490869541374961, "grad_norm": 6.354059490765818, "learning_rate": 5.855415490947397e-07, "loss": 17.46, "step": 46451 }, { "epoch": 0.8491052333339426, "grad_norm": 7.525765252696029, "learning_rate": 5.854025567141353e-07, "loss": 17.119, "step": 46452 }, { "epoch": 0.8491235125303892, "grad_norm": 6.321571113830441, "learning_rate": 5.852635798063133e-07, "loss": 17.3466, "step": 46453 }, { "epoch": 0.8491417917268357, "grad_norm": 6.199731553655896, "learning_rate": 5.851246183717585e-07, "loss": 17.2822, "step": 46454 }, { "epoch": 0.8491600709232822, "grad_norm": 4.914314024003379, "learning_rate": 5.849856724109592e-07, "loss": 16.8386, "step": 46455 }, { "epoch": 0.8491783501197288, "grad_norm": 6.088544086587961, "learning_rate": 5.848467419244031e-07, "loss": 17.1408, "step": 46456 }, { "epoch": 0.8491966293161752, "grad_norm": 5.487261745293404, "learning_rate": 5.847078269125739e-07, "loss": 16.9586, "step": 46457 }, { "epoch": 0.8492149085126218, "grad_norm": 5.379922172402489, "learning_rate": 5.84568927375962e-07, "loss": 16.9427, "step": 46458 }, { "epoch": 0.8492331877090683, "grad_norm": 6.360154784207189, "learning_rate": 5.84430043315053e-07, "loss": 17.3034, "step": 46459 }, { "epoch": 0.8492514669055148, "grad_norm": 6.577274807477542, "learning_rate": 5.842911747303326e-07, "loss": 17.2112, "step": 46460 }, { "epoch": 0.8492697461019614, "grad_norm": 7.273471324621314, "learning_rate": 5.841523216222883e-07, "loss": 17.5092, "step": 46461 }, { "epoch": 0.8492880252984079, "grad_norm": 5.760206251390457, "learning_rate": 5.840134839914069e-07, "loss": 17.3449, "step": 46462 }, { "epoch": 0.8493063044948544, "grad_norm": 5.524885661647027, "learning_rate": 5.838746618381763e-07, "loss": 17.195, "step": 46463 }, { "epoch": 0.8493245836913009, "grad_norm": 6.625760633892066, "learning_rate": 5.837358551630812e-07, "loss": 17.1841, "step": 46464 }, { "epoch": 0.8493428628877474, "grad_norm": 5.340830294375778, "learning_rate": 5.835970639666083e-07, "loss": 17.1501, "step": 46465 }, { "epoch": 0.849361142084194, "grad_norm": 5.498583007742781, "learning_rate": 5.834582882492452e-07, "loss": 16.9214, "step": 46466 }, { "epoch": 0.8493794212806405, "grad_norm": 6.4184372600810615, "learning_rate": 5.833195280114778e-07, "loss": 17.6664, "step": 46467 }, { "epoch": 0.8493977004770871, "grad_norm": 5.998073566690932, "learning_rate": 5.831807832537906e-07, "loss": 17.2879, "step": 46468 }, { "epoch": 0.8494159796735335, "grad_norm": 6.006320600935282, "learning_rate": 5.830420539766729e-07, "loss": 17.3409, "step": 46469 }, { "epoch": 0.84943425886998, "grad_norm": 5.725488164751785, "learning_rate": 5.829033401806084e-07, "loss": 17.1711, "step": 46470 }, { "epoch": 0.8494525380664266, "grad_norm": 4.866732155727357, "learning_rate": 5.827646418660848e-07, "loss": 16.7988, "step": 46471 }, { "epoch": 0.8494708172628731, "grad_norm": 7.920807674224754, "learning_rate": 5.826259590335881e-07, "loss": 17.5876, "step": 46472 }, { "epoch": 0.8494890964593197, "grad_norm": 6.758893052883143, "learning_rate": 5.824872916836049e-07, "loss": 17.6213, "step": 46473 }, { "epoch": 0.8495073756557662, "grad_norm": 6.460740019035271, "learning_rate": 5.823486398166189e-07, "loss": 17.2833, "step": 46474 }, { "epoch": 0.8495256548522127, "grad_norm": 6.357519206055143, "learning_rate": 5.822100034331185e-07, "loss": 17.5444, "step": 46475 }, { "epoch": 0.8495439340486592, "grad_norm": 7.097078932855679, "learning_rate": 5.820713825335889e-07, "loss": 17.8844, "step": 46476 }, { "epoch": 0.8495622132451057, "grad_norm": 5.266039300759257, "learning_rate": 5.819327771185146e-07, "loss": 17.0446, "step": 46477 }, { "epoch": 0.8495804924415523, "grad_norm": 5.8434641422084415, "learning_rate": 5.817941871883837e-07, "loss": 17.2514, "step": 46478 }, { "epoch": 0.8495987716379988, "grad_norm": 6.57783923937061, "learning_rate": 5.816556127436796e-07, "loss": 17.5636, "step": 46479 }, { "epoch": 0.8496170508344453, "grad_norm": 6.6074915611822185, "learning_rate": 5.815170537848891e-07, "loss": 17.6714, "step": 46480 }, { "epoch": 0.8496353300308919, "grad_norm": 4.538393762475987, "learning_rate": 5.813785103124992e-07, "loss": 16.8303, "step": 46481 }, { "epoch": 0.8496536092273383, "grad_norm": 6.55200518132397, "learning_rate": 5.812399823269932e-07, "loss": 17.1969, "step": 46482 }, { "epoch": 0.8496718884237849, "grad_norm": 6.383983594121791, "learning_rate": 5.811014698288587e-07, "loss": 17.3645, "step": 46483 }, { "epoch": 0.8496901676202314, "grad_norm": 6.667295579097535, "learning_rate": 5.809629728185795e-07, "loss": 17.5172, "step": 46484 }, { "epoch": 0.8497084468166779, "grad_norm": 7.509837709363606, "learning_rate": 5.80824491296641e-07, "loss": 18.1668, "step": 46485 }, { "epoch": 0.8497267260131245, "grad_norm": 6.266716618484736, "learning_rate": 5.806860252635305e-07, "loss": 17.2398, "step": 46486 }, { "epoch": 0.849745005209571, "grad_norm": 5.573131175349652, "learning_rate": 5.805475747197314e-07, "loss": 16.9353, "step": 46487 }, { "epoch": 0.8497632844060176, "grad_norm": 5.199098347581794, "learning_rate": 5.804091396657291e-07, "loss": 16.974, "step": 46488 }, { "epoch": 0.849781563602464, "grad_norm": 6.010760227401883, "learning_rate": 5.802707201020086e-07, "loss": 17.3366, "step": 46489 }, { "epoch": 0.8497998427989105, "grad_norm": 6.069071995635964, "learning_rate": 5.801323160290573e-07, "loss": 17.6049, "step": 46490 }, { "epoch": 0.8498181219953571, "grad_norm": 6.38609610953082, "learning_rate": 5.79993927447357e-07, "loss": 17.2882, "step": 46491 }, { "epoch": 0.8498364011918036, "grad_norm": 7.109690494476676, "learning_rate": 5.798555543573964e-07, "loss": 17.6743, "step": 46492 }, { "epoch": 0.8498546803882502, "grad_norm": 5.705803682055039, "learning_rate": 5.797171967596566e-07, "loss": 17.0656, "step": 46493 }, { "epoch": 0.8498729595846967, "grad_norm": 5.645522590966912, "learning_rate": 5.79578854654626e-07, "loss": 17.0242, "step": 46494 }, { "epoch": 0.8498912387811431, "grad_norm": 6.410530296599759, "learning_rate": 5.794405280427878e-07, "loss": 17.5106, "step": 46495 }, { "epoch": 0.8499095179775897, "grad_norm": 7.941364805646126, "learning_rate": 5.793022169246259e-07, "loss": 17.6513, "step": 46496 }, { "epoch": 0.8499277971740362, "grad_norm": 8.126976688279017, "learning_rate": 5.791639213006272e-07, "loss": 18.1083, "step": 46497 }, { "epoch": 0.8499460763704828, "grad_norm": 7.224833503711694, "learning_rate": 5.790256411712741e-07, "loss": 17.6534, "step": 46498 }, { "epoch": 0.8499643555669293, "grad_norm": 6.062045720665319, "learning_rate": 5.788873765370528e-07, "loss": 17.1303, "step": 46499 }, { "epoch": 0.8499826347633758, "grad_norm": 5.843997139875117, "learning_rate": 5.787491273984486e-07, "loss": 17.1062, "step": 46500 }, { "epoch": 0.8500009139598224, "grad_norm": 6.140103423788349, "learning_rate": 5.786108937559448e-07, "loss": 17.1128, "step": 46501 }, { "epoch": 0.8500191931562688, "grad_norm": 4.893189578061456, "learning_rate": 5.784726756100251e-07, "loss": 16.93, "step": 46502 }, { "epoch": 0.8500374723527154, "grad_norm": 6.490070368131528, "learning_rate": 5.783344729611762e-07, "loss": 17.645, "step": 46503 }, { "epoch": 0.8500557515491619, "grad_norm": 6.785520954273158, "learning_rate": 5.781962858098817e-07, "loss": 17.5541, "step": 46504 }, { "epoch": 0.8500740307456084, "grad_norm": 8.170494727807805, "learning_rate": 5.780581141566238e-07, "loss": 18.1624, "step": 46505 }, { "epoch": 0.850092309942055, "grad_norm": 6.743353807917443, "learning_rate": 5.779199580018884e-07, "loss": 17.6318, "step": 46506 }, { "epoch": 0.8501105891385015, "grad_norm": 5.828341628495757, "learning_rate": 5.777818173461602e-07, "loss": 17.0182, "step": 46507 }, { "epoch": 0.8501288683349479, "grad_norm": 4.94387741671143, "learning_rate": 5.776436921899242e-07, "loss": 16.8523, "step": 46508 }, { "epoch": 0.8501471475313945, "grad_norm": 5.551381930159559, "learning_rate": 5.775055825336628e-07, "loss": 17.1279, "step": 46509 }, { "epoch": 0.850165426727841, "grad_norm": 6.667373857255122, "learning_rate": 5.773674883778602e-07, "loss": 17.265, "step": 46510 }, { "epoch": 0.8501837059242876, "grad_norm": 6.246142950634857, "learning_rate": 5.772294097230013e-07, "loss": 17.3031, "step": 46511 }, { "epoch": 0.8502019851207341, "grad_norm": 5.673126143130397, "learning_rate": 5.770913465695693e-07, "loss": 16.9143, "step": 46512 }, { "epoch": 0.8502202643171806, "grad_norm": 5.068080085729721, "learning_rate": 5.769532989180476e-07, "loss": 16.8284, "step": 46513 }, { "epoch": 0.8502385435136272, "grad_norm": 6.352615930522847, "learning_rate": 5.768152667689219e-07, "loss": 17.2586, "step": 46514 }, { "epoch": 0.8502568227100736, "grad_norm": 5.7144742500311585, "learning_rate": 5.766772501226742e-07, "loss": 17.0936, "step": 46515 }, { "epoch": 0.8502751019065202, "grad_norm": 6.509211107184963, "learning_rate": 5.765392489797883e-07, "loss": 17.4229, "step": 46516 }, { "epoch": 0.8502933811029667, "grad_norm": 6.309439026037851, "learning_rate": 5.764012633407495e-07, "loss": 17.1889, "step": 46517 }, { "epoch": 0.8503116602994132, "grad_norm": 7.496368582859246, "learning_rate": 5.762632932060408e-07, "loss": 17.4819, "step": 46518 }, { "epoch": 0.8503299394958598, "grad_norm": 5.456574212816981, "learning_rate": 5.76125338576144e-07, "loss": 16.7633, "step": 46519 }, { "epoch": 0.8503482186923063, "grad_norm": 6.140871161809901, "learning_rate": 5.759873994515452e-07, "loss": 17.0548, "step": 46520 }, { "epoch": 0.8503664978887528, "grad_norm": 4.4380516047678364, "learning_rate": 5.758494758327265e-07, "loss": 16.6675, "step": 46521 }, { "epoch": 0.8503847770851993, "grad_norm": 6.53625595464203, "learning_rate": 5.757115677201708e-07, "loss": 17.4387, "step": 46522 }, { "epoch": 0.8504030562816458, "grad_norm": 6.442574733566056, "learning_rate": 5.755736751143632e-07, "loss": 17.4353, "step": 46523 }, { "epoch": 0.8504213354780924, "grad_norm": 5.64078599794631, "learning_rate": 5.754357980157843e-07, "loss": 17.1059, "step": 46524 }, { "epoch": 0.8504396146745389, "grad_norm": 4.273589434193322, "learning_rate": 5.752979364249195e-07, "loss": 16.5465, "step": 46525 }, { "epoch": 0.8504578938709855, "grad_norm": 5.621375761411744, "learning_rate": 5.751600903422522e-07, "loss": 16.8837, "step": 46526 }, { "epoch": 0.850476173067432, "grad_norm": 5.7662921469919635, "learning_rate": 5.750222597682642e-07, "loss": 17.1002, "step": 46527 }, { "epoch": 0.8504944522638784, "grad_norm": 6.625465807770977, "learning_rate": 5.748844447034396e-07, "loss": 17.4645, "step": 46528 }, { "epoch": 0.850512731460325, "grad_norm": 5.599027753452406, "learning_rate": 5.747466451482614e-07, "loss": 16.9987, "step": 46529 }, { "epoch": 0.8505310106567715, "grad_norm": 5.959213056444545, "learning_rate": 5.746088611032113e-07, "loss": 17.1211, "step": 46530 }, { "epoch": 0.8505492898532181, "grad_norm": 4.778927277579588, "learning_rate": 5.744710925687741e-07, "loss": 16.7639, "step": 46531 }, { "epoch": 0.8505675690496646, "grad_norm": 7.4960846828243195, "learning_rate": 5.743333395454315e-07, "loss": 17.7777, "step": 46532 }, { "epoch": 0.850585848246111, "grad_norm": 5.758363268351093, "learning_rate": 5.741956020336653e-07, "loss": 16.9017, "step": 46533 }, { "epoch": 0.8506041274425576, "grad_norm": 5.157401355923851, "learning_rate": 5.740578800339597e-07, "loss": 16.9856, "step": 46534 }, { "epoch": 0.8506224066390041, "grad_norm": 6.02435474790367, "learning_rate": 5.739201735467987e-07, "loss": 16.941, "step": 46535 }, { "epoch": 0.8506406858354507, "grad_norm": 5.419377360491059, "learning_rate": 5.737824825726618e-07, "loss": 16.9381, "step": 46536 }, { "epoch": 0.8506589650318972, "grad_norm": 6.072037995115286, "learning_rate": 5.736448071120343e-07, "loss": 17.1341, "step": 46537 }, { "epoch": 0.8506772442283437, "grad_norm": 5.634746857184878, "learning_rate": 5.735071471653964e-07, "loss": 16.9669, "step": 46538 }, { "epoch": 0.8506955234247903, "grad_norm": 6.396787117792248, "learning_rate": 5.733695027332331e-07, "loss": 17.3156, "step": 46539 }, { "epoch": 0.8507138026212367, "grad_norm": 6.144677915506893, "learning_rate": 5.732318738160259e-07, "loss": 17.2006, "step": 46540 }, { "epoch": 0.8507320818176833, "grad_norm": 7.709589798344826, "learning_rate": 5.730942604142553e-07, "loss": 17.3225, "step": 46541 }, { "epoch": 0.8507503610141298, "grad_norm": 7.4054904734164895, "learning_rate": 5.729566625284067e-07, "loss": 17.57, "step": 46542 }, { "epoch": 0.8507686402105763, "grad_norm": 6.435446453498256, "learning_rate": 5.728190801589595e-07, "loss": 17.4143, "step": 46543 }, { "epoch": 0.8507869194070229, "grad_norm": 5.59464505363826, "learning_rate": 5.726815133063973e-07, "loss": 17.0829, "step": 46544 }, { "epoch": 0.8508051986034694, "grad_norm": 7.809900538259386, "learning_rate": 5.725439619712031e-07, "loss": 17.6353, "step": 46545 }, { "epoch": 0.850823477799916, "grad_norm": 5.896517776868149, "learning_rate": 5.724064261538581e-07, "loss": 17.2561, "step": 46546 }, { "epoch": 0.8508417569963624, "grad_norm": 5.374370180654381, "learning_rate": 5.722689058548436e-07, "loss": 17.3396, "step": 46547 }, { "epoch": 0.8508600361928089, "grad_norm": 6.552874312656986, "learning_rate": 5.721314010746432e-07, "loss": 17.4047, "step": 46548 }, { "epoch": 0.8508783153892555, "grad_norm": 5.379077802901093, "learning_rate": 5.71993911813738e-07, "loss": 16.9834, "step": 46549 }, { "epoch": 0.850896594585702, "grad_norm": 6.395894922020665, "learning_rate": 5.718564380726089e-07, "loss": 17.3453, "step": 46550 }, { "epoch": 0.8509148737821486, "grad_norm": 5.540617590840567, "learning_rate": 5.717189798517398e-07, "loss": 16.7889, "step": 46551 }, { "epoch": 0.8509331529785951, "grad_norm": 5.2726004774684165, "learning_rate": 5.715815371516103e-07, "loss": 16.8895, "step": 46552 }, { "epoch": 0.8509514321750415, "grad_norm": 6.120190737538496, "learning_rate": 5.714441099727036e-07, "loss": 17.2763, "step": 46553 }, { "epoch": 0.8509697113714881, "grad_norm": 6.0384535519008535, "learning_rate": 5.713066983155019e-07, "loss": 17.1574, "step": 46554 }, { "epoch": 0.8509879905679346, "grad_norm": 6.014910105250535, "learning_rate": 5.711693021804843e-07, "loss": 17.0953, "step": 46555 }, { "epoch": 0.8510062697643812, "grad_norm": 10.722835513570663, "learning_rate": 5.710319215681354e-07, "loss": 17.7981, "step": 46556 }, { "epoch": 0.8510245489608277, "grad_norm": 7.95338093525949, "learning_rate": 5.708945564789354e-07, "loss": 17.696, "step": 46557 }, { "epoch": 0.8510428281572742, "grad_norm": 9.266652448678222, "learning_rate": 5.707572069133649e-07, "loss": 18.8487, "step": 46558 }, { "epoch": 0.8510611073537208, "grad_norm": 5.15880694826849, "learning_rate": 5.706198728719065e-07, "loss": 16.9908, "step": 46559 }, { "epoch": 0.8510793865501672, "grad_norm": 5.174219509881794, "learning_rate": 5.704825543550402e-07, "loss": 17.0341, "step": 46560 }, { "epoch": 0.8510976657466138, "grad_norm": 5.342770001678702, "learning_rate": 5.70345251363249e-07, "loss": 17.1315, "step": 46561 }, { "epoch": 0.8511159449430603, "grad_norm": 7.176495979045347, "learning_rate": 5.702079638970126e-07, "loss": 17.3994, "step": 46562 }, { "epoch": 0.8511342241395068, "grad_norm": 7.658064377138771, "learning_rate": 5.700706919568139e-07, "loss": 17.8525, "step": 46563 }, { "epoch": 0.8511525033359534, "grad_norm": 5.24298225254374, "learning_rate": 5.699334355431318e-07, "loss": 17.057, "step": 46564 }, { "epoch": 0.8511707825323999, "grad_norm": 5.655585225948584, "learning_rate": 5.697961946564501e-07, "loss": 17.0331, "step": 46565 }, { "epoch": 0.8511890617288465, "grad_norm": 6.015057451751865, "learning_rate": 5.696589692972476e-07, "loss": 17.2337, "step": 46566 }, { "epoch": 0.8512073409252929, "grad_norm": 7.800473615812808, "learning_rate": 5.695217594660052e-07, "loss": 17.5181, "step": 46567 }, { "epoch": 0.8512256201217394, "grad_norm": 5.726614121187882, "learning_rate": 5.693845651632057e-07, "loss": 17.1826, "step": 46568 }, { "epoch": 0.851243899318186, "grad_norm": 5.8815392176340975, "learning_rate": 5.692473863893277e-07, "loss": 17.0699, "step": 46569 }, { "epoch": 0.8512621785146325, "grad_norm": 5.429417336889552, "learning_rate": 5.691102231448537e-07, "loss": 17.0501, "step": 46570 }, { "epoch": 0.8512804577110791, "grad_norm": 6.337647056802513, "learning_rate": 5.689730754302642e-07, "loss": 17.348, "step": 46571 }, { "epoch": 0.8512987369075256, "grad_norm": 7.163114874499577, "learning_rate": 5.688359432460388e-07, "loss": 17.5223, "step": 46572 }, { "epoch": 0.851317016103972, "grad_norm": 4.99504622291998, "learning_rate": 5.686988265926597e-07, "loss": 16.852, "step": 46573 }, { "epoch": 0.8513352953004186, "grad_norm": 7.5819354690165826, "learning_rate": 5.685617254706072e-07, "loss": 17.5745, "step": 46574 }, { "epoch": 0.8513535744968651, "grad_norm": 6.196668024765139, "learning_rate": 5.684246398803594e-07, "loss": 17.323, "step": 46575 }, { "epoch": 0.8513718536933116, "grad_norm": 7.421509889502197, "learning_rate": 5.682875698224005e-07, "loss": 17.8181, "step": 46576 }, { "epoch": 0.8513901328897582, "grad_norm": 6.0841085173241165, "learning_rate": 5.681505152972089e-07, "loss": 16.869, "step": 46577 }, { "epoch": 0.8514084120862047, "grad_norm": 5.470739859282277, "learning_rate": 5.680134763052642e-07, "loss": 17.0034, "step": 46578 }, { "epoch": 0.8514266912826512, "grad_norm": 7.6389591835965645, "learning_rate": 5.678764528470476e-07, "loss": 18.0163, "step": 46579 }, { "epoch": 0.8514449704790977, "grad_norm": 6.805691894681372, "learning_rate": 5.6773944492304e-07, "loss": 17.444, "step": 46580 }, { "epoch": 0.8514632496755442, "grad_norm": 6.386537477999374, "learning_rate": 5.676024525337209e-07, "loss": 17.3017, "step": 46581 }, { "epoch": 0.8514815288719908, "grad_norm": 6.178813735564948, "learning_rate": 5.674654756795711e-07, "loss": 17.2312, "step": 46582 }, { "epoch": 0.8514998080684373, "grad_norm": 10.231175448913707, "learning_rate": 5.673285143610702e-07, "loss": 18.2922, "step": 46583 }, { "epoch": 0.8515180872648839, "grad_norm": 5.581212155878637, "learning_rate": 5.671915685786972e-07, "loss": 17.0567, "step": 46584 }, { "epoch": 0.8515363664613304, "grad_norm": 5.196637456582281, "learning_rate": 5.67054638332934e-07, "loss": 16.946, "step": 46585 }, { "epoch": 0.8515546456577768, "grad_norm": 5.645006484328127, "learning_rate": 5.669177236242585e-07, "loss": 17.1339, "step": 46586 }, { "epoch": 0.8515729248542234, "grad_norm": 7.160674425394652, "learning_rate": 5.667808244531531e-07, "loss": 17.3756, "step": 46587 }, { "epoch": 0.8515912040506699, "grad_norm": 5.640478484312887, "learning_rate": 5.666439408200947e-07, "loss": 17.1965, "step": 46588 }, { "epoch": 0.8516094832471165, "grad_norm": 5.985735271857981, "learning_rate": 5.665070727255651e-07, "loss": 17.1654, "step": 46589 }, { "epoch": 0.851627762443563, "grad_norm": 5.191723446975067, "learning_rate": 5.663702201700444e-07, "loss": 16.8709, "step": 46590 }, { "epoch": 0.8516460416400095, "grad_norm": 6.229883319463303, "learning_rate": 5.662333831540112e-07, "loss": 17.5947, "step": 46591 }, { "epoch": 0.851664320836456, "grad_norm": 10.04155570390042, "learning_rate": 5.66096561677944e-07, "loss": 18.3318, "step": 46592 }, { "epoch": 0.8516826000329025, "grad_norm": 5.871781749425598, "learning_rate": 5.659597557423253e-07, "loss": 17.2225, "step": 46593 }, { "epoch": 0.8517008792293491, "grad_norm": 6.332355479399, "learning_rate": 5.658229653476322e-07, "loss": 17.2441, "step": 46594 }, { "epoch": 0.8517191584257956, "grad_norm": 6.829897847166803, "learning_rate": 5.656861904943439e-07, "loss": 17.4599, "step": 46595 }, { "epoch": 0.8517374376222421, "grad_norm": 6.898375980897286, "learning_rate": 5.655494311829418e-07, "loss": 17.5637, "step": 46596 }, { "epoch": 0.8517557168186887, "grad_norm": 5.622504296319692, "learning_rate": 5.654126874139032e-07, "loss": 17.2116, "step": 46597 }, { "epoch": 0.8517739960151351, "grad_norm": 5.394365051847803, "learning_rate": 5.652759591877088e-07, "loss": 16.986, "step": 46598 }, { "epoch": 0.8517922752115817, "grad_norm": 5.5297557943561095, "learning_rate": 5.651392465048377e-07, "loss": 17.0633, "step": 46599 }, { "epoch": 0.8518105544080282, "grad_norm": 5.7467115698406905, "learning_rate": 5.650025493657679e-07, "loss": 17.3091, "step": 46600 }, { "epoch": 0.8518288336044747, "grad_norm": 5.484037560889361, "learning_rate": 5.6486586777098e-07, "loss": 17.1746, "step": 46601 }, { "epoch": 0.8518471128009213, "grad_norm": 6.974686392491599, "learning_rate": 5.647292017209527e-07, "loss": 17.4083, "step": 46602 }, { "epoch": 0.8518653919973678, "grad_norm": 5.130930552434108, "learning_rate": 5.645925512161637e-07, "loss": 17.1081, "step": 46603 }, { "epoch": 0.8518836711938144, "grad_norm": 7.258144170668445, "learning_rate": 5.644559162570939e-07, "loss": 17.2606, "step": 46604 }, { "epoch": 0.8519019503902608, "grad_norm": 7.9799278795408615, "learning_rate": 5.643192968442213e-07, "loss": 18.2348, "step": 46605 }, { "epoch": 0.8519202295867073, "grad_norm": 6.032908091747304, "learning_rate": 5.641826929780237e-07, "loss": 17.1691, "step": 46606 }, { "epoch": 0.8519385087831539, "grad_norm": 7.543427283747972, "learning_rate": 5.640461046589807e-07, "loss": 17.2168, "step": 46607 }, { "epoch": 0.8519567879796004, "grad_norm": 6.3124078488903175, "learning_rate": 5.63909531887572e-07, "loss": 17.4875, "step": 46608 }, { "epoch": 0.851975067176047, "grad_norm": 5.839299531164903, "learning_rate": 5.637729746642745e-07, "loss": 17.2566, "step": 46609 }, { "epoch": 0.8519933463724935, "grad_norm": 5.310750256089157, "learning_rate": 5.636364329895688e-07, "loss": 17.0641, "step": 46610 }, { "epoch": 0.85201162556894, "grad_norm": 5.086672731700492, "learning_rate": 5.634999068639324e-07, "loss": 17.0483, "step": 46611 }, { "epoch": 0.8520299047653865, "grad_norm": 7.368764663778911, "learning_rate": 5.633633962878432e-07, "loss": 18.0804, "step": 46612 }, { "epoch": 0.852048183961833, "grad_norm": 6.686917124108285, "learning_rate": 5.632269012617809e-07, "loss": 17.5794, "step": 46613 }, { "epoch": 0.8520664631582796, "grad_norm": 6.10841983462866, "learning_rate": 5.630904217862232e-07, "loss": 17.4273, "step": 46614 }, { "epoch": 0.8520847423547261, "grad_norm": 6.3526992420547685, "learning_rate": 5.629539578616478e-07, "loss": 17.4812, "step": 46615 }, { "epoch": 0.8521030215511726, "grad_norm": 5.789366818014579, "learning_rate": 5.628175094885341e-07, "loss": 17.0457, "step": 46616 }, { "epoch": 0.8521213007476192, "grad_norm": 5.5595006212557205, "learning_rate": 5.626810766673596e-07, "loss": 17.2056, "step": 46617 }, { "epoch": 0.8521395799440656, "grad_norm": 4.875979364652387, "learning_rate": 5.625446593986039e-07, "loss": 16.7377, "step": 46618 }, { "epoch": 0.8521578591405122, "grad_norm": 6.66370731266891, "learning_rate": 5.624082576827439e-07, "loss": 17.1816, "step": 46619 }, { "epoch": 0.8521761383369587, "grad_norm": 5.472347560259079, "learning_rate": 5.622718715202569e-07, "loss": 17.0298, "step": 46620 }, { "epoch": 0.8521944175334052, "grad_norm": 4.509407766658481, "learning_rate": 5.62135500911623e-07, "loss": 16.849, "step": 46621 }, { "epoch": 0.8522126967298518, "grad_norm": 6.044039191929536, "learning_rate": 5.619991458573193e-07, "loss": 17.3313, "step": 46622 }, { "epoch": 0.8522309759262983, "grad_norm": 6.225311146654455, "learning_rate": 5.618628063578224e-07, "loss": 17.2726, "step": 46623 }, { "epoch": 0.8522492551227449, "grad_norm": 5.5383490121200625, "learning_rate": 5.617264824136109e-07, "loss": 16.9277, "step": 46624 }, { "epoch": 0.8522675343191913, "grad_norm": 7.144391095388155, "learning_rate": 5.615901740251645e-07, "loss": 17.7594, "step": 46625 }, { "epoch": 0.8522858135156378, "grad_norm": 7.459712469056674, "learning_rate": 5.614538811929582e-07, "loss": 17.7115, "step": 46626 }, { "epoch": 0.8523040927120844, "grad_norm": 5.955105324637481, "learning_rate": 5.613176039174717e-07, "loss": 17.2433, "step": 46627 }, { "epoch": 0.8523223719085309, "grad_norm": 7.214316861044037, "learning_rate": 5.611813421991818e-07, "loss": 17.5833, "step": 46628 }, { "epoch": 0.8523406511049775, "grad_norm": 5.062855768713544, "learning_rate": 5.610450960385655e-07, "loss": 17.0533, "step": 46629 }, { "epoch": 0.852358930301424, "grad_norm": 6.493238989907521, "learning_rate": 5.609088654361017e-07, "loss": 17.4217, "step": 46630 }, { "epoch": 0.8523772094978704, "grad_norm": 6.900214820884619, "learning_rate": 5.607726503922661e-07, "loss": 17.0435, "step": 46631 }, { "epoch": 0.852395488694317, "grad_norm": 6.434001831857268, "learning_rate": 5.60636450907538e-07, "loss": 17.0399, "step": 46632 }, { "epoch": 0.8524137678907635, "grad_norm": 6.764540403548347, "learning_rate": 5.605002669823928e-07, "loss": 17.6022, "step": 46633 }, { "epoch": 0.8524320470872101, "grad_norm": 5.6553846130526635, "learning_rate": 5.603640986173092e-07, "loss": 17.0464, "step": 46634 }, { "epoch": 0.8524503262836566, "grad_norm": 5.852394875264494, "learning_rate": 5.602279458127652e-07, "loss": 17.0823, "step": 46635 }, { "epoch": 0.8524686054801031, "grad_norm": 7.519725338781564, "learning_rate": 5.600918085692369e-07, "loss": 17.7584, "step": 46636 }, { "epoch": 0.8524868846765496, "grad_norm": 6.859161280155928, "learning_rate": 5.599556868872009e-07, "loss": 17.6007, "step": 46637 }, { "epoch": 0.8525051638729961, "grad_norm": 6.712487779399778, "learning_rate": 5.598195807671353e-07, "loss": 17.4377, "step": 46638 }, { "epoch": 0.8525234430694427, "grad_norm": 7.3084950350163105, "learning_rate": 5.596834902095171e-07, "loss": 17.3195, "step": 46639 }, { "epoch": 0.8525417222658892, "grad_norm": 6.5823095132599345, "learning_rate": 5.595474152148222e-07, "loss": 17.2553, "step": 46640 }, { "epoch": 0.8525600014623357, "grad_norm": 7.497657682114893, "learning_rate": 5.59411355783529e-07, "loss": 18.0035, "step": 46641 }, { "epoch": 0.8525782806587823, "grad_norm": 6.627765817162429, "learning_rate": 5.592753119161126e-07, "loss": 17.1982, "step": 46642 }, { "epoch": 0.8525965598552288, "grad_norm": 5.142778409990431, "learning_rate": 5.591392836130511e-07, "loss": 16.7478, "step": 46643 }, { "epoch": 0.8526148390516752, "grad_norm": 6.491490858723585, "learning_rate": 5.590032708748216e-07, "loss": 17.3081, "step": 46644 }, { "epoch": 0.8526331182481218, "grad_norm": 5.465132731085993, "learning_rate": 5.588672737019008e-07, "loss": 17.1269, "step": 46645 }, { "epoch": 0.8526513974445683, "grad_norm": 6.032588398290272, "learning_rate": 5.587312920947636e-07, "loss": 17.5565, "step": 46646 }, { "epoch": 0.8526696766410149, "grad_norm": 6.722477241875296, "learning_rate": 5.585953260538885e-07, "loss": 17.5166, "step": 46647 }, { "epoch": 0.8526879558374614, "grad_norm": 6.339019590680158, "learning_rate": 5.584593755797507e-07, "loss": 17.0365, "step": 46648 }, { "epoch": 0.8527062350339079, "grad_norm": 5.451153769788097, "learning_rate": 5.583234406728283e-07, "loss": 16.7728, "step": 46649 }, { "epoch": 0.8527245142303544, "grad_norm": 6.087664487604258, "learning_rate": 5.581875213335969e-07, "loss": 17.6056, "step": 46650 }, { "epoch": 0.8527427934268009, "grad_norm": 7.095809964206081, "learning_rate": 5.580516175625317e-07, "loss": 17.2496, "step": 46651 }, { "epoch": 0.8527610726232475, "grad_norm": 6.304420045633404, "learning_rate": 5.579157293601106e-07, "loss": 17.5171, "step": 46652 }, { "epoch": 0.852779351819694, "grad_norm": 4.885981082112738, "learning_rate": 5.5777985672681e-07, "loss": 16.661, "step": 46653 }, { "epoch": 0.8527976310161405, "grad_norm": 5.115611126396423, "learning_rate": 5.576439996631044e-07, "loss": 16.868, "step": 46654 }, { "epoch": 0.8528159102125871, "grad_norm": 6.001953839067299, "learning_rate": 5.575081581694719e-07, "loss": 17.1252, "step": 46655 }, { "epoch": 0.8528341894090335, "grad_norm": 6.579653838407005, "learning_rate": 5.573723322463881e-07, "loss": 17.5606, "step": 46656 }, { "epoch": 0.8528524686054801, "grad_norm": 6.346803710316081, "learning_rate": 5.572365218943282e-07, "loss": 17.5418, "step": 46657 }, { "epoch": 0.8528707478019266, "grad_norm": 6.792065417123827, "learning_rate": 5.57100727113769e-07, "loss": 17.5715, "step": 46658 }, { "epoch": 0.8528890269983731, "grad_norm": 5.9197382474638784, "learning_rate": 5.569649479051864e-07, "loss": 17.1419, "step": 46659 }, { "epoch": 0.8529073061948197, "grad_norm": 6.350973510109449, "learning_rate": 5.568291842690555e-07, "loss": 17.2715, "step": 46660 }, { "epoch": 0.8529255853912662, "grad_norm": 6.667463903353798, "learning_rate": 5.566934362058524e-07, "loss": 17.5943, "step": 46661 }, { "epoch": 0.8529438645877128, "grad_norm": 5.616666093641273, "learning_rate": 5.565577037160536e-07, "loss": 17.2637, "step": 46662 }, { "epoch": 0.8529621437841592, "grad_norm": 6.0791177791861815, "learning_rate": 5.564219868001353e-07, "loss": 17.2128, "step": 46663 }, { "epoch": 0.8529804229806057, "grad_norm": 6.17401730293162, "learning_rate": 5.562862854585721e-07, "loss": 17.4511, "step": 46664 }, { "epoch": 0.8529987021770523, "grad_norm": 7.082544782544446, "learning_rate": 5.561505996918387e-07, "loss": 17.9689, "step": 46665 }, { "epoch": 0.8530169813734988, "grad_norm": 6.479538161651467, "learning_rate": 5.56014929500413e-07, "loss": 17.2084, "step": 46666 }, { "epoch": 0.8530352605699454, "grad_norm": 5.1331733842520135, "learning_rate": 5.558792748847697e-07, "loss": 17.0475, "step": 46667 }, { "epoch": 0.8530535397663919, "grad_norm": 8.424891644446, "learning_rate": 5.557436358453827e-07, "loss": 17.1753, "step": 46668 }, { "epoch": 0.8530718189628383, "grad_norm": 5.9645529265381425, "learning_rate": 5.556080123827296e-07, "loss": 17.2575, "step": 46669 }, { "epoch": 0.8530900981592849, "grad_norm": 5.858872749458337, "learning_rate": 5.554724044972837e-07, "loss": 17.3039, "step": 46670 }, { "epoch": 0.8531083773557314, "grad_norm": 6.149723906485765, "learning_rate": 5.553368121895214e-07, "loss": 17.4175, "step": 46671 }, { "epoch": 0.853126656552178, "grad_norm": 7.063306223579832, "learning_rate": 5.55201235459919e-07, "loss": 17.1249, "step": 46672 }, { "epoch": 0.8531449357486245, "grad_norm": 5.081419104135845, "learning_rate": 5.550656743089505e-07, "loss": 17.1049, "step": 46673 }, { "epoch": 0.853163214945071, "grad_norm": 5.459741293971748, "learning_rate": 5.549301287370895e-07, "loss": 17.2326, "step": 46674 }, { "epoch": 0.8531814941415176, "grad_norm": 6.384101495722213, "learning_rate": 5.547945987448145e-07, "loss": 17.383, "step": 46675 }, { "epoch": 0.853199773337964, "grad_norm": 5.545684290684774, "learning_rate": 5.546590843325972e-07, "loss": 17.1532, "step": 46676 }, { "epoch": 0.8532180525344106, "grad_norm": 7.876208384554123, "learning_rate": 5.545235855009156e-07, "loss": 17.9625, "step": 46677 }, { "epoch": 0.8532363317308571, "grad_norm": 7.334433534962139, "learning_rate": 5.543881022502423e-07, "loss": 18.1627, "step": 46678 }, { "epoch": 0.8532546109273036, "grad_norm": 5.298209783514835, "learning_rate": 5.542526345810528e-07, "loss": 17.1861, "step": 46679 }, { "epoch": 0.8532728901237502, "grad_norm": 5.215080957561424, "learning_rate": 5.541171824938213e-07, "loss": 16.9979, "step": 46680 }, { "epoch": 0.8532911693201967, "grad_norm": 6.678866556695578, "learning_rate": 5.539817459890245e-07, "loss": 17.604, "step": 46681 }, { "epoch": 0.8533094485166433, "grad_norm": 5.406428029936885, "learning_rate": 5.53846325067135e-07, "loss": 17.1048, "step": 46682 }, { "epoch": 0.8533277277130897, "grad_norm": 5.801527688065113, "learning_rate": 5.537109197286295e-07, "loss": 17.2079, "step": 46683 }, { "epoch": 0.8533460069095362, "grad_norm": 6.590388293129513, "learning_rate": 5.535755299739809e-07, "loss": 17.0044, "step": 46684 }, { "epoch": 0.8533642861059828, "grad_norm": 4.97414833720567, "learning_rate": 5.534401558036634e-07, "loss": 16.8899, "step": 46685 }, { "epoch": 0.8533825653024293, "grad_norm": 6.631120431485943, "learning_rate": 5.533047972181532e-07, "loss": 17.705, "step": 46686 }, { "epoch": 0.8534008444988759, "grad_norm": 5.388200239001904, "learning_rate": 5.531694542179234e-07, "loss": 17.0423, "step": 46687 }, { "epoch": 0.8534191236953224, "grad_norm": 5.639919472754867, "learning_rate": 5.530341268034489e-07, "loss": 17.2388, "step": 46688 }, { "epoch": 0.8534374028917688, "grad_norm": 6.861001871243631, "learning_rate": 5.528988149752035e-07, "loss": 17.1011, "step": 46689 }, { "epoch": 0.8534556820882154, "grad_norm": 5.012866403095882, "learning_rate": 5.527635187336627e-07, "loss": 17.0865, "step": 46690 }, { "epoch": 0.8534739612846619, "grad_norm": 6.523432873474482, "learning_rate": 5.526282380792991e-07, "loss": 17.5223, "step": 46691 }, { "epoch": 0.8534922404811085, "grad_norm": 6.398966601137713, "learning_rate": 5.524929730125883e-07, "loss": 17.2951, "step": 46692 }, { "epoch": 0.853510519677555, "grad_norm": 5.55446848933288, "learning_rate": 5.523577235340028e-07, "loss": 16.9529, "step": 46693 }, { "epoch": 0.8535287988740015, "grad_norm": 7.503757259903052, "learning_rate": 5.522224896440181e-07, "loss": 17.5314, "step": 46694 }, { "epoch": 0.853547078070448, "grad_norm": 7.516216284829758, "learning_rate": 5.520872713431086e-07, "loss": 17.7527, "step": 46695 }, { "epoch": 0.8535653572668945, "grad_norm": 7.414159864752233, "learning_rate": 5.519520686317453e-07, "loss": 17.6819, "step": 46696 }, { "epoch": 0.8535836364633411, "grad_norm": 4.931001385060922, "learning_rate": 5.518168815104041e-07, "loss": 16.7973, "step": 46697 }, { "epoch": 0.8536019156597876, "grad_norm": 7.5489341885413666, "learning_rate": 5.516817099795601e-07, "loss": 17.5292, "step": 46698 }, { "epoch": 0.8536201948562341, "grad_norm": 6.955254198147362, "learning_rate": 5.515465540396847e-07, "loss": 17.6763, "step": 46699 }, { "epoch": 0.8536384740526807, "grad_norm": 5.914206982517891, "learning_rate": 5.514114136912535e-07, "loss": 17.1048, "step": 46700 }, { "epoch": 0.8536567532491272, "grad_norm": 5.95637492024354, "learning_rate": 5.512762889347395e-07, "loss": 17.2374, "step": 46701 }, { "epoch": 0.8536750324455737, "grad_norm": 5.8322391662069535, "learning_rate": 5.511411797706146e-07, "loss": 17.1209, "step": 46702 }, { "epoch": 0.8536933116420202, "grad_norm": 6.391257802224305, "learning_rate": 5.510060861993549e-07, "loss": 17.1616, "step": 46703 }, { "epoch": 0.8537115908384667, "grad_norm": 5.483414230089293, "learning_rate": 5.50871008221433e-07, "loss": 17.2146, "step": 46704 }, { "epoch": 0.8537298700349133, "grad_norm": 7.002068827154902, "learning_rate": 5.507359458373212e-07, "loss": 17.7235, "step": 46705 }, { "epoch": 0.8537481492313598, "grad_norm": 5.538734604991437, "learning_rate": 5.506008990474937e-07, "loss": 16.9148, "step": 46706 }, { "epoch": 0.8537664284278064, "grad_norm": 6.073174262157588, "learning_rate": 5.504658678524238e-07, "loss": 17.0787, "step": 46707 }, { "epoch": 0.8537847076242528, "grad_norm": 6.64613298994316, "learning_rate": 5.503308522525863e-07, "loss": 17.3918, "step": 46708 }, { "epoch": 0.8538029868206993, "grad_norm": 6.8091192801328555, "learning_rate": 5.50195852248453e-07, "loss": 17.3153, "step": 46709 }, { "epoch": 0.8538212660171459, "grad_norm": 4.606667853273877, "learning_rate": 5.500608678404956e-07, "loss": 16.7177, "step": 46710 }, { "epoch": 0.8538395452135924, "grad_norm": 5.492932930073087, "learning_rate": 5.4992589902919e-07, "loss": 17.259, "step": 46711 }, { "epoch": 0.8538578244100389, "grad_norm": 6.265725851619412, "learning_rate": 5.49790945815008e-07, "loss": 17.3649, "step": 46712 }, { "epoch": 0.8538761036064855, "grad_norm": 7.4398151105966885, "learning_rate": 5.496560081984215e-07, "loss": 17.6515, "step": 46713 }, { "epoch": 0.853894382802932, "grad_norm": 7.484732470748782, "learning_rate": 5.495210861799055e-07, "loss": 17.9144, "step": 46714 }, { "epoch": 0.8539126619993785, "grad_norm": 5.908605044175515, "learning_rate": 5.493861797599309e-07, "loss": 17.2416, "step": 46715 }, { "epoch": 0.853930941195825, "grad_norm": 4.905207377107949, "learning_rate": 5.492512889389717e-07, "loss": 17.0203, "step": 46716 }, { "epoch": 0.8539492203922715, "grad_norm": 5.370671451896464, "learning_rate": 5.491164137175015e-07, "loss": 17.2302, "step": 46717 }, { "epoch": 0.8539674995887181, "grad_norm": 5.373499616309405, "learning_rate": 5.48981554095992e-07, "loss": 17.0426, "step": 46718 }, { "epoch": 0.8539857787851646, "grad_norm": 5.83113760411305, "learning_rate": 5.488467100749151e-07, "loss": 17.2369, "step": 46719 }, { "epoch": 0.8540040579816112, "grad_norm": 6.82272361986575, "learning_rate": 5.487118816547449e-07, "loss": 17.4808, "step": 46720 }, { "epoch": 0.8540223371780576, "grad_norm": 6.13148629214019, "learning_rate": 5.485770688359532e-07, "loss": 17.0585, "step": 46721 }, { "epoch": 0.8540406163745041, "grad_norm": 5.276672922604804, "learning_rate": 5.484422716190118e-07, "loss": 17.1457, "step": 46722 }, { "epoch": 0.8540588955709507, "grad_norm": 6.863690315090449, "learning_rate": 5.48307490004395e-07, "loss": 17.1871, "step": 46723 }, { "epoch": 0.8540771747673972, "grad_norm": 6.688134933299668, "learning_rate": 5.481727239925727e-07, "loss": 17.5275, "step": 46724 }, { "epoch": 0.8540954539638438, "grad_norm": 6.332568821543934, "learning_rate": 5.480379735840191e-07, "loss": 17.4856, "step": 46725 }, { "epoch": 0.8541137331602903, "grad_norm": 6.838302792286727, "learning_rate": 5.479032387792072e-07, "loss": 17.5567, "step": 46726 }, { "epoch": 0.8541320123567367, "grad_norm": 7.169184031531402, "learning_rate": 5.47768519578607e-07, "loss": 17.9684, "step": 46727 }, { "epoch": 0.8541502915531833, "grad_norm": 6.525165307697287, "learning_rate": 5.476338159826927e-07, "loss": 17.9748, "step": 46728 }, { "epoch": 0.8541685707496298, "grad_norm": 5.9218055813834525, "learning_rate": 5.474991279919356e-07, "loss": 17.2245, "step": 46729 }, { "epoch": 0.8541868499460764, "grad_norm": 6.080230983106535, "learning_rate": 5.473644556068064e-07, "loss": 17.4375, "step": 46730 }, { "epoch": 0.8542051291425229, "grad_norm": 5.183289644690335, "learning_rate": 5.472297988277797e-07, "loss": 16.8835, "step": 46731 }, { "epoch": 0.8542234083389694, "grad_norm": 6.1171313646950445, "learning_rate": 5.470951576553262e-07, "loss": 17.2497, "step": 46732 }, { "epoch": 0.854241687535416, "grad_norm": 4.223714543976419, "learning_rate": 5.469605320899169e-07, "loss": 16.577, "step": 46733 }, { "epoch": 0.8542599667318624, "grad_norm": 6.976493639676317, "learning_rate": 5.468259221320244e-07, "loss": 17.7855, "step": 46734 }, { "epoch": 0.854278245928309, "grad_norm": 5.57950910956835, "learning_rate": 5.466913277821212e-07, "loss": 17.1535, "step": 46735 }, { "epoch": 0.8542965251247555, "grad_norm": 6.061949317238054, "learning_rate": 5.465567490406776e-07, "loss": 17.3425, "step": 46736 }, { "epoch": 0.854314804321202, "grad_norm": 6.495552863881004, "learning_rate": 5.464221859081675e-07, "loss": 17.4498, "step": 46737 }, { "epoch": 0.8543330835176486, "grad_norm": 6.648354097868395, "learning_rate": 5.462876383850601e-07, "loss": 17.2671, "step": 46738 }, { "epoch": 0.8543513627140951, "grad_norm": 6.479087513313784, "learning_rate": 5.46153106471829e-07, "loss": 17.045, "step": 46739 }, { "epoch": 0.8543696419105417, "grad_norm": 6.282493989901827, "learning_rate": 5.460185901689447e-07, "loss": 17.3778, "step": 46740 }, { "epoch": 0.8543879211069881, "grad_norm": 6.256603118347629, "learning_rate": 5.458840894768775e-07, "loss": 17.4304, "step": 46741 }, { "epoch": 0.8544062003034346, "grad_norm": 6.991756408560393, "learning_rate": 5.457496043961014e-07, "loss": 17.6943, "step": 46742 }, { "epoch": 0.8544244794998812, "grad_norm": 5.744723787862036, "learning_rate": 5.456151349270855e-07, "loss": 17.2087, "step": 46743 }, { "epoch": 0.8544427586963277, "grad_norm": 6.862378967032833, "learning_rate": 5.454806810703017e-07, "loss": 17.4802, "step": 46744 }, { "epoch": 0.8544610378927743, "grad_norm": 6.195341860080876, "learning_rate": 5.453462428262224e-07, "loss": 17.0837, "step": 46745 }, { "epoch": 0.8544793170892208, "grad_norm": 7.663706318528317, "learning_rate": 5.452118201953183e-07, "loss": 17.7663, "step": 46746 }, { "epoch": 0.8544975962856672, "grad_norm": 7.500739751027041, "learning_rate": 5.450774131780596e-07, "loss": 18.1457, "step": 46747 }, { "epoch": 0.8545158754821138, "grad_norm": 5.4897959284529465, "learning_rate": 5.449430217749185e-07, "loss": 17.0038, "step": 46748 }, { "epoch": 0.8545341546785603, "grad_norm": 5.29878571490554, "learning_rate": 5.448086459863655e-07, "loss": 17.2701, "step": 46749 }, { "epoch": 0.8545524338750069, "grad_norm": 5.249537551402945, "learning_rate": 5.446742858128707e-07, "loss": 17.1978, "step": 46750 }, { "epoch": 0.8545707130714534, "grad_norm": 7.728181686815885, "learning_rate": 5.445399412549058e-07, "loss": 17.4448, "step": 46751 }, { "epoch": 0.8545889922678999, "grad_norm": 6.883098739946379, "learning_rate": 5.444056123129426e-07, "loss": 17.4222, "step": 46752 }, { "epoch": 0.8546072714643465, "grad_norm": 8.272504690623265, "learning_rate": 5.442712989874505e-07, "loss": 17.9642, "step": 46753 }, { "epoch": 0.8546255506607929, "grad_norm": 7.810552975018907, "learning_rate": 5.441370012789016e-07, "loss": 18.2151, "step": 46754 }, { "epoch": 0.8546438298572395, "grad_norm": 6.67883074946503, "learning_rate": 5.44002719187765e-07, "loss": 17.8573, "step": 46755 }, { "epoch": 0.854662109053686, "grad_norm": 6.234975708374684, "learning_rate": 5.438684527145133e-07, "loss": 17.613, "step": 46756 }, { "epoch": 0.8546803882501325, "grad_norm": 6.337888372719781, "learning_rate": 5.437342018596159e-07, "loss": 17.4467, "step": 46757 }, { "epoch": 0.8546986674465791, "grad_norm": 6.48701135637228, "learning_rate": 5.43599966623542e-07, "loss": 17.467, "step": 46758 }, { "epoch": 0.8547169466430256, "grad_norm": 5.974468137611065, "learning_rate": 5.434657470067651e-07, "loss": 17.5079, "step": 46759 }, { "epoch": 0.8547352258394721, "grad_norm": 6.279503192268512, "learning_rate": 5.433315430097524e-07, "loss": 17.4741, "step": 46760 }, { "epoch": 0.8547535050359186, "grad_norm": 7.7728181108426755, "learning_rate": 5.431973546329766e-07, "loss": 17.4764, "step": 46761 }, { "epoch": 0.8547717842323651, "grad_norm": 5.688919646669706, "learning_rate": 5.430631818769078e-07, "loss": 17.2936, "step": 46762 }, { "epoch": 0.8547900634288117, "grad_norm": 5.259999010166459, "learning_rate": 5.429290247420161e-07, "loss": 17.0724, "step": 46763 }, { "epoch": 0.8548083426252582, "grad_norm": 6.182406176005403, "learning_rate": 5.427948832287705e-07, "loss": 17.2079, "step": 46764 }, { "epoch": 0.8548266218217048, "grad_norm": 5.467371859936479, "learning_rate": 5.42660757337643e-07, "loss": 17.0464, "step": 46765 }, { "epoch": 0.8548449010181512, "grad_norm": 6.845110839244869, "learning_rate": 5.425266470691026e-07, "loss": 17.6249, "step": 46766 }, { "epoch": 0.8548631802145977, "grad_norm": 5.723503068965822, "learning_rate": 5.423925524236184e-07, "loss": 17.0221, "step": 46767 }, { "epoch": 0.8548814594110443, "grad_norm": 5.70929109515137, "learning_rate": 5.422584734016628e-07, "loss": 17.0856, "step": 46768 }, { "epoch": 0.8548997386074908, "grad_norm": 6.125278920814736, "learning_rate": 5.421244100037032e-07, "loss": 16.9246, "step": 46769 }, { "epoch": 0.8549180178039374, "grad_norm": 6.581621355065148, "learning_rate": 5.419903622302103e-07, "loss": 17.0003, "step": 46770 }, { "epoch": 0.8549362970003839, "grad_norm": 5.660393099388767, "learning_rate": 5.418563300816559e-07, "loss": 17.219, "step": 46771 }, { "epoch": 0.8549545761968304, "grad_norm": 5.856556296678634, "learning_rate": 5.41722313558507e-07, "loss": 17.0602, "step": 46772 }, { "epoch": 0.8549728553932769, "grad_norm": 5.308520102942748, "learning_rate": 5.415883126612359e-07, "loss": 17.0643, "step": 46773 }, { "epoch": 0.8549911345897234, "grad_norm": 7.5769504259249345, "learning_rate": 5.414543273903106e-07, "loss": 17.6909, "step": 46774 }, { "epoch": 0.85500941378617, "grad_norm": 5.594055301949861, "learning_rate": 5.413203577462001e-07, "loss": 17.3382, "step": 46775 }, { "epoch": 0.8550276929826165, "grad_norm": 6.202866795158751, "learning_rate": 5.411864037293757e-07, "loss": 17.357, "step": 46776 }, { "epoch": 0.855045972179063, "grad_norm": 6.259767706991556, "learning_rate": 5.41052465340306e-07, "loss": 17.282, "step": 46777 }, { "epoch": 0.8550642513755096, "grad_norm": 5.530941317007533, "learning_rate": 5.409185425794594e-07, "loss": 17.0769, "step": 46778 }, { "epoch": 0.855082530571956, "grad_norm": 6.688619301686793, "learning_rate": 5.407846354473067e-07, "loss": 17.4446, "step": 46779 }, { "epoch": 0.8551008097684025, "grad_norm": 6.642078625762095, "learning_rate": 5.406507439443175e-07, "loss": 17.6345, "step": 46780 }, { "epoch": 0.8551190889648491, "grad_norm": 6.249396672024705, "learning_rate": 5.405168680709599e-07, "loss": 17.1677, "step": 46781 }, { "epoch": 0.8551373681612956, "grad_norm": 6.377985502464215, "learning_rate": 5.403830078277044e-07, "loss": 17.4061, "step": 46782 }, { "epoch": 0.8551556473577422, "grad_norm": 8.235827209089058, "learning_rate": 5.402491632150192e-07, "loss": 17.2997, "step": 46783 }, { "epoch": 0.8551739265541887, "grad_norm": 6.391661814462006, "learning_rate": 5.401153342333731e-07, "loss": 17.201, "step": 46784 }, { "epoch": 0.8551922057506351, "grad_norm": 6.415004507171019, "learning_rate": 5.39981520883237e-07, "loss": 17.2274, "step": 46785 }, { "epoch": 0.8552104849470817, "grad_norm": 5.599973404958958, "learning_rate": 5.398477231650773e-07, "loss": 17.2173, "step": 46786 }, { "epoch": 0.8552287641435282, "grad_norm": 5.564447307538861, "learning_rate": 5.39713941079365e-07, "loss": 17.3119, "step": 46787 }, { "epoch": 0.8552470433399748, "grad_norm": 6.6235649997226425, "learning_rate": 5.395801746265677e-07, "loss": 17.2814, "step": 46788 }, { "epoch": 0.8552653225364213, "grad_norm": 4.983471447927548, "learning_rate": 5.394464238071551e-07, "loss": 17.0091, "step": 46789 }, { "epoch": 0.8552836017328678, "grad_norm": 6.255327780477432, "learning_rate": 5.39312688621596e-07, "loss": 17.5683, "step": 46790 }, { "epoch": 0.8553018809293144, "grad_norm": 6.886795312611418, "learning_rate": 5.391789690703591e-07, "loss": 17.6295, "step": 46791 }, { "epoch": 0.8553201601257608, "grad_norm": 6.711215430217107, "learning_rate": 5.390452651539118e-07, "loss": 17.4655, "step": 46792 }, { "epoch": 0.8553384393222074, "grad_norm": 6.638287406863451, "learning_rate": 5.389115768727255e-07, "loss": 17.4309, "step": 46793 }, { "epoch": 0.8553567185186539, "grad_norm": 8.589661770708297, "learning_rate": 5.387779042272662e-07, "loss": 18.2707, "step": 46794 }, { "epoch": 0.8553749977151004, "grad_norm": 7.0425295615525965, "learning_rate": 5.38644247218002e-07, "loss": 17.7685, "step": 46795 }, { "epoch": 0.855393276911547, "grad_norm": 7.41119347965941, "learning_rate": 5.385106058454043e-07, "loss": 17.3146, "step": 46796 }, { "epoch": 0.8554115561079935, "grad_norm": 7.236593126244866, "learning_rate": 5.383769801099381e-07, "loss": 17.8412, "step": 46797 }, { "epoch": 0.8554298353044401, "grad_norm": 6.969456582683681, "learning_rate": 5.382433700120743e-07, "loss": 18.0224, "step": 46798 }, { "epoch": 0.8554481145008865, "grad_norm": 5.813025640036867, "learning_rate": 5.381097755522807e-07, "loss": 17.2178, "step": 46799 }, { "epoch": 0.855466393697333, "grad_norm": 6.5869080295588365, "learning_rate": 5.379761967310243e-07, "loss": 17.67, "step": 46800 }, { "epoch": 0.8554846728937796, "grad_norm": 5.972624836973321, "learning_rate": 5.378426335487747e-07, "loss": 17.3896, "step": 46801 }, { "epoch": 0.8555029520902261, "grad_norm": 7.954517296690185, "learning_rate": 5.377090860059997e-07, "loss": 17.9403, "step": 46802 }, { "epoch": 0.8555212312866727, "grad_norm": 5.617123103738869, "learning_rate": 5.375755541031663e-07, "loss": 16.879, "step": 46803 }, { "epoch": 0.8555395104831192, "grad_norm": 9.151544843358709, "learning_rate": 5.374420378407446e-07, "loss": 18.0811, "step": 46804 }, { "epoch": 0.8555577896795656, "grad_norm": 6.2235285123444415, "learning_rate": 5.373085372192011e-07, "loss": 17.4202, "step": 46805 }, { "epoch": 0.8555760688760122, "grad_norm": 7.801076296131458, "learning_rate": 5.371750522390029e-07, "loss": 17.8662, "step": 46806 }, { "epoch": 0.8555943480724587, "grad_norm": 5.534157338019788, "learning_rate": 5.370415829006187e-07, "loss": 17.0029, "step": 46807 }, { "epoch": 0.8556126272689053, "grad_norm": 5.243074063142599, "learning_rate": 5.369081292045175e-07, "loss": 17.0096, "step": 46808 }, { "epoch": 0.8556309064653518, "grad_norm": 7.146434386647797, "learning_rate": 5.367746911511651e-07, "loss": 17.7536, "step": 46809 }, { "epoch": 0.8556491856617983, "grad_norm": 4.742965032256942, "learning_rate": 5.366412687410311e-07, "loss": 16.7813, "step": 46810 }, { "epoch": 0.8556674648582449, "grad_norm": 5.227969486099445, "learning_rate": 5.365078619745822e-07, "loss": 16.9068, "step": 46811 }, { "epoch": 0.8556857440546913, "grad_norm": 6.906337272230369, "learning_rate": 5.36374470852285e-07, "loss": 17.5339, "step": 46812 }, { "epoch": 0.8557040232511379, "grad_norm": 5.511251565545374, "learning_rate": 5.362410953746089e-07, "loss": 17.0089, "step": 46813 }, { "epoch": 0.8557223024475844, "grad_norm": 6.683437934974569, "learning_rate": 5.361077355420191e-07, "loss": 17.7545, "step": 46814 }, { "epoch": 0.8557405816440309, "grad_norm": 6.9158532082496675, "learning_rate": 5.359743913549859e-07, "loss": 17.5566, "step": 46815 }, { "epoch": 0.8557588608404775, "grad_norm": 5.7730599001726155, "learning_rate": 5.358410628139738e-07, "loss": 17.2847, "step": 46816 }, { "epoch": 0.855777140036924, "grad_norm": 7.158014103409016, "learning_rate": 5.357077499194513e-07, "loss": 17.6909, "step": 46817 }, { "epoch": 0.8557954192333705, "grad_norm": 6.91037897577965, "learning_rate": 5.355744526718864e-07, "loss": 17.6488, "step": 46818 }, { "epoch": 0.855813698429817, "grad_norm": 6.6273215477001175, "learning_rate": 5.354411710717461e-07, "loss": 17.5454, "step": 46819 }, { "epoch": 0.8558319776262635, "grad_norm": 5.657027690505737, "learning_rate": 5.353079051194959e-07, "loss": 17.058, "step": 46820 }, { "epoch": 0.8558502568227101, "grad_norm": 5.093002484281604, "learning_rate": 5.351746548156056e-07, "loss": 16.9113, "step": 46821 }, { "epoch": 0.8558685360191566, "grad_norm": 6.958382913462385, "learning_rate": 5.350414201605402e-07, "loss": 17.678, "step": 46822 }, { "epoch": 0.8558868152156032, "grad_norm": 7.070138827642161, "learning_rate": 5.349082011547663e-07, "loss": 17.615, "step": 46823 }, { "epoch": 0.8559050944120496, "grad_norm": 6.031655643522464, "learning_rate": 5.347749977987515e-07, "loss": 17.1888, "step": 46824 }, { "epoch": 0.8559233736084961, "grad_norm": 6.1312403160268385, "learning_rate": 5.34641810092964e-07, "loss": 17.5814, "step": 46825 }, { "epoch": 0.8559416528049427, "grad_norm": 6.8486839592984845, "learning_rate": 5.345086380378683e-07, "loss": 17.6108, "step": 46826 }, { "epoch": 0.8559599320013892, "grad_norm": 6.2179157238236185, "learning_rate": 5.343754816339336e-07, "loss": 17.6395, "step": 46827 }, { "epoch": 0.8559782111978358, "grad_norm": 6.493406103251161, "learning_rate": 5.34242340881625e-07, "loss": 17.4541, "step": 46828 }, { "epoch": 0.8559964903942823, "grad_norm": 6.493909131478546, "learning_rate": 5.341092157814087e-07, "loss": 17.4736, "step": 46829 }, { "epoch": 0.8560147695907288, "grad_norm": 6.877699028283106, "learning_rate": 5.339761063337535e-07, "loss": 17.4184, "step": 46830 }, { "epoch": 0.8560330487871753, "grad_norm": 6.2447658966455695, "learning_rate": 5.338430125391225e-07, "loss": 17.4257, "step": 46831 }, { "epoch": 0.8560513279836218, "grad_norm": 4.568714495693274, "learning_rate": 5.337099343979863e-07, "loss": 16.8772, "step": 46832 }, { "epoch": 0.8560696071800684, "grad_norm": 5.615429628164078, "learning_rate": 5.335768719108076e-07, "loss": 17.2572, "step": 46833 }, { "epoch": 0.8560878863765149, "grad_norm": 5.547246973504253, "learning_rate": 5.334438250780544e-07, "loss": 16.8624, "step": 46834 }, { "epoch": 0.8561061655729614, "grad_norm": 5.4292532529839015, "learning_rate": 5.333107939001936e-07, "loss": 17.2163, "step": 46835 }, { "epoch": 0.856124444769408, "grad_norm": 5.827584477786895, "learning_rate": 5.331777783776915e-07, "loss": 17.1854, "step": 46836 }, { "epoch": 0.8561427239658544, "grad_norm": 6.623118950772511, "learning_rate": 5.330447785110127e-07, "loss": 17.251, "step": 46837 }, { "epoch": 0.856161003162301, "grad_norm": 8.135481674078026, "learning_rate": 5.329117943006251e-07, "loss": 17.9284, "step": 46838 }, { "epoch": 0.8561792823587475, "grad_norm": 6.860493543017252, "learning_rate": 5.32778825746994e-07, "loss": 17.4406, "step": 46839 }, { "epoch": 0.856197561555194, "grad_norm": 7.917632721366882, "learning_rate": 5.326458728505845e-07, "loss": 18.4017, "step": 46840 }, { "epoch": 0.8562158407516406, "grad_norm": 6.132661831797539, "learning_rate": 5.325129356118646e-07, "loss": 17.2341, "step": 46841 }, { "epoch": 0.8562341199480871, "grad_norm": 7.3523310103325015, "learning_rate": 5.323800140312979e-07, "loss": 17.4819, "step": 46842 }, { "epoch": 0.8562523991445337, "grad_norm": 5.4638264194201644, "learning_rate": 5.322471081093522e-07, "loss": 17.2532, "step": 46843 }, { "epoch": 0.8562706783409801, "grad_norm": 5.792269511515729, "learning_rate": 5.321142178464934e-07, "loss": 17.1369, "step": 46844 }, { "epoch": 0.8562889575374266, "grad_norm": 7.027091203013434, "learning_rate": 5.319813432431853e-07, "loss": 17.9832, "step": 46845 }, { "epoch": 0.8563072367338732, "grad_norm": 6.2606967539123195, "learning_rate": 5.318484842998961e-07, "loss": 17.2059, "step": 46846 }, { "epoch": 0.8563255159303197, "grad_norm": 6.257417546140323, "learning_rate": 5.317156410170899e-07, "loss": 17.2521, "step": 46847 }, { "epoch": 0.8563437951267662, "grad_norm": 5.395794897929082, "learning_rate": 5.315828133952322e-07, "loss": 17.1907, "step": 46848 }, { "epoch": 0.8563620743232128, "grad_norm": 5.327709877783328, "learning_rate": 5.314500014347895e-07, "loss": 16.955, "step": 46849 }, { "epoch": 0.8563803535196592, "grad_norm": 4.89879381915785, "learning_rate": 5.31317205136227e-07, "loss": 16.7872, "step": 46850 }, { "epoch": 0.8563986327161058, "grad_norm": 5.903567356301291, "learning_rate": 5.311844245000087e-07, "loss": 17.3076, "step": 46851 }, { "epoch": 0.8564169119125523, "grad_norm": 5.697410020241914, "learning_rate": 5.310516595266018e-07, "loss": 17.1979, "step": 46852 }, { "epoch": 0.8564351911089988, "grad_norm": 6.694473340276386, "learning_rate": 5.309189102164714e-07, "loss": 17.6906, "step": 46853 }, { "epoch": 0.8564534703054454, "grad_norm": 5.108499455285097, "learning_rate": 5.307861765700817e-07, "loss": 17.3231, "step": 46854 }, { "epoch": 0.8564717495018919, "grad_norm": 5.2970768432479725, "learning_rate": 5.306534585878997e-07, "loss": 17.1785, "step": 46855 }, { "epoch": 0.8564900286983385, "grad_norm": 8.153876904998915, "learning_rate": 5.305207562703896e-07, "loss": 18.6299, "step": 46856 }, { "epoch": 0.8565083078947849, "grad_norm": 6.800068184893113, "learning_rate": 5.303880696180147e-07, "loss": 17.5817, "step": 46857 }, { "epoch": 0.8565265870912314, "grad_norm": 5.64816703516937, "learning_rate": 5.302553986312436e-07, "loss": 17.1234, "step": 46858 }, { "epoch": 0.856544866287678, "grad_norm": 5.146302345746594, "learning_rate": 5.301227433105388e-07, "loss": 17.0054, "step": 46859 }, { "epoch": 0.8565631454841245, "grad_norm": 5.603660044513812, "learning_rate": 5.299901036563654e-07, "loss": 17.1122, "step": 46860 }, { "epoch": 0.8565814246805711, "grad_norm": 7.103746815844152, "learning_rate": 5.298574796691886e-07, "loss": 17.5731, "step": 46861 }, { "epoch": 0.8565997038770176, "grad_norm": 6.1065176883532555, "learning_rate": 5.297248713494735e-07, "loss": 17.4308, "step": 46862 }, { "epoch": 0.856617983073464, "grad_norm": 5.6015430063777085, "learning_rate": 5.295922786976859e-07, "loss": 17.1489, "step": 46863 }, { "epoch": 0.8566362622699106, "grad_norm": 6.609237392522966, "learning_rate": 5.294597017142894e-07, "loss": 17.5356, "step": 46864 }, { "epoch": 0.8566545414663571, "grad_norm": 7.8945144948108465, "learning_rate": 5.293271403997474e-07, "loss": 18.0999, "step": 46865 }, { "epoch": 0.8566728206628037, "grad_norm": 4.989686295048751, "learning_rate": 5.291945947545268e-07, "loss": 16.9059, "step": 46866 }, { "epoch": 0.8566910998592502, "grad_norm": 5.7986436627054845, "learning_rate": 5.290620647790918e-07, "loss": 17.1688, "step": 46867 }, { "epoch": 0.8567093790556967, "grad_norm": 6.6539349762312945, "learning_rate": 5.289295504739051e-07, "loss": 17.3286, "step": 46868 }, { "epoch": 0.8567276582521433, "grad_norm": 6.571725787921047, "learning_rate": 5.287970518394331e-07, "loss": 17.5248, "step": 46869 }, { "epoch": 0.8567459374485897, "grad_norm": 7.108595487282424, "learning_rate": 5.286645688761383e-07, "loss": 17.7775, "step": 46870 }, { "epoch": 0.8567642166450363, "grad_norm": 5.929644576392526, "learning_rate": 5.285321015844863e-07, "loss": 17.0704, "step": 46871 }, { "epoch": 0.8567824958414828, "grad_norm": 6.86493184580026, "learning_rate": 5.283996499649425e-07, "loss": 17.4643, "step": 46872 }, { "epoch": 0.8568007750379293, "grad_norm": 6.72165912813725, "learning_rate": 5.282672140179695e-07, "loss": 17.6093, "step": 46873 }, { "epoch": 0.8568190542343759, "grad_norm": 6.522589466793985, "learning_rate": 5.281347937440312e-07, "loss": 17.3597, "step": 46874 }, { "epoch": 0.8568373334308224, "grad_norm": 6.613200690550957, "learning_rate": 5.280023891435937e-07, "loss": 17.4934, "step": 46875 }, { "epoch": 0.856855612627269, "grad_norm": 5.995682208350788, "learning_rate": 5.278700002171183e-07, "loss": 17.2456, "step": 46876 }, { "epoch": 0.8568738918237154, "grad_norm": 7.784001928801047, "learning_rate": 5.277376269650714e-07, "loss": 17.6999, "step": 46877 }, { "epoch": 0.8568921710201619, "grad_norm": 5.540503777260098, "learning_rate": 5.276052693879152e-07, "loss": 17.0255, "step": 46878 }, { "epoch": 0.8569104502166085, "grad_norm": 4.216784807848043, "learning_rate": 5.274729274861157e-07, "loss": 16.5195, "step": 46879 }, { "epoch": 0.856928729413055, "grad_norm": 6.135579541355754, "learning_rate": 5.27340601260134e-07, "loss": 17.2477, "step": 46880 }, { "epoch": 0.8569470086095016, "grad_norm": 6.5010222628007135, "learning_rate": 5.272082907104364e-07, "loss": 17.1783, "step": 46881 }, { "epoch": 0.856965287805948, "grad_norm": 6.5353546386284505, "learning_rate": 5.270759958374849e-07, "loss": 17.2179, "step": 46882 }, { "epoch": 0.8569835670023945, "grad_norm": 4.56384234783867, "learning_rate": 5.269437166417446e-07, "loss": 16.6368, "step": 46883 }, { "epoch": 0.8570018461988411, "grad_norm": 4.538059959507359, "learning_rate": 5.26811453123679e-07, "loss": 16.8461, "step": 46884 }, { "epoch": 0.8570201253952876, "grad_norm": 6.596889935587101, "learning_rate": 5.266792052837494e-07, "loss": 17.3835, "step": 46885 }, { "epoch": 0.8570384045917342, "grad_norm": 7.654884484439175, "learning_rate": 5.265469731224226e-07, "loss": 17.808, "step": 46886 }, { "epoch": 0.8570566837881807, "grad_norm": 5.067605603187417, "learning_rate": 5.264147566401589e-07, "loss": 16.8366, "step": 46887 }, { "epoch": 0.8570749629846272, "grad_norm": 6.569232318111597, "learning_rate": 5.262825558374234e-07, "loss": 17.2808, "step": 46888 }, { "epoch": 0.8570932421810737, "grad_norm": 7.053314245239719, "learning_rate": 5.261503707146809e-07, "loss": 17.8245, "step": 46889 }, { "epoch": 0.8571115213775202, "grad_norm": 6.53282349692603, "learning_rate": 5.260182012723924e-07, "loss": 17.3976, "step": 46890 }, { "epoch": 0.8571298005739668, "grad_norm": 7.2125925137112885, "learning_rate": 5.258860475110211e-07, "loss": 17.4738, "step": 46891 }, { "epoch": 0.8571480797704133, "grad_norm": 5.2407070417257495, "learning_rate": 5.257539094310321e-07, "loss": 17.0207, "step": 46892 }, { "epoch": 0.8571663589668598, "grad_norm": 7.968006646888924, "learning_rate": 5.256217870328867e-07, "loss": 17.5473, "step": 46893 }, { "epoch": 0.8571846381633064, "grad_norm": 6.272373027017258, "learning_rate": 5.254896803170489e-07, "loss": 17.1732, "step": 46894 }, { "epoch": 0.8572029173597528, "grad_norm": 5.713393588953355, "learning_rate": 5.253575892839824e-07, "loss": 17.2734, "step": 46895 }, { "epoch": 0.8572211965561994, "grad_norm": 7.939235222717227, "learning_rate": 5.252255139341472e-07, "loss": 18.2518, "step": 46896 }, { "epoch": 0.8572394757526459, "grad_norm": 6.686520480425798, "learning_rate": 5.250934542680092e-07, "loss": 17.6628, "step": 46897 }, { "epoch": 0.8572577549490924, "grad_norm": 7.956872831344606, "learning_rate": 5.249614102860307e-07, "loss": 18.1783, "step": 46898 }, { "epoch": 0.857276034145539, "grad_norm": 5.23117982459221, "learning_rate": 5.248293819886735e-07, "loss": 17.1801, "step": 46899 }, { "epoch": 0.8572943133419855, "grad_norm": 6.7443335523889685, "learning_rate": 5.246973693764018e-07, "loss": 17.4004, "step": 46900 }, { "epoch": 0.8573125925384321, "grad_norm": 6.392602151946835, "learning_rate": 5.245653724496774e-07, "loss": 17.1801, "step": 46901 }, { "epoch": 0.8573308717348785, "grad_norm": 5.9302304306651585, "learning_rate": 5.244333912089622e-07, "loss": 17.3867, "step": 46902 }, { "epoch": 0.857349150931325, "grad_norm": 5.497928629925683, "learning_rate": 5.243014256547208e-07, "loss": 17.2086, "step": 46903 }, { "epoch": 0.8573674301277716, "grad_norm": 4.945392814269993, "learning_rate": 5.241694757874144e-07, "loss": 16.736, "step": 46904 }, { "epoch": 0.8573857093242181, "grad_norm": 5.649651876854626, "learning_rate": 5.240375416075044e-07, "loss": 17.1727, "step": 46905 }, { "epoch": 0.8574039885206647, "grad_norm": 6.0616802631761635, "learning_rate": 5.239056231154543e-07, "loss": 17.4229, "step": 46906 }, { "epoch": 0.8574222677171112, "grad_norm": 5.5463086128877075, "learning_rate": 5.237737203117266e-07, "loss": 16.8675, "step": 46907 }, { "epoch": 0.8574405469135576, "grad_norm": 5.375841030533443, "learning_rate": 5.236418331967847e-07, "loss": 17.2233, "step": 46908 }, { "epoch": 0.8574588261100042, "grad_norm": 5.761449070787849, "learning_rate": 5.235099617710899e-07, "loss": 16.9671, "step": 46909 }, { "epoch": 0.8574771053064507, "grad_norm": 7.128188096227307, "learning_rate": 5.233781060351029e-07, "loss": 17.8841, "step": 46910 }, { "epoch": 0.8574953845028973, "grad_norm": 6.0714634570903385, "learning_rate": 5.232462659892884e-07, "loss": 17.5569, "step": 46911 }, { "epoch": 0.8575136636993438, "grad_norm": 6.368213326188124, "learning_rate": 5.231144416341072e-07, "loss": 17.6225, "step": 46912 }, { "epoch": 0.8575319428957903, "grad_norm": 6.263875455273413, "learning_rate": 5.229826329700206e-07, "loss": 17.5171, "step": 46913 }, { "epoch": 0.8575502220922369, "grad_norm": 7.766133265773689, "learning_rate": 5.228508399974919e-07, "loss": 17.7134, "step": 46914 }, { "epoch": 0.8575685012886833, "grad_norm": 7.219763896237578, "learning_rate": 5.22719062716982e-07, "loss": 17.4746, "step": 46915 }, { "epoch": 0.8575867804851299, "grad_norm": 5.840090965650839, "learning_rate": 5.225873011289528e-07, "loss": 17.3251, "step": 46916 }, { "epoch": 0.8576050596815764, "grad_norm": 6.286291276923076, "learning_rate": 5.224555552338678e-07, "loss": 17.5481, "step": 46917 }, { "epoch": 0.8576233388780229, "grad_norm": 5.49003814743065, "learning_rate": 5.223238250321877e-07, "loss": 17.0505, "step": 46918 }, { "epoch": 0.8576416180744695, "grad_norm": 5.978994403148698, "learning_rate": 5.221921105243732e-07, "loss": 17.0559, "step": 46919 }, { "epoch": 0.857659897270916, "grad_norm": 8.236773930457053, "learning_rate": 5.220604117108873e-07, "loss": 18.0191, "step": 46920 }, { "epoch": 0.8576781764673624, "grad_norm": 6.243108454039369, "learning_rate": 5.219287285921915e-07, "loss": 17.5717, "step": 46921 }, { "epoch": 0.857696455663809, "grad_norm": 7.823223511200209, "learning_rate": 5.217970611687456e-07, "loss": 18.1113, "step": 46922 }, { "epoch": 0.8577147348602555, "grad_norm": 6.487705437416217, "learning_rate": 5.216654094410134e-07, "loss": 17.2883, "step": 46923 }, { "epoch": 0.8577330140567021, "grad_norm": 4.914864155435676, "learning_rate": 5.215337734094545e-07, "loss": 16.8291, "step": 46924 }, { "epoch": 0.8577512932531486, "grad_norm": 6.967469072932507, "learning_rate": 5.214021530745306e-07, "loss": 17.7319, "step": 46925 }, { "epoch": 0.8577695724495951, "grad_norm": 5.223165879636032, "learning_rate": 5.212705484367053e-07, "loss": 16.9204, "step": 46926 }, { "epoch": 0.8577878516460417, "grad_norm": 5.568155731651129, "learning_rate": 5.211389594964366e-07, "loss": 16.8434, "step": 46927 }, { "epoch": 0.8578061308424881, "grad_norm": 5.819499164976429, "learning_rate": 5.210073862541881e-07, "loss": 17.1789, "step": 46928 }, { "epoch": 0.8578244100389347, "grad_norm": 7.047256369119932, "learning_rate": 5.208758287104205e-07, "loss": 17.8128, "step": 46929 }, { "epoch": 0.8578426892353812, "grad_norm": 5.432680834198106, "learning_rate": 5.207442868655932e-07, "loss": 16.9597, "step": 46930 }, { "epoch": 0.8578609684318277, "grad_norm": 6.670096753831929, "learning_rate": 5.206127607201694e-07, "loss": 17.2421, "step": 46931 }, { "epoch": 0.8578792476282743, "grad_norm": 5.025442591836291, "learning_rate": 5.204812502746092e-07, "loss": 16.9226, "step": 46932 }, { "epoch": 0.8578975268247208, "grad_norm": 5.7124546697498015, "learning_rate": 5.203497555293724e-07, "loss": 17.1772, "step": 46933 }, { "epoch": 0.8579158060211673, "grad_norm": 6.855079485011909, "learning_rate": 5.202182764849212e-07, "loss": 17.3564, "step": 46934 }, { "epoch": 0.8579340852176138, "grad_norm": 5.625665413284919, "learning_rate": 5.200868131417169e-07, "loss": 17.2215, "step": 46935 }, { "epoch": 0.8579523644140603, "grad_norm": 4.730028275200775, "learning_rate": 5.199553655002188e-07, "loss": 16.7111, "step": 46936 }, { "epoch": 0.8579706436105069, "grad_norm": 5.823602765179603, "learning_rate": 5.198239335608895e-07, "loss": 16.9785, "step": 46937 }, { "epoch": 0.8579889228069534, "grad_norm": 6.493232335395655, "learning_rate": 5.196925173241874e-07, "loss": 17.2019, "step": 46938 }, { "epoch": 0.8580072020034, "grad_norm": 6.95319927185088, "learning_rate": 5.195611167905751e-07, "loss": 17.8651, "step": 46939 }, { "epoch": 0.8580254811998465, "grad_norm": 5.44345140971354, "learning_rate": 5.194297319605129e-07, "loss": 17.1508, "step": 46940 }, { "epoch": 0.8580437603962929, "grad_norm": 5.248218109869335, "learning_rate": 5.192983628344589e-07, "loss": 16.9002, "step": 46941 }, { "epoch": 0.8580620395927395, "grad_norm": 5.566205109537109, "learning_rate": 5.19167009412877e-07, "loss": 16.9772, "step": 46942 }, { "epoch": 0.858080318789186, "grad_norm": 4.894586509341994, "learning_rate": 5.190356716962242e-07, "loss": 16.8825, "step": 46943 }, { "epoch": 0.8580985979856326, "grad_norm": 7.611254429313216, "learning_rate": 5.189043496849633e-07, "loss": 17.7574, "step": 46944 }, { "epoch": 0.8581168771820791, "grad_norm": 6.222998014112204, "learning_rate": 5.18773043379554e-07, "loss": 17.7234, "step": 46945 }, { "epoch": 0.8581351563785256, "grad_norm": 6.587880652238667, "learning_rate": 5.186417527804566e-07, "loss": 17.5284, "step": 46946 }, { "epoch": 0.8581534355749721, "grad_norm": 7.146988703934472, "learning_rate": 5.1851047788813e-07, "loss": 17.5416, "step": 46947 }, { "epoch": 0.8581717147714186, "grad_norm": 5.25850928299964, "learning_rate": 5.183792187030368e-07, "loss": 17.1105, "step": 46948 }, { "epoch": 0.8581899939678652, "grad_norm": 7.39410362435001, "learning_rate": 5.182479752256348e-07, "loss": 17.864, "step": 46949 }, { "epoch": 0.8582082731643117, "grad_norm": 5.549519518500501, "learning_rate": 5.181167474563841e-07, "loss": 16.8095, "step": 46950 }, { "epoch": 0.8582265523607582, "grad_norm": 6.491741815515957, "learning_rate": 5.179855353957452e-07, "loss": 17.7612, "step": 46951 }, { "epoch": 0.8582448315572048, "grad_norm": 7.145673591339577, "learning_rate": 5.178543390441792e-07, "loss": 17.1389, "step": 46952 }, { "epoch": 0.8582631107536512, "grad_norm": 5.831693993876395, "learning_rate": 5.177231584021436e-07, "loss": 17.3235, "step": 46953 }, { "epoch": 0.8582813899500978, "grad_norm": 5.738448266753791, "learning_rate": 5.175919934701007e-07, "loss": 17.2842, "step": 46954 }, { "epoch": 0.8582996691465443, "grad_norm": 6.140981039945693, "learning_rate": 5.174608442485074e-07, "loss": 17.4871, "step": 46955 }, { "epoch": 0.8583179483429908, "grad_norm": 5.3124799951437165, "learning_rate": 5.173297107378267e-07, "loss": 17.0488, "step": 46956 }, { "epoch": 0.8583362275394374, "grad_norm": 7.592893671787944, "learning_rate": 5.171985929385153e-07, "loss": 17.8529, "step": 46957 }, { "epoch": 0.8583545067358839, "grad_norm": 6.72506766734482, "learning_rate": 5.170674908510337e-07, "loss": 17.6816, "step": 46958 }, { "epoch": 0.8583727859323305, "grad_norm": 5.5190034872748726, "learning_rate": 5.169364044758418e-07, "loss": 17.1432, "step": 46959 }, { "epoch": 0.8583910651287769, "grad_norm": 6.435236763190316, "learning_rate": 5.168053338133983e-07, "loss": 17.0944, "step": 46960 }, { "epoch": 0.8584093443252234, "grad_norm": 5.151559688205757, "learning_rate": 5.166742788641632e-07, "loss": 16.9716, "step": 46961 }, { "epoch": 0.85842762352167, "grad_norm": 5.895792048775433, "learning_rate": 5.165432396285963e-07, "loss": 17.3145, "step": 46962 }, { "epoch": 0.8584459027181165, "grad_norm": 6.157536387262371, "learning_rate": 5.164122161071567e-07, "loss": 17.3384, "step": 46963 }, { "epoch": 0.8584641819145631, "grad_norm": 6.462735012973871, "learning_rate": 5.162812083003016e-07, "loss": 17.583, "step": 46964 }, { "epoch": 0.8584824611110096, "grad_norm": 7.199591480409093, "learning_rate": 5.16150216208493e-07, "loss": 17.6502, "step": 46965 }, { "epoch": 0.858500740307456, "grad_norm": 6.263278896314822, "learning_rate": 5.160192398321889e-07, "loss": 17.2573, "step": 46966 }, { "epoch": 0.8585190195039026, "grad_norm": 5.682635871427294, "learning_rate": 5.158882791718478e-07, "loss": 17.0223, "step": 46967 }, { "epoch": 0.8585372987003491, "grad_norm": 4.735650989657762, "learning_rate": 5.157573342279293e-07, "loss": 16.9247, "step": 46968 }, { "epoch": 0.8585555778967957, "grad_norm": 7.612240801094777, "learning_rate": 5.156264050008919e-07, "loss": 17.8565, "step": 46969 }, { "epoch": 0.8585738570932422, "grad_norm": 6.220096100828945, "learning_rate": 5.154954914911941e-07, "loss": 17.2995, "step": 46970 }, { "epoch": 0.8585921362896887, "grad_norm": 8.04710432864285, "learning_rate": 5.153645936992968e-07, "loss": 18.1307, "step": 46971 }, { "epoch": 0.8586104154861353, "grad_norm": 8.775218172899162, "learning_rate": 5.152337116256567e-07, "loss": 17.8916, "step": 46972 }, { "epoch": 0.8586286946825817, "grad_norm": 6.557385699194627, "learning_rate": 5.151028452707341e-07, "loss": 17.395, "step": 46973 }, { "epoch": 0.8586469738790283, "grad_norm": 5.470162977443194, "learning_rate": 5.149719946349868e-07, "loss": 16.9423, "step": 46974 }, { "epoch": 0.8586652530754748, "grad_norm": 7.703387665663123, "learning_rate": 5.148411597188724e-07, "loss": 17.4588, "step": 46975 }, { "epoch": 0.8586835322719213, "grad_norm": 7.582689050204076, "learning_rate": 5.147103405228515e-07, "loss": 17.902, "step": 46976 }, { "epoch": 0.8587018114683679, "grad_norm": 5.818316908478497, "learning_rate": 5.145795370473822e-07, "loss": 17.0418, "step": 46977 }, { "epoch": 0.8587200906648144, "grad_norm": 6.988957792187903, "learning_rate": 5.144487492929206e-07, "loss": 17.4604, "step": 46978 }, { "epoch": 0.858738369861261, "grad_norm": 7.073189128085825, "learning_rate": 5.143179772599272e-07, "loss": 17.7258, "step": 46979 }, { "epoch": 0.8587566490577074, "grad_norm": 7.112503607046875, "learning_rate": 5.141872209488613e-07, "loss": 17.6805, "step": 46980 }, { "epoch": 0.8587749282541539, "grad_norm": 5.283271895464743, "learning_rate": 5.140564803601788e-07, "loss": 16.8934, "step": 46981 }, { "epoch": 0.8587932074506005, "grad_norm": 7.306961830202065, "learning_rate": 5.1392575549434e-07, "loss": 17.6065, "step": 46982 }, { "epoch": 0.858811486647047, "grad_norm": 5.449640138399277, "learning_rate": 5.137950463518021e-07, "loss": 17.1317, "step": 46983 }, { "epoch": 0.8588297658434936, "grad_norm": 5.71390070423033, "learning_rate": 5.136643529330221e-07, "loss": 17.165, "step": 46984 }, { "epoch": 0.8588480450399401, "grad_norm": 7.460400396309355, "learning_rate": 5.135336752384601e-07, "loss": 17.8623, "step": 46985 }, { "epoch": 0.8588663242363865, "grad_norm": 4.8302296943383345, "learning_rate": 5.13403013268573e-07, "loss": 16.7894, "step": 46986 }, { "epoch": 0.8588846034328331, "grad_norm": 5.824538783206374, "learning_rate": 5.132723670238194e-07, "loss": 17.0208, "step": 46987 }, { "epoch": 0.8589028826292796, "grad_norm": 5.704225083746652, "learning_rate": 5.13141736504656e-07, "loss": 17.0427, "step": 46988 }, { "epoch": 0.8589211618257261, "grad_norm": 6.921583603319342, "learning_rate": 5.130111217115413e-07, "loss": 17.6387, "step": 46989 }, { "epoch": 0.8589394410221727, "grad_norm": 6.590041464337453, "learning_rate": 5.128805226449346e-07, "loss": 17.0216, "step": 46990 }, { "epoch": 0.8589577202186192, "grad_norm": 5.510374252193532, "learning_rate": 5.12749939305292e-07, "loss": 16.9889, "step": 46991 }, { "epoch": 0.8589759994150657, "grad_norm": 7.080915810154231, "learning_rate": 5.126193716930706e-07, "loss": 17.8218, "step": 46992 }, { "epoch": 0.8589942786115122, "grad_norm": 5.796232937288397, "learning_rate": 5.124888198087302e-07, "loss": 17.0059, "step": 46993 }, { "epoch": 0.8590125578079587, "grad_norm": 7.720169178146874, "learning_rate": 5.123582836527264e-07, "loss": 17.7356, "step": 46994 }, { "epoch": 0.8590308370044053, "grad_norm": 5.785525716134479, "learning_rate": 5.122277632255168e-07, "loss": 17.1352, "step": 46995 }, { "epoch": 0.8590491162008518, "grad_norm": 6.3305732138403155, "learning_rate": 5.120972585275608e-07, "loss": 17.3175, "step": 46996 }, { "epoch": 0.8590673953972984, "grad_norm": 5.388140438299949, "learning_rate": 5.119667695593128e-07, "loss": 17.0777, "step": 46997 }, { "epoch": 0.8590856745937449, "grad_norm": 6.6149247698403615, "learning_rate": 5.11836296321232e-07, "loss": 17.4228, "step": 46998 }, { "epoch": 0.8591039537901913, "grad_norm": 5.2378720142950295, "learning_rate": 5.117058388137769e-07, "loss": 16.927, "step": 46999 }, { "epoch": 0.8591222329866379, "grad_norm": 6.906827335489047, "learning_rate": 5.115753970374021e-07, "loss": 17.514, "step": 47000 }, { "epoch": 0.8591405121830844, "grad_norm": 6.924640764047961, "learning_rate": 5.114449709925667e-07, "loss": 17.1792, "step": 47001 }, { "epoch": 0.859158791379531, "grad_norm": 7.097098469025035, "learning_rate": 5.113145606797276e-07, "loss": 17.7743, "step": 47002 }, { "epoch": 0.8591770705759775, "grad_norm": 6.836686532776799, "learning_rate": 5.111841660993405e-07, "loss": 17.4453, "step": 47003 }, { "epoch": 0.859195349772424, "grad_norm": 7.538688191909608, "learning_rate": 5.110537872518639e-07, "loss": 17.5828, "step": 47004 }, { "epoch": 0.8592136289688705, "grad_norm": 6.281750992294186, "learning_rate": 5.109234241377536e-07, "loss": 16.9502, "step": 47005 }, { "epoch": 0.859231908165317, "grad_norm": 5.605164951569225, "learning_rate": 5.107930767574681e-07, "loss": 17.3367, "step": 47006 }, { "epoch": 0.8592501873617636, "grad_norm": 5.854897827319353, "learning_rate": 5.106627451114627e-07, "loss": 17.2989, "step": 47007 }, { "epoch": 0.8592684665582101, "grad_norm": 4.750108808927216, "learning_rate": 5.105324292001951e-07, "loss": 16.966, "step": 47008 }, { "epoch": 0.8592867457546566, "grad_norm": 7.5098713505474075, "learning_rate": 5.104021290241212e-07, "loss": 17.2467, "step": 47009 }, { "epoch": 0.8593050249511032, "grad_norm": 5.5527102705217555, "learning_rate": 5.10271844583699e-07, "loss": 17.1871, "step": 47010 }, { "epoch": 0.8593233041475496, "grad_norm": 5.266310553353121, "learning_rate": 5.101415758793849e-07, "loss": 16.9301, "step": 47011 }, { "epoch": 0.8593415833439962, "grad_norm": 6.252391766560607, "learning_rate": 5.100113229116338e-07, "loss": 17.4043, "step": 47012 }, { "epoch": 0.8593598625404427, "grad_norm": 5.671734156135316, "learning_rate": 5.098810856809039e-07, "loss": 17.179, "step": 47013 }, { "epoch": 0.8593781417368892, "grad_norm": 6.2328062449180175, "learning_rate": 5.097508641876503e-07, "loss": 16.982, "step": 47014 }, { "epoch": 0.8593964209333358, "grad_norm": 6.48603438371618, "learning_rate": 5.096206584323304e-07, "loss": 17.5127, "step": 47015 }, { "epoch": 0.8594147001297823, "grad_norm": 6.655843139698465, "learning_rate": 5.094904684154017e-07, "loss": 17.3218, "step": 47016 }, { "epoch": 0.8594329793262289, "grad_norm": 6.494097665037649, "learning_rate": 5.093602941373182e-07, "loss": 17.6218, "step": 47017 }, { "epoch": 0.8594512585226753, "grad_norm": 7.35612196374247, "learning_rate": 5.092301355985379e-07, "loss": 17.1809, "step": 47018 }, { "epoch": 0.8594695377191218, "grad_norm": 5.991711202093105, "learning_rate": 5.090999927995161e-07, "loss": 17.2481, "step": 47019 }, { "epoch": 0.8594878169155684, "grad_norm": 6.374389960950118, "learning_rate": 5.089698657407088e-07, "loss": 17.4441, "step": 47020 }, { "epoch": 0.8595060961120149, "grad_norm": 5.687467904972136, "learning_rate": 5.088397544225726e-07, "loss": 17.2204, "step": 47021 }, { "epoch": 0.8595243753084615, "grad_norm": 8.956618506562712, "learning_rate": 5.087096588455636e-07, "loss": 18.4276, "step": 47022 }, { "epoch": 0.859542654504908, "grad_norm": 6.949867870310358, "learning_rate": 5.085795790101372e-07, "loss": 17.7204, "step": 47023 }, { "epoch": 0.8595609337013544, "grad_norm": 7.016350925286689, "learning_rate": 5.08449514916749e-07, "loss": 17.6187, "step": 47024 }, { "epoch": 0.859579212897801, "grad_norm": 7.535756355843914, "learning_rate": 5.083194665658564e-07, "loss": 17.7416, "step": 47025 }, { "epoch": 0.8595974920942475, "grad_norm": 8.594284280496524, "learning_rate": 5.081894339579135e-07, "loss": 19.0686, "step": 47026 }, { "epoch": 0.8596157712906941, "grad_norm": 5.478670898316585, "learning_rate": 5.080594170933778e-07, "loss": 17.224, "step": 47027 }, { "epoch": 0.8596340504871406, "grad_norm": 6.878322174266632, "learning_rate": 5.079294159727039e-07, "loss": 17.4058, "step": 47028 }, { "epoch": 0.8596523296835871, "grad_norm": 7.350372910437167, "learning_rate": 5.077994305963469e-07, "loss": 17.12, "step": 47029 }, { "epoch": 0.8596706088800337, "grad_norm": 8.000116388464011, "learning_rate": 5.076694609647642e-07, "loss": 17.8557, "step": 47030 }, { "epoch": 0.8596888880764801, "grad_norm": 5.894300674534616, "learning_rate": 5.07539507078409e-07, "loss": 17.3155, "step": 47031 }, { "epoch": 0.8597071672729267, "grad_norm": 5.740433751069346, "learning_rate": 5.074095689377389e-07, "loss": 17.0019, "step": 47032 }, { "epoch": 0.8597254464693732, "grad_norm": 7.066337816549952, "learning_rate": 5.072796465432073e-07, "loss": 17.6364, "step": 47033 }, { "epoch": 0.8597437256658197, "grad_norm": 6.38612996804083, "learning_rate": 5.071497398952707e-07, "loss": 17.4114, "step": 47034 }, { "epoch": 0.8597620048622663, "grad_norm": 5.73669810985727, "learning_rate": 5.070198489943856e-07, "loss": 17.2186, "step": 47035 }, { "epoch": 0.8597802840587128, "grad_norm": 6.513228459409685, "learning_rate": 5.06889973841006e-07, "loss": 17.3986, "step": 47036 }, { "epoch": 0.8597985632551594, "grad_norm": 5.303780628106109, "learning_rate": 5.067601144355861e-07, "loss": 17.0695, "step": 47037 }, { "epoch": 0.8598168424516058, "grad_norm": 7.288499405950645, "learning_rate": 5.066302707785831e-07, "loss": 17.4949, "step": 47038 }, { "epoch": 0.8598351216480523, "grad_norm": 6.2546973421825935, "learning_rate": 5.065004428704512e-07, "loss": 16.9779, "step": 47039 }, { "epoch": 0.8598534008444989, "grad_norm": 5.598749509751331, "learning_rate": 5.063706307116445e-07, "loss": 17.0461, "step": 47040 }, { "epoch": 0.8598716800409454, "grad_norm": 6.050316149431145, "learning_rate": 5.062408343026193e-07, "loss": 17.3029, "step": 47041 }, { "epoch": 0.859889959237392, "grad_norm": 5.676673116570881, "learning_rate": 5.061110536438291e-07, "loss": 17.3175, "step": 47042 }, { "epoch": 0.8599082384338385, "grad_norm": 6.264792542591709, "learning_rate": 5.059812887357302e-07, "loss": 17.1549, "step": 47043 }, { "epoch": 0.8599265176302849, "grad_norm": 6.663633388958519, "learning_rate": 5.058515395787772e-07, "loss": 17.3093, "step": 47044 }, { "epoch": 0.8599447968267315, "grad_norm": 6.646305363128028, "learning_rate": 5.057218061734243e-07, "loss": 17.777, "step": 47045 }, { "epoch": 0.859963076023178, "grad_norm": 5.380743737958309, "learning_rate": 5.055920885201271e-07, "loss": 16.9865, "step": 47046 }, { "epoch": 0.8599813552196246, "grad_norm": 7.818751463727962, "learning_rate": 5.054623866193393e-07, "loss": 17.9097, "step": 47047 }, { "epoch": 0.8599996344160711, "grad_norm": 5.9331253971659, "learning_rate": 5.053327004715153e-07, "loss": 17.114, "step": 47048 }, { "epoch": 0.8600179136125176, "grad_norm": 6.940374652264747, "learning_rate": 5.05203030077111e-07, "loss": 17.8859, "step": 47049 }, { "epoch": 0.8600361928089642, "grad_norm": 9.097934558379484, "learning_rate": 5.0507337543658e-07, "loss": 18.051, "step": 47050 }, { "epoch": 0.8600544720054106, "grad_norm": 6.712501029753003, "learning_rate": 5.049437365503756e-07, "loss": 17.7409, "step": 47051 }, { "epoch": 0.8600727512018572, "grad_norm": 5.7640139283722895, "learning_rate": 5.048141134189538e-07, "loss": 17.3845, "step": 47052 }, { "epoch": 0.8600910303983037, "grad_norm": 6.41113995413649, "learning_rate": 5.046845060427691e-07, "loss": 17.4616, "step": 47053 }, { "epoch": 0.8601093095947502, "grad_norm": 5.673520310972555, "learning_rate": 5.045549144222744e-07, "loss": 17.0672, "step": 47054 }, { "epoch": 0.8601275887911968, "grad_norm": 8.97783538015862, "learning_rate": 5.04425338557925e-07, "loss": 18.0396, "step": 47055 }, { "epoch": 0.8601458679876433, "grad_norm": 5.202655326369826, "learning_rate": 5.042957784501756e-07, "loss": 16.9594, "step": 47056 }, { "epoch": 0.8601641471840897, "grad_norm": 5.405825892908147, "learning_rate": 5.04166234099478e-07, "loss": 16.935, "step": 47057 }, { "epoch": 0.8601824263805363, "grad_norm": 7.65953761588538, "learning_rate": 5.040367055062883e-07, "loss": 17.7007, "step": 47058 }, { "epoch": 0.8602007055769828, "grad_norm": 5.478592492390811, "learning_rate": 5.039071926710598e-07, "loss": 16.911, "step": 47059 }, { "epoch": 0.8602189847734294, "grad_norm": 6.612468921552842, "learning_rate": 5.037776955942458e-07, "loss": 17.4228, "step": 47060 }, { "epoch": 0.8602372639698759, "grad_norm": 5.217543984867734, "learning_rate": 5.036482142763005e-07, "loss": 16.9736, "step": 47061 }, { "epoch": 0.8602555431663224, "grad_norm": 5.962036056062785, "learning_rate": 5.035187487176785e-07, "loss": 17.0903, "step": 47062 }, { "epoch": 0.860273822362769, "grad_norm": 6.740552345073345, "learning_rate": 5.033892989188338e-07, "loss": 17.4891, "step": 47063 }, { "epoch": 0.8602921015592154, "grad_norm": 6.266954274181331, "learning_rate": 5.032598648802195e-07, "loss": 17.4252, "step": 47064 }, { "epoch": 0.860310380755662, "grad_norm": 4.936255868254813, "learning_rate": 5.03130446602288e-07, "loss": 16.6701, "step": 47065 }, { "epoch": 0.8603286599521085, "grad_norm": 6.682409861178573, "learning_rate": 5.030010440854949e-07, "loss": 17.2205, "step": 47066 }, { "epoch": 0.860346939148555, "grad_norm": 5.757377135125079, "learning_rate": 5.028716573302933e-07, "loss": 16.8719, "step": 47067 }, { "epoch": 0.8603652183450016, "grad_norm": 6.165689404658646, "learning_rate": 5.02742286337135e-07, "loss": 17.4303, "step": 47068 }, { "epoch": 0.860383497541448, "grad_norm": 6.97061800653843, "learning_rate": 5.026129311064759e-07, "loss": 17.3421, "step": 47069 }, { "epoch": 0.8604017767378946, "grad_norm": 6.180140287115053, "learning_rate": 5.024835916387672e-07, "loss": 17.4303, "step": 47070 }, { "epoch": 0.8604200559343411, "grad_norm": 5.474159059113308, "learning_rate": 5.023542679344634e-07, "loss": 17.0299, "step": 47071 }, { "epoch": 0.8604383351307876, "grad_norm": 4.91200216934193, "learning_rate": 5.022249599940182e-07, "loss": 16.9613, "step": 47072 }, { "epoch": 0.8604566143272342, "grad_norm": 4.528562857443351, "learning_rate": 5.020956678178845e-07, "loss": 16.7624, "step": 47073 }, { "epoch": 0.8604748935236807, "grad_norm": 6.200164956971783, "learning_rate": 5.019663914065143e-07, "loss": 17.3293, "step": 47074 }, { "epoch": 0.8604931727201273, "grad_norm": 5.738120921333374, "learning_rate": 5.018371307603625e-07, "loss": 16.8794, "step": 47075 }, { "epoch": 0.8605114519165737, "grad_norm": 5.429245876780596, "learning_rate": 5.017078858798802e-07, "loss": 16.9614, "step": 47076 }, { "epoch": 0.8605297311130202, "grad_norm": 6.267735229483064, "learning_rate": 5.015786567655223e-07, "loss": 17.1456, "step": 47077 }, { "epoch": 0.8605480103094668, "grad_norm": 5.326286474946286, "learning_rate": 5.014494434177397e-07, "loss": 16.903, "step": 47078 }, { "epoch": 0.8605662895059133, "grad_norm": 8.381602638088797, "learning_rate": 5.013202458369876e-07, "loss": 17.9532, "step": 47079 }, { "epoch": 0.8605845687023599, "grad_norm": 4.950224660961041, "learning_rate": 5.011910640237167e-07, "loss": 16.7415, "step": 47080 }, { "epoch": 0.8606028478988064, "grad_norm": 6.362437531747932, "learning_rate": 5.010618979783821e-07, "loss": 17.5077, "step": 47081 }, { "epoch": 0.8606211270952528, "grad_norm": 5.163169000662482, "learning_rate": 5.009327477014336e-07, "loss": 16.9988, "step": 47082 }, { "epoch": 0.8606394062916994, "grad_norm": 6.661747112110008, "learning_rate": 5.008036131933264e-07, "loss": 17.4281, "step": 47083 }, { "epoch": 0.8606576854881459, "grad_norm": 6.757583908720371, "learning_rate": 5.006744944545122e-07, "loss": 17.3661, "step": 47084 }, { "epoch": 0.8606759646845925, "grad_norm": 5.894016195655421, "learning_rate": 5.00545391485443e-07, "loss": 17.1513, "step": 47085 }, { "epoch": 0.860694243881039, "grad_norm": 6.140351703306719, "learning_rate": 5.004163042865723e-07, "loss": 17.3099, "step": 47086 }, { "epoch": 0.8607125230774855, "grad_norm": 7.470721946349987, "learning_rate": 5.002872328583507e-07, "loss": 17.6342, "step": 47087 }, { "epoch": 0.8607308022739321, "grad_norm": 5.927912842396589, "learning_rate": 5.001581772012321e-07, "loss": 17.1408, "step": 47088 }, { "epoch": 0.8607490814703785, "grad_norm": 7.302514872905244, "learning_rate": 5.000291373156701e-07, "loss": 17.4932, "step": 47089 }, { "epoch": 0.8607673606668251, "grad_norm": 6.509057791519132, "learning_rate": 4.999001132021147e-07, "loss": 17.0872, "step": 47090 }, { "epoch": 0.8607856398632716, "grad_norm": 5.800441095829099, "learning_rate": 4.997711048610182e-07, "loss": 17.1638, "step": 47091 }, { "epoch": 0.8608039190597181, "grad_norm": 6.831307878752768, "learning_rate": 4.996421122928347e-07, "loss": 17.5374, "step": 47092 }, { "epoch": 0.8608221982561647, "grad_norm": 5.503458023325403, "learning_rate": 4.995131354980142e-07, "loss": 17.1252, "step": 47093 }, { "epoch": 0.8608404774526112, "grad_norm": 6.2718359207681855, "learning_rate": 4.9938417447701e-07, "loss": 17.3527, "step": 47094 }, { "epoch": 0.8608587566490578, "grad_norm": 6.793407811299587, "learning_rate": 4.992552292302743e-07, "loss": 17.6395, "step": 47095 }, { "epoch": 0.8608770358455042, "grad_norm": 7.7504354044463994, "learning_rate": 4.991262997582575e-07, "loss": 17.6869, "step": 47096 }, { "epoch": 0.8608953150419507, "grad_norm": 6.745336449102621, "learning_rate": 4.989973860614116e-07, "loss": 17.1704, "step": 47097 }, { "epoch": 0.8609135942383973, "grad_norm": 7.484788458053254, "learning_rate": 4.988684881401912e-07, "loss": 17.8109, "step": 47098 }, { "epoch": 0.8609318734348438, "grad_norm": 4.848348861317436, "learning_rate": 4.98739605995045e-07, "loss": 16.8288, "step": 47099 }, { "epoch": 0.8609501526312904, "grad_norm": 5.582074217180279, "learning_rate": 4.986107396264267e-07, "loss": 17.0936, "step": 47100 }, { "epoch": 0.8609684318277369, "grad_norm": 6.636116589718093, "learning_rate": 4.98481889034787e-07, "loss": 17.5024, "step": 47101 }, { "epoch": 0.8609867110241833, "grad_norm": 5.4368694849013055, "learning_rate": 4.983530542205767e-07, "loss": 17.1636, "step": 47102 }, { "epoch": 0.8610049902206299, "grad_norm": 5.08954299715908, "learning_rate": 4.982242351842498e-07, "loss": 17.1704, "step": 47103 }, { "epoch": 0.8610232694170764, "grad_norm": 6.159387579968234, "learning_rate": 4.980954319262554e-07, "loss": 17.262, "step": 47104 }, { "epoch": 0.861041548613523, "grad_norm": 8.003771593895697, "learning_rate": 4.979666444470454e-07, "loss": 17.3529, "step": 47105 }, { "epoch": 0.8610598278099695, "grad_norm": 5.541511621352145, "learning_rate": 4.978378727470717e-07, "loss": 16.9204, "step": 47106 }, { "epoch": 0.861078107006416, "grad_norm": 5.6545631804992755, "learning_rate": 4.977091168267856e-07, "loss": 17.0339, "step": 47107 }, { "epoch": 0.8610963862028626, "grad_norm": 6.2518593199177035, "learning_rate": 4.975803766866394e-07, "loss": 16.9275, "step": 47108 }, { "epoch": 0.861114665399309, "grad_norm": 5.08274156299282, "learning_rate": 4.974516523270833e-07, "loss": 16.8373, "step": 47109 }, { "epoch": 0.8611329445957556, "grad_norm": 5.420948865485692, "learning_rate": 4.973229437485672e-07, "loss": 17.0772, "step": 47110 }, { "epoch": 0.8611512237922021, "grad_norm": 6.414069535835579, "learning_rate": 4.971942509515443e-07, "loss": 17.5103, "step": 47111 }, { "epoch": 0.8611695029886486, "grad_norm": 6.5697547747696845, "learning_rate": 4.970655739364655e-07, "loss": 17.576, "step": 47112 }, { "epoch": 0.8611877821850952, "grad_norm": 6.198889427138607, "learning_rate": 4.969369127037793e-07, "loss": 17.3119, "step": 47113 }, { "epoch": 0.8612060613815417, "grad_norm": 5.499335087725688, "learning_rate": 4.968082672539398e-07, "loss": 16.9293, "step": 47114 }, { "epoch": 0.8612243405779882, "grad_norm": 5.620206369233074, "learning_rate": 4.966796375873955e-07, "loss": 17.3131, "step": 47115 }, { "epoch": 0.8612426197744347, "grad_norm": 6.1721313380433465, "learning_rate": 4.965510237045984e-07, "loss": 16.9882, "step": 47116 }, { "epoch": 0.8612608989708812, "grad_norm": 6.548876646976861, "learning_rate": 4.964224256060001e-07, "loss": 17.5382, "step": 47117 }, { "epoch": 0.8612791781673278, "grad_norm": 6.992158227322044, "learning_rate": 4.962938432920505e-07, "loss": 17.587, "step": 47118 }, { "epoch": 0.8612974573637743, "grad_norm": 6.141143019729945, "learning_rate": 4.961652767631992e-07, "loss": 17.1581, "step": 47119 }, { "epoch": 0.8613157365602209, "grad_norm": 7.094476188623213, "learning_rate": 4.960367260198989e-07, "loss": 17.381, "step": 47120 }, { "epoch": 0.8613340157566673, "grad_norm": 5.95682671542112, "learning_rate": 4.959081910625991e-07, "loss": 17.2505, "step": 47121 }, { "epoch": 0.8613522949531138, "grad_norm": 7.345133504582366, "learning_rate": 4.957796718917485e-07, "loss": 17.6322, "step": 47122 }, { "epoch": 0.8613705741495604, "grad_norm": 5.477728093690914, "learning_rate": 4.956511685078008e-07, "loss": 16.9274, "step": 47123 }, { "epoch": 0.8613888533460069, "grad_norm": 6.358819552343591, "learning_rate": 4.95522680911204e-07, "loss": 17.1042, "step": 47124 }, { "epoch": 0.8614071325424534, "grad_norm": 6.479807369519079, "learning_rate": 4.953942091024095e-07, "loss": 17.0665, "step": 47125 }, { "epoch": 0.8614254117389, "grad_norm": 4.746217926900013, "learning_rate": 4.952657530818678e-07, "loss": 16.8937, "step": 47126 }, { "epoch": 0.8614436909353465, "grad_norm": 5.428563900897622, "learning_rate": 4.951373128500281e-07, "loss": 16.8209, "step": 47127 }, { "epoch": 0.861461970131793, "grad_norm": 4.309283224126832, "learning_rate": 4.950088884073422e-07, "loss": 16.6775, "step": 47128 }, { "epoch": 0.8614802493282395, "grad_norm": 5.456537143552537, "learning_rate": 4.94880479754259e-07, "loss": 16.9079, "step": 47129 }, { "epoch": 0.861498528524686, "grad_norm": 6.149518074284901, "learning_rate": 4.947520868912281e-07, "loss": 17.1802, "step": 47130 }, { "epoch": 0.8615168077211326, "grad_norm": 5.747236496622216, "learning_rate": 4.946237098187007e-07, "loss": 17.2425, "step": 47131 }, { "epoch": 0.8615350869175791, "grad_norm": 5.157632261811566, "learning_rate": 4.944953485371257e-07, "loss": 16.9116, "step": 47132 }, { "epoch": 0.8615533661140257, "grad_norm": 5.55891943256731, "learning_rate": 4.943670030469544e-07, "loss": 17.1382, "step": 47133 }, { "epoch": 0.8615716453104721, "grad_norm": 6.570998629610679, "learning_rate": 4.942386733486348e-07, "loss": 17.3365, "step": 47134 }, { "epoch": 0.8615899245069186, "grad_norm": 11.138149968401898, "learning_rate": 4.941103594426183e-07, "loss": 17.3843, "step": 47135 }, { "epoch": 0.8616082037033652, "grad_norm": 6.559962047930445, "learning_rate": 4.939820613293533e-07, "loss": 17.3382, "step": 47136 }, { "epoch": 0.8616264828998117, "grad_norm": 4.907316272134278, "learning_rate": 4.938537790092912e-07, "loss": 17.0041, "step": 47137 }, { "epoch": 0.8616447620962583, "grad_norm": 7.663479387934557, "learning_rate": 4.9372551248288e-07, "loss": 17.8628, "step": 47138 }, { "epoch": 0.8616630412927048, "grad_norm": 6.756190105901258, "learning_rate": 4.935972617505702e-07, "loss": 17.1955, "step": 47139 }, { "epoch": 0.8616813204891512, "grad_norm": 7.498774048299726, "learning_rate": 4.934690268128111e-07, "loss": 17.6609, "step": 47140 }, { "epoch": 0.8616995996855978, "grad_norm": 6.839131042384837, "learning_rate": 4.933408076700507e-07, "loss": 17.3406, "step": 47141 }, { "epoch": 0.8617178788820443, "grad_norm": 6.045331409338808, "learning_rate": 4.932126043227403e-07, "loss": 17.241, "step": 47142 }, { "epoch": 0.8617361580784909, "grad_norm": 8.00229335499, "learning_rate": 4.930844167713295e-07, "loss": 17.8495, "step": 47143 }, { "epoch": 0.8617544372749374, "grad_norm": 5.937800418624426, "learning_rate": 4.929562450162657e-07, "loss": 17.3084, "step": 47144 }, { "epoch": 0.8617727164713839, "grad_norm": 6.63877892571058, "learning_rate": 4.928280890580001e-07, "loss": 17.3914, "step": 47145 }, { "epoch": 0.8617909956678305, "grad_norm": 4.555786385832895, "learning_rate": 4.926999488969808e-07, "loss": 16.7213, "step": 47146 }, { "epoch": 0.8618092748642769, "grad_norm": 7.040414008268108, "learning_rate": 4.925718245336558e-07, "loss": 17.6715, "step": 47147 }, { "epoch": 0.8618275540607235, "grad_norm": 7.210151846251581, "learning_rate": 4.924437159684769e-07, "loss": 17.6978, "step": 47148 }, { "epoch": 0.86184583325717, "grad_norm": 6.885640753881324, "learning_rate": 4.923156232018916e-07, "loss": 17.2987, "step": 47149 }, { "epoch": 0.8618641124536165, "grad_norm": 5.883197424428475, "learning_rate": 4.921875462343478e-07, "loss": 17.2796, "step": 47150 }, { "epoch": 0.8618823916500631, "grad_norm": 7.074692731873025, "learning_rate": 4.920594850662952e-07, "loss": 17.8978, "step": 47151 }, { "epoch": 0.8619006708465096, "grad_norm": 5.659943067497648, "learning_rate": 4.919314396981845e-07, "loss": 17.043, "step": 47152 }, { "epoch": 0.8619189500429562, "grad_norm": 5.997021066058483, "learning_rate": 4.918034101304614e-07, "loss": 17.1491, "step": 47153 }, { "epoch": 0.8619372292394026, "grad_norm": 6.45053444180063, "learning_rate": 4.916753963635773e-07, "loss": 16.8885, "step": 47154 }, { "epoch": 0.8619555084358491, "grad_norm": 6.214756060386204, "learning_rate": 4.91547398397979e-07, "loss": 17.2729, "step": 47155 }, { "epoch": 0.8619737876322957, "grad_norm": 6.400989262192111, "learning_rate": 4.914194162341163e-07, "loss": 17.6028, "step": 47156 }, { "epoch": 0.8619920668287422, "grad_norm": 6.257569939727301, "learning_rate": 4.912914498724374e-07, "loss": 17.3542, "step": 47157 }, { "epoch": 0.8620103460251888, "grad_norm": 6.8426675406291855, "learning_rate": 4.911634993133901e-07, "loss": 17.3807, "step": 47158 }, { "epoch": 0.8620286252216353, "grad_norm": 5.368045479017222, "learning_rate": 4.910355645574244e-07, "loss": 17.093, "step": 47159 }, { "epoch": 0.8620469044180817, "grad_norm": 4.844283089528985, "learning_rate": 4.909076456049866e-07, "loss": 16.9326, "step": 47160 }, { "epoch": 0.8620651836145283, "grad_norm": 7.0052196836127365, "learning_rate": 4.907797424565264e-07, "loss": 17.352, "step": 47161 }, { "epoch": 0.8620834628109748, "grad_norm": 5.508595305764743, "learning_rate": 4.906518551124928e-07, "loss": 17.1672, "step": 47162 }, { "epoch": 0.8621017420074214, "grad_norm": 5.238771275717374, "learning_rate": 4.905239835733333e-07, "loss": 16.865, "step": 47163 }, { "epoch": 0.8621200212038679, "grad_norm": 6.135531246317764, "learning_rate": 4.903961278394947e-07, "loss": 17.1876, "step": 47164 }, { "epoch": 0.8621383004003144, "grad_norm": 5.166226137668884, "learning_rate": 4.902682879114273e-07, "loss": 17.1757, "step": 47165 }, { "epoch": 0.862156579596761, "grad_norm": 5.219132904037811, "learning_rate": 4.901404637895785e-07, "loss": 16.8331, "step": 47166 }, { "epoch": 0.8621748587932074, "grad_norm": 5.319821543584787, "learning_rate": 4.90012655474395e-07, "loss": 17.1241, "step": 47167 }, { "epoch": 0.862193137989654, "grad_norm": 5.692170309362416, "learning_rate": 4.898848629663266e-07, "loss": 17.1253, "step": 47168 }, { "epoch": 0.8622114171861005, "grad_norm": 5.809369748984745, "learning_rate": 4.897570862658196e-07, "loss": 17.2573, "step": 47169 }, { "epoch": 0.862229696382547, "grad_norm": 7.50909549385368, "learning_rate": 4.896293253733225e-07, "loss": 17.6128, "step": 47170 }, { "epoch": 0.8622479755789936, "grad_norm": 5.179460517424308, "learning_rate": 4.895015802892838e-07, "loss": 16.9399, "step": 47171 }, { "epoch": 0.86226625477544, "grad_norm": 6.520997467605416, "learning_rate": 4.893738510141505e-07, "loss": 17.4308, "step": 47172 }, { "epoch": 0.8622845339718866, "grad_norm": 5.600452302410764, "learning_rate": 4.892461375483709e-07, "loss": 17.3123, "step": 47173 }, { "epoch": 0.8623028131683331, "grad_norm": 7.089157003743321, "learning_rate": 4.891184398923921e-07, "loss": 17.7017, "step": 47174 }, { "epoch": 0.8623210923647796, "grad_norm": 5.911572397125226, "learning_rate": 4.889907580466608e-07, "loss": 16.9501, "step": 47175 }, { "epoch": 0.8623393715612262, "grad_norm": 6.675570071256342, "learning_rate": 4.888630920116266e-07, "loss": 17.5372, "step": 47176 }, { "epoch": 0.8623576507576727, "grad_norm": 4.081387140633793, "learning_rate": 4.887354417877355e-07, "loss": 16.6675, "step": 47177 }, { "epoch": 0.8623759299541193, "grad_norm": 7.340904526578002, "learning_rate": 4.886078073754347e-07, "loss": 17.861, "step": 47178 }, { "epoch": 0.8623942091505657, "grad_norm": 8.678808890692157, "learning_rate": 4.884801887751717e-07, "loss": 17.281, "step": 47179 }, { "epoch": 0.8624124883470122, "grad_norm": 6.932062869388489, "learning_rate": 4.88352585987395e-07, "loss": 17.8572, "step": 47180 }, { "epoch": 0.8624307675434588, "grad_norm": 6.293883724144762, "learning_rate": 4.882249990125504e-07, "loss": 17.3211, "step": 47181 }, { "epoch": 0.8624490467399053, "grad_norm": 6.513820290825525, "learning_rate": 4.880974278510863e-07, "loss": 17.5905, "step": 47182 }, { "epoch": 0.8624673259363519, "grad_norm": 6.226707729094931, "learning_rate": 4.879698725034488e-07, "loss": 17.1352, "step": 47183 }, { "epoch": 0.8624856051327984, "grad_norm": 5.655087438767868, "learning_rate": 4.87842332970086e-07, "loss": 17.0094, "step": 47184 }, { "epoch": 0.8625038843292449, "grad_norm": 5.813518742512975, "learning_rate": 4.877148092514439e-07, "loss": 17.2655, "step": 47185 }, { "epoch": 0.8625221635256914, "grad_norm": 6.729012321622019, "learning_rate": 4.875873013479693e-07, "loss": 17.5694, "step": 47186 }, { "epoch": 0.8625404427221379, "grad_norm": 5.912233008673179, "learning_rate": 4.874598092601107e-07, "loss": 17.1657, "step": 47187 }, { "epoch": 0.8625587219185845, "grad_norm": 5.71344167125156, "learning_rate": 4.873323329883123e-07, "loss": 17.1899, "step": 47188 }, { "epoch": 0.862577001115031, "grad_norm": 6.933864495835174, "learning_rate": 4.872048725330231e-07, "loss": 17.5733, "step": 47189 }, { "epoch": 0.8625952803114775, "grad_norm": 6.63923200455147, "learning_rate": 4.870774278946899e-07, "loss": 17.6933, "step": 47190 }, { "epoch": 0.8626135595079241, "grad_norm": 7.1256194239528075, "learning_rate": 4.869499990737586e-07, "loss": 17.6841, "step": 47191 }, { "epoch": 0.8626318387043705, "grad_norm": 6.095309352587444, "learning_rate": 4.868225860706755e-07, "loss": 16.9476, "step": 47192 }, { "epoch": 0.862650117900817, "grad_norm": 8.470660959290287, "learning_rate": 4.866951888858879e-07, "loss": 18.234, "step": 47193 }, { "epoch": 0.8626683970972636, "grad_norm": 5.601054781363088, "learning_rate": 4.865678075198427e-07, "loss": 16.9273, "step": 47194 }, { "epoch": 0.8626866762937101, "grad_norm": 5.920726070977338, "learning_rate": 4.864404419729841e-07, "loss": 17.1878, "step": 47195 }, { "epoch": 0.8627049554901567, "grad_norm": 6.708467535642466, "learning_rate": 4.86313092245761e-07, "loss": 17.5068, "step": 47196 }, { "epoch": 0.8627232346866032, "grad_norm": 5.743229681135456, "learning_rate": 4.861857583386182e-07, "loss": 17.2413, "step": 47197 }, { "epoch": 0.8627415138830496, "grad_norm": 6.186053736265699, "learning_rate": 4.860584402520025e-07, "loss": 17.2817, "step": 47198 }, { "epoch": 0.8627597930794962, "grad_norm": 6.085124636050472, "learning_rate": 4.859311379863612e-07, "loss": 17.148, "step": 47199 }, { "epoch": 0.8627780722759427, "grad_norm": 5.84857745411249, "learning_rate": 4.858038515421387e-07, "loss": 17.2721, "step": 47200 }, { "epoch": 0.8627963514723893, "grad_norm": 7.285375978759837, "learning_rate": 4.856765809197834e-07, "loss": 17.8686, "step": 47201 }, { "epoch": 0.8628146306688358, "grad_norm": 6.8630684006469345, "learning_rate": 4.855493261197392e-07, "loss": 17.7845, "step": 47202 }, { "epoch": 0.8628329098652823, "grad_norm": 7.791852514478468, "learning_rate": 4.85422087142452e-07, "loss": 18.0345, "step": 47203 }, { "epoch": 0.8628511890617289, "grad_norm": 6.06541060552944, "learning_rate": 4.852948639883697e-07, "loss": 17.2353, "step": 47204 }, { "epoch": 0.8628694682581753, "grad_norm": 8.496277750855219, "learning_rate": 4.851676566579367e-07, "loss": 18.0321, "step": 47205 }, { "epoch": 0.8628877474546219, "grad_norm": 10.438113823144253, "learning_rate": 4.850404651515994e-07, "loss": 17.9924, "step": 47206 }, { "epoch": 0.8629060266510684, "grad_norm": 6.2223058982589645, "learning_rate": 4.849132894698034e-07, "loss": 17.2045, "step": 47207 }, { "epoch": 0.8629243058475149, "grad_norm": 5.257583605079675, "learning_rate": 4.847861296129946e-07, "loss": 16.7709, "step": 47208 }, { "epoch": 0.8629425850439615, "grad_norm": 9.968027986564291, "learning_rate": 4.846589855816181e-07, "loss": 17.6964, "step": 47209 }, { "epoch": 0.862960864240408, "grad_norm": 5.360426234315678, "learning_rate": 4.84531857376121e-07, "loss": 17.0819, "step": 47210 }, { "epoch": 0.8629791434368546, "grad_norm": 6.318120235406609, "learning_rate": 4.84404744996948e-07, "loss": 17.4092, "step": 47211 }, { "epoch": 0.862997422633301, "grad_norm": 5.983497121884072, "learning_rate": 4.842776484445433e-07, "loss": 17.1577, "step": 47212 }, { "epoch": 0.8630157018297475, "grad_norm": 5.976107526976111, "learning_rate": 4.841505677193542e-07, "loss": 17.302, "step": 47213 }, { "epoch": 0.8630339810261941, "grad_norm": 6.730040756861642, "learning_rate": 4.840235028218254e-07, "loss": 17.1269, "step": 47214 }, { "epoch": 0.8630522602226406, "grad_norm": 6.790904197876314, "learning_rate": 4.838964537524016e-07, "loss": 17.5181, "step": 47215 }, { "epoch": 0.8630705394190872, "grad_norm": 6.737555461710658, "learning_rate": 4.8376942051153e-07, "loss": 17.8716, "step": 47216 }, { "epoch": 0.8630888186155337, "grad_norm": 7.146363251924056, "learning_rate": 4.836424030996539e-07, "loss": 17.853, "step": 47217 }, { "epoch": 0.8631070978119801, "grad_norm": 5.588847817540528, "learning_rate": 4.835154015172205e-07, "loss": 17.0578, "step": 47218 }, { "epoch": 0.8631253770084267, "grad_norm": 6.846860794234734, "learning_rate": 4.833884157646729e-07, "loss": 17.6471, "step": 47219 }, { "epoch": 0.8631436562048732, "grad_norm": 6.680030526305628, "learning_rate": 4.832614458424567e-07, "loss": 17.7566, "step": 47220 }, { "epoch": 0.8631619354013198, "grad_norm": 6.120647772670594, "learning_rate": 4.831344917510178e-07, "loss": 17.2642, "step": 47221 }, { "epoch": 0.8631802145977663, "grad_norm": 6.029349860148386, "learning_rate": 4.830075534908008e-07, "loss": 17.3681, "step": 47222 }, { "epoch": 0.8631984937942128, "grad_norm": 5.724945665747659, "learning_rate": 4.828806310622491e-07, "loss": 17.4302, "step": 47223 }, { "epoch": 0.8632167729906594, "grad_norm": 5.3065601855890145, "learning_rate": 4.827537244658093e-07, "loss": 17.1088, "step": 47224 }, { "epoch": 0.8632350521871058, "grad_norm": 5.8330041255221365, "learning_rate": 4.826268337019263e-07, "loss": 16.8889, "step": 47225 }, { "epoch": 0.8632533313835524, "grad_norm": 6.0320279739248495, "learning_rate": 4.824999587710433e-07, "loss": 17.2171, "step": 47226 }, { "epoch": 0.8632716105799989, "grad_norm": 5.276965696805823, "learning_rate": 4.823730996736064e-07, "loss": 16.928, "step": 47227 }, { "epoch": 0.8632898897764454, "grad_norm": 5.747200807694111, "learning_rate": 4.822462564100605e-07, "loss": 17.2113, "step": 47228 }, { "epoch": 0.863308168972892, "grad_norm": 5.07309443670884, "learning_rate": 4.821194289808485e-07, "loss": 16.8778, "step": 47229 }, { "epoch": 0.8633264481693385, "grad_norm": 5.644338400561319, "learning_rate": 4.819926173864164e-07, "loss": 16.9509, "step": 47230 }, { "epoch": 0.863344727365785, "grad_norm": 7.836510860849611, "learning_rate": 4.81865821627207e-07, "loss": 18.0584, "step": 47231 }, { "epoch": 0.8633630065622315, "grad_norm": 7.160816490868428, "learning_rate": 4.817390417036677e-07, "loss": 17.845, "step": 47232 }, { "epoch": 0.863381285758678, "grad_norm": 5.577967711971246, "learning_rate": 4.816122776162391e-07, "loss": 17.1768, "step": 47233 }, { "epoch": 0.8633995649551246, "grad_norm": 5.104996523184887, "learning_rate": 4.814855293653681e-07, "loss": 17.1989, "step": 47234 }, { "epoch": 0.8634178441515711, "grad_norm": 6.382071677278449, "learning_rate": 4.813587969514988e-07, "loss": 17.1701, "step": 47235 }, { "epoch": 0.8634361233480177, "grad_norm": 7.503392448353599, "learning_rate": 4.812320803750747e-07, "loss": 17.5395, "step": 47236 }, { "epoch": 0.8634544025444642, "grad_norm": 7.385473880934932, "learning_rate": 4.811053796365389e-07, "loss": 17.9583, "step": 47237 }, { "epoch": 0.8634726817409106, "grad_norm": 5.944192523304591, "learning_rate": 4.809786947363382e-07, "loss": 17.3243, "step": 47238 }, { "epoch": 0.8634909609373572, "grad_norm": 7.309807641611446, "learning_rate": 4.808520256749144e-07, "loss": 17.5978, "step": 47239 }, { "epoch": 0.8635092401338037, "grad_norm": 7.099309026528422, "learning_rate": 4.807253724527116e-07, "loss": 17.8023, "step": 47240 }, { "epoch": 0.8635275193302503, "grad_norm": 7.891669674336531, "learning_rate": 4.805987350701752e-07, "loss": 18.2158, "step": 47241 }, { "epoch": 0.8635457985266968, "grad_norm": 5.882613951221403, "learning_rate": 4.804721135277468e-07, "loss": 17.067, "step": 47242 }, { "epoch": 0.8635640777231433, "grad_norm": 7.4809271997831255, "learning_rate": 4.803455078258712e-07, "loss": 18.0427, "step": 47243 }, { "epoch": 0.8635823569195898, "grad_norm": 7.084138650930456, "learning_rate": 4.802189179649941e-07, "loss": 17.4763, "step": 47244 }, { "epoch": 0.8636006361160363, "grad_norm": 5.402837968577636, "learning_rate": 4.800923439455557e-07, "loss": 17.4042, "step": 47245 }, { "epoch": 0.8636189153124829, "grad_norm": 6.944939796922672, "learning_rate": 4.799657857680029e-07, "loss": 17.41, "step": 47246 }, { "epoch": 0.8636371945089294, "grad_norm": 6.039149264216549, "learning_rate": 4.798392434327775e-07, "loss": 17.1311, "step": 47247 }, { "epoch": 0.8636554737053759, "grad_norm": 9.77557611487403, "learning_rate": 4.797127169403226e-07, "loss": 18.6358, "step": 47248 }, { "epoch": 0.8636737529018225, "grad_norm": 6.404436161636078, "learning_rate": 4.795862062910834e-07, "loss": 17.3166, "step": 47249 }, { "epoch": 0.863692032098269, "grad_norm": 6.390158566652719, "learning_rate": 4.794597114855021e-07, "loss": 17.4657, "step": 47250 }, { "epoch": 0.8637103112947155, "grad_norm": 7.130245645701453, "learning_rate": 4.793332325240213e-07, "loss": 17.4602, "step": 47251 }, { "epoch": 0.863728590491162, "grad_norm": 7.350557444519061, "learning_rate": 4.792067694070856e-07, "loss": 17.7931, "step": 47252 }, { "epoch": 0.8637468696876085, "grad_norm": 6.43323067612466, "learning_rate": 4.790803221351387e-07, "loss": 17.4713, "step": 47253 }, { "epoch": 0.8637651488840551, "grad_norm": 4.542874601492603, "learning_rate": 4.789538907086216e-07, "loss": 16.6473, "step": 47254 }, { "epoch": 0.8637834280805016, "grad_norm": 7.227753671811764, "learning_rate": 4.788274751279798e-07, "loss": 18.093, "step": 47255 }, { "epoch": 0.8638017072769482, "grad_norm": 6.4996792907233365, "learning_rate": 4.787010753936555e-07, "loss": 17.4946, "step": 47256 }, { "epoch": 0.8638199864733946, "grad_norm": 5.546829573717986, "learning_rate": 4.785746915060907e-07, "loss": 16.9935, "step": 47257 }, { "epoch": 0.8638382656698411, "grad_norm": 7.160038353522918, "learning_rate": 4.784483234657306e-07, "loss": 17.5168, "step": 47258 }, { "epoch": 0.8638565448662877, "grad_norm": 6.581719007300851, "learning_rate": 4.783219712730152e-07, "loss": 17.5938, "step": 47259 }, { "epoch": 0.8638748240627342, "grad_norm": 5.400089652366466, "learning_rate": 4.7819563492839e-07, "loss": 17.0232, "step": 47260 }, { "epoch": 0.8638931032591807, "grad_norm": 5.90354561743327, "learning_rate": 4.780693144322956e-07, "loss": 17.1753, "step": 47261 }, { "epoch": 0.8639113824556273, "grad_norm": 7.15996301076272, "learning_rate": 4.779430097851762e-07, "loss": 17.7306, "step": 47262 }, { "epoch": 0.8639296616520737, "grad_norm": 5.6168745851630675, "learning_rate": 4.778167209874751e-07, "loss": 17.1128, "step": 47263 }, { "epoch": 0.8639479408485203, "grad_norm": 4.817904471367629, "learning_rate": 4.776904480396338e-07, "loss": 16.9812, "step": 47264 }, { "epoch": 0.8639662200449668, "grad_norm": 8.245995572677256, "learning_rate": 4.775641909420942e-07, "loss": 17.659, "step": 47265 }, { "epoch": 0.8639844992414133, "grad_norm": 5.975543240492577, "learning_rate": 4.774379496953007e-07, "loss": 17.0801, "step": 47266 }, { "epoch": 0.8640027784378599, "grad_norm": 7.0459289377209116, "learning_rate": 4.773117242996944e-07, "loss": 17.5846, "step": 47267 }, { "epoch": 0.8640210576343064, "grad_norm": 10.930134441686102, "learning_rate": 4.771855147557175e-07, "loss": 18.3823, "step": 47268 }, { "epoch": 0.864039336830753, "grad_norm": 6.3705941947749185, "learning_rate": 4.770593210638125e-07, "loss": 17.1878, "step": 47269 }, { "epoch": 0.8640576160271994, "grad_norm": 7.532523565302585, "learning_rate": 4.769331432244234e-07, "loss": 17.8623, "step": 47270 }, { "epoch": 0.8640758952236459, "grad_norm": 5.399812170772159, "learning_rate": 4.7680698123798995e-07, "loss": 17.0669, "step": 47271 }, { "epoch": 0.8640941744200925, "grad_norm": 5.814406693924071, "learning_rate": 4.7668083510495677e-07, "loss": 17.496, "step": 47272 }, { "epoch": 0.864112453616539, "grad_norm": 6.069611258679667, "learning_rate": 4.765547048257646e-07, "loss": 17.3303, "step": 47273 }, { "epoch": 0.8641307328129856, "grad_norm": 5.306649050356313, "learning_rate": 4.764285904008542e-07, "loss": 16.9092, "step": 47274 }, { "epoch": 0.8641490120094321, "grad_norm": 5.594803289122977, "learning_rate": 4.763024918306708e-07, "loss": 17.0878, "step": 47275 }, { "epoch": 0.8641672912058785, "grad_norm": 6.82300163826723, "learning_rate": 4.761764091156534e-07, "loss": 17.6208, "step": 47276 }, { "epoch": 0.8641855704023251, "grad_norm": 4.740222429308597, "learning_rate": 4.760503422562457e-07, "loss": 16.8091, "step": 47277 }, { "epoch": 0.8642038495987716, "grad_norm": 5.609198307099613, "learning_rate": 4.759242912528883e-07, "loss": 17.3057, "step": 47278 }, { "epoch": 0.8642221287952182, "grad_norm": 5.816452985091733, "learning_rate": 4.757982561060237e-07, "loss": 17.2033, "step": 47279 }, { "epoch": 0.8642404079916647, "grad_norm": 5.436905000089071, "learning_rate": 4.756722368160949e-07, "loss": 16.9244, "step": 47280 }, { "epoch": 0.8642586871881112, "grad_norm": 5.7152990025992265, "learning_rate": 4.75546233383542e-07, "loss": 17.2643, "step": 47281 }, { "epoch": 0.8642769663845578, "grad_norm": 6.451519062216871, "learning_rate": 4.7542024580880587e-07, "loss": 17.3101, "step": 47282 }, { "epoch": 0.8642952455810042, "grad_norm": 7.429312370715903, "learning_rate": 4.752942740923305e-07, "loss": 17.9404, "step": 47283 }, { "epoch": 0.8643135247774508, "grad_norm": 5.438512392563585, "learning_rate": 4.751683182345557e-07, "loss": 17.1538, "step": 47284 }, { "epoch": 0.8643318039738973, "grad_norm": 7.266250372873886, "learning_rate": 4.75042378235922e-07, "loss": 17.1354, "step": 47285 }, { "epoch": 0.8643500831703438, "grad_norm": 7.001961633545646, "learning_rate": 4.749164540968737e-07, "loss": 17.7692, "step": 47286 }, { "epoch": 0.8643683623667904, "grad_norm": 5.640887965980426, "learning_rate": 4.747905458178492e-07, "loss": 17.1339, "step": 47287 }, { "epoch": 0.8643866415632369, "grad_norm": 6.712105522472522, "learning_rate": 4.746646533992916e-07, "loss": 17.2237, "step": 47288 }, { "epoch": 0.8644049207596834, "grad_norm": 7.640640320531211, "learning_rate": 4.745387768416426e-07, "loss": 18.1576, "step": 47289 }, { "epoch": 0.8644231999561299, "grad_norm": 6.633532501967231, "learning_rate": 4.7441291614534203e-07, "loss": 17.5202, "step": 47290 }, { "epoch": 0.8644414791525764, "grad_norm": 5.934420018859654, "learning_rate": 4.742870713108311e-07, "loss": 17.1715, "step": 47291 }, { "epoch": 0.864459758349023, "grad_norm": 5.160224487828859, "learning_rate": 4.7416124233855167e-07, "loss": 17.0476, "step": 47292 }, { "epoch": 0.8644780375454695, "grad_norm": 5.538535999390953, "learning_rate": 4.740354292289434e-07, "loss": 17.3846, "step": 47293 }, { "epoch": 0.8644963167419161, "grad_norm": 5.890438519543929, "learning_rate": 4.7390963198244935e-07, "loss": 17.0863, "step": 47294 }, { "epoch": 0.8645145959383626, "grad_norm": 6.14128819527358, "learning_rate": 4.737838505995096e-07, "loss": 17.2753, "step": 47295 }, { "epoch": 0.864532875134809, "grad_norm": 5.562728591028847, "learning_rate": 4.736580850805633e-07, "loss": 17.1026, "step": 47296 }, { "epoch": 0.8645511543312556, "grad_norm": 5.639660168775241, "learning_rate": 4.7353233542605295e-07, "loss": 17.0677, "step": 47297 }, { "epoch": 0.8645694335277021, "grad_norm": 7.210741843883728, "learning_rate": 4.734066016364197e-07, "loss": 17.8296, "step": 47298 }, { "epoch": 0.8645877127241487, "grad_norm": 5.867497623452994, "learning_rate": 4.732808837121028e-07, "loss": 17.2896, "step": 47299 }, { "epoch": 0.8646059919205952, "grad_norm": 6.508756524265846, "learning_rate": 4.73155181653544e-07, "loss": 17.7941, "step": 47300 }, { "epoch": 0.8646242711170417, "grad_norm": 7.065158134573305, "learning_rate": 4.7302949546118415e-07, "loss": 17.5369, "step": 47301 }, { "epoch": 0.8646425503134882, "grad_norm": 7.528890422467986, "learning_rate": 4.7290382513546176e-07, "loss": 17.6176, "step": 47302 }, { "epoch": 0.8646608295099347, "grad_norm": 6.063135236768521, "learning_rate": 4.727781706768197e-07, "loss": 17.5591, "step": 47303 }, { "epoch": 0.8646791087063813, "grad_norm": 7.166877137605859, "learning_rate": 4.726525320856973e-07, "loss": 17.6016, "step": 47304 }, { "epoch": 0.8646973879028278, "grad_norm": 6.654803014436357, "learning_rate": 4.725269093625334e-07, "loss": 17.0816, "step": 47305 }, { "epoch": 0.8647156670992743, "grad_norm": 6.084916446347743, "learning_rate": 4.724013025077706e-07, "loss": 17.2761, "step": 47306 }, { "epoch": 0.8647339462957209, "grad_norm": 8.115716202658064, "learning_rate": 4.7227571152184794e-07, "loss": 17.3945, "step": 47307 }, { "epoch": 0.8647522254921673, "grad_norm": 6.32403847241209, "learning_rate": 4.7215013640520724e-07, "loss": 17.4108, "step": 47308 }, { "epoch": 0.8647705046886139, "grad_norm": 6.646929204744601, "learning_rate": 4.720245771582871e-07, "loss": 17.3304, "step": 47309 }, { "epoch": 0.8647887838850604, "grad_norm": 6.060391371961277, "learning_rate": 4.7189903378152715e-07, "loss": 17.3582, "step": 47310 }, { "epoch": 0.8648070630815069, "grad_norm": 6.449114352817698, "learning_rate": 4.717735062753692e-07, "loss": 17.3793, "step": 47311 }, { "epoch": 0.8648253422779535, "grad_norm": 8.268056971432555, "learning_rate": 4.716479946402519e-07, "loss": 17.8609, "step": 47312 }, { "epoch": 0.8648436214744, "grad_norm": 6.256667118676629, "learning_rate": 4.715224988766143e-07, "loss": 17.2575, "step": 47313 }, { "epoch": 0.8648619006708466, "grad_norm": 9.779232583032083, "learning_rate": 4.7139701898489876e-07, "loss": 18.1505, "step": 47314 }, { "epoch": 0.864880179867293, "grad_norm": 5.793587694581421, "learning_rate": 4.712715549655428e-07, "loss": 16.9985, "step": 47315 }, { "epoch": 0.8648984590637395, "grad_norm": 6.0798344219340645, "learning_rate": 4.711461068189865e-07, "loss": 17.2361, "step": 47316 }, { "epoch": 0.8649167382601861, "grad_norm": 5.266563672304668, "learning_rate": 4.7102067454567133e-07, "loss": 17.141, "step": 47317 }, { "epoch": 0.8649350174566326, "grad_norm": 5.98530788471852, "learning_rate": 4.7089525814603575e-07, "loss": 17.26, "step": 47318 }, { "epoch": 0.8649532966530792, "grad_norm": 6.19721078540651, "learning_rate": 4.707698576205183e-07, "loss": 16.9895, "step": 47319 }, { "epoch": 0.8649715758495257, "grad_norm": 6.596649854341659, "learning_rate": 4.706444729695608e-07, "loss": 17.3113, "step": 47320 }, { "epoch": 0.8649898550459721, "grad_norm": 6.531863403419166, "learning_rate": 4.705191041936008e-07, "loss": 17.5586, "step": 47321 }, { "epoch": 0.8650081342424187, "grad_norm": 7.456235570872469, "learning_rate": 4.7039375129307787e-07, "loss": 17.5408, "step": 47322 }, { "epoch": 0.8650264134388652, "grad_norm": 8.971518280940822, "learning_rate": 4.7026841426843216e-07, "loss": 17.8045, "step": 47323 }, { "epoch": 0.8650446926353118, "grad_norm": 6.167266164665092, "learning_rate": 4.7014309312010175e-07, "loss": 17.1841, "step": 47324 }, { "epoch": 0.8650629718317583, "grad_norm": 5.957000769274315, "learning_rate": 4.700177878485268e-07, "loss": 17.1949, "step": 47325 }, { "epoch": 0.8650812510282048, "grad_norm": 5.219468691520305, "learning_rate": 4.698924984541475e-07, "loss": 16.9453, "step": 47326 }, { "epoch": 0.8650995302246514, "grad_norm": 5.69264439649004, "learning_rate": 4.6976722493740077e-07, "loss": 17.2428, "step": 47327 }, { "epoch": 0.8651178094210978, "grad_norm": 7.306425334342773, "learning_rate": 4.6964196729872793e-07, "loss": 17.4825, "step": 47328 }, { "epoch": 0.8651360886175443, "grad_norm": 6.004141852918939, "learning_rate": 4.6951672553856686e-07, "loss": 17.2599, "step": 47329 }, { "epoch": 0.8651543678139909, "grad_norm": 6.905380317865105, "learning_rate": 4.693914996573556e-07, "loss": 17.7335, "step": 47330 }, { "epoch": 0.8651726470104374, "grad_norm": 5.646057831751791, "learning_rate": 4.69266289655535e-07, "loss": 17.36, "step": 47331 }, { "epoch": 0.865190926206884, "grad_norm": 6.919259716530101, "learning_rate": 4.691410955335418e-07, "loss": 17.6476, "step": 47332 }, { "epoch": 0.8652092054033305, "grad_norm": 5.468456314202326, "learning_rate": 4.6901591729181573e-07, "loss": 17.0679, "step": 47333 }, { "epoch": 0.8652274845997769, "grad_norm": 6.060562882135592, "learning_rate": 4.6889075493079693e-07, "loss": 17.0045, "step": 47334 }, { "epoch": 0.8652457637962235, "grad_norm": 5.956028144152774, "learning_rate": 4.687656084509223e-07, "loss": 17.2362, "step": 47335 }, { "epoch": 0.86526404299267, "grad_norm": 6.119035360674083, "learning_rate": 4.6864047785263043e-07, "loss": 17.314, "step": 47336 }, { "epoch": 0.8652823221891166, "grad_norm": 5.713054799988639, "learning_rate": 4.685153631363615e-07, "loss": 17.0163, "step": 47337 }, { "epoch": 0.8653006013855631, "grad_norm": 6.490676144840522, "learning_rate": 4.683902643025523e-07, "loss": 17.2038, "step": 47338 }, { "epoch": 0.8653188805820096, "grad_norm": 5.332651662234588, "learning_rate": 4.6826518135164254e-07, "loss": 17.1558, "step": 47339 }, { "epoch": 0.8653371597784562, "grad_norm": 5.81785873857018, "learning_rate": 4.6814011428407023e-07, "loss": 17.0018, "step": 47340 }, { "epoch": 0.8653554389749026, "grad_norm": 5.491422016572133, "learning_rate": 4.680150631002728e-07, "loss": 17.1419, "step": 47341 }, { "epoch": 0.8653737181713492, "grad_norm": 4.96448492098324, "learning_rate": 4.678900278006887e-07, "loss": 16.8344, "step": 47342 }, { "epoch": 0.8653919973677957, "grad_norm": 7.468616260909234, "learning_rate": 4.6776500838575824e-07, "loss": 17.3641, "step": 47343 }, { "epoch": 0.8654102765642422, "grad_norm": 5.611521999790289, "learning_rate": 4.676400048559171e-07, "loss": 17.2784, "step": 47344 }, { "epoch": 0.8654285557606888, "grad_norm": 7.135534039455852, "learning_rate": 4.6751501721160495e-07, "loss": 17.6492, "step": 47345 }, { "epoch": 0.8654468349571353, "grad_norm": 5.952606825604808, "learning_rate": 4.6739004545325983e-07, "loss": 17.2746, "step": 47346 }, { "epoch": 0.8654651141535818, "grad_norm": 5.027978770776331, "learning_rate": 4.6726508958131855e-07, "loss": 16.8627, "step": 47347 }, { "epoch": 0.8654833933500283, "grad_norm": 7.828517034756831, "learning_rate": 4.6714014959622024e-07, "loss": 18.0162, "step": 47348 }, { "epoch": 0.8655016725464748, "grad_norm": 6.933450590072823, "learning_rate": 4.670152254984023e-07, "loss": 17.6772, "step": 47349 }, { "epoch": 0.8655199517429214, "grad_norm": 7.715273610711969, "learning_rate": 4.668903172883021e-07, "loss": 17.7241, "step": 47350 }, { "epoch": 0.8655382309393679, "grad_norm": 7.543573658046909, "learning_rate": 4.667654249663578e-07, "loss": 17.8453, "step": 47351 }, { "epoch": 0.8655565101358145, "grad_norm": 5.680530664008956, "learning_rate": 4.666405485330072e-07, "loss": 16.9916, "step": 47352 }, { "epoch": 0.865574789332261, "grad_norm": 8.563954767826118, "learning_rate": 4.665156879886895e-07, "loss": 18.1367, "step": 47353 }, { "epoch": 0.8655930685287074, "grad_norm": 5.7247224144509765, "learning_rate": 4.6639084333384043e-07, "loss": 17.0712, "step": 47354 }, { "epoch": 0.865611347725154, "grad_norm": 5.734294508679419, "learning_rate": 4.662660145688974e-07, "loss": 17.0986, "step": 47355 }, { "epoch": 0.8656296269216005, "grad_norm": 6.403724255895151, "learning_rate": 4.66141201694299e-07, "loss": 17.4949, "step": 47356 }, { "epoch": 0.8656479061180471, "grad_norm": 7.11003025562489, "learning_rate": 4.6601640471048257e-07, "loss": 17.8449, "step": 47357 }, { "epoch": 0.8656661853144936, "grad_norm": 7.009162131015206, "learning_rate": 4.6589162361788455e-07, "loss": 17.6433, "step": 47358 }, { "epoch": 0.86568446451094, "grad_norm": 5.121477298109195, "learning_rate": 4.6576685841694334e-07, "loss": 16.9866, "step": 47359 }, { "epoch": 0.8657027437073866, "grad_norm": 4.867661461946292, "learning_rate": 4.656421091080954e-07, "loss": 16.8798, "step": 47360 }, { "epoch": 0.8657210229038331, "grad_norm": 6.780738183347869, "learning_rate": 4.65517375691778e-07, "loss": 16.8303, "step": 47361 }, { "epoch": 0.8657393021002797, "grad_norm": 6.968824768019511, "learning_rate": 4.653926581684298e-07, "loss": 17.6489, "step": 47362 }, { "epoch": 0.8657575812967262, "grad_norm": 6.204302999882409, "learning_rate": 4.652679565384871e-07, "loss": 17.7166, "step": 47363 }, { "epoch": 0.8657758604931727, "grad_norm": 7.691927626334833, "learning_rate": 4.6514327080238563e-07, "loss": 17.6034, "step": 47364 }, { "epoch": 0.8657941396896193, "grad_norm": 6.1519629392770705, "learning_rate": 4.650186009605645e-07, "loss": 17.5763, "step": 47365 }, { "epoch": 0.8658124188860657, "grad_norm": 6.442424523517515, "learning_rate": 4.6489394701345946e-07, "loss": 17.5572, "step": 47366 }, { "epoch": 0.8658306980825123, "grad_norm": 5.850833895673569, "learning_rate": 4.647693089615063e-07, "loss": 16.949, "step": 47367 }, { "epoch": 0.8658489772789588, "grad_norm": 7.37784480538389, "learning_rate": 4.6464468680514464e-07, "loss": 17.3912, "step": 47368 }, { "epoch": 0.8658672564754053, "grad_norm": 7.283864585404842, "learning_rate": 4.645200805448086e-07, "loss": 17.3879, "step": 47369 }, { "epoch": 0.8658855356718519, "grad_norm": 6.856896367027696, "learning_rate": 4.643954901809361e-07, "loss": 17.7518, "step": 47370 }, { "epoch": 0.8659038148682984, "grad_norm": 7.244038618278239, "learning_rate": 4.6427091571396523e-07, "loss": 17.9557, "step": 47371 }, { "epoch": 0.865922094064745, "grad_norm": 7.47199264144173, "learning_rate": 4.6414635714432944e-07, "loss": 17.889, "step": 47372 }, { "epoch": 0.8659403732611914, "grad_norm": 5.151227941701756, "learning_rate": 4.640218144724684e-07, "loss": 17.0155, "step": 47373 }, { "epoch": 0.8659586524576379, "grad_norm": 5.722813764947572, "learning_rate": 4.6389728769881735e-07, "loss": 17.223, "step": 47374 }, { "epoch": 0.8659769316540845, "grad_norm": 6.0828648191709975, "learning_rate": 4.6377277682381206e-07, "loss": 17.4291, "step": 47375 }, { "epoch": 0.865995210850531, "grad_norm": 6.013181885928172, "learning_rate": 4.636482818478899e-07, "loss": 17.1647, "step": 47376 }, { "epoch": 0.8660134900469776, "grad_norm": 6.006849524459769, "learning_rate": 4.6352380277148723e-07, "loss": 17.3631, "step": 47377 }, { "epoch": 0.8660317692434241, "grad_norm": 6.361106940647693, "learning_rate": 4.6339933959503923e-07, "loss": 17.4316, "step": 47378 }, { "epoch": 0.8660500484398705, "grad_norm": 11.094435640616656, "learning_rate": 4.6327489231898223e-07, "loss": 18.5598, "step": 47379 }, { "epoch": 0.8660683276363171, "grad_norm": 7.45412866292768, "learning_rate": 4.6315046094375425e-07, "loss": 18.0494, "step": 47380 }, { "epoch": 0.8660866068327636, "grad_norm": 5.720550161984593, "learning_rate": 4.6302604546978934e-07, "loss": 17.4214, "step": 47381 }, { "epoch": 0.8661048860292102, "grad_norm": 5.381812259727416, "learning_rate": 4.629016458975255e-07, "loss": 17.1827, "step": 47382 }, { "epoch": 0.8661231652256567, "grad_norm": 10.15486138094246, "learning_rate": 4.6277726222739683e-07, "loss": 18.003, "step": 47383 }, { "epoch": 0.8661414444221032, "grad_norm": 4.843560321714085, "learning_rate": 4.6265289445984073e-07, "loss": 16.9105, "step": 47384 }, { "epoch": 0.8661597236185498, "grad_norm": 5.859198252347298, "learning_rate": 4.62528542595293e-07, "loss": 17.0565, "step": 47385 }, { "epoch": 0.8661780028149962, "grad_norm": 8.169922382415228, "learning_rate": 4.624042066341877e-07, "loss": 17.5551, "step": 47386 }, { "epoch": 0.8661962820114428, "grad_norm": 4.761056344588522, "learning_rate": 4.622798865769629e-07, "loss": 17.001, "step": 47387 }, { "epoch": 0.8662145612078893, "grad_norm": 6.075314802098967, "learning_rate": 4.6215558242405203e-07, "loss": 17.0469, "step": 47388 }, { "epoch": 0.8662328404043358, "grad_norm": 8.313275159635676, "learning_rate": 4.6203129417589253e-07, "loss": 18.0312, "step": 47389 }, { "epoch": 0.8662511196007824, "grad_norm": 4.769073242587826, "learning_rate": 4.6190702183292025e-07, "loss": 16.9181, "step": 47390 }, { "epoch": 0.8662693987972289, "grad_norm": 5.853319195537093, "learning_rate": 4.6178276539557034e-07, "loss": 17.2578, "step": 47391 }, { "epoch": 0.8662876779936755, "grad_norm": 5.279608223582275, "learning_rate": 4.6165852486427686e-07, "loss": 17.1474, "step": 47392 }, { "epoch": 0.8663059571901219, "grad_norm": 4.682476560094308, "learning_rate": 4.615343002394773e-07, "loss": 16.671, "step": 47393 }, { "epoch": 0.8663242363865684, "grad_norm": 6.761051102971279, "learning_rate": 4.6141009152160634e-07, "loss": 17.4173, "step": 47394 }, { "epoch": 0.866342515583015, "grad_norm": 6.153878862447572, "learning_rate": 4.6128589871109794e-07, "loss": 17.0742, "step": 47395 }, { "epoch": 0.8663607947794615, "grad_norm": 6.094202644881729, "learning_rate": 4.6116172180838916e-07, "loss": 17.0142, "step": 47396 }, { "epoch": 0.866379073975908, "grad_norm": 7.108396442622987, "learning_rate": 4.6103756081391506e-07, "loss": 17.7882, "step": 47397 }, { "epoch": 0.8663973531723546, "grad_norm": 5.831879267460738, "learning_rate": 4.6091341572810975e-07, "loss": 17.0715, "step": 47398 }, { "epoch": 0.866415632368801, "grad_norm": 7.467561144430691, "learning_rate": 4.607892865514102e-07, "loss": 17.7755, "step": 47399 }, { "epoch": 0.8664339115652476, "grad_norm": 5.825240237811286, "learning_rate": 4.606651732842493e-07, "loss": 17.1582, "step": 47400 }, { "epoch": 0.8664521907616941, "grad_norm": 7.29811637328824, "learning_rate": 4.6054107592706343e-07, "loss": 17.3024, "step": 47401 }, { "epoch": 0.8664704699581406, "grad_norm": 5.453279249566069, "learning_rate": 4.6041699448028774e-07, "loss": 17.0626, "step": 47402 }, { "epoch": 0.8664887491545872, "grad_norm": 5.56402481741644, "learning_rate": 4.6029292894435585e-07, "loss": 17.2359, "step": 47403 }, { "epoch": 0.8665070283510337, "grad_norm": 5.570869292274601, "learning_rate": 4.6016887931970346e-07, "loss": 17.2304, "step": 47404 }, { "epoch": 0.8665253075474803, "grad_norm": 5.407814441668956, "learning_rate": 4.600448456067647e-07, "loss": 17.0303, "step": 47405 }, { "epoch": 0.8665435867439267, "grad_norm": 8.734948701307495, "learning_rate": 4.599208278059747e-07, "loss": 18.3483, "step": 47406 }, { "epoch": 0.8665618659403732, "grad_norm": 6.670709365903628, "learning_rate": 4.597968259177693e-07, "loss": 17.2053, "step": 47407 }, { "epoch": 0.8665801451368198, "grad_norm": 8.888954996285413, "learning_rate": 4.5967283994258204e-07, "loss": 18.2429, "step": 47408 }, { "epoch": 0.8665984243332663, "grad_norm": 7.397871954428926, "learning_rate": 4.595488698808465e-07, "loss": 17.4913, "step": 47409 }, { "epoch": 0.8666167035297129, "grad_norm": 6.717556534454861, "learning_rate": 4.594249157329994e-07, "loss": 17.6421, "step": 47410 }, { "epoch": 0.8666349827261594, "grad_norm": 6.603606591680258, "learning_rate": 4.5930097749947335e-07, "loss": 17.538, "step": 47411 }, { "epoch": 0.8666532619226058, "grad_norm": 5.1537334627990745, "learning_rate": 4.5917705518070286e-07, "loss": 17.0315, "step": 47412 }, { "epoch": 0.8666715411190524, "grad_norm": 5.756109320706204, "learning_rate": 4.5905314877712325e-07, "loss": 17.1495, "step": 47413 }, { "epoch": 0.8666898203154989, "grad_norm": 5.911079424757828, "learning_rate": 4.58929258289168e-07, "loss": 17.1932, "step": 47414 }, { "epoch": 0.8667080995119455, "grad_norm": 7.600668110509518, "learning_rate": 4.5880538371727126e-07, "loss": 17.775, "step": 47415 }, { "epoch": 0.866726378708392, "grad_norm": 6.926131856682248, "learning_rate": 4.586815250618687e-07, "loss": 17.6122, "step": 47416 }, { "epoch": 0.8667446579048385, "grad_norm": 4.293552400508835, "learning_rate": 4.5855768232339226e-07, "loss": 16.664, "step": 47417 }, { "epoch": 0.866762937101285, "grad_norm": 5.230501810927888, "learning_rate": 4.584338555022777e-07, "loss": 16.7318, "step": 47418 }, { "epoch": 0.8667812162977315, "grad_norm": 6.962196265514127, "learning_rate": 4.583100445989586e-07, "loss": 17.4328, "step": 47419 }, { "epoch": 0.8667994954941781, "grad_norm": 7.226239785808188, "learning_rate": 4.581862496138678e-07, "loss": 17.9251, "step": 47420 }, { "epoch": 0.8668177746906246, "grad_norm": 6.420454282376263, "learning_rate": 4.5806247054744123e-07, "loss": 17.2708, "step": 47421 }, { "epoch": 0.8668360538870711, "grad_norm": 5.731787651085271, "learning_rate": 4.5793870740011126e-07, "loss": 17.0471, "step": 47422 }, { "epoch": 0.8668543330835177, "grad_norm": 5.761413159756473, "learning_rate": 4.578149601723114e-07, "loss": 17.152, "step": 47423 }, { "epoch": 0.8668726122799641, "grad_norm": 9.447386062926507, "learning_rate": 4.576912288644758e-07, "loss": 17.7445, "step": 47424 }, { "epoch": 0.8668908914764107, "grad_norm": 5.665614157071083, "learning_rate": 4.575675134770391e-07, "loss": 17.2966, "step": 47425 }, { "epoch": 0.8669091706728572, "grad_norm": 6.338998428513757, "learning_rate": 4.5744381401043313e-07, "loss": 17.1211, "step": 47426 }, { "epoch": 0.8669274498693037, "grad_norm": 6.537011783537685, "learning_rate": 4.5732013046509316e-07, "loss": 17.6958, "step": 47427 }, { "epoch": 0.8669457290657503, "grad_norm": 5.267796767576695, "learning_rate": 4.5719646284145213e-07, "loss": 17.0568, "step": 47428 }, { "epoch": 0.8669640082621968, "grad_norm": 5.104676786426639, "learning_rate": 4.5707281113994253e-07, "loss": 16.9046, "step": 47429 }, { "epoch": 0.8669822874586434, "grad_norm": 7.010327419949228, "learning_rate": 4.5694917536099946e-07, "loss": 17.7348, "step": 47430 }, { "epoch": 0.8670005666550898, "grad_norm": 5.804135119470701, "learning_rate": 4.568255555050538e-07, "loss": 17.211, "step": 47431 }, { "epoch": 0.8670188458515363, "grad_norm": 5.418217571561456, "learning_rate": 4.567019515725418e-07, "loss": 16.8911, "step": 47432 }, { "epoch": 0.8670371250479829, "grad_norm": 6.586965620849375, "learning_rate": 4.565783635638943e-07, "loss": 17.231, "step": 47433 }, { "epoch": 0.8670554042444294, "grad_norm": 5.468805533914594, "learning_rate": 4.564547914795453e-07, "loss": 17.0489, "step": 47434 }, { "epoch": 0.867073683440876, "grad_norm": 5.539207690451103, "learning_rate": 4.563312353199284e-07, "loss": 17.1607, "step": 47435 }, { "epoch": 0.8670919626373225, "grad_norm": 6.398386951930923, "learning_rate": 4.5620769508547715e-07, "loss": 17.5541, "step": 47436 }, { "epoch": 0.867110241833769, "grad_norm": 7.753513117458198, "learning_rate": 4.5608417077662225e-07, "loss": 17.9959, "step": 47437 }, { "epoch": 0.8671285210302155, "grad_norm": 6.674378449619417, "learning_rate": 4.5596066239379846e-07, "loss": 17.3513, "step": 47438 }, { "epoch": 0.867146800226662, "grad_norm": 4.882692480276763, "learning_rate": 4.558371699374392e-07, "loss": 16.7945, "step": 47439 }, { "epoch": 0.8671650794231086, "grad_norm": 6.5109909709029035, "learning_rate": 4.557136934079748e-07, "loss": 17.7866, "step": 47440 }, { "epoch": 0.8671833586195551, "grad_norm": 5.480228206980465, "learning_rate": 4.5559023280584037e-07, "loss": 17.2417, "step": 47441 }, { "epoch": 0.8672016378160016, "grad_norm": 5.561011161524609, "learning_rate": 4.554667881314673e-07, "loss": 17.1678, "step": 47442 }, { "epoch": 0.8672199170124482, "grad_norm": 6.357072732272322, "learning_rate": 4.553433593852885e-07, "loss": 17.4394, "step": 47443 }, { "epoch": 0.8672381962088946, "grad_norm": 7.282608785613831, "learning_rate": 4.5521994656773816e-07, "loss": 17.4371, "step": 47444 }, { "epoch": 0.8672564754053412, "grad_norm": 7.0676385569395785, "learning_rate": 4.5509654967924643e-07, "loss": 17.536, "step": 47445 }, { "epoch": 0.8672747546017877, "grad_norm": 5.255175939210219, "learning_rate": 4.549731687202474e-07, "loss": 17.0688, "step": 47446 }, { "epoch": 0.8672930337982342, "grad_norm": 6.647256542525296, "learning_rate": 4.548498036911736e-07, "loss": 17.7038, "step": 47447 }, { "epoch": 0.8673113129946808, "grad_norm": 5.4675142368738285, "learning_rate": 4.5472645459245566e-07, "loss": 17.1739, "step": 47448 }, { "epoch": 0.8673295921911273, "grad_norm": 6.717952737437719, "learning_rate": 4.546031214245278e-07, "loss": 17.3353, "step": 47449 }, { "epoch": 0.8673478713875739, "grad_norm": 7.790362856389862, "learning_rate": 4.544798041878218e-07, "loss": 17.683, "step": 47450 }, { "epoch": 0.8673661505840203, "grad_norm": 5.37406422794041, "learning_rate": 4.54356502882769e-07, "loss": 16.9557, "step": 47451 }, { "epoch": 0.8673844297804668, "grad_norm": 5.614699908420543, "learning_rate": 4.5423321750980185e-07, "loss": 16.9301, "step": 47452 }, { "epoch": 0.8674027089769134, "grad_norm": 5.873265560800704, "learning_rate": 4.5410994806935395e-07, "loss": 17.2595, "step": 47453 }, { "epoch": 0.8674209881733599, "grad_norm": 6.110186363854021, "learning_rate": 4.539866945618548e-07, "loss": 17.2577, "step": 47454 }, { "epoch": 0.8674392673698065, "grad_norm": 6.7962719896606, "learning_rate": 4.5386345698773926e-07, "loss": 17.6293, "step": 47455 }, { "epoch": 0.867457546566253, "grad_norm": 7.663594781744421, "learning_rate": 4.537402353474374e-07, "loss": 16.901, "step": 47456 }, { "epoch": 0.8674758257626994, "grad_norm": 6.3597166689177795, "learning_rate": 4.536170296413811e-07, "loss": 17.3364, "step": 47457 }, { "epoch": 0.867494104959146, "grad_norm": 5.747010781496841, "learning_rate": 4.5349383987000286e-07, "loss": 17.4853, "step": 47458 }, { "epoch": 0.8675123841555925, "grad_norm": 5.9399228967588495, "learning_rate": 4.533706660337339e-07, "loss": 17.287, "step": 47459 }, { "epoch": 0.8675306633520391, "grad_norm": 7.365269574375756, "learning_rate": 4.532475081330057e-07, "loss": 17.5442, "step": 47460 }, { "epoch": 0.8675489425484856, "grad_norm": 5.551284972436154, "learning_rate": 4.5312436616825164e-07, "loss": 17.2411, "step": 47461 }, { "epoch": 0.8675672217449321, "grad_norm": 9.572449253719832, "learning_rate": 4.5300124013990146e-07, "loss": 18.046, "step": 47462 }, { "epoch": 0.8675855009413787, "grad_norm": 5.888676597062479, "learning_rate": 4.5287813004838756e-07, "loss": 17.3195, "step": 47463 }, { "epoch": 0.8676037801378251, "grad_norm": 4.8958241205323745, "learning_rate": 4.527550358941418e-07, "loss": 16.9139, "step": 47464 }, { "epoch": 0.8676220593342716, "grad_norm": 6.228279180402248, "learning_rate": 4.5263195767759396e-07, "loss": 17.1555, "step": 47465 }, { "epoch": 0.8676403385307182, "grad_norm": 6.0786825695971, "learning_rate": 4.5250889539917744e-07, "loss": 17.3391, "step": 47466 }, { "epoch": 0.8676586177271647, "grad_norm": 5.987067800541139, "learning_rate": 4.523858490593225e-07, "loss": 17.4332, "step": 47467 }, { "epoch": 0.8676768969236113, "grad_norm": 5.400789740157765, "learning_rate": 4.522628186584599e-07, "loss": 17.0467, "step": 47468 }, { "epoch": 0.8676951761200578, "grad_norm": 5.928770894349012, "learning_rate": 4.52139804197021e-07, "loss": 17.1796, "step": 47469 }, { "epoch": 0.8677134553165042, "grad_norm": 6.138425493356122, "learning_rate": 4.5201680567543873e-07, "loss": 17.4263, "step": 47470 }, { "epoch": 0.8677317345129508, "grad_norm": 5.943178831900853, "learning_rate": 4.518938230941422e-07, "loss": 17.2324, "step": 47471 }, { "epoch": 0.8677500137093973, "grad_norm": 7.078583488533041, "learning_rate": 4.5177085645356387e-07, "loss": 17.4783, "step": 47472 }, { "epoch": 0.8677682929058439, "grad_norm": 5.636869140628849, "learning_rate": 4.51647905754134e-07, "loss": 16.9245, "step": 47473 }, { "epoch": 0.8677865721022904, "grad_norm": 7.020930920934604, "learning_rate": 4.515249709962821e-07, "loss": 17.2935, "step": 47474 }, { "epoch": 0.8678048512987369, "grad_norm": 5.674536012983238, "learning_rate": 4.5140205218044185e-07, "loss": 17.3782, "step": 47475 }, { "epoch": 0.8678231304951834, "grad_norm": 7.9734857563608355, "learning_rate": 4.5127914930704175e-07, "loss": 18.1885, "step": 47476 }, { "epoch": 0.8678414096916299, "grad_norm": 5.630309177899839, "learning_rate": 4.511562623765148e-07, "loss": 17.2758, "step": 47477 }, { "epoch": 0.8678596888880765, "grad_norm": 4.716806927578833, "learning_rate": 4.5103339138928893e-07, "loss": 16.8522, "step": 47478 }, { "epoch": 0.867877968084523, "grad_norm": 6.026372939436053, "learning_rate": 4.5091053634579664e-07, "loss": 17.4611, "step": 47479 }, { "epoch": 0.8678962472809695, "grad_norm": 6.484001172817477, "learning_rate": 4.507876972464692e-07, "loss": 17.5634, "step": 47480 }, { "epoch": 0.8679145264774161, "grad_norm": 5.70848437767303, "learning_rate": 4.5066487409173577e-07, "loss": 16.9122, "step": 47481 }, { "epoch": 0.8679328056738626, "grad_norm": 5.398556840263297, "learning_rate": 4.5054206688202593e-07, "loss": 16.9978, "step": 47482 }, { "epoch": 0.8679510848703091, "grad_norm": 7.1683154742215525, "learning_rate": 4.504192756177728e-07, "loss": 17.8361, "step": 47483 }, { "epoch": 0.8679693640667556, "grad_norm": 6.472198819935176, "learning_rate": 4.502965002994053e-07, "loss": 17.589, "step": 47484 }, { "epoch": 0.8679876432632021, "grad_norm": 7.016956275879123, "learning_rate": 4.5017374092735266e-07, "loss": 17.5768, "step": 47485 }, { "epoch": 0.8680059224596487, "grad_norm": 5.843986311605677, "learning_rate": 4.5005099750204786e-07, "loss": 17.1825, "step": 47486 }, { "epoch": 0.8680242016560952, "grad_norm": 6.21427971007723, "learning_rate": 4.499282700239177e-07, "loss": 17.2638, "step": 47487 }, { "epoch": 0.8680424808525418, "grad_norm": 7.907274798562354, "learning_rate": 4.4980555849339416e-07, "loss": 18.0635, "step": 47488 }, { "epoch": 0.8680607600489882, "grad_norm": 5.179459432540911, "learning_rate": 4.4968286291090844e-07, "loss": 16.9242, "step": 47489 }, { "epoch": 0.8680790392454347, "grad_norm": 6.120302429584776, "learning_rate": 4.4956018327688976e-07, "loss": 17.3218, "step": 47490 }, { "epoch": 0.8680973184418813, "grad_norm": 7.637549342209122, "learning_rate": 4.4943751959176663e-07, "loss": 17.7015, "step": 47491 }, { "epoch": 0.8681155976383278, "grad_norm": 6.257869756776531, "learning_rate": 4.4931487185597087e-07, "loss": 17.1936, "step": 47492 }, { "epoch": 0.8681338768347744, "grad_norm": 6.162713885628374, "learning_rate": 4.4919224006993055e-07, "loss": 17.7213, "step": 47493 }, { "epoch": 0.8681521560312209, "grad_norm": 5.183054894594668, "learning_rate": 4.490696242340775e-07, "loss": 16.8391, "step": 47494 }, { "epoch": 0.8681704352276673, "grad_norm": 5.423252547186158, "learning_rate": 4.4894702434884084e-07, "loss": 17.119, "step": 47495 }, { "epoch": 0.8681887144241139, "grad_norm": 5.633689872151109, "learning_rate": 4.4882444041464913e-07, "loss": 17.1467, "step": 47496 }, { "epoch": 0.8682069936205604, "grad_norm": 6.219991112178496, "learning_rate": 4.4870187243193196e-07, "loss": 17.3773, "step": 47497 }, { "epoch": 0.868225272817007, "grad_norm": 5.920635885293121, "learning_rate": 4.4857932040112127e-07, "loss": 17.4619, "step": 47498 }, { "epoch": 0.8682435520134535, "grad_norm": 4.62343108211404, "learning_rate": 4.484567843226445e-07, "loss": 16.7345, "step": 47499 }, { "epoch": 0.8682618312099, "grad_norm": 5.083848973996184, "learning_rate": 4.4833426419693236e-07, "loss": 16.8202, "step": 47500 }, { "epoch": 0.8682801104063466, "grad_norm": 7.547428302738134, "learning_rate": 4.4821176002441344e-07, "loss": 17.4261, "step": 47501 }, { "epoch": 0.868298389602793, "grad_norm": 5.432221929647491, "learning_rate": 4.480892718055169e-07, "loss": 17.1683, "step": 47502 }, { "epoch": 0.8683166687992396, "grad_norm": 5.127362966608269, "learning_rate": 4.4796679954067277e-07, "loss": 16.9229, "step": 47503 }, { "epoch": 0.8683349479956861, "grad_norm": 6.702535086235925, "learning_rate": 4.478443432303103e-07, "loss": 16.9711, "step": 47504 }, { "epoch": 0.8683532271921326, "grad_norm": 6.393214890538493, "learning_rate": 4.477219028748575e-07, "loss": 17.3881, "step": 47505 }, { "epoch": 0.8683715063885792, "grad_norm": 5.154261601720262, "learning_rate": 4.475994784747445e-07, "loss": 16.8596, "step": 47506 }, { "epoch": 0.8683897855850257, "grad_norm": 6.025917506113212, "learning_rate": 4.4747707003039985e-07, "loss": 17.2656, "step": 47507 }, { "epoch": 0.8684080647814723, "grad_norm": 7.419432756217293, "learning_rate": 4.4735467754225436e-07, "loss": 18.0167, "step": 47508 }, { "epoch": 0.8684263439779187, "grad_norm": 6.6923217425555, "learning_rate": 4.4723230101073545e-07, "loss": 17.423, "step": 47509 }, { "epoch": 0.8684446231743652, "grad_norm": 7.148247727378611, "learning_rate": 4.4710994043627166e-07, "loss": 17.7316, "step": 47510 }, { "epoch": 0.8684629023708118, "grad_norm": 6.119940765446208, "learning_rate": 4.4698759581929317e-07, "loss": 17.1567, "step": 47511 }, { "epoch": 0.8684811815672583, "grad_norm": 7.16275073841111, "learning_rate": 4.4686526716022805e-07, "loss": 17.6958, "step": 47512 }, { "epoch": 0.8684994607637049, "grad_norm": 5.38641063025952, "learning_rate": 4.467429544595042e-07, "loss": 16.6968, "step": 47513 }, { "epoch": 0.8685177399601514, "grad_norm": 6.725848843660508, "learning_rate": 4.466206577175519e-07, "loss": 17.7104, "step": 47514 }, { "epoch": 0.8685360191565978, "grad_norm": 7.009728004813357, "learning_rate": 4.46498376934798e-07, "loss": 17.794, "step": 47515 }, { "epoch": 0.8685542983530444, "grad_norm": 5.9023294493268175, "learning_rate": 4.4637611211167275e-07, "loss": 17.1373, "step": 47516 }, { "epoch": 0.8685725775494909, "grad_norm": 5.45317958420133, "learning_rate": 4.4625386324860465e-07, "loss": 17.2475, "step": 47517 }, { "epoch": 0.8685908567459375, "grad_norm": 5.943989772034862, "learning_rate": 4.461316303460217e-07, "loss": 17.2618, "step": 47518 }, { "epoch": 0.868609135942384, "grad_norm": 6.753452502138331, "learning_rate": 4.4600941340435134e-07, "loss": 17.7647, "step": 47519 }, { "epoch": 0.8686274151388305, "grad_norm": 5.793959466264, "learning_rate": 4.4588721242402324e-07, "loss": 17.2841, "step": 47520 }, { "epoch": 0.868645694335277, "grad_norm": 5.5158422497383395, "learning_rate": 4.457650274054648e-07, "loss": 17.0942, "step": 47521 }, { "epoch": 0.8686639735317235, "grad_norm": 7.34408026063567, "learning_rate": 4.456428583491057e-07, "loss": 17.4039, "step": 47522 }, { "epoch": 0.8686822527281701, "grad_norm": 6.011913513232485, "learning_rate": 4.455207052553723e-07, "loss": 16.8592, "step": 47523 }, { "epoch": 0.8687005319246166, "grad_norm": 5.720716202585014, "learning_rate": 4.4539856812469415e-07, "loss": 17.3564, "step": 47524 }, { "epoch": 0.8687188111210631, "grad_norm": 8.225069754893157, "learning_rate": 4.4527644695749827e-07, "loss": 17.8276, "step": 47525 }, { "epoch": 0.8687370903175097, "grad_norm": 6.918751895679386, "learning_rate": 4.4515434175421366e-07, "loss": 17.838, "step": 47526 }, { "epoch": 0.8687553695139562, "grad_norm": 6.708209861667433, "learning_rate": 4.4503225251526727e-07, "loss": 17.4325, "step": 47527 }, { "epoch": 0.8687736487104027, "grad_norm": 6.6477306434741745, "learning_rate": 4.449101792410887e-07, "loss": 17.1122, "step": 47528 }, { "epoch": 0.8687919279068492, "grad_norm": 5.832450097959399, "learning_rate": 4.447881219321043e-07, "loss": 17.0839, "step": 47529 }, { "epoch": 0.8688102071032957, "grad_norm": 5.551380291665764, "learning_rate": 4.446660805887415e-07, "loss": 16.9724, "step": 47530 }, { "epoch": 0.8688284862997423, "grad_norm": 6.738173390550656, "learning_rate": 4.445440552114294e-07, "loss": 17.3051, "step": 47531 }, { "epoch": 0.8688467654961888, "grad_norm": 6.678782265188701, "learning_rate": 4.444220458005943e-07, "loss": 17.4574, "step": 47532 }, { "epoch": 0.8688650446926353, "grad_norm": 6.910095419682385, "learning_rate": 4.4430005235666483e-07, "loss": 17.7326, "step": 47533 }, { "epoch": 0.8688833238890818, "grad_norm": 5.047463815427969, "learning_rate": 4.4417807488006945e-07, "loss": 17.0867, "step": 47534 }, { "epoch": 0.8689016030855283, "grad_norm": 7.2319604167269, "learning_rate": 4.44056113371234e-07, "loss": 17.7012, "step": 47535 }, { "epoch": 0.8689198822819749, "grad_norm": 9.118758682680403, "learning_rate": 4.4393416783058585e-07, "loss": 18.0816, "step": 47536 }, { "epoch": 0.8689381614784214, "grad_norm": 6.196014127335473, "learning_rate": 4.4381223825855413e-07, "loss": 17.4514, "step": 47537 }, { "epoch": 0.8689564406748679, "grad_norm": 6.089335775833951, "learning_rate": 4.4369032465556404e-07, "loss": 17.4311, "step": 47538 }, { "epoch": 0.8689747198713145, "grad_norm": 5.9842442034342715, "learning_rate": 4.4356842702204526e-07, "loss": 17.2194, "step": 47539 }, { "epoch": 0.868992999067761, "grad_norm": 8.471314830522221, "learning_rate": 4.4344654535842356e-07, "loss": 18.7386, "step": 47540 }, { "epoch": 0.8690112782642075, "grad_norm": 5.514432875056241, "learning_rate": 4.4332467966512527e-07, "loss": 17.1383, "step": 47541 }, { "epoch": 0.869029557460654, "grad_norm": 7.199179219314939, "learning_rate": 4.432028299425789e-07, "loss": 17.5689, "step": 47542 }, { "epoch": 0.8690478366571005, "grad_norm": 6.0341489502158865, "learning_rate": 4.4308099619121137e-07, "loss": 17.4326, "step": 47543 }, { "epoch": 0.8690661158535471, "grad_norm": 5.827698736283402, "learning_rate": 4.429591784114495e-07, "loss": 17.4892, "step": 47544 }, { "epoch": 0.8690843950499936, "grad_norm": 6.538784285598049, "learning_rate": 4.4283737660372085e-07, "loss": 17.5306, "step": 47545 }, { "epoch": 0.8691026742464402, "grad_norm": 5.169351525623666, "learning_rate": 4.427155907684516e-07, "loss": 16.9568, "step": 47546 }, { "epoch": 0.8691209534428866, "grad_norm": 5.158322346157013, "learning_rate": 4.4259382090606763e-07, "loss": 16.8918, "step": 47547 }, { "epoch": 0.8691392326393331, "grad_norm": 6.981906614098073, "learning_rate": 4.4247206701699794e-07, "loss": 17.5519, "step": 47548 }, { "epoch": 0.8691575118357797, "grad_norm": 5.83746672717647, "learning_rate": 4.4235032910166843e-07, "loss": 17.0118, "step": 47549 }, { "epoch": 0.8691757910322262, "grad_norm": 6.493073845144243, "learning_rate": 4.422286071605042e-07, "loss": 17.538, "step": 47550 }, { "epoch": 0.8691940702286728, "grad_norm": 7.569859885943018, "learning_rate": 4.4210690119393387e-07, "loss": 17.9609, "step": 47551 }, { "epoch": 0.8692123494251193, "grad_norm": 6.194457957188617, "learning_rate": 4.419852112023826e-07, "loss": 17.6504, "step": 47552 }, { "epoch": 0.8692306286215657, "grad_norm": 9.122914544282377, "learning_rate": 4.4186353718627894e-07, "loss": 17.7088, "step": 47553 }, { "epoch": 0.8692489078180123, "grad_norm": 8.584080872440591, "learning_rate": 4.4174187914604814e-07, "loss": 17.8711, "step": 47554 }, { "epoch": 0.8692671870144588, "grad_norm": 5.4517630759508675, "learning_rate": 4.416202370821154e-07, "loss": 17.1444, "step": 47555 }, { "epoch": 0.8692854662109054, "grad_norm": 7.647113473228067, "learning_rate": 4.4149861099490877e-07, "loss": 17.3445, "step": 47556 }, { "epoch": 0.8693037454073519, "grad_norm": 7.771165237112609, "learning_rate": 4.4137700088485447e-07, "loss": 17.603, "step": 47557 }, { "epoch": 0.8693220246037984, "grad_norm": 6.344108092757774, "learning_rate": 4.412554067523772e-07, "loss": 17.1602, "step": 47558 }, { "epoch": 0.869340303800245, "grad_norm": 5.956378769068959, "learning_rate": 4.4113382859790446e-07, "loss": 17.1872, "step": 47559 }, { "epoch": 0.8693585829966914, "grad_norm": 6.893552170695516, "learning_rate": 4.410122664218619e-07, "loss": 17.5088, "step": 47560 }, { "epoch": 0.869376862193138, "grad_norm": 7.385233642026477, "learning_rate": 4.408907202246754e-07, "loss": 17.3698, "step": 47561 }, { "epoch": 0.8693951413895845, "grad_norm": 6.386297871076787, "learning_rate": 4.407691900067723e-07, "loss": 17.2019, "step": 47562 }, { "epoch": 0.869413420586031, "grad_norm": 6.364937717622017, "learning_rate": 4.4064767576857737e-07, "loss": 17.2743, "step": 47563 }, { "epoch": 0.8694316997824776, "grad_norm": 6.547126909742137, "learning_rate": 4.4052617751051575e-07, "loss": 17.6662, "step": 47564 }, { "epoch": 0.8694499789789241, "grad_norm": 7.315098444174204, "learning_rate": 4.404046952330154e-07, "loss": 17.609, "step": 47565 }, { "epoch": 0.8694682581753707, "grad_norm": 6.049475510067116, "learning_rate": 4.402832289365011e-07, "loss": 17.5022, "step": 47566 }, { "epoch": 0.8694865373718171, "grad_norm": 5.444405571976477, "learning_rate": 4.4016177862139744e-07, "loss": 17.2195, "step": 47567 }, { "epoch": 0.8695048165682636, "grad_norm": 5.915936078484362, "learning_rate": 4.4004034428813127e-07, "loss": 17.107, "step": 47568 }, { "epoch": 0.8695230957647102, "grad_norm": 7.073877868926655, "learning_rate": 4.399189259371278e-07, "loss": 17.6428, "step": 47569 }, { "epoch": 0.8695413749611567, "grad_norm": 5.445567633759338, "learning_rate": 4.3979752356881235e-07, "loss": 16.9699, "step": 47570 }, { "epoch": 0.8695596541576033, "grad_norm": 5.599572905383387, "learning_rate": 4.396761371836122e-07, "loss": 17.0583, "step": 47571 }, { "epoch": 0.8695779333540498, "grad_norm": 6.846927952664614, "learning_rate": 4.395547667819505e-07, "loss": 17.3929, "step": 47572 }, { "epoch": 0.8695962125504962, "grad_norm": 6.863735885560522, "learning_rate": 4.394334123642541e-07, "loss": 17.4034, "step": 47573 }, { "epoch": 0.8696144917469428, "grad_norm": 6.998914929195903, "learning_rate": 4.393120739309481e-07, "loss": 17.6571, "step": 47574 }, { "epoch": 0.8696327709433893, "grad_norm": 5.465986796151819, "learning_rate": 4.391907514824567e-07, "loss": 16.9273, "step": 47575 }, { "epoch": 0.8696510501398359, "grad_norm": 8.788264902827125, "learning_rate": 4.3906944501920676e-07, "loss": 17.9094, "step": 47576 }, { "epoch": 0.8696693293362824, "grad_norm": 6.496206712744686, "learning_rate": 4.3894815454162185e-07, "loss": 17.9164, "step": 47577 }, { "epoch": 0.8696876085327289, "grad_norm": 5.8583965296396245, "learning_rate": 4.3882688005012887e-07, "loss": 17.2389, "step": 47578 }, { "epoch": 0.8697058877291755, "grad_norm": 5.686666746710751, "learning_rate": 4.387056215451507e-07, "loss": 17.0203, "step": 47579 }, { "epoch": 0.8697241669256219, "grad_norm": 6.3464649890050095, "learning_rate": 4.385843790271144e-07, "loss": 17.4609, "step": 47580 }, { "epoch": 0.8697424461220685, "grad_norm": 6.249835336867718, "learning_rate": 4.3846315249644333e-07, "loss": 17.1329, "step": 47581 }, { "epoch": 0.869760725318515, "grad_norm": 5.986486236705014, "learning_rate": 4.3834194195356396e-07, "loss": 17.4188, "step": 47582 }, { "epoch": 0.8697790045149615, "grad_norm": 5.42590364822728, "learning_rate": 4.382207473988992e-07, "loss": 17.1551, "step": 47583 }, { "epoch": 0.8697972837114081, "grad_norm": 5.161261993633633, "learning_rate": 4.3809956883287543e-07, "loss": 16.9396, "step": 47584 }, { "epoch": 0.8698155629078546, "grad_norm": 7.1427947985139575, "learning_rate": 4.3797840625591724e-07, "loss": 17.675, "step": 47585 }, { "epoch": 0.8698338421043011, "grad_norm": 5.877484306460909, "learning_rate": 4.3785725966844827e-07, "loss": 17.0291, "step": 47586 }, { "epoch": 0.8698521213007476, "grad_norm": 6.699086500149682, "learning_rate": 4.3773612907089315e-07, "loss": 17.4755, "step": 47587 }, { "epoch": 0.8698704004971941, "grad_norm": 6.37774450337862, "learning_rate": 4.3761501446367816e-07, "loss": 17.2266, "step": 47588 }, { "epoch": 0.8698886796936407, "grad_norm": 5.961162886449047, "learning_rate": 4.374939158472258e-07, "loss": 17.1911, "step": 47589 }, { "epoch": 0.8699069588900872, "grad_norm": 6.258844009238047, "learning_rate": 4.3737283322196177e-07, "loss": 17.5978, "step": 47590 }, { "epoch": 0.8699252380865338, "grad_norm": 5.762250690505447, "learning_rate": 4.372517665883108e-07, "loss": 16.9479, "step": 47591 }, { "epoch": 0.8699435172829802, "grad_norm": 6.339583498750499, "learning_rate": 4.371307159466953e-07, "loss": 17.3846, "step": 47592 }, { "epoch": 0.8699617964794267, "grad_norm": 4.806142681841453, "learning_rate": 4.3700968129754105e-07, "loss": 16.7909, "step": 47593 }, { "epoch": 0.8699800756758733, "grad_norm": 6.217320208994158, "learning_rate": 4.368886626412727e-07, "loss": 17.4167, "step": 47594 }, { "epoch": 0.8699983548723198, "grad_norm": 4.583910059746047, "learning_rate": 4.3676765997831214e-07, "loss": 16.8872, "step": 47595 }, { "epoch": 0.8700166340687664, "grad_norm": 6.546659387000583, "learning_rate": 4.3664667330908517e-07, "loss": 17.6823, "step": 47596 }, { "epoch": 0.8700349132652129, "grad_norm": 6.5603999271490325, "learning_rate": 4.3652570263401637e-07, "loss": 17.4892, "step": 47597 }, { "epoch": 0.8700531924616594, "grad_norm": 5.2524539991141825, "learning_rate": 4.3640474795352826e-07, "loss": 17.053, "step": 47598 }, { "epoch": 0.8700714716581059, "grad_norm": 5.755639422229755, "learning_rate": 4.362838092680466e-07, "loss": 17.4326, "step": 47599 }, { "epoch": 0.8700897508545524, "grad_norm": 5.525487108006441, "learning_rate": 4.361628865779927e-07, "loss": 17.1417, "step": 47600 }, { "epoch": 0.8701080300509989, "grad_norm": 6.4497285129061375, "learning_rate": 4.360419798837934e-07, "loss": 17.5133, "step": 47601 }, { "epoch": 0.8701263092474455, "grad_norm": 5.368077229933196, "learning_rate": 4.359210891858701e-07, "loss": 16.972, "step": 47602 }, { "epoch": 0.870144588443892, "grad_norm": 4.718215827230423, "learning_rate": 4.358002144846474e-07, "loss": 16.9615, "step": 47603 }, { "epoch": 0.8701628676403386, "grad_norm": 7.049193311628385, "learning_rate": 4.356793557805489e-07, "loss": 17.7994, "step": 47604 }, { "epoch": 0.870181146836785, "grad_norm": 5.67603553178923, "learning_rate": 4.355585130739981e-07, "loss": 17.2127, "step": 47605 }, { "epoch": 0.8701994260332315, "grad_norm": 5.0296850714335175, "learning_rate": 4.3543768636541807e-07, "loss": 16.9754, "step": 47606 }, { "epoch": 0.8702177052296781, "grad_norm": 7.319908934146646, "learning_rate": 4.3531687565523393e-07, "loss": 17.6606, "step": 47607 }, { "epoch": 0.8702359844261246, "grad_norm": 6.7134881089653655, "learning_rate": 4.3519608094386766e-07, "loss": 17.4787, "step": 47608 }, { "epoch": 0.8702542636225712, "grad_norm": 4.894835212518693, "learning_rate": 4.3507530223174277e-07, "loss": 16.7411, "step": 47609 }, { "epoch": 0.8702725428190177, "grad_norm": 5.301669060604572, "learning_rate": 4.3495453951928336e-07, "loss": 17.1248, "step": 47610 }, { "epoch": 0.8702908220154641, "grad_norm": 4.887018162759588, "learning_rate": 4.3483379280691183e-07, "loss": 17.0181, "step": 47611 }, { "epoch": 0.8703091012119107, "grad_norm": 5.652939452790381, "learning_rate": 4.347130620950513e-07, "loss": 17.0927, "step": 47612 }, { "epoch": 0.8703273804083572, "grad_norm": 5.7252366998430295, "learning_rate": 4.3459234738412623e-07, "loss": 17.2379, "step": 47613 }, { "epoch": 0.8703456596048038, "grad_norm": 6.1563267123528265, "learning_rate": 4.344716486745576e-07, "loss": 17.2859, "step": 47614 }, { "epoch": 0.8703639388012503, "grad_norm": 5.584137801926068, "learning_rate": 4.3435096596677e-07, "loss": 17.0277, "step": 47615 }, { "epoch": 0.8703822179976968, "grad_norm": 5.4109443257929914, "learning_rate": 4.3423029926118686e-07, "loss": 17.1486, "step": 47616 }, { "epoch": 0.8704004971941434, "grad_norm": 6.389717086154837, "learning_rate": 4.3410964855822913e-07, "loss": 17.4016, "step": 47617 }, { "epoch": 0.8704187763905898, "grad_norm": 5.788044359415126, "learning_rate": 4.3398901385832247e-07, "loss": 17.3398, "step": 47618 }, { "epoch": 0.8704370555870364, "grad_norm": 5.475824372464702, "learning_rate": 4.338683951618877e-07, "loss": 16.9945, "step": 47619 }, { "epoch": 0.8704553347834829, "grad_norm": 5.83836391366942, "learning_rate": 4.3374779246934674e-07, "loss": 17.3098, "step": 47620 }, { "epoch": 0.8704736139799294, "grad_norm": 6.890270554897803, "learning_rate": 4.336272057811247e-07, "loss": 17.4566, "step": 47621 }, { "epoch": 0.870491893176376, "grad_norm": 8.49237901440936, "learning_rate": 4.3350663509764304e-07, "loss": 18.0896, "step": 47622 }, { "epoch": 0.8705101723728225, "grad_norm": 6.41228797442705, "learning_rate": 4.333860804193235e-07, "loss": 17.4997, "step": 47623 }, { "epoch": 0.8705284515692691, "grad_norm": 5.870614472417479, "learning_rate": 4.3326554174658975e-07, "loss": 17.2321, "step": 47624 }, { "epoch": 0.8705467307657155, "grad_norm": 6.65290697561481, "learning_rate": 4.3314501907986416e-07, "loss": 17.7289, "step": 47625 }, { "epoch": 0.870565009962162, "grad_norm": 5.706269550910818, "learning_rate": 4.3302451241956867e-07, "loss": 17.396, "step": 47626 }, { "epoch": 0.8705832891586086, "grad_norm": 5.60658469198554, "learning_rate": 4.329040217661268e-07, "loss": 17.2042, "step": 47627 }, { "epoch": 0.8706015683550551, "grad_norm": 5.757355460287566, "learning_rate": 4.3278354711995987e-07, "loss": 17.1721, "step": 47628 }, { "epoch": 0.8706198475515017, "grad_norm": 5.930787685189469, "learning_rate": 4.3266308848148973e-07, "loss": 17.4464, "step": 47629 }, { "epoch": 0.8706381267479482, "grad_norm": 6.163413735738394, "learning_rate": 4.3254264585113946e-07, "loss": 16.9032, "step": 47630 }, { "epoch": 0.8706564059443946, "grad_norm": 6.371182176138315, "learning_rate": 4.3242221922933036e-07, "loss": 17.1772, "step": 47631 }, { "epoch": 0.8706746851408412, "grad_norm": 6.329280577520114, "learning_rate": 4.3230180861648595e-07, "loss": 17.5143, "step": 47632 }, { "epoch": 0.8706929643372877, "grad_norm": 4.9540517070854495, "learning_rate": 4.321814140130265e-07, "loss": 16.9566, "step": 47633 }, { "epoch": 0.8707112435337343, "grad_norm": 5.641350371931971, "learning_rate": 4.320610354193744e-07, "loss": 17.261, "step": 47634 }, { "epoch": 0.8707295227301808, "grad_norm": 5.587376361678111, "learning_rate": 4.319406728359532e-07, "loss": 17.3249, "step": 47635 }, { "epoch": 0.8707478019266273, "grad_norm": 10.319596019297444, "learning_rate": 4.3182032626318317e-07, "loss": 17.9502, "step": 47636 }, { "epoch": 0.8707660811230739, "grad_norm": 6.3469952394657545, "learning_rate": 4.3169999570148613e-07, "loss": 17.6336, "step": 47637 }, { "epoch": 0.8707843603195203, "grad_norm": 7.239059302022562, "learning_rate": 4.3157968115128454e-07, "loss": 17.8899, "step": 47638 }, { "epoch": 0.8708026395159669, "grad_norm": 6.8314946858343575, "learning_rate": 4.3145938261300035e-07, "loss": 17.5945, "step": 47639 }, { "epoch": 0.8708209187124134, "grad_norm": 6.406099345730743, "learning_rate": 4.3133910008705314e-07, "loss": 17.5849, "step": 47640 }, { "epoch": 0.8708391979088599, "grad_norm": 6.0361624951954385, "learning_rate": 4.3121883357386707e-07, "loss": 17.2814, "step": 47641 }, { "epoch": 0.8708574771053065, "grad_norm": 5.19096322547832, "learning_rate": 4.310985830738618e-07, "loss": 16.8911, "step": 47642 }, { "epoch": 0.870875756301753, "grad_norm": 7.819194149347485, "learning_rate": 4.309783485874591e-07, "loss": 17.5684, "step": 47643 }, { "epoch": 0.8708940354981995, "grad_norm": 5.052794813030522, "learning_rate": 4.3085813011508216e-07, "loss": 17.0985, "step": 47644 }, { "epoch": 0.870912314694646, "grad_norm": 6.624957537016566, "learning_rate": 4.307379276571499e-07, "loss": 17.5287, "step": 47645 }, { "epoch": 0.8709305938910925, "grad_norm": 5.664452100271655, "learning_rate": 4.306177412140855e-07, "loss": 17.0996, "step": 47646 }, { "epoch": 0.8709488730875391, "grad_norm": 8.55223385900959, "learning_rate": 4.304975707863096e-07, "loss": 18.816, "step": 47647 }, { "epoch": 0.8709671522839856, "grad_norm": 5.877918318256579, "learning_rate": 4.303774163742419e-07, "loss": 17.3101, "step": 47648 }, { "epoch": 0.8709854314804322, "grad_norm": 6.388641120326659, "learning_rate": 4.3025727797830597e-07, "loss": 17.3429, "step": 47649 }, { "epoch": 0.8710037106768787, "grad_norm": 4.908381608296682, "learning_rate": 4.3013715559892087e-07, "loss": 16.903, "step": 47650 }, { "epoch": 0.8710219898733251, "grad_norm": 5.641033562603255, "learning_rate": 4.300170492365097e-07, "loss": 17.0446, "step": 47651 }, { "epoch": 0.8710402690697717, "grad_norm": 4.841534749389871, "learning_rate": 4.298969588914909e-07, "loss": 16.9479, "step": 47652 }, { "epoch": 0.8710585482662182, "grad_norm": 5.302400623021044, "learning_rate": 4.297768845642875e-07, "loss": 16.8484, "step": 47653 }, { "epoch": 0.8710768274626648, "grad_norm": 5.6266270055307075, "learning_rate": 4.296568262553191e-07, "loss": 17.0725, "step": 47654 }, { "epoch": 0.8710951066591113, "grad_norm": 6.709810076762257, "learning_rate": 4.295367839650072e-07, "loss": 17.5186, "step": 47655 }, { "epoch": 0.8711133858555578, "grad_norm": 6.687182328192321, "learning_rate": 4.29416757693773e-07, "loss": 17.1234, "step": 47656 }, { "epoch": 0.8711316650520043, "grad_norm": 5.812326028272715, "learning_rate": 4.2929674744203507e-07, "loss": 17.3778, "step": 47657 }, { "epoch": 0.8711499442484508, "grad_norm": 7.0097980655257555, "learning_rate": 4.2917675321021643e-07, "loss": 17.2957, "step": 47658 }, { "epoch": 0.8711682234448974, "grad_norm": 6.004428582141761, "learning_rate": 4.2905677499873565e-07, "loss": 17.3259, "step": 47659 }, { "epoch": 0.8711865026413439, "grad_norm": 6.071188090695555, "learning_rate": 4.28936812808014e-07, "loss": 16.9738, "step": 47660 }, { "epoch": 0.8712047818377904, "grad_norm": 6.899300804621069, "learning_rate": 4.2881686663847344e-07, "loss": 17.195, "step": 47661 }, { "epoch": 0.871223061034237, "grad_norm": 6.4373141638396225, "learning_rate": 4.286969364905319e-07, "loss": 17.2736, "step": 47662 }, { "epoch": 0.8712413402306834, "grad_norm": 5.764643564146044, "learning_rate": 4.2857702236461183e-07, "loss": 17.2201, "step": 47663 }, { "epoch": 0.87125961942713, "grad_norm": 6.623623944367336, "learning_rate": 4.2845712426113296e-07, "loss": 17.1372, "step": 47664 }, { "epoch": 0.8712778986235765, "grad_norm": 7.552357138234706, "learning_rate": 4.283372421805143e-07, "loss": 17.7415, "step": 47665 }, { "epoch": 0.871296177820023, "grad_norm": 6.561496420547696, "learning_rate": 4.2821737612317724e-07, "loss": 17.5074, "step": 47666 }, { "epoch": 0.8713144570164696, "grad_norm": 5.799593257310638, "learning_rate": 4.28097526089542e-07, "loss": 17.3377, "step": 47667 }, { "epoch": 0.8713327362129161, "grad_norm": 5.251085983385391, "learning_rate": 4.2797769208002713e-07, "loss": 16.9676, "step": 47668 }, { "epoch": 0.8713510154093626, "grad_norm": 7.756297059585623, "learning_rate": 4.2785787409505397e-07, "loss": 17.9465, "step": 47669 }, { "epoch": 0.8713692946058091, "grad_norm": 5.763026629076699, "learning_rate": 4.277380721350427e-07, "loss": 17.0935, "step": 47670 }, { "epoch": 0.8713875738022556, "grad_norm": 6.377068689324626, "learning_rate": 4.276182862004119e-07, "loss": 17.4019, "step": 47671 }, { "epoch": 0.8714058529987022, "grad_norm": 7.164313223879334, "learning_rate": 4.2749851629158347e-07, "loss": 17.723, "step": 47672 }, { "epoch": 0.8714241321951487, "grad_norm": 4.935537712944637, "learning_rate": 4.273787624089759e-07, "loss": 16.6987, "step": 47673 }, { "epoch": 0.8714424113915952, "grad_norm": 5.8585753458134375, "learning_rate": 4.2725902455300784e-07, "loss": 17.153, "step": 47674 }, { "epoch": 0.8714606905880418, "grad_norm": 5.997347843225879, "learning_rate": 4.271393027241011e-07, "loss": 17.1664, "step": 47675 }, { "epoch": 0.8714789697844882, "grad_norm": 6.895042775264345, "learning_rate": 4.2701959692267313e-07, "loss": 17.5462, "step": 47676 }, { "epoch": 0.8714972489809348, "grad_norm": 7.66207479272835, "learning_rate": 4.2689990714914585e-07, "loss": 17.6171, "step": 47677 }, { "epoch": 0.8715155281773813, "grad_norm": 6.781514993492832, "learning_rate": 4.267802334039367e-07, "loss": 17.3801, "step": 47678 }, { "epoch": 0.8715338073738278, "grad_norm": 6.851480155038413, "learning_rate": 4.2666057568746587e-07, "loss": 17.7043, "step": 47679 }, { "epoch": 0.8715520865702744, "grad_norm": 4.938496826009774, "learning_rate": 4.2654093400015364e-07, "loss": 16.9896, "step": 47680 }, { "epoch": 0.8715703657667209, "grad_norm": 7.691572882396648, "learning_rate": 4.26421308342419e-07, "loss": 17.9594, "step": 47681 }, { "epoch": 0.8715886449631675, "grad_norm": 5.570067022341369, "learning_rate": 4.263016987146795e-07, "loss": 17.2576, "step": 47682 }, { "epoch": 0.8716069241596139, "grad_norm": 8.405886349551512, "learning_rate": 4.2618210511735645e-07, "loss": 18.5855, "step": 47683 }, { "epoch": 0.8716252033560604, "grad_norm": 9.964111709049345, "learning_rate": 4.260625275508684e-07, "loss": 17.8403, "step": 47684 }, { "epoch": 0.871643482552507, "grad_norm": 5.69891455602271, "learning_rate": 4.2594296601563387e-07, "loss": 17.3037, "step": 47685 }, { "epoch": 0.8716617617489535, "grad_norm": 6.378229179138635, "learning_rate": 4.258234205120726e-07, "loss": 17.3767, "step": 47686 }, { "epoch": 0.8716800409454001, "grad_norm": 5.757330416285943, "learning_rate": 4.257038910406025e-07, "loss": 17.3207, "step": 47687 }, { "epoch": 0.8716983201418466, "grad_norm": 6.105751487123405, "learning_rate": 4.255843776016433e-07, "loss": 17.388, "step": 47688 }, { "epoch": 0.871716599338293, "grad_norm": 4.411279369223195, "learning_rate": 4.254648801956152e-07, "loss": 16.6722, "step": 47689 }, { "epoch": 0.8717348785347396, "grad_norm": 5.558330671719389, "learning_rate": 4.2534539882293445e-07, "loss": 17.0532, "step": 47690 }, { "epoch": 0.8717531577311861, "grad_norm": 5.174393494031196, "learning_rate": 4.2522593348402196e-07, "loss": 16.9099, "step": 47691 }, { "epoch": 0.8717714369276327, "grad_norm": 7.162788010186548, "learning_rate": 4.251064841792957e-07, "loss": 17.5641, "step": 47692 }, { "epoch": 0.8717897161240792, "grad_norm": 5.940572820324721, "learning_rate": 4.249870509091736e-07, "loss": 17.1187, "step": 47693 }, { "epoch": 0.8718079953205257, "grad_norm": 5.976097727667278, "learning_rate": 4.2486763367407534e-07, "loss": 17.165, "step": 47694 }, { "epoch": 0.8718262745169723, "grad_norm": 6.427156934085973, "learning_rate": 4.2474823247441953e-07, "loss": 17.429, "step": 47695 }, { "epoch": 0.8718445537134187, "grad_norm": 5.99711933630822, "learning_rate": 4.2462884731062303e-07, "loss": 17.2559, "step": 47696 }, { "epoch": 0.8718628329098653, "grad_norm": 7.824282559871375, "learning_rate": 4.2450947818310496e-07, "loss": 17.5759, "step": 47697 }, { "epoch": 0.8718811121063118, "grad_norm": 5.871797570195113, "learning_rate": 4.243901250922855e-07, "loss": 17.0886, "step": 47698 }, { "epoch": 0.8718993913027583, "grad_norm": 7.012647413685963, "learning_rate": 4.2427078803858047e-07, "loss": 17.8333, "step": 47699 }, { "epoch": 0.8719176704992049, "grad_norm": 5.494872431167418, "learning_rate": 4.241514670224101e-07, "loss": 16.9171, "step": 47700 }, { "epoch": 0.8719359496956514, "grad_norm": 5.4604450026701805, "learning_rate": 4.240321620441923e-07, "loss": 17.0497, "step": 47701 }, { "epoch": 0.871954228892098, "grad_norm": 7.402394736721589, "learning_rate": 4.239128731043435e-07, "loss": 17.7328, "step": 47702 }, { "epoch": 0.8719725080885444, "grad_norm": 5.521975201943909, "learning_rate": 4.237936002032833e-07, "loss": 17.0287, "step": 47703 }, { "epoch": 0.8719907872849909, "grad_norm": 8.56083746821231, "learning_rate": 4.236743433414292e-07, "loss": 18.5023, "step": 47704 }, { "epoch": 0.8720090664814375, "grad_norm": 5.827762677823852, "learning_rate": 4.235551025192003e-07, "loss": 17.4607, "step": 47705 }, { "epoch": 0.872027345677884, "grad_norm": 5.582577009399271, "learning_rate": 4.234358777370123e-07, "loss": 17.1959, "step": 47706 }, { "epoch": 0.8720456248743306, "grad_norm": 7.187671135843329, "learning_rate": 4.23316668995285e-07, "loss": 17.5386, "step": 47707 }, { "epoch": 0.872063904070777, "grad_norm": 6.677827567814054, "learning_rate": 4.231974762944363e-07, "loss": 17.5578, "step": 47708 }, { "epoch": 0.8720821832672235, "grad_norm": 6.338689460594096, "learning_rate": 4.2307829963488314e-07, "loss": 17.485, "step": 47709 }, { "epoch": 0.8721004624636701, "grad_norm": 6.50862437658124, "learning_rate": 4.229591390170423e-07, "loss": 17.3326, "step": 47710 }, { "epoch": 0.8721187416601166, "grad_norm": 6.547936473940326, "learning_rate": 4.228399944413336e-07, "loss": 17.212, "step": 47711 }, { "epoch": 0.8721370208565632, "grad_norm": 6.783394307855051, "learning_rate": 4.2272086590817383e-07, "loss": 17.5037, "step": 47712 }, { "epoch": 0.8721553000530097, "grad_norm": 5.553523255456579, "learning_rate": 4.2260175341797883e-07, "loss": 17.3256, "step": 47713 }, { "epoch": 0.8721735792494562, "grad_norm": 5.381652278780429, "learning_rate": 4.2248265697116765e-07, "loss": 17.1221, "step": 47714 }, { "epoch": 0.8721918584459027, "grad_norm": 4.80021443521755, "learning_rate": 4.223635765681583e-07, "loss": 16.8064, "step": 47715 }, { "epoch": 0.8722101376423492, "grad_norm": 6.050198781871835, "learning_rate": 4.222445122093671e-07, "loss": 17.3518, "step": 47716 }, { "epoch": 0.8722284168387958, "grad_norm": 7.386285199362686, "learning_rate": 4.221254638952121e-07, "loss": 17.7189, "step": 47717 }, { "epoch": 0.8722466960352423, "grad_norm": 6.387213211463327, "learning_rate": 4.220064316261102e-07, "loss": 17.3084, "step": 47718 }, { "epoch": 0.8722649752316888, "grad_norm": 5.4338126234026936, "learning_rate": 4.2188741540247715e-07, "loss": 17.1231, "step": 47719 }, { "epoch": 0.8722832544281354, "grad_norm": 7.810767620175551, "learning_rate": 4.217684152247331e-07, "loss": 17.7228, "step": 47720 }, { "epoch": 0.8723015336245818, "grad_norm": 7.255604966607717, "learning_rate": 4.216494310932917e-07, "loss": 17.5734, "step": 47721 }, { "epoch": 0.8723198128210284, "grad_norm": 5.577248018585739, "learning_rate": 4.2153046300857317e-07, "loss": 17.14, "step": 47722 }, { "epoch": 0.8723380920174749, "grad_norm": 6.997902636500082, "learning_rate": 4.214115109709921e-07, "loss": 17.5166, "step": 47723 }, { "epoch": 0.8723563712139214, "grad_norm": 9.396465352294532, "learning_rate": 4.2129257498096654e-07, "loss": 18.1762, "step": 47724 }, { "epoch": 0.872374650410368, "grad_norm": 4.576018771789781, "learning_rate": 4.2117365503891394e-07, "loss": 16.7124, "step": 47725 }, { "epoch": 0.8723929296068145, "grad_norm": 7.710644027432261, "learning_rate": 4.2105475114525064e-07, "loss": 17.3822, "step": 47726 }, { "epoch": 0.8724112088032611, "grad_norm": 6.394615724956077, "learning_rate": 4.209358633003918e-07, "loss": 17.4535, "step": 47727 }, { "epoch": 0.8724294879997075, "grad_norm": 6.114613541457429, "learning_rate": 4.2081699150475664e-07, "loss": 17.2385, "step": 47728 }, { "epoch": 0.872447767196154, "grad_norm": 8.223090956692328, "learning_rate": 4.2069813575876027e-07, "loss": 17.3704, "step": 47729 }, { "epoch": 0.8724660463926006, "grad_norm": 6.545558042194471, "learning_rate": 4.2057929606281857e-07, "loss": 17.3369, "step": 47730 }, { "epoch": 0.8724843255890471, "grad_norm": 5.939568585166331, "learning_rate": 4.2046047241735e-07, "loss": 17.5355, "step": 47731 }, { "epoch": 0.8725026047854937, "grad_norm": 7.09664783804592, "learning_rate": 4.203416648227693e-07, "loss": 17.4732, "step": 47732 }, { "epoch": 0.8725208839819402, "grad_norm": 7.0546975965375, "learning_rate": 4.2022287327949385e-07, "loss": 17.6151, "step": 47733 }, { "epoch": 0.8725391631783866, "grad_norm": 5.335571888385143, "learning_rate": 4.201040977879406e-07, "loss": 16.9927, "step": 47734 }, { "epoch": 0.8725574423748332, "grad_norm": 5.64455781728929, "learning_rate": 4.1998533834852475e-07, "loss": 17.2888, "step": 47735 }, { "epoch": 0.8725757215712797, "grad_norm": 6.430851330482123, "learning_rate": 4.1986659496166207e-07, "loss": 17.7637, "step": 47736 }, { "epoch": 0.8725940007677262, "grad_norm": 5.60584953901094, "learning_rate": 4.1974786762777056e-07, "loss": 16.9765, "step": 47737 }, { "epoch": 0.8726122799641728, "grad_norm": 5.022114852571638, "learning_rate": 4.196291563472643e-07, "loss": 17.0905, "step": 47738 }, { "epoch": 0.8726305591606193, "grad_norm": 7.046641010368095, "learning_rate": 4.195104611205614e-07, "loss": 18.0521, "step": 47739 }, { "epoch": 0.8726488383570659, "grad_norm": 6.623296439390338, "learning_rate": 4.1939178194807694e-07, "loss": 17.0217, "step": 47740 }, { "epoch": 0.8726671175535123, "grad_norm": 7.493271537327459, "learning_rate": 4.192731188302257e-07, "loss": 17.4834, "step": 47741 }, { "epoch": 0.8726853967499588, "grad_norm": 9.399546036200341, "learning_rate": 4.191544717674251e-07, "loss": 18.3536, "step": 47742 }, { "epoch": 0.8727036759464054, "grad_norm": 5.949269792315954, "learning_rate": 4.1903584076009086e-07, "loss": 17.108, "step": 47743 }, { "epoch": 0.8727219551428519, "grad_norm": 6.8022616458156975, "learning_rate": 4.1891722580863825e-07, "loss": 17.28, "step": 47744 }, { "epoch": 0.8727402343392985, "grad_norm": 6.642549489886188, "learning_rate": 4.187986269134836e-07, "loss": 17.4141, "step": 47745 }, { "epoch": 0.872758513535745, "grad_norm": 6.6790774650935, "learning_rate": 4.1868004407504213e-07, "loss": 17.7669, "step": 47746 }, { "epoch": 0.8727767927321914, "grad_norm": 5.794178414563835, "learning_rate": 4.185614772937291e-07, "loss": 17.0915, "step": 47747 }, { "epoch": 0.872795071928638, "grad_norm": 6.575051285824707, "learning_rate": 4.1844292656996133e-07, "loss": 17.3864, "step": 47748 }, { "epoch": 0.8728133511250845, "grad_norm": 6.992018147055503, "learning_rate": 4.183243919041535e-07, "loss": 17.3603, "step": 47749 }, { "epoch": 0.8728316303215311, "grad_norm": 5.672944283802025, "learning_rate": 4.182058732967198e-07, "loss": 17.118, "step": 47750 }, { "epoch": 0.8728499095179776, "grad_norm": 6.066687961513607, "learning_rate": 4.180873707480776e-07, "loss": 17.1939, "step": 47751 }, { "epoch": 0.8728681887144241, "grad_norm": 6.567119131715891, "learning_rate": 4.1796888425864104e-07, "loss": 17.2086, "step": 47752 }, { "epoch": 0.8728864679108707, "grad_norm": 6.459261919589083, "learning_rate": 4.1785041382882694e-07, "loss": 17.4308, "step": 47753 }, { "epoch": 0.8729047471073171, "grad_norm": 5.303869389561985, "learning_rate": 4.1773195945904954e-07, "loss": 17.0569, "step": 47754 }, { "epoch": 0.8729230263037637, "grad_norm": 6.213974601150456, "learning_rate": 4.176135211497234e-07, "loss": 17.0156, "step": 47755 }, { "epoch": 0.8729413055002102, "grad_norm": 6.275144881390245, "learning_rate": 4.1749509890126495e-07, "loss": 17.2978, "step": 47756 }, { "epoch": 0.8729595846966567, "grad_norm": 5.574948652328454, "learning_rate": 4.1737669271408823e-07, "loss": 17.3487, "step": 47757 }, { "epoch": 0.8729778638931033, "grad_norm": 4.7683150412774316, "learning_rate": 4.172583025886079e-07, "loss": 16.6192, "step": 47758 }, { "epoch": 0.8729961430895498, "grad_norm": 5.174462917201858, "learning_rate": 4.171399285252403e-07, "loss": 17.165, "step": 47759 }, { "epoch": 0.8730144222859963, "grad_norm": 6.205393407541482, "learning_rate": 4.170215705243985e-07, "loss": 17.2099, "step": 47760 }, { "epoch": 0.8730327014824428, "grad_norm": 6.107832791194652, "learning_rate": 4.169032285864988e-07, "loss": 16.8895, "step": 47761 }, { "epoch": 0.8730509806788893, "grad_norm": 4.9022432619022664, "learning_rate": 4.167849027119564e-07, "loss": 16.9397, "step": 47762 }, { "epoch": 0.8730692598753359, "grad_norm": 5.996143591339983, "learning_rate": 4.1666659290118536e-07, "loss": 17.1842, "step": 47763 }, { "epoch": 0.8730875390717824, "grad_norm": 5.437614498926958, "learning_rate": 4.1654829915459884e-07, "loss": 17.238, "step": 47764 }, { "epoch": 0.873105818268229, "grad_norm": 7.4385319279245845, "learning_rate": 4.1643002147261416e-07, "loss": 17.6686, "step": 47765 }, { "epoch": 0.8731240974646755, "grad_norm": 5.960888222382217, "learning_rate": 4.1631175985564434e-07, "loss": 17.1017, "step": 47766 }, { "epoch": 0.8731423766611219, "grad_norm": 7.822546762036235, "learning_rate": 4.1619351430410294e-07, "loss": 17.7279, "step": 47767 }, { "epoch": 0.8731606558575685, "grad_norm": 6.540972116453342, "learning_rate": 4.160752848184069e-07, "loss": 17.3187, "step": 47768 }, { "epoch": 0.873178935054015, "grad_norm": 6.992600149613256, "learning_rate": 4.1595707139896744e-07, "loss": 17.8255, "step": 47769 }, { "epoch": 0.8731972142504616, "grad_norm": 6.233607059553974, "learning_rate": 4.158388740462016e-07, "loss": 17.1242, "step": 47770 }, { "epoch": 0.8732154934469081, "grad_norm": 6.984319320904357, "learning_rate": 4.1572069276052287e-07, "loss": 17.5454, "step": 47771 }, { "epoch": 0.8732337726433546, "grad_norm": 5.8759884338557775, "learning_rate": 4.1560252754234476e-07, "loss": 17.1008, "step": 47772 }, { "epoch": 0.8732520518398011, "grad_norm": 6.892311310952121, "learning_rate": 4.154843783920831e-07, "loss": 17.4348, "step": 47773 }, { "epoch": 0.8732703310362476, "grad_norm": 6.119093137721071, "learning_rate": 4.1536624531015036e-07, "loss": 17.0376, "step": 47774 }, { "epoch": 0.8732886102326942, "grad_norm": 6.922171263968274, "learning_rate": 4.152481282969606e-07, "loss": 16.9523, "step": 47775 }, { "epoch": 0.8733068894291407, "grad_norm": 5.4098540830024335, "learning_rate": 4.1513002735292907e-07, "loss": 16.9577, "step": 47776 }, { "epoch": 0.8733251686255872, "grad_norm": 4.88166813991357, "learning_rate": 4.150119424784682e-07, "loss": 16.7936, "step": 47777 }, { "epoch": 0.8733434478220338, "grad_norm": 6.805346577100808, "learning_rate": 4.1489387367399327e-07, "loss": 17.697, "step": 47778 }, { "epoch": 0.8733617270184802, "grad_norm": 5.756821314977302, "learning_rate": 4.1477582093991666e-07, "loss": 17.314, "step": 47779 }, { "epoch": 0.8733800062149268, "grad_norm": 4.664737605690688, "learning_rate": 4.146577842766536e-07, "loss": 16.67, "step": 47780 }, { "epoch": 0.8733982854113733, "grad_norm": 5.634198267084554, "learning_rate": 4.145397636846166e-07, "loss": 17.1975, "step": 47781 }, { "epoch": 0.8734165646078198, "grad_norm": 4.529041166854807, "learning_rate": 4.144217591642202e-07, "loss": 16.8362, "step": 47782 }, { "epoch": 0.8734348438042664, "grad_norm": 6.836263618918539, "learning_rate": 4.14303770715877e-07, "loss": 17.426, "step": 47783 }, { "epoch": 0.8734531230007129, "grad_norm": 8.627825241373591, "learning_rate": 4.141857983400022e-07, "loss": 18.7036, "step": 47784 }, { "epoch": 0.8734714021971595, "grad_norm": 7.090354561692479, "learning_rate": 4.140678420370081e-07, "loss": 17.5735, "step": 47785 }, { "epoch": 0.8734896813936059, "grad_norm": 6.728318975130074, "learning_rate": 4.139499018073073e-07, "loss": 17.4924, "step": 47786 }, { "epoch": 0.8735079605900524, "grad_norm": 7.2791777229892345, "learning_rate": 4.1383197765131446e-07, "loss": 17.7973, "step": 47787 }, { "epoch": 0.873526239786499, "grad_norm": 7.4523208825422635, "learning_rate": 4.1371406956944305e-07, "loss": 17.824, "step": 47788 }, { "epoch": 0.8735445189829455, "grad_norm": 5.84901801990141, "learning_rate": 4.1359617756210504e-07, "loss": 17.1644, "step": 47789 }, { "epoch": 0.8735627981793921, "grad_norm": 7.422925494040232, "learning_rate": 4.134783016297156e-07, "loss": 17.4768, "step": 47790 }, { "epoch": 0.8735810773758386, "grad_norm": 5.599330359387488, "learning_rate": 4.1336044177268664e-07, "loss": 17.0198, "step": 47791 }, { "epoch": 0.873599356572285, "grad_norm": 5.7207897640975185, "learning_rate": 4.1324259799143005e-07, "loss": 17.187, "step": 47792 }, { "epoch": 0.8736176357687316, "grad_norm": 7.972773549234345, "learning_rate": 4.131247702863611e-07, "loss": 18.2203, "step": 47793 }, { "epoch": 0.8736359149651781, "grad_norm": 6.697150184110683, "learning_rate": 4.130069586578922e-07, "loss": 17.7692, "step": 47794 }, { "epoch": 0.8736541941616247, "grad_norm": 6.462423805080489, "learning_rate": 4.128891631064347e-07, "loss": 17.4416, "step": 47795 }, { "epoch": 0.8736724733580712, "grad_norm": 6.58896651283359, "learning_rate": 4.1277138363240263e-07, "loss": 17.4623, "step": 47796 }, { "epoch": 0.8736907525545177, "grad_norm": 6.747870490338747, "learning_rate": 4.126536202362097e-07, "loss": 17.058, "step": 47797 }, { "epoch": 0.8737090317509643, "grad_norm": 5.623842536872954, "learning_rate": 4.125358729182671e-07, "loss": 17.2205, "step": 47798 }, { "epoch": 0.8737273109474107, "grad_norm": 9.477087941341274, "learning_rate": 4.124181416789891e-07, "loss": 18.416, "step": 47799 }, { "epoch": 0.8737455901438573, "grad_norm": 8.233133003199585, "learning_rate": 4.123004265187858e-07, "loss": 17.9462, "step": 47800 }, { "epoch": 0.8737638693403038, "grad_norm": 6.222909484422108, "learning_rate": 4.1218272743807297e-07, "loss": 17.1433, "step": 47801 }, { "epoch": 0.8737821485367503, "grad_norm": 6.3214859421241245, "learning_rate": 4.1206504443726147e-07, "loss": 17.209, "step": 47802 }, { "epoch": 0.8738004277331969, "grad_norm": 5.6933957519895175, "learning_rate": 4.1194737751676315e-07, "loss": 17.0617, "step": 47803 }, { "epoch": 0.8738187069296434, "grad_norm": 7.002228472068958, "learning_rate": 4.1182972667699163e-07, "loss": 17.5788, "step": 47804 }, { "epoch": 0.8738369861260898, "grad_norm": 5.22529704211078, "learning_rate": 4.1171209191835814e-07, "loss": 16.9458, "step": 47805 }, { "epoch": 0.8738552653225364, "grad_norm": 5.6037294988822, "learning_rate": 4.1159447324127524e-07, "loss": 16.8272, "step": 47806 }, { "epoch": 0.8738735445189829, "grad_norm": 11.057856051219112, "learning_rate": 4.114768706461569e-07, "loss": 18.2536, "step": 47807 }, { "epoch": 0.8738918237154295, "grad_norm": 6.365460045280623, "learning_rate": 4.113592841334135e-07, "loss": 17.1394, "step": 47808 }, { "epoch": 0.873910102911876, "grad_norm": 5.408721876042356, "learning_rate": 4.1124171370345746e-07, "loss": 17.0655, "step": 47809 }, { "epoch": 0.8739283821083225, "grad_norm": 6.190092538295766, "learning_rate": 4.1112415935670113e-07, "loss": 17.0342, "step": 47810 }, { "epoch": 0.8739466613047691, "grad_norm": 5.704467050304464, "learning_rate": 4.1100662109355706e-07, "loss": 17.3079, "step": 47811 }, { "epoch": 0.8739649405012155, "grad_norm": 5.005145175023999, "learning_rate": 4.108890989144354e-07, "loss": 16.8118, "step": 47812 }, { "epoch": 0.8739832196976621, "grad_norm": 7.163576798610953, "learning_rate": 4.107715928197498e-07, "loss": 17.8119, "step": 47813 }, { "epoch": 0.8740014988941086, "grad_norm": 8.380759571221729, "learning_rate": 4.106541028099109e-07, "loss": 17.9198, "step": 47814 }, { "epoch": 0.8740197780905551, "grad_norm": 6.265991621727879, "learning_rate": 4.105366288853313e-07, "loss": 17.1996, "step": 47815 }, { "epoch": 0.8740380572870017, "grad_norm": 7.187717433572986, "learning_rate": 4.104191710464234e-07, "loss": 17.9162, "step": 47816 }, { "epoch": 0.8740563364834482, "grad_norm": 6.051595850559671, "learning_rate": 4.103017292935968e-07, "loss": 17.3557, "step": 47817 }, { "epoch": 0.8740746156798948, "grad_norm": 6.0841965249157735, "learning_rate": 4.1018430362726567e-07, "loss": 17.2496, "step": 47818 }, { "epoch": 0.8740928948763412, "grad_norm": 5.339829149985285, "learning_rate": 4.100668940478397e-07, "loss": 17.0142, "step": 47819 }, { "epoch": 0.8741111740727877, "grad_norm": 6.124121663282655, "learning_rate": 4.099495005557308e-07, "loss": 17.2667, "step": 47820 }, { "epoch": 0.8741294532692343, "grad_norm": 5.520863656552852, "learning_rate": 4.098321231513513e-07, "loss": 17.2099, "step": 47821 }, { "epoch": 0.8741477324656808, "grad_norm": 6.131580915604188, "learning_rate": 4.097147618351116e-07, "loss": 17.3049, "step": 47822 }, { "epoch": 0.8741660116621274, "grad_norm": 5.663952017979416, "learning_rate": 4.0959741660742237e-07, "loss": 17.3255, "step": 47823 }, { "epoch": 0.8741842908585739, "grad_norm": 4.678573968789232, "learning_rate": 4.0948008746869603e-07, "loss": 16.7546, "step": 47824 }, { "epoch": 0.8742025700550203, "grad_norm": 6.574018786155067, "learning_rate": 4.093627744193446e-07, "loss": 17.3105, "step": 47825 }, { "epoch": 0.8742208492514669, "grad_norm": 6.339810646840026, "learning_rate": 4.0924547745977763e-07, "loss": 17.6863, "step": 47826 }, { "epoch": 0.8742391284479134, "grad_norm": 6.706494518131522, "learning_rate": 4.0912819659040757e-07, "loss": 17.4236, "step": 47827 }, { "epoch": 0.87425740764436, "grad_norm": 5.434411285457652, "learning_rate": 4.090109318116436e-07, "loss": 16.9627, "step": 47828 }, { "epoch": 0.8742756868408065, "grad_norm": 5.495857574051327, "learning_rate": 4.0889368312389934e-07, "loss": 17.1346, "step": 47829 }, { "epoch": 0.874293966037253, "grad_norm": 5.918465610841658, "learning_rate": 4.087764505275843e-07, "loss": 17.1842, "step": 47830 }, { "epoch": 0.8743122452336995, "grad_norm": 6.500897984401601, "learning_rate": 4.0865923402310826e-07, "loss": 17.4628, "step": 47831 }, { "epoch": 0.874330524430146, "grad_norm": 5.748159517115967, "learning_rate": 4.0854203361088365e-07, "loss": 17.1798, "step": 47832 }, { "epoch": 0.8743488036265926, "grad_norm": 6.433552645962309, "learning_rate": 4.0842484929132064e-07, "loss": 17.5271, "step": 47833 }, { "epoch": 0.8743670828230391, "grad_norm": 5.716012657206913, "learning_rate": 4.0830768106482956e-07, "loss": 17.1105, "step": 47834 }, { "epoch": 0.8743853620194856, "grad_norm": 6.319048941726648, "learning_rate": 4.0819052893182276e-07, "loss": 17.5289, "step": 47835 }, { "epoch": 0.8744036412159322, "grad_norm": 4.98697438974838, "learning_rate": 4.0807339289271e-07, "loss": 17.0495, "step": 47836 }, { "epoch": 0.8744219204123787, "grad_norm": 4.371913291638512, "learning_rate": 4.079562729479003e-07, "loss": 16.7429, "step": 47837 }, { "epoch": 0.8744401996088252, "grad_norm": 6.109976280636227, "learning_rate": 4.078391690978062e-07, "loss": 17.5492, "step": 47838 }, { "epoch": 0.8744584788052717, "grad_norm": 5.402683422966858, "learning_rate": 4.077220813428373e-07, "loss": 17.2051, "step": 47839 }, { "epoch": 0.8744767580017182, "grad_norm": 5.55419529978753, "learning_rate": 4.076050096834033e-07, "loss": 17.0348, "step": 47840 }, { "epoch": 0.8744950371981648, "grad_norm": 6.067132573186041, "learning_rate": 4.0748795411991503e-07, "loss": 17.0482, "step": 47841 }, { "epoch": 0.8745133163946113, "grad_norm": 7.892385919440081, "learning_rate": 4.073709146527838e-07, "loss": 16.9901, "step": 47842 }, { "epoch": 0.8745315955910579, "grad_norm": 4.929018408719507, "learning_rate": 4.0725389128241866e-07, "loss": 16.735, "step": 47843 }, { "epoch": 0.8745498747875043, "grad_norm": 5.456550475564857, "learning_rate": 4.0713688400923044e-07, "loss": 17.0042, "step": 47844 }, { "epoch": 0.8745681539839508, "grad_norm": 6.797641081502732, "learning_rate": 4.070198928336283e-07, "loss": 17.5512, "step": 47845 }, { "epoch": 0.8745864331803974, "grad_norm": 5.911845324340681, "learning_rate": 4.069029177560235e-07, "loss": 17.0607, "step": 47846 }, { "epoch": 0.8746047123768439, "grad_norm": 6.612355378778996, "learning_rate": 4.0678595877682525e-07, "loss": 17.3374, "step": 47847 }, { "epoch": 0.8746229915732905, "grad_norm": 4.995393757371608, "learning_rate": 4.0666901589644313e-07, "loss": 17.1682, "step": 47848 }, { "epoch": 0.874641270769737, "grad_norm": 5.182686658866216, "learning_rate": 4.0655208911528855e-07, "loss": 17.1126, "step": 47849 }, { "epoch": 0.8746595499661834, "grad_norm": 5.165354538365904, "learning_rate": 4.064351784337689e-07, "loss": 16.9337, "step": 47850 }, { "epoch": 0.87467782916263, "grad_norm": 6.650041141399835, "learning_rate": 4.0631828385229553e-07, "loss": 17.3496, "step": 47851 }, { "epoch": 0.8746961083590765, "grad_norm": 7.258169327643997, "learning_rate": 4.062014053712793e-07, "loss": 17.5944, "step": 47852 }, { "epoch": 0.8747143875555231, "grad_norm": 7.357023798901476, "learning_rate": 4.060845429911281e-07, "loss": 17.2297, "step": 47853 }, { "epoch": 0.8747326667519696, "grad_norm": 6.403009955583419, "learning_rate": 4.059676967122511e-07, "loss": 17.7696, "step": 47854 }, { "epoch": 0.8747509459484161, "grad_norm": 6.958954271417377, "learning_rate": 4.0585086653505913e-07, "loss": 17.2583, "step": 47855 }, { "epoch": 0.8747692251448627, "grad_norm": 5.5421774390189436, "learning_rate": 4.0573405245996176e-07, "loss": 17.1197, "step": 47856 }, { "epoch": 0.8747875043413091, "grad_norm": 6.3736935020543495, "learning_rate": 4.0561725448736655e-07, "loss": 17.3957, "step": 47857 }, { "epoch": 0.8748057835377557, "grad_norm": 5.290908403945523, "learning_rate": 4.055004726176853e-07, "loss": 17.0066, "step": 47858 }, { "epoch": 0.8748240627342022, "grad_norm": 6.175104777604914, "learning_rate": 4.0538370685132554e-07, "loss": 17.3273, "step": 47859 }, { "epoch": 0.8748423419306487, "grad_norm": 6.723290736536408, "learning_rate": 4.052669571886969e-07, "loss": 17.6104, "step": 47860 }, { "epoch": 0.8748606211270953, "grad_norm": 5.402235255989518, "learning_rate": 4.0515022363021015e-07, "loss": 16.9335, "step": 47861 }, { "epoch": 0.8748789003235418, "grad_norm": 9.193602405215394, "learning_rate": 4.0503350617627167e-07, "loss": 18.4227, "step": 47862 }, { "epoch": 0.8748971795199884, "grad_norm": 6.200613071440265, "learning_rate": 4.049168048272933e-07, "loss": 17.3638, "step": 47863 }, { "epoch": 0.8749154587164348, "grad_norm": 6.3526158825978385, "learning_rate": 4.048001195836826e-07, "loss": 17.2502, "step": 47864 }, { "epoch": 0.8749337379128813, "grad_norm": 5.701991241906437, "learning_rate": 4.04683450445848e-07, "loss": 17.0415, "step": 47865 }, { "epoch": 0.8749520171093279, "grad_norm": 6.884381203640804, "learning_rate": 4.045667974141998e-07, "loss": 17.323, "step": 47866 }, { "epoch": 0.8749702963057744, "grad_norm": 7.183314140168048, "learning_rate": 4.0445016048914654e-07, "loss": 17.9007, "step": 47867 }, { "epoch": 0.874988575502221, "grad_norm": 5.574961702474009, "learning_rate": 4.043335396710957e-07, "loss": 16.9691, "step": 47868 }, { "epoch": 0.8750068546986675, "grad_norm": 6.007919596907283, "learning_rate": 4.042169349604569e-07, "loss": 17.1813, "step": 47869 }, { "epoch": 0.8750251338951139, "grad_norm": 5.917103852181085, "learning_rate": 4.041003463576398e-07, "loss": 17.3769, "step": 47870 }, { "epoch": 0.8750434130915605, "grad_norm": 6.023710860649235, "learning_rate": 4.039837738630514e-07, "loss": 17.1171, "step": 47871 }, { "epoch": 0.875061692288007, "grad_norm": 5.212858144905693, "learning_rate": 4.038672174771019e-07, "loss": 17.0415, "step": 47872 }, { "epoch": 0.8750799714844535, "grad_norm": 4.5410798802894545, "learning_rate": 4.037506772001987e-07, "loss": 16.9616, "step": 47873 }, { "epoch": 0.8750982506809001, "grad_norm": 6.784845338512484, "learning_rate": 4.036341530327498e-07, "loss": 17.4082, "step": 47874 }, { "epoch": 0.8751165298773466, "grad_norm": 5.2933442674456055, "learning_rate": 4.0351764497516545e-07, "loss": 16.8025, "step": 47875 }, { "epoch": 0.8751348090737932, "grad_norm": 5.758719524169023, "learning_rate": 4.03401153027852e-07, "loss": 17.1699, "step": 47876 }, { "epoch": 0.8751530882702396, "grad_norm": 6.63022555426752, "learning_rate": 4.032846771912191e-07, "loss": 17.2971, "step": 47877 }, { "epoch": 0.8751713674666861, "grad_norm": 6.518902368731087, "learning_rate": 4.031682174656737e-07, "loss": 17.3534, "step": 47878 }, { "epoch": 0.8751896466631327, "grad_norm": 5.8111554460589385, "learning_rate": 4.030517738516254e-07, "loss": 17.1785, "step": 47879 }, { "epoch": 0.8752079258595792, "grad_norm": 9.089443547741867, "learning_rate": 4.029353463494823e-07, "loss": 18.4219, "step": 47880 }, { "epoch": 0.8752262050560258, "grad_norm": 5.353138671343205, "learning_rate": 4.0281893495965175e-07, "loss": 17.0311, "step": 47881 }, { "epoch": 0.8752444842524723, "grad_norm": 6.813131623019263, "learning_rate": 4.027025396825407e-07, "loss": 17.5217, "step": 47882 }, { "epoch": 0.8752627634489187, "grad_norm": 6.167241855968303, "learning_rate": 4.025861605185599e-07, "loss": 17.1754, "step": 47883 }, { "epoch": 0.8752810426453653, "grad_norm": 6.430092929727067, "learning_rate": 4.0246979746811523e-07, "loss": 17.2638, "step": 47884 }, { "epoch": 0.8752993218418118, "grad_norm": 6.565423915109771, "learning_rate": 4.02353450531614e-07, "loss": 17.4683, "step": 47885 }, { "epoch": 0.8753176010382584, "grad_norm": 6.995880218209743, "learning_rate": 4.02237119709466e-07, "loss": 17.7005, "step": 47886 }, { "epoch": 0.8753358802347049, "grad_norm": 5.874265389564046, "learning_rate": 4.021208050020775e-07, "loss": 17.0012, "step": 47887 }, { "epoch": 0.8753541594311514, "grad_norm": 6.501457453449809, "learning_rate": 4.02004506409856e-07, "loss": 17.3073, "step": 47888 }, { "epoch": 0.875372438627598, "grad_norm": 5.938974104356835, "learning_rate": 4.018882239332106e-07, "loss": 17.4176, "step": 47889 }, { "epoch": 0.8753907178240444, "grad_norm": 6.070322968332683, "learning_rate": 4.017719575725476e-07, "loss": 17.1285, "step": 47890 }, { "epoch": 0.875408997020491, "grad_norm": 5.5078747789416225, "learning_rate": 4.016557073282751e-07, "loss": 17.0173, "step": 47891 }, { "epoch": 0.8754272762169375, "grad_norm": 4.9732865213844235, "learning_rate": 4.01539473200801e-07, "loss": 16.8339, "step": 47892 }, { "epoch": 0.875445555413384, "grad_norm": 6.791424588137645, "learning_rate": 4.014232551905306e-07, "loss": 17.4187, "step": 47893 }, { "epoch": 0.8754638346098306, "grad_norm": 6.551635535310099, "learning_rate": 4.013070532978736e-07, "loss": 17.1699, "step": 47894 }, { "epoch": 0.875482113806277, "grad_norm": 5.164275266821645, "learning_rate": 4.011908675232362e-07, "loss": 16.9468, "step": 47895 }, { "epoch": 0.8755003930027236, "grad_norm": 5.697666392121509, "learning_rate": 4.010746978670255e-07, "loss": 17.0853, "step": 47896 }, { "epoch": 0.8755186721991701, "grad_norm": 5.661371383502658, "learning_rate": 4.009585443296482e-07, "loss": 17.0922, "step": 47897 }, { "epoch": 0.8755369513956166, "grad_norm": 5.032090551484041, "learning_rate": 4.0084240691151357e-07, "loss": 17.0487, "step": 47898 }, { "epoch": 0.8755552305920632, "grad_norm": 6.282460577501798, "learning_rate": 4.0072628561302563e-07, "loss": 17.5363, "step": 47899 }, { "epoch": 0.8755735097885097, "grad_norm": 7.26617101352476, "learning_rate": 4.006101804345941e-07, "loss": 17.4499, "step": 47900 }, { "epoch": 0.8755917889849563, "grad_norm": 6.955876877785109, "learning_rate": 4.0049409137662534e-07, "loss": 17.5382, "step": 47901 }, { "epoch": 0.8756100681814027, "grad_norm": 4.779786642619299, "learning_rate": 4.0037801843952397e-07, "loss": 16.839, "step": 47902 }, { "epoch": 0.8756283473778492, "grad_norm": 4.798241924828031, "learning_rate": 4.0026196162370025e-07, "loss": 16.7415, "step": 47903 }, { "epoch": 0.8756466265742958, "grad_norm": 6.683771443279209, "learning_rate": 4.0014592092955774e-07, "loss": 17.6636, "step": 47904 }, { "epoch": 0.8756649057707423, "grad_norm": 5.889316975584809, "learning_rate": 4.0002989635750556e-07, "loss": 17.1489, "step": 47905 }, { "epoch": 0.8756831849671889, "grad_norm": 5.817645304895511, "learning_rate": 3.999138879079484e-07, "loss": 17.1932, "step": 47906 }, { "epoch": 0.8757014641636354, "grad_norm": 8.404996239943138, "learning_rate": 3.997978955812942e-07, "loss": 17.703, "step": 47907 }, { "epoch": 0.8757197433600818, "grad_norm": 6.713499892195158, "learning_rate": 3.9968191937795e-07, "loss": 17.5639, "step": 47908 }, { "epoch": 0.8757380225565284, "grad_norm": 4.859007430927628, "learning_rate": 3.9956595929832143e-07, "loss": 16.9448, "step": 47909 }, { "epoch": 0.8757563017529749, "grad_norm": 5.9955064865621175, "learning_rate": 3.994500153428138e-07, "loss": 17.4085, "step": 47910 }, { "epoch": 0.8757745809494215, "grad_norm": 7.395840193165087, "learning_rate": 3.9933408751183566e-07, "loss": 17.6669, "step": 47911 }, { "epoch": 0.875792860145868, "grad_norm": 6.4285811842455445, "learning_rate": 3.992181758057928e-07, "loss": 17.3115, "step": 47912 }, { "epoch": 0.8758111393423145, "grad_norm": 6.437380657696544, "learning_rate": 3.991022802250899e-07, "loss": 17.569, "step": 47913 }, { "epoch": 0.8758294185387611, "grad_norm": 6.623408731799499, "learning_rate": 3.9898640077013386e-07, "loss": 17.5015, "step": 47914 }, { "epoch": 0.8758476977352075, "grad_norm": 7.267789109718845, "learning_rate": 3.9887053744133266e-07, "loss": 17.6994, "step": 47915 }, { "epoch": 0.8758659769316541, "grad_norm": 6.741114171052834, "learning_rate": 3.987546902390893e-07, "loss": 17.375, "step": 47916 }, { "epoch": 0.8758842561281006, "grad_norm": 7.155647093096662, "learning_rate": 3.9863885916381296e-07, "loss": 17.6416, "step": 47917 }, { "epoch": 0.8759025353245471, "grad_norm": 6.3247080269120834, "learning_rate": 3.9852304421590825e-07, "loss": 17.3035, "step": 47918 }, { "epoch": 0.8759208145209937, "grad_norm": 6.052448689163665, "learning_rate": 3.984072453957799e-07, "loss": 17.4525, "step": 47919 }, { "epoch": 0.8759390937174402, "grad_norm": 6.249590632771711, "learning_rate": 3.982914627038353e-07, "loss": 17.3601, "step": 47920 }, { "epoch": 0.8759573729138868, "grad_norm": 10.608073722159341, "learning_rate": 3.981756961404798e-07, "loss": 18.1935, "step": 47921 }, { "epoch": 0.8759756521103332, "grad_norm": 8.878128760625643, "learning_rate": 3.980599457061196e-07, "loss": 17.8849, "step": 47922 }, { "epoch": 0.8759939313067797, "grad_norm": 6.69700752578946, "learning_rate": 3.979442114011589e-07, "loss": 17.2958, "step": 47923 }, { "epoch": 0.8760122105032263, "grad_norm": 5.9869121839271076, "learning_rate": 3.978284932260046e-07, "loss": 17.1151, "step": 47924 }, { "epoch": 0.8760304896996728, "grad_norm": 6.693049848092069, "learning_rate": 3.9771279118106244e-07, "loss": 17.2498, "step": 47925 }, { "epoch": 0.8760487688961194, "grad_norm": 8.132299539993962, "learning_rate": 3.9759710526673824e-07, "loss": 18.012, "step": 47926 }, { "epoch": 0.8760670480925659, "grad_norm": 6.096717558160557, "learning_rate": 3.9748143548343553e-07, "loss": 17.218, "step": 47927 }, { "epoch": 0.8760853272890123, "grad_norm": 5.606353134616341, "learning_rate": 3.973657818315624e-07, "loss": 17.2138, "step": 47928 }, { "epoch": 0.8761036064854589, "grad_norm": 6.3406316230458435, "learning_rate": 3.972501443115223e-07, "loss": 17.4134, "step": 47929 }, { "epoch": 0.8761218856819054, "grad_norm": 6.657774954777988, "learning_rate": 3.9713452292372003e-07, "loss": 17.5829, "step": 47930 }, { "epoch": 0.876140164878352, "grad_norm": 5.972117281550948, "learning_rate": 3.970189176685635e-07, "loss": 17.1472, "step": 47931 }, { "epoch": 0.8761584440747985, "grad_norm": 7.394329176465875, "learning_rate": 3.9690332854645464e-07, "loss": 17.6625, "step": 47932 }, { "epoch": 0.876176723271245, "grad_norm": 5.552252959286116, "learning_rate": 3.9678775555780037e-07, "loss": 16.8498, "step": 47933 }, { "epoch": 0.8761950024676916, "grad_norm": 4.6896489361581315, "learning_rate": 3.9667219870300643e-07, "loss": 16.7651, "step": 47934 }, { "epoch": 0.876213281664138, "grad_norm": 6.235180457808831, "learning_rate": 3.96556657982477e-07, "loss": 17.1731, "step": 47935 }, { "epoch": 0.8762315608605846, "grad_norm": 8.51242894986989, "learning_rate": 3.9644113339661614e-07, "loss": 18.2205, "step": 47936 }, { "epoch": 0.8762498400570311, "grad_norm": 6.421331495815501, "learning_rate": 3.9632562494583083e-07, "loss": 17.4702, "step": 47937 }, { "epoch": 0.8762681192534776, "grad_norm": 5.859556234126456, "learning_rate": 3.962101326305234e-07, "loss": 17.2939, "step": 47938 }, { "epoch": 0.8762863984499242, "grad_norm": 6.135082021174332, "learning_rate": 3.960946564511009e-07, "loss": 17.2024, "step": 47939 }, { "epoch": 0.8763046776463707, "grad_norm": 6.838801458281422, "learning_rate": 3.9597919640796736e-07, "loss": 17.4931, "step": 47940 }, { "epoch": 0.8763229568428171, "grad_norm": 5.081893058335542, "learning_rate": 3.958637525015258e-07, "loss": 16.9762, "step": 47941 }, { "epoch": 0.8763412360392637, "grad_norm": 6.2923320696935665, "learning_rate": 3.957483247321825e-07, "loss": 17.252, "step": 47942 }, { "epoch": 0.8763595152357102, "grad_norm": 6.197395024110918, "learning_rate": 3.956329131003428e-07, "loss": 17.028, "step": 47943 }, { "epoch": 0.8763777944321568, "grad_norm": 7.850646586388335, "learning_rate": 3.9551751760640966e-07, "loss": 17.7776, "step": 47944 }, { "epoch": 0.8763960736286033, "grad_norm": 18.045485185899448, "learning_rate": 3.9540213825078833e-07, "loss": 18.2475, "step": 47945 }, { "epoch": 0.8764143528250498, "grad_norm": 9.403792664503772, "learning_rate": 3.952867750338835e-07, "loss": 17.7332, "step": 47946 }, { "epoch": 0.8764326320214963, "grad_norm": 5.9271304032508905, "learning_rate": 3.9517142795609754e-07, "loss": 17.1544, "step": 47947 }, { "epoch": 0.8764509112179428, "grad_norm": 5.891402323184153, "learning_rate": 3.950560970178374e-07, "loss": 16.9981, "step": 47948 }, { "epoch": 0.8764691904143894, "grad_norm": 6.2415320789190405, "learning_rate": 3.949407822195056e-07, "loss": 17.5292, "step": 47949 }, { "epoch": 0.8764874696108359, "grad_norm": 6.557722896159241, "learning_rate": 3.948254835615062e-07, "loss": 17.3538, "step": 47950 }, { "epoch": 0.8765057488072824, "grad_norm": 6.207307229121512, "learning_rate": 3.947102010442438e-07, "loss": 17.1374, "step": 47951 }, { "epoch": 0.876524028003729, "grad_norm": 5.660890969843175, "learning_rate": 3.945949346681227e-07, "loss": 17.1307, "step": 47952 }, { "epoch": 0.8765423072001755, "grad_norm": 6.267197515855154, "learning_rate": 3.9447968443354745e-07, "loss": 17.6684, "step": 47953 }, { "epoch": 0.876560586396622, "grad_norm": 4.977515928659413, "learning_rate": 3.9436445034092106e-07, "loss": 16.7996, "step": 47954 }, { "epoch": 0.8765788655930685, "grad_norm": 6.525935928676057, "learning_rate": 3.942492323906466e-07, "loss": 17.6608, "step": 47955 }, { "epoch": 0.876597144789515, "grad_norm": 5.950994366007358, "learning_rate": 3.9413403058313036e-07, "loss": 16.9964, "step": 47956 }, { "epoch": 0.8766154239859616, "grad_norm": 6.891543450198594, "learning_rate": 3.940188449187743e-07, "loss": 17.5597, "step": 47957 }, { "epoch": 0.8766337031824081, "grad_norm": 5.88830138203126, "learning_rate": 3.939036753979819e-07, "loss": 17.2828, "step": 47958 }, { "epoch": 0.8766519823788547, "grad_norm": 5.981824262925089, "learning_rate": 3.937885220211579e-07, "loss": 17.4841, "step": 47959 }, { "epoch": 0.8766702615753011, "grad_norm": 6.9415998605129445, "learning_rate": 3.936733847887048e-07, "loss": 17.6263, "step": 47960 }, { "epoch": 0.8766885407717476, "grad_norm": 5.88232551997907, "learning_rate": 3.935582637010266e-07, "loss": 17.1648, "step": 47961 }, { "epoch": 0.8767068199681942, "grad_norm": 4.893008358977822, "learning_rate": 3.93443158758528e-07, "loss": 16.8741, "step": 47962 }, { "epoch": 0.8767250991646407, "grad_norm": 6.5374279883610145, "learning_rate": 3.9332806996161154e-07, "loss": 17.557, "step": 47963 }, { "epoch": 0.8767433783610873, "grad_norm": 5.496709734756734, "learning_rate": 3.9321299731067906e-07, "loss": 16.9151, "step": 47964 }, { "epoch": 0.8767616575575338, "grad_norm": 7.225153828783537, "learning_rate": 3.930979408061364e-07, "loss": 18.0056, "step": 47965 }, { "epoch": 0.8767799367539802, "grad_norm": 5.052227715973484, "learning_rate": 3.9298290044838594e-07, "loss": 16.9712, "step": 47966 }, { "epoch": 0.8767982159504268, "grad_norm": 6.5952046063813565, "learning_rate": 3.928678762378296e-07, "loss": 17.8767, "step": 47967 }, { "epoch": 0.8768164951468733, "grad_norm": 5.254882474065372, "learning_rate": 3.927528681748716e-07, "loss": 17.01, "step": 47968 }, { "epoch": 0.8768347743433199, "grad_norm": 6.082416092383172, "learning_rate": 3.926378762599153e-07, "loss": 17.2379, "step": 47969 }, { "epoch": 0.8768530535397664, "grad_norm": 4.921474876779037, "learning_rate": 3.925229004933634e-07, "loss": 16.9275, "step": 47970 }, { "epoch": 0.8768713327362129, "grad_norm": 5.77861533244735, "learning_rate": 3.924079408756193e-07, "loss": 17.2571, "step": 47971 }, { "epoch": 0.8768896119326595, "grad_norm": 7.433800169109578, "learning_rate": 3.9229299740708493e-07, "loss": 17.6351, "step": 47972 }, { "epoch": 0.8769078911291059, "grad_norm": 6.0553021057093375, "learning_rate": 3.921780700881644e-07, "loss": 17.6755, "step": 47973 }, { "epoch": 0.8769261703255525, "grad_norm": 6.648256695613434, "learning_rate": 3.920631589192603e-07, "loss": 17.7417, "step": 47974 }, { "epoch": 0.876944449521999, "grad_norm": 6.352805355156446, "learning_rate": 3.919482639007738e-07, "loss": 17.5303, "step": 47975 }, { "epoch": 0.8769627287184455, "grad_norm": 5.695461100533514, "learning_rate": 3.9183338503310965e-07, "loss": 16.9512, "step": 47976 }, { "epoch": 0.8769810079148921, "grad_norm": 5.4952910256920235, "learning_rate": 3.917185223166686e-07, "loss": 16.9382, "step": 47977 }, { "epoch": 0.8769992871113386, "grad_norm": 6.877325843566615, "learning_rate": 3.916036757518543e-07, "loss": 17.6327, "step": 47978 }, { "epoch": 0.8770175663077852, "grad_norm": 6.46552962379414, "learning_rate": 3.9148884533907027e-07, "loss": 17.4936, "step": 47979 }, { "epoch": 0.8770358455042316, "grad_norm": 4.574205108643897, "learning_rate": 3.9137403107871784e-07, "loss": 16.9297, "step": 47980 }, { "epoch": 0.8770541247006781, "grad_norm": 7.28895179321988, "learning_rate": 3.912592329711984e-07, "loss": 17.7619, "step": 47981 }, { "epoch": 0.8770724038971247, "grad_norm": 6.901721685528703, "learning_rate": 3.9114445101691656e-07, "loss": 17.6101, "step": 47982 }, { "epoch": 0.8770906830935712, "grad_norm": 7.091042175318048, "learning_rate": 3.910296852162726e-07, "loss": 18.0953, "step": 47983 }, { "epoch": 0.8771089622900178, "grad_norm": 6.823472111807261, "learning_rate": 3.9091493556967064e-07, "loss": 17.7047, "step": 47984 }, { "epoch": 0.8771272414864643, "grad_norm": 5.747011335052741, "learning_rate": 3.908002020775115e-07, "loss": 17.1831, "step": 47985 }, { "epoch": 0.8771455206829107, "grad_norm": 4.272240355757263, "learning_rate": 3.90685484740197e-07, "loss": 16.6497, "step": 47986 }, { "epoch": 0.8771637998793573, "grad_norm": 6.673902969099807, "learning_rate": 3.9057078355813014e-07, "loss": 17.6627, "step": 47987 }, { "epoch": 0.8771820790758038, "grad_norm": 5.189385732705821, "learning_rate": 3.9045609853171297e-07, "loss": 17.046, "step": 47988 }, { "epoch": 0.8772003582722504, "grad_norm": 5.406408913117497, "learning_rate": 3.903414296613467e-07, "loss": 17.0696, "step": 47989 }, { "epoch": 0.8772186374686969, "grad_norm": 6.0426306435683355, "learning_rate": 3.902267769474349e-07, "loss": 17.2812, "step": 47990 }, { "epoch": 0.8772369166651434, "grad_norm": 5.267812092975678, "learning_rate": 3.901121403903779e-07, "loss": 17.1362, "step": 47991 }, { "epoch": 0.87725519586159, "grad_norm": 6.187266408428418, "learning_rate": 3.89997519990577e-07, "loss": 17.4288, "step": 47992 }, { "epoch": 0.8772734750580364, "grad_norm": 7.137525240912641, "learning_rate": 3.8988291574843517e-07, "loss": 17.7645, "step": 47993 }, { "epoch": 0.877291754254483, "grad_norm": 5.234596288180156, "learning_rate": 3.8976832766435435e-07, "loss": 17.1487, "step": 47994 }, { "epoch": 0.8773100334509295, "grad_norm": 6.131263341991007, "learning_rate": 3.896537557387342e-07, "loss": 17.3035, "step": 47995 }, { "epoch": 0.877328312647376, "grad_norm": 7.373532101202367, "learning_rate": 3.8953919997197776e-07, "loss": 17.6296, "step": 47996 }, { "epoch": 0.8773465918438226, "grad_norm": 5.860909548341488, "learning_rate": 3.8942466036448634e-07, "loss": 17.2445, "step": 47997 }, { "epoch": 0.8773648710402691, "grad_norm": 4.761377537739163, "learning_rate": 3.8931013691666186e-07, "loss": 16.7258, "step": 47998 }, { "epoch": 0.8773831502367156, "grad_norm": 7.851451911044515, "learning_rate": 3.8919562962890565e-07, "loss": 17.2635, "step": 47999 }, { "epoch": 0.8774014294331621, "grad_norm": 5.383093670108628, "learning_rate": 3.89081138501618e-07, "loss": 17.2567, "step": 48000 }, { "epoch": 0.8774197086296086, "grad_norm": 6.436828875923494, "learning_rate": 3.889666635352013e-07, "loss": 17.575, "step": 48001 }, { "epoch": 0.8774379878260552, "grad_norm": 6.255008737236952, "learning_rate": 3.8885220473005637e-07, "loss": 17.1228, "step": 48002 }, { "epoch": 0.8774562670225017, "grad_norm": 6.230767679789119, "learning_rate": 3.88737762086584e-07, "loss": 17.2641, "step": 48003 }, { "epoch": 0.8774745462189483, "grad_norm": 5.743669974476828, "learning_rate": 3.8862333560518553e-07, "loss": 17.1332, "step": 48004 }, { "epoch": 0.8774928254153948, "grad_norm": 6.000018282983704, "learning_rate": 3.885089252862623e-07, "loss": 17.3103, "step": 48005 }, { "epoch": 0.8775111046118412, "grad_norm": 8.789671864269486, "learning_rate": 3.883945311302145e-07, "loss": 18.9651, "step": 48006 }, { "epoch": 0.8775293838082878, "grad_norm": 6.681114201361789, "learning_rate": 3.882801531374447e-07, "loss": 17.5445, "step": 48007 }, { "epoch": 0.8775476630047343, "grad_norm": 6.697064819401038, "learning_rate": 3.8816579130835254e-07, "loss": 17.3236, "step": 48008 }, { "epoch": 0.8775659422011808, "grad_norm": 7.833362284968332, "learning_rate": 3.880514456433382e-07, "loss": 18.5826, "step": 48009 }, { "epoch": 0.8775842213976274, "grad_norm": 5.336758212762168, "learning_rate": 3.879371161428047e-07, "loss": 16.9095, "step": 48010 }, { "epoch": 0.8776025005940739, "grad_norm": 6.447230161470789, "learning_rate": 3.8782280280715065e-07, "loss": 17.377, "step": 48011 }, { "epoch": 0.8776207797905204, "grad_norm": 9.47979668142011, "learning_rate": 3.877085056367774e-07, "loss": 18.2379, "step": 48012 }, { "epoch": 0.8776390589869669, "grad_norm": 6.033725032607029, "learning_rate": 3.8759422463208575e-07, "loss": 17.1395, "step": 48013 }, { "epoch": 0.8776573381834134, "grad_norm": 4.634980810637408, "learning_rate": 3.8747995979347583e-07, "loss": 16.7639, "step": 48014 }, { "epoch": 0.87767561737986, "grad_norm": 6.055914435548879, "learning_rate": 3.87365711121348e-07, "loss": 17.2657, "step": 48015 }, { "epoch": 0.8776938965763065, "grad_norm": 6.7516567536420595, "learning_rate": 3.872514786161041e-07, "loss": 17.3022, "step": 48016 }, { "epoch": 0.8777121757727531, "grad_norm": 7.2449699680659965, "learning_rate": 3.8713726227814273e-07, "loss": 17.6352, "step": 48017 }, { "epoch": 0.8777304549691995, "grad_norm": 6.902697806418614, "learning_rate": 3.8702306210786575e-07, "loss": 17.3609, "step": 48018 }, { "epoch": 0.877748734165646, "grad_norm": 7.622194492983825, "learning_rate": 3.8690887810567236e-07, "loss": 17.2845, "step": 48019 }, { "epoch": 0.8777670133620926, "grad_norm": 7.2234215133397575, "learning_rate": 3.867947102719627e-07, "loss": 17.6403, "step": 48020 }, { "epoch": 0.8777852925585391, "grad_norm": 5.814764373609633, "learning_rate": 3.866805586071376e-07, "loss": 17.2593, "step": 48021 }, { "epoch": 0.8778035717549857, "grad_norm": 6.535236651119669, "learning_rate": 3.8656642311159733e-07, "loss": 17.1665, "step": 48022 }, { "epoch": 0.8778218509514322, "grad_norm": 6.265260092109108, "learning_rate": 3.864523037857404e-07, "loss": 17.4374, "step": 48023 }, { "epoch": 0.8778401301478787, "grad_norm": 6.255743820092084, "learning_rate": 3.863382006299676e-07, "loss": 17.3324, "step": 48024 }, { "epoch": 0.8778584093443252, "grad_norm": 6.800566520351758, "learning_rate": 3.862241136446798e-07, "loss": 17.4651, "step": 48025 }, { "epoch": 0.8778766885407717, "grad_norm": 6.366276048392612, "learning_rate": 3.861100428302755e-07, "loss": 17.2842, "step": 48026 }, { "epoch": 0.8778949677372183, "grad_norm": 5.193876702599773, "learning_rate": 3.859959881871561e-07, "loss": 17.1241, "step": 48027 }, { "epoch": 0.8779132469336648, "grad_norm": 5.241909739502713, "learning_rate": 3.858819497157196e-07, "loss": 16.9911, "step": 48028 }, { "epoch": 0.8779315261301113, "grad_norm": 7.79158074669503, "learning_rate": 3.857679274163667e-07, "loss": 17.9998, "step": 48029 }, { "epoch": 0.8779498053265579, "grad_norm": 6.769555752758766, "learning_rate": 3.8565392128949717e-07, "loss": 17.68, "step": 48030 }, { "epoch": 0.8779680845230043, "grad_norm": 6.735969126883386, "learning_rate": 3.855399313355096e-07, "loss": 17.2735, "step": 48031 }, { "epoch": 0.8779863637194509, "grad_norm": 5.088565748992564, "learning_rate": 3.854259575548047e-07, "loss": 16.81, "step": 48032 }, { "epoch": 0.8780046429158974, "grad_norm": 6.911309300996425, "learning_rate": 3.853119999477806e-07, "loss": 17.4242, "step": 48033 }, { "epoch": 0.8780229221123439, "grad_norm": 6.537581649377496, "learning_rate": 3.8519805851483793e-07, "loss": 17.2426, "step": 48034 }, { "epoch": 0.8780412013087905, "grad_norm": 5.965487691606435, "learning_rate": 3.850841332563759e-07, "loss": 17.3885, "step": 48035 }, { "epoch": 0.878059480505237, "grad_norm": 6.333902618317955, "learning_rate": 3.849702241727937e-07, "loss": 17.3975, "step": 48036 }, { "epoch": 0.8780777597016836, "grad_norm": 5.930381163499507, "learning_rate": 3.8485633126448927e-07, "loss": 17.1677, "step": 48037 }, { "epoch": 0.87809603889813, "grad_norm": 5.719235727531391, "learning_rate": 3.8474245453186453e-07, "loss": 17.3167, "step": 48038 }, { "epoch": 0.8781143180945765, "grad_norm": 4.845208537200841, "learning_rate": 3.8462859397531635e-07, "loss": 16.9827, "step": 48039 }, { "epoch": 0.8781325972910231, "grad_norm": 5.515694321804761, "learning_rate": 3.8451474959524395e-07, "loss": 17.1203, "step": 48040 }, { "epoch": 0.8781508764874696, "grad_norm": 7.651601796470401, "learning_rate": 3.844009213920463e-07, "loss": 17.8065, "step": 48041 }, { "epoch": 0.8781691556839162, "grad_norm": 5.655081684437698, "learning_rate": 3.8428710936612433e-07, "loss": 17.4158, "step": 48042 }, { "epoch": 0.8781874348803627, "grad_norm": 6.769846825617437, "learning_rate": 3.8417331351787433e-07, "loss": 17.6097, "step": 48043 }, { "epoch": 0.8782057140768091, "grad_norm": 7.186569196338976, "learning_rate": 3.8405953384769714e-07, "loss": 17.6065, "step": 48044 }, { "epoch": 0.8782239932732557, "grad_norm": 6.280587956195033, "learning_rate": 3.839457703559901e-07, "loss": 17.3225, "step": 48045 }, { "epoch": 0.8782422724697022, "grad_norm": 7.705541533635058, "learning_rate": 3.8383202304315304e-07, "loss": 17.9539, "step": 48046 }, { "epoch": 0.8782605516661488, "grad_norm": 5.615325003855067, "learning_rate": 3.837182919095844e-07, "loss": 17.211, "step": 48047 }, { "epoch": 0.8782788308625953, "grad_norm": 7.56029257979595, "learning_rate": 3.8360457695568175e-07, "loss": 17.8251, "step": 48048 }, { "epoch": 0.8782971100590418, "grad_norm": 7.079650448947452, "learning_rate": 3.834908781818453e-07, "loss": 17.6798, "step": 48049 }, { "epoch": 0.8783153892554884, "grad_norm": 5.93946578503415, "learning_rate": 3.833771955884719e-07, "loss": 17.3363, "step": 48050 }, { "epoch": 0.8783336684519348, "grad_norm": 5.090604058190338, "learning_rate": 3.832635291759612e-07, "loss": 16.8382, "step": 48051 }, { "epoch": 0.8783519476483814, "grad_norm": 6.349642592062997, "learning_rate": 3.831498789447113e-07, "loss": 17.2502, "step": 48052 }, { "epoch": 0.8783702268448279, "grad_norm": 5.7726069183138895, "learning_rate": 3.830362448951208e-07, "loss": 17.0001, "step": 48053 }, { "epoch": 0.8783885060412744, "grad_norm": 4.386286283038165, "learning_rate": 3.829226270275871e-07, "loss": 16.9039, "step": 48054 }, { "epoch": 0.878406785237721, "grad_norm": 6.252628062177124, "learning_rate": 3.828090253425093e-07, "loss": 17.4742, "step": 48055 }, { "epoch": 0.8784250644341675, "grad_norm": 5.454575029305684, "learning_rate": 3.8269543984028546e-07, "loss": 17.1997, "step": 48056 }, { "epoch": 0.878443343630614, "grad_norm": 6.026641485377228, "learning_rate": 3.8258187052131245e-07, "loss": 17.2093, "step": 48057 }, { "epoch": 0.8784616228270605, "grad_norm": 6.30918678228027, "learning_rate": 3.8246831738599e-07, "loss": 17.7263, "step": 48058 }, { "epoch": 0.878479902023507, "grad_norm": 7.551462954883527, "learning_rate": 3.8235478043471494e-07, "loss": 18.1306, "step": 48059 }, { "epoch": 0.8784981812199536, "grad_norm": 5.7955234860203495, "learning_rate": 3.822412596678854e-07, "loss": 17.3038, "step": 48060 }, { "epoch": 0.8785164604164001, "grad_norm": 9.72786862478858, "learning_rate": 3.8212775508590037e-07, "loss": 19.0125, "step": 48061 }, { "epoch": 0.8785347396128467, "grad_norm": 4.772568615081075, "learning_rate": 3.820142666891563e-07, "loss": 16.6412, "step": 48062 }, { "epoch": 0.8785530188092932, "grad_norm": 6.711669451397678, "learning_rate": 3.819007944780523e-07, "loss": 17.6585, "step": 48063 }, { "epoch": 0.8785712980057396, "grad_norm": 5.422672241576084, "learning_rate": 3.8178733845298466e-07, "loss": 17.121, "step": 48064 }, { "epoch": 0.8785895772021862, "grad_norm": 5.694520834955958, "learning_rate": 3.8167389861435147e-07, "loss": 17.3533, "step": 48065 }, { "epoch": 0.8786078563986327, "grad_norm": 6.801099741558734, "learning_rate": 3.815604749625512e-07, "loss": 17.42, "step": 48066 }, { "epoch": 0.8786261355950793, "grad_norm": 5.840484314836154, "learning_rate": 3.814470674979803e-07, "loss": 17.1947, "step": 48067 }, { "epoch": 0.8786444147915258, "grad_norm": 5.832717837410309, "learning_rate": 3.8133367622103625e-07, "loss": 17.1856, "step": 48068 }, { "epoch": 0.8786626939879723, "grad_norm": 6.408437888092154, "learning_rate": 3.812203011321169e-07, "loss": 17.3743, "step": 48069 }, { "epoch": 0.8786809731844188, "grad_norm": 7.905508102073957, "learning_rate": 3.811069422316205e-07, "loss": 17.8493, "step": 48070 }, { "epoch": 0.8786992523808653, "grad_norm": 7.456909318433469, "learning_rate": 3.809935995199421e-07, "loss": 17.8964, "step": 48071 }, { "epoch": 0.8787175315773119, "grad_norm": 5.8122005800911705, "learning_rate": 3.8088027299748144e-07, "loss": 17.3836, "step": 48072 }, { "epoch": 0.8787358107737584, "grad_norm": 5.3320444540610055, "learning_rate": 3.8076696266463486e-07, "loss": 17.0284, "step": 48073 }, { "epoch": 0.8787540899702049, "grad_norm": 7.262222267221311, "learning_rate": 3.806536685217982e-07, "loss": 17.3196, "step": 48074 }, { "epoch": 0.8787723691666515, "grad_norm": 5.903618175020169, "learning_rate": 3.8054039056936997e-07, "loss": 17.4158, "step": 48075 }, { "epoch": 0.878790648363098, "grad_norm": 5.5206344291334295, "learning_rate": 3.804271288077466e-07, "loss": 17.0062, "step": 48076 }, { "epoch": 0.8788089275595444, "grad_norm": 6.059962253623375, "learning_rate": 3.80313883237326e-07, "loss": 17.2537, "step": 48077 }, { "epoch": 0.878827206755991, "grad_norm": 5.284428298176424, "learning_rate": 3.802006538585035e-07, "loss": 16.9155, "step": 48078 }, { "epoch": 0.8788454859524375, "grad_norm": 9.898399434421156, "learning_rate": 3.8008744067167647e-07, "loss": 17.6262, "step": 48079 }, { "epoch": 0.8788637651488841, "grad_norm": 6.139886811664066, "learning_rate": 3.79974243677243e-07, "loss": 17.4351, "step": 48080 }, { "epoch": 0.8788820443453306, "grad_norm": 4.605894972982061, "learning_rate": 3.7986106287559945e-07, "loss": 16.6543, "step": 48081 }, { "epoch": 0.878900323541777, "grad_norm": 6.908323937596956, "learning_rate": 3.797478982671404e-07, "loss": 17.5381, "step": 48082 }, { "epoch": 0.8789186027382236, "grad_norm": 8.81631470390918, "learning_rate": 3.796347498522657e-07, "loss": 17.9882, "step": 48083 }, { "epoch": 0.8789368819346701, "grad_norm": 6.214452558721576, "learning_rate": 3.795216176313693e-07, "loss": 17.3522, "step": 48084 }, { "epoch": 0.8789551611311167, "grad_norm": 5.24280995228031, "learning_rate": 3.7940850160484875e-07, "loss": 16.8323, "step": 48085 }, { "epoch": 0.8789734403275632, "grad_norm": 6.075320916974242, "learning_rate": 3.792954017731004e-07, "loss": 17.2614, "step": 48086 }, { "epoch": 0.8789917195240097, "grad_norm": 5.3728875333819195, "learning_rate": 3.791823181365206e-07, "loss": 17.0421, "step": 48087 }, { "epoch": 0.8790099987204563, "grad_norm": 5.473017868832936, "learning_rate": 3.790692506955057e-07, "loss": 16.8748, "step": 48088 }, { "epoch": 0.8790282779169027, "grad_norm": 4.611848718713733, "learning_rate": 3.789561994504526e-07, "loss": 16.7767, "step": 48089 }, { "epoch": 0.8790465571133493, "grad_norm": 7.690165589528427, "learning_rate": 3.7884316440175653e-07, "loss": 18.1518, "step": 48090 }, { "epoch": 0.8790648363097958, "grad_norm": 6.598166517459526, "learning_rate": 3.787301455498144e-07, "loss": 17.335, "step": 48091 }, { "epoch": 0.8790831155062423, "grad_norm": 5.051318581456758, "learning_rate": 3.786171428950225e-07, "loss": 17.13, "step": 48092 }, { "epoch": 0.8791013947026889, "grad_norm": 5.900490764091371, "learning_rate": 3.785041564377756e-07, "loss": 17.0253, "step": 48093 }, { "epoch": 0.8791196738991354, "grad_norm": 4.937417395498082, "learning_rate": 3.783911861784717e-07, "loss": 16.953, "step": 48094 }, { "epoch": 0.879137953095582, "grad_norm": 6.8536022559264085, "learning_rate": 3.7827823211750434e-07, "loss": 17.4897, "step": 48095 }, { "epoch": 0.8791562322920284, "grad_norm": 6.192528463740811, "learning_rate": 3.7816529425527215e-07, "loss": 17.628, "step": 48096 }, { "epoch": 0.8791745114884749, "grad_norm": 4.529311016905748, "learning_rate": 3.780523725921681e-07, "loss": 16.7819, "step": 48097 }, { "epoch": 0.8791927906849215, "grad_norm": 6.516878270132029, "learning_rate": 3.7793946712859075e-07, "loss": 17.5225, "step": 48098 }, { "epoch": 0.879211069881368, "grad_norm": 5.146727558914871, "learning_rate": 3.7782657786493315e-07, "loss": 16.9188, "step": 48099 }, { "epoch": 0.8792293490778146, "grad_norm": 5.766549015948925, "learning_rate": 3.7771370480159387e-07, "loss": 17.0012, "step": 48100 }, { "epoch": 0.8792476282742611, "grad_norm": 6.547482642517915, "learning_rate": 3.7760084793896646e-07, "loss": 17.432, "step": 48101 }, { "epoch": 0.8792659074707075, "grad_norm": 4.978269566003471, "learning_rate": 3.7748800727744617e-07, "loss": 16.8648, "step": 48102 }, { "epoch": 0.8792841866671541, "grad_norm": 5.8179333502601915, "learning_rate": 3.773751828174299e-07, "loss": 17.1879, "step": 48103 }, { "epoch": 0.8793024658636006, "grad_norm": 5.092160747231959, "learning_rate": 3.772623745593118e-07, "loss": 16.9684, "step": 48104 }, { "epoch": 0.8793207450600472, "grad_norm": 6.217251278596625, "learning_rate": 3.771495825034882e-07, "loss": 17.3932, "step": 48105 }, { "epoch": 0.8793390242564937, "grad_norm": 6.308128547457667, "learning_rate": 3.7703680665035433e-07, "loss": 17.1846, "step": 48106 }, { "epoch": 0.8793573034529402, "grad_norm": 6.6451670072581575, "learning_rate": 3.7692404700030496e-07, "loss": 17.5616, "step": 48107 }, { "epoch": 0.8793755826493868, "grad_norm": 7.701898988250907, "learning_rate": 3.7681130355373684e-07, "loss": 17.7188, "step": 48108 }, { "epoch": 0.8793938618458332, "grad_norm": 5.466242232822481, "learning_rate": 3.766985763110431e-07, "loss": 16.9985, "step": 48109 }, { "epoch": 0.8794121410422798, "grad_norm": 5.817271856577611, "learning_rate": 3.765858652726195e-07, "loss": 17.0645, "step": 48110 }, { "epoch": 0.8794304202387263, "grad_norm": 5.889709973628241, "learning_rate": 3.764731704388619e-07, "loss": 16.9821, "step": 48111 }, { "epoch": 0.8794486994351728, "grad_norm": 6.731468399771281, "learning_rate": 3.763604918101643e-07, "loss": 17.4283, "step": 48112 }, { "epoch": 0.8794669786316194, "grad_norm": 4.670023937889905, "learning_rate": 3.762478293869215e-07, "loss": 16.6682, "step": 48113 }, { "epoch": 0.8794852578280659, "grad_norm": 6.476929368696282, "learning_rate": 3.761351831695287e-07, "loss": 17.6747, "step": 48114 }, { "epoch": 0.8795035370245124, "grad_norm": 5.219854860944218, "learning_rate": 3.7602255315838166e-07, "loss": 16.9466, "step": 48115 }, { "epoch": 0.8795218162209589, "grad_norm": 5.8989968090636, "learning_rate": 3.7590993935387345e-07, "loss": 17.228, "step": 48116 }, { "epoch": 0.8795400954174054, "grad_norm": 6.357037369943835, "learning_rate": 3.757973417564004e-07, "loss": 17.3114, "step": 48117 }, { "epoch": 0.879558374613852, "grad_norm": 6.241782046059388, "learning_rate": 3.7568476036635657e-07, "loss": 17.2418, "step": 48118 }, { "epoch": 0.8795766538102985, "grad_norm": 4.898114664604343, "learning_rate": 3.755721951841351e-07, "loss": 17.0567, "step": 48119 }, { "epoch": 0.8795949330067451, "grad_norm": 5.9516148343816875, "learning_rate": 3.754596462101334e-07, "loss": 17.2346, "step": 48120 }, { "epoch": 0.8796132122031916, "grad_norm": 5.1144566856485385, "learning_rate": 3.7534711344474285e-07, "loss": 16.9835, "step": 48121 }, { "epoch": 0.879631491399638, "grad_norm": 6.126007036869283, "learning_rate": 3.7523459688836027e-07, "loss": 17.4144, "step": 48122 }, { "epoch": 0.8796497705960846, "grad_norm": 6.195203177321497, "learning_rate": 3.7512209654137875e-07, "loss": 17.0781, "step": 48123 }, { "epoch": 0.8796680497925311, "grad_norm": 7.173693822369302, "learning_rate": 3.750096124041924e-07, "loss": 17.5158, "step": 48124 }, { "epoch": 0.8796863289889777, "grad_norm": 6.347619717351562, "learning_rate": 3.7489714447719705e-07, "loss": 17.2623, "step": 48125 }, { "epoch": 0.8797046081854242, "grad_norm": 5.5234838784752585, "learning_rate": 3.747846927607862e-07, "loss": 17.1309, "step": 48126 }, { "epoch": 0.8797228873818707, "grad_norm": 5.40750207115738, "learning_rate": 3.7467225725535236e-07, "loss": 17.1537, "step": 48127 }, { "epoch": 0.8797411665783172, "grad_norm": 9.662020870660113, "learning_rate": 3.745598379612919e-07, "loss": 17.5615, "step": 48128 }, { "epoch": 0.8797594457747637, "grad_norm": 6.667621315357142, "learning_rate": 3.7444743487899783e-07, "loss": 17.4139, "step": 48129 }, { "epoch": 0.8797777249712103, "grad_norm": 6.479941399866466, "learning_rate": 3.7433504800886366e-07, "loss": 17.6546, "step": 48130 }, { "epoch": 0.8797960041676568, "grad_norm": 4.8847054916180355, "learning_rate": 3.7422267735128415e-07, "loss": 16.982, "step": 48131 }, { "epoch": 0.8798142833641033, "grad_norm": 7.279054537819092, "learning_rate": 3.7411032290665174e-07, "loss": 17.7298, "step": 48132 }, { "epoch": 0.8798325625605499, "grad_norm": 5.619419852119975, "learning_rate": 3.7399798467536164e-07, "loss": 17.2563, "step": 48133 }, { "epoch": 0.8798508417569963, "grad_norm": 5.593225932767096, "learning_rate": 3.7388566265780744e-07, "loss": 17.0047, "step": 48134 }, { "epoch": 0.8798691209534429, "grad_norm": 5.42860353263458, "learning_rate": 3.7377335685438333e-07, "loss": 17.1405, "step": 48135 }, { "epoch": 0.8798874001498894, "grad_norm": 5.444364931313011, "learning_rate": 3.736610672654806e-07, "loss": 16.865, "step": 48136 }, { "epoch": 0.8799056793463359, "grad_norm": 5.758755786881013, "learning_rate": 3.735487938914956e-07, "loss": 17.1231, "step": 48137 }, { "epoch": 0.8799239585427825, "grad_norm": 6.0231494082976935, "learning_rate": 3.7343653673281963e-07, "loss": 17.3043, "step": 48138 }, { "epoch": 0.879942237739229, "grad_norm": 6.122591521676123, "learning_rate": 3.733242957898481e-07, "loss": 17.1874, "step": 48139 }, { "epoch": 0.8799605169356756, "grad_norm": 6.420983691380532, "learning_rate": 3.7321207106297387e-07, "loss": 17.2073, "step": 48140 }, { "epoch": 0.879978796132122, "grad_norm": 4.708351816935679, "learning_rate": 3.7309986255258836e-07, "loss": 16.7183, "step": 48141 }, { "epoch": 0.8799970753285685, "grad_norm": 6.488740460733922, "learning_rate": 3.7298767025908623e-07, "loss": 17.5537, "step": 48142 }, { "epoch": 0.8800153545250151, "grad_norm": 6.371833272644764, "learning_rate": 3.728754941828622e-07, "loss": 17.7767, "step": 48143 }, { "epoch": 0.8800336337214616, "grad_norm": 7.115794069703951, "learning_rate": 3.7276333432430645e-07, "loss": 17.6718, "step": 48144 }, { "epoch": 0.8800519129179081, "grad_norm": 6.833033878154675, "learning_rate": 3.726511906838154e-07, "loss": 17.4351, "step": 48145 }, { "epoch": 0.8800701921143547, "grad_norm": 6.070975390963218, "learning_rate": 3.725390632617798e-07, "loss": 17.3108, "step": 48146 }, { "epoch": 0.8800884713108011, "grad_norm": 6.088742670377705, "learning_rate": 3.7242695205859267e-07, "loss": 17.3738, "step": 48147 }, { "epoch": 0.8801067505072477, "grad_norm": 6.823556378844558, "learning_rate": 3.723148570746482e-07, "loss": 17.7434, "step": 48148 }, { "epoch": 0.8801250297036942, "grad_norm": 6.964620756155584, "learning_rate": 3.7220277831033824e-07, "loss": 17.6569, "step": 48149 }, { "epoch": 0.8801433089001407, "grad_norm": 6.118172923249383, "learning_rate": 3.720907157660558e-07, "loss": 17.303, "step": 48150 }, { "epoch": 0.8801615880965873, "grad_norm": 5.664533173663904, "learning_rate": 3.719786694421934e-07, "loss": 17.2586, "step": 48151 }, { "epoch": 0.8801798672930338, "grad_norm": 5.891434832984204, "learning_rate": 3.7186663933914404e-07, "loss": 16.9117, "step": 48152 }, { "epoch": 0.8801981464894804, "grad_norm": 5.288883333082886, "learning_rate": 3.717546254573018e-07, "loss": 17.1394, "step": 48153 }, { "epoch": 0.8802164256859268, "grad_norm": 5.446004895556895, "learning_rate": 3.716426277970575e-07, "loss": 16.967, "step": 48154 }, { "epoch": 0.8802347048823733, "grad_norm": 8.626075329822223, "learning_rate": 3.7153064635880364e-07, "loss": 17.7783, "step": 48155 }, { "epoch": 0.8802529840788199, "grad_norm": 6.510914385501477, "learning_rate": 3.7141868114293433e-07, "loss": 17.3405, "step": 48156 }, { "epoch": 0.8802712632752664, "grad_norm": 5.9819455063489, "learning_rate": 3.7130673214984035e-07, "loss": 17.1586, "step": 48157 }, { "epoch": 0.880289542471713, "grad_norm": 7.90041376484254, "learning_rate": 3.711947993799137e-07, "loss": 17.1845, "step": 48158 }, { "epoch": 0.8803078216681595, "grad_norm": 5.976683430351767, "learning_rate": 3.7108288283354885e-07, "loss": 17.2113, "step": 48159 }, { "epoch": 0.8803261008646059, "grad_norm": 5.797554089034304, "learning_rate": 3.7097098251113574e-07, "loss": 17.0708, "step": 48160 }, { "epoch": 0.8803443800610525, "grad_norm": 6.352748275260045, "learning_rate": 3.708590984130678e-07, "loss": 17.4496, "step": 48161 }, { "epoch": 0.880362659257499, "grad_norm": 5.134763846284808, "learning_rate": 3.7074723053973804e-07, "loss": 17.2219, "step": 48162 }, { "epoch": 0.8803809384539456, "grad_norm": 5.890382812947896, "learning_rate": 3.7063537889153733e-07, "loss": 17.0708, "step": 48163 }, { "epoch": 0.8803992176503921, "grad_norm": 5.492506726913719, "learning_rate": 3.70523543468857e-07, "loss": 17.1866, "step": 48164 }, { "epoch": 0.8804174968468386, "grad_norm": 5.571186164715697, "learning_rate": 3.704117242720906e-07, "loss": 17.1623, "step": 48165 }, { "epoch": 0.8804357760432852, "grad_norm": 6.129199686719434, "learning_rate": 3.7029992130162897e-07, "loss": 17.3087, "step": 48166 }, { "epoch": 0.8804540552397316, "grad_norm": 5.313731940080543, "learning_rate": 3.701881345578645e-07, "loss": 17.0209, "step": 48167 }, { "epoch": 0.8804723344361782, "grad_norm": 4.448416073615439, "learning_rate": 3.7007636404118865e-07, "loss": 16.703, "step": 48168 }, { "epoch": 0.8804906136326247, "grad_norm": 6.419782895257448, "learning_rate": 3.699646097519932e-07, "loss": 17.6612, "step": 48169 }, { "epoch": 0.8805088928290712, "grad_norm": 6.0207202748821835, "learning_rate": 3.6985287169067075e-07, "loss": 17.2671, "step": 48170 }, { "epoch": 0.8805271720255178, "grad_norm": 5.8595411308239465, "learning_rate": 3.6974114985761256e-07, "loss": 17.3212, "step": 48171 }, { "epoch": 0.8805454512219643, "grad_norm": 6.144652004598788, "learning_rate": 3.696294442532089e-07, "loss": 16.9668, "step": 48172 }, { "epoch": 0.8805637304184109, "grad_norm": 5.041231416641177, "learning_rate": 3.695177548778528e-07, "loss": 17.0028, "step": 48173 }, { "epoch": 0.8805820096148573, "grad_norm": 5.883351632334541, "learning_rate": 3.6940608173193504e-07, "loss": 17.1491, "step": 48174 }, { "epoch": 0.8806002888113038, "grad_norm": 6.306943373086459, "learning_rate": 3.692944248158464e-07, "loss": 17.2035, "step": 48175 }, { "epoch": 0.8806185680077504, "grad_norm": 5.996323569384476, "learning_rate": 3.6918278412998e-07, "loss": 17.2535, "step": 48176 }, { "epoch": 0.8806368472041969, "grad_norm": 6.802255120614358, "learning_rate": 3.690711596747254e-07, "loss": 17.5071, "step": 48177 }, { "epoch": 0.8806551264006435, "grad_norm": 5.515799632257865, "learning_rate": 3.6895955145047457e-07, "loss": 16.9282, "step": 48178 }, { "epoch": 0.88067340559709, "grad_norm": 6.37348330316962, "learning_rate": 3.6884795945761885e-07, "loss": 17.2632, "step": 48179 }, { "epoch": 0.8806916847935364, "grad_norm": 5.43964385402408, "learning_rate": 3.687363836965496e-07, "loss": 17.1023, "step": 48180 }, { "epoch": 0.880709963989983, "grad_norm": 5.627883787753745, "learning_rate": 3.6862482416765657e-07, "loss": 17.2111, "step": 48181 }, { "epoch": 0.8807282431864295, "grad_norm": 6.586619866269185, "learning_rate": 3.685132808713326e-07, "loss": 17.3725, "step": 48182 }, { "epoch": 0.8807465223828761, "grad_norm": 6.06718771144454, "learning_rate": 3.684017538079665e-07, "loss": 17.3452, "step": 48183 }, { "epoch": 0.8807648015793226, "grad_norm": 7.660026673166134, "learning_rate": 3.682902429779517e-07, "loss": 17.3407, "step": 48184 }, { "epoch": 0.8807830807757691, "grad_norm": 6.05016502885146, "learning_rate": 3.6817874838167735e-07, "loss": 17.2443, "step": 48185 }, { "epoch": 0.8808013599722156, "grad_norm": 6.715359875531818, "learning_rate": 3.680672700195337e-07, "loss": 17.8043, "step": 48186 }, { "epoch": 0.8808196391686621, "grad_norm": 6.012671407607473, "learning_rate": 3.679558078919121e-07, "loss": 17.306, "step": 48187 }, { "epoch": 0.8808379183651087, "grad_norm": 7.052937937768807, "learning_rate": 3.6784436199920504e-07, "loss": 17.4149, "step": 48188 }, { "epoch": 0.8808561975615552, "grad_norm": 6.25033264229583, "learning_rate": 3.6773293234179994e-07, "loss": 17.3312, "step": 48189 }, { "epoch": 0.8808744767580017, "grad_norm": 5.477577650330542, "learning_rate": 3.676215189200899e-07, "loss": 17.1526, "step": 48190 }, { "epoch": 0.8808927559544483, "grad_norm": 5.201084445223371, "learning_rate": 3.6751012173446455e-07, "loss": 16.9465, "step": 48191 }, { "epoch": 0.8809110351508948, "grad_norm": 8.172544877092632, "learning_rate": 3.673987407853136e-07, "loss": 18.0109, "step": 48192 }, { "epoch": 0.8809293143473413, "grad_norm": 7.045192605019296, "learning_rate": 3.6728737607302886e-07, "loss": 17.4654, "step": 48193 }, { "epoch": 0.8809475935437878, "grad_norm": 5.992460958254306, "learning_rate": 3.671760275979996e-07, "loss": 17.0398, "step": 48194 }, { "epoch": 0.8809658727402343, "grad_norm": 6.558317713923614, "learning_rate": 3.6706469536061553e-07, "loss": 17.2314, "step": 48195 }, { "epoch": 0.8809841519366809, "grad_norm": 6.878641790141727, "learning_rate": 3.669533793612678e-07, "loss": 17.4655, "step": 48196 }, { "epoch": 0.8810024311331274, "grad_norm": 5.599912570753556, "learning_rate": 3.668420796003469e-07, "loss": 17.0984, "step": 48197 }, { "epoch": 0.881020710329574, "grad_norm": 4.997569243836164, "learning_rate": 3.6673079607824237e-07, "loss": 16.9423, "step": 48198 }, { "epoch": 0.8810389895260204, "grad_norm": 6.306317538810049, "learning_rate": 3.66619528795345e-07, "loss": 17.4284, "step": 48199 }, { "epoch": 0.8810572687224669, "grad_norm": 5.226700759397005, "learning_rate": 3.665082777520429e-07, "loss": 17.0691, "step": 48200 }, { "epoch": 0.8810755479189135, "grad_norm": 6.667071038701733, "learning_rate": 3.663970429487285e-07, "loss": 17.5021, "step": 48201 }, { "epoch": 0.88109382711536, "grad_norm": 6.356548983141175, "learning_rate": 3.662858243857903e-07, "loss": 17.8265, "step": 48202 }, { "epoch": 0.8811121063118066, "grad_norm": 6.285757410667336, "learning_rate": 3.66174622063617e-07, "loss": 17.5338, "step": 48203 }, { "epoch": 0.8811303855082531, "grad_norm": 7.694801332793408, "learning_rate": 3.660634359826004e-07, "loss": 17.7468, "step": 48204 }, { "epoch": 0.8811486647046995, "grad_norm": 8.567871870341532, "learning_rate": 3.6595226614312864e-07, "loss": 17.8585, "step": 48205 }, { "epoch": 0.8811669439011461, "grad_norm": 7.0803717056725395, "learning_rate": 3.658411125455924e-07, "loss": 17.8324, "step": 48206 }, { "epoch": 0.8811852230975926, "grad_norm": 5.707016861427365, "learning_rate": 3.6572997519038145e-07, "loss": 17.0867, "step": 48207 }, { "epoch": 0.8812035022940392, "grad_norm": 5.301294201628941, "learning_rate": 3.6561885407788435e-07, "loss": 16.9662, "step": 48208 }, { "epoch": 0.8812217814904857, "grad_norm": 6.315241383656615, "learning_rate": 3.6550774920849074e-07, "loss": 17.5306, "step": 48209 }, { "epoch": 0.8812400606869322, "grad_norm": 4.876579861431799, "learning_rate": 3.6539666058259094e-07, "loss": 16.8065, "step": 48210 }, { "epoch": 0.8812583398833788, "grad_norm": 7.241031992908414, "learning_rate": 3.65285588200574e-07, "loss": 17.6602, "step": 48211 }, { "epoch": 0.8812766190798252, "grad_norm": 6.237987223936037, "learning_rate": 3.651745320628275e-07, "loss": 17.1011, "step": 48212 }, { "epoch": 0.8812948982762717, "grad_norm": 6.883772961165907, "learning_rate": 3.6506349216974326e-07, "loss": 17.1005, "step": 48213 }, { "epoch": 0.8813131774727183, "grad_norm": 6.568640029569566, "learning_rate": 3.649524685217087e-07, "loss": 17.6593, "step": 48214 }, { "epoch": 0.8813314566691648, "grad_norm": 7.791331415290607, "learning_rate": 3.648414611191131e-07, "loss": 17.3821, "step": 48215 }, { "epoch": 0.8813497358656114, "grad_norm": 5.426131287476925, "learning_rate": 3.647304699623472e-07, "loss": 16.9883, "step": 48216 }, { "epoch": 0.8813680150620579, "grad_norm": 7.192863682414176, "learning_rate": 3.646194950517973e-07, "loss": 17.6882, "step": 48217 }, { "epoch": 0.8813862942585043, "grad_norm": 6.374410549816814, "learning_rate": 3.6450853638785534e-07, "loss": 17.1436, "step": 48218 }, { "epoch": 0.8814045734549509, "grad_norm": 5.116902174712218, "learning_rate": 3.6439759397090824e-07, "loss": 16.9227, "step": 48219 }, { "epoch": 0.8814228526513974, "grad_norm": 6.354623904661567, "learning_rate": 3.642866678013446e-07, "loss": 17.8253, "step": 48220 }, { "epoch": 0.881441131847844, "grad_norm": 6.48742015253497, "learning_rate": 3.641757578795546e-07, "loss": 17.3601, "step": 48221 }, { "epoch": 0.8814594110442905, "grad_norm": 6.136708517894362, "learning_rate": 3.640648642059258e-07, "loss": 17.4441, "step": 48222 }, { "epoch": 0.881477690240737, "grad_norm": 5.768302352988958, "learning_rate": 3.6395398678084784e-07, "loss": 17.073, "step": 48223 }, { "epoch": 0.8814959694371836, "grad_norm": 7.430579043823968, "learning_rate": 3.638431256047081e-07, "loss": 17.4554, "step": 48224 }, { "epoch": 0.88151424863363, "grad_norm": 6.227100557524606, "learning_rate": 3.63732280677897e-07, "loss": 17.4262, "step": 48225 }, { "epoch": 0.8815325278300766, "grad_norm": 8.253059467610852, "learning_rate": 3.6362145200080134e-07, "loss": 17.6458, "step": 48226 }, { "epoch": 0.8815508070265231, "grad_norm": 7.876394334557881, "learning_rate": 3.635106395738103e-07, "loss": 17.8031, "step": 48227 }, { "epoch": 0.8815690862229696, "grad_norm": 5.79450626161328, "learning_rate": 3.633998433973118e-07, "loss": 17.405, "step": 48228 }, { "epoch": 0.8815873654194162, "grad_norm": 5.966083812829068, "learning_rate": 3.6328906347169513e-07, "loss": 17.2981, "step": 48229 }, { "epoch": 0.8816056446158627, "grad_norm": 5.073655022247725, "learning_rate": 3.6317829979734766e-07, "loss": 17.0091, "step": 48230 }, { "epoch": 0.8816239238123093, "grad_norm": 4.164702281092489, "learning_rate": 3.630675523746574e-07, "loss": 16.5553, "step": 48231 }, { "epoch": 0.8816422030087557, "grad_norm": 7.792900333810912, "learning_rate": 3.62956821204013e-07, "loss": 17.8621, "step": 48232 }, { "epoch": 0.8816604822052022, "grad_norm": 6.390215867177096, "learning_rate": 3.6284610628580353e-07, "loss": 17.6703, "step": 48233 }, { "epoch": 0.8816787614016488, "grad_norm": 6.082262262351835, "learning_rate": 3.6273540762041483e-07, "loss": 17.0691, "step": 48234 }, { "epoch": 0.8816970405980953, "grad_norm": 7.650833181597319, "learning_rate": 3.626247252082371e-07, "loss": 17.5964, "step": 48235 }, { "epoch": 0.8817153197945419, "grad_norm": 6.710349566416794, "learning_rate": 3.625140590496573e-07, "loss": 16.9342, "step": 48236 }, { "epoch": 0.8817335989909884, "grad_norm": 7.402700669197508, "learning_rate": 3.624034091450629e-07, "loss": 17.509, "step": 48237 }, { "epoch": 0.8817518781874348, "grad_norm": 6.646401706351376, "learning_rate": 3.622927754948424e-07, "loss": 17.5255, "step": 48238 }, { "epoch": 0.8817701573838814, "grad_norm": 6.966945762275502, "learning_rate": 3.621821580993834e-07, "loss": 17.3119, "step": 48239 }, { "epoch": 0.8817884365803279, "grad_norm": 5.193383601530511, "learning_rate": 3.620715569590727e-07, "loss": 16.8845, "step": 48240 }, { "epoch": 0.8818067157767745, "grad_norm": 8.371943393818961, "learning_rate": 3.619609720742989e-07, "loss": 17.4092, "step": 48241 }, { "epoch": 0.881824994973221, "grad_norm": 7.22156636966751, "learning_rate": 3.618504034454501e-07, "loss": 17.3975, "step": 48242 }, { "epoch": 0.8818432741696675, "grad_norm": 7.5977788425798805, "learning_rate": 3.61739851072912e-07, "loss": 17.5029, "step": 48243 }, { "epoch": 0.881861553366114, "grad_norm": 4.720793238403102, "learning_rate": 3.6162931495707485e-07, "loss": 16.7276, "step": 48244 }, { "epoch": 0.8818798325625605, "grad_norm": 5.4939979695484755, "learning_rate": 3.615187950983229e-07, "loss": 17.0317, "step": 48245 }, { "epoch": 0.8818981117590071, "grad_norm": 6.322659121144319, "learning_rate": 3.6140829149704624e-07, "loss": 17.591, "step": 48246 }, { "epoch": 0.8819163909554536, "grad_norm": 5.7323015898329155, "learning_rate": 3.6129780415363083e-07, "loss": 17.1035, "step": 48247 }, { "epoch": 0.8819346701519001, "grad_norm": 6.398574336551354, "learning_rate": 3.6118733306846343e-07, "loss": 17.1673, "step": 48248 }, { "epoch": 0.8819529493483467, "grad_norm": 8.630280499865783, "learning_rate": 3.6107687824193273e-07, "loss": 18.0559, "step": 48249 }, { "epoch": 0.8819712285447932, "grad_norm": 4.7693186797081815, "learning_rate": 3.609664396744239e-07, "loss": 16.8559, "step": 48250 }, { "epoch": 0.8819895077412397, "grad_norm": 5.422963933990912, "learning_rate": 3.6085601736632557e-07, "loss": 17.0631, "step": 48251 }, { "epoch": 0.8820077869376862, "grad_norm": 8.148742230516842, "learning_rate": 3.6074561131802467e-07, "loss": 17.4892, "step": 48252 }, { "epoch": 0.8820260661341327, "grad_norm": 5.585669423368425, "learning_rate": 3.6063522152990805e-07, "loss": 17.1617, "step": 48253 }, { "epoch": 0.8820443453305793, "grad_norm": 6.617012540165154, "learning_rate": 3.6052484800236154e-07, "loss": 17.1117, "step": 48254 }, { "epoch": 0.8820626245270258, "grad_norm": 6.272393124855084, "learning_rate": 3.604144907357737e-07, "loss": 17.4266, "step": 48255 }, { "epoch": 0.8820809037234724, "grad_norm": 5.382797225136061, "learning_rate": 3.6030414973053043e-07, "loss": 16.8785, "step": 48256 }, { "epoch": 0.8820991829199188, "grad_norm": 5.195508465725904, "learning_rate": 3.6019382498701795e-07, "loss": 16.9456, "step": 48257 }, { "epoch": 0.8821174621163653, "grad_norm": 6.52115398589613, "learning_rate": 3.6008351650562433e-07, "loss": 17.4305, "step": 48258 }, { "epoch": 0.8821357413128119, "grad_norm": 5.376285902896604, "learning_rate": 3.599732242867343e-07, "loss": 17.0158, "step": 48259 }, { "epoch": 0.8821540205092584, "grad_norm": 7.139190970740505, "learning_rate": 3.5986294833073534e-07, "loss": 17.674, "step": 48260 }, { "epoch": 0.882172299705705, "grad_norm": 6.833759838045572, "learning_rate": 3.5975268863801485e-07, "loss": 17.3692, "step": 48261 }, { "epoch": 0.8821905789021515, "grad_norm": 5.959628591843655, "learning_rate": 3.596424452089581e-07, "loss": 17.4814, "step": 48262 }, { "epoch": 0.882208858098598, "grad_norm": 5.824796064169068, "learning_rate": 3.5953221804395253e-07, "loss": 17.1253, "step": 48263 }, { "epoch": 0.8822271372950445, "grad_norm": 5.555963858406815, "learning_rate": 3.594220071433835e-07, "loss": 17.2687, "step": 48264 }, { "epoch": 0.882245416491491, "grad_norm": 5.55675737075481, "learning_rate": 3.5931181250763724e-07, "loss": 16.919, "step": 48265 }, { "epoch": 0.8822636956879376, "grad_norm": 6.216317493598846, "learning_rate": 3.592016341371013e-07, "loss": 17.4227, "step": 48266 }, { "epoch": 0.8822819748843841, "grad_norm": 6.353052128905995, "learning_rate": 3.590914720321609e-07, "loss": 17.4896, "step": 48267 }, { "epoch": 0.8823002540808306, "grad_norm": 7.223361906150487, "learning_rate": 3.589813261932013e-07, "loss": 18.1635, "step": 48268 }, { "epoch": 0.8823185332772772, "grad_norm": 5.734743833314726, "learning_rate": 3.588711966206093e-07, "loss": 17.1923, "step": 48269 }, { "epoch": 0.8823368124737236, "grad_norm": 8.507857973311234, "learning_rate": 3.5876108331477145e-07, "loss": 17.8117, "step": 48270 }, { "epoch": 0.8823550916701702, "grad_norm": 6.927772143720486, "learning_rate": 3.5865098627607284e-07, "loss": 17.4751, "step": 48271 }, { "epoch": 0.8823733708666167, "grad_norm": 5.932059895344904, "learning_rate": 3.585409055049011e-07, "loss": 17.2654, "step": 48272 }, { "epoch": 0.8823916500630632, "grad_norm": 5.054500123429667, "learning_rate": 3.5843084100164017e-07, "loss": 17.0931, "step": 48273 }, { "epoch": 0.8824099292595098, "grad_norm": 8.111624758458964, "learning_rate": 3.583207927666754e-07, "loss": 18.1305, "step": 48274 }, { "epoch": 0.8824282084559563, "grad_norm": 6.9145024610301435, "learning_rate": 3.582107608003943e-07, "loss": 17.6885, "step": 48275 }, { "epoch": 0.8824464876524029, "grad_norm": 5.675377448157581, "learning_rate": 3.581007451031815e-07, "loss": 17.0831, "step": 48276 }, { "epoch": 0.8824647668488493, "grad_norm": 6.35567279541177, "learning_rate": 3.579907456754228e-07, "loss": 17.3528, "step": 48277 }, { "epoch": 0.8824830460452958, "grad_norm": 4.850699611443746, "learning_rate": 3.578807625175035e-07, "loss": 17.0138, "step": 48278 }, { "epoch": 0.8825013252417424, "grad_norm": 7.073212614156054, "learning_rate": 3.577707956298088e-07, "loss": 17.8486, "step": 48279 }, { "epoch": 0.8825196044381889, "grad_norm": 8.738067076764763, "learning_rate": 3.576608450127256e-07, "loss": 17.7234, "step": 48280 }, { "epoch": 0.8825378836346354, "grad_norm": 6.488686642345536, "learning_rate": 3.575509106666386e-07, "loss": 17.1678, "step": 48281 }, { "epoch": 0.882556162831082, "grad_norm": 6.308066457989733, "learning_rate": 3.57440992591932e-07, "loss": 17.2688, "step": 48282 }, { "epoch": 0.8825744420275284, "grad_norm": 6.0618598518485385, "learning_rate": 3.573310907889921e-07, "loss": 17.0277, "step": 48283 }, { "epoch": 0.882592721223975, "grad_norm": 5.03628813480991, "learning_rate": 3.572212052582041e-07, "loss": 16.6847, "step": 48284 }, { "epoch": 0.8826110004204215, "grad_norm": 6.71547040915764, "learning_rate": 3.571113359999523e-07, "loss": 17.396, "step": 48285 }, { "epoch": 0.882629279616868, "grad_norm": 5.927343396356637, "learning_rate": 3.5700148301462234e-07, "loss": 17.2149, "step": 48286 }, { "epoch": 0.8826475588133146, "grad_norm": 4.837390238426441, "learning_rate": 3.568916463026001e-07, "loss": 16.7929, "step": 48287 }, { "epoch": 0.8826658380097611, "grad_norm": 5.135640946840008, "learning_rate": 3.567818258642686e-07, "loss": 16.8932, "step": 48288 }, { "epoch": 0.8826841172062077, "grad_norm": 9.526574965537796, "learning_rate": 3.5667202170001524e-07, "loss": 17.8466, "step": 48289 }, { "epoch": 0.8827023964026541, "grad_norm": 8.82549260805418, "learning_rate": 3.56562233810222e-07, "loss": 17.4326, "step": 48290 }, { "epoch": 0.8827206755991006, "grad_norm": 5.089690352137203, "learning_rate": 3.564524621952764e-07, "loss": 16.7884, "step": 48291 }, { "epoch": 0.8827389547955472, "grad_norm": 6.154693693521268, "learning_rate": 3.563427068555625e-07, "loss": 17.368, "step": 48292 }, { "epoch": 0.8827572339919937, "grad_norm": 5.460788840375041, "learning_rate": 3.562329677914628e-07, "loss": 17.1637, "step": 48293 }, { "epoch": 0.8827755131884403, "grad_norm": 5.906733767945339, "learning_rate": 3.561232450033647e-07, "loss": 17.2682, "step": 48294 }, { "epoch": 0.8827937923848868, "grad_norm": 5.5148976272363734, "learning_rate": 3.560135384916508e-07, "loss": 17.0858, "step": 48295 }, { "epoch": 0.8828120715813332, "grad_norm": 5.231749444687986, "learning_rate": 3.5590384825670677e-07, "loss": 17.0133, "step": 48296 }, { "epoch": 0.8828303507777798, "grad_norm": 6.435947594968385, "learning_rate": 3.5579417429891685e-07, "loss": 17.2958, "step": 48297 }, { "epoch": 0.8828486299742263, "grad_norm": 5.657399957640718, "learning_rate": 3.556845166186662e-07, "loss": 17.2318, "step": 48298 }, { "epoch": 0.8828669091706729, "grad_norm": 5.417827127861443, "learning_rate": 3.555748752163368e-07, "loss": 17.0264, "step": 48299 }, { "epoch": 0.8828851883671194, "grad_norm": 7.166367532884519, "learning_rate": 3.554652500923156e-07, "loss": 17.8266, "step": 48300 }, { "epoch": 0.8829034675635659, "grad_norm": 4.534490445198095, "learning_rate": 3.5535564124698553e-07, "loss": 16.8396, "step": 48301 }, { "epoch": 0.8829217467600124, "grad_norm": 7.103486618936922, "learning_rate": 3.552460486807302e-07, "loss": 17.687, "step": 48302 }, { "epoch": 0.8829400259564589, "grad_norm": 8.403832738345782, "learning_rate": 3.5513647239393544e-07, "loss": 17.7889, "step": 48303 }, { "epoch": 0.8829583051529055, "grad_norm": 4.912658195783637, "learning_rate": 3.550269123869832e-07, "loss": 16.6863, "step": 48304 }, { "epoch": 0.882976584349352, "grad_norm": 7.383127979967023, "learning_rate": 3.549173686602586e-07, "loss": 17.3881, "step": 48305 }, { "epoch": 0.8829948635457985, "grad_norm": 6.506808166713963, "learning_rate": 3.548078412141459e-07, "loss": 17.7418, "step": 48306 }, { "epoch": 0.8830131427422451, "grad_norm": 6.037085518074313, "learning_rate": 3.5469833004902865e-07, "loss": 17.1569, "step": 48307 }, { "epoch": 0.8830314219386916, "grad_norm": 6.26993171413909, "learning_rate": 3.5458883516529097e-07, "loss": 17.2781, "step": 48308 }, { "epoch": 0.8830497011351381, "grad_norm": 6.131920791237228, "learning_rate": 3.5447935656331643e-07, "loss": 16.9212, "step": 48309 }, { "epoch": 0.8830679803315846, "grad_norm": 6.543084620712895, "learning_rate": 3.5436989424348756e-07, "loss": 17.2312, "step": 48310 }, { "epoch": 0.8830862595280311, "grad_norm": 6.65444184000589, "learning_rate": 3.542604482061901e-07, "loss": 17.4915, "step": 48311 }, { "epoch": 0.8831045387244777, "grad_norm": 6.267185282559429, "learning_rate": 3.5415101845180655e-07, "loss": 17.038, "step": 48312 }, { "epoch": 0.8831228179209242, "grad_norm": 4.955421991995853, "learning_rate": 3.5404160498071995e-07, "loss": 16.8854, "step": 48313 }, { "epoch": 0.8831410971173708, "grad_norm": 7.323004304889041, "learning_rate": 3.539322077933144e-07, "loss": 17.4966, "step": 48314 }, { "epoch": 0.8831593763138172, "grad_norm": 7.122824918097208, "learning_rate": 3.538228268899735e-07, "loss": 17.5061, "step": 48315 }, { "epoch": 0.8831776555102637, "grad_norm": 7.634721686479374, "learning_rate": 3.5371346227108026e-07, "loss": 17.096, "step": 48316 }, { "epoch": 0.8831959347067103, "grad_norm": 6.253956926048536, "learning_rate": 3.536041139370189e-07, "loss": 17.5568, "step": 48317 }, { "epoch": 0.8832142139031568, "grad_norm": 7.231325937153145, "learning_rate": 3.5349478188817175e-07, "loss": 17.7747, "step": 48318 }, { "epoch": 0.8832324930996034, "grad_norm": 6.684975679876064, "learning_rate": 3.533854661249214e-07, "loss": 17.2729, "step": 48319 }, { "epoch": 0.8832507722960499, "grad_norm": 8.901085567981948, "learning_rate": 3.532761666476525e-07, "loss": 18.1033, "step": 48320 }, { "epoch": 0.8832690514924963, "grad_norm": 6.121356212363063, "learning_rate": 3.531668834567464e-07, "loss": 17.1791, "step": 48321 }, { "epoch": 0.8832873306889429, "grad_norm": 6.746847341301756, "learning_rate": 3.530576165525884e-07, "loss": 17.4092, "step": 48322 }, { "epoch": 0.8833056098853894, "grad_norm": 6.478626362904601, "learning_rate": 3.5294836593555926e-07, "loss": 17.3989, "step": 48323 }, { "epoch": 0.883323889081836, "grad_norm": 5.000606989679364, "learning_rate": 3.5283913160604254e-07, "loss": 16.9119, "step": 48324 }, { "epoch": 0.8833421682782825, "grad_norm": 6.691832426826236, "learning_rate": 3.5272991356442243e-07, "loss": 17.5634, "step": 48325 }, { "epoch": 0.883360447474729, "grad_norm": 5.654173941159505, "learning_rate": 3.526207118110808e-07, "loss": 17.1684, "step": 48326 }, { "epoch": 0.8833787266711756, "grad_norm": 5.158766547201, "learning_rate": 3.5251152634639963e-07, "loss": 16.9338, "step": 48327 }, { "epoch": 0.883397005867622, "grad_norm": 6.843744642720517, "learning_rate": 3.52402357170763e-07, "loss": 17.5032, "step": 48328 }, { "epoch": 0.8834152850640686, "grad_norm": 7.077034159324687, "learning_rate": 3.5229320428455285e-07, "loss": 17.5287, "step": 48329 }, { "epoch": 0.8834335642605151, "grad_norm": 7.60560610984887, "learning_rate": 3.521840676881505e-07, "loss": 17.5625, "step": 48330 }, { "epoch": 0.8834518434569616, "grad_norm": 8.594525656222402, "learning_rate": 3.520749473819407e-07, "loss": 18.0901, "step": 48331 }, { "epoch": 0.8834701226534082, "grad_norm": 7.9345495967847315, "learning_rate": 3.519658433663042e-07, "loss": 17.7647, "step": 48332 }, { "epoch": 0.8834884018498547, "grad_norm": 5.541429193355814, "learning_rate": 3.518567556416236e-07, "loss": 17.3442, "step": 48333 }, { "epoch": 0.8835066810463013, "grad_norm": 6.486839707323158, "learning_rate": 3.517476842082834e-07, "loss": 17.402, "step": 48334 }, { "epoch": 0.8835249602427477, "grad_norm": 6.254284920713767, "learning_rate": 3.516386290666629e-07, "loss": 17.5038, "step": 48335 }, { "epoch": 0.8835432394391942, "grad_norm": 5.350178987595372, "learning_rate": 3.515295902171462e-07, "loss": 17.1788, "step": 48336 }, { "epoch": 0.8835615186356408, "grad_norm": 7.202759111914754, "learning_rate": 3.514205676601151e-07, "loss": 17.9182, "step": 48337 }, { "epoch": 0.8835797978320873, "grad_norm": 6.909286044176537, "learning_rate": 3.5131156139595115e-07, "loss": 17.5569, "step": 48338 }, { "epoch": 0.8835980770285339, "grad_norm": 5.352476339605411, "learning_rate": 3.5120257142503725e-07, "loss": 17.0764, "step": 48339 }, { "epoch": 0.8836163562249804, "grad_norm": 4.828914528529991, "learning_rate": 3.5109359774775473e-07, "loss": 16.9132, "step": 48340 }, { "epoch": 0.8836346354214268, "grad_norm": 4.443992904259557, "learning_rate": 3.509846403644851e-07, "loss": 16.7883, "step": 48341 }, { "epoch": 0.8836529146178734, "grad_norm": 8.045031486841498, "learning_rate": 3.508756992756107e-07, "loss": 17.85, "step": 48342 }, { "epoch": 0.8836711938143199, "grad_norm": 7.169599649659628, "learning_rate": 3.5076677448151463e-07, "loss": 17.8315, "step": 48343 }, { "epoch": 0.8836894730107665, "grad_norm": 5.4985266636143395, "learning_rate": 3.5065786598257656e-07, "loss": 17.036, "step": 48344 }, { "epoch": 0.883707752207213, "grad_norm": 5.159106894596805, "learning_rate": 3.5054897377918006e-07, "loss": 16.9554, "step": 48345 }, { "epoch": 0.8837260314036595, "grad_norm": 6.537057919500488, "learning_rate": 3.5044009787170595e-07, "loss": 17.3266, "step": 48346 }, { "epoch": 0.883744310600106, "grad_norm": 8.007738420148378, "learning_rate": 3.503312382605356e-07, "loss": 17.8329, "step": 48347 }, { "epoch": 0.8837625897965525, "grad_norm": 6.0578728686485235, "learning_rate": 3.502223949460509e-07, "loss": 16.958, "step": 48348 }, { "epoch": 0.883780868992999, "grad_norm": 6.35349080618522, "learning_rate": 3.5011356792863274e-07, "loss": 17.6138, "step": 48349 }, { "epoch": 0.8837991481894456, "grad_norm": 5.851919426353539, "learning_rate": 3.500047572086634e-07, "loss": 17.3982, "step": 48350 }, { "epoch": 0.8838174273858921, "grad_norm": 5.775380108797563, "learning_rate": 3.498959627865234e-07, "loss": 17.2523, "step": 48351 }, { "epoch": 0.8838357065823387, "grad_norm": 6.276817638969975, "learning_rate": 3.4978718466259445e-07, "loss": 17.3312, "step": 48352 }, { "epoch": 0.8838539857787852, "grad_norm": 5.509134054273056, "learning_rate": 3.4967842283725907e-07, "loss": 17.077, "step": 48353 }, { "epoch": 0.8838722649752316, "grad_norm": 6.471027612331937, "learning_rate": 3.495696773108964e-07, "loss": 17.4435, "step": 48354 }, { "epoch": 0.8838905441716782, "grad_norm": 6.483095284843213, "learning_rate": 3.494609480838884e-07, "loss": 17.5593, "step": 48355 }, { "epoch": 0.8839088233681247, "grad_norm": 5.713487649690426, "learning_rate": 3.4935223515661697e-07, "loss": 17.2902, "step": 48356 }, { "epoch": 0.8839271025645713, "grad_norm": 6.408345153002523, "learning_rate": 3.4924353852946234e-07, "loss": 17.5483, "step": 48357 }, { "epoch": 0.8839453817610178, "grad_norm": 6.472311002456433, "learning_rate": 3.491348582028048e-07, "loss": 17.2722, "step": 48358 }, { "epoch": 0.8839636609574643, "grad_norm": 6.556868244192813, "learning_rate": 3.4902619417702566e-07, "loss": 17.5002, "step": 48359 }, { "epoch": 0.8839819401539109, "grad_norm": 5.712329727320967, "learning_rate": 3.489175464525069e-07, "loss": 17.1308, "step": 48360 }, { "epoch": 0.8840002193503573, "grad_norm": 5.81777043748361, "learning_rate": 3.488089150296281e-07, "loss": 17.1188, "step": 48361 }, { "epoch": 0.8840184985468039, "grad_norm": 6.274452659272277, "learning_rate": 3.487002999087713e-07, "loss": 17.3658, "step": 48362 }, { "epoch": 0.8840367777432504, "grad_norm": 6.2939967664724294, "learning_rate": 3.485917010903156e-07, "loss": 17.4006, "step": 48363 }, { "epoch": 0.8840550569396969, "grad_norm": 5.526376754338871, "learning_rate": 3.4848311857464233e-07, "loss": 17.1241, "step": 48364 }, { "epoch": 0.8840733361361435, "grad_norm": 5.95170111615559, "learning_rate": 3.4837455236213235e-07, "loss": 17.3847, "step": 48365 }, { "epoch": 0.88409161533259, "grad_norm": 6.6205046201695605, "learning_rate": 3.482660024531659e-07, "loss": 17.5248, "step": 48366 }, { "epoch": 0.8841098945290365, "grad_norm": 6.1784558508836325, "learning_rate": 3.4815746884812376e-07, "loss": 17.533, "step": 48367 }, { "epoch": 0.884128173725483, "grad_norm": 6.545143044579675, "learning_rate": 3.4804895154738504e-07, "loss": 17.4161, "step": 48368 }, { "epoch": 0.8841464529219295, "grad_norm": 7.2984974588366525, "learning_rate": 3.4794045055133117e-07, "loss": 17.3142, "step": 48369 }, { "epoch": 0.8841647321183761, "grad_norm": 6.021651317417034, "learning_rate": 3.4783196586034295e-07, "loss": 17.6416, "step": 48370 }, { "epoch": 0.8841830113148226, "grad_norm": 6.626920830768433, "learning_rate": 3.477234974748006e-07, "loss": 17.1803, "step": 48371 }, { "epoch": 0.8842012905112692, "grad_norm": 5.535477047415491, "learning_rate": 3.4761504539508217e-07, "loss": 17.138, "step": 48372 }, { "epoch": 0.8842195697077156, "grad_norm": 6.640576536211291, "learning_rate": 3.4750660962157067e-07, "loss": 17.4756, "step": 48373 }, { "epoch": 0.8842378489041621, "grad_norm": 6.115269241343679, "learning_rate": 3.473981901546447e-07, "loss": 17.3831, "step": 48374 }, { "epoch": 0.8842561281006087, "grad_norm": 6.3346490161983615, "learning_rate": 3.472897869946834e-07, "loss": 17.4351, "step": 48375 }, { "epoch": 0.8842744072970552, "grad_norm": 5.948799762168085, "learning_rate": 3.4718140014206814e-07, "loss": 17.2199, "step": 48376 }, { "epoch": 0.8842926864935018, "grad_norm": 5.122702325520418, "learning_rate": 3.4707302959717805e-07, "loss": 16.9002, "step": 48377 }, { "epoch": 0.8843109656899483, "grad_norm": 6.26145721315911, "learning_rate": 3.469646753603928e-07, "loss": 17.2931, "step": 48378 }, { "epoch": 0.8843292448863947, "grad_norm": 6.274268838604046, "learning_rate": 3.468563374320938e-07, "loss": 17.3997, "step": 48379 }, { "epoch": 0.8843475240828413, "grad_norm": 6.473165870443778, "learning_rate": 3.4674801581265903e-07, "loss": 17.2827, "step": 48380 }, { "epoch": 0.8843658032792878, "grad_norm": 6.4831989027820365, "learning_rate": 3.4663971050246825e-07, "loss": 17.3753, "step": 48381 }, { "epoch": 0.8843840824757344, "grad_norm": 6.0465904817895035, "learning_rate": 3.465314215019022e-07, "loss": 17.2238, "step": 48382 }, { "epoch": 0.8844023616721809, "grad_norm": 6.765144890989144, "learning_rate": 3.4642314881133896e-07, "loss": 17.3608, "step": 48383 }, { "epoch": 0.8844206408686274, "grad_norm": 8.221538558933089, "learning_rate": 3.463148924311599e-07, "loss": 17.6728, "step": 48384 }, { "epoch": 0.884438920065074, "grad_norm": 9.467013885126665, "learning_rate": 3.46206652361743e-07, "loss": 18.7031, "step": 48385 }, { "epoch": 0.8844571992615204, "grad_norm": 7.9032557247253825, "learning_rate": 3.460984286034669e-07, "loss": 17.8321, "step": 48386 }, { "epoch": 0.884475478457967, "grad_norm": 6.776837618974799, "learning_rate": 3.459902211567123e-07, "loss": 17.3628, "step": 48387 }, { "epoch": 0.8844937576544135, "grad_norm": 7.406786053217641, "learning_rate": 3.458820300218585e-07, "loss": 17.5507, "step": 48388 }, { "epoch": 0.88451203685086, "grad_norm": 6.958248185216304, "learning_rate": 3.45773855199284e-07, "loss": 17.6135, "step": 48389 }, { "epoch": 0.8845303160473066, "grad_norm": 6.856177409207268, "learning_rate": 3.45665696689369e-07, "loss": 17.1489, "step": 48390 }, { "epoch": 0.8845485952437531, "grad_norm": 5.161114397111831, "learning_rate": 3.4555755449249164e-07, "loss": 16.9762, "step": 48391 }, { "epoch": 0.8845668744401997, "grad_norm": 7.340273782140591, "learning_rate": 3.454494286090304e-07, "loss": 17.8402, "step": 48392 }, { "epoch": 0.8845851536366461, "grad_norm": 6.900604791675076, "learning_rate": 3.453413190393656e-07, "loss": 17.7502, "step": 48393 }, { "epoch": 0.8846034328330926, "grad_norm": 6.991162167841747, "learning_rate": 3.4523322578387587e-07, "loss": 17.4059, "step": 48394 }, { "epoch": 0.8846217120295392, "grad_norm": 8.221000968057881, "learning_rate": 3.4512514884293913e-07, "loss": 17.744, "step": 48395 }, { "epoch": 0.8846399912259857, "grad_norm": 6.022473494221638, "learning_rate": 3.450170882169346e-07, "loss": 17.1081, "step": 48396 }, { "epoch": 0.8846582704224323, "grad_norm": 6.244667624824329, "learning_rate": 3.449090439062408e-07, "loss": 17.035, "step": 48397 }, { "epoch": 0.8846765496188788, "grad_norm": 7.7998335170656645, "learning_rate": 3.4480101591123804e-07, "loss": 17.4901, "step": 48398 }, { "epoch": 0.8846948288153252, "grad_norm": 5.6110361801939, "learning_rate": 3.446930042323038e-07, "loss": 17.181, "step": 48399 }, { "epoch": 0.8847131080117718, "grad_norm": 6.479488344028005, "learning_rate": 3.4458500886981606e-07, "loss": 17.3988, "step": 48400 }, { "epoch": 0.8847313872082183, "grad_norm": 4.7402251602918835, "learning_rate": 3.4447702982415453e-07, "loss": 16.7072, "step": 48401 }, { "epoch": 0.8847496664046649, "grad_norm": 8.005117615904814, "learning_rate": 3.4436906709569673e-07, "loss": 18.4758, "step": 48402 }, { "epoch": 0.8847679456011114, "grad_norm": 6.832249404937563, "learning_rate": 3.4426112068482066e-07, "loss": 17.2139, "step": 48403 }, { "epoch": 0.8847862247975579, "grad_norm": 5.226871223655617, "learning_rate": 3.44153190591906e-07, "loss": 17.1518, "step": 48404 }, { "epoch": 0.8848045039940045, "grad_norm": 8.825503722987657, "learning_rate": 3.4404527681733023e-07, "loss": 17.3859, "step": 48405 }, { "epoch": 0.8848227831904509, "grad_norm": 6.9842707326102955, "learning_rate": 3.4393737936147143e-07, "loss": 17.2129, "step": 48406 }, { "epoch": 0.8848410623868975, "grad_norm": 6.748784817942368, "learning_rate": 3.438294982247087e-07, "loss": 17.7496, "step": 48407 }, { "epoch": 0.884859341583344, "grad_norm": 6.317789416983785, "learning_rate": 3.4372163340741946e-07, "loss": 17.5381, "step": 48408 }, { "epoch": 0.8848776207797905, "grad_norm": 6.044548350068516, "learning_rate": 3.436137849099813e-07, "loss": 17.2317, "step": 48409 }, { "epoch": 0.8848958999762371, "grad_norm": 6.065491351492634, "learning_rate": 3.435059527327733e-07, "loss": 17.194, "step": 48410 }, { "epoch": 0.8849141791726836, "grad_norm": 5.854314928336347, "learning_rate": 3.43398136876173e-07, "loss": 17.3118, "step": 48411 }, { "epoch": 0.8849324583691301, "grad_norm": 8.884597561286094, "learning_rate": 3.4329033734055717e-07, "loss": 18.0698, "step": 48412 }, { "epoch": 0.8849507375655766, "grad_norm": 6.715723661614604, "learning_rate": 3.431825541263051e-07, "loss": 17.6203, "step": 48413 }, { "epoch": 0.8849690167620231, "grad_norm": 6.253846090456774, "learning_rate": 3.430747872337947e-07, "loss": 17.0108, "step": 48414 }, { "epoch": 0.8849872959584697, "grad_norm": 6.515485567011781, "learning_rate": 3.429670366634019e-07, "loss": 17.6155, "step": 48415 }, { "epoch": 0.8850055751549162, "grad_norm": 5.90701534617077, "learning_rate": 3.4285930241550693e-07, "loss": 16.7289, "step": 48416 }, { "epoch": 0.8850238543513627, "grad_norm": 5.568235633770576, "learning_rate": 3.427515844904844e-07, "loss": 17.2543, "step": 48417 }, { "epoch": 0.8850421335478093, "grad_norm": 7.362739059928539, "learning_rate": 3.4264388288871465e-07, "loss": 17.6609, "step": 48418 }, { "epoch": 0.8850604127442557, "grad_norm": 5.658920893358788, "learning_rate": 3.42536197610574e-07, "loss": 16.9835, "step": 48419 }, { "epoch": 0.8850786919407023, "grad_norm": 6.25776241553555, "learning_rate": 3.424285286564388e-07, "loss": 16.8376, "step": 48420 }, { "epoch": 0.8850969711371488, "grad_norm": 5.443004514871006, "learning_rate": 3.4232087602668886e-07, "loss": 17.0115, "step": 48421 }, { "epoch": 0.8851152503335953, "grad_norm": 9.100093582047332, "learning_rate": 3.4221323972169874e-07, "loss": 18.4629, "step": 48422 }, { "epoch": 0.8851335295300419, "grad_norm": 7.549907752670494, "learning_rate": 3.421056197418471e-07, "loss": 17.4552, "step": 48423 }, { "epoch": 0.8851518087264884, "grad_norm": 5.605577457643443, "learning_rate": 3.419980160875119e-07, "loss": 17.1711, "step": 48424 }, { "epoch": 0.885170087922935, "grad_norm": 5.145489649010765, "learning_rate": 3.4189042875906907e-07, "loss": 16.8196, "step": 48425 }, { "epoch": 0.8851883671193814, "grad_norm": 8.734505381873538, "learning_rate": 3.41782857756896e-07, "loss": 18.4488, "step": 48426 }, { "epoch": 0.8852066463158279, "grad_norm": 5.87294909595572, "learning_rate": 3.416753030813702e-07, "loss": 17.2458, "step": 48427 }, { "epoch": 0.8852249255122745, "grad_norm": 6.842890090775229, "learning_rate": 3.4156776473286746e-07, "loss": 17.7327, "step": 48428 }, { "epoch": 0.885243204708721, "grad_norm": 5.890572501939262, "learning_rate": 3.4146024271176636e-07, "loss": 17.4335, "step": 48429 }, { "epoch": 0.8852614839051676, "grad_norm": 5.111014246995685, "learning_rate": 3.413527370184427e-07, "loss": 17.0282, "step": 48430 }, { "epoch": 0.885279763101614, "grad_norm": 5.419259198572571, "learning_rate": 3.412452476532724e-07, "loss": 17.1888, "step": 48431 }, { "epoch": 0.8852980422980605, "grad_norm": 6.131395666218696, "learning_rate": 3.4113777461663387e-07, "loss": 17.1081, "step": 48432 }, { "epoch": 0.8853163214945071, "grad_norm": 10.789293685020736, "learning_rate": 3.4103031790890364e-07, "loss": 17.3515, "step": 48433 }, { "epoch": 0.8853346006909536, "grad_norm": 5.738600623933628, "learning_rate": 3.4092287753045684e-07, "loss": 17.1584, "step": 48434 }, { "epoch": 0.8853528798874002, "grad_norm": 6.883813923409903, "learning_rate": 3.408154534816721e-07, "loss": 17.9282, "step": 48435 }, { "epoch": 0.8853711590838467, "grad_norm": 5.122376010976388, "learning_rate": 3.4070804576292473e-07, "loss": 16.8539, "step": 48436 }, { "epoch": 0.8853894382802932, "grad_norm": 5.961395260441832, "learning_rate": 3.40600654374591e-07, "loss": 17.2453, "step": 48437 }, { "epoch": 0.8854077174767397, "grad_norm": 4.9744908758854605, "learning_rate": 3.404932793170479e-07, "loss": 17.0278, "step": 48438 }, { "epoch": 0.8854259966731862, "grad_norm": 6.908215498232127, "learning_rate": 3.4038592059067177e-07, "loss": 17.4944, "step": 48439 }, { "epoch": 0.8854442758696328, "grad_norm": 7.181837665866405, "learning_rate": 3.402785781958379e-07, "loss": 17.574, "step": 48440 }, { "epoch": 0.8854625550660793, "grad_norm": 6.693180163386111, "learning_rate": 3.4017125213292313e-07, "loss": 17.4226, "step": 48441 }, { "epoch": 0.8854808342625258, "grad_norm": 5.924020374783709, "learning_rate": 3.4006394240230443e-07, "loss": 17.2402, "step": 48442 }, { "epoch": 0.8854991134589724, "grad_norm": 5.6318326607347, "learning_rate": 3.399566490043571e-07, "loss": 17.2661, "step": 48443 }, { "epoch": 0.8855173926554188, "grad_norm": 5.0823106104510085, "learning_rate": 3.3984937193945743e-07, "loss": 16.945, "step": 48444 }, { "epoch": 0.8855356718518654, "grad_norm": 6.475349160901917, "learning_rate": 3.397421112079807e-07, "loss": 17.3999, "step": 48445 }, { "epoch": 0.8855539510483119, "grad_norm": 6.873985142619986, "learning_rate": 3.3963486681030443e-07, "loss": 17.2346, "step": 48446 }, { "epoch": 0.8855722302447584, "grad_norm": 7.317325498637379, "learning_rate": 3.3952763874680325e-07, "loss": 17.5809, "step": 48447 }, { "epoch": 0.885590509441205, "grad_norm": 5.379966300882997, "learning_rate": 3.394204270178525e-07, "loss": 17.1615, "step": 48448 }, { "epoch": 0.8856087886376515, "grad_norm": 6.60248370502208, "learning_rate": 3.3931323162383014e-07, "loss": 17.4592, "step": 48449 }, { "epoch": 0.8856270678340981, "grad_norm": 7.928475091247122, "learning_rate": 3.392060525651092e-07, "loss": 17.4628, "step": 48450 }, { "epoch": 0.8856453470305445, "grad_norm": 7.266561143784089, "learning_rate": 3.3909888984206664e-07, "loss": 17.3613, "step": 48451 }, { "epoch": 0.885663626226991, "grad_norm": 4.944877875617535, "learning_rate": 3.389917434550788e-07, "loss": 16.7778, "step": 48452 }, { "epoch": 0.8856819054234376, "grad_norm": 4.667413688948223, "learning_rate": 3.388846134045204e-07, "loss": 16.9321, "step": 48453 }, { "epoch": 0.8857001846198841, "grad_norm": 4.622307870513699, "learning_rate": 3.387774996907661e-07, "loss": 16.7196, "step": 48454 }, { "epoch": 0.8857184638163307, "grad_norm": 5.480662930463188, "learning_rate": 3.386704023141929e-07, "loss": 17.2145, "step": 48455 }, { "epoch": 0.8857367430127772, "grad_norm": 9.596009469043103, "learning_rate": 3.38563321275176e-07, "loss": 18.4931, "step": 48456 }, { "epoch": 0.8857550222092236, "grad_norm": 5.77746128199142, "learning_rate": 3.3845625657408897e-07, "loss": 17.149, "step": 48457 }, { "epoch": 0.8857733014056702, "grad_norm": 6.552014165030612, "learning_rate": 3.383492082113088e-07, "loss": 17.6939, "step": 48458 }, { "epoch": 0.8857915806021167, "grad_norm": 4.651628011802951, "learning_rate": 3.382421761872096e-07, "loss": 16.8238, "step": 48459 }, { "epoch": 0.8858098597985633, "grad_norm": 4.529008786453863, "learning_rate": 3.381351605021671e-07, "loss": 16.7705, "step": 48460 }, { "epoch": 0.8858281389950098, "grad_norm": 6.757010994604303, "learning_rate": 3.380281611565572e-07, "loss": 17.2257, "step": 48461 }, { "epoch": 0.8858464181914563, "grad_norm": 8.796877968505042, "learning_rate": 3.37921178150753e-07, "loss": 18.4217, "step": 48462 }, { "epoch": 0.8858646973879029, "grad_norm": 7.098468349675852, "learning_rate": 3.3781421148513125e-07, "loss": 17.6351, "step": 48463 }, { "epoch": 0.8858829765843493, "grad_norm": 6.799797387749322, "learning_rate": 3.3770726116006624e-07, "loss": 17.3117, "step": 48464 }, { "epoch": 0.8859012557807959, "grad_norm": 5.62091701005047, "learning_rate": 3.3760032717593205e-07, "loss": 17.4995, "step": 48465 }, { "epoch": 0.8859195349772424, "grad_norm": 8.884940469405329, "learning_rate": 3.3749340953310503e-07, "loss": 17.8641, "step": 48466 }, { "epoch": 0.8859378141736889, "grad_norm": 6.987369629780425, "learning_rate": 3.373865082319588e-07, "loss": 17.5722, "step": 48467 }, { "epoch": 0.8859560933701355, "grad_norm": 6.2498626884657655, "learning_rate": 3.3727962327286756e-07, "loss": 17.3749, "step": 48468 }, { "epoch": 0.885974372566582, "grad_norm": 6.807456712899186, "learning_rate": 3.3717275465620646e-07, "loss": 17.7186, "step": 48469 }, { "epoch": 0.8859926517630285, "grad_norm": 5.60040967229546, "learning_rate": 3.3706590238235083e-07, "loss": 17.0614, "step": 48470 }, { "epoch": 0.886010930959475, "grad_norm": 5.315344340945034, "learning_rate": 3.369590664516742e-07, "loss": 17.1047, "step": 48471 }, { "epoch": 0.8860292101559215, "grad_norm": 6.09608507624977, "learning_rate": 3.3685224686455244e-07, "loss": 17.4269, "step": 48472 }, { "epoch": 0.8860474893523681, "grad_norm": 5.081082539616897, "learning_rate": 3.367454436213585e-07, "loss": 16.8799, "step": 48473 }, { "epoch": 0.8860657685488146, "grad_norm": 7.258300004435648, "learning_rate": 3.366386567224661e-07, "loss": 17.5767, "step": 48474 }, { "epoch": 0.8860840477452612, "grad_norm": 6.298443812912119, "learning_rate": 3.365318861682515e-07, "loss": 17.1067, "step": 48475 }, { "epoch": 0.8861023269417077, "grad_norm": 6.479920695120896, "learning_rate": 3.364251319590872e-07, "loss": 17.2577, "step": 48476 }, { "epoch": 0.8861206061381541, "grad_norm": 5.4572419996211385, "learning_rate": 3.3631839409534905e-07, "loss": 17.0722, "step": 48477 }, { "epoch": 0.8861388853346007, "grad_norm": 7.479361948084394, "learning_rate": 3.362116725774095e-07, "loss": 17.5749, "step": 48478 }, { "epoch": 0.8861571645310472, "grad_norm": 8.735196843569934, "learning_rate": 3.3610496740564325e-07, "loss": 17.8683, "step": 48479 }, { "epoch": 0.8861754437274938, "grad_norm": 5.647297747934123, "learning_rate": 3.35998278580425e-07, "loss": 17.1317, "step": 48480 }, { "epoch": 0.8861937229239403, "grad_norm": 6.31416568717285, "learning_rate": 3.358916061021283e-07, "loss": 17.1076, "step": 48481 }, { "epoch": 0.8862120021203868, "grad_norm": 5.091789495507386, "learning_rate": 3.357849499711258e-07, "loss": 16.7696, "step": 48482 }, { "epoch": 0.8862302813168333, "grad_norm": 4.841809840487192, "learning_rate": 3.356783101877936e-07, "loss": 16.9814, "step": 48483 }, { "epoch": 0.8862485605132798, "grad_norm": 5.459979260581431, "learning_rate": 3.355716867525033e-07, "loss": 16.8545, "step": 48484 }, { "epoch": 0.8862668397097263, "grad_norm": 7.570341558151901, "learning_rate": 3.354650796656295e-07, "loss": 17.8341, "step": 48485 }, { "epoch": 0.8862851189061729, "grad_norm": 6.268884021194691, "learning_rate": 3.353584889275452e-07, "loss": 17.2276, "step": 48486 }, { "epoch": 0.8863033981026194, "grad_norm": 6.069543271933602, "learning_rate": 3.352519145386257e-07, "loss": 17.2138, "step": 48487 }, { "epoch": 0.886321677299066, "grad_norm": 5.211345278613974, "learning_rate": 3.3514535649924294e-07, "loss": 16.9, "step": 48488 }, { "epoch": 0.8863399564955124, "grad_norm": 10.546107495831254, "learning_rate": 3.3503881480977107e-07, "loss": 17.3225, "step": 48489 }, { "epoch": 0.8863582356919589, "grad_norm": 5.952023889300436, "learning_rate": 3.349322894705831e-07, "loss": 17.2424, "step": 48490 }, { "epoch": 0.8863765148884055, "grad_norm": 4.784214609871204, "learning_rate": 3.3482578048205315e-07, "loss": 16.7815, "step": 48491 }, { "epoch": 0.886394794084852, "grad_norm": 5.6600697846000525, "learning_rate": 3.347192878445543e-07, "loss": 16.9674, "step": 48492 }, { "epoch": 0.8864130732812986, "grad_norm": 6.008055322326151, "learning_rate": 3.346128115584585e-07, "loss": 17.4018, "step": 48493 }, { "epoch": 0.8864313524777451, "grad_norm": 6.824453461191961, "learning_rate": 3.3450635162414035e-07, "loss": 17.7654, "step": 48494 }, { "epoch": 0.8864496316741916, "grad_norm": 6.80534915108485, "learning_rate": 3.343999080419724e-07, "loss": 17.5514, "step": 48495 }, { "epoch": 0.8864679108706381, "grad_norm": 6.0576562845534605, "learning_rate": 3.342934808123277e-07, "loss": 17.1372, "step": 48496 }, { "epoch": 0.8864861900670846, "grad_norm": 6.540596515691316, "learning_rate": 3.3418706993558035e-07, "loss": 17.4258, "step": 48497 }, { "epoch": 0.8865044692635312, "grad_norm": 4.790528145735593, "learning_rate": 3.3408067541210233e-07, "loss": 16.7845, "step": 48498 }, { "epoch": 0.8865227484599777, "grad_norm": 7.082226760778057, "learning_rate": 3.33974297242266e-07, "loss": 17.425, "step": 48499 }, { "epoch": 0.8865410276564242, "grad_norm": 8.701913591851513, "learning_rate": 3.3386793542644513e-07, "loss": 18.0245, "step": 48500 }, { "epoch": 0.8865593068528708, "grad_norm": 6.404295694005169, "learning_rate": 3.337615899650126e-07, "loss": 17.2697, "step": 48501 }, { "epoch": 0.8865775860493172, "grad_norm": 9.317791778783315, "learning_rate": 3.336552608583404e-07, "loss": 18.1256, "step": 48502 }, { "epoch": 0.8865958652457638, "grad_norm": 7.54717489744766, "learning_rate": 3.335489481068016e-07, "loss": 17.4295, "step": 48503 }, { "epoch": 0.8866141444422103, "grad_norm": 5.507544737862609, "learning_rate": 3.334426517107686e-07, "loss": 17.0871, "step": 48504 }, { "epoch": 0.8866324236386568, "grad_norm": 8.38000076732694, "learning_rate": 3.3333637167061394e-07, "loss": 17.9865, "step": 48505 }, { "epoch": 0.8866507028351034, "grad_norm": 4.984193072209047, "learning_rate": 3.3323010798671116e-07, "loss": 16.8401, "step": 48506 }, { "epoch": 0.8866689820315499, "grad_norm": 5.216946595985001, "learning_rate": 3.331238606594311e-07, "loss": 16.8144, "step": 48507 }, { "epoch": 0.8866872612279965, "grad_norm": 5.63935433749886, "learning_rate": 3.33017629689148e-07, "loss": 17.1454, "step": 48508 }, { "epoch": 0.8867055404244429, "grad_norm": 9.643598360793426, "learning_rate": 3.3291141507623247e-07, "loss": 17.6178, "step": 48509 }, { "epoch": 0.8867238196208894, "grad_norm": 5.221164522313789, "learning_rate": 3.3280521682105713e-07, "loss": 16.9033, "step": 48510 }, { "epoch": 0.886742098817336, "grad_norm": 5.758526591419331, "learning_rate": 3.326990349239956e-07, "loss": 17.4106, "step": 48511 }, { "epoch": 0.8867603780137825, "grad_norm": 6.661340627271774, "learning_rate": 3.325928693854186e-07, "loss": 17.4007, "step": 48512 }, { "epoch": 0.8867786572102291, "grad_norm": 6.875494872560684, "learning_rate": 3.324867202056975e-07, "loss": 17.1599, "step": 48513 }, { "epoch": 0.8867969364066756, "grad_norm": 5.971665091064759, "learning_rate": 3.323805873852054e-07, "loss": 17.2498, "step": 48514 }, { "epoch": 0.886815215603122, "grad_norm": 5.792610473492951, "learning_rate": 3.322744709243153e-07, "loss": 17.1267, "step": 48515 }, { "epoch": 0.8868334947995686, "grad_norm": 4.82715892595515, "learning_rate": 3.321683708233975e-07, "loss": 16.7886, "step": 48516 }, { "epoch": 0.8868517739960151, "grad_norm": 5.062843364096745, "learning_rate": 3.320622870828255e-07, "loss": 16.939, "step": 48517 }, { "epoch": 0.8868700531924617, "grad_norm": 7.750053479411663, "learning_rate": 3.3195621970296965e-07, "loss": 17.9342, "step": 48518 }, { "epoch": 0.8868883323889082, "grad_norm": 5.404777810921568, "learning_rate": 3.3185016868420126e-07, "loss": 17.1959, "step": 48519 }, { "epoch": 0.8869066115853547, "grad_norm": 7.4628887445104795, "learning_rate": 3.3174413402689456e-07, "loss": 17.4638, "step": 48520 }, { "epoch": 0.8869248907818013, "grad_norm": 6.22844636194999, "learning_rate": 3.31638115731418e-07, "loss": 17.7151, "step": 48521 }, { "epoch": 0.8869431699782477, "grad_norm": 6.0090466823445245, "learning_rate": 3.3153211379814585e-07, "loss": 17.1779, "step": 48522 }, { "epoch": 0.8869614491746943, "grad_norm": 7.134914664597371, "learning_rate": 3.314261282274478e-07, "loss": 17.2908, "step": 48523 }, { "epoch": 0.8869797283711408, "grad_norm": 5.95335525706409, "learning_rate": 3.313201590196957e-07, "loss": 17.1034, "step": 48524 }, { "epoch": 0.8869980075675873, "grad_norm": 7.058396879125376, "learning_rate": 3.312142061752621e-07, "loss": 17.0988, "step": 48525 }, { "epoch": 0.8870162867640339, "grad_norm": 5.924090274520811, "learning_rate": 3.311082696945178e-07, "loss": 17.4169, "step": 48526 }, { "epoch": 0.8870345659604804, "grad_norm": 5.7510274430496775, "learning_rate": 3.3100234957783305e-07, "loss": 17.2255, "step": 48527 }, { "epoch": 0.887052845156927, "grad_norm": 6.436432605123067, "learning_rate": 3.308964458255809e-07, "loss": 17.512, "step": 48528 }, { "epoch": 0.8870711243533734, "grad_norm": 6.486736746203364, "learning_rate": 3.307905584381316e-07, "loss": 17.6917, "step": 48529 }, { "epoch": 0.8870894035498199, "grad_norm": 6.0497625823624475, "learning_rate": 3.306846874158548e-07, "loss": 17.0157, "step": 48530 }, { "epoch": 0.8871076827462665, "grad_norm": 5.165108304234508, "learning_rate": 3.3057883275912415e-07, "loss": 17.0795, "step": 48531 }, { "epoch": 0.887125961942713, "grad_norm": 6.554751175849875, "learning_rate": 3.304729944683088e-07, "loss": 17.1616, "step": 48532 }, { "epoch": 0.8871442411391596, "grad_norm": 5.434787673934762, "learning_rate": 3.3036717254378014e-07, "loss": 17.1189, "step": 48533 }, { "epoch": 0.887162520335606, "grad_norm": 6.793263041839589, "learning_rate": 3.302613669859106e-07, "loss": 17.5993, "step": 48534 }, { "epoch": 0.8871807995320525, "grad_norm": 5.199883111592137, "learning_rate": 3.3015557779506825e-07, "loss": 16.9993, "step": 48535 }, { "epoch": 0.8871990787284991, "grad_norm": 7.1487732787935885, "learning_rate": 3.300498049716266e-07, "loss": 17.58, "step": 48536 }, { "epoch": 0.8872173579249456, "grad_norm": 6.1195426231671215, "learning_rate": 3.2994404851595553e-07, "loss": 17.2411, "step": 48537 }, { "epoch": 0.8872356371213922, "grad_norm": 6.145885355381693, "learning_rate": 3.29838308428424e-07, "loss": 17.0899, "step": 48538 }, { "epoch": 0.8872539163178387, "grad_norm": 8.787103020495275, "learning_rate": 3.2973258470940463e-07, "loss": 17.6266, "step": 48539 }, { "epoch": 0.8872721955142852, "grad_norm": 6.048377091724623, "learning_rate": 3.2962687735926703e-07, "loss": 17.2379, "step": 48540 }, { "epoch": 0.8872904747107317, "grad_norm": 7.947709964417533, "learning_rate": 3.29521186378382e-07, "loss": 18.0995, "step": 48541 }, { "epoch": 0.8873087539071782, "grad_norm": 6.687354893737671, "learning_rate": 3.2941551176711996e-07, "loss": 17.1645, "step": 48542 }, { "epoch": 0.8873270331036248, "grad_norm": 5.754362205899727, "learning_rate": 3.293098535258515e-07, "loss": 17.1581, "step": 48543 }, { "epoch": 0.8873453123000713, "grad_norm": 5.934384221812531, "learning_rate": 3.29204211654946e-07, "loss": 17.354, "step": 48544 }, { "epoch": 0.8873635914965178, "grad_norm": 5.858712393574924, "learning_rate": 3.290985861547752e-07, "loss": 17.1108, "step": 48545 }, { "epoch": 0.8873818706929644, "grad_norm": 6.2051338512184575, "learning_rate": 3.2899297702570833e-07, "loss": 16.9776, "step": 48546 }, { "epoch": 0.8874001498894108, "grad_norm": 6.195801790571358, "learning_rate": 3.2888738426811564e-07, "loss": 17.2778, "step": 48547 }, { "epoch": 0.8874184290858574, "grad_norm": 6.286811698597674, "learning_rate": 3.2878180788236735e-07, "loss": 17.225, "step": 48548 }, { "epoch": 0.8874367082823039, "grad_norm": 6.554853870185566, "learning_rate": 3.2867624786883324e-07, "loss": 17.5002, "step": 48549 }, { "epoch": 0.8874549874787504, "grad_norm": 6.138984887529273, "learning_rate": 3.2857070422788293e-07, "loss": 17.3887, "step": 48550 }, { "epoch": 0.887473266675197, "grad_norm": 6.120879252998588, "learning_rate": 3.2846517695988844e-07, "loss": 17.3669, "step": 48551 }, { "epoch": 0.8874915458716435, "grad_norm": 5.58155144073937, "learning_rate": 3.2835966606521663e-07, "loss": 17.27, "step": 48552 }, { "epoch": 0.88750982506809, "grad_norm": 5.985293112427708, "learning_rate": 3.2825417154424e-07, "loss": 17.4449, "step": 48553 }, { "epoch": 0.8875281042645365, "grad_norm": 6.492104501936464, "learning_rate": 3.281486933973271e-07, "loss": 17.4988, "step": 48554 }, { "epoch": 0.887546383460983, "grad_norm": 7.621301088334787, "learning_rate": 3.280432316248472e-07, "loss": 17.7629, "step": 48555 }, { "epoch": 0.8875646626574296, "grad_norm": 5.920031627416165, "learning_rate": 3.2793778622717045e-07, "loss": 17.0929, "step": 48556 }, { "epoch": 0.8875829418538761, "grad_norm": 6.245513187411865, "learning_rate": 3.278323572046666e-07, "loss": 17.5519, "step": 48557 }, { "epoch": 0.8876012210503226, "grad_norm": 6.640915526131225, "learning_rate": 3.2772694455770414e-07, "loss": 17.071, "step": 48558 }, { "epoch": 0.8876195002467692, "grad_norm": 4.633251840296265, "learning_rate": 3.276215482866535e-07, "loss": 16.6761, "step": 48559 }, { "epoch": 0.8876377794432156, "grad_norm": 6.274835735148741, "learning_rate": 3.2751616839188427e-07, "loss": 17.2838, "step": 48560 }, { "epoch": 0.8876560586396622, "grad_norm": 6.847803413080686, "learning_rate": 3.274108048737651e-07, "loss": 17.6357, "step": 48561 }, { "epoch": 0.8876743378361087, "grad_norm": 7.518639797862369, "learning_rate": 3.273054577326662e-07, "loss": 17.928, "step": 48562 }, { "epoch": 0.8876926170325552, "grad_norm": 6.555682366776276, "learning_rate": 3.2720012696895566e-07, "loss": 17.8317, "step": 48563 }, { "epoch": 0.8877108962290018, "grad_norm": 4.950145511579947, "learning_rate": 3.2709481258300313e-07, "loss": 16.8773, "step": 48564 }, { "epoch": 0.8877291754254483, "grad_norm": 7.675473949968162, "learning_rate": 3.2698951457517833e-07, "loss": 18.0172, "step": 48565 }, { "epoch": 0.8877474546218949, "grad_norm": 6.792519099751809, "learning_rate": 3.268842329458488e-07, "loss": 17.2901, "step": 48566 }, { "epoch": 0.8877657338183413, "grad_norm": 6.266461389060173, "learning_rate": 3.2677896769538533e-07, "loss": 17.5533, "step": 48567 }, { "epoch": 0.8877840130147878, "grad_norm": 7.420119245632325, "learning_rate": 3.266737188241553e-07, "loss": 17.6879, "step": 48568 }, { "epoch": 0.8878022922112344, "grad_norm": 4.331192172728436, "learning_rate": 3.2656848633252857e-07, "loss": 17.3495, "step": 48569 }, { "epoch": 0.8878205714076809, "grad_norm": 6.514242214497785, "learning_rate": 3.264632702208748e-07, "loss": 17.2287, "step": 48570 }, { "epoch": 0.8878388506041275, "grad_norm": 6.287374312551695, "learning_rate": 3.263580704895608e-07, "loss": 17.999, "step": 48571 }, { "epoch": 0.887857129800574, "grad_norm": 5.445872036234453, "learning_rate": 3.2625288713895643e-07, "loss": 16.9495, "step": 48572 }, { "epoch": 0.8878754089970204, "grad_norm": 6.542543166159101, "learning_rate": 3.261477201694302e-07, "loss": 17.5842, "step": 48573 }, { "epoch": 0.887893688193467, "grad_norm": 4.574149076190663, "learning_rate": 3.260425695813507e-07, "loss": 16.638, "step": 48574 }, { "epoch": 0.8879119673899135, "grad_norm": 6.285808658198453, "learning_rate": 3.25937435375086e-07, "loss": 17.4561, "step": 48575 }, { "epoch": 0.8879302465863601, "grad_norm": 5.856194904070246, "learning_rate": 3.258323175510053e-07, "loss": 17.3037, "step": 48576 }, { "epoch": 0.8879485257828066, "grad_norm": 6.096185607736417, "learning_rate": 3.257272161094765e-07, "loss": 17.2205, "step": 48577 }, { "epoch": 0.8879668049792531, "grad_norm": 6.24047764748502, "learning_rate": 3.256221310508678e-07, "loss": 17.3431, "step": 48578 }, { "epoch": 0.8879850841756997, "grad_norm": 7.250475474398157, "learning_rate": 3.255170623755488e-07, "loss": 17.6873, "step": 48579 }, { "epoch": 0.8880033633721461, "grad_norm": 6.4403593098930845, "learning_rate": 3.254120100838865e-07, "loss": 17.4204, "step": 48580 }, { "epoch": 0.8880216425685927, "grad_norm": 6.079663030011995, "learning_rate": 3.2530697417624937e-07, "loss": 17.356, "step": 48581 }, { "epoch": 0.8880399217650392, "grad_norm": 6.952872381193845, "learning_rate": 3.252019546530061e-07, "loss": 17.646, "step": 48582 }, { "epoch": 0.8880582009614857, "grad_norm": 4.765226783669416, "learning_rate": 3.2509695151452357e-07, "loss": 16.7701, "step": 48583 }, { "epoch": 0.8880764801579323, "grad_norm": 6.380836673556503, "learning_rate": 3.24991964761171e-07, "loss": 17.5114, "step": 48584 }, { "epoch": 0.8880947593543788, "grad_norm": 7.587908224195642, "learning_rate": 3.248869943933164e-07, "loss": 17.6044, "step": 48585 }, { "epoch": 0.8881130385508254, "grad_norm": 9.1257228716495, "learning_rate": 3.247820404113261e-07, "loss": 17.918, "step": 48586 }, { "epoch": 0.8881313177472718, "grad_norm": 7.02868296110183, "learning_rate": 3.246771028155693e-07, "loss": 17.2084, "step": 48587 }, { "epoch": 0.8881495969437183, "grad_norm": 5.888761281785643, "learning_rate": 3.24572181606414e-07, "loss": 17.1751, "step": 48588 }, { "epoch": 0.8881678761401649, "grad_norm": 5.897268667053741, "learning_rate": 3.2446727678422663e-07, "loss": 17.0666, "step": 48589 }, { "epoch": 0.8881861553366114, "grad_norm": 6.245212438389577, "learning_rate": 3.2436238834937685e-07, "loss": 17.1445, "step": 48590 }, { "epoch": 0.888204434533058, "grad_norm": 8.028137266773696, "learning_rate": 3.24257516302231e-07, "loss": 18.3661, "step": 48591 }, { "epoch": 0.8882227137295045, "grad_norm": 5.913860711046682, "learning_rate": 3.2415266064315663e-07, "loss": 17.2361, "step": 48592 }, { "epoch": 0.8882409929259509, "grad_norm": 8.028019139423268, "learning_rate": 3.240478213725212e-07, "loss": 17.6647, "step": 48593 }, { "epoch": 0.8882592721223975, "grad_norm": 4.910933001515704, "learning_rate": 3.2394299849069333e-07, "loss": 16.7846, "step": 48594 }, { "epoch": 0.888277551318844, "grad_norm": 5.867492213793779, "learning_rate": 3.2383819199803824e-07, "loss": 17.0461, "step": 48595 }, { "epoch": 0.8882958305152906, "grad_norm": 6.753346423093278, "learning_rate": 3.237334018949245e-07, "loss": 17.3742, "step": 48596 }, { "epoch": 0.8883141097117371, "grad_norm": 6.03603139073681, "learning_rate": 3.2362862818171916e-07, "loss": 17.173, "step": 48597 }, { "epoch": 0.8883323889081836, "grad_norm": 6.670266687992889, "learning_rate": 3.2352387085879123e-07, "loss": 17.1891, "step": 48598 }, { "epoch": 0.8883506681046301, "grad_norm": 5.7762845643161524, "learning_rate": 3.234191299265055e-07, "loss": 17.1907, "step": 48599 }, { "epoch": 0.8883689473010766, "grad_norm": 6.538484926380013, "learning_rate": 3.233144053852294e-07, "loss": 17.347, "step": 48600 }, { "epoch": 0.8883872264975232, "grad_norm": 7.407781173005092, "learning_rate": 3.232096972353316e-07, "loss": 17.7545, "step": 48601 }, { "epoch": 0.8884055056939697, "grad_norm": 6.802736053021056, "learning_rate": 3.2310500547717784e-07, "loss": 17.6507, "step": 48602 }, { "epoch": 0.8884237848904162, "grad_norm": 6.36479671452199, "learning_rate": 3.2300033011113395e-07, "loss": 17.3856, "step": 48603 }, { "epoch": 0.8884420640868628, "grad_norm": 6.495849661703214, "learning_rate": 3.2289567113756914e-07, "loss": 17.5133, "step": 48604 }, { "epoch": 0.8884603432833093, "grad_norm": 7.838446915341042, "learning_rate": 3.2279102855684864e-07, "loss": 17.8616, "step": 48605 }, { "epoch": 0.8884786224797558, "grad_norm": 5.219148759237131, "learning_rate": 3.2268640236933936e-07, "loss": 16.8858, "step": 48606 }, { "epoch": 0.8884969016762023, "grad_norm": 9.691354638316499, "learning_rate": 3.2258179257540934e-07, "loss": 17.6469, "step": 48607 }, { "epoch": 0.8885151808726488, "grad_norm": 5.452303102446809, "learning_rate": 3.224771991754239e-07, "loss": 17.086, "step": 48608 }, { "epoch": 0.8885334600690954, "grad_norm": 6.692148023176314, "learning_rate": 3.223726221697493e-07, "loss": 17.7705, "step": 48609 }, { "epoch": 0.8885517392655419, "grad_norm": 6.0857213107893005, "learning_rate": 3.222680615587537e-07, "loss": 17.396, "step": 48610 }, { "epoch": 0.8885700184619885, "grad_norm": 10.226572844714726, "learning_rate": 3.2216351734280283e-07, "loss": 17.1979, "step": 48611 }, { "epoch": 0.888588297658435, "grad_norm": 5.552779474407712, "learning_rate": 3.22058989522262e-07, "loss": 16.9734, "step": 48612 }, { "epoch": 0.8886065768548814, "grad_norm": 5.771885178899497, "learning_rate": 3.2195447809749814e-07, "loss": 17.2164, "step": 48613 }, { "epoch": 0.888624856051328, "grad_norm": 8.016583754100186, "learning_rate": 3.2184998306887927e-07, "loss": 18.2378, "step": 48614 }, { "epoch": 0.8886431352477745, "grad_norm": 7.04485784530427, "learning_rate": 3.21745504436769e-07, "loss": 17.381, "step": 48615 }, { "epoch": 0.8886614144442211, "grad_norm": 5.8738969197397575, "learning_rate": 3.216410422015354e-07, "loss": 17.1963, "step": 48616 }, { "epoch": 0.8886796936406676, "grad_norm": 7.993670818102708, "learning_rate": 3.215365963635436e-07, "loss": 17.1842, "step": 48617 }, { "epoch": 0.888697972837114, "grad_norm": 6.663204062182949, "learning_rate": 3.2143216692316073e-07, "loss": 17.1477, "step": 48618 }, { "epoch": 0.8887162520335606, "grad_norm": 7.051065782622249, "learning_rate": 3.213277538807524e-07, "loss": 17.8815, "step": 48619 }, { "epoch": 0.8887345312300071, "grad_norm": 4.359784097256635, "learning_rate": 3.21223357236683e-07, "loss": 16.6573, "step": 48620 }, { "epoch": 0.8887528104264536, "grad_norm": 5.25577937620984, "learning_rate": 3.211189769913209e-07, "loss": 16.8513, "step": 48621 }, { "epoch": 0.8887710896229002, "grad_norm": 5.766665119118859, "learning_rate": 3.210146131450298e-07, "loss": 17.1109, "step": 48622 }, { "epoch": 0.8887893688193467, "grad_norm": 5.0273670602828, "learning_rate": 3.2091026569817664e-07, "loss": 16.7783, "step": 48623 }, { "epoch": 0.8888076480157933, "grad_norm": 5.737250618481388, "learning_rate": 3.208059346511272e-07, "loss": 17.3066, "step": 48624 }, { "epoch": 0.8888259272122397, "grad_norm": 6.303242316094916, "learning_rate": 3.207016200042473e-07, "loss": 17.4555, "step": 48625 }, { "epoch": 0.8888442064086862, "grad_norm": 6.3474277466361935, "learning_rate": 3.205973217579017e-07, "loss": 17.1208, "step": 48626 }, { "epoch": 0.8888624856051328, "grad_norm": 6.0570178198400235, "learning_rate": 3.204930399124567e-07, "loss": 17.3013, "step": 48627 }, { "epoch": 0.8888807648015793, "grad_norm": 5.82605077649616, "learning_rate": 3.203887744682771e-07, "loss": 17.2222, "step": 48628 }, { "epoch": 0.8888990439980259, "grad_norm": 8.826936469654166, "learning_rate": 3.202845254257292e-07, "loss": 18.2713, "step": 48629 }, { "epoch": 0.8889173231944724, "grad_norm": 5.666254988926635, "learning_rate": 3.2018029278517826e-07, "loss": 17.0886, "step": 48630 }, { "epoch": 0.8889356023909188, "grad_norm": 6.044684196015614, "learning_rate": 3.2007607654698846e-07, "loss": 17.164, "step": 48631 }, { "epoch": 0.8889538815873654, "grad_norm": 5.086042447248748, "learning_rate": 3.199718767115256e-07, "loss": 16.9316, "step": 48632 }, { "epoch": 0.8889721607838119, "grad_norm": 6.328835123156985, "learning_rate": 3.1986769327915666e-07, "loss": 17.3066, "step": 48633 }, { "epoch": 0.8889904399802585, "grad_norm": 5.032884073576756, "learning_rate": 3.197635262502441e-07, "loss": 16.945, "step": 48634 }, { "epoch": 0.889008719176705, "grad_norm": 5.343821643510824, "learning_rate": 3.196593756251548e-07, "loss": 16.9722, "step": 48635 }, { "epoch": 0.8890269983731515, "grad_norm": 5.119994243377798, "learning_rate": 3.1955524140425355e-07, "loss": 16.9764, "step": 48636 }, { "epoch": 0.8890452775695981, "grad_norm": 5.612687567477053, "learning_rate": 3.194511235879044e-07, "loss": 17.2415, "step": 48637 }, { "epoch": 0.8890635567660445, "grad_norm": 6.793742453978765, "learning_rate": 3.193470221764733e-07, "loss": 17.6461, "step": 48638 }, { "epoch": 0.8890818359624911, "grad_norm": 7.084789469399295, "learning_rate": 3.1924293717032483e-07, "loss": 17.632, "step": 48639 }, { "epoch": 0.8891001151589376, "grad_norm": 6.1133976755831325, "learning_rate": 3.191388685698232e-07, "loss": 17.678, "step": 48640 }, { "epoch": 0.8891183943553841, "grad_norm": 5.423600780285685, "learning_rate": 3.190348163753332e-07, "loss": 17.1489, "step": 48641 }, { "epoch": 0.8891366735518307, "grad_norm": 5.861051433754127, "learning_rate": 3.1893078058722103e-07, "loss": 17.192, "step": 48642 }, { "epoch": 0.8891549527482772, "grad_norm": 6.267033309179386, "learning_rate": 3.1882676120584934e-07, "loss": 17.3032, "step": 48643 }, { "epoch": 0.8891732319447238, "grad_norm": 5.865218476829381, "learning_rate": 3.187227582315844e-07, "loss": 17.2606, "step": 48644 }, { "epoch": 0.8891915111411702, "grad_norm": 4.882454490948528, "learning_rate": 3.186187716647893e-07, "loss": 17.1477, "step": 48645 }, { "epoch": 0.8892097903376167, "grad_norm": 4.378988359238647, "learning_rate": 3.1851480150583035e-07, "loss": 16.5677, "step": 48646 }, { "epoch": 0.8892280695340633, "grad_norm": 6.40099130529138, "learning_rate": 3.1841084775507016e-07, "loss": 17.5491, "step": 48647 }, { "epoch": 0.8892463487305098, "grad_norm": 7.105931696165459, "learning_rate": 3.183069104128733e-07, "loss": 17.3606, "step": 48648 }, { "epoch": 0.8892646279269564, "grad_norm": 5.726965681273778, "learning_rate": 3.1820298947960516e-07, "loss": 17.1506, "step": 48649 }, { "epoch": 0.8892829071234029, "grad_norm": 6.0622883243122, "learning_rate": 3.180990849556281e-07, "loss": 17.3048, "step": 48650 }, { "epoch": 0.8893011863198493, "grad_norm": 4.584019420251133, "learning_rate": 3.1799519684130807e-07, "loss": 16.7739, "step": 48651 }, { "epoch": 0.8893194655162959, "grad_norm": 4.990812453557787, "learning_rate": 3.1789132513700915e-07, "loss": 16.9119, "step": 48652 }, { "epoch": 0.8893377447127424, "grad_norm": 7.956508951234336, "learning_rate": 3.1778746984309437e-07, "loss": 17.6178, "step": 48653 }, { "epoch": 0.889356023909189, "grad_norm": 6.903731979348746, "learning_rate": 3.1768363095992794e-07, "loss": 17.5378, "step": 48654 }, { "epoch": 0.8893743031056355, "grad_norm": 4.947488972245695, "learning_rate": 3.1757980848787505e-07, "loss": 16.8465, "step": 48655 }, { "epoch": 0.889392582302082, "grad_norm": 5.4346108828802215, "learning_rate": 3.174760024272983e-07, "loss": 17.0295, "step": 48656 }, { "epoch": 0.8894108614985285, "grad_norm": 6.203048455174732, "learning_rate": 3.173722127785611e-07, "loss": 17.5033, "step": 48657 }, { "epoch": 0.889429140694975, "grad_norm": 7.556443345661584, "learning_rate": 3.1726843954202833e-07, "loss": 17.6513, "step": 48658 }, { "epoch": 0.8894474198914216, "grad_norm": 6.534198602133165, "learning_rate": 3.17164682718063e-07, "loss": 17.4965, "step": 48659 }, { "epoch": 0.8894656990878681, "grad_norm": 5.572067628446071, "learning_rate": 3.170609423070292e-07, "loss": 17.1845, "step": 48660 }, { "epoch": 0.8894839782843146, "grad_norm": 5.183575232396974, "learning_rate": 3.1695721830929114e-07, "loss": 16.7363, "step": 48661 }, { "epoch": 0.8895022574807612, "grad_norm": 5.775882890288672, "learning_rate": 3.1685351072521077e-07, "loss": 17.2154, "step": 48662 }, { "epoch": 0.8895205366772077, "grad_norm": 8.189015684227563, "learning_rate": 3.167498195551533e-07, "loss": 17.2844, "step": 48663 }, { "epoch": 0.8895388158736542, "grad_norm": 6.6674188509683905, "learning_rate": 3.166461447994812e-07, "loss": 17.3694, "step": 48664 }, { "epoch": 0.8895570950701007, "grad_norm": 6.359519160673428, "learning_rate": 3.1654248645855703e-07, "loss": 17.3202, "step": 48665 }, { "epoch": 0.8895753742665472, "grad_norm": 6.723580566554686, "learning_rate": 3.1643884453274596e-07, "loss": 17.7686, "step": 48666 }, { "epoch": 0.8895936534629938, "grad_norm": 6.139462771853604, "learning_rate": 3.163352190224095e-07, "loss": 17.2184, "step": 48667 }, { "epoch": 0.8896119326594403, "grad_norm": 4.945126927556269, "learning_rate": 3.162316099279128e-07, "loss": 17.0176, "step": 48668 }, { "epoch": 0.8896302118558869, "grad_norm": 4.890628859828304, "learning_rate": 3.161280172496167e-07, "loss": 16.9501, "step": 48669 }, { "epoch": 0.8896484910523333, "grad_norm": 5.816889038457765, "learning_rate": 3.1602444098788654e-07, "loss": 17.1829, "step": 48670 }, { "epoch": 0.8896667702487798, "grad_norm": 6.6011866766441845, "learning_rate": 3.1592088114308303e-07, "loss": 17.5474, "step": 48671 }, { "epoch": 0.8896850494452264, "grad_norm": 6.142250753868044, "learning_rate": 3.1581733771557153e-07, "loss": 17.7818, "step": 48672 }, { "epoch": 0.8897033286416729, "grad_norm": 7.2699395343038145, "learning_rate": 3.157138107057128e-07, "loss": 18.1092, "step": 48673 }, { "epoch": 0.8897216078381195, "grad_norm": 6.432585052452869, "learning_rate": 3.156103001138716e-07, "loss": 17.433, "step": 48674 }, { "epoch": 0.889739887034566, "grad_norm": 5.230613883021127, "learning_rate": 3.155068059404098e-07, "loss": 17.042, "step": 48675 }, { "epoch": 0.8897581662310124, "grad_norm": 7.1826058641960655, "learning_rate": 3.154033281856894e-07, "loss": 17.8893, "step": 48676 }, { "epoch": 0.889776445427459, "grad_norm": 6.285699745335997, "learning_rate": 3.152998668500734e-07, "loss": 17.431, "step": 48677 }, { "epoch": 0.8897947246239055, "grad_norm": 5.905900379863762, "learning_rate": 3.15196421933926e-07, "loss": 17.0012, "step": 48678 }, { "epoch": 0.8898130038203521, "grad_norm": 6.180898065512442, "learning_rate": 3.1509299343760793e-07, "loss": 17.3017, "step": 48679 }, { "epoch": 0.8898312830167986, "grad_norm": 5.9252035611407665, "learning_rate": 3.1498958136148286e-07, "loss": 17.1603, "step": 48680 }, { "epoch": 0.8898495622132451, "grad_norm": 5.97315445580379, "learning_rate": 3.1488618570591325e-07, "loss": 17.2287, "step": 48681 }, { "epoch": 0.8898678414096917, "grad_norm": 5.547990402999098, "learning_rate": 3.1478280647125994e-07, "loss": 17.1374, "step": 48682 }, { "epoch": 0.8898861206061381, "grad_norm": 6.5396100300237805, "learning_rate": 3.1467944365788705e-07, "loss": 17.4186, "step": 48683 }, { "epoch": 0.8899043998025847, "grad_norm": 5.859476432087806, "learning_rate": 3.14576097266156e-07, "loss": 17.2563, "step": 48684 }, { "epoch": 0.8899226789990312, "grad_norm": 7.057426201935135, "learning_rate": 3.144727672964287e-07, "loss": 17.7374, "step": 48685 }, { "epoch": 0.8899409581954777, "grad_norm": 5.3105682040587325, "learning_rate": 3.143694537490677e-07, "loss": 17.0937, "step": 48686 }, { "epoch": 0.8899592373919243, "grad_norm": 5.705942012529303, "learning_rate": 3.142661566244354e-07, "loss": 17.209, "step": 48687 }, { "epoch": 0.8899775165883708, "grad_norm": 5.437979939389158, "learning_rate": 3.1416287592289375e-07, "loss": 17.0995, "step": 48688 }, { "epoch": 0.8899957957848172, "grad_norm": 7.27176783463715, "learning_rate": 3.140596116448047e-07, "loss": 17.4897, "step": 48689 }, { "epoch": 0.8900140749812638, "grad_norm": 6.049558555065852, "learning_rate": 3.1395636379052963e-07, "loss": 17.0885, "step": 48690 }, { "epoch": 0.8900323541777103, "grad_norm": 4.292679482492092, "learning_rate": 3.1385313236043105e-07, "loss": 16.6976, "step": 48691 }, { "epoch": 0.8900506333741569, "grad_norm": 5.192898081778025, "learning_rate": 3.1374991735487137e-07, "loss": 17.1216, "step": 48692 }, { "epoch": 0.8900689125706034, "grad_norm": 5.132841387425916, "learning_rate": 3.1364671877421036e-07, "loss": 17.0186, "step": 48693 }, { "epoch": 0.8900871917670499, "grad_norm": 6.496132951026589, "learning_rate": 3.135435366188117e-07, "loss": 17.3475, "step": 48694 }, { "epoch": 0.8901054709634965, "grad_norm": 5.258169692937188, "learning_rate": 3.13440370889036e-07, "loss": 17.0611, "step": 48695 }, { "epoch": 0.8901237501599429, "grad_norm": 5.046070667995792, "learning_rate": 3.133372215852448e-07, "loss": 17.112, "step": 48696 }, { "epoch": 0.8901420293563895, "grad_norm": 6.193797633402312, "learning_rate": 3.132340887078006e-07, "loss": 17.1485, "step": 48697 }, { "epoch": 0.890160308552836, "grad_norm": 6.258456584068959, "learning_rate": 3.131309722570641e-07, "loss": 17.6084, "step": 48698 }, { "epoch": 0.8901785877492825, "grad_norm": 5.597163476141421, "learning_rate": 3.130278722333963e-07, "loss": 17.3328, "step": 48699 }, { "epoch": 0.8901968669457291, "grad_norm": 8.441597783655029, "learning_rate": 3.1292478863716e-07, "loss": 17.3523, "step": 48700 }, { "epoch": 0.8902151461421756, "grad_norm": 5.94969465882582, "learning_rate": 3.128217214687151e-07, "loss": 17.1639, "step": 48701 }, { "epoch": 0.8902334253386222, "grad_norm": 6.129261377952287, "learning_rate": 3.12718670728423e-07, "loss": 17.5239, "step": 48702 }, { "epoch": 0.8902517045350686, "grad_norm": 6.4382407096389, "learning_rate": 3.1261563641664603e-07, "loss": 17.3455, "step": 48703 }, { "epoch": 0.8902699837315151, "grad_norm": 6.296202395463229, "learning_rate": 3.1251261853374347e-07, "loss": 17.3135, "step": 48704 }, { "epoch": 0.8902882629279617, "grad_norm": 5.493190157889039, "learning_rate": 3.1240961708007777e-07, "loss": 17.2353, "step": 48705 }, { "epoch": 0.8903065421244082, "grad_norm": 5.6014126327229965, "learning_rate": 3.1230663205600977e-07, "loss": 17.2159, "step": 48706 }, { "epoch": 0.8903248213208548, "grad_norm": 5.69049604761432, "learning_rate": 3.1220366346189967e-07, "loss": 17.0995, "step": 48707 }, { "epoch": 0.8903431005173013, "grad_norm": 4.8248420825474705, "learning_rate": 3.1210071129811005e-07, "loss": 17.0161, "step": 48708 }, { "epoch": 0.8903613797137477, "grad_norm": 9.799567072352211, "learning_rate": 3.1199777556500053e-07, "loss": 18.3908, "step": 48709 }, { "epoch": 0.8903796589101943, "grad_norm": 5.432649409322645, "learning_rate": 3.1189485626293095e-07, "loss": 17.1557, "step": 48710 }, { "epoch": 0.8903979381066408, "grad_norm": 5.60920964968609, "learning_rate": 3.117919533922642e-07, "loss": 17.102, "step": 48711 }, { "epoch": 0.8904162173030874, "grad_norm": 5.587110743413788, "learning_rate": 3.116890669533595e-07, "loss": 17.4209, "step": 48712 }, { "epoch": 0.8904344964995339, "grad_norm": 5.335282926878672, "learning_rate": 3.1158619694657723e-07, "loss": 16.8134, "step": 48713 }, { "epoch": 0.8904527756959804, "grad_norm": 6.764333647287995, "learning_rate": 3.114833433722786e-07, "loss": 17.0671, "step": 48714 }, { "epoch": 0.890471054892427, "grad_norm": 7.275477878559657, "learning_rate": 3.113805062308245e-07, "loss": 17.7394, "step": 48715 }, { "epoch": 0.8904893340888734, "grad_norm": 5.1993586094465325, "learning_rate": 3.1127768552257463e-07, "loss": 17.1721, "step": 48716 }, { "epoch": 0.89050761328532, "grad_norm": 4.987063935131288, "learning_rate": 3.111748812478899e-07, "loss": 17.0783, "step": 48717 }, { "epoch": 0.8905258924817665, "grad_norm": 6.207241032954693, "learning_rate": 3.110720934071304e-07, "loss": 17.45, "step": 48718 }, { "epoch": 0.890544171678213, "grad_norm": 5.9473571750786896, "learning_rate": 3.1096932200065546e-07, "loss": 17.1066, "step": 48719 }, { "epoch": 0.8905624508746596, "grad_norm": 5.738217482828579, "learning_rate": 3.108665670288269e-07, "loss": 17.485, "step": 48720 }, { "epoch": 0.890580730071106, "grad_norm": 7.7949384321543835, "learning_rate": 3.1076382849200335e-07, "loss": 17.8519, "step": 48721 }, { "epoch": 0.8905990092675526, "grad_norm": 5.559058967312445, "learning_rate": 3.106611063905468e-07, "loss": 16.9591, "step": 48722 }, { "epoch": 0.8906172884639991, "grad_norm": 8.443185188875894, "learning_rate": 3.1055840072481527e-07, "loss": 17.4583, "step": 48723 }, { "epoch": 0.8906355676604456, "grad_norm": 5.3185949468891085, "learning_rate": 3.104557114951695e-07, "loss": 16.9469, "step": 48724 }, { "epoch": 0.8906538468568922, "grad_norm": 6.090253579482428, "learning_rate": 3.1035303870196985e-07, "loss": 17.2548, "step": 48725 }, { "epoch": 0.8906721260533387, "grad_norm": 5.231990924952238, "learning_rate": 3.102503823455766e-07, "loss": 17.0057, "step": 48726 }, { "epoch": 0.8906904052497853, "grad_norm": 6.542320692056889, "learning_rate": 3.101477424263477e-07, "loss": 17.7069, "step": 48727 }, { "epoch": 0.8907086844462317, "grad_norm": 5.006535844947024, "learning_rate": 3.100451189446446e-07, "loss": 17.2036, "step": 48728 }, { "epoch": 0.8907269636426782, "grad_norm": 4.770367896366605, "learning_rate": 3.0994251190082646e-07, "loss": 16.8072, "step": 48729 }, { "epoch": 0.8907452428391248, "grad_norm": 5.367070943326865, "learning_rate": 3.098399212952524e-07, "loss": 17.1512, "step": 48730 }, { "epoch": 0.8907635220355713, "grad_norm": 5.752091660057533, "learning_rate": 3.0973734712828275e-07, "loss": 17.2358, "step": 48731 }, { "epoch": 0.8907818012320179, "grad_norm": 6.992998580812433, "learning_rate": 3.09634789400276e-07, "loss": 17.4956, "step": 48732 }, { "epoch": 0.8908000804284644, "grad_norm": 5.401382679383716, "learning_rate": 3.0953224811159256e-07, "loss": 17.3056, "step": 48733 }, { "epoch": 0.8908183596249108, "grad_norm": 6.148198625373717, "learning_rate": 3.094297232625926e-07, "loss": 17.2613, "step": 48734 }, { "epoch": 0.8908366388213574, "grad_norm": 8.886394350585773, "learning_rate": 3.0932721485363306e-07, "loss": 17.9852, "step": 48735 }, { "epoch": 0.8908549180178039, "grad_norm": 6.715118494157143, "learning_rate": 3.092247228850753e-07, "loss": 17.5347, "step": 48736 }, { "epoch": 0.8908731972142505, "grad_norm": 6.124916067906919, "learning_rate": 3.091222473572786e-07, "loss": 17.4145, "step": 48737 }, { "epoch": 0.890891476410697, "grad_norm": 5.810230371275536, "learning_rate": 3.0901978827059973e-07, "loss": 17.2406, "step": 48738 }, { "epoch": 0.8909097556071435, "grad_norm": 5.6394881023357675, "learning_rate": 3.089173456254008e-07, "loss": 17.4115, "step": 48739 }, { "epoch": 0.8909280348035901, "grad_norm": 7.061378381055096, "learning_rate": 3.088149194220386e-07, "loss": 17.5973, "step": 48740 }, { "epoch": 0.8909463140000365, "grad_norm": 6.499665693237159, "learning_rate": 3.087125096608734e-07, "loss": 17.4671, "step": 48741 }, { "epoch": 0.8909645931964831, "grad_norm": 5.464612542323887, "learning_rate": 3.0861011634226336e-07, "loss": 17.3179, "step": 48742 }, { "epoch": 0.8909828723929296, "grad_norm": 6.978434053405956, "learning_rate": 3.085077394665686e-07, "loss": 17.6302, "step": 48743 }, { "epoch": 0.8910011515893761, "grad_norm": 5.076646788860646, "learning_rate": 3.0840537903414624e-07, "loss": 17.1513, "step": 48744 }, { "epoch": 0.8910194307858227, "grad_norm": 6.977510788103817, "learning_rate": 3.083030350453564e-07, "loss": 17.4877, "step": 48745 }, { "epoch": 0.8910377099822692, "grad_norm": 4.732800009807046, "learning_rate": 3.0820070750055776e-07, "loss": 16.8551, "step": 48746 }, { "epoch": 0.8910559891787158, "grad_norm": 6.128163456728719, "learning_rate": 3.0809839640010773e-07, "loss": 17.0726, "step": 48747 }, { "epoch": 0.8910742683751622, "grad_norm": 5.161163327764272, "learning_rate": 3.079961017443661e-07, "loss": 17.0342, "step": 48748 }, { "epoch": 0.8910925475716087, "grad_norm": 6.508393455037711, "learning_rate": 3.0789382353369035e-07, "loss": 17.3875, "step": 48749 }, { "epoch": 0.8911108267680553, "grad_norm": 6.642713721759312, "learning_rate": 3.077915617684396e-07, "loss": 17.4882, "step": 48750 }, { "epoch": 0.8911291059645018, "grad_norm": 6.479723761878084, "learning_rate": 3.07689316448973e-07, "loss": 17.292, "step": 48751 }, { "epoch": 0.8911473851609484, "grad_norm": 6.185778351880808, "learning_rate": 3.075870875756476e-07, "loss": 17.3835, "step": 48752 }, { "epoch": 0.8911656643573949, "grad_norm": 6.6866202687836305, "learning_rate": 3.074848751488224e-07, "loss": 17.5717, "step": 48753 }, { "epoch": 0.8911839435538413, "grad_norm": 6.1488518563655274, "learning_rate": 3.073826791688561e-07, "loss": 17.1357, "step": 48754 }, { "epoch": 0.8912022227502879, "grad_norm": 7.004691862442594, "learning_rate": 3.0728049963610564e-07, "loss": 17.804, "step": 48755 }, { "epoch": 0.8912205019467344, "grad_norm": 6.034176967988369, "learning_rate": 3.071783365509301e-07, "loss": 17.1069, "step": 48756 }, { "epoch": 0.8912387811431809, "grad_norm": 5.81735426538337, "learning_rate": 3.0707618991368757e-07, "loss": 17.2025, "step": 48757 }, { "epoch": 0.8912570603396275, "grad_norm": 5.854561942801448, "learning_rate": 3.0697405972473506e-07, "loss": 17.0754, "step": 48758 }, { "epoch": 0.891275339536074, "grad_norm": 4.760022939206156, "learning_rate": 3.06871945984431e-07, "loss": 16.7675, "step": 48759 }, { "epoch": 0.8912936187325206, "grad_norm": 6.227864141514928, "learning_rate": 3.0676984869313417e-07, "loss": 17.3691, "step": 48760 }, { "epoch": 0.891311897928967, "grad_norm": 5.78877958957783, "learning_rate": 3.0666776785120144e-07, "loss": 17.1634, "step": 48761 }, { "epoch": 0.8913301771254135, "grad_norm": 6.4524181410305825, "learning_rate": 3.065657034589914e-07, "loss": 17.1772, "step": 48762 }, { "epoch": 0.8913484563218601, "grad_norm": 6.483473159703044, "learning_rate": 3.0646365551686155e-07, "loss": 17.1757, "step": 48763 }, { "epoch": 0.8913667355183066, "grad_norm": 5.3464592929918995, "learning_rate": 3.063616240251682e-07, "loss": 17.0788, "step": 48764 }, { "epoch": 0.8913850147147532, "grad_norm": 7.380471707819362, "learning_rate": 3.0625960898427065e-07, "loss": 17.5747, "step": 48765 }, { "epoch": 0.8914032939111997, "grad_norm": 6.393098885048121, "learning_rate": 3.0615761039452576e-07, "loss": 17.3001, "step": 48766 }, { "epoch": 0.8914215731076461, "grad_norm": 7.515011305468617, "learning_rate": 3.060556282562915e-07, "loss": 17.3069, "step": 48767 }, { "epoch": 0.8914398523040927, "grad_norm": 6.752217864435324, "learning_rate": 3.0595366256992385e-07, "loss": 17.5971, "step": 48768 }, { "epoch": 0.8914581315005392, "grad_norm": 6.2502049226198935, "learning_rate": 3.058517133357819e-07, "loss": 17.3192, "step": 48769 }, { "epoch": 0.8914764106969858, "grad_norm": 7.054484810543055, "learning_rate": 3.0574978055422254e-07, "loss": 17.5652, "step": 48770 }, { "epoch": 0.8914946898934323, "grad_norm": 5.917471975159343, "learning_rate": 3.0564786422560333e-07, "loss": 17.0921, "step": 48771 }, { "epoch": 0.8915129690898788, "grad_norm": 8.089125685899447, "learning_rate": 3.0554596435028005e-07, "loss": 17.7779, "step": 48772 }, { "epoch": 0.8915312482863254, "grad_norm": 6.605220094178399, "learning_rate": 3.054440809286119e-07, "loss": 17.2979, "step": 48773 }, { "epoch": 0.8915495274827718, "grad_norm": 6.686924891857263, "learning_rate": 3.0534221396095463e-07, "loss": 17.6004, "step": 48774 }, { "epoch": 0.8915678066792184, "grad_norm": 5.480349060960973, "learning_rate": 3.0524036344766474e-07, "loss": 16.873, "step": 48775 }, { "epoch": 0.8915860858756649, "grad_norm": 6.583417074004214, "learning_rate": 3.0513852938910015e-07, "loss": 17.7059, "step": 48776 }, { "epoch": 0.8916043650721114, "grad_norm": 4.843546821348626, "learning_rate": 3.050367117856179e-07, "loss": 16.8163, "step": 48777 }, { "epoch": 0.891622644268558, "grad_norm": 4.9217533227382075, "learning_rate": 3.0493491063757376e-07, "loss": 16.8638, "step": 48778 }, { "epoch": 0.8916409234650045, "grad_norm": 5.899037861256107, "learning_rate": 3.048331259453263e-07, "loss": 17.3472, "step": 48779 }, { "epoch": 0.891659202661451, "grad_norm": 5.853343994191245, "learning_rate": 3.0473135770923144e-07, "loss": 17.2019, "step": 48780 }, { "epoch": 0.8916774818578975, "grad_norm": 5.775503158937558, "learning_rate": 3.0462960592964496e-07, "loss": 17.0663, "step": 48781 }, { "epoch": 0.891695761054344, "grad_norm": 6.594238806687839, "learning_rate": 3.045278706069249e-07, "loss": 17.4944, "step": 48782 }, { "epoch": 0.8917140402507906, "grad_norm": 7.723366788492963, "learning_rate": 3.044261517414265e-07, "loss": 17.8677, "step": 48783 }, { "epoch": 0.8917323194472371, "grad_norm": 6.294422595203675, "learning_rate": 3.043244493335079e-07, "loss": 17.4673, "step": 48784 }, { "epoch": 0.8917505986436837, "grad_norm": 6.803613133013449, "learning_rate": 3.0422276338352484e-07, "loss": 17.4631, "step": 48785 }, { "epoch": 0.8917688778401301, "grad_norm": 7.699928112405518, "learning_rate": 3.041210938918321e-07, "loss": 17.7727, "step": 48786 }, { "epoch": 0.8917871570365766, "grad_norm": 8.560401411470176, "learning_rate": 3.0401944085878767e-07, "loss": 18.0522, "step": 48787 }, { "epoch": 0.8918054362330232, "grad_norm": 6.839663491418397, "learning_rate": 3.039178042847485e-07, "loss": 18.0756, "step": 48788 }, { "epoch": 0.8918237154294697, "grad_norm": 5.641250506542183, "learning_rate": 3.038161841700693e-07, "loss": 17.1862, "step": 48789 }, { "epoch": 0.8918419946259163, "grad_norm": 5.293342381686091, "learning_rate": 3.0371458051510763e-07, "loss": 16.894, "step": 48790 }, { "epoch": 0.8918602738223628, "grad_norm": 5.77283740382676, "learning_rate": 3.0361299332021864e-07, "loss": 17.1172, "step": 48791 }, { "epoch": 0.8918785530188093, "grad_norm": 5.593311425295224, "learning_rate": 3.035114225857577e-07, "loss": 17.1079, "step": 48792 }, { "epoch": 0.8918968322152558, "grad_norm": 7.264702141928815, "learning_rate": 3.034098683120823e-07, "loss": 17.4329, "step": 48793 }, { "epoch": 0.8919151114117023, "grad_norm": 5.592644891046755, "learning_rate": 3.0330833049954764e-07, "loss": 16.9982, "step": 48794 }, { "epoch": 0.8919333906081489, "grad_norm": 5.722559720354743, "learning_rate": 3.0320680914850964e-07, "loss": 17.1236, "step": 48795 }, { "epoch": 0.8919516698045954, "grad_norm": 7.391955505892901, "learning_rate": 3.0310530425932404e-07, "loss": 17.673, "step": 48796 }, { "epoch": 0.8919699490010419, "grad_norm": 5.153108634666294, "learning_rate": 3.0300381583234674e-07, "loss": 16.9176, "step": 48797 }, { "epoch": 0.8919882281974885, "grad_norm": 6.08044086645074, "learning_rate": 3.0290234386793404e-07, "loss": 17.0914, "step": 48798 }, { "epoch": 0.8920065073939349, "grad_norm": 6.94921536940937, "learning_rate": 3.028008883664413e-07, "loss": 17.2508, "step": 48799 }, { "epoch": 0.8920247865903815, "grad_norm": 5.864196889915593, "learning_rate": 3.026994493282226e-07, "loss": 17.1298, "step": 48800 }, { "epoch": 0.892043065786828, "grad_norm": 7.043854370915378, "learning_rate": 3.0259802675363603e-07, "loss": 17.955, "step": 48801 }, { "epoch": 0.8920613449832745, "grad_norm": 6.566627367311961, "learning_rate": 3.024966206430352e-07, "loss": 17.2484, "step": 48802 }, { "epoch": 0.8920796241797211, "grad_norm": 5.318919511400267, "learning_rate": 3.0239523099677595e-07, "loss": 16.976, "step": 48803 }, { "epoch": 0.8920979033761676, "grad_norm": 5.484948510601988, "learning_rate": 3.0229385781521347e-07, "loss": 17.072, "step": 48804 }, { "epoch": 0.8921161825726142, "grad_norm": 8.33576526868179, "learning_rate": 3.021925010987037e-07, "loss": 18.2129, "step": 48805 }, { "epoch": 0.8921344617690606, "grad_norm": 6.7455962115387855, "learning_rate": 3.0209116084760125e-07, "loss": 17.4412, "step": 48806 }, { "epoch": 0.8921527409655071, "grad_norm": 6.307172365771659, "learning_rate": 3.019898370622626e-07, "loss": 17.1947, "step": 48807 }, { "epoch": 0.8921710201619537, "grad_norm": 5.76809409020961, "learning_rate": 3.018885297430413e-07, "loss": 17.1841, "step": 48808 }, { "epoch": 0.8921892993584002, "grad_norm": 5.959690698254268, "learning_rate": 3.017872388902926e-07, "loss": 17.257, "step": 48809 }, { "epoch": 0.8922075785548468, "grad_norm": 5.077590543011189, "learning_rate": 3.0168596450437294e-07, "loss": 16.7211, "step": 48810 }, { "epoch": 0.8922258577512933, "grad_norm": 7.605811394841063, "learning_rate": 3.015847065856359e-07, "loss": 17.8325, "step": 48811 }, { "epoch": 0.8922441369477397, "grad_norm": 5.763749254380181, "learning_rate": 3.0148346513443614e-07, "loss": 17.1561, "step": 48812 }, { "epoch": 0.8922624161441863, "grad_norm": 8.590369909521735, "learning_rate": 3.0138224015112904e-07, "loss": 18.2429, "step": 48813 }, { "epoch": 0.8922806953406328, "grad_norm": 5.996404971000588, "learning_rate": 3.012810316360698e-07, "loss": 17.2797, "step": 48814 }, { "epoch": 0.8922989745370794, "grad_norm": 6.3461530340873855, "learning_rate": 3.011798395896132e-07, "loss": 17.29, "step": 48815 }, { "epoch": 0.8923172537335259, "grad_norm": 5.936218889736578, "learning_rate": 3.010786640121133e-07, "loss": 17.1569, "step": 48816 }, { "epoch": 0.8923355329299724, "grad_norm": 4.897157509738202, "learning_rate": 3.0097750490392485e-07, "loss": 16.9582, "step": 48817 }, { "epoch": 0.892353812126419, "grad_norm": 4.690919689339271, "learning_rate": 3.008763622654026e-07, "loss": 16.8634, "step": 48818 }, { "epoch": 0.8923720913228654, "grad_norm": 5.448534300756498, "learning_rate": 3.007752360969013e-07, "loss": 16.9921, "step": 48819 }, { "epoch": 0.892390370519312, "grad_norm": 5.61094106417231, "learning_rate": 3.006741263987739e-07, "loss": 17.0491, "step": 48820 }, { "epoch": 0.8924086497157585, "grad_norm": 6.779894032582258, "learning_rate": 3.005730331713769e-07, "loss": 17.9471, "step": 48821 }, { "epoch": 0.892426928912205, "grad_norm": 5.618784262357306, "learning_rate": 3.0047195641506324e-07, "loss": 17.0677, "step": 48822 }, { "epoch": 0.8924452081086516, "grad_norm": 5.706960379110219, "learning_rate": 3.003708961301871e-07, "loss": 17.0544, "step": 48823 }, { "epoch": 0.8924634873050981, "grad_norm": 7.394439683734637, "learning_rate": 3.002698523171038e-07, "loss": 17.7864, "step": 48824 }, { "epoch": 0.8924817665015445, "grad_norm": 7.137501729391448, "learning_rate": 3.00168824976167e-07, "loss": 17.2504, "step": 48825 }, { "epoch": 0.8925000456979911, "grad_norm": 11.54996637757583, "learning_rate": 3.000678141077301e-07, "loss": 17.9524, "step": 48826 }, { "epoch": 0.8925183248944376, "grad_norm": 6.816644235545675, "learning_rate": 2.9996681971214804e-07, "loss": 17.7683, "step": 48827 }, { "epoch": 0.8925366040908842, "grad_norm": 6.533478588026822, "learning_rate": 2.9986584178977373e-07, "loss": 17.4398, "step": 48828 }, { "epoch": 0.8925548832873307, "grad_norm": 6.498765633534199, "learning_rate": 2.9976488034096305e-07, "loss": 17.4519, "step": 48829 }, { "epoch": 0.8925731624837772, "grad_norm": 5.100798702563533, "learning_rate": 2.9966393536606795e-07, "loss": 16.8196, "step": 48830 }, { "epoch": 0.8925914416802238, "grad_norm": 5.881503849759405, "learning_rate": 2.995630068654426e-07, "loss": 17.2269, "step": 48831 }, { "epoch": 0.8926097208766702, "grad_norm": 5.190723138100936, "learning_rate": 2.9946209483944113e-07, "loss": 17.0999, "step": 48832 }, { "epoch": 0.8926280000731168, "grad_norm": 6.478851305748688, "learning_rate": 2.993611992884171e-07, "loss": 17.6037, "step": 48833 }, { "epoch": 0.8926462792695633, "grad_norm": 6.711005017180032, "learning_rate": 2.992603202127242e-07, "loss": 17.5425, "step": 48834 }, { "epoch": 0.8926645584660098, "grad_norm": 5.720239385401216, "learning_rate": 2.9915945761271603e-07, "loss": 17.3552, "step": 48835 }, { "epoch": 0.8926828376624564, "grad_norm": 6.2577247892919345, "learning_rate": 2.990586114887467e-07, "loss": 17.3306, "step": 48836 }, { "epoch": 0.8927011168589029, "grad_norm": 5.146720091508996, "learning_rate": 2.989577818411682e-07, "loss": 17.0285, "step": 48837 }, { "epoch": 0.8927193960553494, "grad_norm": 6.388103848710742, "learning_rate": 2.988569686703352e-07, "loss": 17.568, "step": 48838 }, { "epoch": 0.8927376752517959, "grad_norm": 11.551126508121659, "learning_rate": 2.9875617197660023e-07, "loss": 18.0084, "step": 48839 }, { "epoch": 0.8927559544482424, "grad_norm": 6.914601939056808, "learning_rate": 2.986553917603169e-07, "loss": 17.6848, "step": 48840 }, { "epoch": 0.892774233644689, "grad_norm": 7.814059765492376, "learning_rate": 2.985546280218382e-07, "loss": 18.4827, "step": 48841 }, { "epoch": 0.8927925128411355, "grad_norm": 5.015184946533114, "learning_rate": 2.9845388076151726e-07, "loss": 16.952, "step": 48842 }, { "epoch": 0.8928107920375821, "grad_norm": 8.018358347098923, "learning_rate": 2.9835314997970876e-07, "loss": 17.5555, "step": 48843 }, { "epoch": 0.8928290712340285, "grad_norm": 5.732957061201667, "learning_rate": 2.982524356767641e-07, "loss": 17.1657, "step": 48844 }, { "epoch": 0.892847350430475, "grad_norm": 5.652172543347675, "learning_rate": 2.9815173785303577e-07, "loss": 17.136, "step": 48845 }, { "epoch": 0.8928656296269216, "grad_norm": 6.286916425978412, "learning_rate": 2.98051056508879e-07, "loss": 17.5033, "step": 48846 }, { "epoch": 0.8928839088233681, "grad_norm": 6.401936048115737, "learning_rate": 2.979503916446447e-07, "loss": 17.6086, "step": 48847 }, { "epoch": 0.8929021880198147, "grad_norm": 7.3296176220285405, "learning_rate": 2.978497432606853e-07, "loss": 17.7308, "step": 48848 }, { "epoch": 0.8929204672162612, "grad_norm": 7.628254504513602, "learning_rate": 2.977491113573561e-07, "loss": 17.7371, "step": 48849 }, { "epoch": 0.8929387464127077, "grad_norm": 7.242517869996785, "learning_rate": 2.9764849593500676e-07, "loss": 17.8832, "step": 48850 }, { "epoch": 0.8929570256091542, "grad_norm": 6.138566119691823, "learning_rate": 2.975478969939916e-07, "loss": 17.3878, "step": 48851 }, { "epoch": 0.8929753048056007, "grad_norm": 5.589936724956119, "learning_rate": 2.9744731453466345e-07, "loss": 17.2787, "step": 48852 }, { "epoch": 0.8929935840020473, "grad_norm": 4.932965637918788, "learning_rate": 2.97346748557375e-07, "loss": 16.7596, "step": 48853 }, { "epoch": 0.8930118631984938, "grad_norm": 9.609146510424075, "learning_rate": 2.9724619906247696e-07, "loss": 18.0953, "step": 48854 }, { "epoch": 0.8930301423949403, "grad_norm": 5.929139329030349, "learning_rate": 2.971456660503236e-07, "loss": 17.2049, "step": 48855 }, { "epoch": 0.8930484215913869, "grad_norm": 6.1604163178144224, "learning_rate": 2.9704514952126673e-07, "loss": 17.0845, "step": 48856 }, { "epoch": 0.8930667007878333, "grad_norm": 6.017696105783723, "learning_rate": 2.969446494756578e-07, "loss": 17.3186, "step": 48857 }, { "epoch": 0.8930849799842799, "grad_norm": 5.841860606422563, "learning_rate": 2.968441659138505e-07, "loss": 17.4383, "step": 48858 }, { "epoch": 0.8931032591807264, "grad_norm": 7.364558841574847, "learning_rate": 2.967436988361955e-07, "loss": 17.3473, "step": 48859 }, { "epoch": 0.8931215383771729, "grad_norm": 7.271011013414785, "learning_rate": 2.966432482430459e-07, "loss": 18.0612, "step": 48860 }, { "epoch": 0.8931398175736195, "grad_norm": 5.8731484535409955, "learning_rate": 2.965428141347543e-07, "loss": 17.2161, "step": 48861 }, { "epoch": 0.893158096770066, "grad_norm": 7.068008230371281, "learning_rate": 2.9644239651167083e-07, "loss": 17.5646, "step": 48862 }, { "epoch": 0.8931763759665126, "grad_norm": 6.91150258240877, "learning_rate": 2.9634199537414975e-07, "loss": 17.8486, "step": 48863 }, { "epoch": 0.893194655162959, "grad_norm": 5.707189762970403, "learning_rate": 2.962416107225419e-07, "loss": 17.3328, "step": 48864 }, { "epoch": 0.8932129343594055, "grad_norm": 6.565928397415109, "learning_rate": 2.9614124255719803e-07, "loss": 17.5281, "step": 48865 }, { "epoch": 0.8932312135558521, "grad_norm": 5.037146865221292, "learning_rate": 2.960408908784718e-07, "loss": 16.9267, "step": 48866 }, { "epoch": 0.8932494927522986, "grad_norm": 5.598824479267349, "learning_rate": 2.959405556867129e-07, "loss": 17.1188, "step": 48867 }, { "epoch": 0.8932677719487452, "grad_norm": 6.349766611337169, "learning_rate": 2.958402369822744e-07, "loss": 17.4615, "step": 48868 }, { "epoch": 0.8932860511451917, "grad_norm": 5.76374242889604, "learning_rate": 2.957399347655088e-07, "loss": 17.0709, "step": 48869 }, { "epoch": 0.8933043303416381, "grad_norm": 6.484106086119909, "learning_rate": 2.956396490367658e-07, "loss": 17.3996, "step": 48870 }, { "epoch": 0.8933226095380847, "grad_norm": 5.600188426958612, "learning_rate": 2.9553937979639735e-07, "loss": 17.1432, "step": 48871 }, { "epoch": 0.8933408887345312, "grad_norm": 8.013357454529277, "learning_rate": 2.9543912704475596e-07, "loss": 17.6404, "step": 48872 }, { "epoch": 0.8933591679309778, "grad_norm": 6.651135569102576, "learning_rate": 2.953388907821908e-07, "loss": 17.8868, "step": 48873 }, { "epoch": 0.8933774471274243, "grad_norm": 6.0052023290782985, "learning_rate": 2.95238671009056e-07, "loss": 17.4814, "step": 48874 }, { "epoch": 0.8933957263238708, "grad_norm": 6.022459377327837, "learning_rate": 2.9513846772570133e-07, "loss": 17.2679, "step": 48875 }, { "epoch": 0.8934140055203174, "grad_norm": 8.030940302686597, "learning_rate": 2.95038280932477e-07, "loss": 18.0172, "step": 48876 }, { "epoch": 0.8934322847167638, "grad_norm": 5.132363131031559, "learning_rate": 2.9493811062973553e-07, "loss": 17.1767, "step": 48877 }, { "epoch": 0.8934505639132104, "grad_norm": 7.488405633205009, "learning_rate": 2.948379568178283e-07, "loss": 17.4244, "step": 48878 }, { "epoch": 0.8934688431096569, "grad_norm": 6.609814840888081, "learning_rate": 2.9473781949710514e-07, "loss": 17.6792, "step": 48879 }, { "epoch": 0.8934871223061034, "grad_norm": 6.383711790556868, "learning_rate": 2.946376986679178e-07, "loss": 17.2813, "step": 48880 }, { "epoch": 0.89350540150255, "grad_norm": 7.3446303369558485, "learning_rate": 2.945375943306178e-07, "loss": 17.8514, "step": 48881 }, { "epoch": 0.8935236806989965, "grad_norm": 5.5070670291946335, "learning_rate": 2.944375064855537e-07, "loss": 16.9861, "step": 48882 }, { "epoch": 0.893541959895443, "grad_norm": 6.823881338625081, "learning_rate": 2.9433743513307913e-07, "loss": 17.3127, "step": 48883 }, { "epoch": 0.8935602390918895, "grad_norm": 5.7375805249237075, "learning_rate": 2.942373802735432e-07, "loss": 17.108, "step": 48884 }, { "epoch": 0.893578518288336, "grad_norm": 6.535856377185569, "learning_rate": 2.9413734190729627e-07, "loss": 17.3996, "step": 48885 }, { "epoch": 0.8935967974847826, "grad_norm": 6.730118101969005, "learning_rate": 2.940373200346897e-07, "loss": 17.6776, "step": 48886 }, { "epoch": 0.8936150766812291, "grad_norm": 6.531904893390309, "learning_rate": 2.9393731465607476e-07, "loss": 17.2932, "step": 48887 }, { "epoch": 0.8936333558776757, "grad_norm": 5.224799239083223, "learning_rate": 2.9383732577180025e-07, "loss": 17.0836, "step": 48888 }, { "epoch": 0.8936516350741222, "grad_norm": 5.097347495590519, "learning_rate": 2.9373735338221797e-07, "loss": 17.0489, "step": 48889 }, { "epoch": 0.8936699142705686, "grad_norm": 4.865390517011575, "learning_rate": 2.9363739748767775e-07, "loss": 16.7284, "step": 48890 }, { "epoch": 0.8936881934670152, "grad_norm": 5.3354989997356554, "learning_rate": 2.935374580885303e-07, "loss": 17.0078, "step": 48891 }, { "epoch": 0.8937064726634617, "grad_norm": 7.5692307336837725, "learning_rate": 2.9343753518512595e-07, "loss": 17.4482, "step": 48892 }, { "epoch": 0.8937247518599082, "grad_norm": 12.479129072942143, "learning_rate": 2.933376287778139e-07, "loss": 18.0883, "step": 48893 }, { "epoch": 0.8937430310563548, "grad_norm": 7.2411546740381745, "learning_rate": 2.932377388669455e-07, "loss": 17.5827, "step": 48894 }, { "epoch": 0.8937613102528013, "grad_norm": 5.447941396134273, "learning_rate": 2.9313786545286995e-07, "loss": 16.8557, "step": 48895 }, { "epoch": 0.8937795894492478, "grad_norm": 5.355879264882659, "learning_rate": 2.9303800853593745e-07, "loss": 16.9212, "step": 48896 }, { "epoch": 0.8937978686456943, "grad_norm": 8.436450836417016, "learning_rate": 2.929381681164989e-07, "loss": 18.2607, "step": 48897 }, { "epoch": 0.8938161478421408, "grad_norm": 5.700355267339383, "learning_rate": 2.9283834419490397e-07, "loss": 16.7898, "step": 48898 }, { "epoch": 0.8938344270385874, "grad_norm": 6.481323842222695, "learning_rate": 2.927385367715013e-07, "loss": 17.6765, "step": 48899 }, { "epoch": 0.8938527062350339, "grad_norm": 6.349955017387444, "learning_rate": 2.926387458466423e-07, "loss": 17.581, "step": 48900 }, { "epoch": 0.8938709854314805, "grad_norm": 5.242875939874366, "learning_rate": 2.9253897142067613e-07, "loss": 17.1526, "step": 48901 }, { "epoch": 0.893889264627927, "grad_norm": 6.973050674255541, "learning_rate": 2.9243921349395186e-07, "loss": 17.2954, "step": 48902 }, { "epoch": 0.8939075438243734, "grad_norm": 4.916486740300974, "learning_rate": 2.923394720668199e-07, "loss": 17.0326, "step": 48903 }, { "epoch": 0.89392582302082, "grad_norm": 5.499550604543629, "learning_rate": 2.922397471396293e-07, "loss": 17.1328, "step": 48904 }, { "epoch": 0.8939441022172665, "grad_norm": 5.377261292402278, "learning_rate": 2.921400387127293e-07, "loss": 17.1455, "step": 48905 }, { "epoch": 0.8939623814137131, "grad_norm": 6.66708233532266, "learning_rate": 2.920403467864713e-07, "loss": 17.3505, "step": 48906 }, { "epoch": 0.8939806606101596, "grad_norm": 5.905710417313386, "learning_rate": 2.9194067136120217e-07, "loss": 17.0962, "step": 48907 }, { "epoch": 0.893998939806606, "grad_norm": 5.721281763495238, "learning_rate": 2.9184101243727336e-07, "loss": 17.1842, "step": 48908 }, { "epoch": 0.8940172190030526, "grad_norm": 8.0800428150758, "learning_rate": 2.917413700150329e-07, "loss": 18.0799, "step": 48909 }, { "epoch": 0.8940354981994991, "grad_norm": 7.0278400242218115, "learning_rate": 2.916417440948299e-07, "loss": 17.8285, "step": 48910 }, { "epoch": 0.8940537773959457, "grad_norm": 5.586128169751286, "learning_rate": 2.9154213467701473e-07, "loss": 16.9517, "step": 48911 }, { "epoch": 0.8940720565923922, "grad_norm": 5.240926184143824, "learning_rate": 2.9144254176193597e-07, "loss": 16.88, "step": 48912 }, { "epoch": 0.8940903357888387, "grad_norm": 6.733522706455623, "learning_rate": 2.913429653499417e-07, "loss": 17.6181, "step": 48913 }, { "epoch": 0.8941086149852853, "grad_norm": 5.988201929481953, "learning_rate": 2.9124340544138153e-07, "loss": 17.304, "step": 48914 }, { "epoch": 0.8941268941817317, "grad_norm": 5.330181420396292, "learning_rate": 2.9114386203660583e-07, "loss": 16.8363, "step": 48915 }, { "epoch": 0.8941451733781783, "grad_norm": 5.754352857711243, "learning_rate": 2.9104433513596096e-07, "loss": 17.2471, "step": 48916 }, { "epoch": 0.8941634525746248, "grad_norm": 6.224191844301022, "learning_rate": 2.909448247397978e-07, "loss": 17.4296, "step": 48917 }, { "epoch": 0.8941817317710713, "grad_norm": 8.270835413981235, "learning_rate": 2.908453308484649e-07, "loss": 18.2926, "step": 48918 }, { "epoch": 0.8942000109675179, "grad_norm": 5.681418257184947, "learning_rate": 2.9074585346230923e-07, "loss": 17.0446, "step": 48919 }, { "epoch": 0.8942182901639644, "grad_norm": 7.342909477973104, "learning_rate": 2.9064639258168215e-07, "loss": 17.8432, "step": 48920 }, { "epoch": 0.894236569360411, "grad_norm": 6.257200495005491, "learning_rate": 2.905469482069295e-07, "loss": 17.0046, "step": 48921 }, { "epoch": 0.8942548485568574, "grad_norm": 5.37023725198488, "learning_rate": 2.9044752033840164e-07, "loss": 17.0161, "step": 48922 }, { "epoch": 0.8942731277533039, "grad_norm": 8.375495936717526, "learning_rate": 2.903481089764465e-07, "loss": 17.6246, "step": 48923 }, { "epoch": 0.8942914069497505, "grad_norm": 6.290452922333777, "learning_rate": 2.902487141214122e-07, "loss": 17.3187, "step": 48924 }, { "epoch": 0.894309686146197, "grad_norm": 7.717031209736966, "learning_rate": 2.901493357736479e-07, "loss": 17.7552, "step": 48925 }, { "epoch": 0.8943279653426436, "grad_norm": 7.925730255150063, "learning_rate": 2.900499739335022e-07, "loss": 17.9991, "step": 48926 }, { "epoch": 0.8943462445390901, "grad_norm": 6.073940643370783, "learning_rate": 2.8995062860132095e-07, "loss": 17.0723, "step": 48927 }, { "epoch": 0.8943645237355365, "grad_norm": 6.575227032740352, "learning_rate": 2.898512997774555e-07, "loss": 17.0164, "step": 48928 }, { "epoch": 0.8943828029319831, "grad_norm": 7.014445191630523, "learning_rate": 2.897519874622523e-07, "loss": 17.533, "step": 48929 }, { "epoch": 0.8944010821284296, "grad_norm": 5.874355918741228, "learning_rate": 2.896526916560588e-07, "loss": 17.3492, "step": 48930 }, { "epoch": 0.8944193613248762, "grad_norm": 5.547865565635159, "learning_rate": 2.8955341235922417e-07, "loss": 17.0799, "step": 48931 }, { "epoch": 0.8944376405213227, "grad_norm": 6.146245224806414, "learning_rate": 2.894541495720965e-07, "loss": 17.3422, "step": 48932 }, { "epoch": 0.8944559197177692, "grad_norm": 7.036601139158152, "learning_rate": 2.893549032950227e-07, "loss": 17.3867, "step": 48933 }, { "epoch": 0.8944741989142158, "grad_norm": 5.936225220727053, "learning_rate": 2.8925567352835195e-07, "loss": 16.8478, "step": 48934 }, { "epoch": 0.8944924781106622, "grad_norm": 6.786848889377488, "learning_rate": 2.891564602724306e-07, "loss": 17.7516, "step": 48935 }, { "epoch": 0.8945107573071088, "grad_norm": 8.074758579593743, "learning_rate": 2.8905726352760734e-07, "loss": 17.8952, "step": 48936 }, { "epoch": 0.8945290365035553, "grad_norm": 6.024551181805815, "learning_rate": 2.889580832942301e-07, "loss": 17.4089, "step": 48937 }, { "epoch": 0.8945473157000018, "grad_norm": 7.44146596759859, "learning_rate": 2.888589195726449e-07, "loss": 17.4421, "step": 48938 }, { "epoch": 0.8945655948964484, "grad_norm": 6.436611637590579, "learning_rate": 2.887597723632013e-07, "loss": 17.4744, "step": 48939 }, { "epoch": 0.8945838740928949, "grad_norm": 6.7096084888159595, "learning_rate": 2.886606416662452e-07, "loss": 17.6475, "step": 48940 }, { "epoch": 0.8946021532893415, "grad_norm": 5.792036527658737, "learning_rate": 2.885615274821246e-07, "loss": 17.18, "step": 48941 }, { "epoch": 0.8946204324857879, "grad_norm": 5.448023640716597, "learning_rate": 2.884624298111877e-07, "loss": 16.9841, "step": 48942 }, { "epoch": 0.8946387116822344, "grad_norm": 4.845933937908358, "learning_rate": 2.8836334865378126e-07, "loss": 16.9152, "step": 48943 }, { "epoch": 0.894656990878681, "grad_norm": 5.5262393126541625, "learning_rate": 2.8826428401025174e-07, "loss": 17.085, "step": 48944 }, { "epoch": 0.8946752700751275, "grad_norm": 4.939215456421506, "learning_rate": 2.881652358809478e-07, "loss": 16.9632, "step": 48945 }, { "epoch": 0.8946935492715741, "grad_norm": 5.821405337716541, "learning_rate": 2.880662042662158e-07, "loss": 16.9255, "step": 48946 }, { "epoch": 0.8947118284680206, "grad_norm": 5.914373605081989, "learning_rate": 2.8796718916640155e-07, "loss": 17.2311, "step": 48947 }, { "epoch": 0.894730107664467, "grad_norm": 5.864741710086646, "learning_rate": 2.8786819058185477e-07, "loss": 17.1375, "step": 48948 }, { "epoch": 0.8947483868609136, "grad_norm": 5.752738423923991, "learning_rate": 2.8776920851292025e-07, "loss": 17.0209, "step": 48949 }, { "epoch": 0.8947666660573601, "grad_norm": 6.777812090689836, "learning_rate": 2.8767024295994595e-07, "loss": 17.4258, "step": 48950 }, { "epoch": 0.8947849452538067, "grad_norm": 5.5588057244447855, "learning_rate": 2.875712939232789e-07, "loss": 17.2287, "step": 48951 }, { "epoch": 0.8948032244502532, "grad_norm": 6.250530567622901, "learning_rate": 2.874723614032648e-07, "loss": 17.4612, "step": 48952 }, { "epoch": 0.8948215036466997, "grad_norm": 5.812183555282394, "learning_rate": 2.8737344540025193e-07, "loss": 17.3245, "step": 48953 }, { "epoch": 0.8948397828431462, "grad_norm": 5.108359438077555, "learning_rate": 2.872745459145865e-07, "loss": 17.0371, "step": 48954 }, { "epoch": 0.8948580620395927, "grad_norm": 7.888560260673306, "learning_rate": 2.871756629466138e-07, "loss": 18.0312, "step": 48955 }, { "epoch": 0.8948763412360393, "grad_norm": 8.149564989802561, "learning_rate": 2.87076796496682e-07, "loss": 17.9683, "step": 48956 }, { "epoch": 0.8948946204324858, "grad_norm": 7.185273658251625, "learning_rate": 2.869779465651373e-07, "loss": 17.6899, "step": 48957 }, { "epoch": 0.8949128996289323, "grad_norm": 5.926863006832646, "learning_rate": 2.868791131523252e-07, "loss": 17.5795, "step": 48958 }, { "epoch": 0.8949311788253789, "grad_norm": 6.104716246853871, "learning_rate": 2.8678029625859306e-07, "loss": 17.6599, "step": 48959 }, { "epoch": 0.8949494580218254, "grad_norm": 5.149703593842177, "learning_rate": 2.866814958842873e-07, "loss": 16.872, "step": 48960 }, { "epoch": 0.8949677372182718, "grad_norm": 7.3172770941625584, "learning_rate": 2.865827120297532e-07, "loss": 17.9989, "step": 48961 }, { "epoch": 0.8949860164147184, "grad_norm": 6.116818227261627, "learning_rate": 2.864839446953388e-07, "loss": 17.0107, "step": 48962 }, { "epoch": 0.8950042956111649, "grad_norm": 6.731789145699654, "learning_rate": 2.8638519388138886e-07, "loss": 17.6119, "step": 48963 }, { "epoch": 0.8950225748076115, "grad_norm": 7.13049942384884, "learning_rate": 2.862864595882492e-07, "loss": 18.0573, "step": 48964 }, { "epoch": 0.895040854004058, "grad_norm": 5.621669818557788, "learning_rate": 2.8618774181626676e-07, "loss": 17.1723, "step": 48965 }, { "epoch": 0.8950591332005045, "grad_norm": 5.270513400178443, "learning_rate": 2.860890405657868e-07, "loss": 17.0921, "step": 48966 }, { "epoch": 0.895077412396951, "grad_norm": 6.198753433369572, "learning_rate": 2.8599035583715686e-07, "loss": 17.4622, "step": 48967 }, { "epoch": 0.8950956915933975, "grad_norm": 5.877486542772123, "learning_rate": 2.858916876307205e-07, "loss": 17.0989, "step": 48968 }, { "epoch": 0.8951139707898441, "grad_norm": 5.30633362358448, "learning_rate": 2.857930359468247e-07, "loss": 17.2183, "step": 48969 }, { "epoch": 0.8951322499862906, "grad_norm": 5.3788892068994185, "learning_rate": 2.856944007858159e-07, "loss": 17.2007, "step": 48970 }, { "epoch": 0.8951505291827371, "grad_norm": 4.958756884233368, "learning_rate": 2.855957821480393e-07, "loss": 17.1185, "step": 48971 }, { "epoch": 0.8951688083791837, "grad_norm": 6.411962317546665, "learning_rate": 2.8549718003383965e-07, "loss": 17.4783, "step": 48972 }, { "epoch": 0.8951870875756301, "grad_norm": 6.905218414672682, "learning_rate": 2.853985944435639e-07, "loss": 18.1629, "step": 48973 }, { "epoch": 0.8952053667720767, "grad_norm": 6.1562563021823635, "learning_rate": 2.8530002537755676e-07, "loss": 17.2377, "step": 48974 }, { "epoch": 0.8952236459685232, "grad_norm": 6.481381123094928, "learning_rate": 2.852014728361635e-07, "loss": 17.2658, "step": 48975 }, { "epoch": 0.8952419251649697, "grad_norm": 5.356745779978945, "learning_rate": 2.851029368197306e-07, "loss": 16.9799, "step": 48976 }, { "epoch": 0.8952602043614163, "grad_norm": 6.3414812479555795, "learning_rate": 2.8500441732860204e-07, "loss": 17.4692, "step": 48977 }, { "epoch": 0.8952784835578628, "grad_norm": 5.940747253660638, "learning_rate": 2.8490591436312385e-07, "loss": 17.1552, "step": 48978 }, { "epoch": 0.8952967627543094, "grad_norm": 5.967777220129609, "learning_rate": 2.848074279236418e-07, "loss": 17.0856, "step": 48979 }, { "epoch": 0.8953150419507558, "grad_norm": 6.0313776920517075, "learning_rate": 2.8470895801050114e-07, "loss": 17.206, "step": 48980 }, { "epoch": 0.8953333211472023, "grad_norm": 6.324991716536376, "learning_rate": 2.8461050462404494e-07, "loss": 17.6875, "step": 48981 }, { "epoch": 0.8953516003436489, "grad_norm": 4.93146903358591, "learning_rate": 2.845120677646213e-07, "loss": 16.9909, "step": 48982 }, { "epoch": 0.8953698795400954, "grad_norm": 5.676800349818218, "learning_rate": 2.8441364743257203e-07, "loss": 17.254, "step": 48983 }, { "epoch": 0.895388158736542, "grad_norm": 6.187866640907276, "learning_rate": 2.843152436282448e-07, "loss": 17.5583, "step": 48984 }, { "epoch": 0.8954064379329885, "grad_norm": 6.282169544098556, "learning_rate": 2.842168563519837e-07, "loss": 17.5118, "step": 48985 }, { "epoch": 0.8954247171294349, "grad_norm": 6.386572218746308, "learning_rate": 2.8411848560413237e-07, "loss": 17.0294, "step": 48986 }, { "epoch": 0.8954429963258815, "grad_norm": 4.752278034177787, "learning_rate": 2.840201313850366e-07, "loss": 17.0506, "step": 48987 }, { "epoch": 0.895461275522328, "grad_norm": 7.709563583800542, "learning_rate": 2.839217936950417e-07, "loss": 17.5767, "step": 48988 }, { "epoch": 0.8954795547187746, "grad_norm": 8.144496159548263, "learning_rate": 2.838234725344907e-07, "loss": 17.8818, "step": 48989 }, { "epoch": 0.8954978339152211, "grad_norm": 5.404702112170076, "learning_rate": 2.8372516790373064e-07, "loss": 17.0999, "step": 48990 }, { "epoch": 0.8955161131116676, "grad_norm": 5.522581814910207, "learning_rate": 2.8362687980310387e-07, "loss": 17.1302, "step": 48991 }, { "epoch": 0.8955343923081142, "grad_norm": 6.045199909351012, "learning_rate": 2.8352860823295527e-07, "loss": 17.1041, "step": 48992 }, { "epoch": 0.8955526715045606, "grad_norm": 5.540135947693817, "learning_rate": 2.8343035319363e-07, "loss": 16.8547, "step": 48993 }, { "epoch": 0.8955709507010072, "grad_norm": 7.068394258796212, "learning_rate": 2.833321146854717e-07, "loss": 17.235, "step": 48994 }, { "epoch": 0.8955892298974537, "grad_norm": 5.920247884910801, "learning_rate": 2.832338927088246e-07, "loss": 17.0738, "step": 48995 }, { "epoch": 0.8956075090939002, "grad_norm": 6.370069197144747, "learning_rate": 2.831356872640345e-07, "loss": 17.4986, "step": 48996 }, { "epoch": 0.8956257882903468, "grad_norm": 5.025418569574852, "learning_rate": 2.8303749835144387e-07, "loss": 16.8022, "step": 48997 }, { "epoch": 0.8956440674867933, "grad_norm": 6.81295706968012, "learning_rate": 2.8293932597139803e-07, "loss": 17.5345, "step": 48998 }, { "epoch": 0.8956623466832399, "grad_norm": 4.397331067699201, "learning_rate": 2.8284117012424064e-07, "loss": 16.6692, "step": 48999 }, { "epoch": 0.8956806258796863, "grad_norm": 6.337761910079979, "learning_rate": 2.8274303081031463e-07, "loss": 17.5998, "step": 49000 }, { "epoch": 0.8956989050761328, "grad_norm": 7.662202587201868, "learning_rate": 2.826449080299659e-07, "loss": 17.5355, "step": 49001 }, { "epoch": 0.8957171842725794, "grad_norm": 7.43104088236527, "learning_rate": 2.8254680178353755e-07, "loss": 17.379, "step": 49002 }, { "epoch": 0.8957354634690259, "grad_norm": 7.122668272003433, "learning_rate": 2.8244871207137203e-07, "loss": 17.4638, "step": 49003 }, { "epoch": 0.8957537426654725, "grad_norm": 6.595160701138884, "learning_rate": 2.823506388938152e-07, "loss": 17.5908, "step": 49004 }, { "epoch": 0.895772021861919, "grad_norm": 6.535080529447163, "learning_rate": 2.822525822512101e-07, "loss": 17.4457, "step": 49005 }, { "epoch": 0.8957903010583654, "grad_norm": 6.326007571686645, "learning_rate": 2.821545421438998e-07, "loss": 17.3448, "step": 49006 }, { "epoch": 0.895808580254812, "grad_norm": 5.026740346996991, "learning_rate": 2.8205651857222904e-07, "loss": 16.9868, "step": 49007 }, { "epoch": 0.8958268594512585, "grad_norm": 7.100978674827761, "learning_rate": 2.8195851153654086e-07, "loss": 17.7872, "step": 49008 }, { "epoch": 0.8958451386477051, "grad_norm": 5.796670312067572, "learning_rate": 2.8186052103717775e-07, "loss": 17.2643, "step": 49009 }, { "epoch": 0.8958634178441516, "grad_norm": 7.416769284914009, "learning_rate": 2.8176254707448504e-07, "loss": 17.7342, "step": 49010 }, { "epoch": 0.8958816970405981, "grad_norm": 5.509338744171973, "learning_rate": 2.816645896488046e-07, "loss": 17.2057, "step": 49011 }, { "epoch": 0.8958999762370446, "grad_norm": 5.450412230676695, "learning_rate": 2.815666487604807e-07, "loss": 17.0548, "step": 49012 }, { "epoch": 0.8959182554334911, "grad_norm": 5.354430232852535, "learning_rate": 2.814687244098557e-07, "loss": 16.8988, "step": 49013 }, { "epoch": 0.8959365346299377, "grad_norm": 6.060756677224878, "learning_rate": 2.813708165972734e-07, "loss": 17.4515, "step": 49014 }, { "epoch": 0.8959548138263842, "grad_norm": 7.433915077500906, "learning_rate": 2.812729253230778e-07, "loss": 17.1551, "step": 49015 }, { "epoch": 0.8959730930228307, "grad_norm": 5.922207942284259, "learning_rate": 2.8117505058761097e-07, "loss": 17.4693, "step": 49016 }, { "epoch": 0.8959913722192773, "grad_norm": 6.963456678520035, "learning_rate": 2.8107719239121535e-07, "loss": 17.5189, "step": 49017 }, { "epoch": 0.8960096514157238, "grad_norm": 5.375012776964491, "learning_rate": 2.809793507342357e-07, "loss": 16.8846, "step": 49018 }, { "epoch": 0.8960279306121703, "grad_norm": 6.5429344378724155, "learning_rate": 2.808815256170133e-07, "loss": 17.6798, "step": 49019 }, { "epoch": 0.8960462098086168, "grad_norm": 6.884908291672831, "learning_rate": 2.807837170398914e-07, "loss": 17.5328, "step": 49020 }, { "epoch": 0.8960644890050633, "grad_norm": 7.625629281948836, "learning_rate": 2.806859250032135e-07, "loss": 17.6361, "step": 49021 }, { "epoch": 0.8960827682015099, "grad_norm": 5.946237471133058, "learning_rate": 2.805881495073215e-07, "loss": 17.1934, "step": 49022 }, { "epoch": 0.8961010473979564, "grad_norm": 6.2105279189628435, "learning_rate": 2.8049039055255853e-07, "loss": 17.4203, "step": 49023 }, { "epoch": 0.896119326594403, "grad_norm": 9.315671474751033, "learning_rate": 2.803926481392677e-07, "loss": 18.167, "step": 49024 }, { "epoch": 0.8961376057908494, "grad_norm": 7.7682141763271, "learning_rate": 2.802949222677909e-07, "loss": 17.7412, "step": 49025 }, { "epoch": 0.8961558849872959, "grad_norm": 6.36859667572702, "learning_rate": 2.8019721293847056e-07, "loss": 17.6536, "step": 49026 }, { "epoch": 0.8961741641837425, "grad_norm": 5.698496560356588, "learning_rate": 2.8009952015164985e-07, "loss": 17.0631, "step": 49027 }, { "epoch": 0.896192443380189, "grad_norm": 5.446949179914704, "learning_rate": 2.8000184390767016e-07, "loss": 17.0661, "step": 49028 }, { "epoch": 0.8962107225766355, "grad_norm": 6.077028820967871, "learning_rate": 2.7990418420687503e-07, "loss": 17.4716, "step": 49029 }, { "epoch": 0.8962290017730821, "grad_norm": 6.062532105742388, "learning_rate": 2.798065410496059e-07, "loss": 17.1935, "step": 49030 }, { "epoch": 0.8962472809695285, "grad_norm": 6.880052294256407, "learning_rate": 2.797089144362053e-07, "loss": 18.0873, "step": 49031 }, { "epoch": 0.8962655601659751, "grad_norm": 6.674450861179471, "learning_rate": 2.796113043670146e-07, "loss": 17.5526, "step": 49032 }, { "epoch": 0.8962838393624216, "grad_norm": 5.281922342895106, "learning_rate": 2.7951371084237787e-07, "loss": 17.0769, "step": 49033 }, { "epoch": 0.8963021185588681, "grad_norm": 6.277968483102049, "learning_rate": 2.79416133862635e-07, "loss": 17.4343, "step": 49034 }, { "epoch": 0.8963203977553147, "grad_norm": 8.280706439955047, "learning_rate": 2.793185734281295e-07, "loss": 18.0402, "step": 49035 }, { "epoch": 0.8963386769517612, "grad_norm": 5.210334408500849, "learning_rate": 2.792210295392028e-07, "loss": 17.0705, "step": 49036 }, { "epoch": 0.8963569561482078, "grad_norm": 5.476067360725759, "learning_rate": 2.791235021961963e-07, "loss": 17.1154, "step": 49037 }, { "epoch": 0.8963752353446542, "grad_norm": 5.539146374498832, "learning_rate": 2.7902599139945306e-07, "loss": 17.2435, "step": 49038 }, { "epoch": 0.8963935145411007, "grad_norm": 6.113003843573064, "learning_rate": 2.7892849714931393e-07, "loss": 17.4295, "step": 49039 }, { "epoch": 0.8964117937375473, "grad_norm": 5.413657236431431, "learning_rate": 2.7883101944611966e-07, "loss": 16.8378, "step": 49040 }, { "epoch": 0.8964300729339938, "grad_norm": 7.781530087647583, "learning_rate": 2.7873355829021344e-07, "loss": 18.1024, "step": 49041 }, { "epoch": 0.8964483521304404, "grad_norm": 5.658356137475787, "learning_rate": 2.78636113681936e-07, "loss": 17.1866, "step": 49042 }, { "epoch": 0.8964666313268869, "grad_norm": 5.217468562113685, "learning_rate": 2.7853868562163044e-07, "loss": 16.8485, "step": 49043 }, { "epoch": 0.8964849105233333, "grad_norm": 5.371344719154207, "learning_rate": 2.7844127410963705e-07, "loss": 17.1577, "step": 49044 }, { "epoch": 0.8965031897197799, "grad_norm": 7.030895988050911, "learning_rate": 2.783438791462961e-07, "loss": 17.5033, "step": 49045 }, { "epoch": 0.8965214689162264, "grad_norm": 6.0092287476892015, "learning_rate": 2.7824650073195124e-07, "loss": 17.1745, "step": 49046 }, { "epoch": 0.896539748112673, "grad_norm": 5.116039608175808, "learning_rate": 2.781491388669427e-07, "loss": 16.8844, "step": 49047 }, { "epoch": 0.8965580273091195, "grad_norm": 6.535975506513678, "learning_rate": 2.780517935516108e-07, "loss": 17.2751, "step": 49048 }, { "epoch": 0.896576306505566, "grad_norm": 5.077750967340832, "learning_rate": 2.7795446478629806e-07, "loss": 17.0477, "step": 49049 }, { "epoch": 0.8965945857020126, "grad_norm": 6.076123201145395, "learning_rate": 2.778571525713447e-07, "loss": 17.2318, "step": 49050 }, { "epoch": 0.896612864898459, "grad_norm": 4.825782334790331, "learning_rate": 2.7775985690709216e-07, "loss": 16.8255, "step": 49051 }, { "epoch": 0.8966311440949056, "grad_norm": 5.557912040062981, "learning_rate": 2.776625777938824e-07, "loss": 17.3845, "step": 49052 }, { "epoch": 0.8966494232913521, "grad_norm": 5.837126354317519, "learning_rate": 2.7756531523205565e-07, "loss": 17.349, "step": 49053 }, { "epoch": 0.8966677024877986, "grad_norm": 6.288276637593233, "learning_rate": 2.774680692219517e-07, "loss": 17.3077, "step": 49054 }, { "epoch": 0.8966859816842452, "grad_norm": 8.921753301957184, "learning_rate": 2.773708397639135e-07, "loss": 18.3207, "step": 49055 }, { "epoch": 0.8967042608806917, "grad_norm": 6.775086939564437, "learning_rate": 2.772736268582804e-07, "loss": 17.8288, "step": 49056 }, { "epoch": 0.8967225400771383, "grad_norm": 7.164701242934906, "learning_rate": 2.7717643050539255e-07, "loss": 17.7547, "step": 49057 }, { "epoch": 0.8967408192735847, "grad_norm": 6.277411599268198, "learning_rate": 2.770792507055914e-07, "loss": 17.0505, "step": 49058 }, { "epoch": 0.8967590984700312, "grad_norm": 5.74930098535271, "learning_rate": 2.7698208745921884e-07, "loss": 17.1891, "step": 49059 }, { "epoch": 0.8967773776664778, "grad_norm": 5.906001981043282, "learning_rate": 2.76884940766613e-07, "loss": 17.1295, "step": 49060 }, { "epoch": 0.8967956568629243, "grad_norm": 5.070303464120579, "learning_rate": 2.767878106281169e-07, "loss": 16.9755, "step": 49061 }, { "epoch": 0.8968139360593709, "grad_norm": 5.7237271452847605, "learning_rate": 2.766906970440686e-07, "loss": 17.507, "step": 49062 }, { "epoch": 0.8968322152558174, "grad_norm": 6.011594806560506, "learning_rate": 2.7659360001481005e-07, "loss": 17.1157, "step": 49063 }, { "epoch": 0.8968504944522638, "grad_norm": 6.43172940911636, "learning_rate": 2.7649651954068103e-07, "loss": 17.5068, "step": 49064 }, { "epoch": 0.8968687736487104, "grad_norm": 5.567741121386207, "learning_rate": 2.763994556220212e-07, "loss": 17.2, "step": 49065 }, { "epoch": 0.8968870528451569, "grad_norm": 4.875450111631062, "learning_rate": 2.7630240825917255e-07, "loss": 16.9165, "step": 49066 }, { "epoch": 0.8969053320416035, "grad_norm": 6.48602249483211, "learning_rate": 2.762053774524731e-07, "loss": 17.2629, "step": 49067 }, { "epoch": 0.89692361123805, "grad_norm": 6.647755832046489, "learning_rate": 2.7610836320226373e-07, "loss": 17.0469, "step": 49068 }, { "epoch": 0.8969418904344965, "grad_norm": 5.544420901313561, "learning_rate": 2.760113655088853e-07, "loss": 17.1469, "step": 49069 }, { "epoch": 0.896960169630943, "grad_norm": 5.315018010773992, "learning_rate": 2.7591438437267746e-07, "loss": 17.2124, "step": 49070 }, { "epoch": 0.8969784488273895, "grad_norm": 7.1314327558079995, "learning_rate": 2.7581741979397835e-07, "loss": 17.4962, "step": 49071 }, { "epoch": 0.8969967280238361, "grad_norm": 5.9941746563030405, "learning_rate": 2.7572047177313043e-07, "loss": 17.3469, "step": 49072 }, { "epoch": 0.8970150072202826, "grad_norm": 7.193018175366892, "learning_rate": 2.756235403104718e-07, "loss": 17.9, "step": 49073 }, { "epoch": 0.8970332864167291, "grad_norm": 6.31661262086503, "learning_rate": 2.755266254063432e-07, "loss": 17.4198, "step": 49074 }, { "epoch": 0.8970515656131757, "grad_norm": 7.763035156738484, "learning_rate": 2.7542972706108393e-07, "loss": 17.9219, "step": 49075 }, { "epoch": 0.8970698448096222, "grad_norm": 6.390140313529233, "learning_rate": 2.7533284527503257e-07, "loss": 17.458, "step": 49076 }, { "epoch": 0.8970881240060687, "grad_norm": 5.425702217390013, "learning_rate": 2.752359800485294e-07, "loss": 17.1648, "step": 49077 }, { "epoch": 0.8971064032025152, "grad_norm": 5.898248371126512, "learning_rate": 2.751391313819152e-07, "loss": 17.1615, "step": 49078 }, { "epoch": 0.8971246823989617, "grad_norm": 8.626053564662158, "learning_rate": 2.750422992755275e-07, "loss": 17.7747, "step": 49079 }, { "epoch": 0.8971429615954083, "grad_norm": 5.415354912614363, "learning_rate": 2.749454837297072e-07, "loss": 17.0109, "step": 49080 }, { "epoch": 0.8971612407918548, "grad_norm": 6.093231291798382, "learning_rate": 2.748486847447934e-07, "loss": 17.1984, "step": 49081 }, { "epoch": 0.8971795199883014, "grad_norm": 7.299123703342957, "learning_rate": 2.747519023211237e-07, "loss": 17.3352, "step": 49082 }, { "epoch": 0.8971977991847478, "grad_norm": 5.875294100769136, "learning_rate": 2.7465513645903943e-07, "loss": 17.2807, "step": 49083 }, { "epoch": 0.8972160783811943, "grad_norm": 5.418591297604797, "learning_rate": 2.745583871588786e-07, "loss": 16.9277, "step": 49084 }, { "epoch": 0.8972343575776409, "grad_norm": 6.539360863892426, "learning_rate": 2.7446165442097993e-07, "loss": 17.4382, "step": 49085 }, { "epoch": 0.8972526367740874, "grad_norm": 5.166183926158085, "learning_rate": 2.7436493824568365e-07, "loss": 16.9138, "step": 49086 }, { "epoch": 0.897270915970534, "grad_norm": 6.0646595736645095, "learning_rate": 2.7426823863332843e-07, "loss": 17.2623, "step": 49087 }, { "epoch": 0.8972891951669805, "grad_norm": 6.057182461890168, "learning_rate": 2.7417155558425225e-07, "loss": 17.3518, "step": 49088 }, { "epoch": 0.897307474363427, "grad_norm": 6.027153541769315, "learning_rate": 2.7407488909879545e-07, "loss": 17.3579, "step": 49089 }, { "epoch": 0.8973257535598735, "grad_norm": 6.551392756720978, "learning_rate": 2.7397823917729547e-07, "loss": 17.4685, "step": 49090 }, { "epoch": 0.89734403275632, "grad_norm": 4.878428697809324, "learning_rate": 2.738816058200922e-07, "loss": 16.9009, "step": 49091 }, { "epoch": 0.8973623119527666, "grad_norm": 5.863346229170553, "learning_rate": 2.73784989027524e-07, "loss": 17.2687, "step": 49092 }, { "epoch": 0.8973805911492131, "grad_norm": 5.553621290119219, "learning_rate": 2.736883887999281e-07, "loss": 17.1377, "step": 49093 }, { "epoch": 0.8973988703456596, "grad_norm": 6.921298586807779, "learning_rate": 2.7359180513764517e-07, "loss": 17.7205, "step": 49094 }, { "epoch": 0.8974171495421062, "grad_norm": 4.198594328760914, "learning_rate": 2.7349523804101274e-07, "loss": 16.6342, "step": 49095 }, { "epoch": 0.8974354287385526, "grad_norm": 6.28218948841707, "learning_rate": 2.733986875103684e-07, "loss": 17.3481, "step": 49096 }, { "epoch": 0.8974537079349991, "grad_norm": 5.517074545925693, "learning_rate": 2.7330215354605283e-07, "loss": 16.9931, "step": 49097 }, { "epoch": 0.8974719871314457, "grad_norm": 6.2751733395252085, "learning_rate": 2.7320563614840313e-07, "loss": 17.4262, "step": 49098 }, { "epoch": 0.8974902663278922, "grad_norm": 6.9949371293290765, "learning_rate": 2.731091353177562e-07, "loss": 17.6966, "step": 49099 }, { "epoch": 0.8975085455243388, "grad_norm": 6.2436598770031955, "learning_rate": 2.7301265105445285e-07, "loss": 17.5346, "step": 49100 }, { "epoch": 0.8975268247207853, "grad_norm": 6.513451426687418, "learning_rate": 2.7291618335882953e-07, "loss": 17.3864, "step": 49101 }, { "epoch": 0.8975451039172317, "grad_norm": 6.90422637103015, "learning_rate": 2.728197322312243e-07, "loss": 17.5304, "step": 49102 }, { "epoch": 0.8975633831136783, "grad_norm": 5.612037313341495, "learning_rate": 2.727232976719768e-07, "loss": 17.3726, "step": 49103 }, { "epoch": 0.8975816623101248, "grad_norm": 8.672008759524209, "learning_rate": 2.726268796814224e-07, "loss": 18.3539, "step": 49104 }, { "epoch": 0.8975999415065714, "grad_norm": 6.09254046778718, "learning_rate": 2.725304782599014e-07, "loss": 17.2108, "step": 49105 }, { "epoch": 0.8976182207030179, "grad_norm": 6.893196427082954, "learning_rate": 2.7243409340775074e-07, "loss": 17.5295, "step": 49106 }, { "epoch": 0.8976364998994644, "grad_norm": 5.962706612059943, "learning_rate": 2.7233772512530843e-07, "loss": 17.0193, "step": 49107 }, { "epoch": 0.897654779095911, "grad_norm": 7.024069404402083, "learning_rate": 2.722413734129126e-07, "loss": 17.4197, "step": 49108 }, { "epoch": 0.8976730582923574, "grad_norm": 6.038952870814775, "learning_rate": 2.721450382709001e-07, "loss": 17.2543, "step": 49109 }, { "epoch": 0.897691337488804, "grad_norm": 5.271572540658111, "learning_rate": 2.7204871969960857e-07, "loss": 16.9669, "step": 49110 }, { "epoch": 0.8977096166852505, "grad_norm": 6.2253695409731105, "learning_rate": 2.719524176993765e-07, "loss": 17.3802, "step": 49111 }, { "epoch": 0.897727895881697, "grad_norm": 5.874921714342687, "learning_rate": 2.7185613227054153e-07, "loss": 17.3786, "step": 49112 }, { "epoch": 0.8977461750781436, "grad_norm": 5.807791285230783, "learning_rate": 2.7175986341343887e-07, "loss": 17.2015, "step": 49113 }, { "epoch": 0.8977644542745901, "grad_norm": 6.909181260808992, "learning_rate": 2.7166361112840824e-07, "loss": 17.4434, "step": 49114 }, { "epoch": 0.8977827334710367, "grad_norm": 6.836705096511722, "learning_rate": 2.7156737541578714e-07, "loss": 17.3554, "step": 49115 }, { "epoch": 0.8978010126674831, "grad_norm": 6.059307064773588, "learning_rate": 2.714711562759109e-07, "loss": 17.3797, "step": 49116 }, { "epoch": 0.8978192918639296, "grad_norm": 6.032747336795154, "learning_rate": 2.7137495370911925e-07, "loss": 17.1888, "step": 49117 }, { "epoch": 0.8978375710603762, "grad_norm": 5.186549642625476, "learning_rate": 2.7127876771574745e-07, "loss": 16.9026, "step": 49118 }, { "epoch": 0.8978558502568227, "grad_norm": 6.683175517034437, "learning_rate": 2.71182598296133e-07, "loss": 17.2864, "step": 49119 }, { "epoch": 0.8978741294532693, "grad_norm": 7.428605093257455, "learning_rate": 2.710864454506135e-07, "loss": 17.8543, "step": 49120 }, { "epoch": 0.8978924086497158, "grad_norm": 7.191952298892875, "learning_rate": 2.7099030917952517e-07, "loss": 17.5466, "step": 49121 }, { "epoch": 0.8979106878461622, "grad_norm": 7.627787920155042, "learning_rate": 2.708941894832051e-07, "loss": 18.1336, "step": 49122 }, { "epoch": 0.8979289670426088, "grad_norm": 5.702291092020598, "learning_rate": 2.7079808636199187e-07, "loss": 17.1142, "step": 49123 }, { "epoch": 0.8979472462390553, "grad_norm": 6.5967766184003, "learning_rate": 2.707019998162197e-07, "loss": 17.5537, "step": 49124 }, { "epoch": 0.8979655254355019, "grad_norm": 6.346297281249949, "learning_rate": 2.706059298462277e-07, "loss": 17.4324, "step": 49125 }, { "epoch": 0.8979838046319484, "grad_norm": 6.097958125726812, "learning_rate": 2.7050987645235115e-07, "loss": 17.1979, "step": 49126 }, { "epoch": 0.8980020838283949, "grad_norm": 6.712848732937529, "learning_rate": 2.7041383963492706e-07, "loss": 17.6761, "step": 49127 }, { "epoch": 0.8980203630248415, "grad_norm": 6.4367771202772515, "learning_rate": 2.703178193942918e-07, "loss": 17.4841, "step": 49128 }, { "epoch": 0.8980386422212879, "grad_norm": 5.759109064216357, "learning_rate": 2.702218157307829e-07, "loss": 17.2538, "step": 49129 }, { "epoch": 0.8980569214177345, "grad_norm": 6.48134795830137, "learning_rate": 2.70125828644735e-07, "loss": 17.2532, "step": 49130 }, { "epoch": 0.898075200614181, "grad_norm": 5.892142066578836, "learning_rate": 2.7002985813648576e-07, "loss": 17.3148, "step": 49131 }, { "epoch": 0.8980934798106275, "grad_norm": 7.228679977209531, "learning_rate": 2.6993390420637255e-07, "loss": 17.3571, "step": 49132 }, { "epoch": 0.8981117590070741, "grad_norm": 5.549817495011633, "learning_rate": 2.6983796685472906e-07, "loss": 17.1851, "step": 49133 }, { "epoch": 0.8981300382035206, "grad_norm": 5.480200561492513, "learning_rate": 2.6974204608189447e-07, "loss": 16.9593, "step": 49134 }, { "epoch": 0.8981483173999671, "grad_norm": 6.655607516718319, "learning_rate": 2.6964614188820236e-07, "loss": 17.56, "step": 49135 }, { "epoch": 0.8981665965964136, "grad_norm": 7.1177458906528885, "learning_rate": 2.6955025427399086e-07, "loss": 17.5281, "step": 49136 }, { "epoch": 0.8981848757928601, "grad_norm": 5.309595693607606, "learning_rate": 2.6945438323959517e-07, "loss": 17.2647, "step": 49137 }, { "epoch": 0.8982031549893067, "grad_norm": 5.796613244939375, "learning_rate": 2.693585287853506e-07, "loss": 17.1197, "step": 49138 }, { "epoch": 0.8982214341857532, "grad_norm": 6.745353413120092, "learning_rate": 2.6926269091159473e-07, "loss": 17.7483, "step": 49139 }, { "epoch": 0.8982397133821998, "grad_norm": 5.218517770625397, "learning_rate": 2.691668696186617e-07, "loss": 16.7863, "step": 49140 }, { "epoch": 0.8982579925786462, "grad_norm": 4.5634289878265015, "learning_rate": 2.690710649068884e-07, "loss": 16.8395, "step": 49141 }, { "epoch": 0.8982762717750927, "grad_norm": 7.588280640789835, "learning_rate": 2.689752767766113e-07, "loss": 17.8408, "step": 49142 }, { "epoch": 0.8982945509715393, "grad_norm": 5.703467414175389, "learning_rate": 2.6887950522816565e-07, "loss": 17.0853, "step": 49143 }, { "epoch": 0.8983128301679858, "grad_norm": 5.766264667219249, "learning_rate": 2.6878375026188565e-07, "loss": 17.1988, "step": 49144 }, { "epoch": 0.8983311093644324, "grad_norm": 6.1259975114749805, "learning_rate": 2.686880118781088e-07, "loss": 17.4066, "step": 49145 }, { "epoch": 0.8983493885608789, "grad_norm": 7.2636101503689, "learning_rate": 2.685922900771698e-07, "loss": 17.9391, "step": 49146 }, { "epoch": 0.8983676677573253, "grad_norm": 4.956507879476986, "learning_rate": 2.6849658485940344e-07, "loss": 17.0879, "step": 49147 }, { "epoch": 0.8983859469537719, "grad_norm": 5.762878167083278, "learning_rate": 2.684008962251472e-07, "loss": 17.1689, "step": 49148 }, { "epoch": 0.8984042261502184, "grad_norm": 6.811302731411466, "learning_rate": 2.6830522417473415e-07, "loss": 17.3471, "step": 49149 }, { "epoch": 0.898422505346665, "grad_norm": 6.364277160451377, "learning_rate": 2.6820956870850067e-07, "loss": 17.265, "step": 49150 }, { "epoch": 0.8984407845431115, "grad_norm": 6.740196343674033, "learning_rate": 2.6811392982678266e-07, "loss": 17.7796, "step": 49151 }, { "epoch": 0.898459063739558, "grad_norm": 8.06838844776305, "learning_rate": 2.680183075299142e-07, "loss": 18.1022, "step": 49152 }, { "epoch": 0.8984773429360046, "grad_norm": 6.00885021380932, "learning_rate": 2.6792270181823175e-07, "loss": 17.3215, "step": 49153 }, { "epoch": 0.898495622132451, "grad_norm": 6.249421584572989, "learning_rate": 2.6782711269206953e-07, "loss": 17.4148, "step": 49154 }, { "epoch": 0.8985139013288976, "grad_norm": 5.5895911269145575, "learning_rate": 2.677315401517616e-07, "loss": 17.043, "step": 49155 }, { "epoch": 0.8985321805253441, "grad_norm": 5.8390293407189215, "learning_rate": 2.6763598419764505e-07, "loss": 17.3005, "step": 49156 }, { "epoch": 0.8985504597217906, "grad_norm": 5.884101848687435, "learning_rate": 2.675404448300539e-07, "loss": 17.3471, "step": 49157 }, { "epoch": 0.8985687389182372, "grad_norm": 5.581060082457894, "learning_rate": 2.674449220493219e-07, "loss": 17.0883, "step": 49158 }, { "epoch": 0.8985870181146837, "grad_norm": 5.430202204115283, "learning_rate": 2.6734941585578435e-07, "loss": 17.196, "step": 49159 }, { "epoch": 0.8986052973111303, "grad_norm": 5.908985232594471, "learning_rate": 2.6725392624977754e-07, "loss": 17.57, "step": 49160 }, { "epoch": 0.8986235765075767, "grad_norm": 7.985522164097967, "learning_rate": 2.6715845323163457e-07, "loss": 17.3897, "step": 49161 }, { "epoch": 0.8986418557040232, "grad_norm": 7.078125714651372, "learning_rate": 2.6706299680169077e-07, "loss": 17.4412, "step": 49162 }, { "epoch": 0.8986601349004698, "grad_norm": 6.118212083252935, "learning_rate": 2.6696755696028087e-07, "loss": 17.201, "step": 49163 }, { "epoch": 0.8986784140969163, "grad_norm": 7.5191771477368485, "learning_rate": 2.668721337077379e-07, "loss": 18.0053, "step": 49164 }, { "epoch": 0.8986966932933628, "grad_norm": 6.787275505871094, "learning_rate": 2.667767270443983e-07, "loss": 17.2441, "step": 49165 }, { "epoch": 0.8987149724898094, "grad_norm": 5.994250601351976, "learning_rate": 2.666813369705945e-07, "loss": 17.0096, "step": 49166 }, { "epoch": 0.8987332516862558, "grad_norm": 5.6016982166386375, "learning_rate": 2.6658596348666297e-07, "loss": 17.0298, "step": 49167 }, { "epoch": 0.8987515308827024, "grad_norm": 6.8371601908164354, "learning_rate": 2.664906065929362e-07, "loss": 17.6408, "step": 49168 }, { "epoch": 0.8987698100791489, "grad_norm": 5.981501070209277, "learning_rate": 2.66395266289749e-07, "loss": 17.3045, "step": 49169 }, { "epoch": 0.8987880892755954, "grad_norm": 6.163503261087684, "learning_rate": 2.6629994257743596e-07, "loss": 17.0068, "step": 49170 }, { "epoch": 0.898806368472042, "grad_norm": 6.065617292235949, "learning_rate": 2.6620463545633134e-07, "loss": 17.2041, "step": 49171 }, { "epoch": 0.8988246476684885, "grad_norm": 8.730765806298292, "learning_rate": 2.6610934492676765e-07, "loss": 17.9548, "step": 49172 }, { "epoch": 0.898842926864935, "grad_norm": 6.655382451188872, "learning_rate": 2.660140709890807e-07, "loss": 17.3148, "step": 49173 }, { "epoch": 0.8988612060613815, "grad_norm": 6.6331375177265866, "learning_rate": 2.659188136436042e-07, "loss": 17.5679, "step": 49174 }, { "epoch": 0.898879485257828, "grad_norm": 5.118776911145402, "learning_rate": 2.6582357289066994e-07, "loss": 16.9328, "step": 49175 }, { "epoch": 0.8988977644542746, "grad_norm": 5.447472528504568, "learning_rate": 2.657283487306145e-07, "loss": 16.8811, "step": 49176 }, { "epoch": 0.8989160436507211, "grad_norm": 6.146574460000738, "learning_rate": 2.6563314116376915e-07, "loss": 17.0463, "step": 49177 }, { "epoch": 0.8989343228471677, "grad_norm": 5.952706920372286, "learning_rate": 2.6553795019046924e-07, "loss": 17.1748, "step": 49178 }, { "epoch": 0.8989526020436142, "grad_norm": 5.1266184145752325, "learning_rate": 2.654427758110484e-07, "loss": 16.987, "step": 49179 }, { "epoch": 0.8989708812400606, "grad_norm": 5.705167789253811, "learning_rate": 2.6534761802583966e-07, "loss": 17.0654, "step": 49180 }, { "epoch": 0.8989891604365072, "grad_norm": 7.502266755412813, "learning_rate": 2.652524768351766e-07, "loss": 17.8544, "step": 49181 }, { "epoch": 0.8990074396329537, "grad_norm": 5.327689073851158, "learning_rate": 2.65157352239393e-07, "loss": 17.003, "step": 49182 }, { "epoch": 0.8990257188294003, "grad_norm": 5.793165599143292, "learning_rate": 2.6506224423882174e-07, "loss": 17.2138, "step": 49183 }, { "epoch": 0.8990439980258468, "grad_norm": 6.840400699807656, "learning_rate": 2.649671528337966e-07, "loss": 17.2879, "step": 49184 }, { "epoch": 0.8990622772222933, "grad_norm": 10.35161480271809, "learning_rate": 2.6487207802464997e-07, "loss": 18.3061, "step": 49185 }, { "epoch": 0.8990805564187399, "grad_norm": 5.82342358513223, "learning_rate": 2.6477701981171724e-07, "loss": 17.2027, "step": 49186 }, { "epoch": 0.8990988356151863, "grad_norm": 5.768868236756905, "learning_rate": 2.646819781953286e-07, "loss": 17.2788, "step": 49187 }, { "epoch": 0.8991171148116329, "grad_norm": 5.3995800656356, "learning_rate": 2.645869531758199e-07, "loss": 17.04, "step": 49188 }, { "epoch": 0.8991353940080794, "grad_norm": 9.43010681778441, "learning_rate": 2.644919447535221e-07, "loss": 17.75, "step": 49189 }, { "epoch": 0.8991536732045259, "grad_norm": 7.189226909432966, "learning_rate": 2.643969529287699e-07, "loss": 17.4646, "step": 49190 }, { "epoch": 0.8991719524009725, "grad_norm": 6.154424428008332, "learning_rate": 2.643019777018957e-07, "loss": 17.5104, "step": 49191 }, { "epoch": 0.899190231597419, "grad_norm": 5.675933013287184, "learning_rate": 2.6420701907323097e-07, "loss": 17.0884, "step": 49192 }, { "epoch": 0.8992085107938655, "grad_norm": 6.575058548928182, "learning_rate": 2.641120770431105e-07, "loss": 17.4875, "step": 49193 }, { "epoch": 0.899226789990312, "grad_norm": 7.2069911433026945, "learning_rate": 2.6401715161186557e-07, "loss": 17.3412, "step": 49194 }, { "epoch": 0.8992450691867585, "grad_norm": 6.9931465653824825, "learning_rate": 2.639222427798294e-07, "loss": 17.5935, "step": 49195 }, { "epoch": 0.8992633483832051, "grad_norm": 6.684294665946871, "learning_rate": 2.63827350547336e-07, "loss": 17.6855, "step": 49196 }, { "epoch": 0.8992816275796516, "grad_norm": 5.760563322586659, "learning_rate": 2.6373247491471577e-07, "loss": 17.0778, "step": 49197 }, { "epoch": 0.8992999067760982, "grad_norm": 6.3147268550010365, "learning_rate": 2.6363761588230284e-07, "loss": 17.5513, "step": 49198 }, { "epoch": 0.8993181859725446, "grad_norm": 5.878042809530137, "learning_rate": 2.6354277345042924e-07, "loss": 17.1311, "step": 49199 }, { "epoch": 0.8993364651689911, "grad_norm": 5.306410124652382, "learning_rate": 2.6344794761942626e-07, "loss": 16.8125, "step": 49200 }, { "epoch": 0.8993547443654377, "grad_norm": 5.905395087176771, "learning_rate": 2.633531383896276e-07, "loss": 17.2778, "step": 49201 }, { "epoch": 0.8993730235618842, "grad_norm": 6.249979320476561, "learning_rate": 2.6325834576136575e-07, "loss": 17.1727, "step": 49202 }, { "epoch": 0.8993913027583308, "grad_norm": 5.624551645929635, "learning_rate": 2.631635697349716e-07, "loss": 17.345, "step": 49203 }, { "epoch": 0.8994095819547773, "grad_norm": 5.990762808866373, "learning_rate": 2.6306881031077815e-07, "loss": 17.1919, "step": 49204 }, { "epoch": 0.8994278611512238, "grad_norm": 6.58997572267504, "learning_rate": 2.629740674891179e-07, "loss": 17.4181, "step": 49205 }, { "epoch": 0.8994461403476703, "grad_norm": 5.601899346936024, "learning_rate": 2.6287934127032175e-07, "loss": 17.1321, "step": 49206 }, { "epoch": 0.8994644195441168, "grad_norm": 5.188815129131033, "learning_rate": 2.627846316547233e-07, "loss": 17.0345, "step": 49207 }, { "epoch": 0.8994826987405634, "grad_norm": 5.904909813112672, "learning_rate": 2.6268993864265344e-07, "loss": 17.3293, "step": 49208 }, { "epoch": 0.8995009779370099, "grad_norm": 8.13836975145299, "learning_rate": 2.625952622344441e-07, "loss": 17.2441, "step": 49209 }, { "epoch": 0.8995192571334564, "grad_norm": 4.85966419046252, "learning_rate": 2.625006024304272e-07, "loss": 16.8289, "step": 49210 }, { "epoch": 0.899537536329903, "grad_norm": 6.485376876559823, "learning_rate": 2.6240595923093473e-07, "loss": 17.4516, "step": 49211 }, { "epoch": 0.8995558155263494, "grad_norm": 5.529006804229427, "learning_rate": 2.62311332636298e-07, "loss": 17.1317, "step": 49212 }, { "epoch": 0.899574094722796, "grad_norm": 6.702885810090402, "learning_rate": 2.6221672264684915e-07, "loss": 17.6363, "step": 49213 }, { "epoch": 0.8995923739192425, "grad_norm": 5.5515547700129275, "learning_rate": 2.6212212926291894e-07, "loss": 17.1395, "step": 49214 }, { "epoch": 0.899610653115689, "grad_norm": 5.68247322090596, "learning_rate": 2.620275524848404e-07, "loss": 17.3812, "step": 49215 }, { "epoch": 0.8996289323121356, "grad_norm": 5.893174626593138, "learning_rate": 2.6193299231294433e-07, "loss": 17.2184, "step": 49216 }, { "epoch": 0.8996472115085821, "grad_norm": 5.542326427729722, "learning_rate": 2.6183844874756116e-07, "loss": 16.9386, "step": 49217 }, { "epoch": 0.8996654907050287, "grad_norm": 4.469975812850405, "learning_rate": 2.6174392178902384e-07, "loss": 16.6967, "step": 49218 }, { "epoch": 0.8996837699014751, "grad_norm": 7.258681893928796, "learning_rate": 2.6164941143766277e-07, "loss": 17.4307, "step": 49219 }, { "epoch": 0.8997020490979216, "grad_norm": 6.639145380071124, "learning_rate": 2.615549176938087e-07, "loss": 17.364, "step": 49220 }, { "epoch": 0.8997203282943682, "grad_norm": 6.202946855821762, "learning_rate": 2.614604405577942e-07, "loss": 17.3222, "step": 49221 }, { "epoch": 0.8997386074908147, "grad_norm": 6.523199577134221, "learning_rate": 2.6136598002994896e-07, "loss": 17.1792, "step": 49222 }, { "epoch": 0.8997568866872613, "grad_norm": 4.730287088436061, "learning_rate": 2.612715361106044e-07, "loss": 16.827, "step": 49223 }, { "epoch": 0.8997751658837078, "grad_norm": 6.688423906321673, "learning_rate": 2.611771088000931e-07, "loss": 17.6367, "step": 49224 }, { "epoch": 0.8997934450801542, "grad_norm": 4.537313613716365, "learning_rate": 2.6108269809874466e-07, "loss": 16.8243, "step": 49225 }, { "epoch": 0.8998117242766008, "grad_norm": 6.982716795174917, "learning_rate": 2.6098830400688945e-07, "loss": 18.0309, "step": 49226 }, { "epoch": 0.8998300034730473, "grad_norm": 6.0341145849366455, "learning_rate": 2.6089392652485947e-07, "loss": 17.2484, "step": 49227 }, { "epoch": 0.8998482826694939, "grad_norm": 7.493431953821615, "learning_rate": 2.607995656529849e-07, "loss": 17.6774, "step": 49228 }, { "epoch": 0.8998665618659404, "grad_norm": 5.853909202232786, "learning_rate": 2.607052213915967e-07, "loss": 17.1881, "step": 49229 }, { "epoch": 0.8998848410623869, "grad_norm": 5.515970866037379, "learning_rate": 2.606108937410257e-07, "loss": 17.1145, "step": 49230 }, { "epoch": 0.8999031202588335, "grad_norm": 6.682772715735236, "learning_rate": 2.605165827016015e-07, "loss": 17.5579, "step": 49231 }, { "epoch": 0.8999213994552799, "grad_norm": 5.513392740583636, "learning_rate": 2.604222882736557e-07, "loss": 17.0938, "step": 49232 }, { "epoch": 0.8999396786517264, "grad_norm": 7.978871897116162, "learning_rate": 2.603280104575184e-07, "loss": 17.5906, "step": 49233 }, { "epoch": 0.899957957848173, "grad_norm": 6.198038494827642, "learning_rate": 2.6023374925352007e-07, "loss": 17.3586, "step": 49234 }, { "epoch": 0.8999762370446195, "grad_norm": 5.295901198169469, "learning_rate": 2.60139504661992e-07, "loss": 17.1261, "step": 49235 }, { "epoch": 0.8999945162410661, "grad_norm": 5.729834249310348, "learning_rate": 2.600452766832634e-07, "loss": 17.2591, "step": 49236 }, { "epoch": 0.9000127954375126, "grad_norm": 6.542351025179787, "learning_rate": 2.5995106531766403e-07, "loss": 17.5223, "step": 49237 }, { "epoch": 0.900031074633959, "grad_norm": 7.512247278752645, "learning_rate": 2.5985687056552576e-07, "loss": 17.8437, "step": 49238 }, { "epoch": 0.9000493538304056, "grad_norm": 6.160386081705309, "learning_rate": 2.597626924271768e-07, "loss": 17.3517, "step": 49239 }, { "epoch": 0.9000676330268521, "grad_norm": 7.197392942972025, "learning_rate": 2.59668530902949e-07, "loss": 17.4102, "step": 49240 }, { "epoch": 0.9000859122232987, "grad_norm": 6.298289107822433, "learning_rate": 2.5957438599317155e-07, "loss": 17.465, "step": 49241 }, { "epoch": 0.9001041914197452, "grad_norm": 5.206625631471294, "learning_rate": 2.5948025769817365e-07, "loss": 17.24, "step": 49242 }, { "epoch": 0.9001224706161917, "grad_norm": 5.987744630121891, "learning_rate": 2.593861460182873e-07, "loss": 17.2862, "step": 49243 }, { "epoch": 0.9001407498126383, "grad_norm": 5.972784371653093, "learning_rate": 2.59292050953841e-07, "loss": 17.206, "step": 49244 }, { "epoch": 0.9001590290090847, "grad_norm": 7.504599830678527, "learning_rate": 2.591979725051641e-07, "loss": 17.5864, "step": 49245 }, { "epoch": 0.9001773082055313, "grad_norm": 6.248946783415296, "learning_rate": 2.5910391067258734e-07, "loss": 17.21, "step": 49246 }, { "epoch": 0.9001955874019778, "grad_norm": 6.559021225170839, "learning_rate": 2.5900986545644046e-07, "loss": 17.4194, "step": 49247 }, { "epoch": 0.9002138665984243, "grad_norm": 6.173218179400851, "learning_rate": 2.589158368570516e-07, "loss": 17.3417, "step": 49248 }, { "epoch": 0.9002321457948709, "grad_norm": 5.215860739733495, "learning_rate": 2.588218248747515e-07, "loss": 16.8944, "step": 49249 }, { "epoch": 0.9002504249913174, "grad_norm": 6.187517193904994, "learning_rate": 2.5872782950986995e-07, "loss": 17.3229, "step": 49250 }, { "epoch": 0.900268704187764, "grad_norm": 4.986281631272347, "learning_rate": 2.586338507627351e-07, "loss": 16.801, "step": 49251 }, { "epoch": 0.9002869833842104, "grad_norm": 6.785408535685409, "learning_rate": 2.5853988863367816e-07, "loss": 17.6133, "step": 49252 }, { "epoch": 0.9003052625806569, "grad_norm": 7.253836961535692, "learning_rate": 2.584459431230274e-07, "loss": 17.39, "step": 49253 }, { "epoch": 0.9003235417771035, "grad_norm": 5.645668676657404, "learning_rate": 2.5835201423111133e-07, "loss": 16.9308, "step": 49254 }, { "epoch": 0.90034182097355, "grad_norm": 6.503985429663828, "learning_rate": 2.5825810195826085e-07, "loss": 17.3307, "step": 49255 }, { "epoch": 0.9003601001699966, "grad_norm": 5.860495589198332, "learning_rate": 2.58164206304804e-07, "loss": 17.1608, "step": 49256 }, { "epoch": 0.900378379366443, "grad_norm": 6.044173793073428, "learning_rate": 2.5807032727107e-07, "loss": 17.2197, "step": 49257 }, { "epoch": 0.9003966585628895, "grad_norm": 4.872127837965028, "learning_rate": 2.5797646485738736e-07, "loss": 16.8171, "step": 49258 }, { "epoch": 0.9004149377593361, "grad_norm": 7.112830725233869, "learning_rate": 2.57882619064086e-07, "loss": 17.5032, "step": 49259 }, { "epoch": 0.9004332169557826, "grad_norm": 5.7637701491234745, "learning_rate": 2.577887898914955e-07, "loss": 16.8669, "step": 49260 }, { "epoch": 0.9004514961522292, "grad_norm": 9.079424307381723, "learning_rate": 2.576949773399434e-07, "loss": 17.9805, "step": 49261 }, { "epoch": 0.9004697753486757, "grad_norm": 6.400153928165069, "learning_rate": 2.576011814097584e-07, "loss": 17.2625, "step": 49262 }, { "epoch": 0.9004880545451222, "grad_norm": 7.02758642989852, "learning_rate": 2.5750740210127023e-07, "loss": 17.4564, "step": 49263 }, { "epoch": 0.9005063337415687, "grad_norm": 7.402517071502136, "learning_rate": 2.574136394148069e-07, "loss": 17.6837, "step": 49264 }, { "epoch": 0.9005246129380152, "grad_norm": 6.520497098158305, "learning_rate": 2.5731989335069706e-07, "loss": 17.6005, "step": 49265 }, { "epoch": 0.9005428921344618, "grad_norm": 7.249853518313396, "learning_rate": 2.572261639092699e-07, "loss": 17.6293, "step": 49266 }, { "epoch": 0.9005611713309083, "grad_norm": 5.761275836873925, "learning_rate": 2.5713245109085294e-07, "loss": 17.0649, "step": 49267 }, { "epoch": 0.9005794505273548, "grad_norm": 5.55423001598906, "learning_rate": 2.570387548957748e-07, "loss": 17.4013, "step": 49268 }, { "epoch": 0.9005977297238014, "grad_norm": 5.778853942810943, "learning_rate": 2.569450753243652e-07, "loss": 17.1749, "step": 49269 }, { "epoch": 0.9006160089202478, "grad_norm": 6.477948222018554, "learning_rate": 2.5685141237695165e-07, "loss": 17.5294, "step": 49270 }, { "epoch": 0.9006342881166944, "grad_norm": 5.105059506478091, "learning_rate": 2.5675776605386116e-07, "loss": 16.8433, "step": 49271 }, { "epoch": 0.9006525673131409, "grad_norm": 6.7001148999647135, "learning_rate": 2.5666413635542396e-07, "loss": 17.4443, "step": 49272 }, { "epoch": 0.9006708465095874, "grad_norm": 5.749043826248064, "learning_rate": 2.5657052328196707e-07, "loss": 17.174, "step": 49273 }, { "epoch": 0.900689125706034, "grad_norm": 6.2140946805459025, "learning_rate": 2.5647692683381907e-07, "loss": 17.4576, "step": 49274 }, { "epoch": 0.9007074049024805, "grad_norm": 5.622847070189799, "learning_rate": 2.563833470113081e-07, "loss": 17.0877, "step": 49275 }, { "epoch": 0.9007256840989271, "grad_norm": 6.76661109831633, "learning_rate": 2.5628978381476097e-07, "loss": 17.3981, "step": 49276 }, { "epoch": 0.9007439632953735, "grad_norm": 6.949630550156034, "learning_rate": 2.5619623724450706e-07, "loss": 17.5775, "step": 49277 }, { "epoch": 0.90076224249182, "grad_norm": 7.339683873505451, "learning_rate": 2.561027073008737e-07, "loss": 17.5555, "step": 49278 }, { "epoch": 0.9007805216882666, "grad_norm": 5.4031108875022635, "learning_rate": 2.5600919398418856e-07, "loss": 17.0261, "step": 49279 }, { "epoch": 0.9007988008847131, "grad_norm": 5.400336565548951, "learning_rate": 2.5591569729477963e-07, "loss": 16.9889, "step": 49280 }, { "epoch": 0.9008170800811597, "grad_norm": 6.297411063686384, "learning_rate": 2.5582221723297495e-07, "loss": 17.0894, "step": 49281 }, { "epoch": 0.9008353592776062, "grad_norm": 6.0837497796519315, "learning_rate": 2.55728753799101e-07, "loss": 17.1441, "step": 49282 }, { "epoch": 0.9008536384740526, "grad_norm": 7.406547443382717, "learning_rate": 2.5563530699348637e-07, "loss": 17.8777, "step": 49283 }, { "epoch": 0.9008719176704992, "grad_norm": 6.4891006624368, "learning_rate": 2.555418768164586e-07, "loss": 17.628, "step": 49284 }, { "epoch": 0.9008901968669457, "grad_norm": 6.399549298962686, "learning_rate": 2.554484632683446e-07, "loss": 17.5161, "step": 49285 }, { "epoch": 0.9009084760633923, "grad_norm": 6.586300814549777, "learning_rate": 2.5535506634947137e-07, "loss": 17.3546, "step": 49286 }, { "epoch": 0.9009267552598388, "grad_norm": 7.020936687604026, "learning_rate": 2.5526168606016755e-07, "loss": 17.7534, "step": 49287 }, { "epoch": 0.9009450344562853, "grad_norm": 6.182171593656964, "learning_rate": 2.551683224007595e-07, "loss": 17.1542, "step": 49288 }, { "epoch": 0.9009633136527319, "grad_norm": 6.312225010212981, "learning_rate": 2.550749753715753e-07, "loss": 17.3427, "step": 49289 }, { "epoch": 0.9009815928491783, "grad_norm": 17.48146892075842, "learning_rate": 2.5498164497294085e-07, "loss": 17.6302, "step": 49290 }, { "epoch": 0.9009998720456249, "grad_norm": 6.598227762286587, "learning_rate": 2.5488833120518474e-07, "loss": 17.3669, "step": 49291 }, { "epoch": 0.9010181512420714, "grad_norm": 7.929872135819357, "learning_rate": 2.5479503406863335e-07, "loss": 17.8717, "step": 49292 }, { "epoch": 0.9010364304385179, "grad_norm": 6.1790639980361135, "learning_rate": 2.547017535636126e-07, "loss": 17.0759, "step": 49293 }, { "epoch": 0.9010547096349645, "grad_norm": 8.253807253302357, "learning_rate": 2.54608489690451e-07, "loss": 17.7486, "step": 49294 }, { "epoch": 0.901072988831411, "grad_norm": 5.862557875949329, "learning_rate": 2.5451524244947457e-07, "loss": 17.0814, "step": 49295 }, { "epoch": 0.9010912680278576, "grad_norm": 6.6743839575456345, "learning_rate": 2.5442201184101013e-07, "loss": 17.2643, "step": 49296 }, { "epoch": 0.901109547224304, "grad_norm": 6.764862523135071, "learning_rate": 2.543287978653858e-07, "loss": 17.4196, "step": 49297 }, { "epoch": 0.9011278264207505, "grad_norm": 4.9706498922780815, "learning_rate": 2.5423560052292686e-07, "loss": 16.7335, "step": 49298 }, { "epoch": 0.9011461056171971, "grad_norm": 6.672124318117464, "learning_rate": 2.541424198139597e-07, "loss": 17.8132, "step": 49299 }, { "epoch": 0.9011643848136436, "grad_norm": 6.042181040002711, "learning_rate": 2.5404925573881244e-07, "loss": 17.1984, "step": 49300 }, { "epoch": 0.9011826640100901, "grad_norm": 7.214208320235081, "learning_rate": 2.539561082978109e-07, "loss": 17.9073, "step": 49301 }, { "epoch": 0.9012009432065367, "grad_norm": 7.227743560325761, "learning_rate": 2.538629774912799e-07, "loss": 17.8449, "step": 49302 }, { "epoch": 0.9012192224029831, "grad_norm": 6.2177032176394995, "learning_rate": 2.5376986331954843e-07, "loss": 17.3628, "step": 49303 }, { "epoch": 0.9012375015994297, "grad_norm": 6.039180118726829, "learning_rate": 2.536767657829414e-07, "loss": 17.1592, "step": 49304 }, { "epoch": 0.9012557807958762, "grad_norm": 6.285457777186521, "learning_rate": 2.5358368488178464e-07, "loss": 17.2437, "step": 49305 }, { "epoch": 0.9012740599923227, "grad_norm": 5.671853028380991, "learning_rate": 2.5349062061640674e-07, "loss": 16.9746, "step": 49306 }, { "epoch": 0.9012923391887693, "grad_norm": 6.276663926126068, "learning_rate": 2.533975729871313e-07, "loss": 17.2933, "step": 49307 }, { "epoch": 0.9013106183852158, "grad_norm": 5.934821938902007, "learning_rate": 2.533045419942859e-07, "loss": 17.1396, "step": 49308 }, { "epoch": 0.9013288975816623, "grad_norm": 8.133512815021843, "learning_rate": 2.5321152763819635e-07, "loss": 17.5031, "step": 49309 }, { "epoch": 0.9013471767781088, "grad_norm": 7.303633951342081, "learning_rate": 2.5311852991918797e-07, "loss": 17.7688, "step": 49310 }, { "epoch": 0.9013654559745553, "grad_norm": 5.875575875616364, "learning_rate": 2.5302554883758766e-07, "loss": 17.1571, "step": 49311 }, { "epoch": 0.9013837351710019, "grad_norm": 5.20855627884777, "learning_rate": 2.5293258439372027e-07, "loss": 16.9275, "step": 49312 }, { "epoch": 0.9014020143674484, "grad_norm": 5.136498411557964, "learning_rate": 2.528396365879132e-07, "loss": 16.8835, "step": 49313 }, { "epoch": 0.901420293563895, "grad_norm": 7.831851818589989, "learning_rate": 2.527467054204902e-07, "loss": 17.3751, "step": 49314 }, { "epoch": 0.9014385727603414, "grad_norm": 5.25043011473923, "learning_rate": 2.5265379089177864e-07, "loss": 16.7426, "step": 49315 }, { "epoch": 0.9014568519567879, "grad_norm": 7.456718555160405, "learning_rate": 2.525608930021034e-07, "loss": 17.243, "step": 49316 }, { "epoch": 0.9014751311532345, "grad_norm": 6.754737774841609, "learning_rate": 2.5246801175179025e-07, "loss": 17.3712, "step": 49317 }, { "epoch": 0.901493410349681, "grad_norm": 6.116645992058586, "learning_rate": 2.523751471411645e-07, "loss": 16.9703, "step": 49318 }, { "epoch": 0.9015116895461276, "grad_norm": 5.8790518598930115, "learning_rate": 2.522822991705526e-07, "loss": 17.0616, "step": 49319 }, { "epoch": 0.9015299687425741, "grad_norm": 6.481940565905734, "learning_rate": 2.521894678402792e-07, "loss": 17.1575, "step": 49320 }, { "epoch": 0.9015482479390206, "grad_norm": 6.264070677948201, "learning_rate": 2.5209665315066913e-07, "loss": 17.5224, "step": 49321 }, { "epoch": 0.9015665271354671, "grad_norm": 7.990771628177098, "learning_rate": 2.520038551020482e-07, "loss": 18.0627, "step": 49322 }, { "epoch": 0.9015848063319136, "grad_norm": 7.423375649663043, "learning_rate": 2.519110736947428e-07, "loss": 17.9352, "step": 49323 }, { "epoch": 0.9016030855283602, "grad_norm": 6.7797410920288605, "learning_rate": 2.51818308929076e-07, "loss": 17.4427, "step": 49324 }, { "epoch": 0.9016213647248067, "grad_norm": 5.988293200245405, "learning_rate": 2.5172556080537426e-07, "loss": 17.3761, "step": 49325 }, { "epoch": 0.9016396439212532, "grad_norm": 5.4923535601595965, "learning_rate": 2.516328293239628e-07, "loss": 17.3164, "step": 49326 }, { "epoch": 0.9016579231176998, "grad_norm": 7.632973337554725, "learning_rate": 2.515401144851659e-07, "loss": 18.2346, "step": 49327 }, { "epoch": 0.9016762023141462, "grad_norm": 5.143515108758707, "learning_rate": 2.5144741628930933e-07, "loss": 16.8666, "step": 49328 }, { "epoch": 0.9016944815105928, "grad_norm": 6.735819802408184, "learning_rate": 2.5135473473671734e-07, "loss": 17.4017, "step": 49329 }, { "epoch": 0.9017127607070393, "grad_norm": 5.297515491400039, "learning_rate": 2.5126206982771406e-07, "loss": 17.0135, "step": 49330 }, { "epoch": 0.9017310399034858, "grad_norm": 7.358898002615934, "learning_rate": 2.5116942156262534e-07, "loss": 17.7617, "step": 49331 }, { "epoch": 0.9017493190999324, "grad_norm": 6.888329195716396, "learning_rate": 2.5107678994177654e-07, "loss": 17.4542, "step": 49332 }, { "epoch": 0.9017675982963789, "grad_norm": 7.864221287521758, "learning_rate": 2.5098417496549065e-07, "loss": 17.5505, "step": 49333 }, { "epoch": 0.9017858774928255, "grad_norm": 8.1439673197551, "learning_rate": 2.5089157663409415e-07, "loss": 17.8756, "step": 49334 }, { "epoch": 0.9018041566892719, "grad_norm": 6.292126766639887, "learning_rate": 2.507989949479095e-07, "loss": 17.6297, "step": 49335 }, { "epoch": 0.9018224358857184, "grad_norm": 6.451711226833329, "learning_rate": 2.5070642990726315e-07, "loss": 17.4543, "step": 49336 }, { "epoch": 0.901840715082165, "grad_norm": 6.996649139388116, "learning_rate": 2.506138815124781e-07, "loss": 17.3463, "step": 49337 }, { "epoch": 0.9018589942786115, "grad_norm": 6.157070760585986, "learning_rate": 2.505213497638792e-07, "loss": 17.0693, "step": 49338 }, { "epoch": 0.9018772734750581, "grad_norm": 5.2996291105150775, "learning_rate": 2.5042883466179114e-07, "loss": 16.956, "step": 49339 }, { "epoch": 0.9018955526715046, "grad_norm": 5.521023600319066, "learning_rate": 2.503363362065375e-07, "loss": 17.0006, "step": 49340 }, { "epoch": 0.901913831867951, "grad_norm": 6.231302788331642, "learning_rate": 2.5024385439844255e-07, "loss": 17.2701, "step": 49341 }, { "epoch": 0.9019321110643976, "grad_norm": 5.294749225398077, "learning_rate": 2.5015138923783156e-07, "loss": 17.0292, "step": 49342 }, { "epoch": 0.9019503902608441, "grad_norm": 6.487867573336085, "learning_rate": 2.5005894072502814e-07, "loss": 17.5909, "step": 49343 }, { "epoch": 0.9019686694572907, "grad_norm": 8.954692488446957, "learning_rate": 2.499665088603548e-07, "loss": 18.3682, "step": 49344 }, { "epoch": 0.9019869486537372, "grad_norm": 6.747091492963504, "learning_rate": 2.498740936441374e-07, "loss": 17.385, "step": 49345 }, { "epoch": 0.9020052278501837, "grad_norm": 5.309293452934335, "learning_rate": 2.49781695076699e-07, "loss": 17.1031, "step": 49346 }, { "epoch": 0.9020235070466303, "grad_norm": 5.243802663024662, "learning_rate": 2.496893131583633e-07, "loss": 17.2846, "step": 49347 }, { "epoch": 0.9020417862430767, "grad_norm": 6.451080149038444, "learning_rate": 2.49596947889455e-07, "loss": 17.6941, "step": 49348 }, { "epoch": 0.9020600654395233, "grad_norm": 6.258868125239532, "learning_rate": 2.49504599270296e-07, "loss": 17.3455, "step": 49349 }, { "epoch": 0.9020783446359698, "grad_norm": 7.46744633042637, "learning_rate": 2.4941226730121114e-07, "loss": 17.784, "step": 49350 }, { "epoch": 0.9020966238324163, "grad_norm": 6.037072014371175, "learning_rate": 2.493199519825251e-07, "loss": 17.3543, "step": 49351 }, { "epoch": 0.9021149030288629, "grad_norm": 5.253834250200137, "learning_rate": 2.492276533145599e-07, "loss": 17.1236, "step": 49352 }, { "epoch": 0.9021331822253094, "grad_norm": 5.168393015103118, "learning_rate": 2.4913537129763966e-07, "loss": 17.1699, "step": 49353 }, { "epoch": 0.902151461421756, "grad_norm": 4.8633848914249675, "learning_rate": 2.49043105932088e-07, "loss": 16.7693, "step": 49354 }, { "epoch": 0.9021697406182024, "grad_norm": 5.761458141536799, "learning_rate": 2.48950857218227e-07, "loss": 17.1609, "step": 49355 }, { "epoch": 0.9021880198146489, "grad_norm": 6.584669909232768, "learning_rate": 2.488586251563818e-07, "loss": 17.7631, "step": 49356 }, { "epoch": 0.9022062990110955, "grad_norm": 5.022026885777145, "learning_rate": 2.4876640974687503e-07, "loss": 16.9115, "step": 49357 }, { "epoch": 0.902224578207542, "grad_norm": 6.80671993373129, "learning_rate": 2.486742109900292e-07, "loss": 17.9236, "step": 49358 }, { "epoch": 0.9022428574039886, "grad_norm": 6.759587039018969, "learning_rate": 2.485820288861673e-07, "loss": 17.6777, "step": 49359 }, { "epoch": 0.902261136600435, "grad_norm": 9.186078131076574, "learning_rate": 2.484898634356142e-07, "loss": 17.6646, "step": 49360 }, { "epoch": 0.9022794157968815, "grad_norm": 6.1346709601223885, "learning_rate": 2.483977146386912e-07, "loss": 17.5166, "step": 49361 }, { "epoch": 0.9022976949933281, "grad_norm": 6.173942726629447, "learning_rate": 2.483055824957226e-07, "loss": 17.3821, "step": 49362 }, { "epoch": 0.9023159741897746, "grad_norm": 4.791774545415166, "learning_rate": 2.482134670070302e-07, "loss": 17.034, "step": 49363 }, { "epoch": 0.9023342533862212, "grad_norm": 6.195598367340697, "learning_rate": 2.481213681729366e-07, "loss": 17.6721, "step": 49364 }, { "epoch": 0.9023525325826677, "grad_norm": 5.90005623971666, "learning_rate": 2.480292859937661e-07, "loss": 16.9874, "step": 49365 }, { "epoch": 0.9023708117791142, "grad_norm": 5.078737998453991, "learning_rate": 2.479372204698399e-07, "loss": 16.94, "step": 49366 }, { "epoch": 0.9023890909755607, "grad_norm": 6.639190143132871, "learning_rate": 2.478451716014824e-07, "loss": 17.4663, "step": 49367 }, { "epoch": 0.9024073701720072, "grad_norm": 7.036712200344541, "learning_rate": 2.4775313938901426e-07, "loss": 17.7846, "step": 49368 }, { "epoch": 0.9024256493684537, "grad_norm": 6.212713709363675, "learning_rate": 2.476611238327586e-07, "loss": 17.2923, "step": 49369 }, { "epoch": 0.9024439285649003, "grad_norm": 7.791605981301114, "learning_rate": 2.4756912493303976e-07, "loss": 17.7976, "step": 49370 }, { "epoch": 0.9024622077613468, "grad_norm": 7.004234536054989, "learning_rate": 2.4747714269017785e-07, "loss": 17.5108, "step": 49371 }, { "epoch": 0.9024804869577934, "grad_norm": 6.040623828831747, "learning_rate": 2.4738517710449604e-07, "loss": 17.1404, "step": 49372 }, { "epoch": 0.9024987661542399, "grad_norm": 6.269291182404717, "learning_rate": 2.472932281763174e-07, "loss": 17.2929, "step": 49373 }, { "epoch": 0.9025170453506863, "grad_norm": 6.710243822422396, "learning_rate": 2.4720129590596386e-07, "loss": 17.2722, "step": 49374 }, { "epoch": 0.9025353245471329, "grad_norm": 6.031444450577942, "learning_rate": 2.471093802937563e-07, "loss": 17.2105, "step": 49375 }, { "epoch": 0.9025536037435794, "grad_norm": 8.0019702725255, "learning_rate": 2.470174813400178e-07, "loss": 18.2075, "step": 49376 }, { "epoch": 0.902571882940026, "grad_norm": 5.349077919811687, "learning_rate": 2.469255990450714e-07, "loss": 16.9045, "step": 49377 }, { "epoch": 0.9025901621364725, "grad_norm": 6.76697279506671, "learning_rate": 2.4683373340923744e-07, "loss": 17.4618, "step": 49378 }, { "epoch": 0.902608441332919, "grad_norm": 7.786936164160719, "learning_rate": 2.467418844328401e-07, "loss": 17.5275, "step": 49379 }, { "epoch": 0.9026267205293655, "grad_norm": 9.381973034491025, "learning_rate": 2.466500521161985e-07, "loss": 18.6082, "step": 49380 }, { "epoch": 0.902644999725812, "grad_norm": 7.271132298669294, "learning_rate": 2.4655823645963694e-07, "loss": 17.6527, "step": 49381 }, { "epoch": 0.9026632789222586, "grad_norm": 6.085878674468123, "learning_rate": 2.4646643746347674e-07, "loss": 17.1184, "step": 49382 }, { "epoch": 0.9026815581187051, "grad_norm": 5.914793564051586, "learning_rate": 2.4637465512803824e-07, "loss": 17.4251, "step": 49383 }, { "epoch": 0.9026998373151516, "grad_norm": 5.992596245381641, "learning_rate": 2.4628288945364454e-07, "loss": 17.3022, "step": 49384 }, { "epoch": 0.9027181165115982, "grad_norm": 5.3388293929295765, "learning_rate": 2.4619114044061644e-07, "loss": 17.0875, "step": 49385 }, { "epoch": 0.9027363957080446, "grad_norm": 5.877221059035085, "learning_rate": 2.4609940808927535e-07, "loss": 17.0924, "step": 49386 }, { "epoch": 0.9027546749044912, "grad_norm": 5.889197071231769, "learning_rate": 2.4600769239994435e-07, "loss": 17.3752, "step": 49387 }, { "epoch": 0.9027729541009377, "grad_norm": 6.1323337234207616, "learning_rate": 2.459159933729438e-07, "loss": 17.3929, "step": 49388 }, { "epoch": 0.9027912332973842, "grad_norm": 6.50617864643687, "learning_rate": 2.458243110085945e-07, "loss": 17.3709, "step": 49389 }, { "epoch": 0.9028095124938308, "grad_norm": 5.4958273178512895, "learning_rate": 2.457326453072195e-07, "loss": 17.3056, "step": 49390 }, { "epoch": 0.9028277916902773, "grad_norm": 5.671673147315914, "learning_rate": 2.456409962691386e-07, "loss": 17.1234, "step": 49391 }, { "epoch": 0.9028460708867239, "grad_norm": 9.830079119997597, "learning_rate": 2.4554936389467266e-07, "loss": 18.5658, "step": 49392 }, { "epoch": 0.9028643500831703, "grad_norm": 5.640215159286101, "learning_rate": 2.454577481841447e-07, "loss": 17.0839, "step": 49393 }, { "epoch": 0.9028826292796168, "grad_norm": 8.754299261761373, "learning_rate": 2.453661491378734e-07, "loss": 17.9817, "step": 49394 }, { "epoch": 0.9029009084760634, "grad_norm": 5.461839328687457, "learning_rate": 2.452745667561818e-07, "loss": 17.2489, "step": 49395 }, { "epoch": 0.9029191876725099, "grad_norm": 5.935131566977483, "learning_rate": 2.4518300103939075e-07, "loss": 17.1855, "step": 49396 }, { "epoch": 0.9029374668689565, "grad_norm": 4.76775773535731, "learning_rate": 2.4509145198782004e-07, "loss": 16.9719, "step": 49397 }, { "epoch": 0.902955746065403, "grad_norm": 6.289628904208417, "learning_rate": 2.449999196017916e-07, "loss": 17.3042, "step": 49398 }, { "epoch": 0.9029740252618494, "grad_norm": 7.151396741354779, "learning_rate": 2.4490840388162627e-07, "loss": 17.8156, "step": 49399 }, { "epoch": 0.902992304458296, "grad_norm": 6.814871617389532, "learning_rate": 2.448169048276433e-07, "loss": 17.9335, "step": 49400 }, { "epoch": 0.9030105836547425, "grad_norm": 5.822775509219553, "learning_rate": 2.447254224401652e-07, "loss": 17.2199, "step": 49401 }, { "epoch": 0.9030288628511891, "grad_norm": 5.801797094756316, "learning_rate": 2.446339567195116e-07, "loss": 17.1927, "step": 49402 }, { "epoch": 0.9030471420476356, "grad_norm": 5.790461891930944, "learning_rate": 2.44542507666003e-07, "loss": 17.3785, "step": 49403 }, { "epoch": 0.9030654212440821, "grad_norm": 5.936220175557298, "learning_rate": 2.444510752799606e-07, "loss": 17.41, "step": 49404 }, { "epoch": 0.9030837004405287, "grad_norm": 7.8005633371728225, "learning_rate": 2.443596595617048e-07, "loss": 17.5197, "step": 49405 }, { "epoch": 0.9031019796369751, "grad_norm": 4.865828571221644, "learning_rate": 2.4426826051155485e-07, "loss": 16.846, "step": 49406 }, { "epoch": 0.9031202588334217, "grad_norm": 5.583526376180398, "learning_rate": 2.441768781298326e-07, "loss": 16.9084, "step": 49407 }, { "epoch": 0.9031385380298682, "grad_norm": 6.171466955117578, "learning_rate": 2.440855124168584e-07, "loss": 17.3543, "step": 49408 }, { "epoch": 0.9031568172263147, "grad_norm": 6.378698905536128, "learning_rate": 2.439941633729503e-07, "loss": 17.7292, "step": 49409 }, { "epoch": 0.9031750964227613, "grad_norm": 10.720748243786154, "learning_rate": 2.4390283099843093e-07, "loss": 18.4128, "step": 49410 }, { "epoch": 0.9031933756192078, "grad_norm": 6.207041019020516, "learning_rate": 2.438115152936188e-07, "loss": 17.3349, "step": 49411 }, { "epoch": 0.9032116548156544, "grad_norm": 5.938224272557377, "learning_rate": 2.437202162588353e-07, "loss": 17.1873, "step": 49412 }, { "epoch": 0.9032299340121008, "grad_norm": 6.963772273910337, "learning_rate": 2.4362893389439867e-07, "loss": 17.5035, "step": 49413 }, { "epoch": 0.9032482132085473, "grad_norm": 4.331409048436789, "learning_rate": 2.4353766820063016e-07, "loss": 16.5526, "step": 49414 }, { "epoch": 0.9032664924049939, "grad_norm": 7.503754533440826, "learning_rate": 2.434464191778502e-07, "loss": 18.0444, "step": 49415 }, { "epoch": 0.9032847716014404, "grad_norm": 5.5204224287846495, "learning_rate": 2.433551868263778e-07, "loss": 17.1176, "step": 49416 }, { "epoch": 0.903303050797887, "grad_norm": 5.995536289980019, "learning_rate": 2.4326397114653175e-07, "loss": 17.3412, "step": 49417 }, { "epoch": 0.9033213299943335, "grad_norm": 5.74449696663005, "learning_rate": 2.4317277213863343e-07, "loss": 16.8785, "step": 49418 }, { "epoch": 0.9033396091907799, "grad_norm": 5.684162180251299, "learning_rate": 2.4308158980300203e-07, "loss": 17.2739, "step": 49419 }, { "epoch": 0.9033578883872265, "grad_norm": 7.957308686720943, "learning_rate": 2.429904241399561e-07, "loss": 17.5929, "step": 49420 }, { "epoch": 0.903376167583673, "grad_norm": 6.788893542154491, "learning_rate": 2.4289927514981657e-07, "loss": 17.6045, "step": 49421 }, { "epoch": 0.9033944467801196, "grad_norm": 4.953212000370609, "learning_rate": 2.4280814283290155e-07, "loss": 16.8852, "step": 49422 }, { "epoch": 0.9034127259765661, "grad_norm": 4.540617408079974, "learning_rate": 2.4271702718953125e-07, "loss": 16.612, "step": 49423 }, { "epoch": 0.9034310051730126, "grad_norm": 9.16467996479903, "learning_rate": 2.426259282200255e-07, "loss": 18.0303, "step": 49424 }, { "epoch": 0.9034492843694591, "grad_norm": 6.42717359857992, "learning_rate": 2.425348459247029e-07, "loss": 17.4998, "step": 49425 }, { "epoch": 0.9034675635659056, "grad_norm": 4.609778550550283, "learning_rate": 2.424437803038826e-07, "loss": 16.7116, "step": 49426 }, { "epoch": 0.9034858427623522, "grad_norm": 6.198606403700163, "learning_rate": 2.423527313578844e-07, "loss": 17.3771, "step": 49427 }, { "epoch": 0.9035041219587987, "grad_norm": 6.835220093839423, "learning_rate": 2.4226169908702636e-07, "loss": 17.5122, "step": 49428 }, { "epoch": 0.9035224011552452, "grad_norm": 8.357564803005054, "learning_rate": 2.421706834916293e-07, "loss": 18.1982, "step": 49429 }, { "epoch": 0.9035406803516918, "grad_norm": 6.118777299106084, "learning_rate": 2.4207968457201027e-07, "loss": 17.4526, "step": 49430 }, { "epoch": 0.9035589595481383, "grad_norm": 7.45254950760096, "learning_rate": 2.419887023284889e-07, "loss": 17.7944, "step": 49431 }, { "epoch": 0.9035772387445848, "grad_norm": 5.879193548604661, "learning_rate": 2.4189773676138453e-07, "loss": 17.1307, "step": 49432 }, { "epoch": 0.9035955179410313, "grad_norm": 6.244489760818667, "learning_rate": 2.4180678787101617e-07, "loss": 17.0711, "step": 49433 }, { "epoch": 0.9036137971374778, "grad_norm": 5.206886694971616, "learning_rate": 2.417158556577015e-07, "loss": 17.1224, "step": 49434 }, { "epoch": 0.9036320763339244, "grad_norm": 5.471986620486923, "learning_rate": 2.416249401217602e-07, "loss": 16.8319, "step": 49435 }, { "epoch": 0.9036503555303709, "grad_norm": 6.787051661136595, "learning_rate": 2.4153404126351085e-07, "loss": 17.8728, "step": 49436 }, { "epoch": 0.9036686347268174, "grad_norm": 5.727710576627668, "learning_rate": 2.414431590832711e-07, "loss": 17.2819, "step": 49437 }, { "epoch": 0.903686913923264, "grad_norm": 4.865272834916763, "learning_rate": 2.4135229358136057e-07, "loss": 16.8846, "step": 49438 }, { "epoch": 0.9037051931197104, "grad_norm": 6.37263601692583, "learning_rate": 2.412614447580969e-07, "loss": 17.3937, "step": 49439 }, { "epoch": 0.903723472316157, "grad_norm": 8.863916488210043, "learning_rate": 2.411706126137992e-07, "loss": 18.0993, "step": 49440 }, { "epoch": 0.9037417515126035, "grad_norm": 7.431967445235294, "learning_rate": 2.410797971487855e-07, "loss": 17.5636, "step": 49441 }, { "epoch": 0.90376003070905, "grad_norm": 5.547912888333419, "learning_rate": 2.4098899836337344e-07, "loss": 17.258, "step": 49442 }, { "epoch": 0.9037783099054966, "grad_norm": 7.0666821032474925, "learning_rate": 2.4089821625788324e-07, "loss": 17.6729, "step": 49443 }, { "epoch": 0.903796589101943, "grad_norm": 5.855578872983652, "learning_rate": 2.4080745083263133e-07, "loss": 17.0976, "step": 49444 }, { "epoch": 0.9038148682983896, "grad_norm": 7.982661024497057, "learning_rate": 2.4071670208793584e-07, "loss": 18.6471, "step": 49445 }, { "epoch": 0.9038331474948361, "grad_norm": 5.6202123669301205, "learning_rate": 2.4062597002411645e-07, "loss": 17.0752, "step": 49446 }, { "epoch": 0.9038514266912826, "grad_norm": 7.992166788622028, "learning_rate": 2.4053525464148965e-07, "loss": 17.5208, "step": 49447 }, { "epoch": 0.9038697058877292, "grad_norm": 6.767460811068553, "learning_rate": 2.4044455594037286e-07, "loss": 17.4912, "step": 49448 }, { "epoch": 0.9038879850841757, "grad_norm": 6.372407503215069, "learning_rate": 2.4035387392108536e-07, "loss": 17.4736, "step": 49449 }, { "epoch": 0.9039062642806223, "grad_norm": 7.043975978338571, "learning_rate": 2.4026320858394515e-07, "loss": 17.702, "step": 49450 }, { "epoch": 0.9039245434770687, "grad_norm": 6.673159987862644, "learning_rate": 2.401725599292681e-07, "loss": 17.4713, "step": 49451 }, { "epoch": 0.9039428226735152, "grad_norm": 6.903194624889225, "learning_rate": 2.400819279573746e-07, "loss": 17.4022, "step": 49452 }, { "epoch": 0.9039611018699618, "grad_norm": 6.323081917563211, "learning_rate": 2.3999131266858043e-07, "loss": 16.9576, "step": 49453 }, { "epoch": 0.9039793810664083, "grad_norm": 6.265018538471043, "learning_rate": 2.3990071406320313e-07, "loss": 17.3603, "step": 49454 }, { "epoch": 0.9039976602628549, "grad_norm": 7.677611968951057, "learning_rate": 2.3981013214156136e-07, "loss": 18.0684, "step": 49455 }, { "epoch": 0.9040159394593014, "grad_norm": 6.484421068234131, "learning_rate": 2.3971956690397203e-07, "loss": 17.0447, "step": 49456 }, { "epoch": 0.9040342186557478, "grad_norm": 5.17492673676248, "learning_rate": 2.3962901835075213e-07, "loss": 17.0445, "step": 49457 }, { "epoch": 0.9040524978521944, "grad_norm": 6.513977591822702, "learning_rate": 2.3953848648221925e-07, "loss": 17.2644, "step": 49458 }, { "epoch": 0.9040707770486409, "grad_norm": 6.582544753754146, "learning_rate": 2.394479712986908e-07, "loss": 17.1879, "step": 49459 }, { "epoch": 0.9040890562450875, "grad_norm": 7.082726162137844, "learning_rate": 2.393574728004844e-07, "loss": 17.4039, "step": 49460 }, { "epoch": 0.904107335441534, "grad_norm": 8.423799679854177, "learning_rate": 2.392669909879175e-07, "loss": 18.7579, "step": 49461 }, { "epoch": 0.9041256146379805, "grad_norm": 6.809450314422451, "learning_rate": 2.3917652586130603e-07, "loss": 17.4411, "step": 49462 }, { "epoch": 0.9041438938344271, "grad_norm": 7.623250788247704, "learning_rate": 2.3908607742096803e-07, "loss": 17.7858, "step": 49463 }, { "epoch": 0.9041621730308735, "grad_norm": 6.875913010523928, "learning_rate": 2.3899564566722044e-07, "loss": 17.7018, "step": 49464 }, { "epoch": 0.9041804522273201, "grad_norm": 6.791445137433148, "learning_rate": 2.389052306003792e-07, "loss": 17.5691, "step": 49465 }, { "epoch": 0.9041987314237666, "grad_norm": 6.1793083125470645, "learning_rate": 2.3881483222076284e-07, "loss": 17.4113, "step": 49466 }, { "epoch": 0.9042170106202131, "grad_norm": 5.164896170904268, "learning_rate": 2.387244505286862e-07, "loss": 16.9741, "step": 49467 }, { "epoch": 0.9042352898166597, "grad_norm": 6.27801546500168, "learning_rate": 2.386340855244679e-07, "loss": 17.3733, "step": 49468 }, { "epoch": 0.9042535690131062, "grad_norm": 6.204088697453209, "learning_rate": 2.385437372084243e-07, "loss": 17.6931, "step": 49469 }, { "epoch": 0.9042718482095528, "grad_norm": 5.846560005587908, "learning_rate": 2.3845340558087125e-07, "loss": 17.4908, "step": 49470 }, { "epoch": 0.9042901274059992, "grad_norm": 7.556299118497042, "learning_rate": 2.3836309064212581e-07, "loss": 18.0883, "step": 49471 }, { "epoch": 0.9043084066024457, "grad_norm": 5.796737956028201, "learning_rate": 2.382727923925049e-07, "loss": 17.2661, "step": 49472 }, { "epoch": 0.9043266857988923, "grad_norm": 5.772740511740281, "learning_rate": 2.3818251083232436e-07, "loss": 17.3866, "step": 49473 }, { "epoch": 0.9043449649953388, "grad_norm": 6.041998769115328, "learning_rate": 2.3809224596190118e-07, "loss": 17.4317, "step": 49474 }, { "epoch": 0.9043632441917854, "grad_norm": 7.654122288354134, "learning_rate": 2.380019977815512e-07, "loss": 18.1996, "step": 49475 }, { "epoch": 0.9043815233882319, "grad_norm": 5.601259416146793, "learning_rate": 2.3791176629159085e-07, "loss": 17.1325, "step": 49476 }, { "epoch": 0.9043998025846783, "grad_norm": 5.075456527031299, "learning_rate": 2.3782155149233653e-07, "loss": 16.9242, "step": 49477 }, { "epoch": 0.9044180817811249, "grad_norm": 5.84712733289828, "learning_rate": 2.377313533841047e-07, "loss": 17.3622, "step": 49478 }, { "epoch": 0.9044363609775714, "grad_norm": 5.51990523666815, "learning_rate": 2.376411719672106e-07, "loss": 17.0474, "step": 49479 }, { "epoch": 0.904454640174018, "grad_norm": 4.838838424577215, "learning_rate": 2.3755100724197178e-07, "loss": 16.6762, "step": 49480 }, { "epoch": 0.9044729193704645, "grad_norm": 7.179066795804983, "learning_rate": 2.3746085920870355e-07, "loss": 17.8967, "step": 49481 }, { "epoch": 0.904491198566911, "grad_norm": 5.8143655357197765, "learning_rate": 2.373707278677212e-07, "loss": 17.249, "step": 49482 }, { "epoch": 0.9045094777633576, "grad_norm": 5.73548450725568, "learning_rate": 2.3728061321934116e-07, "loss": 16.9106, "step": 49483 }, { "epoch": 0.904527756959804, "grad_norm": 5.605630297614448, "learning_rate": 2.371905152638798e-07, "loss": 17.1549, "step": 49484 }, { "epoch": 0.9045460361562506, "grad_norm": 6.549195236450208, "learning_rate": 2.3710043400165138e-07, "loss": 17.8109, "step": 49485 }, { "epoch": 0.9045643153526971, "grad_norm": 6.181020781297515, "learning_rate": 2.3701036943297285e-07, "loss": 17.355, "step": 49486 }, { "epoch": 0.9045825945491436, "grad_norm": 6.519984975778685, "learning_rate": 2.3692032155816002e-07, "loss": 17.4328, "step": 49487 }, { "epoch": 0.9046008737455902, "grad_norm": 6.524544741289901, "learning_rate": 2.3683029037752825e-07, "loss": 17.1715, "step": 49488 }, { "epoch": 0.9046191529420367, "grad_norm": 5.850978853730815, "learning_rate": 2.3674027589139337e-07, "loss": 17.3255, "step": 49489 }, { "epoch": 0.9046374321384832, "grad_norm": 6.372579542074942, "learning_rate": 2.3665027810006958e-07, "loss": 17.358, "step": 49490 }, { "epoch": 0.9046557113349297, "grad_norm": 7.703952041502489, "learning_rate": 2.3656029700387384e-07, "loss": 17.8198, "step": 49491 }, { "epoch": 0.9046739905313762, "grad_norm": 5.667011669481787, "learning_rate": 2.364703326031209e-07, "loss": 17.2523, "step": 49492 }, { "epoch": 0.9046922697278228, "grad_norm": 6.211475964398525, "learning_rate": 2.3638038489812554e-07, "loss": 17.5406, "step": 49493 }, { "epoch": 0.9047105489242693, "grad_norm": 6.3714725370209875, "learning_rate": 2.3629045388920413e-07, "loss": 17.3429, "step": 49494 }, { "epoch": 0.9047288281207159, "grad_norm": 6.994424371218925, "learning_rate": 2.362005395766709e-07, "loss": 17.5339, "step": 49495 }, { "epoch": 0.9047471073171623, "grad_norm": 6.0985712241804855, "learning_rate": 2.361106419608411e-07, "loss": 17.3424, "step": 49496 }, { "epoch": 0.9047653865136088, "grad_norm": 7.474658256001617, "learning_rate": 2.360207610420312e-07, "loss": 17.7101, "step": 49497 }, { "epoch": 0.9047836657100554, "grad_norm": 5.474043060016001, "learning_rate": 2.3593089682055482e-07, "loss": 17.1136, "step": 49498 }, { "epoch": 0.9048019449065019, "grad_norm": 6.4626067323822305, "learning_rate": 2.358410492967267e-07, "loss": 17.6608, "step": 49499 }, { "epoch": 0.9048202241029485, "grad_norm": 6.984805724807429, "learning_rate": 2.3575121847086268e-07, "loss": 17.6083, "step": 49500 }, { "epoch": 0.904838503299395, "grad_norm": 8.052714193546528, "learning_rate": 2.3566140434327756e-07, "loss": 17.8881, "step": 49501 }, { "epoch": 0.9048567824958414, "grad_norm": 5.831884484476918, "learning_rate": 2.3557160691428494e-07, "loss": 17.1295, "step": 49502 }, { "epoch": 0.904875061692288, "grad_norm": 5.9481467031842525, "learning_rate": 2.3548182618420012e-07, "loss": 17.1493, "step": 49503 }, { "epoch": 0.9048933408887345, "grad_norm": 6.034866156552641, "learning_rate": 2.35392062153339e-07, "loss": 17.4311, "step": 49504 }, { "epoch": 0.904911620085181, "grad_norm": 9.824316940802234, "learning_rate": 2.353023148220146e-07, "loss": 18.7784, "step": 49505 }, { "epoch": 0.9049298992816276, "grad_norm": 7.790106395136528, "learning_rate": 2.3521258419054282e-07, "loss": 17.6617, "step": 49506 }, { "epoch": 0.9049481784780741, "grad_norm": 4.64100218103923, "learning_rate": 2.3512287025923675e-07, "loss": 16.7095, "step": 49507 }, { "epoch": 0.9049664576745207, "grad_norm": 6.736907558078908, "learning_rate": 2.3503317302841221e-07, "loss": 17.4962, "step": 49508 }, { "epoch": 0.9049847368709671, "grad_norm": 8.996860550701332, "learning_rate": 2.3494349249838288e-07, "loss": 18.2771, "step": 49509 }, { "epoch": 0.9050030160674136, "grad_norm": 6.613147853884717, "learning_rate": 2.3485382866946237e-07, "loss": 17.6016, "step": 49510 }, { "epoch": 0.9050212952638602, "grad_norm": 5.352461616172751, "learning_rate": 2.3476418154196656e-07, "loss": 17.0689, "step": 49511 }, { "epoch": 0.9050395744603067, "grad_norm": 5.003671158962974, "learning_rate": 2.3467455111620796e-07, "loss": 16.9318, "step": 49512 }, { "epoch": 0.9050578536567533, "grad_norm": 7.094025404076799, "learning_rate": 2.3458493739250132e-07, "loss": 17.4013, "step": 49513 }, { "epoch": 0.9050761328531998, "grad_norm": 6.375581712664031, "learning_rate": 2.3449534037116195e-07, "loss": 17.3398, "step": 49514 }, { "epoch": 0.9050944120496462, "grad_norm": 6.924364640889229, "learning_rate": 2.3440576005250293e-07, "loss": 17.4557, "step": 49515 }, { "epoch": 0.9051126912460928, "grad_norm": 6.764372209717078, "learning_rate": 2.3431619643683735e-07, "loss": 17.6283, "step": 49516 }, { "epoch": 0.9051309704425393, "grad_norm": 6.377888929551589, "learning_rate": 2.3422664952448105e-07, "loss": 17.2856, "step": 49517 }, { "epoch": 0.9051492496389859, "grad_norm": 6.06154406029473, "learning_rate": 2.3413711931574546e-07, "loss": 17.2268, "step": 49518 }, { "epoch": 0.9051675288354324, "grad_norm": 6.076290698790373, "learning_rate": 2.34047605810947e-07, "loss": 17.6881, "step": 49519 }, { "epoch": 0.9051858080318789, "grad_norm": 5.21801177805389, "learning_rate": 2.3395810901039763e-07, "loss": 16.9647, "step": 49520 }, { "epoch": 0.9052040872283255, "grad_norm": 6.802048962787382, "learning_rate": 2.3386862891441097e-07, "loss": 17.4302, "step": 49521 }, { "epoch": 0.9052223664247719, "grad_norm": 7.168989319792646, "learning_rate": 2.3377916552330128e-07, "loss": 17.9674, "step": 49522 }, { "epoch": 0.9052406456212185, "grad_norm": 6.138395881924461, "learning_rate": 2.3368971883738268e-07, "loss": 17.1746, "step": 49523 }, { "epoch": 0.905258924817665, "grad_norm": 6.170033876007316, "learning_rate": 2.3360028885696717e-07, "loss": 17.1378, "step": 49524 }, { "epoch": 0.9052772040141115, "grad_norm": 6.222996436890713, "learning_rate": 2.335108755823695e-07, "loss": 17.3289, "step": 49525 }, { "epoch": 0.9052954832105581, "grad_norm": 6.236148687626718, "learning_rate": 2.3342147901390333e-07, "loss": 17.3265, "step": 49526 }, { "epoch": 0.9053137624070046, "grad_norm": 8.526300133334262, "learning_rate": 2.3333209915188005e-07, "loss": 17.4074, "step": 49527 }, { "epoch": 0.9053320416034512, "grad_norm": 5.68737670799997, "learning_rate": 2.3324273599661495e-07, "loss": 17.3074, "step": 49528 }, { "epoch": 0.9053503207998976, "grad_norm": 7.043450761927996, "learning_rate": 2.3315338954842004e-07, "loss": 17.9802, "step": 49529 }, { "epoch": 0.9053685999963441, "grad_norm": 8.814330061219753, "learning_rate": 2.3306405980760894e-07, "loss": 18.3113, "step": 49530 }, { "epoch": 0.9053868791927907, "grad_norm": 7.548608318265567, "learning_rate": 2.3297474677449417e-07, "loss": 17.5695, "step": 49531 }, { "epoch": 0.9054051583892372, "grad_norm": 5.628680676303208, "learning_rate": 2.3288545044938992e-07, "loss": 17.0927, "step": 49532 }, { "epoch": 0.9054234375856838, "grad_norm": 5.540449700173642, "learning_rate": 2.3279617083260763e-07, "loss": 16.9741, "step": 49533 }, { "epoch": 0.9054417167821303, "grad_norm": 7.158408130352597, "learning_rate": 2.3270690792446206e-07, "loss": 17.7718, "step": 49534 }, { "epoch": 0.9054599959785767, "grad_norm": 8.550102531704788, "learning_rate": 2.3261766172526456e-07, "loss": 18.0631, "step": 49535 }, { "epoch": 0.9054782751750233, "grad_norm": 6.972529737766605, "learning_rate": 2.3252843223532884e-07, "loss": 17.5503, "step": 49536 }, { "epoch": 0.9054965543714698, "grad_norm": 6.6953854789946705, "learning_rate": 2.324392194549674e-07, "loss": 17.5455, "step": 49537 }, { "epoch": 0.9055148335679164, "grad_norm": 8.152550183193846, "learning_rate": 2.323500233844922e-07, "loss": 17.2756, "step": 49538 }, { "epoch": 0.9055331127643629, "grad_norm": 5.708219917833776, "learning_rate": 2.322608440242169e-07, "loss": 17.1658, "step": 49539 }, { "epoch": 0.9055513919608094, "grad_norm": 7.579360972557308, "learning_rate": 2.321716813744529e-07, "loss": 18.2122, "step": 49540 }, { "epoch": 0.905569671157256, "grad_norm": 7.321233256315674, "learning_rate": 2.3208253543551386e-07, "loss": 17.7875, "step": 49541 }, { "epoch": 0.9055879503537024, "grad_norm": 7.337078278399275, "learning_rate": 2.3199340620771227e-07, "loss": 17.4536, "step": 49542 }, { "epoch": 0.905606229550149, "grad_norm": 6.101000749218766, "learning_rate": 2.3190429369135958e-07, "loss": 17.2435, "step": 49543 }, { "epoch": 0.9056245087465955, "grad_norm": 7.048767357070104, "learning_rate": 2.318151978867683e-07, "loss": 17.5753, "step": 49544 }, { "epoch": 0.905642787943042, "grad_norm": 6.698047084733416, "learning_rate": 2.3172611879425154e-07, "loss": 17.5708, "step": 49545 }, { "epoch": 0.9056610671394886, "grad_norm": 5.553652115991379, "learning_rate": 2.3163705641412125e-07, "loss": 17.3385, "step": 49546 }, { "epoch": 0.905679346335935, "grad_norm": 5.9689612709188475, "learning_rate": 2.315480107466883e-07, "loss": 17.4464, "step": 49547 }, { "epoch": 0.9056976255323816, "grad_norm": 7.877694360063421, "learning_rate": 2.3145898179226689e-07, "loss": 17.8339, "step": 49548 }, { "epoch": 0.9057159047288281, "grad_norm": 6.042994827077156, "learning_rate": 2.3136996955116675e-07, "loss": 17.2492, "step": 49549 }, { "epoch": 0.9057341839252746, "grad_norm": 7.196193994302741, "learning_rate": 2.3128097402370152e-07, "loss": 17.4484, "step": 49550 }, { "epoch": 0.9057524631217212, "grad_norm": 5.565249951659622, "learning_rate": 2.311919952101832e-07, "loss": 16.8676, "step": 49551 }, { "epoch": 0.9057707423181677, "grad_norm": 5.638174308830129, "learning_rate": 2.3110303311092208e-07, "loss": 17.4378, "step": 49552 }, { "epoch": 0.9057890215146143, "grad_norm": 6.036919014513502, "learning_rate": 2.3101408772623179e-07, "loss": 17.0287, "step": 49553 }, { "epoch": 0.9058073007110607, "grad_norm": 5.322020722257534, "learning_rate": 2.3092515905642376e-07, "loss": 17.0214, "step": 49554 }, { "epoch": 0.9058255799075072, "grad_norm": 5.0345804041785645, "learning_rate": 2.308362471018083e-07, "loss": 17.073, "step": 49555 }, { "epoch": 0.9058438591039538, "grad_norm": 7.5620491176212905, "learning_rate": 2.3074735186269848e-07, "loss": 17.7632, "step": 49556 }, { "epoch": 0.9058621383004003, "grad_norm": 13.882381243124323, "learning_rate": 2.3065847333940517e-07, "loss": 17.7197, "step": 49557 }, { "epoch": 0.9058804174968469, "grad_norm": 5.097519090611981, "learning_rate": 2.3056961153223977e-07, "loss": 17.0348, "step": 49558 }, { "epoch": 0.9058986966932934, "grad_norm": 5.339133181652702, "learning_rate": 2.3048076644151374e-07, "loss": 17.102, "step": 49559 }, { "epoch": 0.9059169758897399, "grad_norm": 6.992936892859958, "learning_rate": 2.3039193806753958e-07, "loss": 17.5786, "step": 49560 }, { "epoch": 0.9059352550861864, "grad_norm": 5.236214177255406, "learning_rate": 2.3030312641062702e-07, "loss": 17.04, "step": 49561 }, { "epoch": 0.9059535342826329, "grad_norm": 6.026636600329026, "learning_rate": 2.302143314710892e-07, "loss": 17.172, "step": 49562 }, { "epoch": 0.9059718134790795, "grad_norm": 6.157820095270568, "learning_rate": 2.301255532492358e-07, "loss": 17.2521, "step": 49563 }, { "epoch": 0.905990092675526, "grad_norm": 5.384022546403078, "learning_rate": 2.3003679174537774e-07, "loss": 17.0072, "step": 49564 }, { "epoch": 0.9060083718719725, "grad_norm": 8.198101973442975, "learning_rate": 2.2994804695982753e-07, "loss": 18.3202, "step": 49565 }, { "epoch": 0.9060266510684191, "grad_norm": 5.223817767883943, "learning_rate": 2.2985931889289492e-07, "loss": 17.0191, "step": 49566 }, { "epoch": 0.9060449302648655, "grad_norm": 5.785060800236239, "learning_rate": 2.2977060754489188e-07, "loss": 17.216, "step": 49567 }, { "epoch": 0.9060632094613121, "grad_norm": 5.863485193808444, "learning_rate": 2.2968191291612874e-07, "loss": 17.3525, "step": 49568 }, { "epoch": 0.9060814886577586, "grad_norm": 5.795121327023346, "learning_rate": 2.2959323500691577e-07, "loss": 17.2751, "step": 49569 }, { "epoch": 0.9060997678542051, "grad_norm": 6.017960751019998, "learning_rate": 2.2950457381756608e-07, "loss": 17.2527, "step": 49570 }, { "epoch": 0.9061180470506517, "grad_norm": 6.256765303178705, "learning_rate": 2.294159293483883e-07, "loss": 17.4646, "step": 49571 }, { "epoch": 0.9061363262470982, "grad_norm": 6.02350306321244, "learning_rate": 2.293273015996933e-07, "loss": 17.5546, "step": 49572 }, { "epoch": 0.9061546054435446, "grad_norm": 8.524393224353817, "learning_rate": 2.292386905717925e-07, "loss": 17.9435, "step": 49573 }, { "epoch": 0.9061728846399912, "grad_norm": 6.577451357437673, "learning_rate": 2.2915009626499673e-07, "loss": 17.5202, "step": 49574 }, { "epoch": 0.9061911638364377, "grad_norm": 7.9049588753109505, "learning_rate": 2.2906151867961468e-07, "loss": 17.5816, "step": 49575 }, { "epoch": 0.9062094430328843, "grad_norm": 5.109670598804431, "learning_rate": 2.289729578159583e-07, "loss": 16.9727, "step": 49576 }, { "epoch": 0.9062277222293308, "grad_norm": 6.254213125235292, "learning_rate": 2.288844136743379e-07, "loss": 17.402, "step": 49577 }, { "epoch": 0.9062460014257773, "grad_norm": 6.44833914543971, "learning_rate": 2.287958862550632e-07, "loss": 16.9546, "step": 49578 }, { "epoch": 0.9062642806222239, "grad_norm": 6.1443712204922685, "learning_rate": 2.287073755584457e-07, "loss": 17.3699, "step": 49579 }, { "epoch": 0.9062825598186703, "grad_norm": 6.606391283451127, "learning_rate": 2.2861888158479394e-07, "loss": 17.6555, "step": 49580 }, { "epoch": 0.9063008390151169, "grad_norm": 7.323779685352284, "learning_rate": 2.2853040433441997e-07, "loss": 17.8839, "step": 49581 }, { "epoch": 0.9063191182115634, "grad_norm": 5.888174941861462, "learning_rate": 2.284419438076324e-07, "loss": 17.2954, "step": 49582 }, { "epoch": 0.9063373974080099, "grad_norm": 4.8550843101111925, "learning_rate": 2.2835350000474155e-07, "loss": 16.931, "step": 49583 }, { "epoch": 0.9063556766044565, "grad_norm": 5.799159487744931, "learning_rate": 2.2826507292605827e-07, "loss": 17.2049, "step": 49584 }, { "epoch": 0.906373955800903, "grad_norm": 5.565787198895988, "learning_rate": 2.2817666257189064e-07, "loss": 17.0425, "step": 49585 }, { "epoch": 0.9063922349973496, "grad_norm": 5.312457331241991, "learning_rate": 2.280882689425501e-07, "loss": 16.8633, "step": 49586 }, { "epoch": 0.906410514193796, "grad_norm": 9.46781259555454, "learning_rate": 2.2799989203834694e-07, "loss": 18.4114, "step": 49587 }, { "epoch": 0.9064287933902425, "grad_norm": 7.7298959542853405, "learning_rate": 2.2791153185958982e-07, "loss": 17.5883, "step": 49588 }, { "epoch": 0.9064470725866891, "grad_norm": 6.447936211651051, "learning_rate": 2.278231884065879e-07, "loss": 17.4058, "step": 49589 }, { "epoch": 0.9064653517831356, "grad_norm": 6.506838575138652, "learning_rate": 2.2773486167965264e-07, "loss": 16.965, "step": 49590 }, { "epoch": 0.9064836309795822, "grad_norm": 5.055429562460441, "learning_rate": 2.276465516790921e-07, "loss": 16.8871, "step": 49591 }, { "epoch": 0.9065019101760287, "grad_norm": 5.930786984192362, "learning_rate": 2.2755825840521605e-07, "loss": 17.3022, "step": 49592 }, { "epoch": 0.9065201893724751, "grad_norm": 6.198737890519351, "learning_rate": 2.274699818583348e-07, "loss": 17.6139, "step": 49593 }, { "epoch": 0.9065384685689217, "grad_norm": 6.866087254780394, "learning_rate": 2.273817220387564e-07, "loss": 17.4442, "step": 49594 }, { "epoch": 0.9065567477653682, "grad_norm": 6.992952223762442, "learning_rate": 2.2729347894679065e-07, "loss": 17.3841, "step": 49595 }, { "epoch": 0.9065750269618148, "grad_norm": 6.68508292037826, "learning_rate": 2.2720525258274783e-07, "loss": 17.5034, "step": 49596 }, { "epoch": 0.9065933061582613, "grad_norm": 5.091305926183518, "learning_rate": 2.2711704294693604e-07, "loss": 16.907, "step": 49597 }, { "epoch": 0.9066115853547078, "grad_norm": 7.727896674001989, "learning_rate": 2.2702885003966502e-07, "loss": 17.4712, "step": 49598 }, { "epoch": 0.9066298645511544, "grad_norm": 6.017762586790448, "learning_rate": 2.2694067386124396e-07, "loss": 17.2789, "step": 49599 }, { "epoch": 0.9066481437476008, "grad_norm": 5.824127682494409, "learning_rate": 2.2685251441198098e-07, "loss": 17.215, "step": 49600 }, { "epoch": 0.9066664229440474, "grad_norm": 7.199141229267526, "learning_rate": 2.267643716921869e-07, "loss": 17.5363, "step": 49601 }, { "epoch": 0.9066847021404939, "grad_norm": 7.425894407844141, "learning_rate": 2.2667624570216874e-07, "loss": 17.6723, "step": 49602 }, { "epoch": 0.9067029813369404, "grad_norm": 6.809895216965308, "learning_rate": 2.2658813644223564e-07, "loss": 17.7856, "step": 49603 }, { "epoch": 0.906721260533387, "grad_norm": 6.232960439204207, "learning_rate": 2.2650004391269742e-07, "loss": 17.0153, "step": 49604 }, { "epoch": 0.9067395397298335, "grad_norm": 5.929380396089148, "learning_rate": 2.264119681138627e-07, "loss": 17.3057, "step": 49605 }, { "epoch": 0.90675781892628, "grad_norm": 7.743740542597845, "learning_rate": 2.2632390904603896e-07, "loss": 17.98, "step": 49606 }, { "epoch": 0.9067760981227265, "grad_norm": 6.368000255712312, "learning_rate": 2.2623586670953656e-07, "loss": 17.1654, "step": 49607 }, { "epoch": 0.906794377319173, "grad_norm": 8.234321256317475, "learning_rate": 2.2614784110466304e-07, "loss": 18.2553, "step": 49608 }, { "epoch": 0.9068126565156196, "grad_norm": 6.1502964440435575, "learning_rate": 2.2605983223172646e-07, "loss": 17.2568, "step": 49609 }, { "epoch": 0.9068309357120661, "grad_norm": 5.604734197732828, "learning_rate": 2.2597184009103657e-07, "loss": 17.0766, "step": 49610 }, { "epoch": 0.9068492149085127, "grad_norm": 6.743371240327929, "learning_rate": 2.2588386468290035e-07, "loss": 17.325, "step": 49611 }, { "epoch": 0.9068674941049591, "grad_norm": 8.269212735088777, "learning_rate": 2.2579590600762758e-07, "loss": 18.1669, "step": 49612 }, { "epoch": 0.9068857733014056, "grad_norm": 6.13159494847474, "learning_rate": 2.257079640655252e-07, "loss": 17.357, "step": 49613 }, { "epoch": 0.9069040524978522, "grad_norm": 5.183634815423822, "learning_rate": 2.2562003885690243e-07, "loss": 17.0376, "step": 49614 }, { "epoch": 0.9069223316942987, "grad_norm": 6.322769167696079, "learning_rate": 2.2553213038206735e-07, "loss": 17.3464, "step": 49615 }, { "epoch": 0.9069406108907453, "grad_norm": 4.893331587382418, "learning_rate": 2.2544423864132804e-07, "loss": 16.9739, "step": 49616 }, { "epoch": 0.9069588900871918, "grad_norm": 6.701400386518836, "learning_rate": 2.2535636363499147e-07, "loss": 17.7544, "step": 49617 }, { "epoch": 0.9069771692836383, "grad_norm": 5.587911586194006, "learning_rate": 2.2526850536336743e-07, "loss": 17.2583, "step": 49618 }, { "epoch": 0.9069954484800848, "grad_norm": 8.280704348622542, "learning_rate": 2.2518066382676284e-07, "loss": 17.6407, "step": 49619 }, { "epoch": 0.9070137276765313, "grad_norm": 5.982938819926152, "learning_rate": 2.2509283902548528e-07, "loss": 17.2078, "step": 49620 }, { "epoch": 0.9070320068729779, "grad_norm": 7.31392290439612, "learning_rate": 2.250050309598434e-07, "loss": 17.7831, "step": 49621 }, { "epoch": 0.9070502860694244, "grad_norm": 6.528203295684722, "learning_rate": 2.2491723963014412e-07, "loss": 17.314, "step": 49622 }, { "epoch": 0.9070685652658709, "grad_norm": 6.358495502291159, "learning_rate": 2.2482946503669556e-07, "loss": 16.9226, "step": 49623 }, { "epoch": 0.9070868444623175, "grad_norm": 10.257570759275731, "learning_rate": 2.247417071798058e-07, "loss": 18.9185, "step": 49624 }, { "epoch": 0.907105123658764, "grad_norm": 5.676503586196498, "learning_rate": 2.2465396605978184e-07, "loss": 17.3969, "step": 49625 }, { "epoch": 0.9071234028552105, "grad_norm": 5.814644941775615, "learning_rate": 2.2456624167693065e-07, "loss": 17.4909, "step": 49626 }, { "epoch": 0.907141682051657, "grad_norm": 5.167546344072026, "learning_rate": 2.2447853403156139e-07, "loss": 16.8085, "step": 49627 }, { "epoch": 0.9071599612481035, "grad_norm": 5.268400334172319, "learning_rate": 2.2439084312397942e-07, "loss": 16.9507, "step": 49628 }, { "epoch": 0.9071782404445501, "grad_norm": 6.239237433630443, "learning_rate": 2.2430316895449445e-07, "loss": 17.6276, "step": 49629 }, { "epoch": 0.9071965196409966, "grad_norm": 7.775983791121789, "learning_rate": 2.2421551152341125e-07, "loss": 17.7195, "step": 49630 }, { "epoch": 0.9072147988374432, "grad_norm": 5.9166685722567545, "learning_rate": 2.2412787083103904e-07, "loss": 17.5991, "step": 49631 }, { "epoch": 0.9072330780338896, "grad_norm": 5.688458286795114, "learning_rate": 2.2404024687768366e-07, "loss": 17.0963, "step": 49632 }, { "epoch": 0.9072513572303361, "grad_norm": 5.825430302152312, "learning_rate": 2.239526396636532e-07, "loss": 17.2221, "step": 49633 }, { "epoch": 0.9072696364267827, "grad_norm": 6.613162280299994, "learning_rate": 2.2386504918925357e-07, "loss": 17.4391, "step": 49634 }, { "epoch": 0.9072879156232292, "grad_norm": 6.804704786041237, "learning_rate": 2.2377747545479333e-07, "loss": 17.728, "step": 49635 }, { "epoch": 0.9073061948196758, "grad_norm": 6.408466165675465, "learning_rate": 2.2368991846057896e-07, "loss": 17.5575, "step": 49636 }, { "epoch": 0.9073244740161223, "grad_norm": 7.992587449758643, "learning_rate": 2.2360237820691576e-07, "loss": 17.7807, "step": 49637 }, { "epoch": 0.9073427532125687, "grad_norm": 5.851274195330262, "learning_rate": 2.2351485469411237e-07, "loss": 17.382, "step": 49638 }, { "epoch": 0.9073610324090153, "grad_norm": 6.6511478415660905, "learning_rate": 2.2342734792247467e-07, "loss": 17.5358, "step": 49639 }, { "epoch": 0.9073793116054618, "grad_norm": 5.6564881957092705, "learning_rate": 2.233398578923096e-07, "loss": 17.2273, "step": 49640 }, { "epoch": 0.9073975908019083, "grad_norm": 6.630506735639012, "learning_rate": 2.2325238460392418e-07, "loss": 17.6176, "step": 49641 }, { "epoch": 0.9074158699983549, "grad_norm": 8.977612059441137, "learning_rate": 2.2316492805762424e-07, "loss": 17.8727, "step": 49642 }, { "epoch": 0.9074341491948014, "grad_norm": 6.880911067661506, "learning_rate": 2.2307748825371678e-07, "loss": 17.8003, "step": 49643 }, { "epoch": 0.907452428391248, "grad_norm": 5.837693710187878, "learning_rate": 2.2299006519250876e-07, "loss": 17.2559, "step": 49644 }, { "epoch": 0.9074707075876944, "grad_norm": 6.571450348295884, "learning_rate": 2.229026588743055e-07, "loss": 17.1595, "step": 49645 }, { "epoch": 0.9074889867841409, "grad_norm": 4.708793365297207, "learning_rate": 2.2281526929941455e-07, "loss": 16.921, "step": 49646 }, { "epoch": 0.9075072659805875, "grad_norm": 5.6156943269939985, "learning_rate": 2.227278964681412e-07, "loss": 17.0971, "step": 49647 }, { "epoch": 0.907525545177034, "grad_norm": 6.541661153262188, "learning_rate": 2.226405403807913e-07, "loss": 17.2819, "step": 49648 }, { "epoch": 0.9075438243734806, "grad_norm": 4.550175025992366, "learning_rate": 2.2255320103767185e-07, "loss": 16.8373, "step": 49649 }, { "epoch": 0.9075621035699271, "grad_norm": 5.91081548500142, "learning_rate": 2.2246587843908985e-07, "loss": 17.1341, "step": 49650 }, { "epoch": 0.9075803827663735, "grad_norm": 5.66005118054071, "learning_rate": 2.2237857258534944e-07, "loss": 17.0743, "step": 49651 }, { "epoch": 0.9075986619628201, "grad_norm": 6.364760276388105, "learning_rate": 2.2229128347675822e-07, "loss": 17.4306, "step": 49652 }, { "epoch": 0.9076169411592666, "grad_norm": 6.425974996650685, "learning_rate": 2.2220401111362143e-07, "loss": 17.2445, "step": 49653 }, { "epoch": 0.9076352203557132, "grad_norm": 5.246660808481092, "learning_rate": 2.2211675549624446e-07, "loss": 16.8833, "step": 49654 }, { "epoch": 0.9076534995521597, "grad_norm": 7.4280543948435005, "learning_rate": 2.220295166249342e-07, "loss": 17.9388, "step": 49655 }, { "epoch": 0.9076717787486062, "grad_norm": 8.45805991631865, "learning_rate": 2.2194229449999494e-07, "loss": 17.8605, "step": 49656 }, { "epoch": 0.9076900579450528, "grad_norm": 7.60452571488883, "learning_rate": 2.2185508912173415e-07, "loss": 17.4791, "step": 49657 }, { "epoch": 0.9077083371414992, "grad_norm": 5.950453176302403, "learning_rate": 2.217679004904566e-07, "loss": 17.2683, "step": 49658 }, { "epoch": 0.9077266163379458, "grad_norm": 6.963395903683609, "learning_rate": 2.2168072860646704e-07, "loss": 17.4108, "step": 49659 }, { "epoch": 0.9077448955343923, "grad_norm": 6.3384145675230625, "learning_rate": 2.2159357347007304e-07, "loss": 17.4384, "step": 49660 }, { "epoch": 0.9077631747308388, "grad_norm": 6.297757894208436, "learning_rate": 2.2150643508157876e-07, "loss": 17.229, "step": 49661 }, { "epoch": 0.9077814539272854, "grad_norm": 6.665756672703256, "learning_rate": 2.2141931344128897e-07, "loss": 17.8205, "step": 49662 }, { "epoch": 0.9077997331237319, "grad_norm": 5.331090123195987, "learning_rate": 2.2133220854951064e-07, "loss": 16.9991, "step": 49663 }, { "epoch": 0.9078180123201784, "grad_norm": 7.894890339664612, "learning_rate": 2.2124512040654856e-07, "loss": 17.343, "step": 49664 }, { "epoch": 0.9078362915166249, "grad_norm": 7.353181684025176, "learning_rate": 2.2115804901270686e-07, "loss": 17.6209, "step": 49665 }, { "epoch": 0.9078545707130714, "grad_norm": 5.575708245607423, "learning_rate": 2.2107099436829206e-07, "loss": 16.984, "step": 49666 }, { "epoch": 0.907872849909518, "grad_norm": 5.784574824382052, "learning_rate": 2.2098395647360771e-07, "loss": 17.0857, "step": 49667 }, { "epoch": 0.9078911291059645, "grad_norm": 5.559283004893655, "learning_rate": 2.2089693532896028e-07, "loss": 17.0147, "step": 49668 }, { "epoch": 0.9079094083024111, "grad_norm": 5.557082865319281, "learning_rate": 2.2080993093465508e-07, "loss": 17.0888, "step": 49669 }, { "epoch": 0.9079276874988575, "grad_norm": 6.078350807307045, "learning_rate": 2.207229432909963e-07, "loss": 17.1874, "step": 49670 }, { "epoch": 0.907945966695304, "grad_norm": 6.430395692007922, "learning_rate": 2.206359723982876e-07, "loss": 17.5408, "step": 49671 }, { "epoch": 0.9079642458917506, "grad_norm": 6.523067980388753, "learning_rate": 2.205490182568365e-07, "loss": 17.2719, "step": 49672 }, { "epoch": 0.9079825250881971, "grad_norm": 5.439348162439912, "learning_rate": 2.2046208086694553e-07, "loss": 16.9553, "step": 49673 }, { "epoch": 0.9080008042846437, "grad_norm": 5.5751220825119905, "learning_rate": 2.203751602289206e-07, "loss": 17.0307, "step": 49674 }, { "epoch": 0.9080190834810902, "grad_norm": 8.91731086309263, "learning_rate": 2.202882563430664e-07, "loss": 17.9896, "step": 49675 }, { "epoch": 0.9080373626775367, "grad_norm": 5.992675550881476, "learning_rate": 2.2020136920968604e-07, "loss": 17.3857, "step": 49676 }, { "epoch": 0.9080556418739832, "grad_norm": 6.19334316324238, "learning_rate": 2.2011449882908542e-07, "loss": 17.6246, "step": 49677 }, { "epoch": 0.9080739210704297, "grad_norm": 6.1007605175981725, "learning_rate": 2.2002764520156928e-07, "loss": 17.4596, "step": 49678 }, { "epoch": 0.9080922002668763, "grad_norm": 6.858694031414269, "learning_rate": 2.199408083274407e-07, "loss": 17.7892, "step": 49679 }, { "epoch": 0.9081104794633228, "grad_norm": 7.2745978250942995, "learning_rate": 2.1985398820700555e-07, "loss": 17.237, "step": 49680 }, { "epoch": 0.9081287586597693, "grad_norm": 6.056632631118724, "learning_rate": 2.1976718484056748e-07, "loss": 17.2475, "step": 49681 }, { "epoch": 0.9081470378562159, "grad_norm": 6.616878089427067, "learning_rate": 2.1968039822842957e-07, "loss": 17.4567, "step": 49682 }, { "epoch": 0.9081653170526623, "grad_norm": 5.967584354398022, "learning_rate": 2.1959362837089826e-07, "loss": 17.4276, "step": 49683 }, { "epoch": 0.9081835962491089, "grad_norm": 6.434755943882418, "learning_rate": 2.1950687526827606e-07, "loss": 17.2288, "step": 49684 }, { "epoch": 0.9082018754455554, "grad_norm": 6.118953086232537, "learning_rate": 2.1942013892086668e-07, "loss": 17.3683, "step": 49685 }, { "epoch": 0.9082201546420019, "grad_norm": 6.8784692297954635, "learning_rate": 2.1933341932897535e-07, "loss": 17.877, "step": 49686 }, { "epoch": 0.9082384338384485, "grad_norm": 5.993384286971993, "learning_rate": 2.1924671649290518e-07, "loss": 17.3236, "step": 49687 }, { "epoch": 0.908256713034895, "grad_norm": 5.000235689390426, "learning_rate": 2.1916003041296098e-07, "loss": 16.8432, "step": 49688 }, { "epoch": 0.9082749922313416, "grad_norm": 6.471722349437887, "learning_rate": 2.1907336108944631e-07, "loss": 17.5764, "step": 49689 }, { "epoch": 0.908293271427788, "grad_norm": 6.415152223084678, "learning_rate": 2.189867085226638e-07, "loss": 17.4901, "step": 49690 }, { "epoch": 0.9083115506242345, "grad_norm": 6.431957832791834, "learning_rate": 2.1890007271291868e-07, "loss": 17.3677, "step": 49691 }, { "epoch": 0.9083298298206811, "grad_norm": 5.846374798047523, "learning_rate": 2.1881345366051409e-07, "loss": 17.2968, "step": 49692 }, { "epoch": 0.9083481090171276, "grad_norm": 5.110953734923604, "learning_rate": 2.1872685136575254e-07, "loss": 16.9508, "step": 49693 }, { "epoch": 0.9083663882135742, "grad_norm": 6.21400438531622, "learning_rate": 2.1864026582893883e-07, "loss": 17.1668, "step": 49694 }, { "epoch": 0.9083846674100207, "grad_norm": 6.017597964086256, "learning_rate": 2.1855369705037599e-07, "loss": 17.3534, "step": 49695 }, { "epoch": 0.9084029466064671, "grad_norm": 6.370265633443809, "learning_rate": 2.184671450303677e-07, "loss": 17.3439, "step": 49696 }, { "epoch": 0.9084212258029137, "grad_norm": 7.461270024534124, "learning_rate": 2.1838060976921705e-07, "loss": 17.6114, "step": 49697 }, { "epoch": 0.9084395049993602, "grad_norm": 5.788097274007359, "learning_rate": 2.1829409126722822e-07, "loss": 17.0685, "step": 49698 }, { "epoch": 0.9084577841958068, "grad_norm": 7.8393199318644395, "learning_rate": 2.1820758952470267e-07, "loss": 17.1405, "step": 49699 }, { "epoch": 0.9084760633922533, "grad_norm": 7.014568678097328, "learning_rate": 2.1812110454194513e-07, "loss": 17.3835, "step": 49700 }, { "epoch": 0.9084943425886998, "grad_norm": 7.102476827014275, "learning_rate": 2.1803463631925814e-07, "loss": 17.7006, "step": 49701 }, { "epoch": 0.9085126217851464, "grad_norm": 5.032745200749417, "learning_rate": 2.1794818485694426e-07, "loss": 16.9777, "step": 49702 }, { "epoch": 0.9085309009815928, "grad_norm": 5.934734571352816, "learning_rate": 2.1786175015530708e-07, "loss": 17.0766, "step": 49703 }, { "epoch": 0.9085491801780394, "grad_norm": 5.971870890089511, "learning_rate": 2.177753322146492e-07, "loss": 17.3024, "step": 49704 }, { "epoch": 0.9085674593744859, "grad_norm": 6.364857273100375, "learning_rate": 2.1768893103527477e-07, "loss": 17.2629, "step": 49705 }, { "epoch": 0.9085857385709324, "grad_norm": 6.248392716084333, "learning_rate": 2.1760254661748526e-07, "loss": 17.3827, "step": 49706 }, { "epoch": 0.908604017767379, "grad_norm": 5.491618254778279, "learning_rate": 2.1751617896158374e-07, "loss": 17.2438, "step": 49707 }, { "epoch": 0.9086222969638255, "grad_norm": 6.21399107074097, "learning_rate": 2.1742982806787327e-07, "loss": 17.2448, "step": 49708 }, { "epoch": 0.9086405761602719, "grad_norm": 5.651579863130902, "learning_rate": 2.1734349393665642e-07, "loss": 16.8404, "step": 49709 }, { "epoch": 0.9086588553567185, "grad_norm": 4.847428839242253, "learning_rate": 2.1725717656823465e-07, "loss": 16.7641, "step": 49710 }, { "epoch": 0.908677134553165, "grad_norm": 5.93267370116479, "learning_rate": 2.1717087596291209e-07, "loss": 17.2749, "step": 49711 }, { "epoch": 0.9086954137496116, "grad_norm": 5.846980541191615, "learning_rate": 2.170845921209902e-07, "loss": 17.174, "step": 49712 }, { "epoch": 0.9087136929460581, "grad_norm": 6.980713909779334, "learning_rate": 2.1699832504277208e-07, "loss": 16.9479, "step": 49713 }, { "epoch": 0.9087319721425046, "grad_norm": 6.665556385763941, "learning_rate": 2.1691207472855967e-07, "loss": 17.2654, "step": 49714 }, { "epoch": 0.9087502513389512, "grad_norm": 6.618675711835521, "learning_rate": 2.1682584117865613e-07, "loss": 17.5562, "step": 49715 }, { "epoch": 0.9087685305353976, "grad_norm": 7.315717477018857, "learning_rate": 2.167396243933617e-07, "loss": 17.5046, "step": 49716 }, { "epoch": 0.9087868097318442, "grad_norm": 6.021265015509531, "learning_rate": 2.1665342437298064e-07, "loss": 17.2858, "step": 49717 }, { "epoch": 0.9088050889282907, "grad_norm": 5.754108044314008, "learning_rate": 2.165672411178138e-07, "loss": 17.3311, "step": 49718 }, { "epoch": 0.9088233681247372, "grad_norm": 6.018691914504599, "learning_rate": 2.1648107462816426e-07, "loss": 17.2427, "step": 49719 }, { "epoch": 0.9088416473211838, "grad_norm": 8.613045849251568, "learning_rate": 2.1639492490433344e-07, "loss": 18.1492, "step": 49720 }, { "epoch": 0.9088599265176303, "grad_norm": 4.949824213056432, "learning_rate": 2.163087919466228e-07, "loss": 16.8702, "step": 49721 }, { "epoch": 0.9088782057140768, "grad_norm": 7.864741097004294, "learning_rate": 2.162226757553343e-07, "loss": 18.1643, "step": 49722 }, { "epoch": 0.9088964849105233, "grad_norm": 7.050635000695732, "learning_rate": 2.1613657633077102e-07, "loss": 17.2115, "step": 49723 }, { "epoch": 0.9089147641069698, "grad_norm": 5.2168537829242565, "learning_rate": 2.1605049367323327e-07, "loss": 16.9283, "step": 49724 }, { "epoch": 0.9089330433034164, "grad_norm": 7.30964481737654, "learning_rate": 2.1596442778302418e-07, "loss": 17.4577, "step": 49725 }, { "epoch": 0.9089513224998629, "grad_norm": 7.297128160250993, "learning_rate": 2.1587837866044458e-07, "loss": 17.9181, "step": 49726 }, { "epoch": 0.9089696016963095, "grad_norm": 6.446953809806904, "learning_rate": 2.1579234630579593e-07, "loss": 17.1462, "step": 49727 }, { "epoch": 0.908987880892756, "grad_norm": 8.224807042310381, "learning_rate": 2.1570633071938018e-07, "loss": 18.0244, "step": 49728 }, { "epoch": 0.9090061600892024, "grad_norm": 7.508934698224441, "learning_rate": 2.1562033190149823e-07, "loss": 17.5703, "step": 49729 }, { "epoch": 0.909024439285649, "grad_norm": 7.386005461671126, "learning_rate": 2.1553434985245147e-07, "loss": 17.6056, "step": 49730 }, { "epoch": 0.9090427184820955, "grad_norm": 6.751294320965233, "learning_rate": 2.1544838457254192e-07, "loss": 17.5379, "step": 49731 }, { "epoch": 0.9090609976785421, "grad_norm": 5.425716824911269, "learning_rate": 2.1536243606207096e-07, "loss": 16.895, "step": 49732 }, { "epoch": 0.9090792768749886, "grad_norm": 5.949100135570891, "learning_rate": 2.1527650432133895e-07, "loss": 17.3514, "step": 49733 }, { "epoch": 0.909097556071435, "grad_norm": 5.541061624649973, "learning_rate": 2.1519058935064785e-07, "loss": 17.158, "step": 49734 }, { "epoch": 0.9091158352678816, "grad_norm": 4.666117469988733, "learning_rate": 2.1510469115029852e-07, "loss": 16.8075, "step": 49735 }, { "epoch": 0.9091341144643281, "grad_norm": 5.935118685869495, "learning_rate": 2.1501880972059242e-07, "loss": 17.2145, "step": 49736 }, { "epoch": 0.9091523936607747, "grad_norm": 9.292261717027017, "learning_rate": 2.1493294506182983e-07, "loss": 18.5387, "step": 49737 }, { "epoch": 0.9091706728572212, "grad_norm": 6.508067489247457, "learning_rate": 2.1484709717431162e-07, "loss": 17.6262, "step": 49738 }, { "epoch": 0.9091889520536677, "grad_norm": 5.116577218185478, "learning_rate": 2.147612660583398e-07, "loss": 16.9281, "step": 49739 }, { "epoch": 0.9092072312501143, "grad_norm": 7.776646298373489, "learning_rate": 2.146754517142141e-07, "loss": 18.1271, "step": 49740 }, { "epoch": 0.9092255104465607, "grad_norm": 8.265934999119334, "learning_rate": 2.1458965414223542e-07, "loss": 18.0817, "step": 49741 }, { "epoch": 0.9092437896430073, "grad_norm": 4.632370451389428, "learning_rate": 2.1450387334270518e-07, "loss": 16.8258, "step": 49742 }, { "epoch": 0.9092620688394538, "grad_norm": 6.06247208029788, "learning_rate": 2.144181093159242e-07, "loss": 17.2651, "step": 49743 }, { "epoch": 0.9092803480359003, "grad_norm": 6.535732071510143, "learning_rate": 2.143323620621912e-07, "loss": 17.4911, "step": 49744 }, { "epoch": 0.9092986272323469, "grad_norm": 5.382537718885881, "learning_rate": 2.1424663158180926e-07, "loss": 17.2358, "step": 49745 }, { "epoch": 0.9093169064287934, "grad_norm": 6.569218031389391, "learning_rate": 2.14160917875077e-07, "loss": 17.7034, "step": 49746 }, { "epoch": 0.90933518562524, "grad_norm": 5.522136104765352, "learning_rate": 2.140752209422947e-07, "loss": 17.1131, "step": 49747 }, { "epoch": 0.9093534648216864, "grad_norm": 8.299651860330904, "learning_rate": 2.1398954078376443e-07, "loss": 17.5521, "step": 49748 }, { "epoch": 0.9093717440181329, "grad_norm": 5.013774860026497, "learning_rate": 2.1390387739978535e-07, "loss": 16.9736, "step": 49749 }, { "epoch": 0.9093900232145795, "grad_norm": 5.463800088402062, "learning_rate": 2.1381823079065722e-07, "loss": 16.9001, "step": 49750 }, { "epoch": 0.909408302411026, "grad_norm": 5.435507628937143, "learning_rate": 2.1373260095668147e-07, "loss": 17.1385, "step": 49751 }, { "epoch": 0.9094265816074726, "grad_norm": 6.8386562534869, "learning_rate": 2.136469878981573e-07, "loss": 17.7175, "step": 49752 }, { "epoch": 0.9094448608039191, "grad_norm": 5.763180324942262, "learning_rate": 2.135613916153856e-07, "loss": 17.3846, "step": 49753 }, { "epoch": 0.9094631400003655, "grad_norm": 6.451304417580118, "learning_rate": 2.1347581210866552e-07, "loss": 17.5106, "step": 49754 }, { "epoch": 0.9094814191968121, "grad_norm": 6.329831006618529, "learning_rate": 2.1339024937829744e-07, "loss": 17.3458, "step": 49755 }, { "epoch": 0.9094996983932586, "grad_norm": 5.819364605849387, "learning_rate": 2.1330470342458109e-07, "loss": 17.0866, "step": 49756 }, { "epoch": 0.9095179775897052, "grad_norm": 7.547783135937625, "learning_rate": 2.1321917424781624e-07, "loss": 17.7145, "step": 49757 }, { "epoch": 0.9095362567861517, "grad_norm": 7.852823821919208, "learning_rate": 2.131336618483032e-07, "loss": 18.4036, "step": 49758 }, { "epoch": 0.9095545359825982, "grad_norm": 5.269008838084584, "learning_rate": 2.130481662263406e-07, "loss": 17.1484, "step": 49759 }, { "epoch": 0.9095728151790448, "grad_norm": 7.042954480322516, "learning_rate": 2.1296268738222992e-07, "loss": 17.4214, "step": 49760 }, { "epoch": 0.9095910943754912, "grad_norm": 6.3971431888131765, "learning_rate": 2.1287722531626863e-07, "loss": 17.153, "step": 49761 }, { "epoch": 0.9096093735719378, "grad_norm": 8.799402835573849, "learning_rate": 2.1279178002875766e-07, "loss": 17.6085, "step": 49762 }, { "epoch": 0.9096276527683843, "grad_norm": 6.67784433182805, "learning_rate": 2.127063515199962e-07, "loss": 17.528, "step": 49763 }, { "epoch": 0.9096459319648308, "grad_norm": 7.716370522579211, "learning_rate": 2.1262093979028343e-07, "loss": 17.78, "step": 49764 }, { "epoch": 0.9096642111612774, "grad_norm": 7.08951998953481, "learning_rate": 2.1253554483991856e-07, "loss": 17.8458, "step": 49765 }, { "epoch": 0.9096824903577239, "grad_norm": 6.068607933456459, "learning_rate": 2.1245016666920136e-07, "loss": 17.1919, "step": 49766 }, { "epoch": 0.9097007695541705, "grad_norm": 6.754228577836619, "learning_rate": 2.1236480527843052e-07, "loss": 17.4784, "step": 49767 }, { "epoch": 0.9097190487506169, "grad_norm": 5.871948763658373, "learning_rate": 2.1227946066790628e-07, "loss": 16.9296, "step": 49768 }, { "epoch": 0.9097373279470634, "grad_norm": 5.621075325460914, "learning_rate": 2.1219413283792623e-07, "loss": 17.0245, "step": 49769 }, { "epoch": 0.90975560714351, "grad_norm": 6.441823956130826, "learning_rate": 2.1210882178879122e-07, "loss": 17.2195, "step": 49770 }, { "epoch": 0.9097738863399565, "grad_norm": 7.106860541958994, "learning_rate": 2.1202352752079934e-07, "loss": 17.553, "step": 49771 }, { "epoch": 0.9097921655364031, "grad_norm": 5.750447536915298, "learning_rate": 2.1193825003424873e-07, "loss": 17.2855, "step": 49772 }, { "epoch": 0.9098104447328496, "grad_norm": 8.352134722974474, "learning_rate": 2.1185298932943965e-07, "loss": 17.7534, "step": 49773 }, { "epoch": 0.909828723929296, "grad_norm": 5.829201888401837, "learning_rate": 2.1176774540667023e-07, "loss": 17.314, "step": 49774 }, { "epoch": 0.9098470031257426, "grad_norm": 6.377512755292976, "learning_rate": 2.1168251826623908e-07, "loss": 17.2265, "step": 49775 }, { "epoch": 0.9098652823221891, "grad_norm": 6.019298256402319, "learning_rate": 2.1159730790844491e-07, "loss": 17.6002, "step": 49776 }, { "epoch": 0.9098835615186356, "grad_norm": 6.387849546527441, "learning_rate": 2.1151211433358742e-07, "loss": 17.4888, "step": 49777 }, { "epoch": 0.9099018407150822, "grad_norm": 4.7819212142761325, "learning_rate": 2.1142693754196364e-07, "loss": 16.9789, "step": 49778 }, { "epoch": 0.9099201199115287, "grad_norm": 6.689694077519186, "learning_rate": 2.1134177753387385e-07, "loss": 17.3189, "step": 49779 }, { "epoch": 0.9099383991079752, "grad_norm": 5.785725670497672, "learning_rate": 2.1125663430961507e-07, "loss": 17.1117, "step": 49780 }, { "epoch": 0.9099566783044217, "grad_norm": 6.828856874387652, "learning_rate": 2.1117150786948649e-07, "loss": 17.5183, "step": 49781 }, { "epoch": 0.9099749575008682, "grad_norm": 5.609816538089771, "learning_rate": 2.1108639821378617e-07, "loss": 17.0192, "step": 49782 }, { "epoch": 0.9099932366973148, "grad_norm": 5.483570892925239, "learning_rate": 2.1100130534281228e-07, "loss": 16.9484, "step": 49783 }, { "epoch": 0.9100115158937613, "grad_norm": 7.033616091475131, "learning_rate": 2.109162292568634e-07, "loss": 17.5232, "step": 49784 }, { "epoch": 0.9100297950902079, "grad_norm": 5.836113938021809, "learning_rate": 2.108311699562371e-07, "loss": 17.1321, "step": 49785 }, { "epoch": 0.9100480742866544, "grad_norm": 5.880391676873842, "learning_rate": 2.1074612744123202e-07, "loss": 16.9777, "step": 49786 }, { "epoch": 0.9100663534831008, "grad_norm": 5.633180666417298, "learning_rate": 2.1066110171214682e-07, "loss": 16.9655, "step": 49787 }, { "epoch": 0.9100846326795474, "grad_norm": 6.123866275491792, "learning_rate": 2.1057609276927848e-07, "loss": 17.1926, "step": 49788 }, { "epoch": 0.9101029118759939, "grad_norm": 5.692902081664123, "learning_rate": 2.104911006129251e-07, "loss": 16.9992, "step": 49789 }, { "epoch": 0.9101211910724405, "grad_norm": 5.480610810284079, "learning_rate": 2.1040612524338532e-07, "loss": 17.0268, "step": 49790 }, { "epoch": 0.910139470268887, "grad_norm": 5.4285865290626125, "learning_rate": 2.103211666609567e-07, "loss": 16.8185, "step": 49791 }, { "epoch": 0.9101577494653335, "grad_norm": 6.916874749438884, "learning_rate": 2.1023622486593565e-07, "loss": 17.5985, "step": 49792 }, { "epoch": 0.91017602866178, "grad_norm": 5.471936892528067, "learning_rate": 2.1015129985862193e-07, "loss": 17.0292, "step": 49793 }, { "epoch": 0.9101943078582265, "grad_norm": 8.243245302097325, "learning_rate": 2.1006639163931142e-07, "loss": 18.2126, "step": 49794 }, { "epoch": 0.9102125870546731, "grad_norm": 7.654192194512922, "learning_rate": 2.0998150020830276e-07, "loss": 17.9292, "step": 49795 }, { "epoch": 0.9102308662511196, "grad_norm": 6.870584436477306, "learning_rate": 2.0989662556589406e-07, "loss": 17.5003, "step": 49796 }, { "epoch": 0.9102491454475661, "grad_norm": 5.2563545587790195, "learning_rate": 2.098117677123812e-07, "loss": 17.0418, "step": 49797 }, { "epoch": 0.9102674246440127, "grad_norm": 6.0985825568175285, "learning_rate": 2.0972692664806282e-07, "loss": 17.4336, "step": 49798 }, { "epoch": 0.9102857038404591, "grad_norm": 7.5115448006082985, "learning_rate": 2.0964210237323646e-07, "loss": 18.0588, "step": 49799 }, { "epoch": 0.9103039830369057, "grad_norm": 5.750296380588347, "learning_rate": 2.09557294888198e-07, "loss": 17.0119, "step": 49800 }, { "epoch": 0.9103222622333522, "grad_norm": 5.472257847889058, "learning_rate": 2.0947250419324605e-07, "loss": 16.9484, "step": 49801 }, { "epoch": 0.9103405414297987, "grad_norm": 5.952018243647902, "learning_rate": 2.0938773028867708e-07, "loss": 17.4173, "step": 49802 }, { "epoch": 0.9103588206262453, "grad_norm": 4.805612336259466, "learning_rate": 2.093029731747881e-07, "loss": 16.9034, "step": 49803 }, { "epoch": 0.9103770998226918, "grad_norm": 5.661578017963891, "learning_rate": 2.0921823285187603e-07, "loss": 17.2356, "step": 49804 }, { "epoch": 0.9103953790191384, "grad_norm": 5.70373468442291, "learning_rate": 2.09133509320239e-07, "loss": 17.2531, "step": 49805 }, { "epoch": 0.9104136582155848, "grad_norm": 5.55780011875824, "learning_rate": 2.090488025801729e-07, "loss": 17.0222, "step": 49806 }, { "epoch": 0.9104319374120313, "grad_norm": 6.176700973650231, "learning_rate": 2.089641126319758e-07, "loss": 17.345, "step": 49807 }, { "epoch": 0.9104502166084779, "grad_norm": 7.093645948494321, "learning_rate": 2.0887943947594358e-07, "loss": 17.27, "step": 49808 }, { "epoch": 0.9104684958049244, "grad_norm": 6.166617441895696, "learning_rate": 2.0879478311237268e-07, "loss": 17.2488, "step": 49809 }, { "epoch": 0.910486775001371, "grad_norm": 7.892232748172191, "learning_rate": 2.087101435415606e-07, "loss": 17.8527, "step": 49810 }, { "epoch": 0.9105050541978175, "grad_norm": 6.625427628614251, "learning_rate": 2.0862552076380326e-07, "loss": 17.4065, "step": 49811 }, { "epoch": 0.910523333394264, "grad_norm": 7.478474628083235, "learning_rate": 2.085409147793982e-07, "loss": 17.3979, "step": 49812 }, { "epoch": 0.9105416125907105, "grad_norm": 6.873978132422383, "learning_rate": 2.0845632558864127e-07, "loss": 17.436, "step": 49813 }, { "epoch": 0.910559891787157, "grad_norm": 5.759818566875019, "learning_rate": 2.083717531918289e-07, "loss": 17.4078, "step": 49814 }, { "epoch": 0.9105781709836036, "grad_norm": 7.817119171070189, "learning_rate": 2.0828719758925807e-07, "loss": 17.45, "step": 49815 }, { "epoch": 0.9105964501800501, "grad_norm": 6.5015025479133355, "learning_rate": 2.0820265878122524e-07, "loss": 17.2079, "step": 49816 }, { "epoch": 0.9106147293764966, "grad_norm": 5.295344178644939, "learning_rate": 2.0811813676802516e-07, "loss": 16.9498, "step": 49817 }, { "epoch": 0.9106330085729432, "grad_norm": 6.17515288587922, "learning_rate": 2.0803363154995592e-07, "loss": 17.2053, "step": 49818 }, { "epoch": 0.9106512877693896, "grad_norm": 6.0690343620657545, "learning_rate": 2.0794914312731341e-07, "loss": 17.4785, "step": 49819 }, { "epoch": 0.9106695669658362, "grad_norm": 5.196325447800114, "learning_rate": 2.0786467150039235e-07, "loss": 16.9213, "step": 49820 }, { "epoch": 0.9106878461622827, "grad_norm": 6.662226857405525, "learning_rate": 2.0778021666948977e-07, "loss": 17.6555, "step": 49821 }, { "epoch": 0.9107061253587292, "grad_norm": 5.810406977939673, "learning_rate": 2.0769577863490265e-07, "loss": 17.4143, "step": 49822 }, { "epoch": 0.9107244045551758, "grad_norm": 5.191445463098711, "learning_rate": 2.076113573969246e-07, "loss": 16.8007, "step": 49823 }, { "epoch": 0.9107426837516223, "grad_norm": 6.107838073008002, "learning_rate": 2.0752695295585435e-07, "loss": 17.4855, "step": 49824 }, { "epoch": 0.9107609629480689, "grad_norm": 6.6344076477315745, "learning_rate": 2.0744256531198492e-07, "loss": 17.5126, "step": 49825 }, { "epoch": 0.9107792421445153, "grad_norm": 5.576827152770236, "learning_rate": 2.0735819446561445e-07, "loss": 17.0886, "step": 49826 }, { "epoch": 0.9107975213409618, "grad_norm": 6.361605394308454, "learning_rate": 2.0727384041703769e-07, "loss": 17.3195, "step": 49827 }, { "epoch": 0.9108158005374084, "grad_norm": 6.701013771197368, "learning_rate": 2.071895031665494e-07, "loss": 17.2685, "step": 49828 }, { "epoch": 0.9108340797338549, "grad_norm": 5.944074901555187, "learning_rate": 2.0710518271444656e-07, "loss": 17.4485, "step": 49829 }, { "epoch": 0.9108523589303015, "grad_norm": 6.205425087723454, "learning_rate": 2.0702087906102398e-07, "loss": 17.4579, "step": 49830 }, { "epoch": 0.910870638126748, "grad_norm": 4.586626198854176, "learning_rate": 2.069365922065769e-07, "loss": 16.5974, "step": 49831 }, { "epoch": 0.9108889173231944, "grad_norm": 5.845943843731336, "learning_rate": 2.0685232215140183e-07, "loss": 17.0605, "step": 49832 }, { "epoch": 0.910907196519641, "grad_norm": 5.9859966386806835, "learning_rate": 2.0676806889579348e-07, "loss": 18.0248, "step": 49833 }, { "epoch": 0.9109254757160875, "grad_norm": 6.1668787363238176, "learning_rate": 2.0668383244004608e-07, "loss": 17.1218, "step": 49834 }, { "epoch": 0.9109437549125341, "grad_norm": 7.697084439167189, "learning_rate": 2.0659961278445718e-07, "loss": 17.9059, "step": 49835 }, { "epoch": 0.9109620341089806, "grad_norm": 5.016722393358015, "learning_rate": 2.065154099293204e-07, "loss": 16.712, "step": 49836 }, { "epoch": 0.9109803133054271, "grad_norm": 5.16720898820674, "learning_rate": 2.0643122387493053e-07, "loss": 16.9278, "step": 49837 }, { "epoch": 0.9109985925018736, "grad_norm": 8.164598626282817, "learning_rate": 2.06347054621584e-07, "loss": 17.7196, "step": 49838 }, { "epoch": 0.9110168716983201, "grad_norm": 6.388823812209537, "learning_rate": 2.062629021695739e-07, "loss": 17.4989, "step": 49839 }, { "epoch": 0.9110351508947667, "grad_norm": 9.585513950847178, "learning_rate": 2.0617876651919666e-07, "loss": 18.7982, "step": 49840 }, { "epoch": 0.9110534300912132, "grad_norm": 5.8334713142323, "learning_rate": 2.0609464767074761e-07, "loss": 17.0605, "step": 49841 }, { "epoch": 0.9110717092876597, "grad_norm": 5.120664322856013, "learning_rate": 2.0601054562452038e-07, "loss": 16.9776, "step": 49842 }, { "epoch": 0.9110899884841063, "grad_norm": 5.841233404207099, "learning_rate": 2.0592646038081033e-07, "loss": 17.0635, "step": 49843 }, { "epoch": 0.9111082676805528, "grad_norm": 6.931529548070125, "learning_rate": 2.0584239193991274e-07, "loss": 17.8487, "step": 49844 }, { "epoch": 0.9111265468769992, "grad_norm": 8.424685684021664, "learning_rate": 2.0575834030212016e-07, "loss": 17.5302, "step": 49845 }, { "epoch": 0.9111448260734458, "grad_norm": 6.602145922323032, "learning_rate": 2.056743054677296e-07, "loss": 17.5087, "step": 49846 }, { "epoch": 0.9111631052698923, "grad_norm": 7.576490540726996, "learning_rate": 2.0559028743703468e-07, "loss": 17.3106, "step": 49847 }, { "epoch": 0.9111813844663389, "grad_norm": 5.963951584421836, "learning_rate": 2.0550628621032854e-07, "loss": 17.1389, "step": 49848 }, { "epoch": 0.9111996636627854, "grad_norm": 5.4342572686785235, "learning_rate": 2.0542230178790756e-07, "loss": 17.1269, "step": 49849 }, { "epoch": 0.9112179428592319, "grad_norm": 6.802027892186115, "learning_rate": 2.0533833417006542e-07, "loss": 17.7405, "step": 49850 }, { "epoch": 0.9112362220556784, "grad_norm": 6.8527049783961695, "learning_rate": 2.0525438335709636e-07, "loss": 17.647, "step": 49851 }, { "epoch": 0.9112545012521249, "grad_norm": 6.583234524365146, "learning_rate": 2.0517044934929454e-07, "loss": 17.4534, "step": 49852 }, { "epoch": 0.9112727804485715, "grad_norm": 6.671525586794269, "learning_rate": 2.0508653214695472e-07, "loss": 17.4879, "step": 49853 }, { "epoch": 0.911291059645018, "grad_norm": 5.994734823618786, "learning_rate": 2.050026317503695e-07, "loss": 17.1552, "step": 49854 }, { "epoch": 0.9113093388414645, "grad_norm": 6.100859241387655, "learning_rate": 2.049187481598347e-07, "loss": 17.3748, "step": 49855 }, { "epoch": 0.9113276180379111, "grad_norm": 5.555029780376035, "learning_rate": 2.048348813756429e-07, "loss": 17.064, "step": 49856 }, { "epoch": 0.9113458972343575, "grad_norm": 5.429141020855845, "learning_rate": 2.0475103139808993e-07, "loss": 17.1219, "step": 49857 }, { "epoch": 0.9113641764308041, "grad_norm": 6.604394253742723, "learning_rate": 2.0466719822746728e-07, "loss": 17.3268, "step": 49858 }, { "epoch": 0.9113824556272506, "grad_norm": 7.022073495051659, "learning_rate": 2.0458338186407023e-07, "loss": 17.5833, "step": 49859 }, { "epoch": 0.9114007348236971, "grad_norm": 8.191712836455467, "learning_rate": 2.04499582308193e-07, "loss": 17.7627, "step": 49860 }, { "epoch": 0.9114190140201437, "grad_norm": 6.391484953153466, "learning_rate": 2.0441579956012813e-07, "loss": 17.4298, "step": 49861 }, { "epoch": 0.9114372932165902, "grad_norm": 6.977402378363637, "learning_rate": 2.043320336201693e-07, "loss": 17.5772, "step": 49862 }, { "epoch": 0.9114555724130368, "grad_norm": 5.5268957061474016, "learning_rate": 2.0424828448861124e-07, "loss": 17.068, "step": 49863 }, { "epoch": 0.9114738516094832, "grad_norm": 5.356445531543896, "learning_rate": 2.0416455216574705e-07, "loss": 17.2896, "step": 49864 }, { "epoch": 0.9114921308059297, "grad_norm": 4.924913462590151, "learning_rate": 2.0408083665186928e-07, "loss": 16.9685, "step": 49865 }, { "epoch": 0.9115104100023763, "grad_norm": 5.334299295813851, "learning_rate": 2.0399713794727217e-07, "loss": 17.055, "step": 49866 }, { "epoch": 0.9115286891988228, "grad_norm": 5.416442246654425, "learning_rate": 2.0391345605224877e-07, "loss": 16.9563, "step": 49867 }, { "epoch": 0.9115469683952694, "grad_norm": 5.538434972327793, "learning_rate": 2.0382979096709222e-07, "loss": 17.1308, "step": 49868 }, { "epoch": 0.9115652475917159, "grad_norm": 5.66082567677582, "learning_rate": 2.037461426920967e-07, "loss": 17.1689, "step": 49869 }, { "epoch": 0.9115835267881623, "grad_norm": 4.9874186888267955, "learning_rate": 2.0366251122755475e-07, "loss": 16.9047, "step": 49870 }, { "epoch": 0.9116018059846089, "grad_norm": 7.082816231728607, "learning_rate": 2.0357889657375896e-07, "loss": 17.3846, "step": 49871 }, { "epoch": 0.9116200851810554, "grad_norm": 7.444133881197632, "learning_rate": 2.034952987310035e-07, "loss": 17.8708, "step": 49872 }, { "epoch": 0.911638364377502, "grad_norm": 7.313228154495603, "learning_rate": 2.0341171769958034e-07, "loss": 17.9403, "step": 49873 }, { "epoch": 0.9116566435739485, "grad_norm": 4.8404675343973125, "learning_rate": 2.0332815347978318e-07, "loss": 16.7215, "step": 49874 }, { "epoch": 0.911674922770395, "grad_norm": 5.973933201030617, "learning_rate": 2.032446060719051e-07, "loss": 17.3344, "step": 49875 }, { "epoch": 0.9116932019668416, "grad_norm": 5.997930735515344, "learning_rate": 2.0316107547623753e-07, "loss": 16.9951, "step": 49876 }, { "epoch": 0.911711481163288, "grad_norm": 6.113807202033663, "learning_rate": 2.0307756169307413e-07, "loss": 17.3147, "step": 49877 }, { "epoch": 0.9117297603597346, "grad_norm": 7.516787506300628, "learning_rate": 2.0299406472270798e-07, "loss": 17.7141, "step": 49878 }, { "epoch": 0.9117480395561811, "grad_norm": 7.309461727350888, "learning_rate": 2.029105845654311e-07, "loss": 17.5626, "step": 49879 }, { "epoch": 0.9117663187526276, "grad_norm": 6.0170471082280015, "learning_rate": 2.0282712122153714e-07, "loss": 17.2183, "step": 49880 }, { "epoch": 0.9117845979490742, "grad_norm": 6.529598363356034, "learning_rate": 2.027436746913175e-07, "loss": 17.3568, "step": 49881 }, { "epoch": 0.9118028771455207, "grad_norm": 6.683141782890771, "learning_rate": 2.026602449750642e-07, "loss": 17.4808, "step": 49882 }, { "epoch": 0.9118211563419673, "grad_norm": 6.460660015621753, "learning_rate": 2.0257683207307144e-07, "loss": 17.4872, "step": 49883 }, { "epoch": 0.9118394355384137, "grad_norm": 5.756549355739381, "learning_rate": 2.0249343598562954e-07, "loss": 17.4681, "step": 49884 }, { "epoch": 0.9118577147348602, "grad_norm": 6.929607035434141, "learning_rate": 2.0241005671303272e-07, "loss": 17.818, "step": 49885 }, { "epoch": 0.9118759939313068, "grad_norm": 5.481193011888759, "learning_rate": 2.0232669425557182e-07, "loss": 17.0342, "step": 49886 }, { "epoch": 0.9118942731277533, "grad_norm": 6.9677973700735025, "learning_rate": 2.0224334861353946e-07, "loss": 17.6339, "step": 49887 }, { "epoch": 0.9119125523241999, "grad_norm": 5.755918274647631, "learning_rate": 2.0216001978722866e-07, "loss": 17.2188, "step": 49888 }, { "epoch": 0.9119308315206464, "grad_norm": 7.546525009823416, "learning_rate": 2.0207670777693034e-07, "loss": 17.8074, "step": 49889 }, { "epoch": 0.9119491107170928, "grad_norm": 7.475207608297274, "learning_rate": 2.0199341258293648e-07, "loss": 17.3289, "step": 49890 }, { "epoch": 0.9119673899135394, "grad_norm": 7.163251389674664, "learning_rate": 2.019101342055396e-07, "loss": 17.8102, "step": 49891 }, { "epoch": 0.9119856691099859, "grad_norm": 4.69606876933256, "learning_rate": 2.0182687264503175e-07, "loss": 16.8916, "step": 49892 }, { "epoch": 0.9120039483064325, "grad_norm": 7.176733044219439, "learning_rate": 2.017436279017032e-07, "loss": 17.7873, "step": 49893 }, { "epoch": 0.912022227502879, "grad_norm": 8.219642786050711, "learning_rate": 2.0166039997584708e-07, "loss": 17.9199, "step": 49894 }, { "epoch": 0.9120405066993255, "grad_norm": 6.417906670527224, "learning_rate": 2.015771888677559e-07, "loss": 17.5262, "step": 49895 }, { "epoch": 0.912058785895772, "grad_norm": 6.274470956539905, "learning_rate": 2.0149399457771945e-07, "loss": 17.344, "step": 49896 }, { "epoch": 0.9120770650922185, "grad_norm": 8.300408033408504, "learning_rate": 2.014108171060303e-07, "loss": 18.1571, "step": 49897 }, { "epoch": 0.9120953442886651, "grad_norm": 5.257464608011017, "learning_rate": 2.0132765645298037e-07, "loss": 16.8088, "step": 49898 }, { "epoch": 0.9121136234851116, "grad_norm": 7.0653289863456825, "learning_rate": 2.0124451261886e-07, "loss": 17.6609, "step": 49899 }, { "epoch": 0.9121319026815581, "grad_norm": 6.931846019946178, "learning_rate": 2.0116138560396126e-07, "loss": 17.7318, "step": 49900 }, { "epoch": 0.9121501818780047, "grad_norm": 6.904210599336755, "learning_rate": 2.0107827540857604e-07, "loss": 17.4847, "step": 49901 }, { "epoch": 0.9121684610744512, "grad_norm": 5.964400197994833, "learning_rate": 2.0099518203299416e-07, "loss": 17.0627, "step": 49902 }, { "epoch": 0.9121867402708977, "grad_norm": 5.607927591610415, "learning_rate": 2.009121054775076e-07, "loss": 17.1323, "step": 49903 }, { "epoch": 0.9122050194673442, "grad_norm": 5.654941967527023, "learning_rate": 2.008290457424078e-07, "loss": 17.0975, "step": 49904 }, { "epoch": 0.9122232986637907, "grad_norm": 6.148723826071678, "learning_rate": 2.0074600282798672e-07, "loss": 17.3708, "step": 49905 }, { "epoch": 0.9122415778602373, "grad_norm": 6.325168103752597, "learning_rate": 2.0066297673453415e-07, "loss": 17.4002, "step": 49906 }, { "epoch": 0.9122598570566838, "grad_norm": 6.077368951209291, "learning_rate": 2.0057996746234044e-07, "loss": 17.4426, "step": 49907 }, { "epoch": 0.9122781362531304, "grad_norm": 6.430444973335236, "learning_rate": 2.0049697501169862e-07, "loss": 17.6875, "step": 49908 }, { "epoch": 0.9122964154495768, "grad_norm": 5.918911265598866, "learning_rate": 2.0041399938289797e-07, "loss": 17.4999, "step": 49909 }, { "epoch": 0.9123146946460233, "grad_norm": 4.867811609998334, "learning_rate": 2.0033104057622932e-07, "loss": 16.8296, "step": 49910 }, { "epoch": 0.9123329738424699, "grad_norm": 5.205679723577652, "learning_rate": 2.0024809859198468e-07, "loss": 16.9222, "step": 49911 }, { "epoch": 0.9123512530389164, "grad_norm": 7.146288288991448, "learning_rate": 2.0016517343045326e-07, "loss": 17.7363, "step": 49912 }, { "epoch": 0.9123695322353629, "grad_norm": 6.28796798059735, "learning_rate": 2.0008226509192596e-07, "loss": 17.412, "step": 49913 }, { "epoch": 0.9123878114318095, "grad_norm": 7.3612493133952395, "learning_rate": 1.9999937357669475e-07, "loss": 17.693, "step": 49914 }, { "epoch": 0.912406090628256, "grad_norm": 5.448947140016024, "learning_rate": 1.9991649888504938e-07, "loss": 16.9618, "step": 49915 }, { "epoch": 0.9124243698247025, "grad_norm": 4.789191354560304, "learning_rate": 1.9983364101727908e-07, "loss": 16.9313, "step": 49916 }, { "epoch": 0.912442649021149, "grad_norm": 6.33657602272009, "learning_rate": 1.997507999736764e-07, "loss": 17.0081, "step": 49917 }, { "epoch": 0.9124609282175955, "grad_norm": 4.888339913625661, "learning_rate": 1.9966797575453001e-07, "loss": 17.1644, "step": 49918 }, { "epoch": 0.9124792074140421, "grad_norm": 5.788871152731636, "learning_rate": 1.995851683601313e-07, "loss": 17.2722, "step": 49919 }, { "epoch": 0.9124974866104886, "grad_norm": 4.473483971026961, "learning_rate": 1.9950237779077008e-07, "loss": 16.7038, "step": 49920 }, { "epoch": 0.9125157658069352, "grad_norm": 4.615459856602813, "learning_rate": 1.9941960404673556e-07, "loss": 16.7182, "step": 49921 }, { "epoch": 0.9125340450033816, "grad_norm": 6.293502230841772, "learning_rate": 1.9933684712831912e-07, "loss": 17.3187, "step": 49922 }, { "epoch": 0.9125523241998281, "grad_norm": 6.449180993486863, "learning_rate": 1.992541070358106e-07, "loss": 17.4774, "step": 49923 }, { "epoch": 0.9125706033962747, "grad_norm": 5.937311580776577, "learning_rate": 1.9917138376949975e-07, "loss": 17.3385, "step": 49924 }, { "epoch": 0.9125888825927212, "grad_norm": 5.947742291393758, "learning_rate": 1.9908867732967685e-07, "loss": 17.4377, "step": 49925 }, { "epoch": 0.9126071617891678, "grad_norm": 7.515531652647146, "learning_rate": 1.990059877166317e-07, "loss": 17.7617, "step": 49926 }, { "epoch": 0.9126254409856143, "grad_norm": 5.694166222841082, "learning_rate": 1.9892331493065298e-07, "loss": 17.1774, "step": 49927 }, { "epoch": 0.9126437201820607, "grad_norm": 6.805571829894954, "learning_rate": 1.9884065897203265e-07, "loss": 17.7186, "step": 49928 }, { "epoch": 0.9126619993785073, "grad_norm": 6.835990407892309, "learning_rate": 1.987580198410588e-07, "loss": 17.4244, "step": 49929 }, { "epoch": 0.9126802785749538, "grad_norm": 6.225156795405079, "learning_rate": 1.9867539753802067e-07, "loss": 17.4629, "step": 49930 }, { "epoch": 0.9126985577714004, "grad_norm": 5.101916574257273, "learning_rate": 1.9859279206320914e-07, "loss": 16.9941, "step": 49931 }, { "epoch": 0.9127168369678469, "grad_norm": 6.3190443915869094, "learning_rate": 1.985102034169134e-07, "loss": 17.3386, "step": 49932 }, { "epoch": 0.9127351161642934, "grad_norm": 9.979635727226439, "learning_rate": 1.9842763159942268e-07, "loss": 17.7142, "step": 49933 }, { "epoch": 0.91275339536074, "grad_norm": 4.606140809286739, "learning_rate": 1.983450766110262e-07, "loss": 16.9514, "step": 49934 }, { "epoch": 0.9127716745571864, "grad_norm": 5.789212612929645, "learning_rate": 1.982625384520137e-07, "loss": 17.3006, "step": 49935 }, { "epoch": 0.912789953753633, "grad_norm": 6.361273413068279, "learning_rate": 1.9818001712267443e-07, "loss": 17.7165, "step": 49936 }, { "epoch": 0.9128082329500795, "grad_norm": 5.216967224277493, "learning_rate": 1.9809751262329812e-07, "loss": 16.921, "step": 49937 }, { "epoch": 0.912826512146526, "grad_norm": 7.104504569686889, "learning_rate": 1.9801502495417234e-07, "loss": 17.9944, "step": 49938 }, { "epoch": 0.9128447913429726, "grad_norm": 5.949224450723949, "learning_rate": 1.97932554115588e-07, "loss": 17.0695, "step": 49939 }, { "epoch": 0.9128630705394191, "grad_norm": 5.487718588178211, "learning_rate": 1.9785010010783257e-07, "loss": 17.2376, "step": 49940 }, { "epoch": 0.9128813497358657, "grad_norm": 5.647981063483595, "learning_rate": 1.9776766293119588e-07, "loss": 17.2277, "step": 49941 }, { "epoch": 0.9128996289323121, "grad_norm": 5.4425227723191645, "learning_rate": 1.9768524258596711e-07, "loss": 17.0666, "step": 49942 }, { "epoch": 0.9129179081287586, "grad_norm": 8.001469306136714, "learning_rate": 1.9760283907243495e-07, "loss": 17.8609, "step": 49943 }, { "epoch": 0.9129361873252052, "grad_norm": 5.828215793416986, "learning_rate": 1.97520452390888e-07, "loss": 17.0494, "step": 49944 }, { "epoch": 0.9129544665216517, "grad_norm": 5.368848726556582, "learning_rate": 1.97438082541615e-07, "loss": 17.0527, "step": 49945 }, { "epoch": 0.9129727457180983, "grad_norm": 5.5444172678221495, "learning_rate": 1.9735572952490566e-07, "loss": 17.0637, "step": 49946 }, { "epoch": 0.9129910249145448, "grad_norm": 9.806993859029754, "learning_rate": 1.9727339334104645e-07, "loss": 18.3829, "step": 49947 }, { "epoch": 0.9130093041109912, "grad_norm": 6.786683515262901, "learning_rate": 1.9719107399032766e-07, "loss": 17.2892, "step": 49948 }, { "epoch": 0.9130275833074378, "grad_norm": 6.21267630133685, "learning_rate": 1.9710877147303742e-07, "loss": 17.5333, "step": 49949 }, { "epoch": 0.9130458625038843, "grad_norm": 5.359142403788577, "learning_rate": 1.9702648578946382e-07, "loss": 17.3145, "step": 49950 }, { "epoch": 0.9130641417003309, "grad_norm": 5.754566987498222, "learning_rate": 1.9694421693989662e-07, "loss": 17.1379, "step": 49951 }, { "epoch": 0.9130824208967774, "grad_norm": 6.124621264370131, "learning_rate": 1.9686196492462228e-07, "loss": 17.5792, "step": 49952 }, { "epoch": 0.9131007000932239, "grad_norm": 5.840246568453333, "learning_rate": 1.9677972974393e-07, "loss": 17.2157, "step": 49953 }, { "epoch": 0.9131189792896705, "grad_norm": 7.143414050917994, "learning_rate": 1.9669751139810845e-07, "loss": 17.6716, "step": 49954 }, { "epoch": 0.9131372584861169, "grad_norm": 6.260318196373484, "learning_rate": 1.9661530988744515e-07, "loss": 17.3727, "step": 49955 }, { "epoch": 0.9131555376825635, "grad_norm": 5.0772521885717525, "learning_rate": 1.9653312521222821e-07, "loss": 16.9292, "step": 49956 }, { "epoch": 0.91317381687901, "grad_norm": 7.2027490465143345, "learning_rate": 1.964509573727458e-07, "loss": 17.3227, "step": 49957 }, { "epoch": 0.9131920960754565, "grad_norm": 6.1006571368377855, "learning_rate": 1.9636880636928534e-07, "loss": 17.0664, "step": 49958 }, { "epoch": 0.9132103752719031, "grad_norm": 6.522094394765253, "learning_rate": 1.962866722021367e-07, "loss": 17.107, "step": 49959 }, { "epoch": 0.9132286544683496, "grad_norm": 7.313027890750394, "learning_rate": 1.9620455487158573e-07, "loss": 17.8737, "step": 49960 }, { "epoch": 0.9132469336647961, "grad_norm": 7.272874369156601, "learning_rate": 1.961224543779211e-07, "loss": 17.8455, "step": 49961 }, { "epoch": 0.9132652128612426, "grad_norm": 6.471768702522962, "learning_rate": 1.9604037072143034e-07, "loss": 17.1947, "step": 49962 }, { "epoch": 0.9132834920576891, "grad_norm": 4.593752099265128, "learning_rate": 1.9595830390240156e-07, "loss": 16.747, "step": 49963 }, { "epoch": 0.9133017712541357, "grad_norm": 6.123130675724023, "learning_rate": 1.958762539211212e-07, "loss": 17.4474, "step": 49964 }, { "epoch": 0.9133200504505822, "grad_norm": 6.174797319826389, "learning_rate": 1.9579422077787847e-07, "loss": 17.431, "step": 49965 }, { "epoch": 0.9133383296470288, "grad_norm": 6.766318751864884, "learning_rate": 1.957122044729598e-07, "loss": 17.5105, "step": 49966 }, { "epoch": 0.9133566088434752, "grad_norm": 5.793063798846342, "learning_rate": 1.956302050066522e-07, "loss": 17.2013, "step": 49967 }, { "epoch": 0.9133748880399217, "grad_norm": 6.112553259862579, "learning_rate": 1.9554822237924488e-07, "loss": 17.635, "step": 49968 }, { "epoch": 0.9133931672363683, "grad_norm": 6.89439810047073, "learning_rate": 1.9546625659102368e-07, "loss": 17.2103, "step": 49969 }, { "epoch": 0.9134114464328148, "grad_norm": 6.121653192903621, "learning_rate": 1.953843076422768e-07, "loss": 17.3977, "step": 49970 }, { "epoch": 0.9134297256292614, "grad_norm": 5.15452201405046, "learning_rate": 1.9530237553329112e-07, "loss": 17.0941, "step": 49971 }, { "epoch": 0.9134480048257079, "grad_norm": 5.8715971908627305, "learning_rate": 1.952204602643526e-07, "loss": 17.3752, "step": 49972 }, { "epoch": 0.9134662840221544, "grad_norm": 5.319593632187573, "learning_rate": 1.9513856183575042e-07, "loss": 17.0556, "step": 49973 }, { "epoch": 0.9134845632186009, "grad_norm": 5.792640988475587, "learning_rate": 1.9505668024777048e-07, "loss": 17.2726, "step": 49974 }, { "epoch": 0.9135028424150474, "grad_norm": 5.7064310636033335, "learning_rate": 1.9497481550069973e-07, "loss": 17.2212, "step": 49975 }, { "epoch": 0.913521121611494, "grad_norm": 5.873121647780203, "learning_rate": 1.948929675948247e-07, "loss": 17.5064, "step": 49976 }, { "epoch": 0.9135394008079405, "grad_norm": 8.148319775138912, "learning_rate": 1.9481113653043392e-07, "loss": 17.4373, "step": 49977 }, { "epoch": 0.913557680004387, "grad_norm": 5.45662235902159, "learning_rate": 1.9472932230781228e-07, "loss": 16.814, "step": 49978 }, { "epoch": 0.9135759592008336, "grad_norm": 5.219470850629442, "learning_rate": 1.9464752492724782e-07, "loss": 16.8834, "step": 49979 }, { "epoch": 0.91359423839728, "grad_norm": 5.8952532344937545, "learning_rate": 1.9456574438902642e-07, "loss": 17.3653, "step": 49980 }, { "epoch": 0.9136125175937265, "grad_norm": 5.516639709143283, "learning_rate": 1.9448398069343566e-07, "loss": 16.9685, "step": 49981 }, { "epoch": 0.9136307967901731, "grad_norm": 5.634916158645988, "learning_rate": 1.944022338407614e-07, "loss": 17.1007, "step": 49982 }, { "epoch": 0.9136490759866196, "grad_norm": 6.292381040798705, "learning_rate": 1.9432050383129008e-07, "loss": 17.2164, "step": 49983 }, { "epoch": 0.9136673551830662, "grad_norm": 5.602181437150418, "learning_rate": 1.942387906653087e-07, "loss": 17.1307, "step": 49984 }, { "epoch": 0.9136856343795127, "grad_norm": 5.535063978525984, "learning_rate": 1.9415709434310259e-07, "loss": 16.9577, "step": 49985 }, { "epoch": 0.9137039135759591, "grad_norm": 6.473398007117336, "learning_rate": 1.9407541486495928e-07, "loss": 17.7043, "step": 49986 }, { "epoch": 0.9137221927724057, "grad_norm": 6.8443068969966845, "learning_rate": 1.9399375223116523e-07, "loss": 17.2928, "step": 49987 }, { "epoch": 0.9137404719688522, "grad_norm": 6.706370428846485, "learning_rate": 1.9391210644200576e-07, "loss": 17.4548, "step": 49988 }, { "epoch": 0.9137587511652988, "grad_norm": 16.852753458842205, "learning_rate": 1.9383047749776618e-07, "loss": 18.4954, "step": 49989 }, { "epoch": 0.9137770303617453, "grad_norm": 5.161429279089152, "learning_rate": 1.9374886539873516e-07, "loss": 17.0075, "step": 49990 }, { "epoch": 0.9137953095581918, "grad_norm": 6.1880169849360245, "learning_rate": 1.9366727014519692e-07, "loss": 17.4892, "step": 49991 }, { "epoch": 0.9138135887546384, "grad_norm": 6.995735700908302, "learning_rate": 1.9358569173743735e-07, "loss": 17.0866, "step": 49992 }, { "epoch": 0.9138318679510848, "grad_norm": 7.217979864811867, "learning_rate": 1.9350413017574343e-07, "loss": 17.992, "step": 49993 }, { "epoch": 0.9138501471475314, "grad_norm": 5.535465475206777, "learning_rate": 1.9342258546039939e-07, "loss": 17.1905, "step": 49994 }, { "epoch": 0.9138684263439779, "grad_norm": 6.35657912910661, "learning_rate": 1.9334105759169275e-07, "loss": 17.0607, "step": 49995 }, { "epoch": 0.9138867055404244, "grad_norm": 6.377498200900606, "learning_rate": 1.9325954656990885e-07, "loss": 17.6353, "step": 49996 }, { "epoch": 0.913904984736871, "grad_norm": 6.169182006310921, "learning_rate": 1.931780523953325e-07, "loss": 17.2294, "step": 49997 }, { "epoch": 0.9139232639333175, "grad_norm": 5.400765349086107, "learning_rate": 1.9309657506825062e-07, "loss": 16.8512, "step": 49998 }, { "epoch": 0.9139415431297641, "grad_norm": 5.243017428807304, "learning_rate": 1.9301511458894863e-07, "loss": 16.8568, "step": 49999 }, { "epoch": 0.9139598223262105, "grad_norm": 5.914325319746961, "learning_rate": 1.929336709577101e-07, "loss": 17.4746, "step": 50000 }, { "epoch": 0.913978101522657, "grad_norm": 6.354165254119564, "learning_rate": 1.9285224417482318e-07, "loss": 17.1365, "step": 50001 }, { "epoch": 0.9139963807191036, "grad_norm": 6.3265889219475335, "learning_rate": 1.9277083424057152e-07, "loss": 17.2661, "step": 50002 }, { "epoch": 0.9140146599155501, "grad_norm": 6.5040787069069355, "learning_rate": 1.9268944115524046e-07, "loss": 17.8039, "step": 50003 }, { "epoch": 0.9140329391119967, "grad_norm": 6.3222641216915285, "learning_rate": 1.9260806491911587e-07, "loss": 17.3379, "step": 50004 }, { "epoch": 0.9140512183084432, "grad_norm": 7.171777300667666, "learning_rate": 1.9252670553248364e-07, "loss": 17.6058, "step": 50005 }, { "epoch": 0.9140694975048896, "grad_norm": 8.28306081562245, "learning_rate": 1.9244536299562745e-07, "loss": 17.9868, "step": 50006 }, { "epoch": 0.9140877767013362, "grad_norm": 7.133350977179929, "learning_rate": 1.9236403730883313e-07, "loss": 17.6511, "step": 50007 }, { "epoch": 0.9141060558977827, "grad_norm": 7.83550297943062, "learning_rate": 1.9228272847238606e-07, "loss": 17.8255, "step": 50008 }, { "epoch": 0.9141243350942293, "grad_norm": 5.769516268420891, "learning_rate": 1.922014364865704e-07, "loss": 17.1643, "step": 50009 }, { "epoch": 0.9141426142906758, "grad_norm": 5.972424677957097, "learning_rate": 1.921201613516721e-07, "loss": 17.0885, "step": 50010 }, { "epoch": 0.9141608934871223, "grad_norm": 7.508897659057741, "learning_rate": 1.9203890306797424e-07, "loss": 17.6939, "step": 50011 }, { "epoch": 0.9141791726835689, "grad_norm": 6.0470806927447525, "learning_rate": 1.9195766163576434e-07, "loss": 17.1333, "step": 50012 }, { "epoch": 0.9141974518800153, "grad_norm": 6.509182670836915, "learning_rate": 1.9187643705532445e-07, "loss": 17.2975, "step": 50013 }, { "epoch": 0.9142157310764619, "grad_norm": 7.706625500928553, "learning_rate": 1.9179522932694039e-07, "loss": 17.4558, "step": 50014 }, { "epoch": 0.9142340102729084, "grad_norm": 6.893181778439412, "learning_rate": 1.9171403845089753e-07, "loss": 17.3596, "step": 50015 }, { "epoch": 0.9142522894693549, "grad_norm": 7.629187589729473, "learning_rate": 1.9163286442747952e-07, "loss": 17.5094, "step": 50016 }, { "epoch": 0.9142705686658015, "grad_norm": 4.25870166561932, "learning_rate": 1.915517072569706e-07, "loss": 16.6623, "step": 50017 }, { "epoch": 0.914288847862248, "grad_norm": 6.3338792029281015, "learning_rate": 1.914705669396566e-07, "loss": 17.4783, "step": 50018 }, { "epoch": 0.9143071270586945, "grad_norm": 8.890701188423233, "learning_rate": 1.9138944347582067e-07, "loss": 17.9904, "step": 50019 }, { "epoch": 0.914325406255141, "grad_norm": 6.8647179295608485, "learning_rate": 1.91308336865747e-07, "loss": 17.4499, "step": 50020 }, { "epoch": 0.9143436854515875, "grad_norm": 5.586597657042941, "learning_rate": 1.9122724710971985e-07, "loss": 17.1859, "step": 50021 }, { "epoch": 0.9143619646480341, "grad_norm": 6.068638440604352, "learning_rate": 1.9114617420802505e-07, "loss": 17.0983, "step": 50022 }, { "epoch": 0.9143802438444806, "grad_norm": 5.540950903599161, "learning_rate": 1.910651181609452e-07, "loss": 16.9983, "step": 50023 }, { "epoch": 0.9143985230409272, "grad_norm": 7.242067671586973, "learning_rate": 1.9098407896876503e-07, "loss": 17.5475, "step": 50024 }, { "epoch": 0.9144168022373736, "grad_norm": 6.403727040593493, "learning_rate": 1.9090305663176767e-07, "loss": 17.3382, "step": 50025 }, { "epoch": 0.9144350814338201, "grad_norm": 7.033827645442496, "learning_rate": 1.9082205115023843e-07, "loss": 17.5108, "step": 50026 }, { "epoch": 0.9144533606302667, "grad_norm": 6.066316515605552, "learning_rate": 1.90741062524461e-07, "loss": 17.4415, "step": 50027 }, { "epoch": 0.9144716398267132, "grad_norm": 6.250868337621487, "learning_rate": 1.9066009075471792e-07, "loss": 17.6099, "step": 50028 }, { "epoch": 0.9144899190231598, "grad_norm": 6.7382194076199555, "learning_rate": 1.905791358412945e-07, "loss": 17.5408, "step": 50029 }, { "epoch": 0.9145081982196063, "grad_norm": 5.928232017253534, "learning_rate": 1.9049819778447275e-07, "loss": 17.5028, "step": 50030 }, { "epoch": 0.9145264774160528, "grad_norm": 6.793920281716418, "learning_rate": 1.9041727658453802e-07, "loss": 17.5944, "step": 50031 }, { "epoch": 0.9145447566124993, "grad_norm": 6.64788886004748, "learning_rate": 1.9033637224177393e-07, "loss": 17.4806, "step": 50032 }, { "epoch": 0.9145630358089458, "grad_norm": 5.9133519903960465, "learning_rate": 1.902554847564636e-07, "loss": 17.312, "step": 50033 }, { "epoch": 0.9145813150053924, "grad_norm": 4.999159435759187, "learning_rate": 1.901746141288896e-07, "loss": 16.9537, "step": 50034 }, { "epoch": 0.9145995942018389, "grad_norm": 6.613623537713095, "learning_rate": 1.9009376035933725e-07, "loss": 17.6627, "step": 50035 }, { "epoch": 0.9146178733982854, "grad_norm": 6.696617865246108, "learning_rate": 1.9001292344808854e-07, "loss": 17.5736, "step": 50036 }, { "epoch": 0.914636152594732, "grad_norm": 6.538487918274305, "learning_rate": 1.8993210339542657e-07, "loss": 16.9965, "step": 50037 }, { "epoch": 0.9146544317911784, "grad_norm": 6.150479427247207, "learning_rate": 1.8985130020163556e-07, "loss": 17.339, "step": 50038 }, { "epoch": 0.914672710987625, "grad_norm": 6.522897847863999, "learning_rate": 1.8977051386699806e-07, "loss": 17.7588, "step": 50039 }, { "epoch": 0.9146909901840715, "grad_norm": 5.52271327493438, "learning_rate": 1.896897443917972e-07, "loss": 17.3064, "step": 50040 }, { "epoch": 0.914709269380518, "grad_norm": 8.34731558032747, "learning_rate": 1.896089917763172e-07, "loss": 17.9021, "step": 50041 }, { "epoch": 0.9147275485769646, "grad_norm": 6.7595598267267265, "learning_rate": 1.8952825602083948e-07, "loss": 17.3085, "step": 50042 }, { "epoch": 0.9147458277734111, "grad_norm": 6.541140698142563, "learning_rate": 1.8944753712564824e-07, "loss": 17.5173, "step": 50043 }, { "epoch": 0.9147641069698577, "grad_norm": 5.192132646736287, "learning_rate": 1.893668350910266e-07, "loss": 17.1057, "step": 50044 }, { "epoch": 0.9147823861663041, "grad_norm": 5.437073642190035, "learning_rate": 1.8928614991725548e-07, "loss": 17.0079, "step": 50045 }, { "epoch": 0.9148006653627506, "grad_norm": 6.804403972346097, "learning_rate": 1.8920548160462017e-07, "loss": 17.612, "step": 50046 }, { "epoch": 0.9148189445591972, "grad_norm": 6.359055298142187, "learning_rate": 1.8912483015340154e-07, "loss": 17.1887, "step": 50047 }, { "epoch": 0.9148372237556437, "grad_norm": 5.469517259760471, "learning_rate": 1.8904419556388275e-07, "loss": 17.0022, "step": 50048 }, { "epoch": 0.9148555029520902, "grad_norm": 6.888343758176998, "learning_rate": 1.8896357783634633e-07, "loss": 17.4774, "step": 50049 }, { "epoch": 0.9148737821485368, "grad_norm": 5.818241855408288, "learning_rate": 1.8888297697107595e-07, "loss": 17.1632, "step": 50050 }, { "epoch": 0.9148920613449832, "grad_norm": 6.280967217891253, "learning_rate": 1.8880239296835246e-07, "loss": 16.9817, "step": 50051 }, { "epoch": 0.9149103405414298, "grad_norm": 7.026205060975152, "learning_rate": 1.8872182582845954e-07, "loss": 17.5219, "step": 50052 }, { "epoch": 0.9149286197378763, "grad_norm": 6.223455406603513, "learning_rate": 1.8864127555167922e-07, "loss": 17.2488, "step": 50053 }, { "epoch": 0.9149468989343228, "grad_norm": 6.955203524994146, "learning_rate": 1.8856074213829346e-07, "loss": 17.4709, "step": 50054 }, { "epoch": 0.9149651781307694, "grad_norm": 7.080476580398266, "learning_rate": 1.884802255885848e-07, "loss": 17.4002, "step": 50055 }, { "epoch": 0.9149834573272159, "grad_norm": 6.0148106491948585, "learning_rate": 1.883997259028353e-07, "loss": 17.5094, "step": 50056 }, { "epoch": 0.9150017365236625, "grad_norm": 6.109403200840171, "learning_rate": 1.8831924308132743e-07, "loss": 17.7206, "step": 50057 }, { "epoch": 0.9150200157201089, "grad_norm": 7.145941037548552, "learning_rate": 1.882387771243427e-07, "loss": 17.4226, "step": 50058 }, { "epoch": 0.9150382949165554, "grad_norm": 6.384722687992468, "learning_rate": 1.881583280321636e-07, "loss": 17.244, "step": 50059 }, { "epoch": 0.915056574113002, "grad_norm": 5.629296620708555, "learning_rate": 1.8807789580507275e-07, "loss": 17.2534, "step": 50060 }, { "epoch": 0.9150748533094485, "grad_norm": 4.965903147361433, "learning_rate": 1.8799748044335097e-07, "loss": 16.9131, "step": 50061 }, { "epoch": 0.9150931325058951, "grad_norm": 7.221059482636087, "learning_rate": 1.879170819472803e-07, "loss": 17.5506, "step": 50062 }, { "epoch": 0.9151114117023416, "grad_norm": 6.730707779538088, "learning_rate": 1.8783670031714273e-07, "loss": 17.7722, "step": 50063 }, { "epoch": 0.915129690898788, "grad_norm": 5.19900630030872, "learning_rate": 1.8775633555322025e-07, "loss": 17.0696, "step": 50064 }, { "epoch": 0.9151479700952346, "grad_norm": 5.699457080142571, "learning_rate": 1.876759876557932e-07, "loss": 17.0468, "step": 50065 }, { "epoch": 0.9151662492916811, "grad_norm": 6.585901206216804, "learning_rate": 1.8759565662514523e-07, "loss": 17.6082, "step": 50066 }, { "epoch": 0.9151845284881277, "grad_norm": 4.824576228376761, "learning_rate": 1.8751534246155666e-07, "loss": 16.7128, "step": 50067 }, { "epoch": 0.9152028076845742, "grad_norm": 7.041002486971349, "learning_rate": 1.8743504516530842e-07, "loss": 17.5318, "step": 50068 }, { "epoch": 0.9152210868810207, "grad_norm": 6.790998312707019, "learning_rate": 1.8735476473668358e-07, "loss": 17.6959, "step": 50069 }, { "epoch": 0.9152393660774673, "grad_norm": 6.487835634600397, "learning_rate": 1.8727450117596303e-07, "loss": 17.4427, "step": 50070 }, { "epoch": 0.9152576452739137, "grad_norm": 6.802656770003461, "learning_rate": 1.8719425448342708e-07, "loss": 17.309, "step": 50071 }, { "epoch": 0.9152759244703603, "grad_norm": 5.1350695800708, "learning_rate": 1.871140246593578e-07, "loss": 16.9309, "step": 50072 }, { "epoch": 0.9152942036668068, "grad_norm": 6.155283553641268, "learning_rate": 1.87033811704036e-07, "loss": 17.599, "step": 50073 }, { "epoch": 0.9153124828632533, "grad_norm": 6.086752824969355, "learning_rate": 1.8695361561774317e-07, "loss": 17.2478, "step": 50074 }, { "epoch": 0.9153307620596999, "grad_norm": 6.498525828649037, "learning_rate": 1.8687343640076017e-07, "loss": 17.2269, "step": 50075 }, { "epoch": 0.9153490412561464, "grad_norm": 4.04872648141993, "learning_rate": 1.8679327405336843e-07, "loss": 16.5819, "step": 50076 }, { "epoch": 0.915367320452593, "grad_norm": 5.393780655148015, "learning_rate": 1.8671312857584833e-07, "loss": 16.9295, "step": 50077 }, { "epoch": 0.9153855996490394, "grad_norm": 6.277591949517345, "learning_rate": 1.8663299996848127e-07, "loss": 17.2727, "step": 50078 }, { "epoch": 0.9154038788454859, "grad_norm": 7.171147615754245, "learning_rate": 1.8655288823154705e-07, "loss": 17.8444, "step": 50079 }, { "epoch": 0.9154221580419325, "grad_norm": 5.773466095497153, "learning_rate": 1.864727933653282e-07, "loss": 17.1188, "step": 50080 }, { "epoch": 0.915440437238379, "grad_norm": 5.979366279217176, "learning_rate": 1.863927153701045e-07, "loss": 17.1078, "step": 50081 }, { "epoch": 0.9154587164348256, "grad_norm": 5.857680794606024, "learning_rate": 1.863126542461563e-07, "loss": 17.2325, "step": 50082 }, { "epoch": 0.915476995631272, "grad_norm": 5.267754874635256, "learning_rate": 1.8623260999376447e-07, "loss": 17.0545, "step": 50083 }, { "epoch": 0.9154952748277185, "grad_norm": 6.180432976235217, "learning_rate": 1.861525826132099e-07, "loss": 17.1345, "step": 50084 }, { "epoch": 0.9155135540241651, "grad_norm": 6.291003790531875, "learning_rate": 1.8607257210477237e-07, "loss": 17.3187, "step": 50085 }, { "epoch": 0.9155318332206116, "grad_norm": 5.178448525866379, "learning_rate": 1.8599257846873275e-07, "loss": 16.8589, "step": 50086 }, { "epoch": 0.9155501124170582, "grad_norm": 5.032191977938498, "learning_rate": 1.8591260170537139e-07, "loss": 16.9079, "step": 50087 }, { "epoch": 0.9155683916135047, "grad_norm": 7.384325545654791, "learning_rate": 1.858326418149692e-07, "loss": 17.9503, "step": 50088 }, { "epoch": 0.9155866708099512, "grad_norm": 4.827912859490317, "learning_rate": 1.8575269879780588e-07, "loss": 17.0022, "step": 50089 }, { "epoch": 0.9156049500063977, "grad_norm": 6.35177790846749, "learning_rate": 1.8567277265416073e-07, "loss": 17.0393, "step": 50090 }, { "epoch": 0.9156232292028442, "grad_norm": 5.047023407516225, "learning_rate": 1.855928633843157e-07, "loss": 16.9804, "step": 50091 }, { "epoch": 0.9156415083992908, "grad_norm": 5.8047759210433485, "learning_rate": 1.8551297098855002e-07, "loss": 17.1107, "step": 50092 }, { "epoch": 0.9156597875957373, "grad_norm": 5.367830001149054, "learning_rate": 1.854330954671424e-07, "loss": 17.1685, "step": 50093 }, { "epoch": 0.9156780667921838, "grad_norm": 5.644000016942897, "learning_rate": 1.8535323682037421e-07, "loss": 17.4408, "step": 50094 }, { "epoch": 0.9156963459886304, "grad_norm": 5.946843856007839, "learning_rate": 1.8527339504852583e-07, "loss": 17.1403, "step": 50095 }, { "epoch": 0.9157146251850768, "grad_norm": 6.951200878383805, "learning_rate": 1.8519357015187588e-07, "loss": 17.4935, "step": 50096 }, { "epoch": 0.9157329043815234, "grad_norm": 5.628778017338492, "learning_rate": 1.8511376213070532e-07, "loss": 17.07, "step": 50097 }, { "epoch": 0.9157511835779699, "grad_norm": 6.751901718851697, "learning_rate": 1.8503397098529274e-07, "loss": 17.7019, "step": 50098 }, { "epoch": 0.9157694627744164, "grad_norm": 6.293746474835711, "learning_rate": 1.8495419671591852e-07, "loss": 17.1239, "step": 50099 }, { "epoch": 0.915787741970863, "grad_norm": 6.741112319592283, "learning_rate": 1.8487443932286188e-07, "loss": 17.7514, "step": 50100 }, { "epoch": 0.9158060211673095, "grad_norm": 5.451675435856219, "learning_rate": 1.8479469880640255e-07, "loss": 17.0453, "step": 50101 }, { "epoch": 0.9158243003637561, "grad_norm": 7.058561991520317, "learning_rate": 1.847149751668198e-07, "loss": 17.4575, "step": 50102 }, { "epoch": 0.9158425795602025, "grad_norm": 5.384554669889525, "learning_rate": 1.8463526840439283e-07, "loss": 17.1331, "step": 50103 }, { "epoch": 0.915860858756649, "grad_norm": 5.286184282952028, "learning_rate": 1.8455557851940143e-07, "loss": 16.9928, "step": 50104 }, { "epoch": 0.9158791379530956, "grad_norm": 6.3948744847128705, "learning_rate": 1.8447590551212535e-07, "loss": 17.3447, "step": 50105 }, { "epoch": 0.9158974171495421, "grad_norm": 5.270353909176348, "learning_rate": 1.8439624938284328e-07, "loss": 16.9977, "step": 50106 }, { "epoch": 0.9159156963459887, "grad_norm": 5.326900083922171, "learning_rate": 1.8431661013183444e-07, "loss": 17.1108, "step": 50107 }, { "epoch": 0.9159339755424352, "grad_norm": 4.904586067834906, "learning_rate": 1.8423698775937805e-07, "loss": 16.9879, "step": 50108 }, { "epoch": 0.9159522547388816, "grad_norm": 6.01116134538415, "learning_rate": 1.8415738226575331e-07, "loss": 17.3108, "step": 50109 }, { "epoch": 0.9159705339353282, "grad_norm": 6.048912744472871, "learning_rate": 1.8407779365123835e-07, "loss": 17.2724, "step": 50110 }, { "epoch": 0.9159888131317747, "grad_norm": 5.07021018570077, "learning_rate": 1.839982219161135e-07, "loss": 16.9521, "step": 50111 }, { "epoch": 0.9160070923282213, "grad_norm": 6.825353400951504, "learning_rate": 1.839186670606563e-07, "loss": 17.2173, "step": 50112 }, { "epoch": 0.9160253715246678, "grad_norm": 7.488500333199722, "learning_rate": 1.8383912908514656e-07, "loss": 17.8032, "step": 50113 }, { "epoch": 0.9160436507211143, "grad_norm": 5.8370684726249955, "learning_rate": 1.8375960798986293e-07, "loss": 17.2317, "step": 50114 }, { "epoch": 0.9160619299175609, "grad_norm": 8.137394634218207, "learning_rate": 1.836801037750846e-07, "loss": 17.8088, "step": 50115 }, { "epoch": 0.9160802091140073, "grad_norm": 5.912057810486172, "learning_rate": 1.8360061644108805e-07, "loss": 17.1908, "step": 50116 }, { "epoch": 0.9160984883104538, "grad_norm": 7.674259513533143, "learning_rate": 1.8352114598815474e-07, "loss": 17.7615, "step": 50117 }, { "epoch": 0.9161167675069004, "grad_norm": 6.25589115315962, "learning_rate": 1.8344169241656107e-07, "loss": 17.3362, "step": 50118 }, { "epoch": 0.9161350467033469, "grad_norm": 5.923647012831334, "learning_rate": 1.8336225572658684e-07, "loss": 17.4685, "step": 50119 }, { "epoch": 0.9161533258997935, "grad_norm": 7.1382363078721385, "learning_rate": 1.8328283591850958e-07, "loss": 17.3397, "step": 50120 }, { "epoch": 0.91617160509624, "grad_norm": 6.006683848846598, "learning_rate": 1.83203432992608e-07, "loss": 17.3485, "step": 50121 }, { "epoch": 0.9161898842926864, "grad_norm": 6.472312588631551, "learning_rate": 1.8312404694915963e-07, "loss": 17.0655, "step": 50122 }, { "epoch": 0.916208163489133, "grad_norm": 5.647313687527372, "learning_rate": 1.8304467778844482e-07, "loss": 16.9983, "step": 50123 }, { "epoch": 0.9162264426855795, "grad_norm": 6.405889285893703, "learning_rate": 1.829653255107394e-07, "loss": 17.3504, "step": 50124 }, { "epoch": 0.9162447218820261, "grad_norm": 5.628812629006913, "learning_rate": 1.8288599011632325e-07, "loss": 16.9946, "step": 50125 }, { "epoch": 0.9162630010784726, "grad_norm": 6.686049076048559, "learning_rate": 1.8280667160547382e-07, "loss": 17.8219, "step": 50126 }, { "epoch": 0.9162812802749191, "grad_norm": 5.985681315696863, "learning_rate": 1.8272736997846875e-07, "loss": 17.1791, "step": 50127 }, { "epoch": 0.9162995594713657, "grad_norm": 9.005833479904643, "learning_rate": 1.826480852355861e-07, "loss": 18.421, "step": 50128 }, { "epoch": 0.9163178386678121, "grad_norm": 5.755609385035628, "learning_rate": 1.8256881737710396e-07, "loss": 17.016, "step": 50129 }, { "epoch": 0.9163361178642587, "grad_norm": 6.97215863628247, "learning_rate": 1.8248956640329994e-07, "loss": 17.4505, "step": 50130 }, { "epoch": 0.9163543970607052, "grad_norm": 5.616297557643164, "learning_rate": 1.824103323144516e-07, "loss": 17.1602, "step": 50131 }, { "epoch": 0.9163726762571517, "grad_norm": 6.36250951412707, "learning_rate": 1.8233111511083756e-07, "loss": 17.5687, "step": 50132 }, { "epoch": 0.9163909554535983, "grad_norm": 5.8938184028244684, "learning_rate": 1.8225191479273429e-07, "loss": 17.0812, "step": 50133 }, { "epoch": 0.9164092346500448, "grad_norm": 6.549499621456429, "learning_rate": 1.8217273136042102e-07, "loss": 17.3192, "step": 50134 }, { "epoch": 0.9164275138464913, "grad_norm": 6.09828343346473, "learning_rate": 1.8209356481417307e-07, "loss": 17.1355, "step": 50135 }, { "epoch": 0.9164457930429378, "grad_norm": 6.725565619369333, "learning_rate": 1.8201441515427022e-07, "loss": 17.3186, "step": 50136 }, { "epoch": 0.9164640722393843, "grad_norm": 5.177740108756123, "learning_rate": 1.8193528238098834e-07, "loss": 17.0627, "step": 50137 }, { "epoch": 0.9164823514358309, "grad_norm": 6.031619057454711, "learning_rate": 1.8185616649460501e-07, "loss": 17.4441, "step": 50138 }, { "epoch": 0.9165006306322774, "grad_norm": 5.2035477440468325, "learning_rate": 1.817770674953978e-07, "loss": 17.0798, "step": 50139 }, { "epoch": 0.916518909828724, "grad_norm": 6.118313136171515, "learning_rate": 1.8169798538364368e-07, "loss": 17.4219, "step": 50140 }, { "epoch": 0.9165371890251705, "grad_norm": 5.757060006973935, "learning_rate": 1.8161892015962024e-07, "loss": 17.2428, "step": 50141 }, { "epoch": 0.9165554682216169, "grad_norm": 6.209335866390042, "learning_rate": 1.8153987182360444e-07, "loss": 17.1932, "step": 50142 }, { "epoch": 0.9165737474180635, "grad_norm": 6.195547518784263, "learning_rate": 1.814608403758733e-07, "loss": 17.3955, "step": 50143 }, { "epoch": 0.91659202661451, "grad_norm": 6.025463354079415, "learning_rate": 1.8138182581670327e-07, "loss": 17.4104, "step": 50144 }, { "epoch": 0.9166103058109566, "grad_norm": 6.979147502093883, "learning_rate": 1.8130282814637246e-07, "loss": 17.5971, "step": 50145 }, { "epoch": 0.9166285850074031, "grad_norm": 7.065350324002518, "learning_rate": 1.812238473651573e-07, "loss": 17.564, "step": 50146 }, { "epoch": 0.9166468642038496, "grad_norm": 7.909459848327349, "learning_rate": 1.8114488347333315e-07, "loss": 18.0969, "step": 50147 }, { "epoch": 0.9166651434002961, "grad_norm": 6.579987664844393, "learning_rate": 1.8106593647117864e-07, "loss": 17.6168, "step": 50148 }, { "epoch": 0.9166834225967426, "grad_norm": 5.334533618392892, "learning_rate": 1.8098700635897026e-07, "loss": 17.251, "step": 50149 }, { "epoch": 0.9167017017931892, "grad_norm": 5.238870820760903, "learning_rate": 1.8090809313698388e-07, "loss": 17.1099, "step": 50150 }, { "epoch": 0.9167199809896357, "grad_norm": 7.553850338534373, "learning_rate": 1.808291968054965e-07, "loss": 17.2257, "step": 50151 }, { "epoch": 0.9167382601860822, "grad_norm": 5.979240296940876, "learning_rate": 1.8075031736478454e-07, "loss": 17.3264, "step": 50152 }, { "epoch": 0.9167565393825288, "grad_norm": 6.899863929783236, "learning_rate": 1.8067145481512505e-07, "loss": 17.9527, "step": 50153 }, { "epoch": 0.9167748185789752, "grad_norm": 7.608500089686956, "learning_rate": 1.8059260915679333e-07, "loss": 17.839, "step": 50154 }, { "epoch": 0.9167930977754218, "grad_norm": 5.696967288986715, "learning_rate": 1.805137803900664e-07, "loss": 17.1968, "step": 50155 }, { "epoch": 0.9168113769718683, "grad_norm": 5.592645978687056, "learning_rate": 1.8043496851522068e-07, "loss": 16.8345, "step": 50156 }, { "epoch": 0.9168296561683148, "grad_norm": 6.537067153550381, "learning_rate": 1.8035617353253154e-07, "loss": 17.5884, "step": 50157 }, { "epoch": 0.9168479353647614, "grad_norm": 8.108498505680066, "learning_rate": 1.8027739544227595e-07, "loss": 17.6862, "step": 50158 }, { "epoch": 0.9168662145612079, "grad_norm": 7.999836087121812, "learning_rate": 1.8019863424473038e-07, "loss": 18.0987, "step": 50159 }, { "epoch": 0.9168844937576545, "grad_norm": 6.617400251793829, "learning_rate": 1.801198899401707e-07, "loss": 17.632, "step": 50160 }, { "epoch": 0.9169027729541009, "grad_norm": 7.127202839570754, "learning_rate": 1.8004116252887172e-07, "loss": 17.2993, "step": 50161 }, { "epoch": 0.9169210521505474, "grad_norm": 6.175265472321632, "learning_rate": 1.799624520111104e-07, "loss": 17.5747, "step": 50162 }, { "epoch": 0.916939331346994, "grad_norm": 5.773586661762769, "learning_rate": 1.798837583871621e-07, "loss": 17.1311, "step": 50163 }, { "epoch": 0.9169576105434405, "grad_norm": 5.536617790476175, "learning_rate": 1.798050816573038e-07, "loss": 17.0003, "step": 50164 }, { "epoch": 0.9169758897398871, "grad_norm": 6.3413831801433895, "learning_rate": 1.7972642182181031e-07, "loss": 17.0633, "step": 50165 }, { "epoch": 0.9169941689363336, "grad_norm": 5.396687726685475, "learning_rate": 1.7964777888095642e-07, "loss": 16.9116, "step": 50166 }, { "epoch": 0.91701244813278, "grad_norm": 5.849142848091174, "learning_rate": 1.795691528350191e-07, "loss": 17.1773, "step": 50167 }, { "epoch": 0.9170307273292266, "grad_norm": 8.558212505529443, "learning_rate": 1.794905436842742e-07, "loss": 18.4854, "step": 50168 }, { "epoch": 0.9170490065256731, "grad_norm": 7.197186485715986, "learning_rate": 1.7941195142899604e-07, "loss": 17.5796, "step": 50169 }, { "epoch": 0.9170672857221197, "grad_norm": 6.588514566676948, "learning_rate": 1.7933337606946154e-07, "loss": 17.2951, "step": 50170 }, { "epoch": 0.9170855649185662, "grad_norm": 7.453117509026948, "learning_rate": 1.7925481760594497e-07, "loss": 18.0701, "step": 50171 }, { "epoch": 0.9171038441150127, "grad_norm": 6.375762929868427, "learning_rate": 1.7917627603872112e-07, "loss": 17.1786, "step": 50172 }, { "epoch": 0.9171221233114593, "grad_norm": 5.5604526815156285, "learning_rate": 1.7909775136806695e-07, "loss": 17.295, "step": 50173 }, { "epoch": 0.9171404025079057, "grad_norm": 5.3376781472475, "learning_rate": 1.7901924359425672e-07, "loss": 17.156, "step": 50174 }, { "epoch": 0.9171586817043523, "grad_norm": 6.979237042579222, "learning_rate": 1.7894075271756517e-07, "loss": 17.8346, "step": 50175 }, { "epoch": 0.9171769609007988, "grad_norm": 5.207313436646377, "learning_rate": 1.7886227873826823e-07, "loss": 17.1891, "step": 50176 }, { "epoch": 0.9171952400972453, "grad_norm": 7.458833661151531, "learning_rate": 1.7878382165664065e-07, "loss": 17.9734, "step": 50177 }, { "epoch": 0.9172135192936919, "grad_norm": 6.061584990922999, "learning_rate": 1.7870538147295725e-07, "loss": 17.2241, "step": 50178 }, { "epoch": 0.9172317984901384, "grad_norm": 6.999325867973257, "learning_rate": 1.7862695818749387e-07, "loss": 17.5249, "step": 50179 }, { "epoch": 0.917250077686585, "grad_norm": 7.775079990176476, "learning_rate": 1.7854855180052367e-07, "loss": 17.6681, "step": 50180 }, { "epoch": 0.9172683568830314, "grad_norm": 5.801882772963384, "learning_rate": 1.7847016231232306e-07, "loss": 17.2066, "step": 50181 }, { "epoch": 0.9172866360794779, "grad_norm": 6.401443015073498, "learning_rate": 1.7839178972316574e-07, "loss": 17.5, "step": 50182 }, { "epoch": 0.9173049152759245, "grad_norm": 5.809450996326564, "learning_rate": 1.7831343403332702e-07, "loss": 17.3758, "step": 50183 }, { "epoch": 0.917323194472371, "grad_norm": 6.402975033968109, "learning_rate": 1.7823509524308113e-07, "loss": 17.3552, "step": 50184 }, { "epoch": 0.9173414736688175, "grad_norm": 7.249163189078952, "learning_rate": 1.7815677335270287e-07, "loss": 17.4257, "step": 50185 }, { "epoch": 0.917359752865264, "grad_norm": 5.6741219314878455, "learning_rate": 1.7807846836246646e-07, "loss": 17.131, "step": 50186 }, { "epoch": 0.9173780320617105, "grad_norm": 5.58428320055382, "learning_rate": 1.780001802726472e-07, "loss": 17.2046, "step": 50187 }, { "epoch": 0.9173963112581571, "grad_norm": 5.90201598322727, "learning_rate": 1.779219090835188e-07, "loss": 17.2225, "step": 50188 }, { "epoch": 0.9174145904546036, "grad_norm": 6.233887254690519, "learning_rate": 1.7784365479535491e-07, "loss": 17.1581, "step": 50189 }, { "epoch": 0.9174328696510501, "grad_norm": 5.409573471409679, "learning_rate": 1.7776541740843146e-07, "loss": 16.9301, "step": 50190 }, { "epoch": 0.9174511488474967, "grad_norm": 6.296360788294204, "learning_rate": 1.776871969230215e-07, "loss": 17.364, "step": 50191 }, { "epoch": 0.9174694280439432, "grad_norm": 6.276211181369958, "learning_rate": 1.7760899333939873e-07, "loss": 17.414, "step": 50192 }, { "epoch": 0.9174877072403897, "grad_norm": 6.737789810548365, "learning_rate": 1.7753080665783907e-07, "loss": 17.4648, "step": 50193 }, { "epoch": 0.9175059864368362, "grad_norm": 5.805901870597567, "learning_rate": 1.7745263687861446e-07, "loss": 17.2346, "step": 50194 }, { "epoch": 0.9175242656332827, "grad_norm": 6.519821691680329, "learning_rate": 1.7737448400200031e-07, "loss": 17.4665, "step": 50195 }, { "epoch": 0.9175425448297293, "grad_norm": 7.821968399615145, "learning_rate": 1.7729634802827022e-07, "loss": 17.6319, "step": 50196 }, { "epoch": 0.9175608240261758, "grad_norm": 5.2314400279102005, "learning_rate": 1.7721822895769735e-07, "loss": 17.1959, "step": 50197 }, { "epoch": 0.9175791032226224, "grad_norm": 9.564318888783516, "learning_rate": 1.7714012679055704e-07, "loss": 17.4752, "step": 50198 }, { "epoch": 0.9175973824190689, "grad_norm": 7.165933185540298, "learning_rate": 1.770620415271218e-07, "loss": 17.2381, "step": 50199 }, { "epoch": 0.9176156616155153, "grad_norm": 6.067540220683096, "learning_rate": 1.7698397316766535e-07, "loss": 17.3219, "step": 50200 }, { "epoch": 0.9176339408119619, "grad_norm": 5.618895011086914, "learning_rate": 1.769059217124619e-07, "loss": 17.2458, "step": 50201 }, { "epoch": 0.9176522200084084, "grad_norm": 6.747007869876375, "learning_rate": 1.76827887161784e-07, "loss": 17.6493, "step": 50202 }, { "epoch": 0.917670499204855, "grad_norm": 5.956846776864514, "learning_rate": 1.7674986951590645e-07, "loss": 17.3165, "step": 50203 }, { "epoch": 0.9176887784013015, "grad_norm": 6.17146987886659, "learning_rate": 1.7667186877510122e-07, "loss": 17.0806, "step": 50204 }, { "epoch": 0.917707057597748, "grad_norm": 6.013661192344169, "learning_rate": 1.765938849396437e-07, "loss": 17.3999, "step": 50205 }, { "epoch": 0.9177253367941945, "grad_norm": 5.386049177395751, "learning_rate": 1.7651591800980526e-07, "loss": 17.3863, "step": 50206 }, { "epoch": 0.917743615990641, "grad_norm": 6.573648710959528, "learning_rate": 1.764379679858602e-07, "loss": 17.5018, "step": 50207 }, { "epoch": 0.9177618951870876, "grad_norm": 8.16112994010274, "learning_rate": 1.7636003486808218e-07, "loss": 18.5223, "step": 50208 }, { "epoch": 0.9177801743835341, "grad_norm": 6.219481907979758, "learning_rate": 1.762821186567426e-07, "loss": 17.4573, "step": 50209 }, { "epoch": 0.9177984535799806, "grad_norm": 6.682911422270158, "learning_rate": 1.762042193521163e-07, "loss": 17.5172, "step": 50210 }, { "epoch": 0.9178167327764272, "grad_norm": 5.169324697267469, "learning_rate": 1.7612633695447468e-07, "loss": 16.9619, "step": 50211 }, { "epoch": 0.9178350119728736, "grad_norm": 6.126919297573059, "learning_rate": 1.7604847146409198e-07, "loss": 17.2965, "step": 50212 }, { "epoch": 0.9178532911693202, "grad_norm": 6.246606547696776, "learning_rate": 1.7597062288124134e-07, "loss": 17.4938, "step": 50213 }, { "epoch": 0.9178715703657667, "grad_norm": 6.961394571946749, "learning_rate": 1.7589279120619475e-07, "loss": 17.7385, "step": 50214 }, { "epoch": 0.9178898495622132, "grad_norm": 5.289560492462992, "learning_rate": 1.758149764392253e-07, "loss": 17.1483, "step": 50215 }, { "epoch": 0.9179081287586598, "grad_norm": 5.141028719416489, "learning_rate": 1.7573717858060614e-07, "loss": 17.0196, "step": 50216 }, { "epoch": 0.9179264079551063, "grad_norm": 6.429086782025779, "learning_rate": 1.7565939763060924e-07, "loss": 17.4227, "step": 50217 }, { "epoch": 0.9179446871515529, "grad_norm": 5.650123868193521, "learning_rate": 1.7558163358950776e-07, "loss": 17.0467, "step": 50218 }, { "epoch": 0.9179629663479993, "grad_norm": 6.960202807126538, "learning_rate": 1.7550388645757365e-07, "loss": 17.4599, "step": 50219 }, { "epoch": 0.9179812455444458, "grad_norm": 7.047258431127376, "learning_rate": 1.7542615623507952e-07, "loss": 17.4274, "step": 50220 }, { "epoch": 0.9179995247408924, "grad_norm": 9.679598289267036, "learning_rate": 1.7534844292229847e-07, "loss": 17.7692, "step": 50221 }, { "epoch": 0.9180178039373389, "grad_norm": 6.366410686573056, "learning_rate": 1.752707465195025e-07, "loss": 17.4043, "step": 50222 }, { "epoch": 0.9180360831337855, "grad_norm": 4.461784784820641, "learning_rate": 1.751930670269636e-07, "loss": 16.8516, "step": 50223 }, { "epoch": 0.918054362330232, "grad_norm": 7.194491936165198, "learning_rate": 1.7511540444495435e-07, "loss": 17.3294, "step": 50224 }, { "epoch": 0.9180726415266784, "grad_norm": 6.031209779813628, "learning_rate": 1.7503775877374673e-07, "loss": 17.2735, "step": 50225 }, { "epoch": 0.918090920723125, "grad_norm": 6.002808156707299, "learning_rate": 1.749601300136139e-07, "loss": 16.9911, "step": 50226 }, { "epoch": 0.9181091999195715, "grad_norm": 5.340658163890381, "learning_rate": 1.748825181648267e-07, "loss": 17.2033, "step": 50227 }, { "epoch": 0.9181274791160181, "grad_norm": 6.3438594106029615, "learning_rate": 1.748049232276572e-07, "loss": 17.21, "step": 50228 }, { "epoch": 0.9181457583124646, "grad_norm": 6.586853502156636, "learning_rate": 1.7472734520237844e-07, "loss": 17.2548, "step": 50229 }, { "epoch": 0.9181640375089111, "grad_norm": 5.1538246167676665, "learning_rate": 1.746497840892608e-07, "loss": 16.983, "step": 50230 }, { "epoch": 0.9181823167053577, "grad_norm": 5.277865235296507, "learning_rate": 1.7457223988857685e-07, "loss": 16.8342, "step": 50231 }, { "epoch": 0.9182005959018041, "grad_norm": 6.740797257539606, "learning_rate": 1.7449471260059969e-07, "loss": 17.4006, "step": 50232 }, { "epoch": 0.9182188750982507, "grad_norm": 6.247219506846371, "learning_rate": 1.744172022255991e-07, "loss": 17.3502, "step": 50233 }, { "epoch": 0.9182371542946972, "grad_norm": 5.575607323669197, "learning_rate": 1.743397087638471e-07, "loss": 17.1397, "step": 50234 }, { "epoch": 0.9182554334911437, "grad_norm": 7.689718760846245, "learning_rate": 1.7426223221561623e-07, "loss": 17.3836, "step": 50235 }, { "epoch": 0.9182737126875903, "grad_norm": 5.848877884200374, "learning_rate": 1.7418477258117793e-07, "loss": 17.1651, "step": 50236 }, { "epoch": 0.9182919918840368, "grad_norm": 5.695931122281007, "learning_rate": 1.7410732986080204e-07, "loss": 17.1558, "step": 50237 }, { "epoch": 0.9183102710804834, "grad_norm": 8.974024074500173, "learning_rate": 1.7402990405476216e-07, "loss": 17.9203, "step": 50238 }, { "epoch": 0.9183285502769298, "grad_norm": 5.379896440773064, "learning_rate": 1.7395249516332758e-07, "loss": 17.0977, "step": 50239 }, { "epoch": 0.9183468294733763, "grad_norm": 4.939774019942018, "learning_rate": 1.7387510318677136e-07, "loss": 16.8444, "step": 50240 }, { "epoch": 0.9183651086698229, "grad_norm": 7.54612817287757, "learning_rate": 1.7379772812536445e-07, "loss": 17.6977, "step": 50241 }, { "epoch": 0.9183833878662694, "grad_norm": 5.190736454726238, "learning_rate": 1.7372036997937715e-07, "loss": 17.0148, "step": 50242 }, { "epoch": 0.918401667062716, "grad_norm": 8.989232523447676, "learning_rate": 1.7364302874908146e-07, "loss": 17.8557, "step": 50243 }, { "epoch": 0.9184199462591625, "grad_norm": 6.250357105501631, "learning_rate": 1.7356570443474828e-07, "loss": 17.3982, "step": 50244 }, { "epoch": 0.9184382254556089, "grad_norm": 5.359914963731649, "learning_rate": 1.73488397036648e-07, "loss": 16.9325, "step": 50245 }, { "epoch": 0.9184565046520555, "grad_norm": 5.636937529383613, "learning_rate": 1.734111065550531e-07, "loss": 16.7492, "step": 50246 }, { "epoch": 0.918474783848502, "grad_norm": 7.1305015169706145, "learning_rate": 1.7333383299023287e-07, "loss": 17.5403, "step": 50247 }, { "epoch": 0.9184930630449486, "grad_norm": 5.677183497164375, "learning_rate": 1.7325657634245817e-07, "loss": 17.2403, "step": 50248 }, { "epoch": 0.9185113422413951, "grad_norm": 8.00618415072637, "learning_rate": 1.731793366120005e-07, "loss": 17.7356, "step": 50249 }, { "epoch": 0.9185296214378416, "grad_norm": 5.304669706162567, "learning_rate": 1.7310211379913066e-07, "loss": 17.0837, "step": 50250 }, { "epoch": 0.9185479006342882, "grad_norm": 5.551425600649073, "learning_rate": 1.7302490790411853e-07, "loss": 16.9785, "step": 50251 }, { "epoch": 0.9185661798307346, "grad_norm": 5.708964622411094, "learning_rate": 1.7294771892723604e-07, "loss": 17.2585, "step": 50252 }, { "epoch": 0.9185844590271811, "grad_norm": 6.292217435197287, "learning_rate": 1.7287054686875305e-07, "loss": 17.3728, "step": 50253 }, { "epoch": 0.9186027382236277, "grad_norm": 5.310556785695829, "learning_rate": 1.7279339172893926e-07, "loss": 16.8039, "step": 50254 }, { "epoch": 0.9186210174200742, "grad_norm": 7.082097108561616, "learning_rate": 1.727162535080662e-07, "loss": 17.4322, "step": 50255 }, { "epoch": 0.9186392966165208, "grad_norm": 6.707397166120887, "learning_rate": 1.7263913220640305e-07, "loss": 17.6374, "step": 50256 }, { "epoch": 0.9186575758129673, "grad_norm": 7.497646332105691, "learning_rate": 1.7256202782422126e-07, "loss": 17.4975, "step": 50257 }, { "epoch": 0.9186758550094137, "grad_norm": 6.647023787689962, "learning_rate": 1.7248494036179063e-07, "loss": 17.4816, "step": 50258 }, { "epoch": 0.9186941342058603, "grad_norm": 5.798256806605163, "learning_rate": 1.7240786981938097e-07, "loss": 17.028, "step": 50259 }, { "epoch": 0.9187124134023068, "grad_norm": 6.463950785444965, "learning_rate": 1.7233081619726365e-07, "loss": 17.4804, "step": 50260 }, { "epoch": 0.9187306925987534, "grad_norm": 5.71064999647215, "learning_rate": 1.7225377949570797e-07, "loss": 17.2281, "step": 50261 }, { "epoch": 0.9187489717951999, "grad_norm": 6.7525919653415665, "learning_rate": 1.7217675971498315e-07, "loss": 17.5783, "step": 50262 }, { "epoch": 0.9187672509916464, "grad_norm": 6.413770206289173, "learning_rate": 1.7209975685536008e-07, "loss": 17.5606, "step": 50263 }, { "epoch": 0.918785530188093, "grad_norm": 7.412181206083379, "learning_rate": 1.7202277091710906e-07, "loss": 17.5738, "step": 50264 }, { "epoch": 0.9188038093845394, "grad_norm": 7.158685964543296, "learning_rate": 1.7194580190049827e-07, "loss": 17.682, "step": 50265 }, { "epoch": 0.918822088580986, "grad_norm": 5.066267435535476, "learning_rate": 1.718688498057991e-07, "loss": 17.0694, "step": 50266 }, { "epoch": 0.9188403677774325, "grad_norm": 7.150467538068309, "learning_rate": 1.7179191463328028e-07, "loss": 17.4616, "step": 50267 }, { "epoch": 0.918858646973879, "grad_norm": 6.088767217512855, "learning_rate": 1.7171499638321153e-07, "loss": 16.996, "step": 50268 }, { "epoch": 0.9188769261703256, "grad_norm": 6.654574397508844, "learning_rate": 1.7163809505586327e-07, "loss": 17.5842, "step": 50269 }, { "epoch": 0.918895205366772, "grad_norm": 6.715498075628429, "learning_rate": 1.7156121065150466e-07, "loss": 17.6598, "step": 50270 }, { "epoch": 0.9189134845632186, "grad_norm": 5.964641842554421, "learning_rate": 1.7148434317040442e-07, "loss": 17.1217, "step": 50271 }, { "epoch": 0.9189317637596651, "grad_norm": 6.344825578184806, "learning_rate": 1.714074926128334e-07, "loss": 17.4897, "step": 50272 }, { "epoch": 0.9189500429561116, "grad_norm": 4.852176204046164, "learning_rate": 1.713306589790592e-07, "loss": 16.801, "step": 50273 }, { "epoch": 0.9189683221525582, "grad_norm": 6.690309716824905, "learning_rate": 1.712538422693527e-07, "loss": 17.5042, "step": 50274 }, { "epoch": 0.9189866013490047, "grad_norm": 6.06010952267056, "learning_rate": 1.7117704248398204e-07, "loss": 16.9996, "step": 50275 }, { "epoch": 0.9190048805454513, "grad_norm": 8.068663895130609, "learning_rate": 1.7110025962321696e-07, "loss": 18.0694, "step": 50276 }, { "epoch": 0.9190231597418977, "grad_norm": 5.558270400151789, "learning_rate": 1.7102349368732618e-07, "loss": 17.0467, "step": 50277 }, { "epoch": 0.9190414389383442, "grad_norm": 6.675306680548369, "learning_rate": 1.7094674467657945e-07, "loss": 17.1497, "step": 50278 }, { "epoch": 0.9190597181347908, "grad_norm": 5.279244101524821, "learning_rate": 1.7087001259124492e-07, "loss": 17.0197, "step": 50279 }, { "epoch": 0.9190779973312373, "grad_norm": 5.383562493504457, "learning_rate": 1.7079329743159233e-07, "loss": 16.9049, "step": 50280 }, { "epoch": 0.9190962765276839, "grad_norm": 6.104867297624165, "learning_rate": 1.707165991978904e-07, "loss": 17.2138, "step": 50281 }, { "epoch": 0.9191145557241304, "grad_norm": 5.65155614659033, "learning_rate": 1.7063991789040724e-07, "loss": 17.1171, "step": 50282 }, { "epoch": 0.9191328349205768, "grad_norm": 5.6375164477798165, "learning_rate": 1.705632535094126e-07, "loss": 17.1368, "step": 50283 }, { "epoch": 0.9191511141170234, "grad_norm": 6.424329265277734, "learning_rate": 1.704866060551741e-07, "loss": 17.6343, "step": 50284 }, { "epoch": 0.9191693933134699, "grad_norm": 6.119080164473372, "learning_rate": 1.704099755279609e-07, "loss": 17.2062, "step": 50285 }, { "epoch": 0.9191876725099165, "grad_norm": 6.134947417238296, "learning_rate": 1.7033336192804173e-07, "loss": 17.2026, "step": 50286 }, { "epoch": 0.919205951706363, "grad_norm": 7.387840717046263, "learning_rate": 1.7025676525568524e-07, "loss": 17.8304, "step": 50287 }, { "epoch": 0.9192242309028095, "grad_norm": 5.424682086281553, "learning_rate": 1.7018018551115954e-07, "loss": 17.0021, "step": 50288 }, { "epoch": 0.9192425100992561, "grad_norm": 6.111892271412329, "learning_rate": 1.701036226947339e-07, "loss": 17.0202, "step": 50289 }, { "epoch": 0.9192607892957025, "grad_norm": 5.581246829176128, "learning_rate": 1.7002707680667474e-07, "loss": 17.1541, "step": 50290 }, { "epoch": 0.9192790684921491, "grad_norm": 5.900007046625004, "learning_rate": 1.699505478472524e-07, "loss": 17.1375, "step": 50291 }, { "epoch": 0.9192973476885956, "grad_norm": 6.856850456415368, "learning_rate": 1.6987403581673444e-07, "loss": 17.2682, "step": 50292 }, { "epoch": 0.9193156268850421, "grad_norm": 4.785666161491398, "learning_rate": 1.697975407153879e-07, "loss": 16.8013, "step": 50293 }, { "epoch": 0.9193339060814887, "grad_norm": 6.105391870194643, "learning_rate": 1.6972106254348196e-07, "loss": 17.4348, "step": 50294 }, { "epoch": 0.9193521852779352, "grad_norm": 6.884128910116216, "learning_rate": 1.6964460130128535e-07, "loss": 17.3973, "step": 50295 }, { "epoch": 0.9193704644743818, "grad_norm": 6.65649861293777, "learning_rate": 1.6956815698906447e-07, "loss": 17.6747, "step": 50296 }, { "epoch": 0.9193887436708282, "grad_norm": 5.149322187350867, "learning_rate": 1.694917296070886e-07, "loss": 16.9123, "step": 50297 }, { "epoch": 0.9194070228672747, "grad_norm": 8.229458239467403, "learning_rate": 1.6941531915562526e-07, "loss": 17.9394, "step": 50298 }, { "epoch": 0.9194253020637213, "grad_norm": 6.1445245087787965, "learning_rate": 1.6933892563494148e-07, "loss": 17.4685, "step": 50299 }, { "epoch": 0.9194435812601678, "grad_norm": 6.896006245299525, "learning_rate": 1.6926254904530592e-07, "loss": 17.5044, "step": 50300 }, { "epoch": 0.9194618604566144, "grad_norm": 7.875718502881331, "learning_rate": 1.691861893869856e-07, "loss": 17.4477, "step": 50301 }, { "epoch": 0.9194801396530609, "grad_norm": 5.892151208238568, "learning_rate": 1.6910984666024866e-07, "loss": 17.1811, "step": 50302 }, { "epoch": 0.9194984188495073, "grad_norm": 6.731426528630718, "learning_rate": 1.6903352086536206e-07, "loss": 17.1248, "step": 50303 }, { "epoch": 0.9195166980459539, "grad_norm": 5.7694080806034895, "learning_rate": 1.6895721200259396e-07, "loss": 17.2413, "step": 50304 }, { "epoch": 0.9195349772424004, "grad_norm": 8.536496624892424, "learning_rate": 1.688809200722119e-07, "loss": 17.5548, "step": 50305 }, { "epoch": 0.919553256438847, "grad_norm": 5.2609851283466345, "learning_rate": 1.6880464507448346e-07, "loss": 16.9278, "step": 50306 }, { "epoch": 0.9195715356352935, "grad_norm": 8.32803666100418, "learning_rate": 1.6872838700967452e-07, "loss": 17.6297, "step": 50307 }, { "epoch": 0.91958981483174, "grad_norm": 7.406827548951736, "learning_rate": 1.686521458780538e-07, "loss": 18.3675, "step": 50308 }, { "epoch": 0.9196080940281866, "grad_norm": 5.314095304500133, "learning_rate": 1.6857592167988824e-07, "loss": 16.8024, "step": 50309 }, { "epoch": 0.919626373224633, "grad_norm": 7.909731240019741, "learning_rate": 1.6849971441544433e-07, "loss": 17.6639, "step": 50310 }, { "epoch": 0.9196446524210796, "grad_norm": 7.202163949292013, "learning_rate": 1.684235240849902e-07, "loss": 17.7721, "step": 50311 }, { "epoch": 0.9196629316175261, "grad_norm": 6.272001274813915, "learning_rate": 1.683473506887917e-07, "loss": 17.3297, "step": 50312 }, { "epoch": 0.9196812108139726, "grad_norm": 5.568013995296426, "learning_rate": 1.68271194227117e-07, "loss": 16.8437, "step": 50313 }, { "epoch": 0.9196994900104192, "grad_norm": 5.485022875860964, "learning_rate": 1.6819505470023257e-07, "loss": 17.26, "step": 50314 }, { "epoch": 0.9197177692068657, "grad_norm": 6.859094508006776, "learning_rate": 1.6811893210840536e-07, "loss": 17.8084, "step": 50315 }, { "epoch": 0.9197360484033122, "grad_norm": 9.51751968719022, "learning_rate": 1.6804282645190128e-07, "loss": 18.0445, "step": 50316 }, { "epoch": 0.9197543275997587, "grad_norm": 5.991761281312428, "learning_rate": 1.6796673773098793e-07, "loss": 17.3946, "step": 50317 }, { "epoch": 0.9197726067962052, "grad_norm": 6.467743492880269, "learning_rate": 1.6789066594593173e-07, "loss": 17.3247, "step": 50318 }, { "epoch": 0.9197908859926518, "grad_norm": 5.999327697365238, "learning_rate": 1.6781461109700027e-07, "loss": 17.4371, "step": 50319 }, { "epoch": 0.9198091651890983, "grad_norm": 6.791357248428721, "learning_rate": 1.6773857318445884e-07, "loss": 17.617, "step": 50320 }, { "epoch": 0.9198274443855448, "grad_norm": 7.942373480338683, "learning_rate": 1.6766255220857398e-07, "loss": 17.7781, "step": 50321 }, { "epoch": 0.9198457235819913, "grad_norm": 5.7390074069849994, "learning_rate": 1.675865481696126e-07, "loss": 17.0867, "step": 50322 }, { "epoch": 0.9198640027784378, "grad_norm": 5.463238927830102, "learning_rate": 1.675105610678418e-07, "loss": 17.2427, "step": 50323 }, { "epoch": 0.9198822819748844, "grad_norm": 4.832254045141874, "learning_rate": 1.6743459090352632e-07, "loss": 16.9507, "step": 50324 }, { "epoch": 0.9199005611713309, "grad_norm": 6.444421817301866, "learning_rate": 1.673586376769337e-07, "loss": 17.3078, "step": 50325 }, { "epoch": 0.9199188403677774, "grad_norm": 5.608465839242058, "learning_rate": 1.672827013883299e-07, "loss": 17.3501, "step": 50326 }, { "epoch": 0.919937119564224, "grad_norm": 7.218783411417325, "learning_rate": 1.6720678203798024e-07, "loss": 17.6693, "step": 50327 }, { "epoch": 0.9199553987606705, "grad_norm": 7.5023175431172655, "learning_rate": 1.6713087962615227e-07, "loss": 17.7865, "step": 50328 }, { "epoch": 0.919973677957117, "grad_norm": 7.6282512449030655, "learning_rate": 1.6705499415311022e-07, "loss": 17.4565, "step": 50329 }, { "epoch": 0.9199919571535635, "grad_norm": 5.5282646140556295, "learning_rate": 1.6697912561912167e-07, "loss": 17.1896, "step": 50330 }, { "epoch": 0.92001023635001, "grad_norm": 6.566897236924669, "learning_rate": 1.669032740244514e-07, "loss": 17.331, "step": 50331 }, { "epoch": 0.9200285155464566, "grad_norm": 6.331179259226822, "learning_rate": 1.6682743936936586e-07, "loss": 17.5162, "step": 50332 }, { "epoch": 0.9200467947429031, "grad_norm": 5.94913164703975, "learning_rate": 1.6675162165413095e-07, "loss": 17.3775, "step": 50333 }, { "epoch": 0.9200650739393497, "grad_norm": 6.485489592669349, "learning_rate": 1.6667582087901258e-07, "loss": 17.1415, "step": 50334 }, { "epoch": 0.9200833531357961, "grad_norm": 4.303240340724774, "learning_rate": 1.6660003704427552e-07, "loss": 16.6202, "step": 50335 }, { "epoch": 0.9201016323322426, "grad_norm": 5.651872528239241, "learning_rate": 1.6652427015018624e-07, "loss": 17.2701, "step": 50336 }, { "epoch": 0.9201199115286892, "grad_norm": 6.081826569802484, "learning_rate": 1.6644852019701009e-07, "loss": 17.6104, "step": 50337 }, { "epoch": 0.9201381907251357, "grad_norm": 6.814726860695306, "learning_rate": 1.6637278718501182e-07, "loss": 17.8523, "step": 50338 }, { "epoch": 0.9201564699215823, "grad_norm": 5.602629651988817, "learning_rate": 1.6629707111445735e-07, "loss": 16.8176, "step": 50339 }, { "epoch": 0.9201747491180288, "grad_norm": 6.227967929055022, "learning_rate": 1.6622137198561316e-07, "loss": 17.3331, "step": 50340 }, { "epoch": 0.9201930283144752, "grad_norm": 6.928750306056506, "learning_rate": 1.6614568979874235e-07, "loss": 17.2018, "step": 50341 }, { "epoch": 0.9202113075109218, "grad_norm": 8.448779967629097, "learning_rate": 1.6607002455411248e-07, "loss": 17.7374, "step": 50342 }, { "epoch": 0.9202295867073683, "grad_norm": 7.622328652277372, "learning_rate": 1.659943762519878e-07, "loss": 17.4995, "step": 50343 }, { "epoch": 0.9202478659038149, "grad_norm": 5.672220897071002, "learning_rate": 1.6591874489263248e-07, "loss": 16.9234, "step": 50344 }, { "epoch": 0.9202661451002614, "grad_norm": 6.897455995187391, "learning_rate": 1.6584313047631308e-07, "loss": 17.692, "step": 50345 }, { "epoch": 0.9202844242967079, "grad_norm": 5.80620055507775, "learning_rate": 1.657675330032943e-07, "loss": 17.0893, "step": 50346 }, { "epoch": 0.9203027034931545, "grad_norm": 7.221953822797352, "learning_rate": 1.656919524738404e-07, "loss": 17.0654, "step": 50347 }, { "epoch": 0.9203209826896009, "grad_norm": 6.957770634319061, "learning_rate": 1.656163888882162e-07, "loss": 17.5049, "step": 50348 }, { "epoch": 0.9203392618860475, "grad_norm": 5.492254815554873, "learning_rate": 1.6554084224668699e-07, "loss": 17.4093, "step": 50349 }, { "epoch": 0.920357541082494, "grad_norm": 5.894321987559275, "learning_rate": 1.6546531254951869e-07, "loss": 17.194, "step": 50350 }, { "epoch": 0.9203758202789405, "grad_norm": 6.682825125164934, "learning_rate": 1.6538979979697445e-07, "loss": 17.6064, "step": 50351 }, { "epoch": 0.9203940994753871, "grad_norm": 5.77729463976098, "learning_rate": 1.6531430398931903e-07, "loss": 16.9785, "step": 50352 }, { "epoch": 0.9204123786718336, "grad_norm": 6.941695026385093, "learning_rate": 1.6523882512681832e-07, "loss": 17.6823, "step": 50353 }, { "epoch": 0.9204306578682802, "grad_norm": 6.995343499321582, "learning_rate": 1.6516336320973547e-07, "loss": 17.3544, "step": 50354 }, { "epoch": 0.9204489370647266, "grad_norm": 5.835134378004522, "learning_rate": 1.6508791823833524e-07, "loss": 17.3149, "step": 50355 }, { "epoch": 0.9204672162611731, "grad_norm": 7.369604280185024, "learning_rate": 1.6501249021288302e-07, "loss": 17.4639, "step": 50356 }, { "epoch": 0.9204854954576197, "grad_norm": 5.413021412081329, "learning_rate": 1.6493707913364188e-07, "loss": 16.9443, "step": 50357 }, { "epoch": 0.9205037746540662, "grad_norm": 7.74497421115814, "learning_rate": 1.6486168500087663e-07, "loss": 17.6104, "step": 50358 }, { "epoch": 0.9205220538505128, "grad_norm": 6.241712837752547, "learning_rate": 1.6478630781485205e-07, "loss": 17.3727, "step": 50359 }, { "epoch": 0.9205403330469593, "grad_norm": 6.915517341445884, "learning_rate": 1.6471094757583184e-07, "loss": 17.2919, "step": 50360 }, { "epoch": 0.9205586122434057, "grad_norm": 5.738250426496475, "learning_rate": 1.6463560428407964e-07, "loss": 16.9402, "step": 50361 }, { "epoch": 0.9205768914398523, "grad_norm": 5.5318087673195375, "learning_rate": 1.6456027793986028e-07, "loss": 17.3599, "step": 50362 }, { "epoch": 0.9205951706362988, "grad_norm": 7.2892665093816635, "learning_rate": 1.6448496854343744e-07, "loss": 17.6761, "step": 50363 }, { "epoch": 0.9206134498327454, "grad_norm": 5.847202079957388, "learning_rate": 1.6440967609507587e-07, "loss": 17.0814, "step": 50364 }, { "epoch": 0.9206317290291919, "grad_norm": 7.231773372891865, "learning_rate": 1.6433440059503815e-07, "loss": 17.8211, "step": 50365 }, { "epoch": 0.9206500082256384, "grad_norm": 6.152643922205309, "learning_rate": 1.6425914204358906e-07, "loss": 17.1922, "step": 50366 }, { "epoch": 0.920668287422085, "grad_norm": 5.978097293014653, "learning_rate": 1.641839004409912e-07, "loss": 17.2734, "step": 50367 }, { "epoch": 0.9206865666185314, "grad_norm": 6.425926850913683, "learning_rate": 1.6410867578751045e-07, "loss": 16.9182, "step": 50368 }, { "epoch": 0.920704845814978, "grad_norm": 6.3240151475072786, "learning_rate": 1.640334680834077e-07, "loss": 17.0504, "step": 50369 }, { "epoch": 0.9207231250114245, "grad_norm": 5.746159987622769, "learning_rate": 1.6395827732894942e-07, "loss": 16.9151, "step": 50370 }, { "epoch": 0.920741404207871, "grad_norm": 5.800798580678383, "learning_rate": 1.6388310352439708e-07, "loss": 16.9948, "step": 50371 }, { "epoch": 0.9207596834043176, "grad_norm": 6.60027710075702, "learning_rate": 1.6380794667001486e-07, "loss": 17.3491, "step": 50372 }, { "epoch": 0.920777962600764, "grad_norm": 10.492493707603094, "learning_rate": 1.6373280676606595e-07, "loss": 17.3201, "step": 50373 }, { "epoch": 0.9207962417972106, "grad_norm": 5.665994830866044, "learning_rate": 1.6365768381281455e-07, "loss": 17.339, "step": 50374 }, { "epoch": 0.9208145209936571, "grad_norm": 6.41850228856151, "learning_rate": 1.6358257781052267e-07, "loss": 17.3361, "step": 50375 }, { "epoch": 0.9208328001901036, "grad_norm": 6.496431192601877, "learning_rate": 1.6350748875945343e-07, "loss": 17.1542, "step": 50376 }, { "epoch": 0.9208510793865502, "grad_norm": 5.4235186589431486, "learning_rate": 1.634324166598722e-07, "loss": 16.9337, "step": 50377 }, { "epoch": 0.9208693585829967, "grad_norm": 5.651073834163547, "learning_rate": 1.6335736151203984e-07, "loss": 17.0217, "step": 50378 }, { "epoch": 0.9208876377794433, "grad_norm": 8.302398095151913, "learning_rate": 1.632823233162206e-07, "loss": 18.5179, "step": 50379 }, { "epoch": 0.9209059169758897, "grad_norm": 7.146215659291804, "learning_rate": 1.6320730207267654e-07, "loss": 17.701, "step": 50380 }, { "epoch": 0.9209241961723362, "grad_norm": 6.687078003604946, "learning_rate": 1.6313229778167184e-07, "loss": 17.5371, "step": 50381 }, { "epoch": 0.9209424753687828, "grad_norm": 6.996481809101028, "learning_rate": 1.630573104434685e-07, "loss": 17.0849, "step": 50382 }, { "epoch": 0.9209607545652293, "grad_norm": 6.041546500840395, "learning_rate": 1.6298234005832914e-07, "loss": 17.3786, "step": 50383 }, { "epoch": 0.9209790337616759, "grad_norm": 7.669003285896519, "learning_rate": 1.6290738662651738e-07, "loss": 17.8606, "step": 50384 }, { "epoch": 0.9209973129581224, "grad_norm": 6.242663343129463, "learning_rate": 1.628324501482953e-07, "loss": 17.4848, "step": 50385 }, { "epoch": 0.9210155921545689, "grad_norm": 5.421687609715754, "learning_rate": 1.6275753062392542e-07, "loss": 16.8676, "step": 50386 }, { "epoch": 0.9210338713510154, "grad_norm": 7.320909991120564, "learning_rate": 1.626826280536714e-07, "loss": 17.5654, "step": 50387 }, { "epoch": 0.9210521505474619, "grad_norm": 5.59863560303559, "learning_rate": 1.6260774243779476e-07, "loss": 17.2561, "step": 50388 }, { "epoch": 0.9210704297439084, "grad_norm": 6.005147993994143, "learning_rate": 1.62532873776558e-07, "loss": 17.2469, "step": 50389 }, { "epoch": 0.921088708940355, "grad_norm": 5.686160831256157, "learning_rate": 1.624580220702243e-07, "loss": 17.2332, "step": 50390 }, { "epoch": 0.9211069881368015, "grad_norm": 5.37750309228116, "learning_rate": 1.6238318731905513e-07, "loss": 16.9883, "step": 50391 }, { "epoch": 0.9211252673332481, "grad_norm": 6.279073016694652, "learning_rate": 1.623083695233124e-07, "loss": 17.3631, "step": 50392 }, { "epoch": 0.9211435465296945, "grad_norm": 8.066523527272633, "learning_rate": 1.622335686832599e-07, "loss": 17.8554, "step": 50393 }, { "epoch": 0.921161825726141, "grad_norm": 6.945101557571041, "learning_rate": 1.6215878479915849e-07, "loss": 17.8357, "step": 50394 }, { "epoch": 0.9211801049225876, "grad_norm": 6.845445003226564, "learning_rate": 1.620840178712707e-07, "loss": 17.4801, "step": 50395 }, { "epoch": 0.9211983841190341, "grad_norm": 6.452456166775093, "learning_rate": 1.620092678998586e-07, "loss": 17.5022, "step": 50396 }, { "epoch": 0.9212166633154807, "grad_norm": 7.516137964380493, "learning_rate": 1.619345348851842e-07, "loss": 18.0511, "step": 50397 }, { "epoch": 0.9212349425119272, "grad_norm": 6.836117992040294, "learning_rate": 1.6185981882751e-07, "loss": 17.1969, "step": 50398 }, { "epoch": 0.9212532217083736, "grad_norm": 6.51751448303062, "learning_rate": 1.61785119727097e-07, "loss": 17.2137, "step": 50399 }, { "epoch": 0.9212715009048202, "grad_norm": 6.746068226826028, "learning_rate": 1.6171043758420713e-07, "loss": 17.3209, "step": 50400 }, { "epoch": 0.9212897801012667, "grad_norm": 5.919656535481864, "learning_rate": 1.6163577239910244e-07, "loss": 17.146, "step": 50401 }, { "epoch": 0.9213080592977133, "grad_norm": 6.559556186807968, "learning_rate": 1.6156112417204438e-07, "loss": 17.3032, "step": 50402 }, { "epoch": 0.9213263384941598, "grad_norm": 5.6012719227286825, "learning_rate": 1.61486492903295e-07, "loss": 17.0744, "step": 50403 }, { "epoch": 0.9213446176906063, "grad_norm": 5.292196205721742, "learning_rate": 1.6141187859311513e-07, "loss": 16.9048, "step": 50404 }, { "epoch": 0.9213628968870529, "grad_norm": 5.554368746042349, "learning_rate": 1.613372812417674e-07, "loss": 16.9678, "step": 50405 }, { "epoch": 0.9213811760834993, "grad_norm": 5.701449246226401, "learning_rate": 1.612627008495121e-07, "loss": 17.0291, "step": 50406 }, { "epoch": 0.9213994552799459, "grad_norm": 5.360216709901349, "learning_rate": 1.6118813741661132e-07, "loss": 17.0162, "step": 50407 }, { "epoch": 0.9214177344763924, "grad_norm": 6.11542847985162, "learning_rate": 1.6111359094332645e-07, "loss": 17.5715, "step": 50408 }, { "epoch": 0.9214360136728389, "grad_norm": 5.297476536737946, "learning_rate": 1.6103906142991787e-07, "loss": 17.0446, "step": 50409 }, { "epoch": 0.9214542928692855, "grad_norm": 5.947922078683381, "learning_rate": 1.609645488766487e-07, "loss": 17.0942, "step": 50410 }, { "epoch": 0.921472572065732, "grad_norm": 5.363997017172577, "learning_rate": 1.608900532837776e-07, "loss": 17.1691, "step": 50411 }, { "epoch": 0.9214908512621786, "grad_norm": 6.275621336750288, "learning_rate": 1.6081557465156717e-07, "loss": 17.4426, "step": 50412 }, { "epoch": 0.921509130458625, "grad_norm": 4.920007729709538, "learning_rate": 1.6074111298027883e-07, "loss": 16.9348, "step": 50413 }, { "epoch": 0.9215274096550715, "grad_norm": 7.752669642257724, "learning_rate": 1.6066666827017241e-07, "loss": 17.2707, "step": 50414 }, { "epoch": 0.9215456888515181, "grad_norm": 6.653596977102427, "learning_rate": 1.6059224052150935e-07, "loss": 17.2637, "step": 50415 }, { "epoch": 0.9215639680479646, "grad_norm": 4.011161253686686, "learning_rate": 1.605178297345511e-07, "loss": 16.5166, "step": 50416 }, { "epoch": 0.9215822472444112, "grad_norm": 6.473122997057002, "learning_rate": 1.6044343590955747e-07, "loss": 17.2474, "step": 50417 }, { "epoch": 0.9216005264408577, "grad_norm": 5.716330024533144, "learning_rate": 1.603690590467899e-07, "loss": 17.0471, "step": 50418 }, { "epoch": 0.9216188056373041, "grad_norm": 6.875978080382762, "learning_rate": 1.602946991465093e-07, "loss": 17.1355, "step": 50419 }, { "epoch": 0.9216370848337507, "grad_norm": 8.036398842902958, "learning_rate": 1.6022035620897492e-07, "loss": 18.7041, "step": 50420 }, { "epoch": 0.9216553640301972, "grad_norm": 6.152143360845339, "learning_rate": 1.601460302344482e-07, "loss": 17.0904, "step": 50421 }, { "epoch": 0.9216736432266438, "grad_norm": 5.006908800132796, "learning_rate": 1.6007172122319003e-07, "loss": 16.9939, "step": 50422 }, { "epoch": 0.9216919224230903, "grad_norm": 6.003421851197022, "learning_rate": 1.5999742917546025e-07, "loss": 17.2842, "step": 50423 }, { "epoch": 0.9217102016195368, "grad_norm": 6.023058486018868, "learning_rate": 1.599231540915197e-07, "loss": 17.0679, "step": 50424 }, { "epoch": 0.9217284808159834, "grad_norm": 6.530720601054586, "learning_rate": 1.5984889597162822e-07, "loss": 17.6398, "step": 50425 }, { "epoch": 0.9217467600124298, "grad_norm": 5.7124239882569245, "learning_rate": 1.597746548160467e-07, "loss": 17.0784, "step": 50426 }, { "epoch": 0.9217650392088764, "grad_norm": 5.436012406366592, "learning_rate": 1.5970043062503493e-07, "loss": 16.9088, "step": 50427 }, { "epoch": 0.9217833184053229, "grad_norm": 7.211769791315821, "learning_rate": 1.596262233988527e-07, "loss": 17.909, "step": 50428 }, { "epoch": 0.9218015976017694, "grad_norm": 5.150007584768155, "learning_rate": 1.5955203313776148e-07, "loss": 17.1515, "step": 50429 }, { "epoch": 0.921819876798216, "grad_norm": 6.202966678738009, "learning_rate": 1.5947785984201934e-07, "loss": 17.0458, "step": 50430 }, { "epoch": 0.9218381559946625, "grad_norm": 5.954350411625157, "learning_rate": 1.5940370351188729e-07, "loss": 17.0675, "step": 50431 }, { "epoch": 0.921856435191109, "grad_norm": 6.401035860955413, "learning_rate": 1.5932956414762613e-07, "loss": 17.4284, "step": 50432 }, { "epoch": 0.9218747143875555, "grad_norm": 5.597503284287078, "learning_rate": 1.5925544174949402e-07, "loss": 17.0855, "step": 50433 }, { "epoch": 0.921892993584002, "grad_norm": 6.120043325069162, "learning_rate": 1.591813363177519e-07, "loss": 17.1623, "step": 50434 }, { "epoch": 0.9219112727804486, "grad_norm": 5.834833953678381, "learning_rate": 1.5910724785265896e-07, "loss": 17.0793, "step": 50435 }, { "epoch": 0.9219295519768951, "grad_norm": 6.201635589408199, "learning_rate": 1.5903317635447558e-07, "loss": 17.2822, "step": 50436 }, { "epoch": 0.9219478311733417, "grad_norm": 5.518042823856456, "learning_rate": 1.5895912182345984e-07, "loss": 17.0045, "step": 50437 }, { "epoch": 0.9219661103697881, "grad_norm": 7.905154608078347, "learning_rate": 1.5888508425987325e-07, "loss": 18.3186, "step": 50438 }, { "epoch": 0.9219843895662346, "grad_norm": 7.463442917593409, "learning_rate": 1.5881106366397337e-07, "loss": 18.0234, "step": 50439 }, { "epoch": 0.9220026687626812, "grad_norm": 7.241837665656476, "learning_rate": 1.5873706003602106e-07, "loss": 17.8186, "step": 50440 }, { "epoch": 0.9220209479591277, "grad_norm": 5.9040574468089355, "learning_rate": 1.5866307337627562e-07, "loss": 17.0428, "step": 50441 }, { "epoch": 0.9220392271555743, "grad_norm": 5.776945221975629, "learning_rate": 1.5858910368499514e-07, "loss": 17.3606, "step": 50442 }, { "epoch": 0.9220575063520208, "grad_norm": 7.569173696582129, "learning_rate": 1.5851515096244053e-07, "loss": 17.6927, "step": 50443 }, { "epoch": 0.9220757855484673, "grad_norm": 7.057306661364068, "learning_rate": 1.5844121520887046e-07, "loss": 17.5894, "step": 50444 }, { "epoch": 0.9220940647449138, "grad_norm": 8.196841108520657, "learning_rate": 1.5836729642454251e-07, "loss": 17.8245, "step": 50445 }, { "epoch": 0.9221123439413603, "grad_norm": 6.4283304342143985, "learning_rate": 1.5829339460971816e-07, "loss": 17.1932, "step": 50446 }, { "epoch": 0.9221306231378069, "grad_norm": 6.512539386499851, "learning_rate": 1.582195097646555e-07, "loss": 17.3362, "step": 50447 }, { "epoch": 0.9221489023342534, "grad_norm": 5.972369162923311, "learning_rate": 1.5814564188961212e-07, "loss": 17.2476, "step": 50448 }, { "epoch": 0.9221671815306999, "grad_norm": 7.316016028021775, "learning_rate": 1.5807179098484893e-07, "loss": 17.6822, "step": 50449 }, { "epoch": 0.9221854607271465, "grad_norm": 4.840045082219546, "learning_rate": 1.5799795705062405e-07, "loss": 16.7209, "step": 50450 }, { "epoch": 0.922203739923593, "grad_norm": 5.775344179823001, "learning_rate": 1.579241400871956e-07, "loss": 17.0686, "step": 50451 }, { "epoch": 0.9222220191200395, "grad_norm": 6.161031952103878, "learning_rate": 1.5785034009482337e-07, "loss": 17.3211, "step": 50452 }, { "epoch": 0.922240298316486, "grad_norm": 5.672610435058814, "learning_rate": 1.5777655707376548e-07, "loss": 17.0111, "step": 50453 }, { "epoch": 0.9222585775129325, "grad_norm": 6.223864159727378, "learning_rate": 1.577027910242801e-07, "loss": 17.3551, "step": 50454 }, { "epoch": 0.9222768567093791, "grad_norm": 6.724979876221779, "learning_rate": 1.5762904194662698e-07, "loss": 17.3074, "step": 50455 }, { "epoch": 0.9222951359058256, "grad_norm": 7.500322218075032, "learning_rate": 1.5755530984106316e-07, "loss": 17.5911, "step": 50456 }, { "epoch": 0.922313415102272, "grad_norm": 5.713359772271769, "learning_rate": 1.5748159470784785e-07, "loss": 17.3531, "step": 50457 }, { "epoch": 0.9223316942987186, "grad_norm": 6.864753251021438, "learning_rate": 1.574078965472392e-07, "loss": 17.5733, "step": 50458 }, { "epoch": 0.9223499734951651, "grad_norm": 6.6972520631515975, "learning_rate": 1.5733421535949588e-07, "loss": 17.3718, "step": 50459 }, { "epoch": 0.9223682526916117, "grad_norm": 4.6486751437102125, "learning_rate": 1.5726055114487605e-07, "loss": 16.9627, "step": 50460 }, { "epoch": 0.9223865318880582, "grad_norm": 5.639706998145337, "learning_rate": 1.5718690390363834e-07, "loss": 17.0193, "step": 50461 }, { "epoch": 0.9224048110845047, "grad_norm": 5.145925110037511, "learning_rate": 1.5711327363603923e-07, "loss": 17.0618, "step": 50462 }, { "epoch": 0.9224230902809513, "grad_norm": 5.682063926320721, "learning_rate": 1.5703966034233852e-07, "loss": 16.8663, "step": 50463 }, { "epoch": 0.9224413694773977, "grad_norm": 6.47929021048185, "learning_rate": 1.569660640227938e-07, "loss": 17.2191, "step": 50464 }, { "epoch": 0.9224596486738443, "grad_norm": 6.012434959843662, "learning_rate": 1.568924846776615e-07, "loss": 17.2278, "step": 50465 }, { "epoch": 0.9224779278702908, "grad_norm": 5.870584707780908, "learning_rate": 1.568189223072014e-07, "loss": 17.2398, "step": 50466 }, { "epoch": 0.9224962070667373, "grad_norm": 5.888991867094725, "learning_rate": 1.5674537691167112e-07, "loss": 17.1318, "step": 50467 }, { "epoch": 0.9225144862631839, "grad_norm": 5.31569607148018, "learning_rate": 1.5667184849132765e-07, "loss": 17.0762, "step": 50468 }, { "epoch": 0.9225327654596304, "grad_norm": 6.295156089429547, "learning_rate": 1.5659833704642912e-07, "loss": 17.4898, "step": 50469 }, { "epoch": 0.922551044656077, "grad_norm": 5.423179640685488, "learning_rate": 1.5652484257723312e-07, "loss": 16.9099, "step": 50470 }, { "epoch": 0.9225693238525234, "grad_norm": 6.929321791603451, "learning_rate": 1.5645136508399772e-07, "loss": 17.6064, "step": 50471 }, { "epoch": 0.9225876030489699, "grad_norm": 5.921109999260555, "learning_rate": 1.5637790456697944e-07, "loss": 17.5084, "step": 50472 }, { "epoch": 0.9226058822454165, "grad_norm": 5.463169523110444, "learning_rate": 1.5630446102643637e-07, "loss": 17.109, "step": 50473 }, { "epoch": 0.922624161441863, "grad_norm": 6.301825686700988, "learning_rate": 1.562310344626261e-07, "loss": 17.3577, "step": 50474 }, { "epoch": 0.9226424406383096, "grad_norm": 7.1551320002913235, "learning_rate": 1.561576248758051e-07, "loss": 17.7542, "step": 50475 }, { "epoch": 0.9226607198347561, "grad_norm": 5.729970725823122, "learning_rate": 1.5608423226623149e-07, "loss": 17.2218, "step": 50476 }, { "epoch": 0.9226789990312025, "grad_norm": 5.77849353483946, "learning_rate": 1.5601085663416283e-07, "loss": 16.9458, "step": 50477 }, { "epoch": 0.9226972782276491, "grad_norm": 6.413099919892024, "learning_rate": 1.5593749797985558e-07, "loss": 17.3016, "step": 50478 }, { "epoch": 0.9227155574240956, "grad_norm": 7.492996954246125, "learning_rate": 1.5586415630356677e-07, "loss": 17.3839, "step": 50479 }, { "epoch": 0.9227338366205422, "grad_norm": 7.746410889287016, "learning_rate": 1.5579083160555454e-07, "loss": 17.5893, "step": 50480 }, { "epoch": 0.9227521158169887, "grad_norm": 7.161550915293412, "learning_rate": 1.5571752388607474e-07, "loss": 17.7298, "step": 50481 }, { "epoch": 0.9227703950134352, "grad_norm": 6.134387804607636, "learning_rate": 1.556442331453839e-07, "loss": 17.4907, "step": 50482 }, { "epoch": 0.9227886742098818, "grad_norm": 8.63755914132128, "learning_rate": 1.5557095938374012e-07, "loss": 17.957, "step": 50483 }, { "epoch": 0.9228069534063282, "grad_norm": 7.2037725406101645, "learning_rate": 1.5549770260139984e-07, "loss": 17.8478, "step": 50484 }, { "epoch": 0.9228252326027748, "grad_norm": 4.854802172397043, "learning_rate": 1.55424462798619e-07, "loss": 16.777, "step": 50485 }, { "epoch": 0.9228435117992213, "grad_norm": 5.879933896932357, "learning_rate": 1.5535123997565627e-07, "loss": 17.4456, "step": 50486 }, { "epoch": 0.9228617909956678, "grad_norm": 6.450975734765528, "learning_rate": 1.5527803413276586e-07, "loss": 17.2576, "step": 50487 }, { "epoch": 0.9228800701921144, "grad_norm": 7.178602037350051, "learning_rate": 1.5520484527020651e-07, "loss": 17.7796, "step": 50488 }, { "epoch": 0.9228983493885609, "grad_norm": 9.457795305973768, "learning_rate": 1.5513167338823355e-07, "loss": 18.3241, "step": 50489 }, { "epoch": 0.9229166285850074, "grad_norm": 7.9227677559106535, "learning_rate": 1.550585184871034e-07, "loss": 17.8229, "step": 50490 }, { "epoch": 0.9229349077814539, "grad_norm": 5.588559644269843, "learning_rate": 1.5498538056707314e-07, "loss": 17.1377, "step": 50491 }, { "epoch": 0.9229531869779004, "grad_norm": 6.445104892627733, "learning_rate": 1.5491225962839862e-07, "loss": 17.4762, "step": 50492 }, { "epoch": 0.922971466174347, "grad_norm": 6.696856643237734, "learning_rate": 1.5483915567133524e-07, "loss": 17.5312, "step": 50493 }, { "epoch": 0.9229897453707935, "grad_norm": 5.793753995217867, "learning_rate": 1.5476606869614052e-07, "loss": 17.2305, "step": 50494 }, { "epoch": 0.9230080245672401, "grad_norm": 6.6266955635928895, "learning_rate": 1.5469299870307098e-07, "loss": 17.3649, "step": 50495 }, { "epoch": 0.9230263037636866, "grad_norm": 6.408970036704748, "learning_rate": 1.5461994569238136e-07, "loss": 16.9913, "step": 50496 }, { "epoch": 0.923044582960133, "grad_norm": 5.560048775595729, "learning_rate": 1.545469096643293e-07, "loss": 17.3226, "step": 50497 }, { "epoch": 0.9230628621565796, "grad_norm": 5.73089306043182, "learning_rate": 1.5447389061916894e-07, "loss": 17.2764, "step": 50498 }, { "epoch": 0.9230811413530261, "grad_norm": 6.08884790726672, "learning_rate": 1.544008885571574e-07, "loss": 17.3363, "step": 50499 }, { "epoch": 0.9230994205494727, "grad_norm": 7.081161046941361, "learning_rate": 1.5432790347855053e-07, "loss": 17.4627, "step": 50500 }, { "epoch": 0.9231176997459192, "grad_norm": 6.244815487822759, "learning_rate": 1.5425493538360315e-07, "loss": 17.3125, "step": 50501 }, { "epoch": 0.9231359789423657, "grad_norm": 4.46961418413296, "learning_rate": 1.541819842725728e-07, "loss": 16.6175, "step": 50502 }, { "epoch": 0.9231542581388122, "grad_norm": 6.6378119321310844, "learning_rate": 1.541090501457132e-07, "loss": 17.7084, "step": 50503 }, { "epoch": 0.9231725373352587, "grad_norm": 4.87694030637119, "learning_rate": 1.540361330032808e-07, "loss": 16.9998, "step": 50504 }, { "epoch": 0.9231908165317053, "grad_norm": 6.495577071717456, "learning_rate": 1.5396323284553148e-07, "loss": 17.3218, "step": 50505 }, { "epoch": 0.9232090957281518, "grad_norm": 6.186463416731121, "learning_rate": 1.538903496727212e-07, "loss": 17.2029, "step": 50506 }, { "epoch": 0.9232273749245983, "grad_norm": 6.814825777999251, "learning_rate": 1.5381748348510362e-07, "loss": 17.771, "step": 50507 }, { "epoch": 0.9232456541210449, "grad_norm": 6.87565298338802, "learning_rate": 1.537446342829363e-07, "loss": 17.6866, "step": 50508 }, { "epoch": 0.9232639333174913, "grad_norm": 5.535610834662607, "learning_rate": 1.5367180206647292e-07, "loss": 16.9765, "step": 50509 }, { "epoch": 0.9232822125139379, "grad_norm": 7.201076583185015, "learning_rate": 1.5359898683596884e-07, "loss": 17.4178, "step": 50510 }, { "epoch": 0.9233004917103844, "grad_norm": 5.889944417746838, "learning_rate": 1.5352618859168056e-07, "loss": 17.1205, "step": 50511 }, { "epoch": 0.9233187709068309, "grad_norm": 5.567502692590073, "learning_rate": 1.5345340733386228e-07, "loss": 17.2353, "step": 50512 }, { "epoch": 0.9233370501032775, "grad_norm": 6.242768697670707, "learning_rate": 1.5338064306276879e-07, "loss": 17.5955, "step": 50513 }, { "epoch": 0.923355329299724, "grad_norm": 6.454306803529495, "learning_rate": 1.53307895778656e-07, "loss": 17.393, "step": 50514 }, { "epoch": 0.9233736084961706, "grad_norm": 6.1278153533792485, "learning_rate": 1.5323516548177876e-07, "loss": 17.1444, "step": 50515 }, { "epoch": 0.923391887692617, "grad_norm": 5.3632869625415305, "learning_rate": 1.5316245217239066e-07, "loss": 17.0325, "step": 50516 }, { "epoch": 0.9234101668890635, "grad_norm": 6.976336518846908, "learning_rate": 1.5308975585074826e-07, "loss": 17.5369, "step": 50517 }, { "epoch": 0.9234284460855101, "grad_norm": 6.283120140275301, "learning_rate": 1.5301707651710574e-07, "loss": 17.4447, "step": 50518 }, { "epoch": 0.9234467252819566, "grad_norm": 8.136641750366017, "learning_rate": 1.529444141717179e-07, "loss": 18.0621, "step": 50519 }, { "epoch": 0.9234650044784032, "grad_norm": 11.157911044774792, "learning_rate": 1.52871768814839e-07, "loss": 18.103, "step": 50520 }, { "epoch": 0.9234832836748497, "grad_norm": 6.078322872124737, "learning_rate": 1.5279914044672385e-07, "loss": 17.2709, "step": 50521 }, { "epoch": 0.9235015628712961, "grad_norm": 7.915327585344878, "learning_rate": 1.5272652906762663e-07, "loss": 17.5345, "step": 50522 }, { "epoch": 0.9235198420677427, "grad_norm": 5.641526456222634, "learning_rate": 1.5265393467780332e-07, "loss": 17.1748, "step": 50523 }, { "epoch": 0.9235381212641892, "grad_norm": 6.384492029051735, "learning_rate": 1.5258135727750645e-07, "loss": 17.4437, "step": 50524 }, { "epoch": 0.9235564004606357, "grad_norm": 6.16071949847352, "learning_rate": 1.5250879686699194e-07, "loss": 17.1847, "step": 50525 }, { "epoch": 0.9235746796570823, "grad_norm": 7.375612598633095, "learning_rate": 1.5243625344651346e-07, "loss": 17.3005, "step": 50526 }, { "epoch": 0.9235929588535288, "grad_norm": 8.469049388308227, "learning_rate": 1.5236372701632472e-07, "loss": 18.4375, "step": 50527 }, { "epoch": 0.9236112380499754, "grad_norm": 6.778122717317046, "learning_rate": 1.522912175766811e-07, "loss": 17.2208, "step": 50528 }, { "epoch": 0.9236295172464218, "grad_norm": 6.8716898949297365, "learning_rate": 1.5221872512783564e-07, "loss": 17.3659, "step": 50529 }, { "epoch": 0.9236477964428683, "grad_norm": 6.275368124854691, "learning_rate": 1.5214624967004265e-07, "loss": 17.258, "step": 50530 }, { "epoch": 0.9236660756393149, "grad_norm": 5.4443585259326035, "learning_rate": 1.5207379120355692e-07, "loss": 17.2327, "step": 50531 }, { "epoch": 0.9236843548357614, "grad_norm": 6.848389858210664, "learning_rate": 1.52001349728631e-07, "loss": 17.583, "step": 50532 }, { "epoch": 0.923702634032208, "grad_norm": 5.302985751355634, "learning_rate": 1.5192892524552084e-07, "loss": 17.0496, "step": 50533 }, { "epoch": 0.9237209132286545, "grad_norm": 7.464045951810148, "learning_rate": 1.5185651775447896e-07, "loss": 17.2116, "step": 50534 }, { "epoch": 0.9237391924251009, "grad_norm": 4.799049961967683, "learning_rate": 1.5178412725575852e-07, "loss": 16.8851, "step": 50535 }, { "epoch": 0.9237574716215475, "grad_norm": 5.803000273564538, "learning_rate": 1.5171175374961434e-07, "loss": 17.1938, "step": 50536 }, { "epoch": 0.923775750817994, "grad_norm": 6.377222258959773, "learning_rate": 1.5163939723630005e-07, "loss": 17.118, "step": 50537 }, { "epoch": 0.9237940300144406, "grad_norm": 5.6951850810134905, "learning_rate": 1.5156705771606828e-07, "loss": 17.0579, "step": 50538 }, { "epoch": 0.9238123092108871, "grad_norm": 6.272850599999123, "learning_rate": 1.5149473518917378e-07, "loss": 17.208, "step": 50539 }, { "epoch": 0.9238305884073336, "grad_norm": 6.157491234697018, "learning_rate": 1.5142242965586972e-07, "loss": 17.2587, "step": 50540 }, { "epoch": 0.9238488676037802, "grad_norm": 6.2045013380037535, "learning_rate": 1.5135014111640866e-07, "loss": 17.42, "step": 50541 }, { "epoch": 0.9238671468002266, "grad_norm": 7.529766753504386, "learning_rate": 1.5127786957104485e-07, "loss": 17.7178, "step": 50542 }, { "epoch": 0.9238854259966732, "grad_norm": 6.385215387928368, "learning_rate": 1.5120561502003195e-07, "loss": 17.3352, "step": 50543 }, { "epoch": 0.9239037051931197, "grad_norm": 6.512310687285522, "learning_rate": 1.5113337746362144e-07, "loss": 17.0508, "step": 50544 }, { "epoch": 0.9239219843895662, "grad_norm": 6.067136310047002, "learning_rate": 1.510611569020687e-07, "loss": 17.2851, "step": 50545 }, { "epoch": 0.9239402635860128, "grad_norm": 5.188377503837963, "learning_rate": 1.5098895333562568e-07, "loss": 16.8107, "step": 50546 }, { "epoch": 0.9239585427824593, "grad_norm": 5.548835086334901, "learning_rate": 1.5091676676454504e-07, "loss": 17.2665, "step": 50547 }, { "epoch": 0.9239768219789058, "grad_norm": 5.386427406256403, "learning_rate": 1.508445971890804e-07, "loss": 17.1732, "step": 50548 }, { "epoch": 0.9239951011753523, "grad_norm": 5.7814520884373035, "learning_rate": 1.5077244460948493e-07, "loss": 17.1954, "step": 50549 }, { "epoch": 0.9240133803717988, "grad_norm": 5.982667173453817, "learning_rate": 1.507003090260112e-07, "loss": 17.0607, "step": 50550 }, { "epoch": 0.9240316595682454, "grad_norm": 7.339481364944557, "learning_rate": 1.506281904389123e-07, "loss": 17.5749, "step": 50551 }, { "epoch": 0.9240499387646919, "grad_norm": 6.656512533876914, "learning_rate": 1.505560888484403e-07, "loss": 17.3232, "step": 50552 }, { "epoch": 0.9240682179611385, "grad_norm": 5.9472280343046515, "learning_rate": 1.5048400425484887e-07, "loss": 17.1379, "step": 50553 }, { "epoch": 0.924086497157585, "grad_norm": 5.661333526299753, "learning_rate": 1.504119366583906e-07, "loss": 17.1208, "step": 50554 }, { "epoch": 0.9241047763540314, "grad_norm": 6.188576733516354, "learning_rate": 1.5033988605931694e-07, "loss": 17.1625, "step": 50555 }, { "epoch": 0.924123055550478, "grad_norm": 5.432116806900502, "learning_rate": 1.5026785245788155e-07, "loss": 16.9401, "step": 50556 }, { "epoch": 0.9241413347469245, "grad_norm": 7.532005390898817, "learning_rate": 1.5019583585433595e-07, "loss": 17.5505, "step": 50557 }, { "epoch": 0.9241596139433711, "grad_norm": 5.2460484391429985, "learning_rate": 1.5012383624893268e-07, "loss": 16.904, "step": 50558 }, { "epoch": 0.9241778931398176, "grad_norm": 6.946000720760962, "learning_rate": 1.5005185364192542e-07, "loss": 17.5152, "step": 50559 }, { "epoch": 0.924196172336264, "grad_norm": 6.039011267538917, "learning_rate": 1.4997988803356568e-07, "loss": 17.1488, "step": 50560 }, { "epoch": 0.9242144515327106, "grad_norm": 7.130706165633814, "learning_rate": 1.4990793942410486e-07, "loss": 17.4178, "step": 50561 }, { "epoch": 0.9242327307291571, "grad_norm": 6.202708133359712, "learning_rate": 1.498360078137967e-07, "loss": 17.4738, "step": 50562 }, { "epoch": 0.9242510099256037, "grad_norm": 5.567916758528807, "learning_rate": 1.4976409320289153e-07, "loss": 17.2724, "step": 50563 }, { "epoch": 0.9242692891220502, "grad_norm": 6.382068364924232, "learning_rate": 1.4969219559164304e-07, "loss": 17.5845, "step": 50564 }, { "epoch": 0.9242875683184967, "grad_norm": 5.6022574233452795, "learning_rate": 1.4962031498030272e-07, "loss": 17.0972, "step": 50565 }, { "epoch": 0.9243058475149433, "grad_norm": 4.698384453897627, "learning_rate": 1.4954845136912145e-07, "loss": 16.8862, "step": 50566 }, { "epoch": 0.9243241267113897, "grad_norm": 7.754379370276109, "learning_rate": 1.494766047583518e-07, "loss": 17.5378, "step": 50567 }, { "epoch": 0.9243424059078363, "grad_norm": 6.743011962646097, "learning_rate": 1.4940477514824692e-07, "loss": 17.2501, "step": 50568 }, { "epoch": 0.9243606851042828, "grad_norm": 6.6258453613839166, "learning_rate": 1.4933296253905605e-07, "loss": 16.968, "step": 50569 }, { "epoch": 0.9243789643007293, "grad_norm": 4.940480065697892, "learning_rate": 1.4926116693103344e-07, "loss": 16.8157, "step": 50570 }, { "epoch": 0.9243972434971759, "grad_norm": 6.966267937308049, "learning_rate": 1.4918938832442943e-07, "loss": 17.8176, "step": 50571 }, { "epoch": 0.9244155226936224, "grad_norm": 6.159824996659502, "learning_rate": 1.4911762671949492e-07, "loss": 17.0436, "step": 50572 }, { "epoch": 0.924433801890069, "grad_norm": 5.359573408983711, "learning_rate": 1.4904588211648253e-07, "loss": 17.1848, "step": 50573 }, { "epoch": 0.9244520810865154, "grad_norm": 5.630557743471523, "learning_rate": 1.4897415451564367e-07, "loss": 17.4657, "step": 50574 }, { "epoch": 0.9244703602829619, "grad_norm": 8.273388560695611, "learning_rate": 1.4890244391722875e-07, "loss": 18.1492, "step": 50575 }, { "epoch": 0.9244886394794085, "grad_norm": 5.934293920940086, "learning_rate": 1.4883075032148975e-07, "loss": 17.1518, "step": 50576 }, { "epoch": 0.924506918675855, "grad_norm": 5.627341560820709, "learning_rate": 1.4875907372867872e-07, "loss": 17.1924, "step": 50577 }, { "epoch": 0.9245251978723016, "grad_norm": 5.913595871006186, "learning_rate": 1.4868741413904597e-07, "loss": 17.247, "step": 50578 }, { "epoch": 0.9245434770687481, "grad_norm": 5.779122603372101, "learning_rate": 1.48615771552843e-07, "loss": 17.2864, "step": 50579 }, { "epoch": 0.9245617562651945, "grad_norm": 6.664434116121036, "learning_rate": 1.485441459703202e-07, "loss": 17.667, "step": 50580 }, { "epoch": 0.9245800354616411, "grad_norm": 4.555512101256563, "learning_rate": 1.4847253739172952e-07, "loss": 16.6947, "step": 50581 }, { "epoch": 0.9245983146580876, "grad_norm": 6.6901488695306615, "learning_rate": 1.4840094581732188e-07, "loss": 17.6708, "step": 50582 }, { "epoch": 0.9246165938545342, "grad_norm": 5.387219593997161, "learning_rate": 1.4832937124734715e-07, "loss": 16.9885, "step": 50583 }, { "epoch": 0.9246348730509807, "grad_norm": 5.466469101257375, "learning_rate": 1.4825781368205784e-07, "loss": 17.0924, "step": 50584 }, { "epoch": 0.9246531522474272, "grad_norm": 8.395232045761945, "learning_rate": 1.481862731217032e-07, "loss": 18.0949, "step": 50585 }, { "epoch": 0.9246714314438738, "grad_norm": 5.62580570751809, "learning_rate": 1.4811474956653472e-07, "loss": 17.0003, "step": 50586 }, { "epoch": 0.9246897106403202, "grad_norm": 7.601237785276465, "learning_rate": 1.4804324301680329e-07, "loss": 17.7742, "step": 50587 }, { "epoch": 0.9247079898367668, "grad_norm": 6.620549035674098, "learning_rate": 1.4797175347275982e-07, "loss": 17.2799, "step": 50588 }, { "epoch": 0.9247262690332133, "grad_norm": 5.728803234577979, "learning_rate": 1.4790028093465303e-07, "loss": 17.1072, "step": 50589 }, { "epoch": 0.9247445482296598, "grad_norm": 6.818259043687292, "learning_rate": 1.4782882540273602e-07, "loss": 17.4854, "step": 50590 }, { "epoch": 0.9247628274261064, "grad_norm": 4.291281338770246, "learning_rate": 1.4775738687725748e-07, "loss": 16.7153, "step": 50591 }, { "epoch": 0.9247811066225529, "grad_norm": 8.148638254070688, "learning_rate": 1.476859653584678e-07, "loss": 17.7909, "step": 50592 }, { "epoch": 0.9247993858189993, "grad_norm": 6.913062048841508, "learning_rate": 1.476145608466173e-07, "loss": 17.4895, "step": 50593 }, { "epoch": 0.9248176650154459, "grad_norm": 6.38743451715242, "learning_rate": 1.47543173341958e-07, "loss": 17.591, "step": 50594 }, { "epoch": 0.9248359442118924, "grad_norm": 7.665362209383612, "learning_rate": 1.474718028447375e-07, "loss": 17.8923, "step": 50595 }, { "epoch": 0.924854223408339, "grad_norm": 8.994001523460028, "learning_rate": 1.4740044935520837e-07, "loss": 18.9495, "step": 50596 }, { "epoch": 0.9248725026047855, "grad_norm": 5.320390697859397, "learning_rate": 1.4732911287361874e-07, "loss": 16.9112, "step": 50597 }, { "epoch": 0.924890781801232, "grad_norm": 6.7831807996599895, "learning_rate": 1.4725779340022007e-07, "loss": 17.5112, "step": 50598 }, { "epoch": 0.9249090609976786, "grad_norm": 5.895652795341287, "learning_rate": 1.4718649093526216e-07, "loss": 17.3681, "step": 50599 }, { "epoch": 0.924927340194125, "grad_norm": 6.0765856313787525, "learning_rate": 1.4711520547899317e-07, "loss": 17.1725, "step": 50600 }, { "epoch": 0.9249456193905716, "grad_norm": 8.415250099335477, "learning_rate": 1.4704393703166565e-07, "loss": 18.1261, "step": 50601 }, { "epoch": 0.9249638985870181, "grad_norm": 5.8358615647865495, "learning_rate": 1.4697268559352662e-07, "loss": 17.1708, "step": 50602 }, { "epoch": 0.9249821777834646, "grad_norm": 6.518723615329236, "learning_rate": 1.4690145116482813e-07, "loss": 17.8893, "step": 50603 }, { "epoch": 0.9250004569799112, "grad_norm": 5.1961985220634554, "learning_rate": 1.4683023374581883e-07, "loss": 16.9857, "step": 50604 }, { "epoch": 0.9250187361763577, "grad_norm": 6.191397168019063, "learning_rate": 1.4675903333674857e-07, "loss": 17.4649, "step": 50605 }, { "epoch": 0.9250370153728042, "grad_norm": 6.1428942215440525, "learning_rate": 1.46687849937866e-07, "loss": 17.2908, "step": 50606 }, { "epoch": 0.9250552945692507, "grad_norm": 5.165846932632356, "learning_rate": 1.4661668354942205e-07, "loss": 17.0071, "step": 50607 }, { "epoch": 0.9250735737656972, "grad_norm": 6.872473418988997, "learning_rate": 1.4654553417166539e-07, "loss": 17.651, "step": 50608 }, { "epoch": 0.9250918529621438, "grad_norm": 6.050844522818859, "learning_rate": 1.464744018048453e-07, "loss": 17.4619, "step": 50609 }, { "epoch": 0.9251101321585903, "grad_norm": 6.507827202418364, "learning_rate": 1.4640328644921153e-07, "loss": 17.4648, "step": 50610 }, { "epoch": 0.9251284113550369, "grad_norm": 5.526098906649229, "learning_rate": 1.4633218810501227e-07, "loss": 16.7106, "step": 50611 }, { "epoch": 0.9251466905514834, "grad_norm": 5.557607602214377, "learning_rate": 1.4626110677249784e-07, "loss": 16.8061, "step": 50612 }, { "epoch": 0.9251649697479298, "grad_norm": 7.227429336608383, "learning_rate": 1.4619004245191747e-07, "loss": 17.9754, "step": 50613 }, { "epoch": 0.9251832489443764, "grad_norm": 6.0908496418231435, "learning_rate": 1.4611899514351935e-07, "loss": 17.2815, "step": 50614 }, { "epoch": 0.9252015281408229, "grad_norm": 7.053598554415046, "learning_rate": 1.4604796484755378e-07, "loss": 17.6834, "step": 50615 }, { "epoch": 0.9252198073372695, "grad_norm": 6.877401652535847, "learning_rate": 1.4597695156426838e-07, "loss": 17.4327, "step": 50616 }, { "epoch": 0.925238086533716, "grad_norm": 6.406746932819465, "learning_rate": 1.459059552939124e-07, "loss": 17.2231, "step": 50617 }, { "epoch": 0.9252563657301625, "grad_norm": 5.174213457633698, "learning_rate": 1.4583497603673503e-07, "loss": 17.0999, "step": 50618 }, { "epoch": 0.925274644926609, "grad_norm": 5.572730268264714, "learning_rate": 1.45764013792985e-07, "loss": 16.9447, "step": 50619 }, { "epoch": 0.9252929241230555, "grad_norm": 7.812333863750829, "learning_rate": 1.4569306856291042e-07, "loss": 17.8595, "step": 50620 }, { "epoch": 0.9253112033195021, "grad_norm": 5.313552630451385, "learning_rate": 1.4562214034676003e-07, "loss": 17.005, "step": 50621 }, { "epoch": 0.9253294825159486, "grad_norm": 5.258387656864, "learning_rate": 1.455512291447836e-07, "loss": 16.9951, "step": 50622 }, { "epoch": 0.9253477617123951, "grad_norm": 6.497732617138449, "learning_rate": 1.4548033495722867e-07, "loss": 17.718, "step": 50623 }, { "epoch": 0.9253660409088417, "grad_norm": 6.780635295265212, "learning_rate": 1.45409457784344e-07, "loss": 17.5001, "step": 50624 }, { "epoch": 0.9253843201052881, "grad_norm": 5.627028931438044, "learning_rate": 1.4533859762637715e-07, "loss": 17.136, "step": 50625 }, { "epoch": 0.9254025993017347, "grad_norm": 7.163467429657428, "learning_rate": 1.4526775448357845e-07, "loss": 17.5349, "step": 50626 }, { "epoch": 0.9254208784981812, "grad_norm": 6.714376774082968, "learning_rate": 1.4519692835619438e-07, "loss": 17.1293, "step": 50627 }, { "epoch": 0.9254391576946277, "grad_norm": 5.051042414704235, "learning_rate": 1.4512611924447362e-07, "loss": 17.0144, "step": 50628 }, { "epoch": 0.9254574368910743, "grad_norm": 6.496418856839014, "learning_rate": 1.450553271486649e-07, "loss": 17.4049, "step": 50629 }, { "epoch": 0.9254757160875208, "grad_norm": 5.395215601985295, "learning_rate": 1.4498455206901573e-07, "loss": 17.0953, "step": 50630 }, { "epoch": 0.9254939952839674, "grad_norm": 5.454201497779694, "learning_rate": 1.4491379400577432e-07, "loss": 17.1523, "step": 50631 }, { "epoch": 0.9255122744804138, "grad_norm": 5.116672765676564, "learning_rate": 1.4484305295918876e-07, "loss": 16.7476, "step": 50632 }, { "epoch": 0.9255305536768603, "grad_norm": 7.462609255537523, "learning_rate": 1.4477232892950777e-07, "loss": 17.7607, "step": 50633 }, { "epoch": 0.9255488328733069, "grad_norm": 6.0808075993184065, "learning_rate": 1.4470162191697722e-07, "loss": 17.3912, "step": 50634 }, { "epoch": 0.9255671120697534, "grad_norm": 6.043358173987742, "learning_rate": 1.4463093192184695e-07, "loss": 17.2932, "step": 50635 }, { "epoch": 0.9255853912662, "grad_norm": 5.33646357781169, "learning_rate": 1.44560258944364e-07, "loss": 17.1553, "step": 50636 }, { "epoch": 0.9256036704626465, "grad_norm": 9.856015610373138, "learning_rate": 1.444896029847759e-07, "loss": 18.3971, "step": 50637 }, { "epoch": 0.925621949659093, "grad_norm": 6.06744258742844, "learning_rate": 1.4441896404333023e-07, "loss": 17.2065, "step": 50638 }, { "epoch": 0.9256402288555395, "grad_norm": 6.950101712490237, "learning_rate": 1.443483421202746e-07, "loss": 17.4858, "step": 50639 }, { "epoch": 0.925658508051986, "grad_norm": 4.550648729044107, "learning_rate": 1.442777372158566e-07, "loss": 16.8399, "step": 50640 }, { "epoch": 0.9256767872484326, "grad_norm": 5.604629990541918, "learning_rate": 1.4420714933032432e-07, "loss": 17.1207, "step": 50641 }, { "epoch": 0.9256950664448791, "grad_norm": 6.061363667783855, "learning_rate": 1.441365784639237e-07, "loss": 17.2264, "step": 50642 }, { "epoch": 0.9257133456413256, "grad_norm": 5.0914513573791575, "learning_rate": 1.44066024616904e-07, "loss": 17.0061, "step": 50643 }, { "epoch": 0.9257316248377722, "grad_norm": 5.939349920520395, "learning_rate": 1.439954877895111e-07, "loss": 17.3138, "step": 50644 }, { "epoch": 0.9257499040342186, "grad_norm": 5.842777101158878, "learning_rate": 1.439249679819926e-07, "loss": 17.3053, "step": 50645 }, { "epoch": 0.9257681832306652, "grad_norm": 7.0597359073874, "learning_rate": 1.438544651945961e-07, "loss": 17.715, "step": 50646 }, { "epoch": 0.9257864624271117, "grad_norm": 7.416102649736143, "learning_rate": 1.4378397942756806e-07, "loss": 17.3445, "step": 50647 }, { "epoch": 0.9258047416235582, "grad_norm": 6.54959088291684, "learning_rate": 1.4371351068115548e-07, "loss": 17.3207, "step": 50648 }, { "epoch": 0.9258230208200048, "grad_norm": 9.600942169914278, "learning_rate": 1.4364305895560538e-07, "loss": 17.9154, "step": 50649 }, { "epoch": 0.9258413000164513, "grad_norm": 6.153223974258565, "learning_rate": 1.4357262425116536e-07, "loss": 17.2541, "step": 50650 }, { "epoch": 0.9258595792128979, "grad_norm": 6.290450026162387, "learning_rate": 1.4350220656808188e-07, "loss": 17.2069, "step": 50651 }, { "epoch": 0.9258778584093443, "grad_norm": 6.603303761609698, "learning_rate": 1.4343180590660199e-07, "loss": 17.2577, "step": 50652 }, { "epoch": 0.9258961376057908, "grad_norm": 5.03785196053961, "learning_rate": 1.4336142226697214e-07, "loss": 17.1014, "step": 50653 }, { "epoch": 0.9259144168022374, "grad_norm": 5.528356999387729, "learning_rate": 1.432910556494388e-07, "loss": 16.9153, "step": 50654 }, { "epoch": 0.9259326959986839, "grad_norm": 8.204019911358179, "learning_rate": 1.43220706054249e-07, "loss": 17.5293, "step": 50655 }, { "epoch": 0.9259509751951305, "grad_norm": 7.546969475702179, "learning_rate": 1.4315037348164862e-07, "loss": 17.8067, "step": 50656 }, { "epoch": 0.925969254391577, "grad_norm": 5.40536603014942, "learning_rate": 1.430800579318853e-07, "loss": 17.2253, "step": 50657 }, { "epoch": 0.9259875335880234, "grad_norm": 8.217489512486717, "learning_rate": 1.430097594052049e-07, "loss": 18.1525, "step": 50658 }, { "epoch": 0.92600581278447, "grad_norm": 5.029958347744881, "learning_rate": 1.4293947790185336e-07, "loss": 16.8978, "step": 50659 }, { "epoch": 0.9260240919809165, "grad_norm": 7.546339934390729, "learning_rate": 1.4286921342207828e-07, "loss": 17.5899, "step": 50660 }, { "epoch": 0.926042371177363, "grad_norm": 4.743943583933941, "learning_rate": 1.4279896596612497e-07, "loss": 16.7154, "step": 50661 }, { "epoch": 0.9260606503738096, "grad_norm": 5.573949543069289, "learning_rate": 1.4272873553423882e-07, "loss": 17.133, "step": 50662 }, { "epoch": 0.9260789295702561, "grad_norm": 5.551942594614527, "learning_rate": 1.4265852212666797e-07, "loss": 17.0733, "step": 50663 }, { "epoch": 0.9260972087667027, "grad_norm": 7.535594537337889, "learning_rate": 1.4258832574365778e-07, "loss": 18.0057, "step": 50664 }, { "epoch": 0.9261154879631491, "grad_norm": 6.234968871408941, "learning_rate": 1.4251814638545302e-07, "loss": 17.3815, "step": 50665 }, { "epoch": 0.9261337671595956, "grad_norm": 6.989941299192601, "learning_rate": 1.4244798405230076e-07, "loss": 17.3796, "step": 50666 }, { "epoch": 0.9261520463560422, "grad_norm": 5.328014075060277, "learning_rate": 1.4237783874444744e-07, "loss": 16.8856, "step": 50667 }, { "epoch": 0.9261703255524887, "grad_norm": 5.3590612057681755, "learning_rate": 1.4230771046213732e-07, "loss": 17.1608, "step": 50668 }, { "epoch": 0.9261886047489353, "grad_norm": 6.757881993095975, "learning_rate": 1.4223759920561797e-07, "loss": 17.6568, "step": 50669 }, { "epoch": 0.9262068839453818, "grad_norm": 5.326174849438642, "learning_rate": 1.4216750497513366e-07, "loss": 16.9482, "step": 50670 }, { "epoch": 0.9262251631418282, "grad_norm": 6.707569049572955, "learning_rate": 1.420974277709314e-07, "loss": 18.3336, "step": 50671 }, { "epoch": 0.9262434423382748, "grad_norm": 7.189261635409199, "learning_rate": 1.4202736759325596e-07, "loss": 17.9042, "step": 50672 }, { "epoch": 0.9262617215347213, "grad_norm": 5.032974444890456, "learning_rate": 1.4195732444235278e-07, "loss": 16.8824, "step": 50673 }, { "epoch": 0.9262800007311679, "grad_norm": 5.461582533727725, "learning_rate": 1.418872983184677e-07, "loss": 17.1407, "step": 50674 }, { "epoch": 0.9262982799276144, "grad_norm": 6.601426898839223, "learning_rate": 1.4181728922184613e-07, "loss": 17.4997, "step": 50675 }, { "epoch": 0.9263165591240609, "grad_norm": 4.926587360889383, "learning_rate": 1.4174729715273282e-07, "loss": 16.7892, "step": 50676 }, { "epoch": 0.9263348383205074, "grad_norm": 6.246883899481906, "learning_rate": 1.4167732211137486e-07, "loss": 17.3224, "step": 50677 }, { "epoch": 0.9263531175169539, "grad_norm": 5.7283128101527385, "learning_rate": 1.4160736409801535e-07, "loss": 17.1806, "step": 50678 }, { "epoch": 0.9263713967134005, "grad_norm": 5.168437320232783, "learning_rate": 1.4153742311290076e-07, "loss": 16.9064, "step": 50679 }, { "epoch": 0.926389675909847, "grad_norm": 6.166784930833705, "learning_rate": 1.414674991562759e-07, "loss": 17.3679, "step": 50680 }, { "epoch": 0.9264079551062935, "grad_norm": 6.479474783176076, "learning_rate": 1.413975922283861e-07, "loss": 17.654, "step": 50681 }, { "epoch": 0.9264262343027401, "grad_norm": 6.2256945230514695, "learning_rate": 1.4132770232947568e-07, "loss": 17.4045, "step": 50682 }, { "epoch": 0.9264445134991866, "grad_norm": 5.560759824189018, "learning_rate": 1.412578294597905e-07, "loss": 16.94, "step": 50683 }, { "epoch": 0.9264627926956331, "grad_norm": 7.151488783652446, "learning_rate": 1.4118797361957425e-07, "loss": 17.5424, "step": 50684 }, { "epoch": 0.9264810718920796, "grad_norm": 6.285329656737051, "learning_rate": 1.411181348090729e-07, "loss": 17.2892, "step": 50685 }, { "epoch": 0.9264993510885261, "grad_norm": 9.377095377074628, "learning_rate": 1.4104831302853117e-07, "loss": 18.2651, "step": 50686 }, { "epoch": 0.9265176302849727, "grad_norm": 5.515434184878584, "learning_rate": 1.4097850827819283e-07, "loss": 16.8222, "step": 50687 }, { "epoch": 0.9265359094814192, "grad_norm": 6.66776943768974, "learning_rate": 1.4090872055830374e-07, "loss": 17.3758, "step": 50688 }, { "epoch": 0.9265541886778658, "grad_norm": 6.489368362723503, "learning_rate": 1.4083894986910817e-07, "loss": 17.4221, "step": 50689 }, { "epoch": 0.9265724678743122, "grad_norm": 6.67320790153335, "learning_rate": 1.4076919621084927e-07, "loss": 17.1347, "step": 50690 }, { "epoch": 0.9265907470707587, "grad_norm": 6.696112250522597, "learning_rate": 1.4069945958377408e-07, "loss": 17.2088, "step": 50691 }, { "epoch": 0.9266090262672053, "grad_norm": 8.010259992892875, "learning_rate": 1.4062973998812512e-07, "loss": 18.0323, "step": 50692 }, { "epoch": 0.9266273054636518, "grad_norm": 8.337248430373709, "learning_rate": 1.405600374241467e-07, "loss": 18.0389, "step": 50693 }, { "epoch": 0.9266455846600984, "grad_norm": 5.295314558590388, "learning_rate": 1.404903518920836e-07, "loss": 17.0183, "step": 50694 }, { "epoch": 0.9266638638565449, "grad_norm": 6.4397566632997005, "learning_rate": 1.4042068339218118e-07, "loss": 17.1628, "step": 50695 }, { "epoch": 0.9266821430529913, "grad_norm": 8.546001285202166, "learning_rate": 1.4035103192468148e-07, "loss": 17.5513, "step": 50696 }, { "epoch": 0.9267004222494379, "grad_norm": 7.465772316471967, "learning_rate": 1.4028139748983039e-07, "loss": 17.5658, "step": 50697 }, { "epoch": 0.9267187014458844, "grad_norm": 6.270607216057947, "learning_rate": 1.4021178008787107e-07, "loss": 17.2909, "step": 50698 }, { "epoch": 0.926736980642331, "grad_norm": 5.827896139076598, "learning_rate": 1.4014217971904775e-07, "loss": 17.3458, "step": 50699 }, { "epoch": 0.9267552598387775, "grad_norm": 5.6385878953305015, "learning_rate": 1.400725963836047e-07, "loss": 17.221, "step": 50700 }, { "epoch": 0.926773539035224, "grad_norm": 5.460920057694933, "learning_rate": 1.4000303008178507e-07, "loss": 17.0429, "step": 50701 }, { "epoch": 0.9267918182316706, "grad_norm": 5.574705072547213, "learning_rate": 1.3993348081383307e-07, "loss": 17.0709, "step": 50702 }, { "epoch": 0.926810097428117, "grad_norm": 6.3145363220098405, "learning_rate": 1.3986394857999243e-07, "loss": 17.2059, "step": 50703 }, { "epoch": 0.9268283766245636, "grad_norm": 6.834174960616879, "learning_rate": 1.397944333805068e-07, "loss": 17.4938, "step": 50704 }, { "epoch": 0.9268466558210101, "grad_norm": 6.332412548409045, "learning_rate": 1.3972493521562046e-07, "loss": 17.197, "step": 50705 }, { "epoch": 0.9268649350174566, "grad_norm": 6.070172885040346, "learning_rate": 1.3965545408557658e-07, "loss": 17.0664, "step": 50706 }, { "epoch": 0.9268832142139032, "grad_norm": 5.426922689593583, "learning_rate": 1.395859899906177e-07, "loss": 17.1026, "step": 50707 }, { "epoch": 0.9269014934103497, "grad_norm": 5.894901456001656, "learning_rate": 1.3951654293098916e-07, "loss": 17.095, "step": 50708 }, { "epoch": 0.9269197726067963, "grad_norm": 5.875841554133486, "learning_rate": 1.3944711290693303e-07, "loss": 17.3973, "step": 50709 }, { "epoch": 0.9269380518032427, "grad_norm": 6.447258981262058, "learning_rate": 1.3937769991869242e-07, "loss": 17.4461, "step": 50710 }, { "epoch": 0.9269563309996892, "grad_norm": 6.1093257066261275, "learning_rate": 1.3930830396651164e-07, "loss": 17.1656, "step": 50711 }, { "epoch": 0.9269746101961358, "grad_norm": 6.9994531368802635, "learning_rate": 1.3923892505063318e-07, "loss": 17.7306, "step": 50712 }, { "epoch": 0.9269928893925823, "grad_norm": 6.992129379168356, "learning_rate": 1.3916956317130082e-07, "loss": 17.7657, "step": 50713 }, { "epoch": 0.9270111685890289, "grad_norm": 6.440086085277243, "learning_rate": 1.391002183287571e-07, "loss": 17.4238, "step": 50714 }, { "epoch": 0.9270294477854754, "grad_norm": 6.040051328993896, "learning_rate": 1.390308905232457e-07, "loss": 17.5345, "step": 50715 }, { "epoch": 0.9270477269819218, "grad_norm": 5.6919152874182, "learning_rate": 1.3896157975500867e-07, "loss": 17.1204, "step": 50716 }, { "epoch": 0.9270660061783684, "grad_norm": 6.322682695409084, "learning_rate": 1.388922860242903e-07, "loss": 17.3489, "step": 50717 }, { "epoch": 0.9270842853748149, "grad_norm": 6.217190723456026, "learning_rate": 1.3882300933133142e-07, "loss": 17.6281, "step": 50718 }, { "epoch": 0.9271025645712615, "grad_norm": 8.478496193986933, "learning_rate": 1.3875374967637745e-07, "loss": 18.1378, "step": 50719 }, { "epoch": 0.927120843767708, "grad_norm": 6.715330303967345, "learning_rate": 1.3868450705966875e-07, "loss": 17.4351, "step": 50720 }, { "epoch": 0.9271391229641545, "grad_norm": 7.096435517404489, "learning_rate": 1.3861528148144954e-07, "loss": 17.5319, "step": 50721 }, { "epoch": 0.927157402160601, "grad_norm": 7.055927776050345, "learning_rate": 1.3854607294196133e-07, "loss": 17.5516, "step": 50722 }, { "epoch": 0.9271756813570475, "grad_norm": 7.410800159058492, "learning_rate": 1.3847688144144779e-07, "loss": 18.09, "step": 50723 }, { "epoch": 0.9271939605534941, "grad_norm": 7.35222362893952, "learning_rate": 1.384077069801504e-07, "loss": 17.9293, "step": 50724 }, { "epoch": 0.9272122397499406, "grad_norm": 5.450607172053171, "learning_rate": 1.3833854955831283e-07, "loss": 16.8903, "step": 50725 }, { "epoch": 0.9272305189463871, "grad_norm": 5.822969490253728, "learning_rate": 1.3826940917617659e-07, "loss": 16.9523, "step": 50726 }, { "epoch": 0.9272487981428337, "grad_norm": 5.331249386044681, "learning_rate": 1.382002858339837e-07, "loss": 17.0666, "step": 50727 }, { "epoch": 0.9272670773392802, "grad_norm": 5.001480610257457, "learning_rate": 1.381311795319773e-07, "loss": 16.9705, "step": 50728 }, { "epoch": 0.9272853565357266, "grad_norm": 7.085229409557348, "learning_rate": 1.3806209027039885e-07, "loss": 17.4075, "step": 50729 }, { "epoch": 0.9273036357321732, "grad_norm": 6.096712969199722, "learning_rate": 1.3799301804949094e-07, "loss": 17.2667, "step": 50730 }, { "epoch": 0.9273219149286197, "grad_norm": 6.4824050505682695, "learning_rate": 1.3792396286949616e-07, "loss": 17.3575, "step": 50731 }, { "epoch": 0.9273401941250663, "grad_norm": 5.689511732480847, "learning_rate": 1.3785492473065544e-07, "loss": 17.0752, "step": 50732 }, { "epoch": 0.9273584733215128, "grad_norm": 8.348702597020479, "learning_rate": 1.3778590363321187e-07, "loss": 17.9995, "step": 50733 }, { "epoch": 0.9273767525179593, "grad_norm": 5.8837871509385735, "learning_rate": 1.3771689957740642e-07, "loss": 17.2126, "step": 50734 }, { "epoch": 0.9273950317144058, "grad_norm": 7.363746468729521, "learning_rate": 1.3764791256348109e-07, "loss": 17.5275, "step": 50735 }, { "epoch": 0.9274133109108523, "grad_norm": 5.382864358816247, "learning_rate": 1.3757894259167847e-07, "loss": 16.8801, "step": 50736 }, { "epoch": 0.9274315901072989, "grad_norm": 6.308170817617243, "learning_rate": 1.375099896622395e-07, "loss": 17.0447, "step": 50737 }, { "epoch": 0.9274498693037454, "grad_norm": 6.6583934237269835, "learning_rate": 1.3744105377540562e-07, "loss": 17.5704, "step": 50738 }, { "epoch": 0.9274681485001919, "grad_norm": 5.214538504262358, "learning_rate": 1.3737213493141887e-07, "loss": 17.0932, "step": 50739 }, { "epoch": 0.9274864276966385, "grad_norm": 5.739277581063524, "learning_rate": 1.3730323313052184e-07, "loss": 17.228, "step": 50740 }, { "epoch": 0.927504706893085, "grad_norm": 6.031489151218355, "learning_rate": 1.3723434837295378e-07, "loss": 17.1251, "step": 50741 }, { "epoch": 0.9275229860895315, "grad_norm": 8.235326942128943, "learning_rate": 1.371654806589584e-07, "loss": 17.629, "step": 50742 }, { "epoch": 0.927541265285978, "grad_norm": 5.14411578620711, "learning_rate": 1.370966299887755e-07, "loss": 17.035, "step": 50743 }, { "epoch": 0.9275595444824245, "grad_norm": 6.663004043830699, "learning_rate": 1.3702779636264652e-07, "loss": 17.4923, "step": 50744 }, { "epoch": 0.9275778236788711, "grad_norm": 4.420845114511882, "learning_rate": 1.3695897978081406e-07, "loss": 16.7023, "step": 50745 }, { "epoch": 0.9275961028753176, "grad_norm": 6.234099507527401, "learning_rate": 1.3689018024351797e-07, "loss": 17.2393, "step": 50746 }, { "epoch": 0.9276143820717642, "grad_norm": 6.275505440842751, "learning_rate": 1.3682139775099968e-07, "loss": 17.3182, "step": 50747 }, { "epoch": 0.9276326612682106, "grad_norm": 6.781874038808148, "learning_rate": 1.367526323035001e-07, "loss": 17.388, "step": 50748 }, { "epoch": 0.9276509404646571, "grad_norm": 6.119229766928559, "learning_rate": 1.3668388390126074e-07, "loss": 17.3549, "step": 50749 }, { "epoch": 0.9276692196611037, "grad_norm": 6.1091236324849705, "learning_rate": 1.3661515254452252e-07, "loss": 17.408, "step": 50750 }, { "epoch": 0.9276874988575502, "grad_norm": 6.140749308725072, "learning_rate": 1.365464382335263e-07, "loss": 17.7056, "step": 50751 }, { "epoch": 0.9277057780539968, "grad_norm": 6.863600178366301, "learning_rate": 1.3647774096851253e-07, "loss": 17.4731, "step": 50752 }, { "epoch": 0.9277240572504433, "grad_norm": 6.005908079672885, "learning_rate": 1.364090607497226e-07, "loss": 17.5287, "step": 50753 }, { "epoch": 0.9277423364468897, "grad_norm": 6.41273160630879, "learning_rate": 1.3634039757739637e-07, "loss": 17.5328, "step": 50754 }, { "epoch": 0.9277606156433363, "grad_norm": 5.252884308490737, "learning_rate": 1.3627175145177528e-07, "loss": 16.941, "step": 50755 }, { "epoch": 0.9277788948397828, "grad_norm": 6.0671281529369265, "learning_rate": 1.3620312237309973e-07, "loss": 17.0499, "step": 50756 }, { "epoch": 0.9277971740362294, "grad_norm": 6.261718709792555, "learning_rate": 1.3613451034160952e-07, "loss": 17.4202, "step": 50757 }, { "epoch": 0.9278154532326759, "grad_norm": 5.179474276046006, "learning_rate": 1.3606591535754609e-07, "loss": 17.0031, "step": 50758 }, { "epoch": 0.9278337324291224, "grad_norm": 5.641784237325891, "learning_rate": 1.3599733742114984e-07, "loss": 16.9869, "step": 50759 }, { "epoch": 0.927852011625569, "grad_norm": 6.270374137296507, "learning_rate": 1.3592877653266112e-07, "loss": 17.0428, "step": 50760 }, { "epoch": 0.9278702908220154, "grad_norm": 5.845701589418693, "learning_rate": 1.3586023269231917e-07, "loss": 17.0949, "step": 50761 }, { "epoch": 0.927888570018462, "grad_norm": 5.640802923433127, "learning_rate": 1.3579170590036605e-07, "loss": 17.2591, "step": 50762 }, { "epoch": 0.9279068492149085, "grad_norm": 14.880603647673366, "learning_rate": 1.3572319615703988e-07, "loss": 17.3744, "step": 50763 }, { "epoch": 0.927925128411355, "grad_norm": 6.308429930371642, "learning_rate": 1.3565470346258215e-07, "loss": 17.3905, "step": 50764 }, { "epoch": 0.9279434076078016, "grad_norm": 6.8903139197894685, "learning_rate": 1.3558622781723317e-07, "loss": 17.4178, "step": 50765 }, { "epoch": 0.9279616868042481, "grad_norm": 9.047806013189069, "learning_rate": 1.3551776922123115e-07, "loss": 18.6615, "step": 50766 }, { "epoch": 0.9279799660006947, "grad_norm": 6.59800069310425, "learning_rate": 1.3544932767481755e-07, "loss": 17.3156, "step": 50767 }, { "epoch": 0.9279982451971411, "grad_norm": 5.171504275811132, "learning_rate": 1.353809031782327e-07, "loss": 16.8437, "step": 50768 }, { "epoch": 0.9280165243935876, "grad_norm": 5.3872774150763, "learning_rate": 1.353124957317148e-07, "loss": 17.2028, "step": 50769 }, { "epoch": 0.9280348035900342, "grad_norm": 7.024054515514678, "learning_rate": 1.3524410533550525e-07, "loss": 18.0957, "step": 50770 }, { "epoch": 0.9280530827864807, "grad_norm": 8.179692832927708, "learning_rate": 1.351757319898428e-07, "loss": 17.9203, "step": 50771 }, { "epoch": 0.9280713619829273, "grad_norm": 5.641814364794504, "learning_rate": 1.351073756949667e-07, "loss": 17.2731, "step": 50772 }, { "epoch": 0.9280896411793738, "grad_norm": 6.0951654144812695, "learning_rate": 1.3503903645111783e-07, "loss": 17.1415, "step": 50773 }, { "epoch": 0.9281079203758202, "grad_norm": 6.906740327003558, "learning_rate": 1.3497071425853435e-07, "loss": 17.6097, "step": 50774 }, { "epoch": 0.9281261995722668, "grad_norm": 6.47019126232158, "learning_rate": 1.3490240911745666e-07, "loss": 17.5085, "step": 50775 }, { "epoch": 0.9281444787687133, "grad_norm": 5.935774766751182, "learning_rate": 1.3483412102812344e-07, "loss": 17.2669, "step": 50776 }, { "epoch": 0.9281627579651599, "grad_norm": 7.052243494468708, "learning_rate": 1.3476584999077503e-07, "loss": 17.8154, "step": 50777 }, { "epoch": 0.9281810371616064, "grad_norm": 5.262460647906271, "learning_rate": 1.3469759600564957e-07, "loss": 16.8657, "step": 50778 }, { "epoch": 0.9281993163580529, "grad_norm": 5.570539725517718, "learning_rate": 1.3462935907298747e-07, "loss": 17.1227, "step": 50779 }, { "epoch": 0.9282175955544995, "grad_norm": 6.221594653496464, "learning_rate": 1.3456113919302683e-07, "loss": 17.4015, "step": 50780 }, { "epoch": 0.9282358747509459, "grad_norm": 4.786047635498939, "learning_rate": 1.3449293636600745e-07, "loss": 16.6775, "step": 50781 }, { "epoch": 0.9282541539473925, "grad_norm": 7.6585206802171015, "learning_rate": 1.3442475059216863e-07, "loss": 17.3317, "step": 50782 }, { "epoch": 0.928272433143839, "grad_norm": 6.632305907743372, "learning_rate": 1.3435658187174794e-07, "loss": 17.5535, "step": 50783 }, { "epoch": 0.9282907123402855, "grad_norm": 6.736989103927726, "learning_rate": 1.3428843020498517e-07, "loss": 17.6376, "step": 50784 }, { "epoch": 0.9283089915367321, "grad_norm": 4.918862926296685, "learning_rate": 1.3422029559211958e-07, "loss": 16.8013, "step": 50785 }, { "epoch": 0.9283272707331786, "grad_norm": 4.610936479852241, "learning_rate": 1.341521780333893e-07, "loss": 16.8347, "step": 50786 }, { "epoch": 0.9283455499296251, "grad_norm": 5.164627015680622, "learning_rate": 1.3408407752903363e-07, "loss": 16.7769, "step": 50787 }, { "epoch": 0.9283638291260716, "grad_norm": 5.944135785905737, "learning_rate": 1.3401599407929177e-07, "loss": 17.4646, "step": 50788 }, { "epoch": 0.9283821083225181, "grad_norm": 6.055482317941161, "learning_rate": 1.3394792768440024e-07, "loss": 17.111, "step": 50789 }, { "epoch": 0.9284003875189647, "grad_norm": 5.169967917037872, "learning_rate": 1.3387987834459994e-07, "loss": 17.1193, "step": 50790 }, { "epoch": 0.9284186667154112, "grad_norm": 6.727754193517134, "learning_rate": 1.3381184606012844e-07, "loss": 17.4661, "step": 50791 }, { "epoch": 0.9284369459118578, "grad_norm": 6.442013238941061, "learning_rate": 1.3374383083122334e-07, "loss": 17.5131, "step": 50792 }, { "epoch": 0.9284552251083042, "grad_norm": 5.251209452404499, "learning_rate": 1.3367583265812389e-07, "loss": 17.1028, "step": 50793 }, { "epoch": 0.9284735043047507, "grad_norm": 5.276968638470748, "learning_rate": 1.3360785154106882e-07, "loss": 16.7799, "step": 50794 }, { "epoch": 0.9284917835011973, "grad_norm": 8.184328504234855, "learning_rate": 1.3353988748029622e-07, "loss": 17.8294, "step": 50795 }, { "epoch": 0.9285100626976438, "grad_norm": 5.87220463821579, "learning_rate": 1.3347194047604373e-07, "loss": 17.1434, "step": 50796 }, { "epoch": 0.9285283418940903, "grad_norm": 6.078973428885282, "learning_rate": 1.3340401052854946e-07, "loss": 17.5878, "step": 50797 }, { "epoch": 0.9285466210905369, "grad_norm": 4.512734891348521, "learning_rate": 1.3333609763805267e-07, "loss": 16.7683, "step": 50798 }, { "epoch": 0.9285649002869834, "grad_norm": 5.784320012177785, "learning_rate": 1.3326820180478984e-07, "loss": 17.0807, "step": 50799 }, { "epoch": 0.9285831794834299, "grad_norm": 6.128205299520564, "learning_rate": 1.3320032302899966e-07, "loss": 17.2928, "step": 50800 }, { "epoch": 0.9286014586798764, "grad_norm": 7.163701185853322, "learning_rate": 1.3313246131092029e-07, "loss": 17.3678, "step": 50801 }, { "epoch": 0.9286197378763229, "grad_norm": 5.532713158597842, "learning_rate": 1.330646166507893e-07, "loss": 16.9639, "step": 50802 }, { "epoch": 0.9286380170727695, "grad_norm": 5.751908623091687, "learning_rate": 1.3299678904884372e-07, "loss": 17.0507, "step": 50803 }, { "epoch": 0.928656296269216, "grad_norm": 8.461657143426502, "learning_rate": 1.3292897850532338e-07, "loss": 17.3394, "step": 50804 }, { "epoch": 0.9286745754656626, "grad_norm": 5.294362701086225, "learning_rate": 1.3286118502046418e-07, "loss": 17.1753, "step": 50805 }, { "epoch": 0.928692854662109, "grad_norm": 5.994623904551692, "learning_rate": 1.3279340859450375e-07, "loss": 17.2205, "step": 50806 }, { "epoch": 0.9287111338585555, "grad_norm": 6.026658510967335, "learning_rate": 1.3272564922768017e-07, "loss": 17.347, "step": 50807 }, { "epoch": 0.9287294130550021, "grad_norm": 6.532035942730296, "learning_rate": 1.3265790692023107e-07, "loss": 17.4629, "step": 50808 }, { "epoch": 0.9287476922514486, "grad_norm": 5.931583580752895, "learning_rate": 1.3259018167239346e-07, "loss": 17.1066, "step": 50809 }, { "epoch": 0.9287659714478952, "grad_norm": 5.465048830878035, "learning_rate": 1.3252247348440496e-07, "loss": 17.4097, "step": 50810 }, { "epoch": 0.9287842506443417, "grad_norm": 4.618818464611052, "learning_rate": 1.3245478235650254e-07, "loss": 16.8044, "step": 50811 }, { "epoch": 0.9288025298407881, "grad_norm": 6.5521913859416845, "learning_rate": 1.3238710828892387e-07, "loss": 17.0298, "step": 50812 }, { "epoch": 0.9288208090372347, "grad_norm": 5.51349759009256, "learning_rate": 1.323194512819065e-07, "loss": 17.1663, "step": 50813 }, { "epoch": 0.9288390882336812, "grad_norm": 6.326514660320865, "learning_rate": 1.3225181133568632e-07, "loss": 17.5244, "step": 50814 }, { "epoch": 0.9288573674301278, "grad_norm": 5.3102588498234145, "learning_rate": 1.3218418845050152e-07, "loss": 17.0577, "step": 50815 }, { "epoch": 0.9288756466265743, "grad_norm": 5.778074356624356, "learning_rate": 1.321165826265891e-07, "loss": 17.1688, "step": 50816 }, { "epoch": 0.9288939258230208, "grad_norm": 5.063466918775945, "learning_rate": 1.3204899386418501e-07, "loss": 16.9412, "step": 50817 }, { "epoch": 0.9289122050194674, "grad_norm": 5.449351228082946, "learning_rate": 1.3198142216352738e-07, "loss": 16.9567, "step": 50818 }, { "epoch": 0.9289304842159138, "grad_norm": 5.918054377975563, "learning_rate": 1.3191386752485213e-07, "loss": 17.258, "step": 50819 }, { "epoch": 0.9289487634123604, "grad_norm": 6.466758008584295, "learning_rate": 1.3184632994839574e-07, "loss": 17.5891, "step": 50820 }, { "epoch": 0.9289670426088069, "grad_norm": 6.917982188902907, "learning_rate": 1.317788094343958e-07, "loss": 17.3522, "step": 50821 }, { "epoch": 0.9289853218052534, "grad_norm": 6.206573572553664, "learning_rate": 1.3171130598308934e-07, "loss": 17.19, "step": 50822 }, { "epoch": 0.9290036010017, "grad_norm": 8.861224456889799, "learning_rate": 1.3164381959471172e-07, "loss": 18.2916, "step": 50823 }, { "epoch": 0.9290218801981465, "grad_norm": 6.781496800110315, "learning_rate": 1.3157635026950056e-07, "loss": 17.5349, "step": 50824 }, { "epoch": 0.9290401593945931, "grad_norm": 5.074059257231904, "learning_rate": 1.3150889800769118e-07, "loss": 16.8149, "step": 50825 }, { "epoch": 0.9290584385910395, "grad_norm": 6.644474772547225, "learning_rate": 1.314414628095212e-07, "loss": 17.4268, "step": 50826 }, { "epoch": 0.929076717787486, "grad_norm": 8.806131513388863, "learning_rate": 1.3137404467522597e-07, "loss": 17.8417, "step": 50827 }, { "epoch": 0.9290949969839326, "grad_norm": 5.995602442632303, "learning_rate": 1.3130664360504252e-07, "loss": 17.3496, "step": 50828 }, { "epoch": 0.9291132761803791, "grad_norm": 8.601797851433947, "learning_rate": 1.312392595992068e-07, "loss": 18.4394, "step": 50829 }, { "epoch": 0.9291315553768257, "grad_norm": 7.066291763891818, "learning_rate": 1.3117189265795472e-07, "loss": 17.7878, "step": 50830 }, { "epoch": 0.9291498345732722, "grad_norm": 6.863912878777639, "learning_rate": 1.3110454278152275e-07, "loss": 17.4224, "step": 50831 }, { "epoch": 0.9291681137697186, "grad_norm": 6.607081605294036, "learning_rate": 1.310372099701468e-07, "loss": 17.3499, "step": 50832 }, { "epoch": 0.9291863929661652, "grad_norm": 6.188799048391179, "learning_rate": 1.3096989422406338e-07, "loss": 17.4444, "step": 50833 }, { "epoch": 0.9292046721626117, "grad_norm": 5.550242104492821, "learning_rate": 1.3090259554350727e-07, "loss": 17.1237, "step": 50834 }, { "epoch": 0.9292229513590583, "grad_norm": 5.208897095030541, "learning_rate": 1.3083531392871606e-07, "loss": 16.9232, "step": 50835 }, { "epoch": 0.9292412305555048, "grad_norm": 6.0695059368739095, "learning_rate": 1.3076804937992404e-07, "loss": 17.0754, "step": 50836 }, { "epoch": 0.9292595097519513, "grad_norm": 6.678988978182227, "learning_rate": 1.307008018973671e-07, "loss": 17.1638, "step": 50837 }, { "epoch": 0.9292777889483979, "grad_norm": 6.308456864890523, "learning_rate": 1.3063357148128175e-07, "loss": 17.3013, "step": 50838 }, { "epoch": 0.9292960681448443, "grad_norm": 4.700066436246591, "learning_rate": 1.3056635813190276e-07, "loss": 16.7446, "step": 50839 }, { "epoch": 0.9293143473412909, "grad_norm": 4.673654970023628, "learning_rate": 1.304991618494661e-07, "loss": 16.6678, "step": 50840 }, { "epoch": 0.9293326265377374, "grad_norm": 4.848007570995532, "learning_rate": 1.3043198263420763e-07, "loss": 16.7353, "step": 50841 }, { "epoch": 0.9293509057341839, "grad_norm": 5.078904851941415, "learning_rate": 1.3036482048636224e-07, "loss": 16.9489, "step": 50842 }, { "epoch": 0.9293691849306305, "grad_norm": 5.7390962760830515, "learning_rate": 1.3029767540616633e-07, "loss": 17.0668, "step": 50843 }, { "epoch": 0.929387464127077, "grad_norm": 5.7594774893890754, "learning_rate": 1.3023054739385422e-07, "loss": 17.2894, "step": 50844 }, { "epoch": 0.9294057433235235, "grad_norm": 7.045330199026275, "learning_rate": 1.3016343644966067e-07, "loss": 17.6074, "step": 50845 }, { "epoch": 0.92942402251997, "grad_norm": 6.436389044314094, "learning_rate": 1.3009634257382276e-07, "loss": 17.5007, "step": 50846 }, { "epoch": 0.9294423017164165, "grad_norm": 6.667167483108239, "learning_rate": 1.300292657665736e-07, "loss": 17.7655, "step": 50847 }, { "epoch": 0.9294605809128631, "grad_norm": 8.100868540971824, "learning_rate": 1.2996220602814967e-07, "loss": 18.0601, "step": 50848 }, { "epoch": 0.9294788601093096, "grad_norm": 5.618866448950544, "learning_rate": 1.2989516335878581e-07, "loss": 17.1859, "step": 50849 }, { "epoch": 0.9294971393057562, "grad_norm": 6.995394508421844, "learning_rate": 1.2982813775871683e-07, "loss": 17.3555, "step": 50850 }, { "epoch": 0.9295154185022027, "grad_norm": 8.804070538086537, "learning_rate": 1.2976112922817698e-07, "loss": 17.9726, "step": 50851 }, { "epoch": 0.9295336976986491, "grad_norm": 4.948082800755381, "learning_rate": 1.2969413776740268e-07, "loss": 16.8837, "step": 50852 }, { "epoch": 0.9295519768950957, "grad_norm": 5.996603521292495, "learning_rate": 1.2962716337662717e-07, "loss": 17.4992, "step": 50853 }, { "epoch": 0.9295702560915422, "grad_norm": 4.526196092488598, "learning_rate": 1.2956020605608577e-07, "loss": 16.7063, "step": 50854 }, { "epoch": 0.9295885352879888, "grad_norm": 6.2546910557763695, "learning_rate": 1.2949326580601386e-07, "loss": 17.3194, "step": 50855 }, { "epoch": 0.9296068144844353, "grad_norm": 5.631144500163488, "learning_rate": 1.2942634262664456e-07, "loss": 16.9306, "step": 50856 }, { "epoch": 0.9296250936808818, "grad_norm": 7.584313594569136, "learning_rate": 1.2935943651821326e-07, "loss": 17.3223, "step": 50857 }, { "epoch": 0.9296433728773283, "grad_norm": 6.418204502245346, "learning_rate": 1.2929254748095533e-07, "loss": 17.4289, "step": 50858 }, { "epoch": 0.9296616520737748, "grad_norm": 6.833902474844433, "learning_rate": 1.2922567551510335e-07, "loss": 17.7048, "step": 50859 }, { "epoch": 0.9296799312702214, "grad_norm": 6.299160950420573, "learning_rate": 1.291588206208938e-07, "loss": 17.4163, "step": 50860 }, { "epoch": 0.9296982104666679, "grad_norm": 5.719979524861554, "learning_rate": 1.2909198279855984e-07, "loss": 17.0334, "step": 50861 }, { "epoch": 0.9297164896631144, "grad_norm": 5.7047563161220545, "learning_rate": 1.2902516204833515e-07, "loss": 17.2496, "step": 50862 }, { "epoch": 0.929734768859561, "grad_norm": 5.680877793585051, "learning_rate": 1.2895835837045456e-07, "loss": 17.0803, "step": 50863 }, { "epoch": 0.9297530480560074, "grad_norm": 4.577725952935866, "learning_rate": 1.2889157176515289e-07, "loss": 16.6715, "step": 50864 }, { "epoch": 0.9297713272524539, "grad_norm": 6.83245153728904, "learning_rate": 1.288248022326627e-07, "loss": 17.2848, "step": 50865 }, { "epoch": 0.9297896064489005, "grad_norm": 7.0201898058305945, "learning_rate": 1.2875804977321939e-07, "loss": 17.8277, "step": 50866 }, { "epoch": 0.929807885645347, "grad_norm": 5.549454376003858, "learning_rate": 1.2869131438705663e-07, "loss": 17.0509, "step": 50867 }, { "epoch": 0.9298261648417936, "grad_norm": 7.089344459393236, "learning_rate": 1.2862459607440758e-07, "loss": 17.8545, "step": 50868 }, { "epoch": 0.9298444440382401, "grad_norm": 5.525975980948813, "learning_rate": 1.2855789483550707e-07, "loss": 17.0845, "step": 50869 }, { "epoch": 0.9298627232346866, "grad_norm": 5.758120361122779, "learning_rate": 1.2849121067058768e-07, "loss": 17.2007, "step": 50870 }, { "epoch": 0.9298810024311331, "grad_norm": 6.100673458980826, "learning_rate": 1.2842454357988476e-07, "loss": 17.2957, "step": 50871 }, { "epoch": 0.9298992816275796, "grad_norm": 6.396433941077926, "learning_rate": 1.2835789356363094e-07, "loss": 17.5213, "step": 50872 }, { "epoch": 0.9299175608240262, "grad_norm": 5.2281011670268445, "learning_rate": 1.282912606220593e-07, "loss": 17.0082, "step": 50873 }, { "epoch": 0.9299358400204727, "grad_norm": 5.9160692073943935, "learning_rate": 1.2822464475540474e-07, "loss": 16.8585, "step": 50874 }, { "epoch": 0.9299541192169192, "grad_norm": 5.875488253690607, "learning_rate": 1.2815804596389925e-07, "loss": 16.925, "step": 50875 }, { "epoch": 0.9299723984133658, "grad_norm": 6.132136692501776, "learning_rate": 1.2809146424777762e-07, "loss": 17.1937, "step": 50876 }, { "epoch": 0.9299906776098122, "grad_norm": 7.322829165468901, "learning_rate": 1.280248996072725e-07, "loss": 17.4355, "step": 50877 }, { "epoch": 0.9300089568062588, "grad_norm": 6.387908745737934, "learning_rate": 1.2795835204261753e-07, "loss": 17.1647, "step": 50878 }, { "epoch": 0.9300272360027053, "grad_norm": 7.041227902971858, "learning_rate": 1.278918215540459e-07, "loss": 17.8366, "step": 50879 }, { "epoch": 0.9300455151991518, "grad_norm": 5.735171341077409, "learning_rate": 1.2782530814179017e-07, "loss": 16.9606, "step": 50880 }, { "epoch": 0.9300637943955984, "grad_norm": 7.431362244604598, "learning_rate": 1.2775881180608462e-07, "loss": 17.7651, "step": 50881 }, { "epoch": 0.9300820735920449, "grad_norm": 5.912933650836398, "learning_rate": 1.2769233254716075e-07, "loss": 17.4317, "step": 50882 }, { "epoch": 0.9301003527884915, "grad_norm": 7.240442467419721, "learning_rate": 1.2762587036525332e-07, "loss": 17.7956, "step": 50883 }, { "epoch": 0.9301186319849379, "grad_norm": 6.476726526126011, "learning_rate": 1.2755942526059383e-07, "loss": 17.2798, "step": 50884 }, { "epoch": 0.9301369111813844, "grad_norm": 4.711779187345893, "learning_rate": 1.2749299723341547e-07, "loss": 16.7777, "step": 50885 }, { "epoch": 0.930155190377831, "grad_norm": 4.9067701805775075, "learning_rate": 1.274265862839519e-07, "loss": 16.9218, "step": 50886 }, { "epoch": 0.9301734695742775, "grad_norm": 7.1204418679284744, "learning_rate": 1.2736019241243514e-07, "loss": 17.1625, "step": 50887 }, { "epoch": 0.9301917487707241, "grad_norm": 6.410760713226326, "learning_rate": 1.2729381561909836e-07, "loss": 17.4267, "step": 50888 }, { "epoch": 0.9302100279671706, "grad_norm": 5.542032577715432, "learning_rate": 1.2722745590417362e-07, "loss": 17.253, "step": 50889 }, { "epoch": 0.930228307163617, "grad_norm": 5.871206985768264, "learning_rate": 1.2716111326789404e-07, "loss": 17.0634, "step": 50890 }, { "epoch": 0.9302465863600636, "grad_norm": 7.644271207040181, "learning_rate": 1.2709478771049167e-07, "loss": 17.098, "step": 50891 }, { "epoch": 0.9302648655565101, "grad_norm": 7.21055973863538, "learning_rate": 1.2702847923219964e-07, "loss": 17.517, "step": 50892 }, { "epoch": 0.9302831447529567, "grad_norm": 6.736083411718857, "learning_rate": 1.2696218783324943e-07, "loss": 17.5834, "step": 50893 }, { "epoch": 0.9303014239494032, "grad_norm": 6.59008280918027, "learning_rate": 1.2689591351387364e-07, "loss": 17.4605, "step": 50894 }, { "epoch": 0.9303197031458497, "grad_norm": 8.505263146136546, "learning_rate": 1.2682965627430543e-07, "loss": 17.3085, "step": 50895 }, { "epoch": 0.9303379823422963, "grad_norm": 5.232016250893199, "learning_rate": 1.2676341611477628e-07, "loss": 17.0292, "step": 50896 }, { "epoch": 0.9303562615387427, "grad_norm": 5.517929573950458, "learning_rate": 1.266971930355182e-07, "loss": 17.3397, "step": 50897 }, { "epoch": 0.9303745407351893, "grad_norm": 6.631284618384754, "learning_rate": 1.2663098703676435e-07, "loss": 17.4735, "step": 50898 }, { "epoch": 0.9303928199316358, "grad_norm": 4.764400727842216, "learning_rate": 1.265647981187451e-07, "loss": 16.9127, "step": 50899 }, { "epoch": 0.9304110991280823, "grad_norm": 6.3998889084334145, "learning_rate": 1.2649862628169364e-07, "loss": 17.1531, "step": 50900 }, { "epoch": 0.9304293783245289, "grad_norm": 6.1157747545515635, "learning_rate": 1.2643247152584138e-07, "loss": 17.0661, "step": 50901 }, { "epoch": 0.9304476575209754, "grad_norm": 6.32848208311207, "learning_rate": 1.2636633385142095e-07, "loss": 17.6685, "step": 50902 }, { "epoch": 0.930465936717422, "grad_norm": 6.332790869690192, "learning_rate": 1.263002132586627e-07, "loss": 17.4548, "step": 50903 }, { "epoch": 0.9304842159138684, "grad_norm": 13.02937231315675, "learning_rate": 1.2623410974779982e-07, "loss": 17.5698, "step": 50904 }, { "epoch": 0.9305024951103149, "grad_norm": 4.991259681533266, "learning_rate": 1.2616802331906376e-07, "loss": 17.0377, "step": 50905 }, { "epoch": 0.9305207743067615, "grad_norm": 7.78204964643936, "learning_rate": 1.2610195397268599e-07, "loss": 17.6709, "step": 50906 }, { "epoch": 0.930539053503208, "grad_norm": 5.181574882374382, "learning_rate": 1.260359017088969e-07, "loss": 17.2234, "step": 50907 }, { "epoch": 0.9305573326996546, "grad_norm": 6.380844962324829, "learning_rate": 1.259698665279302e-07, "loss": 17.5519, "step": 50908 }, { "epoch": 0.930575611896101, "grad_norm": 6.931479635406445, "learning_rate": 1.259038484300157e-07, "loss": 17.5131, "step": 50909 }, { "epoch": 0.9305938910925475, "grad_norm": 4.747415160687303, "learning_rate": 1.2583784741538484e-07, "loss": 16.9066, "step": 50910 }, { "epoch": 0.9306121702889941, "grad_norm": 7.746829217693708, "learning_rate": 1.2577186348426973e-07, "loss": 18.2884, "step": 50911 }, { "epoch": 0.9306304494854406, "grad_norm": 6.172159511862673, "learning_rate": 1.2570589663690125e-07, "loss": 17.4371, "step": 50912 }, { "epoch": 0.9306487286818872, "grad_norm": 5.760479576920961, "learning_rate": 1.2563994687351034e-07, "loss": 17.1698, "step": 50913 }, { "epoch": 0.9306670078783337, "grad_norm": 5.626302508203929, "learning_rate": 1.25574014194329e-07, "loss": 17.2268, "step": 50914 }, { "epoch": 0.9306852870747802, "grad_norm": 6.0211222013776275, "learning_rate": 1.2550809859958768e-07, "loss": 17.1557, "step": 50915 }, { "epoch": 0.9307035662712267, "grad_norm": 6.488166085163834, "learning_rate": 1.2544220008951725e-07, "loss": 17.4505, "step": 50916 }, { "epoch": 0.9307218454676732, "grad_norm": 7.208466594384094, "learning_rate": 1.2537631866434918e-07, "loss": 17.9554, "step": 50917 }, { "epoch": 0.9307401246641198, "grad_norm": 7.221942852958489, "learning_rate": 1.2531045432431333e-07, "loss": 17.1834, "step": 50918 }, { "epoch": 0.9307584038605663, "grad_norm": 7.373371105622097, "learning_rate": 1.2524460706964227e-07, "loss": 17.454, "step": 50919 }, { "epoch": 0.9307766830570128, "grad_norm": 5.559512866821071, "learning_rate": 1.2517877690056524e-07, "loss": 17.0927, "step": 50920 }, { "epoch": 0.9307949622534594, "grad_norm": 5.317097552669641, "learning_rate": 1.251129638173132e-07, "loss": 16.9504, "step": 50921 }, { "epoch": 0.9308132414499058, "grad_norm": 5.05003244627441, "learning_rate": 1.2504716782011818e-07, "loss": 17.008, "step": 50922 }, { "epoch": 0.9308315206463524, "grad_norm": 4.485784460715552, "learning_rate": 1.2498138890920996e-07, "loss": 16.7462, "step": 50923 }, { "epoch": 0.9308497998427989, "grad_norm": 5.738499161685658, "learning_rate": 1.2491562708481786e-07, "loss": 17.1071, "step": 50924 }, { "epoch": 0.9308680790392454, "grad_norm": 6.301879443722505, "learning_rate": 1.2484988234717443e-07, "loss": 17.3491, "step": 50925 }, { "epoch": 0.930886358235692, "grad_norm": 6.306952816349119, "learning_rate": 1.2478415469650896e-07, "loss": 17.0995, "step": 50926 }, { "epoch": 0.9309046374321385, "grad_norm": 6.513978949788229, "learning_rate": 1.2471844413305124e-07, "loss": 17.6132, "step": 50927 }, { "epoch": 0.9309229166285851, "grad_norm": 7.341963161950486, "learning_rate": 1.246527506570333e-07, "loss": 17.5707, "step": 50928 }, { "epoch": 0.9309411958250315, "grad_norm": 6.777203606191966, "learning_rate": 1.2458707426868333e-07, "loss": 17.6246, "step": 50929 }, { "epoch": 0.930959475021478, "grad_norm": 6.0374001945625695, "learning_rate": 1.2452141496823277e-07, "loss": 17.0571, "step": 50930 }, { "epoch": 0.9309777542179246, "grad_norm": 6.266643200653716, "learning_rate": 1.244557727559126e-07, "loss": 17.358, "step": 50931 }, { "epoch": 0.9309960334143711, "grad_norm": 6.689774997736368, "learning_rate": 1.243901476319509e-07, "loss": 17.5854, "step": 50932 }, { "epoch": 0.9310143126108176, "grad_norm": 4.978689970854307, "learning_rate": 1.243245395965792e-07, "loss": 16.9464, "step": 50933 }, { "epoch": 0.9310325918072642, "grad_norm": 4.983975758067589, "learning_rate": 1.2425894865002674e-07, "loss": 16.9848, "step": 50934 }, { "epoch": 0.9310508710037106, "grad_norm": 5.822910145429974, "learning_rate": 1.2419337479252335e-07, "loss": 17.1455, "step": 50935 }, { "epoch": 0.9310691502001572, "grad_norm": 7.580559976077696, "learning_rate": 1.241278180242994e-07, "loss": 17.8754, "step": 50936 }, { "epoch": 0.9310874293966037, "grad_norm": 6.397742783616459, "learning_rate": 1.2406227834558414e-07, "loss": 17.452, "step": 50937 }, { "epoch": 0.9311057085930502, "grad_norm": 8.985094238224427, "learning_rate": 1.2399675575660742e-07, "loss": 18.4427, "step": 50938 }, { "epoch": 0.9311239877894968, "grad_norm": 7.183915253316859, "learning_rate": 1.2393125025759845e-07, "loss": 17.7501, "step": 50939 }, { "epoch": 0.9311422669859433, "grad_norm": 7.160499009824286, "learning_rate": 1.2386576184878817e-07, "loss": 17.8102, "step": 50940 }, { "epoch": 0.9311605461823899, "grad_norm": 5.169179437686488, "learning_rate": 1.2380029053040476e-07, "loss": 16.8711, "step": 50941 }, { "epoch": 0.9311788253788363, "grad_norm": 8.96106171416973, "learning_rate": 1.2373483630267857e-07, "loss": 17.7476, "step": 50942 }, { "epoch": 0.9311971045752828, "grad_norm": 4.79835747255063, "learning_rate": 1.2366939916583887e-07, "loss": 16.8489, "step": 50943 }, { "epoch": 0.9312153837717294, "grad_norm": 6.167703526242815, "learning_rate": 1.2360397912011436e-07, "loss": 17.2942, "step": 50944 }, { "epoch": 0.9312336629681759, "grad_norm": 6.775424585990415, "learning_rate": 1.2353857616573484e-07, "loss": 17.4098, "step": 50945 }, { "epoch": 0.9312519421646225, "grad_norm": 6.418379882110006, "learning_rate": 1.234731903029296e-07, "loss": 17.3903, "step": 50946 }, { "epoch": 0.931270221361069, "grad_norm": 6.781405981009193, "learning_rate": 1.2340782153192732e-07, "loss": 17.629, "step": 50947 }, { "epoch": 0.9312885005575154, "grad_norm": 5.3273803182712065, "learning_rate": 1.2334246985295728e-07, "loss": 17.0486, "step": 50948 }, { "epoch": 0.931306779753962, "grad_norm": 5.392080066985617, "learning_rate": 1.2327713526624873e-07, "loss": 16.9829, "step": 50949 }, { "epoch": 0.9313250589504085, "grad_norm": 6.892392276111788, "learning_rate": 1.2321181777203096e-07, "loss": 17.6319, "step": 50950 }, { "epoch": 0.9313433381468551, "grad_norm": 5.554947052124415, "learning_rate": 1.231465173705332e-07, "loss": 17.0729, "step": 50951 }, { "epoch": 0.9313616173433016, "grad_norm": 5.796237961569896, "learning_rate": 1.2308123406198247e-07, "loss": 17.3347, "step": 50952 }, { "epoch": 0.9313798965397481, "grad_norm": 6.5913922716732865, "learning_rate": 1.2301596784660973e-07, "loss": 17.8937, "step": 50953 }, { "epoch": 0.9313981757361947, "grad_norm": 5.273301202255047, "learning_rate": 1.229507187246426e-07, "loss": 17.0947, "step": 50954 }, { "epoch": 0.9314164549326411, "grad_norm": 5.988970847277369, "learning_rate": 1.2288548669630973e-07, "loss": 17.4241, "step": 50955 }, { "epoch": 0.9314347341290877, "grad_norm": 5.660452943309356, "learning_rate": 1.2282027176184043e-07, "loss": 17.1642, "step": 50956 }, { "epoch": 0.9314530133255342, "grad_norm": 6.60319312131645, "learning_rate": 1.2275507392146225e-07, "loss": 17.5706, "step": 50957 }, { "epoch": 0.9314712925219807, "grad_norm": 6.147546342927857, "learning_rate": 1.2268989317540446e-07, "loss": 17.5, "step": 50958 }, { "epoch": 0.9314895717184273, "grad_norm": 7.591957353946078, "learning_rate": 1.226247295238958e-07, "loss": 18.0474, "step": 50959 }, { "epoch": 0.9315078509148738, "grad_norm": 5.381266956152308, "learning_rate": 1.2255958296716387e-07, "loss": 17.0236, "step": 50960 }, { "epoch": 0.9315261301113203, "grad_norm": 7.636504469107175, "learning_rate": 1.224944535054373e-07, "loss": 17.3028, "step": 50961 }, { "epoch": 0.9315444093077668, "grad_norm": 6.831876927914928, "learning_rate": 1.2242934113894489e-07, "loss": 17.0061, "step": 50962 }, { "epoch": 0.9315626885042133, "grad_norm": 6.21725484267518, "learning_rate": 1.2236424586791362e-07, "loss": 17.663, "step": 50963 }, { "epoch": 0.9315809677006599, "grad_norm": 7.603648499348474, "learning_rate": 1.2229916769257334e-07, "loss": 18.1668, "step": 50964 }, { "epoch": 0.9315992468971064, "grad_norm": 5.956587693242585, "learning_rate": 1.2223410661315105e-07, "loss": 17.3956, "step": 50965 }, { "epoch": 0.931617526093553, "grad_norm": 5.424815124890379, "learning_rate": 1.2216906262987437e-07, "loss": 17.2411, "step": 50966 }, { "epoch": 0.9316358052899995, "grad_norm": 4.955299140986627, "learning_rate": 1.2210403574297147e-07, "loss": 16.9841, "step": 50967 }, { "epoch": 0.9316540844864459, "grad_norm": 6.623794366074777, "learning_rate": 1.220390259526716e-07, "loss": 17.3229, "step": 50968 }, { "epoch": 0.9316723636828925, "grad_norm": 5.2506564111004135, "learning_rate": 1.219740332592012e-07, "loss": 16.9187, "step": 50969 }, { "epoch": 0.931690642879339, "grad_norm": 5.639766384423573, "learning_rate": 1.2190905766278905e-07, "loss": 17.238, "step": 50970 }, { "epoch": 0.9317089220757856, "grad_norm": 6.24702580713521, "learning_rate": 1.2184409916366212e-07, "loss": 17.3065, "step": 50971 }, { "epoch": 0.9317272012722321, "grad_norm": 5.635671088768756, "learning_rate": 1.2177915776204752e-07, "loss": 17.1945, "step": 50972 }, { "epoch": 0.9317454804686786, "grad_norm": 6.121244829511274, "learning_rate": 1.2171423345817447e-07, "loss": 17.1364, "step": 50973 }, { "epoch": 0.9317637596651251, "grad_norm": 6.966841162806058, "learning_rate": 1.2164932625226944e-07, "loss": 17.8612, "step": 50974 }, { "epoch": 0.9317820388615716, "grad_norm": 7.87497160939833, "learning_rate": 1.215844361445606e-07, "loss": 18.2012, "step": 50975 }, { "epoch": 0.9318003180580182, "grad_norm": 6.191352933989097, "learning_rate": 1.2151956313527447e-07, "loss": 17.3565, "step": 50976 }, { "epoch": 0.9318185972544647, "grad_norm": 6.762540781370173, "learning_rate": 1.2145470722463858e-07, "loss": 17.2826, "step": 50977 }, { "epoch": 0.9318368764509112, "grad_norm": 6.8682239381822345, "learning_rate": 1.2138986841288169e-07, "loss": 17.4497, "step": 50978 }, { "epoch": 0.9318551556473578, "grad_norm": 7.257384718574531, "learning_rate": 1.2132504670022971e-07, "loss": 17.5095, "step": 50979 }, { "epoch": 0.9318734348438042, "grad_norm": 6.841838330277599, "learning_rate": 1.2126024208690912e-07, "loss": 17.5661, "step": 50980 }, { "epoch": 0.9318917140402508, "grad_norm": 6.122423848054143, "learning_rate": 1.2119545457314918e-07, "loss": 17.4937, "step": 50981 }, { "epoch": 0.9319099932366973, "grad_norm": 6.334282192757034, "learning_rate": 1.2113068415917527e-07, "loss": 17.3246, "step": 50982 }, { "epoch": 0.9319282724331438, "grad_norm": 6.171133549267691, "learning_rate": 1.2106593084521502e-07, "loss": 17.0991, "step": 50983 }, { "epoch": 0.9319465516295904, "grad_norm": 6.794450047216124, "learning_rate": 1.2100119463149485e-07, "loss": 17.8267, "step": 50984 }, { "epoch": 0.9319648308260369, "grad_norm": 6.662345863057229, "learning_rate": 1.2093647551824294e-07, "loss": 17.4308, "step": 50985 }, { "epoch": 0.9319831100224835, "grad_norm": 5.8568674142341175, "learning_rate": 1.2087177350568467e-07, "loss": 17.333, "step": 50986 }, { "epoch": 0.9320013892189299, "grad_norm": 6.37735771286365, "learning_rate": 1.2080708859404766e-07, "loss": 17.3232, "step": 50987 }, { "epoch": 0.9320196684153764, "grad_norm": 4.896934297679844, "learning_rate": 1.2074242078355892e-07, "loss": 16.9861, "step": 50988 }, { "epoch": 0.932037947611823, "grad_norm": 5.55066332845017, "learning_rate": 1.2067777007444382e-07, "loss": 17.0454, "step": 50989 }, { "epoch": 0.9320562268082695, "grad_norm": 5.2082363324577585, "learning_rate": 1.2061313646692997e-07, "loss": 16.8287, "step": 50990 }, { "epoch": 0.9320745060047161, "grad_norm": 6.346671712627887, "learning_rate": 1.205485199612444e-07, "loss": 17.4761, "step": 50991 }, { "epoch": 0.9320927852011626, "grad_norm": 5.827437924335845, "learning_rate": 1.2048392055761138e-07, "loss": 17.228, "step": 50992 }, { "epoch": 0.932111064397609, "grad_norm": 4.238161769193964, "learning_rate": 1.2041933825625962e-07, "loss": 16.6275, "step": 50993 }, { "epoch": 0.9321293435940556, "grad_norm": 5.782832668556098, "learning_rate": 1.203547730574145e-07, "loss": 17.0394, "step": 50994 }, { "epoch": 0.9321476227905021, "grad_norm": 5.81188487664946, "learning_rate": 1.202902249613025e-07, "loss": 17.1502, "step": 50995 }, { "epoch": 0.9321659019869487, "grad_norm": 5.522938629100619, "learning_rate": 1.202256939681501e-07, "loss": 17.087, "step": 50996 }, { "epoch": 0.9321841811833952, "grad_norm": 6.300649540874963, "learning_rate": 1.2016118007818268e-07, "loss": 17.1486, "step": 50997 }, { "epoch": 0.9322024603798417, "grad_norm": 6.313071794362472, "learning_rate": 1.2009668329162726e-07, "loss": 17.6572, "step": 50998 }, { "epoch": 0.9322207395762883, "grad_norm": 6.84816430507756, "learning_rate": 1.2003220360870982e-07, "loss": 17.4456, "step": 50999 }, { "epoch": 0.9322390187727347, "grad_norm": 6.224492920292585, "learning_rate": 1.1996774102965515e-07, "loss": 17.3832, "step": 51000 }, { "epoch": 0.9322572979691812, "grad_norm": 6.456973602354893, "learning_rate": 1.1990329555469082e-07, "loss": 17.1358, "step": 51001 }, { "epoch": 0.9322755771656278, "grad_norm": 7.527469481284724, "learning_rate": 1.198388671840417e-07, "loss": 17.6206, "step": 51002 }, { "epoch": 0.9322938563620743, "grad_norm": 7.852993169759718, "learning_rate": 1.197744559179337e-07, "loss": 17.6301, "step": 51003 }, { "epoch": 0.9323121355585209, "grad_norm": 6.20096525131727, "learning_rate": 1.1971006175659327e-07, "loss": 17.0789, "step": 51004 }, { "epoch": 0.9323304147549674, "grad_norm": 6.3705281541204695, "learning_rate": 1.196456847002453e-07, "loss": 17.2409, "step": 51005 }, { "epoch": 0.9323486939514138, "grad_norm": 7.34444374679273, "learning_rate": 1.195813247491151e-07, "loss": 17.5934, "step": 51006 }, { "epoch": 0.9323669731478604, "grad_norm": 5.622673882494691, "learning_rate": 1.1951698190342976e-07, "loss": 17.1514, "step": 51007 }, { "epoch": 0.9323852523443069, "grad_norm": 5.905412789025597, "learning_rate": 1.1945265616341351e-07, "loss": 17.1072, "step": 51008 }, { "epoch": 0.9324035315407535, "grad_norm": 5.641435563284064, "learning_rate": 1.193883475292923e-07, "loss": 17.0478, "step": 51009 }, { "epoch": 0.9324218107372, "grad_norm": 6.132883316612275, "learning_rate": 1.193240560012915e-07, "loss": 17.4431, "step": 51010 }, { "epoch": 0.9324400899336465, "grad_norm": 6.252281406748904, "learning_rate": 1.1925978157963592e-07, "loss": 17.1135, "step": 51011 }, { "epoch": 0.9324583691300931, "grad_norm": 5.142132645321047, "learning_rate": 1.1919552426455094e-07, "loss": 16.9942, "step": 51012 }, { "epoch": 0.9324766483265395, "grad_norm": 7.1098748951276045, "learning_rate": 1.1913128405626306e-07, "loss": 17.7474, "step": 51013 }, { "epoch": 0.9324949275229861, "grad_norm": 7.33355607423619, "learning_rate": 1.1906706095499599e-07, "loss": 17.3244, "step": 51014 }, { "epoch": 0.9325132067194326, "grad_norm": 6.106106921240139, "learning_rate": 1.1900285496097563e-07, "loss": 17.4235, "step": 51015 }, { "epoch": 0.9325314859158791, "grad_norm": 7.102143457159202, "learning_rate": 1.1893866607442684e-07, "loss": 17.539, "step": 51016 }, { "epoch": 0.9325497651123257, "grad_norm": 7.542137289117532, "learning_rate": 1.1887449429557386e-07, "loss": 17.7661, "step": 51017 }, { "epoch": 0.9325680443087722, "grad_norm": 6.52559891694185, "learning_rate": 1.1881033962464261e-07, "loss": 17.4837, "step": 51018 }, { "epoch": 0.9325863235052188, "grad_norm": 7.8025274142912915, "learning_rate": 1.1874620206185794e-07, "loss": 17.5658, "step": 51019 }, { "epoch": 0.9326046027016652, "grad_norm": 6.363544834149443, "learning_rate": 1.1868208160744354e-07, "loss": 17.1378, "step": 51020 }, { "epoch": 0.9326228818981117, "grad_norm": 6.776527602521718, "learning_rate": 1.186179782616248e-07, "loss": 17.7196, "step": 51021 }, { "epoch": 0.9326411610945583, "grad_norm": 5.476786413402526, "learning_rate": 1.1855389202462708e-07, "loss": 17.2775, "step": 51022 }, { "epoch": 0.9326594402910048, "grad_norm": 6.591323455750738, "learning_rate": 1.1848982289667355e-07, "loss": 17.4166, "step": 51023 }, { "epoch": 0.9326777194874514, "grad_norm": 6.005522176323234, "learning_rate": 1.1842577087799013e-07, "loss": 17.3965, "step": 51024 }, { "epoch": 0.9326959986838979, "grad_norm": 6.359814491494441, "learning_rate": 1.1836173596880051e-07, "loss": 17.2372, "step": 51025 }, { "epoch": 0.9327142778803443, "grad_norm": 5.668818784272163, "learning_rate": 1.182977181693301e-07, "loss": 17.2223, "step": 51026 }, { "epoch": 0.9327325570767909, "grad_norm": 8.075015976818683, "learning_rate": 1.1823371747980206e-07, "loss": 18.3597, "step": 51027 }, { "epoch": 0.9327508362732374, "grad_norm": 6.756720065851516, "learning_rate": 1.1816973390044117e-07, "loss": 17.5532, "step": 51028 }, { "epoch": 0.932769115469684, "grad_norm": 5.950701705059281, "learning_rate": 1.1810576743147228e-07, "loss": 17.3416, "step": 51029 }, { "epoch": 0.9327873946661305, "grad_norm": 9.349960340691272, "learning_rate": 1.1804181807311798e-07, "loss": 17.4522, "step": 51030 }, { "epoch": 0.932805673862577, "grad_norm": 6.8915151301933655, "learning_rate": 1.1797788582560365e-07, "loss": 17.4891, "step": 51031 }, { "epoch": 0.9328239530590235, "grad_norm": 6.22626388341107, "learning_rate": 1.1791397068915411e-07, "loss": 17.4743, "step": 51032 }, { "epoch": 0.93284223225547, "grad_norm": 6.136334720626664, "learning_rate": 1.1785007266399196e-07, "loss": 17.0932, "step": 51033 }, { "epoch": 0.9328605114519166, "grad_norm": 4.8760489357052785, "learning_rate": 1.1778619175034145e-07, "loss": 16.6007, "step": 51034 }, { "epoch": 0.9328787906483631, "grad_norm": 5.468091463207053, "learning_rate": 1.1772232794842742e-07, "loss": 16.926, "step": 51035 }, { "epoch": 0.9328970698448096, "grad_norm": 6.517915544661685, "learning_rate": 1.1765848125847245e-07, "loss": 17.5253, "step": 51036 }, { "epoch": 0.9329153490412562, "grad_norm": 7.08696729371529, "learning_rate": 1.1759465168070083e-07, "loss": 17.4716, "step": 51037 }, { "epoch": 0.9329336282377027, "grad_norm": 5.304167763990766, "learning_rate": 1.175308392153357e-07, "loss": 17.0189, "step": 51038 }, { "epoch": 0.9329519074341492, "grad_norm": 5.935946672880368, "learning_rate": 1.1746704386260243e-07, "loss": 17.229, "step": 51039 }, { "epoch": 0.9329701866305957, "grad_norm": 5.862438536820065, "learning_rate": 1.1740326562272253e-07, "loss": 17.1024, "step": 51040 }, { "epoch": 0.9329884658270422, "grad_norm": 4.611492753806701, "learning_rate": 1.173395044959208e-07, "loss": 16.9204, "step": 51041 }, { "epoch": 0.9330067450234888, "grad_norm": 6.175221402148952, "learning_rate": 1.172757604824204e-07, "loss": 17.471, "step": 51042 }, { "epoch": 0.9330250242199353, "grad_norm": 5.5599310199793965, "learning_rate": 1.1721203358244505e-07, "loss": 17.4951, "step": 51043 }, { "epoch": 0.9330433034163819, "grad_norm": 6.550011979205639, "learning_rate": 1.1714832379621788e-07, "loss": 17.3471, "step": 51044 }, { "epoch": 0.9330615826128283, "grad_norm": 10.813482987716053, "learning_rate": 1.1708463112396152e-07, "loss": 18.4717, "step": 51045 }, { "epoch": 0.9330798618092748, "grad_norm": 4.702962198620598, "learning_rate": 1.170209555659002e-07, "loss": 16.8798, "step": 51046 }, { "epoch": 0.9330981410057214, "grad_norm": 6.054806551336983, "learning_rate": 1.1695729712225657e-07, "loss": 17.4708, "step": 51047 }, { "epoch": 0.9331164202021679, "grad_norm": 6.2242402221576745, "learning_rate": 1.1689365579325373e-07, "loss": 17.2039, "step": 51048 }, { "epoch": 0.9331346993986145, "grad_norm": 6.025493603920106, "learning_rate": 1.1683003157911543e-07, "loss": 17.109, "step": 51049 }, { "epoch": 0.933152978595061, "grad_norm": 5.919107860968797, "learning_rate": 1.167664244800637e-07, "loss": 17.2453, "step": 51050 }, { "epoch": 0.9331712577915074, "grad_norm": 7.150059256933771, "learning_rate": 1.167028344963217e-07, "loss": 17.4217, "step": 51051 }, { "epoch": 0.933189536987954, "grad_norm": 5.723451232408765, "learning_rate": 1.1663926162811312e-07, "loss": 17.0017, "step": 51052 }, { "epoch": 0.9332078161844005, "grad_norm": 7.333646666006699, "learning_rate": 1.1657570587566003e-07, "loss": 18.0564, "step": 51053 }, { "epoch": 0.9332260953808471, "grad_norm": 6.0203296660533105, "learning_rate": 1.1651216723918502e-07, "loss": 17.7862, "step": 51054 }, { "epoch": 0.9332443745772936, "grad_norm": 5.963610128098901, "learning_rate": 1.1644864571891178e-07, "loss": 17.3289, "step": 51055 }, { "epoch": 0.9332626537737401, "grad_norm": 6.234260593260835, "learning_rate": 1.1638514131506129e-07, "loss": 17.2023, "step": 51056 }, { "epoch": 0.9332809329701867, "grad_norm": 5.782619917187875, "learning_rate": 1.1632165402785722e-07, "loss": 17.2951, "step": 51057 }, { "epoch": 0.9332992121666331, "grad_norm": 5.264062817944673, "learning_rate": 1.1625818385752275e-07, "loss": 16.8015, "step": 51058 }, { "epoch": 0.9333174913630797, "grad_norm": 5.59287248651112, "learning_rate": 1.1619473080427879e-07, "loss": 17.0575, "step": 51059 }, { "epoch": 0.9333357705595262, "grad_norm": 5.848862502235872, "learning_rate": 1.1613129486834906e-07, "loss": 17.2405, "step": 51060 }, { "epoch": 0.9333540497559727, "grad_norm": 6.27740729176885, "learning_rate": 1.1606787604995507e-07, "loss": 17.0072, "step": 51061 }, { "epoch": 0.9333723289524193, "grad_norm": 6.815656651275725, "learning_rate": 1.1600447434931939e-07, "loss": 17.3046, "step": 51062 }, { "epoch": 0.9333906081488658, "grad_norm": 6.718912680449004, "learning_rate": 1.1594108976666408e-07, "loss": 17.4437, "step": 51063 }, { "epoch": 0.9334088873453124, "grad_norm": 6.967371695867401, "learning_rate": 1.1587772230221173e-07, "loss": 17.5623, "step": 51064 }, { "epoch": 0.9334271665417588, "grad_norm": 6.419793781460191, "learning_rate": 1.1581437195618384e-07, "loss": 17.7889, "step": 51065 }, { "epoch": 0.9334454457382053, "grad_norm": 7.03832583610656, "learning_rate": 1.1575103872880244e-07, "loss": 17.2907, "step": 51066 }, { "epoch": 0.9334637249346519, "grad_norm": 4.991091506739744, "learning_rate": 1.156877226202907e-07, "loss": 16.6835, "step": 51067 }, { "epoch": 0.9334820041310984, "grad_norm": 6.3301505008064, "learning_rate": 1.15624423630869e-07, "loss": 17.5394, "step": 51068 }, { "epoch": 0.9335002833275449, "grad_norm": 6.495569685752713, "learning_rate": 1.1556114176075995e-07, "loss": 17.6142, "step": 51069 }, { "epoch": 0.9335185625239915, "grad_norm": 6.73962883860932, "learning_rate": 1.15497877010185e-07, "loss": 17.6266, "step": 51070 }, { "epoch": 0.9335368417204379, "grad_norm": 7.502061002291531, "learning_rate": 1.1543462937936678e-07, "loss": 17.8254, "step": 51071 }, { "epoch": 0.9335551209168845, "grad_norm": 5.880025181961713, "learning_rate": 1.1537139886852622e-07, "loss": 17.1958, "step": 51072 }, { "epoch": 0.933573400113331, "grad_norm": 7.399391692078234, "learning_rate": 1.153081854778848e-07, "loss": 17.7775, "step": 51073 }, { "epoch": 0.9335916793097775, "grad_norm": 5.76145576196045, "learning_rate": 1.1524498920766458e-07, "loss": 17.2739, "step": 51074 }, { "epoch": 0.9336099585062241, "grad_norm": 5.864143426875914, "learning_rate": 1.1518181005808592e-07, "loss": 16.8523, "step": 51075 }, { "epoch": 0.9336282377026706, "grad_norm": 5.455290009508458, "learning_rate": 1.15118648029372e-07, "loss": 17.1088, "step": 51076 }, { "epoch": 0.9336465168991172, "grad_norm": 6.540748198937246, "learning_rate": 1.1505550312174319e-07, "loss": 17.2206, "step": 51077 }, { "epoch": 0.9336647960955636, "grad_norm": 6.105048376604021, "learning_rate": 1.1499237533542096e-07, "loss": 17.5653, "step": 51078 }, { "epoch": 0.9336830752920101, "grad_norm": 6.80510292340413, "learning_rate": 1.1492926467062627e-07, "loss": 17.4706, "step": 51079 }, { "epoch": 0.9337013544884567, "grad_norm": 6.6067598147740565, "learning_rate": 1.1486617112758114e-07, "loss": 17.2603, "step": 51080 }, { "epoch": 0.9337196336849032, "grad_norm": 5.68646389237377, "learning_rate": 1.1480309470650597e-07, "loss": 17.1504, "step": 51081 }, { "epoch": 0.9337379128813498, "grad_norm": 5.108519159461224, "learning_rate": 1.1474003540762169e-07, "loss": 16.8479, "step": 51082 }, { "epoch": 0.9337561920777963, "grad_norm": 7.434752759073485, "learning_rate": 1.1467699323114978e-07, "loss": 17.4745, "step": 51083 }, { "epoch": 0.9337744712742427, "grad_norm": 6.24785320344279, "learning_rate": 1.1461396817731062e-07, "loss": 17.184, "step": 51084 }, { "epoch": 0.9337927504706893, "grad_norm": 5.107957248219336, "learning_rate": 1.1455096024632628e-07, "loss": 16.8266, "step": 51085 }, { "epoch": 0.9338110296671358, "grad_norm": 4.971251713817211, "learning_rate": 1.1448796943841656e-07, "loss": 16.983, "step": 51086 }, { "epoch": 0.9338293088635824, "grad_norm": 6.241841064386408, "learning_rate": 1.1442499575380239e-07, "loss": 17.3401, "step": 51087 }, { "epoch": 0.9338475880600289, "grad_norm": 5.3049690632239885, "learning_rate": 1.1436203919270527e-07, "loss": 17.0081, "step": 51088 }, { "epoch": 0.9338658672564754, "grad_norm": 5.994156152732711, "learning_rate": 1.1429909975534503e-07, "loss": 17.3669, "step": 51089 }, { "epoch": 0.933884146452922, "grad_norm": 6.503660295389964, "learning_rate": 1.142361774419426e-07, "loss": 17.6266, "step": 51090 }, { "epoch": 0.9339024256493684, "grad_norm": 5.964546851848015, "learning_rate": 1.1417327225271835e-07, "loss": 17.5479, "step": 51091 }, { "epoch": 0.933920704845815, "grad_norm": 8.807427388543925, "learning_rate": 1.1411038418789267e-07, "loss": 17.4973, "step": 51092 }, { "epoch": 0.9339389840422615, "grad_norm": 9.367308892675345, "learning_rate": 1.1404751324768593e-07, "loss": 18.0398, "step": 51093 }, { "epoch": 0.933957263238708, "grad_norm": 6.216229260740116, "learning_rate": 1.1398465943231851e-07, "loss": 17.0687, "step": 51094 }, { "epoch": 0.9339755424351546, "grad_norm": 7.247681477364998, "learning_rate": 1.1392182274201135e-07, "loss": 17.4928, "step": 51095 }, { "epoch": 0.933993821631601, "grad_norm": 6.244977233203254, "learning_rate": 1.1385900317698373e-07, "loss": 17.3714, "step": 51096 }, { "epoch": 0.9340121008280476, "grad_norm": 7.796210477293291, "learning_rate": 1.1379620073745712e-07, "loss": 17.6329, "step": 51097 }, { "epoch": 0.9340303800244941, "grad_norm": 5.019237963197247, "learning_rate": 1.1373341542365024e-07, "loss": 16.8605, "step": 51098 }, { "epoch": 0.9340486592209406, "grad_norm": 5.474909086030221, "learning_rate": 1.1367064723578402e-07, "loss": 17.2823, "step": 51099 }, { "epoch": 0.9340669384173872, "grad_norm": 5.302527899353827, "learning_rate": 1.1360789617407775e-07, "loss": 17.1102, "step": 51100 }, { "epoch": 0.9340852176138337, "grad_norm": 6.320757374502104, "learning_rate": 1.1354516223875178e-07, "loss": 17.3653, "step": 51101 }, { "epoch": 0.9341034968102803, "grad_norm": 5.896717003312659, "learning_rate": 1.1348244543002651e-07, "loss": 17.3629, "step": 51102 }, { "epoch": 0.9341217760067267, "grad_norm": 5.310279209063342, "learning_rate": 1.1341974574812064e-07, "loss": 17.2082, "step": 51103 }, { "epoch": 0.9341400552031732, "grad_norm": 5.574757429410839, "learning_rate": 1.1335706319325457e-07, "loss": 17.1932, "step": 51104 }, { "epoch": 0.9341583343996198, "grad_norm": 5.7798491209859355, "learning_rate": 1.1329439776564865e-07, "loss": 17.076, "step": 51105 }, { "epoch": 0.9341766135960663, "grad_norm": 5.801593862765897, "learning_rate": 1.1323174946552163e-07, "loss": 16.8626, "step": 51106 }, { "epoch": 0.9341948927925129, "grad_norm": 5.4854850790571, "learning_rate": 1.1316911829309274e-07, "loss": 17.0082, "step": 51107 }, { "epoch": 0.9342131719889594, "grad_norm": 6.763796983418539, "learning_rate": 1.1310650424858238e-07, "loss": 17.2767, "step": 51108 }, { "epoch": 0.9342314511854058, "grad_norm": 6.547084627513366, "learning_rate": 1.1304390733220983e-07, "loss": 17.3762, "step": 51109 }, { "epoch": 0.9342497303818524, "grad_norm": 5.840139268034262, "learning_rate": 1.1298132754419378e-07, "loss": 16.9292, "step": 51110 }, { "epoch": 0.9342680095782989, "grad_norm": 5.87910280174152, "learning_rate": 1.1291876488475406e-07, "loss": 16.9936, "step": 51111 }, { "epoch": 0.9342862887747455, "grad_norm": 6.181474587872409, "learning_rate": 1.128562193541105e-07, "loss": 17.4308, "step": 51112 }, { "epoch": 0.934304567971192, "grad_norm": 6.428012057333843, "learning_rate": 1.1279369095248127e-07, "loss": 17.1825, "step": 51113 }, { "epoch": 0.9343228471676385, "grad_norm": 7.856671408457083, "learning_rate": 1.1273117968008673e-07, "loss": 17.8305, "step": 51114 }, { "epoch": 0.9343411263640851, "grad_norm": 8.672842367482906, "learning_rate": 1.1266868553714505e-07, "loss": 17.6373, "step": 51115 }, { "epoch": 0.9343594055605315, "grad_norm": 7.021216788570823, "learning_rate": 1.1260620852387549e-07, "loss": 17.3055, "step": 51116 }, { "epoch": 0.9343776847569781, "grad_norm": 6.859194040201975, "learning_rate": 1.1254374864049678e-07, "loss": 17.4472, "step": 51117 }, { "epoch": 0.9343959639534246, "grad_norm": 4.652182964912487, "learning_rate": 1.1248130588722816e-07, "loss": 16.8478, "step": 51118 }, { "epoch": 0.9344142431498711, "grad_norm": 5.924850362301184, "learning_rate": 1.1241888026428893e-07, "loss": 17.276, "step": 51119 }, { "epoch": 0.9344325223463177, "grad_norm": 6.91552059265042, "learning_rate": 1.1235647177189724e-07, "loss": 17.3354, "step": 51120 }, { "epoch": 0.9344508015427642, "grad_norm": 6.265856607346062, "learning_rate": 1.1229408041027123e-07, "loss": 17.1939, "step": 51121 }, { "epoch": 0.9344690807392108, "grad_norm": 7.2013909266461935, "learning_rate": 1.1223170617963131e-07, "loss": 17.5485, "step": 51122 }, { "epoch": 0.9344873599356572, "grad_norm": 5.552229386351615, "learning_rate": 1.1216934908019505e-07, "loss": 17.1203, "step": 51123 }, { "epoch": 0.9345056391321037, "grad_norm": 6.786268485698251, "learning_rate": 1.1210700911218064e-07, "loss": 17.8475, "step": 51124 }, { "epoch": 0.9345239183285503, "grad_norm": 4.964596027488989, "learning_rate": 1.1204468627580733e-07, "loss": 16.9571, "step": 51125 }, { "epoch": 0.9345421975249968, "grad_norm": 5.445322155545694, "learning_rate": 1.1198238057129329e-07, "loss": 17.2436, "step": 51126 }, { "epoch": 0.9345604767214434, "grad_norm": 6.826359068260597, "learning_rate": 1.1192009199885612e-07, "loss": 17.379, "step": 51127 }, { "epoch": 0.9345787559178899, "grad_norm": 5.145073236603035, "learning_rate": 1.1185782055871564e-07, "loss": 17.0437, "step": 51128 }, { "epoch": 0.9345970351143363, "grad_norm": 4.945003595183276, "learning_rate": 1.1179556625108889e-07, "loss": 16.9695, "step": 51129 }, { "epoch": 0.9346153143107829, "grad_norm": 6.300415131435273, "learning_rate": 1.1173332907619461e-07, "loss": 16.913, "step": 51130 }, { "epoch": 0.9346335935072294, "grad_norm": 6.4592288296835845, "learning_rate": 1.1167110903425093e-07, "loss": 17.607, "step": 51131 }, { "epoch": 0.934651872703676, "grad_norm": 5.246221299872523, "learning_rate": 1.1160890612547548e-07, "loss": 17.1585, "step": 51132 }, { "epoch": 0.9346701519001225, "grad_norm": 6.129037806627978, "learning_rate": 1.1154672035008695e-07, "loss": 17.1604, "step": 51133 }, { "epoch": 0.934688431096569, "grad_norm": 5.905757342474681, "learning_rate": 1.1148455170830353e-07, "loss": 17.1952, "step": 51134 }, { "epoch": 0.9347067102930156, "grad_norm": 6.951270140472609, "learning_rate": 1.1142240020034167e-07, "loss": 17.6751, "step": 51135 }, { "epoch": 0.934724989489462, "grad_norm": 4.897539288887306, "learning_rate": 1.1136026582642067e-07, "loss": 16.9407, "step": 51136 }, { "epoch": 0.9347432686859085, "grad_norm": 5.984551092563184, "learning_rate": 1.1129814858675758e-07, "loss": 17.0135, "step": 51137 }, { "epoch": 0.9347615478823551, "grad_norm": 6.087095792736438, "learning_rate": 1.1123604848156943e-07, "loss": 17.3609, "step": 51138 }, { "epoch": 0.9347798270788016, "grad_norm": 5.80176617487115, "learning_rate": 1.111739655110755e-07, "loss": 17.2877, "step": 51139 }, { "epoch": 0.9347981062752482, "grad_norm": 6.899214143541823, "learning_rate": 1.111118996754923e-07, "loss": 17.6675, "step": 51140 }, { "epoch": 0.9348163854716947, "grad_norm": 5.01752459169847, "learning_rate": 1.1104985097503796e-07, "loss": 17.0376, "step": 51141 }, { "epoch": 0.9348346646681411, "grad_norm": 6.8800752832814505, "learning_rate": 1.1098781940992953e-07, "loss": 17.1023, "step": 51142 }, { "epoch": 0.9348529438645877, "grad_norm": 6.174488794841399, "learning_rate": 1.1092580498038463e-07, "loss": 17.124, "step": 51143 }, { "epoch": 0.9348712230610342, "grad_norm": 6.666181086757393, "learning_rate": 1.108638076866203e-07, "loss": 17.5247, "step": 51144 }, { "epoch": 0.9348895022574808, "grad_norm": 6.847519215719577, "learning_rate": 1.1080182752885416e-07, "loss": 17.5308, "step": 51145 }, { "epoch": 0.9349077814539273, "grad_norm": 5.474817372965075, "learning_rate": 1.1073986450730323e-07, "loss": 16.9241, "step": 51146 }, { "epoch": 0.9349260606503738, "grad_norm": 5.618390338365731, "learning_rate": 1.1067791862218513e-07, "loss": 17.1485, "step": 51147 }, { "epoch": 0.9349443398468203, "grad_norm": 6.267646304743442, "learning_rate": 1.1061598987371635e-07, "loss": 17.6722, "step": 51148 }, { "epoch": 0.9349626190432668, "grad_norm": 6.793068819650071, "learning_rate": 1.1055407826211395e-07, "loss": 17.7911, "step": 51149 }, { "epoch": 0.9349808982397134, "grad_norm": 7.104176974472643, "learning_rate": 1.1049218378759607e-07, "loss": 18.0242, "step": 51150 }, { "epoch": 0.9349991774361599, "grad_norm": 6.53209893244768, "learning_rate": 1.1043030645037867e-07, "loss": 17.0573, "step": 51151 }, { "epoch": 0.9350174566326064, "grad_norm": 5.605818593477534, "learning_rate": 1.1036844625067767e-07, "loss": 16.9781, "step": 51152 }, { "epoch": 0.935035735829053, "grad_norm": 5.211269932290331, "learning_rate": 1.1030660318871234e-07, "loss": 17.1126, "step": 51153 }, { "epoch": 0.9350540150254995, "grad_norm": 7.178907284418153, "learning_rate": 1.1024477726469751e-07, "loss": 17.5477, "step": 51154 }, { "epoch": 0.935072294221946, "grad_norm": 7.057088262419818, "learning_rate": 1.1018296847885024e-07, "loss": 17.9171, "step": 51155 }, { "epoch": 0.9350905734183925, "grad_norm": 6.009925822611336, "learning_rate": 1.1012117683138756e-07, "loss": 17.6781, "step": 51156 }, { "epoch": 0.935108852614839, "grad_norm": 5.474788330676207, "learning_rate": 1.1005940232252543e-07, "loss": 17.1282, "step": 51157 }, { "epoch": 0.9351271318112856, "grad_norm": 6.561403832535761, "learning_rate": 1.0999764495248088e-07, "loss": 17.5072, "step": 51158 }, { "epoch": 0.9351454110077321, "grad_norm": 6.910547929889397, "learning_rate": 1.0993590472147042e-07, "loss": 17.5018, "step": 51159 }, { "epoch": 0.9351636902041787, "grad_norm": 6.340128636858093, "learning_rate": 1.0987418162971053e-07, "loss": 17.3793, "step": 51160 }, { "epoch": 0.9351819694006251, "grad_norm": 5.799041698975761, "learning_rate": 1.0981247567741659e-07, "loss": 17.3418, "step": 51161 }, { "epoch": 0.9352002485970716, "grad_norm": 5.05025510499773, "learning_rate": 1.0975078686480677e-07, "loss": 16.8058, "step": 51162 }, { "epoch": 0.9352185277935182, "grad_norm": 4.223228758831643, "learning_rate": 1.096891151920948e-07, "loss": 16.6426, "step": 51163 }, { "epoch": 0.9352368069899647, "grad_norm": 5.072131195232022, "learning_rate": 1.096274606594988e-07, "loss": 16.9047, "step": 51164 }, { "epoch": 0.9352550861864113, "grad_norm": 5.477137317644597, "learning_rate": 1.0956582326723364e-07, "loss": 16.8224, "step": 51165 }, { "epoch": 0.9352733653828578, "grad_norm": 8.14241156148515, "learning_rate": 1.095042030155169e-07, "loss": 17.9973, "step": 51166 }, { "epoch": 0.9352916445793042, "grad_norm": 6.6705619357751065, "learning_rate": 1.094425999045623e-07, "loss": 17.3993, "step": 51167 }, { "epoch": 0.9353099237757508, "grad_norm": 6.7352048120922605, "learning_rate": 1.09381013934588e-07, "loss": 17.6689, "step": 51168 }, { "epoch": 0.9353282029721973, "grad_norm": 4.948606788877316, "learning_rate": 1.0931944510580827e-07, "loss": 16.8975, "step": 51169 }, { "epoch": 0.9353464821686439, "grad_norm": 6.44940264529615, "learning_rate": 1.0925789341844017e-07, "loss": 17.5731, "step": 51170 }, { "epoch": 0.9353647613650904, "grad_norm": 6.569389049341126, "learning_rate": 1.0919635887269853e-07, "loss": 17.5961, "step": 51171 }, { "epoch": 0.9353830405615369, "grad_norm": 5.95666881891562, "learning_rate": 1.0913484146879872e-07, "loss": 16.9485, "step": 51172 }, { "epoch": 0.9354013197579835, "grad_norm": 7.193794476436017, "learning_rate": 1.0907334120695723e-07, "loss": 17.738, "step": 51173 }, { "epoch": 0.9354195989544299, "grad_norm": 6.978095125719642, "learning_rate": 1.090118580873889e-07, "loss": 17.1531, "step": 51174 }, { "epoch": 0.9354378781508765, "grad_norm": 5.424452762466159, "learning_rate": 1.0895039211030967e-07, "loss": 17.0601, "step": 51175 }, { "epoch": 0.935456157347323, "grad_norm": 6.826276996141371, "learning_rate": 1.0888894327593547e-07, "loss": 17.3354, "step": 51176 }, { "epoch": 0.9354744365437695, "grad_norm": 6.399115956668784, "learning_rate": 1.0882751158448057e-07, "loss": 17.5359, "step": 51177 }, { "epoch": 0.9354927157402161, "grad_norm": 4.920176297252396, "learning_rate": 1.0876609703616148e-07, "loss": 16.9232, "step": 51178 }, { "epoch": 0.9355109949366626, "grad_norm": 6.769433836591743, "learning_rate": 1.0870469963119246e-07, "loss": 17.512, "step": 51179 }, { "epoch": 0.9355292741331092, "grad_norm": 7.849850560584082, "learning_rate": 1.0864331936978833e-07, "loss": 17.4556, "step": 51180 }, { "epoch": 0.9355475533295556, "grad_norm": 7.079905583833809, "learning_rate": 1.0858195625216561e-07, "loss": 17.6058, "step": 51181 }, { "epoch": 0.9355658325260021, "grad_norm": 8.849336747253613, "learning_rate": 1.0852061027853855e-07, "loss": 17.7965, "step": 51182 }, { "epoch": 0.9355841117224487, "grad_norm": 6.7127267681363705, "learning_rate": 1.08459281449122e-07, "loss": 17.363, "step": 51183 }, { "epoch": 0.9356023909188952, "grad_norm": 6.584728003235605, "learning_rate": 1.083979697641313e-07, "loss": 17.6286, "step": 51184 }, { "epoch": 0.9356206701153418, "grad_norm": 6.872407566231425, "learning_rate": 1.0833667522378188e-07, "loss": 17.4277, "step": 51185 }, { "epoch": 0.9356389493117883, "grad_norm": 6.674598796972749, "learning_rate": 1.0827539782828688e-07, "loss": 17.5134, "step": 51186 }, { "epoch": 0.9356572285082347, "grad_norm": 6.975912525075884, "learning_rate": 1.082141375778628e-07, "loss": 17.1437, "step": 51187 }, { "epoch": 0.9356755077046813, "grad_norm": 5.826802494821558, "learning_rate": 1.0815289447272393e-07, "loss": 17.1405, "step": 51188 }, { "epoch": 0.9356937869011278, "grad_norm": 6.935107665035414, "learning_rate": 1.0809166851308395e-07, "loss": 17.7608, "step": 51189 }, { "epoch": 0.9357120660975744, "grad_norm": 5.416221946805593, "learning_rate": 1.0803045969915881e-07, "loss": 17.0302, "step": 51190 }, { "epoch": 0.9357303452940209, "grad_norm": 6.08209561972153, "learning_rate": 1.0796926803116226e-07, "loss": 17.1291, "step": 51191 }, { "epoch": 0.9357486244904674, "grad_norm": 5.831303359879826, "learning_rate": 1.0790809350930853e-07, "loss": 17.1587, "step": 51192 }, { "epoch": 0.935766903686914, "grad_norm": 8.068369401921295, "learning_rate": 1.0784693613381247e-07, "loss": 17.6817, "step": 51193 }, { "epoch": 0.9357851828833604, "grad_norm": 6.032635472773476, "learning_rate": 1.0778579590488836e-07, "loss": 17.1587, "step": 51194 }, { "epoch": 0.935803462079807, "grad_norm": 5.928095847167552, "learning_rate": 1.0772467282275101e-07, "loss": 17.0058, "step": 51195 }, { "epoch": 0.9358217412762535, "grad_norm": 5.852663466918475, "learning_rate": 1.0766356688761414e-07, "loss": 16.9585, "step": 51196 }, { "epoch": 0.9358400204727, "grad_norm": 6.9045028126819155, "learning_rate": 1.0760247809969149e-07, "loss": 17.5618, "step": 51197 }, { "epoch": 0.9358582996691466, "grad_norm": 5.259920354545331, "learning_rate": 1.0754140645919786e-07, "loss": 16.8869, "step": 51198 }, { "epoch": 0.9358765788655931, "grad_norm": 6.08934906170053, "learning_rate": 1.0748035196634698e-07, "loss": 17.4995, "step": 51199 }, { "epoch": 0.9358948580620396, "grad_norm": 6.495235929225087, "learning_rate": 1.0741931462135258e-07, "loss": 17.209, "step": 51200 }, { "epoch": 0.9359131372584861, "grad_norm": 6.74062925412471, "learning_rate": 1.0735829442442946e-07, "loss": 17.4631, "step": 51201 }, { "epoch": 0.9359314164549326, "grad_norm": 6.7675547980482404, "learning_rate": 1.0729729137579081e-07, "loss": 17.5167, "step": 51202 }, { "epoch": 0.9359496956513792, "grad_norm": 5.984184824006028, "learning_rate": 1.0723630547564978e-07, "loss": 17.1494, "step": 51203 }, { "epoch": 0.9359679748478257, "grad_norm": 3.818851143955798, "learning_rate": 1.0717533672422231e-07, "loss": 16.4518, "step": 51204 }, { "epoch": 0.9359862540442722, "grad_norm": 6.631446862298944, "learning_rate": 1.071143851217199e-07, "loss": 17.3193, "step": 51205 }, { "epoch": 0.9360045332407188, "grad_norm": 6.426182314437258, "learning_rate": 1.0705345066835682e-07, "loss": 17.2544, "step": 51206 }, { "epoch": 0.9360228124371652, "grad_norm": 5.907173686812364, "learning_rate": 1.0699253336434734e-07, "loss": 17.2627, "step": 51207 }, { "epoch": 0.9360410916336118, "grad_norm": 6.486657820104623, "learning_rate": 1.0693163320990407e-07, "loss": 17.4877, "step": 51208 }, { "epoch": 0.9360593708300583, "grad_norm": 6.500856416755895, "learning_rate": 1.068707502052413e-07, "loss": 17.0952, "step": 51209 }, { "epoch": 0.9360776500265048, "grad_norm": 6.505072785814353, "learning_rate": 1.068098843505716e-07, "loss": 17.3149, "step": 51210 }, { "epoch": 0.9360959292229514, "grad_norm": 6.936291054449938, "learning_rate": 1.0674903564610872e-07, "loss": 17.414, "step": 51211 }, { "epoch": 0.9361142084193979, "grad_norm": 6.625678769256983, "learning_rate": 1.0668820409206526e-07, "loss": 17.4265, "step": 51212 }, { "epoch": 0.9361324876158444, "grad_norm": 5.309576431897905, "learning_rate": 1.0662738968865605e-07, "loss": 17.0255, "step": 51213 }, { "epoch": 0.9361507668122909, "grad_norm": 5.684216281635627, "learning_rate": 1.0656659243609202e-07, "loss": 17.2407, "step": 51214 }, { "epoch": 0.9361690460087374, "grad_norm": 6.514978973561642, "learning_rate": 1.0650581233458857e-07, "loss": 17.2557, "step": 51215 }, { "epoch": 0.936187325205184, "grad_norm": 5.591860960925622, "learning_rate": 1.0644504938435718e-07, "loss": 17.173, "step": 51216 }, { "epoch": 0.9362056044016305, "grad_norm": 6.764350871954088, "learning_rate": 1.0638430358561102e-07, "loss": 17.1521, "step": 51217 }, { "epoch": 0.9362238835980771, "grad_norm": 6.447261670434294, "learning_rate": 1.0632357493856383e-07, "loss": 17.2176, "step": 51218 }, { "epoch": 0.9362421627945235, "grad_norm": 6.922866741838925, "learning_rate": 1.0626286344342762e-07, "loss": 17.6092, "step": 51219 }, { "epoch": 0.93626044199097, "grad_norm": 6.4335481478394465, "learning_rate": 1.0620216910041503e-07, "loss": 17.412, "step": 51220 }, { "epoch": 0.9362787211874166, "grad_norm": 6.527722130876248, "learning_rate": 1.0614149190973921e-07, "loss": 17.2227, "step": 51221 }, { "epoch": 0.9362970003838631, "grad_norm": 5.283433023689425, "learning_rate": 1.0608083187161333e-07, "loss": 17.0225, "step": 51222 }, { "epoch": 0.9363152795803097, "grad_norm": 5.798089570946315, "learning_rate": 1.0602018898624889e-07, "loss": 17.3728, "step": 51223 }, { "epoch": 0.9363335587767562, "grad_norm": 6.477602083427618, "learning_rate": 1.0595956325385959e-07, "loss": 17.5578, "step": 51224 }, { "epoch": 0.9363518379732026, "grad_norm": 6.803174839721236, "learning_rate": 1.0589895467465639e-07, "loss": 17.6806, "step": 51225 }, { "epoch": 0.9363701171696492, "grad_norm": 5.6476581499138065, "learning_rate": 1.0583836324885355e-07, "loss": 17.0807, "step": 51226 }, { "epoch": 0.9363883963660957, "grad_norm": 6.2691701268951565, "learning_rate": 1.0577778897666257e-07, "loss": 17.5521, "step": 51227 }, { "epoch": 0.9364066755625423, "grad_norm": 6.387370007051992, "learning_rate": 1.0571723185829496e-07, "loss": 17.618, "step": 51228 }, { "epoch": 0.9364249547589888, "grad_norm": 4.8514226159771585, "learning_rate": 1.0565669189396443e-07, "loss": 16.8288, "step": 51229 }, { "epoch": 0.9364432339554353, "grad_norm": 7.414295476310968, "learning_rate": 1.0559616908388249e-07, "loss": 17.5283, "step": 51230 }, { "epoch": 0.9364615131518819, "grad_norm": 5.470807058808624, "learning_rate": 1.0553566342826061e-07, "loss": 16.871, "step": 51231 }, { "epoch": 0.9364797923483283, "grad_norm": 5.436155933754169, "learning_rate": 1.0547517492731196e-07, "loss": 16.9491, "step": 51232 }, { "epoch": 0.9364980715447749, "grad_norm": 7.769514007894407, "learning_rate": 1.0541470358124861e-07, "loss": 17.6644, "step": 51233 }, { "epoch": 0.9365163507412214, "grad_norm": 6.368650031094827, "learning_rate": 1.0535424939028093e-07, "loss": 17.4092, "step": 51234 }, { "epoch": 0.9365346299376679, "grad_norm": 7.696574944223694, "learning_rate": 1.0529381235462266e-07, "loss": 17.7765, "step": 51235 }, { "epoch": 0.9365529091341145, "grad_norm": 5.324713328002093, "learning_rate": 1.0523339247448472e-07, "loss": 17.0708, "step": 51236 }, { "epoch": 0.936571188330561, "grad_norm": 6.24640116934687, "learning_rate": 1.0517298975007862e-07, "loss": 17.578, "step": 51237 }, { "epoch": 0.9365894675270076, "grad_norm": 6.792315742706451, "learning_rate": 1.051126041816164e-07, "loss": 17.743, "step": 51238 }, { "epoch": 0.936607746723454, "grad_norm": 4.613546462826309, "learning_rate": 1.0505223576930956e-07, "loss": 16.7376, "step": 51239 }, { "epoch": 0.9366260259199005, "grad_norm": 5.282146500614947, "learning_rate": 1.0499188451337073e-07, "loss": 16.7927, "step": 51240 }, { "epoch": 0.9366443051163471, "grad_norm": 6.934516165435188, "learning_rate": 1.0493155041401026e-07, "loss": 17.7255, "step": 51241 }, { "epoch": 0.9366625843127936, "grad_norm": 6.4818639725892915, "learning_rate": 1.0487123347143968e-07, "loss": 17.3519, "step": 51242 }, { "epoch": 0.9366808635092402, "grad_norm": 6.781173705137288, "learning_rate": 1.0481093368587103e-07, "loss": 17.3022, "step": 51243 }, { "epoch": 0.9366991427056867, "grad_norm": 6.536423071678854, "learning_rate": 1.0475065105751525e-07, "loss": 17.3902, "step": 51244 }, { "epoch": 0.9367174219021331, "grad_norm": 6.277493898715419, "learning_rate": 1.0469038558658329e-07, "loss": 17.3583, "step": 51245 }, { "epoch": 0.9367357010985797, "grad_norm": 8.113148915981839, "learning_rate": 1.0463013727328719e-07, "loss": 17.7027, "step": 51246 }, { "epoch": 0.9367539802950262, "grad_norm": 4.631694587743984, "learning_rate": 1.045699061178368e-07, "loss": 16.7795, "step": 51247 }, { "epoch": 0.9367722594914728, "grad_norm": 5.7153895367545955, "learning_rate": 1.045096921204447e-07, "loss": 17.071, "step": 51248 }, { "epoch": 0.9367905386879193, "grad_norm": 6.209439799551609, "learning_rate": 1.0444949528132131e-07, "loss": 17.4544, "step": 51249 }, { "epoch": 0.9368088178843658, "grad_norm": 8.365548932637267, "learning_rate": 1.0438931560067756e-07, "loss": 18.4637, "step": 51250 }, { "epoch": 0.9368270970808124, "grad_norm": 6.148508335600236, "learning_rate": 1.0432915307872437e-07, "loss": 17.3612, "step": 51251 }, { "epoch": 0.9368453762772588, "grad_norm": 9.704252301136101, "learning_rate": 1.0426900771567273e-07, "loss": 17.9542, "step": 51252 }, { "epoch": 0.9368636554737054, "grad_norm": 5.719647480357542, "learning_rate": 1.0420887951173353e-07, "loss": 16.9614, "step": 51253 }, { "epoch": 0.9368819346701519, "grad_norm": 6.809497437941808, "learning_rate": 1.0414876846711718e-07, "loss": 17.3871, "step": 51254 }, { "epoch": 0.9369002138665984, "grad_norm": 6.607149092563045, "learning_rate": 1.0408867458203465e-07, "loss": 17.5083, "step": 51255 }, { "epoch": 0.936918493063045, "grad_norm": 5.829015113232351, "learning_rate": 1.0402859785669627e-07, "loss": 17.3248, "step": 51256 }, { "epoch": 0.9369367722594915, "grad_norm": 6.872988175798422, "learning_rate": 1.0396853829131304e-07, "loss": 17.3888, "step": 51257 }, { "epoch": 0.936955051455938, "grad_norm": 4.985554973024965, "learning_rate": 1.039084958860953e-07, "loss": 17.0786, "step": 51258 }, { "epoch": 0.9369733306523845, "grad_norm": 5.077108469742181, "learning_rate": 1.0384847064125291e-07, "loss": 16.9041, "step": 51259 }, { "epoch": 0.936991609848831, "grad_norm": 6.92487968001246, "learning_rate": 1.0378846255699738e-07, "loss": 18.0652, "step": 51260 }, { "epoch": 0.9370098890452776, "grad_norm": 7.404980440519063, "learning_rate": 1.037284716335385e-07, "loss": 17.8153, "step": 51261 }, { "epoch": 0.9370281682417241, "grad_norm": 6.38375101530887, "learning_rate": 1.0366849787108557e-07, "loss": 17.1046, "step": 51262 }, { "epoch": 0.9370464474381707, "grad_norm": 7.786056050035562, "learning_rate": 1.0360854126985064e-07, "loss": 17.8206, "step": 51263 }, { "epoch": 0.9370647266346172, "grad_norm": 6.809795417963151, "learning_rate": 1.0354860183004245e-07, "loss": 17.7977, "step": 51264 }, { "epoch": 0.9370830058310636, "grad_norm": 6.315358498511887, "learning_rate": 1.0348867955187137e-07, "loss": 17.5407, "step": 51265 }, { "epoch": 0.9371012850275102, "grad_norm": 4.807890750214645, "learning_rate": 1.0342877443554722e-07, "loss": 16.8429, "step": 51266 }, { "epoch": 0.9371195642239567, "grad_norm": 6.330918412937388, "learning_rate": 1.0336888648128096e-07, "loss": 17.5554, "step": 51267 }, { "epoch": 0.9371378434204033, "grad_norm": 5.023877554753483, "learning_rate": 1.0330901568928187e-07, "loss": 16.8894, "step": 51268 }, { "epoch": 0.9371561226168498, "grad_norm": 6.41201995474641, "learning_rate": 1.0324916205975977e-07, "loss": 17.133, "step": 51269 }, { "epoch": 0.9371744018132963, "grad_norm": 11.443855460128917, "learning_rate": 1.0318932559292394e-07, "loss": 18.0247, "step": 51270 }, { "epoch": 0.9371926810097428, "grad_norm": 6.037923854357446, "learning_rate": 1.0312950628898478e-07, "loss": 17.141, "step": 51271 }, { "epoch": 0.9372109602061893, "grad_norm": 6.345715679081875, "learning_rate": 1.0306970414815209e-07, "loss": 17.4259, "step": 51272 }, { "epoch": 0.9372292394026358, "grad_norm": 6.5536031128742565, "learning_rate": 1.0300991917063463e-07, "loss": 17.5655, "step": 51273 }, { "epoch": 0.9372475185990824, "grad_norm": 6.2057394594700925, "learning_rate": 1.0295015135664333e-07, "loss": 17.1689, "step": 51274 }, { "epoch": 0.9372657977955289, "grad_norm": 4.554654499817819, "learning_rate": 1.0289040070638579e-07, "loss": 16.8526, "step": 51275 }, { "epoch": 0.9372840769919755, "grad_norm": 6.8158314971188965, "learning_rate": 1.0283066722007295e-07, "loss": 17.8444, "step": 51276 }, { "epoch": 0.937302356188422, "grad_norm": 5.438367895280145, "learning_rate": 1.0277095089791356e-07, "loss": 17.2457, "step": 51277 }, { "epoch": 0.9373206353848684, "grad_norm": 7.332905623809828, "learning_rate": 1.0271125174011743e-07, "loss": 17.5615, "step": 51278 }, { "epoch": 0.937338914581315, "grad_norm": 7.069788386130159, "learning_rate": 1.0265156974689273e-07, "loss": 17.9335, "step": 51279 }, { "epoch": 0.9373571937777615, "grad_norm": 4.7397950147107615, "learning_rate": 1.0259190491844983e-07, "loss": 16.9586, "step": 51280 }, { "epoch": 0.9373754729742081, "grad_norm": 5.942261403340311, "learning_rate": 1.025322572549975e-07, "loss": 17.1812, "step": 51281 }, { "epoch": 0.9373937521706546, "grad_norm": 5.342458935682688, "learning_rate": 1.0247262675674385e-07, "loss": 16.9171, "step": 51282 }, { "epoch": 0.937412031367101, "grad_norm": 7.22906544951539, "learning_rate": 1.024130134238993e-07, "loss": 17.4964, "step": 51283 }, { "epoch": 0.9374303105635476, "grad_norm": 6.309661054044901, "learning_rate": 1.0235341725667148e-07, "loss": 17.0694, "step": 51284 }, { "epoch": 0.9374485897599941, "grad_norm": 6.262869918941738, "learning_rate": 1.0229383825527017e-07, "loss": 17.2762, "step": 51285 }, { "epoch": 0.9374668689564407, "grad_norm": 4.929330579491017, "learning_rate": 1.0223427641990468e-07, "loss": 17.0648, "step": 51286 }, { "epoch": 0.9374851481528872, "grad_norm": 4.863007528225623, "learning_rate": 1.0217473175078207e-07, "loss": 16.8099, "step": 51287 }, { "epoch": 0.9375034273493337, "grad_norm": 6.44898534243032, "learning_rate": 1.0211520424811272e-07, "loss": 17.3173, "step": 51288 }, { "epoch": 0.9375217065457803, "grad_norm": 5.6515570038427345, "learning_rate": 1.0205569391210424e-07, "loss": 17.114, "step": 51289 }, { "epoch": 0.9375399857422267, "grad_norm": 4.649027525206509, "learning_rate": 1.0199620074296535e-07, "loss": 16.8086, "step": 51290 }, { "epoch": 0.9375582649386733, "grad_norm": 7.466322575428758, "learning_rate": 1.0193672474090533e-07, "loss": 17.9424, "step": 51291 }, { "epoch": 0.9375765441351198, "grad_norm": 6.170466657217358, "learning_rate": 1.0187726590613123e-07, "loss": 17.2372, "step": 51292 }, { "epoch": 0.9375948233315663, "grad_norm": 5.584128037146898, "learning_rate": 1.0181782423885289e-07, "loss": 17.0482, "step": 51293 }, { "epoch": 0.9376131025280129, "grad_norm": 4.640171176017616, "learning_rate": 1.0175839973927792e-07, "loss": 16.7245, "step": 51294 }, { "epoch": 0.9376313817244594, "grad_norm": 6.395483098326168, "learning_rate": 1.0169899240761505e-07, "loss": 17.2123, "step": 51295 }, { "epoch": 0.937649660920906, "grad_norm": 5.173824173156463, "learning_rate": 1.0163960224407133e-07, "loss": 16.9834, "step": 51296 }, { "epoch": 0.9376679401173524, "grad_norm": 8.042451283879894, "learning_rate": 1.0158022924885658e-07, "loss": 17.753, "step": 51297 }, { "epoch": 0.9376862193137989, "grad_norm": 6.600618160749774, "learning_rate": 1.0152087342217787e-07, "loss": 17.6439, "step": 51298 }, { "epoch": 0.9377044985102455, "grad_norm": 6.509698927550246, "learning_rate": 1.0146153476424281e-07, "loss": 17.3347, "step": 51299 }, { "epoch": 0.937722777706692, "grad_norm": 6.257521830534796, "learning_rate": 1.0140221327526067e-07, "loss": 17.3996, "step": 51300 }, { "epoch": 0.9377410569031386, "grad_norm": 6.922094613958237, "learning_rate": 1.0134290895543796e-07, "loss": 17.794, "step": 51301 }, { "epoch": 0.9377593360995851, "grad_norm": 7.078783623518481, "learning_rate": 1.0128362180498341e-07, "loss": 18.4656, "step": 51302 }, { "epoch": 0.9377776152960315, "grad_norm": 6.309473862349677, "learning_rate": 1.0122435182410518e-07, "loss": 17.1616, "step": 51303 }, { "epoch": 0.9377958944924781, "grad_norm": 6.499464427336393, "learning_rate": 1.011650990130103e-07, "loss": 17.1042, "step": 51304 }, { "epoch": 0.9378141736889246, "grad_norm": 5.84059277803068, "learning_rate": 1.0110586337190698e-07, "loss": 17.2288, "step": 51305 }, { "epoch": 0.9378324528853712, "grad_norm": 7.359389743629216, "learning_rate": 1.0104664490100224e-07, "loss": 17.4968, "step": 51306 }, { "epoch": 0.9378507320818177, "grad_norm": 6.743083813382774, "learning_rate": 1.0098744360050373e-07, "loss": 17.4775, "step": 51307 }, { "epoch": 0.9378690112782642, "grad_norm": 7.270058584400274, "learning_rate": 1.0092825947061957e-07, "loss": 17.7379, "step": 51308 }, { "epoch": 0.9378872904747108, "grad_norm": 5.826666549334834, "learning_rate": 1.008690925115563e-07, "loss": 17.3971, "step": 51309 }, { "epoch": 0.9379055696711572, "grad_norm": 7.20144674630316, "learning_rate": 1.0080994272352152e-07, "loss": 17.7974, "step": 51310 }, { "epoch": 0.9379238488676038, "grad_norm": 7.258260135351083, "learning_rate": 1.0075081010672283e-07, "loss": 17.422, "step": 51311 }, { "epoch": 0.9379421280640503, "grad_norm": 8.378448829988809, "learning_rate": 1.0069169466136785e-07, "loss": 17.4112, "step": 51312 }, { "epoch": 0.9379604072604968, "grad_norm": 7.139775403129303, "learning_rate": 1.0063259638766309e-07, "loss": 17.336, "step": 51313 }, { "epoch": 0.9379786864569434, "grad_norm": 7.224472275309081, "learning_rate": 1.0057351528581615e-07, "loss": 18.153, "step": 51314 }, { "epoch": 0.9379969656533899, "grad_norm": 5.726039221026598, "learning_rate": 1.0051445135603355e-07, "loss": 17.2112, "step": 51315 }, { "epoch": 0.9380152448498364, "grad_norm": 6.755605786126514, "learning_rate": 1.0045540459852288e-07, "loss": 17.9142, "step": 51316 }, { "epoch": 0.9380335240462829, "grad_norm": 6.609530383923595, "learning_rate": 1.003963750134912e-07, "loss": 17.5271, "step": 51317 }, { "epoch": 0.9380518032427294, "grad_norm": 8.60084649328304, "learning_rate": 1.0033736260114447e-07, "loss": 18.1433, "step": 51318 }, { "epoch": 0.938070082439176, "grad_norm": 5.285729376917815, "learning_rate": 1.0027836736169028e-07, "loss": 17.1963, "step": 51319 }, { "epoch": 0.9380883616356225, "grad_norm": 5.881017945838913, "learning_rate": 1.0021938929533515e-07, "loss": 17.2793, "step": 51320 }, { "epoch": 0.9381066408320691, "grad_norm": 5.565995960928298, "learning_rate": 1.0016042840228558e-07, "loss": 17.3591, "step": 51321 }, { "epoch": 0.9381249200285156, "grad_norm": 6.134549917886211, "learning_rate": 1.0010148468274916e-07, "loss": 17.3574, "step": 51322 }, { "epoch": 0.938143199224962, "grad_norm": 7.717006758091545, "learning_rate": 1.0004255813693187e-07, "loss": 17.7911, "step": 51323 }, { "epoch": 0.9381614784214086, "grad_norm": 5.097359920970941, "learning_rate": 9.998364876503963e-08, "loss": 17.0635, "step": 51324 }, { "epoch": 0.9381797576178551, "grad_norm": 5.565567989580935, "learning_rate": 9.992475656728007e-08, "loss": 17.2421, "step": 51325 }, { "epoch": 0.9381980368143017, "grad_norm": 5.96356959827122, "learning_rate": 9.986588154385912e-08, "loss": 17.1512, "step": 51326 }, { "epoch": 0.9382163160107482, "grad_norm": 6.637643312581902, "learning_rate": 9.980702369498219e-08, "loss": 17.2398, "step": 51327 }, { "epoch": 0.9382345952071947, "grad_norm": 5.692851574386888, "learning_rate": 9.974818302085742e-08, "loss": 16.9989, "step": 51328 }, { "epoch": 0.9382528744036412, "grad_norm": 5.519785497410312, "learning_rate": 9.968935952168912e-08, "loss": 17.2354, "step": 51329 }, { "epoch": 0.9382711536000877, "grad_norm": 5.2080644237760465, "learning_rate": 9.963055319768433e-08, "loss": 16.8018, "step": 51330 }, { "epoch": 0.9382894327965343, "grad_norm": 6.705170482071744, "learning_rate": 9.957176404905011e-08, "loss": 17.1511, "step": 51331 }, { "epoch": 0.9383077119929808, "grad_norm": 5.605341248576254, "learning_rate": 9.951299207599075e-08, "loss": 17.1351, "step": 51332 }, { "epoch": 0.9383259911894273, "grad_norm": 5.678788404295283, "learning_rate": 9.945423727871383e-08, "loss": 17.1023, "step": 51333 }, { "epoch": 0.9383442703858739, "grad_norm": 6.07643867010913, "learning_rate": 9.939549965742479e-08, "loss": 17.4758, "step": 51334 }, { "epoch": 0.9383625495823203, "grad_norm": 5.772346135293527, "learning_rate": 9.933677921232843e-08, "loss": 17.0132, "step": 51335 }, { "epoch": 0.9383808287787669, "grad_norm": 6.362184396784828, "learning_rate": 9.927807594363182e-08, "loss": 17.1893, "step": 51336 }, { "epoch": 0.9383991079752134, "grad_norm": 6.184741260024697, "learning_rate": 9.921938985154033e-08, "loss": 17.2272, "step": 51337 }, { "epoch": 0.9384173871716599, "grad_norm": 6.390160864323763, "learning_rate": 9.916072093625939e-08, "loss": 17.3747, "step": 51338 }, { "epoch": 0.9384356663681065, "grad_norm": 5.552543733350953, "learning_rate": 9.910206919799493e-08, "loss": 16.8376, "step": 51339 }, { "epoch": 0.938453945564553, "grad_norm": 7.753662939820873, "learning_rate": 9.904343463695232e-08, "loss": 18.0865, "step": 51340 }, { "epoch": 0.9384722247609995, "grad_norm": 5.798035775082638, "learning_rate": 9.898481725333697e-08, "loss": 17.0314, "step": 51341 }, { "epoch": 0.938490503957446, "grad_norm": 8.496349830462306, "learning_rate": 9.892621704735539e-08, "loss": 18.0378, "step": 51342 }, { "epoch": 0.9385087831538925, "grad_norm": 6.838705054594693, "learning_rate": 9.886763401921185e-08, "loss": 17.2514, "step": 51343 }, { "epoch": 0.9385270623503391, "grad_norm": 5.181129584984937, "learning_rate": 9.880906816911118e-08, "loss": 16.8454, "step": 51344 }, { "epoch": 0.9385453415467856, "grad_norm": 6.299947453319915, "learning_rate": 9.875051949726044e-08, "loss": 17.5665, "step": 51345 }, { "epoch": 0.9385636207432321, "grad_norm": 6.047407410513538, "learning_rate": 9.869198800386282e-08, "loss": 17.3736, "step": 51346 }, { "epoch": 0.9385818999396787, "grad_norm": 6.981882609521882, "learning_rate": 9.863347368912535e-08, "loss": 17.521, "step": 51347 }, { "epoch": 0.9386001791361251, "grad_norm": 5.249007061157012, "learning_rate": 9.857497655325177e-08, "loss": 17.1809, "step": 51348 }, { "epoch": 0.9386184583325717, "grad_norm": 6.600423706319879, "learning_rate": 9.851649659644747e-08, "loss": 17.3415, "step": 51349 }, { "epoch": 0.9386367375290182, "grad_norm": 6.134097087776442, "learning_rate": 9.845803381891783e-08, "loss": 17.2759, "step": 51350 }, { "epoch": 0.9386550167254647, "grad_norm": 5.1885069070320196, "learning_rate": 9.83995882208677e-08, "loss": 17.2552, "step": 51351 }, { "epoch": 0.9386732959219113, "grad_norm": 7.104387095620691, "learning_rate": 9.834115980250081e-08, "loss": 17.7283, "step": 51352 }, { "epoch": 0.9386915751183578, "grad_norm": 5.86505588139769, "learning_rate": 9.828274856402364e-08, "loss": 17.2911, "step": 51353 }, { "epoch": 0.9387098543148044, "grad_norm": 6.375364106699577, "learning_rate": 9.822435450563994e-08, "loss": 17.0343, "step": 51354 }, { "epoch": 0.9387281335112508, "grad_norm": 6.732775630124877, "learning_rate": 9.816597762755398e-08, "loss": 18.0646, "step": 51355 }, { "epoch": 0.9387464127076973, "grad_norm": 5.4812751670873, "learning_rate": 9.81076179299717e-08, "loss": 16.9199, "step": 51356 }, { "epoch": 0.9387646919041439, "grad_norm": 5.731260586184544, "learning_rate": 9.804927541309573e-08, "loss": 17.1987, "step": 51357 }, { "epoch": 0.9387829711005904, "grad_norm": 6.621843961021135, "learning_rate": 9.799095007713255e-08, "loss": 17.45, "step": 51358 }, { "epoch": 0.938801250297037, "grad_norm": 7.519723201232621, "learning_rate": 9.793264192228535e-08, "loss": 17.6545, "step": 51359 }, { "epoch": 0.9388195294934835, "grad_norm": 6.721663945587643, "learning_rate": 9.787435094875952e-08, "loss": 17.5673, "step": 51360 }, { "epoch": 0.9388378086899299, "grad_norm": 5.141039160105787, "learning_rate": 9.781607715675767e-08, "loss": 16.9009, "step": 51361 }, { "epoch": 0.9388560878863765, "grad_norm": 4.826188701319743, "learning_rate": 9.77578205464863e-08, "loss": 17.0284, "step": 51362 }, { "epoch": 0.938874367082823, "grad_norm": 6.245882169583449, "learning_rate": 9.769958111814747e-08, "loss": 17.154, "step": 51363 }, { "epoch": 0.9388926462792696, "grad_norm": 5.748103198327369, "learning_rate": 9.764135887194659e-08, "loss": 17.2659, "step": 51364 }, { "epoch": 0.9389109254757161, "grad_norm": 7.131414878909451, "learning_rate": 9.75831538080868e-08, "loss": 17.8164, "step": 51365 }, { "epoch": 0.9389292046721626, "grad_norm": 5.25617151260867, "learning_rate": 9.752496592677296e-08, "loss": 16.8636, "step": 51366 }, { "epoch": 0.9389474838686092, "grad_norm": 6.5505133793719645, "learning_rate": 9.746679522820879e-08, "loss": 17.5028, "step": 51367 }, { "epoch": 0.9389657630650556, "grad_norm": 7.379169500617165, "learning_rate": 9.740864171259801e-08, "loss": 17.8685, "step": 51368 }, { "epoch": 0.9389840422615022, "grad_norm": 4.774778350347945, "learning_rate": 9.735050538014435e-08, "loss": 16.6367, "step": 51369 }, { "epoch": 0.9390023214579487, "grad_norm": 5.717573745325162, "learning_rate": 9.729238623105208e-08, "loss": 17.4556, "step": 51370 }, { "epoch": 0.9390206006543952, "grad_norm": 5.62803589195704, "learning_rate": 9.723428426552494e-08, "loss": 17.1546, "step": 51371 }, { "epoch": 0.9390388798508418, "grad_norm": 4.941579427906699, "learning_rate": 9.717619948376556e-08, "loss": 16.8788, "step": 51372 }, { "epoch": 0.9390571590472883, "grad_norm": 5.742392873200491, "learning_rate": 9.711813188597818e-08, "loss": 17.0051, "step": 51373 }, { "epoch": 0.9390754382437349, "grad_norm": 6.007619632423698, "learning_rate": 9.7060081472366e-08, "loss": 17.4352, "step": 51374 }, { "epoch": 0.9390937174401813, "grad_norm": 6.918428477728001, "learning_rate": 9.70020482431333e-08, "loss": 17.5287, "step": 51375 }, { "epoch": 0.9391119966366278, "grad_norm": 5.30354013492786, "learning_rate": 9.69440321984827e-08, "loss": 16.8864, "step": 51376 }, { "epoch": 0.9391302758330744, "grad_norm": 5.889147139439796, "learning_rate": 9.68860333386179e-08, "loss": 17.0917, "step": 51377 }, { "epoch": 0.9391485550295209, "grad_norm": 7.959208190401645, "learning_rate": 9.68280516637421e-08, "loss": 17.7289, "step": 51378 }, { "epoch": 0.9391668342259675, "grad_norm": 6.922259669663927, "learning_rate": 9.677008717405901e-08, "loss": 17.6676, "step": 51379 }, { "epoch": 0.939185113422414, "grad_norm": 6.969470706969379, "learning_rate": 9.671213986977013e-08, "loss": 16.929, "step": 51380 }, { "epoch": 0.9392033926188604, "grad_norm": 4.7586428054383205, "learning_rate": 9.665420975108086e-08, "loss": 16.7447, "step": 51381 }, { "epoch": 0.939221671815307, "grad_norm": 7.369130378371608, "learning_rate": 9.659629681819271e-08, "loss": 17.9188, "step": 51382 }, { "epoch": 0.9392399510117535, "grad_norm": 7.401835526363622, "learning_rate": 9.653840107130886e-08, "loss": 17.8937, "step": 51383 }, { "epoch": 0.9392582302082001, "grad_norm": 6.329065112420766, "learning_rate": 9.64805225106319e-08, "loss": 17.5062, "step": 51384 }, { "epoch": 0.9392765094046466, "grad_norm": 5.729535311963461, "learning_rate": 9.642266113636611e-08, "loss": 17.2598, "step": 51385 }, { "epoch": 0.9392947886010931, "grad_norm": 7.175576584013142, "learning_rate": 9.636481694871303e-08, "loss": 17.6983, "step": 51386 }, { "epoch": 0.9393130677975396, "grad_norm": 7.498823542038607, "learning_rate": 9.63069899478758e-08, "loss": 17.802, "step": 51387 }, { "epoch": 0.9393313469939861, "grad_norm": 5.3450986849002815, "learning_rate": 9.624918013405704e-08, "loss": 16.8727, "step": 51388 }, { "epoch": 0.9393496261904327, "grad_norm": 5.281848207051755, "learning_rate": 9.619138750745938e-08, "loss": 16.9358, "step": 51389 }, { "epoch": 0.9393679053868792, "grad_norm": 6.534547182523316, "learning_rate": 9.613361206828542e-08, "loss": 17.6297, "step": 51390 }, { "epoch": 0.9393861845833257, "grad_norm": 5.908973797657855, "learning_rate": 9.607585381673778e-08, "loss": 17.5098, "step": 51391 }, { "epoch": 0.9394044637797723, "grad_norm": 6.823624675934177, "learning_rate": 9.601811275301798e-08, "loss": 17.5045, "step": 51392 }, { "epoch": 0.9394227429762187, "grad_norm": 6.197471544375372, "learning_rate": 9.596038887732973e-08, "loss": 17.3722, "step": 51393 }, { "epoch": 0.9394410221726653, "grad_norm": 5.511158563417236, "learning_rate": 9.590268218987397e-08, "loss": 17.3058, "step": 51394 }, { "epoch": 0.9394593013691118, "grad_norm": 6.682696975185367, "learning_rate": 9.584499269085501e-08, "loss": 17.6303, "step": 51395 }, { "epoch": 0.9394775805655583, "grad_norm": 5.721382006173967, "learning_rate": 9.578732038047323e-08, "loss": 17.2528, "step": 51396 }, { "epoch": 0.9394958597620049, "grad_norm": 6.149347664028897, "learning_rate": 9.572966525893124e-08, "loss": 17.3016, "step": 51397 }, { "epoch": 0.9395141389584514, "grad_norm": 4.873949461384366, "learning_rate": 9.567202732643166e-08, "loss": 16.7618, "step": 51398 }, { "epoch": 0.939532418154898, "grad_norm": 6.368220187637095, "learning_rate": 9.561440658317545e-08, "loss": 17.2857, "step": 51399 }, { "epoch": 0.9395506973513444, "grad_norm": 5.312143414137174, "learning_rate": 9.55568030293652e-08, "loss": 16.9821, "step": 51400 }, { "epoch": 0.9395689765477909, "grad_norm": 9.651830040985992, "learning_rate": 9.549921666520301e-08, "loss": 17.5164, "step": 51401 }, { "epoch": 0.9395872557442375, "grad_norm": 6.007858909437848, "learning_rate": 9.544164749089035e-08, "loss": 17.2777, "step": 51402 }, { "epoch": 0.939605534940684, "grad_norm": 6.004417607540269, "learning_rate": 9.538409550662875e-08, "loss": 17.0943, "step": 51403 }, { "epoch": 0.9396238141371306, "grad_norm": 5.271572711590361, "learning_rate": 9.532656071262136e-08, "loss": 17.0239, "step": 51404 }, { "epoch": 0.9396420933335771, "grad_norm": 6.080917202937522, "learning_rate": 9.526904310906804e-08, "loss": 17.3796, "step": 51405 }, { "epoch": 0.9396603725300235, "grad_norm": 6.851914666064934, "learning_rate": 9.521154269617083e-08, "loss": 17.2795, "step": 51406 }, { "epoch": 0.9396786517264701, "grad_norm": 7.70303473833755, "learning_rate": 9.515405947413237e-08, "loss": 17.6962, "step": 51407 }, { "epoch": 0.9396969309229166, "grad_norm": 5.083317464933023, "learning_rate": 9.509659344315247e-08, "loss": 17.0531, "step": 51408 }, { "epoch": 0.9397152101193631, "grad_norm": 6.766229931825171, "learning_rate": 9.503914460343433e-08, "loss": 17.5165, "step": 51409 }, { "epoch": 0.9397334893158097, "grad_norm": 6.558329505027717, "learning_rate": 9.498171295517777e-08, "loss": 17.3977, "step": 51410 }, { "epoch": 0.9397517685122562, "grad_norm": 6.274932410674484, "learning_rate": 9.492429849858431e-08, "loss": 17.4608, "step": 51411 }, { "epoch": 0.9397700477087028, "grad_norm": 5.59285945407026, "learning_rate": 9.4866901233856e-08, "loss": 17.2413, "step": 51412 }, { "epoch": 0.9397883269051492, "grad_norm": 6.682325612857856, "learning_rate": 9.480952116119324e-08, "loss": 17.3651, "step": 51413 }, { "epoch": 0.9398066061015957, "grad_norm": 8.901824365318523, "learning_rate": 9.475215828079754e-08, "loss": 18.5508, "step": 51414 }, { "epoch": 0.9398248852980423, "grad_norm": 4.80818483317909, "learning_rate": 9.469481259287038e-08, "loss": 16.7593, "step": 51415 }, { "epoch": 0.9398431644944888, "grad_norm": 6.387076494638628, "learning_rate": 9.46374840976122e-08, "loss": 17.5208, "step": 51416 }, { "epoch": 0.9398614436909354, "grad_norm": 7.17236738750174, "learning_rate": 9.458017279522336e-08, "loss": 17.4266, "step": 51417 }, { "epoch": 0.9398797228873819, "grad_norm": 5.608012594932431, "learning_rate": 9.452287868590593e-08, "loss": 17.2103, "step": 51418 }, { "epoch": 0.9398980020838283, "grad_norm": 5.002612558499303, "learning_rate": 9.44656017698592e-08, "loss": 16.8358, "step": 51419 }, { "epoch": 0.9399162812802749, "grad_norm": 5.2500080744403474, "learning_rate": 9.440834204728578e-08, "loss": 17.0316, "step": 51420 }, { "epoch": 0.9399345604767214, "grad_norm": 6.900720647880199, "learning_rate": 9.435109951838494e-08, "loss": 17.699, "step": 51421 }, { "epoch": 0.939952839673168, "grad_norm": 6.284713844752529, "learning_rate": 9.429387418335823e-08, "loss": 17.4466, "step": 51422 }, { "epoch": 0.9399711188696145, "grad_norm": 5.7317583416102, "learning_rate": 9.423666604240544e-08, "loss": 17.042, "step": 51423 }, { "epoch": 0.939989398066061, "grad_norm": 6.383699692413139, "learning_rate": 9.417947509572756e-08, "loss": 17.3281, "step": 51424 }, { "epoch": 0.9400076772625076, "grad_norm": 6.241773800150418, "learning_rate": 9.412230134352495e-08, "loss": 17.1739, "step": 51425 }, { "epoch": 0.940025956458954, "grad_norm": 4.978680622452668, "learning_rate": 9.406514478599804e-08, "loss": 16.9714, "step": 51426 }, { "epoch": 0.9400442356554006, "grad_norm": 6.107308340187305, "learning_rate": 9.400800542334721e-08, "loss": 17.4274, "step": 51427 }, { "epoch": 0.9400625148518471, "grad_norm": 5.07523328457798, "learning_rate": 9.395088325577228e-08, "loss": 16.7085, "step": 51428 }, { "epoch": 0.9400807940482936, "grad_norm": 5.388189849575157, "learning_rate": 9.389377828347312e-08, "loss": 17.1477, "step": 51429 }, { "epoch": 0.9400990732447402, "grad_norm": 6.122541319475792, "learning_rate": 9.383669050665179e-08, "loss": 17.3358, "step": 51430 }, { "epoch": 0.9401173524411867, "grad_norm": 7.0742673649034815, "learning_rate": 9.377961992550644e-08, "loss": 17.5474, "step": 51431 }, { "epoch": 0.9401356316376333, "grad_norm": 6.2153691628630545, "learning_rate": 9.372256654023804e-08, "loss": 17.1367, "step": 51432 }, { "epoch": 0.9401539108340797, "grad_norm": 7.028451782769131, "learning_rate": 9.366553035104641e-08, "loss": 17.4561, "step": 51433 }, { "epoch": 0.9401721900305262, "grad_norm": 5.6223255243876, "learning_rate": 9.360851135813086e-08, "loss": 17.1466, "step": 51434 }, { "epoch": 0.9401904692269728, "grad_norm": 6.385001750811568, "learning_rate": 9.355150956169234e-08, "loss": 17.4334, "step": 51435 }, { "epoch": 0.9402087484234193, "grad_norm": 6.773907611673186, "learning_rate": 9.349452496192956e-08, "loss": 17.4911, "step": 51436 }, { "epoch": 0.9402270276198659, "grad_norm": 6.1299683017341255, "learning_rate": 9.343755755904238e-08, "loss": 17.3195, "step": 51437 }, { "epoch": 0.9402453068163124, "grad_norm": 6.1582617695123805, "learning_rate": 9.338060735323117e-08, "loss": 17.2797, "step": 51438 }, { "epoch": 0.9402635860127588, "grad_norm": 6.965149960840134, "learning_rate": 9.332367434469524e-08, "loss": 17.6016, "step": 51439 }, { "epoch": 0.9402818652092054, "grad_norm": 5.877150273295549, "learning_rate": 9.326675853363387e-08, "loss": 17.0886, "step": 51440 }, { "epoch": 0.9403001444056519, "grad_norm": 7.695488187967654, "learning_rate": 9.320985992024745e-08, "loss": 17.6702, "step": 51441 }, { "epoch": 0.9403184236020985, "grad_norm": 6.608859459385287, "learning_rate": 9.315297850473359e-08, "loss": 17.3943, "step": 51442 }, { "epoch": 0.940336702798545, "grad_norm": 5.647954304884843, "learning_rate": 9.309611428729327e-08, "loss": 17.1879, "step": 51443 }, { "epoch": 0.9403549819949915, "grad_norm": 6.765631632193145, "learning_rate": 9.303926726812573e-08, "loss": 16.8655, "step": 51444 }, { "epoch": 0.940373261191438, "grad_norm": 8.150295374163019, "learning_rate": 9.298243744742918e-08, "loss": 17.3513, "step": 51445 }, { "epoch": 0.9403915403878845, "grad_norm": 7.434903087769341, "learning_rate": 9.292562482540346e-08, "loss": 17.8156, "step": 51446 }, { "epoch": 0.9404098195843311, "grad_norm": 6.483416312918838, "learning_rate": 9.286882940224729e-08, "loss": 17.2778, "step": 51447 }, { "epoch": 0.9404280987807776, "grad_norm": 5.620101521557121, "learning_rate": 9.281205117815994e-08, "loss": 17.2715, "step": 51448 }, { "epoch": 0.9404463779772241, "grad_norm": 6.204984129603398, "learning_rate": 9.275529015334128e-08, "loss": 17.4908, "step": 51449 }, { "epoch": 0.9404646571736707, "grad_norm": 7.696282205181621, "learning_rate": 9.269854632798947e-08, "loss": 18.1342, "step": 51450 }, { "epoch": 0.9404829363701172, "grad_norm": 5.594092132041785, "learning_rate": 9.264181970230269e-08, "loss": 16.8677, "step": 51451 }, { "epoch": 0.9405012155665637, "grad_norm": 5.470115929185059, "learning_rate": 9.258511027648131e-08, "loss": 17.1943, "step": 51452 }, { "epoch": 0.9405194947630102, "grad_norm": 6.814674959089525, "learning_rate": 9.2528418050723e-08, "loss": 17.5072, "step": 51453 }, { "epoch": 0.9405377739594567, "grad_norm": 5.0172852713619704, "learning_rate": 9.247174302522643e-08, "loss": 16.9055, "step": 51454 }, { "epoch": 0.9405560531559033, "grad_norm": 6.831160440735979, "learning_rate": 9.241508520019093e-08, "loss": 17.4199, "step": 51455 }, { "epoch": 0.9405743323523498, "grad_norm": 6.9766319219563355, "learning_rate": 9.23584445758141e-08, "loss": 17.6492, "step": 51456 }, { "epoch": 0.9405926115487964, "grad_norm": 5.074765536852274, "learning_rate": 9.230182115229524e-08, "loss": 17.2204, "step": 51457 }, { "epoch": 0.9406108907452428, "grad_norm": 7.15279387109702, "learning_rate": 9.224521492983308e-08, "loss": 17.8529, "step": 51458 }, { "epoch": 0.9406291699416893, "grad_norm": 5.729117112483471, "learning_rate": 9.218862590862521e-08, "loss": 16.9951, "step": 51459 }, { "epoch": 0.9406474491381359, "grad_norm": 7.617857279476741, "learning_rate": 9.213205408887094e-08, "loss": 17.7117, "step": 51460 }, { "epoch": 0.9406657283345824, "grad_norm": 5.5392479320648285, "learning_rate": 9.207549947076733e-08, "loss": 17.2405, "step": 51461 }, { "epoch": 0.940684007531029, "grad_norm": 5.666921489987005, "learning_rate": 9.20189620545131e-08, "loss": 17.2453, "step": 51462 }, { "epoch": 0.9407022867274755, "grad_norm": 8.667468375168825, "learning_rate": 9.1962441840307e-08, "loss": 17.6085, "step": 51463 }, { "epoch": 0.940720565923922, "grad_norm": 7.044529415831492, "learning_rate": 9.190593882834664e-08, "loss": 17.6311, "step": 51464 }, { "epoch": 0.9407388451203685, "grad_norm": 6.327795902364221, "learning_rate": 9.184945301882964e-08, "loss": 17.2162, "step": 51465 }, { "epoch": 0.940757124316815, "grad_norm": 8.694937000253622, "learning_rate": 9.179298441195417e-08, "loss": 17.8809, "step": 51466 }, { "epoch": 0.9407754035132616, "grad_norm": 6.18891699798892, "learning_rate": 9.173653300791896e-08, "loss": 17.289, "step": 51467 }, { "epoch": 0.9407936827097081, "grad_norm": 6.359876566082061, "learning_rate": 9.168009880692052e-08, "loss": 17.1379, "step": 51468 }, { "epoch": 0.9408119619061546, "grad_norm": 7.858013316069332, "learning_rate": 9.162368180915815e-08, "loss": 18.1063, "step": 51469 }, { "epoch": 0.9408302411026012, "grad_norm": 6.024448941879634, "learning_rate": 9.156728201482889e-08, "loss": 17.3138, "step": 51470 }, { "epoch": 0.9408485202990476, "grad_norm": 6.095360025266761, "learning_rate": 9.151089942412983e-08, "loss": 17.2372, "step": 51471 }, { "epoch": 0.9408667994954942, "grad_norm": 7.35657213031632, "learning_rate": 9.145453403725968e-08, "loss": 17.4956, "step": 51472 }, { "epoch": 0.9408850786919407, "grad_norm": 5.807967472640598, "learning_rate": 9.139818585441495e-08, "loss": 16.9434, "step": 51473 }, { "epoch": 0.9409033578883872, "grad_norm": 5.30857767890527, "learning_rate": 9.134185487579439e-08, "loss": 16.9962, "step": 51474 }, { "epoch": 0.9409216370848338, "grad_norm": 5.5118500273714774, "learning_rate": 9.128554110159393e-08, "loss": 16.8611, "step": 51475 }, { "epoch": 0.9409399162812803, "grad_norm": 5.294847955237823, "learning_rate": 9.122924453201176e-08, "loss": 16.9854, "step": 51476 }, { "epoch": 0.9409581954777267, "grad_norm": 6.287732259728379, "learning_rate": 9.11729651672455e-08, "loss": 17.5344, "step": 51477 }, { "epoch": 0.9409764746741733, "grad_norm": 5.798288933502944, "learning_rate": 9.111670300749165e-08, "loss": 17.1413, "step": 51478 }, { "epoch": 0.9409947538706198, "grad_norm": 5.178162056476815, "learning_rate": 9.106045805294783e-08, "loss": 17.139, "step": 51479 }, { "epoch": 0.9410130330670664, "grad_norm": 6.330495082976329, "learning_rate": 9.100423030381167e-08, "loss": 17.2749, "step": 51480 }, { "epoch": 0.9410313122635129, "grad_norm": 5.01212995489179, "learning_rate": 9.094801976027912e-08, "loss": 16.8535, "step": 51481 }, { "epoch": 0.9410495914599594, "grad_norm": 5.152957120537688, "learning_rate": 9.08918264225478e-08, "loss": 16.9278, "step": 51482 }, { "epoch": 0.941067870656406, "grad_norm": 5.036411358470404, "learning_rate": 9.083565029081475e-08, "loss": 16.9642, "step": 51483 }, { "epoch": 0.9410861498528524, "grad_norm": 7.708536406587726, "learning_rate": 9.077949136527708e-08, "loss": 17.6678, "step": 51484 }, { "epoch": 0.941104429049299, "grad_norm": 6.139570091483334, "learning_rate": 9.072334964613071e-08, "loss": 17.4075, "step": 51485 }, { "epoch": 0.9411227082457455, "grad_norm": 6.546342829394263, "learning_rate": 9.06672251335733e-08, "loss": 17.1053, "step": 51486 }, { "epoch": 0.941140987442192, "grad_norm": 5.254109927694257, "learning_rate": 9.061111782780074e-08, "loss": 16.9704, "step": 51487 }, { "epoch": 0.9411592666386386, "grad_norm": 6.414906383606306, "learning_rate": 9.055502772901125e-08, "loss": 17.5982, "step": 51488 }, { "epoch": 0.9411775458350851, "grad_norm": 6.279728227934167, "learning_rate": 9.049895483739968e-08, "loss": 17.2729, "step": 51489 }, { "epoch": 0.9411958250315317, "grad_norm": 4.929220448457399, "learning_rate": 9.044289915316307e-08, "loss": 16.783, "step": 51490 }, { "epoch": 0.9412141042279781, "grad_norm": 7.809910561872946, "learning_rate": 9.038686067649849e-08, "loss": 17.9109, "step": 51491 }, { "epoch": 0.9412323834244246, "grad_norm": 7.662641673249446, "learning_rate": 9.03308394076019e-08, "loss": 17.6718, "step": 51492 }, { "epoch": 0.9412506626208712, "grad_norm": 6.3390880138777135, "learning_rate": 9.027483534666925e-08, "loss": 17.138, "step": 51493 }, { "epoch": 0.9412689418173177, "grad_norm": 7.214161509391751, "learning_rate": 9.021884849389762e-08, "loss": 17.4274, "step": 51494 }, { "epoch": 0.9412872210137643, "grad_norm": 6.94978212986575, "learning_rate": 9.016287884948294e-08, "loss": 17.9013, "step": 51495 }, { "epoch": 0.9413055002102108, "grad_norm": 7.273925226780147, "learning_rate": 9.010692641362118e-08, "loss": 17.7766, "step": 51496 }, { "epoch": 0.9413237794066572, "grad_norm": 6.681754280643882, "learning_rate": 9.005099118650884e-08, "loss": 17.6853, "step": 51497 }, { "epoch": 0.9413420586031038, "grad_norm": 5.777839455034788, "learning_rate": 8.999507316834133e-08, "loss": 17.202, "step": 51498 }, { "epoch": 0.9413603377995503, "grad_norm": 5.804604172261827, "learning_rate": 8.993917235931515e-08, "loss": 17.5079, "step": 51499 }, { "epoch": 0.9413786169959969, "grad_norm": 5.356399920898353, "learning_rate": 8.988328875962626e-08, "loss": 17.1571, "step": 51500 }, { "epoch": 0.9413968961924434, "grad_norm": 5.429795525169104, "learning_rate": 8.982742236947006e-08, "loss": 16.8928, "step": 51501 }, { "epoch": 0.9414151753888899, "grad_norm": 4.797533860192631, "learning_rate": 8.97715731890425e-08, "loss": 16.8571, "step": 51502 }, { "epoch": 0.9414334545853364, "grad_norm": 6.392486041197898, "learning_rate": 8.971574121854009e-08, "loss": 17.3396, "step": 51503 }, { "epoch": 0.9414517337817829, "grad_norm": 6.96270999728004, "learning_rate": 8.965992645815714e-08, "loss": 17.5759, "step": 51504 }, { "epoch": 0.9414700129782295, "grad_norm": 5.997532938203561, "learning_rate": 8.960412890809067e-08, "loss": 17.2463, "step": 51505 }, { "epoch": 0.941488292174676, "grad_norm": 5.859535150637482, "learning_rate": 8.954834856853556e-08, "loss": 17.3223, "step": 51506 }, { "epoch": 0.9415065713711225, "grad_norm": 4.321119684052892, "learning_rate": 8.94925854396872e-08, "loss": 16.8133, "step": 51507 }, { "epoch": 0.9415248505675691, "grad_norm": 7.024539932437912, "learning_rate": 8.943683952174153e-08, "loss": 17.517, "step": 51508 }, { "epoch": 0.9415431297640156, "grad_norm": 5.298508367629693, "learning_rate": 8.938111081489342e-08, "loss": 16.9007, "step": 51509 }, { "epoch": 0.9415614089604621, "grad_norm": 5.998030014994929, "learning_rate": 8.932539931933826e-08, "loss": 17.2795, "step": 51510 }, { "epoch": 0.9415796881569086, "grad_norm": 6.04750427555759, "learning_rate": 8.926970503527144e-08, "loss": 17.4775, "step": 51511 }, { "epoch": 0.9415979673533551, "grad_norm": 7.651037784652829, "learning_rate": 8.921402796288837e-08, "loss": 18.2104, "step": 51512 }, { "epoch": 0.9416162465498017, "grad_norm": 7.289903011849976, "learning_rate": 8.915836810238387e-08, "loss": 17.6434, "step": 51513 }, { "epoch": 0.9416345257462482, "grad_norm": 6.616891797260377, "learning_rate": 8.910272545395338e-08, "loss": 17.7149, "step": 51514 }, { "epoch": 0.9416528049426948, "grad_norm": 5.053020173339382, "learning_rate": 8.904710001779115e-08, "loss": 16.8342, "step": 51515 }, { "epoch": 0.9416710841391412, "grad_norm": 6.408553106885664, "learning_rate": 8.899149179409316e-08, "loss": 17.1233, "step": 51516 }, { "epoch": 0.9416893633355877, "grad_norm": 7.172699204039617, "learning_rate": 8.89359007830537e-08, "loss": 17.857, "step": 51517 }, { "epoch": 0.9417076425320343, "grad_norm": 6.569483022001752, "learning_rate": 8.888032698486759e-08, "loss": 17.4019, "step": 51518 }, { "epoch": 0.9417259217284808, "grad_norm": 5.356798410435455, "learning_rate": 8.882477039973026e-08, "loss": 17.0826, "step": 51519 }, { "epoch": 0.9417442009249274, "grad_norm": 6.197759767803392, "learning_rate": 8.876923102783542e-08, "loss": 17.4719, "step": 51520 }, { "epoch": 0.9417624801213739, "grad_norm": 6.691256490103582, "learning_rate": 8.871370886937791e-08, "loss": 17.3727, "step": 51521 }, { "epoch": 0.9417807593178203, "grad_norm": 5.9840221549522425, "learning_rate": 8.865820392455371e-08, "loss": 17.2814, "step": 51522 }, { "epoch": 0.9417990385142669, "grad_norm": 7.259001274599257, "learning_rate": 8.860271619355598e-08, "loss": 17.8116, "step": 51523 }, { "epoch": 0.9418173177107134, "grad_norm": 4.45697328389639, "learning_rate": 8.854724567657902e-08, "loss": 16.6988, "step": 51524 }, { "epoch": 0.94183559690716, "grad_norm": 7.012248048672465, "learning_rate": 8.849179237381821e-08, "loss": 17.6569, "step": 51525 }, { "epoch": 0.9418538761036065, "grad_norm": 6.202285847728114, "learning_rate": 8.843635628546731e-08, "loss": 17.4686, "step": 51526 }, { "epoch": 0.941872155300053, "grad_norm": 5.2815567046274, "learning_rate": 8.83809374117206e-08, "loss": 17.0528, "step": 51527 }, { "epoch": 0.9418904344964996, "grad_norm": 5.403512109916122, "learning_rate": 8.832553575277291e-08, "loss": 17.0257, "step": 51528 }, { "epoch": 0.941908713692946, "grad_norm": 5.517332838099641, "learning_rate": 8.8270151308818e-08, "loss": 17.0741, "step": 51529 }, { "epoch": 0.9419269928893926, "grad_norm": 6.252115804437298, "learning_rate": 8.821478408004957e-08, "loss": 17.6809, "step": 51530 }, { "epoch": 0.9419452720858391, "grad_norm": 5.534958082017416, "learning_rate": 8.81594340666625e-08, "loss": 17.2067, "step": 51531 }, { "epoch": 0.9419635512822856, "grad_norm": 6.114515656515971, "learning_rate": 8.810410126884994e-08, "loss": 17.1823, "step": 51532 }, { "epoch": 0.9419818304787322, "grad_norm": 6.3397395547476725, "learning_rate": 8.804878568680675e-08, "loss": 17.6561, "step": 51533 }, { "epoch": 0.9420001096751787, "grad_norm": 6.901552298452594, "learning_rate": 8.79934873207261e-08, "loss": 17.6782, "step": 51534 }, { "epoch": 0.9420183888716253, "grad_norm": 8.505315661382284, "learning_rate": 8.793820617080173e-08, "loss": 17.5996, "step": 51535 }, { "epoch": 0.9420366680680717, "grad_norm": 6.277668175299324, "learning_rate": 8.788294223722849e-08, "loss": 17.2129, "step": 51536 }, { "epoch": 0.9420549472645182, "grad_norm": 4.7784445547787096, "learning_rate": 8.782769552019899e-08, "loss": 17.1028, "step": 51537 }, { "epoch": 0.9420732264609648, "grad_norm": 7.691229715708325, "learning_rate": 8.777246601990641e-08, "loss": 17.6532, "step": 51538 }, { "epoch": 0.9420915056574113, "grad_norm": 6.519822250373158, "learning_rate": 8.771725373654561e-08, "loss": 17.4408, "step": 51539 }, { "epoch": 0.9421097848538579, "grad_norm": 7.238989247947421, "learning_rate": 8.766205867030975e-08, "loss": 17.2491, "step": 51540 }, { "epoch": 0.9421280640503044, "grad_norm": 4.755080197334424, "learning_rate": 8.760688082139146e-08, "loss": 16.6448, "step": 51541 }, { "epoch": 0.9421463432467508, "grad_norm": 6.376328492015136, "learning_rate": 8.755172018998559e-08, "loss": 17.7401, "step": 51542 }, { "epoch": 0.9421646224431974, "grad_norm": 5.647858351006967, "learning_rate": 8.749657677628476e-08, "loss": 17.1664, "step": 51543 }, { "epoch": 0.9421829016396439, "grad_norm": 6.011570993985115, "learning_rate": 8.744145058048159e-08, "loss": 17.5826, "step": 51544 }, { "epoch": 0.9422011808360904, "grad_norm": 5.832997419761043, "learning_rate": 8.738634160276981e-08, "loss": 17.0918, "step": 51545 }, { "epoch": 0.942219460032537, "grad_norm": 6.880139444835994, "learning_rate": 8.733124984334263e-08, "loss": 17.7425, "step": 51546 }, { "epoch": 0.9422377392289835, "grad_norm": 8.136765823860177, "learning_rate": 8.727617530239319e-08, "loss": 17.4361, "step": 51547 }, { "epoch": 0.94225601842543, "grad_norm": 6.77920060594086, "learning_rate": 8.722111798011467e-08, "loss": 17.3199, "step": 51548 }, { "epoch": 0.9422742976218765, "grad_norm": 7.412942644682872, "learning_rate": 8.716607787669917e-08, "loss": 17.5628, "step": 51549 }, { "epoch": 0.942292576818323, "grad_norm": 6.111422253853529, "learning_rate": 8.711105499234096e-08, "loss": 17.0753, "step": 51550 }, { "epoch": 0.9423108560147696, "grad_norm": 6.612354748722002, "learning_rate": 8.705604932723211e-08, "loss": 17.3262, "step": 51551 }, { "epoch": 0.9423291352112161, "grad_norm": 5.058616751660343, "learning_rate": 8.700106088156524e-08, "loss": 17.1069, "step": 51552 }, { "epoch": 0.9423474144076627, "grad_norm": 4.82139721723889, "learning_rate": 8.694608965553352e-08, "loss": 16.8248, "step": 51553 }, { "epoch": 0.9423656936041092, "grad_norm": 6.383241978877296, "learning_rate": 8.68911356493296e-08, "loss": 17.1987, "step": 51554 }, { "epoch": 0.9423839728005556, "grad_norm": 6.838993374360924, "learning_rate": 8.683619886314554e-08, "loss": 17.5219, "step": 51555 }, { "epoch": 0.9424022519970022, "grad_norm": 7.78976759982897, "learning_rate": 8.678127929717395e-08, "loss": 18.0682, "step": 51556 }, { "epoch": 0.9424205311934487, "grad_norm": 6.043654669811929, "learning_rate": 8.672637695160801e-08, "loss": 17.7982, "step": 51557 }, { "epoch": 0.9424388103898953, "grad_norm": 5.067290984326055, "learning_rate": 8.667149182663981e-08, "loss": 16.965, "step": 51558 }, { "epoch": 0.9424570895863418, "grad_norm": 8.205643951545484, "learning_rate": 8.66166239224614e-08, "loss": 17.9287, "step": 51559 }, { "epoch": 0.9424753687827883, "grad_norm": 6.118335668820016, "learning_rate": 8.656177323926595e-08, "loss": 17.3862, "step": 51560 }, { "epoch": 0.9424936479792348, "grad_norm": 6.582530638078552, "learning_rate": 8.650693977724445e-08, "loss": 17.4323, "step": 51561 }, { "epoch": 0.9425119271756813, "grad_norm": 6.07227505944113, "learning_rate": 8.64521235365906e-08, "loss": 17.8185, "step": 51562 }, { "epoch": 0.9425302063721279, "grad_norm": 6.847666907694562, "learning_rate": 8.639732451749483e-08, "loss": 17.5734, "step": 51563 }, { "epoch": 0.9425484855685744, "grad_norm": 5.383944422153487, "learning_rate": 8.63425427201503e-08, "loss": 16.8634, "step": 51564 }, { "epoch": 0.9425667647650209, "grad_norm": 5.65474954824551, "learning_rate": 8.628777814474853e-08, "loss": 17.3366, "step": 51565 }, { "epoch": 0.9425850439614675, "grad_norm": 5.4513944739939255, "learning_rate": 8.623303079148215e-08, "loss": 17.0275, "step": 51566 }, { "epoch": 0.942603323157914, "grad_norm": 6.918750371847166, "learning_rate": 8.617830066054266e-08, "loss": 17.1064, "step": 51567 }, { "epoch": 0.9426216023543605, "grad_norm": 5.987413266915298, "learning_rate": 8.612358775212159e-08, "loss": 17.4636, "step": 51568 }, { "epoch": 0.942639881550807, "grad_norm": 7.143994262038408, "learning_rate": 8.6068892066411e-08, "loss": 17.0878, "step": 51569 }, { "epoch": 0.9426581607472535, "grad_norm": 6.814034842018756, "learning_rate": 8.60142136036024e-08, "loss": 17.6797, "step": 51570 }, { "epoch": 0.9426764399437001, "grad_norm": 7.390172978642819, "learning_rate": 8.595955236388787e-08, "loss": 17.4456, "step": 51571 }, { "epoch": 0.9426947191401466, "grad_norm": 6.5701589818856965, "learning_rate": 8.590490834745835e-08, "loss": 17.2591, "step": 51572 }, { "epoch": 0.9427129983365932, "grad_norm": 8.96762300306977, "learning_rate": 8.585028155450591e-08, "loss": 17.4932, "step": 51573 }, { "epoch": 0.9427312775330396, "grad_norm": 6.4278703332541065, "learning_rate": 8.579567198522154e-08, "loss": 17.5428, "step": 51574 }, { "epoch": 0.9427495567294861, "grad_norm": 6.101657487150806, "learning_rate": 8.574107963979672e-08, "loss": 17.181, "step": 51575 }, { "epoch": 0.9427678359259327, "grad_norm": 6.958065113294812, "learning_rate": 8.568650451842353e-08, "loss": 17.3969, "step": 51576 }, { "epoch": 0.9427861151223792, "grad_norm": 5.3796631183018615, "learning_rate": 8.563194662129238e-08, "loss": 17.0935, "step": 51577 }, { "epoch": 0.9428043943188258, "grad_norm": 6.489045502043537, "learning_rate": 8.557740594859532e-08, "loss": 17.2316, "step": 51578 }, { "epoch": 0.9428226735152723, "grad_norm": 4.953667852639708, "learning_rate": 8.552288250052277e-08, "loss": 16.9733, "step": 51579 }, { "epoch": 0.9428409527117187, "grad_norm": 5.30255294919624, "learning_rate": 8.546837627726567e-08, "loss": 17.0365, "step": 51580 }, { "epoch": 0.9428592319081653, "grad_norm": 6.868318088870614, "learning_rate": 8.541388727901556e-08, "loss": 17.1487, "step": 51581 }, { "epoch": 0.9428775111046118, "grad_norm": 9.162384303267078, "learning_rate": 8.535941550596393e-08, "loss": 18.2311, "step": 51582 }, { "epoch": 0.9428957903010584, "grad_norm": 7.764644160388469, "learning_rate": 8.53049609583001e-08, "loss": 17.6252, "step": 51583 }, { "epoch": 0.9429140694975049, "grad_norm": 4.812039513190675, "learning_rate": 8.525052363621611e-08, "loss": 16.7526, "step": 51584 }, { "epoch": 0.9429323486939514, "grad_norm": 4.921740306429502, "learning_rate": 8.519610353990348e-08, "loss": 16.9718, "step": 51585 }, { "epoch": 0.942950627890398, "grad_norm": 5.823967723564647, "learning_rate": 8.514170066955096e-08, "loss": 17.2616, "step": 51586 }, { "epoch": 0.9429689070868444, "grad_norm": 5.970791870886297, "learning_rate": 8.508731502535061e-08, "loss": 17.5161, "step": 51587 }, { "epoch": 0.942987186283291, "grad_norm": 6.640439254828855, "learning_rate": 8.50329466074934e-08, "loss": 17.4851, "step": 51588 }, { "epoch": 0.9430054654797375, "grad_norm": 6.381886949411129, "learning_rate": 8.497859541616804e-08, "loss": 17.1262, "step": 51589 }, { "epoch": 0.943023744676184, "grad_norm": 5.629695659699817, "learning_rate": 8.492426145156718e-08, "loss": 17.0433, "step": 51590 }, { "epoch": 0.9430420238726306, "grad_norm": 7.117842047452492, "learning_rate": 8.486994471388011e-08, "loss": 17.4057, "step": 51591 }, { "epoch": 0.9430603030690771, "grad_norm": 8.855210601019609, "learning_rate": 8.481564520329666e-08, "loss": 17.3015, "step": 51592 }, { "epoch": 0.9430785822655237, "grad_norm": 6.92778838894462, "learning_rate": 8.476136292000836e-08, "loss": 17.8171, "step": 51593 }, { "epoch": 0.9430968614619701, "grad_norm": 6.350181910631309, "learning_rate": 8.47070978642045e-08, "loss": 17.4767, "step": 51594 }, { "epoch": 0.9431151406584166, "grad_norm": 6.776323779013354, "learning_rate": 8.46528500360766e-08, "loss": 17.6386, "step": 51595 }, { "epoch": 0.9431334198548632, "grad_norm": 5.93678692704948, "learning_rate": 8.459861943581338e-08, "loss": 17.4583, "step": 51596 }, { "epoch": 0.9431516990513097, "grad_norm": 5.370924707043541, "learning_rate": 8.454440606360526e-08, "loss": 17.0535, "step": 51597 }, { "epoch": 0.9431699782477563, "grad_norm": 5.058621978073283, "learning_rate": 8.449020991964318e-08, "loss": 16.7428, "step": 51598 }, { "epoch": 0.9431882574442028, "grad_norm": 6.085950440488684, "learning_rate": 8.44360310041159e-08, "loss": 17.3625, "step": 51599 }, { "epoch": 0.9432065366406492, "grad_norm": 3.8513213622366655, "learning_rate": 8.438186931721382e-08, "loss": 16.4678, "step": 51600 }, { "epoch": 0.9432248158370958, "grad_norm": 7.853436848548226, "learning_rate": 8.432772485912677e-08, "loss": 17.9389, "step": 51601 }, { "epoch": 0.9432430950335423, "grad_norm": 5.602109791890613, "learning_rate": 8.427359763004406e-08, "loss": 17.1867, "step": 51602 }, { "epoch": 0.9432613742299889, "grad_norm": 5.624284413351244, "learning_rate": 8.421948763015608e-08, "loss": 17.3133, "step": 51603 }, { "epoch": 0.9432796534264354, "grad_norm": 7.366195259298757, "learning_rate": 8.416539485965269e-08, "loss": 17.7149, "step": 51604 }, { "epoch": 0.9432979326228819, "grad_norm": 5.996012378377292, "learning_rate": 8.411131931872263e-08, "loss": 17.0429, "step": 51605 }, { "epoch": 0.9433162118193285, "grad_norm": 7.688728122157485, "learning_rate": 8.405726100755574e-08, "loss": 17.2655, "step": 51606 }, { "epoch": 0.9433344910157749, "grad_norm": 6.257404245723691, "learning_rate": 8.400321992634186e-08, "loss": 17.5382, "step": 51607 }, { "epoch": 0.9433527702122215, "grad_norm": 4.635132362306517, "learning_rate": 8.394919607526975e-08, "loss": 16.8094, "step": 51608 }, { "epoch": 0.943371049408668, "grad_norm": 7.193698370612243, "learning_rate": 8.389518945452923e-08, "loss": 17.8415, "step": 51609 }, { "epoch": 0.9433893286051145, "grad_norm": 5.982271369879472, "learning_rate": 8.384120006430963e-08, "loss": 17.2766, "step": 51610 }, { "epoch": 0.9434076078015611, "grad_norm": 5.475860482410516, "learning_rate": 8.378722790480021e-08, "loss": 17.1628, "step": 51611 }, { "epoch": 0.9434258869980076, "grad_norm": 7.554278525873138, "learning_rate": 8.373327297618916e-08, "loss": 17.6942, "step": 51612 }, { "epoch": 0.943444166194454, "grad_norm": 6.331644218703177, "learning_rate": 8.367933527866689e-08, "loss": 17.3546, "step": 51613 }, { "epoch": 0.9434624453909006, "grad_norm": 5.099477434359344, "learning_rate": 8.362541481242215e-08, "loss": 16.7942, "step": 51614 }, { "epoch": 0.9434807245873471, "grad_norm": 7.9627222697848525, "learning_rate": 8.357151157764365e-08, "loss": 17.7551, "step": 51615 }, { "epoch": 0.9434990037837937, "grad_norm": 6.258852688044616, "learning_rate": 8.351762557452014e-08, "loss": 17.4454, "step": 51616 }, { "epoch": 0.9435172829802402, "grad_norm": 5.192935853055531, "learning_rate": 8.346375680324036e-08, "loss": 17.0775, "step": 51617 }, { "epoch": 0.9435355621766867, "grad_norm": 5.464991130964691, "learning_rate": 8.340990526399418e-08, "loss": 17.036, "step": 51618 }, { "epoch": 0.9435538413731333, "grad_norm": 5.351990658176166, "learning_rate": 8.335607095696918e-08, "loss": 16.8193, "step": 51619 }, { "epoch": 0.9435721205695797, "grad_norm": 6.01285736980238, "learning_rate": 8.330225388235469e-08, "loss": 17.2404, "step": 51620 }, { "epoch": 0.9435903997660263, "grad_norm": 7.185500626498764, "learning_rate": 8.324845404033888e-08, "loss": 17.8067, "step": 51621 }, { "epoch": 0.9436086789624728, "grad_norm": 8.048173402374607, "learning_rate": 8.319467143111105e-08, "loss": 18.1124, "step": 51622 }, { "epoch": 0.9436269581589193, "grad_norm": 5.763680238108102, "learning_rate": 8.314090605485881e-08, "loss": 17.2887, "step": 51623 }, { "epoch": 0.9436452373553659, "grad_norm": 5.24856384020177, "learning_rate": 8.308715791177146e-08, "loss": 16.8768, "step": 51624 }, { "epoch": 0.9436635165518124, "grad_norm": 6.596669852920535, "learning_rate": 8.303342700203665e-08, "loss": 17.8954, "step": 51625 }, { "epoch": 0.943681795748259, "grad_norm": 5.171902350498216, "learning_rate": 8.297971332584309e-08, "loss": 16.913, "step": 51626 }, { "epoch": 0.9437000749447054, "grad_norm": 6.292072114865486, "learning_rate": 8.292601688337897e-08, "loss": 17.4791, "step": 51627 }, { "epoch": 0.9437183541411519, "grad_norm": 7.307959419365124, "learning_rate": 8.287233767483249e-08, "loss": 17.5672, "step": 51628 }, { "epoch": 0.9437366333375985, "grad_norm": 7.845292478970721, "learning_rate": 8.281867570039126e-08, "loss": 17.435, "step": 51629 }, { "epoch": 0.943754912534045, "grad_norm": 5.451886201468214, "learning_rate": 8.276503096024457e-08, "loss": 17.2634, "step": 51630 }, { "epoch": 0.9437731917304916, "grad_norm": 6.2745497655511935, "learning_rate": 8.271140345457951e-08, "loss": 17.179, "step": 51631 }, { "epoch": 0.943791470926938, "grad_norm": 6.117641206499175, "learning_rate": 8.26577931835848e-08, "loss": 17.3341, "step": 51632 }, { "epoch": 0.9438097501233845, "grad_norm": 6.946429408210624, "learning_rate": 8.260420014744697e-08, "loss": 17.6686, "step": 51633 }, { "epoch": 0.9438280293198311, "grad_norm": 6.7117555000456965, "learning_rate": 8.25506243463553e-08, "loss": 17.5772, "step": 51634 }, { "epoch": 0.9438463085162776, "grad_norm": 5.321081231675456, "learning_rate": 8.249706578049688e-08, "loss": 16.8864, "step": 51635 }, { "epoch": 0.9438645877127242, "grad_norm": 6.495856861827757, "learning_rate": 8.244352445005932e-08, "loss": 17.3933, "step": 51636 }, { "epoch": 0.9438828669091707, "grad_norm": 6.811808404410143, "learning_rate": 8.239000035523026e-08, "loss": 17.461, "step": 51637 }, { "epoch": 0.9439011461056172, "grad_norm": 7.264209658216882, "learning_rate": 8.233649349619787e-08, "loss": 17.2691, "step": 51638 }, { "epoch": 0.9439194253020637, "grad_norm": 6.996158100145291, "learning_rate": 8.228300387314869e-08, "loss": 17.715, "step": 51639 }, { "epoch": 0.9439377044985102, "grad_norm": 6.460297392522973, "learning_rate": 8.2229531486272e-08, "loss": 17.6423, "step": 51640 }, { "epoch": 0.9439559836949568, "grad_norm": 6.300815676216546, "learning_rate": 8.21760763357532e-08, "loss": 17.4313, "step": 51641 }, { "epoch": 0.9439742628914033, "grad_norm": 6.60500833164238, "learning_rate": 8.212263842178103e-08, "loss": 17.3925, "step": 51642 }, { "epoch": 0.9439925420878498, "grad_norm": 5.711868910309475, "learning_rate": 8.206921774454201e-08, "loss": 17.2292, "step": 51643 }, { "epoch": 0.9440108212842964, "grad_norm": 5.085682879075384, "learning_rate": 8.201581430422378e-08, "loss": 16.95, "step": 51644 }, { "epoch": 0.9440291004807428, "grad_norm": 6.6513599251241695, "learning_rate": 8.196242810101284e-08, "loss": 17.364, "step": 51645 }, { "epoch": 0.9440473796771894, "grad_norm": 7.301768743366375, "learning_rate": 8.190905913509739e-08, "loss": 17.6151, "step": 51646 }, { "epoch": 0.9440656588736359, "grad_norm": 4.963311858260816, "learning_rate": 8.185570740666337e-08, "loss": 16.8835, "step": 51647 }, { "epoch": 0.9440839380700824, "grad_norm": 5.172612939455244, "learning_rate": 8.180237291589843e-08, "loss": 16.8909, "step": 51648 }, { "epoch": 0.944102217266529, "grad_norm": 5.338882081762889, "learning_rate": 8.174905566298963e-08, "loss": 17.099, "step": 51649 }, { "epoch": 0.9441204964629755, "grad_norm": 5.80632390654596, "learning_rate": 8.169575564812405e-08, "loss": 16.9094, "step": 51650 }, { "epoch": 0.9441387756594221, "grad_norm": 6.6118252825075015, "learning_rate": 8.164247287148764e-08, "loss": 17.232, "step": 51651 }, { "epoch": 0.9441570548558685, "grad_norm": 7.408088648506276, "learning_rate": 8.158920733326747e-08, "loss": 17.6841, "step": 51652 }, { "epoch": 0.944175334052315, "grad_norm": 7.014059830213654, "learning_rate": 8.153595903365008e-08, "loss": 17.438, "step": 51653 }, { "epoch": 0.9441936132487616, "grad_norm": 6.598777621045101, "learning_rate": 8.148272797282309e-08, "loss": 17.2032, "step": 51654 }, { "epoch": 0.9442118924452081, "grad_norm": 5.999502921317128, "learning_rate": 8.142951415097189e-08, "loss": 16.9487, "step": 51655 }, { "epoch": 0.9442301716416547, "grad_norm": 5.127889102436149, "learning_rate": 8.137631756828302e-08, "loss": 16.9299, "step": 51656 }, { "epoch": 0.9442484508381012, "grad_norm": 6.255354307388972, "learning_rate": 8.132313822494353e-08, "loss": 17.3604, "step": 51657 }, { "epoch": 0.9442667300345476, "grad_norm": 5.764578065678862, "learning_rate": 8.126997612113996e-08, "loss": 16.9663, "step": 51658 }, { "epoch": 0.9442850092309942, "grad_norm": 5.562852816836801, "learning_rate": 8.121683125705826e-08, "loss": 17.1627, "step": 51659 }, { "epoch": 0.9443032884274407, "grad_norm": 5.942430458737657, "learning_rate": 8.116370363288439e-08, "loss": 17.1912, "step": 51660 }, { "epoch": 0.9443215676238873, "grad_norm": 5.17268216458474, "learning_rate": 8.111059324880543e-08, "loss": 17.0677, "step": 51661 }, { "epoch": 0.9443398468203338, "grad_norm": 5.575868630574557, "learning_rate": 8.105750010500679e-08, "loss": 17.1517, "step": 51662 }, { "epoch": 0.9443581260167803, "grad_norm": 5.866992902416442, "learning_rate": 8.100442420167498e-08, "loss": 17.0681, "step": 51663 }, { "epoch": 0.9443764052132269, "grad_norm": 5.716459441587155, "learning_rate": 8.095136553899541e-08, "loss": 17.1675, "step": 51664 }, { "epoch": 0.9443946844096733, "grad_norm": 6.296340324170258, "learning_rate": 8.08983241171546e-08, "loss": 17.1683, "step": 51665 }, { "epoch": 0.9444129636061199, "grad_norm": 5.347034024696216, "learning_rate": 8.084529993633794e-08, "loss": 17.0596, "step": 51666 }, { "epoch": 0.9444312428025664, "grad_norm": 5.329702642587549, "learning_rate": 8.079229299673197e-08, "loss": 17.1585, "step": 51667 }, { "epoch": 0.9444495219990129, "grad_norm": 6.0745791098209025, "learning_rate": 8.073930329852209e-08, "loss": 17.0999, "step": 51668 }, { "epoch": 0.9444678011954595, "grad_norm": 6.527713967122638, "learning_rate": 8.068633084189425e-08, "loss": 17.4823, "step": 51669 }, { "epoch": 0.944486080391906, "grad_norm": 8.45910040043231, "learning_rate": 8.063337562703333e-08, "loss": 17.7065, "step": 51670 }, { "epoch": 0.9445043595883525, "grad_norm": 6.153634914064023, "learning_rate": 8.058043765412638e-08, "loss": 17.2982, "step": 51671 }, { "epoch": 0.944522638784799, "grad_norm": 6.344659683452921, "learning_rate": 8.052751692335825e-08, "loss": 17.3642, "step": 51672 }, { "epoch": 0.9445409179812455, "grad_norm": 7.373583684058025, "learning_rate": 8.047461343491325e-08, "loss": 17.1739, "step": 51673 }, { "epoch": 0.9445591971776921, "grad_norm": 7.612176876916209, "learning_rate": 8.042172718897844e-08, "loss": 17.7202, "step": 51674 }, { "epoch": 0.9445774763741386, "grad_norm": 7.4967852650103355, "learning_rate": 8.036885818573869e-08, "loss": 17.534, "step": 51675 }, { "epoch": 0.9445957555705852, "grad_norm": 5.081301528893946, "learning_rate": 8.031600642537885e-08, "loss": 16.9247, "step": 51676 }, { "epoch": 0.9446140347670317, "grad_norm": 7.228266346073058, "learning_rate": 8.02631719080843e-08, "loss": 17.6574, "step": 51677 }, { "epoch": 0.9446323139634781, "grad_norm": 7.322799431695863, "learning_rate": 8.021035463404104e-08, "loss": 17.9598, "step": 51678 }, { "epoch": 0.9446505931599247, "grad_norm": 5.536213834463462, "learning_rate": 8.015755460343333e-08, "loss": 16.8471, "step": 51679 }, { "epoch": 0.9446688723563712, "grad_norm": 7.475927721913641, "learning_rate": 8.01047718164466e-08, "loss": 17.5789, "step": 51680 }, { "epoch": 0.9446871515528177, "grad_norm": 5.616459025346554, "learning_rate": 8.005200627326571e-08, "loss": 17.0613, "step": 51681 }, { "epoch": 0.9447054307492643, "grad_norm": 6.386223125842846, "learning_rate": 7.999925797407493e-08, "loss": 17.3893, "step": 51682 }, { "epoch": 0.9447237099457108, "grad_norm": 6.5236581888753875, "learning_rate": 7.994652691906024e-08, "loss": 17.2488, "step": 51683 }, { "epoch": 0.9447419891421573, "grad_norm": 5.780567431897127, "learning_rate": 7.989381310840649e-08, "loss": 17.2046, "step": 51684 }, { "epoch": 0.9447602683386038, "grad_norm": 6.55346959302936, "learning_rate": 7.984111654229743e-08, "loss": 17.6345, "step": 51685 }, { "epoch": 0.9447785475350503, "grad_norm": 6.5547913278414525, "learning_rate": 7.978843722091844e-08, "loss": 17.619, "step": 51686 }, { "epoch": 0.9447968267314969, "grad_norm": 5.3491282786982355, "learning_rate": 7.973577514445386e-08, "loss": 17.1228, "step": 51687 }, { "epoch": 0.9448151059279434, "grad_norm": 6.391165865387386, "learning_rate": 7.968313031308905e-08, "loss": 17.3051, "step": 51688 }, { "epoch": 0.94483338512439, "grad_norm": 6.545264803937263, "learning_rate": 7.963050272700778e-08, "loss": 17.9139, "step": 51689 }, { "epoch": 0.9448516643208364, "grad_norm": 6.6491068460245915, "learning_rate": 7.957789238639379e-08, "loss": 17.3688, "step": 51690 }, { "epoch": 0.9448699435172829, "grad_norm": 5.087135817984754, "learning_rate": 7.952529929143305e-08, "loss": 16.8062, "step": 51691 }, { "epoch": 0.9448882227137295, "grad_norm": 6.868216184771279, "learning_rate": 7.947272344230872e-08, "loss": 17.5146, "step": 51692 }, { "epoch": 0.944906501910176, "grad_norm": 5.250298558485768, "learning_rate": 7.942016483920567e-08, "loss": 16.9717, "step": 51693 }, { "epoch": 0.9449247811066226, "grad_norm": 6.13682042839231, "learning_rate": 7.936762348230819e-08, "loss": 17.2738, "step": 51694 }, { "epoch": 0.9449430603030691, "grad_norm": 5.670288975831137, "learning_rate": 7.931509937180004e-08, "loss": 16.9504, "step": 51695 }, { "epoch": 0.9449613394995156, "grad_norm": 5.66473001212223, "learning_rate": 7.92625925078655e-08, "loss": 17.1019, "step": 51696 }, { "epoch": 0.9449796186959621, "grad_norm": 5.251197094815441, "learning_rate": 7.921010289068887e-08, "loss": 16.8155, "step": 51697 }, { "epoch": 0.9449978978924086, "grad_norm": 7.900981372851571, "learning_rate": 7.915763052045333e-08, "loss": 17.7383, "step": 51698 }, { "epoch": 0.9450161770888552, "grad_norm": 6.832198681186811, "learning_rate": 7.910517539734319e-08, "loss": 17.5455, "step": 51699 }, { "epoch": 0.9450344562853017, "grad_norm": 5.591336717098171, "learning_rate": 7.905273752154275e-08, "loss": 17.0423, "step": 51700 }, { "epoch": 0.9450527354817482, "grad_norm": 6.47123367457417, "learning_rate": 7.900031689323517e-08, "loss": 17.583, "step": 51701 }, { "epoch": 0.9450710146781948, "grad_norm": 5.854468966679324, "learning_rate": 7.894791351260478e-08, "loss": 17.1656, "step": 51702 }, { "epoch": 0.9450892938746412, "grad_norm": 6.190266284343851, "learning_rate": 7.889552737983474e-08, "loss": 17.3415, "step": 51703 }, { "epoch": 0.9451075730710878, "grad_norm": 7.10930937265014, "learning_rate": 7.884315849510881e-08, "loss": 17.5459, "step": 51704 }, { "epoch": 0.9451258522675343, "grad_norm": 5.303296381447302, "learning_rate": 7.879080685861074e-08, "loss": 17.1772, "step": 51705 }, { "epoch": 0.9451441314639808, "grad_norm": 6.364669265988493, "learning_rate": 7.873847247052368e-08, "loss": 17.279, "step": 51706 }, { "epoch": 0.9451624106604274, "grad_norm": 6.1771427837886685, "learning_rate": 7.868615533103141e-08, "loss": 17.165, "step": 51707 }, { "epoch": 0.9451806898568739, "grad_norm": 5.530882795987447, "learning_rate": 7.86338554403171e-08, "loss": 17.0302, "step": 51708 }, { "epoch": 0.9451989690533205, "grad_norm": 4.218285103465284, "learning_rate": 7.858157279856393e-08, "loss": 16.6566, "step": 51709 }, { "epoch": 0.9452172482497669, "grad_norm": 5.521467727143015, "learning_rate": 7.85293074059551e-08, "loss": 16.8717, "step": 51710 }, { "epoch": 0.9452355274462134, "grad_norm": 5.44935748048324, "learning_rate": 7.847705926267435e-08, "loss": 16.8881, "step": 51711 }, { "epoch": 0.94525380664266, "grad_norm": 5.104615927375641, "learning_rate": 7.84248283689043e-08, "loss": 16.7714, "step": 51712 }, { "epoch": 0.9452720858391065, "grad_norm": 5.6126003766655375, "learning_rate": 7.837261472482815e-08, "loss": 17.0632, "step": 51713 }, { "epoch": 0.9452903650355531, "grad_norm": 6.158526778057462, "learning_rate": 7.832041833062909e-08, "loss": 17.1901, "step": 51714 }, { "epoch": 0.9453086442319996, "grad_norm": 6.675507819362112, "learning_rate": 7.826823918648918e-08, "loss": 17.7719, "step": 51715 }, { "epoch": 0.945326923428446, "grad_norm": 6.330661677901995, "learning_rate": 7.821607729259273e-08, "loss": 17.4814, "step": 51716 }, { "epoch": 0.9453452026248926, "grad_norm": 5.948730520364501, "learning_rate": 7.816393264912181e-08, "loss": 17.1848, "step": 51717 }, { "epoch": 0.9453634818213391, "grad_norm": 6.676455349801405, "learning_rate": 7.811180525625906e-08, "loss": 17.5302, "step": 51718 }, { "epoch": 0.9453817610177857, "grad_norm": 7.561516196695681, "learning_rate": 7.80596951141871e-08, "loss": 17.8366, "step": 51719 }, { "epoch": 0.9454000402142322, "grad_norm": 6.378675851250194, "learning_rate": 7.800760222308912e-08, "loss": 17.4837, "step": 51720 }, { "epoch": 0.9454183194106787, "grad_norm": 5.974203784168591, "learning_rate": 7.795552658314665e-08, "loss": 17.4088, "step": 51721 }, { "epoch": 0.9454365986071253, "grad_norm": 6.148318949871895, "learning_rate": 7.790346819454397e-08, "loss": 17.4782, "step": 51722 }, { "epoch": 0.9454548778035717, "grad_norm": 6.268619804316084, "learning_rate": 7.785142705746207e-08, "loss": 17.5377, "step": 51723 }, { "epoch": 0.9454731570000183, "grad_norm": 5.739453847585038, "learning_rate": 7.779940317208357e-08, "loss": 17.0457, "step": 51724 }, { "epoch": 0.9454914361964648, "grad_norm": 6.964776559100936, "learning_rate": 7.774739653859165e-08, "loss": 16.9185, "step": 51725 }, { "epoch": 0.9455097153929113, "grad_norm": 6.407527527359452, "learning_rate": 7.769540715716784e-08, "loss": 17.3623, "step": 51726 }, { "epoch": 0.9455279945893579, "grad_norm": 6.817941319241744, "learning_rate": 7.76434350279942e-08, "loss": 17.6953, "step": 51727 }, { "epoch": 0.9455462737858044, "grad_norm": 5.274606270597897, "learning_rate": 7.75914801512534e-08, "loss": 17.0006, "step": 51728 }, { "epoch": 0.945564552982251, "grad_norm": 7.60150165141514, "learning_rate": 7.753954252712692e-08, "loss": 17.9977, "step": 51729 }, { "epoch": 0.9455828321786974, "grad_norm": 6.41070467539445, "learning_rate": 7.748762215579687e-08, "loss": 17.4744, "step": 51730 }, { "epoch": 0.9456011113751439, "grad_norm": 6.560933489618112, "learning_rate": 7.74357190374464e-08, "loss": 17.5762, "step": 51731 }, { "epoch": 0.9456193905715905, "grad_norm": 6.61282155695658, "learning_rate": 7.738383317225595e-08, "loss": 17.6024, "step": 51732 }, { "epoch": 0.945637669768037, "grad_norm": 5.37259568700234, "learning_rate": 7.73319645604087e-08, "loss": 17.0301, "step": 51733 }, { "epoch": 0.9456559489644836, "grad_norm": 6.1161101474141235, "learning_rate": 7.728011320208561e-08, "loss": 17.3129, "step": 51734 }, { "epoch": 0.94567422816093, "grad_norm": 6.601497064166309, "learning_rate": 7.722827909746821e-08, "loss": 17.5877, "step": 51735 }, { "epoch": 0.9456925073573765, "grad_norm": 5.1292982753508225, "learning_rate": 7.717646224673858e-08, "loss": 16.9383, "step": 51736 }, { "epoch": 0.9457107865538231, "grad_norm": 6.404318986733232, "learning_rate": 7.712466265007823e-08, "loss": 17.2184, "step": 51737 }, { "epoch": 0.9457290657502696, "grad_norm": 8.12792470126521, "learning_rate": 7.707288030766868e-08, "loss": 18.0808, "step": 51738 }, { "epoch": 0.9457473449467162, "grad_norm": 6.890530997057747, "learning_rate": 7.702111521969147e-08, "loss": 17.7291, "step": 51739 }, { "epoch": 0.9457656241431627, "grad_norm": 6.128914353609759, "learning_rate": 7.696936738632866e-08, "loss": 17.4489, "step": 51740 }, { "epoch": 0.9457839033396092, "grad_norm": 6.054718359103269, "learning_rate": 7.69176368077601e-08, "loss": 17.445, "step": 51741 }, { "epoch": 0.9458021825360557, "grad_norm": 5.864572551848787, "learning_rate": 7.686592348416899e-08, "loss": 17.3588, "step": 51742 }, { "epoch": 0.9458204617325022, "grad_norm": 7.067271018459147, "learning_rate": 7.681422741573518e-08, "loss": 17.6936, "step": 51743 }, { "epoch": 0.9458387409289488, "grad_norm": 6.19745818569201, "learning_rate": 7.676254860263965e-08, "loss": 17.1618, "step": 51744 }, { "epoch": 0.9458570201253953, "grad_norm": 7.000933411947705, "learning_rate": 7.671088704506502e-08, "loss": 17.4835, "step": 51745 }, { "epoch": 0.9458752993218418, "grad_norm": 5.924334464429088, "learning_rate": 7.665924274319114e-08, "loss": 17.2102, "step": 51746 }, { "epoch": 0.9458935785182884, "grad_norm": 5.948590847622618, "learning_rate": 7.660761569719899e-08, "loss": 17.4943, "step": 51747 }, { "epoch": 0.9459118577147348, "grad_norm": 5.350436923230958, "learning_rate": 7.655600590727064e-08, "loss": 16.9766, "step": 51748 }, { "epoch": 0.9459301369111813, "grad_norm": 8.649953439478828, "learning_rate": 7.650441337358594e-08, "loss": 18.1057, "step": 51749 }, { "epoch": 0.9459484161076279, "grad_norm": 12.350124509612675, "learning_rate": 7.645283809632642e-08, "loss": 17.6165, "step": 51750 }, { "epoch": 0.9459666953040744, "grad_norm": 6.204987057023572, "learning_rate": 7.640128007567193e-08, "loss": 17.1043, "step": 51751 }, { "epoch": 0.945984974500521, "grad_norm": 5.510109652304054, "learning_rate": 7.634973931180401e-08, "loss": 16.9562, "step": 51752 }, { "epoch": 0.9460032536969675, "grad_norm": 7.26836535661941, "learning_rate": 7.629821580490304e-08, "loss": 17.8121, "step": 51753 }, { "epoch": 0.946021532893414, "grad_norm": 9.190150980064134, "learning_rate": 7.624670955514945e-08, "loss": 18.2178, "step": 51754 }, { "epoch": 0.9460398120898605, "grad_norm": 6.129853389044105, "learning_rate": 7.619522056272366e-08, "loss": 17.479, "step": 51755 }, { "epoch": 0.946058091286307, "grad_norm": 5.312135822718714, "learning_rate": 7.614374882780662e-08, "loss": 17.0868, "step": 51756 }, { "epoch": 0.9460763704827536, "grad_norm": 6.541618140713663, "learning_rate": 7.609229435057819e-08, "loss": 17.1912, "step": 51757 }, { "epoch": 0.9460946496792001, "grad_norm": 5.478511145475388, "learning_rate": 7.604085713121934e-08, "loss": 17.1327, "step": 51758 }, { "epoch": 0.9461129288756466, "grad_norm": 6.978245432255516, "learning_rate": 7.598943716990992e-08, "loss": 17.8861, "step": 51759 }, { "epoch": 0.9461312080720932, "grad_norm": 7.144233549036013, "learning_rate": 7.593803446683034e-08, "loss": 17.5912, "step": 51760 }, { "epoch": 0.9461494872685396, "grad_norm": 6.906214090344001, "learning_rate": 7.588664902215992e-08, "loss": 17.4448, "step": 51761 }, { "epoch": 0.9461677664649862, "grad_norm": 7.426906914080462, "learning_rate": 7.583528083608072e-08, "loss": 18.0091, "step": 51762 }, { "epoch": 0.9461860456614327, "grad_norm": 6.350314733800001, "learning_rate": 7.578392990877037e-08, "loss": 17.3146, "step": 51763 }, { "epoch": 0.9462043248578792, "grad_norm": 5.9767639980796945, "learning_rate": 7.573259624041096e-08, "loss": 17.4132, "step": 51764 }, { "epoch": 0.9462226040543258, "grad_norm": 5.500496721663057, "learning_rate": 7.568127983118068e-08, "loss": 17.1419, "step": 51765 }, { "epoch": 0.9462408832507723, "grad_norm": 6.238432011848726, "learning_rate": 7.562998068126049e-08, "loss": 17.2219, "step": 51766 }, { "epoch": 0.9462591624472189, "grad_norm": 7.069394037391933, "learning_rate": 7.557869879083024e-08, "loss": 17.9632, "step": 51767 }, { "epoch": 0.9462774416436653, "grad_norm": 5.514155523322532, "learning_rate": 7.552743416006924e-08, "loss": 17.0842, "step": 51768 }, { "epoch": 0.9462957208401118, "grad_norm": 4.887685332611694, "learning_rate": 7.547618678915681e-08, "loss": 16.9511, "step": 51769 }, { "epoch": 0.9463140000365584, "grad_norm": 6.062805664906888, "learning_rate": 7.542495667827388e-08, "loss": 17.1311, "step": 51770 }, { "epoch": 0.9463322792330049, "grad_norm": 4.729953564028084, "learning_rate": 7.537374382759865e-08, "loss": 16.7145, "step": 51771 }, { "epoch": 0.9463505584294515, "grad_norm": 5.992401597223363, "learning_rate": 7.5322548237311e-08, "loss": 17.0662, "step": 51772 }, { "epoch": 0.946368837625898, "grad_norm": 6.071776183903585, "learning_rate": 7.527136990759076e-08, "loss": 17.2508, "step": 51773 }, { "epoch": 0.9463871168223444, "grad_norm": 5.151658735684154, "learning_rate": 7.522020883861669e-08, "loss": 16.9752, "step": 51774 }, { "epoch": 0.946405396018791, "grad_norm": 6.0326745688390035, "learning_rate": 7.516906503056808e-08, "loss": 17.3684, "step": 51775 }, { "epoch": 0.9464236752152375, "grad_norm": 5.853271413281417, "learning_rate": 7.511793848362536e-08, "loss": 17.3573, "step": 51776 }, { "epoch": 0.9464419544116841, "grad_norm": 5.198523019218322, "learning_rate": 7.506682919796615e-08, "loss": 17.141, "step": 51777 }, { "epoch": 0.9464602336081306, "grad_norm": 5.622022607268002, "learning_rate": 7.501573717377086e-08, "loss": 17.2702, "step": 51778 }, { "epoch": 0.9464785128045771, "grad_norm": 6.2224264517652825, "learning_rate": 7.496466241121824e-08, "loss": 17.4284, "step": 51779 }, { "epoch": 0.9464967920010237, "grad_norm": 6.121140395454988, "learning_rate": 7.491360491048649e-08, "loss": 16.9803, "step": 51780 }, { "epoch": 0.9465150711974701, "grad_norm": 6.698895782972475, "learning_rate": 7.486256467175546e-08, "loss": 17.8204, "step": 51781 }, { "epoch": 0.9465333503939167, "grad_norm": 5.874903754901382, "learning_rate": 7.481154169520443e-08, "loss": 17.0893, "step": 51782 }, { "epoch": 0.9465516295903632, "grad_norm": 6.604431233045238, "learning_rate": 7.476053598101052e-08, "loss": 17.7054, "step": 51783 }, { "epoch": 0.9465699087868097, "grad_norm": 10.738424525799845, "learning_rate": 7.470954752935355e-08, "loss": 18.1095, "step": 51784 }, { "epoch": 0.9465881879832563, "grad_norm": 6.6947634856429445, "learning_rate": 7.465857634041284e-08, "loss": 17.6087, "step": 51785 }, { "epoch": 0.9466064671797028, "grad_norm": 6.681532748583036, "learning_rate": 7.460762241436604e-08, "loss": 17.8526, "step": 51786 }, { "epoch": 0.9466247463761494, "grad_norm": 6.460357855163933, "learning_rate": 7.455668575139241e-08, "loss": 17.122, "step": 51787 }, { "epoch": 0.9466430255725958, "grad_norm": 5.214086732245959, "learning_rate": 7.450576635167018e-08, "loss": 16.9859, "step": 51788 }, { "epoch": 0.9466613047690423, "grad_norm": 6.518394330429973, "learning_rate": 7.445486421537751e-08, "loss": 17.3971, "step": 51789 }, { "epoch": 0.9466795839654889, "grad_norm": 7.261218165714883, "learning_rate": 7.440397934269317e-08, "loss": 17.5682, "step": 51790 }, { "epoch": 0.9466978631619354, "grad_norm": 7.044184937435797, "learning_rate": 7.435311173379589e-08, "loss": 17.4306, "step": 51791 }, { "epoch": 0.946716142358382, "grad_norm": 6.771131694523929, "learning_rate": 7.430226138886276e-08, "loss": 17.9573, "step": 51792 }, { "epoch": 0.9467344215548285, "grad_norm": 6.205353920905604, "learning_rate": 7.425142830807308e-08, "loss": 17.0844, "step": 51793 }, { "epoch": 0.9467527007512749, "grad_norm": 6.722409106191187, "learning_rate": 7.420061249160504e-08, "loss": 17.499, "step": 51794 }, { "epoch": 0.9467709799477215, "grad_norm": 5.224872181784935, "learning_rate": 7.414981393963628e-08, "loss": 17.1068, "step": 51795 }, { "epoch": 0.946789259144168, "grad_norm": 6.054006358937284, "learning_rate": 7.409903265234497e-08, "loss": 17.2668, "step": 51796 }, { "epoch": 0.9468075383406146, "grad_norm": 6.551099957751526, "learning_rate": 7.404826862990877e-08, "loss": 17.5068, "step": 51797 }, { "epoch": 0.9468258175370611, "grad_norm": 5.604942474147887, "learning_rate": 7.399752187250641e-08, "loss": 17.0612, "step": 51798 }, { "epoch": 0.9468440967335076, "grad_norm": 5.553446371710274, "learning_rate": 7.3946792380315e-08, "loss": 16.9073, "step": 51799 }, { "epoch": 0.9468623759299541, "grad_norm": 4.711940598562374, "learning_rate": 7.389608015351268e-08, "loss": 16.8935, "step": 51800 }, { "epoch": 0.9468806551264006, "grad_norm": 8.198438014336757, "learning_rate": 7.384538519227713e-08, "loss": 17.6035, "step": 51801 }, { "epoch": 0.9468989343228472, "grad_norm": 6.168462103409518, "learning_rate": 7.379470749678596e-08, "loss": 16.9078, "step": 51802 }, { "epoch": 0.9469172135192937, "grad_norm": 5.686551420425697, "learning_rate": 7.374404706721683e-08, "loss": 17.2055, "step": 51803 }, { "epoch": 0.9469354927157402, "grad_norm": 7.580118497795277, "learning_rate": 7.369340390374791e-08, "loss": 17.8459, "step": 51804 }, { "epoch": 0.9469537719121868, "grad_norm": 6.378509298349865, "learning_rate": 7.364277800655573e-08, "loss": 17.0054, "step": 51805 }, { "epoch": 0.9469720511086333, "grad_norm": 5.144818977640577, "learning_rate": 7.359216937581792e-08, "loss": 17.1991, "step": 51806 }, { "epoch": 0.9469903303050798, "grad_norm": 6.213536396789818, "learning_rate": 7.354157801171214e-08, "loss": 17.6109, "step": 51807 }, { "epoch": 0.9470086095015263, "grad_norm": 6.727879050242452, "learning_rate": 7.349100391441544e-08, "loss": 17.6621, "step": 51808 }, { "epoch": 0.9470268886979728, "grad_norm": 4.340407030643317, "learning_rate": 7.344044708410603e-08, "loss": 16.6367, "step": 51809 }, { "epoch": 0.9470451678944194, "grad_norm": 13.64419138289487, "learning_rate": 7.338990752095932e-08, "loss": 16.94, "step": 51810 }, { "epoch": 0.9470634470908659, "grad_norm": 5.280284490835703, "learning_rate": 7.333938522515405e-08, "loss": 17.0965, "step": 51811 }, { "epoch": 0.9470817262873125, "grad_norm": 6.436081884339944, "learning_rate": 7.328888019686675e-08, "loss": 17.0069, "step": 51812 }, { "epoch": 0.9471000054837589, "grad_norm": 6.294338709091787, "learning_rate": 7.323839243627451e-08, "loss": 17.1985, "step": 51813 }, { "epoch": 0.9471182846802054, "grad_norm": 6.328648931079916, "learning_rate": 7.318792194355385e-08, "loss": 17.5442, "step": 51814 }, { "epoch": 0.947136563876652, "grad_norm": 5.985808315008945, "learning_rate": 7.313746871888183e-08, "loss": 17.2315, "step": 51815 }, { "epoch": 0.9471548430730985, "grad_norm": 6.345988446901727, "learning_rate": 7.308703276243612e-08, "loss": 17.3643, "step": 51816 }, { "epoch": 0.947173122269545, "grad_norm": 5.553361908146239, "learning_rate": 7.303661407439211e-08, "loss": 16.9456, "step": 51817 }, { "epoch": 0.9471914014659916, "grad_norm": 7.6784394806677625, "learning_rate": 7.2986212654928e-08, "loss": 17.5355, "step": 51818 }, { "epoch": 0.947209680662438, "grad_norm": 5.152718741693236, "learning_rate": 7.293582850421866e-08, "loss": 16.9127, "step": 51819 }, { "epoch": 0.9472279598588846, "grad_norm": 7.985477504230096, "learning_rate": 7.288546162244225e-08, "loss": 17.8967, "step": 51820 }, { "epoch": 0.9472462390553311, "grad_norm": 7.040147119767538, "learning_rate": 7.283511200977477e-08, "loss": 17.5955, "step": 51821 }, { "epoch": 0.9472645182517776, "grad_norm": 6.81335282517484, "learning_rate": 7.278477966639275e-08, "loss": 17.5147, "step": 51822 }, { "epoch": 0.9472827974482242, "grad_norm": 5.502882209003583, "learning_rate": 7.273446459247269e-08, "loss": 16.9182, "step": 51823 }, { "epoch": 0.9473010766446707, "grad_norm": 5.574269395686954, "learning_rate": 7.268416678819057e-08, "loss": 17.0868, "step": 51824 }, { "epoch": 0.9473193558411173, "grad_norm": 8.480291012837139, "learning_rate": 7.263388625372236e-08, "loss": 17.9837, "step": 51825 }, { "epoch": 0.9473376350375637, "grad_norm": 5.601179584922378, "learning_rate": 7.25836229892457e-08, "loss": 17.0442, "step": 51826 }, { "epoch": 0.9473559142340102, "grad_norm": 5.849326106598414, "learning_rate": 7.253337699493545e-08, "loss": 17.3316, "step": 51827 }, { "epoch": 0.9473741934304568, "grad_norm": 5.651130733397097, "learning_rate": 7.248314827096758e-08, "loss": 17.0635, "step": 51828 }, { "epoch": 0.9473924726269033, "grad_norm": 6.246989919889746, "learning_rate": 7.243293681751917e-08, "loss": 17.522, "step": 51829 }, { "epoch": 0.9474107518233499, "grad_norm": 5.685600561436619, "learning_rate": 7.238274263476564e-08, "loss": 17.2503, "step": 51830 }, { "epoch": 0.9474290310197964, "grad_norm": 13.356504640849954, "learning_rate": 7.233256572288294e-08, "loss": 16.8692, "step": 51831 }, { "epoch": 0.9474473102162428, "grad_norm": 5.291879413870419, "learning_rate": 7.228240608204762e-08, "loss": 16.9664, "step": 51832 }, { "epoch": 0.9474655894126894, "grad_norm": 6.344532035496083, "learning_rate": 7.223226371243452e-08, "loss": 17.3069, "step": 51833 }, { "epoch": 0.9474838686091359, "grad_norm": 6.214212007960501, "learning_rate": 7.218213861421907e-08, "loss": 17.3477, "step": 51834 }, { "epoch": 0.9475021478055825, "grad_norm": 7.044909413011364, "learning_rate": 7.213203078757836e-08, "loss": 17.4841, "step": 51835 }, { "epoch": 0.947520427002029, "grad_norm": 5.910237288246387, "learning_rate": 7.208194023268722e-08, "loss": 17.1001, "step": 51836 }, { "epoch": 0.9475387061984755, "grad_norm": 6.149527249671771, "learning_rate": 7.203186694972109e-08, "loss": 17.3819, "step": 51837 }, { "epoch": 0.9475569853949221, "grad_norm": 5.353220341613719, "learning_rate": 7.198181093885537e-08, "loss": 16.94, "step": 51838 }, { "epoch": 0.9475752645913685, "grad_norm": 6.86693031282689, "learning_rate": 7.193177220026549e-08, "loss": 17.3046, "step": 51839 }, { "epoch": 0.9475935437878151, "grad_norm": 7.916539207726815, "learning_rate": 7.188175073412796e-08, "loss": 18.0919, "step": 51840 }, { "epoch": 0.9476118229842616, "grad_norm": 6.6984448646651895, "learning_rate": 7.183174654061708e-08, "loss": 17.7199, "step": 51841 }, { "epoch": 0.9476301021807081, "grad_norm": 6.46191369786279, "learning_rate": 7.178175961990775e-08, "loss": 17.395, "step": 51842 }, { "epoch": 0.9476483813771547, "grad_norm": 5.8797677360023295, "learning_rate": 7.173178997217645e-08, "loss": 17.181, "step": 51843 }, { "epoch": 0.9476666605736012, "grad_norm": 5.163568785179271, "learning_rate": 7.168183759759695e-08, "loss": 16.9798, "step": 51844 }, { "epoch": 0.9476849397700478, "grad_norm": 8.04772153219377, "learning_rate": 7.163190249634522e-08, "loss": 18.0522, "step": 51845 }, { "epoch": 0.9477032189664942, "grad_norm": 7.179271592513434, "learning_rate": 7.158198466859612e-08, "loss": 17.4137, "step": 51846 }, { "epoch": 0.9477214981629407, "grad_norm": 7.908428563289755, "learning_rate": 7.153208411452395e-08, "loss": 17.4123, "step": 51847 }, { "epoch": 0.9477397773593873, "grad_norm": 5.950518895629605, "learning_rate": 7.148220083430468e-08, "loss": 17.3191, "step": 51848 }, { "epoch": 0.9477580565558338, "grad_norm": 5.220594971341663, "learning_rate": 7.143233482811262e-08, "loss": 16.9073, "step": 51849 }, { "epoch": 0.9477763357522804, "grad_norm": 7.072874591397369, "learning_rate": 7.138248609612264e-08, "loss": 17.4222, "step": 51850 }, { "epoch": 0.9477946149487269, "grad_norm": 8.683313802948815, "learning_rate": 7.133265463850847e-08, "loss": 18.5669, "step": 51851 }, { "epoch": 0.9478128941451733, "grad_norm": 6.704671963351235, "learning_rate": 7.128284045544664e-08, "loss": 17.6028, "step": 51852 }, { "epoch": 0.9478311733416199, "grad_norm": 6.174889706072846, "learning_rate": 7.123304354711036e-08, "loss": 17.3246, "step": 51853 }, { "epoch": 0.9478494525380664, "grad_norm": 7.59954173403862, "learning_rate": 7.118326391367502e-08, "loss": 17.6682, "step": 51854 }, { "epoch": 0.947867731734513, "grad_norm": 9.232219607011045, "learning_rate": 7.11335015553144e-08, "loss": 18.0603, "step": 51855 }, { "epoch": 0.9478860109309595, "grad_norm": 5.383205386767196, "learning_rate": 7.108375647220278e-08, "loss": 17.165, "step": 51856 }, { "epoch": 0.947904290127406, "grad_norm": 5.993695919510861, "learning_rate": 7.103402866451448e-08, "loss": 17.2831, "step": 51857 }, { "epoch": 0.9479225693238525, "grad_norm": 5.790468318843097, "learning_rate": 7.098431813242546e-08, "loss": 17.1652, "step": 51858 }, { "epoch": 0.947940848520299, "grad_norm": 5.113932864875076, "learning_rate": 7.093462487610781e-08, "loss": 17.0138, "step": 51859 }, { "epoch": 0.9479591277167456, "grad_norm": 5.998410669554518, "learning_rate": 7.088494889573694e-08, "loss": 17.3122, "step": 51860 }, { "epoch": 0.9479774069131921, "grad_norm": 4.352635481556151, "learning_rate": 7.08352901914866e-08, "loss": 16.7425, "step": 51861 }, { "epoch": 0.9479956861096386, "grad_norm": 6.538789516591863, "learning_rate": 7.078564876353056e-08, "loss": 17.3499, "step": 51862 }, { "epoch": 0.9480139653060852, "grad_norm": 6.240072231514635, "learning_rate": 7.07360246120431e-08, "loss": 17.4529, "step": 51863 }, { "epoch": 0.9480322445025317, "grad_norm": 5.942541970855729, "learning_rate": 7.068641773719798e-08, "loss": 17.0636, "step": 51864 }, { "epoch": 0.9480505236989782, "grad_norm": 7.059186975543357, "learning_rate": 7.063682813917005e-08, "loss": 17.551, "step": 51865 }, { "epoch": 0.9480688028954247, "grad_norm": 6.800911723111251, "learning_rate": 7.058725581813142e-08, "loss": 17.4296, "step": 51866 }, { "epoch": 0.9480870820918712, "grad_norm": 5.77694918541708, "learning_rate": 7.053770077425693e-08, "loss": 17.5036, "step": 51867 }, { "epoch": 0.9481053612883178, "grad_norm": 6.06639639255128, "learning_rate": 7.048816300771976e-08, "loss": 17.1322, "step": 51868 }, { "epoch": 0.9481236404847643, "grad_norm": 5.113270661448796, "learning_rate": 7.043864251869481e-08, "loss": 16.9431, "step": 51869 }, { "epoch": 0.9481419196812109, "grad_norm": 5.643254511311479, "learning_rate": 7.038913930735358e-08, "loss": 17.0269, "step": 51870 }, { "epoch": 0.9481601988776573, "grad_norm": 6.504758498200176, "learning_rate": 7.03396533738715e-08, "loss": 17.4965, "step": 51871 }, { "epoch": 0.9481784780741038, "grad_norm": 6.376990308114319, "learning_rate": 7.029018471842064e-08, "loss": 17.4646, "step": 51872 }, { "epoch": 0.9481967572705504, "grad_norm": 5.308040512082338, "learning_rate": 7.024073334117476e-08, "loss": 16.992, "step": 51873 }, { "epoch": 0.9482150364669969, "grad_norm": 6.051014845369021, "learning_rate": 7.019129924230761e-08, "loss": 16.9463, "step": 51874 }, { "epoch": 0.9482333156634435, "grad_norm": 5.767036507948723, "learning_rate": 7.014188242199239e-08, "loss": 17.1818, "step": 51875 }, { "epoch": 0.94825159485989, "grad_norm": 5.791815347001176, "learning_rate": 7.009248288040115e-08, "loss": 16.935, "step": 51876 }, { "epoch": 0.9482698740563364, "grad_norm": 5.370923523771794, "learning_rate": 7.00431006177088e-08, "loss": 17.0414, "step": 51877 }, { "epoch": 0.948288153252783, "grad_norm": 7.424300762573553, "learning_rate": 6.999373563408795e-08, "loss": 17.373, "step": 51878 }, { "epoch": 0.9483064324492295, "grad_norm": 7.436223972754621, "learning_rate": 6.994438792971015e-08, "loss": 17.3369, "step": 51879 }, { "epoch": 0.9483247116456761, "grad_norm": 6.763989749219556, "learning_rate": 6.989505750475023e-08, "loss": 17.3908, "step": 51880 }, { "epoch": 0.9483429908421226, "grad_norm": 5.861432035394985, "learning_rate": 6.984574435938029e-08, "loss": 16.9087, "step": 51881 }, { "epoch": 0.9483612700385691, "grad_norm": 9.403036367684567, "learning_rate": 6.979644849377298e-08, "loss": 17.5595, "step": 51882 }, { "epoch": 0.9483795492350157, "grad_norm": 6.396827926963833, "learning_rate": 6.974716990810093e-08, "loss": 17.2419, "step": 51883 }, { "epoch": 0.9483978284314621, "grad_norm": 6.794093525380042, "learning_rate": 6.969790860253734e-08, "loss": 17.6617, "step": 51884 }, { "epoch": 0.9484161076279086, "grad_norm": 7.029800522471284, "learning_rate": 6.964866457725539e-08, "loss": 17.8068, "step": 51885 }, { "epoch": 0.9484343868243552, "grad_norm": 4.873137851040859, "learning_rate": 6.959943783242662e-08, "loss": 16.6488, "step": 51886 }, { "epoch": 0.9484526660208017, "grad_norm": 5.089484973676351, "learning_rate": 6.955022836822423e-08, "loss": 16.8025, "step": 51887 }, { "epoch": 0.9484709452172483, "grad_norm": 6.326803732339102, "learning_rate": 6.950103618482029e-08, "loss": 17.253, "step": 51888 }, { "epoch": 0.9484892244136948, "grad_norm": 6.868352921804777, "learning_rate": 6.945186128238746e-08, "loss": 17.4859, "step": 51889 }, { "epoch": 0.9485075036101412, "grad_norm": 6.848580613300615, "learning_rate": 6.94027036610978e-08, "loss": 17.0192, "step": 51890 }, { "epoch": 0.9485257828065878, "grad_norm": 6.604242584783877, "learning_rate": 6.935356332112397e-08, "loss": 17.3077, "step": 51891 }, { "epoch": 0.9485440620030343, "grad_norm": 5.96935110434658, "learning_rate": 6.93044402626375e-08, "loss": 17.4193, "step": 51892 }, { "epoch": 0.9485623411994809, "grad_norm": 5.652216662076836, "learning_rate": 6.925533448581157e-08, "loss": 17.0017, "step": 51893 }, { "epoch": 0.9485806203959274, "grad_norm": 6.092669243095518, "learning_rate": 6.920624599081771e-08, "loss": 17.4689, "step": 51894 }, { "epoch": 0.9485988995923739, "grad_norm": 5.86341070367025, "learning_rate": 6.915717477782857e-08, "loss": 17.2288, "step": 51895 }, { "epoch": 0.9486171787888205, "grad_norm": 7.701722086278984, "learning_rate": 6.910812084701513e-08, "loss": 17.5594, "step": 51896 }, { "epoch": 0.9486354579852669, "grad_norm": 8.226717941222928, "learning_rate": 6.905908419855001e-08, "loss": 17.8878, "step": 51897 }, { "epoch": 0.9486537371817135, "grad_norm": 6.460479450023821, "learning_rate": 6.901006483260475e-08, "loss": 17.3924, "step": 51898 }, { "epoch": 0.94867201637816, "grad_norm": 6.540258793261776, "learning_rate": 6.896106274935143e-08, "loss": 17.1336, "step": 51899 }, { "epoch": 0.9486902955746065, "grad_norm": 5.621985963090919, "learning_rate": 6.891207794896216e-08, "loss": 17.092, "step": 51900 }, { "epoch": 0.9487085747710531, "grad_norm": 7.601986481428907, "learning_rate": 6.886311043160732e-08, "loss": 17.7677, "step": 51901 }, { "epoch": 0.9487268539674996, "grad_norm": 6.111025549663864, "learning_rate": 6.881416019745956e-08, "loss": 17.4926, "step": 51902 }, { "epoch": 0.9487451331639462, "grad_norm": 5.50418403641295, "learning_rate": 6.876522724669044e-08, "loss": 17.0114, "step": 51903 }, { "epoch": 0.9487634123603926, "grad_norm": 6.576264926753794, "learning_rate": 6.871631157947145e-08, "loss": 17.0845, "step": 51904 }, { "epoch": 0.9487816915568391, "grad_norm": 6.725688209113447, "learning_rate": 6.866741319597358e-08, "loss": 17.5003, "step": 51905 }, { "epoch": 0.9487999707532857, "grad_norm": 6.675867359026629, "learning_rate": 6.861853209636893e-08, "loss": 17.1417, "step": 51906 }, { "epoch": 0.9488182499497322, "grad_norm": 5.394968897765096, "learning_rate": 6.85696682808279e-08, "loss": 17.074, "step": 51907 }, { "epoch": 0.9488365291461788, "grad_norm": 5.564666481959618, "learning_rate": 6.852082174952258e-08, "loss": 16.9681, "step": 51908 }, { "epoch": 0.9488548083426253, "grad_norm": 6.887944363504676, "learning_rate": 6.847199250262393e-08, "loss": 17.5727, "step": 51909 }, { "epoch": 0.9488730875390717, "grad_norm": 6.077700910648518, "learning_rate": 6.842318054030239e-08, "loss": 17.2104, "step": 51910 }, { "epoch": 0.9488913667355183, "grad_norm": 5.907525972918974, "learning_rate": 6.837438586273004e-08, "loss": 16.999, "step": 51911 }, { "epoch": 0.9489096459319648, "grad_norm": 4.474182506836404, "learning_rate": 6.832560847007785e-08, "loss": 16.7349, "step": 51912 }, { "epoch": 0.9489279251284114, "grad_norm": 7.258277803231454, "learning_rate": 6.827684836251569e-08, "loss": 17.6431, "step": 51913 }, { "epoch": 0.9489462043248579, "grad_norm": 5.963961774180465, "learning_rate": 6.822810554021619e-08, "loss": 17.5332, "step": 51914 }, { "epoch": 0.9489644835213044, "grad_norm": 6.373356363355709, "learning_rate": 6.817938000334812e-08, "loss": 17.6321, "step": 51915 }, { "epoch": 0.948982762717751, "grad_norm": 7.246299181176486, "learning_rate": 6.81306717520841e-08, "loss": 17.7903, "step": 51916 }, { "epoch": 0.9490010419141974, "grad_norm": 5.69414336420208, "learning_rate": 6.808198078659456e-08, "loss": 17.0863, "step": 51917 }, { "epoch": 0.949019321110644, "grad_norm": 5.4550216627602675, "learning_rate": 6.80333071070488e-08, "loss": 16.8807, "step": 51918 }, { "epoch": 0.9490376003070905, "grad_norm": 4.289436272851965, "learning_rate": 6.798465071361893e-08, "loss": 16.635, "step": 51919 }, { "epoch": 0.949055879503537, "grad_norm": 5.140239668280976, "learning_rate": 6.793601160647423e-08, "loss": 16.8133, "step": 51920 }, { "epoch": 0.9490741586999836, "grad_norm": 6.03902820304193, "learning_rate": 6.788738978578569e-08, "loss": 17.1684, "step": 51921 }, { "epoch": 0.94909243789643, "grad_norm": 5.589555133643565, "learning_rate": 6.783878525172483e-08, "loss": 17.3484, "step": 51922 }, { "epoch": 0.9491107170928766, "grad_norm": 5.545085014807033, "learning_rate": 6.77901980044604e-08, "loss": 16.9633, "step": 51923 }, { "epoch": 0.9491289962893231, "grad_norm": 6.0855322248839725, "learning_rate": 6.77416280441634e-08, "loss": 17.2674, "step": 51924 }, { "epoch": 0.9491472754857696, "grad_norm": 6.29574876701191, "learning_rate": 6.769307537100422e-08, "loss": 17.1031, "step": 51925 }, { "epoch": 0.9491655546822162, "grad_norm": 5.752027306067266, "learning_rate": 6.764453998515275e-08, "loss": 17.3433, "step": 51926 }, { "epoch": 0.9491838338786627, "grad_norm": 7.226636888269396, "learning_rate": 6.759602188677883e-08, "loss": 17.596, "step": 51927 }, { "epoch": 0.9492021130751093, "grad_norm": 10.887789063521174, "learning_rate": 6.754752107605345e-08, "loss": 18.7222, "step": 51928 }, { "epoch": 0.9492203922715557, "grad_norm": 7.764835383665748, "learning_rate": 6.749903755314535e-08, "loss": 17.4913, "step": 51929 }, { "epoch": 0.9492386714680022, "grad_norm": 4.905720604644589, "learning_rate": 6.745057131822496e-08, "loss": 16.9167, "step": 51930 }, { "epoch": 0.9492569506644488, "grad_norm": 5.513599368746015, "learning_rate": 6.740212237146326e-08, "loss": 17.0043, "step": 51931 }, { "epoch": 0.9492752298608953, "grad_norm": 7.019069771315346, "learning_rate": 6.735369071302788e-08, "loss": 17.3878, "step": 51932 }, { "epoch": 0.9492935090573419, "grad_norm": 5.0982149670713515, "learning_rate": 6.730527634309092e-08, "loss": 16.7814, "step": 51933 }, { "epoch": 0.9493117882537884, "grad_norm": 6.8943664189813125, "learning_rate": 6.725687926182056e-08, "loss": 17.5689, "step": 51934 }, { "epoch": 0.9493300674502348, "grad_norm": 6.965270385873423, "learning_rate": 6.720849946938668e-08, "loss": 17.6037, "step": 51935 }, { "epoch": 0.9493483466466814, "grad_norm": 5.885965026609481, "learning_rate": 6.716013696595913e-08, "loss": 17.2775, "step": 51936 }, { "epoch": 0.9493666258431279, "grad_norm": 5.652747398980285, "learning_rate": 6.711179175170668e-08, "loss": 17.0476, "step": 51937 }, { "epoch": 0.9493849050395745, "grad_norm": 5.906504133580514, "learning_rate": 6.706346382679973e-08, "loss": 16.9753, "step": 51938 }, { "epoch": 0.949403184236021, "grad_norm": 12.516229335706106, "learning_rate": 6.701515319140706e-08, "loss": 19.3284, "step": 51939 }, { "epoch": 0.9494214634324675, "grad_norm": 6.017854130937737, "learning_rate": 6.696685984569851e-08, "loss": 17.1869, "step": 51940 }, { "epoch": 0.9494397426289141, "grad_norm": 5.798913763275408, "learning_rate": 6.691858378984285e-08, "loss": 17.1952, "step": 51941 }, { "epoch": 0.9494580218253605, "grad_norm": 6.162408591083888, "learning_rate": 6.687032502400992e-08, "loss": 17.0539, "step": 51942 }, { "epoch": 0.9494763010218071, "grad_norm": 5.629140294988713, "learning_rate": 6.682208354836795e-08, "loss": 16.9015, "step": 51943 }, { "epoch": 0.9494945802182536, "grad_norm": 6.828784727449261, "learning_rate": 6.677385936308678e-08, "loss": 17.5403, "step": 51944 }, { "epoch": 0.9495128594147001, "grad_norm": 6.224529619063401, "learning_rate": 6.672565246833518e-08, "loss": 17.21, "step": 51945 }, { "epoch": 0.9495311386111467, "grad_norm": 6.03368229482757, "learning_rate": 6.667746286428133e-08, "loss": 17.5958, "step": 51946 }, { "epoch": 0.9495494178075932, "grad_norm": 5.864193510890896, "learning_rate": 6.662929055109568e-08, "loss": 17.3875, "step": 51947 }, { "epoch": 0.9495676970040398, "grad_norm": 6.616889647250528, "learning_rate": 6.658113552894585e-08, "loss": 17.1445, "step": 51948 }, { "epoch": 0.9495859762004862, "grad_norm": 6.5813295579191395, "learning_rate": 6.653299779800115e-08, "loss": 17.5796, "step": 51949 }, { "epoch": 0.9496042553969327, "grad_norm": 5.987131581913131, "learning_rate": 6.648487735843035e-08, "loss": 17.1397, "step": 51950 }, { "epoch": 0.9496225345933793, "grad_norm": 5.098562486579905, "learning_rate": 6.643677421040218e-08, "loss": 16.9551, "step": 51951 }, { "epoch": 0.9496408137898258, "grad_norm": 6.179799601358217, "learning_rate": 6.63886883540843e-08, "loss": 17.3082, "step": 51952 }, { "epoch": 0.9496590929862723, "grad_norm": 5.38490230687571, "learning_rate": 6.634061978964656e-08, "loss": 17.2578, "step": 51953 }, { "epoch": 0.9496773721827189, "grad_norm": 5.406083846728004, "learning_rate": 6.629256851725718e-08, "loss": 17.002, "step": 51954 }, { "epoch": 0.9496956513791653, "grad_norm": 5.887759965081422, "learning_rate": 6.624453453708324e-08, "loss": 17.1278, "step": 51955 }, { "epoch": 0.9497139305756119, "grad_norm": 6.527802275534399, "learning_rate": 6.619651784929404e-08, "loss": 17.4523, "step": 51956 }, { "epoch": 0.9497322097720584, "grad_norm": 5.986045305809944, "learning_rate": 6.61485184540589e-08, "loss": 17.2537, "step": 51957 }, { "epoch": 0.9497504889685049, "grad_norm": 7.1760080676488185, "learning_rate": 6.610053635154434e-08, "loss": 17.5923, "step": 51958 }, { "epoch": 0.9497687681649515, "grad_norm": 9.219718504664582, "learning_rate": 6.605257154191913e-08, "loss": 18.9484, "step": 51959 }, { "epoch": 0.949787047361398, "grad_norm": 5.68603929283398, "learning_rate": 6.600462402535201e-08, "loss": 17.2222, "step": 51960 }, { "epoch": 0.9498053265578446, "grad_norm": 6.110493495592867, "learning_rate": 6.595669380201009e-08, "loss": 17.1006, "step": 51961 }, { "epoch": 0.949823605754291, "grad_norm": 5.20185418271025, "learning_rate": 6.590878087206265e-08, "loss": 16.9873, "step": 51962 }, { "epoch": 0.9498418849507375, "grad_norm": 5.431855579008853, "learning_rate": 6.586088523567568e-08, "loss": 17.0254, "step": 51963 }, { "epoch": 0.9498601641471841, "grad_norm": 6.5872546641580065, "learning_rate": 6.58130068930185e-08, "loss": 17.3043, "step": 51964 }, { "epoch": 0.9498784433436306, "grad_norm": 5.780860165025578, "learning_rate": 6.576514584425875e-08, "loss": 17.1918, "step": 51965 }, { "epoch": 0.9498967225400772, "grad_norm": 5.9632393182027155, "learning_rate": 6.57173020895635e-08, "loss": 17.2675, "step": 51966 }, { "epoch": 0.9499150017365237, "grad_norm": 6.224393274723653, "learning_rate": 6.566947562910153e-08, "loss": 17.5382, "step": 51967 }, { "epoch": 0.9499332809329701, "grad_norm": 6.28385785645444, "learning_rate": 6.562166646303991e-08, "loss": 17.593, "step": 51968 }, { "epoch": 0.9499515601294167, "grad_norm": 6.93474107679399, "learning_rate": 6.557387459154574e-08, "loss": 17.3858, "step": 51969 }, { "epoch": 0.9499698393258632, "grad_norm": 6.648338919812613, "learning_rate": 6.55261000147872e-08, "loss": 17.3998, "step": 51970 }, { "epoch": 0.9499881185223098, "grad_norm": 6.192930885059954, "learning_rate": 6.547834273293197e-08, "loss": 17.2, "step": 51971 }, { "epoch": 0.9500063977187563, "grad_norm": 7.375521217967527, "learning_rate": 6.543060274614599e-08, "loss": 17.7394, "step": 51972 }, { "epoch": 0.9500246769152028, "grad_norm": 5.568627913375268, "learning_rate": 6.538288005459804e-08, "loss": 17.4469, "step": 51973 }, { "epoch": 0.9500429561116494, "grad_norm": 9.851072244599155, "learning_rate": 6.533517465845462e-08, "loss": 18.9649, "step": 51974 }, { "epoch": 0.9500612353080958, "grad_norm": 4.752723289499993, "learning_rate": 6.528748655788341e-08, "loss": 16.7846, "step": 51975 }, { "epoch": 0.9500795145045424, "grad_norm": 7.45129733052231, "learning_rate": 6.523981575305149e-08, "loss": 17.8272, "step": 51976 }, { "epoch": 0.9500977937009889, "grad_norm": 6.104153971090029, "learning_rate": 6.519216224412539e-08, "loss": 17.2072, "step": 51977 }, { "epoch": 0.9501160728974354, "grad_norm": 7.093527777341356, "learning_rate": 6.514452603127275e-08, "loss": 17.4817, "step": 51978 }, { "epoch": 0.950134352093882, "grad_norm": 6.117961494119231, "learning_rate": 6.509690711466065e-08, "loss": 17.2559, "step": 51979 }, { "epoch": 0.9501526312903285, "grad_norm": 5.171308427717573, "learning_rate": 6.50493054944551e-08, "loss": 17.0682, "step": 51980 }, { "epoch": 0.950170910486775, "grad_norm": 4.941626794499318, "learning_rate": 6.500172117082427e-08, "loss": 17.0973, "step": 51981 }, { "epoch": 0.9501891896832215, "grad_norm": 6.533565224189019, "learning_rate": 6.495415414393357e-08, "loss": 17.326, "step": 51982 }, { "epoch": 0.950207468879668, "grad_norm": 6.279206662553818, "learning_rate": 6.490660441395014e-08, "loss": 17.3236, "step": 51983 }, { "epoch": 0.9502257480761146, "grad_norm": 5.184388310556215, "learning_rate": 6.485907198104102e-08, "loss": 17.1358, "step": 51984 }, { "epoch": 0.9502440272725611, "grad_norm": 5.631506722752053, "learning_rate": 6.481155684537277e-08, "loss": 17.0527, "step": 51985 }, { "epoch": 0.9502623064690077, "grad_norm": 6.992199160662818, "learning_rate": 6.476405900711136e-08, "loss": 17.7745, "step": 51986 }, { "epoch": 0.9502805856654541, "grad_norm": 4.334060315024717, "learning_rate": 6.471657846642442e-08, "loss": 16.7688, "step": 51987 }, { "epoch": 0.9502988648619006, "grad_norm": 4.428465231152517, "learning_rate": 6.46691152234774e-08, "loss": 16.6301, "step": 51988 }, { "epoch": 0.9503171440583472, "grad_norm": 5.898598385714137, "learning_rate": 6.462166927843627e-08, "loss": 17.4074, "step": 51989 }, { "epoch": 0.9503354232547937, "grad_norm": 7.261525324743427, "learning_rate": 6.457424063146811e-08, "loss": 17.6419, "step": 51990 }, { "epoch": 0.9503537024512403, "grad_norm": 5.983089818214971, "learning_rate": 6.45268292827389e-08, "loss": 17.0348, "step": 51991 }, { "epoch": 0.9503719816476868, "grad_norm": 6.473515698153997, "learning_rate": 6.447943523241574e-08, "loss": 17.4318, "step": 51992 }, { "epoch": 0.9503902608441333, "grad_norm": 6.891690179499078, "learning_rate": 6.443205848066292e-08, "loss": 17.7244, "step": 51993 }, { "epoch": 0.9504085400405798, "grad_norm": 7.707350986228741, "learning_rate": 6.438469902764755e-08, "loss": 17.7527, "step": 51994 }, { "epoch": 0.9504268192370263, "grad_norm": 5.762978803858678, "learning_rate": 6.43373568735356e-08, "loss": 17.1657, "step": 51995 }, { "epoch": 0.9504450984334729, "grad_norm": 6.117106148458761, "learning_rate": 6.429003201849304e-08, "loss": 17.2722, "step": 51996 }, { "epoch": 0.9504633776299194, "grad_norm": 7.286183431954871, "learning_rate": 6.424272446268586e-08, "loss": 17.4634, "step": 51997 }, { "epoch": 0.9504816568263659, "grad_norm": 5.399547324910278, "learning_rate": 6.41954342062795e-08, "loss": 17.0278, "step": 51998 }, { "epoch": 0.9504999360228125, "grad_norm": 5.630981303652289, "learning_rate": 6.41481612494399e-08, "loss": 17.0622, "step": 51999 }, { "epoch": 0.9505182152192589, "grad_norm": 6.8330371169715844, "learning_rate": 6.410090559233195e-08, "loss": 17.4903, "step": 52000 }, { "epoch": 0.9505364944157055, "grad_norm": 6.409627283648822, "learning_rate": 6.405366723512218e-08, "loss": 17.1915, "step": 52001 }, { "epoch": 0.950554773612152, "grad_norm": 5.104811658897638, "learning_rate": 6.400644617797658e-08, "loss": 16.8312, "step": 52002 }, { "epoch": 0.9505730528085985, "grad_norm": 10.100851788536884, "learning_rate": 6.395924242106e-08, "loss": 18.4758, "step": 52003 }, { "epoch": 0.9505913320050451, "grad_norm": 6.095473673968336, "learning_rate": 6.391205596453787e-08, "loss": 17.2732, "step": 52004 }, { "epoch": 0.9506096112014916, "grad_norm": 4.254777800610132, "learning_rate": 6.38648868085756e-08, "loss": 16.6505, "step": 52005 }, { "epoch": 0.9506278903979382, "grad_norm": 7.182316298052651, "learning_rate": 6.38177349533381e-08, "loss": 17.4689, "step": 52006 }, { "epoch": 0.9506461695943846, "grad_norm": 7.358616864140386, "learning_rate": 6.377060039899185e-08, "loss": 18.1584, "step": 52007 }, { "epoch": 0.9506644487908311, "grad_norm": 5.710361119160194, "learning_rate": 6.372348314570065e-08, "loss": 17.0991, "step": 52008 }, { "epoch": 0.9506827279872777, "grad_norm": 4.796234659869706, "learning_rate": 6.367638319363101e-08, "loss": 16.7436, "step": 52009 }, { "epoch": 0.9507010071837242, "grad_norm": 7.989038564782833, "learning_rate": 6.362930054294669e-08, "loss": 17.4991, "step": 52010 }, { "epoch": 0.9507192863801708, "grad_norm": 6.67248203060404, "learning_rate": 6.358223519381313e-08, "loss": 17.5681, "step": 52011 }, { "epoch": 0.9507375655766173, "grad_norm": 5.673361899054328, "learning_rate": 6.353518714639629e-08, "loss": 17.2361, "step": 52012 }, { "epoch": 0.9507558447730637, "grad_norm": 6.47175640933246, "learning_rate": 6.348815640085992e-08, "loss": 17.5143, "step": 52013 }, { "epoch": 0.9507741239695103, "grad_norm": 6.020532861432072, "learning_rate": 6.344114295736892e-08, "loss": 17.2846, "step": 52014 }, { "epoch": 0.9507924031659568, "grad_norm": 6.153171833576237, "learning_rate": 6.339414681608813e-08, "loss": 17.0041, "step": 52015 }, { "epoch": 0.9508106823624034, "grad_norm": 5.051755092719092, "learning_rate": 6.334716797718354e-08, "loss": 17.2478, "step": 52016 }, { "epoch": 0.9508289615588499, "grad_norm": 6.848966093621271, "learning_rate": 6.330020644081781e-08, "loss": 17.6722, "step": 52017 }, { "epoch": 0.9508472407552964, "grad_norm": 7.098287055211269, "learning_rate": 6.325326220715689e-08, "loss": 17.7875, "step": 52018 }, { "epoch": 0.950865519951743, "grad_norm": 4.769474139796132, "learning_rate": 6.3206335276364e-08, "loss": 16.926, "step": 52019 }, { "epoch": 0.9508837991481894, "grad_norm": 5.228454311418634, "learning_rate": 6.315942564860511e-08, "loss": 16.9015, "step": 52020 }, { "epoch": 0.9509020783446359, "grad_norm": 7.412199194854839, "learning_rate": 6.311253332404454e-08, "loss": 17.7385, "step": 52021 }, { "epoch": 0.9509203575410825, "grad_norm": 5.776949840545621, "learning_rate": 6.306565830284551e-08, "loss": 17.1882, "step": 52022 }, { "epoch": 0.950938636737529, "grad_norm": 6.362852977973762, "learning_rate": 6.30188005851734e-08, "loss": 17.2341, "step": 52023 }, { "epoch": 0.9509569159339756, "grad_norm": 6.878769015542502, "learning_rate": 6.2971960171192e-08, "loss": 17.4309, "step": 52024 }, { "epoch": 0.9509751951304221, "grad_norm": 6.309713622610835, "learning_rate": 6.292513706106506e-08, "loss": 17.4463, "step": 52025 }, { "epoch": 0.9509934743268685, "grad_norm": 5.321504497412689, "learning_rate": 6.287833125495746e-08, "loss": 17.0309, "step": 52026 }, { "epoch": 0.9510117535233151, "grad_norm": 9.070416131408304, "learning_rate": 6.283154275303238e-08, "loss": 18.3599, "step": 52027 }, { "epoch": 0.9510300327197616, "grad_norm": 8.491841605758712, "learning_rate": 6.27847715554547e-08, "loss": 17.974, "step": 52028 }, { "epoch": 0.9510483119162082, "grad_norm": 6.765646234299462, "learning_rate": 6.273801766238762e-08, "loss": 17.7323, "step": 52029 }, { "epoch": 0.9510665911126547, "grad_norm": 8.425534390953112, "learning_rate": 6.269128107399602e-08, "loss": 18.5844, "step": 52030 }, { "epoch": 0.9510848703091012, "grad_norm": 6.2869622356714805, "learning_rate": 6.264456179044199e-08, "loss": 17.5667, "step": 52031 }, { "epoch": 0.9511031495055478, "grad_norm": 5.447629919008467, "learning_rate": 6.259785981189148e-08, "loss": 17.1303, "step": 52032 }, { "epoch": 0.9511214287019942, "grad_norm": 6.499791561647554, "learning_rate": 6.255117513850662e-08, "loss": 17.5876, "step": 52033 }, { "epoch": 0.9511397078984408, "grad_norm": 7.042497523340607, "learning_rate": 6.250450777045114e-08, "loss": 17.5747, "step": 52034 }, { "epoch": 0.9511579870948873, "grad_norm": 5.71250373599828, "learning_rate": 6.245785770788936e-08, "loss": 16.9353, "step": 52035 }, { "epoch": 0.9511762662913338, "grad_norm": 6.8440314581805755, "learning_rate": 6.241122495098395e-08, "loss": 17.1419, "step": 52036 }, { "epoch": 0.9511945454877804, "grad_norm": 7.251396574267092, "learning_rate": 6.236460949989864e-08, "loss": 17.5477, "step": 52037 }, { "epoch": 0.9512128246842269, "grad_norm": 5.073150576115787, "learning_rate": 6.231801135479721e-08, "loss": 16.774, "step": 52038 }, { "epoch": 0.9512311038806734, "grad_norm": 5.251701069108562, "learning_rate": 6.227143051584228e-08, "loss": 17.0102, "step": 52039 }, { "epoch": 0.9512493830771199, "grad_norm": 5.625916244729692, "learning_rate": 6.222486698319763e-08, "loss": 17.1439, "step": 52040 }, { "epoch": 0.9512676622735664, "grad_norm": 5.705646791025156, "learning_rate": 6.217832075702701e-08, "loss": 17.0072, "step": 52041 }, { "epoch": 0.951285941470013, "grad_norm": 5.4549012768130725, "learning_rate": 6.213179183749197e-08, "loss": 16.8815, "step": 52042 }, { "epoch": 0.9513042206664595, "grad_norm": 5.697483153743297, "learning_rate": 6.208528022475735e-08, "loss": 17.0337, "step": 52043 }, { "epoch": 0.9513224998629061, "grad_norm": 6.601767392871283, "learning_rate": 6.203878591898471e-08, "loss": 17.56, "step": 52044 }, { "epoch": 0.9513407790593525, "grad_norm": 6.204220142516832, "learning_rate": 6.199230892033781e-08, "loss": 17.2804, "step": 52045 }, { "epoch": 0.951359058255799, "grad_norm": 7.263943657247074, "learning_rate": 6.194584922897983e-08, "loss": 17.7454, "step": 52046 }, { "epoch": 0.9513773374522456, "grad_norm": 6.007865485728439, "learning_rate": 6.189940684507234e-08, "loss": 17.215, "step": 52047 }, { "epoch": 0.9513956166486921, "grad_norm": 5.567805324452649, "learning_rate": 6.185298176877907e-08, "loss": 17.2278, "step": 52048 }, { "epoch": 0.9514138958451387, "grad_norm": 6.5347231243519275, "learning_rate": 6.180657400026268e-08, "loss": 17.3996, "step": 52049 }, { "epoch": 0.9514321750415852, "grad_norm": 6.629582313807098, "learning_rate": 6.176018353968637e-08, "loss": 17.2963, "step": 52050 }, { "epoch": 0.9514504542380317, "grad_norm": 5.568161021247124, "learning_rate": 6.171381038721114e-08, "loss": 17.003, "step": 52051 }, { "epoch": 0.9514687334344782, "grad_norm": 6.496160081638797, "learning_rate": 6.166745454300127e-08, "loss": 17.4587, "step": 52052 }, { "epoch": 0.9514870126309247, "grad_norm": 5.009929633246992, "learning_rate": 6.162111600721776e-08, "loss": 16.6464, "step": 52053 }, { "epoch": 0.9515052918273713, "grad_norm": 4.970555932643876, "learning_rate": 6.157479478002437e-08, "loss": 16.888, "step": 52054 }, { "epoch": 0.9515235710238178, "grad_norm": 5.878721130942391, "learning_rate": 6.152849086158264e-08, "loss": 17.0774, "step": 52055 }, { "epoch": 0.9515418502202643, "grad_norm": 6.825463786355848, "learning_rate": 6.148220425205464e-08, "loss": 16.9777, "step": 52056 }, { "epoch": 0.9515601294167109, "grad_norm": 6.806619737838149, "learning_rate": 6.143593495160305e-08, "loss": 17.412, "step": 52057 }, { "epoch": 0.9515784086131573, "grad_norm": 6.109759174293998, "learning_rate": 6.138968296038994e-08, "loss": 17.3395, "step": 52058 }, { "epoch": 0.9515966878096039, "grad_norm": 5.084208228749305, "learning_rate": 6.134344827857742e-08, "loss": 16.7325, "step": 52059 }, { "epoch": 0.9516149670060504, "grad_norm": 6.851245933656974, "learning_rate": 6.129723090632811e-08, "loss": 17.51, "step": 52060 }, { "epoch": 0.9516332462024969, "grad_norm": 5.6591874270057865, "learning_rate": 6.125103084380301e-08, "loss": 17.1518, "step": 52061 }, { "epoch": 0.9516515253989435, "grad_norm": 5.355702584893191, "learning_rate": 6.120484809116423e-08, "loss": 17.1504, "step": 52062 }, { "epoch": 0.95166980459539, "grad_norm": 6.8968726185469515, "learning_rate": 6.115868264857438e-08, "loss": 17.2279, "step": 52063 }, { "epoch": 0.9516880837918366, "grad_norm": 5.69417478495582, "learning_rate": 6.111253451619448e-08, "loss": 17.0845, "step": 52064 }, { "epoch": 0.951706362988283, "grad_norm": 6.751980086819002, "learning_rate": 6.10664036941866e-08, "loss": 17.3486, "step": 52065 }, { "epoch": 0.9517246421847295, "grad_norm": 6.814965314039511, "learning_rate": 6.102029018271227e-08, "loss": 17.5058, "step": 52066 }, { "epoch": 0.9517429213811761, "grad_norm": 6.904633733553235, "learning_rate": 6.097419398193361e-08, "loss": 17.2149, "step": 52067 }, { "epoch": 0.9517612005776226, "grad_norm": 8.744037476377157, "learning_rate": 6.092811509201157e-08, "loss": 17.6268, "step": 52068 }, { "epoch": 0.9517794797740692, "grad_norm": 6.343151568961638, "learning_rate": 6.088205351310771e-08, "loss": 17.3232, "step": 52069 }, { "epoch": 0.9517977589705157, "grad_norm": 5.484382013903288, "learning_rate": 6.083600924538413e-08, "loss": 17.1553, "step": 52070 }, { "epoch": 0.9518160381669621, "grad_norm": 6.24355360940015, "learning_rate": 6.078998228900124e-08, "loss": 17.4052, "step": 52071 }, { "epoch": 0.9518343173634087, "grad_norm": 6.068845047841402, "learning_rate": 6.074397264412169e-08, "loss": 16.8777, "step": 52072 }, { "epoch": 0.9518525965598552, "grad_norm": 12.212482203114368, "learning_rate": 6.069798031090479e-08, "loss": 18.8529, "step": 52073 }, { "epoch": 0.9518708757563018, "grad_norm": 6.120068605022587, "learning_rate": 6.06520052895132e-08, "loss": 17.4636, "step": 52074 }, { "epoch": 0.9518891549527483, "grad_norm": 6.1931960273201465, "learning_rate": 6.060604758010847e-08, "loss": 17.3924, "step": 52075 }, { "epoch": 0.9519074341491948, "grad_norm": 6.686434360683629, "learning_rate": 6.056010718284987e-08, "loss": 17.4403, "step": 52076 }, { "epoch": 0.9519257133456414, "grad_norm": 6.772029370759345, "learning_rate": 6.051418409790011e-08, "loss": 17.6761, "step": 52077 }, { "epoch": 0.9519439925420878, "grad_norm": 5.413692105335451, "learning_rate": 6.046827832541957e-08, "loss": 17.0345, "step": 52078 }, { "epoch": 0.9519622717385344, "grad_norm": 7.010723472914114, "learning_rate": 6.042238986556869e-08, "loss": 17.7206, "step": 52079 }, { "epoch": 0.9519805509349809, "grad_norm": 6.053163949099243, "learning_rate": 6.037651871850902e-08, "loss": 17.2848, "step": 52080 }, { "epoch": 0.9519988301314274, "grad_norm": 6.294618352568674, "learning_rate": 6.033066488440098e-08, "loss": 17.4037, "step": 52081 }, { "epoch": 0.952017109327874, "grad_norm": 7.664950923469784, "learning_rate": 6.028482836340499e-08, "loss": 18.0317, "step": 52082 }, { "epoch": 0.9520353885243205, "grad_norm": 5.525417111113435, "learning_rate": 6.023900915568204e-08, "loss": 17.0755, "step": 52083 }, { "epoch": 0.952053667720767, "grad_norm": 5.7603744489494675, "learning_rate": 6.019320726139255e-08, "loss": 17.321, "step": 52084 }, { "epoch": 0.9520719469172135, "grad_norm": 7.266210670748653, "learning_rate": 6.014742268069807e-08, "loss": 17.3945, "step": 52085 }, { "epoch": 0.95209022611366, "grad_norm": 5.540970504405444, "learning_rate": 6.01016554137579e-08, "loss": 17.0711, "step": 52086 }, { "epoch": 0.9521085053101066, "grad_norm": 4.9595958659769455, "learning_rate": 6.005590546073192e-08, "loss": 16.8025, "step": 52087 }, { "epoch": 0.9521267845065531, "grad_norm": 6.701122774998857, "learning_rate": 6.001017282178223e-08, "loss": 17.7364, "step": 52088 }, { "epoch": 0.9521450637029996, "grad_norm": 5.574683731914439, "learning_rate": 5.996445749706814e-08, "loss": 17.4144, "step": 52089 }, { "epoch": 0.9521633428994462, "grad_norm": 6.037306404113845, "learning_rate": 5.991875948674952e-08, "loss": 17.3175, "step": 52090 }, { "epoch": 0.9521816220958926, "grad_norm": 7.4015824795623075, "learning_rate": 5.987307879098736e-08, "loss": 17.8868, "step": 52091 }, { "epoch": 0.9521999012923392, "grad_norm": 5.047436106747385, "learning_rate": 5.982741540994096e-08, "loss": 16.9851, "step": 52092 }, { "epoch": 0.9522181804887857, "grad_norm": 6.281332812419464, "learning_rate": 5.978176934377133e-08, "loss": 17.2269, "step": 52093 }, { "epoch": 0.9522364596852322, "grad_norm": 9.163079065800956, "learning_rate": 5.973614059263778e-08, "loss": 17.9024, "step": 52094 }, { "epoch": 0.9522547388816788, "grad_norm": 5.728344580402286, "learning_rate": 5.969052915670016e-08, "loss": 17.1841, "step": 52095 }, { "epoch": 0.9522730180781253, "grad_norm": 5.824750499462367, "learning_rate": 5.964493503611835e-08, "loss": 17.3024, "step": 52096 }, { "epoch": 0.9522912972745718, "grad_norm": 7.085780796766762, "learning_rate": 5.9599358231052785e-08, "loss": 17.5801, "step": 52097 }, { "epoch": 0.9523095764710183, "grad_norm": 7.288096939394853, "learning_rate": 5.9553798741662783e-08, "loss": 17.5375, "step": 52098 }, { "epoch": 0.9523278556674648, "grad_norm": 5.0448187139736325, "learning_rate": 5.950825656810766e-08, "loss": 16.9679, "step": 52099 }, { "epoch": 0.9523461348639114, "grad_norm": 5.917838246367013, "learning_rate": 5.9462731710547284e-08, "loss": 17.2007, "step": 52100 }, { "epoch": 0.9523644140603579, "grad_norm": 5.5990977126072705, "learning_rate": 5.941722416914153e-08, "loss": 16.8151, "step": 52101 }, { "epoch": 0.9523826932568045, "grad_norm": 7.053504538523284, "learning_rate": 5.937173394404916e-08, "loss": 17.2178, "step": 52102 }, { "epoch": 0.952400972453251, "grad_norm": 6.569595135142911, "learning_rate": 5.9326261035431155e-08, "loss": 17.5958, "step": 52103 }, { "epoch": 0.9524192516496974, "grad_norm": 6.022369250783295, "learning_rate": 5.928080544344517e-08, "loss": 17.6626, "step": 52104 }, { "epoch": 0.952437530846144, "grad_norm": 7.746371136593423, "learning_rate": 5.923536716825162e-08, "loss": 17.7563, "step": 52105 }, { "epoch": 0.9524558100425905, "grad_norm": 5.6422121227405935, "learning_rate": 5.9189946210009284e-08, "loss": 17.1679, "step": 52106 }, { "epoch": 0.9524740892390371, "grad_norm": 5.799061632888599, "learning_rate": 5.914454256887692e-08, "loss": 17.097, "step": 52107 }, { "epoch": 0.9524923684354836, "grad_norm": 6.322173867994052, "learning_rate": 5.909915624501494e-08, "loss": 17.2513, "step": 52108 }, { "epoch": 0.95251064763193, "grad_norm": 7.059893203435548, "learning_rate": 5.9053787238581575e-08, "loss": 17.6937, "step": 52109 }, { "epoch": 0.9525289268283766, "grad_norm": 5.625496218961227, "learning_rate": 5.900843554973501e-08, "loss": 17.1722, "step": 52110 }, { "epoch": 0.9525472060248231, "grad_norm": 6.341058244244397, "learning_rate": 5.896310117863569e-08, "loss": 17.4564, "step": 52111 }, { "epoch": 0.9525654852212697, "grad_norm": 5.914372088762858, "learning_rate": 5.891778412544236e-08, "loss": 17.0006, "step": 52112 }, { "epoch": 0.9525837644177162, "grad_norm": 7.139915933999204, "learning_rate": 5.887248439031268e-08, "loss": 17.6906, "step": 52113 }, { "epoch": 0.9526020436141627, "grad_norm": 6.078495423443999, "learning_rate": 5.8827201973407076e-08, "loss": 17.287, "step": 52114 }, { "epoch": 0.9526203228106093, "grad_norm": 5.734225598540663, "learning_rate": 5.878193687488265e-08, "loss": 17.1705, "step": 52115 }, { "epoch": 0.9526386020070557, "grad_norm": 5.349183454962423, "learning_rate": 5.873668909489927e-08, "loss": 16.9669, "step": 52116 }, { "epoch": 0.9526568812035023, "grad_norm": 5.097522686911859, "learning_rate": 5.8691458633614585e-08, "loss": 17.0344, "step": 52117 }, { "epoch": 0.9526751603999488, "grad_norm": 6.889255223017086, "learning_rate": 5.864624549118736e-08, "loss": 17.2524, "step": 52118 }, { "epoch": 0.9526934395963953, "grad_norm": 6.047086744703629, "learning_rate": 5.860104966777691e-08, "loss": 17.3726, "step": 52119 }, { "epoch": 0.9527117187928419, "grad_norm": 7.179208910902449, "learning_rate": 5.855587116354034e-08, "loss": 17.8013, "step": 52120 }, { "epoch": 0.9527299979892884, "grad_norm": 6.672289510529842, "learning_rate": 5.851070997863695e-08, "loss": 17.2309, "step": 52121 }, { "epoch": 0.952748277185735, "grad_norm": 7.057693613445172, "learning_rate": 5.846556611322496e-08, "loss": 17.1979, "step": 52122 }, { "epoch": 0.9527665563821814, "grad_norm": 6.930219172907007, "learning_rate": 5.8420439567462014e-08, "loss": 17.4659, "step": 52123 }, { "epoch": 0.9527848355786279, "grad_norm": 7.148394032986808, "learning_rate": 5.837533034150633e-08, "loss": 17.7581, "step": 52124 }, { "epoch": 0.9528031147750745, "grad_norm": 7.541792385845969, "learning_rate": 5.833023843551722e-08, "loss": 17.9541, "step": 52125 }, { "epoch": 0.952821393971521, "grad_norm": 8.562839451282288, "learning_rate": 5.8285163849651217e-08, "loss": 18.1122, "step": 52126 }, { "epoch": 0.9528396731679676, "grad_norm": 7.521179101656112, "learning_rate": 5.8240106584067093e-08, "loss": 17.4214, "step": 52127 }, { "epoch": 0.9528579523644141, "grad_norm": 7.3181056473534465, "learning_rate": 5.8195066638921936e-08, "loss": 17.4031, "step": 52128 }, { "epoch": 0.9528762315608605, "grad_norm": 7.386158681174554, "learning_rate": 5.815004401437507e-08, "loss": 17.8712, "step": 52129 }, { "epoch": 0.9528945107573071, "grad_norm": 6.383312192198071, "learning_rate": 5.810503871058304e-08, "loss": 17.2535, "step": 52130 }, { "epoch": 0.9529127899537536, "grad_norm": 6.399856266943763, "learning_rate": 5.8060050727704045e-08, "loss": 17.1546, "step": 52131 }, { "epoch": 0.9529310691502002, "grad_norm": 7.462095621572373, "learning_rate": 5.8015080065895735e-08, "loss": 17.5273, "step": 52132 }, { "epoch": 0.9529493483466467, "grad_norm": 6.475452977350866, "learning_rate": 5.7970126725315765e-08, "loss": 16.9689, "step": 52133 }, { "epoch": 0.9529676275430932, "grad_norm": 6.594498111712934, "learning_rate": 5.7925190706121795e-08, "loss": 17.459, "step": 52134 }, { "epoch": 0.9529859067395398, "grad_norm": 6.696952920179183, "learning_rate": 5.7880272008470905e-08, "loss": 17.4369, "step": 52135 }, { "epoch": 0.9530041859359862, "grad_norm": 6.853803307342909, "learning_rate": 5.783537063252076e-08, "loss": 17.1976, "step": 52136 }, { "epoch": 0.9530224651324328, "grad_norm": 5.141346408481319, "learning_rate": 5.7790486578429005e-08, "loss": 16.95, "step": 52137 }, { "epoch": 0.9530407443288793, "grad_norm": 6.593343524101941, "learning_rate": 5.774561984635274e-08, "loss": 17.2307, "step": 52138 }, { "epoch": 0.9530590235253258, "grad_norm": 6.142483176984717, "learning_rate": 5.770077043644906e-08, "loss": 17.3956, "step": 52139 }, { "epoch": 0.9530773027217724, "grad_norm": 6.330388727014225, "learning_rate": 5.7655938348875616e-08, "loss": 17.3574, "step": 52140 }, { "epoch": 0.9530955819182189, "grad_norm": 5.381861135554727, "learning_rate": 5.761112358378895e-08, "loss": 16.8396, "step": 52141 }, { "epoch": 0.9531138611146655, "grad_norm": 8.042161561626758, "learning_rate": 5.756632614134616e-08, "loss": 17.3948, "step": 52142 }, { "epoch": 0.9531321403111119, "grad_norm": 5.154167463885194, "learning_rate": 5.7521546021705456e-08, "loss": 16.9262, "step": 52143 }, { "epoch": 0.9531504195075584, "grad_norm": 6.620352960033755, "learning_rate": 5.747678322502171e-08, "loss": 17.3659, "step": 52144 }, { "epoch": 0.953168698704005, "grad_norm": 5.793966106970675, "learning_rate": 5.743203775145367e-08, "loss": 17.1546, "step": 52145 }, { "epoch": 0.9531869779004515, "grad_norm": 6.658422545267307, "learning_rate": 5.738730960115679e-08, "loss": 17.724, "step": 52146 }, { "epoch": 0.9532052570968981, "grad_norm": 7.761213396525644, "learning_rate": 5.734259877428927e-08, "loss": 17.9866, "step": 52147 }, { "epoch": 0.9532235362933446, "grad_norm": 7.648150334614118, "learning_rate": 5.729790527100654e-08, "loss": 17.4584, "step": 52148 }, { "epoch": 0.953241815489791, "grad_norm": 5.7496032269147435, "learning_rate": 5.725322909146569e-08, "loss": 16.9931, "step": 52149 }, { "epoch": 0.9532600946862376, "grad_norm": 5.339117309938594, "learning_rate": 5.720857023582382e-08, "loss": 17.0565, "step": 52150 }, { "epoch": 0.9532783738826841, "grad_norm": 7.490529187601663, "learning_rate": 5.716392870423693e-08, "loss": 17.513, "step": 52151 }, { "epoch": 0.9532966530791307, "grad_norm": 5.827195914099974, "learning_rate": 5.7119304496860985e-08, "loss": 17.2473, "step": 52152 }, { "epoch": 0.9533149322755772, "grad_norm": 5.5777695920280195, "learning_rate": 5.707469761385365e-08, "loss": 16.8603, "step": 52153 }, { "epoch": 0.9533332114720237, "grad_norm": 4.974845140194684, "learning_rate": 5.703010805537035e-08, "loss": 16.8411, "step": 52154 }, { "epoch": 0.9533514906684702, "grad_norm": 5.591808568456374, "learning_rate": 5.698553582156707e-08, "loss": 17.2503, "step": 52155 }, { "epoch": 0.9533697698649167, "grad_norm": 5.062389369752266, "learning_rate": 5.694098091260092e-08, "loss": 16.8843, "step": 52156 }, { "epoch": 0.9533880490613632, "grad_norm": 6.359799658253176, "learning_rate": 5.6896443328627873e-08, "loss": 17.2078, "step": 52157 }, { "epoch": 0.9534063282578098, "grad_norm": 6.221213724150196, "learning_rate": 5.685192306980336e-08, "loss": 17.5431, "step": 52158 }, { "epoch": 0.9534246074542563, "grad_norm": 5.612965500222642, "learning_rate": 5.6807420136284486e-08, "loss": 16.8209, "step": 52159 }, { "epoch": 0.9534428866507029, "grad_norm": 5.747944823280264, "learning_rate": 5.6762934528226676e-08, "loss": 17.0822, "step": 52160 }, { "epoch": 0.9534611658471494, "grad_norm": 5.534561149220877, "learning_rate": 5.6718466245785365e-08, "loss": 16.9576, "step": 52161 }, { "epoch": 0.9534794450435958, "grad_norm": 6.172600035910091, "learning_rate": 5.6674015289117647e-08, "loss": 17.1931, "step": 52162 }, { "epoch": 0.9534977242400424, "grad_norm": 7.702625197662258, "learning_rate": 5.662958165837728e-08, "loss": 17.6563, "step": 52163 }, { "epoch": 0.9535160034364889, "grad_norm": 5.675837517222033, "learning_rate": 5.658516535372249e-08, "loss": 16.9912, "step": 52164 }, { "epoch": 0.9535342826329355, "grad_norm": 5.747399446402414, "learning_rate": 5.654076637530703e-08, "loss": 17.1522, "step": 52165 }, { "epoch": 0.953552561829382, "grad_norm": 6.068469392447594, "learning_rate": 5.649638472328689e-08, "loss": 17.6105, "step": 52166 }, { "epoch": 0.9535708410258285, "grad_norm": 5.674469639526755, "learning_rate": 5.645202039781861e-08, "loss": 17.1463, "step": 52167 }, { "epoch": 0.953589120222275, "grad_norm": 6.856642515881532, "learning_rate": 5.6407673399057063e-08, "loss": 17.7009, "step": 52168 }, { "epoch": 0.9536073994187215, "grad_norm": 5.780014656213565, "learning_rate": 5.6363343727157126e-08, "loss": 17.1228, "step": 52169 }, { "epoch": 0.9536256786151681, "grad_norm": 6.356036153421756, "learning_rate": 5.631903138227535e-08, "loss": 17.2904, "step": 52170 }, { "epoch": 0.9536439578116146, "grad_norm": 4.608934303739825, "learning_rate": 5.627473636456604e-08, "loss": 16.851, "step": 52171 }, { "epoch": 0.9536622370080611, "grad_norm": 7.94314520244154, "learning_rate": 5.623045867418464e-08, "loss": 17.1322, "step": 52172 }, { "epoch": 0.9536805162045077, "grad_norm": 5.662550159359293, "learning_rate": 5.6186198311286575e-08, "loss": 17.1728, "step": 52173 }, { "epoch": 0.9536987954009541, "grad_norm": 7.379582730454664, "learning_rate": 5.6141955276026726e-08, "loss": 17.6995, "step": 52174 }, { "epoch": 0.9537170745974007, "grad_norm": 4.905120268828955, "learning_rate": 5.609772956856052e-08, "loss": 16.8335, "step": 52175 }, { "epoch": 0.9537353537938472, "grad_norm": 6.282352750051201, "learning_rate": 5.6053521189042835e-08, "loss": 16.9403, "step": 52176 }, { "epoch": 0.9537536329902937, "grad_norm": 5.583413404736408, "learning_rate": 5.600933013762855e-08, "loss": 17.0814, "step": 52177 }, { "epoch": 0.9537719121867403, "grad_norm": 7.170960493530532, "learning_rate": 5.596515641447253e-08, "loss": 17.7023, "step": 52178 }, { "epoch": 0.9537901913831868, "grad_norm": 6.140223618886253, "learning_rate": 5.592100001972967e-08, "loss": 17.5608, "step": 52179 }, { "epoch": 0.9538084705796334, "grad_norm": 7.601276505620235, "learning_rate": 5.587686095355427e-08, "loss": 17.4114, "step": 52180 }, { "epoch": 0.9538267497760798, "grad_norm": 6.115531935798765, "learning_rate": 5.583273921610177e-08, "loss": 17.2565, "step": 52181 }, { "epoch": 0.9538450289725263, "grad_norm": 6.430415530214452, "learning_rate": 5.57886348075265e-08, "loss": 17.4254, "step": 52182 }, { "epoch": 0.9538633081689729, "grad_norm": 5.461041935457914, "learning_rate": 5.574454772798277e-08, "loss": 17.1871, "step": 52183 }, { "epoch": 0.9538815873654194, "grad_norm": 7.62710782756409, "learning_rate": 5.5700477977625455e-08, "loss": 17.4799, "step": 52184 }, { "epoch": 0.953899866561866, "grad_norm": 6.7286524831074175, "learning_rate": 5.5656425556608887e-08, "loss": 17.0635, "step": 52185 }, { "epoch": 0.9539181457583125, "grad_norm": 7.285184560959151, "learning_rate": 5.5612390465087376e-08, "loss": 17.3984, "step": 52186 }, { "epoch": 0.9539364249547589, "grad_norm": 6.597048054873633, "learning_rate": 5.55683727032158e-08, "loss": 17.2152, "step": 52187 }, { "epoch": 0.9539547041512055, "grad_norm": 6.89713122743059, "learning_rate": 5.552437227114793e-08, "loss": 17.3943, "step": 52188 }, { "epoch": 0.953972983347652, "grad_norm": 6.3240027605448095, "learning_rate": 5.548038916903808e-08, "loss": 17.18, "step": 52189 }, { "epoch": 0.9539912625440986, "grad_norm": 4.569315861702104, "learning_rate": 5.543642339704003e-08, "loss": 16.6324, "step": 52190 }, { "epoch": 0.9540095417405451, "grad_norm": 7.5229969462856445, "learning_rate": 5.539247495530864e-08, "loss": 17.5531, "step": 52191 }, { "epoch": 0.9540278209369916, "grad_norm": 8.753460639595396, "learning_rate": 5.5348543843997126e-08, "loss": 17.8856, "step": 52192 }, { "epoch": 0.9540461001334382, "grad_norm": 6.935658715443515, "learning_rate": 5.530463006326037e-08, "loss": 17.5858, "step": 52193 }, { "epoch": 0.9540643793298846, "grad_norm": 6.6693403659097195, "learning_rate": 5.526073361325102e-08, "loss": 17.2494, "step": 52194 }, { "epoch": 0.9540826585263312, "grad_norm": 7.783469019119279, "learning_rate": 5.521685449412451e-08, "loss": 17.7788, "step": 52195 }, { "epoch": 0.9541009377227777, "grad_norm": 6.555488761978388, "learning_rate": 5.517299270603405e-08, "loss": 17.4572, "step": 52196 }, { "epoch": 0.9541192169192242, "grad_norm": 5.972659923773566, "learning_rate": 5.512914824913229e-08, "loss": 16.9509, "step": 52197 }, { "epoch": 0.9541374961156708, "grad_norm": 5.641714542379266, "learning_rate": 5.5085321123574675e-08, "loss": 17.2306, "step": 52198 }, { "epoch": 0.9541557753121173, "grad_norm": 5.928453952145993, "learning_rate": 5.50415113295133e-08, "loss": 17.5353, "step": 52199 }, { "epoch": 0.9541740545085639, "grad_norm": 5.978772992854726, "learning_rate": 5.4997718867102476e-08, "loss": 17.0863, "step": 52200 }, { "epoch": 0.9541923337050103, "grad_norm": 6.808371577669975, "learning_rate": 5.495394373649543e-08, "loss": 17.3204, "step": 52201 }, { "epoch": 0.9542106129014568, "grad_norm": 7.00212795649344, "learning_rate": 5.491018593784592e-08, "loss": 17.5228, "step": 52202 }, { "epoch": 0.9542288920979034, "grad_norm": 5.064277659711942, "learning_rate": 5.48664454713066e-08, "loss": 16.9787, "step": 52203 }, { "epoch": 0.9542471712943499, "grad_norm": 7.3131413286543, "learning_rate": 5.482272233703179e-08, "loss": 17.5462, "step": 52204 }, { "epoch": 0.9542654504907965, "grad_norm": 5.507718505451303, "learning_rate": 5.4779016535174146e-08, "loss": 16.9746, "step": 52205 }, { "epoch": 0.954283729687243, "grad_norm": 6.185450401587581, "learning_rate": 5.473532806588689e-08, "loss": 17.2659, "step": 52206 }, { "epoch": 0.9543020088836894, "grad_norm": 6.004149557895033, "learning_rate": 5.469165692932321e-08, "loss": 17.3189, "step": 52207 }, { "epoch": 0.954320288080136, "grad_norm": 7.623659679312569, "learning_rate": 5.464800312563578e-08, "loss": 17.8449, "step": 52208 }, { "epoch": 0.9543385672765825, "grad_norm": 8.032732204540569, "learning_rate": 5.460436665497837e-08, "loss": 17.5915, "step": 52209 }, { "epoch": 0.9543568464730291, "grad_norm": 5.575667444655538, "learning_rate": 5.456074751750307e-08, "loss": 17.1128, "step": 52210 }, { "epoch": 0.9543751256694756, "grad_norm": 7.760108505392089, "learning_rate": 5.451714571336364e-08, "loss": 17.7187, "step": 52211 }, { "epoch": 0.9543934048659221, "grad_norm": 6.287963837864757, "learning_rate": 5.447356124271219e-08, "loss": 17.4737, "step": 52212 }, { "epoch": 0.9544116840623686, "grad_norm": 6.488163255603743, "learning_rate": 5.442999410570249e-08, "loss": 17.2264, "step": 52213 }, { "epoch": 0.9544299632588151, "grad_norm": 6.567807387579522, "learning_rate": 5.438644430248552e-08, "loss": 17.571, "step": 52214 }, { "epoch": 0.9544482424552617, "grad_norm": 7.9370692454245635, "learning_rate": 5.4342911833215604e-08, "loss": 17.1253, "step": 52215 }, { "epoch": 0.9544665216517082, "grad_norm": 6.034268752961473, "learning_rate": 5.4299396698044295e-08, "loss": 17.0726, "step": 52216 }, { "epoch": 0.9544848008481547, "grad_norm": 5.649391659873478, "learning_rate": 5.425589889712424e-08, "loss": 17.156, "step": 52217 }, { "epoch": 0.9545030800446013, "grad_norm": 8.806319830676108, "learning_rate": 5.421241843060865e-08, "loss": 17.9558, "step": 52218 }, { "epoch": 0.9545213592410478, "grad_norm": 5.30526595379786, "learning_rate": 5.416895529864852e-08, "loss": 16.7658, "step": 52219 }, { "epoch": 0.9545396384374943, "grad_norm": 6.087250360744819, "learning_rate": 5.412550950139761e-08, "loss": 17.2719, "step": 52220 }, { "epoch": 0.9545579176339408, "grad_norm": 6.713821352993216, "learning_rate": 5.408208103900747e-08, "loss": 17.2296, "step": 52221 }, { "epoch": 0.9545761968303873, "grad_norm": 5.738519007280106, "learning_rate": 5.40386699116302e-08, "loss": 16.875, "step": 52222 }, { "epoch": 0.9545944760268339, "grad_norm": 6.917897002337695, "learning_rate": 5.3995276119419016e-08, "loss": 17.5668, "step": 52223 }, { "epoch": 0.9546127552232804, "grad_norm": 4.892680304437527, "learning_rate": 5.3951899662524895e-08, "loss": 16.8684, "step": 52224 }, { "epoch": 0.9546310344197269, "grad_norm": 6.393151601892703, "learning_rate": 5.39085405410994e-08, "loss": 17.3711, "step": 52225 }, { "epoch": 0.9546493136161734, "grad_norm": 8.990732376378523, "learning_rate": 5.386519875529628e-08, "loss": 18.0157, "step": 52226 }, { "epoch": 0.9546675928126199, "grad_norm": 6.717963044680023, "learning_rate": 5.382187430526597e-08, "loss": 17.423, "step": 52227 }, { "epoch": 0.9546858720090665, "grad_norm": 6.403360883883352, "learning_rate": 5.377856719116059e-08, "loss": 17.3467, "step": 52228 }, { "epoch": 0.954704151205513, "grad_norm": 5.945032902997414, "learning_rate": 5.3735277413132224e-08, "loss": 17.3467, "step": 52229 }, { "epoch": 0.9547224304019595, "grad_norm": 6.1023004093859985, "learning_rate": 5.369200497133242e-08, "loss": 17.2971, "step": 52230 }, { "epoch": 0.9547407095984061, "grad_norm": 4.809510989941846, "learning_rate": 5.364874986591329e-08, "loss": 16.7888, "step": 52231 }, { "epoch": 0.9547589887948525, "grad_norm": 6.353385634412626, "learning_rate": 5.3605512097025805e-08, "loss": 17.1569, "step": 52232 }, { "epoch": 0.9547772679912991, "grad_norm": 5.9275672092789335, "learning_rate": 5.3562291664822075e-08, "loss": 17.1003, "step": 52233 }, { "epoch": 0.9547955471877456, "grad_norm": 5.856847761384628, "learning_rate": 5.351908856945254e-08, "loss": 17.1535, "step": 52234 }, { "epoch": 0.9548138263841921, "grad_norm": 6.913905431093957, "learning_rate": 5.34759028110704e-08, "loss": 17.3044, "step": 52235 }, { "epoch": 0.9548321055806387, "grad_norm": 5.918140601263954, "learning_rate": 5.343273438982555e-08, "loss": 17.0987, "step": 52236 }, { "epoch": 0.9548503847770852, "grad_norm": 5.791155419364004, "learning_rate": 5.3389583305868964e-08, "loss": 17.2117, "step": 52237 }, { "epoch": 0.9548686639735318, "grad_norm": 6.1441284194217385, "learning_rate": 5.334644955935331e-08, "loss": 17.4607, "step": 52238 }, { "epoch": 0.9548869431699782, "grad_norm": 6.729614968292356, "learning_rate": 5.3303333150429016e-08, "loss": 17.4863, "step": 52239 }, { "epoch": 0.9549052223664247, "grad_norm": 6.562977887507747, "learning_rate": 5.3260234079247076e-08, "loss": 17.5111, "step": 52240 }, { "epoch": 0.9549235015628713, "grad_norm": 7.199897622492473, "learning_rate": 5.321715234595959e-08, "loss": 17.635, "step": 52241 }, { "epoch": 0.9549417807593178, "grad_norm": 5.55197289306934, "learning_rate": 5.317408795071588e-08, "loss": 17.0482, "step": 52242 }, { "epoch": 0.9549600599557644, "grad_norm": 5.64677100384571, "learning_rate": 5.3131040893668054e-08, "loss": 16.9714, "step": 52243 }, { "epoch": 0.9549783391522109, "grad_norm": 6.550978449150148, "learning_rate": 5.308801117496654e-08, "loss": 17.5582, "step": 52244 }, { "epoch": 0.9549966183486573, "grad_norm": 7.771520532402159, "learning_rate": 5.304499879476233e-08, "loss": 17.6255, "step": 52245 }, { "epoch": 0.9550148975451039, "grad_norm": 8.693770296219066, "learning_rate": 5.300200375320641e-08, "loss": 17.7804, "step": 52246 }, { "epoch": 0.9550331767415504, "grad_norm": 5.2341844338278385, "learning_rate": 5.2959026050448667e-08, "loss": 17.0958, "step": 52247 }, { "epoch": 0.955051455937997, "grad_norm": 6.959708994071204, "learning_rate": 5.291606568664009e-08, "loss": 17.5206, "step": 52248 }, { "epoch": 0.9550697351344435, "grad_norm": 5.631571060666098, "learning_rate": 5.287312266193223e-08, "loss": 16.9595, "step": 52249 }, { "epoch": 0.95508801433089, "grad_norm": 4.636073084641776, "learning_rate": 5.2830196976474405e-08, "loss": 16.815, "step": 52250 }, { "epoch": 0.9551062935273366, "grad_norm": 6.316988665803342, "learning_rate": 5.278728863041704e-08, "loss": 17.25, "step": 52251 }, { "epoch": 0.955124572723783, "grad_norm": 5.834153942560149, "learning_rate": 5.27443976239117e-08, "loss": 17.1549, "step": 52252 }, { "epoch": 0.9551428519202296, "grad_norm": 5.7554996650546535, "learning_rate": 5.270152395710715e-08, "loss": 17.104, "step": 52253 }, { "epoch": 0.9551611311166761, "grad_norm": 5.987962228491177, "learning_rate": 5.2658667630154924e-08, "loss": 17.3993, "step": 52254 }, { "epoch": 0.9551794103131226, "grad_norm": 7.966701732645587, "learning_rate": 5.2615828643204915e-08, "loss": 17.6208, "step": 52255 }, { "epoch": 0.9551976895095692, "grad_norm": 7.257942717583455, "learning_rate": 5.2573006996406995e-08, "loss": 17.3633, "step": 52256 }, { "epoch": 0.9552159687060157, "grad_norm": 5.458883615257778, "learning_rate": 5.253020268991105e-08, "loss": 16.9553, "step": 52257 }, { "epoch": 0.9552342479024623, "grad_norm": 6.279422354173808, "learning_rate": 5.248741572386751e-08, "loss": 17.4096, "step": 52258 }, { "epoch": 0.9552525270989087, "grad_norm": 5.5726437357361185, "learning_rate": 5.244464609842626e-08, "loss": 16.9633, "step": 52259 }, { "epoch": 0.9552708062953552, "grad_norm": 7.38148995764287, "learning_rate": 5.240189381373717e-08, "loss": 17.4909, "step": 52260 }, { "epoch": 0.9552890854918018, "grad_norm": 8.972260205636674, "learning_rate": 5.235915886995069e-08, "loss": 18.9416, "step": 52261 }, { "epoch": 0.9553073646882483, "grad_norm": 5.88644913909289, "learning_rate": 5.231644126721502e-08, "loss": 16.981, "step": 52262 }, { "epoch": 0.9553256438846949, "grad_norm": 5.16398289919297, "learning_rate": 5.227374100568172e-08, "loss": 17.0071, "step": 52263 }, { "epoch": 0.9553439230811414, "grad_norm": 7.40429880386952, "learning_rate": 5.2231058085499e-08, "loss": 17.8914, "step": 52264 }, { "epoch": 0.9553622022775878, "grad_norm": 5.70833369082774, "learning_rate": 5.218839250681729e-08, "loss": 17.2495, "step": 52265 }, { "epoch": 0.9553804814740344, "grad_norm": 6.916730058244276, "learning_rate": 5.2145744269786466e-08, "loss": 17.7706, "step": 52266 }, { "epoch": 0.9553987606704809, "grad_norm": 5.581151651163972, "learning_rate": 5.210311337455476e-08, "loss": 17.0674, "step": 52267 }, { "epoch": 0.9554170398669275, "grad_norm": 5.27571102515712, "learning_rate": 5.206049982127259e-08, "loss": 17.321, "step": 52268 }, { "epoch": 0.955435319063374, "grad_norm": 7.234393758863205, "learning_rate": 5.201790361008929e-08, "loss": 17.9752, "step": 52269 }, { "epoch": 0.9554535982598205, "grad_norm": 7.323275651701272, "learning_rate": 5.197532474115308e-08, "loss": 17.8563, "step": 52270 }, { "epoch": 0.955471877456267, "grad_norm": 5.618585903411751, "learning_rate": 5.193276321461438e-08, "loss": 17.1082, "step": 52271 }, { "epoch": 0.9554901566527135, "grad_norm": 7.123461296912219, "learning_rate": 5.189021903062252e-08, "loss": 17.6634, "step": 52272 }, { "epoch": 0.9555084358491601, "grad_norm": 5.96887612542735, "learning_rate": 5.1847692189325175e-08, "loss": 17.2366, "step": 52273 }, { "epoch": 0.9555267150456066, "grad_norm": 5.6147588329012015, "learning_rate": 5.180518269087276e-08, "loss": 17.1559, "step": 52274 }, { "epoch": 0.9555449942420531, "grad_norm": 7.721110609053905, "learning_rate": 5.1762690535413495e-08, "loss": 17.7675, "step": 52275 }, { "epoch": 0.9555632734384997, "grad_norm": 6.33630883791198, "learning_rate": 5.172021572309671e-08, "loss": 17.1492, "step": 52276 }, { "epoch": 0.9555815526349462, "grad_norm": 7.05993029336774, "learning_rate": 5.167775825407173e-08, "loss": 17.9325, "step": 52277 }, { "epoch": 0.9555998318313927, "grad_norm": 6.000556617584791, "learning_rate": 5.163531812848621e-08, "loss": 17.3538, "step": 52278 }, { "epoch": 0.9556181110278392, "grad_norm": 6.497393548007953, "learning_rate": 5.159289534648948e-08, "loss": 17.6506, "step": 52279 }, { "epoch": 0.9556363902242857, "grad_norm": 6.205240968605086, "learning_rate": 5.155048990823086e-08, "loss": 17.4005, "step": 52280 }, { "epoch": 0.9556546694207323, "grad_norm": 7.322351670839708, "learning_rate": 5.150810181385801e-08, "loss": 17.6614, "step": 52281 }, { "epoch": 0.9556729486171788, "grad_norm": 7.094486579829761, "learning_rate": 5.146573106351971e-08, "loss": 17.7221, "step": 52282 }, { "epoch": 0.9556912278136254, "grad_norm": 6.445264317242786, "learning_rate": 5.142337765736416e-08, "loss": 17.5834, "step": 52283 }, { "epoch": 0.9557095070100718, "grad_norm": 6.5990947698946645, "learning_rate": 5.138104159554069e-08, "loss": 17.2488, "step": 52284 }, { "epoch": 0.9557277862065183, "grad_norm": 5.7633231663531115, "learning_rate": 5.1338722878197524e-08, "loss": 17.1599, "step": 52285 }, { "epoch": 0.9557460654029649, "grad_norm": 6.945196515207631, "learning_rate": 5.129642150548286e-08, "loss": 17.3823, "step": 52286 }, { "epoch": 0.9557643445994114, "grad_norm": 7.1307076124817845, "learning_rate": 5.1254137477544375e-08, "loss": 17.5301, "step": 52287 }, { "epoch": 0.955782623795858, "grad_norm": 5.656109731729442, "learning_rate": 5.121187079453083e-08, "loss": 17.1754, "step": 52288 }, { "epoch": 0.9558009029923045, "grad_norm": 5.459785318391293, "learning_rate": 5.116962145659043e-08, "loss": 17.3768, "step": 52289 }, { "epoch": 0.955819182188751, "grad_norm": 5.100416331174998, "learning_rate": 5.112738946387086e-08, "loss": 16.8504, "step": 52290 }, { "epoch": 0.9558374613851975, "grad_norm": 6.026411447898991, "learning_rate": 5.108517481652031e-08, "loss": 17.3554, "step": 52291 }, { "epoch": 0.955855740581644, "grad_norm": 5.741129524683492, "learning_rate": 5.104297751468645e-08, "loss": 17.2722, "step": 52292 }, { "epoch": 0.9558740197780905, "grad_norm": 7.58196621707162, "learning_rate": 5.1000797558517504e-08, "loss": 17.6385, "step": 52293 }, { "epoch": 0.9558922989745371, "grad_norm": 6.94724593044074, "learning_rate": 5.0958634948162225e-08, "loss": 17.3227, "step": 52294 }, { "epoch": 0.9559105781709836, "grad_norm": 6.177607445620359, "learning_rate": 5.091648968376661e-08, "loss": 17.0697, "step": 52295 }, { "epoch": 0.9559288573674302, "grad_norm": 5.056570404178011, "learning_rate": 5.087436176547944e-08, "loss": 16.7561, "step": 52296 }, { "epoch": 0.9559471365638766, "grad_norm": 6.660597988701852, "learning_rate": 5.083225119344837e-08, "loss": 17.0374, "step": 52297 }, { "epoch": 0.9559654157603231, "grad_norm": 4.62626486496444, "learning_rate": 5.0790157967820495e-08, "loss": 16.7466, "step": 52298 }, { "epoch": 0.9559836949567697, "grad_norm": 7.025087252547145, "learning_rate": 5.0748082088744044e-08, "loss": 17.3116, "step": 52299 }, { "epoch": 0.9560019741532162, "grad_norm": 7.240128420230771, "learning_rate": 5.0706023556366115e-08, "loss": 17.0542, "step": 52300 }, { "epoch": 0.9560202533496628, "grad_norm": 4.994629990931607, "learning_rate": 5.066398237083381e-08, "loss": 16.7996, "step": 52301 }, { "epoch": 0.9560385325461093, "grad_norm": 5.730086603407961, "learning_rate": 5.062195853229479e-08, "loss": 17.1271, "step": 52302 }, { "epoch": 0.9560568117425557, "grad_norm": 6.880246238804541, "learning_rate": 5.0579952040896165e-08, "loss": 17.3569, "step": 52303 }, { "epoch": 0.9560750909390023, "grad_norm": 6.137423120200472, "learning_rate": 5.0537962896785585e-08, "loss": 17.0222, "step": 52304 }, { "epoch": 0.9560933701354488, "grad_norm": 5.927734047722068, "learning_rate": 5.049599110010961e-08, "loss": 17.2984, "step": 52305 }, { "epoch": 0.9561116493318954, "grad_norm": 5.976965065391045, "learning_rate": 5.045403665101645e-08, "loss": 17.1414, "step": 52306 }, { "epoch": 0.9561299285283419, "grad_norm": 6.364570739043797, "learning_rate": 5.041209954965154e-08, "loss": 17.1664, "step": 52307 }, { "epoch": 0.9561482077247884, "grad_norm": 6.389839876889565, "learning_rate": 5.03701797961631e-08, "loss": 17.2653, "step": 52308 }, { "epoch": 0.956166486921235, "grad_norm": 8.95556262732119, "learning_rate": 5.032827739069768e-08, "loss": 17.5152, "step": 52309 }, { "epoch": 0.9561847661176814, "grad_norm": 5.921739490967006, "learning_rate": 5.0286392333401827e-08, "loss": 16.7808, "step": 52310 }, { "epoch": 0.956203045314128, "grad_norm": 5.7841373077481935, "learning_rate": 5.02445246244232e-08, "loss": 16.9666, "step": 52311 }, { "epoch": 0.9562213245105745, "grad_norm": 5.802147337381911, "learning_rate": 5.020267426390779e-08, "loss": 17.0361, "step": 52312 }, { "epoch": 0.956239603707021, "grad_norm": 5.922615756155846, "learning_rate": 5.0160841252002156e-08, "loss": 17.656, "step": 52313 }, { "epoch": 0.9562578829034676, "grad_norm": 6.153186054484972, "learning_rate": 5.01190255888534e-08, "loss": 17.4361, "step": 52314 }, { "epoch": 0.9562761620999141, "grad_norm": 6.344981256875926, "learning_rate": 5.0077227274608064e-08, "loss": 17.5835, "step": 52315 }, { "epoch": 0.9562944412963607, "grad_norm": 5.9557263511089085, "learning_rate": 5.003544630941215e-08, "loss": 17.411, "step": 52316 }, { "epoch": 0.9563127204928071, "grad_norm": 4.9459117000464765, "learning_rate": 4.9993682693413314e-08, "loss": 16.7929, "step": 52317 }, { "epoch": 0.9563309996892536, "grad_norm": 6.009321205505837, "learning_rate": 4.995193642675644e-08, "loss": 17.0074, "step": 52318 }, { "epoch": 0.9563492788857002, "grad_norm": 7.081494955260101, "learning_rate": 4.991020750958808e-08, "loss": 17.7444, "step": 52319 }, { "epoch": 0.9563675580821467, "grad_norm": 5.047110454556089, "learning_rate": 4.986849594205534e-08, "loss": 16.7589, "step": 52320 }, { "epoch": 0.9563858372785933, "grad_norm": 5.512772436952929, "learning_rate": 4.9826801724304206e-08, "loss": 16.7461, "step": 52321 }, { "epoch": 0.9564041164750398, "grad_norm": 8.17083212612177, "learning_rate": 4.9785124856480685e-08, "loss": 18.0522, "step": 52322 }, { "epoch": 0.9564223956714862, "grad_norm": 5.372290059221783, "learning_rate": 4.97434653387302e-08, "loss": 16.8496, "step": 52323 }, { "epoch": 0.9564406748679328, "grad_norm": 6.47080084976348, "learning_rate": 4.970182317119931e-08, "loss": 17.4579, "step": 52324 }, { "epoch": 0.9564589540643793, "grad_norm": 6.1130895863918004, "learning_rate": 4.966019835403457e-08, "loss": 17.2444, "step": 52325 }, { "epoch": 0.9564772332608259, "grad_norm": 6.6528325088570055, "learning_rate": 4.9618590887380856e-08, "loss": 17.4645, "step": 52326 }, { "epoch": 0.9564955124572724, "grad_norm": 6.132623251392789, "learning_rate": 4.9577000771384165e-08, "loss": 17.2891, "step": 52327 }, { "epoch": 0.9565137916537189, "grad_norm": 6.66127245541364, "learning_rate": 4.953542800619105e-08, "loss": 17.8667, "step": 52328 }, { "epoch": 0.9565320708501655, "grad_norm": 5.713122682471637, "learning_rate": 4.9493872591945824e-08, "loss": 16.8587, "step": 52329 }, { "epoch": 0.9565503500466119, "grad_norm": 6.086062787136669, "learning_rate": 4.945233452879616e-08, "loss": 17.4128, "step": 52330 }, { "epoch": 0.9565686292430585, "grad_norm": 7.5239706120641445, "learning_rate": 4.941081381688584e-08, "loss": 17.7366, "step": 52331 }, { "epoch": 0.956586908439505, "grad_norm": 4.2148778491917085, "learning_rate": 4.936931045636084e-08, "loss": 16.6026, "step": 52332 }, { "epoch": 0.9566051876359515, "grad_norm": 6.522908313740717, "learning_rate": 4.932782444736717e-08, "loss": 17.6255, "step": 52333 }, { "epoch": 0.9566234668323981, "grad_norm": 4.750671132006854, "learning_rate": 4.92863557900497e-08, "loss": 16.8428, "step": 52334 }, { "epoch": 0.9566417460288446, "grad_norm": 4.796845582835864, "learning_rate": 4.924490448455388e-08, "loss": 17.06, "step": 52335 }, { "epoch": 0.9566600252252911, "grad_norm": 6.581915379515236, "learning_rate": 4.92034705310257e-08, "loss": 17.451, "step": 52336 }, { "epoch": 0.9566783044217376, "grad_norm": 5.580206039995196, "learning_rate": 4.916205392960893e-08, "loss": 17.1222, "step": 52337 }, { "epoch": 0.9566965836181841, "grad_norm": 6.551444023727982, "learning_rate": 4.9120654680449574e-08, "loss": 17.3869, "step": 52338 }, { "epoch": 0.9567148628146307, "grad_norm": 5.405920658292906, "learning_rate": 4.907927278369307e-08, "loss": 17.1549, "step": 52339 }, { "epoch": 0.9567331420110772, "grad_norm": 5.390369158950326, "learning_rate": 4.903790823948429e-08, "loss": 17.0863, "step": 52340 }, { "epoch": 0.9567514212075238, "grad_norm": 4.6325839969827465, "learning_rate": 4.899656104796757e-08, "loss": 16.7894, "step": 52341 }, { "epoch": 0.9567697004039702, "grad_norm": 6.875191151776644, "learning_rate": 4.8955231209288356e-08, "loss": 17.4841, "step": 52342 }, { "epoch": 0.9567879796004167, "grad_norm": 7.178717054729031, "learning_rate": 4.8913918723591525e-08, "loss": 17.4015, "step": 52343 }, { "epoch": 0.9568062587968633, "grad_norm": 6.146534711956605, "learning_rate": 4.887262359102196e-08, "loss": 17.5246, "step": 52344 }, { "epoch": 0.9568245379933098, "grad_norm": 25.27504586956296, "learning_rate": 4.8831345811724e-08, "loss": 20.5359, "step": 52345 }, { "epoch": 0.9568428171897564, "grad_norm": 6.39018292316836, "learning_rate": 4.879008538584251e-08, "loss": 17.4896, "step": 52346 }, { "epoch": 0.9568610963862029, "grad_norm": 5.064908116856053, "learning_rate": 4.874884231352184e-08, "loss": 16.7918, "step": 52347 }, { "epoch": 0.9568793755826493, "grad_norm": 6.138352971996289, "learning_rate": 4.8707616594907415e-08, "loss": 17.1761, "step": 52348 }, { "epoch": 0.9568976547790959, "grad_norm": 5.463782243188115, "learning_rate": 4.8666408230142456e-08, "loss": 17.137, "step": 52349 }, { "epoch": 0.9569159339755424, "grad_norm": 7.332557812748221, "learning_rate": 4.8625217219372415e-08, "loss": 17.6232, "step": 52350 }, { "epoch": 0.956934213171989, "grad_norm": 7.424313068784862, "learning_rate": 4.85840435627416e-08, "loss": 17.6801, "step": 52351 }, { "epoch": 0.9569524923684355, "grad_norm": 6.1308524402845395, "learning_rate": 4.8542887260393804e-08, "loss": 17.5484, "step": 52352 }, { "epoch": 0.956970771564882, "grad_norm": 6.257925889538529, "learning_rate": 4.8501748312473894e-08, "loss": 17.2811, "step": 52353 }, { "epoch": 0.9569890507613286, "grad_norm": 5.936184531066679, "learning_rate": 4.84606267191251e-08, "loss": 17.0006, "step": 52354 }, { "epoch": 0.957007329957775, "grad_norm": 6.3133639226513, "learning_rate": 4.841952248049231e-08, "loss": 17.2867, "step": 52355 }, { "epoch": 0.9570256091542216, "grad_norm": 5.958949318022548, "learning_rate": 4.837843559671929e-08, "loss": 17.2554, "step": 52356 }, { "epoch": 0.9570438883506681, "grad_norm": 7.535097063766757, "learning_rate": 4.833736606795092e-08, "loss": 17.7589, "step": 52357 }, { "epoch": 0.9570621675471146, "grad_norm": 8.290049004753447, "learning_rate": 4.8296313894329317e-08, "loss": 18.7524, "step": 52358 }, { "epoch": 0.9570804467435612, "grad_norm": 6.464450498690687, "learning_rate": 4.825527907600047e-08, "loss": 17.3645, "step": 52359 }, { "epoch": 0.9570987259400077, "grad_norm": 6.528428809707585, "learning_rate": 4.821426161310649e-08, "loss": 17.3741, "step": 52360 }, { "epoch": 0.9571170051364541, "grad_norm": 5.965584136222184, "learning_rate": 4.817326150579171e-08, "loss": 17.1141, "step": 52361 }, { "epoch": 0.9571352843329007, "grad_norm": 5.974675135564484, "learning_rate": 4.813227875420046e-08, "loss": 17.2551, "step": 52362 }, { "epoch": 0.9571535635293472, "grad_norm": 5.970103265378015, "learning_rate": 4.8091313358475944e-08, "loss": 17.1458, "step": 52363 }, { "epoch": 0.9571718427257938, "grad_norm": 6.074568017547873, "learning_rate": 4.805036531876139e-08, "loss": 17.0213, "step": 52364 }, { "epoch": 0.9571901219222403, "grad_norm": 6.896041988339129, "learning_rate": 4.800943463520058e-08, "loss": 17.7807, "step": 52365 }, { "epoch": 0.9572084011186868, "grad_norm": 4.846092047956769, "learning_rate": 4.7968521307936724e-08, "loss": 16.7476, "step": 52366 }, { "epoch": 0.9572266803151334, "grad_norm": 5.7179196182447845, "learning_rate": 4.7927625337113594e-08, "loss": 17.0535, "step": 52367 }, { "epoch": 0.9572449595115798, "grad_norm": 5.909812481443654, "learning_rate": 4.7886746722874966e-08, "loss": 17.3795, "step": 52368 }, { "epoch": 0.9572632387080264, "grad_norm": 6.049135402938758, "learning_rate": 4.784588546536295e-08, "loss": 16.8585, "step": 52369 }, { "epoch": 0.9572815179044729, "grad_norm": 5.389458282862093, "learning_rate": 4.7805041564721875e-08, "loss": 16.9311, "step": 52370 }, { "epoch": 0.9572997971009194, "grad_norm": 7.1614430087045555, "learning_rate": 4.7764215021094405e-08, "loss": 17.4464, "step": 52371 }, { "epoch": 0.957318076297366, "grad_norm": 5.57977286007587, "learning_rate": 4.772340583462265e-08, "loss": 17.072, "step": 52372 }, { "epoch": 0.9573363554938125, "grad_norm": 6.130119373413818, "learning_rate": 4.768261400545149e-08, "loss": 17.1295, "step": 52373 }, { "epoch": 0.957354634690259, "grad_norm": 6.059820965970078, "learning_rate": 4.764183953372303e-08, "loss": 17.2791, "step": 52374 }, { "epoch": 0.9573729138867055, "grad_norm": 7.617955822055259, "learning_rate": 4.7601082419579944e-08, "loss": 17.4346, "step": 52375 }, { "epoch": 0.957391193083152, "grad_norm": 5.434687495168102, "learning_rate": 4.756034266316545e-08, "loss": 16.8285, "step": 52376 }, { "epoch": 0.9574094722795986, "grad_norm": 11.308649219230029, "learning_rate": 4.7519620264621645e-08, "loss": 17.8563, "step": 52377 }, { "epoch": 0.9574277514760451, "grad_norm": 5.669110026186929, "learning_rate": 4.747891522409287e-08, "loss": 16.9891, "step": 52378 }, { "epoch": 0.9574460306724917, "grad_norm": 6.11295705034345, "learning_rate": 4.7438227541720114e-08, "loss": 17.1608, "step": 52379 }, { "epoch": 0.9574643098689382, "grad_norm": 5.385743490890465, "learning_rate": 4.7397557217646604e-08, "loss": 17.0563, "step": 52380 }, { "epoch": 0.9574825890653846, "grad_norm": 6.533406438717779, "learning_rate": 4.7356904252015e-08, "loss": 17.34, "step": 52381 }, { "epoch": 0.9575008682618312, "grad_norm": 7.32975344886263, "learning_rate": 4.731626864496741e-08, "loss": 16.9987, "step": 52382 }, { "epoch": 0.9575191474582777, "grad_norm": 5.418956332335014, "learning_rate": 4.7275650396647054e-08, "loss": 16.9951, "step": 52383 }, { "epoch": 0.9575374266547243, "grad_norm": 7.305146046985162, "learning_rate": 4.723504950719493e-08, "loss": 17.4293, "step": 52384 }, { "epoch": 0.9575557058511708, "grad_norm": 6.293515562469539, "learning_rate": 4.719446597675481e-08, "loss": 17.2537, "step": 52385 }, { "epoch": 0.9575739850476173, "grad_norm": 4.712277572463112, "learning_rate": 4.715389980546825e-08, "loss": 16.6968, "step": 52386 }, { "epoch": 0.9575922642440639, "grad_norm": 6.147189250389861, "learning_rate": 4.711335099347736e-08, "loss": 17.0185, "step": 52387 }, { "epoch": 0.9576105434405103, "grad_norm": 6.184089946066427, "learning_rate": 4.7072819540924795e-08, "loss": 17.2988, "step": 52388 }, { "epoch": 0.9576288226369569, "grad_norm": 5.9847021642612965, "learning_rate": 4.703230544795156e-08, "loss": 17.2287, "step": 52389 }, { "epoch": 0.9576471018334034, "grad_norm": 8.989852315476377, "learning_rate": 4.6991808714700884e-08, "loss": 18.3612, "step": 52390 }, { "epoch": 0.9576653810298499, "grad_norm": 8.957503267210855, "learning_rate": 4.695132934131374e-08, "loss": 17.9057, "step": 52391 }, { "epoch": 0.9576836602262965, "grad_norm": 5.898574943885582, "learning_rate": 4.691086732793226e-08, "loss": 17.3722, "step": 52392 }, { "epoch": 0.957701939422743, "grad_norm": 5.241420295057719, "learning_rate": 4.6870422674698546e-08, "loss": 16.962, "step": 52393 }, { "epoch": 0.9577202186191895, "grad_norm": 7.986236553057266, "learning_rate": 4.682999538175415e-08, "loss": 17.501, "step": 52394 }, { "epoch": 0.957738497815636, "grad_norm": 4.801438943996015, "learning_rate": 4.6789585449240614e-08, "loss": 16.6106, "step": 52395 }, { "epoch": 0.9577567770120825, "grad_norm": 5.392759438373708, "learning_rate": 4.674919287730062e-08, "loss": 17.3193, "step": 52396 }, { "epoch": 0.9577750562085291, "grad_norm": 6.473418800546223, "learning_rate": 4.670881766607405e-08, "loss": 17.4955, "step": 52397 }, { "epoch": 0.9577933354049756, "grad_norm": 6.554437573765462, "learning_rate": 4.6668459815703556e-08, "loss": 17.316, "step": 52398 }, { "epoch": 0.9578116146014222, "grad_norm": 5.009048114221057, "learning_rate": 4.662811932633016e-08, "loss": 16.9326, "step": 52399 }, { "epoch": 0.9578298937978686, "grad_norm": 7.044972781606569, "learning_rate": 4.658779619809539e-08, "loss": 17.7054, "step": 52400 }, { "epoch": 0.9578481729943151, "grad_norm": 6.7140187375162546, "learning_rate": 4.6547490431140264e-08, "loss": 17.2255, "step": 52401 }, { "epoch": 0.9578664521907617, "grad_norm": 6.4567697562932285, "learning_rate": 4.650720202560688e-08, "loss": 17.2936, "step": 52402 }, { "epoch": 0.9578847313872082, "grad_norm": 5.2630969230350475, "learning_rate": 4.646693098163568e-08, "loss": 17.0483, "step": 52403 }, { "epoch": 0.9579030105836548, "grad_norm": 6.1125025447317, "learning_rate": 4.642667729936823e-08, "loss": 17.0573, "step": 52404 }, { "epoch": 0.9579212897801013, "grad_norm": 5.704372859011193, "learning_rate": 4.638644097894496e-08, "loss": 17.0543, "step": 52405 }, { "epoch": 0.9579395689765478, "grad_norm": 5.598706746555568, "learning_rate": 4.6346222020507424e-08, "loss": 17.1138, "step": 52406 }, { "epoch": 0.9579578481729943, "grad_norm": 8.041869716111016, "learning_rate": 4.630602042419718e-08, "loss": 17.4144, "step": 52407 }, { "epoch": 0.9579761273694408, "grad_norm": 5.817237703025092, "learning_rate": 4.626583619015357e-08, "loss": 17.1602, "step": 52408 }, { "epoch": 0.9579944065658874, "grad_norm": 6.536224371114475, "learning_rate": 4.622566931851924e-08, "loss": 17.3121, "step": 52409 }, { "epoch": 0.9580126857623339, "grad_norm": 6.971477738714405, "learning_rate": 4.6185519809433535e-08, "loss": 17.9419, "step": 52410 }, { "epoch": 0.9580309649587804, "grad_norm": 5.790182133470015, "learning_rate": 4.614538766303744e-08, "loss": 17.2187, "step": 52411 }, { "epoch": 0.958049244155227, "grad_norm": 5.551029550610074, "learning_rate": 4.610527287947253e-08, "loss": 17.1739, "step": 52412 }, { "epoch": 0.9580675233516734, "grad_norm": 4.786971229353017, "learning_rate": 4.606517545887867e-08, "loss": 16.7853, "step": 52413 }, { "epoch": 0.95808580254812, "grad_norm": 8.68460227316054, "learning_rate": 4.602509540139577e-08, "loss": 17.8109, "step": 52414 }, { "epoch": 0.9581040817445665, "grad_norm": 6.4403896634517315, "learning_rate": 4.5985032707165925e-08, "loss": 17.4997, "step": 52415 }, { "epoch": 0.958122360941013, "grad_norm": 6.369771179750261, "learning_rate": 4.5944987376328465e-08, "loss": 17.103, "step": 52416 }, { "epoch": 0.9581406401374596, "grad_norm": 7.0465692635937, "learning_rate": 4.5904959409023287e-08, "loss": 17.3751, "step": 52417 }, { "epoch": 0.9581589193339061, "grad_norm": 4.802696079297325, "learning_rate": 4.586494880539194e-08, "loss": 16.7739, "step": 52418 }, { "epoch": 0.9581771985303527, "grad_norm": 7.332717285160751, "learning_rate": 4.5824955565573756e-08, "loss": 17.1936, "step": 52419 }, { "epoch": 0.9581954777267991, "grad_norm": 6.202008935737675, "learning_rate": 4.578497968970918e-08, "loss": 17.483, "step": 52420 }, { "epoch": 0.9582137569232456, "grad_norm": 6.227336408482727, "learning_rate": 4.574502117793867e-08, "loss": 17.3136, "step": 52421 }, { "epoch": 0.9582320361196922, "grad_norm": 6.471065793141092, "learning_rate": 4.5705080030401525e-08, "loss": 17.3664, "step": 52422 }, { "epoch": 0.9582503153161387, "grad_norm": 6.115299946774408, "learning_rate": 4.5665156247238775e-08, "loss": 17.185, "step": 52423 }, { "epoch": 0.9582685945125853, "grad_norm": 6.051647848951898, "learning_rate": 4.5625249828589734e-08, "loss": 17.2617, "step": 52424 }, { "epoch": 0.9582868737090318, "grad_norm": 6.580272904255461, "learning_rate": 4.5585360774593746e-08, "loss": 17.2486, "step": 52425 }, { "epoch": 0.9583051529054782, "grad_norm": 5.3277151852540445, "learning_rate": 4.5545489085391247e-08, "loss": 16.9145, "step": 52426 }, { "epoch": 0.9583234321019248, "grad_norm": 4.537836906283458, "learning_rate": 4.550563476112213e-08, "loss": 16.91, "step": 52427 }, { "epoch": 0.9583417112983713, "grad_norm": 6.133800328487303, "learning_rate": 4.5465797801925726e-08, "loss": 17.5302, "step": 52428 }, { "epoch": 0.9583599904948178, "grad_norm": 7.288695783435007, "learning_rate": 4.5425978207941366e-08, "loss": 17.6517, "step": 52429 }, { "epoch": 0.9583782696912644, "grad_norm": 5.991169556096653, "learning_rate": 4.5386175979309497e-08, "loss": 16.9606, "step": 52430 }, { "epoch": 0.9583965488877109, "grad_norm": 6.5459536138672965, "learning_rate": 4.53463911161689e-08, "loss": 17.0766, "step": 52431 }, { "epoch": 0.9584148280841575, "grad_norm": 6.350880608153635, "learning_rate": 4.530662361865945e-08, "loss": 17.4214, "step": 52432 }, { "epoch": 0.9584331072806039, "grad_norm": 5.571933185854949, "learning_rate": 4.526687348691994e-08, "loss": 17.0465, "step": 52433 }, { "epoch": 0.9584513864770504, "grad_norm": 4.605005731097383, "learning_rate": 4.522714072109025e-08, "loss": 16.832, "step": 52434 }, { "epoch": 0.958469665673497, "grad_norm": 5.393779765617893, "learning_rate": 4.5187425321309706e-08, "loss": 17.1516, "step": 52435 }, { "epoch": 0.9584879448699435, "grad_norm": 7.305782625692168, "learning_rate": 4.5147727287717105e-08, "loss": 17.3625, "step": 52436 }, { "epoch": 0.9585062240663901, "grad_norm": 5.468197987950713, "learning_rate": 4.510804662045232e-08, "loss": 17.0696, "step": 52437 }, { "epoch": 0.9585245032628366, "grad_norm": 6.124833936728334, "learning_rate": 4.506838331965302e-08, "loss": 17.1424, "step": 52438 }, { "epoch": 0.958542782459283, "grad_norm": 4.466860258013401, "learning_rate": 4.5028737385459096e-08, "loss": 16.7771, "step": 52439 }, { "epoch": 0.9585610616557296, "grad_norm": 6.521639889272513, "learning_rate": 4.498910881800989e-08, "loss": 17.521, "step": 52440 }, { "epoch": 0.9585793408521761, "grad_norm": 6.182761066091553, "learning_rate": 4.494949761744416e-08, "loss": 16.9788, "step": 52441 }, { "epoch": 0.9585976200486227, "grad_norm": 6.491855961011772, "learning_rate": 4.490990378389959e-08, "loss": 17.2612, "step": 52442 }, { "epoch": 0.9586158992450692, "grad_norm": 7.307091600715217, "learning_rate": 4.487032731751661e-08, "loss": 17.7124, "step": 52443 }, { "epoch": 0.9586341784415157, "grad_norm": 5.349039784734831, "learning_rate": 4.483076821843291e-08, "loss": 16.9214, "step": 52444 }, { "epoch": 0.9586524576379623, "grad_norm": 7.210820101476587, "learning_rate": 4.479122648678724e-08, "loss": 17.7524, "step": 52445 }, { "epoch": 0.9586707368344087, "grad_norm": 6.4484719442785225, "learning_rate": 4.47517021227184e-08, "loss": 17.5425, "step": 52446 }, { "epoch": 0.9586890160308553, "grad_norm": 6.5605876251984565, "learning_rate": 4.47121951263646e-08, "loss": 17.279, "step": 52447 }, { "epoch": 0.9587072952273018, "grad_norm": 6.896448204500274, "learning_rate": 4.4672705497865175e-08, "loss": 17.7367, "step": 52448 }, { "epoch": 0.9587255744237483, "grad_norm": 5.354780830280276, "learning_rate": 4.4633233237357245e-08, "loss": 17.0271, "step": 52449 }, { "epoch": 0.9587438536201949, "grad_norm": 5.94081572942048, "learning_rate": 4.459377834498069e-08, "loss": 17.3451, "step": 52450 }, { "epoch": 0.9587621328166414, "grad_norm": 5.932560895706284, "learning_rate": 4.455434082087207e-08, "loss": 17.1335, "step": 52451 }, { "epoch": 0.958780412013088, "grad_norm": 5.963604031396478, "learning_rate": 4.451492066517071e-08, "loss": 17.3286, "step": 52452 }, { "epoch": 0.9587986912095344, "grad_norm": 6.654246636230254, "learning_rate": 4.44755178780143e-08, "loss": 17.1475, "step": 52453 }, { "epoch": 0.9588169704059809, "grad_norm": 6.424819855109239, "learning_rate": 4.443613245954159e-08, "loss": 17.3153, "step": 52454 }, { "epoch": 0.9588352496024275, "grad_norm": 6.347233810981503, "learning_rate": 4.439676440989027e-08, "loss": 17.5892, "step": 52455 }, { "epoch": 0.958853528798874, "grad_norm": 7.38856349204506, "learning_rate": 4.435741372919799e-08, "loss": 17.9148, "step": 52456 }, { "epoch": 0.9588718079953206, "grad_norm": 5.567455985419466, "learning_rate": 4.431808041760299e-08, "loss": 17.131, "step": 52457 }, { "epoch": 0.958890087191767, "grad_norm": 5.756554083693312, "learning_rate": 4.4278764475243485e-08, "loss": 17.311, "step": 52458 }, { "epoch": 0.9589083663882135, "grad_norm": 5.099978028283119, "learning_rate": 4.423946590225603e-08, "loss": 16.9326, "step": 52459 }, { "epoch": 0.9589266455846601, "grad_norm": 5.833900629905138, "learning_rate": 4.420018469877996e-08, "loss": 17.2295, "step": 52460 }, { "epoch": 0.9589449247811066, "grad_norm": 9.165569091425928, "learning_rate": 4.416092086495238e-08, "loss": 18.1654, "step": 52461 }, { "epoch": 0.9589632039775532, "grad_norm": 6.653975000038684, "learning_rate": 4.4121674400909864e-08, "loss": 17.4967, "step": 52462 }, { "epoch": 0.9589814831739997, "grad_norm": 5.558965272382086, "learning_rate": 4.4082445306791175e-08, "loss": 17.278, "step": 52463 }, { "epoch": 0.9589997623704462, "grad_norm": 7.1658389288860125, "learning_rate": 4.404323358273343e-08, "loss": 17.3082, "step": 52464 }, { "epoch": 0.9590180415668927, "grad_norm": 5.9653375671054345, "learning_rate": 4.400403922887431e-08, "loss": 17.1921, "step": 52465 }, { "epoch": 0.9590363207633392, "grad_norm": 6.298082784249113, "learning_rate": 4.39648622453509e-08, "loss": 17.5831, "step": 52466 }, { "epoch": 0.9590545999597858, "grad_norm": 6.832946657713845, "learning_rate": 4.392570263230034e-08, "loss": 17.4174, "step": 52467 }, { "epoch": 0.9590728791562323, "grad_norm": 6.562548327297014, "learning_rate": 4.3886560389860274e-08, "loss": 17.3876, "step": 52468 }, { "epoch": 0.9590911583526788, "grad_norm": 6.7774495905358085, "learning_rate": 4.3847435518168394e-08, "loss": 17.3185, "step": 52469 }, { "epoch": 0.9591094375491254, "grad_norm": 5.9524725152685365, "learning_rate": 4.380832801736012e-08, "loss": 17.2112, "step": 52470 }, { "epoch": 0.9591277167455718, "grad_norm": 6.65431065734795, "learning_rate": 4.376923788757426e-08, "loss": 17.2105, "step": 52471 }, { "epoch": 0.9591459959420184, "grad_norm": 5.884088274364032, "learning_rate": 4.373016512894679e-08, "loss": 17.2221, "step": 52472 }, { "epoch": 0.9591642751384649, "grad_norm": 7.457695509044307, "learning_rate": 4.369110974161539e-08, "loss": 18.0479, "step": 52473 }, { "epoch": 0.9591825543349114, "grad_norm": 6.279587577619518, "learning_rate": 4.365207172571606e-08, "loss": 17.0677, "step": 52474 }, { "epoch": 0.959200833531358, "grad_norm": 5.343624312031863, "learning_rate": 4.3613051081386474e-08, "loss": 16.9062, "step": 52475 }, { "epoch": 0.9592191127278045, "grad_norm": 7.510421714850905, "learning_rate": 4.357404780876262e-08, "loss": 17.662, "step": 52476 }, { "epoch": 0.9592373919242511, "grad_norm": 6.95909063887827, "learning_rate": 4.353506190798218e-08, "loss": 17.2747, "step": 52477 }, { "epoch": 0.9592556711206975, "grad_norm": 5.443128658513893, "learning_rate": 4.349609337918115e-08, "loss": 16.8762, "step": 52478 }, { "epoch": 0.959273950317144, "grad_norm": 5.151404981849263, "learning_rate": 4.345714222249608e-08, "loss": 17.0338, "step": 52479 }, { "epoch": 0.9592922295135906, "grad_norm": 5.514419984982281, "learning_rate": 4.341820843806355e-08, "loss": 16.8266, "step": 52480 }, { "epoch": 0.9593105087100371, "grad_norm": 5.325115327506901, "learning_rate": 4.337929202602009e-08, "loss": 16.8413, "step": 52481 }, { "epoch": 0.9593287879064837, "grad_norm": 6.194041344305835, "learning_rate": 4.334039298650228e-08, "loss": 17.4874, "step": 52482 }, { "epoch": 0.9593470671029302, "grad_norm": 6.29986933563373, "learning_rate": 4.3301511319646106e-08, "loss": 17.1306, "step": 52483 }, { "epoch": 0.9593653462993766, "grad_norm": 9.886045741249662, "learning_rate": 4.326264702558758e-08, "loss": 17.3406, "step": 52484 }, { "epoch": 0.9593836254958232, "grad_norm": 7.245337030189623, "learning_rate": 4.322380010446436e-08, "loss": 17.4456, "step": 52485 }, { "epoch": 0.9594019046922697, "grad_norm": 5.479083511459565, "learning_rate": 4.31849705564108e-08, "loss": 17.0145, "step": 52486 }, { "epoch": 0.9594201838887163, "grad_norm": 4.982168027763633, "learning_rate": 4.3146158381563996e-08, "loss": 16.9119, "step": 52487 }, { "epoch": 0.9594384630851628, "grad_norm": 6.387152376171948, "learning_rate": 4.310736358005996e-08, "loss": 17.5281, "step": 52488 }, { "epoch": 0.9594567422816093, "grad_norm": 5.5240777811452935, "learning_rate": 4.3068586152034685e-08, "loss": 17.2417, "step": 52489 }, { "epoch": 0.9594750214780559, "grad_norm": 6.472853213810371, "learning_rate": 4.3029826097623075e-08, "loss": 17.5991, "step": 52490 }, { "epoch": 0.9594933006745023, "grad_norm": 5.78816672838066, "learning_rate": 4.299108341696223e-08, "loss": 17.2098, "step": 52491 }, { "epoch": 0.9595115798709489, "grad_norm": 6.207491947513023, "learning_rate": 4.2952358110187606e-08, "loss": 17.4238, "step": 52492 }, { "epoch": 0.9595298590673954, "grad_norm": 5.908956975717993, "learning_rate": 4.291365017743465e-08, "loss": 17.1623, "step": 52493 }, { "epoch": 0.9595481382638419, "grad_norm": 6.285540701916239, "learning_rate": 4.287495961883936e-08, "loss": 17.1391, "step": 52494 }, { "epoch": 0.9595664174602885, "grad_norm": 6.356300095196225, "learning_rate": 4.283628643453719e-08, "loss": 17.4229, "step": 52495 }, { "epoch": 0.959584696656735, "grad_norm": 6.488096191086598, "learning_rate": 4.279763062466302e-08, "loss": 17.3767, "step": 52496 }, { "epoch": 0.9596029758531814, "grad_norm": 6.229614320587347, "learning_rate": 4.2758992189353974e-08, "loss": 16.8, "step": 52497 }, { "epoch": 0.959621255049628, "grad_norm": 5.570289906540631, "learning_rate": 4.272037112874383e-08, "loss": 16.8425, "step": 52498 }, { "epoch": 0.9596395342460745, "grad_norm": 7.062615461907369, "learning_rate": 4.268176744296859e-08, "loss": 17.5475, "step": 52499 }, { "epoch": 0.9596578134425211, "grad_norm": 6.000775740706316, "learning_rate": 4.264318113216426e-08, "loss": 17.4085, "step": 52500 }, { "epoch": 0.9596760926389676, "grad_norm": 6.625709960963949, "learning_rate": 4.260461219646461e-08, "loss": 17.2898, "step": 52501 }, { "epoch": 0.9596943718354141, "grad_norm": 6.795426982550057, "learning_rate": 4.256606063600566e-08, "loss": 17.2945, "step": 52502 }, { "epoch": 0.9597126510318607, "grad_norm": 6.773460792540718, "learning_rate": 4.252752645092284e-08, "loss": 17.3416, "step": 52503 }, { "epoch": 0.9597309302283071, "grad_norm": 6.558179037073769, "learning_rate": 4.248900964135105e-08, "loss": 17.257, "step": 52504 }, { "epoch": 0.9597492094247537, "grad_norm": 7.722236615882502, "learning_rate": 4.2450510207424634e-08, "loss": 18.2385, "step": 52505 }, { "epoch": 0.9597674886212002, "grad_norm": 7.404016474342637, "learning_rate": 4.241202814927958e-08, "loss": 17.6219, "step": 52506 }, { "epoch": 0.9597857678176467, "grad_norm": 6.449754561379594, "learning_rate": 4.2373563467049126e-08, "loss": 17.3546, "step": 52507 }, { "epoch": 0.9598040470140933, "grad_norm": 7.089348331725335, "learning_rate": 4.233511616087038e-08, "loss": 17.8281, "step": 52508 }, { "epoch": 0.9598223262105398, "grad_norm": 6.529124343383554, "learning_rate": 4.229668623087602e-08, "loss": 17.1764, "step": 52509 }, { "epoch": 0.9598406054069863, "grad_norm": 6.531917125843689, "learning_rate": 4.2258273677201476e-08, "loss": 17.3714, "step": 52510 }, { "epoch": 0.9598588846034328, "grad_norm": 6.111721427724087, "learning_rate": 4.2219878499981656e-08, "loss": 17.47, "step": 52511 }, { "epoch": 0.9598771637998793, "grad_norm": 6.294744402467581, "learning_rate": 4.218150069935145e-08, "loss": 17.2277, "step": 52512 }, { "epoch": 0.9598954429963259, "grad_norm": 6.554376986791541, "learning_rate": 4.2143140275444084e-08, "loss": 17.4995, "step": 52513 }, { "epoch": 0.9599137221927724, "grad_norm": 6.878409201247342, "learning_rate": 4.2104797228395556e-08, "loss": 17.6503, "step": 52514 }, { "epoch": 0.959932001389219, "grad_norm": 6.717574007322722, "learning_rate": 4.206647155833854e-08, "loss": 17.4667, "step": 52515 }, { "epoch": 0.9599502805856654, "grad_norm": 7.070494734912636, "learning_rate": 4.202816326540904e-08, "loss": 17.2477, "step": 52516 }, { "epoch": 0.9599685597821119, "grad_norm": 5.002967164709511, "learning_rate": 4.1989872349740837e-08, "loss": 16.8404, "step": 52517 }, { "epoch": 0.9599868389785585, "grad_norm": 4.911488464750784, "learning_rate": 4.1951598811467154e-08, "loss": 16.7955, "step": 52518 }, { "epoch": 0.960005118175005, "grad_norm": 6.210726712519315, "learning_rate": 4.191334265072289e-08, "loss": 17.3678, "step": 52519 }, { "epoch": 0.9600233973714516, "grad_norm": 6.754572979790507, "learning_rate": 4.187510386764293e-08, "loss": 17.0144, "step": 52520 }, { "epoch": 0.9600416765678981, "grad_norm": 5.325253941775085, "learning_rate": 4.1836882462359394e-08, "loss": 16.9774, "step": 52521 }, { "epoch": 0.9600599557643446, "grad_norm": 5.782138187174247, "learning_rate": 4.179867843500829e-08, "loss": 17.2613, "step": 52522 }, { "epoch": 0.9600782349607911, "grad_norm": 5.808350138824043, "learning_rate": 4.176049178572228e-08, "loss": 17.1421, "step": 52523 }, { "epoch": 0.9600965141572376, "grad_norm": 6.3047698917912385, "learning_rate": 4.172232251463515e-08, "loss": 17.3285, "step": 52524 }, { "epoch": 0.9601147933536842, "grad_norm": 6.729952313576036, "learning_rate": 4.168417062188124e-08, "loss": 17.508, "step": 52525 }, { "epoch": 0.9601330725501307, "grad_norm": 4.614264948367673, "learning_rate": 4.1646036107594326e-08, "loss": 16.7973, "step": 52526 }, { "epoch": 0.9601513517465772, "grad_norm": 6.32047843193278, "learning_rate": 4.160791897190764e-08, "loss": 17.1889, "step": 52527 }, { "epoch": 0.9601696309430238, "grad_norm": 6.788085070280297, "learning_rate": 4.1569819214954396e-08, "loss": 17.7061, "step": 52528 }, { "epoch": 0.9601879101394702, "grad_norm": 7.664117601749749, "learning_rate": 4.1531736836868955e-08, "loss": 17.7377, "step": 52529 }, { "epoch": 0.9602061893359168, "grad_norm": 6.50397216163267, "learning_rate": 4.149367183778508e-08, "loss": 17.5848, "step": 52530 }, { "epoch": 0.9602244685323633, "grad_norm": 6.386233617646388, "learning_rate": 4.145562421783489e-08, "loss": 16.9728, "step": 52531 }, { "epoch": 0.9602427477288098, "grad_norm": 5.174995501087608, "learning_rate": 4.1417593977152726e-08, "loss": 16.9703, "step": 52532 }, { "epoch": 0.9602610269252564, "grad_norm": 6.902136627907822, "learning_rate": 4.137958111587182e-08, "loss": 17.357, "step": 52533 }, { "epoch": 0.9602793061217029, "grad_norm": 5.959587343165292, "learning_rate": 4.134158563412538e-08, "loss": 17.082, "step": 52534 }, { "epoch": 0.9602975853181495, "grad_norm": 6.021274169571941, "learning_rate": 4.130360753204554e-08, "loss": 17.0263, "step": 52535 }, { "epoch": 0.9603158645145959, "grad_norm": 5.787372541487252, "learning_rate": 4.1265646809766634e-08, "loss": 16.9501, "step": 52536 }, { "epoch": 0.9603341437110424, "grad_norm": 5.749017627390225, "learning_rate": 4.122770346742133e-08, "loss": 17.0812, "step": 52537 }, { "epoch": 0.960352422907489, "grad_norm": 5.445596691730165, "learning_rate": 4.118977750514286e-08, "loss": 17.0438, "step": 52538 }, { "epoch": 0.9603707021039355, "grad_norm": 5.422312217563272, "learning_rate": 4.115186892306389e-08, "loss": 16.9025, "step": 52539 }, { "epoch": 0.9603889813003821, "grad_norm": 5.9478727137762, "learning_rate": 4.111397772131709e-08, "loss": 17.1158, "step": 52540 }, { "epoch": 0.9604072604968286, "grad_norm": 5.869606096854614, "learning_rate": 4.107610390003569e-08, "loss": 17.0754, "step": 52541 }, { "epoch": 0.960425539693275, "grad_norm": 5.963339700095794, "learning_rate": 4.103824745935236e-08, "loss": 17.241, "step": 52542 }, { "epoch": 0.9604438188897216, "grad_norm": 5.761836688902984, "learning_rate": 4.100040839939923e-08, "loss": 17.1849, "step": 52543 }, { "epoch": 0.9604620980861681, "grad_norm": 6.619883497771222, "learning_rate": 4.096258672030951e-08, "loss": 17.2794, "step": 52544 }, { "epoch": 0.9604803772826147, "grad_norm": 7.203446940190156, "learning_rate": 4.092478242221587e-08, "loss": 17.5159, "step": 52545 }, { "epoch": 0.9604986564790612, "grad_norm": 7.421207420112731, "learning_rate": 4.088699550524988e-08, "loss": 17.9096, "step": 52546 }, { "epoch": 0.9605169356755077, "grad_norm": 7.135018646117828, "learning_rate": 4.084922596954477e-08, "loss": 17.502, "step": 52547 }, { "epoch": 0.9605352148719543, "grad_norm": 5.532945136368834, "learning_rate": 4.08114738152332e-08, "loss": 16.9662, "step": 52548 }, { "epoch": 0.9605534940684007, "grad_norm": 6.137064278482623, "learning_rate": 4.07737390424473e-08, "loss": 17.4787, "step": 52549 }, { "epoch": 0.9605717732648473, "grad_norm": 6.943093694687607, "learning_rate": 4.0736021651318625e-08, "loss": 17.5384, "step": 52550 }, { "epoch": 0.9605900524612938, "grad_norm": 6.120325205659133, "learning_rate": 4.069832164197984e-08, "loss": 17.3904, "step": 52551 }, { "epoch": 0.9606083316577403, "grad_norm": 6.869312702864925, "learning_rate": 4.0660639014563074e-08, "loss": 17.7323, "step": 52552 }, { "epoch": 0.9606266108541869, "grad_norm": 5.271475476140176, "learning_rate": 4.062297376920099e-08, "loss": 16.8798, "step": 52553 }, { "epoch": 0.9606448900506334, "grad_norm": 5.068642171746134, "learning_rate": 4.05853259060246e-08, "loss": 17.0084, "step": 52554 }, { "epoch": 0.96066316924708, "grad_norm": 6.2856135121334535, "learning_rate": 4.054769542516601e-08, "loss": 17.4925, "step": 52555 }, { "epoch": 0.9606814484435264, "grad_norm": 5.898904100244653, "learning_rate": 4.0510082326757905e-08, "loss": 17.0747, "step": 52556 }, { "epoch": 0.9606997276399729, "grad_norm": 6.099222400331353, "learning_rate": 4.0472486610931284e-08, "loss": 17.1817, "step": 52557 }, { "epoch": 0.9607180068364195, "grad_norm": 5.93337320827384, "learning_rate": 4.0434908277818265e-08, "loss": 16.8927, "step": 52558 }, { "epoch": 0.960736286032866, "grad_norm": 5.907085132242186, "learning_rate": 4.039734732755041e-08, "loss": 16.9872, "step": 52559 }, { "epoch": 0.9607545652293126, "grad_norm": 5.931542810031886, "learning_rate": 4.035980376025983e-08, "loss": 17.2742, "step": 52560 }, { "epoch": 0.960772844425759, "grad_norm": 6.485113251588733, "learning_rate": 4.032227757607754e-08, "loss": 17.2088, "step": 52561 }, { "epoch": 0.9607911236222055, "grad_norm": 7.187249912608951, "learning_rate": 4.0284768775135095e-08, "loss": 17.7138, "step": 52562 }, { "epoch": 0.9608094028186521, "grad_norm": 5.569935802934185, "learning_rate": 4.024727735756462e-08, "loss": 17.0301, "step": 52563 }, { "epoch": 0.9608276820150986, "grad_norm": 5.367844241469927, "learning_rate": 4.0209803323496554e-08, "loss": 17.3453, "step": 52564 }, { "epoch": 0.9608459612115451, "grad_norm": 5.930145897582602, "learning_rate": 4.017234667306302e-08, "loss": 16.9588, "step": 52565 }, { "epoch": 0.9608642404079917, "grad_norm": 7.636338807522279, "learning_rate": 4.013490740639503e-08, "loss": 17.5948, "step": 52566 }, { "epoch": 0.9608825196044382, "grad_norm": 7.31677151310772, "learning_rate": 4.009748552362358e-08, "loss": 17.6441, "step": 52567 }, { "epoch": 0.9609007988008847, "grad_norm": 5.262022496067778, "learning_rate": 4.0060081024880235e-08, "loss": 17.1534, "step": 52568 }, { "epoch": 0.9609190779973312, "grad_norm": 5.672248499179636, "learning_rate": 4.0022693910296004e-08, "loss": 17.1106, "step": 52569 }, { "epoch": 0.9609373571937777, "grad_norm": 7.341809080569573, "learning_rate": 3.9985324180001894e-08, "loss": 17.2009, "step": 52570 }, { "epoch": 0.9609556363902243, "grad_norm": 4.997943228208807, "learning_rate": 3.994797183412835e-08, "loss": 17.0453, "step": 52571 }, { "epoch": 0.9609739155866708, "grad_norm": 6.823545570925396, "learning_rate": 3.991063687280694e-08, "loss": 17.6735, "step": 52572 }, { "epoch": 0.9609921947831174, "grad_norm": 5.141988146757113, "learning_rate": 3.987331929616811e-08, "loss": 16.8279, "step": 52573 }, { "epoch": 0.9610104739795639, "grad_norm": 5.163319728562794, "learning_rate": 3.9836019104343424e-08, "loss": 16.9384, "step": 52574 }, { "epoch": 0.9610287531760103, "grad_norm": 7.572499403209288, "learning_rate": 3.9798736297462784e-08, "loss": 17.7179, "step": 52575 }, { "epoch": 0.9610470323724569, "grad_norm": 6.226977141093268, "learning_rate": 3.976147087565718e-08, "loss": 17.1114, "step": 52576 }, { "epoch": 0.9610653115689034, "grad_norm": 7.298459787802802, "learning_rate": 3.9724222839057083e-08, "loss": 17.5448, "step": 52577 }, { "epoch": 0.96108359076535, "grad_norm": 6.3692383902829315, "learning_rate": 3.9686992187793485e-08, "loss": 17.5881, "step": 52578 }, { "epoch": 0.9611018699617965, "grad_norm": 5.937696815892917, "learning_rate": 3.9649778921996286e-08, "loss": 17.2773, "step": 52579 }, { "epoch": 0.961120149158243, "grad_norm": 6.181863856259918, "learning_rate": 3.9612583041795936e-08, "loss": 17.3272, "step": 52580 }, { "epoch": 0.9611384283546895, "grad_norm": 6.332470780678882, "learning_rate": 3.9575404547323446e-08, "loss": 17.1848, "step": 52581 }, { "epoch": 0.961156707551136, "grad_norm": 8.02630154907342, "learning_rate": 3.9538243438708156e-08, "loss": 17.2354, "step": 52582 }, { "epoch": 0.9611749867475826, "grad_norm": 6.048029094728004, "learning_rate": 3.9501099716081626e-08, "loss": 17.0782, "step": 52583 }, { "epoch": 0.9611932659440291, "grad_norm": 6.996616906359953, "learning_rate": 3.946397337957264e-08, "loss": 17.8278, "step": 52584 }, { "epoch": 0.9612115451404756, "grad_norm": 5.713300045295844, "learning_rate": 3.942686442931276e-08, "loss": 17.1468, "step": 52585 }, { "epoch": 0.9612298243369222, "grad_norm": 7.178216450311533, "learning_rate": 3.938977286543022e-08, "loss": 17.5366, "step": 52586 }, { "epoch": 0.9612481035333686, "grad_norm": 5.702176823535907, "learning_rate": 3.9352698688057137e-08, "loss": 16.9912, "step": 52587 }, { "epoch": 0.9612663827298152, "grad_norm": 7.386743743637811, "learning_rate": 3.931564189732173e-08, "loss": 17.9513, "step": 52588 }, { "epoch": 0.9612846619262617, "grad_norm": 7.101693489833928, "learning_rate": 3.927860249335447e-08, "loss": 17.8645, "step": 52589 }, { "epoch": 0.9613029411227082, "grad_norm": 5.88490503323592, "learning_rate": 3.924158047628579e-08, "loss": 17.2626, "step": 52590 }, { "epoch": 0.9613212203191548, "grad_norm": 6.169895477909287, "learning_rate": 3.9204575846244485e-08, "loss": 17.2626, "step": 52591 }, { "epoch": 0.9613394995156013, "grad_norm": 6.803072488794249, "learning_rate": 3.916758860336045e-08, "loss": 17.3915, "step": 52592 }, { "epoch": 0.9613577787120479, "grad_norm": 5.824789183822006, "learning_rate": 3.913061874776358e-08, "loss": 17.3125, "step": 52593 }, { "epoch": 0.9613760579084943, "grad_norm": 6.454204539525161, "learning_rate": 3.9093666279583777e-08, "loss": 17.2272, "step": 52594 }, { "epoch": 0.9613943371049408, "grad_norm": 6.689843813042622, "learning_rate": 3.905673119895037e-08, "loss": 17.0632, "step": 52595 }, { "epoch": 0.9614126163013874, "grad_norm": 5.671757257602925, "learning_rate": 3.9019813505992156e-08, "loss": 17.4139, "step": 52596 }, { "epoch": 0.9614308954978339, "grad_norm": 7.844636498586987, "learning_rate": 3.8982913200839024e-08, "loss": 18.1528, "step": 52597 }, { "epoch": 0.9614491746942805, "grad_norm": 6.526812601967341, "learning_rate": 3.894603028362032e-08, "loss": 17.5983, "step": 52598 }, { "epoch": 0.961467453890727, "grad_norm": 6.834476061022383, "learning_rate": 3.8909164754465376e-08, "loss": 17.8327, "step": 52599 }, { "epoch": 0.9614857330871734, "grad_norm": 6.564490496551966, "learning_rate": 3.887231661350355e-08, "loss": 17.8603, "step": 52600 }, { "epoch": 0.96150401228362, "grad_norm": 5.4042856361007905, "learning_rate": 3.883548586086305e-08, "loss": 17.1806, "step": 52601 }, { "epoch": 0.9615222914800665, "grad_norm": 4.913331669504619, "learning_rate": 3.879867249667435e-08, "loss": 16.6881, "step": 52602 }, { "epoch": 0.9615405706765131, "grad_norm": 8.624323462495648, "learning_rate": 3.8761876521065665e-08, "loss": 17.7299, "step": 52603 }, { "epoch": 0.9615588498729596, "grad_norm": 5.904673406868691, "learning_rate": 3.872509793416579e-08, "loss": 17.5387, "step": 52604 }, { "epoch": 0.9615771290694061, "grad_norm": 5.331232778671097, "learning_rate": 3.868833673610406e-08, "loss": 17.0171, "step": 52605 }, { "epoch": 0.9615954082658527, "grad_norm": 4.295683656740122, "learning_rate": 3.8651592927009265e-08, "loss": 16.8371, "step": 52606 }, { "epoch": 0.9616136874622991, "grad_norm": 6.553760644550597, "learning_rate": 3.861486650701019e-08, "loss": 17.2568, "step": 52607 }, { "epoch": 0.9616319666587457, "grad_norm": 6.182827860453062, "learning_rate": 3.8578157476235053e-08, "loss": 17.154, "step": 52608 }, { "epoch": 0.9616502458551922, "grad_norm": 6.995204351950874, "learning_rate": 3.8541465834813776e-08, "loss": 17.5172, "step": 52609 }, { "epoch": 0.9616685250516387, "grad_norm": 5.42175531658681, "learning_rate": 3.85047915828729e-08, "loss": 16.9641, "step": 52610 }, { "epoch": 0.9616868042480853, "grad_norm": 5.472668797315844, "learning_rate": 3.846813472054289e-08, "loss": 17.0877, "step": 52611 }, { "epoch": 0.9617050834445318, "grad_norm": 8.098667193265193, "learning_rate": 3.843149524795198e-08, "loss": 17.7629, "step": 52612 }, { "epoch": 0.9617233626409784, "grad_norm": 5.80641767806261, "learning_rate": 3.839487316522783e-08, "loss": 17.0296, "step": 52613 }, { "epoch": 0.9617416418374248, "grad_norm": 9.522899678378348, "learning_rate": 3.8358268472498686e-08, "loss": 18.4645, "step": 52614 }, { "epoch": 0.9617599210338713, "grad_norm": 7.451847498296272, "learning_rate": 3.832168116989332e-08, "loss": 17.2172, "step": 52615 }, { "epoch": 0.9617782002303179, "grad_norm": 6.0520280247018, "learning_rate": 3.828511125754053e-08, "loss": 17.1409, "step": 52616 }, { "epoch": 0.9617964794267644, "grad_norm": 5.581803878828745, "learning_rate": 3.8248558735567433e-08, "loss": 17.2511, "step": 52617 }, { "epoch": 0.961814758623211, "grad_norm": 5.722826751249178, "learning_rate": 3.821202360410225e-08, "loss": 17.3843, "step": 52618 }, { "epoch": 0.9618330378196575, "grad_norm": 7.584253762350622, "learning_rate": 3.8175505863273786e-08, "loss": 17.9258, "step": 52619 }, { "epoch": 0.9618513170161039, "grad_norm": 6.580074499221532, "learning_rate": 3.813900551320915e-08, "loss": 17.615, "step": 52620 }, { "epoch": 0.9618695962125505, "grad_norm": 6.121566062662375, "learning_rate": 3.810252255403768e-08, "loss": 17.517, "step": 52621 }, { "epoch": 0.961887875408997, "grad_norm": 5.640375846080038, "learning_rate": 3.8066056985885394e-08, "loss": 17.0902, "step": 52622 }, { "epoch": 0.9619061546054436, "grad_norm": 6.619529946587579, "learning_rate": 3.802960880888162e-08, "loss": 17.3648, "step": 52623 }, { "epoch": 0.9619244338018901, "grad_norm": 5.014665089335741, "learning_rate": 3.799317802315294e-08, "loss": 17.0424, "step": 52624 }, { "epoch": 0.9619427129983366, "grad_norm": 4.7885574889869575, "learning_rate": 3.795676462882758e-08, "loss": 16.6897, "step": 52625 }, { "epoch": 0.9619609921947831, "grad_norm": 5.825433211774698, "learning_rate": 3.792036862603376e-08, "loss": 17.2412, "step": 52626 }, { "epoch": 0.9619792713912296, "grad_norm": 5.696557607903726, "learning_rate": 3.7883990014898066e-08, "loss": 17.0029, "step": 52627 }, { "epoch": 0.9619975505876762, "grad_norm": 6.044203469161679, "learning_rate": 3.7847628795548706e-08, "loss": 17.1977, "step": 52628 }, { "epoch": 0.9620158297841227, "grad_norm": 7.345594049081008, "learning_rate": 3.781128496811226e-08, "loss": 17.6055, "step": 52629 }, { "epoch": 0.9620341089805692, "grad_norm": 7.887420285227825, "learning_rate": 3.777495853271695e-08, "loss": 17.7532, "step": 52630 }, { "epoch": 0.9620523881770158, "grad_norm": 4.79650652063197, "learning_rate": 3.7738649489489906e-08, "loss": 16.8391, "step": 52631 }, { "epoch": 0.9620706673734623, "grad_norm": 5.857116084016561, "learning_rate": 3.7702357838558244e-08, "loss": 17.1687, "step": 52632 }, { "epoch": 0.9620889465699087, "grad_norm": 7.05735140568775, "learning_rate": 3.7666083580049087e-08, "loss": 17.3887, "step": 52633 }, { "epoch": 0.9621072257663553, "grad_norm": 5.602809878333895, "learning_rate": 3.762982671408955e-08, "loss": 17.0867, "step": 52634 }, { "epoch": 0.9621255049628018, "grad_norm": 5.666985707107854, "learning_rate": 3.759358724080731e-08, "loss": 17.269, "step": 52635 }, { "epoch": 0.9621437841592484, "grad_norm": 6.999164495269836, "learning_rate": 3.755736516032837e-08, "loss": 17.2906, "step": 52636 }, { "epoch": 0.9621620633556949, "grad_norm": 12.381273373471917, "learning_rate": 3.752116047278098e-08, "loss": 17.9423, "step": 52637 }, { "epoch": 0.9621803425521414, "grad_norm": 5.9189231307592785, "learning_rate": 3.7484973178290584e-08, "loss": 17.3598, "step": 52638 }, { "epoch": 0.962198621748588, "grad_norm": 5.332603392388407, "learning_rate": 3.7448803276984856e-08, "loss": 17.2475, "step": 52639 }, { "epoch": 0.9622169009450344, "grad_norm": 7.118641881562837, "learning_rate": 3.741265076899092e-08, "loss": 17.6381, "step": 52640 }, { "epoch": 0.962235180141481, "grad_norm": 6.4055117296793, "learning_rate": 3.7376515654434786e-08, "loss": 17.2927, "step": 52641 }, { "epoch": 0.9622534593379275, "grad_norm": 6.476772772583775, "learning_rate": 3.734039793344302e-08, "loss": 17.5591, "step": 52642 }, { "epoch": 0.962271738534374, "grad_norm": 5.246816806010078, "learning_rate": 3.730429760614329e-08, "loss": 17.0671, "step": 52643 }, { "epoch": 0.9622900177308206, "grad_norm": 6.498373328095167, "learning_rate": 3.726821467266051e-08, "loss": 17.1745, "step": 52644 }, { "epoch": 0.962308296927267, "grad_norm": 6.979828637283626, "learning_rate": 3.723214913312234e-08, "loss": 17.6572, "step": 52645 }, { "epoch": 0.9623265761237136, "grad_norm": 5.441186809739264, "learning_rate": 3.7196100987654806e-08, "loss": 17.2547, "step": 52646 }, { "epoch": 0.9623448553201601, "grad_norm": 7.693526458223717, "learning_rate": 3.716007023638446e-08, "loss": 18.041, "step": 52647 }, { "epoch": 0.9623631345166066, "grad_norm": 6.385627606625376, "learning_rate": 3.712405687943676e-08, "loss": 17.1064, "step": 52648 }, { "epoch": 0.9623814137130532, "grad_norm": 5.284354663915692, "learning_rate": 3.70880609169394e-08, "loss": 17.3235, "step": 52649 }, { "epoch": 0.9623996929094997, "grad_norm": 5.7367991408008026, "learning_rate": 3.705208234901725e-08, "loss": 16.8682, "step": 52650 }, { "epoch": 0.9624179721059463, "grad_norm": 6.205891650025857, "learning_rate": 3.701612117579689e-08, "loss": 17.1005, "step": 52651 }, { "epoch": 0.9624362513023927, "grad_norm": 6.7853107996608335, "learning_rate": 3.6980177397404894e-08, "loss": 17.2648, "step": 52652 }, { "epoch": 0.9624545304988392, "grad_norm": 5.359180948155655, "learning_rate": 3.694425101396615e-08, "loss": 17.1508, "step": 52653 }, { "epoch": 0.9624728096952858, "grad_norm": 5.4949475583577385, "learning_rate": 3.690834202560723e-08, "loss": 17.067, "step": 52654 }, { "epoch": 0.9624910888917323, "grad_norm": 6.376013176290975, "learning_rate": 3.6872450432453577e-08, "loss": 17.3072, "step": 52655 }, { "epoch": 0.9625093680881789, "grad_norm": 6.116487609298027, "learning_rate": 3.683657623463177e-08, "loss": 17.3332, "step": 52656 }, { "epoch": 0.9625276472846254, "grad_norm": 6.964113244026471, "learning_rate": 3.680071943226726e-08, "loss": 17.2082, "step": 52657 }, { "epoch": 0.9625459264810718, "grad_norm": 6.385521779045424, "learning_rate": 3.676488002548495e-08, "loss": 17.7041, "step": 52658 }, { "epoch": 0.9625642056775184, "grad_norm": 7.067334963988395, "learning_rate": 3.67290580144114e-08, "loss": 17.3925, "step": 52659 }, { "epoch": 0.9625824848739649, "grad_norm": 6.230169510436515, "learning_rate": 3.6693253399172066e-08, "loss": 17.0625, "step": 52660 }, { "epoch": 0.9626007640704115, "grad_norm": 5.2185604424037795, "learning_rate": 3.6657466179891856e-08, "loss": 17.1409, "step": 52661 }, { "epoch": 0.962619043266858, "grad_norm": 6.232842901160614, "learning_rate": 3.662169635669621e-08, "loss": 17.4955, "step": 52662 }, { "epoch": 0.9626373224633045, "grad_norm": 5.0691314824826765, "learning_rate": 3.658594392971171e-08, "loss": 16.8154, "step": 52663 }, { "epoch": 0.9626556016597511, "grad_norm": 5.904406435906146, "learning_rate": 3.655020889906213e-08, "loss": 17.1078, "step": 52664 }, { "epoch": 0.9626738808561975, "grad_norm": 5.202128632890689, "learning_rate": 3.651449126487294e-08, "loss": 17.0945, "step": 52665 }, { "epoch": 0.9626921600526441, "grad_norm": 6.4693129305900365, "learning_rate": 3.64787910272707e-08, "loss": 17.7499, "step": 52666 }, { "epoch": 0.9627104392490906, "grad_norm": 6.535090756472981, "learning_rate": 3.644310818637864e-08, "loss": 17.7565, "step": 52667 }, { "epoch": 0.9627287184455371, "grad_norm": 6.3624611692094035, "learning_rate": 3.640744274232333e-08, "loss": 17.1442, "step": 52668 }, { "epoch": 0.9627469976419837, "grad_norm": 5.9937107430366146, "learning_rate": 3.6371794695229665e-08, "loss": 17.1841, "step": 52669 }, { "epoch": 0.9627652768384302, "grad_norm": 6.084819191833562, "learning_rate": 3.633616404522144e-08, "loss": 17.5242, "step": 52670 }, { "epoch": 0.9627835560348768, "grad_norm": 6.257843760596695, "learning_rate": 3.6300550792424115e-08, "loss": 17.4768, "step": 52671 }, { "epoch": 0.9628018352313232, "grad_norm": 6.700459527299738, "learning_rate": 3.6264954936963136e-08, "loss": 17.2089, "step": 52672 }, { "epoch": 0.9628201144277697, "grad_norm": 4.8851739725748935, "learning_rate": 3.62293764789623e-08, "loss": 16.908, "step": 52673 }, { "epoch": 0.9628383936242163, "grad_norm": 5.917316709066088, "learning_rate": 3.6193815418547054e-08, "loss": 16.8732, "step": 52674 }, { "epoch": 0.9628566728206628, "grad_norm": 5.522695530717241, "learning_rate": 3.6158271755841746e-08, "loss": 17.0332, "step": 52675 }, { "epoch": 0.9628749520171094, "grad_norm": 7.958222808193892, "learning_rate": 3.612274549097072e-08, "loss": 17.3724, "step": 52676 }, { "epoch": 0.9628932312135559, "grad_norm": 6.782939439332001, "learning_rate": 3.608723662405944e-08, "loss": 17.1996, "step": 52677 }, { "epoch": 0.9629115104100023, "grad_norm": 5.0278699219101295, "learning_rate": 3.6051745155231135e-08, "loss": 16.7841, "step": 52678 }, { "epoch": 0.9629297896064489, "grad_norm": 7.857728224570705, "learning_rate": 3.60162710846107e-08, "loss": 17.6327, "step": 52679 }, { "epoch": 0.9629480688028954, "grad_norm": 4.924683538811713, "learning_rate": 3.5980814412322487e-08, "loss": 16.9065, "step": 52680 }, { "epoch": 0.962966347999342, "grad_norm": 7.122170170837894, "learning_rate": 3.594537513849083e-08, "loss": 17.7256, "step": 52681 }, { "epoch": 0.9629846271957885, "grad_norm": 4.985513018869553, "learning_rate": 3.590995326323954e-08, "loss": 16.9172, "step": 52682 }, { "epoch": 0.963002906392235, "grad_norm": 6.578592741670827, "learning_rate": 3.58745487866935e-08, "loss": 16.9541, "step": 52683 }, { "epoch": 0.9630211855886815, "grad_norm": 5.889193792156728, "learning_rate": 3.583916170897594e-08, "loss": 17.1175, "step": 52684 }, { "epoch": 0.963039464785128, "grad_norm": 8.726124159086776, "learning_rate": 3.580379203021178e-08, "loss": 17.6026, "step": 52685 }, { "epoch": 0.9630577439815746, "grad_norm": 5.986394125704655, "learning_rate": 3.576843975052424e-08, "loss": 17.1446, "step": 52686 }, { "epoch": 0.9630760231780211, "grad_norm": 4.872898969930226, "learning_rate": 3.5733104870037674e-08, "loss": 16.809, "step": 52687 }, { "epoch": 0.9630943023744676, "grad_norm": 7.583312357704536, "learning_rate": 3.5697787388876414e-08, "loss": 17.7866, "step": 52688 }, { "epoch": 0.9631125815709142, "grad_norm": 5.134875077244558, "learning_rate": 3.566248730716315e-08, "loss": 17.0447, "step": 52689 }, { "epoch": 0.9631308607673607, "grad_norm": 5.731797686054928, "learning_rate": 3.562720462502167e-08, "loss": 17.3085, "step": 52690 }, { "epoch": 0.9631491399638072, "grad_norm": 5.761637308681405, "learning_rate": 3.5591939342576324e-08, "loss": 16.9222, "step": 52691 }, { "epoch": 0.9631674191602537, "grad_norm": 5.583220975896677, "learning_rate": 3.5556691459950335e-08, "loss": 17.0192, "step": 52692 }, { "epoch": 0.9631856983567002, "grad_norm": 6.358136581507694, "learning_rate": 3.55214609772675e-08, "loss": 17.2611, "step": 52693 }, { "epoch": 0.9632039775531468, "grad_norm": 6.659099348404652, "learning_rate": 3.5486247894651605e-08, "loss": 17.4832, "step": 52694 }, { "epoch": 0.9632222567495933, "grad_norm": 6.38495760092356, "learning_rate": 3.545105221222478e-08, "loss": 17.2299, "step": 52695 }, { "epoch": 0.9632405359460399, "grad_norm": 5.215085740025755, "learning_rate": 3.5415873930111924e-08, "loss": 16.9488, "step": 52696 }, { "epoch": 0.9632588151424863, "grad_norm": 6.683929229569487, "learning_rate": 3.538071304843515e-08, "loss": 17.2112, "step": 52697 }, { "epoch": 0.9632770943389328, "grad_norm": 6.580854985441111, "learning_rate": 3.534556956731827e-08, "loss": 17.4036, "step": 52698 }, { "epoch": 0.9632953735353794, "grad_norm": 6.555835010152033, "learning_rate": 3.5310443486884506e-08, "loss": 17.1617, "step": 52699 }, { "epoch": 0.9633136527318259, "grad_norm": 5.301829325155242, "learning_rate": 3.5275334807256535e-08, "loss": 17.0614, "step": 52700 }, { "epoch": 0.9633319319282724, "grad_norm": 6.962956186271601, "learning_rate": 3.5240243528558146e-08, "loss": 17.6237, "step": 52701 }, { "epoch": 0.963350211124719, "grad_norm": 7.503635084238143, "learning_rate": 3.5205169650911476e-08, "loss": 17.6548, "step": 52702 }, { "epoch": 0.9633684903211654, "grad_norm": 5.273517327706745, "learning_rate": 3.517011317443975e-08, "loss": 17.0193, "step": 52703 }, { "epoch": 0.963386769517612, "grad_norm": 7.064995429662004, "learning_rate": 3.51350740992662e-08, "loss": 17.501, "step": 52704 }, { "epoch": 0.9634050487140585, "grad_norm": 6.718641622012398, "learning_rate": 3.5100052425513506e-08, "loss": 17.5821, "step": 52705 }, { "epoch": 0.963423327910505, "grad_norm": 8.406882958342115, "learning_rate": 3.506504815330436e-08, "loss": 18.1278, "step": 52706 }, { "epoch": 0.9634416071069516, "grad_norm": 6.549580050321041, "learning_rate": 3.503006128276087e-08, "loss": 17.3043, "step": 52707 }, { "epoch": 0.9634598863033981, "grad_norm": 5.410374074274115, "learning_rate": 3.4995091814006844e-08, "loss": 16.8544, "step": 52708 }, { "epoch": 0.9634781654998447, "grad_norm": 7.869624803763741, "learning_rate": 3.4960139747163837e-08, "loss": 17.7322, "step": 52709 }, { "epoch": 0.9634964446962911, "grad_norm": 6.597352493529711, "learning_rate": 3.492520508235453e-08, "loss": 17.4285, "step": 52710 }, { "epoch": 0.9635147238927376, "grad_norm": 5.875029473691181, "learning_rate": 3.489028781970161e-08, "loss": 17.3524, "step": 52711 }, { "epoch": 0.9635330030891842, "grad_norm": 6.492569670047542, "learning_rate": 3.485538795932775e-08, "loss": 17.6408, "step": 52712 }, { "epoch": 0.9635512822856307, "grad_norm": 5.283511087135902, "learning_rate": 3.4820505501354516e-08, "loss": 17.0168, "step": 52713 }, { "epoch": 0.9635695614820773, "grad_norm": 5.418370400114099, "learning_rate": 3.47856404459046e-08, "loss": 17.2586, "step": 52714 }, { "epoch": 0.9635878406785238, "grad_norm": 9.182730490482713, "learning_rate": 3.475079279310012e-08, "loss": 17.946, "step": 52715 }, { "epoch": 0.9636061198749702, "grad_norm": 6.101015049792193, "learning_rate": 3.4715962543063196e-08, "loss": 17.2438, "step": 52716 }, { "epoch": 0.9636243990714168, "grad_norm": 5.561776664027408, "learning_rate": 3.4681149695916515e-08, "loss": 17.0896, "step": 52717 }, { "epoch": 0.9636426782678633, "grad_norm": 5.825400456094362, "learning_rate": 3.464635425178109e-08, "loss": 17.1803, "step": 52718 }, { "epoch": 0.9636609574643099, "grad_norm": 6.585189154075762, "learning_rate": 3.461157621077904e-08, "loss": 17.5497, "step": 52719 }, { "epoch": 0.9636792366607564, "grad_norm": 7.325040004126376, "learning_rate": 3.457681557303305e-08, "loss": 17.6822, "step": 52720 }, { "epoch": 0.9636975158572029, "grad_norm": 6.987850266725187, "learning_rate": 3.4542072338664137e-08, "loss": 17.3732, "step": 52721 }, { "epoch": 0.9637157950536495, "grad_norm": 4.760200795375408, "learning_rate": 3.4507346507794416e-08, "loss": 16.8025, "step": 52722 }, { "epoch": 0.9637340742500959, "grad_norm": 5.386254119159572, "learning_rate": 3.447263808054546e-08, "loss": 16.8417, "step": 52723 }, { "epoch": 0.9637523534465425, "grad_norm": 6.876179245058343, "learning_rate": 3.44379470570394e-08, "loss": 17.5663, "step": 52724 }, { "epoch": 0.963770632642989, "grad_norm": 17.10409402623898, "learning_rate": 3.4403273437397244e-08, "loss": 17.2967, "step": 52725 }, { "epoch": 0.9637889118394355, "grad_norm": 6.620192841182476, "learning_rate": 3.436861722174056e-08, "loss": 17.4099, "step": 52726 }, { "epoch": 0.9638071910358821, "grad_norm": 6.588051414002768, "learning_rate": 3.4333978410190926e-08, "loss": 17.0636, "step": 52727 }, { "epoch": 0.9638254702323286, "grad_norm": 6.493970258615406, "learning_rate": 3.4299357002869906e-08, "loss": 17.5377, "step": 52728 }, { "epoch": 0.9638437494287752, "grad_norm": 6.6643962941123505, "learning_rate": 3.426475299989851e-08, "loss": 17.1526, "step": 52729 }, { "epoch": 0.9638620286252216, "grad_norm": 5.384714237438671, "learning_rate": 3.4230166401398865e-08, "loss": 17.0598, "step": 52730 }, { "epoch": 0.9638803078216681, "grad_norm": 5.195971640897261, "learning_rate": 3.419559720749144e-08, "loss": 17.1056, "step": 52731 }, { "epoch": 0.9638985870181147, "grad_norm": 7.558736117055176, "learning_rate": 3.416104541829724e-08, "loss": 17.6432, "step": 52732 }, { "epoch": 0.9639168662145612, "grad_norm": 6.166478355668261, "learning_rate": 3.412651103393783e-08, "loss": 17.4493, "step": 52733 }, { "epoch": 0.9639351454110078, "grad_norm": 6.016953293748084, "learning_rate": 3.409199405453423e-08, "loss": 17.0377, "step": 52734 }, { "epoch": 0.9639534246074543, "grad_norm": 7.046380859687113, "learning_rate": 3.40574944802069e-08, "loss": 17.4993, "step": 52735 }, { "epoch": 0.9639717038039007, "grad_norm": 5.517159611225895, "learning_rate": 3.4023012311077406e-08, "loss": 17.1161, "step": 52736 }, { "epoch": 0.9639899830003473, "grad_norm": 6.296344675419155, "learning_rate": 3.398854754726621e-08, "loss": 17.2448, "step": 52737 }, { "epoch": 0.9640082621967938, "grad_norm": 6.1866051497119985, "learning_rate": 3.395410018889378e-08, "loss": 17.5701, "step": 52738 }, { "epoch": 0.9640265413932404, "grad_norm": 5.994268601204159, "learning_rate": 3.391967023608167e-08, "loss": 17.1529, "step": 52739 }, { "epoch": 0.9640448205896869, "grad_norm": 5.5985727916078085, "learning_rate": 3.388525768895035e-08, "loss": 17.047, "step": 52740 }, { "epoch": 0.9640630997861334, "grad_norm": 5.228252375368383, "learning_rate": 3.3850862547620266e-08, "loss": 17.2039, "step": 52741 }, { "epoch": 0.96408137898258, "grad_norm": 5.407403453452127, "learning_rate": 3.381648481221189e-08, "loss": 16.9721, "step": 52742 }, { "epoch": 0.9640996581790264, "grad_norm": 7.220370821300664, "learning_rate": 3.378212448284623e-08, "loss": 17.7656, "step": 52743 }, { "epoch": 0.964117937375473, "grad_norm": 5.155330944276732, "learning_rate": 3.374778155964265e-08, "loss": 16.9082, "step": 52744 }, { "epoch": 0.9641362165719195, "grad_norm": 6.323111382588979, "learning_rate": 3.371345604272214e-08, "loss": 17.6573, "step": 52745 }, { "epoch": 0.964154495768366, "grad_norm": 5.774822428118674, "learning_rate": 3.367914793220517e-08, "loss": 17.3254, "step": 52746 }, { "epoch": 0.9641727749648126, "grad_norm": 6.421523836631103, "learning_rate": 3.3644857228211645e-08, "loss": 17.0721, "step": 52747 }, { "epoch": 0.964191054161259, "grad_norm": 6.925936461213194, "learning_rate": 3.361058393086203e-08, "loss": 17.7947, "step": 52748 }, { "epoch": 0.9642093333577056, "grad_norm": 6.04174716987789, "learning_rate": 3.357632804027622e-08, "loss": 17.0115, "step": 52749 }, { "epoch": 0.9642276125541521, "grad_norm": 6.193381498421873, "learning_rate": 3.354208955657468e-08, "loss": 17.0675, "step": 52750 }, { "epoch": 0.9642458917505986, "grad_norm": 7.06862944218249, "learning_rate": 3.35078684798773e-08, "loss": 17.2935, "step": 52751 }, { "epoch": 0.9642641709470452, "grad_norm": 6.283711752334615, "learning_rate": 3.347366481030345e-08, "loss": 17.3306, "step": 52752 }, { "epoch": 0.9642824501434917, "grad_norm": 6.150467426717634, "learning_rate": 3.3439478547974134e-08, "loss": 17.194, "step": 52753 }, { "epoch": 0.9643007293399383, "grad_norm": 5.2063251459095445, "learning_rate": 3.340530969300815e-08, "loss": 16.9897, "step": 52754 }, { "epoch": 0.9643190085363847, "grad_norm": 5.65791429022107, "learning_rate": 3.337115824552484e-08, "loss": 17.3125, "step": 52755 }, { "epoch": 0.9643372877328312, "grad_norm": 5.617870289501103, "learning_rate": 3.333702420564522e-08, "loss": 17.132, "step": 52756 }, { "epoch": 0.9643555669292778, "grad_norm": 8.179698216244885, "learning_rate": 3.3302907573488644e-08, "loss": 17.9466, "step": 52757 }, { "epoch": 0.9643738461257243, "grad_norm": 5.436765913172306, "learning_rate": 3.32688083491739e-08, "loss": 16.9964, "step": 52758 }, { "epoch": 0.9643921253221709, "grad_norm": 6.518056382805355, "learning_rate": 3.323472653282145e-08, "loss": 17.0983, "step": 52759 }, { "epoch": 0.9644104045186174, "grad_norm": 9.515493461626365, "learning_rate": 3.320066212454953e-08, "loss": 17.5996, "step": 52760 }, { "epoch": 0.9644286837150639, "grad_norm": 4.744256303128087, "learning_rate": 3.316661512447916e-08, "loss": 16.8727, "step": 52761 }, { "epoch": 0.9644469629115104, "grad_norm": 5.429633180314203, "learning_rate": 3.3132585532728024e-08, "loss": 16.9876, "step": 52762 }, { "epoch": 0.9644652421079569, "grad_norm": 6.165959573278194, "learning_rate": 3.309857334941657e-08, "loss": 16.9941, "step": 52763 }, { "epoch": 0.9644835213044035, "grad_norm": 5.779998353421891, "learning_rate": 3.306457857466361e-08, "loss": 17.2394, "step": 52764 }, { "epoch": 0.96450180050085, "grad_norm": 6.469830444721969, "learning_rate": 3.3030601208588476e-08, "loss": 17.2335, "step": 52765 }, { "epoch": 0.9645200796972965, "grad_norm": 7.140571216644896, "learning_rate": 3.299664125130942e-08, "loss": 17.5251, "step": 52766 }, { "epoch": 0.9645383588937431, "grad_norm": 6.990450619047654, "learning_rate": 3.2962698702946884e-08, "loss": 17.5251, "step": 52767 }, { "epoch": 0.9645566380901895, "grad_norm": 7.6282091588327905, "learning_rate": 3.292877356361857e-08, "loss": 17.4189, "step": 52768 }, { "epoch": 0.964574917286636, "grad_norm": 6.787050325136427, "learning_rate": 3.2894865833444366e-08, "loss": 17.281, "step": 52769 }, { "epoch": 0.9645931964830826, "grad_norm": 6.390242313996987, "learning_rate": 3.286097551254197e-08, "loss": 17.2295, "step": 52770 }, { "epoch": 0.9646114756795291, "grad_norm": 6.798751068408515, "learning_rate": 3.282710260103128e-08, "loss": 17.8286, "step": 52771 }, { "epoch": 0.9646297548759757, "grad_norm": 5.374516008125919, "learning_rate": 3.2793247099030537e-08, "loss": 17.1999, "step": 52772 }, { "epoch": 0.9646480340724222, "grad_norm": 5.563291958547864, "learning_rate": 3.2759409006657974e-08, "loss": 17.1082, "step": 52773 }, { "epoch": 0.9646663132688686, "grad_norm": 5.268706208153645, "learning_rate": 3.27255883240335e-08, "loss": 16.9657, "step": 52774 }, { "epoch": 0.9646845924653152, "grad_norm": 8.012336964419601, "learning_rate": 3.269178505127424e-08, "loss": 17.7563, "step": 52775 }, { "epoch": 0.9647028716617617, "grad_norm": 5.338132921306691, "learning_rate": 3.265799918849954e-08, "loss": 17.0824, "step": 52776 }, { "epoch": 0.9647211508582083, "grad_norm": 5.28307253119494, "learning_rate": 3.26242307358271e-08, "loss": 17.1558, "step": 52777 }, { "epoch": 0.9647394300546548, "grad_norm": 6.572366673146809, "learning_rate": 3.25904796933757e-08, "loss": 17.5235, "step": 52778 }, { "epoch": 0.9647577092511013, "grad_norm": 5.323072010581849, "learning_rate": 3.255674606126413e-08, "loss": 17.1097, "step": 52779 }, { "epoch": 0.9647759884475479, "grad_norm": 5.322688767079443, "learning_rate": 3.252302983960953e-08, "loss": 17.221, "step": 52780 }, { "epoch": 0.9647942676439943, "grad_norm": 6.768599334078588, "learning_rate": 3.2489331028530694e-08, "loss": 17.5077, "step": 52781 }, { "epoch": 0.9648125468404409, "grad_norm": 6.400503683654197, "learning_rate": 3.245564962814585e-08, "loss": 17.5791, "step": 52782 }, { "epoch": 0.9648308260368874, "grad_norm": 5.562683579804067, "learning_rate": 3.242198563857268e-08, "loss": 17.0722, "step": 52783 }, { "epoch": 0.9648491052333339, "grad_norm": 9.561407987473437, "learning_rate": 3.2388339059929994e-08, "loss": 17.9243, "step": 52784 }, { "epoch": 0.9648673844297805, "grad_norm": 5.944431620509779, "learning_rate": 3.2354709892334914e-08, "loss": 16.9729, "step": 52785 }, { "epoch": 0.964885663626227, "grad_norm": 6.103861107709442, "learning_rate": 3.232109813590511e-08, "loss": 17.2905, "step": 52786 }, { "epoch": 0.9649039428226736, "grad_norm": 5.78214994757488, "learning_rate": 3.2287503790758846e-08, "loss": 17.2199, "step": 52787 }, { "epoch": 0.96492222201912, "grad_norm": 6.142046188284588, "learning_rate": 3.225392685701434e-08, "loss": 16.8358, "step": 52788 }, { "epoch": 0.9649405012155665, "grad_norm": 6.34846736394929, "learning_rate": 3.222036733478817e-08, "loss": 17.5619, "step": 52789 }, { "epoch": 0.9649587804120131, "grad_norm": 6.664977408580633, "learning_rate": 3.218682522419858e-08, "loss": 17.1387, "step": 52790 }, { "epoch": 0.9649770596084596, "grad_norm": 7.553630037658124, "learning_rate": 3.215330052536325e-08, "loss": 17.152, "step": 52791 }, { "epoch": 0.9649953388049062, "grad_norm": 6.237784432012593, "learning_rate": 3.211979323839931e-08, "loss": 17.3874, "step": 52792 }, { "epoch": 0.9650136180013527, "grad_norm": 8.473835565539767, "learning_rate": 3.2086303363425e-08, "loss": 18.6574, "step": 52793 }, { "epoch": 0.9650318971977991, "grad_norm": 6.855772518824487, "learning_rate": 3.205283090055633e-08, "loss": 17.5453, "step": 52794 }, { "epoch": 0.9650501763942457, "grad_norm": 6.824418309458932, "learning_rate": 3.2019375849912106e-08, "loss": 17.0842, "step": 52795 }, { "epoch": 0.9650684555906922, "grad_norm": 5.204847268033518, "learning_rate": 3.198593821160834e-08, "loss": 17.048, "step": 52796 }, { "epoch": 0.9650867347871388, "grad_norm": 7.345567174079333, "learning_rate": 3.195251798576271e-08, "loss": 17.5476, "step": 52797 }, { "epoch": 0.9651050139835853, "grad_norm": 6.265503075051071, "learning_rate": 3.191911517249291e-08, "loss": 17.0357, "step": 52798 }, { "epoch": 0.9651232931800318, "grad_norm": 5.441463120184329, "learning_rate": 3.188572977191495e-08, "loss": 17.1515, "step": 52799 }, { "epoch": 0.9651415723764784, "grad_norm": 5.911309354206819, "learning_rate": 3.185236178414653e-08, "loss": 17.3702, "step": 52800 }, { "epoch": 0.9651598515729248, "grad_norm": 5.837195581015756, "learning_rate": 3.181901120930475e-08, "loss": 17.3287, "step": 52801 }, { "epoch": 0.9651781307693714, "grad_norm": 5.409058129702665, "learning_rate": 3.178567804750621e-08, "loss": 17.0814, "step": 52802 }, { "epoch": 0.9651964099658179, "grad_norm": 4.61200309685318, "learning_rate": 3.1752362298867464e-08, "loss": 16.636, "step": 52803 }, { "epoch": 0.9652146891622644, "grad_norm": 4.876818145876309, "learning_rate": 3.171906396350566e-08, "loss": 16.9035, "step": 52804 }, { "epoch": 0.965232968358711, "grad_norm": 7.024771217756758, "learning_rate": 3.1685783041537355e-08, "loss": 17.8715, "step": 52805 }, { "epoch": 0.9652512475551575, "grad_norm": 6.546984295515677, "learning_rate": 3.165251953307913e-08, "loss": 16.9734, "step": 52806 }, { "epoch": 0.965269526751604, "grad_norm": 6.584001846689546, "learning_rate": 3.161927343824811e-08, "loss": 17.4836, "step": 52807 }, { "epoch": 0.9652878059480505, "grad_norm": 5.483231562965791, "learning_rate": 3.1586044757159764e-08, "loss": 16.901, "step": 52808 }, { "epoch": 0.965306085144497, "grad_norm": 5.857683303250769, "learning_rate": 3.1552833489931766e-08, "loss": 17.011, "step": 52809 }, { "epoch": 0.9653243643409436, "grad_norm": 6.561463174680487, "learning_rate": 3.151963963667959e-08, "loss": 17.4269, "step": 52810 }, { "epoch": 0.9653426435373901, "grad_norm": 5.852399887962319, "learning_rate": 3.1486463197520354e-08, "loss": 17.2347, "step": 52811 }, { "epoch": 0.9653609227338367, "grad_norm": 6.719952844331417, "learning_rate": 3.145330417256953e-08, "loss": 17.6249, "step": 52812 }, { "epoch": 0.9653792019302831, "grad_norm": 6.298727642514143, "learning_rate": 3.142016256194425e-08, "loss": 17.2788, "step": 52813 }, { "epoch": 0.9653974811267296, "grad_norm": 7.311294816274932, "learning_rate": 3.13870383657594e-08, "loss": 17.3024, "step": 52814 }, { "epoch": 0.9654157603231762, "grad_norm": 4.944763187711334, "learning_rate": 3.135393158413214e-08, "loss": 16.7129, "step": 52815 }, { "epoch": 0.9654340395196227, "grad_norm": 6.611112587990509, "learning_rate": 3.132084221717846e-08, "loss": 17.5984, "step": 52816 }, { "epoch": 0.9654523187160693, "grad_norm": 5.690102463268421, "learning_rate": 3.128777026501384e-08, "loss": 17.111, "step": 52817 }, { "epoch": 0.9654705979125158, "grad_norm": 5.993844219423959, "learning_rate": 3.125471572775429e-08, "loss": 16.8249, "step": 52818 }, { "epoch": 0.9654888771089623, "grad_norm": 6.521382745677108, "learning_rate": 3.122167860551584e-08, "loss": 17.2795, "step": 52819 }, { "epoch": 0.9655071563054088, "grad_norm": 5.8679590378859015, "learning_rate": 3.1188658898414494e-08, "loss": 17.0529, "step": 52820 }, { "epoch": 0.9655254355018553, "grad_norm": 5.939372687361832, "learning_rate": 3.115565660656572e-08, "loss": 17.3181, "step": 52821 }, { "epoch": 0.9655437146983019, "grad_norm": 6.364059156829163, "learning_rate": 3.112267173008443e-08, "loss": 17.3764, "step": 52822 }, { "epoch": 0.9655619938947484, "grad_norm": 6.979691827877456, "learning_rate": 3.108970426908775e-08, "loss": 17.4946, "step": 52823 }, { "epoch": 0.9655802730911949, "grad_norm": 5.990244345629018, "learning_rate": 3.105675422369059e-08, "loss": 17.2912, "step": 52824 }, { "epoch": 0.9655985522876415, "grad_norm": 7.284629972295712, "learning_rate": 3.1023821594007854e-08, "loss": 17.5395, "step": 52825 }, { "epoch": 0.965616831484088, "grad_norm": 6.4648636659361625, "learning_rate": 3.0990906380156115e-08, "loss": 17.3663, "step": 52826 }, { "epoch": 0.9656351106805345, "grad_norm": 5.055593627652412, "learning_rate": 3.095800858224918e-08, "loss": 16.9856, "step": 52827 }, { "epoch": 0.965653389876981, "grad_norm": 6.728708820809569, "learning_rate": 3.092512820040361e-08, "loss": 17.5325, "step": 52828 }, { "epoch": 0.9656716690734275, "grad_norm": 6.0303907522959, "learning_rate": 3.089226523473432e-08, "loss": 17.3364, "step": 52829 }, { "epoch": 0.9656899482698741, "grad_norm": 5.9437679754446275, "learning_rate": 3.085941968535622e-08, "loss": 17.4247, "step": 52830 }, { "epoch": 0.9657082274663206, "grad_norm": 6.588306776807874, "learning_rate": 3.082659155238477e-08, "loss": 17.3926, "step": 52831 }, { "epoch": 0.9657265066627672, "grad_norm": 6.346471747967066, "learning_rate": 3.0793780835935426e-08, "loss": 17.5359, "step": 52832 }, { "epoch": 0.9657447858592136, "grad_norm": 6.977004720745134, "learning_rate": 3.0760987536122e-08, "loss": 17.1351, "step": 52833 }, { "epoch": 0.9657630650556601, "grad_norm": 6.4759094845196845, "learning_rate": 3.07282116530605e-08, "loss": 17.2571, "step": 52834 }, { "epoch": 0.9657813442521067, "grad_norm": 5.762478260946994, "learning_rate": 3.069545318686529e-08, "loss": 17.0846, "step": 52835 }, { "epoch": 0.9657996234485532, "grad_norm": 5.359601636950296, "learning_rate": 3.066271213765126e-08, "loss": 16.9823, "step": 52836 }, { "epoch": 0.9658179026449997, "grad_norm": 6.842294048969077, "learning_rate": 3.062998850553278e-08, "loss": 17.2396, "step": 52837 }, { "epoch": 0.9658361818414463, "grad_norm": 5.090287168736122, "learning_rate": 3.0597282290625306e-08, "loss": 16.9577, "step": 52838 }, { "epoch": 0.9658544610378927, "grad_norm": 5.508100917115154, "learning_rate": 3.056459349304319e-08, "loss": 16.8452, "step": 52839 }, { "epoch": 0.9658727402343393, "grad_norm": 7.366666617150781, "learning_rate": 3.0531922112901346e-08, "loss": 17.644, "step": 52840 }, { "epoch": 0.9658910194307858, "grad_norm": 7.544714888522759, "learning_rate": 3.049926815031357e-08, "loss": 17.889, "step": 52841 }, { "epoch": 0.9659092986272323, "grad_norm": 6.0489726221615125, "learning_rate": 3.04666316053942e-08, "loss": 17.2567, "step": 52842 }, { "epoch": 0.9659275778236789, "grad_norm": 6.3144426016279835, "learning_rate": 3.043401247825872e-08, "loss": 17.1442, "step": 52843 }, { "epoch": 0.9659458570201254, "grad_norm": 7.666788740468835, "learning_rate": 3.0401410769020366e-08, "loss": 17.7025, "step": 52844 }, { "epoch": 0.965964136216572, "grad_norm": 6.965604112248207, "learning_rate": 3.0368826477794043e-08, "loss": 17.5062, "step": 52845 }, { "epoch": 0.9659824154130184, "grad_norm": 5.642077497285389, "learning_rate": 3.033625960469355e-08, "loss": 17.1556, "step": 52846 }, { "epoch": 0.9660006946094649, "grad_norm": 6.995911858315361, "learning_rate": 3.0303710149833244e-08, "loss": 17.3313, "step": 52847 }, { "epoch": 0.9660189738059115, "grad_norm": 5.859224254715706, "learning_rate": 3.027117811332692e-08, "loss": 17.1308, "step": 52848 }, { "epoch": 0.966037253002358, "grad_norm": 7.011052210235502, "learning_rate": 3.023866349528948e-08, "loss": 17.3888, "step": 52849 }, { "epoch": 0.9660555321988046, "grad_norm": 6.174560582498008, "learning_rate": 3.020616629583417e-08, "loss": 17.2309, "step": 52850 }, { "epoch": 0.9660738113952511, "grad_norm": 6.124744267908667, "learning_rate": 3.017368651507424e-08, "loss": 17.1105, "step": 52851 }, { "epoch": 0.9660920905916975, "grad_norm": 6.468261042893325, "learning_rate": 3.014122415312515e-08, "loss": 17.3355, "step": 52852 }, { "epoch": 0.9661103697881441, "grad_norm": 8.291593250677252, "learning_rate": 3.010877921009958e-08, "loss": 17.7127, "step": 52853 }, { "epoch": 0.9661286489845906, "grad_norm": 4.830254937887846, "learning_rate": 3.007635168611134e-08, "loss": 16.9673, "step": 52854 }, { "epoch": 0.9661469281810372, "grad_norm": 5.854195507786266, "learning_rate": 3.0043941581274215e-08, "loss": 17.2757, "step": 52855 }, { "epoch": 0.9661652073774837, "grad_norm": 6.181464448029339, "learning_rate": 3.001154889570146e-08, "loss": 17.2961, "step": 52856 }, { "epoch": 0.9661834865739302, "grad_norm": 5.611853349371095, "learning_rate": 2.997917362950742e-08, "loss": 17.2566, "step": 52857 }, { "epoch": 0.9662017657703768, "grad_norm": 5.628619675766215, "learning_rate": 2.994681578280534e-08, "loss": 16.8699, "step": 52858 }, { "epoch": 0.9662200449668232, "grad_norm": 6.401682620199858, "learning_rate": 2.991447535570791e-08, "loss": 17.492, "step": 52859 }, { "epoch": 0.9662383241632698, "grad_norm": 7.069782478011986, "learning_rate": 2.988215234832892e-08, "loss": 17.2414, "step": 52860 }, { "epoch": 0.9662566033597163, "grad_norm": 4.721011435949157, "learning_rate": 2.984984676078217e-08, "loss": 16.8572, "step": 52861 }, { "epoch": 0.9662748825561628, "grad_norm": 6.7765559019315225, "learning_rate": 2.981755859317981e-08, "loss": 17.5573, "step": 52862 }, { "epoch": 0.9662931617526094, "grad_norm": 6.3258531155367255, "learning_rate": 2.9785287845636167e-08, "loss": 17.2517, "step": 52863 }, { "epoch": 0.9663114409490559, "grad_norm": 6.537008644470074, "learning_rate": 2.975303451826339e-08, "loss": 17.1537, "step": 52864 }, { "epoch": 0.9663297201455024, "grad_norm": 5.895911092373636, "learning_rate": 2.9720798611175274e-08, "loss": 17.0911, "step": 52865 }, { "epoch": 0.9663479993419489, "grad_norm": 5.068237205472065, "learning_rate": 2.9688580124483946e-08, "loss": 16.9815, "step": 52866 }, { "epoch": 0.9663662785383954, "grad_norm": 6.218049133212801, "learning_rate": 2.965637905830321e-08, "loss": 17.3416, "step": 52867 }, { "epoch": 0.966384557734842, "grad_norm": 6.490880480148268, "learning_rate": 2.962419541274575e-08, "loss": 17.2259, "step": 52868 }, { "epoch": 0.9664028369312885, "grad_norm": 7.289720598251215, "learning_rate": 2.95920291879237e-08, "loss": 17.5866, "step": 52869 }, { "epoch": 0.9664211161277351, "grad_norm": 5.190865689824825, "learning_rate": 2.9559880383950856e-08, "loss": 17.0085, "step": 52870 }, { "epoch": 0.9664393953241815, "grad_norm": 6.8159464441103275, "learning_rate": 2.9527749000938798e-08, "loss": 17.882, "step": 52871 }, { "epoch": 0.966457674520628, "grad_norm": 6.488469851920421, "learning_rate": 2.949563503900077e-08, "loss": 17.361, "step": 52872 }, { "epoch": 0.9664759537170746, "grad_norm": 5.316456752627359, "learning_rate": 2.9463538498249456e-08, "loss": 16.9633, "step": 52873 }, { "epoch": 0.9664942329135211, "grad_norm": 7.580982441258695, "learning_rate": 2.943145937879699e-08, "loss": 17.9455, "step": 52874 }, { "epoch": 0.9665125121099677, "grad_norm": 6.177938428339976, "learning_rate": 2.939939768075606e-08, "loss": 17.4025, "step": 52875 }, { "epoch": 0.9665307913064142, "grad_norm": 6.698002321000668, "learning_rate": 2.9367353404238243e-08, "loss": 17.3677, "step": 52876 }, { "epoch": 0.9665490705028607, "grad_norm": 6.9409996088264405, "learning_rate": 2.9335326549357335e-08, "loss": 17.5286, "step": 52877 }, { "epoch": 0.9665673496993072, "grad_norm": 5.647426105526226, "learning_rate": 2.930331711622436e-08, "loss": 17.1288, "step": 52878 }, { "epoch": 0.9665856288957537, "grad_norm": 6.133112292750458, "learning_rate": 2.927132510495201e-08, "loss": 17.5837, "step": 52879 }, { "epoch": 0.9666039080922003, "grad_norm": 5.260242212892267, "learning_rate": 2.923935051565241e-08, "loss": 16.8643, "step": 52880 }, { "epoch": 0.9666221872886468, "grad_norm": 6.299576307944228, "learning_rate": 2.9207393348437694e-08, "loss": 17.3697, "step": 52881 }, { "epoch": 0.9666404664850933, "grad_norm": 8.142173692540563, "learning_rate": 2.9175453603419446e-08, "loss": 18.0038, "step": 52882 }, { "epoch": 0.9666587456815399, "grad_norm": 5.254358290854852, "learning_rate": 2.914353128070979e-08, "loss": 17.0585, "step": 52883 }, { "epoch": 0.9666770248779863, "grad_norm": 7.4949443691668955, "learning_rate": 2.9111626380420866e-08, "loss": 17.5781, "step": 52884 }, { "epoch": 0.9666953040744329, "grad_norm": 7.688149190201651, "learning_rate": 2.9079738902664244e-08, "loss": 17.7021, "step": 52885 }, { "epoch": 0.9667135832708794, "grad_norm": 6.5519801884418, "learning_rate": 2.9047868847552063e-08, "loss": 17.3501, "step": 52886 }, { "epoch": 0.9667318624673259, "grad_norm": 6.236024059312964, "learning_rate": 2.9016016215195343e-08, "loss": 17.2284, "step": 52887 }, { "epoch": 0.9667501416637725, "grad_norm": 7.354908550204929, "learning_rate": 2.8984181005706214e-08, "loss": 17.9049, "step": 52888 }, { "epoch": 0.966768420860219, "grad_norm": 6.260374475457745, "learning_rate": 2.895236321919681e-08, "loss": 17.4034, "step": 52889 }, { "epoch": 0.9667867000566656, "grad_norm": 5.630864405176635, "learning_rate": 2.8920562855777044e-08, "loss": 17.2023, "step": 52890 }, { "epoch": 0.966804979253112, "grad_norm": 5.746544789605753, "learning_rate": 2.888877991556016e-08, "loss": 17.304, "step": 52891 }, { "epoch": 0.9668232584495585, "grad_norm": 5.789152836612897, "learning_rate": 2.885701439865607e-08, "loss": 16.9596, "step": 52892 }, { "epoch": 0.9668415376460051, "grad_norm": 7.621406580710854, "learning_rate": 2.8825266305176902e-08, "loss": 17.7092, "step": 52893 }, { "epoch": 0.9668598168424516, "grad_norm": 7.647223605692752, "learning_rate": 2.879353563523424e-08, "loss": 17.9125, "step": 52894 }, { "epoch": 0.9668780960388982, "grad_norm": 6.611875073321863, "learning_rate": 2.8761822388938543e-08, "loss": 17.3773, "step": 52895 }, { "epoch": 0.9668963752353447, "grad_norm": 5.5110141048664065, "learning_rate": 2.873012656640084e-08, "loss": 17.0681, "step": 52896 }, { "epoch": 0.9669146544317911, "grad_norm": 5.672131856455679, "learning_rate": 2.8698448167733263e-08, "loss": 17.0338, "step": 52897 }, { "epoch": 0.9669329336282377, "grad_norm": 6.746577435293833, "learning_rate": 2.8666787193046276e-08, "loss": 17.1719, "step": 52898 }, { "epoch": 0.9669512128246842, "grad_norm": 7.093920460393027, "learning_rate": 2.8635143642450347e-08, "loss": 17.2889, "step": 52899 }, { "epoch": 0.9669694920211308, "grad_norm": 5.226498653716406, "learning_rate": 2.8603517516057054e-08, "loss": 16.8957, "step": 52900 }, { "epoch": 0.9669877712175773, "grad_norm": 4.841368210751378, "learning_rate": 2.8571908813976868e-08, "loss": 16.7286, "step": 52901 }, { "epoch": 0.9670060504140238, "grad_norm": 5.0945598687329765, "learning_rate": 2.854031753632136e-08, "loss": 17.0788, "step": 52902 }, { "epoch": 0.9670243296104704, "grad_norm": 9.423072056197947, "learning_rate": 2.8508743683199892e-08, "loss": 18.0796, "step": 52903 }, { "epoch": 0.9670426088069168, "grad_norm": 6.3518649758098675, "learning_rate": 2.8477187254724037e-08, "loss": 17.5046, "step": 52904 }, { "epoch": 0.9670608880033633, "grad_norm": 7.360909974877433, "learning_rate": 2.844564825100482e-08, "loss": 17.0385, "step": 52905 }, { "epoch": 0.9670791671998099, "grad_norm": 6.230592883085525, "learning_rate": 2.84141266721516e-08, "loss": 17.1132, "step": 52906 }, { "epoch": 0.9670974463962564, "grad_norm": 5.907998519160562, "learning_rate": 2.8382622518275395e-08, "loss": 17.3229, "step": 52907 }, { "epoch": 0.967115725592703, "grad_norm": 6.7076157917502925, "learning_rate": 2.8351135789487226e-08, "loss": 17.4506, "step": 52908 }, { "epoch": 0.9671340047891495, "grad_norm": 6.5119161926584255, "learning_rate": 2.8319666485896457e-08, "loss": 17.5743, "step": 52909 }, { "epoch": 0.9671522839855959, "grad_norm": 5.520831369363806, "learning_rate": 2.8288214607614105e-08, "loss": 17.3878, "step": 52910 }, { "epoch": 0.9671705631820425, "grad_norm": 8.224166888767872, "learning_rate": 2.8256780154750086e-08, "loss": 17.8643, "step": 52911 }, { "epoch": 0.967188842378489, "grad_norm": 7.041585983101588, "learning_rate": 2.822536312741431e-08, "loss": 17.4119, "step": 52912 }, { "epoch": 0.9672071215749356, "grad_norm": 5.243677788637086, "learning_rate": 2.8193963525717238e-08, "loss": 16.9779, "step": 52913 }, { "epoch": 0.9672254007713821, "grad_norm": 6.698391327457894, "learning_rate": 2.8162581349769346e-08, "loss": 17.7803, "step": 52914 }, { "epoch": 0.9672436799678286, "grad_norm": 5.749464811174992, "learning_rate": 2.813121659967999e-08, "loss": 17.255, "step": 52915 }, { "epoch": 0.9672619591642752, "grad_norm": 6.987728930620933, "learning_rate": 2.8099869275559076e-08, "loss": 17.5309, "step": 52916 }, { "epoch": 0.9672802383607216, "grad_norm": 5.467344610104428, "learning_rate": 2.806853937751708e-08, "loss": 17.0572, "step": 52917 }, { "epoch": 0.9672985175571682, "grad_norm": 6.475317403749835, "learning_rate": 2.8037226905662795e-08, "loss": 17.6025, "step": 52918 }, { "epoch": 0.9673167967536147, "grad_norm": 5.775286575149274, "learning_rate": 2.8005931860106693e-08, "loss": 17.0615, "step": 52919 }, { "epoch": 0.9673350759500612, "grad_norm": 8.721458171085061, "learning_rate": 2.7974654240958688e-08, "loss": 18.0281, "step": 52920 }, { "epoch": 0.9673533551465078, "grad_norm": 6.923746800466604, "learning_rate": 2.794339404832813e-08, "loss": 17.6129, "step": 52921 }, { "epoch": 0.9673716343429543, "grad_norm": 5.482166538483747, "learning_rate": 2.7912151282324383e-08, "loss": 16.9353, "step": 52922 }, { "epoch": 0.9673899135394008, "grad_norm": 5.177267603341751, "learning_rate": 2.7880925943056803e-08, "loss": 17.2097, "step": 52923 }, { "epoch": 0.9674081927358473, "grad_norm": 7.333076805135264, "learning_rate": 2.7849718030635296e-08, "loss": 17.7307, "step": 52924 }, { "epoch": 0.9674264719322938, "grad_norm": 6.977362085679152, "learning_rate": 2.781852754516923e-08, "loss": 17.2856, "step": 52925 }, { "epoch": 0.9674447511287404, "grad_norm": 5.102485137752475, "learning_rate": 2.7787354486767947e-08, "loss": 16.9018, "step": 52926 }, { "epoch": 0.9674630303251869, "grad_norm": 5.618362475171114, "learning_rate": 2.7756198855540262e-08, "loss": 17.0851, "step": 52927 }, { "epoch": 0.9674813095216335, "grad_norm": 5.898517683337133, "learning_rate": 2.7725060651595524e-08, "loss": 17.1533, "step": 52928 }, { "epoch": 0.96749958871808, "grad_norm": 5.769835726770127, "learning_rate": 2.769393987504254e-08, "loss": 17.1873, "step": 52929 }, { "epoch": 0.9675178679145264, "grad_norm": 5.761367184071958, "learning_rate": 2.7662836525991775e-08, "loss": 17.1094, "step": 52930 }, { "epoch": 0.967536147110973, "grad_norm": 6.963248611595272, "learning_rate": 2.763175060455092e-08, "loss": 17.557, "step": 52931 }, { "epoch": 0.9675544263074195, "grad_norm": 6.415826012111673, "learning_rate": 2.760068211082878e-08, "loss": 17.207, "step": 52932 }, { "epoch": 0.9675727055038661, "grad_norm": 7.282862379809352, "learning_rate": 2.7569631044935263e-08, "loss": 17.6576, "step": 52933 }, { "epoch": 0.9675909847003126, "grad_norm": 6.3471464729092695, "learning_rate": 2.753859740697862e-08, "loss": 17.4116, "step": 52934 }, { "epoch": 0.967609263896759, "grad_norm": 6.33937713479307, "learning_rate": 2.7507581197067644e-08, "loss": 17.1141, "step": 52935 }, { "epoch": 0.9676275430932056, "grad_norm": 6.812569177419989, "learning_rate": 2.7476582415311703e-08, "loss": 17.4596, "step": 52936 }, { "epoch": 0.9676458222896521, "grad_norm": 6.461453535926906, "learning_rate": 2.744560106181793e-08, "loss": 17.3327, "step": 52937 }, { "epoch": 0.9676641014860987, "grad_norm": 7.460026630477066, "learning_rate": 2.7414637136696233e-08, "loss": 17.4845, "step": 52938 }, { "epoch": 0.9676823806825452, "grad_norm": 4.399419735770575, "learning_rate": 2.738369064005486e-08, "loss": 16.7295, "step": 52939 }, { "epoch": 0.9677006598789917, "grad_norm": 6.118992380949779, "learning_rate": 2.735276157200206e-08, "loss": 17.1211, "step": 52940 }, { "epoch": 0.9677189390754383, "grad_norm": 5.483784879987828, "learning_rate": 2.7321849932646084e-08, "loss": 16.9732, "step": 52941 }, { "epoch": 0.9677372182718847, "grad_norm": 6.359031478408383, "learning_rate": 2.7290955722095725e-08, "loss": 17.2579, "step": 52942 }, { "epoch": 0.9677554974683313, "grad_norm": 6.6703282574661555, "learning_rate": 2.7260078940459234e-08, "loss": 17.2586, "step": 52943 }, { "epoch": 0.9677737766647778, "grad_norm": 4.617162595684565, "learning_rate": 2.7229219587844302e-08, "loss": 16.7663, "step": 52944 }, { "epoch": 0.9677920558612243, "grad_norm": 7.016161368125849, "learning_rate": 2.719837766435973e-08, "loss": 17.4671, "step": 52945 }, { "epoch": 0.9678103350576709, "grad_norm": 6.64915239561256, "learning_rate": 2.7167553170113215e-08, "loss": 17.4871, "step": 52946 }, { "epoch": 0.9678286142541174, "grad_norm": 5.057655187162303, "learning_rate": 2.7136746105212442e-08, "loss": 16.9827, "step": 52947 }, { "epoch": 0.967846893450564, "grad_norm": 7.2577955430518895, "learning_rate": 2.7105956469766213e-08, "loss": 17.6676, "step": 52948 }, { "epoch": 0.9678651726470104, "grad_norm": 5.191330283605918, "learning_rate": 2.7075184263882227e-08, "loss": 16.9383, "step": 52949 }, { "epoch": 0.9678834518434569, "grad_norm": 5.755990770867772, "learning_rate": 2.7044429487668167e-08, "loss": 17.0389, "step": 52950 }, { "epoch": 0.9679017310399035, "grad_norm": 6.909576677292397, "learning_rate": 2.701369214123173e-08, "loss": 17.2569, "step": 52951 }, { "epoch": 0.96792001023635, "grad_norm": 8.162924832812045, "learning_rate": 2.6982972224680603e-08, "loss": 17.9114, "step": 52952 }, { "epoch": 0.9679382894327966, "grad_norm": 5.247987318233303, "learning_rate": 2.6952269738123038e-08, "loss": 17.0176, "step": 52953 }, { "epoch": 0.9679565686292431, "grad_norm": 6.316863811309373, "learning_rate": 2.692158468166617e-08, "loss": 17.1162, "step": 52954 }, { "epoch": 0.9679748478256895, "grad_norm": 5.623103015164644, "learning_rate": 2.6890917055417686e-08, "loss": 17.2374, "step": 52955 }, { "epoch": 0.9679931270221361, "grad_norm": 6.268519896391774, "learning_rate": 2.6860266859484175e-08, "loss": 17.2894, "step": 52956 }, { "epoch": 0.9680114062185826, "grad_norm": 6.479380811348294, "learning_rate": 2.682963409397499e-08, "loss": 17.2377, "step": 52957 }, { "epoch": 0.9680296854150292, "grad_norm": 7.298892381325504, "learning_rate": 2.6799018758995598e-08, "loss": 17.8551, "step": 52958 }, { "epoch": 0.9680479646114757, "grad_norm": 5.298947033182543, "learning_rate": 2.6768420854654252e-08, "loss": 17.1035, "step": 52959 }, { "epoch": 0.9680662438079222, "grad_norm": 4.404557481652781, "learning_rate": 2.673784038105809e-08, "loss": 16.6094, "step": 52960 }, { "epoch": 0.9680845230043688, "grad_norm": 8.607209057084136, "learning_rate": 2.670727733831424e-08, "loss": 17.4753, "step": 52961 }, { "epoch": 0.9681028022008152, "grad_norm": 5.336246046511117, "learning_rate": 2.6676731726530402e-08, "loss": 16.9818, "step": 52962 }, { "epoch": 0.9681210813972618, "grad_norm": 8.583411131431784, "learning_rate": 2.6646203545812044e-08, "loss": 18.2507, "step": 52963 }, { "epoch": 0.9681393605937083, "grad_norm": 5.731992701416247, "learning_rate": 2.6615692796267413e-08, "loss": 17.3135, "step": 52964 }, { "epoch": 0.9681576397901548, "grad_norm": 6.171628217408918, "learning_rate": 2.658519947800364e-08, "loss": 17.3728, "step": 52965 }, { "epoch": 0.9681759189866014, "grad_norm": 7.24818010615123, "learning_rate": 2.6554723591126764e-08, "loss": 17.0825, "step": 52966 }, { "epoch": 0.9681941981830479, "grad_norm": 9.349343666868501, "learning_rate": 2.6524265135744466e-08, "loss": 18.0735, "step": 52967 }, { "epoch": 0.9682124773794945, "grad_norm": 5.592679979001269, "learning_rate": 2.6493824111962773e-08, "loss": 17.1605, "step": 52968 }, { "epoch": 0.9682307565759409, "grad_norm": 5.688891740969518, "learning_rate": 2.6463400519888827e-08, "loss": 17.2143, "step": 52969 }, { "epoch": 0.9682490357723874, "grad_norm": 5.766428434452851, "learning_rate": 2.643299435962865e-08, "loss": 16.9641, "step": 52970 }, { "epoch": 0.968267314968834, "grad_norm": 5.553267893073807, "learning_rate": 2.6402605631289935e-08, "loss": 17.0805, "step": 52971 }, { "epoch": 0.9682855941652805, "grad_norm": 6.466808005296855, "learning_rate": 2.6372234334977597e-08, "loss": 17.301, "step": 52972 }, { "epoch": 0.968303873361727, "grad_norm": 5.195955900416953, "learning_rate": 2.6341880470799332e-08, "loss": 16.854, "step": 52973 }, { "epoch": 0.9683221525581736, "grad_norm": 5.882248666806722, "learning_rate": 2.6311544038861158e-08, "loss": 16.9972, "step": 52974 }, { "epoch": 0.96834043175462, "grad_norm": 7.386313997112991, "learning_rate": 2.628122503927022e-08, "loss": 17.4423, "step": 52975 }, { "epoch": 0.9683587109510666, "grad_norm": 5.791064404833686, "learning_rate": 2.625092347213143e-08, "loss": 16.8938, "step": 52976 }, { "epoch": 0.9683769901475131, "grad_norm": 6.57507982380957, "learning_rate": 2.6220639337550813e-08, "loss": 17.4194, "step": 52977 }, { "epoch": 0.9683952693439596, "grad_norm": 7.665453502611878, "learning_rate": 2.6190372635636064e-08, "loss": 17.2922, "step": 52978 }, { "epoch": 0.9684135485404062, "grad_norm": 4.6921753214033854, "learning_rate": 2.616012336649265e-08, "loss": 16.7372, "step": 52979 }, { "epoch": 0.9684318277368527, "grad_norm": 7.333504293028897, "learning_rate": 2.612989153022605e-08, "loss": 17.6008, "step": 52980 }, { "epoch": 0.9684501069332992, "grad_norm": 5.953485428263137, "learning_rate": 2.609967712694228e-08, "loss": 17.1477, "step": 52981 }, { "epoch": 0.9684683861297457, "grad_norm": 5.025200006771105, "learning_rate": 2.6069480156747928e-08, "loss": 16.9303, "step": 52982 }, { "epoch": 0.9684866653261922, "grad_norm": 7.016753428292441, "learning_rate": 2.603930061974791e-08, "loss": 17.5837, "step": 52983 }, { "epoch": 0.9685049445226388, "grad_norm": 6.816654489216413, "learning_rate": 2.600913851604936e-08, "loss": 17.3855, "step": 52984 }, { "epoch": 0.9685232237190853, "grad_norm": 5.644246215567006, "learning_rate": 2.597899384575664e-08, "loss": 17.0906, "step": 52985 }, { "epoch": 0.9685415029155319, "grad_norm": 4.933768531775658, "learning_rate": 2.5948866608975777e-08, "loss": 16.8139, "step": 52986 }, { "epoch": 0.9685597821119784, "grad_norm": 7.650216315541118, "learning_rate": 2.5918756805812794e-08, "loss": 17.2991, "step": 52987 }, { "epoch": 0.9685780613084248, "grad_norm": 8.120419933539324, "learning_rate": 2.5888664436373166e-08, "loss": 17.6464, "step": 52988 }, { "epoch": 0.9685963405048714, "grad_norm": 6.2145710103839065, "learning_rate": 2.585858950076181e-08, "loss": 17.3028, "step": 52989 }, { "epoch": 0.9686146197013179, "grad_norm": 5.748457782933903, "learning_rate": 2.5828531999084194e-08, "loss": 17.1463, "step": 52990 }, { "epoch": 0.9686328988977645, "grad_norm": 8.243319567301077, "learning_rate": 2.5798491931446346e-08, "loss": 17.6187, "step": 52991 }, { "epoch": 0.968651178094211, "grad_norm": 5.800667965776388, "learning_rate": 2.5768469297952624e-08, "loss": 17.4674, "step": 52992 }, { "epoch": 0.9686694572906575, "grad_norm": 7.400047218934078, "learning_rate": 2.5738464098709614e-08, "loss": 17.6148, "step": 52993 }, { "epoch": 0.968687736487104, "grad_norm": 7.315515427924646, "learning_rate": 2.570847633382112e-08, "loss": 17.7178, "step": 52994 }, { "epoch": 0.9687060156835505, "grad_norm": 5.404376779004061, "learning_rate": 2.567850600339261e-08, "loss": 16.9331, "step": 52995 }, { "epoch": 0.9687242948799971, "grad_norm": 5.178426538767146, "learning_rate": 2.5648553107529562e-08, "loss": 16.8741, "step": 52996 }, { "epoch": 0.9687425740764436, "grad_norm": 5.313328452084244, "learning_rate": 2.5618617646336884e-08, "loss": 16.9998, "step": 52997 }, { "epoch": 0.9687608532728901, "grad_norm": 6.495846062041949, "learning_rate": 2.5588699619918944e-08, "loss": 17.2597, "step": 52998 }, { "epoch": 0.9687791324693367, "grad_norm": 5.763439419826637, "learning_rate": 2.5558799028381208e-08, "loss": 17.0164, "step": 52999 }, { "epoch": 0.9687974116657831, "grad_norm": 5.99948557974721, "learning_rate": 2.5528915871828043e-08, "loss": 17.1768, "step": 53000 }, { "epoch": 0.9688156908622297, "grad_norm": 5.557580846785018, "learning_rate": 2.5499050150364357e-08, "loss": 17.0741, "step": 53001 }, { "epoch": 0.9688339700586762, "grad_norm": 6.1813677788759165, "learning_rate": 2.5469201864094516e-08, "loss": 17.3109, "step": 53002 }, { "epoch": 0.9688522492551227, "grad_norm": 5.367787607080088, "learning_rate": 2.543937101312399e-08, "loss": 17.0127, "step": 53003 }, { "epoch": 0.9688705284515693, "grad_norm": 5.03547197273783, "learning_rate": 2.540955759755659e-08, "loss": 17.04, "step": 53004 }, { "epoch": 0.9688888076480158, "grad_norm": 6.022137368123797, "learning_rate": 2.5379761617496666e-08, "loss": 16.9228, "step": 53005 }, { "epoch": 0.9689070868444624, "grad_norm": 5.68878219346409, "learning_rate": 2.53499830730497e-08, "loss": 16.933, "step": 53006 }, { "epoch": 0.9689253660409088, "grad_norm": 6.697129645193093, "learning_rate": 2.5320221964318935e-08, "loss": 17.9144, "step": 53007 }, { "epoch": 0.9689436452373553, "grad_norm": 6.234684002516318, "learning_rate": 2.529047829140929e-08, "loss": 17.2252, "step": 53008 }, { "epoch": 0.9689619244338019, "grad_norm": 5.644809813934453, "learning_rate": 2.526075205442513e-08, "loss": 17.1783, "step": 53009 }, { "epoch": 0.9689802036302484, "grad_norm": 5.637490196962324, "learning_rate": 2.52310432534697e-08, "loss": 16.9673, "step": 53010 }, { "epoch": 0.968998482826695, "grad_norm": 6.744312049311823, "learning_rate": 2.520135188864792e-08, "loss": 17.3771, "step": 53011 }, { "epoch": 0.9690167620231415, "grad_norm": 5.01488556791557, "learning_rate": 2.517167796006359e-08, "loss": 16.7972, "step": 53012 }, { "epoch": 0.969035041219588, "grad_norm": 7.323780786667778, "learning_rate": 2.5142021467821076e-08, "loss": 17.6296, "step": 53013 }, { "epoch": 0.9690533204160345, "grad_norm": 6.850178301498573, "learning_rate": 2.5112382412024185e-08, "loss": 18.1103, "step": 53014 }, { "epoch": 0.969071599612481, "grad_norm": 5.87333093481714, "learning_rate": 2.5082760792776718e-08, "loss": 17.369, "step": 53015 }, { "epoch": 0.9690898788089276, "grad_norm": 5.5351108888886005, "learning_rate": 2.5053156610182484e-08, "loss": 17.1511, "step": 53016 }, { "epoch": 0.9691081580053741, "grad_norm": 5.1688818456278804, "learning_rate": 2.5023569864344734e-08, "loss": 16.8998, "step": 53017 }, { "epoch": 0.9691264372018206, "grad_norm": 6.1457801069256766, "learning_rate": 2.499400055536838e-08, "loss": 17.2964, "step": 53018 }, { "epoch": 0.9691447163982672, "grad_norm": 7.650854606127955, "learning_rate": 2.4964448683355568e-08, "loss": 17.6124, "step": 53019 }, { "epoch": 0.9691629955947136, "grad_norm": 5.804821181473385, "learning_rate": 2.4934914248411212e-08, "loss": 17.2171, "step": 53020 }, { "epoch": 0.9691812747911602, "grad_norm": 6.695556078667013, "learning_rate": 2.490539725063801e-08, "loss": 17.4027, "step": 53021 }, { "epoch": 0.9691995539876067, "grad_norm": 6.135870288939307, "learning_rate": 2.487589769013976e-08, "loss": 17.2036, "step": 53022 }, { "epoch": 0.9692178331840532, "grad_norm": 5.847925327701627, "learning_rate": 2.4846415567019723e-08, "loss": 17.2895, "step": 53023 }, { "epoch": 0.9692361123804998, "grad_norm": 6.465333100029066, "learning_rate": 2.4816950881381695e-08, "loss": 17.5536, "step": 53024 }, { "epoch": 0.9692543915769463, "grad_norm": 6.092923838555123, "learning_rate": 2.4787503633327825e-08, "loss": 17.1766, "step": 53025 }, { "epoch": 0.9692726707733929, "grad_norm": 6.807131433036195, "learning_rate": 2.475807382296247e-08, "loss": 17.8339, "step": 53026 }, { "epoch": 0.9692909499698393, "grad_norm": 6.8421556469988705, "learning_rate": 2.4728661450388326e-08, "loss": 17.5569, "step": 53027 }, { "epoch": 0.9693092291662858, "grad_norm": 6.506122250004051, "learning_rate": 2.4699266515708086e-08, "loss": 17.7774, "step": 53028 }, { "epoch": 0.9693275083627324, "grad_norm": 11.495343247867552, "learning_rate": 2.466988901902556e-08, "loss": 17.9134, "step": 53029 }, { "epoch": 0.9693457875591789, "grad_norm": 6.280200340365319, "learning_rate": 2.4640528960443443e-08, "loss": 17.1173, "step": 53030 }, { "epoch": 0.9693640667556255, "grad_norm": 7.035622299361294, "learning_rate": 2.4611186340063874e-08, "loss": 17.1304, "step": 53031 }, { "epoch": 0.969382345952072, "grad_norm": 7.6521334301162325, "learning_rate": 2.4581861157991216e-08, "loss": 17.8962, "step": 53032 }, { "epoch": 0.9694006251485184, "grad_norm": 5.2375133743313045, "learning_rate": 2.455255341432705e-08, "loss": 16.8872, "step": 53033 }, { "epoch": 0.969418904344965, "grad_norm": 6.04388544226357, "learning_rate": 2.4523263109174078e-08, "loss": 17.0876, "step": 53034 }, { "epoch": 0.9694371835414115, "grad_norm": 5.531301352102367, "learning_rate": 2.4493990242635547e-08, "loss": 16.9264, "step": 53035 }, { "epoch": 0.9694554627378581, "grad_norm": 6.292225018516545, "learning_rate": 2.4464734814814152e-08, "loss": 17.554, "step": 53036 }, { "epoch": 0.9694737419343046, "grad_norm": 5.930640186288458, "learning_rate": 2.443549682581148e-08, "loss": 17.2865, "step": 53037 }, { "epoch": 0.9694920211307511, "grad_norm": 6.0055288889736325, "learning_rate": 2.4406276275731332e-08, "loss": 17.3651, "step": 53038 }, { "epoch": 0.9695103003271976, "grad_norm": 5.860780041935867, "learning_rate": 2.4377073164674746e-08, "loss": 16.9596, "step": 53039 }, { "epoch": 0.9695285795236441, "grad_norm": 4.7664921171166155, "learning_rate": 2.4347887492745526e-08, "loss": 16.9264, "step": 53040 }, { "epoch": 0.9695468587200906, "grad_norm": 9.032431394787432, "learning_rate": 2.4318719260044697e-08, "loss": 17.8191, "step": 53041 }, { "epoch": 0.9695651379165372, "grad_norm": 5.367063426497306, "learning_rate": 2.4289568466674962e-08, "loss": 17.1233, "step": 53042 }, { "epoch": 0.9695834171129837, "grad_norm": 5.670482855044799, "learning_rate": 2.4260435112738457e-08, "loss": 17.0802, "step": 53043 }, { "epoch": 0.9696016963094303, "grad_norm": 5.86968125513203, "learning_rate": 2.4231319198337876e-08, "loss": 17.3768, "step": 53044 }, { "epoch": 0.9696199755058768, "grad_norm": 5.731389465125828, "learning_rate": 2.4202220723574254e-08, "loss": 17.3805, "step": 53045 }, { "epoch": 0.9696382547023232, "grad_norm": 5.373188941552916, "learning_rate": 2.417313968854973e-08, "loss": 17.2286, "step": 53046 }, { "epoch": 0.9696565338987698, "grad_norm": 6.454279143639432, "learning_rate": 2.4144076093367554e-08, "loss": 16.9755, "step": 53047 }, { "epoch": 0.9696748130952163, "grad_norm": 5.797685308133243, "learning_rate": 2.4115029938127642e-08, "loss": 17.2292, "step": 53048 }, { "epoch": 0.9696930922916629, "grad_norm": 6.55910859434037, "learning_rate": 2.4086001222933253e-08, "loss": 17.4225, "step": 53049 }, { "epoch": 0.9697113714881094, "grad_norm": 7.397688422283084, "learning_rate": 2.4056989947885965e-08, "loss": 17.8623, "step": 53050 }, { "epoch": 0.9697296506845559, "grad_norm": 5.3029634511500525, "learning_rate": 2.402799611308626e-08, "loss": 16.6559, "step": 53051 }, { "epoch": 0.9697479298810024, "grad_norm": 6.653082133633415, "learning_rate": 2.3999019718637385e-08, "loss": 17.6065, "step": 53052 }, { "epoch": 0.9697662090774489, "grad_norm": 5.6221584102085025, "learning_rate": 2.3970060764639814e-08, "loss": 17.3534, "step": 53053 }, { "epoch": 0.9697844882738955, "grad_norm": 6.121182400891627, "learning_rate": 2.3941119251195688e-08, "loss": 17.174, "step": 53054 }, { "epoch": 0.969802767470342, "grad_norm": 6.1600323973938105, "learning_rate": 2.3912195178405483e-08, "loss": 17.1686, "step": 53055 }, { "epoch": 0.9698210466667885, "grad_norm": 9.054200030553249, "learning_rate": 2.3883288546371898e-08, "loss": 17.7347, "step": 53056 }, { "epoch": 0.9698393258632351, "grad_norm": 9.162477784905604, "learning_rate": 2.3854399355195403e-08, "loss": 18.2348, "step": 53057 }, { "epoch": 0.9698576050596815, "grad_norm": 5.961478081515165, "learning_rate": 2.382552760497703e-08, "loss": 17.2493, "step": 53058 }, { "epoch": 0.9698758842561281, "grad_norm": 7.008698244563523, "learning_rate": 2.3796673295818918e-08, "loss": 17.7713, "step": 53059 }, { "epoch": 0.9698941634525746, "grad_norm": 5.254131895038461, "learning_rate": 2.3767836427821544e-08, "loss": 17.0093, "step": 53060 }, { "epoch": 0.9699124426490211, "grad_norm": 6.301818541101638, "learning_rate": 2.373901700108594e-08, "loss": 17.3014, "step": 53061 }, { "epoch": 0.9699307218454677, "grad_norm": 6.701453482990777, "learning_rate": 2.3710215015713134e-08, "loss": 17.5378, "step": 53062 }, { "epoch": 0.9699490010419142, "grad_norm": 6.081554205320462, "learning_rate": 2.368143047180471e-08, "loss": 17.2212, "step": 53063 }, { "epoch": 0.9699672802383608, "grad_norm": 5.111176825528198, "learning_rate": 2.3652663369460593e-08, "loss": 17.0729, "step": 53064 }, { "epoch": 0.9699855594348072, "grad_norm": 6.5451123441417, "learning_rate": 2.362391370878181e-08, "loss": 17.19, "step": 53065 }, { "epoch": 0.9700038386312537, "grad_norm": 6.1461139766847435, "learning_rate": 2.3595181489869946e-08, "loss": 17.6443, "step": 53066 }, { "epoch": 0.9700221178277003, "grad_norm": 6.193103705221204, "learning_rate": 2.356646671282492e-08, "loss": 17.3144, "step": 53067 }, { "epoch": 0.9700403970241468, "grad_norm": 7.045061365351263, "learning_rate": 2.3537769377747212e-08, "loss": 17.7609, "step": 53068 }, { "epoch": 0.9700586762205934, "grad_norm": 5.3540750543401305, "learning_rate": 2.35090894847384e-08, "loss": 16.9993, "step": 53069 }, { "epoch": 0.9700769554170399, "grad_norm": 6.535852131921939, "learning_rate": 2.348042703389841e-08, "loss": 17.5666, "step": 53070 }, { "epoch": 0.9700952346134863, "grad_norm": 7.852528978099888, "learning_rate": 2.3451782025327162e-08, "loss": 17.9124, "step": 53071 }, { "epoch": 0.9701135138099329, "grad_norm": 7.896664647056745, "learning_rate": 2.3423154459126242e-08, "loss": 18.2757, "step": 53072 }, { "epoch": 0.9701317930063794, "grad_norm": 5.810787800168184, "learning_rate": 2.3394544335394454e-08, "loss": 16.6399, "step": 53073 }, { "epoch": 0.970150072202826, "grad_norm": 6.2396726875996125, "learning_rate": 2.336595165423339e-08, "loss": 17.4745, "step": 53074 }, { "epoch": 0.9701683513992725, "grad_norm": 6.338417395887025, "learning_rate": 2.3337376415742964e-08, "loss": 17.4345, "step": 53075 }, { "epoch": 0.970186630595719, "grad_norm": 7.0288369174248135, "learning_rate": 2.33088186200231e-08, "loss": 18.0362, "step": 53076 }, { "epoch": 0.9702049097921656, "grad_norm": 10.67720298877449, "learning_rate": 2.3280278267173716e-08, "loss": 18.6396, "step": 53077 }, { "epoch": 0.970223188988612, "grad_norm": 5.34231168367277, "learning_rate": 2.3251755357295292e-08, "loss": 17.0736, "step": 53078 }, { "epoch": 0.9702414681850586, "grad_norm": 6.414673876104946, "learning_rate": 2.322324989048774e-08, "loss": 17.1815, "step": 53079 }, { "epoch": 0.9702597473815051, "grad_norm": 5.223064504313483, "learning_rate": 2.319476186685099e-08, "loss": 17.05, "step": 53080 }, { "epoch": 0.9702780265779516, "grad_norm": 6.066277867791852, "learning_rate": 2.31662912864844e-08, "loss": 17.2205, "step": 53081 }, { "epoch": 0.9702963057743982, "grad_norm": 6.660413764807081, "learning_rate": 2.313783814948789e-08, "loss": 17.4478, "step": 53082 }, { "epoch": 0.9703145849708447, "grad_norm": 8.18195658490181, "learning_rate": 2.3109402455961384e-08, "loss": 17.746, "step": 53083 }, { "epoch": 0.9703328641672913, "grad_norm": 5.203881115712482, "learning_rate": 2.3080984206004797e-08, "loss": 16.8513, "step": 53084 }, { "epoch": 0.9703511433637377, "grad_norm": 4.163794428174504, "learning_rate": 2.30525833997175e-08, "loss": 16.5317, "step": 53085 }, { "epoch": 0.9703694225601842, "grad_norm": 5.0026551837405036, "learning_rate": 2.3024200037199406e-08, "loss": 16.8651, "step": 53086 }, { "epoch": 0.9703877017566308, "grad_norm": 5.622015007020334, "learning_rate": 2.2995834118548775e-08, "loss": 17.3359, "step": 53087 }, { "epoch": 0.9704059809530773, "grad_norm": 6.524551411854706, "learning_rate": 2.2967485643866638e-08, "loss": 17.4291, "step": 53088 }, { "epoch": 0.9704242601495239, "grad_norm": 6.2865774860166, "learning_rate": 2.2939154613251246e-08, "loss": 17.5019, "step": 53089 }, { "epoch": 0.9704425393459704, "grad_norm": 7.492310973057174, "learning_rate": 2.2910841026801967e-08, "loss": 17.4184, "step": 53090 }, { "epoch": 0.9704608185424168, "grad_norm": 6.464668343210391, "learning_rate": 2.2882544884618162e-08, "loss": 17.5182, "step": 53091 }, { "epoch": 0.9704790977388634, "grad_norm": 5.766626938772126, "learning_rate": 2.2854266186799755e-08, "loss": 17.2314, "step": 53092 }, { "epoch": 0.9704973769353099, "grad_norm": 6.244287623548871, "learning_rate": 2.2826004933445002e-08, "loss": 17.4213, "step": 53093 }, { "epoch": 0.9705156561317565, "grad_norm": 6.2638777014916105, "learning_rate": 2.2797761124653263e-08, "loss": 17.4643, "step": 53094 }, { "epoch": 0.970533935328203, "grad_norm": 6.882981651363176, "learning_rate": 2.276953476052335e-08, "loss": 17.4973, "step": 53095 }, { "epoch": 0.9705522145246495, "grad_norm": 7.098304229573753, "learning_rate": 2.2741325841154627e-08, "loss": 17.3042, "step": 53096 }, { "epoch": 0.970570493721096, "grad_norm": 6.517346273525568, "learning_rate": 2.271313436664535e-08, "loss": 17.3123, "step": 53097 }, { "epoch": 0.9705887729175425, "grad_norm": 8.618386839710842, "learning_rate": 2.2684960337094332e-08, "loss": 18.881, "step": 53098 }, { "epoch": 0.9706070521139891, "grad_norm": 7.396398246098829, "learning_rate": 2.2656803752601485e-08, "loss": 17.4075, "step": 53099 }, { "epoch": 0.9706253313104356, "grad_norm": 6.607317755701598, "learning_rate": 2.2628664613263963e-08, "loss": 17.3561, "step": 53100 }, { "epoch": 0.9706436105068821, "grad_norm": 5.655236371821424, "learning_rate": 2.2600542919181124e-08, "loss": 17.1095, "step": 53101 }, { "epoch": 0.9706618897033287, "grad_norm": 7.5123819145168955, "learning_rate": 2.2572438670451224e-08, "loss": 17.6419, "step": 53102 }, { "epoch": 0.9706801688997752, "grad_norm": 5.674682985468237, "learning_rate": 2.254435186717363e-08, "loss": 17.2389, "step": 53103 }, { "epoch": 0.9706984480962217, "grad_norm": 5.217232532782689, "learning_rate": 2.2516282509445486e-08, "loss": 16.9587, "step": 53104 }, { "epoch": 0.9707167272926682, "grad_norm": 6.610676484888033, "learning_rate": 2.248823059736671e-08, "loss": 17.1299, "step": 53105 }, { "epoch": 0.9707350064891147, "grad_norm": 6.827835090046102, "learning_rate": 2.2460196131033896e-08, "loss": 17.3457, "step": 53106 }, { "epoch": 0.9707532856855613, "grad_norm": 5.282768783291262, "learning_rate": 2.2432179110546405e-08, "loss": 16.9897, "step": 53107 }, { "epoch": 0.9707715648820078, "grad_norm": 8.106887663110369, "learning_rate": 2.240417953600249e-08, "loss": 18.1646, "step": 53108 }, { "epoch": 0.9707898440784543, "grad_norm": 5.998813925491317, "learning_rate": 2.2376197407499856e-08, "loss": 16.88, "step": 53109 }, { "epoch": 0.9708081232749008, "grad_norm": 6.768468176877247, "learning_rate": 2.2348232725136754e-08, "loss": 16.9602, "step": 53110 }, { "epoch": 0.9708264024713473, "grad_norm": 8.776124861148766, "learning_rate": 2.2320285489010883e-08, "loss": 18.0001, "step": 53111 }, { "epoch": 0.9708446816677939, "grad_norm": 6.493497996362861, "learning_rate": 2.2292355699220503e-08, "loss": 17.2971, "step": 53112 }, { "epoch": 0.9708629608642404, "grad_norm": 6.683302691268763, "learning_rate": 2.2264443355863862e-08, "loss": 17.372, "step": 53113 }, { "epoch": 0.9708812400606869, "grad_norm": 6.7449347321429105, "learning_rate": 2.223654845903811e-08, "loss": 17.4034, "step": 53114 }, { "epoch": 0.9708995192571335, "grad_norm": 6.23814462061178, "learning_rate": 2.2208671008841497e-08, "loss": 17.4294, "step": 53115 }, { "epoch": 0.97091779845358, "grad_norm": 5.963776748876484, "learning_rate": 2.2180811005371173e-08, "loss": 17.1536, "step": 53116 }, { "epoch": 0.9709360776500265, "grad_norm": 6.07507778186047, "learning_rate": 2.2152968448725388e-08, "loss": 17.3347, "step": 53117 }, { "epoch": 0.970954356846473, "grad_norm": 5.984784849122551, "learning_rate": 2.2125143339001287e-08, "loss": 16.8825, "step": 53118 }, { "epoch": 0.9709726360429195, "grad_norm": 7.880370203684146, "learning_rate": 2.209733567629713e-08, "loss": 17.5067, "step": 53119 }, { "epoch": 0.9709909152393661, "grad_norm": 7.465339350926075, "learning_rate": 2.2069545460709497e-08, "loss": 17.6393, "step": 53120 }, { "epoch": 0.9710091944358126, "grad_norm": 6.640271310468, "learning_rate": 2.2041772692336094e-08, "loss": 17.4931, "step": 53121 }, { "epoch": 0.9710274736322592, "grad_norm": 7.578339174447139, "learning_rate": 2.2014017371274065e-08, "loss": 17.7768, "step": 53122 }, { "epoch": 0.9710457528287056, "grad_norm": 6.844158879296019, "learning_rate": 2.1986279497621664e-08, "loss": 17.5714, "step": 53123 }, { "epoch": 0.9710640320251521, "grad_norm": 5.698823957416545, "learning_rate": 2.1958559071474926e-08, "loss": 17.148, "step": 53124 }, { "epoch": 0.9710823112215987, "grad_norm": 6.632990963696064, "learning_rate": 2.193085609293155e-08, "loss": 17.4459, "step": 53125 }, { "epoch": 0.9711005904180452, "grad_norm": 7.106659734831643, "learning_rate": 2.1903170562088128e-08, "loss": 17.5231, "step": 53126 }, { "epoch": 0.9711188696144918, "grad_norm": 5.331562118004495, "learning_rate": 2.1875502479042356e-08, "loss": 16.9691, "step": 53127 }, { "epoch": 0.9711371488109383, "grad_norm": 6.557990481570753, "learning_rate": 2.1847851843891375e-08, "loss": 17.4125, "step": 53128 }, { "epoch": 0.9711554280073847, "grad_norm": 6.151499299232312, "learning_rate": 2.182021865673123e-08, "loss": 17.4681, "step": 53129 }, { "epoch": 0.9711737072038313, "grad_norm": 5.710839587450762, "learning_rate": 2.179260291765961e-08, "loss": 17.3626, "step": 53130 }, { "epoch": 0.9711919864002778, "grad_norm": 6.533775376028727, "learning_rate": 2.1765004626772556e-08, "loss": 17.4369, "step": 53131 }, { "epoch": 0.9712102655967244, "grad_norm": 5.783556983010407, "learning_rate": 2.173742378416721e-08, "loss": 17.0998, "step": 53132 }, { "epoch": 0.9712285447931709, "grad_norm": 6.760367224249547, "learning_rate": 2.170986038994072e-08, "loss": 17.1887, "step": 53133 }, { "epoch": 0.9712468239896174, "grad_norm": 4.886091458493665, "learning_rate": 2.1682314444188557e-08, "loss": 16.7906, "step": 53134 }, { "epoch": 0.971265103186064, "grad_norm": 5.357402149028351, "learning_rate": 2.1654785947007874e-08, "loss": 16.9428, "step": 53135 }, { "epoch": 0.9712833823825104, "grad_norm": 6.37237219016331, "learning_rate": 2.162727489849581e-08, "loss": 17.6604, "step": 53136 }, { "epoch": 0.971301661578957, "grad_norm": 7.578900982743392, "learning_rate": 2.159978129874729e-08, "loss": 17.6055, "step": 53137 }, { "epoch": 0.9713199407754035, "grad_norm": 7.249866021363189, "learning_rate": 2.1572305147860018e-08, "loss": 17.6311, "step": 53138 }, { "epoch": 0.97133821997185, "grad_norm": 6.526071559241874, "learning_rate": 2.1544846445930023e-08, "loss": 17.5272, "step": 53139 }, { "epoch": 0.9713564991682966, "grad_norm": 7.402090144860617, "learning_rate": 2.1517405193052788e-08, "loss": 17.707, "step": 53140 }, { "epoch": 0.9713747783647431, "grad_norm": 6.841024862153777, "learning_rate": 2.1489981389325452e-08, "loss": 17.2601, "step": 53141 }, { "epoch": 0.9713930575611897, "grad_norm": 6.844271013063335, "learning_rate": 2.1462575034843503e-08, "loss": 17.6659, "step": 53142 }, { "epoch": 0.9714113367576361, "grad_norm": 6.391635789543398, "learning_rate": 2.143518612970297e-08, "loss": 17.3156, "step": 53143 }, { "epoch": 0.9714296159540826, "grad_norm": 6.904141712276355, "learning_rate": 2.1407814674000993e-08, "loss": 17.3274, "step": 53144 }, { "epoch": 0.9714478951505292, "grad_norm": 6.008429740108199, "learning_rate": 2.1380460667831392e-08, "loss": 17.0985, "step": 53145 }, { "epoch": 0.9714661743469757, "grad_norm": 5.620838137895015, "learning_rate": 2.1353124111292422e-08, "loss": 16.9796, "step": 53146 }, { "epoch": 0.9714844535434223, "grad_norm": 6.998898357248854, "learning_rate": 2.1325805004477895e-08, "loss": 17.5642, "step": 53147 }, { "epoch": 0.9715027327398688, "grad_norm": 7.659058630658926, "learning_rate": 2.1298503347484955e-08, "loss": 17.9957, "step": 53148 }, { "epoch": 0.9715210119363152, "grad_norm": 5.306477556812452, "learning_rate": 2.1271219140407972e-08, "loss": 17.1391, "step": 53149 }, { "epoch": 0.9715392911327618, "grad_norm": 5.663447854729858, "learning_rate": 2.124395238334409e-08, "loss": 17.2202, "step": 53150 }, { "epoch": 0.9715575703292083, "grad_norm": 6.065446401476723, "learning_rate": 2.121670307638768e-08, "loss": 17.1483, "step": 53151 }, { "epoch": 0.9715758495256549, "grad_norm": 7.325636007069318, "learning_rate": 2.1189471219634773e-08, "loss": 17.9997, "step": 53152 }, { "epoch": 0.9715941287221014, "grad_norm": 5.568075492972169, "learning_rate": 2.116225681318085e-08, "loss": 16.984, "step": 53153 }, { "epoch": 0.9716124079185479, "grad_norm": 6.08639893696772, "learning_rate": 2.1135059857121387e-08, "loss": 17.1856, "step": 53154 }, { "epoch": 0.9716306871149945, "grad_norm": 6.12172691415763, "learning_rate": 2.110788035155076e-08, "loss": 17.3986, "step": 53155 }, { "epoch": 0.9716489663114409, "grad_norm": 4.670513191870177, "learning_rate": 2.108071829656555e-08, "loss": 16.8196, "step": 53156 }, { "epoch": 0.9716672455078875, "grad_norm": 6.439354746539615, "learning_rate": 2.1053573692260686e-08, "loss": 17.0012, "step": 53157 }, { "epoch": 0.971685524704334, "grad_norm": 5.529110546464656, "learning_rate": 2.1026446538729983e-08, "loss": 17.2223, "step": 53158 }, { "epoch": 0.9717038039007805, "grad_norm": 5.828515984517943, "learning_rate": 2.0999336836070584e-08, "loss": 17.0811, "step": 53159 }, { "epoch": 0.9717220830972271, "grad_norm": 7.963987214741718, "learning_rate": 2.0972244584375745e-08, "loss": 18.2136, "step": 53160 }, { "epoch": 0.9717403622936736, "grad_norm": 6.359988413889942, "learning_rate": 2.0945169783741505e-08, "loss": 17.5714, "step": 53161 }, { "epoch": 0.9717586414901201, "grad_norm": 5.5491891513118174, "learning_rate": 2.0918112434262226e-08, "loss": 16.9643, "step": 53162 }, { "epoch": 0.9717769206865666, "grad_norm": 5.119413575207395, "learning_rate": 2.089107253603284e-08, "loss": 16.8879, "step": 53163 }, { "epoch": 0.9717951998830131, "grad_norm": 7.457671179786524, "learning_rate": 2.086405008914827e-08, "loss": 17.6641, "step": 53164 }, { "epoch": 0.9718134790794597, "grad_norm": 5.230822162344054, "learning_rate": 2.083704509370288e-08, "loss": 17.0094, "step": 53165 }, { "epoch": 0.9718317582759062, "grad_norm": 6.919366128041227, "learning_rate": 2.0810057549791597e-08, "loss": 17.6298, "step": 53166 }, { "epoch": 0.9718500374723528, "grad_norm": 5.601482354267096, "learning_rate": 2.0783087457509345e-08, "loss": 17.0961, "step": 53167 }, { "epoch": 0.9718683166687992, "grad_norm": 7.535217622034806, "learning_rate": 2.075613481695049e-08, "loss": 17.9121, "step": 53168 }, { "epoch": 0.9718865958652457, "grad_norm": 5.4019427826577555, "learning_rate": 2.072919962820885e-08, "loss": 16.8696, "step": 53169 }, { "epoch": 0.9719048750616923, "grad_norm": 6.5696686825254895, "learning_rate": 2.070228189137935e-08, "loss": 17.5168, "step": 53170 }, { "epoch": 0.9719231542581388, "grad_norm": 6.469146820449671, "learning_rate": 2.0675381606556357e-08, "loss": 17.4535, "step": 53171 }, { "epoch": 0.9719414334545854, "grad_norm": 7.207548593332612, "learning_rate": 2.064849877383368e-08, "loss": 17.6078, "step": 53172 }, { "epoch": 0.9719597126510319, "grad_norm": 5.668080706759707, "learning_rate": 2.062163339330625e-08, "loss": 17.1865, "step": 53173 }, { "epoch": 0.9719779918474784, "grad_norm": 6.271903517600793, "learning_rate": 2.0594785465067878e-08, "loss": 17.259, "step": 53174 }, { "epoch": 0.9719962710439249, "grad_norm": 5.59127121314871, "learning_rate": 2.0567954989212934e-08, "loss": 17.1092, "step": 53175 }, { "epoch": 0.9720145502403714, "grad_norm": 6.528597140276758, "learning_rate": 2.054114196583523e-08, "loss": 17.3996, "step": 53176 }, { "epoch": 0.9720328294368179, "grad_norm": 5.932430001358502, "learning_rate": 2.0514346395028028e-08, "loss": 17.2809, "step": 53177 }, { "epoch": 0.9720511086332645, "grad_norm": 6.524731938844607, "learning_rate": 2.0487568276886804e-08, "loss": 17.583, "step": 53178 }, { "epoch": 0.972069387829711, "grad_norm": 5.346576951834657, "learning_rate": 2.0460807611504264e-08, "loss": 16.922, "step": 53179 }, { "epoch": 0.9720876670261576, "grad_norm": 5.283255090804738, "learning_rate": 2.0434064398974217e-08, "loss": 17.2415, "step": 53180 }, { "epoch": 0.972105946222604, "grad_norm": 6.762997454508122, "learning_rate": 2.040733863939104e-08, "loss": 17.3451, "step": 53181 }, { "epoch": 0.9721242254190505, "grad_norm": 6.643954261013491, "learning_rate": 2.038063033284743e-08, "loss": 17.3953, "step": 53182 }, { "epoch": 0.9721425046154971, "grad_norm": 7.119018742844483, "learning_rate": 2.0353939479437756e-08, "loss": 17.6209, "step": 53183 }, { "epoch": 0.9721607838119436, "grad_norm": 5.725305623811809, "learning_rate": 2.0327266079255837e-08, "loss": 17.2425, "step": 53184 }, { "epoch": 0.9721790630083902, "grad_norm": 7.871121514731281, "learning_rate": 2.0300610132394928e-08, "loss": 17.6408, "step": 53185 }, { "epoch": 0.9721973422048367, "grad_norm": 5.659779865812848, "learning_rate": 2.027397163894773e-08, "loss": 17.1246, "step": 53186 }, { "epoch": 0.9722156214012831, "grad_norm": 7.148838892760138, "learning_rate": 2.0247350599008063e-08, "loss": 18.0788, "step": 53187 }, { "epoch": 0.9722339005977297, "grad_norm": 5.891832358364813, "learning_rate": 2.022074701266974e-08, "loss": 16.9759, "step": 53188 }, { "epoch": 0.9722521797941762, "grad_norm": 6.745878279700208, "learning_rate": 2.019416088002546e-08, "loss": 17.5334, "step": 53189 }, { "epoch": 0.9722704589906228, "grad_norm": 7.555972691278532, "learning_rate": 2.0167592201168483e-08, "loss": 18.0559, "step": 53190 }, { "epoch": 0.9722887381870693, "grad_norm": 5.027600757231112, "learning_rate": 2.0141040976192072e-08, "loss": 16.8394, "step": 53191 }, { "epoch": 0.9723070173835158, "grad_norm": 6.226843420444934, "learning_rate": 2.011450720518837e-08, "loss": 16.9133, "step": 53192 }, { "epoch": 0.9723252965799624, "grad_norm": 4.922795197705618, "learning_rate": 2.0087990888252308e-08, "loss": 16.902, "step": 53193 }, { "epoch": 0.9723435757764088, "grad_norm": 7.387508847879486, "learning_rate": 2.0061492025474914e-08, "loss": 17.9572, "step": 53194 }, { "epoch": 0.9723618549728554, "grad_norm": 6.053339777482244, "learning_rate": 2.003501061695057e-08, "loss": 17.0953, "step": 53195 }, { "epoch": 0.9723801341693019, "grad_norm": 6.5878563940197115, "learning_rate": 2.0008546662770857e-08, "loss": 17.5568, "step": 53196 }, { "epoch": 0.9723984133657484, "grad_norm": 10.730394732787152, "learning_rate": 1.998210016302904e-08, "loss": 18.14, "step": 53197 }, { "epoch": 0.972416692562195, "grad_norm": 6.173826988223421, "learning_rate": 1.995567111781782e-08, "loss": 17.1377, "step": 53198 }, { "epoch": 0.9724349717586415, "grad_norm": 6.550519736577318, "learning_rate": 1.992925952723046e-08, "loss": 17.3869, "step": 53199 }, { "epoch": 0.972453250955088, "grad_norm": 7.180038120393775, "learning_rate": 1.9902865391357994e-08, "loss": 17.5671, "step": 53200 }, { "epoch": 0.9724715301515345, "grad_norm": 5.865724649863933, "learning_rate": 1.987648871029424e-08, "loss": 17.2989, "step": 53201 }, { "epoch": 0.972489809347981, "grad_norm": 6.116280583329639, "learning_rate": 1.9850129484131343e-08, "loss": 17.3944, "step": 53202 }, { "epoch": 0.9725080885444276, "grad_norm": 7.124313880071417, "learning_rate": 1.982378771296145e-08, "loss": 17.4488, "step": 53203 }, { "epoch": 0.9725263677408741, "grad_norm": 4.562642525890064, "learning_rate": 1.9797463396876716e-08, "loss": 16.9249, "step": 53204 }, { "epoch": 0.9725446469373207, "grad_norm": 5.556670003527565, "learning_rate": 1.9771156535969837e-08, "loss": 17.1843, "step": 53205 }, { "epoch": 0.9725629261337672, "grad_norm": 4.9874594671182075, "learning_rate": 1.9744867130333522e-08, "loss": 16.7973, "step": 53206 }, { "epoch": 0.9725812053302136, "grad_norm": 6.772805875344274, "learning_rate": 1.971859518005881e-08, "loss": 17.5102, "step": 53207 }, { "epoch": 0.9725994845266602, "grad_norm": 6.152861652878553, "learning_rate": 1.96923406852384e-08, "loss": 17.4288, "step": 53208 }, { "epoch": 0.9726177637231067, "grad_norm": 5.408751623582065, "learning_rate": 1.9666103645963884e-08, "loss": 16.9285, "step": 53209 }, { "epoch": 0.9726360429195533, "grad_norm": 5.039338828650618, "learning_rate": 1.963988406232742e-08, "loss": 17.0327, "step": 53210 }, { "epoch": 0.9726543221159998, "grad_norm": 6.080922627298397, "learning_rate": 1.9613681934421148e-08, "loss": 17.3424, "step": 53211 }, { "epoch": 0.9726726013124463, "grad_norm": 6.936530847539987, "learning_rate": 1.958749726233722e-08, "loss": 17.7521, "step": 53212 }, { "epoch": 0.9726908805088929, "grad_norm": 6.012446492594081, "learning_rate": 1.9561330046166116e-08, "loss": 17.5077, "step": 53213 }, { "epoch": 0.9727091597053393, "grad_norm": 5.967521919935854, "learning_rate": 1.95351802860011e-08, "loss": 17.3103, "step": 53214 }, { "epoch": 0.9727274389017859, "grad_norm": 7.89066719272378, "learning_rate": 1.950904798193265e-08, "loss": 18.297, "step": 53215 }, { "epoch": 0.9727457180982324, "grad_norm": 7.146303868808066, "learning_rate": 1.9482933134052918e-08, "loss": 17.7022, "step": 53216 }, { "epoch": 0.9727639972946789, "grad_norm": 6.616891668083667, "learning_rate": 1.9456835742453495e-08, "loss": 17.1749, "step": 53217 }, { "epoch": 0.9727822764911255, "grad_norm": 7.012166379208328, "learning_rate": 1.943075580722542e-08, "loss": 17.6512, "step": 53218 }, { "epoch": 0.972800555687572, "grad_norm": 6.265110917203291, "learning_rate": 1.9404693328460288e-08, "loss": 16.9796, "step": 53219 }, { "epoch": 0.9728188348840185, "grad_norm": 6.4083565348097755, "learning_rate": 1.9378648306249693e-08, "loss": 17.3619, "step": 53220 }, { "epoch": 0.972837114080465, "grad_norm": 5.283087946827089, "learning_rate": 1.9352620740684668e-08, "loss": 17.0348, "step": 53221 }, { "epoch": 0.9728553932769115, "grad_norm": 7.727433874233257, "learning_rate": 1.9326610631856257e-08, "loss": 17.9854, "step": 53222 }, { "epoch": 0.9728736724733581, "grad_norm": 6.649327016595124, "learning_rate": 1.9300617979856606e-08, "loss": 17.4498, "step": 53223 }, { "epoch": 0.9728919516698046, "grad_norm": 6.097496877257153, "learning_rate": 1.9274642784775642e-08, "loss": 17.0981, "step": 53224 }, { "epoch": 0.9729102308662512, "grad_norm": 6.261874139817742, "learning_rate": 1.924868504670441e-08, "loss": 17.2402, "step": 53225 }, { "epoch": 0.9729285100626976, "grad_norm": 6.599684727963611, "learning_rate": 1.9222744765735047e-08, "loss": 17.4004, "step": 53226 }, { "epoch": 0.9729467892591441, "grad_norm": 5.516805303110107, "learning_rate": 1.919682194195749e-08, "loss": 17.2254, "step": 53227 }, { "epoch": 0.9729650684555907, "grad_norm": 6.412243583000539, "learning_rate": 1.9170916575462772e-08, "loss": 17.3021, "step": 53228 }, { "epoch": 0.9729833476520372, "grad_norm": 8.501220897981058, "learning_rate": 1.9145028666341936e-08, "loss": 18.5899, "step": 53229 }, { "epoch": 0.9730016268484838, "grad_norm": 6.024877656699317, "learning_rate": 1.9119158214685462e-08, "loss": 17.191, "step": 53230 }, { "epoch": 0.9730199060449303, "grad_norm": 7.296135484539229, "learning_rate": 1.9093305220583836e-08, "loss": 17.6148, "step": 53231 }, { "epoch": 0.9730381852413768, "grad_norm": 6.052192260001501, "learning_rate": 1.9067469684128092e-08, "loss": 17.4982, "step": 53232 }, { "epoch": 0.9730564644378233, "grad_norm": 5.449892915202456, "learning_rate": 1.9041651605409272e-08, "loss": 16.9435, "step": 53233 }, { "epoch": 0.9730747436342698, "grad_norm": 5.7515194905404865, "learning_rate": 1.9015850984516193e-08, "loss": 17.3075, "step": 53234 }, { "epoch": 0.9730930228307164, "grad_norm": 5.496789606105577, "learning_rate": 1.8990067821541002e-08, "loss": 16.9448, "step": 53235 }, { "epoch": 0.9731113020271629, "grad_norm": 5.833042397886607, "learning_rate": 1.8964302116573075e-08, "loss": 17.277, "step": 53236 }, { "epoch": 0.9731295812236094, "grad_norm": 5.807653212606551, "learning_rate": 1.8938553869703448e-08, "loss": 17.1375, "step": 53237 }, { "epoch": 0.973147860420056, "grad_norm": 5.174330188320309, "learning_rate": 1.8912823081021492e-08, "loss": 16.9102, "step": 53238 }, { "epoch": 0.9731661396165024, "grad_norm": 4.839843480377133, "learning_rate": 1.8887109750618248e-08, "loss": 16.7322, "step": 53239 }, { "epoch": 0.973184418812949, "grad_norm": 5.570769327259715, "learning_rate": 1.886141387858309e-08, "loss": 16.9206, "step": 53240 }, { "epoch": 0.9732026980093955, "grad_norm": 5.93262310483274, "learning_rate": 1.8835735465007055e-08, "loss": 17.2226, "step": 53241 }, { "epoch": 0.973220977205842, "grad_norm": 6.457041914291932, "learning_rate": 1.8810074509978958e-08, "loss": 17.616, "step": 53242 }, { "epoch": 0.9732392564022886, "grad_norm": 5.373305120305289, "learning_rate": 1.8784431013589287e-08, "loss": 16.993, "step": 53243 }, { "epoch": 0.9732575355987351, "grad_norm": 6.422568528411058, "learning_rate": 1.875880497592797e-08, "loss": 17.3098, "step": 53244 }, { "epoch": 0.9732758147951815, "grad_norm": 5.297805496685066, "learning_rate": 1.873319639708493e-08, "loss": 16.9235, "step": 53245 }, { "epoch": 0.9732940939916281, "grad_norm": 6.709529577817065, "learning_rate": 1.8707605277149542e-08, "loss": 17.1387, "step": 53246 }, { "epoch": 0.9733123731880746, "grad_norm": 5.939262361175877, "learning_rate": 1.868203161621229e-08, "loss": 17.276, "step": 53247 }, { "epoch": 0.9733306523845212, "grad_norm": 6.580256187711117, "learning_rate": 1.8656475414361997e-08, "loss": 17.6448, "step": 53248 }, { "epoch": 0.9733489315809677, "grad_norm": 5.356764980843171, "learning_rate": 1.8630936671688028e-08, "loss": 17.0425, "step": 53249 }, { "epoch": 0.9733672107774142, "grad_norm": 6.361884776959987, "learning_rate": 1.8605415388280866e-08, "loss": 17.2589, "step": 53250 }, { "epoch": 0.9733854899738608, "grad_norm": 6.873716540117309, "learning_rate": 1.8579911564229335e-08, "loss": 17.3272, "step": 53251 }, { "epoch": 0.9734037691703072, "grad_norm": 6.5095287733372285, "learning_rate": 1.855442519962336e-08, "loss": 17.4966, "step": 53252 }, { "epoch": 0.9734220483667538, "grad_norm": 5.385253679690331, "learning_rate": 1.8528956294551203e-08, "loss": 17.0972, "step": 53253 }, { "epoch": 0.9734403275632003, "grad_norm": 5.067182857242718, "learning_rate": 1.8503504849102794e-08, "loss": 16.7537, "step": 53254 }, { "epoch": 0.9734586067596468, "grad_norm": 8.624857160635662, "learning_rate": 1.8478070863367502e-08, "loss": 18.1327, "step": 53255 }, { "epoch": 0.9734768859560934, "grad_norm": 6.1477287561242795, "learning_rate": 1.8452654337434152e-08, "loss": 17.1777, "step": 53256 }, { "epoch": 0.9734951651525399, "grad_norm": 6.7359777043241, "learning_rate": 1.8427255271392664e-08, "loss": 17.4999, "step": 53257 }, { "epoch": 0.9735134443489865, "grad_norm": 6.955450915576563, "learning_rate": 1.840187366533075e-08, "loss": 17.7562, "step": 53258 }, { "epoch": 0.9735317235454329, "grad_norm": 4.864400959899446, "learning_rate": 1.8376509519337783e-08, "loss": 17.0569, "step": 53259 }, { "epoch": 0.9735500027418794, "grad_norm": 5.742046424256838, "learning_rate": 1.835116283350369e-08, "loss": 17.4432, "step": 53260 }, { "epoch": 0.973568281938326, "grad_norm": 6.144682656320222, "learning_rate": 1.832583360791562e-08, "loss": 17.249, "step": 53261 }, { "epoch": 0.9735865611347725, "grad_norm": 6.817445378128324, "learning_rate": 1.830052184266351e-08, "loss": 17.5407, "step": 53262 }, { "epoch": 0.9736048403312191, "grad_norm": 5.88186600398755, "learning_rate": 1.8275227537836172e-08, "loss": 17.2141, "step": 53263 }, { "epoch": 0.9736231195276656, "grad_norm": 5.655348251411976, "learning_rate": 1.8249950693521313e-08, "loss": 16.9704, "step": 53264 }, { "epoch": 0.973641398724112, "grad_norm": 5.05855545144403, "learning_rate": 1.8224691309808306e-08, "loss": 16.8842, "step": 53265 }, { "epoch": 0.9736596779205586, "grad_norm": 6.377311201117069, "learning_rate": 1.8199449386785417e-08, "loss": 17.3024, "step": 53266 }, { "epoch": 0.9736779571170051, "grad_norm": 7.006374409977334, "learning_rate": 1.8174224924540905e-08, "loss": 17.5313, "step": 53267 }, { "epoch": 0.9736962363134517, "grad_norm": 6.2085450647275255, "learning_rate": 1.8149017923164148e-08, "loss": 17.4731, "step": 53268 }, { "epoch": 0.9737145155098982, "grad_norm": 6.689880171582847, "learning_rate": 1.812382838274229e-08, "loss": 17.4783, "step": 53269 }, { "epoch": 0.9737327947063447, "grad_norm": 6.634680760667569, "learning_rate": 1.809865630336416e-08, "loss": 17.5669, "step": 53270 }, { "epoch": 0.9737510739027913, "grad_norm": 7.073641848541043, "learning_rate": 1.807350168511801e-08, "loss": 17.482, "step": 53271 }, { "epoch": 0.9737693530992377, "grad_norm": 5.622890255023296, "learning_rate": 1.8048364528091556e-08, "loss": 17.19, "step": 53272 }, { "epoch": 0.9737876322956843, "grad_norm": 6.091170297832999, "learning_rate": 1.802324483237361e-08, "loss": 17.2172, "step": 53273 }, { "epoch": 0.9738059114921308, "grad_norm": 7.07576280727661, "learning_rate": 1.7998142598051883e-08, "loss": 17.92, "step": 53274 }, { "epoch": 0.9738241906885773, "grad_norm": 6.231120636666982, "learning_rate": 1.7973057825214636e-08, "loss": 17.4529, "step": 53275 }, { "epoch": 0.9738424698850239, "grad_norm": 6.059543303593341, "learning_rate": 1.794799051394902e-08, "loss": 17.3986, "step": 53276 }, { "epoch": 0.9738607490814704, "grad_norm": 6.88819243824232, "learning_rate": 1.79229406643433e-08, "loss": 17.379, "step": 53277 }, { "epoch": 0.973879028277917, "grad_norm": 7.194542535716253, "learning_rate": 1.789790827648574e-08, "loss": 17.9483, "step": 53278 }, { "epoch": 0.9738973074743634, "grad_norm": 5.202614888702569, "learning_rate": 1.7872893350463495e-08, "loss": 17.0491, "step": 53279 }, { "epoch": 0.9739155866708099, "grad_norm": 7.036205319138495, "learning_rate": 1.7847895886364262e-08, "loss": 17.6053, "step": 53280 }, { "epoch": 0.9739338658672565, "grad_norm": 6.94196250524966, "learning_rate": 1.7822915884276314e-08, "loss": 17.6406, "step": 53281 }, { "epoch": 0.973952145063703, "grad_norm": 5.796077000887399, "learning_rate": 1.7797953344286246e-08, "loss": 17.2325, "step": 53282 }, { "epoch": 0.9739704242601496, "grad_norm": 5.156812239732067, "learning_rate": 1.7773008266482318e-08, "loss": 16.9835, "step": 53283 }, { "epoch": 0.973988703456596, "grad_norm": 5.909034751894643, "learning_rate": 1.7748080650951127e-08, "loss": 17.4232, "step": 53284 }, { "epoch": 0.9740069826530425, "grad_norm": 5.890927254232789, "learning_rate": 1.7723170497780938e-08, "loss": 17.015, "step": 53285 }, { "epoch": 0.9740252618494891, "grad_norm": 5.7509414389990114, "learning_rate": 1.7698277807058905e-08, "loss": 17.1877, "step": 53286 }, { "epoch": 0.9740435410459356, "grad_norm": 6.674992402371713, "learning_rate": 1.7673402578871625e-08, "loss": 17.3643, "step": 53287 }, { "epoch": 0.9740618202423822, "grad_norm": 10.884538502521782, "learning_rate": 1.7648544813307356e-08, "loss": 17.6815, "step": 53288 }, { "epoch": 0.9740800994388287, "grad_norm": 6.684413891045376, "learning_rate": 1.7623704510452145e-08, "loss": 17.513, "step": 53289 }, { "epoch": 0.9740983786352752, "grad_norm": 5.597924441079561, "learning_rate": 1.75988816703937e-08, "loss": 16.849, "step": 53290 }, { "epoch": 0.9741166578317217, "grad_norm": 7.040622874687166, "learning_rate": 1.7574076293218613e-08, "loss": 17.4735, "step": 53291 }, { "epoch": 0.9741349370281682, "grad_norm": 5.59940200608305, "learning_rate": 1.754928837901404e-08, "loss": 16.9596, "step": 53292 }, { "epoch": 0.9741532162246148, "grad_norm": 9.549231804729144, "learning_rate": 1.752451792786658e-08, "loss": 19.0752, "step": 53293 }, { "epoch": 0.9741714954210613, "grad_norm": 5.856070749068708, "learning_rate": 1.749976493986394e-08, "loss": 17.3096, "step": 53294 }, { "epoch": 0.9741897746175078, "grad_norm": 6.6291851678598075, "learning_rate": 1.747502941509216e-08, "loss": 17.5746, "step": 53295 }, { "epoch": 0.9742080538139544, "grad_norm": 6.86301952899102, "learning_rate": 1.7450311353637838e-08, "loss": 17.5674, "step": 53296 }, { "epoch": 0.9742263330104008, "grad_norm": 6.623739341100298, "learning_rate": 1.7425610755587573e-08, "loss": 17.3253, "step": 53297 }, { "epoch": 0.9742446122068474, "grad_norm": 5.107496943650173, "learning_rate": 1.7400927621028518e-08, "loss": 16.8097, "step": 53298 }, { "epoch": 0.9742628914032939, "grad_norm": 8.62675054549257, "learning_rate": 1.7376261950046712e-08, "loss": 17.8707, "step": 53299 }, { "epoch": 0.9742811705997404, "grad_norm": 10.963770788255415, "learning_rate": 1.7351613742728755e-08, "loss": 18.0977, "step": 53300 }, { "epoch": 0.974299449796187, "grad_norm": 6.071332981544748, "learning_rate": 1.732698299916069e-08, "loss": 17.1193, "step": 53301 }, { "epoch": 0.9743177289926335, "grad_norm": 5.616578758018914, "learning_rate": 1.7302369719429667e-08, "loss": 17.0447, "step": 53302 }, { "epoch": 0.9743360081890801, "grad_norm": 5.429901879127039, "learning_rate": 1.727777390362173e-08, "loss": 16.8949, "step": 53303 }, { "epoch": 0.9743542873855265, "grad_norm": 6.8780204439558945, "learning_rate": 1.7253195551822366e-08, "loss": 17.3057, "step": 53304 }, { "epoch": 0.974372566581973, "grad_norm": 5.850402001422034, "learning_rate": 1.722863466411817e-08, "loss": 16.9733, "step": 53305 }, { "epoch": 0.9743908457784196, "grad_norm": 7.044685776846968, "learning_rate": 1.7204091240595188e-08, "loss": 17.7514, "step": 53306 }, { "epoch": 0.9744091249748661, "grad_norm": 5.471495583470152, "learning_rate": 1.717956528133946e-08, "loss": 17.3054, "step": 53307 }, { "epoch": 0.9744274041713127, "grad_norm": 5.0184335234506605, "learning_rate": 1.715505678643703e-08, "loss": 16.929, "step": 53308 }, { "epoch": 0.9744456833677592, "grad_norm": 6.979691835180797, "learning_rate": 1.7130565755973937e-08, "loss": 17.6635, "step": 53309 }, { "epoch": 0.9744639625642056, "grad_norm": 8.958788679419733, "learning_rate": 1.7106092190035118e-08, "loss": 18.1097, "step": 53310 }, { "epoch": 0.9744822417606522, "grad_norm": 6.171899699135979, "learning_rate": 1.708163608870772e-08, "loss": 16.9833, "step": 53311 }, { "epoch": 0.9745005209570987, "grad_norm": 6.165976183002918, "learning_rate": 1.705719745207668e-08, "loss": 17.3661, "step": 53312 }, { "epoch": 0.9745188001535452, "grad_norm": 5.361447923270864, "learning_rate": 1.7032776280228035e-08, "loss": 17.0046, "step": 53313 }, { "epoch": 0.9745370793499918, "grad_norm": 6.044704738718034, "learning_rate": 1.7008372573246725e-08, "loss": 17.427, "step": 53314 }, { "epoch": 0.9745553585464383, "grad_norm": 4.949142453241389, "learning_rate": 1.698398633121878e-08, "loss": 16.7546, "step": 53315 }, { "epoch": 0.9745736377428849, "grad_norm": 6.625036389055932, "learning_rate": 1.6959617554229703e-08, "loss": 17.5674, "step": 53316 }, { "epoch": 0.9745919169393313, "grad_norm": 6.962651921647594, "learning_rate": 1.6935266242364414e-08, "loss": 17.1272, "step": 53317 }, { "epoch": 0.9746101961357778, "grad_norm": 4.64133413779723, "learning_rate": 1.6910932395708402e-08, "loss": 16.8178, "step": 53318 }, { "epoch": 0.9746284753322244, "grad_norm": 7.534195141243222, "learning_rate": 1.6886616014347713e-08, "loss": 17.8719, "step": 53319 }, { "epoch": 0.9746467545286709, "grad_norm": 6.320283599945898, "learning_rate": 1.686231709836672e-08, "loss": 17.2658, "step": 53320 }, { "epoch": 0.9746650337251175, "grad_norm": 7.621725433765692, "learning_rate": 1.6838035647850916e-08, "loss": 17.5522, "step": 53321 }, { "epoch": 0.974683312921564, "grad_norm": 5.980381755817595, "learning_rate": 1.681377166288578e-08, "loss": 17.1234, "step": 53322 }, { "epoch": 0.9747015921180104, "grad_norm": 5.784489735348857, "learning_rate": 1.6789525143555697e-08, "loss": 17.0945, "step": 53323 }, { "epoch": 0.974719871314457, "grad_norm": 7.288596610277498, "learning_rate": 1.676529608994559e-08, "loss": 17.3777, "step": 53324 }, { "epoch": 0.9747381505109035, "grad_norm": 7.009228807771932, "learning_rate": 1.6741084502140958e-08, "loss": 17.5833, "step": 53325 }, { "epoch": 0.9747564297073501, "grad_norm": 5.863556594820438, "learning_rate": 1.6716890380226725e-08, "loss": 17.1539, "step": 53326 }, { "epoch": 0.9747747089037966, "grad_norm": 8.920330868714407, "learning_rate": 1.6692713724286713e-08, "loss": 17.4682, "step": 53327 }, { "epoch": 0.9747929881002431, "grad_norm": 5.407540802687811, "learning_rate": 1.6668554534406413e-08, "loss": 17.3513, "step": 53328 }, { "epoch": 0.9748112672966897, "grad_norm": 5.890487923355378, "learning_rate": 1.664441281067075e-08, "loss": 17.1915, "step": 53329 }, { "epoch": 0.9748295464931361, "grad_norm": 5.975512259885471, "learning_rate": 1.6620288553163553e-08, "loss": 17.2157, "step": 53330 }, { "epoch": 0.9748478256895827, "grad_norm": 6.597839768416597, "learning_rate": 1.659618176197031e-08, "loss": 17.549, "step": 53331 }, { "epoch": 0.9748661048860292, "grad_norm": 6.823492926342327, "learning_rate": 1.6572092437174838e-08, "loss": 17.5278, "step": 53332 }, { "epoch": 0.9748843840824757, "grad_norm": 6.373392506492617, "learning_rate": 1.6548020578861512e-08, "loss": 17.561, "step": 53333 }, { "epoch": 0.9749026632789223, "grad_norm": 5.842246160974441, "learning_rate": 1.652396618711527e-08, "loss": 17.0127, "step": 53334 }, { "epoch": 0.9749209424753688, "grad_norm": 6.658765553190438, "learning_rate": 1.649992926201993e-08, "loss": 17.2274, "step": 53335 }, { "epoch": 0.9749392216718153, "grad_norm": 6.809160556166296, "learning_rate": 1.6475909803659873e-08, "loss": 17.3778, "step": 53336 }, { "epoch": 0.9749575008682618, "grad_norm": 7.438954245048333, "learning_rate": 1.645190781211947e-08, "loss": 17.5922, "step": 53337 }, { "epoch": 0.9749757800647083, "grad_norm": 5.195049929743605, "learning_rate": 1.6427923287481995e-08, "loss": 17.1198, "step": 53338 }, { "epoch": 0.9749940592611549, "grad_norm": 5.743492202730228, "learning_rate": 1.640395622983293e-08, "loss": 17.1744, "step": 53339 }, { "epoch": 0.9750123384576014, "grad_norm": 6.324178637940232, "learning_rate": 1.638000663925554e-08, "loss": 17.0794, "step": 53340 }, { "epoch": 0.975030617654048, "grad_norm": 5.445366049070056, "learning_rate": 1.6356074515833653e-08, "loss": 17.1015, "step": 53341 }, { "epoch": 0.9750488968504945, "grad_norm": 5.500896867164726, "learning_rate": 1.6332159859651086e-08, "loss": 17.0783, "step": 53342 }, { "epoch": 0.9750671760469409, "grad_norm": 5.716325684787835, "learning_rate": 1.6308262670791663e-08, "loss": 17.4947, "step": 53343 }, { "epoch": 0.9750854552433875, "grad_norm": 5.634419767020411, "learning_rate": 1.6284382949339762e-08, "loss": 17.0959, "step": 53344 }, { "epoch": 0.975103734439834, "grad_norm": 6.586138067402093, "learning_rate": 1.626052069537809e-08, "loss": 16.9927, "step": 53345 }, { "epoch": 0.9751220136362806, "grad_norm": 5.11752959344072, "learning_rate": 1.6236675908991585e-08, "loss": 16.9968, "step": 53346 }, { "epoch": 0.9751402928327271, "grad_norm": 5.524290739531565, "learning_rate": 1.62128485902624e-08, "loss": 17.114, "step": 53347 }, { "epoch": 0.9751585720291736, "grad_norm": 7.030147943086717, "learning_rate": 1.618903873927491e-08, "loss": 17.5407, "step": 53348 }, { "epoch": 0.9751768512256201, "grad_norm": 6.26883690505793, "learning_rate": 1.6165246356112386e-08, "loss": 17.2348, "step": 53349 }, { "epoch": 0.9751951304220666, "grad_norm": 6.301510322152636, "learning_rate": 1.6141471440858093e-08, "loss": 17.2362, "step": 53350 }, { "epoch": 0.9752134096185132, "grad_norm": 4.890293347828008, "learning_rate": 1.6117713993595853e-08, "loss": 16.9364, "step": 53351 }, { "epoch": 0.9752316888149597, "grad_norm": 8.158935616667517, "learning_rate": 1.6093974014408375e-08, "loss": 18.013, "step": 53352 }, { "epoch": 0.9752499680114062, "grad_norm": 5.868906042013948, "learning_rate": 1.607025150337893e-08, "loss": 17.427, "step": 53353 }, { "epoch": 0.9752682472078528, "grad_norm": 5.792034549194804, "learning_rate": 1.6046546460590784e-08, "loss": 17.1743, "step": 53354 }, { "epoch": 0.9752865264042992, "grad_norm": 6.229716097541467, "learning_rate": 1.6022858886126646e-08, "loss": 17.2065, "step": 53355 }, { "epoch": 0.9753048056007458, "grad_norm": 5.247586353535602, "learning_rate": 1.599918878007034e-08, "loss": 17.0004, "step": 53356 }, { "epoch": 0.9753230847971923, "grad_norm": 7.0069755580318835, "learning_rate": 1.5975536142504576e-08, "loss": 17.4552, "step": 53357 }, { "epoch": 0.9753413639936388, "grad_norm": 5.753202018559298, "learning_rate": 1.5951900973511514e-08, "loss": 16.8838, "step": 53358 }, { "epoch": 0.9753596431900854, "grad_norm": 7.8803152085550074, "learning_rate": 1.592828327317497e-08, "loss": 17.2943, "step": 53359 }, { "epoch": 0.9753779223865319, "grad_norm": 6.0273179986384156, "learning_rate": 1.5904683041577106e-08, "loss": 17.3091, "step": 53360 }, { "epoch": 0.9753962015829785, "grad_norm": 5.603941805950127, "learning_rate": 1.588110027880063e-08, "loss": 17.3863, "step": 53361 }, { "epoch": 0.9754144807794249, "grad_norm": 6.042233063786497, "learning_rate": 1.5857534984928812e-08, "loss": 17.1953, "step": 53362 }, { "epoch": 0.9754327599758714, "grad_norm": 6.784321046877012, "learning_rate": 1.5833987160043805e-08, "loss": 17.4268, "step": 53363 }, { "epoch": 0.975451039172318, "grad_norm": 7.459317143722962, "learning_rate": 1.581045680422777e-08, "loss": 17.5574, "step": 53364 }, { "epoch": 0.9754693183687645, "grad_norm": 6.30508482425596, "learning_rate": 1.5786943917563967e-08, "loss": 17.1217, "step": 53365 }, { "epoch": 0.9754875975652111, "grad_norm": 5.289498527911734, "learning_rate": 1.5763448500134006e-08, "loss": 16.9589, "step": 53366 }, { "epoch": 0.9755058767616576, "grad_norm": 4.829666395351599, "learning_rate": 1.5739970552021143e-08, "loss": 17.0155, "step": 53367 }, { "epoch": 0.975524155958104, "grad_norm": 6.597761384966218, "learning_rate": 1.571651007330699e-08, "loss": 17.1441, "step": 53368 }, { "epoch": 0.9755424351545506, "grad_norm": 5.820731846166636, "learning_rate": 1.5693067064073697e-08, "loss": 17.3355, "step": 53369 }, { "epoch": 0.9755607143509971, "grad_norm": 9.750991433168426, "learning_rate": 1.566964152440398e-08, "loss": 16.921, "step": 53370 }, { "epoch": 0.9755789935474437, "grad_norm": 7.002367900615524, "learning_rate": 1.5646233454379434e-08, "loss": 17.2542, "step": 53371 }, { "epoch": 0.9755972727438902, "grad_norm": 6.688185348212034, "learning_rate": 1.562284285408222e-08, "loss": 17.6602, "step": 53372 }, { "epoch": 0.9756155519403367, "grad_norm": 5.851261100663886, "learning_rate": 1.5599469723594495e-08, "loss": 17.5559, "step": 53373 }, { "epoch": 0.9756338311367833, "grad_norm": 6.192964458476551, "learning_rate": 1.5576114062998416e-08, "loss": 17.5055, "step": 53374 }, { "epoch": 0.9756521103332297, "grad_norm": 6.024865528682085, "learning_rate": 1.5552775872375582e-08, "loss": 17.137, "step": 53375 }, { "epoch": 0.9756703895296763, "grad_norm": 6.460096030366365, "learning_rate": 1.5529455151807593e-08, "loss": 17.6305, "step": 53376 }, { "epoch": 0.9756886687261228, "grad_norm": 7.7673106679897375, "learning_rate": 1.5506151901376055e-08, "loss": 17.151, "step": 53377 }, { "epoch": 0.9757069479225693, "grad_norm": 5.774728447155579, "learning_rate": 1.548286612116312e-08, "loss": 16.9382, "step": 53378 }, { "epoch": 0.9757252271190159, "grad_norm": 6.212219931202187, "learning_rate": 1.5459597811250392e-08, "loss": 17.3375, "step": 53379 }, { "epoch": 0.9757435063154624, "grad_norm": 5.703719569884259, "learning_rate": 1.5436346971718918e-08, "loss": 17.1115, "step": 53380 }, { "epoch": 0.9757617855119088, "grad_norm": 6.2995267418712855, "learning_rate": 1.5413113602650297e-08, "loss": 17.3054, "step": 53381 }, { "epoch": 0.9757800647083554, "grad_norm": 6.535896536474092, "learning_rate": 1.5389897704126688e-08, "loss": 17.4006, "step": 53382 }, { "epoch": 0.9757983439048019, "grad_norm": 5.820629834099575, "learning_rate": 1.536669927622858e-08, "loss": 17.1897, "step": 53383 }, { "epoch": 0.9758166231012485, "grad_norm": 5.430610930461122, "learning_rate": 1.534351831903813e-08, "loss": 16.9405, "step": 53384 }, { "epoch": 0.975834902297695, "grad_norm": 6.0357524710933665, "learning_rate": 1.532035483263583e-08, "loss": 17.2256, "step": 53385 }, { "epoch": 0.9758531814941415, "grad_norm": 6.250661743885177, "learning_rate": 1.5297208817102725e-08, "loss": 17.33, "step": 53386 }, { "epoch": 0.975871460690588, "grad_norm": 5.691862074615506, "learning_rate": 1.5274080272520973e-08, "loss": 16.9112, "step": 53387 }, { "epoch": 0.9758897398870345, "grad_norm": 8.356114144008702, "learning_rate": 1.525096919897051e-08, "loss": 18.0798, "step": 53388 }, { "epoch": 0.9759080190834811, "grad_norm": 7.474100952364136, "learning_rate": 1.5227875596532938e-08, "loss": 17.5008, "step": 53389 }, { "epoch": 0.9759262982799276, "grad_norm": 5.673234140107519, "learning_rate": 1.5204799465289298e-08, "loss": 17.1437, "step": 53390 }, { "epoch": 0.9759445774763741, "grad_norm": 6.470591902310135, "learning_rate": 1.5181740805320644e-08, "loss": 17.5753, "step": 53391 }, { "epoch": 0.9759628566728207, "grad_norm": 6.3657575124398535, "learning_rate": 1.5158699616706908e-08, "loss": 17.5197, "step": 53392 }, { "epoch": 0.9759811358692672, "grad_norm": 7.039850771755453, "learning_rate": 1.5135675899529134e-08, "loss": 17.6987, "step": 53393 }, { "epoch": 0.9759994150657137, "grad_norm": 5.240227493961545, "learning_rate": 1.5112669653868928e-08, "loss": 16.9437, "step": 53394 }, { "epoch": 0.9760176942621602, "grad_norm": 6.975199262563461, "learning_rate": 1.508968087980567e-08, "loss": 17.475, "step": 53395 }, { "epoch": 0.9760359734586067, "grad_norm": 5.627263317280361, "learning_rate": 1.5066709577420956e-08, "loss": 17.2974, "step": 53396 }, { "epoch": 0.9760542526550533, "grad_norm": 6.635647301896227, "learning_rate": 1.504375574679473e-08, "loss": 17.621, "step": 53397 }, { "epoch": 0.9760725318514998, "grad_norm": 5.754612705059543, "learning_rate": 1.502081938800748e-08, "loss": 17.2347, "step": 53398 }, { "epoch": 0.9760908110479464, "grad_norm": 5.225164149352612, "learning_rate": 1.499790050113914e-08, "loss": 16.9289, "step": 53399 }, { "epoch": 0.9761090902443929, "grad_norm": 5.299729712681263, "learning_rate": 1.4974999086271315e-08, "loss": 17.0013, "step": 53400 }, { "epoch": 0.9761273694408393, "grad_norm": 6.9707667218081495, "learning_rate": 1.4952115143483382e-08, "loss": 17.4534, "step": 53401 }, { "epoch": 0.9761456486372859, "grad_norm": 5.242781015708836, "learning_rate": 1.4929248672855832e-08, "loss": 17.0547, "step": 53402 }, { "epoch": 0.9761639278337324, "grad_norm": 5.9649387825744435, "learning_rate": 1.4906399674468053e-08, "loss": 17.0142, "step": 53403 }, { "epoch": 0.976182207030179, "grad_norm": 6.101326814870919, "learning_rate": 1.4883568148401639e-08, "loss": 17.4789, "step": 53404 }, { "epoch": 0.9762004862266255, "grad_norm": 6.478360381716925, "learning_rate": 1.4860754094734864e-08, "loss": 17.6734, "step": 53405 }, { "epoch": 0.976218765423072, "grad_norm": 5.239530024920496, "learning_rate": 1.4837957513549328e-08, "loss": 17.0752, "step": 53406 }, { "epoch": 0.9762370446195185, "grad_norm": 6.429649119187894, "learning_rate": 1.4815178404923858e-08, "loss": 17.4624, "step": 53407 }, { "epoch": 0.976255323815965, "grad_norm": 7.251880322012236, "learning_rate": 1.4792416768938389e-08, "loss": 17.6245, "step": 53408 }, { "epoch": 0.9762736030124116, "grad_norm": 9.666667467307763, "learning_rate": 1.4769672605672858e-08, "loss": 18.3771, "step": 53409 }, { "epoch": 0.9762918822088581, "grad_norm": 7.405894339754148, "learning_rate": 1.4746945915207756e-08, "loss": 17.7785, "step": 53410 }, { "epoch": 0.9763101614053046, "grad_norm": 6.294125425575779, "learning_rate": 1.4724236697621352e-08, "loss": 17.4388, "step": 53411 }, { "epoch": 0.9763284406017512, "grad_norm": 5.3076699032393195, "learning_rate": 1.470154495299414e-08, "loss": 16.9326, "step": 53412 }, { "epoch": 0.9763467197981976, "grad_norm": 5.931092534359679, "learning_rate": 1.4678870681404944e-08, "loss": 17.3885, "step": 53413 }, { "epoch": 0.9763649989946442, "grad_norm": 6.212415186833039, "learning_rate": 1.4656213882934256e-08, "loss": 17.1939, "step": 53414 }, { "epoch": 0.9763832781910907, "grad_norm": 6.548541201574192, "learning_rate": 1.4633574557660902e-08, "loss": 17.6593, "step": 53415 }, { "epoch": 0.9764015573875372, "grad_norm": 10.362718716955115, "learning_rate": 1.461095270566426e-08, "loss": 18.4767, "step": 53416 }, { "epoch": 0.9764198365839838, "grad_norm": 5.641723618030113, "learning_rate": 1.458834832702316e-08, "loss": 17.1114, "step": 53417 }, { "epoch": 0.9764381157804303, "grad_norm": 6.178518746692412, "learning_rate": 1.4565761421817537e-08, "loss": 17.2326, "step": 53418 }, { "epoch": 0.9764563949768769, "grad_norm": 6.55631768866288, "learning_rate": 1.4543191990126215e-08, "loss": 16.7519, "step": 53419 }, { "epoch": 0.9764746741733233, "grad_norm": 6.694180527962715, "learning_rate": 1.4520640032028576e-08, "loss": 17.5335, "step": 53420 }, { "epoch": 0.9764929533697698, "grad_norm": 5.936302217527277, "learning_rate": 1.4498105547603448e-08, "loss": 16.9581, "step": 53421 }, { "epoch": 0.9765112325662164, "grad_norm": 5.121405837166941, "learning_rate": 1.4475588536929652e-08, "loss": 16.9667, "step": 53422 }, { "epoch": 0.9765295117626629, "grad_norm": 5.221928120921368, "learning_rate": 1.4453089000086573e-08, "loss": 16.906, "step": 53423 }, { "epoch": 0.9765477909591095, "grad_norm": 7.293745649111662, "learning_rate": 1.443060693715248e-08, "loss": 17.8433, "step": 53424 }, { "epoch": 0.976566070155556, "grad_norm": 5.574885805187138, "learning_rate": 1.4408142348206756e-08, "loss": 17.1291, "step": 53425 }, { "epoch": 0.9765843493520024, "grad_norm": 7.271369536965362, "learning_rate": 1.4385695233327667e-08, "loss": 17.9894, "step": 53426 }, { "epoch": 0.976602628548449, "grad_norm": 5.325329150174652, "learning_rate": 1.4363265592594045e-08, "loss": 17.2127, "step": 53427 }, { "epoch": 0.9766209077448955, "grad_norm": 4.474404516493078, "learning_rate": 1.4340853426084711e-08, "loss": 16.7667, "step": 53428 }, { "epoch": 0.9766391869413421, "grad_norm": 4.840519393589734, "learning_rate": 1.4318458733878492e-08, "loss": 17.0374, "step": 53429 }, { "epoch": 0.9766574661377886, "grad_norm": 5.219882231900081, "learning_rate": 1.429608151605255e-08, "loss": 17.1494, "step": 53430 }, { "epoch": 0.9766757453342351, "grad_norm": 7.280099982219123, "learning_rate": 1.4273721772686822e-08, "loss": 18.0735, "step": 53431 }, { "epoch": 0.9766940245306817, "grad_norm": 6.454136140382645, "learning_rate": 1.4251379503859019e-08, "loss": 17.4901, "step": 53432 }, { "epoch": 0.9767123037271281, "grad_norm": 6.409680116011055, "learning_rate": 1.4229054709647416e-08, "loss": 17.4213, "step": 53433 }, { "epoch": 0.9767305829235747, "grad_norm": 5.620676140142363, "learning_rate": 1.4206747390130283e-08, "loss": 17.1568, "step": 53434 }, { "epoch": 0.9767488621200212, "grad_norm": 5.9085072240260805, "learning_rate": 1.4184457545385888e-08, "loss": 17.2171, "step": 53435 }, { "epoch": 0.9767671413164677, "grad_norm": 6.668955149299883, "learning_rate": 1.416218517549195e-08, "loss": 17.614, "step": 53436 }, { "epoch": 0.9767854205129143, "grad_norm": 6.109247111740635, "learning_rate": 1.4139930280527292e-08, "loss": 17.2889, "step": 53437 }, { "epoch": 0.9768036997093608, "grad_norm": 6.626123324460775, "learning_rate": 1.4117692860569077e-08, "loss": 17.5847, "step": 53438 }, { "epoch": 0.9768219789058074, "grad_norm": 6.174798120911498, "learning_rate": 1.409547291569613e-08, "loss": 16.9961, "step": 53439 }, { "epoch": 0.9768402581022538, "grad_norm": 6.4751877505388284, "learning_rate": 1.4073270445985609e-08, "loss": 17.6474, "step": 53440 }, { "epoch": 0.9768585372987003, "grad_norm": 6.289450693395533, "learning_rate": 1.4051085451516344e-08, "loss": 17.4207, "step": 53441 }, { "epoch": 0.9768768164951469, "grad_norm": 5.426452447725256, "learning_rate": 1.4028917932364383e-08, "loss": 16.9485, "step": 53442 }, { "epoch": 0.9768950956915934, "grad_norm": 5.9353299408655955, "learning_rate": 1.4006767888609107e-08, "loss": 17.3204, "step": 53443 }, { "epoch": 0.97691337488804, "grad_norm": 7.681092842471894, "learning_rate": 1.3984635320327122e-08, "loss": 17.8296, "step": 53444 }, { "epoch": 0.9769316540844865, "grad_norm": 5.7482895186645395, "learning_rate": 1.3962520227596699e-08, "loss": 17.2362, "step": 53445 }, { "epoch": 0.9769499332809329, "grad_norm": 6.795048312492879, "learning_rate": 1.3940422610494442e-08, "loss": 17.3687, "step": 53446 }, { "epoch": 0.9769682124773795, "grad_norm": 6.5263857856465854, "learning_rate": 1.3918342469098623e-08, "loss": 17.4596, "step": 53447 }, { "epoch": 0.976986491673826, "grad_norm": 5.693135133346503, "learning_rate": 1.3896279803485846e-08, "loss": 17.2523, "step": 53448 }, { "epoch": 0.9770047708702725, "grad_norm": 6.072603810038908, "learning_rate": 1.3874234613734938e-08, "loss": 17.1604, "step": 53449 }, { "epoch": 0.9770230500667191, "grad_norm": 7.58739629732081, "learning_rate": 1.3852206899921395e-08, "loss": 17.0751, "step": 53450 }, { "epoch": 0.9770413292631656, "grad_norm": 4.747798710014315, "learning_rate": 1.3830196662123484e-08, "loss": 16.7455, "step": 53451 }, { "epoch": 0.9770596084596121, "grad_norm": 5.2462721649289445, "learning_rate": 1.3808203900417816e-08, "loss": 16.8616, "step": 53452 }, { "epoch": 0.9770778876560586, "grad_norm": 8.127299309329137, "learning_rate": 1.3786228614881547e-08, "loss": 18.0172, "step": 53453 }, { "epoch": 0.9770961668525051, "grad_norm": 6.181152056791667, "learning_rate": 1.3764270805592395e-08, "loss": 17.2926, "step": 53454 }, { "epoch": 0.9771144460489517, "grad_norm": 6.304854880760048, "learning_rate": 1.3742330472626409e-08, "loss": 17.3789, "step": 53455 }, { "epoch": 0.9771327252453982, "grad_norm": 5.664623849432917, "learning_rate": 1.3720407616061304e-08, "loss": 17.0214, "step": 53456 }, { "epoch": 0.9771510044418448, "grad_norm": 5.0276915384215535, "learning_rate": 1.3698502235973133e-08, "loss": 16.9987, "step": 53457 }, { "epoch": 0.9771692836382913, "grad_norm": 6.513003121730897, "learning_rate": 1.3676614332439053e-08, "loss": 17.6582, "step": 53458 }, { "epoch": 0.9771875628347377, "grad_norm": 6.353612388728437, "learning_rate": 1.3654743905535673e-08, "loss": 17.7526, "step": 53459 }, { "epoch": 0.9772058420311843, "grad_norm": 5.969939294861369, "learning_rate": 1.363289095534015e-08, "loss": 16.9834, "step": 53460 }, { "epoch": 0.9772241212276308, "grad_norm": 6.6442058513814635, "learning_rate": 1.3611055481927981e-08, "loss": 17.5339, "step": 53461 }, { "epoch": 0.9772424004240774, "grad_norm": 7.995805932295297, "learning_rate": 1.3589237485376882e-08, "loss": 17.844, "step": 53462 }, { "epoch": 0.9772606796205239, "grad_norm": 5.803381978872129, "learning_rate": 1.3567436965762348e-08, "loss": 17.0323, "step": 53463 }, { "epoch": 0.9772789588169704, "grad_norm": 6.754489019448979, "learning_rate": 1.3545653923161538e-08, "loss": 17.2965, "step": 53464 }, { "epoch": 0.977297238013417, "grad_norm": 5.409814377128468, "learning_rate": 1.3523888357650505e-08, "loss": 16.8602, "step": 53465 }, { "epoch": 0.9773155172098634, "grad_norm": 7.452008374565482, "learning_rate": 1.3502140269305853e-08, "loss": 17.9385, "step": 53466 }, { "epoch": 0.97733379640631, "grad_norm": 6.229541286087049, "learning_rate": 1.3480409658203075e-08, "loss": 17.565, "step": 53467 }, { "epoch": 0.9773520756027565, "grad_norm": 5.37122419488131, "learning_rate": 1.345869652441878e-08, "loss": 17.0388, "step": 53468 }, { "epoch": 0.977370354799203, "grad_norm": 5.965593328553994, "learning_rate": 1.3437000868029016e-08, "loss": 17.1364, "step": 53469 }, { "epoch": 0.9773886339956496, "grad_norm": 5.686659469742047, "learning_rate": 1.3415322689109833e-08, "loss": 17.0025, "step": 53470 }, { "epoch": 0.977406913192096, "grad_norm": 9.285650646044985, "learning_rate": 1.3393661987737283e-08, "loss": 17.8937, "step": 53471 }, { "epoch": 0.9774251923885426, "grad_norm": 5.911528006277959, "learning_rate": 1.3372018763987415e-08, "loss": 17.3707, "step": 53472 }, { "epoch": 0.9774434715849891, "grad_norm": 6.526755540522777, "learning_rate": 1.3350393017935726e-08, "loss": 17.1021, "step": 53473 }, { "epoch": 0.9774617507814356, "grad_norm": 6.523519779053343, "learning_rate": 1.3328784749658263e-08, "loss": 17.2419, "step": 53474 }, { "epoch": 0.9774800299778822, "grad_norm": 6.201599040617582, "learning_rate": 1.3307193959230524e-08, "loss": 17.2714, "step": 53475 }, { "epoch": 0.9774983091743287, "grad_norm": 5.641449864769362, "learning_rate": 1.3285620646728558e-08, "loss": 17.0924, "step": 53476 }, { "epoch": 0.9775165883707753, "grad_norm": 6.407698849384073, "learning_rate": 1.3264064812227862e-08, "loss": 17.3876, "step": 53477 }, { "epoch": 0.9775348675672217, "grad_norm": 5.883369030558114, "learning_rate": 1.3242526455803372e-08, "loss": 17.5327, "step": 53478 }, { "epoch": 0.9775531467636682, "grad_norm": 5.796230276008938, "learning_rate": 1.3221005577531698e-08, "loss": 17.3287, "step": 53479 }, { "epoch": 0.9775714259601148, "grad_norm": 4.882681581448671, "learning_rate": 1.3199502177487222e-08, "loss": 16.8429, "step": 53480 }, { "epoch": 0.9775897051565613, "grad_norm": 6.373647925662037, "learning_rate": 1.3178016255745995e-08, "loss": 17.277, "step": 53481 }, { "epoch": 0.9776079843530079, "grad_norm": 7.973803334328131, "learning_rate": 1.3156547812382958e-08, "loss": 18.1351, "step": 53482 }, { "epoch": 0.9776262635494544, "grad_norm": 4.459287000547538, "learning_rate": 1.3135096847473605e-08, "loss": 16.8207, "step": 53483 }, { "epoch": 0.9776445427459008, "grad_norm": 5.195282878656272, "learning_rate": 1.3113663361092876e-08, "loss": 17.0494, "step": 53484 }, { "epoch": 0.9776628219423474, "grad_norm": 5.786445467519901, "learning_rate": 1.3092247353316267e-08, "loss": 17.0295, "step": 53485 }, { "epoch": 0.9776811011387939, "grad_norm": 5.900431793117207, "learning_rate": 1.3070848824218162e-08, "loss": 17.1588, "step": 53486 }, { "epoch": 0.9776993803352405, "grad_norm": 4.65146176065633, "learning_rate": 1.3049467773874058e-08, "loss": 16.8359, "step": 53487 }, { "epoch": 0.977717659531687, "grad_norm": 7.19168358521889, "learning_rate": 1.3028104202359448e-08, "loss": 17.9288, "step": 53488 }, { "epoch": 0.9777359387281335, "grad_norm": 6.357175929098584, "learning_rate": 1.3006758109748164e-08, "loss": 17.5257, "step": 53489 }, { "epoch": 0.9777542179245801, "grad_norm": 5.344283501631605, "learning_rate": 1.2985429496115143e-08, "loss": 17.028, "step": 53490 }, { "epoch": 0.9777724971210265, "grad_norm": 8.157419209489033, "learning_rate": 1.2964118361535326e-08, "loss": 17.2461, "step": 53491 }, { "epoch": 0.9777907763174731, "grad_norm": 4.978143188807161, "learning_rate": 1.2942824706084212e-08, "loss": 16.9288, "step": 53492 }, { "epoch": 0.9778090555139196, "grad_norm": 5.935131063980865, "learning_rate": 1.2921548529835071e-08, "loss": 17.2799, "step": 53493 }, { "epoch": 0.9778273347103661, "grad_norm": 6.011562209211279, "learning_rate": 1.2900289832863955e-08, "loss": 17.2813, "step": 53494 }, { "epoch": 0.9778456139068127, "grad_norm": 6.123418475536061, "learning_rate": 1.287904861524414e-08, "loss": 17.3047, "step": 53495 }, { "epoch": 0.9778638931032592, "grad_norm": 5.355049243429177, "learning_rate": 1.2857824877050007e-08, "loss": 17.2964, "step": 53496 }, { "epoch": 0.9778821722997058, "grad_norm": 5.247788982039389, "learning_rate": 1.2836618618357055e-08, "loss": 16.8228, "step": 53497 }, { "epoch": 0.9779004514961522, "grad_norm": 6.978153184250692, "learning_rate": 1.2815429839238557e-08, "loss": 17.3342, "step": 53498 }, { "epoch": 0.9779187306925987, "grad_norm": 8.426059877500279, "learning_rate": 1.279425853977001e-08, "loss": 18.0562, "step": 53499 }, { "epoch": 0.9779370098890453, "grad_norm": 6.728730321534884, "learning_rate": 1.2773104720024132e-08, "loss": 17.4218, "step": 53500 }, { "epoch": 0.9779552890854918, "grad_norm": 5.268613000927839, "learning_rate": 1.2751968380075863e-08, "loss": 16.8492, "step": 53501 }, { "epoch": 0.9779735682819384, "grad_norm": 7.259330752850201, "learning_rate": 1.2730849519999034e-08, "loss": 17.723, "step": 53502 }, { "epoch": 0.9779918474783849, "grad_norm": 6.417748468694472, "learning_rate": 1.2709748139868028e-08, "loss": 17.2822, "step": 53503 }, { "epoch": 0.9780101266748313, "grad_norm": 6.841323461580476, "learning_rate": 1.2688664239756121e-08, "loss": 17.2324, "step": 53504 }, { "epoch": 0.9780284058712779, "grad_norm": 5.130881592934762, "learning_rate": 1.2667597819738254e-08, "loss": 16.9808, "step": 53505 }, { "epoch": 0.9780466850677244, "grad_norm": 5.347142375822784, "learning_rate": 1.26465488798877e-08, "loss": 16.8171, "step": 53506 }, { "epoch": 0.978064964264171, "grad_norm": 6.397594908289603, "learning_rate": 1.2625517420277734e-08, "loss": 17.1019, "step": 53507 }, { "epoch": 0.9780832434606175, "grad_norm": 5.3397605681209175, "learning_rate": 1.2604503440983295e-08, "loss": 16.8605, "step": 53508 }, { "epoch": 0.978101522657064, "grad_norm": 5.637094472989595, "learning_rate": 1.258350694207655e-08, "loss": 17.047, "step": 53509 }, { "epoch": 0.9781198018535106, "grad_norm": 7.165460746352944, "learning_rate": 1.2562527923631885e-08, "loss": 17.9489, "step": 53510 }, { "epoch": 0.978138081049957, "grad_norm": 6.503218955649607, "learning_rate": 1.2541566385723126e-08, "loss": 17.1473, "step": 53511 }, { "epoch": 0.9781563602464036, "grad_norm": 8.313605831736051, "learning_rate": 1.2520622328422993e-08, "loss": 17.4177, "step": 53512 }, { "epoch": 0.9781746394428501, "grad_norm": 6.5655000306080495, "learning_rate": 1.249969575180532e-08, "loss": 17.4383, "step": 53513 }, { "epoch": 0.9781929186392966, "grad_norm": 5.862149991249124, "learning_rate": 1.247878665594393e-08, "loss": 17.1338, "step": 53514 }, { "epoch": 0.9782111978357432, "grad_norm": 7.372798710099834, "learning_rate": 1.2457895040910994e-08, "loss": 17.4967, "step": 53515 }, { "epoch": 0.9782294770321897, "grad_norm": 6.78353866089457, "learning_rate": 1.2437020906780895e-08, "loss": 17.5497, "step": 53516 }, { "epoch": 0.9782477562286361, "grad_norm": 4.603008808698945, "learning_rate": 1.2416164253625796e-08, "loss": 16.8943, "step": 53517 }, { "epoch": 0.9782660354250827, "grad_norm": 7.330509033361259, "learning_rate": 1.2395325081519527e-08, "loss": 17.8493, "step": 53518 }, { "epoch": 0.9782843146215292, "grad_norm": 5.66367551128862, "learning_rate": 1.237450339053481e-08, "loss": 17.0939, "step": 53519 }, { "epoch": 0.9783025938179758, "grad_norm": 6.793160367543496, "learning_rate": 1.2353699180744361e-08, "loss": 18.0097, "step": 53520 }, { "epoch": 0.9783208730144223, "grad_norm": 6.733786957543327, "learning_rate": 1.2332912452221458e-08, "loss": 17.3293, "step": 53521 }, { "epoch": 0.9783391522108688, "grad_norm": 6.794418542804982, "learning_rate": 1.2312143205038818e-08, "loss": 17.8224, "step": 53522 }, { "epoch": 0.9783574314073153, "grad_norm": 7.365265494667688, "learning_rate": 1.2291391439269718e-08, "loss": 17.6867, "step": 53523 }, { "epoch": 0.9783757106037618, "grad_norm": 5.528798144128598, "learning_rate": 1.2270657154986321e-08, "loss": 17.1114, "step": 53524 }, { "epoch": 0.9783939898002084, "grad_norm": 5.601575906489303, "learning_rate": 1.2249940352261347e-08, "loss": 17.0064, "step": 53525 }, { "epoch": 0.9784122689966549, "grad_norm": 6.966872767176314, "learning_rate": 1.2229241031167516e-08, "loss": 17.7321, "step": 53526 }, { "epoch": 0.9784305481931014, "grad_norm": 5.8160352171754335, "learning_rate": 1.2208559191777547e-08, "loss": 17.2227, "step": 53527 }, { "epoch": 0.978448827389548, "grad_norm": 6.319034903728657, "learning_rate": 1.2187894834163606e-08, "loss": 17.1667, "step": 53528 }, { "epoch": 0.9784671065859945, "grad_norm": 5.898114794347725, "learning_rate": 1.2167247958397855e-08, "loss": 17.2393, "step": 53529 }, { "epoch": 0.978485385782441, "grad_norm": 7.474988146664295, "learning_rate": 1.2146618564553569e-08, "loss": 17.4578, "step": 53530 }, { "epoch": 0.9785036649788875, "grad_norm": 5.107312960071525, "learning_rate": 1.2126006652702915e-08, "loss": 16.8856, "step": 53531 }, { "epoch": 0.978521944175334, "grad_norm": 6.071340746051761, "learning_rate": 1.2105412222916945e-08, "loss": 16.8822, "step": 53532 }, { "epoch": 0.9785402233717806, "grad_norm": 5.206174951949546, "learning_rate": 1.2084835275269491e-08, "loss": 16.9981, "step": 53533 }, { "epoch": 0.9785585025682271, "grad_norm": 4.747104252116405, "learning_rate": 1.2064275809831606e-08, "loss": 16.8203, "step": 53534 }, { "epoch": 0.9785767817646737, "grad_norm": 4.853796944118073, "learning_rate": 1.2043733826674897e-08, "loss": 16.8907, "step": 53535 }, { "epoch": 0.9785950609611201, "grad_norm": 5.868442001549682, "learning_rate": 1.2023209325872643e-08, "loss": 17.251, "step": 53536 }, { "epoch": 0.9786133401575666, "grad_norm": 8.2134871785468, "learning_rate": 1.200270230749645e-08, "loss": 17.9327, "step": 53537 }, { "epoch": 0.9786316193540132, "grad_norm": 5.541679141198699, "learning_rate": 1.1982212771617375e-08, "loss": 17.2381, "step": 53538 }, { "epoch": 0.9786498985504597, "grad_norm": 6.912738701797856, "learning_rate": 1.1961740718308134e-08, "loss": 17.6461, "step": 53539 }, { "epoch": 0.9786681777469063, "grad_norm": 6.6808816127906105, "learning_rate": 1.194128614764034e-08, "loss": 17.3377, "step": 53540 }, { "epoch": 0.9786864569433528, "grad_norm": 6.503659688545237, "learning_rate": 1.1920849059685046e-08, "loss": 17.5118, "step": 53541 }, { "epoch": 0.9787047361397992, "grad_norm": 8.523828443161142, "learning_rate": 1.1900429454514973e-08, "loss": 18.2105, "step": 53542 }, { "epoch": 0.9787230153362458, "grad_norm": 6.245775963180072, "learning_rate": 1.1880027332200616e-08, "loss": 17.2842, "step": 53543 }, { "epoch": 0.9787412945326923, "grad_norm": 8.15768183372573, "learning_rate": 1.1859642692813589e-08, "loss": 17.6602, "step": 53544 }, { "epoch": 0.9787595737291389, "grad_norm": 6.765753169603077, "learning_rate": 1.1839275536426054e-08, "loss": 17.6039, "step": 53545 }, { "epoch": 0.9787778529255854, "grad_norm": 7.074876267747457, "learning_rate": 1.1818925863109066e-08, "loss": 17.0365, "step": 53546 }, { "epoch": 0.9787961321220319, "grad_norm": 6.2592748888546605, "learning_rate": 1.1798593672933677e-08, "loss": 17.0051, "step": 53547 }, { "epoch": 0.9788144113184785, "grad_norm": 7.077420913242717, "learning_rate": 1.1778278965971501e-08, "loss": 17.643, "step": 53548 }, { "epoch": 0.9788326905149249, "grad_norm": 7.115569441599144, "learning_rate": 1.175798174229359e-08, "loss": 17.6884, "step": 53549 }, { "epoch": 0.9788509697113715, "grad_norm": 5.351363209319329, "learning_rate": 1.1737702001970996e-08, "loss": 17.0212, "step": 53550 }, { "epoch": 0.978869248907818, "grad_norm": 7.149486560394559, "learning_rate": 1.1717439745075333e-08, "loss": 17.4256, "step": 53551 }, { "epoch": 0.9788875281042645, "grad_norm": 7.118676972324483, "learning_rate": 1.169719497167654e-08, "loss": 17.935, "step": 53552 }, { "epoch": 0.9789058073007111, "grad_norm": 5.876660637591379, "learning_rate": 1.1676967681846784e-08, "loss": 17.3898, "step": 53553 }, { "epoch": 0.9789240864971576, "grad_norm": 5.885073452401775, "learning_rate": 1.165675787565601e-08, "loss": 17.1944, "step": 53554 }, { "epoch": 0.9789423656936042, "grad_norm": 6.87680926613074, "learning_rate": 1.1636565553175272e-08, "loss": 17.0885, "step": 53555 }, { "epoch": 0.9789606448900506, "grad_norm": 5.0098002179071734, "learning_rate": 1.1616390714475623e-08, "loss": 16.7868, "step": 53556 }, { "epoch": 0.9789789240864971, "grad_norm": 5.9494234455979855, "learning_rate": 1.1596233359628117e-08, "loss": 17.2254, "step": 53557 }, { "epoch": 0.9789972032829437, "grad_norm": 6.988319313697817, "learning_rate": 1.1576093488702144e-08, "loss": 17.4572, "step": 53558 }, { "epoch": 0.9790154824793902, "grad_norm": 6.100686834654787, "learning_rate": 1.1555971101769869e-08, "loss": 17.1758, "step": 53559 }, { "epoch": 0.9790337616758368, "grad_norm": 7.413095649457072, "learning_rate": 1.153586619890068e-08, "loss": 17.5886, "step": 53560 }, { "epoch": 0.9790520408722833, "grad_norm": 6.411195024477666, "learning_rate": 1.1515778780165631e-08, "loss": 17.1663, "step": 53561 }, { "epoch": 0.9790703200687297, "grad_norm": 5.606914034039173, "learning_rate": 1.1495708845634668e-08, "loss": 17.2899, "step": 53562 }, { "epoch": 0.9790885992651763, "grad_norm": 6.155486075199391, "learning_rate": 1.1475656395378288e-08, "loss": 17.2029, "step": 53563 }, { "epoch": 0.9791068784616228, "grad_norm": 8.844559268796369, "learning_rate": 1.145562142946699e-08, "loss": 18.2368, "step": 53564 }, { "epoch": 0.9791251576580694, "grad_norm": 5.746124103669115, "learning_rate": 1.1435603947970718e-08, "loss": 17.1563, "step": 53565 }, { "epoch": 0.9791434368545159, "grad_norm": 6.102449399812766, "learning_rate": 1.1415603950959974e-08, "loss": 17.4148, "step": 53566 }, { "epoch": 0.9791617160509624, "grad_norm": 7.158263076687658, "learning_rate": 1.1395621438504146e-08, "loss": 17.4643, "step": 53567 }, { "epoch": 0.979179995247409, "grad_norm": 6.814481513359215, "learning_rate": 1.1375656410674284e-08, "loss": 17.6884, "step": 53568 }, { "epoch": 0.9791982744438554, "grad_norm": 7.192084077294039, "learning_rate": 1.1355708867539782e-08, "loss": 17.4571, "step": 53569 }, { "epoch": 0.979216553640302, "grad_norm": 6.024215695112818, "learning_rate": 1.1335778809170583e-08, "loss": 17.3126, "step": 53570 }, { "epoch": 0.9792348328367485, "grad_norm": 8.283003568579094, "learning_rate": 1.131586623563663e-08, "loss": 18.0713, "step": 53571 }, { "epoch": 0.979253112033195, "grad_norm": 5.803236388606091, "learning_rate": 1.1295971147007867e-08, "loss": 17.1116, "step": 53572 }, { "epoch": 0.9792713912296416, "grad_norm": 6.552490745457049, "learning_rate": 1.1276093543353128e-08, "loss": 17.6177, "step": 53573 }, { "epoch": 0.979289670426088, "grad_norm": 7.618275350470412, "learning_rate": 1.1256233424743469e-08, "loss": 17.7095, "step": 53574 }, { "epoch": 0.9793079496225346, "grad_norm": 5.401392212465964, "learning_rate": 1.123639079124772e-08, "loss": 16.9463, "step": 53575 }, { "epoch": 0.9793262288189811, "grad_norm": 4.90522483803799, "learning_rate": 1.1216565642935274e-08, "loss": 16.8715, "step": 53576 }, { "epoch": 0.9793445080154276, "grad_norm": 6.738149701027397, "learning_rate": 1.1196757979875516e-08, "loss": 17.2385, "step": 53577 }, { "epoch": 0.9793627872118742, "grad_norm": 6.081052127226056, "learning_rate": 1.117696780213895e-08, "loss": 17.096, "step": 53578 }, { "epoch": 0.9793810664083207, "grad_norm": 6.1214617413991546, "learning_rate": 1.115719510979385e-08, "loss": 17.3233, "step": 53579 }, { "epoch": 0.9793993456047673, "grad_norm": 5.838768548203087, "learning_rate": 1.1137439902910164e-08, "loss": 17.1285, "step": 53580 }, { "epoch": 0.9794176248012137, "grad_norm": 4.475467362131014, "learning_rate": 1.1117702181556167e-08, "loss": 16.8235, "step": 53581 }, { "epoch": 0.9794359039976602, "grad_norm": 6.493265720514928, "learning_rate": 1.109798194580236e-08, "loss": 17.0757, "step": 53582 }, { "epoch": 0.9794541831941068, "grad_norm": 5.413377814260294, "learning_rate": 1.1078279195717024e-08, "loss": 16.9723, "step": 53583 }, { "epoch": 0.9794724623905533, "grad_norm": 6.539729366713186, "learning_rate": 1.1058593931368988e-08, "loss": 17.4657, "step": 53584 }, { "epoch": 0.9794907415869998, "grad_norm": 7.166660147995593, "learning_rate": 1.10389261528282e-08, "loss": 17.7834, "step": 53585 }, { "epoch": 0.9795090207834464, "grad_norm": 6.062090124301946, "learning_rate": 1.1019275860162937e-08, "loss": 17.4027, "step": 53586 }, { "epoch": 0.9795272999798929, "grad_norm": 4.884179449569042, "learning_rate": 1.0999643053442033e-08, "loss": 16.9367, "step": 53587 }, { "epoch": 0.9795455791763394, "grad_norm": 7.217257429344635, "learning_rate": 1.0980027732734877e-08, "loss": 17.6528, "step": 53588 }, { "epoch": 0.9795638583727859, "grad_norm": 6.096468935862635, "learning_rate": 1.0960429898109192e-08, "loss": 17.4122, "step": 53589 }, { "epoch": 0.9795821375692324, "grad_norm": 6.772082786464071, "learning_rate": 1.0940849549634925e-08, "loss": 17.4918, "step": 53590 }, { "epoch": 0.979600416765679, "grad_norm": 6.282346887636674, "learning_rate": 1.092128668737924e-08, "loss": 17.3782, "step": 53591 }, { "epoch": 0.9796186959621255, "grad_norm": 7.282999768505638, "learning_rate": 1.0901741311412084e-08, "loss": 17.6265, "step": 53592 }, { "epoch": 0.9796369751585721, "grad_norm": 6.012168388331246, "learning_rate": 1.088221342180118e-08, "loss": 16.9905, "step": 53593 }, { "epoch": 0.9796552543550185, "grad_norm": 8.999042941435864, "learning_rate": 1.0862703018615361e-08, "loss": 18.1367, "step": 53594 }, { "epoch": 0.979673533551465, "grad_norm": 5.454604418809306, "learning_rate": 1.0843210101922908e-08, "loss": 17.2069, "step": 53595 }, { "epoch": 0.9796918127479116, "grad_norm": 5.815917294565442, "learning_rate": 1.0823734671791541e-08, "loss": 16.926, "step": 53596 }, { "epoch": 0.9797100919443581, "grad_norm": 7.656388500306096, "learning_rate": 1.0804276728290653e-08, "loss": 17.6492, "step": 53597 }, { "epoch": 0.9797283711408047, "grad_norm": 5.719564479030949, "learning_rate": 1.078483627148741e-08, "loss": 17.0228, "step": 53598 }, { "epoch": 0.9797466503372512, "grad_norm": 8.064432913462557, "learning_rate": 1.0765413301450645e-08, "loss": 17.7785, "step": 53599 }, { "epoch": 0.9797649295336976, "grad_norm": 5.041893224951601, "learning_rate": 1.0746007818248082e-08, "loss": 16.7953, "step": 53600 }, { "epoch": 0.9797832087301442, "grad_norm": 6.298849650649409, "learning_rate": 1.0726619821947448e-08, "loss": 17.1911, "step": 53601 }, { "epoch": 0.9798014879265907, "grad_norm": 7.128284477758535, "learning_rate": 1.0707249312617574e-08, "loss": 17.7003, "step": 53602 }, { "epoch": 0.9798197671230373, "grad_norm": 9.930223675093432, "learning_rate": 1.0687896290325628e-08, "loss": 18.0305, "step": 53603 }, { "epoch": 0.9798380463194838, "grad_norm": 4.815943351913089, "learning_rate": 1.066856075513989e-08, "loss": 16.8701, "step": 53604 }, { "epoch": 0.9798563255159303, "grad_norm": 6.929735690084988, "learning_rate": 1.0649242707127527e-08, "loss": 17.0421, "step": 53605 }, { "epoch": 0.9798746047123769, "grad_norm": 6.879398728867253, "learning_rate": 1.0629942146356819e-08, "loss": 17.6793, "step": 53606 }, { "epoch": 0.9798928839088233, "grad_norm": 5.421083572640713, "learning_rate": 1.0610659072894936e-08, "loss": 17.0163, "step": 53607 }, { "epoch": 0.9799111631052699, "grad_norm": 6.926316434729453, "learning_rate": 1.0591393486810154e-08, "loss": 17.8698, "step": 53608 }, { "epoch": 0.9799294423017164, "grad_norm": 6.816032036641834, "learning_rate": 1.0572145388169086e-08, "loss": 17.5439, "step": 53609 }, { "epoch": 0.9799477214981629, "grad_norm": 7.038684500562999, "learning_rate": 1.0552914777040013e-08, "loss": 17.6083, "step": 53610 }, { "epoch": 0.9799660006946095, "grad_norm": 6.1885237326014675, "learning_rate": 1.0533701653490102e-08, "loss": 17.2688, "step": 53611 }, { "epoch": 0.979984279891056, "grad_norm": 8.056860911503584, "learning_rate": 1.0514506017586523e-08, "loss": 17.9202, "step": 53612 }, { "epoch": 0.9800025590875026, "grad_norm": 5.75157155009631, "learning_rate": 1.0495327869396443e-08, "loss": 17.0984, "step": 53613 }, { "epoch": 0.980020838283949, "grad_norm": 5.965226916931435, "learning_rate": 1.0476167208987586e-08, "loss": 17.2247, "step": 53614 }, { "epoch": 0.9800391174803955, "grad_norm": 6.540293959342948, "learning_rate": 1.0457024036426566e-08, "loss": 17.3025, "step": 53615 }, { "epoch": 0.9800573966768421, "grad_norm": 5.794575931569115, "learning_rate": 1.0437898351780551e-08, "loss": 17.2667, "step": 53616 }, { "epoch": 0.9800756758732886, "grad_norm": 7.282694996220892, "learning_rate": 1.0418790155117264e-08, "loss": 17.4833, "step": 53617 }, { "epoch": 0.9800939550697352, "grad_norm": 5.171728577265118, "learning_rate": 1.0399699446502765e-08, "loss": 16.7635, "step": 53618 }, { "epoch": 0.9801122342661817, "grad_norm": 7.285507349154373, "learning_rate": 1.038062622600422e-08, "loss": 17.7624, "step": 53619 }, { "epoch": 0.9801305134626281, "grad_norm": 3.9603271183610413, "learning_rate": 1.03615704936888e-08, "loss": 16.518, "step": 53620 }, { "epoch": 0.9801487926590747, "grad_norm": 7.9590354887153385, "learning_rate": 1.0342532249623116e-08, "loss": 17.4538, "step": 53621 }, { "epoch": 0.9801670718555212, "grad_norm": 5.823678831501475, "learning_rate": 1.0323511493873783e-08, "loss": 17.0323, "step": 53622 }, { "epoch": 0.9801853510519678, "grad_norm": 5.580212465996169, "learning_rate": 1.0304508226507415e-08, "loss": 17.0475, "step": 53623 }, { "epoch": 0.9802036302484143, "grad_norm": 6.89739902059556, "learning_rate": 1.0285522447591178e-08, "loss": 17.2681, "step": 53624 }, { "epoch": 0.9802219094448608, "grad_norm": 6.2305803258734125, "learning_rate": 1.0266554157190578e-08, "loss": 17.3362, "step": 53625 }, { "epoch": 0.9802401886413074, "grad_norm": 4.18252576818233, "learning_rate": 1.0247603355373337e-08, "loss": 16.7414, "step": 53626 }, { "epoch": 0.9802584678377538, "grad_norm": 5.631050714970029, "learning_rate": 1.0228670042204958e-08, "loss": 17.1119, "step": 53627 }, { "epoch": 0.9802767470342004, "grad_norm": 6.526666788155261, "learning_rate": 1.0209754217752055e-08, "loss": 17.2246, "step": 53628 }, { "epoch": 0.9802950262306469, "grad_norm": 4.904937606563237, "learning_rate": 1.019085588208124e-08, "loss": 16.7157, "step": 53629 }, { "epoch": 0.9803133054270934, "grad_norm": 5.646318664828957, "learning_rate": 1.0171975035258574e-08, "loss": 17.024, "step": 53630 }, { "epoch": 0.98033158462354, "grad_norm": 5.289761451701396, "learning_rate": 1.0153111677349559e-08, "loss": 16.9862, "step": 53631 }, { "epoch": 0.9803498638199865, "grad_norm": 8.250838950474185, "learning_rate": 1.0134265808421362e-08, "loss": 18.1341, "step": 53632 }, { "epoch": 0.980368143016433, "grad_norm": 5.2245779279471645, "learning_rate": 1.0115437428539487e-08, "loss": 17.1155, "step": 53633 }, { "epoch": 0.9803864222128795, "grad_norm": 6.794235765892779, "learning_rate": 1.0096626537769438e-08, "loss": 17.5868, "step": 53634 }, { "epoch": 0.980404701409326, "grad_norm": 5.683410964198491, "learning_rate": 1.0077833136178383e-08, "loss": 16.9544, "step": 53635 }, { "epoch": 0.9804229806057726, "grad_norm": 7.997263681121911, "learning_rate": 1.005905722383127e-08, "loss": 17.8047, "step": 53636 }, { "epoch": 0.9804412598022191, "grad_norm": 5.738046614044623, "learning_rate": 1.0040298800794156e-08, "loss": 17.2772, "step": 53637 }, { "epoch": 0.9804595389986657, "grad_norm": 7.286854155802318, "learning_rate": 1.0021557867133103e-08, "loss": 17.5438, "step": 53638 }, { "epoch": 0.9804778181951121, "grad_norm": 5.627617969243995, "learning_rate": 1.0002834422913054e-08, "loss": 17.1785, "step": 53639 }, { "epoch": 0.9804960973915586, "grad_norm": 5.797931052559444, "learning_rate": 9.984128468200072e-09, "loss": 17.2777, "step": 53640 }, { "epoch": 0.9805143765880052, "grad_norm": 6.624521769707552, "learning_rate": 9.965440003060212e-09, "loss": 17.3329, "step": 53641 }, { "epoch": 0.9805326557844517, "grad_norm": 4.573906127321083, "learning_rate": 9.946769027557868e-09, "loss": 16.4865, "step": 53642 }, { "epoch": 0.9805509349808983, "grad_norm": 6.698780226501516, "learning_rate": 9.928115541759098e-09, "loss": 17.2408, "step": 53643 }, { "epoch": 0.9805692141773448, "grad_norm": 7.106243862284173, "learning_rate": 9.90947954572996e-09, "loss": 17.6242, "step": 53644 }, { "epoch": 0.9805874933737913, "grad_norm": 5.049603481686905, "learning_rate": 9.890861039534294e-09, "loss": 17.1432, "step": 53645 }, { "epoch": 0.9806057725702378, "grad_norm": 5.653003383837362, "learning_rate": 9.87226002323871e-09, "loss": 17.1573, "step": 53646 }, { "epoch": 0.9806240517666843, "grad_norm": 7.266487774605226, "learning_rate": 9.853676496907605e-09, "loss": 17.6994, "step": 53647 }, { "epoch": 0.9806423309631309, "grad_norm": 5.4669705816594885, "learning_rate": 9.835110460605923e-09, "loss": 17.011, "step": 53648 }, { "epoch": 0.9806606101595774, "grad_norm": 5.455750521411118, "learning_rate": 9.816561914399724e-09, "loss": 17.0301, "step": 53649 }, { "epoch": 0.9806788893560239, "grad_norm": 5.353936563183881, "learning_rate": 9.798030858352847e-09, "loss": 17.1312, "step": 53650 }, { "epoch": 0.9806971685524705, "grad_norm": 7.21700211280564, "learning_rate": 9.779517292531349e-09, "loss": 17.3428, "step": 53651 }, { "epoch": 0.980715447748917, "grad_norm": 6.554547169260323, "learning_rate": 9.761021216999623e-09, "loss": 17.1721, "step": 53652 }, { "epoch": 0.9807337269453634, "grad_norm": 6.418777095720405, "learning_rate": 9.742542631822616e-09, "loss": 17.2388, "step": 53653 }, { "epoch": 0.98075200614181, "grad_norm": 5.716854461106238, "learning_rate": 9.724081537064722e-09, "loss": 17.4993, "step": 53654 }, { "epoch": 0.9807702853382565, "grad_norm": 4.882774940661436, "learning_rate": 9.70563793279089e-09, "loss": 16.7446, "step": 53655 }, { "epoch": 0.9807885645347031, "grad_norm": 8.248094623059693, "learning_rate": 9.687211819065511e-09, "loss": 17.9044, "step": 53656 }, { "epoch": 0.9808068437311496, "grad_norm": 5.7606279688452995, "learning_rate": 9.668803195953536e-09, "loss": 17.2542, "step": 53657 }, { "epoch": 0.980825122927596, "grad_norm": 7.118532681243987, "learning_rate": 9.65041206351991e-09, "loss": 18.0953, "step": 53658 }, { "epoch": 0.9808434021240426, "grad_norm": 6.603782481241069, "learning_rate": 9.632038421827915e-09, "loss": 17.109, "step": 53659 }, { "epoch": 0.9808616813204891, "grad_norm": 5.797549731023771, "learning_rate": 9.613682270943058e-09, "loss": 17.0055, "step": 53660 }, { "epoch": 0.9808799605169357, "grad_norm": 7.963119658891656, "learning_rate": 9.59534361092862e-09, "loss": 17.8735, "step": 53661 }, { "epoch": 0.9808982397133822, "grad_norm": 5.228198016421765, "learning_rate": 9.577022441849548e-09, "loss": 16.8225, "step": 53662 }, { "epoch": 0.9809165189098287, "grad_norm": 5.863930946264623, "learning_rate": 9.558718763770236e-09, "loss": 17.2163, "step": 53663 }, { "epoch": 0.9809347981062753, "grad_norm": 8.919219180303802, "learning_rate": 9.540432576754521e-09, "loss": 18.6313, "step": 53664 }, { "epoch": 0.9809530773027217, "grad_norm": 6.523833087550294, "learning_rate": 9.522163880866796e-09, "loss": 17.3539, "step": 53665 }, { "epoch": 0.9809713564991683, "grad_norm": 8.648134894940931, "learning_rate": 9.503912676170347e-09, "loss": 17.9072, "step": 53666 }, { "epoch": 0.9809896356956148, "grad_norm": 5.747337859640824, "learning_rate": 9.485678962730117e-09, "loss": 17.0426, "step": 53667 }, { "epoch": 0.9810079148920613, "grad_norm": 6.476918933143719, "learning_rate": 9.467462740608836e-09, "loss": 17.3942, "step": 53668 }, { "epoch": 0.9810261940885079, "grad_norm": 6.581898995727406, "learning_rate": 9.44926400987145e-09, "loss": 17.8752, "step": 53669 }, { "epoch": 0.9810444732849544, "grad_norm": 5.312990016935297, "learning_rate": 9.431082770581801e-09, "loss": 16.9535, "step": 53670 }, { "epoch": 0.981062752481401, "grad_norm": 5.9508829518422885, "learning_rate": 9.412919022802614e-09, "loss": 17.0495, "step": 53671 }, { "epoch": 0.9810810316778474, "grad_norm": 5.9597151990295805, "learning_rate": 9.394772766597727e-09, "loss": 17.2062, "step": 53672 }, { "epoch": 0.9810993108742939, "grad_norm": 5.688293404164238, "learning_rate": 9.376644002031532e-09, "loss": 17.127, "step": 53673 }, { "epoch": 0.9811175900707405, "grad_norm": 8.708968604503102, "learning_rate": 9.358532729167313e-09, "loss": 18.6099, "step": 53674 }, { "epoch": 0.981135869267187, "grad_norm": 6.4592879145358015, "learning_rate": 9.340438948068353e-09, "loss": 17.3667, "step": 53675 }, { "epoch": 0.9811541484636336, "grad_norm": 7.796403530732082, "learning_rate": 9.322362658797934e-09, "loss": 17.4378, "step": 53676 }, { "epoch": 0.9811724276600801, "grad_norm": 5.6134452753385755, "learning_rate": 9.30430386141934e-09, "loss": 17.0906, "step": 53677 }, { "epoch": 0.9811907068565265, "grad_norm": 6.601786969725094, "learning_rate": 9.286262555996406e-09, "loss": 17.5025, "step": 53678 }, { "epoch": 0.9812089860529731, "grad_norm": 5.9176160953094135, "learning_rate": 9.268238742591861e-09, "loss": 17.3155, "step": 53679 }, { "epoch": 0.9812272652494196, "grad_norm": 7.930100013651489, "learning_rate": 9.25023242126899e-09, "loss": 17.465, "step": 53680 }, { "epoch": 0.9812455444458662, "grad_norm": 7.141536678792743, "learning_rate": 9.232243592091072e-09, "loss": 17.4939, "step": 53681 }, { "epoch": 0.9812638236423127, "grad_norm": 5.647154009996595, "learning_rate": 9.214272255121392e-09, "loss": 17.1611, "step": 53682 }, { "epoch": 0.9812821028387592, "grad_norm": 6.694083026800236, "learning_rate": 9.196318410422122e-09, "loss": 17.8086, "step": 53683 }, { "epoch": 0.9813003820352058, "grad_norm": 6.487395749133199, "learning_rate": 9.1783820580571e-09, "loss": 17.4911, "step": 53684 }, { "epoch": 0.9813186612316522, "grad_norm": 7.977901817384042, "learning_rate": 9.160463198088499e-09, "loss": 17.5711, "step": 53685 }, { "epoch": 0.9813369404280988, "grad_norm": 6.239796804108046, "learning_rate": 9.142561830579045e-09, "loss": 17.156, "step": 53686 }, { "epoch": 0.9813552196245453, "grad_norm": 6.356031673927684, "learning_rate": 9.124677955592576e-09, "loss": 17.0542, "step": 53687 }, { "epoch": 0.9813734988209918, "grad_norm": 5.745307102038736, "learning_rate": 9.10681157319071e-09, "loss": 17.351, "step": 53688 }, { "epoch": 0.9813917780174384, "grad_norm": 6.3285300400869655, "learning_rate": 9.088962683436176e-09, "loss": 17.1838, "step": 53689 }, { "epoch": 0.9814100572138849, "grad_norm": 8.364932950090687, "learning_rate": 9.071131286391699e-09, "loss": 17.8549, "step": 53690 }, { "epoch": 0.9814283364103314, "grad_norm": 7.7388892117958985, "learning_rate": 9.053317382120008e-09, "loss": 17.5672, "step": 53691 }, { "epoch": 0.9814466156067779, "grad_norm": 5.858430559700876, "learning_rate": 9.035520970683277e-09, "loss": 17.2253, "step": 53692 }, { "epoch": 0.9814648948032244, "grad_norm": 6.239342918334937, "learning_rate": 9.017742052144229e-09, "loss": 17.5551, "step": 53693 }, { "epoch": 0.981483173999671, "grad_norm": 5.116168312083719, "learning_rate": 8.999980626564487e-09, "loss": 17.0893, "step": 53694 }, { "epoch": 0.9815014531961175, "grad_norm": 5.320335323724899, "learning_rate": 8.98223669400733e-09, "loss": 16.9026, "step": 53695 }, { "epoch": 0.9815197323925641, "grad_norm": 6.433751300793836, "learning_rate": 8.96451025453382e-09, "loss": 17.4347, "step": 53696 }, { "epoch": 0.9815380115890106, "grad_norm": 5.726491301330323, "learning_rate": 8.946801308206687e-09, "loss": 16.9491, "step": 53697 }, { "epoch": 0.981556290785457, "grad_norm": 5.733667421678687, "learning_rate": 8.929109855087547e-09, "loss": 17.09, "step": 53698 }, { "epoch": 0.9815745699819036, "grad_norm": 6.5678411430463495, "learning_rate": 8.911435895239128e-09, "loss": 17.7424, "step": 53699 }, { "epoch": 0.9815928491783501, "grad_norm": 6.994881022142018, "learning_rate": 8.893779428723049e-09, "loss": 17.6499, "step": 53700 }, { "epoch": 0.9816111283747967, "grad_norm": 4.9770471822139974, "learning_rate": 8.876140455601479e-09, "loss": 16.7377, "step": 53701 }, { "epoch": 0.9816294075712432, "grad_norm": 7.4270005201800515, "learning_rate": 8.858518975935482e-09, "loss": 17.6997, "step": 53702 }, { "epoch": 0.9816476867676897, "grad_norm": 6.901492220083611, "learning_rate": 8.84091498978723e-09, "loss": 17.7022, "step": 53703 }, { "epoch": 0.9816659659641362, "grad_norm": 7.333192534532946, "learning_rate": 8.823328497218898e-09, "loss": 17.8213, "step": 53704 }, { "epoch": 0.9816842451605827, "grad_norm": 5.151762645577921, "learning_rate": 8.80575949829099e-09, "loss": 16.8137, "step": 53705 }, { "epoch": 0.9817025243570293, "grad_norm": 6.007385897750693, "learning_rate": 8.788207993066234e-09, "loss": 17.0357, "step": 53706 }, { "epoch": 0.9817208035534758, "grad_norm": 5.3887007879391575, "learning_rate": 8.770673981605693e-09, "loss": 17.012, "step": 53707 }, { "epoch": 0.9817390827499223, "grad_norm": 8.147321295214011, "learning_rate": 8.753157463970985e-09, "loss": 17.842, "step": 53708 }, { "epoch": 0.9817573619463689, "grad_norm": 8.084561126318816, "learning_rate": 8.735658440223171e-09, "loss": 17.9949, "step": 53709 }, { "epoch": 0.9817756411428153, "grad_norm": 6.3033309258499575, "learning_rate": 8.718176910423315e-09, "loss": 17.4867, "step": 53710 }, { "epoch": 0.9817939203392619, "grad_norm": 5.145222691705558, "learning_rate": 8.700712874633587e-09, "loss": 17.0372, "step": 53711 }, { "epoch": 0.9818121995357084, "grad_norm": 5.367326388525524, "learning_rate": 8.683266332914497e-09, "loss": 16.8905, "step": 53712 }, { "epoch": 0.9818304787321549, "grad_norm": 5.597159325296527, "learning_rate": 8.665837285327661e-09, "loss": 16.9565, "step": 53713 }, { "epoch": 0.9818487579286015, "grad_norm": 5.965734411424534, "learning_rate": 8.64842573193414e-09, "loss": 17.4918, "step": 53714 }, { "epoch": 0.981867037125048, "grad_norm": 7.572573664917748, "learning_rate": 8.631031672794443e-09, "loss": 17.6096, "step": 53715 }, { "epoch": 0.9818853163214946, "grad_norm": 8.006628104406609, "learning_rate": 8.613655107969632e-09, "loss": 17.3936, "step": 53716 }, { "epoch": 0.981903595517941, "grad_norm": 5.901288901225726, "learning_rate": 8.596296037521323e-09, "loss": 17.2543, "step": 53717 }, { "epoch": 0.9819218747143875, "grad_norm": 6.768807362918833, "learning_rate": 8.57895446150947e-09, "loss": 17.3214, "step": 53718 }, { "epoch": 0.9819401539108341, "grad_norm": 6.004531716851894, "learning_rate": 8.56163037999569e-09, "loss": 17.1762, "step": 53719 }, { "epoch": 0.9819584331072806, "grad_norm": 6.303565709478989, "learning_rate": 8.544323793039933e-09, "loss": 17.1996, "step": 53720 }, { "epoch": 0.9819767123037271, "grad_norm": 11.330312249577286, "learning_rate": 8.527034700703263e-09, "loss": 18.3774, "step": 53721 }, { "epoch": 0.9819949915001737, "grad_norm": 6.643961328813833, "learning_rate": 8.509763103046187e-09, "loss": 17.6518, "step": 53722 }, { "epoch": 0.9820132706966201, "grad_norm": 5.0121702302711295, "learning_rate": 8.492509000129212e-09, "loss": 16.8449, "step": 53723 }, { "epoch": 0.9820315498930667, "grad_norm": 6.468777447840168, "learning_rate": 8.475272392012846e-09, "loss": 17.5103, "step": 53724 }, { "epoch": 0.9820498290895132, "grad_norm": 5.754596751010515, "learning_rate": 8.458053278757594e-09, "loss": 17.0987, "step": 53725 }, { "epoch": 0.9820681082859597, "grad_norm": 5.85035320957968, "learning_rate": 8.44085166042341e-09, "loss": 17.2221, "step": 53726 }, { "epoch": 0.9820863874824063, "grad_norm": 7.208229593214283, "learning_rate": 8.423667537071356e-09, "loss": 17.7154, "step": 53727 }, { "epoch": 0.9821046666788528, "grad_norm": 5.156165396128274, "learning_rate": 8.406500908760829e-09, "loss": 16.8028, "step": 53728 }, { "epoch": 0.9821229458752994, "grad_norm": 6.55622472824312, "learning_rate": 8.389351775552334e-09, "loss": 17.5414, "step": 53729 }, { "epoch": 0.9821412250717458, "grad_norm": 6.025937863778101, "learning_rate": 8.372220137506382e-09, "loss": 17.1426, "step": 53730 }, { "epoch": 0.9821595042681923, "grad_norm": 6.297661097281785, "learning_rate": 8.355105994682367e-09, "loss": 17.2273, "step": 53731 }, { "epoch": 0.9821777834646389, "grad_norm": 7.545471077832101, "learning_rate": 8.338009347140242e-09, "loss": 18.1199, "step": 53732 }, { "epoch": 0.9821960626610854, "grad_norm": 5.868526137226886, "learning_rate": 8.320930194941067e-09, "loss": 17.2988, "step": 53733 }, { "epoch": 0.982214341857532, "grad_norm": 4.7107557210343165, "learning_rate": 8.303868538143133e-09, "loss": 16.864, "step": 53734 }, { "epoch": 0.9822326210539785, "grad_norm": 7.7094094148771655, "learning_rate": 8.2868243768075e-09, "loss": 17.7161, "step": 53735 }, { "epoch": 0.9822509002504249, "grad_norm": 6.647607847020959, "learning_rate": 8.269797710993011e-09, "loss": 17.4247, "step": 53736 }, { "epoch": 0.9822691794468715, "grad_norm": 5.354393690653791, "learning_rate": 8.252788540759616e-09, "loss": 17.1453, "step": 53737 }, { "epoch": 0.982287458643318, "grad_norm": 7.695525267102839, "learning_rate": 8.235796866167267e-09, "loss": 17.527, "step": 53738 }, { "epoch": 0.9823057378397646, "grad_norm": 7.399510686733524, "learning_rate": 8.218822687275364e-09, "loss": 17.6461, "step": 53739 }, { "epoch": 0.9823240170362111, "grad_norm": 6.292708434611729, "learning_rate": 8.2018660041433e-09, "loss": 17.504, "step": 53740 }, { "epoch": 0.9823422962326576, "grad_norm": 7.611394767772051, "learning_rate": 8.184926816830474e-09, "loss": 17.6927, "step": 53741 }, { "epoch": 0.9823605754291042, "grad_norm": 6.408967357188552, "learning_rate": 8.168005125396284e-09, "loss": 17.197, "step": 53742 }, { "epoch": 0.9823788546255506, "grad_norm": 12.323510012688008, "learning_rate": 8.151100929900124e-09, "loss": 17.8945, "step": 53743 }, { "epoch": 0.9823971338219972, "grad_norm": 20.752118508254963, "learning_rate": 8.134214230401395e-09, "loss": 18.4063, "step": 53744 }, { "epoch": 0.9824154130184437, "grad_norm": 6.484626810175333, "learning_rate": 8.117345026958934e-09, "loss": 17.2381, "step": 53745 }, { "epoch": 0.9824336922148902, "grad_norm": 5.952997129235351, "learning_rate": 8.100493319631586e-09, "loss": 17.3031, "step": 53746 }, { "epoch": 0.9824519714113368, "grad_norm": 6.5639994906549015, "learning_rate": 8.083659108479303e-09, "loss": 17.2109, "step": 53747 }, { "epoch": 0.9824702506077833, "grad_norm": 7.559766742618542, "learning_rate": 8.06684239356037e-09, "loss": 17.7617, "step": 53748 }, { "epoch": 0.9824885298042298, "grad_norm": 6.588432756917305, "learning_rate": 8.050043174934186e-09, "loss": 17.5086, "step": 53749 }, { "epoch": 0.9825068090006763, "grad_norm": 5.711789327110004, "learning_rate": 8.033261452659036e-09, "loss": 17.3986, "step": 53750 }, { "epoch": 0.9825250881971228, "grad_norm": 6.584776994412678, "learning_rate": 8.016497226794873e-09, "loss": 17.6227, "step": 53751 }, { "epoch": 0.9825433673935694, "grad_norm": 5.410387494358798, "learning_rate": 7.999750497398873e-09, "loss": 16.9711, "step": 53752 }, { "epoch": 0.9825616465900159, "grad_norm": 4.946076873148226, "learning_rate": 7.983021264530988e-09, "loss": 16.9227, "step": 53753 }, { "epoch": 0.9825799257864625, "grad_norm": 5.438862967586154, "learning_rate": 7.966309528249504e-09, "loss": 17.2331, "step": 53754 }, { "epoch": 0.982598204982909, "grad_norm": 5.9095717551860965, "learning_rate": 7.94961528861271e-09, "loss": 17.4495, "step": 53755 }, { "epoch": 0.9826164841793554, "grad_norm": 6.728299790798949, "learning_rate": 7.932938545679447e-09, "loss": 17.7785, "step": 53756 }, { "epoch": 0.982634763375802, "grad_norm": 4.807781956019627, "learning_rate": 7.916279299508e-09, "loss": 16.6168, "step": 53757 }, { "epoch": 0.9826530425722485, "grad_norm": 6.215822888536703, "learning_rate": 7.899637550156659e-09, "loss": 17.4615, "step": 53758 }, { "epoch": 0.9826713217686951, "grad_norm": 5.841916662397862, "learning_rate": 7.883013297683705e-09, "loss": 17.3692, "step": 53759 }, { "epoch": 0.9826896009651416, "grad_norm": 7.13534293910638, "learning_rate": 7.866406542147987e-09, "loss": 17.6792, "step": 53760 }, { "epoch": 0.982707880161588, "grad_norm": 6.750225339336931, "learning_rate": 7.849817283607231e-09, "loss": 17.3941, "step": 53761 }, { "epoch": 0.9827261593580346, "grad_norm": 5.464952681597745, "learning_rate": 7.833245522119726e-09, "loss": 17.2546, "step": 53762 }, { "epoch": 0.9827444385544811, "grad_norm": 4.683356723891117, "learning_rate": 7.816691257743203e-09, "loss": 16.7409, "step": 53763 }, { "epoch": 0.9827627177509277, "grad_norm": 6.3869480804295025, "learning_rate": 7.800154490536505e-09, "loss": 17.3422, "step": 53764 }, { "epoch": 0.9827809969473742, "grad_norm": 6.673372414582423, "learning_rate": 7.783635220556806e-09, "loss": 17.3265, "step": 53765 }, { "epoch": 0.9827992761438207, "grad_norm": 4.817401980712645, "learning_rate": 7.76713344786184e-09, "loss": 16.8947, "step": 53766 }, { "epoch": 0.9828175553402673, "grad_norm": 4.6126739538966985, "learning_rate": 7.750649172510449e-09, "loss": 16.7145, "step": 53767 }, { "epoch": 0.9828358345367137, "grad_norm": 5.5528870794047736, "learning_rate": 7.734182394559809e-09, "loss": 17.0725, "step": 53768 }, { "epoch": 0.9828541137331603, "grad_norm": 5.810814252058228, "learning_rate": 7.717733114067094e-09, "loss": 17.0228, "step": 53769 }, { "epoch": 0.9828723929296068, "grad_norm": 5.764037668637109, "learning_rate": 7.701301331091149e-09, "loss": 17.0993, "step": 53770 }, { "epoch": 0.9828906721260533, "grad_norm": 6.321690351092574, "learning_rate": 7.684887045688594e-09, "loss": 17.3768, "step": 53771 }, { "epoch": 0.9829089513224999, "grad_norm": 5.078391764476495, "learning_rate": 7.66849025791716e-09, "loss": 16.8098, "step": 53772 }, { "epoch": 0.9829272305189464, "grad_norm": 5.1874027016640305, "learning_rate": 7.652110967834581e-09, "loss": 16.7346, "step": 53773 }, { "epoch": 0.982945509715393, "grad_norm": 7.435771161949646, "learning_rate": 7.635749175498031e-09, "loss": 17.9393, "step": 53774 }, { "epoch": 0.9829637889118394, "grad_norm": 8.790121052885189, "learning_rate": 7.619404880965243e-09, "loss": 18.1686, "step": 53775 }, { "epoch": 0.9829820681082859, "grad_norm": 8.803006268355942, "learning_rate": 7.603078084293392e-09, "loss": 18.4643, "step": 53776 }, { "epoch": 0.9830003473047325, "grad_norm": 6.809409681639416, "learning_rate": 7.5867687855391e-09, "loss": 17.4771, "step": 53777 }, { "epoch": 0.983018626501179, "grad_norm": 6.207774335476476, "learning_rate": 7.5704769847601e-09, "loss": 17.3425, "step": 53778 }, { "epoch": 0.9830369056976256, "grad_norm": 6.119864024798498, "learning_rate": 7.554202682013567e-09, "loss": 16.9531, "step": 53779 }, { "epoch": 0.9830551848940721, "grad_norm": 5.4283098722960865, "learning_rate": 7.537945877356123e-09, "loss": 17.0958, "step": 53780 }, { "epoch": 0.9830734640905185, "grad_norm": 6.338634796090126, "learning_rate": 7.521706570844944e-09, "loss": 17.1966, "step": 53781 }, { "epoch": 0.9830917432869651, "grad_norm": 5.722462258310717, "learning_rate": 7.505484762537207e-09, "loss": 16.9867, "step": 53782 }, { "epoch": 0.9831100224834116, "grad_norm": 6.564487390376543, "learning_rate": 7.48928045248898e-09, "loss": 17.5263, "step": 53783 }, { "epoch": 0.9831283016798582, "grad_norm": 5.978395670881727, "learning_rate": 7.47309364075799e-09, "loss": 17.1713, "step": 53784 }, { "epoch": 0.9831465808763047, "grad_norm": 6.513686478842387, "learning_rate": 7.456924327400306e-09, "loss": 17.4241, "step": 53785 }, { "epoch": 0.9831648600727512, "grad_norm": 7.252016503790416, "learning_rate": 7.440772512473104e-09, "loss": 17.4923, "step": 53786 }, { "epoch": 0.9831831392691978, "grad_norm": 6.650757760833929, "learning_rate": 7.424638196033007e-09, "loss": 17.3317, "step": 53787 }, { "epoch": 0.9832014184656442, "grad_norm": 4.779884735859896, "learning_rate": 7.4085213781355245e-09, "loss": 16.72, "step": 53788 }, { "epoch": 0.9832196976620907, "grad_norm": 4.7990289149850405, "learning_rate": 7.392422058838944e-09, "loss": 16.6874, "step": 53789 }, { "epoch": 0.9832379768585373, "grad_norm": 6.675830314841401, "learning_rate": 7.376340238197666e-09, "loss": 17.5342, "step": 53790 }, { "epoch": 0.9832562560549838, "grad_norm": 7.004283792387517, "learning_rate": 7.3602759162699764e-09, "loss": 17.6226, "step": 53791 }, { "epoch": 0.9832745352514304, "grad_norm": 6.520582949312355, "learning_rate": 7.3442290931108326e-09, "loss": 17.7021, "step": 53792 }, { "epoch": 0.9832928144478769, "grad_norm": 6.035921263272296, "learning_rate": 7.3281997687768555e-09, "loss": 17.1047, "step": 53793 }, { "epoch": 0.9833110936443233, "grad_norm": 6.002908229207575, "learning_rate": 7.312187943324111e-09, "loss": 17.3124, "step": 53794 }, { "epoch": 0.9833293728407699, "grad_norm": 6.31047703254781, "learning_rate": 7.296193616809777e-09, "loss": 17.3694, "step": 53795 }, { "epoch": 0.9833476520372164, "grad_norm": 7.070859613122842, "learning_rate": 7.280216789288253e-09, "loss": 17.8382, "step": 53796 }, { "epoch": 0.983365931233663, "grad_norm": 5.65634670366316, "learning_rate": 7.264257460816715e-09, "loss": 17.1235, "step": 53797 }, { "epoch": 0.9833842104301095, "grad_norm": 6.426701142418774, "learning_rate": 7.24831563145012e-09, "loss": 17.3958, "step": 53798 }, { "epoch": 0.983402489626556, "grad_norm": 5.815913088972162, "learning_rate": 7.232391301245645e-09, "loss": 17.1491, "step": 53799 }, { "epoch": 0.9834207688230026, "grad_norm": 8.40942481184055, "learning_rate": 7.21648447025769e-09, "loss": 18.1703, "step": 53800 }, { "epoch": 0.983439048019449, "grad_norm": 5.986668875015163, "learning_rate": 7.200595138543431e-09, "loss": 17.2932, "step": 53801 }, { "epoch": 0.9834573272158956, "grad_norm": 10.061131883830898, "learning_rate": 7.184723306157271e-09, "loss": 18.7147, "step": 53802 }, { "epoch": 0.9834756064123421, "grad_norm": 5.106201665302917, "learning_rate": 7.168868973155829e-09, "loss": 16.9577, "step": 53803 }, { "epoch": 0.9834938856087886, "grad_norm": 5.969553073400937, "learning_rate": 7.153032139594063e-09, "loss": 17.4397, "step": 53804 }, { "epoch": 0.9835121648052352, "grad_norm": 7.115026258249302, "learning_rate": 7.137212805527483e-09, "loss": 18.0406, "step": 53805 }, { "epoch": 0.9835304440016817, "grad_norm": 6.221629599106898, "learning_rate": 7.121410971011599e-09, "loss": 17.5918, "step": 53806 }, { "epoch": 0.9835487231981282, "grad_norm": 5.8246473723150025, "learning_rate": 7.105626636102481e-09, "loss": 17.0232, "step": 53807 }, { "epoch": 0.9835670023945747, "grad_norm": 5.300674729799401, "learning_rate": 7.0898598008545265e-09, "loss": 17.0493, "step": 53808 }, { "epoch": 0.9835852815910212, "grad_norm": 7.593746527987152, "learning_rate": 7.074110465323248e-09, "loss": 17.2905, "step": 53809 }, { "epoch": 0.9836035607874678, "grad_norm": 5.585248593661203, "learning_rate": 7.058378629564156e-09, "loss": 17.0367, "step": 53810 }, { "epoch": 0.9836218399839143, "grad_norm": 5.501343852214202, "learning_rate": 7.042664293632207e-09, "loss": 17.0326, "step": 53811 }, { "epoch": 0.9836401191803609, "grad_norm": 7.834496894453935, "learning_rate": 7.026967457582356e-09, "loss": 18.0352, "step": 53812 }, { "epoch": 0.9836583983768074, "grad_norm": 5.748708182072675, "learning_rate": 7.011288121469561e-09, "loss": 17.0068, "step": 53813 }, { "epoch": 0.9836766775732538, "grad_norm": 6.62057029875815, "learning_rate": 6.995626285348778e-09, "loss": 17.5067, "step": 53814 }, { "epoch": 0.9836949567697004, "grad_norm": 6.634998594804685, "learning_rate": 6.979981949275516e-09, "loss": 17.5863, "step": 53815 }, { "epoch": 0.9837132359661469, "grad_norm": 6.377783766242521, "learning_rate": 6.964355113304178e-09, "loss": 17.4207, "step": 53816 }, { "epoch": 0.9837315151625935, "grad_norm": 5.891669601313754, "learning_rate": 6.948745777489163e-09, "loss": 17.3459, "step": 53817 }, { "epoch": 0.98374979435904, "grad_norm": 5.965522973056467, "learning_rate": 6.933153941885429e-09, "loss": 17.4881, "step": 53818 }, { "epoch": 0.9837680735554865, "grad_norm": 7.314375745105363, "learning_rate": 6.917579606547931e-09, "loss": 17.3997, "step": 53819 }, { "epoch": 0.983786352751933, "grad_norm": 6.546751301288976, "learning_rate": 6.90202277153107e-09, "loss": 17.385, "step": 53820 }, { "epoch": 0.9838046319483795, "grad_norm": 7.1955536406828555, "learning_rate": 6.886483436889247e-09, "loss": 17.9213, "step": 53821 }, { "epoch": 0.9838229111448261, "grad_norm": 6.005616280804842, "learning_rate": 6.8709616026774175e-09, "loss": 17.04, "step": 53822 }, { "epoch": 0.9838411903412726, "grad_norm": 6.9174260721860055, "learning_rate": 6.855457268948873e-09, "loss": 18.0038, "step": 53823 }, { "epoch": 0.9838594695377191, "grad_norm": 6.187392188239419, "learning_rate": 6.839970435759125e-09, "loss": 17.2879, "step": 53824 }, { "epoch": 0.9838777487341657, "grad_norm": 5.174694898336956, "learning_rate": 6.8245011031620175e-09, "loss": 16.8656, "step": 53825 }, { "epoch": 0.9838960279306121, "grad_norm": 8.221891600615614, "learning_rate": 6.809049271211954e-09, "loss": 17.5145, "step": 53826 }, { "epoch": 0.9839143071270587, "grad_norm": 6.911317714834367, "learning_rate": 6.793614939962223e-09, "loss": 17.7183, "step": 53827 }, { "epoch": 0.9839325863235052, "grad_norm": 5.184335700871065, "learning_rate": 6.778198109467782e-09, "loss": 17.033, "step": 53828 }, { "epoch": 0.9839508655199517, "grad_norm": 5.6750118468856, "learning_rate": 6.762798779782475e-09, "loss": 17.0958, "step": 53829 }, { "epoch": 0.9839691447163983, "grad_norm": 6.67649092913152, "learning_rate": 6.7474169509607055e-09, "loss": 17.5106, "step": 53830 }, { "epoch": 0.9839874239128448, "grad_norm": 5.830837885151293, "learning_rate": 6.7320526230557626e-09, "loss": 17.1991, "step": 53831 }, { "epoch": 0.9840057031092914, "grad_norm": 6.521812486316013, "learning_rate": 6.716705796121492e-09, "loss": 17.0566, "step": 53832 }, { "epoch": 0.9840239823057378, "grad_norm": 6.346827684248571, "learning_rate": 6.70137647021174e-09, "loss": 17.5812, "step": 53833 }, { "epoch": 0.9840422615021843, "grad_norm": 7.3881308592148605, "learning_rate": 6.686064645380908e-09, "loss": 17.9879, "step": 53834 }, { "epoch": 0.9840605406986309, "grad_norm": 6.304968685891093, "learning_rate": 6.67077032168173e-09, "loss": 17.2601, "step": 53835 }, { "epoch": 0.9840788198950774, "grad_norm": 5.996061889113152, "learning_rate": 6.655493499168053e-09, "loss": 17.1159, "step": 53836 }, { "epoch": 0.984097099091524, "grad_norm": 6.534324926937102, "learning_rate": 6.640234177893723e-09, "loss": 17.2387, "step": 53837 }, { "epoch": 0.9841153782879705, "grad_norm": 5.265472720555835, "learning_rate": 6.62499235791203e-09, "loss": 16.8681, "step": 53838 }, { "epoch": 0.984133657484417, "grad_norm": 5.289099164619087, "learning_rate": 6.609768039276265e-09, "loss": 16.9257, "step": 53839 }, { "epoch": 0.9841519366808635, "grad_norm": 7.225768940216284, "learning_rate": 6.594561222040274e-09, "loss": 17.662, "step": 53840 }, { "epoch": 0.98417021587731, "grad_norm": 5.238141331886376, "learning_rate": 6.579371906257348e-09, "loss": 17.0137, "step": 53841 }, { "epoch": 0.9841884950737566, "grad_norm": 5.709868431067097, "learning_rate": 6.564200091979667e-09, "loss": 17.114, "step": 53842 }, { "epoch": 0.9842067742702031, "grad_norm": 7.221587596939282, "learning_rate": 6.549045779261631e-09, "loss": 17.7205, "step": 53843 }, { "epoch": 0.9842250534666496, "grad_norm": 5.281541723108116, "learning_rate": 6.5339089681559774e-09, "loss": 17.018, "step": 53844 }, { "epoch": 0.9842433326630962, "grad_norm": 5.502544959105241, "learning_rate": 6.518789658715441e-09, "loss": 17.1242, "step": 53845 }, { "epoch": 0.9842616118595426, "grad_norm": 5.361986824506333, "learning_rate": 6.503687850992757e-09, "loss": 17.1052, "step": 53846 }, { "epoch": 0.9842798910559892, "grad_norm": 5.930081166965968, "learning_rate": 6.4886035450417715e-09, "loss": 17.1495, "step": 53847 }, { "epoch": 0.9842981702524357, "grad_norm": 6.98444706461924, "learning_rate": 6.47353674091522e-09, "loss": 17.2755, "step": 53848 }, { "epoch": 0.9843164494488822, "grad_norm": 7.011136608407094, "learning_rate": 6.458487438665284e-09, "loss": 17.2699, "step": 53849 }, { "epoch": 0.9843347286453288, "grad_norm": 7.421996968730938, "learning_rate": 6.4434556383452525e-09, "loss": 17.8486, "step": 53850 }, { "epoch": 0.9843530078417753, "grad_norm": 6.207779614863489, "learning_rate": 6.428441340007307e-09, "loss": 17.0375, "step": 53851 }, { "epoch": 0.9843712870382219, "grad_norm": 5.296311619617896, "learning_rate": 6.413444543704184e-09, "loss": 17.1236, "step": 53852 }, { "epoch": 0.9843895662346683, "grad_norm": 7.1819445558049715, "learning_rate": 6.398465249489172e-09, "loss": 17.5565, "step": 53853 }, { "epoch": 0.9844078454311148, "grad_norm": 6.9696790039006355, "learning_rate": 6.383503457413897e-09, "loss": 17.4531, "step": 53854 }, { "epoch": 0.9844261246275614, "grad_norm": 5.528051862688487, "learning_rate": 6.368559167531096e-09, "loss": 16.9842, "step": 53855 }, { "epoch": 0.9844444038240079, "grad_norm": 5.996265136714266, "learning_rate": 6.353632379892949e-09, "loss": 17.0994, "step": 53856 }, { "epoch": 0.9844626830204544, "grad_norm": 6.161937967628218, "learning_rate": 6.33872309455219e-09, "loss": 17.3938, "step": 53857 }, { "epoch": 0.984480962216901, "grad_norm": 6.180716413241125, "learning_rate": 6.323831311561001e-09, "loss": 17.0858, "step": 53858 }, { "epoch": 0.9844992414133474, "grad_norm": 6.471774935700605, "learning_rate": 6.308957030971563e-09, "loss": 17.444, "step": 53859 }, { "epoch": 0.984517520609794, "grad_norm": 5.317932542554725, "learning_rate": 6.294100252836055e-09, "loss": 17.1962, "step": 53860 }, { "epoch": 0.9845357998062405, "grad_norm": 7.603795110571739, "learning_rate": 6.279260977206103e-09, "loss": 17.4427, "step": 53861 }, { "epoch": 0.984554079002687, "grad_norm": 6.6322011588519425, "learning_rate": 6.264439204133887e-09, "loss": 17.5242, "step": 53862 }, { "epoch": 0.9845723581991336, "grad_norm": 6.282572006074761, "learning_rate": 6.249634933671034e-09, "loss": 17.2563, "step": 53863 }, { "epoch": 0.9845906373955801, "grad_norm": 7.219268258380728, "learning_rate": 6.234848165870833e-09, "loss": 17.5496, "step": 53864 }, { "epoch": 0.9846089165920267, "grad_norm": 6.210196052806949, "learning_rate": 6.220078900783799e-09, "loss": 17.1006, "step": 53865 }, { "epoch": 0.9846271957884731, "grad_norm": 8.328546675708722, "learning_rate": 6.205327138461558e-09, "loss": 18.3254, "step": 53866 }, { "epoch": 0.9846454749849196, "grad_norm": 6.591811908644616, "learning_rate": 6.190592878956847e-09, "loss": 17.3846, "step": 53867 }, { "epoch": 0.9846637541813662, "grad_norm": 6.117801196450474, "learning_rate": 6.175876122320734e-09, "loss": 17.1819, "step": 53868 }, { "epoch": 0.9846820333778127, "grad_norm": 5.828942038113949, "learning_rate": 6.161176868604845e-09, "loss": 17.346, "step": 53869 }, { "epoch": 0.9847003125742593, "grad_norm": 6.151246084823151, "learning_rate": 6.146495117860251e-09, "loss": 17.2663, "step": 53870 }, { "epoch": 0.9847185917707058, "grad_norm": 6.478426388119529, "learning_rate": 6.131830870139133e-09, "loss": 17.0106, "step": 53871 }, { "epoch": 0.9847368709671522, "grad_norm": 6.472085539198639, "learning_rate": 6.1171841254925594e-09, "loss": 17.6613, "step": 53872 }, { "epoch": 0.9847551501635988, "grad_norm": 4.8497061646031865, "learning_rate": 6.102554883972156e-09, "loss": 16.9092, "step": 53873 }, { "epoch": 0.9847734293600453, "grad_norm": 5.285833539065274, "learning_rate": 6.087943145628994e-09, "loss": 16.8972, "step": 53874 }, { "epoch": 0.9847917085564919, "grad_norm": 7.383254862308672, "learning_rate": 6.0733489105135876e-09, "loss": 17.6622, "step": 53875 }, { "epoch": 0.9848099877529384, "grad_norm": 5.663043571922799, "learning_rate": 6.058772178678119e-09, "loss": 17.1907, "step": 53876 }, { "epoch": 0.9848282669493849, "grad_norm": 6.408680908534596, "learning_rate": 6.044212950173656e-09, "loss": 17.1642, "step": 53877 }, { "epoch": 0.9848465461458314, "grad_norm": 5.404106563678953, "learning_rate": 6.029671225050715e-09, "loss": 16.9562, "step": 53878 }, { "epoch": 0.9848648253422779, "grad_norm": 5.816598910337017, "learning_rate": 6.015147003359812e-09, "loss": 17.1363, "step": 53879 }, { "epoch": 0.9848831045387245, "grad_norm": 6.427556683500858, "learning_rate": 6.000640285153125e-09, "loss": 17.4006, "step": 53880 }, { "epoch": 0.984901383735171, "grad_norm": 6.452636643698783, "learning_rate": 5.986151070480617e-09, "loss": 17.1763, "step": 53881 }, { "epoch": 0.9849196629316175, "grad_norm": 5.8971741181780795, "learning_rate": 5.971679359393356e-09, "loss": 17.1347, "step": 53882 }, { "epoch": 0.9849379421280641, "grad_norm": 4.914512903681115, "learning_rate": 5.957225151941859e-09, "loss": 16.9317, "step": 53883 }, { "epoch": 0.9849562213245106, "grad_norm": 6.222784821667851, "learning_rate": 5.9427884481771944e-09, "loss": 17.3607, "step": 53884 }, { "epoch": 0.9849745005209571, "grad_norm": 6.241908788161643, "learning_rate": 5.928369248149324e-09, "loss": 16.9761, "step": 53885 }, { "epoch": 0.9849927797174036, "grad_norm": 6.243466173109457, "learning_rate": 5.913967551909316e-09, "loss": 17.3045, "step": 53886 }, { "epoch": 0.9850110589138501, "grad_norm": 5.720470213814956, "learning_rate": 5.899583359507688e-09, "loss": 17.1374, "step": 53887 }, { "epoch": 0.9850293381102967, "grad_norm": 6.208804272391317, "learning_rate": 5.885216670994398e-09, "loss": 17.2533, "step": 53888 }, { "epoch": 0.9850476173067432, "grad_norm": 6.4196372976137885, "learning_rate": 5.8708674864199625e-09, "loss": 17.3238, "step": 53889 }, { "epoch": 0.9850658965031898, "grad_norm": 5.703013814739677, "learning_rate": 5.856535805834896e-09, "loss": 17.0289, "step": 53890 }, { "epoch": 0.9850841756996362, "grad_norm": 8.588653091165646, "learning_rate": 5.8422216292891575e-09, "loss": 16.9702, "step": 53891 }, { "epoch": 0.9851024548960827, "grad_norm": 5.043648094921394, "learning_rate": 5.827924956833264e-09, "loss": 16.8512, "step": 53892 }, { "epoch": 0.9851207340925293, "grad_norm": 6.61377483260885, "learning_rate": 5.813645788517175e-09, "loss": 17.521, "step": 53893 }, { "epoch": 0.9851390132889758, "grad_norm": 8.21240855240697, "learning_rate": 5.79938412439085e-09, "loss": 18.0233, "step": 53894 }, { "epoch": 0.9851572924854224, "grad_norm": 4.839803187344161, "learning_rate": 5.785139964504249e-09, "loss": 16.8868, "step": 53895 }, { "epoch": 0.9851755716818689, "grad_norm": 5.645014035581526, "learning_rate": 5.770913308907334e-09, "loss": 17.1779, "step": 53896 }, { "epoch": 0.9851938508783153, "grad_norm": 4.824218521730195, "learning_rate": 5.7567041576500615e-09, "loss": 16.7979, "step": 53897 }, { "epoch": 0.9852121300747619, "grad_norm": 6.127371106797111, "learning_rate": 5.742512510781839e-09, "loss": 17.1694, "step": 53898 }, { "epoch": 0.9852304092712084, "grad_norm": 6.853873365287214, "learning_rate": 5.728338368353181e-09, "loss": 17.1632, "step": 53899 }, { "epoch": 0.985248688467655, "grad_norm": 7.637840428636715, "learning_rate": 5.714181730412938e-09, "loss": 17.7315, "step": 53900 }, { "epoch": 0.9852669676641015, "grad_norm": 4.993969704081994, "learning_rate": 5.700042597011623e-09, "loss": 17.0134, "step": 53901 }, { "epoch": 0.985285246860548, "grad_norm": 6.994670148006619, "learning_rate": 5.6859209681980885e-09, "loss": 17.3152, "step": 53902 }, { "epoch": 0.9853035260569946, "grad_norm": 6.809691579255576, "learning_rate": 5.671816844021738e-09, "loss": 17.527, "step": 53903 }, { "epoch": 0.985321805253441, "grad_norm": 6.143089377065693, "learning_rate": 5.657730224532532e-09, "loss": 17.3652, "step": 53904 }, { "epoch": 0.9853400844498876, "grad_norm": 5.551462008264137, "learning_rate": 5.643661109779319e-09, "loss": 17.1183, "step": 53905 }, { "epoch": 0.9853583636463341, "grad_norm": 6.255223916671146, "learning_rate": 5.629609499812061e-09, "loss": 17.0858, "step": 53906 }, { "epoch": 0.9853766428427806, "grad_norm": 7.27753775424096, "learning_rate": 5.6155753946790515e-09, "loss": 17.645, "step": 53907 }, { "epoch": 0.9853949220392272, "grad_norm": 4.928703893313198, "learning_rate": 5.601558794430251e-09, "loss": 16.9488, "step": 53908 }, { "epoch": 0.9854132012356737, "grad_norm": 7.777161274169199, "learning_rate": 5.5875596991150636e-09, "loss": 17.4389, "step": 53909 }, { "epoch": 0.9854314804321203, "grad_norm": 6.696305576581211, "learning_rate": 5.573578108781786e-09, "loss": 17.7241, "step": 53910 }, { "epoch": 0.9854497596285667, "grad_norm": 6.86555248882753, "learning_rate": 5.55961402347982e-09, "loss": 17.4892, "step": 53911 }, { "epoch": 0.9854680388250132, "grad_norm": 5.714421533922147, "learning_rate": 5.5456674432574634e-09, "loss": 17.0276, "step": 53912 }, { "epoch": 0.9854863180214598, "grad_norm": 9.562264304991235, "learning_rate": 5.531738368164674e-09, "loss": 18.3081, "step": 53913 }, { "epoch": 0.9855045972179063, "grad_norm": 7.032432160893347, "learning_rate": 5.517826798249748e-09, "loss": 17.5893, "step": 53914 }, { "epoch": 0.9855228764143529, "grad_norm": 5.673852429732833, "learning_rate": 5.503932733561534e-09, "loss": 17.1469, "step": 53915 }, { "epoch": 0.9855411556107994, "grad_norm": 9.80690077762439, "learning_rate": 5.490056174148328e-09, "loss": 17.6436, "step": 53916 }, { "epoch": 0.9855594348072458, "grad_norm": 8.327504036546893, "learning_rate": 5.476197120058979e-09, "loss": 18.4373, "step": 53917 }, { "epoch": 0.9855777140036924, "grad_norm": 7.175512684134315, "learning_rate": 5.462355571342337e-09, "loss": 17.3189, "step": 53918 }, { "epoch": 0.9855959932001389, "grad_norm": 6.378564419020709, "learning_rate": 5.448531528046697e-09, "loss": 17.7064, "step": 53919 }, { "epoch": 0.9856142723965855, "grad_norm": 7.316131059481687, "learning_rate": 5.434724990220907e-09, "loss": 17.758, "step": 53920 }, { "epoch": 0.985632551593032, "grad_norm": 5.936761563068709, "learning_rate": 5.420935957912709e-09, "loss": 17.2835, "step": 53921 }, { "epoch": 0.9856508307894785, "grad_norm": 7.107099484396244, "learning_rate": 5.4071644311703976e-09, "loss": 17.8338, "step": 53922 }, { "epoch": 0.985669109985925, "grad_norm": 6.533578857027473, "learning_rate": 5.3934104100428205e-09, "loss": 17.2144, "step": 53923 }, { "epoch": 0.9856873891823715, "grad_norm": 6.946604441149272, "learning_rate": 5.379673894578275e-09, "loss": 17.4152, "step": 53924 }, { "epoch": 0.985705668378818, "grad_norm": 5.775066205733778, "learning_rate": 5.365954884823943e-09, "loss": 17.3008, "step": 53925 }, { "epoch": 0.9857239475752646, "grad_norm": 5.9018422483528585, "learning_rate": 5.352253380829231e-09, "loss": 17.0594, "step": 53926 }, { "epoch": 0.9857422267717111, "grad_norm": 6.005203803670478, "learning_rate": 5.338569382640768e-09, "loss": 17.3249, "step": 53927 }, { "epoch": 0.9857605059681577, "grad_norm": 5.729215549754412, "learning_rate": 5.324902890307404e-09, "loss": 17.2556, "step": 53928 }, { "epoch": 0.9857787851646042, "grad_norm": 6.022807315038068, "learning_rate": 5.311253903876878e-09, "loss": 17.169, "step": 53929 }, { "epoch": 0.9857970643610506, "grad_norm": 8.383114825496245, "learning_rate": 5.297622423397486e-09, "loss": 17.824, "step": 53930 }, { "epoch": 0.9858153435574972, "grad_norm": 7.20750867627835, "learning_rate": 5.284008448915856e-09, "loss": 17.3685, "step": 53931 }, { "epoch": 0.9858336227539437, "grad_norm": 7.012540932448688, "learning_rate": 5.270411980480284e-09, "loss": 17.3283, "step": 53932 }, { "epoch": 0.9858519019503903, "grad_norm": 6.269524458715926, "learning_rate": 5.2568330181390626e-09, "loss": 17.2631, "step": 53933 }, { "epoch": 0.9858701811468368, "grad_norm": 5.791855919593184, "learning_rate": 5.2432715619388234e-09, "loss": 17.1837, "step": 53934 }, { "epoch": 0.9858884603432833, "grad_norm": 5.958162338921864, "learning_rate": 5.22972761192786e-09, "loss": 17.4254, "step": 53935 }, { "epoch": 0.9859067395397298, "grad_norm": 5.764846235842292, "learning_rate": 5.216201168152801e-09, "loss": 17.2772, "step": 53936 }, { "epoch": 0.9859250187361763, "grad_norm": 6.9460826504839135, "learning_rate": 5.202692230661943e-09, "loss": 17.4948, "step": 53937 }, { "epoch": 0.9859432979326229, "grad_norm": 6.192679505589665, "learning_rate": 5.1892007995019145e-09, "loss": 17.3029, "step": 53938 }, { "epoch": 0.9859615771290694, "grad_norm": 5.991385806818174, "learning_rate": 5.175726874720455e-09, "loss": 17.2252, "step": 53939 }, { "epoch": 0.9859798563255159, "grad_norm": 5.090542248690033, "learning_rate": 5.162270456364748e-09, "loss": 17.0542, "step": 53940 }, { "epoch": 0.9859981355219625, "grad_norm": 7.6578335833462265, "learning_rate": 5.148831544481425e-09, "loss": 17.83, "step": 53941 }, { "epoch": 0.986016414718409, "grad_norm": 6.650745334797536, "learning_rate": 5.135410139118224e-09, "loss": 17.4629, "step": 53942 }, { "epoch": 0.9860346939148555, "grad_norm": 7.341642176894061, "learning_rate": 5.12200624032233e-09, "loss": 17.7389, "step": 53943 }, { "epoch": 0.986052973111302, "grad_norm": 7.500120627993541, "learning_rate": 5.108619848139817e-09, "loss": 17.6975, "step": 53944 }, { "epoch": 0.9860712523077485, "grad_norm": 7.290928593402507, "learning_rate": 5.095250962618425e-09, "loss": 17.6012, "step": 53945 }, { "epoch": 0.9860895315041951, "grad_norm": 5.909759951505512, "learning_rate": 5.081899583804784e-09, "loss": 17.0945, "step": 53946 }, { "epoch": 0.9861078107006416, "grad_norm": 5.8123596560425215, "learning_rate": 5.068565711745521e-09, "loss": 16.9363, "step": 53947 }, { "epoch": 0.9861260898970882, "grad_norm": 5.6376549375574285, "learning_rate": 5.055249346487823e-09, "loss": 17.1685, "step": 53948 }, { "epoch": 0.9861443690935346, "grad_norm": 5.842235473147648, "learning_rate": 5.041950488077763e-09, "loss": 16.9071, "step": 53949 }, { "epoch": 0.9861626482899811, "grad_norm": 7.139348530685611, "learning_rate": 5.02866913656197e-09, "loss": 17.7799, "step": 53950 }, { "epoch": 0.9861809274864277, "grad_norm": 6.346857755325512, "learning_rate": 5.01540529198763e-09, "loss": 17.3606, "step": 53951 }, { "epoch": 0.9861992066828742, "grad_norm": 9.392990703775189, "learning_rate": 5.002158954400815e-09, "loss": 18.3525, "step": 53952 }, { "epoch": 0.9862174858793208, "grad_norm": 5.430462043863153, "learning_rate": 4.9889301238476014e-09, "loss": 16.9445, "step": 53953 }, { "epoch": 0.9862357650757673, "grad_norm": 6.953339182056035, "learning_rate": 4.9757188003751734e-09, "loss": 17.6991, "step": 53954 }, { "epoch": 0.9862540442722137, "grad_norm": 5.339963060286304, "learning_rate": 4.962524984029049e-09, "loss": 17.0717, "step": 53955 }, { "epoch": 0.9862723234686603, "grad_norm": 5.421705202150493, "learning_rate": 4.949348674856413e-09, "loss": 16.9233, "step": 53956 }, { "epoch": 0.9862906026651068, "grad_norm": 4.898463249160091, "learning_rate": 4.93618987290223e-09, "loss": 16.9362, "step": 53957 }, { "epoch": 0.9863088818615534, "grad_norm": 4.673375226130801, "learning_rate": 4.923048578213685e-09, "loss": 16.7625, "step": 53958 }, { "epoch": 0.9863271610579999, "grad_norm": 7.696412806629046, "learning_rate": 4.909924790836295e-09, "loss": 17.5474, "step": 53959 }, { "epoch": 0.9863454402544464, "grad_norm": 6.23364673689331, "learning_rate": 4.896818510816137e-09, "loss": 16.9902, "step": 53960 }, { "epoch": 0.986363719450893, "grad_norm": 7.00457402560577, "learning_rate": 4.883729738199283e-09, "loss": 17.2521, "step": 53961 }, { "epoch": 0.9863819986473394, "grad_norm": 9.138299346584004, "learning_rate": 4.870658473031253e-09, "loss": 17.9891, "step": 53962 }, { "epoch": 0.986400277843786, "grad_norm": 7.006171904254124, "learning_rate": 4.857604715358122e-09, "loss": 17.2359, "step": 53963 }, { "epoch": 0.9864185570402325, "grad_norm": 5.940957095360634, "learning_rate": 4.844568465225963e-09, "loss": 17.3896, "step": 53964 }, { "epoch": 0.986436836236679, "grad_norm": 6.406777615721336, "learning_rate": 4.83154972267974e-09, "loss": 17.5375, "step": 53965 }, { "epoch": 0.9864551154331256, "grad_norm": 7.781614553611958, "learning_rate": 4.818548487765529e-09, "loss": 17.5698, "step": 53966 }, { "epoch": 0.9864733946295721, "grad_norm": 5.371160160051401, "learning_rate": 4.8055647605288466e-09, "loss": 16.9789, "step": 53967 }, { "epoch": 0.9864916738260187, "grad_norm": 6.796248919078832, "learning_rate": 4.792598541015214e-09, "loss": 17.4843, "step": 53968 }, { "epoch": 0.9865099530224651, "grad_norm": 5.583966956369211, "learning_rate": 4.77964982927015e-09, "loss": 17.2295, "step": 53969 }, { "epoch": 0.9865282322189116, "grad_norm": 5.237579244473497, "learning_rate": 4.766718625339173e-09, "loss": 17.0332, "step": 53970 }, { "epoch": 0.9865465114153582, "grad_norm": 6.01872247884823, "learning_rate": 4.753804929267247e-09, "loss": 17.3838, "step": 53971 }, { "epoch": 0.9865647906118047, "grad_norm": 5.5321563079439535, "learning_rate": 4.740908741099337e-09, "loss": 16.993, "step": 53972 }, { "epoch": 0.9865830698082513, "grad_norm": 7.055555024050097, "learning_rate": 4.728030060881517e-09, "loss": 17.6143, "step": 53973 }, { "epoch": 0.9866013490046978, "grad_norm": 5.690513422042387, "learning_rate": 4.7151688886581945e-09, "loss": 16.9514, "step": 53974 }, { "epoch": 0.9866196282011442, "grad_norm": 6.622151505262531, "learning_rate": 4.70232522447489e-09, "loss": 17.4455, "step": 53975 }, { "epoch": 0.9866379073975908, "grad_norm": 5.768746064232918, "learning_rate": 4.6894990683765685e-09, "loss": 17.1625, "step": 53976 }, { "epoch": 0.9866561865940373, "grad_norm": 5.5150122644639685, "learning_rate": 4.676690420408192e-09, "loss": 17.0542, "step": 53977 }, { "epoch": 0.9866744657904839, "grad_norm": 6.708506762351218, "learning_rate": 4.66389928061417e-09, "loss": 17.5827, "step": 53978 }, { "epoch": 0.9866927449869304, "grad_norm": 5.689563358750597, "learning_rate": 4.651125649040022e-09, "loss": 16.8896, "step": 53979 }, { "epoch": 0.9867110241833769, "grad_norm": 4.299369942512752, "learning_rate": 4.638369525730157e-09, "loss": 16.574, "step": 53980 }, { "epoch": 0.9867293033798235, "grad_norm": 6.169601565265982, "learning_rate": 4.625630910728984e-09, "loss": 17.0837, "step": 53981 }, { "epoch": 0.9867475825762699, "grad_norm": 8.354192079697421, "learning_rate": 4.612909804082022e-09, "loss": 17.6602, "step": 53982 }, { "epoch": 0.9867658617727165, "grad_norm": 5.676490886865904, "learning_rate": 4.600206205832569e-09, "loss": 16.9065, "step": 53983 }, { "epoch": 0.986784140969163, "grad_norm": 5.8430471531216135, "learning_rate": 4.5875201160267e-09, "loss": 17.0924, "step": 53984 }, { "epoch": 0.9868024201656095, "grad_norm": 6.369936131699505, "learning_rate": 4.574851534707714e-09, "loss": 17.2592, "step": 53985 }, { "epoch": 0.9868206993620561, "grad_norm": 5.3522635556607, "learning_rate": 4.562200461920574e-09, "loss": 16.8616, "step": 53986 }, { "epoch": 0.9868389785585026, "grad_norm": 5.695137921815875, "learning_rate": 4.549566897709134e-09, "loss": 17.0537, "step": 53987 }, { "epoch": 0.9868572577549491, "grad_norm": 8.183388636482958, "learning_rate": 4.53695084211836e-09, "loss": 18.0894, "step": 53988 }, { "epoch": 0.9868755369513956, "grad_norm": 4.96414269842186, "learning_rate": 4.524352295191547e-09, "loss": 16.6966, "step": 53989 }, { "epoch": 0.9868938161478421, "grad_norm": 4.868364072411855, "learning_rate": 4.511771256974218e-09, "loss": 16.8185, "step": 53990 }, { "epoch": 0.9869120953442887, "grad_norm": 5.167880256444845, "learning_rate": 4.499207727509114e-09, "loss": 17.0226, "step": 53991 }, { "epoch": 0.9869303745407352, "grad_norm": 6.111461330922473, "learning_rate": 4.486661706840645e-09, "loss": 17.2723, "step": 53992 }, { "epoch": 0.9869486537371817, "grad_norm": 5.334774027060431, "learning_rate": 4.47413319501322e-09, "loss": 17.0529, "step": 53993 }, { "epoch": 0.9869669329336282, "grad_norm": 5.168464761998433, "learning_rate": 4.461622192070136e-09, "loss": 16.8759, "step": 53994 }, { "epoch": 0.9869852121300747, "grad_norm": 6.720667404445755, "learning_rate": 4.449128698055805e-09, "loss": 17.221, "step": 53995 }, { "epoch": 0.9870034913265213, "grad_norm": 6.748106892456099, "learning_rate": 4.436652713014078e-09, "loss": 17.748, "step": 53996 }, { "epoch": 0.9870217705229678, "grad_norm": 6.033601414909475, "learning_rate": 4.424194236988255e-09, "loss": 17.3178, "step": 53997 }, { "epoch": 0.9870400497194143, "grad_norm": 5.5821674806841015, "learning_rate": 4.4117532700216346e-09, "loss": 17.1972, "step": 53998 }, { "epoch": 0.9870583289158609, "grad_norm": 7.656942627209422, "learning_rate": 4.3993298121586255e-09, "loss": 17.3509, "step": 53999 }, { "epoch": 0.9870766081123074, "grad_norm": 6.269928233196127, "learning_rate": 4.386923863442527e-09, "loss": 17.4375, "step": 54000 }, { "epoch": 0.9870948873087539, "grad_norm": 6.1703517981826295, "learning_rate": 4.374535423916637e-09, "loss": 17.0413, "step": 54001 }, { "epoch": 0.9871131665052004, "grad_norm": 7.138529574652607, "learning_rate": 4.36216449362481e-09, "loss": 17.4265, "step": 54002 }, { "epoch": 0.9871314457016469, "grad_norm": 5.806580314636955, "learning_rate": 4.349811072609789e-09, "loss": 17.3057, "step": 54003 }, { "epoch": 0.9871497248980935, "grad_norm": 6.516724953252693, "learning_rate": 4.337475160915428e-09, "loss": 17.489, "step": 54004 }, { "epoch": 0.98716800409454, "grad_norm": 5.304641620257098, "learning_rate": 4.32515675858447e-09, "loss": 16.8517, "step": 54005 }, { "epoch": 0.9871862832909866, "grad_norm": 5.760700556286757, "learning_rate": 4.31285586566077e-09, "loss": 17.0281, "step": 54006 }, { "epoch": 0.987204562487433, "grad_norm": 5.670604732317518, "learning_rate": 4.300572482186516e-09, "loss": 17.247, "step": 54007 }, { "epoch": 0.9872228416838795, "grad_norm": 6.5259598593223185, "learning_rate": 4.2883066082055616e-09, "loss": 17.4875, "step": 54008 }, { "epoch": 0.9872411208803261, "grad_norm": 4.81326093717225, "learning_rate": 4.276058243760095e-09, "loss": 16.8051, "step": 54009 }, { "epoch": 0.9872594000767726, "grad_norm": 6.648580779709109, "learning_rate": 4.263827388893971e-09, "loss": 17.6267, "step": 54010 }, { "epoch": 0.9872776792732192, "grad_norm": 6.8216582930025105, "learning_rate": 4.251614043649377e-09, "loss": 17.2926, "step": 54011 }, { "epoch": 0.9872959584696657, "grad_norm": 5.76405437916282, "learning_rate": 4.239418208069613e-09, "loss": 16.9771, "step": 54012 }, { "epoch": 0.9873142376661121, "grad_norm": 7.323268987938478, "learning_rate": 4.2272398821968654e-09, "loss": 17.9022, "step": 54013 }, { "epoch": 0.9873325168625587, "grad_norm": 6.526634927163913, "learning_rate": 4.215079066074434e-09, "loss": 17.3589, "step": 54014 }, { "epoch": 0.9873507960590052, "grad_norm": 5.321051116935017, "learning_rate": 4.202935759744508e-09, "loss": 16.9276, "step": 54015 }, { "epoch": 0.9873690752554518, "grad_norm": 6.942330199819894, "learning_rate": 4.190809963249831e-09, "loss": 17.6499, "step": 54016 }, { "epoch": 0.9873873544518983, "grad_norm": 5.848780471993034, "learning_rate": 4.178701676633146e-09, "loss": 17.345, "step": 54017 }, { "epoch": 0.9874056336483448, "grad_norm": 6.077006774368794, "learning_rate": 4.166610899936086e-09, "loss": 17.2209, "step": 54018 }, { "epoch": 0.9874239128447914, "grad_norm": 5.841780395005369, "learning_rate": 4.154537633201394e-09, "loss": 17.1482, "step": 54019 }, { "epoch": 0.9874421920412378, "grad_norm": 5.650984866928628, "learning_rate": 4.142481876471816e-09, "loss": 17.3848, "step": 54020 }, { "epoch": 0.9874604712376844, "grad_norm": 6.992031319899651, "learning_rate": 4.130443629789538e-09, "loss": 17.4202, "step": 54021 }, { "epoch": 0.9874787504341309, "grad_norm": 7.173681664775021, "learning_rate": 4.118422893196195e-09, "loss": 17.9806, "step": 54022 }, { "epoch": 0.9874970296305774, "grad_norm": 5.618474304459318, "learning_rate": 4.106419666734529e-09, "loss": 16.9777, "step": 54023 }, { "epoch": 0.987515308827024, "grad_norm": 5.955411849520248, "learning_rate": 4.094433950446175e-09, "loss": 17.2688, "step": 54024 }, { "epoch": 0.9875335880234705, "grad_norm": 6.264766391669079, "learning_rate": 4.082465744373321e-09, "loss": 17.6472, "step": 54025 }, { "epoch": 0.9875518672199171, "grad_norm": 6.555206397133702, "learning_rate": 4.0705150485581545e-09, "loss": 17.6523, "step": 54026 }, { "epoch": 0.9875701464163635, "grad_norm": 6.450607561206467, "learning_rate": 4.058581863041755e-09, "loss": 17.2941, "step": 54027 }, { "epoch": 0.98758842561281, "grad_norm": 5.873198886148908, "learning_rate": 4.046666187866865e-09, "loss": 17.1476, "step": 54028 }, { "epoch": 0.9876067048092566, "grad_norm": 6.434060270946836, "learning_rate": 4.03476802307512e-09, "loss": 17.0829, "step": 54029 }, { "epoch": 0.9876249840057031, "grad_norm": 4.833895383524956, "learning_rate": 4.022887368707596e-09, "loss": 16.8946, "step": 54030 }, { "epoch": 0.9876432632021497, "grad_norm": 7.604935714495955, "learning_rate": 4.011024224806481e-09, "loss": 17.7019, "step": 54031 }, { "epoch": 0.9876615423985962, "grad_norm": 5.555644662131046, "learning_rate": 3.999178591413411e-09, "loss": 17.1066, "step": 54032 }, { "epoch": 0.9876798215950426, "grad_norm": 5.893480887902076, "learning_rate": 3.9873504685694616e-09, "loss": 17.2548, "step": 54033 }, { "epoch": 0.9876981007914892, "grad_norm": 5.808444866930729, "learning_rate": 3.9755398563157135e-09, "loss": 16.9458, "step": 54034 }, { "epoch": 0.9877163799879357, "grad_norm": 8.300076733406407, "learning_rate": 3.963746754694908e-09, "loss": 17.4664, "step": 54035 }, { "epoch": 0.9877346591843823, "grad_norm": 6.100968907741287, "learning_rate": 3.95197116374757e-09, "loss": 17.0966, "step": 54036 }, { "epoch": 0.9877529383808288, "grad_norm": 6.441928751519208, "learning_rate": 3.940213083514777e-09, "loss": 17.269, "step": 54037 }, { "epoch": 0.9877712175772753, "grad_norm": 7.078710080765474, "learning_rate": 3.928472514038162e-09, "loss": 17.879, "step": 54038 }, { "epoch": 0.9877894967737219, "grad_norm": 5.900377130220539, "learning_rate": 3.9167494553588035e-09, "loss": 17.1417, "step": 54039 }, { "epoch": 0.9878077759701683, "grad_norm": 6.180110221542711, "learning_rate": 3.9050439075172255e-09, "loss": 17.1697, "step": 54040 }, { "epoch": 0.9878260551666149, "grad_norm": 6.148395297774611, "learning_rate": 3.893355870555615e-09, "loss": 17.2905, "step": 54041 }, { "epoch": 0.9878443343630614, "grad_norm": 5.6945738565644755, "learning_rate": 3.8816853445139415e-09, "loss": 17.2451, "step": 54042 }, { "epoch": 0.9878626135595079, "grad_norm": 6.313953538460906, "learning_rate": 3.870032329433282e-09, "loss": 17.4774, "step": 54043 }, { "epoch": 0.9878808927559545, "grad_norm": 6.984254407439393, "learning_rate": 3.858396825354716e-09, "loss": 17.6695, "step": 54044 }, { "epoch": 0.987899171952401, "grad_norm": 5.22237579517995, "learning_rate": 3.846778832318765e-09, "loss": 17.0425, "step": 54045 }, { "epoch": 0.9879174511488475, "grad_norm": 6.356013289897043, "learning_rate": 3.835178350366509e-09, "loss": 17.5961, "step": 54046 }, { "epoch": 0.987935730345294, "grad_norm": 10.17061771887176, "learning_rate": 3.823595379538469e-09, "loss": 17.6708, "step": 54047 }, { "epoch": 0.9879540095417405, "grad_norm": 8.090417063298029, "learning_rate": 3.812029919875171e-09, "loss": 18.3068, "step": 54048 }, { "epoch": 0.9879722887381871, "grad_norm": 6.187797700226874, "learning_rate": 3.800481971417136e-09, "loss": 17.2827, "step": 54049 }, { "epoch": 0.9879905679346336, "grad_norm": 4.333702003163915, "learning_rate": 3.788951534204332e-09, "loss": 16.7684, "step": 54050 }, { "epoch": 0.9880088471310802, "grad_norm": 5.936931649421029, "learning_rate": 3.777438608278394e-09, "loss": 17.0849, "step": 54051 }, { "epoch": 0.9880271263275267, "grad_norm": 5.6604924750181915, "learning_rate": 3.7659431936787334e-09, "loss": 17.0536, "step": 54052 }, { "epoch": 0.9880454055239731, "grad_norm": 6.319477763160513, "learning_rate": 3.754465290445875e-09, "loss": 17.3322, "step": 54053 }, { "epoch": 0.9880636847204197, "grad_norm": 6.7535653531868975, "learning_rate": 3.7430048986203396e-09, "loss": 17.6227, "step": 54054 }, { "epoch": 0.9880819639168662, "grad_norm": 4.8020640994618935, "learning_rate": 3.731562018241541e-09, "loss": 17.072, "step": 54055 }, { "epoch": 0.9881002431133128, "grad_norm": 4.829401430668169, "learning_rate": 3.7201366493505587e-09, "loss": 16.8912, "step": 54056 }, { "epoch": 0.9881185223097593, "grad_norm": 6.394816458141583, "learning_rate": 3.7087287919862493e-09, "loss": 17.693, "step": 54057 }, { "epoch": 0.9881368015062058, "grad_norm": 5.386428276227841, "learning_rate": 3.6973384461896912e-09, "loss": 17.18, "step": 54058 }, { "epoch": 0.9881550807026523, "grad_norm": 6.427866064066116, "learning_rate": 3.6859656120008526e-09, "loss": 17.4755, "step": 54059 }, { "epoch": 0.9881733598990988, "grad_norm": 5.19021558691008, "learning_rate": 3.674610289458591e-09, "loss": 16.8317, "step": 54060 }, { "epoch": 0.9881916390955453, "grad_norm": 5.597125821335917, "learning_rate": 3.6632724786034302e-09, "loss": 17.1751, "step": 54061 }, { "epoch": 0.9882099182919919, "grad_norm": 5.117511951154943, "learning_rate": 3.6519521794747827e-09, "loss": 16.9229, "step": 54062 }, { "epoch": 0.9882281974884384, "grad_norm": 6.234279473727197, "learning_rate": 3.640649392112616e-09, "loss": 17.3053, "step": 54063 }, { "epoch": 0.988246476684885, "grad_norm": 4.90404602913635, "learning_rate": 3.6293641165563442e-09, "loss": 16.8429, "step": 54064 }, { "epoch": 0.9882647558813314, "grad_norm": 5.5309138321106035, "learning_rate": 3.6180963528453795e-09, "loss": 17.2205, "step": 54065 }, { "epoch": 0.9882830350777779, "grad_norm": 7.275736317473434, "learning_rate": 3.6068461010196897e-09, "loss": 17.6946, "step": 54066 }, { "epoch": 0.9883013142742245, "grad_norm": 5.498284787047549, "learning_rate": 3.5956133611186884e-09, "loss": 17.1761, "step": 54067 }, { "epoch": 0.988319593470671, "grad_norm": 7.614945837628692, "learning_rate": 3.5843981331812327e-09, "loss": 17.2425, "step": 54068 }, { "epoch": 0.9883378726671176, "grad_norm": 7.11938569842659, "learning_rate": 3.573200417246736e-09, "loss": 17.1379, "step": 54069 }, { "epoch": 0.9883561518635641, "grad_norm": 4.765434339878192, "learning_rate": 3.562020213354611e-09, "loss": 16.712, "step": 54070 }, { "epoch": 0.9883744310600105, "grad_norm": 6.157371381189756, "learning_rate": 3.550857521544271e-09, "loss": 17.1142, "step": 54071 }, { "epoch": 0.9883927102564571, "grad_norm": 5.279977198734578, "learning_rate": 3.5397123418545733e-09, "loss": 16.9999, "step": 54072 }, { "epoch": 0.9884109894529036, "grad_norm": 6.219885595667947, "learning_rate": 3.5285846743243755e-09, "loss": 17.5561, "step": 54073 }, { "epoch": 0.9884292686493502, "grad_norm": 6.884956993747959, "learning_rate": 3.5174745189930914e-09, "loss": 17.6161, "step": 54074 }, { "epoch": 0.9884475478457967, "grad_norm": 8.43730292819108, "learning_rate": 3.506381875899578e-09, "loss": 17.9562, "step": 54075 }, { "epoch": 0.9884658270422432, "grad_norm": 5.472063648483445, "learning_rate": 3.495306745082139e-09, "loss": 16.9364, "step": 54076 }, { "epoch": 0.9884841062386898, "grad_norm": 5.386537534732996, "learning_rate": 3.484249126580741e-09, "loss": 17.3075, "step": 54077 }, { "epoch": 0.9885023854351362, "grad_norm": 5.207359491011422, "learning_rate": 3.4732090204331325e-09, "loss": 17.1533, "step": 54078 }, { "epoch": 0.9885206646315828, "grad_norm": 6.66052208717505, "learning_rate": 3.4621864266781713e-09, "loss": 17.3276, "step": 54079 }, { "epoch": 0.9885389438280293, "grad_norm": 6.850979304029996, "learning_rate": 3.45118134535527e-09, "loss": 17.4755, "step": 54080 }, { "epoch": 0.9885572230244758, "grad_norm": 6.834328883225398, "learning_rate": 3.4401937765016213e-09, "loss": 17.3557, "step": 54081 }, { "epoch": 0.9885755022209224, "grad_norm": 6.759822558194927, "learning_rate": 3.429223720157193e-09, "loss": 17.5596, "step": 54082 }, { "epoch": 0.9885937814173689, "grad_norm": 5.285148127255406, "learning_rate": 3.418271176359178e-09, "loss": 17.082, "step": 54083 }, { "epoch": 0.9886120606138155, "grad_norm": 6.45126796943364, "learning_rate": 3.4073361451469888e-09, "loss": 17.4638, "step": 54084 }, { "epoch": 0.9886303398102619, "grad_norm": 7.578353051652412, "learning_rate": 3.3964186265583732e-09, "loss": 17.4844, "step": 54085 }, { "epoch": 0.9886486190067084, "grad_norm": 5.305226701264345, "learning_rate": 3.3855186206316336e-09, "loss": 16.8335, "step": 54086 }, { "epoch": 0.988666898203155, "grad_norm": 7.285360084053944, "learning_rate": 3.3746361274050734e-09, "loss": 17.1134, "step": 54087 }, { "epoch": 0.9886851773996015, "grad_norm": 5.487562750588891, "learning_rate": 3.3637711469169943e-09, "loss": 17.3294, "step": 54088 }, { "epoch": 0.9887034565960481, "grad_norm": 7.017216751881483, "learning_rate": 3.3529236792051447e-09, "loss": 16.9839, "step": 54089 }, { "epoch": 0.9887217357924946, "grad_norm": 4.519922293716564, "learning_rate": 3.3420937243078268e-09, "loss": 16.7511, "step": 54090 }, { "epoch": 0.988740014988941, "grad_norm": 7.6276078549095665, "learning_rate": 3.331281282262788e-09, "loss": 18.045, "step": 54091 }, { "epoch": 0.9887582941853876, "grad_norm": 5.300074063937029, "learning_rate": 3.3204863531077768e-09, "loss": 16.9916, "step": 54092 }, { "epoch": 0.9887765733818341, "grad_norm": 6.331686921456846, "learning_rate": 3.309708936881095e-09, "loss": 17.3767, "step": 54093 }, { "epoch": 0.9887948525782807, "grad_norm": 4.279195803836271, "learning_rate": 3.298949033620491e-09, "loss": 16.601, "step": 54094 }, { "epoch": 0.9888131317747272, "grad_norm": 7.538848509864788, "learning_rate": 3.2882066433631564e-09, "loss": 17.9686, "step": 54095 }, { "epoch": 0.9888314109711737, "grad_norm": 6.282491816963858, "learning_rate": 3.2774817661473947e-09, "loss": 17.2145, "step": 54096 }, { "epoch": 0.9888496901676203, "grad_norm": 4.98478061583896, "learning_rate": 3.2667744020103976e-09, "loss": 16.7089, "step": 54097 }, { "epoch": 0.9888679693640667, "grad_norm": 5.421101796659564, "learning_rate": 3.256084550989358e-09, "loss": 17.0842, "step": 54098 }, { "epoch": 0.9888862485605133, "grad_norm": 4.6463674719498265, "learning_rate": 3.2454122131225787e-09, "loss": 16.811, "step": 54099 }, { "epoch": 0.9889045277569598, "grad_norm": 6.089305121076916, "learning_rate": 3.2347573884466965e-09, "loss": 17.2912, "step": 54100 }, { "epoch": 0.9889228069534063, "grad_norm": 5.803052252354127, "learning_rate": 3.2241200770000146e-09, "loss": 16.9455, "step": 54101 }, { "epoch": 0.9889410861498529, "grad_norm": 5.537253528743205, "learning_rate": 3.2135002788186154e-09, "loss": 16.9721, "step": 54102 }, { "epoch": 0.9889593653462994, "grad_norm": 5.196897347948584, "learning_rate": 3.202897993940246e-09, "loss": 16.9752, "step": 54103 }, { "epoch": 0.988977644542746, "grad_norm": 6.346900191974579, "learning_rate": 3.1923132224026544e-09, "loss": 17.2295, "step": 54104 }, { "epoch": 0.9889959237391924, "grad_norm": 5.499595514073193, "learning_rate": 3.1817459642419223e-09, "loss": 17.0778, "step": 54105 }, { "epoch": 0.9890142029356389, "grad_norm": 7.195017338545784, "learning_rate": 3.171196219495798e-09, "loss": 17.569, "step": 54106 }, { "epoch": 0.9890324821320855, "grad_norm": 5.519997815180903, "learning_rate": 3.160663988200918e-09, "loss": 17.1317, "step": 54107 }, { "epoch": 0.989050761328532, "grad_norm": 5.168388320702714, "learning_rate": 3.1501492703939206e-09, "loss": 16.9905, "step": 54108 }, { "epoch": 0.9890690405249786, "grad_norm": 6.629234374743465, "learning_rate": 3.139652066112553e-09, "loss": 17.4648, "step": 54109 }, { "epoch": 0.989087319721425, "grad_norm": 6.776900352600469, "learning_rate": 3.129172375392897e-09, "loss": 17.6382, "step": 54110 }, { "epoch": 0.9891055989178715, "grad_norm": 5.722017129305357, "learning_rate": 3.1187101982721458e-09, "loss": 17.2429, "step": 54111 }, { "epoch": 0.9891238781143181, "grad_norm": 6.533674164047116, "learning_rate": 3.108265534786381e-09, "loss": 17.481, "step": 54112 }, { "epoch": 0.9891421573107646, "grad_norm": 6.500888218140306, "learning_rate": 3.0978383849727955e-09, "loss": 17.5054, "step": 54113 }, { "epoch": 0.9891604365072112, "grad_norm": 5.346447475761975, "learning_rate": 3.0874287488674714e-09, "loss": 16.7696, "step": 54114 }, { "epoch": 0.9891787157036577, "grad_norm": 6.043355808633382, "learning_rate": 3.0770366265070463e-09, "loss": 17.1428, "step": 54115 }, { "epoch": 0.9891969949001042, "grad_norm": 5.391520637887192, "learning_rate": 3.066662017928157e-09, "loss": 16.9571, "step": 54116 }, { "epoch": 0.9892152740965507, "grad_norm": 6.942935722083934, "learning_rate": 3.056304923166886e-09, "loss": 17.4643, "step": 54117 }, { "epoch": 0.9892335532929972, "grad_norm": 6.615048070736012, "learning_rate": 3.045965342259871e-09, "loss": 17.4232, "step": 54118 }, { "epoch": 0.9892518324894438, "grad_norm": 5.849272893374682, "learning_rate": 3.0356432752431943e-09, "loss": 17.0151, "step": 54119 }, { "epoch": 0.9892701116858903, "grad_norm": 6.294784611936888, "learning_rate": 3.0253387221529372e-09, "loss": 17.4204, "step": 54120 }, { "epoch": 0.9892883908823368, "grad_norm": 7.223551319549173, "learning_rate": 3.015051683024628e-09, "loss": 17.833, "step": 54121 }, { "epoch": 0.9893066700787834, "grad_norm": 5.866105417937165, "learning_rate": 3.0047821578954583e-09, "loss": 17.1698, "step": 54122 }, { "epoch": 0.9893249492752298, "grad_norm": 8.51436650081031, "learning_rate": 2.994530146800956e-09, "loss": 17.4998, "step": 54123 }, { "epoch": 0.9893432284716764, "grad_norm": 6.405841878545907, "learning_rate": 2.9842956497772026e-09, "loss": 17.3799, "step": 54124 }, { "epoch": 0.9893615076681229, "grad_norm": 6.363143646262069, "learning_rate": 2.97407866685917e-09, "loss": 16.8033, "step": 54125 }, { "epoch": 0.9893797868645694, "grad_norm": 5.611244993715052, "learning_rate": 2.963879198084052e-09, "loss": 17.1947, "step": 54126 }, { "epoch": 0.989398066061016, "grad_norm": 5.500802171352688, "learning_rate": 2.9536972434868194e-09, "loss": 17.112, "step": 54127 }, { "epoch": 0.9894163452574625, "grad_norm": 5.655645251973831, "learning_rate": 2.943532803103e-09, "loss": 17.069, "step": 54128 }, { "epoch": 0.989434624453909, "grad_norm": 6.131708631742608, "learning_rate": 2.9333858769681202e-09, "loss": 17.3231, "step": 54129 }, { "epoch": 0.9894529036503555, "grad_norm": 6.944750236567738, "learning_rate": 2.923256465118818e-09, "loss": 17.4176, "step": 54130 }, { "epoch": 0.989471182846802, "grad_norm": 5.968084573434903, "learning_rate": 2.9131445675895097e-09, "loss": 17.1613, "step": 54131 }, { "epoch": 0.9894894620432486, "grad_norm": 6.125111819421595, "learning_rate": 2.903050184416278e-09, "loss": 17.1785, "step": 54132 }, { "epoch": 0.9895077412396951, "grad_norm": 6.438576347271208, "learning_rate": 2.8929733156340957e-09, "loss": 17.3485, "step": 54133 }, { "epoch": 0.9895260204361416, "grad_norm": 5.595549561395839, "learning_rate": 2.8829139612779334e-09, "loss": 17.1364, "step": 54134 }, { "epoch": 0.9895442996325882, "grad_norm": 8.107313767710611, "learning_rate": 2.8728721213838737e-09, "loss": 17.2463, "step": 54135 }, { "epoch": 0.9895625788290346, "grad_norm": 4.780744305632312, "learning_rate": 2.8628477959868894e-09, "loss": 16.8857, "step": 54136 }, { "epoch": 0.9895808580254812, "grad_norm": 6.689260888947048, "learning_rate": 2.8528409851219516e-09, "loss": 17.2507, "step": 54137 }, { "epoch": 0.9895991372219277, "grad_norm": 8.076718575708862, "learning_rate": 2.842851688824033e-09, "loss": 17.9387, "step": 54138 }, { "epoch": 0.9896174164183742, "grad_norm": 5.759190527902142, "learning_rate": 2.8328799071286605e-09, "loss": 17.292, "step": 54139 }, { "epoch": 0.9896356956148208, "grad_norm": 5.983528084750759, "learning_rate": 2.822925640070251e-09, "loss": 17.4176, "step": 54140 }, { "epoch": 0.9896539748112673, "grad_norm": 6.8632077651387595, "learning_rate": 2.8129888876837765e-09, "loss": 17.3213, "step": 54141 }, { "epoch": 0.9896722540077139, "grad_norm": 7.88361819720193, "learning_rate": 2.803069650004209e-09, "loss": 17.5364, "step": 54142 }, { "epoch": 0.9896905332041603, "grad_norm": 7.086099477068711, "learning_rate": 2.793167927065965e-09, "loss": 17.8891, "step": 54143 }, { "epoch": 0.9897088124006068, "grad_norm": 7.766952211359471, "learning_rate": 2.7832837189040175e-09, "loss": 17.2217, "step": 54144 }, { "epoch": 0.9897270915970534, "grad_norm": 5.406303347824973, "learning_rate": 2.7734170255533375e-09, "loss": 17.1111, "step": 54145 }, { "epoch": 0.9897453707934999, "grad_norm": 8.183231661861575, "learning_rate": 2.763567847048343e-09, "loss": 17.6632, "step": 54146 }, { "epoch": 0.9897636499899465, "grad_norm": 6.166540626169597, "learning_rate": 2.753736183422895e-09, "loss": 17.3402, "step": 54147 }, { "epoch": 0.989781929186393, "grad_norm": 4.892485026686637, "learning_rate": 2.7439220347125207e-09, "loss": 16.7809, "step": 54148 }, { "epoch": 0.9898002083828394, "grad_norm": 7.259617845746163, "learning_rate": 2.734125400950527e-09, "loss": 17.5326, "step": 54149 }, { "epoch": 0.989818487579286, "grad_norm": 7.87557333928101, "learning_rate": 2.724346282171886e-09, "loss": 17.8313, "step": 54150 }, { "epoch": 0.9898367667757325, "grad_norm": 5.65777797815353, "learning_rate": 2.7145846784110143e-09, "loss": 17.337, "step": 54151 }, { "epoch": 0.9898550459721791, "grad_norm": 4.234576407595826, "learning_rate": 2.704840589701774e-09, "loss": 16.7388, "step": 54152 }, { "epoch": 0.9898733251686256, "grad_norm": 6.909174924396392, "learning_rate": 2.6951140160785815e-09, "loss": 17.7346, "step": 54153 }, { "epoch": 0.9898916043650721, "grad_norm": 5.429248334554008, "learning_rate": 2.685404957574744e-09, "loss": 17.0828, "step": 54154 }, { "epoch": 0.9899098835615187, "grad_norm": 5.622810665461965, "learning_rate": 2.6757134142257892e-09, "loss": 17.102, "step": 54155 }, { "epoch": 0.9899281627579651, "grad_norm": 6.3582629297102065, "learning_rate": 2.6660393860639123e-09, "loss": 17.2894, "step": 54156 }, { "epoch": 0.9899464419544117, "grad_norm": 6.245079557997178, "learning_rate": 2.656382873124641e-09, "loss": 17.2885, "step": 54157 }, { "epoch": 0.9899647211508582, "grad_norm": 4.990091046483997, "learning_rate": 2.646743875440727e-09, "loss": 16.7495, "step": 54158 }, { "epoch": 0.9899830003473047, "grad_norm": 6.815969490964968, "learning_rate": 2.637122393046587e-09, "loss": 17.6642, "step": 54159 }, { "epoch": 0.9900012795437513, "grad_norm": 5.5060051416365186, "learning_rate": 2.6275184259755284e-09, "loss": 17.3551, "step": 54160 }, { "epoch": 0.9900195587401978, "grad_norm": 6.596270580258576, "learning_rate": 2.6179319742614118e-09, "loss": 17.3607, "step": 54161 }, { "epoch": 0.9900378379366443, "grad_norm": 6.1238428869890615, "learning_rate": 2.6083630379375444e-09, "loss": 17.3022, "step": 54162 }, { "epoch": 0.9900561171330908, "grad_norm": 6.024234083898366, "learning_rate": 2.5988116170377887e-09, "loss": 17.3461, "step": 54163 }, { "epoch": 0.9900743963295373, "grad_norm": 5.448222014843071, "learning_rate": 2.5892777115960056e-09, "loss": 17.0697, "step": 54164 }, { "epoch": 0.9900926755259839, "grad_norm": 4.637916583948295, "learning_rate": 2.579761321644392e-09, "loss": 16.9295, "step": 54165 }, { "epoch": 0.9901109547224304, "grad_norm": 5.707983258592383, "learning_rate": 2.5702624472179196e-09, "loss": 16.853, "step": 54166 }, { "epoch": 0.990129233918877, "grad_norm": 4.9213514549714645, "learning_rate": 2.56078108834823e-09, "loss": 16.9792, "step": 54167 }, { "epoch": 0.9901475131153235, "grad_norm": 5.688361061466161, "learning_rate": 2.55131724506974e-09, "loss": 17.0771, "step": 54168 }, { "epoch": 0.9901657923117699, "grad_norm": 5.683406559671997, "learning_rate": 2.541870917415201e-09, "loss": 17.2407, "step": 54169 }, { "epoch": 0.9901840715082165, "grad_norm": 5.412471468424192, "learning_rate": 2.5324421054179204e-09, "loss": 17.0246, "step": 54170 }, { "epoch": 0.990202350704663, "grad_norm": 5.566971861706828, "learning_rate": 2.523030809110649e-09, "loss": 17.123, "step": 54171 }, { "epoch": 0.9902206299011096, "grad_norm": 5.521952739131126, "learning_rate": 2.513637028526139e-09, "loss": 17.1829, "step": 54172 }, { "epoch": 0.9902389090975561, "grad_norm": 6.405177004035314, "learning_rate": 2.504260763698252e-09, "loss": 17.5056, "step": 54173 }, { "epoch": 0.9902571882940026, "grad_norm": 6.68812606914428, "learning_rate": 2.494902014658629e-09, "loss": 17.7508, "step": 54174 }, { "epoch": 0.9902754674904491, "grad_norm": 8.075855342167564, "learning_rate": 2.485560781441132e-09, "loss": 17.6968, "step": 54175 }, { "epoch": 0.9902937466868956, "grad_norm": 7.741549013371755, "learning_rate": 2.4762370640779577e-09, "loss": 18.5127, "step": 54176 }, { "epoch": 0.9903120258833422, "grad_norm": 7.212293114627317, "learning_rate": 2.4669308626018573e-09, "loss": 17.9148, "step": 54177 }, { "epoch": 0.9903303050797887, "grad_norm": 5.9160169613713505, "learning_rate": 2.4576421770455826e-09, "loss": 17.245, "step": 54178 }, { "epoch": 0.9903485842762352, "grad_norm": 4.96343888865324, "learning_rate": 2.4483710074418853e-09, "loss": 17.1053, "step": 54179 }, { "epoch": 0.9903668634726818, "grad_norm": 5.135753299450467, "learning_rate": 2.4391173538224067e-09, "loss": 16.8901, "step": 54180 }, { "epoch": 0.9903851426691282, "grad_norm": 7.111754561386756, "learning_rate": 2.4298812162204533e-09, "loss": 17.7533, "step": 54181 }, { "epoch": 0.9904034218655748, "grad_norm": 5.924367097563781, "learning_rate": 2.4206625946682214e-09, "loss": 17.1429, "step": 54182 }, { "epoch": 0.9904217010620213, "grad_norm": 4.976323607653633, "learning_rate": 2.4114614891979083e-09, "loss": 16.9499, "step": 54183 }, { "epoch": 0.9904399802584678, "grad_norm": 8.392804656559079, "learning_rate": 2.4022778998417097e-09, "loss": 17.5554, "step": 54184 }, { "epoch": 0.9904582594549144, "grad_norm": 6.9712505994929455, "learning_rate": 2.393111826631267e-09, "loss": 17.6229, "step": 54185 }, { "epoch": 0.9904765386513609, "grad_norm": 5.694225626899222, "learning_rate": 2.383963269599887e-09, "loss": 17.164, "step": 54186 }, { "epoch": 0.9904948178478075, "grad_norm": 6.730991373937796, "learning_rate": 2.3748322287792112e-09, "loss": 17.5609, "step": 54187 }, { "epoch": 0.9905130970442539, "grad_norm": 7.107015627874934, "learning_rate": 2.365718704200881e-09, "loss": 17.1094, "step": 54188 }, { "epoch": 0.9905313762407004, "grad_norm": 5.8224055569663, "learning_rate": 2.356622695896538e-09, "loss": 17.0279, "step": 54189 }, { "epoch": 0.990549655437147, "grad_norm": 5.933921606200513, "learning_rate": 2.347544203898933e-09, "loss": 17.3349, "step": 54190 }, { "epoch": 0.9905679346335935, "grad_norm": 4.602133913159466, "learning_rate": 2.3384832282397072e-09, "loss": 16.8764, "step": 54191 }, { "epoch": 0.9905862138300401, "grad_norm": 6.917918069336775, "learning_rate": 2.329439768950503e-09, "loss": 17.6851, "step": 54192 }, { "epoch": 0.9906044930264866, "grad_norm": 5.0659316171986575, "learning_rate": 2.3204138260624066e-09, "loss": 16.8563, "step": 54193 }, { "epoch": 0.990622772222933, "grad_norm": 5.668087180501634, "learning_rate": 2.311405399608169e-09, "loss": 16.9874, "step": 54194 }, { "epoch": 0.9906410514193796, "grad_norm": 4.721258020180958, "learning_rate": 2.3024144896183207e-09, "loss": 16.9429, "step": 54195 }, { "epoch": 0.9906593306158261, "grad_norm": 7.777217099702122, "learning_rate": 2.29344109612506e-09, "loss": 18.9715, "step": 54196 }, { "epoch": 0.9906776098122726, "grad_norm": 6.44990727979507, "learning_rate": 2.2844852191594712e-09, "loss": 17.4082, "step": 54197 }, { "epoch": 0.9906958890087192, "grad_norm": 6.524962830614531, "learning_rate": 2.275546858753197e-09, "loss": 17.3851, "step": 54198 }, { "epoch": 0.9907141682051657, "grad_norm": 5.1763598563809765, "learning_rate": 2.266626014937323e-09, "loss": 17.1861, "step": 54199 }, { "epoch": 0.9907324474016123, "grad_norm": 6.185183759465581, "learning_rate": 2.257722687742936e-09, "loss": 17.3557, "step": 54200 }, { "epoch": 0.9907507265980587, "grad_norm": 6.241118638007614, "learning_rate": 2.2488368772022316e-09, "loss": 17.4608, "step": 54201 }, { "epoch": 0.9907690057945052, "grad_norm": 4.578846009228085, "learning_rate": 2.2399685833451866e-09, "loss": 16.8268, "step": 54202 }, { "epoch": 0.9907872849909518, "grad_norm": 9.802131646173827, "learning_rate": 2.231117806203442e-09, "loss": 18.3747, "step": 54203 }, { "epoch": 0.9908055641873983, "grad_norm": 6.421950854458638, "learning_rate": 2.2222845458080845e-09, "loss": 17.4629, "step": 54204 }, { "epoch": 0.9908238433838449, "grad_norm": 7.203494557713932, "learning_rate": 2.2134688021901996e-09, "loss": 17.6842, "step": 54205 }, { "epoch": 0.9908421225802914, "grad_norm": 5.506602010861728, "learning_rate": 2.2046705753797638e-09, "loss": 17.2534, "step": 54206 }, { "epoch": 0.9908604017767378, "grad_norm": 4.97502043606861, "learning_rate": 2.1958898654089735e-09, "loss": 17.0061, "step": 54207 }, { "epoch": 0.9908786809731844, "grad_norm": 4.738998838564988, "learning_rate": 2.1871266723072494e-09, "loss": 16.6947, "step": 54208 }, { "epoch": 0.9908969601696309, "grad_norm": 6.281631512571996, "learning_rate": 2.178380996106233e-09, "loss": 17.3428, "step": 54209 }, { "epoch": 0.9909152393660775, "grad_norm": 5.470602036067134, "learning_rate": 2.1696528368364557e-09, "loss": 17.1446, "step": 54210 }, { "epoch": 0.990933518562524, "grad_norm": 5.891394059502924, "learning_rate": 2.160942194527893e-09, "loss": 17.3079, "step": 54211 }, { "epoch": 0.9909517977589705, "grad_norm": 5.718402503367813, "learning_rate": 2.1522490692121867e-09, "loss": 17.0363, "step": 54212 }, { "epoch": 0.9909700769554171, "grad_norm": 6.734553576913109, "learning_rate": 2.1435734609187573e-09, "loss": 17.7651, "step": 54213 }, { "epoch": 0.9909883561518635, "grad_norm": 6.4641271558138875, "learning_rate": 2.1349153696786918e-09, "loss": 17.5122, "step": 54214 }, { "epoch": 0.9910066353483101, "grad_norm": 7.6862850368372975, "learning_rate": 2.1262747955214103e-09, "loss": 17.3851, "step": 54215 }, { "epoch": 0.9910249145447566, "grad_norm": 5.168377092911435, "learning_rate": 2.1176517384785546e-09, "loss": 17.0778, "step": 54216 }, { "epoch": 0.9910431937412031, "grad_norm": 5.948980181371268, "learning_rate": 2.10904619857899e-09, "loss": 17.0632, "step": 54217 }, { "epoch": 0.9910614729376497, "grad_norm": 6.274032991680335, "learning_rate": 2.1004581758543587e-09, "loss": 17.7489, "step": 54218 }, { "epoch": 0.9910797521340962, "grad_norm": 6.061449282860311, "learning_rate": 2.0918876703329705e-09, "loss": 17.1533, "step": 54219 }, { "epoch": 0.9910980313305428, "grad_norm": 6.170694395393845, "learning_rate": 2.0833346820464674e-09, "loss": 17.2477, "step": 54220 }, { "epoch": 0.9911163105269892, "grad_norm": 7.468051919818148, "learning_rate": 2.07479921102427e-09, "loss": 17.6491, "step": 54221 }, { "epoch": 0.9911345897234357, "grad_norm": 6.883425345657221, "learning_rate": 2.066281257295799e-09, "loss": 17.34, "step": 54222 }, { "epoch": 0.9911528689198823, "grad_norm": 7.993542165070079, "learning_rate": 2.057780820891031e-09, "loss": 17.6023, "step": 54223 }, { "epoch": 0.9911711481163288, "grad_norm": 5.092570349694801, "learning_rate": 2.049297901840497e-09, "loss": 17.0809, "step": 54224 }, { "epoch": 0.9911894273127754, "grad_norm": 7.128837455441369, "learning_rate": 2.040832500173617e-09, "loss": 17.5533, "step": 54225 }, { "epoch": 0.9912077065092219, "grad_norm": 5.737472254730858, "learning_rate": 2.0323846159198136e-09, "loss": 16.9268, "step": 54226 }, { "epoch": 0.9912259857056683, "grad_norm": 5.219034107625698, "learning_rate": 2.0239542491085064e-09, "loss": 16.9277, "step": 54227 }, { "epoch": 0.9912442649021149, "grad_norm": 5.609655239671976, "learning_rate": 2.0155413997696714e-09, "loss": 17.1406, "step": 54228 }, { "epoch": 0.9912625440985614, "grad_norm": 6.4783860100009365, "learning_rate": 2.00714606793273e-09, "loss": 17.1387, "step": 54229 }, { "epoch": 0.991280823295008, "grad_norm": 6.186659974649833, "learning_rate": 1.9987682536271034e-09, "loss": 17.6182, "step": 54230 }, { "epoch": 0.9912991024914545, "grad_norm": 6.888495203839996, "learning_rate": 1.9904079568822123e-09, "loss": 17.7711, "step": 54231 }, { "epoch": 0.991317381687901, "grad_norm": 8.60470416706514, "learning_rate": 1.9820651777269217e-09, "loss": 17.2448, "step": 54232 }, { "epoch": 0.9913356608843475, "grad_norm": 5.446006405590874, "learning_rate": 1.9737399161906533e-09, "loss": 17.1468, "step": 54233 }, { "epoch": 0.991353940080794, "grad_norm": 6.25349905091845, "learning_rate": 1.965432172303383e-09, "loss": 17.2733, "step": 54234 }, { "epoch": 0.9913722192772406, "grad_norm": 8.942521241533322, "learning_rate": 1.9571419460928665e-09, "loss": 18.7027, "step": 54235 }, { "epoch": 0.9913904984736871, "grad_norm": 6.106891306287669, "learning_rate": 1.9488692375890796e-09, "loss": 17.4207, "step": 54236 }, { "epoch": 0.9914087776701336, "grad_norm": 5.875919076508779, "learning_rate": 1.9406140468208877e-09, "loss": 17.0356, "step": 54237 }, { "epoch": 0.9914270568665802, "grad_norm": 6.395197239616614, "learning_rate": 1.9323763738171573e-09, "loss": 17.2098, "step": 54238 }, { "epoch": 0.9914453360630266, "grad_norm": 7.439177316900479, "learning_rate": 1.9241562186067543e-09, "loss": 17.4215, "step": 54239 }, { "epoch": 0.9914636152594732, "grad_norm": 6.173715551366598, "learning_rate": 1.9159535812179887e-09, "loss": 17.1704, "step": 54240 }, { "epoch": 0.9914818944559197, "grad_norm": 6.465686563463639, "learning_rate": 1.907768461680837e-09, "loss": 17.3093, "step": 54241 }, { "epoch": 0.9915001736523662, "grad_norm": 6.103839415883191, "learning_rate": 1.8996008600230544e-09, "loss": 17.248, "step": 54242 }, { "epoch": 0.9915184528488128, "grad_norm": 5.797038691838754, "learning_rate": 1.8914507762729515e-09, "loss": 17.1193, "step": 54243 }, { "epoch": 0.9915367320452593, "grad_norm": 5.629790663208286, "learning_rate": 1.8833182104599502e-09, "loss": 17.0111, "step": 54244 }, { "epoch": 0.9915550112417059, "grad_norm": 5.243524007282883, "learning_rate": 1.87520316261236e-09, "loss": 16.996, "step": 54245 }, { "epoch": 0.9915732904381523, "grad_norm": 5.826478299602196, "learning_rate": 1.867105632757937e-09, "loss": 17.198, "step": 54246 }, { "epoch": 0.9915915696345988, "grad_norm": 5.167627973929446, "learning_rate": 1.8590256209261026e-09, "loss": 16.9687, "step": 54247 }, { "epoch": 0.9916098488310454, "grad_norm": 5.134446121982621, "learning_rate": 1.8509631271446115e-09, "loss": 16.8056, "step": 54248 }, { "epoch": 0.9916281280274919, "grad_norm": 6.499777093006607, "learning_rate": 1.8429181514417749e-09, "loss": 16.9546, "step": 54249 }, { "epoch": 0.9916464072239385, "grad_norm": 6.17513468941667, "learning_rate": 1.834890693845348e-09, "loss": 17.1552, "step": 54250 }, { "epoch": 0.991664686420385, "grad_norm": 7.9373894075086575, "learning_rate": 1.8268807543847521e-09, "loss": 17.8565, "step": 54251 }, { "epoch": 0.9916829656168314, "grad_norm": 7.103151870990883, "learning_rate": 1.8188883330866324e-09, "loss": 17.4162, "step": 54252 }, { "epoch": 0.991701244813278, "grad_norm": 6.432874752043554, "learning_rate": 1.8109134299792997e-09, "loss": 17.2053, "step": 54253 }, { "epoch": 0.9917195240097245, "grad_norm": 6.132087635309862, "learning_rate": 1.8029560450916196e-09, "loss": 17.3214, "step": 54254 }, { "epoch": 0.9917378032061711, "grad_norm": 7.282552785191664, "learning_rate": 1.7950161784502374e-09, "loss": 17.5393, "step": 54255 }, { "epoch": 0.9917560824026176, "grad_norm": 7.096617539970239, "learning_rate": 1.787093830084019e-09, "loss": 17.6227, "step": 54256 }, { "epoch": 0.9917743615990641, "grad_norm": 6.263332960944926, "learning_rate": 1.77918900002072e-09, "loss": 17.0257, "step": 54257 }, { "epoch": 0.9917926407955107, "grad_norm": 5.520376501359478, "learning_rate": 1.771301688286986e-09, "loss": 17.1765, "step": 54258 }, { "epoch": 0.9918109199919571, "grad_norm": 7.288711375537268, "learning_rate": 1.7634318949111273e-09, "loss": 17.6098, "step": 54259 }, { "epoch": 0.9918291991884037, "grad_norm": 6.683850958455091, "learning_rate": 1.7555796199208996e-09, "loss": 17.6053, "step": 54260 }, { "epoch": 0.9918474783848502, "grad_norm": 7.562751458869405, "learning_rate": 1.7477448633435035e-09, "loss": 17.4583, "step": 54261 }, { "epoch": 0.9918657575812967, "grad_norm": 5.772907398030609, "learning_rate": 1.7399276252066943e-09, "loss": 17.1664, "step": 54262 }, { "epoch": 0.9918840367777433, "grad_norm": 5.905019362187534, "learning_rate": 1.7321279055376727e-09, "loss": 17.1856, "step": 54263 }, { "epoch": 0.9919023159741898, "grad_norm": 7.379164443717115, "learning_rate": 1.7243457043641942e-09, "loss": 17.3716, "step": 54264 }, { "epoch": 0.9919205951706362, "grad_norm": 7.197098302691322, "learning_rate": 1.7165810217123491e-09, "loss": 17.6217, "step": 54265 }, { "epoch": 0.9919388743670828, "grad_norm": 7.128889204049, "learning_rate": 1.7088338576110031e-09, "loss": 17.7357, "step": 54266 }, { "epoch": 0.9919571535635293, "grad_norm": 5.013967678623523, "learning_rate": 1.7011042120856912e-09, "loss": 16.7674, "step": 54267 }, { "epoch": 0.9919754327599759, "grad_norm": 5.286713039076953, "learning_rate": 1.6933920851647245e-09, "loss": 17.0109, "step": 54268 }, { "epoch": 0.9919937119564224, "grad_norm": 6.030202209624484, "learning_rate": 1.685697476874748e-09, "loss": 16.9041, "step": 54269 }, { "epoch": 0.9920119911528689, "grad_norm": 6.228707826022368, "learning_rate": 1.6780203872424071e-09, "loss": 17.5157, "step": 54270 }, { "epoch": 0.9920302703493155, "grad_norm": 6.210964895339394, "learning_rate": 1.6703608162949025e-09, "loss": 17.5706, "step": 54271 }, { "epoch": 0.9920485495457619, "grad_norm": 6.815201790751241, "learning_rate": 1.6627187640588794e-09, "loss": 17.3639, "step": 54272 }, { "epoch": 0.9920668287422085, "grad_norm": 7.5575604699428345, "learning_rate": 1.6550942305615381e-09, "loss": 17.9529, "step": 54273 }, { "epoch": 0.992085107938655, "grad_norm": 6.812138308044502, "learning_rate": 1.6474872158295242e-09, "loss": 17.2295, "step": 54274 }, { "epoch": 0.9921033871351015, "grad_norm": 6.403299675993449, "learning_rate": 1.6398977198889277e-09, "loss": 17.3297, "step": 54275 }, { "epoch": 0.9921216663315481, "grad_norm": 4.86862662611367, "learning_rate": 1.6323257427669493e-09, "loss": 16.8728, "step": 54276 }, { "epoch": 0.9921399455279946, "grad_norm": 6.033165623922889, "learning_rate": 1.6247712844902342e-09, "loss": 17.2771, "step": 54277 }, { "epoch": 0.9921582247244412, "grad_norm": 5.450617824721589, "learning_rate": 1.6172343450843174e-09, "loss": 17.0348, "step": 54278 }, { "epoch": 0.9921765039208876, "grad_norm": 5.775666139283467, "learning_rate": 1.609714924576955e-09, "loss": 16.787, "step": 54279 }, { "epoch": 0.9921947831173341, "grad_norm": 8.49384844880066, "learning_rate": 1.6022130229931266e-09, "loss": 17.9616, "step": 54280 }, { "epoch": 0.9922130623137807, "grad_norm": 9.529819764838324, "learning_rate": 1.5947286403605878e-09, "loss": 17.6725, "step": 54281 }, { "epoch": 0.9922313415102272, "grad_norm": 6.603548308354617, "learning_rate": 1.5872617767043185e-09, "loss": 17.0999, "step": 54282 }, { "epoch": 0.9922496207066738, "grad_norm": 5.947799043618102, "learning_rate": 1.5798124320509644e-09, "loss": 17.2166, "step": 54283 }, { "epoch": 0.9922678999031203, "grad_norm": 7.408277403178828, "learning_rate": 1.5723806064260604e-09, "loss": 17.5515, "step": 54284 }, { "epoch": 0.9922861790995667, "grad_norm": 6.224988117081321, "learning_rate": 1.564966299856807e-09, "loss": 17.3193, "step": 54285 }, { "epoch": 0.9923044582960133, "grad_norm": 5.384702334327021, "learning_rate": 1.5575695123687395e-09, "loss": 17.1245, "step": 54286 }, { "epoch": 0.9923227374924598, "grad_norm": 6.299496692922691, "learning_rate": 1.5501902439873927e-09, "loss": 17.1811, "step": 54287 }, { "epoch": 0.9923410166889064, "grad_norm": 4.669544846670053, "learning_rate": 1.5428284947394124e-09, "loss": 16.8487, "step": 54288 }, { "epoch": 0.9923592958853529, "grad_norm": 6.990305585512482, "learning_rate": 1.535484264649223e-09, "loss": 17.4368, "step": 54289 }, { "epoch": 0.9923775750817994, "grad_norm": 5.823264749030743, "learning_rate": 1.5281575537440253e-09, "loss": 17.2764, "step": 54290 }, { "epoch": 0.992395854278246, "grad_norm": 4.690624757580176, "learning_rate": 1.5208483620487991e-09, "loss": 16.7685, "step": 54291 }, { "epoch": 0.9924141334746924, "grad_norm": 6.596600229763006, "learning_rate": 1.5135566895890797e-09, "loss": 17.2924, "step": 54292 }, { "epoch": 0.992432412671139, "grad_norm": 5.526004110034927, "learning_rate": 1.5062825363904022e-09, "loss": 17.1542, "step": 54293 }, { "epoch": 0.9924506918675855, "grad_norm": 5.550141243544271, "learning_rate": 1.4990259024788567e-09, "loss": 16.9202, "step": 54294 }, { "epoch": 0.992468971064032, "grad_norm": 4.758718313701639, "learning_rate": 1.4917867878794235e-09, "loss": 16.9227, "step": 54295 }, { "epoch": 0.9924872502604786, "grad_norm": 6.710971076178838, "learning_rate": 1.4845651926170823e-09, "loss": 17.2797, "step": 54296 }, { "epoch": 0.992505529456925, "grad_norm": 5.9331642632855175, "learning_rate": 1.4773611167179235e-09, "loss": 17.2332, "step": 54297 }, { "epoch": 0.9925238086533716, "grad_norm": 6.328967072387145, "learning_rate": 1.4701745602069272e-09, "loss": 17.3858, "step": 54298 }, { "epoch": 0.9925420878498181, "grad_norm": 6.879585517972935, "learning_rate": 1.4630055231096286e-09, "loss": 17.4952, "step": 54299 }, { "epoch": 0.9925603670462646, "grad_norm": 6.561949221357287, "learning_rate": 1.4558540054498972e-09, "loss": 17.4626, "step": 54300 }, { "epoch": 0.9925786462427112, "grad_norm": 5.988127816833275, "learning_rate": 1.4487200072538232e-09, "loss": 17.1837, "step": 54301 }, { "epoch": 0.9925969254391577, "grad_norm": 5.898416054214303, "learning_rate": 1.4416035285469421e-09, "loss": 17.1979, "step": 54302 }, { "epoch": 0.9926152046356043, "grad_norm": 4.712163558195468, "learning_rate": 1.4345045693525683e-09, "loss": 16.5985, "step": 54303 }, { "epoch": 0.9926334838320507, "grad_norm": 8.108480092054194, "learning_rate": 1.4274231296967923e-09, "loss": 17.9833, "step": 54304 }, { "epoch": 0.9926517630284972, "grad_norm": 6.7930907236103195, "learning_rate": 1.4203592096045936e-09, "loss": 17.8117, "step": 54305 }, { "epoch": 0.9926700422249438, "grad_norm": 5.511978467878751, "learning_rate": 1.4133128090998427e-09, "loss": 16.9466, "step": 54306 }, { "epoch": 0.9926883214213903, "grad_norm": 7.066898168894387, "learning_rate": 1.4062839282080743e-09, "loss": 17.4143, "step": 54307 }, { "epoch": 0.9927066006178369, "grad_norm": 5.932483645665612, "learning_rate": 1.3992725669531581e-09, "loss": 17.1789, "step": 54308 }, { "epoch": 0.9927248798142834, "grad_norm": 5.890867587468231, "learning_rate": 1.3922787253600745e-09, "loss": 17.3822, "step": 54309 }, { "epoch": 0.9927431590107298, "grad_norm": 6.306715568296623, "learning_rate": 1.385302403453248e-09, "loss": 16.8038, "step": 54310 }, { "epoch": 0.9927614382071764, "grad_norm": 5.107982243089401, "learning_rate": 1.378343601257104e-09, "loss": 16.8921, "step": 54311 }, { "epoch": 0.9927797174036229, "grad_norm": 5.75538342086479, "learning_rate": 1.3714023187960668e-09, "loss": 17.1986, "step": 54312 }, { "epoch": 0.9927979966000695, "grad_norm": 7.077571733904272, "learning_rate": 1.3644785560951167e-09, "loss": 17.4586, "step": 54313 }, { "epoch": 0.992816275796516, "grad_norm": 7.776341776413591, "learning_rate": 1.3575723131770136e-09, "loss": 18.005, "step": 54314 }, { "epoch": 0.9928345549929625, "grad_norm": 5.723503792019714, "learning_rate": 1.3506835900672922e-09, "loss": 16.9518, "step": 54315 }, { "epoch": 0.9928528341894091, "grad_norm": 6.139589685602175, "learning_rate": 1.3438123867898223e-09, "loss": 17.4631, "step": 54316 }, { "epoch": 0.9928711133858555, "grad_norm": 5.999740863226726, "learning_rate": 1.336958703368474e-09, "loss": 17.2627, "step": 54317 }, { "epoch": 0.9928893925823021, "grad_norm": 5.302967649323192, "learning_rate": 1.3301225398271167e-09, "loss": 17.0857, "step": 54318 }, { "epoch": 0.9929076717787486, "grad_norm": 6.37308881020243, "learning_rate": 1.3233038961901756e-09, "loss": 17.0257, "step": 54319 }, { "epoch": 0.9929259509751951, "grad_norm": 6.417083865276478, "learning_rate": 1.3165027724809654e-09, "loss": 17.4309, "step": 54320 }, { "epoch": 0.9929442301716417, "grad_norm": 6.239250952031901, "learning_rate": 1.3097191687239109e-09, "loss": 17.2048, "step": 54321 }, { "epoch": 0.9929625093680882, "grad_norm": 6.323379515585055, "learning_rate": 1.3029530849423266e-09, "loss": 17.25, "step": 54322 }, { "epoch": 0.9929807885645348, "grad_norm": 5.772885827509626, "learning_rate": 1.2962045211606378e-09, "loss": 17.1289, "step": 54323 }, { "epoch": 0.9929990677609812, "grad_norm": 4.294099710674441, "learning_rate": 1.2894734774016037e-09, "loss": 16.7651, "step": 54324 }, { "epoch": 0.9930173469574277, "grad_norm": 6.249052462167442, "learning_rate": 1.2827599536890944e-09, "loss": 17.1002, "step": 54325 }, { "epoch": 0.9930356261538743, "grad_norm": 5.652389563332121, "learning_rate": 1.2760639500469796e-09, "loss": 17.1137, "step": 54326 }, { "epoch": 0.9930539053503208, "grad_norm": 5.555588606327216, "learning_rate": 1.2693854664980188e-09, "loss": 17.0869, "step": 54327 }, { "epoch": 0.9930721845467674, "grad_norm": 5.991661141306718, "learning_rate": 1.2627245030666368e-09, "loss": 17.381, "step": 54328 }, { "epoch": 0.9930904637432139, "grad_norm": 7.092375992748312, "learning_rate": 1.2560810597750383e-09, "loss": 17.7125, "step": 54329 }, { "epoch": 0.9931087429396603, "grad_norm": 5.307495960433847, "learning_rate": 1.2494551366476481e-09, "loss": 16.8209, "step": 54330 }, { "epoch": 0.9931270221361069, "grad_norm": 5.474876681411864, "learning_rate": 1.2428467337072258e-09, "loss": 17.0067, "step": 54331 }, { "epoch": 0.9931453013325534, "grad_norm": 5.21264431777127, "learning_rate": 1.236255850976531e-09, "loss": 16.9955, "step": 54332 }, { "epoch": 0.9931635805289999, "grad_norm": 7.610704665604972, "learning_rate": 1.2296824884788783e-09, "loss": 17.8241, "step": 54333 }, { "epoch": 0.9931818597254465, "grad_norm": 6.012354957165824, "learning_rate": 1.2231266462375823e-09, "loss": 17.2415, "step": 54334 }, { "epoch": 0.993200138921893, "grad_norm": 5.846666710014657, "learning_rate": 1.216588324275403e-09, "loss": 17.1856, "step": 54335 }, { "epoch": 0.9932184181183396, "grad_norm": 5.61898436178434, "learning_rate": 1.2100675226156543e-09, "loss": 17.2748, "step": 54336 }, { "epoch": 0.993236697314786, "grad_norm": 5.4019977839767, "learning_rate": 1.2035642412805416e-09, "loss": 17.0015, "step": 54337 }, { "epoch": 0.9932549765112325, "grad_norm": 7.151621158371001, "learning_rate": 1.1970784802928236e-09, "loss": 17.6745, "step": 54338 }, { "epoch": 0.9932732557076791, "grad_norm": 8.738415771500213, "learning_rate": 1.1906102396763709e-09, "loss": 18.3575, "step": 54339 }, { "epoch": 0.9932915349041256, "grad_norm": 6.094319040721228, "learning_rate": 1.184159519452277e-09, "loss": 17.2597, "step": 54340 }, { "epoch": 0.9933098141005722, "grad_norm": 5.658154909913996, "learning_rate": 1.1777263196444122e-09, "loss": 17.0937, "step": 54341 }, { "epoch": 0.9933280932970187, "grad_norm": 5.339537298825609, "learning_rate": 1.1713106402744256e-09, "loss": 16.7686, "step": 54342 }, { "epoch": 0.9933463724934651, "grad_norm": 5.614532813369447, "learning_rate": 1.1649124813656321e-09, "loss": 17.0671, "step": 54343 }, { "epoch": 0.9933646516899117, "grad_norm": 4.9764870192424455, "learning_rate": 1.1585318429396808e-09, "loss": 16.8572, "step": 54344 }, { "epoch": 0.9933829308863582, "grad_norm": 6.102086069661268, "learning_rate": 1.1521687250198864e-09, "loss": 17.2508, "step": 54345 }, { "epoch": 0.9934012100828048, "grad_norm": 6.493852533989705, "learning_rate": 1.1458231276273434e-09, "loss": 17.4184, "step": 54346 }, { "epoch": 0.9934194892792513, "grad_norm": 6.894717326793481, "learning_rate": 1.1394950507853663e-09, "loss": 17.4, "step": 54347 }, { "epoch": 0.9934377684756978, "grad_norm": 5.9005879238162695, "learning_rate": 1.1331844945156046e-09, "loss": 17.0796, "step": 54348 }, { "epoch": 0.9934560476721443, "grad_norm": 6.724595170283572, "learning_rate": 1.1268914588397073e-09, "loss": 17.5945, "step": 54349 }, { "epoch": 0.9934743268685908, "grad_norm": 6.742464732992825, "learning_rate": 1.1206159437809893e-09, "loss": 17.374, "step": 54350 }, { "epoch": 0.9934926060650374, "grad_norm": 6.79959605198353, "learning_rate": 1.11435794935999e-09, "loss": 17.0913, "step": 54351 }, { "epoch": 0.9935108852614839, "grad_norm": 6.883033333501322, "learning_rate": 1.1081174756000235e-09, "loss": 17.6504, "step": 54352 }, { "epoch": 0.9935291644579304, "grad_norm": 7.63415264873829, "learning_rate": 1.1018945225221844e-09, "loss": 16.9758, "step": 54353 }, { "epoch": 0.993547443654377, "grad_norm": 7.060526813789162, "learning_rate": 1.095689090148122e-09, "loss": 17.4713, "step": 54354 }, { "epoch": 0.9935657228508235, "grad_norm": 5.385234675304591, "learning_rate": 1.0895011785000408e-09, "loss": 17.0988, "step": 54355 }, { "epoch": 0.99358400204727, "grad_norm": 5.897007834726545, "learning_rate": 1.0833307875995901e-09, "loss": 17.0041, "step": 54356 }, { "epoch": 0.9936022812437165, "grad_norm": 7.482451616036292, "learning_rate": 1.0771779174684194e-09, "loss": 17.2908, "step": 54357 }, { "epoch": 0.993620560440163, "grad_norm": 5.441851948857979, "learning_rate": 1.0710425681276226e-09, "loss": 17.1037, "step": 54358 }, { "epoch": 0.9936388396366096, "grad_norm": 6.393357431845766, "learning_rate": 1.0649247395994044e-09, "loss": 17.4568, "step": 54359 }, { "epoch": 0.9936571188330561, "grad_norm": 5.835666798689774, "learning_rate": 1.058824431904304e-09, "loss": 17.0061, "step": 54360 }, { "epoch": 0.9936753980295027, "grad_norm": 5.098908767598183, "learning_rate": 1.0527416450645257e-09, "loss": 16.9013, "step": 54361 }, { "epoch": 0.9936936772259491, "grad_norm": 7.590845856057945, "learning_rate": 1.0466763791011636e-09, "loss": 17.7516, "step": 54362 }, { "epoch": 0.9937119564223956, "grad_norm": 4.610209285816965, "learning_rate": 1.0406286340353122e-09, "loss": 16.9035, "step": 54363 }, { "epoch": 0.9937302356188422, "grad_norm": 5.443191894482025, "learning_rate": 1.0345984098880658e-09, "loss": 17.1407, "step": 54364 }, { "epoch": 0.9937485148152887, "grad_norm": 4.842048941811637, "learning_rate": 1.0285857066810734e-09, "loss": 16.8869, "step": 54365 }, { "epoch": 0.9937667940117353, "grad_norm": 4.551591209201098, "learning_rate": 1.0225905244348744e-09, "loss": 16.801, "step": 54366 }, { "epoch": 0.9937850732081818, "grad_norm": 6.437733820946227, "learning_rate": 1.016612863171118e-09, "loss": 17.2783, "step": 54367 }, { "epoch": 0.9938033524046282, "grad_norm": 7.111708428665559, "learning_rate": 1.0106527229103435e-09, "loss": 17.7136, "step": 54368 }, { "epoch": 0.9938216316010748, "grad_norm": 6.131149456096863, "learning_rate": 1.0047101036730899e-09, "loss": 16.9568, "step": 54369 }, { "epoch": 0.9938399107975213, "grad_norm": 6.290655588246508, "learning_rate": 9.987850054804515e-10, "loss": 17.504, "step": 54370 }, { "epoch": 0.9938581899939679, "grad_norm": 5.879273967260686, "learning_rate": 9.928774283535225e-10, "loss": 17.1909, "step": 54371 }, { "epoch": 0.9938764691904144, "grad_norm": 5.678470242130539, "learning_rate": 9.86987372312842e-10, "loss": 17.1824, "step": 54372 }, { "epoch": 0.9938947483868609, "grad_norm": 5.305665367992521, "learning_rate": 9.811148373783941e-10, "loss": 17.066, "step": 54373 }, { "epoch": 0.9939130275833075, "grad_norm": 5.28780648642352, "learning_rate": 9.75259823571828e-10, "loss": 16.9239, "step": 54374 }, { "epoch": 0.9939313067797539, "grad_norm": 6.687876756421091, "learning_rate": 9.69422330913128e-10, "loss": 17.2765, "step": 54375 }, { "epoch": 0.9939495859762005, "grad_norm": 5.257831817829349, "learning_rate": 9.636023594228327e-10, "loss": 17.2608, "step": 54376 }, { "epoch": 0.993967865172647, "grad_norm": 7.163409899973888, "learning_rate": 9.577999091214818e-10, "loss": 17.7007, "step": 54377 }, { "epoch": 0.9939861443690935, "grad_norm": 6.605173139083232, "learning_rate": 9.52014980029059e-10, "loss": 17.3895, "step": 54378 }, { "epoch": 0.9940044235655401, "grad_norm": 8.663583020365714, "learning_rate": 9.462475721661036e-10, "loss": 17.6773, "step": 54379 }, { "epoch": 0.9940227027619866, "grad_norm": 6.005603354554796, "learning_rate": 9.404976855525994e-10, "loss": 16.965, "step": 54380 }, { "epoch": 0.9940409819584332, "grad_norm": 5.217189088281136, "learning_rate": 9.347653202085305e-10, "loss": 16.9564, "step": 54381 }, { "epoch": 0.9940592611548796, "grad_norm": 6.452406100819366, "learning_rate": 9.290504761549912e-10, "loss": 17.2872, "step": 54382 }, { "epoch": 0.9940775403513261, "grad_norm": 5.570818375967252, "learning_rate": 9.233531534108553e-10, "loss": 16.8781, "step": 54383 }, { "epoch": 0.9940958195477727, "grad_norm": 6.382993270929429, "learning_rate": 9.176733519966618e-10, "loss": 17.357, "step": 54384 }, { "epoch": 0.9941140987442192, "grad_norm": 5.878199288651087, "learning_rate": 9.120110719323949e-10, "loss": 17.144, "step": 54385 }, { "epoch": 0.9941323779406658, "grad_norm": 5.807758324753031, "learning_rate": 9.063663132374833e-10, "loss": 17.2625, "step": 54386 }, { "epoch": 0.9941506571371123, "grad_norm": 6.348018544673815, "learning_rate": 9.007390759324664e-10, "loss": 17.3833, "step": 54387 }, { "epoch": 0.9941689363335587, "grad_norm": 6.3026979556845815, "learning_rate": 8.951293600362176e-10, "loss": 17.3286, "step": 54388 }, { "epoch": 0.9941872155300053, "grad_norm": 7.4744080190623485, "learning_rate": 8.895371655687213e-10, "loss": 17.7159, "step": 54389 }, { "epoch": 0.9942054947264518, "grad_norm": 6.774627155491698, "learning_rate": 8.839624925494062e-10, "loss": 17.4423, "step": 54390 }, { "epoch": 0.9942237739228984, "grad_norm": 6.5972570427400195, "learning_rate": 8.784053409982562e-10, "loss": 17.1118, "step": 54391 }, { "epoch": 0.9942420531193449, "grad_norm": 6.483384596994132, "learning_rate": 8.728657109341454e-10, "loss": 17.7159, "step": 54392 }, { "epoch": 0.9942603323157914, "grad_norm": 5.138296071982158, "learning_rate": 8.673436023776127e-10, "loss": 16.9225, "step": 54393 }, { "epoch": 0.994278611512238, "grad_norm": 5.252005148050641, "learning_rate": 8.618390153464217e-10, "loss": 17.2455, "step": 54394 }, { "epoch": 0.9942968907086844, "grad_norm": 6.749416721960384, "learning_rate": 8.563519498611117e-10, "loss": 17.6006, "step": 54395 }, { "epoch": 0.994315169905131, "grad_norm": 7.420472929272197, "learning_rate": 8.508824059405563e-10, "loss": 17.4447, "step": 54396 }, { "epoch": 0.9943334491015775, "grad_norm": 7.057943714004557, "learning_rate": 8.454303836036293e-10, "loss": 17.4083, "step": 54397 }, { "epoch": 0.994351728298024, "grad_norm": 6.424127090830999, "learning_rate": 8.399958828692045e-10, "loss": 17.5518, "step": 54398 }, { "epoch": 0.9943700074944706, "grad_norm": 5.663654559702362, "learning_rate": 8.34578903757266e-10, "loss": 17.0532, "step": 54399 }, { "epoch": 0.9943882866909171, "grad_norm": 7.678930767412831, "learning_rate": 8.291794462866875e-10, "loss": 18.1328, "step": 54400 }, { "epoch": 0.9944065658873635, "grad_norm": 5.223571670525646, "learning_rate": 8.237975104757878e-10, "loss": 16.922, "step": 54401 }, { "epoch": 0.9944248450838101, "grad_norm": 6.74359554251469, "learning_rate": 8.184330963434405e-10, "loss": 17.2864, "step": 54402 }, { "epoch": 0.9944431242802566, "grad_norm": 7.395331674435502, "learning_rate": 8.130862039085197e-10, "loss": 17.4877, "step": 54403 }, { "epoch": 0.9944614034767032, "grad_norm": 5.34230534156505, "learning_rate": 8.07756833190454e-10, "loss": 16.9932, "step": 54404 }, { "epoch": 0.9944796826731497, "grad_norm": 5.303398724812577, "learning_rate": 8.02444984207007e-10, "loss": 16.9359, "step": 54405 }, { "epoch": 0.9944979618695962, "grad_norm": 7.264457195499448, "learning_rate": 7.971506569770527e-10, "loss": 17.4581, "step": 54406 }, { "epoch": 0.9945162410660427, "grad_norm": 6.146685138301114, "learning_rate": 7.918738515200197e-10, "loss": 17.1981, "step": 54407 }, { "epoch": 0.9945345202624892, "grad_norm": 6.612986607279172, "learning_rate": 7.866145678531167e-10, "loss": 17.2621, "step": 54408 }, { "epoch": 0.9945527994589358, "grad_norm": 5.15676028815498, "learning_rate": 7.813728059952174e-10, "loss": 16.9893, "step": 54409 }, { "epoch": 0.9945710786553823, "grad_norm": 5.666306010736511, "learning_rate": 7.761485659646406e-10, "loss": 16.8911, "step": 54410 }, { "epoch": 0.9945893578518288, "grad_norm": 4.977687785430883, "learning_rate": 7.709418477802599e-10, "loss": 16.9444, "step": 54411 }, { "epoch": 0.9946076370482754, "grad_norm": 6.053201101209886, "learning_rate": 7.65752651459839e-10, "loss": 17.1178, "step": 54412 }, { "epoch": 0.9946259162447219, "grad_norm": 7.649378861597939, "learning_rate": 7.605809770216965e-10, "loss": 17.8157, "step": 54413 }, { "epoch": 0.9946441954411684, "grad_norm": 6.614309208903715, "learning_rate": 7.554268244835961e-10, "loss": 17.4436, "step": 54414 }, { "epoch": 0.9946624746376149, "grad_norm": 5.877196060908908, "learning_rate": 7.502901938644113e-10, "loss": 17.2525, "step": 54415 }, { "epoch": 0.9946807538340614, "grad_norm": 4.440420480516495, "learning_rate": 7.45171085181351e-10, "loss": 16.6196, "step": 54416 }, { "epoch": 0.994699033030508, "grad_norm": 5.287576148831, "learning_rate": 7.400694984527335e-10, "loss": 16.8787, "step": 54417 }, { "epoch": 0.9947173122269545, "grad_norm": 4.562063079897998, "learning_rate": 7.349854336963225e-10, "loss": 16.7718, "step": 54418 }, { "epoch": 0.9947355914234011, "grad_norm": 8.03140663600034, "learning_rate": 7.299188909298815e-10, "loss": 18.0724, "step": 54419 }, { "epoch": 0.9947538706198475, "grad_norm": 5.391866715121262, "learning_rate": 7.248698701711743e-10, "loss": 17.0337, "step": 54420 }, { "epoch": 0.994772149816294, "grad_norm": 5.348675128217077, "learning_rate": 7.198383714379642e-10, "loss": 17.0317, "step": 54421 }, { "epoch": 0.9947904290127406, "grad_norm": 6.658153123930843, "learning_rate": 7.148243947480149e-10, "loss": 17.2468, "step": 54422 }, { "epoch": 0.9948087082091871, "grad_norm": 7.766541496307229, "learning_rate": 7.0982794011909e-10, "loss": 17.7785, "step": 54423 }, { "epoch": 0.9948269874056337, "grad_norm": 4.81847775636746, "learning_rate": 7.048490075678426e-10, "loss": 16.7397, "step": 54424 }, { "epoch": 0.9948452666020802, "grad_norm": 5.579926962262649, "learning_rate": 6.998875971125918e-10, "loss": 17.0185, "step": 54425 }, { "epoch": 0.9948635457985266, "grad_norm": 6.186691467556325, "learning_rate": 6.949437087705457e-10, "loss": 17.2754, "step": 54426 }, { "epoch": 0.9948818249949732, "grad_norm": 5.983060006174252, "learning_rate": 6.900173425589129e-10, "loss": 17.2758, "step": 54427 }, { "epoch": 0.9949001041914197, "grad_norm": 5.90175088016551, "learning_rate": 6.851084984949019e-10, "loss": 16.9949, "step": 54428 }, { "epoch": 0.9949183833878663, "grad_norm": 7.3668175447638, "learning_rate": 6.80217176595166e-10, "loss": 18.3585, "step": 54429 }, { "epoch": 0.9949366625843128, "grad_norm": 6.936770400725446, "learning_rate": 6.753433768780237e-10, "loss": 17.3034, "step": 54430 }, { "epoch": 0.9949549417807593, "grad_norm": 5.396304188132332, "learning_rate": 6.704870993601287e-10, "loss": 17.0428, "step": 54431 }, { "epoch": 0.9949732209772059, "grad_norm": 6.133154291919615, "learning_rate": 6.65648344058134e-10, "loss": 17.174, "step": 54432 }, { "epoch": 0.9949915001736523, "grad_norm": 5.428165912012269, "learning_rate": 6.608271109892484e-10, "loss": 17.2446, "step": 54433 }, { "epoch": 0.9950097793700989, "grad_norm": 7.339440950348139, "learning_rate": 6.560234001701249e-10, "loss": 17.6866, "step": 54434 }, { "epoch": 0.9950280585665454, "grad_norm": 4.526900079629503, "learning_rate": 6.512372116179722e-10, "loss": 16.707, "step": 54435 }, { "epoch": 0.9950463377629919, "grad_norm": 6.303571789430032, "learning_rate": 6.464685453494435e-10, "loss": 17.5073, "step": 54436 }, { "epoch": 0.9950646169594385, "grad_norm": 7.869787152997465, "learning_rate": 6.417174013811922e-10, "loss": 18.1486, "step": 54437 }, { "epoch": 0.995082896155885, "grad_norm": 5.482059099849404, "learning_rate": 6.369837797298717e-10, "loss": 16.9678, "step": 54438 }, { "epoch": 0.9951011753523316, "grad_norm": 5.283078588571251, "learning_rate": 6.322676804121352e-10, "loss": 17.1128, "step": 54439 }, { "epoch": 0.995119454548778, "grad_norm": 6.51339315522227, "learning_rate": 6.275691034446362e-10, "loss": 17.3606, "step": 54440 }, { "epoch": 0.9951377337452245, "grad_norm": 5.775151467030729, "learning_rate": 6.22888048843473e-10, "loss": 17.1357, "step": 54441 }, { "epoch": 0.9951560129416711, "grad_norm": 6.49522789950704, "learning_rate": 6.182245166252987e-10, "loss": 17.4757, "step": 54442 }, { "epoch": 0.9951742921381176, "grad_norm": 7.6557074526387074, "learning_rate": 6.135785068062117e-10, "loss": 17.6397, "step": 54443 }, { "epoch": 0.9951925713345642, "grad_norm": 6.19630704807048, "learning_rate": 6.089500194028653e-10, "loss": 17.4182, "step": 54444 }, { "epoch": 0.9952108505310107, "grad_norm": 6.187850325384975, "learning_rate": 6.043390544313576e-10, "loss": 17.1648, "step": 54445 }, { "epoch": 0.9952291297274571, "grad_norm": 5.623676108661895, "learning_rate": 5.997456119077871e-10, "loss": 16.9603, "step": 54446 }, { "epoch": 0.9952474089239037, "grad_norm": 5.766462373261588, "learning_rate": 5.95169691848807e-10, "loss": 17.031, "step": 54447 }, { "epoch": 0.9952656881203502, "grad_norm": 7.053587989171821, "learning_rate": 5.906112942694053e-10, "loss": 17.6471, "step": 54448 }, { "epoch": 0.9952839673167968, "grad_norm": 5.788501434234237, "learning_rate": 5.860704191862355e-10, "loss": 17.3267, "step": 54449 }, { "epoch": 0.9953022465132433, "grad_norm": 6.928250800135492, "learning_rate": 5.815470666148404e-10, "loss": 17.1993, "step": 54450 }, { "epoch": 0.9953205257096898, "grad_norm": 7.665010502693649, "learning_rate": 5.770412365713185e-10, "loss": 17.911, "step": 54451 }, { "epoch": 0.9953388049061364, "grad_norm": 6.334347858643796, "learning_rate": 5.725529290712128e-10, "loss": 17.7183, "step": 54452 }, { "epoch": 0.9953570841025828, "grad_norm": 6.399146028209555, "learning_rate": 5.680821441311767e-10, "loss": 17.1269, "step": 54453 }, { "epoch": 0.9953753632990294, "grad_norm": 5.871261926750341, "learning_rate": 5.636288817656432e-10, "loss": 17.4287, "step": 54454 }, { "epoch": 0.9953936424954759, "grad_norm": 5.69688012987935, "learning_rate": 5.591931419907104e-10, "loss": 16.7954, "step": 54455 }, { "epoch": 0.9954119216919224, "grad_norm": 5.670631308996316, "learning_rate": 5.547749248224766e-10, "loss": 17.1389, "step": 54456 }, { "epoch": 0.995430200888369, "grad_norm": 8.650264434658098, "learning_rate": 5.503742302753745e-10, "loss": 18.0105, "step": 54457 }, { "epoch": 0.9954484800848155, "grad_norm": 5.9992650347928755, "learning_rate": 5.459910583655025e-10, "loss": 17.2806, "step": 54458 }, { "epoch": 0.995466759281262, "grad_norm": 6.839615687748347, "learning_rate": 5.416254091084039e-10, "loss": 17.4417, "step": 54459 }, { "epoch": 0.9954850384777085, "grad_norm": 7.297306350611921, "learning_rate": 5.372772825190664e-10, "loss": 17.64, "step": 54460 }, { "epoch": 0.995503317674155, "grad_norm": 6.703034279087307, "learning_rate": 5.329466786124781e-10, "loss": 17.4133, "step": 54461 }, { "epoch": 0.9955215968706016, "grad_norm": 7.0167473600447385, "learning_rate": 5.286335974041823e-10, "loss": 17.5199, "step": 54462 }, { "epoch": 0.9955398760670481, "grad_norm": 6.976624532225125, "learning_rate": 5.243380389091668e-10, "loss": 17.5749, "step": 54463 }, { "epoch": 0.9955581552634947, "grad_norm": 5.715401208650679, "learning_rate": 5.200600031424197e-10, "loss": 16.9386, "step": 54464 }, { "epoch": 0.9955764344599412, "grad_norm": 5.133209904664402, "learning_rate": 5.157994901189289e-10, "loss": 16.6332, "step": 54465 }, { "epoch": 0.9955947136563876, "grad_norm": 7.672655821170596, "learning_rate": 5.115564998542377e-10, "loss": 17.8451, "step": 54466 }, { "epoch": 0.9956129928528342, "grad_norm": 5.815537984222324, "learning_rate": 5.073310323622238e-10, "loss": 16.9133, "step": 54467 }, { "epoch": 0.9956312720492807, "grad_norm": 5.828969798195553, "learning_rate": 5.031230876584303e-10, "loss": 17.2166, "step": 54468 }, { "epoch": 0.9956495512457272, "grad_norm": 4.753236673396973, "learning_rate": 4.989326657572902e-10, "loss": 16.7785, "step": 54469 }, { "epoch": 0.9956678304421738, "grad_norm": 5.272521985507489, "learning_rate": 4.947597666732362e-10, "loss": 16.8939, "step": 54470 }, { "epoch": 0.9956861096386203, "grad_norm": 7.049611371168208, "learning_rate": 4.906043904218116e-10, "loss": 17.6484, "step": 54471 }, { "epoch": 0.9957043888350668, "grad_norm": 5.734952415337532, "learning_rate": 4.864665370168942e-10, "loss": 17.1904, "step": 54472 }, { "epoch": 0.9957226680315133, "grad_norm": 6.173195320182789, "learning_rate": 4.823462064723617e-10, "loss": 17.3373, "step": 54473 }, { "epoch": 0.9957409472279598, "grad_norm": 5.496291219317635, "learning_rate": 4.782433988043122e-10, "loss": 17.0854, "step": 54474 }, { "epoch": 0.9957592264244064, "grad_norm": 5.632856186392485, "learning_rate": 4.741581140255136e-10, "loss": 17.1423, "step": 54475 }, { "epoch": 0.9957775056208529, "grad_norm": 5.6245765806689345, "learning_rate": 4.700903521509537e-10, "loss": 17.0761, "step": 54476 }, { "epoch": 0.9957957848172995, "grad_norm": 15.314512645466834, "learning_rate": 4.660401131950654e-10, "loss": 18.0041, "step": 54477 }, { "epoch": 0.995814064013746, "grad_norm": 6.698163150053168, "learning_rate": 4.620073971722816e-10, "loss": 17.4639, "step": 54478 }, { "epoch": 0.9958323432101924, "grad_norm": 6.559503382051864, "learning_rate": 4.5799220409592503e-10, "loss": 17.5473, "step": 54479 }, { "epoch": 0.995850622406639, "grad_norm": 8.332839930166019, "learning_rate": 4.539945339804286e-10, "loss": 18.3405, "step": 54480 }, { "epoch": 0.9958689016030855, "grad_norm": 8.617483968232873, "learning_rate": 4.5001438683967e-10, "loss": 17.868, "step": 54481 }, { "epoch": 0.9958871807995321, "grad_norm": 6.559548269538251, "learning_rate": 4.460517626880823e-10, "loss": 17.5605, "step": 54482 }, { "epoch": 0.9959054599959786, "grad_norm": 6.432363313941321, "learning_rate": 4.4210666153954307e-10, "loss": 17.2853, "step": 54483 }, { "epoch": 0.995923739192425, "grad_norm": 7.3100823973758695, "learning_rate": 4.3817908340682004e-10, "loss": 17.7499, "step": 54484 }, { "epoch": 0.9959420183888716, "grad_norm": 6.355472578993088, "learning_rate": 4.342690283049011e-10, "loss": 17.337, "step": 54485 }, { "epoch": 0.9959602975853181, "grad_norm": 7.447869147261474, "learning_rate": 4.303764962471091e-10, "loss": 17.5085, "step": 54486 }, { "epoch": 0.9959785767817647, "grad_norm": 7.350395997352679, "learning_rate": 4.2650148724676655e-10, "loss": 17.5893, "step": 54487 }, { "epoch": 0.9959968559782112, "grad_norm": 5.1366001450894405, "learning_rate": 4.226440013177513e-10, "loss": 17.0751, "step": 54488 }, { "epoch": 0.9960151351746577, "grad_norm": 6.499450151072195, "learning_rate": 4.18804038473386e-10, "loss": 17.1232, "step": 54489 }, { "epoch": 0.9960334143711043, "grad_norm": 5.705065378385702, "learning_rate": 4.149815987275485e-10, "loss": 16.931, "step": 54490 }, { "epoch": 0.9960516935675507, "grad_norm": 4.520954430410976, "learning_rate": 4.1117668209300633e-10, "loss": 16.8895, "step": 54491 }, { "epoch": 0.9960699727639973, "grad_norm": 7.179845357348762, "learning_rate": 4.0738928858363727e-10, "loss": 17.3028, "step": 54492 }, { "epoch": 0.9960882519604438, "grad_norm": 6.263743037940148, "learning_rate": 4.0361941821276394e-10, "loss": 17.5554, "step": 54493 }, { "epoch": 0.9961065311568903, "grad_norm": 4.693923020019431, "learning_rate": 3.99867070993154e-10, "loss": 16.8016, "step": 54494 }, { "epoch": 0.9961248103533369, "grad_norm": 6.338030601128453, "learning_rate": 3.961322469381301e-10, "loss": 17.5701, "step": 54495 }, { "epoch": 0.9961430895497834, "grad_norm": 7.228911373321436, "learning_rate": 3.9241494606101495e-10, "loss": 17.6693, "step": 54496 }, { "epoch": 0.99616136874623, "grad_norm": 8.098608641621283, "learning_rate": 3.887151683740209e-10, "loss": 17.7476, "step": 54497 }, { "epoch": 0.9961796479426764, "grad_norm": 6.805397593394626, "learning_rate": 3.8503291389158095e-10, "loss": 17.5094, "step": 54498 }, { "epoch": 0.9961979271391229, "grad_norm": 6.248941957330666, "learning_rate": 3.8136818262535236e-10, "loss": 17.5665, "step": 54499 }, { "epoch": 0.9962162063355695, "grad_norm": 6.134894316411477, "learning_rate": 3.777209745886579e-10, "loss": 17.2282, "step": 54500 }, { "epoch": 0.996234485532016, "grad_norm": 6.858465731093218, "learning_rate": 3.740912897937099e-10, "loss": 17.6929, "step": 54501 }, { "epoch": 0.9962527647284626, "grad_norm": 5.694244728710937, "learning_rate": 3.704791282543863e-10, "loss": 17.245, "step": 54502 }, { "epoch": 0.9962710439249091, "grad_norm": 5.878166343485834, "learning_rate": 3.6688448998234427e-10, "loss": 17.2042, "step": 54503 }, { "epoch": 0.9962893231213555, "grad_norm": 9.343003141080134, "learning_rate": 3.6330737499090665e-10, "loss": 18.362, "step": 54504 }, { "epoch": 0.9963076023178021, "grad_norm": 5.165275469369117, "learning_rate": 3.5974778329173067e-10, "loss": 16.8967, "step": 54505 }, { "epoch": 0.9963258815142486, "grad_norm": 6.721003606049182, "learning_rate": 3.5620571489813906e-10, "loss": 17.2715, "step": 54506 }, { "epoch": 0.9963441607106952, "grad_norm": 7.539594010103785, "learning_rate": 3.526811698217891e-10, "loss": 17.8846, "step": 54507 }, { "epoch": 0.9963624399071417, "grad_norm": 4.512450246149366, "learning_rate": 3.4917414807600356e-10, "loss": 16.8171, "step": 54508 }, { "epoch": 0.9963807191035882, "grad_norm": 7.540075815232244, "learning_rate": 3.456846496718846e-10, "loss": 17.8328, "step": 54509 }, { "epoch": 0.9963989983000348, "grad_norm": 5.002576131630012, "learning_rate": 3.42212674622755e-10, "loss": 16.8651, "step": 54510 }, { "epoch": 0.9964172774964812, "grad_norm": 6.094787761126215, "learning_rate": 3.3875822294027197e-10, "loss": 17.319, "step": 54511 }, { "epoch": 0.9964355566929278, "grad_norm": 6.634675517039242, "learning_rate": 3.3532129463664796e-10, "loss": 17.5354, "step": 54512 }, { "epoch": 0.9964538358893743, "grad_norm": 4.857964839901264, "learning_rate": 3.319018897235404e-10, "loss": 16.8162, "step": 54513 }, { "epoch": 0.9964721150858208, "grad_norm": 6.366857821590624, "learning_rate": 3.2850000821371685e-10, "loss": 17.2411, "step": 54514 }, { "epoch": 0.9964903942822674, "grad_norm": 10.224937246374887, "learning_rate": 3.251156501182795e-10, "loss": 17.6744, "step": 54515 }, { "epoch": 0.9965086734787139, "grad_norm": 6.6525878576553374, "learning_rate": 3.2174881544944083e-10, "loss": 17.3239, "step": 54516 }, { "epoch": 0.9965269526751604, "grad_norm": 5.331127579155074, "learning_rate": 3.183995042188581e-10, "loss": 17.2548, "step": 54517 }, { "epoch": 0.9965452318716069, "grad_norm": 6.074982628172998, "learning_rate": 3.150677164387439e-10, "loss": 17.1128, "step": 54518 }, { "epoch": 0.9965635110680534, "grad_norm": 7.310714302748975, "learning_rate": 3.117534521207555e-10, "loss": 17.9488, "step": 54519 }, { "epoch": 0.9965817902645, "grad_norm": 6.591704163767108, "learning_rate": 3.084567112759951e-10, "loss": 16.9759, "step": 54520 }, { "epoch": 0.9966000694609465, "grad_norm": 6.3845096545759255, "learning_rate": 3.0517749391612005e-10, "loss": 17.6233, "step": 54521 }, { "epoch": 0.9966183486573931, "grad_norm": 8.873037216606022, "learning_rate": 3.0191580005278775e-10, "loss": 17.7742, "step": 54522 }, { "epoch": 0.9966366278538396, "grad_norm": 6.188701062856353, "learning_rate": 2.986716296971004e-10, "loss": 17.2132, "step": 54523 }, { "epoch": 0.996654907050286, "grad_norm": 7.5947987817006775, "learning_rate": 2.9544498286127044e-10, "loss": 17.5345, "step": 54524 }, { "epoch": 0.9966731862467326, "grad_norm": 5.594910965041725, "learning_rate": 2.92235859555845e-10, "loss": 17.1938, "step": 54525 }, { "epoch": 0.9966914654431791, "grad_norm": 6.678293147221805, "learning_rate": 2.890442597919263e-10, "loss": 17.5509, "step": 54526 }, { "epoch": 0.9967097446396257, "grad_norm": 5.362857730107931, "learning_rate": 2.858701835811717e-10, "loss": 17.1708, "step": 54527 }, { "epoch": 0.9967280238360722, "grad_norm": 6.716278451907546, "learning_rate": 2.827136309352385e-10, "loss": 17.3218, "step": 54528 }, { "epoch": 0.9967463030325187, "grad_norm": 5.339575545636456, "learning_rate": 2.7957460186356364e-10, "loss": 17.127, "step": 54529 }, { "epoch": 0.9967645822289652, "grad_norm": 7.011304407928288, "learning_rate": 2.7645309637891473e-10, "loss": 17.7801, "step": 54530 }, { "epoch": 0.9967828614254117, "grad_norm": 4.642348041112225, "learning_rate": 2.7334911449128364e-10, "loss": 16.7824, "step": 54531 }, { "epoch": 0.9968011406218583, "grad_norm": 6.917044975412388, "learning_rate": 2.702626562112176e-10, "loss": 17.1128, "step": 54532 }, { "epoch": 0.9968194198183048, "grad_norm": 7.896228030590086, "learning_rate": 2.6719372155037393e-10, "loss": 18.0533, "step": 54533 }, { "epoch": 0.9968376990147513, "grad_norm": 7.26109373726604, "learning_rate": 2.6414231051929973e-10, "loss": 17.5932, "step": 54534 }, { "epoch": 0.9968559782111979, "grad_norm": 7.155480469321664, "learning_rate": 2.6110842312854213e-10, "loss": 17.3414, "step": 54535 }, { "epoch": 0.9968742574076443, "grad_norm": 6.003296140389537, "learning_rate": 2.5809205938864824e-10, "loss": 17.5344, "step": 54536 }, { "epoch": 0.9968925366040908, "grad_norm": 7.1208985703489835, "learning_rate": 2.550932193101652e-10, "loss": 17.4711, "step": 54537 }, { "epoch": 0.9969108158005374, "grad_norm": 9.522177039969485, "learning_rate": 2.5211190290364007e-10, "loss": 17.8339, "step": 54538 }, { "epoch": 0.9969290949969839, "grad_norm": 6.424196178871235, "learning_rate": 2.4914811017962e-10, "loss": 17.3688, "step": 54539 }, { "epoch": 0.9969473741934305, "grad_norm": 6.072020515682648, "learning_rate": 2.462018411486522e-10, "loss": 17.3594, "step": 54540 }, { "epoch": 0.996965653389877, "grad_norm": 4.9742020553921105, "learning_rate": 2.4327309582072857e-10, "loss": 16.9952, "step": 54541 }, { "epoch": 0.9969839325863235, "grad_norm": 7.15981379633424, "learning_rate": 2.403618742063962e-10, "loss": 17.4617, "step": 54542 }, { "epoch": 0.99700221178277, "grad_norm": 5.8984679044342245, "learning_rate": 2.3746817631620234e-10, "loss": 17.2372, "step": 54543 }, { "epoch": 0.9970204909792165, "grad_norm": 6.492648691034954, "learning_rate": 2.3459200215902865e-10, "loss": 17.295, "step": 54544 }, { "epoch": 0.9970387701756631, "grad_norm": 7.285258506553624, "learning_rate": 2.3173335174597744e-10, "loss": 17.5185, "step": 54545 }, { "epoch": 0.9970570493721096, "grad_norm": 5.355387747068443, "learning_rate": 2.2889222508704068e-10, "loss": 16.9319, "step": 54546 }, { "epoch": 0.9970753285685561, "grad_norm": 7.322043070441888, "learning_rate": 2.2606862219221038e-10, "loss": 17.8451, "step": 54547 }, { "epoch": 0.9970936077650027, "grad_norm": 6.102813597721722, "learning_rate": 2.2326254307036832e-10, "loss": 17.3373, "step": 54548 }, { "epoch": 0.9971118869614491, "grad_norm": 7.41296995989899, "learning_rate": 2.2047398773261675e-10, "loss": 17.4469, "step": 54549 }, { "epoch": 0.9971301661578957, "grad_norm": 5.908402907372695, "learning_rate": 2.1770295618839254e-10, "loss": 17.4627, "step": 54550 }, { "epoch": 0.9971484453543422, "grad_norm": 7.221623440636353, "learning_rate": 2.1494944844713262e-10, "loss": 17.664, "step": 54551 }, { "epoch": 0.9971667245507887, "grad_norm": 5.163612671166307, "learning_rate": 2.1221346451827386e-10, "loss": 16.9378, "step": 54552 }, { "epoch": 0.9971850037472353, "grad_norm": 4.514720111152025, "learning_rate": 2.0949500441236338e-10, "loss": 16.6832, "step": 54553 }, { "epoch": 0.9972032829436818, "grad_norm": 6.620623156917401, "learning_rate": 2.0679406813772783e-10, "loss": 17.4367, "step": 54554 }, { "epoch": 0.9972215621401284, "grad_norm": 7.905179166697738, "learning_rate": 2.041106557049144e-10, "loss": 18.1847, "step": 54555 }, { "epoch": 0.9972398413365748, "grad_norm": 5.114038492955716, "learning_rate": 2.014447671222497e-10, "loss": 16.8949, "step": 54556 }, { "epoch": 0.9972581205330213, "grad_norm": 6.631575780347629, "learning_rate": 1.9879640240028087e-10, "loss": 17.4795, "step": 54557 }, { "epoch": 0.9972763997294679, "grad_norm": 7.12952061912992, "learning_rate": 1.9616556154733458e-10, "loss": 17.2507, "step": 54558 }, { "epoch": 0.9972946789259144, "grad_norm": 5.468032261695173, "learning_rate": 1.9355224457284772e-10, "loss": 17.2397, "step": 54559 }, { "epoch": 0.997312958122361, "grad_norm": 6.857053889836651, "learning_rate": 1.909564514862572e-10, "loss": 17.3483, "step": 54560 }, { "epoch": 0.9973312373188075, "grad_norm": 6.40836868562433, "learning_rate": 1.8837818229644478e-10, "loss": 17.5368, "step": 54561 }, { "epoch": 0.9973495165152539, "grad_norm": 6.363565351538367, "learning_rate": 1.8581743701284738e-10, "loss": 17.3038, "step": 54562 }, { "epoch": 0.9973677957117005, "grad_norm": 5.119979639969559, "learning_rate": 1.8327421564379166e-10, "loss": 16.8502, "step": 54563 }, { "epoch": 0.997386074908147, "grad_norm": 6.934047451259871, "learning_rate": 1.8074851819815942e-10, "loss": 17.5176, "step": 54564 }, { "epoch": 0.9974043541045936, "grad_norm": 6.667978264525192, "learning_rate": 1.7824034468594264e-10, "loss": 17.7422, "step": 54565 }, { "epoch": 0.9974226333010401, "grad_norm": 5.753215243509562, "learning_rate": 1.757496951143578e-10, "loss": 17.0458, "step": 54566 }, { "epoch": 0.9974409124974866, "grad_norm": 4.761230562477752, "learning_rate": 1.732765694933969e-10, "loss": 17.0541, "step": 54567 }, { "epoch": 0.9974591916939332, "grad_norm": 5.4455857800328875, "learning_rate": 1.7082096783083146e-10, "loss": 17.0677, "step": 54568 }, { "epoch": 0.9974774708903796, "grad_norm": 6.340451663066294, "learning_rate": 1.6838289013609842e-10, "loss": 17.2078, "step": 54569 }, { "epoch": 0.9974957500868262, "grad_norm": 5.316028301462963, "learning_rate": 1.6596233641696936e-10, "loss": 16.923, "step": 54570 }, { "epoch": 0.9975140292832727, "grad_norm": 5.831439026015873, "learning_rate": 1.63559306682326e-10, "loss": 17.1073, "step": 54571 }, { "epoch": 0.9975323084797192, "grad_norm": 6.141107006403569, "learning_rate": 1.6117380094105019e-10, "loss": 17.2404, "step": 54572 }, { "epoch": 0.9975505876761658, "grad_norm": 6.636449539456727, "learning_rate": 1.588058192003583e-10, "loss": 17.6397, "step": 54573 }, { "epoch": 0.9975688668726123, "grad_norm": 6.021124162244606, "learning_rate": 1.5645536146968732e-10, "loss": 17.1714, "step": 54574 }, { "epoch": 0.9975871460690588, "grad_norm": 6.565793610271754, "learning_rate": 1.5412242775625364e-10, "loss": 17.5462, "step": 54575 }, { "epoch": 0.9976054252655053, "grad_norm": 4.896094088268906, "learning_rate": 1.5180701806893906e-10, "loss": 16.9553, "step": 54576 }, { "epoch": 0.9976237044619518, "grad_norm": 5.670173308627631, "learning_rate": 1.4950913241607024e-10, "loss": 17.1288, "step": 54577 }, { "epoch": 0.9976419836583984, "grad_norm": 5.857685855719577, "learning_rate": 1.4722877080486363e-10, "loss": 17.0278, "step": 54578 }, { "epoch": 0.9976602628548449, "grad_norm": 6.146899115222561, "learning_rate": 1.4496593324364594e-10, "loss": 17.477, "step": 54579 }, { "epoch": 0.9976785420512915, "grad_norm": 7.945044112973796, "learning_rate": 1.427206197407438e-10, "loss": 17.3322, "step": 54580 }, { "epoch": 0.997696821247738, "grad_norm": 4.869556624862531, "learning_rate": 1.404928303039288e-10, "loss": 16.8504, "step": 54581 }, { "epoch": 0.9977151004441844, "grad_norm": 6.936533167748476, "learning_rate": 1.3828256494041737e-10, "loss": 18.2466, "step": 54582 }, { "epoch": 0.997733379640631, "grad_norm": 7.071530733510596, "learning_rate": 1.360898236585362e-10, "loss": 17.5485, "step": 54583 }, { "epoch": 0.9977516588370775, "grad_norm": 3.8534099617196085, "learning_rate": 1.339146064655017e-10, "loss": 16.5083, "step": 54584 }, { "epoch": 0.9977699380335241, "grad_norm": 5.707199851030734, "learning_rate": 1.3175691336964059e-10, "loss": 16.9577, "step": 54585 }, { "epoch": 0.9977882172299706, "grad_norm": 5.798816597098755, "learning_rate": 1.296167443781693e-10, "loss": 17.1142, "step": 54586 }, { "epoch": 0.9978064964264171, "grad_norm": 6.437827867007838, "learning_rate": 1.2749409949830426e-10, "loss": 17.1651, "step": 54587 }, { "epoch": 0.9978247756228636, "grad_norm": 7.877274100121252, "learning_rate": 1.2538897873726198e-10, "loss": 17.6684, "step": 54588 }, { "epoch": 0.9978430548193101, "grad_norm": 5.814865647759007, "learning_rate": 1.2330138210336905e-10, "loss": 17.0764, "step": 54589 }, { "epoch": 0.9978613340157567, "grad_norm": 7.073849228035675, "learning_rate": 1.212313096032869e-10, "loss": 17.6645, "step": 54590 }, { "epoch": 0.9978796132122032, "grad_norm": 6.154762196516304, "learning_rate": 1.19178761244787e-10, "loss": 16.9914, "step": 54591 }, { "epoch": 0.9978978924086497, "grad_norm": 6.132306791702389, "learning_rate": 1.1714373703397564e-10, "loss": 17.2396, "step": 54592 }, { "epoch": 0.9979161716050963, "grad_norm": 6.0235733705950585, "learning_rate": 1.1512623697917946e-10, "loss": 17.4103, "step": 54593 }, { "epoch": 0.9979344508015427, "grad_norm": 6.076941385160907, "learning_rate": 1.1312626108705982e-10, "loss": 17.2826, "step": 54594 }, { "epoch": 0.9979527299979893, "grad_norm": 5.661018597063694, "learning_rate": 1.1114380936483315e-10, "loss": 17.1738, "step": 54595 }, { "epoch": 0.9979710091944358, "grad_norm": 7.213856371411356, "learning_rate": 1.091788818186057e-10, "loss": 17.8318, "step": 54596 }, { "epoch": 0.9979892883908823, "grad_norm": 7.223324626148723, "learning_rate": 1.0723147845614901e-10, "loss": 17.4204, "step": 54597 }, { "epoch": 0.9980075675873289, "grad_norm": 5.445460273355545, "learning_rate": 1.0530159928356931e-10, "loss": 17.0209, "step": 54598 }, { "epoch": 0.9980258467837754, "grad_norm": 7.545726278364186, "learning_rate": 1.0338924430863817e-10, "loss": 17.6797, "step": 54599 }, { "epoch": 0.998044125980222, "grad_norm": 5.865159976014049, "learning_rate": 1.014944135369067e-10, "loss": 17.2626, "step": 54600 }, { "epoch": 0.9980624051766684, "grad_norm": 8.557806533664744, "learning_rate": 9.961710697559135e-11, "loss": 18.2188, "step": 54601 }, { "epoch": 0.9980806843731149, "grad_norm": 7.606158432612428, "learning_rate": 9.775732463190856e-11, "loss": 17.4643, "step": 54602 }, { "epoch": 0.9980989635695615, "grad_norm": 8.034601910104831, "learning_rate": 9.591506651085436e-11, "loss": 17.8671, "step": 54603 }, { "epoch": 0.998117242766008, "grad_norm": 6.447011771654313, "learning_rate": 9.409033262020028e-11, "loss": 17.1477, "step": 54604 }, { "epoch": 0.9981355219624545, "grad_norm": 6.00612052314779, "learning_rate": 9.228312296605258e-11, "loss": 17.4596, "step": 54605 }, { "epoch": 0.9981538011589011, "grad_norm": 6.189077554731924, "learning_rate": 9.049343755396234e-11, "loss": 17.0595, "step": 54606 }, { "epoch": 0.9981720803553475, "grad_norm": 7.433367599218094, "learning_rate": 8.872127639170113e-11, "loss": 17.7423, "step": 54607 }, { "epoch": 0.9981903595517941, "grad_norm": 5.896512401428335, "learning_rate": 8.696663948370987e-11, "loss": 17.1817, "step": 54608 }, { "epoch": 0.9982086387482406, "grad_norm": 6.517708239369557, "learning_rate": 8.52295268377601e-11, "loss": 17.7064, "step": 54609 }, { "epoch": 0.9982269179446871, "grad_norm": 6.385539039095613, "learning_rate": 8.350993845884781e-11, "loss": 17.1451, "step": 54610 }, { "epoch": 0.9982451971411337, "grad_norm": 6.036790584977443, "learning_rate": 8.180787435307924e-11, "loss": 16.8195, "step": 54611 }, { "epoch": 0.9982634763375802, "grad_norm": 4.920529134994294, "learning_rate": 8.012333452711574e-11, "loss": 16.6588, "step": 54612 }, { "epoch": 0.9982817555340268, "grad_norm": 7.039412268535601, "learning_rate": 7.845631898650841e-11, "loss": 17.6868, "step": 54613 }, { "epoch": 0.9983000347304732, "grad_norm": 6.104061584852752, "learning_rate": 7.680682773680837e-11, "loss": 17.0128, "step": 54614 }, { "epoch": 0.9983183139269197, "grad_norm": 5.229437216861951, "learning_rate": 7.517486078412184e-11, "loss": 16.7404, "step": 54615 }, { "epoch": 0.9983365931233663, "grad_norm": 7.1540295710639095, "learning_rate": 7.356041813399995e-11, "loss": 17.2484, "step": 54616 }, { "epoch": 0.9983548723198128, "grad_norm": 9.227359230002053, "learning_rate": 7.196349979254891e-11, "loss": 17.283, "step": 54617 }, { "epoch": 0.9983731515162594, "grad_norm": 6.71368731619535, "learning_rate": 7.038410576420962e-11, "loss": 17.5713, "step": 54618 }, { "epoch": 0.9983914307127059, "grad_norm": 5.566185955320981, "learning_rate": 6.882223605619853e-11, "loss": 17.2514, "step": 54619 }, { "epoch": 0.9984097099091523, "grad_norm": 5.595614572513889, "learning_rate": 6.727789067240143e-11, "loss": 17.2307, "step": 54620 }, { "epoch": 0.9984279891055989, "grad_norm": 6.690357602907722, "learning_rate": 6.575106961947964e-11, "loss": 17.3604, "step": 54621 }, { "epoch": 0.9984462683020454, "grad_norm": 5.8628770712602485, "learning_rate": 6.424177290187406e-11, "loss": 17.1618, "step": 54622 }, { "epoch": 0.998464547498492, "grad_norm": 7.495703948027256, "learning_rate": 6.27500005251358e-11, "loss": 17.7519, "step": 54623 }, { "epoch": 0.9984828266949385, "grad_norm": 7.027258982544285, "learning_rate": 6.127575249481599e-11, "loss": 17.3202, "step": 54624 }, { "epoch": 0.998501105891385, "grad_norm": 5.638270884705873, "learning_rate": 5.981902881535551e-11, "loss": 17.0737, "step": 54625 }, { "epoch": 0.9985193850878316, "grad_norm": 6.716620488000787, "learning_rate": 5.837982949286058e-11, "loss": 17.2271, "step": 54626 }, { "epoch": 0.998537664284278, "grad_norm": 5.513867591841549, "learning_rate": 5.695815453177211e-11, "loss": 16.826, "step": 54627 }, { "epoch": 0.9985559434807246, "grad_norm": 6.937148358749058, "learning_rate": 5.55540039370861e-11, "loss": 17.2891, "step": 54628 }, { "epoch": 0.9985742226771711, "grad_norm": 6.995658964060972, "learning_rate": 5.416737771379854e-11, "loss": 17.3715, "step": 54629 }, { "epoch": 0.9985925018736176, "grad_norm": 5.631510754012056, "learning_rate": 5.2798275866905445e-11, "loss": 17.0353, "step": 54630 }, { "epoch": 0.9986107810700642, "grad_norm": 6.235547790421807, "learning_rate": 5.1446698401402815e-11, "loss": 17.225, "step": 54631 }, { "epoch": 0.9986290602665107, "grad_norm": 7.015949773972031, "learning_rate": 5.011264532117643e-11, "loss": 17.7389, "step": 54632 }, { "epoch": 0.9986473394629573, "grad_norm": 8.277880457368575, "learning_rate": 4.879611663177741e-11, "loss": 17.8337, "step": 54633 }, { "epoch": 0.9986656186594037, "grad_norm": 4.9190271600283895, "learning_rate": 4.7497112337091536e-11, "loss": 16.7432, "step": 54634 }, { "epoch": 0.9986838978558502, "grad_norm": 4.932160448033557, "learning_rate": 4.62156324421148e-11, "loss": 16.9044, "step": 54635 }, { "epoch": 0.9987021770522968, "grad_norm": 4.834513556306011, "learning_rate": 4.4951676951288105e-11, "loss": 16.9621, "step": 54636 }, { "epoch": 0.9987204562487433, "grad_norm": 6.755423094143461, "learning_rate": 4.370524586960745e-11, "loss": 17.2594, "step": 54637 }, { "epoch": 0.9987387354451899, "grad_norm": 7.365294259318968, "learning_rate": 4.247633920040351e-11, "loss": 17.9886, "step": 54638 }, { "epoch": 0.9987570146416364, "grad_norm": 5.836604858633716, "learning_rate": 4.1264956948117164e-11, "loss": 17.1226, "step": 54639 }, { "epoch": 0.9987752938380828, "grad_norm": 6.194090328660538, "learning_rate": 4.007109911774443e-11, "loss": 17.4817, "step": 54640 }, { "epoch": 0.9987935730345294, "grad_norm": 5.435328637837981, "learning_rate": 3.8894765713171076e-11, "loss": 17.148, "step": 54641 }, { "epoch": 0.9988118522309759, "grad_norm": 5.811954471323268, "learning_rate": 3.7735956737727784e-11, "loss": 17.3012, "step": 54642 }, { "epoch": 0.9988301314274225, "grad_norm": 5.539796764535184, "learning_rate": 3.6594672196965664e-11, "loss": 16.9819, "step": 54643 }, { "epoch": 0.998848410623869, "grad_norm": 5.463753419778406, "learning_rate": 3.547091209366027e-11, "loss": 17.1983, "step": 54644 }, { "epoch": 0.9988666898203155, "grad_norm": 5.587212908715006, "learning_rate": 3.43646764322525e-11, "loss": 16.862, "step": 54645 }, { "epoch": 0.998884969016762, "grad_norm": 6.884214911104962, "learning_rate": 3.3275965216073015e-11, "loss": 17.398, "step": 54646 }, { "epoch": 0.9989032482132085, "grad_norm": 5.82870023047217, "learning_rate": 3.220477845011782e-11, "loss": 17.2556, "step": 54647 }, { "epoch": 0.9989215274096551, "grad_norm": 5.211906407987423, "learning_rate": 3.1151116136607374e-11, "loss": 16.8667, "step": 54648 }, { "epoch": 0.9989398066061016, "grad_norm": 5.356292083651857, "learning_rate": 3.011497828053766e-11, "loss": 16.9376, "step": 54649 }, { "epoch": 0.9989580858025481, "grad_norm": 6.25744915333465, "learning_rate": 2.909636488523937e-11, "loss": 17.2949, "step": 54650 }, { "epoch": 0.9989763649989947, "grad_norm": 6.221098857499889, "learning_rate": 2.8095275953488043e-11, "loss": 17.074, "step": 54651 }, { "epoch": 0.9989946441954412, "grad_norm": 5.485428752991657, "learning_rate": 2.7111711489724578e-11, "loss": 17.1668, "step": 54652 }, { "epoch": 0.9990129233918877, "grad_norm": 7.907314520864452, "learning_rate": 2.6145671497279646e-11, "loss": 17.9212, "step": 54653 }, { "epoch": 0.9990312025883342, "grad_norm": 6.877673459808795, "learning_rate": 2.51971559789288e-11, "loss": 17.401, "step": 54654 }, { "epoch": 0.9990494817847807, "grad_norm": 5.783234571114131, "learning_rate": 2.4266164938557822e-11, "loss": 16.9963, "step": 54655 }, { "epoch": 0.9990677609812273, "grad_norm": 6.120517491683577, "learning_rate": 2.3352698379497384e-11, "loss": 17.0307, "step": 54656 }, { "epoch": 0.9990860401776738, "grad_norm": 6.020431996255373, "learning_rate": 2.245675630452304e-11, "loss": 17.2982, "step": 54657 }, { "epoch": 0.9991043193741204, "grad_norm": 6.234707452354454, "learning_rate": 2.157833871641035e-11, "loss": 17.1677, "step": 54658 }, { "epoch": 0.9991225985705668, "grad_norm": 7.235459983763955, "learning_rate": 2.0717445619600206e-11, "loss": 17.8492, "step": 54659 }, { "epoch": 0.9991408777670133, "grad_norm": 6.441864701197844, "learning_rate": 1.9874077015757942e-11, "loss": 16.8697, "step": 54660 }, { "epoch": 0.9991591569634599, "grad_norm": 5.289814687097569, "learning_rate": 1.9048232908769338e-11, "loss": 16.6348, "step": 54661 }, { "epoch": 0.9991774361599064, "grad_norm": 5.710983375568044, "learning_rate": 1.8239913300854838e-11, "loss": 16.9881, "step": 54662 }, { "epoch": 0.999195715356353, "grad_norm": 7.091296166149142, "learning_rate": 1.7449118195345115e-11, "loss": 17.5547, "step": 54663 }, { "epoch": 0.9992139945527995, "grad_norm": 7.052789812648049, "learning_rate": 1.6675847594460615e-11, "loss": 17.765, "step": 54664 }, { "epoch": 0.999232273749246, "grad_norm": 6.2106570112112385, "learning_rate": 1.5920101501532005e-11, "loss": 17.6312, "step": 54665 }, { "epoch": 0.9992505529456925, "grad_norm": 6.155367327935908, "learning_rate": 1.518187991877973e-11, "loss": 17.5061, "step": 54666 }, { "epoch": 0.999268832142139, "grad_norm": 6.216307172654799, "learning_rate": 1.446118284897935e-11, "loss": 17.1227, "step": 54667 }, { "epoch": 0.9992871113385856, "grad_norm": 4.483110166730021, "learning_rate": 1.375801029490642e-11, "loss": 16.6513, "step": 54668 }, { "epoch": 0.9993053905350321, "grad_norm": 5.581114728407274, "learning_rate": 1.3072362258226279e-11, "loss": 17.1154, "step": 54669 }, { "epoch": 0.9993236697314786, "grad_norm": 5.880911261543972, "learning_rate": 1.2404238742269591e-11, "loss": 17.0651, "step": 54670 }, { "epoch": 0.9993419489279252, "grad_norm": 6.015451545744466, "learning_rate": 1.1753639748701695e-11, "loss": 17.1345, "step": 54671 }, { "epoch": 0.9993602281243716, "grad_norm": 6.324484867978573, "learning_rate": 1.1120565280298145e-11, "loss": 17.3435, "step": 54672 }, { "epoch": 0.9993785073208181, "grad_norm": 5.884196607892625, "learning_rate": 1.0505015338724279e-11, "loss": 17.7159, "step": 54673 }, { "epoch": 0.9993967865172647, "grad_norm": 6.549932025417886, "learning_rate": 9.90698992675565e-12, "loss": 17.6196, "step": 54674 }, { "epoch": 0.9994150657137112, "grad_norm": 6.234897901476078, "learning_rate": 9.326489046057597e-12, "loss": 17.2335, "step": 54675 }, { "epoch": 0.9994333449101578, "grad_norm": 5.768333748098588, "learning_rate": 8.763512698850562e-12, "loss": 17.3512, "step": 54676 }, { "epoch": 0.9994516241066043, "grad_norm": 6.2742652764902, "learning_rate": 8.218060886799883e-12, "loss": 17.3247, "step": 54677 }, { "epoch": 0.9994699033030507, "grad_norm": 5.6074914459078, "learning_rate": 7.690133612126005e-12, "loss": 17.1536, "step": 54678 }, { "epoch": 0.9994881824994973, "grad_norm": 7.487087694245954, "learning_rate": 7.179730876494262e-12, "loss": 17.9751, "step": 54679 }, { "epoch": 0.9995064616959438, "grad_norm": 5.356487682551546, "learning_rate": 6.6868526821251e-12, "loss": 17.0224, "step": 54680 }, { "epoch": 0.9995247408923904, "grad_norm": 8.823201155071056, "learning_rate": 6.211499030683854e-12, "loss": 18.4228, "step": 54681 }, { "epoch": 0.9995430200888369, "grad_norm": 7.753937808434728, "learning_rate": 5.753669923280747e-12, "loss": 17.7712, "step": 54682 }, { "epoch": 0.9995612992852834, "grad_norm": 7.342543685580566, "learning_rate": 5.313365361581113e-12, "loss": 17.6233, "step": 54683 }, { "epoch": 0.99957957848173, "grad_norm": 7.627615899247098, "learning_rate": 4.890585347805399e-12, "loss": 17.7206, "step": 54684 }, { "epoch": 0.9995978576781764, "grad_norm": 5.848018265057509, "learning_rate": 4.485329882508715e-12, "loss": 17.368, "step": 54685 }, { "epoch": 0.999616136874623, "grad_norm": 4.631335709500043, "learning_rate": 4.097598967911509e-12, "loss": 16.7336, "step": 54686 }, { "epoch": 0.9996344160710695, "grad_norm": 5.676950824675553, "learning_rate": 3.727392605124003e-12, "loss": 17.0472, "step": 54687 }, { "epoch": 0.999652695267516, "grad_norm": 7.026739823619472, "learning_rate": 3.3747107952564196e-12, "loss": 17.3217, "step": 54688 }, { "epoch": 0.9996709744639626, "grad_norm": 5.197299387335918, "learning_rate": 3.039553539974094e-12, "loss": 16.8657, "step": 54689 }, { "epoch": 0.9996892536604091, "grad_norm": 6.089315978006577, "learning_rate": 2.7219208398321374e-12, "loss": 16.989, "step": 54690 }, { "epoch": 0.9997075328568557, "grad_norm": 6.059319583221548, "learning_rate": 2.4218126964958845e-12, "loss": 17.1785, "step": 54691 }, { "epoch": 0.9997258120533021, "grad_norm": 5.563550622064475, "learning_rate": 2.1392291105204464e-12, "loss": 16.9241, "step": 54692 }, { "epoch": 0.9997440912497486, "grad_norm": 4.632506098949366, "learning_rate": 1.8741700835711586e-12, "loss": 16.8821, "step": 54693 }, { "epoch": 0.9997623704461952, "grad_norm": 5.468630617475701, "learning_rate": 1.6266356162031316e-12, "loss": 17.1797, "step": 54694 }, { "epoch": 0.9997806496426417, "grad_norm": 6.967118649077163, "learning_rate": 1.3966257089714774e-12, "loss": 17.3516, "step": 54695 }, { "epoch": 0.9997989288390883, "grad_norm": 8.276536902600078, "learning_rate": 1.184140362986419e-12, "loss": 17.5714, "step": 54696 }, { "epoch": 0.9998172080355348, "grad_norm": 5.600505862504848, "learning_rate": 9.891795788030679e-13, "loss": 17.1209, "step": 54697 }, { "epoch": 0.9998354872319812, "grad_norm": 6.95173730687473, "learning_rate": 8.117433575316469e-13, "loss": 16.9851, "step": 54698 }, { "epoch": 0.9998537664284278, "grad_norm": 6.844853087044732, "learning_rate": 6.518316991721563e-13, "loss": 17.6168, "step": 54699 }, { "epoch": 0.9998720456248743, "grad_norm": 5.707320537843663, "learning_rate": 5.094446048348189e-13, "loss": 17.3844, "step": 54700 }, { "epoch": 0.9998903248213209, "grad_norm": 7.114851877349142, "learning_rate": 3.8458207507474645e-13, "loss": 17.438, "step": 54701 }, { "epoch": 0.9999086040177674, "grad_norm": 5.859980929006127, "learning_rate": 2.7724410989193875e-13, "loss": 17.3342, "step": 54702 }, { "epoch": 0.9999268832142139, "grad_norm": 4.546976123287807, "learning_rate": 1.8743070984150734e-13, "loss": 16.8081, "step": 54703 }, { "epoch": 0.9999451624106604, "grad_norm": 5.735640215550282, "learning_rate": 1.1514187547856382e-13, "loss": 17.2085, "step": 54704 }, { "epoch": 0.9999634416071069, "grad_norm": 6.131966010931266, "learning_rate": 6.037760680310812e-14, "loss": 17.4164, "step": 54705 }, { "epoch": 0.9999817208035535, "grad_norm": 6.204844173950913, "learning_rate": 2.313790381514025e-14, "loss": 17.3373, "step": 54706 }, { "epoch": 1.0, "grad_norm": 6.636013433117457, "learning_rate": 3.4227670697717375e-15, "loss": 17.3939, "step": 54707 } ], "logging_steps": 1.0, "max_steps": 54707, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }