{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 1468, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0013623978201634877, "grad_norm": 255.1936492919922, "learning_rate": 0.0, "loss": 5.977943420410156, "step": 1 }, { "epoch": 0.0027247956403269754, "grad_norm": 786.9889526367188, "learning_rate": 4.444444444444445e-07, "loss": 4.763317108154297, "step": 2 }, { "epoch": 0.004087193460490463, "grad_norm": 492.0194091796875, "learning_rate": 8.88888888888889e-07, "loss": 4.8710551261901855, "step": 3 }, { "epoch": 0.005449591280653951, "grad_norm": 366.0640869140625, "learning_rate": 1.3333333333333334e-06, "loss": 5.831564903259277, "step": 4 }, { "epoch": 0.006811989100817439, "grad_norm": 1585.5228271484375, "learning_rate": 1.777777777777778e-06, "loss": 5.387962341308594, "step": 5 }, { "epoch": 0.008174386920980926, "grad_norm": 2851.98486328125, "learning_rate": 2.222222222222222e-06, "loss": 4.9249958992004395, "step": 6 }, { "epoch": 0.009536784741144414, "grad_norm": 1861.60986328125, "learning_rate": 2.666666666666667e-06, "loss": 4.404921531677246, "step": 7 }, { "epoch": 0.010899182561307902, "grad_norm": 406.1229553222656, "learning_rate": 3.1111111111111116e-06, "loss": 4.726287364959717, "step": 8 }, { "epoch": 0.01226158038147139, "grad_norm": 151.337158203125, "learning_rate": 3.555555555555556e-06, "loss": 4.267265796661377, "step": 9 }, { "epoch": 0.013623978201634877, "grad_norm": 281.90283203125, "learning_rate": 4.000000000000001e-06, "loss": 3.754570245742798, "step": 10 }, { "epoch": 0.014986376021798364, "grad_norm": 173.0663604736328, "learning_rate": 4.444444444444444e-06, "loss": 4.639986991882324, "step": 11 }, { "epoch": 0.01634877384196185, "grad_norm": 126.4852523803711, "learning_rate": 4.888888888888889e-06, "loss": 3.811030149459839, "step": 12 }, { "epoch": 0.017711171662125342, "grad_norm": 105.14614868164062, "learning_rate": 5.333333333333334e-06, "loss": 2.676652431488037, "step": 13 }, { "epoch": 0.01907356948228883, "grad_norm": 62.94166564941406, "learning_rate": 5.777777777777778e-06, "loss": 3.411611557006836, "step": 14 }, { "epoch": 0.020435967302452316, "grad_norm": 73.7257308959961, "learning_rate": 6.222222222222223e-06, "loss": 3.134140968322754, "step": 15 }, { "epoch": 0.021798365122615803, "grad_norm": 105.67352294921875, "learning_rate": 6.666666666666667e-06, "loss": 3.5317726135253906, "step": 16 }, { "epoch": 0.02316076294277929, "grad_norm": 223.5050048828125, "learning_rate": 7.111111111111112e-06, "loss": 3.372407913208008, "step": 17 }, { "epoch": 0.02452316076294278, "grad_norm": 104.68070220947266, "learning_rate": 7.555555555555556e-06, "loss": 3.0667271614074707, "step": 18 }, { "epoch": 0.025885558583106268, "grad_norm": 218.82122802734375, "learning_rate": 8.000000000000001e-06, "loss": 4.044309139251709, "step": 19 }, { "epoch": 0.027247956403269755, "grad_norm": 66.2984848022461, "learning_rate": 8.444444444444446e-06, "loss": 3.5008432865142822, "step": 20 }, { "epoch": 0.02861035422343324, "grad_norm": 331.3477783203125, "learning_rate": 8.888888888888888e-06, "loss": 2.3679962158203125, "step": 21 }, { "epoch": 0.02997275204359673, "grad_norm": 62.871337890625, "learning_rate": 9.333333333333334e-06, "loss": 3.0448615550994873, "step": 22 }, { "epoch": 0.031335149863760216, "grad_norm": 238.0573272705078, "learning_rate": 9.777777777777779e-06, "loss": 2.765260696411133, "step": 23 }, { "epoch": 0.0326975476839237, "grad_norm": 82.25668334960938, "learning_rate": 1.0222222222222223e-05, "loss": 2.010239601135254, "step": 24 }, { "epoch": 0.0340599455040872, "grad_norm": 79.22156524658203, "learning_rate": 1.0666666666666667e-05, "loss": 3.273993492126465, "step": 25 }, { "epoch": 0.035422343324250684, "grad_norm": 211.97865295410156, "learning_rate": 1.1111111111111113e-05, "loss": 2.4256887435913086, "step": 26 }, { "epoch": 0.03678474114441417, "grad_norm": 46.0669059753418, "learning_rate": 1.1555555555555556e-05, "loss": 2.2351551055908203, "step": 27 }, { "epoch": 0.03814713896457766, "grad_norm": 118.11949920654297, "learning_rate": 1.2e-05, "loss": 2.5084986686706543, "step": 28 }, { "epoch": 0.039509536784741145, "grad_norm": 82.12428283691406, "learning_rate": 1.2444444444444446e-05, "loss": 2.6026601791381836, "step": 29 }, { "epoch": 0.04087193460490463, "grad_norm": 43.18901062011719, "learning_rate": 1.288888888888889e-05, "loss": 2.664149761199951, "step": 30 }, { "epoch": 0.04223433242506812, "grad_norm": 22.044647216796875, "learning_rate": 1.3333333333333333e-05, "loss": 2.3608181476593018, "step": 31 }, { "epoch": 0.043596730245231606, "grad_norm": 70.06600189208984, "learning_rate": 1.377777777777778e-05, "loss": 1.6939555406570435, "step": 32 }, { "epoch": 0.04495912806539509, "grad_norm": 20.15525245666504, "learning_rate": 1.4222222222222224e-05, "loss": 1.8010144233703613, "step": 33 }, { "epoch": 0.04632152588555858, "grad_norm": 147.11480712890625, "learning_rate": 1.4666666666666666e-05, "loss": 1.4835395812988281, "step": 34 }, { "epoch": 0.047683923705722074, "grad_norm": 11.888444900512695, "learning_rate": 1.5111111111111112e-05, "loss": 1.8386578559875488, "step": 35 }, { "epoch": 0.04904632152588556, "grad_norm": 24.076086044311523, "learning_rate": 1.555555555555556e-05, "loss": 1.9059340953826904, "step": 36 }, { "epoch": 0.05040871934604905, "grad_norm": 51.02381896972656, "learning_rate": 1.6000000000000003e-05, "loss": 2.2906460762023926, "step": 37 }, { "epoch": 0.051771117166212535, "grad_norm": 36.23479080200195, "learning_rate": 1.6444444444444444e-05, "loss": 2.2603085041046143, "step": 38 }, { "epoch": 0.05313351498637602, "grad_norm": 27.56309700012207, "learning_rate": 1.688888888888889e-05, "loss": 2.012605905532837, "step": 39 }, { "epoch": 0.05449591280653951, "grad_norm": 7.225550174713135, "learning_rate": 1.7333333333333336e-05, "loss": 2.1838011741638184, "step": 40 }, { "epoch": 0.055858310626702996, "grad_norm": 30.263853073120117, "learning_rate": 1.7777777777777777e-05, "loss": 2.153271198272705, "step": 41 }, { "epoch": 0.05722070844686648, "grad_norm": 10.24344253540039, "learning_rate": 1.8222222222222224e-05, "loss": 1.4747258424758911, "step": 42 }, { "epoch": 0.05858310626702997, "grad_norm": 88.14250183105469, "learning_rate": 1.866666666666667e-05, "loss": 1.695185661315918, "step": 43 }, { "epoch": 0.05994550408719346, "grad_norm": 5.35848331451416, "learning_rate": 1.9111111111111113e-05, "loss": 1.4296722412109375, "step": 44 }, { "epoch": 0.06130790190735695, "grad_norm": 19.473684310913086, "learning_rate": 1.9555555555555557e-05, "loss": 1.4682118892669678, "step": 45 }, { "epoch": 0.06267029972752043, "grad_norm": 16.275846481323242, "learning_rate": 2e-05, "loss": 1.9111659526824951, "step": 46 }, { "epoch": 0.06403269754768393, "grad_norm": 104.42042541503906, "learning_rate": 1.9999975629761854e-05, "loss": 2.2345409393310547, "step": 47 }, { "epoch": 0.0653950953678474, "grad_norm": 9.325274467468262, "learning_rate": 1.9999902519166192e-05, "loss": 1.7245526313781738, "step": 48 }, { "epoch": 0.0667574931880109, "grad_norm": 14.608559608459473, "learning_rate": 1.9999780668569363e-05, "loss": 1.1159684658050537, "step": 49 }, { "epoch": 0.0681198910081744, "grad_norm": 10.00455093383789, "learning_rate": 1.9999610078565272e-05, "loss": 1.206161379814148, "step": 50 }, { "epoch": 0.06948228882833787, "grad_norm": 7.008636951446533, "learning_rate": 1.999939074998538e-05, "loss": 1.8427221775054932, "step": 51 }, { "epoch": 0.07084468664850137, "grad_norm": 73.50210571289062, "learning_rate": 1.9999122683898708e-05, "loss": 2.230473518371582, "step": 52 }, { "epoch": 0.07220708446866485, "grad_norm": 3.512129545211792, "learning_rate": 1.9998805881611816e-05, "loss": 1.3873107433319092, "step": 53 }, { "epoch": 0.07356948228882834, "grad_norm": 11.345011711120605, "learning_rate": 1.9998440344668827e-05, "loss": 1.7577924728393555, "step": 54 }, { "epoch": 0.07493188010899182, "grad_norm": 8.3037691116333, "learning_rate": 1.999802607485137e-05, "loss": 2.0517354011535645, "step": 55 }, { "epoch": 0.07629427792915532, "grad_norm": 29.58579444885254, "learning_rate": 1.999756307417863e-05, "loss": 2.1793274879455566, "step": 56 }, { "epoch": 0.0776566757493188, "grad_norm": 6.470326900482178, "learning_rate": 1.9997051344907284e-05, "loss": 1.635005235671997, "step": 57 }, { "epoch": 0.07901907356948229, "grad_norm": 11.640275955200195, "learning_rate": 1.9996490889531528e-05, "loss": 2.005619525909424, "step": 58 }, { "epoch": 0.08038147138964577, "grad_norm": 7.844667434692383, "learning_rate": 1.999588171078305e-05, "loss": 1.824695110321045, "step": 59 }, { "epoch": 0.08174386920980926, "grad_norm": 30.64260482788086, "learning_rate": 1.9995223811631016e-05, "loss": 1.7149019241333008, "step": 60 }, { "epoch": 0.08310626702997276, "grad_norm": 7.330290794372559, "learning_rate": 1.9994517195282053e-05, "loss": 2.1633987426757812, "step": 61 }, { "epoch": 0.08446866485013624, "grad_norm": 17.721323013305664, "learning_rate": 1.999376186518025e-05, "loss": 1.8637008666992188, "step": 62 }, { "epoch": 0.08583106267029973, "grad_norm": 20455.25, "learning_rate": 1.9992957825007115e-05, "loss": 1.4629058837890625, "step": 63 }, { "epoch": 0.08719346049046321, "grad_norm": 32.010257720947266, "learning_rate": 1.9992105078681587e-05, "loss": 1.52781081199646, "step": 64 }, { "epoch": 0.0885558583106267, "grad_norm": 132.17056274414062, "learning_rate": 1.999120363035998e-05, "loss": 1.3915117979049683, "step": 65 }, { "epoch": 0.08991825613079019, "grad_norm": 7.302569389343262, "learning_rate": 1.9990253484436004e-05, "loss": 1.6994096040725708, "step": 66 }, { "epoch": 0.09128065395095368, "grad_norm": 4.3377227783203125, "learning_rate": 1.9989254645540715e-05, "loss": 1.6994991302490234, "step": 67 }, { "epoch": 0.09264305177111716, "grad_norm": 22.93842887878418, "learning_rate": 1.9988207118542504e-05, "loss": 1.8096067905426025, "step": 68 }, { "epoch": 0.09400544959128065, "grad_norm": 7.0845255851745605, "learning_rate": 1.998711090854706e-05, "loss": 1.783044695854187, "step": 69 }, { "epoch": 0.09536784741144415, "grad_norm": 22.55027961730957, "learning_rate": 1.998596602089737e-05, "loss": 1.8111974000930786, "step": 70 }, { "epoch": 0.09673024523160763, "grad_norm": 11.120646476745605, "learning_rate": 1.9984772461173663e-05, "loss": 1.3425049781799316, "step": 71 }, { "epoch": 0.09809264305177112, "grad_norm": 43.038997650146484, "learning_rate": 1.998353023519341e-05, "loss": 1.4167280197143555, "step": 72 }, { "epoch": 0.0994550408719346, "grad_norm": 36.41879653930664, "learning_rate": 1.9982239349011286e-05, "loss": 1.6119059324264526, "step": 73 }, { "epoch": 0.1008174386920981, "grad_norm": 21.334951400756836, "learning_rate": 1.9980899808919122e-05, "loss": 1.826452374458313, "step": 74 }, { "epoch": 0.10217983651226158, "grad_norm": 236.16416931152344, "learning_rate": 1.9979511621445902e-05, "loss": 1.275795578956604, "step": 75 }, { "epoch": 0.10354223433242507, "grad_norm": 5.314990043640137, "learning_rate": 1.9978074793357726e-05, "loss": 1.530911922454834, "step": 76 }, { "epoch": 0.10490463215258855, "grad_norm": 86.2394027709961, "learning_rate": 1.9976589331657754e-05, "loss": 1.3827338218688965, "step": 77 }, { "epoch": 0.10626702997275204, "grad_norm": 1456.9078369140625, "learning_rate": 1.99750552435862e-05, "loss": 1.3018248081207275, "step": 78 }, { "epoch": 0.10762942779291552, "grad_norm": 362.99798583984375, "learning_rate": 1.997347253662028e-05, "loss": 1.2203123569488525, "step": 79 }, { "epoch": 0.10899182561307902, "grad_norm": 10.866148948669434, "learning_rate": 1.9971841218474184e-05, "loss": 1.6400402784347534, "step": 80 }, { "epoch": 0.11035422343324251, "grad_norm": 10.85586929321289, "learning_rate": 1.997016129709904e-05, "loss": 1.7072572708129883, "step": 81 }, { "epoch": 0.11171662125340599, "grad_norm": 15.440835952758789, "learning_rate": 1.9968432780682855e-05, "loss": 1.9395954608917236, "step": 82 }, { "epoch": 0.11307901907356949, "grad_norm": 15.077877044677734, "learning_rate": 1.9966655677650512e-05, "loss": 1.0009617805480957, "step": 83 }, { "epoch": 0.11444141689373297, "grad_norm": 12.075288772583008, "learning_rate": 1.9964829996663684e-05, "loss": 1.7570009231567383, "step": 84 }, { "epoch": 0.11580381471389646, "grad_norm": 168.84727478027344, "learning_rate": 1.9962955746620832e-05, "loss": 0.9241946935653687, "step": 85 }, { "epoch": 0.11716621253405994, "grad_norm": 18.533748626708984, "learning_rate": 1.9961032936657143e-05, "loss": 1.3731493949890137, "step": 86 }, { "epoch": 0.11852861035422343, "grad_norm": 13.447470664978027, "learning_rate": 1.9959061576144482e-05, "loss": 2.338679790496826, "step": 87 }, { "epoch": 0.11989100817438691, "grad_norm": 5.259335041046143, "learning_rate": 1.9957041674691356e-05, "loss": 1.1267805099487305, "step": 88 }, { "epoch": 0.12125340599455041, "grad_norm": 21.200061798095703, "learning_rate": 1.995497324214285e-05, "loss": 1.3930729627609253, "step": 89 }, { "epoch": 0.1226158038147139, "grad_norm": 14.131498336791992, "learning_rate": 1.995285628858062e-05, "loss": 1.4397244453430176, "step": 90 }, { "epoch": 0.12397820163487738, "grad_norm": 46.904842376708984, "learning_rate": 1.995069082432279e-05, "loss": 2.401963710784912, "step": 91 }, { "epoch": 0.12534059945504086, "grad_norm": 209.3900909423828, "learning_rate": 1.994847685992393e-05, "loss": 1.4542686939239502, "step": 92 }, { "epoch": 0.12670299727520437, "grad_norm": 46.27727127075195, "learning_rate": 1.9946214406175016e-05, "loss": 1.5343117713928223, "step": 93 }, { "epoch": 0.12806539509536785, "grad_norm": 19.841386795043945, "learning_rate": 1.9943903474103354e-05, "loss": 2.0001091957092285, "step": 94 }, { "epoch": 0.12942779291553133, "grad_norm": 93.7799301147461, "learning_rate": 1.994154407497254e-05, "loss": 0.9358162879943848, "step": 95 }, { "epoch": 0.1307901907356948, "grad_norm": 23.674835205078125, "learning_rate": 1.993913622028239e-05, "loss": 0.9326591491699219, "step": 96 }, { "epoch": 0.13215258855585832, "grad_norm": 16.45394515991211, "learning_rate": 1.9936679921768905e-05, "loss": 1.1655969619750977, "step": 97 }, { "epoch": 0.1335149863760218, "grad_norm": 22.76195526123047, "learning_rate": 1.9934175191404202e-05, "loss": 1.4714016914367676, "step": 98 }, { "epoch": 0.13487738419618528, "grad_norm": 4.313206672668457, "learning_rate": 1.9931622041396456e-05, "loss": 0.9404367208480835, "step": 99 }, { "epoch": 0.1362397820163488, "grad_norm": 11.318977355957031, "learning_rate": 1.9929020484189843e-05, "loss": 1.5243396759033203, "step": 100 }, { "epoch": 0.13760217983651227, "grad_norm": 5.220909595489502, "learning_rate": 1.992637053246448e-05, "loss": 1.1928545236587524, "step": 101 }, { "epoch": 0.13896457765667575, "grad_norm": 9.201738357543945, "learning_rate": 1.992367219913635e-05, "loss": 1.309905767440796, "step": 102 }, { "epoch": 0.14032697547683923, "grad_norm": 22.2390193939209, "learning_rate": 1.9920925497357265e-05, "loss": 1.6257928609848022, "step": 103 }, { "epoch": 0.14168937329700274, "grad_norm": 22.242359161376953, "learning_rate": 1.9918130440514775e-05, "loss": 1.1354283094406128, "step": 104 }, { "epoch": 0.14305177111716622, "grad_norm": 86.57989501953125, "learning_rate": 1.9915287042232117e-05, "loss": 0.7232756018638611, "step": 105 }, { "epoch": 0.1444141689373297, "grad_norm": 13.18682861328125, "learning_rate": 1.9912395316368163e-05, "loss": 1.1990281343460083, "step": 106 }, { "epoch": 0.14577656675749318, "grad_norm": 35.66343307495117, "learning_rate": 1.990945527701731e-05, "loss": 1.4931535720825195, "step": 107 }, { "epoch": 0.14713896457765668, "grad_norm": 15.834054946899414, "learning_rate": 1.9906466938509456e-05, "loss": 0.6720550656318665, "step": 108 }, { "epoch": 0.14850136239782016, "grad_norm": 23.979185104370117, "learning_rate": 1.9903430315409908e-05, "loss": 0.8504596948623657, "step": 109 }, { "epoch": 0.14986376021798364, "grad_norm": 6.9147820472717285, "learning_rate": 1.9900345422519302e-05, "loss": 1.1073373556137085, "step": 110 }, { "epoch": 0.15122615803814715, "grad_norm": 36.30422592163086, "learning_rate": 1.9897212274873558e-05, "loss": 1.9511359930038452, "step": 111 }, { "epoch": 0.15258855585831063, "grad_norm": 30.87666130065918, "learning_rate": 1.989403088774379e-05, "loss": 1.3477885723114014, "step": 112 }, { "epoch": 0.1539509536784741, "grad_norm": 7.837673664093018, "learning_rate": 1.9890801276636226e-05, "loss": 1.608608365058899, "step": 113 }, { "epoch": 0.1553133514986376, "grad_norm": 9.653854370117188, "learning_rate": 1.9887523457292145e-05, "loss": 1.7528820037841797, "step": 114 }, { "epoch": 0.1566757493188011, "grad_norm": 91.34810638427734, "learning_rate": 1.9884197445687795e-05, "loss": 0.9989630579948425, "step": 115 }, { "epoch": 0.15803814713896458, "grad_norm": 33.962703704833984, "learning_rate": 1.9880823258034317e-05, "loss": 1.6023415327072144, "step": 116 }, { "epoch": 0.15940054495912806, "grad_norm": 6.296090602874756, "learning_rate": 1.987740091077766e-05, "loss": 1.0738110542297363, "step": 117 }, { "epoch": 0.16076294277929154, "grad_norm": 4.983788013458252, "learning_rate": 1.9873930420598508e-05, "loss": 1.3211939334869385, "step": 118 }, { "epoch": 0.16212534059945505, "grad_norm": 12.209794998168945, "learning_rate": 1.9870411804412196e-05, "loss": 1.1410646438598633, "step": 119 }, { "epoch": 0.16348773841961853, "grad_norm": 193.8845672607422, "learning_rate": 1.9866845079368628e-05, "loss": 1.2269351482391357, "step": 120 }, { "epoch": 0.164850136239782, "grad_norm": 49.62782669067383, "learning_rate": 1.9863230262852188e-05, "loss": 1.2612409591674805, "step": 121 }, { "epoch": 0.16621253405994552, "grad_norm": 67.75338745117188, "learning_rate": 1.9859567372481666e-05, "loss": 1.6083930730819702, "step": 122 }, { "epoch": 0.167574931880109, "grad_norm": 22.486276626586914, "learning_rate": 1.9855856426110163e-05, "loss": 1.2336335182189941, "step": 123 }, { "epoch": 0.16893732970027248, "grad_norm": 26.935361862182617, "learning_rate": 1.9852097441825017e-05, "loss": 1.4997451305389404, "step": 124 }, { "epoch": 0.17029972752043596, "grad_norm": 18.006975173950195, "learning_rate": 1.9848290437947683e-05, "loss": 1.3723387718200684, "step": 125 }, { "epoch": 0.17166212534059946, "grad_norm": 36.87037658691406, "learning_rate": 1.9844435433033687e-05, "loss": 1.0927711725234985, "step": 126 }, { "epoch": 0.17302452316076294, "grad_norm": 32.00332260131836, "learning_rate": 1.9840532445872504e-05, "loss": 1.694850206375122, "step": 127 }, { "epoch": 0.17438692098092642, "grad_norm": 21.677570343017578, "learning_rate": 1.983658149548748e-05, "loss": 1.292415976524353, "step": 128 }, { "epoch": 0.17574931880108993, "grad_norm": 19.951566696166992, "learning_rate": 1.9832582601135737e-05, "loss": 2.574887275695801, "step": 129 }, { "epoch": 0.1771117166212534, "grad_norm": 56.657535552978516, "learning_rate": 1.9828535782308074e-05, "loss": 1.2832244634628296, "step": 130 }, { "epoch": 0.1784741144414169, "grad_norm": 10.198763847351074, "learning_rate": 1.9824441058728882e-05, "loss": 1.1939632892608643, "step": 131 }, { "epoch": 0.17983651226158037, "grad_norm": 77.20623016357422, "learning_rate": 1.9820298450356036e-05, "loss": 1.7353812456130981, "step": 132 }, { "epoch": 0.18119891008174388, "grad_norm": 308.4658508300781, "learning_rate": 1.9816107977380805e-05, "loss": 2.130159854888916, "step": 133 }, { "epoch": 0.18256130790190736, "grad_norm": 22.72699546813965, "learning_rate": 1.9811869660227757e-05, "loss": 1.2344276905059814, "step": 134 }, { "epoch": 0.18392370572207084, "grad_norm": 18.252962112426758, "learning_rate": 1.980758351955465e-05, "loss": 1.258174180984497, "step": 135 }, { "epoch": 0.18528610354223432, "grad_norm": 68.59465789794922, "learning_rate": 1.9803249576252338e-05, "loss": 0.9723294973373413, "step": 136 }, { "epoch": 0.18664850136239783, "grad_norm": 15.701346397399902, "learning_rate": 1.979886785144467e-05, "loss": 2.104464530944824, "step": 137 }, { "epoch": 0.1880108991825613, "grad_norm": 111.0069808959961, "learning_rate": 1.9794438366488377e-05, "loss": 1.6660346984863281, "step": 138 }, { "epoch": 0.1893732970027248, "grad_norm": 15.181336402893066, "learning_rate": 1.9789961142972983e-05, "loss": 1.3849374055862427, "step": 139 }, { "epoch": 0.1907356948228883, "grad_norm": 20.555503845214844, "learning_rate": 1.9785436202720687e-05, "loss": 1.6557199954986572, "step": 140 }, { "epoch": 0.19209809264305178, "grad_norm": 31.236351013183594, "learning_rate": 1.978086356778626e-05, "loss": 1.9125089645385742, "step": 141 }, { "epoch": 0.19346049046321526, "grad_norm": 14.303613662719727, "learning_rate": 1.9776243260456953e-05, "loss": 1.6729528903961182, "step": 142 }, { "epoch": 0.19482288828337874, "grad_norm": 12.190939903259277, "learning_rate": 1.977157530325235e-05, "loss": 1.3772521018981934, "step": 143 }, { "epoch": 0.19618528610354224, "grad_norm": 55.51506042480469, "learning_rate": 1.976685971892431e-05, "loss": 1.8838613033294678, "step": 144 }, { "epoch": 0.19754768392370572, "grad_norm": 44.39870071411133, "learning_rate": 1.9762096530456803e-05, "loss": 2.1355111598968506, "step": 145 }, { "epoch": 0.1989100817438692, "grad_norm": 23.6383056640625, "learning_rate": 1.9757285761065846e-05, "loss": 1.2595380544662476, "step": 146 }, { "epoch": 0.20027247956403268, "grad_norm": 20.090862274169922, "learning_rate": 1.9752427434199356e-05, "loss": 1.905421257019043, "step": 147 }, { "epoch": 0.2016348773841962, "grad_norm": 7.764129638671875, "learning_rate": 1.9747521573537048e-05, "loss": 1.3974249362945557, "step": 148 }, { "epoch": 0.20299727520435967, "grad_norm": 111.19981384277344, "learning_rate": 1.974256820299032e-05, "loss": 1.5679601430892944, "step": 149 }, { "epoch": 0.20435967302452315, "grad_norm": 5.616426467895508, "learning_rate": 1.9737567346702137e-05, "loss": 1.5397052764892578, "step": 150 }, { "epoch": 0.20572207084468666, "grad_norm": 18.340105056762695, "learning_rate": 1.973251902904691e-05, "loss": 1.6224992275238037, "step": 151 }, { "epoch": 0.20708446866485014, "grad_norm": 8.673772811889648, "learning_rate": 1.9727423274630385e-05, "loss": 1.345162034034729, "step": 152 }, { "epoch": 0.20844686648501362, "grad_norm": 25.245019912719727, "learning_rate": 1.97222801082895e-05, "loss": 1.6527200937271118, "step": 153 }, { "epoch": 0.2098092643051771, "grad_norm": 15.105555534362793, "learning_rate": 1.9717089555092306e-05, "loss": 2.303041458129883, "step": 154 }, { "epoch": 0.2111716621253406, "grad_norm": 81.81103515625, "learning_rate": 1.97118516403378e-05, "loss": 1.5143072605133057, "step": 155 }, { "epoch": 0.2125340599455041, "grad_norm": 8.99626636505127, "learning_rate": 1.9706566389555825e-05, "loss": 2.0205183029174805, "step": 156 }, { "epoch": 0.21389645776566757, "grad_norm": 105.95153045654297, "learning_rate": 1.970123382850695e-05, "loss": 1.2933770418167114, "step": 157 }, { "epoch": 0.21525885558583105, "grad_norm": 23.49799919128418, "learning_rate": 1.969585398318233e-05, "loss": 1.1329820156097412, "step": 158 }, { "epoch": 0.21662125340599456, "grad_norm": 21.976200103759766, "learning_rate": 1.969042687980359e-05, "loss": 1.7795616388320923, "step": 159 }, { "epoch": 0.21798365122615804, "grad_norm": 36.59941101074219, "learning_rate": 1.9684952544822685e-05, "loss": 1.2428920269012451, "step": 160 }, { "epoch": 0.21934604904632152, "grad_norm": 16.310401916503906, "learning_rate": 1.9679431004921788e-05, "loss": 0.9396666884422302, "step": 161 }, { "epoch": 0.22070844686648503, "grad_norm": 5.7113871574401855, "learning_rate": 1.9673862287013144e-05, "loss": 1.4448082447052002, "step": 162 }, { "epoch": 0.2220708446866485, "grad_norm": 13.691057205200195, "learning_rate": 1.9668246418238955e-05, "loss": 1.3966199159622192, "step": 163 }, { "epoch": 0.22343324250681199, "grad_norm": 58.44424819946289, "learning_rate": 1.9662583425971227e-05, "loss": 1.3412401676177979, "step": 164 }, { "epoch": 0.22479564032697547, "grad_norm": 8.327013969421387, "learning_rate": 1.9656873337811658e-05, "loss": 0.966040313243866, "step": 165 }, { "epoch": 0.22615803814713897, "grad_norm": 4.4662652015686035, "learning_rate": 1.9651116181591493e-05, "loss": 0.9228675365447998, "step": 166 }, { "epoch": 0.22752043596730245, "grad_norm": 50.51068878173828, "learning_rate": 1.9645311985371374e-05, "loss": 1.4534227848052979, "step": 167 }, { "epoch": 0.22888283378746593, "grad_norm": 5.380500316619873, "learning_rate": 1.9639460777441243e-05, "loss": 1.0608735084533691, "step": 168 }, { "epoch": 0.23024523160762944, "grad_norm": 33.324378967285156, "learning_rate": 1.9633562586320157e-05, "loss": 1.6287078857421875, "step": 169 }, { "epoch": 0.23160762942779292, "grad_norm": 17.873071670532227, "learning_rate": 1.962761744075618e-05, "loss": 2.1449267864227295, "step": 170 }, { "epoch": 0.2329700272479564, "grad_norm": 7.6625800132751465, "learning_rate": 1.9621625369726246e-05, "loss": 0.9729044437408447, "step": 171 }, { "epoch": 0.23433242506811988, "grad_norm": 15.168426513671875, "learning_rate": 1.961558640243598e-05, "loss": 1.4125659465789795, "step": 172 }, { "epoch": 0.2356948228882834, "grad_norm": 73.09243774414062, "learning_rate": 1.9609500568319605e-05, "loss": 2.1766257286071777, "step": 173 }, { "epoch": 0.23705722070844687, "grad_norm": 59.492530822753906, "learning_rate": 1.960336789703977e-05, "loss": 1.2526952028274536, "step": 174 }, { "epoch": 0.23841961852861035, "grad_norm": 19.279144287109375, "learning_rate": 1.9597188418487395e-05, "loss": 1.3987064361572266, "step": 175 }, { "epoch": 0.23978201634877383, "grad_norm": 13.545157432556152, "learning_rate": 1.959096216278156e-05, "loss": 1.1536396741867065, "step": 176 }, { "epoch": 0.24114441416893734, "grad_norm": 47.02814483642578, "learning_rate": 1.958468916026933e-05, "loss": 1.1951217651367188, "step": 177 }, { "epoch": 0.24250681198910082, "grad_norm": 152.36248779296875, "learning_rate": 1.957836944152562e-05, "loss": 1.661449909210205, "step": 178 }, { "epoch": 0.2438692098092643, "grad_norm": 21.625080108642578, "learning_rate": 1.957200303735304e-05, "loss": 1.6087756156921387, "step": 179 }, { "epoch": 0.2452316076294278, "grad_norm": 30.184619903564453, "learning_rate": 1.9565589978781747e-05, "loss": 1.129873514175415, "step": 180 }, { "epoch": 0.24659400544959129, "grad_norm": 10.294071197509766, "learning_rate": 1.955913029706929e-05, "loss": 1.893623948097229, "step": 181 }, { "epoch": 0.24795640326975477, "grad_norm": 39.03071975708008, "learning_rate": 1.9552624023700472e-05, "loss": 1.7128105163574219, "step": 182 }, { "epoch": 0.24931880108991825, "grad_norm": 13.111297607421875, "learning_rate": 1.9546071190387175e-05, "loss": 1.170483112335205, "step": 183 }, { "epoch": 0.2506811989100817, "grad_norm": 14.154705047607422, "learning_rate": 1.953947182906822e-05, "loss": 1.0115466117858887, "step": 184 }, { "epoch": 0.25204359673024523, "grad_norm": 7.42869758605957, "learning_rate": 1.953282597190921e-05, "loss": 1.407454013824463, "step": 185 }, { "epoch": 0.25340599455040874, "grad_norm": 83.33525085449219, "learning_rate": 1.9526133651302372e-05, "loss": 1.5775861740112305, "step": 186 }, { "epoch": 0.2547683923705722, "grad_norm": 10.964442253112793, "learning_rate": 1.951939489986639e-05, "loss": 2.106907367706299, "step": 187 }, { "epoch": 0.2561307901907357, "grad_norm": 4.845801830291748, "learning_rate": 1.951260975044626e-05, "loss": 1.620349645614624, "step": 188 }, { "epoch": 0.2574931880108992, "grad_norm": 20.734224319458008, "learning_rate": 1.950577823611313e-05, "loss": 1.9613642692565918, "step": 189 }, { "epoch": 0.25885558583106266, "grad_norm": 10.003898620605469, "learning_rate": 1.9498900390164118e-05, "loss": 1.090088129043579, "step": 190 }, { "epoch": 0.26021798365122617, "grad_norm": 16.818862915039062, "learning_rate": 1.949197624612218e-05, "loss": 1.8157355785369873, "step": 191 }, { "epoch": 0.2615803814713896, "grad_norm": 11.89087200164795, "learning_rate": 1.9485005837735918e-05, "loss": 1.9218425750732422, "step": 192 }, { "epoch": 0.26294277929155313, "grad_norm": 20.741195678710938, "learning_rate": 1.947798919897944e-05, "loss": 1.4963088035583496, "step": 193 }, { "epoch": 0.26430517711171664, "grad_norm": 13.59697151184082, "learning_rate": 1.947092636405217e-05, "loss": 1.5759832859039307, "step": 194 }, { "epoch": 0.2656675749318801, "grad_norm": 23.819440841674805, "learning_rate": 1.946381736737871e-05, "loss": 0.9411324262619019, "step": 195 }, { "epoch": 0.2670299727520436, "grad_norm": 8.186532974243164, "learning_rate": 1.9456662243608643e-05, "loss": 1.514115810394287, "step": 196 }, { "epoch": 0.2683923705722071, "grad_norm": 7.262381076812744, "learning_rate": 1.9449461027616382e-05, "loss": 0.7800123691558838, "step": 197 }, { "epoch": 0.26975476839237056, "grad_norm": 12.26691722869873, "learning_rate": 1.9442213754501002e-05, "loss": 1.8123799562454224, "step": 198 }, { "epoch": 0.27111716621253407, "grad_norm": 3.9358699321746826, "learning_rate": 1.9434920459586054e-05, "loss": 1.9911051988601685, "step": 199 }, { "epoch": 0.2724795640326976, "grad_norm": 173.1545867919922, "learning_rate": 1.9427581178419408e-05, "loss": 2.168071746826172, "step": 200 }, { "epoch": 0.273841961852861, "grad_norm": 14.167070388793945, "learning_rate": 1.9420195946773063e-05, "loss": 1.8269840478897095, "step": 201 }, { "epoch": 0.27520435967302453, "grad_norm": 11.052868843078613, "learning_rate": 1.9412764800643e-05, "loss": 1.677504301071167, "step": 202 }, { "epoch": 0.276566757493188, "grad_norm": 16.05508804321289, "learning_rate": 1.940528777624897e-05, "loss": 1.614619493484497, "step": 203 }, { "epoch": 0.2779291553133515, "grad_norm": 61.652767181396484, "learning_rate": 1.939776491003435e-05, "loss": 2.4674952030181885, "step": 204 }, { "epoch": 0.279291553133515, "grad_norm": 4.494046688079834, "learning_rate": 1.9390196238665944e-05, "loss": 0.9794859290122986, "step": 205 }, { "epoch": 0.28065395095367845, "grad_norm": 8.619671821594238, "learning_rate": 1.9382581799033824e-05, "loss": 2.7552385330200195, "step": 206 }, { "epoch": 0.28201634877384196, "grad_norm": 5.626151084899902, "learning_rate": 1.9374921628251127e-05, "loss": 1.3778140544891357, "step": 207 }, { "epoch": 0.28337874659400547, "grad_norm": 19.242416381835938, "learning_rate": 1.936721576365389e-05, "loss": 1.3701825141906738, "step": 208 }, { "epoch": 0.2847411444141689, "grad_norm": 3.4220023155212402, "learning_rate": 1.935946424280087e-05, "loss": 1.5027049779891968, "step": 209 }, { "epoch": 0.28610354223433243, "grad_norm": 11.453391075134277, "learning_rate": 1.935166710347334e-05, "loss": 1.1889855861663818, "step": 210 }, { "epoch": 0.28746594005449594, "grad_norm": 14.49972152709961, "learning_rate": 1.9343824383674936e-05, "loss": 0.7374680042266846, "step": 211 }, { "epoch": 0.2888283378746594, "grad_norm": 6.671357154846191, "learning_rate": 1.9335936121631442e-05, "loss": 1.393950343132019, "step": 212 }, { "epoch": 0.2901907356948229, "grad_norm": 12.67508602142334, "learning_rate": 1.9328002355790624e-05, "loss": 1.285720944404602, "step": 213 }, { "epoch": 0.29155313351498635, "grad_norm": 5.697569847106934, "learning_rate": 1.9320023124822035e-05, "loss": 1.33206045627594, "step": 214 }, { "epoch": 0.29291553133514986, "grad_norm": 13.284814834594727, "learning_rate": 1.931199846761683e-05, "loss": 1.1100962162017822, "step": 215 }, { "epoch": 0.29427792915531337, "grad_norm": 4.58121395111084, "learning_rate": 1.9303928423287568e-05, "loss": 1.309066653251648, "step": 216 }, { "epoch": 0.2956403269754768, "grad_norm": 4.796217918395996, "learning_rate": 1.929581303116803e-05, "loss": 1.3221436738967896, "step": 217 }, { "epoch": 0.2970027247956403, "grad_norm": 34.37131881713867, "learning_rate": 1.9287652330813024e-05, "loss": 1.6201462745666504, "step": 218 }, { "epoch": 0.29836512261580383, "grad_norm": 3.592020273208618, "learning_rate": 1.9279446361998188e-05, "loss": 1.287322759628296, "step": 219 }, { "epoch": 0.2997275204359673, "grad_norm": 9.371930122375488, "learning_rate": 1.927119516471981e-05, "loss": 1.2675158977508545, "step": 220 }, { "epoch": 0.3010899182561308, "grad_norm": 5.7051544189453125, "learning_rate": 1.9262898779194613e-05, "loss": 1.207767367362976, "step": 221 }, { "epoch": 0.3024523160762943, "grad_norm": 160.80111694335938, "learning_rate": 1.9254557245859583e-05, "loss": 2.247450828552246, "step": 222 }, { "epoch": 0.30381471389645776, "grad_norm": 34.89888000488281, "learning_rate": 1.924617060537175e-05, "loss": 1.4713530540466309, "step": 223 }, { "epoch": 0.30517711171662126, "grad_norm": 6.292564392089844, "learning_rate": 1.9237738898607992e-05, "loss": 1.1982619762420654, "step": 224 }, { "epoch": 0.3065395095367847, "grad_norm": 107.35348510742188, "learning_rate": 1.9229262166664854e-05, "loss": 1.34859037399292, "step": 225 }, { "epoch": 0.3079019073569482, "grad_norm": 8.227984428405762, "learning_rate": 1.9220740450858328e-05, "loss": 0.6223532557487488, "step": 226 }, { "epoch": 0.30926430517711173, "grad_norm": 10.317214012145996, "learning_rate": 1.921217379272367e-05, "loss": 2.2256319522857666, "step": 227 }, { "epoch": 0.3106267029972752, "grad_norm": 5.8834123611450195, "learning_rate": 1.9203562234015172e-05, "loss": 0.5850299596786499, "step": 228 }, { "epoch": 0.3119891008174387, "grad_norm": 10.008376121520996, "learning_rate": 1.9194905816705988e-05, "loss": 0.9801367521286011, "step": 229 }, { "epoch": 0.3133514986376022, "grad_norm": 15.778343200683594, "learning_rate": 1.91862045829879e-05, "loss": 1.1885411739349365, "step": 230 }, { "epoch": 0.31471389645776565, "grad_norm": 20.022510528564453, "learning_rate": 1.9177458575271143e-05, "loss": 2.108072280883789, "step": 231 }, { "epoch": 0.31607629427792916, "grad_norm": 6.938968181610107, "learning_rate": 1.916866783618417e-05, "loss": 1.7138636112213135, "step": 232 }, { "epoch": 0.31743869209809267, "grad_norm": 5.94962739944458, "learning_rate": 1.9159832408573467e-05, "loss": 1.0658137798309326, "step": 233 }, { "epoch": 0.3188010899182561, "grad_norm": 14.2401762008667, "learning_rate": 1.9150952335503325e-05, "loss": 1.502290964126587, "step": 234 }, { "epoch": 0.3201634877384196, "grad_norm": 5.806396007537842, "learning_rate": 1.9142027660255645e-05, "loss": 2.0007309913635254, "step": 235 }, { "epoch": 0.3215258855585831, "grad_norm": 17.292991638183594, "learning_rate": 1.9133058426329717e-05, "loss": 1.0603134632110596, "step": 236 }, { "epoch": 0.3228882833787466, "grad_norm": 175.2050323486328, "learning_rate": 1.912404467744202e-05, "loss": 1.02361261844635, "step": 237 }, { "epoch": 0.3242506811989101, "grad_norm": 4.879598617553711, "learning_rate": 1.911498645752599e-05, "loss": 1.414052963256836, "step": 238 }, { "epoch": 0.32561307901907355, "grad_norm": 3.397895097732544, "learning_rate": 1.9105883810731822e-05, "loss": 0.866095244884491, "step": 239 }, { "epoch": 0.32697547683923706, "grad_norm": 23.099910736083984, "learning_rate": 1.9096736781426252e-05, "loss": 1.6803913116455078, "step": 240 }, { "epoch": 0.32833787465940056, "grad_norm": 23.778182983398438, "learning_rate": 1.9087545414192338e-05, "loss": 1.751513957977295, "step": 241 }, { "epoch": 0.329700272479564, "grad_norm": 21.555883407592773, "learning_rate": 1.907830975382924e-05, "loss": 1.1033903360366821, "step": 242 }, { "epoch": 0.3310626702997275, "grad_norm": 23.9313907623291, "learning_rate": 1.9069029845352006e-05, "loss": 1.10393226146698, "step": 243 }, { "epoch": 0.33242506811989103, "grad_norm": 17.43801498413086, "learning_rate": 1.9059705733991352e-05, "loss": 1.6998939514160156, "step": 244 }, { "epoch": 0.3337874659400545, "grad_norm": 56.304420471191406, "learning_rate": 1.9050337465193443e-05, "loss": 1.4251322746276855, "step": 245 }, { "epoch": 0.335149863760218, "grad_norm": 4.618871688842773, "learning_rate": 1.9040925084619663e-05, "loss": 1.257938027381897, "step": 246 }, { "epoch": 0.33651226158038144, "grad_norm": 3.7431139945983887, "learning_rate": 1.9031468638146408e-05, "loss": 1.063662052154541, "step": 247 }, { "epoch": 0.33787465940054495, "grad_norm": 5.993250846862793, "learning_rate": 1.9021968171864843e-05, "loss": 1.130645990371704, "step": 248 }, { "epoch": 0.33923705722070846, "grad_norm": 3.628101110458374, "learning_rate": 1.90124237320807e-05, "loss": 1.3751482963562012, "step": 249 }, { "epoch": 0.3405994550408719, "grad_norm": 11.673934936523438, "learning_rate": 1.900283536531403e-05, "loss": 1.5402003526687622, "step": 250 }, { "epoch": 0.3419618528610354, "grad_norm": 7.867159366607666, "learning_rate": 1.8993203118298988e-05, "loss": 1.551003336906433, "step": 251 }, { "epoch": 0.34332425068119893, "grad_norm": 2.988420009613037, "learning_rate": 1.8983527037983606e-05, "loss": 1.1513736248016357, "step": 252 }, { "epoch": 0.3446866485013624, "grad_norm": 3.9205222129821777, "learning_rate": 1.8973807171529556e-05, "loss": 0.9852898120880127, "step": 253 }, { "epoch": 0.3460490463215259, "grad_norm": 5.6089558601379395, "learning_rate": 1.8964043566311942e-05, "loss": 1.379655361175537, "step": 254 }, { "epoch": 0.3474114441416894, "grad_norm": 2.8041203022003174, "learning_rate": 1.8954236269919026e-05, "loss": 0.9801149964332581, "step": 255 }, { "epoch": 0.34877384196185285, "grad_norm": 6.235827922821045, "learning_rate": 1.8944385330152047e-05, "loss": 1.6932134628295898, "step": 256 }, { "epoch": 0.35013623978201636, "grad_norm": 8.651252746582031, "learning_rate": 1.893449079502495e-05, "loss": 1.4605860710144043, "step": 257 }, { "epoch": 0.35149863760217986, "grad_norm": 203.2852325439453, "learning_rate": 1.892455271276418e-05, "loss": 1.416308879852295, "step": 258 }, { "epoch": 0.3528610354223433, "grad_norm": 3.8215627670288086, "learning_rate": 1.8914571131808407e-05, "loss": 0.7999370098114014, "step": 259 }, { "epoch": 0.3542234332425068, "grad_norm": 57.537296295166016, "learning_rate": 1.8904546100808346e-05, "loss": 1.4765617847442627, "step": 260 }, { "epoch": 0.3555858310626703, "grad_norm": 29.82985496520996, "learning_rate": 1.889447766862647e-05, "loss": 2.0698375701904297, "step": 261 }, { "epoch": 0.3569482288828338, "grad_norm": 7.342774868011475, "learning_rate": 1.8884365884336796e-05, "loss": 1.3249305486679077, "step": 262 }, { "epoch": 0.3583106267029973, "grad_norm": 7.754505634307861, "learning_rate": 1.8874210797224646e-05, "loss": 1.6526285409927368, "step": 263 }, { "epoch": 0.35967302452316074, "grad_norm": 246.0274658203125, "learning_rate": 1.8864012456786397e-05, "loss": 1.471924901008606, "step": 264 }, { "epoch": 0.36103542234332425, "grad_norm": 4.88141393661499, "learning_rate": 1.8853770912729243e-05, "loss": 1.3805925846099854, "step": 265 }, { "epoch": 0.36239782016348776, "grad_norm": 204.0898895263672, "learning_rate": 1.884348621497096e-05, "loss": 1.5441490411758423, "step": 266 }, { "epoch": 0.3637602179836512, "grad_norm": 4.585450172424316, "learning_rate": 1.8833158413639656e-05, "loss": 1.5545108318328857, "step": 267 }, { "epoch": 0.3651226158038147, "grad_norm": 2.8275651931762695, "learning_rate": 1.8822787559073522e-05, "loss": 1.2571587562561035, "step": 268 }, { "epoch": 0.36648501362397823, "grad_norm": 5.387750625610352, "learning_rate": 1.8812373701820603e-05, "loss": 0.8217707872390747, "step": 269 }, { "epoch": 0.3678474114441417, "grad_norm": 2.5909345149993896, "learning_rate": 1.8801916892638533e-05, "loss": 1.4382812976837158, "step": 270 }, { "epoch": 0.3692098092643052, "grad_norm": 2.748065948486328, "learning_rate": 1.8791417182494296e-05, "loss": 0.9497096538543701, "step": 271 }, { "epoch": 0.37057220708446864, "grad_norm": 12.766650199890137, "learning_rate": 1.878087462256398e-05, "loss": 1.7209450006484985, "step": 272 }, { "epoch": 0.37193460490463215, "grad_norm": 10.629404067993164, "learning_rate": 1.8770289264232526e-05, "loss": 1.471362590789795, "step": 273 }, { "epoch": 0.37329700272479566, "grad_norm": 2.607900619506836, "learning_rate": 1.875966115909347e-05, "loss": 1.2400625944137573, "step": 274 }, { "epoch": 0.3746594005449591, "grad_norm": 5.7553558349609375, "learning_rate": 1.8748990358948713e-05, "loss": 1.7980198860168457, "step": 275 }, { "epoch": 0.3760217983651226, "grad_norm": 2.295656442642212, "learning_rate": 1.8738276915808232e-05, "loss": 1.5914705991744995, "step": 276 }, { "epoch": 0.3773841961852861, "grad_norm": 4.874490261077881, "learning_rate": 1.8727520881889865e-05, "loss": 1.1869215965270996, "step": 277 }, { "epoch": 0.3787465940054496, "grad_norm": 4.679964065551758, "learning_rate": 1.8716722309619033e-05, "loss": 1.699357032775879, "step": 278 }, { "epoch": 0.3801089918256131, "grad_norm": 2.1324944496154785, "learning_rate": 1.870588125162849e-05, "loss": 1.3925402164459229, "step": 279 }, { "epoch": 0.3814713896457766, "grad_norm": 2.7246155738830566, "learning_rate": 1.8694997760758073e-05, "loss": 1.0654032230377197, "step": 280 }, { "epoch": 0.38283378746594005, "grad_norm": 2.5401268005371094, "learning_rate": 1.8684071890054425e-05, "loss": 1.4932522773742676, "step": 281 }, { "epoch": 0.38419618528610355, "grad_norm": 11.038225173950195, "learning_rate": 1.8673103692770772e-05, "loss": 1.8077526092529297, "step": 282 }, { "epoch": 0.385558583106267, "grad_norm": 2.026712656021118, "learning_rate": 1.8662093222366623e-05, "loss": 0.9147944450378418, "step": 283 }, { "epoch": 0.3869209809264305, "grad_norm": 4.215963363647461, "learning_rate": 1.8651040532507538e-05, "loss": 1.2288663387298584, "step": 284 }, { "epoch": 0.388283378746594, "grad_norm": 5.5508952140808105, "learning_rate": 1.863994567706485e-05, "loss": 1.3186817169189453, "step": 285 }, { "epoch": 0.3896457765667575, "grad_norm": 3.0306971073150635, "learning_rate": 1.8628808710115417e-05, "loss": 0.9942561388015747, "step": 286 }, { "epoch": 0.391008174386921, "grad_norm": 4.588597774505615, "learning_rate": 1.861762968594135e-05, "loss": 1.019487977027893, "step": 287 }, { "epoch": 0.3923705722070845, "grad_norm": 2.2577602863311768, "learning_rate": 1.8606408659029736e-05, "loss": 1.1410129070281982, "step": 288 }, { "epoch": 0.39373297002724794, "grad_norm": 1.677466869354248, "learning_rate": 1.8595145684072398e-05, "loss": 0.9466380476951599, "step": 289 }, { "epoch": 0.39509536784741145, "grad_norm": 2.5543346405029297, "learning_rate": 1.8583840815965614e-05, "loss": 0.8839404582977295, "step": 290 }, { "epoch": 0.39645776566757496, "grad_norm": 2.7717044353485107, "learning_rate": 1.8572494109809852e-05, "loss": 0.7279094457626343, "step": 291 }, { "epoch": 0.3978201634877384, "grad_norm": 4.991819381713867, "learning_rate": 1.856110562090949e-05, "loss": 1.358851671218872, "step": 292 }, { "epoch": 0.3991825613079019, "grad_norm": 8.582398414611816, "learning_rate": 1.8549675404772574e-05, "loss": 2.270228862762451, "step": 293 }, { "epoch": 0.40054495912806537, "grad_norm": 5.452423095703125, "learning_rate": 1.853820351711052e-05, "loss": 1.3966484069824219, "step": 294 }, { "epoch": 0.4019073569482289, "grad_norm": 10.26943588256836, "learning_rate": 1.852669001383785e-05, "loss": 1.6725530624389648, "step": 295 }, { "epoch": 0.4032697547683924, "grad_norm": 2.4113173484802246, "learning_rate": 1.8515134951071932e-05, "loss": 1.4799284934997559, "step": 296 }, { "epoch": 0.40463215258855584, "grad_norm": 6.40676736831665, "learning_rate": 1.8503538385132692e-05, "loss": 1.7502304315567017, "step": 297 }, { "epoch": 0.40599455040871935, "grad_norm": 26.85030174255371, "learning_rate": 1.849190037254234e-05, "loss": 1.0998108386993408, "step": 298 }, { "epoch": 0.40735694822888285, "grad_norm": 46.117408752441406, "learning_rate": 1.8480220970025114e-05, "loss": 1.7254866361618042, "step": 299 }, { "epoch": 0.4087193460490463, "grad_norm": 2.1561241149902344, "learning_rate": 1.8468500234506965e-05, "loss": 1.8060368299484253, "step": 300 }, { "epoch": 0.4100817438692098, "grad_norm": 2.911719560623169, "learning_rate": 1.8456738223115325e-05, "loss": 1.8339844942092896, "step": 301 }, { "epoch": 0.4114441416893733, "grad_norm": 3.1277239322662354, "learning_rate": 1.8444934993178796e-05, "loss": 0.6663273572921753, "step": 302 }, { "epoch": 0.4128065395095368, "grad_norm": 1.8254786729812622, "learning_rate": 1.843309060222688e-05, "loss": 1.3221614360809326, "step": 303 }, { "epoch": 0.4141689373297003, "grad_norm": 22.787841796875, "learning_rate": 1.8421205107989707e-05, "loss": 2.173891544342041, "step": 304 }, { "epoch": 0.41553133514986373, "grad_norm": 1.509688138961792, "learning_rate": 1.8409278568397742e-05, "loss": 0.9460150599479675, "step": 305 }, { "epoch": 0.41689373297002724, "grad_norm": 6.455505847930908, "learning_rate": 1.83973110415815e-05, "loss": 1.5562154054641724, "step": 306 }, { "epoch": 0.41825613079019075, "grad_norm": 1.5721864700317383, "learning_rate": 1.8385302585871284e-05, "loss": 0.8800030946731567, "step": 307 }, { "epoch": 0.4196185286103542, "grad_norm": 4.211442470550537, "learning_rate": 1.8373253259796877e-05, "loss": 1.1302986145019531, "step": 308 }, { "epoch": 0.4209809264305177, "grad_norm": 4.3167405128479, "learning_rate": 1.8361163122087265e-05, "loss": 1.0757460594177246, "step": 309 }, { "epoch": 0.4223433242506812, "grad_norm": 3.2106773853302, "learning_rate": 1.8349032231670363e-05, "loss": 0.8396819829940796, "step": 310 }, { "epoch": 0.42370572207084467, "grad_norm": 8.29305362701416, "learning_rate": 1.8336860647672702e-05, "loss": 1.8413667678833008, "step": 311 }, { "epoch": 0.4250681198910082, "grad_norm": 3.183060884475708, "learning_rate": 1.8324648429419164e-05, "loss": 1.0207281112670898, "step": 312 }, { "epoch": 0.4264305177111717, "grad_norm": 1.3169615268707275, "learning_rate": 1.831239563643268e-05, "loss": 0.8578565120697021, "step": 313 }, { "epoch": 0.42779291553133514, "grad_norm": 1.8172861337661743, "learning_rate": 1.8300102328433952e-05, "loss": 1.0093719959259033, "step": 314 }, { "epoch": 0.42915531335149865, "grad_norm": 6.301525592803955, "learning_rate": 1.8287768565341143e-05, "loss": 2.1235768795013428, "step": 315 }, { "epoch": 0.4305177111716621, "grad_norm": 1.1219329833984375, "learning_rate": 1.82753944072696e-05, "loss": 0.9537709951400757, "step": 316 }, { "epoch": 0.4318801089918256, "grad_norm": 2.587090492248535, "learning_rate": 1.826297991453157e-05, "loss": 1.4001516103744507, "step": 317 }, { "epoch": 0.4332425068119891, "grad_norm": 1.3578035831451416, "learning_rate": 1.8250525147635873e-05, "loss": 1.5081818103790283, "step": 318 }, { "epoch": 0.43460490463215257, "grad_norm": 1.6829197406768799, "learning_rate": 1.8238030167287638e-05, "loss": 2.0029025077819824, "step": 319 }, { "epoch": 0.4359673024523161, "grad_norm": 4.22566556930542, "learning_rate": 1.8225495034387996e-05, "loss": 1.2824198007583618, "step": 320 }, { "epoch": 0.4373297002724796, "grad_norm": 6.20258092880249, "learning_rate": 1.8212919810033777e-05, "loss": 1.1309611797332764, "step": 321 }, { "epoch": 0.43869209809264303, "grad_norm": 4.849828720092773, "learning_rate": 1.820030455551723e-05, "loss": 0.8764885663986206, "step": 322 }, { "epoch": 0.44005449591280654, "grad_norm": 3.3712899684906006, "learning_rate": 1.8187649332325702e-05, "loss": 1.3626868724822998, "step": 323 }, { "epoch": 0.44141689373297005, "grad_norm": 1.332635760307312, "learning_rate": 1.8174954202141352e-05, "loss": 1.1287131309509277, "step": 324 }, { "epoch": 0.4427792915531335, "grad_norm": 1.404646635055542, "learning_rate": 1.8162219226840857e-05, "loss": 1.0553526878356934, "step": 325 }, { "epoch": 0.444141689373297, "grad_norm": 3.747934579849243, "learning_rate": 1.814944446849508e-05, "loss": 1.537015676498413, "step": 326 }, { "epoch": 0.44550408719346046, "grad_norm": 6.1423468589782715, "learning_rate": 1.8136629989368815e-05, "loss": 2.159648895263672, "step": 327 }, { "epoch": 0.44686648501362397, "grad_norm": 3.0745179653167725, "learning_rate": 1.8123775851920438e-05, "loss": 0.8884219527244568, "step": 328 }, { "epoch": 0.4482288828337875, "grad_norm": 2.672372579574585, "learning_rate": 1.8110882118801633e-05, "loss": 1.4532339572906494, "step": 329 }, { "epoch": 0.44959128065395093, "grad_norm": 1.6655161380767822, "learning_rate": 1.8097948852857054e-05, "loss": 1.3143253326416016, "step": 330 }, { "epoch": 0.45095367847411444, "grad_norm": 1.7594853639602661, "learning_rate": 1.8084976117124072e-05, "loss": 0.8232778310775757, "step": 331 }, { "epoch": 0.45231607629427795, "grad_norm": 1.1349751949310303, "learning_rate": 1.807196397483241e-05, "loss": 0.7802037000656128, "step": 332 }, { "epoch": 0.4536784741144414, "grad_norm": 1.8786836862564087, "learning_rate": 1.8058912489403867e-05, "loss": 1.478520393371582, "step": 333 }, { "epoch": 0.4550408719346049, "grad_norm": 1.5663573741912842, "learning_rate": 1.804582172445201e-05, "loss": 1.3901585340499878, "step": 334 }, { "epoch": 0.4564032697547684, "grad_norm": 1.0753687620162964, "learning_rate": 1.8032691743781853e-05, "loss": 1.4070327281951904, "step": 335 }, { "epoch": 0.45776566757493187, "grad_norm": 1.1969891786575317, "learning_rate": 1.8019522611389543e-05, "loss": 1.2074099779129028, "step": 336 }, { "epoch": 0.4591280653950954, "grad_norm": 2.7641990184783936, "learning_rate": 1.8006314391462056e-05, "loss": 0.900089681148529, "step": 337 }, { "epoch": 0.4604904632152589, "grad_norm": 9.13116455078125, "learning_rate": 1.799306714837689e-05, "loss": 2.066049575805664, "step": 338 }, { "epoch": 0.46185286103542234, "grad_norm": 1.515107274055481, "learning_rate": 1.7979780946701737e-05, "loss": 1.5054748058319092, "step": 339 }, { "epoch": 0.46321525885558584, "grad_norm": 1.4880552291870117, "learning_rate": 1.7966455851194178e-05, "loss": 1.3471062183380127, "step": 340 }, { "epoch": 0.4645776566757493, "grad_norm": 3.1846203804016113, "learning_rate": 1.795309192680136e-05, "loss": 1.5420103073120117, "step": 341 }, { "epoch": 0.4659400544959128, "grad_norm": 1.4495476484298706, "learning_rate": 1.7939689238659692e-05, "loss": 1.2054235935211182, "step": 342 }, { "epoch": 0.4673024523160763, "grad_norm": 3.4376769065856934, "learning_rate": 1.792624785209451e-05, "loss": 0.6462281942367554, "step": 343 }, { "epoch": 0.46866485013623976, "grad_norm": 1.408954381942749, "learning_rate": 1.7912767832619776e-05, "loss": 1.1705362796783447, "step": 344 }, { "epoch": 0.47002724795640327, "grad_norm": 3.7647786140441895, "learning_rate": 1.789924924593774e-05, "loss": 1.756138563156128, "step": 345 }, { "epoch": 0.4713896457765668, "grad_norm": 1.702545166015625, "learning_rate": 1.7885692157938646e-05, "loss": 1.3937970399856567, "step": 346 }, { "epoch": 0.47275204359673023, "grad_norm": 2.139781951904297, "learning_rate": 1.787209663470038e-05, "loss": 1.4223475456237793, "step": 347 }, { "epoch": 0.47411444141689374, "grad_norm": 1.3482489585876465, "learning_rate": 1.7858462742488175e-05, "loss": 1.4907723665237427, "step": 348 }, { "epoch": 0.47547683923705725, "grad_norm": 1.1381847858428955, "learning_rate": 1.7844790547754264e-05, "loss": 1.142978549003601, "step": 349 }, { "epoch": 0.4768392370572207, "grad_norm": 1.6444528102874756, "learning_rate": 1.7831080117137584e-05, "loss": 1.2124494314193726, "step": 350 }, { "epoch": 0.4782016348773842, "grad_norm": 1.0980571508407593, "learning_rate": 1.781733151746342e-05, "loss": 1.380192756652832, "step": 351 }, { "epoch": 0.47956403269754766, "grad_norm": 1.9528623819351196, "learning_rate": 1.7803544815743107e-05, "loss": 1.403810977935791, "step": 352 }, { "epoch": 0.48092643051771117, "grad_norm": 2.6098921298980713, "learning_rate": 1.7789720079173682e-05, "loss": 1.5473246574401855, "step": 353 }, { "epoch": 0.4822888283378747, "grad_norm": 2.844017505645752, "learning_rate": 1.777585737513757e-05, "loss": 1.1214189529418945, "step": 354 }, { "epoch": 0.48365122615803813, "grad_norm": 3.102412223815918, "learning_rate": 1.7761956771202255e-05, "loss": 0.7656224966049194, "step": 355 }, { "epoch": 0.48501362397820164, "grad_norm": 1.7356327772140503, "learning_rate": 1.7748018335119935e-05, "loss": 1.7529077529907227, "step": 356 }, { "epoch": 0.48637602179836514, "grad_norm": 1.4373060464859009, "learning_rate": 1.7734042134827216e-05, "loss": 1.6099027395248413, "step": 357 }, { "epoch": 0.4877384196185286, "grad_norm": 1.6830295324325562, "learning_rate": 1.772002823844476e-05, "loss": 1.5444934368133545, "step": 358 }, { "epoch": 0.4891008174386921, "grad_norm": 2.1587584018707275, "learning_rate": 1.7705976714276976e-05, "loss": 1.4240455627441406, "step": 359 }, { "epoch": 0.4904632152588556, "grad_norm": 1.5211689472198486, "learning_rate": 1.7691887630811653e-05, "loss": 1.1710970401763916, "step": 360 }, { "epoch": 0.49182561307901906, "grad_norm": 1.578305959701538, "learning_rate": 1.7677761056719652e-05, "loss": 1.0757114887237549, "step": 361 }, { "epoch": 0.49318801089918257, "grad_norm": 1.1681820154190063, "learning_rate": 1.7663597060854577e-05, "loss": 1.0990022420883179, "step": 362 }, { "epoch": 0.494550408719346, "grad_norm": 1.9169180393218994, "learning_rate": 1.764939571225241e-05, "loss": 1.112961769104004, "step": 363 }, { "epoch": 0.49591280653950953, "grad_norm": 2.5654072761535645, "learning_rate": 1.763515708013121e-05, "loss": 1.0364793539047241, "step": 364 }, { "epoch": 0.49727520435967304, "grad_norm": 1.265934705734253, "learning_rate": 1.762088123389074e-05, "loss": 1.0109307765960693, "step": 365 }, { "epoch": 0.4986376021798365, "grad_norm": 1.5466065406799316, "learning_rate": 1.760656824311216e-05, "loss": 0.7269711494445801, "step": 366 }, { "epoch": 0.5, "grad_norm": 3.24131178855896, "learning_rate": 1.7592218177557662e-05, "loss": 1.8510479927062988, "step": 367 }, { "epoch": 0.5013623978201635, "grad_norm": 3.927333354949951, "learning_rate": 1.7577831107170157e-05, "loss": 1.7095143795013428, "step": 368 }, { "epoch": 0.502724795640327, "grad_norm": 1.1513056755065918, "learning_rate": 1.7563407102072902e-05, "loss": 1.1286771297454834, "step": 369 }, { "epoch": 0.5040871934604905, "grad_norm": 2.1706907749176025, "learning_rate": 1.7548946232569196e-05, "loss": 1.336264729499817, "step": 370 }, { "epoch": 0.5054495912806539, "grad_norm": 1.24580717086792, "learning_rate": 1.7534448569141997e-05, "loss": 1.3364312648773193, "step": 371 }, { "epoch": 0.5068119891008175, "grad_norm": 1.4906052350997925, "learning_rate": 1.751991418245361e-05, "loss": 1.606203556060791, "step": 372 }, { "epoch": 0.5081743869209809, "grad_norm": 3.8518126010894775, "learning_rate": 1.7505343143345328e-05, "loss": 1.0533281564712524, "step": 373 }, { "epoch": 0.5095367847411444, "grad_norm": 3.1790931224823, "learning_rate": 1.749073552283709e-05, "loss": 1.0586538314819336, "step": 374 }, { "epoch": 0.510899182561308, "grad_norm": 2.072589874267578, "learning_rate": 1.7476091392127132e-05, "loss": 1.198589563369751, "step": 375 }, { "epoch": 0.5122615803814714, "grad_norm": 1.5832409858703613, "learning_rate": 1.746141082259165e-05, "loss": 1.1132609844207764, "step": 376 }, { "epoch": 0.5136239782016349, "grad_norm": 1.2291409969329834, "learning_rate": 1.7446693885784435e-05, "loss": 0.8886187076568604, "step": 377 }, { "epoch": 0.5149863760217984, "grad_norm": 1.558626413345337, "learning_rate": 1.7431940653436538e-05, "loss": 0.9443507194519043, "step": 378 }, { "epoch": 0.5163487738419619, "grad_norm": 1.1366232633590698, "learning_rate": 1.7417151197455915e-05, "loss": 0.7520076036453247, "step": 379 }, { "epoch": 0.5177111716621253, "grad_norm": 3.902540922164917, "learning_rate": 1.740232558992708e-05, "loss": 1.5834012031555176, "step": 380 }, { "epoch": 0.5190735694822888, "grad_norm": 3.728201150894165, "learning_rate": 1.738746390311075e-05, "loss": 1.4345779418945312, "step": 381 }, { "epoch": 0.5204359673024523, "grad_norm": 5.438842296600342, "learning_rate": 1.7372566209443496e-05, "loss": 2.118577003479004, "step": 382 }, { "epoch": 0.5217983651226158, "grad_norm": 2.306042194366455, "learning_rate": 1.735763258153739e-05, "loss": 1.2179433107376099, "step": 383 }, { "epoch": 0.5231607629427792, "grad_norm": 1.6707911491394043, "learning_rate": 1.7342663092179636e-05, "loss": 1.4700546264648438, "step": 384 }, { "epoch": 0.5245231607629428, "grad_norm": 2.1553244590759277, "learning_rate": 1.7327657814332247e-05, "loss": 1.8596495389938354, "step": 385 }, { "epoch": 0.5258855585831063, "grad_norm": 3.7728042602539062, "learning_rate": 1.7312616821131657e-05, "loss": 1.0395113229751587, "step": 386 }, { "epoch": 0.5272479564032697, "grad_norm": 4.194542407989502, "learning_rate": 1.729754018588838e-05, "loss": 0.8783695697784424, "step": 387 }, { "epoch": 0.5286103542234333, "grad_norm": 1.5108283758163452, "learning_rate": 1.728242798208666e-05, "loss": 1.372223138809204, "step": 388 }, { "epoch": 0.5299727520435967, "grad_norm": 3.1199631690979004, "learning_rate": 1.7267280283384104e-05, "loss": 1.200405240058899, "step": 389 }, { "epoch": 0.5313351498637602, "grad_norm": 6.30712366104126, "learning_rate": 1.7252097163611304e-05, "loss": 1.2734204530715942, "step": 390 }, { "epoch": 0.5326975476839237, "grad_norm": 22.904071807861328, "learning_rate": 1.723687869677152e-05, "loss": 1.6886060237884521, "step": 391 }, { "epoch": 0.5340599455040872, "grad_norm": 2.029233455657959, "learning_rate": 1.7221624957040274e-05, "loss": 0.6093384027481079, "step": 392 }, { "epoch": 0.5354223433242506, "grad_norm": 3.1741232872009277, "learning_rate": 1.7206336018765026e-05, "loss": 1.1582179069519043, "step": 393 }, { "epoch": 0.5367847411444142, "grad_norm": 9.164698600769043, "learning_rate": 1.7191011956464788e-05, "loss": 1.7413139343261719, "step": 394 }, { "epoch": 0.5381471389645777, "grad_norm": 2.120776653289795, "learning_rate": 1.717565284482977e-05, "loss": 0.8467795848846436, "step": 395 }, { "epoch": 0.5395095367847411, "grad_norm": 73.94832611083984, "learning_rate": 1.7160258758721015e-05, "loss": 2.1446962356567383, "step": 396 }, { "epoch": 0.5408719346049047, "grad_norm": 5.396602630615234, "learning_rate": 1.714482977317003e-05, "loss": 1.2803164720535278, "step": 397 }, { "epoch": 0.5422343324250681, "grad_norm": 6.618705749511719, "learning_rate": 1.7129365963378428e-05, "loss": 1.5739619731903076, "step": 398 }, { "epoch": 0.5435967302452316, "grad_norm": 4.682571887969971, "learning_rate": 1.711386740471755e-05, "loss": 1.3818941116333008, "step": 399 }, { "epoch": 0.5449591280653951, "grad_norm": 1.999107003211975, "learning_rate": 1.7098334172728112e-05, "loss": 1.2089521884918213, "step": 400 }, { "epoch": 0.5463215258855586, "grad_norm": 2.585503578186035, "learning_rate": 1.7082766343119822e-05, "loss": 1.3988025188446045, "step": 401 }, { "epoch": 0.547683923705722, "grad_norm": 16.858793258666992, "learning_rate": 1.706716399177103e-05, "loss": 1.2364073991775513, "step": 402 }, { "epoch": 0.5490463215258855, "grad_norm": 10.500734329223633, "learning_rate": 1.7051527194728343e-05, "loss": 2.1667866706848145, "step": 403 }, { "epoch": 0.5504087193460491, "grad_norm": 2.454922914505005, "learning_rate": 1.703585602820624e-05, "loss": 0.8129177093505859, "step": 404 }, { "epoch": 0.5517711171662125, "grad_norm": 24.675334930419922, "learning_rate": 1.7020150568586743e-05, "loss": 1.167266845703125, "step": 405 }, { "epoch": 0.553133514986376, "grad_norm": 2.790771961212158, "learning_rate": 1.7004410892419012e-05, "loss": 1.2973177433013916, "step": 406 }, { "epoch": 0.5544959128065395, "grad_norm": 1.9675254821777344, "learning_rate": 1.698863707641897e-05, "loss": 1.2042503356933594, "step": 407 }, { "epoch": 0.555858310626703, "grad_norm": 2.570556879043579, "learning_rate": 1.6972829197468958e-05, "loss": 1.3606789112091064, "step": 408 }, { "epoch": 0.5572207084468664, "grad_norm": 2.5950911045074463, "learning_rate": 1.695698733261732e-05, "loss": 1.7616398334503174, "step": 409 }, { "epoch": 0.55858310626703, "grad_norm": 2.4683210849761963, "learning_rate": 1.694111155907807e-05, "loss": 2.420485019683838, "step": 410 }, { "epoch": 0.5599455040871935, "grad_norm": 1.4973143339157104, "learning_rate": 1.6925201954230474e-05, "loss": 1.0399372577667236, "step": 411 }, { "epoch": 0.5613079019073569, "grad_norm": 2.037031888961792, "learning_rate": 1.690925859561871e-05, "loss": 1.4002578258514404, "step": 412 }, { "epoch": 0.5626702997275205, "grad_norm": 1.2240928411483765, "learning_rate": 1.689328156095147e-05, "loss": 0.8805115818977356, "step": 413 }, { "epoch": 0.5640326975476839, "grad_norm": 2.608114242553711, "learning_rate": 1.6877270928101573e-05, "loss": 1.6550626754760742, "step": 414 }, { "epoch": 0.5653950953678474, "grad_norm": 2.1926870346069336, "learning_rate": 1.6861226775105618e-05, "loss": 1.293431043624878, "step": 415 }, { "epoch": 0.5667574931880109, "grad_norm": 1.314041018486023, "learning_rate": 1.684514918016356e-05, "loss": 1.0999348163604736, "step": 416 }, { "epoch": 0.5681198910081744, "grad_norm": 2.7356832027435303, "learning_rate": 1.6829038221638366e-05, "loss": 1.6221911907196045, "step": 417 }, { "epoch": 0.5694822888283378, "grad_norm": 1.3285125494003296, "learning_rate": 1.681289397805562e-05, "loss": 1.3582847118377686, "step": 418 }, { "epoch": 0.5708446866485014, "grad_norm": 4.270798206329346, "learning_rate": 1.6796716528103127e-05, "loss": 1.5273692607879639, "step": 419 }, { "epoch": 0.5722070844686649, "grad_norm": 57.48065185546875, "learning_rate": 1.6780505950630552e-05, "loss": 1.375321865081787, "step": 420 }, { "epoch": 0.5735694822888283, "grad_norm": 2.9992501735687256, "learning_rate": 1.6764262324649024e-05, "loss": 0.5811352729797363, "step": 421 }, { "epoch": 0.5749318801089919, "grad_norm": 2.0361642837524414, "learning_rate": 1.674798572933075e-05, "loss": 1.5174281597137451, "step": 422 }, { "epoch": 0.5762942779291553, "grad_norm": 1.7866727113723755, "learning_rate": 1.6731676244008622e-05, "loss": 1.2192513942718506, "step": 423 }, { "epoch": 0.5776566757493188, "grad_norm": 4.030516624450684, "learning_rate": 1.6715333948175857e-05, "loss": 1.7085566520690918, "step": 424 }, { "epoch": 0.5790190735694822, "grad_norm": 1.27426278591156, "learning_rate": 1.6698958921485577e-05, "loss": 1.1156595945358276, "step": 425 }, { "epoch": 0.5803814713896458, "grad_norm": 1.4479507207870483, "learning_rate": 1.668255124375045e-05, "loss": 1.0298731327056885, "step": 426 }, { "epoch": 0.5817438692098093, "grad_norm": 1.751955509185791, "learning_rate": 1.6666110994942274e-05, "loss": 1.1680959463119507, "step": 427 }, { "epoch": 0.5831062670299727, "grad_norm": 5.590847015380859, "learning_rate": 1.6649638255191604e-05, "loss": 1.5689997673034668, "step": 428 }, { "epoch": 0.5844686648501363, "grad_norm": 4.874299049377441, "learning_rate": 1.663313310478736e-05, "loss": 1.5476088523864746, "step": 429 }, { "epoch": 0.5858310626702997, "grad_norm": 6.349812030792236, "learning_rate": 1.661659562417643e-05, "loss": 1.631278157234192, "step": 430 }, { "epoch": 0.5871934604904632, "grad_norm": 3.4641454219818115, "learning_rate": 1.660002589396328e-05, "loss": 1.3774057626724243, "step": 431 }, { "epoch": 0.5885558583106267, "grad_norm": 2.751311779022217, "learning_rate": 1.6583423994909573e-05, "loss": 1.8310673236846924, "step": 432 }, { "epoch": 0.5899182561307902, "grad_norm": 1.3634949922561646, "learning_rate": 1.6566790007933746e-05, "loss": 1.333249568939209, "step": 433 }, { "epoch": 0.5912806539509536, "grad_norm": 2.1073403358459473, "learning_rate": 1.6550124014110646e-05, "loss": 1.9114727973937988, "step": 434 }, { "epoch": 0.5926430517711172, "grad_norm": 1.7746835947036743, "learning_rate": 1.6533426094671125e-05, "loss": 1.6237621307373047, "step": 435 }, { "epoch": 0.5940054495912807, "grad_norm": 23.14777946472168, "learning_rate": 1.651669633100163e-05, "loss": 1.3902943134307861, "step": 436 }, { "epoch": 0.5953678474114441, "grad_norm": 2.367368698120117, "learning_rate": 1.6499934804643838e-05, "loss": 1.023665189743042, "step": 437 }, { "epoch": 0.5967302452316077, "grad_norm": 3.190218448638916, "learning_rate": 1.6483141597294214e-05, "loss": 1.045141577720642, "step": 438 }, { "epoch": 0.5980926430517711, "grad_norm": 2.3211097717285156, "learning_rate": 1.646631679080366e-05, "loss": 0.9104323387145996, "step": 439 }, { "epoch": 0.5994550408719346, "grad_norm": 4.2504191398620605, "learning_rate": 1.6449460467177078e-05, "loss": 1.1637187004089355, "step": 440 }, { "epoch": 0.6008174386920981, "grad_norm": 4.436872959136963, "learning_rate": 1.6432572708572997e-05, "loss": 1.2566183805465698, "step": 441 }, { "epoch": 0.6021798365122616, "grad_norm": 1.6939139366149902, "learning_rate": 1.641565359730315e-05, "loss": 1.4174838066101074, "step": 442 }, { "epoch": 0.603542234332425, "grad_norm": 1.617443323135376, "learning_rate": 1.6398703215832097e-05, "loss": 1.1487817764282227, "step": 443 }, { "epoch": 0.6049046321525886, "grad_norm": 4.588049411773682, "learning_rate": 1.6381721646776805e-05, "loss": 1.5958579778671265, "step": 444 }, { "epoch": 0.6062670299727521, "grad_norm": 4.323416233062744, "learning_rate": 1.6364708972906246e-05, "loss": 1.051882028579712, "step": 445 }, { "epoch": 0.6076294277929155, "grad_norm": 1.5835697650909424, "learning_rate": 1.6347665277141005e-05, "loss": 1.078721046447754, "step": 446 }, { "epoch": 0.6089918256130791, "grad_norm": 1.369110107421875, "learning_rate": 1.6330590642552867e-05, "loss": 1.0987346172332764, "step": 447 }, { "epoch": 0.6103542234332425, "grad_norm": 4.262504577636719, "learning_rate": 1.6313485152364417e-05, "loss": 1.1835674047470093, "step": 448 }, { "epoch": 0.611716621253406, "grad_norm": 4.475899696350098, "learning_rate": 1.6296348889948627e-05, "loss": 1.7073475122451782, "step": 449 }, { "epoch": 0.6130790190735694, "grad_norm": 2.032668352127075, "learning_rate": 1.627918193882845e-05, "loss": 1.2458873987197876, "step": 450 }, { "epoch": 0.614441416893733, "grad_norm": 1.8245234489440918, "learning_rate": 1.6261984382676432e-05, "loss": 1.069715976715088, "step": 451 }, { "epoch": 0.6158038147138964, "grad_norm": 3.338745355606079, "learning_rate": 1.624475630531428e-05, "loss": 1.2043849229812622, "step": 452 }, { "epoch": 0.6171662125340599, "grad_norm": 3.005784749984741, "learning_rate": 1.6227497790712458e-05, "loss": 1.150783658027649, "step": 453 }, { "epoch": 0.6185286103542235, "grad_norm": 2.092616081237793, "learning_rate": 1.621020892298979e-05, "loss": 1.0814651250839233, "step": 454 }, { "epoch": 0.6198910081743869, "grad_norm": 5.8309526443481445, "learning_rate": 1.6192889786413048e-05, "loss": 1.5327882766723633, "step": 455 }, { "epoch": 0.6212534059945504, "grad_norm": 4.993339538574219, "learning_rate": 1.617554046539652e-05, "loss": 2.296293258666992, "step": 456 }, { "epoch": 0.6226158038147139, "grad_norm": 3.412890911102295, "learning_rate": 1.6158161044501624e-05, "loss": 1.2551473379135132, "step": 457 }, { "epoch": 0.6239782016348774, "grad_norm": 4.109783172607422, "learning_rate": 1.6140751608436487e-05, "loss": 1.5705320835113525, "step": 458 }, { "epoch": 0.6253405994550408, "grad_norm": 19.845062255859375, "learning_rate": 1.6123312242055533e-05, "loss": 2.2374026775360107, "step": 459 }, { "epoch": 0.6267029972752044, "grad_norm": 11.941446304321289, "learning_rate": 1.6105843030359055e-05, "loss": 1.2218310832977295, "step": 460 }, { "epoch": 0.6280653950953679, "grad_norm": 19.218975067138672, "learning_rate": 1.6088344058492836e-05, "loss": 1.6118953227996826, "step": 461 }, { "epoch": 0.6294277929155313, "grad_norm": 16.879228591918945, "learning_rate": 1.6070815411747686e-05, "loss": 1.7829091548919678, "step": 462 }, { "epoch": 0.6307901907356949, "grad_norm": 3.5677173137664795, "learning_rate": 1.6053257175559074e-05, "loss": 0.915998101234436, "step": 463 }, { "epoch": 0.6321525885558583, "grad_norm": 21.6722412109375, "learning_rate": 1.6035669435506674e-05, "loss": 1.2303714752197266, "step": 464 }, { "epoch": 0.6335149863760218, "grad_norm": 3.3858675956726074, "learning_rate": 1.6018052277313966e-05, "loss": 1.1869078874588013, "step": 465 }, { "epoch": 0.6348773841961853, "grad_norm": 8.933525085449219, "learning_rate": 1.600040578684782e-05, "loss": 1.492060899734497, "step": 466 }, { "epoch": 0.6362397820163488, "grad_norm": 46.667293548583984, "learning_rate": 1.598273005011808e-05, "loss": 1.7147912979125977, "step": 467 }, { "epoch": 0.6376021798365122, "grad_norm": 2.954457998275757, "learning_rate": 1.5965025153277112e-05, "loss": 1.0731861591339111, "step": 468 }, { "epoch": 0.6389645776566758, "grad_norm": 4.583359241485596, "learning_rate": 1.5947291182619444e-05, "loss": 1.9054558277130127, "step": 469 }, { "epoch": 0.6403269754768393, "grad_norm": 4.073808193206787, "learning_rate": 1.5929528224581283e-05, "loss": 1.4747967720031738, "step": 470 }, { "epoch": 0.6416893732970027, "grad_norm": 2.903792381286621, "learning_rate": 1.5911736365740133e-05, "loss": 1.5801494121551514, "step": 471 }, { "epoch": 0.6430517711171662, "grad_norm": 27.286222457885742, "learning_rate": 1.5893915692814365e-05, "loss": 1.7026896476745605, "step": 472 }, { "epoch": 0.6444141689373297, "grad_norm": 6.102408409118652, "learning_rate": 1.5876066292662784e-05, "loss": 1.4940052032470703, "step": 473 }, { "epoch": 0.6457765667574932, "grad_norm": 2.290860176086426, "learning_rate": 1.585818825228422e-05, "loss": 1.1655769348144531, "step": 474 }, { "epoch": 0.6471389645776566, "grad_norm": 112.5971450805664, "learning_rate": 1.5840281658817093e-05, "loss": 1.3027015924453735, "step": 475 }, { "epoch": 0.6485013623978202, "grad_norm": 1.835148811340332, "learning_rate": 1.582234659953899e-05, "loss": 1.2031432390213013, "step": 476 }, { "epoch": 0.6498637602179836, "grad_norm": 5.070309638977051, "learning_rate": 1.5804383161866245e-05, "loss": 1.9298428297042847, "step": 477 }, { "epoch": 0.6512261580381471, "grad_norm": 13.096074104309082, "learning_rate": 1.5786391433353508e-05, "loss": 1.1800572872161865, "step": 478 }, { "epoch": 0.6525885558583107, "grad_norm": 2.888873815536499, "learning_rate": 1.5768371501693326e-05, "loss": 1.5343241691589355, "step": 479 }, { "epoch": 0.6539509536784741, "grad_norm": 3.4073545932769775, "learning_rate": 1.5750323454715696e-05, "loss": 1.093684434890747, "step": 480 }, { "epoch": 0.6553133514986376, "grad_norm": 12.18371295928955, "learning_rate": 1.5732247380387664e-05, "loss": 1.5973427295684814, "step": 481 }, { "epoch": 0.6566757493188011, "grad_norm": 229.597412109375, "learning_rate": 1.5714143366812876e-05, "loss": 1.0154012441635132, "step": 482 }, { "epoch": 0.6580381471389646, "grad_norm": 3.4331278800964355, "learning_rate": 1.5696011502231158e-05, "loss": 1.1458021402359009, "step": 483 }, { "epoch": 0.659400544959128, "grad_norm": 5.65518856048584, "learning_rate": 1.5677851875018076e-05, "loss": 1.131278157234192, "step": 484 }, { "epoch": 0.6607629427792916, "grad_norm": 3.97689151763916, "learning_rate": 1.565966457368453e-05, "loss": 0.9286713600158691, "step": 485 }, { "epoch": 0.662125340599455, "grad_norm": 5.2068586349487305, "learning_rate": 1.564144968687628e-05, "loss": 1.1594562530517578, "step": 486 }, { "epoch": 0.6634877384196185, "grad_norm": 17.21161651611328, "learning_rate": 1.5623207303373553e-05, "loss": 0.9647790193557739, "step": 487 }, { "epoch": 0.6648501362397821, "grad_norm": 3.4987435340881348, "learning_rate": 1.5604937512090602e-05, "loss": 1.23720383644104, "step": 488 }, { "epoch": 0.6662125340599455, "grad_norm": 9.694775581359863, "learning_rate": 1.5586640402075258e-05, "loss": 0.8154966831207275, "step": 489 }, { "epoch": 0.667574931880109, "grad_norm": 6.452354907989502, "learning_rate": 1.5568316062508502e-05, "loss": 1.2906818389892578, "step": 490 }, { "epoch": 0.6689373297002725, "grad_norm": 4.108471393585205, "learning_rate": 1.5549964582704044e-05, "loss": 1.356542944908142, "step": 491 }, { "epoch": 0.670299727520436, "grad_norm": 4.186506748199463, "learning_rate": 1.5531586052107868e-05, "loss": 1.2320566177368164, "step": 492 }, { "epoch": 0.6716621253405994, "grad_norm": 3.0822629928588867, "learning_rate": 1.5513180560297808e-05, "loss": 1.8199759721755981, "step": 493 }, { "epoch": 0.6730245231607629, "grad_norm": 8.514135360717773, "learning_rate": 1.5494748196983106e-05, "loss": 1.3669140338897705, "step": 494 }, { "epoch": 0.6743869209809265, "grad_norm": 2.3871264457702637, "learning_rate": 1.547628905200398e-05, "loss": 1.1539889574050903, "step": 495 }, { "epoch": 0.6757493188010899, "grad_norm": 3.196556329727173, "learning_rate": 1.5457803215331182e-05, "loss": 1.5780242681503296, "step": 496 }, { "epoch": 0.6771117166212534, "grad_norm": 1.7728488445281982, "learning_rate": 1.5439290777065558e-05, "loss": 0.8963057994842529, "step": 497 }, { "epoch": 0.6784741144414169, "grad_norm": 11.758073806762695, "learning_rate": 1.542075182743762e-05, "loss": 1.4622840881347656, "step": 498 }, { "epoch": 0.6798365122615804, "grad_norm": 2.185535192489624, "learning_rate": 1.5402186456807086e-05, "loss": 0.7718188166618347, "step": 499 }, { "epoch": 0.6811989100817438, "grad_norm": 4.886017322540283, "learning_rate": 1.5383594755662453e-05, "loss": 1.8217787742614746, "step": 500 }, { "epoch": 0.6825613079019074, "grad_norm": 2.021327018737793, "learning_rate": 1.5364976814620568e-05, "loss": 1.251737356185913, "step": 501 }, { "epoch": 0.6839237057220708, "grad_norm": 3.880868434906006, "learning_rate": 1.5346332724426155e-05, "loss": 1.578188180923462, "step": 502 }, { "epoch": 0.6852861035422343, "grad_norm": 36.96627426147461, "learning_rate": 1.5327662575951404e-05, "loss": 0.9220302104949951, "step": 503 }, { "epoch": 0.6866485013623979, "grad_norm": 1.9115111827850342, "learning_rate": 1.5308966460195503e-05, "loss": 0.5213961005210876, "step": 504 }, { "epoch": 0.6880108991825613, "grad_norm": 2.0304300785064697, "learning_rate": 1.5290244468284206e-05, "loss": 1.5966366529464722, "step": 505 }, { "epoch": 0.6893732970027248, "grad_norm": 2.941404342651367, "learning_rate": 1.5271496691469404e-05, "loss": 1.2299654483795166, "step": 506 }, { "epoch": 0.6907356948228883, "grad_norm": 24.103221893310547, "learning_rate": 1.525272322112865e-05, "loss": 1.2417209148406982, "step": 507 }, { "epoch": 0.6920980926430518, "grad_norm": 2.5017285346984863, "learning_rate": 1.5233924148764727e-05, "loss": 2.075056314468384, "step": 508 }, { "epoch": 0.6934604904632152, "grad_norm": 1.5615363121032715, "learning_rate": 1.5215099566005217e-05, "loss": 1.3851789236068726, "step": 509 }, { "epoch": 0.6948228882833788, "grad_norm": 4.137636661529541, "learning_rate": 1.519624956460203e-05, "loss": 0.8974437713623047, "step": 510 }, { "epoch": 0.6961852861035422, "grad_norm": 4.469723224639893, "learning_rate": 1.517737423643097e-05, "loss": 0.8670157194137573, "step": 511 }, { "epoch": 0.6975476839237057, "grad_norm": 3.793842315673828, "learning_rate": 1.5158473673491285e-05, "loss": 1.2061010599136353, "step": 512 }, { "epoch": 0.6989100817438693, "grad_norm": 2.1940932273864746, "learning_rate": 1.5139547967905221e-05, "loss": 1.6131023168563843, "step": 513 }, { "epoch": 0.7002724795640327, "grad_norm": 2.7493433952331543, "learning_rate": 1.5120597211917564e-05, "loss": 1.4582409858703613, "step": 514 }, { "epoch": 0.7016348773841962, "grad_norm": 23.74671173095703, "learning_rate": 1.510162149789521e-05, "loss": 1.23945152759552, "step": 515 }, { "epoch": 0.7029972752043597, "grad_norm": 2.1672754287719727, "learning_rate": 1.5082620918326685e-05, "loss": 1.1440198421478271, "step": 516 }, { "epoch": 0.7043596730245232, "grad_norm": 16.29448699951172, "learning_rate": 1.5063595565821721e-05, "loss": 1.3702163696289062, "step": 517 }, { "epoch": 0.7057220708446866, "grad_norm": 2.4655282497406006, "learning_rate": 1.5044545533110793e-05, "loss": 1.2323949337005615, "step": 518 }, { "epoch": 0.7070844686648501, "grad_norm": 4.2790350914001465, "learning_rate": 1.5025470913044666e-05, "loss": 1.7945969104766846, "step": 519 }, { "epoch": 0.7084468664850136, "grad_norm": 2.773815393447876, "learning_rate": 1.5006371798593948e-05, "loss": 1.5026427507400513, "step": 520 }, { "epoch": 0.7098092643051771, "grad_norm": 18.577421188354492, "learning_rate": 1.4987248282848637e-05, "loss": 1.1689552068710327, "step": 521 }, { "epoch": 0.7111716621253406, "grad_norm": 1.9001014232635498, "learning_rate": 1.4968100459017652e-05, "loss": 1.2212189435958862, "step": 522 }, { "epoch": 0.7125340599455041, "grad_norm": 31.087602615356445, "learning_rate": 1.4948928420428403e-05, "loss": 1.4974316358566284, "step": 523 }, { "epoch": 0.7138964577656676, "grad_norm": 9.785758018493652, "learning_rate": 1.4929732260526318e-05, "loss": 0.9730976819992065, "step": 524 }, { "epoch": 0.715258855585831, "grad_norm": 3.933379888534546, "learning_rate": 1.4910512072874395e-05, "loss": 1.3873491287231445, "step": 525 }, { "epoch": 0.7166212534059946, "grad_norm": 3.170851945877075, "learning_rate": 1.489126795115274e-05, "loss": 1.774458646774292, "step": 526 }, { "epoch": 0.717983651226158, "grad_norm": 2.8676583766937256, "learning_rate": 1.4871999989158123e-05, "loss": 2.1191864013671875, "step": 527 }, { "epoch": 0.7193460490463215, "grad_norm": 1.3488231897354126, "learning_rate": 1.4852708280803512e-05, "loss": 1.3862372636795044, "step": 528 }, { "epoch": 0.720708446866485, "grad_norm": 1.6586637496948242, "learning_rate": 1.4833392920117607e-05, "loss": 1.62174654006958, "step": 529 }, { "epoch": 0.7220708446866485, "grad_norm": 3.852313280105591, "learning_rate": 1.4814054001244395e-05, "loss": 1.5795090198516846, "step": 530 }, { "epoch": 0.723433242506812, "grad_norm": 2.9394686222076416, "learning_rate": 1.4794691618442691e-05, "loss": 1.0577733516693115, "step": 531 }, { "epoch": 0.7247956403269755, "grad_norm": 5.752668857574463, "learning_rate": 1.477530586608567e-05, "loss": 1.9041309356689453, "step": 532 }, { "epoch": 0.726158038147139, "grad_norm": 2.3236379623413086, "learning_rate": 1.4755896838660412e-05, "loss": 1.4680252075195312, "step": 533 }, { "epoch": 0.7275204359673024, "grad_norm": 3.003340244293213, "learning_rate": 1.4736464630767442e-05, "loss": 1.8783783912658691, "step": 534 }, { "epoch": 0.728882833787466, "grad_norm": 6.330926418304443, "learning_rate": 1.4717009337120268e-05, "loss": 1.2258846759796143, "step": 535 }, { "epoch": 0.7302452316076294, "grad_norm": 2.4778010845184326, "learning_rate": 1.4697531052544914e-05, "loss": 0.926813542842865, "step": 536 }, { "epoch": 0.7316076294277929, "grad_norm": 2.4640188217163086, "learning_rate": 1.4678029871979469e-05, "loss": 1.3399500846862793, "step": 537 }, { "epoch": 0.7329700272479565, "grad_norm": 11.105792045593262, "learning_rate": 1.4658505890473615e-05, "loss": 1.1746035814285278, "step": 538 }, { "epoch": 0.7343324250681199, "grad_norm": 1.56924307346344, "learning_rate": 1.463895920318817e-05, "loss": 0.8616644740104675, "step": 539 }, { "epoch": 0.7356948228882834, "grad_norm": 1.6439458131790161, "learning_rate": 1.4619389905394616e-05, "loss": 1.1292338371276855, "step": 540 }, { "epoch": 0.7370572207084468, "grad_norm": 3.102236747741699, "learning_rate": 1.4599798092474646e-05, "loss": 0.8248124122619629, "step": 541 }, { "epoch": 0.7384196185286104, "grad_norm": 3.0680243968963623, "learning_rate": 1.4580183859919686e-05, "loss": 1.3067448139190674, "step": 542 }, { "epoch": 0.7397820163487738, "grad_norm": 2.5821802616119385, "learning_rate": 1.4560547303330441e-05, "loss": 1.3107045888900757, "step": 543 }, { "epoch": 0.7411444141689373, "grad_norm": 2.131953001022339, "learning_rate": 1.4540888518416423e-05, "loss": 0.9684444665908813, "step": 544 }, { "epoch": 0.7425068119891008, "grad_norm": 2.3941733837127686, "learning_rate": 1.4521207600995487e-05, "loss": 1.5515406131744385, "step": 545 }, { "epoch": 0.7438692098092643, "grad_norm": 3.4251787662506104, "learning_rate": 1.4501504646993358e-05, "loss": 0.8153684735298157, "step": 546 }, { "epoch": 0.7452316076294278, "grad_norm": 2.622520685195923, "learning_rate": 1.4481779752443177e-05, "loss": 1.6344298124313354, "step": 547 }, { "epoch": 0.7465940054495913, "grad_norm": 2.652980089187622, "learning_rate": 1.446203301348502e-05, "loss": 1.5004336833953857, "step": 548 }, { "epoch": 0.7479564032697548, "grad_norm": 2.24822998046875, "learning_rate": 1.4442264526365425e-05, "loss": 1.2041709423065186, "step": 549 }, { "epoch": 0.7493188010899182, "grad_norm": 6.049685001373291, "learning_rate": 1.4422474387436951e-05, "loss": 1.0584933757781982, "step": 550 }, { "epoch": 0.7506811989100818, "grad_norm": 4.106502056121826, "learning_rate": 1.4402662693157672e-05, "loss": 0.9123461246490479, "step": 551 }, { "epoch": 0.7520435967302452, "grad_norm": 3.6901044845581055, "learning_rate": 1.4382829540090728e-05, "loss": 1.1109474897384644, "step": 552 }, { "epoch": 0.7534059945504087, "grad_norm": 1.219348669052124, "learning_rate": 1.4362975024903854e-05, "loss": 1.0663442611694336, "step": 553 }, { "epoch": 0.7547683923705722, "grad_norm": 3.80777907371521, "learning_rate": 1.43430992443689e-05, "loss": 1.3293004035949707, "step": 554 }, { "epoch": 0.7561307901907357, "grad_norm": 8.005372047424316, "learning_rate": 1.4323202295361375e-05, "loss": 1.4248263835906982, "step": 555 }, { "epoch": 0.7574931880108992, "grad_norm": 1.960978627204895, "learning_rate": 1.4303284274859947e-05, "loss": 1.2730631828308105, "step": 556 }, { "epoch": 0.7588555858310627, "grad_norm": 2.4936602115631104, "learning_rate": 1.4283345279946e-05, "loss": 0.7973443865776062, "step": 557 }, { "epoch": 0.7602179836512262, "grad_norm": 1.6648569107055664, "learning_rate": 1.4263385407803147e-05, "loss": 0.9895787835121155, "step": 558 }, { "epoch": 0.7615803814713896, "grad_norm": 2.425827980041504, "learning_rate": 1.424340475571675e-05, "loss": 1.0964505672454834, "step": 559 }, { "epoch": 0.7629427792915532, "grad_norm": 130.5381622314453, "learning_rate": 1.4223403421073465e-05, "loss": 2.199425220489502, "step": 560 }, { "epoch": 0.7643051771117166, "grad_norm": 3.1214098930358887, "learning_rate": 1.4203381501360746e-05, "loss": 1.2502741813659668, "step": 561 }, { "epoch": 0.7656675749318801, "grad_norm": 1.1926981210708618, "learning_rate": 1.4183339094166386e-05, "loss": 1.2412378787994385, "step": 562 }, { "epoch": 0.7670299727520435, "grad_norm": 1.9208431243896484, "learning_rate": 1.416327629717803e-05, "loss": 1.4617279767990112, "step": 563 }, { "epoch": 0.7683923705722071, "grad_norm": 1.8530316352844238, "learning_rate": 1.4143193208182705e-05, "loss": 0.8236516714096069, "step": 564 }, { "epoch": 0.7697547683923706, "grad_norm": 1.1635708808898926, "learning_rate": 1.4123089925066347e-05, "loss": 1.1197096109390259, "step": 565 }, { "epoch": 0.771117166212534, "grad_norm": 1.6644195318222046, "learning_rate": 1.4102966545813312e-05, "loss": 0.7913979291915894, "step": 566 }, { "epoch": 0.7724795640326976, "grad_norm": 2.858396053314209, "learning_rate": 1.4082823168505912e-05, "loss": 1.676666259765625, "step": 567 }, { "epoch": 0.773841961852861, "grad_norm": 2.469792604446411, "learning_rate": 1.4062659891323927e-05, "loss": 1.5743359327316284, "step": 568 }, { "epoch": 0.7752043596730245, "grad_norm": 1.827441930770874, "learning_rate": 1.4042476812544128e-05, "loss": 0.9356820583343506, "step": 569 }, { "epoch": 0.776566757493188, "grad_norm": 1.4277431964874268, "learning_rate": 1.4022274030539802e-05, "loss": 0.8907495141029358, "step": 570 }, { "epoch": 0.7779291553133515, "grad_norm": 2.531895875930786, "learning_rate": 1.4002051643780275e-05, "loss": 1.1490600109100342, "step": 571 }, { "epoch": 0.779291553133515, "grad_norm": 3.438741683959961, "learning_rate": 1.398180975083042e-05, "loss": 1.4157071113586426, "step": 572 }, { "epoch": 0.7806539509536785, "grad_norm": 2.872969627380371, "learning_rate": 1.3961548450350184e-05, "loss": 1.5624017715454102, "step": 573 }, { "epoch": 0.782016348773842, "grad_norm": 1.7302354574203491, "learning_rate": 1.3941267841094118e-05, "loss": 1.4399631023406982, "step": 574 }, { "epoch": 0.7833787465940054, "grad_norm": 2.7009763717651367, "learning_rate": 1.3920968021910872e-05, "loss": 1.5012354850769043, "step": 575 }, { "epoch": 0.784741144414169, "grad_norm": 6.51242208480835, "learning_rate": 1.3900649091742734e-05, "loss": 0.8782615661621094, "step": 576 }, { "epoch": 0.7861035422343324, "grad_norm": 2.1851634979248047, "learning_rate": 1.3880311149625141e-05, "loss": 1.3254035711288452, "step": 577 }, { "epoch": 0.7874659400544959, "grad_norm": 1.702254056930542, "learning_rate": 1.3859954294686185e-05, "loss": 1.331315040588379, "step": 578 }, { "epoch": 0.7888283378746594, "grad_norm": 1.3220089673995972, "learning_rate": 1.3839578626146143e-05, "loss": 1.3351058959960938, "step": 579 }, { "epoch": 0.7901907356948229, "grad_norm": 3.3983347415924072, "learning_rate": 1.3819184243317008e-05, "loss": 1.4671103954315186, "step": 580 }, { "epoch": 0.7915531335149864, "grad_norm": 1.3717108964920044, "learning_rate": 1.3798771245601961e-05, "loss": 1.407561182975769, "step": 581 }, { "epoch": 0.7929155313351499, "grad_norm": 3.0558109283447266, "learning_rate": 1.3778339732494933e-05, "loss": 1.527491807937622, "step": 582 }, { "epoch": 0.7942779291553134, "grad_norm": 6.091338634490967, "learning_rate": 1.3757889803580085e-05, "loss": 1.1004629135131836, "step": 583 }, { "epoch": 0.7956403269754768, "grad_norm": 2.088205337524414, "learning_rate": 1.373742155853135e-05, "loss": 1.0929726362228394, "step": 584 }, { "epoch": 0.7970027247956403, "grad_norm": 3.400243043899536, "learning_rate": 1.3716935097111926e-05, "loss": 1.8507790565490723, "step": 585 }, { "epoch": 0.7983651226158038, "grad_norm": 4.578840255737305, "learning_rate": 1.3696430519173802e-05, "loss": 2.519803524017334, "step": 586 }, { "epoch": 0.7997275204359673, "grad_norm": 2.8862452507019043, "learning_rate": 1.367590792465727e-05, "loss": 0.6406446695327759, "step": 587 }, { "epoch": 0.8010899182561307, "grad_norm": 3.2084100246429443, "learning_rate": 1.3655367413590433e-05, "loss": 1.923079013824463, "step": 588 }, { "epoch": 0.8024523160762943, "grad_norm": 3.3442091941833496, "learning_rate": 1.3634809086088715e-05, "loss": 1.1459531784057617, "step": 589 }, { "epoch": 0.8038147138964578, "grad_norm": 4.406625270843506, "learning_rate": 1.361423304235439e-05, "loss": 0.9176599979400635, "step": 590 }, { "epoch": 0.8051771117166212, "grad_norm": 2.3619635105133057, "learning_rate": 1.359363938267607e-05, "loss": 1.6312848329544067, "step": 591 }, { "epoch": 0.8065395095367848, "grad_norm": 1.752160668373108, "learning_rate": 1.3573028207428239e-05, "loss": 2.0165164470672607, "step": 592 }, { "epoch": 0.8079019073569482, "grad_norm": 4.275413513183594, "learning_rate": 1.3552399617070742e-05, "loss": 1.3809186220169067, "step": 593 }, { "epoch": 0.8092643051771117, "grad_norm": 2.5133349895477295, "learning_rate": 1.3531753712148312e-05, "loss": 1.5188066959381104, "step": 594 }, { "epoch": 0.8106267029972752, "grad_norm": 3.0917539596557617, "learning_rate": 1.3511090593290073e-05, "loss": 1.0098018646240234, "step": 595 }, { "epoch": 0.8119891008174387, "grad_norm": 1.4284080266952515, "learning_rate": 1.3490410361209051e-05, "loss": 1.5370709896087646, "step": 596 }, { "epoch": 0.8133514986376021, "grad_norm": 2.186067819595337, "learning_rate": 1.3469713116701683e-05, "loss": 1.6146628856658936, "step": 597 }, { "epoch": 0.8147138964577657, "grad_norm": 1.271701693534851, "learning_rate": 1.3448998960647324e-05, "loss": 1.4125964641571045, "step": 598 }, { "epoch": 0.8160762942779292, "grad_norm": 22.58783531188965, "learning_rate": 1.3428267994007756e-05, "loss": 1.7933295965194702, "step": 599 }, { "epoch": 0.8174386920980926, "grad_norm": 2.8981776237487793, "learning_rate": 1.3407520317826697e-05, "loss": 1.4284796714782715, "step": 600 }, { "epoch": 0.8188010899182562, "grad_norm": 64.28890991210938, "learning_rate": 1.3386756033229314e-05, "loss": 1.484025478363037, "step": 601 }, { "epoch": 0.8201634877384196, "grad_norm": 1.941318392753601, "learning_rate": 1.3365975241421712e-05, "loss": 0.7628053426742554, "step": 602 }, { "epoch": 0.8215258855585831, "grad_norm": 7.3000335693359375, "learning_rate": 1.3345178043690463e-05, "loss": 1.3994512557983398, "step": 603 }, { "epoch": 0.8228882833787466, "grad_norm": 2.5372769832611084, "learning_rate": 1.3324364541402102e-05, "loss": 1.0953727960586548, "step": 604 }, { "epoch": 0.8242506811989101, "grad_norm": 2.691676616668701, "learning_rate": 1.3303534836002629e-05, "loss": 1.787889838218689, "step": 605 }, { "epoch": 0.8256130790190735, "grad_norm": 2.5282795429229736, "learning_rate": 1.328268902901702e-05, "loss": 1.1380221843719482, "step": 606 }, { "epoch": 0.8269754768392371, "grad_norm": 3.7238845825195312, "learning_rate": 1.326182722204873e-05, "loss": 0.634700357913971, "step": 607 }, { "epoch": 0.8283378746594006, "grad_norm": 1.7504158020019531, "learning_rate": 1.32409495167792e-05, "loss": 1.483595848083496, "step": 608 }, { "epoch": 0.829700272479564, "grad_norm": 2.5789966583251953, "learning_rate": 1.3220056014967359e-05, "loss": 1.0786075592041016, "step": 609 }, { "epoch": 0.8310626702997275, "grad_norm": 3.616070508956909, "learning_rate": 1.3199146818449134e-05, "loss": 1.0314359664916992, "step": 610 }, { "epoch": 0.832425068119891, "grad_norm": 6.019405364990234, "learning_rate": 1.317822202913694e-05, "loss": 1.6719056367874146, "step": 611 }, { "epoch": 0.8337874659400545, "grad_norm": 1.4608734846115112, "learning_rate": 1.3157281749019199e-05, "loss": 1.3976244926452637, "step": 612 }, { "epoch": 0.8351498637602179, "grad_norm": 3.120438814163208, "learning_rate": 1.3136326080159836e-05, "loss": 1.3163340091705322, "step": 613 }, { "epoch": 0.8365122615803815, "grad_norm": 1.837367296218872, "learning_rate": 1.3115355124697775e-05, "loss": 0.8892254829406738, "step": 614 }, { "epoch": 0.837874659400545, "grad_norm": 1.63421630859375, "learning_rate": 1.3094368984846453e-05, "loss": 1.2673603296279907, "step": 615 }, { "epoch": 0.8392370572207084, "grad_norm": 1.9303702116012573, "learning_rate": 1.3073367762893316e-05, "loss": 0.48692119121551514, "step": 616 }, { "epoch": 0.840599455040872, "grad_norm": 4.58441686630249, "learning_rate": 1.3052351561199321e-05, "loss": 1.3120324611663818, "step": 617 }, { "epoch": 0.8419618528610354, "grad_norm": 2.789804458618164, "learning_rate": 1.3031320482198433e-05, "loss": 1.8188691139221191, "step": 618 }, { "epoch": 0.8433242506811989, "grad_norm": 3.087557792663574, "learning_rate": 1.3010274628397137e-05, "loss": 1.5895367860794067, "step": 619 }, { "epoch": 0.8446866485013624, "grad_norm": 3.7780449390411377, "learning_rate": 1.298921410237392e-05, "loss": 1.69302499294281, "step": 620 }, { "epoch": 0.8460490463215259, "grad_norm": 5.413235187530518, "learning_rate": 1.2968139006778797e-05, "loss": 1.5434781312942505, "step": 621 }, { "epoch": 0.8474114441416893, "grad_norm": 5.830272197723389, "learning_rate": 1.2947049444332782e-05, "loss": 1.8240950107574463, "step": 622 }, { "epoch": 0.8487738419618529, "grad_norm": 1.6833761930465698, "learning_rate": 1.292594551782741e-05, "loss": 1.2477277517318726, "step": 623 }, { "epoch": 0.8501362397820164, "grad_norm": 1.459324598312378, "learning_rate": 1.2904827330124223e-05, "loss": 1.5135703086853027, "step": 624 }, { "epoch": 0.8514986376021798, "grad_norm": 1.4175208806991577, "learning_rate": 1.2883694984154273e-05, "loss": 0.7292189598083496, "step": 625 }, { "epoch": 0.8528610354223434, "grad_norm": 2.885599374771118, "learning_rate": 1.2862548582917622e-05, "loss": 1.0432519912719727, "step": 626 }, { "epoch": 0.8542234332425068, "grad_norm": 4.810023307800293, "learning_rate": 1.2841388229482834e-05, "loss": 0.9524978399276733, "step": 627 }, { "epoch": 0.8555858310626703, "grad_norm": 2.138291358947754, "learning_rate": 1.2820214026986481e-05, "loss": 1.4655534029006958, "step": 628 }, { "epoch": 0.8569482288828338, "grad_norm": 2.2241628170013428, "learning_rate": 1.2799026078632638e-05, "loss": 1.4167919158935547, "step": 629 }, { "epoch": 0.8583106267029973, "grad_norm": 3.5293188095092773, "learning_rate": 1.2777824487692373e-05, "loss": 1.2887177467346191, "step": 630 }, { "epoch": 0.8596730245231607, "grad_norm": 3.3782262802124023, "learning_rate": 1.2756609357503248e-05, "loss": 1.7405619621276855, "step": 631 }, { "epoch": 0.8610354223433242, "grad_norm": 2.993549108505249, "learning_rate": 1.2735380791468814e-05, "loss": 1.2749803066253662, "step": 632 }, { "epoch": 0.8623978201634878, "grad_norm": 2.0504634380340576, "learning_rate": 1.271413889305812e-05, "loss": 1.1924537420272827, "step": 633 }, { "epoch": 0.8637602179836512, "grad_norm": 1.8078364133834839, "learning_rate": 1.2692883765805188e-05, "loss": 1.33396315574646, "step": 634 }, { "epoch": 0.8651226158038147, "grad_norm": 3.320988655090332, "learning_rate": 1.2671615513308524e-05, "loss": 1.079707145690918, "step": 635 }, { "epoch": 0.8664850136239782, "grad_norm": 1.6313542127609253, "learning_rate": 1.2650334239230598e-05, "loss": 1.1179430484771729, "step": 636 }, { "epoch": 0.8678474114441417, "grad_norm": 2.291811466217041, "learning_rate": 1.2629040047297356e-05, "loss": 1.078310489654541, "step": 637 }, { "epoch": 0.8692098092643051, "grad_norm": 1.7692354917526245, "learning_rate": 1.2607733041297703e-05, "loss": 0.71351158618927, "step": 638 }, { "epoch": 0.8705722070844687, "grad_norm": 10.692731857299805, "learning_rate": 1.2586413325083e-05, "loss": 1.4759784936904907, "step": 639 }, { "epoch": 0.8719346049046321, "grad_norm": 2.8110296726226807, "learning_rate": 1.2565081002566563e-05, "loss": 1.5391604900360107, "step": 640 }, { "epoch": 0.8732970027247956, "grad_norm": 4.826806545257568, "learning_rate": 1.2543736177723147e-05, "loss": 1.627281904220581, "step": 641 }, { "epoch": 0.8746594005449592, "grad_norm": 3.3672797679901123, "learning_rate": 1.2522378954588443e-05, "loss": 0.9370021820068359, "step": 642 }, { "epoch": 0.8760217983651226, "grad_norm": 1.874826192855835, "learning_rate": 1.2501009437258576e-05, "loss": 1.6654452085494995, "step": 643 }, { "epoch": 0.8773841961852861, "grad_norm": 1.7000356912612915, "learning_rate": 1.2479627729889587e-05, "loss": 1.7029834985733032, "step": 644 }, { "epoch": 0.8787465940054496, "grad_norm": 2.277583599090576, "learning_rate": 1.245823393669694e-05, "loss": 1.1498898267745972, "step": 645 }, { "epoch": 0.8801089918256131, "grad_norm": 3.1130685806274414, "learning_rate": 1.2436828161955004e-05, "loss": 1.2781298160552979, "step": 646 }, { "epoch": 0.8814713896457765, "grad_norm": 2.6190290451049805, "learning_rate": 1.2415410509996537e-05, "loss": 1.0321624279022217, "step": 647 }, { "epoch": 0.8828337874659401, "grad_norm": 4.355445861816406, "learning_rate": 1.2393981085212204e-05, "loss": 1.6381220817565918, "step": 648 }, { "epoch": 0.8841961852861036, "grad_norm": 1.858130693435669, "learning_rate": 1.2372539992050037e-05, "loss": 1.116075038909912, "step": 649 }, { "epoch": 0.885558583106267, "grad_norm": 11.300039291381836, "learning_rate": 1.2351087335014945e-05, "loss": 2.1072628498077393, "step": 650 }, { "epoch": 0.8869209809264306, "grad_norm": 2.790557622909546, "learning_rate": 1.2329623218668197e-05, "loss": 0.9104270935058594, "step": 651 }, { "epoch": 0.888283378746594, "grad_norm": 4.927196502685547, "learning_rate": 1.2308147747626926e-05, "loss": 1.2494080066680908, "step": 652 }, { "epoch": 0.8896457765667575, "grad_norm": 2.862684726715088, "learning_rate": 1.2286661026563597e-05, "loss": 0.7623242139816284, "step": 653 }, { "epoch": 0.8910081743869209, "grad_norm": 1.822767734527588, "learning_rate": 1.2265163160205514e-05, "loss": 0.7788611054420471, "step": 654 }, { "epoch": 0.8923705722070845, "grad_norm": 9.074105262756348, "learning_rate": 1.2243654253334299e-05, "loss": 1.412550926208496, "step": 655 }, { "epoch": 0.8937329700272479, "grad_norm": 8.627159118652344, "learning_rate": 1.2222134410785386e-05, "loss": 1.2101569175720215, "step": 656 }, { "epoch": 0.8950953678474114, "grad_norm": 5.410304069519043, "learning_rate": 1.2200603737447515e-05, "loss": 1.4161845445632935, "step": 657 }, { "epoch": 0.896457765667575, "grad_norm": 10.91976547241211, "learning_rate": 1.2179062338262217e-05, "loss": 2.1599631309509277, "step": 658 }, { "epoch": 0.8978201634877384, "grad_norm": 8.6303071975708, "learning_rate": 1.2157510318223296e-05, "loss": 2.024393081665039, "step": 659 }, { "epoch": 0.8991825613079019, "grad_norm": 4.618903160095215, "learning_rate": 1.2135947782376322e-05, "loss": 0.9179735779762268, "step": 660 }, { "epoch": 0.9005449591280654, "grad_norm": 32.600101470947266, "learning_rate": 1.2114374835818122e-05, "loss": 1.3276469707489014, "step": 661 }, { "epoch": 0.9019073569482289, "grad_norm": 1.8229365348815918, "learning_rate": 1.2092791583696266e-05, "loss": 1.343456745147705, "step": 662 }, { "epoch": 0.9032697547683923, "grad_norm": 4.12421989440918, "learning_rate": 1.207119813120855e-05, "loss": 1.1209739446640015, "step": 663 }, { "epoch": 0.9046321525885559, "grad_norm": 2.1861205101013184, "learning_rate": 1.2049594583602495e-05, "loss": 1.415198564529419, "step": 664 }, { "epoch": 0.9059945504087193, "grad_norm": 3.099585771560669, "learning_rate": 1.2027981046174817e-05, "loss": 1.6855437755584717, "step": 665 }, { "epoch": 0.9073569482288828, "grad_norm": 1.7012327909469604, "learning_rate": 1.2006357624270927e-05, "loss": 1.2436718940734863, "step": 666 }, { "epoch": 0.9087193460490464, "grad_norm": 3.646634578704834, "learning_rate": 1.198472442328442e-05, "loss": 1.7524023056030273, "step": 667 }, { "epoch": 0.9100817438692098, "grad_norm": 1.8761662244796753, "learning_rate": 1.1963081548656539e-05, "loss": 0.8352669477462769, "step": 668 }, { "epoch": 0.9114441416893733, "grad_norm": 41.714622497558594, "learning_rate": 1.1941429105875686e-05, "loss": 0.8845211863517761, "step": 669 }, { "epoch": 0.9128065395095368, "grad_norm": 2.5089035034179688, "learning_rate": 1.1919767200476904e-05, "loss": 1.2432684898376465, "step": 670 }, { "epoch": 0.9141689373297003, "grad_norm": 2.774197816848755, "learning_rate": 1.1898095938041352e-05, "loss": 1.0796873569488525, "step": 671 }, { "epoch": 0.9155313351498637, "grad_norm": 30.483869552612305, "learning_rate": 1.187641542419579e-05, "loss": 1.5262564420700073, "step": 672 }, { "epoch": 0.9168937329700273, "grad_norm": 2.856597423553467, "learning_rate": 1.1854725764612078e-05, "loss": 1.8823330402374268, "step": 673 }, { "epoch": 0.9182561307901907, "grad_norm": 8.157869338989258, "learning_rate": 1.183302706500665e-05, "loss": 1.070889949798584, "step": 674 }, { "epoch": 0.9196185286103542, "grad_norm": 5.732520580291748, "learning_rate": 1.181131943114e-05, "loss": 1.5851788520812988, "step": 675 }, { "epoch": 0.9209809264305178, "grad_norm": 4.615838050842285, "learning_rate": 1.1789602968816172e-05, "loss": 1.3814215660095215, "step": 676 }, { "epoch": 0.9223433242506812, "grad_norm": 1.5638784170150757, "learning_rate": 1.1767877783882235e-05, "loss": 0.9451487064361572, "step": 677 }, { "epoch": 0.9237057220708447, "grad_norm": 1.8010276556015015, "learning_rate": 1.1746143982227778e-05, "loss": 1.253969430923462, "step": 678 }, { "epoch": 0.9250681198910081, "grad_norm": 2.4255189895629883, "learning_rate": 1.1724401669784385e-05, "loss": 1.6594009399414062, "step": 679 }, { "epoch": 0.9264305177111717, "grad_norm": 3.130937099456787, "learning_rate": 1.1702650952525116e-05, "loss": 1.4525929689407349, "step": 680 }, { "epoch": 0.9277929155313351, "grad_norm": 5.014533996582031, "learning_rate": 1.168089193646401e-05, "loss": 1.1176152229309082, "step": 681 }, { "epoch": 0.9291553133514986, "grad_norm": 3.333151340484619, "learning_rate": 1.1659124727655546e-05, "loss": 0.8434218168258667, "step": 682 }, { "epoch": 0.9305177111716622, "grad_norm": 3.9242653846740723, "learning_rate": 1.1637349432194137e-05, "loss": 1.3347196578979492, "step": 683 }, { "epoch": 0.9318801089918256, "grad_norm": 1.848533272743225, "learning_rate": 1.1615566156213609e-05, "loss": 0.7870956659317017, "step": 684 }, { "epoch": 0.9332425068119891, "grad_norm": 3.9530065059661865, "learning_rate": 1.1593775005886687e-05, "loss": 0.769242525100708, "step": 685 }, { "epoch": 0.9346049046321526, "grad_norm": 4.127293109893799, "learning_rate": 1.1571976087424478e-05, "loss": 1.542427659034729, "step": 686 }, { "epoch": 0.9359673024523161, "grad_norm": 3.0786638259887695, "learning_rate": 1.1550169507075939e-05, "loss": 1.5594425201416016, "step": 687 }, { "epoch": 0.9373297002724795, "grad_norm": 4.065682411193848, "learning_rate": 1.1528355371127396e-05, "loss": 1.8929016590118408, "step": 688 }, { "epoch": 0.9386920980926431, "grad_norm": 8.515080451965332, "learning_rate": 1.1506533785901977e-05, "loss": 1.8667418956756592, "step": 689 }, { "epoch": 0.9400544959128065, "grad_norm": 1.8124748468399048, "learning_rate": 1.148470485775913e-05, "loss": 0.6627928018569946, "step": 690 }, { "epoch": 0.94141689373297, "grad_norm": 6.035774230957031, "learning_rate": 1.146286869309409e-05, "loss": 0.9768748879432678, "step": 691 }, { "epoch": 0.9427792915531336, "grad_norm": 73.94078063964844, "learning_rate": 1.1441025398337365e-05, "loss": 1.64554762840271, "step": 692 }, { "epoch": 0.944141689373297, "grad_norm": 3.9294233322143555, "learning_rate": 1.141917507995421e-05, "loss": 2.0647826194763184, "step": 693 }, { "epoch": 0.9455040871934605, "grad_norm": 4.653603553771973, "learning_rate": 1.1397317844444125e-05, "loss": 1.107535719871521, "step": 694 }, { "epoch": 0.946866485013624, "grad_norm": 3.3803422451019287, "learning_rate": 1.137545379834031e-05, "loss": 0.7544019818305969, "step": 695 }, { "epoch": 0.9482288828337875, "grad_norm": 4.974362850189209, "learning_rate": 1.1353583048209171e-05, "loss": 1.0620689392089844, "step": 696 }, { "epoch": 0.9495912806539509, "grad_norm": 4.676380157470703, "learning_rate": 1.1331705700649786e-05, "loss": 0.8690337538719177, "step": 697 }, { "epoch": 0.9509536784741145, "grad_norm": 3.142549514770508, "learning_rate": 1.1309821862293385e-05, "loss": 1.4317506551742554, "step": 698 }, { "epoch": 0.952316076294278, "grad_norm": 2.971370220184326, "learning_rate": 1.128793163980284e-05, "loss": 1.0544142723083496, "step": 699 }, { "epoch": 0.9536784741144414, "grad_norm": 2.6154518127441406, "learning_rate": 1.1266035139872142e-05, "loss": 1.0950567722320557, "step": 700 }, { "epoch": 0.9550408719346049, "grad_norm": 2.270753860473633, "learning_rate": 1.1244132469225872e-05, "loss": 1.148418664932251, "step": 701 }, { "epoch": 0.9564032697547684, "grad_norm": 1.2130918502807617, "learning_rate": 1.1222223734618689e-05, "loss": 1.212683916091919, "step": 702 }, { "epoch": 0.9577656675749319, "grad_norm": 4.032314300537109, "learning_rate": 1.120030904283481e-05, "loss": 1.1107025146484375, "step": 703 }, { "epoch": 0.9591280653950953, "grad_norm": 2.2939188480377197, "learning_rate": 1.1178388500687482e-05, "loss": 1.0611785650253296, "step": 704 }, { "epoch": 0.9604904632152589, "grad_norm": 2.9401087760925293, "learning_rate": 1.115646221501848e-05, "loss": 1.0636987686157227, "step": 705 }, { "epoch": 0.9618528610354223, "grad_norm": 30.427898406982422, "learning_rate": 1.1134530292697558e-05, "loss": 1.4979593753814697, "step": 706 }, { "epoch": 0.9632152588555858, "grad_norm": 1.5922436714172363, "learning_rate": 1.1112592840621954e-05, "loss": 1.0203757286071777, "step": 707 }, { "epoch": 0.9645776566757494, "grad_norm": 14.673893928527832, "learning_rate": 1.1090649965715852e-05, "loss": 1.5156220197677612, "step": 708 }, { "epoch": 0.9659400544959128, "grad_norm": 1.1958138942718506, "learning_rate": 1.1068701774929868e-05, "loss": 0.782781720161438, "step": 709 }, { "epoch": 0.9673024523160763, "grad_norm": 1.8420690298080444, "learning_rate": 1.1046748375240532e-05, "loss": 1.6665374040603638, "step": 710 }, { "epoch": 0.9686648501362398, "grad_norm": 1.670469880104065, "learning_rate": 1.1024789873649761e-05, "loss": 1.319372534751892, "step": 711 }, { "epoch": 0.9700272479564033, "grad_norm": 1.3639436960220337, "learning_rate": 1.1002826377184334e-05, "loss": 0.9996222257614136, "step": 712 }, { "epoch": 0.9713896457765667, "grad_norm": 2.808175563812256, "learning_rate": 1.0980857992895381e-05, "loss": 1.6011052131652832, "step": 713 }, { "epoch": 0.9727520435967303, "grad_norm": 6.178109645843506, "learning_rate": 1.0958884827857853e-05, "loss": 1.3378026485443115, "step": 714 }, { "epoch": 0.9741144414168937, "grad_norm": 2.2818052768707275, "learning_rate": 1.0936906989170004e-05, "loss": 1.2783794403076172, "step": 715 }, { "epoch": 0.9754768392370572, "grad_norm": 3.5280063152313232, "learning_rate": 1.0914924583952864e-05, "loss": 1.6861703395843506, "step": 716 }, { "epoch": 0.9768392370572208, "grad_norm": 6.454436779022217, "learning_rate": 1.0892937719349723e-05, "loss": 1.7874715328216553, "step": 717 }, { "epoch": 0.9782016348773842, "grad_norm": 10.468633651733398, "learning_rate": 1.087094650252561e-05, "loss": 1.0643081665039062, "step": 718 }, { "epoch": 0.9795640326975477, "grad_norm": 3.605581521987915, "learning_rate": 1.0848951040666762e-05, "loss": 1.3094207048416138, "step": 719 }, { "epoch": 0.9809264305177112, "grad_norm": 4.214629173278809, "learning_rate": 1.0826951440980105e-05, "loss": 1.666353464126587, "step": 720 }, { "epoch": 0.9822888283378747, "grad_norm": 4.246817588806152, "learning_rate": 1.0804947810692736e-05, "loss": 1.1319661140441895, "step": 721 }, { "epoch": 0.9836512261580381, "grad_norm": 8.950884819030762, "learning_rate": 1.07829402570514e-05, "loss": 1.8403904438018799, "step": 722 }, { "epoch": 0.9850136239782016, "grad_norm": 7.754586696624756, "learning_rate": 1.076092888732196e-05, "loss": 1.244396686553955, "step": 723 }, { "epoch": 0.9863760217983651, "grad_norm": 7.126558780670166, "learning_rate": 1.073891380878888e-05, "loss": 1.0654281377792358, "step": 724 }, { "epoch": 0.9877384196185286, "grad_norm": 7.486753940582275, "learning_rate": 1.0716895128754704e-05, "loss": 1.3881548643112183, "step": 725 }, { "epoch": 0.989100817438692, "grad_norm": 6.586914539337158, "learning_rate": 1.069487295453952e-05, "loss": 1.3590501546859741, "step": 726 }, { "epoch": 0.9904632152588556, "grad_norm": 3.5434587001800537, "learning_rate": 1.0672847393480466e-05, "loss": 1.4489696025848389, "step": 727 }, { "epoch": 0.9918256130790191, "grad_norm": 5.078987121582031, "learning_rate": 1.0650818552931162e-05, "loss": 1.5525749921798706, "step": 728 }, { "epoch": 0.9931880108991825, "grad_norm": 18.739749908447266, "learning_rate": 1.0628786540261235e-05, "loss": 1.607698678970337, "step": 729 }, { "epoch": 0.9945504087193461, "grad_norm": 3.111997127532959, "learning_rate": 1.0606751462855764e-05, "loss": 0.8593987226486206, "step": 730 }, { "epoch": 0.9959128065395095, "grad_norm": 3.60221791267395, "learning_rate": 1.0584713428114764e-05, "loss": 1.105581283569336, "step": 731 }, { "epoch": 0.997275204359673, "grad_norm": 3.8310182094573975, "learning_rate": 1.0562672543452666e-05, "loss": 1.77792489528656, "step": 732 }, { "epoch": 0.9986376021798365, "grad_norm": 20.509246826171875, "learning_rate": 1.0540628916297791e-05, "loss": 1.3992115259170532, "step": 733 }, { "epoch": 1.0, "grad_norm": 2.3849234580993652, "learning_rate": 1.0518582654091824e-05, "loss": 1.38824462890625, "step": 734 }, { "epoch": 1.0013623978201636, "grad_norm": 1.7319926023483276, "learning_rate": 1.0496533864289304e-05, "loss": 1.362045168876648, "step": 735 }, { "epoch": 1.002724795640327, "grad_norm": 2.783156633377075, "learning_rate": 1.047448265435708e-05, "loss": 1.3610879182815552, "step": 736 }, { "epoch": 1.0040871934604905, "grad_norm": 2.000793218612671, "learning_rate": 1.0452429131773801e-05, "loss": 1.0452824831008911, "step": 737 }, { "epoch": 1.005449591280654, "grad_norm": 3.9750893115997314, "learning_rate": 1.0430373404029383e-05, "loss": 1.4950426816940308, "step": 738 }, { "epoch": 1.0068119891008174, "grad_norm": 2.428896188735962, "learning_rate": 1.0408315578624496e-05, "loss": 0.9704780578613281, "step": 739 }, { "epoch": 1.008174386920981, "grad_norm": 10.781900405883789, "learning_rate": 1.038625576307003e-05, "loss": 0.8298189043998718, "step": 740 }, { "epoch": 1.0095367847411445, "grad_norm": 2.2069244384765625, "learning_rate": 1.0364194064886576e-05, "loss": 0.954738974571228, "step": 741 }, { "epoch": 1.0108991825613078, "grad_norm": 2.2506752014160156, "learning_rate": 1.0342130591603905e-05, "loss": 0.5585159063339233, "step": 742 }, { "epoch": 1.0122615803814714, "grad_norm": 1.2990283966064453, "learning_rate": 1.0320065450760437e-05, "loss": 0.9701113700866699, "step": 743 }, { "epoch": 1.013623978201635, "grad_norm": 1.4765596389770508, "learning_rate": 1.0297998749902715e-05, "loss": 0.8810998201370239, "step": 744 }, { "epoch": 1.0149863760217983, "grad_norm": 2.2870852947235107, "learning_rate": 1.0275930596584893e-05, "loss": 1.0681772232055664, "step": 745 }, { "epoch": 1.0163487738419619, "grad_norm": 1.8862252235412598, "learning_rate": 1.0253861098368203e-05, "loss": 1.5812091827392578, "step": 746 }, { "epoch": 1.0177111716621254, "grad_norm": 7.907939910888672, "learning_rate": 1.0231790362820426e-05, "loss": 1.388240933418274, "step": 747 }, { "epoch": 1.0190735694822888, "grad_norm": 2.1624796390533447, "learning_rate": 1.020971849751538e-05, "loss": 0.9330163598060608, "step": 748 }, { "epoch": 1.0204359673024523, "grad_norm": 1.5111256837844849, "learning_rate": 1.0187645610032389e-05, "loss": 0.8910378217697144, "step": 749 }, { "epoch": 1.021798365122616, "grad_norm": 1.3800830841064453, "learning_rate": 1.0165571807955757e-05, "loss": 1.1143087148666382, "step": 750 }, { "epoch": 1.0231607629427792, "grad_norm": 1.5221837759017944, "learning_rate": 1.0143497198874248e-05, "loss": 0.7064782381057739, "step": 751 }, { "epoch": 1.0245231607629428, "grad_norm": 2.2864277362823486, "learning_rate": 1.0121421890380556e-05, "loss": 1.4050331115722656, "step": 752 }, { "epoch": 1.0258855585831064, "grad_norm": 5.964427471160889, "learning_rate": 1.0099345990070788e-05, "loss": 1.6335821151733398, "step": 753 }, { "epoch": 1.0272479564032697, "grad_norm": 1.591855764389038, "learning_rate": 1.0077269605543931e-05, "loss": 1.0955898761749268, "step": 754 }, { "epoch": 1.0286103542234333, "grad_norm": 4.60707950592041, "learning_rate": 1.0055192844401336e-05, "loss": 1.2484338283538818, "step": 755 }, { "epoch": 1.0299727520435966, "grad_norm": 1.9873239994049072, "learning_rate": 1.003311581424619e-05, "loss": 1.3466179370880127, "step": 756 }, { "epoch": 1.0313351498637602, "grad_norm": 3.5848472118377686, "learning_rate": 1.001103862268299e-05, "loss": 1.5673408508300781, "step": 757 }, { "epoch": 1.0326975476839237, "grad_norm": 2.405305862426758, "learning_rate": 9.988961377317015e-06, "loss": 1.5746285915374756, "step": 758 }, { "epoch": 1.034059945504087, "grad_norm": 1.815234661102295, "learning_rate": 9.966884185753813e-06, "loss": 0.8490040302276611, "step": 759 }, { "epoch": 1.0354223433242506, "grad_norm": 2.4208099842071533, "learning_rate": 9.944807155598668e-06, "loss": 0.7891229391098022, "step": 760 }, { "epoch": 1.0367847411444142, "grad_norm": 1.7230857610702515, "learning_rate": 9.92273039445607e-06, "loss": 1.5125508308410645, "step": 761 }, { "epoch": 1.0381471389645776, "grad_norm": 26.7676944732666, "learning_rate": 9.900654009929214e-06, "loss": 1.1813056468963623, "step": 762 }, { "epoch": 1.0395095367847411, "grad_norm": 1.8865773677825928, "learning_rate": 9.878578109619447e-06, "loss": 1.3915643692016602, "step": 763 }, { "epoch": 1.0408719346049047, "grad_norm": 3.5965657234191895, "learning_rate": 9.856502801125755e-06, "loss": 1.6517629623413086, "step": 764 }, { "epoch": 1.042234332425068, "grad_norm": 2.9963393211364746, "learning_rate": 9.834428192044246e-06, "loss": 1.5576462745666504, "step": 765 }, { "epoch": 1.0435967302452316, "grad_norm": 1.6979749202728271, "learning_rate": 9.812354389967615e-06, "loss": 0.8711757063865662, "step": 766 }, { "epoch": 1.0449591280653951, "grad_norm": 2.1744089126586914, "learning_rate": 9.790281502484625e-06, "loss": 0.5984838008880615, "step": 767 }, { "epoch": 1.0463215258855585, "grad_norm": 2.6733531951904297, "learning_rate": 9.768209637179576e-06, "loss": 1.3294389247894287, "step": 768 }, { "epoch": 1.047683923705722, "grad_norm": 5.380911350250244, "learning_rate": 9.7461389016318e-06, "loss": 1.228567361831665, "step": 769 }, { "epoch": 1.0490463215258856, "grad_norm": 5.921473503112793, "learning_rate": 9.724069403415109e-06, "loss": 1.2465418577194214, "step": 770 }, { "epoch": 1.050408719346049, "grad_norm": 1.5790379047393799, "learning_rate": 9.702001250097287e-06, "loss": 0.974758505821228, "step": 771 }, { "epoch": 1.0517711171662125, "grad_norm": 2.0177783966064453, "learning_rate": 9.679934549239566e-06, "loss": 1.4122014045715332, "step": 772 }, { "epoch": 1.053133514986376, "grad_norm": 7.612659931182861, "learning_rate": 9.657869408396095e-06, "loss": 1.9325284957885742, "step": 773 }, { "epoch": 1.0544959128065394, "grad_norm": 5.36206579208374, "learning_rate": 9.635805935113427e-06, "loss": 1.6555426120758057, "step": 774 }, { "epoch": 1.055858310626703, "grad_norm": 9.911110877990723, "learning_rate": 9.613744236929973e-06, "loss": 0.9133412837982178, "step": 775 }, { "epoch": 1.0572207084468666, "grad_norm": 4.798624515533447, "learning_rate": 9.591684421375508e-06, "loss": 0.6536118984222412, "step": 776 }, { "epoch": 1.05858310626703, "grad_norm": 3.579150915145874, "learning_rate": 9.56962659597062e-06, "loss": 1.3938374519348145, "step": 777 }, { "epoch": 1.0599455040871935, "grad_norm": 3.5563905239105225, "learning_rate": 9.547570868226202e-06, "loss": 1.2725470066070557, "step": 778 }, { "epoch": 1.061307901907357, "grad_norm": 3.9213931560516357, "learning_rate": 9.525517345642921e-06, "loss": 1.3474805355072021, "step": 779 }, { "epoch": 1.0626702997275204, "grad_norm": 1.885108232498169, "learning_rate": 9.503466135710696e-06, "loss": 1.260632872581482, "step": 780 }, { "epoch": 1.064032697547684, "grad_norm": 15.429298400878906, "learning_rate": 9.48141734590818e-06, "loss": 1.6820505857467651, "step": 781 }, { "epoch": 1.0653950953678475, "grad_norm": 2.222320079803467, "learning_rate": 9.459371083702214e-06, "loss": 1.5272653102874756, "step": 782 }, { "epoch": 1.0667574931880108, "grad_norm": 1.6487114429473877, "learning_rate": 9.437327456547337e-06, "loss": 0.9049481153488159, "step": 783 }, { "epoch": 1.0681198910081744, "grad_norm": 1.8616079092025757, "learning_rate": 9.415286571885237e-06, "loss": 1.4690696001052856, "step": 784 }, { "epoch": 1.069482288828338, "grad_norm": 7.657268524169922, "learning_rate": 9.393248537144236e-06, "loss": 0.9072242379188538, "step": 785 }, { "epoch": 1.0708446866485013, "grad_norm": 1.275026559829712, "learning_rate": 9.371213459738767e-06, "loss": 0.8253852128982544, "step": 786 }, { "epoch": 1.0722070844686649, "grad_norm": 1.5019118785858154, "learning_rate": 9.349181447068838e-06, "loss": 1.1833354234695435, "step": 787 }, { "epoch": 1.0735694822888284, "grad_norm": 3.469975471496582, "learning_rate": 9.327152606519541e-06, "loss": 0.6224006414413452, "step": 788 }, { "epoch": 1.0749318801089918, "grad_norm": 1.8194968700408936, "learning_rate": 9.30512704546048e-06, "loss": 1.0510913133621216, "step": 789 }, { "epoch": 1.0762942779291553, "grad_norm": 1.1383819580078125, "learning_rate": 9.283104871245301e-06, "loss": 0.8747397661209106, "step": 790 }, { "epoch": 1.077656675749319, "grad_norm": 1.7342712879180908, "learning_rate": 9.261086191211124e-06, "loss": 1.4106638431549072, "step": 791 }, { "epoch": 1.0790190735694822, "grad_norm": 3.8735296726226807, "learning_rate": 9.239071112678042e-06, "loss": 0.9408797025680542, "step": 792 }, { "epoch": 1.0803814713896458, "grad_norm": 1.6715805530548096, "learning_rate": 9.217059742948605e-06, "loss": 0.7245050668716431, "step": 793 }, { "epoch": 1.0817438692098094, "grad_norm": 3.165630340576172, "learning_rate": 9.195052189307266e-06, "loss": 1.653625726699829, "step": 794 }, { "epoch": 1.0831062670299727, "grad_norm": 1.7407103776931763, "learning_rate": 9.1730485590199e-06, "loss": 1.5027657747268677, "step": 795 }, { "epoch": 1.0844686648501363, "grad_norm": 5.131650447845459, "learning_rate": 9.151048959333242e-06, "loss": 1.377716302871704, "step": 796 }, { "epoch": 1.0858310626702998, "grad_norm": 1.6653156280517578, "learning_rate": 9.12905349747439e-06, "loss": 1.023837685585022, "step": 797 }, { "epoch": 1.0871934604904632, "grad_norm": 1.717787742614746, "learning_rate": 9.107062280650278e-06, "loss": 1.1276087760925293, "step": 798 }, { "epoch": 1.0885558583106267, "grad_norm": 1.3233479261398315, "learning_rate": 9.085075416047138e-06, "loss": 0.9303517937660217, "step": 799 }, { "epoch": 1.0899182561307903, "grad_norm": 1.9744620323181152, "learning_rate": 9.06309301083e-06, "loss": 1.9008307456970215, "step": 800 }, { "epoch": 1.0912806539509536, "grad_norm": 1.3077399730682373, "learning_rate": 9.041115172142148e-06, "loss": 0.6807721853256226, "step": 801 }, { "epoch": 1.0926430517711172, "grad_norm": 1.4071216583251953, "learning_rate": 9.019142007104622e-06, "loss": 0.9831918478012085, "step": 802 }, { "epoch": 1.0940054495912808, "grad_norm": 2.8730175495147705, "learning_rate": 8.997173622815668e-06, "loss": 0.7299962639808655, "step": 803 }, { "epoch": 1.095367847411444, "grad_norm": 1.3878753185272217, "learning_rate": 8.975210126350239e-06, "loss": 1.1267969608306885, "step": 804 }, { "epoch": 1.0967302452316077, "grad_norm": 3.0161008834838867, "learning_rate": 8.953251624759471e-06, "loss": 1.56452476978302, "step": 805 }, { "epoch": 1.0980926430517712, "grad_norm": 2.984447956085205, "learning_rate": 8.931298225070134e-06, "loss": 2.313072443008423, "step": 806 }, { "epoch": 1.0994550408719346, "grad_norm": 1.8637878894805908, "learning_rate": 8.909350034284153e-06, "loss": 1.5315096378326416, "step": 807 }, { "epoch": 1.1008174386920981, "grad_norm": 1.7480387687683105, "learning_rate": 8.88740715937805e-06, "loss": 0.8981226682662964, "step": 808 }, { "epoch": 1.1021798365122615, "grad_norm": 1.2253121137619019, "learning_rate": 8.865469707302444e-06, "loss": 1.4429481029510498, "step": 809 }, { "epoch": 1.103542234332425, "grad_norm": 1.4876455068588257, "learning_rate": 8.843537784981525e-06, "loss": 1.093915343284607, "step": 810 }, { "epoch": 1.1049046321525886, "grad_norm": 2.851789951324463, "learning_rate": 8.82161149931252e-06, "loss": 1.13993501663208, "step": 811 }, { "epoch": 1.106267029972752, "grad_norm": 2.087999105453491, "learning_rate": 8.799690957165197e-06, "loss": 0.8953101634979248, "step": 812 }, { "epoch": 1.1076294277929155, "grad_norm": 1.7068129777908325, "learning_rate": 8.777776265381315e-06, "loss": 1.6603915691375732, "step": 813 }, { "epoch": 1.108991825613079, "grad_norm": 2.4144320487976074, "learning_rate": 8.755867530774133e-06, "loss": 1.434238314628601, "step": 814 }, { "epoch": 1.1103542234332424, "grad_norm": 4.680269241333008, "learning_rate": 8.733964860127861e-06, "loss": 2.2611663341522217, "step": 815 }, { "epoch": 1.111716621253406, "grad_norm": 3.040764808654785, "learning_rate": 8.71206836019716e-06, "loss": 1.2649214267730713, "step": 816 }, { "epoch": 1.1130790190735695, "grad_norm": 1.8095608949661255, "learning_rate": 8.690178137706619e-06, "loss": 1.036018967628479, "step": 817 }, { "epoch": 1.1144414168937329, "grad_norm": 1.5825600624084473, "learning_rate": 8.668294299350217e-06, "loss": 1.809496283531189, "step": 818 }, { "epoch": 1.1158038147138964, "grad_norm": 6.272243976593018, "learning_rate": 8.646416951790832e-06, "loss": 1.4022477865219116, "step": 819 }, { "epoch": 1.11716621253406, "grad_norm": 2.2713098526000977, "learning_rate": 8.62454620165969e-06, "loss": 1.8463554382324219, "step": 820 }, { "epoch": 1.1185286103542234, "grad_norm": 1.3348394632339478, "learning_rate": 8.602682155555875e-06, "loss": 1.3349345922470093, "step": 821 }, { "epoch": 1.119891008174387, "grad_norm": 1.2700073719024658, "learning_rate": 8.580824920045791e-06, "loss": 1.252305030822754, "step": 822 }, { "epoch": 1.1212534059945505, "grad_norm": 2.683323860168457, "learning_rate": 8.558974601662637e-06, "loss": 0.9200676083564758, "step": 823 }, { "epoch": 1.1226158038147138, "grad_norm": 3.1683108806610107, "learning_rate": 8.537131306905915e-06, "loss": 1.5866827964782715, "step": 824 }, { "epoch": 1.1239782016348774, "grad_norm": 1.395993947982788, "learning_rate": 8.515295142240873e-06, "loss": 1.0542783737182617, "step": 825 }, { "epoch": 1.125340599455041, "grad_norm": 2.7309155464172363, "learning_rate": 8.493466214098024e-06, "loss": 1.0537761449813843, "step": 826 }, { "epoch": 1.1267029972752043, "grad_norm": 0.9236940741539001, "learning_rate": 8.471644628872609e-06, "loss": 1.3605074882507324, "step": 827 }, { "epoch": 1.1280653950953679, "grad_norm": 1.9016244411468506, "learning_rate": 8.44983049292406e-06, "loss": 0.934337317943573, "step": 828 }, { "epoch": 1.1294277929155314, "grad_norm": 1.8819833993911743, "learning_rate": 8.42802391257553e-06, "loss": 1.528808355331421, "step": 829 }, { "epoch": 1.1307901907356948, "grad_norm": 1.5119550228118896, "learning_rate": 8.406224994113315e-06, "loss": 1.0426084995269775, "step": 830 }, { "epoch": 1.1321525885558583, "grad_norm": 2.3753795623779297, "learning_rate": 8.384433843786396e-06, "loss": 1.5145832300186157, "step": 831 }, { "epoch": 1.1335149863760219, "grad_norm": 2.2463390827178955, "learning_rate": 8.362650567805865e-06, "loss": 1.2560582160949707, "step": 832 }, { "epoch": 1.1348773841961852, "grad_norm": 1.2181698083877563, "learning_rate": 8.340875272344454e-06, "loss": 0.8411017060279846, "step": 833 }, { "epoch": 1.1362397820163488, "grad_norm": 1.5112247467041016, "learning_rate": 8.319108063535992e-06, "loss": 0.654084324836731, "step": 834 }, { "epoch": 1.1376021798365124, "grad_norm": 3.647519826889038, "learning_rate": 8.297349047474886e-06, "loss": 1.4922959804534912, "step": 835 }, { "epoch": 1.1389645776566757, "grad_norm": 1.3651139736175537, "learning_rate": 8.275598330215622e-06, "loss": 0.8173471093177795, "step": 836 }, { "epoch": 1.1403269754768393, "grad_norm": 1.8352748155593872, "learning_rate": 8.253856017772224e-06, "loss": 1.4325337409973145, "step": 837 }, { "epoch": 1.1416893732970028, "grad_norm": 2.666905641555786, "learning_rate": 8.232122216117763e-06, "loss": 1.5977535247802734, "step": 838 }, { "epoch": 1.1430517711171662, "grad_norm": 2.3766443729400635, "learning_rate": 8.21039703118383e-06, "loss": 1.336599349975586, "step": 839 }, { "epoch": 1.1444141689373297, "grad_norm": 2.7315871715545654, "learning_rate": 8.188680568860001e-06, "loss": 1.177632212638855, "step": 840 }, { "epoch": 1.145776566757493, "grad_norm": 2.1840827465057373, "learning_rate": 8.166972934993353e-06, "loss": 1.343082070350647, "step": 841 }, { "epoch": 1.1471389645776566, "grad_norm": 2.0587334632873535, "learning_rate": 8.145274235387924e-06, "loss": 1.2880394458770752, "step": 842 }, { "epoch": 1.1485013623978202, "grad_norm": 2.9728639125823975, "learning_rate": 8.123584575804215e-06, "loss": 1.1300387382507324, "step": 843 }, { "epoch": 1.1498637602179835, "grad_norm": 6.954526424407959, "learning_rate": 8.101904061958651e-06, "loss": 1.0844428539276123, "step": 844 }, { "epoch": 1.151226158038147, "grad_norm": 2.2863245010375977, "learning_rate": 8.080232799523095e-06, "loss": 1.4835712909698486, "step": 845 }, { "epoch": 1.1525885558583107, "grad_norm": 1.2595247030258179, "learning_rate": 8.058570894124317e-06, "loss": 0.9431970119476318, "step": 846 }, { "epoch": 1.153950953678474, "grad_norm": 2.0535993576049805, "learning_rate": 8.036918451343464e-06, "loss": 1.2059391736984253, "step": 847 }, { "epoch": 1.1553133514986376, "grad_norm": 1.9494422674179077, "learning_rate": 8.015275576715587e-06, "loss": 1.6023597717285156, "step": 848 }, { "epoch": 1.1566757493188011, "grad_norm": 1.8411693572998047, "learning_rate": 7.993642375729074e-06, "loss": 1.0910813808441162, "step": 849 }, { "epoch": 1.1580381471389645, "grad_norm": 2.1133406162261963, "learning_rate": 7.972018953825184e-06, "loss": 0.6914231777191162, "step": 850 }, { "epoch": 1.159400544959128, "grad_norm": 1.4655884504318237, "learning_rate": 7.950405416397509e-06, "loss": 1.1339576244354248, "step": 851 }, { "epoch": 1.1607629427792916, "grad_norm": 1.6884040832519531, "learning_rate": 7.928801868791452e-06, "loss": 1.0842143297195435, "step": 852 }, { "epoch": 1.162125340599455, "grad_norm": 2.77221417427063, "learning_rate": 7.90720841630374e-06, "loss": 1.0997743606567383, "step": 853 }, { "epoch": 1.1634877384196185, "grad_norm": 1.2846027612686157, "learning_rate": 7.88562516418188e-06, "loss": 1.218309760093689, "step": 854 }, { "epoch": 1.164850136239782, "grad_norm": 1.332168698310852, "learning_rate": 7.864052217623683e-06, "loss": 1.1969364881515503, "step": 855 }, { "epoch": 1.1662125340599454, "grad_norm": 2.06514310836792, "learning_rate": 7.842489681776706e-06, "loss": 1.4881412982940674, "step": 856 }, { "epoch": 1.167574931880109, "grad_norm": 2.107712984085083, "learning_rate": 7.820937661737783e-06, "loss": 1.5532594919204712, "step": 857 }, { "epoch": 1.1689373297002725, "grad_norm": 2.3416194915771484, "learning_rate": 7.799396262552486e-06, "loss": 1.837538480758667, "step": 858 }, { "epoch": 1.1702997275204359, "grad_norm": 1.5090652704238892, "learning_rate": 7.777865589214615e-06, "loss": 0.57518070936203, "step": 859 }, { "epoch": 1.1716621253405994, "grad_norm": 2.988835573196411, "learning_rate": 7.756345746665708e-06, "loss": 1.73407781124115, "step": 860 }, { "epoch": 1.173024523160763, "grad_norm": 1.442309856414795, "learning_rate": 7.73483683979449e-06, "loss": 0.2823115885257721, "step": 861 }, { "epoch": 1.1743869209809263, "grad_norm": 1.5289274454116821, "learning_rate": 7.713338973436403e-06, "loss": 1.1157079935073853, "step": 862 }, { "epoch": 1.17574931880109, "grad_norm": 1.2475718259811401, "learning_rate": 7.691852252373077e-06, "loss": 0.7817342281341553, "step": 863 }, { "epoch": 1.1771117166212535, "grad_norm": 2.354057550430298, "learning_rate": 7.670376781331803e-06, "loss": 1.669851303100586, "step": 864 }, { "epoch": 1.1784741144414168, "grad_norm": 1.7055414915084839, "learning_rate": 7.648912664985062e-06, "loss": 1.2531965970993042, "step": 865 }, { "epoch": 1.1798365122615804, "grad_norm": 3.4281036853790283, "learning_rate": 7.6274600079499675e-06, "loss": 1.7336888313293457, "step": 866 }, { "epoch": 1.181198910081744, "grad_norm": 4.999346733093262, "learning_rate": 7.606018914787802e-06, "loss": 1.3294944763183594, "step": 867 }, { "epoch": 1.1825613079019073, "grad_norm": 3.0687713623046875, "learning_rate": 7.584589490003464e-06, "loss": 0.8525564670562744, "step": 868 }, { "epoch": 1.1839237057220708, "grad_norm": 1.4954479932785034, "learning_rate": 7.563171838044998e-06, "loss": 1.1568007469177246, "step": 869 }, { "epoch": 1.1852861035422344, "grad_norm": 1.4144338369369507, "learning_rate": 7.541766063303062e-06, "loss": 0.8837820291519165, "step": 870 }, { "epoch": 1.1866485013623977, "grad_norm": 1.0398437976837158, "learning_rate": 7.520372270110415e-06, "loss": 0.975187361240387, "step": 871 }, { "epoch": 1.1880108991825613, "grad_norm": 1.5509750843048096, "learning_rate": 7.498990562741431e-06, "loss": 1.339482069015503, "step": 872 }, { "epoch": 1.1893732970027249, "grad_norm": 1.3519200086593628, "learning_rate": 7.477621045411559e-06, "loss": 0.9990026950836182, "step": 873 }, { "epoch": 1.1907356948228882, "grad_norm": 1.5797133445739746, "learning_rate": 7.456263822276855e-06, "loss": 0.77900230884552, "step": 874 }, { "epoch": 1.1920980926430518, "grad_norm": 2.6010658740997314, "learning_rate": 7.43491899743344e-06, "loss": 1.6766822338104248, "step": 875 }, { "epoch": 1.1934604904632153, "grad_norm": 1.9393657445907593, "learning_rate": 7.413586674917001e-06, "loss": 1.2134159803390503, "step": 876 }, { "epoch": 1.1948228882833787, "grad_norm": 2.9396872520446777, "learning_rate": 7.392266958702302e-06, "loss": 1.1980106830596924, "step": 877 }, { "epoch": 1.1961852861035422, "grad_norm": 1.9951303005218506, "learning_rate": 7.3709599527026475e-06, "loss": 0.9609541893005371, "step": 878 }, { "epoch": 1.1975476839237058, "grad_norm": 31.814308166503906, "learning_rate": 7.349665760769409e-06, "loss": 1.2163200378417969, "step": 879 }, { "epoch": 1.1989100817438691, "grad_norm": 1.511938214302063, "learning_rate": 7.32838448669148e-06, "loss": 0.73194420337677, "step": 880 }, { "epoch": 1.2002724795640327, "grad_norm": 1.7172006368637085, "learning_rate": 7.307116234194811e-06, "loss": 0.8080505132675171, "step": 881 }, { "epoch": 1.2016348773841963, "grad_norm": 1.9533432722091675, "learning_rate": 7.285861106941883e-06, "loss": 1.0879783630371094, "step": 882 }, { "epoch": 1.2029972752043596, "grad_norm": 6.96221923828125, "learning_rate": 7.264619208531187e-06, "loss": 1.1857813596725464, "step": 883 }, { "epoch": 1.2043596730245232, "grad_norm": 1.9619340896606445, "learning_rate": 7.243390642496759e-06, "loss": 1.38108491897583, "step": 884 }, { "epoch": 1.2057220708446867, "grad_norm": 0.9447917342185974, "learning_rate": 7.22217551230763e-06, "loss": 0.7872205972671509, "step": 885 }, { "epoch": 1.20708446866485, "grad_norm": 1.2873823642730713, "learning_rate": 7.2009739213673615e-06, "loss": 1.1352882385253906, "step": 886 }, { "epoch": 1.2084468664850136, "grad_norm": 2.579596757888794, "learning_rate": 7.17978597301352e-06, "loss": 1.5572775602340698, "step": 887 }, { "epoch": 1.2098092643051772, "grad_norm": 2.091141939163208, "learning_rate": 7.1586117705171675e-06, "loss": 1.7610681056976318, "step": 888 }, { "epoch": 1.2111716621253406, "grad_norm": 1.9530129432678223, "learning_rate": 7.1374514170823825e-06, "loss": 1.3934171199798584, "step": 889 }, { "epoch": 1.2125340599455041, "grad_norm": 1.8470689058303833, "learning_rate": 7.116305015845729e-06, "loss": 1.4546067714691162, "step": 890 }, { "epoch": 1.2138964577656677, "grad_norm": 23.58027458190918, "learning_rate": 7.095172669875781e-06, "loss": 1.1552492380142212, "step": 891 }, { "epoch": 1.215258855585831, "grad_norm": 8.496585845947266, "learning_rate": 7.074054482172592e-06, "loss": 1.4398317337036133, "step": 892 }, { "epoch": 1.2166212534059946, "grad_norm": 1.1559842824935913, "learning_rate": 7.052950555667218e-06, "loss": 1.4611413478851318, "step": 893 }, { "epoch": 1.2179836512261581, "grad_norm": 1.3327378034591675, "learning_rate": 7.031860993221207e-06, "loss": 0.725593090057373, "step": 894 }, { "epoch": 1.2193460490463215, "grad_norm": 1.983536720275879, "learning_rate": 7.01078589762608e-06, "loss": 0.5571556687355042, "step": 895 }, { "epoch": 1.220708446866485, "grad_norm": 2.5679686069488525, "learning_rate": 6.989725371602869e-06, "loss": 0.8927745819091797, "step": 896 }, { "epoch": 1.2220708446866486, "grad_norm": 12.808089256286621, "learning_rate": 6.9686795178015685e-06, "loss": 1.393315315246582, "step": 897 }, { "epoch": 1.223433242506812, "grad_norm": 1.6706115007400513, "learning_rate": 6.947648438800681e-06, "loss": 0.9781510829925537, "step": 898 }, { "epoch": 1.2247956403269755, "grad_norm": 1.1859040260314941, "learning_rate": 6.926632237106687e-06, "loss": 0.9889549612998962, "step": 899 }, { "epoch": 1.226158038147139, "grad_norm": 1.7185934782028198, "learning_rate": 6.905631015153549e-06, "loss": 1.1148736476898193, "step": 900 }, { "epoch": 1.2275204359673024, "grad_norm": 2.455629348754883, "learning_rate": 6.8846448753022285e-06, "loss": 1.7920029163360596, "step": 901 }, { "epoch": 1.228882833787466, "grad_norm": 1.3241729736328125, "learning_rate": 6.863673919840166e-06, "loss": 0.9524305462837219, "step": 902 }, { "epoch": 1.2302452316076296, "grad_norm": 2.0961315631866455, "learning_rate": 6.8427182509808045e-06, "loss": 1.5454907417297363, "step": 903 }, { "epoch": 1.231607629427793, "grad_norm": 3.71328067779541, "learning_rate": 6.821777970863063e-06, "loss": 1.253482699394226, "step": 904 }, { "epoch": 1.2329700272479565, "grad_norm": 1.3912283182144165, "learning_rate": 6.800853181550869e-06, "loss": 1.0354002714157104, "step": 905 }, { "epoch": 1.2343324250681198, "grad_norm": 1.5127853155136108, "learning_rate": 6.7799439850326445e-06, "loss": 0.5771975517272949, "step": 906 }, { "epoch": 1.2356948228882834, "grad_norm": 13.967789649963379, "learning_rate": 6.759050483220804e-06, "loss": 1.3927853107452393, "step": 907 }, { "epoch": 1.237057220708447, "grad_norm": 1.48551607131958, "learning_rate": 6.738172777951275e-06, "loss": 1.2377369403839111, "step": 908 }, { "epoch": 1.2384196185286103, "grad_norm": 4.73170280456543, "learning_rate": 6.717310970982984e-06, "loss": 1.7531287670135498, "step": 909 }, { "epoch": 1.2397820163487738, "grad_norm": 2.432232618331909, "learning_rate": 6.696465163997372e-06, "loss": 1.2073525190353394, "step": 910 }, { "epoch": 1.2411444141689374, "grad_norm": 1.661880373954773, "learning_rate": 6.6756354585979e-06, "loss": 1.2062729597091675, "step": 911 }, { "epoch": 1.2425068119891007, "grad_norm": 1.4256250858306885, "learning_rate": 6.654821956309537e-06, "loss": 1.1742050647735596, "step": 912 }, { "epoch": 1.2438692098092643, "grad_norm": 2.0355494022369385, "learning_rate": 6.634024758578292e-06, "loss": 1.0070958137512207, "step": 913 }, { "epoch": 1.2452316076294279, "grad_norm": 2.4949910640716553, "learning_rate": 6.613243966770691e-06, "loss": 1.5376157760620117, "step": 914 }, { "epoch": 1.2465940054495912, "grad_norm": 2.267374277114868, "learning_rate": 6.592479682173307e-06, "loss": 1.8130028247833252, "step": 915 }, { "epoch": 1.2479564032697548, "grad_norm": 2.2603695392608643, "learning_rate": 6.571732005992248e-06, "loss": 0.8836275339126587, "step": 916 }, { "epoch": 1.2493188010899183, "grad_norm": 1.9963089227676392, "learning_rate": 6.551001039352678e-06, "loss": 1.0346345901489258, "step": 917 }, { "epoch": 1.2506811989100817, "grad_norm": 4.163165092468262, "learning_rate": 6.53028688329832e-06, "loss": 1.3877449035644531, "step": 918 }, { "epoch": 1.2520435967302452, "grad_norm": 1.229958176612854, "learning_rate": 6.509589638790949e-06, "loss": 0.7306379675865173, "step": 919 }, { "epoch": 1.2534059945504088, "grad_norm": 1.433213233947754, "learning_rate": 6.488909406709931e-06, "loss": 1.3522355556488037, "step": 920 }, { "epoch": 1.2547683923705721, "grad_norm": 1.5883312225341797, "learning_rate": 6.468246287851691e-06, "loss": 1.4283676147460938, "step": 921 }, { "epoch": 1.2561307901907357, "grad_norm": 17.254358291625977, "learning_rate": 6.44760038292926e-06, "loss": 1.3928673267364502, "step": 922 }, { "epoch": 1.2574931880108993, "grad_norm": 2.331247091293335, "learning_rate": 6.426971792571765e-06, "loss": 0.9562796354293823, "step": 923 }, { "epoch": 1.2588555858310626, "grad_norm": 3.697079658508301, "learning_rate": 6.40636061732393e-06, "loss": 1.2348026037216187, "step": 924 }, { "epoch": 1.2602179836512262, "grad_norm": 1.3780945539474487, "learning_rate": 6.385766957645614e-06, "loss": 0.9386870861053467, "step": 925 }, { "epoch": 1.2615803814713895, "grad_norm": 1.7910583019256592, "learning_rate": 6.365190913911288e-06, "loss": 1.066786289215088, "step": 926 }, { "epoch": 1.262942779291553, "grad_norm": 2.2649776935577393, "learning_rate": 6.344632586409574e-06, "loss": 0.8573001623153687, "step": 927 }, { "epoch": 1.2643051771117166, "grad_norm": 3.8877835273742676, "learning_rate": 6.3240920753427336e-06, "loss": 2.053802967071533, "step": 928 }, { "epoch": 1.26566757493188, "grad_norm": 7.3311944007873535, "learning_rate": 6.3035694808262e-06, "loss": 1.2685723304748535, "step": 929 }, { "epoch": 1.2670299727520435, "grad_norm": 1.6128218173980713, "learning_rate": 6.2830649028880785e-06, "loss": 0.36333370208740234, "step": 930 }, { "epoch": 1.268392370572207, "grad_norm": 1.406497597694397, "learning_rate": 6.262578441468654e-06, "loss": 0.902088463306427, "step": 931 }, { "epoch": 1.2697547683923704, "grad_norm": 2.5424458980560303, "learning_rate": 6.242110196419919e-06, "loss": 1.5877082347869873, "step": 932 }, { "epoch": 1.271117166212534, "grad_norm": 2.656726598739624, "learning_rate": 6.221660267505072e-06, "loss": 0.7152581214904785, "step": 933 }, { "epoch": 1.2724795640326976, "grad_norm": 3.595240592956543, "learning_rate": 6.201228754398041e-06, "loss": 1.7111246585845947, "step": 934 }, { "epoch": 1.273841961852861, "grad_norm": 2.462385654449463, "learning_rate": 6.180815756682996e-06, "loss": 1.5699028968811035, "step": 935 }, { "epoch": 1.2752043596730245, "grad_norm": 2.744203567504883, "learning_rate": 6.160421373853856e-06, "loss": 1.7467997074127197, "step": 936 }, { "epoch": 1.276566757493188, "grad_norm": 3.9721944332122803, "learning_rate": 6.140045705313822e-06, "loss": 0.9605268239974976, "step": 937 }, { "epoch": 1.2779291553133514, "grad_norm": 6.127774715423584, "learning_rate": 6.119688850374863e-06, "loss": 1.0231908559799194, "step": 938 }, { "epoch": 1.279291553133515, "grad_norm": 2.2020037174224854, "learning_rate": 6.099350908257269e-06, "loss": 0.7266689538955688, "step": 939 }, { "epoch": 1.2806539509536785, "grad_norm": 1.9495772123336792, "learning_rate": 6.079031978089129e-06, "loss": 1.6469346284866333, "step": 940 }, { "epoch": 1.2820163487738419, "grad_norm": 1.30976402759552, "learning_rate": 6.058732158905886e-06, "loss": 1.1722667217254639, "step": 941 }, { "epoch": 1.2833787465940054, "grad_norm": 3.6592891216278076, "learning_rate": 6.038451549649819e-06, "loss": 1.5548686981201172, "step": 942 }, { "epoch": 1.284741144414169, "grad_norm": 1.5623364448547363, "learning_rate": 6.018190249169584e-06, "loss": 1.015455722808838, "step": 943 }, { "epoch": 1.2861035422343323, "grad_norm": 10.825885772705078, "learning_rate": 5.997948356219731e-06, "loss": 1.120009422302246, "step": 944 }, { "epoch": 1.2874659400544959, "grad_norm": 1.5466862916946411, "learning_rate": 5.9777259694602e-06, "loss": 1.2267658710479736, "step": 945 }, { "epoch": 1.2888283378746594, "grad_norm": 1.997960090637207, "learning_rate": 5.957523187455876e-06, "loss": 0.8885673880577087, "step": 946 }, { "epoch": 1.2901907356948228, "grad_norm": 5.55203104019165, "learning_rate": 5.937340108676077e-06, "loss": 1.0410943031311035, "step": 947 }, { "epoch": 1.2915531335149864, "grad_norm": 2.038248062133789, "learning_rate": 5.9171768314940915e-06, "loss": 1.0878962278366089, "step": 948 }, { "epoch": 1.29291553133515, "grad_norm": 1.8700592517852783, "learning_rate": 5.89703345418669e-06, "loss": 1.1958909034729004, "step": 949 }, { "epoch": 1.2942779291553133, "grad_norm": 2.9717767238616943, "learning_rate": 5.876910074933655e-06, "loss": 1.4264605045318604, "step": 950 }, { "epoch": 1.2956403269754768, "grad_norm": 3.428731918334961, "learning_rate": 5.856806791817299e-06, "loss": 1.3217412233352661, "step": 951 }, { "epoch": 1.2970027247956404, "grad_norm": 4.220239639282227, "learning_rate": 5.836723702821974e-06, "loss": 1.6535362005233765, "step": 952 }, { "epoch": 1.2983651226158037, "grad_norm": 4.103677272796631, "learning_rate": 5.816660905833617e-06, "loss": 1.014174222946167, "step": 953 }, { "epoch": 1.2997275204359673, "grad_norm": 3.8307180404663086, "learning_rate": 5.796618498639257e-06, "loss": 1.4770634174346924, "step": 954 }, { "epoch": 1.3010899182561309, "grad_norm": 2.6995770931243896, "learning_rate": 5.776596578926539e-06, "loss": 1.292000412940979, "step": 955 }, { "epoch": 1.3024523160762942, "grad_norm": 3.1744916439056396, "learning_rate": 5.756595244283253e-06, "loss": 1.2292355298995972, "step": 956 }, { "epoch": 1.3038147138964578, "grad_norm": 2.774714469909668, "learning_rate": 5.736614592196859e-06, "loss": 1.0887806415557861, "step": 957 }, { "epoch": 1.3051771117166213, "grad_norm": 2.465463399887085, "learning_rate": 5.716654720054e-06, "loss": 1.8095741271972656, "step": 958 }, { "epoch": 1.3065395095367847, "grad_norm": 1.8663467168807983, "learning_rate": 5.696715725140057e-06, "loss": 0.7355883121490479, "step": 959 }, { "epoch": 1.3079019073569482, "grad_norm": 1.9095938205718994, "learning_rate": 5.676797704638626e-06, "loss": 1.5098589658737183, "step": 960 }, { "epoch": 1.3092643051771118, "grad_norm": 2.1555159091949463, "learning_rate": 5.6569007556311025e-06, "loss": 0.945008397102356, "step": 961 }, { "epoch": 1.3106267029972751, "grad_norm": 29.606258392333984, "learning_rate": 5.6370249750961485e-06, "loss": 1.6413015127182007, "step": 962 }, { "epoch": 1.3119891008174387, "grad_norm": 7.397209644317627, "learning_rate": 5.617170459909274e-06, "loss": 1.633244514465332, "step": 963 }, { "epoch": 1.3133514986376023, "grad_norm": 2.437603235244751, "learning_rate": 5.597337306842331e-06, "loss": 1.334827184677124, "step": 964 }, { "epoch": 1.3147138964577656, "grad_norm": 9.446292877197266, "learning_rate": 5.5775256125630504e-06, "loss": 1.3500728607177734, "step": 965 }, { "epoch": 1.3160762942779292, "grad_norm": 32.16663360595703, "learning_rate": 5.5577354736345755e-06, "loss": 1.849884271621704, "step": 966 }, { "epoch": 1.3174386920980927, "grad_norm": 1.4507752656936646, "learning_rate": 5.537966986514985e-06, "loss": 1.2164182662963867, "step": 967 }, { "epoch": 1.318801089918256, "grad_norm": 1.7765029668807983, "learning_rate": 5.5182202475568245e-06, "loss": 1.1724369525909424, "step": 968 }, { "epoch": 1.3201634877384196, "grad_norm": 1.9247803688049316, "learning_rate": 5.498495353006645e-06, "loss": 1.2116200923919678, "step": 969 }, { "epoch": 1.3215258855585832, "grad_norm": 1.8831514120101929, "learning_rate": 5.478792399004513e-06, "loss": 0.9660593271255493, "step": 970 }, { "epoch": 1.3228882833787465, "grad_norm": 3.1345956325531006, "learning_rate": 5.459111481583581e-06, "loss": 1.671825885772705, "step": 971 }, { "epoch": 1.32425068119891, "grad_norm": 3.9775493144989014, "learning_rate": 5.43945269666956e-06, "loss": 1.785200595855713, "step": 972 }, { "epoch": 1.3256130790190737, "grad_norm": 3.92702579498291, "learning_rate": 5.419816140080318e-06, "loss": 1.4940744638442993, "step": 973 }, { "epoch": 1.326975476839237, "grad_norm": 3.763221025466919, "learning_rate": 5.400201907525355e-06, "loss": 0.6700971126556396, "step": 974 }, { "epoch": 1.3283378746594006, "grad_norm": 6.706611633300781, "learning_rate": 5.380610094605384e-06, "loss": 0.755683183670044, "step": 975 }, { "epoch": 1.3297002724795641, "grad_norm": 3.2125422954559326, "learning_rate": 5.361040796811831e-06, "loss": 1.473036289215088, "step": 976 }, { "epoch": 1.3310626702997275, "grad_norm": 5.571830749511719, "learning_rate": 5.341494109526386e-06, "loss": 1.0507359504699707, "step": 977 }, { "epoch": 1.332425068119891, "grad_norm": 1.3964585065841675, "learning_rate": 5.321970128020533e-06, "loss": 1.108083724975586, "step": 978 }, { "epoch": 1.3337874659400546, "grad_norm": 1.9543852806091309, "learning_rate": 5.3024689474550885e-06, "loss": 1.2806951999664307, "step": 979 }, { "epoch": 1.335149863760218, "grad_norm": 1.9527177810668945, "learning_rate": 5.282990662879735e-06, "loss": 0.5856119394302368, "step": 980 }, { "epoch": 1.3365122615803815, "grad_norm": 1.4536997079849243, "learning_rate": 5.26353536923256e-06, "loss": 1.066063404083252, "step": 981 }, { "epoch": 1.337874659400545, "grad_norm": 2.641045093536377, "learning_rate": 5.244103161339587e-06, "loss": 1.458785057067871, "step": 982 }, { "epoch": 1.3392370572207084, "grad_norm": 1.6934006214141846, "learning_rate": 5.224694133914333e-06, "loss": 0.9151726961135864, "step": 983 }, { "epoch": 1.340599455040872, "grad_norm": 2.6008119583129883, "learning_rate": 5.205308381557308e-06, "loss": 1.1924636363983154, "step": 984 }, { "epoch": 1.3419618528610355, "grad_norm": 12.207076072692871, "learning_rate": 5.185945998755609e-06, "loss": 1.5928189754486084, "step": 985 }, { "epoch": 1.3433242506811989, "grad_norm": 4.694320201873779, "learning_rate": 5.1666070798823955e-06, "loss": 1.2947814464569092, "step": 986 }, { "epoch": 1.3446866485013624, "grad_norm": 1.695266604423523, "learning_rate": 5.14729171919649e-06, "loss": 0.9944629669189453, "step": 987 }, { "epoch": 1.346049046321526, "grad_norm": 1.7176851034164429, "learning_rate": 5.128000010841876e-06, "loss": 1.168241024017334, "step": 988 }, { "epoch": 1.3474114441416893, "grad_norm": 9.157471656799316, "learning_rate": 5.108732048847262e-06, "loss": 1.3747432231903076, "step": 989 }, { "epoch": 1.348773841961853, "grad_norm": 16.196401596069336, "learning_rate": 5.089487927125609e-06, "loss": 1.3849818706512451, "step": 990 }, { "epoch": 1.3501362397820165, "grad_norm": 1.797544002532959, "learning_rate": 5.070267739473684e-06, "loss": 1.148231029510498, "step": 991 }, { "epoch": 1.3514986376021798, "grad_norm": 2.771800994873047, "learning_rate": 5.051071579571599e-06, "loss": 1.8690265417099, "step": 992 }, { "epoch": 1.3528610354223434, "grad_norm": 4.877021312713623, "learning_rate": 5.03189954098235e-06, "loss": 2.0547657012939453, "step": 993 }, { "epoch": 1.354223433242507, "grad_norm": 3.7085821628570557, "learning_rate": 5.012751717151363e-06, "loss": 1.5132076740264893, "step": 994 }, { "epoch": 1.3555858310626703, "grad_norm": 1.5582078695297241, "learning_rate": 4.993628201406052e-06, "loss": 0.9177080988883972, "step": 995 }, { "epoch": 1.3569482288828338, "grad_norm": 2.5337560176849365, "learning_rate": 4.974529086955333e-06, "loss": 1.5421903133392334, "step": 996 }, { "epoch": 1.3583106267029974, "grad_norm": 1.732593297958374, "learning_rate": 4.955454466889211e-06, "loss": 0.3873080611228943, "step": 997 }, { "epoch": 1.3596730245231607, "grad_norm": 1.4948740005493164, "learning_rate": 4.936404434178281e-06, "loss": 1.4705116748809814, "step": 998 }, { "epoch": 1.3610354223433243, "grad_norm": 4.089753150939941, "learning_rate": 4.917379081673317e-06, "loss": 1.044406533241272, "step": 999 }, { "epoch": 1.3623978201634879, "grad_norm": 2.062587261199951, "learning_rate": 4.898378502104792e-06, "loss": 0.9650086164474487, "step": 1000 }, { "epoch": 1.3637602179836512, "grad_norm": 1.6357592344284058, "learning_rate": 4.8794027880824356e-06, "loss": 1.235352635383606, "step": 1001 }, { "epoch": 1.3651226158038148, "grad_norm": 2.571821451187134, "learning_rate": 4.860452032094781e-06, "loss": 0.9818557500839233, "step": 1002 }, { "epoch": 1.3664850136239783, "grad_norm": 5.324853897094727, "learning_rate": 4.841526326508718e-06, "loss": 1.399820327758789, "step": 1003 }, { "epoch": 1.3678474114441417, "grad_norm": 1.1675382852554321, "learning_rate": 4.822625763569033e-06, "loss": 0.8495630621910095, "step": 1004 }, { "epoch": 1.3692098092643052, "grad_norm": 2.3282697200775146, "learning_rate": 4.803750435397973e-06, "loss": 0.8490650653839111, "step": 1005 }, { "epoch": 1.3705722070844686, "grad_norm": 1.8163917064666748, "learning_rate": 4.784900433994782e-06, "loss": 1.3846685886383057, "step": 1006 }, { "epoch": 1.3719346049046321, "grad_norm": 1.7692952156066895, "learning_rate": 4.766075851235274e-06, "loss": 0.8341988921165466, "step": 1007 }, { "epoch": 1.3732970027247957, "grad_norm": 5.912053108215332, "learning_rate": 4.747276778871352e-06, "loss": 1.1229275465011597, "step": 1008 }, { "epoch": 1.374659400544959, "grad_norm": 22.412076950073242, "learning_rate": 4.728503308530599e-06, "loss": 0.9307339191436768, "step": 1009 }, { "epoch": 1.3760217983651226, "grad_norm": 5.511975288391113, "learning_rate": 4.709755531715793e-06, "loss": 1.5541999340057373, "step": 1010 }, { "epoch": 1.3773841961852862, "grad_norm": 2.3859100341796875, "learning_rate": 4.6910335398044995e-06, "loss": 1.3574914932250977, "step": 1011 }, { "epoch": 1.3787465940054495, "grad_norm": 1.973809003829956, "learning_rate": 4.672337424048597e-06, "loss": 1.3359588384628296, "step": 1012 }, { "epoch": 1.380108991825613, "grad_norm": 6.836277484893799, "learning_rate": 4.6536672755738445e-06, "loss": 1.430633783340454, "step": 1013 }, { "epoch": 1.3814713896457766, "grad_norm": 3.3206160068511963, "learning_rate": 4.635023185379433e-06, "loss": 0.9864079356193542, "step": 1014 }, { "epoch": 1.38283378746594, "grad_norm": 2.7649002075195312, "learning_rate": 4.616405244337547e-06, "loss": 1.3912737369537354, "step": 1015 }, { "epoch": 1.3841961852861036, "grad_norm": 7.686048984527588, "learning_rate": 4.5978135431929185e-06, "loss": 0.8716543912887573, "step": 1016 }, { "epoch": 1.385558583106267, "grad_norm": 2.323291778564453, "learning_rate": 4.579248172562384e-06, "loss": 1.6432886123657227, "step": 1017 }, { "epoch": 1.3869209809264305, "grad_norm": 1.5738471746444702, "learning_rate": 4.56070922293444e-06, "loss": 1.1077617406845093, "step": 1018 }, { "epoch": 1.388283378746594, "grad_norm": 2.7729480266571045, "learning_rate": 4.542196784668821e-06, "loss": 1.6105077266693115, "step": 1019 }, { "epoch": 1.3896457765667574, "grad_norm": 1.536705732345581, "learning_rate": 4.52371094799602e-06, "loss": 1.4380189180374146, "step": 1020 }, { "epoch": 1.391008174386921, "grad_norm": 3.014023542404175, "learning_rate": 4.505251803016898e-06, "loss": 0.627190113067627, "step": 1021 }, { "epoch": 1.3923705722070845, "grad_norm": 2.5524754524230957, "learning_rate": 4.486819439702193e-06, "loss": 1.300781011581421, "step": 1022 }, { "epoch": 1.3937329700272478, "grad_norm": 3.375444173812866, "learning_rate": 4.468413947892131e-06, "loss": 1.132867693901062, "step": 1023 }, { "epoch": 1.3950953678474114, "grad_norm": 1.8252739906311035, "learning_rate": 4.450035417295955e-06, "loss": 1.5580737590789795, "step": 1024 }, { "epoch": 1.396457765667575, "grad_norm": 1.5530959367752075, "learning_rate": 4.431683937491497e-06, "loss": 1.8257856369018555, "step": 1025 }, { "epoch": 1.3978201634877383, "grad_norm": 1.558298945426941, "learning_rate": 4.413359597924744e-06, "loss": 1.131047010421753, "step": 1026 }, { "epoch": 1.3991825613079019, "grad_norm": 1.9497509002685547, "learning_rate": 4.3950624879094e-06, "loss": 1.3238310813903809, "step": 1027 }, { "epoch": 1.4005449591280654, "grad_norm": 2.191779851913452, "learning_rate": 4.3767926966264486e-06, "loss": 1.2455363273620605, "step": 1028 }, { "epoch": 1.4019073569482288, "grad_norm": 5.890145301818848, "learning_rate": 4.358550313123725e-06, "loss": 0.8004064559936523, "step": 1029 }, { "epoch": 1.4032697547683923, "grad_norm": 3.5881917476654053, "learning_rate": 4.340335426315472e-06, "loss": 0.977668285369873, "step": 1030 }, { "epoch": 1.404632152588556, "grad_norm": 1.6719446182250977, "learning_rate": 4.322148124981924e-06, "loss": 1.0931888818740845, "step": 1031 }, { "epoch": 1.4059945504087192, "grad_norm": 4.1104912757873535, "learning_rate": 4.3039884977688415e-06, "loss": 1.819986343383789, "step": 1032 }, { "epoch": 1.4073569482288828, "grad_norm": 2.1843199729919434, "learning_rate": 4.285856633187126e-06, "loss": 0.7080628871917725, "step": 1033 }, { "epoch": 1.4087193460490464, "grad_norm": 2.3885533809661865, "learning_rate": 4.267752619612336e-06, "loss": 0.9705085754394531, "step": 1034 }, { "epoch": 1.4100817438692097, "grad_norm": 2.661731004714966, "learning_rate": 4.249676545284305e-06, "loss": 1.2527896165847778, "step": 1035 }, { "epoch": 1.4114441416893733, "grad_norm": 1.7453866004943848, "learning_rate": 4.231628498306677e-06, "loss": 1.1103830337524414, "step": 1036 }, { "epoch": 1.4128065395095368, "grad_norm": 1.4746209383010864, "learning_rate": 4.213608566646492e-06, "loss": 0.8559749722480774, "step": 1037 }, { "epoch": 1.4141689373297002, "grad_norm": 1.6347116231918335, "learning_rate": 4.195616838133757e-06, "loss": 0.728329062461853, "step": 1038 }, { "epoch": 1.4155313351498637, "grad_norm": 4.918883323669434, "learning_rate": 4.177653400461012e-06, "loss": 0.8296566605567932, "step": 1039 }, { "epoch": 1.4168937329700273, "grad_norm": 2.439199686050415, "learning_rate": 4.15971834118291e-06, "loss": 1.499788522720337, "step": 1040 }, { "epoch": 1.4182561307901906, "grad_norm": 1.764665126800537, "learning_rate": 4.141811747715783e-06, "loss": 1.1484037637710571, "step": 1041 }, { "epoch": 1.4196185286103542, "grad_norm": 1.9582279920578003, "learning_rate": 4.123933707337219e-06, "loss": 1.6962906122207642, "step": 1042 }, { "epoch": 1.4209809264305178, "grad_norm": 3.0799622535705566, "learning_rate": 4.1060843071856394e-06, "loss": 1.3218719959259033, "step": 1043 }, { "epoch": 1.422343324250681, "grad_norm": 2.2284579277038574, "learning_rate": 4.088263634259868e-06, "loss": 1.0673415660858154, "step": 1044 }, { "epoch": 1.4237057220708447, "grad_norm": 4.19002103805542, "learning_rate": 4.070471775418723e-06, "loss": 1.6568541526794434, "step": 1045 }, { "epoch": 1.4250681198910082, "grad_norm": 3.9263699054718018, "learning_rate": 4.052708817380557e-06, "loss": 1.1425827741622925, "step": 1046 }, { "epoch": 1.4264305177111716, "grad_norm": 2.0896403789520264, "learning_rate": 4.034974846722887e-06, "loss": 1.6635379791259766, "step": 1047 }, { "epoch": 1.4277929155313351, "grad_norm": 6.75886344909668, "learning_rate": 4.017269949881924e-06, "loss": 1.7278867959976196, "step": 1048 }, { "epoch": 1.4291553133514987, "grad_norm": 3.282860040664673, "learning_rate": 3.9995942131521795e-06, "loss": 1.6127727031707764, "step": 1049 }, { "epoch": 1.430517711171662, "grad_norm": 2.176051616668701, "learning_rate": 3.9819477226860374e-06, "loss": 0.8341235518455505, "step": 1050 }, { "epoch": 1.4318801089918256, "grad_norm": 2.4680750370025635, "learning_rate": 3.96433056449333e-06, "loss": 1.4686334133148193, "step": 1051 }, { "epoch": 1.4332425068119892, "grad_norm": 1.2338483333587646, "learning_rate": 3.946742824440929e-06, "loss": 1.3731441497802734, "step": 1052 }, { "epoch": 1.4346049046321525, "grad_norm": 1.781758427619934, "learning_rate": 3.929184588252315e-06, "loss": 1.440997838973999, "step": 1053 }, { "epoch": 1.435967302452316, "grad_norm": 1.7874432802200317, "learning_rate": 3.911655941507166e-06, "loss": 1.3115761280059814, "step": 1054 }, { "epoch": 1.4373297002724796, "grad_norm": 2.8415114879608154, "learning_rate": 3.894156969640944e-06, "loss": 1.352166771888733, "step": 1055 }, { "epoch": 1.438692098092643, "grad_norm": 14.496922492980957, "learning_rate": 3.8766877579444706e-06, "loss": 1.373366355895996, "step": 1056 }, { "epoch": 1.4400544959128065, "grad_norm": 3.388000726699829, "learning_rate": 3.859248391563515e-06, "loss": 0.939631998538971, "step": 1057 }, { "epoch": 1.44141689373297, "grad_norm": 2.6212575435638428, "learning_rate": 3.841838955498377e-06, "loss": 0.9753890037536621, "step": 1058 }, { "epoch": 1.4427792915531334, "grad_norm": 1.6447877883911133, "learning_rate": 3.824459534603482e-06, "loss": 1.7026125192642212, "step": 1059 }, { "epoch": 1.444141689373297, "grad_norm": 6.184053897857666, "learning_rate": 3.807110213586954e-06, "loss": 1.981075644493103, "step": 1060 }, { "epoch": 1.4455040871934606, "grad_norm": 4.163010120391846, "learning_rate": 3.78979107701021e-06, "loss": 1.752202033996582, "step": 1061 }, { "epoch": 1.446866485013624, "grad_norm": 2.6495954990386963, "learning_rate": 3.772502209287544e-06, "loss": 1.1788198947906494, "step": 1062 }, { "epoch": 1.4482288828337875, "grad_norm": 16.163951873779297, "learning_rate": 3.755243694685724e-06, "loss": 0.9336347579956055, "step": 1063 }, { "epoch": 1.449591280653951, "grad_norm": 1.7173141241073608, "learning_rate": 3.7380156173235694e-06, "loss": 1.4367519617080688, "step": 1064 }, { "epoch": 1.4509536784741144, "grad_norm": 1.7192779779434204, "learning_rate": 3.7208180611715515e-06, "loss": 1.4065064191818237, "step": 1065 }, { "epoch": 1.452316076294278, "grad_norm": 2.2915637493133545, "learning_rate": 3.703651110051378e-06, "loss": 1.074788212776184, "step": 1066 }, { "epoch": 1.4536784741144415, "grad_norm": 2.8422911167144775, "learning_rate": 3.686514847635586e-06, "loss": 1.3369567394256592, "step": 1067 }, { "epoch": 1.4550408719346049, "grad_norm": 3.9457411766052246, "learning_rate": 3.6694093574471355e-06, "loss": 1.371973991394043, "step": 1068 }, { "epoch": 1.4564032697547684, "grad_norm": 2.6377012729644775, "learning_rate": 3.652334722858999e-06, "loss": 1.6865668296813965, "step": 1069 }, { "epoch": 1.457765667574932, "grad_norm": 1.2006773948669434, "learning_rate": 3.6352910270937593e-06, "loss": 1.0094540119171143, "step": 1070 }, { "epoch": 1.4591280653950953, "grad_norm": 7.682157039642334, "learning_rate": 3.618278353223198e-06, "loss": 1.1168352365493774, "step": 1071 }, { "epoch": 1.4604904632152589, "grad_norm": 4.172733783721924, "learning_rate": 3.6012967841679047e-06, "loss": 1.2975666522979736, "step": 1072 }, { "epoch": 1.4618528610354224, "grad_norm": 1.3310295343399048, "learning_rate": 3.584346402696852e-06, "loss": 0.7914919853210449, "step": 1073 }, { "epoch": 1.4632152588555858, "grad_norm": 2.3812522888183594, "learning_rate": 3.567427291427007e-06, "loss": 1.313239336013794, "step": 1074 }, { "epoch": 1.4645776566757494, "grad_norm": 2.521183490753174, "learning_rate": 3.5505395328229242e-06, "loss": 0.44808727502822876, "step": 1075 }, { "epoch": 1.465940054495913, "grad_norm": 4.1098856925964355, "learning_rate": 3.5336832091963424e-06, "loss": 0.6801596283912659, "step": 1076 }, { "epoch": 1.4673024523160763, "grad_norm": 2.2154054641723633, "learning_rate": 3.516858402705787e-06, "loss": 1.2594823837280273, "step": 1077 }, { "epoch": 1.4686648501362398, "grad_norm": 3.4918437004089355, "learning_rate": 3.500065195356165e-06, "loss": 2.053623914718628, "step": 1078 }, { "epoch": 1.4700272479564034, "grad_norm": 6.079390525817871, "learning_rate": 3.48330366899837e-06, "loss": 1.1268949508666992, "step": 1079 }, { "epoch": 1.4713896457765667, "grad_norm": 3.833160400390625, "learning_rate": 3.4665739053288793e-06, "loss": 1.3519601821899414, "step": 1080 }, { "epoch": 1.4727520435967303, "grad_norm": 1.6956721544265747, "learning_rate": 3.4498759858893573e-06, "loss": 1.1343610286712646, "step": 1081 }, { "epoch": 1.4741144414168939, "grad_norm": 9.757513999938965, "learning_rate": 3.43320999206626e-06, "loss": 1.4104050397872925, "step": 1082 }, { "epoch": 1.4754768392370572, "grad_norm": 2.155021905899048, "learning_rate": 3.4165760050904294e-06, "loss": 1.0882716178894043, "step": 1083 }, { "epoch": 1.4768392370572208, "grad_norm": 2.3599016666412354, "learning_rate": 3.399974106036723e-06, "loss": 1.144540548324585, "step": 1084 }, { "epoch": 1.4782016348773843, "grad_norm": 1.9101898670196533, "learning_rate": 3.3834043758235722e-06, "loss": 1.1556837558746338, "step": 1085 }, { "epoch": 1.4795640326975477, "grad_norm": 1.2333734035491943, "learning_rate": 3.3668668952126427e-06, "loss": 1.3224241733551025, "step": 1086 }, { "epoch": 1.4809264305177112, "grad_norm": 2.284268379211426, "learning_rate": 3.350361744808399e-06, "loss": 0.5696076154708862, "step": 1087 }, { "epoch": 1.4822888283378748, "grad_norm": 1.2406128644943237, "learning_rate": 3.3338890050577297e-06, "loss": 0.9746992588043213, "step": 1088 }, { "epoch": 1.4836512261580381, "grad_norm": 2.5079779624938965, "learning_rate": 3.317448756249553e-06, "loss": 0.86826491355896, "step": 1089 }, { "epoch": 1.4850136239782017, "grad_norm": 3.001905679702759, "learning_rate": 3.3010410785144255e-06, "loss": 1.4917911291122437, "step": 1090 }, { "epoch": 1.4863760217983653, "grad_norm": 15.153319358825684, "learning_rate": 3.284666051824148e-06, "loss": 1.566753625869751, "step": 1091 }, { "epoch": 1.4877384196185286, "grad_norm": 5.444674968719482, "learning_rate": 3.2683237559913826e-06, "loss": 0.6633802056312561, "step": 1092 }, { "epoch": 1.4891008174386922, "grad_norm": 2.5570409297943115, "learning_rate": 3.252014270669258e-06, "loss": 0.9802032113075256, "step": 1093 }, { "epoch": 1.4904632152588557, "grad_norm": 1.1654329299926758, "learning_rate": 3.2357376753509793e-06, "loss": 0.727063775062561, "step": 1094 }, { "epoch": 1.491825613079019, "grad_norm": 3.9165854454040527, "learning_rate": 3.219494049369447e-06, "loss": 1.61658775806427, "step": 1095 }, { "epoch": 1.4931880108991826, "grad_norm": 2.6263420581817627, "learning_rate": 3.2032834718968776e-06, "loss": 1.4094042778015137, "step": 1096 }, { "epoch": 1.494550408719346, "grad_norm": 1.8918052911758423, "learning_rate": 3.187106021944383e-06, "loss": 0.95722496509552, "step": 1097 }, { "epoch": 1.4959128065395095, "grad_norm": 10.490915298461914, "learning_rate": 3.1709617783616384e-06, "loss": 1.5695228576660156, "step": 1098 }, { "epoch": 1.497275204359673, "grad_norm": 1.867085337638855, "learning_rate": 3.1548508198364434e-06, "loss": 0.850907564163208, "step": 1099 }, { "epoch": 1.4986376021798364, "grad_norm": 1.3141895532608032, "learning_rate": 3.138773224894387e-06, "loss": 1.2381501197814941, "step": 1100 }, { "epoch": 1.5, "grad_norm": 1.5976762771606445, "learning_rate": 3.1227290718984282e-06, "loss": 1.085966944694519, "step": 1101 }, { "epoch": 1.5013623978201633, "grad_norm": 1.466168999671936, "learning_rate": 3.106718439048535e-06, "loss": 1.4222763776779175, "step": 1102 }, { "epoch": 1.5027247956403271, "grad_norm": 2.671313762664795, "learning_rate": 3.0907414043812933e-06, "loss": 1.0105867385864258, "step": 1103 }, { "epoch": 1.5040871934604905, "grad_norm": 1.666020393371582, "learning_rate": 3.0747980457695313e-06, "loss": 1.1257190704345703, "step": 1104 }, { "epoch": 1.5054495912806538, "grad_norm": 1.2834885120391846, "learning_rate": 3.058888440921938e-06, "loss": 0.9231491684913635, "step": 1105 }, { "epoch": 1.5068119891008176, "grad_norm": 21.858219146728516, "learning_rate": 3.0430126673826842e-06, "loss": 0.9742729663848877, "step": 1106 }, { "epoch": 1.508174386920981, "grad_norm": 1.549538254737854, "learning_rate": 3.0271708025310454e-06, "loss": 0.9615423083305359, "step": 1107 }, { "epoch": 1.5095367847411443, "grad_norm": 5.9181389808654785, "learning_rate": 3.0113629235810336e-06, "loss": 1.434371829032898, "step": 1108 }, { "epoch": 1.510899182561308, "grad_norm": 3.585644245147705, "learning_rate": 2.9955891075809906e-06, "loss": 1.766754388809204, "step": 1109 }, { "epoch": 1.5122615803814714, "grad_norm": 3.883751392364502, "learning_rate": 2.9798494314132607e-06, "loss": 1.3567906618118286, "step": 1110 }, { "epoch": 1.5136239782016347, "grad_norm": 1.699000358581543, "learning_rate": 2.9641439717937614e-06, "loss": 0.7791076898574829, "step": 1111 }, { "epoch": 1.5149863760217985, "grad_norm": 2.797952175140381, "learning_rate": 2.9484728052716614e-06, "loss": 1.2537765502929688, "step": 1112 }, { "epoch": 1.5163487738419619, "grad_norm": 3.6956400871276855, "learning_rate": 2.9328360082289685e-06, "loss": 1.0309852361679077, "step": 1113 }, { "epoch": 1.5177111716621252, "grad_norm": 1.9102109670639038, "learning_rate": 2.9172336568801785e-06, "loss": 0.8841735124588013, "step": 1114 }, { "epoch": 1.5190735694822888, "grad_norm": 2.2511720657348633, "learning_rate": 2.9016658272718924e-06, "loss": 0.7262409925460815, "step": 1115 }, { "epoch": 1.5204359673024523, "grad_norm": 1.7945059537887573, "learning_rate": 2.8861325952824537e-06, "loss": 1.0946037769317627, "step": 1116 }, { "epoch": 1.5217983651226157, "grad_norm": 1.6287676095962524, "learning_rate": 2.870634036621577e-06, "loss": 0.8504898548126221, "step": 1117 }, { "epoch": 1.5231607629427792, "grad_norm": 1.7399216890335083, "learning_rate": 2.855170226829973e-06, "loss": 0.8773958086967468, "step": 1118 }, { "epoch": 1.5245231607629428, "grad_norm": 10.414885520935059, "learning_rate": 2.839741241278985e-06, "loss": 1.049332618713379, "step": 1119 }, { "epoch": 1.5258855585831061, "grad_norm": 2.1847877502441406, "learning_rate": 2.8243471551702327e-06, "loss": 0.6023992896080017, "step": 1120 }, { "epoch": 1.5272479564032697, "grad_norm": 2.2924256324768066, "learning_rate": 2.8089880435352122e-06, "loss": 1.0050101280212402, "step": 1121 }, { "epoch": 1.5286103542234333, "grad_norm": 1.0029001235961914, "learning_rate": 2.7936639812349785e-06, "loss": 0.8349490761756897, "step": 1122 }, { "epoch": 1.5299727520435966, "grad_norm": 1.8599648475646973, "learning_rate": 2.778375042959729e-06, "loss": 1.237276315689087, "step": 1123 }, { "epoch": 1.5313351498637602, "grad_norm": 1.8701823949813843, "learning_rate": 2.7631213032284843e-06, "loss": 1.4086709022521973, "step": 1124 }, { "epoch": 1.5326975476839237, "grad_norm": 3.593202829360962, "learning_rate": 2.747902836388697e-06, "loss": 1.5341556072235107, "step": 1125 }, { "epoch": 1.534059945504087, "grad_norm": 1.1355699300765991, "learning_rate": 2.732719716615898e-06, "loss": 0.8650002479553223, "step": 1126 }, { "epoch": 1.5354223433242506, "grad_norm": 26.999711990356445, "learning_rate": 2.717572017913338e-06, "loss": 0.9282069206237793, "step": 1127 }, { "epoch": 1.5367847411444142, "grad_norm": 2.603379249572754, "learning_rate": 2.702459814111621e-06, "loss": 1.380162239074707, "step": 1128 }, { "epoch": 1.5381471389645776, "grad_norm": 2.976790189743042, "learning_rate": 2.687383178868348e-06, "loss": 1.319352626800537, "step": 1129 }, { "epoch": 1.5395095367847411, "grad_norm": 0.9751675128936768, "learning_rate": 2.672342185667758e-06, "loss": 0.8718571662902832, "step": 1130 }, { "epoch": 1.5408719346049047, "grad_norm": 3.9789717197418213, "learning_rate": 2.6573369078203646e-06, "loss": 1.0572667121887207, "step": 1131 }, { "epoch": 1.542234332425068, "grad_norm": 1.691832423210144, "learning_rate": 2.6423674184626147e-06, "loss": 1.0265076160430908, "step": 1132 }, { "epoch": 1.5435967302452316, "grad_norm": 4.066219329833984, "learning_rate": 2.6274337905565028e-06, "loss": 1.5856740474700928, "step": 1133 }, { "epoch": 1.5449591280653951, "grad_norm": 2.8472390174865723, "learning_rate": 2.6125360968892534e-06, "loss": 1.0180156230926514, "step": 1134 }, { "epoch": 1.5463215258855585, "grad_norm": 1.6441315412521362, "learning_rate": 2.597674410072922e-06, "loss": 0.7192747592926025, "step": 1135 }, { "epoch": 1.547683923705722, "grad_norm": 2.2804887294769287, "learning_rate": 2.582848802544088e-06, "loss": 0.6259332895278931, "step": 1136 }, { "epoch": 1.5490463215258856, "grad_norm": 5.335353851318359, "learning_rate": 2.5680593465634664e-06, "loss": 1.1395621299743652, "step": 1137 }, { "epoch": 1.550408719346049, "grad_norm": 2.306872606277466, "learning_rate": 2.553306114215568e-06, "loss": 0.9148708581924438, "step": 1138 }, { "epoch": 1.5517711171662125, "grad_norm": 6.428441047668457, "learning_rate": 2.5385891774083526e-06, "loss": 1.2786940336227417, "step": 1139 }, { "epoch": 1.553133514986376, "grad_norm": 3.494655132293701, "learning_rate": 2.523908607872868e-06, "loss": 1.2934033870697021, "step": 1140 }, { "epoch": 1.5544959128065394, "grad_norm": 2.5798754692077637, "learning_rate": 2.509264477162913e-06, "loss": 1.2517269849777222, "step": 1141 }, { "epoch": 1.555858310626703, "grad_norm": 5.731953144073486, "learning_rate": 2.4946568566546745e-06, "loss": 1.6838423013687134, "step": 1142 }, { "epoch": 1.5572207084468666, "grad_norm": 2.506864309310913, "learning_rate": 2.4800858175463903e-06, "loss": 0.7714192867279053, "step": 1143 }, { "epoch": 1.55858310626703, "grad_norm": 1.3156863451004028, "learning_rate": 2.4655514308580075e-06, "loss": 1.2057914733886719, "step": 1144 }, { "epoch": 1.5599455040871935, "grad_norm": 6.833941459655762, "learning_rate": 2.4510537674308067e-06, "loss": 0.9790871143341064, "step": 1145 }, { "epoch": 1.561307901907357, "grad_norm": 6.052799701690674, "learning_rate": 2.4365928979271005e-06, "loss": 1.1605284214019775, "step": 1146 }, { "epoch": 1.5626702997275204, "grad_norm": 2.603996753692627, "learning_rate": 2.422168892829846e-06, "loss": 0.9519481658935547, "step": 1147 }, { "epoch": 1.564032697547684, "grad_norm": 3.9207992553710938, "learning_rate": 2.40778182244234e-06, "loss": 1.5080692768096924, "step": 1148 }, { "epoch": 1.5653950953678475, "grad_norm": 6.154255390167236, "learning_rate": 2.393431756887843e-06, "loss": 1.4910547733306885, "step": 1149 }, { "epoch": 1.5667574931880108, "grad_norm": 1.4703644514083862, "learning_rate": 2.3791187661092616e-06, "loss": 1.118145227432251, "step": 1150 }, { "epoch": 1.5681198910081744, "grad_norm": 2.3339834213256836, "learning_rate": 2.3648429198687917e-06, "loss": 0.9044252634048462, "step": 1151 }, { "epoch": 1.569482288828338, "grad_norm": 1.1632272005081177, "learning_rate": 2.35060428774759e-06, "loss": 1.218314528465271, "step": 1152 }, { "epoch": 1.5708446866485013, "grad_norm": 1.948854923248291, "learning_rate": 2.3364029391454258e-06, "loss": 1.4576001167297363, "step": 1153 }, { "epoch": 1.5722070844686649, "grad_norm": 5.581846714019775, "learning_rate": 2.3222389432803504e-06, "loss": 1.547105073928833, "step": 1154 }, { "epoch": 1.5735694822888284, "grad_norm": 2.613879442214966, "learning_rate": 2.30811236918835e-06, "loss": 1.5422954559326172, "step": 1155 }, { "epoch": 1.5749318801089918, "grad_norm": 23.022436141967773, "learning_rate": 2.2940232857230282e-06, "loss": 0.6320388317108154, "step": 1156 }, { "epoch": 1.5762942779291553, "grad_norm": 3.3834846019744873, "learning_rate": 2.2799717615552387e-06, "loss": 0.9893863201141357, "step": 1157 }, { "epoch": 1.577656675749319, "grad_norm": 2.2176966667175293, "learning_rate": 2.26595786517279e-06, "loss": 1.2366623878479004, "step": 1158 }, { "epoch": 1.5790190735694822, "grad_norm": 1.6523469686508179, "learning_rate": 2.2519816648800687e-06, "loss": 1.2844536304473877, "step": 1159 }, { "epoch": 1.5803814713896458, "grad_norm": 4.447011470794678, "learning_rate": 2.2380432287977493e-06, "loss": 1.2803096771240234, "step": 1160 }, { "epoch": 1.5817438692098094, "grad_norm": 3.4576101303100586, "learning_rate": 2.224142624862432e-06, "loss": 1.618638515472412, "step": 1161 }, { "epoch": 1.5831062670299727, "grad_norm": 2.0708632469177246, "learning_rate": 2.2102799208263203e-06, "loss": 1.3221620321273804, "step": 1162 }, { "epoch": 1.5844686648501363, "grad_norm": 1.793702483177185, "learning_rate": 2.196455184256896e-06, "loss": 1.7202945947647095, "step": 1163 }, { "epoch": 1.5858310626702998, "grad_norm": 6.116926670074463, "learning_rate": 2.182668482536582e-06, "loss": 0.9838886857032776, "step": 1164 }, { "epoch": 1.5871934604904632, "grad_norm": 11.744044303894043, "learning_rate": 2.16891988286242e-06, "loss": 1.3922226428985596, "step": 1165 }, { "epoch": 1.5885558583106267, "grad_norm": 5.308543682098389, "learning_rate": 2.1552094522457388e-06, "loss": 1.0825259685516357, "step": 1166 }, { "epoch": 1.5899182561307903, "grad_norm": 4.146827220916748, "learning_rate": 2.141537257511828e-06, "loss": 1.7724759578704834, "step": 1167 }, { "epoch": 1.5912806539509536, "grad_norm": 2.0778002738952637, "learning_rate": 2.127903365299625e-06, "loss": 1.2583365440368652, "step": 1168 }, { "epoch": 1.5926430517711172, "grad_norm": 2.813443899154663, "learning_rate": 2.114307842061357e-06, "loss": 1.085511565208435, "step": 1169 }, { "epoch": 1.5940054495912808, "grad_norm": 1.3059700727462769, "learning_rate": 2.100750754062264e-06, "loss": 1.2720537185668945, "step": 1170 }, { "epoch": 1.595367847411444, "grad_norm": 2.376267910003662, "learning_rate": 2.0872321673802276e-06, "loss": 0.7001886963844299, "step": 1171 }, { "epoch": 1.5967302452316077, "grad_norm": 2.9999887943267822, "learning_rate": 2.073752147905491e-06, "loss": 0.9852319955825806, "step": 1172 }, { "epoch": 1.5980926430517712, "grad_norm": 2.2130115032196045, "learning_rate": 2.0603107613403094e-06, "loss": 1.0594502687454224, "step": 1173 }, { "epoch": 1.5994550408719346, "grad_norm": 3.7265443801879883, "learning_rate": 2.04690807319864e-06, "loss": 0.6252469420433044, "step": 1174 }, { "epoch": 1.6008174386920981, "grad_norm": 1.8262763023376465, "learning_rate": 2.0335441488058237e-06, "loss": 0.8215498924255371, "step": 1175 }, { "epoch": 1.6021798365122617, "grad_norm": 3.5439608097076416, "learning_rate": 2.0202190532982646e-06, "loss": 1.328934669494629, "step": 1176 }, { "epoch": 1.603542234332425, "grad_norm": 1.5581464767456055, "learning_rate": 2.0069328516231122e-06, "loss": 1.2998509407043457, "step": 1177 }, { "epoch": 1.6049046321525886, "grad_norm": 1.8512955904006958, "learning_rate": 1.9936856085379476e-06, "loss": 1.0403721332550049, "step": 1178 }, { "epoch": 1.6062670299727522, "grad_norm": 1.7699447870254517, "learning_rate": 1.98047738861046e-06, "loss": 0.8727090358734131, "step": 1179 }, { "epoch": 1.6076294277929155, "grad_norm": 1.0640281438827515, "learning_rate": 1.9673082562181513e-06, "loss": 0.6367319822311401, "step": 1180 }, { "epoch": 1.608991825613079, "grad_norm": 3.124621868133545, "learning_rate": 1.95417827554799e-06, "loss": 1.304957389831543, "step": 1181 }, { "epoch": 1.6103542234332426, "grad_norm": 6.237905979156494, "learning_rate": 1.941087510596138e-06, "loss": 0.8036034107208252, "step": 1182 }, { "epoch": 1.611716621253406, "grad_norm": 3.805401086807251, "learning_rate": 1.9280360251675945e-06, "loss": 1.3796262741088867, "step": 1183 }, { "epoch": 1.6130790190735693, "grad_norm": 3.8630027770996094, "learning_rate": 1.9150238828759315e-06, "loss": 1.266556978225708, "step": 1184 }, { "epoch": 1.614441416893733, "grad_norm": 2.8576297760009766, "learning_rate": 1.902051147142947e-06, "loss": 1.4903972148895264, "step": 1185 }, { "epoch": 1.6158038147138964, "grad_norm": 8.5491943359375, "learning_rate": 1.8891178811983724e-06, "loss": 1.311509132385254, "step": 1186 }, { "epoch": 1.6171662125340598, "grad_norm": 6.1031599044799805, "learning_rate": 1.8762241480795619e-06, "loss": 1.5847245454788208, "step": 1187 }, { "epoch": 1.6185286103542236, "grad_norm": 3.5766241550445557, "learning_rate": 1.8633700106311847e-06, "loss": 1.050424575805664, "step": 1188 }, { "epoch": 1.619891008174387, "grad_norm": 2.25299334526062, "learning_rate": 1.8505555315049196e-06, "loss": 1.1873726844787598, "step": 1189 }, { "epoch": 1.6212534059945503, "grad_norm": 3.324154853820801, "learning_rate": 1.837780773159148e-06, "loss": 1.5122402906417847, "step": 1190 }, { "epoch": 1.622615803814714, "grad_norm": 1.2781391143798828, "learning_rate": 1.8250457978586467e-06, "loss": 0.6869091987609863, "step": 1191 }, { "epoch": 1.6239782016348774, "grad_norm": 8.637799263000488, "learning_rate": 1.8123506676743018e-06, "loss": 1.4809160232543945, "step": 1192 }, { "epoch": 1.6253405994550407, "grad_norm": 2.572072982788086, "learning_rate": 1.7996954444827708e-06, "loss": 0.48086151480674744, "step": 1193 }, { "epoch": 1.6267029972752045, "grad_norm": 2.435760974884033, "learning_rate": 1.787080189966225e-06, "loss": 0.8215544819831848, "step": 1194 }, { "epoch": 1.6280653950953679, "grad_norm": 1.833471417427063, "learning_rate": 1.774504965612006e-06, "loss": 1.053711175918579, "step": 1195 }, { "epoch": 1.6294277929155312, "grad_norm": 5.049961566925049, "learning_rate": 1.7619698327123624e-06, "loss": 0.5934720039367676, "step": 1196 }, { "epoch": 1.630790190735695, "grad_norm": 2.670727491378784, "learning_rate": 1.749474852364128e-06, "loss": 1.3046576976776123, "step": 1197 }, { "epoch": 1.6321525885558583, "grad_norm": 5.478054523468018, "learning_rate": 1.7370200854684306e-06, "loss": 1.6333255767822266, "step": 1198 }, { "epoch": 1.6335149863760217, "grad_norm": 1.7159885168075562, "learning_rate": 1.7246055927303996e-06, "loss": 1.1651134490966797, "step": 1199 }, { "epoch": 1.6348773841961854, "grad_norm": 4.214008808135986, "learning_rate": 1.712231434658861e-06, "loss": 1.0489068031311035, "step": 1200 }, { "epoch": 1.6362397820163488, "grad_norm": 2.169602155685425, "learning_rate": 1.6998976715660532e-06, "loss": 0.5404323935508728, "step": 1201 }, { "epoch": 1.6376021798365121, "grad_norm": 2.4918267726898193, "learning_rate": 1.6876043635673234e-06, "loss": 1.3892674446105957, "step": 1202 }, { "epoch": 1.638964577656676, "grad_norm": 3.7561285495758057, "learning_rate": 1.6753515705808388e-06, "loss": 0.882109522819519, "step": 1203 }, { "epoch": 1.6403269754768393, "grad_norm": 1.9735994338989258, "learning_rate": 1.663139352327302e-06, "loss": 1.4199323654174805, "step": 1204 }, { "epoch": 1.6416893732970026, "grad_norm": 7.301559925079346, "learning_rate": 1.6509677683296376e-06, "loss": 1.678884506225586, "step": 1205 }, { "epoch": 1.6430517711171662, "grad_norm": 1.555078387260437, "learning_rate": 1.6388368779127362e-06, "loss": 1.175729513168335, "step": 1206 }, { "epoch": 1.6444141689373297, "grad_norm": 42.597023010253906, "learning_rate": 1.6267467402031246e-06, "loss": 0.9902324676513672, "step": 1207 }, { "epoch": 1.645776566757493, "grad_norm": 3.790900230407715, "learning_rate": 1.6146974141287176e-06, "loss": 1.1711788177490234, "step": 1208 }, { "epoch": 1.6471389645776566, "grad_norm": 2.251699924468994, "learning_rate": 1.6026889584185013e-06, "loss": 1.9310262203216553, "step": 1209 }, { "epoch": 1.6485013623978202, "grad_norm": 1.9188026189804077, "learning_rate": 1.5907214316022622e-06, "loss": 0.7175809144973755, "step": 1210 }, { "epoch": 1.6498637602179835, "grad_norm": 14.373571395874023, "learning_rate": 1.5787948920102948e-06, "loss": 1.0440547466278076, "step": 1211 }, { "epoch": 1.651226158038147, "grad_norm": 11.796160697937012, "learning_rate": 1.566909397773121e-06, "loss": 1.3627511262893677, "step": 1212 }, { "epoch": 1.6525885558583107, "grad_norm": 2.9357059001922607, "learning_rate": 1.5550650068212049e-06, "loss": 1.0608046054840088, "step": 1213 }, { "epoch": 1.653950953678474, "grad_norm": 3.347792625427246, "learning_rate": 1.543261776884677e-06, "loss": 1.0982666015625, "step": 1214 }, { "epoch": 1.6553133514986376, "grad_norm": 1.301390290260315, "learning_rate": 1.5314997654930352e-06, "loss": 0.6796656847000122, "step": 1215 }, { "epoch": 1.6566757493188011, "grad_norm": 6.237978935241699, "learning_rate": 1.5197790299748906e-06, "loss": 1.4878320693969727, "step": 1216 }, { "epoch": 1.6580381471389645, "grad_norm": 10.183542251586914, "learning_rate": 1.5080996274576587e-06, "loss": 0.898789644241333, "step": 1217 }, { "epoch": 1.659400544959128, "grad_norm": 2.8204987049102783, "learning_rate": 1.4964616148673116e-06, "loss": 1.1220042705535889, "step": 1218 }, { "epoch": 1.6607629427792916, "grad_norm": 9.805167198181152, "learning_rate": 1.484865048928068e-06, "loss": 1.274973750114441, "step": 1219 }, { "epoch": 1.662125340599455, "grad_norm": 1.6627198457717896, "learning_rate": 1.4733099861621502e-06, "loss": 1.1474993228912354, "step": 1220 }, { "epoch": 1.6634877384196185, "grad_norm": 2.7306973934173584, "learning_rate": 1.4617964828894814e-06, "loss": 0.671380877494812, "step": 1221 }, { "epoch": 1.664850136239782, "grad_norm": 4.862827301025391, "learning_rate": 1.4503245952274259e-06, "loss": 2.21177077293396, "step": 1222 }, { "epoch": 1.6662125340599454, "grad_norm": 47.44179153442383, "learning_rate": 1.4388943790905096e-06, "loss": 2.0054268836975098, "step": 1223 }, { "epoch": 1.667574931880109, "grad_norm": 2.8651211261749268, "learning_rate": 1.4275058901901518e-06, "loss": 1.3266336917877197, "step": 1224 }, { "epoch": 1.6689373297002725, "grad_norm": 7.409352779388428, "learning_rate": 1.4161591840343868e-06, "loss": 1.0433272123336792, "step": 1225 }, { "epoch": 1.6702997275204359, "grad_norm": 1.8390476703643799, "learning_rate": 1.404854315927604e-06, "loss": 1.3717821836471558, "step": 1226 }, { "epoch": 1.6716621253405994, "grad_norm": 2.777256727218628, "learning_rate": 1.393591340970266e-06, "loss": 0.48991456627845764, "step": 1227 }, { "epoch": 1.673024523160763, "grad_norm": 3.158966302871704, "learning_rate": 1.3823703140586543e-06, "loss": 1.0712467432022095, "step": 1228 }, { "epoch": 1.6743869209809263, "grad_norm": 1.4960707426071167, "learning_rate": 1.3711912898845814e-06, "loss": 1.2607753276824951, "step": 1229 }, { "epoch": 1.67574931880109, "grad_norm": 1.7990456819534302, "learning_rate": 1.3600543229351526e-06, "loss": 1.3402117490768433, "step": 1230 }, { "epoch": 1.6771117166212535, "grad_norm": 1.9410388469696045, "learning_rate": 1.3489594674924645e-06, "loss": 1.0337798595428467, "step": 1231 }, { "epoch": 1.6784741144414168, "grad_norm": 1.439047932624817, "learning_rate": 1.3379067776333787e-06, "loss": 1.0162678956985474, "step": 1232 }, { "epoch": 1.6798365122615804, "grad_norm": 2.6545941829681396, "learning_rate": 1.3268963072292306e-06, "loss": 0.47784119844436646, "step": 1233 }, { "epoch": 1.681198910081744, "grad_norm": 3.106361150741577, "learning_rate": 1.3159281099455767e-06, "loss": 1.4583879709243774, "step": 1234 }, { "epoch": 1.6825613079019073, "grad_norm": 5.069444179534912, "learning_rate": 1.3050022392419314e-06, "loss": 1.0929278135299683, "step": 1235 }, { "epoch": 1.6839237057220708, "grad_norm": 6.703405380249023, "learning_rate": 1.2941187483715123e-06, "loss": 0.9775675535202026, "step": 1236 }, { "epoch": 1.6852861035422344, "grad_norm": 2.575490951538086, "learning_rate": 1.2832776903809696e-06, "loss": 1.4463517665863037, "step": 1237 }, { "epoch": 1.6866485013623977, "grad_norm": 1.8293417692184448, "learning_rate": 1.272479118110137e-06, "loss": 1.0405806303024292, "step": 1238 }, { "epoch": 1.6880108991825613, "grad_norm": 10.825261116027832, "learning_rate": 1.2617230841917693e-06, "loss": 1.1845273971557617, "step": 1239 }, { "epoch": 1.6893732970027249, "grad_norm": 1.4789223670959473, "learning_rate": 1.2510096410512894e-06, "loss": 1.1628018617630005, "step": 1240 }, { "epoch": 1.6907356948228882, "grad_norm": 2.729337453842163, "learning_rate": 1.2403388409065276e-06, "loss": 1.310196042060852, "step": 1241 }, { "epoch": 1.6920980926430518, "grad_norm": 1.7597092390060425, "learning_rate": 1.2297107357674776e-06, "loss": 1.2804837226867676, "step": 1242 }, { "epoch": 1.6934604904632153, "grad_norm": 2.3652966022491455, "learning_rate": 1.2191253774360212e-06, "loss": 1.0036711692810059, "step": 1243 }, { "epoch": 1.6948228882833787, "grad_norm": 4.860243320465088, "learning_rate": 1.2085828175057058e-06, "loss": 0.9060076475143433, "step": 1244 }, { "epoch": 1.6961852861035422, "grad_norm": 2.292149543762207, "learning_rate": 1.1980831073614697e-06, "loss": 0.843977689743042, "step": 1245 }, { "epoch": 1.6975476839237058, "grad_norm": 1.974859595298767, "learning_rate": 1.1876262981793984e-06, "loss": 1.0323615074157715, "step": 1246 }, { "epoch": 1.6989100817438691, "grad_norm": 8.679472923278809, "learning_rate": 1.1772124409264784e-06, "loss": 1.273465633392334, "step": 1247 }, { "epoch": 1.7002724795640327, "grad_norm": 2.4746406078338623, "learning_rate": 1.1668415863603465e-06, "loss": 0.9936800003051758, "step": 1248 }, { "epoch": 1.7016348773841963, "grad_norm": 2.0481019020080566, "learning_rate": 1.156513785029042e-06, "loss": 1.308429479598999, "step": 1249 }, { "epoch": 1.7029972752043596, "grad_norm": 2.0081405639648438, "learning_rate": 1.1462290872707593e-06, "loss": 0.8582834601402283, "step": 1250 }, { "epoch": 1.7043596730245232, "grad_norm": 4.233523368835449, "learning_rate": 1.1359875432136059e-06, "loss": 1.1465060710906982, "step": 1251 }, { "epoch": 1.7057220708446867, "grad_norm": 3.7764577865600586, "learning_rate": 1.1257892027753558e-06, "loss": 1.3240163326263428, "step": 1252 }, { "epoch": 1.70708446866485, "grad_norm": 2.468902587890625, "learning_rate": 1.1156341156632034e-06, "loss": 1.854172945022583, "step": 1253 }, { "epoch": 1.7084468664850136, "grad_norm": 1.1863161325454712, "learning_rate": 1.1055223313735331e-06, "loss": 0.9262730479240417, "step": 1254 }, { "epoch": 1.7098092643051772, "grad_norm": 2.5537664890289307, "learning_rate": 1.0954538991916553e-06, "loss": 0.985627293586731, "step": 1255 }, { "epoch": 1.7111716621253406, "grad_norm": 2.2362513542175293, "learning_rate": 1.0854288681915937e-06, "loss": 0.8979532122612, "step": 1256 }, { "epoch": 1.7125340599455041, "grad_norm": 1.5585750341415405, "learning_rate": 1.0754472872358246e-06, "loss": 0.8770595192909241, "step": 1257 }, { "epoch": 1.7138964577656677, "grad_norm": 1.5150623321533203, "learning_rate": 1.0655092049750505e-06, "loss": 0.5905646085739136, "step": 1258 }, { "epoch": 1.715258855585831, "grad_norm": 6.906311988830566, "learning_rate": 1.0556146698479552e-06, "loss": 1.0420033931732178, "step": 1259 }, { "epoch": 1.7166212534059946, "grad_norm": 2.936511993408203, "learning_rate": 1.0457637300809765e-06, "loss": 1.2584857940673828, "step": 1260 }, { "epoch": 1.7179836512261581, "grad_norm": 4.697121620178223, "learning_rate": 1.0359564336880634e-06, "loss": 1.0962363481521606, "step": 1261 }, { "epoch": 1.7193460490463215, "grad_norm": 10.174349784851074, "learning_rate": 1.0261928284704447e-06, "loss": 0.9929618835449219, "step": 1262 }, { "epoch": 1.720708446866485, "grad_norm": 3.934086799621582, "learning_rate": 1.0164729620163982e-06, "loss": 1.1250343322753906, "step": 1263 }, { "epoch": 1.7220708446866486, "grad_norm": 1.8640360832214355, "learning_rate": 1.0067968817010166e-06, "loss": 0.8165608644485474, "step": 1264 }, { "epoch": 1.723433242506812, "grad_norm": 6.103856086730957, "learning_rate": 9.971646346859743e-07, "loss": 1.629396677017212, "step": 1265 }, { "epoch": 1.7247956403269755, "grad_norm": 2.3325653076171875, "learning_rate": 9.875762679193036e-07, "loss": 1.046817421913147, "step": 1266 }, { "epoch": 1.726158038147139, "grad_norm": 3.1879310607910156, "learning_rate": 9.780318281351575e-07, "loss": 1.217057466506958, "step": 1267 }, { "epoch": 1.7275204359673024, "grad_norm": 1.412672758102417, "learning_rate": 9.685313618535952e-07, "loss": 0.36086416244506836, "step": 1268 }, { "epoch": 1.728882833787466, "grad_norm": 2.2028894424438477, "learning_rate": 9.590749153803392e-07, "loss": 1.277956485748291, "step": 1269 }, { "epoch": 1.7302452316076296, "grad_norm": 9.052053451538086, "learning_rate": 9.496625348065603e-07, "loss": 1.9809240102767944, "step": 1270 }, { "epoch": 1.731607629427793, "grad_norm": 7.14845609664917, "learning_rate": 9.402942660086501e-07, "loss": 1.1679346561431885, "step": 1271 }, { "epoch": 1.7329700272479565, "grad_norm": 1.210813283920288, "learning_rate": 9.309701546479965e-07, "loss": 0.894662082195282, "step": 1272 }, { "epoch": 1.73433242506812, "grad_norm": 2.3145592212677, "learning_rate": 9.216902461707633e-07, "loss": 0.7624841332435608, "step": 1273 }, { "epoch": 1.7356948228882834, "grad_norm": 1.8255223035812378, "learning_rate": 9.12454585807665e-07, "loss": 1.1449463367462158, "step": 1274 }, { "epoch": 1.7370572207084467, "grad_norm": 1.5149394273757935, "learning_rate": 9.0326321857375e-07, "loss": 0.9642457365989685, "step": 1275 }, { "epoch": 1.7384196185286105, "grad_norm": 2.8881890773773193, "learning_rate": 8.941161892681815e-07, "loss": 1.4664623737335205, "step": 1276 }, { "epoch": 1.7397820163487738, "grad_norm": 2.1704492568969727, "learning_rate": 8.850135424740148e-07, "loss": 0.9321557283401489, "step": 1277 }, { "epoch": 1.7411444141689372, "grad_norm": 39.091793060302734, "learning_rate": 8.759553225579842e-07, "loss": 1.009666919708252, "step": 1278 }, { "epoch": 1.742506811989101, "grad_norm": 1.8131619691848755, "learning_rate": 8.669415736702846e-07, "loss": 0.992411732673645, "step": 1279 }, { "epoch": 1.7438692098092643, "grad_norm": 4.261100769042969, "learning_rate": 8.579723397443573e-07, "loss": 2.001893997192383, "step": 1280 }, { "epoch": 1.7452316076294276, "grad_norm": 1.1363927125930786, "learning_rate": 8.490476644966772e-07, "loss": 1.0667598247528076, "step": 1281 }, { "epoch": 1.7465940054495914, "grad_norm": 1.279687523841858, "learning_rate": 8.401675914265351e-07, "loss": 0.9579074382781982, "step": 1282 }, { "epoch": 1.7479564032697548, "grad_norm": 4.5116448402404785, "learning_rate": 8.313321638158312e-07, "loss": 1.451935052871704, "step": 1283 }, { "epoch": 1.749318801089918, "grad_norm": 8.18640422821045, "learning_rate": 8.225414247288599e-07, "loss": 1.3976483345031738, "step": 1284 }, { "epoch": 1.750681198910082, "grad_norm": 6.128304958343506, "learning_rate": 8.13795417012102e-07, "loss": 1.2727890014648438, "step": 1285 }, { "epoch": 1.7520435967302452, "grad_norm": 2.4841420650482178, "learning_rate": 8.050941832940163e-07, "loss": 1.1768107414245605, "step": 1286 }, { "epoch": 1.7534059945504086, "grad_norm": 3.0823144912719727, "learning_rate": 7.9643776598483e-07, "loss": 2.2902956008911133, "step": 1287 }, { "epoch": 1.7547683923705724, "grad_norm": 1.4804415702819824, "learning_rate": 7.878262072763332e-07, "loss": 0.8634886741638184, "step": 1288 }, { "epoch": 1.7561307901907357, "grad_norm": 1.2262098789215088, "learning_rate": 7.792595491416733e-07, "loss": 0.531088650226593, "step": 1289 }, { "epoch": 1.757493188010899, "grad_norm": 4.33739709854126, "learning_rate": 7.707378333351489e-07, "loss": 1.929915428161621, "step": 1290 }, { "epoch": 1.7588555858310628, "grad_norm": 1.6030082702636719, "learning_rate": 7.622611013920111e-07, "loss": 0.8317304849624634, "step": 1291 }, { "epoch": 1.7602179836512262, "grad_norm": 4.488954067230225, "learning_rate": 7.538293946282527e-07, "loss": 1.3098036050796509, "step": 1292 }, { "epoch": 1.7615803814713895, "grad_norm": 11.2507963180542, "learning_rate": 7.454427541404175e-07, "loss": 1.1017807722091675, "step": 1293 }, { "epoch": 1.7629427792915533, "grad_norm": 21.846887588500977, "learning_rate": 7.371012208053862e-07, "loss": 1.1967116594314575, "step": 1294 }, { "epoch": 1.7643051771117166, "grad_norm": 3.301680088043213, "learning_rate": 7.288048352801924e-07, "loss": 1.3514041900634766, "step": 1295 }, { "epoch": 1.76566757493188, "grad_norm": 1.4868799448013306, "learning_rate": 7.205536380018152e-07, "loss": 0.882289469242096, "step": 1296 }, { "epoch": 1.7670299727520435, "grad_norm": 2.3711631298065186, "learning_rate": 7.123476691869802e-07, "loss": 1.3820436000823975, "step": 1297 }, { "epoch": 1.768392370572207, "grad_norm": 7.22926664352417, "learning_rate": 7.041869688319725e-07, "loss": 0.9773348569869995, "step": 1298 }, { "epoch": 1.7697547683923704, "grad_norm": 2.936023235321045, "learning_rate": 6.960715767124337e-07, "loss": 1.6220792531967163, "step": 1299 }, { "epoch": 1.771117166212534, "grad_norm": 2.063976526260376, "learning_rate": 6.880015323831712e-07, "loss": 0.992411196231842, "step": 1300 }, { "epoch": 1.7724795640326976, "grad_norm": 3.171597719192505, "learning_rate": 6.799768751779667e-07, "loss": 0.7885507345199585, "step": 1301 }, { "epoch": 1.773841961852861, "grad_norm": 2.890035629272461, "learning_rate": 6.719976442093789e-07, "loss": 1.6067891120910645, "step": 1302 }, { "epoch": 1.7752043596730245, "grad_norm": 2.438349485397339, "learning_rate": 6.640638783685626e-07, "loss": 1.0840799808502197, "step": 1303 }, { "epoch": 1.776566757493188, "grad_norm": 4.774412155151367, "learning_rate": 6.561756163250666e-07, "loss": 1.2508540153503418, "step": 1304 }, { "epoch": 1.7779291553133514, "grad_norm": 1.7611292600631714, "learning_rate": 6.483328965266622e-07, "loss": 0.7288346290588379, "step": 1305 }, { "epoch": 1.779291553133515, "grad_norm": 6.026714324951172, "learning_rate": 6.40535757199131e-07, "loss": 1.0707077980041504, "step": 1306 }, { "epoch": 1.7806539509536785, "grad_norm": 2.0233771800994873, "learning_rate": 6.3278423634611e-07, "loss": 0.4874130189418793, "step": 1307 }, { "epoch": 1.7820163487738419, "grad_norm": 2.7655234336853027, "learning_rate": 6.250783717488729e-07, "loss": 0.4931323528289795, "step": 1308 }, { "epoch": 1.7833787465940054, "grad_norm": 4.192293643951416, "learning_rate": 6.174182009661767e-07, "loss": 1.5189175605773926, "step": 1309 }, { "epoch": 1.784741144414169, "grad_norm": 3.8415169715881348, "learning_rate": 6.098037613340568e-07, "loss": 1.2205891609191895, "step": 1310 }, { "epoch": 1.7861035422343323, "grad_norm": 2.1763052940368652, "learning_rate": 6.022350899656537e-07, "loss": 1.3215999603271484, "step": 1311 }, { "epoch": 1.7874659400544959, "grad_norm": 5.387167453765869, "learning_rate": 5.947122237510339e-07, "loss": 1.3907097578048706, "step": 1312 }, { "epoch": 1.7888283378746594, "grad_norm": 6.413577079772949, "learning_rate": 5.872351993570036e-07, "loss": 1.1078770160675049, "step": 1313 }, { "epoch": 1.7901907356948228, "grad_norm": 1.9668219089508057, "learning_rate": 5.798040532269377e-07, "loss": 1.1617542505264282, "step": 1314 }, { "epoch": 1.7915531335149864, "grad_norm": 4.747009754180908, "learning_rate": 5.724188215805948e-07, "loss": 1.6213951110839844, "step": 1315 }, { "epoch": 1.79291553133515, "grad_norm": 2.270843744277954, "learning_rate": 5.650795404139453e-07, "loss": 1.4318034648895264, "step": 1316 }, { "epoch": 1.7942779291553133, "grad_norm": 1.4657083749771118, "learning_rate": 5.57786245498999e-07, "loss": 1.3170151710510254, "step": 1317 }, { "epoch": 1.7956403269754768, "grad_norm": 1.7329161167144775, "learning_rate": 5.505389723836174e-07, "loss": 0.7722114324569702, "step": 1318 }, { "epoch": 1.7970027247956404, "grad_norm": 7.2884955406188965, "learning_rate": 5.433377563913611e-07, "loss": 1.3575527667999268, "step": 1319 }, { "epoch": 1.7983651226158037, "grad_norm": 4.0005974769592285, "learning_rate": 5.361826326212927e-07, "loss": 0.6978318095207214, "step": 1320 }, { "epoch": 1.7997275204359673, "grad_norm": 4.349133014678955, "learning_rate": 5.29073635947831e-07, "loss": 1.0556219816207886, "step": 1321 }, { "epoch": 1.8010899182561309, "grad_norm": 1.4942313432693481, "learning_rate": 5.220108010205627e-07, "loss": 1.4474365711212158, "step": 1322 }, { "epoch": 1.8024523160762942, "grad_norm": 3.176579236984253, "learning_rate": 5.149941622640819e-07, "loss": 1.1944425106048584, "step": 1323 }, { "epoch": 1.8038147138964578, "grad_norm": 3.832334518432617, "learning_rate": 5.08023753877821e-07, "loss": 1.0030550956726074, "step": 1324 }, { "epoch": 1.8051771117166213, "grad_norm": 9.216151237487793, "learning_rate": 5.010996098358822e-07, "loss": 1.3711564540863037, "step": 1325 }, { "epoch": 1.8065395095367847, "grad_norm": 12.742966651916504, "learning_rate": 4.942217638868718e-07, "loss": 1.5779058933258057, "step": 1326 }, { "epoch": 1.8079019073569482, "grad_norm": 7.035341739654541, "learning_rate": 4.873902495537408e-07, "loss": 2.1067183017730713, "step": 1327 }, { "epoch": 1.8092643051771118, "grad_norm": 1.374211072921753, "learning_rate": 4.80605100133612e-07, "loss": 1.3670783042907715, "step": 1328 }, { "epoch": 1.8106267029972751, "grad_norm": 2.1755785942077637, "learning_rate": 4.7386634869763246e-07, "loss": 1.1670506000518799, "step": 1329 }, { "epoch": 1.8119891008174387, "grad_norm": 4.503634452819824, "learning_rate": 4.6717402809079105e-07, "loss": 1.164706826210022, "step": 1330 }, { "epoch": 1.8133514986376023, "grad_norm": 2.6500797271728516, "learning_rate": 4.6052817093178324e-07, "loss": 0.7370744347572327, "step": 1331 }, { "epoch": 1.8147138964577656, "grad_norm": 2.871323823928833, "learning_rate": 4.5392880961282803e-07, "loss": 1.049136757850647, "step": 1332 }, { "epoch": 1.8160762942779292, "grad_norm": 1.4471031427383423, "learning_rate": 4.4737597629953e-07, "loss": 1.0570838451385498, "step": 1333 }, { "epoch": 1.8174386920980927, "grad_norm": 4.349843978881836, "learning_rate": 4.4086970293071073e-07, "loss": 1.4678266048431396, "step": 1334 }, { "epoch": 1.818801089918256, "grad_norm": 1.8210200071334839, "learning_rate": 4.344100212182556e-07, "loss": 1.2321367263793945, "step": 1335 }, { "epoch": 1.8201634877384196, "grad_norm": 2.0503737926483154, "learning_rate": 4.2799696264696155e-07, "loss": 0.8386318683624268, "step": 1336 }, { "epoch": 1.8215258855585832, "grad_norm": 1.3446118831634521, "learning_rate": 4.216305584743807e-07, "loss": 0.9875887632369995, "step": 1337 }, { "epoch": 1.8228882833787465, "grad_norm": 23.932052612304688, "learning_rate": 4.1531083973067154e-07, "loss": 1.310714840888977, "step": 1338 }, { "epoch": 1.82425068119891, "grad_norm": 1.5837066173553467, "learning_rate": 4.0903783721844334e-07, "loss": 1.012986183166504, "step": 1339 }, { "epoch": 1.8256130790190737, "grad_norm": 4.70964241027832, "learning_rate": 4.0281158151260767e-07, "loss": 0.7625452876091003, "step": 1340 }, { "epoch": 1.826975476839237, "grad_norm": 2.2192718982696533, "learning_rate": 3.966321029602349e-07, "loss": 1.5343716144561768, "step": 1341 }, { "epoch": 1.8283378746594006, "grad_norm": 4.060433387756348, "learning_rate": 3.904994316803945e-07, "loss": 1.0377511978149414, "step": 1342 }, { "epoch": 1.8297002724795641, "grad_norm": 14.235861778259277, "learning_rate": 3.8441359756402173e-07, "loss": 0.9891483187675476, "step": 1343 }, { "epoch": 1.8310626702997275, "grad_norm": 2.4544315338134766, "learning_rate": 3.7837463027375785e-07, "loss": 0.6186608076095581, "step": 1344 }, { "epoch": 1.832425068119891, "grad_norm": 2.773225784301758, "learning_rate": 3.7238255924381904e-07, "loss": 0.9534250497817993, "step": 1345 }, { "epoch": 1.8337874659400546, "grad_norm": 4.2906951904296875, "learning_rate": 3.664374136798465e-07, "loss": 1.0145323276519775, "step": 1346 }, { "epoch": 1.835149863760218, "grad_norm": 2.7557525634765625, "learning_rate": 3.6053922255876117e-07, "loss": 1.0453312397003174, "step": 1347 }, { "epoch": 1.8365122615803815, "grad_norm": 1.5330963134765625, "learning_rate": 3.546880146286269e-07, "loss": 0.5813907980918884, "step": 1348 }, { "epoch": 1.837874659400545, "grad_norm": 3.814319133758545, "learning_rate": 3.488838184085108e-07, "loss": 0.9711065292358398, "step": 1349 }, { "epoch": 1.8392370572207084, "grad_norm": 4.166487693786621, "learning_rate": 3.431266621883411e-07, "loss": 1.129095196723938, "step": 1350 }, { "epoch": 1.840599455040872, "grad_norm": 1.647719144821167, "learning_rate": 3.374165740287738e-07, "loss": 1.4044864177703857, "step": 1351 }, { "epoch": 1.8419618528610355, "grad_norm": 9.403477668762207, "learning_rate": 3.317535817610473e-07, "loss": 1.2547073364257812, "step": 1352 }, { "epoch": 1.8433242506811989, "grad_norm": 5.799179553985596, "learning_rate": 3.261377129868581e-07, "loss": 0.9228708148002625, "step": 1353 }, { "epoch": 1.8446866485013624, "grad_norm": 1.69400954246521, "learning_rate": 3.2056899507821536e-07, "loss": 1.3234930038452148, "step": 1354 }, { "epoch": 1.846049046321526, "grad_norm": 1.6221039295196533, "learning_rate": 3.150474551773186e-07, "loss": 1.3542914390563965, "step": 1355 }, { "epoch": 1.8474114441416893, "grad_norm": 2.9117283821105957, "learning_rate": 3.095731201964125e-07, "loss": 1.3881336450576782, "step": 1356 }, { "epoch": 1.848773841961853, "grad_norm": 13.552770614624023, "learning_rate": 3.041460168176702e-07, "loss": 1.5804922580718994, "step": 1357 }, { "epoch": 1.8501362397820165, "grad_norm": 2.206202507019043, "learning_rate": 2.987661714930501e-07, "loss": 1.5172555446624756, "step": 1358 }, { "epoch": 1.8514986376021798, "grad_norm": 2.6966822147369385, "learning_rate": 2.9343361044417595e-07, "loss": 1.418337106704712, "step": 1359 }, { "epoch": 1.8528610354223434, "grad_norm": 4.280932903289795, "learning_rate": 2.8814835966220255e-07, "loss": 0.6574370265007019, "step": 1360 }, { "epoch": 1.854223433242507, "grad_norm": 2.456181764602661, "learning_rate": 2.829104449076947e-07, "loss": 1.2777419090270996, "step": 1361 }, { "epoch": 1.8555858310626703, "grad_norm": 1.566738247871399, "learning_rate": 2.777198917104984e-07, "loss": 1.0245404243469238, "step": 1362 }, { "epoch": 1.8569482288828338, "grad_norm": 2.119300127029419, "learning_rate": 2.7257672536961765e-07, "loss": 1.3284130096435547, "step": 1363 }, { "epoch": 1.8583106267029974, "grad_norm": 1.4456303119659424, "learning_rate": 2.67480970953089e-07, "loss": 1.1857738494873047, "step": 1364 }, { "epoch": 1.8596730245231607, "grad_norm": 4.321845531463623, "learning_rate": 2.6243265329786384e-07, "loss": 1.845337152481079, "step": 1365 }, { "epoch": 1.861035422343324, "grad_norm": 3.087873697280884, "learning_rate": 2.574317970096807e-07, "loss": 1.4985558986663818, "step": 1366 }, { "epoch": 1.8623978201634879, "grad_norm": 1.300963044166565, "learning_rate": 2.524784264629543e-07, "loss": 0.8029797077178955, "step": 1367 }, { "epoch": 1.8637602179836512, "grad_norm": 1.8922648429870605, "learning_rate": 2.475725658006456e-07, "loss": 1.1313142776489258, "step": 1368 }, { "epoch": 1.8651226158038146, "grad_norm": 5.0586838722229, "learning_rate": 2.4271423893415415e-07, "loss": 1.105548620223999, "step": 1369 }, { "epoch": 1.8664850136239783, "grad_norm": 2.167701244354248, "learning_rate": 2.3790346954319698e-07, "loss": 1.2437108755111694, "step": 1370 }, { "epoch": 1.8678474114441417, "grad_norm": 1.0447425842285156, "learning_rate": 2.3314028107569332e-07, "loss": 0.7389098405838013, "step": 1371 }, { "epoch": 1.869209809264305, "grad_norm": 1.5186101198196411, "learning_rate": 2.2842469674765e-07, "loss": 1.582906723022461, "step": 1372 }, { "epoch": 1.8705722070844688, "grad_norm": 3.906873941421509, "learning_rate": 2.2375673954305065e-07, "loss": 1.454192876815796, "step": 1373 }, { "epoch": 1.8719346049046321, "grad_norm": 2.029965400695801, "learning_rate": 2.1913643221373904e-07, "loss": 1.25070059299469, "step": 1374 }, { "epoch": 1.8732970027247955, "grad_norm": 31.82815170288086, "learning_rate": 2.1456379727931575e-07, "loss": 1.3577702045440674, "step": 1375 }, { "epoch": 1.8746594005449593, "grad_norm": 2.3105993270874023, "learning_rate": 2.1003885702701842e-07, "loss": 0.9631004333496094, "step": 1376 }, { "epoch": 1.8760217983651226, "grad_norm": 1.7577451467514038, "learning_rate": 2.0556163351162507e-07, "loss": 1.2024483680725098, "step": 1377 }, { "epoch": 1.877384196185286, "grad_norm": 1.448703646659851, "learning_rate": 2.011321485553308e-07, "loss": 0.5723297595977783, "step": 1378 }, { "epoch": 1.8787465940054497, "grad_norm": 1.2089848518371582, "learning_rate": 1.9675042374766252e-07, "loss": 0.7422782182693481, "step": 1379 }, { "epoch": 1.880108991825613, "grad_norm": 12.520014762878418, "learning_rate": 1.9241648044535099e-07, "loss": 1.5310943126678467, "step": 1380 }, { "epoch": 1.8814713896457764, "grad_norm": 2.14847731590271, "learning_rate": 1.881303397722456e-07, "loss": 1.0022079944610596, "step": 1381 }, { "epoch": 1.8828337874659402, "grad_norm": 2.270681619644165, "learning_rate": 1.838920226191976e-07, "loss": 1.5518301725387573, "step": 1382 }, { "epoch": 1.8841961852861036, "grad_norm": 1.987114667892456, "learning_rate": 1.7970154964396692e-07, "loss": 1.1388556957244873, "step": 1383 }, { "epoch": 1.885558583106267, "grad_norm": 3.0272018909454346, "learning_rate": 1.7555894127112004e-07, "loss": 1.3038212060928345, "step": 1384 }, { "epoch": 1.8869209809264307, "grad_norm": 2.7421436309814453, "learning_rate": 1.714642176919268e-07, "loss": 1.3461058139801025, "step": 1385 }, { "epoch": 1.888283378746594, "grad_norm": 2.535875082015991, "learning_rate": 1.6741739886426357e-07, "loss": 1.396026372909546, "step": 1386 }, { "epoch": 1.8896457765667574, "grad_norm": 11.208163261413574, "learning_rate": 1.634185045125214e-07, "loss": 1.4761991500854492, "step": 1387 }, { "epoch": 1.891008174386921, "grad_norm": 8.644389152526855, "learning_rate": 1.5946755412749705e-07, "loss": 0.8535468578338623, "step": 1388 }, { "epoch": 1.8923705722070845, "grad_norm": 13.878341674804688, "learning_rate": 1.5556456696631527e-07, "loss": 0.8725439310073853, "step": 1389 }, { "epoch": 1.8937329700272478, "grad_norm": 3.157853364944458, "learning_rate": 1.5170956205231903e-07, "loss": 1.1029373407363892, "step": 1390 }, { "epoch": 1.8950953678474114, "grad_norm": 2.381556272506714, "learning_rate": 1.4790255817498712e-07, "loss": 1.0192207098007202, "step": 1391 }, { "epoch": 1.896457765667575, "grad_norm": 4.854372501373291, "learning_rate": 1.4414357388983557e-07, "loss": 0.9716941118240356, "step": 1392 }, { "epoch": 1.8978201634877383, "grad_norm": 4.360928535461426, "learning_rate": 1.4043262751833432e-07, "loss": 0.8772147297859192, "step": 1393 }, { "epoch": 1.8991825613079019, "grad_norm": 1.1853501796722412, "learning_rate": 1.3676973714781384e-07, "loss": 1.127824306488037, "step": 1394 }, { "epoch": 1.9005449591280654, "grad_norm": 1.5928237438201904, "learning_rate": 1.3315492063137537e-07, "loss": 0.8379042744636536, "step": 1395 }, { "epoch": 1.9019073569482288, "grad_norm": 2.6051831245422363, "learning_rate": 1.2958819558780534e-07, "loss": 0.8683719635009766, "step": 1396 }, { "epoch": 1.9032697547683923, "grad_norm": 6.150710105895996, "learning_rate": 1.2606957940149433e-07, "loss": 1.683314323425293, "step": 1397 }, { "epoch": 1.904632152588556, "grad_norm": 4.798482894897461, "learning_rate": 1.2259908922234165e-07, "loss": 1.8929637670516968, "step": 1398 }, { "epoch": 1.9059945504087192, "grad_norm": 2.8529117107391357, "learning_rate": 1.191767419656864e-07, "loss": 1.2108323574066162, "step": 1399 }, { "epoch": 1.9073569482288828, "grad_norm": 1.5547749996185303, "learning_rate": 1.1580255431220656e-07, "loss": 0.4886445105075836, "step": 1400 }, { "epoch": 1.9087193460490464, "grad_norm": 23.59695816040039, "learning_rate": 1.1247654270785891e-07, "loss": 1.2111785411834717, "step": 1401 }, { "epoch": 1.9100817438692097, "grad_norm": 2.914100408554077, "learning_rate": 1.0919872336377812e-07, "loss": 1.7062466144561768, "step": 1402 }, { "epoch": 1.9114441416893733, "grad_norm": 1.6518676280975342, "learning_rate": 1.0596911225621453e-07, "loss": 0.6783289909362793, "step": 1403 }, { "epoch": 1.9128065395095368, "grad_norm": 2.169886589050293, "learning_rate": 1.027877251264442e-07, "loss": 1.3239803314208984, "step": 1404 }, { "epoch": 1.9141689373297002, "grad_norm": 3.835590124130249, "learning_rate": 9.965457748070118e-08, "loss": 0.8546549081802368, "step": 1405 }, { "epoch": 1.9155313351498637, "grad_norm": 1.9253188371658325, "learning_rate": 9.656968459009653e-08, "loss": 0.9986761808395386, "step": 1406 }, { "epoch": 1.9168937329700273, "grad_norm": 1.7878540754318237, "learning_rate": 9.353306149054497e-08, "loss": 1.0160789489746094, "step": 1407 }, { "epoch": 1.9182561307901906, "grad_norm": 3.0247604846954346, "learning_rate": 9.054472298269057e-08, "loss": 0.9737996459007263, "step": 1408 }, { "epoch": 1.9196185286103542, "grad_norm": 7.85115385055542, "learning_rate": 8.760468363184004e-08, "loss": 0.935871958732605, "step": 1409 }, { "epoch": 1.9209809264305178, "grad_norm": 1.3199456930160522, "learning_rate": 8.47129577678818e-08, "loss": 0.606592059135437, "step": 1410 }, { "epoch": 1.922343324250681, "grad_norm": 2.534632921218872, "learning_rate": 8.186955948523035e-08, "loss": 1.2295576333999634, "step": 1411 }, { "epoch": 1.9237057220708447, "grad_norm": 18.38926887512207, "learning_rate": 7.907450264273864e-08, "loss": 1.7270119190216064, "step": 1412 }, { "epoch": 1.9250681198910082, "grad_norm": 1.2698009014129639, "learning_rate": 7.632780086365144e-08, "loss": 0.9189703464508057, "step": 1413 }, { "epoch": 1.9264305177111716, "grad_norm": 1.447047233581543, "learning_rate": 7.362946753552203e-08, "loss": 0.943728506565094, "step": 1414 }, { "epoch": 1.9277929155313351, "grad_norm": 1.3590550422668457, "learning_rate": 7.097951581015672e-08, "loss": 1.565992832183838, "step": 1415 }, { "epoch": 1.9291553133514987, "grad_norm": 1.5580801963806152, "learning_rate": 6.83779586035438e-08, "loss": 1.0411436557769775, "step": 1416 }, { "epoch": 1.930517711171662, "grad_norm": 5.042342662811279, "learning_rate": 6.58248085957991e-08, "loss": 1.3836987018585205, "step": 1417 }, { "epoch": 1.9318801089918256, "grad_norm": 52.62533950805664, "learning_rate": 6.332007823109832e-08, "loss": 1.113886833190918, "step": 1418 }, { "epoch": 1.9332425068119892, "grad_norm": 3.946906805038452, "learning_rate": 6.086377971761481e-08, "loss": 1.7039542198181152, "step": 1419 }, { "epoch": 1.9346049046321525, "grad_norm": 1.801292061805725, "learning_rate": 5.8455925027464114e-08, "loss": 1.248824119567871, "step": 1420 }, { "epoch": 1.935967302452316, "grad_norm": 2.054426670074463, "learning_rate": 5.609652589664616e-08, "loss": 1.0931246280670166, "step": 1421 }, { "epoch": 1.9373297002724796, "grad_norm": 1.764552354812622, "learning_rate": 5.378559382498427e-08, "loss": 1.1240458488464355, "step": 1422 }, { "epoch": 1.938692098092643, "grad_norm": 2.9879987239837646, "learning_rate": 5.1523140076070734e-08, "loss": 0.631972074508667, "step": 1423 }, { "epoch": 1.9400544959128065, "grad_norm": 6.285287380218506, "learning_rate": 4.9309175677213497e-08, "loss": 1.0347950458526611, "step": 1424 }, { "epoch": 1.94141689373297, "grad_norm": 1.5152701139450073, "learning_rate": 4.714371141938179e-08, "loss": 0.6329066157341003, "step": 1425 }, { "epoch": 1.9427792915531334, "grad_norm": 2.198545455932617, "learning_rate": 4.502675785714838e-08, "loss": 1.5803624391555786, "step": 1426 }, { "epoch": 1.944141689373297, "grad_norm": 11.419188499450684, "learning_rate": 4.295832530864852e-08, "loss": 0.8849520683288574, "step": 1427 }, { "epoch": 1.9455040871934606, "grad_norm": 10.670279502868652, "learning_rate": 4.0938423855518826e-08, "loss": 0.9460938572883606, "step": 1428 }, { "epoch": 1.946866485013624, "grad_norm": 4.178868293762207, "learning_rate": 3.896706334285738e-08, "loss": 1.1602928638458252, "step": 1429 }, { "epoch": 1.9482288828337875, "grad_norm": 4.152834892272949, "learning_rate": 3.704425337916706e-08, "loss": 1.492940902709961, "step": 1430 }, { "epoch": 1.949591280653951, "grad_norm": 1.2601171731948853, "learning_rate": 3.5170003336317814e-08, "loss": 1.3064179420471191, "step": 1431 }, { "epoch": 1.9509536784741144, "grad_norm": 4.145596504211426, "learning_rate": 3.334432234949114e-08, "loss": 1.2165179252624512, "step": 1432 }, { "epoch": 1.952316076294278, "grad_norm": 1.5077069997787476, "learning_rate": 3.156721931714457e-08, "loss": 0.6948808431625366, "step": 1433 }, { "epoch": 1.9536784741144415, "grad_norm": 3.32055926322937, "learning_rate": 2.983870290096169e-08, "loss": 1.536287546157837, "step": 1434 }, { "epoch": 1.9550408719346049, "grad_norm": 4.618061542510986, "learning_rate": 2.8158781525815525e-08, "loss": 1.1000325679779053, "step": 1435 }, { "epoch": 1.9564032697547684, "grad_norm": 2.4832377433776855, "learning_rate": 2.6527463379721898e-08, "loss": 0.9595693349838257, "step": 1436 }, { "epoch": 1.957765667574932, "grad_norm": 6.562358379364014, "learning_rate": 2.4944756413802784e-08, "loss": 1.300684928894043, "step": 1437 }, { "epoch": 1.9591280653950953, "grad_norm": 1.6557602882385254, "learning_rate": 2.3410668342247478e-08, "loss": 0.8573833703994751, "step": 1438 }, { "epoch": 1.9604904632152589, "grad_norm": 1.6862037181854248, "learning_rate": 2.192520664227593e-08, "loss": 1.6360219717025757, "step": 1439 }, { "epoch": 1.9618528610354224, "grad_norm": 5.195113182067871, "learning_rate": 2.0488378554096578e-08, "loss": 1.1369283199310303, "step": 1440 }, { "epoch": 1.9632152588555858, "grad_norm": 1.4376178979873657, "learning_rate": 1.9100191080879682e-08, "loss": 1.1068437099456787, "step": 1441 }, { "epoch": 1.9645776566757494, "grad_norm": 2.702178478240967, "learning_rate": 1.7760650988716264e-08, "loss": 1.2693946361541748, "step": 1442 }, { "epoch": 1.965940054495913, "grad_norm": 3.5857508182525635, "learning_rate": 1.646976480658924e-08, "loss": 1.5863407850265503, "step": 1443 }, { "epoch": 1.9673024523160763, "grad_norm": 2.24951434135437, "learning_rate": 1.5227538826338983e-08, "loss": 1.8422298431396484, "step": 1444 }, { "epoch": 1.9686648501362398, "grad_norm": 3.497892379760742, "learning_rate": 1.4033979102634488e-08, "loss": 0.9026853442192078, "step": 1445 }, { "epoch": 1.9700272479564034, "grad_norm": 3.716001033782959, "learning_rate": 1.2889091452942259e-08, "loss": 2.020556926727295, "step": 1446 }, { "epoch": 1.9713896457765667, "grad_norm": 5.044482707977295, "learning_rate": 1.1792881457498572e-08, "loss": 1.4826629161834717, "step": 1447 }, { "epoch": 1.9727520435967303, "grad_norm": 1.3918527364730835, "learning_rate": 1.074535445928504e-08, "loss": 0.550353467464447, "step": 1448 }, { "epoch": 1.9741144414168939, "grad_norm": 7.702760219573975, "learning_rate": 9.746515563996417e-09, "loss": 1.3358700275421143, "step": 1449 }, { "epoch": 1.9754768392370572, "grad_norm": 1.0946508646011353, "learning_rate": 8.79636964002173e-09, "loss": 0.8621606826782227, "step": 1450 }, { "epoch": 1.9768392370572208, "grad_norm": 2.978058338165283, "learning_rate": 7.894921318417625e-09, "loss": 1.3000658750534058, "step": 1451 }, { "epoch": 1.9782016348773843, "grad_norm": 1.576682209968567, "learning_rate": 7.0421749928850645e-09, "loss": 0.844996988773346, "step": 1452 }, { "epoch": 1.9795640326975477, "grad_norm": 4.54990291595459, "learning_rate": 6.238134819751551e-09, "loss": 1.375671148300171, "step": 1453 }, { "epoch": 1.9809264305177112, "grad_norm": 3.3552587032318115, "learning_rate": 5.482804717947821e-09, "loss": 1.477408528327942, "step": 1454 }, { "epoch": 1.9822888283378748, "grad_norm": 1.8630568981170654, "learning_rate": 4.776188368986745e-09, "loss": 1.4174425601959229, "step": 1455 }, { "epoch": 1.9836512261580381, "grad_norm": 3.187861442565918, "learning_rate": 4.1182892169511215e-09, "loss": 0.5318092107772827, "step": 1456 }, { "epoch": 1.9850136239782015, "grad_norm": 1.712842345237732, "learning_rate": 3.509110468472576e-09, "loss": 0.8178075551986694, "step": 1457 }, { "epoch": 1.9863760217983653, "grad_norm": 2.1262054443359375, "learning_rate": 2.9486550927171343e-09, "loss": 1.392982006072998, "step": 1458 }, { "epoch": 1.9877384196185286, "grad_norm": 2.4299228191375732, "learning_rate": 2.4369258213718937e-09, "loss": 1.1283490657806396, "step": 1459 }, { "epoch": 1.989100817438692, "grad_norm": 1.7974153757095337, "learning_rate": 1.973925148629485e-09, "loss": 0.8331289291381836, "step": 1460 }, { "epoch": 1.9904632152588557, "grad_norm": 2.3045740127563477, "learning_rate": 1.5596553311758578e-09, "loss": 1.3922330141067505, "step": 1461 }, { "epoch": 1.991825613079019, "grad_norm": 2.1342570781707764, "learning_rate": 1.194118388182508e-09, "loss": 0.9389033317565918, "step": 1462 }, { "epoch": 1.9931880108991824, "grad_norm": 2.787295341491699, "learning_rate": 8.773161012953779e-10, "loss": 1.0572896003723145, "step": 1463 }, { "epoch": 1.9945504087193462, "grad_norm": 1.3248298168182373, "learning_rate": 6.092500146215319e-10, "loss": 1.1103618144989014, "step": 1464 }, { "epoch": 1.9959128065395095, "grad_norm": 1.9048967361450195, "learning_rate": 3.899214347302671e-10, "loss": 1.2025456428527832, "step": 1465 }, { "epoch": 1.9972752043596729, "grad_norm": 4.280431747436523, "learning_rate": 2.1933143063757046e-10, "loss": 1.0383784770965576, "step": 1466 }, { "epoch": 1.9986376021798367, "grad_norm": 6.139904499053955, "learning_rate": 9.748083380833884e-11, "loss": 0.7356299757957458, "step": 1467 }, { "epoch": 2.0, "grad_norm": 3.0588088035583496, "learning_rate": 2.4370238147497506e-11, "loss": 1.5871186256408691, "step": 1468 } ], "logging_steps": 1, "max_steps": 1468, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.101885689686917e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }