| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 5775, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005194805194805195, | |
| "grad_norm": 1.3003292727167528, | |
| "learning_rate": 2.8735632183908046e-06, | |
| "loss": 0.5747, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01038961038961039, | |
| "grad_norm": 0.8164205391130951, | |
| "learning_rate": 5.747126436781609e-06, | |
| "loss": 0.5181, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.015584415584415584, | |
| "grad_norm": 0.488946468114722, | |
| "learning_rate": 8.620689655172414e-06, | |
| "loss": 0.4525, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02077922077922078, | |
| "grad_norm": 0.5240257767228009, | |
| "learning_rate": 1.1494252873563218e-05, | |
| "loss": 0.4153, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.025974025974025976, | |
| "grad_norm": 0.3878833187137561, | |
| "learning_rate": 1.4367816091954022e-05, | |
| "loss": 0.3917, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03116883116883117, | |
| "grad_norm": 0.3932438541306065, | |
| "learning_rate": 1.7241379310344828e-05, | |
| "loss": 0.3707, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03636363636363636, | |
| "grad_norm": 0.4466114932768719, | |
| "learning_rate": 2.0114942528735632e-05, | |
| "loss": 0.3605, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04155844155844156, | |
| "grad_norm": 0.8032738854038463, | |
| "learning_rate": 2.2988505747126437e-05, | |
| "loss": 0.3591, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.046753246753246755, | |
| "grad_norm": 0.543812557370986, | |
| "learning_rate": 2.5862068965517244e-05, | |
| "loss": 0.3472, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05194805194805195, | |
| "grad_norm": 0.5733743111451876, | |
| "learning_rate": 2.8735632183908045e-05, | |
| "loss": 0.3341, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05714285714285714, | |
| "grad_norm": 0.9136032757030011, | |
| "learning_rate": 3.160919540229885e-05, | |
| "loss": 0.34, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06233766233766234, | |
| "grad_norm": 0.8015940010159968, | |
| "learning_rate": 3.4482758620689657e-05, | |
| "loss": 0.3346, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06753246753246753, | |
| "grad_norm": 0.5317651402718742, | |
| "learning_rate": 3.735632183908046e-05, | |
| "loss": 0.3205, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07272727272727272, | |
| "grad_norm": 0.9794116224334949, | |
| "learning_rate": 4.0229885057471265e-05, | |
| "loss": 0.3247, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07792207792207792, | |
| "grad_norm": 0.5926607382301553, | |
| "learning_rate": 4.3103448275862066e-05, | |
| "loss": 0.3137, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08311688311688312, | |
| "grad_norm": 0.501504811182817, | |
| "learning_rate": 4.597701149425287e-05, | |
| "loss": 0.3196, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08831168831168831, | |
| "grad_norm": 0.4844466938932139, | |
| "learning_rate": 4.885057471264368e-05, | |
| "loss": 0.3185, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.09350649350649351, | |
| "grad_norm": 0.8792222600261355, | |
| "learning_rate": 4.999985842691236e-05, | |
| "loss": 0.317, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0987012987012987, | |
| "grad_norm": 0.538260729358137, | |
| "learning_rate": 4.999899326385009e-05, | |
| "loss": 0.3122, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1038961038961039, | |
| "grad_norm": 0.5532332343125024, | |
| "learning_rate": 4.99973416166265e-05, | |
| "loss": 0.308, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.10909090909090909, | |
| "grad_norm": 0.5065931571721304, | |
| "learning_rate": 4.999490353720347e-05, | |
| "loss": 0.305, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.11428571428571428, | |
| "grad_norm": 0.4661004020519144, | |
| "learning_rate": 4.9991679102284494e-05, | |
| "loss": 0.3031, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.11948051948051948, | |
| "grad_norm": 0.4350447913864305, | |
| "learning_rate": 4.998766841331236e-05, | |
| "loss": 0.2979, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.12467532467532468, | |
| "grad_norm": 0.3782265622233307, | |
| "learning_rate": 4.998287159646586e-05, | |
| "loss": 0.3035, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.12987012987012986, | |
| "grad_norm": 0.3863507226733708, | |
| "learning_rate": 4.997728880265592e-05, | |
| "loss": 0.3024, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.13506493506493505, | |
| "grad_norm": 0.45465564793802865, | |
| "learning_rate": 4.9970920207520756e-05, | |
| "loss": 0.2984, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.14025974025974025, | |
| "grad_norm": 0.331617769411873, | |
| "learning_rate": 4.9963766011420394e-05, | |
| "loss": 0.2947, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.14545454545454545, | |
| "grad_norm": 0.4323281120205803, | |
| "learning_rate": 4.9955826439430384e-05, | |
| "loss": 0.2885, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.15064935064935064, | |
| "grad_norm": 0.3828139513983012, | |
| "learning_rate": 4.994710174133469e-05, | |
| "loss": 0.2957, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.15584415584415584, | |
| "grad_norm": 0.45442672024966796, | |
| "learning_rate": 4.9937592191617846e-05, | |
| "loss": 0.2929, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.16103896103896104, | |
| "grad_norm": 0.4324048943234123, | |
| "learning_rate": 4.992729808945629e-05, | |
| "loss": 0.287, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.16623376623376623, | |
| "grad_norm": 0.3358736775493111, | |
| "learning_rate": 4.991621975870901e-05, | |
| "loss": 0.2831, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.17142857142857143, | |
| "grad_norm": 0.33113790181444713, | |
| "learning_rate": 4.990435754790731e-05, | |
| "loss": 0.2868, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.17662337662337663, | |
| "grad_norm": 0.3207965425558816, | |
| "learning_rate": 4.9891711830243845e-05, | |
| "loss": 0.2911, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 0.361708114933415, | |
| "learning_rate": 4.987828300356091e-05, | |
| "loss": 0.2857, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.18701298701298702, | |
| "grad_norm": 0.34168626805695096, | |
| "learning_rate": 4.9864071490337896e-05, | |
| "loss": 0.2849, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.19220779220779222, | |
| "grad_norm": 0.2908926746764898, | |
| "learning_rate": 4.9849077737678e-05, | |
| "loss": 0.2794, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1974025974025974, | |
| "grad_norm": 0.5087944109212423, | |
| "learning_rate": 4.983330221729419e-05, | |
| "loss": 0.2787, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2025974025974026, | |
| "grad_norm": 0.27804001121817934, | |
| "learning_rate": 4.9816745425494326e-05, | |
| "loss": 0.2759, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2077922077922078, | |
| "grad_norm": 0.4013635528070106, | |
| "learning_rate": 4.979940788316556e-05, | |
| "loss": 0.2817, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.21298701298701297, | |
| "grad_norm": 0.25511672135848257, | |
| "learning_rate": 4.978129013575796e-05, | |
| "loss": 0.2785, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.21818181818181817, | |
| "grad_norm": 0.298813003536876, | |
| "learning_rate": 4.976239275326733e-05, | |
| "loss": 0.2803, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.22337662337662337, | |
| "grad_norm": 0.25617082862388485, | |
| "learning_rate": 4.974271633021729e-05, | |
| "loss": 0.2736, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.22857142857142856, | |
| "grad_norm": 0.29247425322362797, | |
| "learning_rate": 4.9722261485640584e-05, | |
| "loss": 0.2767, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.23376623376623376, | |
| "grad_norm": 0.33775737625365165, | |
| "learning_rate": 4.9701028863059563e-05, | |
| "loss": 0.2753, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.23896103896103896, | |
| "grad_norm": 0.38447368527387143, | |
| "learning_rate": 4.967901913046598e-05, | |
| "loss": 0.2805, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.24415584415584415, | |
| "grad_norm": 0.36418471782114614, | |
| "learning_rate": 4.9656232980299976e-05, | |
| "loss": 0.2707, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.24935064935064935, | |
| "grad_norm": 0.2826049309528878, | |
| "learning_rate": 4.963267112942826e-05, | |
| "loss": 0.2775, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2545454545454545, | |
| "grad_norm": 0.3501209948268494, | |
| "learning_rate": 4.9608334319121584e-05, | |
| "loss": 0.2731, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2597402597402597, | |
| "grad_norm": 0.3608819312798831, | |
| "learning_rate": 4.958322331503141e-05, | |
| "loss": 0.2707, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2649350649350649, | |
| "grad_norm": 0.299020142675044, | |
| "learning_rate": 4.9557338907165833e-05, | |
| "loss": 0.2732, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2701298701298701, | |
| "grad_norm": 0.2685575579055106, | |
| "learning_rate": 4.9530681909864724e-05, | |
| "loss": 0.2689, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2753246753246753, | |
| "grad_norm": 0.23478660501034596, | |
| "learning_rate": 4.950325316177409e-05, | |
| "loss": 0.2726, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.2805194805194805, | |
| "grad_norm": 0.26267550311986976, | |
| "learning_rate": 4.947505352581974e-05, | |
| "loss": 0.2688, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 0.2804831055299143, | |
| "learning_rate": 4.944608388918005e-05, | |
| "loss": 0.2724, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.2909090909090909, | |
| "grad_norm": 0.36597728037439853, | |
| "learning_rate": 4.941634516325816e-05, | |
| "loss": 0.2674, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.2961038961038961, | |
| "grad_norm": 0.27054673370836463, | |
| "learning_rate": 4.9385838283653216e-05, | |
| "loss": 0.2649, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.3012987012987013, | |
| "grad_norm": 0.3066834081735395, | |
| "learning_rate": 4.9354564210130976e-05, | |
| "loss": 0.2677, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.3064935064935065, | |
| "grad_norm": 0.2950401672059928, | |
| "learning_rate": 4.93225239265936e-05, | |
| "loss": 0.2622, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3116883116883117, | |
| "grad_norm": 0.31772179112234966, | |
| "learning_rate": 4.928971844104868e-05, | |
| "loss": 0.2641, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3168831168831169, | |
| "grad_norm": 0.27924556453889027, | |
| "learning_rate": 4.9256148785577606e-05, | |
| "loss": 0.2647, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.3220779220779221, | |
| "grad_norm": 0.28644663252200886, | |
| "learning_rate": 4.9221816016302966e-05, | |
| "loss": 0.2625, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.32727272727272727, | |
| "grad_norm": 0.2606246292323375, | |
| "learning_rate": 4.9186721213355455e-05, | |
| "loss": 0.2636, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.33246753246753247, | |
| "grad_norm": 0.3172496714001626, | |
| "learning_rate": 4.915086548083978e-05, | |
| "loss": 0.2683, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.33766233766233766, | |
| "grad_norm": 0.24927905040341644, | |
| "learning_rate": 4.9114249946800003e-05, | |
| "loss": 0.2654, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.34285714285714286, | |
| "grad_norm": 0.32250447729794757, | |
| "learning_rate": 4.907687576318401e-05, | |
| "loss": 0.2636, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.34805194805194806, | |
| "grad_norm": 0.32565974721616914, | |
| "learning_rate": 4.903874410580731e-05, | |
| "loss": 0.2601, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.35324675324675325, | |
| "grad_norm": 0.23517482221948124, | |
| "learning_rate": 4.899985617431597e-05, | |
| "loss": 0.2611, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.35844155844155845, | |
| "grad_norm": 0.2438523561922534, | |
| "learning_rate": 4.896021319214895e-05, | |
| "loss": 0.2601, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 0.2668670929832916, | |
| "learning_rate": 4.8919816406499584e-05, | |
| "loss": 0.2696, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.36883116883116884, | |
| "grad_norm": 0.24937135360115686, | |
| "learning_rate": 4.887866708827633e-05, | |
| "loss": 0.2602, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.37402597402597404, | |
| "grad_norm": 0.23999629542679116, | |
| "learning_rate": 4.8836766532062804e-05, | |
| "loss": 0.2588, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.37922077922077924, | |
| "grad_norm": 0.23334349462758497, | |
| "learning_rate": 4.879411605607704e-05, | |
| "loss": 0.2606, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.38441558441558443, | |
| "grad_norm": 0.2215145938323352, | |
| "learning_rate": 4.8750717002130024e-05, | |
| "loss": 0.2567, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.38961038961038963, | |
| "grad_norm": 0.3218548674660387, | |
| "learning_rate": 4.870657073558349e-05, | |
| "loss": 0.2627, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3948051948051948, | |
| "grad_norm": 0.29152150655446074, | |
| "learning_rate": 4.866167864530693e-05, | |
| "loss": 0.2561, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.29823420885887736, | |
| "learning_rate": 4.8616042143633937e-05, | |
| "loss": 0.2594, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.4051948051948052, | |
| "grad_norm": 0.256792339442467, | |
| "learning_rate": 4.856966266631777e-05, | |
| "loss": 0.2565, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4103896103896104, | |
| "grad_norm": 0.25804885977573755, | |
| "learning_rate": 4.8522541672486156e-05, | |
| "loss": 0.2577, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.4155844155844156, | |
| "grad_norm": 0.2903609751193798, | |
| "learning_rate": 4.84746806445954e-05, | |
| "loss": 0.2534, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.42077922077922075, | |
| "grad_norm": 0.26400859462593973, | |
| "learning_rate": 4.8426081088383756e-05, | |
| "loss": 0.2586, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.42597402597402595, | |
| "grad_norm": 0.2960587838599708, | |
| "learning_rate": 4.837674453282404e-05, | |
| "loss": 0.261, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.43116883116883115, | |
| "grad_norm": 0.23331561495605277, | |
| "learning_rate": 4.832667253007554e-05, | |
| "loss": 0.2536, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.43636363636363634, | |
| "grad_norm": 0.24020811230350025, | |
| "learning_rate": 4.8275866655435175e-05, | |
| "loss": 0.2564, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.44155844155844154, | |
| "grad_norm": 0.22023402453548904, | |
| "learning_rate": 4.8224328507287946e-05, | |
| "loss": 0.2562, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.44675324675324674, | |
| "grad_norm": 0.293317498213313, | |
| "learning_rate": 4.8172059707056626e-05, | |
| "loss": 0.2565, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.45194805194805193, | |
| "grad_norm": 0.2692215437341758, | |
| "learning_rate": 4.811906189915078e-05, | |
| "loss": 0.2506, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.45714285714285713, | |
| "grad_norm": 0.23694698773474526, | |
| "learning_rate": 4.806533675091501e-05, | |
| "loss": 0.2518, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4623376623376623, | |
| "grad_norm": 0.22885916720084376, | |
| "learning_rate": 4.80108859525765e-05, | |
| "loss": 0.252, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4675324675324675, | |
| "grad_norm": 0.24916135308130166, | |
| "learning_rate": 4.795571121719187e-05, | |
| "loss": 0.253, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4727272727272727, | |
| "grad_norm": 0.24894984701102493, | |
| "learning_rate": 4.7899814280593226e-05, | |
| "loss": 0.2529, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.4779220779220779, | |
| "grad_norm": 0.2723728137565129, | |
| "learning_rate": 4.78431969013336e-05, | |
| "loss": 0.2555, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.4831168831168831, | |
| "grad_norm": 0.3183305552275493, | |
| "learning_rate": 4.778586086063159e-05, | |
| "loss": 0.2514, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.4883116883116883, | |
| "grad_norm": 0.2414413013327865, | |
| "learning_rate": 4.772780796231537e-05, | |
| "loss": 0.2484, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.4935064935064935, | |
| "grad_norm": 0.22563943539011178, | |
| "learning_rate": 4.766904003276589e-05, | |
| "loss": 0.2539, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.4987012987012987, | |
| "grad_norm": 0.30205872999506944, | |
| "learning_rate": 4.760955892085942e-05, | |
| "loss": 0.2527, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.5038961038961038, | |
| "grad_norm": 0.2860786126383834, | |
| "learning_rate": 4.754936649790942e-05, | |
| "loss": 0.2516, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.509090909090909, | |
| "grad_norm": 0.27572406640999436, | |
| "learning_rate": 4.7488464657607635e-05, | |
| "loss": 0.2498, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5142857142857142, | |
| "grad_norm": 0.24673507061013106, | |
| "learning_rate": 4.7426855315964535e-05, | |
| "loss": 0.2531, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5194805194805194, | |
| "grad_norm": 0.21033978842271397, | |
| "learning_rate": 4.736454041124904e-05, | |
| "loss": 0.2504, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5246753246753246, | |
| "grad_norm": 0.24381830272002009, | |
| "learning_rate": 4.7301521903927505e-05, | |
| "loss": 0.2428, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5298701298701298, | |
| "grad_norm": 0.2334282560856222, | |
| "learning_rate": 4.723780177660209e-05, | |
| "loss": 0.2501, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.535064935064935, | |
| "grad_norm": 0.2751808654527514, | |
| "learning_rate": 4.717338203394836e-05, | |
| "loss": 0.2507, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5402597402597402, | |
| "grad_norm": 0.2697637268340861, | |
| "learning_rate": 4.71082647026522e-05, | |
| "loss": 0.2503, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 0.22403492548323756, | |
| "learning_rate": 4.7042451831346136e-05, | |
| "loss": 0.2495, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5506493506493506, | |
| "grad_norm": 0.22425594786040917, | |
| "learning_rate": 4.697594549054474e-05, | |
| "loss": 0.2475, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.5558441558441558, | |
| "grad_norm": 0.24265650046282458, | |
| "learning_rate": 4.690874777257964e-05, | |
| "loss": 0.2491, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.561038961038961, | |
| "grad_norm": 0.22077426247794457, | |
| "learning_rate": 4.684086079153359e-05, | |
| "loss": 0.2449, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.5662337662337662, | |
| "grad_norm": 0.30484192484636535, | |
| "learning_rate": 4.6772286683174025e-05, | |
| "loss": 0.245, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.22182045074526108, | |
| "learning_rate": 4.670302760488582e-05, | |
| "loss": 0.2477, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5766233766233766, | |
| "grad_norm": 0.2200234503444333, | |
| "learning_rate": 4.663308573560343e-05, | |
| "loss": 0.2506, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.5818181818181818, | |
| "grad_norm": 0.21994287683589026, | |
| "learning_rate": 4.656246327574238e-05, | |
| "loss": 0.2421, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.587012987012987, | |
| "grad_norm": 0.272334351757034, | |
| "learning_rate": 4.649116244712998e-05, | |
| "loss": 0.2476, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.5922077922077922, | |
| "grad_norm": 0.2236806239622702, | |
| "learning_rate": 4.641918549293545e-05, | |
| "loss": 0.2454, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.5974025974025974, | |
| "grad_norm": 0.23546182291174625, | |
| "learning_rate": 4.634653467759936e-05, | |
| "loss": 0.2477, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6025974025974026, | |
| "grad_norm": 0.2505748637769869, | |
| "learning_rate": 4.6273212286762376e-05, | |
| "loss": 0.2449, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.6077922077922078, | |
| "grad_norm": 0.18468403137918, | |
| "learning_rate": 4.619922062719335e-05, | |
| "loss": 0.2432, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.612987012987013, | |
| "grad_norm": 0.23235694779454488, | |
| "learning_rate": 4.6124562026716766e-05, | |
| "loss": 0.2457, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6181818181818182, | |
| "grad_norm": 0.2587545121058708, | |
| "learning_rate": 4.604923883413946e-05, | |
| "loss": 0.2467, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6233766233766234, | |
| "grad_norm": 0.23524218849591322, | |
| "learning_rate": 4.59732534191768e-05, | |
| "loss": 0.2425, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6285714285714286, | |
| "grad_norm": 0.20482111091171828, | |
| "learning_rate": 4.589660817237805e-05, | |
| "loss": 0.2446, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.6337662337662338, | |
| "grad_norm": 0.23935891812153473, | |
| "learning_rate": 4.581930550505122e-05, | |
| "loss": 0.2359, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.638961038961039, | |
| "grad_norm": 0.2088944848253035, | |
| "learning_rate": 4.5741347849187186e-05, | |
| "loss": 0.2435, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.6441558441558441, | |
| "grad_norm": 0.19769218239953817, | |
| "learning_rate": 4.566273765738318e-05, | |
| "loss": 0.2429, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.6493506493506493, | |
| "grad_norm": 0.21104513975336958, | |
| "learning_rate": 4.558347740276562e-05, | |
| "loss": 0.2414, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.6545454545454545, | |
| "grad_norm": 0.22299730594468617, | |
| "learning_rate": 4.550356957891232e-05, | |
| "loss": 0.2405, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.6597402597402597, | |
| "grad_norm": 0.2553717349606562, | |
| "learning_rate": 4.5423016699774025e-05, | |
| "loss": 0.242, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.6649350649350649, | |
| "grad_norm": 0.184621184174687, | |
| "learning_rate": 4.5341821299595334e-05, | |
| "loss": 0.2377, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.6701298701298701, | |
| "grad_norm": 0.1931019421949112, | |
| "learning_rate": 4.525998593283496e-05, | |
| "loss": 0.2401, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.6753246753246753, | |
| "grad_norm": 0.2327800605150306, | |
| "learning_rate": 4.517751317408537e-05, | |
| "loss": 0.2405, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.6805194805194805, | |
| "grad_norm": 0.21060753158636902, | |
| "learning_rate": 4.5094405617991796e-05, | |
| "loss": 0.2363, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.6857142857142857, | |
| "grad_norm": 0.18780386461582757, | |
| "learning_rate": 4.501066587917058e-05, | |
| "loss": 0.2437, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.6909090909090909, | |
| "grad_norm": 0.2671736773226871, | |
| "learning_rate": 4.4926296592126946e-05, | |
| "loss": 0.2431, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.6961038961038961, | |
| "grad_norm": 0.23810395896846592, | |
| "learning_rate": 4.484130041117211e-05, | |
| "loss": 0.243, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.7012987012987013, | |
| "grad_norm": 0.18025186403432847, | |
| "learning_rate": 4.475568001033974e-05, | |
| "loss": 0.2457, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.7064935064935065, | |
| "grad_norm": 0.21115305940327297, | |
| "learning_rate": 4.466943808330189e-05, | |
| "loss": 0.2415, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.7116883116883117, | |
| "grad_norm": 0.25256979094205834, | |
| "learning_rate": 4.45825773432842e-05, | |
| "loss": 0.2407, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.7168831168831169, | |
| "grad_norm": 0.22014008453128092, | |
| "learning_rate": 4.449510052298056e-05, | |
| "loss": 0.2357, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.7220779220779221, | |
| "grad_norm": 0.20062628753000003, | |
| "learning_rate": 4.440701037446714e-05, | |
| "loss": 0.2396, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 0.2083485579826855, | |
| "learning_rate": 4.431830966911582e-05, | |
| "loss": 0.2391, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.7324675324675325, | |
| "grad_norm": 0.18184305857175756, | |
| "learning_rate": 4.422900119750695e-05, | |
| "loss": 0.2355, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.7376623376623377, | |
| "grad_norm": 0.2048232593631597, | |
| "learning_rate": 4.4139087769341625e-05, | |
| "loss": 0.2332, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.7428571428571429, | |
| "grad_norm": 0.21132211989073768, | |
| "learning_rate": 4.4048572213353234e-05, | |
| "loss": 0.2422, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.7480519480519481, | |
| "grad_norm": 0.19792895604344352, | |
| "learning_rate": 4.39574573772185e-05, | |
| "loss": 0.2334, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.7532467532467533, | |
| "grad_norm": 0.22178836320367148, | |
| "learning_rate": 4.3865746127467876e-05, | |
| "loss": 0.2423, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.7584415584415585, | |
| "grad_norm": 0.21967650568135474, | |
| "learning_rate": 4.3773441349395374e-05, | |
| "loss": 0.2357, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.7636363636363637, | |
| "grad_norm": 0.1917556477695145, | |
| "learning_rate": 4.368054594696775e-05, | |
| "loss": 0.2443, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.7688311688311689, | |
| "grad_norm": 0.20969861600848638, | |
| "learning_rate": 4.3587062842733216e-05, | |
| "loss": 0.2341, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.7740259740259741, | |
| "grad_norm": 0.19871375239851857, | |
| "learning_rate": 4.349299497772945e-05, | |
| "loss": 0.2361, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.7792207792207793, | |
| "grad_norm": 0.19082750332598916, | |
| "learning_rate": 4.339834531139104e-05, | |
| "loss": 0.2316, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.7844155844155845, | |
| "grad_norm": 0.2177029161255871, | |
| "learning_rate": 4.330311682145645e-05, | |
| "loss": 0.2343, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.7896103896103897, | |
| "grad_norm": 0.20562958726540304, | |
| "learning_rate": 4.320731250387429e-05, | |
| "loss": 0.2401, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.7948051948051948, | |
| "grad_norm": 0.2070237852219627, | |
| "learning_rate": 4.311093537270905e-05, | |
| "loss": 0.2374, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.18967395002327114, | |
| "learning_rate": 4.301398846004634e-05, | |
| "loss": 0.2363, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.8051948051948052, | |
| "grad_norm": 0.1970271386066234, | |
| "learning_rate": 4.291647481589742e-05, | |
| "loss": 0.2302, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.8103896103896104, | |
| "grad_norm": 0.18673676151020974, | |
| "learning_rate": 4.28183975081033e-05, | |
| "loss": 0.2416, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.8155844155844156, | |
| "grad_norm": 0.2183111540642943, | |
| "learning_rate": 4.271975962223821e-05, | |
| "loss": 0.2342, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.8207792207792208, | |
| "grad_norm": 0.1792298886397136, | |
| "learning_rate": 4.2620564261512496e-05, | |
| "loss": 0.2388, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.825974025974026, | |
| "grad_norm": 0.21429193275126804, | |
| "learning_rate": 4.2520814546675037e-05, | |
| "loss": 0.2323, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.8311688311688312, | |
| "grad_norm": 0.1923357673969473, | |
| "learning_rate": 4.242051361591505e-05, | |
| "loss": 0.2398, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.8363636363636363, | |
| "grad_norm": 0.1825902322292911, | |
| "learning_rate": 4.2319664624763325e-05, | |
| "loss": 0.2355, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.8415584415584415, | |
| "grad_norm": 0.1708452665847616, | |
| "learning_rate": 4.2218270745993016e-05, | |
| "loss": 0.2361, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.8467532467532467, | |
| "grad_norm": 0.2003983431936864, | |
| "learning_rate": 4.211633516951975e-05, | |
| "loss": 0.237, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.8519480519480519, | |
| "grad_norm": 0.1809948763155965, | |
| "learning_rate": 4.201386110230134e-05, | |
| "loss": 0.2291, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.19621979591943875, | |
| "learning_rate": 4.1910851768236825e-05, | |
| "loss": 0.2284, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.8623376623376623, | |
| "grad_norm": 0.20784597945629102, | |
| "learning_rate": 4.180731040806511e-05, | |
| "loss": 0.2359, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.8675324675324675, | |
| "grad_norm": 0.22581034014160772, | |
| "learning_rate": 4.170324027926297e-05, | |
| "loss": 0.2329, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.8727272727272727, | |
| "grad_norm": 0.1789163119753752, | |
| "learning_rate": 4.159864465594255e-05, | |
| "loss": 0.2338, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.8779220779220779, | |
| "grad_norm": 0.1949206924337472, | |
| "learning_rate": 4.1493526828748416e-05, | |
| "loss": 0.2392, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.8831168831168831, | |
| "grad_norm": 0.20147429000086556, | |
| "learning_rate": 4.1387890104754004e-05, | |
| "loss": 0.233, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.8883116883116883, | |
| "grad_norm": 0.1537005161376695, | |
| "learning_rate": 4.128173780735753e-05, | |
| "loss": 0.2291, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.8935064935064935, | |
| "grad_norm": 0.17777763693741433, | |
| "learning_rate": 4.117507327617751e-05, | |
| "loss": 0.2291, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.8987012987012987, | |
| "grad_norm": 0.174198062693491, | |
| "learning_rate": 4.1067899866947665e-05, | |
| "loss": 0.2294, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.9038961038961039, | |
| "grad_norm": 0.1884364748511166, | |
| "learning_rate": 4.096022095141132e-05, | |
| "loss": 0.235, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 0.1912652069094164, | |
| "learning_rate": 4.085203991721535e-05, | |
| "loss": 0.2318, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.9142857142857143, | |
| "grad_norm": 0.206558739242339, | |
| "learning_rate": 4.0743360167803614e-05, | |
| "loss": 0.2317, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.9194805194805195, | |
| "grad_norm": 0.18252166114267931, | |
| "learning_rate": 4.063418512230987e-05, | |
| "loss": 0.2346, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.9246753246753247, | |
| "grad_norm": 0.18463778266166328, | |
| "learning_rate": 4.0524518215450166e-05, | |
| "loss": 0.2306, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.9298701298701298, | |
| "grad_norm": 0.207923278938462, | |
| "learning_rate": 4.041436289741489e-05, | |
| "loss": 0.2301, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.935064935064935, | |
| "grad_norm": 0.25335695776490813, | |
| "learning_rate": 4.0303722633760085e-05, | |
| "loss": 0.2258, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.9402597402597402, | |
| "grad_norm": 0.16805426564943104, | |
| "learning_rate": 4.019260090529854e-05, | |
| "loss": 0.2284, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.9454545454545454, | |
| "grad_norm": 0.1884837989936669, | |
| "learning_rate": 4.008100120799019e-05, | |
| "loss": 0.2285, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.9506493506493506, | |
| "grad_norm": 0.19643081968195814, | |
| "learning_rate": 3.996892705283222e-05, | |
| "loss": 0.2354, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.9558441558441558, | |
| "grad_norm": 0.18949399067442121, | |
| "learning_rate": 3.9856381965748506e-05, | |
| "loss": 0.234, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.961038961038961, | |
| "grad_norm": 0.18422150470243814, | |
| "learning_rate": 3.974336948747879e-05, | |
| "loss": 0.228, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.9662337662337662, | |
| "grad_norm": 0.179837212988977, | |
| "learning_rate": 3.962989317346722e-05, | |
| "loss": 0.2283, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.9714285714285714, | |
| "grad_norm": 0.18190735457463206, | |
| "learning_rate": 3.951595659375048e-05, | |
| "loss": 0.2337, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.9766233766233766, | |
| "grad_norm": 0.17903757429753223, | |
| "learning_rate": 3.9401563332845545e-05, | |
| "loss": 0.2225, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.9818181818181818, | |
| "grad_norm": 0.18775008592274955, | |
| "learning_rate": 3.928671698963686e-05, | |
| "loss": 0.226, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.987012987012987, | |
| "grad_norm": 0.16101556678112905, | |
| "learning_rate": 3.917142117726312e-05, | |
| "loss": 0.2312, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.9922077922077922, | |
| "grad_norm": 0.18133929144854413, | |
| "learning_rate": 3.90556795230036e-05, | |
| "loss": 0.2264, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.9974025974025974, | |
| "grad_norm": 0.21238218367672568, | |
| "learning_rate": 3.893949566816404e-05, | |
| "loss": 0.2273, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.0025974025974025, | |
| "grad_norm": 0.180697571813627, | |
| "learning_rate": 3.8822873267962115e-05, | |
| "loss": 0.2148, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.0077922077922077, | |
| "grad_norm": 0.18289201426564544, | |
| "learning_rate": 3.870581599141239e-05, | |
| "loss": 0.1982, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.0129870129870129, | |
| "grad_norm": 0.1916290587115548, | |
| "learning_rate": 3.858832752121093e-05, | |
| "loss": 0.1972, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.018181818181818, | |
| "grad_norm": 0.16005158183997184, | |
| "learning_rate": 3.847041155361941e-05, | |
| "loss": 0.1945, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.0233766233766233, | |
| "grad_norm": 0.1933154004002062, | |
| "learning_rate": 3.835207179834886e-05, | |
| "loss": 0.1957, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.0285714285714285, | |
| "grad_norm": 0.19020919057253263, | |
| "learning_rate": 3.823331197844293e-05, | |
| "loss": 0.1997, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.0337662337662337, | |
| "grad_norm": 0.19051536230519941, | |
| "learning_rate": 3.8114135830160766e-05, | |
| "loss": 0.1965, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.0389610389610389, | |
| "grad_norm": 0.2061945657419727, | |
| "learning_rate": 3.799454710285949e-05, | |
| "loss": 0.199, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.044155844155844, | |
| "grad_norm": 0.1634129561802415, | |
| "learning_rate": 3.787454955887619e-05, | |
| "loss": 0.1979, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.0493506493506493, | |
| "grad_norm": 0.19104378460061372, | |
| "learning_rate": 3.775414697340962e-05, | |
| "loss": 0.1976, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.0545454545454545, | |
| "grad_norm": 0.17070052143672057, | |
| "learning_rate": 3.763334313440134e-05, | |
| "loss": 0.1958, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.0597402597402596, | |
| "grad_norm": 0.18748046307285904, | |
| "learning_rate": 3.7512141842416674e-05, | |
| "loss": 0.1941, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.0649350649350648, | |
| "grad_norm": 0.1989042435608863, | |
| "learning_rate": 3.739054691052501e-05, | |
| "loss": 0.1964, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.07012987012987, | |
| "grad_norm": 0.21700073670750156, | |
| "learning_rate": 3.726856216417992e-05, | |
| "loss": 0.1987, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.0753246753246752, | |
| "grad_norm": 0.1899511809741937, | |
| "learning_rate": 3.71461914410988e-05, | |
| "loss": 0.1972, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.0805194805194804, | |
| "grad_norm": 0.16164124372624425, | |
| "learning_rate": 3.702343859114208e-05, | |
| "loss": 0.196, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.0857142857142856, | |
| "grad_norm": 0.1962886097471118, | |
| "learning_rate": 3.690030747619218e-05, | |
| "loss": 0.1972, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.0909090909090908, | |
| "grad_norm": 0.20624255109168987, | |
| "learning_rate": 3.6776801970031956e-05, | |
| "loss": 0.1975, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.096103896103896, | |
| "grad_norm": 0.17127975336052367, | |
| "learning_rate": 3.665292595822286e-05, | |
| "loss": 0.1969, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.1012987012987012, | |
| "grad_norm": 0.22765509302500941, | |
| "learning_rate": 3.6528683337982675e-05, | |
| "loss": 0.204, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.1064935064935064, | |
| "grad_norm": 0.17313358078957405, | |
| "learning_rate": 3.640407801806292e-05, | |
| "loss": 0.2018, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.1116883116883116, | |
| "grad_norm": 0.16881351547306925, | |
| "learning_rate": 3.62791139186259e-05, | |
| "loss": 0.195, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.1168831168831168, | |
| "grad_norm": 0.1639645171706093, | |
| "learning_rate": 3.6153794971121305e-05, | |
| "loss": 0.1961, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.122077922077922, | |
| "grad_norm": 0.15232520727472273, | |
| "learning_rate": 3.602812511816262e-05, | |
| "loss": 0.1966, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.1272727272727272, | |
| "grad_norm": 0.1617732944247109, | |
| "learning_rate": 3.590210831340297e-05, | |
| "loss": 0.1994, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.1324675324675324, | |
| "grad_norm": 0.1614529353275871, | |
| "learning_rate": 3.577574852141089e-05, | |
| "loss": 0.1964, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.1376623376623376, | |
| "grad_norm": 0.1678102166459782, | |
| "learning_rate": 3.564904971754546e-05, | |
| "loss": 0.2012, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 0.20333612359210218, | |
| "learning_rate": 3.552201588783127e-05, | |
| "loss": 0.1956, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.148051948051948, | |
| "grad_norm": 0.17535467832478063, | |
| "learning_rate": 3.5394651028833084e-05, | |
| "loss": 0.1967, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.1532467532467532, | |
| "grad_norm": 0.16637781141145597, | |
| "learning_rate": 3.526695914753002e-05, | |
| "loss": 0.2002, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.1584415584415584, | |
| "grad_norm": 0.17958171262199632, | |
| "learning_rate": 3.5138944261189545e-05, | |
| "loss": 0.1995, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.1636363636363636, | |
| "grad_norm": 0.16457847074469398, | |
| "learning_rate": 3.501061039724106e-05, | |
| "loss": 0.1961, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.1688311688311688, | |
| "grad_norm": 0.17382166641018432, | |
| "learning_rate": 3.48819615931492e-05, | |
| "loss": 0.1966, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.174025974025974, | |
| "grad_norm": 0.1796672158320363, | |
| "learning_rate": 3.475300189628685e-05, | |
| "loss": 0.2005, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.1792207792207792, | |
| "grad_norm": 0.19277006012807987, | |
| "learning_rate": 3.4623735363807706e-05, | |
| "loss": 0.1973, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.1844155844155844, | |
| "grad_norm": 0.16718509709095522, | |
| "learning_rate": 3.449416606251878e-05, | |
| "loss": 0.1966, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.1896103896103896, | |
| "grad_norm": 0.15948163188018513, | |
| "learning_rate": 3.436429806875236e-05, | |
| "loss": 0.1951, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.1948051948051948, | |
| "grad_norm": 0.14931342836105496, | |
| "learning_rate": 3.423413546823776e-05, | |
| "loss": 0.1981, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.17649267762779447, | |
| "learning_rate": 3.410368235597285e-05, | |
| "loss": 0.1965, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.2051948051948052, | |
| "grad_norm": 0.18378320454721475, | |
| "learning_rate": 3.3972942836095146e-05, | |
| "loss": 0.1984, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.2103896103896103, | |
| "grad_norm": 0.16895275790268519, | |
| "learning_rate": 3.3841921021752764e-05, | |
| "loss": 0.1973, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.2155844155844155, | |
| "grad_norm": 0.16837240010707993, | |
| "learning_rate": 3.3710621034974966e-05, | |
| "loss": 0.1996, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.2207792207792207, | |
| "grad_norm": 0.17505091322912789, | |
| "learning_rate": 3.357904700654248e-05, | |
| "loss": 0.1983, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.225974025974026, | |
| "grad_norm": 0.16274246547553728, | |
| "learning_rate": 3.3447203075857596e-05, | |
| "loss": 0.1999, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.2311688311688311, | |
| "grad_norm": 0.17154176441709548, | |
| "learning_rate": 3.3315093390813856e-05, | |
| "loss": 0.1958, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.2363636363636363, | |
| "grad_norm": 0.1661630930719061, | |
| "learning_rate": 3.318272210766564e-05, | |
| "loss": 0.1952, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.2415584415584415, | |
| "grad_norm": 0.17507230307092086, | |
| "learning_rate": 3.3050093390897355e-05, | |
| "loss": 0.1949, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.2467532467532467, | |
| "grad_norm": 0.17495618433072496, | |
| "learning_rate": 3.291721141309241e-05, | |
| "loss": 0.2, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.251948051948052, | |
| "grad_norm": 0.16517186947249202, | |
| "learning_rate": 3.278408035480202e-05, | |
| "loss": 0.1958, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.2571428571428571, | |
| "grad_norm": 0.15684230284407272, | |
| "learning_rate": 3.2650704404413556e-05, | |
| "loss": 0.1953, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.2623376623376623, | |
| "grad_norm": 0.18306937206961088, | |
| "learning_rate": 3.251708775801893e-05, | |
| "loss": 0.1951, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.2675324675324675, | |
| "grad_norm": 0.15796065260907452, | |
| "learning_rate": 3.2383234619282456e-05, | |
| "loss": 0.1975, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.2727272727272727, | |
| "grad_norm": 0.1590509287945099, | |
| "learning_rate": 3.2249149199308645e-05, | |
| "loss": 0.1966, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.277922077922078, | |
| "grad_norm": 0.1578980037591031, | |
| "learning_rate": 3.211483571650974e-05, | |
| "loss": 0.1942, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.283116883116883, | |
| "grad_norm": 0.15431493580944897, | |
| "learning_rate": 3.198029839647297e-05, | |
| "loss": 0.1963, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.2883116883116883, | |
| "grad_norm": 0.15203081144923972, | |
| "learning_rate": 3.184554147182764e-05, | |
| "loss": 0.192, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.2935064935064935, | |
| "grad_norm": 0.1648836808861431, | |
| "learning_rate": 3.171056918211195e-05, | |
| "loss": 0.1952, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.2987012987012987, | |
| "grad_norm": 0.17970050092893974, | |
| "learning_rate": 3.157538577363962e-05, | |
| "loss": 0.1953, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.3038961038961039, | |
| "grad_norm": 0.18613322149465572, | |
| "learning_rate": 3.1439995499366285e-05, | |
| "loss": 0.1932, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.309090909090909, | |
| "grad_norm": 0.17435271630087446, | |
| "learning_rate": 3.130440261875575e-05, | |
| "loss": 0.1925, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.3142857142857143, | |
| "grad_norm": 0.17016839586778193, | |
| "learning_rate": 3.1168611397645894e-05, | |
| "loss": 0.194, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.3194805194805195, | |
| "grad_norm": 0.16245806925048073, | |
| "learning_rate": 3.103262610811455e-05, | |
| "loss": 0.1916, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.3246753246753247, | |
| "grad_norm": 0.14467063077481634, | |
| "learning_rate": 3.0896451028345054e-05, | |
| "loss": 0.1926, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.3298701298701299, | |
| "grad_norm": 0.1564688986937739, | |
| "learning_rate": 3.076009044249165e-05, | |
| "loss": 0.1936, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.335064935064935, | |
| "grad_norm": 0.16082121945843722, | |
| "learning_rate": 3.0623548640544747e-05, | |
| "loss": 0.1967, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.3402597402597403, | |
| "grad_norm": 0.14645012653654418, | |
| "learning_rate": 3.0486829918195902e-05, | |
| "loss": 0.1948, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.3454545454545455, | |
| "grad_norm": 0.1554338631637541, | |
| "learning_rate": 3.0349938576702734e-05, | |
| "loss": 0.1954, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.3506493506493507, | |
| "grad_norm": 0.190100650523074, | |
| "learning_rate": 3.021287892275352e-05, | |
| "loss": 0.1994, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.3558441558441559, | |
| "grad_norm": 0.16042048393322747, | |
| "learning_rate": 3.0075655268331792e-05, | |
| "loss": 0.195, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.361038961038961, | |
| "grad_norm": 0.15621284876054406, | |
| "learning_rate": 2.9938271930580637e-05, | |
| "loss": 0.1945, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.3662337662337662, | |
| "grad_norm": 0.1619700504975918, | |
| "learning_rate": 2.980073323166686e-05, | |
| "loss": 0.1945, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.3714285714285714, | |
| "grad_norm": 0.16844868277250866, | |
| "learning_rate": 2.9663043498645055e-05, | |
| "loss": 0.1968, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.3766233766233766, | |
| "grad_norm": 0.16162842712288464, | |
| "learning_rate": 2.9525207063321407e-05, | |
| "loss": 0.1929, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.3818181818181818, | |
| "grad_norm": 0.15290543489779018, | |
| "learning_rate": 2.938722826211749e-05, | |
| "loss": 0.196, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.387012987012987, | |
| "grad_norm": 0.15019873611737544, | |
| "learning_rate": 2.9249111435933774e-05, | |
| "loss": 0.1971, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.3922077922077922, | |
| "grad_norm": 0.16303163651181884, | |
| "learning_rate": 2.9110860930013086e-05, | |
| "loss": 0.1953, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.3974025974025974, | |
| "grad_norm": 0.1664726514638493, | |
| "learning_rate": 2.8972481093803904e-05, | |
| "loss": 0.1908, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.4025974025974026, | |
| "grad_norm": 0.15130472315263208, | |
| "learning_rate": 2.8833976280823518e-05, | |
| "loss": 0.1921, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.4077922077922078, | |
| "grad_norm": 0.15072736412248944, | |
| "learning_rate": 2.8695350848521075e-05, | |
| "loss": 0.1978, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.412987012987013, | |
| "grad_norm": 0.15699378092430566, | |
| "learning_rate": 2.8556609158140463e-05, | |
| "loss": 0.1964, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.4181818181818182, | |
| "grad_norm": 0.16242931571448047, | |
| "learning_rate": 2.8417755574583137e-05, | |
| "loss": 0.1923, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.4233766233766234, | |
| "grad_norm": 0.17561234437581505, | |
| "learning_rate": 2.827879446627079e-05, | |
| "loss": 0.1968, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 0.1960744043552905, | |
| "learning_rate": 2.8139730205007885e-05, | |
| "loss": 0.1957, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.4337662337662338, | |
| "grad_norm": 0.17235052078230464, | |
| "learning_rate": 2.8000567165844166e-05, | |
| "loss": 0.1915, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.438961038961039, | |
| "grad_norm": 0.15593868664885746, | |
| "learning_rate": 2.786130972693699e-05, | |
| "loss": 0.1948, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.4441558441558442, | |
| "grad_norm": 0.16960231230641076, | |
| "learning_rate": 2.7721962269413577e-05, | |
| "loss": 0.1939, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.4493506493506494, | |
| "grad_norm": 0.1696728165311113, | |
| "learning_rate": 2.7582529177233203e-05, | |
| "loss": 0.1941, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 0.16089525009448685, | |
| "learning_rate": 2.7443014837049247e-05, | |
| "loss": 0.1949, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.4597402597402598, | |
| "grad_norm": 0.14342624961264755, | |
| "learning_rate": 2.7303423638071223e-05, | |
| "loss": 0.1907, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.464935064935065, | |
| "grad_norm": 0.16377445863374662, | |
| "learning_rate": 2.7163759971926668e-05, | |
| "loss": 0.1929, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.4701298701298702, | |
| "grad_norm": 0.14893765229334818, | |
| "learning_rate": 2.7024028232522962e-05, | |
| "loss": 0.1892, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.4753246753246754, | |
| "grad_norm": 0.15599419394354383, | |
| "learning_rate": 2.688423281590913e-05, | |
| "loss": 0.1957, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.4805194805194806, | |
| "grad_norm": 0.15330477393788722, | |
| "learning_rate": 2.6744378120137526e-05, | |
| "loss": 0.1895, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.4857142857142858, | |
| "grad_norm": 0.17621145634836316, | |
| "learning_rate": 2.660446854512545e-05, | |
| "loss": 0.1945, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.490909090909091, | |
| "grad_norm": 0.15862268063341767, | |
| "learning_rate": 2.6464508492516742e-05, | |
| "loss": 0.1942, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.4961038961038962, | |
| "grad_norm": 0.17315942606336032, | |
| "learning_rate": 2.6324502365543313e-05, | |
| "loss": 0.1905, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.5012987012987011, | |
| "grad_norm": 0.16945429053310193, | |
| "learning_rate": 2.618445456888658e-05, | |
| "loss": 0.1915, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.5064935064935066, | |
| "grad_norm": 0.16427452855902688, | |
| "learning_rate": 2.604436950853893e-05, | |
| "loss": 0.191, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.5116883116883115, | |
| "grad_norm": 0.14123750251448794, | |
| "learning_rate": 2.5904251591665078e-05, | |
| "loss": 0.1902, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.516883116883117, | |
| "grad_norm": 0.15868241668499022, | |
| "learning_rate": 2.5764105226463447e-05, | |
| "loss": 0.1916, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.522077922077922, | |
| "grad_norm": 0.17175562912305398, | |
| "learning_rate": 2.562393482202744e-05, | |
| "loss": 0.1918, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.5272727272727273, | |
| "grad_norm": 0.1506170694591851, | |
| "learning_rate": 2.5483744788206755e-05, | |
| "loss": 0.1924, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.5324675324675323, | |
| "grad_norm": 0.1573731670528784, | |
| "learning_rate": 2.5343539535468665e-05, | |
| "loss": 0.1933, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.5376623376623377, | |
| "grad_norm": 0.15102349530813702, | |
| "learning_rate": 2.5203323474759188e-05, | |
| "loss": 0.1878, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.5428571428571427, | |
| "grad_norm": 0.15486309972010434, | |
| "learning_rate": 2.5063101017364433e-05, | |
| "loss": 0.1924, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.5480519480519481, | |
| "grad_norm": 0.16776236741984477, | |
| "learning_rate": 2.4922876574771705e-05, | |
| "loss": 0.1923, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.553246753246753, | |
| "grad_norm": 0.14493538153609314, | |
| "learning_rate": 2.4782654558530767e-05, | |
| "loss": 0.1895, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.5584415584415585, | |
| "grad_norm": 0.1521099389385944, | |
| "learning_rate": 2.464243938011509e-05, | |
| "loss": 0.1953, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.5636363636363635, | |
| "grad_norm": 0.1520228941252825, | |
| "learning_rate": 2.4502235450782976e-05, | |
| "loss": 0.1918, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.568831168831169, | |
| "grad_norm": 0.14357994502338198, | |
| "learning_rate": 2.4362047181438835e-05, | |
| "loss": 0.1893, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.5740259740259739, | |
| "grad_norm": 0.1419085126020348, | |
| "learning_rate": 2.4221878982494423e-05, | |
| "loss": 0.1954, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.5792207792207793, | |
| "grad_norm": 0.149414700589382, | |
| "learning_rate": 2.4081735263730047e-05, | |
| "loss": 0.1918, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.5844155844155843, | |
| "grad_norm": 0.14522347482433445, | |
| "learning_rate": 2.3941620434155854e-05, | |
| "loss": 0.1915, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.5896103896103897, | |
| "grad_norm": 0.13817121165231117, | |
| "learning_rate": 2.380153890187314e-05, | |
| "loss": 0.1931, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.5948051948051947, | |
| "grad_norm": 0.1512561465765382, | |
| "learning_rate": 2.366149507393563e-05, | |
| "loss": 0.1882, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.15918442476721073, | |
| "learning_rate": 2.352149335621084e-05, | |
| "loss": 0.188, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.605194805194805, | |
| "grad_norm": 0.15634396731194353, | |
| "learning_rate": 2.3381538153241474e-05, | |
| "loss": 0.1867, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.6103896103896105, | |
| "grad_norm": 0.1586607565553087, | |
| "learning_rate": 2.3241633868106878e-05, | |
| "loss": 0.1939, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.6155844155844155, | |
| "grad_norm": 0.1560480985454943, | |
| "learning_rate": 2.310178490228446e-05, | |
| "loss": 0.1913, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.6207792207792209, | |
| "grad_norm": 0.1451574194093105, | |
| "learning_rate": 2.296199565551125e-05, | |
| "loss": 0.1901, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.6259740259740258, | |
| "grad_norm": 0.16572964821298886, | |
| "learning_rate": 2.2822270525645507e-05, | |
| "loss": 0.1921, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.6311688311688313, | |
| "grad_norm": 0.1526008677355272, | |
| "learning_rate": 2.268261390852831e-05, | |
| "loss": 0.1911, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.6363636363636362, | |
| "grad_norm": 0.15191599502910447, | |
| "learning_rate": 2.254303019784526e-05, | |
| "loss": 0.1918, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.6415584415584417, | |
| "grad_norm": 0.1463054127627514, | |
| "learning_rate": 2.240352378498834e-05, | |
| "loss": 0.1909, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.6467532467532466, | |
| "grad_norm": 0.14379825200348653, | |
| "learning_rate": 2.226409905891763e-05, | |
| "loss": 0.1889, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.651948051948052, | |
| "grad_norm": 0.20939441149322338, | |
| "learning_rate": 2.2124760406023315e-05, | |
| "loss": 0.19, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.657142857142857, | |
| "grad_norm": 0.14501247701314718, | |
| "learning_rate": 2.198551220998768e-05, | |
| "loss": 0.1873, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.6623376623376624, | |
| "grad_norm": 0.14787111177950868, | |
| "learning_rate": 2.1846358851647162e-05, | |
| "loss": 0.1892, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.6675324675324674, | |
| "grad_norm": 0.14627855688686608, | |
| "learning_rate": 2.1707304708854547e-05, | |
| "loss": 0.1957, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.6727272727272728, | |
| "grad_norm": 0.14191753318818107, | |
| "learning_rate": 2.156835415634123e-05, | |
| "loss": 0.1862, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.6779220779220778, | |
| "grad_norm": 0.14766613612356988, | |
| "learning_rate": 2.1429511565579612e-05, | |
| "loss": 0.188, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.6831168831168832, | |
| "grad_norm": 0.13830710205530652, | |
| "learning_rate": 2.129078130464553e-05, | |
| "loss": 0.1913, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.6883116883116882, | |
| "grad_norm": 0.14720437692164431, | |
| "learning_rate": 2.1152167738080848e-05, | |
| "loss": 0.1898, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.6935064935064936, | |
| "grad_norm": 0.14698633131391733, | |
| "learning_rate": 2.1013675226756178e-05, | |
| "loss": 0.1924, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.6987012987012986, | |
| "grad_norm": 0.1419475844570598, | |
| "learning_rate": 2.0875308127733634e-05, | |
| "loss": 0.1894, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.703896103896104, | |
| "grad_norm": 0.14518860678193932, | |
| "learning_rate": 2.0737070794129776e-05, | |
| "loss": 0.1897, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.709090909090909, | |
| "grad_norm": 0.13929880201308206, | |
| "learning_rate": 2.059896757497869e-05, | |
| "loss": 0.1894, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 0.1560629988591411, | |
| "learning_rate": 2.046100281509511e-05, | |
| "loss": 0.1883, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.7194805194805194, | |
| "grad_norm": 0.14187945981501732, | |
| "learning_rate": 2.0323180854937775e-05, | |
| "loss": 0.1893, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.7246753246753248, | |
| "grad_norm": 0.13855333314294954, | |
| "learning_rate": 2.018550603047281e-05, | |
| "loss": 0.1851, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.7298701298701298, | |
| "grad_norm": 0.15293602282420193, | |
| "learning_rate": 2.004798267303743e-05, | |
| "loss": 0.1919, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.7350649350649352, | |
| "grad_norm": 0.1510311020666268, | |
| "learning_rate": 1.9910615109203533e-05, | |
| "loss": 0.188, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.7402597402597402, | |
| "grad_norm": 0.15491321483535742, | |
| "learning_rate": 1.977340766064169e-05, | |
| "loss": 0.1892, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.7454545454545456, | |
| "grad_norm": 0.14485954144018753, | |
| "learning_rate": 1.9636364643985132e-05, | |
| "loss": 0.1906, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.7506493506493506, | |
| "grad_norm": 0.1594394775258021, | |
| "learning_rate": 1.949949037069396e-05, | |
| "loss": 0.193, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.755844155844156, | |
| "grad_norm": 0.14909440982518038, | |
| "learning_rate": 1.9362789146919498e-05, | |
| "loss": 0.1891, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.761038961038961, | |
| "grad_norm": 0.15409122998170965, | |
| "learning_rate": 1.922626527336884e-05, | |
| "loss": 0.1887, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.7662337662337664, | |
| "grad_norm": 0.14506982813181468, | |
| "learning_rate": 1.90899230451695e-05, | |
| "loss": 0.1892, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.7714285714285714, | |
| "grad_norm": 0.1429677030388747, | |
| "learning_rate": 1.8953766751734343e-05, | |
| "loss": 0.1863, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.7766233766233768, | |
| "grad_norm": 0.14323572969542486, | |
| "learning_rate": 1.8817800676626572e-05, | |
| "loss": 0.1874, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.7818181818181817, | |
| "grad_norm": 0.13341435839039215, | |
| "learning_rate": 1.8682029097425063e-05, | |
| "loss": 0.1915, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.7870129870129872, | |
| "grad_norm": 0.13841332912364393, | |
| "learning_rate": 1.8546456285589663e-05, | |
| "loss": 0.1871, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.7922077922077921, | |
| "grad_norm": 0.14609137476892148, | |
| "learning_rate": 1.8411086506326893e-05, | |
| "loss": 0.1878, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.7974025974025976, | |
| "grad_norm": 0.14038564796108288, | |
| "learning_rate": 1.8275924018455765e-05, | |
| "loss": 0.1884, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.8025974025974025, | |
| "grad_norm": 0.14343484338333484, | |
| "learning_rate": 1.814097307427374e-05, | |
| "loss": 0.1915, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.807792207792208, | |
| "grad_norm": 0.1396561043467835, | |
| "learning_rate": 1.8006237919423004e-05, | |
| "loss": 0.1916, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.812987012987013, | |
| "grad_norm": 0.142463801296892, | |
| "learning_rate": 1.7871722792756852e-05, | |
| "loss": 0.1876, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 0.1336858975526505, | |
| "learning_rate": 1.7737431926206383e-05, | |
| "loss": 0.1851, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.8233766233766233, | |
| "grad_norm": 0.1426293194672696, | |
| "learning_rate": 1.760336954464729e-05, | |
| "loss": 0.1865, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.8285714285714287, | |
| "grad_norm": 0.15015722852688704, | |
| "learning_rate": 1.7469539865767015e-05, | |
| "loss": 0.1881, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.8337662337662337, | |
| "grad_norm": 0.13366343514009785, | |
| "learning_rate": 1.7335947099932022e-05, | |
| "loss": 0.1852, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.838961038961039, | |
| "grad_norm": 0.14856809610613397, | |
| "learning_rate": 1.720259545005533e-05, | |
| "loss": 0.1843, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.844155844155844, | |
| "grad_norm": 0.1448912483018553, | |
| "learning_rate": 1.7069489111464304e-05, | |
| "loss": 0.1864, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.8493506493506493, | |
| "grad_norm": 0.1359780492493188, | |
| "learning_rate": 1.693663227176867e-05, | |
| "loss": 0.1877, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.8545454545454545, | |
| "grad_norm": 0.13772337189235462, | |
| "learning_rate": 1.680402911072874e-05, | |
| "loss": 0.1873, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.8597402597402597, | |
| "grad_norm": 0.14393209031617119, | |
| "learning_rate": 1.6671683800123932e-05, | |
| "loss": 0.1839, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.864935064935065, | |
| "grad_norm": 0.13956682462779404, | |
| "learning_rate": 1.6539600503621572e-05, | |
| "loss": 0.1874, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.87012987012987, | |
| "grad_norm": 0.1391253168614093, | |
| "learning_rate": 1.6407783376645803e-05, | |
| "loss": 0.1845, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.8753246753246753, | |
| "grad_norm": 0.13132359363192808, | |
| "learning_rate": 1.6276236566246916e-05, | |
| "loss": 0.1873, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.8805194805194805, | |
| "grad_norm": 0.14070930287195552, | |
| "learning_rate": 1.614496421097091e-05, | |
| "loss": 0.1884, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.8857142857142857, | |
| "grad_norm": 0.14427194349525638, | |
| "learning_rate": 1.6013970440729204e-05, | |
| "loss": 0.1889, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.8909090909090909, | |
| "grad_norm": 0.13521516110226986, | |
| "learning_rate": 1.588325937666878e-05, | |
| "loss": 0.1839, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.896103896103896, | |
| "grad_norm": 0.13376048212412542, | |
| "learning_rate": 1.5752835131042494e-05, | |
| "loss": 0.1839, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.9012987012987013, | |
| "grad_norm": 0.14138808345532902, | |
| "learning_rate": 1.5622701807079733e-05, | |
| "loss": 0.1871, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.9064935064935065, | |
| "grad_norm": 0.13070839091776826, | |
| "learning_rate": 1.5492863498857287e-05, | |
| "loss": 0.1823, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.9116883116883117, | |
| "grad_norm": 0.13600301900618547, | |
| "learning_rate": 1.5363324291170545e-05, | |
| "loss": 0.1868, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.9168831168831169, | |
| "grad_norm": 0.1400192321902119, | |
| "learning_rate": 1.5234088259405056e-05, | |
| "loss": 0.1867, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.922077922077922, | |
| "grad_norm": 0.13023052808570348, | |
| "learning_rate": 1.5105159469408209e-05, | |
| "loss": 0.1821, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.9272727272727272, | |
| "grad_norm": 0.14335411330199735, | |
| "learning_rate": 1.4976541977361402e-05, | |
| "loss": 0.189, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.9324675324675324, | |
| "grad_norm": 0.1336525441650799, | |
| "learning_rate": 1.48482398296524e-05, | |
| "loss": 0.1834, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.9376623376623376, | |
| "grad_norm": 0.14165962885271705, | |
| "learning_rate": 1.4720257062748022e-05, | |
| "loss": 0.1866, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.9428571428571428, | |
| "grad_norm": 0.13695354269783466, | |
| "learning_rate": 1.4592597703067187e-05, | |
| "loss": 0.1832, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.948051948051948, | |
| "grad_norm": 0.13868718580766137, | |
| "learning_rate": 1.446526576685418e-05, | |
| "loss": 0.183, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.9532467532467532, | |
| "grad_norm": 0.13843203988888092, | |
| "learning_rate": 1.4338265260052387e-05, | |
| "loss": 0.1842, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.9584415584415584, | |
| "grad_norm": 0.13271796860429588, | |
| "learning_rate": 1.4211600178178174e-05, | |
| "loss": 0.1832, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.9636363636363636, | |
| "grad_norm": 0.14628648866036364, | |
| "learning_rate": 1.4085274506195245e-05, | |
| "loss": 0.186, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.9688311688311688, | |
| "grad_norm": 0.14975084928952903, | |
| "learning_rate": 1.3959292218389248e-05, | |
| "loss": 0.1842, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.974025974025974, | |
| "grad_norm": 0.1304201723081621, | |
| "learning_rate": 1.383365727824275e-05, | |
| "loss": 0.1835, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.9792207792207792, | |
| "grad_norm": 0.15224990383617293, | |
| "learning_rate": 1.3708373638310531e-05, | |
| "loss": 0.1858, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.9844155844155844, | |
| "grad_norm": 0.13275223694042823, | |
| "learning_rate": 1.358344524009528e-05, | |
| "loss": 0.1832, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.9896103896103896, | |
| "grad_norm": 0.14813458539626156, | |
| "learning_rate": 1.3458876013923499e-05, | |
| "loss": 0.1866, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.9948051948051948, | |
| "grad_norm": 0.1330421923733361, | |
| "learning_rate": 1.3334669878821948e-05, | |
| "loss": 0.1827, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.14316381473328288, | |
| "learning_rate": 1.3210830742394298e-05, | |
| "loss": 0.1799, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.005194805194805, | |
| "grad_norm": 0.1512401129922707, | |
| "learning_rate": 1.3087362500698237e-05, | |
| "loss": 0.154, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 2.0103896103896104, | |
| "grad_norm": 0.14562927746343074, | |
| "learning_rate": 1.2964269038122836e-05, | |
| "loss": 0.1505, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 2.0155844155844154, | |
| "grad_norm": 0.13594065019007026, | |
| "learning_rate": 1.2841554227266373e-05, | |
| "loss": 0.151, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 2.020779220779221, | |
| "grad_norm": 0.14424278321251643, | |
| "learning_rate": 1.2719221928814545e-05, | |
| "loss": 0.1489, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 2.0259740259740258, | |
| "grad_norm": 0.14407964341771956, | |
| "learning_rate": 1.2597275991418928e-05, | |
| "loss": 0.1534, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.031168831168831, | |
| "grad_norm": 0.13922191062743117, | |
| "learning_rate": 1.247572025157595e-05, | |
| "loss": 0.1498, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 2.036363636363636, | |
| "grad_norm": 0.1315526163391668, | |
| "learning_rate": 1.2354558533506176e-05, | |
| "loss": 0.1486, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 2.0415584415584416, | |
| "grad_norm": 0.14073786153063886, | |
| "learning_rate": 1.2233794649033991e-05, | |
| "loss": 0.1561, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 2.0467532467532465, | |
| "grad_norm": 0.1428562906764405, | |
| "learning_rate": 1.211343239746768e-05, | |
| "loss": 0.1509, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 2.051948051948052, | |
| "grad_norm": 0.14280097216113558, | |
| "learning_rate": 1.199347556547993e-05, | |
| "loss": 0.1517, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.057142857142857, | |
| "grad_norm": 0.13105429870703852, | |
| "learning_rate": 1.187392792698864e-05, | |
| "loss": 0.1469, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.0623376623376624, | |
| "grad_norm": 0.14347982471711343, | |
| "learning_rate": 1.1754793243038239e-05, | |
| "loss": 0.1507, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.0675324675324673, | |
| "grad_norm": 0.14904614300033936, | |
| "learning_rate": 1.1636075261681315e-05, | |
| "loss": 0.1549, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.0727272727272728, | |
| "grad_norm": 0.13730655354719687, | |
| "learning_rate": 1.1517777717860776e-05, | |
| "loss": 0.1503, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.0779220779220777, | |
| "grad_norm": 0.14862501974595033, | |
| "learning_rate": 1.1399904333292269e-05, | |
| "loss": 0.1509, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.083116883116883, | |
| "grad_norm": 0.13183718490837706, | |
| "learning_rate": 1.1282458816347128e-05, | |
| "loss": 0.1509, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.088311688311688, | |
| "grad_norm": 0.13591710857386252, | |
| "learning_rate": 1.1165444861935701e-05, | |
| "loss": 0.1514, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.0935064935064935, | |
| "grad_norm": 0.13367182049390872, | |
| "learning_rate": 1.1048866151391102e-05, | |
| "loss": 0.1479, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.0987012987012985, | |
| "grad_norm": 0.13675222806448303, | |
| "learning_rate": 1.0932726352353393e-05, | |
| "loss": 0.1499, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.103896103896104, | |
| "grad_norm": 0.13838118900424312, | |
| "learning_rate": 1.081702911865423e-05, | |
| "loss": 0.1487, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.109090909090909, | |
| "grad_norm": 0.13434786073921717, | |
| "learning_rate": 1.0701778090201858e-05, | |
| "loss": 0.1533, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.1142857142857143, | |
| "grad_norm": 0.13473555962628936, | |
| "learning_rate": 1.0586976892866615e-05, | |
| "loss": 0.1505, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.1194805194805193, | |
| "grad_norm": 0.14196721586166652, | |
| "learning_rate": 1.0472629138366874e-05, | |
| "loss": 0.1468, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.1246753246753247, | |
| "grad_norm": 0.13373540922822927, | |
| "learning_rate": 1.0358738424155435e-05, | |
| "loss": 0.1514, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.1298701298701297, | |
| "grad_norm": 0.13735599472516719, | |
| "learning_rate": 1.024530833330629e-05, | |
| "loss": 0.1518, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.135064935064935, | |
| "grad_norm": 0.14328294051153082, | |
| "learning_rate": 1.0132342434401937e-05, | |
| "loss": 0.1507, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.14025974025974, | |
| "grad_norm": 0.1364465855704536, | |
| "learning_rate": 1.0019844281421107e-05, | |
| "loss": 0.1523, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.1454545454545455, | |
| "grad_norm": 0.1350058874546942, | |
| "learning_rate": 9.90781741362694e-06, | |
| "loss": 0.149, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.1506493506493505, | |
| "grad_norm": 0.12932693371267076, | |
| "learning_rate": 9.796265355455647e-06, | |
| "loss": 0.1535, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.155844155844156, | |
| "grad_norm": 0.13891031796141848, | |
| "learning_rate": 9.685191616405643e-06, | |
| "loss": 0.1511, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.161038961038961, | |
| "grad_norm": 0.12826558374453936, | |
| "learning_rate": 9.574599690927105e-06, | |
| "loss": 0.1497, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.1662337662337663, | |
| "grad_norm": 0.1356328538309424, | |
| "learning_rate": 9.46449305831204e-06, | |
| "loss": 0.1518, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.1714285714285713, | |
| "grad_norm": 0.1297425436242765, | |
| "learning_rate": 9.354875182584846e-06, | |
| "loss": 0.1534, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.1766233766233767, | |
| "grad_norm": 0.13530241943073487, | |
| "learning_rate": 9.245749512393334e-06, | |
| "loss": 0.1503, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 0.1408560476379785, | |
| "learning_rate": 9.1371194809002e-06, | |
| "loss": 0.1492, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.187012987012987, | |
| "grad_norm": 0.13372249178503884, | |
| "learning_rate": 9.028988505675034e-06, | |
| "loss": 0.1507, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.192207792207792, | |
| "grad_norm": 0.13921966813026387, | |
| "learning_rate": 8.9213599885868e-06, | |
| "loss": 0.1531, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.1974025974025975, | |
| "grad_norm": 0.13262776415903957, | |
| "learning_rate": 8.814237315696817e-06, | |
| "loss": 0.1504, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.2025974025974024, | |
| "grad_norm": 0.1301186665851314, | |
| "learning_rate": 8.707623857152208e-06, | |
| "loss": 0.1492, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.207792207792208, | |
| "grad_norm": 0.13579689779095455, | |
| "learning_rate": 8.60152296707993e-06, | |
| "loss": 0.1521, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.212987012987013, | |
| "grad_norm": 0.14637447855770952, | |
| "learning_rate": 8.495937983481158e-06, | |
| "loss": 0.1487, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.2181818181818183, | |
| "grad_norm": 0.1289270701627187, | |
| "learning_rate": 8.390872228126362e-06, | |
| "loss": 0.1532, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.2233766233766232, | |
| "grad_norm": 0.13704790669730998, | |
| "learning_rate": 8.286329006450735e-06, | |
| "loss": 0.154, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.2285714285714286, | |
| "grad_norm": 0.1342079590854756, | |
| "learning_rate": 8.182311607450264e-06, | |
| "loss": 0.1519, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.2337662337662336, | |
| "grad_norm": 0.1289098530992212, | |
| "learning_rate": 8.078823303578198e-06, | |
| "loss": 0.1491, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.238961038961039, | |
| "grad_norm": 0.1332579940433215, | |
| "learning_rate": 7.9758673506421e-06, | |
| "loss": 0.1519, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.244155844155844, | |
| "grad_norm": 0.12978196786783464, | |
| "learning_rate": 7.87344698770148e-06, | |
| "loss": 0.1494, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.2493506493506494, | |
| "grad_norm": 0.1331325286342844, | |
| "learning_rate": 7.77156543696582e-06, | |
| "loss": 0.1496, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.2545454545454544, | |
| "grad_norm": 0.13175031314804772, | |
| "learning_rate": 7.670225903693229e-06, | |
| "loss": 0.1494, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.25974025974026, | |
| "grad_norm": 0.13252344104105662, | |
| "learning_rate": 7.5694315760896086e-06, | |
| "loss": 0.1501, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.264935064935065, | |
| "grad_norm": 0.1292549372813496, | |
| "learning_rate": 7.469185625208347e-06, | |
| "loss": 0.1493, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.27012987012987, | |
| "grad_norm": 0.14892873039516652, | |
| "learning_rate": 7.369491204850537e-06, | |
| "loss": 0.151, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.275324675324675, | |
| "grad_norm": 0.1334849414038533, | |
| "learning_rate": 7.270351451465806e-06, | |
| "loss": 0.1516, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.2805194805194806, | |
| "grad_norm": 0.13092908930580768, | |
| "learning_rate": 7.171769484053575e-06, | |
| "loss": 0.1471, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 0.13163937976410825, | |
| "learning_rate": 7.0737484040649864e-06, | |
| "loss": 0.1493, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.290909090909091, | |
| "grad_norm": 0.12981020405290916, | |
| "learning_rate": 6.9762912953052706e-06, | |
| "loss": 0.1554, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 2.296103896103896, | |
| "grad_norm": 0.13098768915549264, | |
| "learning_rate": 6.879401223836823e-06, | |
| "loss": 0.1493, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.3012987012987014, | |
| "grad_norm": 0.4012203963638076, | |
| "learning_rate": 6.783081237882649e-06, | |
| "loss": 0.1485, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 2.3064935064935064, | |
| "grad_norm": 0.13383556830226412, | |
| "learning_rate": 6.68733436773051e-06, | |
| "loss": 0.1496, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.311688311688312, | |
| "grad_norm": 0.1333510593064724, | |
| "learning_rate": 6.592163625637582e-06, | |
| "loss": 0.1541, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.3168831168831168, | |
| "grad_norm": 0.1565215404000228, | |
| "learning_rate": 6.497572005735689e-06, | |
| "loss": 0.1501, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.322077922077922, | |
| "grad_norm": 0.13259468650053935, | |
| "learning_rate": 6.4035624839370975e-06, | |
| "loss": 0.1551, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 2.327272727272727, | |
| "grad_norm": 0.1307362325637928, | |
| "learning_rate": 6.310138017840917e-06, | |
| "loss": 0.1486, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.3324675324675326, | |
| "grad_norm": 0.13133190490766602, | |
| "learning_rate": 6.217301546640022e-06, | |
| "loss": 0.1518, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.3376623376623376, | |
| "grad_norm": 0.13025882458056662, | |
| "learning_rate": 6.125055991028583e-06, | |
| "loss": 0.1494, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.342857142857143, | |
| "grad_norm": 0.1254143759117903, | |
| "learning_rate": 6.0334042531102005e-06, | |
| "loss": 0.1458, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.348051948051948, | |
| "grad_norm": 0.12809326669267687, | |
| "learning_rate": 5.942349216306614e-06, | |
| "loss": 0.1471, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.3532467532467534, | |
| "grad_norm": 0.12509372527529825, | |
| "learning_rate": 5.851893745266945e-06, | |
| "loss": 0.1478, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.3584415584415583, | |
| "grad_norm": 0.13151188136576936, | |
| "learning_rate": 5.7620406857776e-06, | |
| "loss": 0.1495, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.3636363636363638, | |
| "grad_norm": 0.1347993314789362, | |
| "learning_rate": 5.67279286467274e-06, | |
| "loss": 0.1504, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.3688311688311687, | |
| "grad_norm": 0.13205701694453234, | |
| "learning_rate": 5.584153089745345e-06, | |
| "loss": 0.1492, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.374025974025974, | |
| "grad_norm": 0.130155592941109, | |
| "learning_rate": 5.4961241496588655e-06, | |
| "loss": 0.1522, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 2.379220779220779, | |
| "grad_norm": 0.13048892356720782, | |
| "learning_rate": 5.408708813859531e-06, | |
| "loss": 0.1464, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.3844155844155845, | |
| "grad_norm": 0.12447265332394425, | |
| "learning_rate": 5.3219098324891496e-06, | |
| "loss": 0.1487, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 2.3896103896103895, | |
| "grad_norm": 0.12292541161875337, | |
| "learning_rate": 5.235729936298661e-06, | |
| "loss": 0.149, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.394805194805195, | |
| "grad_norm": 0.12776875389985792, | |
| "learning_rate": 5.15017183656217e-06, | |
| "loss": 0.151, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.12547722558900828, | |
| "learning_rate": 5.065238224991698e-06, | |
| "loss": 0.1484, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.4051948051948053, | |
| "grad_norm": 0.12365440404472366, | |
| "learning_rate": 4.980931773652453e-06, | |
| "loss": 0.1465, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 2.4103896103896103, | |
| "grad_norm": 0.12432434525146745, | |
| "learning_rate": 4.897255134878786e-06, | |
| "loss": 0.1483, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.4155844155844157, | |
| "grad_norm": 0.139186530570493, | |
| "learning_rate": 4.814210941190755e-06, | |
| "loss": 0.1518, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.4207792207792207, | |
| "grad_norm": 0.12528679762457748, | |
| "learning_rate": 4.731801805211286e-06, | |
| "loss": 0.1489, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 2.425974025974026, | |
| "grad_norm": 0.12450530026135737, | |
| "learning_rate": 4.650030319583987e-06, | |
| "loss": 0.1505, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 2.431168831168831, | |
| "grad_norm": 0.13039926074492833, | |
| "learning_rate": 4.568899056891604e-06, | |
| "loss": 0.1485, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.4363636363636365, | |
| "grad_norm": 0.12466326477523655, | |
| "learning_rate": 4.488410569575028e-06, | |
| "loss": 0.1475, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 2.4415584415584415, | |
| "grad_norm": 0.12528932051361105, | |
| "learning_rate": 4.408567389853055e-06, | |
| "loss": 0.1516, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.446753246753247, | |
| "grad_norm": 0.1259262307284031, | |
| "learning_rate": 4.329372029642678e-06, | |
| "loss": 0.1511, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 2.451948051948052, | |
| "grad_norm": 0.12907058499763246, | |
| "learning_rate": 4.250826980480105e-06, | |
| "loss": 0.1458, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 2.4571428571428573, | |
| "grad_norm": 0.13360791308885997, | |
| "learning_rate": 4.172934713442328e-06, | |
| "loss": 0.1506, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 2.4623376623376623, | |
| "grad_norm": 0.1246999497148764, | |
| "learning_rate": 4.095697679069382e-06, | |
| "loss": 0.1474, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 2.4675324675324677, | |
| "grad_norm": 0.12422306029077695, | |
| "learning_rate": 4.019118307287307e-06, | |
| "loss": 0.1465, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.4727272727272727, | |
| "grad_norm": 0.12816176999453854, | |
| "learning_rate": 3.943199007331633e-06, | |
| "loss": 0.1476, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 2.477922077922078, | |
| "grad_norm": 0.1283395199969505, | |
| "learning_rate": 3.8679421676716235e-06, | |
| "loss": 0.1505, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 2.483116883116883, | |
| "grad_norm": 0.12466666439258484, | |
| "learning_rate": 3.793350155935113e-06, | |
| "loss": 0.1504, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 2.4883116883116885, | |
| "grad_norm": 0.12152148883126895, | |
| "learning_rate": 3.7194253188340412e-06, | |
| "loss": 0.1471, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 2.4935064935064934, | |
| "grad_norm": 0.12943483158505822, | |
| "learning_rate": 3.6461699820905915e-06, | |
| "loss": 0.1488, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.498701298701299, | |
| "grad_norm": 0.1237730899808012, | |
| "learning_rate": 3.5735864503640693e-06, | |
| "loss": 0.1477, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 2.503896103896104, | |
| "grad_norm": 0.12374067827054203, | |
| "learning_rate": 3.50167700717835e-06, | |
| "loss": 0.1502, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 2.509090909090909, | |
| "grad_norm": 0.1271358088834976, | |
| "learning_rate": 3.4304439148500624e-06, | |
| "loss": 0.1446, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 2.5142857142857142, | |
| "grad_norm": 0.1272280855510312, | |
| "learning_rate": 3.3598894144173913e-06, | |
| "loss": 0.1496, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 2.5194805194805197, | |
| "grad_norm": 0.1258197921752763, | |
| "learning_rate": 3.290015725569626e-06, | |
| "loss": 0.15, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.5246753246753246, | |
| "grad_norm": 0.12473206628431092, | |
| "learning_rate": 3.220825046577261e-06, | |
| "loss": 0.1462, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.5298701298701296, | |
| "grad_norm": 0.11843223945225219, | |
| "learning_rate": 3.152319554222885e-06, | |
| "loss": 0.1477, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 2.535064935064935, | |
| "grad_norm": 0.1259012256625921, | |
| "learning_rate": 3.084501403732673e-06, | |
| "loss": 0.1516, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 2.5402597402597404, | |
| "grad_norm": 0.12425830875342217, | |
| "learning_rate": 3.017372728708595e-06, | |
| "loss": 0.1508, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 2.5454545454545454, | |
| "grad_norm": 0.12294050842755987, | |
| "learning_rate": 2.950935641061275e-06, | |
| "loss": 0.1468, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.5506493506493504, | |
| "grad_norm": 0.12210571551971311, | |
| "learning_rate": 2.8851922309435815e-06, | |
| "loss": 0.1494, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 2.555844155844156, | |
| "grad_norm": 0.12349634158702745, | |
| "learning_rate": 2.8201445666848346e-06, | |
| "loss": 0.1441, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 2.5610389610389612, | |
| "grad_norm": 0.12742872858385557, | |
| "learning_rate": 2.755794694725741e-06, | |
| "loss": 0.1457, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 2.566233766233766, | |
| "grad_norm": 0.1242587205799011, | |
| "learning_rate": 2.6921446395540284e-06, | |
| "loss": 0.1474, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 0.1266773031643187, | |
| "learning_rate": 2.6291964036407545e-06, | |
| "loss": 0.1472, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.5766233766233766, | |
| "grad_norm": 0.1253376470917521, | |
| "learning_rate": 2.5669519673772847e-06, | |
| "loss": 0.1471, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.581818181818182, | |
| "grad_norm": 0.12644089026425634, | |
| "learning_rate": 2.5054132890130087e-06, | |
| "loss": 0.1521, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 2.587012987012987, | |
| "grad_norm": 0.1197473844911881, | |
| "learning_rate": 2.444582304593723e-06, | |
| "loss": 0.1478, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.592207792207792, | |
| "grad_norm": 0.11990806649152073, | |
| "learning_rate": 2.3844609279007226e-06, | |
| "loss": 0.1499, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.5974025974025974, | |
| "grad_norm": 0.1202956364762698, | |
| "learning_rate": 2.325051050390595e-06, | |
| "loss": 0.1478, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.602597402597403, | |
| "grad_norm": 0.12255235260438935, | |
| "learning_rate": 2.266354541135726e-06, | |
| "loss": 0.1489, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 2.6077922077922078, | |
| "grad_norm": 0.12671799892499586, | |
| "learning_rate": 2.2083732467654603e-06, | |
| "loss": 0.1462, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.6129870129870127, | |
| "grad_norm": 0.12223780402274406, | |
| "learning_rate": 2.1511089914080464e-06, | |
| "loss": 0.1494, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 2.618181818181818, | |
| "grad_norm": 0.12145201814175376, | |
| "learning_rate": 2.094563576633221e-06, | |
| "loss": 0.149, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.6233766233766236, | |
| "grad_norm": 0.12317156500158179, | |
| "learning_rate": 2.038738781395552e-06, | |
| "loss": 0.1525, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.6285714285714286, | |
| "grad_norm": 0.12189047171784712, | |
| "learning_rate": 1.9836363619784552e-06, | |
| "loss": 0.1463, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.6337662337662335, | |
| "grad_norm": 0.12030818805338123, | |
| "learning_rate": 1.929258051938945e-06, | |
| "loss": 0.1463, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 2.638961038961039, | |
| "grad_norm": 0.12106285056082301, | |
| "learning_rate": 1.8756055620530898e-06, | |
| "loss": 0.1471, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.6441558441558444, | |
| "grad_norm": 0.12038600565908254, | |
| "learning_rate": 1.8226805802622094e-06, | |
| "loss": 0.1494, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 2.6493506493506493, | |
| "grad_norm": 0.12263633853976644, | |
| "learning_rate": 1.770484771619743e-06, | |
| "loss": 0.1499, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.6545454545454543, | |
| "grad_norm": 0.12334958408238764, | |
| "learning_rate": 1.7190197782389035e-06, | |
| "loss": 0.1484, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 2.6597402597402597, | |
| "grad_norm": 0.12029874859542598, | |
| "learning_rate": 1.6682872192409632e-06, | |
| "loss": 0.148, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.664935064935065, | |
| "grad_norm": 0.11743108789978497, | |
| "learning_rate": 1.618288690704367e-06, | |
| "loss": 0.1464, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 2.67012987012987, | |
| "grad_norm": 0.12483269486534702, | |
| "learning_rate": 1.5690257656144846e-06, | |
| "loss": 0.1456, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 2.675324675324675, | |
| "grad_norm": 0.1207903651113982, | |
| "learning_rate": 1.520499993814148e-06, | |
| "loss": 0.1444, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.6805194805194805, | |
| "grad_norm": 0.12216310435997871, | |
| "learning_rate": 1.472712901954873e-06, | |
| "loss": 0.1487, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 2.685714285714286, | |
| "grad_norm": 0.12421180243276636, | |
| "learning_rate": 1.4256659934488215e-06, | |
| "loss": 0.1468, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 2.690909090909091, | |
| "grad_norm": 0.12291390409864483, | |
| "learning_rate": 1.3793607484215458e-06, | |
| "loss": 0.1492, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 2.696103896103896, | |
| "grad_norm": 0.12424898483815008, | |
| "learning_rate": 1.3337986236653777e-06, | |
| "loss": 0.1498, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 2.7012987012987013, | |
| "grad_norm": 0.12220447786976209, | |
| "learning_rate": 1.2889810525936214e-06, | |
| "loss": 0.1464, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.7064935064935067, | |
| "grad_norm": 0.1170794743951447, | |
| "learning_rate": 1.244909445195444e-06, | |
| "loss": 0.1479, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 2.7116883116883117, | |
| "grad_norm": 0.11982789622673459, | |
| "learning_rate": 1.2015851879915302e-06, | |
| "loss": 0.1456, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.7168831168831167, | |
| "grad_norm": 0.11731066611016075, | |
| "learning_rate": 1.1590096439904496e-06, | |
| "loss": 0.1441, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 2.722077922077922, | |
| "grad_norm": 0.12315215332934029, | |
| "learning_rate": 1.1171841526457838e-06, | |
| "loss": 0.1491, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 0.11853604447437521, | |
| "learning_rate": 1.0761100298139788e-06, | |
| "loss": 0.1471, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.7324675324675325, | |
| "grad_norm": 0.11999351610266512, | |
| "learning_rate": 1.0357885677129614e-06, | |
| "loss": 0.1505, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.7376623376623375, | |
| "grad_norm": 0.12232868615312523, | |
| "learning_rate": 9.962210348814504e-07, | |
| "loss": 0.1446, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.742857142857143, | |
| "grad_norm": 0.12255388742745053, | |
| "learning_rate": 9.574086761391043e-07, | |
| "loss": 0.1488, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.7480519480519483, | |
| "grad_norm": 0.11949069963969916, | |
| "learning_rate": 9.193527125473056e-07, | |
| "loss": 0.149, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.7532467532467533, | |
| "grad_norm": 0.11543627566803302, | |
| "learning_rate": 8.820543413707694e-07, | |
| "loss": 0.1498, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.7584415584415583, | |
| "grad_norm": 0.12420569841945647, | |
| "learning_rate": 8.455147360398819e-07, | |
| "loss": 0.149, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.7636363636363637, | |
| "grad_norm": 0.11841650740456904, | |
| "learning_rate": 8.097350461137631e-07, | |
| "loss": 0.1458, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.768831168831169, | |
| "grad_norm": 0.12370250243324289, | |
| "learning_rate": 7.747163972441213e-07, | |
| "loss": 0.1474, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.774025974025974, | |
| "grad_norm": 0.1214996353399532, | |
| "learning_rate": 7.404598911398331e-07, | |
| "loss": 0.1493, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.779220779220779, | |
| "grad_norm": 0.1195478340774238, | |
| "learning_rate": 7.069666055322777e-07, | |
| "loss": 0.1482, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.7844155844155845, | |
| "grad_norm": 0.12483131247062451, | |
| "learning_rate": 6.742375941414247e-07, | |
| "loss": 0.1472, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.78961038961039, | |
| "grad_norm": 0.11952354364943257, | |
| "learning_rate": 6.422738866427053e-07, | |
| "loss": 0.1495, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.794805194805195, | |
| "grad_norm": 0.11960938019103264, | |
| "learning_rate": 6.110764886346043e-07, | |
| "loss": 0.1478, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.11946979649886329, | |
| "learning_rate": 5.806463816070251e-07, | |
| "loss": 0.1435, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.8051948051948052, | |
| "grad_norm": 0.12094569119420252, | |
| "learning_rate": 5.509845229103999e-07, | |
| "loss": 0.1477, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.8103896103896107, | |
| "grad_norm": 0.12040943576693716, | |
| "learning_rate": 5.220918457255947e-07, | |
| "loss": 0.147, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.8155844155844156, | |
| "grad_norm": 0.12306327075134675, | |
| "learning_rate": 4.939692590345324e-07, | |
| "loss": 0.1459, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.8207792207792206, | |
| "grad_norm": 0.11635380002309499, | |
| "learning_rate": 4.6661764759159954e-07, | |
| "loss": 0.146, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.825974025974026, | |
| "grad_norm": 0.12155462126032764, | |
| "learning_rate": 4.400378718958209e-07, | |
| "loss": 0.1477, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.8311688311688314, | |
| "grad_norm": 0.13019986073768214, | |
| "learning_rate": 4.1423076816376747e-07, | |
| "loss": 0.15, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.8363636363636364, | |
| "grad_norm": 0.11990566287768059, | |
| "learning_rate": 3.8919714830327194e-07, | |
| "loss": 0.1463, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.8415584415584414, | |
| "grad_norm": 0.123316194099736, | |
| "learning_rate": 3.6493779988786835e-07, | |
| "loss": 0.1493, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.846753246753247, | |
| "grad_norm": 0.11382332839422425, | |
| "learning_rate": 3.414534861320262e-07, | |
| "loss": 0.1459, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.851948051948052, | |
| "grad_norm": 0.11644720907565642, | |
| "learning_rate": 3.187449458671249e-07, | |
| "loss": 0.1523, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 0.11676245617873343, | |
| "learning_rate": 2.968128935182279e-07, | |
| "loss": 0.1477, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.862337662337662, | |
| "grad_norm": 0.12250814554782062, | |
| "learning_rate": 2.756580190815927e-07, | |
| "loss": 0.1473, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.8675324675324676, | |
| "grad_norm": 0.11988558217156425, | |
| "learning_rate": 2.5528098810296276e-07, | |
| "loss": 0.148, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.8727272727272726, | |
| "grad_norm": 0.11907029230620142, | |
| "learning_rate": 2.3568244165664555e-07, | |
| "loss": 0.1479, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.877922077922078, | |
| "grad_norm": 0.1175605990368118, | |
| "learning_rate": 2.1686299632532049e-07, | |
| "loss": 0.1454, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.883116883116883, | |
| "grad_norm": 0.1170893554332405, | |
| "learning_rate": 1.9882324418065978e-07, | |
| "loss": 0.1463, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.8883116883116884, | |
| "grad_norm": 0.1217388184223315, | |
| "learning_rate": 1.8156375276468795e-07, | |
| "loss": 0.1469, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.8935064935064934, | |
| "grad_norm": 0.12104004201060858, | |
| "learning_rate": 1.6508506507193766e-07, | |
| "loss": 0.1482, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.898701298701299, | |
| "grad_norm": 0.12023073487651949, | |
| "learning_rate": 1.4938769953236064e-07, | |
| "loss": 0.1482, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.9038961038961038, | |
| "grad_norm": 0.11829219458577808, | |
| "learning_rate": 1.344721499950241e-07, | |
| "loss": 0.1466, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 0.1202978727970241, | |
| "learning_rate": 1.203388857125537e-07, | |
| "loss": 0.1446, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.914285714285714, | |
| "grad_norm": 0.12003327209462132, | |
| "learning_rate": 1.0698835132640361e-07, | |
| "loss": 0.1487, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.9194805194805196, | |
| "grad_norm": 0.1181424516998344, | |
| "learning_rate": 9.442096685283452e-08, | |
| "loss": 0.1465, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.9246753246753245, | |
| "grad_norm": 0.1230036027460985, | |
| "learning_rate": 8.263712766972686e-08, | |
| "loss": 0.1471, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.92987012987013, | |
| "grad_norm": 0.12320775023601876, | |
| "learning_rate": 7.163720450412415e-08, | |
| "loss": 0.1483, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.935064935064935, | |
| "grad_norm": 0.11757950962836114, | |
| "learning_rate": 6.142154342057282e-08, | |
| "loss": 0.1451, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.9402597402597404, | |
| "grad_norm": 0.12075101363106212, | |
| "learning_rate": 5.199046581024214e-08, | |
| "loss": 0.1457, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.9454545454545453, | |
| "grad_norm": 0.11591455658677356, | |
| "learning_rate": 4.334426838080719e-08, | |
| "loss": 0.1454, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.9506493506493507, | |
| "grad_norm": 0.11974339645293, | |
| "learning_rate": 3.5483223147114716e-08, | |
| "loss": 0.1493, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.9558441558441557, | |
| "grad_norm": 0.12221319049722304, | |
| "learning_rate": 2.8407577422628895e-08, | |
| "loss": 0.146, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.961038961038961, | |
| "grad_norm": 0.11694250162651787, | |
| "learning_rate": 2.2117553811643044e-08, | |
| "loss": 0.1458, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.966233766233766, | |
| "grad_norm": 0.11631918845285252, | |
| "learning_rate": 1.6613350202282496e-08, | |
| "loss": 0.1494, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.9714285714285715, | |
| "grad_norm": 0.12032018694766353, | |
| "learning_rate": 1.1895139760284557e-08, | |
| "loss": 0.1492, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.9766233766233765, | |
| "grad_norm": 0.1160647794022908, | |
| "learning_rate": 7.963070923533433e-09, | |
| "loss": 0.146, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.981818181818182, | |
| "grad_norm": 0.12107552388620237, | |
| "learning_rate": 4.817267397405623e-09, | |
| "loss": 0.1482, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.987012987012987, | |
| "grad_norm": 0.12616234731784437, | |
| "learning_rate": 2.4578281508702563e-09, | |
| "loss": 0.1466, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.9922077922077923, | |
| "grad_norm": 0.11975678944265007, | |
| "learning_rate": 8.848274133860246e-10, | |
| "loss": 0.1484, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.9974025974025973, | |
| "grad_norm": 0.12134318440981517, | |
| "learning_rate": 9.831467255028148e-11, | |
| "loss": 0.1452, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 5775, | |
| "total_flos": 1.816985160646656e+16, | |
| "train_loss": 0.20176902750353792, | |
| "train_runtime": 231849.8596, | |
| "train_samples_per_second": 1.195, | |
| "train_steps_per_second": 0.025 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 5775, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.816985160646656e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |