{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.990024577128813,
  "eval_steps": 500,
  "global_step": 648,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004626283070695388,
      "grad_norm": 0.6155030131340027,
      "learning_rate": 9.98456790123457e-06,
      "loss": 17.5936,
      "step": 1
    },
    {
      "epoch": 0.009252566141390776,
      "grad_norm": 0.6313583254814148,
      "learning_rate": 9.969135802469136e-06,
      "loss": 21.7773,
      "step": 2
    },
    {
      "epoch": 0.013878849212086165,
      "grad_norm": 0.4570765793323517,
      "learning_rate": 9.953703703703704e-06,
      "loss": 17.1664,
      "step": 3
    },
    {
      "epoch": 0.018505132282781552,
      "grad_norm": 0.3836732506752014,
      "learning_rate": 9.938271604938273e-06,
      "loss": 18.3922,
      "step": 4
    },
    {
      "epoch": 0.02313141535347694,
      "grad_norm": 0.3599857985973358,
      "learning_rate": 9.92283950617284e-06,
      "loss": 16.9993,
      "step": 5
    },
    {
      "epoch": 0.02775769842417233,
      "grad_norm": 0.4342029094696045,
      "learning_rate": 9.907407407407408e-06,
      "loss": 17.4883,
      "step": 6
    },
    {
      "epoch": 0.032383981494867715,
      "grad_norm": 0.40308186411857605,
      "learning_rate": 9.891975308641975e-06,
      "loss": 18.2562,
      "step": 7
    },
    {
      "epoch": 0.037010264565563104,
      "grad_norm": 0.39312881231307983,
      "learning_rate": 9.876543209876543e-06,
      "loss": 17.9318,
      "step": 8
    },
    {
      "epoch": 0.04163654763625849,
      "grad_norm": 0.6749095916748047,
      "learning_rate": 9.861111111111112e-06,
      "loss": 18.5244,
      "step": 9
    },
    {
      "epoch": 0.04626283070695388,
      "grad_norm": 0.3622874617576599,
      "learning_rate": 9.84567901234568e-06,
      "loss": 16.4386,
      "step": 10
    },
    {
      "epoch": 0.05088911377764927,
      "grad_norm": 0.24345389008522034,
      "learning_rate": 9.830246913580249e-06,
      "loss": 17.5414,
      "step": 11
    },
    {
      "epoch": 0.05551539684834466,
      "grad_norm": 0.34016355872154236,
      "learning_rate": 9.814814814814815e-06,
      "loss": 16.9405,
      "step": 12
    },
    {
      "epoch": 0.06014167991904005,
      "grad_norm": 0.25097739696502686,
      "learning_rate": 9.799382716049384e-06,
      "loss": 14.4692,
      "step": 13
    },
    {
      "epoch": 0.06476796298973543,
      "grad_norm": 0.2524467408657074,
      "learning_rate": 9.783950617283952e-06,
      "loss": 16.2185,
      "step": 14
    },
    {
      "epoch": 0.06939424606043082,
      "grad_norm": 0.3705744743347168,
      "learning_rate": 9.768518518518519e-06,
      "loss": 16.3573,
      "step": 15
    },
    {
      "epoch": 0.07402052913112621,
      "grad_norm": 0.2848168909549713,
      "learning_rate": 9.753086419753087e-06,
      "loss": 16.331,
      "step": 16
    },
    {
      "epoch": 0.0786468122018216,
      "grad_norm": 0.300182044506073,
      "learning_rate": 9.737654320987654e-06,
      "loss": 15.6623,
      "step": 17
    },
    {
      "epoch": 0.08327309527251699,
      "grad_norm": 0.36958807706832886,
      "learning_rate": 9.722222222222223e-06,
      "loss": 18.5857,
      "step": 18
    },
    {
      "epoch": 0.08789937834321238,
      "grad_norm": 0.3009611666202545,
      "learning_rate": 9.706790123456791e-06,
      "loss": 17.0655,
      "step": 19
    },
    {
      "epoch": 0.09252566141390776,
      "grad_norm": 0.24450023472309113,
      "learning_rate": 9.691358024691358e-06,
      "loss": 15.3945,
      "step": 20
    },
    {
      "epoch": 0.09715194448460315,
      "grad_norm": 0.3174358010292053,
      "learning_rate": 9.675925925925926e-06,
      "loss": 15.3045,
      "step": 21
    },
    {
      "epoch": 0.10177822755529854,
      "grad_norm": 0.34466007351875305,
      "learning_rate": 9.660493827160495e-06,
      "loss": 16.7186,
      "step": 22
    },
    {
      "epoch": 0.10640451062599393,
      "grad_norm": 0.294209361076355,
      "learning_rate": 9.645061728395062e-06,
      "loss": 16.2893,
      "step": 23
    },
    {
      "epoch": 0.11103079369668932,
      "grad_norm": 0.3418446183204651,
      "learning_rate": 9.62962962962963e-06,
      "loss": 16.1827,
      "step": 24
    },
    {
      "epoch": 0.11565707676738471,
      "grad_norm": 0.2584611177444458,
      "learning_rate": 9.614197530864198e-06,
      "loss": 15.7934,
      "step": 25
    },
    {
      "epoch": 0.1202833598380801,
      "grad_norm": 0.3344869911670685,
      "learning_rate": 9.598765432098767e-06,
      "loss": 14.2338,
      "step": 26
    },
    {
      "epoch": 0.12490964290877549,
      "grad_norm": 0.28123536705970764,
      "learning_rate": 9.583333333333335e-06,
      "loss": 16.2229,
      "step": 27
    },
    {
      "epoch": 0.12953592597947086,
      "grad_norm": 0.22181656956672668,
      "learning_rate": 9.567901234567902e-06,
      "loss": 13.7744,
      "step": 28
    },
    {
      "epoch": 0.13416220905016626,
      "grad_norm": 0.45139142870903015,
      "learning_rate": 9.55246913580247e-06,
      "loss": 15.4782,
      "step": 29
    },
    {
      "epoch": 0.13878849212086164,
      "grad_norm": 0.21577508747577667,
      "learning_rate": 9.537037037037037e-06,
      "loss": 15.6752,
      "step": 30
    },
    {
      "epoch": 0.14341477519155704,
      "grad_norm": 0.29512590169906616,
      "learning_rate": 9.521604938271606e-06,
      "loss": 14.7965,
      "step": 31
    },
    {
      "epoch": 0.14804105826225242,
      "grad_norm": 0.28356751799583435,
      "learning_rate": 9.506172839506174e-06,
      "loss": 14.9097,
      "step": 32
    },
    {
      "epoch": 0.15266734133294782,
      "grad_norm": 0.25781649351119995,
      "learning_rate": 9.490740740740741e-06,
      "loss": 14.494,
      "step": 33
    },
    {
      "epoch": 0.1572936244036432,
      "grad_norm": 0.1967274248600006,
      "learning_rate": 9.47530864197531e-06,
      "loss": 15.0075,
      "step": 34
    },
    {
      "epoch": 0.1619199074743386,
      "grad_norm": 0.42894116044044495,
      "learning_rate": 9.459876543209878e-06,
      "loss": 14.8055,
      "step": 35
    },
    {
      "epoch": 0.16654619054503397,
      "grad_norm": 0.2417590320110321,
      "learning_rate": 9.444444444444445e-06,
      "loss": 14.7145,
      "step": 36
    },
    {
      "epoch": 0.17117247361572938,
      "grad_norm": 0.15463685989379883,
      "learning_rate": 9.429012345679013e-06,
      "loss": 14.4273,
      "step": 37
    },
    {
      "epoch": 0.17579875668642475,
      "grad_norm": 0.21183425188064575,
      "learning_rate": 9.413580246913581e-06,
      "loss": 16.6545,
      "step": 38
    },
    {
      "epoch": 0.18042503975712013,
      "grad_norm": 0.41926464438438416,
      "learning_rate": 9.398148148148148e-06,
      "loss": 13.4609,
      "step": 39
    },
    {
      "epoch": 0.18505132282781553,
      "grad_norm": 0.21585555374622345,
      "learning_rate": 9.382716049382717e-06,
      "loss": 14.1515,
      "step": 40
    },
    {
      "epoch": 0.1896776058985109,
      "grad_norm": 0.28051456809043884,
      "learning_rate": 9.367283950617285e-06,
      "loss": 14.0035,
      "step": 41
    },
    {
      "epoch": 0.1943038889692063,
      "grad_norm": 0.18281345069408417,
      "learning_rate": 9.351851851851854e-06,
      "loss": 14.6411,
      "step": 42
    },
    {
      "epoch": 0.19893017203990168,
      "grad_norm": 0.1747712790966034,
      "learning_rate": 9.33641975308642e-06,
      "loss": 14.4986,
      "step": 43
    },
    {
      "epoch": 0.20355645511059708,
      "grad_norm": 0.2613980174064636,
      "learning_rate": 9.320987654320989e-06,
      "loss": 14.2572,
      "step": 44
    },
    {
      "epoch": 0.20818273818129246,
      "grad_norm": 0.19127604365348816,
      "learning_rate": 9.305555555555557e-06,
      "loss": 14.3084,
      "step": 45
    },
    {
      "epoch": 0.21280902125198786,
      "grad_norm": 0.24545663595199585,
      "learning_rate": 9.290123456790124e-06,
      "loss": 13.3381,
      "step": 46
    },
    {
      "epoch": 0.21743530432268324,
      "grad_norm": 0.25891631841659546,
      "learning_rate": 9.274691358024692e-06,
      "loss": 13.1076,
      "step": 47
    },
    {
      "epoch": 0.22206158739337864,
      "grad_norm": 0.23256707191467285,
      "learning_rate": 9.25925925925926e-06,
      "loss": 13.3061,
      "step": 48
    },
    {
      "epoch": 0.22668787046407401,
      "grad_norm": 0.2427562177181244,
      "learning_rate": 9.243827160493828e-06,
      "loss": 16.0646,
      "step": 49
    },
    {
      "epoch": 0.23131415353476942,
      "grad_norm": 0.22760462760925293,
      "learning_rate": 9.228395061728396e-06,
      "loss": 12.937,
      "step": 50
    },
    {
      "epoch": 0.2359404366054648,
      "grad_norm": 0.27145224809646606,
      "learning_rate": 9.212962962962963e-06,
      "loss": 13.8299,
      "step": 51
    },
    {
      "epoch": 0.2405667196761602,
      "grad_norm": 0.3120661973953247,
      "learning_rate": 9.197530864197531e-06,
      "loss": 14.074,
      "step": 52
    },
    {
      "epoch": 0.24519300274685557,
      "grad_norm": 0.21604514122009277,
      "learning_rate": 9.1820987654321e-06,
      "loss": 13.6731,
      "step": 53
    },
    {
      "epoch": 0.24981928581755097,
      "grad_norm": 0.22970032691955566,
      "learning_rate": 9.166666666666666e-06,
      "loss": 14.0123,
      "step": 54
    },
    {
      "epoch": 0.2544455688882464,
      "grad_norm": 0.24079963564872742,
      "learning_rate": 9.151234567901235e-06,
      "loss": 13.5458,
      "step": 55
    },
    {
      "epoch": 0.2590718519589417,
      "grad_norm": 0.21451319754123688,
      "learning_rate": 9.135802469135803e-06,
      "loss": 12.5504,
      "step": 56
    },
    {
      "epoch": 0.2636981350296371,
      "grad_norm": 0.2508305609226227,
      "learning_rate": 9.120370370370372e-06,
      "loss": 14.8642,
      "step": 57
    },
    {
      "epoch": 0.26832441810033253,
      "grad_norm": 0.24022799730300903,
      "learning_rate": 9.10493827160494e-06,
      "loss": 15.1011,
      "step": 58
    },
    {
      "epoch": 0.2729507011710279,
      "grad_norm": 0.3794403076171875,
      "learning_rate": 9.089506172839507e-06,
      "loss": 14.3993,
      "step": 59
    },
    {
      "epoch": 0.2775769842417233,
      "grad_norm": 0.28563258051872253,
      "learning_rate": 9.074074074074075e-06,
      "loss": 14.294,
      "step": 60
    },
    {
      "epoch": 0.2822032673124187,
      "grad_norm": 0.28061386942863464,
      "learning_rate": 9.058641975308642e-06,
      "loss": 13.033,
      "step": 61
    },
    {
      "epoch": 0.2868295503831141,
      "grad_norm": 0.3351198136806488,
      "learning_rate": 9.04320987654321e-06,
      "loss": 14.5075,
      "step": 62
    },
    {
      "epoch": 0.29145583345380943,
      "grad_norm": 0.23749324679374695,
      "learning_rate": 9.027777777777779e-06,
      "loss": 13.7952,
      "step": 63
    },
    {
      "epoch": 0.29608211652450483,
      "grad_norm": 0.16919538378715515,
      "learning_rate": 9.012345679012346e-06,
      "loss": 14.7146,
      "step": 64
    },
    {
      "epoch": 0.30070839959520024,
      "grad_norm": 0.18566973507404327,
      "learning_rate": 8.996913580246914e-06,
      "loss": 13.9967,
      "step": 65
    },
    {
      "epoch": 0.30533468266589564,
      "grad_norm": 0.25144919753074646,
      "learning_rate": 8.981481481481483e-06,
      "loss": 13.0544,
      "step": 66
    },
    {
      "epoch": 0.309960965736591,
      "grad_norm": 0.1971070021390915,
      "learning_rate": 8.96604938271605e-06,
      "loss": 12.7932,
      "step": 67
    },
    {
      "epoch": 0.3145872488072864,
      "grad_norm": 0.2236046940088272,
      "learning_rate": 8.950617283950618e-06,
      "loss": 14.4338,
      "step": 68
    },
    {
      "epoch": 0.3192135318779818,
      "grad_norm": 0.20062977075576782,
      "learning_rate": 8.935185185185186e-06,
      "loss": 13.6965,
      "step": 69
    },
    {
      "epoch": 0.3238398149486772,
      "grad_norm": 0.3102545738220215,
      "learning_rate": 8.919753086419753e-06,
      "loss": 14.174,
      "step": 70
    },
    {
      "epoch": 0.32846609801937254,
      "grad_norm": 0.282172828912735,
      "learning_rate": 8.904320987654322e-06,
      "loss": 14.012,
      "step": 71
    },
    {
      "epoch": 0.33309238109006795,
      "grad_norm": 0.31374603509902954,
      "learning_rate": 8.888888888888888e-06,
      "loss": 12.9434,
      "step": 72
    },
    {
      "epoch": 0.33771866416076335,
      "grad_norm": 0.24876756966114044,
      "learning_rate": 8.873456790123458e-06,
      "loss": 13.0801,
      "step": 73
    },
    {
      "epoch": 0.34234494723145875,
      "grad_norm": 0.2828700840473175,
      "learning_rate": 8.858024691358025e-06,
      "loss": 12.3676,
      "step": 74
    },
    {
      "epoch": 0.3469712303021541,
      "grad_norm": 0.23395580053329468,
      "learning_rate": 8.842592592592594e-06,
      "loss": 12.8463,
      "step": 75
    },
    {
      "epoch": 0.3515975133728495,
      "grad_norm": 0.26694121956825256,
      "learning_rate": 8.827160493827162e-06,
      "loss": 11.5822,
      "step": 76
    },
    {
      "epoch": 0.3562237964435449,
      "grad_norm": 0.2129782736301422,
      "learning_rate": 8.811728395061729e-06,
      "loss": 12.6834,
      "step": 77
    },
    {
      "epoch": 0.36085007951424025,
      "grad_norm": 0.20400603115558624,
      "learning_rate": 8.796296296296297e-06,
      "loss": 14.2065,
      "step": 78
    },
    {
      "epoch": 0.36547636258493565,
      "grad_norm": 0.45560839772224426,
      "learning_rate": 8.780864197530866e-06,
      "loss": 13.1033,
      "step": 79
    },
    {
      "epoch": 0.37010264565563106,
      "grad_norm": 0.26329922676086426,
      "learning_rate": 8.765432098765432e-06,
      "loss": 11.9119,
      "step": 80
    },
    {
      "epoch": 0.37472892872632646,
      "grad_norm": 0.21817731857299805,
      "learning_rate": 8.750000000000001e-06,
      "loss": 12.815,
      "step": 81
    },
    {
      "epoch": 0.3793552117970218,
      "grad_norm": 0.2743465304374695,
      "learning_rate": 8.73456790123457e-06,
      "loss": 12.2468,
      "step": 82
    },
    {
      "epoch": 0.3839814948677172,
      "grad_norm": 0.2570422291755676,
      "learning_rate": 8.719135802469136e-06,
      "loss": 11.9801,
      "step": 83
    },
    {
      "epoch": 0.3886077779384126,
      "grad_norm": 0.21237581968307495,
      "learning_rate": 8.703703703703705e-06,
      "loss": 11.7092,
      "step": 84
    },
    {
      "epoch": 0.393234061009108,
      "grad_norm": 0.27444881200790405,
      "learning_rate": 8.688271604938271e-06,
      "loss": 13.3074,
      "step": 85
    },
    {
      "epoch": 0.39786034407980336,
      "grad_norm": 0.22574838995933533,
      "learning_rate": 8.67283950617284e-06,
      "loss": 13.1573,
      "step": 86
    },
    {
      "epoch": 0.40248662715049877,
      "grad_norm": 0.252755731344223,
      "learning_rate": 8.657407407407408e-06,
      "loss": 12.3472,
      "step": 87
    },
    {
      "epoch": 0.40711291022119417,
      "grad_norm": 0.2729659378528595,
      "learning_rate": 8.641975308641975e-06,
      "loss": 12.8413,
      "step": 88
    },
    {
      "epoch": 0.41173919329188957,
      "grad_norm": 0.21728309988975525,
      "learning_rate": 8.626543209876543e-06,
      "loss": 11.3523,
      "step": 89
    },
    {
      "epoch": 0.4163654763625849,
      "grad_norm": 0.264457106590271,
      "learning_rate": 8.611111111111112e-06,
      "loss": 13.1175,
      "step": 90
    },
    {
      "epoch": 0.4209917594332803,
      "grad_norm": 0.20264536142349243,
      "learning_rate": 8.59567901234568e-06,
      "loss": 12.8442,
      "step": 91
    },
    {
      "epoch": 0.4256180425039757,
      "grad_norm": 0.3003804385662079,
      "learning_rate": 8.580246913580249e-06,
      "loss": 14.5755,
      "step": 92
    },
    {
      "epoch": 0.4302443255746711,
      "grad_norm": 0.294029176235199,
      "learning_rate": 8.564814814814816e-06,
      "loss": 13.5777,
      "step": 93
    },
    {
      "epoch": 0.4348706086453665,
      "grad_norm": 0.21502816677093506,
      "learning_rate": 8.549382716049384e-06,
      "loss": 11.9804,
      "step": 94
    },
    {
      "epoch": 0.4394968917160619,
      "grad_norm": 0.21898359060287476,
      "learning_rate": 8.53395061728395e-06,
      "loss": 13.6382,
      "step": 95
    },
    {
      "epoch": 0.4441231747867573,
      "grad_norm": 0.20010338723659515,
      "learning_rate": 8.518518518518519e-06,
      "loss": 12.4733,
      "step": 96
    },
    {
      "epoch": 0.4487494578574526,
      "grad_norm": 0.2247803956270218,
      "learning_rate": 8.503086419753088e-06,
      "loss": 12.1589,
      "step": 97
    },
    {
      "epoch": 0.45337574092814803,
      "grad_norm": 0.29326918721199036,
      "learning_rate": 8.487654320987654e-06,
      "loss": 12.8177,
      "step": 98
    },
    {
      "epoch": 0.45800202399884343,
      "grad_norm": 0.18403670191764832,
      "learning_rate": 8.472222222222223e-06,
      "loss": 13.8527,
      "step": 99
    },
    {
      "epoch": 0.46262830706953884,
      "grad_norm": 0.24531710147857666,
      "learning_rate": 8.456790123456791e-06,
      "loss": 11.9126,
      "step": 100
    },
    {
      "epoch": 0.4672545901402342,
      "grad_norm": 0.24183672666549683,
      "learning_rate": 8.441358024691358e-06,
      "loss": 12.952,
      "step": 101
    },
    {
      "epoch": 0.4718808732109296,
      "grad_norm": 0.29563647508621216,
      "learning_rate": 8.425925925925926e-06,
      "loss": 14.462,
      "step": 102
    },
    {
      "epoch": 0.476507156281625,
      "grad_norm": 0.21156498789787292,
      "learning_rate": 8.410493827160495e-06,
      "loss": 13.4846,
      "step": 103
    },
    {
      "epoch": 0.4811334393523204,
      "grad_norm": 0.24183927476406097,
      "learning_rate": 8.395061728395062e-06,
      "loss": 11.428,
      "step": 104
    },
    {
      "epoch": 0.48575972242301574,
      "grad_norm": 0.21241174638271332,
      "learning_rate": 8.37962962962963e-06,
      "loss": 13.8068,
      "step": 105
    },
    {
      "epoch": 0.49038600549371114,
      "grad_norm": 0.2149106115102768,
      "learning_rate": 8.364197530864199e-06,
      "loss": 12.7659,
      "step": 106
    },
    {
      "epoch": 0.49501228856440654,
      "grad_norm": 0.21057994663715363,
      "learning_rate": 8.348765432098767e-06,
      "loss": 12.4651,
      "step": 107
    },
    {
      "epoch": 0.49963857163510195,
      "grad_norm": 0.18793097138404846,
      "learning_rate": 8.333333333333334e-06,
      "loss": 11.7854,
      "step": 108
    },
    {
      "epoch": 0.5042648547057973,
      "grad_norm": 0.1801634281873703,
      "learning_rate": 8.317901234567902e-06,
      "loss": 12.9306,
      "step": 109
    },
    {
      "epoch": 0.5088911377764928,
      "grad_norm": 0.21523930132389069,
      "learning_rate": 8.30246913580247e-06,
      "loss": 11.6702,
      "step": 110
    },
    {
      "epoch": 0.5135174208471881,
      "grad_norm": 0.22931678593158722,
      "learning_rate": 8.287037037037037e-06,
      "loss": 11.5856,
      "step": 111
    },
    {
      "epoch": 0.5181437039178834,
      "grad_norm": 0.23802338540554047,
      "learning_rate": 8.271604938271606e-06,
      "loss": 12.7508,
      "step": 112
    },
    {
      "epoch": 0.5227699869885789,
      "grad_norm": 0.23633763194084167,
      "learning_rate": 8.256172839506174e-06,
      "loss": 12.0906,
      "step": 113
    },
    {
      "epoch": 0.5273962700592743,
      "grad_norm": 0.22553692758083344,
      "learning_rate": 8.240740740740741e-06,
      "loss": 12.4982,
      "step": 114
    },
    {
      "epoch": 0.5320225531299696,
      "grad_norm": 0.2265913337469101,
      "learning_rate": 8.22530864197531e-06,
      "loss": 11.4945,
      "step": 115
    },
    {
      "epoch": 0.5366488362006651,
      "grad_norm": 0.1848449558019638,
      "learning_rate": 8.209876543209876e-06,
      "loss": 11.866,
      "step": 116
    },
    {
      "epoch": 0.5412751192713604,
      "grad_norm": 0.16533511877059937,
      "learning_rate": 8.194444444444445e-06,
      "loss": 12.3876,
      "step": 117
    },
    {
      "epoch": 0.5459014023420558,
      "grad_norm": 0.19875134527683258,
      "learning_rate": 8.179012345679013e-06,
      "loss": 12.0186,
      "step": 118
    },
    {
      "epoch": 0.5505276854127512,
      "grad_norm": 0.20982632040977478,
      "learning_rate": 8.16358024691358e-06,
      "loss": 13.1202,
      "step": 119
    },
    {
      "epoch": 0.5551539684834466,
      "grad_norm": 0.2689734101295471,
      "learning_rate": 8.148148148148148e-06,
      "loss": 13.9764,
      "step": 120
    },
    {
      "epoch": 0.559780251554142,
      "grad_norm": 0.14883318543434143,
      "learning_rate": 8.132716049382717e-06,
      "loss": 11.7656,
      "step": 121
    },
    {
      "epoch": 0.5644065346248374,
      "grad_norm": 0.18115845322608948,
      "learning_rate": 8.117283950617285e-06,
      "loss": 11.8902,
      "step": 122
    },
    {
      "epoch": 0.5690328176955327,
      "grad_norm": 0.23553551733493805,
      "learning_rate": 8.101851851851854e-06,
      "loss": 13.0425,
      "step": 123
    },
    {
      "epoch": 0.5736591007662282,
      "grad_norm": 0.2110109180212021,
      "learning_rate": 8.08641975308642e-06,
      "loss": 13.6931,
      "step": 124
    },
    {
      "epoch": 0.5782853838369235,
      "grad_norm": 0.2748431861400604,
      "learning_rate": 8.070987654320989e-06,
      "loss": 12.7852,
      "step": 125
    },
    {
      "epoch": 0.5829116669076189,
      "grad_norm": 0.1847638040781021,
      "learning_rate": 8.055555555555557e-06,
      "loss": 12.9106,
      "step": 126
    },
    {
      "epoch": 0.5875379499783143,
      "grad_norm": 0.48550117015838623,
      "learning_rate": 8.040123456790124e-06,
      "loss": 11.5484,
      "step": 127
    },
    {
      "epoch": 0.5921642330490097,
      "grad_norm": 0.2579441964626312,
      "learning_rate": 8.024691358024692e-06,
      "loss": 11.9785,
      "step": 128
    },
    {
      "epoch": 0.5967905161197051,
      "grad_norm": 0.19037479162216187,
      "learning_rate": 8.00925925925926e-06,
      "loss": 11.0466,
      "step": 129
    },
    {
      "epoch": 0.6014167991904005,
      "grad_norm": 0.22901126742362976,
      "learning_rate": 7.993827160493828e-06,
      "loss": 11.1139,
      "step": 130
    },
    {
      "epoch": 0.6060430822610958,
      "grad_norm": 0.16406911611557007,
      "learning_rate": 7.978395061728396e-06,
      "loss": 10.6613,
      "step": 131
    },
    {
      "epoch": 0.6106693653317913,
      "grad_norm": 0.23492039740085602,
      "learning_rate": 7.962962962962963e-06,
      "loss": 11.3782,
      "step": 132
    },
    {
      "epoch": 0.6152956484024866,
      "grad_norm": 0.21692270040512085,
      "learning_rate": 7.947530864197531e-06,
      "loss": 11.2788,
      "step": 133
    },
    {
      "epoch": 0.619921931473182,
      "grad_norm": 0.17832407355308533,
      "learning_rate": 7.9320987654321e-06,
      "loss": 11.7617,
      "step": 134
    },
    {
      "epoch": 0.6245482145438774,
      "grad_norm": 0.20935167372226715,
      "learning_rate": 7.916666666666667e-06,
      "loss": 10.9103,
      "step": 135
    },
    {
      "epoch": 0.6291744976145728,
      "grad_norm": 0.199855774641037,
      "learning_rate": 7.901234567901235e-06,
      "loss": 11.5339,
      "step": 136
    },
    {
      "epoch": 0.6338007806852681,
      "grad_norm": 0.1627349704504013,
      "learning_rate": 7.885802469135803e-06,
      "loss": 10.3593,
      "step": 137
    },
    {
      "epoch": 0.6384270637559636,
      "grad_norm": 0.18615403771400452,
      "learning_rate": 7.870370370370372e-06,
      "loss": 12.3204,
      "step": 138
    },
    {
      "epoch": 0.6430533468266589,
      "grad_norm": 0.17866994440555573,
      "learning_rate": 7.854938271604939e-06,
      "loss": 11.548,
      "step": 139
    },
    {
      "epoch": 0.6476796298973544,
      "grad_norm": 0.22644223272800446,
      "learning_rate": 7.839506172839507e-06,
      "loss": 12.0289,
      "step": 140
    },
    {
      "epoch": 0.6523059129680497,
      "grad_norm": 0.18510523438453674,
      "learning_rate": 7.824074074074076e-06,
      "loss": 12.1312,
      "step": 141
    },
    {
      "epoch": 0.6569321960387451,
      "grad_norm": 0.1948799043893814,
      "learning_rate": 7.808641975308642e-06,
      "loss": 13.4831,
      "step": 142
    },
    {
      "epoch": 0.6615584791094405,
      "grad_norm": 0.2084604948759079,
      "learning_rate": 7.79320987654321e-06,
      "loss": 11.1289,
      "step": 143
    },
    {
      "epoch": 0.6661847621801359,
      "grad_norm": 0.194437637925148,
      "learning_rate": 7.77777777777778e-06,
      "loss": 11.5961,
      "step": 144
    },
    {
      "epoch": 0.6708110452508312,
      "grad_norm": 0.2627541124820709,
      "learning_rate": 7.762345679012346e-06,
      "loss": 12.0198,
      "step": 145
    },
    {
      "epoch": 0.6754373283215267,
      "grad_norm": 0.21856093406677246,
      "learning_rate": 7.746913580246914e-06,
      "loss": 12.5285,
      "step": 146
    },
    {
      "epoch": 0.680063611392222,
      "grad_norm": 0.15228690207004547,
      "learning_rate": 7.731481481481483e-06,
      "loss": 10.8653,
      "step": 147
    },
    {
      "epoch": 0.6846898944629175,
      "grad_norm": 0.22951403260231018,
      "learning_rate": 7.71604938271605e-06,
      "loss": 10.8751,
      "step": 148
    },
    {
      "epoch": 0.6893161775336129,
      "grad_norm": 0.25641652941703796,
      "learning_rate": 7.700617283950618e-06,
      "loss": 11.0195,
      "step": 149
    },
    {
      "epoch": 0.6939424606043082,
      "grad_norm": 0.17132331430912018,
      "learning_rate": 7.685185185185185e-06,
      "loss": 13.9163,
      "step": 150
    },
    {
      "epoch": 0.6985687436750037,
      "grad_norm": 0.24479900300502777,
      "learning_rate": 7.669753086419753e-06,
      "loss": 11.4487,
      "step": 151
    },
    {
      "epoch": 0.703195026745699,
      "grad_norm": 0.1990821659564972,
      "learning_rate": 7.654320987654322e-06,
      "loss": 10.6267,
      "step": 152
    },
    {
      "epoch": 0.7078213098163944,
      "grad_norm": 0.17914509773254395,
      "learning_rate": 7.638888888888888e-06,
      "loss": 11.5696,
      "step": 153
    },
    {
      "epoch": 0.7124475928870898,
      "grad_norm": 0.19086718559265137,
      "learning_rate": 7.623456790123458e-06,
      "loss": 12.3853,
      "step": 154
    },
    {
      "epoch": 0.7170738759577852,
      "grad_norm": 0.1417158991098404,
      "learning_rate": 7.608024691358026e-06,
      "loss": 10.8805,
      "step": 155
    },
    {
      "epoch": 0.7217001590284805,
      "grad_norm": 0.32373055815696716,
      "learning_rate": 7.592592592592594e-06,
      "loss": 12.586,
      "step": 156
    },
    {
      "epoch": 0.726326442099176,
      "grad_norm": 0.1995164453983307,
      "learning_rate": 7.577160493827161e-06,
      "loss": 11.4977,
      "step": 157
    },
    {
      "epoch": 0.7309527251698713,
      "grad_norm": 0.16449247300624847,
      "learning_rate": 7.561728395061729e-06,
      "loss": 11.8027,
      "step": 158
    },
    {
      "epoch": 0.7355790082405668,
      "grad_norm": 0.21888476610183716,
      "learning_rate": 7.546296296296297e-06,
      "loss": 11.8753,
      "step": 159
    },
    {
      "epoch": 0.7402052913112621,
      "grad_norm": 0.19433487951755524,
      "learning_rate": 7.530864197530865e-06,
      "loss": 11.5252,
      "step": 160
    },
    {
      "epoch": 0.7448315743819575,
      "grad_norm": 0.19709180295467377,
      "learning_rate": 7.515432098765433e-06,
      "loss": 10.7059,
      "step": 161
    },
    {
      "epoch": 0.7494578574526529,
      "grad_norm": 0.17522279918193817,
      "learning_rate": 7.500000000000001e-06,
      "loss": 11.2329,
      "step": 162
    },
    {
      "epoch": 0.7540841405233483,
      "grad_norm": 0.2038147747516632,
      "learning_rate": 7.484567901234569e-06,
      "loss": 11.425,
      "step": 163
    },
    {
      "epoch": 0.7587104235940436,
      "grad_norm": 0.29561126232147217,
      "learning_rate": 7.469135802469136e-06,
      "loss": 13.192,
      "step": 164
    },
    {
      "epoch": 0.7633367066647391,
      "grad_norm": 0.2295159548521042,
      "learning_rate": 7.453703703703704e-06,
      "loss": 10.9714,
      "step": 165
    },
    {
      "epoch": 0.7679629897354344,
      "grad_norm": 0.1554422378540039,
      "learning_rate": 7.438271604938272e-06,
      "loss": 10.5617,
      "step": 166
    },
    {
      "epoch": 0.7725892728061299,
      "grad_norm": 0.19565965235233307,
      "learning_rate": 7.42283950617284e-06,
      "loss": 10.8574,
      "step": 167
    },
    {
      "epoch": 0.7772155558768252,
      "grad_norm": 0.2645648419857025,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 11.4472,
      "step": 168
    },
    {
      "epoch": 0.7818418389475206,
      "grad_norm": 0.2832958996295929,
      "learning_rate": 7.391975308641975e-06,
      "loss": 11.628,
      "step": 169
    },
    {
      "epoch": 0.786468122018216,
      "grad_norm": 0.2097349464893341,
      "learning_rate": 7.3765432098765435e-06,
      "loss": 12.2379,
      "step": 170
    },
    {
      "epoch": 0.7910944050889114,
      "grad_norm": 0.19168758392333984,
      "learning_rate": 7.361111111111112e-06,
      "loss": 10.0489,
      "step": 171
    },
    {
      "epoch": 0.7957206881596067,
      "grad_norm": 0.22470812499523163,
      "learning_rate": 7.34567901234568e-06,
      "loss": 11.8545,
      "step": 172
    },
    {
      "epoch": 0.8003469712303022,
      "grad_norm": 0.2722543179988861,
      "learning_rate": 7.330246913580248e-06,
      "loss": 10.4316,
      "step": 173
    },
    {
      "epoch": 0.8049732543009975,
      "grad_norm": 0.1686207354068756,
      "learning_rate": 7.314814814814816e-06,
      "loss": 12.5917,
      "step": 174
    },
    {
      "epoch": 0.8095995373716929,
      "grad_norm": 0.2805918753147125,
      "learning_rate": 7.299382716049383e-06,
      "loss": 10.6457,
      "step": 175
    },
    {
      "epoch": 0.8142258204423883,
      "grad_norm": 0.28346166014671326,
      "learning_rate": 7.283950617283952e-06,
      "loss": 11.6686,
      "step": 176
    },
    {
      "epoch": 0.8188521035130837,
      "grad_norm": 0.23147298395633698,
      "learning_rate": 7.268518518518519e-06,
      "loss": 10.316,
      "step": 177
    },
    {
      "epoch": 0.8234783865837791,
      "grad_norm": 0.17895270884037018,
      "learning_rate": 7.253086419753087e-06,
      "loss": 10.5762,
      "step": 178
    },
    {
      "epoch": 0.8281046696544745,
      "grad_norm": 0.2260911464691162,
      "learning_rate": 7.2376543209876545e-06,
      "loss": 10.9989,
      "step": 179
    },
    {
      "epoch": 0.8327309527251698,
      "grad_norm": 0.2163412719964981,
      "learning_rate": 7.222222222222223e-06,
      "loss": 10.7737,
      "step": 180
    },
    {
      "epoch": 0.8373572357958653,
      "grad_norm": 0.21451716125011444,
      "learning_rate": 7.2067901234567905e-06,
      "loss": 12.4478,
      "step": 181
    },
    {
      "epoch": 0.8419835188665606,
      "grad_norm": 0.14181958138942719,
      "learning_rate": 7.191358024691358e-06,
      "loss": 11.1375,
      "step": 182
    },
    {
      "epoch": 0.846609801937256,
      "grad_norm": 0.18365442752838135,
      "learning_rate": 7.1759259259259266e-06,
      "loss": 11.3768,
      "step": 183
    },
    {
      "epoch": 0.8512360850079514,
      "grad_norm": 0.20560483634471893,
      "learning_rate": 7.160493827160494e-06,
      "loss": 10.6342,
      "step": 184
    },
    {
      "epoch": 0.8558623680786468,
      "grad_norm": 0.3396760821342468,
      "learning_rate": 7.145061728395062e-06,
      "loss": 12.9821,
      "step": 185
    },
    {
      "epoch": 0.8604886511493423,
      "grad_norm": 0.18169118463993073,
      "learning_rate": 7.129629629629629e-06,
      "loss": 11.067,
      "step": 186
    },
    {
      "epoch": 0.8651149342200376,
      "grad_norm": 0.18200387060642242,
      "learning_rate": 7.114197530864199e-06,
      "loss": 10.6416,
      "step": 187
    },
    {
      "epoch": 0.869741217290733,
      "grad_norm": 0.23793809115886688,
      "learning_rate": 7.098765432098766e-06,
      "loss": 12.6389,
      "step": 188
    },
    {
      "epoch": 0.8743675003614284,
      "grad_norm": 0.6948350667953491,
      "learning_rate": 7.083333333333335e-06,
      "loss": 11.5131,
      "step": 189
    },
    {
      "epoch": 0.8789937834321238,
      "grad_norm": 0.19556772708892822,
      "learning_rate": 7.067901234567902e-06,
      "loss": 12.9335,
      "step": 190
    },
    {
      "epoch": 0.8836200665028191,
      "grad_norm": 0.21277347207069397,
      "learning_rate": 7.05246913580247e-06,
      "loss": 11.533,
      "step": 191
    },
    {
      "epoch": 0.8882463495735146,
      "grad_norm": 0.13241365551948547,
      "learning_rate": 7.0370370370370375e-06,
      "loss": 11.7588,
      "step": 192
    },
    {
      "epoch": 0.8928726326442099,
      "grad_norm": 0.16065774857997894,
      "learning_rate": 7.021604938271606e-06,
      "loss": 10.907,
      "step": 193
    },
    {
      "epoch": 0.8974989157149053,
      "grad_norm": 0.2425934374332428,
      "learning_rate": 7.0061728395061735e-06,
      "loss": 12.0332,
      "step": 194
    },
    {
      "epoch": 0.9021251987856007,
      "grad_norm": 0.32493147253990173,
      "learning_rate": 6.990740740740741e-06,
      "loss": 11.0481,
      "step": 195
    },
    {
      "epoch": 0.9067514818562961,
      "grad_norm": 0.20006415247917175,
      "learning_rate": 6.975308641975309e-06,
      "loss": 10.3825,
      "step": 196
    },
    {
      "epoch": 0.9113777649269915,
      "grad_norm": 0.14633627235889435,
      "learning_rate": 6.959876543209877e-06,
      "loss": 11.3131,
      "step": 197
    },
    {
      "epoch": 0.9160040479976869,
      "grad_norm": 0.1695437729358673,
      "learning_rate": 6.944444444444445e-06,
      "loss": 11.6805,
      "step": 198
    },
    {
      "epoch": 0.9206303310683822,
      "grad_norm": 0.22382433712482452,
      "learning_rate": 6.929012345679012e-06,
      "loss": 12.4138,
      "step": 199
    },
    {
      "epoch": 0.9252566141390777,
      "grad_norm": 0.18652208149433136,
      "learning_rate": 6.913580246913581e-06,
      "loss": 11.1235,
      "step": 200
    },
    {
      "epoch": 0.929882897209773,
      "grad_norm": 0.21566657721996307,
      "learning_rate": 6.898148148148148e-06,
      "loss": 11.1278,
      "step": 201
    },
    {
      "epoch": 0.9345091802804684,
      "grad_norm": 0.17630067467689514,
      "learning_rate": 6.882716049382716e-06,
      "loss": 12.1383,
      "step": 202
    },
    {
      "epoch": 0.9391354633511638,
      "grad_norm": 0.22753016650676727,
      "learning_rate": 6.867283950617285e-06,
      "loss": 12.0065,
      "step": 203
    },
    {
      "epoch": 0.9437617464218592,
      "grad_norm": 0.12988047301769257,
      "learning_rate": 6.851851851851853e-06,
      "loss": 11.9477,
      "step": 204
    },
    {
      "epoch": 0.9483880294925546,
      "grad_norm": 0.2507847547531128,
      "learning_rate": 6.8364197530864205e-06,
      "loss": 11.5818,
      "step": 205
    },
    {
      "epoch": 0.95301431256325,
      "grad_norm": 0.18030938506126404,
      "learning_rate": 6.820987654320988e-06,
      "loss": 13.1558,
      "step": 206
    },
    {
      "epoch": 0.9576405956339453,
      "grad_norm": 0.21600230038166046,
      "learning_rate": 6.8055555555555566e-06,
      "loss": 10.7038,
      "step": 207
    },
    {
      "epoch": 0.9622668787046408,
      "grad_norm": 0.1989142745733261,
      "learning_rate": 6.790123456790124e-06,
      "loss": 11.9244,
      "step": 208
    },
    {
      "epoch": 0.9668931617753361,
      "grad_norm": 0.2517797648906708,
      "learning_rate": 6.774691358024692e-06,
      "loss": 11.0019,
      "step": 209
    },
    {
      "epoch": 0.9715194448460315,
      "grad_norm": 0.24413681030273438,
      "learning_rate": 6.75925925925926e-06,
      "loss": 12.4322,
      "step": 210
    },
    {
      "epoch": 0.9761457279167269,
      "grad_norm": 0.20667782425880432,
      "learning_rate": 6.743827160493828e-06,
      "loss": 10.6134,
      "step": 211
    },
    {
      "epoch": 0.9807720109874223,
      "grad_norm": 0.17336109280586243,
      "learning_rate": 6.728395061728395e-06,
      "loss": 10.4673,
      "step": 212
    },
    {
      "epoch": 0.9853982940581176,
      "grad_norm": 0.17548175156116486,
      "learning_rate": 6.712962962962963e-06,
      "loss": 10.1377,
      "step": 213
    },
    {
      "epoch": 0.9900245771288131,
      "grad_norm": 0.1717563420534134,
      "learning_rate": 6.6975308641975314e-06,
      "loss": 11.5888,
      "step": 214
    },
    {
      "epoch": 0.9946508601995084,
      "grad_norm": 0.1841057986021042,
      "learning_rate": 6.682098765432099e-06,
      "loss": 12.2213,
      "step": 215
    },
    {
      "epoch": 0.9992771432702039,
      "grad_norm": 0.3000975251197815,
      "learning_rate": 6.666666666666667e-06,
      "loss": 11.046,
      "step": 216
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.3000975251197815,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.4199,
      "step": 217
    },
    {
      "epoch": 1.0046262830706953,
      "grad_norm": 0.4262640178203583,
      "learning_rate": 6.651234567901235e-06,
      "loss": 10.5265,
      "step": 218
    },
    {
      "epoch": 1.0092525661413907,
      "grad_norm": 0.21694016456604004,
      "learning_rate": 6.635802469135803e-06,
      "loss": 10.4657,
      "step": 219
    },
    {
      "epoch": 1.0138788492120863,
      "grad_norm": 0.2662133276462555,
      "learning_rate": 6.620370370370371e-06,
      "loss": 10.7531,
      "step": 220
    },
    {
      "epoch": 1.0185051322827816,
      "grad_norm": 0.4052404463291168,
      "learning_rate": 6.60493827160494e-06,
      "loss": 11.1598,
      "step": 221
    },
    {
      "epoch": 1.023131415353477,
      "grad_norm": 0.23900097608566284,
      "learning_rate": 6.589506172839507e-06,
      "loss": 11.5257,
      "step": 222
    },
    {
      "epoch": 1.0277576984241723,
      "grad_norm": 0.22569526731967926,
      "learning_rate": 6.574074074074075e-06,
      "loss": 11.5564,
      "step": 223
    },
    {
      "epoch": 1.0323839814948677,
      "grad_norm": 0.29018473625183105,
      "learning_rate": 6.558641975308642e-06,
      "loss": 11.8655,
      "step": 224
    },
    {
      "epoch": 1.037010264565563,
      "grad_norm": 0.2856806516647339,
      "learning_rate": 6.543209876543211e-06,
      "loss": 11.8046,
      "step": 225
    },
    {
      "epoch": 1.0416365476362586,
      "grad_norm": 0.16691464185714722,
      "learning_rate": 6.5277777777777784e-06,
      "loss": 12.3704,
      "step": 226
    },
    {
      "epoch": 1.046262830706954,
      "grad_norm": 0.24121911823749542,
      "learning_rate": 6.512345679012346e-06,
      "loss": 10.7236,
      "step": 227
    },
    {
      "epoch": 1.0508891137776493,
      "grad_norm": 0.205901101231575,
      "learning_rate": 6.4969135802469145e-06,
      "loss": 10.9316,
      "step": 228
    },
    {
      "epoch": 1.0555153968483446,
      "grad_norm": 0.18375830352306366,
      "learning_rate": 6.481481481481482e-06,
      "loss": 9.422,
      "step": 229
    },
    {
      "epoch": 1.06014167991904,
      "grad_norm": 0.2132750004529953,
      "learning_rate": 6.46604938271605e-06,
      "loss": 12.6065,
      "step": 230
    },
    {
      "epoch": 1.0647679629897355,
      "grad_norm": 0.23490285873413086,
      "learning_rate": 6.450617283950617e-06,
      "loss": 11.2282,
      "step": 231
    },
    {
      "epoch": 1.0693942460604309,
      "grad_norm": 0.2604386806488037,
      "learning_rate": 6.435185185185186e-06,
      "loss": 10.8157,
      "step": 232
    },
    {
      "epoch": 1.0740205291311262,
      "grad_norm": 0.21887636184692383,
      "learning_rate": 6.419753086419753e-06,
      "loss": 10.6996,
      "step": 233
    },
    {
      "epoch": 1.0786468122018216,
      "grad_norm": 0.16946089267730713,
      "learning_rate": 6.404320987654321e-06,
      "loss": 10.3805,
      "step": 234
    },
    {
      "epoch": 1.083273095272517,
      "grad_norm": 0.20204631984233856,
      "learning_rate": 6.3888888888888885e-06,
      "loss": 11.4941,
      "step": 235
    },
    {
      "epoch": 1.0878993783432125,
      "grad_norm": 0.202669158577919,
      "learning_rate": 6.373456790123458e-06,
      "loss": 11.7944,
      "step": 236
    },
    {
      "epoch": 1.0925256614139078,
      "grad_norm": 0.22706535458564758,
      "learning_rate": 6.358024691358025e-06,
      "loss": 12.6031,
      "step": 237
    },
    {
      "epoch": 1.0971519444846032,
      "grad_norm": 0.17785371840000153,
      "learning_rate": 6.342592592592594e-06,
      "loss": 10.5881,
      "step": 238
    },
    {
      "epoch": 1.1017782275552985,
      "grad_norm": 0.17540261149406433,
      "learning_rate": 6.3271604938271615e-06,
      "loss": 10.996,
      "step": 239
    },
    {
      "epoch": 1.1064045106259939,
      "grad_norm": 0.22038735449314117,
      "learning_rate": 6.311728395061729e-06,
      "loss": 10.0293,
      "step": 240
    },
    {
      "epoch": 1.1110307936966892,
      "grad_norm": 0.17491741478443146,
      "learning_rate": 6.296296296296297e-06,
      "loss": 10.0996,
      "step": 241
    },
    {
      "epoch": 1.1156570767673848,
      "grad_norm": 0.2827455699443817,
      "learning_rate": 6.280864197530865e-06,
      "loss": 11.0431,
      "step": 242
    },
    {
      "epoch": 1.1202833598380801,
      "grad_norm": 0.21907830238342285,
      "learning_rate": 6.265432098765433e-06,
      "loss": 10.4707,
      "step": 243
    },
    {
      "epoch": 1.1249096429087755,
      "grad_norm": 0.19320356845855713,
      "learning_rate": 6.25e-06,
      "loss": 10.7269,
      "step": 244
    },
    {
      "epoch": 1.1295359259794708,
      "grad_norm": 0.22684165835380554,
      "learning_rate": 6.234567901234569e-06,
      "loss": 11.3143,
      "step": 245
    },
    {
      "epoch": 1.1341622090501662,
      "grad_norm": 0.21541574597358704,
      "learning_rate": 6.219135802469136e-06,
      "loss": 10.397,
      "step": 246
    },
    {
      "epoch": 1.1387884921208618,
      "grad_norm": 0.21086207032203674,
      "learning_rate": 6.203703703703704e-06,
      "loss": 11.0086,
      "step": 247
    },
    {
      "epoch": 1.143414775191557,
      "grad_norm": 0.26652148365974426,
      "learning_rate": 6.1882716049382715e-06,
      "loss": 11.5252,
      "step": 248
    },
    {
      "epoch": 1.1480410582622524,
      "grad_norm": 0.1995311677455902,
      "learning_rate": 6.17283950617284e-06,
      "loss": 9.3802,
      "step": 249
    },
    {
      "epoch": 1.1526673413329478,
      "grad_norm": 0.19715267419815063,
      "learning_rate": 6.157407407407408e-06,
      "loss": 11.6554,
      "step": 250
    },
    {
      "epoch": 1.1572936244036431,
      "grad_norm": 0.16357477009296417,
      "learning_rate": 6.141975308641975e-06,
      "loss": 10.7531,
      "step": 251
    },
    {
      "epoch": 1.1619199074743385,
      "grad_norm": 0.2201809138059616,
      "learning_rate": 6.126543209876543e-06,
      "loss": 11.4639,
      "step": 252
    },
    {
      "epoch": 1.166546190545034,
      "grad_norm": 0.24760210514068604,
      "learning_rate": 6.111111111111112e-06,
      "loss": 10.1224,
      "step": 253
    },
    {
      "epoch": 1.1711724736157294,
      "grad_norm": 0.18471167981624603,
      "learning_rate": 6.09567901234568e-06,
      "loss": 10.8261,
      "step": 254
    },
    {
      "epoch": 1.1757987566864248,
      "grad_norm": 0.20257946848869324,
      "learning_rate": 6.080246913580248e-06,
      "loss": 9.8945,
      "step": 255
    },
    {
      "epoch": 1.18042503975712,
      "grad_norm": 0.15809153020381927,
      "learning_rate": 6.064814814814816e-06,
      "loss": 10.4489,
      "step": 256
    },
    {
      "epoch": 1.1850513228278154,
      "grad_norm": 0.2175232172012329,
      "learning_rate": 6.049382716049383e-06,
      "loss": 10.6301,
      "step": 257
    },
    {
      "epoch": 1.189677605898511,
      "grad_norm": 0.22652743756771088,
      "learning_rate": 6.033950617283951e-06,
      "loss": 9.6189,
      "step": 258
    },
    {
      "epoch": 1.1943038889692064,
      "grad_norm": 0.21281598508358002,
      "learning_rate": 6.018518518518519e-06,
      "loss": 10.8016,
      "step": 259
    },
    {
      "epoch": 1.1989301720399017,
      "grad_norm": 0.20046480000019073,
      "learning_rate": 6.003086419753087e-06,
      "loss": 10.1304,
      "step": 260
    },
    {
      "epoch": 1.203556455110597,
      "grad_norm": 0.19008351862430573,
      "learning_rate": 5.9876543209876546e-06,
      "loss": 10.6718,
      "step": 261
    },
    {
      "epoch": 1.2081827381812924,
      "grad_norm": 0.2159290611743927,
      "learning_rate": 5.972222222222222e-06,
      "loss": 10.1963,
      "step": 262
    },
    {
      "epoch": 1.2128090212519878,
      "grad_norm": 0.1703040450811386,
      "learning_rate": 5.956790123456791e-06,
      "loss": 10.9394,
      "step": 263
    },
    {
      "epoch": 1.2174353043226833,
      "grad_norm": 0.20243868231773376,
      "learning_rate": 5.941358024691358e-06,
      "loss": 9.7358,
      "step": 264
    },
    {
      "epoch": 1.2220615873933787,
      "grad_norm": 0.21622534096240997,
      "learning_rate": 5.925925925925926e-06,
      "loss": 9.2896,
      "step": 265
    },
    {
      "epoch": 1.226687870464074,
      "grad_norm": 0.19408589601516724,
      "learning_rate": 5.910493827160494e-06,
      "loss": 10.0141,
      "step": 266
    },
    {
      "epoch": 1.2313141535347694,
      "grad_norm": 0.19652226567268372,
      "learning_rate": 5.895061728395062e-06,
      "loss": 10.6724,
      "step": 267
    },
    {
      "epoch": 1.2359404366054647,
      "grad_norm": 0.22394828498363495,
      "learning_rate": 5.8796296296296295e-06,
      "loss": 11.7472,
      "step": 268
    },
    {
      "epoch": 1.2405667196761603,
      "grad_norm": 0.18796682357788086,
      "learning_rate": 5.864197530864199e-06,
      "loss": 11.3051,
      "step": 269
    },
    {
      "epoch": 1.2451930027468556,
      "grad_norm": 0.1897086501121521,
      "learning_rate": 5.848765432098766e-06,
      "loss": 9.9475,
      "step": 270
    },
    {
      "epoch": 1.249819285817551,
      "grad_norm": 0.19912078976631165,
      "learning_rate": 5.833333333333334e-06,
      "loss": 10.6907,
      "step": 271
    },
    {
      "epoch": 1.2544455688882463,
      "grad_norm": 0.25889354944229126,
      "learning_rate": 5.817901234567902e-06,
      "loss": 9.9069,
      "step": 272
    },
    {
      "epoch": 1.2590718519589417,
      "grad_norm": 0.19028806686401367,
      "learning_rate": 5.80246913580247e-06,
      "loss": 10.0748,
      "step": 273
    },
    {
      "epoch": 1.263698135029637,
      "grad_norm": 0.1661495566368103,
      "learning_rate": 5.787037037037038e-06,
      "loss": 10.1342,
      "step": 274
    },
    {
      "epoch": 1.2683244181003326,
      "grad_norm": 0.17706620693206787,
      "learning_rate": 5.771604938271605e-06,
      "loss": 12.4703,
      "step": 275
    },
    {
      "epoch": 1.272950701171028,
      "grad_norm": 0.26582854986190796,
      "learning_rate": 5.756172839506174e-06,
      "loss": 12.6464,
      "step": 276
    },
    {
      "epoch": 1.2775769842417233,
      "grad_norm": 0.178712397813797,
      "learning_rate": 5.740740740740741e-06,
      "loss": 9.6011,
      "step": 277
    },
    {
      "epoch": 1.2822032673124186,
      "grad_norm": 0.18910570442676544,
      "learning_rate": 5.725308641975309e-06,
      "loss": 10.8773,
      "step": 278
    },
    {
      "epoch": 1.2868295503831142,
      "grad_norm": 0.24754610657691956,
      "learning_rate": 5.7098765432098764e-06,
      "loss": 10.8981,
      "step": 279
    },
    {
      "epoch": 1.2914558334538095,
      "grad_norm": 0.14676739275455475,
      "learning_rate": 5.694444444444445e-06,
      "loss": 10.1851,
      "step": 280
    },
    {
      "epoch": 1.296082116524505,
      "grad_norm": 0.21573857963085175,
      "learning_rate": 5.6790123456790125e-06,
      "loss": 10.2565,
      "step": 281
    },
    {
      "epoch": 1.3007083995952002,
      "grad_norm": 0.24850860238075256,
      "learning_rate": 5.66358024691358e-06,
      "loss": 9.9282,
      "step": 282
    },
    {
      "epoch": 1.3053346826658956,
      "grad_norm": 0.180882066488266,
      "learning_rate": 5.6481481481481485e-06,
      "loss": 11.6456,
      "step": 283
    },
    {
      "epoch": 1.309960965736591,
      "grad_norm": 0.19305004179477692,
      "learning_rate": 5.632716049382716e-06,
      "loss": 10.0129,
      "step": 284
    },
    {
      "epoch": 1.3145872488072863,
      "grad_norm": 0.18499980866909027,
      "learning_rate": 5.617283950617285e-06,
      "loss": 10.6136,
      "step": 285
    },
    {
      "epoch": 1.3192135318779818,
      "grad_norm": 0.2523798942565918,
      "learning_rate": 5.601851851851853e-06,
      "loss": 9.1629,
      "step": 286
    },
    {
      "epoch": 1.3238398149486772,
      "grad_norm": 0.19359612464904785,
      "learning_rate": 5.586419753086421e-06,
      "loss": 9.9917,
      "step": 287
    },
    {
      "epoch": 1.3284660980193725,
      "grad_norm": 0.15306031703948975,
      "learning_rate": 5.570987654320988e-06,
      "loss": 9.6058,
      "step": 288
    },
    {
      "epoch": 1.333092381090068,
      "grad_norm": 0.2070600688457489,
      "learning_rate": 5.555555555555557e-06,
      "loss": 10.2776,
      "step": 289
    },
    {
      "epoch": 1.3377186641607635,
      "grad_norm": 0.1619105190038681,
      "learning_rate": 5.540123456790124e-06,
      "loss": 9.1465,
      "step": 290
    },
    {
      "epoch": 1.3423449472314588,
      "grad_norm": 0.16202016174793243,
      "learning_rate": 5.524691358024692e-06,
      "loss": 10.0362,
      "step": 291
    },
    {
      "epoch": 1.3469712303021542,
      "grad_norm": 0.19075003266334534,
      "learning_rate": 5.5092592592592595e-06,
      "loss": 10.0189,
      "step": 292
    },
    {
      "epoch": 1.3515975133728495,
      "grad_norm": 0.16484172642230988,
      "learning_rate": 5.493827160493828e-06,
      "loss": 12.3743,
      "step": 293
    },
    {
      "epoch": 1.3562237964435448,
      "grad_norm": 0.2151799499988556,
      "learning_rate": 5.4783950617283955e-06,
      "loss": 9.9846,
      "step": 294
    },
    {
      "epoch": 1.3608500795142402,
      "grad_norm": 0.20363835990428925,
      "learning_rate": 5.462962962962963e-06,
      "loss": 9.671,
      "step": 295
    },
    {
      "epoch": 1.3654763625849355,
      "grad_norm": 0.15196539461612701,
      "learning_rate": 5.447530864197531e-06,
      "loss": 9.97,
      "step": 296
    },
    {
      "epoch": 1.3701026456556311,
      "grad_norm": 0.17592975497245789,
      "learning_rate": 5.432098765432099e-06,
      "loss": 10.8506,
      "step": 297
    },
    {
      "epoch": 1.3747289287263265,
      "grad_norm": 0.20012719929218292,
      "learning_rate": 5.416666666666667e-06,
      "loss": 10.4503,
      "step": 298
    },
    {
      "epoch": 1.3793552117970218,
      "grad_norm": 0.19312864542007446,
      "learning_rate": 5.401234567901234e-06,
      "loss": 9.7727,
      "step": 299
    },
    {
      "epoch": 1.3839814948677172,
      "grad_norm": 0.14697419106960297,
      "learning_rate": 5.385802469135803e-06,
      "loss": 11.9123,
      "step": 300
    },
    {
      "epoch": 1.3886077779384127,
      "grad_norm": 0.15580600500106812,
      "learning_rate": 5.370370370370371e-06,
      "loss": 10.3442,
      "step": 301
    },
    {
      "epoch": 1.393234061009108,
      "grad_norm": 0.19586268067359924,
      "learning_rate": 5.354938271604939e-06,
      "loss": 8.9558,
      "step": 302
    },
    {
      "epoch": 1.3978603440798034,
      "grad_norm": 0.19394423067569733,
      "learning_rate": 5.339506172839507e-06,
      "loss": 10.8312,
      "step": 303
    },
    {
      "epoch": 1.4024866271504988,
      "grad_norm": 0.21832065284252167,
      "learning_rate": 5.324074074074075e-06,
      "loss": 9.9943,
      "step": 304
    },
    {
      "epoch": 1.4071129102211941,
      "grad_norm": 0.16751375794410706,
      "learning_rate": 5.3086419753086425e-06,
      "loss": 11.3979,
      "step": 305
    },
    {
      "epoch": 1.4117391932918895,
      "grad_norm": 0.1763811856508255,
      "learning_rate": 5.29320987654321e-06,
      "loss": 11.175,
      "step": 306
    },
    {
      "epoch": 1.4163654763625848,
      "grad_norm": 0.1916954666376114,
      "learning_rate": 5.2777777777777785e-06,
      "loss": 8.8938,
      "step": 307
    },
    {
      "epoch": 1.4209917594332804,
      "grad_norm": 0.12443282455205917,
      "learning_rate": 5.262345679012346e-06,
      "loss": 9.6263,
      "step": 308
    },
    {
      "epoch": 1.4256180425039757,
      "grad_norm": 0.18852603435516357,
      "learning_rate": 5.246913580246914e-06,
      "loss": 10.6811,
      "step": 309
    },
    {
      "epoch": 1.430244325574671,
      "grad_norm": 0.1854352355003357,
      "learning_rate": 5.231481481481482e-06,
      "loss": 9.9107,
      "step": 310
    },
    {
      "epoch": 1.4348706086453664,
      "grad_norm": 0.2382035106420517,
      "learning_rate": 5.21604938271605e-06,
      "loss": 10.8001,
      "step": 311
    },
    {
      "epoch": 1.439496891716062,
      "grad_norm": 0.18645620346069336,
      "learning_rate": 5.200617283950617e-06,
      "loss": 9.686,
      "step": 312
    },
    {
      "epoch": 1.4441231747867573,
      "grad_norm": 0.1910058856010437,
      "learning_rate": 5.185185185185185e-06,
      "loss": 10.3407,
      "step": 313
    },
    {
      "epoch": 1.4487494578574527,
      "grad_norm": 0.23428039252758026,
      "learning_rate": 5.1697530864197534e-06,
      "loss": 9.2624,
      "step": 314
    },
    {
      "epoch": 1.453375740928148,
      "grad_norm": 0.17926917970180511,
      "learning_rate": 5.154320987654321e-06,
      "loss": 9.5153,
      "step": 315
    },
    {
      "epoch": 1.4580020239988434,
      "grad_norm": 0.22386913001537323,
      "learning_rate": 5.138888888888889e-06,
      "loss": 10.6948,
      "step": 316
    },
    {
      "epoch": 1.4626283070695387,
      "grad_norm": 0.20423167943954468,
      "learning_rate": 5.123456790123458e-06,
      "loss": 10.3966,
      "step": 317
    },
    {
      "epoch": 1.467254590140234,
      "grad_norm": 0.15411533415317535,
      "learning_rate": 5.1080246913580255e-06,
      "loss": 10.2981,
      "step": 318
    },
    {
      "epoch": 1.4718808732109296,
      "grad_norm": 0.11654549837112427,
      "learning_rate": 5.092592592592593e-06,
      "loss": 10.1807,
      "step": 319
    },
    {
      "epoch": 1.476507156281625,
      "grad_norm": 0.20476582646369934,
      "learning_rate": 5.0771604938271616e-06,
      "loss": 9.9201,
      "step": 320
    },
    {
      "epoch": 1.4811334393523203,
      "grad_norm": 0.19140280783176422,
      "learning_rate": 5.061728395061729e-06,
      "loss": 10.3292,
      "step": 321
    },
    {
      "epoch": 1.4857597224230157,
      "grad_norm": 0.16844482719898224,
      "learning_rate": 5.046296296296297e-06,
      "loss": 10.0726,
      "step": 322
    },
    {
      "epoch": 1.4903860054937113,
      "grad_norm": 0.22298437356948853,
      "learning_rate": 5.030864197530864e-06,
      "loss": 10.9447,
      "step": 323
    },
    {
      "epoch": 1.4950122885644066,
      "grad_norm": 0.1775355190038681,
      "learning_rate": 5.015432098765433e-06,
      "loss": 12.1205,
      "step": 324
    },
    {
      "epoch": 1.499638571635102,
      "grad_norm": 0.198349267244339,
      "learning_rate": 5e-06,
      "loss": 9.0277,
      "step": 325
    },
    {
      "epoch": 1.5042648547057973,
      "grad_norm": 0.16838030517101288,
      "learning_rate": 4.984567901234568e-06,
      "loss": 10.3551,
      "step": 326
    },
    {
      "epoch": 1.5088911377764926,
      "grad_norm": 0.15806855261325836,
      "learning_rate": 4.9691358024691365e-06,
      "loss": 10.0048,
      "step": 327
    },
    {
      "epoch": 1.513517420847188,
      "grad_norm": 0.21915429830551147,
      "learning_rate": 4.953703703703704e-06,
      "loss": 10.554,
      "step": 328
    },
    {
      "epoch": 1.5181437039178833,
      "grad_norm": 0.23262116312980652,
      "learning_rate": 4.938271604938272e-06,
      "loss": 10.8198,
      "step": 329
    },
    {
      "epoch": 1.522769986988579,
      "grad_norm": 0.18927887082099915,
      "learning_rate": 4.92283950617284e-06,
      "loss": 10.7398,
      "step": 330
    },
    {
      "epoch": 1.5273962700592743,
      "grad_norm": 0.138357013463974,
      "learning_rate": 4.907407407407408e-06,
      "loss": 9.8479,
      "step": 331
    },
    {
      "epoch": 1.5320225531299696,
      "grad_norm": 0.20223885774612427,
      "learning_rate": 4.891975308641976e-06,
      "loss": 11.1796,
      "step": 332
    },
    {
      "epoch": 1.5366488362006652,
      "grad_norm": 0.17738379538059235,
      "learning_rate": 4.876543209876544e-06,
      "loss": 9.1805,
      "step": 333
    },
    {
      "epoch": 1.5412751192713605,
      "grad_norm": 0.18442702293395996,
      "learning_rate": 4.861111111111111e-06,
      "loss": 8.4714,
      "step": 334
    },
    {
      "epoch": 1.5459014023420559,
      "grad_norm": 0.19169731438159943,
      "learning_rate": 4.845679012345679e-06,
      "loss": 9.1503,
      "step": 335
    },
    {
      "epoch": 1.5505276854127512,
      "grad_norm": 0.19981014728546143,
      "learning_rate": 4.830246913580247e-06,
      "loss": 9.6427,
      "step": 336
    },
    {
      "epoch": 1.5551539684834466,
      "grad_norm": 0.1916283518075943,
      "learning_rate": 4.814814814814815e-06,
      "loss": 8.8991,
      "step": 337
    },
    {
      "epoch": 1.559780251554142,
      "grad_norm": 0.14049910008907318,
      "learning_rate": 4.7993827160493834e-06,
      "loss": 9.2888,
      "step": 338
| }, | |
| { | |
| "epoch": 1.5644065346248373, | |
| "grad_norm": 0.14010216295719147, | |
| "learning_rate": 4.783950617283951e-06, | |
| "loss": 10.4533, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.5690328176955326, | |
| "grad_norm": 0.1919959932565689, | |
| "learning_rate": 4.768518518518519e-06, | |
| "loss": 10.149, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.5736591007662282, | |
| "grad_norm": 0.1598203033208847, | |
| "learning_rate": 4.753086419753087e-06, | |
| "loss": 11.1529, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.5782853838369235, | |
| "grad_norm": 0.19053767621517181, | |
| "learning_rate": 4.737654320987655e-06, | |
| "loss": 10.5543, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.5829116669076189, | |
| "grad_norm": 0.21635524928569794, | |
| "learning_rate": 4.722222222222222e-06, | |
| "loss": 8.7965, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.5875379499783144, | |
| "grad_norm": 0.2088153064250946, | |
| "learning_rate": 4.706790123456791e-06, | |
| "loss": 9.1113, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.5921642330490098, | |
| "grad_norm": 0.1993483006954193, | |
| "learning_rate": 4.691358024691358e-06, | |
| "loss": 9.7291, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.5967905161197051, | |
| "grad_norm": 0.24835029244422913, | |
| "learning_rate": 4.675925925925927e-06, | |
| "loss": 9.5215, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.6014167991904005, | |
| "grad_norm": 0.24942326545715332, | |
| "learning_rate": 4.660493827160494e-06, | |
| "loss": 9.3023, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.6060430822610958, | |
| "grad_norm": 0.18829648196697235, | |
| "learning_rate": 4.645061728395062e-06, | |
| "loss": 9.6869, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.6106693653317912, | |
| "grad_norm": 0.17725762724876404, | |
| "learning_rate": 4.62962962962963e-06, | |
| "loss": 10.0154, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.6152956484024865, | |
| "grad_norm": 0.145020991563797, | |
| "learning_rate": 4.614197530864198e-06, | |
| "loss": 10.6015, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.6199219314731819, | |
| "grad_norm": 0.12719136476516724, | |
| "learning_rate": 4.598765432098766e-06, | |
| "loss": 9.9244, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.6245482145438774, | |
| "grad_norm": 0.1771680861711502, | |
| "learning_rate": 4.583333333333333e-06, | |
| "loss": 9.4468, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.6291744976145728, | |
| "grad_norm": 0.1684349775314331, | |
| "learning_rate": 4.567901234567902e-06, | |
| "loss": 11.5601, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.6338007806852681, | |
| "grad_norm": 0.1284870207309723, | |
| "learning_rate": 4.55246913580247e-06, | |
| "loss": 9.5658, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.6384270637559637, | |
| "grad_norm": 0.15432706475257874, | |
| "learning_rate": 4.537037037037038e-06, | |
| "loss": 10.2547, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.643053346826659, | |
| "grad_norm": 0.19519680738449097, | |
| "learning_rate": 4.521604938271605e-06, | |
| "loss": 9.2126, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.6476796298973544, | |
| "grad_norm": 0.1388140469789505, | |
| "learning_rate": 4.506172839506173e-06, | |
| "loss": 9.9578, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.6523059129680497, | |
| "grad_norm": 0.19714529812335968, | |
| "learning_rate": 4.490740740740741e-06, | |
| "loss": 11.7677, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.656932196038745, | |
| "grad_norm": 0.22792014479637146, | |
| "learning_rate": 4.475308641975309e-06, | |
| "loss": 9.6164, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.6615584791094404, | |
| "grad_norm": 0.1519528478384018, | |
| "learning_rate": 4.4598765432098765e-06, | |
| "loss": 9.4579, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.6661847621801358, | |
| "grad_norm": 0.17547698318958282, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 8.2953, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.6708110452508311, | |
| "grad_norm": 0.2348184585571289, | |
| "learning_rate": 4.429012345679013e-06, | |
| "loss": 9.8739, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.6754373283215267, | |
| "grad_norm": 0.16753868758678436, | |
| "learning_rate": 4.413580246913581e-06, | |
| "loss": 9.8006, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.680063611392222, | |
| "grad_norm": 0.16872891783714294, | |
| "learning_rate": 4.398148148148149e-06, | |
| "loss": 10.2161, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.6846898944629176, | |
| "grad_norm": 0.18189671635627747, | |
| "learning_rate": 4.382716049382716e-06, | |
| "loss": 9.5235, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.689316177533613, | |
| "grad_norm": 0.12333816289901733, | |
| "learning_rate": 4.367283950617285e-06, | |
| "loss": 10.2188, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.6939424606043083, | |
| "grad_norm": 0.22701486945152283, | |
| "learning_rate": 4.351851851851852e-06, | |
| "loss": 10.7382, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.6985687436750037, | |
| "grad_norm": 0.20510244369506836, | |
| "learning_rate": 4.33641975308642e-06, | |
| "loss": 8.7558, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.703195026745699, | |
| "grad_norm": 0.1589343398809433, | |
| "learning_rate": 4.3209876543209875e-06, | |
| "loss": 9.924, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.7078213098163944, | |
| "grad_norm": 0.1433723270893097, | |
| "learning_rate": 4.305555555555556e-06, | |
| "loss": 11.0008, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.7124475928870897, | |
| "grad_norm": 0.17607755959033966, | |
| "learning_rate": 4.290123456790124e-06, | |
| "loss": 10.0096, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.717073875957785, | |
| "grad_norm": 0.16202040016651154, | |
| "learning_rate": 4.274691358024692e-06, | |
| "loss": 9.9295, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.7217001590284804, | |
| "grad_norm": 0.21179550886154175, | |
| "learning_rate": 4.2592592592592596e-06, | |
| "loss": 8.9856, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.726326442099176, | |
| "grad_norm": 0.35698196291923523, | |
| "learning_rate": 4.243827160493827e-06, | |
| "loss": 9.8793, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.7309527251698713, | |
| "grad_norm": 0.18081314861774445, | |
| "learning_rate": 4.228395061728396e-06, | |
| "loss": 10.8514, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.7355790082405669, | |
| "grad_norm": 0.19385862350463867, | |
| "learning_rate": 4.212962962962963e-06, | |
| "loss": 8.6034, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.7402052913112622, | |
| "grad_norm": 0.17623740434646606, | |
| "learning_rate": 4.197530864197531e-06, | |
| "loss": 10.7457, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.7448315743819576, | |
| "grad_norm": 0.1551784873008728, | |
| "learning_rate": 4.182098765432099e-06, | |
| "loss": 9.2378, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.749457857452653, | |
| "grad_norm": 0.15923646092414856, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 10.0731, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.7540841405233483, | |
| "grad_norm": 0.16185562312602997, | |
| "learning_rate": 4.151234567901235e-06, | |
| "loss": 8.9162, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.7587104235940436, | |
| "grad_norm": 0.23534594476222992, | |
| "learning_rate": 4.135802469135803e-06, | |
| "loss": 9.3805, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.763336706664739, | |
| "grad_norm": 0.17162498831748962, | |
| "learning_rate": 4.1203703703703705e-06, | |
| "loss": 10.0609, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.7679629897354343, | |
| "grad_norm": 0.15775223076343536, | |
| "learning_rate": 4.104938271604938e-06, | |
| "loss": 9.3534, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.7725892728061299, | |
| "grad_norm": 0.2251134216785431, | |
| "learning_rate": 4.0895061728395066e-06, | |
| "loss": 9.746, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.7772155558768252, | |
| "grad_norm": 0.19613635540008545, | |
| "learning_rate": 4.074074074074074e-06, | |
| "loss": 10.42, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.7818418389475206, | |
| "grad_norm": 0.16256873309612274, | |
| "learning_rate": 4.058641975308643e-06, | |
| "loss": 9.4703, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.7864681220182161, | |
| "grad_norm": 0.16490335762500763, | |
| "learning_rate": 4.04320987654321e-06, | |
| "loss": 9.5418, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.7910944050889115, | |
| "grad_norm": 0.159736767411232, | |
| "learning_rate": 4.027777777777779e-06, | |
| "loss": 10.1403, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.7957206881596068, | |
| "grad_norm": 0.1494799256324768, | |
| "learning_rate": 4.012345679012346e-06, | |
| "loss": 9.1583, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.8003469712303022, | |
| "grad_norm": 0.19815048575401306, | |
| "learning_rate": 3.996913580246914e-06, | |
| "loss": 10.2705, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.8049732543009975, | |
| "grad_norm": 0.17572833597660065, | |
| "learning_rate": 3.9814814814814814e-06, | |
| "loss": 9.4608, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.8095995373716929, | |
| "grad_norm": 0.23244017362594604, | |
| "learning_rate": 3.96604938271605e-06, | |
| "loss": 10.6273, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.8142258204423882, | |
| "grad_norm": 0.17120610177516937, | |
| "learning_rate": 3.9506172839506175e-06, | |
| "loss": 10.0444, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.8188521035130836, | |
| "grad_norm": 0.27220579981803894, | |
| "learning_rate": 3.935185185185186e-06, | |
| "loss": 10.64, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.8234783865837791, | |
| "grad_norm": 0.1648477017879486, | |
| "learning_rate": 3.9197530864197535e-06, | |
| "loss": 9.6264, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.8281046696544745, | |
| "grad_norm": 0.15771055221557617, | |
| "learning_rate": 3.904320987654321e-06, | |
| "loss": 10.5564, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.8327309527251698, | |
| "grad_norm": 0.21713097393512726, | |
| "learning_rate": 3.88888888888889e-06, | |
| "loss": 10.6551, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.8373572357958654, | |
| "grad_norm": 0.17076119780540466, | |
| "learning_rate": 3.873456790123457e-06, | |
| "loss": 11.1632, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.8419835188665608, | |
| "grad_norm": 0.160685196518898, | |
| "learning_rate": 3.858024691358025e-06, | |
| "loss": 9.4666, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.846609801937256, | |
| "grad_norm": 0.17023253440856934, | |
| "learning_rate": 3.842592592592592e-06, | |
| "loss": 9.8118, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.8512360850079514, | |
| "grad_norm": 0.2186374068260193, | |
| "learning_rate": 3.827160493827161e-06, | |
| "loss": 10.4903, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.8558623680786468, | |
| "grad_norm": 0.17954406142234802, | |
| "learning_rate": 3.811728395061729e-06, | |
| "loss": 11.3069, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.8604886511493421, | |
| "grad_norm": 0.2778474986553192, | |
| "learning_rate": 3.796296296296297e-06, | |
| "loss": 10.5854, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.8651149342200375, | |
| "grad_norm": 0.27708959579467773, | |
| "learning_rate": 3.7808641975308645e-06, | |
| "loss": 10.2884, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.8697412172907328, | |
| "grad_norm": 0.14399868249893188, | |
| "learning_rate": 3.7654320987654325e-06, | |
| "loss": 10.1108, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.8743675003614284, | |
| "grad_norm": 0.19901257753372192, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 10.2892, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.8789937834321238, | |
| "grad_norm": 0.21875345706939697, | |
| "learning_rate": 3.734567901234568e-06, | |
| "loss": 9.0521, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.883620066502819, | |
| "grad_norm": 0.14913184940814972, | |
| "learning_rate": 3.719135802469136e-06, | |
| "loss": 10.5905, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.8882463495735147, | |
| "grad_norm": 0.22289720177650452, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 9.1175, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.89287263264421, | |
| "grad_norm": 0.16322648525238037, | |
| "learning_rate": 3.6882716049382718e-06, | |
| "loss": 10.1569, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.8974989157149054, | |
| "grad_norm": 0.13389474153518677, | |
| "learning_rate": 3.67283950617284e-06, | |
| "loss": 9.7421, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.9021251987856007, | |
| "grad_norm": 0.17904877662658691, | |
| "learning_rate": 3.657407407407408e-06, | |
| "loss": 10.306, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.906751481856296, | |
| "grad_norm": 0.22490617632865906, | |
| "learning_rate": 3.641975308641976e-06, | |
| "loss": 10.0275, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.9113777649269914, | |
| "grad_norm": 0.20119769871234894, | |
| "learning_rate": 3.6265432098765434e-06, | |
| "loss": 9.5942, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.9160040479976868, | |
| "grad_norm": 0.156976118683815, | |
| "learning_rate": 3.6111111111111115e-06, | |
| "loss": 10.3435, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.920630331068382, | |
| "grad_norm": 0.13303594291210175, | |
| "learning_rate": 3.595679012345679e-06, | |
| "loss": 8.8616, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.9252566141390777, | |
| "grad_norm": 0.21186932921409607, | |
| "learning_rate": 3.580246913580247e-06, | |
| "loss": 9.8981, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.929882897209773, | |
| "grad_norm": 0.20669791102409363, | |
| "learning_rate": 3.5648148148148147e-06, | |
| "loss": 10.3689, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.9345091802804684, | |
| "grad_norm": 0.20095276832580566, | |
| "learning_rate": 3.549382716049383e-06, | |
| "loss": 9.136, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.939135463351164, | |
| "grad_norm": 0.16348451375961304, | |
| "learning_rate": 3.533950617283951e-06, | |
| "loss": 10.4419, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.9437617464218593, | |
| "grad_norm": 0.16439932584762573, | |
| "learning_rate": 3.5185185185185187e-06, | |
| "loss": 10.0692, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.9483880294925546, | |
| "grad_norm": 0.18562501668930054, | |
| "learning_rate": 3.5030864197530868e-06, | |
| "loss": 10.4452, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.95301431256325, | |
| "grad_norm": 0.15668661892414093, | |
| "learning_rate": 3.4876543209876544e-06, | |
| "loss": 9.1397, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.9576405956339453, | |
| "grad_norm": 0.129827082157135, | |
| "learning_rate": 3.4722222222222224e-06, | |
| "loss": 8.428, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.9622668787046407, | |
| "grad_norm": 0.14903448522090912, | |
| "learning_rate": 3.4567901234567904e-06, | |
| "loss": 9.0113, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.966893161775336, | |
| "grad_norm": 0.2033424973487854, | |
| "learning_rate": 3.441358024691358e-06, | |
| "loss": 8.6822, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.9715194448460314, | |
| "grad_norm": 0.1724868267774582, | |
| "learning_rate": 3.4259259259259265e-06, | |
| "loss": 9.3924, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.976145727916727, | |
| "grad_norm": 0.1788475513458252, | |
| "learning_rate": 3.410493827160494e-06, | |
| "loss": 7.8849, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.9807720109874223, | |
| "grad_norm": 0.13475210964679718, | |
| "learning_rate": 3.395061728395062e-06, | |
| "loss": 10.321, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.9853982940581176, | |
| "grad_norm": 0.15269149839878082, | |
| "learning_rate": 3.37962962962963e-06, | |
| "loss": 9.5632, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.9900245771288132, | |
| "grad_norm": 0.20584136247634888, | |
| "learning_rate": 3.3641975308641977e-06, | |
| "loss": 9.0406, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.9946508601995085, | |
| "grad_norm": 0.1267031729221344, | |
| "learning_rate": 3.3487654320987657e-06, | |
| "loss": 9.1269, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.999277143270204, | |
| "grad_norm": 0.20229946076869965, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 9.1644, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.20229946076869965, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 1.6753, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.0046262830706953, | |
| "grad_norm": 0.19674813747406006, | |
| "learning_rate": 3.3179012345679013e-06, | |
| "loss": 8.9472, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.0092525661413907, | |
| "grad_norm": 0.17176903784275055, | |
| "learning_rate": 3.30246913580247e-06, | |
| "loss": 8.1618, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.013878849212086, | |
| "grad_norm": 0.21751543879508972, | |
| "learning_rate": 3.2870370370370374e-06, | |
| "loss": 11.298, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.0185051322827814, | |
| "grad_norm": 0.15177738666534424, | |
| "learning_rate": 3.2716049382716054e-06, | |
| "loss": 8.9182, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.0231314153534767, | |
| "grad_norm": 0.13782791793346405, | |
| "learning_rate": 3.256172839506173e-06, | |
| "loss": 9.138, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.0277576984241725, | |
| "grad_norm": 0.1871718168258667, | |
| "learning_rate": 3.240740740740741e-06, | |
| "loss": 9.3935, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.032383981494868, | |
| "grad_norm": 0.13550569117069244, | |
| "learning_rate": 3.2253086419753086e-06, | |
| "loss": 9.4065, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.0370102645655632, | |
| "grad_norm": 0.1414715200662613, | |
| "learning_rate": 3.2098765432098767e-06, | |
| "loss": 10.0772, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.0416365476362586, | |
| "grad_norm": 0.16839131712913513, | |
| "learning_rate": 3.1944444444444443e-06, | |
| "loss": 8.4968, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.046262830706954, | |
| "grad_norm": 0.181600421667099, | |
| "learning_rate": 3.1790123456790127e-06, | |
| "loss": 8.0076, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.0508891137776493, | |
| "grad_norm": 0.1106899231672287, | |
| "learning_rate": 3.1635802469135807e-06, | |
| "loss": 8.5825, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.0555153968483446, | |
| "grad_norm": 0.17763055860996246, | |
| "learning_rate": 3.1481481481481483e-06, | |
| "loss": 9.2981, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.06014167991904, | |
| "grad_norm": 0.19595400989055634, | |
| "learning_rate": 3.1327160493827164e-06, | |
| "loss": 10.5243, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.0647679629897353, | |
| "grad_norm": 0.18254916369915009, | |
| "learning_rate": 3.1172839506172844e-06, | |
| "loss": 10.9755, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.0693942460604307, | |
| "grad_norm": 0.18021439015865326, | |
| "learning_rate": 3.101851851851852e-06, | |
| "loss": 9.3634, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.074020529131126, | |
| "grad_norm": 0.20369423925876617, | |
| "learning_rate": 3.08641975308642e-06, | |
| "loss": 10.1676, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.078646812201822, | |
| "grad_norm": 0.1611659973859787, | |
| "learning_rate": 3.0709876543209876e-06, | |
| "loss": 10.5072, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.083273095272517, | |
| "grad_norm": 0.16623584926128387, | |
| "learning_rate": 3.055555555555556e-06, | |
| "loss": 8.6923, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.0878993783432125, | |
| "grad_norm": 0.20521709322929382, | |
| "learning_rate": 3.040123456790124e-06, | |
| "loss": 10.3398, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.092525661413908, | |
| "grad_norm": 0.150822713971138, | |
| "learning_rate": 3.0246913580246917e-06, | |
| "loss": 8.1734, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.097151944484603, | |
| "grad_norm": 0.14610332250595093, | |
| "learning_rate": 3.0092592592592597e-06, | |
| "loss": 8.0033, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.1017782275552985, | |
| "grad_norm": 0.16596634685993195, | |
| "learning_rate": 2.9938271604938273e-06, | |
| "loss": 9.923, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.106404510625994, | |
| "grad_norm": 0.15604960918426514, | |
| "learning_rate": 2.9783950617283953e-06, | |
| "loss": 9.5128, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.1110307936966892, | |
| "grad_norm": 0.1488286554813385, | |
| "learning_rate": 2.962962962962963e-06, | |
| "loss": 8.704, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.1156570767673846, | |
| "grad_norm": 0.1483355164527893, | |
| "learning_rate": 2.947530864197531e-06, | |
| "loss": 8.4275, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.12028335983808, | |
| "grad_norm": 0.2545453906059265, | |
| "learning_rate": 2.9320987654320994e-06, | |
| "loss": 9.332, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.1249096429087757, | |
| "grad_norm": 0.18272359669208527, | |
| "learning_rate": 2.916666666666667e-06, | |
| "loss": 9.4908, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.129535925979471, | |
| "grad_norm": 0.19491931796073914, | |
| "learning_rate": 2.901234567901235e-06, | |
| "loss": 8.5668, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.1341622090501664, | |
| "grad_norm": 0.171804741024971, | |
| "learning_rate": 2.8858024691358026e-06, | |
| "loss": 8.9085, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.1387884921208618, | |
| "grad_norm": 0.22477325797080994, | |
| "learning_rate": 2.8703703703703706e-06, | |
| "loss": 9.5643, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.143414775191557, | |
| "grad_norm": 0.2512359023094177, | |
| "learning_rate": 2.8549382716049382e-06, | |
| "loss": 10.6236, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.1480410582622524, | |
| "grad_norm": 0.15513500571250916, | |
| "learning_rate": 2.8395061728395062e-06, | |
| "loss": 9.3385, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.152667341332948, | |
| "grad_norm": 0.18065772950649261, | |
| "learning_rate": 2.8240740740740743e-06, | |
| "loss": 9.3942, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.157293624403643, | |
| "grad_norm": 0.19600172340869904, | |
| "learning_rate": 2.8086419753086423e-06, | |
| "loss": 9.0732, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.1619199074743385, | |
| "grad_norm": 0.17671746015548706, | |
| "learning_rate": 2.7932098765432103e-06, | |
| "loss": 10.023, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.166546190545034, | |
| "grad_norm": 0.16406480967998505, | |
| "learning_rate": 2.7777777777777783e-06, | |
| "loss": 9.8105, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.171172473615729, | |
| "grad_norm": 0.11806418746709824, | |
| "learning_rate": 2.762345679012346e-06, | |
| "loss": 8.2566, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.175798756686425, | |
| "grad_norm": 0.32428812980651855, | |
| "learning_rate": 2.746913580246914e-06, | |
| "loss": 9.1191, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.1804250397571203, | |
| "grad_norm": 0.22312945127487183, | |
| "learning_rate": 2.7314814814814816e-06, | |
| "loss": 9.9265, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.1850513228278157, | |
| "grad_norm": 0.32287222146987915, | |
| "learning_rate": 2.7160493827160496e-06, | |
| "loss": 10.6421, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.189677605898511, | |
| "grad_norm": 0.15497097373008728, | |
| "learning_rate": 2.700617283950617e-06, | |
| "loss": 8.9862, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.1943038889692064, | |
| "grad_norm": 0.13556216657161713, | |
| "learning_rate": 2.6851851851851856e-06, | |
| "loss": 8.658, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.1989301720399017, | |
| "grad_norm": 0.17666535079479218, | |
| "learning_rate": 2.6697530864197536e-06, | |
| "loss": 10.2978, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.203556455110597, | |
| "grad_norm": 0.16342324018478394, | |
| "learning_rate": 2.6543209876543212e-06, | |
| "loss": 8.4887, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.2081827381812924, | |
| "grad_norm": 0.2034367471933365, | |
| "learning_rate": 2.6388888888888893e-06, | |
| "loss": 7.8828, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.2128090212519878, | |
| "grad_norm": 0.23170992732048035, | |
| "learning_rate": 2.623456790123457e-06, | |
| "loss": 9.6533, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.217435304322683, | |
| "grad_norm": 0.16840022802352905, | |
| "learning_rate": 2.608024691358025e-06, | |
| "loss": 8.1141, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.2220615873933784, | |
| "grad_norm": 0.1619078814983368, | |
| "learning_rate": 2.5925925925925925e-06, | |
| "loss": 8.6294, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.2266878704640742, | |
| "grad_norm": 0.22204962372779846, | |
| "learning_rate": 2.5771604938271605e-06, | |
| "loss": 10.4391, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.2313141535347696, | |
| "grad_norm": 0.1876753270626068, | |
| "learning_rate": 2.561728395061729e-06, | |
| "loss": 9.9287, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.235940436605465, | |
| "grad_norm": 0.18601296842098236, | |
| "learning_rate": 2.5462962962962966e-06, | |
| "loss": 9.3439, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.2405667196761603, | |
| "grad_norm": 0.17974725365638733, | |
| "learning_rate": 2.5308641975308646e-06, | |
| "loss": 9.6812, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.2451930027468556, | |
| "grad_norm": 0.1832571029663086, | |
| "learning_rate": 2.515432098765432e-06, | |
| "loss": 7.6883, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.249819285817551, | |
| "grad_norm": 0.18652378022670746, | |
| "learning_rate": 2.5e-06, | |
| "loss": 8.9935, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.2544455688882463, | |
| "grad_norm": 0.20331954956054688, | |
| "learning_rate": 2.4845679012345682e-06, | |
| "loss": 9.1709, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.2590718519589417, | |
| "grad_norm": 0.15439340472221375, | |
| "learning_rate": 2.469135802469136e-06, | |
| "loss": 9.0578, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.263698135029637, | |
| "grad_norm": 0.16174978017807007, | |
| "learning_rate": 2.453703703703704e-06, | |
| "loss": 9.2311, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.2683244181003324, | |
| "grad_norm": 0.20344924926757812, | |
| "learning_rate": 2.438271604938272e-06, | |
| "loss": 9.8727, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.2729507011710277, | |
| "grad_norm": 0.19355060160160065, | |
| "learning_rate": 2.4228395061728395e-06, | |
| "loss": 9.3433, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.2775769842417235, | |
| "grad_norm": 0.17609727382659912, | |
| "learning_rate": 2.4074074074074075e-06, | |
| "loss": 8.8289, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.282203267312419, | |
| "grad_norm": 0.16068409383296967, | |
| "learning_rate": 2.3919753086419755e-06, | |
| "loss": 9.1044, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.286829550383114, | |
| "grad_norm": 0.15089063346385956, | |
| "learning_rate": 2.3765432098765435e-06, | |
| "loss": 8.8363, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.2914558334538095, | |
| "grad_norm": 0.3163600265979767, | |
| "learning_rate": 2.361111111111111e-06, | |
| "loss": 9.3244, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.296082116524505, | |
| "grad_norm": 0.18847279250621796, | |
| "learning_rate": 2.345679012345679e-06, | |
| "loss": 9.1338, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.3007083995952002, | |
| "grad_norm": 0.23115108907222748, | |
| "learning_rate": 2.330246913580247e-06, | |
| "loss": 9.6425, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.3053346826658956, | |
| "grad_norm": 0.15251374244689941, | |
| "learning_rate": 2.314814814814815e-06, | |
| "loss": 9.6591, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.309960965736591, | |
| "grad_norm": 0.1706661432981491, | |
| "learning_rate": 2.299382716049383e-06, | |
| "loss": 10.3708, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.3145872488072863, | |
| "grad_norm": 0.16492144763469696, | |
| "learning_rate": 2.283950617283951e-06, | |
| "loss": 8.8977, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.3192135318779816, | |
| "grad_norm": 0.15373440086841583, | |
| "learning_rate": 2.268518518518519e-06, | |
| "loss": 10.9027, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.323839814948677, | |
| "grad_norm": 0.18270155787467957, | |
| "learning_rate": 2.2530864197530865e-06, | |
| "loss": 9.8633, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.3284660980193728, | |
| "grad_norm": 0.22537460923194885, | |
| "learning_rate": 2.2376543209876545e-06, | |
| "loss": 8.6262, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.333092381090068, | |
| "grad_norm": 0.1788664162158966, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 8.3806, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.3377186641607635, | |
| "grad_norm": 0.2230851948261261, | |
| "learning_rate": 2.2067901234567905e-06, | |
| "loss": 9.6656, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.342344947231459, | |
| "grad_norm": 0.157254159450531, | |
| "learning_rate": 2.191358024691358e-06, | |
| "loss": 9.0586, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.346971230302154, | |
| "grad_norm": 0.21109546720981598, | |
| "learning_rate": 2.175925925925926e-06, | |
| "loss": 10.4577, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.3515975133728495, | |
| "grad_norm": 0.16909867525100708, | |
| "learning_rate": 2.1604938271604937e-06, | |
| "loss": 9.5222, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.356223796443545, | |
| "grad_norm": 0.1652510166168213, | |
| "learning_rate": 2.145061728395062e-06, | |
| "loss": 9.5384, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.36085007951424, | |
| "grad_norm": 0.14154984056949615, | |
| "learning_rate": 2.1296296296296298e-06, | |
| "loss": 9.1477, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.3654763625849355, | |
| "grad_norm": 0.18620611727237701, | |
| "learning_rate": 2.114197530864198e-06, | |
| "loss": 9.8372, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.370102645655631, | |
| "grad_norm": 0.17792826890945435, | |
| "learning_rate": 2.0987654320987654e-06, | |
| "loss": 9.4475, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.3747289287263262, | |
| "grad_norm": 0.17278362810611725, | |
| "learning_rate": 2.0833333333333334e-06, | |
| "loss": 8.2241, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.379355211797022, | |
| "grad_norm": 0.17132383584976196, | |
| "learning_rate": 2.0679012345679015e-06, | |
| "loss": 10.9298, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.3839814948677174, | |
| "grad_norm": 0.15168847143650055, | |
| "learning_rate": 2.052469135802469e-06, | |
| "loss": 9.0072, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.3886077779384127, | |
| "grad_norm": 0.186196431517601, | |
| "learning_rate": 2.037037037037037e-06, | |
| "loss": 9.0923, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.393234061009108, | |
| "grad_norm": 0.12113353610038757, | |
| "learning_rate": 2.021604938271605e-06, | |
| "loss": 10.696, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.3978603440798034, | |
| "grad_norm": 0.25200334191322327, | |
| "learning_rate": 2.006172839506173e-06, | |
| "loss": 8.5775, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.4024866271504988, | |
| "grad_norm": 0.1717706322669983, | |
| "learning_rate": 1.9907407407407407e-06, | |
| "loss": 9.8223, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.407112910221194, | |
| "grad_norm": 0.21262796223163605, | |
| "learning_rate": 1.9753086419753087e-06, | |
| "loss": 9.1006, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.4117391932918895, | |
| "grad_norm": 0.15240874886512756, | |
| "learning_rate": 1.9598765432098768e-06, | |
| "loss": 8.8133, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.416365476362585, | |
| "grad_norm": 0.20154468715190887, | |
| "learning_rate": 1.944444444444445e-06, | |
| "loss": 8.9349, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.42099175943328, | |
| "grad_norm": 0.2465580701828003, | |
| "learning_rate": 1.9290123456790124e-06, | |
| "loss": 9.7463, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.4256180425039755, | |
| "grad_norm": 0.262588769197464, | |
| "learning_rate": 1.9135802469135804e-06, | |
| "loss": 8.9882, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.4302443255746713, | |
| "grad_norm": 0.1886514276266098, | |
| "learning_rate": 1.8981481481481484e-06, | |
| "loss": 10.1046, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.4348706086453666, | |
| "grad_norm": 0.15457630157470703, | |
| "learning_rate": 1.8827160493827162e-06, | |
| "loss": 9.1876, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.439496891716062, | |
| "grad_norm": 0.18536067008972168, | |
| "learning_rate": 1.867283950617284e-06, | |
| "loss": 8.9485, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.4441231747867573, | |
| "grad_norm": 0.16247576475143433, | |
| "learning_rate": 1.8518518518518519e-06, | |
| "loss": 9.7982, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.4487494578574527, | |
| "grad_norm": 0.13770771026611328, | |
| "learning_rate": 1.83641975308642e-06, | |
| "loss": 9.9136, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.453375740928148, | |
| "grad_norm": 0.1654641479253769, | |
| "learning_rate": 1.820987654320988e-06, | |
| "loss": 9.1883, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.4580020239988434, | |
| "grad_norm": 0.17362841963768005, | |
| "learning_rate": 1.8055555555555557e-06, | |
| "loss": 9.0925, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.4626283070695387, | |
| "grad_norm": 0.15451891720294952, | |
| "learning_rate": 1.7901234567901235e-06, | |
| "loss": 9.477, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.467254590140234, | |
| "grad_norm": 0.14848832786083221, | |
| "learning_rate": 1.7746913580246916e-06, | |
| "loss": 10.4986, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.4718808732109294, | |
| "grad_norm": 0.18573352694511414, | |
| "learning_rate": 1.7592592592592594e-06, | |
| "loss": 9.2772, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.4765071562816248, | |
| "grad_norm": 0.16295011341571808, | |
| "learning_rate": 1.7438271604938272e-06, | |
| "loss": 10.3985, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.4811334393523206, | |
| "grad_norm": 0.1890224814414978, | |
| "learning_rate": 1.7283950617283952e-06, | |
| "loss": 8.1752, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.485759722423016, | |
| "grad_norm": 0.1372375637292862, | |
| "learning_rate": 1.7129629629629632e-06, | |
| "loss": 8.903, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.4903860054937113, | |
| "grad_norm": 0.17448656260967255, | |
| "learning_rate": 1.697530864197531e-06, | |
| "loss": 8.5784, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.4950122885644066, | |
| "grad_norm": 0.1701819896697998, | |
| "learning_rate": 1.6820987654320989e-06, | |
| "loss": 8.7201, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.499638571635102, | |
| "grad_norm": 0.14620746672153473, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 9.211, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.5042648547057973, | |
| "grad_norm": 0.16162265837192535, | |
| "learning_rate": 1.651234567901235e-06, | |
| "loss": 9.4963, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.5088911377764926, | |
| "grad_norm": 0.15886524319648743, | |
| "learning_rate": 1.6358024691358027e-06, | |
| "loss": 8.8424, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.513517420847188, | |
| "grad_norm": 0.14556364715099335, | |
| "learning_rate": 1.6203703703703705e-06, | |
| "loss": 7.9769, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.5181437039178833, | |
| "grad_norm": 0.21578781306743622, | |
| "learning_rate": 1.6049382716049383e-06, | |
| "loss": 8.9909, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.522769986988579, | |
| "grad_norm": 0.18850085139274597, | |
| "learning_rate": 1.5895061728395064e-06, | |
| "loss": 9.8237, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.527396270059274, | |
| "grad_norm": 0.1415654569864273, | |
| "learning_rate": 1.5740740740740742e-06, | |
| "loss": 8.9615, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.53202255312997, | |
| "grad_norm": 0.17242810130119324, | |
| "learning_rate": 1.5586419753086422e-06, | |
| "loss": 8.9936, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.536648836200665, | |
| "grad_norm": 0.20581774413585663, | |
| "learning_rate": 1.54320987654321e-06, | |
| "loss": 9.376, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.5412751192713605, | |
| "grad_norm": 0.17876863479614258, | |
| "learning_rate": 1.527777777777778e-06, | |
| "loss": 8.9055, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.545901402342056, | |
| "grad_norm": 0.1470121592283249, | |
| "learning_rate": 1.5123456790123458e-06, | |
| "loss": 9.3424, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.550527685412751, | |
| "grad_norm": 0.23757006227970123, | |
| "learning_rate": 1.4969135802469136e-06, | |
| "loss": 9.7866, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.5551539684834466, | |
| "grad_norm": 0.16553503274917603, | |
| "learning_rate": 1.4814814814814815e-06, | |
| "loss": 8.6868, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.559780251554142, | |
| "grad_norm": 0.12771116197109222, | |
| "learning_rate": 1.4660493827160497e-06, | |
| "loss": 8.9301, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.5644065346248373, | |
| "grad_norm": 0.35117506980895996, | |
| "learning_rate": 1.4506172839506175e-06, | |
| "loss": 9.3037, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.5690328176955326, | |
| "grad_norm": 0.1245264932513237, | |
| "learning_rate": 1.4351851851851853e-06, | |
| "loss": 9.0822, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.5736591007662284, | |
| "grad_norm": 0.18772459030151367, | |
| "learning_rate": 1.4197530864197531e-06, | |
| "loss": 9.146, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.5782853838369233, | |
| "grad_norm": 0.18729011714458466, | |
| "learning_rate": 1.4043209876543211e-06, | |
| "loss": 8.183, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.582911666907619, | |
| "grad_norm": 0.12940698862075806, | |
| "learning_rate": 1.3888888888888892e-06, | |
| "loss": 9.4745, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.5875379499783144, | |
| "grad_norm": 0.16963091492652893, | |
| "learning_rate": 1.373456790123457e-06, | |
| "loss": 8.8193, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.59216423304901, | |
| "grad_norm": 0.17672613263130188, | |
| "learning_rate": 1.3580246913580248e-06, | |
| "loss": 8.2093, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.596790516119705, | |
| "grad_norm": 0.24918967485427856, | |
| "learning_rate": 1.3425925925925928e-06, | |
| "loss": 9.2257, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.6014167991904005, | |
| "grad_norm": 0.15989799797534943, | |
| "learning_rate": 1.3271604938271606e-06, | |
| "loss": 9.08, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.606043082261096, | |
| "grad_norm": 0.14997775852680206, | |
| "learning_rate": 1.3117283950617284e-06, | |
| "loss": 9.8745, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.610669365331791, | |
| "grad_norm": 0.17529721558094025, | |
| "learning_rate": 1.2962962962962962e-06, | |
| "loss": 8.8959, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.6152956484024865, | |
| "grad_norm": 0.16525782644748688, | |
| "learning_rate": 1.2808641975308645e-06, | |
| "loss": 8.2331, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.619921931473182, | |
| "grad_norm": 0.14114739000797272, | |
| "learning_rate": 1.2654320987654323e-06, | |
| "loss": 8.1592, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.6245482145438777, | |
| "grad_norm": 0.21027730405330658, | |
| "learning_rate": 1.25e-06, | |
| "loss": 9.1182, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.6291744976145726, | |
| "grad_norm": 0.13383808732032776, | |
| "learning_rate": 1.234567901234568e-06, | |
| "loss": 8.8642, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.6338007806852684, | |
| "grad_norm": 0.15454170107841492, | |
| "learning_rate": 1.219135802469136e-06, | |
| "loss": 7.1809, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.6384270637559637, | |
| "grad_norm": 0.1575554460287094, | |
| "learning_rate": 1.2037037037037037e-06, | |
| "loss": 8.5097, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.643053346826659, | |
| "grad_norm": 0.16009144484996796, | |
| "learning_rate": 1.1882716049382718e-06, | |
| "loss": 8.8008, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.6476796298973544, | |
| "grad_norm": 0.18514862656593323, | |
| "learning_rate": 1.1728395061728396e-06, | |
| "loss": 8.3351, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.6523059129680497, | |
| "grad_norm": 0.16186843812465668, | |
| "learning_rate": 1.1574074074074076e-06, | |
| "loss": 8.702, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.656932196038745, | |
| "grad_norm": 0.19223704934120178, | |
| "learning_rate": 1.1419753086419754e-06, | |
| "loss": 9.1931, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.6615584791094404, | |
| "grad_norm": 0.1866094172000885, | |
| "learning_rate": 1.1265432098765432e-06, | |
| "loss": 9.5196, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.666184762180136, | |
| "grad_norm": 0.2299501746892929, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 9.4269, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.670811045250831, | |
| "grad_norm": 0.19736136496067047, | |
| "learning_rate": 1.095679012345679e-06, | |
| "loss": 9.3998, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.675437328321527, | |
| "grad_norm": 0.16987043619155884, | |
| "learning_rate": 1.0802469135802469e-06, | |
| "loss": 8.2513, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.680063611392222, | |
| "grad_norm": 0.18940360844135284, | |
| "learning_rate": 1.0648148148148149e-06, | |
| "loss": 7.8771, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.6846898944629176, | |
| "grad_norm": 0.16819283366203308, | |
| "learning_rate": 1.0493827160493827e-06, | |
| "loss": 8.9367, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.689316177533613, | |
| "grad_norm": 0.14170341193675995, | |
| "learning_rate": 1.0339506172839507e-06, | |
| "loss": 7.1555, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.6939424606043083, | |
| "grad_norm": 0.16584675014019012, | |
| "learning_rate": 1.0185185185185185e-06, | |
| "loss": 9.7405, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.6985687436750037, | |
| "grad_norm": 0.15231122076511383, | |
| "learning_rate": 1.0030864197530866e-06, | |
| "loss": 9.8814, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.703195026745699, | |
| "grad_norm": 0.14071619510650635, | |
| "learning_rate": 9.876543209876544e-07, | |
| "loss": 8.0466, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.7078213098163944, | |
| "grad_norm": 0.21130667626857758, | |
| "learning_rate": 9.722222222222224e-07, | |
| "loss": 10.1551, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.7124475928870897, | |
| "grad_norm": 0.14736154675483704, | |
| "learning_rate": 9.567901234567902e-07, | |
| "loss": 7.5648, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.717073875957785, | |
| "grad_norm": 0.18979448080062866, | |
| "learning_rate": 9.413580246913581e-07, | |
| "loss": 9.5863, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.7217001590284804, | |
| "grad_norm": 0.14803054928779602, | |
| "learning_rate": 9.259259259259259e-07, | |
| "loss": 9.2012, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.726326442099176, | |
| "grad_norm": 0.24644902348518372, | |
| "learning_rate": 9.10493827160494e-07, | |
| "loss": 7.9091, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.730952725169871, | |
| "grad_norm": 0.18411195278167725, | |
| "learning_rate": 8.950617283950618e-07, | |
| "loss": 9.0894, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.735579008240567, | |
| "grad_norm": 0.21505457162857056, | |
| "learning_rate": 8.796296296296297e-07, | |
| "loss": 7.4988, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.7402052913112622, | |
| "grad_norm": 0.18174538016319275, | |
| "learning_rate": 8.641975308641976e-07, | |
| "loss": 8.2626, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.7448315743819576, | |
| "grad_norm": 0.1982118785381317, | |
| "learning_rate": 8.487654320987655e-07, | |
| "loss": 8.415, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.749457857452653, | |
| "grad_norm": 0.18941333889961243, | |
| "learning_rate": 8.333333333333333e-07, | |
| "loss": 8.931, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.7540841405233483, | |
| "grad_norm": 0.2093527615070343, | |
| "learning_rate": 8.179012345679014e-07, | |
| "loss": 9.6871, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.7587104235940436, | |
| "grad_norm": 0.25371572375297546, | |
| "learning_rate": 8.024691358024692e-07, | |
| "loss": 9.719, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.763336706664739, | |
| "grad_norm": 0.13767634332180023, | |
| "learning_rate": 7.870370370370371e-07, | |
| "loss": 7.0931, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.7679629897354343, | |
| "grad_norm": 0.18252308666706085, | |
| "learning_rate": 7.71604938271605e-07, | |
| "loss": 9.0813, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.7725892728061297, | |
| "grad_norm": 0.13574370741844177, | |
| "learning_rate": 7.561728395061729e-07, | |
| "loss": 8.8218, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.7772155558768254, | |
| "grad_norm": 0.17202846705913544, | |
| "learning_rate": 7.407407407407407e-07, | |
| "loss": 8.954, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.7818418389475204, | |
| "grad_norm": 0.18582294881343842, | |
| "learning_rate": 7.253086419753087e-07, | |
| "loss": 7.7078, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.786468122018216, | |
| "grad_norm": 0.27104949951171875, | |
| "learning_rate": 7.098765432098766e-07, | |
| "loss": 8.3435, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.7910944050889115, | |
| "grad_norm": 0.32489752769470215, | |
| "learning_rate": 6.944444444444446e-07, | |
| "loss": 8.5035, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.795720688159607, | |
| "grad_norm": 0.1454378217458725, | |
| "learning_rate": 6.790123456790124e-07, | |
| "loss": 8.4067, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.800346971230302, | |
| "grad_norm": 0.20940132439136505, | |
| "learning_rate": 6.635802469135803e-07, | |
| "loss": 9.4978, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 2.8049732543009975, | |
| "grad_norm": 0.17158959805965424, | |
| "learning_rate": 6.481481481481481e-07, | |
| "loss": 9.3416, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.809599537371693, | |
| "grad_norm": 0.17591050267219543, | |
| "learning_rate": 6.327160493827161e-07, | |
| "loss": 8.7415, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 2.8142258204423882, | |
| "grad_norm": 0.18380632996559143, | |
| "learning_rate": 6.17283950617284e-07, | |
| "loss": 8.2376, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.8188521035130836, | |
| "grad_norm": 0.19443170726299286, | |
| "learning_rate": 6.018518518518519e-07, | |
| "loss": 7.562, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 2.823478386583779, | |
| "grad_norm": 0.24004080891609192, | |
| "learning_rate": 5.864197530864198e-07, | |
| "loss": 9.346, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.8281046696544747, | |
| "grad_norm": 0.19434408843517303, | |
| "learning_rate": 5.709876543209877e-07, | |
| "loss": 6.8662, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 2.8327309527251696, | |
| "grad_norm": 0.1413394808769226, | |
| "learning_rate": 5.555555555555555e-07, | |
| "loss": 7.7949, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.8373572357958654, | |
| "grad_norm": 0.21421702206134796, | |
| "learning_rate": 5.401234567901234e-07, | |
| "loss": 8.6497, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 2.8419835188665608, | |
| "grad_norm": 0.18243171274662018, | |
| "learning_rate": 5.246913580246914e-07, | |
| "loss": 9.0258, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.846609801937256, | |
| "grad_norm": 0.15553410351276398, | |
| "learning_rate": 5.092592592592593e-07, | |
| "loss": 9.0613, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 2.8512360850079514, | |
| "grad_norm": 0.1925041526556015, | |
| "learning_rate": 4.938271604938272e-07, | |
| "loss": 8.8279, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.855862368078647, | |
| "grad_norm": 0.25465813279151917, | |
| "learning_rate": 4.783950617283951e-07, | |
| "loss": 8.5684, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 2.860488651149342, | |
| "grad_norm": 0.17943061888217926, | |
| "learning_rate": 4.6296296296296297e-07, | |
| "loss": 8.9017, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.8651149342200375, | |
| "grad_norm": 0.22506891191005707, | |
| "learning_rate": 4.475308641975309e-07, | |
| "loss": 8.7791, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 2.869741217290733, | |
| "grad_norm": 0.1924242228269577, | |
| "learning_rate": 4.320987654320988e-07, | |
| "loss": 8.4592, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.874367500361428, | |
| "grad_norm": 0.1336522102355957, | |
| "learning_rate": 4.1666666666666667e-07, | |
| "loss": 9.2921, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 2.878993783432124, | |
| "grad_norm": 0.16631512343883514, | |
| "learning_rate": 4.012345679012346e-07, | |
| "loss": 8.7771, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.883620066502819, | |
| "grad_norm": 0.13000570237636566, | |
| "learning_rate": 3.858024691358025e-07, | |
| "loss": 7.6173, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 2.8882463495735147, | |
| "grad_norm": 0.16806651651859283, | |
| "learning_rate": 3.7037037037037036e-07, | |
| "loss": 8.2835, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.89287263264421, | |
| "grad_norm": 0.16624124348163605, | |
| "learning_rate": 3.549382716049383e-07, | |
| "loss": 9.0295, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 2.8974989157149054, | |
| "grad_norm": 0.18300163745880127, | |
| "learning_rate": 3.395061728395062e-07, | |
| "loss": 9.3673, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.9021251987856007, | |
| "grad_norm": 0.1591711938381195, | |
| "learning_rate": 3.2407407407407406e-07, | |
| "loss": 9.2437, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 2.906751481856296, | |
| "grad_norm": 0.1935225874185562, | |
| "learning_rate": 3.08641975308642e-07, | |
| "loss": 8.1388, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.9113777649269914, | |
| "grad_norm": 0.18095123767852783, | |
| "learning_rate": 2.932098765432099e-07, | |
| "loss": 8.8998, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 2.9160040479976868, | |
| "grad_norm": 0.15141603350639343, | |
| "learning_rate": 2.7777777777777776e-07, | |
| "loss": 9.5553, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.920630331068382, | |
| "grad_norm": 0.1664353758096695, | |
| "learning_rate": 2.623456790123457e-07, | |
| "loss": 9.2671, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 2.9252566141390774, | |
| "grad_norm": 0.14757901430130005, | |
| "learning_rate": 2.469135802469136e-07, | |
| "loss": 9.3753, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.9298828972097732, | |
| "grad_norm": 0.12832246720790863, | |
| "learning_rate": 2.3148148148148148e-07, | |
| "loss": 7.55, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 2.934509180280468, | |
| "grad_norm": 0.14796899259090424, | |
| "learning_rate": 2.160493827160494e-07, | |
| "loss": 8.9455, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.939135463351164, | |
| "grad_norm": 0.16375280916690826, | |
| "learning_rate": 2.006172839506173e-07, | |
| "loss": 9.1212, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 2.9437617464218593, | |
| "grad_norm": 0.15986521542072296, | |
| "learning_rate": 1.8518518518518518e-07, | |
| "loss": 8.0966, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.9483880294925546, | |
| "grad_norm": 0.14000752568244934, | |
| "learning_rate": 1.697530864197531e-07, | |
| "loss": 8.1283, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 2.95301431256325, | |
| "grad_norm": 0.10660137236118317, | |
| "learning_rate": 1.54320987654321e-07, | |
| "loss": 7.7013, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.9576405956339453, | |
| "grad_norm": 0.12879547476768494, | |
| "learning_rate": 1.3888888888888888e-07, | |
| "loss": 8.2472, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 2.9622668787046407, | |
| "grad_norm": 0.24027042090892792, | |
| "learning_rate": 1.234567901234568e-07, | |
| "loss": 9.3202, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.966893161775336, | |
| "grad_norm": 0.18989317119121552, | |
| "learning_rate": 1.080246913580247e-07, | |
| "loss": 8.6135, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 2.9715194448460314, | |
| "grad_norm": 0.20127813518047333, | |
| "learning_rate": 9.259259259259259e-08, | |
| "loss": 8.3557, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.9761457279167267, | |
| "grad_norm": 0.20816563069820404, | |
| "learning_rate": 7.71604938271605e-08, | |
| "loss": 9.6997, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 2.9807720109874225, | |
| "grad_norm": 0.2171671837568283, | |
| "learning_rate": 6.17283950617284e-08, | |
| "loss": 9.6809, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.9853982940581174, | |
| "grad_norm": 0.18569743633270264, | |
| "learning_rate": 4.6296296296296295e-08, | |
| "loss": 8.4721, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 2.990024577128813, | |
| "grad_norm": 0.16961927711963654, | |
| "learning_rate": 3.08641975308642e-08, | |
| "loss": 9.1963, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.990024577128813, | |
| "step": 648, | |
| "total_flos": 2.9427853089130414e+18, | |
| "train_loss": 10.740490390930647, | |
| "train_runtime": 52216.2733, | |
| "train_samples_per_second": 1.59, | |
| "train_steps_per_second": 0.012 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 648, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.9427853089130414e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
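
The `log_history` array above is a flat list of per-step records, which makes the loss and learning-rate curves straightforward to recover programmatically. Below is a minimal sketch of doing so, assuming the JSON is saved locally as `trainer_state.json` (the filename the Hugging Face Trainer normally writes alongside checkpoints) and that `matplotlib` is installed; the filter step drops the final summary record, which carries `train_loss` rather than a per-step `loss`. Note also the extra record at step 434: it repeats step 433's `grad_norm` and `learning_rate` and reports a much smaller loss, which looks like the Trainer's end-of-epoch log of a partial batch rather than a genuine dip in the curve.

```python
# Minimal sketch: recover the loss and learning-rate series from a
# Hugging Face trainer_state.json. Assumes the file sits in the current
# directory and matplotlib is available; neither is guaranteed by the log.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:  # assumed local path
    state = json.load(f)

# Keep only per-step records; the trailing summary entry has
# "train_loss"/"train_runtime" instead of a per-step "loss".
records = [r for r in state["log_history"]
           if "loss" in r and "learning_rate" in r]

steps = [r["step"] for r in records]
losses = [r["loss"] for r in records]
lrs = [r["learning_rate"] for r in records]

print(f"steps logged: {len(records)} (max_steps = {state['max_steps']})")
print(f"final loss: {losses[-1]:.4f}, "
      f"mean loss: {sum(losses) / len(losses):.4f}")

# Stack the two curves on a shared global-step axis.
fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")
fig.tight_layout()
plt.show()
```

Running this prints the number of logged steps and renders the two curves stacked on a shared step axis. The `learning_rate` column itself decays from just under 1e-05 at step 1 to 3.09e-08 at step 648, consistent with a linear decay schedule over `max_steps = 648`.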