mohammadmahdinouri commited on
Commit
20355d8
·
verified ·
1 Parent(s): 14d13bd

Training in progress, step 102000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b98730936aa6dc10188205017677ff9e14fddcaabd946d7ee45496f79bc09381
3
  size 304481530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7f5ea86071f8a443230b23461cfbfb9011f2eb0c114ed9f153b2befd1980b09
3
  size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dab2512418edabf09a1f198b3ec1e34ef7a4c1ef244751dc6f143ee5c83a138a
3
  size 402029570
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2436d9ce5048c7b249db87baeb4a99589f5cb4ba3d6a5e83bf03cb11fe8f0be
3
  size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a09508e2b30c85c33c8c92e149720f8c4c88cb50fa6fa2edbc4d77909aa39abc
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f0b98a5ebb9ed78e7e8e5b9ffb2e444a0031c547ea9bedbf7d34b7fa2ad1116
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b692cd01a96ee7aba0dd93b934c38beb8d67bf930ea0c2c3cc4357d8083ee1d
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cfcb9518936230adad38b3ece3c6f950bb8431417e2c158d94db199da5ecc7f
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2a004b1fccaef20bc4d739ab300e21e9c86f45064b872c12bf23541f452b512
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ca0fc3fe7a9627836bbdc1a3373de9ea1ca12bc7235315729c74c1a4f443961
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6d823023bcdcfcd96199c4d0cbbb176cb114397dd8cc91feea8c9e58ee31394
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b7069c2c37db8f9fc224f696a3a2d7a164145b4eecb3137491caa9925d870ba
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a078e01d117ebf31a09758a1d53fa5c61bd25aa21a07401f65e09cce62479119
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:922cd08a83c902a03c338945ddd81b7601735a1921c4a20e3f521ea886a2772d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.14961278433835598,
6
  "eval_steps": 500,
7
- "global_step": 101000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -35358,6 +35358,356 @@
35358
  "learning_rate": 0.000475182098533729,
35359
  "loss": 15.8686,
35360
  "step": 101000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35361
  }
35362
  ],
35363
  "logging_steps": 20,
@@ -35377,7 +35727,7 @@
35377
  "attributes": {}
35378
  }
35379
  },
35380
- "total_flos": 7.426261166766581e+19,
35381
  "train_batch_size": 48,
35382
  "trial_name": null,
35383
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.15109409903477533,
6
  "eval_steps": 500,
7
+ "global_step": 102000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
35358
  "learning_rate": 0.000475182098533729,
35359
  "loss": 15.8686,
35360
  "step": 101000
35361
+ },
35362
+ {
35363
+ "epoch": 0.14964241063228437,
35364
+ "grad_norm": 6.21875,
35365
+ "learning_rate": 0.0004751771595987214,
35366
+ "loss": 15.8408,
35367
+ "step": 101020
35368
+ },
35369
+ {
35370
+ "epoch": 0.14967203692621275,
35371
+ "grad_norm": 6.46875,
35372
+ "learning_rate": 0.00047517222066371386,
35373
+ "loss": 15.8667,
35374
+ "step": 101040
35375
+ },
35376
+ {
35377
+ "epoch": 0.14970166322014114,
35378
+ "grad_norm": 8.25,
35379
+ "learning_rate": 0.00047516728172870625,
35380
+ "loss": 15.8582,
35381
+ "step": 101060
35382
+ },
35383
+ {
35384
+ "epoch": 0.14973128951406953,
35385
+ "grad_norm": 6.78125,
35386
+ "learning_rate": 0.00047516234279369876,
35387
+ "loss": 15.8413,
35388
+ "step": 101080
35389
+ },
35390
+ {
35391
+ "epoch": 0.1497609158079979,
35392
+ "grad_norm": 6.84375,
35393
+ "learning_rate": 0.00047515740385869115,
35394
+ "loss": 15.837,
35395
+ "step": 101100
35396
+ },
35397
+ {
35398
+ "epoch": 0.1497905421019263,
35399
+ "grad_norm": 6.8125,
35400
+ "learning_rate": 0.0004751524649236836,
35401
+ "loss": 15.8153,
35402
+ "step": 101120
35403
+ },
35404
+ {
35405
+ "epoch": 0.14982016839585469,
35406
+ "grad_norm": 5.875,
35407
+ "learning_rate": 0.000475147525988676,
35408
+ "loss": 15.8841,
35409
+ "step": 101140
35410
+ },
35411
+ {
35412
+ "epoch": 0.14984979468978307,
35413
+ "grad_norm": 6.9375,
35414
+ "learning_rate": 0.0004751425870536685,
35415
+ "loss": 15.7932,
35416
+ "step": 101160
35417
+ },
35418
+ {
35419
+ "epoch": 0.14987942098371146,
35420
+ "grad_norm": 6.65625,
35421
+ "learning_rate": 0.0004751376481186609,
35422
+ "loss": 15.8238,
35423
+ "step": 101180
35424
+ },
35425
+ {
35426
+ "epoch": 0.14990904727763985,
35427
+ "grad_norm": 6.53125,
35428
+ "learning_rate": 0.00047513270918365333,
35429
+ "loss": 15.8007,
35430
+ "step": 101200
35431
+ },
35432
+ {
35433
+ "epoch": 0.14993867357156823,
35434
+ "grad_norm": 6.6875,
35435
+ "learning_rate": 0.00047512777024864573,
35436
+ "loss": 15.8114,
35437
+ "step": 101220
35438
+ },
35439
+ {
35440
+ "epoch": 0.14996829986549662,
35441
+ "grad_norm": 6.40625,
35442
+ "learning_rate": 0.00047512283131363823,
35443
+ "loss": 15.8998,
35444
+ "step": 101240
35445
+ },
35446
+ {
35447
+ "epoch": 0.149997926159425,
35448
+ "grad_norm": 7.28125,
35449
+ "learning_rate": 0.0004751178923786306,
35450
+ "loss": 15.7957,
35451
+ "step": 101260
35452
+ },
35453
+ {
35454
+ "epoch": 0.1500275524533534,
35455
+ "grad_norm": 6.9375,
35456
+ "learning_rate": 0.000475112953443623,
35457
+ "loss": 15.792,
35458
+ "step": 101280
35459
+ },
35460
+ {
35461
+ "epoch": 0.15005717874728178,
35462
+ "grad_norm": 6.90625,
35463
+ "learning_rate": 0.00047510801450861547,
35464
+ "loss": 15.858,
35465
+ "step": 101300
35466
+ },
35467
+ {
35468
+ "epoch": 0.15008680504121016,
35469
+ "grad_norm": 6.3125,
35470
+ "learning_rate": 0.0004751030755736079,
35471
+ "loss": 15.9071,
35472
+ "step": 101320
35473
+ },
35474
+ {
35475
+ "epoch": 0.15011643133513855,
35476
+ "grad_norm": 5.875,
35477
+ "learning_rate": 0.00047509813663860036,
35478
+ "loss": 15.8434,
35479
+ "step": 101340
35480
+ },
35481
+ {
35482
+ "epoch": 0.15014605762906694,
35483
+ "grad_norm": 6.84375,
35484
+ "learning_rate": 0.00047509319770359275,
35485
+ "loss": 15.8702,
35486
+ "step": 101360
35487
+ },
35488
+ {
35489
+ "epoch": 0.15017568392299532,
35490
+ "grad_norm": 6.96875,
35491
+ "learning_rate": 0.00047508825876858526,
35492
+ "loss": 15.8149,
35493
+ "step": 101380
35494
+ },
35495
+ {
35496
+ "epoch": 0.15020531021692374,
35497
+ "grad_norm": 6.78125,
35498
+ "learning_rate": 0.00047508331983357765,
35499
+ "loss": 15.8167,
35500
+ "step": 101400
35501
+ },
35502
+ {
35503
+ "epoch": 0.15023493651085212,
35504
+ "grad_norm": 5.71875,
35505
+ "learning_rate": 0.0004750783808985701,
35506
+ "loss": 15.8355,
35507
+ "step": 101420
35508
+ },
35509
+ {
35510
+ "epoch": 0.1502645628047805,
35511
+ "grad_norm": 6.125,
35512
+ "learning_rate": 0.0004750734419635625,
35513
+ "loss": 15.8727,
35514
+ "step": 101440
35515
+ },
35516
+ {
35517
+ "epoch": 0.1502941890987089,
35518
+ "grad_norm": 6.625,
35519
+ "learning_rate": 0.000475068503028555,
35520
+ "loss": 15.8786,
35521
+ "step": 101460
35522
+ },
35523
+ {
35524
+ "epoch": 0.15032381539263728,
35525
+ "grad_norm": 7.21875,
35526
+ "learning_rate": 0.0004750635640935474,
35527
+ "loss": 15.8171,
35528
+ "step": 101480
35529
+ },
35530
+ {
35531
+ "epoch": 0.15035344168656567,
35532
+ "grad_norm": 6.84375,
35533
+ "learning_rate": 0.00047505862515853983,
35534
+ "loss": 15.7871,
35535
+ "step": 101500
35536
+ },
35537
+ {
35538
+ "epoch": 0.15038306798049406,
35539
+ "grad_norm": 6.53125,
35540
+ "learning_rate": 0.00047505368622353223,
35541
+ "loss": 15.8054,
35542
+ "step": 101520
35543
+ },
35544
+ {
35545
+ "epoch": 0.15041269427442244,
35546
+ "grad_norm": 7.375,
35547
+ "learning_rate": 0.00047504874728852473,
35548
+ "loss": 15.834,
35549
+ "step": 101540
35550
+ },
35551
+ {
35552
+ "epoch": 0.15044232056835083,
35553
+ "grad_norm": 6.65625,
35554
+ "learning_rate": 0.0004750438083535171,
35555
+ "loss": 15.906,
35556
+ "step": 101560
35557
+ },
35558
+ {
35559
+ "epoch": 0.15047194686227922,
35560
+ "grad_norm": 6.59375,
35561
+ "learning_rate": 0.00047503886941850957,
35562
+ "loss": 15.8236,
35563
+ "step": 101580
35564
+ },
35565
+ {
35566
+ "epoch": 0.1505015731562076,
35567
+ "grad_norm": 7.875,
35568
+ "learning_rate": 0.00047503393048350197,
35569
+ "loss": 15.8215,
35570
+ "step": 101600
35571
+ },
35572
+ {
35573
+ "epoch": 0.150531199450136,
35574
+ "grad_norm": 6.625,
35575
+ "learning_rate": 0.0004750289915484944,
35576
+ "loss": 15.8343,
35577
+ "step": 101620
35578
+ },
35579
+ {
35580
+ "epoch": 0.15056082574406437,
35581
+ "grad_norm": 6.875,
35582
+ "learning_rate": 0.00047502405261348686,
35583
+ "loss": 15.7763,
35584
+ "step": 101640
35585
+ },
35586
+ {
35587
+ "epoch": 0.15059045203799276,
35588
+ "grad_norm": 7.0,
35589
+ "learning_rate": 0.00047501911367847925,
35590
+ "loss": 15.8537,
35591
+ "step": 101660
35592
+ },
35593
+ {
35594
+ "epoch": 0.15062007833192115,
35595
+ "grad_norm": 6.5,
35596
+ "learning_rate": 0.00047501417474347176,
35597
+ "loss": 15.902,
35598
+ "step": 101680
35599
+ },
35600
+ {
35601
+ "epoch": 0.15064970462584953,
35602
+ "grad_norm": 6.15625,
35603
+ "learning_rate": 0.00047500923580846415,
35604
+ "loss": 15.8103,
35605
+ "step": 101700
35606
+ },
35607
+ {
35608
+ "epoch": 0.15067933091977792,
35609
+ "grad_norm": 6.40625,
35610
+ "learning_rate": 0.0004750042968734566,
35611
+ "loss": 15.8109,
35612
+ "step": 101720
35613
+ },
35614
+ {
35615
+ "epoch": 0.1507089572137063,
35616
+ "grad_norm": 7.3125,
35617
+ "learning_rate": 0.000474999357938449,
35618
+ "loss": 15.8841,
35619
+ "step": 101740
35620
+ },
35621
+ {
35622
+ "epoch": 0.1507385835076347,
35623
+ "grad_norm": 6.375,
35624
+ "learning_rate": 0.0004749944190034415,
35625
+ "loss": 15.8348,
35626
+ "step": 101760
35627
+ },
35628
+ {
35629
+ "epoch": 0.15076820980156308,
35630
+ "grad_norm": 7.09375,
35631
+ "learning_rate": 0.0004749894800684339,
35632
+ "loss": 15.851,
35633
+ "step": 101780
35634
+ },
35635
+ {
35636
+ "epoch": 0.15079783609549147,
35637
+ "grad_norm": 6.59375,
35638
+ "learning_rate": 0.00047498454113342634,
35639
+ "loss": 15.822,
35640
+ "step": 101800
35641
+ },
35642
+ {
35643
+ "epoch": 0.15082746238941985,
35644
+ "grad_norm": 6.46875,
35645
+ "learning_rate": 0.00047497960219841873,
35646
+ "loss": 15.8174,
35647
+ "step": 101820
35648
+ },
35649
+ {
35650
+ "epoch": 0.15085708868334824,
35651
+ "grad_norm": 7.0625,
35652
+ "learning_rate": 0.00047497466326341123,
35653
+ "loss": 15.8871,
35654
+ "step": 101840
35655
+ },
35656
+ {
35657
+ "epoch": 0.15088671497727663,
35658
+ "grad_norm": 6.75,
35659
+ "learning_rate": 0.0004749697243284036,
35660
+ "loss": 15.8636,
35661
+ "step": 101860
35662
+ },
35663
+ {
35664
+ "epoch": 0.150916341271205,
35665
+ "grad_norm": 6.625,
35666
+ "learning_rate": 0.00047496478539339607,
35667
+ "loss": 15.8234,
35668
+ "step": 101880
35669
+ },
35670
+ {
35671
+ "epoch": 0.1509459675651334,
35672
+ "grad_norm": 7.78125,
35673
+ "learning_rate": 0.00047495984645838847,
35674
+ "loss": 15.8701,
35675
+ "step": 101900
35676
+ },
35677
+ {
35678
+ "epoch": 0.15097559385906179,
35679
+ "grad_norm": 6.25,
35680
+ "learning_rate": 0.00047495490752338097,
35681
+ "loss": 15.826,
35682
+ "step": 101920
35683
+ },
35684
+ {
35685
+ "epoch": 0.15100522015299017,
35686
+ "grad_norm": 6.84375,
35687
+ "learning_rate": 0.00047494996858837336,
35688
+ "loss": 15.8412,
35689
+ "step": 101940
35690
+ },
35691
+ {
35692
+ "epoch": 0.15103484644691856,
35693
+ "grad_norm": 6.21875,
35694
+ "learning_rate": 0.00047494502965336576,
35695
+ "loss": 15.831,
35696
+ "step": 101960
35697
+ },
35698
+ {
35699
+ "epoch": 0.15106447274084694,
35700
+ "grad_norm": 6.8125,
35701
+ "learning_rate": 0.00047494009071835826,
35702
+ "loss": 15.8454,
35703
+ "step": 101980
35704
+ },
35705
+ {
35706
+ "epoch": 0.15109409903477533,
35707
+ "grad_norm": 6.53125,
35708
+ "learning_rate": 0.00047493515178335065,
35709
+ "loss": 15.837,
35710
+ "step": 102000
35711
  }
35712
  ],
35713
  "logging_steps": 20,
 
35727
  "attributes": {}
35728
  }
35729
  },
35730
+ "total_flos": 7.499797585582883e+19,
35731
  "train_batch_size": 48,
35732
  "trial_name": null,
35733
  "trial_params": null