radna commited on
Commit
2146a64
·
verified ·
1 Parent(s): cce5155

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoint-10/adapter_config.json +4 -4
  2. checkpoint-10/global_step10/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3 -0
  3. checkpoint-10/global_step10/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3 -0
  4. checkpoint-10/global_step10/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3 -0
  5. checkpoint-10/global_step10/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3 -0
  6. checkpoint-10/global_step10/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3 -0
  7. checkpoint-10/global_step10/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3 -0
  8. checkpoint-10/global_step10/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3 -0
  9. checkpoint-10/global_step10/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3 -0
  10. checkpoint-10/rng_state_24.pth +3 -0
  11. checkpoint-10/rng_state_25.pth +3 -0
  12. checkpoint-10/rng_state_26.pth +3 -0
  13. checkpoint-10/rng_state_27.pth +3 -0
  14. checkpoint-10/rng_state_28.pth +3 -0
  15. checkpoint-10/rng_state_29.pth +3 -0
  16. checkpoint-10/rng_state_30.pth +3 -0
  17. checkpoint-10/rng_state_31.pth +3 -0
  18. checkpoint-10/trainer_state.json +12 -12
  19. checkpoint-10/training_args.bin +1 -1
  20. checkpoint-12/adapter_config.json +4 -4
  21. checkpoint-12/global_step12/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3 -0
  22. checkpoint-12/global_step12/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3 -0
  23. checkpoint-12/global_step12/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3 -0
  24. checkpoint-12/global_step12/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3 -0
  25. checkpoint-12/global_step12/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3 -0
  26. checkpoint-12/global_step12/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3 -0
  27. checkpoint-12/global_step12/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3 -0
  28. checkpoint-12/global_step12/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3 -0
  29. checkpoint-12/rng_state_24.pth +3 -0
  30. checkpoint-12/rng_state_25.pth +3 -0
  31. checkpoint-12/rng_state_26.pth +3 -0
  32. checkpoint-12/rng_state_27.pth +3 -0
  33. checkpoint-12/rng_state_28.pth +3 -0
  34. checkpoint-12/rng_state_29.pth +3 -0
  35. checkpoint-12/rng_state_30.pth +3 -0
  36. checkpoint-12/rng_state_31.pth +3 -0
  37. checkpoint-12/trainer_state.json +15 -15
  38. checkpoint-12/training_args.bin +1 -1
  39. checkpoint-14/adapter_config.json +4 -4
  40. checkpoint-14/global_step14/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3 -0
  41. checkpoint-14/global_step14/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3 -0
  42. checkpoint-14/global_step14/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3 -0
  43. checkpoint-14/global_step14/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3 -0
  44. checkpoint-14/global_step14/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3 -0
  45. checkpoint-14/global_step14/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3 -0
  46. checkpoint-14/global_step14/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3 -0
  47. checkpoint-14/global_step14/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3 -0
  48. checkpoint-14/rng_state_24.pth +3 -0
  49. checkpoint-14/rng_state_25.pth +3 -0
  50. checkpoint-14/rng_state_26.pth +3 -0
checkpoint-10/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "down_proj",
27
  "o_proj",
28
- "q_proj",
29
  "v_proj",
 
30
  "up_proj",
31
- "gate_proj",
32
- "k_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
26
  "o_proj",
27
+ "gate_proj",
28
  "v_proj",
29
+ "k_proj",
30
  "up_proj",
31
+ "down_proj",
32
+ "q_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
checkpoint-10/global_step10/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71abbcfbc1d91034bef919858008f09d982c64723e1a218893620a397a8c295d
3
+ size 51616527
checkpoint-10/global_step10/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83886d6ac740bb9d83d4b87fe96adeb4643f377e851eb098e73f297160c7d9d3
3
+ size 51616015
checkpoint-10/global_step10/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:179d5b03aaf5bcbf79ff20cc7f36df26058a6a1a4a05533e7e69a7279dfacdba
3
+ size 51616527
checkpoint-10/global_step10/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:add54f3213c8b877111f4099a1d78457f9070f13a9094dcb4e7653bb995b7a20
3
+ size 51616015
checkpoint-10/global_step10/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bd49f629473d2b2c09af335222472a1d5b60d7a227d271e7014f12d9b9aee1e
3
+ size 51616527
checkpoint-10/global_step10/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:102ac1e28d574e64fe98263d95c22438d89593c5950285c6b1e830f60736ffab
3
+ size 51616015
checkpoint-10/global_step10/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25c15c6816382a6305f8b9059be2c57a836d7696c9dd169a5d13dfed758a45b0
3
+ size 51616527
checkpoint-10/global_step10/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dd8c280e01b34990d01ca5e91b78639f9af57bc4e9e04e7fab3ffdd8f438f24
3
+ size 51616015
checkpoint-10/rng_state_24.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6de215273d87a18853a3f6d61d4ea2772d0ea0fc1cd40780b56559ffef7a5381
3
+ size 16340
checkpoint-10/rng_state_25.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e690d832f31274be09cfd78854848c40cfdcdc026fbab9205d325e4a231d4ebb
3
+ size 16340
checkpoint-10/rng_state_26.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94de4917f0fe3a8f493274bc6e589ddc8fa928aee05177a9c7aab840caf3e610
3
+ size 16404
checkpoint-10/rng_state_27.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebc562feb7cecf6e3c157442a4e8bc340e3346a6c73928ae239b0b5be6721e98
3
+ size 16404
checkpoint-10/rng_state_28.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49aa4273cffe5e146f520147f34d7430e3131b34109e5cd4adfba158a0427f23
3
+ size 16404
checkpoint-10/rng_state_29.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:689f2f1de740cd7ac2ce1d8a93e7c129f63ccf2c5e16724bf7958f096b259a5d
3
+ size 16340
checkpoint-10/rng_state_30.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8493cc7e1e56e19baaff651bb138b4536553d0fdd90351cb2b307080d4142d50
3
+ size 16468
checkpoint-10/rng_state_31.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15de90d412c8478ebd5e3119ab22306436b93749e32be50237ba8d59f51b6185
3
+ size 16340
checkpoint-10/trainer_state.json CHANGED
@@ -17,7 +17,7 @@
17
  "kl": 0.0,
18
  "learning_rate": 1.6666666666666667e-05,
19
  "loss": -0.11016345024108887,
20
- "memory(GiB)": 180.29,
21
  "response_clip_ratio": 0.11328125,
22
  "reward": -0.002658387296833098,
23
  "reward_std": 0.06134121119976044,
@@ -33,7 +33,7 @@
33
  "kl": 0.0,
34
  "learning_rate": 3.3333333333333335e-05,
35
  "loss": -0.11016345024108887,
36
- "memory(GiB)": 180.29,
37
  "step": 2,
38
  "train_speed(iter/s)": 0.000466
39
  },
@@ -45,7 +45,7 @@
45
  "kl": 9.50181856751442e-07,
46
  "learning_rate": 5e-05,
47
  "loss": -0.06604708731174469,
48
- "memory(GiB)": 180.29,
49
  "response_clip_ratio": 0.13671875,
50
  "reward": 0.0006296975770965219,
51
  "reward_std": 0.07172460854053497,
@@ -61,7 +61,7 @@
61
  "kl": 1.1101365089416504e-05,
62
  "learning_rate": 6.666666666666667e-05,
63
  "loss": -0.06727766245603561,
64
- "memory(GiB)": 180.29,
65
  "step": 4,
66
  "train_speed(iter/s)": 0.000458
67
  },
@@ -73,7 +73,7 @@
73
  "kl": 0.00017762184143066406,
74
  "learning_rate": 8.333333333333334e-05,
75
  "loss": -0.09315311908721924,
76
- "memory(GiB)": 180.29,
77
  "response_clip_ratio": 0.119140625,
78
  "reward": -0.005135859013535082,
79
  "reward_std": 0.07994875870645046,
@@ -87,9 +87,9 @@
87
  "grad_norm": 0.18263348937034607,
88
  "learning_rate": 0.0001,
89
  "loss": -0.1041698157787323,
90
- "memory(GiB)": 180.29,
91
  "step": 6,
92
- "train_speed(iter/s)": 0.000458
93
  },
94
  {
95
  "epoch": 1.4210526315789473,
@@ -102,7 +102,7 @@
102
  "eval_reward_std": 0.08769983053207397,
103
  "eval_rewards/CosineReward": 0.012996694073081017,
104
  "eval_rewards/RepetitionPenalty": 0.0,
105
- "eval_runtime": 1030.1223,
106
  "eval_samples_per_second": 0.001,
107
  "eval_steps_per_second": 0.001,
108
  "step": 6
@@ -115,7 +115,7 @@
115
  "kl": 0.017406463623046875,
116
  "learning_rate": 9.991540791356342e-05,
117
  "loss": -0.051375165581703186,
118
- "memory(GiB)": 180.29,
119
  "response_clip_ratio": 0.1484375,
120
  "reward": 0.004909618757665157,
121
  "reward_std": 0.08167182095348835,
@@ -131,7 +131,7 @@
131
  "kl": 0.089599609375,
132
  "learning_rate": 9.966191788709716e-05,
133
  "loss": -0.05105742812156677,
134
- "memory(GiB)": 180.29,
135
  "step": 8,
136
  "train_speed(iter/s)": 0.000433
137
  },
@@ -143,7 +143,7 @@
143
  "kl": 0.0963134765625,
144
  "learning_rate": 9.924038765061042e-05,
145
  "loss": -0.05842069163918495,
146
- "memory(GiB)": 180.29,
147
  "response_clip_ratio": 0.255859375,
148
  "reward": 0.03643610421568155,
149
  "reward_std": 0.11898956261575222,
@@ -159,7 +159,7 @@
159
  "kl": 0.1185302734375,
160
  "learning_rate": 9.865224352899119e-05,
161
  "loss": -0.06491819024085999,
162
- "memory(GiB)": 180.29,
163
  "step": 10,
164
  "train_speed(iter/s)": 0.000436
165
  }
 
17
  "kl": 0.0,
18
  "learning_rate": 1.6666666666666667e-05,
19
  "loss": -0.11016345024108887,
20
+ "memory(GiB)": 186.69,
21
  "response_clip_ratio": 0.11328125,
22
  "reward": -0.002658387296833098,
23
  "reward_std": 0.06134121119976044,
 
33
  "kl": 0.0,
34
  "learning_rate": 3.3333333333333335e-05,
35
  "loss": -0.11016345024108887,
36
+ "memory(GiB)": 186.69,
37
  "step": 2,
38
  "train_speed(iter/s)": 0.000466
39
  },
 
45
  "kl": 9.50181856751442e-07,
46
  "learning_rate": 5e-05,
47
  "loss": -0.06604708731174469,
48
+ "memory(GiB)": 186.69,
49
  "response_clip_ratio": 0.13671875,
50
  "reward": 0.0006296975770965219,
51
  "reward_std": 0.07172460854053497,
 
61
  "kl": 1.1101365089416504e-05,
62
  "learning_rate": 6.666666666666667e-05,
63
  "loss": -0.06727766245603561,
64
+ "memory(GiB)": 186.69,
65
  "step": 4,
66
  "train_speed(iter/s)": 0.000458
67
  },
 
73
  "kl": 0.00017762184143066406,
74
  "learning_rate": 8.333333333333334e-05,
75
  "loss": -0.09315311908721924,
76
+ "memory(GiB)": 186.69,
77
  "response_clip_ratio": 0.119140625,
78
  "reward": -0.005135859013535082,
79
  "reward_std": 0.07994875870645046,
 
87
  "grad_norm": 0.18263348937034607,
88
  "learning_rate": 0.0001,
89
  "loss": -0.1041698157787323,
90
+ "memory(GiB)": 186.69,
91
  "step": 6,
92
+ "train_speed(iter/s)": 0.000459
93
  },
94
  {
95
  "epoch": 1.4210526315789473,
 
102
  "eval_reward_std": 0.08769983053207397,
103
  "eval_rewards/CosineReward": 0.012996694073081017,
104
  "eval_rewards/RepetitionPenalty": 0.0,
105
+ "eval_runtime": 1030.1122,
106
  "eval_samples_per_second": 0.001,
107
  "eval_steps_per_second": 0.001,
108
  "step": 6
 
115
  "kl": 0.017406463623046875,
116
  "learning_rate": 9.991540791356342e-05,
117
  "loss": -0.051375165581703186,
118
+ "memory(GiB)": 186.69,
119
  "response_clip_ratio": 0.1484375,
120
  "reward": 0.004909618757665157,
121
  "reward_std": 0.08167182095348835,
 
131
  "kl": 0.089599609375,
132
  "learning_rate": 9.966191788709716e-05,
133
  "loss": -0.05105742812156677,
134
+ "memory(GiB)": 186.69,
135
  "step": 8,
136
  "train_speed(iter/s)": 0.000433
137
  },
 
143
  "kl": 0.0963134765625,
144
  "learning_rate": 9.924038765061042e-05,
145
  "loss": -0.05842069163918495,
146
+ "memory(GiB)": 186.69,
147
  "response_clip_ratio": 0.255859375,
148
  "reward": 0.03643610421568155,
149
  "reward_std": 0.11898956261575222,
 
159
  "kl": 0.1185302734375,
160
  "learning_rate": 9.865224352899119e-05,
161
  "loss": -0.06491819024085999,
162
+ "memory(GiB)": 186.69,
163
  "step": 10,
164
  "train_speed(iter/s)": 0.000436
165
  }
checkpoint-10/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
3
  size 9809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
3
  size 9809
checkpoint-12/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "down_proj",
27
  "o_proj",
28
- "q_proj",
29
  "v_proj",
 
30
  "up_proj",
31
- "gate_proj",
32
- "k_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
26
  "o_proj",
27
+ "gate_proj",
28
  "v_proj",
29
+ "k_proj",
30
  "up_proj",
31
+ "down_proj",
32
+ "q_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
checkpoint-12/global_step12/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2921a649d4d836a585641dcd22939ac37dc9e0c9d0370a4e10c8e83179d1b4a2
3
+ size 51616527
checkpoint-12/global_step12/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:165bbaaf90b2562e316b6b96a1930fc31d8c519e2c106a13f35fd53eb318ad95
3
+ size 51616015
checkpoint-12/global_step12/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9326905b7b4f1404ae39d29fed6a682af03fa4051190b952f5769b819276db9e
3
+ size 51616527
checkpoint-12/global_step12/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0a202e9c0189a536f8f1ba2d8c65cd8b810b93c5fb9ff0f3d7b3d3187aeaec1
3
+ size 51616015
checkpoint-12/global_step12/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c21dda723beba3d3c3e1cf386b22d8135eae02f179b7e1203b964df70485eb70
3
+ size 51616527
checkpoint-12/global_step12/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e0fa132ab4bc4fb3a7fad138392ccce8eb68d17939ddda2401780f51a290964
3
+ size 51616015
checkpoint-12/global_step12/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88060c8357bd6875f498f9f37c20c6bb741294d03e9edd521f3d42ab81b0f759
3
+ size 51616527
checkpoint-12/global_step12/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29ab812e726cb366bb8b9b87f696c3b240c440803e24ab8a7217bbe836277700
3
+ size 51616015
checkpoint-12/rng_state_24.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4348c196f860de52942de2749ad741d5e57abaccaae216b023ead5fab6a12571
3
+ size 16340
checkpoint-12/rng_state_25.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cc2b2852efd811ec7d57f8ccc47c563842e50250fd34a97b672228e1a367e15
3
+ size 16340
checkpoint-12/rng_state_26.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52f96369c5de4d7f6e142789cc18b82f7b4992764d47865f431a6d8b81f8a53a
3
+ size 16404
checkpoint-12/rng_state_27.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a343ecdf54d9ce1440155a0fa10c1c0c3f65314996617365f0a60ed936c9032c
3
+ size 16404
checkpoint-12/rng_state_28.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4f84c89e31507050e75cba6f0110214f94a62a4ec0cc9addf80b8771909a5da
3
+ size 16404
checkpoint-12/rng_state_29.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53b5a1e6aa1c88d2d75f038abf2ebaff82c20105adaffdb30527363dc9e2e714
3
+ size 16340
checkpoint-12/rng_state_30.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1eafe1cd2a9dea56bdbf5460daeb214954a443a45c8ba110d9835802604ca8a
3
+ size 16468
checkpoint-12/rng_state_31.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a68f3fa20f9955083f4b614e60109fd310fe33b3f03e641323dff4f15af2ee13
3
+ size 16340
checkpoint-12/trainer_state.json CHANGED
@@ -17,7 +17,7 @@
17
  "kl": 0.0,
18
  "learning_rate": 1.6666666666666667e-05,
19
  "loss": -0.11016345024108887,
20
- "memory(GiB)": 180.29,
21
  "response_clip_ratio": 0.11328125,
22
  "reward": -0.002658387296833098,
23
  "reward_std": 0.06134121119976044,
@@ -33,7 +33,7 @@
33
  "kl": 0.0,
34
  "learning_rate": 3.3333333333333335e-05,
35
  "loss": -0.11016345024108887,
36
- "memory(GiB)": 180.29,
37
  "step": 2,
38
  "train_speed(iter/s)": 0.000466
39
  },
@@ -45,7 +45,7 @@
45
  "kl": 9.50181856751442e-07,
46
  "learning_rate": 5e-05,
47
  "loss": -0.06604708731174469,
48
- "memory(GiB)": 180.29,
49
  "response_clip_ratio": 0.13671875,
50
  "reward": 0.0006296975770965219,
51
  "reward_std": 0.07172460854053497,
@@ -61,7 +61,7 @@
61
  "kl": 1.1101365089416504e-05,
62
  "learning_rate": 6.666666666666667e-05,
63
  "loss": -0.06727766245603561,
64
- "memory(GiB)": 180.29,
65
  "step": 4,
66
  "train_speed(iter/s)": 0.000458
67
  },
@@ -73,7 +73,7 @@
73
  "kl": 0.00017762184143066406,
74
  "learning_rate": 8.333333333333334e-05,
75
  "loss": -0.09315311908721924,
76
- "memory(GiB)": 180.29,
77
  "response_clip_ratio": 0.119140625,
78
  "reward": -0.005135859013535082,
79
  "reward_std": 0.07994875870645046,
@@ -87,9 +87,9 @@
87
  "grad_norm": 0.18263348937034607,
88
  "learning_rate": 0.0001,
89
  "loss": -0.1041698157787323,
90
- "memory(GiB)": 180.29,
91
  "step": 6,
92
- "train_speed(iter/s)": 0.000458
93
  },
94
  {
95
  "epoch": 1.4210526315789473,
@@ -102,7 +102,7 @@
102
  "eval_reward_std": 0.08769983053207397,
103
  "eval_rewards/CosineReward": 0.012996694073081017,
104
  "eval_rewards/RepetitionPenalty": 0.0,
105
- "eval_runtime": 1030.1223,
106
  "eval_samples_per_second": 0.001,
107
  "eval_steps_per_second": 0.001,
108
  "step": 6
@@ -115,7 +115,7 @@
115
  "kl": 0.017406463623046875,
116
  "learning_rate": 9.991540791356342e-05,
117
  "loss": -0.051375165581703186,
118
- "memory(GiB)": 180.29,
119
  "response_clip_ratio": 0.1484375,
120
  "reward": 0.004909618757665157,
121
  "reward_std": 0.08167182095348835,
@@ -131,7 +131,7 @@
131
  "kl": 0.089599609375,
132
  "learning_rate": 9.966191788709716e-05,
133
  "loss": -0.05105742812156677,
134
- "memory(GiB)": 180.29,
135
  "step": 8,
136
  "train_speed(iter/s)": 0.000433
137
  },
@@ -143,7 +143,7 @@
143
  "kl": 0.0963134765625,
144
  "learning_rate": 9.924038765061042e-05,
145
  "loss": -0.05842069163918495,
146
- "memory(GiB)": 180.29,
147
  "response_clip_ratio": 0.255859375,
148
  "reward": 0.03643610421568155,
149
  "reward_std": 0.11898956261575222,
@@ -159,7 +159,7 @@
159
  "kl": 0.1185302734375,
160
  "learning_rate": 9.865224352899119e-05,
161
  "loss": -0.06491819024085999,
162
- "memory(GiB)": 180.29,
163
  "step": 10,
164
  "train_speed(iter/s)": 0.000436
165
  },
@@ -171,7 +171,7 @@
171
  "kl": 0.1275634765625,
172
  "learning_rate": 9.789947561577445e-05,
173
  "loss": -0.04600231721997261,
174
- "memory(GiB)": 180.29,
175
  "response_clip_ratio": 0.361328125,
176
  "reward": 0.023204635945148766,
177
  "reward_std": 0.10593634657561779,
@@ -185,7 +185,7 @@
185
  "grad_norm": 0.05781339108943939,
186
  "learning_rate": 9.698463103929542e-05,
187
  "loss": -0.05069056898355484,
188
- "memory(GiB)": 180.29,
189
  "step": 12,
190
  "train_speed(iter/s)": 0.000439
191
  },
@@ -200,7 +200,7 @@
200
  "eval_reward_std": 0.10685288906097412,
201
  "eval_rewards/CosineReward": 0.03234308212995529,
202
  "eval_rewards/RepetitionPenalty": 0.0,
203
- "eval_runtime": 1025.9045,
204
  "eval_samples_per_second": 0.001,
205
  "eval_steps_per_second": 0.001,
206
  "step": 12
 
17
  "kl": 0.0,
18
  "learning_rate": 1.6666666666666667e-05,
19
  "loss": -0.11016345024108887,
20
+ "memory(GiB)": 186.69,
21
  "response_clip_ratio": 0.11328125,
22
  "reward": -0.002658387296833098,
23
  "reward_std": 0.06134121119976044,
 
33
  "kl": 0.0,
34
  "learning_rate": 3.3333333333333335e-05,
35
  "loss": -0.11016345024108887,
36
+ "memory(GiB)": 186.69,
37
  "step": 2,
38
  "train_speed(iter/s)": 0.000466
39
  },
 
45
  "kl": 9.50181856751442e-07,
46
  "learning_rate": 5e-05,
47
  "loss": -0.06604708731174469,
48
+ "memory(GiB)": 186.69,
49
  "response_clip_ratio": 0.13671875,
50
  "reward": 0.0006296975770965219,
51
  "reward_std": 0.07172460854053497,
 
61
  "kl": 1.1101365089416504e-05,
62
  "learning_rate": 6.666666666666667e-05,
63
  "loss": -0.06727766245603561,
64
+ "memory(GiB)": 186.69,
65
  "step": 4,
66
  "train_speed(iter/s)": 0.000458
67
  },
 
73
  "kl": 0.00017762184143066406,
74
  "learning_rate": 8.333333333333334e-05,
75
  "loss": -0.09315311908721924,
76
+ "memory(GiB)": 186.69,
77
  "response_clip_ratio": 0.119140625,
78
  "reward": -0.005135859013535082,
79
  "reward_std": 0.07994875870645046,
 
87
  "grad_norm": 0.18263348937034607,
88
  "learning_rate": 0.0001,
89
  "loss": -0.1041698157787323,
90
+ "memory(GiB)": 186.69,
91
  "step": 6,
92
+ "train_speed(iter/s)": 0.000459
93
  },
94
  {
95
  "epoch": 1.4210526315789473,
 
102
  "eval_reward_std": 0.08769983053207397,
103
  "eval_rewards/CosineReward": 0.012996694073081017,
104
  "eval_rewards/RepetitionPenalty": 0.0,
105
+ "eval_runtime": 1030.1122,
106
  "eval_samples_per_second": 0.001,
107
  "eval_steps_per_second": 0.001,
108
  "step": 6
 
115
  "kl": 0.017406463623046875,
116
  "learning_rate": 9.991540791356342e-05,
117
  "loss": -0.051375165581703186,
118
+ "memory(GiB)": 186.69,
119
  "response_clip_ratio": 0.1484375,
120
  "reward": 0.004909618757665157,
121
  "reward_std": 0.08167182095348835,
 
131
  "kl": 0.089599609375,
132
  "learning_rate": 9.966191788709716e-05,
133
  "loss": -0.05105742812156677,
134
+ "memory(GiB)": 186.69,
135
  "step": 8,
136
  "train_speed(iter/s)": 0.000433
137
  },
 
143
  "kl": 0.0963134765625,
144
  "learning_rate": 9.924038765061042e-05,
145
  "loss": -0.05842069163918495,
146
+ "memory(GiB)": 186.69,
147
  "response_clip_ratio": 0.255859375,
148
  "reward": 0.03643610421568155,
149
  "reward_std": 0.11898956261575222,
 
159
  "kl": 0.1185302734375,
160
  "learning_rate": 9.865224352899119e-05,
161
  "loss": -0.06491819024085999,
162
+ "memory(GiB)": 186.69,
163
  "step": 10,
164
  "train_speed(iter/s)": 0.000436
165
  },
 
171
  "kl": 0.1275634765625,
172
  "learning_rate": 9.789947561577445e-05,
173
  "loss": -0.04600231721997261,
174
+ "memory(GiB)": 186.69,
175
  "response_clip_ratio": 0.361328125,
176
  "reward": 0.023204635945148766,
177
  "reward_std": 0.10593634657561779,
 
185
  "grad_norm": 0.05781339108943939,
186
  "learning_rate": 9.698463103929542e-05,
187
  "loss": -0.05069056898355484,
188
+ "memory(GiB)": 186.69,
189
  "step": 12,
190
  "train_speed(iter/s)": 0.000439
191
  },
 
200
  "eval_reward_std": 0.10685288906097412,
201
  "eval_rewards/CosineReward": 0.03234308212995529,
202
  "eval_rewards/RepetitionPenalty": 0.0,
203
+ "eval_runtime": 1025.9053,
204
  "eval_samples_per_second": 0.001,
205
  "eval_steps_per_second": 0.001,
206
  "step": 12
checkpoint-12/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
3
  size 9809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
3
  size 9809
checkpoint-14/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "down_proj",
27
  "o_proj",
28
- "q_proj",
29
  "v_proj",
 
30
  "up_proj",
31
- "gate_proj",
32
- "k_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
26
  "o_proj",
27
+ "gate_proj",
28
  "v_proj",
29
+ "k_proj",
30
  "up_proj",
31
+ "down_proj",
32
+ "q_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
checkpoint-14/global_step14/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6da2f27fd4ab44feff8b85b8d6d02565abf211cd22247778427d57fb2a9945b1
3
+ size 51616527
checkpoint-14/global_step14/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdccca4eb69bb1e478f34a2942e1a8890c53f490e6a933224cc41d8c12615b1b
3
+ size 51616015
checkpoint-14/global_step14/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3328f2c8971a0a11ef3eb7383a0050f44126ec59cb35fabbf5a939d5714f19
3
+ size 51616527
checkpoint-14/global_step14/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8222c0bf57d6697f585f1a91b18ae04f641d86d076069f34b224d736aa28cb3d
3
+ size 51616015
checkpoint-14/global_step14/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26960f3cb2f32d7ed2b2393dfc661cb4c8a45577ed75f218c59fc4ee2a313b12
3
+ size 51616527
checkpoint-14/global_step14/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d995e9ce3b2c00b9730697ef0fdcaccfd15fbd642baca7c325641aba31e1bfe
3
+ size 51616015
checkpoint-14/global_step14/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:945e02bfb5bc5a91fe224606519a25d739d8217113a5956a27d000473c9debdf
3
+ size 51616527
checkpoint-14/global_step14/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a53d07205f8ab28e533dccdcb73817c98ac455bc1388d40e3277ac277ed89761
3
+ size 51616015
checkpoint-14/rng_state_24.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ae0caca66644ba08044a94994e20548b0daa2c672adcbea3cd89870f9b6c7b8
3
+ size 16340
checkpoint-14/rng_state_25.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aca46251807a6ae3597de2fc152227ad87800f1b7618974b146a86626c826e3b
3
+ size 16340
checkpoint-14/rng_state_26.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c1012021531d2e3961abb5fc01b81fe28951ea9c4efbd436ab3505893bfa5e2
3
+ size 16404