radna committed
Commit 24aafe0 · verified · 1 Parent(s): 68cd72a

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
Files changed (50)
  1. checkpoint-10/adapter_config.json +4 -4
  2. checkpoint-10/trainer_state.json +12 -12
  3. checkpoint-10/training_args.bin +1 -1
  4. checkpoint-12/adapter_config.json +4 -4
  5. checkpoint-12/trainer_state.json +15 -15
  6. checkpoint-12/training_args.bin +1 -1
  7. checkpoint-14/adapter_config.json +4 -4
  8. checkpoint-14/trainer_state.json +17 -17
  9. checkpoint-14/training_args.bin +1 -1
  10. checkpoint-16/adapter_config.json +4 -4
  11. checkpoint-16/trainer_state.json +19 -19
  12. checkpoint-16/training_args.bin +1 -1
  13. checkpoint-18/adapter_config.json +4 -4
  14. checkpoint-18/trainer_state.json +22 -22
  15. checkpoint-18/training_args.bin +1 -1
  16. checkpoint-2/adapter_config.json +4 -4
  17. checkpoint-2/trainer_state.json +2 -2
  18. checkpoint-2/training_args.bin +1 -1
  19. checkpoint-20/adapter_config.json +4 -4
  20. checkpoint-20/trainer_state.json +24 -24
  21. checkpoint-20/training_args.bin +1 -1
  22. checkpoint-22/adapter_config.json +4 -4
  23. checkpoint-22/trainer_state.json +26 -26
  24. checkpoint-22/training_args.bin +1 -1
  25. checkpoint-24/adapter_config.json +4 -4
  26. checkpoint-24/trainer_state.json +29 -29
  27. checkpoint-24/training_args.bin +1 -1
  28. checkpoint-26/adapter_config.json +4 -4
  29. checkpoint-26/trainer_state.json +31 -31
  30. checkpoint-26/training_args.bin +1 -1
  31. checkpoint-28/adapter_config.json +4 -4
  32. checkpoint-28/trainer_state.json +33 -33
  33. checkpoint-28/training_args.bin +1 -1
  34. checkpoint-30/adapter_config.json +4 -4
  35. checkpoint-30/global_step30/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3 -0
  36. checkpoint-30/global_step30/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3 -0
  37. checkpoint-30/global_step30/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3 -0
  38. checkpoint-30/global_step30/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3 -0
  39. checkpoint-30/global_step30/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3 -0
  40. checkpoint-30/global_step30/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3 -0
  41. checkpoint-30/global_step30/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3 -0
  42. checkpoint-30/global_step30/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3 -0
  43. checkpoint-30/rng_state_24.pth +3 -0
  44. checkpoint-30/rng_state_25.pth +3 -0
  45. checkpoint-30/rng_state_26.pth +3 -0
  46. checkpoint-30/rng_state_27.pth +3 -0
  47. checkpoint-30/rng_state_28.pth +3 -0
  48. checkpoint-30/rng_state_29.pth +3 -0
  49. checkpoint-30/rng_state_30.pth +3 -0
  50. checkpoint-30/rng_state_31.pth +3 -0
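The commit message above ("Upload folder using huggingface_hub") indicates these checkpoint files were pushed with the huggingface_hub client. A minimal sketch of how such an upload is typically done, assuming a local output directory and a hypothetical repo ID (neither is taken from this commit):

```python
from huggingface_hub import HfApi

api = HfApi()  # assumes a token is already configured via `huggingface-cli login` or HF_TOKEN
api.upload_folder(
    folder_path="./output",         # hypothetical local folder holding the checkpoint-* dirs
    repo_id="radna/example-repo",   # hypothetical repo ID
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```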
checkpoint-10/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "down_proj",
  "o_proj",
- "q_proj",
+ "gate_proj",
  "v_proj",
+ "k_proj",
  "up_proj",
- "gate_proj",
- "k_proj"
+ "down_proj",
+ "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
checkpoint-10/trainer_state.json CHANGED
@@ -17,7 +17,7 @@
  "kl": 0.0,
  "learning_rate": 1.6666666666666667e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.11328125,
  "reward": -0.002658387296833098,
  "reward_std": 0.06134121119976044,
@@ -33,7 +33,7 @@
  "kl": 0.0,
  "learning_rate": 3.3333333333333335e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 2,
  "train_speed(iter/s)": 0.000466
  },
@@ -45,7 +45,7 @@
  "kl": 9.50181856751442e-07,
  "learning_rate": 5e-05,
  "loss": -0.06604708731174469,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.13671875,
  "reward": 0.0006296975770965219,
  "reward_std": 0.07172460854053497,
@@ -61,7 +61,7 @@
  "kl": 1.1101365089416504e-05,
  "learning_rate": 6.666666666666667e-05,
  "loss": -0.06727766245603561,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 4,
  "train_speed(iter/s)": 0.000458
  },
@@ -73,7 +73,7 @@
  "kl": 0.00017762184143066406,
  "learning_rate": 8.333333333333334e-05,
  "loss": -0.09315311908721924,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.119140625,
  "reward": -0.005135859013535082,
  "reward_std": 0.07994875870645046,
@@ -87,9 +87,9 @@
  "grad_norm": 0.18263348937034607,
  "learning_rate": 0.0001,
  "loss": -0.1041698157787323,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 6,
- "train_speed(iter/s)": 0.000458
+ "train_speed(iter/s)": 0.000459
  },
  {
  "epoch": 1.4210526315789473,
@@ -102,7 +102,7 @@
  "eval_reward_std": 0.08769983053207397,
  "eval_rewards/CosineReward": 0.012996694073081017,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1030.1223,
+ "eval_runtime": 1030.1122,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 6
@@ -115,7 +115,7 @@
  "kl": 0.017406463623046875,
  "learning_rate": 9.991540791356342e-05,
  "loss": -0.051375165581703186,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.1484375,
  "reward": 0.004909618757665157,
  "reward_std": 0.08167182095348835,
@@ -131,7 +131,7 @@
  "kl": 0.089599609375,
  "learning_rate": 9.966191788709716e-05,
  "loss": -0.05105742812156677,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 8,
  "train_speed(iter/s)": 0.000433
  },
@@ -143,7 +143,7 @@
  "kl": 0.0963134765625,
  "learning_rate": 9.924038765061042e-05,
  "loss": -0.05842069163918495,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.255859375,
  "reward": 0.03643610421568155,
  "reward_std": 0.11898956261575222,
@@ -159,7 +159,7 @@
  "kl": 0.1185302734375,
  "learning_rate": 9.865224352899119e-05,
  "loss": -0.06491819024085999,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 10,
  "train_speed(iter/s)": 0.000436
  }
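In the trainer_state.json diffs, the only values that change are the logged memory(GiB) readings (180.29 → 186.69) and a handful of train_speed(iter/s) and eval_runtime entries; losses, rewards, and learning rates are identical. A small sketch for comparing two such logs entry by entry (paths are hypothetical; trainer_state.json stores its records under "log_history"):

```python
import json

def diff_trainer_state(path_a: str, path_b: str) -> None:
    """Print the log_history fields that differ between two trainer_state.json files."""
    with open(path_a) as fa, open(path_b) as fb:
        hist_a = json.load(fa)["log_history"]
        hist_b = json.load(fb)["log_history"]
    for i, (entry_a, entry_b) in enumerate(zip(hist_a, hist_b)):
        for key in sorted(set(entry_a) | set(entry_b)):
            if entry_a.get(key) != entry_b.get(key):
                print(f"entry {i}: {key}: {entry_a.get(key)} -> {entry_b.get(key)}")

# Hypothetical usage against two revisions of the same checkpoint:
# diff_trainer_state("old/checkpoint-10/trainer_state.json", "new/checkpoint-10/trainer_state.json")
```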
checkpoint-10/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
+oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
 size 9809
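training_args.bin is tracked with Git LFS, so its diff only shows the pointer file: the oid line is the SHA-256 of the binary's contents and size is its length in bytes. If needed, a downloaded file can be checked against the pointer like this (path hypothetical):

```python
import hashlib
import os

def lfs_pointer_fields(path: str) -> tuple[str, int]:
    """Return (sha256 hex digest, size in bytes), the values a Git LFS pointer records."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest(), os.path.getsize(path)

# Hypothetical usage:
# oid, size = lfs_pointer_fields("checkpoint-10/training_args.bin")
# print(oid, size)  # compare with the oid/size lines in the pointer above
```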
checkpoint-12/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "down_proj",
  "o_proj",
- "q_proj",
+ "gate_proj",
  "v_proj",
+ "k_proj",
  "up_proj",
- "gate_proj",
- "k_proj"
+ "down_proj",
+ "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,

checkpoint-12/trainer_state.json CHANGED
@@ -17,7 +17,7 @@
  "kl": 0.0,
  "learning_rate": 1.6666666666666667e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.11328125,
  "reward": -0.002658387296833098,
  "reward_std": 0.06134121119976044,
@@ -33,7 +33,7 @@
  "kl": 0.0,
  "learning_rate": 3.3333333333333335e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 2,
  "train_speed(iter/s)": 0.000466
  },
@@ -45,7 +45,7 @@
  "kl": 9.50181856751442e-07,
  "learning_rate": 5e-05,
  "loss": -0.06604708731174469,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.13671875,
  "reward": 0.0006296975770965219,
  "reward_std": 0.07172460854053497,
@@ -61,7 +61,7 @@
  "kl": 1.1101365089416504e-05,
  "learning_rate": 6.666666666666667e-05,
  "loss": -0.06727766245603561,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 4,
  "train_speed(iter/s)": 0.000458
  },
@@ -73,7 +73,7 @@
  "kl": 0.00017762184143066406,
  "learning_rate": 8.333333333333334e-05,
  "loss": -0.09315311908721924,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.119140625,
  "reward": -0.005135859013535082,
  "reward_std": 0.07994875870645046,
@@ -87,9 +87,9 @@
  "grad_norm": 0.18263348937034607,
  "learning_rate": 0.0001,
  "loss": -0.1041698157787323,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 6,
- "train_speed(iter/s)": 0.000458
+ "train_speed(iter/s)": 0.000459
  },
  {
  "epoch": 1.4210526315789473,
@@ -102,7 +102,7 @@
  "eval_reward_std": 0.08769983053207397,
  "eval_rewards/CosineReward": 0.012996694073081017,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1030.1223,
+ "eval_runtime": 1030.1122,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 6
@@ -115,7 +115,7 @@
  "kl": 0.017406463623046875,
  "learning_rate": 9.991540791356342e-05,
  "loss": -0.051375165581703186,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.1484375,
  "reward": 0.004909618757665157,
  "reward_std": 0.08167182095348835,
@@ -131,7 +131,7 @@
  "kl": 0.089599609375,
  "learning_rate": 9.966191788709716e-05,
  "loss": -0.05105742812156677,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 8,
  "train_speed(iter/s)": 0.000433
  },
@@ -143,7 +143,7 @@
  "kl": 0.0963134765625,
  "learning_rate": 9.924038765061042e-05,
  "loss": -0.05842069163918495,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.255859375,
  "reward": 0.03643610421568155,
  "reward_std": 0.11898956261575222,
@@ -159,7 +159,7 @@
  "kl": 0.1185302734375,
  "learning_rate": 9.865224352899119e-05,
  "loss": -0.06491819024085999,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 10,
  "train_speed(iter/s)": 0.000436
  },
@@ -171,7 +171,7 @@
  "kl": 0.1275634765625,
  "learning_rate": 9.789947561577445e-05,
  "loss": -0.04600231721997261,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.361328125,
  "reward": 0.023204635945148766,
  "reward_std": 0.10593634657561779,
@@ -185,7 +185,7 @@
  "grad_norm": 0.05781339108943939,
  "learning_rate": 9.698463103929542e-05,
  "loss": -0.05069056898355484,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 12,
  "train_speed(iter/s)": 0.000439
  },
@@ -200,7 +200,7 @@
  "eval_reward_std": 0.10685288906097412,
  "eval_rewards/CosineReward": 0.03234308212995529,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1025.9045,
+ "eval_runtime": 1025.9053,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 12

checkpoint-12/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
+oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
 size 9809
checkpoint-14/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "down_proj",
  "o_proj",
- "q_proj",
+ "gate_proj",
  "v_proj",
+ "k_proj",
  "up_proj",
- "gate_proj",
- "k_proj"
+ "down_proj",
+ "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,

checkpoint-14/trainer_state.json CHANGED
@@ -17,7 +17,7 @@
  "kl": 0.0,
  "learning_rate": 1.6666666666666667e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.11328125,
  "reward": -0.002658387296833098,
  "reward_std": 0.06134121119976044,
@@ -33,7 +33,7 @@
  "kl": 0.0,
  "learning_rate": 3.3333333333333335e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 2,
  "train_speed(iter/s)": 0.000466
  },
@@ -45,7 +45,7 @@
  "kl": 9.50181856751442e-07,
  "learning_rate": 5e-05,
  "loss": -0.06604708731174469,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.13671875,
  "reward": 0.0006296975770965219,
  "reward_std": 0.07172460854053497,
@@ -61,7 +61,7 @@
  "kl": 1.1101365089416504e-05,
  "learning_rate": 6.666666666666667e-05,
  "loss": -0.06727766245603561,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 4,
  "train_speed(iter/s)": 0.000458
  },
@@ -73,7 +73,7 @@
  "kl": 0.00017762184143066406,
  "learning_rate": 8.333333333333334e-05,
  "loss": -0.09315311908721924,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.119140625,
  "reward": -0.005135859013535082,
  "reward_std": 0.07994875870645046,
@@ -87,9 +87,9 @@
  "grad_norm": 0.18263348937034607,
  "learning_rate": 0.0001,
  "loss": -0.1041698157787323,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 6,
- "train_speed(iter/s)": 0.000458
+ "train_speed(iter/s)": 0.000459
  },
  {
  "epoch": 1.4210526315789473,
@@ -102,7 +102,7 @@
  "eval_reward_std": 0.08769983053207397,
  "eval_rewards/CosineReward": 0.012996694073081017,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1030.1223,
+ "eval_runtime": 1030.1122,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 6
@@ -115,7 +115,7 @@
  "kl": 0.017406463623046875,
  "learning_rate": 9.991540791356342e-05,
  "loss": -0.051375165581703186,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.1484375,
  "reward": 0.004909618757665157,
  "reward_std": 0.08167182095348835,
@@ -131,7 +131,7 @@
  "kl": 0.089599609375,
  "learning_rate": 9.966191788709716e-05,
  "loss": -0.05105742812156677,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 8,
  "train_speed(iter/s)": 0.000433
  },
@@ -143,7 +143,7 @@
  "kl": 0.0963134765625,
  "learning_rate": 9.924038765061042e-05,
  "loss": -0.05842069163918495,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.255859375,
  "reward": 0.03643610421568155,
  "reward_std": 0.11898956261575222,
@@ -159,7 +159,7 @@
  "kl": 0.1185302734375,
  "learning_rate": 9.865224352899119e-05,
  "loss": -0.06491819024085999,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 10,
  "train_speed(iter/s)": 0.000436
  },
@@ -171,7 +171,7 @@
  "kl": 0.1275634765625,
  "learning_rate": 9.789947561577445e-05,
  "loss": -0.04600231721997261,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.361328125,
  "reward": 0.023204635945148766,
  "reward_std": 0.10593634657561779,
@@ -185,7 +185,7 @@
  "grad_norm": 0.05781339108943939,
  "learning_rate": 9.698463103929542e-05,
  "loss": -0.05069056898355484,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 12,
  "train_speed(iter/s)": 0.000439
  },
@@ -200,7 +200,7 @@
  "eval_reward_std": 0.10685288906097412,
  "eval_rewards/CosineReward": 0.03234308212995529,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1025.9045,
+ "eval_runtime": 1025.9053,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 12
@@ -213,7 +213,7 @@
  "kl": 0.151123046875,
  "learning_rate": 9.591080534401371e-05,
  "loss": -0.02191038429737091,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.419921875,
  "reward": 0.035983758978545666,
  "reward_std": 0.11553369648754597,
@@ -229,7 +229,7 @@
  "kl": 0.169189453125,
  "learning_rate": 9.468163201617062e-05,
  "loss": -0.022672578692436218,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 14,
  "train_speed(iter/s)": 0.000427
  }

checkpoint-14/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
+oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
 size 9809
checkpoint-16/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "down_proj",
  "o_proj",
- "q_proj",
+ "gate_proj",
  "v_proj",
+ "k_proj",
  "up_proj",
- "gate_proj",
- "k_proj"
+ "down_proj",
+ "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,

checkpoint-16/trainer_state.json CHANGED
@@ -17,7 +17,7 @@
  "kl": 0.0,
  "learning_rate": 1.6666666666666667e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.11328125,
  "reward": -0.002658387296833098,
  "reward_std": 0.06134121119976044,
@@ -33,7 +33,7 @@
  "kl": 0.0,
  "learning_rate": 3.3333333333333335e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 2,
  "train_speed(iter/s)": 0.000466
  },
@@ -45,7 +45,7 @@
  "kl": 9.50181856751442e-07,
  "learning_rate": 5e-05,
  "loss": -0.06604708731174469,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.13671875,
  "reward": 0.0006296975770965219,
  "reward_std": 0.07172460854053497,
@@ -61,7 +61,7 @@
  "kl": 1.1101365089416504e-05,
  "learning_rate": 6.666666666666667e-05,
  "loss": -0.06727766245603561,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 4,
  "train_speed(iter/s)": 0.000458
  },
@@ -73,7 +73,7 @@
  "kl": 0.00017762184143066406,
  "learning_rate": 8.333333333333334e-05,
  "loss": -0.09315311908721924,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.119140625,
  "reward": -0.005135859013535082,
  "reward_std": 0.07994875870645046,
@@ -87,9 +87,9 @@
  "grad_norm": 0.18263348937034607,
  "learning_rate": 0.0001,
  "loss": -0.1041698157787323,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 6,
- "train_speed(iter/s)": 0.000458
+ "train_speed(iter/s)": 0.000459
  },
  {
  "epoch": 1.4210526315789473,
@@ -102,7 +102,7 @@
  "eval_reward_std": 0.08769983053207397,
  "eval_rewards/CosineReward": 0.012996694073081017,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1030.1223,
+ "eval_runtime": 1030.1122,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 6
@@ -115,7 +115,7 @@
  "kl": 0.017406463623046875,
  "learning_rate": 9.991540791356342e-05,
  "loss": -0.051375165581703186,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.1484375,
  "reward": 0.004909618757665157,
  "reward_std": 0.08167182095348835,
@@ -131,7 +131,7 @@
  "kl": 0.089599609375,
  "learning_rate": 9.966191788709716e-05,
  "loss": -0.05105742812156677,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 8,
  "train_speed(iter/s)": 0.000433
  },
@@ -143,7 +143,7 @@
  "kl": 0.0963134765625,
  "learning_rate": 9.924038765061042e-05,
  "loss": -0.05842069163918495,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.255859375,
  "reward": 0.03643610421568155,
  "reward_std": 0.11898956261575222,
@@ -159,7 +159,7 @@
  "kl": 0.1185302734375,
  "learning_rate": 9.865224352899119e-05,
  "loss": -0.06491819024085999,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 10,
  "train_speed(iter/s)": 0.000436
  },
@@ -171,7 +171,7 @@
  "kl": 0.1275634765625,
  "learning_rate": 9.789947561577445e-05,
  "loss": -0.04600231721997261,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.361328125,
  "reward": 0.023204635945148766,
  "reward_std": 0.10593634657561779,
@@ -185,7 +185,7 @@
  "grad_norm": 0.05781339108943939,
  "learning_rate": 9.698463103929542e-05,
  "loss": -0.05069056898355484,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 12,
  "train_speed(iter/s)": 0.000439
  },
@@ -200,7 +200,7 @@
  "eval_reward_std": 0.10685288906097412,
  "eval_rewards/CosineReward": 0.03234308212995529,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1025.9045,
+ "eval_runtime": 1025.9053,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 12
@@ -213,7 +213,7 @@
  "kl": 0.151123046875,
  "learning_rate": 9.591080534401371e-05,
  "loss": -0.02191038429737091,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.419921875,
  "reward": 0.035983758978545666,
  "reward_std": 0.11553369648754597,
@@ -229,7 +229,7 @@
  "kl": 0.169189453125,
  "learning_rate": 9.468163201617062e-05,
  "loss": -0.022672578692436218,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 14,
  "train_speed(iter/s)": 0.000427
  },
@@ -241,7 +241,7 @@
  "kl": 0.166748046875,
  "learning_rate": 9.330127018922194e-05,
  "loss": -0.059799157083034515,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.4765625,
  "reward": 0.03584331553429365,
  "reward_std": 0.11829411797225475,
@@ -257,7 +257,7 @@
  "kl": 0.16748046875,
  "learning_rate": 9.177439057064683e-05,
  "loss": -0.06071458384394646,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 16,
  "train_speed(iter/s)": 0.000431
  }

checkpoint-16/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
+oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
 size 9809
checkpoint-18/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "down_proj",
  "o_proj",
- "q_proj",
+ "gate_proj",
  "v_proj",
+ "k_proj",
  "up_proj",
- "gate_proj",
- "k_proj"
+ "down_proj",
+ "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,

checkpoint-18/trainer_state.json CHANGED
@@ -17,7 +17,7 @@
  "kl": 0.0,
  "learning_rate": 1.6666666666666667e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.11328125,
  "reward": -0.002658387296833098,
  "reward_std": 0.06134121119976044,
@@ -33,7 +33,7 @@
  "kl": 0.0,
  "learning_rate": 3.3333333333333335e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 2,
  "train_speed(iter/s)": 0.000466
  },
@@ -45,7 +45,7 @@
  "kl": 9.50181856751442e-07,
  "learning_rate": 5e-05,
  "loss": -0.06604708731174469,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.13671875,
  "reward": 0.0006296975770965219,
  "reward_std": 0.07172460854053497,
@@ -61,7 +61,7 @@
  "kl": 1.1101365089416504e-05,
  "learning_rate": 6.666666666666667e-05,
  "loss": -0.06727766245603561,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 4,
  "train_speed(iter/s)": 0.000458
  },
@@ -73,7 +73,7 @@
  "kl": 0.00017762184143066406,
  "learning_rate": 8.333333333333334e-05,
  "loss": -0.09315311908721924,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.119140625,
  "reward": -0.005135859013535082,
  "reward_std": 0.07994875870645046,
@@ -87,9 +87,9 @@
  "grad_norm": 0.18263348937034607,
  "learning_rate": 0.0001,
  "loss": -0.1041698157787323,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 6,
- "train_speed(iter/s)": 0.000458
+ "train_speed(iter/s)": 0.000459
  },
  {
  "epoch": 1.4210526315789473,
@@ -102,7 +102,7 @@
  "eval_reward_std": 0.08769983053207397,
  "eval_rewards/CosineReward": 0.012996694073081017,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1030.1223,
+ "eval_runtime": 1030.1122,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 6
@@ -115,7 +115,7 @@
  "kl": 0.017406463623046875,
  "learning_rate": 9.991540791356342e-05,
  "loss": -0.051375165581703186,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.1484375,
  "reward": 0.004909618757665157,
  "reward_std": 0.08167182095348835,
@@ -131,7 +131,7 @@
  "kl": 0.089599609375,
  "learning_rate": 9.966191788709716e-05,
  "loss": -0.05105742812156677,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 8,
  "train_speed(iter/s)": 0.000433
  },
@@ -143,7 +143,7 @@
  "kl": 0.0963134765625,
  "learning_rate": 9.924038765061042e-05,
  "loss": -0.05842069163918495,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.255859375,
  "reward": 0.03643610421568155,
  "reward_std": 0.11898956261575222,
@@ -159,7 +159,7 @@
  "kl": 0.1185302734375,
  "learning_rate": 9.865224352899119e-05,
  "loss": -0.06491819024085999,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 10,
  "train_speed(iter/s)": 0.000436
  },
@@ -171,7 +171,7 @@
  "kl": 0.1275634765625,
  "learning_rate": 9.789947561577445e-05,
  "loss": -0.04600231721997261,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.361328125,
  "reward": 0.023204635945148766,
  "reward_std": 0.10593634657561779,
@@ -185,7 +185,7 @@
  "grad_norm": 0.05781339108943939,
  "learning_rate": 9.698463103929542e-05,
  "loss": -0.05069056898355484,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 12,
  "train_speed(iter/s)": 0.000439
  },
@@ -200,7 +200,7 @@
  "eval_reward_std": 0.10685288906097412,
  "eval_rewards/CosineReward": 0.03234308212995529,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1025.9045,
+ "eval_runtime": 1025.9053,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 12
@@ -213,7 +213,7 @@
  "kl": 0.151123046875,
  "learning_rate": 9.591080534401371e-05,
  "loss": -0.02191038429737091,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.419921875,
  "reward": 0.035983758978545666,
  "reward_std": 0.11553369648754597,
@@ -229,7 +229,7 @@
  "kl": 0.169189453125,
  "learning_rate": 9.468163201617062e-05,
  "loss": -0.022672578692436218,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 14,
  "train_speed(iter/s)": 0.000427
  },
@@ -241,7 +241,7 @@
  "kl": 0.166748046875,
  "learning_rate": 9.330127018922194e-05,
  "loss": -0.059799157083034515,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.4765625,
  "reward": 0.03584331553429365,
  "reward_std": 0.11829411797225475,
@@ -257,7 +257,7 @@
  "kl": 0.16748046875,
  "learning_rate": 9.177439057064683e-05,
  "loss": -0.06071458384394646,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 16,
  "train_speed(iter/s)": 0.000431
  },
@@ -269,7 +269,7 @@
  "kl": 0.1787109375,
  "learning_rate": 9.01061596377522e-05,
  "loss": -0.04504441097378731,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.5625,
  "reward": 0.027318883687257767,
  "reward_std": 0.10441224090754986,
@@ -283,7 +283,7 @@
  "grad_norm": 0.005998397711664438,
  "learning_rate": 8.83022221559489e-05,
  "loss": -0.045487549155950546,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 18,
  "train_speed(iter/s)": 0.000432
  },
@@ -298,7 +298,7 @@
  "eval_reward_std": 0.10691346973180771,
  "eval_rewards/CosineReward": 0.03729327768087387,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1041.2321,
+ "eval_runtime": 1041.2277,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 18

checkpoint-18/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
+oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
 size 9809
checkpoint-2/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "down_proj",
  "o_proj",
- "q_proj",
+ "gate_proj",
  "v_proj",
+ "k_proj",
  "up_proj",
- "gate_proj",
- "k_proj"
+ "down_proj",
+ "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,

checkpoint-2/trainer_state.json CHANGED
@@ -17,7 +17,7 @@
  "kl": 0.0,
  "learning_rate": 1.6666666666666667e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.11328125,
  "reward": -0.002658387296833098,
  "reward_std": 0.06134121119976044,
@@ -33,7 +33,7 @@
  "kl": 0.0,
  "learning_rate": 3.3333333333333335e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 2,
  "train_speed(iter/s)": 0.000466
  }

checkpoint-2/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
+oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
 size 9809
checkpoint-20/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "down_proj",
  "o_proj",
- "q_proj",
+ "gate_proj",
  "v_proj",
+ "k_proj",
  "up_proj",
- "gate_proj",
- "k_proj"
+ "down_proj",
+ "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,

checkpoint-20/trainer_state.json CHANGED
@@ -17,7 +17,7 @@
  "kl": 0.0,
  "learning_rate": 1.6666666666666667e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.11328125,
  "reward": -0.002658387296833098,
  "reward_std": 0.06134121119976044,
@@ -33,7 +33,7 @@
  "kl": 0.0,
  "learning_rate": 3.3333333333333335e-05,
  "loss": -0.11016345024108887,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 2,
  "train_speed(iter/s)": 0.000466
  },
@@ -45,7 +45,7 @@
  "kl": 9.50181856751442e-07,
  "learning_rate": 5e-05,
  "loss": -0.06604708731174469,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.13671875,
  "reward": 0.0006296975770965219,
  "reward_std": 0.07172460854053497,
@@ -61,7 +61,7 @@
  "kl": 1.1101365089416504e-05,
  "learning_rate": 6.666666666666667e-05,
  "loss": -0.06727766245603561,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 4,
  "train_speed(iter/s)": 0.000458
  },
@@ -73,7 +73,7 @@
  "kl": 0.00017762184143066406,
  "learning_rate": 8.333333333333334e-05,
  "loss": -0.09315311908721924,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.119140625,
  "reward": -0.005135859013535082,
  "reward_std": 0.07994875870645046,
@@ -87,9 +87,9 @@
  "grad_norm": 0.18263348937034607,
  "learning_rate": 0.0001,
  "loss": -0.1041698157787323,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 6,
- "train_speed(iter/s)": 0.000458
+ "train_speed(iter/s)": 0.000459
  },
  {
  "epoch": 1.4210526315789473,
@@ -102,7 +102,7 @@
  "eval_reward_std": 0.08769983053207397,
  "eval_rewards/CosineReward": 0.012996694073081017,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1030.1223,
+ "eval_runtime": 1030.1122,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 6
@@ -115,7 +115,7 @@
  "kl": 0.017406463623046875,
  "learning_rate": 9.991540791356342e-05,
  "loss": -0.051375165581703186,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.1484375,
  "reward": 0.004909618757665157,
  "reward_std": 0.08167182095348835,
@@ -131,7 +131,7 @@
  "kl": 0.089599609375,
  "learning_rate": 9.966191788709716e-05,
  "loss": -0.05105742812156677,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 8,
  "train_speed(iter/s)": 0.000433
  },
@@ -143,7 +143,7 @@
  "kl": 0.0963134765625,
  "learning_rate": 9.924038765061042e-05,
  "loss": -0.05842069163918495,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.255859375,
  "reward": 0.03643610421568155,
  "reward_std": 0.11898956261575222,
@@ -159,7 +159,7 @@
  "kl": 0.1185302734375,
  "learning_rate": 9.865224352899119e-05,
  "loss": -0.06491819024085999,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 10,
  "train_speed(iter/s)": 0.000436
  },
@@ -171,7 +171,7 @@
  "kl": 0.1275634765625,
  "learning_rate": 9.789947561577445e-05,
  "loss": -0.04600231721997261,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.361328125,
  "reward": 0.023204635945148766,
  "reward_std": 0.10593634657561779,
@@ -185,7 +185,7 @@
  "grad_norm": 0.05781339108943939,
  "learning_rate": 9.698463103929542e-05,
  "loss": -0.05069056898355484,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 12,
  "train_speed(iter/s)": 0.000439
  },
@@ -200,7 +200,7 @@
  "eval_reward_std": 0.10685288906097412,
  "eval_rewards/CosineReward": 0.03234308212995529,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1025.9045,
+ "eval_runtime": 1025.9053,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 12
@@ -213,7 +213,7 @@
  "kl": 0.151123046875,
  "learning_rate": 9.591080534401371e-05,
  "loss": -0.02191038429737091,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.419921875,
  "reward": 0.035983758978545666,
  "reward_std": 0.11553369648754597,
@@ -229,7 +229,7 @@
  "kl": 0.169189453125,
  "learning_rate": 9.468163201617062e-05,
  "loss": -0.022672578692436218,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 14,
  "train_speed(iter/s)": 0.000427
  },
@@ -241,7 +241,7 @@
  "kl": 0.166748046875,
  "learning_rate": 9.330127018922194e-05,
  "loss": -0.059799157083034515,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.4765625,
  "reward": 0.03584331553429365,
  "reward_std": 0.11829411797225475,
@@ -257,7 +257,7 @@
  "kl": 0.16748046875,
  "learning_rate": 9.177439057064683e-05,
  "loss": -0.06071458384394646,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 16,
  "train_speed(iter/s)": 0.000431
  },
@@ -269,7 +269,7 @@
  "kl": 0.1787109375,
  "learning_rate": 9.01061596377522e-05,
  "loss": -0.04504441097378731,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.5625,
  "reward": 0.027318883687257767,
  "reward_std": 0.10441224090754986,
@@ -283,7 +283,7 @@
  "grad_norm": 0.005998397711664438,
  "learning_rate": 8.83022221559489e-05,
  "loss": -0.045487549155950546,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 18,
  "train_speed(iter/s)": 0.000432
  },
@@ -298,7 +298,7 @@
  "eval_reward_std": 0.10691346973180771,
  "eval_rewards/CosineReward": 0.03729327768087387,
  "eval_rewards/RepetitionPenalty": 0.0,
- "eval_runtime": 1041.2321,
+ "eval_runtime": 1041.2277,
  "eval_samples_per_second": 0.001,
  "eval_steps_per_second": 0.001,
  "step": 18
@@ -311,7 +311,7 @@
  "kl": 0.1820068359375,
  "learning_rate": 8.636868207865244e-05,
  "loss": -0.03466903418302536,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "response_clip_ratio": 0.466796875,
  "reward": 0.04069916973821819,
  "reward_std": 0.11991005763411522,
@@ -327,7 +327,7 @@
  "kl": 0.19287109375,
  "learning_rate": 8.43120818934367e-05,
  "loss": -0.03502114117145538,
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
  "step": 20,
  "train_speed(iter/s)": 0.000424
  }
302
  "eval_samples_per_second": 0.001,
303
  "eval_steps_per_second": 0.001,
304
  "step": 18
 
311
  "kl": 0.1820068359375,
312
  "learning_rate": 8.636868207865244e-05,
313
  "loss": -0.03466903418302536,
314
+ "memory(GiB)": 186.69,
315
  "response_clip_ratio": 0.466796875,
316
  "reward": 0.04069916973821819,
317
  "reward_std": 0.11991005763411522,
 
327
  "kl": 0.19287109375,
328
  "learning_rate": 8.43120818934367e-05,
329
  "loss": -0.03502114117145538,
330
+ "memory(GiB)": 186.69,
331
  "step": 20,
332
  "train_speed(iter/s)": 0.000424
333
  }
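The trainer_state.json diffs in this commit touch only bookkeeping fields: the logged "memory(GiB)" figure, one "train_speed(iter/s)" sample, and the eval_runtime values; the reward/KL history itself is identical before and after the re-upload. For reference, a minimal sketch (the local path is illustrative, not part of the commit) of how the log_history list these hunks edit can be read back:

import json

# Illustrative local path; any checkpoint-*/trainer_state.json from this repo works.
with open("checkpoint-20/trainer_state.json") as f:
    state = json.load(f)

# Each hunk shown above edits a single field inside one entry of this list.
logs = state["log_history"]
train_logs = [e for e in logs if "reward" in e]        # per-step training logs
eval_logs = [e for e in logs if "eval_runtime" in e]   # periodic eval logs

for e in train_logs:
    print(e["step"], e["reward"], e.get("kl"), e.get("memory(GiB)"))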
checkpoint-20/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
+oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
 size 9809
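Each training_args.bin diff in this commit replaces only the Git LFS pointer's oid; the payload size stays 9809 bytes. A minimal sketch, assuming the LFS blob has already been pulled locally (the path below is illustrative), for checking a downloaded file against the sha256 recorded in its pointer:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in chunks so large LFS blobs never have to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Illustrative path; compare the result against the "oid sha256:..." line above.
print(sha256_of("checkpoint-20/training_args.bin"))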
checkpoint-22/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
     "o_proj",
-    "q_proj",
+    "gate_proj",
     "v_proj",
+    "k_proj",
     "up_proj",
-    "gate_proj",
-    "k_proj"
+    "down_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-22/trainer_state.json CHANGED
@@ "memory(GiB)" in every per-step log entry (steps 1-22) @@
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
@@ "train_speed(iter/s)" at step 6 @@
- "train_speed(iter/s)": 0.000458
+ "train_speed(iter/s)": 0.000459
@@ "eval_runtime" at step 6 @@
- "eval_runtime": 1030.1223,
+ "eval_runtime": 1030.1122,
@@ "eval_runtime" at step 12 @@
- "eval_runtime": 1025.9045,
+ "eval_runtime": 1025.9053,
@@ "eval_runtime" at step 18 @@
- "eval_runtime": 1041.2321,
+ "eval_runtime": 1041.2277,
checkpoint-22/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
+oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
 size 9809
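The adapter_config.json diffs in this commit, including the checkpoint-24 one that follows, only reorder the target_modules list; the set of projection layers carrying LoRA adapters is unchanged. A quick sanity check, with both orderings copied from the diff:

# Old and new target_modules orderings, copied verbatim from the adapter_config.json diff.
old = ["down_proj", "o_proj", "q_proj", "v_proj", "up_proj", "gate_proj", "k_proj"]
new = ["o_proj", "gate_proj", "v_proj", "k_proj", "up_proj", "down_proj", "q_proj"]

# PEFT matches module names against this collection, so only set membership matters;
# the reordering is purely a serialization difference.
assert set(old) == set(new)
print("same LoRA target modules, different JSON ordering")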
checkpoint-24/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
     "o_proj",
-    "q_proj",
+    "gate_proj",
     "v_proj",
+    "k_proj",
     "up_proj",
-    "gate_proj",
-    "k_proj"
+    "down_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-24/trainer_state.json CHANGED
@@ "memory(GiB)" in every per-step log entry (steps 1-24) @@
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
@@ "train_speed(iter/s)" at step 6 @@
- "train_speed(iter/s)": 0.000458
+ "train_speed(iter/s)": 0.000459
@@ "eval_runtime" at step 6 @@
- "eval_runtime": 1030.1223,
+ "eval_runtime": 1030.1122,
@@ "eval_runtime" at step 12 @@
- "eval_runtime": 1025.9045,
+ "eval_runtime": 1025.9053,
@@ "eval_runtime" at step 18 @@
- "eval_runtime": 1041.2321,
+ "eval_runtime": 1041.2277,
@@ "eval_runtime" at step 24 @@
- "eval_runtime": 1045.0616,
+ "eval_runtime": 1045.0653,
checkpoint-24/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
+oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
 size 9809
checkpoint-26/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
     "o_proj",
-    "q_proj",
+    "gate_proj",
     "v_proj",
+    "k_proj",
     "up_proj",
-    "gate_proj",
-    "k_proj"
+    "down_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-26/trainer_state.json CHANGED
@@ "memory(GiB)" in every per-step log entry (steps 1-26) @@
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
@@ "train_speed(iter/s)" at step 6 @@
- "train_speed(iter/s)": 0.000458
+ "train_speed(iter/s)": 0.000459
@@ "eval_runtime" at step 6 @@
- "eval_runtime": 1030.1223,
+ "eval_runtime": 1030.1122,
@@ "eval_runtime" at step 12 @@
- "eval_runtime": 1025.9045,
+ "eval_runtime": 1025.9053,
@@ "eval_runtime" at step 18 @@
- "eval_runtime": 1041.2321,
+ "eval_runtime": 1041.2277,
@@ "eval_runtime" at step 24 @@
- "eval_runtime": 1045.0616,
+ "eval_runtime": 1045.0653,
checkpoint-26/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
+oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
 size 9809
checkpoint-28/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
     "o_proj",
-    "q_proj",
+    "gate_proj",
     "v_proj",
+    "k_proj",
     "up_proj",
-    "gate_proj",
-    "k_proj"
+    "down_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-28/trainer_state.json CHANGED
@@ "memory(GiB)" in every per-step log entry (steps 1-28) @@
- "memory(GiB)": 180.29,
+ "memory(GiB)": 186.69,
@@ "train_speed(iter/s)" at step 6 @@
- "train_speed(iter/s)": 0.000458
+ "train_speed(iter/s)": 0.000459
@@ "eval_runtime" at step 6 @@
- "eval_runtime": 1030.1223,
+ "eval_runtime": 1030.1122,
@@ "eval_runtime" at step 12 @@
- "eval_runtime": 1025.9045,
+ "eval_runtime": 1025.9053,
@@ "eval_runtime" at step 18 @@
- "eval_runtime": 1041.2321,
+ "eval_runtime": 1041.2277,
@@ "eval_runtime" at step 24 @@
- "eval_runtime": 1045.0616,
+ "eval_runtime": 1045.0653,
 
17
  "kl": 0.0,
18
  "learning_rate": 1.6666666666666667e-05,
19
  "loss": -0.11016345024108887,
20
+ "memory(GiB)": 186.69,
21
  "response_clip_ratio": 0.11328125,
22
  "reward": -0.002658387296833098,
23
  "reward_std": 0.06134121119976044,
 
33
  "kl": 0.0,
34
  "learning_rate": 3.3333333333333335e-05,
35
  "loss": -0.11016345024108887,
36
+ "memory(GiB)": 186.69,
37
  "step": 2,
38
  "train_speed(iter/s)": 0.000466
39
  },
 
45
  "kl": 9.50181856751442e-07,
46
  "learning_rate": 5e-05,
47
  "loss": -0.06604708731174469,
48
+ "memory(GiB)": 186.69,
49
  "response_clip_ratio": 0.13671875,
50
  "reward": 0.0006296975770965219,
51
  "reward_std": 0.07172460854053497,
 
61
  "kl": 1.1101365089416504e-05,
62
  "learning_rate": 6.666666666666667e-05,
63
  "loss": -0.06727766245603561,
64
+ "memory(GiB)": 186.69,
65
  "step": 4,
66
  "train_speed(iter/s)": 0.000458
67
  },
 
73
  "kl": 0.00017762184143066406,
74
  "learning_rate": 8.333333333333334e-05,
75
  "loss": -0.09315311908721924,
76
+ "memory(GiB)": 186.69,
77
  "response_clip_ratio": 0.119140625,
78
  "reward": -0.005135859013535082,
79
  "reward_std": 0.07994875870645046,
 
87
  "grad_norm": 0.18263348937034607,
88
  "learning_rate": 0.0001,
89
  "loss": -0.1041698157787323,
90
+ "memory(GiB)": 186.69,
91
  "step": 6,
92
+ "train_speed(iter/s)": 0.000459
93
  },
94
  {
95
  "epoch": 1.4210526315789473,
 
102
  "eval_reward_std": 0.08769983053207397,
103
  "eval_rewards/CosineReward": 0.012996694073081017,
104
  "eval_rewards/RepetitionPenalty": 0.0,
105
+ "eval_runtime": 1030.1122,
106
  "eval_samples_per_second": 0.001,
107
  "eval_steps_per_second": 0.001,
108
  "step": 6
 
115
  "kl": 0.017406463623046875,
116
  "learning_rate": 9.991540791356342e-05,
117
  "loss": -0.051375165581703186,
118
+ "memory(GiB)": 186.69,
119
  "response_clip_ratio": 0.1484375,
120
  "reward": 0.004909618757665157,
121
  "reward_std": 0.08167182095348835,
 
131
  "kl": 0.089599609375,
132
  "learning_rate": 9.966191788709716e-05,
133
  "loss": -0.05105742812156677,
134
+ "memory(GiB)": 186.69,
135
  "step": 8,
136
  "train_speed(iter/s)": 0.000433
137
  },
 
143
  "kl": 0.0963134765625,
144
  "learning_rate": 9.924038765061042e-05,
145
  "loss": -0.05842069163918495,
146
+ "memory(GiB)": 186.69,
147
  "response_clip_ratio": 0.255859375,
148
  "reward": 0.03643610421568155,
149
  "reward_std": 0.11898956261575222,
 
159
  "kl": 0.1185302734375,
160
  "learning_rate": 9.865224352899119e-05,
161
  "loss": -0.06491819024085999,
162
+ "memory(GiB)": 186.69,
163
  "step": 10,
164
  "train_speed(iter/s)": 0.000436
165
  },
 
171
  "kl": 0.1275634765625,
172
  "learning_rate": 9.789947561577445e-05,
173
  "loss": -0.04600231721997261,
174
+ "memory(GiB)": 186.69,
175
  "response_clip_ratio": 0.361328125,
176
  "reward": 0.023204635945148766,
177
  "reward_std": 0.10593634657561779,
 
185
  "grad_norm": 0.05781339108943939,
186
  "learning_rate": 9.698463103929542e-05,
187
  "loss": -0.05069056898355484,
188
+ "memory(GiB)": 186.69,
189
  "step": 12,
190
  "train_speed(iter/s)": 0.000439
191
  },
 
200
  "eval_reward_std": 0.10685288906097412,
201
  "eval_rewards/CosineReward": 0.03234308212995529,
202
  "eval_rewards/RepetitionPenalty": 0.0,
203
+ "eval_runtime": 1025.9053,
204
  "eval_samples_per_second": 0.001,
205
  "eval_steps_per_second": 0.001,
206
  "step": 12
 
213
  "kl": 0.151123046875,
214
  "learning_rate": 9.591080534401371e-05,
215
  "loss": -0.02191038429737091,
216
+ "memory(GiB)": 186.69,
217
  "response_clip_ratio": 0.419921875,
218
  "reward": 0.035983758978545666,
219
  "reward_std": 0.11553369648754597,
 
229
  "kl": 0.169189453125,
230
  "learning_rate": 9.468163201617062e-05,
231
  "loss": -0.022672578692436218,
232
+ "memory(GiB)": 186.69,
233
  "step": 14,
234
  "train_speed(iter/s)": 0.000427
235
  },
 
241
  "kl": 0.166748046875,
242
  "learning_rate": 9.330127018922194e-05,
243
  "loss": -0.059799157083034515,
244
+ "memory(GiB)": 186.69,
245
  "response_clip_ratio": 0.4765625,
246
  "reward": 0.03584331553429365,
247
  "reward_std": 0.11829411797225475,
 
257
  "kl": 0.16748046875,
258
  "learning_rate": 9.177439057064683e-05,
259
  "loss": -0.06071458384394646,
260
+ "memory(GiB)": 186.69,
261
  "step": 16,
262
  "train_speed(iter/s)": 0.000431
263
  },
 
269
  "kl": 0.1787109375,
270
  "learning_rate": 9.01061596377522e-05,
271
  "loss": -0.04504441097378731,
272
+ "memory(GiB)": 186.69,
273
  "response_clip_ratio": 0.5625,
274
  "reward": 0.027318883687257767,
275
  "reward_std": 0.10441224090754986,
 
283
  "grad_norm": 0.005998397711664438,
284
  "learning_rate": 8.83022221559489e-05,
285
  "loss": -0.045487549155950546,
286
+ "memory(GiB)": 186.69,
287
  "step": 18,
288
  "train_speed(iter/s)": 0.000432
289
  },
 
298
  "eval_reward_std": 0.10691346973180771,
299
  "eval_rewards/CosineReward": 0.03729327768087387,
300
  "eval_rewards/RepetitionPenalty": 0.0,
301
+ "eval_runtime": 1041.2277,
302
  "eval_samples_per_second": 0.001,
303
  "eval_steps_per_second": 0.001,
304
  "step": 18
 
311
  "kl": 0.1820068359375,
312
  "learning_rate": 8.636868207865244e-05,
313
  "loss": -0.03466903418302536,
314
+ "memory(GiB)": 186.69,
315
  "response_clip_ratio": 0.466796875,
316
  "reward": 0.04069916973821819,
317
  "reward_std": 0.11991005763411522,
 
327
  "kl": 0.19287109375,
328
  "learning_rate": 8.43120818934367e-05,
329
  "loss": -0.03502114117145538,
330
+ "memory(GiB)": 186.69,
331
  "step": 20,
332
  "train_speed(iter/s)": 0.000424
333
  },
 
339
  "kl": 0.17626953125,
340
  "learning_rate": 8.213938048432697e-05,
341
  "loss": -0.008662773296236992,
342
+ "memory(GiB)": 186.69,
343
  "response_clip_ratio": 0.5625,
344
  "reward": 0.04996980866417289,
345
  "reward_std": 0.13849420100450516,
 
355
  "kl": 0.178955078125,
356
  "learning_rate": 7.985792958513931e-05,
357
  "loss": -0.008743642829358578,
358
+ "memory(GiB)": 186.69,
359
  "step": 22,
360
  "train_speed(iter/s)": 0.000426
361
  },
 
367
  "kl": 0.1796875,
368
  "learning_rate": 7.74754489035403e-05,
369
  "loss": -0.03423420712351799,
370
+ "memory(GiB)": 186.69,
371
  "response_clip_ratio": 0.583984375,
372
  "reward": 0.034468831261619925,
373
  "reward_std": 0.11841745302081108,
 
381
  "grad_norm": 0.014131724834442139,
382
  "learning_rate": 7.500000000000001e-05,
383
  "loss": -0.03426633030176163,
384
+ "memory(GiB)": 186.69,
385
  "step": 24,
386
  "train_speed(iter/s)": 0.000427
387
  },
 
396
  "eval_reward_std": 0.10456253588199615,
397
  "eval_rewards/CosineReward": 0.04339282959699631,
398
  "eval_rewards/RepetitionPenalty": 0.0,
399
+ "eval_runtime": 1045.0653,
400
  "eval_samples_per_second": 0.001,
401
  "eval_steps_per_second": 0.001,
402
  "step": 24
 
409
  "kl": 0.1800537109375,
410
  "learning_rate": 7.243995901002312e-05,
411
  "loss": -0.02097315341234207,
412
+ "memory(GiB)": 186.69,
413
  "response_clip_ratio": 0.6171875,
414
  "reward": 0.03010205877944827,
415
  "reward_std": 0.10742511600255966,
 
425
  "kl": 0.18408203125,
426
  "learning_rate": 6.980398830195785e-05,
427
  "loss": -0.02103913575410843,
428
+ "memory(GiB)": 186.69,
429
  "step": 26,
430
  "train_speed(iter/s)": 0.000421
431
  },
 
437
  "kl": 0.174560546875,
438
  "learning_rate": 6.710100716628344e-05,
439
  "loss": -0.03593946248292923,
440
+ "memory(GiB)": 186.69,
441
  "response_clip_ratio": 0.513671875,
442
  "reward": 0.04752760287374258,
443
  "reward_std": 0.14935147762298584,
 
453
  "kl": 0.182373046875,
454
  "learning_rate": 6.434016163555452e-05,
455
  "loss": -0.03595500811934471,
456
+ "memory(GiB)": 186.69,
457
  "step": 28,
458
  "train_speed(iter/s)": 0.000422
459
  }
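The trainer_state.json hunks above record per-step RL-style training metrics (loss, reward, kl, learning rate, peak memory) plus periodic eval entries; the visible changes are the logged memory(GiB) value (180.29 → 186.69), the eval_runtime timings, and one train_speed entry. A minimal sketch for inspecting such a log after downloading the checkpoint, assuming the standard Hugging Face Trainer trainer_state.json layout with a log_history list; the local path is a placeholder.

```python
import json

# Placeholder path -- point this at a downloaded checkpoint directory.
STATE_PATH = "checkpoint-28/trainer_state.json"

with open(STATE_PATH) as f:
    state = json.load(f)

# log_history holds one dict per logged step; training entries carry
# "loss"/"reward"/"kl", evaluation entries carry "eval_*" keys instead.
for entry in state.get("log_history", []):
    if "eval_runtime" in entry:
        print(f"step {entry.get('step')}: "
              f"eval_rewards/CosineReward={entry.get('eval_rewards/CosineReward')}, "
              f"eval_runtime={entry.get('eval_runtime')}s")
    elif "loss" in entry:
        print(f"step {entry.get('step')}: loss={entry['loss']:.4f}, "
              f"reward={entry.get('reward')}, kl={entry.get('kl')}, "
              f"memory(GiB)={entry.get('memory(GiB)')}")
```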
checkpoint-28/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
+ oid sha256:7e699c24e86e1a1f412d6f8c2d25c0baf53692e08baa670377bbc8abc7e07257
  size 9809
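Only the LFS pointer for training_args.bin changes here (new oid, same 9809-byte size). If the actual file is needed, it is typically the pickled TrainingArguments that Trainer writes alongside each checkpoint; a minimal sketch for inspecting it, assuming a downloaded local copy (the path is a placeholder):

```python
import torch

# training_args.bin is an LFS-tracked pickle; only its pointer (oid/size)
# changed in this commit. After downloading the real file it can be loaded.
# weights_only=False is required because this is a pickled dataclass rather
# than a tensor file -- only do this for checkpoints you trust.
args = torch.load("checkpoint-28/training_args.bin", weights_only=False)

print(type(args).__name__)      # typically TrainingArguments or a subclass
print(args.learning_rate, args.output_dir)
```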
checkpoint-30/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "down_proj",
  "o_proj",
- "q_proj",
+ "gate_proj",
  "v_proj",
+ "k_proj",
  "up_proj",
- "gate_proj",
- "k_proj"
+ "down_proj",
+ "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
checkpoint-30/global_step30/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5b1a3cd6933506800a430038393b6941bd000301ef9f2dc6faeb6e1459140ba3
+ size 51616527
checkpoint-30/global_step30/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d490b31e4ae11c61f13bceb96677e15b12a5316213c042d2a6ac1c5568b18e27
+ size 51616015
checkpoint-30/global_step30/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f96828d4477387a7cfbbff0217d2e06634144ead2d4531d5d012f92cbd3915ca
+ size 51616527
checkpoint-30/global_step30/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:703767cfa56852356c051e72a843812484422326afdc2226c9cb7315ddac9a0f
+ size 51616015
checkpoint-30/global_step30/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f30b72531a3686bb479a405afecb9a5e36ded3a1f0e77715b982d34b35bf4ea5
+ size 51616527
checkpoint-30/global_step30/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ad1f53600a2e0547e940bc9d6722d6db5881c6b0b82db04e4633a3a671d790e5
+ size 51616015
checkpoint-30/global_step30/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62085edea348819aeb01edf89abb88dbc38895b13a999c46c497f8876545ab5d
+ size 51616527
checkpoint-30/global_step30/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:049092cac5d4dbecc59edf2e1beae0aa6ecb9be85a1769e39277b8132173827c
+ size 51616015
checkpoint-30/rng_state_24.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:550197960179c76e27d753c57b888a6aceb5f7914c64eb2030d99039f4c10ee3
+ size 16340
checkpoint-30/rng_state_25.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:86494bd509e73b62b00dd992dd55557dc6918b884ceabf488a48de384d5a3124
+ size 16340
checkpoint-30/rng_state_26.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36e2ab1725dde0d17f163d31707c459492db313385e56d7d908960b9da1e3104
+ size 16404
checkpoint-30/rng_state_27.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:628965fd355f90ce9405c61784b5296f30f4d56f2d8079593166eb50fef5820c
+ size 16404
checkpoint-30/rng_state_28.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:653a95667e1e4ab42ade2b12f244e882d81edce91454b60df85abd86914e1c3a
+ size 16404
checkpoint-30/rng_state_29.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:148fc9650216d30f813309a1ff1a4839c72be88ef17b2125497372c57fe47150
+ size 16340
checkpoint-30/rng_state_30.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:78a03b2904d2254d43024a5c2e00b159e258a66d6078b0523311e69df02402e8
+ size 16468
checkpoint-30/rng_state_31.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ada7dafd49fa06d30df1afdde7f7b919cb8d007421630f594e9f61fffd246751
+ size 16340
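Each file added above is a Git LFS pointer (version, sha256 oid, byte size) rather than the payload itself: the bf16_zero_pp_rank_*_optim_states.pt files are per-rank DeepSpeed ZeRO optimizer-state shards for global step 30, and the rng_state_*.pth files are the matching per-rank RNG snapshots. A minimal sketch for fetching just these checkpoint-30 files with huggingface_hub; the repo id is a placeholder.

```python
from huggingface_hub import snapshot_download

# Placeholder repo id -- substitute the actual Hub repo this commit belongs to.
REPO_ID = "user/model-repo"

# snapshot_download resolves the LFS pointers and fetches the real payloads;
# allow_patterns restricts the download to the checkpoint-30 files added here.
local_dir = snapshot_download(
    repo_id=REPO_ID,
    allow_patterns=["checkpoint-30/*"],
)
print(local_dir)
```

DeepSpeed checkpoints saved this way usually also ship a zero_to_fp32.py helper for consolidating the per-rank ZeRO shards into a single fp32 state dict, should full weights be needed.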