{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 74.98500299940012,
  "global_step": 250000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 6.3557,
      "step": 500
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.000000000000001e-06,
      "loss": 5.4171,
      "step": 1000
    },
    {
      "epoch": 0.45,
      "learning_rate": 6e-06,
      "loss": 4.8594,
      "step": 1500
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.000000000000001e-06,
      "loss": 4.488,
      "step": 2000
    },
    {
      "epoch": 0.75,
      "learning_rate": 1e-05,
      "loss": 4.2125,
      "step": 2500
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.2e-05,
      "loss": 3.9836,
      "step": 3000
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 3.794,
      "step": 3500
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 3.6179,
      "step": 4000
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.8e-05,
      "loss": 3.4697,
      "step": 4500
    },
    {
      "epoch": 1.5,
      "learning_rate": 2e-05,
      "loss": 3.3321,
      "step": 5000
    },
    {
      "epoch": 1.65,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 3.2206,
      "step": 5500
    },
    {
      "epoch": 1.8,
      "learning_rate": 2.4e-05,
      "loss": 3.1178,
      "step": 6000
    },
    {
      "epoch": 1.95,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 3.0262,
      "step": 6500
    },
    {
      "epoch": 2.1,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 2.9351,
      "step": 7000
    },
    {
      "epoch": 2.25,
      "learning_rate": 3e-05,
      "loss": 2.8609,
      "step": 7500
    },
    {
      "epoch": 2.4,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 2.793,
      "step": 8000
    },
    {
      "epoch": 2.55,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 2.7255,
      "step": 8500
    },
    {
      "epoch": 2.7,
      "learning_rate": 3.6e-05,
      "loss": 2.6665,
      "step": 9000
    },
    {
      "epoch": 2.85,
      "learning_rate": 3.8e-05,
      "loss": 2.6135,
      "step": 9500
    },
    {
      "epoch": 3.0,
      "learning_rate": 4e-05,
      "loss": 2.5613,
      "step": 10000
    },
    {
      "epoch": 3.15,
      "learning_rate": 4.2e-05,
      "loss": 2.5102,
      "step": 10500
    },
    {
      "epoch": 3.3,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 2.4629,
      "step": 11000
    },
    {
      "epoch": 3.45,
      "learning_rate": 4.600000000000001e-05,
      "loss": 2.419,
      "step": 11500
    },
    {
      "epoch": 3.6,
      "learning_rate": 4.8e-05,
      "loss": 2.3811,
      "step": 12000
    },
    {
      "epoch": 3.75,
      "learning_rate": 5e-05,
      "loss": 2.3446,
      "step": 12500
    },
    {
      "epoch": 3.75,
      "eval_loss": 2.2050914764404297,
      "eval_runtime": 371.9663,
      "eval_samples_per_second": 458.074,
      "eval_steps_per_second": 3.581,
      "step": 12500
    },
    {
      "epoch": 3.9,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 2.3102,
      "step": 13000
    },
    {
      "epoch": 4.05,
      "learning_rate": 5.4000000000000005e-05,
      "loss": 2.2772,
      "step": 13500
    },
    {
      "epoch": 4.2,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 2.2466,
      "step": 14000
    },
    {
      "epoch": 4.35,
      "learning_rate": 5.8e-05,
      "loss": 2.2166,
      "step": 14500
    },
    {
      "epoch": 4.5,
      "learning_rate": 6e-05,
      "loss": 2.1951,
      "step": 15000
    },
    {
      "epoch": 4.65,
      "learning_rate": 6.2e-05,
      "loss": 2.1697,
      "step": 15500
    },
    {
      "epoch": 4.8,
      "learning_rate": 6.400000000000001e-05,
      "loss": 2.1481,
      "step": 16000
    },
    {
      "epoch": 4.95,
      "learning_rate": 6.6e-05,
      "loss": 2.1281,
      "step": 16500
    },
    {
      "epoch": 5.1,
      "learning_rate": 6.800000000000001e-05,
      "loss": 2.1042,
      "step": 17000
    },
    {
      "epoch": 5.25,
      "learning_rate": 7e-05,
      "loss": 2.0863,
      "step": 17500
    },
    {
      "epoch": 5.4,
      "learning_rate": 7.2e-05,
      "loss": 2.0667,
      "step": 18000
    },
    {
      "epoch": 5.55,
      "learning_rate": 7.4e-05,
      "loss": 2.0539,
      "step": 18500
    },
    {
      "epoch": 5.7,
      "learning_rate": 7.6e-05,
      "loss": 2.0358,
      "step": 19000
    },
    {
      "epoch": 5.85,
      "learning_rate": 7.800000000000001e-05,
      "loss": 2.0244,
      "step": 19500
    },
    {
      "epoch": 6.0,
      "learning_rate": 8e-05,
      "loss": 2.0081,
      "step": 20000
    },
    {
      "epoch": 6.15,
      "learning_rate": 8.2e-05,
      "loss": 1.9923,
      "step": 20500
    },
    {
      "epoch": 6.3,
      "learning_rate": 8.4e-05,
      "loss": 1.9846,
      "step": 21000
    },
    {
      "epoch": 6.45,
      "learning_rate": 8.6e-05,
      "loss": 1.968,
      "step": 21500
    },
    {
      "epoch": 6.6,
      "learning_rate": 8.800000000000001e-05,
      "loss": 1.9588,
      "step": 22000
    },
    {
      "epoch": 6.75,
      "learning_rate": 9e-05,
      "loss": 1.9472,
      "step": 22500
    },
    {
      "epoch": 6.9,
      "learning_rate": 9.200000000000001e-05,
      "loss": 1.9378,
      "step": 23000
    },
    {
      "epoch": 7.05,
      "learning_rate": 9.4e-05,
      "loss": 1.9285,
      "step": 23500
    },
    {
      "epoch": 7.2,
      "learning_rate": 9.6e-05,
      "loss": 1.9167,
      "step": 24000
    },
    {
      "epoch": 7.35,
      "learning_rate": 9.8e-05,
      "loss": 1.9053,
      "step": 24500
    },
    {
      "epoch": 7.5,
      "learning_rate": 0.0001,
      "loss": 1.8975,
      "step": 25000
    },
    {
      "epoch": 7.5,
      "eval_loss": 1.7942529916763306,
      "eval_runtime": 357.1217,
      "eval_samples_per_second": 477.115,
      "eval_steps_per_second": 3.73,
      "step": 25000
    },
    {
      "epoch": 7.65,
      "learning_rate": 9.977777777777779e-05,
      "loss": 1.8928,
      "step": 25500
    },
    {
      "epoch": 7.8,
      "learning_rate": 9.955555555555556e-05,
      "loss": 1.8814,
      "step": 26000
    },
    {
      "epoch": 7.95,
      "learning_rate": 9.933333333333334e-05,
      "loss": 1.8761,
      "step": 26500
    },
    {
      "epoch": 8.1,
      "learning_rate": 9.911111111111112e-05,
      "loss": 1.8629,
      "step": 27000
    },
    {
      "epoch": 8.25,
      "learning_rate": 9.888888888888889e-05,
      "loss": 1.8538,
      "step": 27500
    },
    {
      "epoch": 8.4,
      "learning_rate": 9.866666666666668e-05,
      "loss": 1.8486,
      "step": 28000
    },
    {
      "epoch": 8.55,
      "learning_rate": 9.844444444444444e-05,
      "loss": 1.8439,
      "step": 28500
    },
    {
      "epoch": 8.7,
      "learning_rate": 9.822222222222223e-05,
      "loss": 1.835,
      "step": 29000
    },
    {
      "epoch": 8.85,
      "learning_rate": 9.8e-05,
      "loss": 1.8286,
      "step": 29500
    },
    {
      "epoch": 9.0,
      "learning_rate": 9.777777777777778e-05,
      "loss": 1.8235,
      "step": 30000
    },
    {
      "epoch": 9.15,
      "learning_rate": 9.755555555555555e-05,
      "loss": 1.8151,
      "step": 30500
    },
    {
      "epoch": 9.3,
      "learning_rate": 9.733333333333335e-05,
      "loss": 1.81,
      "step": 31000
    },
    {
      "epoch": 9.45,
      "learning_rate": 9.711111111111111e-05,
      "loss": 1.8049,
      "step": 31500
    },
    {
      "epoch": 9.6,
      "learning_rate": 9.68888888888889e-05,
      "loss": 1.7986,
      "step": 32000
    },
    {
      "epoch": 9.75,
      "learning_rate": 9.666666666666667e-05,
      "loss": 1.797,
      "step": 32500
    },
    {
      "epoch": 9.9,
      "learning_rate": 9.644444444444445e-05,
      "loss": 1.7878,
      "step": 33000
    },
    {
      "epoch": 10.05,
      "learning_rate": 9.622222222222222e-05,
      "loss": 1.7842,
      "step": 33500
    },
    {
      "epoch": 10.2,
      "learning_rate": 9.6e-05,
      "loss": 1.7774,
      "step": 34000
    },
    {
      "epoch": 10.35,
      "learning_rate": 9.577777777777777e-05,
      "loss": 1.7719,
      "step": 34500
    },
    {
      "epoch": 10.5,
      "learning_rate": 9.555555555555557e-05,
      "loss": 1.769,
      "step": 35000
    },
    {
      "epoch": 10.65,
      "learning_rate": 9.533333333333334e-05,
      "loss": 1.7678,
      "step": 35500
    },
    {
      "epoch": 10.8,
      "learning_rate": 9.511111111111112e-05,
      "loss": 1.7598,
      "step": 36000
    },
    {
      "epoch": 10.95,
      "learning_rate": 9.488888888888889e-05,
      "loss": 1.759,
      "step": 36500
    },
    {
      "epoch": 11.1,
      "learning_rate": 9.466666666666667e-05,
      "loss": 1.7488,
      "step": 37000
    },
    {
      "epoch": 11.25,
      "learning_rate": 9.444444444444444e-05,
      "loss": 1.7479,
      "step": 37500
    },
    {
      "epoch": 11.25,
      "eval_loss": 1.6556859016418457,
      "eval_runtime": 356.8188,
      "eval_samples_per_second": 477.52,
      "eval_steps_per_second": 3.733,
      "step": 37500
    },
    {
      "epoch": 11.4,
      "learning_rate": 9.422222222222223e-05,
      "loss": 1.7473,
      "step": 38000
    },
    {
      "epoch": 11.55,
      "learning_rate": 9.4e-05,
      "loss": 1.7423,
      "step": 38500
    },
    {
      "epoch": 11.7,
      "learning_rate": 9.377777777777779e-05,
      "loss": 1.7374,
      "step": 39000
    },
    {
      "epoch": 11.85,
      "learning_rate": 9.355555555555556e-05,
      "loss": 1.7334,
      "step": 39500
    },
    {
      "epoch": 12.0,
      "learning_rate": 9.333333333333334e-05,
      "loss": 1.7315,
      "step": 40000
    },
    {
      "epoch": 12.15,
      "learning_rate": 9.311111111111111e-05,
      "loss": 1.725,
      "step": 40500
    },
    {
      "epoch": 12.3,
      "learning_rate": 9.28888888888889e-05,
      "loss": 1.722,
      "step": 41000
    },
    {
      "epoch": 12.45,
      "learning_rate": 9.266666666666666e-05,
      "loss": 1.7219,
      "step": 41500
    },
    {
      "epoch": 12.6,
      "learning_rate": 9.244444444444445e-05,
      "loss": 1.7211,
      "step": 42000
    },
    {
      "epoch": 12.75,
      "learning_rate": 9.222222222222223e-05,
      "loss": 1.716,
      "step": 42500
    },
    {
      "epoch": 12.9,
      "learning_rate": 9.200000000000001e-05,
      "loss": 1.7118,
      "step": 43000
    },
    {
      "epoch": 13.05,
      "learning_rate": 9.177777777777778e-05,
      "loss": 1.7106,
      "step": 43500
    },
    {
      "epoch": 13.2,
      "learning_rate": 9.155555555555557e-05,
      "loss": 1.7033,
      "step": 44000
    },
    {
      "epoch": 13.35,
      "learning_rate": 9.133333333333334e-05,
      "loss": 1.703,
      "step": 44500
    },
    {
      "epoch": 13.5,
      "learning_rate": 9.111111111111112e-05,
      "loss": 1.7002,
      "step": 45000
    },
    {
      "epoch": 13.65,
      "learning_rate": 9.088888888888889e-05,
      "loss": 1.7012,
      "step": 45500
    },
    {
      "epoch": 13.8,
      "learning_rate": 9.066666666666667e-05,
      "loss": 1.6943,
      "step": 46000
    },
    {
      "epoch": 13.95,
      "learning_rate": 9.044444444444445e-05,
      "loss": 1.6932,
      "step": 46500
    },
    {
      "epoch": 14.1,
      "learning_rate": 9.022222222222224e-05,
      "loss": 1.6912,
      "step": 47000
    },
    {
      "epoch": 14.25,
      "learning_rate": 9e-05,
      "loss": 1.687,
      "step": 47500
    },
    {
      "epoch": 14.4,
      "learning_rate": 8.977777777777779e-05,
      "loss": 1.6875,
      "step": 48000
    },
    {
      "epoch": 14.55,
      "learning_rate": 8.955555555555556e-05,
      "loss": 1.6849,
      "step": 48500
    },
    {
      "epoch": 14.7,
      "learning_rate": 8.933333333333334e-05,
      "loss": 1.6788,
      "step": 49000
    },
    {
      "epoch": 14.85,
      "learning_rate": 8.911111111111111e-05,
      "loss": 1.6801,
      "step": 49500
    },
    {
      "epoch": 15.0,
      "learning_rate": 8.888888888888889e-05,
      "loss": 1.6774,
      "step": 50000
    },
    {
      "epoch": 15.0,
      "eval_loss": 1.5897281169891357,
      "eval_runtime": 357.0501,
      "eval_samples_per_second": 477.21,
      "eval_steps_per_second": 3.731,
      "step": 50000
    },
    {
      "epoch": 15.15,
      "learning_rate": 8.866666666666668e-05,
      "loss": 1.6729,
      "step": 50500
    },
    {
      "epoch": 15.3,
      "learning_rate": 8.844444444444445e-05,
      "loss": 1.672,
      "step": 51000
    },
    {
      "epoch": 15.45,
      "learning_rate": 8.822222222222223e-05,
      "loss": 1.6724,
      "step": 51500
    },
    {
      "epoch": 15.6,
      "learning_rate": 8.800000000000001e-05,
      "loss": 1.6688,
      "step": 52000
    },
    {
      "epoch": 15.75,
      "learning_rate": 8.777777777777778e-05,
      "loss": 1.6658,
      "step": 52500
    },
    {
      "epoch": 15.9,
      "learning_rate": 8.755555555555556e-05,
      "loss": 1.6633,
      "step": 53000
    },
    {
      "epoch": 16.05,
      "learning_rate": 8.733333333333333e-05,
      "loss": 1.6618,
      "step": 53500
    },
    {
      "epoch": 16.2,
      "learning_rate": 8.711111111111112e-05,
      "loss": 1.6586,
      "step": 54000
    },
    {
      "epoch": 16.35,
      "learning_rate": 8.68888888888889e-05,
      "loss": 1.6573,
      "step": 54500
    },
    {
      "epoch": 16.5,
      "learning_rate": 8.666666666666667e-05,
      "loss": 1.6554,
      "step": 55000
    },
    {
      "epoch": 16.65,
      "learning_rate": 8.644444444444445e-05,
      "loss": 1.6576,
      "step": 55500
    },
    {
      "epoch": 16.8,
      "learning_rate": 8.622222222222222e-05,
      "loss": 1.6539,
      "step": 56000
    },
    {
      "epoch": 16.95,
      "learning_rate": 8.6e-05,
      "loss": 1.6523,
      "step": 56500
    },
    {
      "epoch": 17.1,
      "learning_rate": 8.577777777777777e-05,
      "loss": 1.6473,
      "step": 57000
    },
    {
      "epoch": 17.25,
      "learning_rate": 8.555555555555556e-05,
      "loss": 1.6475,
      "step": 57500
    },
    {
      "epoch": 17.4,
      "learning_rate": 8.533333333333334e-05,
      "loss": 1.647,
      "step": 58000
    },
    {
      "epoch": 17.55,
      "learning_rate": 8.511111111111112e-05,
      "loss": 1.6421,
      "step": 58500
    },
    {
      "epoch": 17.7,
      "learning_rate": 8.488888888888889e-05,
      "loss": 1.6444,
      "step": 59000
    },
    {
      "epoch": 17.85,
      "learning_rate": 8.466666666666667e-05,
      "loss": 1.6446,
      "step": 59500
    },
    {
      "epoch": 18.0,
      "learning_rate": 8.444444444444444e-05,
      "loss": 1.6423,
      "step": 60000
    },
    {
      "epoch": 18.15,
      "learning_rate": 8.422222222222223e-05,
      "loss": 1.6394,
      "step": 60500
    },
    {
      "epoch": 18.3,
      "learning_rate": 8.4e-05,
      "loss": 1.6348,
      "step": 61000
    },
    {
      "epoch": 18.45,
      "learning_rate": 8.377777777777778e-05,
      "loss": 1.633,
      "step": 61500
    },
    {
      "epoch": 18.6,
      "learning_rate": 8.355555555555556e-05,
      "loss": 1.6364,
      "step": 62000
    },
    {
      "epoch": 18.75,
      "learning_rate": 8.333333333333334e-05,
      "loss": 1.6314,
      "step": 62500
    },
    {
      "epoch": 18.75,
      "eval_loss": 1.5525341033935547,
      "eval_runtime": 356.5096,
      "eval_samples_per_second": 477.934,
      "eval_steps_per_second": 3.736,
      "step": 62500
    },
    {
      "epoch": 18.9,
      "learning_rate": 8.311111111111111e-05,
      "loss": 1.6316,
      "step": 63000
    },
    {
      "epoch": 19.05,
      "learning_rate": 8.28888888888889e-05,
      "loss": 1.6315,
      "step": 63500
    },
    {
      "epoch": 19.2,
      "learning_rate": 8.266666666666667e-05,
      "loss": 1.6288,
      "step": 64000
    },
    {
      "epoch": 19.35,
      "learning_rate": 8.244444444444445e-05,
      "loss": 1.6277,
      "step": 64500
    },
    {
      "epoch": 19.5,
      "learning_rate": 8.222222222222222e-05,
      "loss": 1.6258,
      "step": 65000
    },
    {
      "epoch": 19.65,
      "learning_rate": 8.2e-05,
      "loss": 1.6223,
      "step": 65500
    },
    {
      "epoch": 19.8,
      "learning_rate": 8.177777777777778e-05,
      "loss": 1.6217,
      "step": 66000
    },
    {
      "epoch": 19.95,
      "learning_rate": 8.155555555555557e-05,
      "loss": 1.6254,
      "step": 66500
    },
    {
      "epoch": 20.1,
      "learning_rate": 8.133333333333334e-05,
      "loss": 1.6196,
      "step": 67000
    },
    {
      "epoch": 20.25,
      "learning_rate": 8.111111111111112e-05,
      "loss": 1.6172,
      "step": 67500
    },
    {
      "epoch": 20.4,
      "learning_rate": 8.088888888888889e-05,
      "loss": 1.6162,
      "step": 68000
    },
    {
      "epoch": 20.55,
      "learning_rate": 8.066666666666667e-05,
      "loss": 1.6136,
      "step": 68500
    },
    {
      "epoch": 20.7,
      "learning_rate": 8.044444444444444e-05,
      "loss": 1.6162,
      "step": 69000
    },
    {
      "epoch": 20.85,
      "learning_rate": 8.022222222222222e-05,
      "loss": 1.6153,
      "step": 69500
    },
    {
      "epoch": 21.0,
      "learning_rate": 8e-05,
      "loss": 1.613,
      "step": 70000
    },
    {
      "epoch": 21.15,
      "learning_rate": 7.977777777777779e-05,
      "loss": 1.6098,
      "step": 70500
    },
    {
      "epoch": 21.3,
      "learning_rate": 7.955555555555556e-05,
      "loss": 1.6115,
      "step": 71000
    },
    {
      "epoch": 21.45,
      "learning_rate": 7.933333333333334e-05,
      "loss": 1.6077,
      "step": 71500
    },
    {
      "epoch": 21.6,
      "learning_rate": 7.911111111111111e-05,
      "loss": 1.6077,
      "step": 72000
    },
    {
      "epoch": 21.75,
      "learning_rate": 7.88888888888889e-05,
      "loss": 1.607,
      "step": 72500
    },
    {
      "epoch": 21.9,
      "learning_rate": 7.866666666666666e-05,
      "loss": 1.607,
      "step": 73000
    },
    {
      "epoch": 22.05,
      "learning_rate": 7.844444444444446e-05,
      "loss": 1.6067,
      "step": 73500
    },
    {
      "epoch": 22.2,
      "learning_rate": 7.822222222222223e-05,
      "loss": 1.6024,
      "step": 74000
    },
    {
      "epoch": 22.35,
      "learning_rate": 7.800000000000001e-05,
      "loss": 1.6028,
      "step": 74500
    },
    {
      "epoch": 22.5,
      "learning_rate": 7.777777777777778e-05,
      "loss": 1.6017,
      "step": 75000
    },
    {
      "epoch": 22.5,
      "eval_loss": 1.525025725364685,
      "eval_runtime": 356.6664,
      "eval_samples_per_second": 477.724,
      "eval_steps_per_second": 3.735,
      "step": 75000
    },
    {
      "epoch": 22.65,
      "learning_rate": 7.755555555555556e-05,
      "loss": 1.5995,
      "step": 75500
    },
    {
      "epoch": 22.8,
      "learning_rate": 7.733333333333333e-05,
      "loss": 1.596,
      "step": 76000
    },
    {
      "epoch": 22.95,
      "learning_rate": 7.711111111111112e-05,
      "loss": 1.6002,
      "step": 76500
    },
    {
      "epoch": 23.1,
      "learning_rate": 7.688888888888889e-05,
      "loss": 1.5939,
      "step": 77000
    },
    {
      "epoch": 23.25,
      "learning_rate": 7.666666666666667e-05,
      "loss": 1.5929,
      "step": 77500
    },
    {
      "epoch": 23.4,
      "learning_rate": 7.644444444444445e-05,
      "loss": 1.5948,
      "step": 78000
    },
    {
      "epoch": 23.55,
      "learning_rate": 7.622222222222223e-05,
      "loss": 1.5921,
      "step": 78500
    },
    {
      "epoch": 23.7,
      "learning_rate": 7.6e-05,
      "loss": 1.5917,
      "step": 79000
    },
    {
      "epoch": 23.85,
      "learning_rate": 7.577777777777779e-05,
      "loss": 1.5953,
      "step": 79500
    },
    {
      "epoch": 24.0,
      "learning_rate": 7.555555555555556e-05,
      "loss": 1.5909,
      "step": 80000
    },
    {
      "epoch": 24.15,
      "learning_rate": 7.533333333333334e-05,
      "loss": 1.5886,
      "step": 80500
    },
    {
      "epoch": 24.3,
      "learning_rate": 7.511111111111111e-05,
      "loss": 1.5873,
      "step": 81000
    },
    {
      "epoch": 24.45,
      "learning_rate": 7.488888888888889e-05,
      "loss": 1.5863,
      "step": 81500
    },
    {
      "epoch": 24.6,
      "learning_rate": 7.466666666666667e-05,
      "loss": 1.5861,
      "step": 82000
    },
    {
      "epoch": 24.75,
      "learning_rate": 7.444444444444444e-05,
      "loss": 1.5845,
      "step": 82500
    },
    {
      "epoch": 24.9,
      "learning_rate": 7.422222222222223e-05,
      "loss": 1.5847,
      "step": 83000
    },
    {
      "epoch": 25.04,
      "learning_rate": 7.4e-05,
      "loss": 1.5832,
      "step": 83500
    },
    {
      "epoch": 25.19,
      "learning_rate": 7.377777777777778e-05,
      "loss": 1.5804,
      "step": 84000
    },
    {
      "epoch": 25.34,
      "learning_rate": 7.355555555555556e-05,
      "loss": 1.5804,
      "step": 84500
    },
    {
      "epoch": 25.49,
      "learning_rate": 7.333333333333333e-05,
      "loss": 1.5809,
      "step": 85000
    },
    {
      "epoch": 25.64,
      "learning_rate": 7.311111111111111e-05,
      "loss": 1.5785,
      "step": 85500
    },
    {
      "epoch": 25.79,
      "learning_rate": 7.28888888888889e-05,
      "loss": 1.5804,
      "step": 86000
    },
    {
      "epoch": 25.94,
      "learning_rate": 7.266666666666667e-05,
      "loss": 1.5796,
      "step": 86500
    },
    {
      "epoch": 26.09,
      "learning_rate": 7.244444444444445e-05,
      "loss": 1.5777,
      "step": 87000
    },
    {
      "epoch": 26.24,
      "learning_rate": 7.222222222222222e-05,
      "loss": 1.5761,
      "step": 87500
    },
    {
      "epoch": 26.24,
      "eval_loss": 1.5014694929122925,
      "eval_runtime": 356.8479,
      "eval_samples_per_second": 477.481,
      "eval_steps_per_second": 3.733,
      "step": 87500
    },
    {
      "epoch": 26.39,
      "learning_rate": 7.2e-05,
      "loss": 1.5762,
      "step": 88000
    },
    {
      "epoch": 26.54,
      "learning_rate": 7.177777777777777e-05,
      "loss": 1.5745,
      "step": 88500
    },
    {
      "epoch": 26.69,
      "learning_rate": 7.155555555555555e-05,
      "loss": 1.5726,
      "step": 89000
    },
    {
      "epoch": 26.84,
      "learning_rate": 7.133333333333334e-05,
      "loss": 1.5732,
      "step": 89500
    },
    {
      "epoch": 26.99,
      "learning_rate": 7.111111111111112e-05,
      "loss": 1.5723,
      "step": 90000
    },
    {
      "epoch": 27.14,
      "learning_rate": 7.088888888888889e-05,
      "loss": 1.57,
      "step": 90500
    },
    {
      "epoch": 27.29,
      "learning_rate": 7.066666666666667e-05,
      "loss": 1.5677,
      "step": 91000
    },
    {
      "epoch": 27.44,
      "learning_rate": 7.044444444444444e-05,
      "loss": 1.5684,
      "step": 91500
    },
    {
      "epoch": 27.59,
      "learning_rate": 7.022222222222222e-05,
      "loss": 1.5693,
      "step": 92000
    },
    {
      "epoch": 27.74,
      "learning_rate": 7e-05,
      "loss": 1.5676,
      "step": 92500
    },
    {
      "epoch": 27.89,
      "learning_rate": 6.977777777777779e-05,
      "loss": 1.5673,
      "step": 93000
    },
    {
      "epoch": 28.04,
      "learning_rate": 6.955555555555556e-05,
      "loss": 1.568,
      "step": 93500
    },
    {
      "epoch": 28.19,
      "learning_rate": 6.933333333333334e-05,
      "loss": 1.5646,
      "step": 94000
    },
    {
      "epoch": 28.34,
      "learning_rate": 6.911111111111111e-05,
      "loss": 1.5651,
      "step": 94500
    },
    {
      "epoch": 28.49,
      "learning_rate": 6.88888888888889e-05,
      "loss": 1.5658,
      "step": 95000
    },
    {
      "epoch": 28.64,
      "learning_rate": 6.866666666666666e-05,
      "loss": 1.563,
      "step": 95500
    },
    {
      "epoch": 28.79,
      "learning_rate": 6.844444444444445e-05,
      "loss": 1.5613,
      "step": 96000
    },
    {
      "epoch": 28.94,
      "learning_rate": 6.822222222222222e-05,
      "loss": 1.562,
      "step": 96500
    },
    {
      "epoch": 29.09,
      "learning_rate": 6.800000000000001e-05,
      "loss": 1.5602,
      "step": 97000
    },
    {
      "epoch": 29.24,
      "learning_rate": 6.777777777777778e-05,
      "loss": 1.5587,
      "step": 97500
    },
    {
      "epoch": 29.39,
      "learning_rate": 6.755555555555557e-05,
      "loss": 1.5586,
      "step": 98000
    },
    {
      "epoch": 29.54,
      "learning_rate": 6.733333333333333e-05,
      "loss": 1.5573,
      "step": 98500
    },
    {
      "epoch": 29.69,
      "learning_rate": 6.711111111111112e-05,
      "loss": 1.5574,
      "step": 99000
    },
    {
      "epoch": 29.84,
      "learning_rate": 6.688888888888889e-05,
      "loss": 1.5582,
      "step": 99500
    },
    {
      "epoch": 29.99,
      "learning_rate": 6.666666666666667e-05,
      "loss": 1.5571,
      "step": 100000
    },
    {
      "epoch": 29.99,
      "eval_loss": 1.486186146736145,
      "eval_runtime": 356.5839,
      "eval_samples_per_second": 477.834,
      "eval_steps_per_second": 3.735,
      "step": 100000
    },
    {
      "epoch": 30.14,
      "learning_rate": 6.644444444444444e-05,
      "loss": 1.5551,
      "step": 100500
    },
    {
      "epoch": 30.29,
      "learning_rate": 6.622222222222224e-05,
      "loss": 1.555,
      "step": 101000
    },
    {
      "epoch": 30.44,
      "learning_rate": 6.6e-05,
      "loss": 1.5542,
      "step": 101500
    },
    {
      "epoch": 30.59,
      "learning_rate": 6.577777777777779e-05,
      "loss": 1.5542,
      "step": 102000
    },
    {
      "epoch": 30.74,
      "learning_rate": 6.555555555555556e-05,
      "loss": 1.5527,
      "step": 102500
    },
    {
      "epoch": 30.89,
      "learning_rate": 6.533333333333334e-05,
      "loss": 1.5508,
      "step": 103000
    },
    {
      "epoch": 31.04,
      "learning_rate": 6.511111111111111e-05,
      "loss": 1.5514,
      "step": 103500
    },
    {
      "epoch": 31.19,
      "learning_rate": 6.488888888888889e-05,
      "loss": 1.5496,
      "step": 104000
    },
    {
      "epoch": 31.34,
      "learning_rate": 6.466666666666666e-05,
      "loss": 1.5478,
      "step": 104500
    },
    {
      "epoch": 31.49,
      "learning_rate": 6.444444444444446e-05,
      "loss": 1.5493,
      "step": 105000
    },
    {
      "epoch": 31.64,
      "learning_rate": 6.422222222222223e-05,
      "loss": 1.5473,
      "step": 105500
    },
    {
      "epoch": 31.79,
      "learning_rate": 6.400000000000001e-05,
      "loss": 1.5489,
      "step": 106000
    },
    {
      "epoch": 31.94,
      "learning_rate": 6.377777777777778e-05,
      "loss": 1.5475,
      "step": 106500
    },
    {
      "epoch": 32.09,
      "learning_rate": 6.355555555555556e-05,
      "loss": 1.5448,
      "step": 107000
    },
    {
      "epoch": 32.24,
      "learning_rate": 6.333333333333333e-05,
      "loss": 1.546,
      "step": 107500
    },
    {
      "epoch": 32.39,
      "learning_rate": 6.311111111111112e-05,
      "loss": 1.5472,
      "step": 108000
    },
    {
      "epoch": 32.54,
      "learning_rate": 6.28888888888889e-05,
      "loss": 1.5464,
      "step": 108500
    },
    {
      "epoch": 32.69,
      "learning_rate": 6.266666666666667e-05,
      "loss": 1.5435,
      "step": 109000
    },
    {
      "epoch": 32.84,
      "learning_rate": 6.244444444444445e-05,
      "loss": 1.5436,
      "step": 109500
    },
    {
      "epoch": 32.99,
      "learning_rate": 6.222222222222222e-05,
      "loss": 1.5449,
      "step": 110000
    },
    {
      "epoch": 33.14,
      "learning_rate": 6.2e-05,
      "loss": 1.5398,
      "step": 110500
    },
    {
      "epoch": 33.29,
      "learning_rate": 6.177777777777779e-05,
      "loss": 1.5387,
      "step": 111000
    },
    {
      "epoch": 33.44,
      "learning_rate": 6.155555555555555e-05,
      "loss": 1.5404,
      "step": 111500
    },
    {
      "epoch": 33.59,
      "learning_rate": 6.133333333333334e-05,
      "loss": 1.5397,
      "step": 112000
    },
    {
      "epoch": 33.74,
      "learning_rate": 6.111111111111112e-05,
      "loss": 1.5411,
      "step": 112500
    },
    {
      "epoch": 33.74,
      "eval_loss": 1.4691314697265625,
      "eval_runtime": 356.7348,
      "eval_samples_per_second": 477.632,
      "eval_steps_per_second": 3.734,
      "step": 112500
    },
    {
      "epoch": 33.89,
      "learning_rate": 6.08888888888889e-05,
      "loss": 1.5408,
      "step": 113000
    },
    {
      "epoch": 34.04,
      "learning_rate": 6.066666666666667e-05,
      "loss": 1.5398,
      "step": 113500
    },
    {
      "epoch": 34.19,
      "learning_rate": 6.044444444444445e-05,
      "loss": 1.5373,
      "step": 114000
    },
    {
      "epoch": 34.34,
      "learning_rate": 6.0222222222222225e-05,
      "loss": 1.5347,
      "step": 114500
    },
    {
      "epoch": 34.49,
      "learning_rate": 6e-05,
      "loss": 1.536,
      "step": 115000
    },
    {
      "epoch": 34.64,
      "learning_rate": 5.977777777777778e-05,
      "loss": 1.5363,
      "step": 115500
    },
    {
      "epoch": 34.79,
      "learning_rate": 5.9555555555555554e-05,
      "loss": 1.5343,
      "step": 116000
    },
    {
      "epoch": 34.94,
      "learning_rate": 5.9333333333333343e-05,
      "loss": 1.5377,
      "step": 116500
    },
    {
      "epoch": 35.09,
      "learning_rate": 5.911111111111112e-05,
      "loss": 1.5349,
      "step": 117000
    },
    {
      "epoch": 35.24,
      "learning_rate": 5.8888888888888896e-05,
      "loss": 1.5325,
      "step": 117500
    },
    {
      "epoch": 35.39,
      "learning_rate": 5.866666666666667e-05,
      "loss": 1.5325,
      "step": 118000
    },
    {
      "epoch": 35.54,
      "learning_rate": 5.844444444444445e-05,
      "loss": 1.5312,
      "step": 118500
    },
    {
      "epoch": 35.69,
      "learning_rate": 5.8222222222222224e-05,
      "loss": 1.5342,
      "step": 119000
    },
    {
      "epoch": 35.84,
      "learning_rate": 5.8e-05,
      "loss": 1.5304,
      "step": 119500
    },
    {
      "epoch": 35.99,
      "learning_rate": 5.7777777777777776e-05,
      "loss": 1.5312,
      "step": 120000
    },
    {
      "epoch": 36.14,
      "learning_rate": 5.755555555555556e-05,
      "loss": 1.5294,
      "step": 120500
    },
    {
      "epoch": 36.29,
      "learning_rate": 5.7333333333333336e-05,
      "loss": 1.5301,
      "step": 121000
    },
    {
      "epoch": 36.44,
      "learning_rate": 5.711111111111112e-05,
      "loss": 1.5298,
      "step": 121500
    },
    {
      "epoch": 36.59,
      "learning_rate": 5.6888888888888895e-05,
      "loss": 1.5284,
      "step": 122000
    },
    {
      "epoch": 36.74,
      "learning_rate": 5.666666666666667e-05,
      "loss": 1.5303,
      "step": 122500
    },
    {
      "epoch": 36.89,
      "learning_rate": 5.644444444444445e-05,
      "loss": 1.5285,
      "step": 123000
    },
    {
      "epoch": 37.04,
      "learning_rate": 5.622222222222222e-05,
      "loss": 1.527,
      "step": 123500
    },
    {
      "epoch": 37.19,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 1.524,
      "step": 124000
    },
    {
      "epoch": 37.34,
      "learning_rate": 5.577777777777778e-05,
      "loss": 1.5259,
      "step": 124500
    },
    {
      "epoch": 37.49,
      "learning_rate": 5.555555555555556e-05,
      "loss": 1.5258,
      "step": 125000
    },
    {
      "epoch": 37.49,
      "eval_loss": 1.4563013315200806,
      "eval_runtime": 356.8442,
      "eval_samples_per_second": 477.486,
      "eval_steps_per_second": 3.733,
      "step": 125000
    },
    {
      "epoch": 37.64,
      "learning_rate": 5.5333333333333334e-05,
      "loss": 1.5242,
      "step": 125500
    },
    {
      "epoch": 37.79,
      "learning_rate": 5.511111111111111e-05,
      "loss": 1.5262,
      "step": 126000
    },
    {
      "epoch": 37.94,
      "learning_rate": 5.488888888888889e-05,
      "loss": 1.5257,
      "step": 126500
    },
    {
      "epoch": 38.09,
      "learning_rate": 5.466666666666666e-05,
      "loss": 1.5231,
      "step": 127000
    },
    {
      "epoch": 38.24,
      "learning_rate": 5.4444444444444446e-05,
      "loss": 1.5204,
      "step": 127500
    },
    {
      "epoch": 38.39,
      "learning_rate": 5.422222222222223e-05,
      "loss": 1.5227,
      "step": 128000
    },
    {
      "epoch": 38.54,
      "learning_rate": 5.4000000000000005e-05,
      "loss": 1.5226,
      "step": 128500
    },
    {
      "epoch": 38.69,
      "learning_rate": 5.377777777777778e-05,
      "loss": 1.5212,
      "step": 129000
    },
    {
      "epoch": 38.84,
      "learning_rate": 5.355555555555556e-05,
      "loss": 1.525,
      "step": 129500
    },
    {
      "epoch": 38.99,
      "learning_rate": 5.333333333333333e-05,
      "loss": 1.5222,
      "step": 130000
    },
    {
      "epoch": 39.14,
      "learning_rate": 5.311111111111111e-05,
      "loss": 1.5176,
      "step": 130500
    },
    {
      "epoch": 39.29,
      "learning_rate": 5.2888888888888885e-05,
      "loss": 1.5166,
      "step": 131000
    },
    {
      "epoch": 39.44,
      "learning_rate": 5.266666666666666e-05,
      "loss": 1.5186,
      "step": 131500
    },
    {
      "epoch": 39.59,
      "learning_rate": 5.244444444444445e-05,
      "loss": 1.5176,
      "step": 132000
    },
    {
      "epoch": 39.74,
      "learning_rate": 5.222222222222223e-05,
      "loss": 1.5199,
      "step": 132500
    },
    {
      "epoch": 39.89,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 1.5188,
      "step": 133000
    },
    {
      "epoch": 40.04,
      "learning_rate": 5.177777777777778e-05,
      "loss": 1.5177,
      "step": 133500
    },
    {
      "epoch": 40.19,
      "learning_rate": 5.1555555555555556e-05,
      "loss": 1.5147,
      "step": 134000
    },
    {
      "epoch": 40.34,
      "learning_rate": 5.133333333333333e-05,
      "loss": 1.5163,
      "step": 134500
    },
    {
      "epoch": 40.49,
      "learning_rate": 5.111111111111111e-05,
      "loss": 1.5149,
      "step": 135000
    },
    {
      "epoch": 40.64,
      "learning_rate": 5.0888888888888884e-05,
      "loss": 1.5142,
      "step": 135500
    },
    {
      "epoch": 40.79,
      "learning_rate": 5.0666666666666674e-05,
      "loss": 1.5133,
      "step": 136000
    },
    {
      "epoch": 40.94,
      "learning_rate": 5.044444444444445e-05,
      "loss": 1.5135,
      "step": 136500
    },
    {
      "epoch": 41.09,
      "learning_rate": 5.0222222222222226e-05,
      "loss": 1.5129,
      "step": 137000
    },
    {
      "epoch": 41.24,
      "learning_rate": 5e-05,
      "loss": 1.5114,
      "step": 137500
    },
    {
      "epoch": 41.24,
      "eval_loss": 1.4439390897750854,
      "eval_runtime": 356.5866,
      "eval_samples_per_second": 477.831,
      "eval_steps_per_second": 3.735,
      "step": 137500
    },
    {
      "epoch": 41.39,
      "learning_rate": 4.977777777777778e-05,
      "loss": 1.5106,
      "step": 138000
    },
    {
      "epoch": 41.54,
      "learning_rate": 4.955555555555556e-05,
      "loss": 1.5115,
      "step": 138500
    },
    {
      "epoch": 41.69,
      "learning_rate": 4.933333333333334e-05,
      "loss": 1.5114,
      "step": 139000
    },
    {
      "epoch": 41.84,
      "learning_rate": 4.9111111111111114e-05,
      "loss": 1.5111,
      "step": 139500
    },
    {
      "epoch": 41.99,
      "learning_rate": 4.888888888888889e-05,
      "loss": 1.5118,
      "step": 140000
    },
    {
      "epoch": 42.14,
      "learning_rate": 4.866666666666667e-05,
      "loss": 1.5073,
      "step": 140500
    },
    {
      "epoch": 42.29,
      "learning_rate": 4.844444444444445e-05,
      "loss": 1.5102,
      "step": 141000
    },
    {
      "epoch": 42.44,
      "learning_rate": 4.8222222222222225e-05,
      "loss": 1.508,
      "step": 141500
    },
    {
      "epoch": 42.59,
      "learning_rate": 4.8e-05,
      "loss": 1.5081,
      "step": 142000
    },
    {
      "epoch": 42.74,
      "learning_rate": 4.7777777777777784e-05,
      "loss": 1.5086,
      "step": 142500
    },
    {
      "epoch": 42.89,
      "learning_rate": 4.755555555555556e-05,
      "loss": 1.5093,
      "step": 143000
    },
    {
      "epoch": 43.04,
      "learning_rate": 4.7333333333333336e-05,
      "loss": 1.5064,
      "step": 143500
    },
    {
      "epoch": 43.19,
      "learning_rate": 4.711111111111111e-05,
      "loss": 1.5079,
      "step": 144000
    },
    {
      "epoch": 43.34,
      "learning_rate": 4.6888888888888895e-05,
      "loss": 1.5058,
      "step": 144500
    },
    {
      "epoch": 43.49,
      "learning_rate": 4.666666666666667e-05,
      "loss": 1.5073,
      "step": 145000
    },
    {
      "epoch": 43.64,
      "learning_rate": 4.644444444444445e-05,
      "loss": 1.5055,
      "step": 145500
    },
    {
      "epoch": 43.79,
      "learning_rate": 4.6222222222222224e-05,
      "loss": 1.5056,
      "step": 146000
    },
    {
      "epoch": 43.94,
      "learning_rate": 4.600000000000001e-05,
      "loss": 1.5069,
      "step": 146500
    },
    {
      "epoch": 44.09,
      "learning_rate": 4.577777777777778e-05,
      "loss": 1.5028,
      "step": 147000
    },
    {
      "epoch": 44.24,
      "learning_rate": 4.555555555555556e-05,
      "loss": 1.502,
      "step": 147500
    },
    {
      "epoch": 44.39,
      "learning_rate": 4.5333333333333335e-05,
      "loss": 1.5019,
      "step": 148000
    },
    {
      "epoch": 44.54,
      "learning_rate": 4.511111111111112e-05,
      "loss": 1.5024,
      "step": 148500
    },
    {
      "epoch": 44.69,
      "learning_rate": 4.4888888888888894e-05,
      "loss": 1.503,
      "step": 149000
    },
    {
      "epoch": 44.84,
      "learning_rate": 4.466666666666667e-05,
      "loss": 1.5041,
      "step": 149500
    },
    {
      "epoch": 44.99,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 1.5015,
      "step": 150000
    },
    {
      "epoch": 44.99,
      "eval_loss": 1.436716914176941,
      "eval_runtime": 356.4605,
      "eval_samples_per_second": 478.0,
      "eval_steps_per_second": 3.737,
      "step": 150000
    },
    {
      "epoch": 45.14,
      "learning_rate": 4.422222222222222e-05,
      "loss": 1.4997,
      "step": 150500
    },
    {
      "epoch": 45.29,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 1.4993,
      "step": 151000
    },
    {
      "epoch": 45.44,
      "learning_rate": 4.377777777777778e-05,
      "loss": 1.4993,
      "step": 151500
    },
    {
      "epoch": 45.59,
      "learning_rate": 4.355555555555556e-05,
      "loss": 1.5004,
      "step": 152000
    },
    {
      "epoch": 45.74,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 1.5004,
      "step": 152500
    },
    {
      "epoch": 45.89,
      "learning_rate": 4.311111111111111e-05,
      "loss": 1.4977,
      "step": 153000
    },
    {
      "epoch": 46.04,
      "learning_rate": 4.2888888888888886e-05,
      "loss": 1.497,
      "step": 153500
    },
    {
      "epoch": 46.19,
      "learning_rate": 4.266666666666667e-05,
      "loss": 1.4996,
      "step": 154000
    },
    {
      "epoch": 46.34,
      "learning_rate": 4.2444444444444445e-05,
      "loss": 1.4968,
      "step": 154500
    },
    {
      "epoch": 46.49,
      "learning_rate": 4.222222222222222e-05,
      "loss": 1.4967,
      "step": 155000
    },
    {
      "epoch": 46.64,
      "learning_rate": 4.2e-05,
      "loss": 1.4952,
      "step": 155500
    },
    {
      "epoch": 46.79,
      "learning_rate": 4.177777777777778e-05,
      "loss": 1.4965,
      "step": 156000
    },
    {
      "epoch": 46.94,
      "learning_rate": 4.155555555555556e-05,
      "loss": 1.4986,
      "step": 156500
    },
    {
      "epoch": 47.09,
      "learning_rate": 4.133333333333333e-05,
      "loss": 1.495,
      "step": 157000
    },
    {
      "epoch": 47.24,
      "learning_rate": 4.111111111111111e-05,
      "loss": 1.4936,
      "step": 157500
    },
    {
      "epoch": 47.39,
      "learning_rate": 4.088888888888889e-05,
      "loss": 1.4948,
      "step": 158000
    },
    {
      "epoch": 47.54,
      "learning_rate": 4.066666666666667e-05,
      "loss": 1.4933,
      "step": 158500
    },
    {
      "epoch": 47.69,
      "learning_rate": 4.0444444444444444e-05,
      "loss": 1.493,
      "step": 159000
    },
    {
      "epoch": 47.84,
      "learning_rate": 4.022222222222222e-05,
      "loss": 1.4944,
      "step": 159500
    },
    {
      "epoch": 47.99,
      "learning_rate": 4e-05,
      "loss": 1.4968,
      "step": 160000
    },
    {
      "epoch": 48.14,
      "learning_rate": 3.977777777777778e-05,
      "loss": 1.491,
      "step": 160500
    },
    {
      "epoch": 48.29,
      "learning_rate": 3.9555555555555556e-05,
      "loss": 1.4908,
      "step": 161000
    },
    {
      "epoch": 48.44,
      "learning_rate": 3.933333333333333e-05,
      "loss": 1.4916,
      "step": 161500
    },
    {
      "epoch": 48.59,
      "learning_rate": 3.9111111111111115e-05,
      "loss": 1.4937,
      "step": 162000
    },
    {
      "epoch": 48.74,
      "learning_rate": 3.888888888888889e-05,
      "loss": 1.4923,
      "step": 162500
    },
    {
      "epoch": 48.74,
      "eval_loss": 1.4284732341766357,
      "eval_runtime": 356.8446,
      "eval_samples_per_second": 477.485,
      "eval_steps_per_second": 3.733,
      "step": 162500
    },
    {
      "epoch": 48.89,
      "learning_rate": 3.866666666666667e-05,
      "loss": 1.4927,
      "step": 163000
    },
    {
      "epoch": 49.04,
      "learning_rate": 3.844444444444444e-05,
      "loss": 1.4895,
      "step": 163500
    },
    {
      "epoch": 49.19,
      "learning_rate": 3.8222222222222226e-05,
      "loss": 1.4884,
      "step": 164000
    },
    {
      "epoch": 49.34,
      "learning_rate": 3.8e-05,
      "loss": 1.4897,
      "step": 164500
    },
    {
      "epoch": 49.49,
      "learning_rate": 3.777777777777778e-05,
      "loss": 1.4909,
      "step": 165000
    },
    {
      "epoch": 49.64,
      "learning_rate": 3.7555555555555554e-05,
      "loss": 1.4906,
      "step": 165500
    },
    {
      "epoch": 49.79,
      "learning_rate": 3.733333333333334e-05,
      "loss": 1.4859,
      "step": 166000
    },
    {
      "epoch": 49.94,
      "learning_rate": 3.7111111111111113e-05,
      "loss": 1.4898,
      "step": 166500
    },
    {
      "epoch": 50.09,
      "learning_rate": 3.688888888888889e-05,
      "loss": 1.4881,
      "step": 167000
    },
    {
      "epoch": 50.24,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 1.4886,
      "step": 167500
    },
    {
      "epoch": 50.39,
      "learning_rate": 3.644444444444445e-05,
      "loss": 1.4875,
      "step": 168000
    },
    {
      "epoch": 50.54,
      "learning_rate": 3.6222222222222225e-05,
      "loss": 1.4879,
      "step": 168500
    },
    {
      "epoch": 50.69,
      "learning_rate": 3.6e-05,
      "loss": 1.4867,
      "step": 169000
    },
    {
      "epoch": 50.84,
      "learning_rate": 3.577777777777778e-05,
      "loss": 1.4866,
      "step": 169500
    },
    {
      "epoch": 50.99,
      "learning_rate": 3.555555555555556e-05,
      "loss": 1.4862,
      "step": 170000
    },
    {
      "epoch": 51.14,
      "learning_rate": 3.5333333333333336e-05,
      "loss": 1.4839,
      "step": 170500
    },
    {
      "epoch": 51.29,
      "learning_rate": 3.511111111111111e-05,
      "loss": 1.4848,
      "step": 171000
    },
    {
      "epoch": 51.44,
      "learning_rate": 3.4888888888888895e-05,
      "loss": 1.4861,
      "step": 171500
    },
    {
      "epoch": 51.59,
      "learning_rate": 3.466666666666667e-05,
      "loss": 1.4846,
      "step": 172000
    },
    {
      "epoch": 51.74,
      "learning_rate": 3.444444444444445e-05,
      "loss": 1.4853,
      "step": 172500
    },
    {
      "epoch": 51.89,
      "learning_rate": 3.4222222222222224e-05,
      "loss": 1.4852,
      "step": 173000
    },
    {
      "epoch": 52.04,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 1.4826,
      "step": 173500
    },
    {
      "epoch": 52.19,
      "learning_rate": 3.377777777777778e-05,
      "loss": 1.4829,
      "step": 174000
    },
    {
      "epoch": 52.34,
      "learning_rate": 3.355555555555556e-05,
      "loss": 1.4815,
      "step": 174500
    },
    {
      "epoch": 52.49,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 1.4809,
      "step": 175000
    },
    {
      "epoch": 52.49,
      "eval_loss": 1.4197227954864502,
      "eval_runtime": 356.5538,
      "eval_samples_per_second": 477.875,
      "eval_steps_per_second": 3.736,
      "step": 175000
    },
    {
      "epoch": 52.64,
      "learning_rate": 3.311111111111112e-05,
      "loss": 1.4826,
      "step": 175500
    },
    {
      "epoch": 52.79,
      "learning_rate": 3.2888888888888894e-05,
      "loss": 1.4834,
      "step": 176000
    },
    {
      "epoch": 52.94,
      "learning_rate": 3.266666666666667e-05,
      "loss": 1.4816,
      "step": 176500
    },
    {
      "epoch": 53.09,
      "learning_rate": 3.2444444444444446e-05,
      "loss": 1.4801,
      "step": 177000
    },
    {
      "epoch": 53.24,
      "learning_rate": 3.222222222222223e-05,
      "loss": 1.4807,
      "step": 177500
    },
    {
      "epoch": 53.39,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 1.4798,
      "step": 178000
    },
    {
      "epoch": 53.54,
      "learning_rate": 3.177777777777778e-05,
      "loss": 1.4792,
      "step": 178500
    },
    {
      "epoch": 53.69,
      "learning_rate": 3.155555555555556e-05,
      "loss": 1.4795,
      "step": 179000
    },
    {
      "epoch": 53.84,
      "learning_rate": 3.1333333333333334e-05,
      "loss": 1.4796,
      "step": 179500
    },
    {
      "epoch": 53.99,
      "learning_rate": 3.111111111111111e-05,
      "loss": 1.4815,
      "step": 180000
    },
    {
      "epoch": 54.14,
      "learning_rate": 3.088888888888889e-05,
      "loss": 1.4782,
      "step": 180500
    },
    {
      "epoch": 54.29,
      "learning_rate": 3.066666666666667e-05,
      "loss": 1.4784,
      "step": 181000
    },
    {
      "epoch": 54.44,
      "learning_rate": 3.044444444444445e-05,
      "loss": 1.478,
      "step": 181500
    },
    {
      "epoch": 54.59,
      "learning_rate": 3.0222222222222225e-05,
      "loss": 1.4767,
      "step": 182000
    },
    {
      "epoch": 54.74,
      "learning_rate": 3e-05,
      "loss": 1.4771,
      "step": 182500
    },
    {
      "epoch": 54.89,
      "learning_rate": 2.9777777777777777e-05,
      "loss": 1.4781,
      "step": 183000
    },
    {
      "epoch": 55.04,
      "learning_rate": 2.955555555555556e-05,
      "loss": 1.477,
      "step": 183500
    },
    {
      "epoch": 55.19,
      "learning_rate": 2.9333333333333336e-05,
      "loss": 1.4764,
      "step": 184000
    },
    {
      "epoch": 55.34,
      "learning_rate": 2.9111111111111112e-05,
      "loss": 1.4765,
      "step": 184500
    },
    {
      "epoch": 55.49,
      "learning_rate": 2.8888888888888888e-05,
      "loss": 1.4765,
      "step": 185000
    },
    {
      "epoch": 55.64,
      "learning_rate": 2.8666666666666668e-05,
      "loss": 1.4749,
      "step": 185500
    },
    {
      "epoch": 55.79,
      "learning_rate": 2.8444444444444447e-05,
      "loss": 1.4762,
      "step": 186000
    },
    {
      "epoch": 55.94,
      "learning_rate": 2.8222222222222223e-05,
      "loss": 1.4754,
      "step": 186500
    },
    {
      "epoch": 56.09,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 1.4719,
      "step": 187000
    },
    {
      "epoch": 56.24,
      "learning_rate": 2.777777777777778e-05,
      "loss": 1.473,
      "step": 187500
    },
    {
      "epoch": 56.24,
      "eval_loss": 1.4130820035934448,
      "eval_runtime": 356.901,
      "eval_samples_per_second": 477.41,
      "eval_steps_per_second": 3.732,
      "step": 187500
    },
    {
      "epoch": 56.39,
      "learning_rate": 2.7555555555555555e-05,
      "loss": 1.4724,
      "step": 188000
    },
    {
      "epoch": 56.54,
      "learning_rate": 2.733333333333333e-05,
      "loss": 1.4697,
      "step": 188500
    },
    {
      "epoch": 56.69,
      "learning_rate": 2.7111111111111114e-05,
      "loss": 1.4728,
      "step": 189000
    },
    {
      "epoch": 56.84,
      "learning_rate": 2.688888888888889e-05,
      "loss": 1.4738,
      "step": 189500
    },
    {
      "epoch": 56.99,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 1.4726,
      "step": 190000
    },
    {
      "epoch": 57.14,
      "learning_rate": 2.6444444444444443e-05,
      "loss": 1.4712,
      "step": 190500
    },
    {
      "epoch": 57.29,
      "learning_rate": 2.6222222222222226e-05,
      "loss": 1.47,
      "step": 191000
    },
    {
      "epoch": 57.44,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 1.4709,
      "step": 191500
    },
    {
      "epoch": 57.59,
      "learning_rate": 2.5777777777777778e-05,
      "loss": 1.469,
      "step": 192000
    },
    {
      "epoch": 57.74,
      "learning_rate": 2.5555555555555554e-05,
      "loss": 1.472,
      "step": 192500
    },
    {
      "epoch": 57.89,
      "learning_rate": 2.5333333333333337e-05,
      "loss": 1.473,
      "step": 193000
    },
    {
      "epoch": 58.04,
      "learning_rate": 2.5111111111111113e-05,
      "loss": 1.4681,
      "step": 193500
    },
    {
      "epoch": 58.19,
      "learning_rate": 2.488888888888889e-05,
      "loss": 1.4679,
      "step": 194000
    },
    {
      "epoch": 58.34,
      "learning_rate": 2.466666666666667e-05,
      "loss": 1.47,
      "step": 194500
    },
    {
      "epoch": 58.49,
      "learning_rate": 2.4444444444444445e-05,
      "loss": 1.4668,
      "step": 195000
    },
    {
      "epoch": 58.64,
      "learning_rate": 2.4222222222222224e-05,
      "loss": 1.4678,
      "step": 195500
    },
    {
      "epoch": 58.79,
      "learning_rate": 2.4e-05,
      "loss": 1.4681,
      "step": 196000
    },
    {
      "epoch": 58.94,
      "learning_rate": 2.377777777777778e-05,
      "loss": 1.4694,
      "step": 196500
    },
    {
      "epoch": 59.09,
      "learning_rate": 2.3555555555555556e-05,
      "loss": 1.4651,
      "step": 197000
    },
    {
      "epoch": 59.24,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 1.4694,
      "step": 197500
    },
    {
      "epoch": 59.39,
      "learning_rate": 2.3111111111111112e-05,
      "loss": 1.4692,
      "step": 198000
    },
    {
      "epoch": 59.54,
      "learning_rate": 2.288888888888889e-05,
      "loss": 1.4675,
      "step": 198500
    },
    {
      "epoch": 59.69,
      "learning_rate": 2.2666666666666668e-05,
      "loss": 1.4653,
      "step": 199000
    },
    {
      "epoch": 59.84,
      "learning_rate": 2.2444444444444447e-05,
      "loss": 1.465,
      "step": 199500
    },
    {
      "epoch": 59.99,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 1.4695,
      "step": 200000
    },
    {
      "epoch": 59.99,
      "eval_loss": 1.4062610864639282,
      "eval_runtime": 356.555,
      "eval_samples_per_second": 477.873,
      "eval_steps_per_second": 3.736,
      "step": 200000
    },
    {
      "epoch": 60.14,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 1.4618,
      "step": 200500
    },
    {
      "epoch": 60.29,
      "learning_rate": 2.177777777777778e-05,
      "loss": 1.467,
      "step": 201000
    },
    {
      "epoch": 60.44,
      "learning_rate": 2.1555555555555555e-05,
      "loss": 1.467,
      "step": 201500
    },
    {
      "epoch": 60.59,
      "learning_rate": 2.1333333333333335e-05,
      "loss": 1.4646,
      "step": 202000
    },
    {
      "epoch": 60.74,
      "learning_rate": 2.111111111111111e-05,
      "loss": 1.464,
      "step": 202500
    },
    {
      "epoch": 60.89,
      "learning_rate": 2.088888888888889e-05,
      "loss": 1.4644,
      "step": 203000
    },
    {
      "epoch": 61.04,
      "learning_rate": 2.0666666666666666e-05,
      "loss": 1.4632,
      "step": 203500
    },
    {
      "epoch": 61.19,
      "learning_rate": 2.0444444444444446e-05,
      "loss": 1.4625,
      "step": 204000
    },
    {
      "epoch": 61.34,
      "learning_rate": 2.0222222222222222e-05,
      "loss": 1.4625,
      "step": 204500
    },
    {
      "epoch": 61.49,
      "learning_rate": 2e-05,
      "loss": 1.4634,
      "step": 205000
    },
    {
      "epoch": 61.64,
      "learning_rate": 1.9777777777777778e-05,
      "loss": 1.4651,
      "step": 205500
    },
    {
      "epoch": 61.79,
      "learning_rate": 1.9555555555555557e-05,
      "loss": 1.4617,
      "step": 206000
    },
    {
      "epoch": 61.94,
      "learning_rate": 1.9333333333333333e-05,
      "loss": 1.4633,
      "step": 206500
    },
    {
      "epoch": 62.09,
      "learning_rate": 1.9111111111111113e-05,
      "loss": 1.4604,
      "step": 207000
    },
    {
      "epoch": 62.24,
      "learning_rate": 1.888888888888889e-05,
      "loss": 1.4589,
      "step": 207500
    },
    {
      "epoch": 62.39,
      "learning_rate": 1.866666666666667e-05,
      "loss": 1.4619,
      "step": 208000
    },
    {
      "epoch": 62.54,
      "learning_rate": 1.8444444444444445e-05,
      "loss": 1.4625,
      "step": 208500
    },
    {
      "epoch": 62.69,
      "learning_rate": 1.8222222222222224e-05,
      "loss": 1.4613,
      "step": 209000
    },
    {
      "epoch": 62.84,
      "learning_rate": 1.8e-05,
      "loss": 1.4587,
      "step": 209500
    },
    {
      "epoch": 62.99,
      "learning_rate": 1.777777777777778e-05,
      "loss": 1.459,
      "step": 210000
    },
    {
      "epoch": 63.14,
      "learning_rate": 1.7555555555555556e-05,
      "loss": 1.4563,
      "step": 210500
    },
    {
      "epoch": 63.29,
      "learning_rate": 1.7333333333333336e-05,
      "loss": 1.4609,
      "step": 211000
    },
    {
      "epoch": 63.44,
      "learning_rate": 1.7111111111111112e-05,
      "loss": 1.4587,
      "step": 211500
    },
    {
      "epoch": 63.59,
      "learning_rate": 1.688888888888889e-05,
      "loss": 1.46,
      "step": 212000
    },
    {
      "epoch": 63.74,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 1.46,
      "step": 212500
    },
    {
      "epoch": 63.74,
      "eval_loss": 1.3980076313018799,
      "eval_runtime": 356.9089,
      "eval_samples_per_second": 477.399,
      "eval_steps_per_second": 3.732,
      "step": 212500
    },
    {
      "epoch": 63.89,
      "learning_rate": 1.6444444444444447e-05,
      "loss": 1.4591,
      "step": 213000
    },
    {
      "epoch": 64.04,
      "learning_rate": 1.6222222222222223e-05,
      "loss": 1.4595,
      "step": 213500
    },
    {
      "epoch": 64.19,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 1.4565,
      "step": 214000
    },
    {
      "epoch": 64.34,
      "learning_rate": 1.577777777777778e-05,
      "loss": 1.4577,
      "step": 214500
    },
    {
      "epoch": 64.49,
      "learning_rate": 1.5555555555555555e-05,
      "loss": 1.4576,
      "step": 215000
    },
    {
      "epoch": 64.64,
      "learning_rate": 1.5333333333333334e-05,
      "loss": 1.4565,
      "step": 215500
    },
    {
      "epoch": 64.79,
      "learning_rate": 1.5111111111111112e-05,
      "loss": 1.4582,
      "step": 216000
    },
    {
      "epoch": 64.94,
      "learning_rate": 1.4888888888888888e-05,
      "loss": 1.4564,
      "step": 216500
    },
    {
      "epoch": 65.09,
      "learning_rate": 1.4666666666666668e-05,
      "loss": 1.4555,
      "step": 217000
    },
    {
      "epoch": 65.24,
      "learning_rate": 1.4444444444444444e-05,
      "loss": 1.457,
      "step": 217500
    },
    {
      "epoch": 65.39,
      "learning_rate": 1.4222222222222224e-05,
      "loss": 1.4554,
      "step": 218000
    },
    {
      "epoch": 65.54,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 1.4575,
      "step": 218500
    },
    {
      "epoch": 65.69,
      "learning_rate": 1.3777777777777778e-05,
      "loss": 1.4558,
      "step": 219000
    },
    {
      "epoch": 65.84,
      "learning_rate": 1.3555555555555557e-05,
      "loss": 1.4566,
      "step": 219500
    },
    {
      "epoch": 65.99,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 1.4551,
      "step": 220000
    },
    {
      "epoch": 66.14,
      "learning_rate": 1.3111111111111113e-05,
      "loss": 1.4547,
      "step": 220500
    },
    {
      "epoch": 66.29,
      "learning_rate": 1.2888888888888889e-05,
      "loss": 1.4553,
      "step": 221000
    },
    {
      "epoch": 66.44,
      "learning_rate": 1.2666666666666668e-05,
      "loss": 1.4525,
      "step": 221500
    },
    {
      "epoch": 66.59,
      "learning_rate": 1.2444444444444445e-05,
      "loss": 1.4544,
      "step": 222000
    },
    {
      "epoch": 66.74,
      "learning_rate": 1.2222222222222222e-05,
      "loss": 1.4528,
      "step": 222500
    },
    {
      "epoch": 66.89,
      "learning_rate": 1.2e-05,
      "loss": 1.4531,
      "step": 223000
    },
    {
      "epoch": 67.04,
      "learning_rate": 1.1777777777777778e-05,
      "loss": 1.4533,
      "step": 223500
    },
    {
      "epoch": 67.19,
      "learning_rate": 1.1555555555555556e-05,
      "loss": 1.452,
      "step": 224000
    },
    {
      "epoch": 67.34,
      "learning_rate": 1.1333333333333334e-05,
      "loss": 1.4526,
      "step": 224500
    },
    {
      "epoch": 67.49,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 1.451,
      "step": 225000
    },
    {
      "epoch": 67.49,
      "eval_loss": 1.3923670053482056,
      "eval_runtime": 356.8892,
      "eval_samples_per_second": 477.426,
      "eval_steps_per_second": 3.732,
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 67.64, | |
| "learning_rate": 1.088888888888889e-05, | |
| "loss": 1.4535, | |
| "step": 225500 | |
| }, | |
| { | |
| "epoch": 67.79, | |
| "learning_rate": 1.0666666666666667e-05, | |
| "loss": 1.4527, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 67.94, | |
| "learning_rate": 1.0444444444444445e-05, | |
| "loss": 1.4516, | |
| "step": 226500 | |
| }, | |
| { | |
| "epoch": 68.09, | |
| "learning_rate": 1.0222222222222223e-05, | |
| "loss": 1.4526, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 68.24, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4517, | |
| "step": 227500 | |
| }, | |
| { | |
| "epoch": 68.39, | |
| "learning_rate": 9.777777777777779e-06, | |
| "loss": 1.4511, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 68.54, | |
| "learning_rate": 9.555555555555556e-06, | |
| "loss": 1.4507, | |
| "step": 228500 | |
| }, | |
| { | |
| "epoch": 68.69, | |
| "learning_rate": 9.333333333333334e-06, | |
| "loss": 1.4486, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 68.84, | |
| "learning_rate": 9.111111111111112e-06, | |
| "loss": 1.4531, | |
| "step": 229500 | |
| }, | |
| { | |
| "epoch": 68.99, | |
| "learning_rate": 8.88888888888889e-06, | |
| "loss": 1.4512, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 69.14, | |
| "learning_rate": 8.666666666666668e-06, | |
| "loss": 1.4494, | |
| "step": 230500 | |
| }, | |
| { | |
| "epoch": 69.29, | |
| "learning_rate": 8.444444444444446e-06, | |
| "loss": 1.4524, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 69.44, | |
| "learning_rate": 8.222222222222223e-06, | |
| "loss": 1.4509, | |
| "step": 231500 | |
| }, | |
| { | |
| "epoch": 69.59, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 1.45, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 69.74, | |
| "learning_rate": 7.777777777777777e-06, | |
| "loss": 1.4468, | |
| "step": 232500 | |
| }, | |
| { | |
| "epoch": 69.89, | |
| "learning_rate": 7.555555555555556e-06, | |
| "loss": 1.4463, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 70.04, | |
| "learning_rate": 7.333333333333334e-06, | |
| "loss": 1.4479, | |
| "step": 233500 | |
| }, | |
| { | |
| "epoch": 70.19, | |
| "learning_rate": 7.111111111111112e-06, | |
| "loss": 1.45, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 70.34, | |
| "learning_rate": 6.888888888888889e-06, | |
| "loss": 1.4478, | |
| "step": 234500 | |
| }, | |
| { | |
| "epoch": 70.49, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 1.448, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 70.64, | |
| "learning_rate": 6.4444444444444445e-06, | |
| "loss": 1.4477, | |
| "step": 235500 | |
| }, | |
| { | |
| "epoch": 70.79, | |
| "learning_rate": 6.222222222222222e-06, | |
| "loss": 1.4467, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 70.94, | |
| "learning_rate": 6e-06, | |
| "loss": 1.447, | |
| "step": 236500 | |
| }, | |
| { | |
| "epoch": 71.09, | |
| "learning_rate": 5.777777777777778e-06, | |
| "loss": 1.4475, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 71.24, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 1.4435, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 71.24, | |
| "eval_loss": 1.3885681629180908, | |
| "eval_runtime": 356.9911, | |
| "eval_samples_per_second": 477.289, | |
| "eval_steps_per_second": 3.731, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 71.39, | |
| "learning_rate": 5.333333333333334e-06, | |
| "loss": 1.4468, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 71.54, | |
| "learning_rate": 5.1111111111111115e-06, | |
| "loss": 1.446, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 71.69, | |
| "learning_rate": 4.888888888888889e-06, | |
| "loss": 1.4476, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 71.84, | |
| "learning_rate": 4.666666666666667e-06, | |
| "loss": 1.4479, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 71.99, | |
| "learning_rate": 4.444444444444445e-06, | |
| "loss": 1.4454, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 72.14, | |
| "learning_rate": 4.222222222222223e-06, | |
| "loss": 1.4464, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 72.29, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 1.445, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 72.44, | |
| "learning_rate": 3.777777777777778e-06, | |
| "loss": 1.4462, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 72.59, | |
| "learning_rate": 3.555555555555556e-06, | |
| "loss": 1.4457, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 72.74, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 1.4465, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 72.89, | |
| "learning_rate": 3.111111111111111e-06, | |
| "loss": 1.4463, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 73.04, | |
| "learning_rate": 2.888888888888889e-06, | |
| "loss": 1.4453, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 73.19, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 1.444, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 73.34, | |
| "learning_rate": 2.4444444444444447e-06, | |
| "loss": 1.4426, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 73.49, | |
| "learning_rate": 2.2222222222222225e-06, | |
| "loss": 1.4431, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 73.64, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 1.4438, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 73.79, | |
| "learning_rate": 1.777777777777778e-06, | |
| "loss": 1.4437, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 73.94, | |
| "learning_rate": 1.5555555555555556e-06, | |
| "loss": 1.444, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 74.09, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "loss": 1.444, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 74.24, | |
| "learning_rate": 1.1111111111111112e-06, | |
| "loss": 1.4457, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 74.39, | |
| "learning_rate": 8.88888888888889e-07, | |
| "loss": 1.4434, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 74.54, | |
| "learning_rate": 6.666666666666667e-07, | |
| "loss": 1.4429, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 74.69, | |
| "learning_rate": 4.444444444444445e-07, | |
| "loss": 1.4426, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 74.84, | |
| "learning_rate": 2.2222222222222224e-07, | |
| "loss": 1.4413, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 74.99, | |
| "learning_rate": 0.0, | |
| "loss": 1.4436, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 74.99, | |
| "eval_loss": 1.3831435441970825, | |
| "eval_runtime": 356.4648, | |
| "eval_samples_per_second": 477.994, | |
| "eval_steps_per_second": 3.737, | |
| "step": 250000 | |
| } | |
| ], | |
| "max_steps": 250000, | |
| "num_train_epochs": 75, | |
| "total_flos": 4.212247412736e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
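
The block above is the tail of the `log_history` array plus the summary fields of a Hugging Face `Trainer` state file. As a minimal sketch of how one might inspect such a log, assuming the JSON is saved under the Trainer's conventional checkpoint name `trainer_state.json` (that filename, and the use of matplotlib for plotting, are illustrative assumptions, not part of the log itself):

```python
import json

import matplotlib.pyplot as plt

# Assumed path: trainer_state.json is the name the Hugging Face
# Trainer uses when checkpointing its state; adjust to wherever
# this file actually lives.
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history mixes two record shapes: training logs carry a "loss"
# key, evaluation logs carry "eval_loss". Split them by key.
train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

# Plot both curves against the global step.
plt.plot(*zip(*train), label="train loss")
plt.plot(*zip(*evals), marker="o", label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.show()
```

Over the span shown here, such a plot would be nearly flat: training loss drifts from 1.4695 at step 200000 to 1.4436 at step 250000, while eval_loss still improves from 1.4063 to 1.3831, with the linear learning-rate schedule reaching 0.0 exactly at max_steps = 250000.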