| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "global_step": 3338128, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9925557078698005e-05, | |
| "loss": 5.3279, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_accuracy": 0.31327971235572855, | |
| "eval_loss": 3.994140625, | |
| "eval_runtime": 39.5933, | |
| "eval_samples_per_second": 90.116, | |
| "eval_steps_per_second": 11.265, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.985067978220128e-05, | |
| "loss": 3.5754, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_accuracy": 0.3823957607318666, | |
| "eval_loss": 3.310546875, | |
| "eval_runtime": 39.5685, | |
| "eval_samples_per_second": 90.173, | |
| "eval_steps_per_second": 11.272, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9775862399524525e-05, | |
| "loss": 3.6102, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_accuracy": 0.39768097216925513, | |
| "eval_loss": 3.166015625, | |
| "eval_runtime": 39.5825, | |
| "eval_samples_per_second": 90.141, | |
| "eval_steps_per_second": 11.268, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.970101505993779e-05, | |
| "loss": 3.0639, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_accuracy": 0.41336891627105715, | |
| "eval_loss": 3.021484375, | |
| "eval_runtime": 39.5961, | |
| "eval_samples_per_second": 90.11, | |
| "eval_steps_per_second": 11.264, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.962618269880604e-05, | |
| "loss": 2.9477, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_accuracy": 0.425242680676284, | |
| "eval_loss": 2.919921875, | |
| "eval_runtime": 39.6071, | |
| "eval_samples_per_second": 90.085, | |
| "eval_steps_per_second": 11.261, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9551335359219304e-05, | |
| "loss": 2.8589, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_accuracy": 0.4315466797294513, | |
| "eval_loss": 2.8671875, | |
| "eval_runtime": 39.6079, | |
| "eval_samples_per_second": 90.083, | |
| "eval_steps_per_second": 11.26, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.947647304117757e-05, | |
| "loss": 2.8063, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_accuracy": 0.43875449855125825, | |
| "eval_loss": 2.802734375, | |
| "eval_runtime": 39.6873, | |
| "eval_samples_per_second": 89.903, | |
| "eval_steps_per_second": 11.238, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.940162570159084e-05, | |
| "loss": 2.7646, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_accuracy": 0.44185690990623727, | |
| "eval_loss": 2.771484375, | |
| "eval_runtime": 39.6738, | |
| "eval_samples_per_second": 89.933, | |
| "eval_steps_per_second": 11.242, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.932679334045909e-05, | |
| "loss": 2.7306, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_accuracy": 0.4467244957896629, | |
| "eval_loss": 2.736328125, | |
| "eval_runtime": 39.7305, | |
| "eval_samples_per_second": 89.805, | |
| "eval_steps_per_second": 11.226, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9251960979327336e-05, | |
| "loss": 2.7106, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_accuracy": 0.4492548623804952, | |
| "eval_loss": 2.712890625, | |
| "eval_runtime": 39.6605, | |
| "eval_samples_per_second": 89.964, | |
| "eval_steps_per_second": 11.245, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.917712861819559e-05, | |
| "loss": 2.6829, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_accuracy": 0.45224302916332426, | |
| "eval_loss": 2.689453125, | |
| "eval_runtime": 39.6345, | |
| "eval_samples_per_second": 90.023, | |
| "eval_steps_per_second": 11.253, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9102311235518835e-05, | |
| "loss": 2.6703, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_accuracy": 0.45370464737056665, | |
| "eval_loss": 2.67578125, | |
| "eval_runtime": 39.6085, | |
| "eval_samples_per_second": 90.082, | |
| "eval_steps_per_second": 11.26, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.90274489174771e-05, | |
| "loss": 2.6522, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_accuracy": 0.4559714569388372, | |
| "eval_loss": 2.66015625, | |
| "eval_runtime": 39.7158, | |
| "eval_samples_per_second": 89.838, | |
| "eval_steps_per_second": 11.23, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.895260157789037e-05, | |
| "loss": 2.6377, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_accuracy": 0.4573840349100728, | |
| "eval_loss": 2.6484375, | |
| "eval_runtime": 40.0228, | |
| "eval_samples_per_second": 89.149, | |
| "eval_steps_per_second": 11.144, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.8877769216758615e-05, | |
| "loss": 2.6241, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_accuracy": 0.4586505880444836, | |
| "eval_loss": 2.634765625, | |
| "eval_runtime": 39.747, | |
| "eval_samples_per_second": 89.768, | |
| "eval_steps_per_second": 11.221, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.880295183408186e-05, | |
| "loss": 2.6159, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_accuracy": 0.46040042037619067, | |
| "eval_loss": 2.625, | |
| "eval_runtime": 39.8297, | |
| "eval_samples_per_second": 89.581, | |
| "eval_steps_per_second": 11.198, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.872807453758514e-05, | |
| "loss": 2.5959, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_accuracy": 0.4612864322379005, | |
| "eval_loss": 2.61328125, | |
| "eval_runtime": 39.842, | |
| "eval_samples_per_second": 89.554, | |
| "eval_steps_per_second": 11.194, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.86532271979984e-05, | |
| "loss": 2.5877, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_accuracy": 0.4624083303744811, | |
| "eval_loss": 2.603515625, | |
| "eval_runtime": 39.7194, | |
| "eval_samples_per_second": 89.83, | |
| "eval_steps_per_second": 11.229, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.857840981532165e-05, | |
| "loss": 2.5832, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_accuracy": 0.46323187757803697, | |
| "eval_loss": 2.599609375, | |
| "eval_runtime": 40.0969, | |
| "eval_samples_per_second": 88.984, | |
| "eval_steps_per_second": 11.123, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.8503562475734907e-05, | |
| "loss": 2.5726, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_accuracy": 0.46476143979941176, | |
| "eval_loss": 2.5859375, | |
| "eval_runtime": 39.6873, | |
| "eval_samples_per_second": 89.903, | |
| "eval_steps_per_second": 11.238, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.8428775049968125e-05, | |
| "loss": 2.5723, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_accuracy": 0.46553211121777593, | |
| "eval_loss": 2.580078125, | |
| "eval_runtime": 39.7946, | |
| "eval_samples_per_second": 89.66, | |
| "eval_steps_per_second": 11.208, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.83539127319264e-05, | |
| "loss": 2.5584, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_accuracy": 0.46414501225183996, | |
| "eval_loss": 2.59375, | |
| "eval_runtime": 39.8172, | |
| "eval_samples_per_second": 89.61, | |
| "eval_steps_per_second": 11.201, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.827905041388467e-05, | |
| "loss": 2.5541, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_accuracy": 0.4673400247228542, | |
| "eval_loss": 2.56640625, | |
| "eval_runtime": 39.875, | |
| "eval_samples_per_second": 89.48, | |
| "eval_steps_per_second": 11.185, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.820423303120791e-05, | |
| "loss": 2.541, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.46835206177206756, | |
| "eval_loss": 2.55859375, | |
| "eval_runtime": 39.7895, | |
| "eval_samples_per_second": 89.672, | |
| "eval_steps_per_second": 11.209, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.812941564853116e-05, | |
| "loss": 2.5359, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.4673909827334534, | |
| "eval_loss": 2.564453125, | |
| "eval_runtime": 39.8856, | |
| "eval_samples_per_second": 89.456, | |
| "eval_steps_per_second": 11.182, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.805458328739941e-05, | |
| "loss": 2.5298, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.4699054591919484, | |
| "eval_loss": 2.544921875, | |
| "eval_runtime": 39.8462, | |
| "eval_samples_per_second": 89.544, | |
| "eval_steps_per_second": 11.193, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.797972096935768e-05, | |
| "loss": 2.5258, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.47030819185636197, | |
| "eval_loss": 2.541015625, | |
| "eval_runtime": 39.8831, | |
| "eval_samples_per_second": 89.461, | |
| "eval_steps_per_second": 11.183, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.790488860822593e-05, | |
| "loss": 2.5207, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.47090352388341683, | |
| "eval_loss": 2.537109375, | |
| "eval_runtime": 40.0239, | |
| "eval_samples_per_second": 89.147, | |
| "eval_steps_per_second": 11.143, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.783005624709418e-05, | |
| "loss": 2.5167, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.47193419074295684, | |
| "eval_loss": 2.53125, | |
| "eval_runtime": 39.9144, | |
| "eval_samples_per_second": 89.391, | |
| "eval_steps_per_second": 11.174, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.7755223885962435e-05, | |
| "loss": 2.5101, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.4701947691876088, | |
| "eval_loss": 2.544921875, | |
| "eval_runtime": 40.0269, | |
| "eval_samples_per_second": 89.14, | |
| "eval_steps_per_second": 11.143, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.768039152483069e-05, | |
| "loss": 2.5058, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.4730330755844281, | |
| "eval_loss": 2.521484375, | |
| "eval_runtime": 39.9333, | |
| "eval_samples_per_second": 89.349, | |
| "eval_steps_per_second": 11.169, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.760554418524395e-05, | |
| "loss": 2.5021, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.473403480048569, | |
| "eval_loss": 2.51953125, | |
| "eval_runtime": 40.0331, | |
| "eval_samples_per_second": 89.126, | |
| "eval_steps_per_second": 11.141, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.753084663020711e-05, | |
| "loss": 2.8135, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.4317466762226635, | |
| "eval_loss": 2.83203125, | |
| "eval_runtime": 40.0127, | |
| "eval_samples_per_second": 89.172, | |
| "eval_steps_per_second": 11.146, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.7455954355255405e-05, | |
| "loss": 2.7932, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.4729974597705684, | |
| "eval_loss": 2.521484375, | |
| "eval_runtime": 39.9612, | |
| "eval_samples_per_second": 89.287, | |
| "eval_steps_per_second": 11.161, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.7381077058758686e-05, | |
| "loss": 2.4914, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.4751818050313638, | |
| "eval_loss": 2.505859375, | |
| "eval_runtime": 40.3867, | |
| "eval_samples_per_second": 88.346, | |
| "eval_steps_per_second": 11.043, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.730621474071695e-05, | |
| "loss": 2.487, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.4753875548483533, | |
| "eval_loss": 2.50390625, | |
| "eval_runtime": 40.0145, | |
| "eval_samples_per_second": 89.168, | |
| "eval_steps_per_second": 11.146, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.723138237958521e-05, | |
| "loss": 2.4829, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.47510947753244875, | |
| "eval_loss": 2.50390625, | |
| "eval_runtime": 40.0338, | |
| "eval_samples_per_second": 89.125, | |
| "eval_steps_per_second": 11.141, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.7156505083088486e-05, | |
| "loss": 2.4778, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.47625877244892145, | |
| "eval_loss": 2.49609375, | |
| "eval_runtime": 40.0473, | |
| "eval_samples_per_second": 89.095, | |
| "eval_steps_per_second": 11.137, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.708170267886672e-05, | |
| "loss": 2.4779, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.4770064305721763, | |
| "eval_loss": 2.4921875, | |
| "eval_runtime": 40.1061, | |
| "eval_samples_per_second": 88.964, | |
| "eval_steps_per_second": 11.121, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.700687031773498e-05, | |
| "loss": 2.4685, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.4765894515822188, | |
| "eval_loss": 2.494140625, | |
| "eval_runtime": 40.1514, | |
| "eval_samples_per_second": 88.864, | |
| "eval_steps_per_second": 11.108, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.693202297814823e-05, | |
| "loss": 2.4661, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.47763491270290054, | |
| "eval_loss": 2.484375, | |
| "eval_runtime": 40.1854, | |
| "eval_samples_per_second": 88.788, | |
| "eval_steps_per_second": 11.099, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.6857190617016483e-05, | |
| "loss": 2.4579, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.47826366880142374, | |
| "eval_loss": 2.48046875, | |
| "eval_runtime": 40.1794, | |
| "eval_samples_per_second": 88.802, | |
| "eval_steps_per_second": 11.1, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.6782358255884736e-05, | |
| "loss": 2.4589, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.4787937964923355, | |
| "eval_loss": 2.478515625, | |
| "eval_runtime": 40.1613, | |
| "eval_samples_per_second": 88.842, | |
| "eval_steps_per_second": 11.105, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6707540873207975e-05, | |
| "loss": 2.4571, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.47927871949642525, | |
| "eval_loss": 2.474609375, | |
| "eval_runtime": 40.1097, | |
| "eval_samples_per_second": 88.956, | |
| "eval_steps_per_second": 11.119, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.663269353362124e-05, | |
| "loss": 2.4504, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.47965542521994137, | |
| "eval_loss": 2.47265625, | |
| "eval_runtime": 40.1284, | |
| "eval_samples_per_second": 88.915, | |
| "eval_steps_per_second": 11.114, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.65578461940345e-05, | |
| "loss": 2.4538, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.47995733773435206, | |
| "eval_loss": 2.46875, | |
| "eval_runtime": 40.2242, | |
| "eval_samples_per_second": 88.703, | |
| "eval_steps_per_second": 11.088, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6483013832902755e-05, | |
| "loss": 2.4481, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.4806011620618159, | |
| "eval_loss": 2.466796875, | |
| "eval_runtime": 40.1909, | |
| "eval_samples_per_second": 88.776, | |
| "eval_steps_per_second": 11.097, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.640815151486103e-05, | |
| "loss": 2.4454, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.4809529367156302, | |
| "eval_loss": 2.4609375, | |
| "eval_runtime": 40.319, | |
| "eval_samples_per_second": 88.494, | |
| "eval_steps_per_second": 11.062, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6333319153729274e-05, | |
| "loss": 2.44, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.4811408786256898, | |
| "eval_loss": 2.458984375, | |
| "eval_runtime": 40.235, | |
| "eval_samples_per_second": 88.679, | |
| "eval_steps_per_second": 11.085, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.625850177105252e-05, | |
| "loss": 2.4392, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.4810606060606061, | |
| "eval_loss": 2.458984375, | |
| "eval_runtime": 40.2635, | |
| "eval_samples_per_second": 88.616, | |
| "eval_steps_per_second": 11.077, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.618366940992077e-05, | |
| "loss": 2.431, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.48131512214580346, | |
| "eval_loss": 2.45703125, | |
| "eval_runtime": 40.2108, | |
| "eval_samples_per_second": 88.732, | |
| "eval_steps_per_second": 11.092, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.610885202724401e-05, | |
| "loss": 2.4377, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.482264420569064, | |
| "eval_loss": 2.451171875, | |
| "eval_runtime": 40.1835, | |
| "eval_samples_per_second": 88.793, | |
| "eval_steps_per_second": 11.099, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6033959752292307e-05, | |
| "loss": 2.4299, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.4825972914447528, | |
| "eval_loss": 2.447265625, | |
| "eval_runtime": 40.2876, | |
| "eval_samples_per_second": 88.563, | |
| "eval_steps_per_second": 11.07, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.5959142369615546e-05, | |
| "loss": 2.4283, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.482810712360112, | |
| "eval_loss": 2.447265625, | |
| "eval_runtime": 40.3402, | |
| "eval_samples_per_second": 88.448, | |
| "eval_steps_per_second": 11.056, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.5884295030028805e-05, | |
| "loss": 2.4256, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.48325371829096697, | |
| "eval_loss": 2.443359375, | |
| "eval_runtime": 40.3191, | |
| "eval_samples_per_second": 88.494, | |
| "eval_steps_per_second": 11.062, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.5809462668897065e-05, | |
| "loss": 2.4198, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.48383589986367365, | |
| "eval_loss": 2.44140625, | |
| "eval_runtime": 40.3148, | |
| "eval_samples_per_second": 88.503, | |
| "eval_steps_per_second": 11.063, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.573461532931032e-05, | |
| "loss": 2.4174, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_accuracy": 0.4840175405143581, | |
| "eval_loss": 2.44140625, | |
| "eval_runtime": 40.703, | |
| "eval_samples_per_second": 87.659, | |
| "eval_steps_per_second": 10.957, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.565978296817857e-05, | |
| "loss": 2.4151, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_accuracy": 0.4844402728280929, | |
| "eval_loss": 2.435546875, | |
| "eval_runtime": 41.2392, | |
| "eval_samples_per_second": 86.52, | |
| "eval_steps_per_second": 10.815, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.558493562859184e-05, | |
| "loss": 2.4191, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_accuracy": 0.4847421853425036, | |
| "eval_loss": 2.43359375, | |
| "eval_runtime": 40.6977, | |
| "eval_samples_per_second": 87.671, | |
| "eval_steps_per_second": 10.959, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.55100882890051e-05, | |
| "loss": 2.4071, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_accuracy": 0.4848361562975334, | |
| "eval_loss": 2.431640625, | |
| "eval_runtime": 40.2058, | |
| "eval_samples_per_second": 88.744, | |
| "eval_steps_per_second": 11.093, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.543524094941836e-05, | |
| "loss": 2.4126, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_accuracy": 0.48549011743355736, | |
| "eval_loss": 2.427734375, | |
| "eval_runtime": 40.4122, | |
| "eval_samples_per_second": 88.29, | |
| "eval_steps_per_second": 11.036, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.536045352365158e-05, | |
| "loss": 2.4053, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_accuracy": 0.48513834277974305, | |
| "eval_loss": 2.4296875, | |
| "eval_runtime": 40.2632, | |
| "eval_samples_per_second": 88.617, | |
| "eval_steps_per_second": 11.077, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.528559120560985e-05, | |
| "loss": 2.4071, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_accuracy": 0.4857840848817993, | |
| "eval_loss": 2.42578125, | |
| "eval_runtime": 40.3587, | |
| "eval_samples_per_second": 88.407, | |
| "eval_steps_per_second": 11.051, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5210743866023116e-05, | |
| "loss": 2.4027, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.486615303183725, | |
| "eval_loss": 2.421875, | |
| "eval_runtime": 40.2594, | |
| "eval_samples_per_second": 88.625, | |
| "eval_steps_per_second": 11.078, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.513591150489136e-05, | |
| "loss": 2.4013, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.4867040687505753, | |
| "eval_loss": 2.41796875, | |
| "eval_runtime": 40.2109, | |
| "eval_samples_per_second": 88.732, | |
| "eval_steps_per_second": 11.092, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5061049186849636e-05, | |
| "loss": 2.4032, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.4866416040924214, | |
| "eval_loss": 2.41796875, | |
| "eval_runtime": 40.2756, | |
| "eval_samples_per_second": 88.59, | |
| "eval_steps_per_second": 11.074, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.498621682571789e-05, | |
| "loss": 2.3919, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.4870514599196069, | |
| "eval_loss": 2.416015625, | |
| "eval_runtime": 40.3049, | |
| "eval_samples_per_second": 88.525, | |
| "eval_steps_per_second": 11.066, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.491136948613115e-05, | |
| "loss": 2.3936, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.4872848804842874, | |
| "eval_loss": 2.4140625, | |
| "eval_runtime": 40.3719, | |
| "eval_samples_per_second": 88.378, | |
| "eval_steps_per_second": 11.047, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.48365371249994e-05, | |
| "loss": 2.3905, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.48784870621446635, | |
| "eval_loss": 2.41015625, | |
| "eval_runtime": 40.4162, | |
| "eval_samples_per_second": 88.281, | |
| "eval_steps_per_second": 11.035, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.4761704763867654e-05, | |
| "loss": 2.3889, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.4881240438523818, | |
| "eval_loss": 2.41015625, | |
| "eval_runtime": 40.2942, | |
| "eval_samples_per_second": 88.549, | |
| "eval_steps_per_second": 11.069, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.468688738119089e-05, | |
| "loss": 2.3866, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 0.48837801200198133, | |
| "eval_loss": 2.408203125, | |
| "eval_runtime": 40.2476, | |
| "eval_samples_per_second": 88.651, | |
| "eval_steps_per_second": 11.081, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.4612025063149173e-05, | |
| "loss": 2.3823, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 0.48875663550008985, | |
| "eval_loss": 2.40625, | |
| "eval_runtime": 40.3148, | |
| "eval_samples_per_second": 88.504, | |
| "eval_steps_per_second": 11.063, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.4537177723562427e-05, | |
| "loss": 2.3828, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 0.48881882619044487, | |
| "eval_loss": 2.40234375, | |
| "eval_runtime": 40.2734, | |
| "eval_samples_per_second": 88.594, | |
| "eval_steps_per_second": 11.074, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.446233038397569e-05, | |
| "loss": 2.3795, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 0.48893718027957866, | |
| "eval_loss": 2.400390625, | |
| "eval_runtime": 40.2791, | |
| "eval_samples_per_second": 88.582, | |
| "eval_steps_per_second": 11.073, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.43875729151189e-05, | |
| "loss": 2.3812, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 0.48680735461076846, | |
| "eval_loss": 2.416015625, | |
| "eval_runtime": 40.6108, | |
| "eval_samples_per_second": 87.858, | |
| "eval_steps_per_second": 10.982, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.431269561862217e-05, | |
| "loss": 2.3789, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 0.4895744293798684, | |
| "eval_loss": 2.396484375, | |
| "eval_runtime": 40.2591, | |
| "eval_samples_per_second": 88.626, | |
| "eval_steps_per_second": 11.078, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.423786325749043e-05, | |
| "loss": 2.372, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.48950730726913283, | |
| "eval_loss": 2.396484375, | |
| "eval_runtime": 40.2108, | |
| "eval_samples_per_second": 88.732, | |
| "eval_steps_per_second": 11.092, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.41630009394487e-05, | |
| "loss": 2.3732, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.4898645612789255, | |
| "eval_loss": 2.396484375, | |
| "eval_runtime": 40.4903, | |
| "eval_samples_per_second": 88.12, | |
| "eval_steps_per_second": 11.015, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.408815359986196e-05, | |
| "loss": 2.3725, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.4903272928913027, | |
| "eval_loss": 2.392578125, | |
| "eval_runtime": 40.3547, | |
| "eval_samples_per_second": 88.416, | |
| "eval_steps_per_second": 11.052, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.401332123873022e-05, | |
| "loss": 2.3716, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.49036948393233654, | |
| "eval_loss": 2.390625, | |
| "eval_runtime": 40.3654, | |
| "eval_samples_per_second": 88.392, | |
| "eval_steps_per_second": 11.049, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.393848887759846e-05, | |
| "loss": 2.3709, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.49040482577839734, | |
| "eval_loss": 2.390625, | |
| "eval_runtime": 40.3034, | |
| "eval_samples_per_second": 88.529, | |
| "eval_steps_per_second": 11.066, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.3863656516466716e-05, | |
| "loss": 2.3619, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.4906423558600616, | |
| "eval_loss": 2.388671875, | |
| "eval_runtime": 40.362, | |
| "eval_samples_per_second": 88.4, | |
| "eval_steps_per_second": 11.05, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.378877921997e-05, | |
| "loss": 2.367, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.49115138803045644, | |
| "eval_loss": 2.38671875, | |
| "eval_runtime": 40.2804, | |
| "eval_samples_per_second": 88.579, | |
| "eval_steps_per_second": 11.072, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.3713961837293236e-05, | |
| "loss": 2.3639, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_accuracy": 0.49116152483901654, | |
| "eval_loss": 2.384765625, | |
| "eval_runtime": 40.3366, | |
| "eval_samples_per_second": 88.456, | |
| "eval_steps_per_second": 11.057, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.363914445461648e-05, | |
| "loss": 2.3621, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_accuracy": 0.4918730192128138, | |
| "eval_loss": 2.3828125, | |
| "eval_runtime": 40.2687, | |
| "eval_samples_per_second": 88.605, | |
| "eval_steps_per_second": 11.076, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.3564282136574755e-05, | |
| "loss": 2.3578, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_accuracy": 0.4919609628762674, | |
| "eval_loss": 2.380859375, | |
| "eval_runtime": 40.2478, | |
| "eval_samples_per_second": 88.651, | |
| "eval_steps_per_second": 11.081, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.3489449775443e-05, | |
| "loss": 2.3608, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_accuracy": 0.4921738358560288, | |
| "eval_loss": 2.37890625, | |
| "eval_runtime": 40.4074, | |
| "eval_samples_per_second": 88.301, | |
| "eval_steps_per_second": 11.038, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.341461741431126e-05, | |
| "loss": 2.3541, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_accuracy": 0.4923423260523651, | |
| "eval_loss": 2.376953125, | |
| "eval_runtime": 40.2757, | |
| "eval_samples_per_second": 88.589, | |
| "eval_steps_per_second": 11.074, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.333978505317951e-05, | |
| "loss": 2.3556, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_accuracy": 0.4925938284917744, | |
| "eval_loss": 2.376953125, | |
| "eval_runtime": 40.292, | |
| "eval_samples_per_second": 88.553, | |
| "eval_steps_per_second": 11.069, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.326493771359277e-05, | |
| "loss": 2.3562, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_accuracy": 0.49278067453063834, | |
| "eval_loss": 2.376953125, | |
| "eval_runtime": 40.1882, | |
| "eval_samples_per_second": 88.782, | |
| "eval_steps_per_second": 11.098, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9925197595778234e-05, | |
| "loss": 2.3641, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.4910004317732511, | |
| "eval_loss": 2.38671875, | |
| "eval_runtime": 39.6555, | |
| "eval_samples_per_second": 89.975, | |
| "eval_steps_per_second": 11.247, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.985036523464649e-05, | |
| "loss": 2.3641, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.4911015258910529, | |
| "eval_loss": 2.38671875, | |
| "eval_runtime": 39.5455, | |
| "eval_samples_per_second": 90.225, | |
| "eval_steps_per_second": 11.278, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.977551789505975e-05, | |
| "loss": 2.3646, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.4910639923026007, | |
| "eval_loss": 2.38671875, | |
| "eval_runtime": 39.4635, | |
| "eval_samples_per_second": 90.413, | |
| "eval_steps_per_second": 11.302, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.970071549083798e-05, | |
| "loss": 2.3629, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.4911439908998856, | |
| "eval_loss": 2.384765625, | |
| "eval_runtime": 39.5626, | |
| "eval_samples_per_second": 90.186, | |
| "eval_steps_per_second": 11.273, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9625868151251246e-05, | |
| "loss": 2.3659, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.4913645349780168, | |
| "eval_loss": 2.3828125, | |
| "eval_runtime": 39.4888, | |
| "eval_samples_per_second": 90.355, | |
| "eval_steps_per_second": 11.294, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9551020811664506e-05, | |
| "loss": 2.3651, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.4916360370667473, | |
| "eval_loss": 2.3828125, | |
| "eval_runtime": 38.5403, | |
| "eval_samples_per_second": 92.578, | |
| "eval_steps_per_second": 11.572, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9476173472077765e-05, | |
| "loss": 2.3608, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.4917579527372671, | |
| "eval_loss": 2.380859375, | |
| "eval_runtime": 39.5737, | |
| "eval_samples_per_second": 90.161, | |
| "eval_steps_per_second": 11.27, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9401356089401005e-05, | |
| "loss": 2.3612, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.49203685195656843, | |
| "eval_loss": 2.380859375, | |
| "eval_runtime": 38.5594, | |
| "eval_samples_per_second": 92.533, | |
| "eval_steps_per_second": 11.567, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.932649377135928e-05, | |
| "loss": 2.3569, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.49215328827110977, | |
| "eval_loss": 2.37890625, | |
| "eval_runtime": 39.5649, | |
| "eval_samples_per_second": 90.181, | |
| "eval_steps_per_second": 11.273, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9251676388682524e-05, | |
| "loss": 2.3557, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.4923439698591586, | |
| "eval_loss": 2.37890625, | |
| "eval_runtime": 39.6114, | |
| "eval_samples_per_second": 90.075, | |
| "eval_steps_per_second": 11.259, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9176829049095784e-05, | |
| "loss": 2.3541, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.49218726027817594, | |
| "eval_loss": 2.376953125, | |
| "eval_runtime": 39.5989, | |
| "eval_samples_per_second": 90.104, | |
| "eval_steps_per_second": 11.263, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.910196673105406e-05, | |
| "loss": 2.351, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.49274588062017544, | |
| "eval_loss": 2.375, | |
| "eval_runtime": 38.6221, | |
| "eval_samples_per_second": 92.382, | |
| "eval_steps_per_second": 11.548, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9027134369922304e-05, | |
| "loss": 2.3504, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.49260917068851395, | |
| "eval_loss": 2.375, | |
| "eval_runtime": 39.6516, | |
| "eval_samples_per_second": 89.984, | |
| "eval_steps_per_second": 11.248, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.895231698724555e-05, | |
| "loss": 2.3479, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.492896014973984, | |
| "eval_loss": 2.373046875, | |
| "eval_runtime": 39.6699, | |
| "eval_samples_per_second": 89.942, | |
| "eval_steps_per_second": 11.243, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.887745466920382e-05, | |
| "loss": 2.3451, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.4929382060150178, | |
| "eval_loss": 2.37109375, | |
| "eval_runtime": 39.6216, | |
| "eval_samples_per_second": 90.052, | |
| "eval_steps_per_second": 11.256, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.880262230807207e-05, | |
| "loss": 2.3505, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.49343929311924395, | |
| "eval_loss": 2.369140625, | |
| "eval_runtime": 39.6785, | |
| "eval_samples_per_second": 89.923, | |
| "eval_steps_per_second": 11.24, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.8727804925395315e-05, | |
| "loss": 2.3457, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.4933929925612263, | |
| "eval_loss": 2.369140625, | |
| "eval_runtime": 39.6502, | |
| "eval_samples_per_second": 89.987, | |
| "eval_steps_per_second": 11.248, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.865297256426357e-05, | |
| "loss": 2.3479, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.4937496986354212, | |
| "eval_loss": 2.369140625, | |
| "eval_runtime": 39.6648, | |
| "eval_samples_per_second": 89.954, | |
| "eval_steps_per_second": 11.244, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.8578110246221835e-05, | |
| "loss": 2.3421, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.4935861398594655, | |
| "eval_loss": 2.3671875, | |
| "eval_runtime": 39.7026, | |
| "eval_samples_per_second": 89.868, | |
| "eval_steps_per_second": 11.234, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.850327788509009e-05, | |
| "loss": 2.3433, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.4937406576980568, | |
| "eval_loss": 2.3671875, | |
| "eval_runtime": 39.7646, | |
| "eval_samples_per_second": 89.728, | |
| "eval_steps_per_second": 11.216, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.842846050241333e-05, | |
| "loss": 2.3425, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_accuracy": 0.4939384624488776, | |
| "eval_loss": 2.365234375, | |
| "eval_runtime": 39.7934, | |
| "eval_samples_per_second": 89.663, | |
| "eval_steps_per_second": 11.208, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.835361316282659e-05, | |
| "loss": 2.3403, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_accuracy": 0.49420284137483617, | |
| "eval_loss": 2.36328125, | |
| "eval_runtime": 39.7702, | |
| "eval_samples_per_second": 89.715, | |
| "eval_steps_per_second": 11.214, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.827876582323985e-05, | |
| "loss": 2.3417, | |
| "step": 565000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_accuracy": 0.49440420770704296, | |
| "eval_loss": 2.361328125, | |
| "eval_runtime": 40.0918, | |
| "eval_samples_per_second": 88.996, | |
| "eval_steps_per_second": 11.124, | |
| "step": 565000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.82039484405631e-05, | |
| "loss": 2.3382, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_accuracy": 0.4947474893590907, | |
| "eval_loss": 2.361328125, | |
| "eval_runtime": 39.7167, | |
| "eval_samples_per_second": 89.836, | |
| "eval_steps_per_second": 11.23, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.812913105788634e-05, | |
| "loss": 2.3354, | |
| "step": 575000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_accuracy": 0.4949266642995849, | |
| "eval_loss": 2.359375, | |
| "eval_runtime": 39.8562, | |
| "eval_samples_per_second": 89.522, | |
| "eval_steps_per_second": 11.19, | |
| "step": 575000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.805425376138962e-05, | |
| "loss": 2.3366, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_accuracy": 0.4946513266616695, | |
| "eval_loss": 2.359375, | |
| "eval_runtime": 38.7841, | |
| "eval_samples_per_second": 91.997, | |
| "eval_steps_per_second": 11.5, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.797942140025787e-05, | |
| "loss": 2.3373, | |
| "step": 585000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 0.49454119160650334, | |
| "eval_loss": 2.359375, | |
| "eval_runtime": 38.7732, | |
| "eval_samples_per_second": 92.022, | |
| "eval_steps_per_second": 11.503, | |
| "step": 585000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.790460401758111e-05, | |
| "loss": 2.3365, | |
| "step": 590000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 0.49488255548395865, | |
| "eval_loss": 2.359375, | |
| "eval_runtime": 39.8158, | |
| "eval_samples_per_second": 89.613, | |
| "eval_steps_per_second": 11.202, | |
| "step": 590000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.782975667799438e-05, | |
| "loss": 2.3318, | |
| "step": 595000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 0.49525295994809954, | |
| "eval_loss": 2.35546875, | |
| "eval_runtime": 39.8567, | |
| "eval_samples_per_second": 89.521, | |
| "eval_steps_per_second": 11.19, | |
| "step": 595000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.7754894359952644e-05, | |
| "loss": 2.3278, | |
| "step": 600000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 0.4957521292777332, | |
| "eval_loss": 2.353515625, | |
| "eval_runtime": 39.8687, | |
| "eval_samples_per_second": 89.494, | |
| "eval_steps_per_second": 11.187, | |
| "step": 600000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.768004702036591e-05, | |
| "loss": 2.3277, | |
| "step": 605000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 0.4959060991807267, | |
| "eval_loss": 2.3515625, | |
| "eval_runtime": 40.0704, | |
| "eval_samples_per_second": 89.043, | |
| "eval_steps_per_second": 11.13, | |
| "step": 605000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.7605214659234157e-05, | |
| "loss": 2.326, | |
| "step": 610000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 0.49614417719798887, | |
| "eval_loss": 2.3515625, | |
| "eval_runtime": 39.9671, | |
| "eval_samples_per_second": 89.273, | |
| "eval_steps_per_second": 11.159, | |
| "step": 610000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.753036731964742e-05, | |
| "loss": 2.3273, | |
| "step": 615000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 0.49605705543793205, | |
| "eval_loss": 2.3515625, | |
| "eval_runtime": 39.9201, | |
| "eval_samples_per_second": 89.378, | |
| "eval_steps_per_second": 11.172, | |
| "step": 615000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.745551998006068e-05, | |
| "loss": 2.3284, | |
| "step": 620000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.49654992350819055, | |
| "eval_loss": 2.349609375, | |
| "eval_runtime": 39.956, | |
| "eval_samples_per_second": 89.298, | |
| "eval_steps_per_second": 11.162, | |
| "step": 620000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.7380687618928936e-05, | |
| "loss": 2.3276, | |
| "step": 625000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.49658115583726753, | |
| "eval_loss": 2.34765625, | |
| "eval_runtime": 39.9741, | |
| "eval_samples_per_second": 89.258, | |
| "eval_steps_per_second": 11.157, | |
| "step": 625000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.7305840279342196e-05, | |
| "loss": 2.3228, | |
| "step": 630000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.4966945785060207, | |
| "eval_loss": 2.345703125, | |
| "eval_runtime": 39.9089, | |
| "eval_samples_per_second": 89.404, | |
| "eval_steps_per_second": 11.175, | |
| "step": 630000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.723103787512043e-05, | |
| "loss": 2.3219, | |
| "step": 635000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.49684580873102496, | |
| "eval_loss": 2.345703125, | |
| "eval_runtime": 39.9108, | |
| "eval_samples_per_second": 89.399, | |
| "eval_steps_per_second": 11.175, | |
| "step": 635000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.715619053553369e-05, | |
| "loss": 2.326, | |
| "step": 640000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.49703155889869327, | |
| "eval_loss": 2.34375, | |
| "eval_runtime": 40.0352, | |
| "eval_samples_per_second": 89.122, | |
| "eval_steps_per_second": 11.14, | |
| "step": 640000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.708135817440194e-05, | |
| "loss": 2.3191, | |
| "step": 645000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.4972345690376936, | |
| "eval_loss": 2.341796875, | |
| "eval_runtime": 40.0269, | |
| "eval_samples_per_second": 89.14, | |
| "eval_steps_per_second": 11.143, | |
| "step": 645000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.70065258132702e-05, | |
| "loss": 2.3167, | |
| "step": 650000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.4972822394347058, | |
| "eval_loss": 2.34375, | |
| "eval_runtime": 40.0234, | |
| "eval_samples_per_second": 89.148, | |
| "eval_steps_per_second": 11.143, | |
| "step": 650000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.693166349522847e-05, | |
| "loss": 2.3172, | |
| "step": 655000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.4974205931731608, | |
| "eval_loss": 2.341796875, | |
| "eval_runtime": 40.0416, | |
| "eval_samples_per_second": 89.107, | |
| "eval_steps_per_second": 11.138, | |
| "step": 655000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.685683113409672e-05, | |
| "loss": 2.3194, | |
| "step": 660000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.4977205879129791, | |
| "eval_loss": 2.337890625, | |
| "eval_runtime": 40.5115, | |
| "eval_samples_per_second": 88.074, | |
| "eval_steps_per_second": 11.009, | |
| "step": 660000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.678198379450998e-05, | |
| "loss": 2.3204, | |
| "step": 665000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.49760332969504095, | |
| "eval_loss": 2.33984375, | |
| "eval_runtime": 40.059, | |
| "eval_samples_per_second": 89.069, | |
| "eval_steps_per_second": 11.134, | |
| "step": 665000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.670716641183322e-05, | |
| "loss": 2.309, | |
| "step": 670000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.49802085662059625, | |
| "eval_loss": 2.3359375, | |
| "eval_runtime": 40.172, | |
| "eval_samples_per_second": 88.818, | |
| "eval_steps_per_second": 11.102, | |
| "step": 670000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.663233405070147e-05, | |
| "loss": 2.3147, | |
| "step": 675000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.49805729433785273, | |
| "eval_loss": 2.337890625, | |
| "eval_runtime": 40.0906, | |
| "eval_samples_per_second": 88.999, | |
| "eval_steps_per_second": 11.125, | |
| "step": 675000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.655745675420475e-05, | |
| "loss": 2.3122, | |
| "step": 680000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.4980255140731779, | |
| "eval_loss": 2.3359375, | |
| "eval_runtime": 40.0778, | |
| "eval_samples_per_second": 89.027, | |
| "eval_steps_per_second": 11.128, | |
| "step": 680000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.6482624393073005e-05, | |
| "loss": 2.3096, | |
| "step": 685000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.4984096169272648, | |
| "eval_loss": 2.333984375, | |
| "eval_runtime": 40.3028, | |
| "eval_samples_per_second": 88.53, | |
| "eval_steps_per_second": 11.066, | |
| "step": 685000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.640780701039625e-05, | |
| "loss": 2.3093, | |
| "step": 690000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.49861701055104785, | |
| "eval_loss": 2.333984375, | |
| "eval_runtime": 40.1409, | |
| "eval_samples_per_second": 88.887, | |
| "eval_steps_per_second": 11.111, | |
| "step": 690000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.633295967080951e-05, | |
| "loss": 2.3048, | |
| "step": 695000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.498526601177404, | |
| "eval_loss": 2.33203125, | |
| "eval_runtime": 40.2295, | |
| "eval_samples_per_second": 88.691, | |
| "eval_steps_per_second": 11.086, | |
| "step": 695000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.6258127309677764e-05, | |
| "loss": 2.3111, | |
| "step": 700000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.4988186508510536, | |
| "eval_loss": 2.330078125, | |
| "eval_runtime": 40.2382, | |
| "eval_samples_per_second": 88.672, | |
| "eval_steps_per_second": 11.084, | |
| "step": 700000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.6183279970091023e-05, | |
| "loss": 2.3074, | |
| "step": 705000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.4989449500063561, | |
| "eval_loss": 2.330078125, | |
| "eval_runtime": 40.2221, | |
| "eval_samples_per_second": 88.707, | |
| "eval_steps_per_second": 11.088, | |
| "step": 705000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.610843263050428e-05, | |
| "loss": 2.3082, | |
| "step": 710000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.49918768547619985, | |
| "eval_loss": 2.330078125, | |
| "eval_runtime": 40.2424, | |
| "eval_samples_per_second": 88.663, | |
| "eval_steps_per_second": 11.083, | |
| "step": 710000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.603357031246256e-05, | |
| "loss": 2.3093, | |
| "step": 715000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.4993685042234876, | |
| "eval_loss": 2.328125, | |
| "eval_runtime": 39.2194, | |
| "eval_samples_per_second": 90.975, | |
| "eval_steps_per_second": 11.372, | |
| "step": 715000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.595873795133081e-05, | |
| "loss": 2.3011, | |
| "step": 720000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.4995095976399318, | |
| "eval_loss": 2.328125, | |
| "eval_runtime": 40.3274, | |
| "eval_samples_per_second": 88.476, | |
| "eval_steps_per_second": 11.059, | |
| "step": 720000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.588390559019906e-05, | |
| "loss": 2.2998, | |
| "step": 725000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.4994558999513433, | |
| "eval_loss": 2.326171875, | |
| "eval_runtime": 40.2634, | |
| "eval_samples_per_second": 88.616, | |
| "eval_steps_per_second": 11.077, | |
| "step": 725000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.580907322906731e-05, | |
| "loss": 2.3012, | |
| "step": 730000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.49959206194740696, | |
| "eval_loss": 2.326171875, | |
| "eval_runtime": 40.2894, | |
| "eval_samples_per_second": 88.559, | |
| "eval_steps_per_second": 11.07, | |
| "step": 730000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.573421091102558e-05, | |
| "loss": 2.3002, | |
| "step": 735000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.4997052106483612, | |
| "eval_loss": 2.32421875, | |
| "eval_runtime": 40.3059, | |
| "eval_samples_per_second": 88.523, | |
| "eval_steps_per_second": 11.065, | |
| "step": 735000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.5659378549893835e-05, | |
| "loss": 2.2994, | |
| "step": 740000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.5000197256815223, | |
| "eval_loss": 2.32421875, | |
| "eval_runtime": 40.7124, | |
| "eval_samples_per_second": 87.639, | |
| "eval_steps_per_second": 10.955, | |
| "step": 740000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.558454618876209e-05, | |
| "loss": 2.299, | |
| "step": 745000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.5000953407940244, | |
| "eval_loss": 2.322265625, | |
| "eval_runtime": 40.3194, | |
| "eval_samples_per_second": 88.493, | |
| "eval_steps_per_second": 11.062, | |
| "step": 745000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.550971382763034e-05, | |
| "loss": 2.2969, | |
| "step": 750000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.5002605433767736, | |
| "eval_loss": 2.322265625, | |
| "eval_runtime": 40.3665, | |
| "eval_samples_per_second": 88.39, | |
| "eval_steps_per_second": 11.049, | |
| "step": 750000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.543489644495358e-05, | |
| "loss": 2.2934, | |
| "step": 755000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.5003739660455269, | |
| "eval_loss": 2.3203125, | |
| "eval_runtime": 40.3284, | |
| "eval_samples_per_second": 88.474, | |
| "eval_steps_per_second": 11.059, | |
| "step": 755000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.536004910536684e-05, | |
| "loss": 2.2988, | |
| "step": 760000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.5004895804566715, | |
| "eval_loss": 2.318359375, | |
| "eval_runtime": 40.3305, | |
| "eval_samples_per_second": 88.469, | |
| "eval_steps_per_second": 11.059, | |
| "step": 760000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.5285186787325113e-05, | |
| "loss": 2.2911, | |
| "step": 765000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.5007449184452656, | |
| "eval_loss": 2.318359375, | |
| "eval_runtime": 39.3805, | |
| "eval_samples_per_second": 90.603, | |
| "eval_steps_per_second": 11.325, | |
| "step": 765000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.5210354426193366e-05, | |
| "loss": 2.2929, | |
| "step": 770000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.5008427249494803, | |
| "eval_loss": 2.318359375, | |
| "eval_runtime": 40.4207, | |
| "eval_samples_per_second": 88.272, | |
| "eval_steps_per_second": 11.034, | |
| "step": 770000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.5135567000426584e-05, | |
| "loss": 2.2926, | |
| "step": 775000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.5008994362838569, | |
| "eval_loss": 2.31640625, | |
| "eval_runtime": 40.4103, | |
| "eval_samples_per_second": 88.294, | |
| "eval_steps_per_second": 11.037, | |
| "step": 775000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.506070468238486e-05, | |
| "loss": 2.292, | |
| "step": 780000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.5011701164691906, | |
| "eval_loss": 2.31640625, | |
| "eval_runtime": 40.3936, | |
| "eval_samples_per_second": 88.331, | |
| "eval_steps_per_second": 11.041, | |
| "step": 780000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.498587232125311e-05, | |
| "loss": 2.2932, | |
| "step": 785000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.5014183312950129, | |
| "eval_loss": 2.314453125, | |
| "eval_runtime": 40.4142, | |
| "eval_samples_per_second": 88.286, | |
| "eval_steps_per_second": 11.036, | |
| "step": 785000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.4911039960121364e-05, | |
| "loss": 2.2903, | |
| "step": 790000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.5013958659355015, | |
| "eval_loss": 2.314453125, | |
| "eval_runtime": 40.3913, | |
| "eval_samples_per_second": 88.336, | |
| "eval_steps_per_second": 11.042, | |
| "step": 790000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.483620759898962e-05, | |
| "loss": 2.2886, | |
| "step": 795000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.5015205212840104, | |
| "eval_loss": 2.3125, | |
| "eval_runtime": 40.3641, | |
| "eval_samples_per_second": 88.395, | |
| "eval_steps_per_second": 11.049, | |
| "step": 795000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.476137523785787e-05, | |
| "loss": 2.2924, | |
| "step": 800000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.5014750426293895, | |
| "eval_loss": 2.3125, | |
| "eval_runtime": 40.359, | |
| "eval_samples_per_second": 88.407, | |
| "eval_steps_per_second": 11.051, | |
| "step": 800000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.468652789827113e-05, | |
| "loss": 2.2891, | |
| "step": 805000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.5018673645174441, | |
| "eval_loss": 2.310546875, | |
| "eval_runtime": 40.4253, | |
| "eval_samples_per_second": 88.261, | |
| "eval_steps_per_second": 11.033, | |
| "step": 805000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.461168055868439e-05, | |
| "loss": 2.2862, | |
| "step": 810000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.5019873624133714, | |
| "eval_loss": 2.30859375, | |
| "eval_runtime": 40.3854, | |
| "eval_samples_per_second": 88.349, | |
| "eval_steps_per_second": 11.044, | |
| "step": 810000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.453686317600763e-05, | |
| "loss": 2.2858, | |
| "step": 815000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.5021665373538656, | |
| "eval_loss": 2.30859375, | |
| "eval_runtime": 40.4075, | |
| "eval_samples_per_second": 88.301, | |
| "eval_steps_per_second": 11.038, | |
| "step": 815000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.44620008579659e-05, | |
| "loss": 2.2841, | |
| "step": 820000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.502265439729276, | |
| "eval_loss": 2.306640625, | |
| "eval_runtime": 40.4403, | |
| "eval_samples_per_second": 88.229, | |
| "eval_steps_per_second": 11.029, | |
| "step": 820000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.438718347528915e-05, | |
| "loss": 2.2843, | |
| "step": 825000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.5022361251747914, | |
| "eval_loss": 2.30859375, | |
| "eval_runtime": 40.4536, | |
| "eval_samples_per_second": 88.2, | |
| "eval_steps_per_second": 11.025, | |
| "step": 825000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.431233613570241e-05, | |
| "loss": 2.2832, | |
| "step": 830000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.5024813263548256, | |
| "eval_loss": 2.306640625, | |
| "eval_runtime": 40.4096, | |
| "eval_samples_per_second": 88.296, | |
| "eval_steps_per_second": 11.037, | |
| "step": 830000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.423756368839062e-05, | |
| "loss": 2.2846, | |
| "step": 835000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.502600776315155, | |
| "eval_loss": 2.306640625, | |
| "eval_runtime": 39.3247, | |
| "eval_samples_per_second": 90.732, | |
| "eval_steps_per_second": 11.341, | |
| "step": 835000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.416267141343891e-05, | |
| "loss": 2.2784, | |
| "step": 840000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.5026766653954561, | |
| "eval_loss": 2.3046875, | |
| "eval_runtime": 40.3768, | |
| "eval_samples_per_second": 88.367, | |
| "eval_steps_per_second": 11.046, | |
| "step": 840000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.408782407385217e-05, | |
| "loss": 2.277, | |
| "step": 845000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.5028424159138032, | |
| "eval_loss": 2.302734375, | |
| "eval_runtime": 40.4265, | |
| "eval_samples_per_second": 88.259, | |
| "eval_steps_per_second": 11.032, | |
| "step": 845000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.4013021669630405e-05, | |
| "loss": 2.276, | |
| "step": 850000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.5025583113063223, | |
| "eval_loss": 2.306640625, | |
| "eval_runtime": 40.7923, | |
| "eval_samples_per_second": 87.468, | |
| "eval_steps_per_second": 10.933, | |
| "step": 850000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.393818930849866e-05, | |
| "loss": 2.2802, | |
| "step": 855000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_accuracy": 0.5031032332583757, | |
| "eval_loss": 2.302734375, | |
| "eval_runtime": 40.3592, | |
| "eval_samples_per_second": 88.406, | |
| "eval_steps_per_second": 11.051, | |
| "step": 855000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.3863356947366904e-05, | |
| "loss": 2.2781, | |
| "step": 860000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_accuracy": 0.5032018616659872, | |
| "eval_loss": 2.30078125, | |
| "eval_runtime": 40.4036, | |
| "eval_samples_per_second": 88.309, | |
| "eval_steps_per_second": 11.039, | |
| "step": 860000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.378850960778017e-05, | |
| "loss": 2.2749, | |
| "step": 865000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_accuracy": 0.5038193850847547, | |
| "eval_loss": 2.298828125, | |
| "eval_runtime": 40.3881, | |
| "eval_samples_per_second": 88.343, | |
| "eval_steps_per_second": 11.043, | |
| "step": 865000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.371366226819343e-05, | |
| "loss": 2.2729, | |
| "step": 870000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_accuracy": 0.5037152773211648, | |
| "eval_loss": 2.296875, | |
| "eval_runtime": 40.4456, | |
| "eval_samples_per_second": 88.217, | |
| "eval_steps_per_second": 11.027, | |
| "step": 870000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.363882990706168e-05, | |
| "loss": 2.2708, | |
| "step": 875000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_accuracy": 0.5038993836820396, | |
| "eval_loss": 2.296875, | |
| "eval_runtime": 40.4095, | |
| "eval_samples_per_second": 88.296, | |
| "eval_steps_per_second": 11.037, | |
| "step": 875000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.3563997545929936e-05, | |
| "loss": 2.2754, | |
| "step": 880000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_accuracy": 0.5038591104155982, | |
| "eval_loss": 2.296875, | |
| "eval_runtime": 40.397, | |
| "eval_samples_per_second": 88.323, | |
| "eval_steps_per_second": 11.04, | |
| "step": 880000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.3489150206343196e-05, | |
| "loss": 2.2761, | |
| "step": 885000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_accuracy": 0.5040640383291909, | |
| "eval_loss": 2.294921875, | |
| "eval_runtime": 40.3166, | |
| "eval_samples_per_second": 88.499, | |
| "eval_steps_per_second": 11.062, | |
| "step": 885000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.341434780212143e-05, | |
| "loss": 2.2742, | |
| "step": 890000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_accuracy": 0.5041032157244366, | |
| "eval_loss": 2.294921875, | |
| "eval_runtime": 40.3381, | |
| "eval_samples_per_second": 88.452, | |
| "eval_steps_per_second": 11.057, | |
| "step": 890000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.3339470505624715e-05, | |
| "loss": 2.2734, | |
| "step": 895000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_accuracy": 0.5040837640107132, | |
| "eval_loss": 2.294921875, | |
| "eval_runtime": 40.3833, | |
| "eval_samples_per_second": 88.353, | |
| "eval_steps_per_second": 11.044, | |
| "step": 895000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.326463814449296e-05, | |
| "loss": 2.2682, | |
| "step": 900000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_accuracy": 0.5043944434946894, | |
| "eval_loss": 2.29296875, | |
| "eval_runtime": 40.4818, | |
| "eval_samples_per_second": 88.138, | |
| "eval_steps_per_second": 11.017, | |
| "step": 900000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.318982076181621e-05, | |
| "loss": 2.2667, | |
| "step": 905000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_accuracy": 0.5045489613332808, | |
| "eval_loss": 2.29296875, | |
| "eval_runtime": 40.3614, | |
| "eval_samples_per_second": 88.401, | |
| "eval_steps_per_second": 11.05, | |
| "step": 905000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.311498840068446e-05, | |
| "loss": 2.2676, | |
| "step": 910000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_accuracy": 0.5045801936623577, | |
| "eval_loss": 2.29296875, | |
| "eval_runtime": 40.4147, | |
| "eval_samples_per_second": 88.285, | |
| "eval_steps_per_second": 11.036, | |
| "step": 910000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.304012608264273e-05, | |
| "loss": 2.2707, | |
| "step": 915000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_accuracy": 0.5046547129036641, | |
| "eval_loss": 2.291015625, | |
| "eval_runtime": 40.4009, | |
| "eval_samples_per_second": 88.315, | |
| "eval_steps_per_second": 11.039, | |
| "step": 915000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.296529372151098e-05, | |
| "loss": 2.265, | |
| "step": 920000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.5047700533470098, | |
| "eval_loss": 2.291015625, | |
| "eval_runtime": 40.3704, | |
| "eval_samples_per_second": 88.382, | |
| "eval_steps_per_second": 11.048, | |
| "step": 920000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.289046136037923e-05, | |
| "loss": 2.2676, | |
| "step": 925000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.5046149875728206, | |
| "eval_loss": 2.291015625, | |
| "eval_runtime": 40.3019, | |
| "eval_samples_per_second": 88.532, | |
| "eval_steps_per_second": 11.066, | |
| "step": 925000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.281564397770247e-05, | |
| "loss": 2.2662, | |
| "step": 930000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.5051503206519119, | |
| "eval_loss": 2.2890625, | |
| "eval_runtime": 40.3203, | |
| "eval_samples_per_second": 88.491, | |
| "eval_steps_per_second": 11.061, | |
| "step": 930000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.274079663811574e-05, | |
| "loss": 2.2706, | |
| "step": 935000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.5050968969311223, | |
| "eval_loss": 2.2890625, | |
| "eval_runtime": 40.3497, | |
| "eval_samples_per_second": 88.427, | |
| "eval_steps_per_second": 11.053, | |
| "step": 935000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.2665949298529e-05, | |
| "loss": 2.2657, | |
| "step": 940000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.5048788185631814, | |
| "eval_loss": 2.2890625, | |
| "eval_runtime": 40.7581, | |
| "eval_samples_per_second": 87.541, | |
| "eval_steps_per_second": 10.943, | |
| "step": 940000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.259110195894226e-05, | |
| "loss": 2.2672, | |
| "step": 945000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.5050453909849252, | |
| "eval_loss": 2.287109375, | |
| "eval_runtime": 40.3332, | |
| "eval_samples_per_second": 88.463, | |
| "eval_steps_per_second": 11.058, | |
| "step": 945000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.9925167638868255e-05, | |
| "loss": 2.2716, | |
| "step": 950000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.5037065103515993, | |
| "eval_loss": 2.296875, | |
| "eval_runtime": 38.5412, | |
| "eval_samples_per_second": 92.576, | |
| "eval_steps_per_second": 11.572, | |
| "step": 950000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.9850290342371536e-05, | |
| "loss": 2.2702, | |
| "step": 955000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.5036908941870608, | |
| "eval_loss": 2.298828125, | |
| "eval_runtime": 39.4253, | |
| "eval_samples_per_second": 90.5, | |
| "eval_steps_per_second": 11.313, | |
| "step": 955000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.977548793814977e-05, | |
| "loss": 2.2708, | |
| "step": 960000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.5035029522770011, | |
| "eval_loss": 2.298828125, | |
| "eval_runtime": 39.5074, | |
| "eval_samples_per_second": 90.312, | |
| "eval_steps_per_second": 11.289, | |
| "step": 960000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.970064059856303e-05, | |
| "loss": 2.2738, | |
| "step": 965000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.5035369242840674, | |
| "eval_loss": 2.298828125, | |
| "eval_runtime": 39.5917, | |
| "eval_samples_per_second": 90.12, | |
| "eval_steps_per_second": 11.265, | |
| "step": 965000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.962582321588627e-05, | |
| "loss": 2.2737, | |
| "step": 970000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.5035577458367854, | |
| "eval_loss": 2.298828125, | |
| "eval_runtime": 39.4627, | |
| "eval_samples_per_second": 90.415, | |
| "eval_steps_per_second": 11.302, | |
| "step": 970000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.9550990854754526e-05, | |
| "loss": 2.2763, | |
| "step": 975000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.49873289892999134, | |
| "eval_loss": 2.330078125, | |
| "eval_runtime": 39.5211, | |
| "eval_samples_per_second": 90.281, | |
| "eval_steps_per_second": 11.285, | |
| "step": 975000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.9476143515167786e-05, | |
| "loss": 2.2738, | |
| "step": 980000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.5034662405919458, | |
| "eval_loss": 2.296875, | |
| "eval_runtime": 39.4696, | |
| "eval_samples_per_second": 90.399, | |
| "eval_steps_per_second": 11.3, | |
| "step": 980000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.9401296175581046e-05, | |
| "loss": 2.2737, | |
| "step": 985000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.5036182927203469, | |
| "eval_loss": 2.296875, | |
| "eval_runtime": 39.6371, | |
| "eval_samples_per_second": 90.017, | |
| "eval_steps_per_second": 11.252, | |
| "step": 985000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.932644883599431e-05, | |
| "loss": 2.2748, | |
| "step": 990000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.5036056902015965, | |
| "eval_loss": 2.296875, | |
| "eval_runtime": 39.6139, | |
| "eval_samples_per_second": 90.069, | |
| "eval_steps_per_second": 11.259, | |
| "step": 990000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.9251631453317545e-05, | |
| "loss": 2.2724, | |
| "step": 995000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.5038232206339396, | |
| "eval_loss": 2.296875, | |
| "eval_runtime": 39.4746, | |
| "eval_samples_per_second": 90.387, | |
| "eval_steps_per_second": 11.298, | |
| "step": 995000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.917678411373081e-05, | |
| "loss": 2.2744, | |
| "step": 1000000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.5032999421380009, | |
| "eval_loss": 2.298828125, | |
| "eval_runtime": 39.6576, | |
| "eval_samples_per_second": 89.97, | |
| "eval_steps_per_second": 11.246, | |
| "step": 1000000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.9101951752599065e-05, | |
| "loss": 2.2694, | |
| "step": 1005000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.5033465166638174, | |
| "eval_loss": 2.298828125, | |
| "eval_runtime": 39.6623, | |
| "eval_samples_per_second": 89.959, | |
| "eval_steps_per_second": 11.245, | |
| "step": 1005000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.9027104413012324e-05, | |
| "loss": 2.2684, | |
| "step": 1010000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.5039421226586712, | |
| "eval_loss": 2.294921875, | |
| "eval_runtime": 39.6275, | |
| "eval_samples_per_second": 90.039, | |
| "eval_steps_per_second": 11.255, | |
| "step": 1010000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.895231698724555e-05, | |
| "loss": 2.2731, | |
| "step": 1015000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.5039547251774216, | |
| "eval_loss": 2.294921875, | |
| "eval_runtime": 39.6334, | |
| "eval_samples_per_second": 90.025, | |
| "eval_steps_per_second": 11.253, | |
| "step": 1015000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.887746964765881e-05, | |
| "loss": 2.2714, | |
| "step": 1020000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.5042445831086797, | |
| "eval_loss": 2.294921875, | |
| "eval_runtime": 39.6384, | |
| "eval_samples_per_second": 90.014, | |
| "eval_steps_per_second": 11.252, | |
| "step": 1020000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.880263728652706e-05, | |
| "loss": 2.2687, | |
| "step": 1025000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.5045218385211876, | |
| "eval_loss": 2.29296875, | |
| "eval_runtime": 39.6654, | |
| "eval_samples_per_second": 89.952, | |
| "eval_steps_per_second": 11.244, | |
| "step": 1025000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.872778994694032e-05, | |
| "loss": 2.2673, | |
| "step": 1030000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.5046215627999947, | |
| "eval_loss": 2.29296875, | |
| "eval_runtime": 39.7655, | |
| "eval_samples_per_second": 89.726, | |
| "eval_steps_per_second": 11.216, | |
| "step": 1030000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.8652957585808575e-05, | |
| "loss": 2.2677, | |
| "step": 1035000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.5044056761744452, | |
| "eval_loss": 2.29296875, | |
| "eval_runtime": 39.7885, | |
| "eval_samples_per_second": 89.674, | |
| "eval_steps_per_second": 11.209, | |
| "step": 1035000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.8578110246221835e-05, | |
| "loss": 2.265, | |
| "step": 1040000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.5046538910002674, | |
| "eval_loss": 2.291015625, | |
| "eval_runtime": 39.7963, | |
| "eval_samples_per_second": 89.657, | |
| "eval_steps_per_second": 11.207, | |
| "step": 1040000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.850327788509009e-05, | |
| "loss": 2.2659, | |
| "step": 1045000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.504468688768197, | |
| "eval_loss": 2.291015625, | |
| "eval_runtime": 40.1905, | |
| "eval_samples_per_second": 88.777, | |
| "eval_steps_per_second": 11.097, | |
| "step": 1045000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.842849045932331e-05, | |
| "loss": 2.2633, | |
| "step": 1050000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.5042100631660157, | |
| "eval_loss": 2.294921875, | |
| "eval_runtime": 39.7629, | |
| "eval_samples_per_second": 89.732, | |
| "eval_steps_per_second": 11.216, | |
| "step": 1050000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.835361316282659e-05, | |
| "loss": 2.2689, | |
| "step": 1055000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_accuracy": 0.5049952548777227, | |
| "eval_loss": 2.2890625, | |
| "eval_runtime": 39.8901, | |
| "eval_samples_per_second": 89.446, | |
| "eval_steps_per_second": 11.181, | |
| "step": 1055000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.827876582323985e-05, | |
| "loss": 2.2617, | |
| "step": 1060000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_accuracy": 0.5049081331176659, | |
| "eval_loss": 2.2890625, | |
| "eval_runtime": 39.8913, | |
| "eval_samples_per_second": 89.443, | |
| "eval_steps_per_second": 11.18, | |
| "step": 1060000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.820390350519813e-05, | |
| "loss": 2.2613, | |
| "step": 1065000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_accuracy": 0.5052440176391427, | |
| "eval_loss": 2.287109375, | |
| "eval_runtime": 39.9185, | |
| "eval_samples_per_second": 89.382, | |
| "eval_steps_per_second": 11.173, | |
| "step": 1065000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.8129086122521366e-05, | |
| "loss": 2.2649, | |
| "step": 1070000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_accuracy": 0.5047081366244537, | |
| "eval_loss": 2.2890625, | |
| "eval_runtime": 39.8452, | |
| "eval_samples_per_second": 89.547, | |
| "eval_steps_per_second": 11.193, | |
| "step": 1070000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.805422380447964e-05, | |
| "loss": 2.2587, | |
| "step": 1075000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_accuracy": 0.505284564873383, | |
| "eval_loss": 2.287109375, | |
| "eval_runtime": 39.8746, | |
| "eval_samples_per_second": 89.481, | |
| "eval_steps_per_second": 11.185, | |
| "step": 1075000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.797939144334789e-05, | |
| "loss": 2.2641, | |
| "step": 1080000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_accuracy": 0.5054223706762402, | |
| "eval_loss": 2.28515625, | |
| "eval_runtime": 39.914, | |
| "eval_samples_per_second": 89.392, | |
| "eval_steps_per_second": 11.174, | |
| "step": 1080000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.7904559082216145e-05, | |
| "loss": 2.2634, | |
| "step": 1085000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.5056552433053229, | |
| "eval_loss": 2.28515625, | |
| "eval_runtime": 39.8319, | |
| "eval_samples_per_second": 89.576, | |
| "eval_steps_per_second": 11.197, | |
| "step": 1085000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.7829741699539384e-05, | |
| "loss": 2.2597, | |
| "step": 1090000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.5057451047433689, | |
| "eval_loss": 2.283203125, | |
| "eval_runtime": 39.976, | |
| "eval_samples_per_second": 89.254, | |
| "eval_steps_per_second": 11.157, | |
| "step": 1090000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.775487938149766e-05, | |
| "loss": 2.2572, | |
| "step": 1095000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.5059667446926958, | |
| "eval_loss": 2.283203125, | |
| "eval_runtime": 40.0116, | |
| "eval_samples_per_second": 89.174, | |
| "eval_steps_per_second": 11.147, | |
| "step": 1095000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.768003204191092e-05, | |
| "loss": 2.2566, | |
| "step": 1100000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.5055771624826304, | |
| "eval_loss": 2.283203125, | |
| "eval_runtime": 39.963, | |
| "eval_samples_per_second": 89.283, | |
| "eval_steps_per_second": 11.16, | |
| "step": 1100000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.760524461614414e-05, | |
| "loss": 2.2576, | |
| "step": 1105000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.5055905869047775, | |
| "eval_loss": 2.283203125, | |
| "eval_runtime": 39.8683, | |
| "eval_samples_per_second": 89.495, | |
| "eval_steps_per_second": 11.187, | |
| "step": 1105000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.753038229810241e-05, | |
| "loss": 2.2612, | |
| "step": 1110000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.505675790890242, | |
| "eval_loss": 2.283203125, | |
| "eval_runtime": 39.9091, | |
| "eval_samples_per_second": 89.403, | |
| "eval_steps_per_second": 11.175, | |
| "step": 1110000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.7455534958515676e-05, | |
| "loss": 2.2585, | |
| "step": 1115000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.505924553651662, | |
| "eval_loss": 2.28125, | |
| "eval_runtime": 39.8428, | |
| "eval_samples_per_second": 89.552, | |
| "eval_steps_per_second": 11.194, | |
| "step": 1115000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.738073255429391e-05, | |
| "loss": 2.2528, | |
| "step": 1120000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 0.5059566078841358, | |
| "eval_loss": 2.28125, | |
| "eval_runtime": 40.0018, | |
| "eval_samples_per_second": 89.196, | |
| "eval_steps_per_second": 11.149, | |
| "step": 1120000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.730588521470717e-05, | |
| "loss": 2.2599, | |
| "step": 1125000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 0.5059996208285663, | |
| "eval_loss": 2.28125, | |
| "eval_runtime": 39.9769, | |
| "eval_samples_per_second": 89.251, | |
| "eval_steps_per_second": 11.156, | |
| "step": 1125000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.723105285357542e-05, | |
| "loss": 2.2556, | |
| "step": 1130000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 0.5065730354317075, | |
| "eval_loss": 2.27734375, | |
| "eval_runtime": 39.975, | |
| "eval_samples_per_second": 89.256, | |
| "eval_steps_per_second": 11.157, | |
| "step": 1130000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.715619053553369e-05, | |
| "loss": 2.2519, | |
| "step": 1135000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 0.5063790662300716, | |
| "eval_loss": 2.279296875, | |
| "eval_runtime": 40.0596, | |
| "eval_samples_per_second": 89.067, | |
| "eval_steps_per_second": 11.133, | |
| "step": 1135000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.708135817440194e-05, | |
| "loss": 2.2567, | |
| "step": 1140000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 0.5067524843400006, | |
| "eval_loss": 2.27734375, | |
| "eval_runtime": 40.0409, | |
| "eval_samples_per_second": 89.109, | |
| "eval_steps_per_second": 11.139, | |
| "step": 1140000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.700651083481521e-05, | |
| "loss": 2.2516, | |
| "step": 1145000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 0.506862071459569, | |
| "eval_loss": 2.275390625, | |
| "eval_runtime": 39.8652, | |
| "eval_samples_per_second": 89.502, | |
| "eval_steps_per_second": 11.188, | |
| "step": 1145000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.693166349522847e-05, | |
| "loss": 2.2533, | |
| "step": 1150000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 0.5067752236673111, | |
| "eval_loss": 2.275390625, | |
| "eval_runtime": 39.9361, | |
| "eval_samples_per_second": 89.343, | |
| "eval_steps_per_second": 11.168, | |
| "step": 1150000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.6856846112551706e-05, | |
| "loss": 2.2532, | |
| "step": 1155000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_accuracy": 0.5069639874807674, | |
| "eval_loss": 2.275390625, | |
| "eval_runtime": 40.0505, | |
| "eval_samples_per_second": 89.088, | |
| "eval_steps_per_second": 11.136, | |
| "step": 1155000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.6781998772964966e-05, | |
| "loss": 2.2572, | |
| "step": 1160000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_accuracy": 0.5063629021299353, | |
| "eval_loss": 2.279296875, | |
| "eval_runtime": 40.0931, | |
| "eval_samples_per_second": 88.993, | |
| "eval_steps_per_second": 11.124, | |
| "step": 1160000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.670712147646825e-05, | |
| "loss": 2.2514, | |
| "step": 1165000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_accuracy": 0.5071686414265613, | |
| "eval_loss": 2.2734375, | |
| "eval_runtime": 40.4455, | |
| "eval_samples_per_second": 88.217, | |
| "eval_steps_per_second": 11.027, | |
| "step": 1165000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.66322891153365e-05, | |
| "loss": 2.2471, | |
| "step": 1170000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_accuracy": 0.5073409671720824, | |
| "eval_loss": 2.2734375, | |
| "eval_runtime": 40.0562, | |
| "eval_samples_per_second": 89.075, | |
| "eval_steps_per_second": 11.134, | |
| "step": 1170000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.655745675420475e-05, | |
| "loss": 2.2524, | |
| "step": 1175000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_accuracy": 0.50760562006584, | |
| "eval_loss": 2.271484375, | |
| "eval_runtime": 40.09, | |
| "eval_samples_per_second": 89.0, | |
| "eval_steps_per_second": 11.125, | |
| "step": 1175000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.648260941461802e-05, | |
| "loss": 2.247, | |
| "step": 1180000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_accuracy": 0.5072979542276519, | |
| "eval_loss": 2.271484375, | |
| "eval_runtime": 40.3075, | |
| "eval_samples_per_second": 88.52, | |
| "eval_steps_per_second": 11.065, | |
| "step": 1180000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.640780701039625e-05, | |
| "loss": 2.2491, | |
| "step": 1185000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_accuracy": 0.5076653450460047, | |
| "eval_loss": 2.271484375, | |
| "eval_runtime": 40.1522, | |
| "eval_samples_per_second": 88.862, | |
| "eval_steps_per_second": 11.108, | |
| "step": 1185000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.63329746492645e-05, | |
| "loss": 2.2481, | |
| "step": 1190000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.5078031508488619, | |
| "eval_loss": 2.26953125, | |
| "eval_runtime": 40.1488, | |
| "eval_samples_per_second": 88.869, | |
| "eval_steps_per_second": 11.109, | |
| "step": 1190000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.992515266041327e-05, | |
| "loss": 2.2465, | |
| "step": 1195000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.5069333030872883, | |
| "eval_loss": 2.2734375, | |
| "eval_runtime": 39.5233, | |
| "eval_samples_per_second": 90.276, | |
| "eval_steps_per_second": 11.284, | |
| "step": 1195000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.985030532082652e-05, | |
| "loss": 2.2494, | |
| "step": 1200000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.5067053618785863, | |
| "eval_loss": 2.279296875, | |
| "eval_runtime": 38.5856, | |
| "eval_samples_per_second": 92.47, | |
| "eval_steps_per_second": 11.559, | |
| "step": 1200000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.977545798123979e-05, | |
| "loss": 2.2541, | |
| "step": 1205000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.5068631673307646, | |
| "eval_loss": 2.275390625, | |
| "eval_runtime": 39.5375, | |
| "eval_samples_per_second": 90.244, | |
| "eval_steps_per_second": 11.28, | |
| "step": 1205000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.9700625620108035e-05, | |
| "loss": 2.25, | |
| "step": 1210000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.5067459091128265, | |
| "eval_loss": 2.275390625, | |
| "eval_runtime": 39.4824, | |
| "eval_samples_per_second": 90.369, | |
| "eval_steps_per_second": 11.296, | |
| "step": 1210000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.962582321588627e-05, | |
| "loss": 2.25, | |
| "step": 1215000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.5064313940796654, | |
| "eval_loss": 2.279296875, | |
| "eval_runtime": 39.3855, | |
| "eval_samples_per_second": 90.592, | |
| "eval_steps_per_second": 11.324, | |
| "step": 1215000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.955096089784455e-05, | |
| "loss": 2.2508, | |
| "step": 1220000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.507028095945715, | |
| "eval_loss": 2.2734375, | |
| "eval_runtime": 39.5116, | |
| "eval_samples_per_second": 90.303, | |
| "eval_steps_per_second": 11.288, | |
| "step": 1220000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.947612853671279e-05, | |
| "loss": 2.2496, | |
| "step": 1225000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.507010562006584, | |
| "eval_loss": 2.2734375, | |
| "eval_runtime": 39.5165, | |
| "eval_samples_per_second": 90.291, | |
| "eval_steps_per_second": 11.286, | |
| "step": 1225000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.940131115403604e-05, | |
| "loss": 2.2499, | |
| "step": 1230000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.5073475423992566, | |
| "eval_loss": 2.2734375, | |
| "eval_runtime": 39.4273, | |
| "eval_samples_per_second": 90.496, | |
| "eval_steps_per_second": 11.312, | |
| "step": 1230000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.932647879290429e-05, | |
| "loss": 2.2467, | |
| "step": 1235000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.5075538401518439, | |
| "eval_loss": 2.271484375, | |
| "eval_runtime": 39.5247, | |
| "eval_samples_per_second": 90.273, | |
| "eval_steps_per_second": 11.284, | |
| "step": 1235000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.925164643177254e-05, | |
| "loss": 2.2497, | |
| "step": 1240000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.5073163100701796, | |
| "eval_loss": 2.271484375, | |
| "eval_runtime": 38.6276, | |
| "eval_samples_per_second": 92.369, | |
| "eval_steps_per_second": 11.546, | |
| "step": 1240000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.917678411373081e-05, | |
| "loss": 2.2463, | |
| "step": 1245000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.5073409671720824, | |
| "eval_loss": 2.271484375, | |
| "eval_runtime": 39.5689, | |
| "eval_samples_per_second": 90.172, | |
| "eval_steps_per_second": 11.271, | |
| "step": 1245000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.910196673105406e-05, | |
| "loss": 2.2479, | |
| "step": 1250000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.5077573982264421, | |
| "eval_loss": 2.26953125, | |
| "eval_runtime": 39.6608, | |
| "eval_samples_per_second": 89.963, | |
| "eval_steps_per_second": 11.245, | |
| "step": 1250000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.902711939146731e-05, | |
| "loss": 2.2445, | |
| "step": 1255000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.5078749304121791, | |
| "eval_loss": 2.26953125, | |
| "eval_runtime": 39.6577, | |
| "eval_samples_per_second": 89.97, | |
| "eval_steps_per_second": 11.246, | |
| "step": 1255000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.89522420949706e-05, | |
| "loss": 2.247, | |
| "step": 1260000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.5078483555356837, | |
| "eval_loss": 2.26953125, | |
| "eval_runtime": 39.6203, | |
| "eval_samples_per_second": 90.055, | |
| "eval_steps_per_second": 11.257, | |
| "step": 1260000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.887743969074883e-05, | |
| "loss": 2.2443, | |
| "step": 1265000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.5078826015105489, | |
| "eval_loss": 2.267578125, | |
| "eval_runtime": 39.6508, | |
| "eval_samples_per_second": 89.986, | |
| "eval_steps_per_second": 11.248, | |
| "step": 1265000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.880262230807207e-05, | |
| "loss": 2.243, | |
| "step": 1270000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.5080672558070215, | |
| "eval_loss": 2.267578125, | |
| "eval_runtime": 39.6639, | |
| "eval_samples_per_second": 89.956, | |
| "eval_steps_per_second": 11.244, | |
| "step": 1270000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.8727804925395315e-05, | |
| "loss": 2.2454, | |
| "step": 1275000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.5076889062767118, | |
| "eval_loss": 2.271484375, | |
| "eval_runtime": 39.6373, | |
| "eval_samples_per_second": 90.016, | |
| "eval_steps_per_second": 11.252, | |
| "step": 1275000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.86529126504436e-05, | |
| "loss": 2.2451, | |
| "step": 1280000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.5080541053526733, | |
| "eval_loss": 2.26953125, | |
| "eval_runtime": 39.7457, | |
| "eval_samples_per_second": 89.771, | |
| "eval_steps_per_second": 11.221, | |
| "step": 1280000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.8578080289311855e-05, | |
| "loss": 2.2455, | |
| "step": 1285000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.5083853324215685, | |
| "eval_loss": 2.265625, | |
| "eval_runtime": 39.6403, | |
| "eval_samples_per_second": 90.009, | |
| "eval_steps_per_second": 11.251, | |
| "step": 1285000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.850324792818011e-05, | |
| "loss": 2.241, | |
| "step": 1290000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.508259307234065, | |
| "eval_loss": 2.267578125, | |
| "eval_runtime": 40.1652, | |
| "eval_samples_per_second": 88.833, | |
| "eval_steps_per_second": 11.104, | |
| "step": 1290000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.8428415567048354e-05, | |
| "loss": 2.243, | |
| "step": 1295000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.5085801235266012, | |
| "eval_loss": 2.263671875, | |
| "eval_runtime": 39.68, | |
| "eval_samples_per_second": 89.919, | |
| "eval_steps_per_second": 11.24, | |
| "step": 1295000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.8353553249006635e-05, | |
| "loss": 2.2408, | |
| "step": 1300000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.5084371123355645, | |
| "eval_loss": 2.263671875, | |
| "eval_runtime": 39.9223, | |
| "eval_samples_per_second": 89.374, | |
| "eval_steps_per_second": 11.172, | |
| "step": 1300000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.827876582323985e-05, | |
| "loss": 2.2508, | |
| "step": 1305000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.5063459161264021, | |
| "eval_loss": 2.279296875, | |
| "eval_runtime": 39.7588, | |
| "eval_samples_per_second": 89.741, | |
| "eval_steps_per_second": 11.218, | |
| "step": 1305000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.820396341901809e-05, | |
| "loss": 2.252, | |
| "step": 1310000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.504651699257876, | |
| "eval_loss": 2.291015625, | |
| "eval_runtime": 39.8037, | |
| "eval_samples_per_second": 89.64, | |
| "eval_steps_per_second": 11.205, | |
| "step": 1310000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.8129146036341324e-05, | |
| "loss": 2.7482, | |
| "step": 1315000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.4505951128528157, | |
| "eval_loss": 2.646484375, | |
| "eval_runtime": 39.8831, | |
| "eval_samples_per_second": 89.462, | |
| "eval_steps_per_second": 11.183, | |
| "step": 1315000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.805426873984461e-05, | |
| "loss": 2.4189, | |
| "step": 1320000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.5070354930762858, | |
| "eval_loss": 2.275390625, | |
| "eval_runtime": 39.9021, | |
| "eval_samples_per_second": 89.419, | |
| "eval_steps_per_second": 11.177, | |
| "step": 1320000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.797939144334789e-05, | |
| "loss": 2.2446, | |
| "step": 1325000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.5081412271127301, | |
| "eval_loss": 2.267578125, | |
| "eval_runtime": 39.8679, | |
| "eval_samples_per_second": 89.495, | |
| "eval_steps_per_second": 11.187, | |
| "step": 1325000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.790457406067113e-05, | |
| "loss": 2.2416, | |
| "step": 1330000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.5086858750969846, | |
| "eval_loss": 2.263671875, | |
| "eval_runtime": 39.8423, | |
| "eval_samples_per_second": 89.553, | |
| "eval_steps_per_second": 11.194, | |
| "step": 1330000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.78297267210844e-05, | |
| "loss": 2.2421, | |
| "step": 1335000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.5087765584384274, | |
| "eval_loss": 2.26171875, | |
| "eval_runtime": 39.9585, | |
| "eval_samples_per_second": 89.293, | |
| "eval_steps_per_second": 11.162, | |
| "step": 1335000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.775487938149766e-05, | |
| "loss": 2.2367, | |
| "step": 1340000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.509168606358683, | |
| "eval_loss": 2.26171875, | |
| "eval_runtime": 39.8472, | |
| "eval_samples_per_second": 89.542, | |
| "eval_steps_per_second": 11.193, | |
| "step": 1340000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.768001706345593e-05, | |
| "loss": 2.2355, | |
| "step": 1345000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.5090864160190068, | |
| "eval_loss": 2.259765625, | |
| "eval_runtime": 39.8651, | |
| "eval_samples_per_second": 89.502, | |
| "eval_steps_per_second": 11.188, | |
| "step": 1345000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.760518470232418e-05, | |
| "loss": 2.2379, | |
| "step": 1350000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.5093981913741786, | |
| "eval_loss": 2.259765625, | |
| "eval_runtime": 39.8769, | |
| "eval_samples_per_second": 89.475, | |
| "eval_steps_per_second": 11.184, | |
| "step": 1350000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.7530337362737444e-05, | |
| "loss": 2.2365, | |
| "step": 1355000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.509393259953798, | |
| "eval_loss": 2.259765625, | |
| "eval_runtime": 39.8496, | |
| "eval_samples_per_second": 89.537, | |
| "eval_steps_per_second": 11.192, | |
| "step": 1355000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.745550500160569e-05, | |
| "loss": 2.2379, | |
| "step": 1360000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.5091491546449597, | |
| "eval_loss": 2.2578125, | |
| "eval_runtime": 39.8235, | |
| "eval_samples_per_second": 89.595, | |
| "eval_steps_per_second": 11.199, | |
| "step": 1360000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.738067264047394e-05, | |
| "loss": 2.235, | |
| "step": 1365000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.5094527109661638, | |
| "eval_loss": 2.2578125, | |
| "eval_runtime": 38.8651, | |
| "eval_samples_per_second": 91.805, | |
| "eval_steps_per_second": 11.476, | |
| "step": 1365000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.730582530088721e-05, | |
| "loss": 2.236, | |
| "step": 1370000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.5093398362330085, | |
| "eval_loss": 2.2578125, | |
| "eval_runtime": 40.9049, | |
| "eval_samples_per_second": 87.227, | |
| "eval_steps_per_second": 10.903, | |
| "step": 1370000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.7230992939755456e-05, | |
| "loss": 2.2344, | |
| "step": 1375000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.5095472298567916, | |
| "eval_loss": 2.2578125, | |
| "eval_runtime": 39.9487, | |
| "eval_samples_per_second": 89.314, | |
| "eval_steps_per_second": 11.164, | |
| "step": 1375000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.715614560016872e-05, | |
| "loss": 2.2348, | |
| "step": 1380000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.5095688733129063, | |
| "eval_loss": 2.255859375, | |
| "eval_runtime": 39.951, | |
| "eval_samples_per_second": 89.309, | |
| "eval_steps_per_second": 11.164, | |
| "step": 1380000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.7081313239036975e-05, | |
| "loss": 2.2306, | |
| "step": 1385000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.5097368155736447, | |
| "eval_loss": 2.255859375, | |
| "eval_runtime": 40.0156, | |
| "eval_samples_per_second": 89.165, | |
| "eval_steps_per_second": 11.146, | |
| "step": 1385000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.7006495856360214e-05, | |
| "loss": 2.2293, | |
| "step": 1390000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 0.5097521577703843, | |
| "eval_loss": 2.255859375, | |
| "eval_runtime": 40.0042, | |
| "eval_samples_per_second": 89.191, | |
| "eval_steps_per_second": 11.149, | |
| "step": 1390000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.693166349522847e-05, | |
| "loss": 2.2311, | |
| "step": 1395000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 0.510102562585204, | |
| "eval_loss": 2.25390625, | |
| "eval_runtime": 39.9554, | |
| "eval_samples_per_second": 89.3, | |
| "eval_steps_per_second": 11.162, | |
| "step": 1395000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.685683113409672e-05, | |
| "loss": 2.231, | |
| "step": 1400000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 0.5101116035225683, | |
| "eval_loss": 2.25390625, | |
| "eval_runtime": 39.9882, | |
| "eval_samples_per_second": 89.226, | |
| "eval_steps_per_second": 11.153, | |
| "step": 1400000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.678201375141996e-05, | |
| "loss": 2.2272, | |
| "step": 1405000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 0.5102126976403701, | |
| "eval_loss": 2.251953125, | |
| "eval_runtime": 40.3186, | |
| "eval_samples_per_second": 88.495, | |
| "eval_steps_per_second": 11.062, | |
| "step": 1405000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.670718139028821e-05, | |
| "loss": 2.2264, | |
| "step": 1410000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 0.5102436560016482, | |
| "eval_loss": 2.25390625, | |
| "eval_runtime": 40.0007, | |
| "eval_samples_per_second": 89.198, | |
| "eval_steps_per_second": 11.15, | |
| "step": 1410000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.6632349029156465e-05, | |
| "loss": 2.2295, | |
| "step": 1415000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 0.5104469401084474, | |
| "eval_loss": 2.251953125, | |
| "eval_runtime": 40.1353, | |
| "eval_samples_per_second": 88.899, | |
| "eval_steps_per_second": 11.112, | |
| "step": 1415000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.655753164647971e-05, | |
| "loss": 2.2281, | |
| "step": 1420000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.5103937903554567, | |
| "eval_loss": 2.251953125, | |
| "eval_runtime": 40.0088, | |
| "eval_samples_per_second": 89.18, | |
| "eval_steps_per_second": 11.148, | |
| "step": 1420000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.6482699285347956e-05, | |
| "loss": 2.2234, | |
| "step": 1425000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.5106672102187797, | |
| "eval_loss": 2.25, | |
| "eval_runtime": 40.1918, | |
| "eval_samples_per_second": 88.774, | |
| "eval_steps_per_second": 11.097, | |
| "step": 1425000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.64078819026712e-05, | |
| "loss": 2.2293, | |
| "step": 1430000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.5107220037785639, | |
| "eval_loss": 2.25, | |
| "eval_runtime": 40.1338, | |
| "eval_samples_per_second": 88.903, | |
| "eval_steps_per_second": 11.113, | |
| "step": 1430000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.633306451999444e-05, | |
| "loss": 2.2256, | |
| "step": 1435000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.5108652889373995, | |
| "eval_loss": 2.25, | |
| "eval_runtime": 40.1228, | |
| "eval_samples_per_second": 88.927, | |
| "eval_steps_per_second": 11.116, | |
| "step": 1435000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.6258232158862694e-05, | |
| "loss": 2.2247, | |
| "step": 1440000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.5107954271486747, | |
| "eval_loss": 2.25, | |
| "eval_runtime": 40.0563, | |
| "eval_samples_per_second": 89.075, | |
| "eval_steps_per_second": 11.134, | |
| "step": 1440000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.6183384819275954e-05, | |
| "loss": 2.222, | |
| "step": 1445000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.510766386561989, | |
| "eval_loss": 2.25, | |
| "eval_runtime": 40.2217, | |
| "eval_samples_per_second": 88.708, | |
| "eval_steps_per_second": 11.089, | |
| "step": 1445000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.61085674365992e-05, | |
| "loss": 2.2228, | |
| "step": 1450000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.5106184439505719, | |
| "eval_loss": 2.248046875, | |
| "eval_runtime": 40.5305, | |
| "eval_samples_per_second": 88.032, | |
| "eval_steps_per_second": 11.004, | |
| "step": 1450000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.603372009701246e-05, | |
| "loss": 2.2241, | |
| "step": 1455000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.5110554225898505, | |
| "eval_loss": 2.248046875, | |
| "eval_runtime": 40.2413, | |
| "eval_samples_per_second": 88.665, | |
| "eval_steps_per_second": 11.083, | |
| "step": 1455000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.595891769279069e-05, | |
| "loss": 2.2219, | |
| "step": 1460000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.511077613981563, | |
| "eval_loss": 2.24609375, | |
| "eval_runtime": 40.2762, | |
| "eval_samples_per_second": 88.588, | |
| "eval_steps_per_second": 11.074, | |
| "step": 1460000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.5884085331658944e-05, | |
| "loss": 2.2219, | |
| "step": 1465000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.5112833637985525, | |
| "eval_loss": 2.24609375, | |
| "eval_runtime": 40.2334, | |
| "eval_samples_per_second": 88.682, | |
| "eval_steps_per_second": 11.085, | |
| "step": 1465000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.58092529705272e-05, | |
| "loss": 2.2215, | |
| "step": 1470000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.5112595286000464, | |
| "eval_loss": 2.24609375, | |
| "eval_runtime": 40.2987, | |
| "eval_samples_per_second": 88.539, | |
| "eval_steps_per_second": 11.067, | |
| "step": 1470000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.573445056630543e-05, | |
| "loss": 2.2193, | |
| "step": 1475000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.5116091115114694, | |
| "eval_loss": 2.244140625, | |
| "eval_runtime": 40.1594, | |
| "eval_samples_per_second": 88.846, | |
| "eval_steps_per_second": 11.106, | |
| "step": 1475000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.56595882482637e-05, | |
| "loss": 2.2183, | |
| "step": 1480000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.5114707577730144, | |
| "eval_loss": 2.244140625, | |
| "eval_runtime": 40.9626, | |
| "eval_samples_per_second": 87.104, | |
| "eval_steps_per_second": 10.888, | |
| "step": 1480000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.5584755887131956e-05, | |
| "loss": 2.2177, | |
| "step": 1485000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.5116211660946219, | |
| "eval_loss": 2.244140625, | |
| "eval_runtime": 40.3714, | |
| "eval_samples_per_second": 88.379, | |
| "eval_steps_per_second": 11.047, | |
| "step": 1485000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.55099235260002e-05, | |
| "loss": 2.2211, | |
| "step": 1490000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.5115852763129632, | |
| "eval_loss": 2.2421875, | |
| "eval_runtime": 40.2564, | |
| "eval_samples_per_second": 88.632, | |
| "eval_steps_per_second": 11.079, | |
| "step": 1490000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.5435091164868455e-05, | |
| "loss": 2.2183, | |
| "step": 1495000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.5118184229098449, | |
| "eval_loss": 2.2421875, | |
| "eval_runtime": 40.2315, | |
| "eval_samples_per_second": 88.687, | |
| "eval_steps_per_second": 11.086, | |
| "step": 1495000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.5360243825281715e-05, | |
| "loss": 2.2182, | |
| "step": 1500000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.5120186933708559, | |
| "eval_loss": 2.240234375, | |
| "eval_runtime": 40.3125, | |
| "eval_samples_per_second": 88.509, | |
| "eval_steps_per_second": 11.064, | |
| "step": 1500000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.528539648569498e-05, | |
| "loss": 2.2148, | |
| "step": 1505000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.5121849918248009, | |
| "eval_loss": 2.240234375, | |
| "eval_runtime": 40.3172, | |
| "eval_samples_per_second": 88.498, | |
| "eval_steps_per_second": 11.062, | |
| "step": 1505000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.521059408147321e-05, | |
| "loss": 2.2217, | |
| "step": 1510000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.5122685520034718, | |
| "eval_loss": 2.240234375, | |
| "eval_runtime": 40.2766, | |
| "eval_samples_per_second": 88.587, | |
| "eval_steps_per_second": 11.073, | |
| "step": 1510000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.5135761720341466e-05, | |
| "loss": 2.2117, | |
| "step": 1515000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.5123701940568713, | |
| "eval_loss": 2.23828125, | |
| "eval_runtime": 40.4763, | |
| "eval_samples_per_second": 88.15, | |
| "eval_steps_per_second": 11.019, | |
| "step": 1515000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.5060944337664705e-05, | |
| "loss": 2.2152, | |
| "step": 1520000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_accuracy": 0.5123003322681465, | |
| "eval_loss": 2.23828125, | |
| "eval_runtime": 40.309, | |
| "eval_samples_per_second": 88.516, | |
| "eval_steps_per_second": 11.065, | |
| "step": 1520000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.4986096998077965e-05, | |
| "loss": 2.2148, | |
| "step": 1525000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_accuracy": 0.5124967671799727, | |
| "eval_loss": 2.23828125, | |
| "eval_runtime": 40.2971, | |
| "eval_samples_per_second": 88.542, | |
| "eval_steps_per_second": 11.068, | |
| "step": 1525000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.491127961540121e-05, | |
| "loss": 2.2151, | |
| "step": 1530000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_accuracy": 0.5127488175549798, | |
| "eval_loss": 2.236328125, | |
| "eval_runtime": 40.3675, | |
| "eval_samples_per_second": 88.388, | |
| "eval_steps_per_second": 11.049, | |
| "step": 1530000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.483646223272445e-05, | |
| "loss": 2.2129, | |
| "step": 1535000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_accuracy": 0.5127022430291633, | |
| "eval_loss": 2.236328125, | |
| "eval_runtime": 40.4605, | |
| "eval_samples_per_second": 88.185, | |
| "eval_steps_per_second": 11.023, | |
| "step": 1535000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.47616298715927e-05, | |
| "loss": 2.2145, | |
| "step": 1540000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_accuracy": 0.5127690911721, | |
| "eval_loss": 2.236328125, | |
| "eval_runtime": 40.376, | |
| "eval_samples_per_second": 88.369, | |
| "eval_steps_per_second": 11.046, | |
| "step": 1540000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.468681248891595e-05, | |
| "loss": 2.2099, | |
| "step": 1545000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_accuracy": 0.5128871712934349, | |
| "eval_loss": 2.236328125, | |
| "eval_runtime": 40.3585, | |
| "eval_samples_per_second": 88.408, | |
| "eval_steps_per_second": 11.051, | |
| "step": 1545000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.46119651493292e-05, | |
| "loss": 2.2125, | |
| "step": 1550000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_accuracy": 0.5131964809384164, | |
| "eval_loss": 2.234375, | |
| "eval_runtime": 40.3163, | |
| "eval_samples_per_second": 88.5, | |
| "eval_steps_per_second": 11.063, | |
| "step": 1550000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.453713278819746e-05, | |
| "loss": 2.2101, | |
| "step": 1555000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.5130504561015916, | |
| "eval_loss": 2.234375, | |
| "eval_runtime": 40.2993, | |
| "eval_samples_per_second": 88.537, | |
| "eval_steps_per_second": 11.067, | |
| "step": 1555000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.4462300427065714e-05, | |
| "loss": 2.211, | |
| "step": 1560000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.5132159326521398, | |
| "eval_loss": 2.234375, | |
| "eval_runtime": 40.3465, | |
| "eval_samples_per_second": 88.434, | |
| "eval_steps_per_second": 11.054, | |
| "step": 1560000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.438743810902398e-05, | |
| "loss": 2.2086, | |
| "step": 1565000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.5131567556075729, | |
| "eval_loss": 2.234375, | |
| "eval_runtime": 40.256, | |
| "eval_samples_per_second": 88.633, | |
| "eval_steps_per_second": 11.079, | |
| "step": 1565000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.4312605747892234e-05, | |
| "loss": 2.2137, | |
| "step": 1570000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.5131545638651815, | |
| "eval_loss": 2.232421875, | |
| "eval_runtime": 40.2935, | |
| "eval_samples_per_second": 88.55, | |
| "eval_steps_per_second": 11.069, | |
| "step": 1570000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.423778836521548e-05, | |
| "loss": 2.2122, | |
| "step": 1575000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.5134236002437218, | |
| "eval_loss": 2.232421875, | |
| "eval_runtime": 40.3698, | |
| "eval_samples_per_second": 88.383, | |
| "eval_steps_per_second": 11.048, | |
| "step": 1575000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.416297098253872e-05, | |
| "loss": 2.2053, | |
| "step": 1580000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.5133767517501063, | |
| "eval_loss": 2.232421875, | |
| "eval_runtime": 40.3058, | |
| "eval_samples_per_second": 88.523, | |
| "eval_steps_per_second": 11.065, | |
| "step": 1580000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.408813862140697e-05, | |
| "loss": 2.208, | |
| "step": 1585000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.513388258397661, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 40.3539, | |
| "eval_samples_per_second": 88.418, | |
| "eval_steps_per_second": 11.052, | |
| "step": 1585000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.401329128182023e-05, | |
| "loss": 2.2081, | |
| "step": 1590000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_accuracy": 0.5135512692380189, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 42.8555, | |
| "eval_samples_per_second": 83.257, | |
| "eval_steps_per_second": 10.407, | |
| "step": 1590000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.3938458920688484e-05, | |
| "loss": 2.2077, | |
| "step": 1595000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_accuracy": 0.5137540054092202, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 40.2657, | |
| "eval_samples_per_second": 88.611, | |
| "eval_steps_per_second": 11.076, | |
| "step": 1595000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.3863611581101744e-05, | |
| "loss": 2.2061, | |
| "step": 1600000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_accuracy": 0.5135893507620688, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 41.6767, | |
| "eval_samples_per_second": 85.611, | |
| "eval_steps_per_second": 10.701, | |
| "step": 1600000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.378880917687998e-05, | |
| "loss": 2.2055, | |
| "step": 1605000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_accuracy": 0.5138961946968601, | |
| "eval_loss": 2.228515625, | |
| "eval_runtime": 40.3609, | |
| "eval_samples_per_second": 88.402, | |
| "eval_steps_per_second": 11.05, | |
| "step": 1605000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.371397681574823e-05, | |
| "loss": 2.2065, | |
| "step": 1610000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_accuracy": 0.5138923591476752, | |
| "eval_loss": 2.228515625, | |
| "eval_runtime": 40.453, | |
| "eval_samples_per_second": 88.201, | |
| "eval_steps_per_second": 11.025, | |
| "step": 1610000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.363914445461648e-05, | |
| "loss": 2.2054, | |
| "step": 1615000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_accuracy": 0.5138997562782461, | |
| "eval_loss": 2.228515625, | |
| "eval_runtime": 41.6251, | |
| "eval_samples_per_second": 85.718, | |
| "eval_steps_per_second": 10.715, | |
| "step": 1615000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.356432707193973e-05, | |
| "loss": 2.2035, | |
| "step": 1620000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.5140274252725432, | |
| "eval_loss": 2.228515625, | |
| "eval_runtime": 40.4365, | |
| "eval_samples_per_second": 88.237, | |
| "eval_steps_per_second": 11.03, | |
| "step": 1620000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.348947973235298e-05, | |
| "loss": 2.2021, | |
| "step": 1625000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.5139844123281126, | |
| "eval_loss": 2.228515625, | |
| "eval_runtime": 40.3492, | |
| "eval_samples_per_second": 88.428, | |
| "eval_steps_per_second": 11.054, | |
| "step": 1625000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.3414662349676226e-05, | |
| "loss": 2.2036, | |
| "step": 1630000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.5138233192623471, | |
| "eval_loss": 2.228515625, | |
| "eval_runtime": 40.3662, | |
| "eval_samples_per_second": 88.391, | |
| "eval_steps_per_second": 11.049, | |
| "step": 1630000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.333981501008949e-05, | |
| "loss": 2.204, | |
| "step": 1635000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.5139803028111288, | |
| "eval_loss": 2.2265625, | |
| "eval_runtime": 40.2896, | |
| "eval_samples_per_second": 88.559, | |
| "eval_steps_per_second": 11.07, | |
| "step": 1635000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.3264967670502746e-05, | |
| "loss": 2.2042, | |
| "step": 1640000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.5140860543815122, | |
| "eval_loss": 2.2265625, | |
| "eval_runtime": 42.4068, | |
| "eval_samples_per_second": 84.137, | |
| "eval_steps_per_second": 10.517, | |
| "step": 1640000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.3190135309371006e-05, | |
| "loss": 2.2024, | |
| "step": 1645000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.514173450109368, | |
| "eval_loss": 2.2265625, | |
| "eval_runtime": 40.3018, | |
| "eval_samples_per_second": 88.532, | |
| "eval_steps_per_second": 11.067, | |
| "step": 1645000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.311530294823926e-05, | |
| "loss": 2.2023, | |
| "step": 1650000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.5144172814504074, | |
| "eval_loss": 2.2265625, | |
| "eval_runtime": 40.2694, | |
| "eval_samples_per_second": 88.603, | |
| "eval_steps_per_second": 11.075, | |
| "step": 1650000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.30404855655625e-05, | |
| "loss": 2.1976, | |
| "step": 1655000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.5145805662585642, | |
| "eval_loss": 2.224609375, | |
| "eval_runtime": 40.3729, | |
| "eval_samples_per_second": 88.376, | |
| "eval_steps_per_second": 11.047, | |
| "step": 1655000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.296565320443075e-05, | |
| "loss": 2.2028, | |
| "step": 1660000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.5147172761902257, | |
| "eval_loss": 2.224609375, | |
| "eval_runtime": 40.282, | |
| "eval_samples_per_second": 88.576, | |
| "eval_steps_per_second": 11.072, | |
| "step": 1660000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.289080586484401e-05, | |
| "loss": 2.1971, | |
| "step": 1665000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.5146457705947074, | |
| "eval_loss": 2.224609375, | |
| "eval_runtime": 40.4909, | |
| "eval_samples_per_second": 88.119, | |
| "eval_steps_per_second": 11.015, | |
| "step": 1665000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.281595852525727e-05, | |
| "loss": 2.1978, | |
| "step": 1670000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.5146065931994617, | |
| "eval_loss": 2.224609375, | |
| "eval_runtime": 40.3534, | |
| "eval_samples_per_second": 88.419, | |
| "eval_steps_per_second": 11.052, | |
| "step": 1670000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.27411561210355e-05, | |
| "loss": 2.1955, | |
| "step": 1675000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.5148249455352015, | |
| "eval_loss": 2.22265625, | |
| "eval_runtime": 39.3164, | |
| "eval_samples_per_second": 90.751, | |
| "eval_steps_per_second": 11.344, | |
| "step": 1675000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.266630878144877e-05, | |
| "loss": 2.1967, | |
| "step": 1680000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.5146874137001434, | |
| "eval_loss": 2.22265625, | |
| "eval_runtime": 40.3287, | |
| "eval_samples_per_second": 88.473, | |
| "eval_steps_per_second": 11.059, | |
| "step": 1680000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.259149139877201e-05, | |
| "loss": 2.1975, | |
| "step": 1685000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.5151745284466245, | |
| "eval_loss": 2.22265625, | |
| "eval_runtime": 40.2734, | |
| "eval_samples_per_second": 88.594, | |
| "eval_steps_per_second": 11.074, | |
| "step": 1685000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.251668899455024e-05, | |
| "loss": 2.1972, | |
| "step": 1690000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.5148622051558548, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 40.2657, | |
| "eval_samples_per_second": 88.611, | |
| "eval_steps_per_second": 11.076, | |
| "step": 1690000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.2441841654963506e-05, | |
| "loss": 2.1967, | |
| "step": 1695000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.5150745302000184, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 40.2491, | |
| "eval_samples_per_second": 88.648, | |
| "eval_steps_per_second": 11.081, | |
| "step": 1695000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.236699431537676e-05, | |
| "loss": 2.194, | |
| "step": 1700000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.5150706946508335, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 40.3323, | |
| "eval_samples_per_second": 88.465, | |
| "eval_steps_per_second": 11.058, | |
| "step": 1700000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.9925167638868255e-05, | |
| "loss": 2.2009, | |
| "step": 1705000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.5139096191190072, | |
| "eval_loss": 2.228515625, | |
| "eval_runtime": 39.4304, | |
| "eval_samples_per_second": 90.489, | |
| "eval_steps_per_second": 11.311, | |
| "step": 1705000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.9850350256191494e-05, | |
| "loss": 2.2085, | |
| "step": 1710000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.5136279802217166, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 39.2325, | |
| "eval_samples_per_second": 90.945, | |
| "eval_steps_per_second": 11.368, | |
| "step": 1710000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.977551789505975e-05, | |
| "loss": 2.2077, | |
| "step": 1715000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.5136983899460393, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 39.2977, | |
| "eval_samples_per_second": 90.794, | |
| "eval_steps_per_second": 11.349, | |
| "step": 1715000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.9700670555473014e-05, | |
| "loss": 2.205, | |
| "step": 1720000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 0.51339866917402, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 39.3375, | |
| "eval_samples_per_second": 90.702, | |
| "eval_steps_per_second": 11.338, | |
| "step": 1720000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.962583819434126e-05, | |
| "loss": 2.2063, | |
| "step": 1725000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 0.5134490792490214, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 39.3134, | |
| "eval_samples_per_second": 90.758, | |
| "eval_steps_per_second": 11.345, | |
| "step": 1725000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.9551020811664506e-05, | |
| "loss": 2.2076, | |
| "step": 1730000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 0.5134718185763318, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 39.2618, | |
| "eval_samples_per_second": 90.877, | |
| "eval_steps_per_second": 11.36, | |
| "step": 1730000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.9476173472077765e-05, | |
| "loss": 2.2036, | |
| "step": 1735000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 0.5133134651885556, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 39.3642, | |
| "eval_samples_per_second": 90.641, | |
| "eval_steps_per_second": 11.33, | |
| "step": 1735000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.9401356089401005e-05, | |
| "loss": 2.2064, | |
| "step": 1740000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 0.5138024977096292, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 39.4343, | |
| "eval_samples_per_second": 90.48, | |
| "eval_steps_per_second": 11.31, | |
| "step": 1740000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.932650874981427e-05, | |
| "loss": 2.2053, | |
| "step": 1745000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 0.5136904448798706, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 39.4148, | |
| "eval_samples_per_second": 90.524, | |
| "eval_steps_per_second": 11.316, | |
| "step": 1745000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.9251676388682524e-05, | |
| "loss": 2.2048, | |
| "step": 1750000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 0.5138953727934633, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 39.3715, | |
| "eval_samples_per_second": 90.624, | |
| "eval_steps_per_second": 11.328, | |
| "step": 1750000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.917684402755077e-05, | |
| "loss": 2.2075, | |
| "step": 1755000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.5137868815450907, | |
| "eval_loss": 2.23046875, | |
| "eval_runtime": 39.4167, | |
| "eval_samples_per_second": 90.52, | |
| "eval_steps_per_second": 11.315, | |
| "step": 1755000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.910201166641902e-05, | |
| "loss": 2.2041, | |
| "step": 1760000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.5136414046438638, | |
| "eval_loss": 2.228515625, | |
| "eval_runtime": 39.46, | |
| "eval_samples_per_second": 90.421, | |
| "eval_steps_per_second": 11.303, | |
| "step": 1760000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.902716432683229e-05, | |
| "loss": 2.2057, | |
| "step": 1765000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.5138789347255281, | |
| "eval_loss": 2.228515625, | |
| "eval_runtime": 39.4616, | |
| "eval_samples_per_second": 90.417, | |
| "eval_steps_per_second": 11.302, | |
| "step": 1765000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.8952331965700536e-05, | |
| "loss": 2.2054, | |
| "step": 1770000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.5138929070832731, | |
| "eval_loss": 2.228515625, | |
| "eval_runtime": 39.5693, | |
| "eval_samples_per_second": 90.171, | |
| "eval_steps_per_second": 11.271, | |
| "step": 1770000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.887752956147877e-05, | |
| "loss": 2.2085, | |
| "step": 1775000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.5139266051225403, | |
| "eval_loss": 2.228515625, | |
| "eval_runtime": 39.4709, | |
| "eval_samples_per_second": 90.396, | |
| "eval_steps_per_second": 11.299, | |
| "step": 1775000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.880269720034703e-05, | |
| "loss": 2.2051, | |
| "step": 1780000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.5141471492006715, | |
| "eval_loss": 2.2265625, | |
| "eval_runtime": 39.4552, | |
| "eval_samples_per_second": 90.432, | |
| "eval_steps_per_second": 11.304, | |
| "step": 1780000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.872786483921527e-05, | |
| "loss": 2.2023, | |
| "step": 1785000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.5139211257665619, | |
| "eval_loss": 2.2265625, | |
| "eval_runtime": 39.5212, | |
| "eval_samples_per_second": 90.281, | |
| "eval_steps_per_second": 11.285, | |
| "step": 1785000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.8653032478083526e-05, | |
| "loss": 2.205, | |
| "step": 1790000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 0.5140561918914298, | |
| "eval_loss": 2.2265625, | |
| "eval_runtime": 39.4725, | |
| "eval_samples_per_second": 90.392, | |
| "eval_steps_per_second": 11.299, | |
| "step": 1790000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.857818513849679e-05, | |
| "loss": 2.2009, | |
| "step": 1795000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 0.5141463272972748, | |
| "eval_loss": 2.2265625, | |
| "eval_runtime": 39.6114, | |
| "eval_samples_per_second": 90.075, | |
| "eval_steps_per_second": 11.259, | |
| "step": 1795000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.850335277736504e-05, | |
| "loss": 2.1998, | |
| "step": 1800000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 0.5143134476546165, | |
| "eval_loss": 2.2265625, | |
| "eval_runtime": 39.8498, | |
| "eval_samples_per_second": 89.536, | |
| "eval_steps_per_second": 11.192, | |
| "step": 1800000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.8428505437778305e-05, | |
| "loss": 2.2009, | |
| "step": 1805000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 0.5143608440838298, | |
| "eval_loss": 2.224609375, | |
| "eval_runtime": 38.3896, | |
| "eval_samples_per_second": 92.942, | |
| "eval_steps_per_second": 11.618, | |
| "step": 1805000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.8353688055101544e-05, | |
| "loss": 2.2027, | |
| "step": 1810000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 0.5143013930714639, | |
| "eval_loss": 2.2265625, | |
| "eval_runtime": 39.6231, | |
| "eval_samples_per_second": 90.048, | |
| "eval_steps_per_second": 11.256, | |
| "step": 1810000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.8278840715514804e-05, | |
| "loss": 2.2007, | |
| "step": 1815000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 0.5145857716467437, | |
| "eval_loss": 2.224609375, | |
| "eval_runtime": 39.6832, | |
| "eval_samples_per_second": 89.912, | |
| "eval_steps_per_second": 11.239, | |
| "step": 1815000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.820402333283805e-05, | |
| "loss": 2.1978, | |
| "step": 1820000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.5144972800476924, | |
| "eval_loss": 2.224609375, | |
| "eval_runtime": 39.721, | |
| "eval_samples_per_second": 89.827, | |
| "eval_steps_per_second": 11.228, | |
| "step": 1820000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.812920595016129e-05, | |
| "loss": 2.1999, | |
| "step": 1825000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.5145970043264995, | |
| "eval_loss": 2.22265625, | |
| "eval_runtime": 39.6521, | |
| "eval_samples_per_second": 89.983, | |
| "eval_steps_per_second": 11.248, | |
| "step": 1825000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.8054388567484535e-05, | |
| "loss": 2.1978, | |
| "step": 1830000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.5148150826944404, | |
| "eval_loss": 2.22265625, | |
| "eval_runtime": 39.5596, | |
| "eval_samples_per_second": 90.193, | |
| "eval_steps_per_second": 11.274, | |
| "step": 1830000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.79795262494428e-05, | |
| "loss": 2.1989, | |
| "step": 1835000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.5147271390309869, | |
| "eval_loss": 2.22265625, | |
| "eval_runtime": 39.6103, | |
| "eval_samples_per_second": 90.077, | |
| "eval_steps_per_second": 11.26, | |
| "step": 1835000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.790467890985607e-05, | |
| "loss": 2.1989, | |
| "step": 1840000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.5147980966909074, | |
| "eval_loss": 2.22265625, | |
| "eval_runtime": 39.6511, | |
| "eval_samples_per_second": 89.985, | |
| "eval_steps_per_second": 11.248, | |
| "step": 1840000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.7829876505634294e-05, | |
| "loss": 2.1982, | |
| "step": 1845000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.5149764497280047, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 39.6684, | |
| "eval_samples_per_second": 89.946, | |
| "eval_steps_per_second": 11.243, | |
| "step": 1845000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.775501418759257e-05, | |
| "loss": 2.1974, | |
| "step": 1850000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.515062201649067, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 39.5826, | |
| "eval_samples_per_second": 90.141, | |
| "eval_steps_per_second": 11.268, | |
| "step": 1850000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.768016684800583e-05, | |
| "loss": 2.1972, | |
| "step": 1855000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.5151389126327648, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 39.774, | |
| "eval_samples_per_second": 89.707, | |
| "eval_steps_per_second": 11.213, | |
| "step": 1855000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.760533448687408e-05, | |
| "loss": 2.1966, | |
| "step": 1860000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.515106858400291, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 39.7218, | |
| "eval_samples_per_second": 89.825, | |
| "eval_steps_per_second": 11.228, | |
| "step": 1860000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.753050212574233e-05, | |
| "loss": 2.198, | |
| "step": 1865000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.5150282296420008, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 39.6783, | |
| "eval_samples_per_second": 89.923, | |
| "eval_steps_per_second": 11.24, | |
| "step": 1865000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.7455669764610586e-05, | |
| "loss": 2.1978, | |
| "step": 1870000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.5151887747721684, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 39.753, | |
| "eval_samples_per_second": 89.754, | |
| "eval_steps_per_second": 11.219, | |
| "step": 1870000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.738083740347884e-05, | |
| "loss": 2.1938, | |
| "step": 1875000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.5152000074519242, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 39.7549, | |
| "eval_samples_per_second": 89.75, | |
| "eval_steps_per_second": 11.219, | |
| "step": 1875000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.730600504234709e-05, | |
| "loss": 2.1908, | |
| "step": 1880000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.5152191851978486, | |
| "eval_loss": 2.21875, | |
| "eval_runtime": 39.6445, | |
| "eval_samples_per_second": 90.0, | |
| "eval_steps_per_second": 11.25, | |
| "step": 1880000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.723118765967034e-05, | |
| "loss": 2.1899, | |
| "step": 1885000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.5151602821210807, | |
| "eval_loss": 2.21875, | |
| "eval_runtime": 39.7932, | |
| "eval_samples_per_second": 89.664, | |
| "eval_steps_per_second": 11.208, | |
| "step": 1885000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.715634032008359e-05, | |
| "loss": 2.1938, | |
| "step": 1890000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.5151682271872493, | |
| "eval_loss": 2.21875, | |
| "eval_runtime": 39.7818, | |
| "eval_samples_per_second": 89.689, | |
| "eval_steps_per_second": 11.211, | |
| "step": 1890000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.708150795895185e-05, | |
| "loss": 2.1909, | |
| "step": 1895000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.5153520595803252, | |
| "eval_loss": 2.21875, | |
| "eval_runtime": 39.8181, | |
| "eval_samples_per_second": 89.607, | |
| "eval_steps_per_second": 11.201, | |
| "step": 1895000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.700669057627509e-05, | |
| "loss": 2.1921, | |
| "step": 1900000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.5155320564242162, | |
| "eval_loss": 2.21875, | |
| "eval_runtime": 40.2538, | |
| "eval_samples_per_second": 88.638, | |
| "eval_steps_per_second": 11.08, | |
| "step": 1900000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.693187319359833e-05, | |
| "loss": 2.1926, | |
| "step": 1905000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.5155928772755766, | |
| "eval_loss": 2.216796875, | |
| "eval_runtime": 39.9117, | |
| "eval_samples_per_second": 89.397, | |
| "eval_steps_per_second": 11.175, | |
| "step": 1905000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.685704083246658e-05, | |
| "loss": 2.194, | |
| "step": 1910000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.5154164420130716, | |
| "eval_loss": 2.216796875, | |
| "eval_runtime": 39.9039, | |
| "eval_samples_per_second": 89.415, | |
| "eval_steps_per_second": 11.177, | |
| "step": 1910000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.992518261732325e-05, | |
| "loss": 2.1942, | |
| "step": 1915000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.5151652135414612, | |
| "eval_loss": 2.21875, | |
| "eval_runtime": 39.5459, | |
| "eval_samples_per_second": 90.224, | |
| "eval_steps_per_second": 11.278, | |
| "step": 1915000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.985036523464649e-05, | |
| "loss": 2.1947, | |
| "step": 1920000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.5150559003896918, | |
| "eval_loss": 2.21875, | |
| "eval_runtime": 39.6198, | |
| "eval_samples_per_second": 90.056, | |
| "eval_steps_per_second": 11.257, | |
| "step": 1920000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.977553287351474e-05, | |
| "loss": 2.1941, | |
| "step": 1925000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.5150868587509698, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 39.5078, | |
| "eval_samples_per_second": 90.311, | |
| "eval_steps_per_second": 11.289, | |
| "step": 1925000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.970071549083798e-05, | |
| "loss": 2.1984, | |
| "step": 1930000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.5151849392229835, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 39.6089, | |
| "eval_samples_per_second": 90.081, | |
| "eval_steps_per_second": 11.26, | |
| "step": 1930000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.9625868151251246e-05, | |
| "loss": 2.1929, | |
| "step": 1935000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.5150846670085785, | |
| "eval_loss": 2.220703125, | |
| "eval_runtime": 39.574, | |
| "eval_samples_per_second": 90.16, | |
| "eval_steps_per_second": 11.27, | |
| "step": 1935000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.955106574702947e-05, | |
| "loss": 2.1921, | |
| "step": 1940000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.515442742921768, | |
| "eval_loss": 2.21875, | |
| "eval_runtime": 39.6485, | |
| "eval_samples_per_second": 89.991, | |
| "eval_steps_per_second": 11.249, | |
| "step": 1940000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.947621840744274e-05, | |
| "loss": 2.1932, | |
| "step": 1945000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.5153391830937759, | |
| "eval_loss": 2.21875, | |
| "eval_runtime": 39.6644, | |
| "eval_samples_per_second": 89.955, | |
| "eval_steps_per_second": 11.244, | |
| "step": 1945000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.940138604631099e-05, | |
| "loss": 2.1959, | |
| "step": 1950000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.5154117845604899, | |
| "eval_loss": 2.21875, | |
| "eval_runtime": 39.6108, | |
| "eval_samples_per_second": 90.077, | |
| "eval_steps_per_second": 11.26, | |
| "step": 1950000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.932656866363423e-05, | |
| "loss": 2.1927, | |
| "step": 1955000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.5153761687466302, | |
| "eval_loss": 2.21875, | |
| "eval_runtime": 39.6613, | |
| "eval_samples_per_second": 89.962, | |
| "eval_steps_per_second": 11.245, | |
| "step": 1955000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.9251751280957475e-05, | |
| "loss": 2.1949, | |
| "step": 1960000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.5154917831577748, | |
| "eval_loss": 2.21875, | |
| "eval_runtime": 39.8742, | |
| "eval_samples_per_second": 89.481, | |
| "eval_steps_per_second": 11.185, | |
| "step": 1960000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.9176933898280714e-05, | |
| "loss": 2.1918, | |
| "step": 1965000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.5153786344568205, | |
| "eval_loss": 2.216796875, | |
| "eval_runtime": 39.9956, | |
| "eval_samples_per_second": 89.21, | |
| "eval_steps_per_second": 11.151, | |
| "step": 1965000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.910210153714897e-05, | |
| "loss": 2.1957, | |
| "step": 1970000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.5154745231864427, | |
| "eval_loss": 2.216796875, | |
| "eval_runtime": 39.6981, | |
| "eval_samples_per_second": 89.878, | |
| "eval_steps_per_second": 11.235, | |
| "step": 1970000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.902726917601722e-05, | |
| "loss": 2.1884, | |
| "step": 1975000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.51571890246308, | |
| "eval_loss": 2.216796875, | |
| "eval_runtime": 39.9761, | |
| "eval_samples_per_second": 89.253, | |
| "eval_steps_per_second": 11.157, | |
| "step": 1975000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.895245179334046e-05, | |
| "loss": 2.1942, | |
| "step": 1980000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.5156073975689194, | |
| "eval_loss": 2.21484375, | |
| "eval_runtime": 39.9647, | |
| "eval_samples_per_second": 89.279, | |
| "eval_steps_per_second": 11.16, | |
| "step": 1980000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.8877604453753726e-05, | |
| "loss": 2.1938, | |
| "step": 1985000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.5155802747568262, | |
| "eval_loss": 2.216796875, | |
| "eval_runtime": 39.7886, | |
| "eval_samples_per_second": 89.674, | |
| "eval_steps_per_second": 11.209, | |
| "step": 1985000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.8802787071076965e-05, | |
| "loss": 2.1935, | |
| "step": 1990000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_accuracy": 0.5160071165875447, | |
| "eval_loss": 2.21484375, | |
| "eval_runtime": 40.1621, | |
| "eval_samples_per_second": 88.84, | |
| "eval_steps_per_second": 11.105, | |
| "step": 1990000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.872795470994522e-05, | |
| "loss": 2.1902, | |
| "step": 1995000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_accuracy": 0.5157413678225916, | |
| "eval_loss": 2.21484375, | |
| "eval_runtime": 39.8406, | |
| "eval_samples_per_second": 89.557, | |
| "eval_steps_per_second": 11.195, | |
| "step": 1995000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.865310737035848e-05, | |
| "loss": 2.188, | |
| "step": 2000000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_accuracy": 0.5158473933607739, | |
| "eval_loss": 2.21484375, | |
| "eval_runtime": 39.7424, | |
| "eval_samples_per_second": 89.778, | |
| "eval_steps_per_second": 11.222, | |
| "step": 2000000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.8578289987681716e-05, | |
| "loss": 2.1862, | |
| "step": 2005000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_accuracy": 0.5159235564088739, | |
| "eval_loss": 2.212890625, | |
| "eval_runtime": 39.7499, | |
| "eval_samples_per_second": 89.761, | |
| "eval_steps_per_second": 11.22, | |
| "step": 2005000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.850347260500496e-05, | |
| "loss": 2.1886, | |
| "step": 2010000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_accuracy": 0.5160695812456987, | |
| "eval_loss": 2.212890625, | |
| "eval_runtime": 40.1441, | |
| "eval_samples_per_second": 88.88, | |
| "eval_steps_per_second": 11.11, | |
| "step": 2010000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.8428610286963236e-05, | |
| "loss": 2.1811, | |
| "step": 2015000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_accuracy": 0.516141360809016, | |
| "eval_loss": 2.212890625, | |
| "eval_runtime": 40.0164, | |
| "eval_samples_per_second": 89.163, | |
| "eval_steps_per_second": 11.145, | |
| "step": 2015000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.835377792583148e-05, | |
| "loss": 2.19, | |
| "step": 2020000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.5160435543048012, | |
| "eval_loss": 2.212890625, | |
| "eval_runtime": 39.7821, | |
| "eval_samples_per_second": 89.689, | |
| "eval_steps_per_second": 11.211, | |
| "step": 2020000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.827893058624475e-05, | |
| "loss": 2.1895, | |
| "step": 2025000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.5164950532374227, | |
| "eval_loss": 2.212890625, | |
| "eval_runtime": 39.7945, | |
| "eval_samples_per_second": 89.661, | |
| "eval_steps_per_second": 11.208, | |
| "step": 2025000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.820411320356799e-05, | |
| "loss": 2.1904, | |
| "step": 2030000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.516082457732248, | |
| "eval_loss": 2.212890625, | |
| "eval_runtime": 39.8538, | |
| "eval_samples_per_second": 89.527, | |
| "eval_steps_per_second": 11.191, | |
| "step": 2030000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.812928084243624e-05, | |
| "loss": 2.1854, | |
| "step": 2035000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.516504368142586, | |
| "eval_loss": 2.212890625, | |
| "eval_runtime": 39.7999, | |
| "eval_samples_per_second": 89.649, | |
| "eval_steps_per_second": 11.206, | |
| "step": 2035000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.805447843821447e-05, | |
| "loss": 2.1883, | |
| "step": 2040000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.51652875127669, | |
| "eval_loss": 2.2109375, | |
| "eval_runtime": 39.8414, | |
| "eval_samples_per_second": 89.555, | |
| "eval_steps_per_second": 11.194, | |
| "step": 2040000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.7979646077082725e-05, | |
| "loss": 2.1859, | |
| "step": 2045000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.5165314909546791, | |
| "eval_loss": 2.2109375, | |
| "eval_runtime": 39.9091, | |
| "eval_samples_per_second": 89.403, | |
| "eval_steps_per_second": 11.175, | |
| "step": 2045000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.790481371595098e-05, | |
| "loss": 2.1849, | |
| "step": 2050000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.5167750483279198, | |
| "eval_loss": 2.208984375, | |
| "eval_runtime": 39.8945, | |
| "eval_samples_per_second": 89.436, | |
| "eval_steps_per_second": 11.179, | |
| "step": 2050000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.782996637636424e-05, | |
| "loss": 2.1844, | |
| "step": 2055000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.5167309395122934, | |
| "eval_loss": 2.2109375, | |
| "eval_runtime": 39.9417, | |
| "eval_samples_per_second": 89.33, | |
| "eval_steps_per_second": 11.166, | |
| "step": 2055000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.7755148993687484e-05, | |
| "loss": 2.1866, | |
| "step": 2060000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.5166953236984337, | |
| "eval_loss": 2.208984375, | |
| "eval_runtime": 39.9499, | |
| "eval_samples_per_second": 89.312, | |
| "eval_steps_per_second": 11.164, | |
| "step": 2060000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.768031663255573e-05, | |
| "loss": 2.1865, | |
| "step": 2065000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.5167821714906917, | |
| "eval_loss": 2.208984375, | |
| "eval_runtime": 39.8531, | |
| "eval_samples_per_second": 89.529, | |
| "eval_steps_per_second": 11.191, | |
| "step": 2065000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.7605499249878976e-05, | |
| "loss": 2.1846, | |
| "step": 2070000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.5171333982089081, | |
| "eval_loss": 2.20703125, | |
| "eval_runtime": 39.9613, | |
| "eval_samples_per_second": 89.286, | |
| "eval_steps_per_second": 11.161, | |
| "step": 2070000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.753066688874723e-05, | |
| "loss": 2.1821, | |
| "step": 2075000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.5169824419517027, | |
| "eval_loss": 2.20703125, | |
| "eval_runtime": 39.9564, | |
| "eval_samples_per_second": 89.297, | |
| "eval_steps_per_second": 11.162, | |
| "step": 2075000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.7455849506070474e-05, | |
| "loss": 2.184, | |
| "step": 2080000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.5170257288639323, | |
| "eval_loss": 2.20703125, | |
| "eval_runtime": 41.0867, | |
| "eval_samples_per_second": 86.841, | |
| "eval_steps_per_second": 10.855, | |
| "step": 2080000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.738101714493872e-05, | |
| "loss": 2.1847, | |
| "step": 2085000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.5173227099579624, | |
| "eval_loss": 2.205078125, | |
| "eval_runtime": 39.9314, | |
| "eval_samples_per_second": 89.353, | |
| "eval_steps_per_second": 11.169, | |
| "step": 2085000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.730618478380697e-05, | |
| "loss": 2.1836, | |
| "step": 2090000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_accuracy": 0.5173509286412512, | |
| "eval_loss": 2.205078125, | |
| "eval_runtime": 40.0844, | |
| "eval_samples_per_second": 89.012, | |
| "eval_steps_per_second": 11.127, | |
| "step": 2090000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.723136740113022e-05, | |
| "loss": 2.1791, | |
| "step": 2095000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_accuracy": 0.5174243520113619, | |
| "eval_loss": 2.205078125, | |
| "eval_runtime": 39.9803, | |
| "eval_samples_per_second": 89.244, | |
| "eval_steps_per_second": 11.156, | |
| "step": 2095000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.715652006154348e-05, | |
| "loss": 2.1812, | |
| "step": 2100000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_accuracy": 0.5173136690205979, | |
| "eval_loss": 2.205078125, | |
| "eval_runtime": 40.0609, | |
| "eval_samples_per_second": 89.064, | |
| "eval_steps_per_second": 11.133, | |
| "step": 2100000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.708168770041173e-05, | |
| "loss": 2.1835, | |
| "step": 2105000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_accuracy": 0.5175572263938386, | |
| "eval_loss": 2.205078125, | |
| "eval_runtime": 40.0398, | |
| "eval_samples_per_second": 89.111, | |
| "eval_steps_per_second": 11.139, | |
| "step": 2105000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.700687031773498e-05, | |
| "loss": 2.1806, | |
| "step": 2110000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_accuracy": 0.517552294973458, | |
| "eval_loss": 2.205078125, | |
| "eval_runtime": 40.3766, | |
| "eval_samples_per_second": 88.368, | |
| "eval_steps_per_second": 11.046, | |
| "step": 2110000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.6932037956603224e-05, | |
| "loss": 2.1832, | |
| "step": 2115000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_accuracy": 0.5174777757321516, | |
| "eval_loss": 2.205078125, | |
| "eval_runtime": 41.019, | |
| "eval_samples_per_second": 86.984, | |
| "eval_steps_per_second": 10.873, | |
| "step": 2115000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.6857205595471476e-05, | |
| "loss": 2.1766, | |
| "step": 2120000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.5177577708226486, | |
| "eval_loss": 2.203125, | |
| "eval_runtime": 40.432, | |
| "eval_samples_per_second": 88.247, | |
| "eval_steps_per_second": 11.031, | |
| "step": 2120000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.678237323433973e-05, | |
| "loss": 2.1775, | |
| "step": 2125000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.5178353037097432, | |
| "eval_loss": 2.203125, | |
| "eval_runtime": 41.1107, | |
| "eval_samples_per_second": 86.79, | |
| "eval_steps_per_second": 10.849, | |
| "step": 2125000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.6707540873207975e-05, | |
| "loss": 2.1801, | |
| "step": 2130000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.5176514713166673, | |
| "eval_loss": 2.203125, | |
| "eval_runtime": 41.0472, | |
| "eval_samples_per_second": 86.924, | |
| "eval_steps_per_second": 10.866, | |
| "step": 2130000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.6632708512076235e-05, | |
| "loss": 2.1789, | |
| "step": 2135000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.5177799622143612, | |
| "eval_loss": 2.203125, | |
| "eval_runtime": 41.4469, | |
| "eval_samples_per_second": 86.086, | |
| "eval_steps_per_second": 10.761, | |
| "step": 2135000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.655787615094449e-05, | |
| "loss": 2.1794, | |
| "step": 2140000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.5178131123180306, | |
| "eval_loss": 2.203125, | |
| "eval_runtime": 40.4764, | |
| "eval_samples_per_second": 88.15, | |
| "eval_steps_per_second": 11.019, | |
| "step": 2140000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.6483043789812734e-05, | |
| "loss": 2.1799, | |
| "step": 2145000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.5178917410763209, | |
| "eval_loss": 2.201171875, | |
| "eval_runtime": 40.376, | |
| "eval_samples_per_second": 88.369, | |
| "eval_steps_per_second": 11.046, | |
| "step": 2145000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.6408196450226e-05, | |
| "loss": 2.1746, | |
| "step": 2150000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.5180391357521402, | |
| "eval_loss": 2.201171875, | |
| "eval_runtime": 40.5387, | |
| "eval_samples_per_second": 88.015, | |
| "eval_steps_per_second": 11.002, | |
| "step": 2150000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.633336408909425e-05, | |
| "loss": 2.1766, | |
| "step": 2155000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.5178873575915381, | |
| "eval_loss": 2.201171875, | |
| "eval_runtime": 40.514, | |
| "eval_samples_per_second": 88.068, | |
| "eval_steps_per_second": 11.009, | |
| "step": 2155000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.62585317279625e-05, | |
| "loss": 2.1754, | |
| "step": 2160000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.5177369492699306, | |
| "eval_loss": 2.201171875, | |
| "eval_runtime": 42.6971, | |
| "eval_samples_per_second": 83.565, | |
| "eval_steps_per_second": 10.446, | |
| "step": 2160000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.618369936683075e-05, | |
| "loss": 2.1764, | |
| "step": 2165000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.5177405108513166, | |
| "eval_loss": 2.201171875, | |
| "eval_runtime": 40.5808, | |
| "eval_samples_per_second": 87.923, | |
| "eval_steps_per_second": 10.99, | |
| "step": 2165000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.6108881984154e-05, | |
| "loss": 2.1745, | |
| "step": 2170000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.51831337751886, | |
| "eval_loss": 2.19921875, | |
| "eval_runtime": 42.1403, | |
| "eval_samples_per_second": 84.67, | |
| "eval_steps_per_second": 10.584, | |
| "step": 2170000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.6034049623022244e-05, | |
| "loss": 2.1735, | |
| "step": 2175000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.5180128348434438, | |
| "eval_loss": 2.19921875, | |
| "eval_runtime": 40.2519, | |
| "eval_samples_per_second": 88.642, | |
| "eval_steps_per_second": 11.08, | |
| "step": 2175000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.59592172618905e-05, | |
| "loss": 2.1778, | |
| "step": 2180000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.5180588614336625, | |
| "eval_loss": 2.19921875, | |
| "eval_runtime": 42.4462, | |
| "eval_samples_per_second": 84.059, | |
| "eval_steps_per_second": 10.507, | |
| "step": 2180000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.5884384900758757e-05, | |
| "loss": 2.1717, | |
| "step": 2185000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.5183484453971218, | |
| "eval_loss": 2.19921875, | |
| "eval_runtime": 41.2843, | |
| "eval_samples_per_second": 86.425, | |
| "eval_steps_per_second": 10.803, | |
| "step": 2185000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.580953756117201e-05, | |
| "loss": 2.1752, | |
| "step": 2190000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_accuracy": 0.518520497174844, | |
| "eval_loss": 2.197265625, | |
| "eval_runtime": 40.4537, | |
| "eval_samples_per_second": 88.2, | |
| "eval_steps_per_second": 11.025, | |
| "step": 2190000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.5734720178495255e-05, | |
| "loss": 2.1747, | |
| "step": 2195000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_accuracy": 0.5184799499406038, | |
| "eval_loss": 2.197265625, | |
| "eval_runtime": 43.6458, | |
| "eval_samples_per_second": 81.749, | |
| "eval_steps_per_second": 10.219, | |
| "step": 2195000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.56599027958185e-05, | |
| "loss": 2.1754, | |
| "step": 2200000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_accuracy": 0.5186040573535149, | |
| "eval_loss": 2.197265625, | |
| "eval_runtime": 41.0029, | |
| "eval_samples_per_second": 87.018, | |
| "eval_steps_per_second": 10.877, | |
| "step": 2200000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.558508541314174e-05, | |
| "loss": 2.1728, | |
| "step": 2205000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_accuracy": 0.5187728215176501, | |
| "eval_loss": 2.197265625, | |
| "eval_runtime": 40.4446, | |
| "eval_samples_per_second": 88.22, | |
| "eval_steps_per_second": 11.027, | |
| "step": 2205000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.551025305200999e-05, | |
| "loss": 2.1684, | |
| "step": 2210000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_accuracy": 0.5185736469278347, | |
| "eval_loss": 2.197265625, | |
| "eval_runtime": 40.6605, | |
| "eval_samples_per_second": 87.751, | |
| "eval_steps_per_second": 10.969, | |
| "step": 2210000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.543540571242325e-05, | |
| "loss": 2.1722, | |
| "step": 2215000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_accuracy": 0.5188199439790645, | |
| "eval_loss": 2.1953125, | |
| "eval_runtime": 40.5747, | |
| "eval_samples_per_second": 87.937, | |
| "eval_steps_per_second": 10.992, | |
| "step": 2215000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.536058832974649e-05, | |
| "loss": 2.1692, | |
| "step": 2220000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.519004872243336, | |
| "eval_loss": 2.1953125, | |
| "eval_runtime": 41.1424, | |
| "eval_samples_per_second": 86.723, | |
| "eval_steps_per_second": 10.84, | |
| "step": 2220000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.528577094706974e-05, | |
| "loss": 2.176, | |
| "step": 2225000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.519122404429073, | |
| "eval_loss": 2.1953125, | |
| "eval_runtime": 42.7268, | |
| "eval_samples_per_second": 83.507, | |
| "eval_steps_per_second": 10.438, | |
| "step": 2225000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.5210923607483e-05, | |
| "loss": 2.1697, | |
| "step": 2230000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.5190202144400756, | |
| "eval_loss": 2.1953125, | |
| "eval_runtime": 40.4249, | |
| "eval_samples_per_second": 88.262, | |
| "eval_steps_per_second": 11.033, | |
| "step": 2230000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.5136106224806244e-05, | |
| "loss": 2.1731, | |
| "step": 2235000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.5190837749694251, | |
| "eval_loss": 2.1953125, | |
| "eval_runtime": 42.0959, | |
| "eval_samples_per_second": 84.759, | |
| "eval_steps_per_second": 10.595, | |
| "step": 2235000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.506124390676451e-05, | |
| "loss": 2.173, | |
| "step": 2240000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.5191125415883119, | |
| "eval_loss": 2.193359375, | |
| "eval_runtime": 40.9494, | |
| "eval_samples_per_second": 87.132, | |
| "eval_steps_per_second": 10.892, | |
| "step": 2240000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.498644150254274e-05, | |
| "loss": 2.1714, | |
| "step": 2245000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.5192900727220126, | |
| "eval_loss": 2.193359375, | |
| "eval_runtime": 40.4515, | |
| "eval_samples_per_second": 88.204, | |
| "eval_steps_per_second": 11.026, | |
| "step": 2245000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.4911594162956e-05, | |
| "loss": 2.1719, | |
| "step": 2250000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.5192147315773094, | |
| "eval_loss": 2.193359375, | |
| "eval_runtime": 40.7422, | |
| "eval_samples_per_second": 87.575, | |
| "eval_steps_per_second": 10.947, | |
| "step": 2250000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.483674682336927e-05, | |
| "loss": 2.1667, | |
| "step": 2255000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 0.5189793932380363, | |
| "eval_loss": 2.193359375, | |
| "eval_runtime": 40.4467, | |
| "eval_samples_per_second": 88.215, | |
| "eval_steps_per_second": 11.027, | |
| "step": 2255000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.476191446223752e-05, | |
| "loss": 2.1653, | |
| "step": 2260000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 0.5191834992482324, | |
| "eval_loss": 2.193359375, | |
| "eval_runtime": 40.3493, | |
| "eval_samples_per_second": 88.428, | |
| "eval_steps_per_second": 11.053, | |
| "step": 2260000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.468709707956076e-05, | |
| "loss": 2.1656, | |
| "step": 2265000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 0.5193434964428021, | |
| "eval_loss": 2.19140625, | |
| "eval_runtime": 44.129, | |
| "eval_samples_per_second": 80.854, | |
| "eval_steps_per_second": 10.107, | |
| "step": 2265000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.4612264718429014e-05, | |
| "loss": 2.1695, | |
| "step": 2270000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 0.5194328099452503, | |
| "eval_loss": 2.19140625, | |
| "eval_runtime": 42.07, | |
| "eval_samples_per_second": 84.811, | |
| "eval_steps_per_second": 10.601, | |
| "step": 2270000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.453744733575226e-05, | |
| "loss": 2.17, | |
| "step": 2275000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 0.5195643144887323, | |
| "eval_loss": 2.19140625, | |
| "eval_runtime": 40.4092, | |
| "eval_samples_per_second": 88.297, | |
| "eval_steps_per_second": 11.037, | |
| "step": 2275000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.44626299530755e-05, | |
| "loss": 2.1628, | |
| "step": 2280000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 0.5197062298085732, | |
| "eval_loss": 2.19140625, | |
| "eval_runtime": 40.3663, | |
| "eval_samples_per_second": 88.391, | |
| "eval_steps_per_second": 11.049, | |
| "step": 2280000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.438779759194375e-05, | |
| "loss": 2.1648, | |
| "step": 2285000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 0.5196210258231089, | |
| "eval_loss": 2.189453125, | |
| "eval_runtime": 41.4265, | |
| "eval_samples_per_second": 86.128, | |
| "eval_steps_per_second": 10.766, | |
| "step": 2285000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.431298020926699e-05, | |
| "loss": 2.1647, | |
| "step": 2290000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.5199106097865681, | |
| "eval_loss": 2.189453125, | |
| "eval_runtime": 43.7368, | |
| "eval_samples_per_second": 81.579, | |
| "eval_steps_per_second": 10.197, | |
| "step": 2290000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.423813286968026e-05, | |
| "loss": 2.1648, | |
| "step": 2295000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.5198015706025977, | |
| "eval_loss": 2.189453125, | |
| "eval_runtime": 40.3741, | |
| "eval_samples_per_second": 88.374, | |
| "eval_steps_per_second": 11.047, | |
| "step": 2295000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.4163315487003496e-05, | |
| "loss": 2.168, | |
| "step": 2300000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.5196733536727027, | |
| "eval_loss": 2.189453125, | |
| "eval_runtime": 40.4104, | |
| "eval_samples_per_second": 88.294, | |
| "eval_steps_per_second": 11.037, | |
| "step": 2300000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.4088498104326735e-05, | |
| "loss": 2.1607, | |
| "step": 2305000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.5197840366634667, | |
| "eval_loss": 2.189453125, | |
| "eval_runtime": 40.7993, | |
| "eval_samples_per_second": 87.452, | |
| "eval_steps_per_second": 10.932, | |
| "step": 2305000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.401365076474e-05, | |
| "loss": 2.1674, | |
| "step": 2310000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.5199684169921404, | |
| "eval_loss": 2.1875, | |
| "eval_runtime": 40.3824, | |
| "eval_samples_per_second": 88.355, | |
| "eval_steps_per_second": 11.044, | |
| "step": 2310000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.393881840360825e-05, | |
| "loss": 2.1656, | |
| "step": 2315000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.5199689649277383, | |
| "eval_loss": 2.1875, | |
| "eval_runtime": 40.7535, | |
| "eval_samples_per_second": 87.551, | |
| "eval_steps_per_second": 10.944, | |
| "step": 2315000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.38639860424765e-05, | |
| "loss": 2.1637, | |
| "step": 2320000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.5201547150954066, | |
| "eval_loss": 2.1875, | |
| "eval_runtime": 40.6083, | |
| "eval_samples_per_second": 87.864, | |
| "eval_steps_per_second": 10.983, | |
| "step": 2320000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.3789168659799746e-05, | |
| "loss": 2.1649, | |
| "step": 2325000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.5201152637323619, | |
| "eval_loss": 2.1875, | |
| "eval_runtime": 41.4342, | |
| "eval_samples_per_second": 86.112, | |
| "eval_steps_per_second": 10.764, | |
| "step": 2325000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.371436625557798e-05, | |
| "loss": 2.1625, | |
| "step": 2330000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.5200544428810016, | |
| "eval_loss": 2.1875, | |
| "eval_runtime": 44.1316, | |
| "eval_samples_per_second": 80.849, | |
| "eval_steps_per_second": 10.106, | |
| "step": 2330000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.363950393753625e-05, | |
| "loss": 2.1627, | |
| "step": 2335000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.5202593707945943, | |
| "eval_loss": 2.1875, | |
| "eval_runtime": 42.6548, | |
| "eval_samples_per_second": 83.648, | |
| "eval_steps_per_second": 10.456, | |
| "step": 2335000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.356468655485949e-05, | |
| "loss": 2.1598, | |
| "step": 2340000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.5203048494492152, | |
| "eval_loss": 2.185546875, | |
| "eval_runtime": 43.9859, | |
| "eval_samples_per_second": 81.117, | |
| "eval_steps_per_second": 10.14, | |
| "step": 2340000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.348982423681776e-05, | |
| "loss": 2.1638, | |
| "step": 2345000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.5201473179648357, | |
| "eval_loss": 2.1875, | |
| "eval_runtime": 44.5632, | |
| "eval_samples_per_second": 80.066, | |
| "eval_steps_per_second": 10.008, | |
| "step": 2345000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.341499187568602e-05, | |
| "loss": 2.1588, | |
| "step": 2350000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.5204785450337309, | |
| "eval_loss": 2.185546875, | |
| "eval_runtime": 41.6749, | |
| "eval_samples_per_second": 85.615, | |
| "eval_steps_per_second": 10.702, | |
| "step": 2350000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.334015951455427e-05, | |
| "loss": 2.1633, | |
| "step": 2355000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.5204648466437849, | |
| "eval_loss": 2.185546875, | |
| "eval_runtime": 40.4241, | |
| "eval_samples_per_second": 88.264, | |
| "eval_steps_per_second": 11.033, | |
| "step": 2355000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.326534213187751e-05, | |
| "loss": 2.1621, | |
| "step": 2360000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.5205470369834612, | |
| "eval_loss": 2.185546875, | |
| "eval_runtime": 40.3566, | |
| "eval_samples_per_second": 88.412, | |
| "eval_steps_per_second": 11.051, | |
| "step": 2360000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.319049479229077e-05, | |
| "loss": 2.165, | |
| "step": 2365000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.5207188147933844, | |
| "eval_loss": 2.18359375, | |
| "eval_runtime": 40.8467, | |
| "eval_samples_per_second": 87.351, | |
| "eval_steps_per_second": 10.919, | |
| "step": 2365000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.311566243115902e-05, | |
| "loss": 2.159, | |
| "step": 2370000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.52062347399936, | |
| "eval_loss": 2.18359375, | |
| "eval_runtime": 43.216, | |
| "eval_samples_per_second": 82.562, | |
| "eval_steps_per_second": 10.32, | |
| "step": 2370000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.3040830070027275e-05, | |
| "loss": 2.1573, | |
| "step": 2375000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.5207314173121348, | |
| "eval_loss": 2.18359375, | |
| "eval_runtime": 44.6428, | |
| "eval_samples_per_second": 79.923, | |
| "eval_steps_per_second": 9.99, | |
| "step": 2375000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.2966012687350514e-05, | |
| "loss": 2.1556, | |
| "step": 2380000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.520848401562274, | |
| "eval_loss": 2.18359375, | |
| "eval_runtime": 40.6164, | |
| "eval_samples_per_second": 87.846, | |
| "eval_steps_per_second": 10.981, | |
| "step": 2380000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.289119530467376e-05, | |
| "loss": 2.1562, | |
| "step": 2385000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.5209563448750487, | |
| "eval_loss": 2.18359375, | |
| "eval_runtime": 40.356, | |
| "eval_samples_per_second": 88.413, | |
| "eval_steps_per_second": 11.052, | |
| "step": 2385000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.281636294354201e-05, | |
| "loss": 2.1572, | |
| "step": 2390000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.5209188112865967, | |
| "eval_loss": 2.18359375, | |
| "eval_runtime": 44.3639, | |
| "eval_samples_per_second": 80.426, | |
| "eval_steps_per_second": 10.053, | |
| "step": 2390000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.274154556086525e-05, | |
| "loss": 2.1577, | |
| "step": 2395000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.5208686751793942, | |
| "eval_loss": 2.181640625, | |
| "eval_runtime": 42.2402, | |
| "eval_samples_per_second": 84.469, | |
| "eval_steps_per_second": 10.559, | |
| "step": 2395000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.2666713199733505e-05, | |
| "loss": 2.1529, | |
| "step": 2400000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.5209933305279031, | |
| "eval_loss": 2.181640625, | |
| "eval_runtime": 42.0012, | |
| "eval_samples_per_second": 84.95, | |
| "eval_steps_per_second": 10.619, | |
| "step": 2400000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.259188083860176e-05, | |
| "loss": 2.1636, | |
| "step": 2405000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.5210516856690732, | |
| "eval_loss": 2.181640625, | |
| "eval_runtime": 40.8766, | |
| "eval_samples_per_second": 87.287, | |
| "eval_steps_per_second": 10.911, | |
| "step": 2405000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.2517063455924996e-05, | |
| "loss": 2.1521, | |
| "step": 2410000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.5212941471711181, | |
| "eval_loss": 2.181640625, | |
| "eval_runtime": 44.8987, | |
| "eval_samples_per_second": 79.468, | |
| "eval_steps_per_second": 9.933, | |
| "step": 2410000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.244221611633826e-05, | |
| "loss": 2.1574, | |
| "step": 2415000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.5213697622836202, | |
| "eval_loss": 2.181640625, | |
| "eval_runtime": 40.8755, | |
| "eval_samples_per_second": 87.289, | |
| "eval_steps_per_second": 10.911, | |
| "step": 2415000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.236736877675152e-05, | |
| "loss": 2.1546, | |
| "step": 2420000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.5213067496898685, | |
| "eval_loss": 2.1796875, | |
| "eval_runtime": 43.2862, | |
| "eval_samples_per_second": 82.428, | |
| "eval_steps_per_second": 10.304, | |
| "step": 2420000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.2292536415619776e-05, | |
| "loss": 2.1572, | |
| "step": 2425000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.521194148924512, | |
| "eval_loss": 2.1796875, | |
| "eval_runtime": 42.5113, | |
| "eval_samples_per_second": 83.931, | |
| "eval_steps_per_second": 10.491, | |
| "step": 2425000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.2217719032943015e-05, | |
| "loss": 2.1544, | |
| "step": 2430000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.5212160663484257, | |
| "eval_loss": 2.1796875, | |
| "eval_runtime": 42.4347, | |
| "eval_samples_per_second": 84.082, | |
| "eval_steps_per_second": 10.51, | |
| "step": 2430000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.214290165026626e-05, | |
| "loss": 2.15, | |
| "step": 2435000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.5213122290458468, | |
| "eval_loss": 2.1796875, | |
| "eval_runtime": 41.657, | |
| "eval_samples_per_second": 85.652, | |
| "eval_steps_per_second": 10.706, | |
| "step": 2435000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.206805431067952e-05, | |
| "loss": 2.1537, | |
| "step": 2440000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.5217242766154238, | |
| "eval_loss": 2.177734375, | |
| "eval_runtime": 45.9893, | |
| "eval_samples_per_second": 77.583, | |
| "eval_steps_per_second": 9.698, | |
| "step": 2440000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.199322194954777e-05, | |
| "loss": 2.1552, | |
| "step": 2445000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.5215689368734356, | |
| "eval_loss": 2.177734375, | |
| "eval_runtime": 43.8458, | |
| "eval_samples_per_second": 81.376, | |
| "eval_steps_per_second": 10.172, | |
| "step": 2445000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.1918389588416026e-05, | |
| "loss": 2.1522, | |
| "step": 2450000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.5215245540900105, | |
| "eval_loss": 2.177734375, | |
| "eval_runtime": 45.304, | |
| "eval_samples_per_second": 78.757, | |
| "eval_steps_per_second": 9.845, | |
| "step": 2450000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.1843542248829286e-05, | |
| "loss": 2.1487, | |
| "step": 2455000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.5214875684371562, | |
| "eval_loss": 2.177734375, | |
| "eval_runtime": 41.9232, | |
| "eval_samples_per_second": 85.108, | |
| "eval_steps_per_second": 10.639, | |
| "step": 2455000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.176870988769754e-05, | |
| "loss": 2.1582, | |
| "step": 2460000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.52146373323865, | |
| "eval_loss": 2.177734375, | |
| "eval_runtime": 44.2104, | |
| "eval_samples_per_second": 80.705, | |
| "eval_steps_per_second": 10.088, | |
| "step": 2460000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.169390748347577e-05, | |
| "loss": 2.1582, | |
| "step": 2465000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.5218020834703172, | |
| "eval_loss": 2.177734375, | |
| "eval_runtime": 43.1549, | |
| "eval_samples_per_second": 82.679, | |
| "eval_steps_per_second": 10.335, | |
| "step": 2465000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.161907512234402e-05, | |
| "loss": 2.1529, | |
| "step": 2470000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.5217976999855345, | |
| "eval_loss": 2.177734375, | |
| "eval_runtime": 45.1081, | |
| "eval_samples_per_second": 79.099, | |
| "eval_steps_per_second": 9.887, | |
| "step": 2470000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.1544242761212276e-05, | |
| "loss": 2.1549, | |
| "step": 2475000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.5219418070477668, | |
| "eval_loss": 2.17578125, | |
| "eval_runtime": 40.8292, | |
| "eval_samples_per_second": 87.388, | |
| "eval_steps_per_second": 10.924, | |
| "step": 2475000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.1469425378535516e-05, | |
| "loss": 2.1525, | |
| "step": 2480000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.521930574368011, | |
| "eval_loss": 2.17578125, | |
| "eval_runtime": 43.1177, | |
| "eval_samples_per_second": 82.75, | |
| "eval_steps_per_second": 10.344, | |
| "step": 2480000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.139459301740377e-05, | |
| "loss": 2.1478, | |
| "step": 2485000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.5221272832476362, | |
| "eval_loss": 2.17578125, | |
| "eval_runtime": 44.4519, | |
| "eval_samples_per_second": 80.266, | |
| "eval_steps_per_second": 10.033, | |
| "step": 2485000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.131977563472701e-05, | |
| "loss": 2.1524, | |
| "step": 2490000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 0.5219949568007575, | |
| "eval_loss": 2.17578125, | |
| "eval_runtime": 43.8567, | |
| "eval_samples_per_second": 81.356, | |
| "eval_steps_per_second": 10.169, | |
| "step": 2490000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.124494327359526e-05, | |
| "loss": 2.1477, | |
| "step": 2495000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 0.5220256411942366, | |
| "eval_loss": 2.173828125, | |
| "eval_runtime": 40.3311, | |
| "eval_samples_per_second": 88.468, | |
| "eval_steps_per_second": 11.058, | |
| "step": 2495000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.117011091246352e-05, | |
| "loss": 2.1524, | |
| "step": 2500000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 0.5221511184461423, | |
| "eval_loss": 2.173828125, | |
| "eval_runtime": 42.2823, | |
| "eval_samples_per_second": 84.385, | |
| "eval_steps_per_second": 10.548, | |
| "step": 2500000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.1095278551331766e-05, | |
| "loss": 2.147, | |
| "step": 2505000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 0.5221815288718226, | |
| "eval_loss": 2.173828125, | |
| "eval_runtime": 40.7144, | |
| "eval_samples_per_second": 87.635, | |
| "eval_steps_per_second": 10.954, | |
| "step": 2505000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.102046116865501e-05, | |
| "loss": 2.1481, | |
| "step": 2510000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 0.5222954994761736, | |
| "eval_loss": 2.173828125, | |
| "eval_runtime": 42.7002, | |
| "eval_samples_per_second": 83.559, | |
| "eval_steps_per_second": 10.445, | |
| "step": 2510000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.094559885061328e-05, | |
| "loss": 2.1494, | |
| "step": 2515000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 0.5222659109538901, | |
| "eval_loss": 2.173828125, | |
| "eval_runtime": 42.0725, | |
| "eval_samples_per_second": 84.806, | |
| "eval_steps_per_second": 10.601, | |
| "step": 2515000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.087078146793652e-05, | |
| "loss": 2.1484, | |
| "step": 2520000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 0.5222675547606836, | |
| "eval_loss": 2.173828125, | |
| "eval_runtime": 44.1957, | |
| "eval_samples_per_second": 80.732, | |
| "eval_steps_per_second": 10.091, | |
| "step": 2520000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.079594910680478e-05, | |
| "loss": 2.1474, | |
| "step": 2525000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.5223297454510387, | |
| "eval_loss": 2.173828125, | |
| "eval_runtime": 43.3427, | |
| "eval_samples_per_second": 82.321, | |
| "eval_steps_per_second": 10.29, | |
| "step": 2525000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.072110176721804e-05, | |
| "loss": 2.1487, | |
| "step": 2530000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.5222724861810643, | |
| "eval_loss": 2.173828125, | |
| "eval_runtime": 44.0468, | |
| "eval_samples_per_second": 81.005, | |
| "eval_steps_per_second": 10.126, | |
| "step": 2530000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.064626940608628e-05, | |
| "loss": 2.1465, | |
| "step": 2535000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.5224768661590591, | |
| "eval_loss": 2.171875, | |
| "eval_runtime": 40.4028, | |
| "eval_samples_per_second": 88.311, | |
| "eval_steps_per_second": 11.039, | |
| "step": 2535000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.057145202340953e-05, | |
| "loss": 2.1456, | |
| "step": 2540000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.5226201513178947, | |
| "eval_loss": 2.171875, | |
| "eval_runtime": 42.5959, | |
| "eval_samples_per_second": 83.764, | |
| "eval_steps_per_second": 10.47, | |
| "step": 2540000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.049661966227778e-05, | |
| "loss": 2.1482, | |
| "step": 2545000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.5223516628749524, | |
| "eval_loss": 2.171875, | |
| "eval_runtime": 43.342, | |
| "eval_samples_per_second": 82.322, | |
| "eval_steps_per_second": 10.29, | |
| "step": 2545000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.042180227960102e-05, | |
| "loss": 2.1451, | |
| "step": 2550000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.5226286443196613, | |
| "eval_loss": 2.171875, | |
| "eval_runtime": 42.1401, | |
| "eval_samples_per_second": 84.67, | |
| "eval_steps_per_second": 10.584, | |
| "step": 2550000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.0346969918469274e-05, | |
| "loss": 2.143, | |
| "step": 2555000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.5225609742733278, | |
| "eval_loss": 2.171875, | |
| "eval_runtime": 44.8351, | |
| "eval_samples_per_second": 79.581, | |
| "eval_steps_per_second": 9.948, | |
| "step": 2555000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.027210760042755e-05, | |
| "loss": 2.1463, | |
| "step": 2560000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.5225061807135436, | |
| "eval_loss": 2.171875, | |
| "eval_runtime": 42.3643, | |
| "eval_samples_per_second": 84.222, | |
| "eval_steps_per_second": 10.528, | |
| "step": 2560000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.0197290217750786e-05, | |
| "loss": 2.1466, | |
| "step": 2565000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.5227820662870569, | |
| "eval_loss": 2.169921875, | |
| "eval_runtime": 44.9199, | |
| "eval_samples_per_second": 79.43, | |
| "eval_steps_per_second": 9.929, | |
| "step": 2565000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.012247283507403e-05, | |
| "loss": 2.1423, | |
| "step": 2570000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.5229272692204849, | |
| "eval_loss": 2.169921875, | |
| "eval_runtime": 43.6123, | |
| "eval_samples_per_second": 81.812, | |
| "eval_steps_per_second": 10.226, | |
| "step": 2570000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.004765545239728e-05, | |
| "loss": 2.1423, | |
| "step": 2575000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.5230557601181788, | |
| "eval_loss": 2.169921875, | |
| "eval_runtime": 44.819, | |
| "eval_samples_per_second": 79.609, | |
| "eval_steps_per_second": 9.951, | |
| "step": 2575000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.997283806972052e-05, | |
| "loss": 2.1444, | |
| "step": 2580000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.5230245277891018, | |
| "eval_loss": 2.169921875, | |
| "eval_runtime": 42.2873, | |
| "eval_samples_per_second": 84.375, | |
| "eval_steps_per_second": 10.547, | |
| "step": 2580000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.9898020687043756e-05, | |
| "loss": 2.1402, | |
| "step": 2585000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.5230417877604338, | |
| "eval_loss": 2.16796875, | |
| "eval_runtime": 44.6974, | |
| "eval_samples_per_second": 79.826, | |
| "eval_steps_per_second": 9.978, | |
| "step": 2585000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.9823203304367e-05, | |
| "loss": 2.1376, | |
| "step": 2590000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.5230924718032341, | |
| "eval_loss": 2.16796875, | |
| "eval_runtime": 43.4573, | |
| "eval_samples_per_second": 82.104, | |
| "eval_steps_per_second": 10.263, | |
| "step": 2590000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.974838592169024e-05, | |
| "loss": 2.1395, | |
| "step": 2595000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.5231894564040521, | |
| "eval_loss": 2.16796875, | |
| "eval_runtime": 43.8137, | |
| "eval_samples_per_second": 81.436, | |
| "eval_steps_per_second": 10.179, | |
| "step": 2595000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.9673553560558494e-05, | |
| "loss": 2.1399, | |
| "step": 2600000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.5232801397454949, | |
| "eval_loss": 2.16796875, | |
| "eval_runtime": 41.9685, | |
| "eval_samples_per_second": 85.016, | |
| "eval_steps_per_second": 10.627, | |
| "step": 2600000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.9598721199426747e-05, | |
| "loss": 2.1379, | |
| "step": 2605000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.5230908279964406, | |
| "eval_loss": 2.16796875, | |
| "eval_runtime": 42.9652, | |
| "eval_samples_per_second": 83.044, | |
| "eval_steps_per_second": 10.38, | |
| "step": 2605000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.952390381674999e-05, | |
| "loss": 2.1411, | |
| "step": 2610000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.5233708230869376, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 41.4989, | |
| "eval_samples_per_second": 85.978, | |
| "eval_steps_per_second": 10.747, | |
| "step": 2610000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.944908643407323e-05, | |
| "loss": 2.1421, | |
| "step": 2615000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.5232478115452223, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 41.1264, | |
| "eval_samples_per_second": 86.757, | |
| "eval_steps_per_second": 10.845, | |
| "step": 2615000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.9374254072941484e-05, | |
| "loss": 2.1412, | |
| "step": 2620000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.5236650645029786, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 43.7159, | |
| "eval_samples_per_second": 81.618, | |
| "eval_steps_per_second": 10.202, | |
| "step": 2620000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.929942171180974e-05, | |
| "loss": 2.1381, | |
| "step": 2625000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.5235886274870797, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 43.6678, | |
| "eval_samples_per_second": 81.708, | |
| "eval_steps_per_second": 10.213, | |
| "step": 2625000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.922458935067799e-05, | |
| "loss": 2.142, | |
| "step": 2630000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.523625339172135, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 42.3354, | |
| "eval_samples_per_second": 84.279, | |
| "eval_steps_per_second": 10.535, | |
| "step": 2630000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.9149756989546236e-05, | |
| "loss": 2.1394, | |
| "step": 2635000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.5236212296551512, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 42.9131, | |
| "eval_samples_per_second": 83.145, | |
| "eval_steps_per_second": 10.393, | |
| "step": 2635000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.907493960686948e-05, | |
| "loss": 2.1384, | |
| "step": 2640000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.5233793160887042, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 40.2525, | |
| "eval_samples_per_second": 88.64, | |
| "eval_steps_per_second": 11.08, | |
| "step": 2640000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.900012222419273e-05, | |
| "loss": 2.138, | |
| "step": 2645000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.5235653402241714, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 43.9803, | |
| "eval_samples_per_second": 81.127, | |
| "eval_steps_per_second": 10.141, | |
| "step": 2645000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.892530484151597e-05, | |
| "loss": 2.1346, | |
| "step": 2650000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.523867526706381, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 44.5226, | |
| "eval_samples_per_second": 80.139, | |
| "eval_steps_per_second": 10.017, | |
| "step": 2650000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.885047248038421e-05, | |
| "loss": 2.1376, | |
| "step": 2655000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.5239204024915728, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 42.646, | |
| "eval_samples_per_second": 83.666, | |
| "eval_steps_per_second": 10.458, | |
| "step": 2655000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.877564011925247e-05, | |
| "loss": 2.1409, | |
| "step": 2660000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.5239705385987753, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 40.3234, | |
| "eval_samples_per_second": 88.485, | |
| "eval_steps_per_second": 11.061, | |
| "step": 2660000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.8700807758120725e-05, | |
| "loss": 2.1343, | |
| "step": 2665000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.5239877985701072, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 42.9773, | |
| "eval_samples_per_second": 83.021, | |
| "eval_steps_per_second": 10.378, | |
| "step": 2665000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.862597539698897e-05, | |
| "loss": 2.1363, | |
| "step": 2670000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.5240447838722828, | |
| "eval_loss": 2.162109375, | |
| "eval_runtime": 44.9261, | |
| "eval_samples_per_second": 79.419, | |
| "eval_steps_per_second": 9.927, | |
| "step": 2670000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.855112805740224e-05, | |
| "loss": 2.1343, | |
| "step": 2675000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.5241869731599227, | |
| "eval_loss": 2.162109375, | |
| "eval_runtime": 43.4218, | |
| "eval_samples_per_second": 82.171, | |
| "eval_steps_per_second": 10.271, | |
| "step": 2675000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.847631067472548e-05, | |
| "loss": 2.1381, | |
| "step": 2680000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.5243439567087043, | |
| "eval_loss": 2.162109375, | |
| "eval_runtime": 43.0405, | |
| "eval_samples_per_second": 82.899, | |
| "eval_steps_per_second": 10.362, | |
| "step": 2680000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.840147831359373e-05, | |
| "loss": 2.1355, | |
| "step": 2685000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.5241456040222856, | |
| "eval_loss": 2.162109375, | |
| "eval_runtime": 43.7247, | |
| "eval_samples_per_second": 81.601, | |
| "eval_steps_per_second": 10.2, | |
| "step": 2685000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.8326615995552e-05, | |
| "loss": 2.1394, | |
| "step": 2690000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.5242305340399511, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 40.8207, | |
| "eval_samples_per_second": 87.407, | |
| "eval_steps_per_second": 10.926, | |
| "step": 2690000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.825179861287524e-05, | |
| "loss": 2.1359, | |
| "step": 2695000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.5244875158353388, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 44.3125, | |
| "eval_samples_per_second": 80.519, | |
| "eval_steps_per_second": 10.065, | |
| "step": 2695000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.817698123019848e-05, | |
| "loss": 2.1365, | |
| "step": 2700000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.524362312551232, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 42.9757, | |
| "eval_samples_per_second": 83.024, | |
| "eval_steps_per_second": 10.378, | |
| "step": 2700000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.810216384752173e-05, | |
| "loss": 2.131, | |
| "step": 2705000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.5244225854669946, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 44.5469, | |
| "eval_samples_per_second": 80.095, | |
| "eval_steps_per_second": 10.012, | |
| "step": 2705000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.802731650793499e-05, | |
| "loss": 2.1337, | |
| "step": 2710000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.5244025858176733, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 43.6203, | |
| "eval_samples_per_second": 81.797, | |
| "eval_steps_per_second": 10.225, | |
| "step": 2710000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.7952499125258226e-05, | |
| "loss": 2.1307, | |
| "step": 2715000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.5245954591481136, | |
| "eval_loss": 2.158203125, | |
| "eval_runtime": 43.7473, | |
| "eval_samples_per_second": 81.559, | |
| "eval_steps_per_second": 10.195, | |
| "step": 2715000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.787765178567149e-05, | |
| "loss": 2.1333, | |
| "step": 2720000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.524749429051107, | |
| "eval_loss": 2.158203125, | |
| "eval_runtime": 43.6989, | |
| "eval_samples_per_second": 81.65, | |
| "eval_steps_per_second": 10.206, | |
| "step": 2720000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.780283440299474e-05, | |
| "loss": 2.1354, | |
| "step": 2725000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_accuracy": 0.5246316228975711, | |
| "eval_loss": 2.158203125, | |
| "eval_runtime": 43.001, | |
| "eval_samples_per_second": 82.975, | |
| "eval_steps_per_second": 10.372, | |
| "step": 2725000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.7728002041862985e-05, | |
| "loss": 2.1372, | |
| "step": 2730000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_accuracy": 0.5248077841922771, | |
| "eval_loss": 2.158203125, | |
| "eval_runtime": 41.555, | |
| "eval_samples_per_second": 85.862, | |
| "eval_steps_per_second": 10.733, | |
| "step": 2730000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.765316968073124e-05, | |
| "loss": 2.1323, | |
| "step": 2735000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_accuracy": 0.5248480574587185, | |
| "eval_loss": 2.158203125, | |
| "eval_runtime": 41.1337, | |
| "eval_samples_per_second": 86.742, | |
| "eval_steps_per_second": 10.843, | |
| "step": 2735000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.757835229805448e-05, | |
| "loss": 2.1315, | |
| "step": 2740000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_accuracy": 0.5249064125998887, | |
| "eval_loss": 2.15625, | |
| "eval_runtime": 43.6977, | |
| "eval_samples_per_second": 81.652, | |
| "eval_steps_per_second": 10.206, | |
| "step": 2740000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.750351993692273e-05, | |
| "loss": 2.1341, | |
| "step": 2745000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_accuracy": 0.5249143576660573, | |
| "eval_loss": 2.15625, | |
| "eval_runtime": 44.5031, | |
| "eval_samples_per_second": 80.174, | |
| "eval_steps_per_second": 10.022, | |
| "step": 2745000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.7428702554245975e-05, | |
| "loss": 2.132, | |
| "step": 2750000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_accuracy": 0.5249768223242113, | |
| "eval_loss": 2.15625, | |
| "eval_runtime": 40.671, | |
| "eval_samples_per_second": 87.728, | |
| "eval_steps_per_second": 10.966, | |
| "step": 2750000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.7353885171569214e-05, | |
| "loss": 2.1322, | |
| "step": 2755000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.5251954486277501, | |
| "eval_loss": 2.15625, | |
| "eval_runtime": 43.2998, | |
| "eval_samples_per_second": 82.402, | |
| "eval_steps_per_second": 10.3, | |
| "step": 2755000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.727906778889246e-05, | |
| "loss": 2.1298, | |
| "step": 2760000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.5252044895651145, | |
| "eval_loss": 2.15625, | |
| "eval_runtime": 44.6651, | |
| "eval_samples_per_second": 79.883, | |
| "eval_steps_per_second": 9.985, | |
| "step": 2760000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.720422044930572e-05, | |
| "loss": 2.1285, | |
| "step": 2765000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.5252362698297893, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 43.7277, | |
| "eval_samples_per_second": 81.596, | |
| "eval_steps_per_second": 10.199, | |
| "step": 2765000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.7129403066628966e-05, | |
| "loss": 2.1299, | |
| "step": 2770000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.5251601067816893, | |
| "eval_loss": 2.15625, | |
| "eval_runtime": 43.8556, | |
| "eval_samples_per_second": 81.358, | |
| "eval_steps_per_second": 10.17, | |
| "step": 2770000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.7054585683952205e-05, | |
| "loss": 2.1304, | |
| "step": 2775000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.5252631186740835, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 44.0024, | |
| "eval_samples_per_second": 81.086, | |
| "eval_steps_per_second": 10.136, | |
| "step": 2775000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.6979768301275444e-05, | |
| "loss": 2.1288, | |
| "step": 2780000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.5254154447702835, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 43.1187, | |
| "eval_samples_per_second": 82.748, | |
| "eval_steps_per_second": 10.344, | |
| "step": 2780000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.6904935940143697e-05, | |
| "loss": 2.1295, | |
| "step": 2785000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.5253255833322374, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 44.6338, | |
| "eval_samples_per_second": 79.939, | |
| "eval_steps_per_second": 9.992, | |
| "step": 2785000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.683010357901195e-05, | |
| "loss": 2.129, | |
| "step": 2790000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.5255368125052053, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 44.4544, | |
| "eval_samples_per_second": 80.262, | |
| "eval_steps_per_second": 10.033, | |
| "step": 2790000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.6755286196335195e-05, | |
| "loss": 2.1285, | |
| "step": 2795000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.5253598293071026, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 44.2524, | |
| "eval_samples_per_second": 80.628, | |
| "eval_steps_per_second": 10.079, | |
| "step": 2795000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.668045383520344e-05, | |
| "loss": 2.1292, | |
| "step": 2800000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.5252880497437853, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 40.6928, | |
| "eval_samples_per_second": 87.681, | |
| "eval_steps_per_second": 10.96, | |
| "step": 2800000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.660563645252669e-05, | |
| "loss": 2.1278, | |
| "step": 2805000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.5256472215281704, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 43.3931, | |
| "eval_samples_per_second": 82.225, | |
| "eval_steps_per_second": 10.278, | |
| "step": 2805000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.653080409139494e-05, | |
| "loss": 2.1239, | |
| "step": 2810000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.5255110595321069, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 42.2966, | |
| "eval_samples_per_second": 84.357, | |
| "eval_steps_per_second": 10.545, | |
| "step": 2810000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.645598670871818e-05, | |
| "loss": 2.1241, | |
| "step": 2815000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.5259063950659495, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 42.6516, | |
| "eval_samples_per_second": 83.655, | |
| "eval_steps_per_second": 10.457, | |
| "step": 2815000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.638115434758643e-05, | |
| "loss": 2.1232, | |
| "step": 2820000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.5256781798894485, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 46.2947, | |
| "eval_samples_per_second": 77.071, | |
| "eval_steps_per_second": 9.634, | |
| "step": 2820000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.6306321986454685e-05, | |
| "loss": 2.1241, | |
| "step": 2825000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_accuracy": 0.525697357635373, | |
| "eval_loss": 2.150390625, | |
| "eval_runtime": 43.287, | |
| "eval_samples_per_second": 82.427, | |
| "eval_steps_per_second": 10.303, | |
| "step": 2825000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.623148962532294e-05, | |
| "loss": 2.1236, | |
| "step": 2830000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_accuracy": 0.5259439286544016, | |
| "eval_loss": 2.150390625, | |
| "eval_runtime": 43.0484, | |
| "eval_samples_per_second": 82.884, | |
| "eval_steps_per_second": 10.36, | |
| "step": 2830000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.6156672242646177e-05, | |
| "loss": 2.1272, | |
| "step": 2835000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_accuracy": 0.5259442026222007, | |
| "eval_loss": 2.150390625, | |
| "eval_runtime": 42.9436, | |
| "eval_samples_per_second": 83.086, | |
| "eval_steps_per_second": 10.386, | |
| "step": 2835000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.608183988151443e-05, | |
| "loss": 2.1271, | |
| "step": 2840000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_accuracy": 0.5260592690977474, | |
| "eval_loss": 2.150390625, | |
| "eval_runtime": 40.596, | |
| "eval_samples_per_second": 87.89, | |
| "eval_steps_per_second": 10.986, | |
| "step": 2840000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.6007022498837675e-05, | |
| "loss": 2.1249, | |
| "step": 2845000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_accuracy": 0.52616776034612, | |
| "eval_loss": 2.1484375, | |
| "eval_runtime": 43.4159, | |
| "eval_samples_per_second": 82.182, | |
| "eval_steps_per_second": 10.273, | |
| "step": 2845000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.5932175159250935e-05, | |
| "loss": 2.1245, | |
| "step": 2850000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_accuracy": 0.5260307764466595, | |
| "eval_loss": 2.1484375, | |
| "eval_runtime": 43.8356, | |
| "eval_samples_per_second": 81.395, | |
| "eval_steps_per_second": 10.174, | |
| "step": 2850000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.585735777657418e-05, | |
| "loss": 2.1222, | |
| "step": 2855000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.5261137886897326, | |
| "eval_loss": 2.1484375, | |
| "eval_runtime": 43.0232, | |
| "eval_samples_per_second": 82.932, | |
| "eval_steps_per_second": 10.367, | |
| "step": 2855000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.5782525415442434e-05, | |
| "loss": 2.125, | |
| "step": 2860000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.5263050182133793, | |
| "eval_loss": 2.1484375, | |
| "eval_runtime": 43.0096, | |
| "eval_samples_per_second": 82.958, | |
| "eval_steps_per_second": 10.37, | |
| "step": 2860000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.570770803276567e-05, | |
| "loss": 2.1261, | |
| "step": 2865000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.5260803646182642, | |
| "eval_loss": 2.1484375, | |
| "eval_runtime": 40.5259, | |
| "eval_samples_per_second": 88.042, | |
| "eval_steps_per_second": 11.005, | |
| "step": 2865000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.563289065008891e-05, | |
| "loss": 2.1247, | |
| "step": 2870000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.5262392659416383, | |
| "eval_loss": 2.1484375, | |
| "eval_runtime": 46.7614, | |
| "eval_samples_per_second": 76.302, | |
| "eval_steps_per_second": 9.538, | |
| "step": 2870000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.5558028332047185e-05, | |
| "loss": 2.1225, | |
| "step": 2875000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.5263167988287328, | |
| "eval_loss": 2.1484375, | |
| "eval_runtime": 43.7319, | |
| "eval_samples_per_second": 81.588, | |
| "eval_steps_per_second": 10.199, | |
| "step": 2875000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.5483210949370424e-05, | |
| "loss": 2.122, | |
| "step": 2880000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.5261085833015531, | |
| "eval_loss": 2.1484375, | |
| "eval_runtime": 46.8934, | |
| "eval_samples_per_second": 76.087, | |
| "eval_steps_per_second": 9.511, | |
| "step": 2880000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.540839356669367e-05, | |
| "loss": 2.1237, | |
| "step": 2885000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.5261107750439444, | |
| "eval_loss": 2.146484375, | |
| "eval_runtime": 42.8485, | |
| "eval_samples_per_second": 83.27, | |
| "eval_steps_per_second": 10.409, | |
| "step": 2885000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.533356120556192e-05, | |
| "loss": 2.1219, | |
| "step": 2890000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.5261825546072617, | |
| "eval_loss": 2.146484375, | |
| "eval_runtime": 44.3826, | |
| "eval_samples_per_second": 80.392, | |
| "eval_steps_per_second": 10.049, | |
| "step": 2890000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.525874382288516e-05, | |
| "loss": 2.1248, | |
| "step": 2895000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.526191595544626, | |
| "eval_loss": 2.146484375, | |
| "eval_runtime": 40.9038, | |
| "eval_samples_per_second": 87.229, | |
| "eval_steps_per_second": 10.904, | |
| "step": 2895000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.51839264402084e-05, | |
| "loss": 2.1191, | |
| "step": 2900000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.526354332417185, | |
| "eval_loss": 2.146484375, | |
| "eval_runtime": 44.4026, | |
| "eval_samples_per_second": 80.356, | |
| "eval_steps_per_second": 10.044, | |
| "step": 2900000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.510909407907666e-05, | |
| "loss": 2.1181, | |
| "step": 2905000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.5264343310144699, | |
| "eval_loss": 2.146484375, | |
| "eval_runtime": 43.5492, | |
| "eval_samples_per_second": 81.93, | |
| "eval_steps_per_second": 10.241, | |
| "step": 2905000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.50342766963999e-05, | |
| "loss": 2.1176, | |
| "step": 2910000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.5263020045675911, | |
| "eval_loss": 2.146484375, | |
| "eval_runtime": 46.4423, | |
| "eval_samples_per_second": 76.827, | |
| "eval_steps_per_second": 9.603, | |
| "step": 2910000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.495944433526815e-05, | |
| "loss": 2.1191, | |
| "step": 2915000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.5266524093824109, | |
| "eval_loss": 2.146484375, | |
| "eval_runtime": 42.6675, | |
| "eval_samples_per_second": 83.623, | |
| "eval_steps_per_second": 10.453, | |
| "step": 2915000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.488462695259139e-05, | |
| "loss": 2.1206, | |
| "step": 2920000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.5267954205734475, | |
| "eval_loss": 2.14453125, | |
| "eval_runtime": 42.9737, | |
| "eval_samples_per_second": 83.028, | |
| "eval_steps_per_second": 10.378, | |
| "step": 2920000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.4809794591459645e-05, | |
| "loss": 2.1148, | |
| "step": 2925000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 0.5267219972033367, | |
| "eval_loss": 2.14453125, | |
| "eval_runtime": 44.1462, | |
| "eval_samples_per_second": 80.822, | |
| "eval_steps_per_second": 10.103, | |
| "step": 2925000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.473497720878289e-05, | |
| "loss": 2.1188, | |
| "step": 2930000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 0.5270244576533453, | |
| "eval_loss": 2.14453125, | |
| "eval_runtime": 44.0298, | |
| "eval_samples_per_second": 81.036, | |
| "eval_steps_per_second": 10.129, | |
| "step": 2930000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.466015982610613e-05, | |
| "loss": 2.1118, | |
| "step": 2935000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 0.5270036361006273, | |
| "eval_loss": 2.14453125, | |
| "eval_runtime": 41.5147, | |
| "eval_samples_per_second": 85.945, | |
| "eval_steps_per_second": 10.743, | |
| "step": 2935000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.992518261732325e-05, | |
| "loss": 2.1283, | |
| "step": 2940000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 0.5243740931665856, | |
| "eval_loss": 2.158203125, | |
| "eval_runtime": 39.6741, | |
| "eval_samples_per_second": 89.933, | |
| "eval_steps_per_second": 11.242, | |
| "step": 2940000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.9850350256191494e-05, | |
| "loss": 2.1336, | |
| "step": 2945000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 0.5240346470637227, | |
| "eval_loss": 2.162109375, | |
| "eval_runtime": 39.6588, | |
| "eval_samples_per_second": 89.967, | |
| "eval_steps_per_second": 11.246, | |
| "step": 2945000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.977551789505975e-05, | |
| "loss": 2.1311, | |
| "step": 2950000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 0.5237494465850462, | |
| "eval_loss": 2.162109375, | |
| "eval_runtime": 39.7632, | |
| "eval_samples_per_second": 89.731, | |
| "eval_steps_per_second": 11.216, | |
| "step": 2950000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.970070051238299e-05, | |
| "loss": 2.1377, | |
| "step": 2955000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.523618763944961, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 39.7355, | |
| "eval_samples_per_second": 89.794, | |
| "eval_steps_per_second": 11.224, | |
| "step": 2955000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.962588312970624e-05, | |
| "loss": 2.136, | |
| "step": 2960000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.523584244002297, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 39.6172, | |
| "eval_samples_per_second": 90.062, | |
| "eval_steps_per_second": 11.258, | |
| "step": 2960000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.955103579011949e-05, | |
| "loss": 2.1394, | |
| "step": 2965000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.5233458920172359, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 39.7138, | |
| "eval_samples_per_second": 89.843, | |
| "eval_steps_per_second": 11.23, | |
| "step": 2965000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.947621840744274e-05, | |
| "loss": 2.1405, | |
| "step": 2970000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.5233305498204963, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.7403, | |
| "eval_samples_per_second": 89.783, | |
| "eval_steps_per_second": 11.223, | |
| "step": 2970000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.9401401024765983e-05, | |
| "loss": 2.1391, | |
| "step": 2975000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.5235795865497153, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.791, | |
| "eval_samples_per_second": 89.668, | |
| "eval_steps_per_second": 11.209, | |
| "step": 2975000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.9326553685179237e-05, | |
| "loss": 2.1353, | |
| "step": 2980000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.5233823297344923, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.6943, | |
| "eval_samples_per_second": 89.887, | |
| "eval_steps_per_second": 11.236, | |
| "step": 2980000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.925173630250248e-05, | |
| "loss": 2.1392, | |
| "step": 2985000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.5233889049616665, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.7244, | |
| "eval_samples_per_second": 89.819, | |
| "eval_steps_per_second": 11.227, | |
| "step": 2985000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.917691891982573e-05, | |
| "loss": 2.1384, | |
| "step": 2990000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.5235058892118056, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.7383, | |
| "eval_samples_per_second": 89.787, | |
| "eval_steps_per_second": 11.223, | |
| "step": 2990000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.910210153714897e-05, | |
| "loss": 2.1373, | |
| "step": 2995000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.523321234915333, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.8394, | |
| "eval_samples_per_second": 89.56, | |
| "eval_steps_per_second": 11.195, | |
| "step": 2995000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.9027284154472206e-05, | |
| "loss": 2.1346, | |
| "step": 3000000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.523394110349846, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.7248, | |
| "eval_samples_per_second": 89.818, | |
| "eval_steps_per_second": 11.227, | |
| "step": 3000000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.895246677179545e-05, | |
| "loss": 2.1368, | |
| "step": 3005000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.523494108596452, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.8116, | |
| "eval_samples_per_second": 89.622, | |
| "eval_steps_per_second": 11.203, | |
| "step": 3005000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.8877634410663705e-05, | |
| "loss": 2.1383, | |
| "step": 3010000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.5232738384861197, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.8277, | |
| "eval_samples_per_second": 89.586, | |
| "eval_steps_per_second": 11.198, | |
| "step": 3010000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.880280204953195e-05, | |
| "loss": 2.1447, | |
| "step": 3015000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.5233116460423708, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.9037, | |
| "eval_samples_per_second": 89.415, | |
| "eval_steps_per_second": 11.177, | |
| "step": 3015000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.87279846668552e-05, | |
| "loss": 2.1392, | |
| "step": 3020000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.5234119182567758, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.8551, | |
| "eval_samples_per_second": 89.524, | |
| "eval_steps_per_second": 11.191, | |
| "step": 3020000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.865315230572345e-05, | |
| "loss": 2.1359, | |
| "step": 3025000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_accuracy": 0.5233072625575881, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.8861, | |
| "eval_samples_per_second": 89.455, | |
| "eval_steps_per_second": 11.182, | |
| "step": 3025000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.8578334923046695e-05, | |
| "loss": 2.1408, | |
| "step": 3030000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_accuracy": 0.5233184952373438, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.8536, | |
| "eval_samples_per_second": 89.528, | |
| "eval_steps_per_second": 11.191, | |
| "step": 3030000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.850350256191494e-05, | |
| "loss": 2.1437, | |
| "step": 3035000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_accuracy": 0.5232642496131574, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.8602, | |
| "eval_samples_per_second": 89.513, | |
| "eval_steps_per_second": 11.189, | |
| "step": 3035000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.8428715136148166e-05, | |
| "loss": 2.1354, | |
| "step": 3040000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_accuracy": 0.5233253444323168, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.8447, | |
| "eval_samples_per_second": 89.548, | |
| "eval_steps_per_second": 11.193, | |
| "step": 3040000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.835383783965145e-05, | |
| "loss": 2.1371, | |
| "step": 3045000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_accuracy": 0.5234626022995761, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 40.0007, | |
| "eval_samples_per_second": 89.199, | |
| "eval_steps_per_second": 11.15, | |
| "step": 3045000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.82790054785197e-05, | |
| "loss": 2.1399, | |
| "step": 3050000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_accuracy": 0.5233957541566394, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.8876, | |
| "eval_samples_per_second": 89.451, | |
| "eval_steps_per_second": 11.181, | |
| "step": 3050000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.820420307429793e-05, | |
| "loss": 2.1387, | |
| "step": 3055000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.5234242468077273, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.9685, | |
| "eval_samples_per_second": 89.27, | |
| "eval_steps_per_second": 11.159, | |
| "step": 3055000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.8129340756256206e-05, | |
| "loss": 2.1406, | |
| "step": 3060000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.5232119217635636, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.9746, | |
| "eval_samples_per_second": 89.257, | |
| "eval_steps_per_second": 11.157, | |
| "step": 3060000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.805450839512445e-05, | |
| "loss": 2.1387, | |
| "step": 3065000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.5234650680097664, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 40.0463, | |
| "eval_samples_per_second": 89.097, | |
| "eval_steps_per_second": 11.137, | |
| "step": 3065000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.797966105553772e-05, | |
| "loss": 2.1413, | |
| "step": 3070000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.5234842457556909, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 39.9277, | |
| "eval_samples_per_second": 89.362, | |
| "eval_steps_per_second": 11.17, | |
| "step": 3070000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.790484367286096e-05, | |
| "loss": 2.1371, | |
| "step": 3075000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.523473561011533, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 39.9812, | |
| "eval_samples_per_second": 89.242, | |
| "eval_steps_per_second": 11.155, | |
| "step": 3075000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.782999633327422e-05, | |
| "loss": 2.138, | |
| "step": 3080000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.5234982181134358, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 39.9703, | |
| "eval_samples_per_second": 89.266, | |
| "eval_steps_per_second": 11.158, | |
| "step": 3080000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.7755148993687484e-05, | |
| "loss": 2.1385, | |
| "step": 3085000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.5236360239162929, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 40.1115, | |
| "eval_samples_per_second": 88.952, | |
| "eval_steps_per_second": 11.119, | |
| "step": 3085000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.768031663255573e-05, | |
| "loss": 2.135, | |
| "step": 3090000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.5233746586361225, | |
| "eval_loss": 2.166015625, | |
| "eval_runtime": 40.1304, | |
| "eval_samples_per_second": 88.91, | |
| "eval_steps_per_second": 11.114, | |
| "step": 3090000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.760548427142398e-05, | |
| "loss": 2.1401, | |
| "step": 3095000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.5235689018055574, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 40.1365, | |
| "eval_samples_per_second": 88.897, | |
| "eval_steps_per_second": 11.112, | |
| "step": 3095000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.753066688874723e-05, | |
| "loss": 2.1374, | |
| "step": 3100000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.5235552034156113, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 40.133, | |
| "eval_samples_per_second": 88.904, | |
| "eval_steps_per_second": 11.113, | |
| "step": 3100000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.745586448452546e-05, | |
| "loss": 2.1358, | |
| "step": 3105000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.5237198580627628, | |
| "eval_loss": 2.1640625, | |
| "eval_runtime": 40.0267, | |
| "eval_samples_per_second": 89.141, | |
| "eval_steps_per_second": 11.143, | |
| "step": 3105000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.7381032123393713e-05, | |
| "loss": 2.1344, | |
| "step": 3110000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.5239288954933393, | |
| "eval_loss": 2.162109375, | |
| "eval_runtime": 40.1156, | |
| "eval_samples_per_second": 88.943, | |
| "eval_steps_per_second": 11.118, | |
| "step": 3110000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.730621474071695e-05, | |
| "loss": 2.1368, | |
| "step": 3115000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.5238793073217346, | |
| "eval_loss": 2.162109375, | |
| "eval_runtime": 40.1212, | |
| "eval_samples_per_second": 88.93, | |
| "eval_steps_per_second": 11.116, | |
| "step": 3115000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.723136740113022e-05, | |
| "loss": 2.1345, | |
| "step": 3120000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.5236836943133052, | |
| "eval_loss": 2.162109375, | |
| "eval_runtime": 40.203, | |
| "eval_samples_per_second": 88.75, | |
| "eval_steps_per_second": 11.094, | |
| "step": 3120000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.7156535039998465e-05, | |
| "loss": 2.1358, | |
| "step": 3125000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.5238979371320612, | |
| "eval_loss": 2.162109375, | |
| "eval_runtime": 40.2344, | |
| "eval_samples_per_second": 88.68, | |
| "eval_steps_per_second": 11.085, | |
| "step": 3125000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.708171765732171e-05, | |
| "loss": 2.1395, | |
| "step": 3130000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.5239330050103231, | |
| "eval_loss": 2.162109375, | |
| "eval_runtime": 40.4542, | |
| "eval_samples_per_second": 88.199, | |
| "eval_steps_per_second": 11.025, | |
| "step": 3130000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.700690027464496e-05, | |
| "loss": 2.1359, | |
| "step": 3135000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.5242872453743277, | |
| "eval_loss": 2.162109375, | |
| "eval_runtime": 40.3355, | |
| "eval_samples_per_second": 88.458, | |
| "eval_steps_per_second": 11.057, | |
| "step": 3135000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.693205293505821e-05, | |
| "loss": 2.1373, | |
| "step": 3140000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.5241631379614166, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 40.2137, | |
| "eval_samples_per_second": 88.726, | |
| "eval_steps_per_second": 11.091, | |
| "step": 3140000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.6857235552381456e-05, | |
| "loss": 2.1357, | |
| "step": 3145000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.5243412170307151, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 40.3184, | |
| "eval_samples_per_second": 88.496, | |
| "eval_steps_per_second": 11.062, | |
| "step": 3145000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.67824181697047e-05, | |
| "loss": 2.1354, | |
| "step": 3150000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.5243636823902266, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 40.1892, | |
| "eval_samples_per_second": 88.78, | |
| "eval_steps_per_second": 11.098, | |
| "step": 3150000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.670757083011796e-05, | |
| "loss": 2.1323, | |
| "step": 3155000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.5243839560073468, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 40.3635, | |
| "eval_samples_per_second": 88.397, | |
| "eval_steps_per_second": 11.05, | |
| "step": 3155000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.66327534474412e-05, | |
| "loss": 2.133, | |
| "step": 3160000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.5242327257823425, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 41.1497, | |
| "eval_samples_per_second": 86.708, | |
| "eval_steps_per_second": 10.838, | |
| "step": 3160000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.655790610785447e-05, | |
| "loss": 2.1315, | |
| "step": 3165000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.524407243270255, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 41.4927, | |
| "eval_samples_per_second": 85.991, | |
| "eval_steps_per_second": 10.749, | |
| "step": 3165000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.6483088725177706e-05, | |
| "loss": 2.1363, | |
| "step": 3170000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.5242845056963384, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 43.5552, | |
| "eval_samples_per_second": 81.919, | |
| "eval_steps_per_second": 10.24, | |
| "step": 3170000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.640825636404596e-05, | |
| "loss": 2.1349, | |
| "step": 3175000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.5245045018388719, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 41.5498, | |
| "eval_samples_per_second": 85.873, | |
| "eval_steps_per_second": 10.734, | |
| "step": 3175000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.633342400291421e-05, | |
| "loss": 2.1336, | |
| "step": 3180000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.524365600164819, | |
| "eval_loss": 2.16015625, | |
| "eval_runtime": 45.093, | |
| "eval_samples_per_second": 79.125, | |
| "eval_steps_per_second": 9.891, | |
| "step": 3180000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.625860662023746e-05, | |
| "loss": 2.1364, | |
| "step": 3185000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.5243612166800363, | |
| "eval_loss": 2.158203125, | |
| "eval_runtime": 42.4027, | |
| "eval_samples_per_second": 84.146, | |
| "eval_steps_per_second": 10.518, | |
| "step": 3185000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.618375928065071e-05, | |
| "loss": 2.133, | |
| "step": 3190000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_accuracy": 0.5243256008661766, | |
| "eval_loss": 2.158203125, | |
| "eval_runtime": 40.6817, | |
| "eval_samples_per_second": 87.705, | |
| "eval_steps_per_second": 10.963, | |
| "step": 3190000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.6108941897973956e-05, | |
| "loss": 2.1349, | |
| "step": 3195000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_accuracy": 0.5245045018388719, | |
| "eval_loss": 2.158203125, | |
| "eval_runtime": 42.2213, | |
| "eval_samples_per_second": 84.507, | |
| "eval_steps_per_second": 10.563, | |
| "step": 3195000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.60341245152972e-05, | |
| "loss": 2.134, | |
| "step": 3200000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_accuracy": 0.5245702541106129, | |
| "eval_loss": 2.158203125, | |
| "eval_runtime": 41.8771, | |
| "eval_samples_per_second": 85.202, | |
| "eval_steps_per_second": 10.65, | |
| "step": 3200000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.5959277175710455e-05, | |
| "loss": 2.1308, | |
| "step": 3205000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_accuracy": 0.5249423023815473, | |
| "eval_loss": 2.15625, | |
| "eval_runtime": 42.7291, | |
| "eval_samples_per_second": 83.503, | |
| "eval_steps_per_second": 10.438, | |
| "step": 3205000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.5884444814578715e-05, | |
| "loss": 2.1302, | |
| "step": 3210000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_accuracy": 0.5246628552266481, | |
| "eval_loss": 2.15625, | |
| "eval_runtime": 41.7473, | |
| "eval_samples_per_second": 85.467, | |
| "eval_steps_per_second": 10.683, | |
| "step": 3210000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.580964241035695e-05, | |
| "loss": 2.1302, | |
| "step": 3215000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_accuracy": 0.5246708002928168, | |
| "eval_loss": 2.15625, | |
| "eval_runtime": 43.5638, | |
| "eval_samples_per_second": 81.903, | |
| "eval_steps_per_second": 10.238, | |
| "step": 3215000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.5734825027680186e-05, | |
| "loss": 2.1331, | |
| "step": 3220000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_accuracy": 0.5247650452156455, | |
| "eval_loss": 2.15625, | |
| "eval_runtime": 42.8533, | |
| "eval_samples_per_second": 83.261, | |
| "eval_steps_per_second": 10.408, | |
| "step": 3220000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.565999266654844e-05, | |
| "loss": 2.1273, | |
| "step": 3225000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.5247250459170031, | |
| "eval_loss": 2.15625, | |
| "eval_runtime": 42.4534, | |
| "eval_samples_per_second": 84.045, | |
| "eval_steps_per_second": 10.506, | |
| "step": 3225000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.5585175283871685e-05, | |
| "loss": 2.1286, | |
| "step": 3230000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.5249765483564124, | |
| "eval_loss": 2.15625, | |
| "eval_runtime": 43.0626, | |
| "eval_samples_per_second": 82.856, | |
| "eval_steps_per_second": 10.357, | |
| "step": 3230000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.551034292273994e-05, | |
| "loss": 2.1282, | |
| "step": 3235000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.525047232048534, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 42.4424, | |
| "eval_samples_per_second": 84.067, | |
| "eval_steps_per_second": 10.508, | |
| "step": 3235000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.543552554006317e-05, | |
| "loss": 2.1309, | |
| "step": 3240000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.5250951764133451, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 44.2507, | |
| "eval_samples_per_second": 80.631, | |
| "eval_steps_per_second": 10.079, | |
| "step": 3240000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.5360708157386415e-05, | |
| "loss": 2.1295, | |
| "step": 3245000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.5253592813715047, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 44.2616, | |
| "eval_samples_per_second": 80.612, | |
| "eval_steps_per_second": 10.076, | |
| "step": 3245000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.528589077470966e-05, | |
| "loss": 2.1275, | |
| "step": 3250000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.5253707880190593, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 43.0103, | |
| "eval_samples_per_second": 82.957, | |
| "eval_steps_per_second": 10.37, | |
| "step": 3250000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.5211058413577914e-05, | |
| "loss": 2.133, | |
| "step": 3255000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.5253924314751741, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 41.4169, | |
| "eval_samples_per_second": 86.148, | |
| "eval_steps_per_second": 10.769, | |
| "step": 3255000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.513622605244616e-05, | |
| "loss": 2.1301, | |
| "step": 3260000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.5251132582880739, | |
| "eval_loss": 2.154296875, | |
| "eval_runtime": 42.1862, | |
| "eval_samples_per_second": 84.577, | |
| "eval_steps_per_second": 10.572, | |
| "step": 3260000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.5061408669769406e-05, | |
| "loss": 2.1314, | |
| "step": 3265000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.5253096931999001, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 43.9727, | |
| "eval_samples_per_second": 81.141, | |
| "eval_steps_per_second": 10.143, | |
| "step": 3265000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.498656133018267e-05, | |
| "loss": 2.1258, | |
| "step": 3270000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.5254751697504482, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 42.0779, | |
| "eval_samples_per_second": 84.795, | |
| "eval_steps_per_second": 10.599, | |
| "step": 3270000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.491172896905092e-05, | |
| "loss": 2.1286, | |
| "step": 3275000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.525354897886722, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 41.8893, | |
| "eval_samples_per_second": 85.177, | |
| "eval_steps_per_second": 10.647, | |
| "step": 3275000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.4836911586374165e-05, | |
| "loss": 2.1267, | |
| "step": 3280000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.5253622950172928, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 42.0241, | |
| "eval_samples_per_second": 84.904, | |
| "eval_steps_per_second": 10.613, | |
| "step": 3280000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.476207922524242e-05, | |
| "loss": 2.13, | |
| "step": 3285000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.5254332526772133, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 43.4043, | |
| "eval_samples_per_second": 82.204, | |
| "eval_steps_per_second": 10.275, | |
| "step": 3285000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.4687246864110663e-05, | |
| "loss": 2.1284, | |
| "step": 3290000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.5254976351099597, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 43.4747, | |
| "eval_samples_per_second": 82.071, | |
| "eval_steps_per_second": 10.259, | |
| "step": 3290000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.461242948143391e-05, | |
| "loss": 2.1295, | |
| "step": 3295000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.5254190063516695, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 44.0831, | |
| "eval_samples_per_second": 80.938, | |
| "eval_steps_per_second": 10.117, | |
| "step": 3295000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.453759712030216e-05, | |
| "loss": 2.1241, | |
| "step": 3300000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.5255573600901244, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 43.4229, | |
| "eval_samples_per_second": 82.169, | |
| "eval_steps_per_second": 10.271, | |
| "step": 3300000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.446276475917041e-05, | |
| "loss": 2.1297, | |
| "step": 3305000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.5257825616208374, | |
| "eval_loss": 2.15234375, | |
| "eval_runtime": 41.6675, | |
| "eval_samples_per_second": 85.63, | |
| "eval_steps_per_second": 10.704, | |
| "step": 3305000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.4387947376493654e-05, | |
| "loss": 2.126, | |
| "step": 3310000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.5256425640755888, | |
| "eval_loss": 2.150390625, | |
| "eval_runtime": 43.5829, | |
| "eval_samples_per_second": 81.867, | |
| "eval_steps_per_second": 10.233, | |
| "step": 3310000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.43131299938169e-05, | |
| "loss": 2.1263, | |
| "step": 3315000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.5255672229308856, | |
| "eval_loss": 2.150390625, | |
| "eval_runtime": 40.6138, | |
| "eval_samples_per_second": 87.852, | |
| "eval_steps_per_second": 10.981, | |
| "step": 3315000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.423828265423016e-05, | |
| "loss": 2.1273, | |
| "step": 3320000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.525589962258196, | |
| "eval_loss": 2.150390625, | |
| "eval_runtime": 41.1643, | |
| "eval_samples_per_second": 86.677, | |
| "eval_steps_per_second": 10.835, | |
| "step": 3320000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.4163450293098406e-05, | |
| "loss": 2.1214, | |
| "step": 3325000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.5255433877323795, | |
| "eval_loss": 2.150390625, | |
| "eval_runtime": 40.8274, | |
| "eval_samples_per_second": 87.392, | |
| "eval_steps_per_second": 10.924, | |
| "step": 3325000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.408863291042165e-05, | |
| "loss": 2.1275, | |
| "step": 3330000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.5255872225802068, | |
| "eval_loss": 2.150390625, | |
| "eval_runtime": 42.63, | |
| "eval_samples_per_second": 83.697, | |
| "eval_steps_per_second": 10.462, | |
| "step": 3330000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.40138155277449e-05, | |
| "loss": 2.1227, | |
| "step": 3335000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.5258348894704312, | |
| "eval_loss": 2.150390625, | |
| "eval_runtime": 42.4569, | |
| "eval_samples_per_second": 84.038, | |
| "eval_steps_per_second": 10.505, | |
| "step": 3335000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 3338128, | |
| "total_flos": 1.0872066371139498e+21, | |
| "train_loss": 0.2576859601399347, | |
| "train_runtime": 150388.7494, | |
| "train_samples_per_second": 177.573, | |
| "train_steps_per_second": 22.197 | |
| } | |
| ], | |
| "max_steps": 3338128, | |
| "num_train_epochs": 1, | |
| "total_flos": 1.0872066371139498e+21, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |