Nicolas-BZRD committed
Commit 870fbf5 · verified · 1 parent: 2918494

Upload folder using huggingface_hub
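The commit message corresponds to huggingface_hub's folder-upload flow. As a rough sketch (the repo id and local path below are placeholders, not taken from this commit), a commit like this is produced by:

```python
# Sketch of the call that produces an "Upload folder using huggingface_hub"
# commit. repo_id and folder_path are hypothetical placeholders.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    repo_id="Nicolas-BZRD/some-model",        # placeholder target repo
    folder_path="/path/to/local/checkpoint",  # placeholder local folder
    commit_message="Upload folder using huggingface_hub",
)
```

Files matched by the repo's LFS rules in .gitattributes (see the change below) are uploaded as LFS objects automatically.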
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
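The added rule routes tokenizer.json through Git LFS alongside the existing archive and TensorBoard patterns. A standalone sketch of how such glob patterns match filenames (fnmatch is a close approximation of .gitattributes globbing, not an exact reimplementation):

```python
# Approximate check of which .gitattributes LFS patterns match a file.
# fnmatch semantics are close to, but not identical to, git's glob rules.
from fnmatch import fnmatch

lfs_patterns = ["*.zip", "*.zst", "*tfevents*", "tokenizer.json"]

def lfs_tracked(name: str) -> bool:
    return any(fnmatch(name, pat) for pat in lfs_patterns)

print(lfs_tracked("tokenizer.json"))  # True: covered by the new rule
print(lfs_tracked("config.json"))     # False: stored as a regular blob
```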
README.md ADDED
@@ -0,0 +1,130 @@
+---
+library_name: transformers
+tags:
+- generated_from_trainer
+model-index:
+- name: lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0
+  results: []
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+
+axolotl version: `0.12.2`
+```yaml
+base_model: /lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-4b
+
+datasets:
+  - path: /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking
+    ds_type: json
+    type: chat_template
+    field_messages: conversations
+    data_files:
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0007.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0009.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0005.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0006.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0014.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0010.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0012.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0008.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0001.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0002.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0013.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0015.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0004.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0011.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0000.jsonl
+      - /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0003.jsonl
+
+dataset_prepared_path: /lustre/fswork/projects/rech/dgo/udv55np/dataset_gemma/Nemotron-Super-49B-v1_5/split_0
+tokenizer_config: "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-27b"
+chat_template: gemma3
+eot_tokens:
+  - "<end_of_turn>"
+
+shuffle_merged_datasets: true
+output_dir: /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0
+
+sequence_len: 16384
+sample_packing: true
+
+gradient_accumulation_steps: 1
+micro_batch_size: 1
+num_epochs: 0.6
+auto_resume_from_checkpoints: true
+
+optimizer: adamw_torch_fused
+lr_scheduler: warmup_stable_decay
+learning_rate: 5e-6
+lr_scheduler_kwargs:
+  num_decay_steps: 200
+  min_lr_ratio: 0.1
+warmup_steps: 100
+
+bf16: true
+tf32: false
+
+gradient_checkpointing: true
+logging_steps: 10
+flash_attention: true
+
+evals_per_epoch: 0
+saves_per_epoch: 1
+save_total_limit: 20
+save_only_model: true
+
+use_tensorboard: true
+deepspeed: /lustre/fswork/projects/rech/qwv/udv55np/axolotl/zero3.json
+
+```
+
+</details><br>
+
+# lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0
+
+This model was trained from scratch on an unspecified dataset.
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 5e-06
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 16
+- total_train_batch_size: 16
+- total_eval_batch_size: 16
+- optimizer: ADAMW_TORCH_FUSED with betas=(0.9, 0.999), epsilon=1e-08, and no additional optimizer arguments
+- lr_scheduler_type: warmup_stable_decay
+- lr_scheduler_warmup_steps: 100
+- training_steps: 711
+
+### Training results
+
+
+
+### Framework versions
+
+- Transformers 4.55.2
+- Pytorch 2.6.0+cu124
+- Datasets 4.0.0
+- Tokenizers 0.21.1
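The card lists the stack the checkpoint was saved with; loading it back follows the usual Transformers pattern. A minimal sketch, assuming the files have been downloaded locally (the path is a placeholder):

```python
# Minimal loading sketch for this checkpoint (placeholder path).
# Matches the card: Transformers 4.55.2, bf16, Gemma3ForConditionalGeneration.
import torch
from transformers import AutoTokenizer, Gemma3ForConditionalGeneration

ckpt = "/path/to/this/checkpoint"  # placeholder for a local download
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = Gemma3ForConditionalGeneration.from_pretrained(
    ckpt,
    torch_dtype=torch.bfloat16,  # weights are stored in bfloat16
    device_map="auto",
)
```

Note that total_train_batch_size 16 above is simply micro_batch_size 1 × gradient_accumulation_steps 1 × 16 devices.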
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+{
+  "<image_soft_token>": 262144
+}
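This registers the image placeholder at id 262144, which should match image_token_index in config.json below. A quick consistency check, assuming the tokenizer is loaded from this repo (placeholder path):

```python
# Consistency check between added_tokens.json and config.json (placeholder path).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("/path/to/this/checkpoint")
assert tokenizer.convert_tokens_to_ids("<image_soft_token>") == 262144
```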
chat_template.jinja ADDED
@@ -0,0 +1,47 @@
+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '
+
+' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
+
+' -%}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
+        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {{ '<start_of_turn>' + role + '
+' + (first_user_prefix if loop.first else "") }}
+    {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
+    {%- elif message['content'] is iterable -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
+            {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{ raise_exception("Invalid content type") }}
+    {%- endif -%}
+    {{ '<end_of_turn>
+' }}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{'<start_of_turn>model
+'}}
+{%- endif -%}
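The template prepends bos_token, folds a leading system message into the first user turn (separated by a blank line), renames the assistant role to model, and closes each turn with <end_of_turn>, matching the eot_tokens setting in the axolotl config. A sketch of rendering it through the tokenizer (placeholder path):

```python
# Sketch: render the chat template above (placeholder path).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("/path/to/this/checkpoint")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
print(tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
# <bos><start_of_turn>user
# You are a helpful assistant.
#
# Hello!<end_of_turn>
# <start_of_turn>model
```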
config.json ADDED
@@ -0,0 +1,93 @@
+{
+  "architectures": [
+    "Gemma3ForConditionalGeneration"
+  ],
+  "boi_token_index": 255999,
+  "eoi_token_index": 256000,
+  "image_token_index": 262144,
+  "initializer_range": 0.02,
+  "mm_tokens_per_image": 256,
+  "model_type": "gemma3",
+  "text_config": {
+    "_sliding_window_pattern": 6,
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "attn_logit_softcapping": null,
+    "final_logit_softcapping": null,
+    "head_dim": 256,
+    "hidden_activation": "gelu_pytorch_tanh",
+    "hidden_size": 2560,
+    "initializer_range": 0.02,
+    "intermediate_size": 10240,
+    "layer_types": [
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention"
+    ],
+    "max_position_embeddings": 131072,
+    "model_type": "gemma3_text",
+    "num_attention_heads": 8,
+    "num_hidden_layers": 34,
+    "num_key_value_heads": 4,
+    "query_pre_attn_scalar": 256,
+    "rms_norm_eps": 1e-06,
+    "rope_local_base_freq": 10000.0,
+    "rope_scaling": {
+      "factor": 8.0,
+      "rope_type": "linear"
+    },
+    "rope_theta": 1000000.0,
+    "sliding_window": 1024,
+    "torch_dtype": "bfloat16",
+    "use_cache": false,
+    "vocab_size": 262208
+  },
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.55.2",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 1152,
+    "image_size": 896,
+    "intermediate_size": 4304,
+    "layer_norm_eps": 1e-06,
+    "model_type": "siglip_vision_model",
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 27,
+    "patch_size": 14,
+    "torch_dtype": "bfloat16",
+    "vision_use_head": false
+  }
+}
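text_config spells out Gemma 3's interleaved attention: _sliding_window_pattern 6 means five sliding-window layers (window 1024) followed by one full-attention layer, repeated across the 34 layers, with rope_scaling stretching RoPE by a factor of 8 on the global layers. The layer_types list above can be reproduced from the pattern:

```python
# Reproduce layer_types from _sliding_window_pattern = 6 (34 layers total).
pattern, num_layers = 6, 34
layer_types = [
    "full_attention" if (i + 1) % pattern == 0 else "sliding_attention"
    for i in range(num_layers)
]
print([i for i, t in enumerate(layer_types) if t == "full_attention"])
# [5, 11, 17, 23, 29] -> matches the full_attention positions in the config
```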
generation_config.json ADDED
@@ -0,0 +1,13 @@
+{
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "do_sample": true,
+  "eos_token_id": [
+    1,
+    106
+  ],
+  "pad_token_id": 0,
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "4.55.2"
+}
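generate() picks these defaults up automatically: sampling with top-k 64 and top-p 0.95, stopping on token id 1 or 106 (end-of-sequence and end-of-turn), and the hybrid sliding/static KV cache the layer pattern calls for. Continuing the loading sketch above (placeholder path):

```python
# Sketch: generation with the repo's defaults (loaded automatically from
# generation_config.json; spelled out here only for clarity).
import torch
from transformers import AutoTokenizer, Gemma3ForConditionalGeneration

ckpt = "/path/to/this/checkpoint"  # placeholder for a local download
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = Gemma3ForConditionalGeneration.from_pretrained(ckpt, torch_dtype=torch.bfloat16)

inputs = tokenizer("Hello", return_tensors="pt")
out = model.generate(**inputs, do_sample=True, top_k=64, top_p=0.95, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```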
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aeeda08d9f2f24cd261c299eee1899e67b71ba65b535ddc5979e9bdabfc7b401
+size 4961251752
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cdd866c965026d81e2e0debf50e346712ead4809d87b7a46b56eed1ead1da99
+size 3639026128
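Both shard entries are Git LFS pointer files rather than the weights themselves: each records the LFS spec version, the SHA-256 of the real blob, and its byte size (about 5.0 GB and 3.6 GB here, roughly the total_size reported in the index below). Once the real shards are downloaded, they can be checked against the pointers (placeholder filenames):

```python
# Sketch: verify a downloaded shard against its LFS pointer hash.
import hashlib

def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB at a time
            h.update(chunk)
    return h.hexdigest()

expected = "aeeda08d9f2f24cd261c299eee1899e67b71ba65b535ddc5979e9bdabfc7b401"
print(sha256_of("model-00001-of-00002.safetensors") == expected)
```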
model.safetensors.index.json ADDED
@@ -0,0 +1,891 @@
+{
+  "metadata": {
+    "total_parameters": 768880,
+    "total_size": 8600158944
+  },
+  "weight_map": {
+    "language_model.model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.14.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.14.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.14.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.14.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.15.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.15.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.15.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.15.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.2.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.2.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.20.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.20.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.20.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.3.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.3.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.30.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.30.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.4.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.4.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "language_model.model.norm.weight": "model-00002-of-00002.safetensors",
+    "multi_modal_projector.mm_input_projection_weight": "model-00001-of-00002.safetensors",
+    "multi_modal_projector.mm_soft_emb_norm.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.embeddings.patch_embedding.bias": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.embeddings.position_embedding.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00001-of-00002.safetensors",
+    "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00001-of-00002.safetensors",
477
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00001-of-00002.safetensors",
478
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00001-of-00002.safetensors",
479
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00001-of-00002.safetensors",
480
+ "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
481
+ "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
482
+ "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
483
+ "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
484
+ "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
485
+ "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
486
+ "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
487
+ "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
488
+ "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00001-of-00002.safetensors",
489
+ "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00001-of-00002.safetensors",
490
+ "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00001-of-00002.safetensors",
491
+ "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00001-of-00002.safetensors",
492
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00001-of-00002.safetensors",
493
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00001-of-00002.safetensors",
494
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00001-of-00002.safetensors",
495
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00001-of-00002.safetensors",
496
+ "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
497
+ "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
498
+ "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
499
+ "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
500
+ "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
501
+ "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
502
+ "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
503
+ "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
504
+ "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00001-of-00002.safetensors",
505
+ "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00001-of-00002.safetensors",
506
+ "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00001-of-00002.safetensors",
507
+ "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00001-of-00002.safetensors",
508
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00001-of-00002.safetensors",
509
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00001-of-00002.safetensors",
510
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00001-of-00002.safetensors",
511
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00001-of-00002.safetensors",
512
+ "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
513
+ "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
514
+ "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
515
+ "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
516
+ "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
517
+ "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
518
+ "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
519
+ "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
520
+ "vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00001-of-00002.safetensors",
521
+ "vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00001-of-00002.safetensors",
522
+ "vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00001-of-00002.safetensors",
523
+ "vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00001-of-00002.safetensors",
524
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00001-of-00002.safetensors",
525
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00001-of-00002.safetensors",
526
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00001-of-00002.safetensors",
527
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00001-of-00002.safetensors",
528
+ "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
529
+ "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
530
+ "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
531
+ "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
532
+ "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
533
+ "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
534
+ "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
535
+ "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
536
+ "vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00001-of-00002.safetensors",
537
+ "vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00001-of-00002.safetensors",
538
+ "vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00001-of-00002.safetensors",
539
+ "vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00001-of-00002.safetensors",
540
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00001-of-00002.safetensors",
541
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00001-of-00002.safetensors",
542
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00001-of-00002.safetensors",
543
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00001-of-00002.safetensors",
544
+ "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
545
+ "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
546
+ "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
547
+ "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
548
+ "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
549
+ "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
550
+ "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
551
+ "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
552
+ "vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00001-of-00002.safetensors",
553
+ "vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00001-of-00002.safetensors",
554
+ "vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00001-of-00002.safetensors",
555
+ "vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00001-of-00002.safetensors",
556
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00001-of-00002.safetensors",
557
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00001-of-00002.safetensors",
558
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00001-of-00002.safetensors",
559
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00001-of-00002.safetensors",
560
+ "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
561
+ "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
562
+ "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
563
+ "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
564
+ "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
565
+ "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
566
+ "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
567
+ "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
568
+ "vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00001-of-00002.safetensors",
569
+ "vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00001-of-00002.safetensors",
570
+ "vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00001-of-00002.safetensors",
571
+ "vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00001-of-00002.safetensors",
572
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00001-of-00002.safetensors",
573
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00001-of-00002.safetensors",
574
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00001-of-00002.safetensors",
575
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00001-of-00002.safetensors",
576
+ "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
577
+ "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
578
+ "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
579
+ "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
580
+ "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
581
+ "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
582
+ "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
583
+ "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
584
+ "vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00001-of-00002.safetensors",
585
+ "vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00001-of-00002.safetensors",
586
+ "vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00001-of-00002.safetensors",
587
+ "vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00001-of-00002.safetensors",
588
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00001-of-00002.safetensors",
589
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00001-of-00002.safetensors",
590
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00001-of-00002.safetensors",
591
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00001-of-00002.safetensors",
592
+ "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
593
+ "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
594
+ "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
595
+ "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
596
+ "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
597
+ "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
598
+ "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
599
+ "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
600
+ "vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00001-of-00002.safetensors",
601
+ "vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00001-of-00002.safetensors",
602
+ "vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00001-of-00002.safetensors",
603
+ "vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00001-of-00002.safetensors",
604
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
605
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
606
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
607
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
608
+ "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
609
+ "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
610
+ "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
611
+ "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
612
+ "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
613
+ "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
614
+ "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
615
+ "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
616
+ "vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00001-of-00002.safetensors",
617
+ "vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00001-of-00002.safetensors",
618
+ "vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00001-of-00002.safetensors",
619
+ "vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00001-of-00002.safetensors",
620
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
621
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
622
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
623
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
624
+ "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
625
+ "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
626
+ "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
627
+ "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
628
+ "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
629
+ "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
630
+ "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
631
+ "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
632
+ "vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00001-of-00002.safetensors",
633
+ "vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00001-of-00002.safetensors",
634
+ "vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00001-of-00002.safetensors",
635
+ "vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00001-of-00002.safetensors",
636
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
637
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
638
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
639
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
640
+ "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
641
+ "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
642
+ "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
643
+ "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
644
+ "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
645
+ "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
646
+ "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
647
+ "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
648
+ "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00001-of-00002.safetensors",
649
+ "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00001-of-00002.safetensors",
650
+ "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00001-of-00002.safetensors",
651
+ "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00001-of-00002.safetensors",
652
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
653
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
654
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00001-of-00002.safetensors",
655
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
656
+ "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
657
+ "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
658
+ "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
659
+ "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
660
+ "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
661
+ "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
662
+ "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
663
+ "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
664
+ "vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00001-of-00002.safetensors",
665
+ "vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00001-of-00002.safetensors",
666
+ "vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00001-of-00002.safetensors",
667
+ "vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00001-of-00002.safetensors",
668
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
669
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
670
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
671
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
672
+ "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
673
+ "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
674
+ "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
675
+ "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
676
+ "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
677
+ "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
678
+ "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
679
+ "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
680
+ "vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00001-of-00002.safetensors",
681
+ "vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00001-of-00002.safetensors",
682
+ "vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00001-of-00002.safetensors",
683
+ "vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00001-of-00002.safetensors",
684
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00001-of-00002.safetensors",
685
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
686
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00001-of-00002.safetensors",
687
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00001-of-00002.safetensors",
688
+ "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
689
+ "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
690
+ "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
691
+ "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
692
+ "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
693
+ "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
694
+ "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
695
+ "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
696
+ "vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00001-of-00002.safetensors",
697
+ "vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00001-of-00002.safetensors",
698
+ "vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00001-of-00002.safetensors",
699
+ "vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00001-of-00002.safetensors",
700
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00001-of-00002.safetensors",
701
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00001-of-00002.safetensors",
702
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00001-of-00002.safetensors",
703
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00001-of-00002.safetensors",
704
+ "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
705
+ "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
706
+ "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
707
+ "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
708
+ "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
709
+ "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
710
+ "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
711
+ "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
712
+ "vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00001-of-00002.safetensors",
713
+ "vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00001-of-00002.safetensors",
714
+ "vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00001-of-00002.safetensors",
715
+ "vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00001-of-00002.safetensors",
716
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00001-of-00002.safetensors",
717
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00001-of-00002.safetensors",
718
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00001-of-00002.safetensors",
719
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00001-of-00002.safetensors",
720
+ "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
721
+ "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
722
+ "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
723
+ "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
724
+ "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
725
+ "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
726
+ "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
727
+ "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
728
+ "vision_tower.vision_model.encoder.layers.24.layer_norm1.bias": "model-00001-of-00002.safetensors",
729
+ "vision_tower.vision_model.encoder.layers.24.layer_norm1.weight": "model-00001-of-00002.safetensors",
730
+ "vision_tower.vision_model.encoder.layers.24.layer_norm2.bias": "model-00001-of-00002.safetensors",
731
+ "vision_tower.vision_model.encoder.layers.24.layer_norm2.weight": "model-00001-of-00002.safetensors",
732
+ "vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias": "model-00001-of-00002.safetensors",
733
+ "vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight": "model-00001-of-00002.safetensors",
734
+ "vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias": "model-00001-of-00002.safetensors",
735
+ "vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight": "model-00001-of-00002.safetensors",
736
+ "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
737
+ "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
738
+ "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
739
+ "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
740
+ "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
741
+ "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
742
+ "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
743
+ "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
744
+ "vision_tower.vision_model.encoder.layers.25.layer_norm1.bias": "model-00001-of-00002.safetensors",
745
+ "vision_tower.vision_model.encoder.layers.25.layer_norm1.weight": "model-00001-of-00002.safetensors",
746
+ "vision_tower.vision_model.encoder.layers.25.layer_norm2.bias": "model-00001-of-00002.safetensors",
747
+ "vision_tower.vision_model.encoder.layers.25.layer_norm2.weight": "model-00001-of-00002.safetensors",
748
+ "vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias": "model-00001-of-00002.safetensors",
749
+ "vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight": "model-00001-of-00002.safetensors",
750
+ "vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias": "model-00001-of-00002.safetensors",
751
+ "vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight": "model-00001-of-00002.safetensors",
752
+ "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
753
+ "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
754
+ "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
755
+ "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
756
+ "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
757
+ "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
758
+ "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
759
+ "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
760
+ "vision_tower.vision_model.encoder.layers.26.layer_norm1.bias": "model-00001-of-00002.safetensors",
761
+ "vision_tower.vision_model.encoder.layers.26.layer_norm1.weight": "model-00001-of-00002.safetensors",
762
+ "vision_tower.vision_model.encoder.layers.26.layer_norm2.bias": "model-00001-of-00002.safetensors",
763
+ "vision_tower.vision_model.encoder.layers.26.layer_norm2.weight": "model-00001-of-00002.safetensors",
764
+ "vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias": "model-00001-of-00002.safetensors",
765
+ "vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight": "model-00001-of-00002.safetensors",
766
+ "vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias": "model-00001-of-00002.safetensors",
767
+ "vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight": "model-00001-of-00002.safetensors",
768
+ "vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
769
+ "vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
770
+ "vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
771
+ "vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
772
+ "vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
773
+ "vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
774
+ "vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
775
+ "vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
776
+ "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00001-of-00002.safetensors",
777
+ "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00001-of-00002.safetensors",
778
+ "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00001-of-00002.safetensors",
779
+ "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00001-of-00002.safetensors",
780
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00001-of-00002.safetensors",
781
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00001-of-00002.safetensors",
782
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00001-of-00002.safetensors",
783
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00001-of-00002.safetensors",
784
+ "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
785
+ "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
786
+ "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
787
+ "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
788
+ "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
789
+ "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
790
+ "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
791
+ "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
792
+ "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00001-of-00002.safetensors",
793
+ "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00001-of-00002.safetensors",
794
+ "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00001-of-00002.safetensors",
795
+ "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00001-of-00002.safetensors",
796
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00001-of-00002.safetensors",
797
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00001-of-00002.safetensors",
798
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00001-of-00002.safetensors",
799
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00001-of-00002.safetensors",
800
+ "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
801
+ "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
802
+ "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
803
+ "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
804
+ "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
805
+ "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
806
+ "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
807
+ "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
808
+ "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00001-of-00002.safetensors",
809
+ "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00001-of-00002.safetensors",
810
+ "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00001-of-00002.safetensors",
811
+ "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00001-of-00002.safetensors",
812
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00001-of-00002.safetensors",
813
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00001-of-00002.safetensors",
814
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00001-of-00002.safetensors",
815
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00001-of-00002.safetensors",
816
+ "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
817
+ "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
818
+ "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
819
+ "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
820
+ "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
821
+ "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
822
+ "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
823
+ "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
824
+ "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00001-of-00002.safetensors",
825
+ "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00001-of-00002.safetensors",
826
+ "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00001-of-00002.safetensors",
827
+ "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00001-of-00002.safetensors",
828
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00001-of-00002.safetensors",
829
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00001-of-00002.safetensors",
830
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00001-of-00002.safetensors",
831
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00001-of-00002.safetensors",
832
+ "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
833
+ "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
834
+ "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
835
+ "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
836
+ "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
837
+ "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
838
+ "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
839
+ "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
840
+ "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00001-of-00002.safetensors",
841
+ "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00001-of-00002.safetensors",
842
+ "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00001-of-00002.safetensors",
843
+ "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00001-of-00002.safetensors",
844
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00001-of-00002.safetensors",
845
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00001-of-00002.safetensors",
846
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00001-of-00002.safetensors",
847
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00001-of-00002.safetensors",
848
+ "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
849
+ "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
850
+ "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
851
+ "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
852
+ "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
853
+ "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
854
+ "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
855
+ "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
856
+ "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00001-of-00002.safetensors",
857
+ "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00001-of-00002.safetensors",
858
+ "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00001-of-00002.safetensors",
859
+ "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00001-of-00002.safetensors",
860
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00001-of-00002.safetensors",
861
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00001-of-00002.safetensors",
862
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00001-of-00002.safetensors",
863
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00001-of-00002.safetensors",
864
+ "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
865
+ "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
866
+ "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
867
+ "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
868
+ "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
869
+ "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
870
+ "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
871
+ "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
872
+ "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00001-of-00002.safetensors",
873
+ "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00001-of-00002.safetensors",
874
+ "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00001-of-00002.safetensors",
875
+ "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00001-of-00002.safetensors",
876
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00001-of-00002.safetensors",
877
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00001-of-00002.safetensors",
878
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00001-of-00002.safetensors",
879
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00001-of-00002.safetensors",
880
+ "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
881
+ "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
882
+ "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
883
+ "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
884
+ "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
885
+ "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
886
+ "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
887
+ "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
888
+ "vision_tower.vision_model.post_layernorm.bias": "model-00001-of-00002.safetensors",
889
+ "vision_tower.vision_model.post_layernorm.weight": "model-00001-of-00002.safetensors"
890
+ }
891
+ }
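
Note: the `weight_map` above is the standard sharded-safetensors index that `transformers` consults at load time; every tensor listed here lives in shard 1 except `language_model.model.norm.weight`, which sits in shard 2. A minimal sketch of resolving one tensor by hand (the checkpoint directory path is a placeholder):

```python
import json
from safetensors import safe_open

ckpt_dir = "path/to/checkpoint"  # placeholder: a local copy of this repo

# The index maps each tensor name to the shard file that stores it.
with open(f"{ckpt_dir}/model.safetensors.index.json") as f:
    index = json.load(f)

name = "language_model.model.layers.9.self_attn.q_proj.weight"
shard = index["weight_map"][name]  # "model-00001-of-00002.safetensors"

# Open only that shard and read the single tensor lazily.
with safe_open(f"{ckpt_dir}/{shard}", framework="pt") as f:
    print(name, tuple(f.get_tensor(name).shape))
```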
preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+ "do_convert_rgb": null,
+ "do_normalize": true,
+ "do_pan_and_scan": null,
+ "do_rescale": true,
+ "do_resize": true,
+ "image_mean": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "image_processor_type": "Gemma3ImageProcessor",
+ "image_seq_length": 256,
+ "image_std": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "pan_and_scan_max_num_crops": null,
+ "pan_and_scan_min_crop_size": null,
+ "pan_and_scan_min_ratio_to_activate": null,
+ "processor_class": "Gemma3Processor",
+ "resample": 2,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "height": 896,
+ "width": 896
+ }
+ }
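
With these settings, `Gemma3ImageProcessor` resizes to 896Γ—896 with bilinear filtering (`"resample": 2` is PIL's bilinear constant), rescales by 1/255, and normalizes with per-channel mean and std of 0.5. A rough NumPy/PIL equivalent, ignoring the pan-and-scan options (all unset here); the input filename is a placeholder:

```python
import numpy as np
from PIL import Image

img = Image.open("example.jpg").convert("RGB")          # placeholder input image
img = img.resize((896, 896), resample=Image.BILINEAR)   # "resample": 2

x = np.asarray(img, dtype=np.float32)
x = x * 0.00392156862745098        # rescale_factor = 1/255
x = (x - 0.5) / 0.5                # (pixel - image_mean) / image_std
x = x.transpose(2, 0, 1)[None]     # HWC -> NCHW, batch of 1
print(x.shape)                     # (1, 3, 896, 896)
```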
processor_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "image_seq_length": 256,
+ "processor_class": "Gemma3Processor"
+ }
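
`"image_seq_length": 256` is the fixed soft-token budget per image: the vision tower's patch features are condensed by the multi-modal projector into 256 embeddings that are spliced into the language model's input sequence. Loading the full processor should surface the same value (the path is a placeholder):

```python
from transformers import AutoProcessor

# Placeholder path: any local directory containing this repo's config files.
processor = AutoProcessor.from_pretrained("path/to/checkpoint")
print(processor.image_seq_length)  # 256
```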
runs/Nov24_00-10-02_jzxh298/events.out.tfevents.1763939502.jzxh298.1912876.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:133e4f210283a16b10e10502f85c480f116b833d4e37704964defe4479573088
+ size 41816
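
The three lines above are a Git LFS pointer, not the TensorBoard event file itself: the actual bytes live in LFS storage and are identified by their SHA-256 and size. A quick integrity check against a downloaded copy (the local path is a placeholder):

```python
import hashlib
import os

expected_oid = "133e4f210283a16b10e10502f85c480f116b833d4e37704964defe4479573088"
expected_size = 41816

path = "events.out.tfevents.1763939502.jzxh298.1912876.0"  # placeholder local path
with open(path, "rb") as f:
    oid = hashlib.sha256(f.read()).hexdigest()
print(oid == expected_oid and os.path.getsize(path) == expected_size)
```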
slurm.out ADDED
@@ -0,0 +1,382 @@
  0%| | 0/711 [00:00<?, ?it/s]
  0%| | 1/711 [03:10<37:32:11, 190.33s/it]
  0%| | 2/711 [03:14<15:52:57, 80.65s/it]
[… tqdm progress-bar redraws omitted: per-step time settles near 2.3 s/it by roughly step 15 and holds steady; the rendered log breaks off here at step 196/711 (28%), 10:46 elapsed …]
235
  28%|β–ˆβ–ˆβ–Š | 197/711 [10:48<20:00, 2.34s/it]
236
  28%|β–ˆβ–ˆβ–Š | 198/711 [10:51<20:08, 2.36s/it]
237
  28%|β–ˆβ–ˆβ–Š | 199/711 [10:53<19:59, 2.34s/it]
238
  28%|β–ˆβ–ˆβ–Š | 200/711 [10:55<19:52, 2.33s/it]
239
 
240
  28%|β–ˆβ–ˆβ–Š | 200/711 [10:55<19:52, 2.33s/it]
241
  28%|β–ˆβ–ˆβ–Š | 201/711 [10:57<19:47, 2.33s/it]
242
  28%|β–ˆβ–ˆβ–Š | 202/711 [11:00<19:40, 2.32s/it]
243
  29%|β–ˆβ–ˆβ–Š | 203/711 [11:02<19:37, 2.32s/it]
244
  29%|β–ˆβ–ˆβ–Š | 204/711 [11:04<19:45, 2.34s/it]
245
  29%|β–ˆβ–ˆβ–‰ | 205/711 [11:07<19:39, 2.33s/it]
246
  29%|β–ˆβ–ˆβ–‰ | 206/711 [11:09<19:37, 2.33s/it]
247
  29%|β–ˆβ–ˆβ–‰ | 207/711 [11:12<20:12, 2.41s/it]
248
  29%|β–ˆβ–ˆβ–‰ | 208/711 [11:14<20:01, 2.39s/it]
249
  29%|β–ˆβ–ˆβ–‰ | 209/711 [11:16<19:46, 2.36s/it]
250
  30%|β–ˆβ–ˆβ–‰ | 210/711 [11:19<19:37, 2.3
 
 
 
251
 
252
  30%|β–ˆβ–ˆβ–‰ | 210/711 [11:19<19:37, 2.35s/it]
253
  30%|β–ˆβ–ˆβ–‰ | 211/711 [11:21<19:30, 2.34s/it]
254
  30%|β–ˆβ–ˆβ–‰ | 212/711 [11:23<19:23, 2.33s/it]
255
  30%|β–ˆβ–ˆβ–‰ | 213/711 [11:26<19:16, 2.32s/it]
256
  30%|β–ˆβ–ˆβ–ˆ | 214/711 [11:28<19:12, 2.32s/it]
257
  30%|β–ˆβ–ˆβ–ˆ | 215/711 [11:30<19:09, 2.32s/it]
258
  30%|β–ˆβ–ˆβ–ˆ | 216/711 [11:33<19:06, 2.32s/it]
259
  31%|β–ˆβ–ˆβ–ˆ | 217/711 [11:35<19:03, 2.31s/it]
260
  31%|β–ˆβ–ˆβ–ˆ | 218/711 [11:37<19:03, 2.32s/it]
261
  31%|β–ˆβ–ˆβ–ˆ | 219/711 [11:39<19:01, 2.32s/it]
262
  31%|β–ˆβ–ˆβ–ˆ | 220/711 [11:42<18:59, 2.32s/it]
263
 
264
  31%|β–ˆβ–ˆβ–ˆ | 220/711 [11:42<18:59, 2.32s/it]
265
  31%|β–ˆβ–ˆβ–ˆ | 221/711 [11:44<18:58, 2.32s/it]
266
  31%|β–ˆβ–ˆβ–ˆ | 222/711 [11:47<19:09, 2.35s/it]
267
  31%|β–ˆβ–ˆβ–ˆβ– | 223/711 [11:49<19:11, 2.36s/it]
268
  32%|β–ˆβ–ˆβ–ˆβ– | 224/711 [11:51<19:07, 2.36s/it]
269
  32%|β–ˆβ–ˆβ–ˆοΏ½
 
 
270
  32%|β–ˆβ–ˆβ–ˆβ– | 226/711 [11:56<19:01, 2.35s/it]
271
  32%|β–ˆβ–ˆβ–ˆβ– | 227/711 [11:58<18:52, 2.34s/it]
272
  32%|β–ˆβ–ˆβ–ˆβ– | 228/711 [12:01<18:45, 2.33s/it]
273
  32%|β–ˆβ–ˆβ–ˆβ– | 229/711 [12:03<18:40, 2.33s/it]
274
  32%|β–ˆβ–ˆβ–ˆβ– | 230/711 [12:05<18:35, 2.32s/it]
275
 
276
  32%|β–ˆβ–ˆβ–ˆβ– | 230/711 [12:05<18:35, 2.32s/it]
277
  32%|β–ˆβ–ˆβ–ˆβ– | 231/711 [12:08<18:32, 2.32s/it]
278
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 232/711 [12:10<18:30, 2.32s/it]
279
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 233/711 [12:12<18:28, 2.32s/it]
280
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 234/711 [12:14<18:24, 2.32s/it]
281
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 235/711 [12:17<18:21, 2.31s/it]
282
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 236/711 [12:19<18:19, 2.31s/it]
283
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 237/711 [12:21<18:15, 2.31s/it]
284
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 238/711 [12:24<18:13, 2.31s/it]
285
  34%|β–ˆβ–ˆβ–ˆβ–Ž | 239/711 [12:26<18:18, 2.33s/it]
286
  34%|β–ˆβ–ˆβ–ˆβ– | 240/711 [12:28<18:20, 2.34s/it]
287
 
 
 
 
288
  34%|β–ˆβ–ˆβ–ˆβ– | 240/711 [12:28<18:20, 2.34s/it]
289
  34%|β–ˆβ–ˆβ–ˆβ– | 241/711 [12:31<18:15, 2.33s/it]
290
  34%|β–ˆβ–ˆβ–ˆβ– | 242/711 [12:33<18:09, 2.32s/it]
291
  34%|β–ˆβ–ˆβ–ˆβ– | 243/711 [12:35<18:11, 2.33s/it]
292
  34%|β–ˆβ–ˆβ–ˆβ– | 244/711 [12:38<18:05, 2.32s/it]
293
  34%|β–ˆβ–ˆβ–ˆβ– | 245/711 [12:40<18:00, 2.32s/it]
294
  35%|β–ˆβ–ˆβ–ˆβ– | 246/711 [12:42<17:56, 2.32s/it]
295
  35%|β–ˆβ–ˆβ–ˆβ– | 247/711 [12:45<17:53, 2.31s/it]
296
  35%|β–ˆβ–ˆβ–ˆβ– | 248/711 [12:47<17:51, 2.31s/it]
297
  35%|β–ˆβ–ˆβ–ˆβ–Œ | 249/711 [12:49<17:49, 2.31s/it]
298
  35%|β–ˆβ–ˆβ–ˆβ–Œ | 250/711 [12:52<17:46, 2.31s/it]
299
 
300
  35%|β–ˆβ–ˆβ–ˆβ–Œ | 250/711 [12:52<17:46, 2.31s/it]
301
  35%|β–ˆβ–ˆβ–ˆβ–Œ | 251/711 [12:54<17:44, 2.31s/it]
302
  35%|β–ˆβ–ˆβ–ˆβ–Œ | 252/711 [12:56<17:42, 2.32s/it]
303
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 253/711 [12:59<17:41, 2.32s/it]
304
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 254/711 [13:01<17:38, 2.32s/
 
 
305
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 255/711 [13:03<17:45, 2.34s/it]
306
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 256/711 [13:06<17:52, 2.36s/it]
307
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 257/711 [13:08<17:45, 2.35s/it]
308
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 258/711 [13:10<17:40, 2.34s/it]
309
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 259/711 [13:13<17:34, 2.33s/it]
310
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 260/711 [13:15<17:29, 2.33s/it]
311
 
312
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 260/711 [13:15<17:29, 2.33s/it]
313
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 261/711 [13:17<17:25, 2.32s/it]
314
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 262/711 [13:20<17:21, 2.32s/it]
315
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 263/711 [13:22<17:17, 2.32s/it]
316
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 264/711 [13:24<17:15, 2.32s/it]
317
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 265/711 [13:26<17:13, 2.32s/it]
318
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 266/711 [13:29<17:10, 2.32s/it]
319
  38%|β–ˆβ–ˆβ–ˆβ–Š | 267/711 [13:31<17:07, 2.31s/it]
320
  38%|β–ˆβ–ˆβ–ˆβ–Š | 268/711 [13:34<17:16, 2.34s/it]
321
  38%|β–ˆβ–ˆβ–ˆβ–Š | 269/711 [13:36<17:13, 2.34s/it]
322
  38%|β–ˆβ–ˆβ–ˆβ–Š | 270/711 [13:38<1
 
 
 
323
 
324
  38%|β–ˆβ–ˆβ–ˆβ–Š | 270/711 [13:38<17:08, 2.33s/it]
325
  38%|β–ˆβ–ˆβ–ˆβ–Š | 271/711 [13:41<17:12, 2.35s/it]
326
  38%|β–ˆβ–ˆβ–ˆβ–Š | 272/711 [13:43<17:13, 2.35s/it]
327
  38%|β–ˆβ–ˆβ–ˆβ–Š | 273/711 [13:45<17:07, 2.35s/it]
328
  39%|β–ˆβ–ˆβ–ˆβ–Š | 274/711 [13:48<17:10, 2.36s/it]
329
  39%|β–ˆβ–ˆβ–ˆβ–Š | 275/711 [13:50<17:06, 2.35s/it]
330
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 276/711 [13:52<16:58, 2.34s/it]
331
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 277/711 [13:55<16:53, 2.34s/it]
332
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 278/711 [13:57<16:49, 2.33s/it]
333
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 279/711 [13:59<16:44, 2.33s/it]
334
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 280/711 [14:02<16:40, 2.32s/it]
335
 
336
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 280/711 [14:02<16:40, 2.32s/it]
337
  40%|β–ˆβ–ˆβ–ˆβ–‰ | 281/711 [14:04<16:36, 2.32s/it]
338
  40%|β–ˆβ–ˆβ–ˆβ–‰ | 282/711 [14:06<16:34, 2.32s/it]
339
  40%|β–ˆβ–ˆβ–ˆβ–‰ | 283/711 [14:09<16:34, 2.32s/it]
340
  40%|β–ˆβ–ˆβ–ˆβ–‰ | 284/711 [1
 
 
341
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 285/711 [14:13<16:36, 2.34s/it]
342
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 286/711 [14:16<16:31, 2.33s/it]
343
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 287/711 [14:18<16:43, 2.37s/it]
344
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 288/711 [14:20<16:41, 2.37s/it]
345
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 289/711 [14:23<16:31, 2.35s/it]
346
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 290/711 [14:25<16:26, 2.34s/it]
347
 
348
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 290/711 [14:25<16:26, 2.34s/it]
349
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 291/711 [14:27<16:20, 2.33s/it]
350
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 292/711 [14:30<16:25, 2.35s/it]
351
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 293/711 [14:32<16:24, 2.35s/it]
352
  41%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 294/711 [14:34<16:18, 2.35s/it]
353
  41%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 295/711 [14:37<16:13, 2.34s/it]
354
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 296/711 [14:39<16:08, 2.33s/it]
355
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 297/711 [14:41<16:13, 2.35s/it]
356
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 298/711 [14:44<16:07, 2.34s/it]
357
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 299/711 [14:46<16:03, 2.34s/it]
358
  42%|β–ˆοΏ½
 
 
 
359
 
360
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 300/711 [14:48<15:59, 2.33s/it]
361
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 301/711 [14:51<15:55, 2.33s/it]
362
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 302/711 [14:53<15:50, 2.32s/it]
363
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 303/711 [14:55<15:54, 2.34s/it]
364
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 304/711 [14:58<15:55, 2.35s/it]
365
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 305/711 [15:00<15:51, 2.34s/it]
366
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 306/711 [15:02<15:46, 2.34s/it]
367
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 307/711 [15:05<15:42, 2.33s/it]
368
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 308/711 [15:07<15:37, 2.33s/it]
369
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 309/711 [15:10<15:47, 2.36s/it]
370
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 310/711 [15:12<15:52, 2.37s/it]
371
 
372
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 310/711 [15:12<15:52, 2.37s/it]
373
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 311/711 [15:14<15:42, 2.36s/it]
374
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 312/711 [15:17<15:49, 2.38s/it]
375
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 313/7
 
 
376
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 314/711 [15:22<15:55, 2.41s/it]
377
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 315/711 [15:24<15:44, 2.38s/it]
378
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 316/711 [15:26<15:33, 2.36s/it]
379
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 317/711 [15:29<15:25, 2.35s/it]
380
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 318/711 [15:31<15:25, 2.35s/it]
381
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 319/711 [15:33<15:25, 2.36s/it]
382
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 320/711 [15:36<15:22, 2.36s/it]
383
 
384
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 320/711 [15:36<15:22, 2.36s/it]
385
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 321/711 [15:38<15:45, 2.43s/it]
386
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 322/711 [15:41<15:30, 2.39s/it]
387
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 323/711 [15:43<16:06, 2.49s/it]
388
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 324/711 [15:46<16:30, 2.56s/it]
389
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 325/711 [15:48<15:59, 2.49s/it]
390
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 326/711 [15:51<16:07, 2.51s/it]
391
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 327/711 [15:53<15:44, 2.46s/it]
392
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 328/711 [15:55<15:
 
 
 
393
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 329/711 [15:58<15:13, 2.39s/it]
394
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 330/711 [16:00<15:07, 2.38s/it]
395
 
396
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 330/711 [16:00<15:07, 2.38s/it]
397
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 331/711 [16:03<14:57, 2.36s/it]
398
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 332/711 [16:05<14:49, 2.35s/it]
399
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 333/711 [16:07<15:21, 2.44s/it]
400
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 334/711 [16:10<15:12, 2.42s/it]
401
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 335/711 [16:12<15:12, 2.43s/it]
402
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 336/711 [16:15<14:57, 2.39s/it]
403
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 337/711 [16:17<14:46, 2.37s/it]
404
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 338/711 [16:19<14:39, 2.36s/it]
405
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 339/711 [16:22<14:32, 2.35s/it]
406
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 340/711 [16:24<14:27, 2.34s/it]
407
 
408
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 340/711 [16:24<14:27, 2.34s/it]
409
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 341/711 [16:26<14:23, 2.33s/it]
410
  48%|οΏ½
 
 
411
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 343/711 [16:31<14:15, 2.33s/it]
412
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 344/711 [16:33<14:14, 2.33s/it]
413
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 345/711 [16:35<14:08, 2.32s/it]
414
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 346/711 [16:38<14:38, 2.41s/it]
415
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 347/711 [16:40<14:25, 2.38s/it]
416
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 348/711 [16:43<14:14, 2.35s/it]
417
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 349/711 [16:45<14:06, 2.34s/it]
418
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 350/711 [16:47<14:06, 2.35s/it]
419
 
420
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 350/711 [16:47<14:06, 2.35s/it]
421
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 351/711 [16:50<14:07, 2.36s/it]
422
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 352/711 [16:52<14:00, 2.34s/it]
423
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 353/711 [16:54<13:55, 2.33s/it]
424
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 354/711 [16:57<13:50, 2.33s/it]
425
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 355/711 [16:59<13:47, 2.32s/it]
426
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 356/711 [17:01<13:44, 2.32s/it]
427
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ
 
 
 
428
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 358/711 [17:06<13:38, 2.32s/it]
429
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 359/711 [17:08<13:55, 2.37s/it]
430
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 360/711 [17:11<13:46, 2.35s/it]
431
 
432
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 360/711 [17:11<13:46, 2.35s/it]
433
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 361/711 [17:13<13:40, 2.34s/it]
434
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 362/711 [17:15<13:35, 2.34s/it]
435
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 363/711 [17:18<13:30, 2.33s/it]
436
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 364/711 [17:20<13:26, 2.32s/it]
437
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 365/711 [17:22<13:27, 2.33s/it]
438
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 366/711 [17:25<13:28, 2.34s/it]
439
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 367/711 [17:27<13:29, 2.35s/it]
440
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 368/711 [17:29<13:23, 2.34s/it]
441
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 369/711 [17:32<13:19, 2.34s/it]
442
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 370/711 [17:34<13:14, 2.33s/it]
443
 
444
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 3
 
 
445
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 371/711 [17:36<13:11, 2.33s/it]
446
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 372/711 [17:39<13:08, 2.33s/it]
447
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 373/711 [17:41<13:07, 2.33s/it]
448
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 374/711 [17:43<13:04, 2.33s/it]
449
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 375/711 [17:46<13:01, 2.33s/it]
450
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 376/711 [17:48<12:57, 2.32s/it]
451
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 377/711 [17:50<12:54, 2.32s/it]
452
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 378/711 [17:53<12:53, 2.32s/it]
453
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 379/711 [17:55<12:57, 2.34s/it]
454
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 380/711 [17:57<12:52, 2.33s/it]
455
 
456
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 380/711 [17:57<12:52, 2.33s/it]
457
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 381/711 [18:00<12:57, 2.35s/it]
458
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 382/711 [18:02<12:56, 2.36s/it]
459
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 383/711 [18:05<12:55, 2.36s/it]
460
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 384/711 [18:07<12:48, 2.35s/it]
461
  54%|β–ˆβ–ˆ
 
 
462
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 386/711 [18:11<12:38, 2.33s/it]
463
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 387/711 [18:14<12:33, 2.33s/it]
464
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 388/711 [18:16<12:30, 2.32s/it]
465
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 389/711 [18:18<12:28, 2.32s/it]
466
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 390/711 [18:21<12:28, 2.33s/it]
467
 
468
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 390/711 [18:21<12:28, 2.33s/it]
469
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 391/711 [18:23<12:24, 2.33s/it]
470
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 392/711 [18:25<12:24, 2.34s/it]
471
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 393/711 [18:28<12:20, 2.33s/it]
472
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 394/711 [18:30<12:16, 2.32s/it]
473
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 395/711 [18:32<12:22, 2.35s/it]
474
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 396/711 [18:35<12:16, 2.34s/it]
475
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 397/711 [18:37<12:11, 2.33s/it]
476
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 398/711 [18:39<12:11, 2.34s/it]
477
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 399/711 [18:42<12:12, 2.
 
 
 
478
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 400/711 [18:44<12:07, 2.34s/it]
479
 
480
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 400/711 [18:44<12:07, 2.34s/it]
481
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 401/711 [18:46<12:02, 2.33s/it]
482
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 402/711 [18:49<11:59, 2.33s/it]
483
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 403/711 [18:51<11:56, 2.32s/it]
484
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 404/711 [18:53<11:52, 2.32s/it]
485
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 405/711 [18:56<12:01, 2.36s/it]
486
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 406/711 [18:58<11:54, 2.34s/it]
487
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 407/711 [19:00<11:49, 2.33s/it]
488
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 408/711 [19:03<11:44, 2.33s/it]
489
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 409/711 [19:05<11:48, 2.35s/it]
490
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 410/711 [19:07<11:42, 2.33s/it]
491
 
492
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 410/711 [19:07<11:42, 2.33s/it]
493
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 411/711 [19:10<11:45, 2.35s/it]
494
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 412/711 [19:12<11
 
 
495
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 413/711 [19:14<11:34, 2.33s/it]
496
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 414/711 [19:17<11:35, 2.34s/it]
497
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 415/711 [19:19<11:35, 2.35s/it]
498
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 416/711 [19:22<11:37, 2.36s/it]
499
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 417/711 [19:24<11:30, 2.35s/it]
500
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 418/711 [19:26<11:26, 2.34s/it]
501
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 419/711 [19:29<11:22, 2.34s/it]
502
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 420/711 [19:31<11:18, 2.33s/it]
503
 
504
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 420/711 [19:31<11:18, 2.33s/it]
505
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 421/711 [19:33<11:15, 2.33s/it]
506
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 422/711 [19:36<11:11, 2.32s/it]
507
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 423/711 [19:38<11:07, 2.32s/it]
508
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 424/711 [19:40<11:06, 2.32s/it]
509
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 425/711 [19:42<11:02, 2.32s/it]
510
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 426/711 [19:45<11:00, 2.32s/it]
511
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ
 
 
 
512
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 428/711 [19:49<11:02, 2.34s/it]
513
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 429/711 [19:52<10:58, 2.34s/it]
514
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 430/711 [19:54<10:59, 2.35s/it]
515
 
516
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 430/711 [19:54<10:59, 2.35s/it]
517
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 431/711 [19:57<10:59, 2.35s/it]
518
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 432/711 [19:59<10:53, 2.34s/it]
519
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 433/711 [20:01<10:49, 2.34s/it]
520
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 434/711 [20:03<10:44, 2.33s/it]
521
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 435/711 [20:06<10:40, 2.32s/it]
522
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 436/711 [20:08<10:42, 2.33s/it]
523
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 437/711 [20:11<10:40, 2.34s/it]
524
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 438/711 [20:13<10:36, 2.33s/it]
525
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 439/711 [20:15<10:32, 2.33s/it]
526
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 440/711 [20:17<10:29, 2.32s/it]
527
 
528
  62%|β–ˆοΏ½
 
 
529
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 441/711 [20:20<10:27, 2.32s/it]
530
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 442/711 [20:22<10:24, 2.32s/it]
531
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 443/711 [20:24<10:21, 2.32s/it]
532
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 444/711 [20:27<10:18, 2.32s/it]
533
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 445/711 [20:29<10:36, 2.39s/it]
534
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 446/711 [20:32<10:31, 2.38s/it]
535
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 447/711 [20:34<10:41, 2.43s/it]
536
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 448/711 [20:37<10:30, 2.40s/it]
537
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 449/711 [20:39<10:21, 2.37s/it]
538
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 450/711 [20:41<10:14, 2.35s/it]
539
 
540
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 450/711 [20:41<10:14, 2.35s/it]
541
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 451/711 [20:43<10:08, 2.34s/it]
542
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 452/711 [20:46<10:17, 2.39s/it]
543
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 453/711 [20:48<10:09, 2.36s/it]
544
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆοΏ½
 
 
545
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 455/711 [20:53<09:58, 2.34s/it]
546
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 456/711 [20:55<09:54, 2.33s/it]
547
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 457/711 [20:57<09:50, 2.32s/it]
548
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 458/711 [21:00<09:46, 2.32s/it]
549
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 459/711 [21:02<09:44, 2.32s/it]
550
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 460/711 [21:04<09:42, 2.32s/it]
551
 
552
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 460/711 [21:04<09:42, 2.32s/it]
553
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 461/711 [21:07<09:40, 2.32s/it]
554
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 462/711 [21:09<09:41, 2.34s/it]
555
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 463/711 [21:12<09:42, 2.35s/it]
556
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 464/711 [21:14<09:37, 2.34s/it]
557
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 465/711 [21:16<09:33, 2.33s/it]
558
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 466/711 [21:18<09:29, 2.32s/it]
559
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 467/711 [21:21<09:26, 2.32s/it]
560
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 468/
 
 
 
561
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 469/711 [21:25<09:26, 2.34s/it]
562
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 470/711 [21:28<09:22, 2.33s/it]
563
 
564
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 470/711 [21:28<09:22, 2.33s/it]
565
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 471/711 [21:30<09:19, 2.33s/it]
566
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 472/711 [21:32<09:16, 2.33s/it]
567
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 473/711 [21:35<09:13, 2.33s/it]
568
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 474/711 [21:37<09:11, 2.33s/it]
569
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 475/711 [21:39<09:08, 2.32s/it]
570
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 476/711 [21:42<09:10, 2.34s/it]
571
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 477/711 [21:44<09:06, 2.34s/it]
572
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 478/711 [21:46<09:07, 2.35s/it]
573
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 479/711 [21:49<09:06, 2.36s/it]
574
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 480/711 [21:51<09:02, 2.35s/it]
575
 
576
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 480/711 [21:51<09:02, 2.35s/i
 
 
577
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 481/711 [21:54<09:03, 2.36s/it]
578
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 482/711 [21:56<08:58, 2.35s/it]
579
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 483/711 [21:58<08:53, 2.34s/it]
580
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 484/711 [22:01<08:49, 2.33s/it]
581
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 485/711 [22:03<08:58, 2.38s/it]
582
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 486/711 [22:05<08:50, 2.36s/it]
583
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 487/711 [22:08<08:45, 2.34s/it]
584
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 488/711 [22:10<08:40, 2.33s/it]
585
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 489/711 [22:12<08:41, 2.35s/it]
586
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 490/711 [22:15<08:37, 2.34s/it]
587
 
588
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 490/711 [22:15<08:37, 2.34s/it]
589
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 491/711 [22:17<08:37, 2.35s/it]
590
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 492/711 [22:19<08:33, 2.34s/it]
591
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 493/711 [22:22<08:29, 2.34s/it]
592
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 494/711 [22:24<08:35, 2.37s/it]
593
  70%|β–ˆοΏ½
 
 
594
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 496/711 [22:29<08:27, 2.36s/it]
595
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 497/711 [22:31<08:25, 2.36s/it]
596
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 498/711 [22:34<08:20, 2.35s/it]
597
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 499/711 [22:36<08:21, 2.37s/it]
598
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 500/711 [22:38<08:16, 2.35s/it]
599
 
600
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 500/711 [22:38<08:16, 2.35s/it]
601
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 501/711 [22:41<08:12, 2.34s/it]
602
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 502/711 [22:43<08:08, 2.34s/it]
603
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 503/711 [22:45<08:05, 2.33s/it]
604
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 504/711 [22:48<08:02, 2.33s/it]
605
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 505/711 [22:50<07:59, 2.33s/it]
606
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 506/711 [22:52<07:56, 2.32s/it]
607
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 507/711 [22:55<07:53, 2.32s/it]
608
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 508/711 [22:57<07:50, 2.32s/it]
609
  72%|β–ˆβ–ˆβ–ˆβ–ˆοΏ½
 
 
 
610
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 510/711 [23:02<07:49, 2.33s/it]
611
 
612
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 510/711 [23:02<07:49, 2.33s/it]
613
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 511/711 [23:04<07:52, 2.36s/it]
614
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 512/711 [23:06<07:59, 2.41s/it]
615
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 513/711 [23:09<08:02, 2.44s/it]
616
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 514/711 [23:11<07:53, 2.40s/it]
617
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 515/711 [23:14<07:45, 2.38s/it]
618
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 516/711 [23:16<07:39, 2.36s/it]
619
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 517/711 [23:18<07:35, 2.35s/it]
620
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 518/711 [23:21<07:31, 2.34s/it]
621
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 519/711 [23:23<07:27, 2.33s/it]
622
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 520/711 [23:25<07:28, 2.35s/it]
623
 
624
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 520/711 [23:25<07:28, 2.35s/it]
625
  73%|β–ˆβ–ˆβ–ˆβ–ˆ
 
 
626
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 522/711 [23:30<07:20, 2.33s/it]
627
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 523/711 [23:32<07:18, 2.33s/it]
628
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 524/711 [23:35<07:18, 2.34s/it]
629
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 525/711 [23:37<07:18, 2.36s/it]
630
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 526/711 [23:39<07:17, 2.37s/it]
631
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 527/711 [23:42<07:13, 2.36s/it]
632
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 528/711 [23:44<07:11, 2.36s/it]
633
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 529/711 [23:46<07:06, 2.34s/it]
634
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 530/711 [23:49<07:03, 2.34s/it]
635
 
636
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 530/711 [23:49<07:03, 2.34s/it]
637
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 531/711 [23:51<07:00, 2.33s/it]
638
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 532/711 [23:54<07:07, 2.39s/it]
639
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 533/711 [23:56<07:00, 2.36s/it]
640
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 534/711 [23:58<06:55, 2.35s/it]
 
 
641
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 536/711 [24:03<06:49, 2.34s/it]
642
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 537/711 [24:05<06:45, 2.33s/it]
643
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 538/711 [24:07<06:41, 2.32s/it]
644
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 539/711 [24:10<06:38, 2.32s/it]
645
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 540/711 [24:12<06:35, 2.31s/it]
646
 
647
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 540/711 [24:12<06:35, 2.31s/it]
648
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 541/711 [24:14<06:36, 2.33s/it]
649
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 542/711 [24:17<06:35, 2.34s/it]
650
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 543/711 [24:19<06:33, 2.34s/it]
651
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 544/711 [24:21<06:29, 2.33s/it]
652
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 545/711 [24:24<06:26, 2.33s/it]
653
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 546/711 [24:26<06:23, 2.32s/it]
654
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 547/711 [24:28<06:20, 2.32s/it]
655
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 548/711 [24:31<0
 
 
 
656
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 549/711 [24:33<06:15, 2.32s/it]
657
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 550/711 [24:35<06:13, 2.32s/it]
658
 
659
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 550/711 [24:35<06:13, 2.32s/it]
660
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 551/711 [24:38<06:10, 2.32s/it]
661
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 552/711 [24:40<06:08, 2.32s/it]
662
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 553/711 [24:42<06:05, 2.32s/it]
663
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 554/711 [24:45<06:03, 2.31s/it]
664
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 555/711 [24:47<06:01, 2.32s/it]
665
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 556/711 [24:49<05:58, 2.32s/it]
666
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 557/711 [24:52<06:01, 2.35s/it]
667
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 558/711 [24:54<06:03, 2.38s/it]
668
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 559/711 [24:56<05:59, 2.37s/it]
669
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 560/711 [24:59<05:55, 2.35s/it]
670
 
671
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 560/711 [24:59
 
 
672
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 561/711 [25:01<05:51, 2.34s/it]
673
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 562/711 [25:03<05:47, 2.33s/it]
674
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 563/711 [25:06<05:47, 2.35s/it]
675
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 564/711 [25:08<05:43, 2.34s/it]
676
  79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 565/711 [25:11<05:48, 2.39s/it]
677
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 566/711 [25:13<05:43, 2.37s/it]
678
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 567/711 [25:15<05:41, 2.37s/it]
679
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 568/711 [25:18<05:36, 2.35s/it]
680
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 569/711 [25:20<05:32, 2.34s/it]
681
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 570/711 [25:22<05:28, 2.33s/it]
682
 
683
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 570/711 [25:22<05:28, 2.33s/it]
684
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 571/711 [25:24<05:25, 2.33s/it]
685
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 572/711 [25:27<05:22, 2.32s/it]
686
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 573/711 [25:29<05:22, 2.34s/it]
687
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ
 
 
688
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 575/711 [25:34<05:18, 2.34s/it]
689
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 576/711 [25:36<05:15, 2.33s/it]
690
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 577/711 [25:39<05:15, 2.35s/it]
691
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 578/711 [25:41<05:13, 2.36s/it]
692
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 579/711 [25:43<05:09, 2.35s/it]
693
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 580/711 [25:46<05:13, 2.40s/it]
694
 
695
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 580/711 [25:46<05:13, 2.40s/it]
696
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 581/711 [25:48<05:08, 2.37s/it]
697
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 582/711 [25:50<05:03, 2.36s/it]
698
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 583/711 [25:53<05:00, 2.35s/it]
699
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 584/711 [25:55<04:56, 2.34s/it]
700
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 585/711 [25:57<04:53, 2.33s/it]
701
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 586/711 [26:00<04:50, 2.32s/it]
702
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 587/711 [26:02<04:50, 2
 
 
703
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 588/711 [26:04<04:48, 2.35s/it]
704
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 589/711 [26:07<04:47, 2.36s/it]
705
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 590/711 [26:09<04:47, 2.38s/it]
706
 
707
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 590/711 [26:09<04:47, 2.38s/it]
708
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 591/711 [26:12<04:43, 2.36s/it]
709
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 592/711 [26:14<04:39, 2.35s/it]
710
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 593/711 [26:16<04:36, 2.34s/it]
711
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 594/711 [26:19<04:33, 2.33s/it]
712
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 595/711 [26:21<04:30, 2.33s/it]
713
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 596/711 [26:23<04:27, 2.33s/it]
714
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 597/711 [26:25<04:25, 2.32s/it]
715
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 598/711 [26:28<04:22, 2.32s/it]
716
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 599/711 [26:30<04:20, 2.32s/it]
717
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 600/711 [26:32<04:17, 2.32s/it]
718
 
 
 
 
719
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 600/711 [26:32<04:17, 2.32s/it]
720
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 601/711 [26:35<04:15, 2.32s/it]
721
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 602/711 [26:37<04:13, 2.32s/it]
722
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 603/711 [26:39<04:10, 2.32s/it]
723
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 604/711 [26:42<04:08, 2.32s/it]
724
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 605/711 [26:44<04:08, 2.34s/it]
725
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 606/711 [26:46<04:06, 2.35s/it]
726
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 607/711 [26:49<04:03, 2.34s/it]
727
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 608/711 [26:51<04:00, 2.33s/it]
728
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 609/711 [26:53<03:57, 2.33s/it]
729
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 610/711 [26:56<03:55, 2.33s/it]
730
 
731
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 610/711 [26:56<03:55, 2.33s/it]
732
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 611/711 [26:58<03:52, 2.32s/it]
733
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 612/711 [27:00<
 
 
734
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 613/711 [27:03<03:47, 2.32s/it]
735
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 614/711 [27:05<03:44, 2.32s/it]
736
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 615/711 [27:07<03:42, 2.32s/it]
737
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 616/711 [27:10<03:40, 2.32s/it]
738
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 617/711 [27:12<03:37, 2.32s/it]
739
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 618/711 [27:14<03:38, 2.35s/it]
740
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 619/711 [27:17<03:34, 2.34s/it]
741
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 620/711 [27:19<03:32, 2.33s/it]
742
 
743
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 620/711 [27:19<03:32, 2.33s/it]
744
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 621/711 [27:21<03:31, 2.35s/it]
745
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 622/711 [27:24<03:29, 2.36s/it]
746
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 623/711 [27:26<03:28, 2.36s/it]
747
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 624/711 [27:28<03:24, 2.35s/it]
748
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 625/711 [27:31<03:21, 2.34s/it]
749
  88
 
 
750
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 627/711 [27:35<03:15, 2.33s/it]
751
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 628/711 [27:38<03:12, 2.32s/it]
752
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 629/711 [27:40<03:10, 2.32s/it]
753
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 630/711 [27:42<03:10, 2.35s/it]
754
 
755
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 630/711 [27:42<03:10, 2.35s/it]
756
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 631/711 [27:45<03:07, 2.35s/it]
757
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 632/711 [27:47<03:09, 2.39s/it]
758
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 633/711 [27:50<03:04, 2.37s/it]
759
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 634/711 [27:52<03:00, 2.35s/it]
760
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 635/711 [27:54<02:57, 2.34s/it]
761
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 636/711 [27:57<02:56, 2.35s/it]
762
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 637/711 [27:59<02:55, 2.37s/it]
763
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 638/711 [28:01<02:52, 2.36s/it]
764
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆοΏ½
 
 
 
765
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 640/711 [28:06<02:46, 2.34s/it]
766
 
767
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 640/711 [28:06<02:46, 2.34s/it]
768
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 641/711 [28:08<02:43, 2.33s/it]
769
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 642/711 [28:11<02:40, 2.33s/it]
770
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 643/711 [28:13<02:38, 2.33s/it]
771
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 644/711 [28:15<02:35, 2.33s/it]
772
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 645/711 [28:18<02:35, 2.35s/it]
773
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 646/711 [28:20<02:32, 2.34s/it]
774
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 647/711 [28:22<02:29, 2.34s/it]
775
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 648/711 [28:25<02:27, 2.33s/it]
776
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 649/711 [28:27<02:24, 2.33s/it]
777
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 650/711 [28:29<02:23, 2.35s/it]
778
 
779
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 650/711 [28:29<02:23,
 
 
780
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 651/711 [28:32<02:20, 2.34s/it]
781
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 652/711 [28:34<02:17, 2.33s/it]
782
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 653/711 [28:36<02:17, 2.37s/it]
783
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 654/711 [28:39<02:14, 2.36s/it]
784
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 655/711 [28:41<02:11, 2.35s/it]
785
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 656/711 [28:44<02:10, 2.37s/it]
786
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 657/711 [28:46<02:06, 2.35s/it]
787
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 658/711 [28:48<02:04, 2.34s/it]
788
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 659/711 [28:51<02:01, 2.33s/it]
789
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 660/711 [28:53<01:58, 2.33s/it]
790
 
791
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 660/711 [28:53<01:58, 2.33s/it]
792
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 661/711 [28:55<01:57, 2.35s/it]
793
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 662/711 [28:58<01:54, 2.34s/it]
794
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 663/711 [29:00
 
 
795
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 664/711 [29:02<01:49, 2.33s/it]
796
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 665/711 [29:05<01:46, 2.32s/it]
797
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 666/711 [29:07<01:44, 2.32s/it]
798
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 667/711 [29:09<01:42, 2.32s/it]
799
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 668/711 [29:11<01:39, 2.32s/it]
800
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 669/711 [29:14<01:38, 2.34s/it]
801
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 670/711 [29:16<01:36, 2.36s/it]
802
 
803
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 670/711 [29:16<01:36, 2.36s/it]
804
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 671/711 [29:19<01:34, 2.35s/it]
805
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 672/711 [29:21<01:31, 2.34s/it]
806
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 673/711 [29:23<01:28, 2.34s/it]
807
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 674/711 [29:26<01:26, 2.33s/it]
808
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 675/711 [29:28<01:24, 2.33s/it]
809
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 676/711
 
 
810
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 677/711 [29:33<01:19, 2.33s/it]
811
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 678/711 [29:35<01:16, 2.32s/it]
812
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 679/711 [29:37<01:14, 2.32s/it]
813
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 680/711 [29:39<01:11, 2.32s/it]
814
 
815
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 680/711 [29:39<01:11, 2.32s/it]
816
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 681/711 [29:42<01:09, 2.32s/it]
817
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 682/711 [29:44<01:08, 2.36s/it]
818
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 683/711 [29:47<01:05, 2.35s/it]
819
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 684/711 [29:49<01:03, 2.34s/it]
820
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 685/711 [29:51<01:01, 2.36s/it]
821
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 686/711 [29:54<00:58, 2.36s/it]
822
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 687/711 [29:56<00:56, 2.35s/it]
823
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 688/711 [29:58<00:53, 2.34s/it]
824
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹|
 
 
 
825
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 690/711 [30:03<00:48, 2.32s/it]
826
 
827
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 690/711 [30:03<00:48, 2.32s/it]
828
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 691/711 [30:05<00:46, 2.34s/it]
829
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 692/711 [30:08<00:44, 2.33s/it]
830
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 693/711 [30:10<00:41, 2.33s/it]
831
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 694/711 [30:12<00:39, 2.33s/it]
832
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 695/711 [30:15<00:37, 2.32s/it]
833
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 696/711 [30:17<00:34, 2.32s/it]
834
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 697/711 [30:19<00:32, 2.32s/it]
835
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 698/711 [30:22<00:30, 2.32s/it]
836
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 699/711 [30:24<00:27, 2.32s/it]
837
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 700/711 [30:26<00:25, 2.34s/it]
838
 
839
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 700/711 [30:
 
 
 
 
 
840
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 701/711 [30:29<00:23, 2.37s/it]
841
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 702/711 [30:31<00:21, 2.37s/it]
842
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 703/711 [30:33<00:18, 2.35s/it]
843
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 704/711 [30:36<00:16, 2.35s/it]
844
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 705/711 [30:38<00:14, 2.36s/it]
845
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 706/711 [30:40<00:11, 2.34s/it]
846
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 707/711 [30:43<00:09, 2.42s/it]
847
 
848
 
 
 
 
 
 
 
+ 2: W1124 00:08:21.177000 270804 torch/distributed/run.py:792]
+ 2: W1124 00:08:21.177000 270804 torch/distributed/run.py:792] *****************************************
+ 2: W1124 00:08:21.177000 270804 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
+ 2: W1124 00:08:21.177000 270804 torch/distributed/run.py:792] *****************************************
+ 3: W1124 00:08:21.180000 1900294 torch/distributed/run.py:792]
+ 3: W1124 00:08:21.180000 1900294 torch/distributed/run.py:792] *****************************************
+ 3: W1124 00:08:21.180000 1900294 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
+ 3: W1124 00:08:21.180000 1900294 torch/distributed/run.py:792] *****************************************
+ 0: W1124 00:08:21.180000 1912798 torch/distributed/run.py:792]
+ 0: W1124 00:08:21.180000 1912798 torch/distributed/run.py:792] *****************************************
+ 0: W1124 00:08:21.180000 1912798 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
+ 0: W1124 00:08:21.180000 1912798 torch/distributed/run.py:792] *****************************************
+ 1: W1124 00:08:21.306000 434889 torch/distributed/run.py:792]
+ 1: W1124 00:08:21.306000 434889 torch/distributed/run.py:792] *****************************************
+ 1: W1124 00:08:21.306000 434889 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
+ 1: W1124 00:08:21.306000 434889 torch/distributed/run.py:792] *****************************************
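The warning above is actionable: torchrun pins OMP_NUM_THREADS to 1 unless told otherwise, which can starve CPU-side dataloading. A minimal sketch of one way to override it, assuming the variable is set before torch is imported; the cores-per-rank split is an assumption, not something the log prescribes:

```python
# Minimal sketch: pin OMP_NUM_THREADS before torch is imported so its intra-op
# thread pool picks the value up. The per-rank core split is an assumption.
import os

local_ranks = 4  # assumption: four ranks sharing one node's cores
cores = os.cpu_count() or 1
os.environ.setdefault("OMP_NUM_THREADS", str(max(1, cores // local_ranks)))

import torch  # imported after the env var so the thread count takes effect

print(torch.get_num_threads())
```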
+ 0: [2025-11-24 00:08:46,359] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:119] [PID:1912876] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`
+ 0: [2025-11-24 00:08:46,359] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:218] [PID:1912876] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing
+ 2: [2025-11-24 00:08:47,025] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:119] [PID:270879] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`
+ 2: [2025-11-24 00:08:47,026] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:218] [PID:270879] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing
+ 1: [2025-11-24 00:08:47,077] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:119] [PID:434964] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`
+ 1: [2025-11-24 00:08:47,078] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:218] [PID:434964] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing
+ 3: [2025-11-24 00:08:47,277] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:119] [PID:1900370] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`
+ 3: [2025-11-24 00:08:47,277] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:218] [PID:1900370] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing
+ 0: [2025-11-24 00:08:49,792] [WARNING] [axolotl.utils.config.normalize_config:139] [PID:1912876] [RANK:0] Invalid value for save_steps (1.6666666666666667) from saves_per_epoch and/or num_epochs. Saving at training end only.
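The 1.666… in this warning can be reproduced from two values in the config dump that follows (`saves_per_epoch: 1`, `num_epochs: 0.6`). A hedged reconstruction of the arithmetic; the exact formula Axolotl applies is an assumption here:

```python
# Hedged reconstruction of the invalid save_steps value above. Only the two
# inputs come from this log; the formula itself is an assumption.
saves_per_epoch = 1
num_epochs = 0.6

save_steps = 1.0 / (saves_per_epoch * num_epochs)  # fraction of the total run per save
print(save_steps)          # 1.6666666666666667
print(0 < save_steps < 1)  # False -> falls back to saving at training end only
```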
+ 0: [2025-11-24 00:08:49,874] [INFO] [axolotl.cli.config.load_cfg:245] [PID:1912876] [RANK:0] config:
+ 0: {
+ 0:   "activation_offloading": false,
+ 0:   "auto_resume_from_checkpoints": true,
+ 0:   "axolotl_config_path": "/lustre/fswork/projects/rech/dgo/udv55np/train/tmp/1763939290239020138.yaml",
+ 0:   "base_model": "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-4b",
+ 0:   "base_model_config": "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-4b",
+ 0:   "batch_size": 16,
+ 0:   "bf16": true,
+ 0:   "capabilities": {
+ 0:     "bf16": true,
+ 0:     "compute_capability": "sm_90",
+ 0:     "fp8": false,
+ 0:     "n_gpu": 16,
+ 0:     "n_node": 1
+ 0:   },
+ 0:   "chat_template": "gemma3",
+ 0:   "context_parallel_size": 1,
+ 0:   "dataloader_num_workers": 16,
+ 0:   "dataloader_pin_memory": true,
+ 0:   "dataloader_prefetch_factor": 256,
+ 0:   "dataset_prepared_path": "/lustre/fswork/projects/rech/dgo/udv55np/dataset_gemma/Nemotron-Super-49B-v1_5/split_0",
+ 0:   "dataset_processes": 192,
+ 0:   "datasets": [
+ 0:     {
+ 0:       "chat_template": "tokenizer_default",
+ 0:       "data_files": [
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0007.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0009.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0005.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0006.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0014.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0010.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0012.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0008.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0001.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0002.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0013.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0015.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0004.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0011.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0000.jsonl",
+ 0:         "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0003.jsonl"
+ 0:       ],
+ 0:       "ds_type": "json",
+ 0:       "field_messages": "conversations",
+ 0:       "message_property_mappings": {
+ 0:         "content": "content",
+ 0:         "role": "role"
+ 0:       },
+ 0:       "path": "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking",
+ 0:       "trust_remote_code": false,
+ 0:       "type": "chat_template"
+ 0:     }
+ 0:   ],
+ 0:   "ddp": true,
+ 0:   "deepspeed": {
+ 0:     "bf16": {
+ 0:       "enabled": true
+ 0:     },
+ 0:     "gradient_accumulation_steps": "auto",
+ 0:     "gradient_clipping": "auto",
+ 0:     "train_batch_size": "auto",
+ 0:     "train_micro_batch_size_per_gpu": "auto",
+ 0:     "wall_clock_breakdown": false,
+ 0:     "zero_optimization": {
+ 0:       "contiguous_gradients": true,
+ 0:       "overlap_comm": true,
+ 0:       "reduce_bucket_size": "auto",
+ 0:       "stage": 3,
+ 0:       "stage3_gather_16bit_weights_on_model_save": true,
+ 0:       "stage3_param_persistence_threshold": "auto",
+ 0:       "stage3_prefetch_bucket_size": "auto",
+ 0:       "sub_group_size": 0
+ 0:     }
+ 0:   },
+ 0:   "device": "cuda:0",
+ 0:   "device_map": {
+ 0:     "": 0
+ 0:   },
+ 0:   "dion_rank_fraction": 1.0,
+ 0:   "dion_rank_multiple_of": 1,
+ 0:   "env_capabilities": {
+ 0:     "torch_version": "2.6.0"
+ 0:   },
+ 0:   "eot_tokens": [
+ 0:     "<end_of_turn>"
+ 0:   ],
+ 0:   "eval_batch_size": 1,
+ 0:   "eval_causal_lm_metrics": [
+ 0:     "sacrebleu",
+ 0:     "comet",
+ 0:     "ter",
+ 0:     "chrf"
+ 0:   ],
+ 0:   "eval_max_new_tokens": 128,
+ 0:   "eval_sample_packing": true,
+ 0:   "eval_table_size": 0,
+ 0:   "evals_per_epoch": 0,
+ 0:   "flash_attention": true,
+ 0:   "fp16": false,
+ 0:   "gradient_accumulation_steps": 1,
+ 0:   "gradient_checkpointing": true,
+ 0:   "gradient_checkpointing_kwargs": {
+ 0:     "use_reentrant": true
+ 0:   },
+ 0:   "is_multimodal": true,
+ 0:   "learning_rate": 5e-06,
+ 0:   "lisa_layers_attribute": "model.layers",
+ 0:   "load_best_model_at_end": false,
+ 0:   "load_in_4bit": false,
+ 0:   "load_in_8bit": false,
+ 0:   "local_rank": 0,
+ 0:   "logging_steps": 10,
+ 0:   "lora_dropout": 0.0,
+ 0:   "loraplus_lr_embedding": 1e-06,
+ 0:   "lr_scheduler": "warmup_stable_decay",
+ 0:   "lr_scheduler_kwargs": {
+ 0:     "min_lr_ratio": 0.1,
+ 0:     "num_decay_steps": 200
+ 0:   },
+ 0:   "max_prompt_len": 512,
+ 0:   "mean_resizing_embeddings": false,
+ 0:   "micro_batch_size": 1,
+ 0:   "model_config_type": "gemma3",
+ 0:   "num_epochs": 0.6,
+ 0:   "optimizer": "adamw_torch_fused",
+ 0:   "output_dir": "/lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0",
+ 0:   "pad_to_sequence_len": true,
+ 0:   "pretrain_multipack_attn": true,
+ 0:   "pretrain_multipack_buffer_size": 10000,
+ 0:   "processor_config": "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-4b",
+ 0:   "profiler_steps_start": 0,
+ 0:   "qlora_sharded_model_loading": false,
+ 0:   "ray_num_workers": 1,
+ 0:   "resources_per_worker": {
+ 0:     "GPU": 1
+ 0:   },
+ 0:   "sample_packing": true,
+ 0:   "sample_packing_bin_size": 200,
+ 0:   "sample_packing_group_size": 100000,
+ 0:   "save_only_model": true,
+ 0:   "save_safetensors": true,
+ 0:   "save_total_limit": 20,
+ 0:   "saves_per_epoch": 1,
+ 0:   "sequence_len": 16384,
+ 0:   "shuffle_before_merging_datasets": false,
+ 0:   "shuffle_merged_datasets": true,
+ 0:   "skip_prepare_dataset": false,
+ 0:   "strict": false,
+ 0:   "tensor_parallel_size": 1,
+ 0:   "tf32": false,
+ 0:   "tiled_mlp_use_original_mlp": true,
+ 0:   "tokenizer_config": "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-27b",
+ 0:   "torch_dtype": "torch.bfloat16",
+ 0:   "train_on_inputs": false,
+ 0:   "trl": {
+ 0:     "log_completions": false,
+ 0:     "mask_truncated_completions": false,
+ 0:     "ref_model_mixup_alpha": 0.9,
+ 0:     "ref_model_sync_steps": 64,
+ 0:     "scale_rewards": true,
+ 0:     "sync_ref_model": false,
+ 0:     "use_vllm": false,
+ 0:     "vllm_server_host": "0.0.0.0",
+ 0:     "vllm_server_port": 8000
+ 0:   },
+ 0:   "use_ray": false,
+ 0:   "use_tensorboard": true,
+ 0:   "val_set_size": 0.0,
+ 0:   "vllm": {
+ 0:     "device": "auto",
+ 0:     "dtype": "auto",
+ 0:     "gpu_memory_utilization": 0.9,
+ 0:     "host": "0.0.0.0",
+ 0:     "port": 8000
+ 0:   },
+ 0:   "warmup_steps": 100,
+ 0:   "weight_decay": 0.0,
+ 0:   "world_size": 16
+ 0: }
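Taken together, `warmup_steps: 100`, `lr_scheduler: warmup_stable_decay` with `num_decay_steps: 200` and `min_lr_ratio: 0.1`, and the 711-step run length reported below imply a warmup/hold/decay learning-rate profile. A minimal sketch of that shape, assuming linear warmup and linear decay; the exact curve transformers uses for this scheduler is an assumption:

```python
# Minimal sketch of the warmup-stable-decay shape implied by the config above.
# Linear warmup and linear decay are assumptions; only the breakpoints
# (100 warmup steps, 200 decay steps, min ratio 0.1, peak 5e-6) come from the log.
def wsd_lr(step: int, total: int = 711, warmup: int = 100,
           decay: int = 200, peak: float = 5e-06, min_ratio: float = 0.1) -> float:
    if step < warmup:                        # ramp up to the peak rate
        return peak * step / warmup
    if step < total - decay:                 # hold at the peak ("stable" phase)
        return peak
    frac = (step - (total - decay)) / decay  # fraction of the decay phase elapsed
    return peak * (1.0 - (1.0 - min_ratio) * frac)

print(wsd_lr(50), wsd_lr(300), wsd_lr(711))  # 2.5e-06 5e-06 5e-07
```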
+ 0: [2025-11-24 00:08:49,876] [INFO] [axolotl.cli.checks.check_user_token:35] [PID:1912876] [RANK:0] Skipping HuggingFace token verification because HF_HUB_OFFLINE is set to True. Only local files will be used.
+ 0: [2025-11-24 00:08:51,148] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:472] [PID:1912876] [RANK:0] Loading prepared dataset from disk at /lustre/fswork/projects/rech/dgo/udv55np/dataset_gemma/Nemotron-Super-49B-v1_5/split_0/06698e902d3dba325ca34849b1dea5ea...
+ 0: [2025-11-24 00:09:24,738] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:436] [PID:1912876] [RANK:0] gather_len_batches: [18976, 18976, 18975, 18976, 18976, 18976, 18976, 18976, 18976, 18975, 18976, 18976, 18976, 18976, 18976, 18976]
+ 0: [2025-11-24 00:09:24,803] [INFO] [axolotl.utils.trainer.calc_sample_packing_eff_est:495] [PID:1912876] [RANK:0] sample_packing_eff_est across ranks: [0.9988827705383301, 0.9989354014396667, 0.9989354014396667, 0.9988827705383301, 0.9989354014396667, 0.9989354014396667, 0.9989354014396667, 0.9989354014396667, 0.9988827705383301, 0.9988827705383301, 0.9988827705383301, 0.9989880323410034, 0.9988301396369934, 0.9989354014396667, 0.9989354014396667, 0.9989354014396667]
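A packing efficiency around 0.9989 means each 16384-token bin is ~99.9% real tokens rather than padding. A hedged sketch of the underlying ratio; Axolotl's exact estimator is an assumption:

```python
# Hedged sketch of what a sample-packing efficiency near 0.9989 measures:
# real tokens over total bin capacity. Axolotl's estimator is an assumption;
# the 16384 bin size is sequence_len from the config above.
def packing_efficiency(tokens_per_bin: list[int], seq_len: int = 16384) -> float:
    """Fraction of packed-bin capacity filled with real (non-padding) tokens."""
    return sum(tokens_per_bin) / (len(tokens_per_bin) * seq_len)

# Bins that are almost full give ratios like the ones logged above:
print(packing_efficiency([16367, 16366, 16369]))  # ~0.99898
```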
+ 0: [2025-11-24 00:09:24,810] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:127] [PID:1912876] [RANK:0] Maximum number of steps set at 711
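The 711 is consistent with figures already visible in this log: ~18976 packed micro-batches per rank (the gather_len_batches line above), `num_epochs: 0.6`, and 16 data-parallel ranks at `micro_batch_size: 1` with no gradient accumulation. A hedged back-of-the-envelope; the exact rounding Axolotl applies is an assumption:

```python
# Hedged back-of-the-envelope for "Maximum number of steps set at 711".
# All three inputs appear in this log; the floor() is an assumption.
import math

packed_batches = 18976  # per-rank count from the gather_len_batches line
num_epochs = 0.6        # from the config dump
world_size = 16         # 16 data-parallel ranks, micro_batch_size 1, grad accum 1

print(math.floor(packed_batches * num_epochs / world_size))  # 711
```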
+ 1: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
[the same transformers warning was emitted 16 times in total, four times under each of the `+ 0:` through `+ 3:` prefixes]
228
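
The warning above (emitted once per rank, deduplicated here) refers to the `use_fast` flag of `transformers` image-processor loading. Passing it explicitly makes the behavior deterministic across library versions; a minimal sketch, where the checkpoint name is a placeholder rather than the training path used in this run:

```python
from transformers import AutoImageProcessor

# Opt in to the fast (tensor-backed) image processor explicitly so the
# v4.52 default change is a no-op; use_fast=False keeps the legacy slow one.
processor = AutoImageProcessor.from_pretrained(
    "google/gemma-3-4b-it",  # placeholder checkpoint for illustration
    use_fast=True,
)
```
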
+ 0: [2025-11-24 00:09:31,797] [INFO] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:110] [PID:1912876] [RANK:0] Patched Trainer.evaluation_loop with nanmean loss calculation
+ 0: [2025-11-24 00:09:31,798] [INFO] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:164] [PID:1912876] [RANK:0] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
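
The two patches above swap the trainer's plain mean over gathered losses for a NaN-ignoring mean, so a batch whose loss comes back as NaN (for example one where every label token is masked) does not poison the logged average. A minimal sketch of the idea, with made-up loss values:

```python
import torch

# Per-micro-batch losses gathered for one logging step; one came back NaN.
losses = torch.tensor([0.52, float("nan"), 0.48])

print(losses.mean())          # tensor(nan)    -> would corrupt the logged loss
print(torch.nanmean(losses))  # tensor(0.5000) -> what the patched trainer logs
```
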
+ 0: [2025-11-24 00:09:59,406] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:345] [PID:1912876] [RANK:0] Converting modules to torch.bfloat16
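
The dtype conversion logged above is the usual pre-training cast of non-trainable modules to half precision. A minimal, self-contained sketch of the same operation on an arbitrary module (illustrative only; axolotl selects which modules to cast based on the config):

```python
import torch
import torch.nn as nn

# Cast a module's parameters and buffers to bfloat16 in place,
# as done here for parts of the model before training starts.
layer = nn.Linear(16, 16)
layer.to(dtype=torch.bfloat16)
print(next(layer.parameters()).dtype)  # torch.bfloat16
```
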
+ 0: [2025-11-24 00:10:03,242] [INFO] [axolotl.train.save_initial_configs:416] [PID:1912876] [RANK:0] Pre-saving tokenizer to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0...
+ 0: [2025-11-24 00:10:03,660] [INFO] [axolotl.train.save_initial_configs:419] [PID:1912876] [RANK:0] Pre-saving model config to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0...
+ 0: [2025-11-24 00:10:03,690] [INFO] [axolotl.train.save_initial_configs:423] [PID:1912876] [RANK:0] Pre-saving processor to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0...
+ 0: [2025-11-24 00:10:06,488] [INFO] [axolotl.train.execute_training:203] [PID:1912876] [RANK:0] Starting trainer...
+ 0: [2025-11-24 00:11:39,109] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:436] [PID:1912876] [RANK:0] gather_len_batches: [18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976]
+ 0: Parameter Offload - Persistent parameters statistics: param_count = 479, numel = 768880
+ 0: {'loss': 0.7282, 'grad_norm': 2.3999579863224176, 'learning_rate': 9.05e-07, 'memory/max_mem_active(gib)': 57.15, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 66.2, 'epoch': 0.01}
[... tqdm progress bars trimmed (0/711 through 647/711 steps, roughly 2.3 s/it at steady state); the per-step metric logs are kept below ...]
+ 0: {'loss': 0.6672, 'grad_norm': 1.3408937334456381, 'learning_rate': 1.3550000000000002e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.02}
+ 0: {'loss': 0.6271, 'grad_norm': 0.8591296514459729, 'learning_rate': 1.805e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.03}
+ 0: {'loss': 0.6047, 'grad_norm': 0.8292303871371115, 'learning_rate': 2.2550000000000004e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.03}
+ 0: {'loss': 0.5823, 'grad_norm': 0.7246674717655568, 'learning_rate': 2.7050000000000004e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.04}
+ 0: {'loss': 0.5601, 'grad_norm': 0.7685808720049759, 'learning_rate': 3.1550000000000003e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.05}
+ 0: {'loss': 0.5653, 'grad_norm': 0.7575662741162992, 'learning_rate': 3.6050000000000002e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.06}
+ 0: {'loss': 0.5571, 'grad_norm': 0.7808588522979137, 'learning_rate': 4.055000000000001e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.07}
+ 0: {'loss': 0.5341, 'grad_norm': 0.8642141374162505, 'learning_rate': 4.505e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.08}
+ 0: {'loss': 0.5192, 'grad_norm': 0.8305549171618009, 'learning_rate': 4.955e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.08}
+ 0: {'loss': 0.5459, 'grad_norm': 0.8622685683478952, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.09}
+ 0: {'loss': 0.5358, 'grad_norm': 0.8178819353819496, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.1}
+ 0: {'loss': 0.5179, 'grad_norm': 1.6879902769065394, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.11}
+ 0: {'loss': 0.5223, 'grad_norm': 0.8227895864412552, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.12}
+ 0: {'loss': 0.523, 'grad_norm': 0.7452356447124456, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.13}
+ 0: {'loss': 0.5237, 'grad_norm': 0.8791556578937845, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.13}
+ 0: {'loss': 0.5143, 'grad_norm': 0.7496360454577663, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.14}
+ 0: {'loss': 0.5072, 'grad_norm': 0.7656965770735714, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.15}
+ 0: {'loss': 0.5029, 'grad_norm': 0.7795187884752995, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.16}
+ 0: {'loss': 0.5088, 'grad_norm': 0.8707955733484418, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.17}
+ 0: {'loss': 0.5012, 'grad_norm': 0.8320228094655582, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.18}
+ 0: {'loss': 0.5124, 'grad_norm': 1.083294587127778, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.19}
+ 0: {'loss': 0.4994, 'grad_norm': 0.7355209967169852, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.19}
+ 0: {'loss': 0.5051, 'grad_norm': 0.7864641958494194, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.2}
+ 0: {'loss': 0.4913, 'grad_norm': 0.8505484395139187, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.21}
+ 0: {'loss': 0.4871, 'grad_norm': 0.8233442983825041, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.22}
+ 0: {'loss': 0.4887, 'grad_norm': 0.7977283697648062, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.23}
+ 0: {'loss': 0.501, 'grad_norm': 0.788114718310765, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.24}
+ 0: {'loss': 0.4765, 'grad_norm': 0.7750954499136393, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.24}
+ 0: {'loss': 0.488, 'grad_norm': 0.778477888845856, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.25}
+ 0: {'loss': 0.4871, 'grad_norm': 0.7785532844235397, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.26}
+ 0: {'loss': 0.4915, 'grad_norm': 0.8063698152361907, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.27}
+ 0: {'loss': 0.4894, 'grad_norm': 0.7798282062358487, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.28}
+ 0: {'loss': 0.4825, 'grad_norm': 0.750224606954942, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.29}
+ 0: {'loss': 0.4856, 'grad_norm': 0.736611045158727, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.3}
+ 0: {'loss': 0.4927, 'grad_norm': 0.7853737850371227, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.3}
+ 0: {'loss': 0.4881, 'grad_norm': 0.7490924239534897, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.31}
+ 0: {'loss': 0.4889, 'grad_norm': 0.7921991687866194, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.32}
+ 0: {'loss': 0.4822, 'grad_norm': 0.8102116642711951, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.33}
+ 0: {'loss': 0.4678, 'grad_norm': 0.7889843890610096, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.34}
+ 0: {'loss': 0.4732, 'grad_norm': 0.7803377614587503, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.35}
+ 0: {'loss': 0.4773, 'grad_norm': 0.7786794033275539, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.35}
+ 0: {'loss': 0.4759, 'grad_norm': 0.7409304393739385, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.36}
+ 0: {'loss': 0.4784, 'grad_norm': 0.7489672735206069, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.37}
+ 0: {'loss': 0.4716, 'grad_norm': 0.7729942449390255, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.38}
+ 0: {'loss': 0.467, 'grad_norm': 0.7403462320672021, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.39}
+ 0: {'loss': 0.4727, 'grad_norm': 0.7765476983805598, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.4}
+ 0: {'loss': 0.4761, 'grad_norm': 0.7166795921281778, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.4}
+ 0: {'loss': 0.4545, 'grad_norm': 0.7592461340919713, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.41}
+ 0: {'loss': 0.4621, 'grad_norm': 0.8060919908075219, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.42}
+ 0: {'loss': 0.4759, 'grad_norm': 0.7434049511511707, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.43}
+ 0: {'loss': 0.4643, 'grad_norm': 0.8519398991308196, 'learning_rate': 4.982258077957576e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.44}
+ 0: {'loss': 0.4652, 'grad_norm': 0.7323364456399427, 'learning_rate': 4.910660792773122e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.45}
+ 0: {'loss': 0.4762, 'grad_norm': 0.7467850320594309, 'learning_rate': 4.7858608680485444e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.46}
+ 0: {'loss': 0.474, 'grad_norm': 0.8194845306669991, 'learning_rate': 4.610931292117764e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.46}
+ 0: {'loss': 0.4615, 'grad_norm': 1.4968465915839815, 'learning_rate': 4.390179411698176e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.47}
+ 0: {'loss': 0.4737, 'grad_norm': 0.7375795650774891, 'learning_rate': 4.129040870719198e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.48}
+ 0: {'loss': 0.4643, 'grad_norm': 0.7331473812508006, 'learning_rate': 3.833945766728859e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.49}
+ 0: {'loss': 0.4559, 'grad_norm': 0.7238676692923376, 'learning_rate': 3.512160320551906e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.5}
+ 0: {'loss': 0.4728, 'grad_norm': 0.7685550630490101, 'learning_rate': 3.171607957817881e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.51}
+ 0: {'loss': 0.452, 'grad_norm': 0.7387438977228977, 'learning_rate': 2.820674207925789e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.51}
+ 0: {'loss': 0.4531, 'grad_norm': 0.7268143927347638, 'learning_rate': 2.4680002244803154e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.52}
+ 0: {'loss': 0.4502, 'grad_norm': 0.6625178435258192, 'learning_rate': 2.1222700114117344e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.53}
+ 0: {'loss': 0.455, 'grad_norm': 0.7830830598414732, 'learning_rate': 1.7919965939785867e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.54}
+ 0: {'loss': 0.4578, 'grad_norm': 0.6885412177331103, 'learning_rate': 1.4853123998327068e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.55}
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 648/711 [28:25<02:27, 2.33s/it]
1140
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 649/711 [28:27<02:24, 2.33s/it]
1141
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 650/711 [28:29<02:23, 2.35s/it]
1142
 
1143
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 650/711 [28:29<02:23,
1144
+ 0: {'loss': 0.4629, 'grad_norm': 0.7008874020320417, 'learning_rate': 1.2097690116604504e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.56}
1145
+ 0: 2.35s/it]
1146
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 651/711 [28:32<02:20, 2.34s/it]
1147
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 652/711 [28:34<02:17, 2.33s/it]
1148
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 653/711 [28:36<02:17, 2.37s/it]
1149
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 654/711 [28:39<02:14, 2.36s/it]
1150
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 655/711 [28:41<02:11, 2.35s/it]
1151
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 656/711 [28:44<02:10, 2.37s/it]
1152
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 657/711 [28:46<02:06, 2.35s/it]
1153
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 658/711 [28:48<02:04, 2.34s/it]
1154
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 659/711 [28:51<02:01, 2.33s/it]
1155
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 660/711 [28:53<01:58, 2.33s/it]
1156
 
1157
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 660/711 [28:53<01:58, 2.33s/it]
1158
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 661/711 [28:55<01:57, 2.35s/it]
1159
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 662/711 [28:58<01:54, 2.34s/it]
1160
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 663/711 [29:00
1161
+ 0: {'loss': 0.4435, 'grad_norm': 0.698029654257462, 'learning_rate': 9.721512221546967e-07, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.56}
1162
+ 0: <01:51, 2.33s/it]
1163
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 664/711 [29:02<01:49, 2.33s/it]
1164
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 665/711 [29:05<01:46, 2.32s/it]
1165
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 666/711 [29:07<01:44, 2.32s/it]
1166
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 667/711 [29:09<01:42, 2.32s/it]
1167
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 668/711 [29:11<01:39, 2.32s/it]
1168
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 669/711 [29:14<01:38, 2.34s/it]
1169
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 670/711 [29:16<01:36, 2.36s/it]
1170
 
1171
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 670/711 [29:16<01:36, 2.36s/it]
1172
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 671/711 [29:19<01:34, 2.35s/it]
1173
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 672/711 [29:21<01:31, 2.34s/it]
1174
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 673/711 [29:23<01:28, 2.34s/it]
1175
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 674/711 [29:26<01:26, 2.33s/it]
1176
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 675/711 [29:28<01:24, 2.33s/it]
1177
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 676/711
1178
+ 0: {'loss': 0.4649, 'grad_norm': 0.7032914493453137, 'learning_rate': 7.783099699013075e-07, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.57}
1179
+ 0: [29:30<01:21, 2.33s/it]
1180
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 677/711 [29:33<01:19, 2.33s/it]
1181
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 678/711 [29:35<01:16, 2.32s/it]
1182
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 679/711 [29:37<01:14, 2.32s/it]
1183
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 680/711 [29:39<01:11, 2.32s/it]
1184
 
1185
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 680/711 [29:39<01:11, 2.32s/it]
1186
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 681/711 [29:42<01:09, 2.32s/it]
1187
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 682/711 [29:44<01:08, 2.36s/it]
1188
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 683/711 [29:47<01:05, 2.35s/it]
1189
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 684/711 [29:49<01:03, 2.34s/it]
1190
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 685/711 [29:51<01:01, 2.36s/it]
1191
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 686/711 [29:54<00:58, 2.36s/it]
1192
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 687/711 [29:56<00:56, 2.35s/it]
1193
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 688/711 [29:58<00:53, 2.34s/it]
1194
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹|
1195
+ 0: {'loss': 0.4638, 'grad_norm': 0.7522821052393728, 'learning_rate': 6.330182698529928e-07, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.58}
1196
+ 0: {'loss': 0.456, 'grad_norm': 0.6485448600183656, 'learning_rate': 5.398536858604507e-07, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.59}
1197
+ 0: 689/711 [30:01<00:51, 2.33s/it]
1198
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 690/711 [30:03<00:48, 2.32s/it]
1199
 
1200
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 690/711 [30:03<00:48, 2.32s/it]
1201
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 691/711 [30:05<00:46, 2.34s/it]
1202
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 692/711 [30:08<00:44, 2.33s/it]
1203
  97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 693/711 [30:10<00:41, 2.33s/it]
1204
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 694/711 [30:12<00:39, 2.33s/it]
1205
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 695/711 [30:15<00:37, 2.32s/it]
1206
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 696/711 [30:17<00:34, 2.32s/it]
1207
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 697/711 [30:19<00:32, 2.32s/it]
1208
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 698/711 [30:22<00:30, 2.32s/it]
1209
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 699/711 [30:24<00:27, 2.32s/it]
1210
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 700/711 [30:26<00:25, 2.34s/it]
1211
 
1212
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 700/711 [30:
1213
+ 0: {'loss': 0.4527, 'grad_norm': 0.6894880207361598, 'learning_rate': 5.011102391771039e-07, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.6}
1214
+ 0: [2025-11-24 00:42:38,710] [INFO] [axolotl.core.trainers.base._save:613] [PID:1912876] [RANK:0] Saving model checkpoint to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0/checkpoint-711
1215
+ 0: [2025-11-24 00:42:45,711] [INFO] [axolotl.core.trainers.base._save:662] [PID:1912876] [RANK:0] Saving Trainer.data_collator.tokenizer by default as Trainer.processing_class is `None`
1216
+ 0: {'train_runtime': 1863.7709, 'train_samples_per_second': 6.104, 'train_steps_per_second': 0.381, 'train_loss': 0.49791947950290727, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.6}
1217
+ 0: 26<00:25, 2.34s/it]
1218
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 701/711 [30:29<00:23, 2.37s/it]
1219
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 702/711 [30:31<00:21, 2.37s/it]
1220
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 703/711 [30:33<00:18, 2.35s/it]
1221
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 704/711 [30:36<00:16, 2.35s/it]
1222
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 705/711 [30:38<00:14, 2.36s/it]
1223
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 706/711 [30:40<00:11, 2.34s/it]
1224
  99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 707/711 [30:43<00:09, 2.42s/it]
1225
 
1226
 
1227
+ 0: .49s/it]
1228
+ 0: [2025-11-24 00:42:49,004] [INFO] [axolotl.train.save_trained_model:228] [PID:1912876] [RANK:0] Training completed! Saving trained model to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0.
1229
+ 0: [2025-11-24 00:42:51,675] [INFO] [axolotl.core.trainers.base._save:613] [PID:1912876] [RANK:0] Saving model checkpoint to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0
1230
+ 0: [2025-11-24 00:42:58,432] [INFO] [axolotl.core.trainers.base._save:662] [PID:1912876] [RANK:0] Saving Trainer.data_collator.tokenizer by default as Trainer.processing_class is `None`
1231
+ 0: [2025-11-24 00:42:59,125] [INFO] [axolotl.train.save_trained_model:350] [PID:1912876] [RANK:0] Model successfully saved to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0
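The closing summary is internally consistent: 711 optimizer steps in 1863.77 s gives 711 / 1863.77 β‰ˆ 0.381 steps/s, matching `train_steps_per_second`, and 6.104 samples/s Γ· 0.381 steps/s β‰ˆ 16 samples per optimizer step, i.e. the effective global batch size. The `learning_rate` column also decays smoothly toward zero as the run approaches its last step, consistent with a cosine-style schedule tail. A minimal sketch for recovering the per-step metrics from a saved copy of this log (`train.log` is a hypothetical filename, not something this commit ships):

```python
import ast
import re

# Minimal sketch: pull the rank-0 metric dicts (the "{'loss': ...}" lines)
# out of a saved copy of the training log. "train.log" is hypothetical.
metrics = []
with open("train.log", encoding="utf-8") as fh:
    for line in fh:
        match = re.search(r"\{'loss'.*\}", line)
        if match:
            metrics.append(ast.literal_eval(match.group(0)))

for m in metrics:
    print(f"epoch {m['epoch']:.2f}  loss {m['loss']:.4f}  lr {m['learning_rate']:.2e}")

# Sanity check against the final summary line:
# 711 / 1863.7709 ~= 0.381 steps/s, and 6.104 / 0.381 ~= 16 samples/step.
```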
special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "boi_token": "<start_of_image>",
+   "bos_token": {
+     "content": "<bos>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eoi_token": "<end_of_image>",
+   "eos_token": {
+     "content": "<eos>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "image_token": "<image_soft_token>",
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
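This file pins down the Gemma-3 control tokens: `<bos>`, `<eos>`, `<pad>`, `<unk>`, plus the image delimiters (`<start_of_image>`, `<end_of_image>`, `<image_soft_token>`) that ship with the multimodal tokenizer whether or not a fine-tune uses vision inputs. A minimal sketch for inspecting them with transformers; `"."` is a stand-in for a local clone of this repo (the hub repo id is not shown in this log), and any valid repo id or local path works the same way:

```python
from transformers import AutoTokenizer

# Minimal sketch: load the uploaded tokenizer and inspect its special tokens.
# "." assumes the repository has been cloned into the current directory.
tokenizer = AutoTokenizer.from_pretrained(".")

print(tokenizer.special_tokens_map)              # entries from special_tokens_map.json
print(tokenizer.bos_token, tokenizer.bos_token_id)
print(tokenizer.eos_token, tokenizer.eos_token_id)
print(tokenizer.pad_token, tokenizer.pad_token_id)
```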
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
+ size 33384568
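tokenizer.json is checked in as a Git LFS pointer: the repository stores only the spec version, the sha256 oid, and the byte size (β‰ˆ33.4 MB here), while the payload lives in LFS storage. The same applies to tokenizer.model and training_args.bin below. A minimal integrity check after fetching the real file (e.g. with `git lfs pull`), assuming it sits in the current directory:

```python
import hashlib
from pathlib import Path

# Minimal sketch: verify a fetched file against its git-lfs pointer.
# The oid/size values are the ones from the pointer above.
EXPECTED_OID = "4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795"
EXPECTED_SIZE = 33384568

data = Path("tokenizer.json").read_bytes()
assert len(data) == EXPECTED_SIZE, f"size mismatch: {len(data)}"
assert hashlib.sha256(data).hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("tokenizer.json matches its LFS pointer")
```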
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+ size 4689074
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e7d4fb6306d6b78ab8fbed42c85a3ca8c24572a732b2e2c29fe3ef0a1ac7eff
+ size 10424
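training_args.bin is the serialized training-arguments object that the Hugging Face Trainer writes next to its checkpoints; at β‰ˆ10 KB it is small, but it is still stored as an LFS pointer. A minimal sketch for reading it back from a trusted local copy:

```python
import torch

# Minimal sketch: training_args.bin is a torch-pickled TrainingArguments
# object. weights_only=False is required because it is an arbitrary pickle --
# only load files you trust. Unpickling may also need the training framework
# (here axolotl, which subclasses TrainingArguments) to be importable.
args = torch.load("training_args.bin", map_location="cpu", weights_only=False)

print(type(args).__name__)  # TrainingArguments or a framework subclass
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```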