Upload folder using huggingface_hub
Browse files
- .gitattributes +1 -0
- README.md +130 -0
- added_tokens.json +3 -0
- chat_template.jinja +47 -0
- config.json +93 -0
- generation_config.json +13 -0
- model-00001-of-00002.safetensors +3 -0
- model-00002-of-00002.safetensors +3 -0
- model.safetensors.index.json +891 -0
- preprocessor_config.json +29 -0
- processor_config.json +4 -0
- runs/Nov24_00-10-02_jzxh298/events.out.tfevents.1763939502.jzxh298.1912876.0 +3 -0
- slurm.out +382 -0
- special_tokens_map.json +33 -0
- tokenizer.json +3 -0
- tokenizer.model +3 -0
- tokenizer_config.json +0 -0
- training_args.bin +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: transformers
|
| 3 |
+
tags:
|
| 4 |
+
- generated_from_trainer
|
| 5 |
+
model-index:
|
| 6 |
+
- name: lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0
|
| 7 |
+
results: []
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 11 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 12 |
+
|
| 13 |
+
[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
|
| 14 |
+
<details><summary>See axolotl config</summary>
|
| 15 |
+
|
| 16 |
+
axolotl version: `0.12.2`
|
| 17 |
+
```yaml
|
| 18 |
+
base_model: /lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-4b
|
| 19 |
+
|
| 20 |
+
datasets:
|
| 21 |
+
- path: /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking
|
| 22 |
+
ds_type: json
|
| 23 |
+
type: chat_template
|
| 24 |
+
field_messages: conversations
|
| 25 |
+
data_files:
|
| 26 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0007.jsonl
|
| 27 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0009.jsonl
|
| 28 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0005.jsonl
|
| 29 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0006.jsonl
|
| 30 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0014.jsonl
|
| 31 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0010.jsonl
|
| 32 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0012.jsonl
|
| 33 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0008.jsonl
|
| 34 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0001.jsonl
|
| 35 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0002.jsonl
|
| 36 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0013.jsonl
|
| 37 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0015.jsonl
|
| 38 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0004.jsonl
|
| 39 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0011.jsonl
|
| 40 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0000.jsonl
|
| 41 |
+
- /lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0003.jsonl
|
| 42 |
+
|
| 43 |
+
dataset_prepared_path: /lustre/fswork/projects/rech/dgo/udv55np/dataset_gemma/Nemotron-Super-49B-v1_5/split_0
|
| 44 |
+
tokenizer_config: "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-27b"
|
| 45 |
+
chat_template: gemma3
|
| 46 |
+
eot_tokens:
|
| 47 |
+
- "<end_of_turn>"
|
| 48 |
+
|
| 49 |
+
shuffle_merged_datasets: true
|
| 50 |
+
output_dir: /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0
|
| 51 |
+
|
| 52 |
+
sequence_len: 16384
|
| 53 |
+
sample_packing: true
|
| 54 |
+
|
| 55 |
+
gradient_accumulation_steps: 1
|
| 56 |
+
micro_batch_size: 1
|
| 57 |
+
num_epochs: 0.6
|
| 58 |
+
auto_resume_from_checkpoints: true
|
| 59 |
+
|
| 60 |
+
optimizer: adamw_torch_fused
|
| 61 |
+
lr_scheduler: warmup_stable_decay
|
| 62 |
+
learning_rate: 5e-6
|
| 63 |
+
lr_scheduler_kwargs:
|
| 64 |
+
num_decay_steps: 200
|
| 65 |
+
min_lr_ratio: 0.1
|
| 66 |
+
warmup_steps: 100
|
| 67 |
+
|
| 68 |
+
bf16: true
|
| 69 |
+
tf32: false
|
| 70 |
+
|
| 71 |
+
gradient_checkpointing: true
|
| 72 |
+
logging_steps: 10
|
| 73 |
+
flash_attention: true
|
| 74 |
+
|
| 75 |
+
evals_per_epoch: 0
|
| 76 |
+
saves_per_epoch: 1
|
| 77 |
+
save_total_limit: 20
|
| 78 |
+
save_only_model: true
|
| 79 |
+
|
| 80 |
+
use_tensorboard: true
|
| 81 |
+
deepspeed: /lustre/fswork/projects/rech/qwv/udv55np/axolotl/zero3.json
|
| 82 |
+
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
</details><br>
|
| 86 |
+
|
| 87 |
+
# lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0
|
| 88 |
+
|
| 89 |
+
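This model is a fine-tuned version of gemma-3-4b (loaded from a local checkpoint, see `base_model` in the axolotl config), trained on the Nemotron-Super-49B-v1_5 `no_thinking` dataset.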
|
| 90 |
+
|
| 91 |
+
## Model description
|
| 92 |
+
|
| 93 |
+
More information needed
|
| 94 |
+
|
| 95 |
+
## Intended uses & limitations
|
| 96 |
+
|
| 97 |
+
More information needed
|
| 98 |
+
|
| 99 |
+
## Training and evaluation data
|
| 100 |
+
|
| 101 |
+
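Training used 16 JSONL shards (`0000.jsonl`–`0015.jsonl`) of the Nemotron-Super-49B-v1_5 `no_thinking` dataset in chat-template format, shuffled after merging and packed into 16,384-token sequences, for 0.6 epochs. Evaluation was disabled (`evals_per_epoch: 0`), so no evaluation data or metrics are available.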
|
| 102 |
+
|
| 103 |
+
## Training procedure
|
| 104 |
+
|
| 105 |
+
### Training hyperparameters
|
| 106 |
+
|
| 107 |
+
The following hyperparameters were used during training:
|
| 108 |
+
- learning_rate: 5e-06
|
| 109 |
+
- train_batch_size: 1
|
| 110 |
+
- eval_batch_size: 1
|
| 111 |
+
- seed: 42
|
| 112 |
+
- distributed_type: multi-GPU
|
| 113 |
+
- num_devices: 16
|
| 114 |
+
- total_train_batch_size: 16
|
| 115 |
+
- total_eval_batch_size: 16
|
| 116 |
+
- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 117 |
+
- lr_scheduler_type: warmup_stable_decay
|
| 118 |
+
- lr_scheduler_warmup_steps: 100
|
| 119 |
+
- training_steps: 711
|
| 120 |
+
|
| 121 |
+
### Training results
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
### Framework versions
|
| 126 |
+
|
| 127 |
+
- Transformers 4.55.2
|
| 128 |
+
- Pytorch 2.6.0+cu124
|
| 129 |
+
- Datasets 4.0.0
|
| 130 |
+
- Tokenizers 0.21.1
|
added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<image_soft_token>": 262144
|
| 3 |
+
}
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{{ bos_token }}
|
| 2 |
+
{%- if messages[0]['role'] == 'system' -%}
|
| 3 |
+
{%- if messages[0]['content'] is string -%}
|
| 4 |
+
{%- set first_user_prefix = messages[0]['content'] + '
|
| 5 |
+
|
| 6 |
+
' -%}
|
| 7 |
+
{%- else -%}
|
| 8 |
+
{%- set first_user_prefix = messages[0]['content'][0]['text'] + '
|
| 9 |
+
|
| 10 |
+
' -%}
|
| 11 |
+
{%- endif -%}
|
| 12 |
+
{%- set loop_messages = messages[1:] -%}
|
| 13 |
+
{%- else -%}
|
| 14 |
+
{%- set first_user_prefix = "" -%}
|
| 15 |
+
{%- set loop_messages = messages -%}
|
| 16 |
+
{%- endif -%}
|
| 17 |
+
{%- for message in loop_messages -%}
|
| 18 |
+
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
|
| 19 |
+
{{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
|
| 20 |
+
{%- endif -%}
|
| 21 |
+
{%- if (message['role'] == 'assistant') -%}
|
| 22 |
+
{%- set role = "model" -%}
|
| 23 |
+
{%- else -%}
|
| 24 |
+
{%- set role = message['role'] -%}
|
| 25 |
+
{%- endif -%}
|
| 26 |
+
{{ '<start_of_turn>' + role + '
|
| 27 |
+
' + (first_user_prefix if loop.first else "") }}
|
| 28 |
+
{%- if message['content'] is string -%}
|
| 29 |
+
{{ message['content'] | trim }}
|
| 30 |
+
{%- elif message['content'] is iterable -%}
|
| 31 |
+
{%- for item in message['content'] -%}
|
| 32 |
+
{%- if item['type'] == 'image' -%}
|
| 33 |
+
{{ '<start_of_image>' }}
|
| 34 |
+
{%- elif item['type'] == 'text' -%}
|
| 35 |
+
{{ item['text'] | trim }}
|
| 36 |
+
{%- endif -%}
|
| 37 |
+
{%- endfor -%}
|
| 38 |
+
{%- else -%}
|
| 39 |
+
{{ raise_exception("Invalid content type") }}
|
| 40 |
+
{%- endif -%}
|
| 41 |
+
{{ '<end_of_turn>
|
| 42 |
+
' }}
|
| 43 |
+
{%- endfor -%}
|
| 44 |
+
{%- if add_generation_prompt -%}
|
| 45 |
+
{{'<start_of_turn>model
|
| 46 |
+
'}}
|
| 47 |
+
{%- endif -%}
|
config.json
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Gemma3ForConditionalGeneration"
|
| 4 |
+
],
|
| 5 |
+
"boi_token_index": 255999,
|
| 6 |
+
"eoi_token_index": 256000,
|
| 7 |
+
"image_token_index": 262144,
|
| 8 |
+
"initializer_range": 0.02,
|
| 9 |
+
"mm_tokens_per_image": 256,
|
| 10 |
+
"model_type": "gemma3",
|
| 11 |
+
"text_config": {
|
| 12 |
+
"_sliding_window_pattern": 6,
|
| 13 |
+
"attention_bias": false,
|
| 14 |
+
"attention_dropout": 0.0,
|
| 15 |
+
"attn_logit_softcapping": null,
|
| 16 |
+
"final_logit_softcapping": null,
|
| 17 |
+
"head_dim": 256,
|
| 18 |
+
"hidden_activation": "gelu_pytorch_tanh",
|
| 19 |
+
"hidden_size": 2560,
|
| 20 |
+
"initializer_range": 0.02,
|
| 21 |
+
"intermediate_size": 10240,
|
| 22 |
+
"layer_types": [
|
| 23 |
+
"sliding_attention",
|
| 24 |
+
"sliding_attention",
|
| 25 |
+
"sliding_attention",
|
| 26 |
+
"sliding_attention",
|
| 27 |
+
"sliding_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"sliding_attention",
|
| 30 |
+
"sliding_attention",
|
| 31 |
+
"sliding_attention",
|
| 32 |
+
"sliding_attention",
|
| 33 |
+
"sliding_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"sliding_attention",
|
| 36 |
+
"sliding_attention",
|
| 37 |
+
"sliding_attention",
|
| 38 |
+
"sliding_attention",
|
| 39 |
+
"sliding_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"sliding_attention",
|
| 42 |
+
"sliding_attention",
|
| 43 |
+
"sliding_attention",
|
| 44 |
+
"sliding_attention",
|
| 45 |
+
"sliding_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"sliding_attention",
|
| 48 |
+
"sliding_attention",
|
| 49 |
+
"sliding_attention",
|
| 50 |
+
"sliding_attention",
|
| 51 |
+
"sliding_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"sliding_attention",
|
| 54 |
+
"sliding_attention",
|
| 55 |
+
"sliding_attention",
|
| 56 |
+
"sliding_attention"
|
| 57 |
+
],
|
| 58 |
+
"max_position_embeddings": 131072,
|
| 59 |
+
"model_type": "gemma3_text",
|
| 60 |
+
"num_attention_heads": 8,
|
| 61 |
+
"num_hidden_layers": 34,
|
| 62 |
+
"num_key_value_heads": 4,
|
| 63 |
+
"query_pre_attn_scalar": 256,
|
| 64 |
+
"rms_norm_eps": 1e-06,
|
| 65 |
+
"rope_local_base_freq": 10000.0,
|
| 66 |
+
"rope_scaling": {
|
| 67 |
+
"factor": 8.0,
|
| 68 |
+
"rope_type": "linear"
|
| 69 |
+
},
|
| 70 |
+
"rope_theta": 1000000.0,
|
| 71 |
+
"sliding_window": 1024,
|
| 72 |
+
"torch_dtype": "bfloat16",
|
| 73 |
+
"use_cache": false,
|
| 74 |
+
"vocab_size": 262208
|
| 75 |
+
},
|
| 76 |
+
"torch_dtype": "bfloat16",
|
| 77 |
+
"transformers_version": "4.55.2",
|
| 78 |
+
"vision_config": {
|
| 79 |
+
"attention_dropout": 0.0,
|
| 80 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 81 |
+
"hidden_size": 1152,
|
| 82 |
+
"image_size": 896,
|
| 83 |
+
"intermediate_size": 4304,
|
| 84 |
+
"layer_norm_eps": 1e-06,
|
| 85 |
+
"model_type": "siglip_vision_model",
|
| 86 |
+
"num_attention_heads": 16,
|
| 87 |
+
"num_channels": 3,
|
| 88 |
+
"num_hidden_layers": 27,
|
| 89 |
+
"patch_size": 14,
|
| 90 |
+
"torch_dtype": "bfloat16",
|
| 91 |
+
"vision_use_head": false
|
| 92 |
+
}
|
| 93 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 2,
|
| 3 |
+
"cache_implementation": "hybrid",
|
| 4 |
+
"do_sample": true,
|
| 5 |
+
"eos_token_id": [
|
| 6 |
+
1,
|
| 7 |
+
106
|
| 8 |
+
],
|
| 9 |
+
"pad_token_id": 0,
|
| 10 |
+
"top_k": 64,
|
| 11 |
+
"top_p": 0.95,
|
| 12 |
+
"transformers_version": "4.55.2"
|
| 13 |
+
}
|
model-00001-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeeda08d9f2f24cd261c299eee1899e67b71ba65b535ddc5979e9bdabfc7b401
|
| 3 |
+
size 4961251752
|
model-00002-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cdd866c965026d81e2e0debf50e346712ead4809d87b7a46b56eed1ead1da99
|
| 3 |
+
size 3639026128
|
model.safetensors.index.json
ADDED
|
@@ -0,0 +1,891 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metadata": {
|
| 3 |
+
"total_parameters": 768880,
|
| 4 |
+
"total_size": 8600158944
|
| 5 |
+
},
|
| 6 |
+
"weight_map": {
|
| 7 |
+
"language_model.model.embed_tokens.weight": "model-00001-of-00002.safetensors",
|
| 8 |
+
"language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 9 |
+
"language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 10 |
+
"language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 11 |
+
"language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 12 |
+
"language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 13 |
+
"language_model.model.layers.0.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 14 |
+
"language_model.model.layers.0.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 15 |
+
"language_model.model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 16 |
+
"language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 17 |
+
"language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 18 |
+
"language_model.model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 19 |
+
"language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 20 |
+
"language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 21 |
+
"language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 22 |
+
"language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 23 |
+
"language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 24 |
+
"language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 25 |
+
"language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 26 |
+
"language_model.model.layers.1.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 27 |
+
"language_model.model.layers.1.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 28 |
+
"language_model.model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 29 |
+
"language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 30 |
+
"language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 31 |
+
"language_model.model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 32 |
+
"language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 33 |
+
"language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 34 |
+
"language_model.model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 35 |
+
"language_model.model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 36 |
+
"language_model.model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 37 |
+
"language_model.model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 38 |
+
"language_model.model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 39 |
+
"language_model.model.layers.10.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 40 |
+
"language_model.model.layers.10.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 41 |
+
"language_model.model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 42 |
+
"language_model.model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 43 |
+
"language_model.model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 44 |
+
"language_model.model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 45 |
+
"language_model.model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 46 |
+
"language_model.model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 47 |
+
"language_model.model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 48 |
+
"language_model.model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 49 |
+
"language_model.model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 50 |
+
"language_model.model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 51 |
+
"language_model.model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 52 |
+
"language_model.model.layers.11.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 53 |
+
"language_model.model.layers.11.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 54 |
+
"language_model.model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 55 |
+
"language_model.model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 56 |
+
"language_model.model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 57 |
+
"language_model.model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 58 |
+
"language_model.model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 59 |
+
"language_model.model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 60 |
+
"language_model.model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 61 |
+
"language_model.model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 62 |
+
"language_model.model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 63 |
+
"language_model.model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 64 |
+
"language_model.model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 65 |
+
"language_model.model.layers.12.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 66 |
+
"language_model.model.layers.12.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 67 |
+
"language_model.model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 68 |
+
"language_model.model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 69 |
+
"language_model.model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 70 |
+
"language_model.model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 71 |
+
"language_model.model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 72 |
+
"language_model.model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 73 |
+
"language_model.model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 74 |
+
"language_model.model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 75 |
+
"language_model.model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 76 |
+
"language_model.model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 77 |
+
"language_model.model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 78 |
+
"language_model.model.layers.13.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 79 |
+
"language_model.model.layers.13.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 80 |
+
"language_model.model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 81 |
+
"language_model.model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 82 |
+
"language_model.model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 83 |
+
"language_model.model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 84 |
+
"language_model.model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 85 |
+
"language_model.model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 86 |
+
"language_model.model.layers.14.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 87 |
+
"language_model.model.layers.14.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 88 |
+
"language_model.model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 89 |
+
"language_model.model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 90 |
+
"language_model.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 91 |
+
"language_model.model.layers.14.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 92 |
+
"language_model.model.layers.14.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 93 |
+
"language_model.model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 94 |
+
"language_model.model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 95 |
+
"language_model.model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 96 |
+
"language_model.model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 97 |
+
"language_model.model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 98 |
+
"language_model.model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 99 |
+
"language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 100 |
+
"language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 101 |
+
"language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 102 |
+
"language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 103 |
+
"language_model.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 104 |
+
"language_model.model.layers.15.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 105 |
+
"language_model.model.layers.15.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 106 |
+
"language_model.model.layers.15.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 107 |
+
"language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 108 |
+
"language_model.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 109 |
+
"language_model.model.layers.15.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 110 |
+
"language_model.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 111 |
+
"language_model.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 112 |
+
"language_model.model.layers.16.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 113 |
+
"language_model.model.layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 114 |
+
"language_model.model.layers.16.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 115 |
+
"language_model.model.layers.16.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 116 |
+
"language_model.model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 117 |
+
"language_model.model.layers.16.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 118 |
+
"language_model.model.layers.16.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 119 |
+
"language_model.model.layers.16.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 120 |
+
"language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 121 |
+
"language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 122 |
+
"language_model.model.layers.16.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 123 |
+
"language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 124 |
+
"language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 125 |
+
"language_model.model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 126 |
+
"language_model.model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 127 |
+
"language_model.model.layers.17.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 128 |
+
"language_model.model.layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 129 |
+
"language_model.model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 130 |
+
"language_model.model.layers.17.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 131 |
+
"language_model.model.layers.17.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 132 |
+
"language_model.model.layers.17.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 133 |
+
"language_model.model.layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 134 |
+
"language_model.model.layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 135 |
+
"language_model.model.layers.17.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 136 |
+
"language_model.model.layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 137 |
+
"language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 138 |
+
"language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 139 |
+
"language_model.model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 140 |
+
"language_model.model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 141 |
+
"language_model.model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 142 |
+
"language_model.model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 143 |
+
"language_model.model.layers.18.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 144 |
+
"language_model.model.layers.18.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 145 |
+
"language_model.model.layers.18.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 146 |
+
"language_model.model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 147 |
+
"language_model.model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 148 |
+
"language_model.model.layers.18.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 149 |
+
"language_model.model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 150 |
+
"language_model.model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 151 |
+
"language_model.model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 152 |
+
"language_model.model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 153 |
+
"language_model.model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 154 |
+
"language_model.model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 155 |
+
"language_model.model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 156 |
+
"language_model.model.layers.19.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 157 |
+
"language_model.model.layers.19.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 158 |
+
"language_model.model.layers.19.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 159 |
+
"language_model.model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 160 |
+
"language_model.model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 161 |
+
"language_model.model.layers.19.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 162 |
+
"language_model.model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 163 |
+
"language_model.model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 164 |
+
"language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 165 |
+
"language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 166 |
+
"language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 167 |
+
"language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 168 |
+
"language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 169 |
+
"language_model.model.layers.2.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 170 |
+
"language_model.model.layers.2.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 171 |
+
"language_model.model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 172 |
+
"language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 173 |
+
"language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 174 |
+
"language_model.model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 175 |
+
"language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 176 |
+
"language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 177 |
+
"language_model.model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 178 |
+
"language_model.model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 179 |
+
"language_model.model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 180 |
+
"language_model.model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 181 |
+
"language_model.model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 182 |
+
"language_model.model.layers.20.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 183 |
+
"language_model.model.layers.20.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 184 |
+
"language_model.model.layers.20.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 185 |
+
"language_model.model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 186 |
+
"language_model.model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 187 |
+
"language_model.model.layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 188 |
+
"language_model.model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 189 |
+
"language_model.model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 190 |
+
"language_model.model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 191 |
+
"language_model.model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 192 |
+
"language_model.model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 193 |
+
"language_model.model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 194 |
+
"language_model.model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 195 |
+
"language_model.model.layers.21.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 196 |
+
"language_model.model.layers.21.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 197 |
+
"language_model.model.layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 198 |
+
"language_model.model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 199 |
+
"language_model.model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 200 |
+
"language_model.model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 201 |
+
"language_model.model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 202 |
+
"language_model.model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 203 |
+
"language_model.model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 204 |
+
"language_model.model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 205 |
+
"language_model.model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 206 |
+
"language_model.model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 207 |
+
"language_model.model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 208 |
+
"language_model.model.layers.22.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 209 |
+
"language_model.model.layers.22.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 210 |
+
"language_model.model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 211 |
+
"language_model.model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 212 |
+
"language_model.model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 213 |
+
"language_model.model.layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 214 |
+
"language_model.model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 215 |
+
"language_model.model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 216 |
+
"language_model.model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 217 |
+
"language_model.model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 218 |
+
"language_model.model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 219 |
+
"language_model.model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 220 |
+
"language_model.model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 221 |
+
"language_model.model.layers.23.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 222 |
+
"language_model.model.layers.23.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 223 |
+
"language_model.model.layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 224 |
+
"language_model.model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 225 |
+
"language_model.model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 226 |
+
"language_model.model.layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 227 |
+
"language_model.model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 228 |
+
"language_model.model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 229 |
+
"language_model.model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 230 |
+
"language_model.model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 231 |
+
"language_model.model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 232 |
+
"language_model.model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 233 |
+
"language_model.model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 234 |
+
"language_model.model.layers.24.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 235 |
+
"language_model.model.layers.24.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 236 |
+
"language_model.model.layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 237 |
+
"language_model.model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 238 |
+
"language_model.model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 239 |
+
"language_model.model.layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 240 |
+
"language_model.model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 241 |
+
"language_model.model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 242 |
+
"language_model.model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 243 |
+
"language_model.model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 244 |
+
"language_model.model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 245 |
+
"language_model.model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 246 |
+
"language_model.model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 247 |
+
"language_model.model.layers.25.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 248 |
+
"language_model.model.layers.25.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 249 |
+
"language_model.model.layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 250 |
+
"language_model.model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 251 |
+
"language_model.model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 252 |
+
"language_model.model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 253 |
+
"language_model.model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 254 |
+
"language_model.model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 255 |
+
"language_model.model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 256 |
+
"language_model.model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 257 |
+
"language_model.model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 258 |
+
"language_model.model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 259 |
+
"language_model.model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 260 |
+
"language_model.model.layers.26.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 261 |
+
"language_model.model.layers.26.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 262 |
+
"language_model.model.layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 263 |
+
"language_model.model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 264 |
+
"language_model.model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 265 |
+
"language_model.model.layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 266 |
+
"language_model.model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 267 |
+
"language_model.model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 268 |
+
"language_model.model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 269 |
+
"language_model.model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 270 |
+
"language_model.model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 271 |
+
"language_model.model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 272 |
+
"language_model.model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 273 |
+
"language_model.model.layers.27.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 274 |
+
"language_model.model.layers.27.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 275 |
+
"language_model.model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 276 |
+
"language_model.model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 277 |
+
"language_model.model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 278 |
+
"language_model.model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 279 |
+
"language_model.model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 280 |
+
"language_model.model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 281 |
+
"language_model.model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 282 |
+
"language_model.model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 283 |
+
"language_model.model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 284 |
+
"language_model.model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 285 |
+
"language_model.model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 286 |
+
"language_model.model.layers.28.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 287 |
+
"language_model.model.layers.28.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 288 |
+
"language_model.model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 289 |
+
"language_model.model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 290 |
+
"language_model.model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 291 |
+
"language_model.model.layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 292 |
+
"language_model.model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 293 |
+
"language_model.model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 294 |
+
"language_model.model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 295 |
+
"language_model.model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 296 |
+
"language_model.model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 297 |
+
"language_model.model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 298 |
+
"language_model.model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 299 |
+
"language_model.model.layers.29.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 300 |
+
"language_model.model.layers.29.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 301 |
+
"language_model.model.layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 302 |
+
"language_model.model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 303 |
+
"language_model.model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 304 |
+
"language_model.model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 305 |
+
"language_model.model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 306 |
+
"language_model.model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 307 |
+
"language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 308 |
+
"language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 309 |
+
"language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 310 |
+
"language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 311 |
+
"language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 312 |
+
"language_model.model.layers.3.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 313 |
+
"language_model.model.layers.3.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 314 |
+
"language_model.model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 315 |
+
"language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 316 |
+
"language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 317 |
+
"language_model.model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 318 |
+
"language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 319 |
+
"language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 320 |
+
"language_model.model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 321 |
+
"language_model.model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 322 |
+
"language_model.model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 323 |
+
"language_model.model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 324 |
+
"language_model.model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 325 |
+
"language_model.model.layers.30.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 326 |
+
"language_model.model.layers.30.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 327 |
+
"language_model.model.layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 328 |
+
"language_model.model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 329 |
+
"language_model.model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 330 |
+
"language_model.model.layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 331 |
+
"language_model.model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 332 |
+
"language_model.model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 333 |
+
"language_model.model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 334 |
+
"language_model.model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 335 |
+
"language_model.model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 336 |
+
"language_model.model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 337 |
+
"language_model.model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 338 |
+
"language_model.model.layers.31.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 339 |
+
"language_model.model.layers.31.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 340 |
+
"language_model.model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 341 |
+
"language_model.model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 342 |
+
"language_model.model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 343 |
+
"language_model.model.layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 344 |
+
"language_model.model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 345 |
+
"language_model.model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 346 |
+
"language_model.model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 347 |
+
"language_model.model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 348 |
+
"language_model.model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 349 |
+
"language_model.model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 350 |
+
"language_model.model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 351 |
+
"language_model.model.layers.32.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 352 |
+
"language_model.model.layers.32.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 353 |
+
"language_model.model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 354 |
+
"language_model.model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 355 |
+
"language_model.model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 356 |
+
"language_model.model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 357 |
+
"language_model.model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 358 |
+
"language_model.model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 359 |
+
"language_model.model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 360 |
+
"language_model.model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
| 361 |
+
"language_model.model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
| 362 |
+
"language_model.model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
| 363 |
+
"language_model.model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 364 |
+
"language_model.model.layers.33.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 365 |
+
"language_model.model.layers.33.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 366 |
+
"language_model.model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
|
| 367 |
+
"language_model.model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
| 368 |
+
"language_model.model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
| 369 |
+
"language_model.model.layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
|
| 370 |
+
"language_model.model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
| 371 |
+
"language_model.model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
| 372 |
+
"language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 373 |
+
"language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 374 |
+
"language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 375 |
+
"language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 376 |
+
"language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 377 |
+
"language_model.model.layers.4.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 378 |
+
"language_model.model.layers.4.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 379 |
+
"language_model.model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 380 |
+
"language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 381 |
+
"language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 382 |
+
"language_model.model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 383 |
+
"language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 384 |
+
"language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 385 |
+
"language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 386 |
+
"language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 387 |
+
"language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 388 |
+
"language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 389 |
+
"language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 390 |
+
"language_model.model.layers.5.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 391 |
+
"language_model.model.layers.5.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 392 |
+
"language_model.model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 393 |
+
"language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 394 |
+
"language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 395 |
+
"language_model.model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 396 |
+
"language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 397 |
+
"language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 398 |
+
"language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 399 |
+
"language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 400 |
+
"language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 401 |
+
"language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 402 |
+
"language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 403 |
+
"language_model.model.layers.6.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 404 |
+
"language_model.model.layers.6.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 405 |
+
"language_model.model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 406 |
+
"language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 407 |
+
"language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 408 |
+
"language_model.model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 409 |
+
"language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 410 |
+
"language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 411 |
+
"language_model.model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 412 |
+
"language_model.model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 413 |
+
"language_model.model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 414 |
+
"language_model.model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 415 |
+
"language_model.model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 416 |
+
"language_model.model.layers.7.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 417 |
+
"language_model.model.layers.7.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 418 |
+
"language_model.model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 419 |
+
"language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 420 |
+
"language_model.model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 421 |
+
"language_model.model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 422 |
+
"language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 423 |
+
"language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 424 |
+
"language_model.model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 425 |
+
"language_model.model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 426 |
+
"language_model.model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 427 |
+
"language_model.model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 428 |
+
"language_model.model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 429 |
+
"language_model.model.layers.8.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 430 |
+
"language_model.model.layers.8.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 431 |
+
"language_model.model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 432 |
+
"language_model.model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 433 |
+
"language_model.model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 434 |
+
"language_model.model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 435 |
+
"language_model.model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 436 |
+
"language_model.model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 437 |
+
"language_model.model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 438 |
+
"language_model.model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
| 439 |
+
"language_model.model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
| 440 |
+
"language_model.model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
| 441 |
+
"language_model.model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 442 |
+
"language_model.model.layers.9.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 443 |
+
"language_model.model.layers.9.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 444 |
+
"language_model.model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
|
| 445 |
+
"language_model.model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 446 |
+
"language_model.model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
| 447 |
+
"language_model.model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
|
| 448 |
+
"language_model.model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 449 |
+
"language_model.model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 450 |
+
"language_model.model.norm.weight": "model-00002-of-00002.safetensors",
|
| 451 |
+
"multi_modal_projector.mm_input_projection_weight": "model-00001-of-00002.safetensors",
|
| 452 |
+
"multi_modal_projector.mm_soft_emb_norm.weight": "model-00001-of-00002.safetensors",
|
| 453 |
+
"vision_tower.vision_model.embeddings.patch_embedding.bias": "model-00001-of-00002.safetensors",
|
| 454 |
+
"vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00001-of-00002.safetensors",
|
| 455 |
+
"vision_tower.vision_model.embeddings.position_embedding.weight": "model-00001-of-00002.safetensors",
|
| 456 |
+
"vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 457 |
+
"vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 458 |
+
"vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 459 |
+
"vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 460 |
+
"vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 461 |
+
"vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 462 |
+
"vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 463 |
+
"vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 464 |
+
"vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 465 |
+
"vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 466 |
+
"vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 467 |
+
"vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 468 |
+
"vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 469 |
+
"vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 470 |
+
"vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 471 |
+
"vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 472 |
+
"vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 473 |
+
"vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 474 |
+
"vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 475 |
+
"vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 476 |
+
"vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 477 |
+
"vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 478 |
+
"vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 479 |
+
"vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 480 |
+
"vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 481 |
+
"vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 482 |
+
"vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 483 |
+
"vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 484 |
+
"vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 485 |
+
"vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 486 |
+
"vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 487 |
+
"vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 488 |
+
"vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 489 |
+
"vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 490 |
+
"vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 491 |
+
"vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 492 |
+
"vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 493 |
+
"vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 494 |
+
"vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 495 |
+
"vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 496 |
+
"vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 497 |
+
"vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 498 |
+
"vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 499 |
+
"vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 500 |
+
"vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 501 |
+
"vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 502 |
+
"vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 503 |
+
"vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 504 |
+
"vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 505 |
+
"vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 506 |
+
"vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 507 |
+
"vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 508 |
+
"vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 509 |
+
"vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 510 |
+
"vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 511 |
+
"vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 512 |
+
"vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 513 |
+
"vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 514 |
+
"vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 515 |
+
"vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 516 |
+
"vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 517 |
+
"vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 518 |
+
"vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 519 |
+
"vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 520 |
+
"vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 521 |
+
"vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 522 |
+
"vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 523 |
+
"vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 524 |
+
"vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 525 |
+
"vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 526 |
+
"vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 527 |
+
"vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 528 |
+
"vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 529 |
+
"vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 530 |
+
"vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 531 |
+
"vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 532 |
+
"vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 533 |
+
"vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 534 |
+
"vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 535 |
+
"vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 536 |
+
"vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 537 |
+
"vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 538 |
+
"vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 539 |
+
"vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 540 |
+
"vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 541 |
+
"vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 542 |
+
"vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 543 |
+
"vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 544 |
+
"vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 545 |
+
"vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 546 |
+
"vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 547 |
+
"vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 548 |
+
"vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 549 |
+
"vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 550 |
+
"vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 551 |
+
"vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 552 |
+
"vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 553 |
+
"vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 554 |
+
"vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 555 |
+
"vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 556 |
+
"vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 557 |
+
"vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 558 |
+
"vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 559 |
+
"vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 560 |
+
"vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 561 |
+
"vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 562 |
+
"vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 563 |
+
"vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 564 |
+
"vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 565 |
+
"vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 566 |
+
"vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 567 |
+
"vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 568 |
+
"vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 569 |
+
"vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 570 |
+
"vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 571 |
+
"vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 572 |
+
"vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 573 |
+
"vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 574 |
+
"vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 575 |
+
"vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 576 |
+
"vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 577 |
+
"vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 578 |
+
"vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 579 |
+
"vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 580 |
+
"vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 581 |
+
"vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 582 |
+
"vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 583 |
+
"vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 584 |
+
"vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 585 |
+
"vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 586 |
+
"vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 587 |
+
"vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 588 |
+
"vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 589 |
+
"vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 590 |
+
"vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 591 |
+
"vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 592 |
+
"vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 593 |
+
"vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 594 |
+
"vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 595 |
+
"vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 596 |
+
"vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 597 |
+
"vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 598 |
+
"vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 599 |
+
"vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 600 |
+
"vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 601 |
+
"vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 602 |
+
"vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 603 |
+
"vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 604 |
+
"vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 605 |
+
"vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 606 |
+
"vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 607 |
+
"vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 608 |
+
"vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 609 |
+
"vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 610 |
+
"vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 611 |
+
"vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 612 |
+
"vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 613 |
+
"vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 614 |
+
"vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 615 |
+
"vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 616 |
+
"vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 617 |
+
"vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 618 |
+
"vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 619 |
+
"vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 620 |
+
"vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 621 |
+
"vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 622 |
+
"vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 623 |
+
"vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 624 |
+
"vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 625 |
+
"vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 626 |
+
"vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 627 |
+
"vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 628 |
+
"vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 629 |
+
"vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 630 |
+
"vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 631 |
+
"vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 632 |
+
"vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 633 |
+
"vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 634 |
+
"vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 635 |
+
"vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 636 |
+
"vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 637 |
+
"vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 638 |
+
"vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 639 |
+
"vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 640 |
+
"vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 641 |
+
"vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 642 |
+
"vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 643 |
+
"vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 644 |
+
"vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 645 |
+
"vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 646 |
+
"vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 647 |
+
"vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 648 |
+
"vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 649 |
+
"vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 650 |
+
"vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 651 |
+
"vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 652 |
+
"vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 653 |
+
"vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 654 |
+
"vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 655 |
+
"vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 656 |
+
"vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 657 |
+
"vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 658 |
+
"vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 659 |
+
"vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 660 |
+
"vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 661 |
+
"vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 662 |
+
"vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 663 |
+
"vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 664 |
+
"vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 665 |
+
"vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 666 |
+
"vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 667 |
+
"vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 668 |
+
"vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 669 |
+
"vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 670 |
+
"vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 671 |
+
"vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 672 |
+
"vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 673 |
+
"vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 674 |
+
"vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 675 |
+
"vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 676 |
+
"vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 677 |
+
"vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 678 |
+
"vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 679 |
+
"vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 680 |
+
"vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 681 |
+
"vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 682 |
+
"vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 683 |
+
"vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 684 |
+
"vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 685 |
+
"vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 686 |
+
"vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 687 |
+
"vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 688 |
+
"vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 689 |
+
"vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 690 |
+
"vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 691 |
+
"vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 692 |
+
"vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 693 |
+
"vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 694 |
+
"vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 695 |
+
"vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 696 |
+
"vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 697 |
+
"vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 698 |
+
"vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 699 |
+
"vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 700 |
+
"vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 701 |
+
"vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 702 |
+
"vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 703 |
+
"vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 704 |
+
"vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 705 |
+
"vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 706 |
+
"vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 707 |
+
"vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 708 |
+
"vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 709 |
+
"vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 710 |
+
"vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 711 |
+
"vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 712 |
+
"vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 713 |
+
"vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 714 |
+
"vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 715 |
+
"vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 716 |
+
"vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 717 |
+
"vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 718 |
+
"vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 719 |
+
"vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 720 |
+
"vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 721 |
+
"vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 722 |
+
"vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 723 |
+
"vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 724 |
+
"vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 725 |
+
"vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 726 |
+
"vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 727 |
+
"vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 728 |
+
"vision_tower.vision_model.encoder.layers.24.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 729 |
+
"vision_tower.vision_model.encoder.layers.24.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 730 |
+
"vision_tower.vision_model.encoder.layers.24.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 731 |
+
"vision_tower.vision_model.encoder.layers.24.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 732 |
+
"vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 733 |
+
"vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 734 |
+
"vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 735 |
+
"vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 736 |
+
"vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 737 |
+
"vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 738 |
+
"vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 739 |
+
"vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 740 |
+
"vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 741 |
+
"vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 742 |
+
"vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 743 |
+
"vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 744 |
+
"vision_tower.vision_model.encoder.layers.25.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 745 |
+
"vision_tower.vision_model.encoder.layers.25.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 746 |
+
"vision_tower.vision_model.encoder.layers.25.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 747 |
+
"vision_tower.vision_model.encoder.layers.25.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 748 |
+
"vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 749 |
+
"vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 750 |
+
"vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 751 |
+
"vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 752 |
+
"vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 753 |
+
"vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 754 |
+
"vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 755 |
+
"vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 756 |
+
"vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 757 |
+
"vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 758 |
+
"vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 759 |
+
"vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 760 |
+
"vision_tower.vision_model.encoder.layers.26.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 761 |
+
"vision_tower.vision_model.encoder.layers.26.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 762 |
+
"vision_tower.vision_model.encoder.layers.26.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 763 |
+
"vision_tower.vision_model.encoder.layers.26.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 764 |
+
"vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 765 |
+
"vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 766 |
+
"vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 767 |
+
"vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 768 |
+
"vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 769 |
+
"vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 770 |
+
"vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 771 |
+
"vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 772 |
+
"vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 773 |
+
"vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 774 |
+
"vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 775 |
+
"vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 776 |
+
"vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 777 |
+
"vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 778 |
+
"vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 779 |
+
"vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 780 |
+
"vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 781 |
+
"vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 782 |
+
"vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 783 |
+
"vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 784 |
+
"vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 785 |
+
"vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 786 |
+
"vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 787 |
+
"vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 788 |
+
"vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 789 |
+
"vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 790 |
+
"vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 791 |
+
"vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 792 |
+
"vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 793 |
+
"vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 794 |
+
"vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 795 |
+
"vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 796 |
+
"vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 797 |
+
"vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 798 |
+
"vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 799 |
+
"vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 800 |
+
"vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 801 |
+
"vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 802 |
+
"vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 803 |
+
"vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 804 |
+
"vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 805 |
+
"vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 806 |
+
"vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 807 |
+
"vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 808 |
+
"vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 809 |
+
"vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 810 |
+
"vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 811 |
+
"vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 812 |
+
"vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 813 |
+
"vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 814 |
+
"vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 815 |
+
"vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 816 |
+
"vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 817 |
+
"vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 818 |
+
"vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 819 |
+
"vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 820 |
+
"vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 821 |
+
"vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 822 |
+
"vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 823 |
+
"vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 824 |
+
"vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 825 |
+
"vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 826 |
+
"vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 827 |
+
"vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 828 |
+
"vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 829 |
+
"vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 830 |
+
"vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 831 |
+
"vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 832 |
+
"vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 833 |
+
"vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 834 |
+
"vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 835 |
+
"vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 836 |
+
"vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 837 |
+
"vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 838 |
+
"vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 839 |
+
"vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 840 |
+
"vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 841 |
+
"vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 842 |
+
"vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 843 |
+
"vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 844 |
+
"vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 845 |
+
"vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 846 |
+
"vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 847 |
+
"vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 848 |
+
"vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 849 |
+
"vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 850 |
+
"vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 851 |
+
"vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 852 |
+
"vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 853 |
+
"vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 854 |
+
"vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 855 |
+
"vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 856 |
+
"vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 857 |
+
"vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 858 |
+
"vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 859 |
+
"vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 860 |
+
"vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 861 |
+
"vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 862 |
+
"vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 863 |
+
"vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 864 |
+
"vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 865 |
+
"vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 866 |
+
"vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 867 |
+
"vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 868 |
+
"vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 869 |
+
"vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 870 |
+
"vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 871 |
+
"vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 872 |
+
"vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00001-of-00002.safetensors",
|
| 873 |
+
"vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00001-of-00002.safetensors",
|
| 874 |
+
"vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00001-of-00002.safetensors",
|
| 875 |
+
"vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00001-of-00002.safetensors",
|
| 876 |
+
"vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
| 877 |
+
"vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
| 878 |
+
"vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
| 879 |
+
"vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
| 880 |
+
"vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 881 |
+
"vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
| 882 |
+
"vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
|
| 883 |
+
"vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
|
| 884 |
+
"vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
| 885 |
+
"vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
| 886 |
+
"vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
| 887 |
+
"vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
| 888 |
+
"vision_tower.vision_model.post_layernorm.bias": "model-00001-of-00002.safetensors",
|
| 889 |
+
"vision_tower.vision_model.post_layernorm.weight": "model-00001-of-00002.safetensors"
|
| 890 |
+
}
|
| 891 |
+
}
|
preprocessor_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_rgb": null,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_pan_and_scan": null,
|
| 5 |
+
"do_rescale": true,
|
| 6 |
+
"do_resize": true,
|
| 7 |
+
"image_mean": [
|
| 8 |
+
0.5,
|
| 9 |
+
0.5,
|
| 10 |
+
0.5
|
| 11 |
+
],
|
| 12 |
+
"image_processor_type": "Gemma3ImageProcessor",
|
| 13 |
+
"image_seq_length": 256,
|
| 14 |
+
"image_std": [
|
| 15 |
+
0.5,
|
| 16 |
+
0.5,
|
| 17 |
+
0.5
|
| 18 |
+
],
|
| 19 |
+
"pan_and_scan_max_num_crops": null,
|
| 20 |
+
"pan_and_scan_min_crop_size": null,
|
| 21 |
+
"pan_and_scan_min_ratio_to_activate": null,
|
| 22 |
+
"processor_class": "Gemma3Processor",
|
| 23 |
+
"resample": 2,
|
| 24 |
+
"rescale_factor": 0.00392156862745098,
|
| 25 |
+
"size": {
|
| 26 |
+
"height": 896,
|
| 27 |
+
"width": 896
|
| 28 |
+
}
|
| 29 |
+
}
|
processor_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"image_seq_length": 256,
|
| 3 |
+
"processor_class": "Gemma3Processor"
|
| 4 |
+
}
|
runs/Nov24_00-10-02_jzxh298/events.out.tfevents.1763939502.jzxh298.1912876.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:133e4f210283a16b10e10502f85c480f116b833d4e37704964defe4479573088
|
| 3 |
+
size 41816
|
slurm.out
ADDED
|
@@ -0,0 +1,382 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
0%| | 0/711 [00:00<?, ?it/s]
|
| 1 |
0%| | 1/711 [03:10<37:32:11, 190.33s/it]
|
| 2 |
0%| | 2/711 [03:14<15:52:57, 80.65s/it]
|
| 3 |
0%| | 3/711 [03:16<8:49:26, 44.87s/it]
|
| 4 |
1%| | 4/711 [03:18<5:30:58, 28.09s/it]
|
| 5 |
1%| | 5/711 [03:21<3:41:20, 18.81s/it]
|
| 6 |
1%| | 6/711 [03:23<2:35:25, 13.23s/it]
|
| 7 |
1%| | 7/711 [03:25<1:53:16, 9.65s/it]
|
| 8 |
1%| | 8/711 [03:28<1:25:41, 7.31s/it]
|
| 9 |
1%|β | 9/711 [03:30<1:07:15, 5.75s/it]
|
| 10 |
1%|β | 10/711 [03:32<54:44, 4.68s/it]
|
| 11 |
|
| 12 |
1%|β | 10/711 [03:32<54:44, 4.68s/it]
|
| 13 |
2%|β | 11/711 [03:35<46:17, 3.97s/it]
|
| 14 |
2%|β | 12/711 [03:37<40:18, 3.46s/it]
|
| 15 |
2%|β | 13/711 [03:39<36:10, 3.11s/it]
|
| 16 |
2%|β | 14/711 [03:42<34:11, 2.94s/it]
|
| 17 |
2%|β | 15/711 [03:44<32:06, 2.77s/it]
|
| 18 |
2%|β | 16/711 [03:46<30:28, 2.63s/it]
|
| 19 |
2%|β | 17/711 [03:49<29:24, 2.54s/it]
|
| 20 |
3%|β
|
|
|
|
|
|
|
|
|
|
| 21 |
3%|β | 19/711 [03:53<28:02, 2.43s/it]
|
| 22 |
3%|β | 20/711 [03:56<27:33, 2.39s/it]
|
| 23 |
|
| 24 |
3%|β | 20/711 [03:56<27:33, 2.39s/it]
|
| 25 |
3%|β | 21/711 [03:58<27:12, 2.37s/it]
|
| 26 |
3%|β | 22/711 [04:00<26:57, 2.35s/it]
|
| 27 |
3%|β | 23/711 [04:03<26:45, 2.33s/it]
|
| 28 |
3%|β | 24/711 [04:05<26:37, 2.33s/it]
|
| 29 |
4%|β | 25/711 [04:07<26:33, 2.32s/it]
|
| 30 |
4%|β | 26/711 [04:10<26:26, 2.32s/it]
|
| 31 |
4%|β | 27/711 [04:12<26:21, 2.31s/it]
|
| 32 |
4%|β | 28/711 [04:14<26:17, 2.31s/it]
|
| 33 |
4%|β | 29/711 [04:17<26:14, 2.31s/it]
|
| 34 |
4%|β | 30/711 [04:19<26:39, 2.35s/it]
|
| 35 |
|
| 36 |
4%|β | 30/711 [04:19<26:39, 2.35s/it]
|
| 37 |
4%|β | 31/711 [04:21<26:38, 2.35s/it]
|
| 38 |
5%|β | 32/711 [04:24<26:30, 2.34s/it]
|
| 39 |
5%|β | 33/711 [04:26<26:21, 2.33s/it]
|
| 40 |
5%|β | 34/
|
|
|
|
|
|
|
|
|
|
| 41 |
5%|β | 35/711 [04:31<26:08, 2.32s/it]
|
| 42 |
5%|β | 36/711 [04:33<26:03, 2.32s/it]
|
| 43 |
5%|β | 37/711 [04:35<26:01, 2.32s/it]
|
| 44 |
5%|β | 38/711 [04:38<25:57, 2.31s/it]
|
| 45 |
5%|β | 39/711 [04:40<25:55, 2.31s/it]
|
| 46 |
6%|β | 40/711 [04:42<25:53, 2.31s/it]
|
| 47 |
|
| 48 |
6%|β | 40/711 [04:42<25:53, 2.31s/it]
|
| 49 |
6%|β | 41/711 [04:44<25:50, 2.31s/it]
|
| 50 |
6%|β | 42/711 [04:47<25:47, 2.31s/it]
|
| 51 |
6%|β | 43/711 [04:49<25:50, 2.32s/it]
|
| 52 |
6%|β | 44/711 [04:51<25:45, 2.32s/it]
|
| 53 |
6%|β | 45/711 [04:54<25:40, 2.31s/it]
|
| 54 |
6%|β | 46/711 [04:56<25:51, 2.33s/it]
|
| 55 |
7%|β | 47/711 [04:58<25:53, 2.34s/it]
|
| 56 |
7%|β | 48/711 [05:01<25:46, 2.33s/it]
|
| 57 |
7%|β | 49/711 [05:03<25:41, 2.33s/it]
|
| 58 |
7%|β | 50/711 [05:05<25:40, 2.33s/it]
|
| 59 |
|
| 60 |
7%|β | 50/711 [0
|
|
|
|
|
|
|
| 61 |
7%|β | 51/711 [05:08<25:33, 2.32s/it]
|
| 62 |
7%|β | 52/711 [05:10<25:29, 2.32s/it]
|
| 63 |
7%|β | 53/711 [05:12<25:34, 2.33s/it]
|
| 64 |
8%|β | 54/711 [05:15<25:28, 2.33s/it]
|
| 65 |
8%|β | 55/711 [05:17<25:23, 2.32s/it]
|
| 66 |
8%|β | 56/711 [05:19<25:34, 2.34s/it]
|
| 67 |
8%|β | 57/711 [05:22<25:26, 2.33s/it]
|
| 68 |
8%|β | 58/711 [05:24<25:37, 2.35s/it]
|
| 69 |
8%|β | 59/711 [05:26<25:28, 2.34s/it]
|
| 70 |
8%|β | 60/711 [05:29<25:19, 2.33s/it]
|
| 71 |
|
| 72 |
8%|β | 60/711 [05:29<25:19, 2.33s/it]
|
| 73 |
9%|β | 61/711 [05:31<25:11, 2.33s/it]
|
| 74 |
9%|β | 62/711 [05:33<25:19, 2.34s/it]
|
| 75 |
9%|β | 63/711 [05:36<25:30, 2.36s/it]
|
| 76 |
9%|β | 64/711 [05:38<25:18, 2.35s/it]
|
| 77 |
9%|β | 65/711 [05:40<25:08, 2.33s/it]
|
| 78 |
9%|β | 66/711 [05:43<25:00, 2.33s/it]
|
| 79 |
9%|β | 67/711 [05:45<24:55, 2.32s/it]
|
| 80 |
10%|β | 68/711 [05:47<
|
|
|
|
|
|
|
|
|
|
| 81 |
10%|β | 69/711 [05:50<24:46, 2.31s/it]
|
| 82 |
10%|β | 70/711 [05:52<24:42, 2.31s/it]
|
| 83 |
|
| 84 |
10%|β | 70/711 [05:52<24:42, 2.31s/it]
|
| 85 |
10%|β | 71/711 [05:54<24:39, 2.31s/it]
|
| 86 |
10%|β | 72/711 [05:57<24:35, 2.31s/it]
|
| 87 |
10%|β | 73/711 [05:59<24:32, 2.31s/it]
|
| 88 |
10%|β | 74/711 [06:01<24:29, 2.31s/it]
|
| 89 |
11%|β | 75/711 [06:04<24:26, 2.31s/it]
|
| 90 |
11%|β | 76/711 [06:06<24:23, 2.30s/it]
|
| 91 |
11%|β | 77/711 [06:08<24:20, 2.30s/it]
|
| 92 |
11%|β | 78/711 [06:11<24:30, 2.32s/it]
|
| 93 |
11%|β | 79/711 [06:13<24:35, 2.33s/it]
|
| 94 |
11%|ββ | 80/711 [06:15<24:32, 2.33s/it]
|
| 95 |
|
| 96 |
11%|ββ | 80/711 [06:15<24:32, 2.33s/it]
|
| 97 |
11%|ββ | 81/711 [06:18<24:42, 2.35s/it]
|
| 98 |
12%|ββ | 82/711 [06:20<24:32, 2.34s/it]
|
| 99 |
12%|ββ | 83/711 [06:22<24:38, 2.35s/it]
|
| 100 |
12%|ββ | 84/711 [
|
|
|
|
|
|
|
| 101 |
12%|ββ | 85/711 [06:27<24:18, 2.33s/it]
|
| 102 |
12%|ββ | 86/711 [06:29<24:24, 2.34s/it]
|
| 103 |
12%|ββ | 87/711 [06:32<24:14, 2.33s/it]
|
| 104 |
12%|ββ | 88/711 [06:34<24:07, 2.32s/it]
|
| 105 |
13%|ββ | 89/711 [06:36<24:02, 2.32s/it]
|
| 106 |
13%|ββ | 90/711 [06:39<23:59, 2.32s/it]
|
| 107 |
|
| 108 |
13%|ββ | 90/711 [06:39<23:59, 2.32s/it]
|
| 109 |
13%|ββ | 91/711 [06:41<23:57, 2.32s/it]
|
| 110 |
13%|ββ | 92/711 [06:43<23:58, 2.32s/it]
|
| 111 |
13%|ββ | 93/711 [06:46<24:09, 2.35s/it]
|
| 112 |
13%|ββ | 94/711 [06:48<24:28, 2.38s/it]
|
| 113 |
13%|ββ | 95/711 [06:50<24:37, 2.40s/it]
|
| 114 |
14%|ββ | 96/711 [06:53<24:19, 2.37s/it]
|
| 115 |
14%|ββ | 97/711 [06:55<24:06, 2.36s/it]
|
| 116 |
14%|ββ | 98/711 [06:58<24:10, 2.37s/it]
|
| 117 |
14%|ββ | 99/711 [07:00<23:57, 2.35s/it]
|
| 118 |
14%|ββ | 100/711 [07:02<23:47, 2.34s/it]
|
| 119 |
|
|
|
|
|
|
|
|
|
|
| 120 |
14%|ββ | 100/711 [07:02<23:47, 2.34s/it]
|
| 121 |
14%|ββ | 101/711 [07:04<23:40, 2.33s/it]
|
| 122 |
14%|ββ | 102/711 [07:07<23:35, 2.32s/it]
|
| 123 |
14%|ββ | 103/711 [07:09<23:30, 2.32s/it]
|
| 124 |
15%|ββ | 104/711 [07:11<23:26, 2.32s/it]
|
| 125 |
15%|ββ | 105/711 [07:14<23:23, 2.32s/it]
|
| 126 |
15%|ββ | 106/711 [07:16<23:21, 2.32s/it]
|
| 127 |
15%|ββ | 107/711 [07:18<23:18, 2.32s/it]
|
| 128 |
15%|ββ | 108/711 [07:21<23:13, 2.31s/it]
|
| 129 |
15%|ββ | 109/711 [07:23<23:10, 2.31s/it]
|
| 130 |
15%|ββ | 110/711 [07:25<23:20, 2.33s/it]
|
| 131 |
|
| 132 |
15%|ββ | 110/711 [07:25<23:20, 2.33s/it]
|
| 133 |
16%|ββ | 111/711 [07:28<23:30, 2.35s/it]
|
| 134 |
16%|ββ | 112/711 [07:30<23:23, 2.34s/it]
|
| 135 |
16%|ββ | 113/711 [07:32<23:27, 2.35s/it]
|
| 136 |
16%|ββ | 114/711 [07:35<23:16, 2.34s/it]
|
| 137 |
16%|ββ | 115/711 [07:37<23:07, 2.33s/it]
|
| 138 |
16%|ββ | 116/711 [07:39<23:01, 2.32s/it
|
|
|
|
|
|
|
|
|
|
| 139 |
16%|ββ | 117/711 [07:42<22:57, 2.32s/it]
|
| 140 |
17%|ββ | 118/711 [07:44<22:52, 2.31s/it]
|
| 141 |
17%|ββ | 119/711 [07:46<22:48, 2.31s/it]
|
| 142 |
17%|ββ | 120/711 [07:49<22:46, 2.31s/it]
|
| 143 |
|
| 144 |
17%|ββ | 120/711 [07:49<22:46, 2.31s/it]
|
| 145 |
17%|ββ | 121/711 [07:51<22:45, 2.31s/it]
|
| 146 |
17%|ββ | 122/711 [07:53<22:45, 2.32s/it]
|
| 147 |
17%|ββ | 123/711 [07:56<22:42, 2.32s/it]
|
| 148 |
17%|ββ | 124/711 [07:58<22:42, 2.32s/it]
|
| 149 |
18%|ββ | 125/711 [08:00<22:52, 2.34s/it]
|
| 150 |
18%|ββ | 126/711 [08:03<23:14, 2.38s/it]
|
| 151 |
18%|ββ | 127/711 [08:05<23:21, 2.40s/it]
|
| 152 |
18%|ββ | 128/711 [08:07<23:04, 2.38s/it]
|
| 153 |
18%|ββ | 129/711 [08:10<22:51, 2.36s/it]
|
| 154 |
18%|ββ | 130/711 [08:12<22:42, 2.35s/it]
|
| 155 |
|
| 156 |
18%|ββ | 130/711 [08:12<22:42, 2.35s/it]
|
| 157 |
18%|ββ | 131/711 [08:14<22:36, 2.34s/it]
|
| 158 |
1
|
|
|
|
|
|
|
| 159 |
19%|ββ | 133/711 [08:19<22:31, 2.34s/it]
|
| 160 |
19%|ββ | 134/711 [08:21<22:25, 2.33s/it]
|
| 161 |
19%|ββ | 135/711 [08:24<22:19, 2.33s/it]
|
| 162 |
19%|ββ | 136/711 [08:26<22:15, 2.32s/it]
|
| 163 |
19%|ββ | 137/711 [08:28<22:10, 2.32s/it]
|
| 164 |
19%|ββ | 138/711 [08:31<22:05, 2.31s/it]
|
| 165 |
20%|ββ | 139/711 [08:33<22:00, 2.31s/it]
|
| 166 |
20%|ββ | 140/711 [08:35<21:57, 2.31s/it]
|
| 167 |
|
| 168 |
20%|ββ | 140/711 [08:35<21:57, 2.31s/it]
|
| 169 |
20%|ββ | 141/711 [08:38<21:56, 2.31s/it]
|
| 170 |
20%|ββ | 142/711 [08:40<22:05, 2.33s/it]
|
| 171 |
20%|ββ | 143/711 [08:42<22:07, 2.34s/it]
|
| 172 |
20%|ββ | 144/711 [08:45<22:03, 2.34s/it]
|
| 173 |
20%|ββ | 145/711 [08:47<21:58, 2.33s/it]
|
| 174 |
21%|ββ | 146/711 [08:49<21:53, 2.32s/it]
|
| 175 |
21%|ββ | 147/711 [08:52<21:49, 2.32s/it]
|
| 176 |
21%|ββ | 148/711 [08:54<21:45, 2.32s/it]
|
| 177 |
21
|
|
|
|
|
|
|
|
|
|
| 178 |
21%|ββ | 150/711 [08:58<21:38, 2.32s/it]
|
| 179 |
|
| 180 |
21%|ββ | 150/711 [08:59<21:38, 2.32s/it]
|
| 181 |
21%|ββ | 151/711 [09:01<21:35, 2.31s/it]
|
| 182 |
21%|βββ | 152/711 [09:03<21:40, 2.33s/it]
|
| 183 |
22%|βββ | 153/711 [09:05<21:36, 2.32s/it]
|
| 184 |
22%|βββ | 154/711 [09:08<21:31, 2.32s/it]
|
| 185 |
22%|βββ | 155/711 [09:10<21:26, 2.31s/it]
|
| 186 |
22%|βββ | 156/711 [09:12<21:22, 2.31s/it]
|
| 187 |
22%|βββ | 157/711 [09:15<21:20, 2.31s/it]
|
| 188 |
22%|βββ | 158/711 [09:17<21:20, 2.31s/it]
|
| 189 |
22%|βββ | 159/711 [09:19<21:41, 2.36s/it]
|
| 190 |
23%|βββ | 160/711 [09:22<21:36, 2.35s/it]
|
| 191 |
|
| 192 |
23%|βββ | 160/711 [09:22<21:36, 2.35s/it]
|
| 193 |
23%|βββ | 161/711 [09:24<21:28, 2.34s/it]
|
| 194 |
23%|βββ | 162/711 [09:26<21:23, 2.34s/it]
|
| 195 |
23%|βββ | 163/711 [09:29
|
|
|
|
|
|
|
| 196 |
23%|βββ | 164/711 [09:31<21:12, 2.33s/it]
|
| 197 |
23%|βββ | 165/711 [09:33<21:08, 2.32s/it]
|
| 198 |
23%|βββ | 166/711 [09:36<21:17, 2.34s/it]
|
| 199 |
23%|βββ | 167/711 [09:38<21:09, 2.33s/it]
|
| 200 |
24%|βββ | 168/711 [09:40<21:04, 2.33s/it]
|
| 201 |
24%|βββ | 169/711 [09:43<20:59, 2.32s/it]
|
| 202 |
24%|βββ | 170/711 [09:45<20:54, 2.32s/it]
|
| 203 |
|
| 204 |
24%|βββ | 170/711 [09:45<20:54, 2.32s/it]
|
| 205 |
24%|βββ | 171/711 [09:47<20:52, 2.32s/it]
|
| 206 |
24%|βββ | 172/711 [09:50<20:49, 2.32s/it]
|
| 207 |
24%|βββ | 173/711 [09:52<20:46, 2.32s/it]
|
| 208 |
24%|βββ | 174/711 [09:54<20:44, 2.32s/it]
|
| 209 |
25%|βββ | 175/711 [09:57<20:48, 2.33s/it]
|
| 210 |
25%|βββ | 176/711 [09:59<20:53, 2.34s/it]
|
| 211 |
25%|βββ | 177/711 [10:01<20:46, 2.33s/it]
|
| 212 |
25%|βββ | 178/711 [10:04<20:53, 2.35s/it]
|
| 213 |
25%|βββ | 179/711 [10:06<20:45, 2.34s/it]
|
| 214 |
2
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
25%|βββ | 180/711 [10:08<20:39, 2.33s/it]
|
| 217 |
25%|βββ | 181/711 [10:11<20:33, 2.33s/it]
|
| 218 |
26%|βββ | 182/711 [10:13<20:27, 2.32s/it]
|
| 219 |
26%|βββ | 183/711 [10:15<20:23, 2.32s/it]
|
| 220 |
26%|βββ | 184/711 [10:18<20:19, 2.31s/it]
|
| 221 |
26%|βββ | 185/711 [10:20<20:15, 2.31s/it]
|
| 222 |
26%|βββ | 186/711 [10:22<20:13, 2.31s/it]
|
| 223 |
26%|βββ | 187/711 [10:25<20:09, 2.31s/it]
|
| 224 |
26%|βββ | 188/711 [10:27<20:20, 2.33s/it]
|
| 225 |
27%|βββ | 189/711 [10:29<20:14, 2.33s/it]
|
| 226 |
27%|βββ | 190/711 [10:32<20:44, 2.39s/it]
|
| 227 |
|
| 228 |
27%|βββ | 190/711 [10:32<20:44, 2.39s/it]
|
| 229 |
27%|βββ | 191/711 [10:34<20:38, 2.38s/it]
|
| 230 |
27%|βββ | 192/711 [10:37<20:32, 2.37s/it]
|
| 231 |
27%|βββ | 193/711 [10:39<20:19, 2.35s/it]
|
| 232 |
27%|βββ | 194/7
|
|
|
|
|
|
|
| 233 |
27%|βββ | 195/711 [10:43<20:06, 2.34s/it]
|
| 234 |
28%|βββ | 196/711 [10:46<19:58, 2.33s/it]
|
| 235 |
28%|βββ | 197/711 [10:48<20:00, 2.34s/it]
|
| 236 |
28%|βββ | 198/711 [10:51<20:08, 2.36s/it]
|
| 237 |
28%|βββ | 199/711 [10:53<19:59, 2.34s/it]
|
| 238 |
28%|βββ | 200/711 [10:55<19:52, 2.33s/it]
|
| 239 |
|
| 240 |
28%|βββ | 200/711 [10:55<19:52, 2.33s/it]
|
| 241 |
28%|βββ | 201/711 [10:57<19:47, 2.33s/it]
|
| 242 |
28%|βββ | 202/711 [11:00<19:40, 2.32s/it]
|
| 243 |
29%|βββ | 203/711 [11:02<19:37, 2.32s/it]
|
| 244 |
29%|βββ | 204/711 [11:04<19:45, 2.34s/it]
|
| 245 |
29%|βββ | 205/711 [11:07<19:39, 2.33s/it]
|
| 246 |
29%|βββ | 206/711 [11:09<19:37, 2.33s/it]
|
| 247 |
29%|βββ | 207/711 [11:12<20:12, 2.41s/it]
|
| 248 |
29%|βββ | 208/711 [11:14<20:01, 2.39s/it]
|
| 249 |
29%|βββ | 209/711 [11:16<19:46, 2.36s/it]
|
| 250 |
30%|βββ | 210/711 [11:19<19:37, 2.3
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
30%|βββ | 210/711 [11:19<19:37, 2.35s/it]
|
| 253 |
30%|βββ | 211/711 [11:21<19:30, 2.34s/it]
|
| 254 |
30%|βββ | 212/711 [11:23<19:23, 2.33s/it]
|
| 255 |
30%|βββ | 213/711 [11:26<19:16, 2.32s/it]
|
| 256 |
30%|βββ | 214/711 [11:28<19:12, 2.32s/it]
|
| 257 |
30%|βββ | 215/711 [11:30<19:09, 2.32s/it]
|
| 258 |
30%|βββ | 216/711 [11:33<19:06, 2.32s/it]
|
| 259 |
31%|βββ | 217/711 [11:35<19:03, 2.31s/it]
|
| 260 |
31%|βββ | 218/711 [11:37<19:03, 2.32s/it]
|
| 261 |
31%|βββ | 219/711 [11:39<19:01, 2.32s/it]
|
| 262 |
31%|βββ | 220/711 [11:42<18:59, 2.32s/it]
|
| 263 |
|
| 264 |
31%|βββ | 220/711 [11:42<18:59, 2.32s/it]
|
| 265 |
31%|βββ | 221/711 [11:44<18:58, 2.32s/it]
|
| 266 |
31%|βββ | 222/711 [11:47<19:09, 2.35s/it]
|
| 267 |
31%|ββββ | 223/711 [11:49<19:11, 2.36s/it]
|
| 268 |
32%|ββββ | 224/711 [11:51<19:07, 2.36s/it]
|
| 269 |
32%|βββοΏ½
|
|
|
|
|
|
|
| 270 |
32%|ββββ | 226/711 [11:56<19:01, 2.35s/it]
|
| 271 |
32%|ββββ | 227/711 [11:58<18:52, 2.34s/it]
|
| 272 |
32%|ββββ | 228/711 [12:01<18:45, 2.33s/it]
|
| 273 |
32%|ββββ | 229/711 [12:03<18:40, 2.33s/it]
|
| 274 |
32%|ββββ | 230/711 [12:05<18:35, 2.32s/it]
|
| 275 |
|
| 276 |
32%|ββββ | 230/711 [12:05<18:35, 2.32s/it]
|
| 277 |
32%|ββββ | 231/711 [12:08<18:32, 2.32s/it]
|
| 278 |
33%|ββββ | 232/711 [12:10<18:30, 2.32s/it]
|
| 279 |
33%|ββββ | 233/711 [12:12<18:28, 2.32s/it]
|
| 280 |
33%|ββββ | 234/711 [12:14<18:24, 2.32s/it]
|
| 281 |
33%|ββββ | 235/711 [12:17<18:21, 2.31s/it]
|
| 282 |
33%|ββββ | 236/711 [12:19<18:19, 2.31s/it]
|
| 283 |
33%|ββββ | 237/711 [12:21<18:15, 2.31s/it]
|
| 284 |
33%|ββββ | 238/711 [12:24<18:13, 2.31s/it]
|
| 285 |
34%|ββββ | 239/711 [12:26<18:18, 2.33s/it]
|
| 286 |
34%|ββββ | 240/711 [12:28<18:20, 2.34s/it]
|
| 287 |
|
|
|
|
|
|
|
|
|
|
| 288 |
34%|ββββ | 240/711 [12:28<18:20, 2.34s/it]
|
| 289 |
34%|ββββ | 241/711 [12:31<18:15, 2.33s/it]
|
| 290 |
34%|ββββ | 242/711 [12:33<18:09, 2.32s/it]
|
| 291 |
34%|ββββ | 243/711 [12:35<18:11, 2.33s/it]
|
| 292 |
34%|ββββ | 244/711 [12:38<18:05, 2.32s/it]
|
| 293 |
34%|ββββ | 245/711 [12:40<18:00, 2.32s/it]
|
| 294 |
35%|ββββ | 246/711 [12:42<17:56, 2.32s/it]
|
| 295 |
35%|ββββ | 247/711 [12:45<17:53, 2.31s/it]
|
| 296 |
35%|ββββ | 248/711 [12:47<17:51, 2.31s/it]
|
| 297 |
35%|ββββ | 249/711 [12:49<17:49, 2.31s/it]
|
| 298 |
35%|ββββ | 250/711 [12:52<17:46, 2.31s/it]
|
| 299 |
|
| 300 |
35%|ββββ | 250/711 [12:52<17:46, 2.31s/it]
|
| 301 |
35%|ββββ | 251/711 [12:54<17:44, 2.31s/it]
|
| 302 |
35%|ββββ | 252/711 [12:56<17:42, 2.32s/it]
|
| 303 |
36%|ββββ | 253/711 [12:59<17:41, 2.32s/it]
|
| 304 |
36%|ββββ | 254/711 [13:01<17:38, 2.32s/
|
|
|
|
|
|
|
| 305 |
36%|ββββ | 255/711 [13:03<17:45, 2.34s/it]
|
| 306 |
36%|ββββ | 256/711 [13:06<17:52, 2.36s/it]
|
| 307 |
36%|ββββ | 257/711 [13:08<17:45, 2.35s/it]
|
| 308 |
36%|ββββ | 258/711 [13:10<17:40, 2.34s/it]
|
| 309 |
36%|ββββ | 259/711 [13:13<17:34, 2.33s/it]
|
| 310 |
37%|ββββ | 260/711 [13:15<17:29, 2.33s/it]
|
| 311 |
|
| 312 |
37%|ββββ | 260/711 [13:15<17:29, 2.33s/it]
|
| 313 |
37%|ββββ | 261/711 [13:17<17:25, 2.32s/it]
|
| 314 |
37%|ββββ | 262/711 [13:20<17:21, 2.32s/it]
|
| 315 |
37%|ββββ | 263/711 [13:22<17:17, 2.32s/it]
|
| 316 |
37%|ββββ | 264/711 [13:24<17:15, 2.32s/it]
|
| 317 |
37%|ββββ | 265/711 [13:26<17:13, 2.32s/it]
|
| 318 |
37%|ββββ | 266/711 [13:29<17:10, 2.32s/it]
|
| 319 |
38%|ββββ | 267/711 [13:31<17:07, 2.31s/it]
|
| 320 |
38%|ββββ | 268/711 [13:34<17:16, 2.34s/it]
|
| 321 |
38%|ββββ | 269/711 [13:36<17:13, 2.34s/it]
|
| 322 |
38%|ββββ | 270/711 [13:38<1
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
38%|ββββ | 270/711 [13:38<17:08, 2.33s/it]
|
| 325 |
38%|ββββ | 271/711 [13:41<17:12, 2.35s/it]
|
| 326 |
38%|ββββ | 272/711 [13:43<17:13, 2.35s/it]
|
| 327 |
38%|ββββ | 273/711 [13:45<17:07, 2.35s/it]
|
| 328 |
39%|ββββ | 274/711 [13:48<17:10, 2.36s/it]
|
| 329 |
39%|ββββ | 275/711 [13:50<17:06, 2.35s/it]
|
| 330 |
39%|ββββ | 276/711 [13:52<16:58, 2.34s/it]
|
| 331 |
39%|ββββ | 277/711 [13:55<16:53, 2.34s/it]
|
| 332 |
39%|ββββ | 278/711 [13:57<16:49, 2.33s/it]
|
| 333 |
39%|ββββ | 279/711 [13:59<16:44, 2.33s/it]
|
| 334 |
39%|ββββ | 280/711 [14:02<16:40, 2.32s/it]
|
| 335 |
|
| 336 |
39%|ββββ | 280/711 [14:02<16:40, 2.32s/it]
|
| 337 |
40%|ββββ | 281/711 [14:04<16:36, 2.32s/it]
|
| 338 |
40%|ββββ | 282/711 [14:06<16:34, 2.32s/it]
|
| 339 |
40%|ββββ | 283/711 [14:09<16:34, 2.32s/it]
|
| 340 |
40%|ββββ | 284/711 [1
|
|
|
|
|
|
|
| 341 |
40%|ββββ | 285/711 [14:13<16:36, 2.34s/it]
|
| 342 |
40%|ββββ | 286/711 [14:16<16:31, 2.33s/it]
|
| 343 |
40%|ββββ | 287/711 [14:18<16:43, 2.37s/it]
|
| 344 |
41%|ββββ | 288/711 [14:20<16:41, 2.37s/it]
|
| 345 |
41%|ββββ | 289/711 [14:23<16:31, 2.35s/it]
|
| 346 |
41%|ββββ | 290/711 [14:25<16:26, 2.34s/it]
|
| 347 |
|
| 348 |
41%|ββββ | 290/711 [14:25<16:26, 2.34s/it]
|
| 349 |
41%|ββββ | 291/711 [14:27<16:20, 2.33s/it]
|
| 350 |
41%|ββββ | 292/711 [14:30<16:25, 2.35s/it]
|
| 351 |
41%|ββββ | 293/711 [14:32<16:24, 2.35s/it]
|
| 352 |
41%|βββββ | 294/711 [14:34<16:18, 2.35s/it]
|
| 353 |
41%|βββββ | 295/711 [14:37<16:13, 2.34s/it]
|
| 354 |
42%|βββββ | 296/711 [14:39<16:08, 2.33s/it]
|
| 355 |
42%|βββββ | 297/711 [14:41<16:13, 2.35s/it]
|
| 356 |
42%|βββββ | 298/711 [14:44<16:07, 2.34s/it]
|
| 357 |
42%|βββββ | 299/711 [14:46<16:03, 2.34s/it]
|
| 358 |
42%|βοΏ½
|
|
|
|
|
|
|
|
|
|
| 359 |
|
| 360 |
42%|βββββ | 300/711 [14:48<15:59, 2.33s/it]
|
| 361 |
42%|βββββ | 301/711 [14:51<15:55, 2.33s/it]
|
| 362 |
42%|βββββ | 302/711 [14:53<15:50, 2.32s/it]
|
| 363 |
43%|βββββ | 303/711 [14:55<15:54, 2.34s/it]
|
| 364 |
43%|βββββ | 304/711 [14:58<15:55, 2.35s/it]
|
| 365 |
43%|βββββ | 305/711 [15:00<15:51, 2.34s/it]
|
| 366 |
43%|βββββ | 306/711 [15:02<15:46, 2.34s/it]
|
| 367 |
43%|βββββ | 307/711 [15:05<15:42, 2.33s/it]
|
| 368 |
43%|βββββ | 308/711 [15:07<15:37, 2.33s/it]
|
| 369 |
43%|βββββ | 309/711 [15:10<15:47, 2.36s/it]
|
| 370 |
44%|βββββ | 310/711 [15:12<15:52, 2.37s/it]
|
| 371 |
|
| 372 |
44%|βββββ | 310/711 [15:12<15:52, 2.37s/it]
|
| 373 |
44%|βββββ | 311/711 [15:14<15:42, 2.36s/it]
|
| 374 |
44%|βββββ | 312/711 [15:17<15:49, 2.38s/it]
|
| 375 |
44%|βββββ | 313/7
|
|
|
|
|
|
|
| 376 |
44%|βββββ | 314/711 [15:22<15:55, 2.41s/it]
|
| 377 |
44%|βββββ | 315/711 [15:24<15:44, 2.38s/it]
|
| 378 |
44%|βββββ | 316/711 [15:26<15:33, 2.36s/it]
|
| 379 |
45%|βββββ | 317/711 [15:29<15:25, 2.35s/it]
|
| 380 |
45%|βββββ | 318/711 [15:31<15:25, 2.35s/it]
|
| 381 |
45%|βββββ | 319/711 [15:33<15:25, 2.36s/it]
|
| 382 |
45%|βββββ | 320/711 [15:36<15:22, 2.36s/it]
|
| 383 |
|
| 384 |
45%|βββββ | 320/711 [15:36<15:22, 2.36s/it]
|
| 385 |
45%|βββββ | 321/711 [15:38<15:45, 2.43s/it]
|
| 386 |
45%|βββββ | 322/711 [15:41<15:30, 2.39s/it]
|
| 387 |
45%|βββββ | 323/711 [15:43<16:06, 2.49s/it]
|
| 388 |
46%|βββββ | 324/711 [15:46<16:30, 2.56s/it]
|
| 389 |
46%|βββββ | 325/711 [15:48<15:59, 2.49s/it]
|
| 390 |
46%|βββββ | 326/711 [15:51<16:07, 2.51s/it]
|
| 391 |
46%|βββββ | 327/711 [15:53<15:44, 2.46s/it]
|
| 392 |
46%|βββββ | 328/711 [15:55<15:
|
|
|
|
|
|
|
|
|
|
| 393 |
46%|βββββ | 329/711 [15:58<15:13, 2.39s/it]
|
| 394 |
46%|βββββ | 330/711 [16:00<15:07, 2.38s/it]
|
| 395 |
|
| 396 |
46%|βββββ | 330/711 [16:00<15:07, 2.38s/it]
|
| 397 |
47%|βββββ | 331/711 [16:03<14:57, 2.36s/it]
|
| 398 |
47%|βββββ | 332/711 [16:05<14:49, 2.35s/it]
|
| 399 |
47%|βββββ | 333/711 [16:07<15:21, 2.44s/it]
|
| 400 |
47%|βββββ | 334/711 [16:10<15:12, 2.42s/it]
|
| 401 |
47%|βββββ | 335/711 [16:12<15:12, 2.43s/it]
|
| 402 |
47%|βββββ | 336/711 [16:15<14:57, 2.39s/it]
|
| 403 |
47%|βββββ | 337/711 [16:17<14:46, 2.37s/it]
|
| 404 |
48%|βββββ | 338/711 [16:19<14:39, 2.36s/it]
|
| 405 |
48%|βββββ | 339/711 [16:22<14:32, 2.35s/it]
|
| 406 |
48%|βββββ | 340/711 [16:24<14:27, 2.34s/it]
|
| 407 |
|
| 408 |
48%|βββββ | 340/711 [16:24<14:27, 2.34s/it]
|
| 409 |
48%|βββββ | 341/711 [16:26<14:23, 2.33s/it]
|
| 410 |
48%|οΏ½
|
|
|
|
|
|
|
| 411 |
48%|βββββ | 343/711 [16:31<14:15, 2.33s/it]
|
| 412 |
48%|βββββ | 344/711 [16:33<14:14, 2.33s/it]
|
| 413 |
49%|βββββ | 345/711 [16:35<14:08, 2.32s/it]
|
| 414 |
49%|βββββ | 346/711 [16:38<14:38, 2.41s/it]
|
| 415 |
49%|βββββ | 347/711 [16:40<14:25, 2.38s/it]
|
| 416 |
49%|βββββ | 348/711 [16:43<14:14, 2.35s/it]
|
| 417 |
49%|βββββ | 349/711 [16:45<14:06, 2.34s/it]
|
| 418 |
49%|βββββ | 350/711 [16:47<14:06, 2.35s/it]
|
| 419 |
|
| 420 |
49%|βββββ | 350/711 [16:47<14:06, 2.35s/it]
|
| 421 |
49%|βββββ | 351/711 [16:50<14:07, 2.36s/it]
|
| 422 |
50%|βββββ | 352/711 [16:52<14:00, 2.34s/it]
|
| 423 |
50%|βββββ | 353/711 [16:54<13:55, 2.33s/it]
|
| 424 |
50%|βββββ | 354/711 [16:57<13:50, 2.33s/it]
|
| 425 |
50%|βββββ | 355/711 [16:59<13:47, 2.32s/it]
|
| 426 |
50%|βββββ | 356/711 [17:01<13:44, 2.32s/it]
|
| 427 |
50%|βββββ
|
|
|
|
|
|
|
|
|
|
| 428 |
50%|βββββ | 358/711 [17:06<13:38, 2.32s/it]
|
| 429 |
50%|βββββ | 359/711 [17:08<13:55, 2.37s/it]
|
| 430 |
51%|βββββ | 360/711 [17:11<13:46, 2.35s/it]
|
| 431 |
|
| 432 |
51%|βββββ | 360/711 [17:11<13:46, 2.35s/it]
|
| 433 |
51%|βββββ | 361/711 [17:13<13:40, 2.34s/it]
|
| 434 |
51%|βββββ | 362/711 [17:15<13:35, 2.34s/it]
|
| 435 |
51%|βββββ | 363/711 [17:18<13:30, 2.33s/it]
|
| 436 |
51%|βββββ | 364/711 [17:20<13:26, 2.32s/it]
|
| 437 |
51%|ββββββ | 365/711 [17:22<13:27, 2.33s/it]
|
| 438 |
51%|ββββββ | 366/711 [17:25<13:28, 2.34s/it]
|
| 439 |
52%|ββββββ | 367/711 [17:27<13:29, 2.35s/it]
|
| 440 |
52%|ββββββ | 368/711 [17:29<13:23, 2.34s/it]
|
| 441 |
52%|ββββββ | 369/711 [17:32<13:19, 2.34s/it]
|
| 442 |
52%|ββββββ | 370/711 [17:34<13:14, 2.33s/it]
|
| 443 |
|
| 444 |
52%|ββββββ | 3
|
|
|
|
|
|
|
| 445 |
52%|ββββββ | 371/711 [17:36<13:11, 2.33s/it]
|
| 446 |
52%|ββββββ | 372/711 [17:39<13:08, 2.33s/it]
|
| 447 |
52%|ββββββ | 373/711 [17:41<13:07, 2.33s/it]
|
| 448 |
53%|ββββββ | 374/711 [17:43<13:04, 2.33s/it]
|
| 449 |
53%|ββββββ | 375/711 [17:46<13:01, 2.33s/it]
|
| 450 |
53%|ββββββ | 376/711 [17:48<12:57, 2.32s/it]
|
| 451 |
53%|ββββββ | 377/711 [17:50<12:54, 2.32s/it]
|
| 452 |
53%|ββββββ | 378/711 [17:53<12:53, 2.32s/it]
|
| 453 |
53%|ββββββ | 379/711 [17:55<12:57, 2.34s/it]
|
| 454 |
53%|ββββββ | 380/711 [17:57<12:52, 2.33s/it]
|
| 455 |
|
| 456 |
53%|ββββββ | 380/711 [17:57<12:52, 2.33s/it]
|
| 457 |
54%|ββββββ | 381/711 [18:00<12:57, 2.35s/it]
|
| 458 |
54%|ββββββ | 382/711 [18:02<12:56, 2.36s/it]
|
| 459 |
54%|ββββββ | 383/711 [18:05<12:55, 2.36s/it]
|
| 460 |
54%|ββββββ | 384/711 [18:07<12:48, 2.35s/it]
|
| 461 |
54%|ββ
|
|
|
|
|
|
|
| 462 |
54%|ββββββ | 386/711 [18:11<12:38, 2.33s/it]
|
| 463 |
54%|ββββββ | 387/711 [18:14<12:33, 2.33s/it]
|
| 464 |
55%|ββββββ | 388/711 [18:16<12:30, 2.32s/it]
|
| 465 |
55%|ββββββ | 389/711 [18:18<12:28, 2.32s/it]
|
| 466 |
55%|ββββββ | 390/711 [18:21<12:28, 2.33s/it]
|
| 467 |
|
| 468 |
55%|ββββββ | 390/711 [18:21<12:28, 2.33s/it]
|
| 469 |
55%|ββββββ | 391/711 [18:23<12:24, 2.33s/it]
|
| 470 |
55%|ββββββ | 392/711 [18:25<12:24, 2.34s/it]
|
| 471 |
55%|ββββββ | 393/711 [18:28<12:20, 2.33s/it]
|
| 472 |
55%|ββββββ | 394/711 [18:30<12:16, 2.32s/it]
|
| 473 |
56%|ββββββ | 395/711 [18:32<12:22, 2.35s/it]
|
| 474 |
56%|ββββββ | 396/711 [18:35<12:16, 2.34s/it]
|
| 475 |
56%|ββββββ | 397/711 [18:37<12:11, 2.33s/it]
|
| 476 |
56%|ββββββ | 398/711 [18:39<12:11, 2.34s/it]
|
| 477 |
56%|ββββββ | 399/711 [18:42<12:12, 2.
|
|
|
|
|
|
|
|
|
|
| 478 |
56%|ββββββ | 400/711 [18:44<12:07, 2.34s/it]
|
| 479 |
|
| 480 |
56%|ββββββ | 400/711 [18:44<12:07, 2.34s/it]
|
| 481 |
56%|ββββββ | 401/711 [18:46<12:02, 2.33s/it]
|
| 482 |
57%|ββββββ | 402/711 [18:49<11:59, 2.33s/it]
|
| 483 |
57%|ββββββ | 403/711 [18:51<11:56, 2.32s/it]
|
| 484 |
57%|ββββββ | 404/711 [18:53<11:52, 2.32s/it]
|
| 485 |
57%|ββββββ | 405/711 [18:56<12:01, 2.36s/it]
|
| 486 |
57%|ββββββ | 406/711 [18:58<11:54, 2.34s/it]
|
| 487 |
57%|ββββββ | 407/711 [19:00<11:49, 2.33s/it]
|
| 488 |
57%|ββββββ | 408/711 [19:03<11:44, 2.33s/it]
|
| 489 |
58%|ββββββ | 409/711 [19:05<11:48, 2.35s/it]
|
| 490 |
58%|ββββββ | 410/711 [19:07<11:42, 2.33s/it]
|
| 491 |
|
| 492 |
58%|ββββββ | 410/711 [19:07<11:42, 2.33s/it]
|
| 493 |
58%|ββββββ | 411/711 [19:10<11:45, 2.35s/it]
|
| 494 |
58%|ββββββ | 412/711 [19:12<11
|
|
|
|
|
|
|
| 495 |
58%|ββββββ | 413/711 [19:14<11:34, 2.33s/it]
|
| 496 |
58%|ββββββ | 414/711 [19:17<11:35, 2.34s/it]
|
| 497 |
58%|ββββββ | 415/711 [19:19<11:35, 2.35s/it]
|
| 498 |
59%|ββββββ | 416/711 [19:22<11:37, 2.36s/it]
|
| 499 |
59%|ββββββ | 417/711 [19:24<11:30, 2.35s/it]
|
| 500 |
59%|ββββββ | 418/711 [19:26<11:26, 2.34s/it]
|
| 501 |
59%|ββββββ | 419/711 [19:29<11:22, 2.34s/it]
|
| 502 |
59%|ββββββ | 420/711 [19:31<11:18, 2.33s/it]
|
| 503 |
|
| 504 |
59%|ββββββ | 420/711 [19:31<11:18, 2.33s/it]
|
| 505 |
59%|ββββββ | 421/711 [19:33<11:15, 2.33s/it]
|
| 506 |
59%|ββββββ | 422/711 [19:36<11:11, 2.32s/it]
|
| 507 |
59%|ββββββ | 423/711 [19:38<11:07, 2.32s/it]
|
| 508 |
60%|ββββββ | 424/711 [19:40<11:06, 2.32s/it]
|
| 509 |
60%|ββββββ | 425/711 [19:42<11:02, 2.32s/it]
|
| 510 |
60%|ββββββ | 426/711 [19:45<11:00, 2.32s/it]
|
| 511 |
60%|ββββββ
|
|
|
|
|
|
|
|
|
|
| 512 |
60%|ββββββ | 428/711 [19:49<11:02, 2.34s/it]
|
| 513 |
60%|ββββββ | 429/711 [19:52<10:58, 2.34s/it]
|
| 514 |
60%|ββββββ | 430/711 [19:54<10:59, 2.35s/it]
|
| 515 |
|
| 516 |
60%|ββββββ | 430/711 [19:54<10:59, 2.35s/it]
|
| 517 |
61%|ββββββ | 431/711 [19:57<10:59, 2.35s/it]
|
| 518 |
61%|ββββββ | 432/711 [19:59<10:53, 2.34s/it]
|
| 519 |
61%|ββββββ | 433/711 [20:01<10:49, 2.34s/it]
|
| 520 |
61%|ββββββ | 434/711 [20:03<10:44, 2.33s/it]
|
| 521 |
61%|ββββββ | 435/711 [20:06<10:40, 2.32s/it]
|
| 522 |
61%|βββββββ | 436/711 [20:08<10:42, 2.33s/it]
|
| 523 |
61%|βββββββ | 437/711 [20:11<10:40, 2.34s/it]
|
| 524 |
62%|βββββββ | 438/711 [20:13<10:36, 2.33s/it]
|
| 525 |
62%|βββββββ | 439/711 [20:15<10:32, 2.33s/it]
|
| 526 |
62%|βββββββ | 440/711 [20:17<10:29, 2.32s/it]
|
| 527 |
|
| 528 |
62%|βοΏ½
|
|
|
|
|
|
|
| 529 |
62%|βββββββ | 441/711 [20:20<10:27, 2.32s/it]
|
| 530 |
62%|βββββββ | 442/711 [20:22<10:24, 2.32s/it]
|
| 531 |
62%|βββββββ | 443/711 [20:24<10:21, 2.32s/it]
|
| 532 |
62%|βββββββ | 444/711 [20:27<10:18, 2.32s/it]
|
| 533 |
63%|βββββββ | 445/711 [20:29<10:36, 2.39s/it]
|
| 534 |
63%|βββββββ | 446/711 [20:32<10:31, 2.38s/it]
|
| 535 |
63%|βββββββ | 447/711 [20:34<10:41, 2.43s/it]
|
| 536 |
63%|βββββββ | 448/711 [20:37<10:30, 2.40s/it]
|
| 537 |
63%|βββββββ | 449/711 [20:39<10:21, 2.37s/it]
|
| 538 |
63%|βββββββ | 450/711 [20:41<10:14, 2.35s/it]
|
| 539 |
|
| 540 |
63%|βββββββ | 450/711 [20:41<10:14, 2.35s/it]
|
| 541 |
63%|βββββββ | 451/711 [20:43<10:08, 2.34s/it]
|
| 542 |
64%|βββββββ | 452/711 [20:46<10:17, 2.39s/it]
|
| 543 |
64%|βββββββ | 453/711 [20:48<10:09, 2.36s/it]
|
| 544 |
64%|βββββοΏ½
|
|
|
|
|
|
|
| 545 |
64%|βββββββ | 455/711 [20:53<09:58, 2.34s/it]
|
| 546 |
64%|βββββββ | 456/711 [20:55<09:54, 2.33s/it]
|
| 547 |
64%|βββββββ | 457/711 [20:57<09:50, 2.32s/it]
|
| 548 |
64%|βββββββ | 458/711 [21:00<09:46, 2.32s/it]
|
| 549 |
65%|βββββββ | 459/711 [21:02<09:44, 2.32s/it]
|
| 550 |
65%|βββββββ | 460/711 [21:04<09:42, 2.32s/it]
|
| 551 |
|
| 552 |
65%|βββββββ | 460/711 [21:04<09:42, 2.32s/it]
|
| 553 |
65%|βββββββ | 461/711 [21:07<09:40, 2.32s/it]
|
| 554 |
65%|βββββββ | 462/711 [21:09<09:41, 2.34s/it]
|
| 555 |
65%|βββββββ | 463/711 [21:12<09:42, 2.35s/it]
|
| 556 |
65%|βββββββ | 464/711 [21:14<09:37, 2.34s/it]
|
| 557 |
65%|βββββββ | 465/711 [21:16<09:33, 2.33s/it]
|
| 558 |
66%|βββββββ | 466/711 [21:18<09:29, 2.32s/it]
|
| 559 |
66%|βββββββ | 467/711 [21:21<09:26, 2.32s/it]
|
| 560 |
66%|βββββββ | 468/
|
|
|
|
|
|
|
|
|
|
| 561 |
66%|βββββββ | 469/711 [21:25<09:26, 2.34s/it]
|
| 562 |
66%|βββββββ | 470/711 [21:28<09:22, 2.33s/it]
|
| 563 |
|
| 564 |
66%|βββββββ | 470/711 [21:28<09:22, 2.33s/it]
|
| 565 |
66%|βββββββ | 471/711 [21:30<09:19, 2.33s/it]
|
| 566 |
66%|βββββββ | 472/711 [21:32<09:16, 2.33s/it]
|
| 567 |
67%|βββββββ | 473/711 [21:35<09:13, 2.33s/it]
|
| 568 |
67%|βββββββ | 474/711 [21:37<09:11, 2.33s/it]
|
| 569 |
67%|βββββββ | 475/711 [21:39<09:08, 2.32s/it]
|
| 570 |
67%|βββββββ | 476/711 [21:42<09:10, 2.34s/it]
|
| 571 |
67%|βββββββ | 477/711 [21:44<09:06, 2.34s/it]
|
| 572 |
67%|βββββββ | 478/711 [21:46<09:07, 2.35s/it]
|
| 573 |
67%|βββββββ | 479/711 [21:49<09:06, 2.36s/it]
|
| 574 |
68%|βββββββ | 480/711 [21:51<09:02, 2.35s/it]
|
| 575 |
|
| 576 |
68%|βββββββ | 480/711 [21:51<09:02, 2.35s/i
|
|
|
|
|
|
|
| 577 |
68%|βββββββ | 481/711 [21:54<09:03, 2.36s/it]
|
| 578 |
68%|βββββββ | 482/711 [21:56<08:58, 2.35s/it]
|
| 579 |
68%|βββββββ | 483/711 [21:58<08:53, 2.34s/it]
|
| 580 |
68%|βββββββ | 484/711 [22:01<08:49, 2.33s/it]
|
| 581 |
68%|βββββββ | 485/711 [22:03<08:58, 2.38s/it]
|
| 582 |
68%|βββββββ | 486/711 [22:05<08:50, 2.36s/it]
|
| 583 |
68%|βββββββ | 487/711 [22:08<08:45, 2.34s/it]
|
| 584 |
69%|βββββββ | 488/711 [22:10<08:40, 2.33s/it]
|
| 585 |
69%|βββββββ | 489/711 [22:12<08:41, 2.35s/it]
|
| 586 |
69%|βββββββ | 490/711 [22:15<08:37, 2.34s/it]
|
| 587 |
|
| 588 |
69%|βββββββ | 490/711 [22:15<08:37, 2.34s/it]
|
| 589 |
69%|βββββββ | 491/711 [22:17<08:37, 2.35s/it]
|
| 590 |
69%|βββββββ | 492/711 [22:19<08:33, 2.34s/it]
|
| 591 |
69%|βββββββ | 493/711 [22:22<08:29, 2.34s/it]
|
| 592 |
69%|βββββββ | 494/711 [22:24<08:35, 2.37s/it]
|
| 593 |
70%|βοΏ½
|
|
|
|
|
|
|
| 594 |
70%|βββββββ | 496/711 [22:29<08:27, 2.36s/it]
|
| 595 |
70%|βββββββ | 497/711 [22:31<08:25, 2.36s/it]
|
| 596 |
70%|βββββββ | 498/711 [22:34<08:20, 2.35s/it]
|
| 597 |
70%|βββββββ | 499/711 [22:36<08:21, 2.37s/it]
|
| 598 |
70%|βββββββ | 500/711 [22:38<08:16, 2.35s/it]
|
| 599 |
|
| 600 |
70%|βββββββ | 500/711 [22:38<08:16, 2.35s/it]
|
| 601 |
70%|βββββββ | 501/711 [22:41<08:12, 2.34s/it]
|
| 602 |
71%|βββββββ | 502/711 [22:43<08:08, 2.34s/it]
|
| 603 |
71%|βββββββ | 503/711 [22:45<08:05, 2.33s/it]
|
| 604 |
71%|βββββββ | 504/711 [22:48<08:02, 2.33s/it]
|
| 605 |
71%|βββββββ | 505/711 [22:50<07:59, 2.33s/it]
|
| 606 |
71%|βββββββ | 506/711 [22:52<07:56, 2.32s/it]
|
| 607 |
71%|ββββββββ | 507/711 [22:55<07:53, 2.32s/it]
|
| 608 |
71%|ββββββββ | 508/711 [22:57<07:50, 2.32s/it]
|
| 609 |
72%|ββββοΏ½
|
|
|
|
|
|
|
|
|
|
| 610 |
72%|ββββββββ | 510/711 [23:02<07:49, 2.33s/it]
|
| 611 |
|
| 612 |
72%|ββββββββ | 510/711 [23:02<07:49, 2.33s/it]
|
| 613 |
72%|ββββββββ | 511/711 [23:04<07:52, 2.36s/it]
|
| 614 |
72%|ββββββββ | 512/711 [23:06<07:59, 2.41s/it]
|
| 615 |
72%|ββββββββ | 513/711 [23:09<08:02, 2.44s/it]
|
| 616 |
72%|ββββββββ | 514/711 [23:11<07:53, 2.40s/it]
|
| 617 |
72%|ββββββββ | 515/711 [23:14<07:45, 2.38s/it]
|
| 618 |
73%|ββββββββ | 516/711 [23:16<07:39, 2.36s/it]
|
| 619 |
73%|ββββββββ | 517/711 [23:18<07:35, 2.35s/it]
|
| 620 |
73%|ββββββββ | 518/711 [23:21<07:31, 2.34s/it]
|
| 621 |
73%|ββββββββ | 519/711 [23:23<07:27, 2.33s/it]
|
| 622 |
73%|ββββββββ | 520/711 [23:25<07:28, 2.35s/it]
|
| 623 |
|
| 624 |
73%|ββββββββ | 520/711 [23:25<07:28, 2.35s/it]
|
| 625 |
73%|ββββ
|
|
|
|
|
|
|
| 626 |
73%|ββββββββ | 522/711 [23:30<07:20, 2.33s/it]
|
| 627 |
74%|ββββββββ | 523/711 [23:32<07:18, 2.33s/it]
|
| 628 |
74%|ββββββββ | 524/711 [23:35<07:18, 2.34s/it]
|
| 629 |
74%|ββββββββ | 525/711 [23:37<07:18, 2.36s/it]
|
| 630 |
74%|ββββββββ | 526/711 [23:39<07:17, 2.37s/it]
|
| 631 |
74%|ββββββββ | 527/711 [23:42<07:13, 2.36s/it]
|
| 632 |
74%|ββββββββ | 528/711 [23:44<07:11, 2.36s/it]
|
| 633 |
74%|ββββββββ | 529/711 [23:46<07:06, 2.34s/it]
|
| 634 |
75%|ββββββββ | 530/711 [23:49<07:03, 2.34s/it]
|
| 635 |
|
| 636 |
75%|ββββββββ | 530/711 [23:49<07:03, 2.34s/it]
|
| 637 |
75%|ββββββββ | 531/711 [23:51<07:00, 2.33s/it]
|
| 638 |
75%|ββββββββ | 532/711 [23:54<07:07, 2.39s/it]
|
| 639 |
75%|ββββββββ | 533/711 [23:56<07:00, 2.36s/it]
|
| 640 |
75%|ββββββββ | 534/711 [23:58<06:55, 2.35s/it]
|
|
|
|
|
|
|
| 641 |
75%|ββββββββ | 536/711 [24:03<06:49, 2.34s/it]
|
| 642 |
76%|ββββββββ | 537/711 [24:05<06:45, 2.33s/it]
|
| 643 |
76%|ββββββββ | 538/711 [24:07<06:41, 2.32s/it]
|
| 644 |
76%|ββββββββ | 539/711 [24:10<06:38, 2.32s/it]
|
| 645 |
76%|ββββββββ | 540/711 [24:12<06:35, 2.31s/it]
|
| 646 |
|
| 647 |
76%|ββββββββ | 540/711 [24:12<06:35, 2.31s/it]
|
| 648 |
76%|ββββββββ | 541/711 [24:14<06:36, 2.33s/it]
|
| 649 |
76%|ββββββββ | 542/711 [24:17<06:35, 2.34s/it]
|
| 650 |
76%|ββββββββ | 543/711 [24:19<06:33, 2.34s/it]
|
| 651 |
77%|ββββββββ | 544/711 [24:21<06:29, 2.33s/it]
|
| 652 |
77%|ββββββββ | 545/711 [24:24<06:26, 2.33s/it]
|
| 653 |
77%|ββββββββ | 546/711 [24:26<06:23, 2.32s/it]
|
| 654 |
77%|ββββββββ | 547/711 [24:28<06:20, 2.32s/it]
|
| 655 |
77%|ββββββββ | 548/711 [24:31<0
|
|
|
|
|
|
|
|
|
|
| 656 |
77%|ββββββββ | 549/711 [24:33<06:15, 2.32s/it]
|
| 657 |
77%|ββββββββ | 550/711 [24:35<06:13, 2.32s/it]
|
| 658 |
|
| 659 |
77%|ββββββββ | 550/711 [24:35<06:13, 2.32s/it]
|
| 660 |
77%|ββββββββ | 551/711 [24:38<06:10, 2.32s/it]
|
| 661 |
78%|ββββββββ | 552/711 [24:40<06:08, 2.32s/it]
|
| 662 |
78%|ββββββββ | 553/711 [24:42<06:05, 2.32s/it]
|
| 663 |
78%|ββββββββ | 554/711 [24:45<06:03, 2.31s/it]
|
| 664 |
78%|ββββββββ | 555/711 [24:47<06:01, 2.32s/it]
|
| 665 |
78%|ββββββββ | 556/711 [24:49<05:58, 2.32s/it]
|
| 666 |
78%|ββββββββ | 557/711 [24:52<06:01, 2.35s/it]
|
| 667 |
78%|ββββββββ | 558/711 [24:54<06:03, 2.38s/it]
|
| 668 |
79%|ββββββββ | 559/711 [24:56<05:59, 2.37s/it]
|
| 669 |
79%|ββββββββ | 560/711 [24:59<05:55, 2.35s/it]
|
| 670 |
|
| 671 |
79%|ββββββββ | 560/711 [24:59
|
|
|
|
|
|
|
| 672 |
79%|ββββββββ | 561/711 [25:01<05:51, 2.34s/it]
|
| 673 |
79%|ββββββββ | 562/711 [25:03<05:47, 2.33s/it]
|
| 674 |
79%|ββββββββ | 563/711 [25:06<05:47, 2.35s/it]
|
| 675 |
79%|ββββββββ | 564/711 [25:08<05:43, 2.34s/it]
|
| 676 |
79%|ββββββββ | 565/711 [25:11<05:48, 2.39s/it]
|
| 677 |
80%|ββββββββ | 566/711 [25:13<05:43, 2.37s/it]
|
| 678 |
80%|ββββββββ | 567/711 [25:15<05:41, 2.37s/it]
|
| 679 |
80%|ββββββββ | 568/711 [25:18<05:36, 2.35s/it]
|
| 680 |
80%|ββββββββ | 569/711 [25:20<05:32, 2.34s/it]
|
| 681 |
80%|ββββββββ | 570/711 [25:22<05:28, 2.33s/it]
|
| 682 |
|
| 683 |
80%|ββββββββ | 570/711 [25:22<05:28, 2.33s/it]
|
| 684 |
80%|ββββββββ | 571/711 [25:24<05:25, 2.33s/it]
|
| 685 |
80%|ββββββββ | 572/711 [25:27<05:22, 2.32s/it]
|
| 686 |
81%|ββββββββ | 573/711 [25:29<05:22, 2.34s/it]
|
| 687 |
81%|ββββββββ
|
|
|
|
|
|
|
| 688 |
81%|ββββββββ | 575/711 [25:34<05:18, 2.34s/it]
|
| 689 |
81%|ββββββββ | 576/711 [25:36<05:15, 2.33s/it]
|
| 690 |
81%|ββββββββ | 577/711 [25:39<05:15, 2.35s/it]
|
| 691 |
81%|βββββββββ | 578/711 [25:41<05:13, 2.36s/it]
|
| 692 |
81%|βββββββββ | 579/711 [25:43<05:09, 2.35s/it]
|
| 693 |
82%|βββββββββ | 580/711 [25:46<05:13, 2.40s/it]
|
| 694 |
|
| 695 |
82%|βββββββββ | 580/711 [25:46<05:13, 2.40s/it]
|
| 696 |
82%|βββββββββ | 581/711 [25:48<05:08, 2.37s/it]
|
| 697 |
82%|βββββββββ | 582/711 [25:50<05:03, 2.36s/it]
|
| 698 |
82%|βββββββββ | 583/711 [25:53<05:00, 2.35s/it]
|
| 699 |
82%|βββββββββ | 584/711 [25:55<04:56, 2.34s/it]
|
| 700 |
82%|βββββββββ | 585/711 [25:57<04:53, 2.33s/it]
|
| 701 |
82%|βββββββββ | 586/711 [26:00<04:50, 2.32s/it]
|
| 702 |
83%|βββββββββ | 587/711 [26:02<04:50, 2
|
|
|
|
|
|
|
| 703 |
83%|βββββββββ | 588/711 [26:04<04:48, 2.35s/it]
|
| 704 |
83%|βββββββββ | 589/711 [26:07<04:47, 2.36s/it]
|
| 705 |
83%|βββββββββ | 590/711 [26:09<04:47, 2.38s/it]
|
| 706 |
|
| 707 |
83%|βββββββββ | 590/711 [26:09<04:47, 2.38s/it]
|
| 708 |
83%|βββββββββ | 591/711 [26:12<04:43, 2.36s/it]
|
| 709 |
83%|βββββββββ | 592/711 [26:14<04:39, 2.35s/it]
|
| 710 |
83%|βββββββββ | 593/711 [26:16<04:36, 2.34s/it]
|
| 711 |
84%|βββββββββ | 594/711 [26:19<04:33, 2.33s/it]
|
| 712 |
84%|βββββββββ | 595/711 [26:21<04:30, 2.33s/it]
|
| 713 |
84%|βββββββββ | 596/711 [26:23<04:27, 2.33s/it]
|
| 714 |
84%|βββββββββ | 597/711 [26:25<04:25, 2.32s/it]
|
| 715 |
84%|βββββββββ | 598/711 [26:28<04:22, 2.32s/it]
|
| 716 |
84%|βββββββββ | 599/711 [26:30<04:20, 2.32s/it]
|
| 717 |
84%|βββββββββ | 600/711 [26:32<04:17, 2.32s/it]
|
| 718 |
|
|
|
|
|
|
|
|
|
|
| 719 |
84%|βββββββββ | 600/711 [26:32<04:17, 2.32s/it]
|
| 720 |
85%|βββββββββ | 601/711 [26:35<04:15, 2.32s/it]
|
| 721 |
85%|βββββββββ | 602/711 [26:37<04:13, 2.32s/it]
|
| 722 |
85%|βββββββββ | 603/711 [26:39<04:10, 2.32s/it]
|
| 723 |
85%|βββββββββ | 604/711 [26:42<04:08, 2.32s/it]
|
| 724 |
85%|βββββββββ | 605/711 [26:44<04:08, 2.34s/it]
|
| 725 |
85%|βββββββββ | 606/711 [26:46<04:06, 2.35s/it]
|
| 726 |
85%|βββββββββ | 607/711 [26:49<04:03, 2.34s/it]
|
| 727 |
86%|βββββββββ | 608/711 [26:51<04:00, 2.33s/it]
|
| 728 |
86%|βββββββββ | 609/711 [26:53<03:57, 2.33s/it]
|
| 729 |
86%|βββββββββ | 610/711 [26:56<03:55, 2.33s/it]
|
| 730 |
|
| 731 |
86%|βββββββββ | 610/711 [26:56<03:55, 2.33s/it]
|
| 732 |
86%|βββββββββ | 611/711 [26:58<03:52, 2.32s/it]
|
| 733 |
86%|βββββββββ | 612/711 [27:00<
|
|
|
|
|
|
|
| 734 |
86%|βββββββββ | 613/711 [27:03<03:47, 2.32s/it]
|
| 735 |
86%|βββββββββ | 614/711 [27:05<03:44, 2.32s/it]
|
| 736 |
86%|βββββββββ | 615/711 [27:07<03:42, 2.32s/it]
|
| 737 |
87%|βββββββββ | 616/711 [27:10<03:40, 2.32s/it]
|
| 738 |
87%|βββββββββ | 617/711 [27:12<03:37, 2.32s/it]
|
| 739 |
87%|βββββββββ | 618/711 [27:14<03:38, 2.35s/it]
|
| 740 |
87%|βββββββββ | 619/711 [27:17<03:34, 2.34s/it]
|
| 741 |
87%|βββββββββ | 620/711 [27:19<03:32, 2.33s/it]
|
| 742 |
|
| 743 |
87%|βββββββββ | 620/711 [27:19<03:32, 2.33s/it]
|
| 744 |
87%|βββββββββ | 621/711 [27:21<03:31, 2.35s/it]
|
| 745 |
87%|βββββββββ | 622/711 [27:24<03:29, 2.36s/it]
|
| 746 |
88%|βββββββββ | 623/711 [27:26<03:28, 2.36s/it]
|
| 747 |
88%|βββββββββ | 624/711 [27:28<03:24, 2.35s/it]
|
| 748 |
88%|βββββββββ | 625/711 [27:31<03:21, 2.34s/it]
|
| 749 |
88
|
|
|
|
|
|
|
| 750 |
88%|βββββββββ | 627/711 [27:35<03:15, 2.33s/it]
|
| 751 |
88%|βββββββββ | 628/711 [27:38<03:12, 2.32s/it]
|
| 752 |
88%|βββββββββ | 629/711 [27:40<03:10, 2.32s/it]
|
| 753 |
89%|βββββββββ | 630/711 [27:42<03:10, 2.35s/it]
|
| 754 |
|
| 755 |
89%|βββββββββ | 630/711 [27:42<03:10, 2.35s/it]
|
| 756 |
89%|βββββββββ | 631/711 [27:45<03:07, 2.35s/it]
|
| 757 |
89%|βββββββββ | 632/711 [27:47<03:09, 2.39s/it]
|
| 758 |
89%|βββββββββ | 633/711 [27:50<03:04, 2.37s/it]
|
| 759 |
89%|βββββββββ | 634/711 [27:52<03:00, 2.35s/it]
|
| 760 |
89%|βββββββββ | 635/711 [27:54<02:57, 2.34s/it]
|
| 761 |
89%|βββββββββ | 636/711 [27:57<02:56, 2.35s/it]
|
| 762 |
90%|βββββββββ | 637/711 [27:59<02:55, 2.37s/it]
|
| 763 |
90%|βββββββββ | 638/711 [28:01<02:52, 2.36s/it]
|
| 764 |
90%|ββββββοΏ½
|
|
|
|
|
|
|
|
|
|
| 765 |
90%|βββββββββ | 640/711 [28:06<02:46, 2.34s/it]
|
| 766 |
|
| 767 |
90%|βββββββββ | 640/711 [28:06<02:46, 2.34s/it]
|
| 768 |
90%|βββββββββ | 641/711 [28:08<02:43, 2.33s/it]
|
| 769 |
90%|βββββββββ | 642/711 [28:11<02:40, 2.33s/it]
|
| 770 |
90%|βββββββββ | 643/711 [28:13<02:38, 2.33s/it]
|
| 771 |
91%|βββββββββ | 644/711 [28:15<02:35, 2.33s/it]
|
| 772 |
91%|βββββββββ | 645/711 [28:18<02:35, 2.35s/it]
|
| 773 |
91%|βββββββββ | 646/711 [28:20<02:32, 2.34s/it]
|
| 774 |
91%|βββββββββ | 647/711 [28:22<02:29, 2.34s/it]
|
| 775 |
91%|βββββββββ | 648/711 [28:25<02:27, 2.33s/it]
|
| 776 |
91%|ββββββββββ| 649/711 [28:27<02:24, 2.33s/it]
|
| 777 |
91%|ββββββββββ| 650/711 [28:29<02:23, 2.35s/it]
|
| 778 |
|
| 779 |
91%|ββββββββββ| 650/711 [28:29<02:23,
|
|
|
|
|
|
|
| 780 |
92%|ββββββββββ| 651/711 [28:32<02:20, 2.34s/it]
|
| 781 |
92%|ββββββββββ| 652/711 [28:34<02:17, 2.33s/it]
|
| 782 |
92%|ββββββββββ| 653/711 [28:36<02:17, 2.37s/it]
|
| 783 |
92%|ββββββββββ| 654/711 [28:39<02:14, 2.36s/it]
|
| 784 |
92%|ββββββββββ| 655/711 [28:41<02:11, 2.35s/it]
|
| 785 |
92%|ββββββββββ| 656/711 [28:44<02:10, 2.37s/it]
|
| 786 |
92%|ββββββββββ| 657/711 [28:46<02:06, 2.35s/it]
|
| 787 |
93%|ββββββββββ| 658/711 [28:48<02:04, 2.34s/it]
|
| 788 |
93%|ββββββββββ| 659/711 [28:51<02:01, 2.33s/it]
|
| 789 |
93%|ββββββββββ| 660/711 [28:53<01:58, 2.33s/it]
|
| 790 |
|
| 791 |
93%|ββββββββββ| 660/711 [28:53<01:58, 2.33s/it]
|
| 792 |
93%|ββββββββββ| 661/711 [28:55<01:57, 2.35s/it]
|
| 793 |
93%|ββββββββββ| 662/711 [28:58<01:54, 2.34s/it]
|
| 794 |
93%|ββββββββββ| 663/711 [29:00
|
|
|
|
|
|
|
| 795 |
93%|ββββββββββ| 664/711 [29:02<01:49, 2.33s/it]
|
| 796 |
94%|ββββββββββ| 665/711 [29:05<01:46, 2.32s/it]
|
| 797 |
94%|ββββββββββ| 666/711 [29:07<01:44, 2.32s/it]
|
| 798 |
94%|ββββββββββ| 667/711 [29:09<01:42, 2.32s/it]
|
| 799 |
94%|ββββββββββ| 668/711 [29:11<01:39, 2.32s/it]
|
| 800 |
94%|ββββββββββ| 669/711 [29:14<01:38, 2.34s/it]
|
| 801 |
94%|ββββββββββ| 670/711 [29:16<01:36, 2.36s/it]
|
| 802 |
|
| 803 |
94%|ββββββββββ| 670/711 [29:16<01:36, 2.36s/it]
|
| 804 |
94%|ββββββββββ| 671/711 [29:19<01:34, 2.35s/it]
|
| 805 |
95%|ββββββββββ| 672/711 [29:21<01:31, 2.34s/it]
|
| 806 |
95%|ββββββββββ| 673/711 [29:23<01:28, 2.34s/it]
|
| 807 |
95%|ββββββββββ| 674/711 [29:26<01:26, 2.33s/it]
|
| 808 |
95%|ββββββββββ| 675/711 [29:28<01:24, 2.33s/it]
|
| 809 |
95%|ββββββββββ| 676/711
|
|
|
|
|
|
|
| 810 |
95%|ββββββββββ| 677/711 [29:33<01:19, 2.33s/it]
|
| 811 |
95%|ββββββββββ| 678/711 [29:35<01:16, 2.32s/it]
|
| 812 |
95%|ββββββββββ| 679/711 [29:37<01:14, 2.32s/it]
|
| 813 |
96%|ββββββββββ| 680/711 [29:39<01:11, 2.32s/it]
|
| 814 |
|
| 815 |
96%|ββββββββββ| 680/711 [29:39<01:11, 2.32s/it]
|
| 816 |
96%|ββββββββββ| 681/711 [29:42<01:09, 2.32s/it]
|
| 817 |
96%|ββββββββββ| 682/711 [29:44<01:08, 2.36s/it]
|
| 818 |
96%|ββββββββββ| 683/711 [29:47<01:05, 2.35s/it]
|
| 819 |
96%|ββββββββββ| 684/711 [29:49<01:03, 2.34s/it]
|
| 820 |
96%|ββββββββββ| 685/711 [29:51<01:01, 2.36s/it]
|
| 821 |
96%|ββββββββββ| 686/711 [29:54<00:58, 2.36s/it]
|
| 822 |
97%|ββββββββββ| 687/711 [29:56<00:56, 2.35s/it]
|
| 823 |
97%|ββββββββββ| 688/711 [29:58<00:53, 2.34s/it]
|
| 824 |
97%|ββββββββββ|
|
|
|
|
|
|
|
|
|
|
| 825 |
97%|ββββββββββ| 690/711 [30:03<00:48, 2.32s/it]
|
| 826 |
|
| 827 |
97%|ββββββββββ| 690/711 [30:03<00:48, 2.32s/it]
|
| 828 |
97%|ββββββββββ| 691/711 [30:05<00:46, 2.34s/it]
|
| 829 |
97%|ββββββββββ| 692/711 [30:08<00:44, 2.33s/it]
|
| 830 |
97%|ββββββββββ| 693/711 [30:10<00:41, 2.33s/it]
|
| 831 |
98%|ββββββββββ| 694/711 [30:12<00:39, 2.33s/it]
|
| 832 |
98%|ββββββββββ| 695/711 [30:15<00:37, 2.32s/it]
|
| 833 |
98%|ββββββββββ| 696/711 [30:17<00:34, 2.32s/it]
|
| 834 |
98%|ββββββββββ| 697/711 [30:19<00:32, 2.32s/it]
|
| 835 |
98%|ββββββββββ| 698/711 [30:22<00:30, 2.32s/it]
|
| 836 |
98%|ββββββββββ| 699/711 [30:24<00:27, 2.32s/it]
|
| 837 |
98%|ββββββββββ| 700/711 [30:26<00:25, 2.34s/it]
|
| 838 |
|
| 839 |
98%|ββββββββββ| 700/711 [30:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 840 |
99%|ββββββββββ| 701/711 [30:29<00:23, 2.37s/it]
|
| 841 |
99%|ββββββββββ| 702/711 [30:31<00:21, 2.37s/it]
|
| 842 |
99%|ββββββββββ| 703/711 [30:33<00:18, 2.35s/it]
|
| 843 |
99%|ββββββββββ| 704/711 [30:36<00:16, 2.35s/it]
|
| 844 |
99%|ββββββββββ| 705/711 [30:38<00:14, 2.36s/it]
|
| 845 |
99%|ββββββββββ| 706/711 [30:40<00:11, 2.34s/it]
|
| 846 |
99%|ββββββββββ| 707/711 [30:43<00:09, 2.42s/it]
|
| 847 |
|
| 848 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2: W1124 00:08:21.177000 270804 torch/distributed/run.py:792]
|
| 2 |
+
2: W1124 00:08:21.177000 270804 torch/distributed/run.py:792] *****************************************
|
| 3 |
+
2: W1124 00:08:21.177000 270804 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 4 |
+
2: W1124 00:08:21.177000 270804 torch/distributed/run.py:792] *****************************************
|
| 5 |
+
3: W1124 00:08:21.180000 1900294 torch/distributed/run.py:792]
|
| 6 |
+
3: W1124 00:08:21.180000 1900294 torch/distributed/run.py:792] *****************************************
|
| 7 |
+
3: W1124 00:08:21.180000 1900294 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 8 |
+
3: W1124 00:08:21.180000 1900294 torch/distributed/run.py:792] *****************************************
|
| 9 |
+
0: W1124 00:08:21.180000 1912798 torch/distributed/run.py:792]
|
| 10 |
+
0: W1124 00:08:21.180000 1912798 torch/distributed/run.py:792] *****************************************
|
| 11 |
+
0: W1124 00:08:21.180000 1912798 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 12 |
+
0: W1124 00:08:21.180000 1912798 torch/distributed/run.py:792] *****************************************
|
| 13 |
+
1: W1124 00:08:21.306000 434889 torch/distributed/run.py:792]
|
| 14 |
+
1: W1124 00:08:21.306000 434889 torch/distributed/run.py:792] *****************************************
|
| 15 |
+
1: W1124 00:08:21.306000 434889 torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 16 |
+
1: W1124 00:08:21.306000 434889 torch/distributed/run.py:792] *****************************************
|
| 17 |
+
0: [2025-11-24 00:08:46,359] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:119] [PID:1912876] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`[39m
|
| 18 |
+
0: [2025-11-24 00:08:46,359] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:218] [PID:1912876] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing[39m
|
| 19 |
+
2: [2025-11-24 00:08:47,025] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:119] [PID:270879] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`[39m
|
| 20 |
+
2: [2025-11-24 00:08:47,026] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:218] [PID:270879] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing[39m
|
| 21 |
+
1: [2025-11-24 00:08:47,077] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:119] [PID:434964] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`[39m
|
| 22 |
+
1: [2025-11-24 00:08:47,078] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:218] [PID:434964] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing[39m
|
| 23 |
+
3: [2025-11-24 00:08:47,277] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:119] [PID:1900370] [RANK:0] explicitly setting `eval_sample_packing` to match `sample_packing`[39m
|
| 24 |
+
3: [2025-11-24 00:08:47,277] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:218] [PID:1900370] [RANK:0] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing[39m
|
| 25 |
+
0: [33m[2025-11-24 00:08:49,792] [WARNING] [axolotl.utils.config.normalize_config:139] [PID:1912876] [RANK:0] Invalid value for save_steps (1.6666666666666667) from saves_per_epoch and/or num_epochs. Saving at training end only.[39m
|
| 26 |
+
0: [2025-11-24 00:08:49,874] [INFO] [axolotl.cli.config.load_cfg:245] [PID:1912876] [RANK:0] config:
|
| 27 |
+
0: {
|
| 28 |
+
0: "activation_offloading": false,
|
| 29 |
+
0: "auto_resume_from_checkpoints": true,
|
| 30 |
+
0: "axolotl_config_path": "/lustre/fswork/projects/rech/dgo/udv55np/train/tmp/1763939290239020138.yaml",
|
| 31 |
+
0: "base_model": "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-4b",
|
| 32 |
+
0: "base_model_config": "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-4b",
|
| 33 |
+
0: "batch_size": 16,
|
| 34 |
+
0: "bf16": true,
|
| 35 |
+
0: "capabilities": {
|
| 36 |
+
0: "bf16": true,
|
| 37 |
+
0: "compute_capability": "sm_90",
|
| 38 |
+
0: "fp8": false,
|
| 39 |
+
0: "n_gpu": 16,
|
| 40 |
+
0: "n_node": 1
|
| 41 |
+
0: },
|
| 42 |
+
0: "chat_template": "gemma3",
|
| 43 |
+
0: "context_parallel_size": 1,
|
| 44 |
+
0: "dataloader_num_workers": 16,
|
| 45 |
+
0: "dataloader_pin_memory": true,
|
| 46 |
+
0: "dataloader_prefetch_factor": 256,
|
| 47 |
+
0: "dataset_prepared_path": "/lustre/fswork/projects/rech/dgo/udv55np/dataset_gemma/Nemotron-Super-49B-v1_5/split_0",
|
| 48 |
+
0: "dataset_processes": 192,
|
| 49 |
+
0: "datasets": [
|
| 50 |
+
0: {
|
| 51 |
+
0: "chat_template": "tokenizer_default",
|
| 52 |
+
0: "data_files": [
|
| 53 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0007.jsonl",
|
| 54 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0009.jsonl",
|
| 55 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0005.jsonl",
|
| 56 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0006.jsonl",
|
| 57 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0014.jsonl",
|
| 58 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0010.jsonl",
|
| 59 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0012.jsonl",
|
| 60 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0008.jsonl",
|
| 61 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0001.jsonl",
|
| 62 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0002.jsonl",
|
| 63 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0013.jsonl",
|
| 64 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0015.jsonl",
|
| 65 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0004.jsonl",
|
| 66 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0011.jsonl",
|
| 67 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0000.jsonl",
|
| 68 |
+
0: "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking/0003.jsonl"
|
| 69 |
+
0: ],
|
| 70 |
+
0: "ds_type": "json",
|
| 71 |
+
0: "field_messages": "conversations",
|
| 72 |
+
0: "message_property_mappings": {
|
| 73 |
+
0: "content": "content",
|
| 74 |
+
0: "role": "role"
|
| 75 |
+
0: },
|
| 76 |
+
0: "path": "/lustre/fswork/projects/rech/qwv/udv55np/dataset/ift/Nemotron-Super-49B-v1_5/no_thinking",
|
| 77 |
+
0: "trust_remote_code": false,
|
| 78 |
+
0: "type": "chat_template"
|
| 79 |
+
0: }
|
| 80 |
+
0: ],
|
| 81 |
+
0: "ddp": true,
|
| 82 |
+
0: "deepspeed": {
|
| 83 |
+
0: "bf16": {
|
| 84 |
+
0: "enabled": true
|
| 85 |
+
0: },
|
| 86 |
+
0: "gradient_accumulation_steps": "auto",
|
| 87 |
+
0: "gradient_clipping": "auto",
|
| 88 |
+
0: "train_batch_size": "auto",
|
| 89 |
+
0: "train_micro_batch_size_per_gpu": "auto",
|
| 90 |
+
0: "wall_clock_breakdown": false,
|
| 91 |
+
0: "zero_optimization": {
|
| 92 |
+
0: "contiguous_gradients": true,
|
| 93 |
+
0: "overlap_comm": true,
|
| 94 |
+
0: "reduce_bucket_size": "auto",
|
| 95 |
+
0: "stage": 3,
|
| 96 |
+
0: "stage3_gather_16bit_weights_on_model_save": true,
|
| 97 |
+
0: "stage3_param_persistence_threshold": "auto",
|
| 98 |
+
0: "stage3_prefetch_bucket_size": "auto",
|
| 99 |
+
0: "sub_group_size": 0
|
| 100 |
+
0: }
|
| 101 |
+
0: },
|
| 102 |
+
0: "device": "cuda:0",
|
| 103 |
+
0: "device_map": {
|
| 104 |
+
0: "": 0
|
| 105 |
+
0: },
|
| 106 |
+
0: "dion_rank_fraction": 1.0,
|
| 107 |
+
0: "dion_rank_multiple_of": 1,
|
| 108 |
+
0: "env_capabilities": {
|
| 109 |
+
0: "torch_version": "2.6.0"
|
| 110 |
+
0: },
|
| 111 |
+
0: "eot_tokens": [
|
| 112 |
+
0: "<end_of_turn>"
|
| 113 |
+
0: ],
|
| 114 |
+
0: "eval_batch_size": 1,
|
| 115 |
+
0: "eval_causal_lm_metrics": [
|
| 116 |
+
0: "sacrebleu",
|
| 117 |
+
0: "comet",
|
| 118 |
+
0: "ter",
|
| 119 |
+
0: "chrf"
|
| 120 |
+
0: ],
|
| 121 |
+
0: "eval_max_new_tokens": 128,
|
| 122 |
+
0: "eval_sample_packing": true,
|
| 123 |
+
0: "eval_table_size": 0,
|
| 124 |
+
0: "evals_per_epoch": 0,
|
| 125 |
+
0: "flash_attention": true,
|
| 126 |
+
0: "fp16": false,
|
| 127 |
+
0: "gradient_accumulation_steps": 1,
|
| 128 |
+
0: "gradient_checkpointing": true,
|
| 129 |
+
0: "gradient_checkpointing_kwargs": {
|
| 130 |
+
0: "use_reentrant": true
|
| 131 |
+
0: },
|
| 132 |
+
0: "is_multimodal": true,
|
| 133 |
+
0: "learning_rate": 5e-06,
|
| 134 |
+
0: "lisa_layers_attribute": "model.layers",
|
| 135 |
+
0: "load_best_model_at_end": false,
|
| 136 |
+
0: "load_in_4bit": false,
|
| 137 |
+
0: "load_in_8bit": false,
|
| 138 |
+
0: "local_rank": 0,
|
| 139 |
+
0: "logging_steps": 10,
|
| 140 |
+
0: "lora_dropout": 0.0,
|
| 141 |
+
0: "loraplus_lr_embedding": 1e-06,
|
| 142 |
+
0: "lr_scheduler": "warmup_stable_decay",
|
| 143 |
+
0: "lr_scheduler_kwargs": {
|
| 144 |
+
0: "min_lr_ratio": 0.1,
|
| 145 |
+
0: "num_decay_steps": 200
|
| 146 |
+
0: },
|
| 147 |
+
0: "max_prompt_len": 512,
|
| 148 |
+
0: "mean_resizing_embeddings": false,
|
| 149 |
+
0: "micro_batch_size": 1,
|
| 150 |
+
0: "model_config_type": "gemma3",
|
| 151 |
+
0: "num_epochs": 0.6,
|
| 152 |
+
0: "optimizer": "adamw_torch_fused",
|
| 153 |
+
0: "output_dir": "/lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0",
|
| 154 |
+
0: "pad_to_sequence_len": true,
|
| 155 |
+
0: "pretrain_multipack_attn": true,
|
| 156 |
+
0: "pretrain_multipack_buffer_size": 10000,
|
| 157 |
+
0: "processor_config": "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-4b",
|
| 158 |
+
0: "profiler_steps_start": 0,
|
| 159 |
+
0: "qlora_sharded_model_loading": false,
|
| 160 |
+
0: "ray_num_workers": 1,
|
| 161 |
+
0: "resources_per_worker": {
|
| 162 |
+
0: "GPU": 1
|
| 163 |
+
0: },
|
| 164 |
+
0: "sample_packing": true,
|
| 165 |
+
0: "sample_packing_bin_size": 200,
|
| 166 |
+
0: "sample_packing_group_size": 100000,
|
| 167 |
+
0: "save_only_model": true,
|
| 168 |
+
0: "save_safetensors": true,
|
| 169 |
+
0: "save_total_limit": 20,
|
| 170 |
+
0: "saves_per_epoch": 1,
|
| 171 |
+
0: "sequence_len": 16384,
|
| 172 |
+
0: "shuffle_before_merging_datasets": false,
|
| 173 |
+
0: "shuffle_merged_datasets": true,
|
| 174 |
+
0: "skip_prepare_dataset": false,
|
| 175 |
+
0: "strict": false,
|
| 176 |
+
0: "tensor_parallel_size": 1,
|
| 177 |
+
0: "tf32": false,
|
| 178 |
+
0: "tiled_mlp_use_original_mlp": true,
|
| 179 |
+
0: "tokenizer_config": "/lustre/fswork/projects/rech/qwv/udv55np/Gemma/base/gemma-3-27b",
|
| 180 |
+
0: "torch_dtype": "torch.bfloat16",
|
| 181 |
+
0: "train_on_inputs": false,
|
| 182 |
+
0: "trl": {
|
| 183 |
+
0: "log_completions": false,
|
| 184 |
+
0: "mask_truncated_completions": false,
|
| 185 |
+
0: "ref_model_mixup_alpha": 0.9,
|
| 186 |
+
0: "ref_model_sync_steps": 64,
|
| 187 |
+
0: "scale_rewards": true,
|
| 188 |
+
0: "sync_ref_model": false,
|
| 189 |
+
0: "use_vllm": false,
|
| 190 |
+
0: "vllm_server_host": "0.0.0.0",
|
| 191 |
+
0: "vllm_server_port": 8000
|
| 192 |
+
0: },
|
| 193 |
+
0: "use_ray": false,
|
| 194 |
+
0: "use_tensorboard": true,
|
| 195 |
+
0: "val_set_size": 0.0,
|
| 196 |
+
0: "vllm": {
|
| 197 |
+
0: "device": "auto",
|
| 198 |
+
0: "dtype": "auto",
|
| 199 |
+
0: "gpu_memory_utilization": 0.9,
|
| 200 |
+
0: "host": "0.0.0.0",
|
| 201 |
+
0: "port": 8000
|
| 202 |
+
0: },
|
| 203 |
+
0: "warmup_steps": 100,
|
| 204 |
+
0: "weight_decay": 0.0,
|
| 205 |
+
0: "world_size": 16
|
| 206 |
+
0: }[39m
|
| 207 |
+
0: [2025-11-24 00:08:49,876] [INFO] [axolotl.cli.checks.check_user_token:35] [PID:1912876] [RANK:0] Skipping HuggingFace token verification because HF_HUB_OFFLINE is set to True. Only local files will be used.[39m
|
| 208 |
+
0: [2025-11-24 00:08:51,148] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:472] [PID:1912876] [RANK:0] Loading prepared dataset from disk at /lustre/fswork/projects/rech/dgo/udv55np/dataset_gemma/Nemotron-Super-49B-v1_5/split_0/06698e902d3dba325ca34849b1dea5ea...[39m
|
| 209 |
+
0: [2025-11-24 00:09:24,738] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:436] [PID:1912876] [RANK:0] gather_len_batches: [18976, 18976, 18975, 18976, 18976, 18976, 18976, 18976, 18976, 18975, 18976, 18976, 18976, 18976, 18976, 18976][39m
|
| 210 |
+
0: [2025-11-24 00:09:24,803] [INFO] [axolotl.utils.trainer.calc_sample_packing_eff_est:495] [PID:1912876] [RANK:0] sample_packing_eff_est across ranks: [0.9988827705383301, 0.9989354014396667, 0.9989354014396667, 0.9988827705383301, 0.9989354014396667, 0.9989354014396667, 0.9989354014396667, 0.9989354014396667, 0.9988827705383301, 0.9988827705383301, 0.9988827705383301, 0.9989880323410034, 0.9988301396369934, 0.9989354014396667, 0.9989354014396667, 0.9989354014396667][39m
|
| 211 |
+
0: [2025-11-24 00:09:24,810] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:127] [PID:1912876] [RANK:0] Maximum number of steps set at 711[39m
|
| 212 |
+
1: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 213 |
+
1: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 214 |
+
1: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 215 |
+
1: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 216 |
+
2: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 217 |
+
3: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 218 |
+
0: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 219 |
+
0: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 220 |
+
2: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 221 |
+
0: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 222 |
+
3: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 223 |
+
2: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 224 |
+
3: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 225 |
+
3: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 226 |
+
2: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 227 |
+
0: Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
|
| 228 |
+
0: [2025-11-24 00:09:31,797] [INFO] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:110] [PID:1912876] [RANK:0] Patched Trainer.evaluation_loop with nanmean loss calculation[39m
|
| 229 |
+
0: [2025-11-24 00:09:31,798] [INFO] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:164] [PID:1912876] [RANK:0] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation[39m
|
| 230 |
+
0:
|
| 231 |
+
0:
|
| 232 |
+
1:
|
| 233 |
+
3:
|
| 234 |
+
2:
|
| 235 |
+
3:
|
| 236 |
+
3:
|
| 237 |
+
1:
|
| 238 |
+
1:
|
| 239 |
+
0:
|
| 240 |
+
1:
|
| 241 |
+
2: β| 2/2 [00:24<00:00, 12.34s/it]
|
| 242 |
+
2:
|
| 243 |
+
2:
|
| 244 |
+
2:
|
| 245 |
+
3:
|
| 246 |
+
0:
|
| 247 |
+
0: [2025-11-24 00:09:59,406] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:345] [PID:1912876] [RANK:0] Converting modules to torch.bfloat16[39m
|
| 248 |
+
0: [2025-11-24 00:10:03,242] [INFO] [axolotl.train.save_initial_configs:416] [PID:1912876] [RANK:0] Pre-saving tokenizer to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0...[39m
|
| 249 |
+
0: [2025-11-24 00:10:03,660] [INFO] [axolotl.train.save_initial_configs:419] [PID:1912876] [RANK:0] Pre-saving model config to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0...[39m
|
| 250 |
+
0: [2025-11-24 00:10:03,690] [INFO] [axolotl.train.save_initial_configs:423] [PID:1912876] [RANK:0] Pre-saving processor to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0...[39m
|
| 251 |
+
0: [2025-11-24 00:10:06,488] [INFO] [axolotl.train.execute_training:203] [PID:1912876] [RANK:0] Starting trainer...[39m
|
| 252 |
+
0: [2025-11-24 00:11:39,109] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:436] [PID:1912876] [RANK:0] gather_len_batches: [18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976, 18976][39m
|
| 253 |
+
0: Parameter Offload - Persistent parameters statistics: param_count = 479, numel = 768880
|
| 254 |
+
0: {'loss': 0.7282, 'grad_norm': 2.3999579863224176, 'learning_rate': 9.05e-07, 'memory/max_mem_active(gib)': 57.15, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 66.2, 'epoch': 0.01}
|
| 255 |
+
0:
|
| 256 |
0%| | 0/711 [00:00<?, ?it/s]
|
| 257 |
0%| | 1/711 [03:10<37:32:11, 190.33s/it]
|
| 258 |
0%| | 2/711 [03:14<15:52:57, 80.65s/it]
|
| 259 |
0%| | 3/711 [03:16<8:49:26, 44.87s/it]
|
| 260 |
1%| | 4/711 [03:18<5:30:58, 28.09s/it]
|
| 261 |
1%| | 5/711 [03:21<3:41:20, 18.81s/it]
|
| 262 |
1%| | 6/711 [03:23<2:35:25, 13.23s/it]
|
| 263 |
1%| | 7/711 [03:25<1:53:16, 9.65s/it]
|
| 264 |
1%| | 8/711 [03:28<1:25:41, 7.31s/it]
|
| 265 |
1%|β | 9/711 [03:30<1:07:15, 5.75s/it]
|
| 266 |
1%|β | 10/711 [03:32<54:44, 4.68s/it]
|
| 267 |
|
| 268 |
1%|β | 10/711 [03:32<54:44, 4.68s/it]
|
| 269 |
2%|β | 11/711 [03:35<46:17, 3.97s/it]
|
| 270 |
2%|β | 12/711 [03:37<40:18, 3.46s/it]
|
| 271 |
2%|β | 13/711 [03:39<36:10, 3.11s/it]
|
| 272 |
2%|β | 14/711 [03:42<34:11, 2.94s/it]
|
| 273 |
2%|β | 15/711 [03:44<32:06, 2.77s/it]
|
| 274 |
2%|β | 16/711 [03:46<30:28, 2.63s/it]
|
| 275 |
2%|β | 17/711 [03:49<29:24, 2.54s/it]
|
| 276 |
3%|β
|
| 277 |
+
0: {'loss': 0.6672, 'grad_norm': 1.3408937334456381, 'learning_rate': 1.3550000000000002e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.02}
|
| 278 |
+
0: {'loss': 0.6271, 'grad_norm': 0.8591296514459729, 'learning_rate': 1.805e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.03}
|
| 279 |
+
0: | 18/711 [03:51<28:42, 2.49s/it]
|
| 280 |
3%|β | 19/711 [03:53<28:02, 2.43s/it]
|
| 281 |
3%|β | 20/711 [03:56<27:33, 2.39s/it]
|
| 282 |
|
| 283 |
3%|β | 20/711 [03:56<27:33, 2.39s/it]
|
| 284 |
3%|β | 21/711 [03:58<27:12, 2.37s/it]
|
| 285 |
3%|β | 22/711 [04:00<26:57, 2.35s/it]
|
| 286 |
3%|β | 23/711 [04:03<26:45, 2.33s/it]
|
| 287 |
3%|β | 24/711 [04:05<26:37, 2.33s/it]
|
| 288 |
4%|β | 25/711 [04:07<26:33, 2.32s/it]
|
| 289 |
4%|β | 26/711 [04:10<26:26, 2.32s/it]
|
| 290 |
4%|β | 27/711 [04:12<26:21, 2.31s/it]
|
| 291 |
4%|β | 28/711 [04:14<26:17, 2.31s/it]
|
| 292 |
4%|β | 29/711 [04:17<26:14, 2.31s/it]
|
| 293 |
4%|β | 30/711 [04:19<26:39, 2.35s/it]
|
| 294 |
|
| 295 |
4%|β | 30/711 [04:19<26:39, 2.35s/it]
|
| 296 |
4%|β | 31/711 [04:21<26:38, 2.35s/it]
|
| 297 |
5%|β | 32/711 [04:24<26:30, 2.34s/it]
|
| 298 |
5%|β | 33/711 [04:26<26:21, 2.33s/it]
|
| 299 |
5%|β | 34/
|
| 300 |
+
0: {'loss': 0.6047, 'grad_norm': 0.8292303871371115, 'learning_rate': 2.2550000000000004e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.03}
|
| 301 |
+
0: {'loss': 0.5823, 'grad_norm': 0.7246674717655568, 'learning_rate': 2.7050000000000004e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.04}
|
| 302 |
+
0: 711 [04:28<26:13, 2.32s/it]
|
| 303 |
5%|β | 35/711 [04:31<26:08, 2.32s/it]
|
| 304 |
5%|β | 36/711 [04:33<26:03, 2.32s/it]
|
| 305 |
5%|β | 37/711 [04:35<26:01, 2.32s/it]
|
| 306 |
5%|β | 38/711 [04:38<25:57, 2.31s/it]
|
| 307 |
5%|β | 39/711 [04:40<25:55, 2.31s/it]
|
| 308 |
6%|β | 40/711 [04:42<25:53, 2.31s/it]
|
| 309 |
|
| 310 |
6%|β | 40/711 [04:42<25:53, 2.31s/it]
|
| 311 |
6%|β | 41/711 [04:44<25:50, 2.31s/it]
|
| 312 |
6%|β | 42/711 [04:47<25:47, 2.31s/it]
|
| 313 |
6%|β | 43/711 [04:49<25:50, 2.32s/it]
|
| 314 |
6%|β | 44/711 [04:51<25:45, 2.32s/it]
|
| 315 |
6%|β | 45/711 [04:54<25:40, 2.31s/it]
|
| 316 |
6%|β | 46/711 [04:56<25:51, 2.33s/it]
|
| 317 |
7%|β | 47/711 [04:58<25:53, 2.34s/it]
|
| 318 |
7%|β | 48/711 [05:01<25:46, 2.33s/it]
|
| 319 |
7%|β | 49/711 [05:03<25:41, 2.33s/it]
|
| 320 |
7%|β | 50/711 [05:05<25:40, 2.33s/it]
|
| 321 |
|
| 322 |
7%|β | 50/711 [0
|
| 323 |
+
0: {'loss': 0.5601, 'grad_norm': 0.7685808720049759, 'learning_rate': 3.1550000000000003e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.05}
|
| 324 |
+
0: 5:05<25:40, 2.33s/it]
|
| 325 |
7%|β | 51/711 [05:08<25:33, 2.32s/it]
|
| 326 |
7%|β | 52/711 [05:10<25:29, 2.32s/it]
|
| 327 |
7%|β | 53/711 [05:12<25:34, 2.33s/it]
|
| 328 |
8%|β | 54/711 [05:15<25:28, 2.33s/it]
|
| 329 |
8%|β | 55/711 [05:17<25:23, 2.32s/it]
|
| 330 |
8%|β | 56/711 [05:19<25:34, 2.34s/it]
|
| 331 |
8%|β | 57/711 [05:22<25:26, 2.33s/it]
|
| 332 |
8%|β | 58/711 [05:24<25:37, 2.35s/it]
|
| 333 |
8%|β | 59/711 [05:26<25:28, 2.34s/it]
|
| 334 |
8%|β | 60/711 [05:29<25:19, 2.33s/it]
|
| 335 |
|
| 336 |
8%|β | 60/711 [05:29<25:19, 2.33s/it]
|
| 337 |
9%|β | 61/711 [05:31<25:11, 2.33s/it]
|
| 338 |
9%|β | 62/711 [05:33<25:19, 2.34s/it]
|
| 339 |
9%|β | 63/711 [05:36<25:30, 2.36s/it]
|
| 340 |
9%|β | 64/711 [05:38<25:18, 2.35s/it]
|
| 341 |
9%|β | 65/711 [05:40<25:08, 2.33s/it]
|
| 342 |
9%|β | 66/711 [05:43<25:00, 2.33s/it]
|
| 343 |
9%|β | 67/711 [05:45<24:55, 2.32s/it]
|
| 344 |
10%|β | 68/711 [05:47<
|
| 345 |
+
0: {'loss': 0.5653, 'grad_norm': 0.7575662741162992, 'learning_rate': 3.6050000000000002e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.06}
|
| 346 |
+
0: {'loss': 0.5571, 'grad_norm': 0.7808588522979137, 'learning_rate': 4.055000000000001e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.07}
|
| 347 |
+
0: 24:49, 2.32s/it]
|
| 348 |
10%|β | 69/711 [05:50<24:46, 2.31s/it]
|
| 349 |
10%|β | 70/711 [05:52<24:42, 2.31s/it]
|
| 350 |
|
| 351 |
10%|β | 70/711 [05:52<24:42, 2.31s/it]
|
| 352 |
10%|β | 71/711 [05:54<24:39, 2.31s/it]
|
| 353 |
10%|β | 72/711 [05:57<24:35, 2.31s/it]
|
| 354 |
10%|β | 73/711 [05:59<24:32, 2.31s/it]
|
| 355 |
10%|β | 74/711 [06:01<24:29, 2.31s/it]
|
| 356 |
11%|β | 75/711 [06:04<24:26, 2.31s/it]
|
| 357 |
11%|β | 76/711 [06:06<24:23, 2.30s/it]
|
| 358 |
11%|β | 77/711 [06:08<24:20, 2.30s/it]
|
| 359 |
11%|β | 78/711 [06:11<24:30, 2.32s/it]
|
| 360 |
11%|β | 79/711 [06:13<24:35, 2.33s/it]
|
| 361 |
11%|ββ | 80/711 [06:15<24:32, 2.33s/it]
|
| 362 |
|
| 363 |
11%|ββ | 80/711 [06:15<24:32, 2.33s/it]
|
| 364 |
11%|ββ | 81/711 [06:18<24:42, 2.35s/it]
|
| 365 |
12%|ββ | 82/711 [06:20<24:32, 2.34s/it]
|
| 366 |
12%|ββ | 83/711 [06:22<24:38, 2.35s/it]
|
| 367 |
12%|ββ | 84/711 [
|
| 368 |
+
0: {'loss': 0.5341, 'grad_norm': 0.8642141374162505, 'learning_rate': 4.505e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.08}
|
| 369 |
+
0: 06:25<24:27, 2.34s/it]
|
| 370 |
12%|ββ | 85/711 [06:27<24:18, 2.33s/it]
|
| 371 |
12%|ββ | 86/711 [06:29<24:24, 2.34s/it]
|
| 372 |
12%|ββ | 87/711 [06:32<24:14, 2.33s/it]
|
| 373 |
12%|ββ | 88/711 [06:34<24:07, 2.32s/it]
|
| 374 |
13%|ββ | 89/711 [06:36<24:02, 2.32s/it]
|
| 375 |
13%|ββ | 90/711 [06:39<23:59, 2.32s/it]
|
| 376 |
|
| 377 |
13%|ββ | 90/711 [06:39<23:59, 2.32s/it]
|
| 378 |
13%|ββ | 91/711 [06:41<23:57, 2.32s/it]
|
| 379 |
13%|ββ | 92/711 [06:43<23:58, 2.32s/it]
|
| 380 |
13%|ββ | 93/711 [06:46<24:09, 2.35s/it]
|
| 381 |
13%|ββ | 94/711 [06:48<24:28, 2.38s/it]
|
| 382 |
13%|ββ | 95/711 [06:50<24:37, 2.40s/it]
|
| 383 |
14%|ββ | 96/711 [06:53<24:19, 2.37s/it]
|
| 384 |
14%|ββ | 97/711 [06:55<24:06, 2.36s/it]
|
| 385 |
14%|ββ | 98/711 [06:58<24:10, 2.37s/it]
|
| 386 |
14%|ββ | 99/711 [07:00<23:57, 2.35s/it]
|
| 387 |
14%|ββ | 100/711 [07:02<23:47, 2.34s/it]
|
| 388 |
|
| 389 |
+
0: {'loss': 0.5192, 'grad_norm': 0.8305549171618009, 'learning_rate': 4.955e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.08}
|
| 390 |
+
0: {'loss': 0.5459, 'grad_norm': 0.8622685683478952, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.09}
|
| 391 |
+
0:
|
| 392 |
14%|ββ | 100/711 [07:02<23:47, 2.34s/it]
|
| 393 |
14%|ββ | 101/711 [07:04<23:40, 2.33s/it]
|
| 394 |
14%|ββ | 102/711 [07:07<23:35, 2.32s/it]
|
| 395 |
14%|ββ | 103/711 [07:09<23:30, 2.32s/it]
|
| 396 |
15%|ββ | 104/711 [07:11<23:26, 2.32s/it]
|
| 397 |
15%|ββ | 105/711 [07:14<23:23, 2.32s/it]
|
| 398 |
15%|ββ | 106/711 [07:16<23:21, 2.32s/it]
|
| 399 |
15%|ββ | 107/711 [07:18<23:18, 2.32s/it]
|
| 400 |
15%|ββ | 108/711 [07:21<23:13, 2.31s/it]
|
| 401 |
15%|ββ | 109/711 [07:23<23:10, 2.31s/it]
|
| 402 |
15%|ββ | 110/711 [07:25<23:20, 2.33s/it]
|
| 403 |
|
| 404 |
15%|ββ | 110/711 [07:25<23:20, 2.33s/it]
|
| 405 |
16%|ββ | 111/711 [07:28<23:30, 2.35s/it]
|
| 406 |
16%|ββ | 112/711 [07:30<23:23, 2.34s/it]
|
| 407 |
16%|ββ | 113/711 [07:32<23:27, 2.35s/it]
|
| 408 |
16%|ββ | 114/711 [07:35<23:16, 2.34s/it]
|
| 409 |
16%|ββ | 115/711 [07:37<23:07, 2.33s/it]
|
| 410 |
16%|ββ | 116/711 [07:39<23:01, 2.32s/it
|
| 411 |
+
0: {'loss': 0.5358, 'grad_norm': 0.8178819353819496, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.1}
|
| 412 |
+
0: {'loss': 0.5179, 'grad_norm': 1.6879902769065394, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.11}
|
| 413 |
+
0: ]
|
| 414 |
16%|ββ | 117/711 [07:42<22:57, 2.32s/it]
|
| 415 |
17%|ββ | 118/711 [07:44<22:52, 2.31s/it]
|
| 416 |
17%|ββ | 119/711 [07:46<22:48, 2.31s/it]
|
| 417 |
17%|ββ | 120/711 [07:49<22:46, 2.31s/it]
|
| 418 |
|
| 419 |
17%|ββ | 120/711 [07:49<22:46, 2.31s/it]
|
| 420 |
17%|ββ | 121/711 [07:51<22:45, 2.31s/it]
|
| 421 |
17%|ββ | 122/711 [07:53<22:45, 2.32s/it]
|
| 422 |
17%|ββ | 123/711 [07:56<22:42, 2.32s/it]
|
| 423 |
17%|ββ | 124/711 [07:58<22:42, 2.32s/it]
|
| 424 |
18%|ββ | 125/711 [08:00<22:52, 2.34s/it]
|
| 425 |
18%|ββ | 126/711 [08:03<23:14, 2.38s/it]
|
| 426 |
18%|ββ | 127/711 [08:05<23:21, 2.40s/it]
|
| 427 |
18%|ββ | 128/711 [08:07<23:04, 2.38s/it]
|
| 428 |
18%|ββ | 129/711 [08:10<22:51, 2.36s/it]
|
| 429 |
18%|ββ | 130/711 [08:12<22:42, 2.35s/it]
|
| 430 |
|
| 431 |
18%|ββ | 130/711 [08:12<22:42, 2.35s/it]
|
| 432 |
18%|ββ | 131/711 [08:14<22:36, 2.34s/it]
|
| 433 |
1
|
| 434 |
+
0: {'loss': 0.5223, 'grad_norm': 0.8227895864412552, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.12}
|
| 435 |
+
0: 9%|ββ | 132/711 [08:17<22:30, 2.33s/it]
|
| 436 |
19%|ββ | 133/711 [08:19<22:31, 2.34s/it]
|
| 437 |
19%|ββ | 134/711 [08:21<22:25, 2.33s/it]
|
| 438 |
19%|ββ | 135/711 [08:24<22:19, 2.33s/it]
|
| 439 |
19%|ββ | 136/711 [08:26<22:15, 2.32s/it]
|
| 440 |
19%|ββ | 137/711 [08:28<22:10, 2.32s/it]
|
| 441 |
19%|ββ | 138/711 [08:31<22:05, 2.31s/it]
|
| 442 |
20%|ββ | 139/711 [08:33<22:00, 2.31s/it]
|
| 443 |
20%|ββ | 140/711 [08:35<21:57, 2.31s/it]
|
| 444 |
|
| 445 |
20%|ββ | 140/711 [08:35<21:57, 2.31s/it]
|
| 446 |
20%|ββ | 141/711 [08:38<21:56, 2.31s/it]
|
| 447 |
20%|ββ | 142/711 [08:40<22:05, 2.33s/it]
|
| 448 |
20%|ββ | 143/711 [08:42<22:07, 2.34s/it]
|
| 449 |
20%|ββ | 144/711 [08:45<22:03, 2.34s/it]
|
| 450 |
20%|ββ | 145/711 [08:47<21:58, 2.33s/it]
|
| 451 |
21%|ββ | 146/711 [08:49<21:53, 2.32s/it]
|
| 452 |
21%|ββ | 147/711 [08:52<21:49, 2.32s/it]
|
| 453 |
21%|ββ | 148/711 [08:54<21:45, 2.32s/it]
|
| 454 |
21
|
| 455 |
+
0: {'loss': 0.523, 'grad_norm': 0.7452356447124456, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.13}
|
| 456 |
+
0: {'loss': 0.5237, 'grad_norm': 0.8791556578937845, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.13}
|
| 457 |
+
0: %|ββ | 149/711 [08:56<21:41, 2.32s/it]
|
| 458 |
21%|ββ | 150/711 [08:58<21:38, 2.32s/it]
|
| 459 |
|
| 460 |
21%|ββ | 150/711 [08:59<21:38, 2.32s/it]
|
| 461 |
21%|ββ | 151/711 [09:01<21:35, 2.31s/it]
|
| 462 |
21%|βββ | 152/711 [09:03<21:40, 2.33s/it]
|
| 463 |
22%|βββ | 153/711 [09:05<21:36, 2.32s/it]
|
| 464 |
22%|βββ | 154/711 [09:08<21:31, 2.32s/it]
|
| 465 |
22%|βββ | 155/711 [09:10<21:26, 2.31s/it]
|
| 466 |
22%|βββ | 156/711 [09:12<21:22, 2.31s/it]
|
| 467 |
22%|βββ | 157/711 [09:15<21:20, 2.31s/it]
|
| 468 |
22%|βββ | 158/711 [09:17<21:20, 2.31s/it]
|
| 469 |
22%|βββ | 159/711 [09:19<21:41, 2.36s/it]
|
| 470 |
23%|βββ | 160/711 [09:22<21:36, 2.35s/it]
|
| 471 |
|
| 472 |
23%|βββ | 160/711 [09:22<21:36, 2.35s/it]
|
| 473 |
23%|βββ | 161/711 [09:24<21:28, 2.34s/it]
|
| 474 |
23%|βββ | 162/711 [09:26<21:23, 2.34s/it]
|
| 475 |
23%|βββ | 163/711 [09:29
|
| 476 |
+
0: {'loss': 0.5143, 'grad_norm': 0.7496360454577663, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.14}
|
| 477 |
+
0: <21:15, 2.33s/it]
|
| 478 |
23%|βββ | 164/711 [09:31<21:12, 2.33s/it]
|
| 479 |
23%|βββ | 165/711 [09:33<21:08, 2.32s/it]
|
| 480 |
23%|βββ | 166/711 [09:36<21:17, 2.34s/it]
|
| 481 |
23%|βββ | 167/711 [09:38<21:09, 2.33s/it]
|
| 482 |
24%|βββ | 168/711 [09:40<21:04, 2.33s/it]
|
| 483 |
24%|βββ | 169/711 [09:43<20:59, 2.32s/it]
|
| 484 |
24%|βββ | 170/711 [09:45<20:54, 2.32s/it]
|
| 485 |
|
| 486 |
24%|βββ | 170/711 [09:45<20:54, 2.32s/it]
|
| 487 |
24%|βββ | 171/711 [09:47<20:52, 2.32s/it]
|
| 488 |
24%|βββ | 172/711 [09:50<20:49, 2.32s/it]
|
| 489 |
24%|βββ | 173/711 [09:52<20:46, 2.32s/it]
|
| 490 |
24%|βββ | 174/711 [09:54<20:44, 2.32s/it]
|
| 491 |
25%|βββ | 175/711 [09:57<20:48, 2.33s/it]
|
| 492 |
25%|βββ | 176/711 [09:59<20:53, 2.34s/it]
|
| 493 |
25%|βββ | 177/711 [10:01<20:46, 2.33s/it]
|
| 494 |
25%|βββ | 178/711 [10:04<20:53, 2.35s/it]
|
| 495 |
25%|βββ | 179/711 [10:06<20:45, 2.34s/it]
|
| 496 |
2
|
| 497 |
+
0: {'loss': 0.5072, 'grad_norm': 0.7656965770735714, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 67.45, 'epoch': 0.15}
|
| 498 |
+
0: {'loss': 0.5029, 'grad_norm': 0.7795187884752995, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.16}
|
| 499 |
+
0: 5%|βββ | 180/711 [10:08<20:39, 2.33s/it]
|
| 500 |
|
| 501 |
25%|βββ | 180/711 [10:08<20:39, 2.33s/it]
|
| 502 |
25%|βββ | 181/711 [10:11<20:33, 2.33s/it]
|
| 503 |
26%|βββ | 182/711 [10:13<20:27, 2.32s/it]
|
| 504 |
26%|βββ | 183/711 [10:15<20:23, 2.32s/it]
|
| 505 |
26%|βββ | 184/711 [10:18<20:19, 2.31s/it]
|
| 506 |
26%|βββ | 185/711 [10:20<20:15, 2.31s/it]
|
| 507 |
26%|βββ | 186/711 [10:22<20:13, 2.31s/it]
|
| 508 |
26%|βββ | 187/711 [10:25<20:09, 2.31s/it]
|
| 509 |
26%|βββ | 188/711 [10:27<20:20, 2.33s/it]
|
| 510 |
27%|βββ | 189/711 [10:29<20:14, 2.33s/it]
|
| 511 |
27%|βββ | 190/711 [10:32<20:44, 2.39s/it]
|
| 512 |
|
| 513 |
27%|βββ | 190/711 [10:32<20:44, 2.39s/it]
|
| 514 |
27%|βββ | 191/711 [10:34<20:38, 2.38s/it]
|
| 515 |
27%|βββ | 192/711 [10:37<20:32, 2.37s/it]
|
| 516 |
27%|βββ | 193/711 [10:39<20:19, 2.35s/it]
|
| 517 |
27%|βββ | 194/7
|
| 518 |
+
0: {'loss': 0.5088, 'grad_norm': 0.8707955733484418, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.17}
|
| 519 |
+
0: 11 [10:41<20:10, 2.34s/it]
|
| 520 |
27%|βββ | 195/711 [10:43<20:06, 2.34s/it]
|
| 521 |
28%|βββ | 196/711 [10:46<19:58, 2.33s/it]
|
| 522 |
28%|βββ | 197/711 [10:48<20:00, 2.34s/it]
|
| 523 |
28%|βββ | 198/711 [10:51<20:08, 2.36s/it]
|
| 524 |
28%|βββ | 199/711 [10:53<19:59, 2.34s/it]
|
| 525 |
28%|βββ | 200/711 [10:55<19:52, 2.33s/it]
|
| 526 |
|
| 527 |
28%|βββ | 200/711 [10:55<19:52, 2.33s/it]
|
| 528 |
28%|βββ | 201/711 [10:57<19:47, 2.33s/it]
|
| 529 |
28%|βββ | 202/711 [11:00<19:40, 2.32s/it]
|
| 530 |
29%|βββ | 203/711 [11:02<19:37, 2.32s/it]
|
| 531 |
29%|βββ | 204/711 [11:04<19:45, 2.34s/it]
|
| 532 |
29%|βββ | 205/711 [11:07<19:39, 2.33s/it]
|
| 533 |
29%|βββ | 206/711 [11:09<19:37, 2.33s/it]
|
| 534 |
29%|βββ | 207/711 [11:12<20:12, 2.41s/it]
|
| 535 |
29%|βββ | 208/711 [11:14<20:01, 2.39s/it]
|
| 536 |
29%|βββ | 209/711 [11:16<19:46, 2.36s/it]
|
| 537 |
30%|βββ | 210/711 [11:19<19:37, 2.3
|
| 538 |
+
0: {'loss': 0.5012, 'grad_norm': 0.8320228094655582, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.18}
|
| 539 |
+
0: {'loss': 0.5124, 'grad_norm': 1.083294587127778, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.19}
|
| 540 |
+
0: 5s/it]
|
| 541 |
|
| 542 |
30%|βββ | 210/711 [11:19<19:37, 2.35s/it]
|
| 543 |
30%|βββ | 211/711 [11:21<19:30, 2.34s/it]
|
| 544 |
30%|βββ | 212/711 [11:23<19:23, 2.33s/it]
|
| 545 |
30%|βββ | 213/711 [11:26<19:16, 2.32s/it]
|
| 546 |
30%|βββ | 214/711 [11:28<19:12, 2.32s/it]
|
| 547 |
30%|βββ | 215/711 [11:30<19:09, 2.32s/it]
|
| 548 |
30%|βββ | 216/711 [11:33<19:06, 2.32s/it]
|
| 549 |
31%|βββ | 217/711 [11:35<19:03, 2.31s/it]
|
| 550 |
31%|βββ | 218/711 [11:37<19:03, 2.32s/it]
|
| 551 |
31%|βββ | 219/711 [11:39<19:01, 2.32s/it]
|
| 552 |
31%|βββ | 220/711 [11:42<18:59, 2.32s/it]
|
| 553 |
|
| 554 |
31%|βββ | 220/711 [11:42<18:59, 2.32s/it]
|
| 555 |
31%|βββ | 221/711 [11:44<18:58, 2.32s/it]
|
| 556 |
31%|βββ | 222/711 [11:47<19:09, 2.35s/it]
|
| 557 |
31%|ββββ | 223/711 [11:49<19:11, 2.36s/it]
|
| 558 |
32%|ββββ | 224/711 [11:51<19:07, 2.36s/it]
|
| 559 |
32%|βββοΏ½
|
| 560 |
+
0: {'loss': 0.4994, 'grad_norm': 0.7355209967169852, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.19}
|
| 561 |
+
0: οΏ½οΏ½ | 225/711 [11:54<18:57, 2.34s/it]
|
| 562 |
32%|ββββ | 226/711 [11:56<19:01, 2.35s/it]
|
| 563 |
32%|ββββ | 227/711 [11:58<18:52, 2.34s/it]
|
| 564 |
32%|ββββ | 228/711 [12:01<18:45, 2.33s/it]
|
| 565 |
32%|ββββ | 229/711 [12:03<18:40, 2.33s/it]
|
| 566 |
32%|ββββ | 230/711 [12:05<18:35, 2.32s/it]
|
| 567 |
|
| 568 |
32%|ββββ | 230/711 [12:05<18:35, 2.32s/it]
|
| 569 |
32%|ββββ | 231/711 [12:08<18:32, 2.32s/it]
|
| 570 |
33%|ββββ | 232/711 [12:10<18:30, 2.32s/it]
|
| 571 |
33%|ββββ | 233/711 [12:12<18:28, 2.32s/it]
|
| 572 |
33%|ββββ | 234/711 [12:14<18:24, 2.32s/it]
|
| 573 |
33%|ββββ | 235/711 [12:17<18:21, 2.31s/it]
|
| 574 |
33%|ββββ | 236/711 [12:19<18:19, 2.31s/it]
|
| 575 |
33%|ββββ | 237/711 [12:21<18:15, 2.31s/it]
|
| 576 |
33%|ββββ | 238/711 [12:24<18:13, 2.31s/it]
|
| 577 |
34%|ββββ | 239/711 [12:26<18:18, 2.33s/it]
|
| 578 |
34%|ββββ | 240/711 [12:28<18:20, 2.34s/it]
|
| 579 |
|
| 580 |
+
0: {'loss': 0.5051, 'grad_norm': 0.7864641958494194, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.2}
|
| 581 |
+
0: {'loss': 0.4913, 'grad_norm': 0.8505484395139187, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.21}
|
| 582 |
+
0:
|
| 583 |
34%|ββββ | 240/711 [12:28<18:20, 2.34s/it]
|
| 584 |
34%|ββββ | 241/711 [12:31<18:15, 2.33s/it]
|
| 585 |
34%|ββββ | 242/711 [12:33<18:09, 2.32s/it]
|
| 586 |
34%|ββββ | 243/711 [12:35<18:11, 2.33s/it]
|
| 587 |
34%|ββββ | 244/711 [12:38<18:05, 2.32s/it]
|
| 588 |
34%|ββββ | 245/711 [12:40<18:00, 2.32s/it]
|
| 589 |
35%|ββββ | 246/711 [12:42<17:56, 2.32s/it]
|
| 590 |
35%|ββββ | 247/711 [12:45<17:53, 2.31s/it]
|
| 591 |
35%|ββββ | 248/711 [12:47<17:51, 2.31s/it]
|
| 592 |
35%|ββββ | 249/711 [12:49<17:49, 2.31s/it]
|
| 593 |
35%|ββββ | 250/711 [12:52<17:46, 2.31s/it]
|
| 594 |
|
| 595 |
35%|ββββ | 250/711 [12:52<17:46, 2.31s/it]
|
| 596 |
35%|ββββ | 251/711 [12:54<17:44, 2.31s/it]
|
| 597 |
35%|ββββ | 252/711 [12:56<17:42, 2.32s/it]
|
| 598 |
36%|ββββ | 253/711 [12:59<17:41, 2.32s/it]
|
| 599 |
36%|ββββ | 254/711 [13:01<17:38, 2.32s/
|
| 600 |
+
0: {'loss': 0.4871, 'grad_norm': 0.8233442983825041, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.22}
|
| 601 |
+
0: it]
|
| 602 |
36%|ββββ | 255/711 [13:03<17:45, 2.34s/it]
|
| 603 |
36%|ββββ | 256/711 [13:06<17:52, 2.36s/it]
|
| 604 |
36%|ββββ | 257/711 [13:08<17:45, 2.35s/it]
|
| 605 |
36%|ββββ | 258/711 [13:10<17:40, 2.34s/it]
|
| 606 |
36%|ββββ | 259/711 [13:13<17:34, 2.33s/it]
|
| 607 |
37%|ββββ | 260/711 [13:15<17:29, 2.33s/it]
|
| 608 |
|
| 609 |
37%|ββββ | 260/711 [13:15<17:29, 2.33s/it]
|
| 610 |
37%|ββββ | 261/711 [13:17<17:25, 2.32s/it]
|
| 611 |
37%|ββββ | 262/711 [13:20<17:21, 2.32s/it]
|
| 612 |
37%|ββββ | 263/711 [13:22<17:17, 2.32s/it]
|
| 613 |
37%|ββββ | 264/711 [13:24<17:15, 2.32s/it]
|
| 614 |
37%|ββββ | 265/711 [13:26<17:13, 2.32s/it]
|
| 615 |
37%|ββββ | 266/711 [13:29<17:10, 2.32s/it]
|
| 616 |
38%|ββββ | 267/711 [13:31<17:07, 2.31s/it]
|
| 617 |
38%|ββββ | 268/711 [13:34<17:16, 2.34s/it]
|
| 618 |
38%|ββββ | 269/711 [13:36<17:13, 2.34s/it]
|
| 619 |
38%|ββββ | 270/711 [13:38<1
|
| 620 |
+
0: {'loss': 0.4887, 'grad_norm': 0.7977283697648062, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.23}
|
| 621 |
+
0: {'loss': 0.501, 'grad_norm': 0.788114718310765, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.24}
|
| 622 |
+
0: 7:08, 2.33s/it]
|
| 623 |
|
| 624 |
38%|ββββ | 270/711 [13:38<17:08, 2.33s/it]
|
| 625 |
38%|ββββ | 271/711 [13:41<17:12, 2.35s/it]
|
| 626 |
38%|ββββ | 272/711 [13:43<17:13, 2.35s/it]
|
| 627 |
38%|ββββ | 273/711 [13:45<17:07, 2.35s/it]
|
| 628 |
39%|ββββ | 274/711 [13:48<17:10, 2.36s/it]
|
| 629 |
39%|ββββ | 275/711 [13:50<17:06, 2.35s/it]
|
| 630 |
39%|ββββ | 276/711 [13:52<16:58, 2.34s/it]
|
| 631 |
39%|ββββ | 277/711 [13:55<16:53, 2.34s/it]
|
| 632 |
39%|ββββ | 278/711 [13:57<16:49, 2.33s/it]
|
| 633 |
39%|ββββ | 279/711 [13:59<16:44, 2.33s/it]
|
| 634 |
39%|ββββ | 280/711 [14:02<16:40, 2.32s/it]
|
| 635 |
|
| 636 |
39%|ββββ | 280/711 [14:02<16:40, 2.32s/it]
|
| 637 |
40%|ββββ | 281/711 [14:04<16:36, 2.32s/it]
|
| 638 |
40%|ββββ | 282/711 [14:06<16:34, 2.32s/it]
|
| 639 |
40%|ββββ | 283/711 [14:09<16:34, 2.32s/it]
|
| 640 |
40%|ββββ | 284/711 [1
|
| 641 |
+
0: {'loss': 0.4765, 'grad_norm': 0.7750954499136393, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.24}
|
| 642 |
+
0: 4:11<16:41, 2.35s/it]
|
| 643 |
40%|ββββ | 285/711 [14:13<16:36, 2.34s/it]
|
| 644 |
40%|ββββ | 286/711 [14:16<16:31, 2.33s/it]
|
| 645 |
40%|ββββ | 287/711 [14:18<16:43, 2.37s/it]
|
| 646 |
41%|ββββ | 288/711 [14:20<16:41, 2.37s/it]
|
| 647 |
41%|ββββ | 289/711 [14:23<16:31, 2.35s/it]
|
| 648 |
41%|ββββ | 290/711 [14:25<16:26, 2.34s/it]
|
| 649 |
|
| 650 |
41%|ββββ | 290/711 [14:25<16:26, 2.34s/it]
|
| 651 |
41%|ββββ | 291/711 [14:27<16:20, 2.33s/it]
|
| 652 |
41%|ββββ | 292/711 [14:30<16:25, 2.35s/it]
|
| 653 |
41%|ββββ | 293/711 [14:32<16:24, 2.35s/it]
|
| 654 |
41%|βββββ | 294/711 [14:34<16:18, 2.35s/it]
|
| 655 |
41%|βββββ | 295/711 [14:37<16:13, 2.34s/it]
|
| 656 |
42%|βββββ | 296/711 [14:39<16:08, 2.33s/it]
|
| 657 |
42%|βββββ | 297/711 [14:41<16:13, 2.35s/it]
|
| 658 |
42%|βββββ | 298/711 [14:44<16:07, 2.34s/it]
|
| 659 |
42%|βββββ | 299/711 [14:46<16:03, 2.34s/it]
|
| 660 |
42%|βοΏ½
|
| 661 |
+
0: {'loss': 0.488, 'grad_norm': 0.778477888845856, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.25}
|
| 662 |
+
0: {'loss': 0.4871, 'grad_norm': 0.7785532844235397, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.26}
|
| 663 |
+
0: οΏ½βββ | 300/711 [14:48<15:59, 2.33s/it]
|
| 664 |
|
| 665 |
42%|βββββ | 300/711 [14:48<15:59, 2.33s/it]
|
| 666 |
42%|βββββ | 301/711 [14:51<15:55, 2.33s/it]
|
| 667 |
42%|βββββ | 302/711 [14:53<15:50, 2.32s/it]
|
| 668 |
43%|βββββ | 303/711 [14:55<15:54, 2.34s/it]
|
| 669 |
43%|βββββ | 304/711 [14:58<15:55, 2.35s/it]
|
| 670 |
43%|βββββ | 305/711 [15:00<15:51, 2.34s/it]
|
| 671 |
43%|βββββ | 306/711 [15:02<15:46, 2.34s/it]
|
| 672 |
43%|βββββ | 307/711 [15:05<15:42, 2.33s/it]
|
| 673 |
43%|βββββ | 308/711 [15:07<15:37, 2.33s/it]
|
| 674 |
43%|βββββ | 309/711 [15:10<15:47, 2.36s/it]
|
| 675 |
44%|βββββ | 310/711 [15:12<15:52, 2.37s/it]
|
| 676 |
|
| 677 |
44%|βββββ | 310/711 [15:12<15:52, 2.37s/it]
|
| 678 |
44%|βββββ | 311/711 [15:14<15:42, 2.36s/it]
|
| 679 |
44%|βββββ | 312/711 [15:17<15:49, 2.38s/it]
|
| 680 |
44%|βββββ | 313/7
|
| 681 |
+
0: {'loss': 0.4915, 'grad_norm': 0.8063698152361907, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.27}
|
| 682 |
+
0: 11 [15:19<16:01, 2.42s/it]
|
| 683 |
44%|βββββ | 314/711 [15:22<15:55, 2.41s/it]
|
| 684 |
44%|βββββ | 315/711 [15:24<15:44, 2.38s/it]
|
| 685 |
44%|βββββ | 316/711 [15:26<15:33, 2.36s/it]
|
| 686 |
45%|βββββ | 317/711 [15:29<15:25, 2.35s/it]
|
| 687 |
45%|βββββ | 318/711 [15:31<15:25, 2.35s/it]
|
| 688 |
45%|βββββ | 319/711 [15:33<15:25, 2.36s/it]
|
| 689 |
45%|βββββ | 320/711 [15:36<15:22, 2.36s/it]
|
| 690 |
|
| 691 |
45%|βββββ | 320/711 [15:36<15:22, 2.36s/it]
|
| 692 |
45%|βββββ | 321/711 [15:38<15:45, 2.43s/it]
|
| 693 |
45%|βββββ | 322/711 [15:41<15:30, 2.39s/it]
|
| 694 |
45%|βββββ | 323/711 [15:43<16:06, 2.49s/it]
|
| 695 |
46%|βββββ | 324/711 [15:46<16:30, 2.56s/it]
|
| 696 |
46%|βββββ | 325/711 [15:48<15:59, 2.49s/it]
|
| 697 |
46%|βββββ | 326/711 [15:51<16:07, 2.51s/it]
|
| 698 |
46%|βββββ | 327/711 [15:53<15:44, 2.46s/it]
|
| 699 |
46%|βββββ | 328/711 [15:55<15:
|
| 700 |
+
0: {'loss': 0.4894, 'grad_norm': 0.7798282062358487, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.28}
|
| 701 |
+
0: {'loss': 0.4825, 'grad_norm': 0.750224606954942, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.29}
|
| 702 |
+
0: 26, 2.42s/it]
|
| 703 |
46%|βββββ | 329/711 [15:58<15:13, 2.39s/it]
|
| 704 |
46%|βββββ | 330/711 [16:00<15:07, 2.38s/it]
|
| 705 |
|
| 706 |
46%|βββββ | 330/711 [16:00<15:07, 2.38s/it]
|
| 707 |
47%|βββββ | 331/711 [16:03<14:57, 2.36s/it]
|
| 708 |
47%|βββββ | 332/711 [16:05<14:49, 2.35s/it]
|
| 709 |
47%|βββββ | 333/711 [16:07<15:21, 2.44s/it]
|
| 710 |
47%|βββββ | 334/711 [16:10<15:12, 2.42s/it]
|
| 711 |
47%|βββββ | 335/711 [16:12<15:12, 2.43s/it]
|
| 712 |
47%|βββββ | 336/711 [16:15<14:57, 2.39s/it]
|
| 713 |
47%|βββββ | 337/711 [16:17<14:46, 2.37s/it]
|
| 714 |
48%|βββββ | 338/711 [16:19<14:39, 2.36s/it]
|
| 715 |
48%|βββββ | 339/711 [16:22<14:32, 2.35s/it]
|
| 716 |
48%|βββββ | 340/711 [16:24<14:27, 2.34s/it]
|
| 717 |
|
| 718 |
48%|βββββ | 340/711 [16:24<14:27, 2.34s/it]
|
| 719 |
48%|βββββ | 341/711 [16:26<14:23, 2.33s/it]
|
| 720 |
48%|οΏ½
|
| 721 |
+
0: {'loss': 0.4856, 'grad_norm': 0.736611045158727, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.3}
|
| 722 |
+
0: οΏ½ββββ | 342/711 [16:29<14:19, 2.33s/it]
|
| 723 |
48%|βββββ | 343/711 [16:31<14:15, 2.33s/it]
|
| 724 |
48%|βββββ | 344/711 [16:33<14:14, 2.33s/it]
|
| 725 |
49%|βββββ | 345/711 [16:35<14:08, 2.32s/it]
|
| 726 |
49%|βββββ | 346/711 [16:38<14:38, 2.41s/it]
|
| 727 |
49%|βββββ | 347/711 [16:40<14:25, 2.38s/it]
|
| 728 |
49%|βββββ | 348/711 [16:43<14:14, 2.35s/it]
|
| 729 |
49%|βββββ | 349/711 [16:45<14:06, 2.34s/it]
|
| 730 |
49%|βββββ | 350/711 [16:47<14:06, 2.35s/it]
|
| 731 |
|
| 732 |
49%|βββββ | 350/711 [16:47<14:06, 2.35s/it]
|
| 733 |
49%|βββββ | 351/711 [16:50<14:07, 2.36s/it]
|
| 734 |
50%|βββββ | 352/711 [16:52<14:00, 2.34s/it]
|
| 735 |
50%|βββββ | 353/711 [16:54<13:55, 2.33s/it]
|
| 736 |
50%|βββββ | 354/711 [16:57<13:50, 2.33s/it]
|
| 737 |
50%|βββββ | 355/711 [16:59<13:47, 2.32s/it]
|
| 738 |
50%|βββββ | 356/711 [17:01<13:44, 2.32s/it]
|
| 739 |
50%|βββββ
|
| 740 |
+
0: {'loss': 0.4927, 'grad_norm': 0.7853737850371227, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.3}
|
| 741 |
+
0: {'loss': 0.4881, 'grad_norm': 0.7490924239534897, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.31}
|
| 742 |
+
0: | 357/711 [17:04<13:41, 2.32s/it]
|
| 743 |
50%|βββββ | 358/711 [17:06<13:38, 2.32s/it]
|
| 744 |
50%|βββββ | 359/711 [17:08<13:55, 2.37s/it]
|
| 745 |
51%|βββββ | 360/711 [17:11<13:46, 2.35s/it]
|
| 746 |
|
| 747 |
51%|βββββ | 360/711 [17:11<13:46, 2.35s/it]
|
| 748 |
51%|βββββ | 361/711 [17:13<13:40, 2.34s/it]
|
| 749 |
51%|βββββ | 362/711 [17:15<13:35, 2.34s/it]
|
| 750 |
51%|βββββ | 363/711 [17:18<13:30, 2.33s/it]
|
| 751 |
51%|βββββ | 364/711 [17:20<13:26, 2.32s/it]
|
| 752 |
51%|ββββββ | 365/711 [17:22<13:27, 2.33s/it]
|
| 753 |
51%|ββββββ | 366/711 [17:25<13:28, 2.34s/it]
|
| 754 |
52%|ββββββ | 367/711 [17:27<13:29, 2.35s/it]
|
| 755 |
52%|ββββββ | 368/711 [17:29<13:23, 2.34s/it]
|
| 756 |
52%|ββββββ | 369/711 [17:32<13:19, 2.34s/it]
|
| 757 |
52%|ββββββ | 370/711 [17:34<13:14, 2.33s/it]
|
| 758 |
|
| 759 |
52%|ββββββ | 3
|
| 760 |
+
0: {'loss': 0.4889, 'grad_norm': 0.7921991687866194, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.32}
|
| 761 |
+
0: 70/711 [17:34<13:14, 2.33s/it]
|
| 762 |
52%|ββββββ | 371/711 [17:36<13:11, 2.33s/it]
|
| 763 |
52%|ββββββ | 372/711 [17:39<13:08, 2.33s/it]
|
| 764 |
52%|ββββββ | 373/711 [17:41<13:07, 2.33s/it]
|
| 765 |
53%|ββββββ | 374/711 [17:43<13:04, 2.33s/it]
|
| 766 |
53%|ββββββ | 375/711 [17:46<13:01, 2.33s/it]
|
| 767 |
53%|ββββββ | 376/711 [17:48<12:57, 2.32s/it]
|
| 768 |
53%|ββββββ | 377/711 [17:50<12:54, 2.32s/it]
|
| 769 |
53%|ββββββ | 378/711 [17:53<12:53, 2.32s/it]
|
| 770 |
53%|ββββββ | 379/711 [17:55<12:57, 2.34s/it]
|
| 771 |
53%|ββββββ | 380/711 [17:57<12:52, 2.33s/it]
|
| 772 |
|
| 773 |
53%|ββββββ | 380/711 [17:57<12:52, 2.33s/it]
|
| 774 |
54%|ββββββ | 381/711 [18:00<12:57, 2.35s/it]
|
| 775 |
54%|ββββββ | 382/711 [18:02<12:56, 2.36s/it]
|
| 776 |
54%|ββββββ | 383/711 [18:05<12:55, 2.36s/it]
|
| 777 |
54%|ββββββ | 384/711 [18:07<12:48, 2.35s/it]
|
| 778 |
54%|ββ
|
| 779 |
+
0: {'loss': 0.4822, 'grad_norm': 0.8102116642711951, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.33}
|
| 780 |
+
0: ββββ | 385/711 [18:09<12:42, 2.34s/it]
|
| 781 |
54%|ββββββ | 386/711 [18:11<12:38, 2.33s/it]
|
| 782 |
54%|ββββββ | 387/711 [18:14<12:33, 2.33s/it]
|
| 783 |
55%|ββββββ | 388/711 [18:16<12:30, 2.32s/it]
|
| 784 |
55%|ββββββ | 389/711 [18:18<12:28, 2.32s/it]
|
| 785 |
55%|ββββββ | 390/711 [18:21<12:28, 2.33s/it]
|
| 786 |
|
| 787 |
55%|ββββββ | 390/711 [18:21<12:28, 2.33s/it]
|
| 788 |
55%|ββββββ | 391/711 [18:23<12:24, 2.33s/it]
|
| 789 |
55%|ββββββ | 392/711 [18:25<12:24, 2.34s/it]
|
| 790 |
55%|ββββββ | 393/711 [18:28<12:20, 2.33s/it]
|
| 791 |
55%|ββββββ | 394/711 [18:30<12:16, 2.32s/it]
|
| 792 |
56%|ββββββ | 395/711 [18:32<12:22, 2.35s/it]
|
| 793 |
56%|ββββββ | 396/711 [18:35<12:16, 2.34s/it]
|
| 794 |
56%|ββββββ | 397/711 [18:37<12:11, 2.33s/it]
|
| 795 |
56%|ββββββ | 398/711 [18:39<12:11, 2.34s/it]
|
| 796 |
56%|ββββββ | 399/711 [18:42<12:12, 2.
|
| 797 |
+
0: {'loss': 0.4678, 'grad_norm': 0.7889843890610096, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.34}
|
| 798 |
+
0: {'loss': 0.4732, 'grad_norm': 0.7803377614587503, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.35}
|
| 799 |
+
0: 35s/it]
|
| 800 |
56%|ββββββ | 400/711 [18:44<12:07, 2.34s/it]
|
| 801 |
|
| 802 |
56%|ββββββ | 400/711 [18:44<12:07, 2.34s/it]
|
| 803 |
56%|ββββββ | 401/711 [18:46<12:02, 2.33s/it]
|
| 804 |
57%|ββββββ | 402/711 [18:49<11:59, 2.33s/it]
|
| 805 |
57%|ββββββ | 403/711 [18:51<11:56, 2.32s/it]
|
| 806 |
57%|ββββββ | 404/711 [18:53<11:52, 2.32s/it]
|
| 807 |
57%|ββββββ | 405/711 [18:56<12:01, 2.36s/it]
|
| 808 |
57%|ββββββ | 406/711 [18:58<11:54, 2.34s/it]
|
| 809 |
57%|ββββββ | 407/711 [19:00<11:49, 2.33s/it]
|
| 810 |
57%|ββββββ | 408/711 [19:03<11:44, 2.33s/it]
|
| 811 |
58%|ββββββ | 409/711 [19:05<11:48, 2.35s/it]
|
| 812 |
58%|ββββββ | 410/711 [19:07<11:42, 2.33s/it]
|
| 813 |
|
| 814 |
58%|ββββββ | 410/711 [19:07<11:42, 2.33s/it]
|
| 815 |
58%|ββββββ | 411/711 [19:10<11:45, 2.35s/it]
|
| 816 |
58%|ββββββ | 412/711 [19:12<11
|
| 817 |
+
0: {'loss': 0.4773, 'grad_norm': 0.7786794033275539, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.35}
|
| 818 |
+
0: :38, 2.34s/it]
|
| 819 |
58%|ββββββ | 413/711 [19:14<11:34, 2.33s/it]
|
| 820 |
58%|ββββββ | 414/711 [19:17<11:35, 2.34s/it]
|
| 821 |
58%|ββββββ | 415/711 [19:19<11:35, 2.35s/it]
|
| 822 |
59%|ββββββ | 416/711 [19:22<11:37, 2.36s/it]
|
| 823 |
59%|ββββββ | 417/711 [19:24<11:30, 2.35s/it]
|
| 824 |
59%|ββββββ | 418/711 [19:26<11:26, 2.34s/it]
|
| 825 |
59%|ββββββ | 419/711 [19:29<11:22, 2.34s/it]
|
| 826 |
59%|ββββββ | 420/711 [19:31<11:18, 2.33s/it]
|
| 827 |
|
| 828 |
59%|ββββββ | 420/711 [19:31<11:18, 2.33s/it]
|
| 829 |
59%|ββββββ | 421/711 [19:33<11:15, 2.33s/it]
|
| 830 |
59%|ββββββ | 422/711 [19:36<11:11, 2.32s/it]
|
| 831 |
59%|ββββββ | 423/711 [19:38<11:07, 2.32s/it]
|
| 832 |
60%|ββββββ | 424/711 [19:40<11:06, 2.32s/it]
|
| 833 |
60%|ββββββ | 425/711 [19:42<11:02, 2.32s/it]
|
| 834 |
60%|ββββββ | 426/711 [19:45<11:00, 2.32s/it]
|
| 835 |
60%|ββββββ
|
| 836 |
+
0: {'loss': 0.4759, 'grad_norm': 0.7409304393739385, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.36}
|
| 837 |
+
0: {'loss': 0.4784, 'grad_norm': 0.7489672735206069, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.37}
|
| 838 |
+
0: | 427/711 [19:47<10:58, 2.32s/it]
|
| 839 |
60%|ββββββ | 428/711 [19:49<11:02, 2.34s/it]
|
| 840 |
60%|ββββββ | 429/711 [19:52<10:58, 2.34s/it]
|
| 841 |
60%|ββββββ | 430/711 [19:54<10:59, 2.35s/it]
|
| 842 |
|
| 843 |
60%|ββββββ | 430/711 [19:54<10:59, 2.35s/it]
|
| 844 |
61%|ββββββ | 431/711 [19:57<10:59, 2.35s/it]
|
| 845 |
61%|ββββββ | 432/711 [19:59<10:53, 2.34s/it]
|
| 846 |
61%|ββββββ | 433/711 [20:01<10:49, 2.34s/it]
|
| 847 |
61%|ββββββ | 434/711 [20:03<10:44, 2.33s/it]
|
| 848 |
61%|ββββββ | 435/711 [20:06<10:40, 2.32s/it]
|
| 849 |
61%|βββββββ | 436/711 [20:08<10:42, 2.33s/it]
|
| 850 |
61%|βββββββ | 437/711 [20:11<10:40, 2.34s/it]
|
| 851 |
62%|βββββββ | 438/711 [20:13<10:36, 2.33s/it]
|
| 852 |
62%|βββββββ | 439/711 [20:15<10:32, 2.33s/it]
|
| 853 |
62%|βββββββ | 440/711 [20:17<10:29, 2.32s/it]
|
| 854 |
|
| 855 |
62%|βοΏ½
|
| 856 |
+
0: {'loss': 0.4716, 'grad_norm': 0.7729942449390255, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.38}
|
| 857 |
+
0: οΏ½οΏ½βββββ | 440/711 [20:17<10:29, 2.32s/it]
|
| 858 |
62%|βββββββ | 441/711 [20:20<10:27, 2.32s/it]
|
| 859 |
62%|βββββββ | 442/711 [20:22<10:24, 2.32s/it]
|
| 860 |
62%|βββββββ | 443/711 [20:24<10:21, 2.32s/it]
|
| 861 |
62%|βββββββ | 444/711 [20:27<10:18, 2.32s/it]
|
| 862 |
63%|βββββββ | 445/711 [20:29<10:36, 2.39s/it]
|
| 863 |
63%|βββββββ | 446/711 [20:32<10:31, 2.38s/it]
|
| 864 |
63%|βββββββ | 447/711 [20:34<10:41, 2.43s/it]
|
| 865 |
63%|βββββββ | 448/711 [20:37<10:30, 2.40s/it]
|
| 866 |
63%|βββββββ | 449/711 [20:39<10:21, 2.37s/it]
|
| 867 |
63%|βββββββ | 450/711 [20:41<10:14, 2.35s/it]
|
| 868 |
|
| 869 |
63%|βββββββ | 450/711 [20:41<10:14, 2.35s/it]
|
| 870 |
63%|βββββββ | 451/711 [20:43<10:08, 2.34s/it]
|
| 871 |
64%|βββββββ | 452/711 [20:46<10:17, 2.39s/it]
|
| 872 |
64%|βββββββ | 453/711 [20:48<10:09, 2.36s/it]
|
| 873 |
64%|βββββοΏ½
|
| 874 |
+
0: {'loss': 0.467, 'grad_norm': 0.7403462320672021, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.39}
|
| 875 |
+
0: οΏ½β | 454/711 [20:51<10:03, 2.35s/it]
|
| 876 |
64%|βββββββ | 455/711 [20:53<09:58, 2.34s/it]
|
| 877 |
64%|βββββββ | 456/711 [20:55<09:54, 2.33s/it]
|
| 878 |
64%|βββββββ | 457/711 [20:57<09:50, 2.32s/it]
|
| 879 |
64%|βββββββ | 458/711 [21:00<09:46, 2.32s/it]
|
| 880 |
65%|βββββββ | 459/711 [21:02<09:44, 2.32s/it]
|
| 881 |
65%|βββββββ | 460/711 [21:04<09:42, 2.32s/it]
|
| 882 |
|
| 883 |
65%|βββββββ | 460/711 [21:04<09:42, 2.32s/it]
|
| 884 |
65%|βββββββ | 461/711 [21:07<09:40, 2.32s/it]
|
| 885 |
65%|βββββββ | 462/711 [21:09<09:41, 2.34s/it]
|
| 886 |
65%|βββββββ | 463/711 [21:12<09:42, 2.35s/it]
|
| 887 |
65%|βββββββ | 464/711 [21:14<09:37, 2.34s/it]
|
| 888 |
65%|βββββββ | 465/711 [21:16<09:33, 2.33s/it]
|
| 889 |
66%|βββββββ | 466/711 [21:18<09:29, 2.32s/it]
|
| 890 |
66%|βββββββ | 467/711 [21:21<09:26, 2.32s/it]
|
| 891 |
66%|βββββββ | 468/
|
| 892 |
+
0: {'loss': 0.4727, 'grad_norm': 0.7765476983805598, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.4}
|
| 893 |
+
0: {'loss': 0.4761, 'grad_norm': 0.7166795921281778, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.4}
|
| 894 |
+
0: 711 [21:23<09:23, 2.32s/it]
|
| 895 |
66%|βββββββ | 469/711 [21:25<09:26, 2.34s/it]
|
| 896 |
66%|βββββββ | 470/711 [21:28<09:22, 2.33s/it]
|
| 897 |
|
| 898 |
66%|βββββββ | 470/711 [21:28<09:22, 2.33s/it]
|
| 899 |
66%|βββββββ | 471/711 [21:30<09:19, 2.33s/it]
|
| 900 |
66%|βββββββ | 472/711 [21:32<09:16, 2.33s/it]
|
| 901 |
67%|βββββββ | 473/711 [21:35<09:13, 2.33s/it]
|
| 902 |
67%|βββββββ | 474/711 [21:37<09:11, 2.33s/it]
|
| 903 |
67%|βββββββ | 475/711 [21:39<09:08, 2.32s/it]
|
| 904 |
67%|βββββββ | 476/711 [21:42<09:10, 2.34s/it]
|
| 905 |
67%|βββββββ | 477/711 [21:44<09:06, 2.34s/it]
|
| 906 |
67%|βββββββ | 478/711 [21:46<09:07, 2.35s/it]
|
| 907 |
67%|βββββββ | 479/711 [21:49<09:06, 2.36s/it]
|
| 908 |
68%|βββββββ | 480/711 [21:51<09:02, 2.35s/it]
|
| 909 |
|
| 910 |
68%|βββββββ | 480/711 [21:51<09:02, 2.35s/i
|
| 911 |
+
0: {'loss': 0.4545, 'grad_norm': 0.7592461340919713, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.41}
|
| 912 |
+
0: t]
|
| 913 |
68%|βββββββ | 481/711 [21:54<09:03, 2.36s/it]
|
| 914 |
68%|βββββββ | 482/711 [21:56<08:58, 2.35s/it]
|
| 915 |
68%|βββββββ | 483/711 [21:58<08:53, 2.34s/it]
|
| 916 |
68%|βββββββ | 484/711 [22:01<08:49, 2.33s/it]
|
| 917 |
68%|βββββββ | 485/711 [22:03<08:58, 2.38s/it]
|
| 918 |
68%|βββββββ | 486/711 [22:05<08:50, 2.36s/it]
|
| 919 |
68%|βββββββ | 487/711 [22:08<08:45, 2.34s/it]
|
| 920 |
69%|βββββββ | 488/711 [22:10<08:40, 2.33s/it]
|
| 921 |
69%|βββββββ | 489/711 [22:12<08:41, 2.35s/it]
|
| 922 |
69%|βββββββ | 490/711 [22:15<08:37, 2.34s/it]
|
| 923 |
|
| 924 |
69%|βββββββ | 490/711 [22:15<08:37, 2.34s/it]
|
| 925 |
69%|βββββββ | 491/711 [22:17<08:37, 2.35s/it]
|
| 926 |
69%|βββββββ | 492/711 [22:19<08:33, 2.34s/it]
|
| 927 |
69%|βββββββ | 493/711 [22:22<08:29, 2.34s/it]
|
| 928 |
69%|βββββββ | 494/711 [22:24<08:35, 2.37s/it]
|
| 929 |
70%|βοΏ½
|
| 930 |
+
0: {'loss': 0.4621, 'grad_norm': 0.8060919908075219, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.42}
|
| 931 |
+
0: οΏ½βββββ | 495/711 [22:27<08:33, 2.38s/it]
|
| 932 |
70%|βββββββ | 496/711 [22:29<08:27, 2.36s/it]
|
| 933 |
70%|βββββββ | 497/711 [22:31<08:25, 2.36s/it]
|
| 934 |
70%|βββββββ | 498/711 [22:34<08:20, 2.35s/it]
|
| 935 |
70%|βββββββ | 499/711 [22:36<08:21, 2.37s/it]
|
| 936 |
70%|βββββββ | 500/711 [22:38<08:16, 2.35s/it]
|
| 937 |
|
| 938 |
70%|βββββββ | 500/711 [22:38<08:16, 2.35s/it]
|
| 939 |
70%|βββββββ | 501/711 [22:41<08:12, 2.34s/it]
|
| 940 |
71%|βββββββ | 502/711 [22:43<08:08, 2.34s/it]
|
| 941 |
71%|βββββββ | 503/711 [22:45<08:05, 2.33s/it]
|
| 942 |
71%|βββββββ | 504/711 [22:48<08:02, 2.33s/it]
|
| 943 |
71%|βββββββ | 505/711 [22:50<07:59, 2.33s/it]
|
| 944 |
71%|βββββββ | 506/711 [22:52<07:56, 2.32s/it]
|
| 945 |
71%|ββββββββ | 507/711 [22:55<07:53, 2.32s/it]
|
| 946 |
71%|ββββββββ | 508/711 [22:57<07:50, 2.32s/it]
|
| 947 |
72%|ββββοΏ½
|
| 948 |
+
0: {'loss': 0.4759, 'grad_norm': 0.7434049511511707, 'learning_rate': 5e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.43}
|
| 949 |
+
0: {'loss': 0.4643, 'grad_norm': 0.8519398991308196, 'learning_rate': 4.982258077957576e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.44}
|
| 950 |
+
0: οΏ½βββ | 509/711 [22:59<07:47, 2.32s/it]
|
| 951 |
72%|ββββββββ | 510/711 [23:02<07:49, 2.33s/it]
|
| 952 |
|
| 953 |
72%|ββββββββ | 510/711 [23:02<07:49, 2.33s/it]
|
| 954 |
72%|ββββββββ | 511/711 [23:04<07:52, 2.36s/it]
|
| 955 |
72%|ββββββββ | 512/711 [23:06<07:59, 2.41s/it]
|
| 956 |
72%|ββββββββ | 513/711 [23:09<08:02, 2.44s/it]
|
| 957 |
72%|ββββββββ | 514/711 [23:11<07:53, 2.40s/it]
|
| 958 |
72%|ββββββββ | 515/711 [23:14<07:45, 2.38s/it]
|
| 959 |
73%|ββββββββ | 516/711 [23:16<07:39, 2.36s/it]
|
| 960 |
73%|ββββββββ | 517/711 [23:18<07:35, 2.35s/it]
|
| 961 |
73%|ββββββββ | 518/711 [23:21<07:31, 2.34s/it]
|
| 962 |
73%|ββββββββ | 519/711 [23:23<07:27, 2.33s/it]
|
| 963 |
73%|ββββββββ | 520/711 [23:25<07:28, 2.35s/it]
|
| 964 |
|
| 965 |
73%|ββββββββ | 520/711 [23:25<07:28, 2.35s/it]
|
| 966 |
73%|ββββ
|
| 967 |
+
0: {'loss': 0.4652, 'grad_norm': 0.7323364456399427, 'learning_rate': 4.910660792773122e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.45}
|
| 968 |
+
0: ββββ | 521/711 [23:28<07:24, 2.34s/it]
|
| 969 |
73%|ββββββββ | 522/711 [23:30<07:20, 2.33s/it]
|
| 970 |
74%|ββββββββ | 523/711 [23:32<07:18, 2.33s/it]
|
| 971 |
74%|ββββββββ | 524/711 [23:35<07:18, 2.34s/it]
|
| 972 |
74%|ββββββββ | 525/711 [23:37<07:18, 2.36s/it]
|
| 973 |
74%|ββββββββ | 526/711 [23:39<07:17, 2.37s/it]
|
| 974 |
74%|ββββββββ | 527/711 [23:42<07:13, 2.36s/it]
|
| 975 |
74%|ββββββββ | 528/711 [23:44<07:11, 2.36s/it]
|
| 976 |
74%|ββββββββ | 529/711 [23:46<07:06, 2.34s/it]
|
| 977 |
75%|ββββββββ | 530/711 [23:49<07:03, 2.34s/it]
|
| 978 |
|
| 979 |
75%|ββββββββ | 530/711 [23:49<07:03, 2.34s/it]
|
| 980 |
75%|ββββββββ | 531/711 [23:51<07:00, 2.33s/it]
|
| 981 |
75%|ββββββββ | 532/711 [23:54<07:07, 2.39s/it]
|
| 982 |
75%|ββββββββ | 533/711 [23:56<07:00, 2.36s/it]
|
| 983 |
75%|ββββββββ | 534/711 [23:58<06:55, 2.35s/it]
|
| 984 |
+
0: {'loss': 0.4762, 'grad_norm': 0.7467850320594309, 'learning_rate': 4.7858608680485444e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.46}
|
| 985 |
+
0: 75%|ββββββββ | 535/711 [24:00<06:50, 2.33s/it]
|
| 986 |
75%|ββββββββ | 536/711 [24:03<06:49, 2.34s/it]
|
| 987 |
76%|ββββββββ | 537/711 [24:05<06:45, 2.33s/it]
|
| 988 |
76%|ββββββββ | 538/711 [24:07<06:41, 2.32s/it]
|
| 989 |
76%|ββββββββ | 539/711 [24:10<06:38, 2.32s/it]
|
| 990 |
76%|ββββββββ | 540/711 [24:12<06:35, 2.31s/it]
|
| 991 |
|
| 992 |
76%|ββββββββ | 540/711 [24:12<06:35, 2.31s/it]
|
| 993 |
76%|ββββββββ | 541/711 [24:14<06:36, 2.33s/it]
|
| 994 |
76%|ββββββββ | 542/711 [24:17<06:35, 2.34s/it]
|
| 995 |
76%|ββββββββ | 543/711 [24:19<06:33, 2.34s/it]
|
| 996 |
77%|ββββββββ | 544/711 [24:21<06:29, 2.33s/it]
|
| 997 |
77%|ββββββββ | 545/711 [24:24<06:26, 2.33s/it]
|
| 998 |
77%|ββββββββ | 546/711 [24:26<06:23, 2.32s/it]
|
| 999 |
77%|ββββββββ | 547/711 [24:28<06:20, 2.32s/it]
|
| 1000 |
77%|ββββββββ | 548/711 [24:31<0
|
| 1001 |
+
0: {'loss': 0.474, 'grad_norm': 0.8194845306669991, 'learning_rate': 4.610931292117764e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.46}
|
| 1002 |
+
0: {'loss': 0.4615, 'grad_norm': 1.4968465915839815, 'learning_rate': 4.390179411698176e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.47}
|
| 1003 |
+
0: 6:18, 2.32s/it]
|
| 1004 |
77%|ββββββββ | 549/711 [24:33<06:15, 2.32s/it]
|
| 1005 |
77%|ββββββββ | 550/711 [24:35<06:13, 2.32s/it]
|
| 1006 |
|
| 1007 |
77%|ββββββββ | 550/711 [24:35<06:13, 2.32s/it]
|
| 1008 |
77%|ββββββββ | 551/711 [24:38<06:10, 2.32s/it]
|
| 1009 |
78%|ββββββββ | 552/711 [24:40<06:08, 2.32s/it]
|
| 1010 |
78%|ββββββββ | 553/711 [24:42<06:05, 2.32s/it]
|
| 1011 |
78%|ββββββββ | 554/711 [24:45<06:03, 2.31s/it]
|
| 1012 |
78%|ββββββββ | 555/711 [24:47<06:01, 2.32s/it]
|
| 1013 |
78%|ββββββββ | 556/711 [24:49<05:58, 2.32s/it]
|
| 1014 |
78%|ββββββββ | 557/711 [24:52<06:01, 2.35s/it]
|
| 1015 |
78%|ββββββββ | 558/711 [24:54<06:03, 2.38s/it]
|
| 1016 |
79%|ββββββββ | 559/711 [24:56<05:59, 2.37s/it]
|
| 1017 |
79%|ββββββββ | 560/711 [24:59<05:55, 2.35s/it]
|
| 1018 |
|
| 1019 |
79%|ββββββββ | 560/711 [24:59
|
| 1020 |
+
0: {'loss': 0.4737, 'grad_norm': 0.7375795650774891, 'learning_rate': 4.129040870719198e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.48}
|
| 1021 |
+
0: <05:55, 2.35s/it]
|
| 1022 |
79%|ββββββββ | 561/711 [25:01<05:51, 2.34s/it]
|
| 1023 |
79%|ββββββββ | 562/711 [25:03<05:47, 2.33s/it]
|
| 1024 |
79%|ββββββββ | 563/711 [25:06<05:47, 2.35s/it]
|
| 1025 |
79%|ββββββββ | 564/711 [25:08<05:43, 2.34s/it]
|
| 1026 |
79%|ββββββββ | 565/711 [25:11<05:48, 2.39s/it]
|
| 1027 |
80%|ββββββββ | 566/711 [25:13<05:43, 2.37s/it]
|
| 1028 |
80%|ββββββββ | 567/711 [25:15<05:41, 2.37s/it]
|
| 1029 |
80%|ββββββββ | 568/711 [25:18<05:36, 2.35s/it]
|
| 1030 |
80%|ββββββββ | 569/711 [25:20<05:32, 2.34s/it]
|
| 1031 |
80%|ββββββββ | 570/711 [25:22<05:28, 2.33s/it]
|
| 1032 |
|
| 1033 |
80%|ββββββββ | 570/711 [25:22<05:28, 2.33s/it]
|
| 1034 |
80%|ββββββββ | 571/711 [25:24<05:25, 2.33s/it]
|
| 1035 |
80%|ββββββββ | 572/711 [25:27<05:22, 2.32s/it]
|
| 1036 |
81%|ββββββββ | 573/711 [25:29<05:22, 2.34s/it]
|
| 1037 |
81%|ββββββββ
|
| 1038 |
+
0: {'loss': 0.4643, 'grad_norm': 0.7331473812508006, 'learning_rate': 3.833945766728859e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.49}
|
| 1039 |
+
0: | 574/711 [25:32<05:21, 2.35s/it]
|
| 1040 |
81%|ββββββββ | 575/711 [25:34<05:18, 2.34s/it]
|
| 1041 |
81%|ββββββββ | 576/711 [25:36<05:15, 2.33s/it]
|
| 1042 |
81%|ββββββββ | 577/711 [25:39<05:15, 2.35s/it]
|
| 1043 |
81%|βββββββββ | 578/711 [25:41<05:13, 2.36s/it]
|
| 1044 |
81%|βββββββββ | 579/711 [25:43<05:09, 2.35s/it]
|
| 1045 |
82%|βββββββββ | 580/711 [25:46<05:13, 2.40s/it]
|
| 1046 |
|
| 1047 |
82%|βββββββββ | 580/711 [25:46<05:13, 2.40s/it]
|
| 1048 |
82%|βββββββββ | 581/711 [25:48<05:08, 2.37s/it]
|
| 1049 |
82%|βββββββββ | 582/711 [25:50<05:03, 2.36s/it]
|
| 1050 |
82%|βββββββββ | 583/711 [25:53<05:00, 2.35s/it]
|
| 1051 |
82%|βββββββββ | 584/711 [25:55<04:56, 2.34s/it]
|
| 1052 |
82%|βββββββββ | 585/711 [25:57<04:53, 2.33s/it]
|
| 1053 |
82%|βββββββββ | 586/711 [26:00<04:50, 2.32s/it]
|
| 1054 |
83%|βββββββββ | 587/711 [26:02<04:50, 2
|
| 1055 |
+
0: {'loss': 0.4559, 'grad_norm': 0.7238676692923376, 'learning_rate': 3.512160320551906e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.5}
|
| 1056 |
+
0: .34s/it]
|
| 1057 |
83%|βββββββββ | 588/711 [26:04<04:48, 2.35s/it]
|
| 1058 |
83%|βββββββββ | 589/711 [26:07<04:47, 2.36s/it]
|
| 1059 |
83%|βββββββββ | 590/711 [26:09<04:47, 2.38s/it]
|
| 1060 |
|
| 1061 |
83%|βββββββββ | 590/711 [26:09<04:47, 2.38s/it]
|
| 1062 |
83%|βββββββββ | 591/711 [26:12<04:43, 2.36s/it]
|
| 1063 |
83%|βββββββββ | 592/711 [26:14<04:39, 2.35s/it]
|
| 1064 |
83%|βββββββββ | 593/711 [26:16<04:36, 2.34s/it]
|
| 1065 |
84%|βββββββββ | 594/711 [26:19<04:33, 2.33s/it]
|
| 1066 |
84%|βββββββββ | 595/711 [26:21<04:30, 2.33s/it]
|
| 1067 |
84%|βββββββββ | 596/711 [26:23<04:27, 2.33s/it]
|
| 1068 |
84%|βββββββββ | 597/711 [26:25<04:25, 2.32s/it]
|
| 1069 |
84%|βββββββββ | 598/711 [26:28<04:22, 2.32s/it]
|
| 1070 |
84%|βββββββββ | 599/711 [26:30<04:20, 2.32s/it]
|
| 1071 |
84%|βββββββββ | 600/711 [26:32<04:17, 2.32s/it]
|
| 1072 |
|
| 1073 |
+
0: {'loss': 0.4728, 'grad_norm': 0.7685550630490101, 'learning_rate': 3.171607957817881e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.51}
|
| 1074 |
+
0: {'loss': 0.452, 'grad_norm': 0.7387438977228977, 'learning_rate': 2.820674207925789e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.51}
|
| 1075 |
+
0:
|
| 1076 |
84%|βββββββββ | 600/711 [26:32<04:17, 2.32s/it]
|
| 1077 |
85%|βββββββββ | 601/711 [26:35<04:15, 2.32s/it]
|
| 1078 |
85%|βββββββββ | 602/711 [26:37<04:13, 2.32s/it]
|
| 1079 |
85%|βββββββββ | 603/711 [26:39<04:10, 2.32s/it]
|
| 1080 |
85%|βββββββββ | 604/711 [26:42<04:08, 2.32s/it]
|
| 1081 |
85%|βββββββββ | 605/711 [26:44<04:08, 2.34s/it]
|
| 1082 |
85%|βββββββββ | 606/711 [26:46<04:06, 2.35s/it]
|
| 1083 |
85%|βββββββββ | 607/711 [26:49<04:03, 2.34s/it]
|
| 1084 |
86%|βββββββββ | 608/711 [26:51<04:00, 2.33s/it]
|
| 1085 |
86%|βββββββββ | 609/711 [26:53<03:57, 2.33s/it]
|
| 1086 |
86%|βββββββββ | 610/711 [26:56<03:55, 2.33s/it]
|
| 1087 |
|
| 1088 |
86%|βββββββββ | 610/711 [26:56<03:55, 2.33s/it]
|
| 1089 |
86%|βββββββββ | 611/711 [26:58<03:52, 2.32s/it]
|
| 1090 |
86%|βββββββββ | 612/711 [27:00<
|
| 1091 |
+
0: {'loss': 0.4531, 'grad_norm': 0.7268143927347638, 'learning_rate': 2.4680002244803154e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.52}
|
| 1092 |
+
0: 03:49, 2.32s/it]
|
| 1093 |
86%|βββββββββ | 613/711 [27:03<03:47, 2.32s/it]
|
| 1094 |
86%|βββββββββ | 614/711 [27:05<03:44, 2.32s/it]
|
| 1095 |
86%|βββββββββ | 615/711 [27:07<03:42, 2.32s/it]
|
| 1096 |
87%|βββββββββ | 616/711 [27:10<03:40, 2.32s/it]
|
| 1097 |
87%|βββββββββ | 617/711 [27:12<03:37, 2.32s/it]
|
| 1098 |
87%|βββββββββ | 618/711 [27:14<03:38, 2.35s/it]
|
| 1099 |
87%|βββββββββ | 619/711 [27:17<03:34, 2.34s/it]
|
| 1100 |
87%|βββββββββ | 620/711 [27:19<03:32, 2.33s/it]
|
| 1101 |
|
| 1102 |
87%|βββββββββ | 620/711 [27:19<03:32, 2.33s/it]
|
| 1103 |
87%|βββββββββ | 621/711 [27:21<03:31, 2.35s/it]
|
| 1104 |
87%|βββββββββ | 622/711 [27:24<03:29, 2.36s/it]
|
| 1105 |
88%|βββββββββ | 623/711 [27:26<03:28, 2.36s/it]
|
| 1106 |
88%|βββββββββ | 624/711 [27:28<03:24, 2.35s/it]
|
| 1107 |
88%|βββββββββ | 625/711 [27:31<03:21, 2.34s/it]
|
| 1108 |
88
|
| 1109 |
+
0: {'loss': 0.4502, 'grad_norm': 0.6625178435258192, 'learning_rate': 2.1222700114117344e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.53}
|
| 1110 |
+
0: %|βββββββββ | 626/711 [27:33<03:18, 2.33s/it]
|
| 1111 |
88%|βββββββββ | 627/711 [27:35<03:15, 2.33s/it]
|
| 1112 |
88%|βββββββββ | 628/711 [27:38<03:12, 2.32s/it]
|
| 1113 |
88%|βββββββββ | 629/711 [27:40<03:10, 2.32s/it]
|
| 1114 |
89%|βββββββββ | 630/711 [27:42<03:10, 2.35s/it]
|
| 1115 |
|
| 1116 |
89%|βββββββββ | 630/711 [27:42<03:10, 2.35s/it]
|
| 1117 |
89%|βββββββββ | 631/711 [27:45<03:07, 2.35s/it]
|
| 1118 |
89%|βββββββββ | 632/711 [27:47<03:09, 2.39s/it]
|
| 1119 |
89%|βββββββββ | 633/711 [27:50<03:04, 2.37s/it]
|
| 1120 |
89%|βββββββββ | 634/711 [27:52<03:00, 2.35s/it]
|
| 1121 |
89%|βββββββββ | 635/711 [27:54<02:57, 2.34s/it]
|
| 1122 |
89%|βββββββββ | 636/711 [27:57<02:56, 2.35s/it]
|
| 1123 |
90%|βββββββββ | 637/711 [27:59<02:55, 2.37s/it]
|
| 1124 |
90%|βββββββββ | 638/711 [28:01<02:52, 2.36s/it]
|
| 1125 |
90%|ββββββοΏ½
|
| 1126 |
+
0: {'loss': 0.455, 'grad_norm': 0.7830830598414732, 'learning_rate': 1.7919965939785867e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.54}
|
| 1127 |
+
0: {'loss': 0.4578, 'grad_norm': 0.6885412177331103, 'learning_rate': 1.4853123998327068e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.55}
|
| 1128 |
+
0: οΏ½οΏ½ββ | 639/711 [28:04<02:49, 2.35s/it]
|
| 1129 |
90%|βββββββββ | 640/711 [28:06<02:46, 2.34s/it]
|
| 1130 |
|
| 1131 |
90%|βββββββββ | 640/711 [28:06<02:46, 2.34s/it]
|
| 1132 |
90%|βββββββββ | 641/711 [28:08<02:43, 2.33s/it]
|
| 1133 |
90%|βββββββββ | 642/711 [28:11<02:40, 2.33s/it]
|
| 1134 |
90%|βββββββββ | 643/711 [28:13<02:38, 2.33s/it]
|
| 1135 |
91%|βββββββββ | 644/711 [28:15<02:35, 2.33s/it]
|
| 1136 |
91%|βββββββββ | 645/711 [28:18<02:35, 2.35s/it]
|
| 1137 |
91%|βββββββββ | 646/711 [28:20<02:32, 2.34s/it]
|
| 1138 |
91%|βββββββββ | 647/711 [28:22<02:29, 2.34s/it]
|
| 1139 |
91%|βββββββββ | 648/711 [28:25<02:27, 2.33s/it]
|
| 1140 |
91%|ββββββββββ| 649/711 [28:27<02:24, 2.33s/it]
|
| 1141 |
91%|ββββββββββ| 650/711 [28:29<02:23, 2.35s/it]
|
| 1142 |
|
| 1143 |
91%|ββββββββββ| 650/711 [28:29<02:23,
|
| 1144 |
+
0: {'loss': 0.4629, 'grad_norm': 0.7008874020320417, 'learning_rate': 1.2097690116604504e-06, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.56}
|
| 1145 |
+
0: 2.35s/it]
|
| 1146 |
92%|ββββββββββ| 651/711 [28:32<02:20, 2.34s/it]
|
| 1147 |
92%|ββββββββββ| 652/711 [28:34<02:17, 2.33s/it]
|
| 1148 |
92%|ββββββββββ| 653/711 [28:36<02:17, 2.37s/it]
|
| 1149 |
92%|ββββββββββ| 654/711 [28:39<02:14, 2.36s/it]
|
| 1150 |
92%|ββββββββββ| 655/711 [28:41<02:11, 2.35s/it]
|
| 1151 |
92%|ββββββββββ| 656/711 [28:44<02:10, 2.37s/it]
|
| 1152 |
92%|ββββββββββ| 657/711 [28:46<02:06, 2.35s/it]
|
| 1153 |
93%|ββββββββββ| 658/711 [28:48<02:04, 2.34s/it]
|
| 1154 |
93%|ββββββββββ| 659/711 [28:51<02:01, 2.33s/it]
|
| 1155 |
93%|ββββββββββ| 660/711 [28:53<01:58, 2.33s/it]
|
| 1156 |
|
| 1157 |
93%|ββββββββββ| 660/711 [28:53<01:58, 2.33s/it]
|
| 1158 |
93%|ββββββββββ| 661/711 [28:55<01:57, 2.35s/it]
|
| 1159 |
93%|ββββββββββ| 662/711 [28:58<01:54, 2.34s/it]
|
| 1160 |
93%|ββββββββββ| 663/711 [29:00
|
| 1161 |
+
0: {'loss': 0.4435, 'grad_norm': 0.698029654257462, 'learning_rate': 9.721512221546967e-07, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.56}
|
| 1162 |
+
0: <01:51, 2.33s/it]
|
| 1163 |
93%|ββββββββββ| 664/711 [29:02<01:49, 2.33s/it]
|
| 1164 |
94%|ββββββββββ| 665/711 [29:05<01:46, 2.32s/it]
|
| 1165 |
94%|ββββββββββ| 666/711 [29:07<01:44, 2.32s/it]
|
| 1166 |
94%|ββββββββββ| 667/711 [29:09<01:42, 2.32s/it]
|
| 1167 |
94%|ββββββββββ| 668/711 [29:11<01:39, 2.32s/it]
|
| 1168 |
94%|ββββββββββ| 669/711 [29:14<01:38, 2.34s/it]
|
| 1169 |
94%|ββββββββββ| 670/711 [29:16<01:36, 2.36s/it]
|
| 1170 |
|
| 1171 |
94%|ββββββββββ| 670/711 [29:16<01:36, 2.36s/it]
|
| 1172 |
94%|ββββββββββ| 671/711 [29:19<01:34, 2.35s/it]
|
| 1173 |
95%|ββββββββββ| 672/711 [29:21<01:31, 2.34s/it]
|
| 1174 |
95%|ββββββββββ| 673/711 [29:23<01:28, 2.34s/it]
|
| 1175 |
95%|ββββββββββ| 674/711 [29:26<01:26, 2.33s/it]
|
| 1176 |
95%|ββββββββββ| 675/711 [29:28<01:24, 2.33s/it]
|
| 1177 |
95%|ββββββββββ| 676/711
|
| 1178 |
+
0: {'loss': 0.4649, 'grad_norm': 0.7032914493453137, 'learning_rate': 7.783099699013075e-07, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.57}
|
| 1179 |
+
0: [29:30<01:21, 2.33s/it]
|
| 1180 |
95%|ββββββββββ| 677/711 [29:33<01:19, 2.33s/it]
|
| 1181 |
95%|ββββββββββ| 678/711 [29:35<01:16, 2.32s/it]
|
| 1182 |
95%|ββββββββββ| 679/711 [29:37<01:14, 2.32s/it]
|
| 1183 |
96%|ββββββββββ| 680/711 [29:39<01:11, 2.32s/it]
|
| 1184 |
|
| 1185 |
96%|ββββββββββ| 680/711 [29:39<01:11, 2.32s/it]
|
| 1186 |
96%|ββββββββββ| 681/711 [29:42<01:09, 2.32s/it]
|
| 1187 |
96%|ββββββββββ| 682/711 [29:44<01:08, 2.36s/it]
|
| 1188 |
96%|ββββββββββ| 683/711 [29:47<01:05, 2.35s/it]
|
| 1189 |
96%|ββββββββββ| 684/711 [29:49<01:03, 2.34s/it]
|
| 1190 |
96%|ββββββββββ| 685/711 [29:51<01:01, 2.36s/it]
|
| 1191 |
96%|ββββββββββ| 686/711 [29:54<00:58, 2.36s/it]
|
| 1192 |
97%|ββββββββββ| 687/711 [29:56<00:56, 2.35s/it]
|
| 1193 |
97%|ββββββββββ| 688/711 [29:58<00:53, 2.34s/it]
|
| 1194 |
97%|ββββββββββ|
|
| 1195 |
+
0: {'loss': 0.4638, 'grad_norm': 0.7522821052393728, 'learning_rate': 6.330182698529928e-07, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.58}
|
| 1196 |
+
0: {'loss': 0.456, 'grad_norm': 0.6485448600183656, 'learning_rate': 5.398536858604507e-07, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.59}
|
| 1197 |
+
0: 689/711 [30:01<00:51, 2.33s/it]
|
| 1198 |
97%|ββββββββββ| 690/711 [30:03<00:48, 2.32s/it]
|
| 1199 |
|
| 1200 |
97%|ββββββββββ| 690/711 [30:03<00:48, 2.32s/it]
|
| 1201 |
97%|ββββββββββ| 691/711 [30:05<00:46, 2.34s/it]
|
| 1202 |
97%|ββββββββββ| 692/711 [30:08<00:44, 2.33s/it]
|
| 1203 |
97%|ββββββββββ| 693/711 [30:10<00:41, 2.33s/it]
|
| 1204 |
98%|ββββββββββ| 694/711 [30:12<00:39, 2.33s/it]
|
| 1205 |
98%|ββββββββββ| 695/711 [30:15<00:37, 2.32s/it]
|
| 1206 |
98%|ββββββββββ| 696/711 [30:17<00:34, 2.32s/it]
|
| 1207 |
98%|ββββββββββ| 697/711 [30:19<00:32, 2.32s/it]
|
| 1208 |
98%|ββββββββββ| 698/711 [30:22<00:30, 2.32s/it]
|
| 1209 |
98%|ββββββββββ| 699/711 [30:24<00:27, 2.32s/it]
|
| 1210 |
98%|ββββββββββ| 700/711 [30:26<00:25, 2.34s/it]
|
| 1211 |
|
| 1212 |
98%|ββββββββββ| 700/711 [30:
|
| 1213 |
+
0: {'loss': 0.4527, 'grad_norm': 0.6894880207361598, 'learning_rate': 5.011102391771039e-07, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.6}
|
| 1214 |
+
0: [2025-11-24 00:42:38,710] [INFO] [axolotl.core.trainers.base._save:613] [PID:1912876] [RANK:0] Saving model checkpoint to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0/checkpoint-711[39m
|
| 1215 |
+
0: [2025-11-24 00:42:45,711] [INFO] [axolotl.core.trainers.base._save:662] [PID:1912876] [RANK:0] Saving Trainer.data_collator.tokenizer by default as Trainer.processing_class is `None`[39m
|
| 1216 |
+
0: {'train_runtime': 1863.7709, 'train_samples_per_second': 6.104, 'train_steps_per_second': 0.381, 'train_loss': 0.49791947950290727, 'memory/max_mem_active(gib)': 58.47, 'memory/max_mem_allocated(gib)': 57.09, 'memory/device_mem_reserved(gib)': 68.71, 'epoch': 0.6}
|
| 1217 |
+
0: 26<00:25, 2.34s/it]
|
| 1218 |
99%|ββββββββββ| 701/711 [30:29<00:23, 2.37s/it]
|
| 1219 |
99%|ββββββββββ| 702/711 [30:31<00:21, 2.37s/it]
|
| 1220 |
99%|ββββββββββ| 703/711 [30:33<00:18, 2.35s/it]
|
| 1221 |
99%|ββββββββββ| 704/711 [30:36<00:16, 2.35s/it]
|
| 1222 |
99%|ββββββββββ| 705/711 [30:38<00:14, 2.36s/it]
|
| 1223 |
99%|ββββββββββ| 706/711 [30:40<00:11, 2.34s/it]
|
| 1224 |
99%|ββββββββββ| 707/711 [30:43<00:09, 2.42s/it]
|
| 1225 |
|
| 1226 |
|
| 1227 |
+
0: .49s/it]
|
| 1228 |
+
0: [2025-11-24 00:42:49,004] [INFO] [axolotl.train.save_trained_model:228] [PID:1912876] [RANK:0] Training completed! Saving trained model to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0.[39m
|
| 1229 |
+
0: [2025-11-24 00:42:51,675] [INFO] [axolotl.core.trainers.base._save:613] [PID:1912876] [RANK:0] Saving model checkpoint to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0[39m
|
| 1230 |
+
0: [2025-11-24 00:42:58,432] [INFO] [axolotl.core.trainers.base._save:662] [PID:1912876] [RANK:0] Saving Trainer.data_collator.tokenizer by default as Trainer.processing_class is `None`[39m
|
| 1231 |
+
0: [2025-11-24 00:42:59,125] [INFO] [axolotl.train.save_trained_model:350] [PID:1912876] [RANK:0] Model successfully saved to /lustre/fswork/projects/rech/dgo/udv55np/ift/Nemotron-Super-49B-v1_5/gemma-3-4b/0[39m
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"boi_token": "<start_of_image>",
|
| 3 |
+
"bos_token": {
|
| 4 |
+
"content": "<bos>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false
|
| 9 |
+
},
|
| 10 |
+
"eoi_token": "<end_of_image>",
|
| 11 |
+
"eos_token": {
|
| 12 |
+
"content": "<eos>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false
|
| 17 |
+
},
|
| 18 |
+
"image_token": "<image_soft_token>",
|
| 19 |
+
"pad_token": {
|
| 20 |
+
"content": "<pad>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false
|
| 25 |
+
},
|
| 26 |
+
"unk_token": {
|
| 27 |
+
"content": "<unk>",
|
| 28 |
+
"lstrip": false,
|
| 29 |
+
"normalized": false,
|
| 30 |
+
"rstrip": false,
|
| 31 |
+
"single_word": false
|
| 32 |
+
}
|
| 33 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
|
| 3 |
+
size 33384568
|
tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
|
| 3 |
+
size 4689074
|
tokenizer_config.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e7d4fb6306d6b78ab8fbed42c85a3ca8c24572a732b2e2c29fe3ef0a1ac7eff
|
| 3 |
+
size 10424
|