Arlow-Vision-Encoder / config.json
yuchenxie's picture
Upload folder using huggingface_hub
49d4668 verified
{
"architectures": [
"ArlowVLVisionModel"
],
"deepstack_visual_indexes": [
12,
24,
44
],
"deformable_attention_strength": 4.0,
"deformable_attention_window": 0.25,
"depth": 48,
"dtype": "float16",
"embed_dim": 1536,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 3072,
"in_channels": 3,
"initializer_range": 0.02,
"max_position_embeddings": 32768,
"mlp_ratio": 4,
"model_type": "arlow",
"mrope_sections": [
21,
21,
22
],
"num_attention_heads": 24,
"num_heads": 24,
"patch_size": 14,
"spatial_merge_size": 2,
"temporal_patch_size": 2,
"token_pruning_ratio": 0.0,
"transformers_version": "5.3.0.dev0",
"use_deformable_attention": true,
"use_progressive_patches": true
}