yuchenxie
/

Arlow-Vision-Encoder

Text Generation

Model card Files Files and versions

Arlow-Vision-Encoder / config.json

yuchenxie's picture

Upload folder using huggingface_hub

49d4668 verified about 2 months ago

history blame contribute delete

753 Bytes

	{
	"architectures": [
	"ArlowVLVisionModel"
	],
	"deepstack_visual_indexes": [
	12,
	24,
	44
	],
	"deformable_attention_strength": 4.0,
	"deformable_attention_window": 0.25,
	"depth": 48,
	"dtype": "float16",
	"embed_dim": 1536,
	"hidden_act": "gelu_pytorch_tanh",
	"hidden_size": 3072,
	"in_channels": 3,
	"initializer_range": 0.02,
	"max_position_embeddings": 32768,
	"mlp_ratio": 4,
	"model_type": "arlow",
	"mrope_sections": [
	21,
	21,
	22
	],
	"num_attention_heads": 24,
	"num_heads": 24,
	"patch_size": 14,
	"spatial_merge_size": 2,
	"temporal_patch_size": 2,
	"token_pruning_ratio": 0.0,
	"transformers_version": "5.3.0.dev0",
	"use_deformable_attention": true,
	"use_progressive_patches": true
	}