maomao88 commited on
Commit
24b8880
·
1 Parent(s): b0223ae

Add support for VLM (vision-language / image-text-to-text models)

Browse files
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ backend/__pycache__/
3
+ *.pyc
4
+
5
+ # PyCharm settings (can be sensitive or specific to your machine)
6
+ .idea/
7
+
8
+ backend/.idea/
.idea/.gitignore DELETED
@@ -1,3 +0,0 @@
1
- # Default ignored files
2
- /shelf/
3
- /workspace.xml
 
 
 
 
.idea/inspectionProfiles/profiles_settings.xml DELETED
@@ -1,6 +0,0 @@
1
- <component name="InspectionProjectProfileManager">
2
- <settings>
3
- <option name="USE_PROJECT_PROFILE" value="false" />
4
- <version value="1.0" />
5
- </settings>
6
- </component>
 
 
 
 
 
 
 
.idea/misc.xml DELETED
@@ -1,7 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="Black">
4
- <option name="sdkName" value="Python 3.13" />
5
- </component>
6
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (model_structure_viewer)" project-jdk-type="Python SDK" />
7
- </project>
 
 
 
 
 
 
 
 
.idea/model_structure_viewer.iml DELETED
@@ -1,10 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <module type="PYTHON_MODULE" version="4">
3
- <component name="NewModuleRootManager">
4
- <content url="file://$MODULE_DIR$">
5
- <excludeFolder url="file://$MODULE_DIR$/.venv" />
6
- </content>
7
- <orderEntry type="jdk" jdkName="Python 3.13 (model_structure_viewer)" jdkType="Python SDK" />
8
- <orderEntry type="sourceFolder" forTests="false" />
9
- </component>
10
- </module>
 
 
 
 
 
 
 
 
 
 
 
.idea/modules.xml DELETED
@@ -1,8 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="ProjectModuleManager">
4
- <modules>
5
- <module fileurl="file://$PROJECT_DIR$/.idea/model_structure_viewer.iml" filepath="$PROJECT_DIR$/.idea/model_structure_viewer.iml" />
6
- </modules>
7
- </component>
8
- </project>
 
 
 
 
 
 
 
 
 
.idea/vcs.xml DELETED
@@ -1,6 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="VcsDirectoryMappings">
4
- <mapping directory="" vcs="Git" />
5
- </component>
6
- </project>
 
 
 
 
 
 
 
backend/__pycache__/app.cpython-313.pyc DELETED
Binary file (2.22 kB)
 
backend/__pycache__/hf_model_utils.cpython-313.pyc DELETED
Binary file (6.87 kB)
 
backend/hf_model_utils.py CHANGED
@@ -3,7 +3,18 @@ import torch.nn as nn
3
  import json
4
  import hashlib
5
  import gc
6
- from transformers import AutoConfig, AutoModel, AutoModelForCausalLM, AutoModelForMaskedLM, AutoModelForSequenceClassification, AutoModelForTokenClassification, AutoModelForQuestionAnswering, AutoModelForSeq2SeqLM, AutoModelForImageClassification
 
 
 
 
 
 
 
 
 
 
 
7
  from accelerate import init_empty_weights
8
 
9
 
@@ -104,10 +115,20 @@ def get_model_structure(model_name: str, model_type: str | None):
104
  config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
105
  with init_empty_weights():
106
  model = AutoModelForImageClassification.from_config(config)
 
 
 
 
107
  else:
108
  config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
109
- with torch.device("meta"):
110
- model = AutoModel.from_config(config, trust_remote_code=True)
 
 
 
 
 
 
111
 
112
  structure = {
113
  "model_type": config.model_type,
 
3
  import json
4
  import hashlib
5
  import gc
6
+ from transformers import (
7
+ AutoConfig,
8
+ AutoModel,
9
+ AutoModelForCausalLM,
10
+ AutoModelForMaskedLM,
11
+ AutoModelForSequenceClassification,
12
+ AutoModelForTokenClassification,
13
+ AutoModelForQuestionAnswering,
14
+ AutoModelForSeq2SeqLM,
15
+ AutoModelForImageClassification,
16
+ AutoModelForImageTextToText
17
+ )
18
  from accelerate import init_empty_weights
19
 
20
 
 
115
  config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
116
  with init_empty_weights():
117
  model = AutoModelForImageClassification.from_config(config)
118
+ elif model_type == "vlm":
119
+ config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
120
+ with init_empty_weights():
121
+ model = AutoModelForImageTextToText.from_config(config, trust_remote_code=True)
122
  else:
123
  config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
124
+ if hasattr(config, "vision_config"):
125
+ # It's a VLM
126
+ with init_empty_weights():
127
+ model = AutoModelForImageTextToText.from_config(config, trust_remote_code=True)
128
+ else:
129
+ # It's a standard model
130
+ with init_empty_weights():
131
+ model = AutoModel.from_config(config, trust_remote_code=True)
132
 
133
  structure = {
134
  "model_type": config.model_type,
backend/requirements.txt CHANGED
@@ -31,7 +31,7 @@ sympy==1.14.0
31
  tokenizers==0.22.0
32
  torch==2.8.0
33
  tqdm==4.67.1
34
- transformers==4.56.1
35
  typing-inspection==0.4.1
36
  typing_extensions==4.15.0
37
  urllib3==2.5.0
@@ -39,4 +39,4 @@ uvicorn==0.35.0
39
  uvloop==0.21.0
40
  watchfiles==1.1.0
41
  websockets==15.0.1
42
- accelerate==1.10.1
 
31
  tokenizers==0.22.0
32
  torch==2.8.0
33
  tqdm==4.67.1
34
+ transformers>=4.57.0
35
  typing-inspection==0.4.1
36
  typing_extensions==4.15.0
37
  urllib3==2.5.0
 
39
  uvloop==0.21.0
40
  watchfiles==1.1.0
41
  websockets==15.0.1
42
+ accelerate==1.12.0
frontend/src/components/ModelInputBar.jsx CHANGED
@@ -11,6 +11,7 @@ export default function ModelInputBar({ loading, fetchModelStructure }) {
11
  { label: "Question Answering Models (e.g. BERT QA, RoBERTa QA)", value: "qa", default: "distilbert-base-uncased-distilled-squad" },
12
  { label: "Seq2Seq (encoder-decoder, e.g. T5, BART, MarianMT)", value: "s2s", default: "t5-base" },
13
  { label: "Vision models (image classification, CLIP vision tower, etc.)", value: "vision", default: "google/vit-base-patch16-224" },
 
14
  ];
15
 
16
  const [modelName, setModelName] = useState("");
 
11
  { label: "Question Answering Models (e.g. BERT QA, RoBERTa QA)", value: "qa", default: "distilbert-base-uncased-distilled-squad" },
12
  { label: "Seq2Seq (encoder-decoder, e.g. T5, BART, MarianMT)", value: "s2s", default: "t5-base" },
13
  { label: "Vision models (image classification, CLIP vision tower, etc.)", value: "vision", default: "google/vit-base-patch16-224" },
14
+ { label: "Vision-Language models (image-text models, Qwen-VL, etc.)", value: "vlm", default: "Qwen/Qwen3-VL-8B-Instruct" },
15
  ];
16
 
17
  const [modelName, setModelName] = useState("");
package-lock.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "name": "model_structure_viewer",
3
+ "lockfileVersion": 3,
4
+ "requires": true,
5
+ "packages": {}
6
+ }