Changing the model from [facebook/mms-1b-all] to [ai4bharat/indicwav2vec-hindi]
Browse files
diagnosis/ai_engine/detect_stuttering.py
CHANGED
|
@@ -22,7 +22,7 @@ from sklearn.ensemble import IsolationForest
|
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
# === CONFIGURATION ===
|
| 25 |
-
MODEL_ID = "
|
| 26 |
LID_MODEL_ID = "facebook/mms-lid-126"
|
| 27 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 28 |
|
|
@@ -138,17 +138,14 @@ class AdvancedStutterDetector:
|
|
| 138 |
def __init__(self):
|
| 139 |
logger.info(f"🚀 Initializing Advanced AI Engine on {DEVICE}...")
|
| 140 |
try:
|
| 141 |
-
# Wav2Vec2 Model Loading
|
| 142 |
self.processor = AutoProcessor.from_pretrained(MODEL_ID)
|
| 143 |
self.model = Wav2Vec2ForCTC.from_pretrained(
|
| 144 |
MODEL_ID,
|
| 145 |
-
torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
|
| 146 |
-
target_lang="eng",
|
| 147 |
-
ignore_mismatched_sizes=True
|
| 148 |
).to(DEVICE)
|
| 149 |
self.model.eval()
|
| 150 |
-
self.loaded_adapters = set()
|
| 151 |
-
self._init_common_adapters()
|
| 152 |
|
| 153 |
# Anomaly Detection Model (for outlier stutter events)
|
| 154 |
self.anomaly_detector = IsolationForest(
|
|
@@ -162,12 +159,9 @@ class AdvancedStutterDetector:
|
|
| 162 |
raise
|
| 163 |
|
| 164 |
def _init_common_adapters(self):
|
| 165 |
-
"""Preload common language adapters"""
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
self.model.load_adapter(code)
|
| 169 |
-
self.loaded_adapters.add(code)
|
| 170 |
-
except: pass
|
| 171 |
|
| 172 |
def _detect_language_robust(self, audio_path: str) -> str:
|
| 173 |
"""Detect language using MMS LID model"""
|
|
@@ -190,18 +184,12 @@ class AdvancedStutterDetector:
|
|
| 190 |
return 'eng'
|
| 191 |
|
| 192 |
def _activate_adapter(self, lang_code: str):
|
| 193 |
-
"""Activate language adapter
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
logger.warning(f"Failed to load adapter {lang_code}: {e}")
|
| 200 |
-
|
| 201 |
-
try:
|
| 202 |
-
self.model.set_adapter(lang_code)
|
| 203 |
-
except Exception as e:
|
| 204 |
-
logger.warning(f"Failed to activate adapter {lang_code}: {e}")
|
| 205 |
|
| 206 |
def _extract_comprehensive_features(self, audio: np.ndarray, sr: int, audio_path: str) -> Dict[str, Any]:
|
| 207 |
"""Extract multi-modal acoustic features"""
|
|
@@ -666,10 +654,11 @@ class AdvancedStutterDetector:
|
|
| 666 |
start_time = time.time()
|
| 667 |
|
| 668 |
# === STEP 1: Language Detection & Setup ===
|
|
|
|
| 669 |
if language == 'auto':
|
| 670 |
lang_code = self._detect_language_robust(audio_path)
|
| 671 |
else:
|
| 672 |
-
lang_code = INDIAN_LANGUAGES.get(language.lower(), '
|
| 673 |
self._activate_adapter(lang_code)
|
| 674 |
|
| 675 |
# === STEP 2: Audio Loading & Preprocessing ===
|
|
@@ -775,16 +764,16 @@ class AdvancedStutterDetector:
|
|
| 775 |
'energy_entropy': float(np.mean(features['energy_entropy']))
|
| 776 |
},
|
| 777 |
'analysis_duration_seconds': round(time.time() - start_time, 2),
|
| 778 |
-
'model_version': f'
|
| 779 |
}
|
| 780 |
|
| 781 |
|
| 782 |
# Legacy methods - kept for backward compatibility but may not work without additional model initialization
|
| 783 |
# These methods reference models (xlsr, base, large) that are not initialized in __init__
|
| 784 |
-
# The main analyze_audio() method uses the
|
| 785 |
|
| 786 |
def generate_target_transcript(self, audio_file: str) -> str:
|
| 787 |
-
"""Generate expected transcript - Legacy method (uses
|
| 788 |
try:
|
| 789 |
audio, sr = librosa.load(audio_file, sr=16000)
|
| 790 |
transcript, _, _ = self._transcribe_with_timestamps(audio)
|
|
|
|
| 22 |
# Module-level logger, named after this module for hierarchical filtering.
logger = logging.getLogger(__name__)

# === CONFIGURATION ===
# ASR checkpoint: monolingual Hindi wav2vec2 model from AI4Bharat
# (replaces the earlier multilingual facebook/mms-1b-all checkpoint).
MODEL_ID = "ai4bharat/indicwav2vec-hindi"
# Spoken language-identification checkpoint used by _detect_language_robust().
LID_MODEL_ID = "facebook/mms-lid-126"
# Run on GPU when available; models/tensors are moved with .to(DEVICE).
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 28 |
|
|
|
|
| 138 |
def __init__(self):
|
| 139 |
logger.info(f"🚀 Initializing Advanced AI Engine on {DEVICE}...")
|
| 140 |
try:
|
| 141 |
+
# Wav2Vec2 Model Loading - IndicWav2Vec Hindi Model
|
| 142 |
self.processor = AutoProcessor.from_pretrained(MODEL_ID)
|
| 143 |
self.model = Wav2Vec2ForCTC.from_pretrained(
|
| 144 |
MODEL_ID,
|
| 145 |
+
torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
|
|
|
|
|
|
|
| 146 |
).to(DEVICE)
|
| 147 |
self.model.eval()
|
| 148 |
+
self.loaded_adapters = set() # Keep for backward compatibility but not used with indicwav2vec
|
|
|
|
| 149 |
|
| 150 |
# Anomaly Detection Model (for outlier stutter events)
|
| 151 |
self.anomaly_detector = IsolationForest(
|
|
|
|
| 159 |
raise
|
| 160 |
|
| 161 |
def _init_common_adapters(self):
|
| 162 |
+
"""Preload common language adapters - Not applicable for indicwav2vec-hindi"""
|
| 163 |
+
# IndicWav2Vec Hindi model is pre-trained for Hindi, no adapters needed
|
| 164 |
+
pass
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
def _detect_language_robust(self, audio_path: str) -> str:
|
| 167 |
"""Detect language using MMS LID model"""
|
|
|
|
| 184 |
return 'eng'
|
| 185 |
|
| 186 |
def _activate_adapter(self, lang_code: str):
    """Log (but otherwise ignore) an adapter-activation request.

    The ai4bharat/indicwav2vec-hindi checkpoint is a monolingual Hindi
    model, so there are no MMS-style language adapters to switch. This
    method is kept so callers written for the adapter-based MMS model
    keep working unchanged.

    Args:
        lang_code: Language code requested by the caller (e.g. 'hin',
            'eng'), as produced by _detect_language_robust() or the
            INDIAN_LANGUAGES lookup.
    """
    # Surface a hint when a non-Hindi language is requested, since this
    # model cannot actually switch languages. Lazy %-formatting defers
    # string interpolation until the record is actually emitted.
    # (Removed a dead trailing `pass` that followed these statements.)
    if lang_code != 'hin':
        logger.info(
            "Note: Using Hindi-specific model (indicwav2vec-hindi), "
            "language code '%s' requested but model is optimized for Hindi",
            lang_code,
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
def _extract_comprehensive_features(self, audio: np.ndarray, sr: int, audio_path: str) -> Dict[str, Any]:
|
| 195 |
"""Extract multi-modal acoustic features"""
|
|
|
|
| 654 |
start_time = time.time()
|
| 655 |
|
| 656 |
# === STEP 1: Language Detection & Setup ===
|
| 657 |
+
# Note: indicwav2vec-hindi is optimized for Hindi, but can handle other languages
|
| 658 |
if language == 'auto':
|
| 659 |
lang_code = self._detect_language_robust(audio_path)
|
| 660 |
else:
|
| 661 |
+
lang_code = INDIAN_LANGUAGES.get(language.lower(), 'hin') # Default to Hindi for indicwav2vec
|
| 662 |
self._activate_adapter(lang_code)
|
| 663 |
|
| 664 |
# === STEP 2: Audio Loading & Preprocessing ===
|
|
|
|
| 764 |
'energy_entropy': float(np.mean(features['energy_entropy']))
|
| 765 |
},
|
| 766 |
'analysis_duration_seconds': round(time.time() - start_time, 2),
|
| 767 |
+
'model_version': f'indicwav2vec-hindi-v1-{lang_code}'
|
| 768 |
}
|
| 769 |
|
| 770 |
|
| 771 |
# Legacy methods - kept for backward compatibility but may not work without additional model initialization
|
| 772 |
# These methods reference models (xlsr, base, large) that are not initialized in __init__
|
| 773 |
+
# The main analyze_audio() method uses the IndicWav2Vec Hindi model instead
|
| 774 |
|
| 775 |
def generate_target_transcript(self, audio_file: str) -> str:
|
| 776 |
+
"""Generate expected transcript - Legacy method (uses IndicWav2Vec Hindi model)"""
|
| 777 |
try:
|
| 778 |
audio, sr = librosa.load(audio_file, sr=16000)
|
| 779 |
transcript, _, _ = self._transcribe_with_timestamps(audio)
|