nivakaran committed on
Commit eb6b502 · verified · 1 Parent(s): 752f5cc

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  ModelX[[:space:]]Final[[:space:]]Problem.pdf filter=lfs diff=lfs merge=lfs -text
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  ModelX[[:space:]]Final[[:space:]]Problem.pdf filter=lfs diff=lfs merge=lfs -text
37
+ trending_detection_visualization.png filter=lfs diff=lfs merge=lfs -text
38
+ vectorizer_anomaly_visualization.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -345,7 +345,7 @@ DELETE /api/intel/config/remove?target_type=profile&value=CompetitorX&platform=t
345
  ---
346
 
347
  ### 8. Vectorization Agent Graph (`vectorizationAgentGraph.py`) 🆕
348
- **Multilingual Text-to-Vector Conversion + Anomaly Detection**
349
 
350
  ```
351
  ┌─────────────────────────────────────────────────┐
@@ -366,28 +366,42 @@ DELETE /api/intel/config/remove?target_type=profile&value=CompetitorX&platform=t
366
 
367
 
368
  ┌─────────────────────────────────────────────────┐
369
- │ Step 3: Anomaly Detection (Isolation Forest) 🆕
370
- │ - Runs inference on every graph cycle
 
371
  │ - Outputs anomaly_score (0-1) │
372
- │ - Graceful fallback if model not trained │
373
  └─────────────────┬───────────────────────────────┘
374
 
375
 
376
  ┌─────────────────────────────────────────────────┐
377
- │ Step 4: Expert Summary (GroqLLM)
378
- │ - Opportunity identification
379
- │ - Threat detection
380
  │ - Sentiment analysis │
381
  └─────────────────┬───────────────────────────────┘
382
 
383
 
384
  ┌─────────────────────────────────────────────────┐
385
- │ Step 5: Format Output │
386
- │ - Includes anomaly insights in domain_insights
387
- │ - Passes results to parent graph │
388
  └─────────────────────────────────────────────────┘
389
  ```
390
 
391
  ---
392
 
393
  ### 10. Weather Prediction Pipeline (`models/weather-prediction/`) 🆕
 
345
  ---
346
 
347
  ### 8. Vectorization Agent Graph (`vectorizationAgentGraph.py`) 🆕
348
+ **6-Step Multilingual NLP Pipeline with Anomaly + Trending Detection**
349
 
350
  ```
351
  ┌─────────────────────────────────────────────────┐
 
366
 
367
 
368
  ┌─────────────────────────────────────────────────┐
369
+ │ Step 3: Anomaly Detection (Isolation Forest)
370
+ │ - English: ML model inference
371
+ │ - Sinhala/Tamil: Skipped (incompatible vectors) │
372
  │ - Outputs anomaly_score (0-1) │
 
373
  └─────────────────┬───────────────────────────────┘
374
 
375
 
376
  ┌─────────────────────────────────────────────────┐
377
+ │ Step 4: Trending Detection 🆕
378
+ │ - Entity extraction (hashtags, proper nouns)
379
+ │ - Momentum: current_hour / avg_last_6_hours
380
+ │ - Spike alerts when momentum > 3x │
381
+ └─────────────────┬───────────────────────────────┘
382
+
383
+
384
+ ┌─────────────────────────────────────────────────┐
385
+ │ Step 5: Expert Summary (GroqLLM) │
386
+ │ - Opportunity & threat identification │
387
  │ - Sentiment analysis │
388
  └─────────────────┬───────────────────────────────┘
389
 
390
 
391
  ┌─────────────────────────────────────────────────┐
392
+ │ Step 6: Format Output │
393
+ │ - Includes anomaly + trending in domain_insights│
 
394
  └─────────────────────────────────────────────────┘
395
  ```
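Step 4's momentum rule is the core of the new trending logic: the current hour's mention count is divided by the average over the previous six hours, and a spike is flagged when that ratio exceeds 3x. A minimal sketch of that calculation (illustrative only; the actual implementation lives in `src/utils/trending_detector.py`, which is not shown in this diff):

```python
# Minimal sketch of the Step 4 momentum/spike rule described above (assumption:
# hourly counts are already tracked; not the project's actual code).
from collections import deque
from statistics import mean

def momentum(hourly_counts, current_hour_count):
    """current_hour / avg_last_6_hours; baseline floored at 1 to avoid division by zero."""
    baseline = mean(hourly_counts) if hourly_counts else 1.0
    return current_hour_count / max(baseline, 1.0)

def is_spike(m, threshold=3.0):
    return m > threshold

history = deque([2, 3, 1, 2, 2, 2], maxlen=6)   # mentions in each of the last 6 hours
m = momentum(history, current_hour_count=9)      # 9 / 2.0 = 4.5
print(round(m, 2), is_spike(m))                  # 4.5 True
```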
396
 
397
+ **Trending Detection API Endpoints:**
398
+
399
+ | Endpoint | Method | Description |
400
+ |----------|--------|-------------|
401
+ | `/api/trending` | GET | Get trending topics & spike alerts |
402
+ | `/api/trending/topic/{topic}` | GET | Get hourly history for a topic |
403
+ | `/api/trending/record` | POST | Record a topic mention (testing) |
404
+
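A quick way to exercise these endpoints locally (the base URL and port are assumptions; the parameters match the handlers added in `main.py` below):

```python
import requests

BASE = "http://localhost:8000"   # assumed local FastAPI host/port

# Record a few mentions via the testing endpoint, then read trending data back.
for _ in range(4):
    requests.post(f"{BASE}/api/trending/record",
                  params={"topic": "earthquake", "source": "manual", "domain": "general"})

trending = requests.get(f"{BASE}/api/trending", params={"limit": 10}).json()
print(trending["total_trending"], trending["spike_alerts"])

history = requests.get(f"{BASE}/api/trending/topic/earthquake", params={"hours": 24}).json()
print(history["momentum"], history["is_spike"])
```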
405
  ---
406
 
407
  ### 10. Weather Prediction Pipeline (`models/weather-prediction/`) 🆕
create_visualization.py ADDED
@@ -0,0 +1,120 @@
1
+ """
2
+ create_visualization.py
3
+ Creates visualization of multilingual embeddings and anomaly detection results
4
+ using actual training data.
5
+ """
6
+ import sys
7
+ import numpy as np
8
+ import matplotlib
9
+ matplotlib.use('Agg')
10
+ import matplotlib.pyplot as plt
11
+ from pathlib import Path
12
+ from sklearn.decomposition import PCA
13
+ import joblib
14
+
15
+ if sys.platform == 'win32':
16
+ sys.stdout.reconfigure(encoding='utf-8')
17
+
18
+ print("=" * 60)
19
+ print(" CREATING VECTORIZER VISUALIZATION")
20
+ print("=" * 60)
21
+
22
+ # Load saved embeddings from the training pipeline
23
+ embeddings_path = Path('models/anomaly-detection/artifacts/data_transformation')
24
+
25
+ # Find the latest embeddings file
26
+ emb_files = list(embeddings_path.glob('embeddings_*.npy'))
27
+ if emb_files:
28
+ latest_emb = sorted(emb_files)[-1]
29
+ embeddings = np.load(latest_emb)
30
+ print(f'Loaded embeddings: {embeddings.shape}')
31
+ else:
32
+ print('No embeddings found')
33
+ sys.exit(1)
34
+
35
+ # Load transformed data to get language info
36
+ import pandas as pd
37
+ data_files = list(embeddings_path.glob('transformed_*.parquet'))
38
+ if data_files:
39
+ latest_data = sorted(data_files)[-1]
40
+ df = pd.read_parquet(latest_data)
41
+ languages = df['language'].values
42
+ lang_counts = df['language'].value_counts().to_dict()
43
+ print(f'Languages: {lang_counts}')
44
+ else:
45
+ languages = ['english'] * len(embeddings)
46
+ lang_counts = {'english': len(embeddings)}
47
+
48
+ # Load anomaly model and predict
49
+ model_path = Path('models/anomaly-detection/artifacts/model_trainer/isolation_forest_embeddings_only.joblib')
50
+ model = joblib.load(model_path)
51
+ predictions = model.predict(embeddings)
52
+ anomaly_mask = predictions == -1
53
+
54
+ print(f'Total samples: {len(embeddings)}')
55
+ print(f'Anomalies detected: {anomaly_mask.sum()}')
56
+ print(f'Normal samples: {(~anomaly_mask).sum()}')
57
+
58
+ # PCA for visualization
59
+ print('\nRunning PCA...')
60
+ pca = PCA(n_components=2)
61
+ X_2d = pca.fit_transform(embeddings)
62
+ print(f'Explained variance: {pca.explained_variance_ratio_.sum():.2%}')
63
+
64
+ # Create figure with 2 subplots
65
+ fig, axes = plt.subplots(1, 2, figsize=(14, 6))
66
+
67
+ # Plot 1: By Language
68
+ ax1 = axes[0]
69
+ colors = {'english': '#3498db', 'sinhala': '#2ecc71', 'tamil': '#e74c3c', 'unknown': '#95a5a6'}
70
+
71
+ for lang in colors:
72
+ mask = np.array(languages) == lang
73
+ if mask.any():
74
+ ax1.scatter(X_2d[mask, 0], X_2d[mask, 1],
75
+ c=colors[lang], label=f'{lang.capitalize()} ({mask.sum()})',
76
+ alpha=0.7, s=60, edgecolors='white', linewidth=0.5)
77
+
78
+ ax1.set_title('Text Embeddings by Language (PCA Projection)', fontsize=14, fontweight='bold')
79
+ ax1.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.1%} variance)')
80
+ ax1.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.1%} variance)')
81
+ ax1.legend(loc='best', framealpha=0.9)
82
+ ax1.grid(True, alpha=0.3)
83
+
84
+ # Plot 2: Anomalies
85
+ ax2 = axes[1]
86
+ normal_mask = ~anomaly_mask
87
+
88
+ # Plot normal points first (so anomalies are on top)
89
+ ax2.scatter(X_2d[normal_mask, 0], X_2d[normal_mask, 1],
90
+ c='#3498db', label=f'Normal ({normal_mask.sum()})', alpha=0.6, s=60,
91
+ edgecolors='white', linewidth=0.5)
92
+
93
+ # Plot anomalies with X markers
94
+ ax2.scatter(X_2d[anomaly_mask, 0], X_2d[anomaly_mask, 1],
95
+ c='#e74c3c', marker='X', label=f'Anomaly ({anomaly_mask.sum()})',
96
+ alpha=0.9, s=120, edgecolors='black', linewidth=0.5)
97
+
98
+ ax2.set_title('Anomaly Detection Results (Isolation Forest)', fontsize=14, fontweight='bold')
99
+ ax2.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.1%} variance)')
100
+ ax2.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.1%} variance)')
101
+ ax2.legend(loc='best', framealpha=0.9)
102
+ ax2.grid(True, alpha=0.3)
103
+
104
+ plt.tight_layout()
105
+ output_path = 'vectorizer_anomaly_visualization.png'
106
+ plt.savefig(output_path, dpi=150, bbox_inches='tight', facecolor='white')
107
+ print(f'\nVisualization saved: {output_path}')
108
+
109
+ # Also create the visualization in artifacts dir
110
+ artifacts_dir = Path(r'C:\Users\LENOVO\.gemini\antigravity\brain\b892f63f-afbc-4c4a-bbf1-37195faf04a5')
111
+ if artifacts_dir.exists():
112
+ artifacts_output = artifacts_dir / 'vectorizer_visualization.png'
113
+ plt.savefig(str(artifacts_output), dpi=150, bbox_inches='tight', facecolor='white')
114
+ print(f'Also saved to: {artifacts_output}')
115
+
116
+ plt.close()
117
+
118
+ print("\n" + "=" * 60)
119
+ print(" VISUALIZATION COMPLETE")
120
+ print("=" * 60)
frontend/app/components/dashboard/TrendingTopics.tsx ADDED
@@ -0,0 +1,192 @@
1
+ /**
2
+ * TrendingTopics.tsx
3
+ * Dashboard component for displaying trending topics and spike alerts
4
+ */
5
+
6
+ import React, { useEffect, useState } from 'react';
7
+
8
+ interface TrendingTopic {
9
+ topic: string;
10
+ momentum: number;
11
+ is_spike: boolean;
12
+ count_current_hour?: number;
13
+ avg_count?: number;
14
+ }
15
+
16
+ interface TrendingData {
17
+ status: string;
18
+ trending_topics: TrendingTopic[];
19
+ spike_alerts: TrendingTopic[];
20
+ total_trending: number;
21
+ total_spikes: number;
22
+ }
23
+
24
+ export const TrendingTopics: React.FC = () => {
25
+ const [data, setData] = useState<TrendingData | null>(null);
26
+ const [loading, setLoading] = useState(true);
27
+ const [error, setError] = useState<string | null>(null);
28
+
29
+ useEffect(() => {
30
+ const fetchTrending = async () => {
31
+ try {
32
+ const response = await fetch('/api/trending');
33
+ const result = await response.json();
34
+ setData(result);
35
+ setError(null);
36
+ } catch (err) {
37
+ setError('Failed to fetch trending data');
38
+ console.error('Trending fetch error:', err);
39
+ } finally {
40
+ setLoading(false);
41
+ }
42
+ };
43
+
44
+ fetchTrending();
45
+ // Refresh every 30 seconds
46
+ const interval = setInterval(fetchTrending, 30000);
47
+ return () => clearInterval(interval);
48
+ }, []);
49
+
50
+ const getMomentumColor = (momentum: number) => {
51
+ if (momentum >= 10) return 'text-red-500';
52
+ if (momentum >= 5) return 'text-orange-500';
53
+ if (momentum >= 2) return 'text-yellow-500';
54
+ return 'text-gray-400';
55
+ };
56
+
57
+ const getMomentumBg = (momentum: number) => {
58
+ if (momentum >= 10) return 'bg-red-500/20';
59
+ if (momentum >= 5) return 'bg-orange-500/20';
60
+ if (momentum >= 2) return 'bg-yellow-500/20';
61
+ return 'bg-gray-500/10';
62
+ };
63
+
64
+ if (loading) {
65
+ return (
66
+ <div className="bg-gradient-to-br from-gray-900/90 to-gray-800/90 backdrop-blur-lg rounded-2xl p-6 border border-gray-700/50 shadow-xl">
67
+ <div className="flex items-center gap-3 mb-4">
68
+ <div className="w-10 h-10 rounded-xl bg-gradient-to-br from-purple-500 to-pink-500 flex items-center justify-center">
69
+ <svg className="w-5 h-5 text-white animate-pulse" fill="none" viewBox="0 0 24 24" stroke="currentColor">
70
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 7h8m0 0v8m0-8l-8 8-4-4-6 6" />
71
+ </svg>
72
+ </div>
73
+ <div>
74
+ <h3 className="text-lg font-bold text-white">Trending Topics</h3>
75
+ <p className="text-xs text-gray-400">Loading...</p>
76
+ </div>
77
+ </div>
78
+ <div className="animate-pulse space-y-3">
79
+ {[1, 2, 3].map((i) => (
80
+ <div key={i} className="h-10 bg-gray-700/50 rounded-lg"></div>
81
+ ))}
82
+ </div>
83
+ </div>
84
+ );
85
+ }
86
+
87
+ if (error || !data) {
88
+ return (
89
+ <div className="bg-gradient-to-br from-gray-900/90 to-gray-800/90 backdrop-blur-lg rounded-2xl p-6 border border-red-700/50 shadow-xl">
90
+ <div className="flex items-center gap-3">
91
+ <div className="w-10 h-10 rounded-xl bg-red-500/20 flex items-center justify-center">
92
+ <svg className="w-5 h-5 text-red-400" fill="none" viewBox="0 0 24 24" stroke="currentColor">
93
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
94
+ </svg>
95
+ </div>
96
+ <div>
97
+ <h3 className="text-lg font-bold text-white">Trending Topics</h3>
98
+ <p className="text-xs text-red-400">{error || 'No data available'}</p>
99
+ </div>
100
+ </div>
101
+ </div>
102
+ );
103
+ }
104
+
105
+ return (
106
+ <div className="bg-gradient-to-br from-gray-900/90 to-gray-800/90 backdrop-blur-lg rounded-2xl p-6 border border-gray-700/50 shadow-xl">
107
+ {/* Header */}
108
+ <div className="flex items-center justify-between mb-4">
109
+ <div className="flex items-center gap-3">
110
+ <div className="w-10 h-10 rounded-xl bg-gradient-to-br from-purple-500 to-pink-500 flex items-center justify-center">
111
+ <svg className="w-5 h-5 text-white" fill="none" viewBox="0 0 24 24" stroke="currentColor">
112
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 7h8m0 0v8m0-8l-8 8-4-4-6 6" />
113
+ </svg>
114
+ </div>
115
+ <div>
116
+ <h3 className="text-lg font-bold text-white">Trending Topics</h3>
117
+ <p className="text-xs text-gray-400">{data.total_trending} trending • {data.total_spikes} spikes</p>
118
+ </div>
119
+ </div>
120
+ {data.total_spikes > 0 && (
121
+ <span className="px-2 py-1 bg-red-500/20 text-red-400 text-xs font-medium rounded-lg animate-pulse">
122
+ 🔥 {data.total_spikes} SPIKE{data.total_spikes > 1 ? 'S' : ''}
123
+ </span>
124
+ )}
125
+ </div>
126
+
127
+ {/* Spike Alerts */}
128
+ {data.spike_alerts.length > 0 && (
129
+ <div className="mb-4 p-3 bg-red-500/10 rounded-xl border border-red-500/30">
130
+ <h4 className="text-sm font-semibold text-red-400 mb-2 flex items-center gap-2">
131
+ <span>🔥</span> SPIKE ALERTS
132
+ </h4>
133
+ <div className="flex flex-wrap gap-2">
134
+ {data.spike_alerts.slice(0, 5).map((spike, idx) => (
135
+ <span
136
+ key={idx}
137
+ className="px-3 py-1 bg-red-500/20 text-red-300 text-sm font-medium rounded-full border border-red-500/30"
138
+ >
139
+ {spike.topic} <span className="text-red-400 font-bold">{spike.momentum.toFixed(0)}x</span>
140
+ </span>
141
+ ))}
142
+ </div>
143
+ </div>
144
+ )}
145
+
146
+ {/* Trending Topics List */}
147
+ <div className="space-y-2">
148
+ {data.trending_topics.length === 0 ? (
149
+ <div className="text-center py-8 text-gray-500">
150
+ <svg className="w-12 h-12 mx-auto mb-2 opacity-50" fill="none" viewBox="0 0 24 24" stroke="currentColor">
151
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 7h8m0 0v8m0-8l-8 8-4-4-6 6" />
152
+ </svg>
153
+ <p>No trending topics yet</p>
154
+ <p className="text-xs mt-1">Topics will appear as data flows in</p>
155
+ </div>
156
+ ) : (
157
+ data.trending_topics.slice(0, 8).map((topic, idx) => (
158
+ <div
159
+ key={idx}
160
+ className={`flex items-center justify-between p-3 rounded-xl ${getMomentumBg(topic.momentum)} border border-gray-700/30 transition-all hover:scale-[1.02]`}
161
+ >
162
+ <div className="flex items-center gap-3">
163
+ <span className="text-lg font-bold text-gray-500">#{idx + 1}</span>
164
+ <div>
165
+ <p className="font-semibold text-white capitalize">{topic.topic}</p>
166
+ <p className="text-xs text-gray-400">
167
+ {topic.is_spike ? '🔥 Spiking' : 'Trending'}
168
+ </p>
169
+ </div>
170
+ </div>
171
+ <div className="text-right">
172
+ <p className={`text-lg font-bold ${getMomentumColor(topic.momentum)}`}>
173
+ {topic.momentum.toFixed(0)}x
174
+ </p>
175
+ <p className="text-xs text-gray-500">momentum</p>
176
+ </div>
177
+ </div>
178
+ ))
179
+ )}
180
+ </div>
181
+
182
+ {/* Footer */}
183
+ <div className="mt-4 pt-4 border-t border-gray-700/50">
184
+ <p className="text-xs text-gray-500 text-center">
185
+ Momentum = current hour mentions / avg last 6 hours
186
+ </p>
187
+ </div>
188
+ </div>
189
+ );
190
+ };
191
+
192
+ export default TrendingTopics;
main.py CHANGED
@@ -913,6 +913,113 @@ def get_currency_history(days: int = 7):
913
  }
914
 
915
 
916
  # ============================================
917
  # ANOMALY DETECTION ENDPOINTS
918
  # ============================================
 
913
  }
914
 
915
 
916
+ # ============================================
917
+ # TRENDING DETECTION ENDPOINTS
918
+ # ============================================
919
+
920
+ @app.get("/api/trending")
921
+ def get_trending_topics(limit: int = 10):
922
+ """
923
+ Get currently trending topics.
924
+
925
+ Returns topics with momentum > 2x (gaining traction).
926
+ """
927
+ try:
928
+ from src.utils.trending_detector import get_trending_now, get_spikes
929
+
930
+ trending = get_trending_now(limit=limit)
931
+ spikes = get_spikes()
932
+
933
+ return {
934
+ "status": "success",
935
+ "trending_topics": trending,
936
+ "spike_alerts": spikes,
937
+ "total_trending": len(trending),
938
+ "total_spikes": len(spikes)
939
+ }
940
+
941
+ except Exception as e:
942
+ logger.error(f"[TrendingAPI] Error: {e}")
943
+ return {
944
+ "status": "error",
945
+ "error": str(e),
946
+ "trending_topics": [],
947
+ "spike_alerts": []
948
+ }
949
+
950
+
951
+ @app.get("/api/trending/topic/{topic}")
952
+ def get_topic_history(topic: str, hours: int = 24):
953
+ """
954
+ Get hourly mention history for a specific topic.
955
+
956
+ Args:
957
+ topic: Topic name to get history for
958
+ hours: Number of hours of history to return (default 24)
959
+ """
960
+ try:
961
+ from src.utils.trending_detector import get_trending_detector
962
+
963
+ detector = get_trending_detector()
964
+ history = detector.get_topic_history(topic, hours=hours)
965
+ momentum = detector.get_momentum(topic)
966
+ is_spike = detector.is_spike(topic)
967
+
968
+ return {
969
+ "status": "success",
970
+ "topic": topic,
971
+ "momentum": momentum,
972
+ "is_spike": is_spike,
973
+ "history": history
974
+ }
975
+
976
+ except Exception as e:
977
+ logger.error(f"[TrendingAPI] Error getting history for {topic}: {e}")
978
+ return {
979
+ "status": "error",
980
+ "error": str(e),
981
+ "topic": topic,
982
+ "momentum": 1.0,
983
+ "is_spike": False,
984
+ "history": []
985
+ }
986
+
987
+
988
+ @app.post("/api/trending/record")
989
+ def record_topic_mention(topic: str, source: str = "manual", domain: str = "general"):
990
+ """
991
+ Record a topic mention (for testing/manual tracking).
992
+
993
+ Args:
994
+ topic: Topic/keyword being mentioned
995
+ source: Source of the mention (twitter, news, etc.)
996
+ domain: Domain category (political, economical, etc.)
997
+ """
998
+ try:
999
+ from src.utils.trending_detector import record_topic_mention as record_mention
1000
+
1001
+ record_mention(topic=topic, source=source, domain=domain)
1002
+
1003
+ # Get updated momentum
1004
+ from src.utils.trending_detector import get_trending_detector
1005
+ detector = get_trending_detector()
1006
+ momentum = detector.get_momentum(topic)
1007
+
1008
+ return {
1009
+ "status": "success",
1010
+ "message": f"Recorded mention for '{topic}'",
1011
+ "current_momentum": momentum,
1012
+ "is_spike": detector.is_spike(topic)
1013
+ }
1014
+
1015
+ except Exception as e:
1016
+ logger.error(f"[TrendingAPI] Error recording mention: {e}")
1017
+ return {
1018
+ "status": "error",
1019
+ "error": str(e)
1020
+ }
1021
+
1022
+
1023
  # ============================================
1024
  # ANOMALY DETECTION ENDPOINTS
1025
  # ============================================
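These handlers import `get_trending_detector`, `record_topic_mention`, `get_trending_now`, and `get_spikes` from `src.utils.trending_detector`, which is not included in this commit view. For orientation only, a minimal in-memory sketch of the interface they depend on might look like the following (hour bucketing, thresholds, and storage are assumptions, not the project's actual module):

```python
# Assumed shape of src/utils/trending_detector.py -- NOT the real module.
from collections import defaultdict
from datetime import datetime
from typing import Any, Dict, List

class TrendingDetector:
    def __init__(self, window_hours: int = 6, spike_threshold: float = 3.0):
        self.window_hours = window_hours
        self.spike_threshold = spike_threshold
        # topic -> {hour_key: mention count}
        self.counts: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))

    def record(self, topic: str, source: str = "manual", domain: str = "general") -> None:
        hour_key = datetime.now().strftime("%Y-%m-%d %H:00")
        self.counts[topic][hour_key] += 1

    def get_momentum(self, topic: str) -> float:
        hours = sorted(self.counts[topic])
        if not hours:
            return 1.0
        current = self.counts[topic][hours[-1]]
        previous = [self.counts[topic][h] for h in hours[-1 - self.window_hours:-1]]
        baseline = sum(previous) / len(previous) if previous else 1.0
        return current / max(baseline, 1.0)

    def is_spike(self, topic: str) -> bool:
        return self.get_momentum(topic) > self.spike_threshold

    def get_topic_history(self, topic: str, hours: int = 24) -> List[Dict[str, Any]]:
        keys = sorted(self.counts[topic])[-hours:]
        return [{"hour": k, "count": self.counts[topic][k]} for k in keys]

_detector = TrendingDetector()

def get_trending_detector() -> TrendingDetector:
    return _detector

def record_topic_mention(topic: str, source: str = "manual", domain: str = "general") -> None:
    _detector.record(topic, source, domain)

def get_trending_now(limit: int = 10) -> List[Dict[str, Any]]:
    topics = [
        {"topic": t, "momentum": _detector.get_momentum(t), "is_spike": _detector.is_spike(t)}
        for t in _detector.counts
    ]
    trending = [t for t in topics if t["momentum"] > 2.0]
    return sorted(trending, key=lambda t: t["momentum"], reverse=True)[:limit]

def get_spikes() -> List[Dict[str, Any]]:
    return [t for t in get_trending_now(limit=100) if t["is_spike"]]
```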
models/anomaly-detection/src/components/model_trainer.py CHANGED
@@ -472,7 +472,41 @@ class ModelTrainer:
472
  optuna_study_name=None
473
  )
474
 
475
- logger.info(f"[ModelTrainer] Training complete in {duration:.1f}s")
476
  logger.info(f"[ModelTrainer] Best model: {best_model['name'] if best_model else 'N/A'}")
477
 
478
  return artifact
 
472
  optuna_study_name=None
473
  )
474
 
475
+ logger.info(f"[ModelTrainer] Training complete in {duration:.1f}s")
476
  logger.info(f"[ModelTrainer] Best model: {best_model['name'] if best_model else 'N/A'}")
477
 
478
+ # ============================================
479
+ # TRAIN EMBEDDING-ONLY MODEL FOR LIVE INFERENCE
480
+ # ============================================
481
+ # The Vectorizer Agent only has 768-dim embeddings at inference time
482
+ # (no temporal/engagement features), so we train a separate model
483
+ try:
484
+ # Check if features include extra metadata (> 768 dims)
485
+ if X.shape[1] > 768:
486
+ logger.info(f"[ModelTrainer] Training embedding-only model for Vectorizer Agent...")
487
+
488
+ # Extract only the first 768 dimensions (BERT embeddings)
489
+ X_embeddings_only = X[:, :768]
490
+ logger.info(f"[ModelTrainer] Embedding-only shape: {X_embeddings_only.shape}")
491
+
492
+ # Train Isolation Forest on embeddings only
493
+ embedding_model = IsolationForest(
494
+ contamination=0.1,
495
+ n_estimators=100,
496
+ random_state=42,
497
+ n_jobs=-1
498
+ )
499
+ embedding_model.fit(X_embeddings_only)
500
+
501
+ # Save to a dedicated path for the Vectorizer Agent
502
+ embedding_model_path = Path(self.config.output_directory) / "isolation_forest_embeddings_only.joblib"
503
+ joblib.dump(embedding_model, embedding_model_path)
504
+
505
+ logger.info(f"[ModelTrainer] Embedding-only model saved: {embedding_model_path}")
506
+ logger.info(f"[ModelTrainer] This model is for real-time inference by Vectorizer Agent")
507
+ else:
508
+ logger.info(f"[ModelTrainer] Features are already embedding-only ({X.shape[1]} dims)")
509
+ except Exception as e:
510
+ logger.warning(f"[ModelTrainer] Embedding-only model training failed: {e}")
511
+
512
  return artifact
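Once the embedding-only model is written out, consumers such as the Vectorizer Agent can load it directly. A hedged loading/scoring example follows; the 0-1 normalization shown is one plausible mapping of `decision_function`, not necessarily the one used inside the agent:

```python
# Illustrative only: load the embedding-only Isolation Forest saved above and
# score a single 768-dim embedding. The score normalization is an assumption.
import joblib
import numpy as np

model_path = "models/anomaly-detection/artifacts/model_trainer/isolation_forest_embeddings_only.joblib"
model = joblib.load(model_path)

embedding = np.random.rand(768)               # stand-in for a real BERT embedding
vector = embedding.reshape(1, -1)

label = model.predict(vector)[0]              # -1 = anomaly, 1 = normal
raw = model.decision_function(vector)[0]      # lower values are more anomalous
score = float(np.clip(0.5 - raw, 0.0, 1.0))   # rough mapping into [0, 1]

print(f"label={label}, anomaly_score={score:.3f}")
```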
src/api/vectorization_api.py CHANGED
@@ -72,6 +72,10 @@ class VectorizationResponse(BaseModel):
72
  domain_insights: List[Dict[str, Any]]
73
  processing_time_seconds: float
74
  vectors: Optional[List[Dict[str, Any]]] = None
 
 
 
 
75
 
76
 
77
  class HealthResponse(BaseModel):
@@ -169,6 +173,9 @@ async def vectorize_texts(request: VectorizationRequest):
169
  vectors=(
170
  result.get("vector_embeddings") if request.include_vectors else None
171
  ),
 
 
 
172
  )
173
 
174
  return response
 
72
  domain_insights: List[Dict[str, Any]]
73
  processing_time_seconds: float
74
  vectors: Optional[List[Dict[str, Any]]] = None
75
+ # Anomaly Detection Results
76
+ anomaly_results: Optional[Dict[str, Any]] = None
77
+ # Trending Detection Results
78
+ trending_results: Optional[Dict[str, Any]] = None
79
 
80
 
81
  class HealthResponse(BaseModel):
 
173
  vectors=(
174
  result.get("vector_embeddings") if request.include_vectors else None
175
  ),
176
+ # Include anomaly & trending detection results
177
+ anomaly_results=result.get("anomaly_results"),
178
+ trending_results=result.get("trending_results"),
179
  )
180
 
181
  return response
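For reference, a plausible shape of the two new optional fields once the graph has populated them; the values below are made up, but the key names mirror what the node's anomaly and trending steps return:

```python
# Illustrative payloads for the new response fields (values invented; keys follow
# src/nodes/vectorizationAgentNode.py).
example_anomaly_results = {
    "status": "success",
    "model_used": "isolation_forest_embeddings_only",
    "total_analyzed": 12,
    "anomalies_found": 1,
    "anomalies": [
        {
            "post_id": "EN_005",
            "anomaly_score": 0.81,
            "is_anomaly": True,
            "language": "english",
            "detection_method": "isolation_forest",
        }
    ],
}

example_trending_results = {
    "status": "success",
    "entities_extracted": 17,
    "trending_topics": [{"topic": "earthquake", "momentum": 4.5, "is_spike": True}],
    "spike_alerts": [{"topic": "earthquake", "momentum": 4.5, "is_spike": True}],
}
```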
src/graphs/vectorizationAgentGraph.py CHANGED
@@ -17,8 +17,9 @@ class VectorizationGraphBuilder:
17
  Step 1: Language Detection (FastText/lingua-py)
18
  Step 2: Text Vectorization (SinhalaBERTo/Tamil-BERT/DistilBERT)
19
  Step 3: Anomaly Detection (Isolation Forest on vectors)
20
- Step 4: Expert Summary (GroqLLM)
21
- Step 5: Format Output
 
22
  """
23
 
24
  def __init__(self, llm=None):
@@ -29,7 +30,7 @@ class VectorizationGraphBuilder:
29
  Build the vectorization agent graph.
30
 
31
  Flow:
32
- detect_languages → vectorize_texts → anomaly_detection → expert_summary → format_output → END
33
  """
34
  node = VectorizationAgentNode(self.llm)
35
 
@@ -40,16 +41,18 @@ class VectorizationGraphBuilder:
40
  graph.add_node("detect_languages", node.detect_languages)
41
  graph.add_node("vectorize_texts", node.vectorize_texts)
42
  graph.add_node("anomaly_detection", node.run_anomaly_detection)
 
43
  graph.add_node("generate_expert_summary", node.generate_expert_summary)
44
  graph.add_node("format_output", node.format_final_output)
45
 
46
  # Set entry point
47
  graph.set_entry_point("detect_languages")
48
 
49
- # Sequential flow with anomaly detection
50
  graph.add_edge("detect_languages", "vectorize_texts")
51
  graph.add_edge("vectorize_texts", "anomaly_detection")
52
- graph.add_edge("anomaly_detection", "generate_expert_summary")
 
53
  graph.add_edge("generate_expert_summary", "format_output")
54
  graph.add_edge("format_output", END)
55
 
@@ -60,12 +63,13 @@ class VectorizationGraphBuilder:
60
  print("\n" + "=" * 60)
61
  print("[BRAIN] BUILDING VECTORIZATION AGENT GRAPH")
62
  print("=" * 60)
63
- print("Architecture: 5-Step Sequential Pipeline")
64
  print(" Step 1: Language Detection (FastText/Unicode)")
65
  print(" Step 2: Text Vectorization (SinhalaBERTo/Tamil-BERT/DistilBERT)")
66
  print(" Step 3: Anomaly Detection (Isolation Forest)")
67
- print(" Step 4: Expert Summary (GroqLLM)")
68
- print(" Step 5: Format Output")
 
69
  print("-" * 60)
70
 
71
  llm = GroqLLM().get_llm()
@@ -73,3 +77,4 @@ graph = VectorizationGraphBuilder(llm).build_graph()
73
 
74
  print("[OK] Vectorization Agent Graph compiled successfully")
75
  print("=" * 60 + "\n")
 
 
17
  Step 1: Language Detection (FastText/lingua-py)
18
  Step 2: Text Vectorization (SinhalaBERTo/Tamil-BERT/DistilBERT)
19
  Step 3: Anomaly Detection (Isolation Forest on vectors)
20
+ Step 4: Trending Detection (Velocity/Spike tracking)
21
+ Step 5: Expert Summary (GroqLLM)
22
+ Step 6: Format Output
23
  """
24
 
25
  def __init__(self, llm=None):
 
30
  Build the vectorization agent graph.
31
 
32
  Flow:
33
+ detect_languages → vectorize_texts → anomaly_detection → trending_detection → expert_summary → format_output → END
34
  """
35
  node = VectorizationAgentNode(self.llm)
36
 
 
41
  graph.add_node("detect_languages", node.detect_languages)
42
  graph.add_node("vectorize_texts", node.vectorize_texts)
43
  graph.add_node("anomaly_detection", node.run_anomaly_detection)
44
+ graph.add_node("trending_detection", node.run_trending_detection)
45
  graph.add_node("generate_expert_summary", node.generate_expert_summary)
46
  graph.add_node("format_output", node.format_final_output)
47
 
48
  # Set entry point
49
  graph.set_entry_point("detect_languages")
50
 
51
+ # Sequential flow with anomaly + trending detection
52
  graph.add_edge("detect_languages", "vectorize_texts")
53
  graph.add_edge("vectorize_texts", "anomaly_detection")
54
+ graph.add_edge("anomaly_detection", "trending_detection")
55
+ graph.add_edge("trending_detection", "generate_expert_summary")
56
  graph.add_edge("generate_expert_summary", "format_output")
57
  graph.add_edge("format_output", END)
58
 
 
63
  print("\n" + "=" * 60)
64
  print("[BRAIN] BUILDING VECTORIZATION AGENT GRAPH")
65
  print("=" * 60)
66
+ print("Architecture: 6-Step Sequential Pipeline")
67
  print(" Step 1: Language Detection (FastText/Unicode)")
68
  print(" Step 2: Text Vectorization (SinhalaBERTo/Tamil-BERT/DistilBERT)")
69
  print(" Step 3: Anomaly Detection (Isolation Forest)")
70
+ print(" Step 4: Trending Detection (Velocity/Spikes)")
71
+ print(" Step 5: Expert Summary (GroqLLM)")
72
+ print(" Step 6: Format Output")
73
  print("-" * 60)
74
 
75
  llm = GroqLLM().get_llm()
 
77
 
78
  print("[OK] Vectorization Agent Graph compiled successfully")
79
  print("=" * 60 + "\n")
80
+
src/nodes/vectorizationAgentNode.py CHANGED
@@ -324,6 +324,13 @@ class VectorizationAgentNode:
324
  import joblib
325
 
326
  model_paths = [
327
  MODELS_PATH / "output" / "isolation_forest_model.joblib",
328
  MODELS_PATH
329
  / "artifacts"
@@ -337,7 +344,7 @@ class VectorizationAgentNode:
337
  anomaly_model = joblib.load(model_path)
338
  model_name = model_path.stem
339
  logger.info(
340
- f"[VectorizationAgent] Loaded anomaly model: {model_path.name}"
341
  )
342
  break
343
 
@@ -361,18 +368,36 @@ class VectorizationAgentNode:
361
  }
362
 
363
  # Run inference on each embedding
364
  anomalies = []
365
  normal_count = 0
 
366
 
367
  for emb in embeddings:
368
  try:
369
  vector = emb.get("vector", [])
370
  post_id = emb.get("post_id", "")
 
371
 
372
  if not vector or len(vector) != 768:
373
  continue
374
 
375
- # Reshape for sklearn
376
  vector_array = np.array(vector).reshape(1, -1)
377
 
378
  # Predict: -1 = anomaly, 1 = normal
@@ -395,7 +420,8 @@ class VectorizationAgentNode:
395
  "post_id": post_id,
396
  "anomaly_score": float(normalized_score),
397
  "is_anomaly": True,
398
- "language": emb.get("language", "unknown"),
 
399
  }
400
  )
401
  else:
@@ -407,7 +433,8 @@ class VectorizationAgentNode:
407
  )
408
 
409
  logger.info(
410
- f"[VectorizationAgent] Anomaly detection: {len(anomalies)} anomalies, {normal_count} normal"
 
411
  )
412
 
413
  return {
@@ -423,6 +450,180 @@ class VectorizationAgentNode:
423
  },
424
  }
425
 
426
  def generate_expert_summary(self, state: VectorizationAgentState) -> Dict[str, Any]:
427
  """
428
  Step 3: Use GroqLLM to generate expert summary combining all insights.
 
324
  import joblib
325
 
326
  model_paths = [
327
+ # Embedding-only model (768-dim) - compatible with Vectorizer Agent
328
+ MODELS_PATH
329
+ / "artifacts"
330
+ / "model_trainer"
331
+ / "isolation_forest_embeddings_only.joblib",
332
+ # Full-feature models (may have different dimensions)
333
+ MODELS_PATH / "output" / "isolation_forest_embeddings_only.joblib",
334
  MODELS_PATH / "output" / "isolation_forest_model.joblib",
335
  MODELS_PATH
336
  / "artifacts"
 
344
  anomaly_model = joblib.load(model_path)
345
  model_name = model_path.stem
346
  logger.info(
347
+ f"[VectorizationAgent] Loaded anomaly model: {model_path.name}"
348
  )
349
  break
350
 
 
368
  }
369
 
370
  # Run inference on each embedding
371
+ # IMPORTANT: The anomaly model was trained primarily on English embeddings.
372
+ # Different BERT models (SinhalaBERTo, Tamil-BERT, DistilBERT) produce embeddings
373
+ # in completely different vector spaces, so non-English texts would incorrectly
374
+ # appear as anomalies. We handle this by:
375
+ # 1. Only running the model on English texts
376
+ # 2. Skipping non-English texts entirely (counted as normal, not anomalous)
377
  anomalies = []
378
  normal_count = 0
379
+ skipped_non_english = 0
380
 
381
  for emb in embeddings:
382
  try:
383
  vector = emb.get("vector", [])
384
  post_id = emb.get("post_id", "")
385
+ language = emb.get("language", "english")
386
 
387
  if not vector or len(vector) != 768:
388
  continue
389
 
390
+ # For non-English languages, skip anomaly detection
391
+ # The ML model was trained on English embeddings only.
392
+ # Different BERT models (SinhalaBERTo, Tamil-BERT) have completely
393
+ # different embedding spaces - Tamil embeddings have magnitude ~0.64
394
+ # while English has ~7.5 and Sinhala ~13.7. They cannot be compared.
395
+ if language in ["sinhala", "tamil"]:
396
+ skipped_non_english += 1
397
+ normal_count += 1 # Treat as normal (not anomalous)
398
+ continue
399
+
400
+ # For English texts, use the trained ML model
401
  vector_array = np.array(vector).reshape(1, -1)
402
 
403
  # Predict: -1 = anomaly, 1 = normal
 
420
  "post_id": post_id,
421
  "anomaly_score": float(normalized_score),
422
  "is_anomaly": True,
423
+ "language": language,
424
+ "detection_method": "isolation_forest",
425
  }
426
  )
427
  else:
 
433
  )
434
 
435
  logger.info(
436
+ f"[VectorizationAgent] Anomaly detection: {len(anomalies)} anomalies, "
437
+ f"{normal_count} normal, {skipped_non_english} non-English (skipped)"
438
  )
439
 
440
  return {
 
450
  },
451
  }
452
 
453
+ def run_trending_detection(self, state: VectorizationAgentState) -> Dict[str, Any]:
454
+ """
455
+ Step 2.6: Detect trending topics from the input texts.
456
+
457
+ Extracts key entities/topics and tracks their mention velocity.
458
+ Identifies:
459
+ - Trending topics (momentum > 2x normal)
460
+ - Spike alerts (volume > 3x normal)
461
+ - Topics with increasing momentum
462
+ """
463
+ logger.info("[VectorizationAgent] STEP 2.6: Trending Detection")
464
+
465
+ detection_results = state.get("language_detection_results", [])
466
+
467
+ if not detection_results:
468
+ logger.warning("[VectorizationAgent] No texts for trending detection")
469
+ return {
470
+ "current_step": "trending_detection",
471
+ "trending_results": {
472
+ "status": "skipped",
473
+ "reason": "no_texts",
474
+ "trending_topics": [],
475
+ "spike_alerts": [],
476
+ },
477
+ }
478
+
479
+ # Import trending detector
480
+ try:
481
+ from src.utils.trending_detector import (
482
+ get_trending_detector,
483
+ record_topic_mention,
484
+ get_trending_now,
485
+ get_spikes,
486
+ )
487
+ TRENDING_AVAILABLE = True
488
+ except ImportError as e:
489
+ logger.warning(f"[VectorizationAgent] Trending detector not available: {e}")
490
+ TRENDING_AVAILABLE = False
491
+
492
+ if not TRENDING_AVAILABLE:
493
+ return {
494
+ "current_step": "trending_detection",
495
+ "trending_results": {
496
+ "status": "unavailable",
497
+ "reason": "trending_detector_not_installed",
498
+ "trending_topics": [],
499
+ "spike_alerts": [],
500
+ },
501
+ }
502
+
503
+ # Extract entities and record mentions
504
+ entities_found = []
505
+
506
+ for item in detection_results:
507
+ text = item.get("text", "") # Field is 'text', not 'original_text'
508
+ language = item.get("language", "english")
509
+ post_id = item.get("post_id", "")
510
+
511
+ # Simple entity extraction (keywords, capitalized words, etc.)
512
+ # In production, you'd use NER or more sophisticated extraction
513
+ extracted = self._extract_entities(text, language)
514
+
515
+ for entity in extracted:
516
+ try:
517
+ # Record mention with trending detector
518
+ record_topic_mention(
519
+ topic=entity["text"],
520
+ source=entity.get("source", "feed"),
521
+ domain=entity.get("domain", "general"),
522
+ )
523
+ entities_found.append({
524
+ "entity": entity["text"],
525
+ "type": entity.get("type", "keyword"),
526
+ "post_id": post_id,
527
+ "language": language,
528
+ })
529
+ except Exception as e:
530
+ logger.debug(f"[VectorizationAgent] Failed to record mention: {e}")
531
+
532
+ # Get current trending topics and spikes
533
+ try:
534
+ trending_topics = get_trending_now(limit=10)
535
+ spike_alerts = get_spikes()
536
+ except Exception as e:
537
+ logger.warning(f"[VectorizationAgent] Failed to get trending data: {e}")
538
+ trending_topics = []
539
+ spike_alerts = []
540
+
541
+ logger.info(
542
+ f"[VectorizationAgent] Trending detection: {len(entities_found)} entities, "
543
+ f"{len(trending_topics)} trending, {len(spike_alerts)} spikes"
544
+ )
545
+
546
+ return {
547
+ "current_step": "trending_detection",
548
+ "trending_results": {
549
+ "status": "success",
550
+ "entities_extracted": len(entities_found),
551
+ "entities": entities_found[:20], # Limit for state size
552
+ "trending_topics": trending_topics,
553
+ "spike_alerts": spike_alerts,
554
+ },
555
+ }
556
+
557
+ def _extract_entities(self, text: str, language: str = "english") -> List[Dict[str, Any]]:
558
+ """
559
+ Extract entities/topics from text for trending tracking.
560
+
561
+ Uses simple heuristics:
562
+ - Capitalized words/phrases (potential proper nouns)
563
+ - Hashtags
564
+ - Common news keywords
565
+
566
+ In production, integrate with NER model for better extraction.
567
+ """
568
+ entities = []
569
+
570
+ if not text:
571
+ return entities
572
+
573
+ import re
574
+
575
+ # Extract hashtags
576
+ hashtags = re.findall(r'#(\w+)', text)
577
+ for tag in hashtags:
578
+ entities.append({
579
+ "text": tag.lower(),
580
+ "type": "hashtag",
581
+ "source": "hashtag",
582
+ "domain": "social",
583
+ })
584
+
585
+ # Extract capitalized phrases (potential proper nouns)
586
+ # Match 1-4 consecutive capitalized words
587
+ cap_phrases = re.findall(r'\b([A-Z][a-z]+(?: [A-Z][a-z]+){0,3})\b', text)
588
+ for phrase in cap_phrases:
589
+ # Skip common words
590
+ if phrase.lower() not in ['the', 'a', 'an', 'is', 'are', 'was', 'were', 'i', 'he', 'she', 'it']:
591
+ entities.append({
592
+ "text": phrase,
593
+ "type": "proper_noun",
594
+ "source": "text",
595
+ "domain": "general",
596
+ })
597
+
598
+ # News/event keywords
599
+ news_keywords = [
600
+ 'breaking', 'urgent', 'alert', 'emergency', 'crisis',
601
+ 'earthquake', 'flood', 'tsunami', 'election', 'protest',
602
+ 'strike', 'scandal', 'corruption', 'price', 'inflation',
603
+ ]
604
+
605
+ text_lower = text.lower()
606
+ for keyword in news_keywords:
607
+ if keyword in text_lower:
608
+ entities.append({
609
+ "text": keyword,
610
+ "type": "news_keyword",
611
+ "source": "keyword_match",
612
+ "domain": "news",
613
+ })
614
+
615
+ # Deduplicate by text
616
+ seen = set()
617
+ unique_entities = []
618
+ for e in entities:
619
+ key = e["text"].lower()
620
+ if key not in seen:
621
+ seen.add(key)
622
+ unique_entities.append(e)
623
+
624
+ return unique_entities[:15] # Limit entities per text
625
+
626
+
627
  def generate_expert_summary(self, state: VectorizationAgentState) -> Dict[str, Any]:
628
  """
629
  Step 3: Use GroqLLM to generate expert summary combining all insights.
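The inline comments above attribute the language split to embedding magnitude: the three BERT models emit vectors on very different scales (per the comment, roughly 0.64 for Tamil, 7.5 for English, 13.7 for Sinhala), so a single Isolation Forest cannot score them jointly. A small sketch for checking that on a batch of the agent's embeddings; the item structure (`vector`, `language`) follows the `vectorize_texts` output, but the helper itself is not part of this commit:

```python
# Sketch: average L2 norm of embeddings per language, using the same item
# structure ("vector", "language") produced by the vectorize_texts step.
import numpy as np

def norms_by_language(embeddings):
    norms = {}
    for emb in embeddings:
        vec = np.asarray(emb.get("vector", []), dtype=float)
        if vec.size != 768:
            continue
        norms.setdefault(emb.get("language", "unknown"), []).append(float(np.linalg.norm(vec)))
    return {lang: round(float(np.mean(vals)), 2) for lang, vals in norms.items()}

# Expected pattern per the comment above: english ~7.5, sinhala ~13.7, tamil ~0.64
# print(norms_by_language(state["vector_embeddings"]))
```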
src/rag.py CHANGED
@@ -246,19 +246,54 @@ class RogerRAG:
246
  logger.error(f"[RAG] LLM initialization error: {e}")
247
 
248
  def _format_context(self, docs: List[Dict[str, Any]]) -> str:
249
- """Format retrieved documents as context for LLM"""
250
  if not docs:
251
  return "No relevant intelligence data found."
252
 
253
  context_parts = []
 
 
254
  for i, doc in enumerate(docs[:5], 1): # Top 5 docs
255
  meta = doc.get("metadata", {})
256
  domain = meta.get("domain", "unknown")
257
  platform = meta.get("platform", "")
258
  timestamp = meta.get("timestamp", "")
259
 
260
  context_parts.append(
261
- f"[Source {i}] Domain: {domain} | Platform: {platform} | Time: {timestamp}\n"
 
262
  f"{doc['content']}\n"
263
  )
264
 
@@ -344,18 +379,34 @@ class RogerRAG:
344
  "question": question,
345
  }
346
 
347
- # RAG prompt
 
348
  rag_prompt = ChatPromptTemplate.from_messages(
349
  [
350
  (
351
  "system",
352
- """You are Roger, an AI intelligence analyst for Sri Lanka.
353
- Answer questions based ONLY on the provided intelligence context.
354
- Be concise but informative. Cite sources when possible.
355
- If the context doesn't contain relevant information, say so.
356
 
357
- Context:
358
- {context}""",
359
  ),
360
  MessagesPlaceholder(variable_name="history"),
361
  ("human", "{question}"),
 
246
  logger.error(f"[RAG] LLM initialization error: {e}")
247
 
248
  def _format_context(self, docs: List[Dict[str, Any]]) -> str:
249
+ """Format retrieved documents as context for LLM with temporal awareness"""
250
  if not docs:
251
  return "No relevant intelligence data found."
252
 
253
  context_parts = []
254
+ now = datetime.now()
255
+
256
  for i, doc in enumerate(docs[:5], 1): # Top 5 docs
257
  meta = doc.get("metadata", {})
258
  domain = meta.get("domain", "unknown")
259
  platform = meta.get("platform", "")
260
  timestamp = meta.get("timestamp", "")
261
 
262
+ # Calculate age of the source
263
+ age_str = "unknown date"
264
+ if timestamp:
265
+ try:
266
+ # Try to parse various timestamp formats
267
+ for fmt in [
268
+ "%Y-%m-%d %H:%M:%S",
269
+ "%Y-%m-%dT%H:%M:%S",
270
+ "%Y-%m-%d",
271
+ "%d/%m/%Y",
272
+ ]:
273
+ try:
274
+ ts_date = datetime.strptime(timestamp[:19], fmt)
275
+ days_old = (now - ts_date).days
276
+ if days_old == 0:
277
+ age_str = "TODAY"
278
+ elif days_old == 1:
279
+ age_str = "1 day ago"
280
+ elif days_old < 7:
281
+ age_str = f"{days_old} days ago"
282
+ elif days_old < 30:
283
+ age_str = f"{days_old // 7} weeks ago"
284
+ elif days_old < 365:
285
+ age_str = f"{days_old // 30} months ago (⚠️ POTENTIALLY OUTDATED)"
286
+ else:
287
+ age_str = f"{days_old // 365} years ago (⚠️ OUTDATED)"
288
+ break
289
+ except ValueError:
290
+ continue
291
+ except Exception:
292
+ age_str = f"Date: {timestamp}"
293
+
294
  context_parts.append(
295
+ f"[Source {i}] Domain: {domain} | Platform: {platform}\n"
296
+ f"📅 TIMESTAMP: {timestamp} ({age_str})\n"
297
  f"{doc['content']}\n"
298
  )
299
 
 
379
  "question": question,
380
  }
381
 
382
+ # RAG prompt with temporal awareness
383
+ current_date = datetime.now().strftime("%B %d, %Y")
384
  rag_prompt = ChatPromptTemplate.from_messages(
385
  [
386
  (
387
  "system",
388
+ f"""You are Roger, an AI intelligence analyst for Sri Lanka.
389
+
390
+ TODAY'S DATE: {current_date}
391
+
392
+ CRITICAL TEMPORAL AWARENESS INSTRUCTIONS:
393
+ 1. ALWAYS check the timestamp/date of each source before using information
394
+ 2. For questions about "current" situations, ONLY use sources from the last 30 days
395
+ 3. If sources are outdated (more than 30 days old), explicitly mention this: "Based on data from [date], which may be outdated..."
396
+ 4. For political leadership questions, verify information is from recent sources
397
+ 5. If you find conflicting information from different time periods, prefer the most recent source
398
+ 6. Never present old information as current fact without temporal qualification
399
+
400
+ IMPORTANT POLITICAL CONTEXT:
401
+ - Presidential elections were held in Sri Lanka in September 2024
402
+ - Always verify any claims about political leadership against the most recent sources
403
+
404
+ Answer questions based ONLY on the provided intelligence context.
405
+ Be concise but informative. Always cite source timestamps when available.
406
+ If the context doesn't contain relevant RECENT information for current-state questions, say so.
407
 
408
+ Context (check timestamps carefully):
409
+ {{context}}""",
410
  ),
411
  MessagesPlaceholder(variable_name="history"),
412
  ("human", "{question}"),
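The timestamp handling added to `_format_context` can be read as a small standalone helper; a distilled version for illustration (not code from this commit, same formats and age buckets as the inline logic above):

```python
# Distilled, standalone version of the age-labelling logic added above.
from datetime import datetime
from typing import Optional

def age_label(timestamp: str, now: Optional[datetime] = None) -> str:
    now = now or datetime.now()
    for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d", "%d/%m/%Y"):
        try:
            days = (now - datetime.strptime(timestamp[:19], fmt)).days
        except ValueError:
            continue
        if days == 0:
            return "TODAY"
        if days == 1:
            return "1 day ago"
        if days < 7:
            return f"{days} days ago"
        if days < 30:
            return f"{days // 7} weeks ago"
        if days < 365:
            return f"{days // 30} months ago (⚠️ POTENTIALLY OUTDATED)"
        return f"{days // 365} years ago (⚠️ OUTDATED)"
    return "unknown date"

# age_label("2024-09-21", now=datetime(2025, 1, 1))  ->  "3 months ago (⚠️ POTENTIALLY OUTDATED)"
```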
src/states/vectorizationAgentState.py CHANGED
@@ -33,6 +33,10 @@ class VectorizationAgentState(TypedDict, total=False):
33
  clustering_results: Optional[Dict[str, Any]]
34
  anomaly_results: Optional[Dict[str, Any]]
35
 
 
 
 
 
36
  # ===== EXPERT ANALYSIS =====
37
  expert_summary: Optional[str] # LLM-generated summary combining all insights
38
  opportunities: List[Dict[str, Any]] # Detected opportunities
 
33
  clustering_results: Optional[Dict[str, Any]]
34
  anomaly_results: Optional[Dict[str, Any]]
35
 
36
+ # ===== TRENDING DETECTION =====
37
+ trending_results: Optional[Dict[str, Any]]
38
+ # {trending_topics, spike_alerts, entities_extracted}
39
+
40
  # ===== EXPERT ANALYSIS =====
41
  expert_summary: Optional[str] # LLM-generated summary combining all insights
42
  opportunities: List[Dict[str, Any]] # Detected opportunities
test_multilingual_anomaly.py ADDED
@@ -0,0 +1,54 @@
1
+ """
2
+ test_multilingual_anomaly.py
3
+ Test the multilingual anomaly detection fix.
4
+ """
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ if sys.platform == 'win32':
9
+ sys.stdout.reconfigure(encoding='utf-8')
10
+
11
+ sys.path.insert(0, str(Path('.').resolve()))
12
+
13
+ from src.graphs.vectorizationAgentGraph import graph
14
+ from datetime import datetime
15
+
16
+ test_texts = [
17
+ {"text": "URGENT: Massive landslide in Ratnapura!", "post_id": "EN_001"},
18
+ {"text": "Normal stock market day", "post_id": "EN_002"},
19
+ {"text": "ආර්ථික අර්බුදය නිසා ජනතාව දුෂ්කරතාවන්ට මුහුණ දෙයි", "post_id": "SI_001"},
20
+ {"text": "கொழும்பில் பெரும் மழை பெய்தது", "post_id": "TA_001"},
21
+ {"text": "Breaking news about corruption scandal", "post_id": "EN_003"},
22
+ ]
23
+
24
+ result = graph.invoke({
25
+ "input_texts": test_texts,
26
+ "batch_id": datetime.now().strftime("%Y%m%d_%H%M%S"),
27
+ })
28
+
29
+ print("=" * 60)
30
+ print("MULTILINGUAL ANOMALY DETECTION TEST")
31
+ print("=" * 60)
32
+
33
+ anomaly_results = result.get("anomaly_results", {})
34
+ print(f"\nStatus: {anomaly_results.get('status')}")
35
+ print(f"Model: {anomaly_results.get('model_used')}")
36
+ print(f"Total analyzed: {anomaly_results.get('total_analyzed')}")
37
+
38
+ anomalies = anomaly_results.get("anomalies", [])
39
+ print(f"\nAnomalies found: {len(anomalies)}")
40
+ for a in anomalies:
41
+ method = a.get("detection_method", "unknown")
42
+ print(f" - {a.get('post_id')}: {a.get('language')} | method: {method} | score: {a.get('anomaly_score', 0):.2f}")
43
+
44
+ lang_results = result.get("language_detection_results", [])
45
+ print(f"\nLanguage Detection:")
46
+ for lr in lang_results:
47
+ print(f" - {lr.get('post_id')}: {lr.get('language')} (conf: {lr.get('confidence', 0):.2f})")
48
+
49
+ # Summary
50
+ print("\n" + "=" * 60)
51
+ print("The fix ensures:")
52
+ print(" - English texts: Isolation Forest ML model")
53
+ print(" - Sinhala/Tamil: Skipped (incompatible embedding spaces, avoids false positives)")
54
+ print("=" * 60)
test_trending_integration.py ADDED
@@ -0,0 +1,80 @@
1
+ """
2
+ test_trending_integration.py
3
+ Test the trending detection integration in the vectorizer pipeline.
4
+ """
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ if sys.platform == 'win32':
9
+ sys.stdout.reconfigure(encoding='utf-8')
10
+
11
+ sys.path.insert(0, str(Path('.').resolve()))
12
+
13
+ from src.graphs.vectorizationAgentGraph import graph
14
+ from datetime import datetime
15
+
16
+ print("=" * 60)
17
+ print("TESTING TRENDING DETECTION INTEGRATION")
18
+ print("=" * 60)
19
+
20
+ # Test with multiple mentions of the same topic to trigger trending
21
+ test_texts = [
22
+ {"text": "URGENT: Major earthquake hits Colombo, buildings damaged!", "post_id": "EN_001"},
23
+ {"text": "Breaking news: Earthquake in Colombo measuring 5.5 magnitude", "post_id": "EN_002"},
24
+ {"text": "Colombo earthquake causes panic, residents evacuated", "post_id": "EN_003"},
25
+ {"text": "Sri Lanka Cricket team wins against India in thrilling match", "post_id": "EN_004"},
26
+ {"text": "Stock market shows bullish trends in JKH", "post_id": "EN_005"},
27
+ {"text": "Another earthquake aftershock reported in Colombo area", "post_id": "EN_006"},
28
+ ]
29
+
30
+ print(f"\nProcessing {len(test_texts)} texts with repeated topics...")
31
+
32
+ result = graph.invoke({
33
+ "input_texts": test_texts,
34
+ "batch_id": datetime.now().strftime("%Y%m%d_%H%M%S"),
35
+ })
36
+
37
+ # Show trending results
38
+ print("\n" + "=" * 60)
39
+ print("TRENDING DETECTION RESULTS")
40
+ print("=" * 60)
41
+
42
+ trending_results = result.get("trending_results", {})
43
+ print(f"\nStatus: {trending_results.get('status', 'N/A')}")
44
+ print(f"Entities extracted: {trending_results.get('entities_extracted', 0)}")
45
+
46
+ # Show extracted entities
47
+ entities = trending_results.get("entities", [])
48
+ print(f"\nExtracted Entities ({len(entities)}):")
49
+ for e in entities[:10]:
50
+ print(f" - {e.get('entity')} ({e.get('type')}) from {e.get('post_id')}")
51
+
52
+ # Show trending topics
53
+ trending_topics = trending_results.get("trending_topics", [])
54
+ print(f"\nTrending Topics ({len(trending_topics)}):")
55
+ if trending_topics:
56
+ for t in trending_topics:
57
+ print(f" - {t.get('topic')}: momentum={t.get('momentum', 0):.2f}, is_spike={t.get('is_spike', False)}")
58
+ else:
59
+ print(" (No trending topics yet - need more historical data)")
60
+
61
+ # Show spike alerts
62
+ spike_alerts = trending_results.get("spike_alerts", [])
63
+ print(f"\nSpike Alerts ({len(spike_alerts)}):")
64
+ if spike_alerts:
65
+ for s in spike_alerts:
66
+ print(f" - {s.get('topic')}: momentum={s.get('momentum', 0):.2f}")
67
+ else:
68
+ print(" (No spike alerts)")
69
+
70
+ # Show anomaly results
71
+ print("\n" + "=" * 60)
72
+ print("ANOMALY DETECTION RESULTS")
73
+ print("=" * 60)
74
+ anomaly_results = result.get("anomaly_results", {})
75
+ print(f"Status: {anomaly_results.get('status', 'N/A')}")
76
+ print(f"Anomalies found: {anomaly_results.get('anomalies_found', 0)}")
77
+
78
+ print("\n" + "=" * 60)
79
+ print("PIPELINE COMPLETE - 6-Step Architecture Working!")
80
+ print("=" * 60)
test_vectorizer_and_anomaly.py ADDED
@@ -0,0 +1,379 @@
1
+ """
2
+ test_vectorizer_and_anomaly.py
3
+ Test script to run the Vectorizer Agent and Anomaly Detection pipeline
4
+ Generates visualizations of the results
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import json
10
+ import numpy as np
11
+ from pathlib import Path
12
+ from datetime import datetime
13
+
14
+ # Fix Windows encoding
15
+ if sys.platform == "win32":
16
+ sys.stdout.reconfigure(encoding='utf-8')
17
+
18
+ # Add project root to path
19
+ PROJECT_ROOT = Path(__file__).parent
20
+ sys.path.insert(0, str(PROJECT_ROOT))
21
+
22
+ # Load environment
23
+ try:
24
+ from dotenv import load_dotenv
25
+ load_dotenv()
26
+ except ImportError:
27
+ pass
28
+
29
+ print("=" * 70)
30
+ print(" VECTORIZER AGENT & ANOMALY DETECTION TEST")
31
+ print("=" * 70)
32
+ print()
33
+
34
+ # ============================================
35
+ # STEP 1: TEST VECTORIZER AGENT
36
+ # ============================================
37
+ print("\n" + "=" * 50)
38
+ print("STEP 1: Testing Vectorizer Agent")
39
+ print("=" * 50)
40
+
41
+ # Sample multilingual test data
42
+ test_texts = [
43
+ {"text": "The political situation in Colombo is tense with protests happening", "post_id": "EN_001"},
44
+ {"text": "Stock market shows bullish trends in JKH and Commercial Bank", "post_id": "EN_002"},
45
+ {"text": "Heavy rainfall expected in Southern Province causing flood warnings", "post_id": "EN_003"},
46
+ {"text": "Economic reforms by the government receive mixed public response", "post_id": "EN_004"},
47
+ {"text": "URGENT: Massive landslide in Ratnapura district, evacuations underway!", "post_id": "EN_005"},
48
+ {"text": "Normal day, nothing much happening, just regular news", "post_id": "EN_006"},
49
+ {"text": "Coffee prices remain stable in local markets", "post_id": "EN_007"},
50
+ {"text": "BREAKING: Major corruption scandal exposed in government ministry", "post_id": "EN_008"},
51
+ {"text": "Sri Lanka cricket team wins against India in thrilling match", "post_id": "EN_009"},
52
+ {"text": "Warning: Tsunami alert issued for coastal areas - immediate evacuation!", "post_id": "EN_010"},
53
+ ]
54
+
55
+ # Add some Sinhala text samples (using romanized for simplicity)
56
+ sinhala_texts = [
57
+ {"text": "කොළඹ නගරයේ අද මහ වර්ෂාවක් ඇති විය", "post_id": "SI_001"},
58
+ {"text": "ආර්ථික අර්බුදය හේතුවෙන් ජනතාව දුෂ්කරතාවන්ට මුහුණ දෙයි", "post_id": "SI_002"},
59
+ ]
60
+
61
+ # Add Tamil text samples
62
+ tamil_texts = [
63
+ {"text": "கொழும்பில் பெரும் மழை பெய்தது", "post_id": "TA_001"},
64
+ ]
65
+
66
+ all_texts = test_texts + sinhala_texts + tamil_texts
67
+
68
+ print(f"📝 Testing with {len(all_texts)} sample texts")
69
+ print(f" - English: {len(test_texts)}")
70
+ print(f" - Sinhala: {len(sinhala_texts)}")
71
+ print(f" - Tamil: {len(tamil_texts)}")
72
+
73
+ # Run the vectorizer agent
74
+ try:
75
+ from src.graphs.vectorizationAgentGraph import graph as vectorizer_graph
76
+
77
+ initial_state = {
78
+ "input_texts": all_texts,
79
+ "batch_id": datetime.now().strftime("%Y%m%d_%H%M%S"),
80
+ }
81
+
82
+ print("\n🔄 Running Vectorizer Agent Graph...")
83
+ result = vectorizer_graph.invoke(initial_state)
84
+
85
+ print("\n✅ Vectorizer Agent Results:")
86
+ print("-" * 40)
87
+
88
+ # Language detection results
89
+ lang_results = result.get("language_detection_results", [])
90
+ print(f"\n📊 Language Detection:")
91
+ lang_stats = {}
92
+ for item in lang_results:
93
+ lang = item.get("language", "unknown")
94
+ lang_stats[lang] = lang_stats.get(lang, 0) + 1
95
+ print(f" - {item.get('post_id')}: {lang} (conf: {item.get('confidence', 0):.2f})")
96
+
97
+ print(f"\n📈 Language Distribution: {lang_stats}")
98
+
99
+ # Vector embeddings
100
+ embeddings = result.get("vector_embeddings", [])
101
+ print(f"\n🔢 Vector Embeddings Generated: {len(embeddings)}")
102
+ if embeddings:
103
+ sample = embeddings[0]
104
+ print(f" Sample vector dim: {sample.get('vector_dim', 0)}")
105
+ print(f" Models used: {set(e.get('model_used', '') for e in embeddings)}")
106
+
107
+ # Anomaly detection results
108
+ anomaly_results = result.get("anomaly_results", {})
109
+ print(f"\n🔍 Anomaly Detection:")
110
+ print(f" Status: {anomaly_results.get('status', 'unknown')}")
111
+ print(f" Model: {anomaly_results.get('model_used', 'none')}")
112
+ print(f" Total Analyzed: {anomaly_results.get('total_analyzed', 0)}")
113
+ print(f" Anomalies Found: {anomaly_results.get('anomalies_found', 0)}")
114
+
115
+ anomalies = anomaly_results.get("anomalies", [])
116
+ if anomalies:
117
+ print(f"\n⚠️ Detected Anomalies:")
118
+ for a in anomalies:
119
+ print(f" - {a.get('post_id')}: score={a.get('anomaly_score', 0):.3f}")
120
+
121
+ # Expert summary
122
+ expert_summary = result.get("expert_summary", "")
123
+ if expert_summary:
124
+ print(f"\n📋 Expert Summary (first 500 chars):")
125
+ print(f" {expert_summary[:500]}...")
126
+
127
+ # Domain insights
128
+ domain_insights = result.get("domain_insights", [])
129
+ print(f"\n💡 Domain Insights Generated: {len(domain_insights)}")
130
+
131
+ except Exception as e:
132
+ print(f"❌ Vectorizer Agent Error: {e}")
133
+ import traceback
134
+ traceback.print_exc()
135
+
136
+ # ============================================
137
+ # STEP 2: RUN ANOMALY DETECTION PIPELINE
138
+ # ============================================
139
+ print("\n\n" + "=" * 50)
140
+ print("STEP 2: Running Anomaly Detection ML Pipeline")
141
+ print("=" * 50)
142
+
143
+ MODELS_PATH = PROJECT_ROOT / "models" / "anomaly-detection"
144
+ sys.path.insert(0, str(MODELS_PATH))
145
+
146
+ try:
147
+ from src.pipeline.training_pipeline import TrainingPipeline
148
+
149
+ print("\n🔄 Running Anomaly Detection Training Pipeline...")
150
+ pipeline = TrainingPipeline()
151
+ artifacts = pipeline.run()
152
+
153
+ print("\n✅ Training Pipeline Results:")
154
+ print("-" * 40)
155
+
156
+ if artifacts.get("model_trainer"):
157
+ trainer_artifact = artifacts["model_trainer"]
158
+ print(f" Best Score: {getattr(trainer_artifact, 'best_score', 'N/A')}")
159
+ print(f" Best Model: {getattr(trainer_artifact, 'best_model', 'N/A')}")
160
+
161
+ # Check for model files
162
+ model_path = MODELS_PATH / "output"
163
+ if model_path.exists():
164
+ models = list(model_path.glob("*.joblib"))
165
+ print(f"\n📁 Saved Models: {len(models)}")
166
+ for m in models:
167
+ print(f" - {m.name}")
168
+
169
+ except ImportError as e:
170
+ print(f"⚠️ Could not import training pipeline: {e}")
171
+ print(" Running standalone model training instead...")
172
+
173
+ try:
174
+ # Try running the main.py directly
175
+ os.chdir(MODELS_PATH)
176
+ exec(open(MODELS_PATH / "main.py").read())
177
+ except Exception as e2:
178
+ print(f"❌ Standalone training error: {e2}")
179
+ except Exception as e:
180
+ print(f"❌ Pipeline Error: {e}")
181
+ import traceback
182
+ traceback.print_exc()
183
+
184
+ # ============================================
185
+ # STEP 3: VISUALIZATION
186
+ # ============================================
187
+ print("\n\n" + "=" * 50)
188
+ print("STEP 3: Generating Visualizations")
189
+ print("=" * 50)
190
+
191
+ try:
192
+ import matplotlib
193
+ matplotlib.use('Agg') # Non-interactive backend
194
+ import matplotlib.pyplot as plt
195
+ from sklearn.decomposition import PCA
196
+
197
+ # Only visualize if we have embeddings
198
+ if 'embeddings' in dir() and embeddings:
199
+ # Extract vectors
200
+ vectors = []
201
+ labels = []
202
+ for emb in embeddings:
203
+ vec = emb.get("vector", [])
204
+ if len(vec) == 768:
205
+ vectors.append(vec)
206
+ labels.append(emb.get("post_id", ""))
207
+
208
+ if vectors:
209
+ X = np.array(vectors)
210
+
211
+ # PCA for visualization
212
+ pca = PCA(n_components=2)
213
+ X_2d = pca.fit_transform(X)
214
+
215
+ # Create figure
216
+ fig, axes = plt.subplots(1, 2, figsize=(14, 6))
217
+
218
+ # Plot 1: Language distribution scatter
219
+ ax1 = axes[0]
220
+ colors = {'english': 'blue', 'sinhala': 'green', 'tamil': 'orange', 'unknown': 'gray'}
221
+
222
+ for i, emb in enumerate(embeddings):
223
+ if i < len(X_2d):
224
+ lang = emb.get("language", "unknown")
225
+ ax1.scatter(X_2d[i, 0], X_2d[i, 1], c=colors.get(lang, 'gray'),
226
+ s=100, alpha=0.7, label=lang if lang not in [e.get('language') for e in embeddings[:i]] else "")
227
+
228
+ ax1.set_title("Text Embeddings by Language (PCA 2D)")
229
+ ax1.set_xlabel("PC1")
230
+ ax1.set_ylabel("PC2")
231
+
232
+ # Add legend (unique labels only)
233
+ handles, legend_labels = ax1.get_legend_handles_labels()
234
+ by_label = dict(zip(legend_labels, handles))
235
+ ax1.legend(by_label.values(), by_label.keys())
236
+
237
+ # Plot 2: Anomaly scores
238
+ ax2 = axes[1]
239
+ if anomalies:
240
+ anomaly_ids = [a.get("post_id", "") for a in anomalies]
241
+
242
+ for i, emb in enumerate(embeddings):
243
+ if i < len(X_2d):
244
+ is_anomaly = emb.get("post_id", "") in anomaly_ids
245
+ color = 'red' if is_anomaly else 'blue'
246
+ marker = 'X' if is_anomaly else 'o'
247
+ ax2.scatter(X_2d[i, 0], X_2d[i, 1], c=color, marker=marker,
248
+ s=150 if is_anomaly else 80, alpha=0.7)
249
+
250
+ ax2.scatter([], [], c='red', marker='X', s=150, label='Anomaly')
251
+ ax2.scatter([], [], c='blue', marker='o', s=80, label='Normal')
252
+ ax2.legend()
253
+ else:
254
+ ax2.scatter(X_2d[:, 0], X_2d[:, 1], c='blue', s=80, alpha=0.7)
255
+ ax2.text(0.5, 0.5, "No anomalies detected\n(Model may not be trained)",
256
+ ha='center', va='center', transform=ax2.transAxes)
257
+
258
+ ax2.set_title("Anomaly Detection Results (PCA 2D)")
259
+ ax2.set_xlabel("PC1")
260
+ ax2.set_ylabel("PC2")
261
+
262
+ plt.tight_layout()
263
+
264
+ # Save figure
265
+ output_path = PROJECT_ROOT / "vectorizer_anomaly_visualization.png"
266
+ plt.savefig(output_path, dpi=150, bbox_inches='tight')
267
+ print(f"\n✅ Visualization saved to: {output_path}")
268
+
269
+ plt.close()
270
+ else:
271
+ print("⚠️ No embeddings available for visualization")
272
+
273
+ except ImportError as e:
274
+ print(f"⚠️ Matplotlib not available for visualization: {e}")
275
+ except Exception as e:
276
+ print(f"❌ Visualization Error: {e}")
277
+ import traceback
278
+ traceback.print_exc()
279
+
280
+ # ============================================
281
+ # STEP 4: GRAPH FLOW VISUALIZATION
282
+ # ============================================
283
+ print("\n\n" + "=" * 50)
284
+ print("STEP 4: Generating Graph Flow Diagram")
285
+ print("=" * 50)
286
+
287
+ try:
288
+ # Create a simple ASCII graph visualization
289
+ graph_viz = """
290
+ ╔═══════════════════════════════════════════════════════════════════╗
291
+ ║ VECTORIZATION AGENT GRAPH FLOW ║
292
+ ╠═══════════════════════════════════════════════════════════════════╣
293
+ ║ ║
294
+ ║ ┌─────────────────┐ ║
295
+ ║ │ INPUT TEXTS │ (Multilingual: EN, SI, TA) ║
296
+ ║ └────────┬────────┘ ║
297
+ ║ │ ║
298
+ ║ ▼ ║
299
+ ║ ┌─────────────────────────────────────────────────────┐ ║
300
+ ║ │ STEP 1: LANGUAGE DETECTION │ ║
301
+ ║ │ ├─ FastText (primary) │ ║
302
+ ║ │ └─ Unicode Script Analysis (fallback) │ ║
303
+ ║ └────────┬────────────────────────────────────────────┘ ║
304
+ ║ │ ║
305
+ ║ ▼ ║
306
+ ║ ┌─────────────────────────────────────────────────────┐ ║
307
+ ║ │ STEP 2: TEXT VECTORIZATION │ ║
308
+ ║ │ ├─ English → DistilBERT (768-dim) │ ║
309
+ ║ │ ├─ Sinhala → SinhalaBERTo (768-dim) │ ║
310
+ ║ │ └─ Tamil → Tamil-BERT (768-dim) │ ║
311
+ ║ └────────┬────────────────────────────────────────────┘ ║
312
+ ║ │ ║
313
+ ║ ▼ ║
314
+ ║ ┌─────────────────────────────────────────────────────┐ ║
315
+ ║ │ STEP 3: ANOMALY DETECTION │ ║
316
+ ║ │ ├─ Model: Isolation Forest / LOF │ ║
317
+ ║ │ ├─ Input: 768-dim embedding vectors │ ║
318
+ ║ │ └─ Output: anomaly_score (0-1), is_anomaly flag │ ║
319
+ ║ └────────┬────────────────────────────────────────────┘ ║
320
+ ║ │ ║
321
+ ║ ▼ ║
322
+ ║ ┌─────────────────────────────────────────────────────┐ ║
323
+ ║ │ STEP 4: EXPERT SUMMARY (GroqLLM) │ ║
324
+ ║ │ ├─ Opportunity Detection │ ║
325
+ ║ │ └─ Threat Detection │ ║
326
+ ║  └────────┬────────────────────────────────────────────┘          ║
327
+ ║ │ ║
328
+ ║ ▼ ║
329
+ ║ ┌─────────────────────────────────────────────────────┐ ║
330
+ ║ │ STEP 5: FORMAT OUTPUT │ ║
331
+ ║ │ └─ domain_insights[] for Combined Agent │ ║
332
+ ║ └────────┬────────────────────────────────────────────┘ ║
333
+ ║ │ ║
334
+ ║ ▼ ║
335
+ ║ ┌─────────────────┐ ║
336
+ ║ │ END │ → Passed to Feed Aggregator ║
337
+ ║ └─────────────────┘ ║
338
+ ║ ║
339
+ ╚═══════════════════════════════════════════════════════════════════╝
340
+ """
341
+ print(graph_viz)
342
+
343
+ # Save as text file
344
+ graph_path = PROJECT_ROOT / "vectorizer_graph_flow.txt"
345
+ with open(graph_path, "w", encoding="utf-8") as f:
346
+ f.write(graph_viz)
347
+ print(f"✅ Graph flow saved to: {graph_path}")
348
+
349
+ except Exception as e:
350
+ print(f"❌ Error: {e}")
351
+
352
+ # ============================================
353
+ # SUMMARY
354
+ # ============================================
355
+ print("\n\n" + "=" * 70)
356
+ print(" TEST SUMMARY")
357
+ print("=" * 70)
358
+
359
+ print("""
360
+ 📊 VECTORIZER AGENT ARCHITECTURE:
361
+ ├── 5-Step Sequential Pipeline
362
+ ├── Multilingual Support: English, Sinhala, Tamil
363
+ ├── BERT Models: DistilBERT, SinhalaBERTo, Tamil-BERT
364
+ └── Output: 768-dimensional embeddings
365
+
366
+ 🔍 ANOMALY DETECTION:
367
+ ├── Algorithm: Isolation Forest / LOF
368
+ ├── Training: Optuna hyperparameter optimization
369
+ ├── MLflow: Experiment tracking (DagsHub)
370
+ └── Integration: Real-time inference on every graph cycle
371
+
372
+ 📁 OUTPUT FILES:
373
+ ├── vectorizer_anomaly_visualization.png (if matplotlib available)
374
+ └── vectorizer_graph_flow.txt (graph architecture)
375
+ """)
376
+
377
+ print("=" * 70)
378
+ print(" TEST COMPLETE")
379
+ print("=" * 70)
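
For readers following the anomaly fields the test above prints (`anomaly_score`, `anomalies_found`), here is a minimal, self-contained sketch of how an Isolation Forest can map 768-dim embeddings to 0-1 anomaly scores. The function name and the score normalization are illustrative assumptions, not the project's actual `TrainingPipeline` API.

```python
# Minimal sketch (assumptions): map Isolation Forest decision scores over
# 768-dim embeddings to a 0-1 anomaly_score, higher = more anomalous.
# This is NOT the project's TrainingPipeline; names are illustrative.
import numpy as np
from sklearn.ensemble import IsolationForest

def score_embeddings(vectors, contamination=0.1):
    X = np.asarray(vectors, dtype=np.float32)
    model = IsolationForest(contamination=contamination, random_state=42)
    model.fit(X)
    raw = -model.decision_function(X)                             # higher = more anomalous
    scores = (raw - raw.min()) / (raw.max() - raw.min() + 1e-9)   # normalize to 0-1
    flags = model.predict(X) == -1                                # -1 marks anomalies
    return scores, flags

# 20 clustered vectors plus one obvious outlier
vectors = np.random.normal(0, 0.1, size=(20, 768)).tolist()
vectors.append(np.random.normal(5, 0.1, size=768).tolist())
scores, flags = score_embeddings(vectors)
print(f"anomalies_found: {int(flags.sum())}, max anomaly_score: {scores.max():.3f}")
```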
trending_detection_visualization.png ADDED

Git LFS Details

  • SHA256: 906c6fafc9c7b95270a27abd5381eeca38407ced3d6b3e0f9ef967205e42af3b
  • Pointer size: 131 Bytes
  • Size of remote file: 190 kB
vectorizer_anomaly_visualization.png ADDED

Git LFS Details

  • SHA256: 5f0bc1c338b312268601e3e62ea34d3f8301745b3519cec96f3362becc030813
  • Pointer size: 131 Bytes
  • Size of remote file: 182 kB
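
The test script above only lists whatever `*.joblib` artifacts exist under `models/anomaly-detection/output`. Below is a hedged sketch of reusing such an artifact at inference time; the file name `isolation_forest.joblib` is a hypothetical placeholder, not a confirmed artifact name.

```python
# Sketch (assumptions): load a previously trained model artifact and score new
# embeddings. "isolation_forest.joblib" is a hypothetical file name.
from pathlib import Path
import numpy as np
import joblib

model_path = Path("models/anomaly-detection/output/isolation_forest.joblib")
if model_path.exists():
    model = joblib.load(model_path)
    X = np.random.normal(0, 0.1, size=(5, 768))   # stand-in for real embeddings
    print("predictions:", model.predict(X))       # -1 = anomaly, 1 = normal
else:
    print("No trained model found; the test above would report zero anomalies.")
```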
vectorizer_graph_flow.txt ADDED
@@ -0,0 +1,52 @@
1
+
2
+ ╔═══════════════════════════════════════════════════════════════════╗
3
+ ║ VECTORIZATION AGENT GRAPH FLOW ║
4
+ ╠═══════════════════════════════════════════════════════════════════╣
5
+ ║ ║
6
+ ║ ┌─────────────────┐ ║
7
+ ║ │ INPUT TEXTS │ (Multilingual: EN, SI, TA) ║
8
+ ║ └────────┬────────┘ ║
9
+ ║ │ ║
10
+ ║ ▼ ║
11
+ ║ ┌─────────────────────────────────────────────────────┐ ║
12
+ ║ │ STEP 1: LANGUAGE DETECTION │ ║
13
+ ║ │ ├─ FastText (primary) │ ║
14
+ ║ │ └─ Unicode Script Analysis (fallback) │ ║
15
+ ║ └────────┬────────────────────────────────────────────┘ ║
16
+ ║ │ ║
17
+ ║ ▼ ║
18
+ ║ ┌─────────────────────────────────────────────────────┐ ║
19
+ ║ │ STEP 2: TEXT VECTORIZATION │ ║
20
+ ║ │ ├─ English → DistilBERT (768-dim) │ ║
21
+ ║ │ ├─ Sinhala → SinhalaBERTo (768-dim) │ ║
22
+ ║ │ └─ Tamil → Tamil-BERT (768-dim) │ ║
23
+ ║ └────────┬────────────────────────────────────────────┘ ║
24
+ ║ │ ║
25
+ ║ ▼ ║
26
+ ║ ┌─────────────────────────────────────────────────────┐ ║
27
+ ║ │ STEP 3: ANOMALY DETECTION │ ║
28
+ ║ │ ├─ Model: Isolation Forest / LOF │ ║
29
+ ║ │ ├─ Input: 768-dim embedding vectors │ ║
30
+ ║ │ └─ Output: anomaly_score (0-1), is_anomaly flag │ ║
31
+ ║ └────────┬────────────────────────────────────────────┘ ║
32
+ ║ │ ║
33
+ ║ ▼ ║
34
+ ║ ┌─────────────────────────────────────────────────────┐ ║
35
+ ║ │ STEP 4: EXPERT SUMMARY (GroqLLM) │ ║
36
+ ║ │ ├─ Opportunity Detection │ ║
37
+ ║ │ └─ Threat Detection │ ║
38
+ ║ └────────┬────────────────────────────────────────────┘ ║
39
+ ║ │ ║
40
+ ║ ▼ ║
41
+ ║ ┌─────────────────────────────────────────────────────┐ ║
42
+ ║ │ STEP 5: FORMAT OUTPUT │ ║
43
+ ║ │ └─ domain_insights[] for Combined Agent │ ║
44
+ ║ └────────┬────────────────────────────────────────────┘ ║
45
+ ║ │ ║
46
+ ║ ▼ ║
47
+ ║ ┌─────────────────┐ ║
48
+ ║ │ END │ → Passed to Feed Aggregator ║
49
+ ║ └─────────────────┘ ║
50
+ ║ ║
51
+ ╚═══════════════════════════════════════════════════════════════════╝
52
+
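
The flow above names "Unicode Script Analysis (fallback)" as the backup language detector behind FastText. A rough, illustrative sketch of such a fallback (not the agent's actual implementation) can be as simple as counting characters per Unicode block:

```python
# Illustrative fallback only: classify text as sinhala / tamil / english by
# counting characters in each script's Unicode block. Labels and tie-breaking
# are assumptions; the primary FastText detector is not shown.
def detect_language_by_script(text: str) -> str:
    sinhala = sum(1 for ch in text if "\u0D80" <= ch <= "\u0DFF")   # Sinhala block
    tamil = sum(1 for ch in text if "\u0B80" <= ch <= "\u0BFF")     # Tamil block
    latin = sum(1 for ch in text if ch.isascii() and ch.isalpha())  # basic Latin letters
    counts = {"sinhala": sinhala, "tamil": tamil, "english": latin}
    best = max(counts, key=counts.get)
    return best if counts[best] > 0 else "unknown"

print(detect_language_by_script("ශ්‍රී ලංකාව"))   # sinhala
print(detect_language_by_script("இலங்கை"))        # tamil
print(detect_language_by_script("Sri Lanka"))     # english
```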
visualize_trending.py ADDED
@@ -0,0 +1,173 @@
1
+ """
2
+ visualize_trending.py
3
+ Creates visual graphs for trending detection results
4
+ """
5
+ import sys
6
+ import matplotlib
7
+ matplotlib.use('Agg')
8
+ import matplotlib.pyplot as plt
9
+ import numpy as np
10
+ from pathlib import Path
11
+ from datetime import datetime
12
+
13
+ if sys.platform == 'win32':
14
+ sys.stdout.reconfigure(encoding='utf-8')
15
+
16
+ sys.path.insert(0, str(Path('.').resolve()))
17
+
18
+ print("=" * 60)
19
+ print("GENERATING TRENDING DETECTION VISUALIZATION")
20
+ print("=" * 60)
21
+
22
+ # Run the vectorizer to get fresh data
23
+ from src.graphs.vectorizationAgentGraph import graph
24
+
25
+ test_texts = [
26
+ {"text": "URGENT: Major earthquake hits Colombo, buildings damaged!", "post_id": "EN_001"},
27
+ {"text": "Breaking news: Earthquake in Colombo measuring 5.5 magnitude", "post_id": "EN_002"},
28
+ {"text": "Colombo earthquake causes panic, residents evacuated", "post_id": "EN_003"},
29
+ {"text": "Sri Lanka Cricket team wins against India in thrilling match", "post_id": "EN_004"},
30
+ {"text": "Stock market shows bullish trends in JKH and Commercial Bank", "post_id": "EN_005"},
31
+ {"text": "Another earthquake aftershock reported in Colombo area", "post_id": "EN_006"},
32
+ {"text": "President announces relief fund for earthquake victims", "post_id": "EN_007"},
33
+ {"text": "Heavy rainfall expected in Southern Province", "post_id": "EN_008"},
34
+ ]
35
+
36
+ print(f"\nProcessing {len(test_texts)} texts...")
37
+
38
+ result = graph.invoke({
39
+ "input_texts": test_texts,
40
+ "batch_id": datetime.now().strftime("%Y%m%d_%H%M%S"),
41
+ })
42
+
43
+ trending_results = result.get("trending_results", {})
44
+ anomaly_results = result.get("anomaly_results", {})
45
+
46
+ # Get trending data
47
+ trending_topics = trending_results.get("trending_topics", [])
48
+ spike_alerts = trending_results.get("spike_alerts", [])
49
+ entities = trending_results.get("entities", [])
50
+
51
+ print(f"Trending topics: {len(trending_topics)}")
52
+ print(f"Spike alerts: {len(spike_alerts)}")
53
+
54
+ # Create visualization
55
+ fig, axes = plt.subplots(2, 2, figsize=(14, 10))
56
+ fig.suptitle('Vectorizer Agent: Trending Detection Dashboard', fontsize=16, fontweight='bold')
57
+
58
+ # ===== PLOT 1: Trending Topics Momentum =====
59
+ ax1 = axes[0, 0]
60
+ if trending_topics:
61
+ topics = [t.get('topic', '')[:15] for t in trending_topics[:10]]
62
+ momentums = [t.get('momentum', 0) for t in trending_topics[:10]]
63
+ colors = ['#e74c3c' if m > 30 else '#f39c12' if m > 10 else '#3498db' for m in momentums]
64
+
65
+ bars = ax1.barh(topics, momentums, color=colors, edgecolor='black', linewidth=0.5)
66
+ ax1.set_xlabel('Momentum Score', fontsize=11)
67
+ ax1.set_title('Top Trending Topics by Momentum', fontsize=12, fontweight='bold')
68
+ ax1.axvline(x=3, color='orange', linestyle='--', alpha=0.7, label='Spike Threshold (3x)')
69
+ ax1.axvline(x=2, color='green', linestyle='--', alpha=0.7, label='Trending Threshold (2x)')
70
+ ax1.legend(loc='lower right', fontsize=8)
71
+
72
+ # Add value labels
73
+ for bar, val in zip(bars, momentums):
74
+ ax1.text(val + 1, bar.get_y() + bar.get_height()/2, f'{val:.0f}x',
75
+ va='center', fontsize=9, fontweight='bold')
76
+ else:
77
+ ax1.text(0.5, 0.5, 'No trending topics', ha='center', va='center', fontsize=12)
78
+ ax1.set_title('Top Trending Topics', fontsize=12, fontweight='bold')
79
+
80
+ # ===== PLOT 2: Entity Types Distribution =====
81
+ ax2 = axes[0, 1]
82
+ if entities:
83
+ entity_types = {}
84
+ for e in entities:
85
+ t = e.get('type', 'unknown')
86
+ entity_types[t] = entity_types.get(t, 0) + 1
87
+
88
+ labels = list(entity_types.keys())
89
+ sizes = list(entity_types.values())
90
+ colors = ['#3498db', '#2ecc71', '#e74c3c', '#9b59b6', '#f1c40f'][:len(labels)]
91
+ explode = [0.05] * len(labels)
92
+
93
+ ax2.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
94
+ colors=colors, shadow=True, startangle=90)
95
+ ax2.set_title('Extracted Entity Types', fontsize=12, fontweight='bold')
96
+ else:
97
+ ax2.text(0.5, 0.5, 'No entities', ha='center', va='center', fontsize=12)
98
+ ax2.set_title('Extracted Entity Types', fontsize=12, fontweight='bold')
99
+
100
+ # ===== PLOT 3: Spike Alerts =====
101
+ ax3 = axes[1, 0]
102
+ if spike_alerts:
103
+ spike_topics = [s.get('topic', '')[:12] for s in spike_alerts[:8]]
104
+ spike_moms = [s.get('momentum', 0) for s in spike_alerts[:8]]
105
+
106
+ bars = ax3.bar(spike_topics, spike_moms, color='#e74c3c', edgecolor='black', linewidth=1)
107
+ ax3.set_ylabel('Momentum', fontsize=11)
108
+ ax3.set_title('🔥 SPIKE ALERTS (>3x Normal Volume)', fontsize=12, fontweight='bold', color='#c0392b')
109
+ ax3.axhline(y=3, color='orange', linestyle='--', alpha=0.7)
110
+ ax3.set_xticklabels(spike_topics, rotation=45, ha='right', fontsize=9)
111
+
112
+ # Add explosion effect
113
+ for bar, val in zip(bars, spike_moms):
114
+ ax3.text(bar.get_x() + bar.get_width()/2, val + 2, f'{val:.0f}x',
115
+ ha='center', fontsize=10, fontweight='bold', color='#c0392b')
116
+ else:
117
+ ax3.text(0.5, 0.5, 'No spike alerts', ha='center', va='center', fontsize=12)
118
+ ax3.set_title('Spike Alerts', fontsize=12, fontweight='bold')
119
+
120
+ # ===== PLOT 4: Pipeline Summary =====
121
+ ax4 = axes[1, 1]
122
+ ax4.axis('off')
123
+
124
+ # Create a summary box
125
+ summary_text = f"""
126
+ ╔══════════════════════════════════════════════════╗
127
+ ║ VECTORIZER AGENT PIPELINE SUMMARY ║
128
+ ╠══════════════════════════════════════════════════╣
129
+ ║ ║
130
+ ║ 📝 Texts Processed: {len(test_texts):>5} ║
131
+ ║ 🌐 Entities Extracted: {len(entities):>5} ║
132
+ ║ 📈 Trending Topics: {len(trending_topics):>5} ║
133
+ ║ 🔥 Spike Alerts: {len(spike_alerts):>5} ║
134
+ ║ ⚠️ Anomalies Detected: {anomaly_results.get('anomalies_found', 0):>5} ║
135
+ ║ ║
136
+ ╠══════════════════════════════════════════════════╣
137
+ ║ Top Trending: ║
138
+ """
139
+
140
+ if trending_topics:
141
+ for i, t in enumerate(trending_topics[:3]):
142
+ topic = t.get('topic', 'N/A')[:20]
143
+ mom = t.get('momentum', 0)
144
+ summary_text += f"║ {i+1}. {topic:<20} ({mom:.0f}x) ║\n"
145
+ else:
146
+ summary_text += "║ (No trending topics) ║\n"
147
+
148
+ summary_text += """╚══════════════════════════════════════════════════╝"""
149
+
150
+ ax4.text(0.5, 0.5, summary_text, family='monospace', fontsize=9,
151
+ ha='center', va='center',
152
+ bbox=dict(boxstyle='round', facecolor='#ecf0f1', edgecolor='#2c3e50'))
153
+
154
+ plt.tight_layout()
155
+ plt.subplots_adjust(top=0.93)
156
+
157
+ # Save
158
+ output_path = Path('trending_detection_visualization.png')
159
+ plt.savefig(output_path, dpi=150, bbox_inches='tight', facecolor='white')
160
+ print(f"\n✅ Visualization saved: {output_path}")
161
+
162
+ # Also save to artifacts
163
+ artifacts_dir = Path(r'C:\Users\LENOVO\.gemini\antigravity\brain\b892f63f-afbc-4c4a-bbf1-37195faf04a5')
164
+ if artifacts_dir.exists():
165
+ artifacts_output = artifacts_dir / 'trending_visualization.png'
166
+ plt.savefig(str(artifacts_output), dpi=150, bbox_inches='tight', facecolor='white')
167
+ print(f"✅ Also saved to: {artifacts_output}")
168
+
169
+ plt.close()
170
+
171
+ print("\n" + "=" * 60)
172
+ print("VISUALIZATION COMPLETE")
173
+ print("=" * 60)
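
As context for the momentum values plotted above (with the 2x trending and 3x spike reference lines), a minimal sketch of momentum-style spike detection might look like the following. The function and field names are illustrative assumptions, not the agent's actual code.

```python
# Sketch (assumptions): momentum = newest hourly mention count divided by the
# average of the preceding hours; the 2x / 3x cutoffs mirror the dashboard's
# reference lines.
def compute_momentum(hourly_counts: list[int]) -> float:
    if len(hourly_counts) < 2:
        return 0.0
    current = hourly_counts[-1]
    baseline = sum(hourly_counts[:-1]) / len(hourly_counts[:-1])
    return current / max(baseline, 1e-9)

def classify_topic(topic: str, hourly_counts: list[int]) -> dict:
    m = compute_momentum(hourly_counts)
    return {
        "topic": topic,
        "momentum": round(m, 2),
        "is_trending": m >= 2.0,  # trending threshold (2x)
        "is_spike": m >= 3.0,     # spike threshold (3x)
    }

print(classify_topic("earthquake", [1, 2, 1, 2, 1, 1, 12]))
# {'topic': 'earthquake', 'momentum': 9.0, 'is_trending': True, 'is_spike': True}
```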