File size: 9,993 Bytes
ac649ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 |
"""
test_ml_pipelines.py
Test script to verify all 4 ML pipelines are working correctly
"""
import sys
import os
import io
# Force UTF-8 output on Windows
if sys.platform == 'win32':
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
# Add project root to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
print("="*70)
print("[ML PIPELINE TESTING]")
print("="*70)
results = {}
# =============================================================================
# 1. ANOMALY DETECTION PIPELINE
# =============================================================================
print("\n" + "=" * 60)
print("[1] ANOMALY DETECTION PIPELINE")
print("=" * 60)
try:
    from pathlib import Path

    # Trained detectors are serialized with joblib into the pipeline's output dir.
    anomaly_dir = Path(__file__).parent / "models" / "anomaly-detection" / "output"
    found_models = list(anomaly_dir.glob("*.joblib")) if anomaly_dir.exists() else []

    if not found_models:
        print("[WARN] No trained models found. Run training first.")
        print(" Command: python models/anomaly-detection/main.py --mode train")
        results["anomaly_detection"] = {"status": "not_trained"}
    else:
        print(f"[OK] Found {len(found_models)} trained models:")
        for model_path in found_models[:3]:
            print(f" - {model_path.name}")

        # NOTE(review): the import path uses underscores while the directory on
        # disk is hyphenated ("anomaly-detection") -- confirm this module path
        # actually resolves (a failure here is caught by the except below).
        from models.anomaly_detection.src.utils.vectorizer import get_vectorizer
        vectorizer = get_vectorizer()
        print("[OK] Vectorizer loaded")

        import joblib
        model = joblib.load(found_models[0])
        print(f"[OK] Model loaded: {found_models[0].name}")

        # Single smoke-test prediction through the loaded model.
        test_text = "Breaking news: Major political announcement in Colombo"
        vector = vectorizer.vectorize(test_text, "en")
        prediction = model.predict([vector])[0]
        # sklearn-style detectors flag anomalies with -1; negate
        # decision_function so a larger score means "more anomalous".
        if hasattr(model, 'decision_function'):
            score = -model.decision_function([vector])[0]
        else:
            score = 0
        print(f"[OK] Test prediction: is_anomaly={prediction==-1}, score={score:.3f}")
        results["anomaly_detection"] = {"status": "success", "models": len(found_models)}
except Exception as e:
    print(f"[FAIL] Anomaly Detection error: {e}")
    results["anomaly_detection"] = {"status": "error", "error": str(e)}
# =============================================================================
# 2. WEATHER PREDICTION PIPELINE
# =============================================================================
print("\n" + "=" * 60)
print("[2] WEATHER PREDICTION PIPELINE")
print("=" * 60)
try:
    from pathlib import Path

    weather_root = Path(__file__).parent / "models" / "weather-prediction"
    model_dir = weather_root / "artifacts" / "models"
    lstm_models = list(model_dir.glob("*.h5")) if model_dir.exists() else []
    pred_dir = weather_root / "output" / "predictions"
    pred_files = list(pred_dir.glob("*.json")) if pred_dir.exists() else []

    if not lstm_models:
        print("[WARN] No trained models found")
        print(" Command: python models/weather-prediction/main.py --mode train")
        results["weather_prediction"] = {"status": "not_trained"}
    else:
        print(f"[OK] Found {len(lstm_models)} trained LSTM models:")
        for model_path in lstm_models[:5]:
            print(f" - {model_path.name}")
        if not pred_files:
            print("[WARN] No prediction files found. Run predictor.")
            results["weather_prediction"] = {"status": "models_only", "models": len(lstm_models)}
        else:
            import json
            # Most recently written prediction file wins.
            newest = max(pred_files, key=lambda p: p.stat().st_mtime)
            preds = json.loads(newest.read_text())
            districts = preds.get("districts", {})
            print(f"[OK] Found predictions for {len(districts)} districts")
            print(f" Latest prediction date: {preds.get('prediction_date', 'N/A')}")
            # Show one district as a sanity sample.
            if districts:
                district_name, sample = next(iter(districts.items()))
                print(f" Sample ({district_name}):")
                print(f" - Temp: {sample.get('temp_max', 'N/A')}C - {sample.get('temp_min', 'N/A')}C")
                print(f" - Rain: {sample.get('rainfall_mm', 'N/A')}mm")
            results["weather_prediction"] = {"status": "success", "models": len(lstm_models), "districts": len(districts)}
except Exception as e:
    print(f"[FAIL] Weather Prediction error: {e}")
    results["weather_prediction"] = {"status": "error", "error": str(e)}
# =============================================================================
# 3. CURRENCY PREDICTION PIPELINE
# =============================================================================
print("\n" + "=" * 60)
print("[3] CURRENCY PREDICTION PIPELINE (USD/LKR)")
print("=" * 60)
try:
    from pathlib import Path

    currency_root = Path(__file__).parent / "models" / "currency-volatility-prediction"
    # Constructing a Path never touches the filesystem, so there is no need to
    # guard construction on the directory existing (the original built the path
    # only when the dir existed, then re-checked .exists() anyway). A single
    # .exists() check on the file covers both cases.
    currency_model = currency_root / "artifacts" / "models" / "gru_usd_lkr.h5"
    predictions_dir = currency_root / "output" / "predictions"
    prediction_files = list(predictions_dir.glob("*.json")) if predictions_dir.exists() else []

    if currency_model.exists():
        print(f"[OK] Found GRU model: {currency_model.name}")
        if prediction_files:
            import json
            # Most recently written prediction file wins.
            latest = max(prediction_files, key=lambda p: p.stat().st_mtime)
            with open(latest) as f:
                pred = json.load(f)
            print(f"[OK] Latest prediction found:")
            print(f" - Current Rate: {pred.get('current_rate', 'N/A')} LKR")
            print(f" - Predicted: {pred.get('predicted_rate', 'N/A')} LKR")
            print(f" - Change: {pred.get('change_percent', 'N/A')}%")
            print(f" - Direction: {pred.get('direction', 'N/A')}")
            results["currency_prediction"] = {"status": "success", "rate": pred.get("predicted_rate")}
        else:
            print("[WARN] No prediction files found")
            results["currency_prediction"] = {"status": "model_only"}
    else:
        print("[WARN] No trained model found")
        print(" Command: python models/currency-volatility-prediction/main.py --mode train")
        results["currency_prediction"] = {"status": "not_trained"}
except Exception as e:
    print(f"[FAIL] Currency Prediction error: {e}")
    results["currency_prediction"] = {"status": "error", "error": str(e)}
# =============================================================================
# 4. STOCK PRICE PREDICTION PIPELINE
# =============================================================================
print("\n" + "=" * 60)
print("[4] STOCK PRICE PREDICTION PIPELINE")
print("=" * 60)
try:
    from pathlib import Path

    stock_root = Path(__file__).parent / "models" / "stock-price-prediction"
    stock_model_dir = stock_root / "artifacts" / "models"
    stock_models = list(stock_model_dir.glob("*.h5")) if stock_model_dir.exists() else []
    predictions_dir = stock_root / "output" / "predictions"
    prediction_files = list(predictions_dir.glob("*.json")) if predictions_dir.exists() else []

    if stock_models:
        print(f"[OK] Found {len(stock_models)} stock models:")
        for m in stock_models[:5]:
            print(f" - {m.name}")
        if prediction_files:
            import json
            # Most recently written prediction file wins.
            latest = max(prediction_files, key=lambda p: p.stat().st_mtime)
            with open(latest) as f:
                preds = json.load(f)
            # Payload may be keyed "stocks" or "predictions" -- accept either.
            stocks = preds.get("stocks", preds.get("predictions", {}))
            print(f"[OK] Found predictions for {len(stocks)} stocks")
            # Only dict payloads carry per-stock detail worth sampling; the
            # original also computed a sample key for list payloads but never
            # used it, so that dead branch is removed.
            if stocks and isinstance(stocks, dict):
                sample_stock, sample = next(iter(stocks.items()))
                print(f" Sample ({sample_stock}):")
                print(f" - Current: {sample.get('current_price', 'N/A')}")
                print(f" - Predicted: {sample.get('predicted_price', 'N/A')}")
            results["stock_prediction"] = {"status": "success", "models": len(stock_models), "stocks": len(stocks)}
        else:
            print("[WARN] No prediction files found")
            results["stock_prediction"] = {"status": "models_only", "models": len(stock_models)}
    else:
        print("[WARN] No trained models found")
        print(" Command: python models/stock-price-prediction/main.py --mode train")
        results["stock_prediction"] = {"status": "not_trained"}
except Exception as e:
    print(f"[FAIL] Stock Prediction error: {e}")
    results["stock_prediction"] = {"status": "error", "error": str(e)}
# =============================================================================
# SUMMARY
# =============================================================================
print("\n" + "=" * 70)
print("[SUMMARY] ML PIPELINE STATUS")
print("=" * 70)
for pipeline, result in results.items():
    status = result.get("status", "unknown")
    # Map each recorded status onto a one-line human-readable verdict.
    if status == "success":
        line = f"[OK] {pipeline}: Working"
    elif status == "not_trained":
        line = f"[WARN] {pipeline}: Not trained yet"
    elif status in ("model_only", "models_only"):
        line = f"[WARN] {pipeline}: Model exists, no recent predictions"
    else:
        line = f"[FAIL] {pipeline}: {result.get('error', status)}"
    print(line)
print("=" * 70)
|