Upload folder using huggingface_hub
Browse files
- .gitignore +5 -3
- Dockerfile +6 -2
- main.py +22 -1
- scripts/start_backend.sh +21 -0
- scripts/train_ml_models.py +25 -0
- src/utils/utils.py +16 -5
.gitignore
CHANGED
|
@@ -12,10 +12,12 @@ wheels/
|
|
| 12 |
.env
|
| 13 |
.env.template
|
| 14 |
|
|
|
|
|
|
|
| 15 |
models/
|
| 16 |
-
models
|
| 17 |
-
.langgraph_api
|
| 18 |
data/
|
| 19 |
datasets/
|
|
|
|
| 20 |
datasets
|
| 21 |
-
|
|
|
|
|
|
| 12 |
.env
|
| 13 |
.env.template
|
| 14 |
|
| 15 |
+
|
| 16 |
+
# Data & Models (Un-ignored for Hackathon Demo persistence)
|
| 17 |
models/
|
|
|
|
|
|
|
| 18 |
data/
|
| 19 |
datasets/
|
| 20 |
+
data
|
| 21 |
datasets
|
| 22 |
+
models
|
| 23 |
+
.langgraph_api
|
Dockerfile
CHANGED
|
@@ -41,5 +41,9 @@ EXPOSE 7860
|
|
| 41 |
# Set environment variable for HuggingFace
|
| 42 |
ENV PORT=7860
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
# Set environment variable for HuggingFace
|
| 42 |
ENV PORT=7860
|
| 43 |
|
| 44 |
+
|
| 45 |
+
# Set execution permissions for scripts
|
| 46 |
+
RUN chmod +x scripts/start_backend.sh
|
| 47 |
+
|
| 48 |
+
# Run API server (and ML training) via startup script
|
| 49 |
+
CMD ["/bin/bash", "scripts/start_backend.sh"]
|
main.py
CHANGED
|
@@ -1122,14 +1122,35 @@ def get_weather_predictor():
|
|
| 1122 |
if _weather_predictor is None:
|
| 1123 |
try:
|
| 1124 |
import sys
|
|
|
|
| 1125 |
from pathlib import Path
|
|
|
|
| 1126 |
weather_path = Path(__file__).parent / "models" / "weather-prediction" / "src"
|
| 1127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1128 |
from components.predictor import WeatherPredictor
|
| 1129 |
_weather_predictor = WeatherPredictor()
|
| 1130 |
logger.info("[WeatherAPI] Weather predictor initialized")
|
| 1131 |
except Exception as e:
|
| 1132 |
logger.warning(f"[WeatherAPI] Failed to initialize predictor: {e}")
|
|
|
|
|
|
|
| 1133 |
_weather_predictor = None
|
| 1134 |
return _weather_predictor
|
| 1135 |
|
|
|
|
| 1122 |
if _weather_predictor is None:
|
| 1123 |
try:
|
| 1124 |
import sys
|
| 1125 |
+
import importlib
|
| 1126 |
from pathlib import Path
|
| 1127 |
+
|
| 1128 |
weather_path = Path(__file__).parent / "models" / "weather-prediction" / "src"
|
| 1129 |
+
weather_path_str = str(weather_path)
|
| 1130 |
+
|
| 1131 |
+
# Ensure path is in sys.path
|
| 1132 |
+
if weather_path_str not in sys.path:
|
| 1133 |
+
sys.path.insert(0, weather_path_str)
|
| 1134 |
+
|
| 1135 |
+
# CRITICAL FIX: Handle 'components' package name collision
|
| 1136 |
+
# If 'components' is already loaded from another model (e.g. currency), unload it
|
| 1137 |
+
if 'components' in sys.modules:
|
| 1138 |
+
existing_path = getattr(sys.modules['components'], '__file__', '')
|
| 1139 |
+
if existing_path and weather_path_str not in str(existing_path):
|
| 1140 |
+
logger.warning(f"[WeatherAPI] components collision detected. Unloading {existing_path}")
|
| 1141 |
+
# Unload cached 'components*' and 'utils*' modules so the next import resolves from the weather path
|
| 1142 |
+
for mod in list(sys.modules.keys()):
|
| 1143 |
+
if mod.startswith('components') or mod.startswith('utils'):
|
| 1144 |
+
del sys.modules[mod]
|
| 1145 |
+
|
| 1146 |
+
# Now import fresh
|
| 1147 |
from components.predictor import WeatherPredictor
|
| 1148 |
_weather_predictor = WeatherPredictor()
|
| 1149 |
logger.info("[WeatherAPI] Weather predictor initialized")
|
| 1150 |
except Exception as e:
|
| 1151 |
logger.warning(f"[WeatherAPI] Failed to initialize predictor: {e}")
|
| 1152 |
+
import traceback
|
| 1153 |
+
logger.warning(traceback.format_exc())
|
| 1154 |
_weather_predictor = None
|
| 1155 |
return _weather_predictor
|
| 1156 |
|
scripts/start_backend.sh
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -e
|
| 3 |
+
|
| 4 |
+
echo "🚀 Starting ModelX Backend on HuggingFace Space..."
|
| 5 |
+
|
| 6 |
+
# 1. Run ML Training Pipeline (currently runs on every start; no missing-model check is performed)
|
| 7 |
+
# We trust the script to handle its own logic. For the hackathon, we always run it to ensure a fresh state where possible;
|
| 8 |
+
# alternatively, we could first check whether the output dir is empty.
|
| 9 |
+
echo "🧠 Checking ML Models..."
|
| 10 |
+
# Create output dir if not exists
|
| 11 |
+
mkdir -p models/anomaly-detection/output
|
| 12 |
+
|
| 13 |
+
# Run training (standalone script)
|
| 14 |
+
# This will use data from 'datasets/' if available.
|
| 15 |
+
# If datasets are empty, it might fail/skip, so we allow failure without stopping container.
|
| 16 |
+
python scripts/train_ml_models.py || echo "⚠️ ML Training warning (continuing anyway)..."
|
| 17 |
+
|
| 18 |
+
# 2. Start Request Server
|
| 19 |
+
# HuggingFace expects us to listen on port 7860
|
| 20 |
+
echo "⚡ Starting FastAPI Server on port $PORT..."
|
| 21 |
+
uvicorn main:app --host 0.0.0.0 --port $PORT
|
scripts/train_ml_models.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
# Add the anomaly detection model source to path
|
| 7 |
+
MODEL_DIR = Path(__file__).parent.parent / "models" / "anomaly-detection"
|
| 8 |
+
sys.path.append(str(MODEL_DIR))
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
from src.pipeline.training_pipeline import run_training_pipeline
|
| 12 |
+
print("🚀 Starting ML Model Training Pipeline (Standalone)...")
|
| 13 |
+
print(f"📂 Model Directory: {MODEL_DIR}")
|
| 14 |
+
|
| 15 |
+
# Run the pipeline
|
| 16 |
+
artifact = run_training_pipeline()
|
| 17 |
+
|
| 18 |
+
print("\n✅ Training Complete!")
|
| 19 |
+
print(f"📊 Model Artifacts stored in: {MODEL_DIR}/output")
|
| 20 |
+
|
| 21 |
+
except ImportError as e:
|
| 22 |
+
print(f"❌ Error: Could not import training pipeline. {e}")
|
| 23 |
+
print("Ensure you are running this from the project root.")
|
| 24 |
+
except Exception as e:
|
| 25 |
+
print(f"❌ Training Failed: {e}")
|
src/utils/utils.py
CHANGED
|
@@ -518,15 +518,26 @@ def scrape_rivernet_impl(
|
|
| 518 |
except (ValueError, IndexError):
|
| 519 |
continue
|
| 520 |
|
| 521 |
-
# Determine status based on keywords (
|
| 522 |
text_lower = page_text.lower()
|
| 523 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
river_data["status"] = "danger"
|
| 525 |
-
|
|
|
|
|
|
|
| 526 |
river_data["status"] = "warning"
|
| 527 |
-
|
|
|
|
|
|
|
| 528 |
river_data["status"] = "rising"
|
| 529 |
-
|
|
|
|
|
|
|
| 530 |
river_data["status"] = "normal"
|
| 531 |
|
| 532 |
results["rivers"].append(river_data)
|
|
|
|
| 518 |
except (ValueError, IndexError):
|
| 519 |
continue
|
| 520 |
|
| 521 |
+
# Determine status based on keywords (refined to avoid false positives)
|
| 522 |
text_lower = page_text.lower()
|
| 523 |
+
|
| 524 |
+
# Default to normal
|
| 525 |
+
river_data["status"] = "normal"
|
| 526 |
+
|
| 527 |
+
# DANGER / CRITICAL
|
| 528 |
+
if any(w in text_lower for w in ["major flood", "danger level", "critical level", "red alert", "evacuate", "extreme flood"]):
|
| 529 |
river_data["status"] = "danger"
|
| 530 |
+
|
| 531 |
+
# WARNING (Stricter: removed generic "high", "alert")
|
| 532 |
+
elif any(w in text_lower for w in ["minor flood", "warning level", "flood alert", "amber alert", "high risk", "flood warning"]):
|
| 533 |
river_data["status"] = "warning"
|
| 534 |
+
|
| 535 |
+
# RISING
|
| 536 |
+
elif any(w in text_lower for w in ["water level rising", "rising trend", "level is rising"]):
|
| 537 |
river_data["status"] = "rising"
|
| 538 |
+
|
| 539 |
+
# Explicitly check for normal keywords (redundant: status already defaults to "normal" above)
|
| 540 |
+
elif any(w in text_lower for w in ["normal", "safe", "stable", "low", "green", "decreasing"]):
|
| 541 |
river_data["status"] = "normal"
|
| 542 |
|
| 543 |
results["rivers"].append(river_data)
|