import os
import re
import requests
from dotenv import load_dotenv
from markdownify import markdownify
from requests.exceptions import RequestException
from smolagents import (
    LiteLLMModel,
    CodeAgent,
    ToolCallingAgent,
    InferenceClientModel,
    WebSearchTool,
    tool,
    FinalAnswerTool,
    WikipediaSearchTool,
    VisitWebpageTool,
    DuckDuckGoSearchTool
)

# Load variables from a local .env file before anything below reads the
# process environment (e.g. the NEBIUS_API_KEY lookup for the model).
load_dotenv()

from langfuse import Langfuse, get_client

# NOTE(review): the instance created here is rebound by get_client() on the
# next line; presumably the constructor call is kept only to register the
# 'PROD_V1' environment with the SDK -- confirm against the Langfuse docs.
langfuse = Langfuse(environment='PROD_V1')
langfuse = get_client()

# Report once at start-up whether the Langfuse credentials were accepted.
print(
    "Langfuse client is authenticated and ready!"
    if langfuse.auth_check()
    else "Authentication failed. Please check your credentials and host."
)


from openinference.instrumentation.smolagents import SmolagentsInstrumentor

# Instrumentation is switched on before any agent is constructed below.
SmolagentsInstrumentor().instrument()


# LLM backend handed to the agents defined in this module.
model = LiteLLMModel(
    api_base="https://api.tokenfactory.nebius.com/v1/",
    # os.getenv returns None when NEBIUS_API_KEY is not set.
    api_key=os.getenv("NEBIUS_API_KEY"),
    model_id="openai/Qwen/Qwen3-Coder-480B-A35B-Instruct",
)

# Tools: each tool's docstring is how its usage instructions are passed to the CodeAgent.
from tool_clinical_trial import ClinicalTrialsSearchTool

@tool
def search_pubmed(topic: str, author: str) -> list[str]:
    """
    Searches the PubMed database for articles matching a topic and/or an author.

    Pass an empty string for either argument to leave that filter out; at least
    one of the two must be non-empty.

    Args:
        topic: The topic or keywords to search for (e.g., "CRISPR gene editing").
        author: The name of the author to search for (e.g., "Albert Einstein").

    Returns:
        A list of PubMed IDs (strings) for the matching articles, capped at
        1000 IDs.

    Raises:
        ValueError: If both topic and author are empty.
        requests.exceptions.HTTPError: If the API responds with an error status.
        requests.exceptions.Timeout: If the API does not answer in time.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

    # Treat None and whitespace-only values as "filter not provided".
    topic = (topic or "").strip()
    author = (author or "").strip()

    terms = []
    if topic:
        terms.append(topic)
    if author:
        # The [Author] field tag restricts this term to the author field.
        terms.append(f"{author}[Author]")

    # An empty term is not a meaningful search; fail fast with a message the
    # calling agent can act on instead of sending the request.
    if not terms:
        raise ValueError("search_pubmed needs a non-empty topic or author.")

    params = {
        "db": "pubmed",
        "term": " AND ".join(terms),
        "retmode": "json",
        "retmax": 1000,
    }
    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would block the agent step that invoked this tool.
    response = requests.get(base_url, params=params, timeout=30)
    response.raise_for_status()

    return response.json()["esearchresult"]["idlist"]

@tool
def parse_pdf(pdf_path: str) -> list[str]:
    """
    Reads a PDF file from a specified path and extracts the text content
    from every page.

    Args:
        pdf_path: The local file path (string) to the PDF document to be parsed.
                  **NOTE**: In a remote agent environment, this path must be
                  accessible by the executing process (e.g., a path to an
                  uploaded file).

    Returns:
        A list of strings, where each string is the extracted text content
        from a single page of the PDF.
    """
    # The docstring above is left word-for-word: it is the instruction text
    # handed to the agent for this tool, so rewording it would change behavior.

    # Imported inside the body so the dependency is only needed when the tool runs.
    from pypdf import PdfReader

    document = PdfReader(pdf_path)
    # One entry per page, in document order.
    return [page.extract_text() for page in document.pages]


# Create clinical trial search agent
clinical_agent = CodeAgent(
    name="clinical_agent",
    # Keep this text in sync with `tools` below: it names only the tool this
    # agent actually has (PubMed, PDF and web search tools are configured on
    # other agents in this module). Every fragment except the last ends with a
    # space so the implicitly concatenated literals do not run sentences together.
    description=(
        "Retrieve and parse clinical study data for a given disease. "
        "Use the ClinicalTrialsSearchTool() for any question related to clinical trials. "
        "Return structured tables or summaries as requested."
    ),
    tools=[ClinicalTrialsSearchTool()],
    additional_authorized_imports=["time", "numpy", "pandas"],
    # executor_type="blaxel", #executor_type="modal",
    return_full_result=True,
    planning_interval=3,                      # Structured planning
    use_structured_outputs_internally=True,   # Uses output for planning
    model=model,
    max_steps=6,
    verbosity_level=2,
)

# Agent for general web research: encyclopedia lookups, web search, page
# fetching, plus the PubMed and PDF helper tools defined in this module.
search_online_info = CodeAgent(
    model=model,
    name="search_online_info",
    # Sentences are joined with single spaces, yielding the same text as
    # adjacent literals that each end in a trailing space.
    description=" ".join(
        (
            "Gather general or recent information from online sources.",
            "Use Wikipedia for overviews, DuckDuckGo for recent data, and VisitWebpageTool for specific URLs.",
            "Return structured summaries with sources.",
        )
    ),
    # Tool order is preserved from the original list.
    tools=[
        WikipediaSearchTool(),
        VisitWebpageTool(max_output_length=10000),
        DuckDuckGoSearchTool(max_results=5),
        search_pubmed,
        parse_pdf,
    ],
    additional_authorized_imports=["time", "numpy", "pandas"],
    planning_interval=2,
    max_steps=4,
    verbosity_level=2,
    # use_structured_outputs_internally=True,
    # executor_type="modal",
)


# Top-level agent: holds every tool directly and is meant to produce the
# final answer to the user's question.
manager_agent = CodeAgent(
    name="manager_agent",
    # Every fragment except the last ends with a space so the implicitly
    # concatenated literals do not run sentences together.
    description=(
        "Most important task is to provide a complete answer to user questions based on clinical trial data and online information. "
        "Orchestrate workflow between clinical and online agents. "
        "Validate outputs, resolve conflicts, and ensure the final answer is complete and accurate. "
        "Primarily use the managed agent clinical_agent for questions related to clinical trials."
    ),
    tools=[
        FinalAnswerTool(),
        ClinicalTrialsSearchTool(),
        WikipediaSearchTool(),
        VisitWebpageTool(max_output_length=10000),
        DuckDuckGoSearchTool(max_results=5),
        search_pubmed,
        parse_pdf,
    ],
    model=model,
    # NOTE(review): the description above refers to managed agents, but the
    # managed_agents argument is currently disabled -- confirm which is intended.
    # managed_agents=[clinical_agent,search_online_info],
    # executor_type="modal",
    provide_run_summary=True,
    additional_authorized_imports=["time", "numpy", "pandas"],
    use_structured_outputs_internally=True,
    verbosity_level=2,
    planning_interval=3,
    max_steps=6,
)