import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
import json
import os
import datetime
import urllib.parse

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    AutoEvalColumn,
    ModelType,
    fields,
    WeightType,
    Precision,
)
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_evaluation_queue_df, get_leaderboard_df
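

# Restart the Space through the Hub API; the background scheduler at the bottom of
# this file calls this every 30 minutes so the leaderboard picks up fresh data.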
def restart_space():
    API.restart_space(repo_id=REPO_ID)
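

# Persist a submission to a local JSON file and return a Markdown status message
# with instructions for emailing the leaderboard maintainers.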
def save_submission_and_notify(model_name, contact_email, weight_link, json_results, paper_link, description):
    """Save the submission to a file and provide instructions for emailing it."""
    try:
        # Validate JSON format if provided
        if json_results.strip():
            try:
                json.loads(json_results)
            except json.JSONDecodeError:
                return "❌ Invalid JSON format in results field"

        # Create submission data
        submission_data = {
            "timestamp": datetime.datetime.now().isoformat(),
            "model_name": model_name,
            "contact_email": contact_email,
            "weight_link": weight_link,
            "paper_link": paper_link,
            "description": description,
            "json_results": json_results,
        }

        # Save to the submissions directory
        os.makedirs("submissions", exist_ok=True)
        filename = (
            f"submissions/{model_name.replace('/', '_')}_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        )
        with open(filename, "w") as f:
            json.dump(submission_data, f, indent=2)

        # Create a mailto link for the user
        subject = f"SearchAgent Leaderboard Submission: {model_name}"
        body = f"""New model submission for SearchAgent Leaderboard:
Model Name: {model_name}
Contact Email: {contact_email}
Weight Link: {weight_link}
Paper Link: {paper_link}
Description: {description}
JSON Results:
{json_results}"""

        # URL-encode the email content (the body is truncated to 500 characters to keep the link short)
        mailto_link = (
            f"mailto:[email protected]?subject={urllib.parse.quote(subject)}&body={urllib.parse.quote(body[:500])}"
        )

        # Return the status message; the encoded mailto link pre-fills the subject and body
        return f"""✅ Submission saved successfully!
📧 **Please send your submission to: [email protected]**
You can either:
1. Click here to open your email client: [Send Email]({mailto_link})
2. Or copy the submission details above and send manually
Your submission has been saved to: {filename}
We'll review your model and get back to you at {contact_email}."""

    except Exception as e:
        return f"❌ Failed to save submission: {str(e)}"

### Space initialisation
# Use local data for demo purposes
try:
    print(EVAL_REQUESTS_PATH)
    # For the demo, use the local eval-queue directory if it exists
    if not os.path.exists(EVAL_REQUESTS_PATH):
        os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
    # snapshot_download(
    #     repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
    # )
except Exception as e:
    print(f"Could not set up eval requests path: {e}")

try:
    print(EVAL_RESULTS_PATH)
    # For the demo, use the local eval-results directory if it exists
    if not os.path.exists(EVAL_RESULTS_PATH):
        os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
    # snapshot_download(
    #     repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
    # )
except Exception as e:
    print(f"Could not set up eval results path: {e}")
def _debug_print_dataframe(name: str, dataframe: pd.DataFrame) -> None:
    if dataframe is None:
        print(f"[debug] {name}: DataFrame is None")
        return
    print(f"[debug] {name}: shape={dataframe.shape}, columns={list(dataframe.columns)}")
    if not dataframe.empty:
        preview = dataframe.head().to_dict(orient="records")
        print(f"[debug] {name}: head={preview}")
    else:
        print(f"[debug] {name}: DataFrame is empty")


def _debug_list_dir(label: str, path: str, limit: int = 10) -> None:
    try:
        entries = os.listdir(path)
        print(f"[debug] {label}: path={path}, count={len(entries)}, preview={entries[:limit]}")
    except FileNotFoundError:
        print(f"[debug] {label}: path={path} not found")
    except Exception as exc:
        print(f"[debug] {label}: path={path} error={exc}")


_debug_list_dir("EVAL_RESULTS", EVAL_RESULTS_PATH)
_debug_list_dir("EVAL_QUEUE", EVAL_REQUESTS_PATH)
LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
_debug_print_dataframe("LEADERBOARD", LEADERBOARD_DF)

(
    finished_eval_queue_df,
    running_eval_queue_df,
    pending_eval_queue_df,
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
_debug_print_dataframe("EVAL_QUEUE_FINISHED", finished_eval_queue_df)
_debug_print_dataframe("EVAL_QUEUE_RUNNING", running_eval_queue_df)
_debug_print_dataframe("EVAL_QUEUE_PENDING", pending_eval_queue_df)
def init_leaderboard(dataframe):
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return Leaderboard(
        value=dataframe,
        datatype=[c.type for c in fields(AutoEvalColumn)],
        select_columns=SelectColumns(
            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
            label="Select Columns to Display:",
        ),
        search_columns=[AutoEvalColumn.model.name],
        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
        filter_columns=[
            ColumnFilter(AutoEvalColumn.model_size.name, type="checkboxgroup", label="Model Size"),
        ],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )
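

# Assemble the Gradio UI: leaderboard, About, and Submit tabs plus a citation
# accordion. The [debug] prints run once, while the Blocks layout is being built.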
def create_demo():
    """Create the Gradio interface."""
    with gr.Blocks(css=custom_css) as demo:
        gr.HTML(TITLE)

        with gr.Tabs(elem_classes="tab-buttons") as tabs:
            print("[debug] Rendering leaderboard tab start")
            with gr.TabItem("🏅 SearchAgent Benchmark", elem_id="llm-benchmark-tab-table", id=0):
                leaderboard = init_leaderboard(LEADERBOARD_DF)
                gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
            print("[debug] Rendering leaderboard tab done")

            print("[debug] Rendering about tab start")
            with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
            print("[debug] Rendering about tab done")

            print("[debug] Rendering submit tab start")
            with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
                with gr.Column():
                    with gr.Row():
                        gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
            print("[debug] Rendering submit tab done")

        with gr.Row():
            print("[debug] Rendering citation start")
            with gr.Accordion("📙 Citation", open=False):
                gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    lines=20,
                    elem_id="citation-button",
                    show_copy_button=True,
                )
            print("[debug] Rendering citation done")

    return demo
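

# Build the app, schedule a Space restart every 30 minutes to refresh the data,
# and launch the Gradio server.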
demo = create_demo()

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

demo.launch(show_error=True)