Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import os | |
| import subprocess | |
| from pathlib import Path | |
| import time | |
| import requests | |
| def check_services(): | |
| """Check if all required services are running""" | |
| services = [ | |
| ("Controller", "http://localhost:21001"), | |
| ("API Server", "http://localhost:8000"), | |
| ("Model Worker", "http://localhost:8080") | |
| ] | |
| for service_name, url in services: | |
| try: | |
| requests.get(url) | |
| print(f"{service_name} is running") | |
| except requests.exceptions.ConnectionError: | |
| return False, f"{service_name} is not running" | |
| return True, "All services are running" | |
| def check_training_status(): | |
| # First check if services are running | |
| services_ok, message = check_services() | |
| if not services_ok: | |
| return message | |
| results_dir = Path("/app/results") | |
| if not results_dir.exists(): | |
| return "Training hasn't started yet." | |
| iterations = len(list(results_dir.glob("iter_*"))) | |
| return f"Completed {iterations} training iterations." | |
| def start_training(model_path, instruct_count, max_iter): | |
| # Check if services are running | |
| services_ok, message = check_services() | |
| if not services_ok: | |
| return message | |
| os.environ["MODEL_PATH"] = model_path | |
| os.environ["INSTRUCT_COUNT"] = str(instruct_count) | |
| os.environ["MAX_ITER"] = str(max_iter) | |
| try: | |
| subprocess.run(["bash", "run.sh"], | |
| check=True, | |
| cwd="/app/qwen") | |
| return "Training completed successfully!" | |
| except subprocess.CalledProcessError as e: | |
| return f"Error during training: {str(e)}" | |
| # Create the interface | |
| with gr.Blocks() as iface: | |
| gr.Markdown("# Self-Lengthen Training Interface") | |
| with gr.Row(): | |
| with gr.Column(): | |
| model_path = gr.Textbox( | |
| label="Model Path", | |
| value="/app/models/base_model", | |
| info="Path to the base model" | |
| ) | |
| instruct_count = gr.Number( | |
| label="Instruction Count", | |
| value=5000, | |
| minimum=100, | |
| info="Number of instructions to generate" | |
| ) | |
| max_iter = gr.Number( | |
| label="Max Iterations", | |
| value=3, | |
| minimum=1, | |
| info="Number of training iterations" | |
| ) | |
| train_btn = gr.Button("Start Training") | |
| with gr.Column(): | |
| status_output = gr.Textbox( | |
| label="Status", | |
| value="Ready to start training...", | |
| interactive=False | |
| ) | |
| refresh_btn = gr.Button("Refresh Status") | |
| train_btn.click( | |
| fn=start_training, | |
| inputs=[model_path, instruct_count, max_iter], | |
| outputs=status_output | |
| ) | |
| refresh_btn.click( | |
| fn=check_training_status, | |
| inputs=None, | |
| outputs=status_output | |
| ) | |
| if __name__ == "__main__": | |
| # Wait for services to be ready | |
| print("Waiting for services to start...") | |
| while True: | |
| services_ok, message = check_services() | |
| if services_ok: | |
| break | |
| print(message) | |
| time.sleep(5) | |
| print("All services are running, starting web interface...") | |
| iface.launch(server_name="0.0.0.0", server_port=7860) |