Spaces:

Sergidev
/

selflengthen

Runtime error

App Files Files Community

selflengthen / app.py

Sergidev

Update app.py

8da94da verified about 1 year ago

raw

history blame contribute delete

3.36 kB

	import gradio as gr
	import os
	import subprocess
	from pathlib import Path
	import time
	import requests

	def check_services(timeout=5):
	    """Check whether every required local service answers HTTP requests.

	    Each service URL is probed in order with a GET request; the check
	    stops at the first service that cannot be reached.

	    Args:
	        timeout: Seconds to wait for each service before treating it as
	            down. Without a timeout, a service that accepts the connection
	            but never replies would block the caller indefinitely.

	    Returns:
	        A ``(ok, message)`` tuple. ``ok`` is True and ``message`` is
	        "All services are running" when every probe got a response;
	        otherwise ``ok`` is False and ``message`` names the first service
	        that failed.
	    """
	    services = [
	        ("Controller", "http://localhost:21001"),
	        ("API Server", "http://localhost:8000"),
	        ("Model Worker", "http://localhost:8080"),
	    ]

	    for service_name, url in services:
	        try:
	            # Any HTTP response counts as "running"; the status code is
	            # deliberately not inspected.
	            requests.get(url, timeout=timeout)
	        except requests.exceptions.RequestException:
	            # RequestException also covers timeouts and other transport
	            # errors, not just refused connections.
	            return False, f"{service_name} is not running"
	        print(f"{service_name} is running")
	    return True, "All services are running"

	def check_training_status():
	    """Return a human-readable summary of training progress.

	    The result is the service-check failure message when a required
	    service is down, a "not started" notice when the results directory is
	    absent, and otherwise the count of ``iter_*`` entries in it.
	    """
	    # Report a service outage before looking at any results on disk.
	    all_up, service_note = check_services()
	    if not all_up:
	        return service_note

	    output_root = Path("/app/results")
	    if output_root.exists():
	        finished = sum(1 for _ in output_root.glob("iter_*"))
	        return f"Completed {finished} training iterations."
	    return "Training hasn't started yet."

	def start_training(model_path, instruct_count, max_iter):
	    """Run the training script with the given settings and report the result.

	    The settings are exported through the process environment as
	    ``MODEL_PATH``, ``INSTRUCT_COUNT`` and ``MAX_ITER``, then ``bash run.sh``
	    is executed in ``/app/qwen``. The call blocks until the script exits.

	    Args:
	        model_path: Value exported as ``MODEL_PATH``.
	        instruct_count: Value exported as ``INSTRUCT_COUNT`` (as an integer).
	        max_iter: Value exported as ``MAX_ITER`` (as an integer).

	    Returns:
	        A status string: the service-check message if a required service is
	        down, a validation message if the numeric settings are unusable, a
	        success message, or an error description if the script could not be
	        run or exited with a non-zero status.
	    """
	    # Check if services are running
	    services_ok, message = check_services()
	    if not services_ok:
	        return message

	    # Numeric inputs may arrive as floats (e.g. 5000.0) or as None when the
	    # field is empty. Normalise to int so the exported text is "5000", not
	    # "5000.0". NOTE(review): assumes run.sh expects integers - confirm.
	    try:
	        instruct_count = int(instruct_count)
	        max_iter = int(max_iter)
	    except (TypeError, ValueError, OverflowError):
	        return "Instruction count and max iterations must be whole numbers."

	    os.environ["MODEL_PATH"] = model_path
	    os.environ["INSTRUCT_COUNT"] = str(instruct_count)
	    os.environ["MAX_ITER"] = str(max_iter)

	    try:
	        subprocess.run(["bash", "run.sh"], check=True, cwd="/app/qwen")
	    except (subprocess.CalledProcessError, OSError) as e:
	        # OSError covers a missing bash executable or working directory,
	        # which would otherwise escape as an unhandled exception.
	        return f"Error during training: {str(e)}"
	    return "Training completed successfully!"

	# Create the interface: a settings column with the training controls next
	# to a status column, with both buttons writing into the status box.
	with gr.Blocks() as iface:
	    gr.Markdown("# Self-Lengthen Training Interface")

	    with gr.Row():
	        # Left column: training settings and the button that starts a run.
	        with gr.Column():
	            model_path = gr.Textbox(
	                label="Model Path",
	                value="/app/models/base_model",
	                info="Path to the base model"
	            )
	            # precision=0 makes Gradio round the input and pass an int, so
	            # the handler never receives a fractional count.
	            instruct_count = gr.Number(
	                label="Instruction Count",
	                value=5000,
	                minimum=100,
	                precision=0,
	                info="Number of instructions to generate"
	            )
	            max_iter = gr.Number(
	                label="Max Iterations",
	                value=3,
	                minimum=1,
	                precision=0,
	                info="Number of training iterations"
	            )
	            train_btn = gr.Button("Start Training")

	        # Right column: read-only status text and a manual refresh button.
	        with gr.Column():
	            status_output = gr.Textbox(
	                label="Status",
	                value="Ready to start training...",
	                interactive=False
	            )
	            refresh_btn = gr.Button("Refresh Status")

	    # Event wiring is done inside the Blocks context.
	    train_btn.click(
	        fn=start_training,
	        inputs=[model_path, instruct_count, max_iter],
	        outputs=status_output
	    )

	    refresh_btn.click(
	        fn=check_training_status,
	        inputs=None,
	        outputs=status_output
	    )

	if __name__ == "__main__":
	    # Poll the backing services every 5 seconds until all of them respond,
	    # printing the reason for each failed attempt, then start the web UI.
	    print("Waiting for services to start...")
	    services_ok, message = check_services()
	    while not services_ok:
	        print(message)
	        time.sleep(5)
	        services_ok, message = check_services()

	    print("All services are running, starting web interface...")
	    # Listen on all interfaces on port 7860.
	    iface.launch(server_name="0.0.0.0", server_port=7860)