from unsloth import FastVisionModel
import torch
import gradio as gr


# System prompt prepended to every user query: instructs the model to
# detect drones in the image and report a description plus bounding box.
system_message = (
    "You are Ana-bot. Your role is to detect drones in the image. "
    "Describe the drone (speed, color, etc.) and return its bounding box.\n\n"
)

# Load the fine-tuned vision adapter in 4-bit so it fits on a single GPU.
model, tokenizer = FastVisionModel.from_pretrained(
    "tetttssts/llama_adapter",
    load_in_4bit=True,
    use_gradient_checkpointing="unsloth",
)

# BUG FIX: unsloth requires switching the model into inference mode before
# generation; the original script never called this, leaving the model in
# its training (gradient-checkpointing) configuration.
FastVisionModel.for_inference(model)
|
| |
|
| |
|
def infer(image, query):
    """Run drone detection on an uploaded image guided by a user query.

    Args:
        image: PIL image supplied by the Gradio upload widget.
        query: Free-form user question; appended to the system prompt.

    Returns:
        The decoded model response as a string, or an error message
        string if inference fails.
    """
    try:
        # Single-turn chat: one image plus the system prompt + user query.
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": system_message + query},
            ],
        }]

        # Render the chat template to a prompt string; tokenization happens
        # below, together with the image, in the processor call.
        input_text = tokenizer.apply_chat_template(
            messages, add_generation_prompt=True
        )

        # BUG FIX: the vision processor's parameter is `images=` (or
        # positional), not `image=` — the original keyword is not accepted
        # by HF processors and would raise/misroute the image.
        inputs = tokenizer(
            images=image,
            text=input_text,
            add_special_tokens=False,
            return_tensors="pt",
        ).to("cuda")

        output = model.generate(
            **inputs,
            max_new_tokens=128,
            use_cache=True,
            temperature=1.5,
            min_p=0.1,
        )

        return tokenizer.decode(output[0], skip_special_tokens=True)

    except Exception as e:
        # Top-level UI boundary: surface the failure in the Gradio textbox
        # instead of crashing the app.
        return f"Error during inference: {str(e)}"
|
| |
|
| |
|
# Wire the inference function into a simple two-input Gradio UI.
image_input = gr.Image(type="pil", label="Upload Image")
query_input = gr.Textbox(label="Enter your query")
response_output = gr.Textbox(label="Model Response")

interface = gr.Interface(
    fn=infer,
    inputs=[image_input, query_input],
    outputs=response_output,
    title="Drone Detector - Ana-bot",
    description="Upload an image and ask the bot to describe drones and detect them.",
)


if __name__ == "__main__":
    interface.launch()
|
| |
|