import gradio as gr
from huggingface_hub import InferenceClient
import os

# Authenticate against the Hugging Face Inference API with the HF_TOKEN env var
client = InferenceClient(api_key=os.getenv("HF_TOKEN"))


def get_biomedical_response(query):
    """Send the user's question to the hosted model and return its reply."""
    messages = [
        {
            "role": "system",
            "content": "You are a biomedical assistant. Please provide a concise and accurate response.",
        },
        {"role": "user", "content": query},
    ]
    # Make the call to the model
    response = client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.3",
        messages=messages,
        max_tokens=100,
    )
    return response.choices[0].message.content


example_queries = [
    "What are the symptoms of anemia?",
    "Explain the genetic basis of cystic fibrosis.",
    "What are the latest treatments for Alzheimer's disease?",
    "How does insulin affect blood sugar levels?",
    "Can you summarize recent advances in cancer immunotherapy?",
]

# Set up the Gradio UI with a single response box
with gr.Blocks() as demo:
    gr.Markdown("## Biomedical Assistant")
    query_input = gr.Textbox(placeholder="Enter your biomedical question...", label="Your Query")
    response_output = gr.Textbox(label="Response", interactive=False)
    gr.Examples(examples=example_queries, inputs=query_input)
    submit_button = gr.Button("Submit")
    submit_button.click(get_biomedical_response, query_input, response_output)

demo.launch()
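
# Optional alternative: serve the same assistant from a locally loaded model with
# a PEFT (LoRA) adapter instead of the hosted Inference API. This is a minimal
# sketch, kept commented out so the app stays lightweight; the adapter repo id
# ("your-username/biomedical-lora") is a hypothetical placeholder, and running it
# requires transformers, peft, torch, and accelerate to be installed.
#
# from transformers import AutoTokenizer, AutoModelForCausalLM
# from peft import PeftModel
#
# def get_biomedical_response_local(query):
#     base_id = "mistralai/Mistral-7B-Instruct-v0.3"
#     adapter_id = "your-username/biomedical-lora"  # hypothetical adapter repo
#     tokenizer = AutoTokenizer.from_pretrained(base_id)
#     model = AutoModelForCausalLM.from_pretrained(base_id, device_map="auto")
#     model = PeftModel.from_pretrained(model, adapter_id)
#     input_ids = tokenizer.apply_chat_template(
#         [{"role": "user", "content": query}],
#         add_generation_prompt=True,
#         return_tensors="pt",
#     ).to(model.device)
#     output = model.generate(input_ids, max_new_tokens=100)
#     # Decode only the newly generated tokens, skipping the echoed prompt
#     return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)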