import os

import gradio as gr
from huggingface_hub import InferenceClient

# Set up the Hugging Face Inference client. The token is read from the
# HF_TOKEN environment variable rather than hardcoded in the source.
client = InferenceClient(api_key=os.getenv("HF_TOKEN"))

# Optional: load the fine-tuned model locally instead of calling the hosted
# API. Left commented out because the app below uses the Inference API; this
# assumes "BeastGokul/Bio-Mistral-7B-finetuned" is a PEFT adapter trained on
# top of Mistral-7B-Instruct-v0.3.
#
# from transformers import AutoModelForCausalLM, AutoTokenizer
# from peft import PeftModel
#
# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
# base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
# base_model.resize_token_embeddings(len(tokenizer))  # only needed if tokens were added during fine-tuning
# model = PeftModel.from_pretrained(base_model, "BeastGokul/Bio-Mistral-7B-finetuned")


def get_response(user_query):
    # Wrap the user's biomedical query in a chat message
    messages = [{"role": "user", "content": user_query}]

    # Stream a chat completion from the hosted model
    response = client.chat_completion(
        model="mistralai/Mistral-7B-Instruct-v0.3",
        messages=messages,
        max_tokens=500,
        stream=True,
    )

    # Collect the streamed chunks and return the full reply
    reply = "".join(chunk.choices[0].delta.content or "" for chunk in response)
    return reply


# Example queries shown in the UI
example_queries = [
    "What are the symptoms of anemia?",
    "Explain the genetic basis of cystic fibrosis.",
    "What are the latest treatments for Alzheimer's disease?",
    "How does insulin affect blood sugar levels?",
    "Can you summarize recent advances in cancer immunotherapy?",
]

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Biomedical Assistant")
    user_input = gr.Textbox(placeholder="Enter your biomedical query...", label="Your Query")
    output_text = gr.Textbox(label="Response")
    gr.Examples(examples=example_queries, inputs=user_input)
    submit_btn = gr.Button("Get Response")
    submit_btn.click(fn=get_response, inputs=user_input, outputs=output_text)

demo.launch()
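
# A possible refinement (not in the original app): stream tokens into the UI
# incrementally instead of waiting for the full reply. Gradio treats a
# generator callback as a streaming handler and re-renders the output on each
# yield. This is a sketch under the same model/client assumptions as above;
# get_response_streaming is a hypothetical name, not part of the original code.
#
# def get_response_streaming(user_query):
#     messages = [{"role": "user", "content": user_query}]
#     partial = ""
#     for chunk in client.chat_completion(
#         model="mistralai/Mistral-7B-Instruct-v0.3",
#         messages=messages,
#         max_tokens=500,
#         stream=True,
#     ):
#         partial += chunk.choices[0].delta.content or ""
#         yield partial  # the output Textbox updates as each chunk arrives
#
# To use it, wire the button to the generator instead of get_response:
# submit_btn.click(fn=get_response_streaming, inputs=user_input, outputs=output_text)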