import spaces
import gradio as gr
import torch
import subprocess
import numpy as np
import requests


# Function to start the ochat server
@spaces.GPU
def start_ochat_server():
    """Spawn the ochat OpenAI-compatible API server as a background process.

    Prints CUDA diagnostics, then launches
    ``python -m ochat.serving.openai_api_server --model openchat/openchat_3.5``
    with ``subprocess.Popen`` and returns without waiting for it.

    Returns:
        str: A status message. The success message only means the process
        was spawned; it does not confirm the server is ready for requests.
    """
    cuda_available = torch.cuda.is_available()
    print(f"Is CUDA available: {cuda_available}")
    # Querying the current device raises when no GPU is visible, which would
    # abort startup before the server is even launched; only ask when present.
    if cuda_available:
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    else:
        print("CUDA device: none")

    command = [
        "python", "-m", "ochat.serving.openai_api_server",
        "--model", "openchat/openchat_3.5",
    ]

    # Start the server in a separate process (fire-and-forget: the handle is
    # not kept, so the child keeps running for the lifetime of the app).
    try:
        subprocess.Popen(command)
    except (OSError, ValueError) as e:
        # OSError: executable missing / not runnable; ValueError: bad Popen args.
        return f"Failed to start ochat server: {e}"
    return "ochat server started successfully"


start_ochat_server()

# Function to send a message to the ochat server and get a response
def chat_with_ochat(
    message,
    *,
    url="http://localhost:18888/v1/chat/completions",
    timeout=120,
):
    """Send one user message to the ochat server and return the reply text.

    The request is a single-turn OpenAI-style chat completion for the
    ``openchat_3.5`` model. Failures are reported as an ``"Error: ..."``
    string rather than raised, so the result can always be shown in the UI.

    Args:
        message: The user's message text.
        url: Chat-completions endpoint. Defaults to the server this app
            spawns locally (ochat listens on localhost:18888 by default);
            the public Space hostname does not expose that port.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.post``.

    Returns:
        str: The assistant's reply, or an error description.
    """
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": "openchat_3.5",
        "messages": [{"role": "user", "content": message}],
    }

    try:
        # Without a timeout a stalled server would block this handler forever.
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        return f"Error: {e}"

    if response.status_code != 200:
        return f"Error: Server responded with status code {response.status_code}"

    try:
        return response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # A 200 with a non-JSON or unexpectedly shaped body must not crash the UI.
        return f"Error: Unexpected response from server: {e!r}"


# Create a Gradio Blocks interface
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## vLLM OpenChat-3.5 Interface")
    gr.Markdown("Run on your own machine using this command: ```docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all \
	registry.hf.space/macadeliccc-openchat-3-5-chatbot:latest python app.py```")

    with gr.Row():
        input_text = gr.Textbox(label="Your Message", placeholder="Type your message here")
        submit_button = gr.Button("Send")
    output_chat = gr.Chatbot()

    def update_output(input_message, history=None):
        """Append one user/assistant exchange to the conversation.

        The history comes from the Chatbot component itself instead of a
        module-level list, so each browser session keeps its own
        conversation rather than all visitors sharing one.

        Args:
            input_message: Text the user typed.
            history: Current Chatbot value as (user, assistant) pairs;
                ``None`` or empty starts a new conversation.

        Returns:
            list: A new history list with the latest exchange appended.
        """
        server_response = chat_with_ochat(input_message)  # Server's response
        # Copy instead of mutating the value Gradio handed in.
        updated_history = list(history or [])
        updated_history.append((input_message, server_response))
        return updated_history

    # The Chatbot is both an input and the output, so it carries the
    # per-session history through each click.
    submit_button.click(
        fn=update_output,
        inputs=[input_text, output_chat],
        outputs=[output_chat],
    )

app.launch()