"""Gradio chat front end for an OpenChat-3.5 model served by ochat.

At import time this script spawns the ochat OpenAI-compatible API server as a
child process, builds a Gradio Blocks UI, and launches it. Each message typed
into the UI is sent to the server's chat-completions endpoint and the
(message, reply) pair is appended to the chat window.
"""
import os
import subprocess

# `spaces` stays ahead of the other third-party imports, as in the original
# file (presumably so it loads before torch -- confirm against the Spaces docs).
import spaces
import gradio as gr
import torch
import numpy as np
import requests

# Chat-completions endpoint. The default is the value that was hard-coded
# before; set OCHAT_API_URL to point the UI at a different server.
# NOTE(review): the server is spawned locally by this script, so a localhost
# URL may be what is actually intended here -- confirm.
OCHAT_API_URL = os.environ.get(
    "OCHAT_API_URL",
    "https://macadeliccc-openchat-3-5-chatbot.hf.space:18888/v1/chat/completions",
)

# Upper bound (seconds) on one completion request, so a stalled server cannot
# block a UI callback forever.
REQUEST_TIMEOUT_SECONDS = 120


# Function to start the ochat server
@spaces.GPU
def start_ochat_server():
    """Spawn the ochat API server in a child process without waiting for it.

    Returns:
        A human-readable status string: a success message when the process
        was spawned, otherwise a failure message that includes the error.
        A success message only means the process started, not that the
        server is ready to answer requests.
    """
    cuda_available = torch.cuda.is_available()
    print(f"Is CUDA available: {cuda_available}")
    if cuda_available:
        # Querying the current device raises when CUDA is absent, so only do
        # it after the availability check.
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

    command = [
        "python", "-m", "ochat.serving.openai_api_server",
        "--model", "openchat/openchat_3.5",
    ]

    # Start the server in a separate process
    try:
        subprocess.Popen(command)
    except (OSError, subprocess.SubprocessError) as e:
        return f"Failed to start ochat server: {e}"
    return "ochat server started successfully"


# Surface the launch status in the logs instead of discarding it.
print(start_ochat_server())


# Function to send a message to the ochat server and get a response
def chat_with_ochat(message):
    """Send one user message to the ochat server and return its reply.

    Only ``message`` is sent; earlier turns of the conversation are not
    included in the request.

    Args:
        message: Text of the user's message.

    Returns:
        The content of the first choice in the server's response, or a string
        starting with ``"Error:"`` when the request fails, the server answers
        with a non-200 status, or the response body has an unexpected shape.
    """
    headers = {"Content-Type": "application/json"}
    data = {
        "model": "openchat_3.5",
        "messages": [{"role": "user", "content": message}],
    }

    try:
        response = requests.post(
            OCHAT_API_URL,
            json=data,
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as e:
        return f"Error: {e}"

    if response.status_code != 200:
        return f"Error: Server responded with status code {response.status_code}"

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # A 200 response with a non-JSON or differently shaped body must not
        # crash the UI callback.
        return f"Error: Unexpected response from server: {e!r}"


# Conversation shown in the chat window, as (user message, server reply) pairs.
# NOTE(review): this list lives at module level, so it appears to be shared by
# every browser session served by this process -- confirm that is intended.
chat_history = []

# Create a Gradio Blocks interface
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## vLLM OpenChat-3.5 Interface")
    # The command is shown on one line, so it must not contain a shell
    # line-continuation backslash (it would escape the following space).
    gr.Markdown(
        "Run on your own machine using this command: "
        "```docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all "
        "registry.hf.space/macadeliccc-openchat-3-5-chatbot:latest python app.py```"
    )

    with gr.Row():
        input_text = gr.Textbox(label="Your Message", placeholder="Type your message here")
        submit_button = gr.Button("Send")

    output_chat = gr.Chatbot()

    def update_output(input_message):
        """Forward the message to the server and return the updated history.

        Args:
            input_message: Current contents of the message textbox.

        Returns:
            The module-level ``chat_history`` list, with the new
            (message, reply) pair appended unless the message was blank.
        """
        if not input_message or not input_message.strip():
            # Nothing to send; leave the conversation as it is.
            return chat_history

        server_response = chat_with_ochat(input_message)  # Server's response
        chat_history.append((input_message, server_response))
        return chat_history

    submit_button.click(fn=update_output, inputs=[input_text], outputs=[output_chat])

app.launch()