import spaces  # import first so ZeroGPU can set up GPU allocation before torch

import asyncio
import subprocess
import threading

import aiohttp
import gradio as gr
import torch
from gradio import State
# Function to start the ochat server
@spaces.GPU
def start_ochat_server():
    print(f"Is CUDA available: {torch.cuda.is_available()}")
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    command = [
        "python", "-m", "ochat.serving.openai_api_server",
        "--model", "openchat/openchat_3.5"
    ]
    # Start the server in a separate process so this function returns immediately
    try:
        subprocess.Popen(command)
        return "ochat server started successfully"
    except Exception as e:
        return f"Failed to start ochat server: {e}"
async def monitor_server():
    while True:
        async with aiohttp.ClientSession() as session:
            try:
                async with session.get("http://localhost:18888/") as response:
                    if response.status == 200:
                        print("Server is running.")
                    else:
                        print("Server is not running. Attempting to restart...")
                        start_ochat_server()
            except aiohttp.ClientError:
                print("Server is not running. Attempting to restart...")
                start_ochat_server()
        await asyncio.sleep(60)  # Check every 60 seconds
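
# The monitor is an async coroutine, so give it its own event loop and run
# it in a background thread to avoid blocking the Gradio app.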
def run_async_monitor():
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(monitor_server())
    loop.close()
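
# Launch the server once at startup; the monitor thread keeps it alive.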
start_ochat_server()

# Start the monitoring in a separate thread (daemon, so it won't block shutdown)
thread = threading.Thread(target=run_async_monitor, daemon=True)
thread.start()
# Function to send a message to the ochat server and get a response
async def chat_with_ochat(message):
    base_url = "http://localhost:18888"
    chat_url = f"{base_url}/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    data = {
        "model": "openchat_3.5",
        "messages": [{"role": "user", "content": message}]
    }
    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(chat_url, headers=headers, json=data) as response:
                if response.status == 200:
                    response_data = await response.json()
                    return response_data['choices'][0]['message']['content']
                else:
                    return f"Error: Server responded with status code {response.status}"
        except aiohttp.ClientError as e:
            return f"Error: {e}"
# Create a Gradio Blocks interface with session state
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## vLLM OpenChat-3.5 Interface")
    gr.Markdown("### The vLLM server cannot handle concurrent users in Spaces. If you get an error, run it with Docker instead.")
    gr.Markdown(
        "This will run better on your own machine: "
        "```docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all "
        "registry.hf.space/macadeliccc-openchat-3-5-chatbot:latest python app.py```"
    )

    message = gr.Textbox(label="Your Message", placeholder="Type your message here")
    chatbot = gr.Chatbot()
    clear = gr.Button("Clear")
    history = State([])  # Session state for chat history
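
    # Append the user's message to the visible chat history and clear the textbox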
    async def user(message, history):
        return "", history + [[message, None]]
    async def bot(history):
        if history and history[-1] and history[-1][0]:
            user_message = history[-1][0]
            bot_response = await chat_with_ochat(user_message)
            history[-1][1] = bot_response  # Update the last entry with the bot's response
        return history
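
    # Wire the events: submit first records the user turn, then generates the reply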
    message.submit(user, [message, chatbot], [message, chatbot], queue=True).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

app.queue()
app.launch()