Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ from huggingface_hub import InferenceClient
|
|
7 |
import os
|
8 |
|
9 |
# Initialize Qwen client
|
10 |
-
qwen_client = InferenceClient(
|
11 |
|
12 |
# Model and ONNX setup
|
13 |
HG_MODEL = "livekit/turn-detector"
|
@@ -84,6 +84,7 @@ def respond(
|
|
84 |
# Generate raw response without any processing
|
85 |
full_response = ""
|
86 |
stream = qwen_client.chat.completions.create(
|
|
|
87 |
messages=messages,
|
88 |
max_tokens=max_tokens,
|
89 |
stream=True,
|
@@ -93,11 +94,10 @@ def respond(
|
|
93 |
)
|
94 |
|
95 |
for chunk in stream:
|
96 |
-
|
97 |
yield chunk.choices[0].delta.content # Send raw unmodified response to Gradio
|
98 |
|
99 |
# This will match both console and Gradio output
|
100 |
-
print(f"Final response: {full_response}")
|
101 |
# Create Gradio interface
|
102 |
demo = gr.ChatInterface(
|
103 |
respond,
|
|
|
7 |
import os
|
8 |
|
9 |
# Initialize Qwen client
|
10 |
+
# Pass the access token by keyword: InferenceClient's first positional
# parameter is `model`, so a positional token would be treated as a model id.
qwen_client = InferenceClient(token=os.environ.get("HF_TOKEN"))
|
11 |
|
12 |
# Model and ONNX setup
|
13 |
HG_MODEL = "livekit/turn-detector"
|
|
|
84 |
# Generate raw response without any processing
|
85 |
full_response = ""
|
86 |
stream = qwen_client.chat.completions.create(
|
87 |
+
model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
88 |
messages=messages,
|
89 |
max_tokens=max_tokens,
|
90 |
stream=True,
|
|
|
94 |
)
|
95 |
|
96 |
for chunk in stream:
|
97 |
+
full_response += chunk.choices[0].delta.content
|
98 |
yield chunk.choices[0].delta.content # Send raw unmodified response to Gradio
|
99 |
|
100 |
# This will match both console and Gradio output
|
|
|
101 |
# Create Gradio interface
|
102 |
demo = gr.ChatInterface(
|
103 |
respond,
|