beyoru committed on
Commit
4f81850
·
verified ·
1 Parent(s): bc6e181

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -7,7 +7,7 @@ from huggingface_hub import InferenceClient
7
  import os
8
 
9
  # Initialize Qwen client
10
- qwen_client = InferenceClient("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
11
 
12
  # Model and ONNX setup
13
  HG_MODEL = "livekit/turn-detector"
@@ -84,6 +84,7 @@ def respond(
84
  # Generate raw response without any processing
85
  full_response = ""
86
  stream = qwen_client.chat.completions.create(
 
87
  messages=messages,
88
  max_tokens=max_tokens,
89
  stream=True,
@@ -93,11 +94,10 @@ def respond(
93
  )
94
 
95
  for chunk in stream:
96
- print(chunk.choices[0].delta.content)
97
  yield chunk.choices[0].delta.content # Send raw unmodified response to Gradio
98
 
99
  # This will match both console and Gradio output
100
- print(f"Final response: {full_response}")
101
  # Create Gradio interface
102
  demo = gr.ChatInterface(
103
  respond,
 
7
  import os
8
 
9
  # Initialize Qwen client
10
+ qwen_client = InferenceClient(os.environ.get("HF_TOKEN"))
11
 
12
  # Model and ONNX setup
13
  HG_MODEL = "livekit/turn-detector"
 
84
  # Generate raw response without any processing
85
  full_response = ""
86
  stream = qwen_client.chat.completions.create(
87
+ model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
88
  messages=messages,
89
  max_tokens=max_tokens,
90
  stream=True,
 
94
  )
95
 
96
  for chunk in stream:
97
+ full_response += chunk.choices[0].delta.content
98
  yield chunk.choices[0].delta.content # Send raw unmodified response to Gradio
99
 
100
  # This will match both console and Gradio output
 
101
  # Create Gradio interface
102
  demo = gr.ChatInterface(
103
  respond,