Update app.py
Browse files
app.py
CHANGED
@@ -62,8 +62,8 @@ def calculate_eou(chat_ctx, session):
|
|
62 |
def respond(
|
63 |
message,
|
64 |
history: list[tuple[str, str]],
|
65 |
-
max_tokens=
|
66 |
-
temperature=0.
|
67 |
top_p=0.95,
|
68 |
):
|
69 |
messages = [{"role": "system", "content": os.environ.get("CHARACTER_DESC", "You are a helpful assistant.")}]
|
@@ -81,22 +81,23 @@ def respond(
|
|
81 |
yield "[Wait... Keep typing...]"
|
82 |
return
|
83 |
|
84 |
-
# Generate response
|
85 |
-
|
86 |
-
|
87 |
-
for chunk in qwen_client.chat.completions.create(
|
88 |
messages=messages,
|
89 |
max_tokens=max_tokens,
|
90 |
stream=True,
|
91 |
temperature=temperature,
|
92 |
top_p=top_p,
|
93 |
-
)
|
94 |
-
token = chunk.choices[0].delta.content or ""
|
95 |
-
accumulated_response += token
|
96 |
-
yield accumulated_response # Yield accumulated response for live updates
|
97 |
|
98 |
-
|
|
|
|
|
|
|
99 |
|
|
|
|
|
100 |
# Create Gradio interface
|
101 |
demo = gr.ChatInterface(
|
102 |
respond,
|
|
|
62 |
def respond(
|
63 |
message,
|
64 |
history: list[tuple[str, str]],
|
65 |
+
max_tokens=1024,
|
66 |
+
temperature=0.6,
|
67 |
top_p=0.95,
|
68 |
):
|
69 |
messages = [{"role": "system", "content": os.environ.get("CHARACTER_DESC", "You are a helpful assistant.")}]
|
|
|
81 |
yield "[Wait... Keep typing...]"
|
82 |
return
|
83 |
|
84 |
+
# Generate raw response without any processing
|
85 |
+
full_response = ""
|
86 |
+
stream = qwen_client.chat.completions.create(
|
|
|
87 |
messages=messages,
|
88 |
max_tokens=max_tokens,
|
89 |
stream=True,
|
90 |
temperature=temperature,
|
91 |
top_p=top_p,
|
92 |
+
)
|
|
|
|
|
|
|
93 |
|
94 |
+
for chunk in stream:
|
95 |
+
token = chunk.choices[0].delta.content or ""
|
96 |
+
full_response += token
|
97 |
+
yield full_response # Send raw unmodified response to Gradio
|
98 |
|
99 |
+
# This will match both console and Gradio output
|
100 |
+
print(f"Final response: {full_response}")
|
101 |
# Create Gradio interface
|
102 |
demo = gr.ChatInterface(
|
103 |
respond,
|