import time

from openai import AsyncOpenAI
import chainlit as cl

# Markers that delimit the model's reasoning inside the streamed content.
# NOTE(review): deepseek-r1 served through Ollama is expected to emit these
# as standalone tokens — confirm against the actual stream of this server.
THINK_OPEN = "<think>"
THINK_CLOSE = "</think>"

client = AsyncOpenAI(
    api_key="ollama",
    base_url="https://mahadih534-own-ollama-api-server.hf.space/v1/",
)


@cl.on_message
async def on_message(msg: cl.Message):
    """Stream a chat completion, separating reasoning from the final answer.

    The current chat context is sent to the model with streaming enabled.
    Tokens received between the opening and closing reasoning markers are
    streamed into a "Thinking" step; every other token is streamed into the
    final answer message. When the closing marker arrives, the step is
    renamed to show how many seconds elapsed since the request started.

    Args:
        msg: The incoming user message. It is not read directly here; the
            conversation is taken from ``cl.chat_context``.
    """
    start = time.time()
    stream = await client.chat.completions.create(
        model="deepseek-r1:1.5b",
        messages=[
            {"role": "system", "content": "You are an helpful assistant"},
            *cl.chat_context.to_openai(),
        ],
        stream=True,
    )

    thinking = False

    # Stream the reasoning into its own step; the answer goes to a message.
    async with cl.Step(name="Thinking") as thinking_step:
        final_answer = cl.Message(content="")

        async for chunk in stream:
            # Some chunks carry no choices or no text (e.g. the terminating
            # chunk); there is nothing to display for them.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if not token:
                continue

            # Compare ignoring surrounding whitespace so a marker that
            # arrives with a trailing newline is still recognised.
            marker = token.strip()

            if marker == THINK_OPEN:
                thinking = True
                continue

            if marker == THINK_CLOSE:
                thinking = False
                thought_for = round(time.time() - start)
                thinking_step.name = f"Thought for {thought_for}s"
                await thinking_step.update()
                continue

            if thinking:
                await thinking_step.stream_token(token)
            else:
                await final_answer.stream_token(token)

    await final_answer.send()