"""Urdu chatbot Gradio app.

Streams completions from a Hugging Face Inference endpoint
(traversaal-urdu-llama-3.1-8b) and serves them through a simple
gr.Interface. Endpoint URL and token come from the environment.
"""

import os
import random  # NOTE(review): unused in this file — kept in case of external use
import time    # NOTE(review): unused in this file — kept in case of external use

import gradio as gr
from huggingface_hub import InferenceClient

# Endpoint configuration is injected via environment variables
# (e.g. HF Space secrets): 'url' = inference endpoint, 'hf_token' = auth token.
endpoint_url = os.getenv('url')
hf_token = os.getenv('hf_token')

# Streaming Client
client = InferenceClient(endpoint_url, token=hf_token)

gen_kwargs = dict(
    max_new_tokens=1024,
    top_k=50,
    top_p=0.9,
    temperature=0.5,
    repetition_penalty=1.2,  # 1.02
    # FIX: removed the empty-string "" stop sequence present in the original —
    # "" is not a valid stop sequence for text_generation and made the
    # client-side stop check below a no-op for that entry.
    stop=["\nUser:", "<|endoftext|>"],
)


def generate_text(prompt):
    """Generate a streamed Urdu response for *prompt*.

    Args:
        prompt: User input text inserted into the instruction template.

    Yields:
        str: The accumulated generated text after each streamed token,
        so Gradio can render partial output progressively.
    """
    chat_prompt = f"""
System: You are urdu chatbot. Always respond in Urdu.
### Instruction:
{prompt}
### Response:
"" """

    stream = client.text_generation(
        chat_prompt, stream=True, details=True, **gen_kwargs
    )

    generated_text = ""
    for r in stream:
        # Skip special tokens (BOS/EOS etc.) — they are not user-visible text.
        if r.token.special:
            continue
        # NOTE(review): this compares a SINGLE token's text against the stop
        # strings; a multi-token stop sequence like "\nUser:" will never match
        # here. The server-side `stop` kwarg is the effective guard — confirm
        # whether this client-side check is still needed.
        if r.token.text in gen_kwargs["stop"]:
            break
        generated_text += r.token.text
        yield generated_text


iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    examples=[
        'میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟',
        'amazing food locations in Singapore',
        'best activities in London',
    ],
    outputs="text",
    title="Urdu Chatbot- Powered by traversaal-urdu-llama-3.1-8b",
    description="Ask me anything in Urdu!",
)

iface.launch()