import gradio as gr import random import time import os from transformers import AutoTokenizer import transformers import torch from huggingface_hub import InferenceClient import gradio as gr import random import time import os from unsloth import FastLanguageModel import torch max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally! # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+ load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False. #endpoint_url = os.getenv('url') hf_token = os.getenv('hf_token') from huggingface_hub import login login(token = hf_token) from unsloth import FastLanguageModel model, tokenizer = FastLanguageModel.from_pretrained( model_name = "traversaal-llm-regional-languages/Unsloth_Urdu_Llama3_1_4bit_PF100", max_seq_length = max_seq_length, dtype = 'Auto', load_in_4bit = load_in_4bit ) FastLanguageModel.for_inference(model) chat_prompt = """ ### Instruction: You are a chatbot. Provide answers with your best knowledge in Urdu only. Don't say you don't know unless you really don't ### Input: {prompt} ### Response: """ def generate_response(query): prompt = chat_prompt.format(prompt=query) inputs = tokenizer([prompt], return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu") streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) generation_kwargs = dict( inputs, streamer=streamer, max_new_tokens=1024, do_sample=True, top_p=0.95, top_k=50, temperature=0.7, repetition_penalty=1.2, #1.02 ) thread = Thread(target=model.generate, kwargs=generation_kwargs) thread.start() generated_text = "" for new_text in streamer: if new_text.endswith(tokenizer.eos_token): new_text = new_text[:len(new_text) - len(tokenizer.eos_token)] generated_text += new_text yield generated_text # for r in streamer: # if r.token.special: # continue # generated_text += r.token.text iface = gr.Interface( fn=generate_response, inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."), examples = ['میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟','amazing food locations in Singapore','best activities in London'], outputs="text", title="Urdu Chatbot- Powered by traversaal-urdu-llama-3.1-8b", description="Ask me anything in Urdu!", ) iface.launch()