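"""Urdu chatbot Gradio Space.

Streams responses from traversaal-llm-regional-languages/Unsloth_Urdu_Llama3_1_4bit_PF100,
a 4-bit Unsloth fine-tune of Llama 3.1, loaded via FastLanguageModel.
"""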
import os
from threading import Thread

import gradio as gr
import torch
from huggingface_hub import login
from transformers import TextIteratorStreamer
from unsloth import FastLanguageModel
max_seq_length = 2048  # Choose any! Unsloth auto-supports RoPE scaling internally.
dtype = None  # None for auto detection. Float16 for Tesla T4/V100, Bfloat16 for Ampere+.
load_in_4bit = True  # Use 4-bit quantization to reduce memory usage. Can be False.
# Authenticate with the Hugging Face Hub using a token stored as a Space secret.
hf_token = os.getenv('hf_token')
login(token=hf_token)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="traversaal-llm-regional-languages/Unsloth_Urdu_Llama3_1_4bit_PF100",
    max_seq_length=max_seq_length,
    dtype=dtype,  # 'Auto' is not a valid dtype; None lets Unsloth auto-detect.
    load_in_4bit=load_in_4bit,
)
FastLanguageModel.for_inference(model)
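# Per Unsloth's docs, for_inference() switches the model into its faster
# inference mode; call it once after loading, before any generation.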
chat_prompt = """ | |
### Instruction: | |
You are a chatbot. Provide answers with your best knowledge in Urdu only. Don't say you don't know unless you really don't | |
### Input: | |
{prompt} | |
### Response: | |
""" | |
def generate_response(query):
    prompt = chat_prompt.format(prompt=query)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = tokenizer([prompt], return_tensors="pt").to(device)
    # Stream decoded tokens as they are generated so the UI updates incrementally.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        top_p=0.95,
        top_k=50,
        temperature=0.7,
        repetition_penalty=1.2,  # 1.02
    )
    # Run generation in a background thread; this thread consumes the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    generated_text = ""
    for new_text in streamer:
        # Defensive: strip a trailing EOS marker if one slips through.
        if new_text.endswith(tokenizer.eos_token):
            new_text = new_text[: -len(tokenizer.eos_token)]
        generated_text += new_text
        yield generated_text
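# Optional local smoke test of the streaming generator (hypothetical usage;
# the Space itself only needs iface.launch() below):
#   for partial in generate_response('میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟'):
#       print(partial)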
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    examples=[
        'میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟',
        'amazing food locations in Singapore',
        'best activities in London',
    ],
    title="Urdu Chatbot - Powered by traversaal-urdu-llama-3.1-8b",
    description="Ask me anything in Urdu!",
)
iface.launch()
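# When run locally, launch() serves the demo at http://127.0.0.1:7860 by default;
# on Hugging Face Spaces, hosting is handled by the platform.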