# urdu-llama / run.py
import os
from threading import Thread

import gradio as gr
import torch
from huggingface_hub import login
from transformers import TextIteratorStreamer
from unsloth import FastLanguageModel
max_seq_length = 2048  # Choose any; Unsloth supports RoPE scaling internally
dtype = None  # None for auto detection; float16 for Tesla T4/V100, bfloat16 for Ampere+
load_in_4bit = True  # Use 4-bit quantization to reduce memory usage; can be False
# Authenticate with the Hugging Face Hub so the model weights can be downloaded
hf_token = os.getenv('hf_token')
login(token=hf_token)
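
# Load the 4-bit quantized Urdu Llama 3.1 model and its tokenizer via Unsloth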
model, tokenizer = FastLanguageModel.from_pretrained(
model_name = "traversaal-llm-regional-languages/Unsloth_Urdu_Llama3_1_4bit_PF100",
max_seq_length = max_seq_length,
dtype = dtype,
load_in_4bit = load_in_4bit
)
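# Put the model into Unsloth's optimized inference mode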
FastLanguageModel.for_inference(model)
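
# Alpaca-style prompt template; the user's question is substituted into {prompt}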
chat_prompt = """
### Instruction:
You are a chatbot. Provide answers with your best knowledge in Urdu only. Don't say you don't know unless you really don't
### Input:
{prompt}
### Response:
"""
def generate_response(query):
    """Stream a model response for a single user query."""
    prompt = chat_prompt.format(prompt=query)
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

    # Stream decoded tokens as they are produced so the UI can update incrementally
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        top_p=0.95,
        top_k=50,
        temperature=0.7,
        repetition_penalty=1.2,
    )

    # Run generation in a background thread while this thread consumes the streamer
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    generated_text = ""
    for new_text in streamer:
        # Drop a trailing end-of-sequence token if the model emits one
        if new_text.endswith(tokenizer.eos_token):
            new_text = new_text[: len(new_text) - len(tokenizer.eos_token)]
        generated_text += new_text
        yield generated_text
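
# Build the Gradio UI; generate_response is a generator, so output streams into the textbox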
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    examples=[
        'میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟',
        'amazing food locations in Singapore',
        'best activities in London',
    ],
    outputs="text",
    title="Urdu Chatbot - Powered by traversaal-urdu-llama-3.1-8b",
    description="Ask me anything in Urdu!",
)
iface.launch()