import os

import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer
from huggingface_hub import InferenceClient, login  # InferenceClient is only used by the commented-out streaming client below
# Leftover Unsloth fine-tuning settings; not used by the transformers pipeline
# below (see the 4-bit sketch after the pipeline setup for one way to wire
# `load_in_4bit` in).
max_seq_length = 2048  # choose any; Unsloth supports RoPE scaling internally
dtype = None  # None for auto-detection; float16 for Tesla T4/V100, bfloat16 for Ampere+
load_in_4bit = True  # use 4-bit quantization to reduce memory usage; can be False
#endpoint_url = os.getenv('url')
hf_token = os.getenv('hf_token')
login(token=hf_token)  # authenticate so the gated meta-llama checkpoint can be downloaded
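# The token is read from the environment rather than hard-coded. On a Hugging
# Face Space that means adding a secret named `hf_token`; locally, something
# like the line below works (the variable name is this app's own convention):
#
#   export hf_token=hf_xxxxxxxxxxxxxxxx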
# # Streaming Client
# client = InferenceClient(endpoint_url, token=hf_token)
# gen_kwargs = dict(
#     max_new_tokens=1024,
#     top_k=50,
#     top_p=0.9,
#     temperature=0.5,
#     repetition_penalty=1.2,  #1.02
#     stop=["\nUser:", "<|endoftext|>", "</s>"],
# )
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
pipeline = transformers.pipeline(
    "text-generation",
    model="meta-llama/Llama-3.2-1B",
    #model="traversaal-llm-regional-languages/Unsloth_Urdu_Llama3_1_FP16_PF100",
    torch_dtype=torch.float16,
    device_map="auto",
)
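# A minimal sketch of actually using the `load_in_4bit` flag defined above,
# via bitsandbytes quantization. This assumes the `bitsandbytes` package is
# installed; the kwargs shown are illustrative, not the app's original
# configuration:
#
# from transformers import BitsAndBytesConfig
#
# quant_config = BitsAndBytesConfig(
#     load_in_4bit=load_in_4bit,             # True -> store weights in 4-bit
#     bnb_4bit_compute_dtype=torch.float16,  # matmuls still run in fp16
# )
# pipeline = transformers.pipeline(
#     "text-generation",
#     model="meta-llama/Llama-3.2-1B",
#     model_kwargs={"quantization_config": quant_config},
#     device_map="auto",
# )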
def predict(prompt):
    # Alpaca-style instruction/input/response prompt template
    chat_prompt = f"""
### Instruction:
You are a chatbot. Provide answers with your best knowledge. Don't say you don't know unless you really don't know.
### Input:
{prompt}
### Response:
"""
    sequences = pipeline(
        chat_prompt,
        do_sample=True,
        temperature=0.2,
        top_p=0.9,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=256,      # cap on generated tokens; max_length would count the prompt as well
        return_full_text=False,  # return only the completion, not the prompt echoed back
    )
    response = ''
    for seq in sequences:
        response += seq['generated_text']
    return response
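# Quick local smoke test for predict() before launching the UI (the prompt is
# taken from the interface examples below):
#
# if __name__ == "__main__":
#     print(predict("best activities in London"))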
# def generate_text(prompt):
#     """Generates text by streaming from the Hugging Face Inference API."""
#     chat_prompt = f"""
# ### Instruction:
# You are a chatbot. Chat in Urdu. Provide answers with your best knowledge. Don't say you don't know unless you really don't know.
# ### Input:
# {prompt}
# ### Response:
# """
#     stream = client.text_generation(chat_prompt, stream=True, details=True, **gen_kwargs)
#     generated_text = ""
#     for r in stream:
#         if r.token.special:
#             continue
#         if r.token.text in gen_kwargs["stop"]:
#             break
#         generated_text += r.token.text
#         yield generated_text
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    examples=[
        'میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟',  # "I want to go to Karachi; what are some of the best places there?"
        'amazing food locations in Singapore',
        'best activities in London',
    ],
    outputs="text",
    title="Urdu Chatbot - Powered by meta-llama/Llama-3.2-1B",  # the traversaal-urdu-llama checkpoint is commented out above
    description="Ask me anything in Urdu!",
)

iface.launch()
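# On Spaces, launch() binds to the host/port the platform provides. For local
# debugging, a temporary public URL is available via an option gradio supports
# but the original app does not set:
#
# iface.launch(share=True)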