import gradio as gr
import random
import time
import os
from transformers import AutoTokenizer
import transformers
import torch
from huggingface_hub import InferenceClient, login
# Unsloth-style loading config, kept from the original notebook (unused by
# the transformers pipeline below).
max_seq_length = 2048  # context length; Unsloth supports RoPE scaling internally
dtype = None  # None for auto-detection; float16 for Tesla T4/V100, bfloat16 for Ampere+
load_in_4bit = True  # 4-bit quantization to reduce memory usage; can be False
#endpoint_url = os.getenv('url')
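# The Hub token is read from the Space's secrets; logging in is required for
# gated checkpoints such as the meta-llama models.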
hf_token = os.getenv('hf_token')
login(token=hf_token)
# # Streaming client (alternative: call a dedicated Inference Endpoint)
# client = InferenceClient(endpoint_url, token=hf_token)
# gen_kwargs = dict(
#     max_new_tokens=1024,
#     top_k=50,
#     top_p=0.9,
#     temperature=0.5,
#     repetition_penalty=1.2,  # 1.02
#     stop=["\nUser:", "<|endoftext|>", "</s>"],
# )
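# Load the tokenizer and a text-generation pipeline. float16 halves the
# memory footprint versus float32, and device_map="auto" places the model
# on GPU when one is available.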
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
pipeline = transformers.pipeline(
    "text-generation",
    model="meta-llama/Llama-3.2-1B",
    # model="traversaal-llm-regional-languages/Unsloth_Urdu_Llama3_1_FP16_PF100",
    torch_dtype=torch.float16,
    device_map="auto",
)
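# predict() wraps the user's message in an Alpaca-style prompt
# (Instruction / Input / Response) and returns the model's completion.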
def predict(prompt):
    chat_prompt = f"""
### Instruction:
You are a chatbot. Provide answers with your best knowledge. Don't say you don't know unless you really don't.
### Input:
{prompt}
### Response:
"""
    sequences = pipeline(
        chat_prompt,
        do_sample=True,
        temperature=0.2,
        top_p=0.9,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=100,  # max_length would count the prompt too, truncating the reply
        return_full_text=False,  # return only the completion, not the echoed prompt
    )
    response = ''
    for seq in sequences:
        response += seq['generated_text']
    return response
# def generate_text(prompt):
#     """Generates text using the Hugging Face Inference API."""
#     chat_prompt = f"""
# ### Instruction:
# You are a chatbot. Chat in Urdu. Provide answers with your best knowledge. Don't say you don't know unless you really don't.
# ### Input:
# {prompt}
# ### Response:
# """
#     stream = client.text_generation(chat_prompt, stream=True, details=True, **gen_kwargs)
#     generated_text = ""
#     for r in stream:
#         if r.token.special:
#             continue
#         if r.token.text in gen_kwargs["stop"]:
#             break
#         generated_text += r.token.text
#         yield generated_text
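# Minimal Gradio UI: one textbox in, plain text out, with Urdu and English examples.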
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    examples=[
        'میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟',
        'amazing food locations in Singapore',
        'best activities in London',
    ],
    outputs="text",
title="Urdu Chatbot- Powered by traversaal-urdu-llama-3.1-8b",
    description="Ask me anything in Urdu!",
)
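# launch() serves the app; on a Hugging Face Space this is all that's needed,
# while locally you could pass share=True for a temporary public link.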
iface.launch()