Spaces:
Runtime error
Runtime error
traversaal-ai
committed on
Commit
•
efa99ab
1
Parent(s):
0e1b82c
Update run.py
Browse files
run.py
CHANGED
@@ -2,30 +2,56 @@ import gradio as gr
|
|
2 |
import random
|
3 |
import time
|
4 |
import os
|
|
|
|
|
|
|
5 |
from huggingface_hub import InferenceClient
|
6 |
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
hf_token = os.getenv('hf_token')
|
10 |
|
11 |
-
# Streaming Client
|
12 |
-
client = InferenceClient(endpoint_url, token=hf_token)
|
13 |
|
14 |
-
gen_kwargs = dict(
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
)
|
22 |
|
23 |
-
|
24 |
-
|
|
|
25 |
chat_prompt = f"""
|
26 |
|
27 |
### Instruction:
|
28 |
-
You are a chatbot.
|
29 |
|
30 |
### Input:
|
31 |
{prompt}
|
@@ -33,18 +59,46 @@ You are a chatbot. Chat in Urdu. Provide answers with your best knowledge. Don't
|
|
33 |
### Response:
|
34 |
""
|
35 |
"""
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
iface = gr.Interface(
|
47 |
-
fn=
|
48 |
inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
|
49 |
examples = ['میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟','amazing food locations in Singapore','best activities in London'],
|
50 |
outputs="text",
|
|
|
2 |
import random
|
3 |
import time
|
4 |
import os
|
5 |
+
from transformers import AutoTokenizer
|
6 |
+
import transformers
|
7 |
+
import torch
|
8 |
from huggingface_hub import InferenceClient
|
9 |
|
10 |
+
|
11 |
+
# Model-loading configuration.
# NOTE(review): max_seq_length and dtype are not referenced anywhere in the
# visible code — presumably leftovers from an Unsloth training script; confirm
# before relying on them.
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.


#endpoint_url = os.getenv('url')

# Hugging Face access token, read from the Space's environment/secret store.
hf_token = os.getenv('hf_token')
|
19 |
|
20 |
+
# # Streaming Client
|
21 |
+
# client = InferenceClient(endpoint_url, token=hf_token)
|
22 |
|
23 |
+
# gen_kwargs = dict(
|
24 |
+
# max_new_tokens=1024,
|
25 |
+
# top_k=50,
|
26 |
+
# top_p=0.9,
|
27 |
+
# temperature=0.5,
|
28 |
+
# repetition_penalty=1.2, #1.02
|
29 |
+
# stop= ["\nUser:", "<|endoftext|>", "</s>"],
|
30 |
+
# )
|
31 |
+
|
32 |
+
|
33 |
+
from transformers import AutoTokenizer  # NOTE(review): duplicates the top-of-file imports; redundant but harmless
import transformers
import torch

# Tokenizer for the generation model; also supplies eos_token_id at inference time.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")

# Text-generation pipeline used by predict().
# FIX: `load_in_4bit` is not a keyword that transformers.pipeline() accepts
# directly — model-loading options must be passed through `model_kwargs`
# (4-bit loading additionally requires bitsandbytes to be installed).
# Passing it as a bare kwarg gets forwarded into generate() and errors at
# runtime, which may explain this Space's "Runtime error" status — confirm
# against the Space logs.
pipeline = transformers.pipeline(
    "text-generation",
    model="meta-llama/Llama-3.2-1B",
    #model="traversaal-llm-regional-languages/Unsloth_Urdu_Llama3_1_FP16_PF100",
    torch_dtype=torch.float16,
    device_map="auto",
    model_kwargs={"load_in_4bit": True},
)
|
47 |
|
48 |
+
|
49 |
+
|
50 |
+
def predict(prompt):
    """Generate a chat response for *prompt* using the module-level pipeline.

    Wraps the user prompt in an Alpaca-style instruction template, runs the
    text-generation pipeline once, and returns the concatenation of the
    generated sequences' text.

    Args:
        prompt: The raw user message to answer.

    Returns:
        The generated text as a single string (the pipeline's output includes
        the input prompt unless the pipeline strips it — confirm desired UX).
    """
    chat_prompt = f"""

### Instruction:
You are a chatbot. Provide answers with your best knowledge. Don't say you don't know unless you really don't

### Input:
{prompt}

### Response:
""
"""
    # NOTE(review): the stray "" line above is part of the literal prompt text
    # (carried over from the original); verify it is intentional.
    # BUG FIX: the template was built but the raw `prompt` was being passed to
    # the pipeline, so the instruction/response framing was never used.
    sequences = pipeline(
        chat_prompt,
        do_sample=True,
        temperature=0.2,
        top_p=0.9,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=100,
    )
    response = ''
    for seq in sequences:
        response += seq['generated_text']
    return response
|
77 |
+
# def generate_text(prompt):
|
78 |
+
# """Generates text using the Hugging Face Inference API."""
|
79 |
+
# chat_prompt = f"""
|
80 |
+
|
81 |
+
# ### Instruction:
|
82 |
+
# You are a chatbot. Chat in Urdu. Provide answers with your best knowledge. Don't say you don't know unless you really don't
|
83 |
+
|
84 |
+
# ### Input:
|
85 |
+
# {prompt}
|
86 |
+
|
87 |
+
# ### Response:
|
88 |
+
# ""
|
89 |
+
# """
|
90 |
+
# stream = client.text_generation(chat_prompt, stream=True, details=True, **gen_kwargs)
|
91 |
+
# generated_text = ""
|
92 |
+
# for r in stream:
|
93 |
+
# if r.token.special:
|
94 |
+
# continue
|
95 |
+
# if r.token.text in gen_kwargs["stop"]:
|
96 |
+
# break
|
97 |
+
# generated_text += r.token.text
|
98 |
+
# yield generated_text
|
99 |
|
100 |
iface = gr.Interface(
|
101 |
+
fn=predict,
|
102 |
inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
|
103 |
examples = ['میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟','amazing food locations in Singapore','best activities in London'],
|
104 |
outputs="text",
|