traversaal-ai committed
Commit efa99ab
Parent: 0e1b82c

Update run.py

Files changed (1): run.py +77 -23
run.py CHANGED
@@ -2,30 +2,56 @@ import gradio as gr
 import random
 import time
 import os
+from transformers import AutoTokenizer
+import transformers
+import torch
 from huggingface_hub import InferenceClient

-endpoint_url = os.getenv('url')
+
+max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
+dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
+
+
+#endpoint_url = os.getenv('url')

 hf_token = os.getenv('hf_token')

-# Streaming Client
-client = InferenceClient(endpoint_url, token=hf_token)
+# # Streaming Client
+# client = InferenceClient(endpoint_url, token=hf_token)

-gen_kwargs = dict(
-    max_new_tokens=1024,
-    top_k=50,
-    top_p=0.9,
-    temperature=0.5,
-    repetition_penalty=1.2, #1.02
-    stop= ["\nUser:", "<|endoftext|>", "</s>"],
+# gen_kwargs = dict(
+#     max_new_tokens=1024,
+#     top_k=50,
+#     top_p=0.9,
+#     temperature=0.5,
+#     repetition_penalty=1.2, #1.02
+#     stop= ["\nUser:", "<|endoftext|>", "</s>"],
+# )
+
+
+from transformers import AutoTokenizer
+import transformers
+import torch
+
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
+
+pipeline = transformers.pipeline(
+    "text-generation",
+    model = "meta-llama/Llama-3.2-1B",
+    #model="traversaal-llm-regional-languages/Unsloth_Urdu_Llama3_1_FP16_PF100",
+    torch_dtype=torch.float16,
+    device_map="auto",
+    load_in_4bit=True
 )

-def generate_text(prompt):
-    """Generates text using the Hugging Face Inference API."""
+
+
+def predict(prompt):
     chat_prompt = f"""

 ### Instruction:
-You are a chatbot. Chat in Urdu. Provide answers with your best knowledge. Don't say you don't know unless you really don't
+You are a chatbot. Provide answers with your best knowledge. Don't say you don't know unless you really don't

 ### Input:
 {prompt}
@@ -33,18 +59,46 @@ You are a chatbot. Chat in Urdu. Provide answers with your best knowledge. Don't
 ### Response:
 ""
 """
-    stream = client.text_generation(chat_prompt, stream=True, details=True, **gen_kwargs)
-    generated_text = ""
-    for r in stream:
-        if r.token.special:
-            continue
-        if r.token.text in gen_kwargs["stop"]:
-            break
-        generated_text += r.token.text
-        yield generated_text
+    sequences = pipeline(
+        prompt,
+        do_sample=True,
+        temperature=0.2,
+        top_p=0.9,
+        num_return_sequences=1,
+        eos_token_id=tokenizer.eos_token_id,
+        max_length=100,
+    )
+    response = ''
+    for seq in sequences:
+        response += seq['generated_text']
+
+
+    return response
+# def generate_text(prompt):
+#     """Generates text using the Hugging Face Inference API."""
+#     chat_prompt = f"""
+#
+# ### Instruction:
+# You are a chatbot. Chat in Urdu. Provide answers with your best knowledge. Don't say you don't know unless you really don't
+#
+# ### Input:
+#     {prompt}
+#
+# ### Response:
+# ""
+# """
+#     stream = client.text_generation(chat_prompt, stream=True, details=True, **gen_kwargs)
+#     generated_text = ""
+#     for r in stream:
+#         if r.token.special:
+#             continue
+#         if r.token.text in gen_kwargs["stop"]:
+#             break
+#         generated_text += r.token.text
+#         yield generated_text

 iface = gr.Interface(
-    fn=generate_text,
+    fn=predict,
     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
     examples = ['میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟','amazing food locations in Singapore','best activities in London'],
     outputs="text",
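Note on the new code: `predict` builds `chat_prompt` but then passes the raw `prompt` to the pipeline, so the instruction template is never applied; `load_in_4bit=True` is passed directly to `transformers.pipeline`, where current transformers releases expect quantization via `model_kwargs` and a `BitsAndBytesConfig`; the transformers/torch imports are repeated; and the `max_seq_length`/`dtype`/`load_in_4bit` globals (apparently Unsloth notebook leftovers) are never read. A minimal corrected sketch, assuming the Alpaca-style template above is the intended format and that the gated meta-llama/Llama-3.2-1B checkpoint is reachable with the same `hf_token` (generation values mirror the commit and are untuned):

import os
import torch
import transformers
from transformers import AutoTokenizer, BitsAndBytesConfig

hf_token = os.getenv('hf_token')
model_id = "meta-llama/Llama-3.2-1B"  # gated repo; assumes hf_token grants access

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    # 4-bit loading goes through model_kwargs in current transformers releases
    model_kwargs={"quantization_config": BitsAndBytesConfig(load_in_4bit=True)},
    token=hf_token,
)

def predict(prompt):
    chat_prompt = f"""### Instruction:
You are a chatbot. Provide answers with your best knowledge. Don't say you don't know unless you really don't

### Input:
{prompt}

### Response:
"""
    sequences = pipeline(
        chat_prompt,             # pass the formatted prompt, not the raw input
        do_sample=True,
        temperature=0.2,
        top_p=0.9,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=256,      # bound generated tokens rather than total max_length
        return_full_text=False,  # return only the completion, not the echoed prompt
    )
    return ''.join(seq['generated_text'] for seq in sequences)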
 
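One behavioral change worth flagging: the old `generate_text` was a generator (`yield generated_text`), which gr.Interface renders as streamed output, whereas the new `predict` returns once, so token-by-token streaming in the UI is lost. If streaming is still wanted with a locally loaded model, a generator built on transformers' TextIteratorStreamer is one option; in the sketch below, `predict_stream` and its generation settings are illustrative assumptions, and `pipeline` and `tokenizer` are reused from the sketch above:

from threading import Thread
from transformers import TextIteratorStreamer

def predict_stream(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(pipeline.model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so run it in a worker thread and consume the streamer here
    thread = Thread(target=pipeline.model.generate,
                    kwargs=dict(**inputs, streamer=streamer,
                                do_sample=True, temperature=0.2, top_p=0.9,
                                max_new_tokens=256))
    thread.start()
    text = ""
    for chunk in streamer:  # decoded text fragments arrive as they are generated
        text += chunk
        yield text          # gr.Interface streams partial output from generator functions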