Ibrahimarain committed
Commit ac1af52 · Parent: 319bf3d

added local.py

Files changed (1)
  1. app.py +189 -4
app.py CHANGED
@@ -1,7 +1,192 @@
  import gradio as gr

- def greet(name):
-     return "Hello " + name + "!!"

- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
 
+ import llama_cpp
+ import llama_cpp.llama_tokenizer
+ from llama_cpp import Llama
+
  import gradio as gr
+ from loguru import logger
+ import psutil
+ from ctransformers import AutoModelForCausalLM, AutoTokenizer  # only used by the commented-out ctransformers path below
+
+
+ prompt_template = """[INST] <<SYS>>
+ You are a helpful assistant for a crowdfunding platform called GiveSendGo. Your goal is to gather essential information for the campaign and generate a title and a sample pitch of at least 1000 words for the campaign.
+ <</SYS>>
+
+ {question} [/INST]
+ """
+
+ model_loc = "models/llama-2-13b-chat.Q5_K_M.gguf"
+
+ # llama = AutoModelForCausalLM.from_pretrained(
+ #     model_loc,
+ #     model_type="llama",
+ #     context_length=4096,
+ #     max_new_tokens=2048,
+ #     hf=True,
+ #     # threads=cpu_count,
+ # )
+
+
+ # llama = llama_cpp.Llama.from_pretrained(
+ #     # repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
+ #     # filename="*q8_0.gguf",
+ #     model_path=model_loc,
+ #     model_type="llama",
+ #     context_length=4096,
+ #     max_new_tokens=2048,
+ #     filename="llama-2-13b-chat.Q5_K_M.gguf",
+ #     tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
+ #     verbose=False,
+ # )
+
+ llama = Llama(
+     model_path=model_loc,
+     max_tokens=4096,
+     n_ctx=4096,
+     verbose=False,
+ )
+
+ # leftover from an earlier colon-delimited prompt format; stop_string is not used below
+ _ = [elm for elm in prompt_template.splitlines() if elm.strip()]
+ stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
+
+ logger.debug(f"{stop_string=}")
+
+ # psutil.cpu_count(logical=False) can return None, so guard before subtracting
+ _ = (psutil.cpu_count(logical=False) or 2) - 1
+ cpu_count: int = int(_) if _ else 1
+ logger.debug(f"{cpu_count=}")
+
+
+ model = "gpt-3.5-turbo"
+
+ def predict(message, history):
+     messages = []
+     prompt = prompt_template.format(question=message)
+
+     # send the system prompt once, then replay the conversation history
+     messages.append({"role": "system", "content": prompt})
+     for user_message, assistant_message in history:
+         messages.append({"role": "user", "content": user_message})
+         messages.append({"role": "assistant", "content": assistant_message})
+
+     messages.append({"role": "user", "content": message})
+
+     response = llama.create_chat_completion_openai_v1(
+         model=model,
+         messages=messages,
+         response_format={
+             "type": "json_object",
+             "schema": {
+                 "type": "object",
+                 "properties": {
+                     "title": {"type": "string"},
+                     # "description": {"type": "string"},
+                     "sample_pitch": {"type": "string"},
+                     "amount": {"type": "string"},
+                     "location": {"type": "string"},
+                 },
+                 "required": ["title", "sample_pitch", "amount", "location"],  # description
+             },
+         },
+         temperature=0.7,
+         stream=True,
+     )
+
+     text = ""
+     for chunk in response:
+         content = chunk.choices[0].delta.content
+         if content:
+             text += content
+             yield text
+
+
+ def generate(message):
+     try:
+         messages = []
+         prompt = prompt_template.format(question=message)
+
+         # for user_message, assistant_message in history:
+         messages.append({"role": "system", "content": prompt})
+         # messages.append({"role": "user", "content": user_message})
+         # messages.append({"role": "assistant", "content": assistant_message})
+
+         messages.append({"role": "user", "content": message})
+
+         response = llama.create_chat_completion_openai_v1(
+             model=model,
+             messages=messages,
+             response_format={
+                 "type": "json_object",
+                 "schema": {
+                     "type": "object",
+                     "properties": {
+                         "title": {"type": "string"},
+                         # "description": {"type": "string"},
+                         "sample_pitch": {"type": "string"},
+                         "amount": {"type": "string"},
+                         "location": {"type": "string"},
+                     },
+                     "required": ["title", "sample_pitch", "amount", "location"],  # description
+                 },
+             },
+             temperature=0.7,
+             stream=False,
+         )
+
+         # text = ""
+         # for chunk in response:
+         #     content = chunk.choices[0].delta.content
+         #     if content:
+         #         text += content
+         #         logger.debug(f"api: {content=}")
+         #         yield text
+
+         logger.debug(f"{response}")
+
+         # with stream=False the text is in message.content, not delta.content
+         return response.choices[0].message.content
+
+     except Exception as exc:
+         logger.error(exc)
+         return f"{exc=}"
+
+
+ def predict_api(message):
+     logger.debug(f"{message=}")
+     text = generate(message)
+     logger.debug(f"text::{text=}")
+
+     return f"json: {text=}"
+
+
+ js = """function () {
+     gradioURL = window.location.href
+     if (!gradioURL.endsWith('?__theme=dark')) {
+         window.location.replace(gradioURL + '?__theme=dark');
+     }
+ }"""
+
+ css = """
+ footer {
+     visibility: hidden;
+ }
+ full-height {
+     height: 100%;
+ }
+ """
+
+ with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css) as demo:
+     gr.ChatInterface(predict, examples=["What is the capital of France?", "Who was the first person on the moon?"])
+
+     with gr.Accordion("For Chat/Translation API", open=False, visible=False):
+         input_text = gr.Text()
+         api_btn = gr.Button("Go", variant="primary")
+         out_text = gr.Text()
+
+     api_btn.click(
+         predict_api,
+         input_text,
+         out_text,
+         api_name="api",
+     )
+

+ if __name__ == "__main__":
+     demo.queue().launch(debug=True, share=True)
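
For reference (not part of the commit): a minimal sketch of how the hidden endpoint registered above with api_name="api" could be exercised from gradio_client once the app is running. The URL and the sample campaign message are placeholder assumptions, not values from app.py.

from gradio_client import Client

# assumed local launch address; use the deployed Space URL instead if applicable
client = Client("http://127.0.0.1:7860/")

result = client.predict(
    "I need $5,000 to rebuild my bakery in Austin after a kitchen fire.",  # hypothetical campaign brief
    api_name="/api",  # corresponds to api_name="api" on api_btn.click in app.py
)

# predict_api wraps generate()'s JSON string, so the reply looks like:
# json: text='{"title": ..., "sample_pitch": ..., "amount": ..., "location": ...}'
print(result)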