xp3857 committed on
Commit
0ee5f31
1 Parent(s): c36c0cd

Create app.py

Files changed (1)
  1. app.py +319 -0
app.py ADDED
import gradio as gr
import requests
import os
from huggingface_hub import InferenceClient, HfApi
import random
import json
import datetime
import uuid
import yt_dlp
import cv2
import whisper

from agent import (
    PREFIX,
    COMPRESS_DATA_PROMPT,
    COMPRESS_DATA_PROMPT_SMALL,
    LOG_PROMPT,
    LOG_RESPONSE,
)

client = InferenceClient(
    "mistralai/Mixtral-8x7B-Instruct-v0.1"
)

reponame = "Omnibus/tmp"
save_data = f'https://huggingface.co/datasets/{reponame}/raw/main/'
#token_self = os.environ['HF_TOKEN']
#api = HfApi(token=token_self)

# NOTE: requests, json, and yt_dlp are imported but unused below;
# video downloads shell out to the yt-dlp CLI instead.
sizes = list(whisper._MODELS.keys())  # available Whisper checkpoint sizes
langs = ["none"] + sorted(list(whisper.tokenizer.LANGUAGES.values()))
current_size = "base"
loaded_model = whisper.load_model(current_size)

VERBOSE = True
MAX_HISTORY = 100
MAX_DATA = 20000

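# ---------------------------------------------------------------
# Video loading: dl() takes either a pasted URL (fetched with the
# yt-dlp CLI, assumed to be on PATH) or an already-uploaded file,
# and returns a local .mp4 path for the gr.Video component.
# ---------------------------------------------------------------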
def dl(inp, img):
    """Fetch a video from a URL with yt-dlp, or pass through an uploaded file."""
    uid = uuid.uuid4()
    out = None
    if img is None and inp != "":
        try:
            # Build a filesystem-safe filename from the URL
            inp_out = inp.replace("https://", "")
            inp_out = inp_out.replace("/", "_").replace(".", "_").replace("=", "_").replace("?", "_")
            if "twitter" in inp:
                os.system(f'yt-dlp "{inp}" --extractor-arg "twitter:api=syndication" --trim-filenames 160 -o "{uid}/{inp_out}.mp4" -S res,mp4 --recode mp4')
            else:
                os.system(f'yt-dlp "{inp}" --trim-filenames 160 -o "{uid}/{inp_out}.mp4" -S res,mp4 --recode mp4')

            out = f"{uid}/{inp_out}.mp4"
            # Probe the file with OpenCV to confirm it is a readable video
            capture = cv2.VideoCapture(out)
            fps = capture.get(cv2.CAP_PROP_FPS)
            capture.release()
        except Exception as e:
            print(e)
            out = None
    elif img is not None and inp == "":
        capture = cv2.VideoCapture(img)
        fps = capture.get(cv2.CAP_PROP_FPS)
        capture.release()
        out = f"{img}"
    return out

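# ---------------------------------------------------------------
# Transcription: csv() flattens Whisper segments into "start,end,text"
# rows; transcribe() streams progress messages into the chatbot while
# the selected Whisper model runs.
# ---------------------------------------------------------------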
def csv(segments):
    output = ""
    for segment in segments:
        output += f"{segment['start']},{segment['end']},{segment['text']}\n"
    return output

def transcribe(path, lang, size):
    global current_size, loaded_model
    yield None, [("", "Transcribing Video...")]
    # Reload Whisper only when the requested size changes
    if size != current_size:
        loaded_model = whisper.load_model(size)
        current_size = size
    # "none" in the dropdown means let Whisper auto-detect the language
    language = None if lang == "none" else lang
    results = loaded_model.transcribe(path, language=language)
    subs = ".csv"  # subtitle format is currently hardcoded
    if subs == "None":
        yield results["text"], [("", "Transcription Complete...")]
    elif subs == ".csv":
        yield csv(results["segments"]), [("", "Transcription Complete...")]


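# ---------------------------------------------------------------
# Mixtral instruction formatting. Note: format_prompt is currently
# unused; run_gpt sends the assembled `content` string directly.
# ---------------------------------------------------------------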
def format_prompt(message, history):
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt


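# ---------------------------------------------------------------
# run_gpt(): one streamed text-generation call against the Inference
# API; the PREFIX template and prompt_kwargs come from agent.py.
# ---------------------------------------------------------------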
def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    **prompt_kwargs,
):
    # NOTE: stop_tokens is accepted but not forwarded to the generation call
    print(seed)
    timestamp = datetime.datetime.now()

    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=max_tokens,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )

    content = PREFIX.format(
        timestamp=timestamp,
        purpose="Compile the provided data and complete the user's task",
    ) + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    # Accumulate the streamed tokens into a single response string
    stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
    resp = ""
    for response in stream:
        resp += response.token.text

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp


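# ---------------------------------------------------------------
# Map step: slice the input into roughly MAX_DATA-delimiter chunks
# (c is the caller's whitespace/comma/newline count) and summarize
# each chunk independently, returning a list of partial summaries.
# ---------------------------------------------------------------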
def compress_data(c, instruct, history):
    seed = random.randint(1, 1000000000)

    print(f'c:: {c}')
    # Derive a chunk size of roughly MAX_DATA delimiters and the
    # number of passes needed to cover the whole input
    divr = int(c) / MAX_DATA
    divi = int(divr) + 1 if divr != int(divr) else int(divr)  # ceiling
    chunk = int(int(c) / divr)
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print(f'divi:: {divi}')
    out = []
    s = 0
    e = chunk
    print(f'e:: {e}')
    for z in range(divi):
        print(f's:e :: {s}:{e}')

        hist = history[s:e]

        resp = run_gpt(
            COMPRESS_DATA_PROMPT_SMALL,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=8192,
            seed=seed,
            direction=instruct,
            knowledge="",
            history=hist,
        )
        out.append(resp)
        e = e + chunk
        s = s + chunk
    return out


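# ---------------------------------------------------------------
# Reduce step: same chunking, but each pass feeds the previous
# summary back in as `knowledge`, so the final response is one
# rolling report rather than a list of partial summaries.
# ---------------------------------------------------------------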
def compress_data_og(c, instruct, history):
    seed = random.randint(1, 1000000000)

    print(c)
    divr = int(c) / MAX_DATA
    divi = int(divr) + 1 if divr != int(divr) else int(divr)  # ceiling
    chunk = int(int(c) / divr)
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print(f'divi:: {divi}')
    s = 0
    e = chunk
    print(f'e:: {e}')
    new_history = ""
    for z in range(divi):
        print(f's:e :: {s}:{e}')

        hist = history[s:e]

        resp = run_gpt(
            COMPRESS_DATA_PROMPT,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=8192,
            seed=seed,
            direction=instruct,
            knowledge=new_history,
            history=hist,
        )

        # Feed the running summary back in so each pass sees prior context
        new_history = resp
        print(resp)
        e = e + chunk
        s = s + chunk
    '''
    resp = run_gpt(
        COMPRESS_DATA_PROMPT,
        stop_tokens=["observation:", "task:", "action:", "thought:"],
        max_tokens=8192,
        seed=seed,
        direction=instruct,
        knowledge=new_history,
        history="All data has been received.",
    )'''
    print("final" + resp)
    return resp


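# ---------------------------------------------------------------
# Pipeline driver: size the transcript, map it through
# compress_data(), then reduce the partial summaries with
# compress_data_og() into the report shown in the chatbot.
# ---------------------------------------------------------------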
def summarize(inp, history, mem_check, data=None):
    # mem_check is wired to the UI checkbox but not used yet
    json_box = []
    error_box = ""
    json_out = {}
    rawp = "Error"
    if inp == "":
        inp = "Process this data"
    history = [(inp, "Summarizing Transcription...")]
    yield "", history, error_box, json_box

    if data != "Error" and data != "" and data is not None:
        print(inp)
        out = str(data)
        rl = len(out)
        print(f'rl:: {rl}')
        # Rough size estimate: count whitespace/comma/newline delimiters
        c = 1
        for i in out:
            if i == " " or i == "," or i == "\n":
                c += 1
        print(f'c:: {c}')
        json_out = compress_data(c, inp, out)
        history = [(inp, "Generating Report...")]
        yield "", history, error_box, json_out

        # Second pass: compress the chunk summaries into a single report
        out = str(json_out)
        print(out)
        rl = len(out)
        print(f'rl:: {rl}')
        c = 1
        for i in out:
            if i == " " or i == "," or i == "\n":
                c += 1
        print(f'c2:: {c}')
        rawp = compress_data_og(c, inp, out)
        history = [(inp, rawp)]
        yield "", history, error_box, json_out
    else:
        rawp = "Provide a valid data source"
        history = [(inp, rawp)]
        yield "", history, error_box, json_out


#################################
def clear_fn():
    return "", [(None, None)]

with gr.Blocks() as app:
    gr.HTML("""<center><h1>Video Summarizer</h1><h3>Mixtral 8x7B + Whisper</h3>""")
    with gr.Row():
        with gr.Column():
            with gr.Row():
                inp_url = gr.Textbox(label="Video URL")
                url_btn = gr.Button("Load Video")
            vid = gr.Video()
            #trans_btn = gr.Button("Transcribe")
            trans = gr.Textbox(interactive=True)
        chatbot = gr.Chatbot(label="Mixtral 8x7B Chatbot", show_copy_button=True)

    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Instructions (optional)")
        with gr.Column(scale=1):
            mem_check = gr.Checkbox(label="Memory", value=False)
            button = gr.Button()

    with gr.Row():
        stop_button = gr.Button("Stop")
        clear_btn = gr.Button("Clear")
    with gr.Row():
        sz = gr.Dropdown(label="Model Size", choices=sizes, value='base')
        # whisper's LANGUAGES map holds lowercase names, so "english" is the valid default
        lang = gr.Dropdown(label="Language (Optional)", choices=langs, value="english")
    json_out = gr.JSON()
    e_box = gr.Textbox()

    url_btn.click(dl, [inp_url, vid], vid)
    #trans_btn.click(transcribe, [vid, lang, sz], trans)
    clear_btn.click(clear_fn, None, [prompt, chatbot])
    # Transcribe first, then pipe the transcript into the summarizer
    go = button.click(transcribe, [vid, lang, sz], [trans, chatbot]).then(summarize, [prompt, chatbot, mem_check, trans], [prompt, chatbot, e_box, json_out])
    stop_button.click(None, None, None, cancels=[go])
app.queue(default_concurrency_limit=20).launch(show_api=False)
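
# Runtime assumptions (not pinned in this commit): the yt-dlp CLI and
# ffmpeg must be available on PATH for downloading and Whisper decoding,
# and agent.py must provide the imported prompt templates.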