DiamondYin awacke1 committed

Commit d8b7878 · 0 Parent(s):

Duplicate from awacke1/Voice-ChatGPT-Streamlit-12

Co-authored-by: Aaron C Wacker <[email protected]>

Files changed (5)
  1. .gitattributes +35 -0
  2. .streamlit/config.toml +6 -0
  3. README.md +14 -0
  4. app.py +293 -0
  5. requirements.txt +5 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
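
These are the stock Git LFS tracking rules that Hugging Face seeds into every new Space, so model weights, archives, and other binary artifacts go through LFS rather than plain Git. Running `git lfs track "*.wav"`, for instance, would append one more such filter=lfs line to this file.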
.streamlit/config.toml ADDED
@@ -0,0 +1,6 @@
+ [theme]
+ primaryColor="#F63366"
+ backgroundColor="#FFFFFF"
+ secondaryBackgroundColor="#F0F2F6"
+ textColor="#262730"
+ font="sans serif"
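
Streamlit reads .streamlit/config.toml automatically at startup; these [theme] keys give the app its default look (pink primary color on a white background). The same options can, if memory serves, also be passed per-run as flags, e.g. `streamlit run app.py --theme.primaryColor "#F63366"`, for anyone who would rather not commit a config file.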
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: Voice ChatGPT Streamlit 12
+ emoji: 🌍
+ colorFrom: blue
+ colorTo: gray
+ sdk: streamlit
+ sdk_version: 1.21.0
+ app_file: app.py
+ pinned: false
+ license: mit
+ duplicated_from: awacke1/Voice-ChatGPT-Streamlit-12
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,293 @@
+ import streamlit as st
+ import openai
+ import os
+ import base64
+ import glob
+ import json
+ import mistune
+ import pytz
+ import math
+ import requests
+ import time
+
+ from datetime import datetime
+ from openai import ChatCompletion
+ from xml.etree import ElementTree as ET
+ from bs4 import BeautifulSoup
+ from collections import deque
+ from audio_recorder_streamlit import audio_recorder
+
+ def generate_filename(prompt, file_type):
+     # Timestamped, filesystem-safe name: MMDD_HHMM plus the first 45 alphanumeric characters of the prompt.
+     central = pytz.timezone('US/Central')
+     safe_date_time = datetime.now(central).strftime("%m%d_%I%M")
+     safe_prompt = "".join(x for x in prompt if x.isalnum())[:45]
+     return f"{safe_date_time}_{safe_prompt}.{file_type}"
+
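For reference, a quick sketch of what generate_filename returns (the timestamp is illustrative, from a hypothetical call at 02:30 Central on June 19):

    generate_filename("What is quantum computing?", "md")
    # -> '0619_0230_Whatisquantumcomputing.md'  (punctuation stripped, prompt capped at 45 chars)
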
+ def transcribe_audio(openai_key, file_path, model):
+     # POST the audio file to OpenAI's Whisper transcription endpoint as multipart form data.
+     OPENAI_API_URL = "https://api.openai.com/v1/audio/transcriptions"
+     headers = {
+         "Authorization": f"Bearer {openai_key}",
+     }
+     with open(file_path, 'rb') as f:
+         data = {'file': f}
+         response = requests.post(OPENAI_API_URL, headers=headers, files=data, data={'model': model})
+     if response.status_code == 200:
+         st.write(response.json())
+         # Feed the transcript straight back into the chat model.
+         response2 = chat_with_model(response.json().get('text'), '')
+         st.write('Responses:')
+         #st.write(response)
+         st.write(response2)
+         return response.json().get('text')
+     else:
+         st.write(response.json())
+         st.error("Error in API call.")
+         return None
+
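The same Whisper endpoint can be exercised outside the app; a minimal standalone sketch, assuming OPENAI_KEY is set in the environment and a recording.wav file exists:

    import os, requests
    resp = requests.post(
        "https://api.openai.com/v1/audio/transcriptions",
        headers={"Authorization": f"Bearer {os.environ['OPENAI_KEY']}"},
        files={"file": open("recording.wav", "rb")},
        data={"model": "whisper-1"},
    )
    print(resp.json().get("text"))
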
+ def save_and_play_audio(audio_recorder):
+     # Record from the browser mic, save the bytes as a timestamped .wav, and play them back.
+     audio_bytes = audio_recorder()
+     if audio_bytes:
+         filename = generate_filename("Recording", "wav")
+         with open(filename, 'wb') as f:
+             f.write(audio_bytes)
+         st.audio(audio_bytes, format="audio/wav")
+         return filename
+     return None
+
+ def create_file(filename, prompt, response):
+     if filename.endswith(".txt"):
+         with open(filename, 'w') as file:
+             file.write(f"{prompt}\n{response}")
+     elif filename.endswith(".htm"):
+         with open(filename, 'w') as file:
+             file.write(f"{prompt} {response}")
+     elif filename.endswith(".md"):
+         with open(filename, 'w') as file:
+             file.write(f"{prompt}\n\n{response}")
+
+ def truncate_document(document, length):
+     return document[:length]
+
+ def divide_document(document, max_length):
+     # Split a long document into max_length-sized chunks for the model's context window.
+     return [document[i:i+max_length] for i in range(0, len(document), max_length)]
+
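divide_document is a plain fixed-width chunker; for example:

    divide_document("abcdefgh", 3)  # -> ['abc', 'def', 'gh']; the last chunk keeps the remainder
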
+ def get_table_download_link(file_path):
+     # Build a base64 data-URI download link so saved files can be fetched from the sidebar.
+     with open(file_path, 'r') as file:
+         data = file.read()
+     b64 = base64.b64encode(data.encode()).decode()
+     file_name = os.path.basename(file_path)
+     ext = os.path.splitext(file_name)[1]  # get the file extension
+     if ext in ('.txt', '.py', '.xlsx', '.csv'):
+         mime_type = 'text/plain'
+     elif ext == '.htm':
+         mime_type = 'text/html'
+     elif ext == '.md':
+         mime_type = 'text/markdown'
+     else:
+         mime_type = 'application/octet-stream'  # general binary data type
+     href = f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{file_name}">{file_name}</a>'
+     return href
+
+ def CompressXML(xml_text):
+     # Strip comment elements to shrink the XML before sending it to the model.
+     root = ET.fromstring(xml_text)
+     parent_map = {c: p for p in root.iter() for c in p}  # ElementTree elements don't track their parent
+     for elem in list(root.iter()):
+         if isinstance(elem.tag, str) and 'Comment' in elem.tag:
+             parent_map[elem].remove(elem)
+     return ET.tostring(root, encoding='unicode', method="xml")
+
+ def read_file_content(file, max_length):
+     # Dispatch on the uploaded file's MIME type and return its text content.
+     if file.type == "application/json":
+         content = json.load(file)
+         return str(content)
+     elif file.type == "text/html" or file.type == "text/htm":
+         content = BeautifulSoup(file, "html.parser")
+         return content.text
+     elif file.type == "application/xml" or file.type == "text/xml":
+         tree = ET.parse(file)
+         root = tree.getroot()
+         xml = CompressXML(ET.tostring(root, encoding='unicode'))
+         return xml
+     elif file.type == "text/markdown" or file.type == "text/md":
+         md = mistune.create_markdown()
+         content = md(file.read().decode())
+         return content
+     elif file.type == "text/plain":
+         return file.getvalue().decode()
+     else:
+         return ""
+
+ def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
+     model = model_choice
+     conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
+     conversation.append({'role': 'user', 'content': prompt})
+     if len(document_section) > 0:
+         conversation.append({'role': 'assistant', 'content': document_section})
+
+     # Stream the completion and render partial output as it arrives.
+     start_time = time.time()
+
+     report = []
+     res_box = st.empty()
+
+     collected_chunks = []
+     collected_messages = []
+
+     for chunk in openai.ChatCompletion.create(
+         model=model,
+         messages=conversation,
+         temperature=0.5,
+         stream=True
+     ):
+         collected_chunks.append(chunk)  # save the event response
+         chunk_message = chunk['choices'][0]['delta']  # extract the message
+         collected_messages.append(chunk_message)  # save the message
+
+         content = chunk["choices"][0].get("delta", {}).get("content")
+         if content is not None:  # the first and last stream events carry no content
+             report.append(content)
+             result = "".join(report).strip()
+             #result = result.replace("\n", "")
+             res_box.markdown(f'*{result}*')
+
+     full_reply_content = ''.join([m.get('content', '') for m in collected_messages])
+     #st.write(f"Full conversation received: {full_reply_content}")
+     st.write("Elapsed time:")
+     st.write(time.time() - start_time)
+     return full_reply_content
+
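For context on the None guard above: with stream=True the 0.x openai client yields one event per text fragment, and only the middle events carry content. A representative chunk (shape only, values illustrative) looks like:

    {'choices': [{'delta': {'content': 'Hel'}, 'index': 0, 'finish_reason': None}]}
    # first event: delta = {'role': 'assistant'}; last event: delta = {} with finish_reason = 'stop'
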
+ def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
+     # Non-streaming variant: one-shot completion over a file's contents.
+     conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
+     conversation.append({'role': 'user', 'content': prompt})
+     if len(file_content) > 0:
+         conversation.append({'role': 'assistant', 'content': file_content})
+     response = openai.ChatCompletion.create(model=model_choice, messages=conversation)
+     return response['choices'][0]['message']['content']
+
+ def main():
+     # Sidebar and global
+     openai.api_key = os.getenv('OPENAI_KEY')
+     st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
+     menu = ["htm", "txt", "xlsx", "csv", "md", "py"]
+     choice = st.sidebar.selectbox("Output File Type:", menu)
+     model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))
+
+     # Audio, transcribe, GPT:
+     filename = save_and_play_audio(audio_recorder)
+     if filename is not None:
+         transcription = transcribe_audio(openai.api_key, filename, "whisper-1")
+         st.write(transcription)
+         gptOutput = chat_with_model(transcription, '', model_choice)
+         filename = generate_filename(transcription, choice)
+         create_file(filename, transcription, gptOutput)
+         st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+
+     user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)
+
+     collength, colupload = st.columns([2, 3])  # adjust the ratio as needed
+     with collength:
+         # 12000 is optimal for gpt-3.5-turbo; 2x=24000 for gpt-4, 8x=96000 for gpt-4-32k.
+         max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
+     with colupload:
+         uploaded_file = st.file_uploader("Add a file for context:", type=["xml", "json", "xlsx", "csv", "html", "htm", "md", "txt"])
+
+     document_sections = deque()
+     document_responses = {}
+
+     if uploaded_file is not None:
+         file_content = read_file_content(uploaded_file, max_length)
+         document_sections.extend(divide_document(file_content, max_length))
+
+     if len(document_sections) > 0:
+         if st.button("👁️ View Upload"):
+             st.markdown("**Sections of the uploaded file:**")
+             for i, section in enumerate(list(document_sections)):
+                 st.markdown(f"**Section {i+1}**\n{section}")
+
+         st.markdown("**Chat with the model:**")
+         for i, section in enumerate(list(document_sections)):
+             if i in document_responses:
+                 st.markdown(f"**Section {i+1}**\n{document_responses[i]}")
+             else:
+                 if st.button(f"Chat about Section {i+1}"):
+                     st.write('Reasoning with your inputs...')
+                     response = chat_with_model(user_prompt, section, model_choice)
+                     st.write('Response:')
+                     st.write(response)
+                     document_responses[i] = response
+                     filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
+                     create_file(filename, user_prompt, response)
+                     st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+
+     if st.button('💬 Chat'):
+         st.write('Reasoning with your inputs...')
+         response = chat_with_model(user_prompt, ''.join(list(document_sections)), model_choice)
+         st.write('Response:')
+         st.write(response)
+
+         filename = generate_filename(user_prompt, choice)
+         create_file(filename, user_prompt, response)
+         st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+
+     all_files = glob.glob("*.*")
+     all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 20]  # exclude files with short names
+     all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order
+
+     # Sidebar file browser: view as markdown, download, open, search, or delete each saved file.
+     file_contents = ''
+     next_action = ''
+     for file in all_files:
+         col1, col2, col3, col4, col5 = st.sidebar.columns([1, 6, 1, 1, 1])  # adjust the ratio as needed
+         with col1:
+             if st.button("🌐", key="md_" + file):  # md emoji button
+                 with open(file, 'r') as f:
+                     file_contents = f.read()
+                 next_action = 'md'
+         with col2:
+             st.markdown(get_table_download_link(file), unsafe_allow_html=True)
+         with col3:
+             if st.button("📂", key="open_" + file):  # open emoji button
+                 with open(file, 'r') as f:
+                     file_contents = f.read()
+                 next_action = 'open'
+         with col4:
+             if st.button("🔍", key="read_" + file):  # search emoji button
+                 with open(file, 'r') as f:
+                     file_contents = f.read()
+                 next_action = 'search'
+         with col5:
+             if st.button("🗑", key="delete_" + file):
+                 os.remove(file)
+                 st.experimental_rerun()
+
+     if len(file_contents) > 0:
+         if next_action == 'open':
+             file_content_area = st.text_area("File Contents:", file_contents, height=500)
+         if next_action == 'md':
+             st.markdown(file_contents)
+         if next_action == 'search':
+             file_content_area = st.text_area("File Contents:", file_contents, height=500)
+             st.write('Reasoning with your inputs...')
+             #response = chat_with_file_contents(user_prompt, file_contents)
+             response = chat_with_model(user_prompt, file_contents, model_choice)
+             st.write('Response:')
+             st.write(response)
+             filename = generate_filename(file_content_area, choice)
+             create_file(filename, file_content_area, response)
+             st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+
+ if __name__ == "__main__":
+     main()
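
To try the app outside of Spaces, one would roughly: install the requirements below plus streamlit itself, export OPENAI_KEY (the app reads it via os.getenv), and launch with `streamlit run app.py`. On a Hugging Face Space the key is set as a repository secret instead.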
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ openai
+ beautifulsoup4
+ mistune
+ pytz
+ audio-recorder-streamlit
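
Note that app.py also imports streamlit, requests, and math, none of which are pinned here. The Space still builds because the streamlit SDK image provides streamlit (with requests among its dependencies, and math being stdlib), but a local install needs streamlit added explicitly.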