YingxuHe committed on
Commit
cf9d671
Β·
1 Parent(s): 5e8f8a6

add mic button

Browse files
app.py CHANGED
@@ -1,17 +1,42 @@
1
- import os
2
 
3
  import streamlit as st
4
- import streamlit.components.v1 as components
5
 
6
- from pages import *
 
 
7
 
8
- def main():
9
- ## Set Streamlit configuration
10
- st.set_page_config(page_title='MERaLiON-AudioLLM', page_icon = "πŸ”₯", layout='wide')
11
 
12
- st.markdown('<style>' + open('./style/sidebar_style.css').read() + '</style>', unsafe_allow_html=True)
13
-
14
- audio_llm()
15
 
16
- if __name__ == '__main__':
17
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import copy

import streamlit as st

from src.tunnel import start_server
from src.generation import FIXED_GENERATION_CONFIG, load_model
from src.pages import DEFAULT_DIALOGUE_STATES, sidebar_fragment, specify_audio_fragment, conversation_section


st.set_page_config(page_title='MERaLiON-AudioLLM', page_icon = "πŸ”₯", layout='wide')

# Inject the app-wide stylesheet. A context manager closes the file handle
# promptly instead of leaking it until garbage collection.
with open('./style/app_style.css') as css_file:
    st.markdown('<style>' + css_file.read() + '</style>', unsafe_allow_html=True)

# The SSH tunnel and model client are created once and kept in session state
# so Streamlit reruns reuse the same connections.
if "server" not in st.session_state:
    st.session_state.server = start_server()

if "client" not in st.session_state or 'model_name' not in st.session_state:
    st.session_state.client, st.session_state.model_name = load_model()

# Seed session state with fixed generation parameters and default dialogue
# state; deepcopy so mutable defaults are never shared across sessions.
for key, value in FIXED_GENERATION_CONFIG.items():
    if key not in st.session_state:
        st.session_state[key] = copy.deepcopy(value)

for key, value in DEFAULT_DIALOGUE_STATES.items():
    if key not in st.session_state:
        st.session_state[key] = copy.deepcopy(value)

with st.sidebar:
    sidebar_fragment()

if st.sidebar.button('Clear History'):
    st.session_state.update(copy.deepcopy(DEFAULT_DIALOGUE_STATES))

st.markdown("<h1 style='text-align: center;'>MERaLiON-AudioLLM Demo πŸ€–</h1>", unsafe_allow_html=True)
st.markdown(
    """This demo is based on [MERaLiON-AudioLLM](https://huggingface.co/MERaLiON/MERaLiON-AudioLLM-Whisper-SEA-LION),
    developed by I2R, A*STAR, in collaboration with AISG, Singapore.
    It is tailored for Singapore’s multilingual and multicultural landscape."""
)

specify_audio_fragment()
conversation_section()
requirements.txt CHANGED
@@ -1,7 +1,5 @@
1
  librosa==0.10.2.post1
2
  streamlit==1.40.2
3
- streamlit-on-Hover-tabs==1.0.1
4
  openai==1.57.1
5
  streamlit_mic_recorder==0.0.8
6
- streamlit-server-state==0.18.2
7
  sshtunnel
 
1
  librosa==0.10.2.post1
2
  streamlit==1.40.2
 
3
  openai==1.57.1
4
  streamlit_mic_recorder==0.0.8
 
5
  sshtunnel
src/exceptions.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
class NoAudioException(Exception):
    """Raised when a generation request is made without any audio input."""
    pass


class TunnelNotRunningException(Exception):
    """Raised when the SSH tunnel to the inference server is not running."""
    pass
src/generation.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import time
4
+ from typing import List
5
+
6
+ import streamlit as st
7
+ from openai import OpenAI, APIConnectionError
8
+
9
+ from src.exceptions import NoAudioException, TunnelNotRunningException
10
+
11
+
12
# Local port the SSH tunnel binds to. Requires the LOCAL_PORT env var to be
# set: int(None) raises TypeError at import time if it is missing.
local_port = int(os.getenv('LOCAL_PORT'))


# Generation parameters that are fixed for every request (copied into
# session state at app start) and are not user-tunable from the sidebar.
FIXED_GENERATION_CONFIG = dict(
    max_completion_tokens=1024,
    top_k=50,
    length_penalty=1.0,
    seed=42
)
21
+
22
+
23
def load_model():
    """
    Build an OpenAI-compatible client pointed at the tunnelled vllm server
    and discover the name of the (single) model it serves.

    Returns a (client, model_name) pair.
    """
    api_key = os.getenv('API_KEY')
    base_url = f"http://localhost:{local_port}/v1"

    client = OpenAI(api_key=api_key, base_url=base_url)

    # The vllm server exposes exactly one model; take the first entry.
    model_name = client.models.list().data[0].id

    return client, model_name
39
+
40
def _retrive_response(text_input: str, base64_audio_input: str, **kwargs):
    """
    Send one chat-completion request combining the text instruction and the
    base64-encoded audio through the OpenAI client held in session state.

    Extra keyword arguments (model, sampling params, stream, ...) are
    forwarded unchanged to `chat.completions.create`.
    """
    text_part = {
        "type": "text",
        "text": f"Text instruction: {text_input}",
    }
    audio_part = {
        "type": "audio_url",
        "audio_url": {
            "url": f"data:audio/ogg;base64,{base64_audio_input}"
        },
    }
    user_message = {"role": "user", "content": [text_part, audio_part]}

    return st.session_state.client.chat.completions.create(
        messages=[user_message],
        **kwargs
    )
63
+
64
+
65
def _retry_retrive_response_throws_exception(text_input, base64_audio_input, stream=False, retry=3):
    """
    Request a completion, retrying up to `retry` times when the connection
    fails because the SSH tunnel is down or still starting.

    Raises:
        NoAudioException: when no audio is supplied.
        TunnelNotRunningException: when retries are exhausted and the
            tunnel still is not running.
        APIConnectionError: when the tunnel is up but the request fails.
    """
    if not base64_audio_input:
        raise NoAudioException("audio is empty.")

    try:
        response_object = _retrive_response(
            text_input=text_input,
            base64_audio_input=base64_audio_input,
            model=st.session_state.model_name,
            max_completion_tokens=st.session_state.max_completion_tokens,
            temperature=st.session_state.temperature,
            top_p=st.session_state.top_p,
            extra_body={
                "repetition_penalty": st.session_state.repetition_penalty,
                "top_k": st.session_state.top_k,
                "length_penalty": st.session_state.length_penalty
            },
            seed=st.session_state.seed,
            stream=stream
        )
    except APIConnectionError as e:
        if not st.session_state.server.is_running():
            if retry == 0:
                raise TunnelNotRunningException() from e

            st.toast(f":warning: Internet connection is down. Trying to re-establish connection ({retry}).")

            if st.session_state.server.is_down():
                st.session_state.server.restart()
            elif st.session_state.server.is_starting():
                time.sleep(2)

            # BUG FIX: the original recursion passed `retry-1` as the audio
            # argument and dropped `stream`/`retry`, so the audio was lost and
            # the retry counter never decreased. Forward all arguments.
            return _retry_retrive_response_throws_exception(
                text_input, base64_audio_input, stream=stream, retry=retry - 1)
        raise e

    return response_object
101
+
102
+
103
+ def _validate_text_input(text_input) -> List[str]:
104
+ """
105
+ TODO: improve the input validation regex.
106
+ """
107
+ warnings = []
108
+ if re.search("tool|code|python|java|math|calculate", text_input):
109
+ warnings.append("WARNING: MERaLiON-AudioLLM is not intended for use in tool calling, math, and coding tasks.")
110
+
111
+ if re.search(r'[\u4e00-\u9fff]+', text_input):
112
+ warnings.append("NOTE: Please try to prompt in English for the best performance.")
113
+
114
+ return warnings
115
+
116
+
117
def retrive_response(text_input, base64_audio_input, stream=False):
    """
    Validate the prompt, then request a completion from the model.

    Returns an (error_msg, warnings, response_object) triple: error_msg is
    an empty string on success, and response_object is None on failure.
    """
    warnings = _validate_text_input(text_input)

    try:
        completion = _retry_retrive_response_throws_exception(
            text_input, base64_audio_input, stream
        )
        return "", warnings, completion
    except NoAudioException:
        return "Please specify audio first!", warnings, None
    except TunnelNotRunningException:
        return "Internet connection cannot be established. Please contact the administrator.", warnings, None
    except Exception as e:
        return f"Caught Exception: {repr(e)}. Please contact the administrator.", warnings, None
133
+
134
+
135
def postprocess_voice_transcription(text):
    """
    Clean a raw voice transcription for use as a chat prompt.

    Strips tag-like markup (e.g. "<speaker>:"), parenthesised and bracketed
    annotations (e.g. "(laughs)", "[noise]"), then collapses whitespace.
    Non-greedy quantifiers remove each annotation individually instead of
    deleting everything between the first and last one; raw strings avoid
    the invalid-escape warnings the original literals produced.
    """
    text = re.sub(r"<.*?>:?|\(.*?\)|\[.*?\]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text
pages.py β†’ src/pages.py RENAMED
@@ -3,19 +3,17 @@ import base64
3
 
4
  import numpy as np
5
  import streamlit as st
 
6
 
7
- from utils import (
 
8
  GENERAL_INSTRUCTIONS,
9
  AUDIO_SAMPLES_W_INSTRUCT,
10
- NoAudioException,
11
- TunnelNotRunningException,
12
- retry_generate_response,
13
- load_model,
14
  bytes_to_array,
15
  array_to_bytes,
16
- start_server,
17
  )
18
 
 
19
  DEFAULT_DIALOGUE_STATES = dict(
20
  default_instruction=[],
21
  audio_base64='',
@@ -23,14 +21,18 @@ DEFAULT_DIALOGUE_STATES = dict(
23
  disprompt = False,
24
  new_prompt = "",
25
  messages=[],
 
26
  on_select=False,
27
  on_upload=False,
28
  on_record=False,
29
- on_click_button = False
 
30
  )
31
 
 
32
  MAX_AUDIO_LENGTH = 120
33
 
 
34
  def _update_audio(audio_bytes):
35
  origin_audio_array = bytes_to_array(audio_bytes)
36
  truncated_audio_array = origin_audio_array[: MAX_AUDIO_LENGTH*16000]
@@ -141,7 +143,42 @@ def specify_audio_fragment():
141
  st.rerun(scope="app")
142
 
143
 
144
- def dialogue_section():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  for message in st.session_state.messages:
146
  with st.chat_message(message["role"]):
147
  if message.get("error"):
@@ -151,12 +188,8 @@ def dialogue_section():
151
  if message.get("content"):
152
  st.write(message["content"])
153
 
154
- if chat_input := st.chat_input(
155
- placeholder="Type Your Instruction Here",
156
- disabled=st.session_state.disprompt,
157
- on_submit=lambda: st.session_state.update(disprompt=True, messages=[])
158
- ):
159
- st.session_state.new_prompt = chat_input
160
 
161
  if one_time_prompt := st.session_state.new_prompt:
162
  st.session_state.update(new_prompt="", messages=[])
@@ -167,15 +200,17 @@ def dialogue_section():
167
 
168
  with st.chat_message("assistant"):
169
  with st.spinner("Thinking..."):
170
- error_msg, warnings, response = "", [], ""
171
- try:
172
- response, warnings = retry_generate_response(one_time_prompt)
173
- except NoAudioException:
174
- error_msg = "Please specify audio first!"
175
- except TunnelNotRunningException:
176
- error_msg = "Internet connection cannot be established. Please contact the administrator."
177
- except Exception as e:
178
- error_msg = f"Caught Exception: {repr(e)}. Please contact the administrator."
 
 
179
  st.session_state.messages.append({
180
  "role": "assistant",
181
  "error": error_msg,
@@ -184,32 +219,4 @@ def dialogue_section():
184
  })
185
 
186
  st.session_state.disprompt=False
187
- st.rerun(scope="app")
188
-
189
-
190
- def audio_llm():
191
- if "server" not in st.session_state:
192
- st.session_state.server = start_server()
193
-
194
- if "client" not in st.session_state or 'model_name' not in st.session_state:
195
- st.session_state.client, st.session_state.model_name = load_model()
196
-
197
- for key, value in DEFAULT_DIALOGUE_STATES.items():
198
- if key not in st.session_state:
199
- st.session_state[key]=copy.deepcopy(value)
200
-
201
- with st.sidebar:
202
- sidebar_fragment()
203
-
204
- if st.sidebar.button('Clear History'):
205
- st.session_state.update(DEFAULT_DIALOGUE_STATES)
206
-
207
- st.markdown("<h1 style='text-align: center;'>MERaLiON-AudioLLM Demo πŸ€–</h1>", unsafe_allow_html=True)
208
- st.markdown(
209
- """This demo is based on [MERaLiON-AudioLLM](https://huggingface.co/MERaLiON/MERaLiON-AudioLLM-Whisper-SEA-LION),
210
- developed by I2R, A*STAR, in collaboration with AISG, Singapore.
211
- It is tailored for Singapore’s multilingual and multicultural landscape."""
212
- )
213
-
214
- specify_audio_fragment()
215
- dialogue_section()
 
3
 
4
  import numpy as np
5
  import streamlit as st
6
+ from streamlit_float import *
7
 
8
+ from src.generation import retrive_response, postprocess_voice_transcription
9
+ from src.utils import (
10
  GENERAL_INSTRUCTIONS,
11
  AUDIO_SAMPLES_W_INSTRUCT,
 
 
 
 
12
  bytes_to_array,
13
  array_to_bytes,
 
14
  )
15
 
16
+
17
  DEFAULT_DIALOGUE_STATES = dict(
18
  default_instruction=[],
19
  audio_base64='',
 
21
  disprompt = False,
22
  new_prompt = "",
23
  messages=[],
24
+ voice_instruction="",
25
  on_select=False,
26
  on_upload=False,
27
  on_record=False,
28
+ on_click_button=False,
29
+ on_record_voice=False
30
  )
31
 
32
+
33
  MAX_AUDIO_LENGTH = 120
34
 
35
+
36
  def _update_audio(audio_bytes):
37
  origin_audio_array = bytes_to_array(audio_bytes)
38
  truncated_audio_array = origin_audio_array[: MAX_AUDIO_LENGTH*16000]
 
143
  st.rerun(scope="app")
144
 
145
 
146
def bottom_input_section():
    """
    Render the bottom input row: a mic recorder plus a chat text box.

    A recorded voice clip is transcribed via the model itself and the
    transcription becomes the pending prompt; typed input goes straight
    into `st.session_state.new_prompt`.
    """
    bottom_cols = st.columns([0.02, 0.98])

    uploaded_file = bottom_cols[0].audio_input(
        label="voice",
        label_visibility="collapsed",
        disabled=st.session_state.disprompt,
        on_change=lambda: st.session_state.update(on_record_voice=True),
        key='voice'
    )

    if uploaded_file and st.session_state.on_record_voice:
        audio_bytes = uploaded_file.read()
        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
        error_msg, warnings, completion = retrive_response(
            "Write out the dialogue as text.", audio_base64, stream=False)

        if error_msg:
            st.toast(error_msg, icon="🚨")
        for warning_msg in warnings:
            st.toast(warning_msg, icon="❗")

        # BUG FIX: on failure `completion` is None and dereferencing it
        # raised AttributeError. Only use it when the request succeeded,
        # but always clear the flag so a failed transcription does not
        # re-trigger on the next rerun.
        if completion is not None:
            st.session_state.update(
                new_prompt=postprocess_voice_transcription(
                    completion.choices[0].message.content),
                on_record_voice=False
            )
        else:
            st.session_state.on_record_voice = False

    if chat_input := bottom_cols[1].chat_input(
        placeholder="Type Your Instruction Here",
        disabled=st.session_state.disprompt,
        on_submit=lambda: st.session_state.update(disprompt=True, messages=[])
    ):
        st.session_state.new_prompt = chat_input
180
+
181
+ def conversation_section():
182
  for message in st.session_state.messages:
183
  with st.chat_message(message["role"]):
184
  if message.get("error"):
 
188
  if message.get("content"):
189
  st.write(message["content"])
190
 
191
+ with st._bottom:
192
+ bottom_input_section()
 
 
 
 
193
 
194
  if one_time_prompt := st.session_state.new_prompt:
195
  st.session_state.update(new_prompt="", messages=[])
 
200
 
201
  with st.chat_message("assistant"):
202
  with st.spinner("Thinking..."):
203
+ error_msg, warnings, stream = retrive_response(
204
+ one_time_prompt, st.session_state.audio_base64, stream=True)
205
+ response = ""
206
+
207
+ if error_msg:
208
+ st.error(error_msg)
209
+ for warning_msg in warnings:
210
+ st.warning(warning_msg)
211
+ if stream:
212
+ response = st.write_stream(stream)
213
+
214
  st.session_state.messages.append({
215
  "role": "assistant",
216
  "error": error_msg,
 
219
  })
220
 
221
  st.session_state.disprompt=False
222
+ st.rerun(scope="app")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/tunnel.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+
4
+ import paramiko
5
+ import streamlit as st
6
+ from sshtunnel import SSHTunnelForwarder
7
+
8
+
9
@st.cache_resource()
def start_server():
    """
    Create and start the SSH tunnel to the remote inference server.

    Cached with st.cache_resource so a single tunnel instance is shared
    across all sessions and reruns of the Streamlit app.
    """
    server = SSHTunnelManager()
    server.start()
    return server
14
+
15
+
16
class SSHTunnelManager:
    """
    Wrap an SSHTunnelForwarder with simple lifecycle tracking.

    Exposes coarse status queries (is_starting / is_running / is_down)
    that the generation retry logic uses to decide whether to restart
    the tunnel or wait for it to come up.
    """

    def __init__(self):
        # Key material and endpoints come from environment variables:
        # PRIVATE_KEY (PEM text), SERVER_DNS_NAME, LOCAL_PORT.
        pkey = paramiko.RSAKey.from_private_key(io.StringIO(os.getenv('PRIVATE_KEY')))

        self.server = SSHTunnelForwarder(
            ssh_address_or_host=os.getenv('SERVER_DNS_NAME'),
            ssh_username="ec2-user",
            ssh_pkey=pkey,
            local_bind_address=("127.0.0.1", int(os.getenv('LOCAL_PORT'))),
            remote_bind_address=("127.0.0.1", 8000)
        )

        self._is_starting = False
        self._is_running = False

    def update_status(self):
        """Refresh the cached running state by probing the tunnel."""
        if not self._is_starting:
            self.server.check_tunnels()
            self._is_running = list(self.server.tunnel_is_up.values())[0]
        else:
            self._is_running = False

    def is_starting(self):
        self.update_status()
        return self._is_starting

    def is_running(self):
        self.update_status()
        return self._is_running

    def is_down(self):
        """True when the tunnel is neither running nor in the middle of starting."""
        self.update_status()
        return (not self._is_running) and (not self._is_starting)

    def start(self, *args, **kwargs):
        if not self._is_starting:
            self._is_starting = True
            try:
                self.server.start(*args, **kwargs)
            finally:
                # BUG FIX: clear the flag even when start() raises, otherwise
                # the manager is stuck "starting" forever and is_down() never
                # allows a restart.
                self._is_starting = False

    def restart(self, *args, **kwargs):
        if not self._is_starting:
            self._is_starting = True
            try:
                self.server.restart(*args, **kwargs)
            finally:
                self._is_starting = False
utils.py β†’ src/utils.py RENAMED
@@ -1,16 +1,7 @@
1
  import io
2
- import os
3
- import re
4
- import time
5
  from scipy.io.wavfile import write
6
 
7
  import librosa
8
- import paramiko
9
- import streamlit as st
10
- from openai import OpenAI, APIConnectionError
11
- from sshtunnel import SSHTunnelForwarder
12
-
13
- local_port = int(os.getenv('LOCAL_PORT'))
14
 
15
 
16
  GENERAL_INSTRUCTIONS = [
@@ -77,156 +68,6 @@ AUDIO_SAMPLES_W_INSTRUCT = {
77
  }
78
 
79
 
80
- class NoAudioException(Exception):
81
- pass
82
-
83
-
84
- class TunnelNotRunningException(Exception):
85
- pass
86
-
87
-
88
- class SSHTunnelManager:
89
- def __init__(self):
90
- pkey = paramiko.RSAKey.from_private_key(io.StringIO(os.getenv('PRIVATE_KEY')))
91
-
92
- self.server = SSHTunnelForwarder(
93
- ssh_address_or_host=os.getenv('SERVER_DNS_NAME'),
94
- ssh_username="ec2-user",
95
- ssh_pkey=pkey,
96
- local_bind_address=("127.0.0.1", local_port),
97
- remote_bind_address=("127.0.0.1", 8000)
98
- )
99
-
100
- self._is_starting = False
101
- self._is_running = False
102
-
103
- def update_status(self):
104
- if not self._is_starting:
105
- self.server.check_tunnels()
106
- self._is_running = list(self.server.tunnel_is_up.values())[0]
107
- else:
108
- self._is_running = False
109
-
110
- def is_starting(self):
111
- self.update_status()
112
- return self._is_starting
113
-
114
- def is_running(self):
115
- self.update_status()
116
- return self._is_running
117
-
118
- def is_down(self):
119
- self.update_status()
120
- return (not self._is_running) and (not self._is_starting)
121
-
122
- def start(self, *args, **kwargs):
123
- if not self._is_starting:
124
- self._is_starting = True
125
- self.server.start(*args, **kwargs)
126
- self._is_starting = False
127
-
128
- def restart(self, *args, **kwargs):
129
- if not self._is_starting:
130
- self._is_starting = True
131
- self.server.restart(*args, **kwargs)
132
- self._is_starting = False
133
-
134
-
135
- @st.cache_resource()
136
- def start_server():
137
- server = SSHTunnelManager()
138
- server.start()
139
- return server
140
-
141
-
142
- def load_model():
143
- openai_api_key = os.getenv('API_KEY')
144
- openai_api_base = f"http://localhost:{local_port}/v1"
145
-
146
- client = OpenAI(
147
- api_key=openai_api_key,
148
- base_url=openai_api_base,
149
- )
150
-
151
- models = client.models.list()
152
- model_name = models.data[0].id
153
-
154
- return client, model_name
155
-
156
-
157
- def generate_response(text_input):
158
- if not st.session_state.audio_base64:
159
- raise NoAudioException("audio is empty.")
160
-
161
- warnings = []
162
- if re.search("tool|code|python|java|math|calculate", text_input):
163
- warnings.append("WARNING: MERaLiON-AudioLLM is not intended for use in tool calling, math, and coding tasks.")
164
-
165
- if re.search(r'[\u4e00-\u9fff]+', text_input):
166
- warnings.append("NOTE: Please try to prompt in English for the best performance.")
167
-
168
- try:
169
- stream = st.session_state.client.chat.completions.create(
170
- messages=[{
171
- "role":
172
- "user",
173
- "content": [
174
- {
175
- "type": "text",
176
- "text": f"Text instruction: {text_input}"
177
- },
178
- {
179
- "type": "audio_url",
180
- "audio_url": {
181
- "url": f"data:audio/ogg;base64,{st.session_state.audio_base64}"
182
- },
183
- },
184
- ],
185
- }],
186
- model=st.session_state.model_name,
187
- max_completion_tokens=1024,
188
- temperature=st.session_state.temperature,
189
- top_p=st.session_state.top_p,
190
- extra_body={
191
- "repetition_penalty": st.session_state.repetition_penalty,
192
- "top_k": 50,
193
- "length_penalty": 1.0
194
- },
195
- seed=42,
196
- stream=True,
197
- )
198
- except APIConnectionError as e:
199
- if not st.session_state.server.is_running():
200
- raise TunnelNotRunningException()
201
- raise e
202
-
203
- return stream, warnings
204
-
205
-
206
- def retry_generate_response(prompt, retry=3):
207
- response, warnings = "", []
208
-
209
- try:
210
- stream, warnings = generate_response(prompt)
211
- for warning_msg in warnings:
212
- st.warning(warning_msg)
213
- response = st.write_stream(stream)
214
- except TunnelNotRunningException as e:
215
- if retry == 0:
216
- raise e
217
-
218
- st.warning(f"Internet connection is down. Trying to re-establish connection ({retry}).")
219
-
220
- if st.session_state.server.is_down():
221
- st.session_state.server.restart()
222
- elif st.session_state.server.is_starting():
223
- time.sleep(2)
224
-
225
- return retry_generate_response(retry-1)
226
-
227
- return response, warnings
228
-
229
-
230
  def bytes_to_array(audio_bytes):
231
  audio_array, _ = librosa.load(
232
  io.BytesIO(audio_bytes),
@@ -234,6 +75,7 @@ def bytes_to_array(audio_bytes):
234
  )
235
  return audio_array
236
 
 
237
  def array_to_bytes(audio_array):
238
  bytes_wav = bytes()
239
  byte_io = io.BytesIO(bytes_wav)
 
1
  import io
 
 
 
2
  from scipy.io.wavfile import write
3
 
4
  import librosa
 
 
 
 
 
 
5
 
6
 
7
  GENERAL_INSTRUCTIONS = [
 
68
  }
69
 
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  def bytes_to_array(audio_bytes):
72
  audio_array, _ = librosa.load(
73
  io.BytesIO(audio_bytes),
 
75
  )
76
  return audio_array
77
 
78
+
79
  def array_to_bytes(audio_array):
80
  bytes_wav = bytes()
81
  byte_io = io.BytesIO(bytes_wav)
style/{sidebar_style.css β†’ app_style.css} RENAMED
@@ -1,4 +1,4 @@
1
- .st-emotion-cache-1c7y2kd {
2
  flex-direction: row-reverse;
3
  text-align: right;
4
  }
@@ -21,10 +21,39 @@ div[data-testid="stFileUploaderDropzoneInstructions"]>div>span {
21
  display:none;
22
  }
23
 
24
- .st-emotion-cache-1aq2la2 {
25
  max-height: 3rem;
26
  }
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  [class='stAudio'] {
29
  max-width: 500px !important;
30
  margin: auto !important;
 
1
+ div[data-testid="stChatMessage"]:has(> div[data-testid="stChatMessageAvatarUser"]) {
2
  flex-direction: row-reverse;
3
  text-align: right;
4
  }
 
21
  display:none;
22
  }
23
 
24
+ div[data-testid="stMainBlockContainer"] div[data-testid="stAudioInput"]>div {
25
  max-height: 3rem;
26
  }
27
 
28
+ div[data-testid="stBottomBlockContainer"] div[data-testid="stAudioInput"]>div {
29
+ background-color:transparent;
30
+ /* border:1px solid rgba(49, 51, 63, 0.2); */
31
+ max-height: 40px;
32
+ display: block;
33
+ padding: 0;
34
+ margin: auto;
35
+ }
36
+
37
+ div[data-testid="stBottomBlockContainer"] div[data-testid="stAudioInput"]>div>div:last-of-type {
38
+ display:none;
39
+ }
40
+
41
+ div[data-testid="stBottomBlockContainer"] div[data-testid="stAudioInput"]>div>div:nth-of-type(2) {
42
+ margin:auto;
43
+ }
44
+
45
+ div[data-testid="stBottomBlockContainer"] div[data-testid="stAudioInput"]>div>div:nth-of-type(2)>span:last-of-type {
46
+ display:none;
47
+ }
48
+
49
+ div[data-testid="stBottomBlockContainer"] div[data-testid="stAudioInput"]>div>div:nth-of-type(2)>span:only-of-type {
50
+ display:block;
51
+ }
52
+
53
+ div[data-testid="stBottomBlockContainer"] div[data-testid="stAudioInput"]>div>span {
54
+ display:none;
55
+ }
56
+
57
  [class='stAudio'] {
58
  max-width: 500px !important;
59
  margin: auto !important;