Kevin676 committed on
Commit
5ce216b
·
1 Parent(s): b5af9c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -26
app.py CHANGED
@@ -50,7 +50,7 @@ from scipy.io import wavfile
50
  import subprocess
51
 
52
  import whisper
53
- model1 = whisper.load_model("base")
54
  os.system('pip install voicefixer --upgrade')
55
  from voicefixer import VoiceFixer
56
  voicefixer = VoiceFixer()
@@ -64,19 +64,10 @@ savedir="pretrained_models/metricgan-plus-voicebank",
64
  run_opts={"device":"cuda"},
65
  )
66
 
67
- mes1 = [
68
- {"role": "system", "content": "You are a TOEFL examiner. Help me improve my oral Englsih and give me feedback."}
69
  ]
70
 
71
- mes2 = [
72
- {"role": "system", "content": "You are a mental health therapist. Respond to me only in Chinese. Your name is Tina."}
73
- ]
74
-
75
- mes3 = [
76
- {"role": "system", "content": "You are my personal assistant. Respond to me only in Chinese. Your name is Alice."}
77
- ]
78
-
79
- res = []
80
 
81
 
82
  '''
@@ -158,7 +149,7 @@ def compute_spec(ref_file):
158
  return spec
159
 
160
 
161
- def voice_conversion(apikey, upload, audio, choice1):
162
 
163
  openai.api_key = apikey
164
 
@@ -166,7 +157,7 @@ def voice_conversion(apikey, upload, audio, choice1):
166
  audio = whisper.load_audio(audio)
167
  audio = whisper.pad_or_trim(audio)
168
 
169
- # make log-Mel spectrogram and move to the same device as the model
170
  mel = whisper.log_mel_spectrogram(audio).to(model1.device)
171
 
172
  # detect the spoken language
@@ -176,18 +167,11 @@ def voice_conversion(apikey, upload, audio, choice1):
176
  # decode the audio
177
  options = whisper.DecodingOptions()
178
  result = whisper.decode(model1, mel, options)
179
- res.append(result.text)
180
 
181
- if choice1 == "TOEFL":
182
- messages = mes1
183
- elif choice1 == "Therapist":
184
- messages = mes2
185
- elif choice1 == "Alice":
186
- messages = mes3
187
 
188
  # chatgpt
189
- n = len(res)
190
- content = res[n-1]
191
  messages.append({"role": "user", "content": content})
192
 
193
  completion = openai.ChatCompletion.create(
@@ -200,7 +184,6 @@ def voice_conversion(apikey, upload, audio, choice1):
200
  messages.append({"role": "assistant", "content": chat_response})
201
 
202
  wavs = synthesizer.tts(chat_response + "。")
203
-
204
 
205
  synthesizer.save_wav(wavs, "output.wav")
206
  #tts.tts_to_file(chat_response + "。", file_path="output.wav")
@@ -211,7 +194,7 @@ def voice_conversion(apikey, upload, audio, choice1):
211
 
212
  rate1, data1 = wavfile.read("output.wav")
213
 
214
- data1 = (data1 * 32767).astype(np.int16)
215
 
216
  #data1 = np.asarray(data1, dtype=np.int16)
217
 
@@ -270,7 +253,7 @@ c1=gr.Interface(
270
  gr.Textbox(lines=1, label = "请填写您的OpenAI-API-key"),
271
  gr.Audio(source="upload", label = "请上传您喜欢的声音(wav文件)"),
272
  gr.Audio(source="microphone", label = "和您的专属AI聊天吧!", type="filepath"),
273
- gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
274
  ],
275
  outputs=[
276
  gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
 
50
  import subprocess
51
 
52
  import whisper
53
+ model1 = whisper.load_model("small")
54
  os.system('pip install voicefixer --upgrade')
55
  from voicefixer import VoiceFixer
56
  voicefixer = VoiceFixer()
 
64
  run_opts={"device":"cuda"},
65
  )
66
 
67
+ mes = [
68
+ {"role": "system", "content": "You are my personal assistant. Try to be helpful. Respond to me only in Chinese."}
69
  ]
70
 
 
 
 
 
 
 
 
 
 
71
 
72
 
73
  '''
 
149
  return spec
150
 
151
 
152
+ def voice_conversion(apikey, upload, audio):
153
 
154
  openai.api_key = apikey
155
 
 
157
  audio = whisper.load_audio(audio)
158
  audio = whisper.pad_or_trim(audio)
159
 
160
+ # make log-Mel spectrogram and move to the same device as the model1
161
  mel = whisper.log_mel_spectrogram(audio).to(model1.device)
162
 
163
  # detect the spoken language
 
167
  # decode the audio
168
  options = whisper.DecodingOptions()
169
  result = whisper.decode(model1, mel, options)
 
170
 
171
+ messages = mes
 
 
 
 
 
172
 
173
  # chatgpt
174
+ content = result.text
 
175
  messages.append({"role": "user", "content": content})
176
 
177
  completion = openai.ChatCompletion.create(
 
184
  messages.append({"role": "assistant", "content": chat_response})
185
 
186
  wavs = synthesizer.tts(chat_response + "。")
 
187
 
188
  synthesizer.save_wav(wavs, "output.wav")
189
  #tts.tts_to_file(chat_response + "。", file_path="output.wav")
 
194
 
195
  rate1, data1 = wavfile.read("output.wav")
196
 
197
+ #data1 = (data1 * 32767).astype(np.int16)
198
 
199
  #data1 = np.asarray(data1, dtype=np.int16)
200
 
 
253
  gr.Textbox(lines=1, label = "请填写您的OpenAI-API-key"),
254
  gr.Audio(source="upload", label = "请上传您喜欢的声音(wav文件)"),
255
  gr.Audio(source="microphone", label = "和您的专属AI聊天吧!", type="filepath"),
256
+ # gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
257
  ],
258
  outputs=[
259
  gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),