Update app.py
app.py CHANGED
@@ -50,7 +50,7 @@ from scipy.io import wavfile
 import subprocess
 
 import whisper
-model1 = whisper.load_model("
+model1 = whisper.load_model("small")
 os.system('pip install voicefixer --upgrade')
 from voicefixer import VoiceFixer
 voicefixer = VoiceFixer()
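For reference, the calls this file strings together follow the standard openai-whisper recipe. A minimal sketch, assuming the `openai-whisper` package; the `"audio.wav"` path is a placeholder, and `"small"` is the checkpoint this commit picks (larger ones like `"medium"` trade load time for accuracy):

```python
import whisper

model1 = whisper.load_model("small")        # downloads the checkpoint on first run

audio = whisper.load_audio("audio.wav")     # 16 kHz mono float32 waveform
audio = whisper.pad_or_trim(audio)          # fit to Whisper's 30-second window

# log-Mel spectrogram on the same device as the model
mel = whisper.log_mel_spectrogram(audio).to(model1.device)

_, probs = model1.detect_language(mel)      # per-language probabilities
print("Detected language:", max(probs, key=probs.get))

options = whisper.DecodingOptions()
result = whisper.decode(model1, mel, options)
print(result.text)
```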
@@ -64,19 +64,10 @@ savedir="pretrained_models/metricgan-plus-voicebank",
 run_opts={"device":"cuda"},
 )
 
-mes1 = [
-    {"role": "system", "content": "You are
+mes = [
+    {"role": "system", "content": "You are my personal assistant. Try to be helpful. Respond to me only in Chinese."}
 ]
 
-mes2 = [
-    {"role": "system", "content": "You are a mental health therapist. Respond to me only in Chinese. Your name is Tina."}
-]
-
-mes3 = [
-    {"role": "system", "content": "You are my personal assistant. Respond to me only in Chinese. Your name is Alice."}
-]
-
-res = []
 
 
 '''
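The three persona prompts collapse into a single seed list. A minimal sketch of how such a seed drives the legacy `openai.ChatCompletion` loop used further down; assumptions are the `openai<1.0` client and the `gpt-3.5-turbo` model name, which the visible hunks don't confirm:

```python
import openai  # legacy 0.x client, matching openai.ChatCompletion below

mes = [
    {"role": "system", "content": "You are my personal assistant. Try to be helpful. Respond to me only in Chinese."}
]

def chat_once(user_text):
    # Append the user turn, call the API, then record the reply so the
    # next call sees the full conversation history.
    mes.append({"role": "user", "content": user_text})
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",  # assumption; the model name is not shown in this diff
        messages=mes,
    )
    reply = completion.choices[0].message.content
    mes.append({"role": "assistant", "content": reply})
    return reply
```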
@@ -158,7 +149,7 @@ def compute_spec(ref_file):
     return spec
 
 
-def voice_conversion(apikey, upload, audio, choice1):
+def voice_conversion(apikey, upload, audio):
 
     openai.api_key = apikey
 
@@ -166,7 +157,7 @@ def voice_conversion(apikey, upload, audio, choice1):
     audio = whisper.load_audio(audio)
     audio = whisper.pad_or_trim(audio)
 
-    # make log-Mel spectrogram and move to the same device as the model
+    # make log-Mel spectrogram and move to the same device as the model1
     mel = whisper.log_mel_spectrogram(audio).to(model1.device)
 
     # detect the spoken language
@@ -176,18 +167,11 @@ def voice_conversion(apikey, upload, audio, choice1):
     # decode the audio
     options = whisper.DecodingOptions()
     result = whisper.decode(model1, mel, options)
-    res.append(result.text)
 
-    if choice1 == "TOEFL":
-        messages = mes1
-    elif choice1 == "Therapist":
-        messages = mes2
-    elif choice1 == "Alice":
-        messages = mes3
+    messages = mes
 
     # chatgpt
-
-    content = res[n-1]
+    content = result.text
     messages.append({"role": "user", "content": content})
 
     completion = openai.ChatCompletion.create(
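One consequence worth noting: `messages = mes` binds the module-level list rather than copying it, so the user and assistant turns appended below accumulate in `mes` across calls (and across visitors of a shared Space). If per-call isolation were ever wanted, a copy of the seed would do; a hypothetical variant, not what this commit does:

```python
mes = [{"role": "system", "content": "You are my personal assistant."}]

def fresh_messages():
    # Hypothetical helper: start each request from a clean copy of the
    # seed instead of mutating the shared module-level list.
    return list(mes)

messages = fresh_messages()
messages.append({"role": "user", "content": "你好"})
assert len(mes) == 1  # the shared seed stays untouched
```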
@@ -200,7 +184,6 @@ def voice_conversion(apikey, upload, audio, choice1):
     messages.append({"role": "assistant", "content": chat_response})
 
     wavs = synthesizer.tts(chat_response + "。")
-
 
     synthesizer.save_wav(wavs, "output.wav")
     #tts.tts_to_file(chat_response + "。", file_path="output.wav")
@@ -211,7 +194,7 @@ def voice_conversion(apikey, upload, audio, choice1):
 
     rate1, data1 = wavfile.read("output.wav")
 
-    data1 = (data1 * 32767).astype(np.int16)
+    #data1 = (data1 * 32767).astype(np.int16)
 
     #data1 = np.asarray(data1, dtype=np.int16)
 
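Commenting out the `* 32767` scaling matters because `scipy.io.wavfile.read` is dtype-dependent: a 16-bit PCM file already comes back as `int16`, and rescaling it again would overflow, while float WAV data arrives normalized to [-1.0, 1.0] and does need the conversion. A dtype-aware sketch:

```python
import numpy as np
from scipy.io import wavfile

rate1, data1 = wavfile.read("output.wav")
if np.issubdtype(data1.dtype, np.floating):
    # Float WAVs are normalized to [-1.0, 1.0]; map them onto PCM16.
    data1 = (data1 * 32767).astype(np.int16)
# Integer PCM (e.g. int16) is left as-is; rescaling would garble it.
```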
@@ -270,7 +253,7 @@ c1=gr.Interface(
     gr.Textbox(lines=1, label = "请填写您的OpenAI-API-key"),
     gr.Audio(source="upload", label = "请上传您喜欢的声音(wav文件)"),
     gr.Audio(source="microphone", label = "和您的专属AI聊天吧!", type="filepath"),
-    gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
+    # gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
     ],
 outputs=[
     gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
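With the `gr.Radio` input commented out, `gr.Interface` now passes exactly three values, matching the new `voice_conversion(apikey, upload, audio)` signature. A sketch of the wiring on the gradio 3.x-era API this file uses, with the Chinese labels translated and a stub standing in for the real function:

```python
import gradio as gr

def voice_conversion(apikey, upload, audio):
    # Stub for the real function defined earlier in app.py.
    return "", "", None

c1 = gr.Interface(
    fn=voice_conversion,
    inputs=[
        gr.Textbox(lines=1, label="Your OpenAI API key"),
        gr.Audio(source="upload", label="Upload a voice you like (wav)"),
        gr.Audio(source="microphone", label="Chat with your personal AI!", type="filepath"),
    ],
    outputs=[
        gr.Textbox(label="Speech to Text"),
        gr.Textbox(label="ChatGPT Output"),
        gr.Audio(label="Audio with Custom Voice"),
    ],
)
```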