aar2dee2 commited on
Commit
838a92b
·
1 Parent(s): 2930d08

add audio files

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Chatty Vader
3
- emoji: 👀
4
  colorFrom: indigo
5
  colorTo: yellow
6
  sdk: gradio
 
1
  ---
2
  title: Chatty Vader
3
+ emoji: 💣
4
  colorFrom: indigo
5
  colorTo: yellow
6
  sdk: gradio
app.ipynb ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": ["# Import required libraries"]
7
+ },
8
+ {
9
+ "cell_type": "code",
10
+ "execution_count": null,
11
+ "metadata": {},
12
+ "outputs": [],
13
+ "source": [
14
+ "import requests\n",
15
+ "import json\n",
16
+ "from gtts import gTTS\n",
17
+ "import os\n",
18
+ "from pydub import AudioSegment"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "markdown",
23
+ "metadata": {},
24
+ "source": ["# 1. Function to take in audio input and return text output"]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": null,
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
def transcribe_audio_to_text(file_path):
    """Transcribe the audio file at ``file_path`` to text.

    This is still a placeholder: no speech-to-text backend is wired in.
    It fails loudly instead of returning ``None`` so that a missing
    transcription is never formatted into a prompt as the string "None".

    Args:
        file_path: Path of the audio file to transcribe.

    Raises:
        NotImplementedError: Always, until an implementation is supplied.
    """
    # Replace with your implementation for transcribing audio to text
    raise NotImplementedError(
        f"transcribe_audio_to_text is not implemented yet (got {file_path!r})"
    )
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "markdown",
39
+ "metadata": {},
40
+ "source": [
41
+ "# 2. Function to call the ChatGPT API with a text and system prompt and return the response"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": null,
47
+ "metadata": {},
48
+ "outputs": [],
49
+ "source": [
50
def call_chatgpt_api(prompt, system_prompt, model='gpt-3.5-turbo', timeout=30):
    """Send a prompt plus a system prompt to the OpenAI Chat Completions API.

    The system prompt and the user prompt are sent as separate chat messages
    rather than being concatenated into a single completion prompt.

    Args:
        prompt: Text of the user message.
        system_prompt: Text of the system message that steers the reply.
        model: Name of the chat model to query.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The content of the first returned choice, with surrounding
        whitespace stripped.

    Raises:
        RuntimeError: If the OPENAI_API_KEY environment variable is unset.
        requests.HTTPError: If the API answers with an error status code.
    """
    # Read the key from the environment so it never lives in the notebook.
    api_key = os.environ.get('OPENAI_API_KEY')
    if not api_key:
        raise RuntimeError('Set the OPENAI_API_KEY environment variable before calling the API')

    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}',
    }

    payload = {
        'model': model,
        'messages': [
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': prompt},
        ],
        'max_tokens': 150,
        'n': 1,
        'temperature': 0.5,
    }

    response = requests.post(
        'https://api.openai.com/v1/chat/completions',
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Surface API errors directly instead of failing later on a missing 'choices' key.
    response.raise_for_status()

    return response.json()['choices'][0]['message']['content'].strip()
72
+ ]
73
+ },
74
+ {
75
+ "cell_type": "markdown",
76
+ "metadata": {},
77
+ "source": [
78
+ "# 3. Function to convert text to speech using a suitable library and add intonation for Yoda's voice"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": null,
84
+ "metadata": {},
85
+ "outputs": [],
86
+ "source": [
87
def text_to_speech_yoda(text, output_file):
    """Synthesize ``text`` to an MP3 at ``output_file`` and speed it up.

    The speech is first written to ``output_file`` by gTTS, then the same
    file is reloaded, sped up by a factor of 1.2 and written back in place.

    Args:
        text: The text to speak (English).
        output_file: Path of the MP3 file to create and then overwrite.
    """
    gTTS(text, lang='en').save(output_file)

    # Add intonation for Yoda voice (you may need to customize this for better results)
    spoken_clip = AudioSegment.from_file(output_file, format="mp3")
    faster_clip = spoken_clip.speedup(playback_speed=1.2)
    faster_clip.export(output_file, format="mp3")
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "markdown",
99
+ "metadata": {},
100
+ "source": [
101
+ "# 4. Wrapper function that calls all of these functions in order"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "code",
106
+ "execution_count": null,
107
+ "metadata": {},
108
+ "outputs": [],
109
+ "source": [
110
def process_audio(input_audio_file, output_audio_file, system_prompt):
    """Run the full pipeline: transcribe, query the model, speak the reply.

    Args:
        input_audio_file: Path of the audio file to transcribe.
        output_audio_file: Path where the spoken reply is written.
        system_prompt: System prompt forwarded to ``call_chatgpt_api``.
    """
    question = transcribe_audio_to_text(input_audio_file)
    reply = call_chatgpt_api(question, system_prompt)
    text_to_speech_yoda(reply, output_audio_file)
114
+ ]
115
+ }
116
+ ],
117
+ "metadata": {
118
+ "kernelspec": {
119
+ "display_name": "Python 3",
120
+ "language": "python",
121
+ "name": "python3"
122
+ },
123
+ "language_info": {
124
+ "codemirror_mode": {
125
+ "name": "ipython",
126
+ "version": 3
127
+ },
128
+ "file_extension": ".py",
129
+ "mimetype": "text/x-python",
130
+ "name": "python",
131
+ "nbconvert_exporter": "python",
132
+ "pygments_lexer": "ipython3",
133
+ "version": "3.8.5"
134
+ }
135
+ },
136
+ "nbformat": 4,
137
+ "nbformat_minor": 4
138
+ }
data/metadata.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbc2bf511a6f46b0c195ac5cc6ad82ef2762c0258a9a50b25f9ca0ac55657d36
3
+ size 1966
data/wavs/wav1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1ab36530b9abdf0ef53a932c04915c325eeeecc576a1d4265e4fa4d30d4c5d4
3
+ size 17734
data/wavs/wav10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfad8f3e36b9bf2a743646ecadf503382bfc213a690a37f103c2639348cb02cb
3
+ size 12030
data/wavs/wav11.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b03bd0d9cbaa7e385466ce541c75ba22d89fd75aaf65f402d2db5a2e68ab9d71
3
+ size 8650
data/wavs/wav12.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de0347db8880d5cb40d11f67b7c4ecfeb6630f6e2cd4dc16af53d866493bc9bf
3
+ size 6830
data/wavs/wav13.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a761d63251887e5481cca23f0eb2cefd51d3158db8101402ccd0fedb0a14a4e0
3
+ size 14370
data/wavs/wav14.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3d08743983001498860a1e9830a7ddcb28b8d3217ce1cab41eb672a1f8184b3
3
+ size 34304
data/wavs/wav15.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:064415dbd40fe447b618d328c5fc93d58651f6b1bc1b975b113c97bbbc3c9c38
3
+ size 74834
data/wavs/wav16.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6181e0fafc34deb20ee74d3e2f7c9811975fe12a2a3bb5577bf0affea3e2c83c
3
+ size 23006
data/wavs/wav17.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5dee5a713f8a4b62ccd2ec32b10c950f455d8f7a9d8435f4f8b201d701152ae
3
+ size 43472
data/wavs/wav18.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fd77a6d3e15aea495745c518f33f4a125511001611616b82039a1c1c97cb8cc
3
+ size 83258
data/wavs/wav19.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:920acc4cda72c697d72e6030f0568b75ab0da79efed1df8f3a8b0f62b31f8119
3
+ size 41826
data/wavs/wav2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48656c12ef9a556aad43a8a1c6e289f0afb1f5b3283d83a72e2648e464845d51
3
+ size 46146
data/wavs/wav20.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10f6de671c0daeaf7189902aa694d0f8ef5d108b361b9250d6a67a6981335111
3
+ size 78474
data/wavs/wav21.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aceca45bbe69f717346ab25063d801ae07fe530d5ff8b7a7fdc86646c50b2e0
3
+ size 50536
data/wavs/wav22.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccd832488f585aa852b22061c11c9a251595aed4a9b62c46025387af2b901988
3
+ size 38170
data/wavs/wav23.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7d3bdb55e029cc5cce78ac03edc9de459d4757a23aa895f21a1be0e5ba8aa45
3
+ size 77422
data/wavs/wav24.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9253767d354b1aa484498ba013c1f981fa166b83025620173bcd37c29e096f64
3
+ size 51398
data/wavs/wav25.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:831108ab28494e7d36438a04eb527a1c05cd14c769218f5f5f8d01ef8f39c8e3
3
+ size 6310
data/wavs/wav26.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00bf0d0defa342786883d7c5ef444d76329bca499e356fcc1c2f7d1649fddd48
3
+ size 7350
data/wavs/wav27.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62cf76057721d42234f616947c0ae5aeaa7deac1667d79cb57e2cd8fee1a1844
3
+ size 9170
data/wavs/wav28.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:441101a2d56838fa952aaeeb78f4a5ad55ce7b4fcdb4199d5648e4aea6d4ad52
3
+ size 18230
data/wavs/wav29.mp3 ADDED
Binary file (114 kB). View file
 
data/wavs/wav3.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5049fb7e23db05081275a9c94f8d7056993b3b929125d7f30758d1e05f3ea187
3
+ size 28066
data/wavs/wav30.mp3 ADDED
Binary file (48.1 kB). View file
 
data/wavs/wav31.mp3 ADDED
Binary file (17.1 kB). View file
 
data/wavs/wav32.mp3 ADDED
Binary file (96.1 kB). View file
 
data/wavs/wav33.mp3 ADDED
Binary file (27.6 kB). View file
 
data/wavs/wav34.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeb545563ca25dcea2c0e6cf83aeb096a6198b7b10976ff0af305297028c3f29
3
+ size 9690
data/wavs/wav4.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d07f6e5fd84f2f83ccb0b9069deed1e512099d7ffa870233bde0bf9673d326e5
3
+ size 5530
data/wavs/wav5.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17da780707c66bc4297a11cd0fc1be80cef0cd3e8559519ff4404acf264070f4
3
+ size 529964
data/wavs/wav6.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af156d450349d48672f160fc817906a9b9928fd3135451b747738317e3533c0
3
+ size 19162
data/wavs/wav7.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a891200cd5a05a382caa265d81b9567bbfc8129e6aed524ef512f587624b0087
3
+ size 11510
data/wavs/wav8.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5346a94429fdf19eb9721c133203022dcb479f7adeaa65a3ea7213bb242946f
3
+ size 8390
data/wavs/wav9.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a0e839cf656d25867ef2c3d1977173e465917804ce51d5acabd6c07b505878c
3
+ size 9170
gpt-data.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [Audio link](https://www.drodd.com/star-wars-soundboard/bickering.wav)
2
+ Admiral Motti: "This station is now the ultimate power in the universe. I suggest we use it!"
3
+ Darth Vader: "Don't be too proud of this technological terror you've constructed. The ability to destroy a planet is insignificant next to the power of the Force."
4
+ Admiral Motti: "Don't try to frighten us with your sorcerer's ways, Lord Vader. Your sad devotion to that ancient religion has not helped you conjure up the stolen data tapes, or given you clairvoyance enough to find the Rebel's hidden fortre..."
5
+ Darth Vader: "(Vader chokes Motti with the Force) I find your lack of faith disturbing."
6
+ Grand Moff Tarkin: "Enough of this! Vader, release him!"
7
+ Darth Vader: "As you wish."
8
+ Grand Moff Tarkin: "This bickering is pointless. Now, Lord Vader will provide us with the location of the Rebel fortress by the time this station is operational. We will then crush the Rebellion with one swift stroke."
9
+
10
+ [Audio link](https://www.drodd.com/star-wars-soundboard/sorcerersways.wav)
11
+ Admiral Motti: "Don't try to frighten us with your sorcerer's ways, Lord Vader. Your sad devotion to that ancient religion has not helped you conjure up the stolen data tapes, or given you clairvoyance enough to find the Rebel's hidden fortre..."
12
+ Darth Vader: "(Vader chokes Motti with the Force) I find your lack of faith disturbing."
13
+ Grand Moff Tarkin: "Enough of this! Vader, release him!"
14
+ Darth Vader: "As you wish."
15
+
16
+ [Audio link](https://www.drodd.com/star-wars-soundboard/mindprobe.wav)
17
+ Darth Vader: "Her resistance to the mind probe is considerable. It will be some time before we can extract any information from her."
18
+ Imperial Officer: "The final check-out is complete. All systems are operational. What course shall we set?"
19
+ Grand Moff Tarkin: "Perhaps she would respond to an alternative form of persuasion."
20
+ Darth Vader: "What do you mean?"
21
+ Grand Moff Tarkin: "I think it is time we demonstrate the full power of this station. Set your course for Alderaan."
22
+ Imperial Officer: "With pleasure."
23
+
24
+ [Audio link](https://www.drodd.com/star-wars-soundboard/thoseplans.wav)
25
+ Stormtrooper: "The Death Star plans are not in the main computer."
26
+ Darth Vader (James Earl Jones): "Where are those transmissions you intercepted? What have you done with those plans?"
27
+ Rebel (???): "We intercepted no transmissions. This is a consular ship. We're on a diplomatic mission."
28
+ Darth Vader: "If this is a consular ship, where is the ambassador? (He throws him against the wall) Commander, tear this ship apart until you've found those plans and bring me the passengers! I want them alive!"
29
+
30
+ [Audio link](https://www.drodd.com/star-wars-soundboard/diplomatic.wav)
31
+ Princess Leia Organa: "Darth Vader, only you could be so bold. The imperial senate will not sit still for this. When they hear you've attacked a diplomatic--"
32
+ Darth Vader: "Don't act so surprised, your highness. You weren't on any mercy mission this time. Several transmissions were beamed to this ship by rebel spies. I want to know what happened to the plans they sent you."
33
+ Leia: "I don't know what you're talking about. I'm a member of the imperial senate on a diplomatic mission to Alderaan."
34
+ Darth Vader: "You are part of the Rebel Alliance and a traitor. Take her away!"
35
+
36
+ [Audio link](https://www.drodd.com/star-wars-soundboard/takeheraway.wav)
37
+ Leia: "I'm a member of the imperial senate on a diplomatic mission to Alderaan."
38
+ Darth Vader: "You are part of the Rebel Alliance and a traitor. Take her away!"
39
+
40
+ [Audio link](https://www.drodd.com/star-wars-soundboard/notaboard.wav)
41
+ Imperial Officer Commander (???): "Lord Vader, the battle station plans are not aboard this ship and no transmissions were made. An escape pod was jettisoned during the fighting but no life forms were aboard."
42
+ Darth Vader: "She must have hidden the plans in the escape pod. Send a detachment down to retrieve them. See to it personally, Commander. There'll be no one to stop us this time."
43
+ Imperial Officer Commander: "Yes, sir."
resources.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Star Wars Audio Files
2
+
3
+ [Very clear audio](http://www.moviesoundclips.net/sound.php?id=137) - also check the [other movies](http://www.moviesoundclips.net/movies.php?page=3)
4
+
5
+ [1.](https://www.drodd.com/star-wars-soundboard/)
6
+
7
+ [2.](http://www.galaxyfaraway.com/gfa/1998/12/star-wars-sounds-archive/#episode%20i%20sounds)
8
+
9
+ [3.](https://www.wavsource.com/movies/star_wars.htm)
10
+
11
+ [4.](https://www.thesoundarchive.com/star-wars.asp)
12
+
13
+ [5.]()
14
+
15
+ [Whisper API Reference](https://platform.openai.com/docs/api-reference/audio)
16
+ [Voice training with Resemble.ai](https://www.resemble.ai/api/)
17
+ [Resemble API reference](https://docs.app.resemble.ai/docs/resource_voice/create)
18
+ [Resemble.ai Dataset reference](https://resemble.notion.site/resemble/Resemble-AI-Supported-Datasets-64e94dffc7fe4f518989df48d815879f)
test.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import app
2
+
3
def test_transcribe_audio_to_text():
    """The transcription of the sample input must equal the expected text."""
    transcription = app.transcribe_audio_to_text("test_audio_input.wav")
    assert transcription == "your expected transcription"
9
+
10
def test_call_chatgpt_api():
    """The API wrapper must return a non-empty string reply.

    The request is sampled with a non-zero temperature, so the exact wording
    of the reply cannot be pinned; only its type and non-emptiness are checked.
    """
    prompt = "What is the meaning of life?"
    system_prompt = "Yoda:"

    result = app.call_chatgpt_api(prompt, system_prompt)

    assert isinstance(result, str)
    assert result.strip(), "expected a non-empty reply"
17
+
18
def test_text_to_speech_yoda():
    """text_to_speech_yoda must create a non-empty audio file.

    Any output left over from a previous run is removed first so the
    existence check cannot pass vacuously, and the file is cleaned up
    afterwards so it cannot satisfy another test.
    """
    input_text = "Do or do not, there is no try."
    output_audio_file = "test_audio_output.mp3"

    if os.path.exists(output_audio_file):
        os.remove(output_audio_file)

    try:
        app.text_to_speech_yoda(input_text, output_audio_file)
        assert os.path.exists(output_audio_file)
        assert os.path.getsize(output_audio_file) > 0
    finally:
        if os.path.exists(output_audio_file):
            os.remove(output_audio_file)
24
+
25
def test_process_audio():
    """process_audio must produce a non-empty output audio file.

    The output path is cleared before the call so a file written by an
    earlier test or run cannot make the assertion pass, and it is removed
    again afterwards.
    """
    input_audio_file = "test_audio_input.wav"
    output_audio_file = "test_audio_output.mp3"
    system_prompt = "Yoda:"

    if os.path.exists(output_audio_file):
        os.remove(output_audio_file)

    try:
        app.process_audio(input_audio_file, output_audio_file, system_prompt)
        assert os.path.exists(output_audio_file)
        assert os.path.getsize(output_audio_file) > 0
    finally:
        if os.path.exists(output_audio_file):
            os.remove(output_audio_file)