yonas committed on
Commit
a009c84
·
1 Parent(s): 2b32664

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model/Kinyarwanda_nemo_stt_conformer_model.nemo filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: ASR Demo Kinyarwanda
3
- emoji: 📈
4
- colorFrom: indigo
5
- colorTo: purple
6
  sdk: gradio
7
  sdk_version: 4.7.1
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: ASR_Demo_Kinyarwanda
3
+ app_file: gradio_demo.py
 
 
4
  sdk: gradio
5
  sdk_version: 4.7.1
 
 
6
  ---
 
 
gradio_demo.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pyaudioconvert as pac
3
+ from pydub import AudioSegment
4
+
5
+ import nemo
6
+ import nemo.collections.asr as nemo_asr
7
+
8
from pathlib import Path

# Load the pre-trained Kinyarwanda Conformer-CTC model shipped in this repo.
# Resolve the checkpoint path relative to this script so the app runs both
# locally and on Hugging Face Spaces — the previous hard-coded
# "/home/yonas/stt/demo/..." path only existed on the author's machine.
_MODEL_PATH = Path(__file__).resolve().parent / "model" / "Kinyarwanda_nemo_stt_conformer_model.nemo"

model = nemo_asr.models.EncDecCTCModelBPE.restore_from(
    restore_path=str(_MODEL_PATH)
)

# Sanity check: EncDecCTCModelBPE is a subclass of EncDecCTCModel, so this
# confirms restore_from() returned the expected model family.
assert isinstance(model, nemo.collections.asr.models.EncDecCTCModel)
14
+
15
+
16
def convert(file_name):
    """Normalize an uploaded audio file to 16-bit mono WAV, in place.

    MP3 and OGG inputs are first re-encoded to WAV with pydub; WAV inputs
    are passed straight through. The (now WAV) file is then rewritten as
    16-bit mono PCM, the sample format the NeMo CTC model expects.

    Args:
        file_name: Path to the audio file; it is overwritten in place.

    Returns:
        True on success, False if the file extension is not mp3/wav/ogg.
    """
    lowered = file_name.lower()
    # Anchor the match on the real dotted extension (".mp3", not any name
    # merely ending in the letters "mp3") and accept upper-case extensions.
    if not lowered.endswith((".mp3", ".wav", ".ogg")):
        return False
    if lowered.endswith(".mp3"):
        AudioSegment.from_mp3(file_name).export(file_name, format="wav")
    elif lowered.endswith(".ogg"):
        AudioSegment.from_ogg(file_name).export(file_name, format="wav")
    pac.convert_wav_to_16bit_mono(file_name, file_name)
    return True
28
+
29
+
30
def transcribe(audio):
    """Transcribe a single uploaded or recorded audio file.

    Args:
        audio: File path delivered by the Gradio audio widget, or a falsy
            value when nothing was supplied.

    Returns:
        The recognized text, or a human-readable error message when no
        audio was provided or the format is unsupported.
    """
    # Guard clauses: bail out early with a user-facing message.
    if not audio:
        return "No audio provided"
    if not convert(audio):
        return "The format must be mp3, wav, or ogg"

    # The model takes a batch of paths; we send a batch of one and return
    # its single hypothesis.
    hypotheses = model.transcribe([audio])
    return hypotheses[0]
40
+
41
+
42
# Build the Gradio UI: a single audio input (file upload or browser
# microphone, delivered to transcribe() as a WAV file path) mapped to a
# text output holding the recognized speech.
gradio_ui = gr.Interface(
    fn=transcribe,
    title="Kinyarwanda Speech Recognition",
    description="Upload an audio clip or record from browser using microphone.",
    inputs=[
        gr.Audio(label="Upload Audio File or Record from microphone", sources=["upload", "microphone"], type="filepath", format="wav"),
    ],
    outputs=gr.Text(label="Recognized speech")
)

# Launch the Gradio app.
# share=True requests a public tunnel URL; debug=True blocks the process
# and streams server logs to the console.
gradio_ui.launch(share=True, debug=True)
model/Kinyarwanda_nemo_stt_conformer_model.nemo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b96caae115055a37013539d5cc109f40b8994b8e323368b1116e543c9d7c0708
3
+ size 488570880
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ cython<3.0.0
2
+ pyyaml==5.4.1
3
+ wheel
4
+ gradio
5
+ nemo_toolkit[asr]
6
+ pydub
7
+ pyaudioconvert