ManziBryan committed on
Commit
7ab7b33
·
1 Parent(s): 869286c

Luganda Model Initial

Browse files
Files changed (7) hide show
  1. .gitattributes +2 -10
  2. 5gram.bin +3 -0
  3. README.md +38 -0
  4. app.py +71 -0
  5. packages.txt +2 -0
  6. requirements.txt +8 -0
  7. unigrams.txt +0 -0
.gitattributes CHANGED
@@ -1,35 +1,27 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
11
  *.model filter=lfs diff=lfs merge=lfs -text
12
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
13
  *.onnx filter=lfs diff=lfs merge=lfs -text
14
  *.ot filter=lfs diff=lfs merge=lfs -text
15
  *.parquet filter=lfs diff=lfs merge=lfs -text
16
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
17
  *.pt filter=lfs diff=lfs merge=lfs -text
18
  *.pth filter=lfs diff=lfs merge=lfs -text
19
  *.rar filter=lfs diff=lfs merge=lfs -text
 
20
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
22
  *.tflite filter=lfs diff=lfs merge=lfs -text
23
  *.tgz filter=lfs diff=lfs merge=lfs -text
 
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
5gram.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46e982596dbb0c7c225dd9b88ef89c733ba6d718befc3c3b833b1daddc60816a
3
+ size 11939611
README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Luganda ASR
3
+ emoji: 🌍
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 3.0.5
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # Configuration
13
+
14
+ `title`: _string_
15
+ Display title for the Space
16
+
17
+ `emoji`: _string_
18
+ Space emoji (only emoji characters are allowed)
19
+
20
+ `colorFrom`: _string_
21
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
22
+
23
+ `colorTo`: _string_
24
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
25
+
26
+ `sdk`: _string_
27
+ Can be either `gradio` or `streamlit`
28
+
29
+ `sdk_version`: _string_
30
+ Only applicable for `streamlit` SDK.
31
+ See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
32
+
33
+ `app_file`: _string_
34
+ Path to your main application file (which contains either `gradio` or `streamlit` Python code).
35
+ Path is relative to the root of the repository.
36
+
37
+ `pinned`: _boolean_
38
+ Whether the Space stays on top of your list.
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import soundfile as sf
2
+ import torch
3
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
+ from pyctcdecode import build_ctcdecoder
5
+ import gradio as gr
6
+ import librosa
7
+ import os
8
+ from multiprocessing import Pool
9
+
10
+
11
class KenLM:
    """CTC beam-search decoder that rescores hypotheses with a KenLM model.

    The decoder is built once from the tokenizer vocabulary and a language
    model file, and is then reused for every call to :meth:`decode`.
    """

    def __init__(self, tokenizer, model_name, num_workers=8, beam_width=128):
        """Build the underlying pyctcdecode decoder.

        Args:
            tokenizer: Object whose ``get_vocab()`` returns a mapping of
                token -> integer id (presumably a Hugging Face tokenizer).
            model_name: Language-model argument forwarded as the second
                positional argument of ``build_ctcdecoder``.
            num_workers: Size of the process pool used while decoding.
            beam_width: Beam width forwarded to the decoder on every call.
        """
        self.num_workers = num_workers
        self.beam_width = beam_width
        vocab_dict = tokenizer.get_vocab()
        # Order the tokens by their id so list positions line up with ids.
        self.vocabulary = sorted(vocab_dict, key=vocab_dict.get)
        # Workaround for wrong number of vocabularies: drop the two
        # highest-id tokens.
        # NOTE(review): presumably these are extra special tokens that are
        # not part of the acoustic model's output layer - confirm.
        self.vocabulary = self.vocabulary[:-2]
        self.decoder = build_ctcdecoder(self.vocabulary, model_name)

    @staticmethod
    def lm_postprocess(text):
        """Drop single-character tokens from ``text``.

        Tokens are whitespace-separated; the survivors are re-joined with a
        single space, so removing a token never leaves a double space behind.
        """
        return " ".join(word for word in text.split() if len(word) > 1)

    def decode(self, logits):
        """Decode a batch of logits into post-processed transcripts.

        Args:
            logits: Object supporting ``.cpu().numpy()`` (presumably a
                ``torch.Tensor`` holding one row of logits per utterance).

        Returns:
            A list with one cleaned-up transcript string per batch item.
        """
        probs = logits.cpu().numpy()
        with Pool(self.num_workers) as pool:
            # Pass the configured beam width explicitly; otherwise the
            # decoder silently falls back to its own default.
            texts = self.decoder.decode_batch(
                pool, probs, beam_width=self.beam_width
            )
        return [KenLM.lm_postprocess(text) for text in texts]
32
+
33
+
34
def convert(inputfile, outfile):
    """Resample an audio file to 16 kHz and write the result to ``outfile``.

    Args:
        inputfile: Path of the audio file to read.
        outfile: Path the 16 kHz audio is written to.
    """
    target_sr = 16000
    # Ask librosa for the target rate directly. Without ``sr`` it first
    # resamples to its own default rate, and a second resample to 16 kHz
    # would then be needed - two lossy conversions instead of one.
    data, _ = librosa.load(inputfile, sr=target_sr)
    sf.write(outfile, data, target_sr)
39
+
40
+
41
# Access token for the model hub, taken from the environment (None if unset).
api_token = os.environ.get("API_TOKEN")
model_name = "indonesian-nlp/wav2vec2-luganda"

# Feature extractor + tokenizer, and the CTC acoustic model itself.
processor = Wav2Vec2Processor.from_pretrained(
    model_name,
    use_auth_token=api_token,
)
model = Wav2Vec2ForCTC.from_pretrained(
    model_name,
    use_auth_token=api_token,
)

# Language-model decoder built on the processor's tokenizer and "5gram.bin".
kenlm = KenLM(tokenizer=processor.tokenizer, model_name="5gram.bin")
46
+
47
+
48
def parse_transcription(wav_file):
    """Transcribe a recorded audio file.

    Args:
        wav_file: File-like object whose ``name`` attribute is the path of
            the recording, or ``None`` when nothing was recorded.

    Returns:
        The transcript of the recording, or an empty string when
        ``wav_file`` is ``None``.
    """
    if wav_file is None:
        # Nothing was recorded; there is nothing to transcribe.
        return ""

    source_path = wav_file.name
    # splitext only strips the final extension, so dots elsewhere in the
    # path (e.g. in a temp-directory name) no longer truncate it.
    stem, _ = os.path.splitext(source_path)
    resampled_path = stem + "16k.wav"

    convert(source_path, resampled_path)
    try:
        speech, _ = sf.read(resampled_path)
    finally:
        # The resampled copy is only an intermediate; don't let it pile up.
        os.remove(resampled_path)

    input_values = processor(
        speech, sampling_rate=16_000, return_tensors="pt"
    ).input_values
    with torch.no_grad():
        logits = model(input_values).logits
    # decode() returns one transcript per batch item; there is only one here.
    return kenlm.decode(logits)[0]
57
+
58
+
59
# UI widgets: microphone recording in, transcript text out.
output = gr.outputs.Textbox(label="The transcript")
input_ = gr.inputs.Audio(source="microphone", type="file")

# Build the demo page around parse_transcription and start serving it.
gr.Interface(
    fn=parse_transcription,
    inputs=input_,
    outputs=[output],
    analytics_enabled=False,
    title="Automatic Speech Recognition for Luganda",
    description="Speech Recognition Live Demo for Luganda",
    article=(
        "This demo was built for the "
        "<a href='https://zindi.africa/competitions/mozilla-luganda-automatic-speech-recognition' target='_blank'>Mozilla Luganda Automatic Speech Recognition Competition</a>. "
        "It uses the <a href='https://huggingface.co/indonesian-nlp/wav2vec2-luganda' target='_blank'>indonesian-nlp/wav2vec2-luganda</a> model "
        "which was fine-tuned on Luganda Common Voice speech datasets."
    ),
    enable_queue=True,
).launch(
    inline=False,
    server_name="0.0.0.0",
    show_tips=False,
    enable_queue=True,
)
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ libsndfile1
2
+ sox
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ soundfile
3
+ torch
4
+ transformers
5
+ librosa
6
+ sentencepiece
7
+ pyctcdecode==0.3.0
8
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip
unigrams.txt ADDED
The diff for this file is too large to render. See raw diff