Update README.md
Browse files
README.md
CHANGED
@@ -219,6 +219,7 @@ print(result["text"])
|
|
219 |
Kotoba-whisper can generate transcriptions guided by a prompt, as shown below:
|
220 |
|
221 |
```python
|
|
|
222 |
import torch
|
223 |
from transformers import pipeline
|
224 |
from datasets import load_dataset, Audio
|
@@ -243,16 +244,17 @@ pipe = pipeline(
|
|
243 |
dataset = load_dataset("japanese-asr/ja_asr.reazonspeech_test", split="test")
|
244 |
|
245 |
# --- Without prompt ---
|
246 |
-
|
247 |
-
print(
|
248 |
# 81歳、力強い走りに変わってきます。
|
249 |
|
250 |
# --- With prompt ---: Let's change `81` to `91`.
|
251 |
prompt = "91歳"
|
252 |
generate_kwargs['prompt_ids'] = pipe.tokenizer.get_prompt_ids(prompt, return_tensors="pt").to(device)
|
253 |
-
|
254 |
-
|
255 |
-
|
|
|
256 |
# あっぶったでもスルガさん、91歳、力強い走りに変わってきます。
|
257 |
```
|
258 |
|
|
|
219 |
Kotoba-whisper can generate transcriptions guided by a prompt, as shown below:
|
220 |
|
221 |
```python
|
222 |
+
import re
|
223 |
import torch
|
224 |
from transformers import pipeline
|
225 |
from datasets import load_dataset, Audio
|
|
|
244 |
dataset = load_dataset("japanese-asr/ja_asr.reazonspeech_test", split="test")
|
245 |
|
246 |
# --- Without prompt ---
|
247 |
+
text = pipe(dataset[10]["audio"], generate_kwargs=generate_kwargs)['text']
|
248 |
+
print(text)
|
249 |
# 81歳、力強い走りに変わってきます。
|
250 |
|
251 |
# --- With prompt ---: Let's change `81` to `91`.
|
252 |
prompt = "91歳"
|
253 |
generate_kwargs['prompt_ids'] = pipe.tokenizer.get_prompt_ids(prompt, return_tensors="pt").to(device)
|
254 |
+
text = pipe(dataset[10]["audio"], generate_kwargs=generate_kwargs)['text']
|
255 |
+
# currently the ASR pipeline prepends the prompt to the transcription, so remove it
|
256 |
+
text = re.sub(rf"\A\s*{prompt}\s*", "", text)
|
257 |
+
print(text)
|
258 |
# あっぶったでもスルガさん、91歳、力強い走りに変わってきます。
|
259 |
```
|
260 |
|