Commit 278e5c7
Parent: 5c5b7f5

Fix imports in multilingual examples

README.md CHANGED
@@ -226,7 +226,7 @@ transcription.
 
 ```python
 >>> from transformers import WhisperProcessor, WhisperForConditionalGeneration
->>> from datasets import load_dataset
+>>> from datasets import Audio, load_dataset
 >>> import torch
 
 >>> # load model and processor
@@ -235,7 +235,7 @@ transcription.
 
 >>> # load dummy dataset and read soundfiles
 >>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
->>> ds = ds.cast_column("audio",
+>>> ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
 >>> input_speech = next(iter(ds))["audio"]["array"]
 >>> model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(language = "fr", task = "transcribe")
 >>> input_features = processor(input_speech, return_tensors="pt").input_features
@@ -254,7 +254,7 @@ The "<|translate|>" is used as the first decoder input token to specify the tran
 
 ```python
 >>> from transformers import WhisperProcessor, WhisperForConditionalGeneration
->>> from datasets import load_dataset
+>>> from datasets import Audio, load_dataset
 >>> import torch
 
 >>> # load model and processor
@@ -263,7 +263,7 @@ The "<|translate|>" is used as the first decoder input token to specify the tran
 
 >>> # load dummy dataset and read soundfiles
 >>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
->>> ds = ds.cast_column("audio",
+>>> ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
 >>> input_speech = next(iter(ds))["audio"]["array"]
 >>> # tokenize
 >>> input_features = processor(input_speech, return_tensors="pt").input_features
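For reference, here is a minimal end-to-end sketch of the corrected transcription example after this commit. The model-loading and generation lines fall outside the hunks shown above, so the checkpoint name (`openai/whisper-small`) and the final `generate`/`batch_decode` calls are assumptions based on standard `transformers` usage, not part of the diff.

```python
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from datasets import Audio, load_dataset
import torch

# load model and processor (checkpoint name assumed; the diff elides these lines)
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")

# load dummy dataset and resample the audio column to the 16 kHz Whisper expects;
# cast_column needs the Audio feature class, whose missing import this commit fixes
ds = load_dataset("common_voice", "fr", split="test", streaming=True)
ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
input_speech = next(iter(ds))["audio"]["array"]

# force French transcription and extract log-mel input features
model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(language="fr", task="transcribe")
input_features = processor(input_speech, return_tensors="pt").input_features

# generate token ids and decode them to text (assumed continuation of the example)
predicted_ids = model.generate(input_features)
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
print(transcription)
```

Per the context line in the third hunk, the translation example differs only in using the "<|translate|>" task token as the first decoder input, i.e. `task="translate"` in `get_decoder_prompt_ids`.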