# File size: 898 Bytes
# Revision: 7cfb43e
import json
from itertools import islice

from datasets import load_dataset
# Open the training split in streaming mode (streaming=True), so examples are
# read one at a time while iterating.
dataset = load_dataset("asierhv/composite_corpus_eu_v2.1", split="train", streaming=True)

# Number of leading examples to inspect.
NUM_EXAMPLES = 3

# Take the first NUM_EXAMPLES examples. islice stops as soon as it has enough,
# so no extra example is pulled from the stream just to reach a `break`.
examples = list(islice(dataset, NUM_EXAMPLES))

# Print the structure and content of each collected example.
for number, example in enumerate(examples, start=1):
    print(f"\nExample {number}:")
    for key, value in example.items():
        # The "audio" field is expanded entry by entry when it is a dict;
        # any other shape falls through to the generic print below instead
        # of failing on a missing .keys().
        if key == "audio" and isinstance(value, dict):
            print(f"audio keys: {value.keys()}")
            for audio_key, audio_value in value.items():
                # Raw byte payloads are summarized rather than dumped to the terminal.
                if isinstance(audio_value, (bytes, bytearray, memoryview)):
                    print(f" {audio_key}: <binary data>")
                else:
                    print(f" {audio_key}: {audio_value}")
        else:
            print(f"{key}: {value}")