# File size: 898 Bytes
# 7cfb43e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from datasets import load_dataset
import json

# Peek at the first few records of a streamed Hugging Face dataset and print
# their structure, replacing raw binary payloads with a short placeholder.

DATASET_NAME = "asierhv/composite_corpus_eu_v2.1"
NUM_EXAMPLES = 3  # how many leading examples to pull from the stream


def _printable(value):
    """Return *value*, or a placeholder when it is raw binary data."""
    if isinstance(value, (bytes, memoryview)):
        return "<binary data>"
    return value


def _format_audio_field(value):
    """Return the printable lines describing the ``audio`` field.

    When *value* exposes ``keys()`` and ``items()`` (e.g. a dict), the
    result is one line listing its keys followed by one indented line per
    entry. Any other value (``None``, raw bytes, some other object) is
    rendered on a single ``audio: ...`` line instead of raising.
    """
    try:
        keys, items = value.keys(), value.items()
    except AttributeError:
        # NOTE(review): the audio field is not guaranteed to be a dict here;
        # presumably some `datasets` versions yield a decoder object or None
        # -- confirm against the installed version.
        return [f"audio: {_printable(value)}"]

    lines = [f"audio keys: {keys}"]
    lines.extend(
        f"  {audio_key}: {_printable(audio_value)}"
        for audio_key, audio_value in items
    )
    return lines


def format_example(index: int, example: dict) -> list:
    """Return the lines to print for one example.

    Args:
        index: Zero-based position of the example; shown one-based.
        example: Mapping of field name to value for a single record.

    Returns:
        A list of strings: a header line, then one line per field. The
        ``audio`` field is expanded by ``_format_audio_field``.
    """
    lines = [f"\nExample {index + 1}:"]
    for key, value in example.items():
        if key == "audio":
            lines.extend(_format_audio_field(value))
        else:
            lines.append(f"{key}: {value}")
    return lines


def main() -> None:
    """Stream the dataset, take the first examples, and print them."""
    from itertools import islice

    # streaming=True is passed so the examples are iterated lazily.
    dataset = load_dataset(DATASET_NAME, split="train", streaming=True)

    # Collect first, then print, so output only starts once all examples
    # have been fetched.
    examples = list(islice(dataset, NUM_EXAMPLES))

    for index, example in enumerate(examples):
        for line in format_example(index, example):
            print(line)


if __name__ == "__main__":
    main()