Spaces:
Sleeping
Sleeping
nileshhanotia
committed on
Commit
•
ca9766d
1
Parent(s):
baaa181
Update app.py
Browse files
app.py
CHANGED
@@ -65,6 +65,15 @@ def create_dataset(data, tokenizer, max_length):
|
|
65 |
texts = [item.get('prompt', '') for item in data]
|
66 |
labels = [item.get('label', -1) for item in data]
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
encodings = tokenizer(texts, truncation=True, padding='max_length', max_length=max_length)
|
69 |
dataset = Dataset.from_dict({
|
70 |
'input_ids': encodings['input_ids'],
|
@@ -73,6 +82,7 @@ def create_dataset(data, tokenizer, max_length):
|
|
73 |
})
|
74 |
return dataset
|
75 |
|
|
|
76 |
def split_data(data, test_size=0.2):
|
77 |
if not data:
|
78 |
raise ValueError("Data is empty, cannot split.")
|
|
|
65 |
texts = [item.get('prompt', '') for item in data]
|
66 |
labels = [item.get('label', -1) for item in data]
|
67 |
|
68 |
+
# Debugging: Print out labels to check for invalid values
|
69 |
+
print(f"Labels before adjustment: {labels}")
|
70 |
+
|
71 |
+
# Ensure all labels are within the valid range
|
72 |
+
labels = [label if 0 <= label < num_labels else 0 for label in labels]
|
73 |
+
|
74 |
+
# Debugging: Print out adjusted labels
|
75 |
+
print(f"Labels after adjustment: {labels}")
|
76 |
+
|
77 |
encodings = tokenizer(texts, truncation=True, padding='max_length', max_length=max_length)
|
78 |
dataset = Dataset.from_dict({
|
79 |
'input_ids': encodings['input_ids'],
|
|
|
82 |
})
|
83 |
return dataset
|
84 |
|
85 |
+
|
86 |
def split_data(data, test_size=0.2):
|
87 |
if not data:
|
88 |
raise ValueError("Data is empty, cannot split.")
|