nileshhanotia committed on
Commit
ca9766d
1 Parent(s): baaa181

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -0
app.py CHANGED
@@ -65,6 +65,15 @@ def create_dataset(data, tokenizer, max_length):
65
  texts = [item.get('prompt', '') for item in data]
66
  labels = [item.get('label', -1) for item in data]
67
 
 
 
 
 
 
 
 
 
 
68
  encodings = tokenizer(texts, truncation=True, padding='max_length', max_length=max_length)
69
  dataset = Dataset.from_dict({
70
  'input_ids': encodings['input_ids'],
@@ -73,6 +82,7 @@ def create_dataset(data, tokenizer, max_length):
73
  })
74
  return dataset
75
 
 
76
  def split_data(data, test_size=0.2):
77
  if not data:
78
  raise ValueError("Data is empty, cannot split.")
 
65
  texts = [item.get('prompt', '') for item in data]
66
  labels = [item.get('label', -1) for item in data]
67
 
68
+ # Debugging: Print out labels to check for invalid values
69
+ print(f"Labels before adjustment: {labels}")
70
+
71
+ # Ensure all labels are within the valid range
72
+ labels = [label if 0 <= label < num_labels else 0 for label in labels]
73
+
74
+ # Debugging: Print out adjusted labels
75
+ print(f"Labels after adjustment: {labels}")
76
+
77
  encodings = tokenizer(texts, truncation=True, padding='max_length', max_length=max_length)
78
  dataset = Dataset.from_dict({
79
  'input_ids': encodings['input_ids'],
 
82
  })
83
  return dataset
84
 
85
+
86
  def split_data(data, test_size=0.2):
87
  if not data:
88
  raise ValueError("Data is empty, cannot split.")