Spaces:

TraceForce
/

varun-kd-finetune

Sleeping

Varun Wadhwa committed on 27 days ago

Commit

cb88b43

unverified ·

1 Parent(s): 157e35b

Logs

Files changed (1) hide show

app.py CHANGED Viewed

@@ -86,10 +86,11 @@ def align_labels_with_tokens(label, word_ids):
             aligned_label_ids.append(-100)
         elif word_idx != previous_word_idx:  # Only label the first token of a given word.
             if label[word_idx].startswith("B-"):
-                print(word_idx)
-                print(label[word_idx])
                 label[word_idx] = label[word_idx].replace("B-", "I-")
-            aligned_label_ids.append(label[word_idx])
         else:
             aligned_label_ids.append(-100)
         previous_word_idx = word_idx
@@ -112,7 +113,7 @@ def tokenize_function(examples):
     print("Printing partial input with tokenized output")
     print(inputs.tokens()[:1000])
     print(inputs.word_ids()[:1000])
-    print(new_labels[:1000])
     inputs["labels"] = new_labels
     return inputs

             aligned_label_ids.append(-100)
         elif word_idx != previous_word_idx:  # Only label the first token of a given word.
             if label[word_idx].startswith("B-"):
+                print("B vs I")
+                print(label2id[label[word_idx]])
                 label[word_idx] = label[word_idx].replace("B-", "I-")
+                print(label2id[label[word_idx]])
+            aligned_label_ids.append(label2id[label[word_idx]])
         else:
             aligned_label_ids.append(-100)
         previous_word_idx = word_idx
     print("Printing partial input with tokenized output")
     print(inputs.tokens()[:1000])
     print(inputs.word_ids()[:1000])
+    print(new_labels[0])
     inputs["labels"] = new_labels
     return inputs