farnazzeidi committed
Commit 3098a9a · verified · 1 Parent(s): 5d1c512

Update README.md

Files changed (1):
  1. README.md +39 -13
README.md CHANGED
@@ -71,25 +71,51 @@ from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

# Load model and tokenizer
-
tokenizer = AutoTokenizer.from_pretrained("pei-germany/MEDNER-de-fp-gbert")
model = AutoModelForTokenClassification.from_pretrained("pei-germany/MEDNER-de-fp-gbert")

- text="Der Patient bekam den COVID-Impfstoff und nahm danach Aspirin."
+ text = "Der Patient wurde mit AstraZeneca geimpft und nahm anschließend Ibuprofen, um das Fieber zu senken."
+
+ # Tokenize and get predictions
inputs = tokenizer(text, return_tensors="pt")
outputs = model(**inputs)

- # Process logits and map predictions to labels
- predictions = [
-     (token, model.config.id2label[label.item()])
-     for token, label in zip(
-         tokenizer.convert_ids_to_tokens(inputs["input_ids"][0]),
-         torch.argmax(torch.softmax(outputs.logits, dim=-1), dim=-1)[0]
-     )
-     if token not in tokenizer.all_special_tokens
- ]
-
- print(predictions)
+ # Decode tokens and predictions
+ tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+ predictions = torch.argmax(outputs.logits, dim=2)[0].tolist()
+ labels = [model.config.id2label[pred] for pred in predictions]
+
+ # Process and merge subwords
+ entities = []
+ current_word = ""
+ current_entity = None
+
+ for token, label in zip(tokens, labels):
+     token = token.replace("##", "")  # Remove subword markers
+
+     if label.startswith("B-"):  # Beginning of a new entity
+         if current_entity and current_entity == label[2:]:  # Merge consecutive B- labels
+             current_word += token
+         else:  # Save the previous entity and start a new one
+             if current_word:
+                 entities.append({"entity": current_entity, "word": current_word})
+             current_word = token
+             current_entity = label[2:]
+     elif label.startswith("I-") and current_entity == label[2:]:  # Continuation of the same entity
+         current_word += token
+     else:  # Outside any entity
+         if current_word:  # Save the previous entity
+             entities.append({"entity": current_entity, "word": current_word})
+         current_word = ""
+         current_entity = None
+
+ if current_word:  # Append the last entity
+     entities.append({"entity": current_entity, "word": current_word})
+
+ # Print results
+ for entity in entities:
+     print(f"Entity: {entity['entity']}, Word: {entity['word']}")
+
  ```
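As a side note for readers of this revision: the manual subword-merging loop added above reimplements what the transformers token-classification pipeline already does internally. A minimal sketch under that assumption (the pipeline call is not part of this commit, and the example sentence is illustrative only):

```python
from transformers import pipeline

# Sketch only (not part of this commit): the built-in NER pipeline
# performs tokenization, argmax decoding, and subword merging itself.
ner = pipeline(
    "ner",
    model="pei-germany/MEDNER-de-fp-gbert",
    aggregation_strategy="simple",  # merge B-/I- pieces into word-level spans
)

text = "Der Patient wurde mit AstraZeneca geimpft und nahm anschließend Ibuprofen."
for entity in ner(text):
    print(f"Entity: {entity['entity_group']}, Word: {entity['word']}")
```

With `aggregation_strategy="simple"`, each result carries `entity_group`, `score`, `word`, and character offsets, so the hand-rolled merging in the README is mainly useful when finer control over how subwords are joined is required.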
  ---
  # Authors