farnazzeidi committed
Commit 3098a9a · verified · 1 Parent(s): 5d1c512

Update README.md

Files changed (1):
  1. README.md +39 -13
README.md CHANGED
@@ -71,25 +71,51 @@ from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

# Load model and tokenizer
-
tokenizer = AutoTokenizer.from_pretrained("pei-germany/MEDNER-de-fp-gbert")
model = AutoModelForTokenClassification.from_pretrained("pei-germany/MEDNER-de-fp-gbert")

- text="Der Patient bekam den COVID-Impfstoff und nahm danach Aspirin."
+ text = "Der Patient wurde mit AstraZeneca geimpft und nahm anschließend Ibuprofen, um das Fieber zu senken."
+
+ # Tokenize and get predictions
inputs = tokenizer(text, return_tensors="pt")
outputs = model(**inputs)

- # Process logits and map predictions to labels
- predictions = [
-     (token, model.config.id2label[label.item()])
-     for token, label in zip(
-         tokenizer.convert_ids_to_tokens(inputs["input_ids"][0]),
-         torch.argmax(torch.softmax(outputs.logits, dim=-1), dim=-1)[0]
-     )
-     if token not in tokenizer.all_special_tokens
- ]
-
- print(predictions)
+ # Decode tokens and predictions
+ tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+ predictions = torch.argmax(outputs.logits, dim=2)[0].tolist()
+ labels = [model.config.id2label[pred] for pred in predictions]
+
+ # Process and merge subwords
+ entities = []
+ current_word = ""
+ current_entity = None
+
+ for token, label in zip(tokens, labels):
+     token = token.replace("##", "")  # Remove subword markers
+
+     if label.startswith("B-"):  # Beginning of a new entity
+         if current_entity and current_entity == label[2:]:  # Merge consecutive B- labels
+             current_word += token
+         else:  # Save the previous entity and start a new one
+             if current_word:
+                 entities.append({"entity": current_entity, "word": current_word})
+             current_word = token
+             current_entity = label[2:]
+     elif label.startswith("I-") and current_entity == label[2:]:  # Continuation of the same entity
+         current_word += token
+     else:  # Outside any entity
+         if current_word:  # Save the previous entity
+             entities.append({"entity": current_entity, "word": current_word})
+         current_word = ""
+         current_entity = None
+
+ if current_word:  # Append the last entity
+     entities.append({"entity": current_entity, "word": current_word})
+
+ # Print results
+ for entity in entities:
+     print(f"Entity: {entity['entity']}, Word: {entity['word']}")
+
  ```
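As a side note for readers of this revision: the manual subword-merging loop added above reimplements what the transformers token-classification pipeline already does internally. A minimal sketch under that assumption (the pipeline call is not part of this commit, and the example sentence is illustrative only):

```python
from transformers import pipeline

# Sketch only (not part of this commit): the built-in NER pipeline
# performs tokenization, argmax decoding, and subword merging itself.
ner = pipeline(
    "ner",
    model="pei-germany/MEDNER-de-fp-gbert",
    aggregation_strategy="simple",  # merge B-/I- pieces into word-level spans
)

text = "Der Patient wurde mit AstraZeneca geimpft und nahm anschließend Ibuprofen."
for entity in ner(text):
    print(f"Entity: {entity['entity_group']}, Word: {entity['word']}")
```

With `aggregation_strategy="simple"`, each result carries `entity_group`, `score`, `word`, and character offsets, so the hand-rolled merging in the README is mainly useful when finer control over how subwords are joined is required.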
  ---
  # Authors