Upload BERT_CRF

Browse files

Files changed (3) hide show

config.json +37 -0
model.py +85 -0
pytorch_model.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "_name_or_path": "/notebooks/src/hugging_face_pipeline/BERT-CRF/out/model",
+  "architectures": [
+    "BERT_CRF"
+  ],
+  "auto_map": {
+    "AutoConfig": "model.BERT_CRF_Config",
+    "AutoModelForTokenClassification": "model.BERT_CRF"
+  },
+  "bert_name": "neuralmind/bert-large-portuguese-cased",
+  "id2label": {
+    "0": "O",
+    "1": "B-PER",
+    "2": "I-PER",
+    "3": "B-ORG",
+    "4": "I-ORG",
+    "5": "B-LOC",
+    "6": "I-LOC",
+    "7": "B-MISC",
+    "8": "I-MISC"
+  },
+  "label2id": {
+    "B-LOC": 5,
+    "B-MISC": 7,
+    "B-ORG": 3,
+    "B-PER": 1,
+    "I-LOC": 6,
+    "I-MISC": 8,
+    "I-ORG": 4,
+    "I-PER": 2,
+    "O": 0
+  },
+  "model_name": "BERT_CRF",
+  "model_type": "BERT_CRF",
+  "torch_dtype": "float32",
+  "transformers_version": "4.29.2"
+}

model.py ADDED Viewed

	@@ -0,0 +1,85 @@

+from torch import nn
+from transformers import PreTrainedModel, PretrainedConfig
+from transformers import BertModel, BertConfig
+from transformers import AutoModelForTokenClassification, AutoConfig
+from torchcrf import CRF
+class BERT_CRF_Config(PretrainedConfig):
+    model_type = "BERT_CRF"
+    def __init__(self, **kwarg):
+        super().__init__(**kwarg)
+        self.model_name = "BERT_CRF"
+class BERT_CRF(PreTrainedModel):
+    config_class = BERT_CRF_Config
+    def __init__(self, config):
+        super().__init__(config)
+        bert_config = BertConfig.from_pretrained(config.bert_name)
+        bert_config.output_attentions = True
+        bert_config.output_hidden_states = True
+        self.bert = BertModel.from_pretrained(config.bert_name, config=bert_config)
+        self.dropout = nn.Dropout(p=0.5)
+        self.linear = nn.Linear(
+            self.bert.config.hidden_size, config.num_labels)
+        self.crf = CRF(config.num_labels, batch_first=True)
+    def forward(self, input_ids, token_type_ids, attention_mask, labels, labels_mask):
+        last_hidden_layer = self.bert(input_ids=input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)[
+            'last_hidden_state']
+        last_hidden_layer = self.dropout(last_hidden_layer)
+        logits = self.linear(last_hidden_layer)
+        batch_size = logits.shape[0]
+        output_tags = []
+        if labels is not None:
+            loss = 0
+            for seq_logits, seq_labels, seq_mask in zip(logits, labels, labels_mask):
+                # Index logits and labels using prediction mask to pass only the
+                # first subtoken of each word to CRF.
+                seq_logits = seq_logits[seq_mask].unsqueeze(0)
+                seq_labels = seq_labels[seq_mask].unsqueeze(0)
+                if seq_logits.numel() != 0:
+                    loss -= self.crf(seq_logits, seq_labels,
+                                     reduction='token_mean')
+            return loss / batch_size
+        else:
+            for seq_logits, seq_mask in zip(logits, labels_mask):
+                seq_logits = seq_logits[seq_mask].unsqueeze(0)
+                if seq_logits.numel() != 0:
+                    tags = self.crf.decode(seq_logits)
+                else:
+                    tags = [[]]
+                # Unpack "batch" results
+                output_tags.append(tags[0])
+            return output_tags
+class ModelRegisterStep():
+    def __call__(self, args):
+        AutoConfig.register("BERT_CRF", BERT_CRF_Config)
+        AutoModelForTokenClassification.register(BERT_CRF_Config, BERT_CRF)
+        return {
+            **args,
+        }

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5996a9b88e08415a34eef073d5708a7b916cebc099aa379d3a4f1051cdcafb0e
+size 1337754151