arubenruben committed on
Commit
b06c1b9
1 Parent(s): f356772

Update deploy_pipeline.py

Browse files
Files changed (1) hide show
  1. deploy_pipeline.py +10 -12
deploy_pipeline.py CHANGED
@@ -13,15 +13,14 @@ class TokenizeAndAlignLabelsStep():
13
 
14
  # Adapted From : https://huggingface.co/docs/transformers/tasks/token_classification
15
  def tokenize_and_align_labels(self, examples, tokenizer):
16
-
17
- tokenized_inputs = tokenizer(examples, padding='max_length', truncation=True, max_length=128)
18
 
19
  # Map tokens to their respective word.
20
  word_ids = tokenized_inputs.word_ids()
21
 
22
  previous_word_idx = None
23
-
24
-
25
  labels_mask = []
26
 
27
  for word_idx in word_ids: # Set the special tokens to -100.
@@ -35,28 +34,25 @@ class TokenizeAndAlignLabelsStep():
35
 
36
  previous_word_idx = word_idx
37
 
38
- tokenized_inputs["tokens"] = tokenizer.decode(tokenized_inputs["input_ids"], skip_special_tokens=True)
39
  tokenized_inputs["labels_mask"] = labels_mask
40
 
41
  return tokenized_inputs
42
 
43
 
44
 
45
-
46
  class BERT_CRF_Pipeline(Pipeline):
47
 
48
  def _sanitize_parameters(self, **kwargs):
49
  return {}, {}, {}
50
 
51
- def preprocess(self, text):
 
52
 
53
  tokenizer = AutoTokenizer.from_pretrained(
54
  "neuralmind/bert-base-portuguese-cased", do_lower_case=False)
 
 
55
 
56
- TokenizeAndAlignLabelsStep().tokenize_and_align_labels(
57
- examples=text, tokenizer=tokenizer)
58
-
59
- return TokenizeAndAlignLabelsStep().tokenize_and_align_labels(examples=text, tokenizer=tokenizer)
60
 
61
  def _forward(self, tokenizer_results):
62
 
@@ -79,13 +75,15 @@ class BERT_CRF_Pipeline(Pipeline):
79
  return outputs
80
 
81
  def postprocess(self, model_outputs):
 
82
  # From Ner_tags to Ner_labels
83
  for i, label in enumerate(model_outputs[0]):
84
  model_outputs[0][i] = self.model.config.id2label[label]
85
-
86
  return model_outputs[0]
87
 
88
 
 
89
  def main():
90
 
91
  PIPELINE_REGISTRY.register_pipeline("PT-BERT-Large-CRF-Conll2003-pipeline",
 
13
 
14
  # Adapted From : https://huggingface.co/docs/transformers/tasks/token_classification
15
  def tokenize_and_align_labels(self, examples, tokenizer):
16
+
17
+ tokenized_inputs = tokenizer(examples, padding='max_length', truncation=True, max_length=128, is_split_into_words=True)
18
 
19
  # Map tokens to their respective word.
20
  word_ids = tokenized_inputs.word_ids()
21
 
22
  previous_word_idx = None
23
+
 
24
  labels_mask = []
25
 
26
  for word_idx in word_ids: # Set the special tokens to -100.
 
34
 
35
  previous_word_idx = word_idx
36
 
 
37
  tokenized_inputs["labels_mask"] = labels_mask
38
 
39
  return tokenized_inputs
40
 
41
 
42
 
 
43
  class BERT_CRF_Pipeline(Pipeline):
44
 
45
  def _sanitize_parameters(self, **kwargs):
46
  return {}, {}, {}
47
 
48
+ def preprocess(self, inputs):
49
+ tokens = inputs['tokens']
50
 
51
  tokenizer = AutoTokenizer.from_pretrained(
52
  "neuralmind/bert-base-portuguese-cased", do_lower_case=False)
53
+
54
+ return TokenizeAndAlignLabelsStep().tokenize_and_align_labels(examples=tokens, tokenizer=tokenizer)
55
 
 
 
 
 
56
 
57
  def _forward(self, tokenizer_results):
58
 
 
75
  return outputs
76
 
77
  def postprocess(self, model_outputs):
78
+
79
  # From Ner_tags to Ner_labels
80
  for i, label in enumerate(model_outputs[0]):
81
  model_outputs[0][i] = self.model.config.id2label[label]
82
+
83
  return model_outputs[0]
84
 
85
 
86
+
87
  def main():
88
 
89
  PIPELINE_REGISTRY.register_pipeline("PT-BERT-Large-CRF-Conll2003-pipeline",