Text Classification
Transformers
PyTorch
bert
Inference Endpoints
rttl committed on
Commit
79ff564
1 Parent(s): e10f5dc

Upload foodybert.py

Files changed (1)
  1. foodybert.py +147 -0
foodybert.py ADDED
@@ -0,0 +1,147 @@
import os
from sklearn.metrics import classification_report
import torch.nn as nn
import transformers
from transformers import BertModel, BertTokenizer, BertForSequenceClassification
import numpy as np
from datasets import load_dataset, load_metric

import math
import warnings
from dataclasses import dataclass
import torch
import torch.utils.checkpoint
from packaging import version
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from typing import List, Optional, Tuple, Union

from transformers.modeling_outputs import (
    BaseModelOutputWithPastAndCrossAttentions,
    BaseModelOutputWithPoolingAndCrossAttentions,
    CausalLMOutputWithCrossAttentions,
    MaskedLMOutput,
    MultipleChoiceModelOutput,
    NextSentencePredictorOutput,
    QuestionAnsweringModelOutput,
    SequenceClassifierOutput,
    TokenClassifierOutput,
)


class FoodyBertForSequenceClassification(BertForSequenceClassification):
    def __init__(self, config):
        super().__init__(config)

        self.num_labels = config.num_labels
        self.config = config

        self.bert = BertModel(config)
        classifier_dropout = (
            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
        )

        # the classifier head operates on the concatenation of the last four hidden layers,
        # hence 4 * hidden_size input features
        self.pre_classifier = torch.nn.Linear(4 * config.hidden_size, 4 * config.hidden_size)
        self.tanh = nn.Tanh()
        # self.relu = nn.ReLU()
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(4 * config.hidden_size, config.num_labels)

        self.post_init()

    def post_init(self):
        # overridden as a no-op: the weight initialization normally done in post_init is skipped
        pass

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # outputs contains (last_hidden_state, pooler_output, hidden_states, ...):
        # [0] last_hidden_state -> tensor [batch, #tokens, 768]
        # [1] pooler_output     -> tensor [batch, 768]
        # [2] hidden_states     -> tuple of 13 tensors of size [batch, #tokens, 768]
        #     (only present when output_hidden_states=True)

        # use only CLS
        # pooled_output = outputs[1]

        # average across tokens at the last layer
        # last_state = outputs[0]
        # pooled_output = torch.mean(last_state, 1)

        # use hidden_states and concatenate layers -> change classifier dimensions!
        hidden_states = outputs[2]

        # concatenate the last 4 layers and average across tokens
        pooled_output = torch.cat(tuple(hidden_states[i] for i in [-4, -3, -2, -1]), dim=-1)
        pooled_output = torch.mean(pooled_output, 1)

        # concatenate 4 layers and use CLS
        # pooled_output = torch.cat(tuple(hidden_states[i] for i in [-4, -3, -2, -1]), dim=-1)
        # pooled_output = pooled_output[:, 0, :]

        pooled_output = self.pre_classifier(pooled_output)
        pooled_output = self.tanh(pooled_output)
        # pooled_output = self.relu(pooled_output)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            # infer the problem type from num_labels and the label dtype, as in the
            # standard BertForSequenceClassification head
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
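
For reference, a minimal inference sketch (not part of the commit): it assumes the fine-tuned checkpoint and tokenizer files sit in the same repository as foodybert.py, and the repo id shown is a placeholder, not the actual model name. Because forward() reads outputs[2] (the per-layer hidden states), the model has to be loaded with output_hidden_states=True.

import torch
from transformers import BertTokenizer
from foodybert import FoodyBertForSequenceClassification

repo_id = "rttl/foodybert"  # hypothetical repo id, substitute the actual checkpoint
tokenizer = BertTokenizer.from_pretrained(repo_id)
# output_hidden_states=True is required: forward() indexes outputs[2] for the layer-wise hidden states
model = FoodyBertForSequenceClassification.from_pretrained(repo_id, output_hidden_states=True)
model.eval()

inputs = tokenizer("The pasta was great but the service was slow.", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
predicted_class = outputs.logits.argmax(dim=-1).item()
print(predicted_class)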