DinoLiu committed on
Commit
0bbfa9a
1 Parent(s): e732be5
Files changed (1) hide show
  1. inference.py +53 -147
inference.py CHANGED
@@ -1,156 +1,62 @@
1
  import os
 
2
  import json
3
  import torch
4
- import numpy as np
5
  from transformers import BertTokenizer
6
- from sklearn.preprocessing import OneHotEncoder
7
- import transformers
8
- import torch
9
- import torch.nn as nn
10
- import torch.nn.functional as F
11
-
12
class AttentionPool(nn.Module):
    """Collapse a sequence of hidden states into one vector per example.

    A single linear layer scores every position; the scores are turned into
    weights with a softmax over the sequence axis, and the output is the
    weighted sum of the hidden states.
    """

    def __init__(self, hidden_size):
        """Create the scoring layer.

        Args:
            hidden_size: Size of the last dimension of the hidden states.
        """
        super().__init__()
        # Maps each hidden state to one scalar attention score.
        self.attention = nn.Linear(hidden_size, 1)

    def forward(self, last_hidden_state, attention_mask=None):
        """Return the attention-weighted sum of ``last_hidden_state``.

        Args:
            last_hidden_state: 3-D tensor (``torch.bmm`` below requires it);
                dimension 1 is the axis that is pooled away.
            attention_mask: Optional tensor broadcastable to the first two
                dimensions of ``last_hidden_state``. Positions equal to 0 are
                excluded from the softmax. ``None`` (the default) weights
                every position, which is the original behaviour.

        Returns:
            Tensor with dimension 1 removed (one pooled vector per example).
        """
        attention_scores = self.attention(last_hidden_state).squeeze(-1)
        if attention_mask is not None:
            # Use the dtype's most negative finite value rather than -inf so a
            # fully masked row yields uniform weights instead of NaN.
            floor = torch.finfo(attention_scores.dtype).min
            attention_scores = attention_scores.masked_fill(attention_mask == 0, floor)
        attention_weights = F.softmax(attention_scores, dim=1)
        # (batch, 1, seq) @ (batch, seq, hidden) -> (batch, 1, hidden)
        pooled_output = torch.bmm(attention_weights.unsqueeze(1), last_hidden_state).squeeze(1)
        return pooled_output
22
 
23
class MultiSampleDropout(nn.Module):
    """Apply dropout several times to the same input and average the results."""

    def __init__(self, dropout=0.5, num_samples=5):
        """Store the dropout layer and the number of draws.

        Args:
            dropout: Drop probability handed to ``nn.Dropout``.
            num_samples: How many independent dropout passes are averaged.
        """
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.num_samples = num_samples

    def forward(self, x):
        """Return the element-wise mean of ``num_samples`` dropout passes over ``x``."""
        draws = []
        for _ in range(self.num_samples):
            draws.append(self.dropout(x))
        # Stack along a new leading axis, then average that axis away.
        stacked = torch.stack(draws)
        return stacked.mean(dim=0)
31
 
 
32
 
33
class ImprovedBERTClass(nn.Module):
    """BERT encoder followed by attention pooling and a linear classifier.

    Pipeline: BERT -> AttentionPool -> MultiSampleDropout -> LayerNorm ->
    Linear. The forward pass returns raw logits; callers apply the
    activation they need.
    """

    def __init__(self, num_classes=13, pretrained_name='bert-base-uncased'):
        """Build the encoder and the classification head.

        Args:
            num_classes: Number of output logits.
            pretrained_name: Checkpoint name or path handed to
                ``BertModel.from_pretrained``. The default keeps the
                original hard-coded checkpoint.
        """
        super().__init__()
        self.bert = transformers.BertModel.from_pretrained(pretrained_name)
        # Read the width from the encoder instead of hard-coding 768 so the
        # head always matches whichever checkpoint was loaded.
        hidden_size = self.bert.config.hidden_size
        self.attention_pool = AttentionPool(hidden_size)
        self.dropout = MultiSampleDropout()
        self.norm = nn.LayerNorm(hidden_size)
        self.classifier = nn.Linear(hidden_size, num_classes)

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Return classification logits for a tokenized batch.

        Args:
            input_ids: Token ids passed positionally to the BERT encoder.
            attention_mask: Attention mask forwarded to the BERT encoder.
            token_type_ids: Segment ids forwarded to the BERT encoder.

        Returns:
            The output of the final linear layer (unnormalised logits).
        """
        bert_output = self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        pooled_output = self.attention_pool(bert_output.last_hidden_state)
        pooled_output = self.dropout(pooled_output)
        pooled_output = self.norm(pooled_output)
        logits = self.classifier(pooled_output)
        return logits
49
-
50
def handler(data, context):
    """Handle incoming requests to the SageMaker endpoint.

    Args:
        data: Readable byte stream holding a UTF-8 JSON document. The keys
            read are ``text`` (the query, default ``''``) and ``k`` (how many
            top predictions to return, default 3).
        context: Request context; ``request_content_type`` is checked here
            and the object is forwarded to ``load_model_and_tokenizer``.

    Returns:
        Tuple ``(json_body, 'application/json')`` where ``json_body`` encodes
        ``most_likely_card`` and ``top_k_predictions`` (``[card, probability]``
        pairs, highest probability first).

    Raises:
        ValueError: If the request content type is not ``application/json``.
    """
    if context.request_content_type != 'application/json':
        raise ValueError("This model only supports application/json input")

    # Set up device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load model and tokenizer (consider caching these for better performance)
    model, tokenizer = load_model_and_tokenizer(context)

    # Process the input data
    input_data = json.loads(data.read().decode('utf-8'))
    query = input_data.get('text', '')
    k = input_data.get('k', 3)  # Default to top 3 if not specified

    # Tokenize and prepare the input
    inputs = tokenizer.encode_plus(
        query,
        add_special_tokens=True,
        max_length=64,
        padding='max_length',
        return_tensors='pt',
        truncation=True
    )
    ids = inputs['input_ids'].to(device, dtype=torch.long)
    mask = inputs['attention_mask'].to(device, dtype=torch.long)
    token_type_ids = inputs['token_type_ids'].to(device, dtype=torch.long)

    # Make the prediction
    model.eval()
    with torch.no_grad():
        outputs = model(ids, mask, token_type_ids)

    # Sigmoid rather than softmax: every label gets an independent probability.
    probabilities = torch.sigmoid(outputs).cpu().detach().numpy().flatten()
    num_labels = len(probabilities)

    # Clamp k to a usable range. Previously k == 0, a negative k, or a k larger
    # than the number of labels made the one-hot matrix and the index list
    # disagree in length and the decoding step failed.
    k = max(1, min(int(k), num_labels))

    # Indices of the k highest probabilities, best first.
    top_k_indices = np.argsort(probabilities)[::-1][:k]
    top_k_probs = probabilities[top_k_indices]

    # Row i of the identity matrix is the one-hot vector for label index i.
    one_hot_rows = np.eye(num_labels)
    top_k_cards = [decode_vector(one_hot_rows[idx].reshape(1, -1)) for idx in top_k_indices]

    # Create a list of tuples (card, probability) for top k predictions
    top_k_predictions = list(zip(top_k_cards, top_k_probs.tolist()))

    # Determine the most likely card. When nothing clears the 0.5 threshold
    # the fallback is "Answer"; otherwise decode the single highest-probability
    # label. (Decoding the thresholded multi-hot vector, as before, returned
    # whichever flagged label came first rather than the most probable one.)
    if (probabilities > 0.5).any():
        best_index = int(np.argmax(probabilities))
        most_likely_card = decode_vector(one_hot_rows[best_index].reshape(1, -1))
    else:
        most_likely_card = "Answer"

    # Prepare the response
    result = {
        "most_likely_card": most_likely_card,
        "top_k_predictions": top_k_predictions
    }

    return json.dumps(result), 'application/json'
120
-
121
-
122
def load_model_and_tokenizer(context):
    """Load the PyTorch model and tokenizer.

    Also (re)creates the module-level ``global_encoder`` that
    ``decode_vector`` relies on, fitted on the fixed list of card labels.

    Args:
        context: Request context. Its ``model_dir`` attribute is used when
            present and non-empty; otherwise the ``SM_MODEL_DIR`` environment
            variable, then ``/opt/ml/model``.

    Returns:
        Tuple ``(model, tokenizer)`` with the model in eval mode on CUDA when
        available, else on CPU.
    """
    global global_encoder
    labels = ['Videos', 'Unit Conversion', 'Translation', 'Shopping Product Comparison', 'Restaurants', 'Product', 'Information', 'Images', 'Gift', 'General Comparison', 'Flights', 'Answer', 'Aircraft Seat Map']

    # Fall back to the environment when the attribute is missing or None.
    model_dir = getattr(context, 'model_dir', None) or os.environ.get('SM_MODEL_DIR', '/opt/ml/model')
    model_path = os.path.join(model_dir, 'model.pth')

    # config.json used to be opened here, but its contents were never used, so
    # a missing file was a needless hard failure; the read has been dropped.

    # Initialize the encoder used to turn one-hot rows back into label strings.
    # NOTE(review): sklearn's OneHotEncoder orders its categories itself
    # (sorted), not in the order of this list - confirm that matches training.
    global_encoder = OneHotEncoder(sparse_output=False)
    global_encoder.fit(np.array(labels).reshape(-1, 1))

    model = ImprovedBERTClass()
    # Deserialize on CPU first so loading also works on machines without a GPU.
    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    model.eval()

    # Load tokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    return model, tokenizer
151
-
152
-
153
def decode_vector(vector):
    """Translate an encoded row back into its label string.

    Args:
        vector: 2-D array handed straight to
            ``global_encoder.inverse_transform``; only the first row of the
            result is used.

    Returns:
        The first element of the first decoded row.
    """
    # Reading a module-level name needs no ``global`` declaration.
    decoded_rows = global_encoder.inverse_transform(vector)
    first_row = decoded_rows[0]
    return first_row[0]  # Returns the label as a string
 
1
  import os
2
+ import sys
3
  import json
4
  import torch
5
+ from ts.torch_handler.base_handler import BaseHandler
6
  from transformers import BertTokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ # Add the model directory to the Python path
9
+ model_dir = os.path.dirname(os.path.abspath(__file__))
10
+ sys.path.append(model_dir)
 
 
 
 
 
11
 
12
+ from model import ImprovedBERTClass # Ensure this import matches your model file name
13
 
14
class UICardMappingHandler(BaseHandler):
    """TorchServe handler that scores a text query against the UI card labels.

    ``preprocess`` tokenizes the request texts, ``inference`` runs the model
    and applies a sigmoid, and ``postprocess`` turns each row of
    probabilities into a ``most_likely_card`` plus a ranked top-k list.
    """

    # Output index i of the model is reported as LABELS[i].
    # NOTE(review): the earlier SageMaker handler decoded indices through a
    # sklearn OneHotEncoder fitted on this same list; that encoder sorts its
    # categories, which is the reverse of the order below. Confirm which
    # ordering the model was trained with.
    LABELS = ['Videos', 'Unit Conversion', 'Translation', 'Shopping Product Comparison', 'Restaurants', 'Product', 'Information', 'Images', 'Gift', 'General Comparison', 'Flights', 'Answer', 'Aircraft Seat Map']
    TOP_K = 3  # You can adjust this value
    THRESHOLD = 0.5  # Below this for every label, the answer falls back to "Answer"
    MAX_LENGTH = 64  # Token length every query is padded/truncated to

    def __init__(self):
        super().__init__()
        self.initialized = False

    def initialize(self, context):
        """Load the tokenizer and model weights from the model directory.

        Args:
            context: Handler context; ``manifest`` and ``system_properties``
                (keys ``model_dir`` and ``gpu_id``) are read from it.
        """
        self.manifest = context.manifest
        properties = context.system_properties
        model_dir = properties.get("model_dir")

        # Only build a CUDA device when a GPU id was actually assigned;
        # otherwise the device string would become the invalid "cuda:None".
        gpu_id = properties.get("gpu_id")
        if torch.cuda.is_available() and gpu_id is not None:
            self.device = torch.device("cuda:" + str(gpu_id))
        else:
            self.device = torch.device("cpu")

        self.tokenizer = BertTokenizer.from_pretrained(model_dir)
        self.model = ImprovedBERTClass()
        self.model.load_state_dict(torch.load(os.path.join(model_dir, 'model.pth'), map_location=self.device))
        self.model.to(self.device)
        self.model.eval()

        self.initialized = True

    @staticmethod
    def _extract_text(request):
        """Pull the query string out of one request entry.

        Accepts the payload under ``data`` or ``body`` as raw bytes, a plain
        string, a JSON object string, or an already-parsed dict; for JSON
        objects the ``text`` key is used.

        Raises:
            ValueError: If no string query can be obtained.
        """
        payload = request.get("data")
        if payload is None:
            payload = request.get("body")
        if isinstance(payload, (bytes, bytearray)):
            payload = payload.decode("utf-8")
        if isinstance(payload, str):
            try:
                parsed = json.loads(payload)
            except ValueError:
                return payload  # Not JSON: treat the string itself as the query
            if isinstance(parsed, dict):
                payload = parsed
        if isinstance(payload, dict):
            payload = payload.get("text", "")
        if not isinstance(payload, str):
            raise ValueError("Request must contain the query text under 'data' or 'body'")
        return payload

    def preprocess(self, data):
        """Tokenize every request in the batch.

        Args:
            data: List of request dicts, one per request in the batch.

        Returns:
            The tokenizer output moved to ``self.device``.
        """
        texts = [self._extract_text(request) for request in data]
        inputs = self.tokenizer(texts, return_tensors="pt", max_length=self.MAX_LENGTH, padding='max_length', truncation=True)
        return inputs.to(self.device)

    def inference(self, inputs):
        """Run the model without gradients and return per-label probabilities."""
        with torch.no_grad():
            outputs = self.model(**inputs)
        # ImprovedBERTClass comes from model.py, which is not visible here; the
        # version formerly defined in this file returned the logits tensor
        # itself, so accept either a bare tensor or an object with `.logits`.
        logits = getattr(outputs, "logits", outputs)
        return torch.sigmoid(logits)

    def postprocess(self, inference_output):
        """Convert probabilities into one result dict per request.

        Args:
            inference_output: Tensor of probabilities whose last dimension
                indexes the labels.

        Returns:
            List with one dict per row, each holding ``most_likely_card`` and
            ``top_k_predictions`` (``{"card", "probability"}`` dicts, highest
            probability first).
        """
        batch = inference_output.detach().cpu().numpy()
        # Normalise to (rows, labels) so a single un-batched row also works.
        batch = batch.reshape(-1, batch.shape[-1])
        labels = self.LABELS

        results = []
        for probabilities in batch:
            top_k = max(1, min(self.TOP_K, probabilities.size))
            top_k_indices = probabilities.argsort()[::-1][:top_k]
            top_k_predictions = [
                {"card": labels[int(i)], "probability": float(probabilities[i])}
                for i in top_k_indices
            ]

            if (probabilities > self.THRESHOLD).any():
                most_likely_card = labels[int(probabilities.argmax())]
            else:
                most_likely_card = "Answer"

            results.append({
                "most_likely_card": most_likely_card,
                "top_k_predictions": top_k_predictions
            })

        # One entry per request, in request order.
        return results