DinoLiu committed
Commit 02c198f
1 Parent(s): bc7142a

add the inference file

Files changed (4)
  1. inference.py +115 -0
  2. model.py +1 -1
  3. pytorch_model.bin +2 -2
  4. requirements.txt +3 -1
inference.py ADDED
@@ -0,0 +1,115 @@
+import os
+import json
+import torch
+import numpy as np
+from transformers import BertTokenizer
+from model import ImprovedBERTClass  # Ensure this import matches your model file name
+from sklearn.preprocessing import OneHotEncoder
+
+def handler(data, context):
+    """Handle incoming requests to the SageMaker endpoint."""
+
+    if context.request_content_type != 'application/json':
+        raise ValueError("This model only supports application/json input")
+
+    # Set up device
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    # Load model and tokenizer (consider caching these for better performance)
+    model, tokenizer = load_model_and_tokenizer(context)
+
+    # Process the input data
+    input_data = json.loads(data.read().decode('utf-8'))
+    query = input_data.get('text', '')
+    k = input_data.get('k', 3)  # Default to top 3 if not specified
+
+    # Tokenize and prepare the input
+    inputs = tokenizer.encode_plus(
+        query,
+        add_special_tokens=True,
+        max_length=64,
+        padding='max_length',
+        return_tensors='pt',
+        truncation=True
+    )
+    ids = inputs['input_ids'].to(device, dtype=torch.long)
+    mask = inputs['attention_mask'].to(device, dtype=torch.long)
+    token_type_ids = inputs['token_type_ids'].to(device, dtype=torch.long)
+
+    # Make the prediction
+    model.eval()
+    with torch.no_grad():
+        outputs = model(ids, mask, token_type_ids)
+
+    # Apply sigmoid for multi-label classification
+    probabilities = torch.sigmoid(outputs)
+
+    # Convert to numpy array
+    probabilities = probabilities.cpu().detach().numpy().flatten()
+
+    # Get top k predictions
+    top_k_indices = np.argsort(probabilities)[-k:][::-1]
+    top_k_probs = probabilities[top_k_indices]
+
+    # Create one-hot encodings for top k indices
+    top_k_one_hot = np.zeros((k, len(probabilities)))
+    for i, idx in enumerate(top_k_indices):
+        top_k_one_hot[i, idx] = 1
+
+    # Decode the top k predictions
+    top_k_cards = [decode_vector(one_hot.reshape(1, -1)) for one_hot in top_k_one_hot]
+
+    # Create a list of tuples (card, probability) for top k predictions
+    top_k_predictions = list(zip(top_k_cards, top_k_probs.tolist()))
+
+    # Determine the most likely card
+    predicted_labels = (probabilities > 0.5).astype(int)
+    if sum(predicted_labels) == 0:
+        most_likely_card = "Answer"
+    else:
+        most_likely_card = decode_vector(predicted_labels.reshape(1, -1))
+
+    # Prepare the response
+    result = {
+        "most_likely_card": most_likely_card,
+        "top_k_predictions": top_k_predictions
+    }
+
+    return json.dumps(result), 'application/json'
+
+
+def load_model_and_tokenizer(context):
+    """Load the PyTorch model and tokenizer."""
+    global global_encoder
+    labels = ['Videos', 'Unit Conversion', 'Translation', 'Shopping Product Comparison', 'Restaurants', 'Product', 'Information', 'Images', 'Gift', 'General Comparison', 'Flights', 'Answer', 'Aircraft Seat Map']
+
+    model_dir = context.model_dir if hasattr(context, 'model_dir') else os.environ.get('SM_MODEL_DIR', '/opt/ml/model')
+
+    # Load config and model
+    config_path = os.path.join(model_dir, 'config.json')
+    model_path = os.path.join(model_dir, 'pytorch_model.bin')
+
+    with open(config_path, 'r') as f:
+        config = json.load(f)
+
+    # Initialize the encoder and labels
+    global_labels = labels
+    labels_np = np.array(global_labels).reshape(-1, 1)
+    global_encoder = OneHotEncoder(sparse=False)  # scikit-learn 0.24 (pinned in requirements.txt) uses `sparse`; 1.2+ renamed it to `sparse_output`
+    global_encoder.fit(labels_np)
+
+    model = ImprovedBERTClass()
+    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
+    model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+    model.eval()
+
+    # Load tokenizer
+    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+
+    return model, tokenizer
+
+
+def decode_vector(vector):
+    global global_encoder
+    original_label = global_encoder.inverse_transform(vector)
+    return original_label[0][0]  # Returns the label as a string
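
The handler expects a JSON body with a `text` field and an optional `k`. A minimal local smoke test (not part of the commit), assuming the artifacts `config.json` and `pytorch_model.bin` sit in a hypothetical local directory `./model`, could stub the SageMaker `data`/`context` pair like this:

import io
import json
from types import SimpleNamespace

from inference import handler

# `data` is any object with .read() returning bytes; `context` only needs the
# attributes the handler actually touches (request_content_type, model_dir).
payload = json.dumps({"text": "compare prices for wireless headphones", "k": 3})  # sample query (hypothetical)
context = SimpleNamespace(request_content_type="application/json", model_dir="./model")  # assumed artifact dir

body, content_type = handler(io.BytesIO(payload.encode("utf-8")), context)
result = json.loads(body)
print(content_type)                  # application/json
print(result["most_likely_card"])   # single label, or "Answer" if nothing clears 0.5
print(result["top_k_predictions"])  # [[label, probability], ...] for the top k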
model.py CHANGED
@@ -1,8 +1,8 @@
 
 import transformers
 import torch
-from transformers import BertTokenizer, BertModel, BertConfig
 import torch.nn as nn
+import torch.nn.functional as F
 
 class AttentionPool(nn.Module):
     def __init__(self, hidden_size):
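
The hunk shows only the first lines of `AttentionPool`, so the repository's actual implementation is not visible here; the newly imported `F` suggests functional ops such as a softmax over token scores. Purely as an illustration of the attention-pooling pattern (a sketch, not this model's code):

import torch
import torch.nn as nn
import torch.nn.functional as F

class AttentionPoolSketch(nn.Module):
    """Illustrative attention pooling: collapse (batch, seq, hidden) to (batch, hidden)."""
    def __init__(self, hidden_size):
        super().__init__()
        self.attention = nn.Linear(hidden_size, 1)  # one scalar score per token

    def forward(self, hidden_states, attention_mask=None):
        scores = self.attention(hidden_states).squeeze(-1)          # (batch, seq)
        if attention_mask is not None:
            scores = scores.masked_fill(attention_mask == 0, -1e9)  # ignore padding tokens
        weights = F.softmax(scores, dim=-1)                         # normalize over the sequence
        return torch.einsum("bs,bsh->bh", weights, hidden_states)   # weighted sum of token states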
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb04d745e5cccb78ac9c40836014b9ea1a861e4b435c798ddd27cd8f2514ef5e
-size 438063716
+oid sha256:3a3e55ee4b24285f21c615afd035ed1a89ed9016ff73dbe669313a643b5b5250
+size 438062398
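
Since `pytorch_model.bin` is stored via Git LFS, the pointer's `oid sha256:` field is the SHA-256 of the actual file body. A quick way to check that a downloaded copy matches the new pointer:

import hashlib

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file through SHA-256 in 1 MiB chunks to avoid loading ~438 MB at once."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Should equal the oid in the updated pointer above.
assert lfs_sha256("pytorch_model.bin") == "3a3e55ee4b24285f21c615afd035ed1a89ed9016ff73dbe669313a643b5b5250"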
requirements.txt CHANGED
@@ -1,2 +1,4 @@
+numpy==1.21.0
 torch==1.9.0
-transformers==4.11.3
+transformers==4.9.2
+scikit-learn==0.24.2
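
These pins matter for the new inference code: scikit-learn 0.24.2 is why `OneHotEncoder` takes `sparse=` rather than the newer `sparse_output=`, and transformers is moved back from 4.11.3 to 4.9.2. A small sanity check (assuming Python 3.8+ for `importlib.metadata`) that an environment matches the pins:

from importlib.metadata import version

# Distribution names as they appear in requirements.txt.
pins = {"numpy": "1.21.0", "torch": "1.9.0", "transformers": "4.9.2", "scikit-learn": "0.24.2"}
for package, expected in pins.items():
    installed = version(package)
    status = "OK" if installed == expected else f"MISMATCH (expected {expected})"
    print(f"{package}=={installed}  {status}")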