DinoLiu committed
Commit a94233d
Parent: 02c198f

update the model

Files changed (6):
  1. README.md +42 -1
  2. handler.py +111 -0
  3. inference.py +1 -1
  4. pytorch_model.bin → model.pth +0 -0
  5. model.py +1 -1
  6. requirements.txt +4 -4
README.md CHANGED
@@ -11,4 +11,45 @@ metrics:
 - accuracy
 - f1
 ---
-This a fine-tuned bert model for card mapping in genUI.
+# Fine-tuned BERT Model for Card Mapping in genUI
+
+This repository contains a fine-tuned BERT model for card mapping in genUI.
+
+## Model Details
+
+- **Language**: English
+- **Framework**: PyTorch
+- **Task**: Text Classification
+- **Model Type**: Custom BERT
+- **Datasets**: Custom
+- **Metrics**: Accuracy, F1 Score
+
+## Getting Started
+
+### Prerequisites
+
+- Python 3.7+
+- PyTorch
+- Transformers library
+
+### Installation
+
+1. Clone the repository:
+
+git clone https://github.com/yourusername/genui-card-mapping.git
+
+cd genui-card-mapping
+
+2. Install the required packages:
+
+pip install -r requirements.txt
+
+### Usage
+
+1. Run the inference script:
+
+python inference.py
+
+You may need to modify the inference.py to fit your needs.
+
+##ex
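
For quick local testing outside inference.py, a minimal sketch of direct inference is shown below. It assumes the repository's `ImprovedBERTClass` (model.py), the `model.pth` checkpoint, and the `bert-base-uncased` tokenizer with 64-token inputs used by handler.py; the example query is illustrative only.

```python
# Minimal sketch (assumptions: model.py / model.pth from this repo,
# bert-base-uncased tokenizer, 64-token inputs as in handler.py).
import torch
from transformers import BertTokenizer
from model import ImprovedBERTClass

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the fine-tuned weights into the custom BERT classifier
model = ImprovedBERTClass()
model.load_state_dict(torch.load("model.pth", map_location=device))
model.to(device)
model.eval()

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
inputs = tokenizer.encode_plus(
    "Compare prices for noise-cancelling headphones",  # illustrative query
    add_special_tokens=True,
    max_length=64,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
)

with torch.no_grad():
    logits = model(
        inputs["input_ids"].to(device),
        inputs["attention_mask"].to(device),
        inputs["token_type_ids"].to(device),
    )

# One sigmoid score per card label (see the label list in handler.py)
probabilities = torch.sigmoid(logits).cpu().numpy().flatten()
print(probabilities)
```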
handler.py ADDED
@@ -0,0 +1,111 @@
+import os
+import json
+import torch
+import numpy as np
+from transformers import BertTokenizer
+from ts.torch_handler.base_handler import BaseHandler
+from model import ImprovedBERTClass
+from sklearn.preprocessing import OneHotEncoder
+
+class UICardMappingHandler(BaseHandler):
+    def __init__(self):
+        super().__init__()
+        self.initialized = False
+
+    def initialize(self, context):
+        self.manifest = context.manifest
+        properties = context.system_properties
+        model_dir = properties.get("model_dir")
+
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+        # Load config
+        with open(os.path.join(model_dir, 'config.json'), 'r') as f:
+            self.config = json.load(f)
+
+        # Initialize encoder and labels
+        self.labels = ['Videos', 'Unit Conversion', 'Translation', 'Shopping Product Comparison', 'Restaurants', 'Product', 'Information', 'Images', 'Gift', 'General Comparison', 'Flights', 'Answer', 'Aircraft Seat Map']
+        labels_np = np.array(self.labels).reshape(-1, 1)
+        self.encoder = OneHotEncoder(sparse_output=False)
+        self.encoder.fit(labels_np)
+
+        # Load model
+        self.model = ImprovedBERTClass()
+        self.model.load_state_dict(torch.load(os.path.join(model_dir, 'model.pth'), map_location=self.device))
+        self.model.to(self.device)
+        self.model.eval()
+
+        # Load tokenizer
+        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+
+        self.initialized = True
+
+    def preprocess(self, data):
+        text = data[0].get("body").get("text", "")
+        k = data[0].get("body").get("k", 3)
+
+        inputs = self.tokenizer.encode_plus(
+            text,
+            add_special_tokens=True,
+            max_length=64,
+            padding='max_length',
+            return_tensors='pt',
+            truncation=True
+        )
+
+        return {
+            "ids": inputs['input_ids'].to(self.device, dtype=torch.long),
+            "mask": inputs['attention_mask'].to(self.device, dtype=torch.long),
+            "token_type_ids": inputs['token_type_ids'].to(self.device, dtype=torch.long),
+            "k": k
+        }
+
+    def inference(self, data):
+        with torch.no_grad():
+            outputs = self.model(data["ids"], data["mask"], data["token_type_ids"])
+            probabilities = torch.sigmoid(outputs)
+        return probabilities.cpu().detach().numpy().flatten(), data["k"]
+
+    def postprocess(self, inference_output):
+        probabilities, k = inference_output
+
+        # Get top k predictions
+        top_k_indices = np.argsort(probabilities)[-k:][::-1]
+        top_k_probs = probabilities[top_k_indices]
+
+        # Create one-hot encodings for top k indices
+        top_k_one_hot = np.zeros((k, len(probabilities)))
+        for i, idx in enumerate(top_k_indices):
+            top_k_one_hot[i, idx] = 1
+
+        # Decode the top k predictions
+        top_k_cards = [self.decode_vector(one_hot.reshape(1, -1)) for one_hot in top_k_one_hot]
+
+        # Create a list of tuples (card, probability) for top k predictions
+        top_k_predictions = list(zip(top_k_cards, top_k_probs.tolist()))
+
+        # Determine the most likely card
+        predicted_labels = (probabilities > 0.5).astype(int)
+        if sum(predicted_labels) == 0:
+            most_likely_card = "Answer"
+        else:
+            most_likely_card = self.decode_vector(predicted_labels.reshape(1, -1))
+
+        # Prepare the response
+        result = {
+            "most_likely_card": most_likely_card,
+            "top_k_predictions": top_k_predictions
+        }
+
+        return [result]
+
+    def decode_vector(self, vector):
+        original_label = self.encoder.inverse_transform(vector)
+        return original_label[0][0]  # Returns the label as a string
+
+    def handle(self, data, context):
+        self.context = context
+        data = self.preprocess(data)
+        data = self.inference(data)
+        data = self.postprocess(data)
+        return data
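
Once this handler is packaged and served with TorchServe, a client round trip would look roughly like the sketch below. The endpoint name `card_mapping` is an assumption (use whatever name the model archive is registered under), and the probabilities in the comment are illustrative; the response shape follows postprocess() above.

```python
# Rough client sketch for the TorchServe endpoint exposed by handler.py.
# Assumption: the archive was registered as "card_mapping" on localhost:8080.
import requests

resp = requests.post(
    "http://localhost:8080/predictions/card_mapping",
    json={"text": "Find me a flight from SFO to JFK", "k": 3},  # fields read in preprocess()
)
print(resp.json())
# Expected shape (values illustrative), per postprocess():
# {"most_likely_card": "Flights",
#  "top_k_predictions": [["Flights", 0.97], ["Answer", 0.11], ["Information", 0.05]]}
```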
inference.py CHANGED
@@ -87,7 +87,7 @@ def load_model_and_tokenizer(context):
 
     # Load config and model
     config_path = os.path.join(model_dir, 'config.json')
-    model_path = os.path.join(model_dir, 'pytorch_model.bin')
+    model_path = os.path.join(model_dir, 'model.pth')
 
     with open(config_path, 'r') as f:
         config = json.load(f)
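
This change only tracks the checkpoint rename (pytorch_model.bin → model.pth). A quick sanity check that the renamed file still loads could look like the sketch below; the `model_dir` path is an assumption.

```python
# Sanity-check sketch: the renamed checkpoint should still load into the model.
# Assumption: config.json and model.pth sit in the current directory.
import os
import torch
from model import ImprovedBERTClass

model_dir = "."
state_dict = torch.load(os.path.join(model_dir, "model.pth"), map_location="cpu")

model = ImprovedBERTClass()
model.load_state_dict(state_dict)  # raises if the keys no longer match
model.eval()
print("model.pth loaded OK")
```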
pytorch_model.bin → model.pth RENAMED
File without changes
model.py CHANGED
@@ -40,4 +40,4 @@ class ImprovedBERTClass(nn.Module):
         pooled_output = self.dropout(pooled_output)
         pooled_output = self.norm(pooled_output)
         logits = self.classifier(pooled_output)
-        return logits
+        return logits
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-numpy==1.21.0
-torch==1.9.0
-transformers==4.9.2
-scikit-learn==0.24.2
+numpy
+torch
+transformers
+scikit-learn