import math

from PIL import Image
import torch
from torch.nn.functional import softmax

from utils import *  # provides NormalizePAD and custom_mean


class Recognizer:
    def __init__(
        self,
        model_file,
        device="cpu",
    ):
        # The checkpoint bundles the model and its label converter together.
        # map_location keeps CPU-only machines from failing on GPU-saved weights.
        weights = torch.load(model_file, map_location=device)
        self.model = weights["model"]
        self.converter = weights["converter"]

        # Input geometry is read back from the options stored on the model.
        self.opt = self.model.opt
        self.imgH = self.opt.imgH
        self.imgW = self.opt.imgW
        self.input_channel = self.opt.input_channel

        self.device = device
        self.model.to(device)
        self.model.eval()

    def __call__(
        self,
        imgs,
    ):
        results = []
        # Normalize and pad every crop to the fixed (C, H, W) the model expects.
        transform = NormalizePAD((self.input_channel, self.imgH, self.imgW))
        with torch.no_grad():
            for img in imgs:
                # Accept numpy arrays; work in grayscale.
                img = Image.fromarray(img).convert("L")
                w, h = img.size

                # Resize to the model height, preserving aspect ratio and
                # capping the width at the model's maximum input width.
                ratio = w / float(h)
                if math.ceil(self.imgH * ratio) > self.imgW:
                    resized_w = self.imgW
                else:
                    resized_w = math.ceil(self.imgH * ratio)
                img = img.resize((resized_w, self.imgH), Image.BICUBIC)

                img = transform(img)
                img = img.unsqueeze(0)  # add batch dimension
                img = img.to(self.device)

                # Dummy text input required by the model's forward signature.
                text_for_pred = torch.LongTensor(1, w // 10 + 1).fill_(0).to(self.device)
                preds = self.model(img, text_for_pred)
                preds_size = [preds.size(1)]

                # Per-timestep character probabilities and their argmax indices.
                preds_prob = softmax(preds, dim=-1).squeeze().cpu().detach().numpy()
                values = preds_prob.max(axis=-1)
                indices = preds_prob.argmax(axis=-1)

                preds_str = self.converter.decode_greedy(indices.ravel(), preds_size)[0]
                confidence_score = custom_mean(values)
                results.append([preds_str, confidence_score])

        return results
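

if __name__ == "__main__":
    # Minimal usage sketch. The checkpoint name "recognizer.pth" and the image
    # path "word_crop.png" are placeholders (assumptions), and OpenCV is just
    # one way to load crops as numpy arrays for the recognizer.
    import cv2

    recognizer = Recognizer("recognizer.pth", device="cpu")
    crops = [cv2.imread("word_crop.png", cv2.IMREAD_GRAYSCALE)]
    for text, confidence in recognizer(crops):
        print(f"{text}\t{confidence:.3f}")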