import os
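
# Install gdown at runtime, then download and unpack the pretrained ABINet
# workdir (configs and checkpoint) from Google Drive.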
os.system('pip install --upgrade gdown')

import gdown

gdown.download(id='1mYM_26qHUom_5NU7iutHneB_KHlLjL5y', output='workdir.zip')
os.system('unzip workdir.zip')


import glob

import gradio as gr

from demo import get_model, preprocess, postprocess, load
from utils import Config, Logger, CharsetMapper
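

# Load the config, restore the pretrained ABINet weights and build the charset
# once at startup, so each request only runs inference.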
config = Config('configs/train_abinet.yaml')
config.model_vision_checkpoint = None
model = get_model(config)
model = load(model, 'workdir/train-abinet/best-train-abinet.pth')
charset = CharsetMapper(filename=config.dataset_charset_path, max_length=config.dataset_max_length + 1)


def process_image(image):
    # Convert to RGB, resize/normalize to the model's input resolution,
    # run the model, and decode the fused ('alignment') prediction to text.
    img = image.convert('RGB')
    img = preprocess(img, config.dataset_image_width, config.dataset_image_height)
    res = model(img)
    return postprocess(res, charset, 'alignment')[0][0]


title = "Interactive demo: ABINet"

description = "Demo for ABINet. ABINet uses a vision model and an explicit language model, trained end-to-end, to recognize text in the wild. The language model (BCN) achieves a bidirectional language representation by simulating a cloze test, and additionally uses an iterative correction strategy. To use it, simply upload a (single text-line) image or use one of the example images below and click 'submit'. Results will show up in a few seconds."

article = "<p style='text-align: center'><a href='https://arxiv.org/pdf/2103.06495.pdf'>Read Like Humans: Autonomous, Bidirectional and Iterative Language Modeling for Scene Text Recognition</a> | <a href='https://github.com/FangShancheng/ABINet'>Github Repo</a></p>"
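

# Wire everything into a Gradio interface, using the repository's test images as examples.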
iface = gr.Interface(fn=process_image,
                     inputs=gr.inputs.Image(type="pil"),
                     outputs=gr.outputs.Textbox(),
                     title=title,
                     description=description,
                     article=article,
                     examples=glob.glob('figs/test/*.png'))
iface.launch(debug=True)