Spaces:
Running
Running
Use processor instead
Browse files
app.py
CHANGED
@@ -14,12 +14,11 @@ os.system('pip install -q pytesseract')
|
|
14 |
|
15 |
import gradio as gr
|
16 |
import numpy as np
|
17 |
-
from transformers import LayoutLMv2FeatureExtractor, LayoutLMv2TokenizerFast, LayoutLMv2ForTokenClassification
|
18 |
from datasets import load_dataset
|
19 |
from PIL import Image, ImageDraw, ImageFont
|
20 |
|
21 |
-
|
22 |
-
tokenizer = LayoutLMv2TokenizerFast.from_pretrained("microsoft/layoutlmv2-base-uncased")
|
23 |
model = LayoutLMv2ForTokenClassification.from_pretrained("nielsr/layoutlmv2-finetuned-funsd")
|
24 |
|
25 |
# load image example
|
@@ -48,15 +47,10 @@ def iob_to_label(label):
|
|
48 |
|
49 |
def process_image(image):
|
50 |
width, height = image.size
|
51 |
-
|
52 |
-
# get words, boxes
|
53 |
-
encoding_feature_extractor = feature_extractor(image, return_tensors="pt")
|
54 |
-
words, boxes = encoding_feature_extractor.words, encoding_feature_extractor.boxes
|
55 |
|
56 |
# encode
|
57 |
encoding = tokenizer(words, boxes=boxes, truncation=True, return_offsets_mapping=True, return_tensors="pt")
|
58 |
offset_mapping = encoding.pop('offset_mapping')
|
59 |
-
encoding["image"] = encoding_feature_extractor.pixel_values
|
60 |
|
61 |
# forward pass
|
62 |
outputs = model(**encoding)
|
|
|
14 |
|
15 |
import gradio as gr
|
16 |
import numpy as np
|
17 |
+
from transformers import LayoutLMv2Processor, LayoutLMv2ForTokenClassification
|
18 |
from datasets import load_dataset
|
19 |
from PIL import Image, ImageDraw, ImageFont
|
20 |
|
21 |
+
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")
|
|
|
22 |
model = LayoutLMv2ForTokenClassification.from_pretrained("nielsr/layoutlmv2-finetuned-funsd")
|
23 |
|
24 |
# load image example
|
|
|
47 |
|
48 |
def process_image(image):
|
49 |
width, height = image.size
|
|
|
|
|
|
|
|
|
50 |
|
51 |
# encode
|
52 |
encoding = tokenizer(words, boxes=boxes, truncation=True, return_offsets_mapping=True, return_tensors="pt")
|
53 |
offset_mapping = encoding.pop('offset_mapping')
|
|
|
54 |
|
55 |
# forward pass
|
56 |
outputs = model(**encoding)
|