Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,6 @@ import time
|
|
3 |
import cv2
|
4 |
import gradio as gr
|
5 |
from lineless_table_rec import LinelessTableRecognition
|
6 |
-
from paddleocr import PPStructure
|
7 |
from rapid_table import RapidTable
|
8 |
from rapidocr_onnxruntime import RapidOCR
|
9 |
from table_cls import TableCls
|
@@ -25,17 +24,16 @@ table_engine_list = [
|
|
25 |
"RapidTable(SLANet)",
|
26 |
"RapidTable(SLANet-plus)",
|
27 |
"wired_table_v2",
|
28 |
-
"pp_table",
|
29 |
"wired_table_v1",
|
30 |
"lineless_table"
|
31 |
]
|
32 |
|
33 |
# 示例图片路径
|
34 |
example_images = [
|
35 |
-
"images/wired1.
|
36 |
-
"images/wired2.
|
37 |
"images/wired3.png",
|
38 |
-
"images/lineless1.
|
39 |
"images/wired4.jpg",
|
40 |
"images/lineless2.png",
|
41 |
"images/wired5.jpg",
|
@@ -57,15 +55,6 @@ for det_model in det_model_dir.keys():
|
|
57 |
rec_model_path = rec_model_dir[rec_model]
|
58 |
key = f"{det_model}_{rec_model}"
|
59 |
ocr_engine_dict[key] = RapidOCR(det_model_path=det_model_path, rec_model_path=rec_model_path)
|
60 |
-
pp_engine_dict[key] = PPStructure(
|
61 |
-
layout=False,
|
62 |
-
show_log=False,
|
63 |
-
table=True,
|
64 |
-
use_onnx=True,
|
65 |
-
table_model_dir=table_rec_path,
|
66 |
-
det_model_dir=det_model_path,
|
67 |
-
rec_model_dir=rec_model_path
|
68 |
-
)
|
69 |
|
70 |
def trans_char_ocr_res(ocr_res):
|
71 |
word_result = []
|
@@ -95,8 +84,6 @@ def select_table_model(img, table_engine_type, det_model, rec_model):
|
|
95 |
return wired_table_engine_v2, table_engine_type
|
96 |
elif table_engine_type == "lineless_table":
|
97 |
return lineless_table_engine, table_engine_type
|
98 |
-
elif table_engine_type == "pp_table":
|
99 |
-
return pp_engine_dict[f"{det_model}_{rec_model}"], 0
|
100 |
elif table_engine_type == "auto":
|
101 |
cls, elasp = table_cls(img)
|
102 |
if cls == 'wired':
|
@@ -113,30 +100,22 @@ def process_image(img_input, small_box_cut_enhance, table_engine_type, char_ocr,
|
|
113 |
table_engine, talbe_type = select_table_model(img, table_engine_type, det_model, rec_model)
|
114 |
ocr_engine = select_ocr_model(det_model, rec_model)
|
115 |
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
|
|
|
|
|
|
120 |
polygons = [[polygon[0], polygon[1], polygon[4], polygon[5]] for polygon in polygons]
|
121 |
-
|
122 |
-
|
123 |
-
else:
|
124 |
-
ocr_res, ocr_infer_elapse = ocr_engine(img, return_word_box=char_ocr)
|
125 |
-
det_cost, cls_cost, rec_cost = ocr_infer_elapse
|
126 |
-
if char_ocr:
|
127 |
-
ocr_res = trans_char_ocr_res(ocr_res)
|
128 |
-
ocr_boxes = [box_4_2_poly_to_box_4_1(ori_ocr[0]) for ori_ocr in ocr_res]
|
129 |
-
if isinstance(table_engine, RapidTable):
|
130 |
-
html, polygons, table_rec_elapse = table_engine(img, ocr_result=ocr_res)
|
131 |
-
polygons = [[polygon[0], polygon[1], polygon[4], polygon[5]] for polygon in polygons]
|
132 |
-
elif isinstance(table_engine, (WiredTableRecognition, LinelessTableRecognition)):
|
133 |
-
html, table_rec_elapse, polygons, logic_points, ocr_res = table_engine(img, ocr_result=ocr_res,
|
134 |
enhance_box_line=small_box_cut_enhance,
|
135 |
rotated_fix=rotated_fix,
|
136 |
col_threshold=col_threshold,
|
137 |
row_threshold=row_threshold)
|
138 |
-
|
139 |
-
|
140 |
|
141 |
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
142 |
table_boxes_img = plot_rec_box(img.copy(), polygons)
|
@@ -165,7 +144,7 @@ def main():
|
|
165 |
}
|
166 |
""") as demo:
|
167 |
gr.HTML(
|
168 |
-
"<h1 style='text-align: center;'><a href='https://github.com/RapidAI/TableStructureRec?tab=readme-ov-file'>TableStructureRec</a></h1>"
|
169 |
)
|
170 |
gr.HTML('''
|
171 |
<div class="header-links">
|
@@ -174,6 +153,7 @@ def main():
|
|
174 |
<a href="https://pypi.org/project/lineless-table-rec/"><img alt="PyPI" src="https://img.shields.io/pypi/v/lineless-table-rec"></a>
|
175 |
<a href="https://pepy.tech/project/lineless-table-rec"><img src="https://static.pepy.tech/personalized-badge/lineless-table-rec?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads%20Lineless"></a>
|
176 |
<a href="https://pepy.tech/project/wired-table-rec"><img src="https://static.pepy.tech/personalized-badge/wired-table-rec?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads%20Wired"></a>
|
|
|
177 |
<a href="https://semver.org/"><img alt="SemVer2.0" src="https://img.shields.io/badge/SemVer-2.0-brightgreen"></a>
|
178 |
<a href="https://github.com/psf/black"><img src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
|
179 |
<a href="https://github.com/RapidAI/TableStructureRec/blob/c41bbd23898cb27a957ed962b0ffee3c74dfeff1/LICENSE"><img alt="GitHub" src="https://img.shields.io/badge/license-Apache 2.0-blue"></a>
|
|
|
3 |
import cv2
|
4 |
import gradio as gr
|
5 |
from lineless_table_rec import LinelessTableRecognition
|
|
|
6 |
from rapid_table import RapidTable
|
7 |
from rapidocr_onnxruntime import RapidOCR
|
8 |
from table_cls import TableCls
|
|
|
24 |
"RapidTable(SLANet)",
|
25 |
"RapidTable(SLANet-plus)",
|
26 |
"wired_table_v2",
|
|
|
27 |
"wired_table_v1",
|
28 |
"lineless_table"
|
29 |
]
|
30 |
|
31 |
# 示例图片路径
|
32 |
example_images = [
|
33 |
+
"images/wired1.jpg",
|
34 |
+
"images/wired2.png",
|
35 |
"images/wired3.png",
|
36 |
+
"images/lineless1.jpg",
|
37 |
"images/wired4.jpg",
|
38 |
"images/lineless2.png",
|
39 |
"images/wired5.jpg",
|
|
|
55 |
rec_model_path = rec_model_dir[rec_model]
|
56 |
key = f"{det_model}_{rec_model}"
|
57 |
ocr_engine_dict[key] = RapidOCR(det_model_path=det_model_path, rec_model_path=rec_model_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
def trans_char_ocr_res(ocr_res):
|
60 |
word_result = []
|
|
|
84 |
return wired_table_engine_v2, table_engine_type
|
85 |
elif table_engine_type == "lineless_table":
|
86 |
return lineless_table_engine, table_engine_type
|
|
|
|
|
87 |
elif table_engine_type == "auto":
|
88 |
cls, elasp = table_cls(img)
|
89 |
if cls == 'wired':
|
|
|
100 |
table_engine, talbe_type = select_table_model(img, table_engine_type, det_model, rec_model)
|
101 |
ocr_engine = select_ocr_model(det_model, rec_model)
|
102 |
|
103 |
+
ocr_res, ocr_infer_elapse = ocr_engine(img, return_word_box=char_ocr)
|
104 |
+
det_cost, cls_cost, rec_cost = ocr_infer_elapse
|
105 |
+
if char_ocr:
|
106 |
+
ocr_res = trans_char_ocr_res(ocr_res)
|
107 |
+
ocr_boxes = [box_4_2_poly_to_box_4_1(ori_ocr[0]) for ori_ocr in ocr_res]
|
108 |
+
if isinstance(table_engine, RapidTable):
|
109 |
+
html, polygons, table_rec_elapse = table_engine(img, ocr_result=ocr_res)
|
110 |
polygons = [[polygon[0], polygon[1], polygon[4], polygon[5]] for polygon in polygons]
|
111 |
+
elif isinstance(table_engine, (WiredTableRecognition, LinelessTableRecognition)):
|
112 |
+
html, table_rec_elapse, polygons, logic_points, ocr_res = table_engine(img, ocr_result=ocr_res,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
enhance_box_line=small_box_cut_enhance,
|
114 |
rotated_fix=rotated_fix,
|
115 |
col_threshold=col_threshold,
|
116 |
row_threshold=row_threshold)
|
117 |
+
sum_elapse = time.time() - start
|
118 |
+
all_elapse = f"- table_type: {talbe_type}\n table all cost: {sum_elapse:.5f}\n - table rec cost: {table_rec_elapse:.5f}\n - ocr cost: {det_cost + cls_cost + rec_cost:.5f}"
|
119 |
|
120 |
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
121 |
table_boxes_img = plot_rec_box(img.copy(), polygons)
|
|
|
144 |
}
|
145 |
""") as demo:
|
146 |
gr.HTML(
|
147 |
+
"<h1 style='text-align: center;'><a href='https://github.com/RapidAI/TableStructureRec?tab=readme-ov-file'>TableStructureRec</a> & <a href='https://github.com/RapidAI/RapidTable'>RapidTable</a></h1>"
|
148 |
)
|
149 |
gr.HTML('''
|
150 |
<div class="header-links">
|
|
|
153 |
<a href="https://pypi.org/project/lineless-table-rec/"><img alt="PyPI" src="https://img.shields.io/pypi/v/lineless-table-rec"></a>
|
154 |
<a href="https://pepy.tech/project/lineless-table-rec"><img src="https://static.pepy.tech/personalized-badge/lineless-table-rec?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads%20Lineless"></a>
|
155 |
<a href="https://pepy.tech/project/wired-table-rec"><img src="https://static.pepy.tech/personalized-badge/wired-table-rec?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads%20Wired"></a>
|
156 |
+
<a href="https://pepy.tech/project/rapid-table"><img src="https://static.pepy.tech/personalized-badge/rapid-table?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads%20RapidTable"></a>
|
157 |
<a href="https://semver.org/"><img alt="SemVer2.0" src="https://img.shields.io/badge/SemVer-2.0-brightgreen"></a>
|
158 |
<a href="https://github.com/psf/black"><img src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
|
159 |
<a href="https://github.com/RapidAI/TableStructureRec/blob/c41bbd23898cb27a957ed962b0ffee3c74dfeff1/LICENSE"><img alt="GitHub" src="https://img.shields.io/badge/license-Apache 2.0-blue"></a>
|