Spaces:
Sleeping
Sleeping
added new dataset
Browse files
- .gitignore +2 -0
- app.py +25 -13
- helper/examples/create_examples.py +61 -0
- helper/examples/examples.py +38 -20
- helper/text/__init__.py +8 -0
- helper/text/text_about.py +4 -0
- helper/text/text_app.py +4 -0
- helper/text/text_howto.py +4 -0
- helper/text/text_riksarkivet.py +4 -0
- helper/text/text_roadmap.py +4 -0
- requirements.txt +1 -0
- src/htr_pipeline/models.py +4 -0
- src/htr_pipeline/utils/filter_segmask.py +4 -1
- src/htr_pipeline/utils/helper.py +0 -7
- src/htr_pipeline/utils/order_of_object.py +4 -0
- src/htr_pipeline/utils/parser_xml.py +4 -0
- src/htr_pipeline/utils/preprocess_img.py +4 -0
- src/htr_pipeline/utils/process_segmask.py +4 -0
- src/htr_pipeline/utils/process_xml.py +4 -0
.gitignore
CHANGED
@@ -21,3 +21,5 @@ src/htr_pipeline.egg-info/
|
|
21 |
page_xml.xml
|
22 |
page_txt.txt
|
23 |
transcribed_text.txt
|
|
|
|
|
|
21 |
page_xml.xml
|
22 |
page_txt.txt
|
23 |
transcribed_text.txt
|
24 |
+
helper/examples/.cache_images/
|
25 |
+
helper/examples/images/*.jpg
|
app.py
CHANGED
@@ -1,17 +1,17 @@
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
-
from helper.examples.examples import
|
4 |
from helper.gradio_config import css, js, theme
|
5 |
-
from helper.text
|
6 |
-
from helper.text.text_app import TextApp
|
7 |
-
from helper.text.text_howto import TextHowTo
|
8 |
-
from helper.text.text_riksarkivet import TextRiksarkivet
|
9 |
-
from helper.text.text_roadmap import TextRoadmap
|
10 |
from src.htr_pipeline.gradio_backend import CustomTrack, FastTrack, SingletonModelLoader
|
11 |
|
12 |
model_loader = SingletonModelLoader()
|
13 |
fast_track = FastTrack(model_loader)
|
14 |
custom_track = CustomTrack(model_loader)
|
|
|
15 |
|
16 |
with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
|
17 |
gr.Markdown(" ")
|
@@ -36,15 +36,17 @@ with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
|
|
36 |
# visible=True,
|
37 |
# ).style(full_width=True)
|
38 |
radio_file_input = gr.Radio(
|
39 |
-
value="Text file", choices=["Text file", "Page XML"], label="What kind file output?"
|
40 |
)
|
|
|
|
|
41 |
|
42 |
htr_pipeline_button = gr.Button(
|
43 |
"Run HTR",
|
44 |
variant="primary",
|
45 |
visible=True,
|
46 |
elem_id="run_pipeline_button",
|
47 |
-
).style(full_width=
|
48 |
|
49 |
with gr.Group():
|
50 |
with gr.Row():
|
@@ -54,8 +56,8 @@ with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
|
|
54 |
fast_name_files_placeholder = gr.Markdown(visible=False)
|
55 |
|
56 |
gr.Examples(
|
57 |
-
examples=
|
58 |
-
inputs=[
|
59 |
label="Example images",
|
60 |
examples_per_page=3,
|
61 |
)
|
@@ -82,6 +84,7 @@ with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
|
|
82 |
with gr.Tab("1. Region Segmentation"):
|
83 |
with gr.Row():
|
84 |
with gr.Column(scale=2):
|
|
|
85 |
name_files_placeholder = gr.Markdown(visible=False)
|
86 |
|
87 |
with gr.Row():
|
@@ -132,8 +135,8 @@ with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
|
|
132 |
with gr.Row():
|
133 |
with gr.Accordion("Example images to use:", open=False) as example_accord:
|
134 |
gr.Examples(
|
135 |
-
examples=
|
136 |
-
inputs=[
|
137 |
label="Example images",
|
138 |
examples_per_page=2,
|
139 |
)
|
@@ -161,7 +164,7 @@ with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
|
|
161 |
columns=[2],
|
162 |
rows=[2],
|
163 |
# object_fit="contain",
|
164 |
-
height=
|
165 |
preview=True,
|
166 |
container=False,
|
167 |
)
|
@@ -474,8 +477,14 @@ with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
|
|
474 |
outputs=[txt_file_downlod, txt_file_downlod],
|
475 |
)
|
476 |
|
|
|
|
|
|
|
|
|
|
|
477 |
clear_button.click(
|
478 |
lambda: (
|
|
|
479 |
None,
|
480 |
None,
|
481 |
None,
|
@@ -494,6 +503,7 @@ with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
|
|
494 |
),
|
495 |
inputs=[],
|
496 |
outputs=[
|
|
|
497 |
input_region_image,
|
498 |
regions_cropped_gallery,
|
499 |
input_region_from_gallery,
|
@@ -520,3 +530,5 @@ demo.queue(concurrency_count=5, max_size=20)
|
|
520 |
|
521 |
if __name__ == "__main__":
|
522 |
demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False, show_error=True)
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
|
4 |
import gradio as gr
|
5 |
|
6 |
+
from helper.examples.examples import DemoImages
|
7 |
from helper.gradio_config import css, js, theme
|
8 |
+
from helper.text import TextAbout, TextApp, TextHowTo, TextRiksarkivet, TextRoadmap
|
|
|
|
|
|
|
|
|
9 |
from src.htr_pipeline.gradio_backend import CustomTrack, FastTrack, SingletonModelLoader
|
10 |
|
11 |
model_loader = SingletonModelLoader()
|
12 |
fast_track = FastTrack(model_loader)
|
13 |
custom_track = CustomTrack(model_loader)
|
14 |
+
images_for_demo = DemoImages()
|
15 |
|
16 |
with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
|
17 |
gr.Markdown(" ")
|
|
|
36 |
# visible=True,
|
37 |
# ).style(full_width=True)
|
38 |
radio_file_input = gr.Radio(
|
39 |
+
value="Text file", choices=["Text file ", "Page XML file "], label="What kind file output?"
|
40 |
)
|
41 |
+
with gr.Row():
|
42 |
+
htr_clear_button = gr.Button("", variant="Secondary")
|
43 |
|
44 |
htr_pipeline_button = gr.Button(
|
45 |
"Run HTR",
|
46 |
variant="primary",
|
47 |
visible=True,
|
48 |
elem_id="run_pipeline_button",
|
49 |
+
).style(full_width=True)
|
50 |
|
51 |
with gr.Group():
|
52 |
with gr.Row():
|
|
|
56 |
fast_name_files_placeholder = gr.Markdown(visible=False)
|
57 |
|
58 |
gr.Examples(
|
59 |
+
examples=images_for_demo.examples_list,
|
60 |
+
inputs=[fast_name_files_placeholder, fast_track_input_region_image],
|
61 |
label="Example images",
|
62 |
examples_per_page=3,
|
63 |
)
|
|
|
84 |
with gr.Tab("1. Region Segmentation"):
|
85 |
with gr.Row():
|
86 |
with gr.Column(scale=2):
|
87 |
+
vis_data_folder_placeholder = gr.Markdown(visible=False)
|
88 |
name_files_placeholder = gr.Markdown(visible=False)
|
89 |
|
90 |
with gr.Row():
|
|
|
135 |
with gr.Row():
|
136 |
with gr.Accordion("Example images to use:", open=False) as example_accord:
|
137 |
gr.Examples(
|
138 |
+
examples=images_for_demo.examples_list,
|
139 |
+
inputs=[name_files_placeholder, input_region_image],
|
140 |
label="Example images",
|
141 |
examples_per_page=2,
|
142 |
)
|
|
|
164 |
columns=[2],
|
165 |
rows=[2],
|
166 |
# object_fit="contain",
|
167 |
+
height=400,
|
168 |
preview=True,
|
169 |
container=False,
|
170 |
)
|
|
|
477 |
outputs=[txt_file_downlod, txt_file_downlod],
|
478 |
)
|
479 |
|
480 |
+
# def remove_temp_vis():
|
481 |
+
# if os.path.exists("./vis_data"):
|
482 |
+
# os.remove("././vis_data")
|
483 |
+
# return None
|
484 |
+
|
485 |
clear_button.click(
|
486 |
lambda: (
|
487 |
+
(shutil.rmtree("./vis_data") if os.path.exists("./vis_data") else None, None)[1],
|
488 |
None,
|
489 |
None,
|
490 |
None,
|
|
|
503 |
),
|
504 |
inputs=[],
|
505 |
outputs=[
|
506 |
+
vis_data_folder_placeholder,
|
507 |
input_region_image,
|
508 |
regions_cropped_gallery,
|
509 |
input_region_from_gallery,
|
|
|
530 |
|
531 |
if __name__ == "__main__":
|
532 |
demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False, show_error=True)
|
533 |
+
if __name__ == "__main__":
|
534 |
+
demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False, show_error=True)
|
helper/examples/create_examples.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datasets
|
2 |
+
|
3 |
+
_CITATION = """\
|
4 |
+
@InProceedings{huggingface:dataset,
|
5 |
+
title = {Small htr examples images},
|
6 |
+
author={Gabriel Borg},
|
7 |
+
year={2023}
|
8 |
+
}
|
9 |
+
"""
|
10 |
+
|
11 |
+
_DESCRIPTION = """\
|
12 |
+
Demo dataset for the htr demo.
|
13 |
+
"""
|
14 |
+
_HOMEPAGE = "https://huggingface.co/datasets/Riksarkivet/test_images_demo"
|
15 |
+
|
16 |
+
_LICENSE = ""
|
17 |
+
|
18 |
+
_REPO = "https://huggingface.co/datasets/Riksarkivet/test_images_demo"
|
19 |
+
|
20 |
+
|
21 |
+
class ExampleImages(datasets.GeneratorBasedBuilder):
    """Dataset builder that yields image/text pairs for the HTR demo."""

    def _info(self):
        """Return the dataset metadata: a string ``text`` and an ``image`` feature."""
        feature_spec = datasets.Features(
            {
                "text": datasets.Value("string"),
                "image": datasets.Image(),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=feature_spec,
            supervised_keys=None,
            homepage=_HOMEPAGE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download the image archive and the text file; expose one TRAIN split.

        Args:
            dl_manager: Download manager used to fetch both files from ``_REPO``
                and to iterate over the members of the image archive.

        Returns:
            A single-element list holding the TRAIN ``SplitGenerator``.
        """
        archive_path = dl_manager.download(f"{_REPO}/resolve/main/images.tar.gz")
        captions_path = dl_manager.download(f"{_REPO}/resolve/main/images.txt")
        archive_members = dl_manager.iter_archive(archive_path)

        train_split = datasets.SplitGenerator(
            name=datasets.Split.TRAIN,
            gen_kwargs={"images": archive_members, "metadata_path": captions_path},
        )
        return [train_split]

    def _generate_examples(self, images, metadata_path):
        """Yield ``(index, example)`` pairs combining each image with one text line.

        Args:
            images: Iterable of ``(path, file_object)`` pairs; each file object
                is read fully into bytes.
            metadata_path: Path of a UTF-8 text file, split on ``"\\n"``.

        Note:
            Images and text lines are paired purely by position, and iteration
            stops at the shorter of the two sequences.
        """
        with open(metadata_path, encoding="utf-8") as handle:
            captions = handle.read().split("\n")

        for key, ((member_path, member_file), caption) in enumerate(zip(images, captions)):
            example = {
                "image": {"path": member_path, "bytes": member_file.read()},
                "text": caption,
            }
            yield key, example
|
58 |
+
|
59 |
+
|
60 |
+
if __name__ == "__main__":
|
61 |
+
pass
|
helper/examples/examples.py
CHANGED
@@ -1,20 +1,38 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import io
|
2 |
+
|
3 |
+
import datasets
|
4 |
+
from PIL import Image
|
5 |
+
|
6 |
+
|
7 |
+
class DemoImages:
    """Load the demo dataset and expose it as ``[description, image_path]`` examples.

    On construction the dataset is fetched with ``datasets.load_dataset``, its
    ``train`` split is converted to a pandas DataFrame, and every image is
    written to disk so that it can be referenced by file path.
    """

    def __init__(self, url="Riksarkivet/test_images_demo", cache_dir="./helper/examples/.cache_images") -> None:
        """Load the dataset and build the examples list.

        Args:
            url: Dataset identifier handed to ``datasets.load_dataset``.
            cache_dir: Cache directory handed to ``datasets.load_dataset``.
        """
        # Full dataset object as returned by the loader.
        self.images_datasets = datasets.load_dataset(url, cache_dir=cache_dir)
        # The "train" split as a DataFrame with "image" and "text" columns.
        self.example_df = self.images_datasets["train"].to_pandas()
        # List of [description, path_to_image] pairs.
        self.examples_list = self.convert_bytes_to_images()

    def convert_bytes_to_images(self):
        """Write every image in ``self.example_df`` to disk as a JPEG.

        Each row's ``row["image"]["bytes"]`` is decoded and saved as
        ``./helper/examples/images/image_<index>.jpg``.

        Returns:
            A list of ``[description, path_to_image]`` pairs, one per row, where
            the description comes from the row's ``"text"`` column.
        """
        # Local import keeps this class self-contained; the module itself does
        # not import ``os``.
        import os

        output_dir = "./helper/examples/images"
        # The target directory may be missing (e.g. on a fresh checkout, since
        # its *.jpg contents are git-ignored); create it instead of letting
        # ``image.save`` fail.
        os.makedirs(output_dir, exist_ok=True)

        # Pixel modes the JPEG writer accepts; anything else (RGBA, P, LA, ...)
        # must be converted first or ``save`` raises an error.
        jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

        examples_list = []
        for index, row in self.example_df.iterrows():
            image = Image.open(io.BytesIO(row["image"]["bytes"]))
            if image.mode not in jpeg_modes:
                image = image.convert("RGB")

            path_to_image = f"{output_dir}/image_{index}.jpg"
            image.save(path_to_image)

            examples_list.append([row["text"], path_to_image])

        return examples_list
|
33 |
+
|
34 |
+
|
35 |
+
if __name__ == "__main__":
|
36 |
+
test = DemoImages(cache_dir=".cache_images")
|
37 |
+
|
38 |
+
print(test.examples_list)
|
helper/text/__init__.py
CHANGED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from helper.text.text_about import TextAbout
|
2 |
+
from helper.text.text_app import TextApp
|
3 |
+
from helper.text.text_howto import TextHowTo
|
4 |
+
from helper.text.text_riksarkivet import TextRiksarkivet
|
5 |
+
from helper.text.text_roadmap import TextRoadmap
|
6 |
+
|
7 |
+
if __name__ == "__main__":
|
8 |
+
pass
|
helper/text/text_about.py
CHANGED
@@ -70,3 +70,7 @@ class TextAbout:
|
|
70 |
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
71 |
|
72 |
"""
|
|
|
|
|
|
|
|
|
|
70 |
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
71 |
|
72 |
"""
|
73 |
+
|
74 |
+
|
75 |
+
if __name__ == "__main__":
|
76 |
+
pass
|
helper/text/text_app.py
CHANGED
@@ -6,3 +6,7 @@ class TextApp:
|
|
6 |
<h1><center> Handwritten Text Recognition Tool </center></h1>
|
7 |
|
8 |
<h3><center> Swedish National Archives - Riksarkivet </center></h3>"""
|
|
|
|
|
|
|
|
|
|
6 |
<h1><center> Handwritten Text Recognition Tool </center></h1>
|
7 |
|
8 |
<h3><center> Swedish National Archives - Riksarkivet </center></h3>"""
|
9 |
+
|
10 |
+
|
11 |
+
if __name__ == "__main__":
|
12 |
+
pass
|
helper/text/text_howto.py
CHANGED
@@ -92,3 +92,7 @@ To explore the HTR results, follow these steps:
|
|
92 |
##
|
93 |
Alternatively, you can watch the instructional video below, which provides a step-by-step walkthrough of the HTR Tool and some additional features.
|
94 |
"""
|
|
|
|
|
|
|
|
|
|
92 |
##
|
93 |
Alternatively, you can watch the instructional video below, which provides a step-by-step walkthrough of the HTR Tool and some additional features.
|
94 |
"""
|
95 |
+
|
96 |
+
|
97 |
+
if __name__ == "__main__":
|
98 |
+
pass
|
helper/text/text_riksarkivet.py
CHANGED
@@ -8,3 +8,7 @@ class TextRiksarkivet:
|
|
8 |
## Contact us
|
9 |
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
10 |
"""
|
|
|
|
|
|
|
|
|
|
8 |
## Contact us
|
9 |
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
10 |
"""
|
11 |
+
|
12 |
+
|
13 |
+
if __name__ == "__main__":
|
14 |
+
pass
|
helper/text/text_roadmap.py
CHANGED
@@ -15,3 +15,7 @@ class TextRoadmap:
|
|
15 |
|
16 |
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
17 |
"""
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
17 |
"""
|
18 |
+
|
19 |
+
|
20 |
+
if __name__ == "__main__":
|
21 |
+
pass
|
requirements.txt
CHANGED
@@ -8,6 +8,7 @@ opencv-python-headless
|
|
8 |
jinja2
|
9 |
transformers
|
10 |
huggingface_hub
|
|
|
11 |
requests
|
12 |
# scipy
|
13 |
# sklearn
|
|
|
8 |
jinja2
|
9 |
transformers
|
10 |
huggingface_hub
|
11 |
+
datasets
|
12 |
requests
|
13 |
# scipy
|
14 |
# sklearn
|
src/htr_pipeline/models.py
CHANGED
@@ -57,3 +57,7 @@ class HtrModels:
|
|
57 |
}
|
58 |
|
59 |
return config_path
|
|
|
|
|
|
|
|
|
|
57 |
}
|
58 |
|
59 |
return config_path
|
60 |
+
|
61 |
+
|
62 |
+
if __name__ == "__main__":
|
63 |
+
pass
|
src/htr_pipeline/utils/filter_segmask.py
CHANGED
@@ -124,4 +124,7 @@ class FilterSegMask:
|
|
124 |
|
125 |
new_filtered_result.pred_instances = new_pred_instances
|
126 |
return new_filtered_result
|
127 |
-
|
|
|
|
|
|
|
|
124 |
|
125 |
new_filtered_result.pred_instances = new_pred_instances
|
126 |
return new_filtered_result
|
127 |
+
|
128 |
+
|
129 |
+
if __name__ == "__main__":
|
130 |
+
pass
|
src/htr_pipeline/utils/helper.py
CHANGED
@@ -90,10 +90,3 @@ if __name__ == "__main__":
|
|
90 |
kwargs={"spam": "eggs"},
|
91 |
)
|
92 |
print(retval)
|
93 |
-
|
94 |
-
# Example of using the decorator
|
95 |
-
retval = another_long_running_function()
|
96 |
-
print(retval)
|
97 |
-
retval = another_long_running_function()
|
98 |
-
print(retval)
|
99 |
-
print(retval)
|
|
|
90 |
kwargs={"spam": "eggs"},
|
91 |
)
|
92 |
print(retval)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/htr_pipeline/utils/order_of_object.py
CHANGED
@@ -86,3 +86,7 @@ class OrderObject:
|
|
86 |
|
87 |
# Return the ordered regions
|
88 |
return df["region_id"].tolist()
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
# Return the ordered regions
|
88 |
return df["region_id"].tolist()
|
89 |
+
|
90 |
+
|
91 |
+
if __name__ == "__main__":
|
92 |
+
pass
|
src/htr_pipeline/utils/parser_xml.py
CHANGED
@@ -74,3 +74,7 @@ class XmlParser:
|
|
74 |
text = textline.find(f"{self.namespace}TextEquiv").find(f"{self.namespace}Unicode").text
|
75 |
f.write(text + "\n")
|
76 |
f.write("\n")
|
|
|
|
|
|
|
|
|
|
74 |
text = textline.find(f"{self.namespace}TextEquiv").find(f"{self.namespace}Unicode").text
|
75 |
f.write(text + "\n")
|
76 |
f.write("\n")
|
77 |
+
|
78 |
+
|
79 |
+
if __name__ == "__main__":
|
80 |
+
pass
|
src/htr_pipeline/utils/preprocess_img.py
CHANGED
@@ -17,3 +17,7 @@ class Preprocess:
|
|
17 |
img_gradio = cv2.cvtColor(threshed, cv2.COLOR_BGR2RGB)
|
18 |
|
19 |
return img_gradio
|
|
|
|
|
|
|
|
|
|
17 |
img_gradio = cv2.cvtColor(threshed, cv2.COLOR_BGR2RGB)
|
18 |
|
19 |
return img_gradio
|
20 |
+
|
21 |
+
|
22 |
+
if __name__ == "__main__":
|
23 |
+
pass
|
src/htr_pipeline/utils/process_segmask.py
CHANGED
@@ -85,3 +85,7 @@ class SegMaskHelper:
|
|
85 |
translated_line_polygons = [[[a + box[0], b + box[1]] for [a, b] in poly] for poly in line_polygons]
|
86 |
|
87 |
return translated_line_polygons
|
|
|
|
|
|
|
|
|
|
85 |
translated_line_polygons = [[[a + box[0], b + box[1]] for [a, b] in poly] for poly in line_polygons]
|
86 |
|
87 |
return translated_line_polygons
|
88 |
+
|
89 |
+
|
90 |
+
if __name__ == "__main__":
|
91 |
+
pass
|
src/htr_pipeline/utils/process_xml.py
CHANGED
@@ -148,3 +148,7 @@ class XMLHelper:
|
|
148 |
text_lines.append(line_data)
|
149 |
|
150 |
return text_lines, htr_scores
|
|
|
|
|
|
|
|
|
|
148 |
text_lines.append(line_data)
|
149 |
|
150 |
return text_lines, htr_scores
|
151 |
+
|
152 |
+
|
153 |
+
if __name__ == "__main__":
|
154 |
+
pass
|