Spaces:
Sleeping
Sleeping
import re | |
from datetime import datetime | |
import jinja2 | |
class XMLHelper:
    """Render PAGE-XML documents from a Jinja2 template.

    The helper loads ``page_xml_2013.xml`` from a fixed template directory,
    renders it with caller-supplied data and post-processes coordinate pairs
    in the rendered text.
    """

    # Matches a bracketed pair such as "[12, 34]" and captures both members.
    _COORD_PAIR = re.compile(r"\[\s*([^\s,]+)\s*,\s*([^\s\]]+)\s*\]")

    def __init__(self, xml_file_name="page_xml.xml"):
        """Store the output file name and the template location.

        Args:
            xml_file_name: Name kept on the instance as ``xml_file_name``;
                no method of this class reads it.
        """
        self.xml_file_name = xml_file_name
        # Relative path: handed unchanged to jinja2.FileSystemLoader, so it
        # presumably resolves against the current working directory.
        self.searchpath = "./src/htr_pipeline/utils/templates"
        self.template = "page_xml_2013.xml"

    def render(self, template_data):
        """Return the rendered XML string for ``template_data``.

        Public entry point; delegates to ``_render_xml``.
        """
        return self._render_xml(template_data)

    def _transform_coords(self, input_string):
        """Rewrite every ``[a, b]`` pair in ``input_string`` as ``a,b``.

        Whitespace around the two members is dropped. Text that contains no
        bracketed pair is returned unchanged.
        """
        return self._COORD_PAIR.sub(r"\1,\2", input_string)

    def _render_xml(self, template_data):
        """Load the template, render it and normalise coordinate pairs.

        Args:
            template_data: Mapping passed to ``template.render``.

        Returns:
            The rendered document as a string.
        """
        loader = jinja2.FileSystemLoader(searchpath=self.searchpath)
        env = jinja2.Environment(loader=loader, trim_blocks=True)
        template = env.get_template(self.template)
        rendered_xml = template.render(template_data)
        # NOTE: the substitution runs over the whole rendered document, so a
        # bracketed pair inside transcribed text is rewritten as well.
        return self._transform_coords(rendered_xml)

    def prepare_template_data(self, img_file_name, image):
        """Build the base template dictionary for one image.

        Args:
            img_file_name: Value stored under ``"imageFilename"``.
            image: Object whose ``shape[0]`` is used as the height and
                ``shape[1]`` as the width.

        Returns:
            A dict with the keys ``created``, ``imageFilename``,
            ``imageWidth``, ``imageHeight`` and an empty ``textRegions`` list.
        """
        img_height = image.shape[0]
        img_width = image.shape[1]
        created = datetime.now().strftime("%Y-%m-%d, %H:%M:%S")
        return {
            "created": created,
            "imageFilename": img_file_name,
            "imageWidth": img_width,
            "imageHeight": img_height,
            "textRegions": [],
        }

    def escape_xml_chars(self, textline):
        """Replace the five XML special characters with predefined entities.

        Args:
            textline: Text to make safe for use in XML content or attributes.

        Returns:
            ``textline`` with ``&``, ``<``, ``>``, ``'`` and ``"`` replaced by
            their entity references.
        """
        # "&" must be replaced first; otherwise the ampersands introduced by
        # the later replacements would be escaped a second time.
        # The replacement strings are literal entity references and must not
        # be HTML-decoded when this file is copied or displayed.
        return (
            textline.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace("'", "&apos;")
            .replace('"', "&quot;")
        )
# Script entry point: intentionally a no-op; the module only provides XMLHelper.
if __name__ == "__main__":
    pass