Gabriel committed on
Commit
21c87da
·
1 Parent(s): 2d4e52a

htrflow app 1.0.0

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .dockerignore → .docker/.dockerignore +5 -5
  2. .docker/dockerfile +10 -0
  3. .gitattributes +0 -35
  4. .github/README.md +27 -0
  5. .gitignore +3 -0
  6. Dockerfile +0 -54
  7. LICENSE +0 -0
  8. app.py +0 -65
  9. {helper → app}/__init__.py +0 -0
  10. {helper/examples → app/assets}/__init__.py +0 -0
  11. {helper/examples → app/assets}/examples.py +3 -25
  12. {helper/examples → app/assets}/images/.gitkeep +0 -0
  13. helper/text/__init__.py → app/backend.py +0 -0
  14. app/gradio_config.py +38 -0
  15. app/main.py +54 -0
  16. {src/htr_pipeline → app/tabs}/__init__.py +0 -0
  17. app/tabs/adv_htrflow_tab.py +53 -0
  18. app/tabs/htrflow_tab.py +290 -0
  19. app/tabs/overview_tab.py +134 -0
  20. {src/htr_pipeline/utils → app/texts_langs}/__init__.py +0 -0
  21. {helper/text → app/texts_langs}/overview/changelog_roadmap/changelog.md +0 -0
  22. {helper/text → app/texts_langs}/overview/changelog_roadmap/old_changelog.md +0 -0
  23. {helper/text → app/texts_langs}/overview/changelog_roadmap/roadmap.md +0 -0
  24. {helper/text → app/texts_langs}/overview/contributions/contributions.md +0 -0
  25. {helper/text → app/texts_langs}/overview/contributions/huminfra_image.md +0 -0
  26. {helper/text → app/texts_langs}/overview/contributions/riksarkivet_image.md +0 -0
  27. {helper/text → app/texts_langs}/overview/duplicate_api/api1.md +0 -0
  28. {helper/text → app/texts_langs}/overview/duplicate_api/api2.md +0 -0
  29. {helper/text → app/texts_langs}/overview/duplicate_api/api_code1.md +0 -0
  30. {helper/text → app/texts_langs}/overview/duplicate_api/api_code2.md +0 -0
  31. {helper/text → app/texts_langs}/overview/duplicate_api/duplicate.md +0 -0
  32. {helper/text → app/texts_langs}/overview/faq_discussion/discussion.md +0 -0
  33. {helper/text → app/texts_langs}/overview/faq_discussion/faq.md +0 -0
  34. {helper/text → app/texts_langs}/overview/htrflow/htrflow_col1.md +0 -0
  35. {helper/text → app/texts_langs}/overview/htrflow/htrflow_col2.md +0 -0
  36. {helper/text → app/texts_langs}/overview/htrflow/htrflow_row1.md +0 -0
  37. {helper/text → app/texts_langs}/overview/htrflow/htrflow_tab1.md +0 -0
  38. {helper/text → app/texts_langs}/overview/htrflow/htrflow_tab2.md +0 -0
  39. {helper/text → app/texts_langs}/overview/htrflow/htrflow_tab3.md +0 -0
  40. {helper/text → app/texts_langs}/overview/htrflow/htrflow_tab4.md +0 -0
  41. {helper/text → app/texts_langs}/text_app.py +1 -11
  42. app/texts_langs/text_overview.py +37 -0
  43. helper/text/help/fasttrack/fast_track.md → app/utils/yaml_helper.py +0 -0
  44. helper/examples/create_examples.py +0 -87
  45. helper/gradio_config.py +0 -139
  46. helper/text/docs_strucutre.md +0 -20
  47. helper/text/help/stepwise/stepwise.md +0 -0
  48. helper/text/markdown_reader.py +0 -5
  49. helper/text/text_overview.py +0 -37
  50. helper/utils.py +0 -97
.dockerignore → .docker/.dockerignore RENAMED
@@ -1,11 +1,11 @@
1
  # .github/
2
- __pycache__
3
- *.pyc
4
- *.pyo
5
- *.pyd
 
6
  .Python
7
  env
8
  .env
9
  page_txt.txt
10
  page_xml.xml
11
- src/tests/
 
1
  # .github/
2
+
3
+ **pycache**
4
+ _.pyc
5
+ _.pyo
6
+ \*.pyd
7
  .Python
8
  env
9
  .env
10
  page_txt.txt
11
  page_xml.xml
 
.docker/dockerfile ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim-bullseye
2
+
3
+ COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
4
+
5
+ COPY . /app
6
+
7
+ WORKDIR /app
8
+ RUN uv sync --frozen --no-cache
9
+
10
+ CMD ["/app/.venv/bin/fastapi", "run", "app/main.py", "--port", "80"]
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tflite filter=lfs diff=lfs merge=lfs -text
29
- *.tgz filter=lfs diff=lfs merge=lfs -text
30
- *.wasm filter=lfs diff=lfs merge=lfs -text
31
- *.xz filter=lfs diff=lfs merge=lfs -text
32
- *.zip filter=lfs diff=lfs merge=lfs -text
33
- *.zst filter=lfs diff=lfs merge=lfs -text
34
- *tfevents* filter=lfs diff=lfs merge=lfs -text
35
- helper/text/videos/eating_spaghetti.mp4 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/README.md CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  # htrflow_app: A demo app for htrflow
2
 
3
  We're thrilled to introduce [htrflow](https://huggingface.co/spaces/Riksarkivet/htr_demo), our demonstration platform that brings to life the process of transcribing Swedish handwritten documents from the 17th to the 19th century.
@@ -66,3 +70,26 @@ You can also just run it from Hugging Face:
66
  docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all \
67
  -e registry.hf.space/riksarkivet-htr-demo:latest
68
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # WORK IN PROGRESS
2
+
3
+ > :warning: **Don't use yet!**
4
+
5
  # htrflow_app: A demo app for htrflow
6
 
7
  We're thrilled to introduce [htrflow](https://huggingface.co/spaces/Riksarkivet/htr_demo), our demonstration platform that brings to life the process of transcribing Swedish handwritten documents from the 17th to the 19th century.
 
70
  docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all \
71
  -e registry.hf.space/riksarkivet-htr-demo:latest
72
  ```
73
+
74
+ ---
75
+
76
+ ## Instructions for documentation
77
+
78
+ - Naming convention of folder is based on tab
79
+ - Naming convention of file is based on subtabs
80
+ - If subtab uses columns and rows
81
+ - Use a suffix such as col1, row1 or tab1 to indicate differences in the position of text.
82
+
83
+ see image below:
84
+
85
+ <p align="center">
86
+ <img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/layout_structure.png?raw=true" alt="Badge 1">
87
+ </p>
88
+
89
+ ## Assets and file sharing with app
90
+
91
+ This repo acts as asset manager for the app:
92
+
93
+ - [Github Repo](https://github.com/Borg93/htr_gradio_file_placeholder)
94
+
95
+ **Note**: this repo is a work in progress
.gitignore CHANGED
@@ -16,6 +16,8 @@ helper/examples/images/*.jpg
16
  flagged_data_points/
17
  src/htr_pipeline.egg-info/
18
 
 
 
19
  #
20
  page_xml.xml
21
  page_txt.txt
@@ -34,3 +36,4 @@ mlruns/
34
 
35
  #models
36
  models--Riksarkivet--HTR_pipeline_models/
 
 
16
  flagged_data_points/
17
  src/htr_pipeline.egg-info/
18
 
19
+ .gradio/
20
+
21
  #
22
  page_xml.xml
23
  page_txt.txt
 
36
 
37
  #models
38
  models--Riksarkivet--HTR_pipeline_models/
39
+ app/assets/images/*.jpg
Dockerfile DELETED
@@ -1,54 +0,0 @@
1
- FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
2
-
3
- ARG DEBIAN_FRONTEND=noninteractive
4
- ENV PYTHONUNBUFFERED=1
5
-
6
- RUN apt-get update && apt-get install --no-install-recommends -y \
7
- build-essential \
8
- python3-pip \
9
- git \
10
- ffmpeg \
11
- libsm6 \
12
- libxext6 \
13
- curl \
14
- && curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash \
15
- && apt-get install --no-install-recommends -y git-lfs \
16
- && git lfs install \
17
- && apt-get clean && rm -rf /var/lib/apt/lists/*
18
-
19
- WORKDIR /code
20
-
21
- COPY ./requirements.txt /code/requirements.txt
22
-
23
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
24
-
25
- RUN mim install mmdet
26
- RUN mim install mmocr
27
- RUN mim install mmcv==2.0.1
28
- RUN mim install mmengine
29
-
30
-
31
- # Set up a new user named "user" with user ID 1000
32
- RUN useradd -m -u 1000 user
33
-
34
- # Switch to the "user" user
35
- USER user
36
-
37
- ENV HOME=/home/user \
38
- PATH=/home/user/.local/bin:$PATH \
39
- PYTHONPATH=$HOME/app \
40
- PYTHONUNBUFFERED=1 \
41
- GRADIO_ALLOW_FLAGGING=never \
42
- GRADIO_NUM_PORTS=1 \
43
- GRADIO_SERVER_NAME=0.0.0.0 \
44
- GRADIO_THEME=huggingface \
45
- SYSTEM=spaces \
46
- AM_I_IN_A_DOCKER_CONTAINER=Yes
47
-
48
- # Set the working directory to the user's home directory
49
- WORKDIR $HOME/app
50
-
51
- # Copy the current directory contents into the container at $HOME/app setting the owner to the user
52
- COPY --chown=user . $HOME/app
53
-
54
- CMD ["python3", "app.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LICENSE CHANGED
The diff for this file is too large to render. See raw diff
 
app.py DELETED
@@ -1,65 +0,0 @@
1
- import os
2
- SECRET_KEY = os.environ.get("HUB_TOKEN", False)
3
- if SECRET_KEY:
4
- from helper.utils import TrafficDataHandler
5
-
6
- import uuid
7
-
8
- import gradio as gr
9
-
10
- from helper.gradio_config import css, theme
11
- from helper.text.text_app import TextApp
12
-
13
- from tabs.htr_tool import htr_tool_tab
14
- from tabs.overview_tab import overview
15
- from tabs.stepwise_htr_tool import stepwise_htr_tool_tab
16
-
17
- session_uuid = str(uuid.uuid1())
18
-
19
- with gr.Blocks(title="Riksarkivet", theme=theme, css=css) as demo:
20
- with gr.Row():
21
- with gr.Column(scale=1):
22
- text_ip_output = gr.Markdown(TextApp.demo_version)
23
- with gr.Column(scale=2):
24
- gr.Markdown(TextApp.title_markdown)
25
- with gr.Column(scale=1):
26
- gr.Markdown(TextApp.title_markdown_img)
27
-
28
- with gr.Tabs():
29
- with gr.Tab("Fast track"):
30
- htr_tool_tab.render()
31
-
32
- with gr.Tab("Stepwise"):
33
- stepwise_htr_tool_tab.render()
34
-
35
- with gr.Tab("Overview"):
36
- overview.render()
37
-
38
- with gr.Tab("How to use"):
39
- with gr.Row():
40
- with gr.Column():
41
- gr.Markdown("## Fast track")
42
- gr.Video(
43
- value="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/eating_spaghetti.mp4",
44
- format="mp4",
45
- )
46
- with gr.Column():
47
- gr.Markdown("## Stepwise")
48
- gr.Video(
49
- "https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/htr_tool_media_cut.mp4",
50
- format="mp4",
51
- )
52
-
53
- if SECRET_KEY:
54
- demo.load(
55
- fn=TrafficDataHandler.onload_store_metric_data,
56
- inputs=None,
57
- outputs=None,
58
- )
59
-
60
-
61
- demo.queue(concurrency_count=2, max_size=2)
62
-
63
-
64
- if __name__ == "__main__":
65
- demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False, show_error=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{helper → app}/__init__.py RENAMED
File without changes
{helper/examples → app/assets}/__init__.py RENAMED
File without changes
{helper/examples → app/assets}/examples.py RENAMED
@@ -5,14 +5,7 @@ from PIL import Image
5
 
6
 
7
  class DemoImages:
8
- _instance = None
9
-
10
- def __new__(cls, *args, **kwargs):
11
- if not cls._instance:
12
- cls._instance = super(DemoImages, cls).__new__(cls, *args, **kwargs)
13
- return cls._instance
14
-
15
- def __init__(self, url="Riksarkivet/test_images_demo", cache_dir="./helper/examples/.cache_images"):
16
  if not hasattr(self, "images_datasets"):
17
  self.images_datasets = datasets.load_dataset(url, cache_dir=cache_dir, split="train")
18
  self.example_df = self.images_datasets.to_pandas()
@@ -20,30 +13,15 @@ class DemoImages:
20
 
21
  def convert_bytes_to_images(self):
22
  examples_list = []
23
- # For each row in the dataframe
24
  for index, row in self.example_df.iterrows():
25
  image_bytes = row["image"]["bytes"]
26
  image = Image.open(io.BytesIO(image_bytes))
27
 
28
- # Set the path to save the image
29
- path_to_image = f"./helper/examples/images/image_{index}.jpg"
30
-
31
- # Save the image
32
  image.save(path_to_image)
33
 
34
- # Get the description
35
  description = row["text"]
36
 
37
- # Append to the examples list
38
- examples_list.append([description, path_to_image])
39
 
40
  return examples_list
41
-
42
-
43
- if __name__ == "__main__":
44
- # test = DemoImages(cache_dir=".cache_images")
45
-
46
- # print(test.examples_list)
47
-
48
- images_datasets = datasets.load_dataset("Riksarkivet/test_images_demo", cache_dir="./helper/examples/.cache_images")
49
- print(images_datasets["train"]["image"][0])
 
5
 
6
 
7
  class DemoImages:
8
+ def __init__(self, url="Riksarkivet/test_images_demo", cache_dir=".app/assets/images/.cache_images"):
 
 
 
 
 
 
 
9
  if not hasattr(self, "images_datasets"):
10
  self.images_datasets = datasets.load_dataset(url, cache_dir=cache_dir, split="train")
11
  self.example_df = self.images_datasets.to_pandas()
 
13
 
14
  def convert_bytes_to_images(self):
15
  examples_list = []
 
16
  for index, row in self.example_df.iterrows():
17
  image_bytes = row["image"]["bytes"]
18
  image = Image.open(io.BytesIO(image_bytes))
19
 
20
+ path_to_image = f"./app/assets/images/image_{index}.jpg"
 
 
 
21
  image.save(path_to_image)
22
 
 
23
  description = row["text"]
24
 
25
+ examples_list.append([description, "Nested segmentation", path_to_image])
 
26
 
27
  return examples_list
 
 
 
 
 
 
 
 
 
{helper/examples → app/assets}/images/.gitkeep RENAMED
File without changes
helper/text/__init__.py → app/backend.py RENAMED
File without changes
app/gradio_config.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ theme = gr.themes.Default(
4
+ primary_hue="blue",
5
+ secondary_hue="blue",
6
+ neutral_hue="slate",
7
+ # font=[
8
+ # gr.themes.GoogleFont("Open Sans"),
9
+ # "ui-sans-serif",
10
+ # "system-ui",
11
+ # "sans-serif",
12
+ # ],
13
+ )
14
+
15
+ css = """
16
+ body > gradio-app > div > div > div.wrap.svelte-1rjryqp > footer > a {
17
+ display: none !important;
18
+ }
19
+ body > gradio-app > div > div > div.wrap.svelte-1rjryqp > footer > div {
20
+ display: none !important;
21
+ }
22
+
23
+ # .top-navbar .tab-container {justify-content: center;}
24
+ # .top-navbar .tab-container button {font-size:large !important;}
25
+ #langdropdown {width: 100px;}
26
+
27
+ #column-form .wrap {flex-direction: column; height:100vh;}
28
+
29
+ @media screen and (max-width: 1024px) {
30
+ #column-form .wrap {
31
+ flex-direction: column;
32
+ height: auto;
33
+ }
34
+ }
35
+
36
+ #htrflowouttab-button {opacity: 0; cursor:auto;}
37
+
38
+ """
app/main.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from app.gradio_config import css, theme
4
+ from app.tabs.adv_htrflow_tab import adv_htrflow_pipeline
5
+ from app.tabs.htrflow_tab import htrflow_pipeline
6
+ from app.tabs.overview_tab import overview
7
+ from app.texts_langs.text_app import TextApp
8
+
9
+ with gr.Blocks(title="HTRflow", theme=theme, css=css) as demo:
10
+ with gr.Row():
11
+ with gr.Column(scale=1):
12
+ radio = gr.Dropdown(
13
+ choices=["ENG", "SWE"], value="ENG", container=False, min_width=50, scale=0, elem_id="langdropdown"
14
+ )
15
+
16
+ with gr.Column(scale=2):
17
+ gr.Markdown(TextApp.title_markdown)
18
+ with gr.Column(scale=1):
19
+ gr.Markdown(TextApp.title_markdown_img)
20
+
21
+ with gr.Tabs(elem_classes="top-navbar") as navbar:
22
+ with gr.Tab("Home"):
23
+ overview.render()
24
+
25
+ with gr.Tab("Simple HTR"):
26
+ htrflow_pipeline.render()
27
+
28
+ with gr.Tab("Custom HTR"):
29
+ adv_htrflow_pipeline.render()
30
+
31
+ # radio.change(
32
+ # None,
33
+ # inputs=radio,
34
+ # js="""
35
+ # (data) => {
36
+ # window.localStorage.setItem('data', JSON.stringify(data))
37
+ # }
38
+ # """,
39
+ # )
40
+
41
+ demo.load(
42
+ None,
43
+ inputs=radio,
44
+ js="""
45
+ (data) => {
46
+ window.localStorage.setItem('data', JSON.stringify(data))
47
+ }
48
+ """,
49
+ )
50
+
51
+ demo.queue()
52
+
53
+ if __name__ == "__main__":
54
+ demo.launch(server_name="0.0.0.0", server_port=7860, enable_monitoring=False)
{src/htr_pipeline → app/tabs}/__init__.py RENAMED
File without changes
app/tabs/adv_htrflow_tab.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ with gr.Blocks() as adv_htrflow_pipeline:
4
+ with gr.Row(variant="panel"):
5
+ with gr.Column():
6
+ gr.Markdown("<h2>Templates</h2>")
7
+ # TODO: We want to either crop or draw polygon or bbox and send it to the custom model. Or just as the image is.
8
+ # TODO: For the viewer we should be able to select from the output of the model what for values we want to
9
+ gr.ImageMask()
10
+
11
+ with gr.Group():
12
+ with gr.Row(visible=True) as yaml_pipeline:
13
+ custom_template_yaml = gr.Code(
14
+ value="""
15
+ steps:
16
+ - step: Segmentation
17
+ settings:
18
+ model: yolo
19
+ model_settings:
20
+ model: Riksarkivet/yolov9-lines-within-regions-1
21
+ - step: TextRecognition
22
+ settings:
23
+ model: TrOCR
24
+ model_settings:
25
+ model: Riksarkivet/trocr-base-handwritten-hist-swe-2
26
+ - step: OrderLines
27
+ - step: Export
28
+ settings:
29
+ format: txt
30
+ dest: outputs
31
+ """,
32
+ language="yaml",
33
+ label="yaml",
34
+ interactive=True,
35
+ )
36
+
37
+ with gr.Row():
38
+ gr.Button("Submit", variant="primary", scale=0)
39
+
40
+ with gr.Column():
41
+ gr.Markdown("<h2>Viewer</h2>")
42
+ with gr.Tabs():
43
+ with gr.Tab("HTR ouput"):
44
+ gr.CheckboxGroup(
45
+ ["Reading Order", "Line", "Region", "Word"],
46
+ info="Checkboxgroup should be basedon output structure from htrflow",
47
+ )
48
+
49
+ gr.Image()
50
+ with gr.Tab("Table"):
51
+ pass
52
+ with gr.Tab("Analysis"):
53
+ pass
app/tabs/htrflow_tab.py ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ from app.assets.examples import DemoImages
5
+
6
+ images_for_demo = DemoImages()
7
+
8
+
9
+ def toggle_visibility_default_templates(selected_option):
10
+ return [
11
+ gr.update(visible=(selected_option == "Simple layout")),
12
+ gr.update(visible=(selected_option == "Nested segmentation")),
13
+ gr.update(visible=(selected_option == "Tables")),
14
+ selected_option,
15
+ ]
16
+
17
+
18
+ def dummy_revealer(reveal_bool):
19
+ if reveal_bool == 0:
20
+ return gr.update(visible=False)
21
+ else:
22
+ return gr.update(visible=True)
23
+
24
+
25
+ def submit_button_pipeline_fn(method, input_image, yaml_str):
26
+ data = {"transcription": ["Example transcription"], "prediction score": [0.95]}
27
+
28
+ df = pd.DataFrame(data)
29
+
30
+ # HTRflow code here
31
+
32
+ serialized_files = (
33
+ "https://raw.githubusercontent.com/Borg93/htr_gradio_file_placeholder/refs/heads/main/app_project_line.png"
34
+ )
35
+
36
+ return f"Selected Option: {method}", input_image, serialized_files, gr.update(visible=True), df
37
+
38
+
39
+ def htr_image_placehholder(txt, method, image):
40
+ needs_yaml_to_forward_tohtrflow_ = """steps:
41
+ """
42
+ print(method)
43
+
44
+ return txt, method, image
45
+
46
+
47
+ def get_yaml_button_fn(
48
+ method,
49
+ output_formats,
50
+ simple_segment_model=None,
51
+ simple_htr_model=None,
52
+ simple_htr_model_type=None,
53
+ simple_segment_model_type=None,
54
+ nested_segment_model_1=None,
55
+ nested_segment_model_2=None,
56
+ nested_htr_model=None,
57
+ nested_segment_model_1_type=None,
58
+ nested_segment_model_2_type=None,
59
+ nested_htr_model_type=None,
60
+ ):
61
+ if method == "Simple layout":
62
+ yaml_value = f"""steps:
63
+ - step: Segmentation
64
+ settings:
65
+ model: {simple_htr_model_type}
66
+ model_settings:
67
+ model: {simple_segment_model}
68
+ - step: TextRecognition
69
+ settings:
70
+ model: {simple_segment_model_type}
71
+ model_settings:
72
+ model: {simple_htr_model}
73
+ - step: OrderLines
74
+ """
75
+ elif method == "Nested segmentation":
76
+ yaml_value = f"""steps:
77
+ - step: Segmentation
78
+ settings:
79
+ model: {nested_segment_model_1_type}
80
+ model_settings:
81
+ model: {nested_segment_model_1}
82
+ - step: Segmentation
83
+ settings:
84
+ model: {nested_segment_model_2_type}
85
+ model_settings:
86
+ model: {nested_segment_model_2}
87
+ - step: TextRecognition
88
+ settings:
89
+ model: {nested_htr_model_type}
90
+ model_settings:
91
+ model: {nested_htr_model}
92
+ - step: OrderLines
93
+ """
94
+ else:
95
+ return gr.Error("Invalid method or not yet supported.")
96
+
97
+ export_steps = ""
98
+ for output_format in output_formats:
99
+ export_steps += f""" - step: Export
100
+ settings:
101
+ format: {output_format}
102
+ dest: {output_format}-outputs
103
+ """
104
+
105
+ yaml_value += export_steps
106
+
107
+ return yaml_value
108
+
109
+
110
+ output_image_placehholder = gr.Image(label="Output image", height=500, show_share_button=True)
111
+ markdown_selected_option = gr.Markdown(container=True)
112
+
113
+ inital_state_selection_option = "Simple layout"
114
+
115
+ with gr.Blocks() as htrflow_pipeline:
116
+ with gr.Row(variant="panel"):
117
+ with gr.Column():
118
+ # gr.Markdown("<h2>Control Panel</h2>")
119
+ with gr.Tabs():
120
+ with gr.Tab("Templates"):
121
+ with gr.Group():
122
+ example_text_input_placeholder = gr.Markdown(visible=False, container=False)
123
+ example_method_input_placeholder = gr.Markdown(visible=False, container=False)
124
+ example_text_output_placeholder = gr.Markdown(visible=False, container=False)
125
+
126
+ selected_option = gr.State(inital_state_selection_option)
127
+ dummy_none = gr.State(0)
128
+
129
+ user_image_input = gr.Image(
130
+ interactive=True, sources=["upload", "clipboard"], label="Input image", height=300
131
+ )
132
+
133
+ template_method_radio = gr.Dropdown(
134
+ [inital_state_selection_option, "Nested segmentation", "Tables"],
135
+ value=inital_state_selection_option,
136
+ label="Select template",
137
+ info="Will add more templates later!",
138
+ )
139
+
140
+ with gr.Row() as simple_pipeline:
141
+ with gr.Column():
142
+ with gr.Row():
143
+ simple_segment_model = gr.Textbox(
144
+ "model1", label="Segmentation", info="Info about the Segmentation model"
145
+ )
146
+ simple_segment_model_type = gr.Dropdown(
147
+ choices=["yolo"], value="yolo", label="Segmentation", info="Model"
148
+ )
149
+ with gr.Row():
150
+ simple_htr_model = gr.Textbox(
151
+ "model1", label="HTR", info="Info about the HTR model"
152
+ )
153
+ simple_htr_model_type = gr.Dropdown(
154
+ choices=["TrOCR"], value="TrOCR", label="HTR", info="Model"
155
+ )
156
+
157
+ with gr.Row(visible=False) as nested_pipeline:
158
+ with gr.Column():
159
+ with gr.Row():
160
+ nested_segment_model_1 = gr.Textbox(
161
+ "model1", label="Segmentation", info="Info about the Segmentation model"
162
+ )
163
+ nested_segment_model_1_type = gr.Dropdown(
164
+ choices=["yolo"], value="yolo", label="Segmentation", info="Model"
165
+ )
166
+ with gr.Row():
167
+ nested_segment_model_2 = gr.Textbox(
168
+ "model2", label="Segmentation", info="Info about the Segmentation model"
169
+ )
170
+ nested_segment_model_2_type = gr.Dropdown(
171
+ choices=["yolo"], value="yolo", label="Segmentation", info="Model"
172
+ )
173
+ with gr.Row():
174
+ nested_htr_model = gr.Textbox(
175
+ "model1", label="HTR", info="Info about the HTR model"
176
+ )
177
+ nested_htr_model_type = gr.Dropdown(
178
+ choices=["TrOCR"], value="TrOCR", label="HTR", info="Model"
179
+ )
180
+
181
+ with gr.Row(visible=False) as table_pipeline:
182
+ with gr.Column():
183
+ gr.Textbox("WIP")
184
+ with gr.Row():
185
+ output_formats = gr.Dropdown(
186
+ choices=["txt", "alto", "page"],
187
+ value="txt",
188
+ multiselect=True,
189
+ label="Serialized Output",
190
+ info="Supported format are: ...",
191
+ )
192
+
193
+ with gr.Row():
194
+ submit_button_pipeline = gr.Button("Submit", variant="primary", scale=0)
195
+ get_yaml_button = gr.Button("Get Yaml", variant="secondary", scale=0)
196
+
197
+ with gr.Tab("Examples") as examples_tab:
198
+ # TODO: Perhaps we should move examples to a seperate tab for simplicity?
199
+ gr.Examples(
200
+ fn=htr_image_placehholder,
201
+ examples=images_for_demo.examples_list,
202
+ inputs=[
203
+ example_text_input_placeholder,
204
+ example_method_input_placeholder,
205
+ user_image_input,
206
+ ],
207
+ outputs=[example_text_output_placeholder, markdown_selected_option, output_image_placehholder],
208
+ cache_examples=True,
209
+ cache_mode="lazy",
210
+ label="Example images",
211
+ examples_per_page=7,
212
+ )
213
+
214
+ with gr.Column():
215
+ # gr.Markdown("<h2>Output Panel</h2>")
216
+ with gr.Tabs():
217
+ with gr.Tab("Viewer"): #interactive=False, elem_id="htrflowouttab"
218
+ with gr.Group():
219
+ with gr.Row():
220
+ output_image_placehholder.render()
221
+ with gr.Row():
222
+ markdown_selected_option.render()
223
+ with gr.Row():
224
+ output_dataframe_pipeline = gr.Textbox(label="Click text",info="click on image bla bla..")
225
+ with gr.Tab("Table") as htrflow_output_table_tab:
226
+ with gr.Group():
227
+ with gr.Row():
228
+ output_dataframe_pipeline = gr.Image(label="Output image", interactive=False, height="100")
229
+ with gr.Row():
230
+ output_dataframe_pipeline = gr.Dataframe(label="Output image", col_count=2)
231
+
232
+ output_files_pipeline = gr.Files(label="Output files", height=100, visible=False)
233
+ output_yaml_code = gr.Code(language="yaml", label="yaml", interactive=True, visible=False)
234
+
235
+ submit_button_pipeline.click(
236
+ get_yaml_button_fn,
237
+ inputs=[
238
+ template_method_radio,
239
+ output_formats,
240
+ simple_segment_model,
241
+ simple_htr_model,
242
+ simple_htr_model_type,
243
+ simple_segment_model_type,
244
+ nested_segment_model_1,
245
+ nested_segment_model_2,
246
+ nested_htr_model,
247
+ nested_segment_model_1_type,
248
+ nested_segment_model_2_type,
249
+ nested_htr_model_type,
250
+ ],
251
+ outputs=[output_yaml_code],
252
+ ).then(
253
+ submit_button_pipeline_fn,
254
+ inputs=[template_method_radio, user_image_input, output_yaml_code],
255
+ outputs=[
256
+ markdown_selected_option,
257
+ output_image_placehholder,
258
+ output_files_pipeline,
259
+ output_files_pipeline,
260
+ output_dataframe_pipeline,
261
+ ],
262
+ ).then(dummy_revealer, inputs=dummy_none, outputs=output_yaml_code)
263
+
264
+ get_yaml_button.click(
265
+ get_yaml_button_fn,
266
+ inputs=[
267
+ template_method_radio,
268
+ output_formats,
269
+ simple_segment_model,
270
+ simple_htr_model,
271
+ simple_htr_model_type,
272
+ simple_segment_model_type,
273
+ nested_segment_model_1,
274
+ nested_segment_model_2,
275
+ nested_htr_model,
276
+ nested_segment_model_1_type,
277
+ nested_segment_model_2_type,
278
+ nested_htr_model_type,
279
+ ],
280
+ outputs=[output_yaml_code],
281
+ ).then(dummy_revealer, inputs=output_yaml_code, outputs=output_yaml_code)
282
+
283
+ # TODO : hide the tab when selected for yaml code
284
+ # htrflow_output_table_tab.select(dummy_revealer, inputs=output_yaml_code, outputs=output_yaml_code)
285
+
286
+ template_method_radio.select(
287
+ lambda choice: toggle_visibility_default_templates(choice),
288
+ inputs=template_method_radio,
289
+ outputs=[simple_pipeline, nested_pipeline, table_pipeline, selected_option],
290
+ )
app/tabs/overview_tab.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from app.texts_langs.text_overview import TextOverview
4
+
5
+ default_value_radio_overview = "Home"
6
+ overview_choices_eng = [
7
+ "Home",
8
+ "About App",
9
+ "Guide",
10
+ "Model & Data",
11
+ "Contributions",
12
+ "Duplicate App",
13
+ "FAQ & Contact",
14
+ ]
15
+
16
+
17
+ def toggle_visibility(selected_option):
18
+ return [
19
+ gr.update(visible=(selected_option == "Home")),
20
+ gr.update(visible=(selected_option == "About App")),
21
+ gr.update(visible=(selected_option == "Guide")),
22
+ gr.update(visible=(selected_option == "Model & Data")),
23
+ gr.update(visible=(selected_option == "Contributions")),
24
+ gr.update(visible=(selected_option == "FAQ & Contact")),
25
+ gr.update(visible=(selected_option == "Duplicate App")),
26
+ ]
27
+
28
+
29
+ with gr.Blocks() as overview:
30
+ with gr.Row():
31
+
32
+ with gr.Column(visible=True, min_width=170, scale=0, variant="panel") as sidebar:
33
+ options_overview = gr.Radio(
34
+ overview_choices_eng,
35
+ label="Side Navigation",
36
+ container=False,
37
+ value=default_value_radio_overview,
38
+ elem_id="column-form",
39
+ min_width=100,
40
+ scale=0,
41
+ )
42
+
43
+ with gr.Column(variant="panel") as overview_main:
44
+ with gr.Row(visible=True) as overview_home:
45
+ with gr.Column():
46
+
47
+ gr.Markdown("## landing page to explain version")
48
+ gr.Markdown("## htrflow app 1.0.0")
49
+ gr.Markdown("## links to different stuff")
50
+ gr.Markdown("## Whats new..")
51
+
52
+ with gr.Row(visible=False) as overview_about:
53
+ with gr.Column():
54
+ gr.Markdown(TextOverview.htrflow_col1)
55
+ gr.Markdown(TextOverview.htrflow_col2)
56
+
57
+ with gr.Row(visible=False) as overview_guide:
58
+ with gr.Column():
59
+ with gr.Row():
60
+ with gr.Column():
61
+ gr.Markdown("## Fast track")
62
+ gr.Video(
63
+ value="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/eating_spaghetti.mp4",
64
+ format="mp4",
65
+ )
66
+ with gr.Row():
67
+ with gr.Column():
68
+ gr.Markdown("## Stepwise")
69
+ gr.Video(
70
+ "https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/htr_tool_media_cut.mp4",
71
+ format="mp4",
72
+ )
73
+
74
+ with gr.Row(visible=False) as overview_model_data:
75
+ with gr.Column():
76
+ gr.Markdown(TextOverview.htrflow_row1)
77
+ with gr.Tabs():
78
+ with gr.Tab("Binarization"):
79
+ gr.Markdown(TextOverview.htrflow_tab1)
80
+ with gr.Tab("Region segmentation"):
81
+ gr.Markdown(TextOverview.htrflow_tab2)
82
+ with gr.Tab("Line segmentation"):
83
+ gr.Markdown(TextOverview.htrflow_tab3)
84
+ with gr.Tab("Text recognition"):
85
+ gr.Markdown(TextOverview.htrflow_tab4)
86
+
87
+ with gr.Row(visible=False) as overview_contribute:
88
+ with gr.Column():
89
+ gr.Markdown(TextOverview.contributions)
90
+ gr.Markdown(TextOverview.huminfra_image)
91
+
92
+ with gr.Row(visible=False) as overview_duplicate:
93
+ with gr.Column():
94
+ gr.Markdown(TextOverview.duplicate)
95
+
96
+ with gr.Column():
97
+ gr.Markdown(TextOverview.api1)
98
+ gr.Code(
99
+ value=TextOverview.api_code1,
100
+ language="python",
101
+ interactive=False,
102
+ show_label=False,
103
+ )
104
+
105
+ gr.Markdown(TextOverview.api2)
106
+
107
+ gr.Code(
108
+ value=TextOverview.api_code2,
109
+ language=None,
110
+ interactive=False,
111
+ show_label=False,
112
+ )
113
+
114
+ with gr.Row(visible=False) as overview_faq:
115
+ with gr.Column():
116
+ gr.Markdown(TextOverview.text_faq)
117
+ with gr.Column():
118
+ gr.Markdown(TextOverview.text_discussion)
119
+ with gr.Column(visible=True, min_width=0, scale=0) as empty:
120
+ pass
121
+
122
+ options_overview.change(
123
+ lambda choice: toggle_visibility(choice),
124
+ inputs=options_overview,
125
+ outputs=[
126
+ overview_home,
127
+ overview_about,
128
+ overview_guide,
129
+ overview_model_data,
130
+ overview_contribute,
131
+ overview_duplicate,
132
+ overview_faq,
133
+ ],
134
+ )
{src/htr_pipeline/utils → app/texts_langs}/__init__.py RENAMED
File without changes
{helper/text → app/texts_langs}/overview/changelog_roadmap/changelog.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/changelog_roadmap/old_changelog.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/changelog_roadmap/roadmap.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/contributions/contributions.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/contributions/huminfra_image.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/contributions/riksarkivet_image.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/duplicate_api/api1.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/duplicate_api/api2.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/duplicate_api/api_code1.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/duplicate_api/api_code2.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/duplicate_api/duplicate.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/faq_discussion/discussion.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/faq_discussion/faq.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/htrflow/htrflow_col1.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/htrflow/htrflow_col2.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/htrflow/htrflow_row1.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/htrflow/htrflow_tab1.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/htrflow/htrflow_tab2.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/htrflow/htrflow_tab3.md RENAMED
File without changes
{helper/text → app/texts_langs}/overview/htrflow/htrflow_tab4.md RENAMED
File without changes
{helper/text → app/texts_langs}/text_app.py RENAMED
@@ -1,19 +1,9 @@
1
  class TextApp:
2
- demo_version = """<em>Version 0.1.0</em>"""
3
-
4
  title_markdown = """
5
-
6
-
7
- <h1><center> HTRFLOW </center></h1>
8
-
9
- <p><center>Explore AI models for Handwritten Text Recogntion developed by the Swedish National Archives </center></p>"""
10
 
11
  title_markdown_img = """
12
  <a href="https://riksarkivet.se">
13
  <img src="https://raw.githubusercontent.com/Borg93/Riksarkivet_docs/main/docs/assets/fav-removebg-preview.png" width="17%" align="right" margin-right="100" />
14
  </a>
15
  """
16
-
17
-
18
- if __name__ == "__main__":
19
- pass
 
1
  class TextApp:
 
 
2
  title_markdown = """
3
+ <h1><center> HTRflow πŸ” App </center></h1>""" #
 
 
 
 
4
 
5
  title_markdown_img = """
6
  <a href="https://riksarkivet.se">
7
  <img src="https://raw.githubusercontent.com/Borg93/Riksarkivet_docs/main/docs/assets/fav-removebg-preview.png" width="17%" align="right" margin-right="100" />
8
  </a>
9
  """
 
 
 
 
app/texts_langs/text_overview.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def read_markdown(file_path: str) -> str:
2
+ with open(file_path, "r") as file:
3
+ content = file.read()
4
+
5
+ return f"""{content}"""
6
+
7
+
8
+ class TextOverview:
9
+ # HTRFLOW
10
+ htrflow_col1 = read_markdown("app/texts_langs/overview/htrflow/htrflow_col1.md")
11
+ htrflow_col2 = read_markdown("app/texts_langs/overview/htrflow/htrflow_col2.md")
12
+ htrflow_row1 = read_markdown("app/texts_langs/overview/htrflow/htrflow_row1.md")
13
+ htrflow_tab1 = read_markdown("app/texts_langs/overview/htrflow/htrflow_tab1.md")
14
+ htrflow_tab2 = read_markdown("app/texts_langs/overview/htrflow/htrflow_tab2.md")
15
+ htrflow_tab3 = read_markdown("app/texts_langs/overview/htrflow/htrflow_tab3.md")
16
+ htrflow_tab4 = read_markdown("app/texts_langs/overview/htrflow/htrflow_tab4.md")
17
+
18
+ # faq & discussion
19
+ text_faq = read_markdown("app/texts_langs/overview/faq_discussion/faq.md")
20
+ text_discussion = read_markdown("app/texts_langs/overview/faq_discussion/discussion.md")
21
+
22
+ # Contributions
23
+ contributions = read_markdown("app/texts_langs/overview/contributions/contributions.md")
24
+ huminfra_image = read_markdown("app/texts_langs/overview/contributions/huminfra_image.md")
25
+
26
+ # Changelog & Roadmap
27
+ changelog = read_markdown("app/texts_langs/overview/changelog_roadmap/changelog.md")
28
+ old_changelog = read_markdown("app/texts_langs/overview/changelog_roadmap/old_changelog.md")
29
+
30
+ roadmap = read_markdown("app/texts_langs/overview/changelog_roadmap/roadmap.md")
31
+
32
+ # duplicate & api
33
+ duplicate = read_markdown("app/texts_langs/overview/duplicate_api/duplicate.md")
34
+ api1 = read_markdown("app/texts_langs/overview/duplicate_api/api1.md")
35
+ api_code1 = read_markdown("app/texts_langs/overview/duplicate_api/api_code1.md")
36
+ api2 = read_markdown("app/texts_langs/overview/duplicate_api/api2.md")
37
+ api_code2 = read_markdown("app/texts_langs/overview/duplicate_api/api_code2.md")
helper/text/help/fasttrack/fast_track.md → app/utils/yaml_helper.py RENAMED
File without changes
helper/examples/create_examples.py DELETED
@@ -1,87 +0,0 @@
1
- import os
2
- import tarfile
3
-
4
- import datasets
5
- import pandas as pd
6
-
7
- _CITATION = """\
8
- @InProceedings{huggingface:dataset,
9
- title = {Small htr examples images},
10
- author={Gabriel Borg},
11
- year={2023}
12
- }
13
- """
14
-
15
- _DESCRIPTION = """\
16
- Demo dataset for the htr demo.
17
- """
18
- _HOMEPAGE = "https://huggingface.co/datasets/Riksarkivet/test_images_demo"
19
-
20
- _LICENSE = ""
21
-
22
- _REPO = "https://huggingface.co/datasets/Riksarkivet/test_images_demo"
23
-
24
-
25
- class ExampleImages(datasets.GeneratorBasedBuilder):
26
- """Small sample of image-text pairs"""
27
-
28
- def _info(self):
29
- return datasets.DatasetInfo(
30
- description=_DESCRIPTION,
31
- features=datasets.Features(
32
- {
33
- "text": datasets.Value("string"),
34
- "image": datasets.Image(),
35
- }
36
- ),
37
- supervised_keys=None,
38
- homepage=_HOMEPAGE,
39
- citation=_CITATION,
40
- )
41
-
42
- def _split_generators(self, dl_manager):
43
- images_archive = dl_manager.download(f"{_REPO}/resolve/main/images.tar.gz")
44
- metadata_path = dl_manager.download(f"{_REPO}/resolve/main/images.txt")
45
- image_iters = dl_manager.iter_archive(images_archive)
46
- return [
47
- datasets.SplitGenerator(
48
- name=datasets.Split.TRAIN, gen_kwargs={"images": image_iters, "metadata_path": metadata_path}
49
- ),
50
- ]
51
-
52
- def _generate_examples(self, images, metadata_path):
53
- """Generate images and text."""
54
- with open(metadata_path, encoding="utf-8") as f:
55
- metadata_list = f.read().split("\n")
56
- for idx, (img_obj, meta_txt) in enumerate(zip(images, metadata_list)):
57
- filepath, image = img_obj
58
-
59
- text_value = meta_txt.split("= ")[-1].strip()
60
-
61
- yield idx, {
62
- "image": {"path": filepath, "bytes": image.read()},
63
- "text": text_value,
64
- }
65
-
66
-
67
- def txt_to_csv(file_name):
68
- text_file_path = f"{file_name}.txt"
69
- df = pd.read_csv(text_file_path, delimiter="=", header=None, names=["Key", "Label"], encoding="utf-8")
70
- df["Key"] = df["Key"].str.strip()
71
- df["Label"] = df["Label"].str.strip()
72
- print(df)
73
- df.to_csv(f"{file_name}.csv", index=False)
74
-
75
-
76
- def sort_and_compress_images(images_folder, tar_file):
77
- sorted_images = sorted(os.listdir(images_folder))
78
- with tarfile.open(tar_file, "w:gz") as tar:
79
- for image_name in sorted_images:
80
- image_path = os.path.join(images_folder, image_name)
81
- tar.add(image_path, arcname=image_name)
82
- print("Images sorted and compressed into tar.gz archive.")
83
-
84
-
85
- if __name__ == "__main__":
86
- txt_to_csv("info")
87
- sort_and_compress_images("images", "sorted_images.tar.gz")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
helper/gradio_config.py DELETED
@@ -1,139 +0,0 @@
1
- import gradio as gr
2
-
3
-
4
- class GradioConfig:
5
- def __init__(self, tooltip_dict):
6
- self.tooltip_dict = tooltip_dict
7
- self.theme = gr.themes.Base(
8
- primary_hue="blue",
9
- secondary_hue="blue",
10
- neutral_hue="slate",
11
- font=[
12
- gr.themes.GoogleFont("Open Sans"),
13
- "ui-sans-serif",
14
- "system-ui",
15
- "sans-serif",
16
- ],
17
- )
18
- self.css = """
19
- footer {display: none !important;}
20
- #image_upload {min-height:450}
21
- #image_upload [data-testid="image"], #image_upload [data-testid="image"] > div{min-height: 450px}
22
- #gallery {height: 400px}
23
- .fixed-height.svelte-g4rw9.svelte-g4rw9 {min-height: 400px;}
24
-
25
- #download_file > div.empty.svelte-lk9eg8.large.unpadded_box {min-height: 100px;}
26
- #gallery_lines > div.preview.svelte-1b19cri > div.thumbnails.scroll-hide.svelte-1b19cri {display: none;}
27
-
28
- .tr-head.svelte-13hsdno>.svelte-13hsdno+.svelte-13hsdno {display: none;}
29
- """
30
-
31
- def generate_tooltip_css(self):
32
- temp_css_list = [self.css]
33
- for button_id, tooltip_text in self.tooltip_dict.items():
34
- temp_css_list.append(self.template_tooltip_css(button_id, tooltip_text))
35
-
36
- return "\n".join(temp_css_list)
37
-
38
- def template_tooltip_css(self, button_id, tooltip_text):
39
- return f"""
40
- /* For tooltip */
41
- #{button_id} {{
42
- position: relative;
43
- }}
44
-
45
- #{button_id}::before {{
46
- visibility: hidden;
47
- content: '';
48
- position: absolute;
49
- bottom: 100%; /* Position on top of the parent element */
50
- left: 50%;
51
- margin-left: 5px; /* Adjust for the desired space between the button and tooltip */
52
- transform: translateY(-50%);
53
- border-width: 7px;
54
- border-style: solid;
55
- border-color: rgba(51, 51, 51, 0) transparent transparent rgba(51, 51, 51, 0);
56
- transition: opacity 0.4s ease-in-out, border-color 0.4s ease-in-out;
57
- opacity: 0;
58
- z-index: 999;
59
- }}
60
-
61
- #{button_id}::after {{
62
- visibility: hidden;
63
- content: '{tooltip_text}';
64
- position: absolute;
65
- bottom: 100%; /* Position on top of the parent element */
66
- left: 42%;
67
- background-color: rgba(51, 51, 51, 0);
68
- color: white;
69
- padding: 5px;
70
- border-radius: 3px;
71
- z-index: 998;
72
- opacity: 0;
73
- transition: opacity 0.4s ease-in-out, background-color 0.4s ease-in-out;
74
- margin-bottom: 20px !important; /* Increased from 18px to 23px to move tooltip 5px upwards */
75
- margin-left: 0px; /* Adjust for the arrow width and the desired space between the arrow and tooltip */
76
- white-space: normal; /* Allows the text to wrap */
77
- width: 200px; /* Maximum line length before wrapping */
78
- box-sizing: border-box;
79
- }}
80
-
81
- #{button_id}.showTooltip::before {{
82
- visibility: visible;
83
- opacity: 1;
84
- border-color: rgba(51, 51, 51, 0.7) transparent transparent rgba(51, 51, 51, 0.7);
85
- }}
86
-
87
- #{button_id}.showTooltip::after {{
88
- visibility: visible;
89
- opacity: 1;
90
- background-color: rgba(51, 51, 51, 0.7);
91
- }}
92
- """
93
-
94
- def add_interaction_to_buttons(self):
95
- button_ids_list = ", ".join([f"'#{id}'" for id, _ in self.tooltip_dict.items()])
96
- button_ids = button_ids_list.replace("'", "")
97
- return f"""
98
- function monitorButtonHover() {{
99
-
100
- const buttons = document.querySelectorAll('{button_ids}');
101
- buttons.forEach(function(button) {{
102
- button.addEventListener('mouseenter', function() {{
103
- this.classList.add('showTooltip');
104
- }});
105
-
106
- button.addEventListener('mouseleave', function() {{
107
- this.classList.remove('showTooltip');
108
- }});
109
- }})
110
- }}
111
- """
112
-
113
- # gradioURL = window.location.href
114
- # if (!gradioURL.endsWith('?__theme=dark')) {{
115
- # window.location.replace(gradioURL + '?__theme=dark');
116
- # }}
117
-
118
-
119
- buttons_with_tooltip = {
120
- "run_pipeline_button": "Runs HTR on the image. Takes approx 1-2 mins per image (depending on hardware).",
121
- "clear_button": "Clears all states and resets the entire workflow in the stepwise tool.",
122
- "region_segment_button": "Segments text regions in the chosen image with the chosen settings.",
123
- "line_segment_button": "Segments chosen regions from the image gallery into lines segments.",
124
- "transcribe_button": "Transcribes each line segment into text and streams back the data.",
125
- }
126
- gradio_config = GradioConfig(buttons_with_tooltip)
127
-
128
- theme = gradio_config.theme
129
- css = gradio_config.generate_tooltip_css()
130
- js = gradio_config.add_interaction_to_buttons()
131
-
132
-
133
- if __name__ == "__main__":
134
- tooltip = GradioConfig({"run_pipeline_button": "this is a tooltop", "clear_button": "this is a tooltop"})
135
- css = tooltip.generate_tooltip_css()
136
- js = tooltip.add_interaction_to_buttons()
137
-
138
- print(css)
139
- print(js)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
helper/text/docs_strucutre.md DELETED
@@ -1,20 +0,0 @@
1
- ## Instructions for documentation
2
-
3
- - Naming convention of folder is based on tab
4
- - Naming convention of file is based on subtabs
5
- - If subtab uses columns and rows
6
- - Use suffix such as col1, row1 or tab1, to indicate differences in postion of text.
7
-
8
- see image below:
9
-
10
- <p align="center">
11
- <img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/layout_structure.png?raw=true" alt="Badge 1">
12
- </p>
13
-
14
- ## Assets and file sharing with app
15
-
16
- This repo acts as asset manager for the app:
17
-
18
- - [Github Repo](https://github.com/Borg93/htr_gradio_file_placeholder)
19
-
20
- **Note**: this repo is an work in progress
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
helper/text/help/stepwise/stepwise.md DELETED
File without changes
helper/text/markdown_reader.py DELETED
@@ -1,5 +0,0 @@
1
- def read_markdown(file_path: str) -> str:
2
- with open(file_path, "r") as file:
3
- content = file.read()
4
-
5
- return f"""{content}"""
 
 
 
 
 
 
helper/text/text_overview.py DELETED
@@ -1,37 +0,0 @@
1
- from helper.text.markdown_reader import read_markdown
2
-
3
-
4
- class TextOverview:
5
- # HTRFLOW
6
- htrflow_col1 = read_markdown("helper/text/overview/htrflow/htrflow_col1.md")
7
- htrflow_col2 = read_markdown("helper/text/overview/htrflow/htrflow_col2.md")
8
- htrflow_row1 = read_markdown("helper/text/overview/htrflow/htrflow_row1.md")
9
- htrflow_tab1 = read_markdown("helper/text/overview/htrflow/htrflow_tab1.md")
10
- htrflow_tab2 = read_markdown("helper/text/overview/htrflow/htrflow_tab2.md")
11
- htrflow_tab3 = read_markdown("helper/text/overview/htrflow/htrflow_tab3.md")
12
- htrflow_tab4 = read_markdown("helper/text/overview/htrflow/htrflow_tab4.md")
13
-
14
- # faq & discussion
15
- text_faq = read_markdown("helper/text/overview/faq_discussion/faq.md")
16
- text_discussion = read_markdown("helper/text/overview/faq_discussion/discussion.md")
17
-
18
- # Contributions
19
- contributions = read_markdown("helper/text/overview/contributions/contributions.md")
20
- huminfra_image = read_markdown("helper/text/overview/contributions/huminfra_image.md")
21
-
22
- # Changelog & Roadmap
23
- changelog = read_markdown("helper/text/overview/changelog_roadmap/changelog.md")
24
- old_changelog = read_markdown("helper/text/overview/changelog_roadmap/old_changelog.md")
25
-
26
- roadmap = read_markdown("helper/text/overview/changelog_roadmap/roadmap.md")
27
-
28
- # duplicate & api
29
- duplicate = read_markdown("helper/text/overview/duplicate_api/duplicate.md")
30
- api1 = read_markdown("helper/text/overview/duplicate_api/api1.md")
31
- api_code1 = read_markdown("helper/text/overview/duplicate_api/api_code1.md")
32
- api2 = read_markdown("helper/text/overview/duplicate_api/api2.md")
33
- api_code2 = read_markdown("helper/text/overview/duplicate_api/api_code2.md")
34
-
35
-
36
- if __name__ == "__main__":
37
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
helper/utils.py DELETED
@@ -1,97 +0,0 @@
1
- import hashlib
2
- import os
3
- import shutil
4
- import sqlite3
5
- import uuid
6
- from datetime import datetime
7
-
8
- import gradio as gr
9
- import huggingface_hub
10
- import pandas as pd
11
- import pytz
12
- from apscheduler.schedulers.background import BackgroundScheduler
13
-
14
-
15
- class TrafficDataHandler:
16
- _DB_FILE_PATH = "./traffic_data.db"
17
- _DB_TEMP_PATH = "./data/traffic_data.db"
18
- _TOKEN = os.environ.get("HUB_TOKEN")
19
- _TZ = "Europe/Stockholm"
20
- _INTERVAL_MIN_UPDATE = 30
21
- _repo = huggingface_hub.Repository(
22
- local_dir="data", repo_type="dataset", clone_from="Riksarkivet/traffic_demo_data", use_auth_token=_TOKEN
23
- )
24
- _session_uuid = None
25
-
26
- @classmethod
27
- def _pull_repo_data(cls):
28
- cls._repo.git_pull()
29
- shutil.copyfile(cls._DB_TEMP_PATH, cls._DB_FILE_PATH)
30
-
31
- @staticmethod
32
- def _hash_ip(ip_address):
33
- return hashlib.sha256(ip_address.encode()).hexdigest()
34
-
35
- @classmethod
36
- def _current_time_in_sweden(cls):
37
- swedish_tz = pytz.timezone(cls._TZ)
38
- return datetime.now(swedish_tz).strftime("%Y-%m-%d %H:%M:%S")
39
-
40
- @classmethod
41
- def onload_store_metric_data(cls, request: gr.Request):
42
- cls._session_uuid = str(uuid.uuid1())
43
- cls._setup_database()
44
- hashed_host = cls._hash_ip(request.client.host)
45
- cls._backup_and_update_database(hashed_host, "load")
46
-
47
- @classmethod
48
- def store_metric_data(cls, action, request: gr.Request):
49
- hashed_host = cls._hash_ip(request.client.host)
50
- cls._backup_and_update_database(hashed_host, action)
51
-
52
- @classmethod
53
- def _commit_host_to_database(cls, hashed_host, action):
54
- with sqlite3.connect(cls._DB_FILE_PATH) as db:
55
- db.execute(
56
- "INSERT INTO ip_data(current_time, hashed_ip, session_uuid, action) VALUES(?,?,?,?)",
57
- [cls._current_time_in_sweden(), hashed_host, cls._session_uuid, action],
58
- )
59
-
60
- @classmethod
61
- def _setup_database(cls):
62
- with sqlite3.connect(cls._DB_FILE_PATH) as db:
63
- try:
64
- db.execute("SELECT * FROM ip_data").fetchall()
65
- except sqlite3.OperationalError:
66
- db.execute(
67
- """
68
- CREATE TABLE ip_data (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
69
- current_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
70
- hashed_ip TEXT,
71
- session_uuid TEXT,
72
- action TEXT)
73
- """
74
- )
75
- cls._pull_repo_data()
76
-
77
- @classmethod
78
- def _backup_and_update_database(cls, hashed_host, action):
79
- cls._commit_host_to_database(hashed_host, action)
80
- shutil.copyfile(cls._DB_FILE_PATH, cls._DB_TEMP_PATH)
81
-
82
- with sqlite3.connect(cls._DB_FILE_PATH) as db:
83
- ip_data = db.execute("SELECT * FROM ip_data").fetchall()
84
- pd.DataFrame(ip_data, columns=["id", "current_time", "hashed_ip", "session_uuid", "action"]).to_csv(
85
- "./data/ip_data.csv", index=False
86
- )
87
-
88
- cls._repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.now()}")
89
-
90
- @classmethod
91
- def _initialize_and_schedule_backup(cls, hashed_host, action):
92
- cls._backup_and_update_database(hashed_host, action)
93
- scheduler = BackgroundScheduler()
94
- scheduler.add_job(
95
- cls._backup_and_update_database, "interval", minutes=cls._INTERVAL_MIN_UPDATE, args=(hashed_host, action)
96
- )
97
- scheduler.start()