Spaces:

Riksarkivet
/

htr_demo

Sleeping

App Files Files Community

Gabriel committed on Dec 20, 2024

Commit

21c87da

1 Parent(s): 2d4e52a

htrflow app 1.0.0

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.dockerignore → .docker/.dockerignore +5 -5
.docker/dockerfile +10 -0
.gitattributes +0 -35
.github/README.md +27 -0
.gitignore +3 -0
Dockerfile +0 -54
LICENSE +0 -0
app.py +0 -65
{helper → app}/__init__.py +0 -0
{helper/examples → app/assets}/__init__.py +0 -0
{helper/examples → app/assets}/examples.py +3 -25
{helper/examples → app/assets}/images/.gitkeep +0 -0
helper/text/__init__.py → app/backend.py +0 -0
app/gradio_config.py +38 -0
app/main.py +54 -0
{src/htr_pipeline → app/tabs}/__init__.py +0 -0
app/tabs/adv_htrflow_tab.py +53 -0
app/tabs/htrflow_tab.py +290 -0
app/tabs/overview_tab.py +134 -0
{src/htr_pipeline/utils → app/texts_langs}/__init__.py +0 -0
{helper/text → app/texts_langs}/overview/changelog_roadmap/changelog.md +0 -0
{helper/text → app/texts_langs}/overview/changelog_roadmap/old_changelog.md +0 -0
{helper/text → app/texts_langs}/overview/changelog_roadmap/roadmap.md +0 -0
{helper/text → app/texts_langs}/overview/contributions/contributions.md +0 -0
{helper/text → app/texts_langs}/overview/contributions/huminfra_image.md +0 -0
{helper/text → app/texts_langs}/overview/contributions/riksarkivet_image.md +0 -0
{helper/text → app/texts_langs}/overview/duplicate_api/api1.md +0 -0
{helper/text → app/texts_langs}/overview/duplicate_api/api2.md +0 -0
{helper/text → app/texts_langs}/overview/duplicate_api/api_code1.md +0 -0
{helper/text → app/texts_langs}/overview/duplicate_api/api_code2.md +0 -0
{helper/text → app/texts_langs}/overview/duplicate_api/duplicate.md +0 -0
{helper/text → app/texts_langs}/overview/faq_discussion/discussion.md +0 -0
{helper/text → app/texts_langs}/overview/faq_discussion/faq.md +0 -0
{helper/text → app/texts_langs}/overview/htrflow/htrflow_col1.md +0 -0
{helper/text → app/texts_langs}/overview/htrflow/htrflow_col2.md +0 -0
{helper/text → app/texts_langs}/overview/htrflow/htrflow_row1.md +0 -0
{helper/text → app/texts_langs}/overview/htrflow/htrflow_tab1.md +0 -0
{helper/text → app/texts_langs}/overview/htrflow/htrflow_tab2.md +0 -0
{helper/text → app/texts_langs}/overview/htrflow/htrflow_tab3.md +0 -0
{helper/text → app/texts_langs}/overview/htrflow/htrflow_tab4.md +0 -0
{helper/text → app/texts_langs}/text_app.py +1 -11
app/texts_langs/text_overview.py +37 -0
helper/text/help/fasttrack/fast_track.md → app/utils/yaml_helper.py +0 -0
helper/examples/create_examples.py +0 -87
helper/gradio_config.py +0 -139
helper/text/docs_strucutre.md +0 -20
helper/text/help/stepwise/stepwise.md +0 -0
helper/text/markdown_reader.py +0 -5
helper/text/text_overview.py +0 -37
helper/utils.py +0 -97

.dockerignore → .docker/.dockerignore RENAMED Viewed

@@ -1,11 +1,11 @@
 # .github/
-__pycache__
-*.pyc
-*.pyo
-*.pyd
 .Python
 env
 .env
 page_txt.txt
 page_xml.xml
-src/tests/

 # .github/
+__pycache__
+*.pyc
+*.pyo
+*.pyd
 .Python
 env
 .env
 page_txt.txt
 page_xml.xml

.docker/dockerfile ADDED Viewed

	@@ -0,0 +1,10 @@

+FROM python:3.12-slim-bullseye
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
+COPY . /app
+WORKDIR /app
+RUN uv sync --frozen --no-cache
+CMD ["/app/.venv/bin/fastapi", "run", "app/main.py", "--port", "80"]

.gitattributes DELETED Viewed

@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
-helper/text/videos/eating_spaghetti.mp4 filter=lfs diff=lfs merge=lfs -text

.github/README.md CHANGED Viewed

@@ -1,3 +1,7 @@
 # htrflow_app: A demo app for htrflow
 We're thrilled to introduce [htrflow](https://huggingface.co/spaces/Riksarkivet/htr_demo), our demonstration platform that brings to life the process of transcribing Swedish handwritten documents from the 17th to the 19th century.
@@ -66,3 +70,26 @@ You can also just run it from Hugging Face:
 docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all \
 	-e registry.hf.space/riksarkivet-htr-demo:latest
 ```

+# WORK IN PROGRESS
+> :warning: **Don't use yet!**
 # htrflow_app: A demo app for htrflow
 We're thrilled to introduce [htrflow](https://huggingface.co/spaces/Riksarkivet/htr_demo), our demonstration platform that brings to life the process of transcribing Swedish handwritten documents from the 17th to the 19th century.
 docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all \
 	-e registry.hf.space/riksarkivet-htr-demo:latest
 ```
+---
+## Instructions for documentation
+- The naming convention for folders is based on the tab
+- The naming convention for files is based on the subtabs
+  - If the subtab uses columns and rows
+    - Use a suffix such as col1, row1 or tab1 to indicate differences in the position of the text.
+See the image below:
+<p align="center">
+        <img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/layout_structure.png?raw=true" alt="Badge 1">
+</p>
+## Assets and file sharing with app
+This repo acts as asset manager for the app:
+- [Github Repo](https://github.com/Borg93/htr_gradio_file_placeholder)
+**Note**: this repo is a work in progress

.gitignore CHANGED Viewed

@@ -16,6 +16,8 @@ helper/examples/images/*.jpg
 flagged_data_points/
 src/htr_pipeline.egg-info/
 #
 page_xml.xml
 page_txt.txt
@@ -34,3 +36,4 @@ mlruns/
 #models
 models--Riksarkivet--HTR_pipeline_models/

 flagged_data_points/
 src/htr_pipeline.egg-info/
+.gradio/
 #
 page_xml.xml
 page_txt.txt
 #models
 models--Riksarkivet--HTR_pipeline_models/
+app/assets/images/*.jpg

Dockerfile DELETED Viewed

@@ -1,54 +0,0 @@
-FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
-ARG DEBIAN_FRONTEND=noninteractive
-ENV PYTHONUNBUFFERED=1
-RUN apt-get update && apt-get install --no-install-recommends -y \
-    build-essential \
-    python3-pip \
-    git \
-    ffmpeg \
-    libsm6 \
-    libxext6 \
-    curl \
-    && curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash \
-    && apt-get install --no-install-recommends -y git-lfs \
-    && git lfs install \
-    && apt-get clean && rm -rf /var/lib/apt/lists/*
-WORKDIR /code
-COPY ./requirements.txt /code/requirements.txt
-RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
-RUN mim install mmdet
-RUN mim install mmocr
-RUN mim install mmcv==2.0.1
-RUN mim install mmengine
-# Set up a new user named "user" with user ID 1000
-RUN useradd -m -u 1000 user
-# Switch to the "user" user
-USER user
-ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH \
-    PYTHONPATH=$HOME/app \
-    PYTHONUNBUFFERED=1 \
-    GRADIO_ALLOW_FLAGGING=never \
-    GRADIO_NUM_PORTS=1 \
-    GRADIO_SERVER_NAME=0.0.0.0 \
-    GRADIO_THEME=huggingface \
-    SYSTEM=spaces \
-    AM_I_IN_A_DOCKER_CONTAINER=Yes
-# Set the working directory to the user's home directory
-WORKDIR $HOME/app
-# Copy the current directory contents into the container at $HOME/app setting the owner to the user
-COPY --chown=user . $HOME/app
-CMD ["python3", "app.py"]

LICENSE CHANGED Viewed

The diff for this file is too large to render. See raw diff

app.py DELETED Viewed

@@ -1,65 +0,0 @@
-import os
-SECRET_KEY = os.environ.get("HUB_TOKEN", False)
-if SECRET_KEY:
-    from helper.utils import TrafficDataHandler
-import uuid
-import gradio as gr
-from helper.gradio_config import css, theme
-from helper.text.text_app import TextApp
-from tabs.htr_tool import htr_tool_tab
-from tabs.overview_tab import overview
-from tabs.stepwise_htr_tool import stepwise_htr_tool_tab
-session_uuid = str(uuid.uuid1())
-with gr.Blocks(title="Riksarkivet", theme=theme, css=css) as demo:
-    with gr.Row():
-        with gr.Column(scale=1):
-            text_ip_output = gr.Markdown(TextApp.demo_version)
-        with gr.Column(scale=2):
-            gr.Markdown(TextApp.title_markdown)
-        with gr.Column(scale=1):
-            gr.Markdown(TextApp.title_markdown_img)
-    with gr.Tabs():
-        with gr.Tab("Fast track"):
-            htr_tool_tab.render()
-        with gr.Tab("Stepwise"):
-            stepwise_htr_tool_tab.render()
-        with gr.Tab("Overview"):
-            overview.render()
-        with gr.Tab("How to use"):
-            with gr.Row():
-                with gr.Column():
-                    gr.Markdown("## Fast track")
-                    gr.Video(
-                        value="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/eating_spaghetti.mp4",
-                        format="mp4",
-                    )
-                with gr.Column():
-                    gr.Markdown("## Stepwise")
-                    gr.Video(
-                        "https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/htr_tool_media_cut.mp4",
-                        format="mp4",
-                    )
-    if SECRET_KEY:
-        demo.load(
-            fn=TrafficDataHandler.onload_store_metric_data,
-            inputs=None,
-            outputs=None,
-        )
-demo.queue(concurrency_count=2, max_size=2)
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False, show_error=False)

{helper → app}/__init__.py RENAMED Viewed

File without changes

{helper/examples → app/assets}/__init__.py RENAMED Viewed

File without changes

{helper/examples → app/assets}/examples.py RENAMED Viewed

@@ -5,14 +5,7 @@ from PIL import Image
 class DemoImages:
-    _instance = None
-    def __new__(cls, *args, **kwargs):
-        if not cls._instance:
-            cls._instance = super(DemoImages, cls).__new__(cls, *args, **kwargs)
-        return cls._instance
-    def __init__(self, url="Riksarkivet/test_images_demo", cache_dir="./helper/examples/.cache_images"):
         if not hasattr(self, "images_datasets"):
             self.images_datasets = datasets.load_dataset(url, cache_dir=cache_dir, split="train")
             self.example_df = self.images_datasets.to_pandas()
@@ -20,30 +13,15 @@ class DemoImages:
     def convert_bytes_to_images(self):
         examples_list = []
-        # For each row in the dataframe
         for index, row in self.example_df.iterrows():
             image_bytes = row["image"]["bytes"]
             image = Image.open(io.BytesIO(image_bytes))
-            # Set the path to save the image
-            path_to_image = f"./helper/examples/images/image_{index}.jpg"
-            # Save the image
             image.save(path_to_image)
-            # Get the description
             description = row["text"]
-            # Append to the examples list
-            examples_list.append([description, path_to_image])
         return examples_list
-if __name__ == "__main__":
-    # test = DemoImages(cache_dir=".cache_images")
-    # print(test.examples_list)
-    images_datasets = datasets.load_dataset("Riksarkivet/test_images_demo", cache_dir="./helper/examples/.cache_images")
-    print(images_datasets["train"]["image"][0])

 class DemoImages:
+    def __init__(self, url="Riksarkivet/test_images_demo", cache_dir=".app/assets/images/.cache_images"):
         if not hasattr(self, "images_datasets"):
             self.images_datasets = datasets.load_dataset(url, cache_dir=cache_dir, split="train")
             self.example_df = self.images_datasets.to_pandas()
     def convert_bytes_to_images(self):
         examples_list = []
         for index, row in self.example_df.iterrows():
             image_bytes = row["image"]["bytes"]
             image = Image.open(io.BytesIO(image_bytes))
+            path_to_image = f"./app/assets/images/image_{index}.jpg"
             image.save(path_to_image)
             description = row["text"]
+            examples_list.append([description, "Nested segmentation", path_to_image])
         return examples_list

{helper/examples → app/assets}/images/.gitkeep RENAMED Viewed

File without changes

helper/text/__init__.py → app/backend.py RENAMED Viewed

File without changes

app/gradio_config.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import gradio as gr
+theme = gr.themes.Default(
+    primary_hue="blue",
+    secondary_hue="blue",
+    neutral_hue="slate",
+    # font=[
+    #     gr.themes.GoogleFont("Open Sans"),
+    #     "ui-sans-serif",
+    #     "system-ui",
+    #     "sans-serif",
+    # ],
+)
+css = """
+body > gradio-app > div > div > div.wrap.svelte-1rjryqp > footer > a {
+    display: none !important;
+}
+body > gradio-app > div > div > div.wrap.svelte-1rjryqp > footer > div {
+    display: none !important;
+}
+# .top-navbar .tab-container {justify-content: center;}
+# .top-navbar .tab-container button {font-size:large !important;}
+#langdropdown {width: 100px;}
+#column-form .wrap {flex-direction: column; height:100vh;}
+@media screen and (max-width: 1024px) {
+    #column-form .wrap {
+        flex-direction: column;
+        height: auto;
+    }
+}
+#htrflowouttab-button {opacity: 0; cursor:auto;}
+"""

app/main.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import gradio as gr
+from app.gradio_config import css, theme
+from app.tabs.adv_htrflow_tab import adv_htrflow_pipeline
+from app.tabs.htrflow_tab import htrflow_pipeline
+from app.tabs.overview_tab import overview
+from app.texts_langs.text_app import TextApp
+with gr.Blocks(title="HTRflow", theme=theme, css=css) as demo:
+    with gr.Row():
+        with gr.Column(scale=1):
+            radio = gr.Dropdown(
+                choices=["ENG", "SWE"], value="ENG", container=False, min_width=50, scale=0, elem_id="langdropdown"
+            )
+        with gr.Column(scale=2):
+            gr.Markdown(TextApp.title_markdown)
+        with gr.Column(scale=1):
+            gr.Markdown(TextApp.title_markdown_img)
+    with gr.Tabs(elem_classes="top-navbar") as navbar:
+        with gr.Tab("Home"):
+            overview.render()
+        with gr.Tab("Simple HTR"):
+            htrflow_pipeline.render()
+        with gr.Tab("Custom HTR"):
+            adv_htrflow_pipeline.render()
+    # radio.change(
+    #     None,
+    #     inputs=radio,
+    #     js="""
+    #     (data) => {
+    #     window.localStorage.setItem('data', JSON.stringify(data))
+    #     }
+    #     """,
+    # )
+    demo.load(
+        None,
+        inputs=radio,
+        js="""
+        (data) => {
+        window.localStorage.setItem('data', JSON.stringify(data))
+        }
+        """,
+    )
+demo.queue()
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, enable_monitoring=False)

{src/htr_pipeline → app/tabs}/__init__.py RENAMED Viewed

File without changes

app/tabs/adv_htrflow_tab.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import gradio as gr
+with gr.Blocks() as adv_htrflow_pipeline:
+    with gr.Row(variant="panel"):
+        with gr.Column():
+            gr.Markdown("<h2>Templates</h2>")
+            # TODO: We want to either crop or draw polygon or bbox and send it to the custom model. Or just as the image is.
+            # TODO: For the viewer we should be able to select from the output of the model what for values we want to
+            gr.ImageMask()
+            with gr.Group():
+                with gr.Row(visible=True) as yaml_pipeline:
+                    custom_template_yaml = gr.Code(
+                        value="""
+    steps:
+    - step: Segmentation
+        settings:
+        model: yolo
+        model_settings:
+            model: Riksarkivet/yolov9-lines-within-regions-1
+    - step: TextRecognition
+        settings:
+        model: TrOCR
+        model_settings:
+            model: Riksarkivet/trocr-base-handwritten-hist-swe-2
+    - step: OrderLines
+    - step: Export
+        settings:
+        format: txt
+        dest: outputs
+                                    """,
+                        language="yaml",
+                        label="yaml",
+                        interactive=True,
+                    )
+            with gr.Row():
+                gr.Button("Submit", variant="primary", scale=0)
+        with gr.Column():
+            gr.Markdown("<h2>Viewer</h2>")
+            with gr.Tabs():
+                with gr.Tab("HTR ouput"):
+                    gr.CheckboxGroup(
+                        ["Reading Order", "Line", "Region", "Word"],
+                        info="Checkboxgroup should be basedon output structure from htrflow",
+                    )
+                    gr.Image()
+                with gr.Tab("Table"):
+                    pass
+                with gr.Tab("Analysis"):
+                    pass

app/tabs/htrflow_tab.py ADDED Viewed

	@@ -0,0 +1,290 @@

+import gradio as gr
+import pandas as pd
+from app.assets.examples import DemoImages
+images_for_demo = DemoImages()
+def toggle_visibility_default_templates(selected_option):
+    return [
+        gr.update(visible=(selected_option == "Simple layout")),
+        gr.update(visible=(selected_option == "Nested segmentation")),
+        gr.update(visible=(selected_option == "Tables")),
+        selected_option,
+    ]
+def dummy_revealer(reveal_bool):
+    if reveal_bool == 0:
+        return gr.update(visible=False)
+    else:
+        return gr.update(visible=True)
+def submit_button_pipeline_fn(method, input_image, yaml_str):
+    data = {"transcription": ["Example transcription"], "prediction score": [0.95]}
+    df = pd.DataFrame(data)
+    # HTRflow code here
+    serialized_files = (
+        "https://raw.githubusercontent.com/Borg93/htr_gradio_file_placeholder/refs/heads/main/app_project_line.png"
+    )
+    return f"Selected Option: {method}", input_image, serialized_files, gr.update(visible=True), df
+def htr_image_placehholder(txt, method, image):
+    needs_yaml_to_forward_tohtrflow_ = """steps:
+"""
+    print(method)
+    return txt, method, image
+def get_yaml_button_fn(
+    method,
+    output_formats,
+    simple_segment_model=None,
+    simple_htr_model=None,
+    simple_htr_model_type=None,
+    simple_segment_model_type=None,
+    nested_segment_model_1=None,
+    nested_segment_model_2=None,
+    nested_htr_model=None,
+    nested_segment_model_1_type=None,
+    nested_segment_model_2_type=None,
+    nested_htr_model_type=None,
+):
+    if method == "Simple layout":
+        yaml_value = f"""steps:
+  - step: Segmentation
+    settings:
+      model: {simple_htr_model_type}
+      model_settings:
+        model: {simple_segment_model}
+  - step: TextRecognition
+    settings:
+      model: {simple_segment_model_type}
+      model_settings:
+        model: {simple_htr_model}
+  - step: OrderLines
+"""
+    elif method == "Nested segmentation":
+        yaml_value = f"""steps:
+  - step: Segmentation
+    settings:
+      model: {nested_segment_model_1_type}
+      model_settings:
+        model: {nested_segment_model_1}
+  - step: Segmentation
+    settings:
+      model: {nested_segment_model_2_type}
+      model_settings:
+        model: {nested_segment_model_2}
+  - step: TextRecognition
+    settings:
+      model: {nested_htr_model_type}
+      model_settings:
+        model: {nested_htr_model}
+  - step: OrderLines
+"""
+    else:
+        return gr.Error("Invalid method or not yet supported.")
+    export_steps = ""
+    for output_format in output_formats:
+        export_steps += f"""  - step: Export
+    settings:
+      format: {output_format}
+      dest: {output_format}-outputs
+"""
+    yaml_value += export_steps
+    return yaml_value
+output_image_placehholder = gr.Image(label="Output image", height=500, show_share_button=True)
+markdown_selected_option = gr.Markdown(container=True)
+inital_state_selection_option = "Simple layout"
+with gr.Blocks() as htrflow_pipeline:
+    with gr.Row(variant="panel"):
+        with gr.Column():
+            # gr.Markdown("<h2>Control Panel</h2>")
+            with gr.Tabs():
+                with gr.Tab("Templates"):
+                    with gr.Group():
+                        example_text_input_placeholder = gr.Markdown(visible=False, container=False)
+                        example_method_input_placeholder = gr.Markdown(visible=False, container=False)
+                        example_text_output_placeholder = gr.Markdown(visible=False, container=False)
+                        selected_option = gr.State(inital_state_selection_option)
+                        dummy_none = gr.State(0)
+                        user_image_input = gr.Image(
+                            interactive=True, sources=["upload", "clipboard"], label="Input image", height=300
+                        )
+                        template_method_radio = gr.Dropdown(
+                            [inital_state_selection_option, "Nested segmentation", "Tables"],
+                            value=inital_state_selection_option,
+                            label="Select template",
+                            info="Will add more templates later!",
+                        )
+                        with gr.Row() as simple_pipeline:
+                            with gr.Column():
+                                with gr.Row():
+                                    simple_segment_model = gr.Textbox(
+                                        "model1", label="Segmentation", info="Info about the Segmentation model"
+                                    )
+                                    simple_segment_model_type = gr.Dropdown(
+                                        choices=["yolo"], value="yolo", label="Segmentation", info="Model"
+                                    )
+                                with gr.Row():
+                                    simple_htr_model = gr.Textbox(
+                                        "model1", label="HTR", info="Info about the HTR model"
+                                    )
+                                    simple_htr_model_type = gr.Dropdown(
+                                        choices=["TrOCR"], value="TrOCR", label="HTR", info="Model"
+                                    )
+                        with gr.Row(visible=False) as nested_pipeline:
+                            with gr.Column():
+                                with gr.Row():
+                                    nested_segment_model_1 = gr.Textbox(
+                                        "model1", label="Segmentation", info="Info about the Segmentation model"
+                                    )
+                                    nested_segment_model_1_type = gr.Dropdown(
+                                        choices=["yolo"], value="yolo", label="Segmentation", info="Model"
+                                    )
+                                with gr.Row():
+                                    nested_segment_model_2 = gr.Textbox(
+                                        "model2", label="Segmentation", info="Info about the Segmentation model"
+                                    )
+                                    nested_segment_model_2_type = gr.Dropdown(
+                                        choices=["yolo"], value="yolo", label="Segmentation", info="Model"
+                                    )
+                                with gr.Row():
+                                    nested_htr_model = gr.Textbox(
+                                        "model1", label="HTR", info="Info about the HTR model"
+                                    )
+                                    nested_htr_model_type = gr.Dropdown(
+                                        choices=["TrOCR"], value="TrOCR", label="HTR", info="Model"
+                                    )
+                        with gr.Row(visible=False) as table_pipeline:
+                            with gr.Column():
+                                gr.Textbox("WIP")
+                        with gr.Row():
+                            output_formats = gr.Dropdown(
+                                choices=["txt", "alto", "page"],
+                                value="txt",
+                                multiselect=True,
+                                label="Serialized Output",
+                                info="Supported format are: ...",
+                            )
+                    with gr.Row():
+                        submit_button_pipeline = gr.Button("Submit", variant="primary", scale=0)
+                        get_yaml_button = gr.Button("Get Yaml", variant="secondary", scale=0)
+                with gr.Tab("Examples") as examples_tab:
+                    # TODO:  Perhaps we should move examples to a seperate tab for simplicity?
+                    gr.Examples(
+                        fn=htr_image_placehholder,
+                        examples=images_for_demo.examples_list,
+                        inputs=[
+                            example_text_input_placeholder,
+                            example_method_input_placeholder,
+                            user_image_input,
+                        ],
+                        outputs=[example_text_output_placeholder, markdown_selected_option, output_image_placehholder],
+                        cache_examples=True,
+                        cache_mode="lazy",
+                        label="Example images",
+                        examples_per_page=7,
+                    )
+        with gr.Column():
+            # gr.Markdown("<h2>Output Panel</h2>")
+            with gr.Tabs():
+                with gr.Tab("Viewer"): #interactive=False, elem_id="htrflowouttab"
+                    with gr.Group():
+                        with gr.Row():
+                            output_image_placehholder.render()
+                        with gr.Row():
+                            markdown_selected_option.render()
+                        with gr.Row():
+                            output_dataframe_pipeline = gr.Textbox(label="Click text",info="click on image bla bla..")
+                with gr.Tab("Table") as htrflow_output_table_tab:
+                    with gr.Group():
+                        with gr.Row():
+                            output_dataframe_pipeline = gr.Image(label="Output image", interactive=False, height="100")
+                        with gr.Row():
+                            output_dataframe_pipeline = gr.Dataframe(label="Output image", col_count=2)
+            output_files_pipeline = gr.Files(label="Output files", height=100, visible=False)
+            output_yaml_code = gr.Code(language="yaml", label="yaml", interactive=True, visible=False)
+    submit_button_pipeline.click(
+        get_yaml_button_fn,
+        inputs=[
+            template_method_radio,
+            output_formats,
+            simple_segment_model,
+            simple_htr_model,
+            simple_htr_model_type,
+            simple_segment_model_type,
+            nested_segment_model_1,
+            nested_segment_model_2,
+            nested_htr_model,
+            nested_segment_model_1_type,
+            nested_segment_model_2_type,
+            nested_htr_model_type,
+        ],
+        outputs=[output_yaml_code],
+    ).then(
+        submit_button_pipeline_fn,
+        inputs=[template_method_radio, user_image_input, output_yaml_code],
+        outputs=[
+            markdown_selected_option,
+            output_image_placehholder,
+            output_files_pipeline,
+            output_files_pipeline,
+            output_dataframe_pipeline,
+        ],
+    ).then(dummy_revealer, inputs=dummy_none, outputs=output_yaml_code)
+    get_yaml_button.click(
+        get_yaml_button_fn,
+        inputs=[
+            template_method_radio,
+            output_formats,
+            simple_segment_model,
+            simple_htr_model,
+            simple_htr_model_type,
+            simple_segment_model_type,
+            nested_segment_model_1,
+            nested_segment_model_2,
+            nested_htr_model,
+            nested_segment_model_1_type,
+            nested_segment_model_2_type,
+            nested_htr_model_type,
+        ],
+        outputs=[output_yaml_code],
+    ).then(dummy_revealer, inputs=output_yaml_code, outputs=output_yaml_code)
+# TODO : hide the tab when selected for yaml code
+# htrflow_output_table_tab.select(dummy_revealer, inputs=output_yaml_code, outputs=output_yaml_code)
+template_method_radio.select(
+    lambda choice: toggle_visibility_default_templates(choice),
+    inputs=template_method_radio,
+    outputs=[simple_pipeline, nested_pipeline, table_pipeline, selected_option],
+)

app/tabs/overview_tab.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import gradio as gr
+from app.texts_langs.text_overview import TextOverview
+default_value_radio_overview = "Home"
+overview_choices_eng = [
+    "Home",
+    "About App",
+    "Guide",
+    "Model & Data",
+    "Contributions",
+    "Duplicate App",
+    "FAQ & Contact",
+]
+def toggle_visibility(selected_option):
+    return [
+        gr.update(visible=(selected_option == "Home")),
+        gr.update(visible=(selected_option == "About App")),
+        gr.update(visible=(selected_option == "Guide")),
+        gr.update(visible=(selected_option == "Model & Data")),
+        gr.update(visible=(selected_option == "Contributions")),
+        gr.update(visible=(selected_option == "FAQ & Contact")),
+        gr.update(visible=(selected_option == "Duplicate App")),
+    ]
+with gr.Blocks() as overview:
+    with gr.Row():
+        with gr.Column(visible=True, min_width=170, scale=0, variant="panel") as sidebar:
+            options_overview = gr.Radio(
+                overview_choices_eng,
+                label="Side Navigation",
+                container=False,
+                value=default_value_radio_overview,
+                elem_id="column-form",
+                min_width=100,
+                scale=0,
+            )
+        with gr.Column(variant="panel") as overview_main:
+            with gr.Row(visible=True) as overview_home:
+                with gr.Column():
+                    gr.Markdown("## landing page to explain version")
+                    gr.Markdown("## htrflow app 1.0.0")
+                    gr.Markdown("## links to different stuff")
+                    gr.Markdown("## Whats new..")
+            with gr.Row(visible=False) as overview_about:
+                with gr.Column():
+                    gr.Markdown(TextOverview.htrflow_col1)
+                    gr.Markdown(TextOverview.htrflow_col2)
+            with gr.Row(visible=False) as overview_guide:
+                with gr.Column():
+                    with gr.Row():
+                        with gr.Column():
+                            gr.Markdown("## Fast track")
+                            gr.Video(
+                                value="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/eating_spaghetti.mp4",
+                                format="mp4",
+                            )
+                    with gr.Row():
+                        with gr.Column():
+                            gr.Markdown("## Stepwise")
+                            gr.Video(
+                                "https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/htr_tool_media_cut.mp4",
+                                format="mp4",
+                            )
+            with gr.Row(visible=False) as overview_model_data:
+                with gr.Column():
+                    gr.Markdown(TextOverview.htrflow_row1)
+                    with gr.Tabs():
+                        with gr.Tab("Binarization"):
+                            gr.Markdown(TextOverview.htrflow_tab1)
+                        with gr.Tab("Region segmentation"):
+                            gr.Markdown(TextOverview.htrflow_tab2)
+                        with gr.Tab("Line segmentation"):
+                            gr.Markdown(TextOverview.htrflow_tab3)
+                        with gr.Tab("Text recognition"):
+                            gr.Markdown(TextOverview.htrflow_tab4)
+            with gr.Row(visible=False) as overview_contribute:
+                with gr.Column():
+                    gr.Markdown(TextOverview.contributions)
+                    gr.Markdown(TextOverview.huminfra_image)
+            with gr.Row(visible=False) as overview_duplicate:
+                with gr.Column():
+                    gr.Markdown(TextOverview.duplicate)
+                with gr.Column():
+                    gr.Markdown(TextOverview.api1)
+                    gr.Code(
+                        value=TextOverview.api_code1,
+                        language="python",
+                        interactive=False,
+                        show_label=False,
+                    )
+                    gr.Markdown(TextOverview.api2)
+                    gr.Code(
+                        value=TextOverview.api_code2,
+                        language=None,
+                        interactive=False,
+                        show_label=False,
+                    )
+            with gr.Row(visible=False) as overview_faq:
+                with gr.Column():
+                    gr.Markdown(TextOverview.text_faq)
+                with gr.Column():
+                    gr.Markdown(TextOverview.text_discussion)
+        with gr.Column(visible=True, min_width=0, scale=0) as empty:
+            pass
+    options_overview.change(
+        lambda choice: toggle_visibility(choice),
+        inputs=options_overview,
+        outputs=[
+            overview_home,
+            overview_about,
+            overview_guide,
+            overview_model_data,
+            overview_contribute,
+            overview_duplicate,
+            overview_faq,
+        ],
+    )

{src/htr_pipeline/utils → app/texts_langs}/__init__.py RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/changelog_roadmap/changelog.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/changelog_roadmap/old_changelog.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/changelog_roadmap/roadmap.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/contributions/contributions.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/contributions/huminfra_image.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/contributions/riksarkivet_image.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/duplicate_api/api1.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/duplicate_api/api2.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/duplicate_api/api_code1.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/duplicate_api/api_code2.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/duplicate_api/duplicate.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/faq_discussion/discussion.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/faq_discussion/faq.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/htrflow/htrflow_col1.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/htrflow/htrflow_col2.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/htrflow/htrflow_row1.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/htrflow/htrflow_tab1.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/htrflow/htrflow_tab2.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/htrflow/htrflow_tab3.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/overview/htrflow/htrflow_tab4.md RENAMED Viewed

File without changes

{helper/text → app/texts_langs}/text_app.py RENAMED Viewed

@@ -1,19 +1,9 @@
 class TextApp:
-    demo_version = """<em>Version 0.1.0</em>"""
     title_markdown = """
-    <h1><center> HTRFLOW </center></h1>
-    <p><center>Explore AI models for Handwritten Text Recogntion developed by the Swedish National Archives </center></p>"""
     title_markdown_img = """
     <a href="https://riksarkivet.se">
     <img src="https://raw.githubusercontent.com/Borg93/Riksarkivet_docs/main/docs/assets/fav-removebg-preview.png" width="17%" align="right" margin-right="100" />
     </a>
     """
-if __name__ == "__main__":
-    pass

 class TextApp:
+    # Static HTML strings: a centered <h1> title and a linked image.
+    # Title heading markup.
     title_markdown = """
+    <h1><center> HTRflow 🔍 App </center></h1>"""  #
+    # Anchor to riksarkivet.se wrapping a right-aligned image at 17% width.
     title_markdown_img = """
     <a href="https://riksarkivet.se">
     <img src="https://raw.githubusercontent.com/Borg93/Riksarkivet_docs/main/docs/assets/fav-removebg-preview.png" width="17%" align="right" margin-right="100" />
     </a>
     """

app/texts_langs/text_overview.py ADDED Viewed

	@@ -0,0 +1,37 @@

+def read_markdown(file_path: str) -> str:
+    with open(file_path, "r") as file:
+        content = file.read()
+    return f"""{content}"""
+class TextOverview:
+    """Markdown texts for the overview pages, loaded as class attributes.
+
+    Every attribute is the content of one file, read with ``read_markdown``
+    while the class body executes (i.e. when this module is imported). The
+    paths are relative, so they resolve against the process's current working
+    directory; a missing file raises at import time.
+    """
+
+    # HTRflow texts; the col/row/tab suffixes are part of the file names.
+    htrflow_col1 = read_markdown("app/texts_langs/overview/htrflow/htrflow_col1.md")
+    htrflow_col2 = read_markdown("app/texts_langs/overview/htrflow/htrflow_col2.md")
+    htrflow_row1 = read_markdown("app/texts_langs/overview/htrflow/htrflow_row1.md")
+    htrflow_tab1 = read_markdown("app/texts_langs/overview/htrflow/htrflow_tab1.md")
+    htrflow_tab2 = read_markdown("app/texts_langs/overview/htrflow/htrflow_tab2.md")
+    htrflow_tab3 = read_markdown("app/texts_langs/overview/htrflow/htrflow_tab3.md")
+    htrflow_tab4 = read_markdown("app/texts_langs/overview/htrflow/htrflow_tab4.md")
+    # FAQ and discussion texts.
+    text_faq = read_markdown("app/texts_langs/overview/faq_discussion/faq.md")
+    text_discussion = read_markdown("app/texts_langs/overview/faq_discussion/discussion.md")
+    # Contributions texts.
+    contributions = read_markdown("app/texts_langs/overview/contributions/contributions.md")
+    huminfra_image = read_markdown("app/texts_langs/overview/contributions/huminfra_image.md")
+    # Changelog and roadmap texts.
+    changelog = read_markdown("app/texts_langs/overview/changelog_roadmap/changelog.md")
+    old_changelog = read_markdown("app/texts_langs/overview/changelog_roadmap/old_changelog.md")
+    roadmap = read_markdown("app/texts_langs/overview/changelog_roadmap/roadmap.md")
+    # "Duplicate app" text plus API descriptions and their code snippets.
+    duplicate = read_markdown("app/texts_langs/overview/duplicate_api/duplicate.md")
+    api1 = read_markdown("app/texts_langs/overview/duplicate_api/api1.md")
+    api_code1 = read_markdown("app/texts_langs/overview/duplicate_api/api_code1.md")
+    api2 = read_markdown("app/texts_langs/overview/duplicate_api/api2.md")
+    api_code2 = read_markdown("app/texts_langs/overview/duplicate_api/api_code2.md")

helper/text/help/fasttrack/fast_track.md → app/utils/yaml_helper.py RENAMED Viewed

File without changes

helper/examples/create_examples.py DELETED Viewed

@@ -1,87 +0,0 @@
-import os
-import tarfile
-import datasets
-import pandas as pd
-_CITATION = """\
-@InProceedings{huggingface:dataset,
-title = {Small htr examples images},
-author={Gabriel Borg},
-year={2023}
-}
-"""
-_DESCRIPTION = """\
-Demo dataset for the htr demo.
-"""
-_HOMEPAGE = "https://huggingface.co/datasets/Riksarkivet/test_images_demo"
-_LICENSE = ""
-_REPO = "https://huggingface.co/datasets/Riksarkivet/test_images_demo"
-class ExampleImages(datasets.GeneratorBasedBuilder):
-    """Small sample of image-text pairs"""
-    def _info(self):
-        return datasets.DatasetInfo(
-            description=_DESCRIPTION,
-            features=datasets.Features(
-                {
-                    "text": datasets.Value("string"),
-                    "image": datasets.Image(),
-                }
-            ),
-            supervised_keys=None,
-            homepage=_HOMEPAGE,
-            citation=_CITATION,
-        )
-    def _split_generators(self, dl_manager):
-        images_archive = dl_manager.download(f"{_REPO}/resolve/main/images.tar.gz")
-        metadata_path = dl_manager.download(f"{_REPO}/resolve/main/images.txt")
-        image_iters = dl_manager.iter_archive(images_archive)
-        return [
-            datasets.SplitGenerator(
-                name=datasets.Split.TRAIN, gen_kwargs={"images": image_iters, "metadata_path": metadata_path}
-            ),
-        ]
-    def _generate_examples(self, images, metadata_path):
-        """Generate images and text."""
-        with open(metadata_path, encoding="utf-8") as f:
-            metadata_list = f.read().split("\n")
-        for idx, (img_obj, meta_txt) in enumerate(zip(images, metadata_list)):
-            filepath, image = img_obj
-            text_value = meta_txt.split("= ")[-1].strip()
-            yield idx, {
-                "image": {"path": filepath, "bytes": image.read()},
-                "text": text_value,
-            }
-def txt_to_csv(file_name):
-    text_file_path = f"{file_name}.txt"
-    df = pd.read_csv(text_file_path, delimiter="=", header=None, names=["Key", "Label"], encoding="utf-8")
-    df["Key"] = df["Key"].str.strip()
-    df["Label"] = df["Label"].str.strip()
-    print(df)
-    df.to_csv(f"{file_name}.csv", index=False)
-def sort_and_compress_images(images_folder, tar_file):
-    sorted_images = sorted(os.listdir(images_folder))
-    with tarfile.open(tar_file, "w:gz") as tar:
-        for image_name in sorted_images:
-            image_path = os.path.join(images_folder, image_name)
-            tar.add(image_path, arcname=image_name)
-    print("Images sorted and compressed into tar.gz archive.")
-if __name__ == "__main__":
-    txt_to_csv("info")
-    sort_and_compress_images("images", "sorted_images.tar.gz")

helper/gradio_config.py DELETED Viewed

@@ -1,139 +0,0 @@
-import gradio as gr
-class GradioConfig:
-    def __init__(self, tooltip_dict):
-        self.tooltip_dict = tooltip_dict
-        self.theme = gr.themes.Base(
-            primary_hue="blue",
-            secondary_hue="blue",
-            neutral_hue="slate",
-            font=[
-                gr.themes.GoogleFont("Open Sans"),
-                "ui-sans-serif",
-                "system-ui",
-                "sans-serif",
-            ],
-        )
-        self.css = """
-        footer {display: none !important;}
-        #image_upload {min-height:450}
-        #image_upload [data-testid="image"], #image_upload [data-testid="image"] > div{min-height: 450px}
-        #gallery {height: 400px}
-        .fixed-height.svelte-g4rw9.svelte-g4rw9 {min-height: 400px;}
-        #download_file > div.empty.svelte-lk9eg8.large.unpadded_box  {min-height: 100px;}
-        #gallery_lines > div.preview.svelte-1b19cri > div.thumbnails.scroll-hide.svelte-1b19cri {display: none;}
-        .tr-head.svelte-13hsdno>.svelte-13hsdno+.svelte-13hsdno {display: none;}
-                """
-    def generate_tooltip_css(self):
-        temp_css_list = [self.css]
-        for button_id, tooltip_text in self.tooltip_dict.items():
-            temp_css_list.append(self.template_tooltip_css(button_id, tooltip_text))
-        return "\n".join(temp_css_list)
-    def template_tooltip_css(self, button_id, tooltip_text):
-        return f"""
-        /* For tooltip */
-        #{button_id} {{
-            position: relative;
-        }}
-        #{button_id}::before {{
-            visibility: hidden;
-            content: '';
-            position: absolute;
-            bottom: 100%; /* Position on top of the parent element */
-            left: 50%;
-            margin-left: 5px; /* Adjust for the desired space between the button and tooltip */
-            transform: translateY(-50%);
-            border-width: 7px;
-            border-style: solid;
-            border-color: rgba(51, 51, 51, 0) transparent transparent rgba(51, 51, 51, 0);
-            transition: opacity 0.4s ease-in-out, border-color 0.4s ease-in-out;
-            opacity: 0;
-            z-index: 999;
-        }}
-        #{button_id}::after {{
-            visibility: hidden;
-            content: '{tooltip_text}';
-            position: absolute;
-            bottom: 100%; /* Position on top of the parent element */
-            left: 42%;
-            background-color: rgba(51, 51, 51, 0);
-            color: white;
-            padding: 5px;
-            border-radius: 3px;
-            z-index: 998;
-            opacity: 0;
-            transition: opacity 0.4s ease-in-out, background-color 0.4s ease-in-out;
-            margin-bottom: 20px !important; /* Increased from 18px to 23px to move tooltip 5px upwards */
-            margin-left: 0px; /* Adjust for the arrow width and the desired space between the arrow and tooltip */
-            white-space: normal; /* Allows the text to wrap */
-            width: 200px; /* Maximum line length before wrapping */
-            box-sizing: border-box;
-        }}
-        #{button_id}.showTooltip::before {{
-            visibility: visible;
-            opacity: 1;
-            border-color: rgba(51, 51, 51, 0.7) transparent transparent rgba(51, 51, 51, 0.7);
-        }}
-        #{button_id}.showTooltip::after {{
-            visibility: visible;
-            opacity: 1;
-            background-color: rgba(51, 51, 51, 0.7);
-        }}
-        """
-    def add_interaction_to_buttons(self):
-        button_ids_list = ", ".join([f"'#{id}'" for id, _ in self.tooltip_dict.items()])
-        button_ids = button_ids_list.replace("'", "")
-        return f"""
-        function monitorButtonHover() {{
-            const buttons = document.querySelectorAll('{button_ids}');
-            buttons.forEach(function(button) {{
-                button.addEventListener('mouseenter', function() {{
-                    this.classList.add('showTooltip');
-                }});
-                button.addEventListener('mouseleave', function() {{
-                    this.classList.remove('showTooltip');
-                }});
-            }})
-        }}
-        """
-        #     gradioURL = window.location.href
-        # if (!gradioURL.endsWith('?__theme=dark')) {{
-        #     window.location.replace(gradioURL + '?__theme=dark');
-        # }}
-buttons_with_tooltip = {
-    "run_pipeline_button": "Runs HTR on the image. Takes approx 1-2 mins per image (depending on hardware).",
-    "clear_button": "Clears all states and resets the entire workflow in the stepwise tool.",
-    "region_segment_button": "Segments text regions in the chosen image with the chosen settings.",
-    "line_segment_button": "Segments chosen regions from the image gallery into lines segments.",
-    "transcribe_button": "Transcribes each line segment into text and streams back the data.",
-}
-gradio_config = GradioConfig(buttons_with_tooltip)
-theme = gradio_config.theme
-css = gradio_config.generate_tooltip_css()
-js = gradio_config.add_interaction_to_buttons()
-if __name__ == "__main__":
-    tooltip = GradioConfig({"run_pipeline_button": "this is a tooltop", "clear_button": "this is a tooltop"})
-    css = tooltip.generate_tooltip_css()
-    js = tooltip.add_interaction_to_buttons()
-    print(css)
-    print(js)

helper/text/docs_strucutre.md DELETED Viewed

@@ -1,20 +0,0 @@
-## Instructions for documentation
-- Naming convention of folder is based on tab
-- Naming convention of file is based on subtabs
-  - If subtab uses columns and rows
-    - Use suffix such as col1, row1 or tab1, to indicate differences in postion of text.
-see image below:
-<p align="center">
-        <img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/layout_structure.png?raw=true" alt="Badge 1">
-</p>
-## Assets and file sharing with app
-This repo acts as asset manager for the app:
-- [Github Repo](https://github.com/Borg93/htr_gradio_file_placeholder)
-**Note**: this repo is an work in progress

helper/text/help/stepwise/stepwise.md DELETED Viewed

File without changes

helper/text/markdown_reader.py DELETED Viewed

@@ -1,5 +0,0 @@
-def read_markdown(file_path: str) -> str:
-    with open(file_path, "r") as file:
-        content = file.read()
-    return f"""{content}"""

helper/text/text_overview.py DELETED Viewed

@@ -1,37 +0,0 @@
-from helper.text.markdown_reader import read_markdown
-class TextOverview:
-    # HTRFLOW
-    htrflow_col1 = read_markdown("helper/text/overview/htrflow/htrflow_col1.md")
-    htrflow_col2 = read_markdown("helper/text/overview/htrflow/htrflow_col2.md")
-    htrflow_row1 = read_markdown("helper/text/overview/htrflow/htrflow_row1.md")
-    htrflow_tab1 = read_markdown("helper/text/overview/htrflow/htrflow_tab1.md")
-    htrflow_tab2 = read_markdown("helper/text/overview/htrflow/htrflow_tab2.md")
-    htrflow_tab3 = read_markdown("helper/text/overview/htrflow/htrflow_tab3.md")
-    htrflow_tab4 = read_markdown("helper/text/overview/htrflow/htrflow_tab4.md")
-    # faq & discussion
-    text_faq = read_markdown("helper/text/overview/faq_discussion/faq.md")
-    text_discussion = read_markdown("helper/text/overview/faq_discussion/discussion.md")
-    # Contributions
-    contributions = read_markdown("helper/text/overview/contributions/contributions.md")
-    huminfra_image = read_markdown("helper/text/overview/contributions/huminfra_image.md")
-    # Changelog & Roadmap
-    changelog = read_markdown("helper/text/overview/changelog_roadmap/changelog.md")
-    old_changelog = read_markdown("helper/text/overview/changelog_roadmap/old_changelog.md")
-    roadmap = read_markdown("helper/text/overview/changelog_roadmap/roadmap.md")
-    # duplicate & api
-    duplicate = read_markdown("helper/text/overview/duplicate_api/duplicate.md")
-    api1 = read_markdown("helper/text/overview/duplicate_api/api1.md")
-    api_code1 = read_markdown("helper/text/overview/duplicate_api/api_code1.md")
-    api2 = read_markdown("helper/text/overview/duplicate_api/api2.md")
-    api_code2 = read_markdown("helper/text/overview/duplicate_api/api_code2.md")
-if __name__ == "__main__":
-    pass

helper/utils.py DELETED Viewed

@@ -1,97 +0,0 @@
-import hashlib
-import os
-import shutil
-import sqlite3
-import uuid
-from datetime import datetime
-import gradio as gr
-import huggingface_hub
-import pandas as pd
-import pytz
-from apscheduler.schedulers.background import BackgroundScheduler
-class TrafficDataHandler:
-    _DB_FILE_PATH = "./traffic_data.db"
-    _DB_TEMP_PATH = "./data/traffic_data.db"
-    _TOKEN = os.environ.get("HUB_TOKEN")
-    _TZ = "Europe/Stockholm"
-    _INTERVAL_MIN_UPDATE = 30
-    _repo = huggingface_hub.Repository(
-        local_dir="data", repo_type="dataset", clone_from="Riksarkivet/traffic_demo_data", use_auth_token=_TOKEN
-    )
-    _session_uuid = None
-    @classmethod
-    def _pull_repo_data(cls):
-        cls._repo.git_pull()
-        shutil.copyfile(cls._DB_TEMP_PATH, cls._DB_FILE_PATH)
-    @staticmethod
-    def _hash_ip(ip_address):
-        return hashlib.sha256(ip_address.encode()).hexdigest()
-    @classmethod
-    def _current_time_in_sweden(cls):
-        swedish_tz = pytz.timezone(cls._TZ)
-        return datetime.now(swedish_tz).strftime("%Y-%m-%d %H:%M:%S")
-    @classmethod
-    def onload_store_metric_data(cls, request: gr.Request):
-        cls._session_uuid = str(uuid.uuid1())
-        cls._setup_database()
-        hashed_host = cls._hash_ip(request.client.host)
-        cls._backup_and_update_database(hashed_host, "load")
-    @classmethod
-    def store_metric_data(cls, action, request: gr.Request):
-        hashed_host = cls._hash_ip(request.client.host)
-        cls._backup_and_update_database(hashed_host, action)
-    @classmethod
-    def _commit_host_to_database(cls, hashed_host, action):
-        with sqlite3.connect(cls._DB_FILE_PATH) as db:
-            db.execute(
-                "INSERT INTO ip_data(current_time, hashed_ip, session_uuid, action) VALUES(?,?,?,?)",
-                [cls._current_time_in_sweden(), hashed_host, cls._session_uuid, action],
-            )
-    @classmethod
-    def _setup_database(cls):
-        with sqlite3.connect(cls._DB_FILE_PATH) as db:
-            try:
-                db.execute("SELECT * FROM ip_data").fetchall()
-            except sqlite3.OperationalError:
-                db.execute(
-                    """
-                    CREATE TABLE ip_data (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
-                                          current_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
-                                          hashed_ip TEXT,
-                                          session_uuid TEXT,
-                                          action TEXT)
-                    """
-                )
-        cls._pull_repo_data()
-    @classmethod
-    def _backup_and_update_database(cls, hashed_host, action):
-        cls._commit_host_to_database(hashed_host, action)
-        shutil.copyfile(cls._DB_FILE_PATH, cls._DB_TEMP_PATH)
-        with sqlite3.connect(cls._DB_FILE_PATH) as db:
-            ip_data = db.execute("SELECT * FROM ip_data").fetchall()
-            pd.DataFrame(ip_data, columns=["id", "current_time", "hashed_ip", "session_uuid", "action"]).to_csv(
-                "./data/ip_data.csv", index=False
-            )
-        cls._repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.now()}")
-    @classmethod
-    def _initialize_and_schedule_backup(cls, hashed_host, action):
-        cls._backup_and_update_database(hashed_host, action)
-        scheduler = BackgroundScheduler()
-        scheduler.add_job(
-            cls._backup_and_update_database, "interval", minutes=cls._INTERVAL_MIN_UPDATE, args=(hashed_host, action)
-        )
-        scheduler.start()