# htr_demo / app.py
# Latest commit: "test new scheduler" (14d4a0b), 5.34 kB
import hashlib
import os
import shutil
import sqlite3
from datetime import datetime
import gradio as gr
import huggingface_hub
import pandas as pd
import pytz
from apscheduler.schedulers.background import BackgroundScheduler
from helper.gradio_config import css, theme
from helper.text.text_about import TextAbout
from helper.text.text_app import TextApp
from helper.text.text_howto import TextHowTo
from helper.text.text_roadmap import TextRoadmap
from tabs.htr_tool import htr_tool_tab
from tabs.stepwise_htr_tool import stepwise_htr_tool_tab
# Path of the SQLite database the running app reads from and writes to.
DB_FILE = "./traffic_data.db"

# Hub access token, taken from the environment (None when HUB_TOKEN is unset).
TOKEN = os.environ.get("HUB_TOKEN")

# Clone of the traffic dataset repository, kept in the local "data" directory.
repo = huggingface_hub.Repository(
    local_dir="data",
    repo_type="dataset",
    clone_from="Riksarkivet/traffic_demo_data",
    use_auth_token=TOKEN,
)
repo.git_pull()

# Seed the working database with the copy stored in the dataset clone.
shutil.copyfile("./data/traffic_data.db", DB_FILE)
def hash_ip(ip_address):
    """Return the SHA-256 hex digest of *ip_address*.

    The string is encoded with ``str.encode()`` (UTF-8 by default) before
    it is hashed.
    """
    digest = hashlib.sha256()
    digest.update(ip_address.encode())
    return digest.hexdigest()
# Create the ip_data table if it doesn't already exist.
# CREATE TABLE IF NOT EXISTS does nothing when the table is already present,
# so there is no need to probe for it by selecting (and loading) every row.
db = sqlite3.connect(DB_FILE)
try:
    db.execute(
        """
        CREATE TABLE IF NOT EXISTS ip_data (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                      current_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
                      hashed_ip TEXT)
        """
    )
    db.commit()
finally:
    # Release the connection even if the statement fails.
    db.close()
def current_time_sw():
    """Return the current Europe/Stockholm time as ``YYYY-MM-DD HH:MM:SS``."""
    now_in_stockholm = datetime.now(pytz.timezone("Europe/Stockholm"))
    return now_in_stockholm.strftime("%Y-%m-%d %H:%M:%S")
def add_ip_data(request: gr.Request):
    """Record one visit in the ``ip_data`` table.

    Reads the client host from *request*, hashes it with ``hash_ip`` and
    inserts the hash together with the timestamp from ``current_time_sw``.

    Args:
        request: The incoming Gradio request; only ``request.client.host``
            is read.
    """
    hashed_ip = hash_ip(request.client.host)
    db = sqlite3.connect(DB_FILE)
    try:
        db.execute(
            "INSERT INTO ip_data(current_time, hashed_ip) VALUES(?,?)",
            [current_time_sw(), hashed_ip],
        )
        db.commit()
    finally:
        # Close the connection even when the insert or commit fails, so a
        # failed request does not leave a handle open on the database file.
        db.close()
def backup_db():
    """Copy the database into the dataset clone, export it as CSV and push.

    Writes ``./data/traffic_data.db`` (a file copy of ``DB_FILE``) and
    ``./data/ip_data.csv`` (all rows of ``ip_data``), then starts a
    non-blocking push of the clone.
    """
    # NOTE(review): this is a plain file copy of a database that request
    # handlers may be writing to at the same moment - confirm that is
    # acceptable, or switch to sqlite3's Connection.backup().
    shutil.copyfile(DB_FILE, "./data/traffic_data.db")

    db = sqlite3.connect(DB_FILE)
    try:
        ip_data = db.execute("SELECT * FROM ip_data").fetchall()
    finally:
        # Always release the connection: this function is called repeatedly,
        # and an unclosed connection per call accumulates open handles.
        db.close()

    pd.DataFrame(ip_data, columns=["id", "current_time", "hashed_ip"]).to_csv("./data/ip_data.csv", index=False)
    print("updating traffic_data")
    repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.now()}")
# Schedule backup_db to run every 60 seconds, then start the scheduler.
scheduler = BackgroundScheduler()
scheduler.add_job(backup_db, "interval", seconds=60)
scheduler.start()
# NOTE(review): the nesting below is reconstructed from the order of the
# `with` statements - confirm against the rendered layout.
with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
    # Page header: one row of three equally scaled columns.
    with gr.Row():
        with gr.Column(scale=1):
            # Empty Markdown component; nothing visible here writes to it.
            text_ip_output = gr.Markdown()
        with gr.Column(scale=1):
            gr.Markdown(TextApp.title_markdown)
        with gr.Column(scale=1):
            gr.Markdown(TextApp.title_markdown_img)

    with gr.Tabs():
        # The two tool tabs are imported from the tabs package and rendered here.
        with gr.Tab("HTR Tool"):
            htr_tool_tab.render()
        with gr.Tab("Stepwise HTR Tool"):
            stepwise_htr_tool_tab.render()

        # Documentation pages, grouped as sub-tabs of "About".
        with gr.Tab("About"), gr.Tabs():
            with gr.Tab("Project"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown(TextAbout.intro_text)
                    with gr.Column():
                        gr.Markdown(TextAbout.text_src_code_data_models)
                with gr.Row():
                    gr.Markdown(TextAbout.pipeline_overview_text)
                # One sub-tab per pipeline step.
                with gr.Row(), gr.Tabs():
                    with gr.Tab("I. Binarization"):
                        gr.Markdown(TextAbout.binarization)
                    with gr.Tab("II. Region Segmentation"):
                        gr.Markdown(TextAbout.text_region_segment)
                    with gr.Tab("III. Line Segmentation"):
                        gr.Markdown(TextAbout.text_line_segmentation)
                    with gr.Tab("IV. Transcriber"):
                        gr.Markdown(TextAbout.text_htr)

            with gr.Tab("Contribution"), gr.Row():
                gr.Markdown(TextRoadmap.text_contribution)

            with gr.Tab("API & Duplicate for Privat use"), gr.Row():
                with gr.Column():
                    gr.Markdown(TextHowTo.htr_tool_api_text)
                    gr.Code(
                        value=TextHowTo.code_for_api,
                        language="python",
                        interactive=False,
                        show_label=False,
                    )
                with gr.Column():
                    gr.Markdown(TextHowTo.duplicatin_space_htr_text)
                    gr.Markdown(TextHowTo.figure_htr_hardware)
                    gr.Markdown(TextHowTo.duplicatin_for_privat)

            with gr.Tab("Roadmap"), gr.Row():
                with gr.Column():
                    gr.Markdown(TextRoadmap.roadmap)
                with gr.Column():
                    gr.Markdown(TextRoadmap.discussion)

    # Call add_ip_data on the Blocks load event.
    demo.load(add_ip_data)
# Turn on the request queue before the app is launched.
demo.queue(concurrency_count=2, max_size=2)

if __name__ == "__main__":
    # Server settings used when the file is run as a script.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": False,
        "show_error": True,
    }
    demo.launch(**launch_options)