Spaces:
Running
on
Zero
Running
on
Zero
MohamedRashad
committed on
Commit
·
71311e8
1
Parent(s):
e076d40
Add model description to app.py
Browse files
- app.py +10 -0
- requirements.txt +1 -0
app.py
CHANGED
@@ -55,8 +55,18 @@ def extract_text_from_pdf(pdf_path, progress=gr.Progress()):
|
|
55 |
|
56 |
return "\n".join(texts)
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
with gr.Blocks(title="Arabic Small Nougat") as demo:
|
59 |
gr.HTML("<h1 style='text-align: center'>Arabic End-to-End Structured OCR for textbooks</h1>")
|
|
|
60 |
|
61 |
with gr.Tab("Extract Text from Image"):
|
62 |
with gr.Row():
|
|
|
55 |
|
56 |
return "\n".join(texts)
|
57 |
|
58 |
+
model_description = """
|
59 |
+
This is a demo for the Arabic Small Nougat model. It is an end-to-end OCR model that can extract text from images and PDFs.
|
60 |
+
|
61 |
+
- The model is trained on the [Khatt dataset](https://huggingface.co/datasets/Fakhraddin/khatt) and custom made dataset.
|
62 |
+
- The model is a finetune of [facebook/nougat-small](https://huggingface.co/facebook/nougat-small) model.
|
63 |
+
|
64 |
+
**Note**: The model is a prototype in my book and may not work well on all types of images and PDFs. **Check the output carefully before using it for any serious work.**
|
65 |
+
"""
|
66 |
+
|
67 |
with gr.Blocks(title="Arabic Small Nougat") as demo:
|
68 |
gr.HTML("<h1 style='text-align: center'>Arabic End-to-End Structured OCR for textbooks</h1>")
|
69 |
+
gr.Markdown(model_description)
|
70 |
|
71 |
with gr.Tab("Extract Text from Image"):
|
72 |
with gr.Row():
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
pdf2image
|
|
|
2 |
transformers
|
3 |
gradio
|
|
|
1 |
pdf2image
|
2 |
+
torch
|
3 |
transformers
|
4 |
gradio
|