# Web file-viewer header captured with the source (not part of the program):
# AndresIgnacio's picture | Update app.py | a358cff verified | raw | history blame | 4.88 kB
import os
from pathlib import Path
from typing import Tuple, Union
import logging
from transformers import DonutProcessor, VisionEncoderDecoderModel
from PIL import Image
from pdf2image import convert_from_path
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
import gradio as gr
# Logging setup: every record from DEBUG upwards is appended to app.log
# (UTF-8) and also sent to a StreamHandler (stderr by default).
logging.basicConfig(
    handlers=[
        logging.FileHandler(filename="app.log", mode="a", encoding="utf-8"),
        logging.StreamHandler(),
    ],
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)
class HuggingFaceProcessor:
    """Wrap a Hugging Face Donut checkpoint used to extract text from images.

    The processor and the model are loaded once in the constructor and reused
    by every call to :meth:`process_image`.
    """

    def __init__(self, model_name: str = "naver-clova-ix/donut-base-finetuned-docvqa"):
        """Load the Donut processor and model identified by *model_name*.

        Args:
            model_name: Checkpoint identifier passed to ``from_pretrained``.

        Raises:
            Exception: Any error raised while loading is logged and then
                re-raised unchanged.
        """
        self.logger = logging.getLogger("HuggingFaceProcessor")
        self.logger.info("Cargando modelo de Hugging Face...")
        try:
            self.processor = DonutProcessor.from_pretrained(model_name)
            self.model = VisionEncoderDecoderModel.from_pretrained(model_name)
        except Exception as e:
            self.logger.error(f"Error cargando el modelo: {e}")
            raise

    def process_image(self, image: Image.Image) -> str:
        """Run the model on *image* and return the decoded text, stripped.

        This is best-effort: any failure is logged together with its
        traceback and an empty string is returned instead of raising.

        Args:
            image: Page image to process. Images in a mode other than RGB
                are converted to RGB before being handed to the processor.

        Returns:
            The first decoded sequence with surrounding whitespace removed,
            or ``""`` if anything went wrong.
        """
        try:
            # Hugging Face's Donut examples feed RGB images; normalise other
            # PIL modes (grayscale, palette, RGBA) up front instead of
            # letting them fail inside the processor.
            if image.mode != "RGB":
                image = image.convert("RGB")
            pixel_values = self.processor(image, return_tensors="pt").pixel_values
            # NOTE(review): no decoder_input_ids / task prompt is passed here;
            # Donut fine-tuned checkpoints are normally prompted with a task
            # token - confirm the generated text is usable as plain OCR output.
            outputs = self.model.generate(pixel_values, max_length=512)
            result = self.processor.batch_decode(outputs, skip_special_tokens=True)[0]
            return result.strip()
        except Exception as e:
            # The error is swallowed below, so keep the traceback in the log.
            self.logger.exception(f"Error procesando la imagen con Donut: {e}")
            return ""
class PDFToWordProcessor:
    """Convert a scanned PDF into a Word document, one heading per page."""

    def __init__(self):
        """Create the logger and the Donut-backed text extractor."""
        self.logger = logging.getLogger("PDFToWordProcessor")
        self.hf_processor = HuggingFaceProcessor()

    def process_pdf(self, file_path: Path) -> Document:
        """Render every page of the PDF, extract its text and build a document.

        Args:
            file_path: Location of the PDF to convert.

        Returns:
            A new Word document holding a level-2 heading per page followed
            by the text extracted from that page.

        Raises:
            Exception: Any error raised while rendering or processing pages
                is logged and then re-raised unchanged.
        """
        self.logger.info(f"Procesando PDF: {file_path}")
        doc = Document()
        try:
            # Render each PDF page to an image.
            images = convert_from_path(file_path)
            for page_num, image in enumerate(images, start=1):
                self.logger.debug(f"Procesando página {page_num}")
                # Extract the page text with the Donut model.
                page_text = self.hf_processor.process_image(image)
                # Each page gets its own heading, even when no text was found.
                doc.add_heading(f"Página {page_num}", level=2)
                # Append the extracted text below the heading.
                self._add_text_to_doc(doc, page_text)
        except Exception as e:
            self.logger.error(f"Error procesando PDF: {e}")
            raise
        return doc

    def _add_text_to_doc(self, doc: Document, text: str) -> None:
        """Append each non-blank line of *text* as a left-aligned paragraph."""
        for line in text.split('\n'):
            if line.strip():  # skip blank lines
                paragraph = doc.add_paragraph(line.strip(), style="Normal")
                paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

    def process_file(self, file_path: Union[str, Path]) -> Tuple[str, str]:
        """Convert the PDF at *file_path* and save it next to it as ``.docx``.

        Failures are never raised to the caller; they are logged and reported
        through the returned status message.

        Args:
            file_path: Path of the PDF to convert.

        Returns:
            ``(status_message, output_path)``. On failure the message starts
            with ``"Error: "`` and ``output_path`` is the empty string.
        """
        file_path = Path(file_path)

        # Reject anything that is not a PDF before doing any work.
        if file_path.suffix.lower() != ".pdf":
            reason = f"Formato no soportado: {file_path.suffix}"
            self.logger.warning(reason)
            return f"Error: {reason}", ""

        output_path = file_path.with_suffix(".docx")
        try:
            doc = self.process_pdf(file_path)
            doc.save(output_path)
        except Exception as e:
            # This is the boundary towards the UI: turn the failure into a
            # status string, but record the traceback so that it is not lost
            # (saving errors were previously not logged at all).
            self.logger.exception(f"Error procesando archivo {file_path}: {e}")
            return f"Error: {e}", ""
        return "Documento procesado exitosamente", str(output_path)
def create_interface():
    """Build the Gradio UI for converting an uploaded PDF to Word.

    A single ``PDFToWordProcessor`` is created up front and shared by every
    click of the "Procesar" button.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    processor = PDFToWordProcessor()

    def process_file(file):
        """Click handler: convert the uploaded file and report the outcome.

        Returns a ``(status_message, output_file)`` pair for the status
        textbox and the download component; ``output_file`` is ``None``
        whenever there is nothing to download.
        """
        if not file:
            return "Por favor, seleccione un archivo", None
        # The upload may arrive as a path (str / path-like) or as a file-like
        # object exposing its path through ``.name``; accept both.
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        status, output_path = processor.process_file(path)
        # process_file reports failure with an empty path; hand the File
        # output None instead so it stays empty and the status text is what
        # the user sees.
        return status, (output_path or None)

    with gr.Blocks(title="Procesador de PDF a Word") as demo:
        gr.Markdown("# Procesador PDF a Word con Hugging Face")
        gr.Markdown("Convierte documentos PDF escaneados a Word utilizando modelos avanzados de Hugging Face.")
        file_input = gr.File(label="Seleccionar PDF", file_types=[".pdf"], type="filepath")
        process_button = gr.Button("Procesar", variant="primary")
        output_text = gr.Textbox(label="Estado del Proceso")
        output_file = gr.File(label="Documento Procesado")
        process_button.click(process_file, inputs=[file_input], outputs=[output_text, output_file])
    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, then start the Gradio server.
    # share=True asks Gradio to also create a public share link.
    demo = create_interface()
    demo.launch(share=True)