# Spaces: Runtime error
import os | |
from pathlib import Path | |
from typing import Tuple, Union | |
import logging | |
from transformers import DonutProcessor, VisionEncoderDecoderModel | |
from PIL import Image | |
from pdf2image import convert_from_path | |
from docx import Document | |
from docx.shared import Pt | |
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT | |
import gradio as gr | |
# Logging setup: records at DEBUG level and above are appended to "app.log"
# (UTF-8) and also sent to a StreamHandler created with its default stream.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
    handlers=[
        logging.FileHandler("app.log", mode="a", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
class HuggingFaceProcessor:
    """Wrap a Hugging Face Donut checkpoint used to extract text from images.

    Instances expose ``logger``, ``processor`` (loaded with
    ``DonutProcessor.from_pretrained``) and ``model`` (loaded with
    ``VisionEncoderDecoderModel.from_pretrained``).

    NOTE(review): the default checkpoint name indicates a DocVQA fine-tune.
    The Donut documentation drives such checkpoints with a task prompt passed
    as ``decoder_input_ids``; ``process_image`` does not supply one, so confirm
    that the generated text is usable as plain page text.
    """

    def __init__(self, model_name: str = "naver-clova-ix/donut-base-finetuned-docvqa"):
        """Load the processor and the model identified by *model_name*.

        Args:
            model_name: Identifier forwarded unchanged to both
                ``from_pretrained`` calls.

        Raises:
            Exception: Any error raised while loading is logged (with
                traceback) and re-raised unchanged.
        """
        self.logger = logging.getLogger("HuggingFaceProcessor")
        self.logger.info("Cargando modelo de Hugging Face...")
        try:
            self.processor = DonutProcessor.from_pretrained(model_name)
            self.model = VisionEncoderDecoderModel.from_pretrained(model_name)
        except Exception as e:
            # logger.exception keeps the traceback, which logger.error dropped.
            self.logger.exception("Error cargando el modelo: %s", e)
            raise

    def process_image(self, image: "Image.Image", max_length: int = 512) -> str:
        """Run the Donut model on *image* and return the decoded text.

        Args:
            image: Image to read. It is converted to RGB first when its
                ``mode`` is anything else, so grayscale, palette or RGBA
                inputs are accepted as well.
            max_length: Generation length limit forwarded to
                ``model.generate``; defaults to the previous fixed value.

        Returns:
            The first decoded sequence with surrounding whitespace stripped,
            or an empty string if any step fails (the failure is logged).
        """
        try:
            if image.mode != "RGB":
                image = image.convert("RGB")
            pixel_values = self.processor(image, return_tensors="pt").pixel_values
            outputs = self.model.generate(pixel_values, max_length=max_length)
            decoded = self.processor.batch_decode(outputs, skip_special_tokens=True)
            return decoded[0].strip()
        except Exception as e:
            # Best effort by design: a failing page yields "" instead of
            # aborting the caller, but the traceback is kept in the log.
            self.logger.exception("Error procesando la imagen con Donut: %s", e)
            return ""
class PDFToWordProcessor:
    """Convert a scanned PDF into a Word document, one heading per page."""

    def __init__(self):
        """Create the logger and the Donut-backed image processor."""
        self.logger = logging.getLogger("PDFToWordProcessor")
        self.hf_processor = HuggingFaceProcessor()

    def process_pdf(self, file_path: Path) -> "Document":
        """Build a Word document from the pages of the PDF at *file_path*.

        Each page is converted to an image, passed to the Donut processor,
        and written under a level-2 heading "Página <n>".

        Args:
            file_path: Location of the PDF to convert.

        Returns:
            The populated document object (not yet saved to disk).

        Raises:
            Exception: Any failure during conversion is logged with its
                traceback and re-raised unchanged.
        """
        self.logger.info("Procesando PDF: %s", file_path)
        doc = Document()
        try:
            # Convert every PDF page to an image.
            images = convert_from_path(file_path)
            for page_num, image in enumerate(images, start=1):
                self.logger.debug("Procesando página %s", page_num)
                page_text = self.hf_processor.process_image(image)
                doc.add_heading(f"Página {page_num}", level=2)
                self._add_text_to_doc(doc, page_text)
        except Exception as e:
            self.logger.exception("Error procesando PDF: %s", e)
            raise
        return doc

    def _add_text_to_doc(self, doc: "Document", text: str):
        """Append each non-blank line of *text* to *doc* as a left-aligned paragraph."""
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line:
                continue  # skip blank lines
            paragraph = doc.add_paragraph(line, style="Normal")
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

    def process_file(self, file_path: Union[str, Path]) -> Tuple[str, str]:
        """Convert the PDF at *file_path* and save the result next to it.

        The output file has the same name as the input with a ``.docx``
        suffix.

        Args:
            file_path: Path to the input file; only a ``.pdf`` suffix
                (case-insensitive) is accepted.

        Returns:
            ``(status_message, output_path)``. On failure the message starts
            with "Error: " and the output path is an empty string; no
            exception is propagated to the caller.
        """
        file_path = Path(file_path)

        # Reject unsupported inputs up front and record why.
        if file_path.suffix.lower() != ".pdf":
            reason = f"Formato no soportado: {file_path.suffix}"
            self.logger.warning("%s", reason)
            return f"Error: {reason}", ""

        output_path = file_path.with_suffix(".docx")
        try:
            doc = self.process_pdf(file_path)
            doc.save(output_path)
        except Exception as e:
            # process_pdf already logs its own traceback; this line records
            # that the whole request failed, including save errors.
            self.logger.error("No se pudo generar el documento para %s: %s", file_path, e)
            return f"Error: {e}", ""
        return "Documento procesado exitosamente", str(output_path)
def create_interface(): | |
"""Crea la interfaz de usuario con Gradio.""" | |
processor = PDFToWordProcessor() | |
def process_file(file): | |
if not file: | |
return "Por favor, seleccione un archivo", None | |
return processor.process_file(file.name) | |
with gr.Blocks(title="Procesador de PDF a Word") as demo: | |
gr.Markdown("# Procesador PDF a Word con Hugging Face") | |
gr.Markdown("Convierte documentos PDF escaneados a Word utilizando modelos avanzados de Hugging Face.") | |
file_input = gr.File(label="Seleccionar PDF", file_types=[".pdf"], type="filepath") | |
process_button = gr.Button("Procesar", variant="primary") | |
output_text = gr.Textbox(label="Estado del Proceso") | |
output_file = gr.File(label="Documento Procesado") | |
process_button.click(process_file, inputs=[file_input], outputs=[output_text, output_file]) | |
return demo | |
if __name__ == "__main__":
    # Script entry point: build the Gradio app, then launch it with share=True.
    demo = create_interface()
    demo.launch(share=True)