import os
from pathlib import Path
from typing import Tuple, Union
import logging

from transformers import DonutProcessor, VisionEncoderDecoderModel
from PIL import Image
from pdf2image import convert_from_path
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
import gradio as gr

# Logging setup: DEBUG level, appended to app.log (UTF-8) and echoed to the
# console through a stream handler.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("app.log", mode="a", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)


class HuggingFaceProcessor:
    """Wrap a Hugging Face Donut model used to extract text from page images."""

    def __init__(self, model_name: str = "naver-clova-ix/donut-base-finetuned-docvqa"):
        """Load the Donut processor and model identified by ``model_name``.

        Raises:
            Exception: whatever ``from_pretrained`` raises; the error is
                logged and re-raised unchanged.
        """
        self.logger = logging.getLogger("HuggingFaceProcessor")
        self.logger.info("Cargando modelo de Hugging Face...")
        try:
            self.processor = DonutProcessor.from_pretrained(model_name)
            self.model = VisionEncoderDecoderModel.from_pretrained(model_name)
        except Exception as e:
            self.logger.error(f"Error cargando el modelo: {e}")
            raise

    def process_image(self, image: Image.Image) -> str:
        """Run the Donut model on ``image`` and return the decoded text.

        Best-effort: any failure is logged (with traceback) and an empty
        string is returned, so one bad page does not abort a whole document.
        """
        # NOTE(review): the default checkpoint is a DocVQA fine-tune, which
        # presumably expects a task prompt passed as decoder_input_ids; none
        # is supplied here -- confirm the generated text is usable.
        try:
            pixel_values = self.processor(image, return_tensors="pt").pixel_values
            outputs = self.model.generate(pixel_values, max_length=512)
            result = self.processor.batch_decode(outputs, skip_special_tokens=True)[0]
            return result.strip()
        except Exception as e:
            # The error is swallowed here, so keep the traceback in the log.
            self.logger.exception(f"Error procesando la imagen con Donut: {e}")
            return ""


class PDFToWordProcessor:
    """Convert a scanned PDF into a Word document, one section per page."""

    def __init__(self):
        """Create the logger and load the underlying Donut model."""
        self.logger = logging.getLogger("PDFToWordProcessor")
        self.hf_processor = HuggingFaceProcessor()

    def process_pdf(self, file_path: Path) -> Document:
        """Build a Word document from the pages of the PDF at ``file_path``.

        Each page is rendered to an image, passed through the Donut model,
        and written under a level-2 heading carrying its page number.

        Raises:
            Exception: any error from PDF rendering or document building is
                logged and re-raised.
        """
        self.logger.info(f"Procesando PDF: {file_path}")
        doc = Document()

        try:
            # Render every PDF page to an image.
            images = convert_from_path(file_path)
            for page_num, image in enumerate(images, start=1):
                self.logger.debug(f"Procesando página {page_num}")

                # Extract the page text with the Donut model.
                page_text = self.hf_processor.process_image(image)

                # One heading per page, followed by the extracted text.
                doc.add_heading(f"Página {page_num}", level=2)
                self._add_text_to_doc(doc, page_text)
        except Exception as e:
            self.logger.error(f"Error procesando PDF: {e}")
            raise

        return doc

    def _add_text_to_doc(self, doc: Document, text: str):
        """Append each non-blank line of ``text`` to ``doc`` as a left-aligned paragraph."""
        for line in text.split("\n"):
            stripped = line.strip()
            if not stripped:
                # Skip empty / whitespace-only lines.
                continue
            paragraph = doc.add_paragraph(stripped, style="Normal")
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

    def process_file(self, file_path: Union[str, Path]) -> Tuple[str, str]:
        """Convert the PDF at ``file_path`` and save a ``.docx`` next to it.

        Returns:
            A ``(status_message, output_path)`` tuple. On failure the message
            starts with ``"Error: "`` and the output path is ``""``.
            This method never raises; failures are logged with a traceback.
        """
        file_path = Path(file_path)
        output_path = file_path.with_suffix(".docx")

        try:
            if file_path.suffix.lower() != ".pdf":
                raise ValueError(f"Formato no soportado: {file_path.suffix}")

            doc = self.process_pdf(file_path)
            # Pass a plain string path to python-docx.
            doc.save(str(output_path))
        except Exception as e:
            # Previously swallowed silently; record it so failures (including
            # save errors) are visible in the log.
            self.logger.exception(f"Error: {e}")
            return f"Error: {e}", ""

        return "Documento procesado exitosamente", str(output_path)


def create_interface():
    """Build and return the Gradio Blocks UI for the PDF-to-Word converter."""
    processor = PDFToWordProcessor()

    def process_file(file):
        """Gradio callback: convert the uploaded file, return (status, docx path or None)."""
        if not file:
            return "Por favor, seleccione un archivo", None

        # The upload may arrive as a path string or as an object exposing the
        # path via ``.name``; accept both.
        path = getattr(file, "name", file)
        status, output_path = processor.process_file(path)
        # An empty path means failure; hand None to the File output instead
        # of an empty string.
        return status, (output_path or None)

    with gr.Blocks(title="Procesador de PDF a Word") as demo:
        gr.Markdown("# Procesador PDF a Word con Hugging Face")
        gr.Markdown("Convierte documentos PDF escaneados a Word utilizando modelos avanzados de Hugging Face.")

        file_input = gr.File(label="Seleccionar PDF", file_types=[".pdf"], type="filepath")
        process_button = gr.Button("Procesar", variant="primary")
        output_text = gr.Textbox(label="Estado del Proceso")
        output_file = gr.File(label="Documento Procesado")

        process_button.click(process_file, inputs=[file_input], outputs=[output_text, output_file])

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch(share=True)