"""FastAPI routes that clean up scanned documents by removing small specks.

Both endpoints binarize the input with Otsu's threshold, label the connected
dark components and drop every component whose pixel area is below
``area_threshold``. The result is returned as a black-on-white PNG.
"""

import re
from io import BytesIO
from typing import Optional

import cv2
import fitz
import matplotlib.pyplot as plt
import numpy as np
from fastapi import APIRouter, File, HTTPException, UploadFile
from fastapi.responses import StreamingResponse
from skimage.color import rgb2gray
from skimage.measure import label, regionprops

router = APIRouter()

# Anything outside this set is replaced before a client-supplied file name is
# placed in a response header (avoids header injection / encoding failures).
_UNSAFE_FILENAME_CHARS = re.compile(r"[^A-Za-z0-9._-]+")


class _InvalidInputError(ValueError):
    """Raised when uploaded content cannot be used as a PDF page or an image."""


def _decode_image(image_bytes: bytes) -> np.ndarray:
    """Decode encoded image bytes into a 3-channel BGR array.

    Raises:
        _InvalidInputError: If the content is empty or OpenCV cannot decode it.
    """
    if not image_bytes:
        raise _InvalidInputError("uploaded file is empty")

    raw = np.frombuffer(image_bytes, dtype=np.uint8)
    img = cv2.imdecode(raw, cv2.IMREAD_COLOR)
    # cv2.imdecode signals failure by returning None instead of raising.
    if img is None:
        raise _InvalidInputError("content is not a decodable image")
    return img


def _enhance_image(img_bgr: np.ndarray, area_threshold: int) -> BytesIO:
    """Remove small connected components from an image and encode it as PNG.

    Args:
        img_bgr: Colour image in OpenCV's BGR channel order.
        area_threshold: Minimum pixel count a dark component needs to be kept.

    Returns:
        BytesIO: PNG data positioned at offset 0.
    """
    # OpenCV decodes to BGR while rgb2gray expects RGB, so reverse the channel
    # axis first; otherwise the red and blue luminance weights are swapped.
    img_gray = rgb2gray(img_bgr[..., ::-1])

    # Binarize with Otsu's automatically selected threshold.
    _, img_binary = cv2.threshold(
        (img_gray * 255).astype(np.uint8),
        0,
        255,
        cv2.THRESH_BINARY + cv2.THRESH_OTSU,
    )

    # Invert so that dark content becomes the non-zero foreground to label.
    img_binary = ~img_binary

    label_img = label(img_binary)
    valid_labels = [
        region.label
        for region in regionprops(label_img)
        if region.area >= area_threshold
    ]
    img_filtered = np.isin(label_img, valid_labels)

    # Invert back so kept content is dark on a light background.
    output_buffer = BytesIO()
    plt.imsave(output_buffer, ~img_filtered, cmap="gray", format="png")
    output_buffer.seek(0)
    return output_buffer


def _attachment_headers(filename: Optional[str]) -> dict:
    """Build a Content-Disposition header for the enhanced PNG download.

    The upload's file name is client-controlled and may be missing, so it is
    reduced to its base name without extension and restricted to a safe
    character set. ``output`` is used when nothing usable remains.
    """
    base_name = (filename or "").replace("\\", "/").rsplit("/", 1)[-1]
    stem = _UNSAFE_FILENAME_CHARS.sub("_", base_name.rsplit(".", 1)[0]) or "output"
    return {"Content-Disposition": f"attachment; filename={stem}_enhanced.png"}


def convert_and_process_pdf(pdf_content: bytes, area_threshold: int = 100) -> BytesIO:
    """
    Convert the first page of a PDF to a PNG and apply image enhancement.

    Args:
        pdf_content: The PDF file content as bytes.
        area_threshold: Threshold for area filtering (default: 100).

    Returns:
        BytesIO: Enhanced PNG image content.

    Raises:
        ValueError: If the content is empty, the PDF has no pages, or the
            rendered page cannot be decoded.
    """
    if not pdf_content:
        raise _InvalidInputError("uploaded file is empty")

    # The context manager closes the document even if rendering fails.
    with fitz.open(stream=pdf_content, filetype="pdf") as doc:
        if doc.page_count == 0:
            raise _InvalidInputError("PDF contains no pages")
        pix = doc.load_page(0).get_pixmap(dpi=300)
        png_image = pix.tobytes("png")

    return _enhance_image(_decode_image(png_image), area_threshold)


@router.post("/process-pdf/")
async def process_pdf(
    file: UploadFile = File(...),
    area_threshold: int = 100
):
    """
    Process a PDF file and return an enhanced PNG image.

    Args:
        file: The PDF file to process
        area_threshold: Threshold for area filtering (default: 100)

    Returns:
        StreamingResponse: Enhanced PNG image

    Raises:
        HTTPException: 400 when the upload is empty or unusable, 500 for any
            other processing failure.
    """
    try:
        pdf_content = await file.read()
        enhanced_image = convert_and_process_pdf(pdf_content, area_threshold)
        return StreamingResponse(
            enhanced_image,
            media_type="image/png",
            headers=_attachment_headers(file.filename),
        )
    except _InvalidInputError as exc:
        raise HTTPException(
            status_code=400, detail=f"Error processing PDF: {str(exc)}"
        ) from exc
    except Exception as exc:
        raise HTTPException(
            status_code=500, detail=f"Error processing PDF: {str(exc)}"
        ) from exc


@router.post("/process-image/")
async def process_image(
    file: UploadFile = File(...),
    area_threshold: int = 100
):
    """
    Process an image file and return an enhanced image.

    Args:
        file: The image file to process
        area_threshold: Threshold for area filtering (default: 100)

    Returns:
        StreamingResponse: Enhanced image

    Raises:
        HTTPException: 400 when the upload is empty or not a decodable image,
            500 for any other processing failure.
    """
    try:
        image_content = await file.read()
        enhanced_image = _enhance_image(_decode_image(image_content), area_threshold)
        return StreamingResponse(
            enhanced_image,
            media_type="image/png",
            headers=_attachment_headers(file.filename),
        )
    except _InvalidInputError as exc:
        raise HTTPException(
            status_code=400, detail=f"Error processing image: {str(exc)}"
        ) from exc
    except Exception as exc:
        raise HTTPException(
            status_code=500, detail=f"Error processing image: {str(exc)}"
        ) from exc