Spaces:
Running
Running
import cv2 | |
import fitz | |
import numpy as np | |
from io import BytesIO | |
import matplotlib.pyplot as plt | |
from skimage.color import rgb2gray | |
from skimage.measure import label, regionprops | |
from fastapi import APIRouter, UploadFile, File, HTTPException | |
from fastapi.responses import StreamingResponse | |
router = APIRouter() | |
def convert_and_process_pdf(pdf_content: bytes, area_threshold: int = 100) -> BytesIO:
    """
    Render the first page of a PDF and remove small specks from it.

    The page is rasterised at 300 DPI, binarised with Otsu's threshold, and
    every connected dark component smaller than ``area_threshold`` pixels is
    dropped. The result is written as a PNG with the kept content in black
    on a white background.

    Args:
        pdf_content: The PDF file content as bytes.
        area_threshold: Minimum pixel count a connected component must have
            to be kept (default: 100).

    Returns:
        BytesIO: PNG image content, rewound to the start of the buffer.

    Raises:
        ValueError: If the PDF has no pages or the rendered page cannot be
            decoded. Errors raised by PyMuPDF while opening or rendering the
            document propagate unchanged.
    """
    # Use the document as a context manager so it is closed even when
    # rendering fails; the PNG bytes are all we need from it.
    with fitz.open(stream=pdf_content, filetype="pdf") as doc:
        if doc.page_count == 0:
            raise ValueError("PDF contains no pages")
        pix = doc.load_page(0).get_pixmap(dpi=300)
        png_image = pix.tobytes("png")

    # Load the rendered page with OpenCV
    np_array = np.frombuffer(png_image, dtype=np.uint8)
    img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError("Rendered page could not be decoded as an image")

    # OpenCV decodes to BGR while rgb2gray expects RGB: reverse the channel
    # axis so the luminance weights land on the right channels.
    img_gray = rgb2gray(img[..., ::-1])

    # Convert grayscale to binary using Otsu's threshold
    _, img_binary = cv2.threshold(
        (img_gray * 255).astype(np.uint8),
        0,
        255,
        cv2.THRESH_BINARY + cv2.THRESH_OTSU,
    )
    # Invert so dark content becomes the non-zero foreground that gets labelled
    img_binary = ~img_binary

    # Label connected components and keep only the sufficiently large ones
    label_img = label(img_binary)
    regions = regionprops(label_img)
    valid_labels = [region.label for region in regions if region.area >= area_threshold]
    img_filtered = np.isin(label_img, valid_labels)

    # Save enhanced image to memory. vmin/vmax are pinned because imsave
    # otherwise auto-scales, and a uniform mask (blank page, or everything
    # filtered out) would be written as all black instead of all white.
    output_buffer = BytesIO()
    plt.imsave(
        output_buffer,
        ~img_filtered,
        cmap="gray",
        format="png",
        vmin=0,
        vmax=1,
    )
    output_buffer.seek(0)
    return output_buffer
async def process_pdf(
    file: UploadFile = File(...),
    area_threshold: int = 100
):
    """
    Process a PDF file and return an enhanced PNG image.

    Args:
        file: The PDF file to process
        area_threshold: Threshold for area filtering (default: 100)

    Returns:
        StreamingResponse: Enhanced PNG image, sent as an attachment named
        after the uploaded file with an ``_enhanced.png`` suffix.

    Raises:
        HTTPException: With status 500 if reading or processing the PDF fails.
    """
    try:
        # Read PDF file content
        pdf_content = await file.read()

        # Process the PDF and get the enhanced image
        enhanced_image = convert_and_process_pdf(pdf_content, area_threshold)

        # The upload may carry no filename at all, so fall back to a generic
        # stem instead of failing on None.
        stem = (file.filename or "document").rsplit(".", 1)[0]
        # Drop characters that would break out of the quoted header value
        # (quotes, backslashes, control characters such as CR/LF).
        safe_stem = "".join(
            ch for ch in stem if ch.isprintable() and ch not in '"\\'
        ) or "document"

        # Return the processed image as a StreamingResponse
        return StreamingResponse(
            enhanced_image,
            media_type="image/png",
            headers={
                "Content-Disposition": f'attachment; filename="{safe_stem}_enhanced.png"'
            },
        )
    except HTTPException:
        # Never re-wrap an HTTP error that already carries its own status.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error processing PDF: {str(e)}"
        ) from e
async def process_image(
    file: UploadFile = File(...),
    area_threshold: int = 100
):
    """
    Process an image file and return an enhanced image.

    The image is binarised with Otsu's threshold and every connected dark
    component smaller than ``area_threshold`` pixels is removed. The result
    is returned as a PNG with the kept content in black on white.

    Args:
        file: The image file to process
        area_threshold: Threshold for area filtering (default: 100)

    Returns:
        StreamingResponse: Enhanced PNG image, sent as an attachment named
        after the uploaded file with an ``_enhanced.png`` suffix.

    Raises:
        HTTPException: With status 400 if the upload is empty or is not a
            decodable image, and 500 for any other processing failure.
    """
    try:
        # Read image file content
        image_content = await file.read()
        if not image_content:
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        # Convert to numpy array and decode
        np_array = np.frombuffer(image_content, dtype=np.uint8)
        img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
        if img is None:
            # imdecode signals unsupported/corrupt data by returning None
            raise HTTPException(
                status_code=400,
                detail="Uploaded file is not a supported image",
            )

        # OpenCV decodes to BGR while rgb2gray expects RGB: reverse the
        # channel axis so the luminance weights land on the right channels.
        img_gray = rgb2gray(img[..., ::-1])

        # Convert grayscale to binary using Otsu's threshold
        _, img_binary = cv2.threshold(
            (img_gray * 255).astype(np.uint8),
            0,
            255,
            cv2.THRESH_BINARY + cv2.THRESH_OTSU,
        )
        # Invert so dark content becomes the non-zero foreground
        img_binary = ~img_binary

        # Label connected components and keep only the sufficiently large ones
        label_img = label(img_binary)
        regions = regionprops(label_img)
        valid_labels = [region.label for region in regions if region.area >= area_threshold]
        img_filtered = np.isin(label_img, valid_labels)

        # Save enhanced image to memory. vmin/vmax are pinned because imsave
        # otherwise auto-scales, and a uniform mask (blank image, or
        # everything filtered out) would be written as all black.
        output_buffer = BytesIO()
        plt.imsave(
            output_buffer,
            ~img_filtered,
            cmap="gray",
            format="png",
            vmin=0,
            vmax=1,
        )
        output_buffer.seek(0)

        # The upload may carry no filename at all, so fall back to a generic
        # stem instead of failing on None.
        stem = (file.filename or "image").rsplit(".", 1)[0]
        # Drop characters that would break out of the quoted header value
        # (quotes, backslashes, control characters such as CR/LF).
        safe_stem = "".join(
            ch for ch in stem if ch.isprintable() and ch not in '"\\'
        ) or "image"

        # Return the processed image as a StreamingResponse
        return StreamingResponse(
            output_buffer,
            media_type="image/png",
            headers={
                "Content-Disposition": f'attachment; filename="{safe_stem}_enhanced.png"'
            },
        )
    except HTTPException:
        # Keep the 400 responses raised above instead of re-wrapping as 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error processing image: {str(e)}"
        ) from e