working prototype

This commit is contained in:
Aron Petau 2025-09-17 16:35:11 +02:00
parent 4f2723b767
commit 1a4abe978f
21 changed files with 706 additions and 145 deletions

View file

@ -3,6 +3,7 @@ import os
from PyPDF2 import PdfReader
from pdf2image import convert_from_path
import numpy as np
from PIL import Image
UPLOAD_FOLDER = "data/uploads"
ALLOWED_EXTENSIONS = {"pdf"}
@ -13,6 +14,7 @@ def allowed_file(filename: str) -> bool:
def points_to_meters(points: float) -> float:
    """Translate a length in PDF points into meters.

    One PDF point is 1/72 of an inch, and one inch is 0.0254 meters.
    """
    inches = points / 72.0
    return inches * 0.0254
@ -24,6 +26,21 @@ def get_rate_color() -> float:
return float(os.environ.get("RATE_PER_M2_COLOR", "5.0"))
def get_page_size(page):
    """Return the page's (width, height) in meters, rotation-aware.

    The page's ``cropbox`` is used when the page object exposes a truthy
    one; otherwise its ``mediabox`` is used. If the page's ``/Rotate``
    entry is equivalent to a quarter turn, width and height are swapped.

    Args:
        page: A page object providing ``mediabox`` (and optionally
            ``cropbox``) with ``width``/``height`` attributes, and a
            dict-style ``get`` for the ``/Rotate`` entry.

    Returns:
        A ``(width_m, height_m)`` tuple of floats, in meters.
    """
    box = getattr(page, "cropbox", None) or page.mediabox
    width_pts = float(box.width)
    height_pts = float(box.height)

    # Normalize the angle so that equivalent rotations (-90, 450, ...) are
    # recognized as quarter turns, not only the literal values 90 and 270.
    try:
        rotation = int(page.get("/Rotate") or 0) % 360
    except (TypeError, ValueError):
        # A non-numeric /Rotate value is treated as "not rotated".
        rotation = 0

    if rotation in (90, 270):
        width_pts, height_pts = height_pts, width_pts

    return points_to_meters(width_pts), points_to_meters(height_pts)
def analyze_pdf(path, compute_ink=True):
reader = PdfReader(path)
pages_info = []
@ -31,9 +48,8 @@ def analyze_pdf(path, compute_ink=True):
total_cost_black = total_cost_color = 0.0
for i, page in enumerate(reader.pages):
box = page.mediabox
width_m = points_to_meters(float(box.width))
height_m = points_to_meters(float(box.height))
# Get page size robustly
width_m, height_m = get_page_size(page)
area = width_m * height_m
ink_pct = None
@ -45,14 +61,22 @@ def analyze_pdf(path, compute_ink=True):
img = images[0].convert("RGB")
arr = np.array(img)
# ink pixels: any channel < 240
ink_mask = np.any(arr < 240, axis=2)
ink_pct = float(np.count_nonzero(ink_mask)) / (arr.shape[0] * arr.shape[1]) * 100.0
# Detect ink pixels (anything not near-white)
ink_mask = np.any(arr < 250, axis=2)
num_ink_pixels = np.count_nonzero(ink_mask)
total_pixels = arr.shape[0] * arr.shape[1]
ink_pct = (num_ink_pixels / total_pixels) * 100.0
if num_ink_pixels > 0:
# Convert to HSV using Pillow
hsv_img = img.convert("HSV")
hsv_arr = np.array(hsv_img)
saturation = hsv_arr[:, :, 1][ink_mask]
# Color if even a tiny fraction of ink pixels have saturation > 10
color_ratio = np.count_nonzero(saturation > 10) / len(saturation)
is_color = color_ratio > 0.001 # 0.1% threshold
# simple color detection: if RGB channels differ significantly
avg_rgb = arr.mean(axis=(0, 1))
if np.ptp(avg_rgb) > 30:
is_color = True
except Exception as e:
print(f"Page {i+1} ink/color calc failed: {e}")
ink_pct = None