working prototype
This commit is contained in:
parent
4f2723b767
commit
1a4abe978f
21 changed files with 706 additions and 145 deletions
|
|
@ -3,6 +3,7 @@ import os
|
|||
from PyPDF2 import PdfReader
|
||||
from pdf2image import convert_from_path
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
UPLOAD_FOLDER = "data/uploads"
|
||||
ALLOWED_EXTENSIONS = {"pdf"}
|
||||
|
|
@ -13,6 +14,7 @@ def allowed_file(filename: str) -> bool:
|
|||
|
||||
|
||||
def points_to_meters(points: float) -> float:
    """Return the length in meters of a distance given in PDF points.

    A PDF point is 1/72 of an inch, and one inch is 0.0254 meters.
    """
    inches = points / 72.0
    return inches * 0.0254
|
||||
|
||||
|
||||
|
|
@ -24,6 +26,21 @@ def get_rate_color() -> float:
|
|||
return float(os.environ.get("RATE_PER_M2_COLOR", "5.0"))
|
||||
|
||||
|
||||
def get_page_size(page):
    """Return the displayed ``(width, height)`` of a PDF page in meters.

    The page's ``cropbox`` is used when that attribute exists and is truthy;
    otherwise ``mediabox`` is used. If the page's ``/Rotate`` entry amounts
    to a quarter turn (90 or 270 degrees after normalisation), width and
    height are swapped so the result describes the page as displayed.

    Args:
        page: A page object exposing ``mediabox`` (and optionally
            ``cropbox``) whose ``width``/``height`` are in PDF points, plus
            a dict-style ``get`` used to read the ``/Rotate`` entry.

    Returns:
        A ``(width_m, height_m)`` tuple of floats.
    """
    box = getattr(page, "cropbox", None) or page.mediabox
    width_pts = float(box.width)
    height_pts = float(box.height)

    raw_rotation = page.get("/Rotate") or 0
    # The entry may be stored as an indirect reference; resolve it first.
    resolve = getattr(raw_rotation, "get_object", None)
    if callable(resolve):
        raw_rotation = resolve() or 0

    # /Rotate may be any multiple of 90, including negative values or values
    # >= 360 (e.g. -90 or 450), so normalise into [0, 360) before comparing.
    try:
        rotation = int(raw_rotation) % 360
    except (TypeError, ValueError):
        # Unusable rotation value: treat the page as unrotated (best effort).
        rotation = 0
    # NOTE(review): a /Rotate inherited from a parent page-tree node may not
    # be visible through page.get(); confirm against the PDF library in use.

    if rotation in (90, 270):
        width_pts, height_pts = height_pts, width_pts

    return points_to_meters(width_pts), points_to_meters(height_pts)
|
||||
|
||||
|
||||
def analyze_pdf(path, compute_ink=True):
|
||||
reader = PdfReader(path)
|
||||
pages_info = []
|
||||
|
|
@ -31,9 +48,8 @@ def analyze_pdf(path, compute_ink=True):
|
|||
total_cost_black = total_cost_color = 0.0
|
||||
|
||||
for i, page in enumerate(reader.pages):
|
||||
box = page.mediabox
|
||||
width_m = points_to_meters(float(box.width))
|
||||
height_m = points_to_meters(float(box.height))
|
||||
# Get page size robustly
|
||||
width_m, height_m = get_page_size(page)
|
||||
area = width_m * height_m
|
||||
|
||||
ink_pct = None
|
||||
|
|
@ -45,14 +61,22 @@ def analyze_pdf(path, compute_ink=True):
|
|||
img = images[0].convert("RGB")
|
||||
arr = np.array(img)
|
||||
|
||||
# ink pixels: any channel < 240
|
||||
ink_mask = np.any(arr < 240, axis=2)
|
||||
ink_pct = float(np.count_nonzero(ink_mask)) / (arr.shape[0] * arr.shape[1]) * 100.0
|
||||
# Detect ink pixels (anything not near-white)
|
||||
ink_mask = np.any(arr < 250, axis=2)
|
||||
num_ink_pixels = np.count_nonzero(ink_mask)
|
||||
total_pixels = arr.shape[0] * arr.shape[1]
|
||||
ink_pct = (num_ink_pixels / total_pixels) * 100.0
|
||||
|
||||
if num_ink_pixels > 0:
|
||||
# Convert to HSV using Pillow
|
||||
hsv_img = img.convert("HSV")
|
||||
hsv_arr = np.array(hsv_img)
|
||||
saturation = hsv_arr[:, :, 1][ink_mask]
|
||||
|
||||
# Color if even a tiny fraction of ink pixels have saturation > 10
|
||||
color_ratio = np.count_nonzero(saturation > 10) / len(saturation)
|
||||
is_color = color_ratio > 0.001 # 0.1% threshold
|
||||
|
||||
# simple color detection: if RGB channels differ significantly
|
||||
avg_rgb = arr.mean(axis=(0, 1))
|
||||
if np.ptp(avg_rgb) > 30:
|
||||
is_color = True
|
||||
except Exception as e:
|
||||
print(f"Page {i+1} ink/color calc failed: {e}")
|
||||
ink_pct = None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue