# cost_calculator.py
"""Estimate per-page print cost of a PDF from page area and colour usage."""
import os

from PyPDF2 import PdfReader
from pdf2image import convert_from_path
import numpy as np
from PIL import Image

UPLOAD_FOLDER = "data/uploads"
ALLOWED_EXTENSIONS = {"pdf"}


def allowed_file(filename: str) -> bool:
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot, compared
    case-insensitively. Names without a dot are rejected.
    """
    return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS


def points_to_meters(points: float) -> float:
    """Convert PDF points to meters (1 point = 1/72 inch, 1 inch = 0.0254 m)."""
    return (points / 72.0) * 0.0254


def get_rate_black() -> float:
    """Return the black-and-white rate per square meter.

    Read from the RATE_PER_M2_BLACK environment variable (default "4.0").
    Raises ValueError if the variable is set to a non-numeric string.
    """
    return float(os.environ.get("RATE_PER_M2_BLACK", "4.0"))


def get_rate_color() -> float:
    """Return the colour rate per square meter.

    Read from the RATE_PER_M2_COLOR environment variable (default "5.0").
    Raises ValueError if the variable is set to a non-numeric string.
    """
    return float(os.environ.get("RATE_PER_M2_COLOR", "5.0"))


def _page_rotation(page) -> int:
    """Return the page's /Rotate entry normalised into the range [0, 360)."""
    rotation = page.get("/Rotate") or 0
    # The entry may be stored as an indirect reference; resolve it when the
    # object offers a resolver so the numeric value can be read.
    if hasattr(rotation, "get_object"):
        rotation = rotation.get_object()
    try:
        # Modulo maps equivalent angles (-90, 450, ...) onto 0/90/180/270.
        return int(rotation) % 360
    except (TypeError, ValueError):
        # Unusable rotation value: treat the page as unrotated, which is
        # what a failed membership test did before.
        return 0


def get_page_size(page):
    """Return (width, height) of *page* in meters.

    Prefers the page's ``cropbox`` attribute and falls back to ``mediabox``
    when the crop box is missing or falsy. Width and height are swapped when
    the page rotation is equivalent to 90 or 270 degrees.
    """
    box = getattr(page, "cropbox", None) or page.mediabox
    width_pts = float(box.width)
    height_pts = float(box.height)

    if _page_rotation(page) in (90, 270):
        width_pts, height_pts = height_pts, width_pts

    return points_to_meters(width_pts), points_to_meters(height_pts)


def _measure_ink(path, page_number: int, dpi: int = 150):
    """Rasterise one page and return ``(ink_pct, is_color)``.

    *page_number* is 1-based. ``ink_pct`` is the percentage of pixels with at
    least one RGB channel below 250. ``is_color`` is True when more than 0.1%
    of those ink pixels have HSV saturation above 10. Any rendering or
    conversion error propagates to the caller.
    """
    images = convert_from_path(
        path, first_page=page_number, last_page=page_number, dpi=dpi
    )
    img = images[0].convert("RGB")
    arr = np.array(img)

    # Detect ink pixels (anything not near-white).
    ink_mask = np.any(arr < 250, axis=2)
    num_ink_pixels = np.count_nonzero(ink_mask)
    total_pixels = arr.shape[0] * arr.shape[1]
    ink_pct = (num_ink_pixels / total_pixels) * 100.0

    is_color = False
    if num_ink_pixels > 0:
        # Convert to HSV using Pillow and look only at the inked pixels.
        hsv_arr = np.array(img.convert("HSV"))
        saturation = hsv_arr[:, :, 1][ink_mask]
        # Colour if even a tiny fraction of ink pixels have saturation > 10.
        color_ratio = np.count_nonzero(saturation > 10) / len(saturation)
        is_color = color_ratio > 0.001  # 0.1% threshold

    return ink_pct, is_color


def analyze_pdf(path, compute_ink=True):
    """Compute page sizes, ink coverage and print cost for the PDF at *path*.

    Args:
        path: Location of the PDF file.
        compute_ink: When true, each page is rasterised to measure ink
            coverage and detect colour. When false, every page is billed at
            the black rate and ``ink_pct`` is None.

    Returns:
        A dict with the file's base name, a ``pages`` list of per-page dicts
        (page number, width/height in meters, area, ink percentage, colour
        flag, rounded cost) and the black/colour area and cost totals plus
        ``grand_total``. Cost figures are rounded to two decimals.

    Ink analysis is best-effort: if it fails for a page, the failure is
    printed and the page is billed at the black rate with ``ink_pct`` None.
    """
    reader = PdfReader(path)
    pages_info = []
    total_area_black = total_area_color = 0.0
    total_cost_black = total_cost_color = 0.0

    for i, page in enumerate(reader.pages):
        # Get page size robustly
        width_m, height_m = get_page_size(page)
        area = width_m * height_m

        ink_pct = None
        is_color = False
        if compute_ink:
            try:
                ink_pct, is_color = _measure_ink(path, i + 1)
            except Exception as e:
                # Deliberately broad: a rendering failure must not abort the
                # whole quote, so fall back to black pricing for this page.
                print(f"Page {i+1} ink/color calc failed: {e}")
                ink_pct = None
                is_color = False

        if is_color:
            rate = get_rate_color()
            total_area_color += area
            total_cost_color += area * rate
        else:
            rate = get_rate_black()
            total_area_black += area
            total_cost_black += area * rate

        pages_info.append({
            "page": i + 1,
            "width_m": width_m,
            "height_m": height_m,
            "area_m2": area,
            "ink_pct": ink_pct,
            "is_color": is_color,
            "cost": round(area * rate, 2),
        })

    return {
        "filename": os.path.basename(path),
        "pages": pages_info,
        "total_area_black": total_area_black,
        "total_area_color": total_area_color,
        "total_cost_black": round(total_cost_black, 2),
        "total_cost_color": round(total_cost_color, 2),
        "grand_total": round(total_cost_black + total_cost_color, 2),
    }