124-webapp/cost_calculator.py
2025-09-17 16:35:11 +02:00

111 lines
3.5 KiB
Python

# cost_calculator.py
import os
from PyPDF2 import PdfReader
from pdf2image import convert_from_path
import numpy as np
from PIL import Image
# Relative directory path; presumably where uploaded PDFs are stored.
# NOTE(review): nothing in the visible part of this module reads it — confirm against callers.
UPLOAD_FOLDER = "data/uploads"
# Lower-case file extensions (without the dot) that allowed_file() accepts.
ALLOWED_EXTENSIONS = {"pdf"}
def allowed_file(filename: str) -> bool:
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively. Names without any dot are rejected.
    """
    if "." not in filename:
        return False
    extension = filename.rsplit(".", 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
def points_to_meters(points: float) -> float:
    """Convert a length in PDF points to meters.

    One point is 1/72 inch and one inch is 0.0254 m.
    """
    inches = points / 72.0
    return inches * 0.0254
def get_rate_black() -> float:
    """Return the black-and-white rate per square meter.

    The value is read from the ``RATE_PER_M2_BLACK`` environment variable on
    every call and defaults to 4.0 when the variable is unset. A value that
    ``float()`` cannot parse raises ``ValueError``.
    """
    raw_rate = os.getenv("RATE_PER_M2_BLACK", "4.0")
    return float(raw_rate)
def get_rate_color() -> float:
    """Return the color rate per square meter.

    The value is read from the ``RATE_PER_M2_COLOR`` environment variable on
    every call and defaults to 5.0 when the variable is unset. A value that
    ``float()`` cannot parse raises ``ValueError``.
    """
    raw_rate = os.getenv("RATE_PER_M2_COLOR", "5.0")
    return float(raw_rate)
def get_page_size(page):
    """Return the page's displayed (width, height) in meters.

    The CropBox is used when the page object exposes a non-empty one,
    otherwise the MediaBox. Width and height are swapped when the page's
    ``/Rotate`` entry amounts to a quarter turn.

    Args:
        page: A page object providing ``mediabox``, optionally ``cropbox``,
            and a dict-style ``get`` for the ``/Rotate`` entry.

    Returns:
        A ``(width_m, height_m)`` tuple of floats.
    """
    box = getattr(page, "cropbox", None) or page.mediabox

    # A rectangle may be stored with its corners in either order, which makes
    # right - left (or top - bottom) negative; a size is never negative.
    width_pts = abs(float(box.width))
    height_pts = abs(float(box.height))

    # /Rotate should be a multiple of 90, but files in the wild also carry
    # negative values or values >= 360 (e.g. -90, 450). Normalise before
    # testing for a quarter turn; an unusable value is treated as unrotated.
    try:
        rotation = int(page.get("/Rotate") or 0) % 360
    except (TypeError, ValueError):
        rotation = 0

    if rotation in (90, 270):
        width_pts, height_pts = height_pts, width_pts

    return points_to_meters(width_pts), points_to_meters(height_pts)
def _measure_page_ink(path, page_number):
    """Render one page and measure its ink coverage and colorfulness.

    Args:
        path: Path of the PDF file to render.
        page_number: 1-based number of the page to render.

    Returns:
        ``(ink_pct, is_color)``: the percentage of rendered pixels that are
        not near-white, and whether more than 0.1% of those ink pixels have
        an HSV saturation above 10.
    """
    # Render the CropBox so the analysed pixels cover the same region whose
    # area is billed (get_page_size prefers the CropBox). With the default
    # MediaBox render, content outside the CropBox would be counted as ink
    # and could flip the page to the color rate.
    rendered = convert_from_path(
        path,
        first_page=page_number,
        last_page=page_number,
        dpi=150,
        use_cropbox=True,
    )
    rgb_image = rendered[0].convert("RGB")
    rgb = np.array(rgb_image)

    # A pixel counts as ink when any channel is below 250 (not near-white).
    ink_mask = np.any(rgb < 250, axis=2)
    ink_pixels = int(np.count_nonzero(ink_mask))
    total_pixels = rgb.shape[0] * rgb.shape[1]
    ink_pct = (ink_pixels / total_pixels) * 100.0
    if ink_pixels == 0:
        return ink_pct, False

    # Saturation channel (via Pillow's HSV conversion) of the ink pixels only.
    saturation = np.array(rgb_image.convert("HSV"))[:, :, 1][ink_mask]
    color_ratio = np.count_nonzero(saturation > 10) / ink_pixels
    return ink_pct, bool(color_ratio > 0.001)  # 0.1% threshold


def analyze_pdf(path, compute_ink=True):
    """Compute per-page and total printing cost for a PDF.

    Each page is billed by area (square meters) at the color rate when it is
    detected as color, otherwise at the black rate. Color detection only runs
    when ``compute_ink`` is true; if it is disabled or fails for a page, that
    page is billed at the black rate and its ``ink_pct`` is ``None``.

    Args:
        path: Path of the PDF file to analyze.
        compute_ink: Whether to render each page to measure ink coverage and
            detect color.

    Returns:
        A dict with keys ``filename`` (base name of ``path``), ``pages`` (one
        dict per page with ``page``, ``width_m``, ``height_m``, ``area_m2``,
        ``ink_pct``, ``is_color`` and ``cost``), ``total_area_black``,
        ``total_area_color``, ``total_cost_black``, ``total_cost_color`` and
        ``grand_total``. Costs are rounded to two decimals.
    """
    reader = PdfReader(path)
    pages_info = []
    total_area_black = total_area_color = 0.0
    total_cost_black = total_cost_color = 0.0

    for page_number, page in enumerate(reader.pages, start=1):
        width_m, height_m = get_page_size(page)
        area = width_m * height_m

        ink_pct = None
        is_color = False
        if compute_ink:
            # Best effort: a rendering/analysis failure must not abort the
            # whole quote; the page keeps ink_pct=None and is billed as black.
            try:
                ink_pct, is_color = _measure_page_ink(path, page_number)
            except Exception as e:
                print(f"Page {page_number} ink/color calc failed: {e}")

        if is_color:
            rate = get_rate_color()
            total_area_color += area
            total_cost_color += area * rate
        else:
            rate = get_rate_black()
            total_area_black += area
            total_cost_black += area * rate

        pages_info.append({
            "page": page_number,
            "width_m": width_m,
            "height_m": height_m,
            "area_m2": area,
            "ink_pct": ink_pct,
            "is_color": is_color,
            "cost": round(area * rate, 2),
        })

    return {
        "filename": os.path.basename(path),
        "pages": pages_info,
        "total_area_black": total_area_black,
        "total_area_color": total_area_color,
        "total_cost_black": round(total_cost_black, 2),
        "total_cost_color": round(total_cost_color, 2),
        "grand_total": round(total_cost_black + total_cost_color, 2),
    }