111 lines
3.5 KiB
Python
111 lines
3.5 KiB
Python
# cost_calculator.py
|
|
import os
|
|
from PyPDF2 import PdfReader
|
|
from pdf2image import convert_from_path
|
|
import numpy as np
|
|
from PIL import Image
|
|
|
|
# Relative path "data/uploads". Presumably the directory where uploaded PDFs
# are stored; nothing visible in this module reads it (TODO confirm with callers).
UPLOAD_FOLDER = "data/uploads"
|
|
# Extensions (lowercase, without the dot) accepted by allowed_file(), which
# lowercases the file name's extension before checking membership here.
ALLOWED_EXTENSIONS = {"pdf"}
|
|
|
|
|
|
def allowed_file(filename: str) -> bool:
    """Tell whether *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively. A name with no dot at all is rejected.
    """
    if "." not in filename:
        return False
    extension = filename.rsplit(".", 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
|
|
|
|
|
def points_to_meters(points: float) -> float:
    """Convert a length in PDF points to meters.

    One point is 1/72 inch and one inch is 0.0254 m.
    """
    inches = points / 72.0
    return inches * 0.0254
|
|
|
|
|
|
def get_rate_black() -> float:
    """Return the black rate from the RATE_PER_M2_BLACK environment variable.

    Falls back to 4.0 when the variable is not set. The value is parsed with
    float(), so a non-numeric setting raises ValueError.
    """
    try:
        configured = os.environ["RATE_PER_M2_BLACK"]
    except KeyError:
        configured = "4.0"
    return float(configured)
|
|
|
|
|
|
def get_rate_color() -> float:
    """Return the color rate from the RATE_PER_M2_COLOR environment variable.

    Falls back to 5.0 when the variable is not set. The value is parsed with
    float(), so a non-numeric setting raises ValueError.
    """
    try:
        configured = os.environ["RATE_PER_M2_COLOR"]
    except KeyError:
        configured = "5.0"
    return float(configured)
|
|
|
|
|
|
def get_page_size(page):
    """Return the page's (width, height) in meters, rotation-aware.

    The box on the page's ``cropbox`` attribute is used when that attribute
    exists and is truthy; otherwise ``page.mediabox`` is used. Width and
    height are swapped when the page's ``/Rotate`` entry amounts to a quarter
    turn (90 or 270 degrees after normalisation).

    Args:
        page: A page object exposing ``mediabox`` (and optionally ``cropbox``)
            whose ``width``/``height`` are in PDF points, plus a dict-style
            ``get`` used to read ``/Rotate``.

    Returns:
        A ``(width_m, height_m)`` tuple of floats.
    """
    box = getattr(page, "cropbox", None) or page.mediabox
    width_pts = float(box.width)
    height_pts = float(box.height)

    rotation = page.get("/Rotate") or 0
    try:
        # A dict-style get may hand back an unresolved indirect reference;
        # resolve it when the value offers get_object(), otherwise use as-is.
        resolve = getattr(rotation, "get_object", None)
        if callable(resolve):
            rotation = resolve()
        # Normalise so that equivalent angles such as -90 or 450 are
        # recognised as quarter turns, not only the literal 90 and 270.
        turn = float(rotation) % 360
    except (TypeError, ValueError, OverflowError):
        # Unusable /Rotate value: treat the page as unrotated instead of failing.
        turn = 0.0

    if turn in (90, 270):
        width_pts, height_pts = height_pts, width_pts

    return points_to_meters(width_pts), points_to_meters(height_pts)
|
|
|
|
|
|
def analyze_pdf(path, compute_ink=True):
    """Measure every page of the PDF at *path* and price it by area.

    Each page's size comes from get_page_size(). When *compute_ink* is true
    the page is rendered at 150 dpi and inspected: a pixel counts as ink when
    any RGB channel is below 250, and the page counts as color when more than
    0.1% of its ink pixels have HSV saturation above 10. A page that is not
    color (including every page when *compute_ink* is false, or when the
    rendering step fails) is charged at the black rate.

    Args:
        path: Path of the PDF file to analyze.
        compute_ink: Whether to render pages to compute ink coverage and
            detect color.

    Returns:
        A dict with "filename", "pages" (one dict per page holding "page",
        "width_m", "height_m", "area_m2", "ink_pct", "is_color", "cost"),
        the per-category totals "total_area_black", "total_area_color",
        "total_cost_black", "total_cost_color", and "grand_total". Costs are
        rounded to two decimals.
    """
    document = PdfReader(path)
    per_page = []
    # Running totals keyed by the page's color flag (False = black, True = color).
    area_totals = {False: 0.0, True: 0.0}
    cost_totals = {False: 0.0, True: 0.0}

    for page_no, page in enumerate(document.pages, start=1):
        width_m, height_m = get_page_size(page)
        page_area = width_m * height_m

        coverage = None
        has_color = False

        if compute_ink:
            try:
                rendered = convert_from_path(
                    path, first_page=page_no, last_page=page_no, dpi=150
                )
                rgb = rendered[0].convert("RGB")
                pixels = np.array(rgb)

                # Ink = any channel darker than near-white.
                inked = np.any(pixels < 250, axis=2)
                inked_count = np.count_nonzero(inked)
                pixel_count = pixels.shape[0] * pixels.shape[1]
                coverage = (inked_count / pixel_count) * 100.0

                if inked_count > 0:
                    # Saturation channel of the HSV image, ink pixels only.
                    sat = np.array(rgb.convert("HSV"))[:, :, 1][inked]
                    saturated_share = np.count_nonzero(sat > 10) / len(sat)
                    has_color = saturated_share > 0.001  # 0.1% threshold
            except Exception as e:
                # Best effort: report the failure and price the page as black.
                print(f"Page {page_no} ink/color calc failed: {e}")
                coverage = None

        # Rates are looked up per page, and only for the category in use.
        rate = get_rate_color() if has_color else get_rate_black()
        page_cost = page_area * rate
        area_totals[has_color] += page_area
        cost_totals[has_color] += page_cost

        per_page.append({
            "page": page_no,
            "width_m": width_m,
            "height_m": height_m,
            "area_m2": page_area,
            "ink_pct": coverage,
            "is_color": has_color,
            "cost": round(page_cost, 2),
        })

    return {
        "filename": os.path.basename(path),
        "pages": per_page,
        "total_area_black": area_totals[False],
        "total_area_color": area_totals[True],
        "total_cost_black": round(cost_totals[False], 2),
        "total_cost_color": round(cost_totals[True], 2),
        "grand_total": round(cost_totals[False] + cost_totals[True], 2),
    }
|