machine_archivist/marker_detect.py
2025-04-06 19:58:31 +02:00

166 lines
6.7 KiB
Python

import os
import cv2
import numpy as np
import time
import cairosvg
from logger_config import logger
TEMPLATE_DIR = "./markers/"
USE_ORB = False # Set to True to use ORB, False to use SIFT
# Load and convert SVG templates to grayscale images
def load_template(filename):
"""Load a template image, converting SVG to grayscale if necessary."""
template_path = os.path.join(TEMPLATE_DIR, filename)
if filename.endswith(".svg"):
# Convert SVG to PNG (grayscale)
png_data = cairosvg.svg2png(url=template_path)
np_arr = np.frombuffer(png_data, dtype=np.uint8)
template = cv2.imdecode(np_arr, cv2.IMREAD_GRAYSCALE)
else:
# Load JPG/PNG directly
template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
return template
# Load all templates
templates = {}
logger.info("🔄 Loading templates...")
start_time = time.time()
for filename in os.listdir(TEMPLATE_DIR):
if filename.endswith((".jpg", ".png", ".svg")):
template_name = os.path.splitext(filename)[0]
template = load_template(filename)
if template is not None:
templates[template_name] = template
else:
logger.error(f"❌ Failed to load template: {filename}")
logger.info(f"✅ Template loading complete in {time.time() - start_time:.2f} seconds.\n")
# Log the loaded templates once all are loaded
logger.info("Templates loaded: %s", ', '.join(templates.keys()))
# Initialize feature detector (SIFT or ORB)
if USE_ORB:
detector = cv2.ORB_create(nfeatures=500)
matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
ratio_test = 0.6 # ORB ratio test
else:
detector = cv2.SIFT_create()
matcher = cv2.FlannBasedMatcher(dict(algorithm=1, trees=10), dict())
ratio_test = 0.6 # SIFT stricter ratio test
def is_valid_aspect_ratio(bounding_box, expected_aspect_ratio=1.0, tolerance=0.2):
"""Ensure detected bounding box is approximately square."""
x, y, w, h = bounding_box
aspect_ratio = w / float(h)
return (expected_aspect_ratio - tolerance) <= aspect_ratio <= (expected_aspect_ratio + tolerance)
def detect_markers(image_path, templates, min_matches=15, min_area=500):
"""Detects markers using feature matching and filters based on shape constraints."""
logger.info(f"🔄 Reading image: {image_path}")
image = cv2.imread(image_path)
if image is None:
logger.error("❌ Failed to load image")
return []
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
detected_markers = []
# Detect keypoints and descriptors in the input image
kp_image, des_image = detector.detectAndCompute(gray_image, None)
logger.info(f"🔍 Detected {len(kp_image)} keypoints in the input image.")
keypoints_image = cv2.drawKeypoints(image, kp_image, None, (0, 255, 0), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
logger.info("🔄 Starting template matching...")
for name, template in templates.items():
logger.debug(f"🔍 Processing template: {name}")
# Detect keypoints and descriptors in the template
kp_template, des_template = detector.detectAndCompute(template, None)
if des_template is None or des_image is None:
continue
logger.debug(f"🔍 Found {len(kp_template)} keypoints in template {name}.")
# Match descriptors
if USE_ORB:
matches = matcher.match(des_template, des_image)
good_matches = sorted(matches, key=lambda x: x.distance)[:min_matches]
else:
raw_matches = matcher.knnMatch(des_template, des_image, k=2)
good_matches = [m for m, n in raw_matches if m.distance < ratio_test * n.distance]
if len(good_matches) >= min_matches:
src_pts = np.float32([kp_template[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
dst_pts = np.float32([kp_image[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
if USE_ORB:
x_min, y_min = np.min(dst_pts, axis=0)[0]
x_max, y_max = np.max(dst_pts, axis=0)[0]
bounding_box = (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min))
if is_valid_aspect_ratio(bounding_box) and bounding_box[2] * bounding_box[3] >= min_area:
cv2.rectangle(image, (bounding_box[0], bounding_box[1]),
(bounding_box[0] + bounding_box[2], bounding_box[1] + bounding_box[3]),
(0, 255, 0), 2)
cv2.putText(image, name, (bounding_box[0], bounding_box[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
detected_markers.append(name)
else:
logger.warning(f"{name} detected but doesn't meet square size constraints.")
else:
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
if M is not None:
h, w = template.shape
pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
dst = cv2.perspectiveTransform(pts, M)
bounding_box = cv2.boundingRect(dst)
if is_valid_aspect_ratio(bounding_box) and bounding_box[2] * bounding_box[3] >= min_area:
hull = cv2.convexHull(dst)
if len(hull) == 4:
detected_markers.append(name)
image = cv2.polylines(image, [np.int32(hull)], True, (0, 255, 0), 3)
x, y = dst[0][0]
cv2.putText(image, name, (int(x), int(y) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0),
2)
else:
logger.warning(f"{name} detected but not forming a perfect quadrilateral.")
debug_image_path = "./scans/detected_markers.png"
keypoints_image_path = "./scans/keypoints.png"
cv2.imwrite(debug_image_path, image)
cv2.imwrite(keypoints_image_path, keypoints_image)
logger.info(f"📸 Debug image saved to {debug_image_path}")
logger.info(f"📸 Keypoints image saved to {keypoints_image_path}")
return detected_markers, debug_image_path, keypoints_image_path
if __name__ == '__main__':
image_path = "scans/snap_2025-02-23_17-10-06-836370.png"
logger.info(f"🔍 Detecting markers in image: {image_path}")
markers, debug_img, kp_img = detect_markers(image_path, templates)
logger.info(f"🔍 Detected markers: {markers}")
if markers:
logger.info(f"📍 Total markers detected: {len(markers)}")
else:
logger.warning("🛑 No markers detected.")