roughly working again, now dev docker exists
This commit is contained in:
parent a77a0c0393
commit 155ab39368
26 changed files with 1976 additions and 235 deletions
@@ -6,10 +6,49 @@ from handlers.degewo_handler import DegewoHandler
from handlers.gesobau_handler import GesobauHandler
from handlers.stadtundland_handler import StadtUndLandHandler
from handlers.wbm_handler import WBMHandler
import json
from pathlib import Path
import pandas as pd
from typing import Optional
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import logging
import matplotlib
import matplotlib.font_manager as fm
import html
import re
import hashlib
import asyncio
from playwright.async_api import async_playwright
import os

STATE_FILE = Path("data/state.json")
APPLICATIONS_FILE = Path("data/applications.json")
TIMING_FILE = Path("data/timing.csv")
LISTINGS_FILE = Path("data/listings.json")
DATA_DIR = Path("data")

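# The block below redirects matplotlib's config/cache directory to a
# project-local, writable path. This matters inside the dev Docker container,
# where the home directory may not be writable; fm.findSystemFonts then warms
# the font cache from any .ttf files in data/fonts (a Noto Sans TTF is
# assumed to be provided there for emoji glyphs in plots).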
# --- Matplotlib Font Setup (for emoji support in plots) ---
font_cache_dir = Path("data/fonts")
font_cache_dir.mkdir(parents=True, exist_ok=True)
matplotlib.get_configdir = lambda: str(font_cache_dir)
fm.findSystemFonts(fontpaths=str(font_cache_dir), fontext='ttf')
matplotlib.rcParams['font.family'] = 'Noto Sans'

# Use the root logger for consistency with main.py
logger = logging.getLogger()

class ApplicationHandler:
    """
    Main handler for apartment monitoring, application automation, and notification logic.
    Handles browser automation, listing extraction, application delegation, and Telegram notifications.
    """

    def __init__(self, browser_context, state_manager, applications_file: Path = None):
        self.context = browser_context
        self.state_manager = state_manager
        self.applications_file = applications_file or APPLICATIONS_FILE
        self.handlers = {
            "howoge": HowogeHandler(browser_context),
            "gewobag": GewobagHandler(browser_context),
@@ -19,6 +58,142 @@ class ApplicationHandler:
            "wbm": WBMHandler(browser_context),
        }

    def set_telegram_bot(self, telegram_bot):
        """Attach a TelegramBot instance for notifications."""
        self.telegram_bot = telegram_bot

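    # The messages built below use Telegram HTML markup; this assumes the
    # attached TelegramBot sends with parse_mode="HTML". Listing fields are
    # interpolated unescaped, so passing them through html.escape() first
    # would be safer.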
    def notify_new_listings(self, new_listings: list[dict], application_results: Optional[dict] = None):
        """
        Send a Telegram notification for each new listing.
        Includes the application result if autopilot was enabled.
        """
        if not new_listings:
            return

        for listing in new_listings:
            link = listing.get('link', 'https://www.inberlinwohnen.de/wohnungsfinder/')
            # Detect company for the header
            company = self._detect_company(link)
            company_label = company.capitalize() if company != "unknown" else "Wohnung"
            message = (
                f"🏠 <b>[{company_label}] Neue Wohnung!</b>\n\n"
                f"🚪 <b>{listing['rooms']}</b>\n"
                f"📐 {listing['size']}\n"
                f"💰 {listing['price']}\n"
                f"📍 {listing['address']}\n\n"
                f"👉 <a href=\"{link}\">Alle Details</a>"
            )

            # Add autopilot/apply status if an application was attempted
            if application_results and listing["id"] in application_results:
                result = application_results[listing["id"]]
                if result["success"]:
                    message += f"\n\n🤖 <b>Auto-applied!</b> ({result['company']})"
                else:
                    message += f"\n\n⚠️ <b>Auto-apply failed</b> ({result['company']})"
                if result["message"]:
                    message += f"\n<i>{result['message']}</i>"

            # Send via TelegramBot if available
            if hasattr(self, 'telegram_bot') and self.telegram_bot:
                logger.info(f"Notifying Telegram: {listing['address']} ({listing['rooms']}, {listing['size']}, {listing['price']})")
                self.telegram_bot._send_message(message)
            else:
                logger.info(f"[TELEGRAM] Would send message for: {listing['address']} ({listing['rooms']}, {listing['size']}, {listing['price']})")

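    # Note: apply_to_listings is idempotent across restarts: has_applied()
    # consults the persisted application history before each attempt, so
    # restarting the monitor will not re-apply to listings already handled.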
    async def apply_to_listings(self, listings: list[dict]) -> dict:
        """
        Apply to multiple listings (autopilot mode).
        Returns a dict of application results keyed by listing ID.
        """
        results = {}
        for listing in listings:
            if self.has_applied(listing["id"]):
                logger.info(f"Already applied to {listing['id']} ({listing['address']}), skipping.")
                continue
            result = await self.apply(listing)
            results[listing["id"]] = result
            self.save_application(result)
            status = "✅" if result["success"] else "❌"
            logger.info(f"Application {status} for {listing['address']}: {result['message']}")
            await asyncio.sleep(2)  # brief pause between applications to avoid hammering the portals
        return results

    def log_listing_times(self, new_listings: list[dict]):
        """
        Log new listing appearance times to CSV for later analysis and pattern mining.
        Appends to data/listing_times.csv, creating the header if needed.
        """
        if not new_listings:
            return

        import csv
        # NB: distinct from the module-level TIMING_FILE (data/timing.csv),
        # which is what _generate_weekly_plot reads.
        timing_file = Path("data/listing_times.csv")
        file_exists = timing_file.exists()

        with open(timing_file, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            if not file_exists:
                writer.writerow(["timestamp", "weekday", "hour", "minute", "rooms", "size", "price", "address", "listing_id"])

            now = datetime.now()
            for listing in new_listings:
                writer.writerow([
                    now.isoformat(),
                    now.strftime("%A"),  # weekday name
                    now.hour,
                    now.minute,
                    listing["rooms"],
                    listing["size"],
                    listing["price"],
                    listing["address"],
                    listing["id"]
                ])

        logger.info(f"Logged {len(new_listings)} new listing times to CSV.")

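    # NOTE: init_browser manages its own Playwright lifecycle (self.playwright,
    # self.browser) and then wraps the new context in a fresh ApplicationHandler,
    # which suggests it was lifted from the monitor object rather than written
    # for this class.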
    async def init_browser(self):
        """Initialize the Playwright browser (minimal, like the test script)."""
        if not hasattr(self, 'browser') or self.browser is None:
            self.playwright = await async_playwright().start()
            self.browser = await self.playwright.chromium.launch(headless=True)
            self.context = await self.browser.new_context(
                user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
            )
            logger.info("Browser initialized (minimal context)")
            self.application_handler = ApplicationHandler(self.context, self.state_manager)

    async def apply(self, listing: dict) -> dict:
        company = self._detect_company(listing.get("link", ""))
        handler = self.handlers.get(company)
@@ -41,11 +216,463 @@ class ApplicationHandler:

        return result

    def _detect_company(self, link: str) -> str:
        """Robust company detection logic, matching monitor.py as closely as possible.

        E.g. _detect_company("https://www.howoge.de/wohnung/123") -> "howoge".
        """
        link = (link or "").lower()
        # Strip the URL scheme and www. prefix for easier matching
        link = re.sub(r"^https?://(www\.)?", "", link)
        # Domain-based matching first, including subdomains
        if re.search(r"howoge\.de", link):
            return "howoge"
        if re.search(r"gewobag\.de", link):
            return "gewobag"
        if re.search(r"degewo\.de", link):
            return "degewo"
        if re.search(r"gesobau\.de", link):
            return "gesobau"
        if re.search(r"stadt-und-land\.de|stadtundland\.de", link):
            return "stadtundland"
        if re.search(r"wbm\.de", link):
            return "wbm"
        # Fall back to matching the company name anywhere in the path or query (legacy/edge cases)
        if re.search(r"howoge", link):
            return "howoge"
        if re.search(r"gewobag", link):
            return "gewobag"
        if re.search(r"degewo", link):
            return "degewo"
        if re.search(r"gesobau", link):
            return "gesobau"
        if re.search(r"stadt-und-land|stadtundland", link):
            return "stadtundland"
        if re.search(r"wbm", link):
            return "wbm"
        return "unknown"

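    # State/persistence helpers. load_state defaults autopilot to off, so a
    # fresh deployment never auto-applies until it is explicitly enabled.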
    def load_state(self) -> dict:
        """Load persistent state"""
        if STATE_FILE.exists():
            with open(STATE_FILE, "r") as f:
                return json.load(f)
        return {"autopilot": False}

    def save_state(self, state: dict):
        """Save persistent state"""
        with open(STATE_FILE, "w") as f:
            json.dump(state, f, indent=2)

    def set_autopilot(self, enabled: bool):
        """Enable or disable autopilot mode"""
        self.state_manager.set_autopilot(enabled)

    def is_autopilot_enabled(self) -> bool:
        """Check if autopilot mode is enabled"""
        return self.state_manager.is_autopilot_enabled()

    def load_applications(self) -> dict:
        """Load application history."""
        if self.applications_file.exists():
            try:
                with open(self.applications_file, "r", encoding="utf-8") as f:
                    return json.load(f)
            except json.JSONDecodeError:
                logger.error("Failed to decode applications file. Returning empty history.")
        return {}

    def save_application(self, result: dict):
        """Save an application result."""
        applications = self.load_applications()
        applications[result["listing_id"]] = result
        with open(self.applications_file, "w", encoding="utf-8") as f:
            json.dump(applications, f, indent=2, ensure_ascii=False)

    def has_applied(self, listing_id: str) -> bool:
        """Check if we've already applied to this listing."""
        return listing_id in self.load_applications()

    def load_previous_listings(self) -> dict:
        """Load previously saved listings"""
        if LISTINGS_FILE.exists():
            with open(LISTINGS_FILE, "r") as f:
                return json.load(f)
        return {}

    def save_listings(self, listings: list[dict]):
        """Save current listings"""
        listings_dict = {l["id"]: l for l in listings}
        with open(LISTINGS_FILE, "w") as f:
            json.dump(listings_dict, f, indent=2, ensure_ascii=False)

    def find_new_listings(self, current: list[dict], previous: dict) -> list[dict]:
        """Find listings that are new since the last check"""
        return [listing for listing in current if listing["id"] not in previous]

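    # Plot helpers: each renders a PNG under data/ and returns its path,
    # presumably for the Telegram bot to send as a photo.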
    def _generate_weekly_plot(self) -> str:
        """Generate a heatmap of listings by day of week and hour"""
        if not TIMING_FILE.exists():
            logger.warning("No timing file found for weekly plot")
            return ""

        try:
            df = pd.read_csv(TIMING_FILE, parse_dates=["timestamp"])
            df["day_of_week"] = df["timestamp"].dt.dayofweek
            df["hour"] = df["timestamp"].dt.hour

            heatmap_data = df.groupby(["day_of_week", "hour"]).size().unstack(fill_value=0)
            # Pad to the full 7x24 grid so the tick labels below line up even
            # when some days or hours have no listings yet
            heatmap_data = heatmap_data.reindex(index=range(7), columns=range(24), fill_value=0)

            fig, ax = plt.subplots(figsize=(10, 6))
            cax = ax.matshow(heatmap_data, cmap="YlGnBu", aspect="auto")
            fig.colorbar(cax)

            ax.set_xticks(range(24))
            ax.set_yticks(range(7))
            ax.set_xticklabels([f"{h}:00" for h in range(24)], rotation=90)
            ax.set_yticklabels(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])

            ax.set_title("Listings Heatmap (Day of Week vs Hour)")

            plot_path = DATA_DIR / "weekly_plot.png"
            plt.savefig(plot_path)
            plt.close(fig)

            logger.info(f"Weekly plot saved to {plot_path}")
            return str(plot_path)
        except Exception as e:
            logger.error(f"Failed to generate weekly plot: {e}")
            return ""

    def _generate_error_rate_plot(self):
        """Read applications.json and produce a plot image + summary text.

        Returns (plot_path, summary_text), or (None, "") if there is insufficient data.
        """
        if not self.applications_file.exists():
            logger.warning("No applications.json found for error-rate plot")
            return None, ""

        try:
            with open(self.applications_file, 'r', encoding='utf-8') as f:
                apps = json.load(f)

            if not apps:
                logger.warning("No application data available for error-rate plot")
                return None, ""

            # Convert to DataFrame
            rows = []
            for _id, rec in apps.items():
                rows.append({
                    "id": _id,
                    "ts": pd.to_datetime(rec.get("timestamp")),
                    "success": rec.get("success", False),
                    "company": rec.get("company", "unknown")
                })

            df = pd.DataFrame(rows)
            df = df.dropna(subset=['ts'])
            if df.empty:
                logger.warning("No valid data for error-rate plot")
                return None, ""

            df['date'] = df['ts'].dt.floor('D')
            grouped = df.groupby('date').agg(total=('id', 'count'), successes=('success', 'sum'))
            grouped['failures'] = grouped['total'] - grouped['successes']
            grouped['error_rate'] = grouped['failures'] / grouped['total']

            # Ensure the index is sorted by date for plotting
            grouped = grouped.sort_index()

            # Prepare plot
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.plot(grouped.index, grouped['error_rate'], marker='o', color='red', label='Error Rate')
            ax.set_title('Autopilot Error Rate Over Time')
            ax.set_xlabel('Date')
            ax.set_ylabel('Error Rate')
            ax.legend()
            ax.grid(True)

            # Save the plot to the same directory as the applications file
            plot_path = self.applications_file.parent / 'error_rate.png'
            plt.savefig(plot_path)
            plt.close(fig)

            # Summary
            total_attempts = int(grouped['total'].sum())
            total_success = int(grouped['successes'].sum())
            total_fail = int(grouped['failures'].sum())
            overall_error = (total_fail / total_attempts) if total_attempts > 0 else 0.0
            summary = (
                f"<b>Total attempts:</b> {total_attempts}\n"
                f"<b>Successes:</b> {total_success}\n"
                f"<b>Failures:</b> {total_fail}\n"
                f"<b>Overall error rate:</b> {overall_error:.1%}"
            )

            return plot_path, summary
        except Exception as e:
            logger.exception(f"Failed to generate error rate plot: {e}")
            return None, ""

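    # Credentials are read off the state manager, which is expected to be
    # populated from the INBERLIN_EMAIL / INBERLIN_PASSWORD environment
    # variables mentioned in the warning below.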
    async def login(self, page):
        """Log in to inberlinwohnen.de (minimal, like the test script)."""
        if not self.state_manager.email or not self.state_manager.password:
            logger.warning("No credentials provided. Ensure INBERLIN_EMAIL and INBERLIN_PASSWORD are set in the environment.")
            return False

        try:
            logger.info("Navigating to login page...")
            login_response = await page.goto("https://www.inberlinwohnen.de/login", wait_until="networkidle")
            logger.info(f"Login page status: {login_response.status if login_response else 'No response'}")
            await asyncio.sleep(2)

            # Dismiss the cookie/privacy modal before login
            logger.info("Attempting to dismiss cookie/privacy modal before login...")
            await self.dismiss_cookie_modal(page)
            logger.info("Cookie/privacy modal dismissed.")

            # Fill the login form (if present)
            logger.info("Filling in login credentials...")
            await page.fill('input[name="email"], input[type="email"]', self.state_manager.email)
            await page.fill('input[name="password"], input[type="password"]', self.state_manager.password)
            logger.info("Login credentials filled.")

            # Click the submit button
            logger.info("Submitting login form...")
            await page.click('button[type="submit"], input[type="submit"]', timeout=30000)
            logger.info("Clicked submit, waiting for navigation...")
            try:
                await page.wait_for_load_state("networkidle", timeout=30000)
                logger.info(f"After login, page url: {page.url}")
                logger.info(f"After login, page content length: {len(await page.content())}")
            except Exception as e:
                logger.error(f"Timeout or error after login submit: {e}")
            await asyncio.sleep(2)

            # Check whether login was successful
            logger.info("Checking if login was successful...")
            if "mein-bereich" in page.url or await page.query_selector('text="Abmelden"'):
                logger.info("Login successful.")
                return True
            else:
                logger.error(f"Login failed - ended up at {page.url}")
                return False
        except Exception as e:
            logger.error(f"Login error: {e}")
            logger.debug("Exception occurred during login", exc_info=True)
            return False

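    # fetch_listings drives the whole scrape: optional login, a walk through
    # the Livewire pagination, then regex extraction over the accumulated HTML
    # (the listing data lives in wire:snapshot JSON blobs rather than in the
    # visible DOM).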
    async def fetch_listings(self) -> list[dict]:
        """Fetch listings from the Wohnungsfinder"""
        listings = []

        try:
            page = await self.context.new_page()

            # Attempt login if not already logged in
            if not self.state_manager.logged_in:
                login_success = await self.login(page)
                if login_success:
                    self.state_manager.logged_in = True
                else:
                    logger.warning("Login failed. Proceeding with public listings.")

            # Select the correct URL after the login check
            if self.state_manager.logged_in:
                url = "https://www.inberlinwohnen.de/mein-bereich/wohnungsfinder"
            else:
                url = "https://www.inberlinwohnen.de/wohnungsfinder/"

            logger.info(f"Fetching listings from {url}")

            # Navigate to the page with a longer wait condition for slow connections
            logger.info("Navigating to listings page with extended timeout...")
            await page.goto(url, wait_until="networkidle", timeout=20000)

            # Check whether the page redirected to a download
            if "download" in page.url or page.url.endswith(".pdf"):
                logger.error("Page redirected to a download. Aborting.")
                return []

            # Handle the cookie modal if not logged in
            if not self.state_manager.logged_in:
                await self.dismiss_cookie_modal(page)

            # Wait a short time for the page to render, but do not block on any selector
            await asyncio.sleep(2)

            # Collect all listings content by clicking through pagination
            all_content = ""
            page_num = 1
            max_pages = 10  # safety limit

            while page_num <= max_pages:
                # Get current page content
                current_content = await page.content()
                all_content += current_content

                # Check for a "next page" button (Livewire pagination)
                next_btn = await page.query_selector('[wire\\:click*="nextPage"]')
                if next_btn and await next_btn.is_visible():
                    await next_btn.click()
                    await asyncio.sleep(2)  # wait for Livewire to update
                    page_num += 1
                else:
                    break

            logger.info(f"Collected content from {page_num} page(s)")
            content = all_content

            # Debug: save the HTML to a file for inspection
            debug_path = DATA_DIR / "debug_page.html"
            with open(debug_path, "w", encoding="utf-8") as f:
                f.write(content)
            logger.info(f"Saved debug HTML to {debug_path}")

            # Debug: check how many listings the page claims to show
            count_match = re.search(r'(\d+)\s*Wohnungen? für Sie gefunden', content)
            if count_match:
                logger.info(f"Page shows {count_match.group(1)} listings available")

            # Also check for "Zeige X bis Y von Z Angeboten"
            show_match = re.search(r'Zeige \d+ bis \d+ von (\d+) Angeboten', content)
            if show_match:
                logger.info(f"Page shows {show_match.group(1)} total offers")

            # Decode HTML entities and JSON-escaped slashes for extraction
            content_decoded = html.unescape(content)
            content_decoded = content_decoded.replace('\\/', '/')

            # Build a flatId -> deeplink mapping from wire:snapshot JSON data (monitor.py logic)
            # Format in HTML: "deeplink":"https://...","flatId":12345
            deeplink_pattern = r'"deeplink":"(https://[^"]+)","flatId":(\d+)'
            deeplink_matches = re.findall(deeplink_pattern, content_decoded)
            # Use string keys for flatId to match the button extraction
            id_to_link = {str(flat_id): link for link, flat_id in deeplink_matches}
            logger.info(f"Found {len(id_to_link)} deeplink mappings")

            # --- Extraction logic copied from monitor.py for robustness ---
            # Extract listings from button elements with aria-label.
            # Format: @click="open !== 12345 ..." aria-label="Wohnungsangebot - 2,0 Zimmer, 53,01 m², 494,38 € Kaltmiete | Adresse"
            button_pattern = r'@click="open !== (\d+)[^\"]*"[^>]*aria-label="Wohnungsangebot - ([^"]+)'
            button_matches = re.findall(button_pattern, content_decoded)
            logger.info(f"Found {len(button_matches)} listing buttons (monitor.py pattern)")

            for flat_id, listing_text in button_matches:
                # Parse listing text: "2,0 Zimmer, 53,01 m², 494,38 € Kaltmiete | Rhinstraße 4, 10315 Lichtenberg"
                parts_match = re.match(r'(\d,\d)\s*Zimmer,\s*([\d,.]+)\s*m²,\s*([\d.,]+)\s*€\s*(?:Kaltmiete)?\s*\|\s*(.+)', listing_text)
                if not parts_match:
                    continue

                rooms, size, price, address = parts_match.groups()
                rooms = rooms.strip()
                address = address.strip()

                if len(address) < 5:
                    continue

                # Get the deeplink for this flat (monitor.py logic: flat_id as string)
                detail_link = id_to_link.get(str(flat_id), url)

                listing_id = hashlib.md5(f"{rooms}{size}{price}{address}".encode()).hexdigest()[:12]

                listings.append({
                    "id": listing_id,
                    "rooms": f"{rooms} Zimmer",
                    "size": f"{size} m²",
                    "price": f"{price} €",
                    "address": address,
                    "link": detail_link,
                    "fetched_at": datetime.now().isoformat()
                })

            # Deduplicate by id
            seen_ids = set()
            unique_listings = []
            for listing in listings:
                if listing["id"] not in seen_ids:
                    seen_ids.add(listing["id"])
                    unique_listings.append(listing)
            listings = unique_listings

            if not listings:
                logger.warning("No listings found after parsing. Dumping HTML snippet for debugging:")
                logger.warning(content[:1000])

            await page.close()
            logger.info(f"Fetched {len(listings)} unique listings")
            return listings

        except Exception as e:
            logger.error(f"Error fetching listings: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return []

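    # Cookie-consent handling is best-effort: try a list of known accept
    # selectors first, then fall back to clicking the first visible button
    # inside the Alpine.js privacy-modal overlay (x-show="showPrivacyModal").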
    async def dismiss_cookie_modal(self, page):
        """Dismiss the privacy/cookie consent modal if present"""
        try:
            # Wait a bit for the modal to appear
            await asyncio.sleep(2)

            # Try to find and click the accept button in the privacy modal.
            # Look for common accept-button patterns in German.
            accept_selectors = [
                'button:has-text("Akzeptieren")',
                'button:has-text("Alle akzeptieren")',
                'button:has-text("Accept")',
                'button:has-text("Zustimmen")',
                '[x-show="showPrivacyModal"] button',
                '.privacy-modal button',
                'button.accept-cookies',
                # More specific to inberlinwohnen
                'div[x-show="showPrivacyModal"] button:first-of-type',
            ]

            for selector in accept_selectors:
                try:
                    button = await page.query_selector(selector)
                    if button and await button.is_visible():
                        await button.click()
                        logger.info(f"Clicked cookie accept button: {selector}")
                        await asyncio.sleep(1)
                        return True
                except Exception:
                    continue

            # Try clicking any visible button in the modal overlay
            modal = await page.query_selector('div[x-show="showPrivacyModal"]')
            if modal:
                buttons = await modal.query_selector_all('button')
                for btn in buttons:
                    if await btn.is_visible():
                        text = await btn.inner_text()
                        logger.info(f"Found modal button: {text}")
                        # Click the first visible button (usually accept)
                        await btn.click()
                        await asyncio.sleep(1)
                        return True

            logger.info("No cookie modal found or already dismissed")
            return False
        except Exception as e:
            logger.debug(f"Cookie modal handling: {e}")
            return False