import asyncio
import csv
import hashlib
import html
import json
import logging
import re
from datetime import datetime
from pathlib import Path
from typing import Optional

import matplotlib
import matplotlib.dates as mdates
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from playwright.async_api import async_playwright

from handlers.base_handler import BaseHandler
from handlers.degewo_handler import DegewoHandler
from handlers.gesobau_handler import GesobauHandler
from handlers.gewobag_handler import GewobagHandler
from handlers.howoge_handler import HowogeHandler
from handlers.stadtundland_handler import StadtUndLandHandler
from handlers.wbm_handler import WBMHandler

STATE_FILE = Path("data/state.json")
APPLICATIONS_FILE = Path("data/applications.json")
TIMING_FILE = Path("data/listing_times.csv")
LISTINGS_FILE = Path("data/listings.json")
DATA_DIR = Path("data")

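# Expected layout of the data/ directory (illustrative; each file is created
# on demand by the methods below):
#   data/state.json          persistent state, e.g. {"autopilot": false}
#   data/applications.json   application history keyed by listing id
#   data/listing_times.csv   appearance times of new listings (for plots)
#   data/listings.json       last seen listings keyed by listing id
#   data/fonts/              font cache used by the matplotlib setup below
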
# --- Matplotlib & Seaborn Setup ---
# Redirect matplotlib's config/cache directory to a writable location
# (avoids permission issues in containers and read-only home directories).
font_cache_dir = Path("data/fonts")
font_cache_dir.mkdir(parents=True, exist_ok=True)
matplotlib.get_configdir = lambda: str(font_cache_dir)
fm.findSystemFonts(fontpaths=str(font_cache_dir), fontext='ttf')
matplotlib.rcParams['font.family'] = 'Noto Sans'

# Configure seaborn for consistent, high-resolution plots
sns.set_theme(style="whitegrid", palette="deep")
sns.set_context("notebook", font_scale=1.1)
matplotlib.rcParams['figure.dpi'] = 300
matplotlib.rcParams['savefig.dpi'] = 300
matplotlib.rcParams['figure.facecolor'] = 'white'

# Use the root logger for consistency with main.py
logger = logging.getLogger()

class ApplicationHandler:
    """
    Main handler for apartment monitoring, application automation, and notification logic.

    Handles browser automation, listing extraction, application delegation, and Telegram notifications.
    """

    def __init__(self, browser_context, state_manager, applications_file: Optional[Path] = None):
        if browser_context is None:
            raise ValueError("browser_context must not be None. ApplicationHandler requires a valid Playwright context.")
        self.context = browser_context
        self.state_manager = state_manager
        self.applications_file = applications_file or APPLICATIONS_FILE
        self.telegram_bot = None
        self.handlers = {
            "howoge": HowogeHandler(browser_context),
            "gewobag": GewobagHandler(browser_context),
            "degewo": DegewoHandler(browser_context),
            "gesobau": GesobauHandler(browser_context),
            "stadtundland": StadtUndLandHandler(browser_context),
            "wbm": WBMHandler(browser_context),
        }

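    # Note: the keys of self.handlers deliberately match the strings returned
    # by _detect_company(), so handler lookup in apply() is a plain dict get.
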
    def set_telegram_bot(self, telegram_bot) -> None:
        """Attach a TelegramBot instance for notifications."""
        self.telegram_bot = telegram_bot

    def notify_new_listings(self, new_listings: list[dict], application_results: Optional[dict] = None) -> None:
        """
        Send a Telegram notification for each new listing.

        Includes the application result if autopilot was enabled.
        """
        for listing in new_listings:
            link = listing.get('link', 'https://www.inberlinwohnen.de/wohnungsfinder/')
            company = self._detect_company(link)
            if company == "wgcompany":
                continue  # WGCompany listings are handled elsewhere, not by this handler

            company_label = company.capitalize() if company != "unknown" else "Wohnung"
            message = (
                f"<b>[{company_label}] Neue Wohnung!</b>\n\n"
                f"🚪 <b>{listing['rooms']}</b>\n"
                f"📏 {listing['size']}\n"
                f"💰 {listing['price']}\n"
                f"📍 {listing['address']}\n\n"
                f"👉 <a href=\"{link}\">Alle Details</a>"
            )

            # Always show autopilot/apply status for clarity
            if application_results is not None:
                if listing["id"] in application_results:
                    result = application_results[listing["id"]]
                    # Skip already-applied listings (no notification needed)
                    if result.get("skipped"):
                        logger.debug(f"Skip notification for already-applied: {listing['address']}")
                        continue
                    if result["success"]:
                        message += f"\n\n🤖 <b>Auto-applied!</b> ({result['company']})"
                        if result["message"]:
                            message += f"\n<i>{html.escape(result['message'])}</i>"
                    else:
                        # Handler attempted but failed
                        fail_msg = result.get("message") or "Unknown error during application."
                        message += f"\n\n⚠️ <b>Auto-apply failed</b> ({result['company']})"
                        message += f"\n<b>Reason:</b> <i>{html.escape(fail_msg)}</i>"
                else:
                    # Should not happen if the logic is correct; save as failed
                    # so /retryfailed can retry later.
                    message += "\n\nℹ️ <b>No application attempted (internal logic error)</b>"
                    failed_result = {
                        "listing_id": listing["id"],
                        "company": company,
                        "link": link,
                        "timestamp": listing.get("timestamp", ""),
                        "success": False,
                        "message": "Internal logic error: listing not in application_results",
                        "address": listing.get("address", ""),
                        "rooms": listing.get("rooms", ""),
                        "price": listing.get("price", ""),
                        "retries": 0
                    }
                    self.save_application(failed_result)
                    logger.warning(f"[INTERNAL ERROR] Saved as failed: {listing['id']} - {listing.get('address', '')}")
            else:
                # Autopilot was off or not attempted at all
                message += "\n\nℹ️ <b>No application attempted (autopilot off)</b>"

            # Send via TelegramBot if available
            if self.telegram_bot:
                loop = getattr(self.telegram_bot, 'event_loop', None) or asyncio.get_event_loop()
                asyncio.run_coroutine_threadsafe(self.telegram_bot._send_message(message), loop)
            else:
                logger.debug(f"[No Telegram] {listing['address']} ({listing['rooms']})")

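    # A rendered notification looks like (illustrative values):
    #   [Howoge] Neue Wohnung!
    #   🚪 2,0 Zimmer
    #   📏 53,01 m²
    #   💰 494,38 €
    #   📍 Rhinstraße 4, 10315 Lichtenberg
    #   👉 Alle Details            (links to the listing)
    #   🤖 Auto-applied! (howoge)  (only when autopilot ran)
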
    async def apply_to_listings(self, listings: list[dict]) -> dict:
        """
        Apply to multiple listings (autopilot mode).

        Returns a dict of application results keyed by listing ID.
        """
        results = {}
        # Fail fast if context is ever None (should never happen)
        if self.context is None:
            raise RuntimeError("browser_context is None in apply_to_listings. This should never happen.")
        for listing in listings:
            # Check if we've already successfully applied
            applications = self.load_applications()
            if listing["id"] in applications:
                app = applications[listing["id"]]
                if app.get("success", False):
                    if app.get("link") == listing.get("link"):
                        # Same listing: skip, and mark it so notify_new_listings
                        # does not send a duplicate notification.
                        logger.debug(f"Skip (applied): {listing['address']}")
                        results[listing["id"]] = {
                            "listing_id": listing["id"],
                            "skipped": True,
                        }
                        continue
                    # Same ID but different link: the company reused the ID for
                    # a new listing, so fall through and apply again.
                    logger.info(f"Reused ID detected for {listing['address']}: old link={app.get('link')}, new link={listing.get('link')}")

            result = await self.apply(listing)
            results[listing["id"]] = result
            self.save_application(result)

            status = "[SUCCESS]" if result["success"] else "[FAILED]"
            logger.info(f"{status} {listing['address'][:30]}... | {result['message'][:50]}")
            await asyncio.sleep(2)
        return results

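    # Values in the returned results dict are either the skip marker
    #   {"listing_id": "...", "skipped": True}
    # or a full application record as built by apply(), e.g.
    #   {"listing_id": "...", "company": "howoge", "success": True, "message": "...", ...}
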
    def log_listing_times(self, new_listings: list[dict]) -> None:
        """
        Log new listing appearance times to CSV for later analysis and pattern mining.

        Appends to data/listing_times.csv, creating the header if needed.
        """
        if not new_listings:
            return

        file_exists = TIMING_FILE.exists()

        with open(TIMING_FILE, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            if not file_exists:
                writer.writerow(["timestamp", "weekday", "hour", "minute", "rooms", "size", "price", "address", "listing_id"])

            now = datetime.now()
            for listing in new_listings:
                writer.writerow([
                    now.isoformat(),
                    now.strftime("%A"),  # weekday name
                    now.hour,
                    now.minute,
                    listing["rooms"],
                    listing["size"],
                    listing["price"],
                    listing["address"],
                    listing["id"]
                ])

        logger.debug(f"Logged {len(new_listings)} listings to CSV")

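    # Example row in data/listing_times.csv (illustrative values):
    #   2024-01-01T12:00:00,Monday,12,0,"2,0 Zimmer","53,01 m²","494,38 €","Rhinstraße 4, 10315 Lichtenberg",a1b2c3d4e5f6
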
    async def init_browser(self) -> None:
        """Initialize a minimal Playwright browser context (mirrors the test script)."""
        if not hasattr(self, 'browser') or self.browser is None:
            self.playwright = await async_playwright().start()
            self.browser = await self.playwright.chromium.launch(headless=True)
            self.context = await self.browser.new_context(
                user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
            )
            logger.info("Browser initialized (minimal context)")
            self.application_handler = ApplicationHandler(self.context, self.state_manager)

    async def apply(self, listing: dict) -> dict:
        """Delegate a single application to the matching company handler."""
        company = self._detect_company(listing.get("link", ""))
        handler = self.handlers.get(company)
        result = {
            "listing_id": listing.get("id"),
            "company": company,
            "link": listing.get("link"),
            "timestamp": datetime.now().isoformat(),
            "success": False,
            "message": "",
            "address": listing.get("address", ""),
            "rooms": listing.get("rooms", ""),
            "price": listing.get("price", "")
        }

        if handler:
            result = await handler.apply(listing, result)
        else:
            result["message"] = f"No handler found for company: {company}"

        return result

    def _detect_company(self, link: str) -> str:
        """Robust company detection logic, matching monitor.py as closely as possible."""
        link = (link or "").lower()
        # Remove URL scheme and www for easier matching
        link = re.sub(r"^https?://(www\.)?", "", link)
        # Domain-based matching first (covers subdomains)
        if re.search(r"howoge\.de", link):
            return "howoge"
        if re.search(r"gewobag\.de", link):
            return "gewobag"
        if re.search(r"degewo\.de", link):
            return "degewo"
        if re.search(r"gesobau\.de", link):
            return "gesobau"
        if re.search(r"stadt-und-land\.de|stadtundland\.de", link):
            return "stadtundland"
        if re.search(r"wbm\.de", link):
            return "wbm"
        # Fall back to company names anywhere in the path or query (legacy/edge cases)
        if re.search(r"howoge", link):
            return "howoge"
        if re.search(r"gewobag", link):
            return "gewobag"
        if re.search(r"degewo", link):
            return "degewo"
        if re.search(r"gesobau", link):
            return "gesobau"
        if re.search(r"stadt-und-land|stadtundland", link):
            return "stadtundland"
        if re.search(r"wbm", link):
            return "wbm"
        return "unknown"

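    # Illustrative examples:
    #   _detect_company("https://www.howoge.de/...")        -> "howoge"
    #   _detect_company("https://immosuche.degewo.de/...")  -> "degewo"   (subdomain)
    #   _detect_company("https://example.com/?src=wbm")     -> "wbm"      (path/query fallback)
    #   _detect_company("")                                 -> "unknown"
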
    def load_state(self) -> dict:
        """Load persistent state"""
        if STATE_FILE.exists():
            with open(STATE_FILE, "r") as f:
                return json.load(f)
        return {"autopilot": False}

    def save_state(self, state: dict) -> None:
        """Save persistent state"""
        with open(STATE_FILE, "w") as f:
            json.dump(state, f, indent=2)

    def set_autopilot(self, enabled: bool) -> None:
        """Enable or disable autopilot mode"""
        self.state_manager.set_autopilot(enabled)

    def is_autopilot_enabled(self) -> bool:
        """Check if autopilot mode is enabled"""
        return self.state_manager.is_autopilot_enabled()

    def load_applications(self) -> dict:
        """Load application history."""
        if self.applications_file.exists():
            try:
                with open(self.applications_file, "r", encoding="utf-8") as f:
                    return json.load(f)
            except json.JSONDecodeError:
                logger.error("Failed to decode applications file. Returning empty history.")
        return {}

    def save_application(self, result: dict) -> None:
        """Save an application result."""
        applications = self.load_applications()
        applications[result["listing_id"]] = result
        with open(self.applications_file, "w", encoding="utf-8") as f:
            json.dump(applications, f, indent=2, ensure_ascii=False)

    def has_applied(self, listing_id: str) -> bool:
        """
        Check if we've successfully applied to this listing.

        Only returns True if the application was successful;
        failed applications can be retried.
        """
        applications = self.load_applications()
        if listing_id not in applications:
            return False
        app = applications[listing_id]
        # Only skip if the application was successful;
        # failed applications (success=False) should be retried.
        return app.get("success", False)

    def load_previous_listings(self) -> dict:
        """Load previously saved listings"""
        if LISTINGS_FILE.exists():
            with open(LISTINGS_FILE, "r") as f:
                return json.load(f)
        return {}

    def save_listings(self, listings: list[dict]) -> None:
        """Save current listings"""
        listings_dict = {l["id"]: l for l in listings}
        with open(LISTINGS_FILE, "w") as f:
            json.dump(listings_dict, f, indent=2, ensure_ascii=False)

    def find_new_listings(self, current: list[dict], previous: dict) -> list[dict]:
        """Find listings that are new since the last check"""
        new = []
        for listing in current:
            if listing["id"] not in previous:
                new.append(listing)
        return new

    def _generate_weekly_plot(self) -> str:
        """Generate a heatmap, bar chart, line chart, and summary of listings by day/hour, like monitor.py."""
        plot_path = DATA_DIR / "weekly_plot.png"
        try:
            if not TIMING_FILE.exists():
                logger.warning("No timing data file found")
                return ""

            df = pd.read_csv(TIMING_FILE)
            if len(df) < 1:
                logger.warning("Timing file is empty")
                return ""

            logger.info(f"Loaded {len(df)} listing records for plot")

            # Count listings per day and hour in a day-hour matrix
            days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
            heatmap_data = pd.DataFrame(0, index=days_order, columns=range(24))
            for _, row in df.iterrows():
                day = row['weekday']
                hour = int(row['hour'])
                if day in days_order:
                    heatmap_data.loc[day, hour] += 1

            # Create figure with four subplots
            fig, axes = plt.subplots(2, 2, figsize=(16, 12))
            fig.suptitle('Listing Appearance Patterns', fontsize=18, fontweight='bold', y=0.995)

            # 1. Heatmap - Day vs Hour (using seaborn)
            ax1 = axes[0, 0]
            sns.heatmap(heatmap_data, cmap='RdYlGn_r', annot=False, fmt='d',
                        cbar_kws={'label': 'Count'}, ax=ax1, linewidths=0.5, linecolor='gray')
            ax1.set_xlabel('Hour of Day', fontsize=11, fontweight='bold')
            ax1.set_ylabel('Day of Week', fontsize=11, fontweight='bold')
            ax1.set_title('Listings by Day & Hour', fontsize=12, fontweight='bold', pad=10)
            ax1.set_xticklabels(range(24), fontsize=9)
            ax1.set_yticklabels(days_order, rotation=0, fontsize=9)

            # 2. Bar chart - totals by day of week
            ax2 = axes[0, 1]
            day_counts = df['weekday'].value_counts().reindex(days_order, fill_value=0)
            sns.barplot(x=range(7), y=day_counts.values, ax=ax2, palette='Blues_d', hue=range(7), legend=False)
            ax2.set_xticks(range(7))
            ax2.set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'], fontsize=9)
            ax2.set_xlabel('Day of Week', fontsize=11, fontweight='bold')
            ax2.set_ylabel('Number of Listings', fontsize=11, fontweight='bold')
            ax2.set_title('Total Listings by Day', fontsize=12, fontweight='bold', pad=10)
            for i, v in enumerate(day_counts.values):
                if v > 0:
                    ax2.text(i, v + 0.5, str(v), ha='center', fontsize=9, fontweight='bold')

            # 3. Line chart - totals by hour
            ax3 = axes[1, 0]
            hour_counts = df['hour'].value_counts().reindex(range(24), fill_value=0)
            sns.lineplot(x=range(24), y=hour_counts.values, ax=ax3, marker='o',
                         linewidth=2.5, markersize=6, color='#2E86AB')
            ax3.fill_between(range(24), hour_counts.values, alpha=0.2, color='#2E86AB')
            ax3.set_xticks(range(0, 24, 2))
            ax3.set_xlabel('Hour of Day', fontsize=11, fontweight='bold')
            ax3.set_ylabel('Number of Listings', fontsize=11, fontweight='bold')
            ax3.set_title('Total Listings by Hour', fontsize=12, fontweight='bold', pad=10)
            ax3.grid(True, alpha=0.3, linestyle='--')

            # 4. Summary stats panel
            ax4 = axes[1, 1]
            ax4.axis('off')

            # Calculate best times (guard the ":00" suffix when there is no data)
            best_day = day_counts.idxmax() if day_counts.max() > 0 else "N/A"
            best_hour = f"{hour_counts.idxmax()}:00" if hour_counts.max() > 0 else "N/A"
            total_listings = len(df)

            # Find the peak day/hour combination
            if heatmap_data.values.max() > 0:
                peak_day, peak_hour = heatmap_data.stack().idxmax()
                peak_text = f"🎯 Peak time: {peak_day} at {peak_hour}:00"
            else:
                peak_text = "🎯 Peak time: N/A"

            # Plain text only: matplotlib renders HTML tags like <b> literally
            stats_text = f"""Summary Statistics

Total listings tracked: {total_listings}

🏆 Best day: {best_day}
⏰ Best hour: {best_hour}
{peak_text}

📈 Average per day: {total_listings/7:.1f}
📅 Data collection period:
  From: {df['timestamp'].min()[:10] if 'timestamp' in df.columns else 'N/A'}
  To: {df['timestamp'].max()[:10] if 'timestamp' in df.columns else 'N/A'}
"""
            ax4.text(0.1, 0.9, stats_text, transform=ax4.transAxes, fontsize=11,
                     verticalalignment='top', fontfamily='monospace',
                     bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

            plt.tight_layout(rect=(0, 0, 1, 0.99))

            # Save plot with high resolution
            plt.savefig(plot_path, dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
            plt.close()

            logger.info(f"Plot saved to {plot_path}")
            return str(plot_path)
        except Exception as e:
            logger.error(f"Error creating plot: {e}")
            return ""

    def _generate_error_rate_plot(self) -> tuple[str | None, str]:
        """Read applications.json and produce a plot image + summary text.

        Returns (plot_path, summary_text) or (None, "") if insufficient data.
        """
        if not self.applications_file.exists():
            logger.warning("No applications.json found for errorrate plot")
            return None, ""

        try:
            with open(self.applications_file, 'r', encoding='utf-8') as f:
                apps = json.load(f)
            if not apps:
                return None, ""

            # Convert to DataFrame
            rows = []
            for _id, rec in apps.items():
                ts = rec.get('timestamp')
                try:
                    dt = pd.to_datetime(ts)
                except Exception:
                    dt = pd.NaT
                rows.append({'id': _id, 'company': rec.get('company'), 'success': bool(rec.get('success')), 'ts': dt})
            df = pd.DataFrame(rows)
            df = df.dropna(subset=['ts'])
            if df.empty:
                return None, ""

            df['date'] = df['ts'].dt.floor('D')
            grouped = df.groupby('date').agg(total=('id', 'count'), successes=('success', 'sum'))
            grouped['failures'] = grouped['total'] - grouped['successes']
            grouped['error_rate'] = grouped['failures'] / grouped['total']

            # Ensure the index is sorted by date for plotting
            grouped = grouped.sort_index()

            # Prepare plot: convert dates to matplotlib numeric x-values so bars and line align
            fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(14, 14), sharex=True)
            fig.suptitle('Autopilot Performance Analysis', fontsize=18, fontweight='bold', y=0.995)

            dates = pd.to_datetime(grouped.index).to_pydatetime()
            x = mdates.date2num(dates)
            width = 0.6  # bar width in days

            successes = grouped['successes'].values
            failures = grouped['failures'].values

            # Seaborn palette: green for success, red for failure
            success_color = sns.color_palette('RdYlGn', n_colors=10)[8]
            failure_color = sns.color_palette('RdYlGn', n_colors=10)[1]

            ax1.bar(x, successes, width=width, color=success_color, align='center', label='Success', edgecolor='white', linewidth=0.5)
            ax1.bar(x, failures, bottom=successes, width=width, color=failure_color, align='center', label='Failure', edgecolor='white', linewidth=0.5)
            ax1.set_ylabel('Count', fontsize=11, fontweight='bold')
            ax1.set_title('Successes vs Failures (by day)', fontsize=13, fontweight='bold', pad=10)
            ax1.set_xticks(x)
            ax1.set_xlim(min(x) - 1, max(x) + 1)
            ax1.xaxis.set_major_locator(mdates.AutoDateLocator())
            ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
            ax1.legend(loc='upper left', framealpha=0.9)
            ax1.grid(True, alpha=0.3, linestyle='--', axis='y')

            # Error-rate line on the same x (date) axis
            sns.lineplot(x=x, y=grouped['error_rate'].values, ax=ax2, marker='o',
                         linewidth=2.5, markersize=8, color='#E74C3C')
            ax2.fill_between(x, grouped['error_rate'].values, alpha=0.2, color='#E74C3C')
            ax2.set_ylim(-0.02, 1.02)
            ax2.set_ylabel('Error Rate', fontsize=11, fontweight='bold')
            ax2.set_xlabel('Date', fontsize=11, fontweight='bold')
            ax2.set_title('Daily Error Rate (failures / total)', fontsize=13, fontweight='bold', pad=10)
            ax2.grid(True, alpha=0.3, linestyle='--')
            ax2.set_xticks(x)
            ax2.set_xlim(min(x) - 1, max(x) + 1)
            ax2.xaxis.set_major_locator(mdates.AutoDateLocator())
            ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

            # Error rate by company
            company_grouped = df.groupby(['date', 'company']).agg(total=('id', 'count'), successes=('success', 'sum'))
            company_grouped['failures'] = company_grouped['total'] - company_grouped['successes']
            company_grouped['error_rate'] = company_grouped['failures'] / company_grouped['total']
            company_grouped = company_grouped.reset_index()
            error_rate_pivot = company_grouped.pivot(index='date', columns='company', values='error_rate')

            # Use distinct seaborn colors for each company
            palette = sns.color_palette('husl', n_colors=len(error_rate_pivot.columns))
            for idx, company in enumerate(error_rate_pivot.columns):
                y = error_rate_pivot[company].values
                ax3.plot(x, y, marker='o', label=str(company), linewidth=2.5,
                         markersize=7, color=palette[idx])
            ax3.set_ylim(-0.02, 1.02)
            ax3.set_ylabel('Error Rate', fontsize=11, fontweight='bold')
            ax3.set_xlabel('Date', fontsize=11, fontweight='bold')
            ax3.set_title('Daily Error Rate by Company', fontsize=13, fontweight='bold', pad=10)
            ax3.grid(True, alpha=0.3, linestyle='--')
            ax3.set_xticks(x)
            ax3.set_xlim(min(x) - 1, max(x) + 1)
            ax3.xaxis.set_major_locator(mdates.AutoDateLocator())
            ax3.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
            ax3.legend(title='Company', loc='upper right', fontsize=10, framealpha=0.9)

            fig.autofmt_xdate()
            plt.tight_layout(rect=(0, 0, 1, 0.99))
            plot_path = self.applications_file.parent / 'error_rate.png'
            tmp_path = self.applications_file.parent / 'error_rate.tmp.png'
            # Save to a temp file first and atomically replace to ensure overwrite
            fig.savefig(tmp_path, format='png', dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
            plt.close(fig)
            try:
                tmp_path.replace(plot_path)
            except Exception:
                # Fallback: try removing the existing file and renaming
                try:
                    if plot_path.exists():
                        plot_path.unlink()
                    tmp_path.rename(plot_path)
                except Exception:
                    logger.exception(f"Failed to write plot to {plot_path}")

            # Summary
            total_attempts = int(grouped['total'].sum())
            total_success = int(grouped['successes'].sum())
            total_fail = int(grouped['failures'].sum())
            overall_error = (total_fail / total_attempts) if total_attempts > 0 else 0.0
            summary = (
                f"<b>Total attempts:</b> {total_attempts}\n"
                f"<b>Successes:</b> {total_success}\n"
                f"<b>Failures:</b> {total_fail}\n"
                f"<b>Overall error rate:</b> {overall_error:.1%}"
            )

            return str(plot_path), summary
        except Exception as e:
            logger.exception(f"Failed to generate error rate plot: {e}")
            return None, ""

    async def login(self, page) -> bool:
        """Log in to inberlinwohnen.de (minimal, like the test script)"""
        if not self.state_manager.email or not self.state_manager.password:
            logger.warning("No credentials provided. Ensure INBERLIN_EMAIL and INBERLIN_PASSWORD are set in the environment.")
            return False

        try:
            logger.info("Navigating to login page...")
            login_response = await page.goto("https://www.inberlinwohnen.de/login", wait_until="networkidle")
            logger.info(f"Login page status: {login_response.status if login_response else 'No response'}")
            await asyncio.sleep(2)

            # Dismiss the cookie/privacy modal before logging in
            logger.info("Attempting to dismiss cookie/privacy modal before login...")
            await self.dismiss_cookie_modal(page)
            logger.info("Cookie/privacy modal dismissed.")

            # Fill the login form (if present)
            logger.info("Filling in login credentials...")
            await page.fill('input[name="email"], input[type="email"]', self.state_manager.email)
            await page.fill('input[name="password"], input[type="password"]', self.state_manager.password)
            logger.info("Login credentials filled.")

            # Click the submit button
            logger.info("Submitting login form...")
            await page.click('button[type="submit"], input[type="submit"]', timeout=30000)
            logger.info("Clicked submit, waiting for navigation...")
            try:
                await page.wait_for_load_state("networkidle", timeout=30000)
                logger.info(f"After login, page url: {page.url}")
                logger.info(f"After login, page content length: {len(await page.content())}")
            except Exception as e:
                logger.error(f"Timeout or error after login submit: {e}")
            await asyncio.sleep(2)

            # Check whether the login succeeded
            logger.info("Checking if login was successful...")
            if "mein-bereich" in page.url or await page.query_selector('text="Abmelden"'):
                logger.info("Login successful.")
                return True
            else:
                logger.error(f"Login failed - ended up at {page.url}")
                return False
        except Exception as e:
            logger.error(f"Login error: {e}")
            logger.debug("Exception occurred during login", exc_info=True)
            return False

    async def fetch_listings(self) -> list[dict]:
        """Fetch listings from the Wohnungsfinder, retrying transient failures"""
        max_retries = 3
        retry_delay = 2  # initial delay in seconds

        for attempt in range(max_retries):
            try:
                listings = await self._fetch_listings_attempt()
                if attempt > 0:
                    logger.info(f"Fetch succeeded (attempt {attempt + 1})")
                return listings
            except Exception as e:
                if attempt < max_retries - 1:
                    wait_time = retry_delay * (2 ** attempt)  # exponential backoff
                    logger.warning(f"Fetch failed (attempt {attempt + 1}/{max_retries}): {str(e)[:50]}... Retrying in {wait_time}s")
                    await asyncio.sleep(wait_time)
                else:
                    logger.error(f"Fetch failed after {max_retries} attempts")
                    return []

        return []

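    # With max_retries=3 and retry_delay=2 the backoff schedule is: first
    # failure -> wait 2s (2 * 2**0), second failure -> wait 4s (2 * 2**1),
    # third failure -> give up and return [].
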
    async def _fetch_listings_attempt(self) -> list[dict]:
        """Single attempt to fetch listings (extracted for the retry logic).

        Raises on failure so fetch_listings() can retry with backoff.
        """
        listings = []
        page = await self.context.new_page()

        try:
            # Attempt login if not already logged in
            if not self.state_manager.logged_in:
                logger.info("Not logged in - attempting login")
                login_success = await self.login(page)
                if login_success:
                    self.state_manager.logged_in = True
                    logger.info("Login successful - session established")
                else:
                    logger.warning("Login failed. Proceeding with public listings.")

            # Select the correct URL after the login check
            if self.state_manager.logged_in:
                url = "https://www.inberlinwohnen.de/mein-bereich/wohnungsfinder"
            else:
                url = "https://www.inberlinwohnen.de/wohnungsfinder/"

            logger.info(f"Fetching listings from {url}")

            # Navigate with an extended timeout for slow connections
            logger.info("Navigating to listings page with extended timeout...")
            await page.goto(url, wait_until="networkidle", timeout=20000)

            # Abort if the page turned into a download
            if "download" in page.url or page.url.endswith(".pdf"):
                logger.error("Page redirected to a download. Aborting.")
                return []

            # Handle the cookie modal if not logged in
            if not self.state_manager.logged_in:
                await self.dismiss_cookie_modal(page)

            # Give the page a moment to render, but do not block on any selector
            await asyncio.sleep(2)

            # Collect all listings content by clicking through pagination
            all_content = ""
            page_num = 1
            max_pages = 10  # safety limit

            while page_num <= max_pages:
                # Get the current page content
                current_content = await page.content()
                all_content += current_content

                # Check for a "next page" button (Livewire pagination)
                next_btn = await page.query_selector('[wire\\:click*="nextPage"]')
                if next_btn and await next_btn.is_visible():
                    await next_btn.click()
                    await asyncio.sleep(2)  # wait for Livewire to update
                    page_num += 1
                else:
                    break

            logger.info(f"Collected content from {page_num} page(s)")
            content = all_content

            # Session validation: if logged in but only got 1 page, the session may be expired
            if self.state_manager.logged_in and page_num == 1:
                logger.warning("Logged in but only 1 page fetched - possible session expiration")

            # Debug: save HTML to file for inspection
            debug_path = DATA_DIR / "debug_page.html"
            with open(debug_path, "w", encoding="utf-8") as f:
                f.write(content)
            logger.info(f"Saved debug HTML to {debug_path}")

            # Debug: log the listing counts the page itself reports
            count_match = re.search(r'(\d+)\s*Wohnungen? für Sie gefunden', content)
            if count_match:
                logger.info(f"Page shows {count_match.group(1)} listings available")

            # Also check for "Zeige X bis Y von Z Angeboten"
            show_match = re.search(r'Zeige \d+ bis \d+ von (\d+) Angeboten', content)
            if show_match:
                logger.info(f"Page shows {show_match.group(1)} total offers")

            # Decode HTML entities and JSON-escaped slashes for extraction
            content_decoded = html.unescape(content)
            content_decoded = content_decoded.replace('\\/', '/')

            # Build flatId -> deeplink mapping from wire:snapshot JSON data (monitor.py logic)
            # Format in HTML: "deeplink":"https://...","flatId":12345
            deeplink_pattern = r'"deeplink":"(https://[^"]+)","flatId":(\d+)'
            deeplink_matches = re.findall(deeplink_pattern, content_decoded)
            # Use string keys for flatId to match the button extraction
            id_to_link = {str(flat_id): link for link, flat_id in deeplink_matches}
            logger.info(f"Found {len(id_to_link)} deeplink mappings")

            # --- Extraction logic copied from monitor.py for robustness ---
            # Extract listings from button elements with aria-label.
            # Format: @click="open !== 12345 ..." aria-label="Wohnungsangebot - 2,0 Zimmer, 53,01 m², 494,38 € Kaltmiete | Adresse"
            button_pattern = r'@click="open !== (\d+)[^\"]*"[^>]*aria-label="Wohnungsangebot - ([^"]+)'
            button_matches = re.findall(button_pattern, content_decoded)
            logger.info(f"Found {len(button_matches)} listing buttons (monitor.py pattern)")

            for flat_id, listing_text in button_matches:
                # Parse listing text: "2,0 Zimmer, 53,01 m², 494,38 € Kaltmiete | Rhinstraße 4, 10315 Lichtenberg"
                parts_match = re.match(r'(\d,\d)\s*Zimmer,\s*([\d,.]+)\s*m²,\s*([\d.,]+)\s*€\s*(?:Kaltmiete)?\s*\|\s*(.+)', listing_text)
                if not parts_match:
                    continue

                rooms, size, price, address = parts_match.groups()
                rooms = rooms.strip()
                address = address.strip()

                if len(address) < 5:
                    continue

                # Get the deeplink for this flat (monitor.py logic: flat_id as string)
                detail_link = id_to_link.get(str(flat_id), url)

                listing_id = hashlib.md5(f"{rooms}{size}{price}{address}".encode()).hexdigest()[:12]

                listings.append({
                    "id": listing_id,
                    "rooms": f"{rooms} Zimmer",
                    "size": f"{size} m²",
                    "price": f"{price} €",
                    "address": address,
                    "link": detail_link,
                    "fetched_at": datetime.now().isoformat()
                })

            # Deduplicate by id
            seen_ids = set()
            unique_listings = []
            for listing in listings:
                if listing["id"] not in seen_ids:
                    seen_ids.add(listing["id"])
                    unique_listings.append(listing)
            listings = unique_listings

            if not listings:
                logger.warning("No listings parsed")
                # If we have deeplinks but no parsed listings, extraction itself went wrong
                if len(id_to_link) > 0:
                    logger.warning(f"Found {len(id_to_link)} deeplinks but parsed 0 listings - extraction issue")

            logger.info(f"Fetched {len(listings)} listings")
            return listings

        except Exception as e:
            logger.error(f"Fetch error: {str(e)[:100]}")
            raise  # propagate so fetch_listings() can retry
        finally:
            await page.close()

    async def dismiss_cookie_modal(self, page):
        """Dismiss the privacy/cookie consent modal if present"""
        try:
            # Give the modal a moment to appear
            await asyncio.sleep(2)

            # Try common accept-button patterns (mostly German labels)
            accept_selectors = [
                'button:has-text("Akzeptieren")',
                'button:has-text("Alle akzeptieren")',
                'button:has-text("Accept")',
                'button:has-text("Zustimmen")',
                '[x-show="showPrivacyModal"] button',
                '.privacy-modal button',
                'button.accept-cookies',
                # More specific to inberlinwohnen
                'div[x-show="showPrivacyModal"] button:first-of-type',
            ]

            for selector in accept_selectors:
                try:
                    button = await page.query_selector(selector)
                    if button and await button.is_visible():
                        await button.click()
                        logger.info(f"Clicked cookie accept button: {selector}")
                        await asyncio.sleep(1)
                        return True
                except Exception:
                    continue

            # Fall back to clicking any visible button in the modal overlay
            modal = await page.query_selector('div[x-show="showPrivacyModal"]')
            if modal:
                buttons = await modal.query_selector_all('button')
                for btn in buttons:
                    if await btn.is_visible():
                        text = await btn.inner_text()
                        logger.info(f"Found modal button: {text}")
                        # Click the first visible button (usually accept)
                        await btn.click()
                        await asyncio.sleep(1)
                        return True

            logger.info("No cookie modal found or already dismissed")
            return False
        except Exception as e:
            logger.debug(f"Cookie modal handling: {e}")
            return False
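

# --- Usage sketch (illustrative only) ---
# A minimal smoke test for fetching listings when this module is run directly.
# The real entry point lives in main.py with a full StateManager; the
# _DemoState stub below is hypothetical and only mimics the attributes this
# module actually touches (email, password, logged_in).
if __name__ == "__main__":
    class _DemoState:
        email = None        # no credentials: fetch public listings only
        password = None
        logged_in = False

    async def _demo() -> None:
        async with async_playwright() as pw:
            browser = await pw.chromium.launch(headless=True)
            context = await browser.new_context()
            handler = ApplicationHandler(context, _DemoState())
            listings = await handler.fetch_listings()
            new = handler.find_new_listings(listings, handler.load_previous_listings())
            print(f"Fetched {len(listings)} listings ({len(new)} new)")
            await browser.close()

    asyncio.run(_demo())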