prod

2026-01-01 15:27:25 +01:00 · 2026-01-01 15:27:25 +01:00 · aa6626d80d
commit aa6626d80d
parent d596ed7e19
21 changed files with 1051 additions and 333 deletions
--- a/application_handler.py
+++ b/application_handler.py
@ -15,6 +15,7 @@ import matplotlib.dates as mdates
 import logging
 import matplotlib
 import matplotlib.font_manager as fm
+import seaborn as sns
 import html
 import re
 import hashlib
@ -29,13 +30,20 @@ LISTINGS_FILE = Path("data/listings.json")
 DATA_DIR = Path("data")


-# --- Matplotlib Font Setup (for emoji support in plots) ---
+# --- Matplotlib & Seaborn Setup ---
 font_cache_dir = Path("data/fonts")
 font_cache_dir.mkdir(parents=True, exist_ok=True)
 matplotlib.get_configdir = lambda: str(font_cache_dir)
 fm.findSystemFonts(fontpaths=str(font_cache_dir), fontext='ttf')
 matplotlib.rcParams['font.family'] = 'Noto Sans'

+# Configure seaborn theme/context and matplotlib figure defaults (300 dpi, white background)
+sns.set_theme(style="whitegrid", palette="deep")
+sns.set_context("notebook", font_scale=1.1)
+matplotlib.rcParams['figure.dpi'] = 300
+matplotlib.rcParams['savefig.dpi'] = 300
+matplotlib.rcParams['figure.facecolor'] = 'white'
+
 # Use the root logger for consistency with main.py
 logger = logging.getLogger()

@ -60,11 +68,11 @@ class ApplicationHandler:
            "wbm": WBMHandler(browser_context),
        }

-    def set_telegram_bot(self, telegram_bot):
+    def set_telegram_bot(self, telegram_bot) -> None:
        """Attach a TelegramBot instance for notifications."""
        self.telegram_bot = telegram_bot

-    def notify_new_listings(self, new_listings: list[dict], application_results: Optional[dict] = None):
+    def notify_new_listings(self, new_listings: list[dict], application_results: Optional[dict] = None) -> None:
        """
        Send a Telegram notification for each new listing.
        Includes application result if autopilot was enabled.
@ -77,12 +85,12 @@ class ApplicationHandler:

            company_label = company.capitalize() if company != "unknown" else "Wohnung"
            message = (
-                f"\ud83c\udfe0 <b>[{company_label}] Neue Wohnung!</b>\n\n"
-                f"\ud83d\udeaa <b>{listing['rooms']}</b>\n"
-                f"\ud83d\udcd0 {listing['size']}\n"
-                f"\ud83d\udcb0 {listing['price']}\n"
-                f"\ud83d\udccd {listing['address']}\n\n"
-                f"\ud83d\udc49 <a href=\"{link}\">Alle Details</a>"
+                f"🏠 <b>[{company_label}] Neue Wohnung!</b>\n\n"
+                f"🚪 <b>{listing['rooms']}</b>\n"
+                f"📏 {listing['size']}\n"
+                f"💰 {listing['price']}\n"
+                f"📍 {listing['address']}\n\n"
+                f"👉 <a href=\"{link}\">Alle Details</a>"
            )

            # Always show autopilot/apply status for clarity
@ -107,11 +115,10 @@ class ApplicationHandler:

            # Send via TelegramBot if available
            if hasattr(self, 'telegram_bot') and self.telegram_bot:
-                logger.info(f"Notifying Telegram: {listing['address']} ({listing['rooms']}, {listing['size']}, {listing['price']})")
                loop = getattr(self.telegram_bot, 'event_loop', None) or asyncio.get_event_loop()
                asyncio.run_coroutine_threadsafe(self.telegram_bot._send_message(message), loop)
            else:
-                    logger.info(f"[TELEGRAM] Would send message for: {listing['address']} ({listing['rooms']}, {listing['size']}, {listing['price']})")
+                logger.debug(f"[No Telegram] {listing['address']} ({listing['rooms']})")

    async def apply_to_listings(self, listings: list[dict]) -> dict:
        """
@ -124,19 +131,19 @@ class ApplicationHandler:
            raise RuntimeError("browser_context is None in apply_to_listings. This should never happen.")
        for listing in listings:
            if self.has_applied(listing["id"]):
-                logger.info(f"Already applied to {listing['id']} ({listing['address']}), skipping.")
+                logger.debug(f"Skip (applied): {listing['address']}")
                continue
            result = await self.apply(listing)
            results[listing["id"]] = result
            self.save_application(result)
            status = "✅" if result["success"] else "❌"
-            logger.info(f"Application {status} for {listing['address']}: {result['message']}")
+            logger.info(f"{status} {listing['address'][:30]}... | {result['message'][:50]}")
            await asyncio.sleep(2)
        return results



-    def log_listing_times(self, new_listings: list[dict]):
+    def log_listing_times(self, new_listings: list[dict]) -> None:
        """
        Log new listing appearance times to CSV for later analysis and pattern mining.
        Appends to data/listing_times.csv, creating header if needed.
@ -167,12 +174,12 @@ class ApplicationHandler:
                    listing["id"]
                ])

-        logger.info(f"Logged {len(new_listings)} new listing times to CSV.")
+        logger.debug(f"Logged {len(new_listings)} listings to CSV")

    # ...existing code...


-    async def init_browser(self):
+    async def init_browser(self) -> None:
        """Initialize Playwright browser (minimal, like test script)"""
        if not hasattr(self, 'browser') or self.browser is None:
            self.playwright = await async_playwright().start()
@ -249,13 +256,13 @@ class ApplicationHandler:
        return {"autopilot": False}


-    def save_state(self, state: dict):
+    def save_state(self, state: dict) -> None:
        """Save persistent state"""
        with open(STATE_FILE, "w") as f:
            json.dump(state, f, indent=2)


-    def set_autopilot(self, enabled: bool):
+    def set_autopilot(self, enabled: bool) -> None:
        """Enable or disable autopilot mode"""
        self.state_manager.set_autopilot(enabled)

@ -276,7 +283,7 @@ class ApplicationHandler:
        return {}


-    def save_application(self, result: dict):
+    def save_application(self, result: dict) -> None:
        """Save an application result."""
        applications = self.load_applications()
        applications[result["listing_id"]] = result
@ -297,7 +304,7 @@ class ApplicationHandler:
        return {}


-    def save_listings(self, listings: list[dict]):
+    def save_listings(self, listings: list[dict]) -> None:
        """Save current listings"""
        listings_dict = {l["id"]: l for l in listings}
        with open(LISTINGS_FILE, "w") as f:
@ -346,45 +353,43 @@ class ApplicationHandler:
                        heatmap_data.loc[day, hour] = int(val) + 1

            # Create figure with a 2x2 grid of subplots (four panels)
-            fig, axes = plt.subplots(2, 2, figsize=(14, 10))
-            fig.suptitle('Listing Appearance Patterns', fontsize=16, fontweight='bold')
+            fig, axes = plt.subplots(2, 2, figsize=(16, 12))
+            fig.suptitle('Listing Appearance Patterns', fontsize=18, fontweight='bold', y=0.995)

-            # 1. Heatmap - Day vs Hour
+            # 1. Heatmap - Day vs Hour (using seaborn)
            ax1 = axes[0, 0]
-            im = ax1.imshow(heatmap_data.values, cmap='YlOrRd', aspect='auto')
-            ax1.set_xticks(range(24))
-            ax1.set_xticklabels(range(24), fontsize=8)
-            ax1.set_yticks(range(7))
-            ax1.set_yticklabels(days_order)
-            ax1.set_xlabel('Hour of Day')
-            ax1.set_ylabel('Day of Week')
-            ax1.set_title('Listings by Day & Hour')
-            plt.colorbar(im, ax=ax1, label='Count')
+            sns.heatmap(heatmap_data, cmap='RdYlGn_r', annot=False, fmt='d',
+                       cbar_kws={'label': 'Count'}, ax=ax1, linewidths=0.5, linecolor='gray')
+            ax1.set_xlabel('Hour of Day', fontsize=11, fontweight='bold')
+            ax1.set_ylabel('Day of Week', fontsize=11, fontweight='bold')
+            ax1.set_title('Listings by Day & Hour', fontsize=12, fontweight='bold', pad=10)
+            ax1.set_xticklabels(range(24), fontsize=9)
+            ax1.set_yticklabels(days_order, rotation=0, fontsize=9)

-            # 2. Bar chart - By day of week
+            # 2. Bar chart - By day of week (seaborn style)
            ax2 = axes[0, 1]
            day_counts = df['weekday'].value_counts().reindex(days_order, fill_value=0)
-            colors = plt.cm.get_cmap('Blues')(day_counts / day_counts.max() if day_counts.max() > 0 else day_counts)
-            bars = ax2.bar(range(7), day_counts.values, color=colors)
+            sns.barplot(x=range(7), y=day_counts.values, ax=ax2, palette='Blues_d', hue=range(7), legend=False)
            ax2.set_xticks(range(7))
-            ax2.set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
-            ax2.set_xlabel('Day of Week')
-            ax2.set_ylabel('Number of Listings')
-            ax2.set_title('Total Listings by Day')
+            ax2.set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'], fontsize=9)
+            ax2.set_xlabel('Day of Week', fontsize=11, fontweight='bold')
+            ax2.set_ylabel('Number of Listings', fontsize=11, fontweight='bold')
+            ax2.set_title('Total Listings by Day', fontsize=12, fontweight='bold', pad=10)
            for i, v in enumerate(day_counts.values):
                if v > 0:
-                    ax2.text(i, v + 0.1, str(v), ha='center', fontsize=9)
+                    ax2.text(i, v + 0.5, str(v), ha='center', fontsize=9, fontweight='bold')

-            # 3. Line chart - By hour
+            # 3. Line chart - By hour (seaborn style)
            ax3 = axes[1, 0]
            hour_counts = df['hour'].value_counts().reindex(range(24), fill_value=0)
-            ax3.plot(range(24), hour_counts.values, marker='o', linewidth=2, markersize=4, color='#2E86AB')
-            ax3.fill_between(range(24), hour_counts.values, alpha=0.3, color='#2E86AB')
+            sns.lineplot(x=range(24), y=hour_counts.values, ax=ax3, marker='o',
+                        linewidth=2.5, markersize=6, color='#2E86AB')
+            ax3.fill_between(range(24), hour_counts.values, alpha=0.2, color='#2E86AB')
            ax3.set_xticks(range(0, 24, 2))
-            ax3.set_xlabel('Hour of Day')
-            ax3.set_ylabel('Number of Listings')
-            ax3.set_title('Total Listings by Hour')
-            ax3.grid(True, alpha=0.3)
+            ax3.set_xlabel('Hour of Day', fontsize=11, fontweight='bold')
+            ax3.set_ylabel('Number of Listings', fontsize=11, fontweight='bold')
+            ax3.set_title('Total Listings by Hour', fontsize=12, fontweight='bold', pad=10)
+            ax3.grid(True, alpha=0.3, linestyle='--')

            # 4. Summary stats
            ax4 = axes[1, 1]
@ -421,10 +426,10 @@ Total listings tracked: {total_listings}
                    verticalalignment='top', fontfamily='monospace',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

-            plt.tight_layout()
+            plt.tight_layout(rect=(0, 0, 1, 0.99))

-            # Save plot
-            plt.savefig(plot_path, dpi=150, bbox_inches='tight')
+            # Save plot with high resolution
+            plt.savefig(plot_path, dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
            plt.close()

            logger.info(f"Plot saved to {plot_path}")
@ -434,7 +439,7 @@ Total listings tracked: {total_listings}
            return ""


-    def _generate_error_rate_plot(self):
+    def _generate_error_rate_plot(self) -> tuple[str | None, str]:
        """Read applications.json and produce a plot image + summary text.

        Returns (plot_path, summary_text) or (None, "") if insufficient data.
@ -474,7 +479,8 @@ Total listings tracked: {total_listings}
            grouped = grouped.sort_index()

            # Prepare plot: convert dates to matplotlib numeric x-values so bars and line align
-            fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 12), sharex=True)
+            fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(14, 14), sharex=True)
+            fig.suptitle('Autopilot Performance Analysis', fontsize=18, fontweight='bold', y=0.995)

            dates = pd.to_datetime(grouped.index).to_pydatetime()
            x = mdates.date2num(dates)
@ -483,53 +489,65 @@ Total listings tracked: {total_listings}
            successes = grouped['successes'].values
            failures = grouped['failures'].values

-            ax1.bar(x, successes, width=width, color='#2E8B57', align='center')
-            ax1.bar(x, failures, bottom=successes, width=width, color='#C44A4A', align='center')
-            ax1.set_ylabel('Count')
-            ax1.set_title('Autopilot: Successes vs Failures (by day)')
+            # Use seaborn color palette
+            success_color = sns.color_palette('RdYlGn', n_colors=10)[8]  # Green
+            failure_color = sns.color_palette('RdYlGn', n_colors=10)[1]  # Red
+
+            ax1.bar(x, successes, width=width, color=success_color, align='center', label='Success', edgecolor='white', linewidth=0.5)
+            ax1.bar(x, failures, bottom=successes, width=width, color=failure_color, align='center', label='Failure', edgecolor='white', linewidth=0.5)
+            ax1.set_ylabel('Count', fontsize=11, fontweight='bold')
+            ax1.set_title('Successes vs Failures (by day)', fontsize=13, fontweight='bold', pad=10)
            ax1.set_xticks(x)
            ax1.set_xlim(min(x) - 1, max(x) + 1)
            ax1.xaxis.set_major_locator(mdates.AutoDateLocator())
            ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
+            ax1.legend(loc='upper left', framealpha=0.9)
+            ax1.grid(True, alpha=0.3, linestyle='--', axis='y')

            # Plot error rate line on same x (date) axis
-            ax2.plot(x, grouped['error_rate'].values, marker='o', color='#3333AA', linewidth=2)
+            sns.lineplot(x=x, y=grouped['error_rate'].values, ax=ax2, marker='o',
+                        linewidth=2.5, markersize=8, color='#E74C3C')
+            ax2.fill_between(x, grouped['error_rate'].values, alpha=0.2, color='#E74C3C')
            ax2.set_ylim(-0.02, 1.02)
-            ax2.set_ylabel('Error rate')
-            ax2.set_xlabel('Date')
-            ax2.set_title('Daily Error Rate (failures / total)')
-            ax2.grid(True, alpha=0.3)
+            ax2.set_ylabel('Error Rate', fontsize=11, fontweight='bold')
+            ax2.set_xlabel('Date', fontsize=11, fontweight='bold')
+            ax2.set_title('Daily Error Rate (failures / total)', fontsize=13, fontweight='bold', pad=10)
+            ax2.grid(True, alpha=0.3, linestyle='--')
            ax2.set_xticks(x)
            ax2.set_xlim(min(x) - 1, max(x) + 1)
            ax2.xaxis.set_major_locator(mdates.AutoDateLocator())
            ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

-            # Error rate by company (line plot)
+            # Error rate by company (line plot with seaborn palette)
            company_grouped = df.groupby(['date', 'company']).agg(total=('id','count'), successes=('success', lambda x: x.sum()))
            company_grouped['failures'] = company_grouped['total'] - company_grouped['successes']
            company_grouped['error_rate'] = company_grouped['failures'] / company_grouped['total']
            company_grouped = company_grouped.reset_index()
            error_rate_pivot = company_grouped.pivot(index='date', columns='company', values='error_rate')
-            for company in error_rate_pivot.columns:
+
+            # Use distinct seaborn colors for each company
+            palette = sns.color_palette('husl', n_colors=len(error_rate_pivot.columns))
+            for idx, company in enumerate(error_rate_pivot.columns):
                y = error_rate_pivot[company].values
-                ax3.plot(x, y, marker='o', label=str(company))
+                ax3.plot(x, y, marker='o', label=str(company), linewidth=2.5,
+                        markersize=7, color=palette[idx])
            ax3.set_ylim(-0.02, 1.02)
-            ax3.set_ylabel('Error rate')
-            ax3.set_xlabel('Date')
-            ax3.set_title('Daily Error Rate by Company')
-            ax3.grid(True, alpha=0.3)
+            ax3.set_ylabel('Error Rate', fontsize=11, fontweight='bold')
+            ax3.set_xlabel('Date', fontsize=11, fontweight='bold')
+            ax3.set_title('Daily Error Rate by Company', fontsize=13, fontweight='bold', pad=10)
+            ax3.grid(True, alpha=0.3, linestyle='--')
            ax3.set_xticks(x)
            ax3.set_xlim(min(x) - 1, max(x) + 1)
            ax3.xaxis.set_major_locator(mdates.AutoDateLocator())
            ax3.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
-            ax3.legend(title='Company', loc='upper right', fontsize='small')
+            ax3.legend(title='Company', loc='upper right', fontsize=10, framealpha=0.9)

            fig.autofmt_xdate()
-            plt.tight_layout()
+            plt.tight_layout(rect=(0, 0, 1, 0.99))
            plot_path = self.applications_file.parent / 'error_rate.png'
            tmp_path = self.applications_file.parent / 'error_rate.tmp.png'
            # Save to a temp file first and atomically replace to ensure overwrite
-            fig.savefig(tmp_path, format='png')
+            fig.savefig(tmp_path, format='png', dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
            plt.close(fig)
            try:
                tmp_path.replace(plot_path)
@ -555,7 +573,7 @@ Total listings tracked: {total_listings}
            return None, ""


-    async def login(self, page):
+    async def login(self, page) -> bool:
        """Login to inberlinwohnen.de (minimal, like test script)"""
        if not self.state_manager.email or not self.state_manager.password:
            logger.warning("No credentials provided. Ensure INBERLIN_EMAIL and INBERLIN_PASSWORD are set in the environment.")
@ -606,7 +624,29 @@ Total listings tracked: {total_listings}


    async def fetch_listings(self) -> list[dict]:
-        """Fetch listings from the Wohnungsfinder"""
+        """Fetch listings from the Wohnungsfinder with retry logic for transient failures"""
+        max_retries = 3
+        retry_delay = 2  # Initial delay in seconds
+
+        for attempt in range(max_retries):
+            try:
+                listings = await self._fetch_listings_attempt()
+                if attempt > 0:
+                    logger.info(f"✅ Fetch succeeded (attempt {attempt + 1})")
+                return listings
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    wait_time = retry_delay * (2 ** attempt)  # Exponential backoff
+                    logger.warning(f"⚠️  Fetch failed (attempt {attempt + 1}/{max_retries}): {str(e)[:50]}... Retrying in {wait_time}s")
+                    await asyncio.sleep(wait_time)
+                else:
+                    logger.error(f"❌ Fetch failed after {max_retries} attempts")
+                    return []
+
+        return []
+
+    async def _fetch_listings_attempt(self) -> list[dict]:
+        """Single attempt to fetch listings (extracted for retry logic)"""
        listings = []

        try:
@ -742,17 +782,14 @@ Total listings tracked: {total_listings}
            listings = unique_listings

            if not listings:
-                logger.warning("No listings found after parsing. Dumping HTML snippet for debugging:")
-                logger.warning(content[:1000])
+                logger.warning("⚠️  No listings parsed")

            await page.close()
-            logger.info(f"Fetched {len(listings)} unique listings")
+            logger.info(f"📊 Fetched {len(listings)} listings")
            return listings

        except Exception as e:
-            logger.error(f"Error fetching listings: {e}")
-            import traceback
-            logger.error(traceback.format_exc())
+            logger.error(f"❌ Fetch error: {str(e)[:100]}")
            return []