new error listing

2025-12-16 13:51:25 +01:00 · 2025-12-16 13:51:25 +01:00 · d33b6c4226
commit d33b6c4226
parent d39453d688
3 changed files with 75 additions and 66 deletions
--- a/.python-version
+++ b/.python-version
@@ -1 +1 @@
-3.13
+3.12
--- a/monitor.py
+++ b/monitor.py
@@ -261,13 +261,13 @@ When autopilot is ON, I will automatically apply to new listings."""
        """
        if not APPLICATIONS_FILE.exists():
            logger.warning("No applications.json found for errorrate plot")
-            return None, ""
+            return "", ""  # Return empty strings

        try:
            with open(APPLICATIONS_FILE, 'r', encoding='utf-8') as f:
                apps = json.load(f)
            if not apps:
-                return None, ""
+                return "", ""

            # Convert to DataFrame
            rows = []
@@ -281,7 +281,7 @@ When autopilot is ON, I will automatically apply to new listings."""
            df = pd.DataFrame(rows)
            df = df.dropna(subset=['ts'])
            if df.empty:
-                return None, ""
+                return "", ""

            df['date'] = df['ts'].dt.floor('D')
            grouped = df.groupby('date').agg(total=('id','count'), successes=('success', lambda x: x.sum()))
@@ -293,7 +293,7 @@ When autopilot is ON, I will automatically apply to new listings."""

            # Prepare plot: convert dates to matplotlib numeric x-values so bars and line align
            import matplotlib.dates as mdates
-            fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
+            fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 12), sharex=True)

            dates = pd.to_datetime(grouped.index).to_pydatetime()
            x = mdates.date2num(dates)
@@ -322,6 +322,27 @@ When autopilot is ON, I will automatically apply to new listings."""
            ax2.set_xlim(min(x) - 1, max(x) + 1)
            ax2.xaxis.set_major_locator(mdates.AutoDateLocator())
            ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
+
+            # New: Error rate by company (line plot)
+            company_grouped = df.groupby(['date', 'company']).agg(total=('id','count'), successes=('success', lambda x: x.sum()))
+            company_grouped['failures'] = company_grouped['total'] - company_grouped['successes']
+            company_grouped['error_rate'] = company_grouped['failures'] / company_grouped['total']
+            company_grouped = company_grouped.reset_index()
+            error_rate_pivot = company_grouped.pivot(index='date', columns='company', values='error_rate')
+            for company in error_rate_pivot.columns:
+                y = error_rate_pivot[company].values
+                ax3.plot(x, y, marker='o', label=str(company))
+            ax3.set_ylim(-0.02, 1.02)
+            ax3.set_ylabel('Error rate')
+            ax3.set_xlabel('Date')
+            ax3.set_title('Daily Error Rate by Company')
+            ax3.grid(True, alpha=0.3)
+            ax3.set_xticks(x)
+            ax3.set_xlim(min(x) - 1, max(x) + 1)
+            ax3.xaxis.set_major_locator(mdates.AutoDateLocator())
+            ax3.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
+            ax3.legend(title='Company', loc='upper right', fontsize='small')
+
            fig.autofmt_xdate()

            plt.tight_layout()
@@ -351,7 +372,7 @@ When autopilot is ON, I will automatically apply to new listings."""
            return str(plot_path), summary
        except Exception as e:
            logger.exception(f"Failed to generate error rate plot: {e}")
-            return None, ""
+            return "", ""
        self._send_message(f"❓ Unknown command: <code>{cmd}</code>\n\nUse /help to see available commands.")

    def _handle_plot_command(self):
@@ -373,13 +394,13 @@ When autopilot is ON, I will automatically apply to new listings."""
        """Generate a heatmap of listings by day of week and hour"""
        if not TIMING_FILE.exists():
            logger.warning("No timing data file found")
-            return None
+            return ""

        try:
            df = pd.read_csv(TIMING_FILE)
            if len(df) < 1:
                logger.warning("Timing file is empty")
-                return None
+                return ""

            logger.info(f"Loaded {len(df)} listing records for plot")

@@ -393,7 +414,11 @@ When autopilot is ON, I will automatically apply to new listings."""
                day = row['weekday']
                hour = int(row['hour'])
                if day in days_order:
-                    heatmap_data.loc[day, hour] += 1
+                    # Fix: Ensure the value is numeric before incrementing
+                    if pd.api.types.is_numeric_dtype(heatmap_data.loc[day, hour]):
+                        heatmap_data.loc[day, hour] += 1
+                    else:
+                        heatmap_data.loc[day, hour] = 1  # Initialize if not numeric

            # Create figure with two subplots
            fig, axes = plt.subplots(2, 2, figsize=(14, 10))
@@ -414,7 +439,7 @@ When autopilot is ON, I will automatically apply to new listings."""
            # 2. Bar chart - By day of week
            ax2 = axes[0, 1]
            day_counts = df['weekday'].value_counts().reindex(days_order, fill_value=0)
-            colors = plt.cm.Blues(day_counts / day_counts.max() if day_counts.max() > 0 else day_counts)
+            colors = plt.cm.get_cmap('Blues')(day_counts / day_counts.max() if day_counts.max() > 0 else day_counts)
            bars = ax2.bar(range(7), day_counts.values, color=colors)
            ax2.set_xticks(range(7))
            ax2.set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
@@ -448,13 +473,19 @@ When autopilot is ON, I will automatically apply to new listings."""
            # Find peak combinations
            peak_combo = heatmap_data.stack().idxmax() if heatmap_data.values.max() > 0 else ("N/A", "N/A")

+            # Fix: Ensure peak_combo is iterable
+            if isinstance(peak_combo, tuple) and len(peak_combo) == 2:
+                stats_text = f"🎯 Peak time: {peak_combo[0]} at {peak_combo[1]}:00"
+            else:
+                stats_text = "🎯 Peak time: N/A"
+
            stats_text = f"""📊 Summary Statistics

 Total listings tracked: {total_listings}

 🏆 Best day: {best_day}
 ⏰ Best hour: {best_hour}:00
-🎯 Peak time: {peak_combo[0]} at {peak_combo[1]}:00
+{stats_text}

 📈 Average per day: {total_listings/7:.1f}
 📅 Data collection period:
@@ -479,7 +510,8 @@ Total listings tracked: {total_listings}
            logger.error(f"Error creating plot: {e}")
            import traceback
            logger.error(traceback.format_exc())
-            return None
+            return ""
+        self._send_message(f"❓ Unknown command: <code>{cmd}</code>\n\nUse /help to see available commands.")

    def _send_message(self, text):
        try:
@@ -1720,7 +1752,7 @@ class InBerlinMonitor:

            # Extract listings from button elements with aria-label
            # Format: @click="open !== 12345 ..." aria-label="Wohnungsangebot - 2,0 Zimmer, 53,01 m², 494,38 € Kaltmiete | Adresse"
-            button_pattern = r'@click="open !== (\d+)[^"]*"[^>]*aria-label="Wohnungsangebot - ([^"]+)"'
+            button_pattern = r'@click="open !== (\d+)[^"]*"[^>]*aria-label="Wohnungsangebot - ([^"]+)'
            button_matches = re.findall(button_pattern, content_decoded)
            logger.info(f"Found {len(button_matches)} listing buttons")

@@ -2238,54 +2270,3 @@ class WGCompanyMonitor:
    async def _async_fetch(self):
        await self.init_browser()
        return await self.fetch_listings()
-
-
-def main():
-    """Main entry point"""
-
-    # Ensure data directory exists
-    DATA_DIR.mkdir(parents=True, exist_ok=True)
-
-    # Initialize monitors
-    inberlin_monitor = InBerlinMonitor()
-    wgcompany_monitor = WGCompanyMonitor() if WGCOMPANY_ENABLED else None
-
-    # Start Telegram command listener
-    telegram_bot = TelegramBot(inberlin_monitor)
-    telegram_bot.start()
-
-    logger.info(f"Monitor started (interval: {CHECK_INTERVAL}s)")
-    logger.info(f"InBerlin Autopilot: {'ENABLED' if inberlin_monitor.is_autopilot_enabled() else 'DISABLED'}")
-    logger.info(f"WGcompany: {'ENABLED' if WGCOMPANY_ENABLED else 'DISABLED'}")
-
-    # Run periodic cleanup hourly
-    last_cleanup = 0
-
-    while True:
-        # Check InBerlinWohnen
-        try:
-            inberlin_monitor.check()
-        except Exception as e:
-            logger.error(f"InBerlin check failed: {e}")
-
-        # Periodic cleanup: remove PNGs older than 24h and prune logs older than 7 days
-        try:
-            if time.time() - last_cleanup > 3600:  # every hour
-                logger.info("Running periodic cleanup (old PNGs, prune logs)")
-                _cleanup_old_files(png_hours=24, log_days=7)
-                last_cleanup = time.time()
-        except Exception:
-            logger.exception("Cleanup failed")
-
-        # Check WGcompany
-        if wgcompany_monitor:
-            try:
-                wgcompany_monitor.check()
-            except Exception as e:
-                logger.error(f"WGcompany check failed: {e}")
-
-        time.sleep(CHECK_INTERVAL)
-
-
-if __name__ == "__main__":
-    main()
--- a/tests/test_errorrate_runner.py
+++ b/tests/test_errorrate_runner.py
@@ -52,12 +52,15 @@ def generate_error_rate_plot(applications_file: str):
        grouped = grouped.sort_index()

        # Plot
-        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
+        import matplotlib.dates as mdates
+        # Add a third subplot for error rate by company
+        fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 12), sharex=True)
+
+        # Stacked bar: successes vs failures (all companies)
        grouped[['successes','failures']].plot(kind='bar', stacked=True, ax=ax1, color=['#2E8B57','#C44A4A'])
        ax1.set_ylabel('Count')
        ax1.set_title('Autopilot: Successes vs Failures (by day)')

-        import matplotlib.dates as mdates
        dates = pd.to_datetime(grouped.index).to_pydatetime()
        x = mdates.date2num(dates)
        width = 0.6
@@ -69,6 +72,7 @@ def generate_error_rate_plot(applications_file: str):
        ax1.xaxis.set_major_locator(mdates.AutoDateLocator())
        ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

+        # Line: overall error rate
        ax2.plot(x, grouped['error_rate'].values, marker='o', color='#3333AA', linewidth=2)
        ax2.set_ylim(-0.02, 1.02)
        ax2.set_ylabel('Error rate')
@@ -79,6 +83,30 @@ def generate_error_rate_plot(applications_file: str):
        ax2.set_xlim(min(x) - 1, max(x) + 1)
        ax2.xaxis.set_major_locator(mdates.AutoDateLocator())
        ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
+
+        # New: Error rate by company (line plot)
+        # Group by date and company
+        company_grouped = df.groupby(['date', 'company']).agg(total=('id','count'), successes=('success', lambda x: x.sum()))
+        company_grouped['failures'] = company_grouped['total'] - company_grouped['successes']
+        company_grouped['error_rate'] = company_grouped['failures'] / company_grouped['total']
+        company_grouped = company_grouped.reset_index()
+        # Pivot for plotting: index=date, columns=company, values=error_rate
+        error_rate_pivot = company_grouped.pivot(index='date', columns='company', values='error_rate')
+        # Plot each company as a line
+        for company in error_rate_pivot.columns:
+            y = error_rate_pivot[company].values
+            ax3.plot(x, y, marker='o', label=str(company))
+        ax3.set_ylim(-0.02, 1.02)
+        ax3.set_ylabel('Error rate')
+        ax3.set_xlabel('Date')
+        ax3.set_title('Daily Error Rate by Company')
+        ax3.grid(True, alpha=0.3)
+        ax3.set_xticks(x)
+        ax3.set_xlim(min(x) - 1, max(x) + 1)
+        ax3.xaxis.set_major_locator(mdates.AutoDateLocator())
+        ax3.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
+        ax3.legend(title='Company', loc='upper right', fontsize='small')
+
        fig.autofmt_xdate()

        plot_path = os.path.join(DATA_DIR, 'error_rate.png')
 @ -1 +1 @@
 .13
 .12