mostly working shape?

2025-12-31 16:06:42 +01:00 · 2025-12-31 16:06:42 +01:00 · 540a3cc884
commit 540a3cc884
parent 3057cda8d3
10 changed files with 462 additions and 183 deletions
--- a/application_handler.py
+++ b/application_handler.py
@@ -108,7 +108,8 @@ class ApplicationHandler:
            # Send via TelegramBot if available
            if hasattr(self, 'telegram_bot') and self.telegram_bot:
                logger.info(f"Notifying Telegram: {listing['address']} ({listing['rooms']}, {listing['size']}, {listing['price']})")
-                self.telegram_bot._send_message(message)
+                loop = getattr(self.telegram_bot, 'event_loop', None) or asyncio.get_event_loop()
+                asyncio.run_coroutine_threadsafe(self.telegram_bot._send_message(message), loop)
            else:
                    logger.info(f"[TELEGRAM] Would send message for: {listing['address']} ({listing['rooms']}, {listing['size']}, {listing['price']})")

@@ -313,69 +314,124 @@ class ApplicationHandler:


    def _generate_weekly_plot(self) -> str:
-        """Generate a heatmap of listings by day of week and hour. Always returns a plot path, even if no data."""
+        """Generate a heatmap, bar chart, line chart, and summary of listings by day/hour, like monitor.py."""
        plot_path = DATA_DIR / "weekly_plot.png"
        try:
            if not TIMING_FILE.exists():
-                logger.warning("No timing file found for weekly plot. Generating empty plot.")
-                # Generate empty plot
-                fig, ax = plt.subplots(figsize=(10, 6))
-                ax.set_xticks(range(24))
-                ax.set_yticks(range(7))
-                ax.set_xticklabels([f"{h}:00" for h in range(24)], rotation=90)
-                ax.set_yticklabels(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])
-                ax.set_title("Listings Heatmap (No Data)")
-                ax.text(0.5, 0.5, "No data available", fontsize=18, ha='center', va='center', transform=ax.transAxes, color='gray')
-                plt.savefig(plot_path)
-                plt.close(fig)
-                return str(plot_path)
+                logger.warning("No timing data file found")
+                return ""

-            df = pd.read_csv(TIMING_FILE, parse_dates=["timestamp"])
-            if df.empty:
-                logger.warning("Timing file is empty. Generating empty plot.")
-                fig, ax = plt.subplots(figsize=(10, 6))
-                ax.set_xticks(range(24))
-                ax.set_yticks(range(7))
-                ax.set_xticklabels([f"{h}:00" for h in range(24)], rotation=90)
-                ax.set_yticklabels(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])
-                ax.set_title("Listings Heatmap (No Data)")
-                ax.text(0.5, 0.5, "No data available", fontsize=18, ha='center', va='center', transform=ax.transAxes, color='gray')
-                plt.savefig(plot_path)
-                plt.close(fig)
-                return str(plot_path)
+            df = pd.read_csv(TIMING_FILE)
+            if len(df) < 1:
+                logger.warning("Timing file is empty")
+                return ""

-            df["day_of_week"] = df["timestamp"].dt.dayofweek
-            df["hour"] = df["timestamp"].dt.hour
-            heatmap_data = df.groupby(["day_of_week", "hour"]).size().unstack(fill_value=0)
+            logger.info(f"Loaded {len(df)} listing records for plot")

-            fig, ax = plt.subplots(figsize=(10, 6))
-            cax = ax.matshow(heatmap_data, cmap="YlGnBu", aspect="auto")
-            fig.colorbar(cax)
+            # Create day-hour matrix
+            days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

-            ax.set_xticks(range(24))
-            ax.set_yticks(range(7))
-            ax.set_xticklabels([f"{h}:00" for h in range(24)], rotation=90)
-            ax.set_yticklabels(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])
+            # Count listings per day and hour
+            heatmap_data = pd.DataFrame(0, index=days_order, columns=range(24))

-            ax.set_title("Listings Heatmap (Day of Week vs Hour)")
+            for _, row in df.iterrows():
+                day = row['weekday']
+                hour = int(row['hour'])
+                if day in days_order:
+                    # Use pd.to_numeric to ensure value is numeric before incrementing
+                    val = pd.to_numeric(heatmap_data.loc[day, hour], errors='coerce')
+                    if pd.isna(val):
+                        heatmap_data.loc[day, hour] = 1
+                    else:
+                        heatmap_data.loc[day, hour] = int(val) + 1

-            plt.savefig(plot_path)
-            plt.close(fig)
-            logger.info(f"Weekly plot saved to {plot_path}")
+            # Create figure with a 2x2 grid of four subplots
+            fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+            fig.suptitle('Listing Appearance Patterns', fontsize=16, fontweight='bold')
+
+            # 1. Heatmap - Day vs Hour
+            ax1 = axes[0, 0]
+            im = ax1.imshow(heatmap_data.values, cmap='YlOrRd', aspect='auto')
+            ax1.set_xticks(range(24))
+            ax1.set_xticklabels(range(24), fontsize=8)
+            ax1.set_yticks(range(7))
+            ax1.set_yticklabels(days_order)
+            ax1.set_xlabel('Hour of Day')
+            ax1.set_ylabel('Day of Week')
+            ax1.set_title('Listings by Day & Hour')
+            plt.colorbar(im, ax=ax1, label='Count')
+
+            # 2. Bar chart - By day of week
+            ax2 = axes[0, 1]
+            day_counts = df['weekday'].value_counts().reindex(days_order, fill_value=0)
+            colors = plt.cm.get_cmap('Blues')(day_counts / day_counts.max() if day_counts.max() > 0 else day_counts)
+            bars = ax2.bar(range(7), day_counts.values, color=colors)
+            ax2.set_xticks(range(7))
+            ax2.set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
+            ax2.set_xlabel('Day of Week')
+            ax2.set_ylabel('Number of Listings')
+            ax2.set_title('Total Listings by Day')
+            for i, v in enumerate(day_counts.values):
+                if v > 0:
+                    ax2.text(i, v + 0.1, str(v), ha='center', fontsize=9)
+
+            # 3. Line chart - By hour
+            ax3 = axes[1, 0]
+            hour_counts = df['hour'].value_counts().reindex(range(24), fill_value=0)
+            ax3.plot(range(24), hour_counts.values, marker='o', linewidth=2, markersize=4, color='#2E86AB')
+            ax3.fill_between(range(24), hour_counts.values, alpha=0.3, color='#2E86AB')
+            ax3.set_xticks(range(0, 24, 2))
+            ax3.set_xlabel('Hour of Day')
+            ax3.set_ylabel('Number of Listings')
+            ax3.set_title('Total Listings by Hour')
+            ax3.grid(True, alpha=0.3)
+
+            # 4. Summary stats
+            ax4 = axes[1, 1]
+            ax4.axis('off')
+
+            # Calculate best times
+            best_day = day_counts.idxmax() if day_counts.max() > 0 else "N/A"
+            best_hour = hour_counts.idxmax() if hour_counts.max() > 0 else "N/A"
+            total_listings = len(df)
+
+            # Find peak combinations
+            peak_combo = heatmap_data.stack().idxmax() if heatmap_data.values.max() > 0 else ("N/A", "N/A")
+
+            # Guard: only format the peak time when peak_combo is a (day, hour) 2-tuple
+            if isinstance(peak_combo, tuple) and len(peak_combo) == 2:
+                stats_text = f"🎯 Peak time: {peak_combo[0]} at {peak_combo[1]}:00"
+            else:
+                stats_text = "🎯 Peak time: N/A"
+
+            stats_text = f"""📊 Summary Statistics
+
+Total listings tracked: {total_listings}
+
+🏆 Best day: {best_day}
+⏰ Best hour: {best_hour}:00
+{stats_text}
+
+📈 Average per day: {total_listings/7:.1f}
+📅 Data collection period:
+   From: {df['timestamp'].min()[:10] if 'timestamp' in df.columns else 'N/A'}
+   To: {df['timestamp'].max()[:10] if 'timestamp' in df.columns else 'N/A'}
+"""
+            ax4.text(0.1, 0.9, stats_text, transform=ax4.transAxes, fontsize=11,
+                    verticalalignment='top', fontfamily='monospace',
+                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
+
+            plt.tight_layout()
+
+            # Save plot
+            plt.savefig(plot_path, dpi=150, bbox_inches='tight')
+            plt.close()
+
+            logger.info(f"Plot saved to {plot_path}")
            return str(plot_path)
        except Exception as e:
-            logger.error(f"Failed to generate weekly plot: {e}")
-            # Always generate a fallback empty plot
-            fig, ax = plt.subplots(figsize=(10, 6))
-            ax.set_xticks(range(24))
-            ax.set_yticks(range(7))
-            ax.set_xticklabels([f"{h}:00" for h in range(24)], rotation=90)
-            ax.set_yticklabels(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])
-            ax.set_title("Listings Heatmap (Error)")
-            ax.text(0.5, 0.5, "Plot error", fontsize=18, ha='center', va='center', transform=ax.transAxes, color='red')
-            plt.savefig(plot_path)
-            plt.close(fig)
-            return str(plot_path)
+            logger.error(f"Error creating plot: {e}")
+            return ""


    def _generate_error_rate_plot(self):
@@ -383,6 +439,8 @@ class ApplicationHandler:

        Returns (plot_path, summary_text) or (None, "") if insufficient data.
        """
+        import matplotlib.dates as mdates
+        from pathlib import Path
        if not self.applications_file.exists():
            logger.warning("No applications.json found for errorrate plot")
            return None, ""
@@ -390,25 +448,21 @@ class ApplicationHandler:
        try:
            with open(self.applications_file, 'r', encoding='utf-8') as f:
                apps = json.load(f)
-
            if not apps:
-                logger.warning("No application data available for errorrate plot")
                return None, ""

            # Convert to DataFrame
            rows = []
            for _id, rec in apps.items():
-                rows.append({
-                    "id": _id,
-                    "ts": pd.to_datetime(rec.get("timestamp")),
-                    "success": rec.get("success", False),
-                    "company": rec.get("company", "unknown")
-                })
-
+                ts = rec.get('timestamp')
+                try:
+                    dt = pd.to_datetime(ts)
+                except Exception:
+                    dt = pd.NaT
+                rows.append({'id': _id, 'company': rec.get('company'), 'success': bool(rec.get('success')), 'ts': dt})
            df = pd.DataFrame(rows)
            df = df.dropna(subset=['ts'])
            if df.empty:
-                logger.warning("No valid data for errorrate plot")
                return None, ""

            df['date'] = df['ts'].dt.floor('D')
@@ -419,28 +473,83 @@ class ApplicationHandler:
            # Ensure index is sorted by date for plotting
            grouped = grouped.sort_index()

-            # Prepare plot
-            fig, ax = plt.subplots(figsize=(10, 6))
-            ax.plot(grouped.index, grouped['error_rate'], marker='o', color='red', label='Error Rate')
-            ax.set_title('Autopilot Error Rate Over Time')
-            ax.set_xlabel('Date')
-            ax.set_ylabel('Error Rate')
-            ax.legend()
-            ax.grid(True)
+            # Prepare plot: convert dates to matplotlib numeric x-values so bars and line align
+            fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 12), sharex=True)

-            # Save plot to the same directory as the applications file
+            dates = pd.to_datetime(grouped.index).to_pydatetime()
+            x = mdates.date2num(dates)
+            width = 0.6  # width in days for bars
+
+            successes = grouped['successes'].values
+            failures = grouped['failures'].values
+
+            ax1.bar(x, successes, width=width, color='#2E8B57', align='center')
+            ax1.bar(x, failures, bottom=successes, width=width, color='#C44A4A', align='center')
+            ax1.set_ylabel('Count')
+            ax1.set_title('Autopilot: Successes vs Failures (by day)')
+            ax1.set_xticks(x)
+            ax1.set_xlim(min(x) - 1, max(x) + 1)
+            ax1.xaxis.set_major_locator(mdates.AutoDateLocator())
+            ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
+
+            # Plot error rate line on same x (date) axis
+            ax2.plot(x, grouped['error_rate'].values, marker='o', color='#3333AA', linewidth=2)
+            ax2.set_ylim(-0.02, 1.02)
+            ax2.set_ylabel('Error rate')
+            ax2.set_xlabel('Date')
+            ax2.set_title('Daily Error Rate (failures / total)')
+            ax2.grid(True, alpha=0.3)
+            ax2.set_xticks(x)
+            ax2.set_xlim(min(x) - 1, max(x) + 1)
+            ax2.xaxis.set_major_locator(mdates.AutoDateLocator())
+            ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
+
+            # Error rate by company (line plot)
+            company_grouped = df.groupby(['date', 'company']).agg(total=('id','count'), successes=('success', lambda x: x.sum()))
+            company_grouped['failures'] = company_grouped['total'] - company_grouped['successes']
+            company_grouped['error_rate'] = company_grouped['failures'] / company_grouped['total']
+            company_grouped = company_grouped.reset_index()
+            error_rate_pivot = company_grouped.pivot(index='date', columns='company', values='error_rate')
+            for company in error_rate_pivot.columns:
+                y = error_rate_pivot[company].values
+                ax3.plot(x, y, marker='o', label=str(company))
+            ax3.set_ylim(-0.02, 1.02)
+            ax3.set_ylabel('Error rate')
+            ax3.set_xlabel('Date')
+            ax3.set_title('Daily Error Rate by Company')
+            ax3.grid(True, alpha=0.3)
+            ax3.set_xticks(x)
+            ax3.set_xlim(min(x) - 1, max(x) + 1)
+            ax3.xaxis.set_major_locator(mdates.AutoDateLocator())
+            ax3.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
+            ax3.legend(title='Company', loc='upper right', fontsize='small')
+
+            fig.autofmt_xdate()
+            plt.tight_layout()
            plot_path = self.applications_file.parent / 'error_rate.png'
-            plt.savefig(plot_path)
+            tmp_path = self.applications_file.parent / 'error_rate.tmp.png'
+            # Save to a temp file first and atomically replace to ensure overwrite
+            fig.savefig(tmp_path, format='png')
            plt.close(fig)
+            try:
+                tmp_path.replace(plot_path)
+            except Exception:
+                # Fallback: try removing existing and renaming
+                try:
+                    if plot_path.exists():
+                        plot_path.unlink()
+                    tmp_path.rename(plot_path)
+                except Exception:
+                    logger.exception(f"Failed to write plot to {plot_path}")

            # Summary
            total_attempts = int(grouped['total'].sum())
            total_success = int(grouped['successes'].sum())
            total_fail = int(grouped['failures'].sum())
-            overall_error = (total_fail / total_attempts) if total_attempts > 0 else 0.0
+            overall_error = (total_fail / total_attempts) if total_attempts>0 else 0.0
            summary = f"<b>Total attempts:</b> {total_attempts}\n<b>Successes:</b> {total_success}\n<b>Failures:</b> {total_fail}\n<b>Overall error rate:</b> {overall_error:.1%}"

-            return plot_path, summary
+            return str(plot_path), summary
        except Exception as e:
            logger.exception(f"Failed to generate error rate plot: {e}")
            return None, ""