New error listing: add per-company error-rate subplot

This commit is contained in:
Aron Petau 2025-12-16 13:51:25 +01:00
parent d39453d688
commit d33b6c4226
3 changed files with 75 additions and 66 deletions

View file

@ -1 +1 @@
3.13
3.12

View file

@ -261,13 +261,13 @@ When autopilot is ON, I will automatically apply to new listings."""
"""
if not APPLICATIONS_FILE.exists():
logger.warning("No applications.json found for errorrate plot")
return None, ""
return "", "" # Return empty strings
try:
with open(APPLICATIONS_FILE, 'r', encoding='utf-8') as f:
apps = json.load(f)
if not apps:
return None, ""
return "", ""
# Convert to DataFrame
rows = []
@ -281,7 +281,7 @@ When autopilot is ON, I will automatically apply to new listings."""
df = pd.DataFrame(rows)
df = df.dropna(subset=['ts'])
if df.empty:
return None, ""
return "", ""
df['date'] = df['ts'].dt.floor('D')
grouped = df.groupby('date').agg(total=('id','count'), successes=('success', lambda x: x.sum()))
@ -293,7 +293,7 @@ When autopilot is ON, I will automatically apply to new listings."""
# Prepare plot: convert dates to matplotlib numeric x-values so bars and line align
import matplotlib.dates as mdates
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 12), sharex=True)
dates = pd.to_datetime(grouped.index).to_pydatetime()
x = mdates.date2num(dates)
@ -322,6 +322,27 @@ When autopilot is ON, I will automatically apply to new listings."""
ax2.set_xlim(min(x) - 1, max(x) + 1)
ax2.xaxis.set_major_locator(mdates.AutoDateLocator())
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
# New: Error rate by company (line plot)
company_grouped = df.groupby(['date', 'company']).agg(total=('id','count'), successes=('success', lambda x: x.sum()))
company_grouped['failures'] = company_grouped['total'] - company_grouped['successes']
company_grouped['error_rate'] = company_grouped['failures'] / company_grouped['total']
company_grouped = company_grouped.reset_index()
error_rate_pivot = company_grouped.pivot(index='date', columns='company', values='error_rate')
for company in error_rate_pivot.columns:
y = error_rate_pivot[company].values
ax3.plot(x, y, marker='o', label=str(company))
ax3.set_ylim(-0.02, 1.02)
ax3.set_ylabel('Error rate')
ax3.set_xlabel('Date')
ax3.set_title('Daily Error Rate by Company')
ax3.grid(True, alpha=0.3)
ax3.set_xticks(x)
ax3.set_xlim(min(x) - 1, max(x) + 1)
ax3.xaxis.set_major_locator(mdates.AutoDateLocator())
ax3.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax3.legend(title='Company', loc='upper right', fontsize='small')
fig.autofmt_xdate()
plt.tight_layout()
@ -351,7 +372,7 @@ When autopilot is ON, I will automatically apply to new listings."""
return str(plot_path), summary
except Exception as e:
logger.exception(f"Failed to generate error rate plot: {e}")
return None, ""
return "", ""
self._send_message(f"❓ Unknown command: <code>{cmd}</code>\n\nUse /help to see available commands.")
def _handle_plot_command(self):
@ -373,13 +394,13 @@ When autopilot is ON, I will automatically apply to new listings."""
"""Generate a heatmap of listings by day of week and hour"""
if not TIMING_FILE.exists():
logger.warning("No timing data file found")
return None
return ""
try:
df = pd.read_csv(TIMING_FILE)
if len(df) < 1:
logger.warning("Timing file is empty")
return None
return ""
logger.info(f"Loaded {len(df)} listing records for plot")
@ -393,7 +414,11 @@ When autopilot is ON, I will automatically apply to new listings."""
day = row['weekday']
hour = int(row['hour'])
if day in days_order:
heatmap_data.loc[day, hour] += 1
# Fix: Ensure the value is numeric before incrementing
if pd.api.types.is_numeric_dtype(heatmap_data.loc[day, hour]):
heatmap_data.loc[day, hour] += 1
else:
heatmap_data.loc[day, hour] = 1 # Initialize if not numeric
# Create figure with two subplots
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
@ -414,7 +439,7 @@ When autopilot is ON, I will automatically apply to new listings."""
# 2. Bar chart - By day of week
ax2 = axes[0, 1]
day_counts = df['weekday'].value_counts().reindex(days_order, fill_value=0)
colors = plt.cm.Blues(day_counts / day_counts.max() if day_counts.max() > 0 else day_counts)
colors = plt.cm.get_cmap('Blues')(day_counts / day_counts.max() if day_counts.max() > 0 else day_counts)
bars = ax2.bar(range(7), day_counts.values, color=colors)
ax2.set_xticks(range(7))
ax2.set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
@ -448,13 +473,19 @@ When autopilot is ON, I will automatically apply to new listings."""
# Find peak combinations
peak_combo = heatmap_data.stack().idxmax() if heatmap_data.values.max() > 0 else ("N/A", "N/A")
# Fix: Ensure peak_combo is iterable
if isinstance(peak_combo, tuple) and len(peak_combo) == 2:
stats_text = f"🎯 Peak time: {peak_combo[0]} at {peak_combo[1]}:00"
else:
stats_text = "🎯 Peak time: N/A"
stats_text = f"""📊 Summary Statistics
Total listings tracked: {total_listings}
🏆 Best day: {best_day}
Best hour: {best_hour}:00
🎯 Peak time: {peak_combo[0]} at {peak_combo[1]}:00
{stats_text}
📈 Average per day: {total_listings/7:.1f}
📅 Data collection period:
@ -479,7 +510,8 @@ Total listings tracked: {total_listings}
logger.error(f"Error creating plot: {e}")
import traceback
logger.error(traceback.format_exc())
return None
return ""
self._send_message(f"❓ Unknown command: <code>{cmd}</code>\n\nUse /help to see available commands.")
def _send_message(self, text):
try:
@ -1720,7 +1752,7 @@ class InBerlinMonitor:
# Extract listings from button elements with aria-label
# Format: @click="open !== 12345 ..." aria-label="Wohnungsangebot - 2,0 Zimmer, 53,01 m², 494,38 € Kaltmiete | Adresse"
button_pattern = r'@click="open !== (\d+)[^"]*"[^>]*aria-label="Wohnungsangebot - ([^"]+)"'
button_pattern = r'@click="open !== (\d+)[^"]*"[^>]*aria-label="Wohnungsangebot - ([^"]+)'
button_matches = re.findall(button_pattern, content_decoded)
logger.info(f"Found {len(button_matches)} listing buttons")
@ -2238,54 +2270,3 @@ class WGCompanyMonitor:
async def _async_fetch(self):
    """Initialise the browser, then fetch and return the listings."""
    # The browser is awaited first so it is ready before fetching starts.
    await self.init_browser()
    listings = await self.fetch_listings()
    return listings
def main():
    """Main entry point: run the monitoring loop until the process is stopped.

    Creates the data directory, builds the InBerlin monitor (and the
    WGcompany monitor when ``WGCOMPANY_ENABLED`` is truthy), starts the
    Telegram command listener, and then loops forever. Each cycle checks
    InBerlin, runs the file cleanup when more than an hour has passed since
    the last one, checks WGcompany if enabled, and sleeps for
    ``CHECK_INTERVAL`` seconds.

    A failure in any single step is logged with its traceback and does not
    stop the loop.
    """
    # Ensure data directory exists
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    # Initialize monitors
    inberlin_monitor = InBerlinMonitor()
    wgcompany_monitor = WGCompanyMonitor() if WGCOMPANY_ENABLED else None

    # Start Telegram command listener
    telegram_bot = TelegramBot(inberlin_monitor)
    telegram_bot.start()

    logger.info(f"Monitor started (interval: {CHECK_INTERVAL}s)")
    logger.info(f"InBerlin Autopilot: {'ENABLED' if inberlin_monitor.is_autopilot_enabled() else 'DISABLED'}")
    logger.info(f"WGcompany: {'ENABLED' if WGCOMPANY_ENABLED else 'DISABLED'}")

    # Run periodic cleanup hourly. The interval is measured on the monotonic
    # clock so wall-clock adjustments cannot skip or repeat a cleanup; None
    # means "never ran yet" and forces a cleanup on the first cycle.
    cleanup_interval = 3600
    last_cleanup = None

    while True:
        # Check InBerlinWohnen
        try:
            inberlin_monitor.check()
        except Exception as e:
            # logger.exception keeps the traceback, matching the cleanup step.
            logger.exception(f"InBerlin check failed: {e}")

        # Periodic cleanup: remove PNGs older than 24h and prune logs older than 7 days
        try:
            cleanup_due = (
                last_cleanup is None
                or time.monotonic() - last_cleanup > cleanup_interval
            )
            if cleanup_due:  # every hour
                logger.info("Running periodic cleanup (old PNGs, prune logs)")
                _cleanup_old_files(png_hours=24, log_days=7)
                last_cleanup = time.monotonic()
        except Exception:
            logger.exception("Cleanup failed")

        # Check WGcompany
        if wgcompany_monitor:
            try:
                wgcompany_monitor.check()
            except Exception as e:
                logger.exception(f"WGcompany check failed: {e}")

        time.sleep(CHECK_INTERVAL)


if __name__ == "__main__":
    main()

View file

@ -52,12 +52,15 @@ def generate_error_rate_plot(applications_file: str):
grouped = grouped.sort_index()
# Plot
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
import matplotlib.dates as mdates
# Add a third subplot for error rate by company
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 12), sharex=True)
# Stacked bar: successes vs failures (all companies)
grouped[['successes','failures']].plot(kind='bar', stacked=True, ax=ax1, color=['#2E8B57','#C44A4A'])
ax1.set_ylabel('Count')
ax1.set_title('Autopilot: Successes vs Failures (by day)')
import matplotlib.dates as mdates
dates = pd.to_datetime(grouped.index).to_pydatetime()
x = mdates.date2num(dates)
width = 0.6
@ -69,6 +72,7 @@ def generate_error_rate_plot(applications_file: str):
ax1.xaxis.set_major_locator(mdates.AutoDateLocator())
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
# Line: overall error rate
ax2.plot(x, grouped['error_rate'].values, marker='o', color='#3333AA', linewidth=2)
ax2.set_ylim(-0.02, 1.02)
ax2.set_ylabel('Error rate')
@ -79,6 +83,30 @@ def generate_error_rate_plot(applications_file: str):
ax2.set_xlim(min(x) - 1, max(x) + 1)
ax2.xaxis.set_major_locator(mdates.AutoDateLocator())
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
# New: Error rate by company (line plot)
# Group by date and company
company_grouped = df.groupby(['date', 'company']).agg(total=('id','count'), successes=('success', lambda x: x.sum()))
company_grouped['failures'] = company_grouped['total'] - company_grouped['successes']
company_grouped['error_rate'] = company_grouped['failures'] / company_grouped['total']
company_grouped = company_grouped.reset_index()
# Pivot for plotting: index=date, columns=company, values=error_rate
error_rate_pivot = company_grouped.pivot(index='date', columns='company', values='error_rate')
# Plot each company as a line
for company in error_rate_pivot.columns:
y = error_rate_pivot[company].values
ax3.plot(x, y, marker='o', label=str(company))
ax3.set_ylim(-0.02, 1.02)
ax3.set_ylabel('Error rate')
ax3.set_xlabel('Date')
ax3.set_title('Daily Error Rate by Company')
ax3.grid(True, alpha=0.3)
ax3.set_xticks(x)
ax3.set_xlim(min(x) - 1, max(x) + 1)
ax3.xaxis.set_major_locator(mdates.AutoDateLocator())
ax3.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax3.legend(title='Company', loc='upper right', fontsize='small')
fig.autofmt_xdate()
plot_path = os.path.join(DATA_DIR, 'error_rate.png')