This commit is contained in:
Aron Petau 2026-01-01 15:27:25 +01:00
parent d596ed7e19
commit aa6626d80d
21 changed files with 1051 additions and 333 deletions

View file

@ -15,6 +15,7 @@ import matplotlib.dates as mdates
import logging
import matplotlib
import matplotlib.font_manager as fm
import seaborn as sns
import html
import re
import hashlib
@ -29,13 +30,20 @@ LISTINGS_FILE = Path("data/listings.json")
DATA_DIR = Path("data")
# --- Matplotlib & Seaborn Setup ---
# Project-local font directory; created on import so a fresh checkout works.
font_cache_dir = Path("data/fonts")
font_cache_dir.mkdir(parents=True, exist_ok=True)
# NOTE(review): matplotlib and its font_manager are already imported above, so
# replacing get_configdir here presumably comes too late to influence where the
# font cache is stored. Kept as-is to preserve behaviour; confirm, and consider
# setting the MPLCONFIGDIR environment variable before the import instead.
matplotlib.get_configdir = lambda: str(font_cache_dir)
# findSystemFonts() only *lists* font files; each one has to be handed to the
# font manager explicitly, otherwise fonts in data/fonts are never usable.
for _font_path in fm.findSystemFonts(fontpaths=str(font_cache_dir), fontext='ttf'):
    try:
        fm.fontManager.addfont(_font_path)
    except (OSError, RuntimeError) as _font_err:
        # A single unreadable font file must not prevent the module from importing.
        logging.getLogger().warning(f"Could not register font {_font_path}: {_font_err}")

# Configure seaborn for beautiful plots.
# set_theme() resets rcParams['font.family'] to its `font` argument (default
# 'sans-serif'), so the font is passed here rather than assigned beforehand.
sns.set_theme(style="whitegrid", palette="deep", font="Noto Sans")
sns.set_context("notebook", font_scale=1.1)
matplotlib.rcParams['font.family'] = 'Noto Sans'
matplotlib.rcParams['figure.dpi'] = 300
matplotlib.rcParams['savefig.dpi'] = 300
matplotlib.rcParams['figure.facecolor'] = 'white'
# Use the root logger for consistency with main.py
logger = logging.getLogger()
@ -60,11 +68,11 @@ class ApplicationHandler:
"wbm": WBMHandler(browser_context),
}
def set_telegram_bot(self, telegram_bot) -> None:
    """Attach a TelegramBot instance for notifications.

    Args:
        telegram_bot: Bot object stored on ``self.telegram_bot``.
    """
    self.telegram_bot = telegram_bot
def notify_new_listings(self, new_listings: list[dict], application_results: Optional[dict] = None):
def notify_new_listings(self, new_listings: list[dict], application_results: Optional[dict] = None) -> None:
"""
Send a Telegram notification for each new listing.
Includes application result if autopilot was enabled.
@ -77,12 +85,12 @@ class ApplicationHandler:
company_label = company.capitalize() if company != "unknown" else "Wohnung"
message = (
f"\ud83c\udfe0 <b>[{company_label}] Neue Wohnung!</b>\n\n"
f"\ud83d\udeaa <b>{listing['rooms']}</b>\n"
f"\ud83d\udcd0 {listing['size']}\n"
f"\ud83d\udcb0 {listing['price']}\n"
f"\ud83d\udccd {listing['address']}\n\n"
f"\ud83d\udc49 <a href=\"{link}\">Alle Details</a>"
f"🏠 <b>[{company_label}] Neue Wohnung!</b>\n\n"
f"🚪 <b>{listing['rooms']}</b>\n"
f"📏 {listing['size']}\n"
f"💰 {listing['price']}\n"
f"📍 {listing['address']}\n\n"
f"👉 <a href=\"{link}\">Alle Details</a>"
)
# Always show autopilot/apply status for clarity
@ -107,11 +115,10 @@ class ApplicationHandler:
# Send via TelegramBot if available
if hasattr(self, 'telegram_bot') and self.telegram_bot:
logger.info(f"Notifying Telegram: {listing['address']} ({listing['rooms']}, {listing['size']}, {listing['price']})")
loop = getattr(self.telegram_bot, 'event_loop', None) or asyncio.get_event_loop()
asyncio.run_coroutine_threadsafe(self.telegram_bot._send_message(message), loop)
else:
logger.info(f"[TELEGRAM] Would send message for: {listing['address']} ({listing['rooms']}, {listing['size']}, {listing['price']})")
logger.debug(f"[No Telegram] {listing['address']} ({listing['rooms']})")
async def apply_to_listings(self, listings: list[dict]) -> dict:
"""
@ -124,19 +131,19 @@ class ApplicationHandler:
raise RuntimeError("browser_context is None in apply_to_listings. This should never happen.")
for listing in listings:
if self.has_applied(listing["id"]):
logger.info(f"Already applied to {listing['id']} ({listing['address']}), skipping.")
logger.debug(f"Skip (applied): {listing['address']}")
continue
result = await self.apply(listing)
results[listing["id"]] = result
self.save_application(result)
status = "" if result["success"] else ""
logger.info(f"Application {status} for {listing['address']}: {result['message']}")
logger.info(f"{status} {listing['address'][:30]}... | {result['message'][:50]}")
await asyncio.sleep(2)
return results
def log_listing_times(self, new_listings: list[dict]):
def log_listing_times(self, new_listings: list[dict]) -> None:
"""
Log new listing appearance times to CSV for later analysis and pattern mining.
Appends to data/listing_times.csv, creating header if needed.
@ -167,12 +174,12 @@ class ApplicationHandler:
listing["id"]
])
logger.info(f"Logged {len(new_listings)} new listing times to CSV.")
logger.debug(f"Logged {len(new_listings)} listings to CSV")
# ...existing code...
async def init_browser(self):
async def init_browser(self) -> None:
"""Initialize Playwright browser (minimal, like test script)"""
if not hasattr(self, 'browser') or self.browser is None:
self.playwright = await async_playwright().start()
@ -249,13 +256,13 @@ class ApplicationHandler:
return {"autopilot": False}
def save_state(self, state: dict) -> None:
    """Save persistent state.

    Writes ``state`` to ``STATE_FILE`` as indented JSON, overwriting whatever
    the file held before.

    Args:
        state: JSON-serialisable mapping to persist.
    """
    # Explicit encoding so the file is written identically regardless of locale.
    with open(STATE_FILE, "w", encoding="utf-8") as f:
        json.dump(state, f, indent=2)
def set_autopilot(self, enabled: bool) -> None:
    """Enable or disable autopilot mode.

    Delegates to ``self.state_manager.set_autopilot``.

    Args:
        enabled: Desired autopilot setting, passed through unchanged.
    """
    self.state_manager.set_autopilot(enabled)
@ -276,7 +283,7 @@ class ApplicationHandler:
return {}
def save_application(self, result: dict):
def save_application(self, result: dict) -> None:
"""Save an application result."""
applications = self.load_applications()
applications[result["listing_id"]] = result
@ -297,7 +304,7 @@ class ApplicationHandler:
return {}
def save_listings(self, listings: list[dict]):
def save_listings(self, listings: list[dict]) -> None:
"""Save current listings"""
listings_dict = {l["id"]: l for l in listings}
with open(LISTINGS_FILE, "w") as f:
@ -346,45 +353,43 @@ class ApplicationHandler:
heatmap_data.loc[day, hour] = int(val) + 1
# Create figure with two subplots
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Listing Appearance Patterns', fontsize=16, fontweight='bold')
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Listing Appearance Patterns', fontsize=18, fontweight='bold', y=0.995)
# 1. Heatmap - Day vs Hour
# 1. Heatmap - Day vs Hour (using seaborn)
ax1 = axes[0, 0]
im = ax1.imshow(heatmap_data.values, cmap='YlOrRd', aspect='auto')
ax1.set_xticks(range(24))
ax1.set_xticklabels(range(24), fontsize=8)
ax1.set_yticks(range(7))
ax1.set_yticklabels(days_order)
ax1.set_xlabel('Hour of Day')
ax1.set_ylabel('Day of Week')
ax1.set_title('Listings by Day & Hour')
plt.colorbar(im, ax=ax1, label='Count')
sns.heatmap(heatmap_data, cmap='RdYlGn_r', annot=False, fmt='d',
cbar_kws={'label': 'Count'}, ax=ax1, linewidths=0.5, linecolor='gray')
ax1.set_xlabel('Hour of Day', fontsize=11, fontweight='bold')
ax1.set_ylabel('Day of Week', fontsize=11, fontweight='bold')
ax1.set_title('Listings by Day & Hour', fontsize=12, fontweight='bold', pad=10)
ax1.set_xticklabels(range(24), fontsize=9)
ax1.set_yticklabels(days_order, rotation=0, fontsize=9)
# 2. Bar chart - By day of week
# 2. Bar chart - By day of week (seaborn style)
ax2 = axes[0, 1]
day_counts = df['weekday'].value_counts().reindex(days_order, fill_value=0)
colors = plt.cm.get_cmap('Blues')(day_counts / day_counts.max() if day_counts.max() > 0 else day_counts)
bars = ax2.bar(range(7), day_counts.values, color=colors)
sns.barplot(x=range(7), y=day_counts.values, ax=ax2, palette='Blues_d', hue=range(7), legend=False)
ax2.set_xticks(range(7))
ax2.set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
ax2.set_xlabel('Day of Week')
ax2.set_ylabel('Number of Listings')
ax2.set_title('Total Listings by Day')
ax2.set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'], fontsize=9)
ax2.set_xlabel('Day of Week', fontsize=11, fontweight='bold')
ax2.set_ylabel('Number of Listings', fontsize=11, fontweight='bold')
ax2.set_title('Total Listings by Day', fontsize=12, fontweight='bold', pad=10)
for i, v in enumerate(day_counts.values):
if v > 0:
ax2.text(i, v + 0.1, str(v), ha='center', fontsize=9)
ax2.text(i, v + 0.5, str(v), ha='center', fontsize=9, fontweight='bold')
# 3. Line chart - By hour
# 3. Line chart - By hour (seaborn style)
ax3 = axes[1, 0]
hour_counts = df['hour'].value_counts().reindex(range(24), fill_value=0)
ax3.plot(range(24), hour_counts.values, marker='o', linewidth=2, markersize=4, color='#2E86AB')
ax3.fill_between(range(24), hour_counts.values, alpha=0.3, color='#2E86AB')
sns.lineplot(x=range(24), y=hour_counts.values, ax=ax3, marker='o',
linewidth=2.5, markersize=6, color='#2E86AB')
ax3.fill_between(range(24), hour_counts.values, alpha=0.2, color='#2E86AB')
ax3.set_xticks(range(0, 24, 2))
ax3.set_xlabel('Hour of Day')
ax3.set_ylabel('Number of Listings')
ax3.set_title('Total Listings by Hour')
ax3.grid(True, alpha=0.3)
ax3.set_xlabel('Hour of Day', fontsize=11, fontweight='bold')
ax3.set_ylabel('Number of Listings', fontsize=11, fontweight='bold')
ax3.set_title('Total Listings by Hour', fontsize=12, fontweight='bold', pad=10)
ax3.grid(True, alpha=0.3, linestyle='--')
# 4. Summary stats
ax4 = axes[1, 1]
@ -421,10 +426,10 @@ Total listings tracked: {total_listings}
verticalalignment='top', fontfamily='monospace',
bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
plt.tight_layout()
plt.tight_layout(rect=(0, 0, 1, 0.99))
# Save plot
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
# Save plot with high resolution
plt.savefig(plot_path, dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
plt.close()
logger.info(f"Plot saved to {plot_path}")
@ -434,7 +439,7 @@ Total listings tracked: {total_listings}
return ""
def _generate_error_rate_plot(self):
def _generate_error_rate_plot(self) -> tuple[str | None, str]:
"""Read applications.json and produce a plot image + summary text.
Returns (plot_path, summary_text) or (None, "") if insufficient data.
@ -474,7 +479,8 @@ Total listings tracked: {total_listings}
grouped = grouped.sort_index()
# Prepare plot: convert dates to matplotlib numeric x-values so bars and line align
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 12), sharex=True)
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(14, 14), sharex=True)
fig.suptitle('Autopilot Performance Analysis', fontsize=18, fontweight='bold', y=0.995)
dates = pd.to_datetime(grouped.index).to_pydatetime()
x = mdates.date2num(dates)
@ -483,53 +489,65 @@ Total listings tracked: {total_listings}
successes = grouped['successes'].values
failures = grouped['failures'].values
ax1.bar(x, successes, width=width, color='#2E8B57', align='center')
ax1.bar(x, failures, bottom=successes, width=width, color='#C44A4A', align='center')
ax1.set_ylabel('Count')
ax1.set_title('Autopilot: Successes vs Failures (by day)')
# Use seaborn color palette
success_color = sns.color_palette('RdYlGn', n_colors=10)[8] # Green
failure_color = sns.color_palette('RdYlGn', n_colors=10)[1] # Red
ax1.bar(x, successes, width=width, color=success_color, align='center', label='Success', edgecolor='white', linewidth=0.5)
ax1.bar(x, failures, bottom=successes, width=width, color=failure_color, align='center', label='Failure', edgecolor='white', linewidth=0.5)
ax1.set_ylabel('Count', fontsize=11, fontweight='bold')
ax1.set_title('Successes vs Failures (by day)', fontsize=13, fontweight='bold', pad=10)
ax1.set_xticks(x)
ax1.set_xlim(min(x) - 1, max(x) + 1)
ax1.xaxis.set_major_locator(mdates.AutoDateLocator())
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax1.legend(loc='upper left', framealpha=0.9)
ax1.grid(True, alpha=0.3, linestyle='--', axis='y')
# Plot error rate line on same x (date) axis
ax2.plot(x, grouped['error_rate'].values, marker='o', color='#3333AA', linewidth=2)
sns.lineplot(x=x, y=grouped['error_rate'].values, ax=ax2, marker='o',
linewidth=2.5, markersize=8, color='#E74C3C')
ax2.fill_between(x, grouped['error_rate'].values, alpha=0.2, color='#E74C3C')
ax2.set_ylim(-0.02, 1.02)
ax2.set_ylabel('Error rate')
ax2.set_xlabel('Date')
ax2.set_title('Daily Error Rate (failures / total)')
ax2.grid(True, alpha=0.3)
ax2.set_ylabel('Error Rate', fontsize=11, fontweight='bold')
ax2.set_xlabel('Date', fontsize=11, fontweight='bold')
ax2.set_title('Daily Error Rate (failures / total)', fontsize=13, fontweight='bold', pad=10)
ax2.grid(True, alpha=0.3, linestyle='--')
ax2.set_xticks(x)
ax2.set_xlim(min(x) - 1, max(x) + 1)
ax2.xaxis.set_major_locator(mdates.AutoDateLocator())
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
# Error rate by company (line plot)
# Error rate by company (line plot with seaborn palette)
company_grouped = df.groupby(['date', 'company']).agg(total=('id','count'), successes=('success', lambda x: x.sum()))
company_grouped['failures'] = company_grouped['total'] - company_grouped['successes']
company_grouped['error_rate'] = company_grouped['failures'] / company_grouped['total']
company_grouped = company_grouped.reset_index()
error_rate_pivot = company_grouped.pivot(index='date', columns='company', values='error_rate')
for company in error_rate_pivot.columns:
# Use distinct seaborn colors for each company
palette = sns.color_palette('husl', n_colors=len(error_rate_pivot.columns))
for idx, company in enumerate(error_rate_pivot.columns):
y = error_rate_pivot[company].values
ax3.plot(x, y, marker='o', label=str(company))
ax3.plot(x, y, marker='o', label=str(company), linewidth=2.5,
markersize=7, color=palette[idx])
ax3.set_ylim(-0.02, 1.02)
ax3.set_ylabel('Error rate')
ax3.set_xlabel('Date')
ax3.set_title('Daily Error Rate by Company')
ax3.grid(True, alpha=0.3)
ax3.set_ylabel('Error Rate', fontsize=11, fontweight='bold')
ax3.set_xlabel('Date', fontsize=11, fontweight='bold')
ax3.set_title('Daily Error Rate by Company', fontsize=13, fontweight='bold', pad=10)
ax3.grid(True, alpha=0.3, linestyle='--')
ax3.set_xticks(x)
ax3.set_xlim(min(x) - 1, max(x) + 1)
ax3.xaxis.set_major_locator(mdates.AutoDateLocator())
ax3.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax3.legend(title='Company', loc='upper right', fontsize='small')
ax3.legend(title='Company', loc='upper right', fontsize=10, framealpha=0.9)
fig.autofmt_xdate()
plt.tight_layout()
plt.tight_layout(rect=(0, 0, 1, 0.99))
plot_path = self.applications_file.parent / 'error_rate.png'
tmp_path = self.applications_file.parent / 'error_rate.tmp.png'
# Save to a temp file first and atomically replace to ensure overwrite
fig.savefig(tmp_path, format='png')
fig.savefig(tmp_path, format='png', dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
plt.close(fig)
try:
tmp_path.replace(plot_path)
@ -555,7 +573,7 @@ Total listings tracked: {total_listings}
return None, ""
async def login(self, page):
async def login(self, page) -> bool:
"""Login to inberlinwohnen.de (minimal, like test script)"""
if not self.state_manager.email or not self.state_manager.password:
logger.warning("No credentials provided. Ensure INBERLIN_EMAIL and INBERLIN_PASSWORD are set in the environment.")
@ -606,7 +624,29 @@ Total listings tracked: {total_listings}
async def fetch_listings(self, *, max_retries: int = 3, retry_delay: float = 2) -> list[dict]:
    """Fetch listings from the Wohnungsfinder with retry logic for transient failures.

    Calls ``self._fetch_listings_attempt()`` up to ``max_retries`` times. After
    each failed attempt except the last, it sleeps for
    ``retry_delay * 2 ** (attempt - 1)`` seconds (exponential backoff).

    NOTE(review): ``_fetch_listings_attempt`` appears to catch ``Exception``
    itself and return ``[]``. If so, this loop never sees a failure and the
    retry path is effectively dead code; confirm and let the helper re-raise.

    Args:
        max_retries: Maximum number of attempts; values below 1 skip fetching.
        retry_delay: Base delay in seconds before the first retry.

    Returns:
        The listings from the first successful attempt, or an empty list when
        every attempt raised.
    """
    for attempt in range(1, max_retries + 1):
        try:
            listings = await self._fetch_listings_attempt()
        except Exception as e:
            if attempt >= max_retries:
                # Keep the cause in the final message so a total failure is diagnosable.
                logger.error(f"❌ Fetch failed after {max_retries} attempts: {str(e)[:100]}")
                break
            wait_time = retry_delay * (2 ** (attempt - 1))  # Exponential backoff
            logger.warning(f"⚠️ Fetch failed (attempt {attempt}/{max_retries}): {str(e)[:50]}... Retrying in {wait_time}s")
            await asyncio.sleep(wait_time)
        else:
            if attempt > 1:
                logger.info(f"✅ Fetch succeeded (attempt {attempt})")
            return listings
    return []
async def _fetch_listings_attempt(self) -> list[dict]:
"""Single attempt to fetch listings (extracted for retry logic)"""
listings = []
try:
@ -742,17 +782,14 @@ Total listings tracked: {total_listings}
listings = unique_listings
if not listings:
logger.warning("No listings found after parsing. Dumping HTML snippet for debugging:")
logger.warning(content[:1000])
logger.warning("⚠️ No listings parsed")
await page.close()
logger.info(f"Fetched {len(listings)} unique listings")
logger.info(f"📊 Fetched {len(listings)} listings")
return listings
except Exception as e:
logger.error(f"Error fetching listings: {e}")
import traceback
logger.error(traceback.format_exc())
logger.error(f"❌ Fetch error: {str(e)[:100]}")
return []