prod
This commit is contained in:
parent
d596ed7e19
commit
aa6626d80d
21 changed files with 1051 additions and 333 deletions
|
|
@ -15,6 +15,7 @@ import matplotlib.dates as mdates
|
|||
import logging
|
||||
import matplotlib
|
||||
import matplotlib.font_manager as fm
|
||||
import seaborn as sns
|
||||
import html
|
||||
import re
|
||||
import hashlib
|
||||
|
|
@ -29,13 +30,20 @@ LISTINGS_FILE = Path("data/listings.json")
|
|||
DATA_DIR = Path("data")
|
||||
|
||||
|
||||
# --- Matplotlib Font Setup (for emoji support in plots) ---
|
||||
# --- Matplotlib & Seaborn Setup ---
|
||||
font_cache_dir = Path("data/fonts")
|
||||
font_cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
matplotlib.get_configdir = lambda: str(font_cache_dir)
|
||||
fm.findSystemFonts(fontpaths=str(font_cache_dir), fontext='ttf')
|
||||
matplotlib.rcParams['font.family'] = 'Noto Sans'
|
||||
|
||||
# Configure seaborn for beautiful plots
|
||||
sns.set_theme(style="whitegrid", palette="deep")
|
||||
sns.set_context("notebook", font_scale=1.1)
|
||||
matplotlib.rcParams['figure.dpi'] = 300
|
||||
matplotlib.rcParams['savefig.dpi'] = 300
|
||||
matplotlib.rcParams['figure.facecolor'] = 'white'
|
||||
|
||||
# Use the root logger for consistency with main.py
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
|
@ -60,11 +68,11 @@ class ApplicationHandler:
|
|||
"wbm": WBMHandler(browser_context),
|
||||
}
|
||||
|
||||
def set_telegram_bot(self, telegram_bot):
|
||||
def set_telegram_bot(self, telegram_bot) -> None:
|
||||
"""Attach a TelegramBot instance for notifications."""
|
||||
self.telegram_bot = telegram_bot
|
||||
|
||||
def notify_new_listings(self, new_listings: list[dict], application_results: Optional[dict] = None):
|
||||
def notify_new_listings(self, new_listings: list[dict], application_results: Optional[dict] = None) -> None:
|
||||
"""
|
||||
Send a Telegram notification for each new listing.
|
||||
Includes application result if autopilot was enabled.
|
||||
|
|
@ -77,12 +85,12 @@ class ApplicationHandler:
|
|||
|
||||
company_label = company.capitalize() if company != "unknown" else "Wohnung"
|
||||
message = (
|
||||
f"\ud83c\udfe0 <b>[{company_label}] Neue Wohnung!</b>\n\n"
|
||||
f"\ud83d\udeaa <b>{listing['rooms']}</b>\n"
|
||||
f"\ud83d\udcd0 {listing['size']}\n"
|
||||
f"\ud83d\udcb0 {listing['price']}\n"
|
||||
f"\ud83d\udccd {listing['address']}\n\n"
|
||||
f"\ud83d\udc49 <a href=\"{link}\">Alle Details</a>"
|
||||
f"🏠 <b>[{company_label}] Neue Wohnung!</b>\n\n"
|
||||
f"🚪 <b>{listing['rooms']}</b>\n"
|
||||
f"📏 {listing['size']}\n"
|
||||
f"💰 {listing['price']}\n"
|
||||
f"📍 {listing['address']}\n\n"
|
||||
f"👉 <a href=\"{link}\">Alle Details</a>"
|
||||
)
|
||||
|
||||
# Always show autopilot/apply status for clarity
|
||||
|
|
@ -107,11 +115,10 @@ class ApplicationHandler:
|
|||
|
||||
# Send via TelegramBot if available
|
||||
if hasattr(self, 'telegram_bot') and self.telegram_bot:
|
||||
logger.info(f"Notifying Telegram: {listing['address']} ({listing['rooms']}, {listing['size']}, {listing['price']})")
|
||||
loop = getattr(self.telegram_bot, 'event_loop', None) or asyncio.get_event_loop()
|
||||
asyncio.run_coroutine_threadsafe(self.telegram_bot._send_message(message), loop)
|
||||
else:
|
||||
logger.info(f"[TELEGRAM] Would send message for: {listing['address']} ({listing['rooms']}, {listing['size']}, {listing['price']})")
|
||||
logger.debug(f"[No Telegram] {listing['address']} ({listing['rooms']})")
|
||||
|
||||
async def apply_to_listings(self, listings: list[dict]) -> dict:
|
||||
"""
|
||||
|
|
@ -124,19 +131,19 @@ class ApplicationHandler:
|
|||
raise RuntimeError("browser_context is None in apply_to_listings. This should never happen.")
|
||||
for listing in listings:
|
||||
if self.has_applied(listing["id"]):
|
||||
logger.info(f"Already applied to {listing['id']} ({listing['address']}), skipping.")
|
||||
logger.debug(f"Skip (applied): {listing['address']}")
|
||||
continue
|
||||
result = await self.apply(listing)
|
||||
results[listing["id"]] = result
|
||||
self.save_application(result)
|
||||
status = "✅" if result["success"] else "❌"
|
||||
logger.info(f"Application {status} for {listing['address']}: {result['message']}")
|
||||
logger.info(f"{status} {listing['address'][:30]}... | {result['message'][:50]}")
|
||||
await asyncio.sleep(2)
|
||||
return results
|
||||
|
||||
|
||||
|
||||
def log_listing_times(self, new_listings: list[dict]):
|
||||
def log_listing_times(self, new_listings: list[dict]) -> None:
|
||||
"""
|
||||
Log new listing appearance times to CSV for later analysis and pattern mining.
|
||||
Appends to data/listing_times.csv, creating header if needed.
|
||||
|
|
@ -167,12 +174,12 @@ class ApplicationHandler:
|
|||
listing["id"]
|
||||
])
|
||||
|
||||
logger.info(f"Logged {len(new_listings)} new listing times to CSV.")
|
||||
logger.debug(f"Logged {len(new_listings)} listings to CSV")
|
||||
|
||||
# ...existing code...
|
||||
|
||||
|
||||
async def init_browser(self):
|
||||
async def init_browser(self) -> None:
|
||||
"""Initialize Playwright browser (minimal, like test script)"""
|
||||
if not hasattr(self, 'browser') or self.browser is None:
|
||||
self.playwright = await async_playwright().start()
|
||||
|
|
@ -249,13 +256,13 @@ class ApplicationHandler:
|
|||
return {"autopilot": False}
|
||||
|
||||
|
||||
def save_state(self, state: dict):
|
||||
def save_state(self, state: dict) -> None:
|
||||
"""Save persistent state"""
|
||||
with open(STATE_FILE, "w") as f:
|
||||
json.dump(state, f, indent=2)
|
||||
|
||||
|
||||
def set_autopilot(self, enabled: bool):
|
||||
def set_autopilot(self, enabled: bool) -> None:
|
||||
"""Enable or disable autopilot mode"""
|
||||
self.state_manager.set_autopilot(enabled)
|
||||
|
||||
|
|
@ -276,7 +283,7 @@ class ApplicationHandler:
|
|||
return {}
|
||||
|
||||
|
||||
def save_application(self, result: dict):
|
||||
def save_application(self, result: dict) -> None:
|
||||
"""Save an application result."""
|
||||
applications = self.load_applications()
|
||||
applications[result["listing_id"]] = result
|
||||
|
|
@ -297,7 +304,7 @@ class ApplicationHandler:
|
|||
return {}
|
||||
|
||||
|
||||
def save_listings(self, listings: list[dict]):
|
||||
def save_listings(self, listings: list[dict]) -> None:
|
||||
"""Save current listings"""
|
||||
listings_dict = {l["id"]: l for l in listings}
|
||||
with open(LISTINGS_FILE, "w") as f:
|
||||
|
|
@ -346,45 +353,43 @@ class ApplicationHandler:
|
|||
heatmap_data.loc[day, hour] = int(val) + 1
|
||||
|
||||
# Create figure with a 2x2 grid of subplots (four axes)
|
||||
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
|
||||
fig.suptitle('Listing Appearance Patterns', fontsize=16, fontweight='bold')
|
||||
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
|
||||
fig.suptitle('Listing Appearance Patterns', fontsize=18, fontweight='bold', y=0.995)
|
||||
|
||||
# 1. Heatmap - Day vs Hour
|
||||
# 1. Heatmap - Day vs Hour (using seaborn)
|
||||
ax1 = axes[0, 0]
|
||||
im = ax1.imshow(heatmap_data.values, cmap='YlOrRd', aspect='auto')
|
||||
ax1.set_xticks(range(24))
|
||||
ax1.set_xticklabels(range(24), fontsize=8)
|
||||
ax1.set_yticks(range(7))
|
||||
ax1.set_yticklabels(days_order)
|
||||
ax1.set_xlabel('Hour of Day')
|
||||
ax1.set_ylabel('Day of Week')
|
||||
ax1.set_title('Listings by Day & Hour')
|
||||
plt.colorbar(im, ax=ax1, label='Count')
|
||||
sns.heatmap(heatmap_data, cmap='RdYlGn_r', annot=False, fmt='d',
|
||||
cbar_kws={'label': 'Count'}, ax=ax1, linewidths=0.5, linecolor='gray')
|
||||
ax1.set_xlabel('Hour of Day', fontsize=11, fontweight='bold')
|
||||
ax1.set_ylabel('Day of Week', fontsize=11, fontweight='bold')
|
||||
ax1.set_title('Listings by Day & Hour', fontsize=12, fontweight='bold', pad=10)
|
||||
ax1.set_xticklabels(range(24), fontsize=9)
|
||||
ax1.set_yticklabels(days_order, rotation=0, fontsize=9)
|
||||
|
||||
# 2. Bar chart - By day of week
|
||||
# 2. Bar chart - By day of week (seaborn style)
|
||||
ax2 = axes[0, 1]
|
||||
day_counts = df['weekday'].value_counts().reindex(days_order, fill_value=0)
|
||||
colors = plt.cm.get_cmap('Blues')(day_counts / day_counts.max() if day_counts.max() > 0 else day_counts)
|
||||
bars = ax2.bar(range(7), day_counts.values, color=colors)
|
||||
sns.barplot(x=range(7), y=day_counts.values, ax=ax2, palette='Blues_d', hue=range(7), legend=False)
|
||||
ax2.set_xticks(range(7))
|
||||
ax2.set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
|
||||
ax2.set_xlabel('Day of Week')
|
||||
ax2.set_ylabel('Number of Listings')
|
||||
ax2.set_title('Total Listings by Day')
|
||||
ax2.set_xticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'], fontsize=9)
|
||||
ax2.set_xlabel('Day of Week', fontsize=11, fontweight='bold')
|
||||
ax2.set_ylabel('Number of Listings', fontsize=11, fontweight='bold')
|
||||
ax2.set_title('Total Listings by Day', fontsize=12, fontweight='bold', pad=10)
|
||||
for i, v in enumerate(day_counts.values):
|
||||
if v > 0:
|
||||
ax2.text(i, v + 0.1, str(v), ha='center', fontsize=9)
|
||||
ax2.text(i, v + 0.5, str(v), ha='center', fontsize=9, fontweight='bold')
|
||||
|
||||
# 3. Line chart - By hour
|
||||
# 3. Line chart - By hour (seaborn style)
|
||||
ax3 = axes[1, 0]
|
||||
hour_counts = df['hour'].value_counts().reindex(range(24), fill_value=0)
|
||||
ax3.plot(range(24), hour_counts.values, marker='o', linewidth=2, markersize=4, color='#2E86AB')
|
||||
ax3.fill_between(range(24), hour_counts.values, alpha=0.3, color='#2E86AB')
|
||||
sns.lineplot(x=range(24), y=hour_counts.values, ax=ax3, marker='o',
|
||||
linewidth=2.5, markersize=6, color='#2E86AB')
|
||||
ax3.fill_between(range(24), hour_counts.values, alpha=0.2, color='#2E86AB')
|
||||
ax3.set_xticks(range(0, 24, 2))
|
||||
ax3.set_xlabel('Hour of Day')
|
||||
ax3.set_ylabel('Number of Listings')
|
||||
ax3.set_title('Total Listings by Hour')
|
||||
ax3.grid(True, alpha=0.3)
|
||||
ax3.set_xlabel('Hour of Day', fontsize=11, fontweight='bold')
|
||||
ax3.set_ylabel('Number of Listings', fontsize=11, fontweight='bold')
|
||||
ax3.set_title('Total Listings by Hour', fontsize=12, fontweight='bold', pad=10)
|
||||
ax3.grid(True, alpha=0.3, linestyle='--')
|
||||
|
||||
# 4. Summary stats
|
||||
ax4 = axes[1, 1]
|
||||
|
|
@ -421,10 +426,10 @@ Total listings tracked: {total_listings}
|
|||
verticalalignment='top', fontfamily='monospace',
|
||||
bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
|
||||
|
||||
plt.tight_layout()
|
||||
plt.tight_layout(rect=(0, 0, 1, 0.99))
|
||||
|
||||
# Save plot
|
||||
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
|
||||
# Save plot with high resolution
|
||||
plt.savefig(plot_path, dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
|
||||
plt.close()
|
||||
|
||||
logger.info(f"Plot saved to {plot_path}")
|
||||
|
|
@ -434,7 +439,7 @@ Total listings tracked: {total_listings}
|
|||
return ""
|
||||
|
||||
|
||||
def _generate_error_rate_plot(self):
|
||||
def _generate_error_rate_plot(self) -> tuple[str | None, str]:
|
||||
"""Read applications.json and produce a plot image + summary text.
|
||||
|
||||
Returns (plot_path, summary_text) or (None, "") if insufficient data.
|
||||
|
|
@ -474,7 +479,8 @@ Total listings tracked: {total_listings}
|
|||
grouped = grouped.sort_index()
|
||||
|
||||
# Prepare plot: convert dates to matplotlib numeric x-values so bars and line align
|
||||
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 12), sharex=True)
|
||||
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(14, 14), sharex=True)
|
||||
fig.suptitle('Autopilot Performance Analysis', fontsize=18, fontweight='bold', y=0.995)
|
||||
|
||||
dates = pd.to_datetime(grouped.index).to_pydatetime()
|
||||
x = mdates.date2num(dates)
|
||||
|
|
@ -483,53 +489,65 @@ Total listings tracked: {total_listings}
|
|||
successes = grouped['successes'].values
|
||||
failures = grouped['failures'].values
|
||||
|
||||
ax1.bar(x, successes, width=width, color='#2E8B57', align='center')
|
||||
ax1.bar(x, failures, bottom=successes, width=width, color='#C44A4A', align='center')
|
||||
ax1.set_ylabel('Count')
|
||||
ax1.set_title('Autopilot: Successes vs Failures (by day)')
|
||||
# Use seaborn color palette
|
||||
success_color = sns.color_palette('RdYlGn', n_colors=10)[8] # Green
|
||||
failure_color = sns.color_palette('RdYlGn', n_colors=10)[1] # Red
|
||||
|
||||
ax1.bar(x, successes, width=width, color=success_color, align='center', label='Success', edgecolor='white', linewidth=0.5)
|
||||
ax1.bar(x, failures, bottom=successes, width=width, color=failure_color, align='center', label='Failure', edgecolor='white', linewidth=0.5)
|
||||
ax1.set_ylabel('Count', fontsize=11, fontweight='bold')
|
||||
ax1.set_title('Successes vs Failures (by day)', fontsize=13, fontweight='bold', pad=10)
|
||||
ax1.set_xticks(x)
|
||||
ax1.set_xlim(min(x) - 1, max(x) + 1)
|
||||
ax1.xaxis.set_major_locator(mdates.AutoDateLocator())
|
||||
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
|
||||
ax1.legend(loc='upper left', framealpha=0.9)
|
||||
ax1.grid(True, alpha=0.3, linestyle='--', axis='y')
|
||||
|
||||
# Plot error rate line on same x (date) axis
|
||||
ax2.plot(x, grouped['error_rate'].values, marker='o', color='#3333AA', linewidth=2)
|
||||
sns.lineplot(x=x, y=grouped['error_rate'].values, ax=ax2, marker='o',
|
||||
linewidth=2.5, markersize=8, color='#E74C3C')
|
||||
ax2.fill_between(x, grouped['error_rate'].values, alpha=0.2, color='#E74C3C')
|
||||
ax2.set_ylim(-0.02, 1.02)
|
||||
ax2.set_ylabel('Error rate')
|
||||
ax2.set_xlabel('Date')
|
||||
ax2.set_title('Daily Error Rate (failures / total)')
|
||||
ax2.grid(True, alpha=0.3)
|
||||
ax2.set_ylabel('Error Rate', fontsize=11, fontweight='bold')
|
||||
ax2.set_xlabel('Date', fontsize=11, fontweight='bold')
|
||||
ax2.set_title('Daily Error Rate (failures / total)', fontsize=13, fontweight='bold', pad=10)
|
||||
ax2.grid(True, alpha=0.3, linestyle='--')
|
||||
ax2.set_xticks(x)
|
||||
ax2.set_xlim(min(x) - 1, max(x) + 1)
|
||||
ax2.xaxis.set_major_locator(mdates.AutoDateLocator())
|
||||
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
|
||||
|
||||
# Error rate by company (line plot)
|
||||
# Error rate by company (line plot with seaborn palette)
|
||||
company_grouped = df.groupby(['date', 'company']).agg(total=('id','count'), successes=('success', lambda x: x.sum()))
|
||||
company_grouped['failures'] = company_grouped['total'] - company_grouped['successes']
|
||||
company_grouped['error_rate'] = company_grouped['failures'] / company_grouped['total']
|
||||
company_grouped = company_grouped.reset_index()
|
||||
error_rate_pivot = company_grouped.pivot(index='date', columns='company', values='error_rate')
|
||||
for company in error_rate_pivot.columns:
|
||||
|
||||
# Use distinct seaborn colors for each company
|
||||
palette = sns.color_palette('husl', n_colors=len(error_rate_pivot.columns))
|
||||
for idx, company in enumerate(error_rate_pivot.columns):
|
||||
y = error_rate_pivot[company].values
|
||||
ax3.plot(x, y, marker='o', label=str(company))
|
||||
ax3.plot(x, y, marker='o', label=str(company), linewidth=2.5,
|
||||
markersize=7, color=palette[idx])
|
||||
ax3.set_ylim(-0.02, 1.02)
|
||||
ax3.set_ylabel('Error rate')
|
||||
ax3.set_xlabel('Date')
|
||||
ax3.set_title('Daily Error Rate by Company')
|
||||
ax3.grid(True, alpha=0.3)
|
||||
ax3.set_ylabel('Error Rate', fontsize=11, fontweight='bold')
|
||||
ax3.set_xlabel('Date', fontsize=11, fontweight='bold')
|
||||
ax3.set_title('Daily Error Rate by Company', fontsize=13, fontweight='bold', pad=10)
|
||||
ax3.grid(True, alpha=0.3, linestyle='--')
|
||||
ax3.set_xticks(x)
|
||||
ax3.set_xlim(min(x) - 1, max(x) + 1)
|
||||
ax3.xaxis.set_major_locator(mdates.AutoDateLocator())
|
||||
ax3.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
|
||||
ax3.legend(title='Company', loc='upper right', fontsize='small')
|
||||
ax3.legend(title='Company', loc='upper right', fontsize=10, framealpha=0.9)
|
||||
|
||||
fig.autofmt_xdate()
|
||||
plt.tight_layout()
|
||||
plt.tight_layout(rect=(0, 0, 1, 0.99))
|
||||
plot_path = self.applications_file.parent / 'error_rate.png'
|
||||
tmp_path = self.applications_file.parent / 'error_rate.tmp.png'
|
||||
# Save to a temp file first and atomically replace to ensure overwrite
|
||||
fig.savefig(tmp_path, format='png')
|
||||
fig.savefig(tmp_path, format='png', dpi=300, bbox_inches='tight', facecolor='white', edgecolor='none')
|
||||
plt.close(fig)
|
||||
try:
|
||||
tmp_path.replace(plot_path)
|
||||
|
|
@ -555,7 +573,7 @@ Total listings tracked: {total_listings}
|
|||
return None, ""
|
||||
|
||||
|
||||
async def login(self, page):
|
||||
async def login(self, page) -> bool:
|
||||
"""Login to inberlinwohnen.de (minimal, like test script)"""
|
||||
if not self.state_manager.email or not self.state_manager.password:
|
||||
logger.warning("No credentials provided. Ensure INBERLIN_EMAIL and INBERLIN_PASSWORD are set in the environment.")
|
||||
|
|
@ -606,7 +624,29 @@ Total listings tracked: {total_listings}
|
|||
|
||||
|
||||
async def fetch_listings(self) -> list[dict]:
|
||||
"""Fetch listings from the Wohnungsfinder"""
|
||||
"""Fetch listings from the Wohnungsfinder with retry logic for transient failures"""
|
||||
max_retries = 3
|
||||
retry_delay = 2 # Initial delay in seconds
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
listings = await self._fetch_listings_attempt()
|
||||
if attempt > 0:
|
||||
logger.info(f"✅ Fetch succeeded (attempt {attempt + 1})")
|
||||
return listings
|
||||
except Exception as e:
|
||||
if attempt < max_retries - 1:
|
||||
wait_time = retry_delay * (2 ** attempt) # Exponential backoff
|
||||
logger.warning(f"⚠️ Fetch failed (attempt {attempt + 1}/{max_retries}): {str(e)[:50]}... Retrying in {wait_time}s")
|
||||
await asyncio.sleep(wait_time)
|
||||
else:
|
||||
logger.error(f"❌ Fetch failed after {max_retries} attempts")
|
||||
return []
|
||||
|
||||
return []
|
||||
|
||||
async def _fetch_listings_attempt(self) -> list[dict]:
|
||||
"""Single attempt to fetch listings (extracted for retry logic)"""
|
||||
listings = []
|
||||
|
||||
try:
|
||||
|
|
@ -742,17 +782,14 @@ Total listings tracked: {total_listings}
|
|||
listings = unique_listings
|
||||
|
||||
if not listings:
|
||||
logger.warning("No listings found after parsing. Dumping HTML snippet for debugging:")
|
||||
logger.warning(content[:1000])
|
||||
logger.warning("⚠️ No listings parsed")
|
||||
|
||||
await page.close()
|
||||
logger.info(f"Fetched {len(listings)} unique listings")
|
||||
logger.info(f"📊 Fetched {len(listings)} listings")
|
||||
return listings
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching listings: {e}")
|
||||
import traceback
|
||||
logger.error(traceback.format_exc())
|
||||
logger.error(f"❌ Fetch error: {str(e)[:100]}")
|
||||
return []
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue