import asyncio
import hashlib
import html
import json
import logging
import os
import re
from datetime import datetime
from pathlib import Path
from typing import Optional

import matplotlib
import matplotlib.dates as mdates
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import pandas as pd
from playwright.async_api import async_playwright

from handlers.base_handler import BaseHandler
from handlers.degewo_handler import DegewoHandler
from handlers.gesobau_handler import GesobauHandler
from handlers.gewobag_handler import GewobagHandler
from handlers.howoge_handler import HowogeHandler
from handlers.stadtundland_handler import StadtUndLandHandler
from handlers.wbm_handler import WBMHandler

STATE_FILE = Path("data/state.json")
APPLICATIONS_FILE = Path("data/applications.json")
TIMING_FILE = Path("data/listing_times.csv")  # written by log_listing_times(), read by _generate_weekly_plot()
LISTINGS_FILE = Path("data/listings.json")
DATA_DIR = Path("data")

# --- Matplotlib Font Setup (for emoji support in plots) ---
font_cache_dir = Path("data/fonts")
font_cache_dir.mkdir(parents=True, exist_ok=True)
matplotlib.get_configdir = lambda: str(font_cache_dir)
fm.findSystemFonts(fontpaths=str(font_cache_dir), fontext='ttf')
matplotlib.rcParams['font.family'] = 'Noto Sans'
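# Note: the setup above redirects matplotlib's config dir to data/fonts and
# registers any .ttf files placed there; a 'Noto Sans' font file is assumed to
# be available in that directory for the labels (and emoji) to render correctly.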

# Use the root logger for consistency with main.py
logger = logging.getLogger()


class ApplicationHandler:
    """
    Main handler for apartment monitoring, application automation, and notification logic.
    Handles browser automation, listing extraction, application delegation, and Telegram notifications.
    """

    def __init__(self, browser_context, state_manager, applications_file: Optional[Path] = None):
        if browser_context is None:
            raise ValueError("browser_context must not be None. ApplicationHandler requires a valid Playwright context.")
        self.context = browser_context
        self.state_manager = state_manager
        self.applications_file = applications_file or APPLICATIONS_FILE
        self.telegram_bot = None  # attached later via set_telegram_bot()
        self.handlers = {
            "howoge": HowogeHandler(browser_context),
            "gewobag": GewobagHandler(browser_context),
            "degewo": DegewoHandler(browser_context),
            "gesobau": GesobauHandler(browser_context),
            "stadtundland": StadtUndLandHandler(browser_context),
            "wbm": WBMHandler(browser_context),
        }

    def set_telegram_bot(self, telegram_bot):
        """Attach a TelegramBot instance for notifications."""
        self.telegram_bot = telegram_bot

    def notify_new_listings(self, new_listings: list[dict], application_results: Optional[dict] = None):
        """
        Send a Telegram notification for each new listing.
        Includes the application result if autopilot was enabled.
        """
        for listing in new_listings:
            link = listing.get('link', 'https://www.inberlinwohnen.de/wohnungsfinder/')
            company = self._detect_company(link)
            if company == "wgcompany":
                continue  # skip WGCompany listings for the main handler

            company_label = company.capitalize() if company != "unknown" else "Wohnung"
            message = (
                f"🏠 <b>[{company_label}] Neue Wohnung!</b>\n\n"
                f"🚪 <b>{listing['rooms']}</b>\n"
                f"📐 {listing['size']}\n"
                f"💰 {listing['price']}\n"
                f"📍 {listing['address']}\n\n"
                f"👉 <a href=\"{link}\">Alle Details</a>"
            )

            # Always show autopilot/apply status for clarity
            if application_results is not None:
                if listing["id"] in application_results:
                    result = application_results[listing["id"]]
                    if result["success"]:
                        message += f"\n\n🤖 <b>Auto-applied!</b> ({result['company']})"
                        if result["message"]:
                            message += f"\n<i>{result['message']}</i>"
                    else:
                        # Handler attempted but failed
                        fail_msg = result.get("message") or "Unknown error during application."
                        message += f"\n\n⚠️ <b>Auto-apply failed</b> ({result['company']})"
                        message += f"\n<b>Reason:</b> <i>{html.escape(fail_msg)}</i>"
                else:
                    # Should not happen if the calling logic is correct, but fall back gracefully
                    message += "\n\nℹ️ <b>No application attempted (internal logic error)</b>"
            else:
                # Autopilot was off or no application was attempted at all
                message += "\n\nℹ️ <b>No application attempted (autopilot off)</b>"

            # Send via TelegramBot if available
            if self.telegram_bot:
                logger.info(f"Notifying Telegram: {listing['address']} ({listing['rooms']}, {listing['size']}, {listing['price']})")
                self.telegram_bot._send_message(message)
            else:
                logger.info(f"[TELEGRAM] Would send message for: {listing['address']} ({listing['rooms']}, {listing['size']}, {listing['price']})")

    async def apply_to_listings(self, listings: list[dict]) -> dict:
        """
        Apply to multiple listings (autopilot mode).
        Returns a dict of application results keyed by listing ID.
        """
        results = {}
        # Fail fast if context is ever None (should never happen)
        if self.context is None:
            raise RuntimeError("browser_context is None in apply_to_listings. This should never happen.")
        for listing in listings:
            if self.has_applied(listing["id"]):
                logger.info(f"Already applied to {listing['id']} ({listing['address']}), skipping.")
                continue
            result = await self.apply(listing)
            results[listing["id"]] = result
            self.save_application(result)
            status = "✅" if result["success"] else "❌"
            logger.info(f"Application {status} for {listing['address']}: {result['message']}")
            await asyncio.sleep(2)
        return results
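
    # Usage sketch (hypothetical caller; the real orchestration lives outside this class):
    #
    #     new = handler.find_new_listings(current, previous)
    #     results = await handler.apply_to_listings(new)
    #     handler.notify_new_listings(new, results)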

    def log_listing_times(self, new_listings: list[dict]):
        """
        Log new listing appearance times to CSV for later analysis and pattern mining.
        Appends to data/listing_times.csv, creating the header if needed.
        """
        if not new_listings:
            return

        import csv
        file_exists = TIMING_FILE.exists()

        with open(TIMING_FILE, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            if not file_exists:
                writer.writerow(["timestamp", "weekday", "hour", "minute", "rooms", "size", "price", "address", "listing_id"])

            now = datetime.now()
            for listing in new_listings:
                writer.writerow([
                    now.isoformat(),
                    now.strftime("%A"),  # weekday name
                    now.hour,
                    now.minute,
                    listing["rooms"],
                    listing["size"],
                    listing["price"],
                    listing["address"],
                    listing["id"]
                ])

        logger.info(f"Logged {len(new_listings)} new listing times to CSV.")

    async def init_browser(self):
        """Initialize Playwright browser (minimal, like test script)"""
        if not hasattr(self, 'browser') or self.browser is None:
            self.playwright = await async_playwright().start()
            self.browser = await self.playwright.chromium.launch(headless=True)
            self.context = await self.browser.new_context(
                user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
            )
            logger.info("Browser initialized (minimal context)")
            self.application_handler = ApplicationHandler(self.context, self.state_manager)

    async def apply(self, listing: dict) -> dict:
        """Apply to a single listing via the matching company handler; returns a result dict."""
        company = self._detect_company(listing.get("link", ""))
        handler = self.handlers.get(company)
        result = {
            "listing_id": listing.get("id"),
            "company": company,
            "link": listing.get("link"),
            "timestamp": datetime.now().isoformat(),
            "success": False,
            "message": "",
            "address": listing.get("address", ""),
            "rooms": listing.get("rooms", ""),
            "price": listing.get("price", "")
        }

        if handler:
            result = await handler.apply(listing, result)
        else:
            result["message"] = f"No handler found for company: {company}"

        return result

    def _detect_company(self, link: str) -> str:
        """Robust company detection logic, matching monitor.py as closely as possible."""
        link = (link or "").lower()
        # Remove the URL scheme and www. prefix for easier matching
        link = re.sub(r"^https?://(www\.)?", "", link)
        # Domain-based matching first (includes subdomains)
        if re.search(r"howoge\.de", link):
            return "howoge"
        if re.search(r"gewobag\.de", link):
            return "gewobag"
        if re.search(r"degewo\.de", link):
            return "degewo"
        if re.search(r"gesobau\.de", link):
            return "gesobau"
        if re.search(r"stadt-und-land\.de|stadtundland\.de", link):
            return "stadtundland"
        if re.search(r"wbm\.de", link):
            return "wbm"
        # Fall back to matching the company name anywhere in the path or query (legacy/edge cases)
        if re.search(r"howoge", link):
            return "howoge"
        if re.search(r"gewobag", link):
            return "gewobag"
        if re.search(r"degewo", link):
            return "degewo"
        if re.search(r"gesobau", link):
            return "gesobau"
        if re.search(r"stadt-und-land|stadtundland", link):
            return "stadtundland"
        if re.search(r"wbm", link):
            return "wbm"
        return "unknown"

    def load_state(self) -> dict:
        """Load persistent state"""
        if STATE_FILE.exists():
            with open(STATE_FILE, "r") as f:
                return json.load(f)
        return {"autopilot": False}

    def save_state(self, state: dict):
        """Save persistent state"""
        with open(STATE_FILE, "w") as f:
            json.dump(state, f, indent=2)

    def set_autopilot(self, enabled: bool):
        """Enable or disable autopilot mode"""
        self.state_manager.set_autopilot(enabled)

    def is_autopilot_enabled(self) -> bool:
        """Check if autopilot mode is enabled"""
        return self.state_manager.is_autopilot_enabled()

    def load_applications(self) -> dict:
        """Load application history."""
        if self.applications_file.exists():
            try:
                with open(self.applications_file, "r", encoding="utf-8") as f:
                    return json.load(f)
            except json.JSONDecodeError:
                logger.error("Failed to decode applications file. Returning empty history.")
        return {}

    def save_application(self, result: dict):
        """Save an application result."""
        applications = self.load_applications()
        applications[result["listing_id"]] = result
        with open(self.applications_file, "w", encoding="utf-8") as f:
            json.dump(applications, f, indent=2, ensure_ascii=False)
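
    # applications.json maps listing_id -> result dict (shape built in apply() above),
    # e.g. (illustrative values):
    #   {"a1b2c3d4e5f6": {"listing_id": "a1b2c3d4e5f6", "company": "wbm",
    #                     "success": true, "message": "...", ...}}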

    def has_applied(self, listing_id: str) -> bool:
        """Check if we've already applied to this listing."""
        return listing_id in self.load_applications()

    def load_previous_listings(self) -> dict:
        """Load previously saved listings"""
        if LISTINGS_FILE.exists():
            with open(LISTINGS_FILE, "r") as f:
                return json.load(f)
        return {}

    def save_listings(self, listings: list[dict]):
        """Save current listings"""
        listings_dict = {l["id"]: l for l in listings}
        with open(LISTINGS_FILE, "w") as f:
            json.dump(listings_dict, f, indent=2, ensure_ascii=False)

    def find_new_listings(self, current: list[dict], previous: dict) -> list[dict]:
        """Find listings that are new since last check"""
        new = []
        for listing in current:
            if listing["id"] not in previous:
                new.append(listing)
        return new
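
    # Typical check cycle (sketch; the caller shown here is hypothetical):
    #
    #     current = await self.fetch_listings()
    #     new = self.find_new_listings(current, self.load_previous_listings())
    #     self.save_listings(current)
    #     self.log_listing_times(new)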

    def _generate_weekly_plot(self) -> str:
        """Generate a heatmap of listings by day of week and hour. Always returns a plot path, even if no data."""
        plot_path = DATA_DIR / "weekly_plot.png"

        def _save_placeholder(title: str, note: str, color: str) -> str:
            # Render an empty heatmap frame with a centered note (no-data / error cases).
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.set_xticks(range(24))
            ax.set_yticks(range(7))
            ax.set_xticklabels([f"{h}:00" for h in range(24)], rotation=90)
            ax.set_yticklabels(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])
            ax.set_title(title)
            ax.text(0.5, 0.5, note, fontsize=18, ha='center', va='center', transform=ax.transAxes, color=color)
            plt.savefig(plot_path)
            plt.close(fig)
            return str(plot_path)

        try:
            if not TIMING_FILE.exists():
                logger.warning("No timing file found for weekly plot. Generating empty plot.")
                return _save_placeholder("Listings Heatmap (No Data)", "No data available", "gray")

            df = pd.read_csv(TIMING_FILE, parse_dates=["timestamp"])
            if df.empty:
                logger.warning("Timing file is empty. Generating empty plot.")
                return _save_placeholder("Listings Heatmap (No Data)", "No data available", "gray")

            df["day_of_week"] = df["timestamp"].dt.dayofweek
            df["hour"] = df["timestamp"].dt.hour
            heatmap_data = df.groupby(["day_of_week", "hour"]).size().unstack(fill_value=0)

            fig, ax = plt.subplots(figsize=(10, 6))
            cax = ax.matshow(heatmap_data, cmap="YlGnBu", aspect="auto")
            fig.colorbar(cax)

            ax.set_xticks(range(24))
            ax.set_yticks(range(7))
            ax.set_xticklabels([f"{h}:00" for h in range(24)], rotation=90)
            ax.set_yticklabels(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])

            ax.set_title("Listings Heatmap (Day of Week vs Hour)")

            plt.savefig(plot_path)
            plt.close(fig)
            logger.info(f"Weekly plot saved to {plot_path}")
            return str(plot_path)
        except Exception as e:
            logger.error(f"Failed to generate weekly plot: {e}")
            # Always fall back to an empty plot so callers still get a valid path
            return _save_placeholder("Listings Heatmap (Error)", "Plot error", "red")

    def _generate_error_rate_plot(self):
        """Read applications.json and produce a plot image plus a summary text.

        Returns (plot_path, summary_text) or (None, "") if there is insufficient data.
        """
        if not self.applications_file.exists():
            logger.warning("No applications.json found for error-rate plot")
            return None, ""

        try:
            with open(self.applications_file, 'r', encoding='utf-8') as f:
                apps = json.load(f)

            if not apps:
                logger.warning("No application data available for error-rate plot")
                return None, ""

            # Convert to DataFrame
            rows = []
            for _id, rec in apps.items():
                rows.append({
                    "id": _id,
                    "ts": pd.to_datetime(rec.get("timestamp")),
                    "success": rec.get("success", False),
                    "company": rec.get("company", "unknown")
                })

            df = pd.DataFrame(rows)
            df = df.dropna(subset=['ts'])
            if df.empty:
                logger.warning("No valid data for error-rate plot")
                return None, ""

            df['date'] = df['ts'].dt.floor('D')
            grouped = df.groupby('date').agg(total=('id', 'count'), successes=('success', lambda x: x.sum()))
            grouped['failures'] = grouped['total'] - grouped['successes']
            grouped['error_rate'] = grouped['failures'] / grouped['total']

            # Ensure the index is sorted by date for plotting
            grouped = grouped.sort_index()

            # Prepare plot
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.plot(grouped.index, grouped['error_rate'], marker='o', color='red', label='Error Rate')
            ax.set_title('Autopilot Error Rate Over Time')
            ax.set_xlabel('Date')
            ax.set_ylabel('Error Rate')
            ax.legend()
            ax.grid(True)

            # Save the plot to the same directory as the applications file
            plot_path = self.applications_file.parent / 'error_rate.png'
            plt.savefig(plot_path)
            plt.close(fig)

            # Summary
            total_attempts = int(grouped['total'].sum())
            total_success = int(grouped['successes'].sum())
            total_fail = int(grouped['failures'].sum())
            overall_error = (total_fail / total_attempts) if total_attempts > 0 else 0.0
            summary = (
                f"<b>Total attempts:</b> {total_attempts}\n"
                f"<b>Successes:</b> {total_success}\n"
                f"<b>Failures:</b> {total_fail}\n"
                f"<b>Overall error rate:</b> {overall_error:.1%}"
            )

            return plot_path, summary
        except Exception as e:
            logger.exception(f"Failed to generate error rate plot: {e}")
            return None, ""
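
    # Usage sketch (e.g. from a bot command handler; _send_photo is an assumed
    # helper, only _send_message is known to exist on TelegramBot):
    #
    #     plot_path, summary = self._generate_error_rate_plot()
    #     if plot_path:
    #         self.telegram_bot._send_photo(plot_path, caption=summary)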

    async def login(self, page):
        """Log in to inberlinwohnen.de (minimal, like the test script)."""
        if not self.state_manager.email or not self.state_manager.password:
            logger.warning("No credentials provided. Ensure INBERLIN_EMAIL and INBERLIN_PASSWORD are set in the environment.")
            return False

        try:
            logger.info("Navigating to login page...")
            login_response = await page.goto("https://www.inberlinwohnen.de/login", wait_until="networkidle")
            logger.info(f"Login page status: {login_response.status if login_response else 'No response'}")
            await asyncio.sleep(2)

            # Dismiss the cookie/privacy modal before logging in
            logger.info("Attempting to dismiss cookie/privacy modal before login...")
            await self.dismiss_cookie_modal(page)
            logger.info("Cookie/privacy modal handled.")

            # Fill the login form (if present)
            logger.info("Filling in login credentials...")
            await page.fill('input[name="email"], input[type="email"]', self.state_manager.email)
            await page.fill('input[name="password"], input[type="password"]', self.state_manager.password)
            logger.info("Login credentials filled.")

            # Click the submit button (page.click returns None, so there is nothing to capture)
            logger.info("Submitting login form...")
            await page.click('button[type="submit"], input[type="submit"]', timeout=30000)
            logger.info("Clicked submit, waiting for navigation...")
            try:
                await page.wait_for_load_state("networkidle", timeout=30000)
                logger.info(f"After login, page url: {page.url}")
                logger.info(f"After login, page content length: {len(await page.content())}")
            except Exception as e:
                logger.error(f"Timeout or error after login submit: {e}")
            await asyncio.sleep(2)

            # Check whether the login was successful
            logger.info("Checking if login was successful...")
            if "mein-bereich" in page.url or await page.query_selector('text="Abmelden"'):
                logger.info("Login successful.")
                return True
            else:
                logger.error(f"Login failed - ended up at {page.url}")
                return False
        except Exception as e:
            logger.error(f"Login error: {e}")
            logger.debug("Exception occurred during login", exc_info=True)
            return False

    async def fetch_listings(self) -> list[dict]:
        """Fetch listings from the Wohnungsfinder"""
        listings = []

        try:
            page = await self.context.new_page()

            # Attempt login if not already logged in
            if not self.state_manager.logged_in:
                login_success = await self.login(page)
                if login_success:
                    self.state_manager.logged_in = True
                else:
                    logger.warning("Login failed. Proceeding with public listings.")

            # Select the correct URL after the login check
            if self.state_manager.logged_in:
                url = "https://www.inberlinwohnen.de/mein-bereich/wohnungsfinder"
            else:
                url = "https://www.inberlinwohnen.de/wohnungsfinder/"

            logger.info(f"Fetching listings from {url}")

            # Navigate and wait for network idle (helps on slow connections)
            logger.info("Navigating to listings page...")
            await page.goto(url, wait_until="networkidle", timeout=20000)

            # Abort if the page turned into a download
            if "download" in page.url or page.url.endswith(".pdf"):
                logger.error("Page redirected to a download. Aborting.")
                return []

            # Handle the cookie modal if not logged in
            if not self.state_manager.logged_in:
                await self.dismiss_cookie_modal(page)

            # Give the page a moment to render, but do not block on any selector
            await asyncio.sleep(2)

            # Collect all listings content by clicking through the pagination
            all_content = ""
            page_num = 1
            max_pages = 10  # safety limit

            while page_num <= max_pages:
                # Get the current page content
                current_content = await page.content()
                all_content += current_content

                # Check for a "next page" button (Livewire pagination)
                next_btn = await page.query_selector('[wire\\:click*="nextPage"]')
                if next_btn and await next_btn.is_visible():
                    await next_btn.click()
                    await asyncio.sleep(2)  # wait for Livewire to update
                    page_num += 1
                else:
                    break

            logger.info(f"Collected content from {page_num} page(s)")
            content = all_content

            # Debug: save the HTML to a file for inspection
            debug_path = DATA_DIR / "debug_page.html"
            with open(debug_path, "w", encoding="utf-8") as f:
                f.write(content)
            logger.info(f"Saved debug HTML to {debug_path}")

            # Debug: check for the listing count shown on the page
            count_match = re.search(r'(\d+)\s*Wohnungen? für Sie gefunden', content)
            if count_match:
                logger.info(f"Page shows {count_match.group(1)} listings available")

            # Also check for "Zeige X bis Y von Z Angeboten"
            show_match = re.search(r'Zeige \d+ bis \d+ von (\d+) Angeboten', content)
            if show_match:
                logger.info(f"Page shows {show_match.group(1)} total offers")

            # Decode HTML entities and JSON-escaped slashes for extraction
            content_decoded = html.unescape(content)
            content_decoded = content_decoded.replace('\\/', '/')

            # Build a flatId -> deeplink mapping from the wire:snapshot JSON data (monitor.py logic)
            # Format in the HTML: "deeplink":"https://...","flatId":12345
            deeplink_pattern = r'"deeplink":"(https://[^"]+)","flatId":(\d+)'
            deeplink_matches = re.findall(deeplink_pattern, content_decoded)
            # Use string keys for flatId to match the button extraction below
            id_to_link = {str(flat_id): link for link, flat_id in deeplink_matches}
            logger.info(f"Found {len(id_to_link)} deeplink mappings")

            # --- Extraction logic copied from monitor.py for robustness ---
            # Extract listings from button elements with an aria-label, e.g.:
            # @click="open !== 12345 ..." aria-label="Wohnungsangebot - 2,0 Zimmer, 53,01 m², 494,38 € Kaltmiete | Adresse"
            button_pattern = r'@click="open !== (\d+)[^\"]*"[^>]*aria-label="Wohnungsangebot - ([^"]+)'
            button_matches = re.findall(button_pattern, content_decoded)
            logger.info(f"Found {len(button_matches)} listing buttons (monitor.py pattern)")

            for flat_id, listing_text in button_matches:
                # Parse listing text: "2,0 Zimmer, 53,01 m², 494,38 € Kaltmiete | Rhinstraße 4, 10315 Lichtenberg"
                parts_match = re.match(r'(\d,\d)\s*Zimmer,\s*([\d,.]+)\s*m²,\s*([\d.,]+)\s*€\s*(?:Kaltmiete)?\s*\|\s*(.+)', listing_text)
                if not parts_match:
                    continue

                rooms, size, price, address = parts_match.groups()
                rooms = rooms.strip()
                address = address.strip()

                if len(address) < 5:
                    continue

                # Get the deeplink for this flat (monitor.py logic: flat_id as string)
                detail_link = id_to_link.get(str(flat_id), url)

                listing_id = hashlib.md5(f"{rooms}{size}{price}{address}".encode()).hexdigest()[:12]

                listings.append({
                    "id": listing_id,
                    "rooms": f"{rooms} Zimmer",
                    "size": f"{size} m²",
                    "price": f"{price} €",
                    "address": address,
                    "link": detail_link,
                    "fetched_at": datetime.now().isoformat()
                })

            # Deduplicate by id (pagination pages can repeat listings)
            seen_ids = set()
            unique_listings = []
            for listing in listings:
                if listing["id"] not in seen_ids:
                    seen_ids.add(listing["id"])
                    unique_listings.append(listing)
            listings = unique_listings

            if not listings:
                logger.warning("No listings found after parsing. Dumping an HTML snippet for debugging:")
                logger.warning(content[:1000])

            await page.close()
            logger.info(f"Fetched {len(listings)} unique listings")
            return listings

        except Exception as e:
            logger.error(f"Error fetching listings: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return []

    async def dismiss_cookie_modal(self, page):
        """Dismiss the privacy/cookie consent modal if present"""
        try:
            # Wait a bit for the modal to appear
            await asyncio.sleep(2)

            # Try to find and click the accept button in the privacy modal.
            # Look for common accept-button patterns in German.
            accept_selectors = [
                'button:has-text("Akzeptieren")',
                'button:has-text("Alle akzeptieren")',
                'button:has-text("Accept")',
                'button:has-text("Zustimmen")',
                '[x-show="showPrivacyModal"] button',
                '.privacy-modal button',
                'button.accept-cookies',
                # More specific to inberlinwohnen
                'div[x-show="showPrivacyModal"] button:first-of-type',
            ]

            for selector in accept_selectors:
                try:
                    button = await page.query_selector(selector)
                    if button and await button.is_visible():
                        await button.click()
                        logger.info(f"Clicked cookie accept button: {selector}")
                        await asyncio.sleep(1)
                        return True
                except Exception:
                    continue

            # Fall back to clicking the first visible button in the modal overlay
            modal = await page.query_selector('div[x-show="showPrivacyModal"]')
            if modal:
                buttons = await modal.query_selector_all('button')
                for btn in buttons:
                    if await btn.is_visible():
                        text = await btn.inner_text()
                        logger.info(f"Found modal button: {text}")
                        # Click the first visible button (usually accept)
                        await btn.click()
                        await asyncio.sleep(1)
                        return True

            logger.info("No cookie modal found or already dismissed")
            return False
        except Exception as e:
            logger.debug(f"Cookie modal handling: {e}")
            return False