This commit is contained in:
Aron Petau 2026-01-01 15:27:25 +01:00
parent d596ed7e19
commit aa6626d80d
21 changed files with 1051 additions and 333 deletions

View file

@ -1,8 +1,11 @@
from .base_handler import BaseHandler
import logging
import asyncio
from pathlib import Path
logger = logging.getLogger(__name__)
DATA_DIR = Path("data/gesobau")
DATA_DIR.mkdir(parents=True, exist_ok=True)
class GesobauHandler(BaseHandler):
def __init__(self, browser_context):
@ -34,7 +37,7 @@ class GesobauHandler(BaseHandler):
# Save HTML after modal handling for debugging
try:
html_content = await page.content()
with open("data/gesobau_debug.html", "w", encoding="utf-8") as f:
with open(DATA_DIR / "gesobau_debug.html", "w", encoding="utf-8") as f:
f.write(html_content)
except Exception as e:
logger.debug(f"[GESOBAU] Debug HTML not saved: {e}")

View file

@ -1,8 +1,11 @@
from .base_handler import BaseHandler
import logging
import asyncio
from pathlib import Path
logger = logging.getLogger(__name__)
DATA_DIR = Path("data/gewobag")
DATA_DIR.mkdir(parents=True, exist_ok=True)
class GewobagHandler(BaseHandler):
def __init__(self, browser_context):
@ -33,7 +36,7 @@ class GewobagHandler(BaseHandler):
# Save HTML after modal handling for debugging
try:
html_content = await page.content()
with open("data/gewobag_debug.html", "w", encoding="utf-8") as f:
with open(DATA_DIR / "gewobag_debug.html", "w", encoding="utf-8") as f:
f.write(html_content)
except Exception as e:
logger.warning(f"[GEWOBAG] Could not save debug HTML: {e}")

View file

@ -14,7 +14,8 @@ FORM_PLZ = os.environ.get("FORM_PLZ", "")
FORM_ORT = os.environ.get("FORM_ORT", "")
FORM_PHONE = os.environ.get("FORM_PHONE", "")
FORM_EMAIL = os.environ.get("FORM_EMAIL", "")
DATA_DIR = Path(os.environ.get("DATA_DIR", "data"))
DATA_DIR = Path("data/stadtundland")
DATA_DIR.mkdir(parents=True, exist_ok=True)
logger = logging.getLogger(__name__)

View file

@ -1,8 +1,11 @@
from .base_handler import BaseHandler
import logging
import asyncio
from pathlib import Path
logger = logging.getLogger(__name__)
DATA_DIR = Path("data/wbm")
DATA_DIR.mkdir(parents=True, exist_ok=True)
class WBMHandler(BaseHandler):
def __init__(self, browser_context):
@ -23,7 +26,7 @@ class WBMHandler(BaseHandler):
# Save HTML after modal handling for debugging
try:
html_content = await page.content()
with open("data/wbm_debug.html", "w", encoding="utf-8") as f:
with open(DATA_DIR / "wbm_debug.html", "w", encoding="utf-8") as f:
f.write(html_content)
except Exception as e:
logger.warning(f"[WBM] Could not save debug HTML: {e}")

View file

@ -32,10 +32,8 @@ class WGCompanyNotifier:
if self.browser is None:
self.playwright = await async_playwright().start()
self.browser = await self.playwright.chromium.launch(headless=True)
self.context = await self.browser.new_context(
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
)
logger.info("[WGCOMPANY] Browser initialized")
self.context = await self.browser.new_context()
logger.debug("[WG] Browser ready")
async def fetch_listings(self):
await self.init_browser()
@ -134,28 +132,27 @@ class WGCompanyNotifier:
return []
def load_previous_listings(self) -> dict:
    """Load the listings stored in ``self.listings_file``.

    Returns:
        The decoded JSON content of the file, or an empty dict when the
        file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain
            valid JSON.
    """
    # Open directly and handle the missing-file case instead of checking
    # exists() first; this avoids a race between the check and the open.
    try:
        # Read as UTF-8 explicitly: save_listings writes non-ASCII
        # characters verbatim (ensure_ascii=False), so relying on the
        # platform default encoding could fail to decode them.
        with open(self.listings_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return {}
    logger.debug(f"[WG] Loaded {len(data)} previous listings")
    return data
def save_listings(self, listings: list[dict]) -> None:
    """Write the given listings to ``self.listings_file`` as JSON.

    The listings are stored as one JSON object keyed by each listing's
    ``'id'`` value; a later listing with the same id replaces an earlier one.

    Args:
        listings: Listing dicts; each must contain an ``'id'`` key.

    Raises:
        KeyError: If a listing has no ``'id'`` key.
    """
    listings_dict = {listing['id']: listing for listing in listings}
    logger.debug(f"[WG] Saving {len(listings_dict)} listings")
    # ensure_ascii=False emits non-ASCII characters as-is, so the file must
    # be opened with an encoding that can represent them on every platform.
    with open(self.listings_file, "w", encoding="utf-8") as f:
        json.dump(listings_dict, f, indent=2, ensure_ascii=False)
def find_new_listings(self, current: list[dict], previous: dict) -> list[dict]:
    """Return the listings in ``current`` whose id is not a key of ``previous``.

    Args:
        current: Listing dicts, each containing an ``'id'`` key.
        previous: Mapping keyed by listing id (e.g. the result of
            ``load_previous_listings``).

    Returns:
        The new listings, in the order they appear in ``current``.
    """
    # Compare ids as strings: JSON object keys are always strings, so after a
    # save/load round-trip a non-string id would otherwise never match its
    # stored key and would be reported as new on every run.
    known_ids = {str(key) for key in previous}
    new = [listing for listing in current if str(listing['id']) not in known_ids]
    if new:
        logger.info(f"[WG] 🏠 {len(new)} new listing{'s' if len(new) > 1 else ''} detected")
    return new
def log_listing_times(self, new_listings):
if not new_listings:
@ -177,29 +174,29 @@ class WGCompanyNotifier:
listing["size"],
listing["price"],
listing["address"],
listing["id"]
listing['id']
])
logger.info(f"[WGCOMPANY] Logged {len(new_listings)} listing times to CSV")
logger.debug(f"[WG] Logged {len(new_listings)} to CSV")
async def notify_new_listings(self, new_listings: list[dict]) -> None:
    """Send one Telegram message per new listing via ``self.telegram_bot``.

    Does nothing when ``new_listings`` is empty or no bot is configured.
    A failure while building or sending one message is logged and does not
    stop the remaining listings from being sent.

    Args:
        new_listings: Listing dicts with the keys ``'rooms'``, ``'size'``,
            ``'price'``, ``'address'`` and ``'link'``.
    """
    import html

    if not new_listings or not self.telegram_bot:
        return

    send_timeout = 30  # seconds to wait for a send scheduled on another loop
    total = len(new_listings)
    for idx, listing in enumerate(new_listings, start=1):
        try:
            # The message body uses HTML tags, so listing fields are escaped
            # to keep characters such as '<' or '&' from breaking the markup.
            # NOTE(review): assumes _send_message sends with HTML parse mode
            # - confirm against the bot implementation.
            rooms = html.escape(str(listing['rooms']))
            size = html.escape(str(listing['size']))
            price = html.escape(str(listing['price']))
            address = html.escape(str(listing['address']))
            link = html.escape(str(listing['link']), quote=True)
            message = (
                f"🏠 <b>[WG-Company] Neues WG-Zimmer!</b>\n\n"
                f"🚪 <b>{rooms}</b>\n"
                f"📏 {size}\n"
                f"💰 {price}\n"
                f"📍 {address}\n\n"
                f"👉 <a href=\"{link}\">Zum Angebot</a>"
            )
            bot_loop = getattr(self.telegram_bot, "event_loop", None)
            if bot_loop is None or bot_loop is asyncio.get_running_loop():
                # Same loop as this coroutine: await directly so a send
                # failure is raised here and reaches the except below.
                await self.telegram_bot._send_message(message)
            else:
                # The bot has its own loop: schedule the send there and wait
                # for the result instead of discarding the future, which
                # would silently drop any exception raised by the send.
                future = asyncio.run_coroutine_threadsafe(
                    self.telegram_bot._send_message(message), bot_loop
                )
                await asyncio.wait_for(asyncio.wrap_future(future), timeout=send_timeout)
            # Pause between consecutive messages; no delay is needed after
            # the last one.
            if idx < total:
                await asyncio.sleep(0.5)
        except Exception as e:
            logger.error(f"[WG] ❌ Telegram failed for listing {idx}: {str(e)[:50]}")
async def run(self):
await self.init_browser()