roughly working again, now dev docker exists

This commit is contained in:
Aron Petau 2025-12-28 19:59:31 +01:00
parent a77a0c0393
commit 155ab39368
26 changed files with 1976 additions and 235 deletions

View file

@ -1,12 +1,23 @@
from abc import ABC, abstractmethod
from playwright.async_api import Page
import logging
import asyncio
import html
import re
import hashlib
from datetime import datetime
import traceback
from pathlib import Path
logger = logging.getLogger(__name__)
DATA_DIR = Path("data")
class BaseHandler(ABC):
def __init__(self, context):
def __init__(self, context, email=None, password=None):
self.context = context
self.email = email
self.password = password
@abstractmethod
async def apply(self, listing: dict, result: dict) -> dict:
@ -16,11 +27,18 @@ class BaseHandler(ABC):
async def handle_cookies(self, page: Page):
"""Handle cookie banners if present."""
try:
cookie_btn = await page.query_selector('button:has-text("Akzeptieren"), button:has-text("Alle akzeptieren")')
if cookie_btn and await cookie_btn.is_visible():
await cookie_btn.click()
logger.info("[BaseHandler] Dismissed cookie banner")
await asyncio.sleep(1)
cookie_selectors = [
'button:has-text("Akzeptieren")',
'button:has-text("Alle akzeptieren")',
'#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll'
]
for sel in cookie_selectors:
cookie_btn = await page.query_selector(sel)
if cookie_btn and await cookie_btn.is_visible():
await cookie_btn.click()
logger.info("[BaseHandler] Dismissed cookie banner")
await asyncio.sleep(1)
break
except Exception as e:
logger.warning(f"[BaseHandler] Failed to handle cookies: {e}")
@ -39,4 +57,181 @@ class BaseHandler(ABC):
await asyncio.sleep(1)
break
except Exception as e:
logger.warning(f"[BaseHandler] Failed to handle consent manager: {e}")
logger.warning(f"[BaseHandler] Failed to handle consent manager: {e}")
async def log_listing_details(self, listing: dict):
    """Write the listing currently being processed to the INFO log.

    Args:
        listing: The listing dict; it is logged using its default string form.
    """
    summary = f"[BaseHandler] Processing listing: {listing}"
    logger.info(summary)
async def login(self, page):
    """Sign in to inberlinwohnen.de with the handler's stored credentials.

    Args:
        page: Playwright page used to drive the login form.

    Returns:
        True when the browser ends up in the member area (URL contains
        "mein-bereich") or a logout ("Abmelden") element is present;
        False when credentials are missing, the check fails, or any
        step raises.
    """
    have_credentials = bool(self.email) and bool(self.password)
    if not have_credentials:
        logger.warning("No credentials provided, using public listings")
        return False

    email_field = 'input[name="email"], input[type="email"]'
    password_field = 'input[name="password"], input[type="password"]'
    submit_control = 'button[type="submit"], input[type="submit"]'

    try:
        await page.goto("https://www.inberlinwohnen.de/login", wait_until="networkidle")

        # The cookie/privacy modal must be dismissed before the form is usable.
        await self.handle_cookies(page)

        await page.fill(email_field, self.email)
        await page.fill(password_field, self.password)
        await page.click(submit_control)

        # Let the post-submit navigation settle.
        await page.wait_for_load_state("networkidle")
        await asyncio.sleep(2)

        # Only query the DOM for the logout element when the URL check fails.
        signed_in = "mein-bereich" in page.url
        if not signed_in:
            signed_in = bool(await page.query_selector('text="Abmelden"'))

        if not signed_in:
            logger.error(f"Login failed - ended up at {page.url}")
            return False

        logger.info("Login successful")
        return True
    except Exception as e:
        logger.error(f"Login error: {e}")
        return False
async def fetch_listings(self, logged_in: bool) -> list[dict]:
    """Fetch listings from the inberlinwohnen.de Wohnungsfinder.

    Opens a new page in ``self.context``, walks up to ten result pages by
    clicking the Livewire "nextPage" control, and extracts listings from the
    combined HTML with regular expressions.

    Args:
        logged_in: When true, the personal (filtered) Wohnungsfinder URL is
            used; otherwise the public one, and the cookie banner is handled.

    Returns:
        A list of unique listing dicts with the keys ``id``, ``rooms``,
        ``size``, ``price``, ``address``, ``link`` and ``fetched_at``.
        An empty list is returned when anything goes wrong.
    """
    page = None
    try:
        page = await self.context.new_page()

        # Use personal Wohnungsfinder when logged in to see filtered listings
        url = (
            "https://www.inberlinwohnen.de/mein-bereich/wohnungsfinder"
            if logged_in
            else "https://www.inberlinwohnen.de/wohnungsfinder/"
        )
        logger.info(f"Fetching listings from {url}")
        await page.goto(url, wait_until="networkidle")

        # Handle cookie modal if not logged in
        if not logged_in:
            await self.handle_cookies(page)

        # Wait for dynamic content to load - look for listing text pattern.
        # `except Exception` (not a bare except) so task cancellation still
        # propagates.
        try:
            await page.wait_for_selector('text=/\\d,\\d\\s*Zimmer/', timeout=15000)
            logger.info("Listings content loaded")
        except Exception:
            logger.warning("Timeout waiting for listings content")

        # Additional wait for initial listings to render
        await asyncio.sleep(2)

        # Collect the HTML of every result page by clicking through pagination
        max_pages = 10  # Safety limit
        page_chunks = []
        for _ in range(max_pages):
            page_chunks.append(await page.content())
            next_btn = await page.query_selector('[wire\\:click*="nextPage"]')
            if not (next_btn and await next_btn.is_visible()):
                break
            await next_btn.click()
            await asyncio.sleep(2)  # Wait for Livewire to update
        all_content = "".join(page_chunks)
        logger.info(f"Collected content from {len(page_chunks)} page(s)")

        # Debug: save HTML to file for inspection. This is best-effort; a
        # missing or read-only data directory must not abort the fetch.
        debug_path = DATA_DIR / "debug_page.html"
        try:
            debug_path.parent.mkdir(parents=True, exist_ok=True)
            with open(debug_path, "w", encoding="utf-8") as f:
                f.write(all_content)
            logger.info(f"Saved debug HTML to {debug_path}")
        except OSError as e:
            logger.warning(f"Could not save debug HTML to {debug_path}: {e}")

        # Decode HTML entities and JSON escaped slashes for extraction
        content_decoded = html.unescape(all_content).replace('\\/', '/')

        # Build flatId -> deeplink mapping from wire:snapshot JSON data
        deeplink_pattern = r'"deeplink":"(https://[^"]+)","flatId":(\d+)'
        id_to_link = {
            flat_id: link
            for link, flat_id in re.findall(deeplink_pattern, content_decoded)
        }
        logger.info(f"Found {len(id_to_link)} deeplink mappings")

        # Extract listings from button elements with aria-label
        button_pattern = r'@click="open !== (\d+)[^\"]*"[^>]*aria-label="Wohnungsangebot - ([^\"]+)'
        button_matches = re.findall(button_pattern, content_decoded)
        logger.info(f"Found {len(button_matches)} listing buttons")

        # Label layout: "<rooms> Zimmer, <size> m², <price> € [Kaltmiete] | <address>".
        # Single backslashes are required in a raw string: the previous
        # doubled form (r'\\s', r'\\|') matched a literal backslash and
        # introduced an alternation, so no label could ever match.
        label_pattern = re.compile(
            r'(\d,\d)\s*Zimmer,\s*([\d,]+)\s*m²,\s*([\d.,]+)\s*€\s*(?:Kaltmiete\s*)?\|\s*(.+)'
        )

        # Keyed by listing id; the first occurrence wins, order is preserved.
        unique_listings = {}
        for flat_id, listing_text in button_matches:
            parts_match = label_pattern.match(listing_text)
            if not parts_match:
                continue
            rooms, size, price, address = parts_match.groups()
            rooms = rooms.strip()
            address = address.strip()
            if len(address) < 5:
                continue

            # Fall back to the overview URL when no deeplink is known.
            detail_link = id_to_link.get(flat_id, url)
            # md5 is used only to derive a short, stable identifier.
            listing_id = hashlib.md5(f"{rooms}{size}{price}{address}".encode()).hexdigest()[:12]
            unique_listings.setdefault(listing_id, {
                "id": listing_id,
                "rooms": f"{rooms} Zimmer",
                "size": f"{size}",
                "price": f"{price}",
                "address": address,
                "link": detail_link,
                "fetched_at": datetime.now().isoformat()
            })

        listings = list(unique_listings.values())
        logger.info(f"Fetched {len(listings)} unique listings")
        return listings
    except Exception as e:
        logger.error(f"Error fetching listings: {e}")
        logger.error(traceback.format_exc())
        return []
    finally:
        # Close the page on every path so failed fetches do not leak pages.
        if page is not None:
            try:
                await page.close()
            except Exception as e:
                logger.debug(f"Error closing listings page: {e}")
async def save_screenshot(self, page, filename):
    """Capture the current page as an image file under ``DATA_DIR``.

    Args:
        page: Playwright page to capture.
        filename: File name (relative to ``DATA_DIR``) for the screenshot.
    """
    target = str(DATA_DIR / filename)
    await page.screenshot(path=target)
    logger.info(f"Saved screenshot to {target}")
async def save_html(self, page, filename):
    """Save the HTML content of the current page under ``DATA_DIR``.

    Args:
        page: Playwright page whose markup is dumped.
        filename: File name (relative to ``DATA_DIR``) for the dump.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    html_path = DATA_DIR / filename
    content = await page.content()
    # Create the target directory first; open() would otherwise raise
    # FileNotFoundError on a fresh checkout without a data directory.
    html_path.parent.mkdir(parents=True, exist_ok=True)
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(content)
    logger.info(f"Saved HTML to {html_path}")
async def log_buttons(self, page):
    """Log the (truncated) text of the first ten button-like elements.

    Args:
        page: Playwright page to inspect.
    """
    candidates = await page.query_selector_all('button, a.btn, a[class*="button"]')
    for position, element in enumerate(candidates):
        if position >= 10:
            break
        try:
            label = await element.inner_text()
            logger.info(f"Found button: {label[:50]}")
        except Exception as e:
            # An element that cannot be read is skipped, not fatal.
            logger.debug(f"Error logging button text: {e}")
async def handle_exception(self, e):
    """Log an exception together with its own traceback.

    Args:
        e: The exception to report.
    """
    logger.error(f"Exception: {str(e)}")
    if isinstance(e, BaseException):
        # Format the traceback attached to `e` itself. format_exc() reports
        # whatever exception is currently being handled, which is the wrong
        # one (or "NoneType: None") when this is called outside the
        # matching except block.
        details = "".join(traceback.format_exception(type(e), e, e.__traceback__))
    else:
        # Not an exception object: fall back to the active exception, if any.
        details = traceback.format_exc()
    logger.error(details)

View file

@ -5,50 +5,76 @@ import asyncio
logger = logging.getLogger(__name__)
class DegewoHandler(BaseHandler):
def __init__(self, browser_context):
self.context = browser_context
async def apply(self, listing: dict, result: dict) -> dict:
page = await self.context.new_page()
try:
logger.info(f"[DEGEWO] Opening page: {listing['link']}")
await page.goto(listing["link"], wait_until="networkidle")
logger.info("[DEGEWO] Page loaded")
logger.info(f"[DEGEWO] Open: {listing['link']}")
response = await page.goto(listing["link"], wait_until="networkidle")
await asyncio.sleep(2)
# Handle cookies and consent
# Detect 404 by status or page title
status = response.status if response else None
page_title = await page.title()
if status == 404 or (page_title and "404" in page_title):
logger.warning(f"[DEGEWO] Listing is down (404): {listing['link']}")
result["success"] = False
result["message"] = "Listing is no longer available (404). Application impossible. Will not retry."
result["permanent_fail"] = True
return result
# Always handle cookies and consent before anything else
await self.handle_cookies(page)
await self.handle_consent(page)
# Look for application button
logger.info("[DEGEWO] Looking for application button...")
selectors = [
'a[href*="bewerben"]',
'button:has-text("Bewerben")'
]
# Save HTML after modal handling for debugging
try:
html_content = await page.content()
with open("data/degewo_debug.html", "w", encoding="utf-8") as f:
f.write(html_content)
except Exception as e:
logger.debug(f"[DEGEWO] Debug HTML not saved: {e}")
logger.info("[DEGEWO] Searching for application button...")
selectors = [
'a.btn',
'button.btn',
'a:has-text("Bewerben")',
'button:has-text("Bewerben")',
'a:has-text("Anfrage")',
'button:has-text("Anfrage")',
'a:has-text("Kontakt")',
'button:has-text("Kontakt")',
]
apply_btn = None
for sel in selectors:
all_btns = await page.query_selector_all(sel)
logger.info(f"[DEGEWO] Selector '{sel}' found {len(all_btns)} matches")
logger.debug(f"[DEGEWO] Selector '{sel}': {len(all_btns)} matches")
for btn in all_btns:
try:
if await btn.is_visible():
btn_text = (await btn.inner_text()).lower()
if any(x in btn_text for x in ["drucken", "merken", "zurück"]):
continue
apply_btn = btn
logger.info(f"[DEGEWO] Found visible button with selector '{sel}'")
logger.info(f"[DEGEWO] Found visible application button: {sel} [{btn_text}]")
break
except Exception as e:
logger.warning(f"[DEGEWO] Error checking button visibility: {e}")
logger.debug(f"[DEGEWO] Button visibility error: {e}")
if apply_btn:
break
if apply_btn:
logger.info("[DEGEWO] Found application button, scrolling into view...")
await apply_btn.scroll_into_view_if_needed()
await asyncio.sleep(0.5)
logger.info("[DEGEWO] Clicking button...")
await apply_btn.click()
await asyncio.sleep(2)
result["success"] = True
result["message"] = "Application submitted successfully."
else:
logger.warning("[DEGEWO] No application button found.")
result["message"] = "No application button found."
except Exception as e:
result["message"] = f"Error during application: {e}"

View file

@ -5,50 +5,68 @@ import asyncio
logger = logging.getLogger(__name__)
class GesobauHandler(BaseHandler):
def __init__(self, browser_context):
self.context = browser_context
async def apply(self, listing: dict, result: dict) -> dict:
page = await self.context.new_page()
try:
logger.info(f"[GESOBAU] Opening page: {listing['link']}")
logger.info(f"[GESOBAU] Open: {listing['link']}")
await page.goto(listing["link"], wait_until="networkidle")
logger.info("[GESOBAU] Page loaded")
await asyncio.sleep(2)
# Handle cookies and consent
# Always handle cookies and consent before anything else
await self.handle_cookies(page)
await self.handle_consent(page)
# Save HTML after modal handling for debugging
try:
html_content = await page.content()
with open("data/gesobau_debug.html", "w", encoding="utf-8") as f:
f.write(html_content)
except Exception as e:
logger.debug(f"[GESOBAU] Debug HTML not saved: {e}")
# Tailored 404 detection: Angebot nicht mehr verfügbar
if "Angebot nicht mehr verfügbar" in html_content:
logger.warning("[GESOBAU] Permanent fail: Angebot nicht mehr verfügbar")
result["permanent_fail"] = True
result["message"] = "Listing is no longer available (Angebot nicht mehr verfügbar). Marked as permanent fail."
return result
# Look for application button
logger.info("[GESOBAU] Looking for application button...")
logger.info("[GESOBAU] Searching for application button...")
selectors = [
'a[href*="bewerben"]',
'button:has-text("Bewerben")'
'button:has-text("Bewerben")',
'a:has-text("Bewerben")',
'button.btn',
]
apply_btn = None
for sel in selectors:
all_btns = await page.query_selector_all(sel)
logger.info(f"[GESOBAU] Selector '{sel}' found {len(all_btns)} matches")
logger.debug(f"[GESOBAU] Selector '{sel}': {len(all_btns)} matches")
for btn in all_btns:
try:
if await btn.is_visible():
apply_btn = btn
logger.info(f"[GESOBAU] Found visible button with selector '{sel}'")
logger.info(f"[GESOBAU] Found visible application button: {sel}")
break
except Exception as e:
logger.warning(f"[GESOBAU] Error checking button visibility: {e}")
logger.debug(f"[GESOBAU] Button visibility error: {e}")
if apply_btn:
break
if apply_btn:
logger.info("[GESOBAU] Found application button, scrolling into view...")
await apply_btn.scroll_into_view_if_needed()
await asyncio.sleep(0.5)
logger.info("[GESOBAU] Clicking button...")
await apply_btn.click()
await asyncio.sleep(2)
result["success"] = True
result["message"] = "Application submitted successfully."
else:
logger.warning("[GESOBAU] No application button found.")
result["message"] = "No application button found."
except Exception as e:
result["message"] = f"Error during application: {e}"

View file

@ -5,23 +5,49 @@ import asyncio
logger = logging.getLogger(__name__)
class GewobagHandler(BaseHandler):
def __init__(self, browser_context):
self.context = browser_context
async def apply(self, listing: dict, result: dict) -> dict:
page = await self.context.new_page()
try:
logger.info(f"[GEWOBAG] Opening page: {listing['link']}")
await page.goto(listing["link"], wait_until="networkidle")
response = await page.goto(listing["link"], wait_until="networkidle")
logger.info("[GEWOBAG] Page loaded")
await asyncio.sleep(2)
# Handle cookies and consent
# Detect 404 by status or page title
status = response.status if response else None
page_title = await page.title()
if status == 404 or (page_title and "404" in page_title):
logger.warning(f"[GEWOBAG] Listing is down (404): {listing['link']}")
result["success"] = False
result["message"] = "Listing is no longer available (404). Application impossible. Will not retry."
result["permanent_fail"] = True
return result
# Always handle cookies and consent before anything else
await self.handle_cookies(page)
await self.handle_consent(page)
# Look for application button
# Save HTML after modal handling for debugging
try:
html_content = await page.content()
with open("data/gewobag_debug.html", "w", encoding="utf-8") as f:
f.write(html_content)
except Exception as e:
logger.warning(f"[GEWOBAG] Could not save debug HTML: {e}")
# Log listing details
await self.log_listing_details(listing)
# Look for application button ("Anfrage senden") in tab or footer
logger.info("[GEWOBAG] Looking for application button...")
selectors = [
'a[href*="bewerben"]',
'button:has-text("Bewerben")'
'button.rental-contact',
'button:has-text("Anfrage senden")',
'div.contact-button button',
'iframe#contact-iframe',
]
apply_btn = None
@ -39,6 +65,24 @@ class GewobagHandler(BaseHandler):
if apply_btn:
break
# If not found, check for iframe (Wohnungshelden)
if not apply_btn:
iframe = await page.query_selector('iframe#contact-iframe')
if iframe:
logger.info("[GEWOBAG] Found Wohnungshelden iframe, switching context...")
frame = await iframe.content_frame()
if frame:
# Try to find a submit/apply button in the iframe
iframe_btns = await frame.query_selector_all('button, input[type="submit"]')
for btn in iframe_btns:
try:
if await btn.is_visible():
apply_btn = btn
logger.info("[GEWOBAG] Found visible button in iframe")
break
except Exception as e:
logger.warning(f"[GEWOBAG] Error checking iframe button visibility: {e}")
if apply_btn:
logger.info("[GEWOBAG] Found application button, scrolling into view...")
await apply_btn.scroll_into_view_if_needed()

View file

@ -5,20 +5,41 @@ import asyncio
logger = logging.getLogger(__name__)
class HowogeHandler(BaseHandler):
def __init__(self, browser_context):
self.context = browser_context
async def apply(self, listing: dict, result: dict) -> dict:
page = await self.context.new_page()
try:
logger.info(f"[HOWOGE] Opening page: {listing['link']}")
await page.goto(listing["link"], wait_until="networkidle")
logger.info("[HOWOGE] Page loaded")
logger.info(f"[HOWOGE] Open: {listing['link']}")
response = await page.goto(listing["link"], wait_until="networkidle")
await asyncio.sleep(2)
# Handle cookies and consent
# Detect 404 by status or page title
status = response.status if response else None
page_title = await page.title()
if status == 404 or (page_title and "404" in page_title):
logger.warning(f"[HOWOGE] Listing is down (404): {listing['link']}")
result["success"] = False
result["message"] = "Listing is no longer available (404). Application impossible. Will not retry."
result["permanent_fail"] = True
return result
# Always handle cookies and consent before anything else
await self.handle_cookies(page)
await self.handle_consent(page)
# Look for "Besichtigung vereinbaren" button
logger.info("[HOWOGE] Looking for 'Besichtigung vereinbaren' button...")
# Save HTML after modal handling for debugging
try:
html_content = await page.content()
with open("data/howoge_debug.html", "w", encoding="utf-8") as f:
f.write(html_content)
except Exception as e:
logger.debug(f"[HOWOGE] Debug HTML not saved: {e}")
await self.log_listing_details(listing)
logger.info("[HOWOGE] Searching for application button...")
selectors = [
'a[href*="besichtigung-vereinbaren"]',
'a:has-text("Besichtigung vereinbaren")',
@ -26,32 +47,30 @@ class HowogeHandler(BaseHandler):
'a:has-text("Anfragen")',
'button:has-text("Anfragen")'
]
apply_btn = None
for sel in selectors:
all_btns = await page.query_selector_all(sel)
logger.info(f"[HOWOGE] Selector '{sel}' found {len(all_btns)} matches")
logger.debug(f"[HOWOGE] Selector '{sel}': {len(all_btns)} matches")
for btn in all_btns:
try:
if await btn.is_visible():
apply_btn = btn
logger.info(f"[HOWOGE] Found visible button with selector '{sel}'")
logger.info(f"[HOWOGE] Found visible application button: {sel}")
break
except Exception as e:
logger.warning(f"[HOWOGE] Error checking button visibility: {e}")
logger.debug(f"[HOWOGE] Button visibility error: {e}")
if apply_btn:
break
if apply_btn:
logger.info("[HOWOGE] Found application button, scrolling into view...")
await apply_btn.scroll_into_view_if_needed()
await asyncio.sleep(0.5)
logger.info("[HOWOGE] Clicking button...")
await apply_btn.click()
await asyncio.sleep(2)
result["success"] = True
result["message"] = "Application submitted successfully."
else:
logger.warning("[HOWOGE] No application button found.")
result["message"] = "No application button found."
except Exception as e:
result["message"] = f"Error during application: {e}"

View file

@ -5,50 +5,78 @@ import asyncio
logger = logging.getLogger(__name__)
class StadtUndLandHandler(BaseHandler):
def __init__(self, browser_context):
self.context = browser_context
async def apply(self, listing: dict, result: dict) -> dict:
page = await self.context.new_page()
try:
logger.info(f"[STADT UND LAND] Opening page: {listing['link']}")
logger.info(f"[STADT UND LAND] Open: {listing['link']}")
await page.goto(listing["link"], wait_until="networkidle")
logger.info("[STADT UND LAND] Page loaded")
await asyncio.sleep(2)
# Handle cookies and consent
# Always handle cookies and consent before anything else
await self.handle_cookies(page)
await self.handle_consent(page)
# Look for application button
logger.info("[STADT UND LAND] Looking for application button...")
# Save HTML after modal handling for debugging
try:
html_content = await page.content()
with open("data/stadtundland_debug.html", "w", encoding="utf-8") as f:
f.write(html_content)
except Exception as e:
logger.debug(f"[STADT UND LAND] Debug HTML not saved: {e}")
# 404/permanent fail detection
error_texts = [
"Hier ist etwas schief gelaufen",
"Leider können wir Ihnen zur Zeit keine Details zu diesem Inserat anzeigen"
]
page_text = await page.text_content('body')
if page_text:
for err in error_texts:
if err in page_text:
logger.warning(f"[STADT UND LAND] Permanent fail: {err}")
result["permanent_fail"] = True
result["message"] = "Listing is no longer available (404 detected on STADT UND LAND)."
await page.close()
return result
# Look for application button (robust selectors)
logger.info("[STADT UND LAND] Searching for application button...")
selectors = [
'a[href*="bewerben"]',
'button:has-text("Bewerben")'
'button:has-text("Bewerben")',
'a:has-text("Bewerben")',
'button.btn',
'a.Button_button__JnZ4E',
'button.Button_button__JnZ4E',
]
apply_btn = None
for sel in selectors:
all_btns = await page.query_selector_all(sel)
logger.info(f"[STADT UND LAND] Selector '{sel}' found {len(all_btns)} matches")
logger.debug(f"[STADT UND LAND] Selector '{sel}': {len(all_btns)} matches")
for btn in all_btns:
try:
if await btn.is_visible():
apply_btn = btn
logger.info(f"[STADT UND LAND] Found visible button with selector '{sel}'")
logger.info(f"[STADT UND LAND] Found visible application button: {sel}")
break
except Exception as e:
logger.warning(f"[STADT UND LAND] Error checking button visibility: {e}")
logger.debug(f"[STADT UND LAND] Button visibility error: {e}")
if apply_btn:
break
if apply_btn:
logger.info("[STADT UND LAND] Found application button, scrolling into view...")
await apply_btn.scroll_into_view_if_needed()
await asyncio.sleep(0.5)
logger.info("[STADT UND LAND] Clicking button...")
await apply_btn.click()
await asyncio.sleep(2)
result["success"] = True
result["message"] = "Application submitted successfully."
else:
logger.warning("[STADT UND LAND] No application button found.")
result["message"] = "No application button found."
except Exception as e:
result["message"] = f"Error during application: {e}"

View file

@ -5,34 +5,107 @@ import asyncio
logger = logging.getLogger(__name__)
class WBMHandler(BaseHandler):
def __init__(self, browser_context):
self.context = browser_context
async def apply(self, listing: dict, result: dict) -> dict:
page = await self.context.new_page()
try:
logger.info(f"[WBM] Opening page: {listing['link']}")
logger.info(f"[WBM] Opening listing overview page: {listing['link']}")
await page.goto(listing["link"], wait_until="networkidle")
logger.info("[WBM] Page loaded")
logger.info("[WBM] Overview page loaded")
await asyncio.sleep(2)
# Handle cookies and consent
# Always handle cookies and consent before anything else
await self.handle_cookies(page)
await self.handle_consent(page)
# Look for application button
logger.info("[WBM] Looking for application button...")
selectors = [
'a[href*="bewerben"]',
'button:has-text("Bewerben")'
]
# Save HTML after modal handling for debugging
try:
html_content = await page.content()
with open("data/wbm_debug.html", "w", encoding="utf-8") as f:
f.write(html_content)
except Exception as e:
logger.warning(f"[WBM] Could not save debug HTML: {e}")
# 404/permanent fail detection
error_texts = [
"Keine passenden Angebote gefunden",
"Das Angebot existiert nicht mehr",
"Die gewünschte Seite konnte nicht gefunden werden",
"404",
"Es wurden keine Immobilien gefunden"
]
page_text = await page.text_content('body')
if page_text:
for err in error_texts:
if err in page_text:
result["permanent_fail"] = True
result["message"] = "Listing is no longer available (404 detected on WBM)."
logger.warning(f"[WBM] Permanent fail: {err}")
await page.close()
return result
# Find and follow the 'Details' link to the detail page
logger.info("[WBM] Looking for 'Details' link to open detail page...")
detail_link = None
detail_selectors = [
'a.btn.sign[title="Details"]',
'a.immo-button-cta[title="Details"]',
'a[title="Details"]',
]
for sel in detail_selectors:
links = await page.query_selector_all(sel)
logger.info(f"[WBM] Selector '{sel}' found {len(links)} matches for details link")
for link in links:
try:
if await link.is_visible():
detail_link = link
break
except Exception as e:
logger.warning(f"[WBM] Error checking details link visibility: {e}")
if detail_link:
break
if not detail_link:
result["message"] = "No details link found on overview page."
await page.close()
return result
# Click the details link and wait for navigation
logger.info("[WBM] Clicking details link to open detail page...")
await detail_link.click()
await page.wait_for_load_state("networkidle")
await asyncio.sleep(2)
# Save HTML of detail page for debugging
try:
html_content = await page.content()
with open("data/wbm_detail_debug.html", "w", encoding="utf-8") as f:
f.write(html_content)
except Exception as e:
logger.warning(f"[WBM] Could not save detail debug HTML: {e}")
# Look for application button on detail page
logger.info("[WBM] Looking for application button on detail page...")
selectors = [
'a[href*="expose-anfordern"]',
'a[href*="bewerben"]',
'a:has-text("Anfragen")',
'button:has-text("Interesse")',
'a:has-text("Bewerben")',
'button:has-text("Bewerben")',
'button.btn',
]
apply_btn = None
for sel in selectors:
all_btns = await page.query_selector_all(sel)
logger.info(f"[WBM] Selector '{sel}' found {len(all_btns)} matches")
logger.info(f"[WBM] Selector '{sel}' found {len(all_btns)} matches on detail page")
for btn in all_btns:
try:
if await btn.is_visible():
apply_btn = btn
logger.info(f"[WBM] Found visible button with selector '{sel}'")
logger.info(f"[WBM] Found visible application button with selector '{sel}' on detail page")
break
except Exception as e:
logger.warning(f"[WBM] Error checking button visibility: {e}")
@ -43,13 +116,13 @@ class WBMHandler(BaseHandler):
logger.info("[WBM] Found application button, scrolling into view...")
await apply_btn.scroll_into_view_if_needed()
await asyncio.sleep(0.5)
logger.info("[WBM] Clicking button...")
logger.info("[WBM] Clicking application button...")
await apply_btn.click()
await asyncio.sleep(2)
result["success"] = True
result["message"] = "Application submitted successfully."
result["message"] = "Application button clicked on detail page. (Submission not implemented)"
else:
result["message"] = "No application button found."
result["message"] = "No application button found on detail page."
except Exception as e:
result["message"] = f"Error during application: {e}"
logger.error(f"[WBM] Application error: {e}")

View file

@ -0,0 +1,216 @@
import asyncio
import logging
import hashlib
import re
from datetime import datetime
from pathlib import Path
import json
import os
from playwright.async_api import async_playwright
logger = logging.getLogger(__name__)

# State files: the last seen listings (JSON object keyed by listing id) and
# an append-only CSV recording when new listings were first seen.
WGCOMPANY_LISTINGS_FILE = Path("data/wgcompany_listings.json")
WGCOMPANY_TIMING_FILE = Path("data/wgcompany_times.csv")

# Environment variables for search filters. An empty value leaves the
# corresponding search-form field untouched; a Bezirk of "0" is treated as
# "no district filter".
WGCOMPANY_MIN_SIZE = os.environ.get("WGCOMPANY_MIN_SIZE", "")
WGCOMPANY_MAX_PRICE = os.environ.get("WGCOMPANY_MAX_PRICE", "")
WGCOMPANY_AGE = os.environ.get("WGCOMPANY_AGE", "")
WGCOMPANY_SMOKER = os.environ.get("WGCOMPANY_SMOKER", "")
WGCOMPANY_BEZIRK = os.environ.get("WGCOMPANY_BEZIRK", "0")

# District names looked for (case-insensitively) in a result row.
# The first name in this order that occurs in the row is used.
_BEZIRK_NAMES = (
    "Kreuzberg", "Neukölln", "Friedrichshain", "Prenzlauer Berg",
    "Mitte", "Wedding", "Charlottenburg", "Schöneberg", "Tempelhof",
    "Steglitz", "Wilmersdorf", "Pankow", "Lichtenberg", "Treptow",
    "Köpenick", "Reinickendorf", "Spandau", "Zehlendorf", "Moabit",
)


class WGCompanyNotifier:
    """Poll the wgcompany.de search page and report listings not seen before.

    Listings are scraped with Playwright, compared against the ids stored in
    ``WGCOMPANY_LISTINGS_FILE``, logged to ``WGCOMPANY_TIMING_FILE`` and, if a
    bot is configured, announced through ``telegram_bot._send_message``.
    """

    def __init__(self, telegram_bot=None, refresh_minutes=10):
        """Store configuration; the browser is started by ``init_browser``.

        Args:
            telegram_bot: Object providing an awaitable ``_send_message(text)``;
                notifications are skipped when it is None.
            refresh_minutes: Pause between polling cycles in ``run``.
        """
        self.playwright = None
        self.browser = None
        self.context = None
        self.telegram_bot = telegram_bot
        self.refresh_minutes = refresh_minutes

    async def init_browser(self):
        """Start Playwright, launch headless Chromium and create a context.

        Does nothing when a browser has already been created.
        """
        if self.browser is None:
            self.playwright = await async_playwright().start()
            self.browser = await self.playwright.chromium.launch(headless=True)
            self.context = await self.browser.new_context(
                user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
            )
            logger.info("[WGCOMPANY] Browser initialized")

    async def _apply_search_filters(self, page):
        """Fill the search form fields for every filter set in the environment."""
        text_filters = (
            ('input[name="c"]', WGCOMPANY_MIN_SIZE),
            ('input[name="a"]', WGCOMPANY_MAX_PRICE),
            ('input[name="l"]', WGCOMPANY_AGE),
        )
        for selector, value in text_filters:
            if not value:
                continue
            field = await page.query_selector(selector)
            if field:
                await field.fill(value)

        if WGCOMPANY_SMOKER:
            smoker_select = await page.query_selector('select[name="o"]')
            if smoker_select:
                await smoker_select.select_option(WGCOMPANY_SMOKER)

        if WGCOMPANY_BEZIRK and WGCOMPANY_BEZIRK != "0":
            bezirk_select = await page.query_selector('select[name="e"]')
            if bezirk_select:
                await bezirk_select.select_option(WGCOMPANY_BEZIRK)

    @staticmethod
    def _dump_debug_html(content):
        """Best-effort dump of the result page for debugging; never raises OSError."""
        debug_path = Path("data/wgcompany_debug.html")
        try:
            debug_path.parent.mkdir(parents=True, exist_ok=True)
            with open(debug_path, "w", encoding="utf-8") as f:
                f.write(content)
        except OSError as e:
            logger.debug(f"[WGCOMPANY] Debug HTML not saved: {e}")

    @staticmethod
    def _build_listing(href, row_text):
        """Turn one result link and the text of its table row into a listing dict."""
        price_match = re.search(r'(\d+)\s*€', row_text)
        price = price_match.group(1) if price_match else "?"
        size_match = re.search(r'(\d+)\s*m²', row_text)
        size = size_match.group(1) if size_match else "?"

        row_lower = row_text.lower()
        location = next(
            (name for name in _BEZIRK_NAMES if name.lower() in row_lower),
            "Berlin",
        )

        # Make relative links absolute.
        if not href.startswith("http"):
            href = f"http://www.wgcompany.de{href}" if href.startswith("/") else f"http://www.wgcompany.de/cgi-bin/{href}"

        # md5 is used only to derive a short, stable identifier.
        listing_id = hashlib.md5(f"{href}{price}{size}".encode()).hexdigest()[:12]
        return {
            "id": listing_id,
            "rooms": "1 Zimmer (WG)",
            "size": size,
            "price": price,
            "address": location,
            "link": href,
            "source": "wgcompany",
            "fetched_at": datetime.now().isoformat()
        }

    async def fetch_listings(self):
        """Run the search on wgcompany.de and scrape the result links.

        Returns:
            A list of unique listing dicts (see ``_build_listing`` for the
            keys). An empty list is returned when the fetch fails.
        """
        page = None
        try:
            page = await self.context.new_page()
            search_url = "http://www.wgcompany.de/cgi-bin/seite?st=1&mi=10&li=100"
            logger.info(f"[WGCOMPANY] Loading search page: {search_url}")
            await page.goto(search_url, wait_until="networkidle")
            await asyncio.sleep(2)

            await self._apply_search_filters(page)

            submit_btn = await page.query_selector('input[type="submit"][value*="finde"], input[type="submit"]')
            if submit_btn:
                await submit_btn.click()
                await page.wait_for_load_state("networkidle")
                await asyncio.sleep(2)

            self._dump_debug_html(await page.content())

            listing_links = await page.query_selector_all('a[href*="wg.pl"][href*="wgzeigen"]')
            logger.info(f"[WGCOMPANY] Found {len(listing_links)} listing links")

            # Keyed by listing id; the first occurrence wins, order is preserved.
            unique = {}
            for link_elem in listing_links:
                try:
                    href = await link_elem.get_attribute("href")
                    if not href:
                        continue
                    parent = await link_elem.evaluate_handle("el => el.closest('tr') || el.parentElement")
                    row_text = await parent.evaluate("el => el.innerText") if parent else ""
                    listing = self._build_listing(href, row_text)
                    unique.setdefault(listing["id"], listing)
                except Exception as e:
                    # One unparsable row must not discard the whole result.
                    logger.debug(f"[WGCOMPANY] Error parsing listing: {e}")

            unique_listings = list(unique.values())
            logger.info(f"[WGCOMPANY] Fetched {len(unique_listings)} unique listings")
            return unique_listings
        except Exception as e:
            logger.error(f"[WGCOMPANY] Error fetching listings: {e}")
            return []
        finally:
            # Close the page on every path so failed fetches do not leak pages.
            if page is not None:
                try:
                    await page.close()
                except Exception as e:
                    logger.debug(f"[WGCOMPANY] Error closing page: {e}")

    def load_previous_listings(self):
        """Load the listings stored by the previous cycle.

        Returns:
            A dict mapping listing id to listing dict. An empty dict is
            returned when the file is missing, unreadable, not valid JSON, or
            does not contain a JSON object.
        """
        if not WGCOMPANY_LISTINGS_FILE.exists():
            logger.info("[WGCOMPANY] No previous listings file found.")
            return {}
        try:
            with open(WGCOMPANY_LISTINGS_FILE, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logger.error(f"[WGCOMPANY] Could not read previous listings file: {e}")
            return {}
        if not isinstance(data, dict):
            logger.error("[WGCOMPANY] Previous listings file does not contain a JSON object; ignoring it.")
            return {}
        logger.info(f"[WGCOMPANY] Loaded {len(data)} previous listings from file. IDs: {list(data.keys())[:10]}{'...' if len(data) > 10 else ''}")
        return data

    def save_listings(self, listings):
        """Persist ``listings`` as a JSON object keyed by listing id.

        Args:
            listings: Iterable of listing dicts, each with an ``id`` key.
        """
        listings_dict = {l["id"]: l for l in listings}
        logger.info(f"[WGCOMPANY] Saving {len(listings_dict)} listings to file. IDs: {list(listings_dict.keys())[:10]}{'...' if len(listings_dict) > 10 else ''}")
        WGCOMPANY_LISTINGS_FILE.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8: ensure_ascii=False writes umlauts verbatim.
        with open(WGCOMPANY_LISTINGS_FILE, "w", encoding="utf-8") as f:
            json.dump(listings_dict, f, indent=2, ensure_ascii=False)

    def find_new_listings(self, current, previous):
        """Return the listings in ``current`` whose id is not a key of ``previous``.

        Args:
            current: List of listing dicts from the latest fetch.
            previous: Dict keyed by listing id, as returned by
                ``load_previous_listings``.
        """
        current_ids = [l["id"] for l in current]
        previous_ids = list(previous.keys())
        logger.info(f"[WGCOMPANY] Current listing IDs: {current_ids[:10]}{'...' if len(current_ids) > 10 else ''}")
        logger.info(f"[WGCOMPANY] Previous listing IDs: {previous_ids[:10]}{'...' if len(previous_ids) > 10 else ''}")
        new_listings = [l for l in current if l["id"] not in previous]
        logger.info(f"[WGCOMPANY] Detected {len(new_listings)} new listings (not in previous)")
        return new_listings

    def log_listing_times(self, new_listings):
        """Append one CSV row per new listing, stamped with the current time.

        A header row is written first when the CSV file does not exist yet.
        Nothing is written for an empty ``new_listings``.
        """
        if not new_listings:
            return
        import csv
        file_exists = WGCOMPANY_TIMING_FILE.exists()
        WGCOMPANY_TIMING_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(WGCOMPANY_TIMING_FILE, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            if not file_exists:
                writer.writerow(["timestamp", "weekday", "hour", "minute", "rooms", "size", "price", "address", "listing_id"])
            now = datetime.now()
            for listing in new_listings:
                writer.writerow([
                    now.isoformat(),
                    now.strftime("%A"),
                    now.hour,
                    now.minute,
                    listing["rooms"],
                    listing["size"],
                    listing["price"],
                    listing["address"],
                    listing["id"]
                ])
        logger.info(f"[WGCOMPANY] Logged {len(new_listings)} listing times to CSV")

    async def notify_new_listings(self, new_listings):
        """Send one HTML-formatted Telegram message per new listing.

        Returns without sending when there are no listings or no bot is set.
        A failure for one listing is logged and does not stop the others.
        """
        if not new_listings or not self.telegram_bot:
            logger.info("[WGCOMPANY] No new listings to notify or Telegram bot not set.")
            return
        import html
        import traceback
        logger.info(f"[WGCOMPANY] Notifying {len(new_listings)} new listing(s) via Telegram")
        for idx, listing in enumerate(new_listings, 1):
            try:
                logger.info(f"[WGCOMPANY] Sending listing {idx}/{len(new_listings)}: {listing['link']} | {listing['rooms']} | {listing['size']} | {listing['price']} | {listing['address']}")
                # The values come from a scraped page; escape them so that
                # characters such as '&' or '<' cannot break the HTML markup.
                link = html.escape(str(listing['link']), quote=True)
                rooms = html.escape(str(listing['rooms']))
                size = html.escape(str(listing['size']))
                price = html.escape(str(listing['price']))
                address = html.escape(str(listing['address']))
                message = f"<b>[WGCOMPANY]</b> <a href=\"{link}\">{link}</a>\n"
                message += f"🚪 <b>{rooms}</b>\n"
                message += f"📐 {size}\n"
                message += f"💰 {price}\n"
                message += f"📍 {address}"
                await self.telegram_bot._send_message(message)
                # Short pause between messages.
                await asyncio.sleep(0.5)
            except Exception as e:
                logger.error(f"[WGCOMPANY] Error sending Telegram message for listing {idx}/{len(new_listings)}: {e}")
                logger.error(traceback.format_exc())

    async def run(self):
        """Poll forever: fetch, diff against stored state, notify, persist, sleep."""
        await self.init_browser()
        while True:
            try:
                listings = await self.fetch_listings()
                if listings:
                    previous = self.load_previous_listings()
                    new_listings = self.find_new_listings(listings, previous)
                    if new_listings:
                        logger.info(f"[WGCOMPANY] Found {len(new_listings)} new listing(s)")
                        self.log_listing_times(new_listings)
                        await self.notify_new_listings(new_listings)
                    else:
                        logger.info("[WGCOMPANY] No new listings")
                    self.save_listings(listings)
                else:
                    # fetch_listings returns [] on failure. Saving that would
                    # erase the known ids and make the next successful fetch
                    # report every listing as new.
                    logger.warning("[WGCOMPANY] Fetch returned no listings; keeping previously saved state")
            except Exception as e:
                # Keep the polling loop alive; the next cycle retries.
                logger.error(f"[WGCOMPANY] Polling cycle failed: {e}")
            await asyncio.sleep(self.refresh_minutes * 60)