2025-12-27 11:59:04 +01:00
|
|
|
import asyncio
import logging
import os
import re
from pathlib import Path

from .base_handler import BaseHandler
|
2025-12-27 11:59:04 +01:00
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
2026-01-01 15:27:25 +01:00
|
|
|
DATA_DIR = Path("data/wbm")
|
|
|
|
|
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
2025-12-27 11:59:04 +01:00
|
|
|
|
|
|
|
|
class WBMHandler(BaseHandler):
|
2025-12-28 19:59:31 +01:00
|
|
|
def __init__(self, browser_context):
|
|
|
|
|
self.context = browser_context
|
|
|
|
|
|
2025-12-27 11:59:04 +01:00
|
|
|
async def apply(self, listing: dict, result: dict) -> dict:
|
|
|
|
|
page = await self.context.new_page()
|
|
|
|
|
try:
|
2025-12-28 19:59:31 +01:00
|
|
|
logger.info(f"[WBM] Opening listing overview page: {listing['link']}")
|
2025-12-27 11:59:04 +01:00
|
|
|
await page.goto(listing["link"], wait_until="networkidle")
|
2025-12-28 19:59:31 +01:00
|
|
|
logger.info("[WBM] Overview page loaded")
|
2025-12-27 11:59:04 +01:00
|
|
|
await asyncio.sleep(2)
|
|
|
|
|
|
2025-12-28 19:59:31 +01:00
|
|
|
# Always handle cookies and consent before anything else
|
2025-12-27 11:59:04 +01:00
|
|
|
await self.handle_cookies(page)
|
|
|
|
|
await self.handle_consent(page)
|
|
|
|
|
|
2025-12-28 19:59:31 +01:00
|
|
|
# Save HTML after modal handling for debugging
|
|
|
|
|
try:
|
|
|
|
|
html_content = await page.content()
|
2026-01-01 15:27:25 +01:00
|
|
|
with open(DATA_DIR / "wbm_debug.html", "w", encoding="utf-8") as f:
|
2025-12-28 19:59:31 +01:00
|
|
|
f.write(html_content)
|
|
|
|
|
except Exception as e:
|
|
|
|
|
logger.warning(f"[WBM] Could not save debug HTML: {e}")
|
|
|
|
|
|
|
|
|
|
# 404/permanent fail detection
|
|
|
|
|
error_texts = [
|
|
|
|
|
"Keine passenden Angebote gefunden",
|
|
|
|
|
"Das Angebot existiert nicht mehr",
|
|
|
|
|
"Die gewünschte Seite konnte nicht gefunden werden",
|
|
|
|
|
"404",
|
|
|
|
|
"Es wurden keine Immobilien gefunden"
|
|
|
|
|
]
|
|
|
|
|
page_text = await page.text_content('body')
|
|
|
|
|
if page_text:
|
|
|
|
|
for err in error_texts:
|
|
|
|
|
if err in page_text:
|
2026-01-01 22:14:55 +01:00
|
|
|
result["deactivated"] = True
|
2025-12-28 19:59:31 +01:00
|
|
|
result["message"] = "Listing is no longer available (404 detected on WBM)."
|
|
|
|
|
logger.warning(f"[WBM] Permanent fail: {err}")
|
|
|
|
|
await page.close()
|
|
|
|
|
return result
|
|
|
|
|
|
2026-01-02 12:25:27 +01:00
|
|
|
# Check if we landed on a generic overview/search page with multiple listings
|
|
|
|
|
# This happens when the listing link is wrong or the listing was removed
|
2026-01-02 11:32:49 +01:00
|
|
|
current_url = page.url
|
2026-01-02 12:25:27 +01:00
|
|
|
# The overview page is at /angebote/ without /details/, and shows text like "X Mietwohnungen in Berlin"
|
|
|
|
|
if '/angebote/' in current_url and '/details/' not in current_url:
|
|
|
|
|
# Check for the heading pattern "X Mietwohnungen in Berlin" which appears on overview pages
|
|
|
|
|
overview_heading = await page.query_selector('h2:has-text("Mietwohnungen in Berlin"), h3:has-text("Mietwohnungen in Berlin")')
|
|
|
|
|
if overview_heading:
|
|
|
|
|
result["deactivated"] = True
|
|
|
|
|
result["message"] = "Redirected to generic overview page - listing no longer exists"
|
|
|
|
|
logger.warning(f"[WBM] Landed on overview page (/angebote/) instead of specific listing detail")
|
|
|
|
|
await page.screenshot(path=DATA_DIR / f"wbm_overview_redirect_{listing['id']}.png")
|
|
|
|
|
await page.close()
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
# Check if we're already on the detail page (URL contains '/details/')
|
2026-01-02 11:32:49 +01:00
|
|
|
if '/details/' not in current_url:
|
|
|
|
|
# Find and follow the 'Details' link to the detail page
|
|
|
|
|
logger.info("[WBM] Looking for 'Details' link to open detail page...")
|
|
|
|
|
detail_link = None
|
|
|
|
|
detail_selectors = [
|
|
|
|
|
'a.btn.sign[title="Details"]',
|
|
|
|
|
'a.immo-button-cta[title="Details"]',
|
|
|
|
|
'a[title="Details"]',
|
|
|
|
|
]
|
|
|
|
|
for sel in detail_selectors:
|
|
|
|
|
links = await page.query_selector_all(sel)
|
|
|
|
|
logger.info(f"[WBM] Selector '{sel}' found {len(links)} matches for details link")
|
|
|
|
|
for link in links:
|
|
|
|
|
try:
|
|
|
|
|
if await link.is_visible():
|
|
|
|
|
detail_link = link
|
|
|
|
|
break
|
|
|
|
|
except Exception as e:
|
|
|
|
|
logger.warning(f"[WBM] Error checking details link visibility: {e}")
|
|
|
|
|
if detail_link:
|
|
|
|
|
break
|
2025-12-28 19:59:31 +01:00
|
|
|
|
2026-01-02 11:32:49 +01:00
|
|
|
if not detail_link:
|
|
|
|
|
result["message"] = "No details link found on overview page."
|
|
|
|
|
await page.close()
|
|
|
|
|
return result
|
2025-12-28 19:59:31 +01:00
|
|
|
|
2026-01-02 11:32:49 +01:00
|
|
|
# Click the details link and wait for navigation
|
|
|
|
|
logger.info("[WBM] Clicking details link to open detail page...")
|
|
|
|
|
await detail_link.click()
|
|
|
|
|
await page.wait_for_load_state("networkidle")
|
|
|
|
|
await asyncio.sleep(2)
|
|
|
|
|
else:
|
|
|
|
|
logger.info("[WBM] Already on detail page, skipping details link search")
|
2025-12-28 19:59:31 +01:00
|
|
|
|
|
|
|
|
# Save HTML of detail page for debugging
|
|
|
|
|
try:
|
|
|
|
|
html_content = await page.content()
|
2026-01-02 12:25:27 +01:00
|
|
|
with open(DATA_DIR / "wbm_detail_debug.html", "w", encoding="utf-8") as f:
|
2025-12-28 19:59:31 +01:00
|
|
|
f.write(html_content)
|
|
|
|
|
except Exception as e:
|
|
|
|
|
logger.warning(f"[WBM] Could not save detail debug HTML: {e}")
|
|
|
|
|
|
|
|
|
|
# Look for application button on detail page
|
|
|
|
|
logger.info("[WBM] Looking for application button on detail page...")
|
2025-12-27 11:59:04 +01:00
|
|
|
selectors = [
|
2026-01-02 11:27:10 +01:00
|
|
|
'button.btn.btn-primary[type="submit"]', # The actual form submit button
|
2026-01-02 11:23:35 +01:00
|
|
|
'button:has-text("Anfrage absenden")',
|
2025-12-28 19:59:31 +01:00
|
|
|
'a[href*="expose-anfordern"]',
|
2025-12-27 11:59:04 +01:00
|
|
|
'a[href*="bewerben"]',
|
2025-12-28 19:59:31 +01:00
|
|
|
'a:has-text("Anfragen")',
|
|
|
|
|
'button:has-text("Interesse")',
|
|
|
|
|
'a:has-text("Bewerben")',
|
|
|
|
|
'button:has-text("Bewerben")',
|
|
|
|
|
'button.btn',
|
2025-12-27 11:59:04 +01:00
|
|
|
]
|
|
|
|
|
apply_btn = None
|
|
|
|
|
for sel in selectors:
|
|
|
|
|
all_btns = await page.query_selector_all(sel)
|
2025-12-28 19:59:31 +01:00
|
|
|
logger.info(f"[WBM] Selector '{sel}' found {len(all_btns)} matches on detail page")
|
2025-12-27 11:59:04 +01:00
|
|
|
for btn in all_btns:
|
|
|
|
|
try:
|
|
|
|
|
if await btn.is_visible():
|
|
|
|
|
apply_btn = btn
|
2025-12-28 19:59:31 +01:00
|
|
|
logger.info(f"[WBM] Found visible application button with selector '{sel}' on detail page")
|
2025-12-27 11:59:04 +01:00
|
|
|
break
|
|
|
|
|
except Exception as e:
|
|
|
|
|
logger.warning(f"[WBM] Error checking button visibility: {e}")
|
|
|
|
|
if apply_btn:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
if apply_btn:
|
2026-01-02 11:43:24 +01:00
|
|
|
logger.info("[WBM] Found submit button, scrolling form into view...")
|
2025-12-27 11:59:04 +01:00
|
|
|
await apply_btn.scroll_into_view_if_needed()
|
2026-01-02 11:43:24 +01:00
|
|
|
await asyncio.sleep(1)
|
|
|
|
|
|
|
|
|
|
# Handle cookies/consent again after scrolling (modals may reappear)
|
|
|
|
|
await self.handle_cookies(page)
|
|
|
|
|
await self.handle_consent(page)
|
|
|
|
|
await asyncio.sleep(1)
|
|
|
|
|
|
|
|
|
|
# Fill out the form fields before clicking submit
|
|
|
|
|
logger.info("[WBM] Filling out application form...")
|
|
|
|
|
form_filled = False
|
|
|
|
|
try:
|
|
|
|
|
# Anrede dropdown
|
|
|
|
|
anrede_select = await page.query_selector('select[name*="anrede" i]')
|
|
|
|
|
if anrede_select:
|
|
|
|
|
await anrede_select.select_option(os.getenv("FORM_ANREDE", "Frau"))
|
|
|
|
|
logger.debug("[WBM] Filled Anrede")
|
|
|
|
|
form_filled = True
|
|
|
|
|
|
|
|
|
|
# Name (Nachname)
|
|
|
|
|
name_input = await page.query_selector('input[name*="name" i]:not([name*="vorname" i])')
|
|
|
|
|
if name_input:
|
|
|
|
|
await name_input.fill(os.getenv("FORM_NACHNAME", ""))
|
|
|
|
|
logger.debug("[WBM] Filled Name")
|
|
|
|
|
form_filled = True
|
|
|
|
|
|
|
|
|
|
# Vorname
|
|
|
|
|
vorname_input = await page.query_selector('input[name*="vorname" i]')
|
|
|
|
|
if vorname_input:
|
|
|
|
|
await vorname_input.fill(os.getenv("FORM_VORNAME", ""))
|
|
|
|
|
logger.debug("[WBM] Filled Vorname")
|
|
|
|
|
form_filled = True
|
|
|
|
|
|
2026-01-02 12:25:27 +01:00
|
|
|
# Email (use ID or specific field name pattern)
|
|
|
|
|
email_input = await page.query_selector('input#powermail_field_e_mail, input[name*="[e_mail]"], input[name*="[email]"]')
|
2026-01-02 11:43:24 +01:00
|
|
|
if email_input:
|
|
|
|
|
await email_input.fill(os.getenv("FORM_EMAIL", ""))
|
|
|
|
|
logger.debug("[WBM] Filled Email")
|
|
|
|
|
form_filled = True
|
|
|
|
|
|
|
|
|
|
# Telefon
|
|
|
|
|
phone_input = await page.query_selector('input[name*="telefon" i]')
|
|
|
|
|
if phone_input:
|
|
|
|
|
await phone_input.fill(os.getenv("FORM_PHONE", ""))
|
|
|
|
|
logger.debug("[WBM] Filled Phone")
|
|
|
|
|
|
|
|
|
|
# Strasse
|
|
|
|
|
strasse_input = await page.query_selector('input[name*="strasse" i]')
|
|
|
|
|
if strasse_input:
|
|
|
|
|
await strasse_input.fill(os.getenv("FORM_STRASSE", ""))
|
|
|
|
|
logger.debug("[WBM] Filled Strasse")
|
|
|
|
|
|
|
|
|
|
# Hausnummer
|
|
|
|
|
hausnummer_input = await page.query_selector('input[name*="hausnummer" i]')
|
|
|
|
|
if hausnummer_input:
|
|
|
|
|
await hausnummer_input.fill(os.getenv("FORM_HAUSNUMMER", ""))
|
|
|
|
|
logger.debug("[WBM] Filled Hausnummer")
|
|
|
|
|
|
|
|
|
|
# PLZ
|
|
|
|
|
plz_input = await page.query_selector('input[name*="plz" i]')
|
|
|
|
|
if plz_input:
|
|
|
|
|
await plz_input.fill(os.getenv("FORM_PLZ", ""))
|
|
|
|
|
logger.debug("[WBM] Filled PLZ")
|
|
|
|
|
|
|
|
|
|
# Ort
|
|
|
|
|
ort_input = await page.query_selector('input[name*="ort" i]')
|
|
|
|
|
if ort_input:
|
|
|
|
|
await ort_input.fill(os.getenv("FORM_ORT", ""))
|
|
|
|
|
logger.debug("[WBM] Filled Ort")
|
|
|
|
|
|
2026-01-02 12:25:27 +01:00
|
|
|
# Datenschutz checkbox - use force click or click the label to avoid interception
|
2026-01-02 11:43:24 +01:00
|
|
|
datenschutz_checkbox = await page.query_selector('input[name*="datenschutz" i][type="checkbox"]')
|
|
|
|
|
if datenschutz_checkbox:
|
|
|
|
|
is_checked = await datenschutz_checkbox.is_checked()
|
|
|
|
|
if not is_checked:
|
2026-01-02 12:25:27 +01:00
|
|
|
# Try clicking the label first, fall back to force click on input
|
|
|
|
|
datenschutz_label = await page.query_selector('label[for]:has(input[name*="datenschutz" i])')
|
|
|
|
|
if datenschutz_label:
|
|
|
|
|
await datenschutz_label.click()
|
|
|
|
|
logger.debug("[WBM] Clicked Datenschutz label")
|
|
|
|
|
else:
|
|
|
|
|
await datenschutz_checkbox.click(force=True)
|
|
|
|
|
logger.debug("[WBM] Force-clicked Datenschutz checkbox")
|
2026-01-02 11:43:24 +01:00
|
|
|
|
|
|
|
|
if not form_filled:
|
|
|
|
|
logger.error("[WBM] No form fields found - form may not be visible")
|
|
|
|
|
result["message"] = "Form not found or not visible"
|
|
|
|
|
await page.screenshot(path=DATA_DIR / f"wbm_no_form_{listing['id']}.png")
|
|
|
|
|
await page.close()
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
logger.info("[WBM] Form filled successfully")
|
|
|
|
|
await asyncio.sleep(1)
|
|
|
|
|
|
|
|
|
|
# Save screenshot before submit
|
|
|
|
|
await page.screenshot(path=DATA_DIR / f"wbm_before_submit_{listing['id']}.png")
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
logger.error(f"[WBM] Error filling form: {e}")
|
|
|
|
|
result["message"] = f"Error filling form: {e}"
|
|
|
|
|
await page.screenshot(path=DATA_DIR / f"wbm_form_error_{listing['id']}.png")
|
|
|
|
|
await page.close()
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
logger.info("[WBM] Clicking submit button...")
|
2025-12-27 11:59:04 +01:00
|
|
|
await apply_btn.click()
|
2026-01-02 11:43:24 +01:00
|
|
|
await asyncio.sleep(3)
|
2025-12-29 22:46:10 +01:00
|
|
|
# --- Post-click confirmation logic ---
|
2026-01-02 11:43:24 +01:00
|
|
|
logger.info("[WBM] Clicked submit button, checking for confirmation...")
|
2025-12-29 22:46:10 +01:00
|
|
|
# Save screenshot and HTML after click
|
|
|
|
|
try:
|
2026-01-02 11:43:24 +01:00
|
|
|
await page.screenshot(path=DATA_DIR / "wbm_after_apply.png")
|
2025-12-29 22:46:10 +01:00
|
|
|
logger.info("[WBM] Saved screenshot after application click.")
|
|
|
|
|
except Exception as e:
|
|
|
|
|
logger.warning(f"[WBM] Could not save screenshot: {e}")
|
|
|
|
|
try:
|
|
|
|
|
html_after = await page.content()
|
2026-01-02 11:43:24 +01:00
|
|
|
with open(DATA_DIR / "wbm_after_apply.html", "w", encoding="utf-8") as f:
|
2025-12-29 22:46:10 +01:00
|
|
|
f.write(html_after)
|
|
|
|
|
logger.info("[WBM] Saved HTML after application click.")
|
|
|
|
|
except Exception as e:
|
|
|
|
|
logger.warning(f"[WBM] Could not save HTML after apply: {e}")
|
|
|
|
|
|
|
|
|
|
# Look for confirmation message on the page
|
|
|
|
|
confirmation_selectors = [
|
|
|
|
|
'text="Vielen Dank"',
|
|
|
|
|
'text="Ihre Anfrage wurde gesendet"',
|
|
|
|
|
'text="Bestätigung"',
|
|
|
|
|
'div:has-text("Vielen Dank")',
|
|
|
|
|
'div:has-text("Ihre Anfrage wurde gesendet")',
|
|
|
|
|
]
|
|
|
|
|
confirmed = False
|
|
|
|
|
for sel in confirmation_selectors:
|
|
|
|
|
try:
|
|
|
|
|
el = await page.query_selector(sel)
|
|
|
|
|
if el and await el.is_visible():
|
|
|
|
|
logger.info(f"[WBM] Found confirmation element: {sel}")
|
|
|
|
|
confirmed = True
|
|
|
|
|
break
|
|
|
|
|
except Exception as e:
|
|
|
|
|
logger.debug(f"[WBM] Error checking confirmation selector {sel}: {e}")
|
|
|
|
|
if confirmed:
|
|
|
|
|
result["success"] = True
|
|
|
|
|
result["message"] = "Application submitted and confirmation detected."
|
|
|
|
|
else:
|
|
|
|
|
logger.warning("[WBM] No confirmation message detected after application click.")
|
|
|
|
|
result["success"] = False
|
|
|
|
|
result["message"] = "Clicked application button, but no confirmation detected. Check screenshot and HTML."
|
2025-12-27 11:59:04 +01:00
|
|
|
else:
|
2025-12-28 19:59:31 +01:00
|
|
|
result["message"] = "No application button found on detail page."
|
2025-12-27 11:59:04 +01:00
|
|
|
except Exception as e:
|
|
|
|
|
result["message"] = f"Error during application: {e}"
|
|
|
|
|
logger.error(f"[WBM] Application error: {e}")
|
|
|
|
|
finally:
|
|
|
|
|
await page.close()
|
|
|
|
|
|
|
|
|
|
return result
|