wohnbot/handlers/wbm_handler.py

298 lines
15 KiB
Python
Raw Permalink Normal View History

2025-12-27 11:59:04 +01:00
from .base_handler import BaseHandler
import logging
import asyncio
2026-01-02 11:43:24 +01:00
import os
2026-01-01 15:27:25 +01:00
from pathlib import Path
2025-12-27 11:59:04 +01:00
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
2026-01-01 15:27:25 +01:00
# Directory receiving this module's debug artifacts (HTML dumps, screenshots).
# It is created at import time, including missing parents, so later writes
# do not need an existence check.
DATA_DIR = Path("data") / "wbm"
DATA_DIR.mkdir(exist_ok=True, parents=True)
2025-12-27 11:59:04 +01:00
class WBMHandler(BaseHandler):
    """Submit a rental application on a WBM listing page.

    Pages are opened from the browser context passed to the constructor
    (presumably a Playwright ``BrowserContext`` -- confirm against the caller).
    Cookie and consent dialogs are dismissed via ``handle_cookies`` and
    ``handle_consent``, which are not defined in this class and are expected
    to come from ``BaseHandler``.
    """

    # Body texts that mark a listing as permanently gone, checked in order.
    _GONE_MARKERS = (
        "Keine passenden Angebote gefunden",
        "Das Angebot existiert nicht mehr",
        "Die gewünschte Seite konnte nicht gefunden werden",
        "404",
        "Es wurden keine Immobilien gefunden",
    )

    # Candidate selectors for the "Details" link, most specific first.
    _DETAIL_LINK_SELECTORS = (
        'a.btn.sign[title="Details"]',
        'a.immo-button-cta[title="Details"]',
        'a[title="Details"]',
    )

    # Candidate selectors for the application/submit button, most specific
    # first; the generic 'button.btn' is a last resort.
    _APPLY_BUTTON_SELECTORS = (
        'button.btn.btn-primary[type="submit"]',  # The actual form submit button
        'button:has-text("Anfrage absenden")',
        'a[href*="expose-anfordern"]',
        'a[href*="bewerben"]',
        'a:has-text("Anfragen")',
        'button:has-text("Interesse")',
        'a:has-text("Bewerben")',
        'button:has-text("Bewerben")',
        'button.btn',
    )

    # Selectors whose visibility after submit counts as a confirmation.
    _CONFIRMATION_SELECTORS = (
        'text="Vielen Dank"',
        'text="Ihre Anfrage wurde gesendet"',
        'text="Bestätigung"',
        'div:has-text("Vielen Dank")',
        'div:has-text("Ihre Anfrage wurde gesendet")',
    )

    # Text inputs of the application form:
    # (selector, environment variable, log label, counts as "form found").
    # Only the first three decide whether the form is considered present.
    _TEXT_FIELDS = (
        ('input[name*="name" i]:not([name*="vorname" i])', "FORM_NACHNAME", "Name", True),
        ('input[name*="vorname" i]', "FORM_VORNAME", "Vorname", True),
        (
            'input#powermail_field_e_mail, input[name*="[e_mail]"], input[name*="[email]"]',
            "FORM_EMAIL",
            "Email",
            True,
        ),
        ('input[name*="telefon" i]', "FORM_PHONE", "Phone", False),
        ('input[name*="strasse" i]', "FORM_STRASSE", "Strasse", False),
        ('input[name*="hausnummer" i]', "FORM_HAUSNUMMER", "Hausnummer", False),
        ('input[name*="plz" i]', "FORM_PLZ", "PLZ", False),
        ('input[name*="ort" i]', "FORM_ORT", "Ort", False),
    )

    def __init__(self, browser_context):
        """Keep the browser context from which ``apply`` opens its page."""
        self.context = browser_context

    async def apply(self, listing: dict, result: dict) -> dict:
        """Open the listing, fill the contact form and submit it.

        Args:
            listing: Must provide ``"link"`` (URL to open) and ``"id"`` (used
                in screenshot file names).
            result: Mutated in place. ``"message"`` is set on every outcome
                except when closing the page itself fails, ``"deactivated"``
                is set to ``True`` when the listing is detected as gone, and
                ``"success"`` is set only once the submit button was clicked.

        Returns:
            The same ``result`` dict.

        Errors raised while working on the page are caught, logged and
        reported through ``result["message"]``. Errors from opening or
        closing the page are not caught here.
        """
        page = await self.context.new_page()
        # Every exit path below relies on the ``finally`` clause to close the
        # page, so the early returns do not close it themselves.
        try:
            logger.info(f"[WBM] Opening listing overview page: {listing['link']}")
            await page.goto(listing["link"], wait_until="networkidle")
            logger.info("[WBM] Overview page loaded")
            await asyncio.sleep(2)

            # Always handle cookies and consent before anything else.
            await self.handle_cookies(page)
            await self.handle_consent(page)
            await self._save_html(page, "wbm_debug.html", "Could not save debug HTML")

            # 404 / permanent-fail detection.
            gone_marker = self._find_gone_marker(await page.text_content('body'))
            if gone_marker is not None:
                result["deactivated"] = True
                result["message"] = "Listing is no longer available (404 detected on WBM)."
                logger.warning(f"[WBM] Permanent fail: {gone_marker}")
                return result

            # A wrong or removed listing link lands on the generic overview
            # page: the URL has /angebote/ but no /details/, and the page shows
            # a heading like "X Mietwohnungen in Berlin".
            current_url = page.url
            if '/angebote/' in current_url and '/details/' not in current_url:
                overview_heading = await page.query_selector(
                    'h2:has-text("Mietwohnungen in Berlin"), h3:has-text("Mietwohnungen in Berlin")'
                )
                if overview_heading:
                    result["deactivated"] = True
                    result["message"] = "Redirected to generic overview page - listing no longer exists"
                    logger.warning("[WBM] Landed on overview page (/angebote/) instead of specific listing detail")
                    await self._save_screenshot(page, f"wbm_overview_redirect_{listing['id']}.png")
                    return result

            if '/details/' not in current_url:
                # Not on the detail page yet: follow the 'Details' link.
                logger.info("[WBM] Looking for 'Details' link to open detail page...")
                detail_link, _ = await self._first_visible(
                    page, self._DETAIL_LINK_SELECTORS, "for details link", "details link"
                )
                if not detail_link:
                    result["message"] = "No details link found on overview page."
                    return result
                logger.info("[WBM] Clicking details link to open detail page...")
                await detail_link.click()
                await page.wait_for_load_state("networkidle")
                await asyncio.sleep(2)
            else:
                logger.info("[WBM] Already on detail page, skipping details link search")

            await self._save_html(page, "wbm_detail_debug.html", "Could not save detail debug HTML")

            logger.info("[WBM] Looking for application button on detail page...")
            apply_btn, matched_selector = await self._first_visible(
                page, self._APPLY_BUTTON_SELECTORS, "on detail page", "button"
            )
            if not apply_btn:
                result["message"] = "No application button found on detail page."
                return result
            logger.info(
                f"[WBM] Found visible application button with selector '{matched_selector}' on detail page"
            )

            logger.info("[WBM] Found submit button, scrolling form into view...")
            await apply_btn.scroll_into_view_if_needed()
            await asyncio.sleep(1)
            # Handle cookies/consent again after scrolling (modals may reappear).
            await self.handle_cookies(page)
            await self.handle_consent(page)
            await asyncio.sleep(1)

            # Fill out the form fields before clicking submit.
            logger.info("[WBM] Filling out application form...")
            try:
                form_found = await self._fill_form(page)
            except Exception as e:
                logger.error(f"[WBM] Error filling form: {e}")
                result["message"] = f"Error filling form: {e}"
                await self._save_screenshot(page, f"wbm_form_error_{listing['id']}.png")
                return result
            if not form_found:
                logger.error("[WBM] No form fields found - form may not be visible")
                result["message"] = "Form not found or not visible"
                await self._save_screenshot(page, f"wbm_no_form_{listing['id']}.png")
                return result
            logger.info("[WBM] Form filled successfully")
            await asyncio.sleep(1)
            # Debug artifact only: a failed screenshot must not block the submit.
            await self._save_screenshot(page, f"wbm_before_submit_{listing['id']}.png")

            logger.info("[WBM] Clicking submit button...")
            await apply_btn.click()
            await asyncio.sleep(3)

            # --- Post-click confirmation logic ---
            logger.info("[WBM] Clicked submit button, checking for confirmation...")
            if await self._save_screenshot(page, "wbm_after_apply.png"):
                logger.info("[WBM] Saved screenshot after application click.")
            if await self._save_html(page, "wbm_after_apply.html", "Could not save HTML after apply"):
                logger.info("[WBM] Saved HTML after application click.")

            if await self._confirmation_visible(page):
                result["success"] = True
                result["message"] = "Application submitted and confirmation detected."
            else:
                logger.warning("[WBM] No confirmation message detected after application click.")
                result["success"] = False
                result["message"] = "Clicked application button, but no confirmation detected. Check screenshot and HTML."
        except Exception as e:
            result["message"] = f"Error during application: {e}"
            logger.error(f"[WBM] Application error: {e}")
        finally:
            await page.close()
        return result

    @classmethod
    def _find_gone_marker(cls, page_text):
        """Return the first "listing gone" marker found in ``page_text``, else ``None``.

        The bare "404" marker only matches when it is not part of a larger
        number or decimal, so text such as "1.404,00" or "404,50" does not
        flag a live listing. It remains a heuristic: a standalone "404"
        elsewhere in the text still matches.
        """
        import re  # local import: only needed for the numeric guard below

        if not page_text:
            return None
        for marker in cls._GONE_MARKERS:
            if marker == "404":
                if re.search(r"(?<!\d)(?<!\d[.,])404(?![.,]?\d)", page_text):
                    return marker
            elif marker in page_text:
                return marker
        return None

    async def _first_visible(self, page, selectors, count_suffix, visibility_label):
        """Return ``(element, selector)`` for the first visible match, else ``(None, None)``.

        Selectors are tried in order. ``count_suffix`` and ``visibility_label``
        only shape the log messages. A failing visibility check is logged and
        the element is skipped.
        """
        for selector in selectors:
            candidates = await page.query_selector_all(selector)
            logger.info(f"[WBM] Selector '{selector}' found {len(candidates)} matches {count_suffix}")
            for candidate in candidates:
                try:
                    if await candidate.is_visible():
                        return candidate, selector
                except Exception as e:
                    logger.warning(f"[WBM] Error checking {visibility_label} visibility: {e}")
        return None, None

    async def _save_html(self, page, filename, failure_note) -> bool:
        """Best-effort dump of the page HTML into ``DATA_DIR``; return True on success."""
        try:
            html_content = await page.content()
            with open(DATA_DIR / filename, "w", encoding="utf-8") as f:
                f.write(html_content)
        except Exception as e:
            logger.warning(f"[WBM] {failure_note}: {e}")
            return False
        return True

    async def _save_screenshot(self, page, filename) -> bool:
        """Best-effort screenshot into ``DATA_DIR``; return True on success."""
        try:
            await page.screenshot(path=DATA_DIR / filename)
        except Exception as e:
            logger.warning(f"[WBM] Could not save screenshot: {e}")
            return False
        return True

    async def _fill_form(self, page) -> bool:
        """Fill the contact form from ``FORM_*`` environment variables.

        Returns True when at least one of Anrede, Name, Vorname or Email was
        found and filled. The remaining fields and the Datenschutz checkbox
        are handled when present but do not affect the return value. Errors
        raised while filling propagate to the caller.
        """
        form_found = False

        # Anrede is a dropdown and needs select_option instead of fill.
        anrede_select = await page.query_selector('select[name*="anrede" i]')
        if anrede_select:
            await anrede_select.select_option(os.getenv("FORM_ANREDE", "Frau"))
            logger.debug("[WBM] Filled Anrede")
            form_found = True

        for selector, env_var, label, marks_form_found in self._TEXT_FIELDS:
            field = await page.query_selector(selector)
            if not field:
                continue
            await field.fill(os.getenv(env_var, ""))
            logger.debug(f"[WBM] Filled {label}")
            if marks_form_found:
                form_found = True

        # Datenschutz checkbox: click the label if there is one, otherwise
        # force-click the input to avoid the click being intercepted.
        datenschutz_checkbox = await page.query_selector(
            'input[name*="datenschutz" i][type="checkbox"]'
        )
        if datenschutz_checkbox and not await datenschutz_checkbox.is_checked():
            datenschutz_label = await page.query_selector(
                'label[for]:has(input[name*="datenschutz" i])'
            )
            if datenschutz_label:
                await datenschutz_label.click()
                logger.debug("[WBM] Clicked Datenschutz label")
            else:
                await datenschutz_checkbox.click(force=True)
                logger.debug("[WBM] Force-clicked Datenschutz checkbox")

        return form_found

    async def _confirmation_visible(self, page) -> bool:
        """Return True when any confirmation selector matches a visible element."""
        for selector in self._CONFIRMATION_SELECTORS:
            try:
                element = await page.query_selector(selector)
                if element and await element.is_visible():
                    logger.info(f"[WBM] Found confirmation element: {selector}")
                    return True
            except Exception as e:
                logger.debug(f"[WBM] Error checking confirmation selector {selector}: {e}")
        return False