fix handler errors

This commit is contained in:
Aron Petau 2026-01-02 12:25:27 +01:00
parent e6b0f844fe
commit c68ee12d4e
3 changed files with 70 additions and 20 deletions

View file

@ -19,11 +19,18 @@ class GewobagHandler(BaseHandler):
logger.info("[GEWOBAG] Page loaded") logger.info("[GEWOBAG] Page loaded")
await asyncio.sleep(2) await asyncio.sleep(2)
# Detect 404 by status or page title # Detect 404 by status, page title, or "nicht gefunden" message
status = response.status if response else None status = response.status if response else None
page_title = await page.title() page_title = await page.title()
if status == 404 or (page_title and "404" in page_title): page_content = await page.content()
logger.warning(f"[GEWOBAG] Listing is down (404): {listing['link']}") is_404 = (
status == 404 or
(page_title and "404" in page_title) or
(page_title and "nicht gefunden" in page_title.lower()) or
("Mietangebot nicht gefunden" in page_content)
)
if is_404:
logger.warning(f"[GEWOBAG] Listing is down (404 or unavailable): {listing['link']}")
result["success"] = False result["success"] = False
result["message"] = "Listing is no longer available (404). Application impossible. Will not retry." result["message"] = "Listing is no longer available (404). Application impossible. Will not retry."
result["deactivated"] = True result["deactivated"] = True

View file

@ -31,20 +31,44 @@ class StadtUndLandHandler(BaseHandler):
logger.info("[STADTUNDLAND] Page loaded") logger.info("[STADTUNDLAND] Page loaded")
await asyncio.sleep(2) await asyncio.sleep(2)
# Dismiss cookie banner # Always handle cookies and consent
try: await self.handle_cookies(page)
cookie_btn = await page.query_selector('button:has-text("Akzeptieren"), button:has-text("Alle akzeptieren")') await self.handle_consent(page)
if cookie_btn and await cookie_btn.is_visible(): await asyncio.sleep(1)
await cookie_btn.click()
logger.info("[STADTUNDLAND] Dismissed cookie banner") # Check for 404 or error page early
await asyncio.sleep(1) page_title = await page.title()
except Exception as e: page_content = await page.content()
logger.debug(f"[STADTUNDLAND] Cookie banner dismiss failed: {e}")
# Check for error messages
if "schief gelaufen" in page_content.lower() or "schief gelaufen" in page_title.lower():
logger.warning(f"[STADTUNDLAND] Error page detected (schief gelaufen) for {listing['link']}")
result["success"] = False
result["details"] = "Listing no longer available (error page)"
await page.screenshot(path=str(DATA_DIR / f"stadtundland_404_{listing['id']}.png"))
return result
# Check for "nicht verfügbar" or similar messages
if "nicht verfügbar" in page_content.lower() or "nicht mehr" in page_content.lower():
logger.warning(f"[STADTUNDLAND] Listing not available: {listing['link']}")
result["success"] = False
result["details"] = "Listing no longer available"
await page.screenshot(path=str(DATA_DIR / f"stadtundland_404_{listing['id']}.png"))
return result
# Scroll to form # Scroll to form
await page.evaluate("window.scrollBy(0, 500)") await page.evaluate("window.scrollBy(0, 500)")
await asyncio.sleep(0.5) await asyncio.sleep(0.5)
# Save HTML for debugging
try:
html_content = await page.content()
with open(DATA_DIR / f"stadtundland_debug_{listing['id']}.html", "w", encoding="utf-8") as f:
f.write(html_content)
logger.debug(f"[STADTUNDLAND] Saved debug HTML")
except Exception as e:
logger.warning(f"[STADTUNDLAND] Could not save debug HTML: {e}")
# Fill out the embedded form directly # Fill out the embedded form directly
form_filled = False form_filled = False
try: try:

View file

@ -48,11 +48,24 @@ class WBMHandler(BaseHandler):
result["message"] = "Listing is no longer available (404 detected on WBM)." result["message"] = "Listing is no longer available (404 detected on WBM)."
logger.warning(f"[WBM] Permanent fail: {err}") logger.warning(f"[WBM] Permanent fail: {err}")
await page.close() await page.close()
await page.close()
return result return result
# Check if we're already on the detail page (URL contains '/details/') # Check if we landed on a generic overview/search page with multiple listings
# This happens when the listing link is wrong or the listing was removed
current_url = page.url current_url = page.url
# The overview page is at /angebote/ without /details/, and shows text like "X Mietwohnungen in Berlin"
if '/angebote/' in current_url and '/details/' not in current_url:
# Check for the heading pattern "X Mietwohnungen in Berlin" which appears on overview pages
overview_heading = await page.query_selector('h2:has-text("Mietwohnungen in Berlin"), h3:has-text("Mietwohnungen in Berlin")')
if overview_heading:
result["deactivated"] = True
result["message"] = "Redirected to generic overview page - listing no longer exists"
logger.warning(f"[WBM] Landed on overview page (/angebote/) instead of specific listing detail")
await page.screenshot(path=DATA_DIR / f"wbm_overview_redirect_{listing['id']}.png")
await page.close()
return result
# Check if we're already on the detail page (URL contains '/details/')
if '/details/' not in current_url: if '/details/' not in current_url:
# Find and follow the 'Details' link to the detail page # Find and follow the 'Details' link to the detail page
logger.info("[WBM] Looking for 'Details' link to open detail page...") logger.info("[WBM] Looking for 'Details' link to open detail page...")
@ -91,7 +104,7 @@ class WBMHandler(BaseHandler):
# Save HTML of detail page for debugging # Save HTML of detail page for debugging
try: try:
html_content = await page.content() html_content = await page.content()
with open("data/wbm_detail_debug.html", "w", encoding="utf-8") as f: with open(DATA_DIR / "wbm_detail_debug.html", "w", encoding="utf-8") as f:
f.write(html_content) f.write(html_content)
except Exception as e: except Exception as e:
logger.warning(f"[WBM] Could not save detail debug HTML: {e}") logger.warning(f"[WBM] Could not save detail debug HTML: {e}")
@ -159,8 +172,8 @@ class WBMHandler(BaseHandler):
logger.debug("[WBM] Filled Vorname") logger.debug("[WBM] Filled Vorname")
form_filled = True form_filled = True
# Email # Email (use ID or specific field name pattern)
email_input = await page.query_selector('input[name*="email" i]') email_input = await page.query_selector('input#powermail_field_e_mail, input[name*="[e_mail]"], input[name*="[email]"]')
if email_input: if email_input:
await email_input.fill(os.getenv("FORM_EMAIL", "")) await email_input.fill(os.getenv("FORM_EMAIL", ""))
logger.debug("[WBM] Filled Email") logger.debug("[WBM] Filled Email")
@ -196,13 +209,19 @@ class WBMHandler(BaseHandler):
await ort_input.fill(os.getenv("FORM_ORT", "")) await ort_input.fill(os.getenv("FORM_ORT", ""))
logger.debug("[WBM] Filled Ort") logger.debug("[WBM] Filled Ort")
# Datenschutz checkbox # Datenschutz checkbox - use force click or click the label to avoid interception
datenschutz_checkbox = await page.query_selector('input[name*="datenschutz" i][type="checkbox"]') datenschutz_checkbox = await page.query_selector('input[name*="datenschutz" i][type="checkbox"]')
if datenschutz_checkbox: if datenschutz_checkbox:
is_checked = await datenschutz_checkbox.is_checked() is_checked = await datenschutz_checkbox.is_checked()
if not is_checked: if not is_checked:
await datenschutz_checkbox.check() # Try clicking the label first, fall back to force click on input
logger.debug("[WBM] Checked Datenschutz") datenschutz_label = await page.query_selector('label[for]:has(input[name*="datenschutz" i])')
if datenschutz_label:
await datenschutz_label.click()
logger.debug("[WBM] Clicked Datenschutz label")
else:
await datenschutz_checkbox.click(force=True)
logger.debug("[WBM] Force-clicked Datenschutz checkbox")
if not form_filled: if not form_filled:
logger.error("[WBM] No form fields found - form may not be visible") logger.error("[WBM] No form fields found - form may not be visible")