diff --git a/handlers/gewobag_handler.py b/handlers/gewobag_handler.py index 0fa2731..fc78348 100644 --- a/handlers/gewobag_handler.py +++ b/handlers/gewobag_handler.py @@ -19,11 +19,18 @@ class GewobagHandler(BaseHandler): logger.info("[GEWOBAG] Page loaded") await asyncio.sleep(2) - # Detect 404 by status or page title + # Detect 404 by status, page title, or "nicht gefunden" message status = response.status if response else None page_title = await page.title() - if status == 404 or (page_title and "404" in page_title): - logger.warning(f"[GEWOBAG] Listing is down (404): {listing['link']}") + page_content = await page.content() + is_404 = ( + status == 404 or + (page_title and "404" in page_title) or + (page_title and "nicht gefunden" in page_title.lower()) or + ("Mietangebot nicht gefunden" in page_content) + ) + if is_404: + logger.warning(f"[GEWOBAG] Listing is down (404 or unavailable): {listing['link']}") result["success"] = False result["message"] = "Listing is no longer available (404). Application impossible. Will not retry." result["deactivated"] = True diff --git a/handlers/stadtundland_handler.py b/handlers/stadtundland_handler.py index 9e6343e..e8f173f 100644 --- a/handlers/stadtundland_handler.py +++ b/handlers/stadtundland_handler.py @@ -31,20 +31,44 @@ class StadtUndLandHandler(BaseHandler): logger.info("[STADTUNDLAND] Page loaded") await asyncio.sleep(2) - # Dismiss cookie banner - try: - cookie_btn = await page.query_selector('button:has-text("Akzeptieren"), button:has-text("Alle akzeptieren")') - if cookie_btn and await cookie_btn.is_visible(): - await cookie_btn.click() - logger.info("[STADTUNDLAND] Dismissed cookie banner") - await asyncio.sleep(1) - except Exception as e: - logger.debug(f"[STADTUNDLAND] Cookie banner dismiss failed: {e}") + # Always handle cookies and consent + await self.handle_cookies(page) + await self.handle_consent(page) + await asyncio.sleep(1) + + # Check for 404 or error page early + page_title = await page.title() + page_content = await page.content() + + # Check for error messages + if "schief gelaufen" in page_content.lower() or "schief gelaufen" in page_title.lower(): + logger.warning(f"[STADTUNDLAND] Error page detected (schief gelaufen) for {listing['link']}") + result["success"] = False + result["details"] = "Listing no longer available (error page)" + await page.screenshot(path=str(DATA_DIR / f"stadtundland_404_{listing['id']}.png")) + return result + + # Check for "nicht verfügbar" or similar messages + if "nicht verfügbar" in page_content.lower() or "nicht mehr" in page_content.lower(): + logger.warning(f"[STADTUNDLAND] Listing not available: {listing['link']}") + result["success"] = False + result["details"] = "Listing no longer available" + await page.screenshot(path=str(DATA_DIR / f"stadtundland_404_{listing['id']}.png")) + return result # Scroll to form await page.evaluate("window.scrollBy(0, 500)") await asyncio.sleep(0.5) + # Save HTML for debugging + try: + html_content = await page.content() + with open(DATA_DIR / f"stadtundland_debug_{listing['id']}.html", "w", encoding="utf-8") as f: + f.write(html_content) + logger.debug(f"[STADTUNDLAND] Saved debug HTML") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not save debug HTML: {e}") + # Fill out the embedded form directly form_filled = False try: diff --git a/handlers/wbm_handler.py b/handlers/wbm_handler.py index 7a4268a..5f67c55 100644 --- a/handlers/wbm_handler.py +++ b/handlers/wbm_handler.py @@ -48,11 +48,24 @@ class WBMHandler(BaseHandler): result["message"] = "Listing is no longer available (404 detected on WBM)." logger.warning(f"[WBM] Permanent fail: {err}") await page.close() - await page.close() return result - # Check if we're already on the detail page (URL contains '/details/') + # Check if we landed on a generic overview/search page with multiple listings + # This happens when the listing link is wrong or the listing was removed current_url = page.url + # The overview page is at /angebote/ without /details/, and shows text like "X Mietwohnungen in Berlin" + if '/angebote/' in current_url and '/details/' not in current_url: + # Check for the heading pattern "X Mietwohnungen in Berlin" which appears on overview pages + overview_heading = await page.query_selector('h2:has-text("Mietwohnungen in Berlin"), h3:has-text("Mietwohnungen in Berlin")') + if overview_heading: + result["deactivated"] = True + result["message"] = "Redirected to generic overview page - listing no longer exists" + logger.warning(f"[WBM] Landed on overview page (/angebote/) instead of specific listing detail") + await page.screenshot(path=DATA_DIR / f"wbm_overview_redirect_{listing['id']}.png") + await page.close() + return result + + # Check if we're already on the detail page (URL contains '/details/') if '/details/' not in current_url: # Find and follow the 'Details' link to the detail page logger.info("[WBM] Looking for 'Details' link to open detail page...") @@ -91,7 +104,7 @@ class WBMHandler(BaseHandler): # Save HTML of detail page for debugging try: html_content = await page.content() - with open("data/wbm_detail_debug.html", "w", encoding="utf-8") as f: + with open(DATA_DIR / "wbm_detail_debug.html", "w", encoding="utf-8") as f: f.write(html_content) except Exception as e: logger.warning(f"[WBM] Could not save detail debug HTML: {e}") @@ -159,8 +172,8 @@ class WBMHandler(BaseHandler): logger.debug("[WBM] Filled Vorname") form_filled = True - # Email - email_input = await page.query_selector('input[name*="email" i]') + # Email (use ID or specific field name pattern) + email_input = await page.query_selector('input#powermail_field_e_mail, input[name*="[e_mail]"], input[name*="[email]"]') if email_input: await email_input.fill(os.getenv("FORM_EMAIL", "")) logger.debug("[WBM] Filled Email") @@ -196,13 +209,19 @@ class WBMHandler(BaseHandler): await ort_input.fill(os.getenv("FORM_ORT", "")) logger.debug("[WBM] Filled Ort") - # Datenschutz checkbox + # Datenschutz checkbox - use force click or click the label to avoid interception datenschutz_checkbox = await page.query_selector('input[name*="datenschutz" i][type="checkbox"]') if datenschutz_checkbox: is_checked = await datenschutz_checkbox.is_checked() if not is_checked: - await datenschutz_checkbox.check() - logger.debug("[WBM] Checked Datenschutz") + # Try clicking the label first, fall back to force click on input + datenschutz_label = await page.query_selector('label[for]:has(input[name*="datenschutz" i])') + if datenschutz_label: + await datenschutz_label.click() + logger.debug("[WBM] Clicked Datenschutz label") + else: + await datenschutz_checkbox.click(force=True) + logger.debug("[WBM] Force-clicked Datenschutz checkbox") if not form_filled: logger.error("[WBM] No form fields found - form may not be visible")