Fix handler errors: detect removed or unavailable listings earlier and make WBM form filling more robust
This commit is contained in:
parent
e6b0f844fe
commit
c68ee12d4e
3 changed files with 70 additions and 20 deletions
|
|
@@ -19,11 +19,18 @@ class GewobagHandler(BaseHandler):
|
|||
logger.info("[GEWOBAG] Page loaded")
|
||||
await asyncio.sleep(2)
|
||||
|
||||
# Detect 404 by status or page title
|
||||
# Detect 404 by status, page title, or "nicht gefunden" message
|
||||
status = response.status if response else None
|
||||
page_title = await page.title()
|
||||
if status == 404 or (page_title and "404" in page_title):
|
||||
logger.warning(f"[GEWOBAG] Listing is down (404): {listing['link']}")
|
||||
page_content = await page.content()
|
||||
is_404 = (
|
||||
status == 404 or
|
||||
(page_title and "404" in page_title) or
|
||||
(page_title and "nicht gefunden" in page_title.lower()) or
|
||||
("Mietangebot nicht gefunden" in page_content)
|
||||
)
|
||||
if is_404:
|
||||
logger.warning(f"[GEWOBAG] Listing is down (404 or unavailable): {listing['link']}")
|
||||
result["success"] = False
|
||||
result["message"] = "Listing is no longer available (404). Application impossible. Will not retry."
|
||||
result["deactivated"] = True
|
||||
|
|
|
|||
|
|
@@ -31,20 +31,44 @@ class StadtUndLandHandler(BaseHandler):
|
|||
logger.info("[STADTUNDLAND] Page loaded")
|
||||
await asyncio.sleep(2)
|
||||
|
||||
# Dismiss cookie banner
|
||||
try:
|
||||
cookie_btn = await page.query_selector('button:has-text("Akzeptieren"), button:has-text("Alle akzeptieren")')
|
||||
if cookie_btn and await cookie_btn.is_visible():
|
||||
await cookie_btn.click()
|
||||
logger.info("[STADTUNDLAND] Dismissed cookie banner")
|
||||
await asyncio.sleep(1)
|
||||
except Exception as e:
|
||||
logger.debug(f"[STADTUNDLAND] Cookie banner dismiss failed: {e}")
|
||||
# Always handle cookies and consent
|
||||
await self.handle_cookies(page)
|
||||
await self.handle_consent(page)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
# Check for 404 or error page early
|
||||
page_title = await page.title()
|
||||
page_content = await page.content()
|
||||
|
||||
# Check for error messages
|
||||
if "schief gelaufen" in page_content.lower() or "schief gelaufen" in page_title.lower():
|
||||
logger.warning(f"[STADTUNDLAND] Error page detected (schief gelaufen) for {listing['link']}")
|
||||
result["success"] = False
|
||||
result["details"] = "Listing no longer available (error page)"
|
||||
await page.screenshot(path=str(DATA_DIR / f"stadtundland_404_{listing['id']}.png"))
|
||||
return result
|
||||
|
||||
# Check for "nicht verfügbar" or similar messages
|
||||
if "nicht verfügbar" in page_content.lower() or "nicht mehr" in page_content.lower():
|
||||
logger.warning(f"[STADTUNDLAND] Listing not available: {listing['link']}")
|
||||
result["success"] = False
|
||||
result["details"] = "Listing no longer available"
|
||||
await page.screenshot(path=str(DATA_DIR / f"stadtundland_404_{listing['id']}.png"))
|
||||
return result
|
||||
|
||||
# Scroll to form
|
||||
await page.evaluate("window.scrollBy(0, 500)")
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
# Save HTML for debugging
|
||||
try:
|
||||
html_content = await page.content()
|
||||
with open(DATA_DIR / f"stadtundland_debug_{listing['id']}.html", "w", encoding="utf-8") as f:
|
||||
f.write(html_content)
|
||||
logger.debug(f"[STADTUNDLAND] Saved debug HTML")
|
||||
except Exception as e:
|
||||
logger.warning(f"[STADTUNDLAND] Could not save debug HTML: {e}")
|
||||
|
||||
# Fill out the embedded form directly
|
||||
form_filled = False
|
||||
try:
|
||||
|
|
|
|||
|
|
@@ -48,11 +48,24 @@ class WBMHandler(BaseHandler):
|
|||
result["message"] = "Listing is no longer available (404 detected on WBM)."
|
||||
logger.warning(f"[WBM] Permanent fail: {err}")
|
||||
await page.close()
|
||||
await page.close()
|
||||
return result
|
||||
|
||||
# Check if we're already on the detail page (URL contains '/details/')
|
||||
# Check if we landed on a generic overview/search page with multiple listings
|
||||
# This happens when the listing link is wrong or the listing was removed
|
||||
current_url = page.url
|
||||
# The overview page is at /angebote/ without /details/, and shows text like "X Mietwohnungen in Berlin"
|
||||
if '/angebote/' in current_url and '/details/' not in current_url:
|
||||
# Check for the heading pattern "X Mietwohnungen in Berlin" which appears on overview pages
|
||||
overview_heading = await page.query_selector('h2:has-text("Mietwohnungen in Berlin"), h3:has-text("Mietwohnungen in Berlin")')
|
||||
if overview_heading:
|
||||
result["deactivated"] = True
|
||||
result["message"] = "Redirected to generic overview page - listing no longer exists"
|
||||
logger.warning(f"[WBM] Landed on overview page (/angebote/) instead of specific listing detail")
|
||||
await page.screenshot(path=DATA_DIR / f"wbm_overview_redirect_{listing['id']}.png")
|
||||
await page.close()
|
||||
return result
|
||||
|
||||
# Check if we're already on the detail page (URL contains '/details/')
|
||||
if '/details/' not in current_url:
|
||||
# Find and follow the 'Details' link to the detail page
|
||||
logger.info("[WBM] Looking for 'Details' link to open detail page...")
|
||||
|
|
@@ -91,7 +104,7 @@ class WBMHandler(BaseHandler):
|
|||
# Save HTML of detail page for debugging
|
||||
try:
|
||||
html_content = await page.content()
|
||||
with open("data/wbm_detail_debug.html", "w", encoding="utf-8") as f:
|
||||
with open(DATA_DIR / "wbm_detail_debug.html", "w", encoding="utf-8") as f:
|
||||
f.write(html_content)
|
||||
except Exception as e:
|
||||
logger.warning(f"[WBM] Could not save detail debug HTML: {e}")
|
||||
|
|
@@ -159,8 +172,8 @@ class WBMHandler(BaseHandler):
|
|||
logger.debug("[WBM] Filled Vorname")
|
||||
form_filled = True
|
||||
|
||||
# Email
|
||||
email_input = await page.query_selector('input[name*="email" i]')
|
||||
# Email (use ID or specific field name pattern)
|
||||
email_input = await page.query_selector('input#powermail_field_e_mail, input[name*="[e_mail]"], input[name*="[email]"]')
|
||||
if email_input:
|
||||
await email_input.fill(os.getenv("FORM_EMAIL", ""))
|
||||
logger.debug("[WBM] Filled Email")
|
||||
|
|
@@ -196,13 +209,19 @@ class WBMHandler(BaseHandler):
|
|||
await ort_input.fill(os.getenv("FORM_ORT", ""))
|
||||
logger.debug("[WBM] Filled Ort")
|
||||
|
||||
# Datenschutz checkbox
|
||||
# Datenschutz checkbox - use force click or click the label to avoid interception
|
||||
datenschutz_checkbox = await page.query_selector('input[name*="datenschutz" i][type="checkbox"]')
|
||||
if datenschutz_checkbox:
|
||||
is_checked = await datenschutz_checkbox.is_checked()
|
||||
if not is_checked:
|
||||
await datenschutz_checkbox.check()
|
||||
logger.debug("[WBM] Checked Datenschutz")
|
||||
# Try clicking the label first, fall back to force click on input
|
||||
datenschutz_label = await page.query_selector('label[for]:has(input[name*="datenschutz" i])')
|
||||
if datenschutz_label:
|
||||
await datenschutz_label.click()
|
||||
logger.debug("[WBM] Clicked Datenschutz label")
|
||||
else:
|
||||
await datenschutz_checkbox.click(force=True)
|
||||
logger.debug("[WBM] Force-clicked Datenschutz checkbox")
|
||||
|
||||
if not form_filled:
|
||||
logger.error("[WBM] No form fields found - form may not be visible")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue