fix handler errors
This commit is contained in:
parent
e6b0f844fe
commit
c68ee12d4e
3 changed files with 70 additions and 20 deletions
|
|
@@ -19,11 +19,18 @@ class GewobagHandler(BaseHandler):
|
||||||
logger.info("[GEWOBAG] Page loaded")
|
logger.info("[GEWOBAG] Page loaded")
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(2)
|
||||||
|
|
||||||
# Detect 404 by status or page title
|
# Detect 404 by status, page title, or "nicht gefunden" message
|
||||||
status = response.status if response else None
|
status = response.status if response else None
|
||||||
page_title = await page.title()
|
page_title = await page.title()
|
||||||
if status == 404 or (page_title and "404" in page_title):
|
page_content = await page.content()
|
||||||
logger.warning(f"[GEWOBAG] Listing is down (404): {listing['link']}")
|
is_404 = (
|
||||||
|
status == 404 or
|
||||||
|
(page_title and "404" in page_title) or
|
||||||
|
(page_title and "nicht gefunden" in page_title.lower()) or
|
||||||
|
("Mietangebot nicht gefunden" in page_content)
|
||||||
|
)
|
||||||
|
if is_404:
|
||||||
|
logger.warning(f"[GEWOBAG] Listing is down (404 or unavailable): {listing['link']}")
|
||||||
result["success"] = False
|
result["success"] = False
|
||||||
result["message"] = "Listing is no longer available (404). Application impossible. Will not retry."
|
result["message"] = "Listing is no longer available (404). Application impossible. Will not retry."
|
||||||
result["deactivated"] = True
|
result["deactivated"] = True
|
||||||
|
|
|
||||||
|
|
@@ -31,20 +31,44 @@ class StadtUndLandHandler(BaseHandler):
|
||||||
logger.info("[STADTUNDLAND] Page loaded")
|
logger.info("[STADTUNDLAND] Page loaded")
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(2)
|
||||||
|
|
||||||
# Dismiss cookie banner
|
# Always handle cookies and consent
|
||||||
try:
|
await self.handle_cookies(page)
|
||||||
cookie_btn = await page.query_selector('button:has-text("Akzeptieren"), button:has-text("Alle akzeptieren")')
|
await self.handle_consent(page)
|
||||||
if cookie_btn and await cookie_btn.is_visible():
|
|
||||||
await cookie_btn.click()
|
|
||||||
logger.info("[STADTUNDLAND] Dismissed cookie banner")
|
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"[STADTUNDLAND] Cookie banner dismiss failed: {e}")
|
# Check for 404 or error page early
|
||||||
|
page_title = await page.title()
|
||||||
|
page_content = await page.content()
|
||||||
|
|
||||||
|
# Check for error messages
|
||||||
|
if "schief gelaufen" in page_content.lower() or "schief gelaufen" in page_title.lower():
|
||||||
|
logger.warning(f"[STADTUNDLAND] Error page detected (schief gelaufen) for {listing['link']}")
|
||||||
|
result["success"] = False
|
||||||
|
result["details"] = "Listing no longer available (error page)"
|
||||||
|
await page.screenshot(path=str(DATA_DIR / f"stadtundland_404_{listing['id']}.png"))
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Check for "nicht verfügbar" or similar messages
|
||||||
|
if "nicht verfügbar" in page_content.lower() or "nicht mehr" in page_content.lower():
|
||||||
|
logger.warning(f"[STADTUNDLAND] Listing not available: {listing['link']}")
|
||||||
|
result["success"] = False
|
||||||
|
result["details"] = "Listing no longer available"
|
||||||
|
await page.screenshot(path=str(DATA_DIR / f"stadtundland_404_{listing['id']}.png"))
|
||||||
|
return result
|
||||||
|
|
||||||
# Scroll to form
|
# Scroll to form
|
||||||
await page.evaluate("window.scrollBy(0, 500)")
|
await page.evaluate("window.scrollBy(0, 500)")
|
||||||
await asyncio.sleep(0.5)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
|
# Save HTML for debugging
|
||||||
|
try:
|
||||||
|
html_content = await page.content()
|
||||||
|
with open(DATA_DIR / f"stadtundland_debug_{listing['id']}.html", "w", encoding="utf-8") as f:
|
||||||
|
f.write(html_content)
|
||||||
|
logger.debug(f"[STADTUNDLAND] Saved debug HTML")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[STADTUNDLAND] Could not save debug HTML: {e}")
|
||||||
|
|
||||||
# Fill out the embedded form directly
|
# Fill out the embedded form directly
|
||||||
form_filled = False
|
form_filled = False
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@@ -48,11 +48,24 @@ class WBMHandler(BaseHandler):
|
||||||
result["message"] = "Listing is no longer available (404 detected on WBM)."
|
result["message"] = "Listing is no longer available (404 detected on WBM)."
|
||||||
logger.warning(f"[WBM] Permanent fail: {err}")
|
logger.warning(f"[WBM] Permanent fail: {err}")
|
||||||
await page.close()
|
await page.close()
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Check if we landed on a generic overview/search page with multiple listings
|
||||||
|
# This happens when the listing link is wrong or the listing was removed
|
||||||
|
current_url = page.url
|
||||||
|
# The overview page is at /angebote/ without /details/, and shows text like "X Mietwohnungen in Berlin"
|
||||||
|
if '/angebote/' in current_url and '/details/' not in current_url:
|
||||||
|
# Check for the heading pattern "X Mietwohnungen in Berlin" which appears on overview pages
|
||||||
|
overview_heading = await page.query_selector('h2:has-text("Mietwohnungen in Berlin"), h3:has-text("Mietwohnungen in Berlin")')
|
||||||
|
if overview_heading:
|
||||||
|
result["deactivated"] = True
|
||||||
|
result["message"] = "Redirected to generic overview page - listing no longer exists"
|
||||||
|
logger.warning(f"[WBM] Landed on overview page (/angebote/) instead of specific listing detail")
|
||||||
|
await page.screenshot(path=DATA_DIR / f"wbm_overview_redirect_{listing['id']}.png")
|
||||||
await page.close()
|
await page.close()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Check if we're already on the detail page (URL contains '/details/')
|
# Check if we're already on the detail page (URL contains '/details/')
|
||||||
current_url = page.url
|
|
||||||
if '/details/' not in current_url:
|
if '/details/' not in current_url:
|
||||||
# Find and follow the 'Details' link to the detail page
|
# Find and follow the 'Details' link to the detail page
|
||||||
logger.info("[WBM] Looking for 'Details' link to open detail page...")
|
logger.info("[WBM] Looking for 'Details' link to open detail page...")
|
||||||
|
|
@@ -91,7 +104,7 @@ class WBMHandler(BaseHandler):
|
||||||
# Save HTML of detail page for debugging
|
# Save HTML of detail page for debugging
|
||||||
try:
|
try:
|
||||||
html_content = await page.content()
|
html_content = await page.content()
|
||||||
with open("data/wbm_detail_debug.html", "w", encoding="utf-8") as f:
|
with open(DATA_DIR / "wbm_detail_debug.html", "w", encoding="utf-8") as f:
|
||||||
f.write(html_content)
|
f.write(html_content)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"[WBM] Could not save detail debug HTML: {e}")
|
logger.warning(f"[WBM] Could not save detail debug HTML: {e}")
|
||||||
|
|
@@ -159,8 +172,8 @@ class WBMHandler(BaseHandler):
|
||||||
logger.debug("[WBM] Filled Vorname")
|
logger.debug("[WBM] Filled Vorname")
|
||||||
form_filled = True
|
form_filled = True
|
||||||
|
|
||||||
# Email
|
# Email (use ID or specific field name pattern)
|
||||||
email_input = await page.query_selector('input[name*="email" i]')
|
email_input = await page.query_selector('input#powermail_field_e_mail, input[name*="[e_mail]"], input[name*="[email]"]')
|
||||||
if email_input:
|
if email_input:
|
||||||
await email_input.fill(os.getenv("FORM_EMAIL", ""))
|
await email_input.fill(os.getenv("FORM_EMAIL", ""))
|
||||||
logger.debug("[WBM] Filled Email")
|
logger.debug("[WBM] Filled Email")
|
||||||
|
|
@@ -196,13 +209,19 @@ class WBMHandler(BaseHandler):
|
||||||
await ort_input.fill(os.getenv("FORM_ORT", ""))
|
await ort_input.fill(os.getenv("FORM_ORT", ""))
|
||||||
logger.debug("[WBM] Filled Ort")
|
logger.debug("[WBM] Filled Ort")
|
||||||
|
|
||||||
# Datenschutz checkbox
|
# Datenschutz checkbox - use force click or click the label to avoid interception
|
||||||
datenschutz_checkbox = await page.query_selector('input[name*="datenschutz" i][type="checkbox"]')
|
datenschutz_checkbox = await page.query_selector('input[name*="datenschutz" i][type="checkbox"]')
|
||||||
if datenschutz_checkbox:
|
if datenschutz_checkbox:
|
||||||
is_checked = await datenschutz_checkbox.is_checked()
|
is_checked = await datenschutz_checkbox.is_checked()
|
||||||
if not is_checked:
|
if not is_checked:
|
||||||
await datenschutz_checkbox.check()
|
# Try clicking the label first, fall back to force click on input
|
||||||
logger.debug("[WBM] Checked Datenschutz")
|
datenschutz_label = await page.query_selector('label[for]:has(input[name*="datenschutz" i])')
|
||||||
|
if datenschutz_label:
|
||||||
|
await datenschutz_label.click()
|
||||||
|
logger.debug("[WBM] Clicked Datenschutz label")
|
||||||
|
else:
|
||||||
|
await datenschutz_checkbox.click(force=True)
|
||||||
|
logger.debug("[WBM] Force-clicked Datenschutz checkbox")
|
||||||
|
|
||||||
if not form_filled:
|
if not form_filled:
|
||||||
logger.error("[WBM] No form fields found - form may not be visible")
|
logger.error("[WBM] No form fields found - form may not be visible")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue